about summary refs log tree commit diff stats
path: root/lib/librte_eal/linuxapp
diff options
context:
space:
mode:
Diffstat (limited to 'lib/librte_eal/linuxapp')
-rw-r--r-- lib/librte_eal/linuxapp/Makefile 2
-rw-r--r-- lib/librte_eal/linuxapp/eal/Makefile 6
-rw-r--r-- lib/librte_eal/linuxapp/eal/eal.c 128
-rw-r--r-- lib/librte_eal/linuxapp/eal/eal_alarm.c 5
-rw-r--r-- lib/librte_eal/linuxapp/eal/eal_debug.c 4
-rw-r--r-- lib/librte_eal/linuxapp/eal/eal_hugepage_info.c 9
-rw-r--r-- lib/librte_eal/linuxapp/eal/eal_interrupts.c 117
-rw-r--r-- lib/librte_eal/linuxapp/eal/eal_memory.c 103
-rw-r--r-- lib/librte_eal/linuxapp/eal/eal_pci.c 149
-rw-r--r-- lib/librte_eal/linuxapp/eal/eal_pci_init.h 3
-rw-r--r-- lib/librte_eal/linuxapp/eal/eal_pci_uio.c 3
-rw-r--r-- lib/librte_eal/linuxapp/eal/eal_pci_vfio.c 88
-rw-r--r-- lib/librte_eal/linuxapp/eal/eal_vfio.c 386
-rw-r--r-- lib/librte_eal/linuxapp/eal/eal_vfio.h 67
-rw-r--r-- lib/librte_eal/linuxapp/eal/eal_vfio_mp_sync.c 23
-rw-r--r-- lib/librte_eal/linuxapp/eal/include/exec-env/rte_interrupts.h 14
-rw-r--r-- lib/librte_eal/linuxapp/eal/include/exec-env/rte_kni_common.h 5
-rw-r--r-- lib/librte_eal/linuxapp/eal/rte_eal_version.map 74
-rw-r--r-- lib/librte_eal/linuxapp/igb_uio/compat.h 4
-rw-r--r-- lib/librte_eal/linuxapp/igb_uio/igb_uio.c 39
-rw-r--r-- lib/librte_eal/linuxapp/kni/Makefile 58
-rw-r--r-- lib/librte_eal/linuxapp/kni/compat.h 6
-rw-r--r-- lib/librte_eal/linuxapp/kni/ethtool/igb/igb_ethtool.c 2
-rw-r--r-- lib/librte_eal/linuxapp/kni/ethtool/igb/igb_main.c 15
-rw-r--r-- lib/librte_eal/linuxapp/kni/ethtool/igb/kcompat.h 12
-rw-r--r-- lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_ethtool.c 4
-rw-r--r-- lib/librte_eal/linuxapp/kni/kni_dev.h 39
-rw-r--r-- lib/librte_eal/linuxapp/kni/kni_fifo.h 14
-rw-r--r-- lib/librte_eal/linuxapp/kni/kni_misc.c 47
-rw-r--r-- lib/librte_eal/linuxapp/kni/kni_net.c 13
-rw-r--r-- lib/librte_eal/linuxapp/kni/kni_vhost.c 842
-rw-r--r-- lib/librte_eal/linuxapp/xen_dom0/Makefile 3
32 files changed, 992 insertions, 1292 deletions
diff --git a/lib/librte_eal/linuxapp/Makefile b/lib/librte_eal/linuxapp/Makefile
index 20d2a916..4794696b 100644
--- a/lib/librte_eal/linuxapp/Makefile
+++ b/lib/librte_eal/linuxapp/Makefile
@@ -34,6 +34,8 @@ include $(RTE_SDK)/mk/rte.vars.mk
DIRS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal
DIRS-$(CONFIG_RTE_EAL_IGB_UIO) += igb_uio
DIRS-$(CONFIG_RTE_KNI_KMOD) += kni
+DEPDIRS-kni := eal
DIRS-$(CONFIG_RTE_LIBRTE_XEN_DOM0) += xen_dom0
+DEPDIRS-xen_dom0 := eal
include $(RTE_SDK)/mk/rte.subdir.mk
diff --git a/lib/librte_eal/linuxapp/eal/Makefile b/lib/librte_eal/linuxapp/eal/Makefile
index 4e206f09..640afd08 100644
--- a/lib/librte_eal/linuxapp/eal/Makefile
+++ b/lib/librte_eal/linuxapp/eal/Makefile
@@ -37,7 +37,7 @@ ARCH_DIR ?= $(RTE_ARCH)
EXPORT_MAP := rte_eal_version.map
VPATH += $(RTE_SDK)/lib/librte_eal/common/arch/$(ARCH_DIR)
-LIBABIVER := 3
+LIBABIVER := 4
VPATH += $(RTE_SDK)/lib/librte_eal/common
@@ -87,6 +87,7 @@ SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_common_cpuflags.c
SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_common_string_fns.c
SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_common_hexdump.c
SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_common_devargs.c
+SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_common_bus.c
SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_common_dev.c
SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_common_options.c
SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_common_thread.c
@@ -130,7 +131,4 @@ INC := rte_interrupts.h rte_kni_common.h rte_dom0_common.h
SYMLINK-$(CONFIG_RTE_EXEC_ENV_LINUXAPP)-include/exec-env := \
$(addprefix include/exec-env/,$(INC))
-DEPDIRS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += lib/librte_eal/common
-DEPDIRS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += lib/librte_eal/common/arch/$(ARCH_DIR)
-
include $(RTE_SDK)/mk/rte.lib.mk
diff --git a/lib/librte_eal/linuxapp/eal/eal.c b/lib/librte_eal/linuxapp/eal/eal.c
index 2075282e..7c78f2dc 100644
--- a/lib/librte_eal/linuxapp/eal/eal.c
+++ b/lib/librte_eal/linuxapp/eal/eal.c
@@ -61,6 +61,7 @@
#include <rte_launch.h>
#include <rte_eal.h>
#include <rte_eal_memconfig.h>
+#include <rte_errno.h>
#include <rte_per_lcore.h>
#include <rte_lcore.h>
#include <rte_log.h>
@@ -69,6 +70,7 @@
#include <rte_string_fns.h>
#include <rte_cpuflags.h>
#include <rte_interrupts.h>
+#include <rte_bus.h>
#include <rte_pci.h>
#include <rte_dev.h>
#include <rte_devargs.h>
@@ -210,7 +212,7 @@ rte_eal_config_create(void)
rte_panic("Cannot mmap memory for rte_config\n");
}
memcpy(rte_mem_cfg_addr, &early_mem_config, sizeof(early_mem_config));
- rte_config.mem_config = (struct rte_mem_config *) rte_mem_cfg_addr;
+ rte_config.mem_config = rte_mem_cfg_addr;
/* store address of the config in the config itself so that secondary
* processes could later map the config into this exact location */
@@ -490,8 +492,6 @@ eal_log_level_parse(int argc, char **argv)
argvopt = argv;
optind = 1;
- eal_reset_internal_config(&internal_config);
-
while ((opt = getopt_long(argc, argvopt, eal_short_options,
eal_long_options, &option_index)) != EOF) {
@@ -739,6 +739,12 @@ static int rte_eal_vfio_setup(void)
}
#endif
+static void rte_eal_init_alert(const char *msg) /* Report a fatal init error; msg is written twice, each time followed by '\n'. */
+{
+ fprintf(stderr, "EAL: FATAL: %s\n", msg); /* direct copy to stderr */
+ RTE_LOG(ERR, EAL, "%s\n", msg); /* second copy through the EAL log at ERR level */
+}
+
/* Launch threads, called at application init(). */
int
rte_eal_init(int argc, char **argv)
@@ -751,33 +757,51 @@ rte_eal_init(int argc, char **argv)
char thread_name[RTE_MAX_THREAD_NAME_LEN];
/* checks if the machine is adequate */
- rte_cpu_check_supported();
+ if (!rte_cpu_is_supported()) {
+ rte_eal_init_alert("unsupported cpu type.");
+ rte_errno = ENOTSUP;
+ return -1;
+ }
- if (!rte_atomic32_test_and_set(&run_once))
+ if (!rte_atomic32_test_and_set(&run_once)) {
+ rte_eal_init_alert("already called initialization.");
+ rte_errno = EALREADY;
return -1;
+ }
logid = strrchr(argv[0], '/');
logid = strdup(logid ? logid + 1: argv[0]);
thread_id = pthread_self();
- eal_log_level_parse(argc, argv);
+ eal_reset_internal_config(&internal_config);
/* set log level as early as possible */
- rte_set_log_level(internal_config.log_level);
+ eal_log_level_parse(argc, argv);
- if (rte_eal_cpu_init() < 0)
- rte_panic("Cannot detect lcores\n");
+ if (rte_eal_cpu_init() < 0) {
+ rte_eal_init_alert("Cannot detect lcores.");
+ rte_errno = ENOTSUP;
+ return -1;
+ }
fctret = eal_parse_args(argc, argv);
- if (fctret < 0)
- exit(1);
+ if (fctret < 0) {
+ rte_eal_init_alert("Invalid 'command line' arguments.");
+ rte_errno = EINVAL;
+ rte_atomic32_clear(&run_once);
+ return -1;
+ }
if (internal_config.no_hugetlbfs == 0 &&
internal_config.process_type != RTE_PROC_SECONDARY &&
internal_config.xen_dom0_support == 0 &&
- eal_hugepage_info_init() < 0)
- rte_panic("Cannot get hugepage information\n");
+ eal_hugepage_info_init() < 0) {
+ rte_eal_init_alert("Cannot get hugepage information.");
+ rte_errno = EACCES;
+ rte_atomic32_clear(&run_once);
+ return -1;
+ }
if (internal_config.memory == 0 && internal_config.force_sockets == 0) {
if (internal_config.no_hugetlbfs)
@@ -799,39 +823,59 @@ rte_eal_init(int argc, char **argv)
rte_config_init();
- if (rte_eal_log_init(logid, internal_config.syslog_facility) < 0)
- rte_panic("Cannot init logs\n");
-
- if (rte_eal_pci_init() < 0)
- rte_panic("Cannot init PCI\n");
+ if (rte_eal_log_init(logid, internal_config.syslog_facility) < 0) {
+ rte_eal_init_alert("Cannot init logging.");
+ rte_errno = ENOMEM;
+ rte_atomic32_clear(&run_once);
+ return -1;
+ }
#ifdef VFIO_PRESENT
- if (rte_eal_vfio_setup() < 0)
- rte_panic("Cannot init VFIO\n");
+ if (rte_eal_vfio_setup() < 0) {
+ rte_eal_init_alert("Cannot init VFIO"); /* no '\n' here: rte_eal_init_alert() appends one itself */
+ rte_errno = EAGAIN;
+ rte_atomic32_clear(&run_once);
+ return -1;
+ }
#endif
- if (rte_eal_memory_init() < 0)
- rte_panic("Cannot init memory\n");
+ if (rte_eal_memory_init() < 0) {
+ rte_eal_init_alert("Cannot init memory");
+ rte_errno = ENOMEM;
+ return -1;
+ }
/* the directories are locked during eal_hugepage_info_init */
eal_hugedirs_unlock();
- if (rte_eal_memzone_init() < 0)
- rte_panic("Cannot init memzone\n");
+ if (rte_eal_memzone_init() < 0) {
+ rte_eal_init_alert("Cannot init memzone");
+ rte_errno = ENODEV;
+ return -1;
+ }
- if (rte_eal_tailqs_init() < 0)
- rte_panic("Cannot init tail queues for objects\n");
+ if (rte_eal_tailqs_init() < 0) {
+ rte_eal_init_alert("Cannot init tail queues for objects");
+ rte_errno = EFAULT;
+ return -1;
+ }
- if (rte_eal_alarm_init() < 0)
- rte_panic("Cannot init interrupt-handling thread\n");
+ if (rte_eal_alarm_init() < 0) {
+ rte_eal_init_alert("Cannot init interrupt-handling thread");
+ /* rte_eal_alarm_init sets rte_errno on failure. */
+ return -1;
+ }
- if (rte_eal_timer_init() < 0)
- rte_panic("Cannot init HPET or TSC timers\n");
+ if (rte_eal_timer_init() < 0) {
+ rte_eal_init_alert("Cannot init HPET or TSC timers");
+ rte_errno = ENOTSUP;
+ return -1;
+ }
eal_check_mem_on_local_socket();
if (eal_plugins_init() < 0)
- rte_panic("Cannot init plugins\n");
+ rte_eal_init_alert("Cannot init plugins"); /* NOTE(review): failure is only reported here; initialization continues - confirm this is intended */
eal_thread_init_master(rte_config.master_lcore);
@@ -841,11 +885,16 @@ rte_eal_init(int argc, char **argv)
rte_config.master_lcore, (int)thread_id, cpuset,
ret == 0 ? "" : "...");
- if (rte_eal_dev_init() < 0)
- rte_panic("Cannot init pmd devices\n");
+ if (rte_eal_intr_init() < 0) {
+ rte_eal_init_alert("Cannot init interrupt-handling thread"); /* no '\n' here: rte_eal_init_alert() appends one itself */
+ return -1;
+ }
- if (rte_eal_intr_init() < 0)
- rte_panic("Cannot init interrupt-handling thread\n");
+ if (rte_bus_scan()) { /* any non-zero result is treated as failure */
+ rte_eal_init_alert("Cannot scan the buses for devices");
+ rte_errno = ENODEV;
+ return -1;
+ }
RTE_LCORE_FOREACH_SLAVE(i) {
@@ -883,9 +932,12 @@ rte_eal_init(int argc, char **argv)
rte_eal_mp_remote_launch(sync_func, NULL, SKIP_MASTER);
rte_eal_mp_wait_lcore();
- /* Probe & Initialize PCI devices */
- if (rte_eal_pci_probe())
- rte_panic("Cannot probe PCI\n");
+ /* Probe all the buses and devices/drivers on them */
+ if (rte_bus_probe()) { /* any non-zero result is treated as failure */
+ rte_eal_init_alert("Cannot probe devices"); /* no '\n' here: rte_eal_init_alert() appends one itself */
+ rte_errno = ENOTSUP;
+ return -1;
+ }
rte_eal_mcfg_complete();
diff --git a/lib/librte_eal/linuxapp/eal/eal_alarm.c b/lib/librte_eal/linuxapp/eal/eal_alarm.c
index 8b042abc..fbae4613 100644
--- a/lib/librte_eal/linuxapp/eal/eal_alarm.c
+++ b/lib/librte_eal/linuxapp/eal/eal_alarm.c
@@ -83,7 +83,7 @@ static rte_spinlock_t alarm_list_lk = RTE_SPINLOCK_INITIALIZER;
static struct rte_intr_handle intr_handle = {.fd = -1 };
static int handler_registered = 0;
-static void eal_alarm_callback(struct rte_intr_handle *hdl, void *arg);
+static void eal_alarm_callback(void *arg);
int
rte_eal_alarm_init(void)
@@ -102,8 +102,7 @@ error:
}
static void
-eal_alarm_callback(struct rte_intr_handle *hdl __rte_unused,
- void *arg __rte_unused)
+eal_alarm_callback(void *arg __rte_unused)
{
struct timespec now;
struct alarm_entry *ap;
diff --git a/lib/librte_eal/linuxapp/eal/eal_debug.c b/lib/librte_eal/linuxapp/eal/eal_debug.c
index 5fbc17c5..e1c75548 100644
--- a/lib/librte_eal/linuxapp/eal/eal_debug.c
+++ b/lib/librte_eal/linuxapp/eal/eal_debug.c
@@ -31,7 +31,9 @@
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
+#ifdef RTE_BACKTRACE
#include <execinfo.h>
+#endif
#include <stdarg.h>
#include <signal.h>
#include <stdlib.h>
@@ -47,6 +49,7 @@
/* dump the stack of the calling core */
void rte_dump_stack(void)
{
+#ifdef RTE_BACKTRACE
void *func[BACKTRACE_SIZE];
char **symb = NULL;
int size;
@@ -64,6 +67,7 @@ void rte_dump_stack(void)
}
free(symb);
+#endif /* RTE_BACKTRACE */
}
/* not implemented in this environment */
diff --git a/lib/librte_eal/linuxapp/eal/eal_hugepage_info.c b/lib/librte_eal/linuxapp/eal/eal_hugepage_info.c
index 18858e2d..7a21e8f6 100644
--- a/lib/librte_eal/linuxapp/eal/eal_hugepage_info.c
+++ b/lib/librte_eal/linuxapp/eal/eal_hugepage_info.c
@@ -283,9 +283,12 @@ eal_hugepage_info_init(void)
struct dirent *dirent;
dir = opendir(sys_dir_path);
- if (dir == NULL)
- rte_panic("Cannot open directory %s to read system hugepage "
- "info\n", sys_dir_path);
+ if (dir == NULL) {
+ RTE_LOG(ERR, EAL,
+ "Cannot open directory %s to read system hugepage info\n",
+ sys_dir_path);
+ return -1;
+ }
for (dirent = readdir(dir); dirent != NULL; dirent = readdir(dir)) {
struct hugepage_info *hpi;
diff --git a/lib/librte_eal/linuxapp/eal/eal_interrupts.c b/lib/librte_eal/linuxapp/eal/eal_interrupts.c
index 47a3b20a..2e3bd12a 100644
--- a/lib/librte_eal/linuxapp/eal/eal_interrupts.c
+++ b/lib/librte_eal/linuxapp/eal/eal_interrupts.c
@@ -46,6 +46,7 @@
#include <sys/ioctl.h>
#include <sys/eventfd.h>
#include <assert.h>
+#include <stdbool.h>
#include <rte_common.h>
#include <rte_interrupts.h>
@@ -136,7 +137,7 @@ static pthread_t intr_thread;
/* enable legacy (INTx) interrupts */
static int
-vfio_enable_intx(struct rte_intr_handle *intr_handle) {
+vfio_enable_intx(const struct rte_intr_handle *intr_handle) {
struct vfio_irq_set *irq_set;
char irq_set_buf[IRQ_SET_BUF_LEN];
int len, ret;
@@ -183,7 +184,7 @@ vfio_enable_intx(struct rte_intr_handle *intr_handle) {
/* disable legacy (INTx) interrupts */
static int
-vfio_disable_intx(struct rte_intr_handle *intr_handle) {
+vfio_disable_intx(const struct rte_intr_handle *intr_handle) {
struct vfio_irq_set *irq_set;
char irq_set_buf[IRQ_SET_BUF_LEN];
int len, ret;
@@ -194,14 +195,14 @@ vfio_disable_intx(struct rte_intr_handle *intr_handle) {
irq_set = (struct vfio_irq_set *) irq_set_buf;
irq_set->argsz = len;
irq_set->count = 1;
- irq_set->flags = VFIO_IRQ_SET_DATA_NONE | VFIO_IRQ_SET_ACTION_UNMASK;
+ irq_set->flags = VFIO_IRQ_SET_DATA_NONE | VFIO_IRQ_SET_ACTION_MASK;
irq_set->index = VFIO_PCI_INTX_IRQ_INDEX;
irq_set->start = 0;
ret = ioctl(intr_handle->vfio_dev_fd, VFIO_DEVICE_SET_IRQS, irq_set);
if (ret) {
- RTE_LOG(ERR, EAL, "Error unmasking INTx interrupts for fd %d\n",
+ RTE_LOG(ERR, EAL, "Error masking INTx interrupts for fd %d\n",
intr_handle->fd);
return -1;
}
@@ -226,7 +227,7 @@ vfio_disable_intx(struct rte_intr_handle *intr_handle) {
/* enable MSI interrupts */
static int
-vfio_enable_msi(struct rte_intr_handle *intr_handle) {
+vfio_enable_msi(const struct rte_intr_handle *intr_handle) {
int len, ret;
char irq_set_buf[IRQ_SET_BUF_LEN];
struct vfio_irq_set *irq_set;
@@ -255,7 +256,7 @@ vfio_enable_msi(struct rte_intr_handle *intr_handle) {
/* disable MSI interrupts */
static int
-vfio_disable_msi(struct rte_intr_handle *intr_handle) {
+vfio_disable_msi(const struct rte_intr_handle *intr_handle) {
struct vfio_irq_set *irq_set;
char irq_set_buf[IRQ_SET_BUF_LEN];
int len, ret;
@@ -280,7 +281,7 @@ vfio_disable_msi(struct rte_intr_handle *intr_handle) {
/* enable MSI-X interrupts */
static int
-vfio_enable_msix(struct rte_intr_handle *intr_handle) {
+vfio_enable_msix(const struct rte_intr_handle *intr_handle) {
int len, ret;
char irq_set_buf[MSIX_IRQ_SET_BUF_LEN];
struct vfio_irq_set *irq_set;
@@ -290,12 +291,10 @@ vfio_enable_msix(struct rte_intr_handle *intr_handle) {
irq_set = (struct vfio_irq_set *) irq_set_buf;
irq_set->argsz = len;
- if (!intr_handle->max_intr)
- intr_handle->max_intr = 1;
- else if (intr_handle->max_intr > RTE_MAX_RXTX_INTR_VEC_ID)
- intr_handle->max_intr = RTE_MAX_RXTX_INTR_VEC_ID + 1;
-
- irq_set->count = intr_handle->max_intr;
+ /* 0 < irq_set->count <= RTE_MAX_RXTX_INTR_VEC_ID + 1 */
+ irq_set->count = intr_handle->max_intr ?
+ (intr_handle->max_intr > RTE_MAX_RXTX_INTR_VEC_ID + 1 ?
+ RTE_MAX_RXTX_INTR_VEC_ID + 1 : intr_handle->max_intr) : 1;
irq_set->flags = VFIO_IRQ_SET_DATA_EVENTFD | VFIO_IRQ_SET_ACTION_TRIGGER;
irq_set->index = VFIO_PCI_MSIX_IRQ_INDEX;
irq_set->start = 0;
@@ -318,7 +317,7 @@ vfio_enable_msix(struct rte_intr_handle *intr_handle) {
/* disable MSI-X interrupts */
static int
-vfio_disable_msix(struct rte_intr_handle *intr_handle) {
+vfio_disable_msix(const struct rte_intr_handle *intr_handle) {
struct vfio_irq_set *irq_set;
char irq_set_buf[MSIX_IRQ_SET_BUF_LEN];
int len, ret;
@@ -343,7 +342,7 @@ vfio_disable_msix(struct rte_intr_handle *intr_handle) {
#endif
static int
-uio_intx_intr_disable(struct rte_intr_handle *intr_handle)
+uio_intx_intr_disable(const struct rte_intr_handle *intr_handle)
{
unsigned char command_high;
@@ -367,7 +366,7 @@ uio_intx_intr_disable(struct rte_intr_handle *intr_handle)
}
static int
-uio_intx_intr_enable(struct rte_intr_handle *intr_handle)
+uio_intx_intr_enable(const struct rte_intr_handle *intr_handle)
{
unsigned char command_high;
@@ -391,7 +390,7 @@ uio_intx_intr_enable(struct rte_intr_handle *intr_handle)
}
static int
-uio_intr_disable(struct rte_intr_handle *intr_handle)
+uio_intr_disable(const struct rte_intr_handle *intr_handle)
{
const int value = 0;
@@ -405,7 +404,7 @@ uio_intr_disable(struct rte_intr_handle *intr_handle)
}
static int
-uio_intr_enable(struct rte_intr_handle *intr_handle)
+uio_intr_enable(const struct rte_intr_handle *intr_handle)
{
const int value = 1;
@@ -419,7 +418,7 @@ uio_intr_enable(struct rte_intr_handle *intr_handle)
}
int
-rte_intr_callback_register(struct rte_intr_handle *intr_handle,
+rte_intr_callback_register(const struct rte_intr_handle *intr_handle,
rte_intr_callback_fn cb, void *cb_arg)
{
int ret, wake_thread;
@@ -491,7 +490,7 @@ rte_intr_callback_register(struct rte_intr_handle *intr_handle,
}
int
-rte_intr_callback_unregister(struct rte_intr_handle *intr_handle,
+rte_intr_callback_unregister(const struct rte_intr_handle *intr_handle,
rte_intr_callback_fn cb_fn, void *cb_arg)
{
int ret;
@@ -555,8 +554,11 @@ rte_intr_callback_unregister(struct rte_intr_handle *intr_handle,
}
int
-rte_intr_enable(struct rte_intr_handle *intr_handle)
+rte_intr_enable(const struct rte_intr_handle *intr_handle)
{
+ if (intr_handle && intr_handle->type == RTE_INTR_HANDLE_VDEV)
+ return 0;
+
if (!intr_handle || intr_handle->fd < 0 || intr_handle->uio_cfg_fd < 0)
return -1;
@@ -599,8 +601,11 @@ rte_intr_enable(struct rte_intr_handle *intr_handle)
}
int
-rte_intr_disable(struct rte_intr_handle *intr_handle)
+rte_intr_disable(const struct rte_intr_handle *intr_handle)
{
+ if (intr_handle && intr_handle->type == RTE_INTR_HANDLE_VDEV)
+ return 0;
+
if (!intr_handle || intr_handle->fd < 0 || intr_handle->uio_cfg_fd < 0)
return -1;
@@ -645,6 +650,7 @@ rte_intr_disable(struct rte_intr_handle *intr_handle)
static int
eal_intr_process_interrupts(struct epoll_event *events, int nfds)
{
+ bool call = false;
int n, bytes_read;
struct rte_intr_source *src;
struct rte_intr_callback *cb;
@@ -693,13 +699,18 @@ eal_intr_process_interrupts(struct epoll_event *events, int nfds)
bytes_read = sizeof(buf.vfio_intr_count);
break;
#endif
+ case RTE_INTR_HANDLE_VDEV:
case RTE_INTR_HANDLE_EXT:
+ bytes_read = 0;
+ call = true;
+ break;
+
default:
bytes_read = 1;
break;
}
- if (src->intr_handle.type != RTE_INTR_HANDLE_EXT) {
+ if (bytes_read > 0) {
/**
* read out to clear the ready-to-be-read flag
* for epoll_wait.
@@ -716,12 +727,14 @@ eal_intr_process_interrupts(struct epoll_event *events, int nfds)
} else if (bytes_read == 0)
RTE_LOG(ERR, EAL, "Read nothing from file "
"descriptor %d\n", events[n].data.fd);
+ else
+ call = true;
}
/* grab a lock, again to call callbacks and update status. */
rte_spinlock_lock(&intr_lock);
- if (bytes_read > 0) {
+ if (call) {
/* Finally, call all callbacks. */
TAILQ_FOREACH(cb, &src->callbacks, next) {
@@ -731,8 +744,7 @@ eal_intr_process_interrupts(struct epoll_event *events, int nfds)
rte_spinlock_unlock(&intr_lock);
/* call the actual callback */
- active_cb.cb_fn(&src->intr_handle,
- active_cb.cb_arg);
+ active_cb.cb_fn(active_cb.cb_arg);
/*get the lock back. */
rte_spinlock_lock(&intr_lock);
@@ -832,7 +844,7 @@ eal_intr_thread_main(__rte_unused void *arg)
TAILQ_FOREACH(src, &intr_sources, next) {
if (src->callbacks.tqh_first == NULL)
continue; /* skip those with no callbacks */
- ev.events = EPOLLIN | EPOLLPRI;
+ ev.events = EPOLLIN | EPOLLPRI | EPOLLRDHUP | EPOLLHUP;
ev.data.fd = src->intr_handle.fd;
/**
@@ -872,13 +884,16 @@ rte_eal_intr_init(void)
* create a pipe which will be waited by epoll and notified to
* rebuild the wait list of epoll.
*/
- if (pipe(intr_pipe.pipefd) < 0)
+ if (pipe(intr_pipe.pipefd) < 0) {
+ rte_errno = errno;
return -1;
+ }
/* create the host thread to wait/handle the interrupt */
ret = pthread_create(&intr_thread, NULL,
eal_intr_thread_main, NULL);
if (ret != 0) {
+ rte_errno = ret;
RTE_LOG(ERR, EAL,
"Failed to create thread for interrupt handling\n");
} else {
@@ -913,6 +928,14 @@ eal_intr_proc_rxtx_intr(int fd, const struct rte_intr_handle *intr_handle)
bytes_read = sizeof(buf.vfio_intr_count);
break;
#endif
+ case RTE_INTR_HANDLE_VDEV:
+ /* for vdev, fd points to:
+ * a. eventfd which does not need to read out;
+ * b. datapath fd which needs PMD to read out.
+ */
+ return;
+ case RTE_INTR_HANDLE_EXT:
+ return;
default:
bytes_read = 1;
RTE_LOG(INFO, EAL, "unexpected intr type\n");
@@ -1141,6 +1164,24 @@ rte_intr_rx_ctl(struct rte_intr_handle *intr_handle, int epfd,
return rc;
}
+void
+rte_intr_free_epoll_fd(struct rte_intr_handle *intr_handle) /* Drop every still-valid entry of intr_handle->elist[0 .. nb_efd-1] from its epoll instance. */
+{
+ uint32_t i;
+ struct rte_epoll_event *rev;
+
+ for (i = 0; i < intr_handle->nb_efd; i++) {
+ rev = &intr_handle->elist[i];
+ if (rev->status == RTE_EPOLL_INVALID) /* entry already invalid: nothing to remove */
+ continue;
+ if (rte_epoll_ctl(rev->epfd, EPOLL_CTL_DEL, rev->fd, rev)) { /* non-zero return is handled as a failed removal */
+ /* removal failed: force-free the entry's data and mark it invalid */
+ eal_epoll_data_safe_free(rev);
+ rev->status = RTE_EPOLL_INVALID;
+ }
+ }
+}
+
int
rte_intr_efd_enable(struct rte_intr_handle *intr_handle, uint32_t nb_efd)
{
@@ -1157,12 +1198,14 @@ rte_intr_efd_enable(struct rte_intr_handle *intr_handle, uint32_t nb_efd)
RTE_LOG(ERR, EAL,
"can't setup eventfd, error %i (%s)\n",
errno, strerror(errno));
- return -1;
+ return -errno;
}
intr_handle->efds[i] = fd;
}
intr_handle->nb_efd = n;
intr_handle->max_intr = NB_OTHER_INTR + n;
+ } else if (intr_handle->type == RTE_INTR_HANDLE_VDEV) {
+ /* do nothing, and let vdev driver to initialize this struct */
} else {
intr_handle->efds[0] = intr_handle->fd;
intr_handle->nb_efd = RTE_MIN(nb_efd, 1U);
@@ -1176,19 +1219,8 @@ void
rte_intr_efd_disable(struct rte_intr_handle *intr_handle)
{
uint32_t i;
- struct rte_epoll_event *rev;
-
- for (i = 0; i < intr_handle->nb_efd; i++) {
- rev = &intr_handle->elist[i];
- if (rev->status == RTE_EPOLL_INVALID)
- continue;
- if (rte_epoll_ctl(rev->epfd, EPOLL_CTL_DEL, rev->fd, rev)) {
- /* force free if the entry valid */
- eal_epoll_data_safe_free(rev);
- rev->status = RTE_EPOLL_INVALID;
- }
- }
+ rte_intr_free_epoll_fd(intr_handle);
if (intr_handle->max_intr > intr_handle->nb_efd) {
for (i = 0; i < intr_handle->nb_efd; i++)
close(intr_handle->efds[i]);
@@ -1218,5 +1250,8 @@ rte_intr_cap_multiple(struct rte_intr_handle *intr_handle)
if (intr_handle->type == RTE_INTR_HANDLE_VFIO_MSIX)
return 1;
+ if (intr_handle->type == RTE_INTR_HANDLE_VDEV)
+ return 1;
+
return 0;
}
diff --git a/lib/librte_eal/linuxapp/eal/eal_memory.c b/lib/librte_eal/linuxapp/eal/eal_memory.c
index a956bb22..ebe06833 100644
--- a/lib/librte_eal/linuxapp/eal/eal_memory.c
+++ b/lib/librte_eal/linuxapp/eal/eal_memory.c
@@ -64,6 +64,7 @@
#define _FILE_OFFSET_BITS 64
#include <errno.h>
#include <stdarg.h>
+#include <stdbool.h>
#include <stdlib.h>
#include <stdio.h>
#include <stdint.h>
@@ -122,26 +123,28 @@ int rte_xen_dom0_supported(void)
static uint64_t baseaddr_offset;
-static unsigned proc_pagemap_readable;
+static bool phys_addrs_available = true;
#define RANDOMIZE_VA_SPACE_FILE "/proc/sys/kernel/randomize_va_space"
static void
-test_proc_pagemap_readable(void)
+test_phys_addrs_available(void) /* Probe virtual-to-physical translation once and clear phys_addrs_available if it fails. */
{
- int fd = open("/proc/self/pagemap", O_RDONLY);
+ uint64_t tmp; /* never read; only its address is translated as a probe */
+ phys_addr_t physaddr;
- if (fd < 0) {
+ /* For dom0, phys addresses can always be available */
+ if (rte_xen_dom0_supported())
+ return;
+
+ physaddr = rte_mem_virt2phy(&tmp);
+ if (physaddr == RTE_BAD_PHYS_ADDR) { /* NOTE(review): errno logged below may be stale if rte_mem_virt2phy() failed without a failing system call - confirm */
RTE_LOG(ERR, EAL,
- "Cannot open /proc/self/pagemap: %s. "
- "virt2phys address translation will not work\n",
+ "Cannot obtain physical addresses: %s. "
+ "Only vfio will function.\n",
strerror(errno));
- return;
+ phys_addrs_available = false; /* from now on rte_mem_virt2phy() returns RTE_BAD_PHYS_ADDR early */
}
-
- /* Is readable */
- close(fd);
- proc_pagemap_readable = 1;
}
/* Lock page in physical memory and prevent from swapping. */
@@ -190,7 +193,7 @@ rte_mem_virt2phy(const void *virtaddr)
}
/* Cannot parse /proc/self/pagemap, no need to log errors everywhere */
- if (!proc_pagemap_readable)
+ if (!phys_addrs_available)
return RTE_BAD_PHYS_ADDR;
/* standard page size */
@@ -229,6 +232,9 @@ rte_mem_virt2phy(const void *virtaddr)
* the pfn (page frame number) are bits 0-54 (see
* pagemap.txt in linux Documentation)
*/
+ if ((page & 0x7fffffffffffffULL) == 0)
+ return RTE_BAD_PHYS_ADDR;
+
physaddr = ((page & 0x7fffffffffffffULL) * page_size)
+ ((unsigned long)virtaddr % page_size);
@@ -242,7 +248,7 @@ rte_mem_virt2phy(const void *virtaddr)
static int
find_physaddrs(struct hugepage_file *hugepg_tbl, struct hugepage_info *hpi)
{
- unsigned i;
+ unsigned int i;
phys_addr_t addr;
for (i = 0; i < hpi->num_pages[0]; i++) {
@@ -255,6 +261,22 @@ find_physaddrs(struct hugepage_file *hugepg_tbl, struct hugepage_info *hpi)
}
/*
+ * For each hugepage in hugepg_tbl, fill the physaddr value sequentially.
+ */
+static int
+set_physaddrs(struct hugepage_file *hugepg_tbl, struct hugepage_info *hpi) /* fills the first hpi->num_pages[0] entries of hugepg_tbl */
+{
+ unsigned int i;
+ static phys_addr_t addr; /* static: starts at zero and keeps its value between calls */
+
+ for (i = 0; i < hpi->num_pages[0]; i++) {
+ hugepg_tbl[i].physaddr = addr; /* running total of page sizes, not a translated address */
+ addr += hugepg_tbl[i].size;
+ }
+ return 0; /* no failure path: always reports success */
+}
+
+/*
* Check whether address-space layout randomization is enabled in
* the kernel. This is important for multi-process as it can prevent
* two processes mapping data to the same virtual address
@@ -313,7 +335,13 @@ get_virtual_area(size_t *size, size_t hugepage_sz)
}
do {
addr = mmap(addr,
- (*size) + hugepage_sz, PROT_READ, MAP_PRIVATE, fd, 0);
+ (*size) + hugepage_sz, PROT_READ,
+#ifdef RTE_ARCH_PPC_64
+ MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB,
+#else
+ MAP_PRIVATE,
+#endif
+ fd, 0);
if (addr == MAP_FAILED)
*size -= hugepage_sz;
} while (addr == MAP_FAILED && *size > 0);
@@ -592,12 +620,12 @@ static int
cmp_physaddr(const void *a, const void *b)
{
#ifndef RTE_ARCH_PPC_64
- const struct hugepage_file *p1 = (const struct hugepage_file *)a;
- const struct hugepage_file *p2 = (const struct hugepage_file *)b;
+ const struct hugepage_file *p1 = a;
+ const struct hugepage_file *p2 = b;
#else
/* PowerPC needs memory sorted in reverse order from x86 */
- const struct hugepage_file *p1 = (const struct hugepage_file *)b;
- const struct hugepage_file *p2 = (const struct hugepage_file *)a;
+ const struct hugepage_file *p1 = b;
+ const struct hugepage_file *p2 = a;
#endif
if (p1->physaddr < p2->physaddr)
return -1;
@@ -951,7 +979,7 @@ rte_eal_hugepage_init(void)
int nr_hugefiles, nr_hugepages = 0;
void *addr;
- test_proc_pagemap_readable();
+ test_phys_addrs_available();
memset(used_hp, 0, sizeof(used_hp));
@@ -1043,11 +1071,22 @@ rte_eal_hugepage_init(void)
continue;
}
- /* find physical addresses and sockets for each hugepage */
- if (find_physaddrs(&tmp_hp[hp_offset], hpi) < 0){
- RTE_LOG(DEBUG, EAL, "Failed to find phys addr for %u MB pages\n",
- (unsigned)(hpi->hugepage_sz / 0x100000));
- goto fail;
+ if (phys_addrs_available) {
+ /* find physical addresses for each hugepage */
+ if (find_physaddrs(&tmp_hp[hp_offset], hpi) < 0) {
+ RTE_LOG(DEBUG, EAL, "Failed to find phys addr "
+ "for %u MB pages\n",
+ (unsigned int)(hpi->hugepage_sz / 0x100000));
+ goto fail;
+ }
+ } else {
+ /* set physical addresses for each hugepage */
+ if (set_physaddrs(&tmp_hp[hp_offset], hpi) < 0) {
+ RTE_LOG(DEBUG, EAL, "Failed to set phys addr "
+ "for %u MB pages\n",
+ (unsigned int)(hpi->hugepage_sz / 0x100000));
+ goto fail;
+ }
}
if (find_numasocket(&tmp_hp[hp_offset], hpi) < 0){
@@ -1289,7 +1328,7 @@ rte_eal_hugepage_attach(void)
"into secondary processes\n");
}
- test_proc_pagemap_readable();
+ test_phys_addrs_available();
if (internal_config.xen_dom0_support) {
#ifdef RTE_LIBRTE_XEN_DOM0
@@ -1330,7 +1369,13 @@ rte_eal_hugepage_attach(void)
* use mmap to get identical addresses as the primary process.
*/
base_addr = mmap(mcfg->memseg[s].addr, mcfg->memseg[s].len,
- PROT_READ, MAP_PRIVATE, fd_zero, 0);
+ PROT_READ,
+#ifdef RTE_ARCH_PPC_64
+ MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB,
+#else
+ MAP_PRIVATE,
+#endif
+ fd_zero, 0);
if (base_addr == MAP_FAILED ||
base_addr != mcfg->memseg[s].addr) {
max_seg = s;
@@ -1426,3 +1471,9 @@ error:
close(fd_hugepage);
return -1;
}
+
+bool
+rte_eal_using_phys_addrs(void) /* Accessor for the file-scope phys_addrs_available flag. */
+{
+ return phys_addrs_available; /* initialised to true; test_phys_addrs_available() sets it to false when its probe fails */
+}
diff --git a/lib/librte_eal/linuxapp/eal/eal_pci.c b/lib/librte_eal/linuxapp/eal/eal_pci.c
index 876ba381..595622b2 100644
--- a/lib/librte_eal/linuxapp/eal/eal_pci.c
+++ b/lib/librte_eal/linuxapp/eal/eal_pci.c
@@ -35,6 +35,7 @@
#include <dirent.h>
#include <rte_log.h>
+#include <rte_bus.h>
#include <rte_pci.h>
#include <rte_eal_memconfig.h>
#include <rte_malloc.h>
@@ -54,44 +55,7 @@
* IGB_UIO driver (or doesn't initialize, if the device wasn't bound to it).
*/
-/* unbind kernel driver for this device */
-int
-pci_unbind_kernel_driver(struct rte_pci_device *dev)
-{
- int n;
- FILE *f;
- char filename[PATH_MAX];
- char buf[BUFSIZ];
- struct rte_pci_addr *loc = &dev->addr;
-
- /* open /sys/bus/pci/devices/AAAA:BB:CC.D/driver */
- snprintf(filename, sizeof(filename),
- "%s/" PCI_PRI_FMT "/driver/unbind", pci_get_sysfs_path(),
- loc->domain, loc->bus, loc->devid, loc->function);
-
- f = fopen(filename, "w");
- if (f == NULL) /* device was not bound */
- return 0;
-
- n = snprintf(buf, sizeof(buf), PCI_PRI_FMT "\n",
- loc->domain, loc->bus, loc->devid, loc->function);
- if ((n < 0) || (n >= (int)sizeof(buf))) {
- RTE_LOG(ERR, EAL, "%s(): snprintf failed\n", __func__);
- goto error;
- }
- if (fwrite(buf, n, 1, f) == 0) {
- RTE_LOG(ERR, EAL, "%s(): could not write to %s\n", __func__,
- filename);
- goto error;
- }
-
- fclose(f);
- return 0;
-
-error:
- fclose(f);
- return -1;
-}
+extern struct rte_pci_bus rte_pci_bus;
static int
pci_get_kernel_driver_by_path(const char *filename, char *dri_name)
@@ -124,7 +88,7 @@ pci_get_kernel_driver_by_path(const char *filename, char *dri_name)
/* Map pci device */
int
-rte_eal_pci_map_device(struct rte_pci_device *dev)
+rte_pci_map_device(struct rte_pci_device *dev)
{
int ret = -1;
@@ -138,8 +102,10 @@ rte_eal_pci_map_device(struct rte_pci_device *dev)
break;
case RTE_KDRV_IGB_UIO:
case RTE_KDRV_UIO_GENERIC:
- /* map resources for devices that use uio */
- ret = pci_uio_map_resource(dev);
+ if (rte_eal_using_phys_addrs()) {
+ /* map resources for devices that use uio */
+ ret = pci_uio_map_resource(dev);
+ }
break;
default:
RTE_LOG(DEBUG, EAL,
@@ -153,12 +119,15 @@ rte_eal_pci_map_device(struct rte_pci_device *dev)
/* Unmap pci device */
void
-rte_eal_pci_unmap_device(struct rte_pci_device *dev)
+rte_pci_unmap_device(struct rte_pci_device *dev)
{
/* try unmapping the NIC resources using VFIO if it exists */
switch (dev->kdrv) {
case RTE_KDRV_VFIO:
- RTE_LOG(ERR, EAL, "Hotplug doesn't support vfio yet\n");
+#ifdef VFIO_PRESENT
+ if (pci_vfio_is_enabled())
+ pci_vfio_unmap_resource(dev);
+#endif
break;
case RTE_KDRV_IGB_UIO:
case RTE_KDRV_UIO_GENERIC:
@@ -267,8 +236,7 @@ error:
/* Scan one pci sysfs entry, and fill the devices list from it. */
static int
-pci_scan_one(const char *dirname, uint16_t domain, uint8_t bus,
- uint8_t devid, uint8_t function)
+pci_scan_one(const char *dirname, const struct rte_pci_addr *addr)
{
char filename[PATH_MAX];
unsigned long tmp;
@@ -281,10 +249,7 @@ pci_scan_one(const char *dirname, uint16_t domain, uint8_t bus,
return -1;
memset(dev, 0, sizeof(*dev));
- dev->addr.domain = domain;
- dev->addr.bus = bus;
- dev->addr.devid = devid;
- dev->addr.function = function;
+ dev->addr = *addr;
/* get vendor id */
snprintf(filename, sizeof(filename), "%s/vendor", dirname);
@@ -359,6 +324,9 @@ pci_scan_one(const char *dirname, uint16_t domain, uint8_t bus,
dev->device.numa_node = tmp;
}
+ rte_pci_device_name(addr, dev->name, sizeof(dev->name));
+ dev->device.name = dev->name;
+
/* parse resources */
snprintf(filename, sizeof(filename), "%s/resource", dirname);
if (pci_parse_sysfs_resource(filename, dev) < 0) {
@@ -389,21 +357,19 @@ pci_scan_one(const char *dirname, uint16_t domain, uint8_t bus,
dev->kdrv = RTE_KDRV_NONE;
/* device is valid, add in list (sorted) */
- if (TAILQ_EMPTY(&pci_device_list)) {
- rte_eal_device_insert(&dev->device);
- TAILQ_INSERT_TAIL(&pci_device_list, dev, next);
+ if (TAILQ_EMPTY(&rte_pci_bus.device_list)) {
+ rte_pci_add_device(dev);
} else {
struct rte_pci_device *dev2;
int ret;
- TAILQ_FOREACH(dev2, &pci_device_list, next) {
+ TAILQ_FOREACH(dev2, &rte_pci_bus.device_list, next) {
ret = rte_eal_compare_pci_addr(&dev->addr, &dev2->addr);
if (ret > 0)
continue;
if (ret < 0) {
- TAILQ_INSERT_BEFORE(dev2, dev, next);
- rte_eal_device_insert(&dev->device);
+ rte_pci_insert_device(dev2, dev);
} else { /* already registered */
dev2->kdrv = dev->kdrv;
dev2->max_vfs = dev->max_vfs;
@@ -413,8 +379,8 @@ pci_scan_one(const char *dirname, uint16_t domain, uint8_t bus,
}
return 0;
}
- rte_eal_device_insert(&dev->device);
- TAILQ_INSERT_TAIL(&pci_device_list, dev, next);
+
+ rte_pci_add_device(dev);
}
return 0;
@@ -429,16 +395,14 @@ pci_update_device(const struct rte_pci_addr *addr)
pci_get_sysfs_path(), addr->domain, addr->bus, addr->devid,
addr->function);
- return pci_scan_one(filename, addr->domain, addr->bus, addr->devid,
- addr->function);
+ return pci_scan_one(filename, addr);
}
/*
* split up a pci address into its constituent parts.
*/
static int
-parse_pci_addr_format(const char *buf, int bufsize, uint16_t *domain,
- uint8_t *bus, uint8_t *devid, uint8_t *function)
+parse_pci_addr_format(const char *buf, int bufsize, struct rte_pci_addr *addr)
{
/* first split on ':' */
union splitaddr {
@@ -466,10 +430,10 @@ parse_pci_addr_format(const char *buf, int bufsize, uint16_t *domain,
/* now convert to int values */
errno = 0;
- *domain = (uint16_t)strtoul(splitaddr.domain, NULL, 16);
- *bus = (uint8_t)strtoul(splitaddr.bus, NULL, 16);
- *devid = (uint8_t)strtoul(splitaddr.devid, NULL, 16);
- *function = (uint8_t)strtoul(splitaddr.function, NULL, 10);
+ addr->domain = (uint16_t)strtoul(splitaddr.domain, NULL, 16);
+ addr->bus = (uint8_t)strtoul(splitaddr.bus, NULL, 16);
+ addr->devid = (uint8_t)strtoul(splitaddr.devid, NULL, 16);
+ addr->function = (uint8_t)strtoul(splitaddr.function, NULL, 10);
if (errno != 0)
goto error;
@@ -485,13 +449,16 @@ error:
* list
*/
int
-rte_eal_pci_scan(void)
+rte_pci_scan(void)
{
struct dirent *e;
DIR *dir;
char dirname[PATH_MAX];
- uint16_t domain;
- uint8_t bus, devid, function;
+ struct rte_pci_addr addr;
+
+ /* for debug purposes, PCI can be disabled */
+ if (internal_config.no_pci)
+ return 0;
dir = opendir(pci_get_sysfs_path());
if (dir == NULL) {
@@ -504,13 +471,13 @@ rte_eal_pci_scan(void)
if (e->d_name[0] == '.')
continue;
- if (parse_pci_addr_format(e->d_name, sizeof(e->d_name), &domain,
- &bus, &devid, &function) != 0)
+ if (parse_pci_addr_format(e->d_name, sizeof(e->d_name), &addr) != 0)
continue;
snprintf(dirname, sizeof(dirname), "%s/%s",
pci_get_sysfs_path(), e->d_name);
- if (pci_scan_one(dirname, domain, bus, devid, function) < 0)
+
+ if (pci_scan_one(dirname, &addr) < 0)
goto error;
}
closedir(dir);
@@ -522,8 +489,8 @@ error:
}
/* Read PCI config space. */
-int rte_eal_pci_read_config(const struct rte_pci_device *device,
- void *buf, size_t len, off_t offset)
+int rte_pci_read_config(const struct rte_pci_device *device,
+ void *buf, size_t len, off_t offset)
{
const struct rte_intr_handle *intr_handle = &device->intr_handle;
@@ -547,8 +514,8 @@ int rte_eal_pci_read_config(const struct rte_pci_device *device,
}
/* Write PCI config space. */
-int rte_eal_pci_write_config(const struct rte_pci_device *device,
- const void *buf, size_t len, off_t offset)
+int rte_pci_write_config(const struct rte_pci_device *device,
+ const void *buf, size_t len, off_t offset)
{
const struct rte_intr_handle *intr_handle = &device->intr_handle;
@@ -574,7 +541,7 @@ int rte_eal_pci_write_config(const struct rte_pci_device *device,
#if defined(RTE_ARCH_X86)
static int
pci_ioport_map(struct rte_pci_device *dev, int bar __rte_unused,
- struct rte_pci_ioport *p)
+ struct rte_pci_ioport *p)
{
uint16_t start, end;
FILE *fp;
@@ -632,8 +599,8 @@ pci_ioport_map(struct rte_pci_device *dev, int bar __rte_unused,
#endif
int
-rte_eal_pci_ioport_map(struct rte_pci_device *dev, int bar,
- struct rte_pci_ioport *p)
+rte_pci_ioport_map(struct rte_pci_device *dev, int bar,
+ struct rte_pci_ioport *p)
{
int ret = -1;
@@ -670,8 +637,8 @@ rte_eal_pci_ioport_map(struct rte_pci_device *dev, int bar,
}
void
-rte_eal_pci_ioport_read(struct rte_pci_ioport *p,
- void *data, size_t len, off_t offset)
+rte_pci_ioport_read(struct rte_pci_ioport *p,
+ void *data, size_t len, off_t offset)
{
switch (p->dev->kdrv) {
#ifdef VFIO_PRESENT
@@ -696,8 +663,8 @@ rte_eal_pci_ioport_read(struct rte_pci_ioport *p,
}
void
-rte_eal_pci_ioport_write(struct rte_pci_ioport *p,
- const void *data, size_t len, off_t offset)
+rte_pci_ioport_write(struct rte_pci_ioport *p,
+ const void *data, size_t len, off_t offset)
{
switch (p->dev->kdrv) {
#ifdef VFIO_PRESENT
@@ -722,7 +689,7 @@ rte_eal_pci_ioport_write(struct rte_pci_ioport *p,
}
int
-rte_eal_pci_ioport_unmap(struct rte_pci_ioport *p)
+rte_pci_ioport_unmap(struct rte_pci_ioport *p)
{
int ret = -1;
@@ -754,19 +721,3 @@ rte_eal_pci_ioport_unmap(struct rte_pci_ioport *p)
return ret;
}
-
-/* Init the PCI EAL subsystem */
-int
-rte_eal_pci_init(void)
-{
- /* for debug purposes, PCI can be disabled */
- if (internal_config.no_pci)
- return 0;
-
- if (rte_eal_pci_scan() < 0) {
- RTE_LOG(ERR, EAL, "%s(): Cannot scan PCI bus\n", __func__);
- return -1;
- }
-
- return 0;
-}
diff --git a/lib/librte_eal/linuxapp/eal/eal_pci_init.h b/lib/librte_eal/linuxapp/eal/eal_pci_init.h
index 6a960d1b..ae2980d6 100644
--- a/lib/librte_eal/linuxapp/eal/eal_pci_init.h
+++ b/lib/librte_eal/linuxapp/eal/eal_pci_init.h
@@ -88,8 +88,9 @@ void pci_vfio_ioport_write(struct rte_pci_ioport *p,
const void *data, size_t len, off_t offset);
int pci_vfio_ioport_unmap(struct rte_pci_ioport *p);
-/* map VFIO resource prototype */
+/* map/unmap VFIO resource prototype */
int pci_vfio_map_resource(struct rte_pci_device *dev);
+int pci_vfio_unmap_resource(struct rte_pci_device *dev);
#endif
diff --git a/lib/librte_eal/linuxapp/eal/eal_pci_uio.c b/lib/librte_eal/linuxapp/eal/eal_pci_uio.c
index 3e4ffb57..fa10329f 100644
--- a/lib/librte_eal/linuxapp/eal/eal_pci_uio.c
+++ b/lib/librte_eal/linuxapp/eal/eal_pci_uio.c
@@ -38,6 +38,7 @@
#include <inttypes.h>
#include <sys/stat.h>
#include <sys/mman.h>
+#include <sys/sysmacros.h>
#include <linux/pci_regs.h>
#if defined(RTE_ARCH_X86)
@@ -230,7 +231,7 @@ pci_uio_free_resource(struct rte_pci_device *dev,
close(dev->intr_handle.uio_cfg_fd);
dev->intr_handle.uio_cfg_fd = -1;
}
- if (dev->intr_handle.fd) {
+ if (dev->intr_handle.fd >= 0) {
close(dev->intr_handle.fd);
dev->intr_handle.fd = -1;
dev->intr_handle.type = RTE_INTR_HANDLE_UNKNOWN;
diff --git a/lib/librte_eal/linuxapp/eal/eal_pci_vfio.c b/lib/librte_eal/linuxapp/eal/eal_pci_vfio.c
index 5f478c59..2be13195 100644
--- a/lib/librte_eal/linuxapp/eal/eal_pci_vfio.c
+++ b/lib/librte_eal/linuxapp/eal/eal_pci_vfio.c
@@ -38,6 +38,7 @@
#include <sys/socket.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
+#include <stdbool.h>
#include <rte_log.h>
#include <rte_pci.h>
@@ -172,7 +173,7 @@ pci_vfio_get_msix_bar(int fd, int *msix_bar, uint32_t *msix_table_offset,
/* set PCI bus mastering */
static int
-pci_vfio_set_bus_master(int dev_fd)
+pci_vfio_set_bus_master(int dev_fd, bool op)
{
uint16_t reg;
int ret;
@@ -185,8 +186,11 @@ pci_vfio_set_bus_master(int dev_fd)
return -1;
}
- /* set the master bit */
- reg |= PCI_COMMAND_MASTER;
+ if (op)
+ /* set the master bit */
+ reg |= PCI_COMMAND_MASTER;
+ else
+ reg &= ~(PCI_COMMAND_MASTER);
ret = pwrite64(dev_fd, &reg, sizeof(reg),
VFIO_GET_REGION_ADDR(VFIO_PCI_CONFIG_REGION_INDEX) +
@@ -355,7 +359,8 @@ pci_vfio_map_resource(struct rte_pci_device *dev)
} else {
/* if we're in a secondary process, just find our tailq entry */
TAILQ_FOREACH(vfio_res, vfio_res_list, next) {
- if (memcmp(&vfio_res->pci_addr, &dev->addr, sizeof(dev->addr)))
+ if (rte_eal_compare_pci_addr(&vfio_res->pci_addr,
+ &dev->addr))
continue;
break;
}
@@ -517,7 +522,7 @@ pci_vfio_map_resource(struct rte_pci_device *dev)
}
/* set bus mastering for the device */
- if (pci_vfio_set_bus_master(vfio_dev_fd)) {
+ if (pci_vfio_set_bus_master(vfio_dev_fd, true)) {
RTE_LOG(ERR, EAL, " %s cannot set up bus mastering!\n", pci_addr);
close(vfio_dev_fd);
rte_free(vfio_res);
@@ -535,6 +540,79 @@ pci_vfio_map_resource(struct rte_pci_device *dev)
}
+/*
+ * Release the VFIO resources of a PCI device: close the interrupt eventfd,
+ * clear bus mastering, release the VFIO device, unmap its BARs and remove
+ * its entry from the VFIO resource list. Returns 0 on success, <0 on error.
+ */
int
+pci_vfio_unmap_resource(struct rte_pci_device *dev)
+{
+	char pci_addr[PATH_MAX] = {0};
+	struct rte_pci_addr *loc = &dev->addr;
+	int i, ret;
+	struct mapped_pci_resource *vfio_res = NULL;
+	struct mapped_pci_res_list *vfio_res_list;
+	struct pci_map *maps;
+
+	/* store PCI address string */
+	snprintf(pci_addr, sizeof(pci_addr), PCI_PRI_FMT,
+			loc->domain, loc->bus, loc->devid, loc->function);
+
+	if (close(dev->intr_handle.fd) < 0) {
+		RTE_LOG(INFO, EAL, "Error when closing eventfd file descriptor for %s\n",
+			pci_addr);
+		return -1;
+	}
+	/* do not leave a stale descriptor behind, as the UIO path does */
+	dev->intr_handle.fd = -1;
+
+	if (pci_vfio_set_bus_master(dev->intr_handle.vfio_dev_fd, false)) {
+		RTE_LOG(ERR, EAL, " %s cannot unset bus mastering for PCI device!\n",
+				pci_addr);
+		return -1;
+	}
+
+	ret = vfio_release_device(pci_get_sysfs_path(), pci_addr,
+				  dev->intr_handle.vfio_dev_fd);
+	if (ret < 0) {
+		RTE_LOG(ERR, EAL,
+			"%s(): cannot release device\n", __func__);
+		return ret;
+	}
+
+	vfio_res_list = RTE_TAILQ_CAST(rte_vfio_tailq.head, mapped_pci_res_list);
+	/* use the PCI address comparator, not a raw memcmp of the struct */
+	TAILQ_FOREACH(vfio_res, vfio_res_list, next) {
+		if (rte_eal_compare_pci_addr(&vfio_res->pci_addr,
+					     &dev->addr))
+			continue;
+		break;
+	}
+	/* if we haven't found our tailq entry, something's wrong */
+	if (vfio_res == NULL) {
+		RTE_LOG(ERR, EAL, " %s cannot find TAILQ entry for PCI device!\n",
+				pci_addr);
+		return -1;
+	}
+
+	/* unmap BARs */
+	maps = vfio_res->maps;
+	RTE_LOG(INFO, EAL, "Releasing pci mapped resource for %s\n",
+		pci_addr);
+	for (i = 0; i < (int) vfio_res->nb_maps; i++) {
+		/* no MSI-X special-casing needed: unmap whatever was mapped */
+		if (maps[i].addr) {
+			RTE_LOG(INFO, EAL, "Calling pci_unmap_resource for %s at %p\n",
+				pci_addr, maps[i].addr);
+			pci_unmap_resource(maps[i].addr, maps[i].size);
+		}
+	}
+
+	TAILQ_REMOVE(vfio_res_list, vfio_res, next);
+	return 0;
+}
+
+int
pci_vfio_ioport_map(struct rte_pci_device *dev, int bar,
struct rte_pci_ioport *p)
{
diff --git a/lib/librte_eal/linuxapp/eal/eal_vfio.c b/lib/librte_eal/linuxapp/eal/eal_vfio.c
index 702f7a2e..53ac725d 100644
--- a/lib/librte_eal/linuxapp/eal/eal_vfio.c
+++ b/lib/librte_eal/linuxapp/eal/eal_vfio.c
@@ -50,12 +50,15 @@
static struct vfio_config vfio_cfg;
static int vfio_type1_dma_map(int);
+static int vfio_spapr_dma_map(int);
static int vfio_noiommu_dma_map(int);
/* IOMMU types we support */
static const struct vfio_iommu_type iommu_types[] = {
/* x86 IOMMU, otherwise known as type 1 */
{ RTE_VFIO_TYPE1, "Type 1", &vfio_type1_dma_map},
+ /* ppc64 IOMMU, otherwise known as spapr */
+ { RTE_VFIO_SPAPR, "sPAPR", &vfio_spapr_dma_map},
/* IOMMU-less mode */
{ RTE_VFIO_NOIOMMU, "No-IOMMU", &vfio_noiommu_dma_map},
};
@@ -65,13 +68,32 @@ vfio_get_group_fd(int iommu_group_no)
{
int i;
int vfio_group_fd;
+ int group_idx = -1;
char filename[PATH_MAX];
/* check if we already have the group descriptor open */
- for (i = 0; i < vfio_cfg.vfio_group_idx; i++)
+ for (i = 0; i < VFIO_MAX_GROUPS; i++)
if (vfio_cfg.vfio_groups[i].group_no == iommu_group_no)
return vfio_cfg.vfio_groups[i].fd;
+ /* Lets see first if there is room for a new group */
+ if (vfio_cfg.vfio_active_groups == VFIO_MAX_GROUPS) {
+ RTE_LOG(ERR, EAL, "Maximum number of VFIO groups reached!\n");
+ return -1;
+ }
+
+ /* Now lets get an index for the new group */
+ for (i = 0; i < VFIO_MAX_GROUPS; i++)
+ if (vfio_cfg.vfio_groups[i].group_no == -1) {
+ group_idx = i;
+ break;
+ }
+
+ /* This should not happen */
+ if (group_idx == -1) {
+ RTE_LOG(ERR, EAL, "No VFIO group free slot found\n");
+ return -1;
+ }
/* if primary, try to open the group */
if (internal_config.process_type == RTE_PROC_PRIMARY) {
/* try regular group format */
@@ -101,14 +123,9 @@ vfio_get_group_fd(int iommu_group_no)
/* noiommu group found */
}
- /* if the fd is valid, create a new group for it */
- if (vfio_cfg.vfio_group_idx == VFIO_MAX_GROUPS) {
- RTE_LOG(ERR, EAL, "Maximum number of VFIO groups reached!\n");
- close(vfio_group_fd);
- return -1;
- }
- vfio_cfg.vfio_groups[vfio_cfg.vfio_group_idx].group_no = iommu_group_no;
- vfio_cfg.vfio_groups[vfio_cfg.vfio_group_idx].fd = vfio_group_fd;
+ vfio_cfg.vfio_groups[group_idx].group_no = iommu_group_no;
+ vfio_cfg.vfio_groups[group_idx].fd = vfio_group_fd;
+ vfio_cfg.vfio_active_groups++;
return vfio_group_fd;
}
/* if we're in a secondary process, request group fd from the primary
@@ -155,14 +172,115 @@ vfio_get_group_fd(int iommu_group_no)
return -1;
}
+
+/* Return the index of the first group slot holding vfio_group_fd, or -1. */
+static int
+get_vfio_group_idx(int vfio_group_fd)
+{
+	int idx;
+	for (idx = 0; idx < VFIO_MAX_GROUPS; idx++)
+		if (vfio_cfg.vfio_groups[idx].fd == vfio_group_fd)
+			break;
+	return idx < VFIO_MAX_GROUPS ? idx : -1;
+}
+
+/*
+ * Count one more device on the group that owns vfio_group_fd.
+ * Logs an error and changes nothing if no group slot holds that fd.
+ */
+static void
+vfio_group_device_get(int vfio_group_fd)
+{
+	int i;
+
+	i = get_vfio_group_idx(vfio_group_fd);
+	/* valid slots are 0 .. VFIO_MAX_GROUPS - 1 */
+	if (i < 0 || i >= VFIO_MAX_GROUPS)
+		RTE_LOG(ERR, EAL, " wrong vfio_group index (%d)\n", i);
+	else
+		vfio_cfg.vfio_groups[i].devices++;
+}
+
+/*
+ * Count one device fewer on the group that owns vfio_group_fd.
+ * Logs an error and changes nothing if no group slot holds that fd.
+ */
static void
-clear_current_group(void)
+vfio_group_device_put(int vfio_group_fd)
{
- vfio_cfg.vfio_groups[vfio_cfg.vfio_group_idx].group_no = 0;
- vfio_cfg.vfio_groups[vfio_cfg.vfio_group_idx].fd = -1;
+	int i;
+
+	i = get_vfio_group_idx(vfio_group_fd);
+	/* valid slots are 0 .. VFIO_MAX_GROUPS - 1 */
+	if (i < 0 || i >= VFIO_MAX_GROUPS)
+		RTE_LOG(ERR, EAL, " wrong vfio_group index (%d)\n", i);
+	else
+		vfio_cfg.vfio_groups[i].devices--;
+}
+
+/*
+ * Return the device count recorded for the group that owns vfio_group_fd,
+ * or -1 (after logging an error) if no group slot holds that fd.
+ */
+static int
+vfio_group_device_count(int vfio_group_fd)
+{
+	int i;
+
+	i = get_vfio_group_idx(vfio_group_fd);
+	/* valid slots are 0 .. VFIO_MAX_GROUPS - 1 */
+	if (i < 0 || i >= VFIO_MAX_GROUPS) {
+		RTE_LOG(ERR, EAL, " wrong vfio_group index (%d)\n", i);
+		return -1;
+	}
+
+	return vfio_cfg.vfio_groups[i].devices;
+}
+
+/*
+ * Forget the group that owns vfio_group_fd.
+ *
+ * In the primary process the matching vfio_cfg slot is reset and the active
+ * group count is decremented. In any other process the request is forwarded
+ * to the primary over the VFIO sync socket instead.
+ *
+ * Returns 0 on success, -1 on failure.
+ *
+ * NOTE(review): a secondary sends its own descriptor number, and the primary
+ * looks that number up among its own group descriptors. Presumably the two
+ * processes use different numbers for the same group - confirm the lookup
+ * on the primary side can actually match.
+ */
+int
+clear_group(int vfio_group_fd)
+{
+	int i;
+	int socket_fd, ret;
+
+	if (internal_config.process_type == RTE_PROC_PRIMARY) {
+
+		i = get_vfio_group_idx(vfio_group_fd);
+		if (i < 0)
+			return -1;
+		vfio_cfg.vfio_groups[i].group_no = -1;
+		vfio_cfg.vfio_groups[i].fd = -1;
+		vfio_cfg.vfio_groups[i].devices = 0;
+		vfio_cfg.vfio_active_groups--;
+		return 0;
+	}
+
+	/* This is just for SECONDARY processes */
+	socket_fd = vfio_mp_sync_connect_to_primary();
+
+	if (socket_fd < 0) {
+		RTE_LOG(ERR, EAL, " cannot connect to primary process!\n");
+		return -1;
+	}
+
+	if (vfio_mp_sync_send_request(socket_fd, SOCKET_CLR_GROUP) < 0) {
+		RTE_LOG(ERR, EAL, " cannot request group clear!\n");
+		close(socket_fd);
+		return -1;
+	}
+
+	if (vfio_mp_sync_send_request(socket_fd, vfio_group_fd) < 0) {
+		RTE_LOG(ERR, EAL, " cannot send group fd!\n");
+		close(socket_fd);
+		return -1;
+	}
+
+	ret = vfio_mp_sync_receive_request(socket_fd);
+	/* the reply is the last use of the socket, whatever it says */
+	close(socket_fd);
+
+	switch (ret) {
+	case SOCKET_OK:
+		return 0;
+	case SOCKET_NO_FD:
+		RTE_LOG(ERR, EAL, " BAD VFIO group fd!\n");
+		break;
+	case SOCKET_ERR:
+		RTE_LOG(ERR, EAL, " Socket error\n");
+		break;
+	default:
+		RTE_LOG(ERR, EAL, " UNKNOWN reply, %d\n", ret);
+		break;
+	}
+	return -1;
}
-int vfio_setup_device(const char *sysfs_base, const char *dev_addr,
+int
+vfio_setup_device(const char *sysfs_base, const char *dev_addr,
int *vfio_dev_fd, struct vfio_device_info *device_info)
{
struct vfio_group_status group_status = {
@@ -189,18 +307,10 @@ int vfio_setup_device(const char *sysfs_base, const char *dev_addr,
if (vfio_group_fd < 0)
return -1;
- /* store group fd */
- vfio_cfg.vfio_groups[vfio_cfg.vfio_group_idx].group_no = iommu_group_no;
- vfio_cfg.vfio_groups[vfio_cfg.vfio_group_idx].fd = vfio_group_fd;
-
/* if group_fd == 0, that means the device isn't managed by VFIO */
if (vfio_group_fd == 0) {
- RTE_LOG(WARNING, EAL, " %s not managed by VFIO driver, skipping\n",
+ RTE_LOG(WARNING, EAL, " %s not managed by VFIO driver, skipping\n",
dev_addr);
- /* we store 0 as group fd to distinguish between existing but
- * unbound VFIO groups, and groups that don't exist at all.
- */
- vfio_cfg.vfio_group_idx++;
return 1;
}
@@ -215,12 +325,12 @@ int vfio_setup_device(const char *sysfs_base, const char *dev_addr,
RTE_LOG(ERR, EAL, " %s cannot get group status, "
"error %i (%s)\n", dev_addr, errno, strerror(errno));
close(vfio_group_fd);
- clear_current_group();
+ clear_group(vfio_group_fd);
return -1;
} else if (!(group_status.flags & VFIO_GROUP_FLAGS_VIABLE)) {
RTE_LOG(ERR, EAL, " %s VFIO group is not viable!\n", dev_addr);
close(vfio_group_fd);
- clear_current_group();
+ clear_group(vfio_group_fd);
return -1;
}
@@ -234,60 +344,131 @@ int vfio_setup_device(const char *sysfs_base, const char *dev_addr,
RTE_LOG(ERR, EAL, " %s cannot add VFIO group to container, "
"error %i (%s)\n", dev_addr, errno, strerror(errno));
close(vfio_group_fd);
- clear_current_group();
+ clear_group(vfio_group_fd);
return -1;
}
+
/*
- * at this point we know that this group has been successfully
- * initialized, so we increment vfio_group_idx to indicate that we can
- * add new groups.
+ * pick an IOMMU type and set up DMA mappings for container
+ *
+ * needs to be done only once, only when first group is
+ * assigned to a container and only in primary process.
+ * Note this can happen several times with the hotplug
+ * functionality.
*/
- vfio_cfg.vfio_group_idx++;
- }
-
- /*
- * pick an IOMMU type and set up DMA mappings for container
- *
- * needs to be done only once, only when at least one group is assigned to
- * a container and only in primary process
- */
- if (internal_config.process_type == RTE_PROC_PRIMARY &&
- vfio_cfg.vfio_container_has_dma == 0) {
- /* select an IOMMU type which we will be using */
- const struct vfio_iommu_type *t =
+ if (internal_config.process_type == RTE_PROC_PRIMARY &&
+ vfio_cfg.vfio_active_groups == 1) {
+ /* select an IOMMU type which we will be using */
+ const struct vfio_iommu_type *t =
vfio_set_iommu_type(vfio_cfg.vfio_container_fd);
- if (!t) {
- RTE_LOG(ERR, EAL, " %s failed to select IOMMU type\n", dev_addr);
- return -1;
- }
- ret = t->dma_map_func(vfio_cfg.vfio_container_fd);
- if (ret) {
- RTE_LOG(ERR, EAL, " %s DMA remapping failed, "
- "error %i (%s)\n", dev_addr, errno, strerror(errno));
- return -1;
+ if (!t) {
+ RTE_LOG(ERR, EAL,
+ " %s failed to select IOMMU type\n",
+ dev_addr);
+ close(vfio_group_fd);
+ clear_group(vfio_group_fd);
+ return -1;
+ }
+ ret = t->dma_map_func(vfio_cfg.vfio_container_fd);
+ if (ret) {
+ RTE_LOG(ERR, EAL,
+ " %s DMA remapping failed, error %i (%s)\n",
+ dev_addr, errno, strerror(errno));
+ close(vfio_group_fd);
+ clear_group(vfio_group_fd);
+ return -1;
+ }
}
- vfio_cfg.vfio_container_has_dma = 1;
}
/* get a file descriptor for the device */
*vfio_dev_fd = ioctl(vfio_group_fd, VFIO_GROUP_GET_DEVICE_FD, dev_addr);
if (*vfio_dev_fd < 0) {
- /* if we cannot get a device fd, this simply means that this
- * particular port is not bound to VFIO
- */
- RTE_LOG(WARNING, EAL, " %s not managed by VFIO driver, skipping\n",
+ /* if we cannot get a device fd, this implies a problem with
+ * the VFIO group or the container not having IOMMU configured.
+ */
+
+ RTE_LOG(WARNING, EAL, "Getting a vfio_dev_fd for %s failed\n",
dev_addr);
- return 1;
+ close(vfio_group_fd);
+ clear_group(vfio_group_fd);
+ return -1;
}
/* test and setup the device */
ret = ioctl(*vfio_dev_fd, VFIO_DEVICE_GET_INFO, device_info);
if (ret) {
RTE_LOG(ERR, EAL, " %s cannot get device info, "
- "error %i (%s)\n", dev_addr, errno, strerror(errno));
+ "error %i (%s)\n", dev_addr, errno,
+ strerror(errno));
close(*vfio_dev_fd);
+ close(vfio_group_fd);
+ clear_group(vfio_group_fd);
return -1;
}
+ vfio_group_device_get(vfio_group_fd);
+
+ return 0;
+}
+
+/*
+ * Release a VFIO device: close its device descriptor and, once the owning
+ * group has no devices left, close the group descriptor and clear its slot.
+ *
+ * sysfs_base and dev_addr are used to look up the IOMMU group number;
+ * vfio_dev_fd is the device descriptor to close.
+ *
+ * Returns 0 on success, -1 on failure.
+ */
+int
+vfio_release_device(const char *sysfs_base, const char *dev_addr,
+		int vfio_dev_fd)
+{
+	int vfio_group_fd;
+	int iommu_group_no;
+	int ret;
+
+	/* get group number */
+	ret = vfio_get_group_no(sysfs_base, dev_addr, &iommu_group_no);
+	if (ret <= 0) {
+		RTE_LOG(WARNING, EAL, " %s not managed by VFIO driver\n",
+			dev_addr);
+		/* This is an error at this point. */
+		return -1;
+	}
+
+	/* get the actual group fd */
+	vfio_group_fd = vfio_get_group_fd(iommu_group_no);
+	if (vfio_group_fd <= 0) {
+		RTE_LOG(INFO, EAL, "vfio_get_group_fd failed for %s\n",
+			dev_addr);
+		return -1;
+	}
+
+	/* At this point we have an active group. Closing it detaches it from
+	 * the container. If this is the last active group, the VFIO kernel
+	 * code will unset the container and the IOMMU mappings.
+	 */
+
+	/* Closing a device */
+	if (close(vfio_dev_fd) < 0) {
+		RTE_LOG(INFO, EAL, "Error when closing vfio_dev_fd for %s\n",
+			dev_addr);
+		return -1;
+	}
+
+	/* A VFIO group can have several devices attached. The group is closed
+	 * only when no devices remain.
+	 */
+	vfio_group_device_put(vfio_group_fd);
+	if (!vfio_group_device_count(vfio_group_fd)) {
+
+		if (close(vfio_group_fd) < 0) {
+			RTE_LOG(INFO, EAL, "Error when closing vfio_group_fd for %s\n",
+				dev_addr);
+			return -1;
+		}
+
+		if (clear_group(vfio_group_fd) < 0) {
+			RTE_LOG(INFO, EAL, "Error when clearing group for %s\n",
+				dev_addr);
+			return -1;
+		}
+	}
return 0;
}
@@ -302,6 +483,7 @@ vfio_enable(const char *modname)
for (i = 0; i < VFIO_MAX_GROUPS; i++) {
vfio_cfg.vfio_groups[i].fd = -1;
vfio_cfg.vfio_groups[i].group_no = -1;
+ vfio_cfg.vfio_groups[i].devices = 0;
}
/* inform the user that we are probing for VFIO */
@@ -531,7 +713,8 @@ vfio_type1_dma_map(int vfio_container_fd)
if (ret) {
RTE_LOG(ERR, EAL, " cannot set up DMA remapping, "
- "error %i (%s)\n", errno, strerror(errno));
+ "error %i (%s)\n", errno,
+ strerror(errno));
return -1;
}
}
@@ -540,6 +723,93 @@ vfio_type1_dma_map(int vfio_container_fd)
}
+/*
+ * DMA mapping callback registered for RTE_VFIO_SPAPR in iommu_types[].
+ *
+ * Queries the sPAPR IOMMU info, removes the default 32-bit DMA window,
+ * creates a new window sized from rte_eal_get_physmem_size(), then registers
+ * and maps each memory segment with its IOVA equal to its physical address.
+ *
+ * Returns 0 on success, or -1 as soon as any ioctl fails; steps already
+ * completed are not undone.
+ */
static int
+vfio_spapr_dma_map(int vfio_container_fd)
+{
+ const struct rte_memseg *ms = rte_eal_get_physmem_layout();
+ int i, ret;
+
+ struct vfio_iommu_spapr_register_memory reg = {
+ .argsz = sizeof(reg),
+ .flags = 0
+ };
+ struct vfio_iommu_spapr_tce_info info = {
+ .argsz = sizeof(info),
+ };
+ struct vfio_iommu_spapr_tce_create create = {
+ .argsz = sizeof(create),
+ };
+ struct vfio_iommu_spapr_tce_remove remove = {
+ .argsz = sizeof(remove),
+ };
+
+ /* query spapr iommu info */
+ ret = ioctl(vfio_container_fd, VFIO_IOMMU_SPAPR_TCE_GET_INFO, &info);
+ if (ret) {
+ RTE_LOG(ERR, EAL, " cannot get iommu info, "
+ "error %i (%s)\n", errno, strerror(errno));
+ return -1;
+ }
+
+ /* remove the default 32-bit DMA window reported by GET_INFO */
+ remove.start_addr = info.dma32_window_start;
+ ret = ioctl(vfio_container_fd, VFIO_IOMMU_SPAPR_TCE_REMOVE, &remove);
+ if (ret) {
+ RTE_LOG(ERR, EAL, " cannot remove default DMA window, "
+ "error %i (%s)\n", errno, strerror(errno));
+ return -1;
+ }
+
+ /* calculate window size based on number of hugepages configured */
+ create.window_size = rte_eal_get_physmem_size();
+ /* NOTE(review): page shift comes from the first segment only; this
+ * assumes all segments share one hugepage size - confirm
+ */
+ create.page_shift = __builtin_ctzll(ms->hugepage_sz);
+ create.levels = 2;
+
+ ret = ioctl(vfio_container_fd, VFIO_IOMMU_SPAPR_TCE_CREATE, &create);
+ if (ret) {
+ RTE_LOG(ERR, EAL, " cannot create new DMA window, "
+ "error %i (%s)\n", errno, strerror(errno));
+ return -1;
+ }
+
+ /* map all DPDK segments for DMA. use 1:1 PA to IOVA mapping */
+ for (i = 0; i < RTE_MAX_MEMSEG; i++) {
+ struct vfio_iommu_type1_dma_map dma_map;
+
+ /* stop at the first segment that has no address */
+ if (ms[i].addr == NULL)
+ break;
+
+ /* the segment is registered first, then mapped below */
+ reg.vaddr = (uintptr_t) ms[i].addr;
+ reg.size = ms[i].len;
+ ret = ioctl(vfio_container_fd,
+ VFIO_IOMMU_SPAPR_REGISTER_MEMORY, &reg);
+ if (ret) {
+ RTE_LOG(ERR, EAL, " cannot register vaddr for IOMMU, "
+ "error %i (%s)\n", errno, strerror(errno));
+ return -1;
+ }
+
+ memset(&dma_map, 0, sizeof(dma_map));
+ dma_map.argsz = sizeof(struct vfio_iommu_type1_dma_map);
+ dma_map.vaddr = ms[i].addr_64;
+ dma_map.size = ms[i].len;
+ dma_map.iova = ms[i].phys_addr;
+ dma_map.flags = VFIO_DMA_MAP_FLAG_READ |
+ VFIO_DMA_MAP_FLAG_WRITE;
+
+ ret = ioctl(vfio_container_fd, VFIO_IOMMU_MAP_DMA, &dma_map);
+
+ if (ret) {
+ RTE_LOG(ERR, EAL, " cannot set up DMA remapping, "
+ "error %i (%s)\n", errno, strerror(errno));
+ return -1;
+ }
+
+ }
+
+ return 0;
+}
+
+static int
vfio_noiommu_dma_map(int __rte_unused vfio_container_fd)
{
/* No-IOMMU mode does not need DMA mapping */
diff --git a/lib/librte_eal/linuxapp/eal/eal_vfio.h b/lib/librte_eal/linuxapp/eal/eal_vfio.h
index 29f7f3ec..5ff63e5d 100644
--- a/lib/librte_eal/linuxapp/eal/eal_vfio.h
+++ b/lib/librte_eal/linuxapp/eal/eal_vfio.h
@@ -54,6 +54,62 @@
#define RTE_VFIO_TYPE1 VFIO_TYPE1_IOMMU
+#ifndef VFIO_SPAPR_TCE_v2_IOMMU
+#define RTE_VFIO_SPAPR 7
+#define VFIO_IOMMU_SPAPR_REGISTER_MEMORY _IO(VFIO_TYPE, VFIO_BASE + 17)
+#define VFIO_IOMMU_SPAPR_TCE_CREATE _IO(VFIO_TYPE, VFIO_BASE + 19)
+#define VFIO_IOMMU_SPAPR_TCE_REMOVE _IO(VFIO_TYPE, VFIO_BASE + 20)
+
+/*
+ * Argument of the VFIO_IOMMU_SPAPR_REGISTER_MEMORY ioctl. Local fallback,
+ * compiled only when VFIO_SPAPR_TCE_v2_IOMMU is not already defined.
+ */
+struct vfio_iommu_spapr_register_memory {
+ uint32_t argsz;
+ uint32_t flags;
+ uint64_t vaddr;
+ uint64_t size;
+};
+
+/*
+ * Argument of the VFIO_IOMMU_SPAPR_TCE_CREATE ioctl. Local fallback,
+ * compiled only when VFIO_SPAPR_TCE_v2_IOMMU is not already defined.
+ */
+struct vfio_iommu_spapr_tce_create {
+ uint32_t argsz;
+ uint32_t flags;
+ /* in */
+ uint32_t page_shift;
+ uint32_t __resv1;
+ uint64_t window_size;
+ uint32_t levels;
+ uint32_t __resv2;
+ /* out */
+ uint64_t start_addr;
+};
+
+/*
+ * Argument of the VFIO_IOMMU_SPAPR_TCE_REMOVE ioctl. Local fallback,
+ * compiled only when VFIO_SPAPR_TCE_v2_IOMMU is not already defined.
+ */
+struct vfio_iommu_spapr_tce_remove {
+ uint32_t argsz;
+ uint32_t flags;
+ /* in */
+ uint64_t start_addr;
+};
+
+/*
+ * Embedded as the ddw member of struct vfio_iommu_spapr_tce_info. Local
+ * fallback, compiled only when VFIO_SPAPR_TCE_v2_IOMMU is not already defined.
+ */
+struct vfio_iommu_spapr_tce_ddw_info {
+ uint64_t pgsizes;
+ uint32_t max_dynamic_windows_supported;
+ uint32_t levels;
+};
+
+/* SPAPR_v2 is not present, but SPAPR might be */
+#ifndef VFIO_SPAPR_TCE_IOMMU
+#define VFIO_IOMMU_SPAPR_TCE_GET_INFO _IO(VFIO_TYPE, VFIO_BASE + 12)
+
+/*
+ * Argument of the VFIO_IOMMU_SPAPR_TCE_GET_INFO ioctl. Local fallback,
+ * compiled only when neither VFIO_SPAPR_TCE_v2_IOMMU nor VFIO_SPAPR_TCE_IOMMU
+ * is already defined.
+ */
+struct vfio_iommu_spapr_tce_info {
+ uint32_t argsz;
+ uint32_t flags;
+ uint32_t dma32_window_start;
+ uint32_t dma32_window_size;
+ struct vfio_iommu_spapr_tce_ddw_info ddw;
+};
+#endif /* VFIO_SPAPR_TCE_IOMMU */
+
+#else /* VFIO_SPAPR_TCE_v2_IOMMU */
+#define RTE_VFIO_SPAPR VFIO_SPAPR_TCE_v2_IOMMU
+#endif
+
#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 5, 0)
#define RTE_VFIO_NOIOMMU 8
#else
@@ -78,13 +134,13 @@ int vfio_mp_sync_connect_to_primary(void);
struct vfio_group {
int group_no;
int fd;
+ int devices; /* device count, kept by vfio_group_device_get()/_put() */
};
struct vfio_config {
int vfio_enabled;
int vfio_container_fd;
- int vfio_container_has_dma;
- int vfio_group_idx;
+ int vfio_active_groups; /* ++ when a group fd is stored, -- in clear_group() */
struct vfio_group vfio_groups[VFIO_MAX_GROUPS];
};
@@ -130,6 +186,10 @@ vfio_get_group_no(const char *sysfs_base,
int
vfio_get_group_fd(int iommu_group_no);
+/* remove group fd from internal VFIO group fd array */
+int
+clear_group(int vfio_group_fd);
+
/**
* Setup vfio_cfg for the device identified by its address. It discovers
* the configured I/O MMU groups or sets a new one for the device. If a new
@@ -140,6 +200,8 @@ vfio_get_group_fd(int iommu_group_no);
int vfio_setup_device(const char *sysfs_base, const char *dev_addr,
int *vfio_dev_fd, struct vfio_device_info *device_info);
+int vfio_release_device(const char *sysfs_base, const char *dev_addr, int fd);
+
int vfio_enable(const char *modname);
int vfio_is_enabled(const char *modname);
@@ -150,6 +212,7 @@ int vfio_mp_sync_setup(void);
#define SOCKET_REQ_CONTAINER 0x100
#define SOCKET_REQ_GROUP 0x200
+#define SOCKET_CLR_GROUP 0x300
#define SOCKET_OK 0x0
#define SOCKET_NO_FD 0x1
#define SOCKET_ERR 0xFF
diff --git a/lib/librte_eal/linuxapp/eal/eal_vfio_mp_sync.c b/lib/librte_eal/linuxapp/eal/eal_vfio_mp_sync.c
index fb4a2f84..7e8095cb 100644
--- a/lib/librte_eal/linuxapp/eal/eal_vfio_mp_sync.c
+++ b/lib/librte_eal/linuxapp/eal/eal_vfio_mp_sync.c
@@ -267,7 +267,7 @@ vfio_mp_sync_connect_to_primary(void)
static __attribute__((noreturn)) void *
vfio_mp_sync_thread(void __rte_unused * arg)
{
- int ret, fd, vfio_group_no;
+ int ret, fd, vfio_data;
/* wait for requests on the socket */
for (;;) {
@@ -305,13 +305,13 @@ vfio_mp_sync_thread(void __rte_unused * arg)
break;
case SOCKET_REQ_GROUP:
/* wait for group number */
- vfio_group_no = vfio_mp_sync_receive_request(conn_sock);
- if (vfio_group_no < 0) {
+ vfio_data = vfio_mp_sync_receive_request(conn_sock);
+ if (vfio_data < 0) {
close(conn_sock);
continue;
}
- fd = vfio_get_group_fd(vfio_group_no);
+ fd = vfio_get_group_fd(vfio_data);
if (fd < 0)
vfio_mp_sync_send_request(conn_sock, SOCKET_ERR);
@@ -324,6 +324,21 @@ vfio_mp_sync_thread(void __rte_unused * arg)
vfio_mp_sync_send_fd(conn_sock, fd);
}
break;
+ case SOCKET_CLR_GROUP:
+ /* wait for group fd */
+ vfio_data = vfio_mp_sync_receive_request(conn_sock);
+ if (vfio_data < 0) {
+ close(conn_sock);
+ continue;
+ }
+
+ ret = clear_group(vfio_data);
+
+ if (ret < 0)
+ vfio_mp_sync_send_request(conn_sock, SOCKET_NO_FD);
+ else
+ vfio_mp_sync_send_request(conn_sock, SOCKET_OK);
+ break;
default:
vfio_mp_sync_send_request(conn_sock, SOCKET_ERR);
break;
diff --git a/lib/librte_eal/linuxapp/eal/include/exec-env/rte_interrupts.h b/lib/librte_eal/linuxapp/eal/include/exec-env/rte_interrupts.h
index d459bf48..6daffebf 100644
--- a/lib/librte_eal/linuxapp/eal/include/exec-env/rte_interrupts.h
+++ b/lib/librte_eal/linuxapp/eal/include/exec-env/rte_interrupts.h
@@ -49,8 +49,9 @@ enum rte_intr_handle_type {
RTE_INTR_HANDLE_VFIO_LEGACY, /**< vfio device handle (legacy) */
RTE_INTR_HANDLE_VFIO_MSI, /**< vfio device handle (MSI) */
RTE_INTR_HANDLE_VFIO_MSIX, /**< vfio device handle (MSIX) */
- RTE_INTR_HANDLE_ALARM, /**< alarm handle */
- RTE_INTR_HANDLE_EXT, /**< external handler */
+ RTE_INTR_HANDLE_ALARM, /**< alarm handle */
+ RTE_INTR_HANDLE_EXT, /**< external handler */
+ RTE_INTR_HANDLE_VDEV, /**< virtual device */
RTE_INTR_HANDLE_MAX
};
@@ -171,6 +172,15 @@ rte_intr_rx_ctl(struct rte_intr_handle *intr_handle,
int epfd, int op, unsigned int vec, void *data);
/**
+ * It deletes registered eventfds.
+ *
+ * @param intr_handle
+ * Pointer to the interrupt handle.
+ */
+void
+rte_intr_free_epoll_fd(struct rte_intr_handle *intr_handle);
+
+/**
* It enables the packet I/O interrupt event if it's necessary.
* It creates event fd for each interrupt vector when MSIX is used,
* otherwise it multiplexes a single event fd.
diff --git a/lib/librte_eal/linuxapp/eal/include/exec-env/rte_kni_common.h b/lib/librte_eal/linuxapp/eal/include/exec-env/rte_kni_common.h
index 09713b0c..2ac879fd 100644
--- a/lib/librte_eal/linuxapp/eal/include/exec-env/rte_kni_common.h
+++ b/lib/librte_eal/linuxapp/eal/include/exec-env/rte_kni_common.h
@@ -116,11 +116,10 @@ struct rte_kni_fifo {
struct rte_kni_mbuf {
void *buf_addr __attribute__((__aligned__(RTE_CACHE_LINE_SIZE)));
uint64_t buf_physaddr;
- char pad0[2];
uint16_t data_off; /**< Start address of data in segment buffer. */
char pad1[2];
- uint8_t nb_segs; /**< Number of segments. */
- char pad4[1];
+ uint16_t nb_segs; /**< Number of segments. */
+ char pad4[2];
uint64_t ol_flags; /**< Offload features. */
char pad2[4];
uint32_t pkt_len; /**< Total pkt len: sum of all segment data_len. */
diff --git a/lib/librte_eal/linuxapp/eal/rte_eal_version.map b/lib/librte_eal/linuxapp/eal/rte_eal_version.map
index 83721ba5..670bab3a 100644
--- a/lib/librte_eal/linuxapp/eal/rte_eal_version.map
+++ b/lib/librte_eal/linuxapp/eal/rte_eal_version.map
@@ -6,8 +6,6 @@ DPDK_2.0 {
eal_parse_sysfs_value;
eal_timer_source;
lcore_config;
- pci_device_list;
- pci_driver_list;
per_lcore__lcore_id;
per_lcore__rte_errno;
rte_calloc;
@@ -22,12 +20,9 @@ DPDK_2.0 {
rte_dump_tailq;
rte_eal_alarm_cancel;
rte_eal_alarm_set;
- rte_eal_dev_init;
rte_eal_devargs_add;
rte_eal_devargs_dump;
rte_eal_devargs_type_count;
- rte_eal_driver_register;
- rte_eal_driver_unregister;
rte_eal_get_configuration;
rte_eal_get_lcore_state;
rte_eal_get_physmem_layout;
@@ -40,18 +35,10 @@ DPDK_2.0 {
rte_eal_mp_remote_launch;
rte_eal_mp_wait_lcore;
rte_eal_parse_devargs_str;
- rte_eal_pci_dump;
- rte_eal_pci_probe;
- rte_eal_pci_probe_one;
- rte_eal_pci_register;
- rte_eal_pci_scan;
- rte_eal_pci_unregister;
rte_eal_process_type;
rte_eal_remote_launch;
rte_eal_tailq_lookup;
rte_eal_tailq_register;
- rte_eal_vdev_init;
- rte_eal_vdev_uninit;
rte_eal_wait_lcore;
rte_exit;
rte_free;
@@ -66,11 +53,8 @@ DPDK_2.0 {
rte_intr_disable;
rte_intr_enable;
rte_log;
- rte_log_add_in_history;
rte_log_cur_msg_loglevel;
rte_log_cur_msg_logtype;
- rte_log_dump_history;
- rte_log_set_history;
rte_logs;
rte_malloc;
rte_malloc_dump_stats;
@@ -114,9 +98,6 @@ DPDK_2.0 {
DPDK_2.1 {
global:
- rte_eal_pci_detach;
- rte_eal_pci_read_config;
- rte_eal_pci_write_config;
rte_epoll_ctl;
rte_epoll_wait;
rte_intr_allow_others;
@@ -146,12 +127,6 @@ DPDK_16.04 {
global:
rte_cpu_get_flag_name;
- rte_eal_pci_ioport_map;
- rte_eal_pci_ioport_read;
- rte_eal_pci_ioport_unmap;
- rte_eal_pci_ioport_write;
- rte_eal_pci_map_device;
- rte_eal_pci_unmap_device;
rte_eal_primary_proc_alive;
} DPDK_2.2;
@@ -174,7 +149,52 @@ DPDK_16.11 {
rte_delay_us_callback_register;
rte_eal_dev_attach;
rte_eal_dev_detach;
- rte_eal_vdrv_register;
- rte_eal_vdrv_unregister;
} DPDK_16.07;
+
+DPDK_17.02 {
+ global:
+
+ rte_bus_dump;
+ rte_bus_probe;
+ rte_bus_register;
+ rte_bus_scan;
+ rte_bus_unregister;
+
+} DPDK_16.11;
+
+DPDK_17.05 {
+ global:
+
+ rte_cpu_is_supported;
+ rte_intr_free_epoll_fd;
+ rte_log_dump;
+ rte_log_get_global_level;
+ rte_log_register;
+ rte_log_set_global_level;
+ rte_log_set_level;
+ rte_log_set_level_regexp;
+ rte_pci_detach;
+ rte_pci_dump;
+ rte_pci_ioport_map;
+ rte_pci_ioport_read;
+ rte_pci_ioport_unmap;
+ rte_pci_ioport_write;
+ rte_pci_map_device;
+ rte_pci_probe;
+ rte_pci_probe_one;
+ rte_pci_read_config;
+ rte_pci_register;
+ rte_pci_scan;
+ rte_pci_unmap_device;
+ rte_pci_unregister;
+ rte_pci_write_config;
+ rte_vdev_init;
+ rte_vdev_register;
+ rte_vdev_uninit;
+ rte_vdev_unregister;
+ vfio_get_container_fd;
+ vfio_get_group_fd;
+ vfio_get_group_no;
+
+} DPDK_17.02;
diff --git a/lib/librte_eal/linuxapp/igb_uio/compat.h b/lib/librte_eal/linuxapp/igb_uio/compat.h
index 0d781e48..b800a53c 100644
--- a/lib/librte_eal/linuxapp/igb_uio/compat.h
+++ b/lib/librte_eal/linuxapp/igb_uio/compat.h
@@ -123,3 +123,7 @@ static bool pci_check_and_mask_intx(struct pci_dev *pdev)
}
#endif /* < 3.3.0 */
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 8, 0)
+#define HAVE_PCI_ENABLE_MSIX
+#endif
diff --git a/lib/librte_eal/linuxapp/igb_uio/igb_uio.c b/lib/librte_eal/linuxapp/igb_uio/igb_uio.c
index df41e457..b9d427c5 100644
--- a/lib/librte_eal/linuxapp/igb_uio/igb_uio.c
+++ b/lib/librte_eal/linuxapp/igb_uio/igb_uio.c
@@ -314,7 +314,7 @@ igbuio_setup_bars(struct pci_dev *dev, struct uio_info *info)
}
}
- return (iom != 0) ? ret : -ENOENT;
+ return (iom != 0 || iop != 0) ? ret : -ENOENT;
}
#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 8, 0)
@@ -325,7 +325,11 @@ static int
igbuio_pci_probe(struct pci_dev *dev, const struct pci_device_id *id)
{
struct rte_uio_pci_dev *udev;
+#ifdef HAVE_PCI_ENABLE_MSIX
struct msix_entry msix_entry;
+#endif
+ dma_addr_t map_dma_addr;
+ void *map_addr;
int err;
udev = kzalloc(sizeof(struct rte_uio_pci_dev), GFP_KERNEL);
@@ -379,18 +383,28 @@ igbuio_pci_probe(struct pci_dev *dev, const struct pci_device_id *id)
switch (igbuio_intr_mode_preferred) {
case RTE_INTR_MODE_MSIX:
/* Only 1 msi-x vector needed */
+#ifdef HAVE_PCI_ENABLE_MSIX
msix_entry.entry = 0;
if (pci_enable_msix(dev, &msix_entry, 1) == 0) {
dev_dbg(&dev->dev, "using MSI-X");
+ udev->info.irq_flags = IRQF_NO_THREAD;
udev->info.irq = msix_entry.vector;
udev->mode = RTE_INTR_MODE_MSIX;
break;
}
+#else
+ if (pci_alloc_irq_vectors(dev, 1, 1, PCI_IRQ_MSIX) == 1) {
+ dev_dbg(&dev->dev, "using MSI-X");
+ udev->info.irq = pci_irq_vector(dev, 0);
+ udev->mode = RTE_INTR_MODE_MSIX;
+ break;
+ }
+#endif
/* fall back to INTX */
case RTE_INTR_MODE_LEGACY:
if (pci_intx_mask_supported(dev)) {
dev_dbg(&dev->dev, "using INTX");
- udev->info.irq_flags = IRQF_SHARED;
+ udev->info.irq_flags = IRQF_SHARED | IRQF_NO_THREAD;
udev->info.irq = dev->irq;
udev->mode = RTE_INTR_MODE_LEGACY;
break;
@@ -423,6 +437,27 @@ igbuio_pci_probe(struct pci_dev *dev, const struct pci_device_id *id)
dev_info(&dev->dev, "uio device registered with irq %lx\n",
udev->info.irq);
+ /*
+ * Doing a harmless dma mapping for attaching the device to
+ * the iommu identity mapping if kernel boots with iommu=pt.
+ * Note this is not a problem if no IOMMU at all.
+ */
+ map_addr = dma_alloc_coherent(&dev->dev, 1024, &map_dma_addr,
+ GFP_KERNEL);
+ if (map_addr)
+ memset(map_addr, 0, 1024);
+
+ if (!map_addr)
+ dev_info(&dev->dev, "dma mapping failed\n");
+ else {
+ dev_info(&dev->dev, "mapping 1K dma=%#llx host=%p\n",
+ (unsigned long long)map_dma_addr, map_addr);
+
+ dma_free_coherent(&dev->dev, 1024, map_addr, map_dma_addr);
+ dev_info(&dev->dev, "unmapping 1K dma=%#llx host=%p\n",
+ (unsigned long long)map_dma_addr, map_addr);
+ }
+
return 0;
fail_remove_group:
diff --git a/lib/librte_eal/linuxapp/kni/Makefile b/lib/librte_eal/linuxapp/kni/Makefile
index 4e99e07e..154c528d 100644
--- a/lib/librte_eal/linuxapp/kni/Makefile
+++ b/lib/librte_eal/linuxapp/kni/Makefile
@@ -44,45 +44,43 @@ MODULE_CFLAGS += -I$(RTE_OUTPUT)/include -I$(SRCDIR)/ethtool/ixgbe -I$(SRCDIR)/e
MODULE_CFLAGS += -include $(RTE_OUTPUT)/include/rte_config.h
MODULE_CFLAGS += -Wall -Werror
-ifeq ($(shell lsb_release -si 2>/dev/null),Ubuntu)
-MODULE_CFLAGS += -DUBUNTU_RELEASE_CODE=$(shell lsb_release -sr | tr -d .)
+-include /etc/lsb-release
+
+ifeq ($(DISTRIB_ID),Ubuntu)
+MODULE_CFLAGS += -DUBUNTU_RELEASE_CODE=$(subst .,,$(DISTRIB_RELEASE))
UBUNTU_KERNEL_CODE := $(shell echo `grep UTS_RELEASE $(RTE_KERNELDIR)/include/generated/utsrelease.h \
| cut -d '"' -f2 | cut -d- -f1,2 | tr .- ,`,1)
MODULE_CFLAGS += -D"UBUNTU_KERNEL_CODE=UBUNTU_KERNEL_VERSION($(UBUNTU_KERNEL_CODE))"
endif
-# this lib needs main eal
-DEPDIRS-y += lib/librte_eal/linuxapp/eal
-
#
# all source are stored in SRCS-y
#
-SRCS-y := ethtool/ixgbe/ixgbe_main.c
-SRCS-y += ethtool/ixgbe/ixgbe_api.c
-SRCS-y += ethtool/ixgbe/ixgbe_common.c
-SRCS-y += ethtool/ixgbe/ixgbe_ethtool.c
-SRCS-y += ethtool/ixgbe/ixgbe_82599.c
-SRCS-y += ethtool/ixgbe/ixgbe_82598.c
-SRCS-y += ethtool/ixgbe/ixgbe_x540.c
-SRCS-y += ethtool/ixgbe/ixgbe_phy.c
-SRCS-y += ethtool/ixgbe/kcompat.c
+SRCS-y := kni_misc.c
+SRCS-y += kni_net.c
+SRCS-$(CONFIG_RTE_KNI_KMOD_ETHTOOL) += kni_ethtool.c
-SRCS-y += ethtool/igb/e1000_82575.c
-SRCS-y += ethtool/igb/e1000_i210.c
-SRCS-y += ethtool/igb/e1000_api.c
-SRCS-y += ethtool/igb/e1000_mac.c
-SRCS-y += ethtool/igb/e1000_manage.c
-SRCS-y += ethtool/igb/e1000_mbx.c
-SRCS-y += ethtool/igb/e1000_nvm.c
-SRCS-y += ethtool/igb/e1000_phy.c
-SRCS-y += ethtool/igb/igb_ethtool.c
-SRCS-y += ethtool/igb/igb_main.c
-SRCS-y += ethtool/igb/igb_param.c
-SRCS-y += ethtool/igb/igb_vmdq.c
+SRCS-$(CONFIG_RTE_KNI_KMOD_ETHTOOL) += ethtool/ixgbe/ixgbe_main.c
+SRCS-$(CONFIG_RTE_KNI_KMOD_ETHTOOL) += ethtool/ixgbe/ixgbe_api.c
+SRCS-$(CONFIG_RTE_KNI_KMOD_ETHTOOL) += ethtool/ixgbe/ixgbe_common.c
+SRCS-$(CONFIG_RTE_KNI_KMOD_ETHTOOL) += ethtool/ixgbe/ixgbe_ethtool.c
+SRCS-$(CONFIG_RTE_KNI_KMOD_ETHTOOL) += ethtool/ixgbe/ixgbe_82599.c
+SRCS-$(CONFIG_RTE_KNI_KMOD_ETHTOOL) += ethtool/ixgbe/ixgbe_82598.c
+SRCS-$(CONFIG_RTE_KNI_KMOD_ETHTOOL) += ethtool/ixgbe/ixgbe_x540.c
+SRCS-$(CONFIG_RTE_KNI_KMOD_ETHTOOL) += ethtool/ixgbe/ixgbe_phy.c
+SRCS-$(CONFIG_RTE_KNI_KMOD_ETHTOOL) += ethtool/ixgbe/kcompat.c
-SRCS-y += kni_misc.c
-SRCS-y += kni_net.c
-SRCS-y += kni_ethtool.c
-SRCS-$(CONFIG_RTE_KNI_VHOST) += kni_vhost.c
+SRCS-$(CONFIG_RTE_KNI_KMOD_ETHTOOL) += ethtool/igb/e1000_82575.c
+SRCS-$(CONFIG_RTE_KNI_KMOD_ETHTOOL) += ethtool/igb/e1000_i210.c
+SRCS-$(CONFIG_RTE_KNI_KMOD_ETHTOOL) += ethtool/igb/e1000_api.c
+SRCS-$(CONFIG_RTE_KNI_KMOD_ETHTOOL) += ethtool/igb/e1000_mac.c
+SRCS-$(CONFIG_RTE_KNI_KMOD_ETHTOOL) += ethtool/igb/e1000_manage.c
+SRCS-$(CONFIG_RTE_KNI_KMOD_ETHTOOL) += ethtool/igb/e1000_mbx.c
+SRCS-$(CONFIG_RTE_KNI_KMOD_ETHTOOL) += ethtool/igb/e1000_nvm.c
+SRCS-$(CONFIG_RTE_KNI_KMOD_ETHTOOL) += ethtool/igb/e1000_phy.c
+SRCS-$(CONFIG_RTE_KNI_KMOD_ETHTOOL) += ethtool/igb/igb_ethtool.c
+SRCS-$(CONFIG_RTE_KNI_KMOD_ETHTOOL) += ethtool/igb/igb_main.c
+SRCS-$(CONFIG_RTE_KNI_KMOD_ETHTOOL) += ethtool/igb/igb_param.c
+SRCS-$(CONFIG_RTE_KNI_KMOD_ETHTOOL) += ethtool/igb/igb_vmdq.c
include $(RTE_SDK)/mk/rte.module.mk
diff --git a/lib/librte_eal/linuxapp/kni/compat.h b/lib/librte_eal/linuxapp/kni/compat.h
index 78da08e5..d96275af 100644
--- a/lib/librte_eal/linuxapp/kni/compat.h
+++ b/lib/librte_eal/linuxapp/kni/compat.h
@@ -2,6 +2,8 @@
* Minimal wrappers to allow compiling kni on older kernels.
*/
+#include <linux/version.h>
+
#ifndef RHEL_RELEASE_VERSION
#define RHEL_RELEASE_VERSION(a, b) (((a) << 8) + (b))
#endif
@@ -67,3 +69,7 @@
(LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 34)))
#undef NET_NAME_UNKNOWN
#endif
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 11, 0)
+#define HAVE_SIGNAL_FUNCTIONS_OWN_HEADER
+#endif
diff --git a/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_ethtool.c b/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_ethtool.c
index d7a987d5..95e262b7 100644
--- a/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_ethtool.c
+++ b/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_ethtool.c
@@ -1126,7 +1126,7 @@ static int igb_eeprom_test(struct igb_adapter *adapter, u64 *data)
static irqreturn_t igb_test_intr(int irq, void *data)
{
- struct igb_adapter *adapter = (struct igb_adapter *) data;
+ struct igb_adapter *adapter = data;
struct e1000_hw *hw = &adapter->hw;
adapter->test_icr |= E1000_READ_REG(hw, E1000_ICR);
diff --git a/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_main.c b/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_main.c
index f4dca5a3..5f1f3a6b 100644
--- a/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_main.c
+++ b/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_main.c
@@ -1031,8 +1031,15 @@ static void igb_set_interrupt_capability(struct igb_adapter *adapter, bool msix)
for (i = 0; i < numvecs; i++)
adapter->msix_entries[i].entry = i;
+#ifdef HAVE_PCI_ENABLE_MSIX
err = pci_enable_msix(pdev,
adapter->msix_entries, numvecs);
+#else
+ err = pci_enable_msix_range(pdev,
+ adapter->msix_entries,
+ numvecs,
+ numvecs);
+#endif
if (err == 0)
break;
}
@@ -1629,7 +1636,7 @@ static void igb_check_swap_media(struct igb_adapter *adapter)
*/
static int igb_get_i2c_data(void *data)
{
- struct igb_adapter *adapter = (struct igb_adapter *)data;
+ struct igb_adapter *adapter = data;
struct e1000_hw *hw = &adapter->hw;
s32 i2cctl = E1000_READ_REG(hw, E1000_I2CPARAMS);
@@ -1644,7 +1651,7 @@ static int igb_get_i2c_data(void *data)
*/
static void igb_set_i2c_data(void *data, int state)
{
- struct igb_adapter *adapter = (struct igb_adapter *)data;
+ struct igb_adapter *adapter = data;
struct e1000_hw *hw = &adapter->hw;
s32 i2cctl = E1000_READ_REG(hw, E1000_I2CPARAMS);
@@ -1669,7 +1676,7 @@ static void igb_set_i2c_data(void *data, int state)
*/
static void igb_set_i2c_clk(void *data, int state)
{
- struct igb_adapter *adapter = (struct igb_adapter *)data;
+ struct igb_adapter *adapter = data;
struct e1000_hw *hw = &adapter->hw;
s32 i2cctl = E1000_READ_REG(hw, E1000_I2CPARAMS);
@@ -1691,7 +1698,7 @@ static void igb_set_i2c_clk(void *data, int state)
*/
static int igb_get_i2c_clk(void *data)
{
- struct igb_adapter *adapter = (struct igb_adapter *)data;
+ struct igb_adapter *adapter = data;
struct e1000_hw *hw = &adapter->hw;
s32 i2cctl = E1000_READ_REG(hw, E1000_I2CPARAMS);
diff --git a/lib/librte_eal/linuxapp/kni/ethtool/igb/kcompat.h b/lib/librte_eal/linuxapp/kni/ethtool/igb/kcompat.h
index 84826b26..4c52da3c 100644
--- a/lib/librte_eal/linuxapp/kni/ethtool/igb/kcompat.h
+++ b/lib/librte_eal/linuxapp/kni/ethtool/igb/kcompat.h
@@ -710,6 +710,9 @@ struct _kc_ethtool_pauseparam {
#elif ( LINUX_VERSION_CODE >= KERNEL_VERSION(3,12,28) )
/* SLES12 is at least 3.12.28+ based */
#define SLE_VERSION_CODE SLE_VERSION(12,0,0)
+#elif (LINUX_VERSION_CODE >= KERNEL_VERSION(4, 4, 57))
+/* SLES12SP3 is at least 4.4.57+ based */
+#define SLE_VERSION_CODE SLE_VERSION(12, 3, 0)
#endif /* LINUX_VERSION_CODE == KERNEL_VERSION(x,y,z) */
#endif /* CONFIG_SUSE_KERNEL */
#ifndef SLE_VERSION_CODE
@@ -3929,8 +3932,13 @@ skb_set_hash(struct sk_buff *skb, __u32 hash, __always_unused int type)
#define vlan_tx_tag_present skb_vlan_tag_present
#endif
-#if ( LINUX_VERSION_CODE >= KERNEL_VERSION(4,9,0) )
+#if ((LINUX_VERSION_CODE >= KERNEL_VERSION(4, 9, 0)) || \
+ (SLE_VERSION_CODE && SLE_VERSION_CODE >= SLE_VERSION(12, 3, 0)))
#define HAVE_VF_VLAN_PROTO
-#endif /* >= 4.9.0 */
+#endif /* >= 4.9.0, >= SLES12SP3 */
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 8, 0)
+#define HAVE_PCI_ENABLE_MSIX
+#endif
#endif /* _KCOMPAT_H_ */
diff --git a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_ethtool.c b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_ethtool.c
index bc3cb2f4..cdfcb959 100644
--- a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_ethtool.c
+++ b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_ethtool.c
@@ -1462,7 +1462,7 @@ static int ixgbe_eeprom_test(struct ixgbe_adapter *adapter, u64 *data)
static irqreturn_t ixgbe_test_intr(int irq, void *data)
{
- struct net_device *netdev = (struct net_device *) data;
+ struct net_device *netdev = data;
struct ixgbe_adapter *adapter = netdev_priv(netdev);
adapter->test_icr |= IXGBE_READ_REG(&adapter->hw, IXGBE_EICR);
@@ -2447,7 +2447,7 @@ static int ixgbe_get_rxnfc(struct net_device *dev, struct ethtool_rxnfc *cmd,
break;
case ETHTOOL_GRXCLSRLALL:
ret = ixgbe_get_ethtool_fdir_all(adapter, cmd,
- (u32 *)rule_locs);
+ rule_locs);
break;
case ETHTOOL_GRXFH:
ret = ixgbe_get_rss_hash_opts(adapter, cmd);
diff --git a/lib/librte_eal/linuxapp/kni/kni_dev.h b/lib/librte_eal/linuxapp/kni/kni_dev.h
index 58cbadd3..72385ab4 100644
--- a/lib/librte_eal/linuxapp/kni/kni_dev.h
+++ b/lib/librte_eal/linuxapp/kni/kni_dev.h
@@ -30,17 +30,19 @@
#endif
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+#include "compat.h"
+
#include <linux/if.h>
#include <linux/wait.h>
+#ifdef HAVE_SIGNAL_FUNCTIONS_OWN_HEADER
+#include <linux/sched/signal.h>
+#else
#include <linux/sched.h>
+#endif
#include <linux/netdevice.h>
#include <linux/spinlock.h>
#include <linux/list.h>
-#ifdef RTE_KNI_VHOST
-#include <net/sock.h>
-#endif
-
#include <exec-env/rte_kni_common.h>
#define KNI_KTHREAD_RESCHEDULE_INTERVAL 5 /* us */
@@ -102,15 +104,6 @@ struct kni_dev {
/* synchro for request processing */
unsigned long synchro;
-#ifdef RTE_KNI_VHOST
- struct kni_vhost_queue *vhost_queue;
-
- volatile enum {
- BE_STOP = 0x1,
- BE_START = 0x2,
- BE_FINISH = 0x4,
- } vq_status;
-#endif
/* buffers */
void *pa[MBUF_BURST_SZ];
void *va[MBUF_BURST_SZ];
@@ -118,26 +111,6 @@ struct kni_dev {
void *alloc_va[MBUF_BURST_SZ];
};
-#ifdef RTE_KNI_VHOST
-uint32_t
-kni_poll(struct file *file, struct socket *sock, poll_table * wait);
-int kni_chk_vhost_rx(struct kni_dev *kni);
-int kni_vhost_init(struct kni_dev *kni);
-int kni_vhost_backend_release(struct kni_dev *kni);
-
-struct kni_vhost_queue {
- struct sock sk;
- struct socket *sock;
- int vnet_hdr_sz;
- struct kni_dev *kni;
- int sockfd;
- uint32_t flags;
- struct sk_buff *cache;
- struct rte_kni_fifo *fifo;
-};
-
-#endif
-
void kni_net_rx(struct kni_dev *kni);
void kni_net_init(struct net_device *dev);
void kni_net_config_lo_mode(char *lo_str);
diff --git a/lib/librte_eal/linuxapp/kni/kni_fifo.h b/lib/librte_eal/linuxapp/kni/kni_fifo.h
index 025ec1c9..14f4141f 100644
--- a/lib/librte_eal/linuxapp/kni/kni_fifo.h
+++ b/lib/librte_eal/linuxapp/kni/kni_fifo.h
@@ -91,18 +91,4 @@ kni_fifo_free_count(struct rte_kni_fifo *fifo)
return (fifo->read - fifo->write - 1) & (fifo->len - 1);
}
-#ifdef RTE_KNI_VHOST
-/**
- * Initializes the kni fifo structure
- */
-static inline void
-kni_fifo_init(struct rte_kni_fifo *fifo, uint32_t size)
-{
- fifo->write = 0;
- fifo->read = 0;
- fifo->len = size;
- fifo->elem_size = sizeof(void *);
-}
-#endif
-
#endif /* _KNI_FIFO_H_ */
diff --git a/lib/librte_eal/linuxapp/kni/kni_misc.c b/lib/librte_eal/linuxapp/kni/kni_misc.c
index 497db9bd..7590f1fd 100644
--- a/lib/librte_eal/linuxapp/kni/kni_misc.c
+++ b/lib/librte_eal/linuxapp/kni/kni_misc.c
@@ -140,11 +140,7 @@ kni_thread_single(void *data)
down_read(&knet->kni_list_lock);
for (j = 0; j < KNI_RX_LOOP_NUM; j++) {
list_for_each_entry(dev, &knet->kni_list_head, list) {
-#ifdef RTE_KNI_VHOST
- kni_chk_vhost_rx(dev);
-#else
kni_net_rx(dev);
-#endif
kni_net_poll_resp(dev);
}
}
@@ -163,15 +159,11 @@ static int
kni_thread_multiple(void *param)
{
int j;
- struct kni_dev *dev = (struct kni_dev *)param;
+ struct kni_dev *dev = param;
while (!kthread_should_stop()) {
for (j = 0; j < KNI_RX_LOOP_NUM; j++) {
-#ifdef RTE_KNI_VHOST
- kni_chk_vhost_rx(dev);
-#else
kni_net_rx(dev);
-#endif
kni_net_poll_resp(dev);
}
#ifdef RTE_KNI_PREEMPT_DEFAULT
@@ -205,12 +197,14 @@ kni_dev_remove(struct kni_dev *dev)
if (!dev)
return -ENODEV;
+#ifdef RTE_KNI_KMOD_ETHTOOL
if (dev->pci_dev) {
if (pci_match_id(ixgbe_pci_tbl, dev->pci_dev))
ixgbe_kni_remove(dev->pci_dev);
else if (pci_match_id(igb_pci_tbl, dev->pci_dev))
igb_kni_remove(dev->pci_dev);
}
+#endif
if (dev->net_dev) {
unregister_netdev(dev->net_dev);
@@ -246,9 +240,6 @@ kni_release(struct inode *inode, struct file *file)
dev->pthread = NULL;
}
-#ifdef RTE_KNI_VHOST
- kni_vhost_backend_release(dev);
-#endif
kni_dev_remove(dev);
list_del(&dev->list);
}
@@ -326,11 +317,13 @@ kni_ioctl_create(struct net *net, uint32_t ioctl_num,
struct kni_net *knet = net_generic(net, kni_net_id);
int ret;
struct rte_kni_device_info dev_info;
- struct pci_dev *pci = NULL;
- struct pci_dev *found_pci = NULL;
struct net_device *net_dev = NULL;
- struct net_device *lad_dev = NULL;
struct kni_dev *kni, *dev, *n;
+#ifdef RTE_KNI_KMOD_ETHTOOL
+ struct pci_dev *found_pci = NULL;
+ struct net_device *lad_dev = NULL;
+ struct pci_dev *pci = NULL;
+#endif
pr_info("Creating kni...\n");
/* Check the buffer size, to avoid warning */
@@ -344,6 +337,12 @@ kni_ioctl_create(struct net *net, uint32_t ioctl_num,
return -EIO;
}
+ /* Check if name is zero-ended */
+ if (strnlen(dev_info.name, sizeof(dev_info.name)) == sizeof(dev_info.name)) {
+ pr_err("kni.name not zero-terminated");
+ return -EINVAL;
+ }
+
/**
* Check if the cpu core id is valid for binding.
*/
@@ -363,8 +362,8 @@ kni_ioctl_create(struct net *net, uint32_t ioctl_num,
up_read(&knet->kni_list_lock);
net_dev = alloc_netdev(sizeof(struct kni_dev), dev_info.name,
-#ifdef NET_NAME_UNKNOWN
- NET_NAME_UNKNOWN,
+#ifdef NET_NAME_USER
+ NET_NAME_USER,
#endif
kni_net_init);
if (net_dev == NULL) {
@@ -392,10 +391,6 @@ kni_ioctl_create(struct net *net, uint32_t ioctl_num,
kni->sync_va = dev_info.sync_va;
kni->sync_kva = phys_to_virt(dev_info.sync_phys);
-#ifdef RTE_KNI_VHOST
- kni->vhost_queue = NULL;
- kni->vq_status = BE_STOP;
-#endif
kni->mbuf_size = dev_info.mbuf_size;
pr_debug("tx_phys: 0x%016llx, tx_q addr: 0x%p\n",
@@ -418,7 +413,7 @@ kni_ioctl_create(struct net *net, uint32_t ioctl_num,
dev_info.function,
dev_info.vendor_id,
dev_info.device_id);
-
+#ifdef RTE_KNI_KMOD_ETHTOOL
pci = pci_get_device(dev_info.vendor_id, dev_info.device_id, NULL);
/* Support Ethtool */
@@ -459,6 +454,7 @@ kni_ioctl_create(struct net *net, uint32_t ioctl_num,
}
if (pci)
pci_dev_put(pci);
+#endif
if (kni->lad_dev)
ether_addr_copy(net_dev->dev_addr, kni->lad_dev->dev_addr);
@@ -479,10 +475,6 @@ kni_ioctl_create(struct net *net, uint32_t ioctl_num,
return -ENODEV;
}
-#ifdef RTE_KNI_VHOST
- kni_vhost_init(kni);
-#endif
-
ret = kni_run_thread(knet, kni, dev_info.force_bind);
if (ret != 0)
return ret;
@@ -526,9 +518,6 @@ kni_ioctl_release(struct net *net, uint32_t ioctl_num,
dev->pthread = NULL;
}
-#ifdef RTE_KNI_VHOST
- kni_vhost_backend_release(dev);
-#endif
kni_dev_remove(dev);
list_del(&dev->list);
ret = 0;
diff --git a/lib/librte_eal/linuxapp/kni/kni_net.c b/lib/librte_eal/linuxapp/kni/kni_net.c
index 4ac99cfe..db9f4898 100644
--- a/lib/librte_eal/linuxapp/kni/kni_net.c
+++ b/lib/librte_eal/linuxapp/kni/kni_net.c
@@ -198,18 +198,6 @@ kni_net_config(struct net_device *dev, struct ifmap *map)
/*
* Transmit a packet (called by the kernel)
*/
-#ifdef RTE_KNI_VHOST
-static int
-kni_net_tx(struct sk_buff *skb, struct net_device *dev)
-{
- struct kni_dev *kni = netdev_priv(dev);
-
- dev_kfree_skb(skb);
- kni->stats.tx_dropped++;
-
- return NETDEV_TX_OK;
-}
-#else
static int
kni_net_tx(struct sk_buff *skb, struct net_device *dev)
{
@@ -289,7 +277,6 @@ drop:
return NETDEV_TX_OK;
}
-#endif
/*
* RX: normal working mode
diff --git a/lib/librte_eal/linuxapp/kni/kni_vhost.c b/lib/librte_eal/linuxapp/kni/kni_vhost.c
deleted file mode 100644
index f54c34b1..00000000
--- a/lib/librte_eal/linuxapp/kni/kni_vhost.c
+++ /dev/null
@@ -1,842 +0,0 @@
-/*-
- * GPL LICENSE SUMMARY
- *
- * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
- * The full GNU General Public License is included in this distribution
- * in the file called LICENSE.GPL.
- *
- * Contact Information:
- * Intel Corporation
- */
-
-#include <linux/module.h>
-#include <linux/net.h>
-#include <net/sock.h>
-#include <linux/virtio_net.h>
-#include <linux/wait.h>
-#include <linux/mm.h>
-#include <linux/nsproxy.h>
-#include <linux/sched.h>
-#include <linux/if_tun.h>
-#include <linux/version.h>
-#include <linux/file.h>
-
-#include "compat.h"
-#include "kni_dev.h"
-#include "kni_fifo.h"
-
-#define RX_BURST_SZ 4
-
-#ifdef HAVE_STATIC_SOCK_MAP_FD
-static int kni_sock_map_fd(struct socket *sock)
-{
- struct file *file;
- int fd = get_unused_fd_flags(0);
-
- if (fd < 0)
- return fd;
-
- file = sock_alloc_file(sock, 0, NULL);
- if (IS_ERR(file)) {
- put_unused_fd(fd);
- return PTR_ERR(file);
- }
- fd_install(fd, file);
- return fd;
-}
-#endif
-
-static struct proto kni_raw_proto = {
- .name = "kni_vhost",
- .owner = THIS_MODULE,
- .obj_size = sizeof(struct kni_vhost_queue),
-};
-
-static inline int
-kni_vhost_net_tx(struct kni_dev *kni, struct msghdr *m,
- uint32_t offset, uint32_t len)
-{
- struct rte_kni_mbuf *pkt_kva = NULL;
- struct rte_kni_mbuf *pkt_va = NULL;
- int ret;
-
- pr_debug("tx offset=%d, len=%d, iovlen=%d\n",
-#ifdef HAVE_IOV_ITER_MSGHDR
- offset, len, (int)m->msg_iter.iov->iov_len);
-#else
- offset, len, (int)m->msg_iov->iov_len);
-#endif
-
- /**
- * Check if it has at least one free entry in tx_q and
- * one entry in alloc_q.
- */
- if (kni_fifo_free_count(kni->tx_q) == 0 ||
- kni_fifo_count(kni->alloc_q) == 0) {
- /**
- * If no free entry in tx_q or no entry in alloc_q,
- * drops skb and goes out.
- */
- goto drop;
- }
-
- /* dequeue a mbuf from alloc_q */
- ret = kni_fifo_get(kni->alloc_q, (void **)&pkt_va, 1);
- if (likely(ret == 1)) {
- void *data_kva;
-
- pkt_kva = (void *)pkt_va - kni->mbuf_va + kni->mbuf_kva;
- data_kva = pkt_kva->buf_addr + pkt_kva->data_off
- - kni->mbuf_va + kni->mbuf_kva;
-
-#ifdef HAVE_IOV_ITER_MSGHDR
- copy_from_iter(data_kva, len, &m->msg_iter);
-#else
- memcpy_fromiovecend(data_kva, m->msg_iov, offset, len);
-#endif
-
- if (unlikely(len < ETH_ZLEN)) {
- memset(data_kva + len, 0, ETH_ZLEN - len);
- len = ETH_ZLEN;
- }
- pkt_kva->pkt_len = len;
- pkt_kva->data_len = len;
-
- /* enqueue mbuf into tx_q */
- ret = kni_fifo_put(kni->tx_q, (void **)&pkt_va, 1);
- if (unlikely(ret != 1)) {
- /* Failing should not happen */
- pr_err("Fail to enqueue mbuf into tx_q\n");
- goto drop;
- }
- } else {
- /* Failing should not happen */
- pr_err("Fail to dequeue mbuf from alloc_q\n");
- goto drop;
- }
-
- /* update statistics */
- kni->stats.tx_bytes += len;
- kni->stats.tx_packets++;
-
- return 0;
-
-drop:
- /* update statistics */
- kni->stats.tx_dropped++;
-
- return 0;
-}
-
-static inline int
-kni_vhost_net_rx(struct kni_dev *kni, struct msghdr *m,
- uint32_t offset, uint32_t len)
-{
- uint32_t pkt_len;
- struct rte_kni_mbuf *kva;
- struct rte_kni_mbuf *va;
- void *data_kva;
- struct sk_buff *skb;
- struct kni_vhost_queue *q = kni->vhost_queue;
-
- if (unlikely(q == NULL))
- return 0;
-
- /* ensure at least one entry in free_q */
- if (unlikely(kni_fifo_free_count(kni->free_q) == 0))
- return 0;
-
- skb = skb_dequeue(&q->sk.sk_receive_queue);
- if (unlikely(skb == NULL))
- return 0;
-
- kva = (struct rte_kni_mbuf *)skb->data;
-
- /* free skb to cache */
- skb->data = NULL;
- if (unlikely(kni_fifo_put(q->fifo, (void **)&skb, 1) != 1))
- /* Failing should not happen */
- pr_err("Fail to enqueue entries into rx cache fifo\n");
-
- pkt_len = kva->data_len;
- if (unlikely(pkt_len > len))
- goto drop;
-
- pr_debug("rx offset=%d, len=%d, pkt_len=%d, iovlen=%d\n",
-#ifdef HAVE_IOV_ITER_MSGHDR
- offset, len, pkt_len, (int)m->msg_iter.iov->iov_len);
-#else
- offset, len, pkt_len, (int)m->msg_iov->iov_len);
-#endif
-
- data_kva = kva->buf_addr + kva->data_off - kni->mbuf_va + kni->mbuf_kva;
-#ifdef HAVE_IOV_ITER_MSGHDR
- if (unlikely(copy_to_iter(data_kva, pkt_len, &m->msg_iter)))
-#else
- if (unlikely(memcpy_toiovecend(m->msg_iov, data_kva, offset, pkt_len)))
-#endif
- goto drop;
-
- /* Update statistics */
- kni->stats.rx_bytes += pkt_len;
- kni->stats.rx_packets++;
-
- /* enqueue mbufs into free_q */
- va = (void *)kva - kni->mbuf_kva + kni->mbuf_va;
- if (unlikely(kni_fifo_put(kni->free_q, (void **)&va, 1) != 1))
- /* Failing should not happen */
- pr_err("Fail to enqueue entries into free_q\n");
-
- pr_debug("receive done %d\n", pkt_len);
-
- return pkt_len;
-
-drop:
- /* Update drop statistics */
- kni->stats.rx_dropped++;
-
- return 0;
-}
-
-static uint32_t
-kni_sock_poll(struct file *file, struct socket *sock, poll_table *wait)
-{
- struct kni_vhost_queue *q =
- container_of(sock->sk, struct kni_vhost_queue, sk);
- struct kni_dev *kni;
- uint32_t mask = 0;
-
- if (unlikely(q == NULL || q->kni == NULL))
- return POLLERR;
-
- kni = q->kni;
-#ifdef HAVE_SOCKET_WQ
- pr_debug("start kni_poll on group %d, wq 0x%16llx\n",
- kni->group_id, (uint64_t)sock->wq);
- poll_wait(file, &sock->wq->wait, wait);
-#else
- pr_debug("start kni_poll on group %d, wait at 0x%16llx\n",
- kni->group_id, (uint64_t)&sock->wait);
- poll_wait(file, &sock->wait, wait);
-#endif
-
- if (kni_fifo_count(kni->rx_q) > 0)
- mask |= POLLIN | POLLRDNORM;
-
- if (sock_writeable(&q->sk) ||
-#ifdef SOCKWQ_ASYNC_NOSPACE
- (!test_and_set_bit(SOCKWQ_ASYNC_NOSPACE, &q->sock->flags) &&
- sock_writeable(&q->sk)))
-#else
- (!test_and_set_bit(SOCK_ASYNC_NOSPACE, &q->sock->flags) &&
- sock_writeable(&q->sk)))
-#endif
- mask |= POLLOUT | POLLWRNORM;
-
- return mask;
-}
-
-static inline void
-kni_vhost_enqueue(struct kni_dev *kni, struct kni_vhost_queue *q,
- struct sk_buff *skb, struct rte_kni_mbuf *va)
-{
- struct rte_kni_mbuf *kva;
-
- kva = (void *)(va) - kni->mbuf_va + kni->mbuf_kva;
- (skb)->data = (unsigned char *)kva;
- (skb)->len = kva->data_len;
- skb_queue_tail(&q->sk.sk_receive_queue, skb);
-}
-
-static inline void
-kni_vhost_enqueue_burst(struct kni_dev *kni, struct kni_vhost_queue *q,
- struct sk_buff **skb, struct rte_kni_mbuf **va)
-{
- int i;
-
- for (i = 0; i < RX_BURST_SZ; skb++, va++, i++)
- kni_vhost_enqueue(kni, q, *skb, *va);
-}
-
-int
-kni_chk_vhost_rx(struct kni_dev *kni)
-{
- struct kni_vhost_queue *q = kni->vhost_queue;
- uint32_t nb_in, nb_mbuf, nb_skb;
- const uint32_t BURST_MASK = RX_BURST_SZ - 1;
- uint32_t nb_burst, nb_backlog, i;
- struct sk_buff *skb[RX_BURST_SZ];
- struct rte_kni_mbuf *va[RX_BURST_SZ];
-
- if (unlikely(BE_STOP & kni->vq_status)) {
- kni->vq_status |= BE_FINISH;
- return 0;
- }
-
- if (unlikely(q == NULL))
- return 0;
-
- nb_skb = kni_fifo_count(q->fifo);
- nb_mbuf = kni_fifo_count(kni->rx_q);
-
- nb_in = min(nb_mbuf, nb_skb);
- nb_in = min_t(uint32_t, nb_in, RX_BURST_SZ);
- nb_burst = (nb_in & ~BURST_MASK);
- nb_backlog = (nb_in & BURST_MASK);
-
- /* enqueue skb_queue per BURST_SIZE bulk */
- if (nb_burst != 0) {
- if (unlikely(kni_fifo_get(kni->rx_q, (void **)&va, RX_BURST_SZ)
- != RX_BURST_SZ))
- goto except;
-
- if (unlikely(kni_fifo_get(q->fifo, (void **)&skb, RX_BURST_SZ)
- != RX_BURST_SZ))
- goto except;
-
- kni_vhost_enqueue_burst(kni, q, skb, va);
- }
-
- /* all leftover, do one by one */
- for (i = 0; i < nb_backlog; ++i) {
- if (unlikely(kni_fifo_get(kni->rx_q, (void **)&va, 1) != 1))
- goto except;
-
- if (unlikely(kni_fifo_get(q->fifo, (void **)&skb, 1) != 1))
- goto except;
-
- kni_vhost_enqueue(kni, q, *skb, *va);
- }
-
- /* Ondemand wake up */
- if ((nb_in == RX_BURST_SZ) || (nb_skb == 0) ||
- ((nb_mbuf < RX_BURST_SZ) && (nb_mbuf != 0))) {
- wake_up_interruptible_poll(sk_sleep(&q->sk),
- POLLIN | POLLRDNORM | POLLRDBAND);
- pr_debug("RX CHK KICK nb_mbuf %d, nb_skb %d, nb_in %d\n",
- nb_mbuf, nb_skb, nb_in);
- }
-
- return 0;
-
-except:
- /* Failing should not happen */
- pr_err("Fail to enqueue fifo, it shouldn't happen\n");
- BUG_ON(1);
-
- return 0;
-}
-
-static int
-#ifdef HAVE_KIOCB_MSG_PARAM
-kni_sock_sndmsg(struct kiocb *iocb, struct socket *sock,
- struct msghdr *m, size_t total_len)
-#else
-kni_sock_sndmsg(struct socket *sock,
- struct msghdr *m, size_t total_len)
-#endif /* HAVE_KIOCB_MSG_PARAM */
-{
- struct kni_vhost_queue *q =
- container_of(sock->sk, struct kni_vhost_queue, sk);
- int vnet_hdr_len = 0;
- unsigned long len = total_len;
-
- if (unlikely(q == NULL || q->kni == NULL))
- return 0;
-
- pr_debug("kni_sndmsg len %ld, flags 0x%08x, nb_iov %d\n",
-#ifdef HAVE_IOV_ITER_MSGHDR
- len, q->flags, (int)m->msg_iter.iov->iov_len);
-#else
- len, q->flags, (int)m->msg_iovlen);
-#endif
-
-#ifdef RTE_KNI_VHOST_VNET_HDR_EN
- if (likely(q->flags & IFF_VNET_HDR)) {
- vnet_hdr_len = q->vnet_hdr_sz;
- if (unlikely(len < vnet_hdr_len))
- return -EINVAL;
- len -= vnet_hdr_len;
- }
-#endif
-
- if (unlikely(len < ETH_HLEN + q->vnet_hdr_sz))
- return -EINVAL;
-
- return kni_vhost_net_tx(q->kni, m, vnet_hdr_len, len);
-}
-
-static int
-#ifdef HAVE_KIOCB_MSG_PARAM
-kni_sock_rcvmsg(struct kiocb *iocb, struct socket *sock,
- struct msghdr *m, size_t len, int flags)
-#else
-kni_sock_rcvmsg(struct socket *sock,
- struct msghdr *m, size_t len, int flags)
-#endif /* HAVE_KIOCB_MSG_PARAM */
-{
- int vnet_hdr_len = 0;
- int pkt_len = 0;
- struct kni_vhost_queue *q =
- container_of(sock->sk, struct kni_vhost_queue, sk);
- static struct virtio_net_hdr
- __attribute__ ((unused)) vnet_hdr = {
- .flags = 0,
- .gso_type = VIRTIO_NET_HDR_GSO_NONE
- };
-
- if (unlikely(q == NULL || q->kni == NULL))
- return 0;
-
-#ifdef RTE_KNI_VHOST_VNET_HDR_EN
- if (likely(q->flags & IFF_VNET_HDR)) {
- vnet_hdr_len = q->vnet_hdr_sz;
- len -= vnet_hdr_len;
- if (len < 0)
- return -EINVAL;
- }
-#endif
-
- pkt_len = kni_vhost_net_rx(q->kni, m, vnet_hdr_len, len);
- if (unlikely(pkt_len == 0))
- return 0;
-
-#ifdef RTE_KNI_VHOST_VNET_HDR_EN
- /* no need to copy hdr when no pkt received */
-#ifdef HAVE_IOV_ITER_MSGHDR
- if (unlikely(copy_to_iter((void *)&vnet_hdr, vnet_hdr_len,
- &m->msg_iter)))
-#else
- if (unlikely(memcpy_toiovecend(m->msg_iov,
- (void *)&vnet_hdr, 0, vnet_hdr_len)))
-#endif /* HAVE_IOV_ITER_MSGHDR */
- return -EFAULT;
-#endif /* RTE_KNI_VHOST_VNET_HDR_EN */
- pr_debug("kni_rcvmsg expect_len %ld, flags 0x%08x, pkt_len %d\n",
- (unsigned long)len, q->flags, pkt_len);
-
- return pkt_len + vnet_hdr_len;
-}
-
-/* dummy tap like ioctl */
-static int
-kni_sock_ioctl(struct socket *sock, uint32_t cmd, unsigned long arg)
-{
- void __user *argp = (void __user *)arg;
- struct ifreq __user *ifr = argp;
- uint32_t __user *up = argp;
- struct kni_vhost_queue *q =
- container_of(sock->sk, struct kni_vhost_queue, sk);
- struct kni_dev *kni;
- uint32_t u;
- int __user *sp = argp;
- int s;
- int ret;
-
- pr_debug("tap ioctl cmd 0x%08x\n", cmd);
-
- switch (cmd) {
- case TUNSETIFF:
- pr_debug("TUNSETIFF\n");
- /* ignore the name, just look at flags */
- if (get_user(u, &ifr->ifr_flags))
- return -EFAULT;
-
- ret = 0;
- if ((u & ~IFF_VNET_HDR) != (IFF_NO_PI | IFF_TAP))
- ret = -EINVAL;
- else
- q->flags = u;
-
- return ret;
-
- case TUNGETIFF:
- pr_debug("TUNGETIFF\n");
- rcu_read_lock_bh();
- kni = rcu_dereference_bh(q->kni);
- if (kni)
- dev_hold(kni->net_dev);
- rcu_read_unlock_bh();
-
- if (!kni)
- return -ENOLINK;
-
- ret = 0;
- if (copy_to_user(&ifr->ifr_name, kni->net_dev->name, IFNAMSIZ)
- || put_user(q->flags, &ifr->ifr_flags))
- ret = -EFAULT;
- dev_put(kni->net_dev);
- return ret;
-
- case TUNGETFEATURES:
- pr_debug("TUNGETFEATURES\n");
- u = IFF_TAP | IFF_NO_PI;
-#ifdef RTE_KNI_VHOST_VNET_HDR_EN
- u |= IFF_VNET_HDR;
-#endif
- if (put_user(u, up))
- return -EFAULT;
- return 0;
-
- case TUNSETSNDBUF:
- pr_debug("TUNSETSNDBUF\n");
- if (get_user(u, up))
- return -EFAULT;
-
- q->sk.sk_sndbuf = u;
- return 0;
-
- case TUNGETVNETHDRSZ:
- s = q->vnet_hdr_sz;
- if (put_user(s, sp))
- return -EFAULT;
- pr_debug("TUNGETVNETHDRSZ %d\n", s);
- return 0;
-
- case TUNSETVNETHDRSZ:
- if (get_user(s, sp))
- return -EFAULT;
- if (s < (int)sizeof(struct virtio_net_hdr))
- return -EINVAL;
-
- pr_debug("TUNSETVNETHDRSZ %d\n", s);
- q->vnet_hdr_sz = s;
- return 0;
-
- case TUNSETOFFLOAD:
- pr_debug("TUNSETOFFLOAD %lx\n", arg);
-#ifdef RTE_KNI_VHOST_VNET_HDR_EN
- /* not support any offload yet */
- if (!(q->flags & IFF_VNET_HDR))
- return -EINVAL;
-
- return 0;
-#else
- return -EINVAL;
-#endif
-
- default:
- pr_debug("NOT SUPPORT\n");
- return -EINVAL;
- }
-}
-
-static int
-kni_sock_compat_ioctl(struct socket *sock, uint32_t cmd,
- unsigned long arg)
-{
- /* 32 bits app on 64 bits OS to be supported later */
- pr_debug("Not implemented.\n");
-
- return -EINVAL;
-}
-
-#define KNI_VHOST_WAIT_WQ_SAFE() \
-do { \
- while ((BE_FINISH | BE_STOP) == kni->vq_status) \
- msleep(1); \
-} while (0) \
-
-
-static int
-kni_sock_release(struct socket *sock)
-{
- struct kni_vhost_queue *q =
- container_of(sock->sk, struct kni_vhost_queue, sk);
- struct kni_dev *kni;
-
- if (q == NULL)
- return 0;
-
- kni = q->kni;
- if (kni != NULL) {
- kni->vq_status = BE_STOP;
- KNI_VHOST_WAIT_WQ_SAFE();
- kni->vhost_queue = NULL;
- q->kni = NULL;
- }
-
- if (q->sockfd != -1)
- q->sockfd = -1;
-
- sk_set_socket(&q->sk, NULL);
- sock->sk = NULL;
-
- sock_put(&q->sk);
-
- pr_debug("dummy sock release done\n");
-
- return 0;
-}
-
-int
-kni_sock_getname(struct socket *sock, struct sockaddr *addr,
- int *sockaddr_len, int peer)
-{
- pr_debug("dummy sock getname\n");
- ((struct sockaddr_ll *)addr)->sll_family = AF_PACKET;
- return 0;
-}
-
-static const struct proto_ops kni_socket_ops = {
- .getname = kni_sock_getname,
- .sendmsg = kni_sock_sndmsg,
- .recvmsg = kni_sock_rcvmsg,
- .release = kni_sock_release,
- .poll = kni_sock_poll,
- .ioctl = kni_sock_ioctl,
- .compat_ioctl = kni_sock_compat_ioctl,
-};
-
-static void
-kni_sk_write_space(struct sock *sk)
-{
- wait_queue_head_t *wqueue;
-
- if (!sock_writeable(sk) ||
-#ifdef SOCKWQ_ASYNC_NOSPACE
- !test_and_clear_bit(SOCKWQ_ASYNC_NOSPACE, &sk->sk_socket->flags))
-#else
- !test_and_clear_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags))
-#endif
- return;
- wqueue = sk_sleep(sk);
- if (wqueue && waitqueue_active(wqueue))
- wake_up_interruptible_poll(
- wqueue, POLLOUT | POLLWRNORM | POLLWRBAND);
-}
-
-static void
-kni_sk_destruct(struct sock *sk)
-{
- struct kni_vhost_queue *q =
- container_of(sk, struct kni_vhost_queue, sk);
-
- if (!q)
- return;
-
- /* make sure there's no packet in buffer */
- while (skb_dequeue(&sk->sk_receive_queue) != NULL)
- ;
-
- mb();
-
- if (q->fifo != NULL) {
- kfree(q->fifo);
- q->fifo = NULL;
- }
-
- if (q->cache != NULL) {
- kfree(q->cache);
- q->cache = NULL;
- }
-}
-
-static int
-kni_vhost_backend_init(struct kni_dev *kni)
-{
- struct kni_vhost_queue *q;
- struct net *net = current->nsproxy->net_ns;
- int err, i, sockfd;
- struct rte_kni_fifo *fifo;
- struct sk_buff *elem;
-
- if (kni->vhost_queue != NULL)
- return -1;
-
-#ifdef HAVE_SK_ALLOC_KERN_PARAM
- q = (struct kni_vhost_queue *)sk_alloc(net, AF_UNSPEC, GFP_KERNEL,
- &kni_raw_proto, 0);
-#else
- q = (struct kni_vhost_queue *)sk_alloc(net, AF_UNSPEC, GFP_KERNEL,
- &kni_raw_proto);
-#endif
- if (!q)
- return -ENOMEM;
-
- err = sock_create_lite(AF_UNSPEC, SOCK_RAW, IPPROTO_RAW, &q->sock);
- if (err)
- goto free_sk;
-
- sockfd = kni_sock_map_fd(q->sock);
- if (sockfd < 0) {
- err = sockfd;
- goto free_sock;
- }
-
- /* cache init */
- q->cache = kzalloc(
- RTE_KNI_VHOST_MAX_CACHE_SIZE * sizeof(struct sk_buff),
- GFP_KERNEL);
- if (!q->cache)
- goto free_fd;
-
- fifo = kzalloc(RTE_KNI_VHOST_MAX_CACHE_SIZE * sizeof(void *)
- + sizeof(struct rte_kni_fifo), GFP_KERNEL);
- if (!fifo)
- goto free_cache;
-
- kni_fifo_init(fifo, RTE_KNI_VHOST_MAX_CACHE_SIZE);
-
- for (i = 0; i < RTE_KNI_VHOST_MAX_CACHE_SIZE; i++) {
- elem = &q->cache[i];
- kni_fifo_put(fifo, (void **)&elem, 1);
- }
- q->fifo = fifo;
-
- /* store sockfd in vhost_queue */
- q->sockfd = sockfd;
-
- /* init socket */
- q->sock->type = SOCK_RAW;
- q->sock->state = SS_CONNECTED;
- q->sock->ops = &kni_socket_ops;
- sock_init_data(q->sock, &q->sk);
-
- /* init sock data */
- q->sk.sk_write_space = kni_sk_write_space;
- q->sk.sk_destruct = kni_sk_destruct;
- q->flags = IFF_NO_PI | IFF_TAP;
- q->vnet_hdr_sz = sizeof(struct virtio_net_hdr);
-#ifdef RTE_KNI_VHOST_VNET_HDR_EN
- q->flags |= IFF_VNET_HDR;
-#endif
-
- /* bind kni_dev with vhost_queue */
- q->kni = kni;
- kni->vhost_queue = q;
-
- wmb();
-
- kni->vq_status = BE_START;
-
-#ifdef HAVE_SOCKET_WQ
- pr_debug("backend init sockfd=%d, sock->wq=0x%16llx,sk->sk_wq=0x%16llx",
- q->sockfd, (uint64_t)q->sock->wq,
- (uint64_t)q->sk.sk_wq);
-#else
- pr_debug("backend init sockfd=%d, sock->wait at 0x%16llx,sk->sk_sleep=0x%16llx",
- q->sockfd, (uint64_t)&q->sock->wait,
- (uint64_t)q->sk.sk_sleep);
-#endif
-
- return 0;
-
-free_cache:
- kfree(q->cache);
- q->cache = NULL;
-
-free_fd:
- put_unused_fd(sockfd);
-
-free_sock:
- q->kni = NULL;
- kni->vhost_queue = NULL;
- kni->vq_status |= BE_FINISH;
- sock_release(q->sock);
- q->sock->ops = NULL;
- q->sock = NULL;
-
-free_sk:
- sk_free((struct sock *)q);
-
- return err;
-}
-
-/* kni vhost sock sysfs */
-static ssize_t
-show_sock_fd(struct device *dev, struct device_attribute *attr,
- char *buf)
-{
- struct net_device *net_dev = container_of(dev, struct net_device, dev);
- struct kni_dev *kni = netdev_priv(net_dev);
- int sockfd = -1;
-
- if (kni->vhost_queue != NULL)
- sockfd = kni->vhost_queue->sockfd;
- return snprintf(buf, 10, "%d\n", sockfd);
-}
-
-static ssize_t
-show_sock_en(struct device *dev, struct device_attribute *attr,
- char *buf)
-{
- struct net_device *net_dev = container_of(dev, struct net_device, dev);
- struct kni_dev *kni = netdev_priv(net_dev);
-
- return snprintf(buf, 10, "%u\n", (kni->vhost_queue == NULL ? 0 : 1));
-}
-
-static ssize_t
-set_sock_en(struct device *dev, struct device_attribute *attr,
- const char *buf, size_t count)
-{
- struct net_device *net_dev = container_of(dev, struct net_device, dev);
- struct kni_dev *kni = netdev_priv(net_dev);
- unsigned long en;
- int err = 0;
-
- if (kstrtoul(buf, 0, &en) != 0)
- return -EINVAL;
-
- if (en)
- err = kni_vhost_backend_init(kni);
-
- return err ? err : count;
-}
-
-static DEVICE_ATTR(sock_fd, S_IRUGO | S_IRUSR, show_sock_fd, NULL);
-static DEVICE_ATTR(sock_en, S_IRUGO | S_IWUSR, show_sock_en, set_sock_en);
-static struct attribute *dev_attrs[] = {
- &dev_attr_sock_fd.attr,
- &dev_attr_sock_en.attr,
- NULL,
-};
-
-static const struct attribute_group dev_attr_grp = {
- .attrs = dev_attrs,
-};
-
-int
-kni_vhost_backend_release(struct kni_dev *kni)
-{
- struct kni_vhost_queue *q = kni->vhost_queue;
-
- if (q == NULL)
- return 0;
-
- /* dettach from kni */
- q->kni = NULL;
-
- pr_debug("release backend done\n");
-
- return 0;
-}
-
-int
-kni_vhost_init(struct kni_dev *kni)
-{
- struct net_device *dev = kni->net_dev;
-
- if (sysfs_create_group(&dev->dev.kobj, &dev_attr_grp))
- sysfs_remove_group(&dev->dev.kobj, &dev_attr_grp);
-
- kni->vq_status = BE_STOP;
-
- pr_debug("kni_vhost_init done\n");
-
- return 0;
-}
diff --git a/lib/librte_eal/linuxapp/xen_dom0/Makefile b/lib/librte_eal/linuxapp/xen_dom0/Makefile
index 9d22fb97..be51a82a 100644
--- a/lib/librte_eal/linuxapp/xen_dom0/Makefile
+++ b/lib/librte_eal/linuxapp/xen_dom0/Makefile
@@ -44,9 +44,6 @@ MODULE_CFLAGS += -I$(RTE_OUTPUT)/include
MODULE_CFLAGS += -include $(RTE_OUTPUT)/include/rte_config.h
MODULE_CFLAGS += -Wall -Werror
-# this lib needs main eal
-DEPDIRS-y += lib/librte_eal/linuxapp/eal
-
#
# all source are stored in SRCS-y
#