summary | refs | log | tree | commit | diff | stats
path: root/lib/librte_eal/linuxapp/eal
diff options
context:
space:
mode:
author Ricardo Salveti <ricardo.salveti@linaro.org> 2016-07-18 15:30:06 -0300
committer Ricardo Salveti <ricardo.salveti@linaro.org> 2016-07-18 15:30:30 -0300
commit a41e6ff15809d40e0f9bbc9576bf8f7f80fbec1d (patch)
tree c9e6fc399c2738e84ed2585e6e51e90f9608ca12 /lib/librte_eal/linuxapp/eal
parent 8b25d1ad5d2264bdfc2818c7bda74ee2697df6db (diff)
Imported Upstream version 16.07-rc2
Change-Id: Ie9e8ec528a2a0dace085c5e44aa7fa3b489d4ba0
Signed-off-by: Ricardo Salveti <ricardo.salveti@linaro.org>
Diffstat (limited to 'lib/librte_eal/linuxapp/eal')
-rw-r--r-- lib/librte_eal/linuxapp/eal/Makefile 5
-rw-r--r-- lib/librte_eal/linuxapp/eal/eal.c 33
-rw-r--r-- lib/librte_eal/linuxapp/eal/eal_memory.c 32
-rw-r--r-- lib/librte_eal/linuxapp/eal/eal_pci.c 17
-rw-r--r-- lib/librte_eal/linuxapp/eal/eal_pci_init.h 41
-rw-r--r-- lib/librte_eal/linuxapp/eal/eal_pci_vfio.c 517
-rw-r--r-- lib/librte_eal/linuxapp/eal/eal_vfio.c 547
-rw-r--r-- lib/librte_eal/linuxapp/eal/eal_vfio.h 94
-rw-r--r-- lib/librte_eal/linuxapp/eal/eal_vfio_mp_sync.c (renamed from lib/librte_eal/linuxapp/eal/eal_pci_vfio_mp_sync.c) 12
-rw-r--r-- lib/librte_eal/linuxapp/eal/eal_xen_memory.c 4
-rw-r--r-- lib/librte_eal/linuxapp/eal/rte_eal_version.map 1
11 files changed, 724 insertions, 579 deletions
diff --git a/lib/librte_eal/linuxapp/eal/Makefile b/lib/librte_eal/linuxapp/eal/Makefile
index 30b30f33..1a976931 100644
--- a/lib/librte_eal/linuxapp/eal/Makefile
+++ b/lib/librte_eal/linuxapp/eal/Makefile
@@ -66,10 +66,11 @@ SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_xen_memory.c
endif
SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_thread.c
SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_log.c
+SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_vfio.c
+SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_vfio_mp_sync.c
SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_pci.c
SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_pci_uio.c
SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_pci_vfio.c
-SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_pci_vfio_mp_sync.c
SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_debug.c
SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_lcore.c
SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_timer.c
@@ -110,7 +111,7 @@ CFLAGS_eal_common_cpuflags.o := $(CPUFLAGS_LIST)
CFLAGS_eal.o := -D_GNU_SOURCE
CFLAGS_eal_interrupts.o := -D_GNU_SOURCE
-CFLAGS_eal_pci_vfio_mp_sync.o := -D_GNU_SOURCE
+CFLAGS_eal_vfio_mp_sync.o := -D_GNU_SOURCE
CFLAGS_eal_timer.o := -D_GNU_SOURCE
CFLAGS_eal_lcore.o := -D_GNU_SOURCE
CFLAGS_eal_thread.o := -D_GNU_SOURCE
diff --git a/lib/librte_eal/linuxapp/eal/eal.c b/lib/librte_eal/linuxapp/eal/eal.c
index 543ef869..3fb2188f 100644
--- a/lib/librte_eal/linuxapp/eal/eal.c
+++ b/lib/librte_eal/linuxapp/eal/eal.c
@@ -82,6 +82,7 @@
#include "eal_filesystem.h"
#include "eal_hugepages.h"
#include "eal_options.h"
+#include "eal_vfio.h"
#define MEMSIZE_IF_NO_HUGE_PAGE (64ULL * 1024ULL * 1024ULL)
@@ -701,6 +702,33 @@ rte_eal_iopl_init(void)
return 0;
}
+#ifdef VFIO_PRESENT
+static int rte_eal_vfio_setup(void) /* 0 on success, -1 if the primary cannot set up VFIO MP sync */
+{
+ int vfio_enabled = 0; /* set when the PCI VFIO backend reports itself enabled */
+
+ if (!internal_config.no_pci) {
+ pci_vfio_enable(); /* return value is not checked; the outcome is read back below */
+ vfio_enabled |= pci_vfio_is_enabled();
+ }
+
+ if (vfio_enabled) {
+
+ /* If we are the primary process, create a thread to communicate with
+ * secondary processes. The thread uses a socket to wait for requests
+ * from secondary processes and sends them already-open file
+ * descriptors, because VFIO does not allow multiple open descriptors
+ * on a group or VFIO container.
+ */
+ if (internal_config.process_type == RTE_PROC_PRIMARY &&
+ vfio_mp_sync_setup() < 0)
+ return -1;
+ }
+
+ return 0;
+}
+#endif
+
/* Launch threads, called at application init(). */
int
rte_eal_init(int argc, char **argv)
@@ -764,6 +792,11 @@ rte_eal_init(int argc, char **argv)
if (rte_eal_pci_init() < 0)
rte_panic("Cannot init PCI\n");
+#ifdef VFIO_PRESENT
+ if (rte_eal_vfio_setup() < 0)
+ rte_panic("Cannot init VFIO\n");
+#endif
+
#ifdef RTE_LIBRTE_IVSHMEM
if (rte_eal_ivshmem_init() < 0)
rte_panic("Cannot init IVSHMEM\n");
diff --git a/lib/librte_eal/linuxapp/eal/eal_memory.c b/lib/librte_eal/linuxapp/eal/eal_memory.c
index 5578c254..42a29faf 100644
--- a/lib/librte_eal/linuxapp/eal/eal_memory.c
+++ b/lib/librte_eal/linuxapp/eal/eal_memory.c
@@ -164,6 +164,29 @@ rte_mem_virt2phy(const void *virtaddr)
int page_size;
off_t offset;
+ /* when using dom0, /proc/self/pagemap always returns 0, check in
+ * dpdk memory by browsing the memsegs */
+ if (rte_xen_dom0_supported()) {
+ struct rte_mem_config *mcfg;
+ struct rte_memseg *memseg;
+ unsigned i;
+
+ mcfg = rte_eal_get_configuration()->mem_config;
+ for (i = 0; i < RTE_MAX_MEMSEG; i++) {
+ memseg = &mcfg->memseg[i];
+ if (memseg->addr == NULL)
+ break;
+ if (virtaddr > memseg->addr &&
+ virtaddr < RTE_PTR_ADD(memseg->addr,
+ memseg->len)) {
+ return memseg->phys_addr +
+ RTE_PTR_DIFF(virtaddr, memseg->addr);
+ }
+ }
+
+ return RTE_BAD_PHYS_ADDR;
+ }
+
/* Cannot parse /proc/self/pagemap, no need to log errors everywhere */
if (!proc_pagemap_readable)
return RTE_BAD_PHYS_ADDR;
@@ -1136,7 +1159,7 @@ int
rte_eal_hugepage_init(void)
{
struct rte_mem_config *mcfg;
- struct hugepage_file *hugepage, *tmp_hp = NULL;
+ struct hugepage_file *hugepage = NULL, *tmp_hp = NULL;
struct hugepage_info used_hp[MAX_HUGEPAGE_SIZES];
uint64_t memory[RTE_MAX_NUMA_NODES];
@@ -1479,14 +1502,19 @@ rte_eal_hugepage_init(void)
"of memory.\n",
i, nr_hugefiles, RTE_STR(CONFIG_RTE_MAX_MEMSEG),
RTE_MAX_MEMSEG);
- return -ENOMEM;
+ goto fail;
}
+ munmap(hugepage, nr_hugefiles * sizeof(struct hugepage_file));
+
return 0;
fail:
huge_recover_sigbus();
free(tmp_hp);
+ if (hugepage != NULL)
+ munmap(hugepage, nr_hugefiles * sizeof(struct hugepage_file));
+
return -1;
}
diff --git a/lib/librte_eal/linuxapp/eal/eal_pci.c b/lib/librte_eal/linuxapp/eal/eal_pci.c
index f9c3efd2..cd9de7cc 100644
--- a/lib/librte_eal/linuxapp/eal/eal_pci.c
+++ b/lib/librte_eal/linuxapp/eal/eal_pci.c
@@ -754,21 +754,6 @@ rte_eal_pci_init(void)
RTE_LOG(ERR, EAL, "%s(): Cannot scan PCI bus\n", __func__);
return -1;
}
-#ifdef VFIO_PRESENT
- pci_vfio_enable();
-
- if (pci_vfio_is_enabled()) {
-
- /* if we are primary process, create a thread to communicate with
- * secondary processes. the thread will use a socket to wait for
- * requests from secondary process to send open file descriptors,
- * because VFIO does not allow multiple open descriptors on a group or
- * VFIO container.
- */
- if (internal_config.process_type == RTE_PROC_PRIMARY &&
- pci_vfio_mp_sync_setup() < 0)
- return -1;
- }
-#endif
+
return 0;
}
diff --git a/lib/librte_eal/linuxapp/eal/eal_pci_init.h b/lib/librte_eal/linuxapp/eal/eal_pci_init.h
index f72a2548..6a960d1b 100644
--- a/lib/librte_eal/linuxapp/eal/eal_pci_init.h
+++ b/lib/librte_eal/linuxapp/eal/eal_pci_init.h
@@ -74,12 +74,6 @@ int pci_uio_ioport_unmap(struct rte_pci_ioport *p);
#ifdef VFIO_PRESENT
-#define VFIO_MAX_GROUPS 64
-
-int pci_vfio_enable(void);
-int pci_vfio_is_enabled(void);
-int pci_vfio_mp_sync_setup(void);
-
/* access config space */
int pci_vfio_read_config(const struct rte_intr_handle *intr_handle,
void *buf, size_t len, off_t offs);
@@ -96,41 +90,6 @@ int pci_vfio_ioport_unmap(struct rte_pci_ioport *p);
/* map VFIO resource prototype */
int pci_vfio_map_resource(struct rte_pci_device *dev);
-int pci_vfio_get_group_fd(int iommu_group_fd);
-int pci_vfio_get_container_fd(void);
-
-/*
- * Function prototypes for VFIO multiprocess sync functions
- */
-int vfio_mp_sync_send_request(int socket, int req);
-int vfio_mp_sync_receive_request(int socket);
-int vfio_mp_sync_send_fd(int socket, int fd);
-int vfio_mp_sync_receive_fd(int socket);
-int vfio_mp_sync_connect_to_primary(void);
-
-/* socket comm protocol definitions */
-#define SOCKET_REQ_CONTAINER 0x100
-#define SOCKET_REQ_GROUP 0x200
-#define SOCKET_OK 0x0
-#define SOCKET_NO_FD 0x1
-#define SOCKET_ERR 0xFF
-
-/*
- * we don't need to store device fd's anywhere since they can be obtained from
- * the group fd via an ioctl() call.
- */
-struct vfio_group {
- int group_no;
- int fd;
-};
-
-struct vfio_config {
- int vfio_enabled;
- int vfio_container_fd;
- int vfio_container_has_dma;
- int vfio_group_idx;
- struct vfio_group vfio_groups[VFIO_MAX_GROUPS];
-};
#endif
diff --git a/lib/librte_eal/linuxapp/eal/eal_pci_vfio.c b/lib/librte_eal/linuxapp/eal/eal_pci_vfio.c
index f91b9242..46cd6831 100644
--- a/lib/librte_eal/linuxapp/eal/eal_pci_vfio.c
+++ b/lib/librte_eal/linuxapp/eal/eal_pci_vfio.c
@@ -43,11 +43,11 @@
#include <rte_pci.h>
#include <rte_eal_memconfig.h>
#include <rte_malloc.h>
-#include <eal_private.h>
#include "eal_filesystem.h"
#include "eal_pci_init.h"
#include "eal_vfio.h"
+#include "eal_private.h"
/**
* @file
@@ -69,78 +69,6 @@ static struct rte_tailq_elem rte_vfio_tailq = {
};
EAL_REGISTER_TAILQ(rte_vfio_tailq)
-#define VFIO_DIR "/dev/vfio"
-#define VFIO_CONTAINER_PATH "/dev/vfio/vfio"
-#define VFIO_GROUP_FMT "/dev/vfio/%u"
-#define VFIO_NOIOMMU_GROUP_FMT "/dev/vfio/noiommu-%u"
-#define VFIO_GET_REGION_ADDR(x) ((uint64_t) x << 40ULL)
-#define VFIO_GET_REGION_IDX(x) (x >> 40)
-
-/* per-process VFIO config */
-static struct vfio_config vfio_cfg;
-
-/* DMA mapping function prototype.
- * Takes VFIO container fd as a parameter.
- * Returns 0 on success, -1 on error.
- * */
-typedef int (*vfio_dma_func_t)(int);
-
-struct vfio_iommu_type {
- int type_id;
- const char *name;
- vfio_dma_func_t dma_map_func;
-};
-
-static int vfio_type1_dma_map(int);
-static int vfio_noiommu_dma_map(int);
-
-/* IOMMU types we support */
-static const struct vfio_iommu_type iommu_types[] = {
- /* x86 IOMMU, otherwise known as type 1 */
- { RTE_VFIO_TYPE1, "Type 1", &vfio_type1_dma_map},
- /* IOMMU-less mode */
- { RTE_VFIO_NOIOMMU, "No-IOMMU", &vfio_noiommu_dma_map},
-};
-
-int
-vfio_type1_dma_map(int vfio_container_fd)
-{
- const struct rte_memseg *ms = rte_eal_get_physmem_layout();
- int i, ret;
-
- /* map all DPDK segments for DMA. use 1:1 PA to IOVA mapping */
- for (i = 0; i < RTE_MAX_MEMSEG; i++) {
- struct vfio_iommu_type1_dma_map dma_map;
-
- if (ms[i].addr == NULL)
- break;
-
- memset(&dma_map, 0, sizeof(dma_map));
- dma_map.argsz = sizeof(struct vfio_iommu_type1_dma_map);
- dma_map.vaddr = ms[i].addr_64;
- dma_map.size = ms[i].len;
- dma_map.iova = ms[i].phys_addr;
- dma_map.flags = VFIO_DMA_MAP_FLAG_READ | VFIO_DMA_MAP_FLAG_WRITE;
-
- ret = ioctl(vfio_container_fd, VFIO_IOMMU_MAP_DMA, &dma_map);
-
- if (ret) {
- RTE_LOG(ERR, EAL, " cannot set up DMA remapping, "
- "error %i (%s)\n", errno, strerror(errno));
- return -1;
- }
- }
-
- return 0;
-}
-
-int
-vfio_noiommu_dma_map(int __rte_unused vfio_container_fd)
-{
- /* No-IOMMU mode does not need DMA mapping */
- return 0;
-}
-
int
pci_vfio_read_config(const struct rte_intr_handle *intr_handle,
void *buf, size_t len, off_t offs)
@@ -272,63 +200,6 @@ pci_vfio_set_bus_master(int dev_fd)
return 0;
}
-/* pick IOMMU type. returns a pointer to vfio_iommu_type or NULL for error */
-static const struct vfio_iommu_type *
-pci_vfio_set_iommu_type(int vfio_container_fd) {
- unsigned idx;
- for (idx = 0; idx < RTE_DIM(iommu_types); idx++) {
- const struct vfio_iommu_type *t = &iommu_types[idx];
-
- int ret = ioctl(vfio_container_fd, VFIO_SET_IOMMU,
- t->type_id);
- if (!ret) {
- RTE_LOG(NOTICE, EAL, " using IOMMU type %d (%s)\n",
- t->type_id, t->name);
- return t;
- }
- /* not an error, there may be more supported IOMMU types */
- RTE_LOG(DEBUG, EAL, " set IOMMU type %d (%s) failed, "
- "error %i (%s)\n", t->type_id, t->name, errno,
- strerror(errno));
- }
- /* if we didn't find a suitable IOMMU type, fail */
- return NULL;
-}
-
-/* check if we have any supported extensions */
-static int
-pci_vfio_has_supported_extensions(int vfio_container_fd) {
- int ret;
- unsigned idx, n_extensions = 0;
- for (idx = 0; idx < RTE_DIM(iommu_types); idx++) {
- const struct vfio_iommu_type *t = &iommu_types[idx];
-
- ret = ioctl(vfio_container_fd, VFIO_CHECK_EXTENSION,
- t->type_id);
- if (ret < 0) {
- RTE_LOG(ERR, EAL, " could not get IOMMU type, "
- "error %i (%s)\n", errno,
- strerror(errno));
- close(vfio_container_fd);
- return -1;
- } else if (ret == 1) {
- /* we found a supported extension */
- n_extensions++;
- }
- RTE_LOG(DEBUG, EAL, " IOMMU type %d (%s) is %s\n",
- t->type_id, t->name,
- ret ? "supported" : "not supported");
- }
-
- /* if we didn't find any supported IOMMU types, fail */
- if (!n_extensions) {
- close(vfio_container_fd);
- return -1;
- }
-
- return 0;
-}
-
/* set up interrupt support (but not enable interrupts) */
static int
pci_vfio_setup_interrupts(struct rte_pci_device *dev, int vfio_dev_fd)
@@ -425,220 +296,6 @@ pci_vfio_setup_interrupts(struct rte_pci_device *dev, int vfio_dev_fd)
return -1;
}
-/* open container fd or get an existing one */
-int
-pci_vfio_get_container_fd(void)
-{
- int ret, vfio_container_fd;
-
- /* if we're in a primary process, try to open the container */
- if (internal_config.process_type == RTE_PROC_PRIMARY) {
- vfio_container_fd = open(VFIO_CONTAINER_PATH, O_RDWR);
- if (vfio_container_fd < 0) {
- RTE_LOG(ERR, EAL, " cannot open VFIO container, "
- "error %i (%s)\n", errno, strerror(errno));
- return -1;
- }
-
- /* check VFIO API version */
- ret = ioctl(vfio_container_fd, VFIO_GET_API_VERSION);
- if (ret != VFIO_API_VERSION) {
- if (ret < 0)
- RTE_LOG(ERR, EAL, " could not get VFIO API version, "
- "error %i (%s)\n", errno, strerror(errno));
- else
- RTE_LOG(ERR, EAL, " unsupported VFIO API version!\n");
- close(vfio_container_fd);
- return -1;
- }
-
- ret = pci_vfio_has_supported_extensions(vfio_container_fd);
- if (ret) {
- RTE_LOG(ERR, EAL, " no supported IOMMU "
- "extensions found!\n");
- return -1;
- }
-
- return vfio_container_fd;
- } else {
- /*
- * if we're in a secondary process, request container fd from the
- * primary process via our socket
- */
- int socket_fd;
-
- socket_fd = vfio_mp_sync_connect_to_primary();
- if (socket_fd < 0) {
- RTE_LOG(ERR, EAL, " cannot connect to primary process!\n");
- return -1;
- }
- if (vfio_mp_sync_send_request(socket_fd, SOCKET_REQ_CONTAINER) < 0) {
- RTE_LOG(ERR, EAL, " cannot request container fd!\n");
- close(socket_fd);
- return -1;
- }
- vfio_container_fd = vfio_mp_sync_receive_fd(socket_fd);
- if (vfio_container_fd < 0) {
- RTE_LOG(ERR, EAL, " cannot get container fd!\n");
- close(socket_fd);
- return -1;
- }
- close(socket_fd);
- return vfio_container_fd;
- }
-
- return -1;
-}
-
-/* open group fd or get an existing one */
-int
-pci_vfio_get_group_fd(int iommu_group_no)
-{
- int i;
- int vfio_group_fd;
- char filename[PATH_MAX];
-
- /* check if we already have the group descriptor open */
- for (i = 0; i < vfio_cfg.vfio_group_idx; i++)
- if (vfio_cfg.vfio_groups[i].group_no == iommu_group_no)
- return vfio_cfg.vfio_groups[i].fd;
-
- /* if primary, try to open the group */
- if (internal_config.process_type == RTE_PROC_PRIMARY) {
- /* try regular group format */
- snprintf(filename, sizeof(filename),
- VFIO_GROUP_FMT, iommu_group_no);
- vfio_group_fd = open(filename, O_RDWR);
- if (vfio_group_fd < 0) {
- /* if file not found, it's not an error */
- if (errno != ENOENT) {
- RTE_LOG(ERR, EAL, "Cannot open %s: %s\n", filename,
- strerror(errno));
- return -1;
- }
-
- /* special case: try no-IOMMU path as well */
- snprintf(filename, sizeof(filename),
- VFIO_NOIOMMU_GROUP_FMT, iommu_group_no);
- vfio_group_fd = open(filename, O_RDWR);
- if (vfio_group_fd < 0) {
- if (errno != ENOENT) {
- RTE_LOG(ERR, EAL, "Cannot open %s: %s\n", filename,
- strerror(errno));
- return -1;
- }
- return 0;
- }
- /* noiommu group found */
- }
-
- /* if the fd is valid, create a new group for it */
- if (vfio_cfg.vfio_group_idx == VFIO_MAX_GROUPS) {
- RTE_LOG(ERR, EAL, "Maximum number of VFIO groups reached!\n");
- close(vfio_group_fd);
- return -1;
- }
- vfio_cfg.vfio_groups[vfio_cfg.vfio_group_idx].group_no = iommu_group_no;
- vfio_cfg.vfio_groups[vfio_cfg.vfio_group_idx].fd = vfio_group_fd;
- return vfio_group_fd;
- }
- /* if we're in a secondary process, request group fd from the primary
- * process via our socket
- */
- else {
- int socket_fd, ret;
-
- socket_fd = vfio_mp_sync_connect_to_primary();
-
- if (socket_fd < 0) {
- RTE_LOG(ERR, EAL, " cannot connect to primary process!\n");
- return -1;
- }
- if (vfio_mp_sync_send_request(socket_fd, SOCKET_REQ_GROUP) < 0) {
- RTE_LOG(ERR, EAL, " cannot request container fd!\n");
- close(socket_fd);
- return -1;
- }
- if (vfio_mp_sync_send_request(socket_fd, iommu_group_no) < 0) {
- RTE_LOG(ERR, EAL, " cannot send group number!\n");
- close(socket_fd);
- return -1;
- }
- ret = vfio_mp_sync_receive_request(socket_fd);
- switch (ret) {
- case SOCKET_NO_FD:
- close(socket_fd);
- return 0;
- case SOCKET_OK:
- vfio_group_fd = vfio_mp_sync_receive_fd(socket_fd);
- /* if we got the fd, return it */
- if (vfio_group_fd > 0) {
- close(socket_fd);
- return vfio_group_fd;
- }
- /* fall-through on error */
- default:
- RTE_LOG(ERR, EAL, " cannot get container fd!\n");
- close(socket_fd);
- return -1;
- }
- }
- return -1;
-}
-
-/* parse IOMMU group number for a PCI device
- * returns 1 on success, -1 for errors, 0 for non-existent group
- */
-static int
-pci_vfio_get_group_no(const char *pci_addr, int *iommu_group_no)
-{
- char linkname[PATH_MAX];
- char filename[PATH_MAX];
- char *tok[16], *group_tok, *end;
- int ret;
-
- memset(linkname, 0, sizeof(linkname));
- memset(filename, 0, sizeof(filename));
-
- /* try to find out IOMMU group for this device */
- snprintf(linkname, sizeof(linkname),
- "%s/%s/iommu_group", pci_get_sysfs_path(), pci_addr);
-
- ret = readlink(linkname, filename, sizeof(filename));
-
- /* if the link doesn't exist, no VFIO for us */
- if (ret < 0)
- return 0;
-
- ret = rte_strsplit(filename, sizeof(filename),
- tok, RTE_DIM(tok), '/');
-
- if (ret <= 0) {
- RTE_LOG(ERR, EAL, " %s cannot get IOMMU group\n", pci_addr);
- return -1;
- }
-
- /* IOMMU group is always the last token */
- errno = 0;
- group_tok = tok[ret - 1];
- end = group_tok;
- *iommu_group_no = strtol(group_tok, &end, 10);
- if ((end != group_tok && *end != '\0') || errno != 0) {
- RTE_LOG(ERR, EAL, " %s error parsing IOMMU number!\n", pci_addr);
- return -1;
- }
-
- return 1;
-}
-
-static void
-clear_current_group(void)
-{
- vfio_cfg.vfio_groups[vfio_cfg.vfio_group_idx].group_no = 0;
- vfio_cfg.vfio_groups[vfio_cfg.vfio_group_idx].fd = -1;
-}
-
-
/*
* map the PCI resources of a PCI device in virtual memory (VFIO version).
* primary and secondary processes follow almost exactly the same path
@@ -646,13 +303,9 @@ clear_current_group(void)
int
pci_vfio_map_resource(struct rte_pci_device *dev)
{
- struct vfio_group_status group_status = {
- .argsz = sizeof(group_status)
- };
struct vfio_device_info device_info = { .argsz = sizeof(device_info) };
- int vfio_group_fd, vfio_dev_fd;
- int iommu_group_no;
char pci_addr[PATH_MAX] = {0};
+ int vfio_dev_fd;
struct rte_pci_addr *loc = &dev->addr;
int i, ret, msix_bar;
struct mapped_pci_resource *vfio_res = NULL;
@@ -670,127 +323,9 @@ pci_vfio_map_resource(struct rte_pci_device *dev)
snprintf(pci_addr, sizeof(pci_addr), PCI_PRI_FMT,
loc->domain, loc->bus, loc->devid, loc->function);
- /* get group number */
- ret = pci_vfio_get_group_no(pci_addr, &iommu_group_no);
- if (ret == 0) {
- RTE_LOG(WARNING, EAL, " %s not managed by VFIO driver, skipping\n",
- pci_addr);
- return 1;
- }
-
- /* if negative, something failed */
- if (ret < 0)
- return -1;
-
- /* get the actual group fd */
- vfio_group_fd = pci_vfio_get_group_fd(iommu_group_no);
- if (vfio_group_fd < 0)
- return -1;
-
- /* store group fd */
- vfio_cfg.vfio_groups[vfio_cfg.vfio_group_idx].group_no = iommu_group_no;
- vfio_cfg.vfio_groups[vfio_cfg.vfio_group_idx].fd = vfio_group_fd;
-
- /* if group_fd == 0, that means the device isn't managed by VFIO */
- if (vfio_group_fd == 0) {
- RTE_LOG(WARNING, EAL, " %s not managed by VFIO driver, skipping\n",
- pci_addr);
- /* we store 0 as group fd to distinguish between existing but
- * unbound VFIO groups, and groups that don't exist at all.
- */
- vfio_cfg.vfio_group_idx++;
- return 1;
- }
-
- /*
- * at this point, we know at least one port on this device is bound to VFIO,
- * so we can proceed to try and set this particular port up
- */
-
- /* check if the group is viable */
- ret = ioctl(vfio_group_fd, VFIO_GROUP_GET_STATUS, &group_status);
- if (ret) {
- RTE_LOG(ERR, EAL, " %s cannot get group status, "
- "error %i (%s)\n", pci_addr, errno, strerror(errno));
- close(vfio_group_fd);
- clear_current_group();
- return -1;
- } else if (!(group_status.flags & VFIO_GROUP_FLAGS_VIABLE)) {
- RTE_LOG(ERR, EAL, " %s VFIO group is not viable!\n", pci_addr);
- close(vfio_group_fd);
- clear_current_group();
- return -1;
- }
-
- /*
- * at this point, we know that this group is viable (meaning, all devices
- * are either bound to VFIO or not bound to anything)
- */
-
- /* check if group does not have a container yet */
- if (!(group_status.flags & VFIO_GROUP_FLAGS_CONTAINER_SET)) {
-
- /* add group to a container */
- ret = ioctl(vfio_group_fd, VFIO_GROUP_SET_CONTAINER,
- &vfio_cfg.vfio_container_fd);
- if (ret) {
- RTE_LOG(ERR, EAL, " %s cannot add VFIO group to container, "
- "error %i (%s)\n", pci_addr, errno, strerror(errno));
- close(vfio_group_fd);
- clear_current_group();
- return -1;
- }
- /*
- * at this point we know that this group has been successfully
- * initialized, so we increment vfio_group_idx to indicate that we can
- * add new groups.
- */
- vfio_cfg.vfio_group_idx++;
- }
-
- /*
- * pick an IOMMU type and set up DMA mappings for container
- *
- * needs to be done only once, only when at least one group is assigned to
- * a container and only in primary process
- */
- if (internal_config.process_type == RTE_PROC_PRIMARY &&
- vfio_cfg.vfio_container_has_dma == 0) {
- /* select an IOMMU type which we will be using */
- const struct vfio_iommu_type *t =
- pci_vfio_set_iommu_type(vfio_cfg.vfio_container_fd);
- if (!t) {
- RTE_LOG(ERR, EAL, " %s failed to select IOMMU type\n", pci_addr);
- return -1;
- }
- ret = t->dma_map_func(vfio_cfg.vfio_container_fd);
- if (ret) {
- RTE_LOG(ERR, EAL, " %s DMA remapping failed, "
- "error %i (%s)\n", pci_addr, errno, strerror(errno));
- return -1;
- }
- vfio_cfg.vfio_container_has_dma = 1;
- }
-
- /* get a file descriptor for the device */
- vfio_dev_fd = ioctl(vfio_group_fd, VFIO_GROUP_GET_DEVICE_FD, pci_addr);
- if (vfio_dev_fd < 0) {
- /* if we cannot get a device fd, this simply means that this
- * particular port is not bound to VFIO
- */
- RTE_LOG(WARNING, EAL, " %s not managed by VFIO driver, skipping\n",
- pci_addr);
- return 1;
- }
-
- /* test and setup the device */
- ret = ioctl(vfio_dev_fd, VFIO_DEVICE_GET_INFO, &device_info);
- if (ret) {
- RTE_LOG(ERR, EAL, " %s cannot get device info, "
- "error %i (%s)\n", pci_addr, errno, strerror(errno));
- close(vfio_dev_fd);
- return -1;
- }
+ if ((ret = vfio_setup_device(pci_get_sysfs_path(), pci_addr,
+ &vfio_dev_fd, &device_info)))
+ return ret;
/* get MSI-X BAR, if any (we have to know where it is because we can't
* easily mmap it when using VFIO) */
@@ -1048,50 +583,12 @@ pci_vfio_ioport_unmap(struct rte_pci_ioport *p)
int
pci_vfio_enable(void)
{
- /* initialize group list */
- int i;
- int vfio_available;
-
- for (i = 0; i < VFIO_MAX_GROUPS; i++) {
- vfio_cfg.vfio_groups[i].fd = -1;
- vfio_cfg.vfio_groups[i].group_no = -1;
- }
-
- /* inform the user that we are probing for VFIO */
- RTE_LOG(INFO, EAL, "Probing VFIO support...\n");
-
- /* check if vfio-pci module is loaded */
- vfio_available = rte_eal_check_module("vfio_pci");
-
- /* return error directly */
- if (vfio_available == -1) {
- RTE_LOG(INFO, EAL, "Could not get loaded module details!\n");
- return -1;
- }
-
- /* return 0 if VFIO modules not loaded */
- if (vfio_available == 0) {
- RTE_LOG(DEBUG, EAL, "VFIO modules not loaded, "
- "skipping VFIO support...\n");
- return 0;
- }
-
- vfio_cfg.vfio_container_fd = pci_vfio_get_container_fd();
-
- /* check if we have VFIO driver enabled */
- if (vfio_cfg.vfio_container_fd != -1) {
- RTE_LOG(NOTICE, EAL, "VFIO support initialized\n");
- vfio_cfg.vfio_enabled = 1;
- } else {
- RTE_LOG(NOTICE, EAL, "VFIO support could not be initialized\n");
- }
-
- return 0;
+ return vfio_enable("vfio_pci");
}
int
pci_vfio_is_enabled(void)
{
- return vfio_cfg.vfio_enabled;
+ return vfio_is_enabled("vfio_pci");
}
#endif
diff --git a/lib/librte_eal/linuxapp/eal/eal_vfio.c b/lib/librte_eal/linuxapp/eal/eal_vfio.c
new file mode 100644
index 00000000..fcb0ab38
--- /dev/null
+++ b/lib/librte_eal/linuxapp/eal/eal_vfio.c
@@ -0,0 +1,547 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <string.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <sys/ioctl.h>
+
+#include <rte_log.h>
+#include <rte_memory.h>
+#include <rte_eal_memconfig.h>
+
+#include "eal_filesystem.h"
+#include "eal_vfio.h"
+#include "eal_private.h"
+
+#ifdef VFIO_PRESENT
+
+/* per-process VFIO config: container fd, DMA-setup flag and table of group fds */
+static struct vfio_config vfio_cfg;
+
+static int vfio_type1_dma_map(int); /* DMA-map callbacks referenced by iommu_types[] */
+static int vfio_noiommu_dma_map(int);
+
+/* IOMMU types we support: { type id, human-readable name, DMA-map callback } */
+static const struct vfio_iommu_type iommu_types[] = {
+ /* x86 IOMMU, otherwise known as type 1 */
+ { RTE_VFIO_TYPE1, "Type 1", &vfio_type1_dma_map},
+ /* IOMMU-less mode */
+ { RTE_VFIO_NOIOMMU, "No-IOMMU", &vfio_noiommu_dma_map},
+};
+
+int
+vfio_get_group_fd(int iommu_group_no)
+{
+ int i;
+ int vfio_group_fd;
+ char filename[PATH_MAX];
+
+ /* check if we already have the group descriptor open */
+ for (i = 0; i < vfio_cfg.vfio_group_idx; i++)
+ if (vfio_cfg.vfio_groups[i].group_no == iommu_group_no)
+ return vfio_cfg.vfio_groups[i].fd;
+
+ /* if primary, try to open the group */
+ if (internal_config.process_type == RTE_PROC_PRIMARY) {
+ /* try regular group format */
+ snprintf(filename, sizeof(filename),
+ VFIO_GROUP_FMT, iommu_group_no);
+ vfio_group_fd = open(filename, O_RDWR);
+ if (vfio_group_fd < 0) {
+ /* if file not found, it's not an error */
+ if (errno != ENOENT) {
+ RTE_LOG(ERR, EAL, "Cannot open %s: %s\n", filename,
+ strerror(errno));
+ return -1;
+ }
+
+ /* special case: try no-IOMMU path as well */
+ snprintf(filename, sizeof(filename),
+ VFIO_NOIOMMU_GROUP_FMT, iommu_group_no);
+ vfio_group_fd = open(filename, O_RDWR);
+ if (vfio_group_fd < 0) {
+ if (errno != ENOENT) {
+ RTE_LOG(ERR, EAL, "Cannot open %s: %s\n", filename,
+ strerror(errno));
+ return -1;
+ }
+ return 0;
+ }
+ /* noiommu group found */
+ }
+
+ /* if the fd is valid, create a new group for it */
+ if (vfio_cfg.vfio_group_idx == VFIO_MAX_GROUPS) {
+ RTE_LOG(ERR, EAL, "Maximum number of VFIO groups reached!\n");
+ close(vfio_group_fd);
+ return -1;
+ }
+ vfio_cfg.vfio_groups[vfio_cfg.vfio_group_idx].group_no = iommu_group_no;
+ vfio_cfg.vfio_groups[vfio_cfg.vfio_group_idx].fd = vfio_group_fd;
+ return vfio_group_fd;
+ }
+ /* if we're in a secondary process, request group fd from the primary
+ * process via our socket
+ */
+ else {
+ int socket_fd, ret;
+
+ socket_fd = vfio_mp_sync_connect_to_primary();
+
+ if (socket_fd < 0) {
+ RTE_LOG(ERR, EAL, " cannot connect to primary process!\n");
+ return -1;
+ }
+ if (vfio_mp_sync_send_request(socket_fd, SOCKET_REQ_GROUP) < 0) {
+ RTE_LOG(ERR, EAL, " cannot request container fd!\n");
+ close(socket_fd);
+ return -1;
+ }
+ if (vfio_mp_sync_send_request(socket_fd, iommu_group_no) < 0) {
+ RTE_LOG(ERR, EAL, " cannot send group number!\n");
+ close(socket_fd);
+ return -1;
+ }
+ ret = vfio_mp_sync_receive_request(socket_fd);
+ switch (ret) {
+ case SOCKET_NO_FD:
+ close(socket_fd);
+ return 0;
+ case SOCKET_OK:
+ vfio_group_fd = vfio_mp_sync_receive_fd(socket_fd);
+ /* if we got the fd, return it */
+ if (vfio_group_fd > 0) {
+ close(socket_fd);
+ return vfio_group_fd;
+ }
+ /* fall-through on error */
+ default:
+ RTE_LOG(ERR, EAL, " cannot get container fd!\n");
+ close(socket_fd);
+ return -1;
+ }
+ }
+ return -1;
+}
+
+static void
+clear_current_group(void) /* reset the slot at vfio_group_idx; called on group setup failure paths */
+{
+ vfio_cfg.vfio_groups[vfio_cfg.vfio_group_idx].group_no = 0; /* NOTE(review): vfio_enable() initialises slots to -1, not 0 -- confirm intent */
+ vfio_cfg.vfio_groups[vfio_cfg.vfio_group_idx].fd = -1;
+}
+
+int vfio_setup_device(const char *sysfs_base, const char *dev_addr,
+ int *vfio_dev_fd, struct vfio_device_info *device_info) /* 0 = ok, 1 = not managed by VFIO, -1 = error */
+{
+ struct vfio_group_status group_status = {
+ .argsz = sizeof(group_status)
+ };
+ int vfio_group_fd;
+ int iommu_group_no;
+ int ret;
+
+ /* get group number */
+ ret = vfio_get_group_no(sysfs_base, dev_addr, &iommu_group_no);
+ if (ret == 0) {
+ RTE_LOG(WARNING, EAL, " %s not managed by VFIO driver, skipping\n",
+ dev_addr);
+ return 1;
+ }
+
+ /* if negative, something failed */
+ if (ret < 0)
+ return -1;
+
+ /* get the actual group fd */
+ vfio_group_fd = vfio_get_group_fd(iommu_group_no);
+ if (vfio_group_fd < 0)
+ return -1;
+
+ /* store group fd */
+ vfio_cfg.vfio_groups[vfio_cfg.vfio_group_idx].group_no = iommu_group_no;
+ vfio_cfg.vfio_groups[vfio_cfg.vfio_group_idx].fd = vfio_group_fd;
+
+ /* if group_fd == 0, that means the device isn't managed by VFIO */
+ if (vfio_group_fd == 0) {
+ RTE_LOG(WARNING, EAL, " %s not managed by VFIO driver, skipping\n",
+ dev_addr);
+ /* we store 0 as group fd to distinguish between existing but
+ * unbound VFIO groups, and groups that don't exist at all.
+ */
+ vfio_cfg.vfio_group_idx++;
+ return 1;
+ }
+
+ /*
+ * at this point we hold a valid fd for the device's IOMMU group;
+ * whether the group is actually usable is checked next
+ */
+
+ /* check if the group is viable */
+ ret = ioctl(vfio_group_fd, VFIO_GROUP_GET_STATUS, &group_status);
+ if (ret) {
+ RTE_LOG(ERR, EAL, " %s cannot get group status, "
+ "error %i (%s)\n", dev_addr, errno, strerror(errno));
+ close(vfio_group_fd);
+ clear_current_group();
+ return -1;
+ } else if (!(group_status.flags & VFIO_GROUP_FLAGS_VIABLE)) {
+ RTE_LOG(ERR, EAL, " %s VFIO group is not viable!\n", dev_addr);
+ close(vfio_group_fd);
+ clear_current_group();
+ return -1;
+ }
+
+ /* the group is viable; check if it does not have a container yet */
+ if (!(group_status.flags & VFIO_GROUP_FLAGS_CONTAINER_SET)) {
+
+ /* add group to a container */
+ ret = ioctl(vfio_group_fd, VFIO_GROUP_SET_CONTAINER,
+ &vfio_cfg.vfio_container_fd);
+ if (ret) {
+ RTE_LOG(ERR, EAL, " %s cannot add VFIO group to container, "
+ "error %i (%s)\n", dev_addr, errno, strerror(errno));
+ close(vfio_group_fd);
+ clear_current_group();
+ return -1;
+ }
+ /*
+ * at this point we know that this group has been successfully
+ * initialized, so we increment vfio_group_idx to indicate that we can
+ * add new groups.
+ */
+ vfio_cfg.vfio_group_idx++;
+ }
+
+ /*
+ * pick an IOMMU type and set up DMA mappings for container
+ *
+ * needs to be done only once, only when at least one group is assigned to
+ * a container and only in primary process
+ */
+ if (internal_config.process_type == RTE_PROC_PRIMARY &&
+ vfio_cfg.vfio_container_has_dma == 0) {
+ /* select an IOMMU type which we will be using */
+ const struct vfio_iommu_type *t =
+ vfio_set_iommu_type(vfio_cfg.vfio_container_fd);
+ if (!t) {
+ RTE_LOG(ERR, EAL, " %s failed to select IOMMU type\n", dev_addr);
+ return -1; /* NOTE(review): group fd stays open here, unlike the error paths above */
+ }
+ ret = t->dma_map_func(vfio_cfg.vfio_container_fd);
+ if (ret) {
+ RTE_LOG(ERR, EAL, " %s DMA remapping failed, "
+ "error %i (%s)\n", dev_addr, errno, strerror(errno));
+ return -1; /* NOTE(review): group fd stays open here as well */
+ }
+ vfio_cfg.vfio_container_has_dma = 1;
+ }
+
+ /* get a file descriptor for the device */
+ *vfio_dev_fd = ioctl(vfio_group_fd, VFIO_GROUP_GET_DEVICE_FD, dev_addr);
+ if (*vfio_dev_fd < 0) {
+ /* if we cannot get a device fd, this simply means that this
+ * particular port is not bound to VFIO
+ */
+ RTE_LOG(WARNING, EAL, " %s not managed by VFIO driver, skipping\n",
+ dev_addr);
+ return 1;
+ }
+
+ /* test and setup the device; on success device_info is filled in for the caller */
+ ret = ioctl(*vfio_dev_fd, VFIO_DEVICE_GET_INFO, device_info);
+ if (ret) {
+ RTE_LOG(ERR, EAL, " %s cannot get device info, "
+ "error %i (%s)\n", dev_addr, errno, strerror(errno));
+ close(*vfio_dev_fd);
+ return -1;
+ }
+
+ return 0;
+}
+
+/*
+ * Reset the VFIO group table, check whether the kernel module named by
+ * modname is loaded and, if it is, try to obtain the VFIO container fd.
+ *
+ * Returns -1 only when the module check itself reports -1. Returns 0 in
+ * every other case, including when the module is not loaded or the
+ * container could not be obtained; vfio_cfg.vfio_enabled is set to 1 only
+ * when a container fd was obtained.
+ */
+int
+vfio_enable(const char *modname)
+{
+	struct vfio_group *slot;
+	int module_state;
+
+	/* mark every slot of the group table as unused */
+	for (slot = vfio_cfg.vfio_groups;
+			slot != vfio_cfg.vfio_groups + VFIO_MAX_GROUPS; slot++) {
+		slot->fd = -1;
+		slot->group_no = -1;
+	}
+
+	/* let the user know that VFIO probing is under way */
+	RTE_LOG(INFO, EAL, "Probing VFIO support...\n");
+
+	/* is the requested VFIO module loaded? */
+	module_state = rte_eal_check_module(modname);
+	switch (module_state) {
+	case -1:
+		/* the check itself failed: propagate the error */
+		RTE_LOG(INFO, EAL, "Could not get loaded module details!\n");
+		return -1;
+	case 0:
+		/* module absent: not an error, VFIO simply stays disabled */
+		RTE_LOG(DEBUG, EAL, "VFIO modules not loaded, "
+			"skipping VFIO support...\n");
+		return 0;
+	default:
+		break;
+	}
+
+	vfio_cfg.vfio_container_fd = vfio_get_container_fd();
+
+	/* without a container VFIO stays disabled, yet this is not fatal */
+	if (vfio_cfg.vfio_container_fd == -1) {
+		RTE_LOG(NOTICE, EAL, "VFIO support could not be initialized\n");
+		return 0;
+	}
+
+	RTE_LOG(NOTICE, EAL, "VFIO support initialized\n");
+	vfio_cfg.vfio_enabled = 1;
+
+	return 0;
+}
+
+/*
+ * Tell whether VFIO can be used with the kernel module named by modname.
+ *
+ * Returns non-zero only when vfio_enable() has marked VFIO as enabled AND
+ * the module is currently reported as loaded; returns 0 otherwise.
+ */
+int
+vfio_is_enabled(const char *modname)
+{
+	/*
+	 * rte_eal_check_module() signals failure with -1 (vfio_enable() treats
+	 * it that way), and -1 is "true" in a boolean context. Only a strictly
+	 * positive result means that the module is really loaded.
+	 */
+	const int mod_available = rte_eal_check_module(modname) > 0;
+
+	return vfio_cfg.vfio_enabled && mod_available;
+}
+
+/*
+ * Walk the iommu_types table in order and try to set each type on the
+ * given container with VFIO_SET_IOMMU.
+ *
+ * Returns the first table entry the ioctl accepted, or NULL when every
+ * entry was rejected.
+ */
+const struct vfio_iommu_type *
+vfio_set_iommu_type(int vfio_container_fd)
+{
+	const struct vfio_iommu_type *candidate = iommu_types;
+	const struct vfio_iommu_type *const table_end =
+			iommu_types + RTE_DIM(iommu_types);
+
+	for (; candidate != table_end; candidate++) {
+		if (ioctl(vfio_container_fd, VFIO_SET_IOMMU,
+				candidate->type_id) != 0) {
+			/* a rejected type is not fatal, later ones may work */
+			RTE_LOG(DEBUG, EAL, " set IOMMU type %d (%s) failed, "
+				"error %i (%s)\n", candidate->type_id,
+				candidate->name, errno, strerror(errno));
+			continue;
+		}
+
+		RTE_LOG(NOTICE, EAL, " using IOMMU type %d (%s)\n",
+			candidate->type_id, candidate->name);
+		return candidate;
+	}
+
+	/* none of the known IOMMU types could be set */
+	return NULL;
+}
+
+/*
+ * Query the container with VFIO_CHECK_EXTENSION for every entry of the
+ * iommu_types table.
+ *
+ * Returns 0 when at least one type is reported as supported. Returns -1
+ * when a query fails or when no type is supported; in both failure cases
+ * vfio_container_fd has been closed by this function.
+ */
+int
+vfio_has_supported_extensions(int vfio_container_fd)
+{
+	unsigned int i;
+	unsigned int supported = 0;
+
+	for (i = 0; i < RTE_DIM(iommu_types); i++) {
+		const struct vfio_iommu_type *type = &iommu_types[i];
+		const int rc = ioctl(vfio_container_fd, VFIO_CHECK_EXTENSION,
+				type->type_id);
+
+		if (rc < 0) {
+			RTE_LOG(ERR, EAL, " could not get IOMMU type, "
+				"error %i (%s)\n", errno,
+				strerror(errno));
+			close(vfio_container_fd);
+			return -1;
+		}
+
+		/* only an answer of exactly 1 counts as a supported type */
+		if (rc == 1)
+			supported++;
+
+		RTE_LOG(DEBUG, EAL, " IOMMU type %d (%s) is %s\n",
+			type->type_id, type->name,
+			rc ? "supported" : "not supported");
+	}
+
+	/* a container without any usable IOMMU type is of no use */
+	if (supported == 0) {
+		close(vfio_container_fd);
+		return -1;
+	}
+
+	return 0;
+}
+
+/*
+ * Obtain a VFIO container fd.
+ *
+ * In the primary process the container device is opened and validated
+ * (API version, supported IOMMU extensions). In any other process the fd
+ * is requested from the primary process over the sync socket.
+ *
+ * Returns the container fd on success, -1 on failure.
+ */
+int
+vfio_get_container_fd(void)
+{
+	int container_fd;
+	int api_version;
+
+	if (internal_config.process_type != RTE_PROC_PRIMARY) {
+		/* secondary process: ask the primary for its container fd */
+		const int sock = vfio_mp_sync_connect_to_primary();
+
+		if (sock < 0) {
+			RTE_LOG(ERR, EAL, " cannot connect to primary process!\n");
+			return -1;
+		}
+
+		container_fd = -1;
+		if (vfio_mp_sync_send_request(sock, SOCKET_REQ_CONTAINER) < 0) {
+			RTE_LOG(ERR, EAL, " cannot request container fd!\n");
+		} else {
+			container_fd = vfio_mp_sync_receive_fd(sock);
+			if (container_fd < 0) {
+				RTE_LOG(ERR, EAL, " cannot get container fd!\n");
+				container_fd = -1;
+			}
+		}
+
+		/* the socket is needed for this single exchange only */
+		close(sock);
+		return container_fd;
+	}
+
+	/* primary process: open the container ourselves */
+	container_fd = open(VFIO_CONTAINER_PATH, O_RDWR);
+	if (container_fd < 0) {
+		RTE_LOG(ERR, EAL, " cannot open VFIO container, "
+			"error %i (%s)\n", errno, strerror(errno));
+		return -1;
+	}
+
+	/* the kernel must speak the VFIO API version we were built for */
+	api_version = ioctl(container_fd, VFIO_GET_API_VERSION);
+	if (api_version != VFIO_API_VERSION) {
+		if (api_version < 0) {
+			RTE_LOG(ERR, EAL, " could not get VFIO API version, "
+				"error %i (%s)\n", errno, strerror(errno));
+		} else {
+			RTE_LOG(ERR, EAL, " unsupported VFIO API version!\n");
+		}
+		close(container_fd);
+		return -1;
+	}
+
+	/*
+	 * no close() here: vfio_has_supported_extensions() closes the fd
+	 * itself on every failure path.
+	 */
+	if (vfio_has_supported_extensions(container_fd)) {
+		RTE_LOG(ERR, EAL, " no supported IOMMU "
+			"extensions found!\n");
+		return -1;
+	}
+
+	return container_fd;
+}
+
+/*
+ * Find the IOMMU group number of a device by resolving the
+ * "<sysfs_base>/<dev_addr>/iommu_group" symlink and parsing the last
+ * component of the link target as a decimal number.
+ *
+ * On success the number is stored in *iommu_group_no and 1 is returned.
+ * Returns 0 when the link cannot be read (no IOMMU group for the device)
+ * and -1 when the link target cannot be split or its last component is
+ * not a valid decimal number.
+ */
+int
+vfio_get_group_no(const char *sysfs_base,
+		const char *dev_addr, int *iommu_group_no)
+{
+	char linkname[PATH_MAX];
+	char filename[PATH_MAX];
+	char *tok[16], *group_tok, *end;
+	int ret;
+
+	memset(linkname, 0, sizeof(linkname));
+	memset(filename, 0, sizeof(filename));
+
+	/* try to find out IOMMU group for this device */
+	snprintf(linkname, sizeof(linkname),
+			"%s/%s/iommu_group", sysfs_base, dev_addr);
+
+	/*
+	 * readlink() does not NUL-terminate its output. Keep the last byte of
+	 * the zeroed buffer out of its reach so that filename is always a
+	 * valid C string, even for a maximal-length link target.
+	 */
+	ret = readlink(linkname, filename, sizeof(filename) - 1);
+
+	/* if the link doesn't exist, no VFIO for us */
+	if (ret < 0)
+		return 0;
+
+	ret = rte_strsplit(filename, sizeof(filename),
+			tok, RTE_DIM(tok), '/');
+
+	if (ret <= 0) {
+		RTE_LOG(ERR, EAL, " %s cannot get IOMMU group\n", dev_addr);
+		return -1;
+	}
+
+	/* IOMMU group is always the last token */
+	errno = 0;
+	group_tok = tok[ret - 1];
+	end = group_tok;
+	*iommu_group_no = strtol(group_tok, &end, 10);
+
+	/*
+	 * reject the token when no digit was consumed at all (empty or
+	 * non-numeric token), when trailing garbage follows the digits, or
+	 * when strtol() reported an error through errno.
+	 */
+	if (end == group_tok || *end != '\0' || errno != 0) {
+		RTE_LOG(ERR, EAL, " %s error parsing IOMMU number!\n", dev_addr);
+		return -1;
+	}
+
+	return 1;
+}
+
+/*
+ * Map the DPDK memory segments into the container for DMA, one
+ * VFIO_IOMMU_MAP_DMA call per segment, using the segment's physical
+ * address as IOVA. The walk stops at the first segment whose addr is NULL.
+ *
+ * Returns 0 when every visited segment was mapped, -1 as soon as one
+ * mapping fails.
+ */
+static int
+vfio_type1_dma_map(int vfio_container_fd)
+{
+	const struct rte_memseg *seg = rte_eal_get_physmem_layout();
+	int idx;
+
+	for (idx = 0; idx < RTE_MAX_MEMSEG && seg[idx].addr != NULL; idx++) {
+		struct vfio_iommu_type1_dma_map map;
+
+		/* 1:1 PA to IOVA mapping, readable and writable */
+		memset(&map, 0, sizeof(map));
+		map.argsz = sizeof(map);
+		map.vaddr = seg[idx].addr_64;
+		map.size = seg[idx].len;
+		map.iova = seg[idx].phys_addr;
+		map.flags = VFIO_DMA_MAP_FLAG_READ | VFIO_DMA_MAP_FLAG_WRITE;
+
+		if (ioctl(vfio_container_fd, VFIO_IOMMU_MAP_DMA, &map) != 0) {
+			RTE_LOG(ERR, EAL, " cannot set up DMA remapping, "
+				"error %i (%s)\n", errno, strerror(errno));
+			return -1;
+		}
+	}
+
+	return 0;
+}
+
+/*
+ * DMA mapping routine for No-IOMMU mode. It has the vfio_dma_func_t shape
+ * (takes the container fd, returns an int) but ignores its argument and
+ * always reports success.
+ */
+static int
+vfio_noiommu_dma_map(int __rte_unused vfio_container_fd)
+{
+ /* No-IOMMU mode does not need DMA mapping, so there is nothing to do */
+ return 0;
+}
+
+#endif
diff --git a/lib/librte_eal/linuxapp/eal/eal_vfio.h b/lib/librte_eal/linuxapp/eal/eal_vfio.h
index f483bf40..29f7f3ec 100644
--- a/lib/librte_eal/linuxapp/eal/eal_vfio.h
+++ b/lib/librte_eal/linuxapp/eal/eal_vfio.h
@@ -60,6 +60,100 @@
#define RTE_VFIO_NOIOMMU VFIO_NOIOMMU_IOMMU
#endif
+#define VFIO_MAX_GROUPS 64
+
+/*
+ * Function prototypes for VFIO multiprocess sync functions
+ */
+int vfio_mp_sync_send_request(int socket, int req);
+int vfio_mp_sync_receive_request(int socket);
+int vfio_mp_sync_send_fd(int socket, int fd);
+int vfio_mp_sync_receive_fd(int socket);
+int vfio_mp_sync_connect_to_primary(void);
+
+/*
+ * One entry of the VFIO group table kept in struct vfio_config.
+ *
+ * we don't need to store device fd's anywhere since they can be obtained from
+ * the group fd via an ioctl() call.
+ */
+struct vfio_group {
+ /* IOMMU group number; vfio_enable() initializes it to -1 (unused slot) */
+ int group_no;
+ /* group fd; -1 for an unused slot, 0 is stored for a group that exists
+  * but is not bound to VFIO
+  */
+ int fd;
+};
+
+/* Process-wide VFIO state (container, DMA status and known groups). */
+struct vfio_config {
+ /* set to 1 by vfio_enable() once a container fd has been obtained */
+ int vfio_enabled;
+ /* container fd as returned by vfio_get_container_fd() */
+ int vfio_container_fd;
+ /* set to 1 once the DMA mappings for the container have been set up */
+ int vfio_container_has_dma;
+ /* index of the vfio_groups slot that the next new group will use */
+ int vfio_group_idx;
+ /* group table, at most VFIO_MAX_GROUPS entries */
+ struct vfio_group vfio_groups[VFIO_MAX_GROUPS];
+};
+
+#define VFIO_DIR "/dev/vfio"
+#define VFIO_CONTAINER_PATH "/dev/vfio/vfio"
+#define VFIO_GROUP_FMT "/dev/vfio/%u"
+#define VFIO_NOIOMMU_GROUP_FMT "/dev/vfio/noiommu-%u"
+/*
+ * VFIO_GET_REGION_ADDR() widens x to 64 bits and shifts it up by 40 bits;
+ * VFIO_GET_REGION_IDX() shifts x down by 40 bits again.
+ * The argument is parenthesized so that compound expressions (e.g. "a | b"
+ * or "c ? x : y") are evaluated as a whole before the cast and the shift.
+ */
+#define VFIO_GET_REGION_ADDR(x) ((uint64_t)(x) << 40ULL)
+#define VFIO_GET_REGION_IDX(x) ((x) >> 40)
+
+/* DMA mapping function prototype.
+ * Takes VFIO container fd as a parameter.
+ * Returns 0 on success, -1 on error.
+ */
+typedef int (*vfio_dma_func_t)(int);
+
+/* Description of one IOMMU type that the EAL can try to use. */
+struct vfio_iommu_type {
+ /* value passed to the VFIO_CHECK_EXTENSION and VFIO_SET_IOMMU ioctls */
+ int type_id;
+ /* human-readable name, used in log messages */
+ const char *name;
+ /* routine that sets up the DMA mappings for a container of this type */
+ vfio_dma_func_t dma_map_func;
+};
+
+/* pick IOMMU type. returns a pointer to vfio_iommu_type or NULL for error */
+const struct vfio_iommu_type *
+vfio_set_iommu_type(int vfio_container_fd);
+
+/* check if we have any supported extensions */
+int
+vfio_has_supported_extensions(int vfio_container_fd);
+
+/* open container fd or get an existing one */
+int
+vfio_get_container_fd(void);
+
+/* parse IOMMU group number for a device
+ * returns 1 on success, -1 for errors, 0 for non-existent group
+ */
+int
+vfio_get_group_no(const char *sysfs_base,
+ const char *dev_addr, int *iommu_group_no);
+
+/* open group fd or get an existing one */
+int
+vfio_get_group_fd(int iommu_group_no);
+
+/**
+ * Setup vfio_cfg for the device identified by its address. It discovers
+ * the configured I/O MMU groups or sets a new one for the device. If a new
+ * group is assigned, the DMA mapping is performed.
+ * Returns 0 on success, a negative value on failure and a positive value in
+ * case the given device cannot be managed this way.
+ */
+int vfio_setup_device(const char *sysfs_base, const char *dev_addr,
+ int *vfio_dev_fd, struct vfio_device_info *device_info);
+
+int vfio_enable(const char *modname);
+int vfio_is_enabled(const char *modname);
+
+int pci_vfio_enable(void);
+int pci_vfio_is_enabled(void);
+
+int vfio_mp_sync_setup(void);
+
+#define SOCKET_REQ_CONTAINER 0x100
+#define SOCKET_REQ_GROUP 0x200
+#define SOCKET_OK 0x0
+#define SOCKET_NO_FD 0x1
+#define SOCKET_ERR 0xFF
+
#define VFIO_PRESENT
#endif /* kernel version */
#endif /* RTE_EAL_VFIO */
diff --git a/lib/librte_eal/linuxapp/eal/eal_pci_vfio_mp_sync.c b/lib/librte_eal/linuxapp/eal/eal_vfio_mp_sync.c
index d54ded88..00cf919b 100644
--- a/lib/librte_eal/linuxapp/eal/eal_pci_vfio_mp_sync.c
+++ b/lib/librte_eal/linuxapp/eal/eal_vfio_mp_sync.c
@@ -265,7 +265,7 @@ vfio_mp_sync_connect_to_primary(void)
* socket listening thread for primary process
*/
static __attribute__((noreturn)) void *
-pci_vfio_mp_sync_thread(void __rte_unused * arg)
+vfio_mp_sync_thread(void __rte_unused * arg)
{
int ret, fd, vfio_group_no;
@@ -296,7 +296,7 @@ pci_vfio_mp_sync_thread(void __rte_unused * arg)
switch (ret) {
case SOCKET_REQ_CONTAINER:
- fd = pci_vfio_get_container_fd();
+ fd = vfio_get_container_fd();
if (fd < 0)
vfio_mp_sync_send_request(conn_sock, SOCKET_ERR);
else
@@ -310,7 +310,7 @@ pci_vfio_mp_sync_thread(void __rte_unused * arg)
continue;
}
- fd = pci_vfio_get_group_fd(vfio_group_no);
+ fd = vfio_get_group_fd(vfio_group_no);
if (fd < 0)
vfio_mp_sync_send_request(conn_sock, SOCKET_ERR);
@@ -376,7 +376,7 @@ vfio_mp_sync_socket_setup(void)
* set up a local socket and tell it to listen for incoming connections
*/
int
-pci_vfio_mp_sync_setup(void)
+vfio_mp_sync_setup(void)
{
int ret;
char thread_name[RTE_MAX_THREAD_NAME_LEN];
@@ -387,7 +387,7 @@ pci_vfio_mp_sync_setup(void)
}
ret = pthread_create(&socket_thread, NULL,
- pci_vfio_mp_sync_thread, NULL);
+ vfio_mp_sync_thread, NULL);
if (ret) {
RTE_LOG(ERR, EAL,
"Failed to create thread for communication with secondary processes!\n");
@@ -396,7 +396,7 @@ pci_vfio_mp_sync_setup(void)
}
/* Set thread_name for aid in debugging. */
- snprintf(thread_name, RTE_MAX_THREAD_NAME_LEN, "pci-vfio-sync");
+ snprintf(thread_name, RTE_MAX_THREAD_NAME_LEN, "vfio-sync");
ret = rte_thread_setname(socket_thread, thread_name);
if (ret)
RTE_LOG(DEBUG, EAL,
diff --git a/lib/librte_eal/linuxapp/eal/eal_xen_memory.c b/lib/librte_eal/linuxapp/eal/eal_xen_memory.c
index 0b612bb1..bddbdb07 100644
--- a/lib/librte_eal/linuxapp/eal/eal_xen_memory.c
+++ b/lib/librte_eal/linuxapp/eal/eal_xen_memory.c
@@ -167,8 +167,8 @@ rte_xen_mem_phy2mch(int32_t memseg_id, const phys_addr_t phy_addr)
if (memseg_id == -1) {
for (i = 0; i < RTE_MAX_MEMSEG; i++) {
if ((phy_addr >= memseg[i].phys_addr) &&
- (phys_addr < memseg[i].phys_addr +
- memseg[i].size)) {
+ (phy_addr < memseg[i].phys_addr +
+ memseg[i].len)) {
memseg_id = i;
break;
}
diff --git a/lib/librte_eal/linuxapp/eal/rte_eal_version.map b/lib/librte_eal/linuxapp/eal/rte_eal_version.map
index 05134673..a617b9e4 100644
--- a/lib/librte_eal/linuxapp/eal/rte_eal_version.map
+++ b/lib/librte_eal/linuxapp/eal/rte_eal_version.map
@@ -138,6 +138,7 @@ DPDK_2.2 {
rte_keepalive_mark_alive;
rte_keepalive_register_core;
rte_xen_dom0_supported;
+ rte_xen_mem_phy2mch;
} DPDK_2.1;