summaryrefslogtreecommitdiffstats
path: root/lib/librte_eal/bsdapp
diff options
context:
space:
mode:
Diffstat (limited to 'lib/librte_eal/bsdapp')
-rw-r--r--lib/librte_eal/bsdapp/Makefile2
-rw-r--r--lib/librte_eal/bsdapp/contigmem/BSDmakefile8
-rw-r--r--lib/librte_eal/bsdapp/contigmem/Makefile24
-rw-r--r--lib/librte_eal/bsdapp/contigmem/contigmem.c353
-rw-r--r--lib/librte_eal/bsdapp/contigmem/meson.build4
-rw-r--r--lib/librte_eal/bsdapp/eal/Makefile11
-rw-r--r--lib/librte_eal/bsdapp/eal/eal.c290
-rw-r--r--lib/librte_eal/bsdapp/eal/eal_alarm.c299
-rw-r--r--lib/librte_eal/bsdapp/eal/eal_alarm_private.h19
-rw-r--r--lib/librte_eal/bsdapp/eal/eal_cpuflags.c21
-rw-r--r--lib/librte_eal/bsdapp/eal/eal_dev.c21
-rw-r--r--lib/librte_eal/bsdapp/eal/eal_hugepage_info.c69
-rw-r--r--lib/librte_eal/bsdapp/eal/eal_interrupts.c464
-rw-r--r--lib/librte_eal/bsdapp/eal/eal_memalloc.c54
-rw-r--r--lib/librte_eal/bsdapp/eal/eal_memory.c471
-rw-r--r--lib/librte_eal/bsdapp/eal/eal_thread.c2
-rw-r--r--lib/librte_eal/bsdapp/eal/meson.build5
-rw-r--r--lib/librte_eal/bsdapp/nic_uio/BSDmakefile8
-rw-r--r--lib/librte_eal/bsdapp/nic_uio/Makefile24
-rw-r--r--lib/librte_eal/bsdapp/nic_uio/meson.build4
-rw-r--r--lib/librte_eal/bsdapp/nic_uio/nic_uio.c350
21 files changed, 1558 insertions, 945 deletions
diff --git a/lib/librte_eal/bsdapp/Makefile b/lib/librte_eal/bsdapp/Makefile
index 9d8e2477..5b06b216 100644
--- a/lib/librte_eal/bsdapp/Makefile
+++ b/lib/librte_eal/bsdapp/Makefile
@@ -4,7 +4,5 @@
include $(RTE_SDK)/mk/rte.vars.mk
DIRS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal
-DIRS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += contigmem
-DIRS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += nic_uio
include $(RTE_SDK)/mk/rte.subdir.mk
diff --git a/lib/librte_eal/bsdapp/contigmem/BSDmakefile b/lib/librte_eal/bsdapp/contigmem/BSDmakefile
deleted file mode 100644
index 33ce83ee..00000000
--- a/lib/librte_eal/bsdapp/contigmem/BSDmakefile
+++ /dev/null
@@ -1,8 +0,0 @@
-# SPDX-License-Identifier: BSD-3-Clause
-# Copyright(c) 2010-2014 Intel Corporation
-#
-
-KMOD= contigmem
-SRCS= contigmem.c device_if.h bus_if.h
-
-.include <bsd.kmod.mk>
diff --git a/lib/librte_eal/bsdapp/contigmem/Makefile b/lib/librte_eal/bsdapp/contigmem/Makefile
deleted file mode 100644
index 428a7ede..00000000
--- a/lib/librte_eal/bsdapp/contigmem/Makefile
+++ /dev/null
@@ -1,24 +0,0 @@
-# SPDX-License-Identifier: BSD-3-Clause
-# Copyright(c) 2010-2014 Intel Corporation
-
-include $(RTE_SDK)/mk/rte.vars.mk
-
-#
-# module name and path
-#
-MODULE = contigmem
-
-#
-# CFLAGS
-#
-MODULE_CFLAGS += -I$(SRCDIR)
-MODULE_CFLAGS += -I$(RTE_OUTPUT)/include
-MODULE_CFLAGS += -Winline -Wall -Werror
-MODULE_CFLAGS += -include $(RTE_OUTPUT)/include/rte_config.h
-
-#
-# all source are stored in SRCS-y
-#
-SRCS-y := contigmem.c
-
-include $(RTE_SDK)/mk/rte.bsdmodule.mk
diff --git a/lib/librte_eal/bsdapp/contigmem/contigmem.c b/lib/librte_eal/bsdapp/contigmem/contigmem.c
deleted file mode 100644
index 1715b5dc..00000000
--- a/lib/librte_eal/bsdapp/contigmem/contigmem.c
+++ /dev/null
@@ -1,353 +0,0 @@
-/* SPDX-License-Identifier: BSD-3-Clause
- * Copyright(c) 2010-2014 Intel Corporation
- */
-
-#include <sys/cdefs.h>
-__FBSDID("$FreeBSD$");
-
-#include <sys/param.h>
-#include <sys/bio.h>
-#include <sys/bus.h>
-#include <sys/conf.h>
-#include <sys/kernel.h>
-#include <sys/malloc.h>
-#include <sys/module.h>
-#include <sys/proc.h>
-#include <sys/rwlock.h>
-#include <sys/systm.h>
-#include <sys/sysctl.h>
-#include <sys/vmmeter.h>
-
-#include <machine/bus.h>
-
-#include <vm/vm.h>
-#include <vm/pmap.h>
-#include <vm/vm_param.h>
-#include <vm/vm_object.h>
-#include <vm/vm_page.h>
-#include <vm/vm_pager.h>
-#include <vm/vm_phys.h>
-
-struct contigmem_buffer {
- void *addr;
- int refcnt;
- struct mtx mtx;
-};
-
-struct contigmem_vm_handle {
- int buffer_index;
-};
-
-static int contigmem_load(void);
-static int contigmem_unload(void);
-static int contigmem_physaddr(SYSCTL_HANDLER_ARGS);
-
-static d_mmap_single_t contigmem_mmap_single;
-static d_open_t contigmem_open;
-static d_close_t contigmem_close;
-
-static int contigmem_num_buffers = RTE_CONTIGMEM_DEFAULT_NUM_BUFS;
-static int64_t contigmem_buffer_size = RTE_CONTIGMEM_DEFAULT_BUF_SIZE;
-
-static eventhandler_tag contigmem_eh_tag;
-static struct contigmem_buffer contigmem_buffers[RTE_CONTIGMEM_MAX_NUM_BUFS];
-static struct cdev *contigmem_cdev = NULL;
-static int contigmem_refcnt;
-
-TUNABLE_INT("hw.contigmem.num_buffers", &contigmem_num_buffers);
-TUNABLE_QUAD("hw.contigmem.buffer_size", &contigmem_buffer_size);
-
-static SYSCTL_NODE(_hw, OID_AUTO, contigmem, CTLFLAG_RD, 0, "contigmem");
-
-SYSCTL_INT(_hw_contigmem, OID_AUTO, num_buffers, CTLFLAG_RD,
- &contigmem_num_buffers, 0, "Number of contigmem buffers allocated");
-SYSCTL_QUAD(_hw_contigmem, OID_AUTO, buffer_size, CTLFLAG_RD,
- &contigmem_buffer_size, 0, "Size of each contiguous buffer");
-SYSCTL_INT(_hw_contigmem, OID_AUTO, num_references, CTLFLAG_RD,
- &contigmem_refcnt, 0, "Number of references to contigmem");
-
-static SYSCTL_NODE(_hw_contigmem, OID_AUTO, physaddr, CTLFLAG_RD, 0,
- "physaddr");
-
-MALLOC_DEFINE(M_CONTIGMEM, "contigmem", "contigmem(4) allocations");
-
-static int contigmem_modevent(module_t mod, int type, void *arg)
-{
- int error = 0;
-
- switch (type) {
- case MOD_LOAD:
- error = contigmem_load();
- break;
- case MOD_UNLOAD:
- error = contigmem_unload();
- break;
- default:
- break;
- }
-
- return error;
-}
-
-moduledata_t contigmem_mod = {
- "contigmem",
- (modeventhand_t)contigmem_modevent,
- 0
-};
-
-DECLARE_MODULE(contigmem, contigmem_mod, SI_SUB_DRIVERS, SI_ORDER_ANY);
-MODULE_VERSION(contigmem, 1);
-
-static struct cdevsw contigmem_ops = {
- .d_name = "contigmem",
- .d_version = D_VERSION,
- .d_flags = D_TRACKCLOSE,
- .d_mmap_single = contigmem_mmap_single,
- .d_open = contigmem_open,
- .d_close = contigmem_close,
-};
-
-static int
-contigmem_load()
-{
- char index_string[8], description[32];
- int i, error = 0;
- void *addr;
-
- if (contigmem_num_buffers > RTE_CONTIGMEM_MAX_NUM_BUFS) {
- printf("%d buffers requested is greater than %d allowed\n",
- contigmem_num_buffers, RTE_CONTIGMEM_MAX_NUM_BUFS);
- error = EINVAL;
- goto error;
- }
-
- if (contigmem_buffer_size < PAGE_SIZE ||
- (contigmem_buffer_size & (contigmem_buffer_size - 1)) != 0) {
- printf("buffer size 0x%lx is not greater than PAGE_SIZE and "
- "power of two\n", contigmem_buffer_size);
- error = EINVAL;
- goto error;
- }
-
- for (i = 0; i < contigmem_num_buffers; i++) {
- addr = contigmalloc(contigmem_buffer_size, M_CONTIGMEM, M_ZERO,
- 0, BUS_SPACE_MAXADDR, contigmem_buffer_size, 0);
- if (addr == NULL) {
- printf("contigmalloc failed for buffer %d\n", i);
- error = ENOMEM;
- goto error;
- }
-
- printf("%2u: virt=%p phys=%p\n", i, addr,
- (void *)pmap_kextract((vm_offset_t)addr));
-
- mtx_init(&contigmem_buffers[i].mtx, "contigmem", NULL, MTX_DEF);
- contigmem_buffers[i].addr = addr;
- contigmem_buffers[i].refcnt = 0;
-
- snprintf(index_string, sizeof(index_string), "%d", i);
- snprintf(description, sizeof(description),
- "phys addr for buffer %d", i);
- SYSCTL_ADD_PROC(NULL,
- &SYSCTL_NODE_CHILDREN(_hw_contigmem, physaddr), OID_AUTO,
- index_string, CTLTYPE_U64 | CTLFLAG_RD,
- (void *)(uintptr_t)i, 0, contigmem_physaddr, "LU",
- description);
- }
-
- contigmem_cdev = make_dev_credf(0, &contigmem_ops, 0, NULL, UID_ROOT,
- GID_WHEEL, 0600, "contigmem");
-
- return 0;
-
-error:
- for (i = 0; i < contigmem_num_buffers; i++) {
- if (contigmem_buffers[i].addr != NULL)
- contigfree(contigmem_buffers[i].addr,
- contigmem_buffer_size, M_CONTIGMEM);
- if (mtx_initialized(&contigmem_buffers[i].mtx))
- mtx_destroy(&contigmem_buffers[i].mtx);
- }
-
- return error;
-}
-
-static int
-contigmem_unload()
-{
- int i;
-
- if (contigmem_refcnt > 0)
- return EBUSY;
-
- if (contigmem_cdev != NULL)
- destroy_dev(contigmem_cdev);
-
- if (contigmem_eh_tag != NULL)
- EVENTHANDLER_DEREGISTER(process_exit, contigmem_eh_tag);
-
- for (i = 0; i < RTE_CONTIGMEM_MAX_NUM_BUFS; i++) {
- if (contigmem_buffers[i].addr != NULL)
- contigfree(contigmem_buffers[i].addr,
- contigmem_buffer_size, M_CONTIGMEM);
- if (mtx_initialized(&contigmem_buffers[i].mtx))
- mtx_destroy(&contigmem_buffers[i].mtx);
- }
-
- return 0;
-}
-
-static int
-contigmem_physaddr(SYSCTL_HANDLER_ARGS)
-{
- uint64_t physaddr;
- int index = (int)(uintptr_t)arg1;
-
- physaddr = (uint64_t)vtophys(contigmem_buffers[index].addr);
- return sysctl_handle_64(oidp, &physaddr, 0, req);
-}
-
-static int
-contigmem_open(struct cdev *cdev, int fflags, int devtype,
- struct thread *td)
-{
-
- atomic_add_int(&contigmem_refcnt, 1);
-
- return 0;
-}
-
-static int
-contigmem_close(struct cdev *cdev, int fflags, int devtype,
- struct thread *td)
-{
-
- atomic_subtract_int(&contigmem_refcnt, 1);
-
- return 0;
-}
-
-static int
-contigmem_cdev_pager_ctor(void *handle, vm_ooffset_t size, vm_prot_t prot,
- vm_ooffset_t foff, struct ucred *cred, u_short *color)
-{
- struct contigmem_vm_handle *vmh = handle;
- struct contigmem_buffer *buf;
-
- buf = &contigmem_buffers[vmh->buffer_index];
-
- atomic_add_int(&contigmem_refcnt, 1);
-
- mtx_lock(&buf->mtx);
- if (buf->refcnt == 0)
- memset(buf->addr, 0, contigmem_buffer_size);
- buf->refcnt++;
- mtx_unlock(&buf->mtx);
-
- return 0;
-}
-
-static void
-contigmem_cdev_pager_dtor(void *handle)
-{
- struct contigmem_vm_handle *vmh = handle;
- struct contigmem_buffer *buf;
-
- buf = &contigmem_buffers[vmh->buffer_index];
-
- mtx_lock(&buf->mtx);
- buf->refcnt--;
- mtx_unlock(&buf->mtx);
-
- free(vmh, M_CONTIGMEM);
-
- atomic_subtract_int(&contigmem_refcnt, 1);
-}
-
-static int
-contigmem_cdev_pager_fault(vm_object_t object, vm_ooffset_t offset, int prot,
- vm_page_t *mres)
-{
- vm_paddr_t paddr;
- vm_page_t m_paddr, page;
- vm_memattr_t memattr, memattr1;
-
- memattr = object->memattr;
-
- VM_OBJECT_WUNLOCK(object);
-
- paddr = offset;
-
- m_paddr = vm_phys_paddr_to_vm_page(paddr);
- if (m_paddr != NULL) {
- memattr1 = pmap_page_get_memattr(m_paddr);
- if (memattr1 != memattr)
- memattr = memattr1;
- }
-
- if (((*mres)->flags & PG_FICTITIOUS) != 0) {
- /*
- * If the passed in result page is a fake page, update it with
- * the new physical address.
- */
- page = *mres;
- VM_OBJECT_WLOCK(object);
- vm_page_updatefake(page, paddr, memattr);
- } else {
- vm_page_t mret;
- /*
- * Replace the passed in reqpage page with our own fake page and
- * free up the original page.
- */
- page = vm_page_getfake(paddr, memattr);
- VM_OBJECT_WLOCK(object);
- mret = vm_page_replace(page, object, (*mres)->pindex);
- KASSERT(mret == *mres,
- ("invalid page replacement, old=%p, ret=%p", *mres, mret));
- vm_page_lock(mret);
- vm_page_free(mret);
- vm_page_unlock(mret);
- *mres = page;
- }
-
- page->valid = VM_PAGE_BITS_ALL;
-
- return VM_PAGER_OK;
-}
-
-static struct cdev_pager_ops contigmem_cdev_pager_ops = {
- .cdev_pg_ctor = contigmem_cdev_pager_ctor,
- .cdev_pg_dtor = contigmem_cdev_pager_dtor,
- .cdev_pg_fault = contigmem_cdev_pager_fault,
-};
-
-static int
-contigmem_mmap_single(struct cdev *cdev, vm_ooffset_t *offset, vm_size_t size,
- struct vm_object **obj, int nprot)
-{
- struct contigmem_vm_handle *vmh;
- uint64_t buffer_index;
-
- /*
- * The buffer index is encoded in the offset. Divide the offset by
- * PAGE_SIZE to get the index of the buffer requested by the user
- * app.
- */
- buffer_index = *offset / PAGE_SIZE;
- if (buffer_index >= contigmem_num_buffers)
- return EINVAL;
-
- if (size > contigmem_buffer_size)
- return EINVAL;
-
- vmh = malloc(sizeof(*vmh), M_CONTIGMEM, M_NOWAIT | M_ZERO);
- if (vmh == NULL)
- return ENOMEM;
- vmh->buffer_index = buffer_index;
-
- *offset = (vm_ooffset_t)vtophys(contigmem_buffers[buffer_index].addr);
- *obj = cdev_pager_allocate(vmh, OBJT_DEVICE, &contigmem_cdev_pager_ops,
- size, nprot, *offset, curthread->td_ucred);
-
- return 0;
-}
diff --git a/lib/librte_eal/bsdapp/contigmem/meson.build b/lib/librte_eal/bsdapp/contigmem/meson.build
deleted file mode 100644
index 8fb2ab78..00000000
--- a/lib/librte_eal/bsdapp/contigmem/meson.build
+++ /dev/null
@@ -1,4 +0,0 @@
-# SPDX-License-Identifier: BSD-3-Clause
-# Copyright(c) 2017 Intel Corporation
-
-sources = files('contigmem.c')
diff --git a/lib/librte_eal/bsdapp/eal/Makefile b/lib/librte_eal/bsdapp/eal/Makefile
index dd455e67..d27da3d1 100644
--- a/lib/librte_eal/bsdapp/eal/Makefile
+++ b/lib/librte_eal/bsdapp/eal/Makefile
@@ -18,21 +18,25 @@ CFLAGS += $(WERROR_FLAGS) -O3
LDLIBS += -lexecinfo
LDLIBS += -lpthread
LDLIBS += -lgcc_s
+LDLIBS += -lrte_kvargs
EXPORT_MAP := ../../rte_eal_version.map
-LIBABIVER := 6
+LIBABIVER := 8
# specific to bsdapp exec-env
SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) := eal.c
+SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_cpuflags.c
SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_memory.c
SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_hugepage_info.c
SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_thread.c
SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_debug.c
+SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_memalloc.c
SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_lcore.c
SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_timer.c
SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_interrupts.c
SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_alarm.c
+SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_dev.c
# from common dir
SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_common_lcore.c
@@ -40,6 +44,7 @@ SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_common_timer.c
SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_common_memzone.c
SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_common_log.c
SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_common_launch.c
+SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_common_memalloc.c
SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_common_memory.c
SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_common_tailqs.c
SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_common_errno.c
@@ -48,14 +53,18 @@ SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_common_hypervisor.c
SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_common_string_fns.c
SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_common_hexdump.c
SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_common_devargs.c
+SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_common_class.c
SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_common_bus.c
SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_common_dev.c
SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_common_options.c
SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_common_thread.c
SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_common_proc.c
+SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_common_fbarray.c
+SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_common_uuid.c
SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += rte_malloc.c
SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += malloc_elem.c
SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += malloc_heap.c
+SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += malloc_mp.c
SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += rte_keepalive.c
SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += rte_service.c
SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += rte_reciprocal.c
diff --git a/lib/librte_eal/bsdapp/eal/eal.c b/lib/librte_eal/bsdapp/eal/eal.c
index 4eafcb5a..d7ae9d68 100644
--- a/lib/librte_eal/bsdapp/eal/eal.c
+++ b/lib/librte_eal/bsdapp/eal/eal.c
@@ -18,6 +18,7 @@
#include <limits.h>
#include <sys/mman.h>
#include <sys/queue.h>
+#include <sys/stat.h>
#include <rte_compat.h>
#include <rte_common.h>
@@ -40,6 +41,7 @@
#include <rte_dev.h>
#include <rte_devargs.h>
#include <rte_version.h>
+#include <rte_vfio.h>
#include <rte_atomic.h>
#include <malloc_heap.h>
@@ -64,8 +66,8 @@ static int mem_cfg_fd = -1;
static struct flock wr_lock = {
.l_type = F_WRLCK,
.l_whence = SEEK_SET,
- .l_start = offsetof(struct rte_mem_config, memseg),
- .l_len = sizeof(early_mem_config.memseg),
+ .l_start = offsetof(struct rte_mem_config, memsegs),
+ .l_len = sizeof(early_mem_config.memsegs),
};
/* Address of global and public configuration */
@@ -82,20 +84,72 @@ struct internal_config internal_config;
/* used by rte_rdtsc() */
int rte_cycles_vmware_tsc_map;
-/* Return user provided mbuf pool ops name */
-const char * __rte_experimental
-rte_eal_mbuf_user_pool_ops(void)
-{
- return internal_config.user_mbuf_pool_ops_name;
+/* platform-specific runtime dir */
+static char runtime_dir[PATH_MAX];
+
+static const char *default_runtime_dir = "/var/run";
+
+/**
+ * Build and create the per-prefix DPDK runtime directory.
+ *
+ * The base directory is default_runtime_dir ("/var/run") when getuid()
+ * returns 0; otherwise it is $XDG_RUNTIME_DIR if set, or "/tmp".
+ * The final path "<base>/dpdk/<hugefile_prefix>" is written to runtime_dir,
+ * and both path components are created with mode 0700. Directories that
+ * already exist are accepted.
+ *
+ * @return
+ *   0 on success, -1 if a path does not fit in PATH_MAX or mkdir() fails.
+ */
+int
+eal_create_runtime_dir(void)
+{
+	const char *directory = default_runtime_dir;
+	const char *xdg_runtime_dir = getenv("XDG_RUNTIME_DIR");
+	const char *fallback = "/tmp";
+	char tmp[PATH_MAX];
+	int ret;
+
+	if (getuid() != 0) {
+		/* try XDG path first, fall back to /tmp */
+		if (xdg_runtime_dir != NULL)
+			directory = xdg_runtime_dir;
+		else
+			directory = fallback;
+	}
+	/* create DPDK subdirectory under runtime dir */
+	ret = snprintf(tmp, sizeof(tmp), "%s/dpdk", directory);
+	/* snprintf() returns the length it wanted to write, so any value
+	 * greater than or equal to the buffer size means truncation.
+	 */
+	if (ret < 0 || (size_t)ret >= sizeof(tmp)) {
+		RTE_LOG(ERR, EAL, "Error creating DPDK runtime path name\n");
+		return -1;
+	}
+
+	/* create prefix-specific subdirectory under DPDK runtime dir */
+	ret = snprintf(runtime_dir, sizeof(runtime_dir), "%s/%s",
+			tmp, internal_config.hugefile_prefix);
+	if (ret < 0 || (size_t)ret >= sizeof(runtime_dir)) {
+		RTE_LOG(ERR, EAL, "Error creating prefix-specific runtime path name\n");
+		return -1;
+	}
+
+	/* create the path if it doesn't exist. no "mkdir -p" here, so do it
+	 * step by step.
+	 */
+	ret = mkdir(tmp, 0700);
+	if (ret < 0 && errno != EEXIST) {
+		RTE_LOG(ERR, EAL, "Error creating '%s': %s\n",
+			tmp, strerror(errno));
+		return -1;
+	}
+
+	ret = mkdir(runtime_dir, 0700);
+	if (ret < 0 && errno != EEXIST) {
+		RTE_LOG(ERR, EAL, "Error creating '%s': %s\n",
+			runtime_dir, strerror(errno));
+		return -1;
+	}
+
+	return 0;
}
-/* Return mbuf pool ops name */
const char *
-rte_eal_mbuf_default_mempool_ops(void)
+eal_get_runtime_dir(void)
{
- if (internal_config.user_mbuf_pool_ops_name == NULL)
- return RTE_MBUF_DEFAULT_MEMPOOL_OPS;
+ return runtime_dir;
+}
+/* Return user provided mbuf pool ops name */
+const char *
+rte_eal_mbuf_user_pool_ops(void)
+{
return internal_config.user_mbuf_pool_ops_name;
}
@@ -222,12 +276,17 @@ eal_proc_type_detect(void)
enum rte_proc_type_t ptype = RTE_PROC_PRIMARY;
const char *pathname = eal_runtime_config_path();
- /* if we can open the file but not get a write-lock we are a secondary
- * process. NOTE: if we get a file handle back, we keep that open
- * and don't close it to prevent a race condition between multiple opens */
- if (((mem_cfg_fd = open(pathname, O_RDWR)) >= 0) &&
- (fcntl(mem_cfg_fd, F_SETLK, &wr_lock) < 0))
- ptype = RTE_PROC_SECONDARY;
+ /* if there is no shared config, there can be no secondary processes */
+ if (!internal_config.no_shconf) {
+ /* if we can open the file but not get a write-lock we are a
+ * secondary process. NOTE: if we get a file handle back, we
+ * keep that open and don't close it to prevent a race condition
+ * between multiple opens.
+ */
+ if (((mem_cfg_fd = open(pathname, O_RDWR)) >= 0) &&
+ (fcntl(mem_cfg_fd, F_SETLK, &wr_lock) < 0))
+ ptype = RTE_PROC_SECONDARY;
+ }
RTE_LOG(INFO, EAL, "Auto-detected process type: %s\n",
ptype == RTE_PROC_PRIMARY ? "PRIMARY" : "SECONDARY");
@@ -289,7 +348,7 @@ eal_get_hugepage_mem_size(void)
for (i = 0; i < internal_config.num_hugepage_sizes; i++) {
struct hugepage_info *hpi = &internal_config.hugepage_info[i];
- if (hpi->hugedir != NULL) {
+ if (strnlen(hpi->hugedir, sizeof(hpi->hugedir)) != 0) {
for (j = 0; j < RTE_MAX_NUMA_NODES; j++) {
size += hpi->hugepage_sz * hpi->num_pages[j];
}
@@ -379,7 +438,8 @@ eal_parse_args(int argc, char **argv)
switch (opt) {
case OPT_MBUF_POOL_OPS_NAME_NUM:
- internal_config.user_mbuf_pool_ops_name = optarg;
+ internal_config.user_mbuf_pool_ops_name =
+ strdup(optarg);
break;
case 'h':
eal_usage(prgname);
@@ -403,6 +463,14 @@ eal_parse_args(int argc, char **argv)
}
}
+ /* create runtime data directory */
+ if (internal_config.no_shconf == 0 &&
+ eal_create_runtime_dir() < 0) {
+ RTE_LOG(ERR, EAL, "Cannot create runtime directory\n");
+ ret = -1;
+ goto out;
+ }
+
if (eal_adjust_config(&internal_config) != 0) {
ret = -1;
goto out;
@@ -429,25 +497,29 @@ out:
return ret;
}
+/*
+ * Callback passed to rte_memseg_list_walk() by
+ * eal_check_mem_on_local_socket().
+ *
+ * arg points to the socket id being looked for. Returns 1 when this memseg
+ * list is on that socket and its memseg array has a non-zero count, and 0
+ * otherwise.
+ * NOTE(review): presumably a non-zero return stops the walk and is passed
+ * back to the caller - confirm against rte_memseg_list_walk().
+ */
+static int
+check_socket(const struct rte_memseg_list *msl, void *arg)
+{
+ int *socket_id = arg;
+
+ if (msl->socket_id == *socket_id && msl->memseg_arr.count != 0)
+ return 1;
+
+ return 0;
+}
+
static void
eal_check_mem_on_local_socket(void)
{
- const struct rte_memseg *ms;
- int i, socket_id;
+ int socket_id;
socket_id = rte_lcore_to_socket_id(rte_config.master_lcore);
- ms = rte_eal_get_physmem_layout();
-
- for (i = 0; i < RTE_MAX_MEMSEG; i++)
- if (ms[i].socket_id == socket_id &&
- ms[i].len > 0)
- return;
-
- RTE_LOG(WARNING, EAL, "WARNING: Master core has no "
- "memory on local socket!\n");
+ if (rte_memseg_list_walk(check_socket, &socket_id) == 0)
+ RTE_LOG(WARNING, EAL, "WARNING: Master core has no memory on local socket!\n");
}
+
static int
sync_func(__attribute__((unused)) void *arg)
{
@@ -531,6 +603,9 @@ rte_eal_init(int argc, char **argv)
return -1;
}
+ /* FreeBSD always uses legacy memory model */
+ internal_config.legacy_mem = true;
+
if (eal_plugins_init() < 0) {
rte_eal_init_alert("Cannot init plugins\n");
rte_errno = EINVAL;
@@ -544,6 +619,24 @@ rte_eal_init(int argc, char **argv)
return -1;
}
+ rte_config_init();
+
+ if (rte_eal_intr_init() < 0) {
+ rte_eal_init_alert("Cannot init interrupt-handling thread\n");
+ return -1;
+ }
+
+ /* Put mp channel init before bus scan so that we can init the vdev
+ * bus through mp channel in the secondary process before the bus scan.
+ */
+ if (rte_mp_channel_init() < 0) {
+ rte_eal_init_alert("failed to init mp channel\n");
+ if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
+ rte_errno = EFAULT;
+ return -1;
+ }
+ }
+
if (rte_bus_scan()) {
rte_eal_init_alert("Cannot scan the buses for devices\n");
rte_errno = ENODEV;
@@ -554,13 +647,17 @@ rte_eal_init(int argc, char **argv)
/* autodetect the iova mapping mode (default is iova_pa) */
rte_eal_get_configuration()->iova_mode = rte_bus_get_iommu_class();
- if (internal_config.no_hugetlbfs == 0 &&
- internal_config.process_type != RTE_PROC_SECONDARY &&
- eal_hugepage_info_init() < 0) {
- rte_eal_init_alert("Cannot get hugepage information.");
- rte_errno = EACCES;
- rte_atomic32_clear(&run_once);
- return -1;
+ if (internal_config.no_hugetlbfs == 0) {
+ /* rte_config isn't initialized yet */
+ ret = internal_config.process_type == RTE_PROC_PRIMARY ?
+ eal_hugepage_info_init() :
+ eal_hugepage_info_read();
+ if (ret < 0) {
+ rte_eal_init_alert("Cannot get hugepage information.");
+ rte_errno = EACCES;
+ rte_atomic32_clear(&run_once);
+ return -1;
+ }
}
if (internal_config.memory == 0 && internal_config.force_sockets == 0) {
@@ -583,14 +680,14 @@ rte_eal_init(int argc, char **argv)
rte_srand(rte_rdtsc());
- rte_config_init();
-
- if (rte_mp_channel_init() < 0) {
- rte_eal_init_alert("failed to init mp channel\n");
- if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
- rte_errno = EFAULT;
- return -1;
- }
+ /* in secondary processes, memory init may allocate additional fbarrays
+ * not present in primary processes, so to avoid any potential issues,
+ * initialize memzones first.
+ */
+ if (rte_eal_memzone_init() < 0) {
+ rte_eal_init_alert("Cannot init memzone\n");
+ rte_errno = ENODEV;
+ return -1;
}
if (rte_eal_memory_init() < 0) {
@@ -599,8 +696,8 @@ rte_eal_init(int argc, char **argv)
return -1;
}
- if (rte_eal_memzone_init() < 0) {
- rte_eal_init_alert("Cannot init memzone\n");
+ if (rte_eal_malloc_heap_init() < 0) {
+ rte_eal_init_alert("Cannot init malloc heap\n");
rte_errno = ENODEV;
return -1;
}
@@ -617,11 +714,6 @@ rte_eal_init(int argc, char **argv)
return -1;
}
- if (rte_eal_intr_init() < 0) {
- rte_eal_init_alert("Cannot init interrupt-handling thread\n");
- return -1;
- }
-
if (rte_eal_timer_init() < 0) {
rte_eal_init_alert("Cannot init HPET or TSC timers\n");
rte_errno = ENOTSUP;
@@ -632,7 +724,7 @@ rte_eal_init(int argc, char **argv)
eal_thread_init_master(rte_config.master_lcore);
- ret = eal_thread_dump_affinity(cpuset, RTE_CPU_AFFINITY_STR_LEN);
+ ret = eal_thread_dump_affinity(cpuset, sizeof(cpuset));
RTE_LOG(DEBUG, EAL, "Master lcore %u is ready (tid=%p;cpuset=[%s%s])\n",
rte_config.master_lcore, thread_id, cpuset,
@@ -658,7 +750,7 @@ rte_eal_init(int argc, char **argv)
rte_panic("Cannot create thread\n");
/* Set thread_name for aid in debugging. */
- snprintf(thread_name, RTE_MAX_THREAD_NAME_LEN,
+ snprintf(thread_name, sizeof(thread_name),
"lcore-slave-%d", i);
rte_thread_setname(lcore_config[i].thread_id, thread_name);
}
@@ -735,18 +827,6 @@ rte_eal_vfio_intr_mode(void)
return RTE_INTR_MODE_NONE;
}
-/* dummy forward declaration. */
-struct vfio_device_info;
-
-/* dummy prototypes. */
-int rte_vfio_setup_device(const char *sysfs_base, const char *dev_addr,
- int *vfio_dev_fd, struct vfio_device_info *device_info);
-int rte_vfio_release_device(const char *sysfs_base, const char *dev_addr, int fd);
-int rte_vfio_enable(const char *modname);
-int rte_vfio_is_enabled(const char *modname);
-int rte_vfio_noiommu_is_enabled(void);
-int rte_vfio_clear_group(int vfio_group_fd);
-
int rte_vfio_setup_device(__rte_unused const char *sysfs_base,
__rte_unused const char *dev_addr,
__rte_unused int *vfio_dev_fd,
@@ -781,3 +861,81 @@ int rte_vfio_clear_group(__rte_unused int vfio_group_fd)
{
return 0;
}
+
+int
+rte_vfio_dma_map(uint64_t __rte_unused vaddr, __rte_unused uint64_t iova,
+ __rte_unused uint64_t len)
+{
+ return -1;
+}
+
+int
+rte_vfio_dma_unmap(uint64_t __rte_unused vaddr, uint64_t __rte_unused iova,
+ __rte_unused uint64_t len)
+{
+ return -1;
+}
+
+int
+rte_vfio_get_group_num(__rte_unused const char *sysfs_base,
+ __rte_unused const char *dev_addr,
+ __rte_unused int *iommu_group_num)
+{
+ return -1;
+}
+
+int
+rte_vfio_get_container_fd(void)
+{
+ return -1;
+}
+
+int
+rte_vfio_get_group_fd(__rte_unused int iommu_group_num)
+{
+ return -1;
+}
+
+int
+rte_vfio_container_create(void)
+{
+ return -1;
+}
+
+int
+rte_vfio_container_destroy(__rte_unused int container_fd)
+{
+ return -1;
+}
+
+int
+rte_vfio_container_group_bind(__rte_unused int container_fd,
+ __rte_unused int iommu_group_num)
+{
+ return -1;
+}
+
+int
+rte_vfio_container_group_unbind(__rte_unused int container_fd,
+ __rte_unused int iommu_group_num)
+{
+ return -1;
+}
+
+int
+rte_vfio_container_dma_map(__rte_unused int container_fd,
+ __rte_unused uint64_t vaddr,
+ __rte_unused uint64_t iova,
+ __rte_unused uint64_t len)
+{
+ return -1;
+}
+
+int
+rte_vfio_container_dma_unmap(__rte_unused int container_fd,
+ __rte_unused uint64_t vaddr,
+ __rte_unused uint64_t iova,
+ __rte_unused uint64_t len)
+{
+ return -1;
+}
diff --git a/lib/librte_eal/bsdapp/eal/eal_alarm.c b/lib/librte_eal/bsdapp/eal/eal_alarm.c
index eb3913c9..51ea4b8c 100644
--- a/lib/librte_eal/bsdapp/eal/eal_alarm.c
+++ b/lib/librte_eal/bsdapp/eal/eal_alarm.c
@@ -1,31 +1,314 @@
/* SPDX-License-Identifier: BSD-3-Clause
- * Copyright(c) 2010-2014 Intel Corporation
+ * Copyright(c) 2010-2018 Intel Corporation
*/
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <stdio.h>
#include <stdlib.h>
+#include <string.h>
+#include <time.h>
#include <errno.h>
#include <rte_alarm.h>
+#include <rte_cycles.h>
#include <rte_common.h>
+#include <rte_errno.h>
+#include <rte_interrupts.h>
+#include <rte_spinlock.h>
+
#include "eal_private.h"
+#include "eal_alarm_private.h"
+
+#define NS_PER_US 1000
+
+#ifdef CLOCK_MONOTONIC_RAW /* Defined in glibc bits/time.h */
+#define CLOCK_TYPE_ID CLOCK_MONOTONIC_RAW
+#else
+#define CLOCK_TYPE_ID CLOCK_MONOTONIC
+#endif
+
+struct alarm_entry {
+ LIST_ENTRY(alarm_entry) next;
+ struct rte_intr_handle handle;
+ struct timespec time;
+ rte_eal_alarm_callback cb_fn;
+ void *cb_arg;
+ volatile uint8_t executing;
+ volatile pthread_t executing_id;
+};
+
+static LIST_HEAD(alarm_list, alarm_entry) alarm_list = LIST_HEAD_INITIALIZER();
+static rte_spinlock_t alarm_list_lk = RTE_SPINLOCK_INITIALIZER;
+
+static struct rte_intr_handle intr_handle = {.fd = -1 };
+static void eal_alarm_callback(void *arg);
+/**
+ * Initialize the interrupt handle shared by all alarms.
+ *
+ * Marks the handle as RTE_INTR_HANDLE_ALARM and opens "/dev/zero" read-only
+ * so the handle carries a real file descriptor.
+ *
+ * @return
+ *   0 on success, -1 if the file descriptor could not be opened.
+ */
int
rte_eal_alarm_init(void)
{
+	intr_handle.type = RTE_INTR_HANDLE_ALARM;
+
+	/* on FreeBSD, timers don't use fd's, and their identifiers are stored
+	 * in separate namespace from fd's, so using any value is OK. however,
+	 * EAL interrupts handler expects fd's to be unique, so use an actual fd
+	 * to guarantee unique timer identifier.
+	 */
+	intr_handle.fd = open("/dev/zero", O_RDONLY);
+	/* without a valid fd the handle cannot be used as a unique timer
+	 * identifier, so report the failure instead of returning success
+	 */
+	if (intr_handle.fd < 0)
+		return -1;
+
+	return 0;
+}
+
+/*
+ * Three-way comparison of two timespec values.
+ *
+ * Seconds are compared first, nanoseconds break ties.
+ * Returns -1 if now is earlier than at, 1 if now is later than at, and 0 if
+ * both are equal.
+ */
+static inline int
+timespec_cmp(const struct timespec *now, const struct timespec *at)
+{
+ if (now->tv_sec < at->tv_sec)
+ return -1;
+ if (now->tv_sec > at->tv_sec)
+ return 1;
+ if (now->tv_nsec < at->tv_nsec)
+ return -1;
+ if (now->tv_nsec > at->tv_nsec)
+ return 1;
return 0;
}
+/*
+ * Return the number of nanoseconds from now until at.
+ *
+ * Returns 0 when now is equal to or later than at, so the result never
+ * represents a negative interval.
+ */
+static inline uint64_t
+diff_ns(struct timespec *now, struct timespec *at)
+{
+ uint64_t now_ns, at_ns;
+
+ /* deadline already reached or passed */
+ if (timespec_cmp(now, at) >= 0)
+ return 0;
+
+ /* flatten both timestamps to nanoseconds before subtracting */
+ now_ns = now->tv_sec * NS_PER_S + now->tv_nsec;
+ at_ns = at->tv_sec * NS_PER_S + at->tv_nsec;
+
+ return at_ns - now_ns;
+}
int
-rte_eal_alarm_set(uint64_t us __rte_unused,
- rte_eal_alarm_callback cb_fn __rte_unused,
- void *cb_arg __rte_unused)
+eal_alarm_get_timeout_ns(uint64_t *val)
{
- return -ENOTSUP;
+ struct alarm_entry *ap;
+ struct timespec now;
+
+ if (clock_gettime(CLOCK_TYPE_ID, &now) < 0)
+ return -1;
+
+ if (LIST_EMPTY(&alarm_list))
+ return -1;
+
+ ap = LIST_FIRST(&alarm_list);
+
+ *val = diff_ns(&now, &ap->time);
+
+ return 0;
+}
+
+/*
+ * Unregister eal_alarm_callback for the alarm at the head of alarm_list.
+ *
+ * The callback is identified by the head entry's time field, the same
+ * argument register_first_callback() passes when registering. The unregister
+ * call is repeated for as long as it returns -EAGAIN.
+ *
+ * Returns the last result of rte_intr_callback_unregister(), or 0 when the
+ * list is empty (in which case nothing is unregistered).
+ * NOTE(review): rte_eal_alarm_cancel() calls this only when the list is
+ * empty, so on that path this function does nothing - confirm whether that
+ * is intended.
+ */
+static int
+unregister_current_callback(void)
+{
+ struct alarm_entry *ap;
+ int ret = 0;
+
+ if (!LIST_EMPTY(&alarm_list)) {
+ ap = LIST_FIRST(&alarm_list);
+
+ /* retry while the interrupt layer reports -EAGAIN */
+ do {
+ ret = rte_intr_callback_unregister(&intr_handle,
+ eal_alarm_callback, &ap->time);
+ } while (ret == -EAGAIN);
+ }
+
+ return ret;
}
+/*
+ * Register eal_alarm_callback for the alarm at the head of alarm_list.
+ *
+ * The head entry's time field is passed as the callback argument.
+ * Returns the result of rte_intr_callback_register(), or 0 when the list is
+ * empty (nothing is registered in that case).
+ */
+static int
+register_first_callback(void)
+{
+ struct alarm_entry *ap;
+ int ret = 0;
+
+ if (!LIST_EMPTY(&alarm_list)) {
+ ap = LIST_FIRST(&alarm_list);
+
+ /* register a new callback */
+ ret = rte_intr_callback_register(&intr_handle,
+ eal_alarm_callback, &ap->time);
+ }
+ return ret;
+}
+
+/**
+ * Interrupt callback: run every alarm whose deadline has passed.
+ *
+ * Walks alarm_list from the head while the head's time is not later than the
+ * current time. Each due entry is flagged as executing (with the id of the
+ * executing thread), its user callback is invoked with alarm_list_lk
+ * released, and the entry is then removed from the list and freed.
+ * Afterwards the callback for the new head entry, if any, is registered
+ * again.
+ *
+ * @param arg
+ *   Unused.
+ */
+static void
+eal_alarm_callback(void *arg __rte_unused)
+{
+	struct timespec now;
+	struct alarm_entry *ap;
+
+	rte_spinlock_lock(&alarm_list_lk);
+	ap = LIST_FIRST(&alarm_list);
+
+	if (clock_gettime(CLOCK_TYPE_ID, &now) < 0) {
+		/* never leave the list lock held on the error path */
+		rte_spinlock_unlock(&alarm_list_lk);
+		return;
+	}
+
+	while (ap != NULL && timespec_cmp(&now, &ap->time) >= 0) {
+		ap->executing = 1;
+		ap->executing_id = pthread_self();
+		/* drop the lock so the user callback may use the alarm API */
+		rte_spinlock_unlock(&alarm_list_lk);
+
+		ap->cb_fn(ap->cb_arg);
+
+		rte_spinlock_lock(&alarm_list_lk);
+
+		LIST_REMOVE(ap, next);
+		free(ap);
+
+		ap = LIST_FIRST(&alarm_list);
+	}
+
+	/* timer has been deleted from the kqueue, so recreate it if needed */
+	register_first_callback();
+
+	rte_spinlock_unlock(&alarm_list_lk);
+}
+
+
+/**
+ * Schedule cb_fn(cb_arg) to be called once, `us` microseconds from now.
+ *
+ * The new alarm is inserted into alarm_list so that the list stays ordered
+ * by expiry time, after which the callback for the head entry is registered.
+ *
+ * @param us
+ *   Delay in microseconds; must be at least 1 and small enough that the
+ *   nanosecond arithmetic below cannot wrap a uint64_t.
+ * @param cb_fn
+ *   Function to call when the alarm expires; must not be NULL.
+ * @param cb_arg
+ *   Argument passed to cb_fn.
+ * @return
+ *   0 on success, -EINVAL on bad parameters, -ENOMEM if the alarm entry
+ *   cannot be allocated, or a negative errno value if the current time
+ *   cannot be read.
+ */
int
-rte_eal_alarm_cancel(rte_eal_alarm_callback cb_fn __rte_unused,
- void *cb_arg __rte_unused)
+rte_eal_alarm_set(uint64_t us, rte_eal_alarm_callback cb_fn, void *cb_arg)
{
- return -ENOTSUP;
+	struct alarm_entry *ap, *new_alarm;
+	struct timespec now;
+	uint64_t ns;
+	int ret = 0;
+
+	/* check parameters. the upper bound keeps both us * NS_PER_US and the
+	 * later addition of now.tv_nsec (below NS_PER_S for a valid timespec)
+	 * within uint64_t
+	 */
+	if (us < 1 || us > (UINT64_MAX - NS_PER_S) / NS_PER_US ||
+			cb_fn == NULL)
+		return -EINVAL;
+
+	/* use current time to calculate absolute time of alarm. read it
+	 * before allocating so that a failure needs no cleanup
+	 */
+	if (clock_gettime(CLOCK_TYPE_ID, &now) < 0)
+		return -errno;
+
+	new_alarm = calloc(1, sizeof(*new_alarm));
+	if (new_alarm == NULL)
+		return -ENOMEM;
+
+	ns = us * NS_PER_US;
+
+	new_alarm->cb_fn = cb_fn;
+	new_alarm->cb_arg = cb_arg;
+	new_alarm->time.tv_nsec = (now.tv_nsec + ns) % NS_PER_S;
+	new_alarm->time.tv_sec = now.tv_sec + ((now.tv_nsec + ns) / NS_PER_S);
+
+	rte_spinlock_lock(&alarm_list_lk);
+
+	if (LIST_EMPTY(&alarm_list))
+		LIST_INSERT_HEAD(&alarm_list, new_alarm, next);
+	else {
+		/* keep the list sorted by expiry time, earliest first */
+		LIST_FOREACH(ap, &alarm_list, next) {
+			if (timespec_cmp(&new_alarm->time, &ap->time) < 0) {
+				LIST_INSERT_BEFORE(ap, new_alarm, next);
+				break;
+			}
+			/* reached the tail: the new alarm expires last */
+			if (LIST_NEXT(ap, next) == NULL) {
+				LIST_INSERT_AFTER(ap, new_alarm, next);
+				break;
+			}
+		}
+	}
+
+	/* re-register first callback just in case */
+	register_first_callback();
+
+	rte_spinlock_unlock(&alarm_list_lk);
+
+	return ret;
+}
+
+int
+rte_eal_alarm_cancel(rte_eal_alarm_callback cb_fn, void *cb_arg)
+{
+ struct alarm_entry *ap, *ap_prev;
+ int count = 0;
+ int err = 0;
+ int executing;
+
+ if (!cb_fn) {
+ rte_errno = EINVAL;
+ return -1;
+ }
+
+ do {
+ executing = 0;
+ rte_spinlock_lock(&alarm_list_lk);
+ /* remove any matches at the start of the list */
+ while (1) {
+ ap = LIST_FIRST(&alarm_list);
+ if (ap == NULL)
+ break;
+ if (cb_fn != ap->cb_fn)
+ break;
+ if (cb_arg != ap->cb_arg && cb_arg != (void *) -1)
+ break;
+ if (ap->executing == 0) {
+ LIST_REMOVE(ap, next);
+ free(ap);
+ count++;
+ } else {
+ /* If calling from other context, mark that
+ * alarm is executing so loop can spin till it
+ * finish. Otherwise we are trying to cancel
+ * ourselves - mark it by EINPROGRESS.
+ */
+ if (pthread_equal(ap->executing_id,
+ pthread_self()) == 0)
+ executing++;
+ else
+ err = EINPROGRESS;
+
+ break;
+ }
+ }
+ ap_prev = ap;
+
+ /* now go through list, removing entries not at start */
+ LIST_FOREACH(ap, &alarm_list, next) {
+ /* this won't be true first time through */
+ if (cb_fn == ap->cb_fn &&
+ (cb_arg == (void *)-1 ||
+ cb_arg == ap->cb_arg)) {
+ if (ap->executing == 0) {
+ LIST_REMOVE(ap, next);
+ free(ap);
+ count++;
+ ap = ap_prev;
+ } else if (pthread_equal(ap->executing_id,
+ pthread_self()) == 0) {
+ executing++;
+ } else {
+ err = EINPROGRESS;
+ }
+ }
+ ap_prev = ap;
+ }
+ rte_spinlock_unlock(&alarm_list_lk);
+ } while (executing != 0);
+
+ if (count == 0 && err == 0)
+ rte_errno = ENOENT;
+ else if (err)
+ rte_errno = err;
+
+ rte_spinlock_lock(&alarm_list_lk);
+
+ /* unregister if no alarms left, otherwise re-register first */
+ if (LIST_EMPTY(&alarm_list))
+ unregister_current_callback();
+ else
+ register_first_callback();
+
+ rte_spinlock_unlock(&alarm_list_lk);
+
+ return count;
}
diff --git a/lib/librte_eal/bsdapp/eal/eal_alarm_private.h b/lib/librte_eal/bsdapp/eal/eal_alarm_private.h
new file mode 100644
index 00000000..65c71151
--- /dev/null
+++ b/lib/librte_eal/bsdapp/eal/eal_alarm_private.h
@@ -0,0 +1,19 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2018 Intel Corporation
+ */
+
+#ifndef EAL_ALARM_PRIVATE_H
+#define EAL_ALARM_PRIVATE_H
+
+#include <inttypes.h>
+
+/*
+ * FreeBSD needs a back-channel communication mechanism between interrupt and
+ * alarm thread, because on FreeBSD, timer period is set up inside the interrupt
+ * API and not inside alarm API like on Linux.
+ */
+
+/*
+ * Store in *val the timeout, in nanoseconds, of the soonest pending alarm.
+ * The interrupt code treats a negative return value as failure.
+ * NOTE(review): whether the value is relative to "now" is not visible from
+ * this header - confirm against the definition in eal_alarm.c.
+ */
+int
+eal_alarm_get_timeout_ns(uint64_t *val);
+
+#endif // EAL_ALARM_PRIVATE_H
diff --git a/lib/librte_eal/bsdapp/eal/eal_cpuflags.c b/lib/librte_eal/bsdapp/eal/eal_cpuflags.c
new file mode 100644
index 00000000..69b161ea
--- /dev/null
+++ b/lib/librte_eal/bsdapp/eal/eal_cpuflags.c
@@ -0,0 +1,21 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright 2018 Mellanox Technologies, Ltd
+ */
+
+#include <rte_common.h>
+#include <rte_cpuflags.h>
+
+/*
+ * FreeBSD stub: the auxiliary-vector lookup is not implemented, so 'type' is
+ * ignored and 0 is always returned.
+ */
+unsigned long
+rte_cpu_getauxval(unsigned long type __rte_unused)
+{
+ /* not implemented */
+ return 0;
+}
+
+/*
+ * FreeBSD stub: no auxiliary-vector string is available to compare against,
+ * so both arguments are ignored and -1 is always returned.
+ */
+int
+rte_cpu_strcmp_auxval(unsigned long type __rte_unused,
+ const char *str __rte_unused)
+{
+ /* not implemented */
+ return -1;
+}
diff --git a/lib/librte_eal/bsdapp/eal/eal_dev.c b/lib/librte_eal/bsdapp/eal/eal_dev.c
new file mode 100644
index 00000000..1c6c51bd
--- /dev/null
+++ b/lib/librte_eal/bsdapp/eal/eal_dev.c
@@ -0,0 +1,21 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2018 Intel Corporation
+ */
+
+#include <rte_log.h>
+#include <rte_compat.h>
+#include <rte_dev.h>
+
+/*
+ * FreeBSD stub: device event monitoring is not supported. Logs an error and
+ * always returns -1.
+ */
+int __rte_experimental
+rte_dev_event_monitor_start(void)
+{
+ RTE_LOG(ERR, EAL, "Device event is not supported for FreeBSD\n");
+ return -1;
+}
+
+/*
+ * FreeBSD stub: device event monitoring is not supported, so there is nothing
+ * to stop. Logs an error and always returns -1.
+ */
+int __rte_experimental
+rte_dev_event_monitor_stop(void)
+{
+ RTE_LOG(ERR, EAL, "Device event is not supported for FreeBSD\n");
+ return -1;
+}
diff --git a/lib/librte_eal/bsdapp/eal/eal_hugepage_info.c b/lib/librte_eal/bsdapp/eal/eal_hugepage_info.c
index be2dbf0e..1e8f5df2 100644
--- a/lib/librte_eal/bsdapp/eal/eal_hugepage_info.c
+++ b/lib/librte_eal/bsdapp/eal/eal_hugepage_info.c
@@ -19,10 +19,10 @@
* Used in this file to store the hugepage file map on disk
*/
static void *
-create_shared_memory(const char *filename, const size_t mem_size)
+map_shared_memory(const char *filename, const size_t mem_size, int flags)
{
void *retval;
- int fd = open(filename, O_CREAT | O_RDWR, 0666);
+ int fd = open(filename, flags, 0666);
if (fd < 0)
return NULL;
if (ftruncate(fd, mem_size) < 0) {
@@ -34,6 +34,18 @@ create_shared_memory(const char *filename, const size_t mem_size)
return retval;
}
+/*
+ * Map an existing shared file: forwards to map_shared_memory() with O_RDWR
+ * only (no O_CREAT), so the file is not created if it is missing.
+ */
+static void *
+open_shared_memory(const char *filename, const size_t mem_size)
+{
+ return map_shared_memory(filename, mem_size, O_RDWR);
+}
+
+/*
+ * Map a shared file, creating it if necessary: forwards to
+ * map_shared_memory() with O_RDWR | O_CREAT.
+ */
+static void *
+create_shared_memory(const char *filename, const size_t mem_size)
+{
+ return map_shared_memory(filename, mem_size, O_RDWR | O_CREAT);
+}
+
/*
* No hugepage support on freebsd, but we dummy it, using contigmem driver
*/
@@ -46,13 +58,16 @@ eal_hugepage_info_init(void)
/* re-use the linux "internal config" structure for our memory data */
struct hugepage_info *hpi = &internal_config.hugepage_info[0];
struct hugepage_info *tmp_hpi;
+ unsigned int i;
+
+ internal_config.num_hugepage_sizes = 1;
sysctl_size = sizeof(num_buffers);
error = sysctlbyname("hw.contigmem.num_buffers", &num_buffers,
&sysctl_size, NULL, 0);
if (error != 0) {
- RTE_LOG(ERR, EAL, "could not read sysctl hw.contigmem.num_buffers");
+ RTE_LOG(ERR, EAL, "could not read sysctl hw.contigmem.num_buffers\n");
return -1;
}
@@ -61,7 +76,7 @@ eal_hugepage_info_init(void)
&sysctl_size, NULL, 0);
if (error != 0) {
- RTE_LOG(ERR, EAL, "could not read sysctl hw.contigmem.buffer_size");
+ RTE_LOG(ERR, EAL, "could not read sysctl hw.contigmem.buffer_size\n");
return -1;
}
@@ -81,25 +96,61 @@ eal_hugepage_info_init(void)
RTE_LOG(INFO, EAL, "Contigmem driver has %d buffers, each of size %dKB\n",
num_buffers, (int)(buffer_size>>10));
- internal_config.num_hugepage_sizes = 1;
- hpi->hugedir = CONTIGMEM_DEV;
+ strlcpy(hpi->hugedir, CONTIGMEM_DEV, sizeof(hpi->hugedir));
hpi->hugepage_sz = buffer_size;
hpi->num_pages[0] = num_buffers;
hpi->lock_descriptor = fd;
+ /* for no shared files mode, do not create shared memory config */
+ if (internal_config.no_shconf)
+ return 0;
+
tmp_hpi = create_shared_memory(eal_hugepage_info_path(),
- sizeof(struct hugepage_info));
+ sizeof(internal_config.hugepage_info));
if (tmp_hpi == NULL ) {
RTE_LOG(ERR, EAL, "Failed to create shared memory!\n");
return -1;
}
- memcpy(tmp_hpi, hpi, sizeof(struct hugepage_info));
+ memcpy(tmp_hpi, hpi, sizeof(internal_config.hugepage_info));
+
+ /* we've copied file descriptors along with everything else, but they
+ * will be invalid in secondary process, so overwrite them
+ */
+ for (i = 0; i < RTE_DIM(internal_config.hugepage_info); i++) {
+ struct hugepage_info *tmp = &tmp_hpi[i];
+ tmp->lock_descriptor = -1;
+ }
- if ( munmap(tmp_hpi, sizeof(struct hugepage_info)) < 0) {
+ if (munmap(tmp_hpi, sizeof(internal_config.hugepage_info)) < 0) {
RTE_LOG(ERR, EAL, "Failed to unmap shared memory!\n");
return -1;
}
return 0;
}
+
+/*
+ * Load the hugepage info table from the shared file at
+ * eal_hugepage_info_path() into internal_config.hugepage_info.
+ *
+ * Returns 0 on success, -1 if the shared file cannot be mapped or unmapped.
+ */
+int
+eal_hugepage_info_read(void)
+{
+	const size_t info_sz = sizeof(internal_config.hugepage_info);
+	struct hugepage_info *shared;
+
+	internal_config.num_hugepage_sizes = 1;
+
+	shared = open_shared_memory(eal_hugepage_info_path(), info_sz);
+	if (shared == NULL) {
+		RTE_LOG(ERR, EAL, "Failed to open shared memory!\n");
+		return -1;
+	}
+
+	/* copy the whole table, starting at entry 0 */
+	memcpy(&internal_config.hugepage_info[0], shared, info_sz);
+
+	if (munmap(shared, info_sz) < 0) {
+		RTE_LOG(ERR, EAL, "Failed to unmap shared memory!\n");
+		return -1;
+	}
+
+	return 0;
+}
diff --git a/lib/librte_eal/bsdapp/eal/eal_interrupts.c b/lib/librte_eal/bsdapp/eal/eal_interrupts.c
index 290d53ab..2feee2d5 100644
--- a/lib/librte_eal/bsdapp/eal/eal_interrupts.c
+++ b/lib/librte_eal/bsdapp/eal/eal_interrupts.c
@@ -1,51 +1,479 @@
/* SPDX-License-Identifier: BSD-3-Clause
- * Copyright(c) 2010-2014 Intel Corporation
+ * Copyright(c) 2010-2018 Intel Corporation
*/
+#include <string.h>
+#include <sys/types.h>
+#include <sys/event.h>
+#include <sys/queue.h>
+#include <unistd.h>
+
+#include <rte_errno.h>
+#include <rte_lcore.h>
+#include <rte_spinlock.h>
#include <rte_common.h>
#include <rte_interrupts.h>
+
#include "eal_private.h"
+#include "eal_alarm_private.h"
+
+#define MAX_INTR_EVENTS 16
+
+/**
+ * Scratch buffer handed to read() by eal_intr_process_interrupts() when a
+ * ready descriptor has to be drained. Only a generic byte array is defined.
+ */
+union rte_intr_read_buffer {
+ char charbuf[16]; /* for others */
+};
+
+TAILQ_HEAD(rte_intr_cb_list, rte_intr_callback);
+TAILQ_HEAD(rte_intr_source_list, rte_intr_source);
+
+/* One user callback registered on an interrupt source (see
+ * rte_intr_source.callbacks).
+ */
+struct rte_intr_callback {
+ TAILQ_ENTRY(rte_intr_callback) next;
+ rte_intr_callback_fn cb_fn; /**< callback address */
+ void *cb_arg; /**< parameter for callback */
+};
+
+/* One interrupt source (looked up by intr_handle.fd) together with the
+ * callbacks registered on it; linked into the global intr_sources list.
+ */
+struct rte_intr_source {
+ TAILQ_ENTRY(rte_intr_source) next;
+ struct rte_intr_handle intr_handle; /**< interrupt handle */
+ struct rte_intr_cb_list callbacks; /**< user callbacks */
+ /* set to 1 while the interrupt thread is servicing this source;
+  * unregistering is refused with -EAGAIN while it is non-zero
+  */
+ uint32_t active;
+};
+
+/* global spinlock for interrupt data operation */
+static rte_spinlock_t intr_lock = RTE_SPINLOCK_INITIALIZER;
+
+/* interrupt sources list */
+static struct rte_intr_source_list intr_sources;
+
+/* interrupt handling thread */
+static pthread_t intr_thread;
+
+/* kqueue descriptor: -1 until rte_eal_intr_init() creates it, and reset to -1
+ * when the interrupt thread exits its loop
+ */
+static volatile int kq = -1;
+
+/*
+ * Fill the filter-specific fields of 'ke' for interrupt handle 'ih'.
+ *
+ * Alarm handles become one-shot EVFILT_TIMER events whose period is the
+ * soonest alarm timeout in nanoseconds; everything else becomes an
+ * EVFILT_READ event. In both cases the handle's fd is used as the ident.
+ *
+ * Returns 0 on success, or -1 (leaving 'ke' untouched) when the alarm
+ * timeout cannot be obtained.
+ */
+static int
+intr_source_to_kevent(const struct rte_intr_handle *ih, struct kevent *ke)
+{
+	uint64_t soonest_ns;
+
+	if (ih->type != RTE_INTR_HANDLE_ALARM) {
+		ke->filter = EVFILT_READ;
+		ke->ident = ih->fd;
+		return 0;
+	}
+
+	/* alarm callbacks are a special case: ask for the soonest timeout */
+	if (eal_alarm_get_timeout_ns(&soonest_ns) < 0)
+		return -1;
+
+	ke->filter = EVFILT_TIMER;
+	/* timers are one shot */
+	ke->flags |= EV_ONESHOT;
+	ke->fflags = NOTE_NSECONDS;
+	ke->data = soonest_ns;
+	ke->ident = ih->fd;
+
+	return 0;
+}
+/*
+ * Register 'cb' (called with 'cb_arg') for the interrupt source identified by
+ * intr_handle->fd, creating the source and adding it to the kqueue if needed.
+ * Alarm handles are always re-added so that the timer is re-set.
+ *
+ * Returns 0 on success, or a negative errno value: -EINVAL for invalid
+ * parameters, -ENODEV when the kqueue is not active or the handle cannot be
+ * converted to a kevent, -ENOMEM on allocation failure, or the negated errno
+ * reported by kevent().
+ */
int
rte_intr_callback_register(const struct rte_intr_handle *intr_handle,
- rte_intr_callback_fn cb,
- void *cb_arg)
+		rte_intr_callback_fn cb, void *cb_arg)
{
- RTE_SET_USED(intr_handle);
- RTE_SET_USED(cb);
- RTE_SET_USED(cb_arg);
+	struct rte_intr_callback *callback = NULL;
+	struct rte_intr_source *src = NULL;
+	/* add_event must start at 0: when the source already has callbacks
+	 * nothing below assigns it before it is tested
+	 */
+	int ret = 0, add_event = 0;
- return -ENOTSUP;
+	/* first do parameter checking */
+	if (intr_handle == NULL || intr_handle->fd < 0 || cb == NULL) {
+		RTE_LOG(ERR, EAL,
+			"Registering with invalid input parameter\n");
+		return -EINVAL;
+	}
+	if (kq < 0) {
+		RTE_LOG(ERR, EAL, "Kqueue is not active: %d\n", kq);
+		return -ENODEV;
+	}
+
+	/* allocate a new interrupt callback entity */
+	callback = calloc(1, sizeof(*callback));
+	if (callback == NULL) {
+		RTE_LOG(ERR, EAL, "Can not allocate memory\n");
+		return -ENOMEM;
+	}
+	callback->cb_fn = cb;
+	callback->cb_arg = cb_arg;
+
+	rte_spinlock_lock(&intr_lock);
+
+	/* check if there is at least one callback registered for the fd */
+	TAILQ_FOREACH(src, &intr_sources, next) {
+		if (src->intr_handle.fd == intr_handle->fd) {
+			/* we had no interrupts for this */
+			if (TAILQ_EMPTY(&src->callbacks))
+				add_event = 1;
+
+			TAILQ_INSERT_TAIL(&(src->callbacks), callback, next);
+			ret = 0;
+			break;
+		}
+	}
+
+	/* no existing callbacks for this - add new source */
+	if (src == NULL) {
+		src = calloc(1, sizeof(*src));
+		if (src == NULL) {
+			RTE_LOG(ERR, EAL, "Can not allocate memory\n");
+			ret = -ENOMEM;
+			goto fail;
+		} else {
+			src->intr_handle = *intr_handle;
+			TAILQ_INIT(&src->callbacks);
+			TAILQ_INSERT_TAIL(&(src->callbacks), callback, next);
+			TAILQ_INSERT_TAIL(&intr_sources, src, next);
+			add_event = 1;
+			ret = 0;
+		}
+	}
+
+	/* add events to the queue. timer events are special as we need to
+	 * re-set the timer.
+	 */
+	if (add_event || src->intr_handle.type == RTE_INTR_HANDLE_ALARM) {
+		struct kevent ke;
+
+		memset(&ke, 0, sizeof(ke));
+		ke.flags = EV_ADD; /* mark for addition to the queue */
+
+		if (intr_source_to_kevent(intr_handle, &ke) < 0) {
+			RTE_LOG(ERR, EAL, "Cannot convert interrupt handle to kevent\n");
+			ret = -ENODEV;
+			goto fail;
+		}
+
+		/**
+		 * add the intr file descriptor into wait list.
+		 */
+		if (kevent(kq, &ke, 1, NULL, 0, NULL) < 0) {
+			/* capture errno now: the logging calls below may
+			 * overwrite it before it is turned into 'ret'
+			 */
+			int err = errno;
+
+			/* currently, nic_uio does not support interrupts, so
+			 * this error will always be triggered and output to the
+			 * user. so, don't output it unless debug log level set.
+			 */
+			if (err == ENODEV)
+				RTE_LOG(DEBUG, EAL, "Interrupt handle %d not supported\n",
+					src->intr_handle.fd);
+			else
+				RTE_LOG(ERR, EAL, "Error adding fd %d "
+					"kevent, %s\n",
+					src->intr_handle.fd,
+					strerror(err));
+			ret = -err;
+			goto fail;
+		}
+	}
+	rte_spinlock_unlock(&intr_lock);
+
+	return ret;
+fail:
+	/* clean up: undo the list insertions made above; src is NULL only
+	 * when allocating a new source failed, in which case nothing was
+	 * inserted
+	 */
+	if (src != NULL) {
+		TAILQ_REMOVE(&(src->callbacks), callback, next);
+		if (TAILQ_EMPTY(&(src->callbacks))) {
+			TAILQ_REMOVE(&intr_sources, src, next);
+			free(src);
+		}
+	}
+	free(callback);
+	rte_spinlock_unlock(&intr_lock);
+	return ret;
}
+/*
+ * Remove every callback on the source identified by intr_handle->fd whose
+ * function equals 'cb_fn' and whose argument equals 'cb_arg' ('cb_arg' of
+ * (void *)-1 matches any argument).
+ *
+ * Returns the number of callbacks removed, or a negative value: -EINVAL for
+ * invalid parameters, -ENODEV when the kqueue is not active or the handle
+ * cannot be converted to a kevent, -ENOENT when no source exists for the fd,
+ * -EAGAIN when the source is currently being serviced.
+ */
int
rte_intr_callback_unregister(const struct rte_intr_handle *intr_handle,
- rte_intr_callback_fn cb,
- void *cb_arg)
+ rte_intr_callback_fn cb_fn, void *cb_arg)
{
- RTE_SET_USED(intr_handle);
- RTE_SET_USED(cb);
- RTE_SET_USED(cb_arg);
+ int ret;
+ struct rte_intr_source *src;
+ struct rte_intr_callback *cb, *next;
- return -ENOTSUP;
+ /* do parameter checking first */
+ if (intr_handle == NULL || intr_handle->fd < 0) {
+ RTE_LOG(ERR, EAL,
+ "Unregistering with invalid input parameter\n");
+ return -EINVAL;
+ }
+ if (kq < 0) {
+ RTE_LOG(ERR, EAL, "Kqueue is not active\n");
+ return -ENODEV;
+ }
+
+ rte_spinlock_lock(&intr_lock);
+
+ /* check if the interrupt source for the fd exists */
+ TAILQ_FOREACH(src, &intr_sources, next)
+ if (src->intr_handle.fd == intr_handle->fd)
+ break;
+
+ /* No interrupt source registered for the fd */
+ if (src == NULL) {
+ ret = -ENOENT;
+
+ /* interrupt source has some active callbacks right now. */
+ } else if (src->active != 0) {
+ ret = -EAGAIN;
+
+ /* ok to remove. */
+ } else {
+ struct kevent ke;
+
+ ret = 0;
+
+ /* remove it from the kqueue */
+ /* NOTE(review): the event is deleted before the callback list is
+  * scanned, so it is removed even when no callback matches or when
+  * other callbacks stay registered on this source - confirm intended.
+  */
+ memset(&ke, 0, sizeof(ke));
+ ke.flags = EV_DELETE; /* mark for deletion from the queue */
+
+ if (intr_source_to_kevent(intr_handle, &ke) < 0) {
+ RTE_LOG(ERR, EAL, "Cannot convert to kevent\n");
+ ret = -ENODEV;
+ goto out;
+ }
+
+ /**
+ * remove intr file descriptor from wait list.
+ */
+ if (kevent(kq, &ke, 1, NULL, 0, NULL) < 0) {
+ RTE_LOG(ERR, EAL, "Error removing fd %d kevent, %s\n",
+ src->intr_handle.fd, strerror(errno));
+ /* removing a non-existent event is an expected condition
+ * in some circumstances (e.g. oneshot events), so the
+ * failure is logged but does not abort the unregister.
+ */
+ }
+
+ /* walk through the callbacks and remove all that match; 'next' is
+  * fetched before a possible free(cb) so the walk stays valid.
+  */
+ for (cb = TAILQ_FIRST(&src->callbacks); cb != NULL; cb = next) {
+ next = TAILQ_NEXT(cb, next);
+ if (cb->cb_fn == cb_fn && (cb_arg == (void *)-1 ||
+ cb->cb_arg == cb_arg)) {
+ TAILQ_REMOVE(&src->callbacks, cb, next);
+ free(cb);
+ ret++;
+ }
+ }
+
+ /* all callbacks for that source are removed. */
+ if (TAILQ_EMPTY(&src->callbacks)) {
+ TAILQ_REMOVE(&intr_sources, src, next);
+ free(src);
+ }
+ }
+out:
+ rte_spinlock_unlock(&intr_lock);
+
+ return ret;
}
+/*
+ * Enable interrupts for 'intr_handle'.
+ *
+ * VDEV handles succeed immediately (return 0). A NULL handle or a negative
+ * fd / uio_cfg_fd returns -1. Every handle type reaching the switch also
+ * returns -1 (unknown types log an error first), so the trailing "return 0"
+ * is never reached.
+ */
int
-rte_intr_enable(const struct rte_intr_handle *intr_handle __rte_unused)
+rte_intr_enable(const struct rte_intr_handle *intr_handle)
{
- return -ENOTSUP;
+ if (intr_handle && intr_handle->type == RTE_INTR_HANDLE_VDEV)
+ return 0;
+
+ if (!intr_handle || intr_handle->fd < 0 || intr_handle->uio_cfg_fd < 0)
+ return -1;
+
+ switch (intr_handle->type) {
+ /* not used at this moment */
+ case RTE_INTR_HANDLE_ALARM:
+ return -1;
+ /* not used at this moment */
+ case RTE_INTR_HANDLE_DEV_EVENT:
+ return -1;
+ /* unknown handle type */
+ default:
+ RTE_LOG(ERR, EAL,
+ "Unknown handle type of fd %d\n",
+ intr_handle->fd);
+ return -1;
+ }
+
+ return 0;
}
+/*
+ * Disable interrupts for 'intr_handle'.
+ *
+ * VDEV handles succeed immediately (return 0). A NULL handle or a negative
+ * fd / uio_cfg_fd returns -1. Every handle type reaching the switch also
+ * returns -1 (unknown types log an error first), so the trailing "return 0"
+ * is never reached.
+ */
int
-rte_intr_disable(const struct rte_intr_handle *intr_handle __rte_unused)
+rte_intr_disable(const struct rte_intr_handle *intr_handle)
{
- return -ENOTSUP;
+ if (intr_handle && intr_handle->type == RTE_INTR_HANDLE_VDEV)
+ return 0;
+
+ if (!intr_handle || intr_handle->fd < 0 || intr_handle->uio_cfg_fd < 0)
+ return -1;
+
+ switch (intr_handle->type) {
+ /* not used at this moment */
+ case RTE_INTR_HANDLE_ALARM:
+ return -1;
+ /* not used at this moment */
+ case RTE_INTR_HANDLE_DEV_EVENT:
+ return -1;
+ /* unknown handle type */
+ default:
+ RTE_LOG(ERR, EAL,
+ "Unknown handle type of fd %d\n",
+ intr_handle->fd);
+ return -1;
+ }
+
+ return 0;
+}
+
+/*
+ * Dispatch the 'nfds' events returned by kevent() in 'events'.
+ *
+ * For each event the matching interrupt source is looked up by fd and marked
+ * active. Handle types outside ALARM/VDEV/EXT/DEV_EVENT first get one byte
+ * read from the descriptor. All registered callbacks are then invoked with
+ * intr_lock released, and the source is marked inactive again.
+ */
+static void
+eal_intr_process_interrupts(struct kevent *events, int nfds)
+{
+	struct rte_intr_callback active_cb;
+	union rte_intr_read_buffer buf;
+	struct rte_intr_callback *cb;
+	struct rte_intr_source *src;
+	bool call;
+	int n, bytes_read;
+
+	for (n = 0; n < nfds; n++) {
+		int event_fd = events[n].ident;
+
+		/* decide afresh for every event whether callbacks run, so a
+		 * failed read is not masked by an earlier successful event
+		 */
+		call = false;
+
+		rte_spinlock_lock(&intr_lock);
+		TAILQ_FOREACH(src, &intr_sources, next)
+			if (src->intr_handle.fd == event_fd)
+				break;
+		if (src == NULL) {
+			rte_spinlock_unlock(&intr_lock);
+			continue;
+		}
+
+		/* mark this interrupt source as active and release the lock. */
+		src->active = 1;
+		rte_spinlock_unlock(&intr_lock);
+
+		/* set the length to be read for different handle type */
+		switch (src->intr_handle.type) {
+		case RTE_INTR_HANDLE_ALARM:
+			bytes_read = 0;
+			call = true;
+			break;
+		case RTE_INTR_HANDLE_VDEV:
+		case RTE_INTR_HANDLE_EXT:
+			bytes_read = 0;
+			call = true;
+			break;
+		case RTE_INTR_HANDLE_DEV_EVENT:
+			bytes_read = 0;
+			call = true;
+			break;
+		default:
+			bytes_read = 1;
+			break;
+		}
+
+		if (bytes_read > 0) {
+			/**
+			 * read out to clear the ready-to-be-read flag
+			 * for kevent.
+			 */
+			bytes_read = read(event_fd, &buf, bytes_read);
+			if (bytes_read < 0) {
+				if (errno == EINTR || errno == EWOULDBLOCK) {
+					/* nothing to deliver: release the
+					 * source before moving on, otherwise
+					 * it stays "active" and can never be
+					 * unregistered
+					 */
+					rte_spinlock_lock(&intr_lock);
+					src->active = 0;
+					rte_spinlock_unlock(&intr_lock);
+					continue;
+				}
+
+				RTE_LOG(ERR, EAL, "Error reading from file "
+					"descriptor %d: %s\n",
+					event_fd,
+					strerror(errno));
+			} else if (bytes_read == 0)
+				RTE_LOG(ERR, EAL, "Read nothing from file "
+					"descriptor %d\n", event_fd);
+			else
+				call = true;
+		}
+
+		/* grab a lock, again to call callbacks and update status. */
+		rte_spinlock_lock(&intr_lock);
+
+		if (call) {
+			/* Finally, call all callbacks. */
+			TAILQ_FOREACH(cb, &src->callbacks, next) {
+
+				/* make a copy and unlock. */
+				active_cb = *cb;
+				rte_spinlock_unlock(&intr_lock);
+
+				/* call the actual callback */
+				active_cb.cb_fn(active_cb.cb_arg);
+
+				/* get the lock back. */
+				rte_spinlock_lock(&intr_lock);
+			}
+		}
+
+		/* we are done with that interrupt source, release it. */
+		src->active = 0;
+		rte_spinlock_unlock(&intr_lock);
+	}
+}
+
+/*
+ * Body of the interrupt thread: block in kevent() on the global kqueue and
+ * hand every batch of ready events to eal_intr_process_interrupts().
+ * The loop ends only when kevent() fails with an error other than EINTR;
+ * the kqueue is then closed and marked inactive.
+ */
+static void *
+eal_intr_thread_main(void *arg __rte_unused)
+{
+	struct kevent ready[MAX_INTR_EVENTS];
+	int n_ready;
+
+	while (1) {
+		/* no changelist and no timeout: just wait for events */
+		n_ready = kevent(kq, NULL, 0, ready, MAX_INTR_EVENTS, NULL);
+
+		/* at least one event is ready to be handled */
+		if (n_ready > 0) {
+			eal_intr_process_interrupts(ready, n_ready);
+			continue;
+		}
+
+		/* zero events would be a timeout, which never happens here */
+		if (n_ready == 0)
+			continue;
+
+		/* interrupted by a signal: wait again */
+		if (errno == EINTR)
+			continue;
+
+		RTE_LOG(ERR, EAL,
+			"kevent returns with fail\n");
+		break;
+	}
+	close(kq);
+	kq = -1;
+	return NULL;
}
+/*
+ * Initialise interrupt handling: reset the source list, create the kqueue and
+ * start the control thread that waits on it.
+ *
+ * Returns 0 on success, -1 if the kqueue cannot be created, or the non-zero
+ * value returned by rte_ctrl_thread_create() (rte_errno is set to its
+ * negation). On thread-creation failure the kqueue is closed again so that
+ * later register/unregister calls see it as inactive.
+ */
int
rte_eal_intr_init(void)
{
- return 0;
+	int ret = 0;
+
+	/* init the global interrupt source head */
+	TAILQ_INIT(&intr_sources);
+
+	kq = kqueue();
+	if (kq < 0) {
+		RTE_LOG(ERR, EAL, "Cannot create kqueue instance\n");
+		return -1;
+	}
+
+	/* create the host thread to wait/handle the interrupt */
+	ret = rte_ctrl_thread_create(&intr_thread, "eal-intr-thread", NULL,
+			eal_intr_thread_main, NULL);
+	if (ret != 0) {
+		rte_errno = -ret;
+		RTE_LOG(ERR, EAL,
+			"Failed to create thread for interrupt handling\n");
+		/* no thread will ever service or close this kqueue, so do
+		 * not leak the descriptor or leave it looking active
+		 */
+		close(kq);
+		kq = -1;
+	}
+
+	return ret;
}
int
diff --git a/lib/librte_eal/bsdapp/eal/eal_memalloc.c b/lib/librte_eal/bsdapp/eal/eal_memalloc.c
new file mode 100644
index 00000000..f7f07abd
--- /dev/null
+++ b/lib/librte_eal/bsdapp/eal/eal_memalloc.c
@@ -0,0 +1,54 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2017-2018 Intel Corporation
+ */
+
+#include <inttypes.h>
+
+#include <rte_log.h>
+#include <rte_memory.h>
+
+#include "eal_memalloc.h"
+
+/*
+ * FreeBSD stub: all arguments are ignored; logs an error and returns -1.
+ */
+int
+eal_memalloc_alloc_seg_bulk(struct rte_memseg **ms __rte_unused,
+ int __rte_unused n_segs, size_t __rte_unused page_sz,
+ int __rte_unused socket, bool __rte_unused exact)
+{
+ RTE_LOG(ERR, EAL, "Memory hotplug not supported on FreeBSD\n");
+ return -1;
+}
+
+/*
+ * FreeBSD stub: all arguments are ignored; logs an error and returns NULL.
+ */
+struct rte_memseg *
+eal_memalloc_alloc_seg(size_t __rte_unused page_sz, int __rte_unused socket)
+{
+ RTE_LOG(ERR, EAL, "Memory hotplug not supported on FreeBSD\n");
+ return NULL;
+}
+
+/*
+ * FreeBSD stub: 'ms' is ignored; logs an error and returns -1.
+ */
+int
+eal_memalloc_free_seg(struct rte_memseg *ms __rte_unused)
+{
+ RTE_LOG(ERR, EAL, "Memory hotplug not supported on FreeBSD\n");
+ return -1;
+}
+
+/*
+ * FreeBSD stub: all arguments are ignored; logs an error and returns -1.
+ */
+int
+eal_memalloc_free_seg_bulk(struct rte_memseg **ms __rte_unused,
+ int n_segs __rte_unused)
+{
+ RTE_LOG(ERR, EAL, "Memory hotplug not supported on FreeBSD\n");
+ return -1;
+}
+
+/*
+ * FreeBSD stub: logs an error and returns -1.
+ */
+int
+eal_memalloc_sync_with_primary(void)
+{
+ RTE_LOG(ERR, EAL, "Memory hotplug not supported on FreeBSD\n");
+ return -1;
+}
+
+/*
+ * FreeBSD stub: nothing to initialise, always returns 0.
+ */
+int
+eal_memalloc_init(void)
+{
+ return 0;
+}
diff --git a/lib/librte_eal/bsdapp/eal/eal_memory.c b/lib/librte_eal/bsdapp/eal/eal_memory.c
index bdfb8828..16d2bc7c 100644
--- a/lib/librte_eal/bsdapp/eal/eal_memory.c
+++ b/lib/librte_eal/bsdapp/eal/eal_memory.c
@@ -6,10 +6,13 @@
#include <sys/types.h>
#include <sys/sysctl.h>
#include <inttypes.h>
+#include <errno.h>
+#include <string.h>
#include <fcntl.h>
#include <rte_eal.h>
#include <rte_eal_memconfig.h>
+#include <rte_errno.h>
#include <rte_log.h>
#include <rte_string_fns.h>
#include "eal_private.h"
@@ -41,129 +44,253 @@ rte_eal_hugepage_init(void)
struct rte_mem_config *mcfg;
uint64_t total_mem = 0;
void *addr;
- unsigned i, j, seg_idx = 0;
+ unsigned int i, j, seg_idx = 0;
/* get pointer to global configuration */
mcfg = rte_eal_get_configuration()->mem_config;
/* for debug purposes, hugetlbfs can be disabled */
if (internal_config.no_hugetlbfs) {
- addr = malloc(internal_config.memory);
- mcfg->memseg[0].iova = (rte_iova_t)(uintptr_t)addr;
- mcfg->memseg[0].addr = addr;
- mcfg->memseg[0].hugepage_sz = RTE_PGSIZE_4K;
- mcfg->memseg[0].len = internal_config.memory;
- mcfg->memseg[0].socket_id = 0;
+ struct rte_memseg_list *msl;
+ struct rte_fbarray *arr;
+ struct rte_memseg *ms;
+ uint64_t page_sz;
+ int n_segs, cur_seg;
+
+ /* create a memseg list */
+ msl = &mcfg->memsegs[0];
+
+ page_sz = RTE_PGSIZE_4K;
+ n_segs = internal_config.memory / page_sz;
+
+ if (rte_fbarray_init(&msl->memseg_arr, "nohugemem", n_segs,
+ sizeof(struct rte_memseg))) {
+ RTE_LOG(ERR, EAL, "Cannot allocate memseg list\n");
+ return -1;
+ }
+
+ addr = mmap(NULL, internal_config.memory,
+ PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+ if (addr == MAP_FAILED) {
+ RTE_LOG(ERR, EAL, "%s: mmap() failed: %s\n", __func__,
+ strerror(errno));
+ return -1;
+ }
+ msl->base_va = addr;
+ msl->page_sz = page_sz;
+ msl->socket_id = 0;
+
+ /* populate memsegs. each memseg is 1 page long */
+ for (cur_seg = 0; cur_seg < n_segs; cur_seg++) {
+ arr = &msl->memseg_arr;
+
+ ms = rte_fbarray_get(arr, cur_seg);
+ if (rte_eal_iova_mode() == RTE_IOVA_VA)
+ ms->iova = (uintptr_t)addr;
+ else
+ ms->iova = RTE_BAD_IOVA;
+ ms->addr = addr;
+ ms->hugepage_sz = page_sz;
+ ms->len = page_sz;
+ ms->socket_id = 0;
+
+ rte_fbarray_set_used(arr, cur_seg);
+
+ addr = RTE_PTR_ADD(addr, page_sz);
+ }
return 0;
}
/* map all hugepages and sort them */
for (i = 0; i < internal_config.num_hugepage_sizes; i ++){
struct hugepage_info *hpi;
+ rte_iova_t prev_end = 0;
+ int prev_ms_idx = -1;
+ uint64_t page_sz, mem_needed;
+ unsigned int n_pages, max_pages;
hpi = &internal_config.hugepage_info[i];
- for (j = 0; j < hpi->num_pages[0]; j++) {
+ page_sz = hpi->hugepage_sz;
+ max_pages = hpi->num_pages[0];
+ mem_needed = RTE_ALIGN_CEIL(internal_config.memory - total_mem,
+ page_sz);
+
+ n_pages = RTE_MIN(mem_needed / page_sz, max_pages);
+
+ for (j = 0; j < n_pages; j++) {
+ struct rte_memseg_list *msl;
+ struct rte_fbarray *arr;
struct rte_memseg *seg;
+ int msl_idx, ms_idx;
rte_iova_t physaddr;
int error;
size_t sysctl_size = sizeof(physaddr);
char physaddr_str[64];
+ bool is_adjacent;
- addr = mmap(NULL, hpi->hugepage_sz, PROT_READ|PROT_WRITE,
- MAP_SHARED, hpi->lock_descriptor,
- j * EAL_PAGE_SIZE);
- if (addr == MAP_FAILED) {
- RTE_LOG(ERR, EAL, "Failed to mmap buffer %u from %s\n",
- j, hpi->hugedir);
- return -1;
- }
-
- snprintf(physaddr_str, sizeof(physaddr_str), "hw.contigmem"
- ".physaddr.%d", j);
- error = sysctlbyname(physaddr_str, &physaddr, &sysctl_size,
- NULL, 0);
+ /* first, check if this segment is IOVA-adjacent to
+ * the previous one.
+ */
+ snprintf(physaddr_str, sizeof(physaddr_str),
+ "hw.contigmem.physaddr.%d", j);
+ error = sysctlbyname(physaddr_str, &physaddr,
+ &sysctl_size, NULL, 0);
if (error < 0) {
RTE_LOG(ERR, EAL, "Failed to get physical addr for buffer %u "
"from %s\n", j, hpi->hugedir);
return -1;
}
- seg = &mcfg->memseg[seg_idx++];
+ is_adjacent = prev_end != 0 && physaddr == prev_end;
+ prev_end = physaddr + hpi->hugepage_sz;
+
+ for (msl_idx = 0; msl_idx < RTE_MAX_MEMSEG_LISTS;
+ msl_idx++) {
+ bool empty, need_hole;
+ msl = &mcfg->memsegs[msl_idx];
+ arr = &msl->memseg_arr;
+
+ if (msl->page_sz != page_sz)
+ continue;
+
+ empty = arr->count == 0;
+
+ /* we need a hole if this isn't an empty memseg
+ * list, and if previous segment was not
+ * adjacent to current one.
+ */
+ need_hole = !empty && !is_adjacent;
+
+ /* we need 1, plus hole if not adjacent */
+ ms_idx = rte_fbarray_find_next_n_free(arr,
+ 0, 1 + (need_hole ? 1 : 0));
+
+ /* memseg list is full? */
+ if (ms_idx < 0)
+ continue;
+
+ if (need_hole && prev_ms_idx == ms_idx - 1)
+ ms_idx++;
+ prev_ms_idx = ms_idx;
+
+ break;
+ }
+ if (msl_idx == RTE_MAX_MEMSEG_LISTS) {
+ RTE_LOG(ERR, EAL, "Could not find space for memseg. Please increase %s and/or %s in configuration.\n",
+ RTE_STR(CONFIG_RTE_MAX_MEMSEG_PER_TYPE),
+ RTE_STR(CONFIG_RTE_MAX_MEM_PER_TYPE));
+ return -1;
+ }
+ arr = &msl->memseg_arr;
+ seg = rte_fbarray_get(arr, ms_idx);
+
+ addr = RTE_PTR_ADD(msl->base_va,
+ (size_t)msl->page_sz * ms_idx);
+
+ /* address is already mapped in memseg list, so using
+ * MAP_FIXED here is safe.
+ */
+ addr = mmap(addr, page_sz, PROT_READ|PROT_WRITE,
+ MAP_SHARED | MAP_FIXED,
+ hpi->lock_descriptor,
+ j * EAL_PAGE_SIZE);
+ if (addr == MAP_FAILED) {
+ RTE_LOG(ERR, EAL, "Failed to mmap buffer %u from %s\n",
+ j, hpi->hugedir);
+ return -1;
+ }
+
seg->addr = addr;
seg->iova = physaddr;
- seg->hugepage_sz = hpi->hugepage_sz;
- seg->len = hpi->hugepage_sz;
+ seg->hugepage_sz = page_sz;
+ seg->len = page_sz;
seg->nchannel = mcfg->nchannel;
seg->nrank = mcfg->nrank;
seg->socket_id = 0;
+ rte_fbarray_set_used(arr, ms_idx);
+
RTE_LOG(INFO, EAL, "Mapped memory segment %u @ %p: physaddr:0x%"
PRIx64", len %zu\n",
- seg_idx, addr, physaddr, hpi->hugepage_sz);
- if (total_mem >= internal_config.memory ||
- seg_idx >= RTE_MAX_MEMSEG)
- break;
+ seg_idx++, addr, physaddr, page_sz);
+
+ total_mem += seg->len;
}
+ if (total_mem >= internal_config.memory)
+ break;
+ }
+ if (total_mem < internal_config.memory) {
+ RTE_LOG(ERR, EAL, "Couldn't reserve requested memory, "
+ "requested: %" PRIu64 "M "
+ "available: %" PRIu64 "M\n",
+ internal_config.memory >> 20, total_mem >> 20);
+ return -1;
}
return 0;
}
+/* State carried across attach_segment() invocations during a memseg walk. */
+struct attach_walk_args {
+ int fd_hugepage; /* descriptor passed to mmap() for every segment */
+ int seg_idx; /* segments mapped so far; times EAL_PAGE_SIZE gives the mmap offset */
+};
+/*
+ * Memseg-walk callback: map the next buffer of the descriptor held in 'arg'
+ * (a struct attach_walk_args) at exactly the address recorded in 'ms'.
+ *
+ * Returns 0 and advances the segment counter on success, -1 if mmap() fails
+ * or returns a different address.
+ */
+static int
+attach_segment(const struct rte_memseg_list *msl __rte_unused,
+		const struct rte_memseg *ms, void *arg)
+{
+	struct attach_walk_args *state = arg;
+	void *mapped = mmap(ms->addr, ms->len, PROT_READ | PROT_WRITE,
+			MAP_SHARED | MAP_FIXED, state->fd_hugepage,
+			state->seg_idx * EAL_PAGE_SIZE);
+
+	if (mapped == MAP_FAILED)
+		return -1;
+	if (mapped != ms->addr)
+		return -1;
+
+	state->seg_idx++;
+
+	return 0;
+}
+
int
rte_eal_hugepage_attach(void)
{
const struct hugepage_info *hpi;
- int fd_hugepage_info, fd_hugepage = -1;
- unsigned i = 0;
- struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
+ int fd_hugepage = -1;
+ unsigned int i;
- /* Obtain a file descriptor for hugepage_info */
- fd_hugepage_info = open(eal_hugepage_info_path(), O_RDONLY);
- if (fd_hugepage_info < 0) {
- RTE_LOG(ERR, EAL, "Could not open %s\n", eal_hugepage_info_path());
- return -1;
- }
+ hpi = &internal_config.hugepage_info[0];
- /* Map the shared hugepage_info into the process address spaces */
- hpi = mmap(NULL, sizeof(struct hugepage_info), PROT_READ, MAP_PRIVATE,
- fd_hugepage_info, 0);
- if (hpi == MAP_FAILED) {
- RTE_LOG(ERR, EAL, "Could not mmap %s\n", eal_hugepage_info_path());
- goto error;
- }
-
- /* Obtain a file descriptor for contiguous memory */
- fd_hugepage = open(hpi->hugedir, O_RDWR);
- if (fd_hugepage < 0) {
- RTE_LOG(ERR, EAL, "Could not open %s\n", hpi->hugedir);
- goto error;
- }
+ for (i = 0; i < internal_config.num_hugepage_sizes; i++) {
+ const struct hugepage_info *cur_hpi = &hpi[i];
+ struct attach_walk_args wa;
- /* Map the contiguous memory into each memory segment */
- for (i = 0; i < hpi->num_pages[0]; i++) {
+ memset(&wa, 0, sizeof(wa));
- void *addr;
- struct rte_memseg *seg = &mcfg->memseg[i];
+ /* Obtain a file descriptor for contiguous memory */
+ fd_hugepage = open(cur_hpi->hugedir, O_RDWR);
+ if (fd_hugepage < 0) {
+ RTE_LOG(ERR, EAL, "Could not open %s\n",
+ cur_hpi->hugedir);
+ goto error;
+ }
+ wa.fd_hugepage = fd_hugepage;
+ wa.seg_idx = 0;
- addr = mmap(seg->addr, hpi->hugepage_sz, PROT_READ|PROT_WRITE,
- MAP_SHARED|MAP_FIXED, fd_hugepage,
- i * EAL_PAGE_SIZE);
- if (addr == MAP_FAILED || addr != seg->addr) {
+ /* Map the contiguous memory into each memory segment */
+ if (rte_memseg_walk(attach_segment, &wa) < 0) {
RTE_LOG(ERR, EAL, "Failed to mmap buffer %u from %s\n",
- i, hpi->hugedir);
+ wa.seg_idx, cur_hpi->hugedir);
goto error;
}
+ close(fd_hugepage);
+ fd_hugepage = -1;
}
/* hugepage_info is no longer required */
- munmap((void *)(uintptr_t)hpi, sizeof(struct hugepage_info));
- close(fd_hugepage_info);
- close(fd_hugepage);
return 0;
error:
- if (fd_hugepage_info >= 0)
- close(fd_hugepage_info);
if (fd_hugepage >= 0)
close(fd_hugepage);
return -1;
@@ -174,3 +301,217 @@ rte_eal_using_phys_addrs(void)
{
return 0;
}
+
+/*
+ * Compute how many bytes a single memseg list for pages of 'page_sz' bytes
+ * should cover, given that at most 'max_mem' bytes are still wanted.
+ *
+ * The result is capped by both RTE_MAX_MEMSEG_PER_LIST pages and
+ * RTE_MAX_MEM_MB_PER_LIST megabytes, is never smaller than one page, and is
+ * aligned to 'page_sz'.
+ */
+static uint64_t
+get_mem_amount(uint64_t page_sz, uint64_t max_mem)
+{
+	const uint64_t seg_cap = RTE_MAX_MEMSEG_PER_LIST;
+	const uint64_t list_cap = (uint64_t)RTE_MAX_MEM_MB_PER_LIST << 20;
+	uint64_t mem_cap, list_sz;
+
+	/* tighter of the per-list byte limit and the caller's limit */
+	mem_cap = RTE_MIN(list_cap, max_mem);
+
+	/* tighter of the page-count limit and the byte limit */
+	list_sz = RTE_MIN(page_sz * seg_cap, mem_cap);
+
+	/* a list must hold at least one page */
+	if (list_sz < page_sz)
+		list_sz = page_sz;
+
+	return RTE_ALIGN(list_sz, page_sz);
+}
+
+/* fbarray name pattern: page size in kB, socket id, per-type list index */
+#define MEMSEG_LIST_FMT "memseg-%" PRIu64 "k-%i-%i"
+/*
+ * Initialise memseg list 'msl': create its backing fbarray with room for
+ * 'n_segs' struct rte_memseg entries and record page size and socket id.
+ * base_va is left NULL here; no virtual address space is reserved yet.
+ *
+ * Returns 0 on success, -1 if the fbarray cannot be created.
+ */
+static int
+alloc_memseg_list(struct rte_memseg_list *msl, uint64_t page_sz,
+ int n_segs, int socket_id, int type_msl_idx)
+{
+ char name[RTE_FBARRAY_NAME_LEN];
+
+ snprintf(name, sizeof(name), MEMSEG_LIST_FMT, page_sz >> 10, socket_id,
+ type_msl_idx);
+ if (rte_fbarray_init(&msl->memseg_arr, name, n_segs,
+ sizeof(struct rte_memseg))) {
+ RTE_LOG(ERR, EAL, "Cannot allocate memseg list: %s\n",
+ rte_strerror(rte_errno));
+ return -1;
+ }
+
+ msl->page_sz = page_sz;
+ msl->socket_id = socket_id;
+ msl->base_va = NULL;
+
+ /* note: the page size is printed in kB, in hexadecimal */
+ RTE_LOG(DEBUG, EAL, "Memseg list allocated: 0x%zxkB at socket %i\n",
+ (size_t)page_sz >> 10, socket_id);
+
+ return 0;
+}
+
+/*
+ * Reserve virtual address space for memseg list 'msl': page_sz times the
+ * fbarray length, requested from eal_get_virtual_area() with the current
+ * msl->base_va as the address argument. On success the returned address is
+ * stored back into msl->base_va.
+ *
+ * Returns 0 on success, -1 (after logging) if the area cannot be obtained.
+ */
+static int
+alloc_va_space(struct rte_memseg_list *msl)
+{
+ uint64_t page_sz;
+ size_t mem_sz;
+ void *addr;
+ int flags = 0;
+
+#ifdef RTE_ARCH_PPC_64
+ flags |= MAP_HUGETLB;
+#endif
+
+ page_sz = msl->page_sz;
+ mem_sz = page_sz * msl->memseg_arr.len;
+
+ addr = eal_get_virtual_area(msl->base_va, &mem_sz, page_sz, 0, flags);
+ if (addr == NULL) {
+ /* EADDRNOTAVAIL gets a dedicated hint about --base-virtaddr */
+ if (rte_errno == EADDRNOTAVAIL)
+ RTE_LOG(ERR, EAL, "Could not mmap %llu bytes at [%p] - please use '--base-virtaddr' option\n",
+ (unsigned long long)mem_sz, msl->base_va);
+ else
+ RTE_LOG(ERR, EAL, "Cannot reserve memory\n");
+ return -1;
+ }
+ msl->base_va = addr;
+
+ return 0;
+}
+
+
+/*
+ * Primary-process memseg setup: for every hugepage size in internal_config,
+ * create memseg lists and reserve their VA space until either the per-type
+ * memory limit or the per-type segment limit is reached.
+ *
+ * Does nothing (returns 0) in no-hugetlbfs mode. Returns -1 when
+ * RTE_MAX_MEMSEG_LISTS is exhausted or a list / its VA space cannot be
+ * allocated, 0 otherwise.
+ */
+static int
+memseg_primary_init(void)
+{
+ struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
+ int hpi_idx, msl_idx = 0;
+ struct rte_memseg_list *msl;
+ uint64_t max_mem, total_mem;
+
+ /* no-huge does not need this at all */
+ if (internal_config.no_hugetlbfs)
+ return 0;
+
+ /* FreeBSD has an issue where core dump will dump the entire memory
+ * contents, including anonymous zero-page memory. Therefore, while we
+ * will be limiting total amount of memory to RTE_MAX_MEM_MB, we will
+ * also be further limiting total memory amount to whatever memory is
+ * available to us through contigmem driver (plus spacing blocks).
+ *
+ * so, at each stage, we will be checking how much memory we are
+ * preallocating, and adjust all the values accordingly.
+ */
+
+ max_mem = (uint64_t)RTE_MAX_MEM_MB << 20;
+ total_mem = 0;
+
+ /* create memseg lists */
+ for (hpi_idx = 0; hpi_idx < (int) internal_config.num_hugepage_sizes;
+ hpi_idx++) {
+ uint64_t max_type_mem, total_type_mem = 0;
+ uint64_t avail_mem;
+ int type_msl_idx, max_segs, avail_segs, total_segs = 0;
+ struct hugepage_info *hpi;
+ uint64_t hugepage_sz;
+
+ hpi = &internal_config.hugepage_info[hpi_idx];
+ hugepage_sz = hpi->hugepage_sz;
+
+ /* no NUMA support on FreeBSD */
+
+ /* check if we've already exceeded total memory amount */
+ if (total_mem >= max_mem)
+ break;
+
+ /* first, calculate theoretical limits according to config */
+ max_type_mem = RTE_MIN(max_mem - total_mem,
+ (uint64_t)RTE_MAX_MEM_MB_PER_TYPE << 20);
+ max_segs = RTE_MAX_MEMSEG_PER_TYPE;
+
+ /* now, limit all of that to whatever will actually be
+ * available to us, because without dynamic allocation support,
+ * all of that extra memory will be sitting there being useless
+ * and slowing down core dumps in case of a crash.
+ *
+ * we need (N*2)-1 segments because we cannot guarantee that
+ * each segment will be IOVA-contiguous with the previous one,
+ * so we will allocate more and put spaces inbetween segments
+ * that are non-contiguous.
+ */
+ avail_segs = (hpi->num_pages[0] * 2) - 1;
+ avail_mem = avail_segs * hugepage_sz;
+
+ max_type_mem = RTE_MIN(avail_mem, max_type_mem);
+ max_segs = RTE_MIN(avail_segs, max_segs);
+
+ /* keep adding lists for this page size until one limit is hit */
+ type_msl_idx = 0;
+ while (total_type_mem < max_type_mem &&
+ total_segs < max_segs) {
+ uint64_t cur_max_mem, cur_mem;
+ unsigned int n_segs;
+
+ if (msl_idx >= RTE_MAX_MEMSEG_LISTS) {
+ RTE_LOG(ERR, EAL,
+ "No more space in memseg lists, please increase %s\n",
+ RTE_STR(CONFIG_RTE_MAX_MEMSEG_LISTS));
+ return -1;
+ }
+
+ msl = &mcfg->memsegs[msl_idx++];
+
+ /* memory still allowed for this page size */
+ cur_max_mem = max_type_mem - total_type_mem;
+
+ cur_mem = get_mem_amount(hugepage_sz,
+ cur_max_mem);
+ n_segs = cur_mem / hugepage_sz;
+
+ if (alloc_memseg_list(msl, hugepage_sz, n_segs,
+ 0, type_msl_idx))
+ return -1;
+
+ /* account using the length the fbarray actually reports */
+ total_segs += msl->memseg_arr.len;
+ total_type_mem = total_segs * hugepage_sz;
+ type_msl_idx++;
+
+ if (alloc_va_space(msl)) {
+ RTE_LOG(ERR, EAL, "Cannot allocate VA space for memseg list\n");
+ return -1;
+ }
+ }
+ total_mem += total_type_mem;
+ }
+ return 0;
+}
+
+/*
+ * Secondary-process memseg setup: for every non-empty memseg list in the
+ * shared memory config, attach to its fbarray and reserve its VA space.
+ *
+ * Returns 0 on success, -1 as soon as an attach or reservation fails.
+ */
+static int
+memseg_secondary_init(void)
+{
+	struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
+	struct rte_memseg_list *list;
+	int idx;
+
+	for (idx = 0; idx < RTE_MAX_MEMSEG_LISTS; idx++) {
+		list = &mcfg->memsegs[idx];
+
+		/* a zero-length array marks an empty list slot */
+		if (list->memseg_arr.len == 0)
+			continue;
+
+		if (rte_fbarray_attach(&list->memseg_arr)) {
+			RTE_LOG(ERR, EAL, "Cannot attach to primary process memseg lists\n");
+			return -1;
+		}
+
+		/* preallocate VA space */
+		if (alloc_va_space(list)) {
+			RTE_LOG(ERR, EAL, "Cannot preallocate VA space for hugepage memory\n");
+			return -1;
+		}
+	}
+
+	return 0;
+}
+
+/*
+ * Set up memseg lists for this process: primary processes create them,
+ * all other process types attach to the existing ones.
+ */
+int
+rte_eal_memseg_init(void)
+{
+	if (rte_eal_process_type() == RTE_PROC_PRIMARY)
+		return memseg_primary_init();
+
+	return memseg_secondary_init();
+}
diff --git a/lib/librte_eal/bsdapp/eal/eal_thread.c b/lib/librte_eal/bsdapp/eal/eal_thread.c
index d602daf8..309b5872 100644
--- a/lib/librte_eal/bsdapp/eal/eal_thread.c
+++ b/lib/librte_eal/bsdapp/eal/eal_thread.c
@@ -119,7 +119,7 @@ eal_thread_loop(__attribute__((unused)) void *arg)
if (eal_thread_set_affinity() < 0)
rte_panic("cannot set affinity\n");
- ret = eal_thread_dump_affinity(cpuset, RTE_CPU_AFFINITY_STR_LEN);
+ ret = eal_thread_dump_affinity(cpuset, sizeof(cpuset));
RTE_LOG(DEBUG, EAL, "lcore %u is ready (tid=%p;cpuset=[%s%s])\n",
lcore_id, thread_id, cpuset, ret == 0 ? "" : "...");
diff --git a/lib/librte_eal/bsdapp/eal/meson.build b/lib/librte_eal/bsdapp/eal/meson.build
index e83fc919..3945b529 100644
--- a/lib/librte_eal/bsdapp/eal/meson.build
+++ b/lib/librte_eal/bsdapp/eal/meson.build
@@ -4,12 +4,17 @@
env_objs = []
env_headers = []
env_sources = files('eal_alarm.c',
+ 'eal_cpuflags.c',
'eal_debug.c',
'eal_hugepage_info.c',
'eal_interrupts.c',
'eal_lcore.c',
+ 'eal_memalloc.c',
'eal_thread.c',
'eal_timer.c',
'eal.c',
'eal_memory.c',
+ 'eal_dev.c'
)
+
+deps += ['kvargs']
diff --git a/lib/librte_eal/bsdapp/nic_uio/BSDmakefile b/lib/librte_eal/bsdapp/nic_uio/BSDmakefile
deleted file mode 100644
index b6f92d55..00000000
--- a/lib/librte_eal/bsdapp/nic_uio/BSDmakefile
+++ /dev/null
@@ -1,8 +0,0 @@
-# SPDX-License-Identifier: BSD-3-Clause
-# Copyright(c) 2010-2014 Intel Corporation
-#
-
-KMOD= nic_uio
-SRCS= nic_uio.c device_if.h bus_if.h pci_if.h
-
-.include <bsd.kmod.mk>
diff --git a/lib/librte_eal/bsdapp/nic_uio/Makefile b/lib/librte_eal/bsdapp/nic_uio/Makefile
deleted file mode 100644
index 376ef3a3..00000000
--- a/lib/librte_eal/bsdapp/nic_uio/Makefile
+++ /dev/null
@@ -1,24 +0,0 @@
-# SPDX-License-Identifier: BSD-3-Clause
-# Copyright(c) 2010-2014 Intel Corporation
-
-include $(RTE_SDK)/mk/rte.vars.mk
-
-#
-# module name and path
-#
-MODULE = nic_uio
-
-#
-# CFLAGS
-#
-MODULE_CFLAGS += -I$(SRCDIR)
-MODULE_CFLAGS += -I$(RTE_OUTPUT)/include
-MODULE_CFLAGS += -Winline -Wall -Werror
-MODULE_CFLAGS += -include $(RTE_OUTPUT)/include/rte_config.h
-
-#
-# all source are stored in SRCS-y
-#
-SRCS-y := nic_uio.c
-
-include $(RTE_SDK)/mk/rte.bsdmodule.mk
diff --git a/lib/librte_eal/bsdapp/nic_uio/meson.build b/lib/librte_eal/bsdapp/nic_uio/meson.build
deleted file mode 100644
index 4bdaf969..00000000
--- a/lib/librte_eal/bsdapp/nic_uio/meson.build
+++ /dev/null
@@ -1,4 +0,0 @@
-# SPDX-License-Identifier: BSD-3-Clause
-# Copyright(c) 2017 Intel Corporation
-
-sources = files('nic_uio.c')
diff --git a/lib/librte_eal/bsdapp/nic_uio/nic_uio.c b/lib/librte_eal/bsdapp/nic_uio/nic_uio.c
deleted file mode 100644
index 401b487e..00000000
--- a/lib/librte_eal/bsdapp/nic_uio/nic_uio.c
+++ /dev/null
@@ -1,350 +0,0 @@
-/* SPDX-License-Identifier: BSD-3-Clause
- * Copyright(c) 2010-2014 Intel Corporation
- */
-#include <sys/cdefs.h>
-__FBSDID("$FreeBSD$");
-
-#include <sys/param.h> /* defines used in kernel.h */
-#include <sys/module.h>
-#include <sys/kernel.h> /* types used in module initialization */
-#include <sys/conf.h> /* cdevsw struct */
-#include <sys/bus.h> /* structs, prototypes for pci bus stuff and DEVMETHOD */
-#include <sys/rman.h>
-#include <sys/systm.h>
-#include <sys/rwlock.h>
-#include <sys/proc.h>
-
-#include <machine/bus.h>
-#include <dev/pci/pcivar.h> /* For pci_get macros! */
-#include <dev/pci/pcireg.h> /* The softc holds our per-instance data. */
-#include <vm/vm.h>
-#include <vm/uma.h>
-#include <vm/vm_object.h>
-#include <vm/vm_page.h>
-#include <vm/vm_pager.h>
-
-
-#define MAX_BARS (PCIR_MAX_BAR_0 + 1)
-
-#define MAX_DETACHED_DEVICES 128
-static device_t detached_devices[MAX_DETACHED_DEVICES] = {};
-static int num_detached = 0;
-
-struct nic_uio_softc {
- device_t dev_t;
- struct cdev *my_cdev;
- int bar_id[MAX_BARS];
- struct resource *bar_res[MAX_BARS];
- u_long bar_start[MAX_BARS];
- u_long bar_size[MAX_BARS];
-};
-
-/* Function prototypes */
-static d_open_t nic_uio_open;
-static d_close_t nic_uio_close;
-static d_mmap_t nic_uio_mmap;
-static d_mmap_single_t nic_uio_mmap_single;
-static int nic_uio_probe(device_t dev);
-static int nic_uio_attach(device_t dev);
-static int nic_uio_detach(device_t dev);
-static int nic_uio_shutdown(void);
-static int nic_uio_modevent(module_t mod, int type, void *arg);
-
-static struct cdevsw uio_cdevsw = {
- .d_name = "nic_uio",
- .d_version = D_VERSION,
- .d_open = nic_uio_open,
- .d_close = nic_uio_close,
- .d_mmap = nic_uio_mmap,
- .d_mmap_single = nic_uio_mmap_single,
-};
-
-static device_method_t nic_uio_methods[] = {
- DEVMETHOD(device_probe, nic_uio_probe),
- DEVMETHOD(device_attach, nic_uio_attach),
- DEVMETHOD(device_detach, nic_uio_detach),
- DEVMETHOD_END
-};
-
-struct device {
- int vend;
- int dev;
-};
-
-struct pci_bdf {
- uint32_t bus;
- uint32_t devid;
- uint32_t function;
-};
-
-static devclass_t nic_uio_devclass;
-
-DEFINE_CLASS_0(nic_uio, nic_uio_driver, nic_uio_methods, sizeof(struct nic_uio_softc));
-DRIVER_MODULE(nic_uio, pci, nic_uio_driver, nic_uio_devclass, nic_uio_modevent, 0);
-
-static int
-nic_uio_mmap(struct cdev *cdev, vm_ooffset_t offset, vm_paddr_t *paddr,
- int prot, vm_memattr_t *memattr)
-{
- *paddr = offset;
- return 0;
-}
-
-static int
-nic_uio_mmap_single(struct cdev *cdev, vm_ooffset_t *offset, vm_size_t size,
- struct vm_object **obj, int nprot)
-{
- /*
- * The BAR index is encoded in the offset. Divide the offset by
- * PAGE_SIZE to get the index of the bar requested by the user
- * app.
- */
- unsigned bar = *offset/PAGE_SIZE;
- struct nic_uio_softc *sc = cdev->si_drv1;
-
- if (bar >= MAX_BARS)
- return EINVAL;
-
- if (sc->bar_res[bar] == NULL) {
- sc->bar_id[bar] = PCIR_BAR(bar);
-
- if (PCI_BAR_IO(pci_read_config(sc->dev_t, sc->bar_id[bar], 4)))
- sc->bar_res[bar] = bus_alloc_resource_any(sc->dev_t, SYS_RES_IOPORT,
- &sc->bar_id[bar], RF_ACTIVE);
- else
- sc->bar_res[bar] = bus_alloc_resource_any(sc->dev_t, SYS_RES_MEMORY,
- &sc->bar_id[bar], RF_ACTIVE);
- }
- if (sc->bar_res[bar] == NULL)
- return ENXIO;
-
- sc->bar_start[bar] = rman_get_start(sc->bar_res[bar]);
- sc->bar_size[bar] = rman_get_size(sc->bar_res[bar]);
-
- device_printf(sc->dev_t, "Bar %u @ %lx, size %lx\n", bar,
- sc->bar_start[bar], sc->bar_size[bar]);
-
- *offset = sc->bar_start[bar];
- *obj = vm_pager_allocate(OBJT_DEVICE, cdev, size, nprot, *offset,
- curthread->td_ucred);
- return 0;
-}
-
-
-int
-nic_uio_open(struct cdev *dev, int oflags, int devtype, struct thread *td)
-{
- return 0;
-}
-
-int
-nic_uio_close(struct cdev *dev, int fflag, int devtype, struct thread *td)
-{
- return 0;
-}
-
-static int
-nic_uio_probe (device_t dev)
-{
- int i;
- unsigned int bus = pci_get_bus(dev);
- unsigned int device = pci_get_slot(dev);
- unsigned int function = pci_get_function(dev);
-
- char bdf_str[256];
- char *token, *remaining;
-
- /* First check if we found this on load */
- for (i = 0; i < num_detached; i++)
- if (bus == pci_get_bus(detached_devices[i]) &&
- device == pci_get_slot(detached_devices[i]) &&
- function == pci_get_function(detached_devices[i])) {
- device_set_desc(dev, "DPDK PCI Device");
- return BUS_PROBE_SPECIFIC;
- }
-
- /* otherwise check if it's a new device and if it matches the BDF */
- memset(bdf_str, 0, sizeof(bdf_str));
- TUNABLE_STR_FETCH("hw.nic_uio.bdfs", bdf_str, sizeof(bdf_str));
- remaining = bdf_str;
- while (1) {
- if (remaining == NULL || remaining[0] == '\0')
- break;
- token = strsep(&remaining, ",:");
- if (token == NULL)
- break;
- bus = strtol(token, NULL, 10);
- token = strsep(&remaining, ",:");
- if (token == NULL)
- break;
- device = strtol(token, NULL, 10);
- token = strsep(&remaining, ",:");
- if (token == NULL)
- break;
- function = strtol(token, NULL, 10);
-
- if (bus == pci_get_bus(dev) &&
- device == pci_get_slot(dev) &&
- function == pci_get_function(dev)) {
-
- if (num_detached < MAX_DETACHED_DEVICES) {
- printf("%s: probed dev=%p\n",
- __func__, dev);
- detached_devices[num_detached++] = dev;
- device_set_desc(dev, "DPDK PCI Device");
- return BUS_PROBE_SPECIFIC;
- } else {
- printf("%s: reached MAX_DETACHED_DEVICES=%d. dev=%p won't be reattached\n",
- __func__, MAX_DETACHED_DEVICES,
- dev);
- break;
- }
- }
- }
-
- return ENXIO;
-}
-
-static int
-nic_uio_attach(device_t dev)
-{
- int i;
- struct nic_uio_softc *sc;
-
- sc = device_get_softc(dev);
- sc->dev_t = dev;
- sc->my_cdev = make_dev(&uio_cdevsw, device_get_unit(dev),
- UID_ROOT, GID_WHEEL, 0600, "uio@pci:%u:%u:%u",
- pci_get_bus(dev), pci_get_slot(dev), pci_get_function(dev));
- if (sc->my_cdev == NULL)
- return ENXIO;
- sc->my_cdev->si_drv1 = sc;
-
- for (i = 0; i < MAX_BARS; i++)
- sc->bar_res[i] = NULL;
-
- pci_enable_busmaster(dev);
-
- return 0;
-}
-
-static int
-nic_uio_detach(device_t dev)
-{
- int i;
- struct nic_uio_softc *sc;
- sc = device_get_softc(dev);
-
- for (i = 0; i < MAX_BARS; i++)
- if (sc->bar_res[i] != NULL) {
-
- if (PCI_BAR_IO(pci_read_config(dev, sc->bar_id[i], 4)))
- bus_release_resource(dev, SYS_RES_IOPORT, sc->bar_id[i],
- sc->bar_res[i]);
- else
- bus_release_resource(dev, SYS_RES_MEMORY, sc->bar_id[i],
- sc->bar_res[i]);
- }
-
- if (sc->my_cdev != NULL)
- destroy_dev(sc->my_cdev);
- return 0;
-}
-
-static void
-nic_uio_load(void)
-{
- uint32_t bus, device, function;
- device_t dev;
- char bdf_str[256];
- char *token, *remaining;
-
- memset(bdf_str, 0, sizeof(bdf_str));
- TUNABLE_STR_FETCH("hw.nic_uio.bdfs", bdf_str, sizeof(bdf_str));
- remaining = bdf_str;
- printf("nic_uio: hw.nic_uio.bdfs = '%s'\n", bdf_str);
- /*
- * Users should specify PCI BDFs in the format "b:d:f,b:d:f,b:d:f".
- * But the code below does not try differentiate between : and ,
- * and just blindly uses 3 tokens at a time to construct a
- * bus/device/function tuple.
- *
- * There is no checking on strtol() return values, but this should
- * be OK. Worst case is it cannot convert and returns 0. This
- * could give us a different BDF than intended, but as long as the
- * PCI device/vendor ID does not match it will not matter.
- */
- while (1) {
- if (remaining == NULL || remaining[0] == '\0')
- break;
- token = strsep(&remaining, ",:");
- if (token == NULL)
- break;
- bus = strtol(token, NULL, 10);
- token = strsep(&remaining, ",:");
- if (token == NULL)
- break;
- device = strtol(token, NULL, 10);
- token = strsep(&remaining, ",:");
- if (token == NULL)
- break;
- function = strtol(token, NULL, 10);
-
- dev = pci_find_bsf(bus, device, function);
- if (dev == NULL)
- continue;
-
- if (num_detached < MAX_DETACHED_DEVICES) {
- printf("nic_uio_load: detaching and storing dev=%p\n",
- dev);
- detached_devices[num_detached++] = dev;
- } else {
- printf("nic_uio_load: reached MAX_DETACHED_DEVICES=%d. dev=%p won't be reattached\n",
- MAX_DETACHED_DEVICES, dev);
- }
- device_detach(dev);
- }
-}
-
-static void
-nic_uio_unload(void)
-{
- int i;
- printf("nic_uio_unload: entered...\n");
-
- for (i = 0; i < num_detached; i++) {
- printf("nic_uio_unload: calling to device_probe_and_attach for dev=%p...\n",
- detached_devices[i]);
- device_probe_and_attach(detached_devices[i]);
- printf("nic_uio_unload: done.\n");
- }
-
- printf("nic_uio_unload: leaving...\n");
-}
-
-static int
-nic_uio_shutdown(void)
-{
- return 0;
-}
-
-static int
-nic_uio_modevent(module_t mod, int type, void *arg)
-{
-
- switch (type) {
- case MOD_LOAD:
- nic_uio_load();
- break;
- case MOD_UNLOAD:
- nic_uio_unload();
- break;
- case MOD_SHUTDOWN:
- nic_uio_shutdown();
- break;
- default:
- break;
- }
-
- return 0;
-}