aboutsummaryrefslogtreecommitdiffstats
path: root/lib/librte_eal/common
diff options
context:
space:
mode:
authorLuca Boccassi <luca.boccassi@gmail.com>2018-11-01 11:59:50 +0000
committerLuca Boccassi <luca.boccassi@gmail.com>2018-11-01 12:00:19 +0000
commit8d01b9cd70a67cdafd5b965a70420c3bd7fb3f82 (patch)
tree208e3bc33c220854d89d010e3abf720a2e62e546 /lib/librte_eal/common
parentb63264c8342e6a1b6971c79550d2af2024b6a4de (diff)
New upstream version 18.11-rc1upstream/18.11-rc1
Change-Id: Iaa71986dd6332e878d8f4bf493101b2bbc6313bb Signed-off-by: Luca Boccassi <luca.boccassi@gmail.com>
Diffstat (limited to 'lib/librte_eal/common')
-rw-r--r--lib/librte_eal/common/Makefile1
-rw-r--r--lib/librte_eal/common/arch/arm/meson.build2
-rw-r--r--lib/librte_eal/common/arch/ppc_64/meson.build5
-rw-r--r--lib/librte_eal/common/arch/x86/meson.build2
-rw-r--r--lib/librte_eal/common/eal_common_bus.c45
-rw-r--r--lib/librte_eal/common/eal_common_class.c2
-rw-r--r--lib/librte_eal/common/eal_common_dev.c347
-rw-r--r--lib/librte_eal/common/eal_common_devargs.c52
-rw-r--r--lib/librte_eal/common/eal_common_fbarray.c5
-rw-r--r--lib/librte_eal/common/eal_common_memory.c210
-rw-r--r--lib/librte_eal/common/eal_common_memzone.c8
-rw-r--r--lib/librte_eal/common/eal_common_options.c42
-rw-r--r--lib/librte_eal/common/eal_common_proc.c10
-rw-r--r--lib/librte_eal/common/eal_common_string_fns.c26
-rw-r--r--lib/librte_eal/common/eal_common_timer.c24
-rw-r--r--lib/librte_eal/common/eal_filesystem.h15
-rw-r--r--lib/librte_eal/common/eal_internal_cfg.h1
-rw-r--r--lib/librte_eal/common/eal_memalloc.h11
-rw-r--r--lib/librte_eal/common/eal_options.h2
-rw-r--r--lib/librte_eal/common/eal_private.h90
-rw-r--r--lib/librte_eal/common/hotplug_mp.c426
-rw-r--r--lib/librte_eal/common/hotplug_mp.h46
-rw-r--r--lib/librte_eal/common/include/arch/arm/rte_cycles_32.h4
-rw-r--r--lib/librte_eal/common/include/arch/ppc_64/meson.build16
-rw-r--r--lib/librte_eal/common/include/arch/ppc_64/rte_pause.h7
-rw-r--r--lib/librte_eal/common/include/generic/rte_cycles.h11
-rw-r--r--lib/librte_eal/common/include/rte_bitmap.h14
-rw-r--r--lib/librte_eal/common/include/rte_bus.h34
-rw-r--r--lib/librte_eal/common/include/rte_common.h11
-rw-r--r--lib/librte_eal/common/include/rte_dev.h123
-rw-r--r--lib/librte_eal/common/include/rte_devargs.h81
-rw-r--r--lib/librte_eal/common/include/rte_eal.h20
-rw-r--r--lib/librte_eal/common/include/rte_eal_interrupts.h1
-rw-r--r--lib/librte_eal/common/include/rte_eal_memconfig.h18
-rw-r--r--lib/librte_eal/common/include/rte_malloc.h192
-rw-r--r--lib/librte_eal/common/include/rte_malloc_heap.h3
-rw-r--r--lib/librte_eal/common/include/rte_memory.h109
-rw-r--r--lib/librte_eal/common/include/rte_option.h63
-rw-r--r--lib/librte_eal/common/include/rte_string_fns.h26
-rw-r--r--lib/librte_eal/common/include/rte_version.h6
-rw-r--r--lib/librte_eal/common/include/rte_vfio.h31
-rw-r--r--lib/librte_eal/common/malloc_elem.c10
-rw-r--r--lib/librte_eal/common/malloc_heap.c340
-rw-r--r--lib/librte_eal/common/malloc_heap.h17
-rw-r--r--lib/librte_eal/common/malloc_mp.c4
-rw-r--r--lib/librte_eal/common/meson.build5
-rw-r--r--lib/librte_eal/common/rte_malloc.c436
-rw-r--r--lib/librte_eal/common/rte_option.c54
48 files changed, 2662 insertions, 346 deletions
diff --git a/lib/librte_eal/common/Makefile b/lib/librte_eal/common/Makefile
index cca68826..87d8c455 100644
--- a/lib/librte_eal/common/Makefile
+++ b/lib/librte_eal/common/Makefile
@@ -12,6 +12,7 @@ INC += rte_tailq.h rte_interrupts.h rte_alarm.h
INC += rte_string_fns.h rte_version.h
INC += rte_eal_memconfig.h rte_malloc_heap.h
INC += rte_hexdump.h rte_devargs.h rte_bus.h rte_dev.h rte_class.h
+INC += rte_option.h
INC += rte_pci_dev_feature_defs.h rte_pci_dev_features.h
INC += rte_malloc.h rte_keepalive.h rte_time.h
INC += rte_service.h rte_service_component.h
diff --git a/lib/librte_eal/common/arch/arm/meson.build b/lib/librte_eal/common/arch/arm/meson.build
index c6bd9227..79731e1a 100644
--- a/lib/librte_eal/common/arch/arm/meson.build
+++ b/lib/librte_eal/common/arch/arm/meson.build
@@ -2,4 +2,4 @@
# Copyright(c) 2017 Intel Corporation.
eal_common_arch_sources = files('rte_cpuflags.c',
- 'rte_cycles.c')
+ 'rte_cycles.c', 'rte_hypervisor.c')
diff --git a/lib/librte_eal/common/arch/ppc_64/meson.build b/lib/librte_eal/common/arch/ppc_64/meson.build
new file mode 100644
index 00000000..40b3dc53
--- /dev/null
+++ b/lib/librte_eal/common/arch/ppc_64/meson.build
@@ -0,0 +1,5 @@
+# SPDX-License-Identifier: BSD-3-Clause
+# Copyright(c) 2018 Luca Boccassi <bluca@debian.org>
+
+eal_common_arch_sources = files('rte_cpuflags.c',
+ 'rte_cycles.c', 'rte_hypervisor.c')
diff --git a/lib/librte_eal/common/arch/x86/meson.build b/lib/librte_eal/common/arch/x86/meson.build
index 4e0f7790..14bf204c 100644
--- a/lib/librte_eal/common/arch/x86/meson.build
+++ b/lib/librte_eal/common/arch/x86/meson.build
@@ -2,4 +2,4 @@
# Copyright(c) 2017 Intel Corporation
eal_common_arch_sources = files('rte_spinlock.c', 'rte_cpuflags.c',
- 'rte_cycles.c')
+ 'rte_cycles.c', 'rte_hypervisor.c')
diff --git a/lib/librte_eal/common/eal_common_bus.c b/lib/librte_eal/common/eal_common_bus.c
index 0943851c..c8f1901f 100644
--- a/lib/librte_eal/common/eal_common_bus.c
+++ b/lib/librte_eal/common/eal_common_bus.c
@@ -37,10 +37,11 @@
#include <rte_bus.h>
#include <rte_debug.h>
#include <rte_string_fns.h>
+#include <rte_errno.h>
#include "eal_private.h"
-struct rte_bus_list rte_bus_list =
+static struct rte_bus_list rte_bus_list =
TAILQ_HEAD_INITIALIZER(rte_bus_list);
void
@@ -242,3 +243,45 @@ rte_bus_get_iommu_class(void)
}
return mode;
}
+
+static int
+bus_handle_sigbus(const struct rte_bus *bus,
+ const void *failure_addr)
+{
+ int ret;
+
+ if (!bus->sigbus_handler)
+ return -1;
+
+ ret = bus->sigbus_handler(failure_addr);
+
+ /* find bus but handle failed, keep the errno be set. */
+ if (ret < 0 && rte_errno == 0)
+ rte_errno = ENOTSUP;
+
+ return ret > 0;
+}
+
+int
+rte_bus_sigbus_handler(const void *failure_addr)
+{
+ struct rte_bus *bus;
+
+ int ret = 0;
+ int old_errno = rte_errno;
+
+ rte_errno = 0;
+
+ bus = rte_bus_find(NULL, bus_handle_sigbus, failure_addr);
+ /* can not find bus. */
+ if (!bus)
+ return 1;
+ /* find bus but handle failed, pass on the new errno. */
+ else if (rte_errno != 0)
+ return -1;
+
+ /* restore the old errno. */
+ rte_errno = old_errno;
+
+ return ret;
+}
diff --git a/lib/librte_eal/common/eal_common_class.c b/lib/librte_eal/common/eal_common_class.c
index 404a9065..d922266d 100644
--- a/lib/librte_eal/common/eal_common_class.c
+++ b/lib/librte_eal/common/eal_common_class.c
@@ -9,7 +9,7 @@
#include <rte_class.h>
#include <rte_debug.h>
-struct rte_class_list rte_class_list =
+static struct rte_class_list rte_class_list =
TAILQ_HEAD_INITIALIZER(rte_class_list);
__rte_experimental void
diff --git a/lib/librte_eal/common/eal_common_dev.c b/lib/librte_eal/common/eal_common_dev.c
index 678dbcac..62e9ed47 100644
--- a/lib/librte_eal/common/eal_common_dev.c
+++ b/lib/librte_eal/common/eal_common_dev.c
@@ -19,8 +19,10 @@
#include <rte_log.h>
#include <rte_spinlock.h>
#include <rte_malloc.h>
+#include <rte_string_fns.h>
#include "eal_private.h"
+#include "hotplug_mp.h"
/**
* The device event callback description.
@@ -74,119 +76,110 @@ static int cmp_dev_name(const struct rte_device *dev, const void *_name)
return strcmp(dev->name, name);
}
-int rte_eal_dev_attach(const char *name, const char *devargs)
+int __rte_experimental
+rte_dev_is_probed(const struct rte_device *dev)
{
- struct rte_bus *bus;
+ /* The field driver should be set only when the probe is successful. */
+ return dev->driver != NULL;
+}
- if (name == NULL || devargs == NULL) {
- RTE_LOG(ERR, EAL, "Invalid device or arguments provided\n");
+/* helper function to build devargs, caller should free the memory */
+static int
+build_devargs(const char *busname, const char *devname,
+ const char *drvargs, char **devargs)
+{
+ int length;
+
+ length = snprintf(NULL, 0, "%s:%s,%s", busname, devname, drvargs);
+ if (length < 0)
return -EINVAL;
- }
- bus = rte_bus_find_by_device_name(name);
- if (bus == NULL) {
- RTE_LOG(ERR, EAL, "Unable to find a bus for the device '%s'\n",
- name);
+ *devargs = malloc(length + 1);
+ if (*devargs == NULL)
+ return -ENOMEM;
+
+ length = snprintf(*devargs, length + 1, "%s:%s,%s",
+ busname, devname, drvargs);
+ if (length < 0) {
+ free(*devargs);
return -EINVAL;
}
- if (strcmp(bus->name, "pci") == 0 || strcmp(bus->name, "vdev") == 0)
- return rte_eal_hotplug_add(bus->name, name, devargs);
-
- RTE_LOG(ERR, EAL,
- "Device attach is only supported for PCI and vdev devices.\n");
- return -ENOTSUP;
+ return 0;
}
-int rte_eal_dev_detach(struct rte_device *dev)
+int
+rte_eal_hotplug_add(const char *busname, const char *devname,
+ const char *drvargs)
{
- struct rte_bus *bus;
- int ret;
- if (dev == NULL) {
- RTE_LOG(ERR, EAL, "Invalid device provided.\n");
- return -EINVAL;
- }
+ char *devargs;
+ int ret;
- bus = rte_bus_find_by_device(dev);
- if (bus == NULL) {
- RTE_LOG(ERR, EAL, "Cannot find bus for device (%s)\n",
- dev->name);
- return -EINVAL;
- }
+ ret = build_devargs(busname, devname, drvargs, &devargs);
+ if (ret != 0)
+ return ret;
- if (bus->unplug == NULL) {
- RTE_LOG(ERR, EAL, "Bus function not supported\n");
- return -ENOTSUP;
- }
+ ret = rte_dev_probe(devargs);
+ free(devargs);
- ret = bus->unplug(dev);
- if (ret)
- RTE_LOG(ERR, EAL, "Driver cannot detach the device (%s)\n",
- dev->name);
return ret;
}
-int __rte_experimental rte_eal_hotplug_add(const char *busname, const char *devname,
- const char *devargs)
+/* probe device at local process. */
+int
+local_dev_probe(const char *devargs, struct rte_device **new_dev)
{
- struct rte_bus *bus;
struct rte_device *dev;
struct rte_devargs *da;
int ret;
- bus = rte_bus_find_by_name(busname);
- if (bus == NULL) {
- RTE_LOG(ERR, EAL, "Cannot find bus (%s)\n", busname);
- return -ENOENT;
- }
-
- if (bus->plug == NULL) {
- RTE_LOG(ERR, EAL, "Function plug not supported by bus (%s)\n",
- bus->name);
- return -ENOTSUP;
- }
-
+ *new_dev = NULL;
da = calloc(1, sizeof(*da));
if (da == NULL)
return -ENOMEM;
- ret = rte_devargs_parsef(da, "%s:%s,%s",
- busname, devname, devargs);
+ ret = rte_devargs_parse(da, devargs);
if (ret)
goto err_devarg;
+ if (da->bus->plug == NULL) {
+ RTE_LOG(ERR, EAL, "Function plug not supported by bus (%s)\n",
+ da->bus->name);
+ ret = -ENOTSUP;
+ goto err_devarg;
+ }
+
ret = rte_devargs_insert(da);
if (ret)
goto err_devarg;
- ret = bus->scan();
+ ret = da->bus->scan();
if (ret)
goto err_devarg;
- dev = bus->find_device(NULL, cmp_dev_name, devname);
+ dev = da->bus->find_device(NULL, cmp_dev_name, da->name);
if (dev == NULL) {
RTE_LOG(ERR, EAL, "Cannot find device (%s)\n",
- devname);
+ da->name);
ret = -ENODEV;
goto err_devarg;
}
- if (dev->driver != NULL) {
- RTE_LOG(ERR, EAL, "Device is already plugged\n");
- return -EEXIST;
- }
-
- ret = bus->plug(dev);
+ ret = dev->bus->plug(dev);
if (ret) {
+ if (rte_dev_is_probed(dev)) /* if already succeeded earlier */
+ return ret; /* no rollback */
RTE_LOG(ERR, EAL, "Driver cannot attach the device (%s)\n",
dev->name);
goto err_devarg;
}
+
+ *new_dev = dev;
return 0;
err_devarg:
- if (rte_devargs_remove(busname, devname)) {
+ if (rte_devargs_remove(da) != 0) {
free(da->args);
free(da);
}
@@ -194,40 +187,235 @@ err_devarg:
}
int __rte_experimental
-rte_eal_hotplug_remove(const char *busname, const char *devname)
+rte_dev_probe(const char *devargs)
{
- struct rte_bus *bus;
+ struct eal_dev_mp_req req;
struct rte_device *dev;
int ret;
+ memset(&req, 0, sizeof(req));
+ req.t = EAL_DEV_REQ_TYPE_ATTACH;
+ strlcpy(req.devargs, devargs, EAL_DEV_MP_DEV_ARGS_MAX_LEN);
+
+ if (rte_eal_process_type() != RTE_PROC_PRIMARY) {
+ /**
+ * If in secondary process, just send IPC request to
+ * primary process.
+ */
+ ret = eal_dev_hotplug_request_to_primary(&req);
+ if (ret != 0) {
+ RTE_LOG(ERR, EAL,
+ "Failed to send hotplug request to primary\n");
+ return -ENOMSG;
+ }
+ if (req.result != 0)
+ RTE_LOG(ERR, EAL,
+ "Failed to hotplug add device\n");
+ return req.result;
+ }
+
+ /* attach a shared device from primary start from here: */
+
+ /* primary attach the new device itself. */
+ ret = local_dev_probe(devargs, &dev);
+
+ if (ret != 0) {
+ RTE_LOG(ERR, EAL,
+ "Failed to attach device on primary process\n");
+
+ /**
+ * it is possible that secondary process failed to attached a
+ * device that primary process have during initialization,
+ * so for -EEXIST case, we still need to sync with secondary
+ * process.
+ */
+ if (ret != -EEXIST)
+ return ret;
+ }
+
+ /* primary send attach sync request to secondary. */
+ ret = eal_dev_hotplug_request_to_secondary(&req);
+
+ /* if any communication error, we need to rollback. */
+ if (ret != 0) {
+ RTE_LOG(ERR, EAL,
+ "Failed to send hotplug add request to secondary\n");
+ ret = -ENOMSG;
+ goto rollback;
+ }
+
+ /**
+ * if any secondary failed to attach, we need to consider if rollback
+ * is necessary.
+ */
+ if (req.result != 0) {
+ RTE_LOG(ERR, EAL,
+ "Failed to attach device on secondary process\n");
+ ret = req.result;
+
+ /* for -EEXIST, we don't need to rollback. */
+ if (ret == -EEXIST)
+ return ret;
+ goto rollback;
+ }
+
+ return 0;
+
+rollback:
+ req.t = EAL_DEV_REQ_TYPE_ATTACH_ROLLBACK;
+
+ /* primary send rollback request to secondary. */
+ if (eal_dev_hotplug_request_to_secondary(&req) != 0)
+ RTE_LOG(WARNING, EAL,
+ "Failed to rollback device attach on secondary."
+ "Devices in secondary may not sync with primary\n");
+
+ /* primary rollback itself. */
+ if (local_dev_remove(dev) != 0)
+ RTE_LOG(WARNING, EAL,
+ "Failed to rollback device attach on primary."
+ "Devices in secondary may not sync with primary\n");
+
+ return ret;
+}
+
+int
+rte_eal_hotplug_remove(const char *busname, const char *devname)
+{
+ struct rte_device *dev;
+ struct rte_bus *bus;
+
bus = rte_bus_find_by_name(busname);
if (bus == NULL) {
RTE_LOG(ERR, EAL, "Cannot find bus (%s)\n", busname);
return -ENOENT;
}
- if (bus->unplug == NULL) {
- RTE_LOG(ERR, EAL, "Function unplug not supported by bus (%s)\n",
- bus->name);
- return -ENOTSUP;
- }
-
dev = bus->find_device(NULL, cmp_dev_name, devname);
if (dev == NULL) {
RTE_LOG(ERR, EAL, "Cannot find plugged device (%s)\n", devname);
return -EINVAL;
}
- if (dev->driver == NULL) {
- RTE_LOG(ERR, EAL, "Device is already unplugged\n");
- return -ENOENT;
+ return rte_dev_remove(dev);
+}
+
+/* remove device at local process. */
+int
+local_dev_remove(struct rte_device *dev)
+{
+ int ret;
+
+ if (dev->bus->unplug == NULL) {
+ RTE_LOG(ERR, EAL, "Function unplug not supported by bus (%s)\n",
+ dev->bus->name);
+ return -ENOTSUP;
}
- ret = bus->unplug(dev);
- if (ret)
+ ret = dev->bus->unplug(dev);
+ if (ret) {
RTE_LOG(ERR, EAL, "Driver cannot detach the device (%s)\n",
dev->name);
- rte_devargs_remove(busname, devname);
+ return ret;
+ }
+
+ return 0;
+}
+
+int __rte_experimental
+rte_dev_remove(struct rte_device *dev)
+{
+ struct eal_dev_mp_req req;
+ char *devargs;
+ int ret;
+
+ if (!rte_dev_is_probed(dev)) {
+ RTE_LOG(ERR, EAL, "Device is not probed\n");
+ return -ENOENT;
+ }
+
+ ret = build_devargs(dev->bus->name, dev->name, "", &devargs);
+ if (ret != 0)
+ return ret;
+
+ memset(&req, 0, sizeof(req));
+ req.t = EAL_DEV_REQ_TYPE_DETACH;
+ strlcpy(req.devargs, devargs, EAL_DEV_MP_DEV_ARGS_MAX_LEN);
+ free(devargs);
+
+ if (rte_eal_process_type() != RTE_PROC_PRIMARY) {
+ /**
+ * If in secondary process, just send IPC request to
+ * primary process.
+ */
+ ret = eal_dev_hotplug_request_to_primary(&req);
+ if (ret != 0) {
+ RTE_LOG(ERR, EAL,
+ "Failed to send hotplug request to primary\n");
+ return -ENOMSG;
+ }
+ if (req.result != 0)
+ RTE_LOG(ERR, EAL,
+ "Failed to hotplug remove device\n");
+ return req.result;
+ }
+
+ /* detach a device from primary start from here: */
+
+ /* primary send detach sync request to secondary */
+ ret = eal_dev_hotplug_request_to_secondary(&req);
+
+ /**
+ * if communication error, we need to rollback, because it is possible
+ * part of the secondary processes still detached it successfully.
+ */
+ if (ret != 0) {
+ RTE_LOG(ERR, EAL,
+ "Failed to send device detach request to secondary\n");
+ ret = -ENOMSG;
+ goto rollback;
+ }
+
+ /**
+ * if any secondary failed to detach, we need to consider if rollback
+ * is necessary.
+ */
+ if (req.result != 0) {
+ RTE_LOG(ERR, EAL,
+ "Failed to detach device on secondary process\n");
+ ret = req.result;
+ /**
+ * if -ENOENT, we don't need to rollback, since devices is
+ * already detached on secondary process.
+ */
+ if (ret != -ENOENT)
+ goto rollback;
+ }
+
+ /* primary detach the device itself. */
+ ret = local_dev_remove(dev);
+
+ /* if primary failed, still need to consider if rollback is necessary */
+ if (ret != 0) {
+ RTE_LOG(ERR, EAL,
+ "Failed to detach device on primary process\n");
+ /* if -ENOENT, we don't need to rollback */
+ if (ret == -ENOENT)
+ return ret;
+ goto rollback;
+ }
+
+ return 0;
+
+rollback:
+ req.t = EAL_DEV_REQ_TYPE_DETACH_ROLLBACK;
+
+ /* primary send rollback request to secondary. */
+ if (eal_dev_hotplug_request_to_secondary(&req) != 0)
+ RTE_LOG(WARNING, EAL,
+ "Failed to rollback device detach on secondary."
+ "Devices in secondary may not sync with primary\n");
+
return ret;
}
@@ -342,8 +530,9 @@ rte_dev_event_callback_unregister(const char *device_name,
return ret;
}
-void
-dev_callback_process(char *device_name, enum rte_dev_event_type event)
+void __rte_experimental
+rte_dev_event_callback_process(const char *device_name,
+ enum rte_dev_event_type event)
{
struct dev_event_callback *cb_lst;
diff --git a/lib/librte_eal/common/eal_common_devargs.c b/lib/librte_eal/common/eal_common_devargs.c
index dac2402a..b7b9cb69 100644
--- a/lib/librte_eal/common/eal_common_devargs.c
+++ b/lib/librte_eal/common/eal_common_devargs.c
@@ -4,9 +4,6 @@
/* This file manages the list of devices and their arguments, as given
* by the user at startup
- *
- * Code here should not call rte_log since the EAL environment
- * may not be initialized.
*/
#include <stdio.h>
@@ -28,39 +25,9 @@
TAILQ_HEAD(rte_devargs_list, rte_devargs);
/** Global list of user devices */
-struct rte_devargs_list devargs_list =
+static struct rte_devargs_list devargs_list =
TAILQ_HEAD_INITIALIZER(devargs_list);
-int
-rte_eal_parse_devargs_str(const char *devargs_str,
- char **drvname, char **drvargs)
-{
- char *sep;
-
- if ((devargs_str) == NULL || (drvname) == NULL || (drvargs == NULL))
- return -1;
-
- *drvname = strdup(devargs_str);
- if (*drvname == NULL)
- return -1;
-
- /* set the first ',' to '\0' to split name and arguments */
- sep = strchr(*drvname, ',');
- if (sep != NULL) {
- sep[0] = '\0';
- *drvargs = strdup(sep + 1);
- } else {
- *drvargs = strdup("");
- }
-
- if (*drvargs == NULL) {
- free(*drvname);
- *drvname = NULL;
- return -1;
- }
- return 0;
-}
-
static size_t
devargs_layer_count(const char *s)
{
@@ -270,6 +237,7 @@ rte_devargs_parsef(struct rte_devargs *da, const char *format, ...)
va_list ap;
size_t len;
char *dev;
+ int ret;
if (da == NULL)
return -EINVAL;
@@ -288,7 +256,10 @@ rte_devargs_parsef(struct rte_devargs *da, const char *format, ...)
vsnprintf(dev, len + 1, format, ap);
va_end(ap);
- return rte_devargs_parse(da, dev);
+ ret = rte_devargs_parse(da, dev);
+
+ free(dev);
+ return ret;
}
int __rte_experimental
@@ -296,7 +267,7 @@ rte_devargs_insert(struct rte_devargs *da)
{
int ret;
- ret = rte_devargs_remove(da->bus->name, da->name);
+ ret = rte_devargs_remove(da);
if (ret < 0)
return ret;
TAILQ_INSERT_TAIL(&devargs_list, da, next);
@@ -342,14 +313,17 @@ fail:
}
int __rte_experimental
-rte_devargs_remove(const char *busname, const char *devname)
+rte_devargs_remove(struct rte_devargs *devargs)
{
struct rte_devargs *d;
void *tmp;
+ if (devargs == NULL || devargs->bus == NULL)
+ return -1;
+
TAILQ_FOREACH_SAFE(d, &devargs_list, next, tmp) {
- if (strcmp(d->bus->name, busname) == 0 &&
- strcmp(d->name, devname) == 0) {
+ if (strcmp(d->bus->name, devargs->bus->name) == 0 &&
+ strcmp(d->name, devargs->name) == 0) {
TAILQ_REMOVE(&devargs_list, d, next);
free(d->args);
free(d);
diff --git a/lib/librte_eal/common/eal_common_fbarray.c b/lib/librte_eal/common/eal_common_fbarray.c
index 43caf3ce..ea0735cb 100644
--- a/lib/librte_eal/common/eal_common_fbarray.c
+++ b/lib/librte_eal/common/eal_common_fbarray.c
@@ -2,6 +2,7 @@
* Copyright(c) 2017-2018 Intel Corporation
*/
+#include <fcntl.h>
#include <inttypes.h>
#include <limits.h>
#include <sys/mman.h>
@@ -878,6 +879,10 @@ rte_fbarray_destroy(struct rte_fbarray *arr)
if (ret)
return ret;
+ /* with no shconf, there were never any files to begin with */
+ if (internal_config.no_shconf)
+ return 0;
+
/* try deleting the file */
eal_get_fbarray_path(path, sizeof(path), arr->name);
diff --git a/lib/librte_eal/common/eal_common_memory.c b/lib/librte_eal/common/eal_common_memory.c
index fbfb1b05..12dcedf5 100644
--- a/lib/librte_eal/common/eal_common_memory.c
+++ b/lib/librte_eal/common/eal_common_memory.c
@@ -2,6 +2,7 @@
* Copyright(c) 2010-2014 Intel Corporation
*/
+#include <fcntl.h>
#include <errno.h>
#include <stdio.h>
#include <stdint.h>
@@ -37,6 +38,23 @@
static void *next_baseaddr;
static uint64_t system_page_sz;
+#ifdef RTE_ARCH_64
+/*
+ * Linux kernel uses a really high address as starting address for serving
+ * mmaps calls. If there exists addressing limitations and IOVA mode is VA,
+ * this starting address is likely too high for those devices. However, it
+ * is possible to use a lower address in the process virtual address space
+ * as with 64 bits there is a lot of available space.
+ *
+ * Current known limitations are 39 or 40 bits. Setting the starting address
+ * at 4GB implies there are 508GB or 1020GB for mapping the available
+ * hugepages. This is likely enough for most systems, although a device with
+ * addressing limitations should call rte_eal_check_dma_mask for ensuring all
+ * memory is within supported range.
+ */
+static uint64_t baseaddr = 0x100000000;
+#endif
+
void *
eal_get_virtual_area(void *requested_addr, size_t *size,
size_t page_sz, int flags, int mmap_flags)
@@ -60,6 +78,11 @@ eal_get_virtual_area(void *requested_addr, size_t *size,
rte_eal_process_type() == RTE_PROC_PRIMARY)
next_baseaddr = (void *) internal_config.base_virtaddr;
+#ifdef RTE_ARCH_64
+ if (next_baseaddr == NULL && internal_config.base_virtaddr == 0 &&
+ rte_eal_process_type() == RTE_PROC_PRIMARY)
+ next_baseaddr = (void *) baseaddr;
+#endif
if (requested_addr == NULL && next_baseaddr != NULL) {
requested_addr = next_baseaddr;
requested_addr = RTE_PTR_ALIGN(requested_addr, page_sz);
@@ -91,7 +114,17 @@ eal_get_virtual_area(void *requested_addr, size_t *size,
mmap_flags, -1, 0);
if (mapped_addr == MAP_FAILED && allow_shrink)
*size -= page_sz;
- } while (allow_shrink && mapped_addr == MAP_FAILED && *size > 0);
+
+ if (mapped_addr != MAP_FAILED && addr_is_hint &&
+ mapped_addr != requested_addr) {
+ /* hint was not used. Try with another offset */
+ munmap(mapped_addr, map_sz);
+ mapped_addr = MAP_FAILED;
+ next_baseaddr = RTE_PTR_ADD(next_baseaddr, page_sz);
+ requested_addr = next_baseaddr;
+ }
+ } while ((allow_shrink || addr_is_hint) &&
+ mapped_addr == MAP_FAILED && *size > 0);
/* align resulting address - if map failed, we will ignore the value
* anyway, so no need to add additional checks.
@@ -171,7 +204,7 @@ virt2memseg(const void *addr, const struct rte_memseg_list *msl)
/* a memseg list was specified, check if it's the right one */
start = msl->base_va;
- end = RTE_PTR_ADD(start, (size_t)msl->page_sz * msl->memseg_arr.len);
+ end = RTE_PTR_ADD(start, msl->len);
if (addr < start || addr >= end)
return NULL;
@@ -194,8 +227,7 @@ virt2memseg_list(const void *addr)
msl = &mcfg->memsegs[msl_idx];
start = msl->base_va;
- end = RTE_PTR_ADD(start,
- (size_t)msl->page_sz * msl->memseg_arr.len);
+ end = RTE_PTR_ADD(start, msl->len);
if (addr >= start && addr < end)
break;
}
@@ -273,6 +305,9 @@ physmem_size(const struct rte_memseg_list *msl, void *arg)
{
uint64_t *total_len = arg;
+ if (msl->external)
+ return 0;
+
*total_len += msl->memseg_arr.count * msl->page_sz;
return 0;
@@ -294,7 +329,7 @@ dump_memseg(const struct rte_memseg_list *msl, const struct rte_memseg *ms,
void *arg)
{
struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
- int msl_idx, ms_idx;
+ int msl_idx, ms_idx, fd;
FILE *f = arg;
msl_idx = msl - mcfg->memsegs;
@@ -305,10 +340,11 @@ dump_memseg(const struct rte_memseg_list *msl, const struct rte_memseg *ms,
if (ms_idx < 0)
return -1;
+ fd = eal_memalloc_get_seg_fd(msl_idx, ms_idx);
fprintf(f, "Segment %i-%i: IOVA:0x%"PRIx64", len:%zu, "
"virt:%p, socket_id:%"PRId32", "
"hugepage_sz:%"PRIu64", nchannel:%"PRIx32", "
- "nrank:%"PRIx32"\n",
+ "nrank:%"PRIx32" fd:%i\n",
msl_idx, ms_idx,
ms->iova,
ms->len,
@@ -316,7 +352,8 @@ dump_memseg(const struct rte_memseg_list *msl, const struct rte_memseg *ms,
ms->socket_id,
ms->hugepage_sz,
ms->nchannel,
- ms->nrank);
+ ms->nrank,
+ fd);
return 0;
}
@@ -383,6 +420,66 @@ rte_dump_physmem_layout(FILE *f)
rte_memseg_walk(dump_memseg, f);
}
+static int
+check_iova(const struct rte_memseg_list *msl __rte_unused,
+ const struct rte_memseg *ms, void *arg)
+{
+ uint64_t *mask = arg;
+ rte_iova_t iova;
+
+ /* higher address within segment */
+ iova = (ms->iova + ms->len) - 1;
+ if (!(iova & *mask))
+ return 0;
+
+ RTE_LOG(DEBUG, EAL, "memseg iova %"PRIx64", len %zx, out of range\n",
+ ms->iova, ms->len);
+
+ RTE_LOG(DEBUG, EAL, "\tusing dma mask %"PRIx64"\n", *mask);
+ return 1;
+}
+
+#if defined(RTE_ARCH_64)
+#define MAX_DMA_MASK_BITS 63
+#else
+#define MAX_DMA_MASK_BITS 31
+#endif
+
+/* check memseg iovas are within the required range based on dma mask */
+int __rte_experimental
+rte_eal_check_dma_mask(uint8_t maskbits)
+{
+ struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
+ uint64_t mask;
+
+ /* sanity check */
+ if (maskbits > MAX_DMA_MASK_BITS) {
+ RTE_LOG(ERR, EAL, "wrong dma mask size %u (Max: %u)\n",
+ maskbits, MAX_DMA_MASK_BITS);
+ return -1;
+ }
+
+ /* create dma mask */
+ mask = ~((1ULL << maskbits) - 1);
+
+ if (rte_memseg_walk(check_iova, &mask))
+ /*
+ * Dma mask precludes hugepage usage.
+ * This device can not be used and we do not need to keep
+ * the dma mask.
+ */
+ return 1;
+
+ /*
+ * we need to keep the more restricted maskbit for checking
+ * potential dynamic memory allocation in the future.
+ */
+ mcfg->dma_maskbits = mcfg->dma_maskbits == 0 ? maskbits :
+ RTE_MIN(mcfg->dma_maskbits, maskbits);
+
+ return 0;
+}
+
/* return the number of memory channels */
unsigned rte_memory_get_nchannel(void)
{
@@ -548,6 +645,105 @@ rte_memseg_list_walk(rte_memseg_list_walk_t func, void *arg)
return ret;
}
+int __rte_experimental
+rte_memseg_get_fd_thread_unsafe(const struct rte_memseg *ms)
+{
+ struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
+ struct rte_memseg_list *msl;
+ struct rte_fbarray *arr;
+ int msl_idx, seg_idx, ret;
+
+ if (ms == NULL) {
+ rte_errno = EINVAL;
+ return -1;
+ }
+
+ msl = rte_mem_virt2memseg_list(ms->addr);
+ if (msl == NULL) {
+ rte_errno = EINVAL;
+ return -1;
+ }
+ arr = &msl->memseg_arr;
+
+ msl_idx = msl - mcfg->memsegs;
+ seg_idx = rte_fbarray_find_idx(arr, ms);
+
+ if (!rte_fbarray_is_used(arr, seg_idx)) {
+ rte_errno = ENOENT;
+ return -1;
+ }
+
+ ret = eal_memalloc_get_seg_fd(msl_idx, seg_idx);
+ if (ret < 0) {
+ rte_errno = -ret;
+ ret = -1;
+ }
+ return ret;
+}
+
+int __rte_experimental
+rte_memseg_get_fd(const struct rte_memseg *ms)
+{
+ struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
+ int ret;
+
+ rte_rwlock_read_lock(&mcfg->memory_hotplug_lock);
+ ret = rte_memseg_get_fd_thread_unsafe(ms);
+ rte_rwlock_read_unlock(&mcfg->memory_hotplug_lock);
+
+ return ret;
+}
+
+int __rte_experimental
+rte_memseg_get_fd_offset_thread_unsafe(const struct rte_memseg *ms,
+ size_t *offset)
+{
+ struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
+ struct rte_memseg_list *msl;
+ struct rte_fbarray *arr;
+ int msl_idx, seg_idx, ret;
+
+ if (ms == NULL || offset == NULL) {
+ rte_errno = EINVAL;
+ return -1;
+ }
+
+ msl = rte_mem_virt2memseg_list(ms->addr);
+ if (msl == NULL) {
+ rte_errno = EINVAL;
+ return -1;
+ }
+ arr = &msl->memseg_arr;
+
+ msl_idx = msl - mcfg->memsegs;
+ seg_idx = rte_fbarray_find_idx(arr, ms);
+
+ if (!rte_fbarray_is_used(arr, seg_idx)) {
+ rte_errno = ENOENT;
+ return -1;
+ }
+
+ ret = eal_memalloc_get_seg_fd_offset(msl_idx, seg_idx, offset);
+ if (ret < 0) {
+ rte_errno = -ret;
+ ret = -1;
+ }
+ return ret;
+}
+
+int __rte_experimental
+rte_memseg_get_fd_offset(const struct rte_memseg *ms, size_t *offset)
+{
+ struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
+ int ret;
+
+ rte_rwlock_read_lock(&mcfg->memory_hotplug_lock);
+ ret = rte_memseg_get_fd_offset_thread_unsafe(ms, offset);
+ rte_rwlock_read_unlock(&mcfg->memory_hotplug_lock);
+
+ return ret;
+}
+
/* init memory subsystem */
int
rte_eal_memory_init(void)
diff --git a/lib/librte_eal/common/eal_common_memzone.c b/lib/librte_eal/common/eal_common_memzone.c
index 7300fe05..b7081afb 100644
--- a/lib/librte_eal/common/eal_common_memzone.c
+++ b/lib/librte_eal/common/eal_common_memzone.c
@@ -120,13 +120,15 @@ memzone_reserve_aligned_thread_unsafe(const char *name, size_t len,
return NULL;
}
- if ((socket_id != SOCKET_ID_ANY) &&
- (socket_id >= RTE_MAX_NUMA_NODES || socket_id < 0)) {
+ if ((socket_id != SOCKET_ID_ANY) && socket_id < 0) {
rte_errno = EINVAL;
return NULL;
}
- if (!rte_eal_has_hugepages())
+ /* only set socket to SOCKET_ID_ANY if we aren't allocating for an
+ * external heap.
+ */
+ if (!rte_eal_has_hugepages() && socket_id < RTE_MAX_NUMA_NODES)
socket_id = SOCKET_ID_ANY;
contig = (flags & RTE_MEMZONE_IOVA_CONTIG) != 0;
diff --git a/lib/librte_eal/common/eal_common_options.c b/lib/librte_eal/common/eal_common_options.c
index dd5f9740..b82f3ddd 100644
--- a/lib/librte_eal/common/eal_common_options.c
+++ b/lib/librte_eal/common/eal_common_options.c
@@ -58,6 +58,7 @@ eal_long_options[] = {
{OPT_HELP, 0, NULL, OPT_HELP_NUM },
{OPT_HUGE_DIR, 1, NULL, OPT_HUGE_DIR_NUM },
{OPT_HUGE_UNLINK, 0, NULL, OPT_HUGE_UNLINK_NUM },
+ {OPT_IOVA_MODE, 1, NULL, OPT_IOVA_MODE_NUM },
{OPT_LCORES, 1, NULL, OPT_LCORES_NUM },
{OPT_LOG_LEVEL, 1, NULL, OPT_LOG_LEVEL_NUM },
{OPT_MASTER_LCORE, 1, NULL, OPT_MASTER_LCORE_NUM },
@@ -205,6 +206,7 @@ eal_reset_internal_config(struct internal_config *internal_cfg)
#endif
internal_cfg->vmware_tsc_map = 0;
internal_cfg->create_uio_dev = 0;
+ internal_cfg->iova_mode = RTE_IOVA_DC;
internal_cfg->user_mbuf_pool_ops_name = NULL;
internal_cfg->init_complete = 0;
}
@@ -1075,6 +1077,25 @@ eal_parse_proc_type(const char *arg)
return RTE_PROC_INVALID;
}
+static int
+eal_parse_iova_mode(const char *name)
+{
+ int mode;
+
+ if (name == NULL)
+ return -1;
+
+ if (!strcmp("pa", name))
+ mode = RTE_IOVA_PA;
+ else if (!strcmp("va", name))
+ mode = RTE_IOVA_VA;
+ else
+ return -1;
+
+ internal_config.iova_mode = mode;
+ return 0;
+}
+
int
eal_parse_common_option(int opt, const char *optarg,
struct internal_config *conf)
@@ -1281,6 +1302,13 @@ eal_parse_common_option(int opt, const char *optarg,
case OPT_SINGLE_FILE_SEGMENTS_NUM:
conf->single_file_segments = 1;
break;
+ case OPT_IOVA_MODE_NUM:
+ if (eal_parse_iova_mode(optarg) < 0) {
+ RTE_LOG(ERR, EAL, "invalid parameters for --"
+ OPT_IOVA_MODE "\n");
+ return -1;
+ }
+ break;
/* don't know what to do, leave this to caller */
default:
@@ -1384,10 +1412,16 @@ eal_check_common_options(struct internal_config *internal_cfg)
" is only supported in non-legacy memory mode\n");
}
if (internal_cfg->single_file_segments &&
- internal_cfg->hugepage_unlink) {
+ internal_cfg->hugepage_unlink &&
+ !internal_cfg->in_memory) {
RTE_LOG(ERR, EAL, "Option --"OPT_SINGLE_FILE_SEGMENTS" is "
- "not compatible with neither --"OPT_IN_MEMORY" nor "
- "--"OPT_HUGE_UNLINK"\n");
+ "not compatible with --"OPT_HUGE_UNLINK"\n");
+ return -1;
+ }
+ if (internal_cfg->legacy_mem &&
+ internal_cfg->in_memory) {
+ RTE_LOG(ERR, EAL, "Option --"OPT_LEGACY_MEM" is not compatible "
+ "with --"OPT_IN_MEMORY"\n");
return -1;
}
@@ -1428,6 +1462,8 @@ eal_common_usage(void)
" --"OPT_VDEV" Add a virtual device.\n"
" The argument format is <driver><id>[,key=val,...]\n"
" (ex: --vdev=net_pcap0,iface=eth2).\n"
+ " --"OPT_IOVA_MODE" Set IOVA mode. 'pa' for IOVA_PA\n"
+ " 'va' for IOVA_VA\n"
" -d LIB.so|DIR Add a driver or driver directory\n"
" (can be used multiple times)\n"
" --"OPT_VMWARE_TSC_MAP" Use VMware TSC map instead of native RDTSC\n"
diff --git a/lib/librte_eal/common/eal_common_proc.c b/lib/librte_eal/common/eal_common_proc.c
index 9fcb9121..97663d3b 100644
--- a/lib/librte_eal/common/eal_common_proc.c
+++ b/lib/librte_eal/common/eal_common_proc.c
@@ -939,13 +939,17 @@ rte_mp_request_sync(struct rte_mp_msg *req, struct rte_mp_reply *reply,
if (check_input(req) == false)
return -1;
+ reply->nb_sent = 0;
+ reply->nb_received = 0;
+ reply->msgs = NULL;
+
if (internal_config.no_shconf) {
RTE_LOG(DEBUG, EAL, "No shared files mode enabled, IPC is disabled\n");
return 0;
}
if (gettimeofday(&now, NULL) < 0) {
- RTE_LOG(ERR, EAL, "Faile to get current time\n");
+ RTE_LOG(ERR, EAL, "Failed to get current time\n");
rte_errno = errno;
return -1;
}
@@ -954,10 +958,6 @@ rte_mp_request_sync(struct rte_mp_msg *req, struct rte_mp_reply *reply,
end.tv_sec = now.tv_sec + ts->tv_sec +
(now.tv_usec * 1000 + ts->tv_nsec) / 1000000000;
- reply->nb_sent = 0;
- reply->nb_received = 0;
- reply->msgs = NULL;
-
/* for secondary process, send request to the primary process only */
if (rte_eal_process_type() == RTE_PROC_SECONDARY) {
pthread_mutex_lock(&pending_requests.lock);
diff --git a/lib/librte_eal/common/eal_common_string_fns.c b/lib/librte_eal/common/eal_common_string_fns.c
index 6ac5f828..60c5dd66 100644
--- a/lib/librte_eal/common/eal_common_string_fns.c
+++ b/lib/librte_eal/common/eal_common_string_fns.c
@@ -38,3 +38,29 @@ einval_error:
errno = EINVAL;
return -1;
}
+
+/* Copy src string into dst.
+ *
+ * Return negative value and NUL-terminate if dst is too short,
+ * Otherwise return number of bytes copied.
+ */
+ssize_t
+rte_strscpy(char *dst, const char *src, size_t dsize)
+{
+ size_t nleft = dsize;
+ size_t res = 0;
+
+ /* Copy as many bytes as will fit. */
+ while (nleft != 0) {
+ dst[res] = src[res];
+ if (src[res] == '\0')
+ return res;
+ res++;
+ nleft--;
+ }
+
+ /* Not enough room in dst, set NUL and return error. */
+ if (res != 0)
+ dst[res - 1] = '\0';
+ return -E2BIG;
+}
diff --git a/lib/librte_eal/common/eal_common_timer.c b/lib/librte_eal/common/eal_common_timer.c
index 2e2b770f..dcf26bfe 100644
--- a/lib/librte_eal/common/eal_common_timer.c
+++ b/lib/librte_eal/common/eal_common_timer.c
@@ -7,9 +7,11 @@
#include <unistd.h>
#include <inttypes.h>
#include <sys/types.h>
+#include <time.h>
#include <errno.h>
#include <rte_common.h>
+#include <rte_compat.h>
#include <rte_log.h>
#include <rte_cycles.h>
#include <rte_pause.h>
@@ -31,6 +33,28 @@ rte_delay_us_block(unsigned int us)
rte_pause();
}
+void __rte_experimental
+rte_delay_us_sleep(unsigned int us)
+{
+ struct timespec wait[2];
+ int ind = 0;
+
+ wait[0].tv_sec = 0;
+ if (us >= US_PER_S) {
+ wait[0].tv_sec = us / US_PER_S;
+ us -= wait[0].tv_sec * US_PER_S;
+ }
+ wait[0].tv_nsec = 1000 * us;
+
+ while (nanosleep(&wait[ind], &wait[1 - ind]) && errno == EINTR) {
+ /*
+ * Sleep was interrupted. Flip the index, so the 'remainder'
+ * will become the 'request' for a next call.
+ */
+ ind = 1 - ind;
+ }
+}
+
uint64_t
rte_get_tsc_hz(void)
{
diff --git a/lib/librte_eal/common/eal_filesystem.h b/lib/librte_eal/common/eal_filesystem.h
index de05febf..b3e8ae5e 100644
--- a/lib/librte_eal/common/eal_filesystem.h
+++ b/lib/librte_eal/common/eal_filesystem.h
@@ -27,7 +27,7 @@ eal_create_runtime_dir(void);
/* returns runtime dir */
const char *
-eal_get_runtime_dir(void);
+rte_eal_get_runtime_dir(void);
#define RUNTIME_CONFIG_FNAME "config"
static inline const char *
@@ -35,7 +35,7 @@ eal_runtime_config_path(void)
{
static char buffer[PATH_MAX]; /* static so auto-zeroed */
- snprintf(buffer, sizeof(buffer) - 1, "%s/%s", eal_get_runtime_dir(),
+ snprintf(buffer, sizeof(buffer) - 1, "%s/%s", rte_eal_get_runtime_dir(),
RUNTIME_CONFIG_FNAME);
return buffer;
}
@@ -47,7 +47,7 @@ eal_mp_socket_path(void)
{
static char buffer[PATH_MAX]; /* static so auto-zeroed */
- snprintf(buffer, sizeof(buffer) - 1, "%s/%s", eal_get_runtime_dir(),
+ snprintf(buffer, sizeof(buffer) - 1, "%s/%s", rte_eal_get_runtime_dir(),
MP_SOCKET_FNAME);
return buffer;
}
@@ -55,7 +55,8 @@ eal_mp_socket_path(void)
#define FBARRAY_NAME_FMT "%s/fbarray_%s"
static inline const char *
eal_get_fbarray_path(char *buffer, size_t buflen, const char *name) {
- snprintf(buffer, buflen, FBARRAY_NAME_FMT, eal_get_runtime_dir(), name);
+ snprintf(buffer, buflen, FBARRAY_NAME_FMT, rte_eal_get_runtime_dir(),
+ name);
return buffer;
}
@@ -66,7 +67,7 @@ eal_hugepage_info_path(void)
{
static char buffer[PATH_MAX]; /* static so auto-zeroed */
- snprintf(buffer, sizeof(buffer) - 1, "%s/%s", eal_get_runtime_dir(),
+ snprintf(buffer, sizeof(buffer) - 1, "%s/%s", rte_eal_get_runtime_dir(),
HUGEPAGE_INFO_FNAME);
return buffer;
}
@@ -78,7 +79,7 @@ eal_hugepage_data_path(void)
{
static char buffer[PATH_MAX]; /* static so auto-zeroed */
- snprintf(buffer, sizeof(buffer) - 1, "%s/%s", eal_get_runtime_dir(),
+ snprintf(buffer, sizeof(buffer) - 1, "%s/%s", rte_eal_get_runtime_dir(),
HUGEPAGE_DATA_FNAME);
return buffer;
}
@@ -99,7 +100,7 @@ eal_get_hugefile_path(char *buffer, size_t buflen, const char *hugedir, int f_id
static inline const char *
eal_get_hugefile_lock_path(char *buffer, size_t buflen, int f_id)
{
- snprintf(buffer, buflen, HUGEFILE_LOCK_FMT, eal_get_runtime_dir(),
+ snprintf(buffer, buflen, HUGEFILE_LOCK_FMT, rte_eal_get_runtime_dir(),
f_id);
buffer[buflen - 1] = '\0';
return buffer;
diff --git a/lib/librte_eal/common/eal_internal_cfg.h b/lib/librte_eal/common/eal_internal_cfg.h
index 00ee6e06..737f17e3 100644
--- a/lib/librte_eal/common/eal_internal_cfg.h
+++ b/lib/librte_eal/common/eal_internal_cfg.h
@@ -70,6 +70,7 @@ struct internal_config {
/**< user defined mbuf pool ops name */
unsigned num_hugepage_sizes; /**< how many sizes on this system */
struct hugepage_info hugepage_info[MAX_HUGEPAGE_SIZES];
+ enum rte_iova_mode iova_mode ; /**< Set IOVA mode on this system */
volatile unsigned int init_complete;
/**< indicates whether EAL has completed initialization */
};
diff --git a/lib/librte_eal/common/eal_memalloc.h b/lib/librte_eal/common/eal_memalloc.h
index 36bb1a02..af917c2f 100644
--- a/lib/librte_eal/common/eal_memalloc.h
+++ b/lib/librte_eal/common/eal_memalloc.h
@@ -76,6 +76,17 @@ eal_memalloc_mem_alloc_validator_unregister(const char *name, int socket_id);
int
eal_memalloc_mem_alloc_validate(int socket_id, size_t new_len);
+/* returns fd or -errno */
+int
+eal_memalloc_get_seg_fd(int list_idx, int seg_idx);
+
+/* returns 0 or -errno */
+int
+eal_memalloc_set_seg_fd(int list_idx, int seg_idx, int fd);
+
+int
+eal_memalloc_get_seg_fd_offset(int list_idx, int seg_idx, size_t *offset);
+
int
eal_memalloc_init(void);
diff --git a/lib/librte_eal/common/eal_options.h b/lib/librte_eal/common/eal_options.h
index 96e16678..5271f944 100644
--- a/lib/librte_eal/common/eal_options.h
+++ b/lib/librte_eal/common/eal_options.h
@@ -63,6 +63,8 @@ enum {
OPT_LEGACY_MEM_NUM,
#define OPT_SINGLE_FILE_SEGMENTS "single-file-segments"
OPT_SINGLE_FILE_SEGMENTS_NUM,
+#define OPT_IOVA_MODE "iova-mode"
+ OPT_IOVA_MODE_NUM,
OPT_LONG_MAX_NUM
};
diff --git a/lib/librte_eal/common/eal_private.h b/lib/librte_eal/common/eal_private.h
index 4f809a83..442c6dc4 100644
--- a/lib/librte_eal/common/eal_private.h
+++ b/lib/librte_eal/common/eal_private.h
@@ -259,18 +259,6 @@ struct rte_bus *rte_bus_find_by_device_name(const char *str);
int rte_mp_channel_init(void);
/**
- * Internal Executes all the user application registered callbacks for
- * the specific device. It is for DPDK internal user only. User
- * application should not call it directly.
- *
- * @param device_name
- * The device name.
- * @param event
- * the device event type.
- */
-void dev_callback_process(char *device_name, enum rte_dev_event_type event);
-
-/**
* @internal
* Parse a device string and store its information in an
* rte_devargs structure.
@@ -304,4 +292,82 @@ int
rte_devargs_layers_parse(struct rte_devargs *devargs,
const char *devstr);
+/*
+ * probe a device at local process.
+ *
+ * @param devargs
+ * Device arguments including bus, class and driver properties.
+ * @param new_dev
+ * new device be probed as output.
+ * @return
+ * 0 on success, negative on error.
+ */
+int local_dev_probe(const char *devargs, struct rte_device **new_dev);
+
+/**
+ * Hotplug remove a given device from a specific bus at local process.
+ *
+ * @param dev
+ * Data structure of the device to remove.
+ * @return
+ * 0 on success, negative on error.
+ */
+int local_dev_remove(struct rte_device *dev);
+
+/**
+ * Iterate over all buses to find the corresponding bus to handle the sigbus
+ * error.
+ * @param failure_addr
+ * Pointer of the fault address of the sigbus error.
+ *
+ * @return
+ * 0 success to handle the sigbus.
+ * -1 failed to handle the sigbus
+ * 1 no bus can handler the sigbus
+ */
+int rte_bus_sigbus_handler(const void *failure_addr);
+
+/**
+ * @internal
+ * Register the sigbus handler.
+ *
+ * @return
+ * - On success, zero.
+ * - On failure, a negative value.
+ */
+int
+dev_sigbus_handler_register(void);
+
+/**
+ * @internal
+ * Unregister the sigbus handler.
+ *
+ * @return
+ * - On success, zero.
+ * - On failure, a negative value.
+ */
+int
+dev_sigbus_handler_unregister(void);
+
+/**
+ * Check if the option is registered.
+ *
+ * @param option
+ * The option to be parsed.
+ *
+ * @return
+ * 0 on success
+ * @return
+ * -1 on fail
+ */
+int
+rte_option_parse(const char *opt);
+
+/**
+ * Iterate through the registered options and execute the associated
+ * callback if enabled.
+ */
+void
+rte_option_init(void);
+
#endif /* _EAL_PRIVATE_H_ */
diff --git a/lib/librte_eal/common/hotplug_mp.c b/lib/librte_eal/common/hotplug_mp.c
new file mode 100644
index 00000000..84f59d95
--- /dev/null
+++ b/lib/librte_eal/common/hotplug_mp.c
@@ -0,0 +1,426 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2018 Intel Corporation
+ */
+#include <string.h>
+
+#include <rte_eal.h>
+#include <rte_alarm.h>
+#include <rte_string_fns.h>
+#include <rte_devargs.h>
+
+#include "hotplug_mp.h"
+#include "eal_private.h"
+
+#define MP_TIMEOUT_S 5 /**< 5 seconds timeouts */
+
+struct mp_reply_bundle {
+ struct rte_mp_msg msg;
+ void *peer;
+};
+
+static int cmp_dev_name(const struct rte_device *dev, const void *_name)
+{
+ const char *name = _name;
+
+ return strcmp(dev->name, name);
+}
+
+/**
+ * Secondary to primary request.
+ * start from function eal_dev_hotplug_request_to_primary.
+ *
+ * device attach on secondary:
+ * a) secondary send sync request to the primary.
+ * b) primary receive the request and attach the new device if
+ * failed goto i).
+ * c) primary forward attach sync request to all secondary.
+ * d) secondary receive the request and attach the device and send a reply.
+ * e) primary check the reply if all success goes to j).
+ * f) primary send attach rollback sync request to all secondary.
+ * g) secondary receive the request and detach the device and send a reply.
+ * h) primary receive the reply and detach device as rollback action.
+ * i) send attach fail to secondary as a reply of step a), goto k).
+ * j) send attach success to secondary as a reply of step a).
+ * k) secondary receive reply and return.
+ *
+ * device detach on secondary:
+ * a) secondary send sync request to the primary.
+ * b) primary send detach sync request to all secondary.
+ * c) secondary detach the device and send a reply.
+ * d) primary check the reply if all success goes to g).
+ * e) primary send detach rollback sync request to all secondary.
+ * f) secondary receive the request and attach back device. goto h).
+ * g) primary detach the device if success goto i), else goto e).
+ * h) primary send detach fail to secondary as a reply of step a), goto j).
+ * i) primary send detach success to secondary as a reply of step a).
+ * j) secondary receive reply and return.
+ */
+
+static int
+send_response_to_secondary(const struct eal_dev_mp_req *req,
+ int result,
+ const void *peer)
+{
+ struct rte_mp_msg mp_resp;
+ struct eal_dev_mp_req *resp =
+ (struct eal_dev_mp_req *)mp_resp.param;
+ int ret;
+
+ memset(&mp_resp, 0, sizeof(mp_resp));
+ mp_resp.len_param = sizeof(*resp);
+ strlcpy(mp_resp.name, EAL_DEV_MP_ACTION_REQUEST, sizeof(mp_resp.name));
+ memcpy(resp, req, sizeof(*req));
+ resp->result = result;
+
+ ret = rte_mp_reply(&mp_resp, peer);
+ if (ret != 0)
+ RTE_LOG(ERR, EAL, "failed to send response to secondary\n");
+
+ return ret;
+}
+
+static void
+__handle_secondary_request(void *param)
+{
+ struct mp_reply_bundle *bundle = param;
+ const struct rte_mp_msg *msg = &bundle->msg;
+ const struct eal_dev_mp_req *req =
+ (const struct eal_dev_mp_req *)msg->param;
+ struct eal_dev_mp_req tmp_req;
+ struct rte_devargs *da;
+ struct rte_device *dev;
+ struct rte_bus *bus;
+ int ret = 0;
+
+ tmp_req = *req;
+
+ if (req->t == EAL_DEV_REQ_TYPE_ATTACH) {
+ ret = local_dev_probe(req->devargs, &dev);
+ if (ret != 0) {
+ RTE_LOG(ERR, EAL, "Failed to hotplug add device on primary\n");
+ if (ret != -EEXIST)
+ goto finish;
+ }
+ ret = eal_dev_hotplug_request_to_secondary(&tmp_req);
+ if (ret != 0) {
+ RTE_LOG(ERR, EAL, "Failed to send hotplug request to secondary\n");
+ ret = -ENOMSG;
+ goto rollback;
+ }
+ if (tmp_req.result != 0) {
+ ret = tmp_req.result;
+ RTE_LOG(ERR, EAL, "Failed to hotplug add device on secondary\n");
+ if (ret != -EEXIST)
+ goto rollback;
+ }
+ } else if (req->t == EAL_DEV_REQ_TYPE_DETACH) {
+ da = calloc(1, sizeof(*da));
+ if (da == NULL) {
+ ret = -ENOMEM;
+ goto finish;
+ }
+
+ ret = rte_devargs_parse(da, req->devargs);
+ if (ret != 0)
+ goto finish;
+
+ ret = eal_dev_hotplug_request_to_secondary(&tmp_req);
+ if (ret != 0) {
+ RTE_LOG(ERR, EAL, "Failed to send hotplug request to secondary\n");
+ ret = -ENOMSG;
+ goto rollback;
+ }
+
+ bus = rte_bus_find_by_name(da->bus->name);
+ if (bus == NULL) {
+ RTE_LOG(ERR, EAL, "Cannot find bus (%s)\n", da->bus->name);
+ ret = -ENOENT;
+ goto finish;
+ }
+
+ dev = bus->find_device(NULL, cmp_dev_name, da->name);
+ if (dev == NULL) {
+ RTE_LOG(ERR, EAL, "Cannot find plugged device (%s)\n", da->name);
+ ret = -ENOENT;
+ goto finish;
+ }
+
+ if (tmp_req.result != 0) {
+ RTE_LOG(ERR, EAL, "Failed to hotplug remove device on secondary\n");
+ ret = tmp_req.result;
+ if (ret != -ENOENT)
+ goto rollback;
+ }
+
+ ret = local_dev_remove(dev);
+ if (ret != 0) {
+ RTE_LOG(ERR, EAL, "Failed to hotplug remove device on primary\n");
+ if (ret != -ENOENT)
+ goto rollback;
+ }
+ } else {
+ RTE_LOG(ERR, EAL, "unsupported secondary to primary request\n");
+ ret = -ENOTSUP;
+ }
+ goto finish;
+
+rollback:
+ if (req->t == EAL_DEV_REQ_TYPE_ATTACH) {
+ tmp_req.t = EAL_DEV_REQ_TYPE_ATTACH_ROLLBACK;
+ eal_dev_hotplug_request_to_secondary(&tmp_req);
+ local_dev_remove(dev);
+ } else {
+ tmp_req.t = EAL_DEV_REQ_TYPE_DETACH_ROLLBACK;
+ eal_dev_hotplug_request_to_secondary(&tmp_req);
+ }
+
+finish:
+ ret = send_response_to_secondary(&tmp_req, ret, bundle->peer);
+ if (ret)
+ RTE_LOG(ERR, EAL, "failed to send response to secondary\n");
+
+ free(bundle->peer);
+ free(bundle);
+}
+
+static int
+handle_secondary_request(const struct rte_mp_msg *msg, const void *peer)
+{
+ struct mp_reply_bundle *bundle;
+ const struct eal_dev_mp_req *req =
+ (const struct eal_dev_mp_req *)msg->param;
+ int ret = 0;
+
+ bundle = malloc(sizeof(*bundle));
+ if (bundle == NULL) {
+ RTE_LOG(ERR, EAL, "not enough memory\n");
+ return send_response_to_secondary(req, -ENOMEM, peer);
+ }
+
+ bundle->msg = *msg;
+ /**
+ * We need to send reply on interrupt thread, but peer can't be
+ * parsed directly, so this is a temporal hack, need to be fixed
+ * when it is ready.
+ */
+ bundle->peer = strdup(peer);
+
+ /**
+ * We are at IPC callback thread, sync IPC is not allowed due to
+ * dead lock, so we delegate the task to interrupt thread.
+ */
+ ret = rte_eal_alarm_set(1, __handle_secondary_request, bundle);
+ if (ret != 0) {
+ RTE_LOG(ERR, EAL, "failed to add mp task\n");
+ return send_response_to_secondary(req, ret, peer);
+ }
+ return 0;
+}
+
+static void __handle_primary_request(void *param)
+{
+ struct mp_reply_bundle *bundle = param;
+ struct rte_mp_msg *msg = &bundle->msg;
+ const struct eal_dev_mp_req *req =
+ (const struct eal_dev_mp_req *)msg->param;
+ struct rte_mp_msg mp_resp;
+ struct eal_dev_mp_req *resp =
+ (struct eal_dev_mp_req *)mp_resp.param;
+ struct rte_devargs *da;
+ struct rte_device *dev;
+ struct rte_bus *bus;
+ int ret = 0;
+
+ memset(&mp_resp, 0, sizeof(mp_resp));
+
+ switch (req->t) {
+ case EAL_DEV_REQ_TYPE_ATTACH:
+ case EAL_DEV_REQ_TYPE_DETACH_ROLLBACK:
+ ret = local_dev_probe(req->devargs, &dev);
+ break;
+ case EAL_DEV_REQ_TYPE_DETACH:
+ case EAL_DEV_REQ_TYPE_ATTACH_ROLLBACK:
+ da = calloc(1, sizeof(*da));
+ if (da == NULL) {
+ ret = -ENOMEM;
+ goto quit;
+ }
+
+ ret = rte_devargs_parse(da, req->devargs);
+ if (ret != 0)
+ goto quit;
+
+ bus = rte_bus_find_by_name(da->bus->name);
+ if (bus == NULL) {
+ RTE_LOG(ERR, EAL, "Cannot find bus (%s)\n", da->bus->name);
+ ret = -ENOENT;
+ goto quit;
+ }
+
+ dev = bus->find_device(NULL, cmp_dev_name, da->name);
+ if (dev == NULL) {
+ RTE_LOG(ERR, EAL, "Cannot find plugged device (%s)\n", da->name);
+ ret = -ENOENT;
+ goto quit;
+ }
+
+ ret = local_dev_remove(dev);
+quit:
+ break;
+ default:
+ ret = -EINVAL;
+ }
+
+ strlcpy(mp_resp.name, EAL_DEV_MP_ACTION_REQUEST, sizeof(mp_resp.name));
+ mp_resp.len_param = sizeof(*req);
+ memcpy(resp, req, sizeof(*resp));
+ resp->result = ret;
+ if (rte_mp_reply(&mp_resp, bundle->peer) < 0)
+ RTE_LOG(ERR, EAL, "failed to send reply to primary request\n");
+
+ free(bundle->peer);
+ free(bundle);
+}
+
+static int
+handle_primary_request(const struct rte_mp_msg *msg, const void *peer)
+{
+ struct rte_mp_msg mp_resp;
+ const struct eal_dev_mp_req *req =
+ (const struct eal_dev_mp_req *)msg->param;
+ struct eal_dev_mp_req *resp =
+ (struct eal_dev_mp_req *)mp_resp.param;
+ struct mp_reply_bundle *bundle;
+ int ret = 0;
+
+ memset(&mp_resp, 0, sizeof(mp_resp));
+ strlcpy(mp_resp.name, EAL_DEV_MP_ACTION_REQUEST, sizeof(mp_resp.name));
+ mp_resp.len_param = sizeof(*req);
+ memcpy(resp, req, sizeof(*resp));
+
+ bundle = calloc(1, sizeof(*bundle));
+ if (bundle == NULL) {
+ resp->result = -ENOMEM;
+ ret = rte_mp_reply(&mp_resp, peer);
+ if (ret)
+ RTE_LOG(ERR, EAL, "failed to send reply to primary request\n");
+ return ret;
+ }
+
+ bundle->msg = *msg;
+ /**
+ * We need to send reply on interrupt thread, but peer can't be
+ * parsed directly, so this is a temporal hack, need to be fixed
+ * when it is ready.
+ */
+ bundle->peer = (void *)strdup(peer);
+
+ /**
+ * We are at IPC callback thread, sync IPC is not allowed due to
+ * dead lock, so we delegate the task to interrupt thread.
+ */
+ ret = rte_eal_alarm_set(1, __handle_primary_request, bundle);
+ if (ret != 0) {
+ resp->result = ret;
+ ret = rte_mp_reply(&mp_resp, peer);
+ if (ret != 0) {
+ RTE_LOG(ERR, EAL, "failed to send reply to primary request\n");
+ return ret;
+ }
+ }
+ return 0;
+}
+
+int eal_dev_hotplug_request_to_primary(struct eal_dev_mp_req *req)
+{
+ struct rte_mp_msg mp_req;
+ struct rte_mp_reply mp_reply;
+ struct timespec ts = {.tv_sec = MP_TIMEOUT_S, .tv_nsec = 0};
+ struct eal_dev_mp_req *resp;
+ int ret;
+
+ memset(&mp_req, 0, sizeof(mp_req));
+ memcpy(mp_req.param, req, sizeof(*req));
+ mp_req.len_param = sizeof(*req);
+ strlcpy(mp_req.name, EAL_DEV_MP_ACTION_REQUEST, sizeof(mp_req.name));
+
+ ret = rte_mp_request_sync(&mp_req, &mp_reply, &ts);
+ if (ret || mp_reply.nb_received != 1) {
+ RTE_LOG(ERR, EAL, "cannot send request to primary");
+ if (!ret)
+ return -1;
+ return ret;
+ }
+
+ resp = (struct eal_dev_mp_req *)mp_reply.msgs[0].param;
+ req->result = resp->result;
+
+ return ret;
+}
+
+int eal_dev_hotplug_request_to_secondary(struct eal_dev_mp_req *req)
+{
+ struct rte_mp_msg mp_req;
+ struct rte_mp_reply mp_reply;
+ struct timespec ts = {.tv_sec = MP_TIMEOUT_S, .tv_nsec = 0};
+ int ret;
+ int i;
+
+ memset(&mp_req, 0, sizeof(mp_req));
+ memcpy(mp_req.param, req, sizeof(*req));
+ mp_req.len_param = sizeof(*req);
+ strlcpy(mp_req.name, EAL_DEV_MP_ACTION_REQUEST, sizeof(mp_req.name));
+
+ ret = rte_mp_request_sync(&mp_req, &mp_reply, &ts);
+ if (ret != 0) {
+ RTE_LOG(ERR, EAL, "rte_mp_request_sync failed\n");
+ return ret;
+ }
+
+ if (mp_reply.nb_sent != mp_reply.nb_received) {
+ RTE_LOG(ERR, EAL, "not all secondary reply\n");
+ return -1;
+ }
+
+ req->result = 0;
+ for (i = 0; i < mp_reply.nb_received; i++) {
+ struct eal_dev_mp_req *resp =
+ (struct eal_dev_mp_req *)mp_reply.msgs[i].param;
+ if (resp->result != 0) {
+ req->result = resp->result;
+ if (req->t == EAL_DEV_REQ_TYPE_ATTACH &&
+ req->result != -EEXIST)
+ break;
+ if (req->t == EAL_DEV_REQ_TYPE_DETACH &&
+ req->result != -ENOENT)
+ break;
+ }
+ }
+
+ return 0;
+}
+
+int rte_mp_dev_hotplug_init(void)
+{
+ int ret;
+
+ if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
+ ret = rte_mp_action_register(EAL_DEV_MP_ACTION_REQUEST,
+ handle_secondary_request);
+ if (ret != 0) {
+ RTE_LOG(ERR, EAL, "Couldn't register '%s' action\n",
+ EAL_DEV_MP_ACTION_REQUEST);
+ return ret;
+ }
+ } else {
+ ret = rte_mp_action_register(EAL_DEV_MP_ACTION_REQUEST,
+ handle_primary_request);
+ if (ret != 0) {
+ RTE_LOG(ERR, EAL, "Couldn't register '%s' action\n",
+ EAL_DEV_MP_ACTION_REQUEST);
+ return ret;
+ }
+ }
+
+ return 0;
+}
diff --git a/lib/librte_eal/common/hotplug_mp.h b/lib/librte_eal/common/hotplug_mp.h
new file mode 100644
index 00000000..597fde3d
--- /dev/null
+++ b/lib/librte_eal/common/hotplug_mp.h
@@ -0,0 +1,46 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2018 Intel Corporation
+ */
+
+#ifndef _HOTPLUG_MP_H_
+#define _HOTPLUG_MP_H_
+
+#include "rte_dev.h"
+#include "rte_bus.h"
+
+#define EAL_DEV_MP_ACTION_REQUEST "eal_dev_mp_request"
+#define EAL_DEV_MP_ACTION_RESPONSE "eal_dev_mp_response"
+
+#define EAL_DEV_MP_DEV_NAME_MAX_LEN RTE_DEV_NAME_MAX_LEN
+#define EAL_DEV_MP_BUS_NAME_MAX_LEN 32
+#define EAL_DEV_MP_DEV_ARGS_MAX_LEN 128
+
+enum eal_dev_req_type {
+ EAL_DEV_REQ_TYPE_ATTACH,
+ EAL_DEV_REQ_TYPE_DETACH,
+ EAL_DEV_REQ_TYPE_ATTACH_ROLLBACK,
+ EAL_DEV_REQ_TYPE_DETACH_ROLLBACK,
+};
+
+struct eal_dev_mp_req {
+ enum eal_dev_req_type t;
+ char devargs[EAL_DEV_MP_DEV_ARGS_MAX_LEN];
+ int result;
+};
+
+/**
+ * This is a synchronous wrapper for secondary process send
+ * request to primary process, this is invoked when an attach
+ * or detach request is issued from primary process.
+ */
+int eal_dev_hotplug_request_to_primary(struct eal_dev_mp_req *req);
+
+/**
+ * this is a synchronous wrapper for primary process send
+ * request to secondary process, this is invoked when an attach
+ * or detach request issued from secondary process.
+ */
+int eal_dev_hotplug_request_to_secondary(struct eal_dev_mp_req *req);
+
+
+#endif /* _HOTPLUG_MP_H_ */
diff --git a/lib/librte_eal/common/include/arch/arm/rte_cycles_32.h b/lib/librte_eal/common/include/arch/arm/rte_cycles_32.h
index c4f974fe..859b0974 100644
--- a/lib/librte_eal/common/include/arch/arm/rte_cycles_32.h
+++ b/lib/librte_eal/common/include/arch/arm/rte_cycles_32.h
@@ -29,8 +29,8 @@ extern "C" {
#ifndef RTE_ARM_EAL_RDTSC_USE_PMU
/**
- * This call is easily portable to any ARM architecture, however,
- * it may be damn slow and inprecise for some tasks.
+ * This call is easily portable to any architecture, however,
+ * it may require a system call and inprecise for some tasks.
*/
static inline uint64_t
__rte_rdtsc_syscall(void)
diff --git a/lib/librte_eal/common/include/arch/ppc_64/meson.build b/lib/librte_eal/common/include/arch/ppc_64/meson.build
new file mode 100644
index 00000000..00f96117
--- /dev/null
+++ b/lib/librte_eal/common/include/arch/ppc_64/meson.build
@@ -0,0 +1,16 @@
+# SPDX-License-Identifier: BSD-3-Clause
+# Copyright(c) 2018 Luca Boccassi <bluca@debian.org>
+
+install_headers(
+ 'rte_atomic.h',
+ 'rte_byteorder.h',
+ 'rte_cpuflags.h',
+ 'rte_cycles.h',
+ 'rte_io.h',
+ 'rte_memcpy.h',
+ 'rte_pause.h',
+ 'rte_prefetch.h',
+ 'rte_rwlock.h',
+ 'rte_spinlock.h',
+ 'rte_vect.h',
+ subdir: get_option('include_subdir_arch'))
diff --git a/lib/librte_eal/common/include/arch/ppc_64/rte_pause.h b/lib/librte_eal/common/include/arch/ppc_64/rte_pause.h
index 8bd83576..16e47ce2 100644
--- a/lib/librte_eal/common/include/arch/ppc_64/rte_pause.h
+++ b/lib/librte_eal/common/include/arch/ppc_64/rte_pause.h
@@ -9,10 +9,17 @@
extern "C" {
#endif
+#include "rte_atomic.h"
+
#include "generic/rte_pause.h"
static inline void rte_pause(void)
{
+ /* Set hardware multi-threading low priority */
+ asm volatile("or 1,1,1");
+ /* Set hardware multi-threading medium priority */
+ asm volatile("or 2,2,2");
+ rte_compiler_barrier();
}
#ifdef __cplusplus
diff --git a/lib/librte_eal/common/include/generic/rte_cycles.h b/lib/librte_eal/common/include/generic/rte_cycles.h
index 0ff1af50..ac379e87 100644
--- a/lib/librte_eal/common/include/generic/rte_cycles.h
+++ b/lib/librte_eal/common/include/generic/rte_cycles.h
@@ -13,6 +13,7 @@
*/
#include <stdint.h>
+#include <rte_compat.h>
#include <rte_debug.h>
#include <rte_atomic.h>
@@ -158,6 +159,16 @@ rte_delay_ms(unsigned ms)
void rte_delay_us_block(unsigned int us);
/**
+ * Delay function that uses system sleep.
+ * Does not block the CPU core.
+ *
+ * @param us
+ * Number of microseconds to wait.
+ */
+void __rte_experimental
+rte_delay_us_sleep(unsigned int us);
+
+/**
* Replace rte_delay_us with user defined function.
*
* @param userfunc
diff --git a/lib/librte_eal/common/include/rte_bitmap.h b/lib/librte_eal/common/include/rte_bitmap.h
index d9facc64..7a36ce73 100644
--- a/lib/librte_eal/common/include/rte_bitmap.h
+++ b/lib/librte_eal/common/include/rte_bitmap.h
@@ -88,7 +88,7 @@ __rte_bitmap_index1_inc(struct rte_bitmap *bmp)
static inline uint64_t
__rte_bitmap_mask1_get(struct rte_bitmap *bmp)
{
- return (~1lu) << bmp->offset1;
+ return (~1llu) << bmp->offset1;
}
static inline void
@@ -317,7 +317,7 @@ rte_bitmap_get(struct rte_bitmap *bmp, uint32_t pos)
index2 = pos >> RTE_BITMAP_SLAB_BIT_SIZE_LOG2;
offset2 = pos & RTE_BITMAP_SLAB_BIT_MASK;
slab2 = bmp->array2 + index2;
- return (*slab2) & (1lu << offset2);
+ return (*slab2) & (1llu << offset2);
}
/**
@@ -342,8 +342,8 @@ rte_bitmap_set(struct rte_bitmap *bmp, uint32_t pos)
slab2 = bmp->array2 + index2;
slab1 = bmp->array1 + index1;
- *slab2 |= 1lu << offset2;
- *slab1 |= 1lu << offset1;
+ *slab2 |= 1llu << offset2;
+ *slab1 |= 1llu << offset1;
}
/**
@@ -370,7 +370,7 @@ rte_bitmap_set_slab(struct rte_bitmap *bmp, uint32_t pos, uint64_t slab)
slab1 = bmp->array1 + index1;
*slab2 |= slab;
- *slab1 |= 1lu << offset1;
+ *slab1 |= 1llu << offset1;
}
static inline uint64_t
@@ -408,7 +408,7 @@ rte_bitmap_clear(struct rte_bitmap *bmp, uint32_t pos)
slab2 = bmp->array2 + index2;
/* Return if array2 slab is not all-zeros */
- *slab2 &= ~(1lu << offset2);
+ *slab2 &= ~(1llu << offset2);
if (*slab2){
return;
}
@@ -424,7 +424,7 @@ rte_bitmap_clear(struct rte_bitmap *bmp, uint32_t pos)
index1 = pos >> (RTE_BITMAP_SLAB_BIT_SIZE_LOG2 + RTE_BITMAP_CL_BIT_SIZE_LOG2);
offset1 = (pos >> RTE_BITMAP_CL_BIT_SIZE_LOG2) & RTE_BITMAP_SLAB_BIT_MASK;
slab1 = bmp->array1 + index1;
- *slab1 &= ~(1lu << offset1);
+ *slab1 &= ~(1llu << offset1);
return;
}
diff --git a/lib/librte_eal/common/include/rte_bus.h b/lib/librte_eal/common/include/rte_bus.h
index b7b5b084..6be4b5ca 100644
--- a/lib/librte_eal/common/include/rte_bus.h
+++ b/lib/librte_eal/common/include/rte_bus.h
@@ -168,6 +168,35 @@ typedef int (*rte_bus_unplug_t)(struct rte_device *dev);
typedef int (*rte_bus_parse_t)(const char *name, void *addr);
/**
+ * Implement a specific hot-unplug handler, which is responsible for
+ * handle the failure when device be hot-unplugged. When the event of
+ * hot-unplug be detected, it could call this function to handle
+ * the hot-unplug failure and avoid app crash.
+ * @param dev
+ * Pointer of the device structure.
+ *
+ * @return
+ * 0 on success.
+ * !0 on error.
+ */
+typedef int (*rte_bus_hot_unplug_handler_t)(struct rte_device *dev);
+
+/**
+ * Implement a specific sigbus handler, which is responsible for handling
+ * the sigbus error which is either original memory error, or specific memory
+ * error that caused of device be hot-unplugged. When sigbus error be captured,
+ * it could call this function to handle sigbus error.
+ * @param failure_addr
+ * Pointer of the fault address of the sigbus error.
+ *
+ * @return
+ * 0 for success handle the sigbus for hot-unplug.
+ * 1 for not process it, because it is a generic sigbus error.
+ * -1 for failed to handle the sigbus for hot-unplug.
+ */
+typedef int (*rte_bus_sigbus_handler_t)(const void *failure_addr);
+
+/**
* Bus scan policies
*/
enum rte_bus_scan_mode {
@@ -212,6 +241,11 @@ struct rte_bus {
struct rte_bus_conf conf; /**< Bus configuration */
rte_bus_get_iommu_class_t get_iommu_class; /**< Get iommu class */
rte_dev_iterate_t dev_iterate; /**< Device iterator. */
+ rte_bus_hot_unplug_handler_t hot_unplug_handler;
+ /**< handle hot-unplug failure on the bus */
+ rte_bus_sigbus_handler_t sigbus_handler;
+ /**< handle sigbus error on the bus */
+
};
/**
diff --git a/lib/librte_eal/common/include/rte_common.h b/lib/librte_eal/common/include/rte_common.h
index 069c13ec..cba7bbc1 100644
--- a/lib/librte_eal/common/include/rte_common.h
+++ b/lib/librte_eal/common/include/rte_common.h
@@ -68,6 +68,11 @@ typedef uint16_t unaligned_uint16_t;
/******* Macro to mark functions and fields scheduled for removal *****/
#define __rte_deprecated __attribute__((__deprecated__))
+/**
+ * Mark a function or variable to a weak reference.
+ */
+#define __rte_weak __attribute__((__weak__))
+
/*********** Macros to eliminate unused variable warnings ********/
/**
@@ -164,6 +169,12 @@ static void __attribute__((destructor(RTE_PRIO(prio)), used)) func(void)
*/
#define RTE_PTR_DIFF(ptr1, ptr2) ((uintptr_t)(ptr1) - (uintptr_t)(ptr2))
+/**
+ * Workaround to cast a const field of a structure to non-const type.
+ */
+#define RTE_CAST_FIELD(var, field, type) \
+ (*(type *)((uintptr_t)(var) + offsetof(typeof(*(var)), field)))
+
/*********** Macros/static functions for doing alignment ********/
diff --git a/lib/librte_eal/common/include/rte_dev.h b/lib/librte_eal/common/include/rte_dev.h
index b80a8059..cd6c187c 100644
--- a/lib/librte_eal/common/include/rte_dev.h
+++ b/lib/librte_eal/common/include/rte_dev.h
@@ -39,7 +39,7 @@ struct rte_dev_event {
char *devname; /**< device name */
};
-typedef void (*rte_dev_event_cb_fn)(char *device_name,
+typedef void (*rte_dev_event_cb_fn)(const char *device_name,
enum rte_dev_event_type event,
void *cb_arg);
@@ -156,63 +156,67 @@ struct rte_driver {
struct rte_device {
TAILQ_ENTRY(rte_device) next; /**< Next device */
const char *name; /**< Device name */
- const struct rte_driver *driver;/**< Associated driver */
+ const struct rte_driver *driver; /**< Driver assigned after probing */
+ const struct rte_bus *bus; /**< Bus handle assigned on scan */
int numa_node; /**< NUMA node connection */
- struct rte_devargs *devargs; /**< Device user arguments */
+ struct rte_devargs *devargs; /**< Arguments for latest probing */
};
/**
- * Attach a device to a registered driver.
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice
*
- * @param name
- * The device name, that refers to a pci device (or some private
- * way of designating a vdev device). Based on this device name, eal
- * will identify a driver capable of handling it and pass it to the
- * driver probing function.
- * @param devargs
- * Device arguments to be passed to the driver.
- * @return
- * 0 on success, negative on error.
- */
-__rte_deprecated
-int rte_eal_dev_attach(const char *name, const char *devargs);
-
-/**
- * Detach a device from its driver.
+ * Query status of a device.
*
* @param dev
- * A pointer to a rte_device structure.
+ * Generic device pointer.
* @return
- * 0 on success, negative on error.
+ * (int)true if already probed successfully, 0 otherwise.
*/
-__rte_deprecated
-int rte_eal_dev_detach(struct rte_device *dev);
+__rte_experimental
+int rte_dev_is_probed(const struct rte_device *dev);
/**
- * @warning
- * @b EXPERIMENTAL: this API may change without prior notice
- *
* Hotplug add a given device to a specific bus.
*
+ * In multi-process, it will request other processes to add the same device.
+ * A failure, in any process, will rollback the action
+ *
* @param busname
* The bus name the device is added to.
* @param devname
* The device name. Based on this device name, eal will identify a driver
* capable of handling it and pass it to the driver probing function.
- * @param devargs
+ * @param drvargs
* Device arguments to be passed to the driver.
* @return
* 0 on success, negative on error.
*/
-int __rte_experimental rte_eal_hotplug_add(const char *busname, const char *devname,
- const char *devargs);
+int rte_eal_hotplug_add(const char *busname, const char *devname,
+ const char *drvargs);
/**
* @warning
* @b EXPERIMENTAL: this API may change without prior notice
*
+ * Add matching devices.
+ *
+ * In multi-process, it will request other processes to add the same device.
+ * A failure, in any process, will rollback the action
+ *
+ * @param devargs
+ * Device arguments including bus, class and driver properties.
+ * @return
+ * 0 on success, negative on error.
+ */
+int __rte_experimental rte_dev_probe(const char *devargs);
+
+/**
* Hotplug remove a given device from a specific bus.
*
+ * In multi-process, it will request other processes to remove the same device.
+ * A failure, in any process, will rollback the action
+ *
* @param busname
* The bus name the device is removed from.
* @param devname
@@ -220,8 +224,23 @@ int __rte_experimental rte_eal_hotplug_add(const char *busname, const char *devn
* @return
* 0 on success, negative on error.
*/
-int __rte_experimental rte_eal_hotplug_remove(const char *busname,
- const char *devname);
+int rte_eal_hotplug_remove(const char *busname, const char *devname);
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice
+ *
+ * Remove one device.
+ *
+ * In multi-process, it will request other processes to remove the same device.
+ * A failure, in any process, will rollback the action
+ *
+ * @param dev
+ * Data structure of the device to remove.
+ * @return
+ * 0 on success, negative on error.
+ */
+int __rte_experimental rte_dev_remove(struct rte_device *dev);
/**
* Device comparison function.
@@ -438,6 +457,22 @@ rte_dev_event_callback_unregister(const char *device_name,
* @warning
* @b EXPERIMENTAL: this API may change without prior notice
*
+ * Executes all the user application registered callbacks for
+ * the specific device.
+ *
+ * @param device_name
+ * The device name.
+ * @param event
+ * the device event type.
+ */
+void __rte_experimental
+rte_dev_event_callback_process(const char *device_name,
+ enum rte_dev_event_type event);
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice
+ *
* Start the device event monitoring.
*
* @return
@@ -460,4 +495,30 @@ rte_dev_event_monitor_start(void);
int __rte_experimental
rte_dev_event_monitor_stop(void);
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice
+ *
+ * Enable hotplug handling for devices.
+ *
+ * @return
+ * - On success, zero.
+ * - On failure, a negative value.
+ */
+int __rte_experimental
+rte_dev_hotplug_handle_enable(void);
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice
+ *
+ * Disable hotplug handling for devices.
+ *
+ * @return
+ * - On success, zero.
+ * - On failure, a negative value.
+ */
+int __rte_experimental
+rte_dev_hotplug_handle_disable(void);
+
#endif /* _RTE_DEV_H_ */
diff --git a/lib/librte_eal/common/include/rte_devargs.h b/lib/librte_eal/common/include/rte_devargs.h
index 097a4ce7..b1f121f8 100644
--- a/lib/librte_eal/common/include/rte_devargs.h
+++ b/lib/librte_eal/common/include/rte_devargs.h
@@ -67,36 +67,6 @@ struct rte_devargs {
};
/**
- * @deprecated
- * Parse a devargs string.
- *
- * For PCI devices, the format of arguments string is "PCI_ADDR" or
- * "PCI_ADDR,key=val,key2=val2,...". Examples: "08:00.1", "0000:5:00.0",
- * "04:00.0,arg=val".
- *
- * For virtual devices, the format of arguments string is "DRIVER_NAME*"
- * or "DRIVER_NAME*,key=val,key2=val2,...". Examples: "net_ring",
- * "net_ring0", "net_pmdAnything,arg=0:arg2=1".
- *
- * The function parses the arguments string to get driver name and driver
- * arguments.
- *
- * @param devargs_str
- * The arguments as given by the user.
- * @param drvname
- * The pointer to the string to store parsed driver name.
- * @param drvargs
- * The pointer to the string to store parsed driver arguments.
- *
- * @return
- * - 0 on success
- * - A negative value on error
- */
-__rte_deprecated
-int rte_eal_parse_devargs_str(const char *devargs_str,
- char **drvname, char **drvargs);
-
-/**
* Parse a device string.
*
* Verify that a bus is capable of handling the device passed
@@ -202,32 +172,12 @@ __rte_experimental
int rte_devargs_add(enum rte_devtype devtype, const char *devargs_str);
/**
- * @deprecated
- * Add a device to the user device list
- * See rte_devargs_parse() for details.
- *
- * @param devtype
- * The type of the device.
- * @param devargs_str
- * The arguments as given by the user.
- *
- * @return
- * - 0 on success
- * - A negative value on error
- */
-__rte_deprecated
-int rte_eal_devargs_add(enum rte_devtype devtype, const char *devargs_str);
-
-/**
* Remove a device from the user device list.
* Its resources are freed.
* If the devargs cannot be found, nothing happens.
*
- * @param busname
- * bus name of the devargs to remove.
- *
- * @param devname
- * device name of the devargs to remove.
+ * @param devargs
+ * The instance or a copy of devargs to remove.
*
* @return
* 0 on success.
@@ -235,8 +185,7 @@ int rte_eal_devargs_add(enum rte_devtype devtype, const char *devargs_str);
* >0 if the devargs was not within the user device list.
*/
__rte_experimental
-int rte_devargs_remove(const char *busname,
- const char *devname);
+int rte_devargs_remove(struct rte_devargs *devargs);
/**
* Count the number of user devices of a specified type
@@ -252,20 +201,6 @@ unsigned int
rte_devargs_type_count(enum rte_devtype devtype);
/**
- * @deprecated
- * Count the number of user devices of a specified type
- *
- * @param devtype
- * The type of the devices to counted.
- *
- * @return
- * The number of devices.
- */
-__rte_deprecated
-unsigned int
-rte_eal_devargs_type_count(enum rte_devtype devtype);
-
-/**
* This function dumps the list of user device and their arguments.
*
* @param f
@@ -275,16 +210,6 @@ __rte_experimental
void rte_devargs_dump(FILE *f);
/**
- * @deprecated
- * This function dumps the list of user device and their arguments.
- *
- * @param f
- * A pointer to a file for output
- */
-__rte_deprecated
-void rte_eal_devargs_dump(FILE *f);
-
-/**
* Find next rte_devargs matching the provided bus name.
*
* @param busname
diff --git a/lib/librte_eal/common/include/rte_eal.h b/lib/librte_eal/common/include/rte_eal.h
index e114dcbd..a0cedd57 100644
--- a/lib/librte_eal/common/include/rte_eal.h
+++ b/lib/librte_eal/common/include/rte_eal.h
@@ -316,7 +316,7 @@ rte_mp_sendmsg(struct rte_mp_msg *msg);
*
* @param reply
* The reply argument will be for storing all the replied messages;
- * the caller is responsible for free reply->replies.
+ * the caller is responsible for free reply->msgs.
*
* @param ts
* The ts argument specifies how long we can wait for the peer(s) to reply.
@@ -378,6 +378,15 @@ int __rte_experimental
rte_mp_reply(struct rte_mp_msg *msg, const char *peer);
/**
+ * Register all mp action callbacks for hotplug.
+ *
+ * @return
+ * 0 on success, negative on error.
+ */
+int __rte_experimental
+rte_mp_dev_hotplug_init(void);
+
+/**
* Usage function typedef used by the application usage function.
*
* Use this function typedef to define and call rte_set_application_usage_hook()
@@ -498,6 +507,15 @@ enum rte_iova_mode rte_eal_iova_mode(void);
const char *
rte_eal_mbuf_user_pool_ops(void);
+/**
+ * Get the runtime directory of DPDK
+ *
+ * @return
+ * The runtime directory path of DPDK
+ */
+const char *
+rte_eal_get_runtime_dir(void);
+
#ifdef __cplusplus
}
#endif
diff --git a/lib/librte_eal/common/include/rte_eal_interrupts.h b/lib/librte_eal/common/include/rte_eal_interrupts.h
index 6eb49327..9d302f41 100644
--- a/lib/librte_eal/common/include/rte_eal_interrupts.h
+++ b/lib/librte_eal/common/include/rte_eal_interrupts.h
@@ -35,6 +35,7 @@ enum rte_intr_handle_type {
RTE_INTR_HANDLE_EXT, /**< external handler */
RTE_INTR_HANDLE_VDEV, /**< virtual device */
RTE_INTR_HANDLE_DEV_EVENT, /**< device event handle */
+ RTE_INTR_HANDLE_VFIO_REQ, /**< VFIO request handle */
RTE_INTR_HANDLE_MAX /**< count of elements */
};
diff --git a/lib/librte_eal/common/include/rte_eal_memconfig.h b/lib/librte_eal/common/include/rte_eal_memconfig.h
index aff0688d..84aabe36 100644
--- a/lib/librte_eal/common/include/rte_eal_memconfig.h
+++ b/lib/librte_eal/common/include/rte_eal_memconfig.h
@@ -30,9 +30,11 @@ struct rte_memseg_list {
uint64_t addr_64;
/**< Makes sure addr is always 64-bits */
};
- int socket_id; /**< Socket ID for all memsegs in this list. */
uint64_t page_sz; /**< Page size for all memsegs in this list. */
+ int socket_id; /**< Socket ID for all memsegs in this list. */
volatile uint32_t version; /**< version number for multiprocess sync. */
+ size_t len; /**< Length of memory area covered by this memseg list. */
+ unsigned int external; /**< 1 if this list points to external memory */
struct rte_fbarray memseg_arr;
};
@@ -70,13 +72,23 @@ struct rte_mem_config {
struct rte_tailq_head tailq_head[RTE_MAX_TAILQ]; /**< Tailqs for objects */
- /* Heaps of Malloc per socket */
- struct malloc_heap malloc_heaps[RTE_MAX_NUMA_NODES];
+ /* Heaps of Malloc */
+ struct malloc_heap malloc_heaps[RTE_MAX_HEAPS];
+
+ /* next socket ID for external malloc heap */
+ int next_socket_id;
/* address of mem_config in primary process. used to map shared config into
* exact same address the primary process maps it.
*/
uint64_t mem_cfg_addr;
+
+ /* legacy mem and single file segments options are shared */
+ uint32_t legacy_mem;
+ uint32_t single_file_segments;
+
+ /* keeps the more restricted dma mask */
+ uint8_t dma_maskbits;
} __attribute__((__packed__));
diff --git a/lib/librte_eal/common/include/rte_malloc.h b/lib/librte_eal/common/include/rte_malloc.h
index a9fb7e45..7249e6aa 100644
--- a/lib/librte_eal/common/include/rte_malloc.h
+++ b/lib/librte_eal/common/include/rte_malloc.h
@@ -264,6 +264,198 @@ rte_malloc_get_socket_stats(int socket,
struct rte_malloc_socket_stats *socket_stats);
/**
+ * Add memory chunk to a heap with specified name.
+ *
+ * @note Multiple memory chunks can be added to the same heap
+ *
+ * @note Before accessing this memory in other processes, it needs to be
+ * attached in each of those processes by calling
+ * ``rte_malloc_heap_memory_attach`` in each other process.
+ *
+ * @note Memory must be previously allocated for DPDK to be able to use it as a
+ * malloc heap. Failing to do so will result in undefined behavior, up to and
+ * including segmentation faults.
+ *
+ * @note Calling this function will erase any contents already present at the
+ * supplied memory address.
+ *
+ * @param heap_name
+ * Name of the heap to add memory chunk to
+ * @param va_addr
+ * Start of virtual area to add to the heap
+ * @param len
+ * Length of virtual area to add to the heap
+ * @param iova_addrs
+ * Array of page IOVA addresses corresponding to each page in this memory
+ * area. Can be NULL, in which case page IOVA addresses will be set to
+ * RTE_BAD_IOVA.
+ * @param n_pages
+ * Number of elements in the iova_addrs array. Ignored if ``iova_addrs``
+ * is NULL.
+ * @param page_sz
+ * Page size of the underlying memory
+ *
+ * @return
+ * - 0 on success
+ * - -1 in case of error, with rte_errno set to one of the following:
+ * EINVAL - one of the parameters was invalid
+ * EPERM - attempted to add memory to a reserved heap
+ * ENOSPC - no more space in internal config to store a new memory chunk
+ */
+int __rte_experimental
+rte_malloc_heap_memory_add(const char *heap_name, void *va_addr, size_t len,
+ rte_iova_t iova_addrs[], unsigned int n_pages, size_t page_sz);
+
+/**
+ * Remove memory chunk from heap with specified name.
+ *
+ * @note Memory chunk being removed must be the same as one that was added;
+ * partially removing memory chunks is not supported
+ *
+ * @note Memory area must not contain any allocated elements to allow its
+ * removal from the heap
+ *
+ * @note All other processes must detach from the memory chunk prior to it being
+ * removed from the heap.
+ *
+ * @param heap_name
+ * Name of the heap to remove memory from
+ * @param va_addr
+ * Virtual address to remove from the heap
+ * @param len
+ * Length of virtual area to remove from the heap
+ *
+ * @return
+ * - 0 on success
+ * - -1 in case of error, with rte_errno set to one of the following:
+ * EINVAL - one of the parameters was invalid
+ * EPERM - attempted to remove memory from a reserved heap
+ * ENOENT - heap or memory chunk was not found
+ * EBUSY - memory chunk still contains data
+ */
+int __rte_experimental
+rte_malloc_heap_memory_remove(const char *heap_name, void *va_addr, size_t len);
+
+/**
+ * Attach to an already existing chunk of external memory in another process.
+ *
+ * @note This function must be called before any attempt is made to use an
+ * already existing external memory chunk. This function does *not* need to
+ * be called if a call to ``rte_malloc_heap_memory_add`` was made in the
+ * current process.
+ *
+ * @param heap_name
+ * Heap name to which this chunk of memory belongs
+ * @param va_addr
+ * Start address of memory chunk to attach to
+ * @param len
+ * Length of memory chunk to attach to
+ * @return
+ * 0 on successful attach
+ * -1 on unsuccessful attach, with rte_errno set to indicate cause for error:
+ * EINVAL - one of the parameters was invalid
+ * EPERM - attempted to attach memory to a reserved heap
+ * ENOENT - heap or memory chunk was not found
+ */
+int __rte_experimental
+rte_malloc_heap_memory_attach(const char *heap_name, void *va_addr, size_t len);
+
+/**
+ * Detach from a chunk of external memory in secondary process.
+ *
+ * @note This function must be called in before any attempt is made to remove
+ * external memory from the heap in another process. This function does *not*
+ * need to be called if a call to ``rte_malloc_heap_memory_remove`` will be
+ * called in current process.
+ *
+ * @param heap_name
+ * Heap name to which this chunk of memory belongs
+ * @param va_addr
+ * Start address of memory chunk to attach to
+ * @param len
+ * Length of memory chunk to attach to
+ * @return
+ * 0 on successful detach
+ * -1 on unsuccessful detach, with rte_errno set to indicate cause for error:
+ * EINVAL - one of the parameters was invalid
+ * EPERM - attempted to detach memory from a reserved heap
+ * ENOENT - heap or memory chunk was not found
+ */
+int __rte_experimental
+rte_malloc_heap_memory_detach(const char *heap_name, void *va_addr, size_t len);
+
+/**
+ * Creates a new empty malloc heap with a specified name.
+ *
+ * @note Heaps created via this call will automatically get assigned a unique
+ * socket ID, which can be found using ``rte_malloc_heap_get_socket()``
+ *
+ * @param heap_name
+ * Name of the heap to create.
+ *
+ * @return
+ * - 0 on successful creation
+ * - -1 in case of error, with rte_errno set to one of the following:
+ * EINVAL - ``heap_name`` was NULL, empty or too long
+ * EEXIST - heap by name of ``heap_name`` already exists
+ * ENOSPC - no more space in internal config to store a new heap
+ */
+int __rte_experimental
+rte_malloc_heap_create(const char *heap_name);
+
+/**
+ * Destroys a previously created malloc heap with specified name.
+ *
+ * @note This function will return a failure result if not all memory allocated
+ * from the heap has been freed back to the heap
+ *
+ * @note This function will return a failure result if not all memory segments
+ * were removed from the heap prior to its destruction
+ *
+ * @param heap_name
+ * Name of the heap to create.
+ *
+ * @return
+ * - 0 on success
+ * - -1 in case of error, with rte_errno set to one of the following:
+ * EINVAL - ``heap_name`` was NULL, empty or too long
+ * ENOENT - heap by the name of ``heap_name`` was not found
+ * EPERM - attempting to destroy reserved heap
+ * EBUSY - heap still contains data
+ */
+int __rte_experimental
+rte_malloc_heap_destroy(const char *heap_name);
+
+/**
+ * Find socket ID corresponding to a named heap.
+ *
+ * @param name
+ * Heap name to find socket ID for
+ * @return
+ * Socket ID in case of success (a non-negative number)
+ * -1 in case of error, with rte_errno set to one of the following:
+ * EINVAL - ``name`` was NULL
+ * ENOENT - heap identified by the name ``name`` was not found
+ */
+int __rte_experimental
+rte_malloc_heap_get_socket(const char *name);
+
+/**
+ * Check if a given socket ID refers to externally allocated memory.
+ *
+ * @note Passing SOCKET_ID_ANY will return 0.
+ *
+ * @param socket_id
+ * Socket ID to check
+ * @return
+ * 1 if socket ID refers to externally allocated memory
+ * 0 if socket ID refers to internal DPDK memory
+ * -1 if socket ID is invalid
+ */
+int __rte_experimental
+rte_malloc_heap_socket_is_external(int socket_id);
+
+/**
* Dump statistics.
*
* Dump for the specified type to a file. If the type argument is
diff --git a/lib/librte_eal/common/include/rte_malloc_heap.h b/lib/librte_eal/common/include/rte_malloc_heap.h
index d43fa909..4a7e0eb1 100644
--- a/lib/librte_eal/common/include/rte_malloc_heap.h
+++ b/lib/librte_eal/common/include/rte_malloc_heap.h
@@ -12,6 +12,7 @@
/* Number of free lists per heap, grouped by size. */
#define RTE_HEAP_NUM_FREELISTS 13
+#define RTE_HEAP_NAME_MAX_LEN 32
/* dummy definition, for pointers */
struct malloc_elem;
@@ -26,7 +27,9 @@ struct malloc_heap {
struct malloc_elem *volatile last;
unsigned alloc_count;
+ unsigned int socket_id;
size_t total_size;
+ char name[RTE_HEAP_NAME_MAX_LEN];
} __rte_cache_aligned;
#endif /* _RTE_MALLOC_HEAP_H_ */
diff --git a/lib/librte_eal/common/include/rte_memory.h b/lib/librte_eal/common/include/rte_memory.h
index c4b7f4cf..ce937058 100644
--- a/lib/librte_eal/common/include/rte_memory.h
+++ b/lib/librte_eal/common/include/rte_memory.h
@@ -215,6 +215,9 @@ typedef int (*rte_memseg_list_walk_t)(const struct rte_memseg_list *msl,
* @note This function read-locks the memory hotplug subsystem, and thus cannot
* be used within memory-related callback functions.
*
+ * @note This function will also walk through externally allocated segments. It
+ * is up to the user to decide whether to skip through these segments.
+ *
* @param func
* Iterator function
* @param arg
@@ -233,6 +236,9 @@ rte_memseg_walk(rte_memseg_walk_t func, void *arg);
* @note This function read-locks the memory hotplug subsystem, and thus cannot
* be used within memory-related callback functions.
*
+ * @note This function will also walk through externally allocated segments. It
+ * is up to the user to decide whether to skip through these segments.
+ *
* @param func
* Iterator function
* @param arg
@@ -251,6 +257,9 @@ rte_memseg_contig_walk(rte_memseg_contig_walk_t func, void *arg);
* @note This function read-locks the memory hotplug subsystem, and thus cannot
* be used within memory-related callback functions.
*
+ * @note This function will also walk through externally allocated segments. It
+ * is up to the user to decide whether to skip through these segments.
+ *
* @param func
* Iterator function
* @param arg
@@ -318,6 +327,103 @@ int __rte_experimental
rte_memseg_list_walk_thread_unsafe(rte_memseg_list_walk_t func, void *arg);
/**
+ * Return file descriptor associated with a particular memseg (if available).
+ *
+ * @note This function read-locks the memory hotplug subsystem, and thus cannot
+ * be used within memory-related callback functions.
+ *
+ * @note This returns an internal file descriptor. Performing any operations on
+ * this file descriptor is inherently dangerous, so it should be treated
+ * as read-only for all intents and purposes.
+ *
+ * @param ms
+ * A pointer to memseg for which to get file descriptor.
+ *
+ * @return
+ * Valid file descriptor in case of success.
+ * -1 in case of error, with ``rte_errno`` set to the following values:
+ * - EINVAL - ``ms`` pointer was NULL or did not point to a valid memseg
+ * - ENODEV - ``ms`` fd is not available
+ * - ENOENT - ``ms`` is an unused segment
+ * - ENOTSUP - segment fd's are not supported
+ */
+int __rte_experimental
+rte_memseg_get_fd(const struct rte_memseg *ms);
+
+/**
+ * Return file descriptor associated with a particular memseg (if available).
+ *
+ * @note This function does not perform any locking, and is only safe to call
+ * from within memory-related callback functions.
+ *
+ * @note This returns an internal file descriptor. Performing any operations on
+ * this file descriptor is inherently dangerous, so it should be treated
+ * as read-only for all intents and purposes.
+ *
+ * @param ms
+ * A pointer to memseg for which to get file descriptor.
+ *
+ * @return
+ * Valid file descriptor in case of success.
+ * -1 in case of error, with ``rte_errno`` set to the following values:
+ * - EINVAL - ``ms`` pointer was NULL or did not point to a valid memseg
+ * - ENODEV - ``ms`` fd is not available
+ * - ENOENT - ``ms`` is an unused segment
+ * - ENOTSUP - segment fd's are not supported
+ */
+int __rte_experimental
+rte_memseg_get_fd_thread_unsafe(const struct rte_memseg *ms);
+
+/**
+ * Get offset into segment file descriptor associated with a particular memseg
+ * (if available).
+ *
+ * @note This function read-locks the memory hotplug subsystem, and thus cannot
+ * be used within memory-related callback functions.
+ *
+ * @param ms
+ * A pointer to memseg for which to get file descriptor.
+ * @param offset
+ * A pointer to offset value where the result will be stored.
+ *
+ * @return
+ * Valid file descriptor in case of success.
+ * -1 in case of error, with ``rte_errno`` set to the following values:
+ * - EINVAL - ``ms`` pointer was NULL or did not point to a valid memseg
+ * - EINVAL - ``offset`` pointer was NULL
+ * - ENODEV - ``ms`` fd is not available
+ * - ENOENT - ``ms`` is an unused segment
+ * - ENOTSUP - segment fd's are not supported
+ */
+int __rte_experimental
+rte_memseg_get_fd_offset(const struct rte_memseg *ms, size_t *offset);
+
+/**
+ * Get offset into segment file descriptor associated with a particular memseg
+ * (if available).
+ *
+ * @note This function does not perform any locking, and is only safe to call
+ * from within memory-related callback functions.
+ *
+ * @param ms
+ * A pointer to memseg for which to get file descriptor.
+ * @param offset
+ * A pointer to offset value where the result will be stored.
+ *
+ * @return
+ * Valid file descriptor in case of success.
+ * -1 in case of error, with ``rte_errno`` set to the following values:
+ * - EINVAL - ``ms`` pointer was NULL or did not point to a valid memseg
+ * - EINVAL - ``offset`` pointer was NULL
+ * - ENODEV - ``ms`` fd is not available
+ * - ENOENT - ``ms`` is an unused segment
+ * - ENOTSUP - segment fd's are not supported
+ */
+int __rte_experimental
+rte_memseg_get_fd_offset_thread_unsafe(const struct rte_memseg *ms,
+ size_t *offset);
+
+/**
* Dump the physical memory layout to a file.
*
* @note This function read-locks the memory hotplug subsystem, and thus cannot
@@ -357,6 +463,9 @@ unsigned rte_memory_get_nchannel(void);
*/
unsigned rte_memory_get_nrank(void);
+/* check memsegs iovas are within a range based on dma mask */
+int __rte_experimental rte_eal_check_dma_mask(uint8_t maskbits);
+
/**
* Drivers based on uio will not load unless physical
* addresses are obtainable. It is only possible to get
diff --git a/lib/librte_eal/common/include/rte_option.h b/lib/librte_eal/common/include/rte_option.h
new file mode 100644
index 00000000..8957b970
--- /dev/null
+++ b/lib/librte_eal/common/include/rte_option.h
@@ -0,0 +1,63 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2018 Intel Corporation.
+ */
+
+#ifndef __INCLUDE_RTE_OPTION_H__
+#define __INCLUDE_RTE_OPTION_H__
+
+/**
+ * @file
+ *
+ * This API offers the ability to register options to the EAL command line and
+ * map those options to functions that will be executed at the end of EAL
+ * initialization. These options will be available as part of the EAL command
+ * line of applications and are dynamically managed.
+ *
+ * This is used primarily by DPDK libraries offering command line options.
+ * Currently, this API is limited to registering options without argument.
+ *
+ * The register API can be used to resolve circular dependency issues
+ * between EAL and the library. The library uses EAL, but is also initialized
+ * by EAL. Hence, EAL depends on the init function of the library. The API
+ * introduced in rte_option allows us to register the library init with EAL
+ * (passing a function pointer) and avoid the circular dependency.
+ */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+typedef int (*rte_option_cb)(void);
+
+/*
+ * Structure describing the EAL command line option being registered.
+ */
+struct rte_option {
+ TAILQ_ENTRY(rte_option) next; /**< Next entry in the list. */
+ char *opt_str; /**< The option name. */
+ rte_option_cb cb; /**< Function called when option is used. */
+ int enabled; /**< Set when the option is used. */
+};
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice
+ *
+ * Register an option to the EAL command line.
+ * When recognized, the associated function will be executed at the end of EAL
+ * initialization.
+ *
+ * The associated structure must be available the whole time this option is
+ * registered (i.e. not stack memory).
+ *
+ * @param opt
+ * Structure describing the option to parse.
+ */
+void __rte_experimental
+rte_option_register(struct rte_option *opt);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/lib/librte_eal/common/include/rte_string_fns.h b/lib/librte_eal/common/include/rte_string_fns.h
index 97597a14..9a2a1ff9 100644
--- a/lib/librte_eal/common/include/rte_string_fns.h
+++ b/lib/librte_eal/common/include/rte_string_fns.h
@@ -16,6 +16,7 @@ extern "C" {
#endif
#include <stdio.h>
+#include <string.h>
/**
* Takes string "string" parameter and splits it at character "delim"
@@ -60,12 +61,10 @@ rte_strlcpy(char *dst, const char *src, size_t size)
/* pull in a strlcpy function */
#ifdef RTE_EXEC_ENV_BSDAPP
-#include <string.h>
#ifndef __BSD_VISIBLE /* non-standard functions are hidden */
#define strlcpy(dst, src, size) rte_strlcpy(dst, src, size)
#endif
-
#else /* non-BSD platforms */
#ifdef RTE_USE_LIBBSD
#include <bsd/string.h>
@@ -76,6 +75,29 @@ rte_strlcpy(char *dst, const char *src, size_t size)
#endif /* RTE_USE_LIBBSD */
#endif /* BSDAPP */
+/**
+ * Copy string src to buffer dst of size dsize.
+ * At most dsize-1 chars will be copied.
+ * Always NUL-terminates, unless (dsize == 0).
+ * Returns number of bytes copied (terminating NUL-byte excluded) on success ;
+ * negative errno on error.
+ *
+ * @param dst
+ * The destination string.
+ *
+ * @param src
+ * The input string to be copied.
+ *
+ * @param dsize
+ * Length in bytes of the destination buffer.
+ *
+ * @return
+ * The number of bytes copied on success
+ * -E2BIG if the destination buffer is too small.
+ */
+ssize_t
+rte_strscpy(char *dst, const char *src, size_t dsize);
+
#ifdef __cplusplus
}
#endif
diff --git a/lib/librte_eal/common/include/rte_version.h b/lib/librte_eal/common/include/rte_version.h
index 7c6714a2..412ed2db 100644
--- a/lib/librte_eal/common/include/rte_version.h
+++ b/lib/librte_eal/common/include/rte_version.h
@@ -32,7 +32,7 @@ extern "C" {
/**
* Minor version/month number i.e. the mm in yy.mm.z
*/
-#define RTE_VER_MONTH 8
+#define RTE_VER_MONTH 11
/**
* Patch level number i.e. the z in yy.mm.z
@@ -42,14 +42,14 @@ extern "C" {
/**
* Extra string to be appended to version number
*/
-#define RTE_VER_SUFFIX ""
+#define RTE_VER_SUFFIX "-rc"
/**
* Patch release number
* 0-15 = release candidates
* 16 = release
*/
-#define RTE_VER_RELEASE 16
+#define RTE_VER_RELEASE 1
/**
* Macro to compute a version number usable for comparisons
diff --git a/lib/librte_eal/common/include/rte_vfio.h b/lib/librte_eal/common/include/rte_vfio.h
index 5ca13fcc..cae96fab 100644
--- a/lib/librte_eal/common/include/rte_vfio.h
+++ b/lib/librte_eal/common/include/rte_vfio.h
@@ -14,6 +14,8 @@
extern "C" {
#endif
+#include <stdint.h>
+
/*
* determine if VFIO is present on the system
*/
@@ -22,6 +24,9 @@ extern "C" {
#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 6, 0)
#define VFIO_PRESENT
#endif /* kernel version >= 3.6.0 */
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 0, 0)
+#define HAVE_VFIO_DEV_REQ_INTERFACE
+#endif /* kernel version >= 4.0.0 */
#endif /* RTE_EAL_VFIO */
#ifdef VFIO_PRESENT
@@ -44,6 +49,30 @@ extern "C" {
#define RTE_VFIO_NOIOMMU 8
#endif
+/*
+ * capabilities are only supported on kernel 4.6+. there were also some API
+ * changes as well, so add a macro to get cap offset.
+ */
+#ifdef VFIO_REGION_INFO_FLAG_CAPS
+#define RTE_VFIO_INFO_FLAG_CAPS VFIO_REGION_INFO_FLAG_CAPS
+#define VFIO_CAP_OFFSET(x) (x->cap_offset)
+#else
+#define RTE_VFIO_INFO_FLAG_CAPS (1 << 3)
+#define VFIO_CAP_OFFSET(x) (x->resv)
+struct vfio_info_cap_header {
+ uint16_t id;
+ uint16_t version;
+ uint32_t next;
+};
+#endif
+
+/* kernels 4.16+ can map BAR containing MSI-X table */
+#ifdef VFIO_REGION_INFO_CAP_MSIX_MAPPABLE
+#define RTE_VFIO_CAP_MSIX_MAPPABLE VFIO_REGION_INFO_CAP_MSIX_MAPPABLE
+#else
+#define RTE_VFIO_CAP_MSIX_MAPPABLE 3
+#endif
+
#else /* not VFIO_PRESENT */
/* we don't need an actual definition, only pointer is used */
@@ -227,7 +256,7 @@ rte_vfio_get_group_num(const char *sysfs_base,
const char *dev_addr, int *iommu_group_num);
/**
- * Open VFIO container fd or get an existing one
+ * Open a new VFIO container fd
*
* This function is only relevant to linux and will return
* an error on BSD.
diff --git a/lib/librte_eal/common/malloc_elem.c b/lib/librte_eal/common/malloc_elem.c
index e0a8ed15..1a74660d 100644
--- a/lib/librte_eal/common/malloc_elem.c
+++ b/lib/librte_eal/common/malloc_elem.c
@@ -39,10 +39,14 @@ malloc_elem_find_max_iova_contig(struct malloc_elem *elem, size_t align)
contig_seg_start = RTE_PTR_ALIGN_CEIL(data_start, align);
/* if we're in IOVA as VA mode, or if we're in legacy mode with
- * hugepages, all elements are IOVA-contiguous.
+ * hugepages, all elements are IOVA-contiguous. however, we can only
+ * make these assumptions about internal memory - externally allocated
+ * segments have to be checked.
*/
- if (rte_eal_iova_mode() == RTE_IOVA_VA ||
- (internal_config.legacy_mem && rte_eal_has_hugepages()))
+ if (!elem->msl->external &&
+ (rte_eal_iova_mode() == RTE_IOVA_VA ||
+ (internal_config.legacy_mem &&
+ rte_eal_has_hugepages())))
return RTE_PTR_DIFF(data_end, contig_seg_start);
cur_page = RTE_PTR_ALIGN_FLOOR(contig_seg_start, page_sz);
diff --git a/lib/librte_eal/common/malloc_heap.c b/lib/librte_eal/common/malloc_heap.c
index 12aaf2d7..1973b6e6 100644
--- a/lib/librte_eal/common/malloc_heap.c
+++ b/lib/librte_eal/common/malloc_heap.c
@@ -29,6 +29,10 @@
#include "malloc_heap.h"
#include "malloc_mp.h"
+/* start external socket ID's at a very high number */
+#define CONST_MAX(a, b) (a > b ? a : b) /* RTE_MAX is not a constant */
+#define EXTERNAL_HEAP_MIN_SOCKET_ID (CONST_MAX((1 << 8), RTE_MAX_NUMA_NODES))
+
static unsigned
check_hugepage_sz(unsigned flags, uint64_t hugepage_sz)
{
@@ -66,6 +70,21 @@ check_hugepage_sz(unsigned flags, uint64_t hugepage_sz)
return check_flag & flags;
}
+int
+malloc_socket_to_heap_id(unsigned int socket_id)
+{
+ struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
+ int i;
+
+ for (i = 0; i < RTE_MAX_HEAPS; i++) {
+ struct malloc_heap *heap = &mcfg->malloc_heaps[i];
+
+ if (heap->socket_id == socket_id)
+ return i;
+ }
+ return -1;
+}
+
/*
* Expand the heap with a memory area.
*/
@@ -93,9 +112,17 @@ malloc_add_seg(const struct rte_memseg_list *msl,
struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
struct rte_memseg_list *found_msl;
struct malloc_heap *heap;
- int msl_idx;
+ int msl_idx, heap_idx;
+
+ if (msl->external)
+ return 0;
- heap = &mcfg->malloc_heaps[msl->socket_id];
+ heap_idx = malloc_socket_to_heap_id(msl->socket_id);
+ if (heap_idx < 0) {
+ RTE_LOG(ERR, EAL, "Memseg list has invalid socket id\n");
+ return -1;
+ }
+ heap = &mcfg->malloc_heaps[heap_idx];
/* msl is const, so find it */
msl_idx = msl - mcfg->memsegs;
@@ -165,7 +192,9 @@ find_biggest_element(struct malloc_heap *heap, size_t *size,
for (elem = LIST_FIRST(&heap->free_head[idx]);
!!elem; elem = LIST_NEXT(elem, free_list)) {
size_t cur_size;
- if (!check_hugepage_sz(flags, elem->msl->page_sz))
+ if ((flags & RTE_MEMZONE_SIZE_HINT_ONLY) == 0 &&
+ !check_hugepage_sz(flags,
+ elem->msl->page_sz))
continue;
if (contig) {
cur_size =
@@ -259,11 +288,13 @@ alloc_pages_on_heap(struct malloc_heap *heap, uint64_t pg_sz, size_t elt_size,
int socket, unsigned int flags, size_t align, size_t bound,
bool contig, struct rte_memseg **ms, int n_segs)
{
+ struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
struct rte_memseg_list *msl;
struct malloc_elem *elem = NULL;
size_t alloc_sz;
int allocd_pages;
void *ret, *map_addr;
+ uint64_t mask;
alloc_sz = (size_t)pg_sz * n_segs;
@@ -291,6 +322,16 @@ alloc_pages_on_heap(struct malloc_heap *heap, uint64_t pg_sz, size_t elt_size,
goto fail;
}
+ if (mcfg->dma_maskbits) {
+ mask = ~((1ULL << mcfg->dma_maskbits) - 1);
+ if (rte_eal_check_dma_mask(mask)) {
+ RTE_LOG(ERR, EAL,
+ "%s(): couldn't allocate memory due to DMA mask\n",
+ __func__);
+ goto fail;
+ }
+ }
+
/* add newly minted memsegs to malloc heap */
elem = malloc_heap_add_memory(heap, msl, map_addr, alloc_sz);
@@ -326,11 +367,9 @@ try_expand_heap_primary(struct malloc_heap *heap, uint64_t pg_sz,
/* we can't know in advance how many pages we'll need, so we malloc */
ms = malloc(sizeof(*ms) * n_segs);
-
- memset(ms, 0, sizeof(*ms) * n_segs);
-
if (ms == NULL)
return -1;
+ memset(ms, 0, sizeof(*ms) * n_segs);
elem = alloc_pages_on_heap(heap, pg_sz, elt_size, socket, flags, align,
bound, contig, ms, n_segs);
@@ -560,12 +599,14 @@ alloc_more_mem_on_socket(struct malloc_heap *heap, size_t size, int socket,
/* this will try lower page sizes first */
static void *
-heap_alloc_on_socket(const char *type, size_t size, int socket,
- unsigned int flags, size_t align, size_t bound, bool contig)
+malloc_heap_alloc_on_heap_id(const char *type, size_t size,
+ unsigned int heap_id, unsigned int flags, size_t align,
+ size_t bound, bool contig)
{
struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
- struct malloc_heap *heap = &mcfg->malloc_heaps[socket];
+ struct malloc_heap *heap = &mcfg->malloc_heaps[heap_id];
unsigned int size_flags = flags & ~RTE_MEMZONE_SIZE_HINT_ONLY;
+ int socket_id;
void *ret;
rte_spinlock_lock(&(heap->lock));
@@ -583,12 +624,28 @@ heap_alloc_on_socket(const char *type, size_t size, int socket,
* we may still be able to allocate memory from appropriate page sizes,
* we just need to request more memory first.
*/
+
+ socket_id = rte_socket_id_by_idx(heap_id);
+ /*
+ * if socket ID is negative, we cannot find a socket ID for this heap -
+ * which means it's an external heap. those can have unexpected page
+ * sizes, so if the user asked to allocate from there - assume user
+ * knows what they're doing, and allow allocating from there with any
+ * page size flags.
+ */
+ if (socket_id < 0)
+ size_flags |= RTE_MEMZONE_SIZE_HINT_ONLY;
+
ret = heap_alloc(heap, type, size, size_flags, align, bound, contig);
if (ret != NULL)
goto alloc_unlock;
- if (!alloc_more_mem_on_socket(heap, size, socket, flags, align, bound,
- contig)) {
+ /* if socket ID is invalid, this is an external heap */
+ if (socket_id < 0)
+ goto alloc_unlock;
+
+ if (!alloc_more_mem_on_socket(heap, size, socket_id, flags, align,
+ bound, contig)) {
ret = heap_alloc(heap, type, size, flags, align, bound, contig);
/* this should have succeeded */
@@ -604,14 +661,14 @@ void *
malloc_heap_alloc(const char *type, size_t size, int socket_arg,
unsigned int flags, size_t align, size_t bound, bool contig)
{
- int socket, i, cur_socket;
+ int socket, heap_id, i;
void *ret;
/* return NULL if size is 0 or alignment is not power-of-2 */
if (size == 0 || (align && !rte_is_power_of_2(align)))
return NULL;
- if (!rte_eal_has_hugepages())
+ if (!rte_eal_has_hugepages() && socket_arg < RTE_MAX_NUMA_NODES)
socket_arg = SOCKET_ID_ANY;
if (socket_arg == SOCKET_ID_ANY)
@@ -619,22 +676,25 @@ malloc_heap_alloc(const char *type, size_t size, int socket_arg,
else
socket = socket_arg;
- /* Check socket parameter */
- if (socket >= RTE_MAX_NUMA_NODES)
+ /* turn socket ID into heap ID */
+ heap_id = malloc_socket_to_heap_id(socket);
+ /* if heap id is negative, socket ID was invalid */
+ if (heap_id < 0)
return NULL;
- ret = heap_alloc_on_socket(type, size, socket, flags, align, bound,
- contig);
+ ret = malloc_heap_alloc_on_heap_id(type, size, heap_id, flags, align,
+ bound, contig);
if (ret != NULL || socket_arg != SOCKET_ID_ANY)
return ret;
- /* try other heaps */
+ /* try other heaps. we are only iterating through native DPDK sockets,
+ * so external heaps won't be included.
+ */
for (i = 0; i < (int) rte_socket_count(); i++) {
- cur_socket = rte_socket_id_by_idx(i);
- if (cur_socket == socket)
+ if (i == heap_id)
continue;
- ret = heap_alloc_on_socket(type, size, cur_socket, flags,
- align, bound, contig);
+ ret = malloc_heap_alloc_on_heap_id(type, size, i, flags, align,
+ bound, contig);
if (ret != NULL)
return ret;
}
@@ -642,11 +702,11 @@ malloc_heap_alloc(const char *type, size_t size, int socket_arg,
}
static void *
-heap_alloc_biggest_on_socket(const char *type, int socket, unsigned int flags,
- size_t align, bool contig)
+heap_alloc_biggest_on_heap_id(const char *type, unsigned int heap_id,
+ unsigned int flags, size_t align, bool contig)
{
struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
- struct malloc_heap *heap = &mcfg->malloc_heaps[socket];
+ struct malloc_heap *heap = &mcfg->malloc_heaps[heap_id];
void *ret;
rte_spinlock_lock(&(heap->lock));
@@ -664,7 +724,7 @@ void *
malloc_heap_alloc_biggest(const char *type, int socket_arg, unsigned int flags,
size_t align, bool contig)
{
- int socket, i, cur_socket;
+ int socket, i, cur_socket, heap_id;
void *ret;
/* return NULL if align is not power-of-2 */
@@ -679,11 +739,13 @@ malloc_heap_alloc_biggest(const char *type, int socket_arg, unsigned int flags,
else
socket = socket_arg;
- /* Check socket parameter */
- if (socket >= RTE_MAX_NUMA_NODES)
+ /* turn socket ID into heap ID */
+ heap_id = malloc_socket_to_heap_id(socket);
+ /* if heap id is negative, socket ID was invalid */
+ if (heap_id < 0)
return NULL;
- ret = heap_alloc_biggest_on_socket(type, socket, flags, align,
+ ret = heap_alloc_biggest_on_heap_id(type, heap_id, flags, align,
contig);
if (ret != NULL || socket_arg != SOCKET_ID_ANY)
return ret;
@@ -693,8 +755,8 @@ malloc_heap_alloc_biggest(const char *type, int socket_arg, unsigned int flags,
cur_socket = rte_socket_id_by_idx(i);
if (cur_socket == socket)
continue;
- ret = heap_alloc_biggest_on_socket(type, cur_socket, flags,
- align, contig);
+ ret = heap_alloc_biggest_on_heap_id(type, i, flags, align,
+ contig);
if (ret != NULL)
return ret;
}
@@ -756,8 +818,10 @@ malloc_heap_free(struct malloc_elem *elem)
/* anything after this is a bonus */
ret = 0;
- /* ...of which we can't avail if we are in legacy mode */
- if (internal_config.legacy_mem)
+ /* ...of which we can't avail if we are in legacy mode, or if this is an
+ * externally allocated segment.
+ */
+ if (internal_config.legacy_mem || (msl->external > 0))
goto free_unlock;
/* check if we can free any memory back to the system */
@@ -914,7 +978,7 @@ malloc_heap_resize(struct malloc_elem *elem, size_t size)
}
/*
- * Function to retrieve data for heap on given socket
+ * Function to retrieve data for a given heap
*/
int
malloc_heap_get_stats(struct malloc_heap *heap,
@@ -952,7 +1016,7 @@ malloc_heap_get_stats(struct malloc_heap *heap,
}
/*
- * Function to retrieve data for heap on given socket
+ * Function to retrieve data for a given heap
*/
void
malloc_heap_dump(struct malloc_heap *heap, FILE *f)
@@ -973,10 +1037,216 @@ malloc_heap_dump(struct malloc_heap *heap, FILE *f)
rte_spinlock_unlock(&heap->lock);
}
+static int
+destroy_seg(struct malloc_elem *elem, size_t len)
+{
+ struct malloc_heap *heap = elem->heap;
+ struct rte_memseg_list *msl;
+
+ msl = elem->msl;
+
+ /* notify all subscribers that a memory area is going to be removed */
+ eal_memalloc_mem_event_notify(RTE_MEM_EVENT_FREE, elem, len);
+
+ /* this element can be removed */
+ malloc_elem_free_list_remove(elem);
+ malloc_elem_hide_region(elem, elem, len);
+
+ heap->total_size -= len;
+
+ memset(elem, 0, sizeof(*elem));
+
+ /* destroy the fbarray backing this memory */
+ if (rte_fbarray_destroy(&msl->memseg_arr) < 0)
+ return -1;
+
+ /* reset the memseg list */
+ memset(msl, 0, sizeof(*msl));
+
+ return 0;
+}
+
+int
+malloc_heap_add_external_memory(struct malloc_heap *heap, void *va_addr,
+ rte_iova_t iova_addrs[], unsigned int n_pages, size_t page_sz)
+{
+ struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
+ char fbarray_name[RTE_FBARRAY_NAME_LEN];
+ struct rte_memseg_list *msl = NULL;
+ struct rte_fbarray *arr;
+ size_t seg_len = n_pages * page_sz;
+ unsigned int i;
+
+ /* first, find a free memseg list */
+ for (i = 0; i < RTE_MAX_MEMSEG_LISTS; i++) {
+ struct rte_memseg_list *tmp = &mcfg->memsegs[i];
+ if (tmp->base_va == NULL) {
+ msl = tmp;
+ break;
+ }
+ }
+ if (msl == NULL) {
+ RTE_LOG(ERR, EAL, "Couldn't find empty memseg list\n");
+ rte_errno = ENOSPC;
+ return -1;
+ }
+
+ snprintf(fbarray_name, sizeof(fbarray_name) - 1, "%s_%p",
+ heap->name, va_addr);
+
+ /* create the backing fbarray */
+ if (rte_fbarray_init(&msl->memseg_arr, fbarray_name, n_pages,
+ sizeof(struct rte_memseg)) < 0) {
+ RTE_LOG(ERR, EAL, "Couldn't create fbarray backing the memseg list\n");
+ return -1;
+ }
+ arr = &msl->memseg_arr;
+
+ /* fbarray created, fill it up */
+ for (i = 0; i < n_pages; i++) {
+ struct rte_memseg *ms;
+
+ rte_fbarray_set_used(arr, i);
+ ms = rte_fbarray_get(arr, i);
+ ms->addr = RTE_PTR_ADD(va_addr, i * page_sz);
+ ms->iova = iova_addrs == NULL ? RTE_BAD_IOVA : iova_addrs[i];
+ ms->hugepage_sz = page_sz;
+ ms->len = page_sz;
+ ms->nchannel = rte_memory_get_nchannel();
+ ms->nrank = rte_memory_get_nrank();
+ ms->socket_id = heap->socket_id;
+ }
+
+ /* set up the memseg list */
+ msl->base_va = va_addr;
+ msl->page_sz = page_sz;
+ msl->socket_id = heap->socket_id;
+ msl->len = seg_len;
+ msl->version = 0;
+ msl->external = 1;
+
+ /* erase contents of new memory */
+ memset(va_addr, 0, seg_len);
+
+ /* now, add newly minted memory to the malloc heap */
+ malloc_heap_add_memory(heap, msl, va_addr, seg_len);
+
+ heap->total_size += seg_len;
+
+ /* all done! */
+ RTE_LOG(DEBUG, EAL, "Added segment for heap %s starting at %p\n",
+ heap->name, va_addr);
+
+ /* notify all subscribers that a new memory area has been added */
+ eal_memalloc_mem_event_notify(RTE_MEM_EVENT_ALLOC,
+ va_addr, seg_len);
+
+ return 0;
+}
+
+int
+malloc_heap_remove_external_memory(struct malloc_heap *heap, void *va_addr,
+ size_t len)
+{
+ struct malloc_elem *elem = heap->first;
+
+ /* find element with specified va address */
+ while (elem != NULL && elem != va_addr) {
+ elem = elem->next;
+ /* stop if we've blown past our VA */
+ if (elem > (struct malloc_elem *)va_addr) {
+ rte_errno = ENOENT;
+ return -1;
+ }
+ }
+ /* check if element was found */
+ if (elem == NULL || elem->msl->len != len) {
+ rte_errno = ENOENT;
+ return -1;
+ }
+ /* if element's size is not equal to segment len, segment is busy */
+ if (elem->state == ELEM_BUSY || elem->size != len) {
+ rte_errno = EBUSY;
+ return -1;
+ }
+ return destroy_seg(elem, len);
+}
+
+int
+malloc_heap_create(struct malloc_heap *heap, const char *heap_name)
+{
+ struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
+ uint32_t next_socket_id = mcfg->next_socket_id;
+
+ /* prevent overflow. did you really create 2 billion heaps??? */
+ if (next_socket_id > INT32_MAX) {
+ RTE_LOG(ERR, EAL, "Cannot assign new socket ID's\n");
+ rte_errno = ENOSPC;
+ return -1;
+ }
+
+ /* initialize empty heap */
+ heap->alloc_count = 0;
+ heap->first = NULL;
+ heap->last = NULL;
+ LIST_INIT(heap->free_head);
+ rte_spinlock_init(&heap->lock);
+ heap->total_size = 0;
+ heap->socket_id = next_socket_id;
+
+ /* we hold a global mem hotplug writelock, so it's safe to increment */
+ mcfg->next_socket_id++;
+
+ /* set up name */
+ strlcpy(heap->name, heap_name, RTE_HEAP_NAME_MAX_LEN);
+ return 0;
+}
+
+int
+malloc_heap_destroy(struct malloc_heap *heap)
+{
+ if (heap->alloc_count != 0) {
+ RTE_LOG(ERR, EAL, "Heap is still in use\n");
+ rte_errno = EBUSY;
+ return -1;
+ }
+ if (heap->first != NULL || heap->last != NULL) {
+ RTE_LOG(ERR, EAL, "Heap still contains memory segments\n");
+ rte_errno = EBUSY;
+ return -1;
+ }
+ if (heap->total_size != 0)
+ RTE_LOG(ERR, EAL, "Total size not zero, heap is likely corrupt\n");
+
+ /* after this, the lock will be dropped */
+ memset(heap, 0, sizeof(*heap));
+
+ return 0;
+}
+
int
rte_eal_malloc_heap_init(void)
{
struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
+ unsigned int i;
+
+ if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
+ /* assign min socket ID to external heaps */
+ mcfg->next_socket_id = EXTERNAL_HEAP_MIN_SOCKET_ID;
+
+ /* assign names to default DPDK heaps */
+ for (i = 0; i < rte_socket_count(); i++) {
+ struct malloc_heap *heap = &mcfg->malloc_heaps[i];
+ char heap_name[RTE_HEAP_NAME_MAX_LEN];
+ int socket_id = rte_socket_id_by_idx(i);
+
+ snprintf(heap_name, sizeof(heap_name) - 1,
+ "socket_%i", socket_id);
+ strlcpy(heap->name, heap_name, RTE_HEAP_NAME_MAX_LEN);
+ heap->socket_id = socket_id;
+ }
+ }
+
if (register_mp_requests()) {
RTE_LOG(ERR, EAL, "Couldn't register malloc multiprocess actions\n");
diff --git a/lib/librte_eal/common/malloc_heap.h b/lib/librte_eal/common/malloc_heap.h
index f52cb555..e48996d5 100644
--- a/lib/librte_eal/common/malloc_heap.h
+++ b/lib/librte_eal/common/malloc_heap.h
@@ -34,6 +34,20 @@ malloc_heap_alloc_biggest(const char *type, int socket, unsigned int flags,
size_t align, bool contig);
int
+malloc_heap_create(struct malloc_heap *heap, const char *heap_name);
+
+int
+malloc_heap_destroy(struct malloc_heap *heap);
+
+int
+malloc_heap_add_external_memory(struct malloc_heap *heap, void *va_addr,
+ rte_iova_t iova_addrs[], unsigned int n_pages, size_t page_sz);
+
+int
+malloc_heap_remove_external_memory(struct malloc_heap *heap, void *va_addr,
+ size_t len);
+
+int
malloc_heap_free(struct malloc_elem *elem);
int
@@ -47,6 +61,9 @@ void
malloc_heap_dump(struct malloc_heap *heap, FILE *f);
int
+malloc_socket_to_heap_id(unsigned int socket_id);
+
+int
rte_eal_malloc_heap_init(void);
#ifdef __cplusplus
diff --git a/lib/librte_eal/common/malloc_mp.c b/lib/librte_eal/common/malloc_mp.c
index 931c14bc..5f2d4e0b 100644
--- a/lib/librte_eal/common/malloc_mp.c
+++ b/lib/librte_eal/common/malloc_mp.c
@@ -194,13 +194,11 @@ handle_alloc_request(const struct malloc_mp_req *m,
/* we can't know in advance how many pages we'll need, so we malloc */
ms = malloc(sizeof(*ms) * n_segs);
-
- memset(ms, 0, sizeof(*ms) * n_segs);
-
if (ms == NULL) {
RTE_LOG(ERR, EAL, "Couldn't allocate memory for request state\n");
goto fail;
}
+ memset(ms, 0, sizeof(*ms) * n_segs);
elem = alloc_pages_on_heap(heap, ar->page_sz, ar->elt_size, ar->socket,
ar->flags, ar->align, ar->bound, ar->contig, ms,
diff --git a/lib/librte_eal/common/meson.build b/lib/librte_eal/common/meson.build
index 56005bea..2a10d57d 100644
--- a/lib/librte_eal/common/meson.build
+++ b/lib/librte_eal/common/meson.build
@@ -14,6 +14,7 @@ common_sources = files(
'eal_common_errno.c',
'eal_common_fbarray.c',
'eal_common_hexdump.c',
+ 'eal_common_hypervisor.c',
'eal_common_launch.c',
'eal_common_lcore.c',
'eal_common_log.c',
@@ -27,11 +28,13 @@ common_sources = files(
'eal_common_thread.c',
'eal_common_timer.c',
'eal_common_uuid.c',
+ 'hotplug_mp.c',
'malloc_elem.c',
'malloc_heap.c',
'malloc_mp.c',
'rte_keepalive.c',
'rte_malloc.c',
+ 'rte_option.c',
'rte_reciprocal.c',
'rte_service.c'
)
@@ -59,6 +62,7 @@ common_headers = files(
'include/rte_errno.h',
'include/rte_fbarray.h',
'include/rte_hexdump.h',
+ 'include/rte_hypervisor.h',
'include/rte_interrupts.h',
'include/rte_keepalive.h',
'include/rte_launch.h',
@@ -68,6 +72,7 @@ common_headers = files(
'include/rte_malloc_heap.h',
'include/rte_memory.h',
'include/rte_memzone.h',
+ 'include/rte_option.h',
'include/rte_pci_dev_feature_defs.h',
'include/rte_pci_dev_features.h',
'include/rte_per_lcore.h',
diff --git a/lib/librte_eal/common/rte_malloc.c b/lib/librte_eal/common/rte_malloc.c
index b51a6d11..9e61dc41 100644
--- a/lib/librte_eal/common/rte_malloc.c
+++ b/lib/librte_eal/common/rte_malloc.c
@@ -8,6 +8,7 @@
#include <string.h>
#include <sys/queue.h>
+#include <rte_errno.h>
#include <rte_memcpy.h>
#include <rte_memory.h>
#include <rte_eal.h>
@@ -23,6 +24,7 @@
#include <rte_malloc.h>
#include "malloc_elem.h"
#include "malloc_heap.h"
+#include "eal_memalloc.h"
/* Free the memory space back to heap */
@@ -44,13 +46,15 @@ rte_malloc_socket(const char *type, size_t size, unsigned int align,
if (size == 0 || (align && !rte_is_power_of_2(align)))
return NULL;
- if (!rte_eal_has_hugepages())
+ /* if there are no hugepages and if we are not allocating from an
+ * external heap, use memory from any socket available. checking for
+ * socket being external may return -1 in case of invalid socket, but
+ * that's OK - if there are no hugepages, it doesn't matter.
+ */
+ if (rte_malloc_heap_socket_is_external(socket_arg) != 1 &&
+ !rte_eal_has_hugepages())
socket_arg = SOCKET_ID_ANY;
- /* Check socket parameter */
- if (socket_arg >= RTE_MAX_NUMA_NODES)
- return NULL;
-
return malloc_heap_alloc(type, size, socket_arg, 0,
align == 0 ? 1 : align, 0, false);
}
@@ -152,11 +156,20 @@ rte_malloc_get_socket_stats(int socket,
struct rte_malloc_socket_stats *socket_stats)
{
struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
+ int heap_idx, ret = -1;
- if (socket >= RTE_MAX_NUMA_NODES || socket < 0)
- return -1;
+ rte_rwlock_read_lock(&mcfg->memory_hotplug_lock);
+
+ heap_idx = malloc_socket_to_heap_id(socket);
+ if (heap_idx < 0)
+ goto unlock;
- return malloc_heap_get_stats(&mcfg->malloc_heaps[socket], socket_stats);
+ ret = malloc_heap_get_stats(&mcfg->malloc_heaps[heap_idx],
+ socket_stats);
+unlock:
+ rte_rwlock_read_unlock(&mcfg->memory_hotplug_lock);
+
+ return ret;
}
/*
@@ -168,12 +181,75 @@ rte_malloc_dump_heaps(FILE *f)
struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
unsigned int idx;
- for (idx = 0; idx < rte_socket_count(); idx++) {
- unsigned int socket = rte_socket_id_by_idx(idx);
- fprintf(f, "Heap on socket %i:\n", socket);
- malloc_heap_dump(&mcfg->malloc_heaps[socket], f);
+ rte_rwlock_read_lock(&mcfg->memory_hotplug_lock);
+
+ for (idx = 0; idx < RTE_MAX_HEAPS; idx++) {
+ fprintf(f, "Heap id: %u\n", idx);
+ malloc_heap_dump(&mcfg->malloc_heaps[idx], f);
+ }
+
+ rte_rwlock_read_unlock(&mcfg->memory_hotplug_lock);
+}
+
+int
+rte_malloc_heap_get_socket(const char *name)
+{
+ struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
+ struct malloc_heap *heap = NULL;
+ unsigned int idx;
+ int ret;
+
+ if (name == NULL ||
+ strnlen(name, RTE_HEAP_NAME_MAX_LEN) == 0 ||
+ strnlen(name, RTE_HEAP_NAME_MAX_LEN) ==
+ RTE_HEAP_NAME_MAX_LEN) {
+ rte_errno = EINVAL;
+ return -1;
+ }
+ rte_rwlock_read_lock(&mcfg->memory_hotplug_lock);
+ for (idx = 0; idx < RTE_MAX_HEAPS; idx++) {
+ struct malloc_heap *tmp = &mcfg->malloc_heaps[idx];
+
+ if (!strncmp(name, tmp->name, RTE_HEAP_NAME_MAX_LEN)) {
+ heap = tmp;
+ break;
+ }
+ }
+
+ if (heap != NULL) {
+ ret = heap->socket_id;
+ } else {
+ rte_errno = ENOENT;
+ ret = -1;
+ }
+ rte_rwlock_read_unlock(&mcfg->memory_hotplug_lock);
+
+ return ret;
+}
+
+int
+rte_malloc_heap_socket_is_external(int socket_id)
+{
+ struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
+ unsigned int idx;
+ int ret = -1;
+
+ if (socket_id == SOCKET_ID_ANY)
+ return 0;
+
+ rte_rwlock_read_lock(&mcfg->memory_hotplug_lock);
+ for (idx = 0; idx < RTE_MAX_HEAPS; idx++) {
+ struct malloc_heap *tmp = &mcfg->malloc_heaps[idx];
+
+ if ((int)tmp->socket_id == socket_id) {
+ /* external memory always has large socket ID's */
+ ret = tmp->socket_id >= RTE_MAX_NUMA_NODES;
+ break;
+ }
}
+ rte_rwlock_read_unlock(&mcfg->memory_hotplug_lock);
+ return ret;
}
/*
@@ -182,14 +258,20 @@ rte_malloc_dump_heaps(FILE *f)
void
rte_malloc_dump_stats(FILE *f, __rte_unused const char *type)
{
- unsigned int socket;
+ struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
+ unsigned int heap_id;
struct rte_malloc_socket_stats sock_stats;
+
+ rte_rwlock_read_lock(&mcfg->memory_hotplug_lock);
+
/* Iterate through all initialised heaps */
- for (socket=0; socket< RTE_MAX_NUMA_NODES; socket++) {
- if ((rte_malloc_get_socket_stats(socket, &sock_stats) < 0))
- continue;
+ for (heap_id = 0; heap_id < RTE_MAX_HEAPS; heap_id++) {
+ struct malloc_heap *heap = &mcfg->malloc_heaps[heap_id];
+
+ malloc_heap_get_stats(heap, &sock_stats);
- fprintf(f, "Socket:%u\n", socket);
+ fprintf(f, "Heap id:%u\n", heap_id);
+ fprintf(f, "\tHeap name:%s\n", heap->name);
fprintf(f, "\tHeap_size:%zu,\n", sock_stats.heap_totalsz_bytes);
fprintf(f, "\tFree_size:%zu,\n", sock_stats.heap_freesz_bytes);
fprintf(f, "\tAlloc_size:%zu,\n", sock_stats.heap_allocsz_bytes);
@@ -198,6 +280,7 @@ rte_malloc_dump_stats(FILE *f, __rte_unused const char *type)
fprintf(f, "\tAlloc_count:%u,\n",sock_stats.alloc_count);
fprintf(f, "\tFree_count:%u,\n", sock_stats.free_count);
}
+ rte_rwlock_read_unlock(&mcfg->memory_hotplug_lock);
return;
}
@@ -223,7 +306,7 @@ rte_malloc_virt2iova(const void *addr)
if (elem == NULL)
return RTE_BAD_IOVA;
- if (rte_eal_iova_mode() == RTE_IOVA_VA)
+ if (!elem->msl->external && rte_eal_iova_mode() == RTE_IOVA_VA)
return (uintptr_t) addr;
ms = rte_mem_virt2memseg(addr, elem->msl);
@@ -235,3 +318,320 @@ rte_malloc_virt2iova(const void *addr)
return ms->iova + RTE_PTR_DIFF(addr, ms->addr);
}
+
+static struct malloc_heap *
+find_named_heap(const char *name)
+{
+ struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
+ unsigned int i;
+
+ for (i = 0; i < RTE_MAX_HEAPS; i++) {
+ struct malloc_heap *heap = &mcfg->malloc_heaps[i];
+
+ if (!strncmp(name, heap->name, RTE_HEAP_NAME_MAX_LEN))
+ return heap;
+ }
+ return NULL;
+}
+
+int
+rte_malloc_heap_memory_add(const char *heap_name, void *va_addr, size_t len,
+ rte_iova_t iova_addrs[], unsigned int n_pages, size_t page_sz)
+{
+ struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
+ struct malloc_heap *heap = NULL;
+ unsigned int n;
+ int ret;
+
+ if (heap_name == NULL || va_addr == NULL ||
+ page_sz == 0 || !rte_is_power_of_2(page_sz) ||
+ strnlen(heap_name, RTE_HEAP_NAME_MAX_LEN) == 0 ||
+ strnlen(heap_name, RTE_HEAP_NAME_MAX_LEN) ==
+ RTE_HEAP_NAME_MAX_LEN) {
+ rte_errno = EINVAL;
+ ret = -1;
+ goto unlock;
+ }
+ rte_rwlock_write_lock(&mcfg->memory_hotplug_lock);
+
+ /* find our heap */
+ heap = find_named_heap(heap_name);
+ if (heap == NULL) {
+ rte_errno = ENOENT;
+ ret = -1;
+ goto unlock;
+ }
+ if (heap->socket_id < RTE_MAX_NUMA_NODES) {
+ /* cannot add memory to internal heaps */
+ rte_errno = EPERM;
+ ret = -1;
+ goto unlock;
+ }
+ n = len / page_sz;
+ if (n != n_pages && iova_addrs != NULL) {
+ rte_errno = EINVAL;
+ ret = -1;
+ goto unlock;
+ }
+
+ rte_spinlock_lock(&heap->lock);
+ ret = malloc_heap_add_external_memory(heap, va_addr, iova_addrs, n,
+ page_sz);
+ rte_spinlock_unlock(&heap->lock);
+
+unlock:
+ rte_rwlock_write_unlock(&mcfg->memory_hotplug_lock);
+
+ return ret;
+}
+
+int
+rte_malloc_heap_memory_remove(const char *heap_name, void *va_addr, size_t len)
+{
+ struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
+ struct malloc_heap *heap = NULL;
+ int ret;
+
+ if (heap_name == NULL || va_addr == NULL || len == 0 ||
+ strnlen(heap_name, RTE_HEAP_NAME_MAX_LEN) == 0 ||
+ strnlen(heap_name, RTE_HEAP_NAME_MAX_LEN) ==
+ RTE_HEAP_NAME_MAX_LEN) {
+ rte_errno = EINVAL;
+ return -1;
+ }
+ rte_rwlock_write_lock(&mcfg->memory_hotplug_lock);
+ /* find our heap */
+ heap = find_named_heap(heap_name);
+ if (heap == NULL) {
+ rte_errno = ENOENT;
+ ret = -1;
+ goto unlock;
+ }
+ if (heap->socket_id < RTE_MAX_NUMA_NODES) {
+ /* cannot remove memory from internal heaps */
+ rte_errno = EPERM;
+ ret = -1;
+ goto unlock;
+ }
+
+ rte_spinlock_lock(&heap->lock);
+ ret = malloc_heap_remove_external_memory(heap, va_addr, len);
+ rte_spinlock_unlock(&heap->lock);
+
+unlock:
+ rte_rwlock_write_unlock(&mcfg->memory_hotplug_lock);
+
+ return ret;
+}
+
+struct sync_mem_walk_arg {
+ void *va_addr;
+ size_t len;
+ int result;
+ bool attach;
+};
+
+static int
+sync_mem_walk(const struct rte_memseg_list *msl, void *arg)
+{
+ struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
+ struct sync_mem_walk_arg *wa = arg;
+ size_t len = msl->page_sz * msl->memseg_arr.len;
+
+ if (msl->base_va == wa->va_addr &&
+ len == wa->len) {
+ struct rte_memseg_list *found_msl;
+ int msl_idx, ret;
+
+ /* msl is const */
+ msl_idx = msl - mcfg->memsegs;
+ found_msl = &mcfg->memsegs[msl_idx];
+
+ if (wa->attach) {
+ ret = rte_fbarray_attach(&found_msl->memseg_arr);
+ } else {
+ /* notify all subscribers that a memory area is about to
+ * be removed
+ */
+ eal_memalloc_mem_event_notify(RTE_MEM_EVENT_FREE,
+ msl->base_va, msl->len);
+ ret = rte_fbarray_detach(&found_msl->memseg_arr);
+ }
+
+ if (ret < 0) {
+ wa->result = -rte_errno;
+ } else {
+ /* notify all subscribers that a new memory area was
+ * added
+ */
+ if (wa->attach)
+ eal_memalloc_mem_event_notify(
+ RTE_MEM_EVENT_ALLOC,
+ msl->base_va, msl->len);
+ wa->result = 0;
+ }
+ return 1;
+ }
+ return 0;
+}
+
+static int
+sync_memory(const char *heap_name, void *va_addr, size_t len, bool attach)
+{
+ struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
+ struct malloc_heap *heap = NULL;
+ struct sync_mem_walk_arg wa;
+ int ret;
+
+ if (heap_name == NULL || va_addr == NULL || len == 0 ||
+ strnlen(heap_name, RTE_HEAP_NAME_MAX_LEN) == 0 ||
+ strnlen(heap_name, RTE_HEAP_NAME_MAX_LEN) ==
+ RTE_HEAP_NAME_MAX_LEN) {
+ rte_errno = EINVAL;
+ return -1;
+ }
+ rte_rwlock_read_lock(&mcfg->memory_hotplug_lock);
+
+ /* find our heap */
+ heap = find_named_heap(heap_name);
+ if (heap == NULL) {
+ rte_errno = ENOENT;
+ ret = -1;
+ goto unlock;
+ }
+ /* we shouldn't be able to sync to internal heaps */
+ if (heap->socket_id < RTE_MAX_NUMA_NODES) {
+ rte_errno = EPERM;
+ ret = -1;
+ goto unlock;
+ }
+
+ /* find corresponding memseg list to sync to */
+ wa.va_addr = va_addr;
+ wa.len = len;
+ wa.result = -ENOENT; /* fail unless explicitly told to succeed */
+ wa.attach = attach;
+
+ /* we're already holding a read lock */
+ rte_memseg_list_walk_thread_unsafe(sync_mem_walk, &wa);
+
+ if (wa.result < 0) {
+ rte_errno = -wa.result;
+ ret = -1;
+ } else {
+ /* notify all subscribers that a new memory area was added */
+ if (attach)
+ eal_memalloc_mem_event_notify(RTE_MEM_EVENT_ALLOC,
+ va_addr, len);
+ ret = 0;
+ }
+unlock:
+ rte_rwlock_read_unlock(&mcfg->memory_hotplug_lock);
+ return ret;
+}
+
+int
+rte_malloc_heap_memory_attach(const char *heap_name, void *va_addr, size_t len)
+{
+ return sync_memory(heap_name, va_addr, len, true);
+}
+
+int
+rte_malloc_heap_memory_detach(const char *heap_name, void *va_addr, size_t len)
+{
+ return sync_memory(heap_name, va_addr, len, false);
+}
+
+int
+rte_malloc_heap_create(const char *heap_name)
+{
+ struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
+ struct malloc_heap *heap = NULL;
+ int i, ret;
+
+ if (heap_name == NULL ||
+ strnlen(heap_name, RTE_HEAP_NAME_MAX_LEN) == 0 ||
+ strnlen(heap_name, RTE_HEAP_NAME_MAX_LEN) ==
+ RTE_HEAP_NAME_MAX_LEN) {
+ rte_errno = EINVAL;
+ return -1;
+ }
+ /* check if there is space in the heap list, or if heap with this name
+ * already exists.
+ */
+ rte_rwlock_write_lock(&mcfg->memory_hotplug_lock);
+
+ for (i = 0; i < RTE_MAX_HEAPS; i++) {
+ struct malloc_heap *tmp = &mcfg->malloc_heaps[i];
+ /* existing heap */
+ if (strncmp(heap_name, tmp->name,
+ RTE_HEAP_NAME_MAX_LEN) == 0) {
+ RTE_LOG(ERR, EAL, "Heap %s already exists\n",
+ heap_name);
+ rte_errno = EEXIST;
+ ret = -1;
+ goto unlock;
+ }
+ /* empty heap */
+ if (strnlen(tmp->name, RTE_HEAP_NAME_MAX_LEN) == 0) {
+ heap = tmp;
+ break;
+ }
+ }
+ if (heap == NULL) {
+ RTE_LOG(ERR, EAL, "Cannot create new heap: no space\n");
+ rte_errno = ENOSPC;
+ ret = -1;
+ goto unlock;
+ }
+
+ /* we're sure that we can create a new heap, so do it */
+ ret = malloc_heap_create(heap, heap_name);
+unlock:
+ rte_rwlock_write_unlock(&mcfg->memory_hotplug_lock);
+
+ return ret;
+}
+
+int
+rte_malloc_heap_destroy(const char *heap_name)
+{
+ struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
+ struct malloc_heap *heap = NULL;
+ int ret;
+
+ if (heap_name == NULL ||
+ strnlen(heap_name, RTE_HEAP_NAME_MAX_LEN) == 0 ||
+ strnlen(heap_name, RTE_HEAP_NAME_MAX_LEN) ==
+ RTE_HEAP_NAME_MAX_LEN) {
+ rte_errno = EINVAL;
+ return -1;
+ }
+ rte_rwlock_write_lock(&mcfg->memory_hotplug_lock);
+
+ /* start from non-socket heaps */
+ heap = find_named_heap(heap_name);
+ if (heap == NULL) {
+ RTE_LOG(ERR, EAL, "Heap %s not found\n", heap_name);
+ rte_errno = ENOENT;
+ ret = -1;
+ goto unlock;
+ }
+ /* we shouldn't be able to destroy internal heaps */
+ if (heap->socket_id < RTE_MAX_NUMA_NODES) {
+ rte_errno = EPERM;
+ ret = -1;
+ goto unlock;
+ }
+ /* sanity checks done, now we can destroy the heap */
+ rte_spinlock_lock(&heap->lock);
+ ret = malloc_heap_destroy(heap);
+
+ /* if we failed, lock is still active */
+ if (ret < 0)
+ rte_spinlock_unlock(&heap->lock);
+unlock:
+ rte_rwlock_write_unlock(&mcfg->memory_hotplug_lock);
+
+ return ret;
+}
diff --git a/lib/librte_eal/common/rte_option.c b/lib/librte_eal/common/rte_option.c
new file mode 100644
index 00000000..02d59a86
--- /dev/null
+++ b/lib/librte_eal/common/rte_option.c
@@ -0,0 +1,54 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2018 Intel Corporation.
+ */
+
+#include <unistd.h>
+#include <string.h>
+
+#include <rte_eal.h>
+#include <rte_option.h>
+
+#include "eal_private.h"
+
+TAILQ_HEAD(rte_option_list, rte_option);
+
+struct rte_option_list rte_option_list =
+ TAILQ_HEAD_INITIALIZER(rte_option_list);
+
+static struct rte_option *option;
+
+int
+rte_option_parse(const char *opt)
+{
+ /* Check if the option is registered */
+ TAILQ_FOREACH(option, &rte_option_list, next) {
+ if (strcmp(opt, option->opt_str) == 0) {
+ option->enabled = 1;
+ return 0;
+ }
+ }
+
+ return -1;
+}
+
+void __rte_experimental
+rte_option_register(struct rte_option *opt)
+{
+ TAILQ_FOREACH(option, &rte_option_list, next) {
+ if (strcmp(opt->opt_str, option->opt_str) == 0)
+ RTE_LOG(INFO, EAL, "Option %s has already been registered.",
+ opt->opt_str);
+ return;
+ }
+
+ TAILQ_INSERT_HEAD(&rte_option_list, opt, next);
+}
+
+void
+rte_option_init(void)
+{
+ TAILQ_FOREACH(option, &rte_option_list, next) {
+ if (option->enabled)
+ option->cb();
+ }
+}