author     Luca Boccassi <luca.boccassi@gmail.com>    2018-08-14 18:52:30 +0100
committer  Luca Boccassi <luca.boccassi@gmail.com>    2018-08-14 18:53:17 +0100
commit     b63264c8342e6a1b6971c79550d2af2024b6a4de (patch)
tree       83114aac64286fe616506c0b3dfaec2ab86ef835 /lib/librte_eal/common
parent     ca33590b6af032bff57d9cc70455660466a654b2 (diff)

New upstream version 18.08 (upstream/18.08)

Change-Id: I32fdf5e5016556d9c0a6d88ddaf1fc468961790a
Signed-off-by: Luca Boccassi <luca.boccassi@gmail.com>
Diffstat (limited to 'lib/librte_eal/common')
-rw-r--r--  lib/librte_eal/common/Makefile | 4
-rw-r--r--  lib/librte_eal/common/arch/arm/rte_cpuflags.c | 54
-rw-r--r--  lib/librte_eal/common/arch/arm/rte_hypervisor.c | 2
-rw-r--r--  lib/librte_eal/common/arch/ppc_64/rte_cpuflags.c | 15
-rw-r--r--  lib/librte_eal/common/arch/ppc_64/rte_hypervisor.c | 2
-rw-r--r--  lib/librte_eal/common/arch/x86/rte_hypervisor.c | 2
-rw-r--r--  lib/librte_eal/common/eal_common_bus.c | 3
-rw-r--r--  lib/librte_eal/common/eal_common_class.c | 64
-rw-r--r--  lib/librte_eal/common/eal_common_dev.c | 443
-rw-r--r--  lib/librte_eal/common/eal_common_devargs.c | 228
-rw-r--r--  lib/librte_eal/common/eal_common_fbarray.c | 1239
-rw-r--r--  lib/librte_eal/common/eal_common_hypervisor.c | 2
-rw-r--r--  lib/librte_eal/common/eal_common_lcore.c | 75
-rw-r--r--  lib/librte_eal/common/eal_common_log.c | 121
-rw-r--r--  lib/librte_eal/common/eal_common_memalloc.c | 364
-rw-r--r--  lib/librte_eal/common/eal_common_memory.c | 528
-rw-r--r--  lib/librte_eal/common/eal_common_memzone.c | 290
-rw-r--r--  lib/librte_eal/common/eal_common_options.c | 192
-rw-r--r--  lib/librte_eal/common/eal_common_proc.c | 713
-rw-r--r--  lib/librte_eal/common/eal_common_thread.c | 98
-rw-r--r--  lib/librte_eal/common/eal_common_uuid.c | 193
-rw-r--r--  lib/librte_eal/common/eal_filesystem.h | 70
-rw-r--r--  lib/librte_eal/common/eal_hugepages.h | 11
-rw-r--r--  lib/librte_eal/common/eal_internal_cfg.h | 20
-rw-r--r--  lib/librte_eal/common/eal_memalloc.h | 82
-rw-r--r--  lib/librte_eal/common/eal_options.h | 8
-rw-r--r--  lib/librte_eal/common/eal_private.h | 99
-rw-r--r--  lib/librte_eal/common/include/arch/arm/rte_atomic.h | 32
-rw-r--r--  lib/librte_eal/common/include/arch/arm/rte_atomic_32.h | 32
-rw-r--r--  lib/librte_eal/common/include/arch/arm/rte_byteorder.h | 32
-rw-r--r--  lib/librte_eal/common/include/arch/arm/rte_cpuflags.h | 32
-rw-r--r--  lib/librte_eal/common/include/arch/arm/rte_cpuflags_32.h | 32
-rw-r--r--  lib/librte_eal/common/include/arch/arm/rte_cycles.h | 32
-rw-r--r--  lib/librte_eal/common/include/arch/arm/rte_cycles_32.h | 32
-rw-r--r--  lib/librte_eal/common/include/arch/arm/rte_memcpy.h | 32
-rw-r--r--  lib/librte_eal/common/include/arch/arm/rte_memcpy_32.h | 32
-rw-r--r--  lib/librte_eal/common/include/arch/arm/rte_prefetch.h | 32
-rw-r--r--  lib/librte_eal/common/include/arch/arm/rte_prefetch_32.h | 32
-rw-r--r--  lib/librte_eal/common/include/arch/arm/rte_rwlock.h | 2
-rw-r--r--  lib/librte_eal/common/include/arch/arm/rte_spinlock.h | 32
-rw-r--r--  lib/librte_eal/common/include/arch/ppc_64/rte_atomic.h | 23
-rw-r--r--  lib/librte_eal/common/include/arch/ppc_64/rte_rwlock.h | 2
-rw-r--r--  lib/librte_eal/common/include/arch/x86/rte_atomic.h | 24
-rw-r--r--  lib/librte_eal/common/include/arch/x86/rte_atomic_32.h | 12
-rw-r--r--  lib/librte_eal/common/include/arch/x86/rte_atomic_64.h | 12
-rw-r--r--  lib/librte_eal/common/include/arch/x86/rte_memcpy.h | 24
-rw-r--r--  lib/librte_eal/common/include/arch/x86/rte_spinlock.h | 4
-rw-r--r--  lib/librte_eal/common/include/generic/rte_atomic.h | 90
-rw-r--r--  lib/librte_eal/common/include/generic/rte_byteorder.h | 6
-rw-r--r--  lib/librte_eal/common/include/generic/rte_cpuflags.h | 21
-rw-r--r--  lib/librte_eal/common/include/generic/rte_rwlock.h | 4
-rw-r--r--  lib/librte_eal/common/include/rte_bitmap.h | 8
-rw-r--r--  lib/librte_eal/common/include/rte_bus.h | 4
-rw-r--r--  lib/librte_eal/common/include/rte_class.h | 134
-rw-r--r--  lib/librte_eal/common/include/rte_common.h | 160
-rw-r--r--  lib/librte_eal/common/include/rte_dev.h | 211
-rw-r--r--  lib/librte_eal/common/include/rte_devargs.h | 173
-rw-r--r--  lib/librte_eal/common/include/rte_eal.h | 54
-rw-r--r--  lib/librte_eal/common/include/rte_eal_interrupts.h | 1
-rw-r--r--  lib/librte_eal/common/include/rte_eal_memconfig.h | 28
-rw-r--r--  lib/librte_eal/common/include/rte_fbarray.h | 470
-rw-r--r--  lib/librte_eal/common/include/rte_hypervisor.h | 2
-rw-r--r--  lib/librte_eal/common/include/rte_lcore.h | 60
-rw-r--r--  lib/librte_eal/common/include/rte_log.h | 40
-rw-r--r--  lib/librte_eal/common/include/rte_malloc.h | 10
-rw-r--r--  lib/librte_eal/common/include/rte_malloc_heap.h | 6
-rw-r--r--  lib/librte_eal/common/include/rte_memory.h | 330
-rw-r--r--  lib/librte_eal/common/include/rte_memzone.h | 45
-rw-r--r--  lib/librte_eal/common/include/rte_pci_dev_feature_defs.h | 58
-rw-r--r--  lib/librte_eal/common/include/rte_pci_dev_features.h | 58
-rw-r--r--  lib/librte_eal/common/include/rte_random.h | 6
-rw-r--r--  lib/librte_eal/common/include/rte_service.h | 167
-rw-r--r--  lib/librte_eal/common/include/rte_service_component.h | 38
-rw-r--r--  lib/librte_eal/common/include/rte_string_fns.h | 31
-rw-r--r--  lib/librte_eal/common/include/rte_tailq.h | 3
-rw-r--r--  lib/librte_eal/common/include/rte_uuid.h | 129
-rw-r--r--  lib/librte_eal/common/include/rte_version.h | 2
-rw-r--r--  lib/librte_eal/common/include/rte_vfio.h | 243
-rw-r--r--  lib/librte_eal/common/malloc_elem.c | 479
-rw-r--r--  lib/librte_eal/common/malloc_elem.h | 51
-rw-r--r--  lib/librte_eal/common/malloc_heap.c | 868
-rw-r--r--  lib/librte_eal/common/malloc_heap.h | 19
-rw-r--r--  lib/librte_eal/common/malloc_mp.c | 743
-rw-r--r--  lib/librte_eal/common/malloc_mp.h | 86
-rw-r--r--  lib/librte_eal/common/meson.build | 8
-rw-r--r--  lib/librte_eal/common/rte_malloc.c | 85
-rw-r--r--  lib/librte_eal/common/rte_service.c | 130
87 files changed, 9043 insertions, 1427 deletions
diff --git a/lib/librte_eal/common/Makefile b/lib/librte_eal/common/Makefile
index ea824a3a..cca68826 100644
--- a/lib/librte_eal/common/Makefile
+++ b/lib/librte_eal/common/Makefile
@@ -11,12 +11,12 @@ INC += rte_per_lcore.h rte_random.h
INC += rte_tailq.h rte_interrupts.h rte_alarm.h
INC += rte_string_fns.h rte_version.h
INC += rte_eal_memconfig.h rte_malloc_heap.h
-INC += rte_hexdump.h rte_devargs.h rte_bus.h rte_dev.h
+INC += rte_hexdump.h rte_devargs.h rte_bus.h rte_dev.h rte_class.h
INC += rte_pci_dev_feature_defs.h rte_pci_dev_features.h
INC += rte_malloc.h rte_keepalive.h rte_time.h
INC += rte_service.h rte_service_component.h
INC += rte_bitmap.h rte_vfio.h rte_hypervisor.h rte_test.h
-INC += rte_reciprocal.h
+INC += rte_reciprocal.h rte_fbarray.h rte_uuid.h
GENERIC_INC := rte_atomic.h rte_byteorder.h rte_cycles.h rte_prefetch.h
GENERIC_INC += rte_spinlock.h rte_memcpy.h rte_cpuflags.h rte_rwlock.h
diff --git a/lib/librte_eal/common/arch/arm/rte_cpuflags.c b/lib/librte_eal/common/arch/arm/rte_cpuflags.c
index 88f1cbe3..caf3dc83 100644
--- a/lib/librte_eal/common/arch/arm/rte_cpuflags.c
+++ b/lib/librte_eal/common/arch/arm/rte_cpuflags.c
@@ -1,34 +1,6 @@
-/*
- * BSD LICENSE
- *
- * Copyright (C) Cavium, Inc. 2015.
- * Copyright(c) 2015 RehiveTech. All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * * Neither the name of Cavium, Inc nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright (C) Cavium, Inc. 2015.
+ * Copyright(c) 2015 RehiveTech. All rights reserved.
*/
#include "rte_cpuflags.h"
@@ -133,22 +105,10 @@ const struct feature_entry rte_cpu_feature_table[] = {
static void
rte_cpu_get_features(hwcap_registers_t out)
{
- int auxv_fd;
- _Elfx_auxv_t auxv;
-
- auxv_fd = open("/proc/self/auxv", O_RDONLY);
- assert(auxv_fd != -1);
- while (read(auxv_fd, &auxv, sizeof(auxv)) == sizeof(auxv)) {
- if (auxv.a_type == AT_HWCAP) {
- out[REG_HWCAP] = auxv.a_un.a_val;
- } else if (auxv.a_type == AT_HWCAP2) {
- out[REG_HWCAP2] = auxv.a_un.a_val;
- } else if (auxv.a_type == AT_PLATFORM) {
- if (!strcmp((const char *)auxv.a_un.a_val, PLATFORM_STR))
- out[REG_PLATFORM] = 0x0001;
- }
- }
- close(auxv_fd);
+ out[REG_HWCAP] = rte_cpu_getauxval(AT_HWCAP);
+ out[REG_HWCAP2] = rte_cpu_getauxval(AT_HWCAP2);
+ if (!rte_cpu_strcmp_auxval(AT_PLATFORM, PLATFORM_STR))
+ out[REG_PLATFORM] = 0x0001;
}
/*
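
The auxiliary-vector parsing that each architecture used to duplicate is now funnelled through the common EAL helpers rte_cpu_getauxval() and rte_cpu_strcmp_auxval(), queried here for AT_HWCAP, AT_HWCAP2 and AT_PLATFORM. A minimal sketch of such helpers built on glibc's getauxval() follows; this is an assumption for illustration only, since the helper actually introduced in this release is implemented elsewhere in the EAL (outside this directory) and may fall back to parsing /proc/self/auxv on older C libraries.

#include <string.h>
#include <sys/auxv.h>	/* getauxval(), glibc >= 2.16 */

/* Sketch only: getauxval()-based stand-ins for the new EAL helpers. */
static unsigned long
cpu_getauxval_sketch(unsigned long type)
{
	/* getauxval() returns 0 when the requested entry is absent */
	return getauxval(type);
}

static int
cpu_strcmp_auxval_sketch(unsigned long type, const char *str)
{
	unsigned long val = getauxval(type);

	if (val == 0)
		return -1;	/* entry absent, nothing to compare */
	return strcmp((const char *)val, str);
}
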
diff --git a/lib/librte_eal/common/arch/arm/rte_hypervisor.c b/lib/librte_eal/common/arch/arm/rte_hypervisor.c
index 3792fe2c..08a1c97d 100644
--- a/lib/librte_eal/common/arch/arm/rte_hypervisor.c
+++ b/lib/librte_eal/common/arch/arm/rte_hypervisor.c
@@ -1,5 +1,5 @@
/* SPDX-License-Identifier: BSD-3-Clause
- * Copyright 2017 Mellanox Technologies, Ltd.
+ * Copyright 2017 Mellanox Technologies, Ltd
*/
#include "rte_hypervisor.h"
diff --git a/lib/librte_eal/common/arch/ppc_64/rte_cpuflags.c b/lib/librte_eal/common/arch/ppc_64/rte_cpuflags.c
index 970a61c5..e7a82452 100644
--- a/lib/librte_eal/common/arch/ppc_64/rte_cpuflags.c
+++ b/lib/librte_eal/common/arch/ppc_64/rte_cpuflags.c
@@ -104,19 +104,8 @@ const struct feature_entry rte_cpu_feature_table[] = {
static void
rte_cpu_get_features(hwcap_registers_t out)
{
- int auxv_fd;
- Elf64_auxv_t auxv;
-
- auxv_fd = open("/proc/self/auxv", O_RDONLY);
- assert(auxv_fd != -1);
- while (read(auxv_fd, &auxv,
- sizeof(Elf64_auxv_t)) == sizeof(Elf64_auxv_t)) {
- if (auxv.a_type == AT_HWCAP)
- out[REG_HWCAP] = auxv.a_un.a_val;
- else if (auxv.a_type == AT_HWCAP2)
- out[REG_HWCAP2] = auxv.a_un.a_val;
- }
- close(auxv_fd);
+ out[REG_HWCAP] = rte_cpu_getauxval(AT_HWCAP);
+ out[REG_HWCAP2] = rte_cpu_getauxval(AT_HWCAP2);
}
/*
diff --git a/lib/librte_eal/common/arch/ppc_64/rte_hypervisor.c b/lib/librte_eal/common/arch/ppc_64/rte_hypervisor.c
index 3792fe2c..08a1c97d 100644
--- a/lib/librte_eal/common/arch/ppc_64/rte_hypervisor.c
+++ b/lib/librte_eal/common/arch/ppc_64/rte_hypervisor.c
@@ -1,5 +1,5 @@
/* SPDX-License-Identifier: BSD-3-Clause
- * Copyright 2017 Mellanox Technologies, Ltd.
+ * Copyright 2017 Mellanox Technologies, Ltd
*/
#include "rte_hypervisor.h"
diff --git a/lib/librte_eal/common/arch/x86/rte_hypervisor.c b/lib/librte_eal/common/arch/x86/rte_hypervisor.c
index edf07be1..c38cfc09 100644
--- a/lib/librte_eal/common/arch/x86/rte_hypervisor.c
+++ b/lib/librte_eal/common/arch/x86/rte_hypervisor.c
@@ -1,5 +1,5 @@
/* SPDX-License-Identifier: BSD-3-Clause
- * Copyright 2017 Mellanox Technologies, Ltd.
+ * Copyright 2017 Mellanox Technologies, Ltd
*/
#include "rte_hypervisor.h"
diff --git a/lib/librte_eal/common/eal_common_bus.c b/lib/librte_eal/common/eal_common_bus.c
index 3e022d51..0943851c 100644
--- a/lib/librte_eal/common/eal_common_bus.c
+++ b/lib/librte_eal/common/eal_common_bus.c
@@ -36,6 +36,7 @@
#include <rte_bus.h>
#include <rte_debug.h>
+#include <rte_string_fns.h>
#include "eal_private.h"
@@ -212,7 +213,7 @@ rte_bus_find_by_device_name(const char *str)
char name[RTE_DEV_NAME_MAX_LEN];
char *c;
- snprintf(name, sizeof(name), "%s", str);
+ strlcpy(name, str, sizeof(name));
c = strchr(name, ',');
if (c != NULL)
c[0] = '\0';
diff --git a/lib/librte_eal/common/eal_common_class.c b/lib/librte_eal/common/eal_common_class.c
new file mode 100644
index 00000000..404a9065
--- /dev/null
+++ b/lib/librte_eal/common/eal_common_class.c
@@ -0,0 +1,64 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright 2018 Gaëtan Rivet
+ */
+
+#include <stdio.h>
+#include <string.h>
+#include <sys/queue.h>
+
+#include <rte_class.h>
+#include <rte_debug.h>
+
+struct rte_class_list rte_class_list =
+ TAILQ_HEAD_INITIALIZER(rte_class_list);
+
+__rte_experimental void
+rte_class_register(struct rte_class *class)
+{
+ RTE_VERIFY(class);
+ RTE_VERIFY(class->name && strlen(class->name));
+
+ TAILQ_INSERT_TAIL(&rte_class_list, class, next);
+ RTE_LOG(DEBUG, EAL, "Registered [%s] device class.\n", class->name);
+}
+
+__rte_experimental void
+rte_class_unregister(struct rte_class *class)
+{
+ TAILQ_REMOVE(&rte_class_list, class, next);
+ RTE_LOG(DEBUG, EAL, "Unregistered [%s] device class.\n", class->name);
+}
+
+__rte_experimental
+struct rte_class *
+rte_class_find(const struct rte_class *start, rte_class_cmp_t cmp,
+ const void *data)
+{
+ struct rte_class *cls;
+
+ if (start != NULL)
+ cls = TAILQ_NEXT(start, next);
+ else
+ cls = TAILQ_FIRST(&rte_class_list);
+ while (cls != NULL) {
+ if (cmp(cls, data) == 0)
+ break;
+ cls = TAILQ_NEXT(cls, next);
+ }
+ return cls;
+}
+
+static int
+cmp_class_name(const struct rte_class *class, const void *_name)
+{
+ const char *name = _name;
+
+ return strcmp(class->name, name);
+}
+
+__rte_experimental
+struct rte_class *
+rte_class_find_by_name(const char *name)
+{
+ return rte_class_find(NULL, cmp_class_name, (const void *)name);
+}
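
The new eal_common_class.c provides the small class registry that the device iteration code below relies on: a class registers itself on a tail queue and can later be found by name or by an arbitrary comparison callback. A usage sketch follows; the "demo" class name and the constructor-based registration are illustrative assumptions (rte_class.h also ships a registration helper macro, not shown here), and both functions are tagged experimental at this point.

#include <stdio.h>
#include <rte_class.h>

/* Illustrative only: a device class registering itself at startup. */
static struct rte_class demo_class = {
	.name = "demo",
	/* .dev_iterate would be set by classes that can enumerate devices */
};

static void __attribute__((constructor))
demo_class_init(void)
{
	rte_class_register(&demo_class);
}

static void
demo_class_lookup(void)
{
	struct rte_class *cls = rte_class_find_by_name("demo");

	if (cls != NULL)
		printf("found class %s\n", cls->name);
}
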
diff --git a/lib/librte_eal/common/eal_common_dev.c b/lib/librte_eal/common/eal_common_dev.c
index cd071442..678dbcac 100644
--- a/lib/librte_eal/common/eal_common_dev.c
+++ b/lib/librte_eal/common/eal_common_dev.c
@@ -10,24 +10,62 @@
#include <rte_compat.h>
#include <rte_bus.h>
+#include <rte_class.h>
#include <rte_dev.h>
#include <rte_devargs.h>
#include <rte_debug.h>
+#include <rte_errno.h>
+#include <rte_kvargs.h>
#include <rte_log.h>
+#include <rte_spinlock.h>
+#include <rte_malloc.h>
#include "eal_private.h"
-static int cmp_detached_dev_name(const struct rte_device *dev,
- const void *_name)
-{
- const char *name = _name;
+/**
+ * The device event callback description.
+ *
+ * It contains callback address to be registered by user application,
+ * the pointer to the parameters for callback, and the device name.
+ */
+struct dev_event_callback {
+ TAILQ_ENTRY(dev_event_callback) next; /**< Callbacks list */
+ rte_dev_event_cb_fn cb_fn; /**< Callback address */
+ void *cb_arg; /**< Callback parameter */
+ char *dev_name; /**< Callback device name, NULL is for all device */
+ uint32_t active; /**< Callback is executing */
+};
- /* skip attached devices */
- if (dev->driver != NULL)
- return 1;
+/** @internal Structure to keep track of registered callbacks */
+TAILQ_HEAD(dev_event_cb_list, dev_event_callback);
- return strcmp(dev->name, name);
-}
+/* The device event callback list for all registered callbacks. */
+static struct dev_event_cb_list dev_event_cbs;
+
+/* spinlock for device callbacks */
+static rte_spinlock_t dev_event_lock = RTE_SPINLOCK_INITIALIZER;
+
+struct dev_next_ctx {
+ struct rte_dev_iterator *it;
+ const char *bus_str;
+ const char *cls_str;
+};
+
+#define CTX(it, bus_str, cls_str) \
+ (&(const struct dev_next_ctx){ \
+ .it = it, \
+ .bus_str = bus_str, \
+ .cls_str = cls_str, \
+ })
+
+#define ITCTX(ptr) \
+ (((struct dev_next_ctx *)(intptr_t)ptr)->it)
+
+#define BUSCTX(ptr) \
+ (((struct dev_next_ctx *)(intptr_t)ptr)->bus_str)
+
+#define CLSCTX(ptr) \
+ (((struct dev_next_ctx *)(intptr_t)ptr)->cls_str)
static int cmp_dev_name(const struct rte_device *dev, const void *_name)
{
@@ -89,29 +127,12 @@ int rte_eal_dev_detach(struct rte_device *dev)
return ret;
}
-static char *
-full_dev_name(const char *bus, const char *dev, const char *args)
-{
- char *name;
- size_t len;
-
- len = snprintf(NULL, 0, "%s:%s,%s", bus, dev, args) + 1;
- name = calloc(1, len);
- if (name == NULL) {
- RTE_LOG(ERR, EAL, "Could not allocate full device name\n");
- return NULL;
- }
- snprintf(name, len, "%s:%s,%s", bus, dev, args);
- return name;
-}
-
int __rte_experimental rte_eal_hotplug_add(const char *busname, const char *devname,
const char *devargs)
{
struct rte_bus *bus;
struct rte_device *dev;
struct rte_devargs *da;
- char *name;
int ret;
bus = rte_bus_find_by_name(busname);
@@ -126,21 +147,16 @@ int __rte_experimental rte_eal_hotplug_add(const char *busname, const char *devn
return -ENOTSUP;
}
- name = full_dev_name(busname, devname, devargs);
- if (name == NULL)
- return -ENOMEM;
-
da = calloc(1, sizeof(*da));
- if (da == NULL) {
- ret = -ENOMEM;
- goto err_name;
- }
+ if (da == NULL)
+ return -ENOMEM;
- ret = rte_eal_devargs_parse(name, da);
+ ret = rte_devargs_parsef(da, "%s:%s,%s",
+ busname, devname, devargs);
if (ret)
goto err_devarg;
- ret = rte_eal_devargs_insert(da);
+ ret = rte_devargs_insert(da);
if (ret)
goto err_devarg;
@@ -148,30 +164,32 @@ int __rte_experimental rte_eal_hotplug_add(const char *busname, const char *devn
if (ret)
goto err_devarg;
- dev = bus->find_device(NULL, cmp_detached_dev_name, devname);
+ dev = bus->find_device(NULL, cmp_dev_name, devname);
if (dev == NULL) {
- RTE_LOG(ERR, EAL, "Cannot find unplugged device (%s)\n",
+ RTE_LOG(ERR, EAL, "Cannot find device (%s)\n",
devname);
ret = -ENODEV;
goto err_devarg;
}
+ if (dev->driver != NULL) {
+ RTE_LOG(ERR, EAL, "Device is already plugged\n");
+ return -EEXIST;
+ }
+
ret = bus->plug(dev);
if (ret) {
RTE_LOG(ERR, EAL, "Driver cannot attach the device (%s)\n",
dev->name);
goto err_devarg;
}
- free(name);
return 0;
err_devarg:
- if (rte_eal_devargs_remove(busname, devname)) {
+ if (rte_devargs_remove(busname, devname)) {
free(da->args);
free(da);
}
-err_name:
- free(name);
return ret;
}
@@ -200,10 +218,349 @@ rte_eal_hotplug_remove(const char *busname, const char *devname)
return -EINVAL;
}
+ if (dev->driver == NULL) {
+ RTE_LOG(ERR, EAL, "Device is already unplugged\n");
+ return -ENOENT;
+ }
+
ret = bus->unplug(dev);
if (ret)
RTE_LOG(ERR, EAL, "Driver cannot detach the device (%s)\n",
dev->name);
- rte_eal_devargs_remove(busname, devname);
+ rte_devargs_remove(busname, devname);
+ return ret;
+}
+
+int __rte_experimental
+rte_dev_event_callback_register(const char *device_name,
+ rte_dev_event_cb_fn cb_fn,
+ void *cb_arg)
+{
+ struct dev_event_callback *event_cb;
+ int ret;
+
+ if (!cb_fn)
+ return -EINVAL;
+
+ rte_spinlock_lock(&dev_event_lock);
+
+ if (TAILQ_EMPTY(&dev_event_cbs))
+ TAILQ_INIT(&dev_event_cbs);
+
+ TAILQ_FOREACH(event_cb, &dev_event_cbs, next) {
+ if (event_cb->cb_fn == cb_fn && event_cb->cb_arg == cb_arg) {
+ if (device_name == NULL && event_cb->dev_name == NULL)
+ break;
+ if (device_name == NULL || event_cb->dev_name == NULL)
+ continue;
+ if (!strcmp(event_cb->dev_name, device_name))
+ break;
+ }
+ }
+
+ /* create a new callback. */
+ if (event_cb == NULL) {
+ event_cb = malloc(sizeof(struct dev_event_callback));
+ if (event_cb != NULL) {
+ event_cb->cb_fn = cb_fn;
+ event_cb->cb_arg = cb_arg;
+ event_cb->active = 0;
+ if (!device_name) {
+ event_cb->dev_name = NULL;
+ } else {
+ event_cb->dev_name = strdup(device_name);
+ if (event_cb->dev_name == NULL) {
+ ret = -ENOMEM;
+ goto error;
+ }
+ }
+ TAILQ_INSERT_TAIL(&dev_event_cbs, event_cb, next);
+ } else {
+ RTE_LOG(ERR, EAL,
+ "Failed to allocate memory for device "
+ "event callback.");
+ ret = -ENOMEM;
+ goto error;
+ }
+ } else {
+ RTE_LOG(ERR, EAL,
+ "The callback is already exist, no need "
+ "to register again.\n");
+ ret = -EEXIST;
+ }
+
+ rte_spinlock_unlock(&dev_event_lock);
+ return 0;
+error:
+ free(event_cb);
+ rte_spinlock_unlock(&dev_event_lock);
+ return ret;
+}
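/*
 * Usage sketch for the callback registry above (not part of the upstream
 * file). The callback prototype and the RTE_DEV_EVENT_* values are assumed
 * from rte_dev.h of this release; the hotplug monitor that actually
 * delivers the events is platform-specific code.
 */
#include <stdio.h>
#include <rte_dev.h>

static void
on_dev_event(const char *device_name, enum rte_dev_event_type event,
		void *cb_arg)
{
	if (event == RTE_DEV_EVENT_REMOVE)
		printf("device %s removed (arg %p)\n", device_name, cb_arg);
}

static void
dev_event_example(void)
{
	/* NULL device name: receive events for every device */
	rte_dev_event_callback_register(NULL, on_dev_event, NULL);
	/* ... later ... */
	rte_dev_event_callback_unregister(NULL, on_dev_event, NULL);
}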
+
+int __rte_experimental
+rte_dev_event_callback_unregister(const char *device_name,
+ rte_dev_event_cb_fn cb_fn,
+ void *cb_arg)
+{
+ int ret = 0;
+ struct dev_event_callback *event_cb, *next;
+
+ if (!cb_fn)
+ return -EINVAL;
+
+ rte_spinlock_lock(&dev_event_lock);
+ /*walk through the callbacks and remove all that match. */
+ for (event_cb = TAILQ_FIRST(&dev_event_cbs); event_cb != NULL;
+ event_cb = next) {
+
+ next = TAILQ_NEXT(event_cb, next);
+
+ if (device_name != NULL && event_cb->dev_name != NULL) {
+ if (!strcmp(event_cb->dev_name, device_name)) {
+ if (event_cb->cb_fn != cb_fn ||
+ (cb_arg != (void *)-1 &&
+ event_cb->cb_arg != cb_arg))
+ continue;
+ }
+ } else if (device_name != NULL) {
+ continue;
+ }
+
+ /*
+ * if this callback is not executing right now,
+ * then remove it.
+ */
+ if (event_cb->active == 0) {
+ TAILQ_REMOVE(&dev_event_cbs, event_cb, next);
+ free(event_cb);
+ ret++;
+ } else {
+ continue;
+ }
+ }
+ rte_spinlock_unlock(&dev_event_lock);
return ret;
}
+
+void
+dev_callback_process(char *device_name, enum rte_dev_event_type event)
+{
+ struct dev_event_callback *cb_lst;
+
+ if (device_name == NULL)
+ return;
+
+ rte_spinlock_lock(&dev_event_lock);
+
+ TAILQ_FOREACH(cb_lst, &dev_event_cbs, next) {
+ if (cb_lst->dev_name) {
+ if (strcmp(cb_lst->dev_name, device_name))
+ continue;
+ }
+ cb_lst->active = 1;
+ rte_spinlock_unlock(&dev_event_lock);
+ cb_lst->cb_fn(device_name, event,
+ cb_lst->cb_arg);
+ rte_spinlock_lock(&dev_event_lock);
+ cb_lst->active = 0;
+ }
+ rte_spinlock_unlock(&dev_event_lock);
+}
+
+__rte_experimental
+int
+rte_dev_iterator_init(struct rte_dev_iterator *it,
+ const char *dev_str)
+{
+ struct rte_devargs devargs;
+ struct rte_class *cls = NULL;
+ struct rte_bus *bus = NULL;
+
+ /* Having both bus_str and cls_str NULL is illegal,
+ * marking this iterator as invalid unless
+ * everything goes well.
+ */
+ it->bus_str = NULL;
+ it->cls_str = NULL;
+
+ devargs.data = dev_str;
+ if (rte_devargs_layers_parse(&devargs, dev_str))
+ goto get_out;
+
+ bus = devargs.bus;
+ cls = devargs.cls;
+ /* The string should have at least
+ * one layer specified.
+ */
+ if (bus == NULL && cls == NULL) {
+ RTE_LOG(ERR, EAL,
+ "Either bus or class must be specified.\n");
+ rte_errno = EINVAL;
+ goto get_out;
+ }
+ if (bus != NULL && bus->dev_iterate == NULL) {
+ RTE_LOG(ERR, EAL, "Bus %s not supported\n", bus->name);
+ rte_errno = ENOTSUP;
+ goto get_out;
+ }
+ if (cls != NULL && cls->dev_iterate == NULL) {
+ RTE_LOG(ERR, EAL, "Class %s not supported\n", cls->name);
+ rte_errno = ENOTSUP;
+ goto get_out;
+ }
+ it->bus_str = devargs.bus_str;
+ it->cls_str = devargs.cls_str;
+ it->dev_str = dev_str;
+ it->bus = bus;
+ it->cls = cls;
+ it->device = NULL;
+ it->class_device = NULL;
+get_out:
+ return -rte_errno;
+}
+
+static char *
+dev_str_sane_copy(const char *str)
+{
+ size_t end;
+ char *copy;
+
+ end = strcspn(str, ",/");
+ if (str[end] == ',') {
+ copy = strdup(&str[end + 1]);
+ } else {
+ /* '/' or '\0' */
+ copy = strdup("");
+ }
+ if (copy == NULL) {
+ rte_errno = ENOMEM;
+ } else {
+ char *slash;
+
+ slash = strchr(copy, '/');
+ if (slash != NULL)
+ slash[0] = '\0';
+ }
+ return copy;
+}
+
+static int
+class_next_dev_cmp(const struct rte_class *cls,
+ const void *ctx)
+{
+ struct rte_dev_iterator *it;
+ const char *cls_str = NULL;
+ void *dev;
+
+ if (cls->dev_iterate == NULL)
+ return 1;
+ it = ITCTX(ctx);
+ cls_str = CLSCTX(ctx);
+ dev = it->class_device;
+ /* it->cls_str != NULL means a class
+ * was specified in the devstr.
+ */
+ if (it->cls_str != NULL && cls != it->cls)
+ return 1;
+ /* If an error occurred previously,
+ * no need to test further.
+ */
+ if (rte_errno != 0)
+ return -1;
+ dev = cls->dev_iterate(dev, cls_str, it);
+ it->class_device = dev;
+ return dev == NULL;
+}
+
+static int
+bus_next_dev_cmp(const struct rte_bus *bus,
+ const void *ctx)
+{
+ struct rte_device *dev = NULL;
+ struct rte_class *cls = NULL;
+ struct rte_dev_iterator *it;
+ const char *bus_str = NULL;
+
+ if (bus->dev_iterate == NULL)
+ return 1;
+ it = ITCTX(ctx);
+ bus_str = BUSCTX(ctx);
+ dev = it->device;
+ /* it->bus_str != NULL means a bus
+ * was specified in the devstr.
+ */
+ if (it->bus_str != NULL && bus != it->bus)
+ return 1;
+ /* If an error occurred previously,
+ * no need to test further.
+ */
+ if (rte_errno != 0)
+ return -1;
+ if (it->cls_str == NULL) {
+ dev = bus->dev_iterate(dev, bus_str, it);
+ goto end;
+ }
+ /* cls_str != NULL */
+ if (dev == NULL) {
+next_dev_on_bus:
+ dev = bus->dev_iterate(dev, bus_str, it);
+ it->device = dev;
+ }
+ if (dev == NULL)
+ return 1;
+ if (it->cls != NULL)
+ cls = TAILQ_PREV(it->cls, rte_class_list, next);
+ cls = rte_class_find(cls, class_next_dev_cmp, ctx);
+ if (cls != NULL) {
+ it->cls = cls;
+ goto end;
+ }
+ goto next_dev_on_bus;
+end:
+ it->device = dev;
+ return dev == NULL;
+}
+__rte_experimental
+struct rte_device *
+rte_dev_iterator_next(struct rte_dev_iterator *it)
+{
+ struct rte_bus *bus = NULL;
+ int old_errno = rte_errno;
+ char *bus_str = NULL;
+ char *cls_str = NULL;
+
+ rte_errno = 0;
+ if (it->bus_str == NULL && it->cls_str == NULL) {
+ /* Invalid iterator. */
+ rte_errno = EINVAL;
+ return NULL;
+ }
+ if (it->bus != NULL)
+ bus = TAILQ_PREV(it->bus, rte_bus_list, next);
+ if (it->bus_str != NULL) {
+ bus_str = dev_str_sane_copy(it->bus_str);
+ if (bus_str == NULL)
+ goto out;
+ }
+ if (it->cls_str != NULL) {
+ cls_str = dev_str_sane_copy(it->cls_str);
+ if (cls_str == NULL)
+ goto out;
+ }
+ while ((bus = rte_bus_find(bus, bus_next_dev_cmp,
+ CTX(it, bus_str, cls_str)))) {
+ if (it->device != NULL) {
+ it->bus = bus;
+ goto out;
+ }
+ if (it->bus_str != NULL ||
+ rte_errno != 0)
+ break;
+ }
+ if (rte_errno == 0)
+ rte_errno = old_errno;
+out:
+ free(bus_str);
+ free(cls_str);
+ return it->device;
+}
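
rte_dev_iterator_init() parses a layered device string (handled by rte_devargs_layers_parse() in the next file) and rte_dev_iterator_next() then walks the registered buses and classes, returning every rte_device matching both layers. A usage sketch, assuming a "bus=pci/class=eth" filter string for illustration (the accepted layer names depend on which buses and classes are registered and implement dev_iterate):

#include <stdio.h>
#include <rte_dev.h>
#include <rte_errno.h>

/* Illustrative only: list every device matching the given filter. */
static void
iterate_example(void)
{
	struct rte_dev_iterator it;
	struct rte_device *dev;

	if (rte_dev_iterator_init(&it, "bus=pci/class=eth") < 0) {
		printf("cannot iterate: %s\n", rte_strerror(rte_errno));
		return;
	}
	for (dev = rte_dev_iterator_next(&it); dev != NULL;
			dev = rte_dev_iterator_next(&it))
		printf("matched device %s\n", dev->name);
}
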
diff --git a/lib/librte_eal/common/eal_common_devargs.c b/lib/librte_eal/common/eal_common_devargs.c
index 810b3e18..dac2402a 100644
--- a/lib/librte_eal/common/eal_common_devargs.c
+++ b/lib/librte_eal/common/eal_common_devargs.c
@@ -11,13 +11,22 @@
#include <stdio.h>
#include <string.h>
+#include <stdarg.h>
+#include <rte_bus.h>
+#include <rte_class.h>
#include <rte_compat.h>
#include <rte_dev.h>
#include <rte_devargs.h>
+#include <rte_errno.h>
+#include <rte_kvargs.h>
+#include <rte_log.h>
#include <rte_tailq.h>
#include "eal_private.h"
+/** user device double-linked queue type definition */
+TAILQ_HEAD(rte_devargs_list, rte_devargs);
+
/** Global list of user devices */
struct rte_devargs_list devargs_list =
TAILQ_HEAD_INITIALIZER(devargs_list);
@@ -52,22 +61,164 @@ rte_eal_parse_devargs_str(const char *devargs_str,
return 0;
}
+static size_t
+devargs_layer_count(const char *s)
+{
+ size_t i = s ? 1 : 0;
+
+ while (s != NULL && s[0] != '\0') {
+ i += s[0] == '/';
+ s++;
+ }
+ return i;
+}
+
+int
+rte_devargs_layers_parse(struct rte_devargs *devargs,
+ const char *devstr)
+{
+ struct {
+ const char *key;
+ const char *str;
+ struct rte_kvargs *kvlist;
+ } layers[] = {
+ { "bus=", NULL, NULL, },
+ { "class=", NULL, NULL, },
+ { "driver=", NULL, NULL, },
+ };
+ struct rte_kvargs_pair *kv = NULL;
+ struct rte_class *cls = NULL;
+ struct rte_bus *bus = NULL;
+ const char *s = devstr;
+ size_t nblayer;
+ size_t i = 0;
+ int ret = 0;
+
+ /* Split each sub-lists. */
+ nblayer = devargs_layer_count(devstr);
+ if (nblayer > RTE_DIM(layers)) {
+ RTE_LOG(ERR, EAL, "Invalid format: too many layers (%zu)\n",
+ nblayer);
+ ret = -E2BIG;
+ goto get_out;
+ }
+
+ /* If the devargs points the devstr
+ * as source data, then it should not allocate
+ * anything and keep referring only to it.
+ */
+ if (devargs->data != devstr) {
+ devargs->data = strdup(devstr);
+ if (devargs->data == NULL) {
+ RTE_LOG(ERR, EAL, "OOM\n");
+ ret = -ENOMEM;
+ goto get_out;
+ }
+ s = devargs->data;
+ }
+
+ while (s != NULL) {
+ if (i >= RTE_DIM(layers)) {
+ RTE_LOG(ERR, EAL, "Unrecognized layer %s\n", s);
+ ret = -EINVAL;
+ goto get_out;
+ }
+ /*
+ * The last layer is free-form.
+ * The "driver" key is not required (but accepted).
+ */
+ if (strncmp(layers[i].key, s, strlen(layers[i].key)) &&
+ i != RTE_DIM(layers) - 1)
+ goto next_layer;
+ layers[i].str = s;
+ layers[i].kvlist = rte_kvargs_parse_delim(s, NULL, "/");
+ if (layers[i].kvlist == NULL) {
+ RTE_LOG(ERR, EAL, "Could not parse %s\n", s);
+ ret = -EINVAL;
+ goto get_out;
+ }
+ s = strchr(s, '/');
+ if (s != NULL)
+ s++;
+next_layer:
+ i++;
+ }
+
+ /* Parse each sub-list. */
+ for (i = 0; i < RTE_DIM(layers); i++) {
+ if (layers[i].kvlist == NULL)
+ continue;
+ kv = &layers[i].kvlist->pairs[0];
+ if (strcmp(kv->key, "bus") == 0) {
+ bus = rte_bus_find_by_name(kv->value);
+ if (bus == NULL) {
+ RTE_LOG(ERR, EAL, "Could not find bus \"%s\"\n",
+ kv->value);
+ ret = -EFAULT;
+ goto get_out;
+ }
+ } else if (strcmp(kv->key, "class") == 0) {
+ cls = rte_class_find_by_name(kv->value);
+ if (cls == NULL) {
+ RTE_LOG(ERR, EAL, "Could not find class \"%s\"\n",
+ kv->value);
+ ret = -EFAULT;
+ goto get_out;
+ }
+ } else if (strcmp(kv->key, "driver") == 0) {
+ /* Ignore */
+ continue;
+ }
+ }
+
+ /* Fill devargs fields. */
+ devargs->bus_str = layers[0].str;
+ devargs->cls_str = layers[1].str;
+ devargs->drv_str = layers[2].str;
+ devargs->bus = bus;
+ devargs->cls = cls;
+
+ /* If we own the data, clean up a bit
+ * the several layers string, to ease
+ * their parsing afterward.
+ */
+ if (devargs->data != devstr) {
+ char *s = (void *)(intptr_t)(devargs->data);
+
+ while ((s = strchr(s, '/'))) {
+ *s = '\0';
+ s++;
+ }
+ }
+
+get_out:
+ for (i = 0; i < RTE_DIM(layers); i++) {
+ if (layers[i].kvlist)
+ rte_kvargs_free(layers[i].kvlist);
+ }
+ if (ret != 0)
+ rte_errno = -ret;
+ return ret;
+}
+
static int
bus_name_cmp(const struct rte_bus *bus, const void *name)
{
return strncmp(bus->name, name, strlen(bus->name));
}
-int __rte_experimental
-rte_eal_devargs_parse(const char *dev, struct rte_devargs *da)
+__rte_experimental
+int
+rte_devargs_parse(struct rte_devargs *da, const char *dev)
{
struct rte_bus *bus = NULL;
const char *devname;
const size_t maxlen = sizeof(da->name);
size_t i;
- if (dev == NULL || da == NULL)
+ if (da == NULL)
return -EINVAL;
+
/* Retrieve eventual bus info */
do {
devname = dev;
@@ -84,7 +235,7 @@ rte_eal_devargs_parse(const char *dev, struct rte_devargs *da)
da->name[i] = devname[i];
i++;
if (i == maxlen) {
- fprintf(stderr, "WARNING: Parsing \"%s\": device name should be shorter than %zu\n",
+ RTE_LOG(WARNING, EAL, "Parsing \"%s\": device name should be shorter than %zu\n",
dev, maxlen);
da->name[i - 1] = '\0';
return -EINVAL;
@@ -94,7 +245,7 @@ rte_eal_devargs_parse(const char *dev, struct rte_devargs *da)
if (bus == NULL) {
bus = rte_bus_find_by_device_name(da->name);
if (bus == NULL) {
- fprintf(stderr, "ERROR: failed to parse device \"%s\"\n",
+ RTE_LOG(ERR, EAL, "failed to parse device \"%s\"\n",
da->name);
return -EFAULT;
}
@@ -106,18 +257,46 @@ rte_eal_devargs_parse(const char *dev, struct rte_devargs *da)
else
da->args = strdup("");
if (da->args == NULL) {
- fprintf(stderr, "ERROR: not enough memory to parse arguments\n");
+ RTE_LOG(ERR, EAL, "not enough memory to parse arguments\n");
return -ENOMEM;
}
return 0;
}
+__rte_experimental
+int
+rte_devargs_parsef(struct rte_devargs *da, const char *format, ...)
+{
+ va_list ap;
+ size_t len;
+ char *dev;
+
+ if (da == NULL)
+ return -EINVAL;
+
+ va_start(ap, format);
+ len = vsnprintf(NULL, 0, format, ap);
+ va_end(ap);
+
+ dev = calloc(1, len + 1);
+ if (dev == NULL) {
+ RTE_LOG(ERR, EAL, "not enough memory to parse device\n");
+ return -ENOMEM;
+ }
+
+ va_start(ap, format);
+ vsnprintf(dev, len + 1, format, ap);
+ va_end(ap);
+
+ return rte_devargs_parse(da, dev);
+}
+
int __rte_experimental
-rte_eal_devargs_insert(struct rte_devargs *da)
+rte_devargs_insert(struct rte_devargs *da)
{
int ret;
- ret = rte_eal_devargs_remove(da->bus->name, da->name);
+ ret = rte_devargs_remove(da->bus->name, da->name);
if (ret < 0)
return ret;
TAILQ_INSERT_TAIL(&devargs_list, da, next);
@@ -125,8 +304,9 @@ rte_eal_devargs_insert(struct rte_devargs *da)
}
/* store a whitelist parameter for later parsing */
+__rte_experimental
int
-rte_eal_devargs_add(enum rte_devtype devtype, const char *devargs_str)
+rte_devargs_add(enum rte_devtype devtype, const char *devargs_str)
{
struct rte_devargs *devargs = NULL;
struct rte_bus *bus = NULL;
@@ -137,7 +317,7 @@ rte_eal_devargs_add(enum rte_devtype devtype, const char *devargs_str)
if (devargs == NULL)
goto fail;
- if (rte_eal_devargs_parse(dev, devargs))
+ if (rte_devargs_parse(devargs, dev))
goto fail;
devargs->type = devtype;
bus = devargs->bus;
@@ -162,7 +342,7 @@ fail:
}
int __rte_experimental
-rte_eal_devargs_remove(const char *busname, const char *devname)
+rte_devargs_remove(const char *busname, const char *devname)
{
struct rte_devargs *d;
void *tmp;
@@ -180,8 +360,9 @@ rte_eal_devargs_remove(const char *busname, const char *devname)
}
/* count the number of devices of a specified type */
+__rte_experimental
unsigned int
-rte_eal_devargs_type_count(enum rte_devtype devtype)
+rte_devargs_type_count(enum rte_devtype devtype)
{
struct rte_devargs *devargs;
unsigned int count = 0;
@@ -195,8 +376,9 @@ rte_eal_devargs_type_count(enum rte_devtype devtype)
}
/* dump the user devices on the console */
+__rte_experimental
void
-rte_eal_devargs_dump(FILE *f)
+rte_devargs_dump(FILE *f)
{
struct rte_devargs *devargs;
@@ -207,3 +389,23 @@ rte_eal_devargs_dump(FILE *f)
devargs->name, devargs->args);
}
}
+
+/* bus-aware rte_devargs iterator. */
+__rte_experimental
+struct rte_devargs *
+rte_devargs_next(const char *busname, const struct rte_devargs *start)
+{
+ struct rte_devargs *da;
+
+ if (start != NULL)
+ da = TAILQ_NEXT(start, next);
+ else
+ da = TAILQ_FIRST(&devargs_list);
+ while (da != NULL) {
+ if (busname == NULL ||
+ (strcmp(busname, da->bus->name) == 0))
+ return da;
+ da = TAILQ_NEXT(da, next);
+ }
+ return NULL;
+}
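
Two parsing entry points coexist after this change: rte_devargs_layers_parse() splits a layered string of the form "bus=<name>[,args]/class=<name>[,args]/driver=<name>[,args]" and resolves the bus and class objects, while the variadic rte_devargs_parsef() lets callers such as rte_eal_hotplug_add() build and parse the classic "bus:device,args" form in one step. A short sketch of the latter, with made-up PCI address and arguments:

#include <rte_devargs.h>

/* Illustrative only: build a devargs the way rte_eal_hotplug_add() does. */
static int
build_devargs_example(struct rte_devargs *da)
{
	/* formats and parses "pci:0000:01:00.0,rxq=4" */
	return rte_devargs_parsef(da, "%s:%s,%s",
			"pci", "0000:01:00.0", "rxq=4");
}
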
diff --git a/lib/librte_eal/common/eal_common_fbarray.c b/lib/librte_eal/common/eal_common_fbarray.c
new file mode 100644
index 00000000..43caf3ce
--- /dev/null
+++ b/lib/librte_eal/common/eal_common_fbarray.c
@@ -0,0 +1,1239 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2017-2018 Intel Corporation
+ */
+
+#include <inttypes.h>
+#include <limits.h>
+#include <sys/mman.h>
+#include <stdint.h>
+#include <errno.h>
+#include <sys/file.h>
+#include <string.h>
+
+#include <rte_common.h>
+#include <rte_log.h>
+#include <rte_errno.h>
+#include <rte_spinlock.h>
+#include <rte_tailq.h>
+
+#include "eal_filesystem.h"
+#include "eal_private.h"
+
+#include "rte_fbarray.h"
+
+#define MASK_SHIFT 6ULL
+#define MASK_ALIGN (1ULL << MASK_SHIFT)
+#define MASK_LEN_TO_IDX(x) ((x) >> MASK_SHIFT)
+#define MASK_LEN_TO_MOD(x) ((x) - RTE_ALIGN_FLOOR(x, MASK_ALIGN))
+#define MASK_GET_IDX(idx, mod) ((idx << MASK_SHIFT) + mod)
+
+/*
+ * This is a mask that is always stored at the end of array, to provide fast
+ * way of finding free/used spots without looping through each element.
+ */
+
+struct used_mask {
+ unsigned int n_masks;
+ uint64_t data[];
+};
+
+static size_t
+calc_mask_size(unsigned int len)
+{
+ /* mask must be multiple of MASK_ALIGN, even though length of array
+ * itself may not be aligned on that boundary.
+ */
+ len = RTE_ALIGN_CEIL(len, MASK_ALIGN);
+ return sizeof(struct used_mask) +
+ sizeof(uint64_t) * MASK_LEN_TO_IDX(len);
+}
+
+static size_t
+calc_data_size(size_t page_sz, unsigned int elt_sz, unsigned int len)
+{
+ size_t data_sz = elt_sz * len;
+ size_t msk_sz = calc_mask_size(len);
+ return RTE_ALIGN_CEIL(data_sz + msk_sz, page_sz);
+}
+
+static struct used_mask *
+get_used_mask(void *data, unsigned int elt_sz, unsigned int len)
+{
+ return (struct used_mask *) RTE_PTR_ADD(data, elt_sz * len);
+}
+
+static int
+resize_and_map(int fd, void *addr, size_t len)
+{
+ char path[PATH_MAX];
+ void *map_addr;
+
+ if (ftruncate(fd, len)) {
+ RTE_LOG(ERR, EAL, "Cannot truncate %s\n", path);
+ /* pass errno up the chain */
+ rte_errno = errno;
+ return -1;
+ }
+
+ map_addr = mmap(addr, len, PROT_READ | PROT_WRITE,
+ MAP_SHARED | MAP_FIXED, fd, 0);
+ if (map_addr != addr) {
+ RTE_LOG(ERR, EAL, "mmap() failed: %s\n", strerror(errno));
+ /* pass errno up the chain */
+ rte_errno = errno;
+ return -1;
+ }
+ return 0;
+}
+
+static int
+find_next_n(const struct rte_fbarray *arr, unsigned int start, unsigned int n,
+ bool used)
+{
+ const struct used_mask *msk = get_used_mask(arr->data, arr->elt_sz,
+ arr->len);
+ unsigned int msk_idx, lookahead_idx, first, first_mod;
+ unsigned int last, last_mod;
+ uint64_t last_msk, ignore_msk;
+
+ /*
+ * mask only has granularity of MASK_ALIGN, but start may not be aligned
+ * on that boundary, so construct a special mask to exclude anything we
+ * don't want to see to avoid confusing ctz.
+ */
+ first = MASK_LEN_TO_IDX(start);
+ first_mod = MASK_LEN_TO_MOD(start);
+ ignore_msk = ~((1ULL << first_mod) - 1);
+
+ /* array length may not be aligned, so calculate ignore mask for last
+ * mask index.
+ */
+ last = MASK_LEN_TO_IDX(arr->len);
+ last_mod = MASK_LEN_TO_MOD(arr->len);
+ last_msk = ~(-1ULL << last_mod);
+
+ for (msk_idx = first; msk_idx < msk->n_masks; msk_idx++) {
+ uint64_t cur_msk, lookahead_msk;
+ unsigned int run_start, clz, left;
+ bool found = false;
+ /*
+ * The process of getting n consecutive bits for arbitrary n is
+ * a bit involved, but here it is in a nutshell:
+ *
+ * 1. let n be the number of consecutive bits we're looking for
+ * 2. check if n can fit in one mask, and if so, do n-1
+ * rshift-ands to see if there is an appropriate run inside
+ * our current mask
+ * 2a. if we found a run, bail out early
+ * 2b. if we didn't find a run, proceed
+ * 3. invert the mask and count leading zeroes (that is, count
+ * how many consecutive set bits we had starting from the
+ * end of current mask) as k
+ * 3a. if k is 0, continue to next mask
+ * 3b. if k is not 0, we have a potential run
+ * 4. to satisfy our requirements, next mask must have n-k
+ * consecutive set bits right at the start, so we will do
+ * (n-k-1) rshift-ands and check if first bit is set.
+ *
+ * Step 4 will need to be repeated if (n-k) > MASK_ALIGN until
+ * we either run out of masks, lose the run, or find what we
+ * were looking for.
+ */
+ cur_msk = msk->data[msk_idx];
+ left = n;
+
+ /* if we're looking for free spaces, invert the mask */
+ if (!used)
+ cur_msk = ~cur_msk;
+
+ /* combine current ignore mask with last index ignore mask */
+ if (msk_idx == last)
+ ignore_msk |= last_msk;
+
+ /* if we have an ignore mask, ignore once */
+ if (ignore_msk) {
+ cur_msk &= ignore_msk;
+ ignore_msk = 0;
+ }
+
+ /* if n can fit in within a single mask, do a search */
+ if (n <= MASK_ALIGN) {
+ uint64_t tmp_msk = cur_msk;
+ unsigned int s_idx;
+ for (s_idx = 0; s_idx < n - 1; s_idx++)
+ tmp_msk &= tmp_msk >> 1ULL;
+ /* we found what we were looking for */
+ if (tmp_msk != 0) {
+ run_start = __builtin_ctzll(tmp_msk);
+ return MASK_GET_IDX(msk_idx, run_start);
+ }
+ }
+
+ /*
+ * we didn't find our run within the mask, or n > MASK_ALIGN,
+ * so we're going for plan B.
+ */
+
+ /* count leading zeroes on inverted mask */
+ if (~cur_msk == 0)
+ clz = sizeof(cur_msk) * 8;
+ else
+ clz = __builtin_clzll(~cur_msk);
+
+ /* if there aren't any runs at the end either, just continue */
+ if (clz == 0)
+ continue;
+
+ /* we have a partial run at the end, so try looking ahead */
+ run_start = MASK_ALIGN - clz;
+ left -= clz;
+
+ for (lookahead_idx = msk_idx + 1; lookahead_idx < msk->n_masks;
+ lookahead_idx++) {
+ unsigned int s_idx, need;
+ lookahead_msk = msk->data[lookahead_idx];
+
+ /* if we're looking for free space, invert the mask */
+ if (!used)
+ lookahead_msk = ~lookahead_msk;
+
+ /* figure out how many consecutive bits we need here */
+ need = RTE_MIN(left, MASK_ALIGN);
+
+ for (s_idx = 0; s_idx < need - 1; s_idx++)
+ lookahead_msk &= lookahead_msk >> 1ULL;
+
+ /* if first bit is not set, we've lost the run */
+ if ((lookahead_msk & 1) == 0) {
+ /*
+ * we've scanned this far, so we know there are
+ * no runs in the space we've lookahead-scanned
+ * as well, so skip that on next iteration.
+ */
+ ignore_msk = ~((1ULL << need) - 1);
+ msk_idx = lookahead_idx;
+ break;
+ }
+
+ left -= need;
+
+ /* check if we've found what we were looking for */
+ if (left == 0) {
+ found = true;
+ break;
+ }
+ }
+
+ /* we didn't find anything, so continue */
+ if (!found)
+ continue;
+
+ return MASK_GET_IDX(msk_idx, run_start);
+ }
+ /* we didn't find anything */
+ rte_errno = used ? ENOENT : ENOSPC;
+ return -1;
+}
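/*
 * Worked example of the rshift-and run search used above (sketch, not part
 * of the upstream file). To find n consecutive set bits in a 64-bit mask,
 * AND the mask with itself shifted right, n - 1 times in total; any bit
 * still set marks the start of a run of length >= n:
 *
 *   msk                    = 0b01110110   (runs of 3 and 2)
 *   msk & msk>>1           = 0b00110010   (starts of runs >= 2)
 *   msk & msk>>1 & msk>>2  = 0b00010000   (starts of runs >= 3)
 *   __builtin_ctzll(...)   = 4            (the 3-bit run starts at bit 4)
 */
static int
find_run_sketch(uint64_t msk, unsigned int n)
{
	unsigned int i;

	if (n == 0 || n > 64)
		return -1;
	for (i = 0; i < n - 1; i++)
		msk &= msk >> 1;
	if (msk == 0)
		return -1;		/* no run of length n in this mask */
	return __builtin_ctzll(msk);	/* lowest starting index of such a run */
}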
+
+static int
+find_next(const struct rte_fbarray *arr, unsigned int start, bool used)
+{
+ const struct used_mask *msk = get_used_mask(arr->data, arr->elt_sz,
+ arr->len);
+ unsigned int idx, first, first_mod;
+ unsigned int last, last_mod;
+ uint64_t last_msk, ignore_msk;
+
+ /*
+ * mask only has granularity of MASK_ALIGN, but start may not be aligned
+ * on that boundary, so construct a special mask to exclude anything we
+ * don't want to see to avoid confusing ctz.
+ */
+ first = MASK_LEN_TO_IDX(start);
+ first_mod = MASK_LEN_TO_MOD(start);
+ ignore_msk = ~((1ULL << first_mod) - 1ULL);
+
+ /* array length may not be aligned, so calculate ignore mask for last
+ * mask index.
+ */
+ last = MASK_LEN_TO_IDX(arr->len);
+ last_mod = MASK_LEN_TO_MOD(arr->len);
+ last_msk = ~(-(1ULL) << last_mod);
+
+ for (idx = first; idx < msk->n_masks; idx++) {
+ uint64_t cur = msk->data[idx];
+ int found;
+
+ /* if we're looking for free entries, invert mask */
+ if (!used)
+ cur = ~cur;
+
+ if (idx == last)
+ cur &= last_msk;
+
+ /* ignore everything before start on first iteration */
+ if (idx == first)
+ cur &= ignore_msk;
+
+ /* check if we have any entries */
+ if (cur == 0)
+ continue;
+
+ /*
+ * find first set bit - that will correspond to whatever it is
+ * that we're looking for.
+ */
+ found = __builtin_ctzll(cur);
+ return MASK_GET_IDX(idx, found);
+ }
+ /* we didn't find anything */
+ rte_errno = used ? ENOENT : ENOSPC;
+ return -1;
+}
+
+static int
+find_contig(const struct rte_fbarray *arr, unsigned int start, bool used)
+{
+ const struct used_mask *msk = get_used_mask(arr->data, arr->elt_sz,
+ arr->len);
+ unsigned int idx, first, first_mod;
+ unsigned int last, last_mod;
+ uint64_t last_msk;
+ unsigned int need_len, result = 0;
+
+ /* array length may not be aligned, so calculate ignore mask for last
+ * mask index.
+ */
+ last = MASK_LEN_TO_IDX(arr->len);
+ last_mod = MASK_LEN_TO_MOD(arr->len);
+ last_msk = ~(-(1ULL) << last_mod);
+
+ first = MASK_LEN_TO_IDX(start);
+ first_mod = MASK_LEN_TO_MOD(start);
+ for (idx = first; idx < msk->n_masks; idx++, result += need_len) {
+ uint64_t cur = msk->data[idx];
+ unsigned int run_len;
+
+ need_len = MASK_ALIGN;
+
+ /* if we're looking for free entries, invert mask */
+ if (!used)
+ cur = ~cur;
+
+ /* if this is last mask, ignore everything after last bit */
+ if (idx == last)
+ cur &= last_msk;
+
+ /* ignore everything before start on first iteration */
+ if (idx == first) {
+ cur >>= first_mod;
+ /* at the start, we don't need the full mask len */
+ need_len -= first_mod;
+ }
+
+ /* we will be looking for zeroes, so invert the mask */
+ cur = ~cur;
+
+ /* if mask is zero, we have a complete run */
+ if (cur == 0)
+ continue;
+
+ /*
+ * see if current run ends before mask end.
+ */
+ run_len = __builtin_ctzll(cur);
+
+ /* add however many zeroes we've had in the last run and quit */
+ if (run_len < need_len) {
+ result += run_len;
+ break;
+ }
+ }
+ return result;
+}
+
+static int
+find_prev_n(const struct rte_fbarray *arr, unsigned int start, unsigned int n,
+ bool used)
+{
+ const struct used_mask *msk = get_used_mask(arr->data, arr->elt_sz,
+ arr->len);
+ unsigned int msk_idx, lookbehind_idx, first, first_mod;
+ uint64_t ignore_msk;
+
+ /*
+ * mask only has granularity of MASK_ALIGN, but start may not be aligned
+ * on that boundary, so construct a special mask to exclude anything we
+ * don't want to see to avoid confusing ctz.
+ */
+ first = MASK_LEN_TO_IDX(start);
+ first_mod = MASK_LEN_TO_MOD(start);
+ /* we're going backwards, so mask must start from the top */
+ ignore_msk = first_mod == MASK_ALIGN - 1 ?
+ -1ULL : /* prevent overflow */
+ ~(-1ULL << (first_mod + 1));
+
+ /* go backwards, include zero */
+ msk_idx = first;
+ do {
+ uint64_t cur_msk, lookbehind_msk;
+ unsigned int run_start, run_end, ctz, left;
+ bool found = false;
+ /*
+ * The process of getting n consecutive bits from the top for
+ * arbitrary n is a bit involved, but here it is in a nutshell:
+ *
+ * 1. let n be the number of consecutive bits we're looking for
+ * 2. check if n can fit in one mask, and if so, do n-1
+ * lshift-ands to see if there is an appropriate run inside
+ * our current mask
+ * 2a. if we found a run, bail out early
+ * 2b. if we didn't find a run, proceed
+ * 3. invert the mask and count trailing zeroes (that is, count
+ * how many consecutive set bits we had starting from the
+ * start of current mask) as k
+ * 3a. if k is 0, continue to next mask
+ * 3b. if k is not 0, we have a potential run
+ * 4. to satisfy our requirements, next mask must have n-k
+ * consecutive set bits at the end, so we will do (n-k-1)
+ * lshift-ands and check if last bit is set.
+ *
+ * Step 4 will need to be repeated if (n-k) > MASK_ALIGN until
+ * we either run out of masks, lose the run, or find what we
+ * were looking for.
+ */
+ cur_msk = msk->data[msk_idx];
+ left = n;
+
+ /* if we're looking for free spaces, invert the mask */
+ if (!used)
+ cur_msk = ~cur_msk;
+
+ /* if we have an ignore mask, ignore once */
+ if (ignore_msk) {
+ cur_msk &= ignore_msk;
+ ignore_msk = 0;
+ }
+
+ /* if n can fit in within a single mask, do a search */
+ if (n <= MASK_ALIGN) {
+ uint64_t tmp_msk = cur_msk;
+ unsigned int s_idx;
+ for (s_idx = 0; s_idx < n - 1; s_idx++)
+ tmp_msk &= tmp_msk << 1ULL;
+ /* we found what we were looking for */
+ if (tmp_msk != 0) {
+ /* clz will give us offset from end of mask, and
+ * we only get the end of our run, not start,
+ * so adjust result to point to where start
+ * would have been.
+ */
+ run_start = MASK_ALIGN -
+ __builtin_clzll(tmp_msk) - n;
+ return MASK_GET_IDX(msk_idx, run_start);
+ }
+ }
+
+ /*
+ * we didn't find our run within the mask, or n > MASK_ALIGN,
+ * so we're going for plan B.
+ */
+
+ /* count trailing zeroes on inverted mask */
+ if (~cur_msk == 0)
+ ctz = sizeof(cur_msk) * 8;
+ else
+ ctz = __builtin_ctzll(~cur_msk);
+
+ /* if there aren't any runs at the start either, just
+ * continue
+ */
+ if (ctz == 0)
+ continue;
+
+ /* we have a partial run at the start, so try looking behind */
+ run_end = MASK_GET_IDX(msk_idx, ctz);
+ left -= ctz;
+
+ /* go backwards, include zero */
+ lookbehind_idx = msk_idx - 1;
+
+ /* we can't lookbehind as we've run out of masks, so stop */
+ if (msk_idx == 0)
+ break;
+
+ do {
+ const uint64_t last_bit = 1ULL << (MASK_ALIGN - 1);
+ unsigned int s_idx, need;
+
+ lookbehind_msk = msk->data[lookbehind_idx];
+
+ /* if we're looking for free space, invert the mask */
+ if (!used)
+ lookbehind_msk = ~lookbehind_msk;
+
+ /* figure out how many consecutive bits we need here */
+ need = RTE_MIN(left, MASK_ALIGN);
+
+ for (s_idx = 0; s_idx < need - 1; s_idx++)
+ lookbehind_msk &= lookbehind_msk << 1ULL;
+
+ /* if last bit is not set, we've lost the run */
+ if ((lookbehind_msk & last_bit) == 0) {
+ /*
+ * we've scanned this far, so we know there are
+ * no runs in the space we've lookbehind-scanned
+ * as well, so skip that on next iteration.
+ */
+ ignore_msk = -1ULL << need;
+ msk_idx = lookbehind_idx;
+ break;
+ }
+
+ left -= need;
+
+ /* check if we've found what we were looking for */
+ if (left == 0) {
+ found = true;
+ break;
+ }
+ } while ((lookbehind_idx--) != 0); /* decrement after check to
+ * include zero
+ */
+
+ /* we didn't find anything, so continue */
+ if (!found)
+ continue;
+
+ /* we've found what we were looking for, but we only know where
+ * the run ended, so calculate start position.
+ */
+ return run_end - n;
+ } while (msk_idx-- != 0); /* decrement after check to include zero */
+ /* we didn't find anything */
+ rte_errno = used ? ENOENT : ENOSPC;
+ return -1;
+}
+
+static int
+find_prev(const struct rte_fbarray *arr, unsigned int start, bool used)
+{
+ const struct used_mask *msk = get_used_mask(arr->data, arr->elt_sz,
+ arr->len);
+ unsigned int idx, first, first_mod;
+ uint64_t ignore_msk;
+
+ /*
+ * mask only has granularity of MASK_ALIGN, but start may not be aligned
+ * on that boundary, so construct a special mask to exclude anything we
+ * don't want to see to avoid confusing clz.
+ */
+ first = MASK_LEN_TO_IDX(start);
+ first_mod = MASK_LEN_TO_MOD(start);
+ /* we're going backwards, so mask must start from the top */
+ ignore_msk = first_mod == MASK_ALIGN - 1 ?
+ -1ULL : /* prevent overflow */
+ ~(-1ULL << (first_mod + 1));
+
+ /* go backwards, include zero */
+ idx = first;
+ do {
+ uint64_t cur = msk->data[idx];
+ int found;
+
+ /* if we're looking for free entries, invert mask */
+ if (!used)
+ cur = ~cur;
+
+ /* ignore everything before start on first iteration */
+ if (idx == first)
+ cur &= ignore_msk;
+
+ /* check if we have any entries */
+ if (cur == 0)
+ continue;
+
+ /*
+ * find last set bit - that will correspond to whatever it is
+ * that we're looking for. we're counting trailing zeroes, thus
+ * the value we get is counted from end of mask, so calculate
+ * position from start of mask.
+ */
+ found = MASK_ALIGN - __builtin_clzll(cur) - 1;
+
+ return MASK_GET_IDX(idx, found);
+ } while (idx-- != 0); /* decrement after check to include zero*/
+
+ /* we didn't find anything */
+ rte_errno = used ? ENOENT : ENOSPC;
+ return -1;
+}
+
+static int
+find_rev_contig(const struct rte_fbarray *arr, unsigned int start, bool used)
+{
+ const struct used_mask *msk = get_used_mask(arr->data, arr->elt_sz,
+ arr->len);
+ unsigned int idx, first, first_mod;
+ unsigned int need_len, result = 0;
+
+ first = MASK_LEN_TO_IDX(start);
+ first_mod = MASK_LEN_TO_MOD(start);
+
+ /* go backwards, include zero */
+ idx = first;
+ do {
+ uint64_t cur = msk->data[idx];
+ unsigned int run_len;
+
+ need_len = MASK_ALIGN;
+
+ /* if we're looking for free entries, invert mask */
+ if (!used)
+ cur = ~cur;
+
+ /* ignore everything after start on first iteration */
+ if (idx == first) {
+ unsigned int end_len = MASK_ALIGN - first_mod - 1;
+ cur <<= end_len;
+ /* at the start, we don't need the full mask len */
+ need_len -= end_len;
+ }
+
+ /* we will be looking for zeroes, so invert the mask */
+ cur = ~cur;
+
+ /* if mask is zero, we have a complete run */
+ if (cur == 0)
+ goto endloop;
+
+ /*
+ * see where run ends, starting from the end.
+ */
+ run_len = __builtin_clzll(cur);
+
+ /* add however many zeroes we've had in the last run and quit */
+ if (run_len < need_len) {
+ result += run_len;
+ break;
+ }
+endloop:
+ result += need_len;
+ } while (idx-- != 0); /* decrement after check to include zero */
+ return result;
+}
+
+static int
+set_used(struct rte_fbarray *arr, unsigned int idx, bool used)
+{
+ struct used_mask *msk;
+ uint64_t msk_bit = 1ULL << MASK_LEN_TO_MOD(idx);
+ unsigned int msk_idx = MASK_LEN_TO_IDX(idx);
+ bool already_used;
+ int ret = -1;
+
+ if (arr == NULL || idx >= arr->len) {
+ rte_errno = EINVAL;
+ return -1;
+ }
+ msk = get_used_mask(arr->data, arr->elt_sz, arr->len);
+ ret = 0;
+
+ /* prevent array from changing under us */
+ rte_rwlock_write_lock(&arr->rwlock);
+
+ already_used = (msk->data[msk_idx] & msk_bit) != 0;
+
+ /* nothing to be done */
+ if (used == already_used)
+ goto out;
+
+ if (used) {
+ msk->data[msk_idx] |= msk_bit;
+ arr->count++;
+ } else {
+ msk->data[msk_idx] &= ~msk_bit;
+ arr->count--;
+ }
+out:
+ rte_rwlock_write_unlock(&arr->rwlock);
+
+ return ret;
+}
+
+static int
+fully_validate(const char *name, unsigned int elt_sz, unsigned int len)
+{
+ if (name == NULL || elt_sz == 0 || len == 0 || len > INT_MAX) {
+ rte_errno = EINVAL;
+ return -1;
+ }
+
+ if (strnlen(name, RTE_FBARRAY_NAME_LEN) == RTE_FBARRAY_NAME_LEN) {
+ rte_errno = ENAMETOOLONG;
+ return -1;
+ }
+ return 0;
+}
+
+int __rte_experimental
+rte_fbarray_init(struct rte_fbarray *arr, const char *name, unsigned int len,
+ unsigned int elt_sz)
+{
+ size_t page_sz, mmap_len;
+ char path[PATH_MAX];
+ struct used_mask *msk;
+ void *data = NULL;
+ int fd = -1;
+
+ if (arr == NULL) {
+ rte_errno = EINVAL;
+ return -1;
+ }
+
+ if (fully_validate(name, elt_sz, len))
+ return -1;
+
+ page_sz = sysconf(_SC_PAGESIZE);
+ if (page_sz == (size_t)-1)
+ goto fail;
+
+ /* calculate our memory limits */
+ mmap_len = calc_data_size(page_sz, elt_sz, len);
+
+ data = eal_get_virtual_area(NULL, &mmap_len, page_sz, 0, 0);
+ if (data == NULL)
+ goto fail;
+
+ if (internal_config.no_shconf) {
+ /* remap virtual area as writable */
+ void *new_data = mmap(data, mmap_len, PROT_READ | PROT_WRITE,
+ MAP_FIXED | MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+ if (new_data == MAP_FAILED) {
+ RTE_LOG(DEBUG, EAL, "%s(): couldn't remap anonymous memory: %s\n",
+ __func__, strerror(errno));
+ goto fail;
+ }
+ } else {
+ eal_get_fbarray_path(path, sizeof(path), name);
+
+ /*
+ * Each fbarray is unique to process namespace, i.e. the
+ * filename depends on process prefix. Try to take out a lock
+ * and see if we succeed. If we don't, someone else is using it
+ * already.
+ */
+ fd = open(path, O_CREAT | O_RDWR, 0600);
+ if (fd < 0) {
+ RTE_LOG(DEBUG, EAL, "%s(): couldn't open %s: %s\n",
+ __func__, path, strerror(errno));
+ rte_errno = errno;
+ goto fail;
+ } else if (flock(fd, LOCK_EX | LOCK_NB)) {
+ RTE_LOG(DEBUG, EAL, "%s(): couldn't lock %s: %s\n",
+ __func__, path, strerror(errno));
+ rte_errno = EBUSY;
+ goto fail;
+ }
+
+ /* take out a non-exclusive lock, so that other processes could
+ * still attach to it, but no other process could reinitialize
+ * it.
+ */
+ if (flock(fd, LOCK_SH | LOCK_NB)) {
+ rte_errno = errno;
+ goto fail;
+ }
+
+ if (resize_and_map(fd, data, mmap_len))
+ goto fail;
+
+ /* we've mmap'ed the file, we can now close the fd */
+ close(fd);
+ }
+
+ /* initialize the data */
+ memset(data, 0, mmap_len);
+
+ /* populate data structure */
+ strlcpy(arr->name, name, sizeof(arr->name));
+ arr->data = data;
+ arr->len = len;
+ arr->elt_sz = elt_sz;
+ arr->count = 0;
+
+ msk = get_used_mask(data, elt_sz, len);
+ msk->n_masks = MASK_LEN_TO_IDX(RTE_ALIGN_CEIL(len, MASK_ALIGN));
+
+ rte_rwlock_init(&arr->rwlock);
+
+ return 0;
+fail:
+ if (data)
+ munmap(data, mmap_len);
+ if (fd >= 0)
+ close(fd);
+ return -1;
+}
+
+int __rte_experimental
+rte_fbarray_attach(struct rte_fbarray *arr)
+{
+ size_t page_sz, mmap_len;
+ char path[PATH_MAX];
+ void *data = NULL;
+ int fd = -1;
+
+ if (arr == NULL) {
+ rte_errno = EINVAL;
+ return -1;
+ }
+
+ /*
+ * we don't need to synchronize attach as two values we need (element
+ * size and array length) are constant for the duration of life of
+ * the array, so the parts we care about will not race.
+ */
+
+ if (fully_validate(arr->name, arr->elt_sz, arr->len))
+ return -1;
+
+ page_sz = sysconf(_SC_PAGESIZE);
+ if (page_sz == (size_t)-1)
+ goto fail;
+
+ mmap_len = calc_data_size(page_sz, arr->elt_sz, arr->len);
+
+ data = eal_get_virtual_area(arr->data, &mmap_len, page_sz, 0, 0);
+ if (data == NULL)
+ goto fail;
+
+ eal_get_fbarray_path(path, sizeof(path), arr->name);
+
+ fd = open(path, O_RDWR);
+ if (fd < 0) {
+ rte_errno = errno;
+ goto fail;
+ }
+
+ /* lock the file, to let others know we're using it */
+ if (flock(fd, LOCK_SH | LOCK_NB)) {
+ rte_errno = errno;
+ goto fail;
+ }
+
+ if (resize_and_map(fd, data, mmap_len))
+ goto fail;
+
+ close(fd);
+
+ /* we're done */
+
+ return 0;
+fail:
+ if (data)
+ munmap(data, mmap_len);
+ if (fd >= 0)
+ close(fd);
+ return -1;
+}
+
+int __rte_experimental
+rte_fbarray_detach(struct rte_fbarray *arr)
+{
+ if (arr == NULL) {
+ rte_errno = EINVAL;
+ return -1;
+ }
+
+ /*
+ * we don't need to synchronize detach as two values we need (element
+ * size and total capacity) are constant for the duration of life of
+ * the array, so the parts we care about will not race. if the user is
+ * detaching while doing something else in the same process, we can't
+ * really do anything about it, things will blow up either way.
+ */
+
+ size_t page_sz = sysconf(_SC_PAGESIZE);
+
+ if (page_sz == (size_t)-1)
+ return -1;
+
+ /* this may already be unmapped (e.g. repeated call from previously
+ * failed destroy(), but this is on user, we can't (easily) know if this
+ * is still mapped.
+ */
+ munmap(arr->data, calc_data_size(page_sz, arr->elt_sz, arr->len));
+
+ return 0;
+}
+
+int __rte_experimental
+rte_fbarray_destroy(struct rte_fbarray *arr)
+{
+ int fd, ret;
+ char path[PATH_MAX];
+
+ ret = rte_fbarray_detach(arr);
+ if (ret)
+ return ret;
+
+ /* try deleting the file */
+ eal_get_fbarray_path(path, sizeof(path), arr->name);
+
+ fd = open(path, O_RDONLY);
+ if (fd < 0) {
+ RTE_LOG(ERR, EAL, "Could not open fbarray file: %s\n",
+ strerror(errno));
+ return -1;
+ }
+ if (flock(fd, LOCK_EX | LOCK_NB)) {
+ RTE_LOG(DEBUG, EAL, "Cannot destroy fbarray - another process is using it\n");
+ rte_errno = EBUSY;
+ ret = -1;
+ } else {
+ ret = 0;
+ unlink(path);
+ memset(arr, 0, sizeof(*arr));
+ }
+ close(fd);
+
+ return ret;
+}
+
+void * __rte_experimental
+rte_fbarray_get(const struct rte_fbarray *arr, unsigned int idx)
+{
+ void *ret = NULL;
+ if (arr == NULL) {
+ rte_errno = EINVAL;
+ return NULL;
+ }
+
+ if (idx >= arr->len) {
+ rte_errno = EINVAL;
+ return NULL;
+ }
+
+ ret = RTE_PTR_ADD(arr->data, idx * arr->elt_sz);
+
+ return ret;
+}
+
+int __rte_experimental
+rte_fbarray_set_used(struct rte_fbarray *arr, unsigned int idx)
+{
+ return set_used(arr, idx, true);
+}
+
+int __rte_experimental
+rte_fbarray_set_free(struct rte_fbarray *arr, unsigned int idx)
+{
+ return set_used(arr, idx, false);
+}
+
+int __rte_experimental
+rte_fbarray_is_used(struct rte_fbarray *arr, unsigned int idx)
+{
+ struct used_mask *msk;
+ int msk_idx;
+ uint64_t msk_bit;
+ int ret = -1;
+
+ if (arr == NULL || idx >= arr->len) {
+ rte_errno = EINVAL;
+ return -1;
+ }
+
+ /* prevent array from changing under us */
+ rte_rwlock_read_lock(&arr->rwlock);
+
+ msk = get_used_mask(arr->data, arr->elt_sz, arr->len);
+ msk_idx = MASK_LEN_TO_IDX(idx);
+ msk_bit = 1ULL << MASK_LEN_TO_MOD(idx);
+
+ ret = (msk->data[msk_idx] & msk_bit) != 0;
+
+ rte_rwlock_read_unlock(&arr->rwlock);
+
+ return ret;
+}
+
+static int
+fbarray_find(struct rte_fbarray *arr, unsigned int start, bool next, bool used)
+{
+ int ret = -1;
+
+ if (arr == NULL || start >= arr->len) {
+ rte_errno = EINVAL;
+ return -1;
+ }
+
+ /* prevent array from changing under us */
+ rte_rwlock_read_lock(&arr->rwlock);
+
+ /* cheap checks to prevent doing useless work */
+ if (!used) {
+ if (arr->len == arr->count) {
+ rte_errno = ENOSPC;
+ goto out;
+ }
+ if (arr->count == 0) {
+ ret = start;
+ goto out;
+ }
+ } else {
+ if (arr->count == 0) {
+ rte_errno = ENOENT;
+ goto out;
+ }
+ if (arr->len == arr->count) {
+ ret = start;
+ goto out;
+ }
+ }
+ if (next)
+ ret = find_next(arr, start, used);
+ else
+ ret = find_prev(arr, start, used);
+out:
+ rte_rwlock_read_unlock(&arr->rwlock);
+ return ret;
+}
+
+int __rte_experimental
+rte_fbarray_find_next_free(struct rte_fbarray *arr, unsigned int start)
+{
+ return fbarray_find(arr, start, true, false);
+}
+
+int __rte_experimental
+rte_fbarray_find_next_used(struct rte_fbarray *arr, unsigned int start)
+{
+ return fbarray_find(arr, start, true, true);
+}
+
+int __rte_experimental
+rte_fbarray_find_prev_free(struct rte_fbarray *arr, unsigned int start)
+{
+ return fbarray_find(arr, start, false, false);
+}
+
+int __rte_experimental
+rte_fbarray_find_prev_used(struct rte_fbarray *arr, unsigned int start)
+{
+ return fbarray_find(arr, start, false, true);
+}
+
+static int
+fbarray_find_n(struct rte_fbarray *arr, unsigned int start, unsigned int n,
+ bool next, bool used)
+{
+ int ret = -1;
+
+ if (arr == NULL || start >= arr->len || n > arr->len || n == 0) {
+ rte_errno = EINVAL;
+ return -1;
+ }
+ if (next && (arr->len - start) < n) {
+ rte_errno = used ? ENOENT : ENOSPC;
+ return -1;
+ }
+ if (!next && start < (n - 1)) {
+ rte_errno = used ? ENOENT : ENOSPC;
+ return -1;
+ }
+
+ /* prevent array from changing under us */
+ rte_rwlock_read_lock(&arr->rwlock);
+
+ /* cheap checks to prevent doing useless work */
+ if (!used) {
+ if (arr->len == arr->count || arr->len - arr->count < n) {
+ rte_errno = ENOSPC;
+ goto out;
+ }
+ if (arr->count == 0) {
+ ret = next ? start : start - n + 1;
+ goto out;
+ }
+ } else {
+ if (arr->count < n) {
+ rte_errno = ENOENT;
+ goto out;
+ }
+ if (arr->count == arr->len) {
+ ret = next ? start : start - n + 1;
+ goto out;
+ }
+ }
+
+ if (next)
+ ret = find_next_n(arr, start, n, used);
+ else
+ ret = find_prev_n(arr, start, n, used);
+out:
+ rte_rwlock_read_unlock(&arr->rwlock);
+ return ret;
+}
+
+int __rte_experimental
+rte_fbarray_find_next_n_free(struct rte_fbarray *arr, unsigned int start,
+ unsigned int n)
+{
+ return fbarray_find_n(arr, start, n, true, false);
+}
+
+int __rte_experimental
+rte_fbarray_find_next_n_used(struct rte_fbarray *arr, unsigned int start,
+ unsigned int n)
+{
+ return fbarray_find_n(arr, start, n, true, true);
+}
+
+int __rte_experimental
+rte_fbarray_find_prev_n_free(struct rte_fbarray *arr, unsigned int start,
+ unsigned int n)
+{
+ return fbarray_find_n(arr, start, n, false, false);
+}
+
+int __rte_experimental
+rte_fbarray_find_prev_n_used(struct rte_fbarray *arr, unsigned int start,
+ unsigned int n)
+{
+ return fbarray_find_n(arr, start, n, false, true);
+}
+
+static int
+fbarray_find_contig(struct rte_fbarray *arr, unsigned int start, bool next,
+ bool used)
+{
+ int ret = -1;
+
+ if (arr == NULL || start >= arr->len) {
+ rte_errno = EINVAL;
+ return -1;
+ }
+
+ /* prevent array from changing under us */
+ rte_rwlock_read_lock(&arr->rwlock);
+
+ /* cheap checks to prevent doing useless work */
+ if (used) {
+ if (arr->count == 0) {
+ ret = 0;
+ goto out;
+ }
+ if (next && arr->count == arr->len) {
+ ret = arr->len - start;
+ goto out;
+ }
+ if (!next && arr->count == arr->len) {
+ ret = start + 1;
+ goto out;
+ }
+ } else {
+ if (arr->len == arr->count) {
+ ret = 0;
+ goto out;
+ }
+ if (next && arr->count == 0) {
+ ret = arr->len - start;
+ goto out;
+ }
+ if (!next && arr->count == 0) {
+ ret = start + 1;
+ goto out;
+ }
+ }
+
+ if (next)
+ ret = find_contig(arr, start, used);
+ else
+ ret = find_rev_contig(arr, start, used);
+out:
+ rte_rwlock_read_unlock(&arr->rwlock);
+ return ret;
+}
+
+int __rte_experimental
+rte_fbarray_find_contig_free(struct rte_fbarray *arr, unsigned int start)
+{
+ return fbarray_find_contig(arr, start, true, false);
+}
+
+int __rte_experimental
+rte_fbarray_find_contig_used(struct rte_fbarray *arr, unsigned int start)
+{
+ return fbarray_find_contig(arr, start, true, true);
+}
+
+int __rte_experimental
+rte_fbarray_find_rev_contig_free(struct rte_fbarray *arr, unsigned int start)
+{
+ return fbarray_find_contig(arr, start, false, false);
+}
+
+int __rte_experimental
+rte_fbarray_find_rev_contig_used(struct rte_fbarray *arr, unsigned int start)
+{
+ return fbarray_find_contig(arr, start, false, true);
+}
+
+int __rte_experimental
+rte_fbarray_find_idx(const struct rte_fbarray *arr, const void *elt)
+{
+ void *end;
+ int ret = -1;
+
+ /*
+ * no need to synchronize as it doesn't matter if underlying data
+ * changes - we're doing pointer arithmetic here.
+ */
+
+ if (arr == NULL || elt == NULL) {
+ rte_errno = EINVAL;
+ return -1;
+ }
+ end = RTE_PTR_ADD(arr->data, arr->elt_sz * arr->len);
+ if (elt < arr->data || elt >= end) {
+ rte_errno = EINVAL;
+ return -1;
+ }
+
+ ret = RTE_PTR_DIFF(elt, arr->data) / arr->elt_sz;
+
+ return ret;
+}
+
+void __rte_experimental
+rte_fbarray_dump_metadata(struct rte_fbarray *arr, FILE *f)
+{
+ struct used_mask *msk;
+ unsigned int i;
+
+ if (arr == NULL || f == NULL) {
+ rte_errno = EINVAL;
+ return;
+ }
+
+ if (fully_validate(arr->name, arr->elt_sz, arr->len)) {
+ fprintf(f, "Invalid file-backed array\n");
+ goto out;
+ }
+
+ /* prevent array from changing under us */
+ rte_rwlock_read_lock(&arr->rwlock);
+
+ fprintf(f, "File-backed array: %s\n", arr->name);
+ fprintf(f, "size: %i occupied: %i elt_sz: %i\n",
+ arr->len, arr->count, arr->elt_sz);
+
+ msk = get_used_mask(arr->data, arr->elt_sz, arr->len);
+
+ for (i = 0; i < msk->n_masks; i++)
+ fprintf(f, "msk idx %i: 0x%016" PRIx64 "\n", i, msk->data[i]);
+out:
+ rte_rwlock_read_unlock(&arr->rwlock);
+}
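
For reference, a minimal usage sketch of the fbarray API added above (not part of the patch; the array name "example" and struct my_elt are illustrative, and all rte_fbarray_* functions are marked __rte_experimental in this release):

#include <rte_fbarray.h>

struct my_elt { int value; };

static int
fbarray_example(void)
{
	struct rte_fbarray arr;
	struct my_elt *elt;
	int idx;

	/* create a file-backed array of 64 elements */
	if (rte_fbarray_init(&arr, "example", 64, sizeof(struct my_elt)) < 0)
		return -1;

	/* find a free slot, fill it in and mark it used */
	idx = rte_fbarray_find_next_free(&arr, 0);
	if (idx < 0) {
		rte_fbarray_destroy(&arr);
		return -1;
	}
	elt = rte_fbarray_get(&arr, idx);
	elt->value = 42;
	rte_fbarray_set_used(&arr, idx);

	/* release the slot and remove the backing file */
	rte_fbarray_set_free(&arr, idx);
	return rte_fbarray_destroy(&arr);
}
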
diff --git a/lib/librte_eal/common/eal_common_hypervisor.c b/lib/librte_eal/common/eal_common_hypervisor.c
index c3b4c621..5388b81a 100644
--- a/lib/librte_eal/common/eal_common_hypervisor.c
+++ b/lib/librte_eal/common/eal_common_hypervisor.c
@@ -1,5 +1,5 @@
/* SPDX-License-Identifier: BSD-3-Clause
- * Copyright 2017 Mellanox Technologies, Ltd.
+ * Copyright 2017 Mellanox Technologies, Ltd
*/
#include "rte_hypervisor.h"
diff --git a/lib/librte_eal/common/eal_common_lcore.c b/lib/librte_eal/common/eal_common_lcore.c
index 7724fa43..3167e9d7 100644
--- a/lib/librte_eal/common/eal_common_lcore.c
+++ b/lib/librte_eal/common/eal_common_lcore.c
@@ -7,6 +7,7 @@
#include <string.h>
#include <dirent.h>
+#include <rte_errno.h>
#include <rte_log.h>
#include <rte_eal.h>
#include <rte_lcore.h>
@@ -16,6 +17,19 @@
#include "eal_private.h"
#include "eal_thread.h"
+static int
+socket_id_cmp(const void *a, const void *b)
+{
+ const int *lcore_id_a = a;
+ const int *lcore_id_b = b;
+
+ if (*lcore_id_a < *lcore_id_b)
+ return -1;
+ if (*lcore_id_a > *lcore_id_b)
+ return 1;
+ return 0;
+}
+
/*
* Parse /sys/devices/system/cpu to get the number of physical and logical
* processors on the machine. The function will fill the cpu_info
@@ -28,6 +42,8 @@ rte_eal_cpu_init(void)
struct rte_config *config = rte_eal_get_configuration();
unsigned lcore_id;
unsigned count = 0;
+ unsigned int socket_id, prev_socket_id;
+ int lcore_to_socket_id[RTE_MAX_LCORE];
/*
* Parse the maximum set of logical cores, detect the subset of running
@@ -39,6 +55,19 @@ rte_eal_cpu_init(void)
/* init cpuset for per lcore config */
CPU_ZERO(&lcore_config[lcore_id].cpuset);
+ /* find socket first */
+ socket_id = eal_cpu_socket_id(lcore_id);
+ if (socket_id >= RTE_MAX_NUMA_NODES) {
+#ifdef RTE_EAL_ALLOW_INV_SOCKET_ID
+ socket_id = 0;
+#else
+ RTE_LOG(ERR, EAL, "Socket ID (%u) is greater than RTE_MAX_NUMA_NODES (%d)\n",
+ socket_id, RTE_MAX_NUMA_NODES);
+ return -1;
+#endif
+ }
+ lcore_to_socket_id[lcore_id] = socket_id;
+
/* in 1:1 mapping, record related cpu detected state */
lcore_config[lcore_id].detected = eal_cpu_detected(lcore_id);
if (lcore_config[lcore_id].detected == 0) {
@@ -54,18 +83,7 @@ rte_eal_cpu_init(void)
config->lcore_role[lcore_id] = ROLE_RTE;
lcore_config[lcore_id].core_role = ROLE_RTE;
lcore_config[lcore_id].core_id = eal_cpu_core_id(lcore_id);
- lcore_config[lcore_id].socket_id = eal_cpu_socket_id(lcore_id);
- if (lcore_config[lcore_id].socket_id >= RTE_MAX_NUMA_NODES) {
-#ifdef RTE_EAL_ALLOW_INV_SOCKET_ID
- lcore_config[lcore_id].socket_id = 0;
-#else
- RTE_LOG(ERR, EAL, "Socket ID (%u) is greater than "
- "RTE_MAX_NUMA_NODES (%d)\n",
- lcore_config[lcore_id].socket_id,
- RTE_MAX_NUMA_NODES);
- return -1;
-#endif
- }
+ lcore_config[lcore_id].socket_id = socket_id;
RTE_LOG(DEBUG, EAL, "Detected lcore %u as "
"core %u on socket %u\n",
lcore_id, lcore_config[lcore_id].core_id,
@@ -79,5 +97,38 @@ rte_eal_cpu_init(void)
RTE_MAX_LCORE);
RTE_LOG(INFO, EAL, "Detected %u lcore(s)\n", config->lcore_count);
+ /* sort all socket id's in ascending order */
+ qsort(lcore_to_socket_id, RTE_DIM(lcore_to_socket_id),
+ sizeof(lcore_to_socket_id[0]), socket_id_cmp);
+
+ prev_socket_id = -1;
+ config->numa_node_count = 0;
+ for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) {
+ socket_id = lcore_to_socket_id[lcore_id];
+ if (socket_id != prev_socket_id)
+ config->numa_nodes[config->numa_node_count++] =
+ socket_id;
+ prev_socket_id = socket_id;
+ }
+ RTE_LOG(INFO, EAL, "Detected %u NUMA nodes\n", config->numa_node_count);
+
return 0;
}
+
+unsigned int __rte_experimental
+rte_socket_count(void)
+{
+ const struct rte_config *config = rte_eal_get_configuration();
+ return config->numa_node_count;
+}
+
+int __rte_experimental
+rte_socket_id_by_idx(unsigned int idx)
+{
+ const struct rte_config *config = rte_eal_get_configuration();
+ if (idx >= config->numa_node_count) {
+ rte_errno = EINVAL;
+ return -1;
+ }
+ return config->numa_nodes[idx];
+}
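
A small sketch (not part of the patch) of how an application could enumerate the NUMA nodes detected above; both functions are experimental in this release:

#include <stdio.h>
#include <rte_lcore.h>

static void
print_numa_nodes(void)
{
	unsigned int i;

	/* numa_nodes[] was filled and sorted by rte_eal_cpu_init() */
	for (i = 0; i < rte_socket_count(); i++)
		printf("NUMA node %d is present\n", rte_socket_id_by_idx(i));
}
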
diff --git a/lib/librte_eal/common/eal_common_log.c b/lib/librte_eal/common/eal_common_log.c
index 37b2e20e..c714a4bd 100644
--- a/lib/librte_eal/common/eal_common_log.c
+++ b/lib/librte_eal/common/eal_common_log.c
@@ -9,6 +9,7 @@
#include <string.h>
#include <errno.h>
#include <regex.h>
+#include <fnmatch.h>
#include <rte_eal.h>
#include <rte_log.h>
@@ -23,6 +24,23 @@ struct rte_logs rte_logs = {
.file = NULL,
};
+struct rte_eal_opt_loglevel {
+ /** Next list entry */
+ TAILQ_ENTRY(rte_eal_opt_loglevel) next;
+ /** Compiled regular expression obtained from the option */
+ regex_t re_match;
+ /** Glob match string option */
+ char *pattern;
+ /** Log level value obtained from the option */
+ uint32_t level;
+};
+
+TAILQ_HEAD(rte_eal_opt_loglevel_list, rte_eal_opt_loglevel);
+
+/** List of valid EAL log level options */
+static struct rte_eal_opt_loglevel_list opt_loglevel_list =
+ TAILQ_HEAD_INITIALIZER(opt_loglevel_list);
+
/* Stream to use for logging if rte_logs.file is NULL */
static FILE *default_log_stream;
@@ -89,9 +107,9 @@ rte_log_set_level(uint32_t type, uint32_t level)
return 0;
}
-/* set level */
+/* set log level by regular expression */
int
-rte_log_set_level_regexp(const char *pattern, uint32_t level)
+rte_log_set_level_regexp(const char *regex, uint32_t level)
{
regex_t r;
size_t i;
@@ -99,7 +117,7 @@ rte_log_set_level_regexp(const char *pattern, uint32_t level)
if (level > RTE_LOG_DEBUG)
return -1;
- if (regcomp(&r, pattern, 0) != 0)
+ if (regcomp(&r, regex, 0) != 0)
return -1;
for (i = 0; i < rte_logs.dynamic_types_len; i++) {
@@ -115,6 +133,69 @@ rte_log_set_level_regexp(const char *pattern, uint32_t level)
return 0;
}
+/*
+ * Save the pattern (glob or regex) and the log level for dynamic
+ * log types which may register later.
+ */
+static int rte_log_save_level(int priority,
+ const char *regex, const char *pattern)
+{
+ struct rte_eal_opt_loglevel *opt_ll = NULL;
+
+ opt_ll = malloc(sizeof(*opt_ll));
+ if (opt_ll == NULL)
+ goto fail;
+
+ opt_ll->level = priority;
+
+ if (regex) {
+ opt_ll->pattern = NULL;
+ if (regcomp(&opt_ll->re_match, regex, 0) != 0)
+ goto fail;
+ } else if (pattern) {
+ opt_ll->pattern = strdup(pattern);
+ if (opt_ll->pattern == NULL)
+ goto fail;
+ } else
+ goto fail;
+
+ TAILQ_INSERT_HEAD(&opt_loglevel_list, opt_ll, next);
+ return 0;
+fail:
+ free(opt_ll);
+ return -1;
+}
+
+int rte_log_save_regexp(const char *regex, int tmp)
+{
+ return rte_log_save_level(tmp, regex, NULL);
+}
+
+/* set log level based on glob (file match) pattern */
+int
+rte_log_set_level_pattern(const char *pattern, uint32_t level)
+{
+ size_t i;
+
+ if (level > RTE_LOG_DEBUG)
+ return -1;
+
+ for (i = 0; i < rte_logs.dynamic_types_len; i++) {
+ if (rte_logs.dynamic_types[i].name == NULL)
+ continue;
+
+ if (fnmatch(pattern, rte_logs.dynamic_types[i].name, 0) == 0)
+ rte_logs.dynamic_types[i].loglevel = level;
+ }
+
+ return 0;
+}
+
+int rte_log_save_pattern(const char *pattern, int priority)
+{
+ return rte_log_save_level(priority, NULL, pattern);
+}
+
/* get the current loglevel for the message being processed */
int rte_log_cur_msg_loglevel(void)
{
@@ -186,6 +267,36 @@ rte_log_register(const char *name)
return ret;
}
+/* Register an extended log type and try to pick its level from EAL options */
+int __rte_experimental
+rte_log_register_type_and_pick_level(const char *name, uint32_t level_def)
+{
+ struct rte_eal_opt_loglevel *opt_ll;
+ uint32_t level = level_def;
+ int type;
+
+ type = rte_log_register(name);
+ if (type < 0)
+ return type;
+
+ TAILQ_FOREACH(opt_ll, &opt_loglevel_list, next) {
+ if (opt_ll->level > RTE_LOG_DEBUG)
+ continue;
+
+ if (opt_ll->pattern) {
+ if (fnmatch(opt_ll->pattern, name, 0) == 0)
+ level = opt_ll->level;
+ } else {
+ if (regexec(&opt_ll->re_match, name, 0, NULL, 0) == 0)
+ level = opt_ll->level;
+ }
+ }
+
+ rte_logs.dynamic_types[type].loglevel = level;
+
+ return type;
+}
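
For illustration, a hedged sketch of how a driver could use this registration helper so that a saved --log-level glob or regex option is applied to it (the log type name "pmd.net.mydrv" and the fallback are hypothetical, not part of the patch):

#include <rte_common.h>
#include <rte_log.h>

static int mydrv_logtype;

RTE_INIT(mydrv_log_init)
{
	/* if a matching --log-level pattern/regex option was given before
	 * init, it overrides the default level passed here.
	 */
	mydrv_logtype = rte_log_register_type_and_pick_level("pmd.net.mydrv",
							     RTE_LOG_NOTICE);
	if (mydrv_logtype < 0)
		mydrv_logtype = RTE_LOGTYPE_PMD;
}
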
+
struct logtype {
uint32_t log_id;
const char *logtype;
@@ -224,9 +335,7 @@ static const struct logtype logtype_strings[] = {
};
/* Logging should be first initializer (before drivers and bus) */
-RTE_INIT_PRIO(rte_log_init, 101);
-static void
-rte_log_init(void)
+RTE_INIT_PRIO(rte_log_init, LOG)
{
uint32_t i;
diff --git a/lib/librte_eal/common/eal_common_memalloc.c b/lib/librte_eal/common/eal_common_memalloc.c
new file mode 100644
index 00000000..1d41ea11
--- /dev/null
+++ b/lib/librte_eal/common/eal_common_memalloc.c
@@ -0,0 +1,364 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2017-2018 Intel Corporation
+ */
+
+#include <string.h>
+
+#include <rte_errno.h>
+#include <rte_lcore.h>
+#include <rte_fbarray.h>
+#include <rte_memzone.h>
+#include <rte_memory.h>
+#include <rte_eal_memconfig.h>
+#include <rte_string_fns.h>
+#include <rte_rwlock.h>
+
+#include "eal_private.h"
+#include "eal_internal_cfg.h"
+#include "eal_memalloc.h"
+
+struct mem_event_callback_entry {
+ TAILQ_ENTRY(mem_event_callback_entry) next;
+ char name[RTE_MEM_EVENT_CALLBACK_NAME_LEN];
+ rte_mem_event_callback_t clb;
+ void *arg;
+};
+
+struct mem_alloc_validator_entry {
+ TAILQ_ENTRY(mem_alloc_validator_entry) next;
+ char name[RTE_MEM_ALLOC_VALIDATOR_NAME_LEN];
+ rte_mem_alloc_validator_t clb;
+ int socket_id;
+ size_t limit;
+};
+
+/** Double linked list of actions. */
+TAILQ_HEAD(mem_event_callback_entry_list, mem_event_callback_entry);
+TAILQ_HEAD(mem_alloc_validator_entry_list, mem_alloc_validator_entry);
+
+static struct mem_event_callback_entry_list mem_event_callback_list =
+ TAILQ_HEAD_INITIALIZER(mem_event_callback_list);
+static rte_rwlock_t mem_event_rwlock = RTE_RWLOCK_INITIALIZER;
+
+static struct mem_alloc_validator_entry_list mem_alloc_validator_list =
+ TAILQ_HEAD_INITIALIZER(mem_alloc_validator_list);
+static rte_rwlock_t mem_alloc_validator_rwlock = RTE_RWLOCK_INITIALIZER;
+
+static struct mem_event_callback_entry *
+find_mem_event_callback(const char *name, void *arg)
+{
+ struct mem_event_callback_entry *r;
+
+ TAILQ_FOREACH(r, &mem_event_callback_list, next) {
+ if (!strcmp(r->name, name) && r->arg == arg)
+ break;
+ }
+ return r;
+}
+
+static struct mem_alloc_validator_entry *
+find_mem_alloc_validator(const char *name, int socket_id)
+{
+ struct mem_alloc_validator_entry *r;
+
+ TAILQ_FOREACH(r, &mem_alloc_validator_list, next) {
+ if (!strcmp(r->name, name) && r->socket_id == socket_id)
+ break;
+ }
+ return r;
+}
+
+bool
+eal_memalloc_is_contig(const struct rte_memseg_list *msl, void *start,
+ size_t len)
+{
+ void *end, *aligned_start, *aligned_end;
+ size_t pgsz = (size_t)msl->page_sz;
+ const struct rte_memseg *ms;
+
+ /* for IOVA_VA, it's always contiguous */
+ if (rte_eal_iova_mode() == RTE_IOVA_VA)
+ return true;
+
+ /* for legacy memory, it's always contiguous */
+ if (internal_config.legacy_mem)
+ return true;
+
+ end = RTE_PTR_ADD(start, len);
+
+ /* for nohuge, we check pagemap, otherwise check memseg */
+ if (!rte_eal_has_hugepages()) {
+ rte_iova_t cur, expected;
+
+ aligned_start = RTE_PTR_ALIGN_FLOOR(start, pgsz);
+ aligned_end = RTE_PTR_ALIGN_CEIL(end, pgsz);
+
+ /* if start and end are on the same page, bail out early */
+ if (RTE_PTR_DIFF(aligned_end, aligned_start) == pgsz)
+ return true;
+
+ /* skip first iteration */
+ cur = rte_mem_virt2iova(aligned_start);
+ expected = cur + pgsz;
+ aligned_start = RTE_PTR_ADD(aligned_start, pgsz);
+
+ while (aligned_start < aligned_end) {
+ cur = rte_mem_virt2iova(aligned_start);
+ if (cur != expected)
+ return false;
+ aligned_start = RTE_PTR_ADD(aligned_start, pgsz);
+ expected += pgsz;
+ }
+ } else {
+ int start_seg, end_seg, cur_seg;
+ rte_iova_t cur, expected;
+
+ aligned_start = RTE_PTR_ALIGN_FLOOR(start, pgsz);
+ aligned_end = RTE_PTR_ALIGN_CEIL(end, pgsz);
+
+ start_seg = RTE_PTR_DIFF(aligned_start, msl->base_va) /
+ pgsz;
+ end_seg = RTE_PTR_DIFF(aligned_end, msl->base_va) /
+ pgsz;
+
+ /* if start and end are on the same page, bail out early */
+ if (RTE_PTR_DIFF(aligned_end, aligned_start) == pgsz)
+ return true;
+
+ /* skip first iteration */
+ ms = rte_fbarray_get(&msl->memseg_arr, start_seg);
+ cur = ms->iova;
+ expected = cur + pgsz;
+
+ /* if we can't access IOVA addresses, assume non-contiguous */
+ if (cur == RTE_BAD_IOVA)
+ return false;
+
+ for (cur_seg = start_seg + 1; cur_seg < end_seg;
+ cur_seg++, expected += pgsz) {
+ ms = rte_fbarray_get(&msl->memseg_arr, cur_seg);
+
+ if (ms->iova != expected)
+ return false;
+ }
+ }
+ return true;
+}
+
+int
+eal_memalloc_mem_event_callback_register(const char *name,
+ rte_mem_event_callback_t clb, void *arg)
+{
+ struct mem_event_callback_entry *entry;
+ int ret, len;
+ if (name == NULL || clb == NULL) {
+ rte_errno = EINVAL;
+ return -1;
+ }
+ len = strnlen(name, RTE_MEM_EVENT_CALLBACK_NAME_LEN);
+ if (len == 0) {
+ rte_errno = EINVAL;
+ return -1;
+ } else if (len == RTE_MEM_EVENT_CALLBACK_NAME_LEN) {
+ rte_errno = ENAMETOOLONG;
+ return -1;
+ }
+ rte_rwlock_write_lock(&mem_event_rwlock);
+
+ entry = find_mem_event_callback(name, arg);
+ if (entry != NULL) {
+ rte_errno = EEXIST;
+ ret = -1;
+ goto unlock;
+ }
+
+ entry = malloc(sizeof(*entry));
+ if (entry == NULL) {
+ rte_errno = ENOMEM;
+ ret = -1;
+ goto unlock;
+ }
+
+ /* callback successfully created and is valid, add it to the list */
+ entry->clb = clb;
+ entry->arg = arg;
+ strlcpy(entry->name, name, RTE_MEM_EVENT_CALLBACK_NAME_LEN);
+ TAILQ_INSERT_TAIL(&mem_event_callback_list, entry, next);
+
+ ret = 0;
+
+ RTE_LOG(DEBUG, EAL, "Mem event callback '%s:%p' registered\n",
+ name, arg);
+
+unlock:
+ rte_rwlock_write_unlock(&mem_event_rwlock);
+ return ret;
+}
+
+int
+eal_memalloc_mem_event_callback_unregister(const char *name, void *arg)
+{
+ struct mem_event_callback_entry *entry;
+ int ret, len;
+
+ if (name == NULL) {
+ rte_errno = EINVAL;
+ return -1;
+ }
+ len = strnlen(name, RTE_MEM_EVENT_CALLBACK_NAME_LEN);
+ if (len == 0) {
+ rte_errno = EINVAL;
+ return -1;
+ } else if (len == RTE_MEM_EVENT_CALLBACK_NAME_LEN) {
+ rte_errno = ENAMETOOLONG;
+ return -1;
+ }
+ rte_rwlock_write_lock(&mem_event_rwlock);
+
+ entry = find_mem_event_callback(name, arg);
+ if (entry == NULL) {
+ rte_errno = ENOENT;
+ ret = -1;
+ goto unlock;
+ }
+ TAILQ_REMOVE(&mem_event_callback_list, entry, next);
+ free(entry);
+
+ ret = 0;
+
+ RTE_LOG(DEBUG, EAL, "Mem event callback '%s:%p' unregistered\n",
+ name, arg);
+
+unlock:
+ rte_rwlock_write_unlock(&mem_event_rwlock);
+ return ret;
+}
+
+void
+eal_memalloc_mem_event_notify(enum rte_mem_event event, const void *start,
+ size_t len)
+{
+ struct mem_event_callback_entry *entry;
+
+ rte_rwlock_read_lock(&mem_event_rwlock);
+
+ TAILQ_FOREACH(entry, &mem_event_callback_list, next) {
+ RTE_LOG(DEBUG, EAL, "Calling mem event callback '%s:%p'\n",
+ entry->name, entry->arg);
+ entry->clb(event, start, len, entry->arg);
+ }
+
+ rte_rwlock_read_unlock(&mem_event_rwlock);
+}
+
+int
+eal_memalloc_mem_alloc_validator_register(const char *name,
+ rte_mem_alloc_validator_t clb, int socket_id, size_t limit)
+{
+ struct mem_alloc_validator_entry *entry;
+ int ret, len;
+ if (name == NULL || clb == NULL || socket_id < 0) {
+ rte_errno = EINVAL;
+ return -1;
+ }
+ len = strnlen(name, RTE_MEM_ALLOC_VALIDATOR_NAME_LEN);
+ if (len == 0) {
+ rte_errno = EINVAL;
+ return -1;
+ } else if (len == RTE_MEM_ALLOC_VALIDATOR_NAME_LEN) {
+ rte_errno = ENAMETOOLONG;
+ return -1;
+ }
+ rte_rwlock_write_lock(&mem_alloc_validator_rwlock);
+
+ entry = find_mem_alloc_validator(name, socket_id);
+ if (entry != NULL) {
+ rte_errno = EEXIST;
+ ret = -1;
+ goto unlock;
+ }
+
+ entry = malloc(sizeof(*entry));
+ if (entry == NULL) {
+ rte_errno = ENOMEM;
+ ret = -1;
+ goto unlock;
+ }
+
+ /* callback successfully created and is valid, add it to the list */
+ entry->clb = clb;
+ entry->socket_id = socket_id;
+ entry->limit = limit;
+ strlcpy(entry->name, name, RTE_MEM_ALLOC_VALIDATOR_NAME_LEN);
+ TAILQ_INSERT_TAIL(&mem_alloc_validator_list, entry, next);
+
+ ret = 0;
+
+ RTE_LOG(DEBUG, EAL, "Mem alloc validator '%s' on socket %i with limit %zu registered\n",
+ name, socket_id, limit);
+
+unlock:
+ rte_rwlock_write_unlock(&mem_alloc_validator_rwlock);
+ return ret;
+}
+
+int
+eal_memalloc_mem_alloc_validator_unregister(const char *name, int socket_id)
+{
+ struct mem_alloc_validator_entry *entry;
+ int ret, len;
+
+ if (name == NULL || socket_id < 0) {
+ rte_errno = EINVAL;
+ return -1;
+ }
+ len = strnlen(name, RTE_MEM_ALLOC_VALIDATOR_NAME_LEN);
+ if (len == 0) {
+ rte_errno = EINVAL;
+ return -1;
+ } else if (len == RTE_MEM_ALLOC_VALIDATOR_NAME_LEN) {
+ rte_errno = ENAMETOOLONG;
+ return -1;
+ }
+ rte_rwlock_write_lock(&mem_alloc_validator_rwlock);
+
+ entry = find_mem_alloc_validator(name, socket_id);
+ if (entry == NULL) {
+ rte_errno = ENOENT;
+ ret = -1;
+ goto unlock;
+ }
+ TAILQ_REMOVE(&mem_alloc_validator_list, entry, next);
+ free(entry);
+
+ ret = 0;
+
+ RTE_LOG(DEBUG, EAL, "Mem alloc validator '%s' on socket %i unregistered\n",
+ name, socket_id);
+
+unlock:
+ rte_rwlock_write_unlock(&mem_alloc_validator_rwlock);
+ return ret;
+}
+
+int
+eal_memalloc_mem_alloc_validate(int socket_id, size_t new_len)
+{
+ struct mem_alloc_validator_entry *entry;
+ int ret = 0;
+
+ rte_rwlock_read_lock(&mem_alloc_validator_rwlock);
+
+ TAILQ_FOREACH(entry, &mem_alloc_validator_list, next) {
+ if (entry->socket_id != socket_id || entry->limit > new_len)
+ continue;
+ RTE_LOG(DEBUG, EAL, "Calling mem alloc validator '%s' on socket %i\n",
+ entry->name, entry->socket_id);
+ if (entry->clb(socket_id, entry->limit, new_len) < 0)
+ ret = -1;
+ }
+
+ rte_rwlock_read_unlock(&mem_alloc_validator_rwlock);
+
+ return ret;
+}
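
A hedged sketch of a client of this mechanism through its public wrapper rte_mem_event_callback_register() (defined later in this patch in eal_common_memory.c); the callback name and body are illustrative:

#include <stdio.h>
#include <rte_common.h>
#include <rte_memory.h>

static void
my_mem_event_cb(enum rte_mem_event event, const void *addr, size_t len,
		void *arg __rte_unused)
{
	printf("memory %s: %p, len %zu\n",
	       event == RTE_MEM_EVENT_ALLOC ? "allocated" : "freed",
	       addr, len);
}

static int
register_cb(void)
{
	/* call after rte_eal_init(); returns -1 with rte_errno == ENOTSUP
	 * when legacy memory mode is in use.
	 */
	return rte_mem_event_callback_register("my-mem-cb",
			my_mem_event_cb, NULL);
}
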
diff --git a/lib/librte_eal/common/eal_common_memory.c b/lib/librte_eal/common/eal_common_memory.c
index 852f3bb9..fbfb1b05 100644
--- a/lib/librte_eal/common/eal_common_memory.c
+++ b/lib/librte_eal/common/eal_common_memory.c
@@ -2,82 +2,385 @@
* Copyright(c) 2010-2014 Intel Corporation
*/
+#include <errno.h>
#include <stdio.h>
#include <stdint.h>
#include <stdlib.h>
#include <stdarg.h>
+#include <string.h>
#include <unistd.h>
#include <inttypes.h>
#include <sys/mman.h>
#include <sys/queue.h>
+#include <rte_fbarray.h>
#include <rte_memory.h>
#include <rte_eal.h>
#include <rte_eal_memconfig.h>
+#include <rte_errno.h>
#include <rte_log.h>
+#include "eal_memalloc.h"
#include "eal_private.h"
#include "eal_internal_cfg.h"
/*
- * Return a pointer to a read-only table of struct rte_physmem_desc
- * elements, containing the layout of all addressable physical
- * memory. The last element of the table contains a NULL address.
+ * Try to mmap *size bytes of anonymous memory. If successful, return the
+ * pointer to the mmap'd area and keep *size unmodified. Otherwise, if
+ * shrinking is allowed, retry with a smaller area: decrease *size by
+ * page_sz until it reaches 0, and return NULL in that case. Note: the
+ * returned address is aligned to the requested page size.
*/
-const struct rte_memseg *
-rte_eal_get_physmem_layout(void)
+
+#define MEMSEG_LIST_FMT "memseg-%" PRIu64 "k-%i-%i"
+
+static void *next_baseaddr;
+static uint64_t system_page_sz;
+
+void *
+eal_get_virtual_area(void *requested_addr, size_t *size,
+ size_t page_sz, int flags, int mmap_flags)
+{
+ bool addr_is_hint, allow_shrink, unmap, no_align;
+ uint64_t map_sz;
+ void *mapped_addr, *aligned_addr;
+
+ if (system_page_sz == 0)
+ system_page_sz = sysconf(_SC_PAGESIZE);
+
+ mmap_flags |= MAP_PRIVATE | MAP_ANONYMOUS;
+
+ RTE_LOG(DEBUG, EAL, "Ask a virtual area of 0x%zx bytes\n", *size);
+
+ addr_is_hint = (flags & EAL_VIRTUAL_AREA_ADDR_IS_HINT) > 0;
+ allow_shrink = (flags & EAL_VIRTUAL_AREA_ALLOW_SHRINK) > 0;
+ unmap = (flags & EAL_VIRTUAL_AREA_UNMAP) > 0;
+
+ if (next_baseaddr == NULL && internal_config.base_virtaddr != 0 &&
+ rte_eal_process_type() == RTE_PROC_PRIMARY)
+ next_baseaddr = (void *) internal_config.base_virtaddr;
+
+ if (requested_addr == NULL && next_baseaddr != NULL) {
+ requested_addr = next_baseaddr;
+ requested_addr = RTE_PTR_ALIGN(requested_addr, page_sz);
+ addr_is_hint = true;
+ }
+
+ /* we don't need alignment of resulting pointer in the following cases:
+ *
+ * 1. page size is equal to system page size
+ * 2. we have a requested address, and it is page-aligned, and we will
+ * be discarding the address if we get a different one.
+ *
+ * for all other cases, alignment is potentially necessary.
+ */
+ no_align = (requested_addr != NULL &&
+ requested_addr == RTE_PTR_ALIGN(requested_addr, page_sz) &&
+ !addr_is_hint) ||
+ page_sz == system_page_sz;
+
+ do {
+ map_sz = no_align ? *size : *size + page_sz;
+ if (map_sz > SIZE_MAX) {
+ RTE_LOG(ERR, EAL, "Map size too big\n");
+ rte_errno = E2BIG;
+ return NULL;
+ }
+
+ mapped_addr = mmap(requested_addr, (size_t)map_sz, PROT_READ,
+ mmap_flags, -1, 0);
+ if (mapped_addr == MAP_FAILED && allow_shrink)
+ *size -= page_sz;
+ } while (allow_shrink && mapped_addr == MAP_FAILED && *size > 0);
+
+ /* align resulting address - if map failed, we will ignore the value
+ * anyway, so no need to add additional checks.
+ */
+ aligned_addr = no_align ? mapped_addr :
+ RTE_PTR_ALIGN(mapped_addr, page_sz);
+
+ if (*size == 0) {
+ RTE_LOG(ERR, EAL, "Cannot get a virtual area of any size: %s\n",
+ strerror(errno));
+ rte_errno = errno;
+ return NULL;
+ } else if (mapped_addr == MAP_FAILED) {
+ RTE_LOG(ERR, EAL, "Cannot get a virtual area: %s\n",
+ strerror(errno));
+ /* pass errno up the call chain */
+ rte_errno = errno;
+ return NULL;
+ } else if (requested_addr != NULL && !addr_is_hint &&
+ aligned_addr != requested_addr) {
+ RTE_LOG(ERR, EAL, "Cannot get a virtual area at requested address: %p (got %p)\n",
+ requested_addr, aligned_addr);
+ munmap(mapped_addr, map_sz);
+ rte_errno = EADDRNOTAVAIL;
+ return NULL;
+ } else if (requested_addr != NULL && addr_is_hint &&
+ aligned_addr != requested_addr) {
+ RTE_LOG(WARNING, EAL, "WARNING! Base virtual address hint (%p != %p) not respected!\n",
+ requested_addr, aligned_addr);
+ RTE_LOG(WARNING, EAL, " This may cause issues with mapping memory into secondary processes\n");
+ } else if (next_baseaddr != NULL) {
+ next_baseaddr = RTE_PTR_ADD(aligned_addr, *size);
+ }
+
+ RTE_LOG(DEBUG, EAL, "Virtual area found at %p (size = 0x%zx)\n",
+ aligned_addr, *size);
+
+ if (unmap) {
+ munmap(mapped_addr, map_sz);
+ } else if (!no_align) {
+ void *map_end, *aligned_end;
+ size_t before_len, after_len;
+
+ /* when we reserve space with alignment, we add alignment to
+ * mapping size. On 32-bit, if 1GB alignment was requested, this
+ * would waste 1GB of address space, which is a luxury we cannot
+ * afford. so, if alignment was performed, check if any unneeded
+ * address space can be unmapped back.
+ */
+
+ map_end = RTE_PTR_ADD(mapped_addr, (size_t)map_sz);
+ aligned_end = RTE_PTR_ADD(aligned_addr, *size);
+
+ /* unmap space before aligned mmap address */
+ before_len = RTE_PTR_DIFF(aligned_addr, mapped_addr);
+ if (before_len > 0)
+ munmap(mapped_addr, before_len);
+
+ /* unmap space after aligned end mmap address */
+ after_len = RTE_PTR_DIFF(map_end, aligned_end);
+ if (after_len > 0)
+ munmap(aligned_end, after_len);
+ }
+
+ return aligned_addr;
+}
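
For context, a minimal sketch of an internal caller of this helper, mirroring how rte_fbarray_attach() above reserves its mapping (a hypothetical helper, not part of the patch; flags of 0 keep the anonymous mapping and require no particular address):

#include "eal_private.h"

/* hypothetical internal helper */
static void *
reserve_area(void *hint, size_t len, size_t page_sz)
{
	/* len must be a multiple of page_sz; on failure rte_errno is set */
	void *va = eal_get_virtual_area(hint, &len, page_sz, 0, 0);

	return va;	/* NULL on failure */
}
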
+
+static struct rte_memseg *
+virt2memseg(const void *addr, const struct rte_memseg_list *msl)
+{
+ const struct rte_fbarray *arr;
+ void *start, *end;
+ int ms_idx;
+
+ if (msl == NULL)
+ return NULL;
+
+ /* a memseg list was specified, check if it's the right one */
+ start = msl->base_va;
+ end = RTE_PTR_ADD(start, (size_t)msl->page_sz * msl->memseg_arr.len);
+
+ if (addr < start || addr >= end)
+ return NULL;
+
+ /* now, calculate index */
+ arr = &msl->memseg_arr;
+ ms_idx = RTE_PTR_DIFF(addr, msl->base_va) / msl->page_sz;
+ return rte_fbarray_get(arr, ms_idx);
+}
+
+static struct rte_memseg_list *
+virt2memseg_list(const void *addr)
+{
+ struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
+ struct rte_memseg_list *msl;
+ int msl_idx;
+
+ for (msl_idx = 0; msl_idx < RTE_MAX_MEMSEG_LISTS; msl_idx++) {
+ void *start, *end;
+ msl = &mcfg->memsegs[msl_idx];
+
+ start = msl->base_va;
+ end = RTE_PTR_ADD(start,
+ (size_t)msl->page_sz * msl->memseg_arr.len);
+ if (addr >= start && addr < end)
+ break;
+ }
+ /* if we didn't find our memseg list */
+ if (msl_idx == RTE_MAX_MEMSEG_LISTS)
+ return NULL;
+ return msl;
+}
+
+__rte_experimental struct rte_memseg_list *
+rte_mem_virt2memseg_list(const void *addr)
+{
+ return virt2memseg_list(addr);
+}
+
+struct virtiova {
+ rte_iova_t iova;
+ void *virt;
+};
+static int
+find_virt(const struct rte_memseg_list *msl __rte_unused,
+ const struct rte_memseg *ms, void *arg)
+{
+ struct virtiova *vi = arg;
+ if (vi->iova >= ms->iova && vi->iova < (ms->iova + ms->len)) {
+ size_t offset = vi->iova - ms->iova;
+ vi->virt = RTE_PTR_ADD(ms->addr, offset);
+ /* stop the walk */
+ return 1;
+ }
+ return 0;
+}
+static int
+find_virt_legacy(const struct rte_memseg_list *msl __rte_unused,
+ const struct rte_memseg *ms, size_t len, void *arg)
{
- return rte_eal_get_configuration()->mem_config->memseg;
+ struct virtiova *vi = arg;
+ if (vi->iova >= ms->iova && vi->iova < (ms->iova + len)) {
+ size_t offset = vi->iova - ms->iova;
+ vi->virt = RTE_PTR_ADD(ms->addr, offset);
+ /* stop the walk */
+ return 1;
+ }
+ return 0;
}
+__rte_experimental void *
+rte_mem_iova2virt(rte_iova_t iova)
+{
+ struct virtiova vi;
+
+ memset(&vi, 0, sizeof(vi));
+
+ vi.iova = iova;
+ /* for legacy mem, we can get away with scanning VA-contiguous segments,
+ * as we know they are PA-contiguous as well
+ */
+ if (internal_config.legacy_mem)
+ rte_memseg_contig_walk(find_virt_legacy, &vi);
+ else
+ rte_memseg_walk(find_virt, &vi);
+
+ return vi.virt;
+}
+
+__rte_experimental struct rte_memseg *
+rte_mem_virt2memseg(const void *addr, const struct rte_memseg_list *msl)
+{
+ return virt2memseg(addr, msl != NULL ? msl :
+ rte_mem_virt2memseg_list(addr));
+}
+
+static int
+physmem_size(const struct rte_memseg_list *msl, void *arg)
+{
+ uint64_t *total_len = arg;
+
+ *total_len += msl->memseg_arr.count * msl->page_sz;
+
+ return 0;
+}
/* get the total size of memory */
uint64_t
rte_eal_get_physmem_size(void)
{
- const struct rte_mem_config *mcfg;
- unsigned i = 0;
uint64_t total_len = 0;
- /* get pointer to global configuration */
- mcfg = rte_eal_get_configuration()->mem_config;
+ rte_memseg_list_walk(physmem_size, &total_len);
- for (i = 0; i < RTE_MAX_MEMSEG; i++) {
- if (mcfg->memseg[i].addr == NULL)
- break;
+ return total_len;
+}
- total_len += mcfg->memseg[i].len;
- }
+static int
+dump_memseg(const struct rte_memseg_list *msl, const struct rte_memseg *ms,
+ void *arg)
+{
+ struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
+ int msl_idx, ms_idx;
+ FILE *f = arg;
- return total_len;
+ msl_idx = msl - mcfg->memsegs;
+ if (msl_idx < 0 || msl_idx >= RTE_MAX_MEMSEG_LISTS)
+ return -1;
+
+ ms_idx = rte_fbarray_find_idx(&msl->memseg_arr, ms);
+ if (ms_idx < 0)
+ return -1;
+
+ fprintf(f, "Segment %i-%i: IOVA:0x%"PRIx64", len:%zu, "
+ "virt:%p, socket_id:%"PRId32", "
+ "hugepage_sz:%"PRIu64", nchannel:%"PRIx32", "
+ "nrank:%"PRIx32"\n",
+ msl_idx, ms_idx,
+ ms->iova,
+ ms->len,
+ ms->addr,
+ ms->socket_id,
+ ms->hugepage_sz,
+ ms->nchannel,
+ ms->nrank);
+
+ return 0;
}
-/* Dump the physical memory layout on console */
-void
-rte_dump_physmem_layout(FILE *f)
+/*
+ * Defined here because it is declared in rte_memory.h, but the actual
+ * implementation is in eal_common_memalloc.c, like all other memalloc internals.
+ */
+int __rte_experimental
+rte_mem_event_callback_register(const char *name, rte_mem_event_callback_t clb,
+ void *arg)
{
- const struct rte_mem_config *mcfg;
- unsigned i = 0;
+ /* FreeBSD boots with legacy mem enabled by default */
+ if (internal_config.legacy_mem) {
+ RTE_LOG(DEBUG, EAL, "Registering mem event callbacks not supported\n");
+ rte_errno = ENOTSUP;
+ return -1;
+ }
+ return eal_memalloc_mem_event_callback_register(name, clb, arg);
+}
- /* get pointer to global configuration */
- mcfg = rte_eal_get_configuration()->mem_config;
+int __rte_experimental
+rte_mem_event_callback_unregister(const char *name, void *arg)
+{
+ /* FreeBSD boots with legacy mem enabled by default */
+ if (internal_config.legacy_mem) {
+ RTE_LOG(DEBUG, EAL, "Registering mem event callbacks not supported\n");
+ rte_errno = ENOTSUP;
+ return -1;
+ }
+ return eal_memalloc_mem_event_callback_unregister(name, arg);
+}
- for (i = 0; i < RTE_MAX_MEMSEG; i++) {
- if (mcfg->memseg[i].addr == NULL)
- break;
+int __rte_experimental
+rte_mem_alloc_validator_register(const char *name,
+ rte_mem_alloc_validator_t clb, int socket_id, size_t limit)
+{
+ /* FreeBSD boots with legacy mem enabled by default */
+ if (internal_config.legacy_mem) {
+ RTE_LOG(DEBUG, EAL, "Registering mem alloc validators not supported\n");
+ rte_errno = ENOTSUP;
+ return -1;
+ }
+ return eal_memalloc_mem_alloc_validator_register(name, clb, socket_id,
+ limit);
+}
- fprintf(f, "Segment %u: IOVA:0x%"PRIx64", len:%zu, "
- "virt:%p, socket_id:%"PRId32", "
- "hugepage_sz:%"PRIu64", nchannel:%"PRIx32", "
- "nrank:%"PRIx32"\n", i,
- mcfg->memseg[i].iova,
- mcfg->memseg[i].len,
- mcfg->memseg[i].addr,
- mcfg->memseg[i].socket_id,
- mcfg->memseg[i].hugepage_sz,
- mcfg->memseg[i].nchannel,
- mcfg->memseg[i].nrank);
+int __rte_experimental
+rte_mem_alloc_validator_unregister(const char *name, int socket_id)
+{
+ /* FreeBSD boots with legacy mem enabled by default */
+ if (internal_config.legacy_mem) {
+ RTE_LOG(DEBUG, EAL, "Registering mem alloc validators not supported\n");
+ rte_errno = ENOTSUP;
+ return -1;
}
+ return eal_memalloc_mem_alloc_validator_unregister(name, socket_id);
+}
+
+/* Dump the physical memory layout on console */
+void
+rte_dump_physmem_layout(FILE *f)
+{
+ rte_memseg_walk(dump_memseg, f);
}
/* return the number of memory channels */
@@ -117,20 +420,165 @@ rte_mem_lock_page(const void *virt)
return mlock((void *)aligned, page_size);
}
+int __rte_experimental
+rte_memseg_contig_walk_thread_unsafe(rte_memseg_contig_walk_t func, void *arg)
+{
+ struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
+ int i, ms_idx, ret = 0;
+
+ for (i = 0; i < RTE_MAX_MEMSEG_LISTS; i++) {
+ struct rte_memseg_list *msl = &mcfg->memsegs[i];
+ const struct rte_memseg *ms;
+ struct rte_fbarray *arr;
+
+ if (msl->memseg_arr.count == 0)
+ continue;
+
+ arr = &msl->memseg_arr;
+
+ ms_idx = rte_fbarray_find_next_used(arr, 0);
+ while (ms_idx >= 0) {
+ int n_segs;
+ size_t len;
+
+ ms = rte_fbarray_get(arr, ms_idx);
+
+ /* find how many more segments there are, starting with
+ * this one.
+ */
+ n_segs = rte_fbarray_find_contig_used(arr, ms_idx);
+ len = n_segs * msl->page_sz;
+
+ ret = func(msl, ms, len, arg);
+ if (ret)
+ return ret;
+ ms_idx = rte_fbarray_find_next_used(arr,
+ ms_idx + n_segs);
+ }
+ }
+ return 0;
+}
+
+int __rte_experimental
+rte_memseg_contig_walk(rte_memseg_contig_walk_t func, void *arg)
+{
+ struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
+ int ret = 0;
+
+ /* do not allow allocations/frees/init while we iterate */
+ rte_rwlock_read_lock(&mcfg->memory_hotplug_lock);
+ ret = rte_memseg_contig_walk_thread_unsafe(func, arg);
+ rte_rwlock_read_unlock(&mcfg->memory_hotplug_lock);
+
+ return ret;
+}
+
+int __rte_experimental
+rte_memseg_walk_thread_unsafe(rte_memseg_walk_t func, void *arg)
+{
+ struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
+ int i, ms_idx, ret = 0;
+
+ for (i = 0; i < RTE_MAX_MEMSEG_LISTS; i++) {
+ struct rte_memseg_list *msl = &mcfg->memsegs[i];
+ const struct rte_memseg *ms;
+ struct rte_fbarray *arr;
+
+ if (msl->memseg_arr.count == 0)
+ continue;
+
+ arr = &msl->memseg_arr;
+
+ ms_idx = rte_fbarray_find_next_used(arr, 0);
+ while (ms_idx >= 0) {
+ ms = rte_fbarray_get(arr, ms_idx);
+ ret = func(msl, ms, arg);
+ if (ret)
+ return ret;
+ ms_idx = rte_fbarray_find_next_used(arr, ms_idx + 1);
+ }
+ }
+ return 0;
+}
+
+int __rte_experimental
+rte_memseg_walk(rte_memseg_walk_t func, void *arg)
+{
+ struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
+ int ret = 0;
+
+ /* do not allow allocations/frees/init while we iterate */
+ rte_rwlock_read_lock(&mcfg->memory_hotplug_lock);
+ ret = rte_memseg_walk_thread_unsafe(func, arg);
+ rte_rwlock_read_unlock(&mcfg->memory_hotplug_lock);
+
+ return ret;
+}
+
+int __rte_experimental
+rte_memseg_list_walk_thread_unsafe(rte_memseg_list_walk_t func, void *arg)
+{
+ struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
+ int i, ret = 0;
+
+ for (i = 0; i < RTE_MAX_MEMSEG_LISTS; i++) {
+ struct rte_memseg_list *msl = &mcfg->memsegs[i];
+
+ if (msl->base_va == NULL)
+ continue;
+
+ ret = func(msl, arg);
+ if (ret)
+ return ret;
+ }
+ return 0;
+}
+
+int __rte_experimental
+rte_memseg_list_walk(rte_memseg_list_walk_t func, void *arg)
+{
+ struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
+ int ret = 0;
+
+ /* do not allow allocations/frees/init while we iterate */
+ rte_rwlock_read_lock(&mcfg->memory_hotplug_lock);
+ ret = rte_memseg_list_walk_thread_unsafe(func, arg);
+ rte_rwlock_read_unlock(&mcfg->memory_hotplug_lock);
+
+ return ret;
+}
+
/* init memory subsystem */
int
rte_eal_memory_init(void)
{
+ struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
+ int retval;
RTE_LOG(DEBUG, EAL, "Setting up physically contiguous memory...\n");
- const int retval = rte_eal_process_type() == RTE_PROC_PRIMARY ?
+ if (!mcfg)
+ return -1;
+
+ /* lock mem hotplug here, to prevent races while we init */
+ rte_rwlock_read_lock(&mcfg->memory_hotplug_lock);
+
+ if (rte_eal_memseg_init() < 0)
+ goto fail;
+
+ if (eal_memalloc_init() < 0)
+ goto fail;
+
+ retval = rte_eal_process_type() == RTE_PROC_PRIMARY ?
rte_eal_hugepage_init() :
rte_eal_hugepage_attach();
if (retval < 0)
- return -1;
+ goto fail;
if (internal_config.no_shconf == 0 && rte_eal_memdevice_init() < 0)
- return -1;
+ goto fail;
return 0;
+fail:
+ rte_rwlock_read_unlock(&mcfg->memory_hotplug_lock);
+ return -1;
}
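
To illustrate the new walk API, a short hedged example (not part of the patch) that sums hugepage-backed memory using rte_memseg_walk(); the callback and counter names are illustrative:

#include <rte_common.h>
#include <rte_memory.h>

static int
count_seg(const struct rte_memseg_list *msl __rte_unused,
		const struct rte_memseg *ms, void *arg)
{
	size_t *total = arg;

	*total += ms->len;
	return 0;	/* continue the walk */
}

static size_t
total_mem(void)
{
	size_t total = 0;

	rte_memseg_walk(count_seg, &total);
	return total;
}
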
diff --git a/lib/librte_eal/common/eal_common_memzone.c b/lib/librte_eal/common/eal_common_memzone.c
index 1ab3ade2..7300fe05 100644
--- a/lib/librte_eal/common/eal_common_memzone.c
+++ b/lib/librte_eal/common/eal_common_memzone.c
@@ -28,88 +28,49 @@
static inline const struct rte_memzone *
memzone_lookup_thread_unsafe(const char *name)
{
- const struct rte_mem_config *mcfg;
+ struct rte_mem_config *mcfg;
+ struct rte_fbarray *arr;
const struct rte_memzone *mz;
- unsigned i = 0;
+ int i = 0;
/* get pointer to global configuration */
mcfg = rte_eal_get_configuration()->mem_config;
+ arr = &mcfg->memzones;
/*
* the algorithm is not optimal (linear), but there are few
* zones and this function should be called at init only
*/
- for (i = 0; i < RTE_MAX_MEMZONE; i++) {
- mz = &mcfg->memzone[i];
- if (mz->addr != NULL && !strncmp(name, mz->name, RTE_MEMZONE_NAMESIZE))
- return &mcfg->memzone[i];
+ i = rte_fbarray_find_next_used(arr, 0);
+ while (i >= 0) {
+ mz = rte_fbarray_get(arr, i);
+ if (mz->addr != NULL &&
+ !strncmp(name, mz->name, RTE_MEMZONE_NAMESIZE))
+ return mz;
+ i = rte_fbarray_find_next_used(arr, i + 1);
}
-
- return NULL;
-}
-
-static inline struct rte_memzone *
-get_next_free_memzone(void)
-{
- struct rte_mem_config *mcfg;
- unsigned i = 0;
-
- /* get pointer to global configuration */
- mcfg = rte_eal_get_configuration()->mem_config;
-
- for (i = 0; i < RTE_MAX_MEMZONE; i++) {
- if (mcfg->memzone[i].addr == NULL)
- return &mcfg->memzone[i];
- }
-
return NULL;
}
-/* This function will return the greatest free block if a heap has been
- * specified. If no heap has been specified, it will return the heap and
- * length of the greatest free block available in all heaps */
-static size_t
-find_heap_max_free_elem(int *s, unsigned align)
-{
- struct rte_mem_config *mcfg;
- struct rte_malloc_socket_stats stats;
- int i, socket = *s;
- size_t len = 0;
-
- /* get pointer to global configuration */
- mcfg = rte_eal_get_configuration()->mem_config;
-
- for (i = 0; i < RTE_MAX_NUMA_NODES; i++) {
- if ((socket != SOCKET_ID_ANY) && (socket != i))
- continue;
-
- malloc_heap_get_stats(&mcfg->malloc_heaps[i], &stats);
- if (stats.greatest_free_size > len) {
- len = stats.greatest_free_size;
- *s = i;
- }
- }
-
- if (len < MALLOC_ELEM_OVERHEAD + align)
- return 0;
-
- return len - MALLOC_ELEM_OVERHEAD - align;
-}
-
static const struct rte_memzone *
memzone_reserve_aligned_thread_unsafe(const char *name, size_t len,
- int socket_id, unsigned flags, unsigned align, unsigned bound)
+ int socket_id, unsigned int flags, unsigned int align,
+ unsigned int bound)
{
struct rte_memzone *mz;
struct rte_mem_config *mcfg;
+ struct rte_fbarray *arr;
+ void *mz_addr;
size_t requested_len;
- int socket, i;
+ int mz_idx;
+ bool contig;
/* get pointer to global configuration */
mcfg = rte_eal_get_configuration()->mem_config;
+ arr = &mcfg->memzones;
/* no more room in config */
- if (mcfg->memzone_cnt >= RTE_MAX_MEMZONE) {
+ if (arr->count >= arr->len) {
RTE_LOG(ERR, EAL, "%s(): No more room in config\n", __func__);
rte_errno = ENOSPC;
return NULL;
@@ -148,8 +109,7 @@ memzone_reserve_aligned_thread_unsafe(const char *name, size_t len,
return NULL;
}
- len += RTE_CACHE_LINE_MASK;
- len &= ~((size_t) RTE_CACHE_LINE_MASK);
+ len = RTE_ALIGN_CEIL(len, RTE_CACHE_LINE_SIZE);
/* save minimal requested length */
requested_len = RTE_MAX((size_t)RTE_CACHE_LINE_SIZE, len);
@@ -169,40 +129,22 @@ memzone_reserve_aligned_thread_unsafe(const char *name, size_t len,
if (!rte_eal_has_hugepages())
socket_id = SOCKET_ID_ANY;
- if (len == 0) {
- if (bound != 0)
- requested_len = bound;
- else {
- requested_len = find_heap_max_free_elem(&socket_id, align);
- if (requested_len == 0) {
- rte_errno = ENOMEM;
- return NULL;
- }
- }
- }
+ contig = (flags & RTE_MEMZONE_IOVA_CONTIG) != 0;
+ /* malloc only cares about size flags, remove contig flag from flags */
+ flags &= ~RTE_MEMZONE_IOVA_CONTIG;
- if (socket_id == SOCKET_ID_ANY)
- socket = malloc_get_numa_socket();
- else
- socket = socket_id;
-
- /* allocate memory on heap */
- void *mz_addr = malloc_heap_alloc(&mcfg->malloc_heaps[socket], NULL,
- requested_len, flags, align, bound);
-
- if ((mz_addr == NULL) && (socket_id == SOCKET_ID_ANY)) {
- /* try other heaps */
- for (i = 0; i < RTE_MAX_NUMA_NODES; i++) {
- if (socket == i)
- continue;
-
- mz_addr = malloc_heap_alloc(&mcfg->malloc_heaps[i],
- NULL, requested_len, flags, align, bound);
- if (mz_addr != NULL)
- break;
- }
+ if (len == 0 && bound == 0) {
+ /* no size constraints were placed, so use malloc elem len */
+ requested_len = 0;
+ mz_addr = malloc_heap_alloc_biggest(NULL, socket_id, flags,
+ align, contig);
+ } else {
+ if (len == 0)
+ requested_len = bound;
+ /* allocate memory on heap */
+ mz_addr = malloc_heap_alloc(NULL, requested_len, socket_id,
+ flags, align, bound, contig);
}
-
if (mz_addr == NULL) {
rte_errno = ENOMEM;
return NULL;
@@ -211,33 +153,38 @@ memzone_reserve_aligned_thread_unsafe(const char *name, size_t len,
struct malloc_elem *elem = malloc_elem_from_data(mz_addr);
/* fill the zone in config */
- mz = get_next_free_memzone();
+ mz_idx = rte_fbarray_find_next_free(arr, 0);
+
+ if (mz_idx < 0) {
+ mz = NULL;
+ } else {
+ rte_fbarray_set_used(arr, mz_idx);
+ mz = rte_fbarray_get(arr, mz_idx);
+ }
if (mz == NULL) {
- RTE_LOG(ERR, EAL, "%s(): Cannot find free memzone but there is room "
- "in config!\n", __func__);
- malloc_elem_free(elem);
+ RTE_LOG(ERR, EAL, "%s(): Cannot find free memzone\n", __func__);
+ malloc_heap_free(elem);
rte_errno = ENOSPC;
return NULL;
}
- mcfg->memzone_cnt++;
snprintf(mz->name, sizeof(mz->name), "%s", name);
mz->iova = rte_malloc_virt2iova(mz_addr);
mz->addr = mz_addr;
- mz->len = (requested_len == 0 ? elem->size : requested_len);
- mz->hugepage_sz = elem->ms->hugepage_sz;
- mz->socket_id = elem->ms->socket_id;
+ mz->len = requested_len == 0 ?
+ elem->size - elem->pad - MALLOC_ELEM_OVERHEAD :
+ requested_len;
+ mz->hugepage_sz = elem->msl->page_sz;
+ mz->socket_id = elem->msl->socket_id;
mz->flags = 0;
- mz->memseg_id = elem->ms - rte_eal_get_configuration()->mem_config->memseg;
return mz;
}
static const struct rte_memzone *
-rte_memzone_reserve_thread_safe(const char *name, size_t len,
- int socket_id, unsigned flags, unsigned align,
- unsigned bound)
+rte_memzone_reserve_thread_safe(const char *name, size_t len, int socket_id,
+ unsigned int flags, unsigned int align, unsigned int bound)
{
struct rte_mem_config *mcfg;
const struct rte_memzone *mz = NULL;
@@ -296,34 +243,38 @@ int
rte_memzone_free(const struct rte_memzone *mz)
{
struct rte_mem_config *mcfg;
+ struct rte_fbarray *arr;
+ struct rte_memzone *found_mz;
int ret = 0;
- void *addr;
+ void *addr = NULL;
unsigned idx;
if (mz == NULL)
return -EINVAL;
mcfg = rte_eal_get_configuration()->mem_config;
+ arr = &mcfg->memzones;
rte_rwlock_write_lock(&mcfg->mlock);
- idx = ((uintptr_t)mz - (uintptr_t)mcfg->memzone);
- idx = idx / sizeof(struct rte_memzone);
+ idx = rte_fbarray_find_idx(arr, mz);
+ found_mz = rte_fbarray_get(arr, idx);
- addr = mcfg->memzone[idx].addr;
- if (addr == NULL)
+ if (found_mz == NULL) {
+ ret = -EINVAL;
+ } else if (found_mz->addr == NULL) {
+ RTE_LOG(ERR, EAL, "Memzone is not allocated\n");
ret = -EINVAL;
- else if (mcfg->memzone_cnt == 0) {
- rte_panic("%s(): memzone address not NULL but memzone_cnt is 0!\n",
- __func__);
} else {
- memset(&mcfg->memzone[idx], 0, sizeof(mcfg->memzone[idx]));
- mcfg->memzone_cnt--;
+ addr = found_mz->addr;
+ memset(found_mz, 0, sizeof(*found_mz));
+ rte_fbarray_set_free(arr, idx);
}
rte_rwlock_write_unlock(&mcfg->mlock);
- rte_free(addr);
+ if (addr != NULL)
+ rte_free(addr);
return ret;
}
@@ -348,31 +299,61 @@ rte_memzone_lookup(const char *name)
return memzone;
}
+static void
+dump_memzone(const struct rte_memzone *mz, void *arg)
+{
+ struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
+ struct rte_memseg_list *msl = NULL;
+ void *cur_addr, *mz_end;
+ struct rte_memseg *ms;
+ int mz_idx, ms_idx;
+ size_t page_sz;
+ FILE *f = arg;
+
+ mz_idx = rte_fbarray_find_idx(&mcfg->memzones, mz);
+
+ fprintf(f, "Zone %u: name:<%s>, len:0x%zx, virt:%p, "
+ "socket_id:%"PRId32", flags:%"PRIx32"\n",
+ mz_idx,
+ mz->name,
+ mz->len,
+ mz->addr,
+ mz->socket_id,
+ mz->flags);
+
+ /* go through each page occupied by this memzone */
+ msl = rte_mem_virt2memseg_list(mz->addr);
+ if (!msl) {
+ RTE_LOG(DEBUG, EAL, "Skipping bad memzone\n");
+ return;
+ }
+ page_sz = (size_t)mz->hugepage_sz;
+ cur_addr = RTE_PTR_ALIGN_FLOOR(mz->addr, page_sz);
+ mz_end = RTE_PTR_ADD(cur_addr, mz->len);
+
+ fprintf(f, "physical segments used:\n");
+ ms_idx = RTE_PTR_DIFF(mz->addr, msl->base_va) / page_sz;
+ ms = rte_fbarray_get(&msl->memseg_arr, ms_idx);
+
+ do {
+ fprintf(f, " addr: %p iova: 0x%" PRIx64 " "
+ "len: 0x%zx "
+ "pagesz: 0x%zx\n",
+ cur_addr, ms->iova, ms->len, page_sz);
+
+ /* advance VA to next page */
+ cur_addr = RTE_PTR_ADD(cur_addr, page_sz);
+
+ /* memzones occupy contiguous segments */
+ ++ms;
+ } while (cur_addr < mz_end);
+}
+
/* Dump all reserved memory zones on console */
void
rte_memzone_dump(FILE *f)
{
- struct rte_mem_config *mcfg;
- unsigned i = 0;
-
- /* get pointer to global configuration */
- mcfg = rte_eal_get_configuration()->mem_config;
-
- rte_rwlock_read_lock(&mcfg->mlock);
- /* dump all zones */
- for (i=0; i<RTE_MAX_MEMZONE; i++) {
- if (mcfg->memzone[i].addr == NULL)
- break;
- fprintf(f, "Zone %u: name:<%s>, IO:0x%"PRIx64", len:0x%zx"
- ", virt:%p, socket_id:%"PRId32", flags:%"PRIx32"\n", i,
- mcfg->memzone[i].name,
- mcfg->memzone[i].iova,
- mcfg->memzone[i].len,
- mcfg->memzone[i].addr,
- mcfg->memzone[i].socket_id,
- mcfg->memzone[i].flags);
- }
- rte_rwlock_read_unlock(&mcfg->mlock);
+ rte_memzone_walk(dump_memzone, f);
}
/*
@@ -382,30 +363,27 @@ int
rte_eal_memzone_init(void)
{
struct rte_mem_config *mcfg;
- const struct rte_memseg *memseg;
/* get pointer to global configuration */
mcfg = rte_eal_get_configuration()->mem_config;
- /* secondary processes don't need to initialise anything */
- if (rte_eal_process_type() == RTE_PROC_SECONDARY)
- return 0;
+ rte_rwlock_write_lock(&mcfg->mlock);
- memseg = rte_eal_get_physmem_layout();
- if (memseg == NULL) {
- RTE_LOG(ERR, EAL, "%s(): Cannot get physical layout\n", __func__);
+ if (rte_eal_process_type() == RTE_PROC_PRIMARY &&
+ rte_fbarray_init(&mcfg->memzones, "memzone",
+ RTE_MAX_MEMZONE, sizeof(struct rte_memzone))) {
+ RTE_LOG(ERR, EAL, "Cannot allocate memzone list\n");
+ return -1;
+ } else if (rte_eal_process_type() == RTE_PROC_SECONDARY &&
+ rte_fbarray_attach(&mcfg->memzones)) {
+ RTE_LOG(ERR, EAL, "Cannot attach to memzone list\n");
+ rte_rwlock_write_unlock(&mcfg->mlock);
return -1;
}
- rte_rwlock_write_lock(&mcfg->mlock);
-
- /* delete all zones */
- mcfg->memzone_cnt = 0;
- memset(mcfg->memzone, 0, sizeof(mcfg->memzone));
-
rte_rwlock_write_unlock(&mcfg->mlock);
- return rte_eal_malloc_heap_init();
+ return 0;
}
/* Walk all reserved memory zones */
@@ -413,14 +391,18 @@ void rte_memzone_walk(void (*func)(const struct rte_memzone *, void *),
void *arg)
{
struct rte_mem_config *mcfg;
- unsigned i;
+ struct rte_fbarray *arr;
+ int i;
mcfg = rte_eal_get_configuration()->mem_config;
+ arr = &mcfg->memzones;
rte_rwlock_read_lock(&mcfg->mlock);
- for (i=0; i<RTE_MAX_MEMZONE; i++) {
- if (mcfg->memzone[i].addr != NULL)
- (*func)(&mcfg->memzone[i], arg);
+ i = rte_fbarray_find_next_used(arr, 0);
+ while (i >= 0) {
+ struct rte_memzone *mz = rte_fbarray_get(arr, i);
+ (*func)(mz, arg);
+ i = rte_fbarray_find_next_used(arr, i + 1);
}
rte_rwlock_read_unlock(&mcfg->mlock);
}
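
A brief usage sketch of the reworked memzone allocator, including the new RTE_MEMZONE_IOVA_CONTIG flag handled above (the zone name and size are illustrative, not part of the patch):

#include <rte_memzone.h>

static const struct rte_memzone *
reserve_example(void)
{
	const struct rte_memzone *mz;

	/* 1 MB, any socket, must be IOVA-contiguous */
	mz = rte_memzone_reserve("example_mz", 1 << 20, SOCKET_ID_ANY,
			RTE_MEMZONE_IOVA_CONTIG);
	if (mz == NULL)
		return NULL;	/* rte_errno set (e.g. ENOMEM, ENOSPC) */

	/* ... use mz->addr and mz->iova ... */
	return mz;
}
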
diff --git a/lib/librte_eal/common/eal_common_options.c b/lib/librte_eal/common/eal_common_options.c
index 9f2f8d25..dd5f9740 100644
--- a/lib/librte_eal/common/eal_common_options.c
+++ b/lib/librte_eal/common/eal_common_options.c
@@ -27,6 +27,7 @@
#include "eal_internal_cfg.h"
#include "eal_options.h"
#include "eal_filesystem.h"
+#include "eal_private.h"
#define BITS_PER_HEX 4
#define LCORE_OPT_LST 1
@@ -65,14 +66,18 @@ eal_long_options[] = {
{OPT_NO_HUGE, 0, NULL, OPT_NO_HUGE_NUM },
{OPT_NO_PCI, 0, NULL, OPT_NO_PCI_NUM },
{OPT_NO_SHCONF, 0, NULL, OPT_NO_SHCONF_NUM },
+ {OPT_IN_MEMORY, 0, NULL, OPT_IN_MEMORY_NUM },
{OPT_PCI_BLACKLIST, 1, NULL, OPT_PCI_BLACKLIST_NUM },
{OPT_PCI_WHITELIST, 1, NULL, OPT_PCI_WHITELIST_NUM },
{OPT_PROC_TYPE, 1, NULL, OPT_PROC_TYPE_NUM },
{OPT_SOCKET_MEM, 1, NULL, OPT_SOCKET_MEM_NUM },
+ {OPT_SOCKET_LIMIT, 1, NULL, OPT_SOCKET_LIMIT_NUM },
{OPT_SYSLOG, 1, NULL, OPT_SYSLOG_NUM },
{OPT_VDEV, 1, NULL, OPT_VDEV_NUM },
{OPT_VFIO_INTR, 1, NULL, OPT_VFIO_INTR_NUM },
{OPT_VMWARE_TSC_MAP, 0, NULL, OPT_VMWARE_TSC_MAP_NUM },
+ {OPT_LEGACY_MEM, 0, NULL, OPT_LEGACY_MEM_NUM },
+ {OPT_SINGLE_FILE_SEGMENTS, 0, NULL, OPT_SINGLE_FILE_SEGMENTS_NUM},
{0, 0, NULL, 0 }
};
@@ -151,7 +156,7 @@ eal_option_device_parse(void)
TAILQ_FOREACH_SAFE(devopt, &devopt_list, next, tmp) {
if (ret == 0) {
- ret = rte_eal_devargs_add(devopt->type, devopt->arg);
+ ret = rte_devargs_add(devopt->type, devopt->arg);
if (ret)
RTE_LOG(ERR, EAL, "Unable to parse device '%s'\n",
devopt->arg);
@@ -176,9 +181,16 @@ eal_reset_internal_config(struct internal_config *internal_cfg)
/* zero out the NUMA config */
for (i = 0; i < RTE_MAX_NUMA_NODES; i++)
internal_cfg->socket_mem[i] = 0;
+ internal_cfg->force_socket_limits = 0;
+ /* zero out the NUMA limits config */
+ for (i = 0; i < RTE_MAX_NUMA_NODES; i++)
+ internal_cfg->socket_limit[i] = 0;
/* zero out hugedir descriptors */
- for (i = 0; i < MAX_HUGEPAGE_SIZES; i++)
+ for (i = 0; i < MAX_HUGEPAGE_SIZES; i++) {
+ memset(&internal_cfg->hugepage_info[i], 0,
+ sizeof(internal_cfg->hugepage_info[0]));
internal_cfg->hugepage_info[i].lock_descriptor = -1;
+ }
internal_cfg->base_virtaddr = 0;
internal_cfg->syslog_facility = LOG_DAEMON;
@@ -194,6 +206,7 @@ eal_reset_internal_config(struct internal_config *internal_cfg)
internal_cfg->vmware_tsc_map = 0;
internal_cfg->create_uio_dev = 0;
internal_cfg->user_mbuf_pool_ops_name = NULL;
+ internal_cfg->init_complete = 0;
}
static int
@@ -308,6 +321,7 @@ eal_parse_service_coremask(const char *coremask)
unsigned int count = 0;
char c;
int val;
+ uint32_t taken_lcore_count = 0;
if (coremask == NULL)
return -1;
@@ -341,7 +355,7 @@ eal_parse_service_coremask(const char *coremask)
if (master_lcore_parsed &&
cfg->master_lcore == lcore) {
RTE_LOG(ERR, EAL,
- "Error: lcore %u is master lcore, cannot use as service core\n",
+ "lcore %u is master lcore, cannot use as service core\n",
idx);
return -1;
}
@@ -351,6 +365,10 @@ eal_parse_service_coremask(const char *coremask)
"lcore %u unavailable\n", idx);
return -1;
}
+
+ if (cfg->lcore_role[idx] == ROLE_RTE)
+ taken_lcore_count++;
+
lcore_config[idx].core_role = ROLE_SERVICE;
count++;
}
@@ -367,11 +385,28 @@ eal_parse_service_coremask(const char *coremask)
if (count == 0)
return -1;
+ if (core_parsed && taken_lcore_count != count) {
+ RTE_LOG(WARNING, EAL,
+ "Not all service cores are in the coremask. "
+ "Please ensure -c or -l includes service cores\n");
+ }
+
cfg->service_lcore_count = count;
return 0;
}
static int
+eal_service_cores_parsed(void)
+{
+ int idx;
+ for (idx = 0; idx < RTE_MAX_LCORE; idx++) {
+ if (lcore_config[idx].core_role == ROLE_SERVICE)
+ return 1;
+ }
+ return 0;
+}
+
+static int
eal_parse_coremask(const char *coremask)
{
struct rte_config *cfg = rte_eal_get_configuration();
@@ -380,6 +415,11 @@ eal_parse_coremask(const char *coremask)
char c;
int val;
+ if (eal_service_cores_parsed())
+ RTE_LOG(WARNING, EAL,
+ "Service cores parsed before dataplane cores. "
+ "Please ensure -c is before -s or -S\n");
+
if (coremask == NULL)
return -1;
/* Remove all blank characters ahead and after .
@@ -411,6 +451,7 @@ eal_parse_coremask(const char *coremask)
"unavailable\n", idx);
return -1;
}
+
cfg->lcore_role[idx] = ROLE_RTE;
lcore_config[idx].core_index = count;
count++;
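
The coremask parsers above walk the hex string from its least-significant digit, expanding each digit into BITS_PER_HEX (4) consecutive lcore indices. A standalone sketch of that decoding, without the ROLE_RTE/ROLE_SERVICE bookkeeping the EAL performs (illustrative only, not part of this patch):

    #include <ctype.h>
    #include <stdio.h>
    #include <string.h>

    #define BITS_PER_HEX 4

    /* Decode a hex coremask such as "0xf5" and print the enabled lcores. */
    static int
    coremask_to_lcores(const char *coremask)
    {
            int idx = 0, i;

            if (coremask[0] == '0' && tolower((unsigned char)coremask[1]) == 'x')
                    coremask += 2;

            /* start from the last (least significant) hex digit */
            for (i = (int)strlen(coremask) - 1; i >= 0; i--, idx += BITS_PER_HEX) {
                    char c = coremask[i];
                    int val, j;

                    if (!isxdigit((unsigned char)c))
                            return -1;
                    val = isdigit((unsigned char)c) ? c - '0' :
                                    tolower((unsigned char)c) - 'a' + 10;
                    for (j = 0; j < BITS_PER_HEX; j++)
                            if (val & (1 << j))
                                    printf("lcore %d enabled\n", idx + j);
            }
            return 0;
    }
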
@@ -442,6 +483,7 @@ eal_parse_service_corelist(const char *corelist)
unsigned count = 0;
char *end = NULL;
int min, max;
+ uint32_t taken_lcore_count = 0;
if (corelist == NULL)
return -1;
@@ -483,6 +525,9 @@ eal_parse_service_corelist(const char *corelist)
idx);
return -1;
}
+ if (cfg->lcore_role[idx] == ROLE_RTE)
+ taken_lcore_count++;
+
lcore_config[idx].core_role =
ROLE_SERVICE;
count++;
@@ -497,6 +542,12 @@ eal_parse_service_corelist(const char *corelist)
if (count == 0)
return -1;
+ if (core_parsed && taken_lcore_count != count) {
+ RTE_LOG(WARNING, EAL,
+ "Not all service cores were in the coremask. "
+ "Please ensure -c or -l includes service cores\n");
+ }
+
return 0;
}
@@ -509,6 +560,11 @@ eal_parse_corelist(const char *corelist)
char *end = NULL;
int min, max;
+ if (eal_service_cores_parsed())
+ RTE_LOG(WARNING, EAL,
+ "Service cores parsed before dataplane cores. "
+ "Please ensure -l is before -s or -S\n");
+
if (corelist == NULL)
return -1;
@@ -583,7 +639,8 @@ eal_parse_master_lcore(const char *arg)
/* ensure master core is not used as service core */
if (lcore_config[cfg->master_lcore].core_role == ROLE_SERVICE) {
- RTE_LOG(ERR, EAL, "Error: Master lcore is used as a service core.\n");
+ RTE_LOG(ERR, EAL,
+ "Error: Master lcore is used as a service core\n");
return -1;
}
@@ -875,7 +932,7 @@ static int
eal_parse_syslog(const char *facility, struct internal_config *conf)
{
int i;
- static struct {
+ static const struct {
const char *name;
int value;
} map[] = {
@@ -911,43 +968,92 @@ eal_parse_syslog(const char *facility, struct internal_config *conf)
}
static int
-eal_parse_log_level(const char *arg)
+eal_parse_log_priority(const char *level)
{
- char *end, *str, *type, *level;
+ static const char * const levels[] = {
+ [RTE_LOG_EMERG] = "emergency",
+ [RTE_LOG_ALERT] = "alert",
+ [RTE_LOG_CRIT] = "critical",
+ [RTE_LOG_ERR] = "error",
+ [RTE_LOG_WARNING] = "warning",
+ [RTE_LOG_NOTICE] = "notice",
+ [RTE_LOG_INFO] = "info",
+ [RTE_LOG_DEBUG] = "debug",
+ };
+ size_t len = strlen(level);
unsigned long tmp;
+ char *end;
+ unsigned int i;
- str = strdup(arg);
- if (str == NULL)
+ if (len == 0)
return -1;
- if (strchr(str, ',') == NULL) {
- type = NULL;
- level = str;
- } else {
- type = strsep(&str, ",");
- level = strsep(&str, ",");
+ /* look for named values, skip 0 which is not a valid level */
+ for (i = 1; i < RTE_DIM(levels); i++) {
+ if (strncmp(levels[i], level, len) == 0)
+ return i;
}
+ /* not a string, maybe it is numeric */
errno = 0;
tmp = strtoul(level, &end, 0);
/* check for errors */
- if ((errno != 0) || (level[0] == '\0') ||
- end == NULL || (*end != '\0'))
- goto fail;
+ if (errno != 0 || end == NULL || *end != '\0' ||
+ tmp >= UINT32_MAX)
+ return -1;
- /* log_level is a uint32_t */
- if (tmp >= UINT32_MAX)
- goto fail;
+ return tmp;
+}
+
+static int
+eal_parse_log_level(const char *arg)
+{
+ const char *pattern = NULL;
+ const char *regex = NULL;
+ char *str, *level;
+ int priority;
+
+ str = strdup(arg);
+ if (str == NULL)
+ return -1;
- if (type == NULL) {
- rte_log_set_global_level(tmp);
- } else if (rte_log_set_level_regexp(type, tmp) < 0) {
- printf("cannot set log level %s,%lu\n",
- type, tmp);
+ if ((level = strchr(str, ','))) {
+ regex = str;
+ *level++ = '\0';
+ } else if ((level = strchr(str, ':'))) {
+ pattern = str;
+ *level++ = '\0';
+ } else {
+ level = str;
+ }
+
+ priority = eal_parse_log_priority(level);
+ if (priority < 0) {
+ fprintf(stderr, "invalid log priority: %s\n", level);
goto fail;
}
+ if (regex) {
+ if (rte_log_set_level_regexp(regex, priority) < 0) {
+ fprintf(stderr, "cannot set log level %s,%d\n",
+ pattern, priority);
+ goto fail;
+ }
+ if (rte_log_save_regexp(regex, priority) < 0)
+ goto fail;
+ } else if (pattern) {
+ if (rte_log_set_level_pattern(pattern, priority) < 0) {
+ fprintf(stderr, "cannot set log level %s:%d\n",
+ pattern, priority);
+ goto fail;
+ }
+ if (rte_log_save_pattern(pattern, priority) < 0)
+ goto fail;
+ } else {
+ rte_log_set_global_level(priority);
+ }
+
free(str);
return 0;
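
With this rework, --log-level accepts a bare level ("--log-level=debug" or "--log-level=8"), a "<regexp>,<level>" pair, or a "<pattern>:<level>" pair (a glob such as "pmd.*"). Named levels are matched by prefix, so "err" resolves to RTE_LOG_ERR. A minimal standalone sketch of that prefix matching, assuming the same syslog-style level names (the real table indexes the RTE_LOG_* constants directly):

    #include <stdio.h>
    #include <string.h>

    /* Index 0 is unused because log priorities start at 1 (EMERG). */
    static const char * const levels[] = {
            NULL, "emergency", "alert", "critical", "error",
            "warning", "notice", "info", "debug",
    };

    static int
    name_to_priority(const char *level)
    {
            size_t len = strlen(level);
            unsigned int i;

            if (len == 0)
                    return -1;
            for (i = 1; i < sizeof(levels) / sizeof(levels[0]); i++)
                    if (strncmp(levels[i], level, len) == 0)
                            return i;
            return -1;
    }

    int
    main(void)
    {
            printf("%d\n", name_to_priority("err"));   /* 4 == error */
            printf("%d\n", name_to_priority("debug")); /* 8 */
            return 0;
    }
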
@@ -1089,6 +1195,8 @@ eal_parse_common_option(int opt, const char *optarg,
case OPT_NO_HUGE_NUM:
conf->no_hugetlbfs = 1;
+ /* no-huge is legacy mem */
+ conf->legacy_mem = 1;
break;
case OPT_NO_PCI_NUM:
@@ -1107,6 +1215,13 @@ eal_parse_common_option(int opt, const char *optarg,
conf->no_shconf = 1;
break;
+ case OPT_IN_MEMORY_NUM:
+ conf->in_memory = 1;
+ /* in-memory is a superset of noshconf and huge-unlink */
+ conf->no_shconf = 1;
+ conf->hugepage_unlink = 1;
+ break;
+
case OPT_PROC_TYPE_NUM:
conf->process_type = eal_parse_proc_type(optarg);
break;
@@ -1160,6 +1275,12 @@ eal_parse_common_option(int opt, const char *optarg,
core_parsed = LCORE_OPT_MAP;
break;
+ case OPT_LEGACY_MEM_NUM:
+ conf->legacy_mem = 1;
+ break;
+ case OPT_SINGLE_FILE_SEGMENTS_NUM:
+ conf->single_file_segments = 1;
+ break;
/* don't know what to do, leave this to caller */
default:
@@ -1252,12 +1373,23 @@ eal_check_common_options(struct internal_config *internal_cfg)
"be specified together with --"OPT_NO_HUGE"\n");
return -1;
}
-
- if (internal_cfg->no_hugetlbfs && internal_cfg->hugepage_unlink) {
+ if (internal_cfg->no_hugetlbfs && internal_cfg->hugepage_unlink &&
+ !internal_cfg->in_memory) {
RTE_LOG(ERR, EAL, "Option --"OPT_HUGE_UNLINK" cannot "
"be specified together with --"OPT_NO_HUGE"\n");
return -1;
}
+ if (internal_config.force_socket_limits && internal_config.legacy_mem) {
+ RTE_LOG(ERR, EAL, "Option --"OPT_SOCKET_LIMIT
+ " is only supported in non-legacy memory mode\n");
+ }
+ if (internal_cfg->single_file_segments &&
+ internal_cfg->hugepage_unlink) {
+ RTE_LOG(ERR, EAL, "Option --"OPT_SINGLE_FILE_SEGMENTS" is "
+ "not compatible with neither --"OPT_IN_MEMORY" nor "
+ "--"OPT_HUGE_UNLINK"\n");
+ return -1;
+ }
return 0;
}
@@ -1302,10 +1434,12 @@ eal_common_usage(void)
" --"OPT_PROC_TYPE" Type of this process (primary|secondary|auto)\n"
" --"OPT_SYSLOG" Set syslog facility\n"
" --"OPT_LOG_LEVEL"=<int> Set global log level\n"
- " --"OPT_LOG_LEVEL"=<type-regexp>,<int>\n"
+ " --"OPT_LOG_LEVEL"=<type-match>:<int>\n"
" Set specific log level\n"
" -v Display version information on startup\n"
" -h, --help This help\n"
+ " --"OPT_IN_MEMORY" Operate entirely in memory. This will\n"
+ " disable secondary process support\n"
"\nEAL options for DEBUG use only:\n"
" --"OPT_HUGE_UNLINK" Unlink hugepage files after init\n"
" --"OPT_NO_HUGE" Use malloc instead of hugetlbfs\n"
diff --git a/lib/librte_eal/common/eal_common_proc.c b/lib/librte_eal/common/eal_common_proc.c
index caa8774a..9fcb9121 100644
--- a/lib/librte_eal/common/eal_common_proc.c
+++ b/lib/librte_eal/common/eal_common_proc.c
@@ -13,18 +13,21 @@
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
+#include <sys/file.h>
#include <sys/time.h>
#include <sys/types.h>
#include <sys/socket.h>
#include <sys/un.h>
#include <unistd.h>
+#include <rte_alarm.h>
#include <rte_common.h>
#include <rte_cycles.h>
#include <rte_eal.h>
#include <rte_errno.h>
#include <rte_lcore.h>
#include <rte_log.h>
+#include <rte_tailq.h>
#include "eal_private.h"
#include "eal_filesystem.h"
@@ -51,6 +54,7 @@ enum mp_type {
MP_MSG, /* Share message with peers, will not block */
MP_REQ, /* Request for information, Will block for a reply */
MP_REP, /* Response to previously-received request */
+ MP_IGN, /* Response telling requester to ignore this response */
};
struct mp_msg_internal {
@@ -58,31 +62,66 @@ struct mp_msg_internal {
struct rte_mp_msg msg;
};
-struct sync_request {
- TAILQ_ENTRY(sync_request) next;
- int reply_received;
+struct async_request_param {
+ rte_mp_async_reply_t clb;
+ struct rte_mp_reply user_reply;
+ struct timespec end;
+ int n_responses_processed;
+};
+
+struct pending_request {
+ TAILQ_ENTRY(pending_request) next;
+ enum {
+ REQUEST_TYPE_SYNC,
+ REQUEST_TYPE_ASYNC
+ } type;
char dst[PATH_MAX];
struct rte_mp_msg *request;
struct rte_mp_msg *reply;
- pthread_cond_t cond;
+ int reply_received;
+ RTE_STD_C11
+ union {
+ struct {
+ struct async_request_param *param;
+ } async;
+ struct {
+ pthread_cond_t cond;
+ } sync;
+ };
};
-TAILQ_HEAD(sync_request_list, sync_request);
+TAILQ_HEAD(pending_request_list, pending_request);
static struct {
- struct sync_request_list requests;
+ struct pending_request_list requests;
pthread_mutex_t lock;
-} sync_requests = {
- .requests = TAILQ_HEAD_INITIALIZER(sync_requests.requests),
- .lock = PTHREAD_MUTEX_INITIALIZER
+} pending_requests = {
+ .requests = TAILQ_HEAD_INITIALIZER(pending_requests.requests),
+ .lock = PTHREAD_MUTEX_INITIALIZER,
+ /**< used in async requests only */
};
-static struct sync_request *
-find_sync_request(const char *dst, const char *act_name)
+/* forward declarations */
+static int
+mp_send(struct rte_mp_msg *msg, const char *peer, int type);
+
+/* for use with alarm callback */
+static void
+async_reply_handle(void *arg);
+
+/* for use with process_msg */
+static struct pending_request *
+async_reply_handle_thread_unsafe(void *arg);
+
+static void
+trigger_async_action(struct pending_request *req);
+
+static struct pending_request *
+find_pending_request(const char *dst, const char *act_name)
{
- struct sync_request *r;
+ struct pending_request *r;
- TAILQ_FOREACH(r, &sync_requests.requests, next) {
+ TAILQ_FOREACH(r, &pending_requests.requests, next) {
if (!strcmp(r->dst, dst) &&
!strcmp(r->request->name, act_name))
break;
@@ -91,6 +130,17 @@ find_sync_request(const char *dst, const char *act_name)
return r;
}
+static void
+create_socket_path(const char *name, char *buf, int len)
+{
+ const char *prefix = eal_mp_socket_path();
+
+ if (strlen(name) > 0)
+ snprintf(buf, len, "%s_%s", prefix, name);
+ else
+ strlcpy(buf, prefix, len);
+}
+
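
create_socket_path() builds the control-socket name under the runtime directory: the primary process uses the bare eal_mp_socket_path() (now "<runtime_dir>/mp_socket"), while each secondary appends a "<pid>_<rdtsc>" suffix so it gets a unique endpoint. A small sketch of the same composition with plain snprintf; the directory and helper names here are only illustrative:

    #include <stdio.h>
    #include <string.h>

    /* Illustrative stand-in for eal_mp_socket_path(); the real path is
     * derived from the DPDK runtime directory.
     */
    static const char *mp_socket_path = "/var/run/dpdk/rte/mp_socket";

    static void
    make_socket_path(const char *suffix, char *buf, size_t len)
    {
            if (strlen(suffix) > 0)
                    snprintf(buf, len, "%s_%s", mp_socket_path, suffix);
            else
                    snprintf(buf, len, "%s", mp_socket_path);
    }

    int
    main(void)
    {
            char primary[256], secondary[256];

            make_socket_path("", primary, sizeof(primary));
            make_socket_path("1234_9f3a", secondary, sizeof(secondary));
            printf("%s\n%s\n", primary, secondary);
            return 0;
    }
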
int
rte_eal_primary_proc_alive(const char *config_file_path)
{
@@ -159,7 +209,7 @@ rte_mp_action_register(const char *name, rte_mp_t action)
rte_errno = ENOMEM;
return -1;
}
- strcpy(entry->action_name, name);
+ strlcpy(entry->action_name, name, sizeof(entry->action_name));
entry->action = action;
pthread_mutex_lock(&mp_mutex_action);
@@ -241,23 +291,35 @@ read_msg(struct mp_msg_internal *m, struct sockaddr_un *s)
static void
process_msg(struct mp_msg_internal *m, struct sockaddr_un *s)
{
- struct sync_request *sync_req;
+ struct pending_request *pending_req;
struct action_entry *entry;
struct rte_mp_msg *msg = &m->msg;
rte_mp_t action = NULL;
RTE_LOG(DEBUG, EAL, "msg: %s\n", msg->name);
- if (m->type == MP_REP) {
- pthread_mutex_lock(&sync_requests.lock);
- sync_req = find_sync_request(s->sun_path, msg->name);
- if (sync_req) {
- memcpy(sync_req->reply, msg, sizeof(*msg));
- sync_req->reply_received = 1;
- pthread_cond_signal(&sync_req->cond);
+ if (m->type == MP_REP || m->type == MP_IGN) {
+ struct pending_request *req = NULL;
+
+ pthread_mutex_lock(&pending_requests.lock);
+ pending_req = find_pending_request(s->sun_path, msg->name);
+ if (pending_req) {
+ memcpy(pending_req->reply, msg, sizeof(*msg));
+ /* -1 indicates that we've been asked to ignore */
+ pending_req->reply_received =
+ m->type == MP_REP ? 1 : -1;
+
+ if (pending_req->type == REQUEST_TYPE_SYNC)
+ pthread_cond_signal(&pending_req->sync.cond);
+ else if (pending_req->type == REQUEST_TYPE_ASYNC)
+ req = async_reply_handle_thread_unsafe(
+ pending_req);
} else
RTE_LOG(ERR, EAL, "Drop mp reply: %s\n", msg->name);
- pthread_mutex_unlock(&sync_requests.lock);
+ pthread_mutex_unlock(&pending_requests.lock);
+
+ if (req != NULL)
+ trigger_async_action(req);
return;
}
@@ -267,10 +329,25 @@ process_msg(struct mp_msg_internal *m, struct sockaddr_un *s)
action = entry->action;
pthread_mutex_unlock(&mp_mutex_action);
- if (!action)
- RTE_LOG(ERR, EAL, "Cannot find action: %s\n", msg->name);
- else if (action(msg, s->sun_path) < 0)
+ if (!action) {
+ if (m->type == MP_REQ && !internal_config.init_complete) {
+ /* if this is a request, and init is not yet complete,
+ * and callback wasn't registered, we should tell the
+ * requester to ignore our existence because we're not
+ * yet ready to process this request.
+ */
+ struct rte_mp_msg dummy;
+
+ memset(&dummy, 0, sizeof(dummy));
+ strlcpy(dummy.name, msg->name, sizeof(dummy.name));
+ mp_send(&dummy, s->sun_path, MP_IGN);
+ } else {
+ RTE_LOG(ERR, EAL, "Cannot find action: %s\n",
+ msg->name);
+ }
+ } else if (action(msg, s->sun_path) < 0) {
RTE_LOG(ERR, EAL, "Fail to handle message: %s\n", msg->name);
+ }
}
static void *
@@ -288,10 +365,158 @@ mp_handle(void *arg __rte_unused)
}
static int
+timespec_cmp(const struct timespec *a, const struct timespec *b)
+{
+ if (a->tv_sec < b->tv_sec)
+ return -1;
+ if (a->tv_sec > b->tv_sec)
+ return 1;
+ if (a->tv_nsec < b->tv_nsec)
+ return -1;
+ if (a->tv_nsec > b->tv_nsec)
+ return 1;
+ return 0;
+}
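
timespec_cmp() orders two absolute deadlines; the async path also has to turn a relative timeout plus gettimeofday() into such a deadline, using the same arithmetic that appears later in rte_mp_request_async(). A self-contained sketch of both operations:

    #include <stdio.h>
    #include <sys/time.h>
    #include <time.h>

    static int
    ts_cmp(const struct timespec *a, const struct timespec *b)
    {
            if (a->tv_sec != b->tv_sec)
                    return a->tv_sec < b->tv_sec ? -1 : 1;
            if (a->tv_nsec != b->tv_nsec)
                    return a->tv_nsec < b->tv_nsec ? -1 : 1;
            return 0;
    }

    /* Convert "now + relative timeout" into an absolute deadline. */
    static void
    deadline_from_now(const struct timespec *rel, struct timespec *end)
    {
            struct timeval now;

            gettimeofday(&now, NULL);
            end->tv_nsec = (now.tv_usec * 1000 + rel->tv_nsec) % 1000000000;
            end->tv_sec = now.tv_sec + rel->tv_sec +
                            (now.tv_usec * 1000 + rel->tv_nsec) / 1000000000;
    }

    int
    main(void)
    {
            struct timespec rel = { .tv_sec = 5, .tv_nsec = 0 }, end, now_ts;
            struct timeval now;

            deadline_from_now(&rel, &end);
            gettimeofday(&now, NULL);
            now_ts.tv_sec = now.tv_sec;
            now_ts.tv_nsec = now.tv_usec * 1000;
            printf("timed out: %s\n", ts_cmp(&end, &now_ts) <= 0 ? "yes" : "no");
            return 0;
    }
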
+
+enum async_action {
+ ACTION_FREE, /**< free the action entry, but don't trigger callback */
+ ACTION_TRIGGER /**< trigger callback, then free action entry */
+};
+
+static enum async_action
+process_async_request(struct pending_request *sr, const struct timespec *now)
+{
+ struct async_request_param *param;
+ struct rte_mp_reply *reply;
+ bool timeout, last_msg;
+
+ param = sr->async.param;
+ reply = &param->user_reply;
+
+ /* did we timeout? */
+ timeout = timespec_cmp(&param->end, now) <= 0;
+
+	/* if we received a response, adjust relevant data and copy message. */
+ if (sr->reply_received == 1 && sr->reply) {
+ struct rte_mp_msg *msg, *user_msgs, *tmp;
+
+ msg = sr->reply;
+ user_msgs = reply->msgs;
+
+ tmp = realloc(user_msgs, sizeof(*msg) *
+ (reply->nb_received + 1));
+ if (!tmp) {
+ RTE_LOG(ERR, EAL, "Fail to alloc reply for request %s:%s\n",
+ sr->dst, sr->request->name);
+ /* this entry is going to be removed and its message
+ * dropped, but we don't want to leak memory, so
+ * continue.
+ */
+ } else {
+ user_msgs = tmp;
+ reply->msgs = user_msgs;
+ memcpy(&user_msgs[reply->nb_received],
+ msg, sizeof(*msg));
+ reply->nb_received++;
+ }
+
+ /* mark this request as processed */
+ param->n_responses_processed++;
+ } else if (sr->reply_received == -1) {
+ /* we were asked to ignore this process */
+ reply->nb_sent--;
+ } else if (timeout) {
+ /* count it as processed response, but don't increment
+ * nb_received.
+ */
+ param->n_responses_processed++;
+ }
+
+ free(sr->reply);
+
+ last_msg = param->n_responses_processed == reply->nb_sent;
+
+ return last_msg ? ACTION_TRIGGER : ACTION_FREE;
+}
+
+static void
+trigger_async_action(struct pending_request *sr)
+{
+ struct async_request_param *param;
+ struct rte_mp_reply *reply;
+
+ param = sr->async.param;
+ reply = &param->user_reply;
+
+ param->clb(sr->request, reply);
+
+ /* clean up */
+ free(sr->async.param->user_reply.msgs);
+ free(sr->async.param);
+ free(sr->request);
+ free(sr);
+}
+
+static struct pending_request *
+async_reply_handle_thread_unsafe(void *arg)
+{
+ struct pending_request *req = (struct pending_request *)arg;
+ enum async_action action;
+ struct timespec ts_now;
+ struct timeval now;
+
+ if (gettimeofday(&now, NULL) < 0) {
+ RTE_LOG(ERR, EAL, "Cannot get current time\n");
+ goto no_trigger;
+ }
+ ts_now.tv_nsec = now.tv_usec * 1000;
+ ts_now.tv_sec = now.tv_sec;
+
+ action = process_async_request(req, &ts_now);
+
+ TAILQ_REMOVE(&pending_requests.requests, req, next);
+
+ if (rte_eal_alarm_cancel(async_reply_handle, req) < 0) {
+ /* if we failed to cancel the alarm because it's already in
+ * progress, don't proceed because otherwise we will end up
+ * handling the same message twice.
+ */
+ if (rte_errno == EINPROGRESS) {
+ RTE_LOG(DEBUG, EAL, "Request handling is already in progress\n");
+ goto no_trigger;
+ }
+ RTE_LOG(ERR, EAL, "Failed to cancel alarm\n");
+ }
+
+ if (action == ACTION_TRIGGER)
+ return req;
+no_trigger:
+ free(req);
+ return NULL;
+}
+
+static void
+async_reply_handle(void *arg)
+{
+ struct pending_request *req;
+
+ pthread_mutex_lock(&pending_requests.lock);
+ req = async_reply_handle_thread_unsafe(arg);
+ pthread_mutex_unlock(&pending_requests.lock);
+
+ if (req != NULL)
+ trigger_async_action(req);
+}
+
+static int
open_socket_fd(void)
{
+ char peer_name[PATH_MAX] = {0};
struct sockaddr_un un;
- const char *prefix = eal_mp_socket_path();
+
+ if (rte_eal_process_type() == RTE_PROC_SECONDARY)
+ snprintf(peer_name, sizeof(peer_name),
+ "%d_%"PRIx64, getpid(), rte_rdtsc());
mp_fd = socket(AF_UNIX, SOCK_DGRAM, 0);
if (mp_fd < 0) {
@@ -301,13 +526,11 @@ open_socket_fd(void)
memset(&un, 0, sizeof(un));
un.sun_family = AF_UNIX;
- if (rte_eal_process_type() == RTE_PROC_PRIMARY)
- snprintf(un.sun_path, sizeof(un.sun_path), "%s", prefix);
- else {
- snprintf(un.sun_path, sizeof(un.sun_path), "%s_%d_%"PRIx64,
- prefix, getpid(), rte_rdtsc());
- }
+
+ create_socket_path(peer_name, un.sun_path, sizeof(un.sun_path));
+
unlink(un.sun_path); /* May still exist since last run */
+
if (bind(mp_fd, (struct sockaddr *)&un, sizeof(un)) < 0) {
RTE_LOG(ERR, EAL, "failed to bind %s: %s\n",
un.sun_path, strerror(errno));
@@ -342,54 +565,70 @@ unlink_sockets(const char *filter)
return 0;
}
-static void
-unlink_socket_by_path(const char *path)
-{
- char *filename;
- char *fullpath = strdup(path);
-
- if (!fullpath)
- return;
- filename = basename(fullpath);
- unlink_sockets(filename);
- free(fullpath);
- RTE_LOG(INFO, EAL, "Remove socket %s\n", path);
-}
-
int
rte_mp_channel_init(void)
{
- char thread_name[RTE_MAX_THREAD_NAME_LEN];
- char *path;
- pthread_t tid;
+ char path[PATH_MAX];
+ int dir_fd;
+ pthread_t mp_handle_tid;
+
+ /* in no shared files mode, we do not have secondary processes support,
+ * so no need to initialize IPC.
+ */
+ if (internal_config.no_shconf) {
+ RTE_LOG(DEBUG, EAL, "No shared files mode enabled, IPC will be disabled\n");
+ return 0;
+ }
- snprintf(mp_filter, PATH_MAX, ".%s_unix_*",
- internal_config.hugefile_prefix);
+ /* create filter path */
+ create_socket_path("*", path, sizeof(path));
+ strlcpy(mp_filter, basename(path), sizeof(mp_filter));
- path = strdup(eal_mp_socket_path());
- snprintf(mp_dir_path, PATH_MAX, "%s", dirname(path));
- free(path);
+ /* path may have been modified, so recreate it */
+ create_socket_path("*", path, sizeof(path));
+ strlcpy(mp_dir_path, dirname(path), sizeof(mp_dir_path));
+
+ /* lock the directory */
+ dir_fd = open(mp_dir_path, O_RDONLY);
+ if (dir_fd < 0) {
+ RTE_LOG(ERR, EAL, "failed to open %s: %s\n",
+ mp_dir_path, strerror(errno));
+ return -1;
+ }
+
+ if (flock(dir_fd, LOCK_EX)) {
+ RTE_LOG(ERR, EAL, "failed to lock %s: %s\n",
+ mp_dir_path, strerror(errno));
+ close(dir_fd);
+ return -1;
+ }
if (rte_eal_process_type() == RTE_PROC_PRIMARY &&
- unlink_sockets(mp_filter)) {
+ unlink_sockets(mp_filter)) {
RTE_LOG(ERR, EAL, "failed to unlink mp sockets\n");
+ close(dir_fd);
return -1;
}
- if (open_socket_fd() < 0)
+ if (open_socket_fd() < 0) {
+ close(dir_fd);
return -1;
+ }
- if (pthread_create(&tid, NULL, mp_handle, NULL) < 0) {
+ if (rte_ctrl_thread_create(&mp_handle_tid, "rte_mp_handle",
+ NULL, mp_handle, NULL) < 0) {
RTE_LOG(ERR, EAL, "failed to create mp thead: %s\n",
strerror(errno));
close(mp_fd);
+ close(dir_fd);
mp_fd = -1;
return -1;
}
- /* try best to set thread name */
- snprintf(thread_name, RTE_MAX_THREAD_NAME_LEN, "rte_mp_handle");
- rte_thread_setname(tid, thread_name);
+ /* unlock the directory */
+ flock(dir_fd, LOCK_UN);
+ close(dir_fd);
+
return 0;
}
@@ -416,7 +655,7 @@ send_msg(const char *dst_path, struct rte_mp_msg *msg, int type)
memset(&dst, 0, sizeof(dst));
dst.sun_family = AF_UNIX;
- snprintf(dst.sun_path, sizeof(dst.sun_path), "%s", dst_path);
+ strlcpy(dst.sun_path, dst_path, sizeof(dst.sun_path));
memset(&msgh, 0, sizeof(msgh));
memset(control, 0, sizeof(control));
@@ -444,13 +683,12 @@ send_msg(const char *dst_path, struct rte_mp_msg *msg, int type)
if (snd < 0) {
rte_errno = errno;
/* Check if it caused by peer process exits */
- if (errno == -ECONNREFUSED) {
- /* We don't unlink the primary's socket here */
- if (rte_eal_process_type() == RTE_PROC_PRIMARY)
- unlink_socket_by_path(dst_path);
+ if (errno == ECONNREFUSED &&
+ rte_eal_process_type() == RTE_PROC_PRIMARY) {
+ unlink(dst_path);
return 0;
}
- if (errno == -ENOBUFS) {
+ if (errno == ENOBUFS) {
RTE_LOG(ERR, EAL, "Peer cannot receive message %s\n",
dst_path);
return 0;
@@ -466,7 +704,7 @@ send_msg(const char *dst_path, struct rte_mp_msg *msg, int type)
static int
mp_send(struct rte_mp_msg *msg, const char *peer, int type)
{
- int ret = 0;
+ int dir_fd, ret = 0;
DIR *mp_dir;
struct dirent *ent;
@@ -488,14 +726,32 @@ mp_send(struct rte_mp_msg *msg, const char *peer, int type)
rte_errno = errno;
return -1;
}
+
+ dir_fd = dirfd(mp_dir);
+ /* lock the directory to prevent processes spinning up while we send */
+ if (flock(dir_fd, LOCK_SH)) {
+ RTE_LOG(ERR, EAL, "Unable to lock directory %s\n",
+ mp_dir_path);
+ rte_errno = errno;
+ closedir(mp_dir);
+ return -1;
+ }
+
while ((ent = readdir(mp_dir))) {
+ char path[PATH_MAX];
+
if (fnmatch(mp_filter, ent->d_name, 0) != 0)
continue;
- if (send_msg(ent->d_name, msg, type) < 0)
+ snprintf(path, sizeof(path), "%s/%s", mp_dir_path,
+ ent->d_name);
+ if (send_msg(path, msg, type) < 0)
ret = -1;
}
+ /* unlock the dir */
+ flock(dir_fd, LOCK_UN);
+ /* dir_fd automatically closed on closedir */
closedir(mp_dir);
return ret;
}
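
mp_send() now takes a shared flock() on the runtime directory before walking it, so peers cannot create or remove their sockets mid-broadcast; rte_mp_channel_init() takes the exclusive lock for the same reason. The locking pattern in isolation (the directory path handling is illustrative):

    #include <dirent.h>
    #include <stdio.h>
    #include <sys/file.h>

    /* Walk a directory while holding a shared lock on it. */
    static int
    walk_locked(const char *dirpath)
    {
            DIR *dir = opendir(dirpath);
            struct dirent *ent;
            int dir_fd;

            if (dir == NULL)
                    return -1;
            dir_fd = dirfd(dir);
            if (flock(dir_fd, LOCK_SH) < 0) {       /* writers use LOCK_EX */
                    closedir(dir);
                    return -1;
            }
            while ((ent = readdir(dir)) != NULL)
                    printf("%s/%s\n", dirpath, ent->d_name);
            flock(dir_fd, LOCK_UN);
            closedir(dir);                          /* also closes dir_fd */
            return 0;
    }
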
@@ -539,25 +795,82 @@ rte_mp_sendmsg(struct rte_mp_msg *msg)
}
static int
-mp_request_one(const char *dst, struct rte_mp_msg *req,
+mp_request_async(const char *dst, struct rte_mp_msg *req,
+ struct async_request_param *param, const struct timespec *ts)
+{
+ struct rte_mp_msg *reply_msg;
+ struct pending_request *pending_req, *exist;
+ int ret;
+
+ pending_req = calloc(1, sizeof(*pending_req));
+ reply_msg = calloc(1, sizeof(*reply_msg));
+ if (pending_req == NULL || reply_msg == NULL) {
+ RTE_LOG(ERR, EAL, "Could not allocate space for sync request\n");
+ rte_errno = ENOMEM;
+ ret = -1;
+ goto fail;
+ }
+
+ pending_req->type = REQUEST_TYPE_ASYNC;
+ strlcpy(pending_req->dst, dst, sizeof(pending_req->dst));
+ pending_req->request = req;
+ pending_req->reply = reply_msg;
+ pending_req->async.param = param;
+
+ /* queue already locked by caller */
+
+ exist = find_pending_request(dst, req->name);
+ if (exist) {
+ RTE_LOG(ERR, EAL, "A pending request %s:%s\n", dst, req->name);
+ rte_errno = EEXIST;
+ ret = -1;
+ goto fail;
+ }
+
+ ret = send_msg(dst, req, MP_REQ);
+ if (ret < 0) {
+ RTE_LOG(ERR, EAL, "Fail to send request %s:%s\n",
+ dst, req->name);
+ ret = -1;
+ goto fail;
+ } else if (ret == 0) {
+ ret = 0;
+ goto fail;
+ }
+ TAILQ_INSERT_TAIL(&pending_requests.requests, pending_req, next);
+
+ param->user_reply.nb_sent++;
+
+ if (rte_eal_alarm_set(ts->tv_sec * 1000000 + ts->tv_nsec / 1000,
+ async_reply_handle, pending_req) < 0) {
+ RTE_LOG(ERR, EAL, "Fail to set alarm for request %s:%s\n",
+ dst, req->name);
+ rte_panic("Fix the above shit to properly free all memory\n");
+ }
+
+ return 0;
+fail:
+ free(pending_req);
+ free(reply_msg);
+ return ret;
+}
+
+static int
+mp_request_sync(const char *dst, struct rte_mp_msg *req,
struct rte_mp_reply *reply, const struct timespec *ts)
{
int ret;
- struct timeval now;
struct rte_mp_msg msg, *tmp;
- struct sync_request sync_req, *exist;
-
- sync_req.reply_received = 0;
- strcpy(sync_req.dst, dst);
- sync_req.request = req;
- sync_req.reply = &msg;
- pthread_cond_init(&sync_req.cond, NULL);
-
- pthread_mutex_lock(&sync_requests.lock);
- exist = find_sync_request(dst, req->name);
- if (!exist)
- TAILQ_INSERT_TAIL(&sync_requests.requests, &sync_req, next);
- pthread_mutex_unlock(&sync_requests.lock);
+ struct pending_request pending_req, *exist;
+
+ pending_req.type = REQUEST_TYPE_SYNC;
+ pending_req.reply_received = 0;
+ strlcpy(pending_req.dst, dst, sizeof(pending_req.dst));
+ pending_req.request = req;
+ pending_req.reply = &msg;
+ pthread_cond_init(&pending_req.sync.cond, NULL);
+
+ exist = find_pending_request(dst, req->name);
if (exist) {
RTE_LOG(ERR, EAL, "A pending request %s:%s\n", dst, req->name);
rte_errno = EEXIST;
@@ -572,33 +885,31 @@ mp_request_one(const char *dst, struct rte_mp_msg *req,
} else if (ret == 0)
return 0;
+ TAILQ_INSERT_TAIL(&pending_requests.requests, &pending_req, next);
+
reply->nb_sent++;
- pthread_mutex_lock(&sync_requests.lock);
do {
- pthread_cond_timedwait(&sync_req.cond, &sync_requests.lock, ts);
- /* Check spurious wakeups */
- if (sync_req.reply_received == 1)
- break;
- /* Check if time is out */
- if (gettimeofday(&now, NULL) < 0)
- break;
- if (now.tv_sec < ts->tv_sec)
- break;
- else if (now.tv_sec == ts->tv_sec &&
- now.tv_usec * 1000 < ts->tv_nsec)
- break;
- } while (1);
- /* We got the lock now */
- TAILQ_REMOVE(&sync_requests.requests, &sync_req, next);
- pthread_mutex_unlock(&sync_requests.lock);
+ ret = pthread_cond_timedwait(&pending_req.sync.cond,
+ &pending_requests.lock, ts);
+ } while (ret != 0 && ret != ETIMEDOUT);
+
+ TAILQ_REMOVE(&pending_requests.requests, &pending_req, next);
- if (sync_req.reply_received == 0) {
+ if (pending_req.reply_received == 0) {
RTE_LOG(ERR, EAL, "Fail to recv reply for request %s:%s\n",
dst, req->name);
rte_errno = ETIMEDOUT;
return -1;
}
+ if (pending_req.reply_received == -1) {
+ RTE_LOG(DEBUG, EAL, "Asked to ignore response\n");
+ /* not receiving this message is not an error, so decrement
+ * number of sent messages
+ */
+ reply->nb_sent--;
+ return 0;
+ }
tmp = realloc(reply->msgs, sizeof(msg) * (reply->nb_received + 1));
if (!tmp) {
@@ -614,10 +925,10 @@ mp_request_one(const char *dst, struct rte_mp_msg *req,
}
int __rte_experimental
-rte_mp_request(struct rte_mp_msg *req, struct rte_mp_reply *reply,
+rte_mp_request_sync(struct rte_mp_msg *req, struct rte_mp_reply *reply,
const struct timespec *ts)
{
- int ret = 0;
+ int dir_fd, ret = 0;
DIR *mp_dir;
struct dirent *ent;
struct timeval now;
@@ -627,6 +938,12 @@ rte_mp_request(struct rte_mp_msg *req, struct rte_mp_reply *reply,
if (check_input(req) == false)
return -1;
+
+ if (internal_config.no_shconf) {
+ RTE_LOG(DEBUG, EAL, "No shared files mode enabled, IPC is disabled\n");
+ return 0;
+ }
+
if (gettimeofday(&now, NULL) < 0) {
RTE_LOG(ERR, EAL, "Faile to get current time\n");
rte_errno = errno;
@@ -642,8 +959,12 @@ rte_mp_request(struct rte_mp_msg *req, struct rte_mp_reply *reply,
reply->msgs = NULL;
/* for secondary process, send request to the primary process only */
- if (rte_eal_process_type() == RTE_PROC_SECONDARY)
- return mp_request_one(eal_mp_socket_path(), req, reply, &end);
+ if (rte_eal_process_type() == RTE_PROC_SECONDARY) {
+ pthread_mutex_lock(&pending_requests.lock);
+ ret = mp_request_sync(eal_mp_socket_path(), req, reply, &end);
+ pthread_mutex_unlock(&pending_requests.lock);
+ return ret;
+ }
/* for primary process, broadcast request, and collect reply 1 by 1 */
mp_dir = opendir(mp_dir_path);
@@ -653,22 +974,193 @@ rte_mp_request(struct rte_mp_msg *req, struct rte_mp_reply *reply,
return -1;
}
+ dir_fd = dirfd(mp_dir);
+ /* lock the directory to prevent processes spinning up while we send */
+ if (flock(dir_fd, LOCK_SH)) {
+ RTE_LOG(ERR, EAL, "Unable to lock directory %s\n",
+ mp_dir_path);
+ closedir(mp_dir);
+ rte_errno = errno;
+ return -1;
+ }
+
+ pthread_mutex_lock(&pending_requests.lock);
while ((ent = readdir(mp_dir))) {
+ char path[PATH_MAX];
+
if (fnmatch(mp_filter, ent->d_name, 0) != 0)
continue;
- if (mp_request_one(ent->d_name, req, reply, &end))
+ snprintf(path, sizeof(path), "%s/%s", mp_dir_path,
+ ent->d_name);
+
+ /* unlocks the mutex while waiting for response,
+ * locks on receive
+ */
+ if (mp_request_sync(path, req, reply, &end))
ret = -1;
}
+ pthread_mutex_unlock(&pending_requests.lock);
+ /* unlock the directory */
+ flock(dir_fd, LOCK_UN);
+ /* dir_fd automatically closed on closedir */
closedir(mp_dir);
return ret;
}
int __rte_experimental
-rte_mp_reply(struct rte_mp_msg *msg, const char *peer)
+rte_mp_request_async(struct rte_mp_msg *req, const struct timespec *ts,
+ rte_mp_async_reply_t clb)
{
+ struct rte_mp_msg *copy;
+ struct pending_request *dummy;
+ struct async_request_param *param;
+ struct rte_mp_reply *reply;
+ int dir_fd, ret = 0;
+ DIR *mp_dir;
+ struct dirent *ent;
+ struct timeval now;
+ struct timespec *end;
+ bool dummy_used = false;
+
+ RTE_LOG(DEBUG, EAL, "request: %s\n", req->name);
+
+ if (check_input(req) == false)
+ return -1;
+
+ if (internal_config.no_shconf) {
+ RTE_LOG(DEBUG, EAL, "No shared files mode enabled, IPC is disabled\n");
+ return 0;
+ }
+
+ if (gettimeofday(&now, NULL) < 0) {
+ RTE_LOG(ERR, EAL, "Faile to get current time\n");
+ rte_errno = errno;
+ return -1;
+ }
+ copy = calloc(1, sizeof(*copy));
+ dummy = calloc(1, sizeof(*dummy));
+ param = calloc(1, sizeof(*param));
+ if (copy == NULL || dummy == NULL || param == NULL) {
+ RTE_LOG(ERR, EAL, "Failed to allocate memory for async reply\n");
+ rte_errno = ENOMEM;
+ goto fail;
+ }
+
+ /* copy message */
+ memcpy(copy, req, sizeof(*copy));
+
+ param->n_responses_processed = 0;
+ param->clb = clb;
+ end = &param->end;
+ reply = &param->user_reply;
+
+ end->tv_nsec = (now.tv_usec * 1000 + ts->tv_nsec) % 1000000000;
+ end->tv_sec = now.tv_sec + ts->tv_sec +
+ (now.tv_usec * 1000 + ts->tv_nsec) / 1000000000;
+ reply->nb_sent = 0;
+ reply->nb_received = 0;
+ reply->msgs = NULL;
+
+ /* we have to lock the request queue here, as we will be adding a bunch
+ * of requests to the queue at once, and some of the replies may arrive
+ * before we add all of the requests to the queue.
+ */
+ pthread_mutex_lock(&pending_requests.lock);
+
+ /* we have to ensure that callback gets triggered even if we don't send
+ * anything, therefore earlier we have allocated a dummy request. fill
+ * it, and put it on the queue if we don't send any requests.
+ */
+ dummy->type = REQUEST_TYPE_ASYNC;
+ dummy->request = copy;
+ dummy->reply = NULL;
+ dummy->async.param = param;
+ dummy->reply_received = 1; /* short-circuit the timeout */
+
+ /* for secondary process, send request to the primary process only */
+ if (rte_eal_process_type() == RTE_PROC_SECONDARY) {
+ ret = mp_request_async(eal_mp_socket_path(), copy, param, ts);
+
+ /* if we didn't send anything, put dummy request on the queue */
+ if (ret == 0 && reply->nb_sent == 0) {
+ TAILQ_INSERT_TAIL(&pending_requests.requests, dummy,
+ next);
+ dummy_used = true;
+ }
+
+ pthread_mutex_unlock(&pending_requests.lock);
+
+ /* if we couldn't send anything, clean up */
+ if (ret != 0)
+ goto fail;
+ return 0;
+ }
+
+ /* for primary process, broadcast request */
+ mp_dir = opendir(mp_dir_path);
+ if (!mp_dir) {
+ RTE_LOG(ERR, EAL, "Unable to open directory %s\n", mp_dir_path);
+ rte_errno = errno;
+ goto unlock_fail;
+ }
+ dir_fd = dirfd(mp_dir);
+ /* lock the directory to prevent processes spinning up while we send */
+ if (flock(dir_fd, LOCK_SH)) {
+ RTE_LOG(ERR, EAL, "Unable to lock directory %s\n",
+ mp_dir_path);
+ rte_errno = errno;
+ goto closedir_fail;
+ }
+
+ while ((ent = readdir(mp_dir))) {
+ char path[PATH_MAX];
+
+ if (fnmatch(mp_filter, ent->d_name, 0) != 0)
+ continue;
+
+ snprintf(path, sizeof(path), "%s/%s", mp_dir_path,
+ ent->d_name);
+
+ if (mp_request_async(path, copy, param, ts))
+ ret = -1;
+ }
+ /* if we didn't send anything, put dummy request on the queue */
+ if (ret == 0 && reply->nb_sent == 0) {
+ TAILQ_INSERT_HEAD(&pending_requests.requests, dummy, next);
+ dummy_used = true;
+ }
+
+ /* finally, unlock the queue */
+ pthread_mutex_unlock(&pending_requests.lock);
+
+ /* unlock the directory */
+ flock(dir_fd, LOCK_UN);
+
+ /* dir_fd automatically closed on closedir */
+ closedir(mp_dir);
+
+ /* if dummy was unused, free it */
+ if (!dummy_used)
+ free(dummy);
+
+ return ret;
+closedir_fail:
+ closedir(mp_dir);
+unlock_fail:
+ pthread_mutex_unlock(&pending_requests.lock);
+fail:
+ free(dummy);
+ free(param);
+ free(copy);
+ return -1;
+}
+
+int __rte_experimental
+rte_mp_reply(struct rte_mp_msg *msg, const char *peer)
+{
RTE_LOG(DEBUG, EAL, "reply: %s\n", msg->name);
if (check_input(msg) == false)
@@ -680,5 +1172,10 @@ rte_mp_reply(struct rte_mp_msg *msg, const char *peer)
return -1;
}
+ if (internal_config.no_shconf) {
+ RTE_LOG(DEBUG, EAL, "No shared files mode enabled, IPC is disabled\n");
+ return 0;
+ }
+
return mp_send(msg, peer, MP_REP);
}
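
Together with rte_mp_action_register(), the renamed rte_mp_request_sync() and the new rte_mp_request_async() form the experimental IPC API. A hedged usage sketch for a secondary process issuing a synchronous request; the action name and empty payload are made up for illustration, and the primary is assumed to have registered a matching handler that answers with rte_mp_reply():

    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>
    #include <time.h>

    #include <rte_eal.h>
    #include <rte_log.h>

    static int
    query_primary(void)
    {
            struct rte_mp_msg req;
            struct rte_mp_reply reply;
            struct timespec ts = { .tv_sec = 5, .tv_nsec = 0 };

            memset(&req, 0, sizeof(req));
            snprintf(req.name, sizeof(req.name), "example_action");
            req.len_param = 0;      /* no payload in this sketch */
            req.num_fds = 0;

            if (rte_mp_request_sync(&req, &reply, &ts) < 0)
                    return -1;

            RTE_LOG(INFO, USER1, "got %d replies\n", reply.nb_received);
            free(reply.msgs);       /* caller owns the reply array */
            return 0;
    }
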
diff --git a/lib/librte_eal/common/eal_common_thread.c b/lib/librte_eal/common/eal_common_thread.c
index 40902e49..48ef4d6d 100644
--- a/lib/librte_eal/common/eal_common_thread.c
+++ b/lib/librte_eal/common/eal_common_thread.c
@@ -7,6 +7,7 @@
#include <stdint.h>
#include <unistd.h>
#include <pthread.h>
+#include <signal.h>
#include <sched.h>
#include <assert.h>
#include <string.h>
@@ -15,6 +16,7 @@
#include <rte_memory.h>
#include <rte_log.h>
+#include "eal_private.h"
#include "eal_thread.h"
RTE_DECLARE_PER_LCORE(unsigned , _socket_id);
@@ -32,10 +34,7 @@ rte_lcore_has_role(unsigned int lcore_id, enum rte_lcore_role_t role)
if (lcore_id >= RTE_MAX_LCORE)
return -EINVAL;
- if (cfg->lcore_role[lcore_id] == role)
- return 0;
-
- return -EINVAL;
+ return cfg->lcore_role[lcore_id] == role;
}
int eal_cpuset_socket_id(rte_cpuset_t *cpusetp)
@@ -140,3 +139,94 @@ exit:
return ret;
}
+
+
+struct rte_thread_ctrl_params {
+ void *(*start_routine)(void *);
+ void *arg;
+ pthread_barrier_t configured;
+};
+
+static void *rte_thread_init(void *arg)
+{
+ int ret;
+ struct rte_thread_ctrl_params *params = arg;
+ void *(*start_routine)(void *) = params->start_routine;
+ void *routine_arg = params->arg;
+
+ ret = pthread_barrier_wait(&params->configured);
+ if (ret == PTHREAD_BARRIER_SERIAL_THREAD) {
+ pthread_barrier_destroy(&params->configured);
+ free(params);
+ }
+
+ return start_routine(routine_arg);
+}
+
+__rte_experimental int
+rte_ctrl_thread_create(pthread_t *thread, const char *name,
+ const pthread_attr_t *attr,
+ void *(*start_routine)(void *), void *arg)
+{
+ struct rte_thread_ctrl_params *params;
+ unsigned int lcore_id;
+ rte_cpuset_t cpuset;
+ int cpu_found, ret;
+
+ params = malloc(sizeof(*params));
+ if (!params)
+ return -ENOMEM;
+
+ params->start_routine = start_routine;
+ params->arg = arg;
+
+ pthread_barrier_init(&params->configured, NULL, 2);
+
+ ret = pthread_create(thread, attr, rte_thread_init, (void *)params);
+ if (ret != 0) {
+ free(params);
+ return -ret;
+ }
+
+ if (name != NULL) {
+ ret = rte_thread_setname(*thread, name);
+ if (ret < 0)
+ RTE_LOG(DEBUG, EAL,
+ "Cannot set name for ctrl thread\n");
+ }
+
+ cpu_found = 0;
+ CPU_ZERO(&cpuset);
+ for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) {
+ if (eal_cpu_detected(lcore_id) &&
+ rte_lcore_has_role(lcore_id, ROLE_OFF)) {
+ CPU_SET(lcore_id, &cpuset);
+ cpu_found = 1;
+ }
+ }
+ /* if no detected cpu is off, use master core */
+ if (!cpu_found)
+ CPU_SET(rte_get_master_lcore(), &cpuset);
+
+ ret = pthread_setaffinity_np(*thread, sizeof(cpuset), &cpuset);
+ if (ret < 0)
+ goto fail;
+
+ ret = pthread_barrier_wait(&params->configured);
+ if (ret == PTHREAD_BARRIER_SERIAL_THREAD) {
+ pthread_barrier_destroy(&params->configured);
+ free(params);
+ }
+
+ return 0;
+
+fail:
+ if (PTHREAD_BARRIER_SERIAL_THREAD ==
+ pthread_barrier_wait(&params->configured)) {
+ pthread_barrier_destroy(&params->configured);
+ free(params);
+ }
+ pthread_cancel(*thread);
+ pthread_join(*thread, NULL);
+ return -ret;
+}
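
rte_ctrl_thread_create() wraps pthread_create(), names the thread on a best-effort basis, and pins it to CPUs not used as DPDK lcores (falling back to the master lcore). A minimal hedged usage sketch; the thread body is a placeholder:

    #include <pthread.h>
    #include <unistd.h>

    #include <rte_lcore.h>

    /* Placeholder control-thread body: just sleeps in a loop. */
    static void *
    housekeeping(void *arg)
    {
            (void)arg;
            for (;;)
                    sleep(1);
            return NULL;
    }

    static int
    start_housekeeping(void)
    {
            pthread_t tid;

            /* NULL attr: default pthread attributes */
            return rte_ctrl_thread_create(&tid, "housekeeping", NULL,
                            housekeeping, NULL);
    }
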
diff --git a/lib/librte_eal/common/eal_common_uuid.c b/lib/librte_eal/common/eal_common_uuid.c
new file mode 100644
index 00000000..1b93c5b3
--- /dev/null
+++ b/lib/librte_eal/common/eal_common_uuid.c
@@ -0,0 +1,193 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright (C) 1996, 1997 Theodore Ts'o.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, and the entire permission notice in its entirety,
+ * including the disclaimer of warranties.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. The name of the author may not be used to endorse or promote
+ * products derived from this software without specific prior
+ * written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ALL OF
+ * WHICH ARE HEREBY DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT
+ * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
+ * USE OF THIS SOFTWARE, EVEN IF NOT ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ */
+
+#include <stdio.h>
+#include <string.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <ctype.h>
+
+#include <rte_uuid.h>
+
+/* UUID packed form */
+struct uuid {
+ uint32_t time_low;
+ uint16_t time_mid;
+ uint16_t time_hi_and_version;
+ uint16_t clock_seq;
+ uint8_t node[6];
+};
+
+static void uuid_pack(const struct uuid *uu, rte_uuid_t ptr)
+{
+ uint32_t tmp;
+ uint8_t *out = ptr;
+
+ tmp = uu->time_low;
+ out[3] = (uint8_t) tmp;
+ tmp >>= 8;
+ out[2] = (uint8_t) tmp;
+ tmp >>= 8;
+ out[1] = (uint8_t) tmp;
+ tmp >>= 8;
+ out[0] = (uint8_t) tmp;
+
+ tmp = uu->time_mid;
+ out[5] = (uint8_t) tmp;
+ tmp >>= 8;
+ out[4] = (uint8_t) tmp;
+
+ tmp = uu->time_hi_and_version;
+ out[7] = (uint8_t) tmp;
+ tmp >>= 8;
+ out[6] = (uint8_t) tmp;
+
+ tmp = uu->clock_seq;
+ out[9] = (uint8_t) tmp;
+ tmp >>= 8;
+ out[8] = (uint8_t) tmp;
+
+ memcpy(out+10, uu->node, 6);
+}
+
+static void uuid_unpack(const rte_uuid_t in, struct uuid *uu)
+{
+ const uint8_t *ptr = in;
+ uint32_t tmp;
+
+ tmp = *ptr++;
+ tmp = (tmp << 8) | *ptr++;
+ tmp = (tmp << 8) | *ptr++;
+ tmp = (tmp << 8) | *ptr++;
+ uu->time_low = tmp;
+
+ tmp = *ptr++;
+ tmp = (tmp << 8) | *ptr++;
+ uu->time_mid = tmp;
+
+ tmp = *ptr++;
+ tmp = (tmp << 8) | *ptr++;
+ uu->time_hi_and_version = tmp;
+
+ tmp = *ptr++;
+ tmp = (tmp << 8) | *ptr++;
+ uu->clock_seq = tmp;
+
+ memcpy(uu->node, ptr, 6);
+}
+
+bool rte_uuid_is_null(const rte_uuid_t uu)
+{
+ const uint8_t *cp = uu;
+ int i;
+
+ for (i = 0; i < 16; i++)
+ if (*cp++)
+ return false;
+ return true;
+}
+
+/*
+ * rte_uuid_compare() - compare two UUIDs.
+ */
+int rte_uuid_compare(const rte_uuid_t uu1, const rte_uuid_t uu2)
+{
+ struct uuid uuid1, uuid2;
+
+ uuid_unpack(uu1, &uuid1);
+ uuid_unpack(uu2, &uuid2);
+
+#define UUCMP(u1, u2) \
+ do { if (u1 != u2) return (u1 < u2) ? -1 : 1; } while (0)
+
+ UUCMP(uuid1.time_low, uuid2.time_low);
+ UUCMP(uuid1.time_mid, uuid2.time_mid);
+ UUCMP(uuid1.time_hi_and_version, uuid2.time_hi_and_version);
+ UUCMP(uuid1.clock_seq, uuid2.clock_seq);
+#undef UUCMP
+
+ return memcmp(uuid1.node, uuid2.node, 6);
+}
+
+int rte_uuid_parse(const char *in, rte_uuid_t uu)
+{
+ struct uuid uuid;
+ int i;
+ const char *cp;
+ char buf[3];
+
+ if (strlen(in) != 36)
+ return -1;
+
+ for (i = 0, cp = in; i <= 36; i++, cp++) {
+ if ((i == 8) || (i == 13) || (i == 18) ||
+ (i == 23)) {
+ if (*cp == '-')
+ continue;
+ else
+ return -1;
+ }
+ if (i == 36)
+ if (*cp == 0)
+ continue;
+ if (!isxdigit(*cp))
+ return -1;
+ }
+
+ uuid.time_low = strtoul(in, NULL, 16);
+ uuid.time_mid = strtoul(in+9, NULL, 16);
+ uuid.time_hi_and_version = strtoul(in+14, NULL, 16);
+ uuid.clock_seq = strtoul(in+19, NULL, 16);
+ cp = in+24;
+ buf[2] = 0;
+
+ for (i = 0; i < 6; i++) {
+ buf[0] = *cp++;
+ buf[1] = *cp++;
+ uuid.node[i] = strtoul(buf, NULL, 16);
+ }
+
+ uuid_pack(&uuid, uu);
+ return 0;
+}
+
+void rte_uuid_unparse(const rte_uuid_t uu, char *out, size_t len)
+{
+ struct uuid uuid;
+
+ uuid_unpack(uu, &uuid);
+
+ snprintf(out, len,
+ "%08x-%04x-%04x-%02x%02x-%02x%02x%02x%02x%02x%02x",
+ uuid.time_low, uuid.time_mid, uuid.time_hi_and_version,
+ uuid.clock_seq >> 8, uuid.clock_seq & 0xFF,
+ uuid.node[0], uuid.node[1], uuid.node[2],
+ uuid.node[3], uuid.node[4], uuid.node[5]);
+}
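
The new rte_uuid helpers round-trip between the 36-character text form and the 16-byte packed form. A quick hedged usage sketch; RTE_UUID_STRLEN is assumed to be the 37-byte buffer size defined alongside rte_uuid_t in rte_uuid.h:

    #include <stdio.h>

    #include <rte_uuid.h>

    int
    main(void)
    {
            rte_uuid_t uu;
            /* assumed: RTE_UUID_STRLEN == 36 chars + NUL */
            char out[RTE_UUID_STRLEN];

            if (rte_uuid_parse("12345678-9abc-def0-1234-56789abcdef0", uu) < 0)
                    return 1;

            rte_uuid_unparse(uu, out, sizeof(out));
            printf("%s (null: %d)\n", out, rte_uuid_is_null(uu));
            return 0;
    }
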
diff --git a/lib/librte_eal/common/eal_filesystem.h b/lib/librte_eal/common/eal_filesystem.h
index 4708dd54..de05febf 100644
--- a/lib/librte_eal/common/eal_filesystem.h
+++ b/lib/librte_eal/common/eal_filesystem.h
@@ -12,7 +12,6 @@
#define EAL_FILESYSTEM_H
/** Path of rte config file. */
-#define RUNTIME_CONFIG_FMT "%s/.%s_config"
#include <stdint.h>
#include <limits.h>
@@ -22,60 +21,70 @@
#include <rte_string_fns.h>
#include "eal_internal_cfg.h"
-static const char *default_config_dir = "/var/run";
+/* sets up platform-specific runtime data dir */
+int
+eal_create_runtime_dir(void);
+/* returns runtime dir */
+const char *
+eal_get_runtime_dir(void);
+
+#define RUNTIME_CONFIG_FNAME "config"
static inline const char *
eal_runtime_config_path(void)
{
static char buffer[PATH_MAX]; /* static so auto-zeroed */
- const char *directory = default_config_dir;
- const char *home_dir = getenv("HOME");
- if (getuid() != 0 && home_dir != NULL)
- directory = home_dir;
- snprintf(buffer, sizeof(buffer) - 1, RUNTIME_CONFIG_FMT, directory,
- internal_config.hugefile_prefix);
+ snprintf(buffer, sizeof(buffer) - 1, "%s/%s", eal_get_runtime_dir(),
+ RUNTIME_CONFIG_FNAME);
return buffer;
}
/** Path of primary/secondary communication unix socket file. */
-#define MP_SOCKET_PATH_FMT "%s/.%s_unix"
+#define MP_SOCKET_FNAME "mp_socket"
static inline const char *
eal_mp_socket_path(void)
{
static char buffer[PATH_MAX]; /* static so auto-zeroed */
- const char *directory = default_config_dir;
- const char *home_dir = getenv("HOME");
- if (getuid() != 0 && home_dir != NULL)
- directory = home_dir;
- snprintf(buffer, sizeof(buffer) - 1, MP_SOCKET_PATH_FMT,
- directory, internal_config.hugefile_prefix);
+ snprintf(buffer, sizeof(buffer) - 1, "%s/%s", eal_get_runtime_dir(),
+ MP_SOCKET_FNAME);
+ return buffer;
+}
+#define FBARRAY_NAME_FMT "%s/fbarray_%s"
+static inline const char *
+eal_get_fbarray_path(char *buffer, size_t buflen, const char *name) {
+ snprintf(buffer, buflen, FBARRAY_NAME_FMT, eal_get_runtime_dir(), name);
return buffer;
}
/** Path of hugepage info file. */
-#define HUGEPAGE_INFO_FMT "%s/.%s_hugepage_info"
-
+#define HUGEPAGE_INFO_FNAME "hugepage_info"
static inline const char *
eal_hugepage_info_path(void)
{
static char buffer[PATH_MAX]; /* static so auto-zeroed */
- const char *directory = default_config_dir;
- const char *home_dir = getenv("HOME");
- if (getuid() != 0 && home_dir != NULL)
- directory = home_dir;
- snprintf(buffer, sizeof(buffer) - 1, HUGEPAGE_INFO_FMT, directory,
- internal_config.hugefile_prefix);
+ snprintf(buffer, sizeof(buffer) - 1, "%s/%s", eal_get_runtime_dir(),
+ HUGEPAGE_INFO_FNAME);
+ return buffer;
+}
+
+/** Path of hugepage data file. */
+#define HUGEPAGE_DATA_FNAME "hugepage_data"
+static inline const char *
+eal_hugepage_data_path(void)
+{
+ static char buffer[PATH_MAX]; /* static so auto-zeroed */
+
+ snprintf(buffer, sizeof(buffer) - 1, "%s/%s", eal_get_runtime_dir(),
+ HUGEPAGE_DATA_FNAME);
return buffer;
}
/** String format for hugepage map files. */
#define HUGEFILE_FMT "%s/%smap_%d"
-#define TEMP_HUGEFILE_FMT "%s/%smap_temp_%d"
-
static inline const char *
eal_get_hugefile_path(char *buffer, size_t buflen, const char *hugedir, int f_id)
{
@@ -85,6 +94,17 @@ eal_get_hugefile_path(char *buffer, size_t buflen, const char *hugedir, int f_id
return buffer;
}
+/** String format for hugepage map lock files. */
+#define HUGEFILE_LOCK_FMT "%s/map_%d.lock"
+static inline const char *
+eal_get_hugefile_lock_path(char *buffer, size_t buflen, int f_id)
+{
+ snprintf(buffer, buflen, HUGEFILE_LOCK_FMT, eal_get_runtime_dir(),
+ f_id);
+ buffer[buflen - 1] = '\0';
+ return buffer;
+}
+
/** define the default filename prefix for the %s values above */
#define HUGEFILE_PREFIX_DEFAULT "rte"
diff --git a/lib/librte_eal/common/eal_hugepages.h b/lib/librte_eal/common/eal_hugepages.h
index 1d519bbb..4582f19c 100644
--- a/lib/librte_eal/common/eal_hugepages.h
+++ b/lib/librte_eal/common/eal_hugepages.h
@@ -22,14 +22,19 @@ struct hugepage_file {
size_t size; /**< the page size */
int socket_id; /**< NUMA socket ID */
int file_id; /**< the '%d' in HUGEFILE_FMT */
- int memseg_id; /**< the memory segment to which page belongs */
char filepath[MAX_HUGEPAGE_PATH]; /**< path to backing file on filesystem */
};
/**
- * Read the information from linux on what hugepages are available
- * for the EAL to use
+ * Read the information on what hugepages are available for the EAL to use,
+ * clearing out any unused ones.
*/
int eal_hugepage_info_init(void);
+/**
+ * Read the hugepage information that the primary process has shared, into
+ * the secondary process.
+ */
+int eal_hugepage_info_read(void);
+
#endif /* EAL_HUGEPAGES_H */
diff --git a/lib/librte_eal/common/eal_internal_cfg.h b/lib/librte_eal/common/eal_internal_cfg.h
index 1169fcc3..00ee6e06 100644
--- a/lib/librte_eal/common/eal_internal_cfg.h
+++ b/lib/librte_eal/common/eal_internal_cfg.h
@@ -21,9 +21,9 @@
*/
struct hugepage_info {
uint64_t hugepage_sz; /**< size of a huge page */
- const char *hugedir; /**< dir where hugetlbfs is mounted */
+ char hugedir[PATH_MAX]; /**< dir where hugetlbfs is mounted */
uint32_t num_pages[RTE_MAX_NUMA_NODES];
- /**< number of hugepages of that size on each socket */
+ /**< number of hugepages of that size on each socket */
int lock_descriptor; /**< file descriptor for hugepage dir */
};
@@ -41,12 +41,26 @@ struct internal_config {
volatile unsigned vmware_tsc_map; /**< true to use VMware TSC mapping
* instead of native TSC */
volatile unsigned no_shconf; /**< true if there is no shared config */
+ volatile unsigned in_memory;
+ /**< true if DPDK should operate entirely in-memory and not create any
+ * shared files or runtime data.
+ */
volatile unsigned create_uio_dev; /**< true to create /dev/uioX devices */
volatile enum rte_proc_type_t process_type; /**< multi-process proc type */
/** true to try allocating memory on specific sockets */
volatile unsigned force_sockets;
volatile uint64_t socket_mem[RTE_MAX_NUMA_NODES]; /**< amount of memory per socket */
+ volatile unsigned force_socket_limits;
+ volatile uint64_t socket_limit[RTE_MAX_NUMA_NODES]; /**< limit amount of memory per socket */
uintptr_t base_virtaddr; /**< base address to try and reserve memory from */
+ volatile unsigned legacy_mem;
+ /**< true to enable legacy memory behavior (no dynamic allocation,
+ * IOVA-contiguous segments).
+ */
+ volatile unsigned single_file_segments;
+ /**< true if storing all pages within single files (per-page-size,
+	 * per-node); non-legacy mode only.
+ */
volatile int syslog_facility; /**< facility passed to openlog() */
/** default interrupt mode for VFIO */
volatile enum rte_intr_mode vfio_intr_mode;
@@ -56,6 +70,8 @@ struct internal_config {
/**< user defined mbuf pool ops name */
unsigned num_hugepage_sizes; /**< how many sizes on this system */
struct hugepage_info hugepage_info[MAX_HUGEPAGE_SIZES];
+ volatile unsigned int init_complete;
+ /**< indicates whether EAL has completed initialization */
};
extern struct internal_config internal_config; /**< Global EAL configuration. */
diff --git a/lib/librte_eal/common/eal_memalloc.h b/lib/librte_eal/common/eal_memalloc.h
new file mode 100644
index 00000000..36bb1a02
--- /dev/null
+++ b/lib/librte_eal/common/eal_memalloc.h
@@ -0,0 +1,82 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2017-2018 Intel Corporation
+ */
+
+#ifndef EAL_MEMALLOC_H
+#define EAL_MEMALLOC_H
+
+#include <stdbool.h>
+
+#include <rte_memory.h>
+#include <rte_eal_memconfig.h>
+
+/*
+ * Allocate segment of specified page size.
+ */
+struct rte_memseg *
+eal_memalloc_alloc_seg(size_t page_sz, int socket);
+
+/*
+ * Allocate `n_segs` segments.
+ *
+ * Note: `ms` can be NULL.
+ *
+ * Note: it is possible to request best-effort allocation by setting `exact` to
+ * `false`, in which case the allocator will return however many pages it
+ * managed to allocate successfully.
+ */
+int
+eal_memalloc_alloc_seg_bulk(struct rte_memseg **ms, int n_segs, size_t page_sz,
+ int socket, bool exact);
+
+/*
+ * Deallocate segment
+ */
+int
+eal_memalloc_free_seg(struct rte_memseg *ms);
+
+/*
+ * Deallocate `n_segs` segments. Returns 0 on successful deallocation of all
+ * segments, or -1 on error. Any segments that can be deallocated will be
+ * deallocated even in case of error.
+ */
+int
+eal_memalloc_free_seg_bulk(struct rte_memseg **ms, int n_segs);
+
+/*
+ * Check if memory pointed to by `start` and of `length` that resides in
+ * memseg list `msl` is IOVA-contiguous.
+ */
+bool
+eal_memalloc_is_contig(const struct rte_memseg_list *msl, void *start,
+ size_t len);
+
+/* synchronize local memory map to primary process */
+int
+eal_memalloc_sync_with_primary(void);
+
+int
+eal_memalloc_mem_event_callback_register(const char *name,
+ rte_mem_event_callback_t clb, void *arg);
+
+int
+eal_memalloc_mem_event_callback_unregister(const char *name, void *arg);
+
+void
+eal_memalloc_mem_event_notify(enum rte_mem_event event, const void *start,
+ size_t len);
+
+int
+eal_memalloc_mem_alloc_validator_register(const char *name,
+ rte_mem_alloc_validator_t clb, int socket_id, size_t limit);
+
+int
+eal_memalloc_mem_alloc_validator_unregister(const char *name, int socket_id);
+
+int
+eal_memalloc_mem_alloc_validate(int socket_id, size_t new_len);
+
+int
+eal_memalloc_init(void);
+
+#endif /* EAL_MEMALLOC_H */
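
These are EAL-internal allocator hooks rather than public API. Inside the EAL, a best-effort bulk allocation would look roughly like the following sketch; the page size and socket are arbitrary example values, and the return value is taken, per the comments above, to be the number of segments obtained:

    #include "eal_memalloc.h"

    /* Try to grab up to 8 2MB pages on socket 0; exact == false means the
     * allocator may return fewer segments than requested.
     */
    static int
    grab_some_pages(struct rte_memseg *ms[8])
    {
            int n = eal_memalloc_alloc_seg_bulk(ms, 8, 2 << 20, 0, false);

            if (n <= 0)
                    return -1;
            /* ... use the segments ... */
            return eal_memalloc_free_seg_bulk(ms, n);
    }
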
diff --git a/lib/librte_eal/common/eal_options.h b/lib/librte_eal/common/eal_options.h
index e86c7114..96e16678 100644
--- a/lib/librte_eal/common/eal_options.h
+++ b/lib/librte_eal/common/eal_options.h
@@ -45,8 +45,12 @@ enum {
OPT_NO_PCI_NUM,
#define OPT_NO_SHCONF "no-shconf"
OPT_NO_SHCONF_NUM,
+#define OPT_IN_MEMORY "in-memory"
+ OPT_IN_MEMORY_NUM,
#define OPT_SOCKET_MEM "socket-mem"
OPT_SOCKET_MEM_NUM,
+#define OPT_SOCKET_LIMIT "socket-limit"
+ OPT_SOCKET_LIMIT_NUM,
#define OPT_SYSLOG "syslog"
OPT_SYSLOG_NUM,
#define OPT_VDEV "vdev"
@@ -55,6 +59,10 @@ enum {
OPT_VFIO_INTR_NUM,
#define OPT_VMWARE_TSC_MAP "vmware-tsc-map"
OPT_VMWARE_TSC_MAP_NUM,
+#define OPT_LEGACY_MEM "legacy-mem"
+ OPT_LEGACY_MEM_NUM,
+#define OPT_SINGLE_FILE_SEGMENTS "single-file-segments"
+ OPT_SINGLE_FILE_SEGMENTS_NUM,
OPT_LONG_MAX_NUM
};
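
Adding an EAL long option such as --legacy-mem touches three places: a name macro plus *_NUM enum entry here, a row in eal_long_options[], and a case in eal_parse_common_option(). A condensed, self-contained sketch of the same wiring using a made-up --example-opt and plain getopt_long():

    #include <getopt.h>
    #include <stdio.h>

    /* name macro + numeric id, mirroring OPT_*/OPT_*_NUM above */
    #define OPT_EXAMPLE "example-opt"
    enum { OPT_EXAMPLE_NUM = 256 };

    static const struct option long_options[] = {
            { OPT_EXAMPLE, 0, NULL, OPT_EXAMPLE_NUM },
            { 0, 0, NULL, 0 }
    };

    int
    main(int argc, char **argv)
    {
            int opt, example_flag = 0;

            while ((opt = getopt_long(argc, argv, "", long_options, NULL)) != -1) {
                    switch (opt) {
                    case OPT_EXAMPLE_NUM:
                            example_flag = 1;   /* set the config flag */
                            break;
                    default:
                            break;
                    }
            }
            printf("--" OPT_EXAMPLE ": %d\n", example_flag);
            return 0;
    }
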
diff --git a/lib/librte_eal/common/eal_private.h b/lib/librte_eal/common/eal_private.h
index 0b287700..4f809a83 100644
--- a/lib/librte_eal/common/eal_private.h
+++ b/lib/librte_eal/common/eal_private.h
@@ -9,6 +9,8 @@
#include <stdint.h>
#include <stdio.h>
+#include <rte_dev.h>
+
/**
* Initialize the memzone subsystem (private to eal).
*
@@ -45,6 +47,18 @@ void eal_log_set_default(FILE *default_log);
int rte_eal_cpu_init(void);
/**
+ * Create memseg lists
+ *
+ * This function is private to EAL.
+ *
+ * Preallocate virtual memory.
+ *
+ * @return
+ * 0 on success, negative on error
+ */
+int rte_eal_memseg_init(void);
+
+/**
* Map memory
*
* This function is private to EAL.
@@ -81,6 +95,12 @@ int rte_eal_timer_init(void);
int rte_eal_log_init(const char *id, int facility);
/**
+ * Save the log regexp for later
+ */
+int rte_log_save_regexp(const char *type, int priority);
+int rte_log_save_pattern(const char *pattern, int priority);
+
+/**
* Init tail queues for non-EAL library structures. This is to allow
* the rings, mempools, etc. lists to be shared among multiple processes
*
@@ -127,6 +147,39 @@ int rte_eal_alarm_init(void);
int rte_eal_check_module(const char *module_name);
/**
+ * Get virtual area of specified size from the OS.
+ *
+ * This function is private to the EAL.
+ *
+ * @param requested_addr
+ * Address where to request address space.
+ * @param size
+ * Size of requested area.
+ * @param page_sz
+ * Page size on which to align requested virtual area.
+ * @param flags
+ * EAL_VIRTUAL_AREA_* flags.
+ * @param mmap_flags
+ * Extra flags passed directly to mmap().
+ *
+ * @return
+ * Virtual area address if successful.
+ * NULL if unsuccessful.
+ */
+
+#define EAL_VIRTUAL_AREA_ADDR_IS_HINT (1 << 0)
+/**< don't fail if cannot get exact requested address. */
+#define EAL_VIRTUAL_AREA_ALLOW_SHRINK (1 << 1)
+/**< try getting smaller sized (decrement by page size) virtual areas if cannot
+ * get area of requested size.
+ */
+#define EAL_VIRTUAL_AREA_UNMAP (1 << 2)
+/**< immediately unmap reserved virtual area. */
+void *
+eal_get_virtual_area(void *requested_addr, size_t *size,
+ size_t page_sz, int flags, int mmap_flags);
+
+/**
* Get cpu core_id.
*
* This function is private to the EAL.
@@ -205,4 +258,50 @@ struct rte_bus *rte_bus_find_by_device_name(const char *str);
int rte_mp_channel_init(void);
+/**
+ * @internal
+ * Execute all user application callbacks registered for the given device.
+ * This is for DPDK internal use only; applications should not call it
+ * directly.
+ *
+ * @param device_name
+ * The device name.
+ * @param event
+ * the device event type.
+ */
+void dev_callback_process(char *device_name, enum rte_dev_event_type event);
+
+/**
+ * @internal
+ * Parse a device string and store its information in an
+ * rte_devargs structure.
+ *
+ * A device description is split by layers of abstraction of the device:
+ * bus, class and driver. Each layer will offer a set of properties that
+ * can be applied either to configure or recognize a device.
+ *
+ * This function will parse those properties and prepare the rte_devargs
+ * to be given to each layers for processing.
+ *
+ * Note: if the "data" field of the devargs points to devstr,
+ * then no dynamic allocation is performed and the rte_devargs
+ * can be safely discarded.
+ *
+ * Otherwise ``data`` will hold a workable copy of devstr, that will be
+ * used by layers descriptors within rte_devargs. In this case,
+ * any rte_devargs should be cleaned-up before being freed.
+ *
+ * @param da
+ * rte_devargs structure to fill.
+ *
+ * @param devstr
+ * Device string.
+ *
+ * @return
+ * 0 on success.
+ * Negative errno values on error (rte_errno is set).
+ */
+int
+rte_devargs_layers_parse(struct rte_devargs *devargs,
+ const char *devstr);
+
#endif /* _EAL_PRIVATE_H_ */
diff --git a/lib/librte_eal/common/include/arch/arm/rte_atomic.h b/lib/librte_eal/common/include/arch/arm/rte_atomic.h
index f3f3b6e3..40e14e56 100644
--- a/lib/librte_eal/common/include/arch/arm/rte_atomic.h
+++ b/lib/librte_eal/common/include/arch/arm/rte_atomic.h
@@ -1,33 +1,5 @@
-/*-
- * BSD LICENSE
- *
- * Copyright(c) 2015 RehiveTech. All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * * Neither the name of RehiveTech nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2015 RehiveTech. All rights reserved.
*/
#ifndef _RTE_ATOMIC_ARM_H_
diff --git a/lib/librte_eal/common/include/arch/arm/rte_atomic_32.h b/lib/librte_eal/common/include/arch/arm/rte_atomic_32.h
index d2b7fa20..859562e5 100644
--- a/lib/librte_eal/common/include/arch/arm/rte_atomic_32.h
+++ b/lib/librte_eal/common/include/arch/arm/rte_atomic_32.h
@@ -1,33 +1,5 @@
-/*-
- * BSD LICENSE
- *
- * Copyright(c) 2015 RehiveTech. All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * * Neither the name of RehiveTech nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2015 RehiveTech. All rights reserved.
*/
#ifndef _RTE_ATOMIC_ARM32_H_
diff --git a/lib/librte_eal/common/include/arch/arm/rte_byteorder.h b/lib/librte_eal/common/include/arch/arm/rte_byteorder.h
index 8af0a39a..9ec4a975 100644
--- a/lib/librte_eal/common/include/arch/arm/rte_byteorder.h
+++ b/lib/librte_eal/common/include/arch/arm/rte_byteorder.h
@@ -1,33 +1,5 @@
-/*
- * BSD LICENSE
- *
- * Copyright(c) 2015 RehiveTech. All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * * Neither the name of RehiveTech nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2015 RehiveTech. All rights reserved.
*/
#ifndef _RTE_BYTEORDER_ARM_H_
diff --git a/lib/librte_eal/common/include/arch/arm/rte_cpuflags.h b/lib/librte_eal/common/include/arch/arm/rte_cpuflags.h
index b8f62889..022e7da5 100644
--- a/lib/librte_eal/common/include/arch/arm/rte_cpuflags.h
+++ b/lib/librte_eal/common/include/arch/arm/rte_cpuflags.h
@@ -1,33 +1,5 @@
-/*
- * BSD LICENSE
- *
- * Copyright(c) 2015 RehiveTech. All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * * Neither the name of RehiveTech nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2015 RehiveTech. All rights reserved.
*/
#ifndef _RTE_CPUFLAGS_ARM_H_
diff --git a/lib/librte_eal/common/include/arch/arm/rte_cpuflags_32.h b/lib/librte_eal/common/include/arch/arm/rte_cpuflags_32.h
index eb02d9b9..b5347be1 100644
--- a/lib/librte_eal/common/include/arch/arm/rte_cpuflags_32.h
+++ b/lib/librte_eal/common/include/arch/arm/rte_cpuflags_32.h
@@ -1,33 +1,5 @@
-/*
- * BSD LICENSE
- *
- * Copyright(c) 2015 RehiveTech. All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * * Neither the name of RehiveTech nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2015 RehiveTech. All rights reserved.
*/
#ifndef _RTE_CPUFLAGS_ARM32_H_
diff --git a/lib/librte_eal/common/include/arch/arm/rte_cycles.h b/lib/librte_eal/common/include/arch/arm/rte_cycles.h
index a8009a06..e8ffa894 100644
--- a/lib/librte_eal/common/include/arch/arm/rte_cycles.h
+++ b/lib/librte_eal/common/include/arch/arm/rte_cycles.h
@@ -1,33 +1,5 @@
-/*
- * BSD LICENSE
- *
- * Copyright(c) 2015 RehiveTech. All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * * Neither the name of RehiveTech nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2015 RehiveTech. All rights reserved.
*/
#ifndef _RTE_CYCLES_ARM_H_
diff --git a/lib/librte_eal/common/include/arch/arm/rte_cycles_32.h b/lib/librte_eal/common/include/arch/arm/rte_cycles_32.h
index 9c1be71e..c4f974fe 100644
--- a/lib/librte_eal/common/include/arch/arm/rte_cycles_32.h
+++ b/lib/librte_eal/common/include/arch/arm/rte_cycles_32.h
@@ -1,33 +1,5 @@
-/*
- * BSD LICENSE
- *
- * Copyright(c) 2015 RehiveTech. All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * * Neither the name of RehiveTech nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2015 RehiveTech. All rights reserved.
*/
#ifndef _RTE_CYCLES_ARM32_H_
diff --git a/lib/librte_eal/common/include/arch/arm/rte_memcpy.h b/lib/librte_eal/common/include/arch/arm/rte_memcpy.h
index 1d562c3f..47dea9a8 100644
--- a/lib/librte_eal/common/include/arch/arm/rte_memcpy.h
+++ b/lib/librte_eal/common/include/arch/arm/rte_memcpy.h
@@ -1,33 +1,5 @@
-/*
- * BSD LICENSE
- *
- * Copyright(c) 2015 RehiveTech. All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * * Neither the name of RehiveTech nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2015 RehiveTech. All rights reserved.
*/
#ifndef _RTE_MEMCPY_ARM_H_
diff --git a/lib/librte_eal/common/include/arch/arm/rte_memcpy_32.h b/lib/librte_eal/common/include/arch/arm/rte_memcpy_32.h
index e4dafda1..eb02c3b4 100644
--- a/lib/librte_eal/common/include/arch/arm/rte_memcpy_32.h
+++ b/lib/librte_eal/common/include/arch/arm/rte_memcpy_32.h
@@ -1,33 +1,5 @@
-/*
- * BSD LICENSE
- *
- * Copyright(c) 2015 RehiveTech. All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * * Neither the name of RehiveTech nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2015 RehiveTech. All rights reserved.
*/
#ifndef _RTE_MEMCPY_ARM32_H_
diff --git a/lib/librte_eal/common/include/arch/arm/rte_prefetch.h b/lib/librte_eal/common/include/arch/arm/rte_prefetch.h
index aa37de57..27870c2a 100644
--- a/lib/librte_eal/common/include/arch/arm/rte_prefetch.h
+++ b/lib/librte_eal/common/include/arch/arm/rte_prefetch.h
@@ -1,33 +1,5 @@
-/*
- * BSD LICENSE
- *
- * Copyright(c) 2015 RehiveTech. All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * * Neither the name of RehiveTech nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2015 RehiveTech. All rights reserved.
*/
#ifndef _RTE_PREFETCH_ARM_H_
diff --git a/lib/librte_eal/common/include/arch/arm/rte_prefetch_32.h b/lib/librte_eal/common/include/arch/arm/rte_prefetch_32.h
index 43cde172..e53420a0 100644
--- a/lib/librte_eal/common/include/arch/arm/rte_prefetch_32.h
+++ b/lib/librte_eal/common/include/arch/arm/rte_prefetch_32.h
@@ -1,33 +1,5 @@
-/*
- * BSD LICENSE
- *
- * Copyright(c) 2015 RehiveTech. All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * * Neither the name of RehiveTech nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2015 RehiveTech. All rights reserved.
*/
#ifndef _RTE_PREFETCH_ARM32_H_
diff --git a/lib/librte_eal/common/include/arch/arm/rte_rwlock.h b/lib/librte_eal/common/include/arch/arm/rte_rwlock.h
index 664bec88..18bb37b0 100644
--- a/lib/librte_eal/common/include/arch/arm/rte_rwlock.h
+++ b/lib/librte_eal/common/include/arch/arm/rte_rwlock.h
@@ -1,3 +1,5 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ */
/* copied from ppc_64 */
#ifndef _RTE_RWLOCK_ARM_H_
diff --git a/lib/librte_eal/common/include/arch/arm/rte_spinlock.h b/lib/librte_eal/common/include/arch/arm/rte_spinlock.h
index 396a42e8..1a6916b6 100644
--- a/lib/librte_eal/common/include/arch/arm/rte_spinlock.h
+++ b/lib/librte_eal/common/include/arch/arm/rte_spinlock.h
@@ -1,33 +1,5 @@
-/*
- * BSD LICENSE
- *
- * Copyright(c) 2015 RehiveTech. All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * * Neither the name of RehiveTech nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2015 RehiveTech. All rights reserved.
*/
#ifndef _RTE_SPINLOCK_ARM_H_
diff --git a/lib/librte_eal/common/include/arch/ppc_64/rte_atomic.h b/lib/librte_eal/common/include/arch/ppc_64/rte_atomic.h
index 39fce7b9..ce38350b 100644
--- a/lib/librte_eal/common/include/arch/ppc_64/rte_atomic.h
+++ b/lib/librte_eal/common/include/arch/ppc_64/rte_atomic.h
@@ -55,7 +55,7 @@ extern "C" {
* Guarantees that the LOAD and STORE operations generated before the
* barrier occur before the LOAD and STORE operations generated after.
*/
-#define rte_mb() {asm volatile("sync" : : : "memory"); }
+#define rte_mb() asm volatile("sync" : : : "memory")
/**
* Write memory barrier.
@@ -136,6 +136,12 @@ static inline int rte_atomic16_dec_and_test(rte_atomic16_t *v)
return __atomic_sub_fetch(&v->cnt, 1, __ATOMIC_ACQUIRE) == 0;
}
+static inline uint16_t
+rte_atomic16_exchange(volatile uint16_t *dst, uint16_t val)
+{
+ return __atomic_exchange_2(dst, val, __ATOMIC_SEQ_CST);
+}
+
/*------------------------- 32 bit atomic operations -------------------------*/
static inline int
@@ -237,6 +243,13 @@ static inline int rte_atomic32_dec_and_test(rte_atomic32_t *v)
return ret == 0;
}
+
+static inline uint32_t
+rte_atomic32_exchange(volatile uint32_t *dst, uint32_t val)
+{
+ return __atomic_exchange_4(dst, val, __ATOMIC_SEQ_CST);
+}
+
/*------------------------- 64 bit atomic operations -------------------------*/
static inline int
@@ -431,7 +444,6 @@ static inline int rte_atomic64_test_and_set(rte_atomic64_t *v)
{
return rte_atomic64_cmpset((volatile uint64_t *)&v->cnt, 0, 1);
}
-
/**
* Atomically set a 64-bit counter to 0.
*
@@ -442,6 +454,13 @@ static inline void rte_atomic64_clear(rte_atomic64_t *v)
{
v->cnt = 0;
}
+
+static inline uint64_t
+rte_atomic64_exchange(volatile uint64_t *dst, uint64_t val)
+{
+ return __atomic_exchange_8(dst, val, __ATOMIC_SEQ_CST);
+}
+
#endif
#ifdef __cplusplus
diff --git a/lib/librte_eal/common/include/arch/ppc_64/rte_rwlock.h b/lib/librte_eal/common/include/arch/ppc_64/rte_rwlock.h
index de8af19e..9fadc040 100644
--- a/lib/librte_eal/common/include/arch/ppc_64/rte_rwlock.h
+++ b/lib/librte_eal/common/include/arch/ppc_64/rte_rwlock.h
@@ -1,3 +1,5 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ */
#ifndef _RTE_RWLOCK_PPC_64_H_
#define _RTE_RWLOCK_PPC_64_H_
diff --git a/lib/librte_eal/common/include/arch/x86/rte_atomic.h b/lib/librte_eal/common/include/arch/x86/rte_atomic.h
index 5cfd3832..148398f5 100644
--- a/lib/librte_eal/common/include/arch/x86/rte_atomic.h
+++ b/lib/librte_eal/common/include/arch/x86/rte_atomic.h
@@ -104,6 +104,18 @@ rte_atomic16_cmpset(volatile uint16_t *dst, uint16_t exp, uint16_t src)
return res;
}
+static inline uint16_t
+rte_atomic16_exchange(volatile uint16_t *dst, uint16_t val)
+{
+ asm volatile(
+ MPLOCKED
+ "xchgw %0, %1;"
+ : "=r" (val), "=m" (*dst)
+ : "0" (val), "m" (*dst)
+ : "memory"); /* no-clobber list */
+ return val;
+}
+
static inline int rte_atomic16_test_and_set(rte_atomic16_t *v)
{
return rte_atomic16_cmpset((volatile uint16_t *)&v->cnt, 0, 1);
@@ -178,6 +190,18 @@ rte_atomic32_cmpset(volatile uint32_t *dst, uint32_t exp, uint32_t src)
return res;
}
+static inline uint32_t
+rte_atomic32_exchange(volatile uint32_t *dst, uint32_t val)
+{
+ asm volatile(
+ MPLOCKED
+ "xchgl %0, %1;"
+ : "=r" (val), "=m" (*dst)
+ : "0" (val), "m" (*dst)
+ : "memory"); /* no-clobber list */
+ return val;
+}
+
static inline int rte_atomic32_test_and_set(rte_atomic32_t *v)
{
return rte_atomic32_cmpset((volatile uint32_t *)&v->cnt, 0, 1);
diff --git a/lib/librte_eal/common/include/arch/x86/rte_atomic_32.h b/lib/librte_eal/common/include/arch/x86/rte_atomic_32.h
index fb3abf18..a932f354 100644
--- a/lib/librte_eal/common/include/arch/x86/rte_atomic_32.h
+++ b/lib/librte_eal/common/include/arch/x86/rte_atomic_32.h
@@ -98,6 +98,18 @@ rte_atomic64_cmpset(volatile uint64_t *dst, uint64_t exp, uint64_t src)
return res;
}
+static inline uint64_t
+rte_atomic64_exchange(volatile uint64_t *dest, uint64_t val)
+{
+ uint64_t old;
+
+ do {
+ old = *dest;
+ } while (rte_atomic64_cmpset(dest, old, val) == 0);
+
+ return old;
+}
+
static inline void
rte_atomic64_init(rte_atomic64_t *v)
{
diff --git a/lib/librte_eal/common/include/arch/x86/rte_atomic_64.h b/lib/librte_eal/common/include/arch/x86/rte_atomic_64.h
index 1a53a766..fd2ec9c5 100644
--- a/lib/librte_eal/common/include/arch/x86/rte_atomic_64.h
+++ b/lib/librte_eal/common/include/arch/x86/rte_atomic_64.h
@@ -71,6 +71,18 @@ rte_atomic64_cmpset(volatile uint64_t *dst, uint64_t exp, uint64_t src)
return res;
}
+static inline uint64_t
+rte_atomic64_exchange(volatile uint64_t *dst, uint64_t val)
+{
+ asm volatile(
+ MPLOCKED
+ "xchgq %0, %1;"
+ : "=r" (val), "=m" (*dst)
+ : "0" (val), "m" (*dst)
+ : "memory"); /* no-clobber list */
+ return val;
+}
+
static inline void
rte_atomic64_init(rte_atomic64_t *v)
{
diff --git a/lib/librte_eal/common/include/arch/x86/rte_memcpy.h b/lib/librte_eal/common/include/arch/x86/rte_memcpy.h
index cc140ecc..7b758094 100644
--- a/lib/librte_eal/common/include/arch/x86/rte_memcpy.h
+++ b/lib/librte_eal/common/include/arch/x86/rte_memcpy.h
@@ -52,7 +52,7 @@ rte_memcpy(void *dst, const void *src, size_t n);
* Copy 16 bytes from one location to another,
* locations should not overlap.
*/
-static inline void
+static __rte_always_inline void
rte_mov16(uint8_t *dst, const uint8_t *src)
{
__m128i xmm0;
@@ -65,7 +65,7 @@ rte_mov16(uint8_t *dst, const uint8_t *src)
* Copy 32 bytes from one location to another,
* locations should not overlap.
*/
-static inline void
+static __rte_always_inline void
rte_mov32(uint8_t *dst, const uint8_t *src)
{
__m256i ymm0;
@@ -78,7 +78,7 @@ rte_mov32(uint8_t *dst, const uint8_t *src)
* Copy 64 bytes from one location to another,
* locations should not overlap.
*/
-static inline void
+static __rte_always_inline void
rte_mov64(uint8_t *dst, const uint8_t *src)
{
__m512i zmm0;
@@ -91,7 +91,7 @@ rte_mov64(uint8_t *dst, const uint8_t *src)
* Copy 128 bytes from one location to another,
* locations should not overlap.
*/
-static inline void
+static __rte_always_inline void
rte_mov128(uint8_t *dst, const uint8_t *src)
{
rte_mov64(dst + 0 * 64, src + 0 * 64);
@@ -102,7 +102,7 @@ rte_mov128(uint8_t *dst, const uint8_t *src)
* Copy 256 bytes from one location to another,
* locations should not overlap.
*/
-static inline void
+static __rte_always_inline void
rte_mov256(uint8_t *dst, const uint8_t *src)
{
rte_mov64(dst + 0 * 64, src + 0 * 64);
@@ -293,7 +293,7 @@ COPY_BLOCK_128_BACK63:
* Copy 16 bytes from one location to another,
* locations should not overlap.
*/
-static inline void
+static __rte_always_inline void
rte_mov16(uint8_t *dst, const uint8_t *src)
{
__m128i xmm0;
@@ -306,7 +306,7 @@ rte_mov16(uint8_t *dst, const uint8_t *src)
* Copy 32 bytes from one location to another,
* locations should not overlap.
*/
-static inline void
+static __rte_always_inline void
rte_mov32(uint8_t *dst, const uint8_t *src)
{
__m256i ymm0;
@@ -319,7 +319,7 @@ rte_mov32(uint8_t *dst, const uint8_t *src)
* Copy 64 bytes from one location to another,
* locations should not overlap.
*/
-static inline void
+static __rte_always_inline void
rte_mov64(uint8_t *dst, const uint8_t *src)
{
rte_mov32((uint8_t *)dst + 0 * 32, (const uint8_t *)src + 0 * 32);
@@ -486,7 +486,7 @@ COPY_BLOCK_128_BACK31:
* Copy 16 bytes from one location to another,
* locations should not overlap.
*/
-static inline void
+static __rte_always_inline void
rte_mov16(uint8_t *dst, const uint8_t *src)
{
__m128i xmm0;
@@ -499,7 +499,7 @@ rte_mov16(uint8_t *dst, const uint8_t *src)
* Copy 32 bytes from one location to another,
* locations should not overlap.
*/
-static inline void
+static __rte_always_inline void
rte_mov32(uint8_t *dst, const uint8_t *src)
{
rte_mov16((uint8_t *)dst + 0 * 16, (const uint8_t *)src + 0 * 16);
@@ -510,7 +510,7 @@ rte_mov32(uint8_t *dst, const uint8_t *src)
* Copy 64 bytes from one location to another,
* locations should not overlap.
*/
-static inline void
+static __rte_always_inline void
rte_mov64(uint8_t *dst, const uint8_t *src)
{
rte_mov16((uint8_t *)dst + 0 * 16, (const uint8_t *)src + 0 * 16);
@@ -574,7 +574,7 @@ rte_mov256(uint8_t *dst, const uint8_t *src)
*/
#define MOVEUNALIGNED_LEFT47_IMM(dst, src, len, offset) \
__extension__ ({ \
- int tmp; \
+ size_t tmp; \
while (len >= 128 + 16 - offset) { \
xmm0 = _mm_loadu_si128((const __m128i *)((const uint8_t *)src - offset + 0 * 16)); \
len -= 128; \
diff --git a/lib/librte_eal/common/include/arch/x86/rte_spinlock.h b/lib/librte_eal/common/include/arch/x86/rte_spinlock.h
index 4b16887e..60321da0 100644
--- a/lib/librte_eal/common/include/arch/x86/rte_spinlock.h
+++ b/lib/librte_eal/common/include/arch/x86/rte_spinlock.h
@@ -76,10 +76,12 @@ static inline int rte_tm_supported(void)
static inline int
rte_try_tm(volatile int *lock)
{
+ int retries;
+
if (!rte_rtm_supported)
return 0;
- int retries = RTE_RTM_MAX_RETRIES;
+ retries = RTE_RTM_MAX_RETRIES;
while (likely(retries--)) {
diff --git a/lib/librte_eal/common/include/generic/rte_atomic.h b/lib/librte_eal/common/include/generic/rte_atomic.h
index 50e1b8a4..b99ba468 100644
--- a/lib/librte_eal/common/include/generic/rte_atomic.h
+++ b/lib/librte_eal/common/include/generic/rte_atomic.h
@@ -191,6 +191,36 @@ rte_atomic16_cmpset(volatile uint16_t *dst, uint16_t exp, uint16_t src)
#endif
/**
+ * Atomic exchange.
+ *
+ * (atomic) equivalent to:
+ * ret = *dst;
+ * *dst = val;
+ * return ret;
+ *
+ * @param dst
+ * The destination location into which the value will be written.
+ * @param val
+ * The new value.
+ * @return
+ * The original value at that location
+ */
+static inline uint16_t
+rte_atomic16_exchange(volatile uint16_t *dst, uint16_t val);
+
+#ifdef RTE_FORCE_INTRINSICS
+static inline uint16_t
+rte_atomic16_exchange(volatile uint16_t *dst, uint16_t val)
+{
+#if defined(RTE_ARCH_ARM64) && defined(RTE_TOOLCHAIN_CLANG)
+ return __atomic_exchange_n(dst, val, __ATOMIC_SEQ_CST);
+#else
+ return __atomic_exchange_2(dst, val, __ATOMIC_SEQ_CST);
+#endif
+}
+#endif
+
+/**
* The atomic counter structure.
*/
typedef struct {
@@ -444,6 +474,36 @@ rte_atomic32_cmpset(volatile uint32_t *dst, uint32_t exp, uint32_t src)
#endif
/**
+ * Atomic exchange.
+ *
+ * (atomic) equivalent to:
+ * ret = *dst;
+ * *dst = val;
+ * return ret;
+ *
+ * @param dst
+ * The destination location into which the value will be written.
+ * @param val
+ * The new value.
+ * @return
+ * The original value at that location
+ */
+static inline uint32_t
+rte_atomic32_exchange(volatile uint32_t *dst, uint32_t val);
+
+#ifdef RTE_FORCE_INTRINSICS
+static inline uint32_t
+rte_atomic32_exchange(volatile uint32_t *dst, uint32_t val)
+{
+#if defined(RTE_ARCH_ARM64) && defined(RTE_TOOLCHAIN_CLANG)
+ return __atomic_exchange_n(dst, val, __ATOMIC_SEQ_CST);
+#else
+ return __atomic_exchange_4(dst, val, __ATOMIC_SEQ_CST);
+#endif
+}
+#endif
+
+/**
* The atomic counter structure.
*/
typedef struct {
@@ -696,6 +756,36 @@ rte_atomic64_cmpset(volatile uint64_t *dst, uint64_t exp, uint64_t src)
#endif
/**
+ * Atomic exchange.
+ *
+ * (atomic) equivalent to:
+ * ret = *dst;
+ * *dst = val;
+ * return ret;
+ *
+ * @param dst
+ * The destination location into which the value will be written.
+ * @param val
+ * The new value.
+ * @return
+ * The original value at that location
+ */
+static inline uint64_t
+rte_atomic64_exchange(volatile uint64_t *dst, uint64_t val);
+
+#ifdef RTE_FORCE_INTRINSICS
+static inline uint64_t
+rte_atomic64_exchange(volatile uint64_t *dst, uint64_t val)
+{
+#if defined(RTE_ARCH_ARM64) && defined(RTE_TOOLCHAIN_CLANG)
+ return __atomic_exchange_n(dst, val, __ATOMIC_SEQ_CST);
+#else
+ return __atomic_exchange_8(dst, val, __ATOMIC_SEQ_CST);
+#endif
+}
+#endif
+
+/**
* The atomic counter structure.
*/
typedef struct {
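As a usage sketch (the flag variable and function names are made up), the exchange helpers atomically store the new value and return the previous one, which is enough to build a simple ownership flag:

    #include <stdint.h>
    #include <rte_atomic.h>

    static volatile uint32_t busy; /* 0 = free, 1 = claimed */

    /* Returns 1 if the caller took ownership, 0 if it was already claimed. */
    static int
    try_claim(void)
    {
            return rte_atomic32_exchange(&busy, 1) == 0;
    }

    static void
    release(void)
    {
            rte_atomic32_exchange(&busy, 0);
    }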
diff --git a/lib/librte_eal/common/include/generic/rte_byteorder.h b/lib/librte_eal/common/include/generic/rte_byteorder.h
index 9bed85cc..7d9a1463 100644
--- a/lib/librte_eal/common/include/generic/rte_byteorder.h
+++ b/lib/librte_eal/common/include/generic/rte_byteorder.h
@@ -123,7 +123,7 @@ typedef uint64_t rte_le64_t; /**< 64-bit little-endian value. */
static inline uint16_t
rte_constant_bswap16(uint16_t x)
{
- return RTE_STATIC_BSWAP16(x);
+ return (uint16_t)RTE_STATIC_BSWAP16(x);
}
/*
@@ -135,7 +135,7 @@ rte_constant_bswap16(uint16_t x)
static inline uint32_t
rte_constant_bswap32(uint32_t x)
{
- return RTE_STATIC_BSWAP32(x);
+ return (uint32_t)RTE_STATIC_BSWAP32(x);
}
/*
@@ -147,7 +147,7 @@ rte_constant_bswap32(uint32_t x)
static inline uint64_t
rte_constant_bswap64(uint64_t x)
{
- return RTE_STATIC_BSWAP64(x);
+ return (uint64_t)RTE_STATIC_BSWAP64(x);
}
diff --git a/lib/librte_eal/common/include/generic/rte_cpuflags.h b/lib/librte_eal/common/include/generic/rte_cpuflags.h
index 8d31687d..156ea002 100644
--- a/lib/librte_eal/common/include/generic/rte_cpuflags.h
+++ b/lib/librte_eal/common/include/generic/rte_cpuflags.h
@@ -64,4 +64,25 @@ rte_cpu_check_supported(void);
int
rte_cpu_is_supported(void);
+/**
+ * This function attempts to retrieve a value from the auxiliary vector.
+ * If it is unsuccessful, the result will be 0, and errno will be set.
+ *
+ * @return A value from the auxiliary vector. When the value is 0, check
+ * errno to determine if an error occurred.
+ */
+unsigned long
+rte_cpu_getauxval(unsigned long type);
+
+/**
+ * This function retrieves a value from the auxiliary vector and compares it,
+ * as a string, against the string passed as the second parameter.
+ *
+ * @return The result of calling strcmp() against the value retrieved from
+ * the auxiliary vector. When the value is 0 (meaning a match is found),
+ * check errno to determine if an error occurred.
+ */
+int
+rte_cpu_strcmp_auxval(unsigned long type, const char *str);
+
#endif /* _RTE_CPUFLAGS_H_ */
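A hedged usage sketch of the new auxiliary-vector helpers on Linux; AT_HWCAP and AT_PLATFORM come from <sys/auxv.h>, and the "power9" platform string is only an example:

    #include <errno.h>
    #include <sys/auxv.h>
    #include <rte_cpuflags.h>

    static int
    platform_check_sketch(void)
    {
            unsigned long hwcap = rte_cpu_getauxval(AT_HWCAP);

            if (hwcap == 0 && errno != 0)
                    return -1; /* auxiliary vector not readable */

            /* rte_cpu_strcmp_auxval() returns 0 when the strings match. */
            return rte_cpu_strcmp_auxval(AT_PLATFORM, "power9") == 0;
    }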
diff --git a/lib/librte_eal/common/include/generic/rte_rwlock.h b/lib/librte_eal/common/include/generic/rte_rwlock.h
index 899e9bc4..5751a0e6 100644
--- a/lib/librte_eal/common/include/generic/rte_rwlock.h
+++ b/lib/librte_eal/common/include/generic/rte_rwlock.h
@@ -71,7 +71,7 @@ rte_rwlock_read_lock(rte_rwlock_t *rwl)
continue;
}
success = rte_atomic32_cmpset((volatile uint32_t *)&rwl->cnt,
- x, x + 1);
+ (uint32_t)x, (uint32_t)(x + 1));
}
}
@@ -107,7 +107,7 @@ rte_rwlock_write_lock(rte_rwlock_t *rwl)
continue;
}
success = rte_atomic32_cmpset((volatile uint32_t *)&rwl->cnt,
- 0, -1);
+ 0, (uint32_t)-1);
}
}
diff --git a/lib/librte_eal/common/include/rte_bitmap.h b/lib/librte_eal/common/include/rte_bitmap.h
index 7d4935fc..d9facc64 100644
--- a/lib/librte_eal/common/include/rte_bitmap.h
+++ b/lib/librte_eal/common/include/rte_bitmap.h
@@ -198,12 +198,12 @@ rte_bitmap_get_memory_footprint(uint32_t n_bits) {
/**
* Bitmap initialization
*
- * @param mem_size
- * Minimum expected size of bitmap.
+ * @param n_bits
+ * Number of pre-allocated bits in array2.
* @param mem
* Base address of array1 and array2.
- * @param n_bits
- * Number of pre-allocated bits in array2. Must be non-zero and multiple of 512.
+ * @param mem_size
+ * Minimum expected size of bitmap.
* @return
* Handle to bitmap instance.
*/
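With the corrected parameter order (n_bits, mem, mem_size), a typical initialization looks roughly like the sketch below; aligned_alloc() stands in for rte_zmalloc() and the bit count is illustrative:

    #include <stdlib.h>
    #include <rte_bitmap.h>

    static struct rte_bitmap *
    bitmap_sketch(void)
    {
            uint32_t n_bits = 1024; /* non-zero, multiple of 512 */
            uint32_t sz = rte_bitmap_get_memory_footprint(n_bits);
            /* The backing buffer must be cache-line aligned. */
            uint8_t *mem = aligned_alloc(RTE_CACHE_LINE_SIZE, sz);

            if (mem == NULL)
                    return NULL;
            return rte_bitmap_init(n_bits, mem, sz);
    }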
diff --git a/lib/librte_eal/common/include/rte_bus.h b/lib/librte_eal/common/include/rte_bus.h
index 6fb08341..b7b5b084 100644
--- a/lib/librte_eal/common/include/rte_bus.h
+++ b/lib/librte_eal/common/include/rte_bus.h
@@ -211,6 +211,7 @@ struct rte_bus {
rte_bus_parse_t parse; /**< Parse a device name */
struct rte_bus_conf conf; /**< Bus configuration */
rte_bus_get_iommu_class_t get_iommu_class; /**< Get iommu class */
+ rte_dev_iterate_t dev_iterate; /**< Device iterator. */
};
/**
@@ -325,8 +326,7 @@ enum rte_iova_mode rte_bus_get_iommu_class(void);
* The constructor has higher priority than PMD constructors.
*/
#define RTE_REGISTER_BUS(nm, bus) \
-RTE_INIT_PRIO(businitfn_ ##nm, 110); \
-static void businitfn_ ##nm(void) \
+RTE_INIT_PRIO(businitfn_ ##nm, BUS) \
{\
(bus).name = RTE_STR(nm);\
rte_bus_register(&bus); \
diff --git a/lib/librte_eal/common/include/rte_class.h b/lib/librte_eal/common/include/rte_class.h
new file mode 100644
index 00000000..276c91e9
--- /dev/null
+++ b/lib/librte_eal/common/include/rte_class.h
@@ -0,0 +1,134 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright 2018 Gaƫtan Rivet
+ */
+
+#ifndef _RTE_CLASS_H_
+#define _RTE_CLASS_H_
+
+/**
+ * @file
+ *
+ * DPDK device class interface.
+ *
+ * This file describes the interface of the device class
+ * abstraction layer.
+ *
+ * A device class defines the type of function a device
+ * will be used for e.g.: Ethernet adapter (eth),
+ * cryptographic coprocessor (crypto), etc.
+ */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <sys/queue.h>
+
+#include <rte_dev.h>
+
+/** Double linked list of classes */
+TAILQ_HEAD(rte_class_list, rte_class);
+
+/**
+ * A structure describing a generic device class.
+ */
+struct rte_class {
+ TAILQ_ENTRY(rte_class) next; /**< Next device class in linked list */
+ const char *name; /**< Name of the class */
+ rte_dev_iterate_t dev_iterate; /**< Device iterator. */
+};
+
+/**
+ * Class comparison function.
+ *
+ * @param cls
+ * Class under test.
+ *
+ * @param data
+ * Data to compare against.
+ *
+ * @return
+ * 0 if the class matches the data.
+ * !0 if the class does not match.
+ * <0 if ordering is possible and the class is lower than the data.
+ * >0 if ordering is possible and the class is greater than the data.
+ */
+typedef int (*rte_class_cmp_t)(const struct rte_class *cls, const void *data);
+
+/**
+ * Class iterator to find a particular class.
+ *
+ * This function compares each registered class to find one that matches
+ * the data passed as parameter.
+ *
+ * If the comparison function returns zero this function will stop iterating
+ * over any more classes. To continue a search the class of a previous search
+ * can be passed via the start parameter.
+ *
+ * @param start
+ * Starting point for the iteration.
+ *
+ * @param cmp
+ * Comparison function.
+ *
+ * @param data
+ * Data to pass to comparison function.
+ *
+ * @return
+ * A pointer to a rte_class structure or NULL in case no class matches
+ */
+__rte_experimental
+struct rte_class *
+rte_class_find(const struct rte_class *start, rte_class_cmp_t cmp,
+ const void *data);
+
+/**
+ * Find the registered class for a given name.
+ */
+__rte_experimental
+struct rte_class *
+rte_class_find_by_name(const char *name);
+
+/**
+ * Register a Class handle.
+ *
+ * @param cls
+ * A pointer to a rte_class structure describing the class
+ * to be registered.
+ */
+__rte_experimental
+void rte_class_register(struct rte_class *cls);
+
+/**
+ * Unregister a Class handle.
+ *
+ * @param cls
+ * A pointer to a rte_class structure describing the class
+ * to be unregistered.
+ */
+__rte_experimental
+void rte_class_unregister(struct rte_class *cls);
+
+/**
+ * Helper for Class registration.
+ * The constructor has lower priority than Bus constructors.
+ * The constructor has higher priority than PMD constructors.
+ */
+#define RTE_REGISTER_CLASS(nm, cls) \
+RTE_INIT_PRIO(classinitfn_ ##nm, CLASS) \
+{\
+ (cls).name = RTE_STR(nm); \
+ rte_class_register(&cls); \
+}
+
+#define RTE_UNREGISTER_CLASS(nm, cls) \
+RTE_FINI_PRIO(classfinifn_ ##nm, CLASS) \
+{ \
+ rte_class_unregister(&cls); \
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_CLASS_H_ */
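Registering a class then reduces to filling a struct rte_class and using the helper macro; the "foo" class and its empty iterator below are purely illustrative:

    #include <rte_class.h>

    static void *
    foo_dev_iterate(const void *start, const char *str,
                    const struct rte_dev_iterator *it)
    {
            (void)start; (void)str; (void)it;
            return NULL; /* this sketch exposes no devices */
    }

    static struct rte_class rte_class_foo = {
            .dev_iterate = foo_dev_iterate,
    };

    RTE_REGISTER_CLASS(foo, rte_class_foo);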
diff --git a/lib/librte_eal/common/include/rte_common.h b/lib/librte_eal/common/include/rte_common.h
index c7803e41..069c13ec 100644
--- a/lib/librte_eal/common/include/rte_common.h
+++ b/lib/librte_eal/common/include/rte_common.h
@@ -81,6 +81,26 @@ typedef uint16_t unaligned_uint16_t;
*/
#define RTE_SET_USED(x) (void)(x)
+#define RTE_PRIORITY_LOG 101
+#define RTE_PRIORITY_BUS 110
+#define RTE_PRIORITY_CLASS 120
+#define RTE_PRIORITY_LAST 65535
+
+#define RTE_PRIO(prio) \
+ RTE_PRIORITY_ ## prio
+
+/**
+ * Run function before main() with high priority.
+ *
+ * @param func
+ * Constructor function.
+ * @param prio
+ * Priority number must be above 100.
+ * Lowest number is the first to run.
+ */
+#define RTE_INIT_PRIO(func, prio) \
+static void __attribute__((constructor(RTE_PRIO(prio)), used)) func(void)
+
/**
* Run function before main() with low priority.
*
@@ -90,19 +110,30 @@ typedef uint16_t unaligned_uint16_t;
* Constructor function.
*/
#define RTE_INIT(func) \
-static void __attribute__((constructor, used)) func(void)
+ RTE_INIT_PRIO(func, LAST)
/**
- * Run function before main() with high priority.
+ * Run after main() with low priority.
*
* @param func
- * Constructor function.
+ * Destructor function name.
* @param prio
* Priority number must be above 100.
- * Lowest number is the first to run.
+ * Lowest number is the last to run.
*/
-#define RTE_INIT_PRIO(func, prio) \
-static void __attribute__((constructor(prio), used)) func(void)
+#define RTE_FINI_PRIO(func, prio) \
+static void __attribute__((destructor(RTE_PRIO(prio)), used)) func(void)
+
+/**
+ * Run after main() with high priority.
+ *
+ * The destructor will be run *before* prioritized destructors.
+ *
+ * @param func
+ * Destructor function name.
+ */
+#define RTE_FINI(func) \
+ RTE_FINI_PRIO(func, LAST)
/**
* Force a function to be inlined
@@ -117,7 +148,7 @@ static void __attribute__((constructor(prio), used)) func(void)
/*********** Macros for pointer arithmetic ********/
/**
- * add a byte-value offset from a pointer
+ * add a byte-value offset to a pointer
*/
#define RTE_PTR_ADD(ptr, x) ((void*)((uintptr_t)(ptr) + (x)))
@@ -191,6 +222,22 @@ static void __attribute__((constructor(prio), used)) func(void)
#define RTE_ALIGN(val, align) RTE_ALIGN_CEIL(val, align)
/**
+ * Macro to align a value to a multiple of the given value. The resultant
+ * value will be of the same type as the first parameter and will be no lower
+ * than the first parameter.
+ */
+#define RTE_ALIGN_MUL_CEIL(v, mul) \
+ (((v + (typeof(v))(mul) - 1) / ((typeof(v))(mul))) * (typeof(v))(mul))
+
+/**
+ * Macro to align a value to a multiple of the given value. The resultant
+ * value will be of the same type as the first parameter and will be no higher
+ * than the first parameter.
+ */
+#define RTE_ALIGN_MUL_FLOOR(v, mul) \
+ ((v / ((typeof(v))(mul))) * (typeof(v))(mul))
+
+/**
* Checks if a pointer is aligned to a given power-of-two value
*
* @param ptr
@@ -223,9 +270,59 @@ extern int RTE_BUILD_BUG_ON_detected_error;
} while(0)
#endif
+/**
+ * Combines a 32b input's most significant set bits into the least
+ * significant bits to construct a value with the same MSBs as x
+ * but all 1's under it.
+ *
+ * @param x
+ * The integer whose MSBs need to be combined with its LSBs
+ * @return
+ * The combined value.
+ */
+static inline uint32_t
+rte_combine32ms1b(register uint32_t x)
+{
+ x |= x >> 1;
+ x |= x >> 2;
+ x |= x >> 4;
+ x |= x >> 8;
+ x |= x >> 16;
+
+ return x;
+}
+
+/**
+ * Combines a 64b input's most significant set bits into the least
+ * significant bits to construct a value with the same MSBs as x
+ * but all 1's under it.
+ *
+ * @param v
+ * The integer whose MSBs need to be combined with its LSBs
+ * @return
+ * The combined value.
+ */
+static inline uint64_t
+rte_combine64ms1b(register uint64_t v)
+{
+ v |= v >> 1;
+ v |= v >> 2;
+ v |= v >> 4;
+ v |= v >> 8;
+ v |= v >> 16;
+ v |= v >> 32;
+
+ return v;
+}
+
/*********** Macros to work with powers of 2 ********/
/**
+ * Macro to return 1 if n is a power of 2, 0 otherwise
+ */
+#define RTE_IS_POWER_OF_2(n) ((n) && !(((n) - 1) & (n)))
+
+/**
* Returns true if n is a power of 2
* @param n
* Number to check
@@ -250,16 +347,29 @@ static inline uint32_t
rte_align32pow2(uint32_t x)
{
x--;
- x |= x >> 1;
- x |= x >> 2;
- x |= x >> 4;
- x |= x >> 8;
- x |= x >> 16;
+ x = rte_combine32ms1b(x);
return x + 1;
}
/**
+ * Aligns input parameter to the previous power of 2
+ *
+ * @param x
+ * The integer value to align
+ *
+ * @return
+ * Input parameter aligned to the previous power of 2
+ */
+static inline uint32_t
+rte_align32prevpow2(uint32_t x)
+{
+ x = rte_combine32ms1b(x);
+
+ return x - (x >> 1);
+}
+
+/**
* Aligns 64b input parameter to the next power of 2
*
* @param v
@@ -272,16 +382,28 @@ static inline uint64_t
rte_align64pow2(uint64_t v)
{
v--;
- v |= v >> 1;
- v |= v >> 2;
- v |= v >> 4;
- v |= v >> 8;
- v |= v >> 16;
- v |= v >> 32;
+ v = rte_combine64ms1b(v);
return v + 1;
}
+/**
+ * Aligns 64b input parameter to the previous power of 2
+ *
+ * @param v
+ * The 64b value to align
+ *
+ * @return
+ * Input parameter aligned to the previous power of 2
+ */
+static inline uint64_t
+rte_align64prevpow2(uint64_t v)
+{
+ v = rte_combine64ms1b(v);
+
+ return v - (v >> 1);
+}
+
/*********** Macros for calculating min and max **********/
/**
@@ -320,7 +442,7 @@ rte_align64pow2(uint64_t v)
static inline uint32_t
rte_bsf32(uint32_t v)
{
- return __builtin_ctz(v);
+ return (uint32_t)__builtin_ctz(v);
}
/**
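The new alignment helpers are easiest to read against a few concrete values; the checks below follow directly from the definitions above (a standalone illustration, not part of the patch):

    #include <assert.h>
    #include <rte_common.h>

    int
    main(void)
    {
            assert(RTE_ALIGN_MUL_CEIL(13, 5) == 15);
            assert(RTE_ALIGN_MUL_FLOOR(13, 5) == 10);
            assert(RTE_IS_POWER_OF_2(512));
            assert(!RTE_IS_POWER_OF_2(0));
            assert(rte_align32prevpow2(1000) == 512);
            assert(rte_align64prevpow2((1ULL << 40) | 1) == (1ULL << 40));
            return 0;
    }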
diff --git a/lib/librte_eal/common/include/rte_dev.h b/lib/librte_eal/common/include/rte_dev.h
index b688f1ef..b80a8059 100644
--- a/lib/librte_eal/common/include/rte_dev.h
+++ b/lib/librte_eal/common/include/rte_dev.h
@@ -24,6 +24,25 @@ extern "C" {
#include <rte_compat.h>
#include <rte_log.h>
+/**
+ * The device event type.
+ */
+enum rte_dev_event_type {
+ RTE_DEV_EVENT_ADD, /**< device being added */
+ RTE_DEV_EVENT_REMOVE, /**< device being removed */
+ RTE_DEV_EVENT_MAX /**< max value of this enum */
+};
+
+struct rte_dev_event {
+ enum rte_dev_event_type type; /**< device event type */
+ int subsystem; /**< subsystem id */
+ char *devname; /**< device name */
+};
+
+typedef void (*rte_dev_event_cb_fn)(char *device_name,
+ enum rte_dev_event_type event,
+ void *cb_arg);
+
__attribute__((format(printf, 2, 0)))
static inline void
rte_pmd_debug_trace(const char *func_name, const char *fmt, ...)
@@ -32,24 +51,25 @@ rte_pmd_debug_trace(const char *func_name, const char *fmt, ...)
va_start(ap, fmt);
- char buffer[vsnprintf(NULL, 0, fmt, ap) + 1];
+ {
+ char buffer[vsnprintf(NULL, 0, fmt, ap) + 1];
- va_end(ap);
+ va_end(ap);
- va_start(ap, fmt);
- vsnprintf(buffer, sizeof(buffer), fmt, ap);
- va_end(ap);
+ va_start(ap, fmt);
+ vsnprintf(buffer, sizeof(buffer), fmt, ap);
+ va_end(ap);
- rte_log(RTE_LOG_ERR, RTE_LOGTYPE_PMD, "%s: %s", func_name, buffer);
+ rte_log(RTE_LOG_ERR, RTE_LOGTYPE_PMD, "%s: %s",
+ func_name, buffer);
+ }
}
/*
* Enable RTE_PMD_DEBUG_TRACE() when at least one component relying on the
* RTE_*_RET() macros defined below is compiled in debug mode.
*/
-#if defined(RTE_LIBRTE_ETHDEV_DEBUG) || \
- defined(RTE_LIBRTE_CRYPTODEV_DEBUG) || \
- defined(RTE_LIBRTE_EVENTDEV_DEBUG)
+#if defined(RTE_LIBRTE_EVENTDEV_DEBUG)
#define RTE_PMD_DEBUG_TRACE(...) \
rte_pmd_debug_trace(__func__, __VA_ARGS__)
#else
@@ -154,6 +174,7 @@ struct rte_device {
* @return
* 0 on success, negative on error.
*/
+__rte_deprecated
int rte_eal_dev_attach(const char *name, const char *devargs);
/**
@@ -164,6 +185,7 @@ int rte_eal_dev_attach(const char *name, const char *devargs);
* @return
* 0 on success, negative on error.
*/
+__rte_deprecated
int rte_eal_dev_detach(struct rte_device *dev);
/**
@@ -263,8 +285,179 @@ __attribute__((used)) = str
static const char DRV_EXP_TAG(name, kmod_dep_export)[] \
__attribute__((used)) = str
+/**
+ * Iteration context.
+ *
+ * This context carries over the current iteration state.
+ */
+struct rte_dev_iterator {
+ const char *dev_str; /**< device string. */
+ const char *bus_str; /**< bus-related part of device string. */
+ const char *cls_str; /**< class-related part of device string. */
+ struct rte_bus *bus; /**< bus handle. */
+ struct rte_class *cls; /**< class handle. */
+ struct rte_device *device; /**< current position. */
+ void *class_device; /**< additional specialized context. */
+};
+
+/**
+ * Device iteration function.
+ *
+ * Find the next device matching properties passed in parameters.
+ * The function takes an additional ``start`` parameter, that is
+ * used as starting context when relevant.
+ *
+ * The function returns the current element in the iteration.
+ * This return value will potentially be used as a start parameter
+ * in subsequent calls to the function.
+ *
+ * The additional iterator parameter is only there if a specific
+ * implementation needs additional context. It must not be modified by
+ * the iteration function itself.
+ *
+ * @param start
+ * Starting iteration context.
+ *
+ * @param devstr
+ * Device description string.
+ *
+ * @param it
+ * Device iterator.
+ *
+ * @return
+ * The address of the current element matching the device description
+ * string.
+ */
+typedef void *(*rte_dev_iterate_t)(const void *start,
+ const char *devstr,
+ const struct rte_dev_iterator *it);
+
+/**
+ * Initializes a device iterator.
+ *
+ * This iterator allows accessing a list of devices matching given criteria.
+ * The device matching is made among all buses and classes currently registered,
+ * filtered by the device description given as parameter.
+ *
+ * This function will not allocate any memory. It is safe to stop the
+ * iteration at any moment and let the iterator go out of context.
+ *
+ * @param it
+ * Device iterator handle.
+ *
+ * @param str
+ * Device description string.
+ *
+ * @return
+ * 0 on successful initialization.
+ * <0 on error.
+ */
+__rte_experimental
+int
+rte_dev_iterator_init(struct rte_dev_iterator *it, const char *str);
+
+/**
+ * Iterates on a device iterator.
+ *
+ * Generates a new rte_device handle corresponding to the next element
+ * in the list described in comprehension by the iterator.
+ *
+ * The next object is returned, and the iterator is updated.
+ *
+ * @param it
+ * Device iterator handle.
+ *
+ * @return
+ * An rte_device handle if found.
+ * NULL if an error occurred (rte_errno is set).
+ * NULL if no device could be found (rte_errno is not set).
+ */
+__rte_experimental
+struct rte_device *
+rte_dev_iterator_next(struct rte_dev_iterator *it);
+
+#define RTE_DEV_FOREACH(dev, devstr, it) \
+ for (rte_dev_iterator_init(it, devstr), \
+ dev = rte_dev_iterator_next(it); \
+ dev != NULL; \
+ dev = rte_dev_iterator_next(it))
+
#ifdef __cplusplus
}
#endif
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice
+ *
+ * It registers the callback for the specified device.
+ * Multiple callbacks can be registered at the same time.
+ *
+ * @param device_name
+ * The device name, that is the name field of struct rte_device;
+ * a NULL value means all devices.
+ * @param cb_fn
+ * callback address.
+ * @param cb_arg
+ * address of parameter for callback.
+ *
+ * @return
+ * - On success, zero.
+ * - On failure, a negative value.
+ */
+int __rte_experimental
+rte_dev_event_callback_register(const char *device_name,
+ rte_dev_event_cb_fn cb_fn,
+ void *cb_arg);
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice
+ *
+ * It unregisters the callback according to the specified device.
+ *
+ * @param device_name
+ * The device name, that is the name field of struct rte_device;
+ * a NULL value means all devices and their callbacks.
+ * @param cb_fn
+ * callback address.
+ * @param cb_arg
+ * address of parameter for callback, (void *)-1 means to remove all
+ * registered callbacks which have the same callback address.
+ *
+ * @return
+ * - On success, return the number of callback entities removed.
+ * - On failure, a negative value.
+ */
+int __rte_experimental
+rte_dev_event_callback_unregister(const char *device_name,
+ rte_dev_event_cb_fn cb_fn,
+ void *cb_arg);
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice
+ *
+ * Start the device event monitoring.
+ *
+ * @return
+ * - On success, zero.
+ * - On failure, a negative value.
+ */
+int __rte_experimental
+rte_dev_event_monitor_start(void);
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice
+ *
+ * Stop the device event monitoring.
+ *
+ * @return
+ * - On success, zero.
+ * - On failure, a negative value.
+ */
+int __rte_experimental
+rte_dev_event_monitor_stop(void);
+
#endif /* _RTE_DEV_H_ */
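A short sketch of the iterator API; the "bus=pci/class=eth" filter string illustrates the layered device syntax and the function name is made up:

    #include <stdio.h>
    #include <rte_dev.h>

    static void
    list_matching_devices(void)
    {
            struct rte_dev_iterator it;
            struct rte_device *dev;

            RTE_DEV_FOREACH(dev, "bus=pci/class=eth", &it)
                    printf("matched %s\n", dev->name);
    }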
diff --git a/lib/librte_eal/common/include/rte_devargs.h b/lib/librte_eal/common/include/rte_devargs.h
index 84e5e23c..097a4ce7 100644
--- a/lib/librte_eal/common/include/rte_devargs.h
+++ b/lib/librte_eal/common/include/rte_devargs.h
@@ -51,21 +51,23 @@ struct rte_devargs {
enum rte_devtype type;
/** Device policy. */
enum rte_dev_policy policy;
- /** Bus handle for the device. */
- struct rte_bus *bus;
/** Name of the device. */
char name[RTE_DEV_NAME_MAX_LEN];
+ RTE_STD_C11
+ union {
/** Arguments string as given by user or "" for no argument. */
- char *args;
+ char *args;
+ const char *drv_str;
+ };
+ struct rte_bus *bus; /**< bus handle. */
+ struct rte_class *cls; /**< class handle. */
+ const char *bus_str; /**< bus-related part of device string. */
+ const char *cls_str; /**< class-related part of device string. */
+ const char *data; /**< Device string storage. */
};
-/** user device double-linked queue type definition */
-TAILQ_HEAD(rte_devargs_list, rte_devargs);
-
-/** Global list of user devices */
-extern struct rte_devargs_list devargs_list;
-
/**
+ * @deprecated
* Parse a devargs string.
*
* For PCI devices, the format of arguments string is "PCI_ADDR" or
@@ -90,6 +92,7 @@ extern struct rte_devargs_list devargs_list;
* - 0 on success
* - A negative value on error
*/
+__rte_deprecated
int rte_eal_parse_devargs_str(const char *devargs_str,
char **drvname, char **drvargs);
@@ -100,18 +103,73 @@ int rte_eal_parse_devargs_str(const char *devargs_str,
* in argument. Store which bus will handle the device, its name
* and the eventual device parameters.
*
+ * The syntax is:
+ *
+ * bus:device_identifier,arg1=val1,arg2=val2
+ *
+ * where "bus:" is the bus name followed by any character separator.
+ * The bus name is optional. If no bus name is specified, each bus
+ * will attempt to recognize the device identifier. The first one
+ * to succeed will be used.
+ *
+ * Examples:
+ *
+ * pci:0000:05:00.0,arg=val
+ * 05:00.0,arg=val
+ * vdev:net_ring0
+ *
+ * @param da
+ * The devargs structure holding the device information.
+ *
* @param dev
- * The device declaration string.
+ * String describing a device.
+ *
+ * @return
+ * - 0 on success.
+ * - Negative errno on error.
+ */
+__rte_experimental
+int
+rte_devargs_parse(struct rte_devargs *da, const char *dev);
+
+/**
+ * Parse a device string.
+ *
+ * Verify that a bus is capable of handling the device passed
+ * in argument. Store which bus will handle the device, its name
+ * and the eventual device parameters.
+ *
+ * The device string is built with a printf-like syntax.
+ *
+ * The syntax is:
+ *
+ * bus:device_identifier,arg1=val1,arg2=val2
+ *
+ * where "bus:" is the bus name followed by any character separator.
+ * The bus name is optional. If no bus name is specified, each bus
+ * will attempt to recognize the device identifier. The first one
+ * to succeed will be used.
+ *
+ * Examples:
+ *
+ * pci:0000:05.00.0,arg=val
+ * 05.00.0,arg=val
+ * vdev:net_ring0
+ *
* @param da
* The devargs structure holding the device information.
+ * @param format
+ * Format string describing a device.
*
* @return
* - 0 on success.
* - Negative errno on error.
*/
-int __rte_experimental
-rte_eal_devargs_parse(const char *dev,
- struct rte_devargs *da);
+__rte_experimental
+int
+rte_devargs_parsef(struct rte_devargs *da,
+ const char *format, ...)
+__attribute__((format(printf, 2, 0)));
/**
* Insert an rte_devargs in the global list.
@@ -123,21 +181,30 @@ rte_eal_devargs_parse(const char *dev,
* - 0 on success
* - Negative on error.
*/
-int __rte_experimental
-rte_eal_devargs_insert(struct rte_devargs *da);
+__rte_experimental
+int
+rte_devargs_insert(struct rte_devargs *da);
/**
* Add a device to the user device list
+ * See rte_devargs_parse() for details.
*
- * For PCI devices, the format of arguments string is "PCI_ADDR" or
- * "PCI_ADDR,key=val,key2=val2,...". Examples: "08:00.1", "0000:5:00.0",
- * "04:00.0,arg=val".
+ * @param devtype
+ * The type of the device.
+ * @param devargs_str
+ * The arguments as given by the user.
*
- * For virtual devices, the format of arguments string is "DRIVER_NAME*"
- * or "DRIVER_NAME*,key=val,key2=val2,...". Examples: "net_ring",
- * "net_ring0", "net_pmdAnything,arg=0:arg2=1". The validity of the
- * driver name is not checked by this function, it is done when probing
- * the drivers.
+ * @return
+ * - 0 on success
+ * - A negative value on error
+ */
+__rte_experimental
+int rte_devargs_add(enum rte_devtype devtype, const char *devargs_str);
+
+/**
+ * @deprecated
+ * Add a device to the user device list
+ * See rte_devargs_parse() for details.
*
* @param devtype
* The type of the device.
@@ -148,6 +215,7 @@ rte_eal_devargs_insert(struct rte_devargs *da);
* - 0 on success
* - A negative value on error
*/
+__rte_deprecated
int rte_eal_devargs_add(enum rte_devtype devtype, const char *devargs_str);
/**
@@ -166,10 +234,25 @@ int rte_eal_devargs_add(enum rte_devtype devtype, const char *devargs_str);
* <0 on error.
* >0 if the devargs was not within the user device list.
*/
-int __rte_experimental rte_eal_devargs_remove(const char *busname,
- const char *devname);
+__rte_experimental
+int rte_devargs_remove(const char *busname,
+ const char *devname);
+
+/**
+ * Count the number of user devices of a specified type
+ *
+ * @param devtype
+ * The type of the devices to be counted.
+ *
+ * @return
+ * The number of devices.
+ */
+__rte_experimental
+unsigned int
+rte_devargs_type_count(enum rte_devtype devtype);
/**
+ * @deprecated
* Count the number of user devices of a specified type
*
* @param devtype
@@ -178,6 +261,7 @@ int __rte_experimental rte_eal_devargs_remove(const char *busname,
* @return
* The number of devices.
*/
+__rte_deprecated
unsigned int
rte_eal_devargs_type_count(enum rte_devtype devtype);
@@ -187,8 +271,47 @@ rte_eal_devargs_type_count(enum rte_devtype devtype);
* @param f
* A pointer to a file for output
*/
+__rte_experimental
+void rte_devargs_dump(FILE *f);
+
+/**
+ * @deprecated
+ * This function dumps the list of user devices and their arguments.
+ *
+ * @param f
+ * A pointer to a file for output
+ */
+__rte_deprecated
void rte_eal_devargs_dump(FILE *f);
+/**
+ * Find next rte_devargs matching the provided bus name.
+ *
+ * @param busname
+ * Limit the iteration to devargs related to buses
+ * matching this name.
+ * If NULL, the next rte_devargs of any bus is returned.
+ *
+ * @param start
+ * Starting iteration point. The iteration will start at
+ * the first rte_devargs if NULL.
+ *
+ * @return
+ * Next rte_devargs entry matching the requested bus,
+ * NULL if there is none.
+ */
+__rte_experimental
+struct rte_devargs *
+rte_devargs_next(const char *busname, const struct rte_devargs *start);
+
+/**
+ * Iterate over all rte_devargs for a specific bus.
+ */
+#define RTE_EAL_DEVARGS_FOREACH(busname, da) \
+ for (da = rte_devargs_next(busname, NULL); \
+ da != NULL; \
+ da = rte_devargs_next(busname, da)) \
+
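For example, dumping every devargs entry declared for the vdev bus could look like the following sketch (the bus name and printing are illustrative):

    #include <stdio.h>
    #include <rte_devargs.h>

    static void
    dump_vdev_args(void)
    {
        struct rte_devargs *da = NULL;

        RTE_EAL_DEVARGS_FOREACH("vdev", da)
            printf("%s %s\n", da->name, da->args != NULL ? da->args : "");
    }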
#ifdef __cplusplus
}
#endif
diff --git a/lib/librte_eal/common/include/rte_eal.h b/lib/librte_eal/common/include/rte_eal.h
index 044474e6..e114dcbd 100644
--- a/lib/librte_eal/common/include/rte_eal.h
+++ b/lib/librte_eal/common/include/rte_eal.h
@@ -57,6 +57,8 @@ enum rte_proc_type_t {
struct rte_config {
uint32_t master_lcore; /**< Id of the master lcore */
uint32_t lcore_count; /**< Number of available logical cores. */
+ uint32_t numa_node_count; /**< Number of detected NUMA nodes. */
+ uint32_t numa_nodes[RTE_MAX_NUMA_NODES]; /**< List of detected NUMA nodes. */
uint32_t service_lcore_count;/**< Number of available service cores. */
enum rte_lcore_role_t lcore_role[RTE_MAX_LCORE]; /**< State of cores. */
@@ -230,6 +232,16 @@ struct rte_mp_reply {
typedef int (*rte_mp_t)(const struct rte_mp_msg *msg, const void *peer);
/**
+ * Asynchronous reply function typedef used by other components.
+ *
+ * As we create a socket channel for primary/secondary communication, use
+ * this function typedef to register an action to be taken when responses
+ * to asynchronous requests arrive.
+ */
+typedef int (*rte_mp_async_reply_t)(const struct rte_mp_msg *request,
+ const struct rte_mp_reply *reply);
+
+/**
* @warning
* @b EXPERIMENTAL: this API may change without prior notice
*
@@ -314,13 +326,39 @@ rte_mp_sendmsg(struct rte_mp_msg *msg);
* - On failure, return -1, and the reason will be stored in rte_errno.
*/
int __rte_experimental
-rte_mp_request(struct rte_mp_msg *req, struct rte_mp_reply *reply,
+rte_mp_request_sync(struct rte_mp_msg *req, struct rte_mp_reply *reply,
const struct timespec *ts);
/**
* @warning
* @b EXPERIMENTAL: this API may change without prior notice
*
+ * Send a request to the peer process and expect a reply in a separate callback.
+ *
+ * This function sends a request message to the peer process, and will not
+ * block. Instead, reply will be received in a separate callback.
+ *
+ * @param req
+ * The req argument contains the customized request message.
+ *
+ * @param ts
+ * The ts argument specifies how long we can wait for the peer(s) to reply.
+ *
+ * @param clb
+ * The callback to trigger when all responses for this request have arrived.
+ *
+ * @return
+ * - On success, return 0.
+ * - On failure, return -1, and the reason will be stored in rte_errno.
+ */
+int __rte_experimental
+rte_mp_request_async(struct rte_mp_msg *req, const struct timespec *ts,
+ rte_mp_async_reply_t clb);
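A sketch of the asynchronous flow; the action name is arbitrary, and the rte_mp_msg/rte_mp_reply field names used here are assumed to match the definitions earlier in this header:

    #include <stdio.h>
    #include <string.h>
    #include <time.h>
    #include <rte_eal.h>

    static int
    on_replies(const struct rte_mp_msg *request, const struct rte_mp_reply *reply)
    {
        printf("'%s': %d of %d peers replied\n",
               request->name, reply->nb_received, reply->nb_sent);
        return 0;
    }

    static int
    send_async(void)
    {
        struct rte_mp_msg req;
        struct timespec ts = { .tv_sec = 5, .tv_nsec = 0 };

        memset(&req, 0, sizeof(req));
        snprintf(req.name, sizeof(req.name), "my_action");
        return rte_mp_request_async(&req, &ts, on_replies);
    }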
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice
+ *
* Send a reply to the peer process.
*
* This function will send a reply message in response to a request message
@@ -452,25 +490,13 @@ static inline int rte_gettid(void)
enum rte_iova_mode rte_eal_iova_mode(void);
/**
- * @warning
- * @b EXPERIMENTAL: this API may change without prior notice
- *
* Get user provided pool ops name for mbuf
*
* @return
* returns user provided pool ops name.
*/
-const char * __rte_experimental
-rte_eal_mbuf_user_pool_ops(void);
-
-/**
- * Get default pool ops name for mbuf
- *
- * @return
- * returns default pool ops name.
- */
const char *
-rte_eal_mbuf_default_mempool_ops(void);
+rte_eal_mbuf_user_pool_ops(void);
#ifdef __cplusplus
}
diff --git a/lib/librte_eal/common/include/rte_eal_interrupts.h b/lib/librte_eal/common/include/rte_eal_interrupts.h
index 3f792a97..6eb49327 100644
--- a/lib/librte_eal/common/include/rte_eal_interrupts.h
+++ b/lib/librte_eal/common/include/rte_eal_interrupts.h
@@ -34,6 +34,7 @@ enum rte_intr_handle_type {
RTE_INTR_HANDLE_ALARM, /**< alarm handle */
RTE_INTR_HANDLE_EXT, /**< external handler */
RTE_INTR_HANDLE_VDEV, /**< virtual device */
+ RTE_INTR_HANDLE_DEV_EVENT, /**< device event handle */
RTE_INTR_HANDLE_MAX /**< count of elements */
};
diff --git a/lib/librte_eal/common/include/rte_eal_memconfig.h b/lib/librte_eal/common/include/rte_eal_memconfig.h
index 29fa0b60..aff0688d 100644
--- a/lib/librte_eal/common/include/rte_eal_memconfig.h
+++ b/lib/librte_eal/common/include/rte_eal_memconfig.h
@@ -12,12 +12,31 @@
#include <rte_malloc_heap.h>
#include <rte_rwlock.h>
#include <rte_pause.h>
+#include <rte_fbarray.h>
#ifdef __cplusplus
extern "C" {
#endif
/**
+ * memseg list is a special case as we need to store a bunch of other data
+ * together with the array itself.
+ */
+struct rte_memseg_list {
+ RTE_STD_C11
+ union {
+ void *base_va;
+ /**< Base virtual address for this memseg list. */
+ uint64_t addr_64;
+ /**< Makes sure addr is always 64-bits */
+ };
+ int socket_id; /**< Socket ID for all memsegs in this list. */
+ uint64_t page_sz; /**< Page size for all memsegs in this list. */
+ volatile uint32_t version; /**< version number for multiprocess sync. */
+ struct rte_fbarray memseg_arr;
+};
+
+/**
* the structure for the memory configuration for the RTE.
* Used by the rte_config structure. It is separated out, as for multi-process
* support, the memory details should be shared across instances
@@ -40,11 +59,14 @@ struct rte_mem_config {
rte_rwlock_t qlock; /**< used for tailq operation for thread safe. */
rte_rwlock_t mplock; /**< only used by mempool LIB for thread-safe. */
- uint32_t memzone_cnt; /**< Number of allocated memzones */
+ rte_rwlock_t memory_hotplug_lock;
+ /**< indicates whether memory hotplug request is in progress. */
/* memory segments and zones */
- struct rte_memseg memseg[RTE_MAX_MEMSEG]; /**< Physmem descriptors. */
- struct rte_memzone memzone[RTE_MAX_MEMZONE]; /**< Memzone descriptors. */
+ struct rte_fbarray memzones; /**< Memzone descriptors. */
+
+ struct rte_memseg_list memsegs[RTE_MAX_MEMSEG_LISTS];
+ /**< list of dynamic arrays holding memsegs */
struct rte_tailq_head tailq_head[RTE_MAX_TAILQ]; /**< Tailqs for objects */
diff --git a/lib/librte_eal/common/include/rte_fbarray.h b/lib/librte_eal/common/include/rte_fbarray.h
new file mode 100644
index 00000000..5d880551
--- /dev/null
+++ b/lib/librte_eal/common/include/rte_fbarray.h
@@ -0,0 +1,470 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2017-2018 Intel Corporation
+ */
+
+#ifndef RTE_FBARRAY_H
+#define RTE_FBARRAY_H
+
+/**
+ * @file
+ *
+ * File-backed shared indexed array for DPDK.
+ *
+ * Basic workflow is expected to be the following:
+ * 1) Allocate array either using ``rte_fbarray_init()`` or
+ * ``rte_fbarray_attach()`` (depending on whether it's shared between
+ * multiple DPDK processes)
+ * 2) find free spots using ``rte_fbarray_find_next_free()``
+ * 3) get pointer to data in the free spot using ``rte_fbarray_get()``, and
+ * copy data into the pointer (element size is fixed)
+ * 4) mark entry as used using ``rte_fbarray_set_used()``
+ *
+ * Calls to ``rte_fbarray_init()`` and ``rte_fbarray_destroy()`` will have
+ * consequences for all processes, while calls to ``rte_fbarray_attach()`` and
+ * ``rte_fbarray_detach()`` will only have consequences within a single process.
+ * Therefore, it is safe to call ``rte_fbarray_attach()`` or
+ * ``rte_fbarray_detach()`` while another process is using ``rte_fbarray``,
+ * provided no other thread within the same process will try to use
+ * ``rte_fbarray`` before attaching or after detaching. It is not safe to call
+ * ``rte_fbarray_init()`` or ``rte_fbarray_destroy()`` while another thread or
+ * another process is using ``rte_fbarray``.
+ */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <stdbool.h>
+#include <stdio.h>
+
+#include <rte_compat.h>
+#include <rte_rwlock.h>
+
+#define RTE_FBARRAY_NAME_LEN 64
+
+struct rte_fbarray {
+ char name[RTE_FBARRAY_NAME_LEN]; /**< name associated with an array */
+ unsigned int count; /**< number of entries stored */
+ unsigned int len; /**< current length of the array */
+ unsigned int elt_sz; /**< size of each element */
+ void *data; /**< data pointer */
+ rte_rwlock_t rwlock; /**< multiprocess lock */
+};
+
+/**
+ * Set up ``rte_fbarray`` structure and allocate underlying resources.
+ *
+ * Call this function to correctly set up ``rte_fbarray`` and allocate
+ * underlying files that will be backing the data in the current process. Note
+ * that in order to use and share ``rte_fbarray`` between multiple processes,
+ * data pointed to by ``arr`` pointer must itself be allocated in shared memory.
+ *
+ * @param arr
+ * Valid pointer to allocated ``rte_fbarray`` structure.
+ *
+ * @param name
+ * Unique name to be assigned to this array.
+ *
+ * @param len
+ * Number of elements initially available in the array.
+ *
+ * @param elt_sz
+ * Size of each element.
+ *
+ * @return
+ * - 0 on success.
+ * - -1 on failure, with ``rte_errno`` indicating reason for failure.
+ */
+int __rte_experimental
+rte_fbarray_init(struct rte_fbarray *arr, const char *name, unsigned int len,
+ unsigned int elt_sz);
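A compact sketch of the workflow described in the file comment above; the array name, length, and element type are arbitrary choices for illustration:

    #include <stdint.h>
    #include <rte_fbarray.h>

    static int
    fbarray_example(void)
    {
        struct rte_fbarray arr;
        uint64_t *slot;
        int idx;

        if (rte_fbarray_init(&arr, "example_arr", 256, sizeof(*slot)) < 0)
            return -1; /* rte_errno holds the reason */
        idx = rte_fbarray_find_next_free(&arr, 0);
        if (idx < 0)
            return -1;
        slot = rte_fbarray_get(&arr, idx);
        *slot = 42;
        return rte_fbarray_set_used(&arr, idx);
    }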
+
+
+/**
+ * Attach to a file backing an already allocated and correctly set up
+ * ``rte_fbarray`` structure.
+ *
+ * Call this function to attach to file that will be backing the data in the
+ * current process. The structure must have been previously correctly set up
+ * with a call to ``rte_fbarray_init()``. Calls to ``rte_fbarray_attach()`` are
+ * usually meant to be performed in a multiprocessing scenario, with data
+ * pointed to by ``arr`` pointer allocated in shared memory.
+ *
+ * @param arr
+ * Valid pointer to allocated and correctly set up rte_fbarray structure.
+ *
+ * @return
+ * - 0 on success.
+ * - -1 on failure, with ``rte_errno`` indicating reason for failure.
+ */
+int __rte_experimental
+rte_fbarray_attach(struct rte_fbarray *arr);
+
+
+/**
+ * Deallocate resources for an already allocated and correctly set up
+ * ``rte_fbarray`` structure, and remove the underlying file.
+ *
+ * Call this function to deallocate all resources associated with an
+ * ``rte_fbarray`` structure within the current process. This will also
+ * zero-fill data pointed to by ``arr`` pointer and remove the underlying file
+ * backing the data, so it is expected that by the time this function is called,
+ * all other processes have detached from this ``rte_fbarray``.
+ *
+ * @param arr
+ * Valid pointer to allocated and correctly set up ``rte_fbarray`` structure.
+ *
+ * @return
+ * - 0 on success.
+ * - -1 on failure, with ``rte_errno`` indicating reason for failure.
+ */
+int __rte_experimental
+rte_fbarray_destroy(struct rte_fbarray *arr);
+
+
+/**
+ * Deallocate resources for an already allocated and correctly set up
+ * ``rte_fbarray`` structure.
+ *
+ * Call this function to deallocate all resources associated with an
+ * ``rte_fbarray`` structure within current process.
+ *
+ * @param arr
+ * Valid pointer to allocated and correctly set up ``rte_fbarray`` structure.
+ *
+ * @return
+ * - 0 on success.
+ * - -1 on failure, with ``rte_errno`` indicating reason for failure.
+ */
+int __rte_experimental
+rte_fbarray_detach(struct rte_fbarray *arr);
+
+
+/**
+ * Get pointer to element residing at specified index.
+ *
+ * @param arr
+ * Valid pointer to allocated and correctly set up ``rte_fbarray`` structure.
+ *
+ * @param idx
+ * Index of an element to get a pointer to.
+ *
+ * @return
+ * - non-NULL pointer on success.
+ * - NULL on failure, with ``rte_errno`` indicating reason for failure.
+ */
+void * __rte_experimental
+rte_fbarray_get(const struct rte_fbarray *arr, unsigned int idx);
+
+
+/**
+ * Find index of a specified element within the array.
+ *
+ * @param arr
+ * Valid pointer to allocated and correctly set up ``rte_fbarray`` structure.
+ *
+ * @param elt
+ * Pointer to the element whose index should be found.
+ *
+ * @return
+ * - non-negative integer on success.
+ * - -1 on failure, with ``rte_errno`` indicating reason for failure.
+ */
+int __rte_experimental
+rte_fbarray_find_idx(const struct rte_fbarray *arr, const void *elt);
+
+
+/**
+ * Mark specified element as used.
+ *
+ * @param arr
+ * Valid pointer to allocated and correctly set up ``rte_fbarray`` structure.
+ *
+ * @param idx
+ * Element index to mark as used.
+ *
+ * @return
+ * - 0 on success.
+ * - -1 on failure, with ``rte_errno`` indicating reason for failure.
+ */
+int __rte_experimental
+rte_fbarray_set_used(struct rte_fbarray *arr, unsigned int idx);
+
+
+/**
+ * Mark specified element as free.
+ *
+ * @param arr
+ * Valid pointer to allocated and correctly set up ``rte_fbarray`` structure.
+ *
+ * @param idx
+ * Element index to mark as free.
+ *
+ * @return
+ * - 0 on success.
+ * - -1 on failure, with ``rte_errno`` indicating reason for failure.
+ */
+int __rte_experimental
+rte_fbarray_set_free(struct rte_fbarray *arr, unsigned int idx);
+
+
+/**
+ * Check whether element at specified index is marked as used.
+ *
+ * @param arr
+ * Valid pointer to allocated and correctly set up ``rte_fbarray`` structure.
+ *
+ * @param idx
+ * Element index to check as used.
+ *
+ * @return
+ * - 1 if element is used.
+ * - 0 if element is unused.
+ * - -1 on failure, with ``rte_errno`` indicating reason for failure.
+ */
+int __rte_experimental
+rte_fbarray_is_used(struct rte_fbarray *arr, unsigned int idx);
+
+
+/**
+ * Find index of next free element, starting at specified index.
+ *
+ * @param arr
+ * Valid pointer to allocated and correctly set up ``rte_fbarray`` structure.
+ *
+ * @param start
+ * Element index to start search from.
+ *
+ * @return
+ * - non-negative integer on success.
+ * - -1 on failure, with ``rte_errno`` indicating reason for failure.
+ */
+int __rte_experimental
+rte_fbarray_find_next_free(struct rte_fbarray *arr, unsigned int start);
+
+
+/**
+ * Find index of next used element, starting at specified index.
+ *
+ * @param arr
+ * Valid pointer to allocated and correctly set up ``rte_fbarray`` structure.
+ *
+ * @param start
+ * Element index to start search from.
+ *
+ * @return
+ * - non-negative integer on success.
+ * - -1 on failure, with ``rte_errno`` indicating reason for failure.
+ */
+int __rte_experimental
+rte_fbarray_find_next_used(struct rte_fbarray *arr, unsigned int start);
+
+
+/**
+ * Find index of next chunk of ``n`` free elements, starting at specified index.
+ *
+ * @param arr
+ * Valid pointer to allocated and correctly set up ``rte_fbarray`` structure.
+ *
+ * @param start
+ * Element index to start search from.
+ *
+ * @param n
+ * Number of free elements to look for.
+ *
+ * @return
+ * - non-negative integer on success.
+ * - -1 on failure, with ``rte_errno`` indicating reason for failure.
+ */
+int __rte_experimental
+rte_fbarray_find_next_n_free(struct rte_fbarray *arr, unsigned int start,
+ unsigned int n);
+
+
+/**
+ * Find index of next chunk of ``n`` used elements, starting at specified index.
+ *
+ * @param arr
+ * Valid pointer to allocated and correctly set up ``rte_fbarray`` structure.
+ *
+ * @param start
+ * Element index to start search from.
+ *
+ * @param n
+ * Number of used elements to look for.
+ *
+ * @return
+ * - non-negative integer on success.
+ * - -1 on failure, with ``rte_errno`` indicating reason for failure.
+ */
+int __rte_experimental
+rte_fbarray_find_next_n_used(struct rte_fbarray *arr, unsigned int start,
+ unsigned int n);
+
+
+/**
+ * Find how many more free entries there are, starting at specified index.
+ *
+ * @param arr
+ * Valid pointer to allocated and correctly set up ``rte_fbarray`` structure.
+ *
+ * @param start
+ * Element index to start search from.
+ *
+ * @return
+ * - non-negative integer on success.
+ * - -1 on failure, with ``rte_errno`` indicating reason for failure.
+ */
+int __rte_experimental
+rte_fbarray_find_contig_free(struct rte_fbarray *arr,
+ unsigned int start);
+
+
+/**
+ * Find how many more used entries there are, starting at specified index.
+ *
+ * @param arr
+ * Valid pointer to allocated and correctly set up ``rte_fbarray`` structure.
+ *
+ * @param start
+ * Element index to start search from.
+ *
+ * @return
+ * - non-negative integer on success.
+ * - -1 on failure, with ``rte_errno`` indicating reason for failure.
+ */
+int __rte_experimental
+rte_fbarray_find_contig_used(struct rte_fbarray *arr, unsigned int start);
+
+/**
+ * Find index of previous free element, starting at specified index.
+ *
+ * @param arr
+ * Valid pointer to allocated and correctly set up ``rte_fbarray`` structure.
+ *
+ * @param start
+ * Element index to start search from.
+ *
+ * @return
+ * - non-negative integer on success.
+ * - -1 on failure, with ``rte_errno`` indicating reason for failure.
+ */
+int __rte_experimental
+rte_fbarray_find_prev_free(struct rte_fbarray *arr, unsigned int start);
+
+
+/**
+ * Find index of previous used element, starting at specified index.
+ *
+ * @param arr
+ * Valid pointer to allocated and correctly set up ``rte_fbarray`` structure.
+ *
+ * @param start
+ * Element index to start search from.
+ *
+ * @return
+ * - non-negative integer on success.
+ * - -1 on failure, with ``rte_errno`` indicating reason for failure.
+ */
+int __rte_experimental
+rte_fbarray_find_prev_used(struct rte_fbarray *arr, unsigned int start);
+
+
+/**
+ * Find lowest start index of chunk of ``n`` free elements, down from specified
+ * index.
+ *
+ * @param arr
+ * Valid pointer to allocated and correctly set up ``rte_fbarray`` structure.
+ *
+ * @param start
+ * Element index to start search from.
+ *
+ * @param n
+ * Number of free elements to look for.
+ *
+ * @return
+ * - non-negative integer on success.
+ * - -1 on failure, with ``rte_errno`` indicating reason for failure.
+ */
+int __rte_experimental
+rte_fbarray_find_prev_n_free(struct rte_fbarray *arr, unsigned int start,
+ unsigned int n);
+
+
+/**
+ * Find lowest start index of chunk of ``n`` used elements, down from specified
+ * index.
+ *
+ * @param arr
+ * Valid pointer to allocated and correctly set up ``rte_fbarray`` structure.
+ *
+ * @param start
+ * Element index to start search from.
+ *
+ * @param n
+ * Number of used elements to look for.
+ *
+ * @return
+ * - non-negative integer on success.
+ * - -1 on failure, with ``rte_errno`` indicating reason for failure.
+ */
+int __rte_experimental
+rte_fbarray_find_prev_n_used(struct rte_fbarray *arr, unsigned int start,
+ unsigned int n);
+
+
+/**
+ * Find how many more free entries there are before specified index (like
+ * ``rte_fbarray_find_contig_free`` but going in reverse).
+ *
+ * @param arr
+ * Valid pointer to allocated and correctly set up ``rte_fbarray`` structure.
+ *
+ * @param start
+ * Element index to start search from.
+ *
+ * @return
+ * - non-negative integer on success.
+ * - -1 on failure, with ``rte_errno`` indicating reason for failure.
+ */
+int __rte_experimental
+rte_fbarray_find_rev_contig_free(struct rte_fbarray *arr,
+ unsigned int start);
+
+
+/**
+ * Find how many more used entries there are before specified index (like
+ * ``rte_fbarray_find_contig_used`` but going in reverse).
+ *
+ * @param arr
+ * Valid pointer to allocated and correctly set up ``rte_fbarray`` structure.
+ *
+ * @param start
+ * Element index to start search from.
+ *
+ * @return
+ * - non-negative integer on success.
+ * - -1 on failure, with ``rte_errno`` indicating reason for failure.
+ */
+int __rte_experimental
+rte_fbarray_find_rev_contig_used(struct rte_fbarray *arr, unsigned int start);
+
+
+/**
+ * Dump ``rte_fbarray`` metadata.
+ *
+ * @param arr
+ * Valid pointer to allocated and correctly set up ``rte_fbarray`` structure.
+ *
+ * @param f
+ * File object to dump information into.
+ */
+void __rte_experimental
+rte_fbarray_dump_metadata(struct rte_fbarray *arr, FILE *f);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* RTE_FBARRAY_H */
diff --git a/lib/librte_eal/common/include/rte_hypervisor.h b/lib/librte_eal/common/include/rte_hypervisor.h
index 8d8aac74..5fe719c1 100644
--- a/lib/librte_eal/common/include/rte_hypervisor.h
+++ b/lib/librte_eal/common/include/rte_hypervisor.h
@@ -1,5 +1,5 @@
/* SPDX-License-Identifier: BSD-3-Clause
- * Copyright 2017 Mellanox Technologies, Ltd.
+ * Copyright 2017 Mellanox Technologies, Ltd
*/
#ifndef RTE_HYPERVISOR_H
diff --git a/lib/librte_eal/common/include/rte_lcore.h b/lib/librte_eal/common/include/rte_lcore.h
index 04722203..6e09d918 100644
--- a/lib/librte_eal/common/include/rte_lcore.h
+++ b/lib/librte_eal/common/include/rte_lcore.h
@@ -119,7 +119,7 @@ rte_lcore_index(int lcore_id)
if (lcore_id >= RTE_MAX_LCORE)
return -1;
if (lcore_id < 0)
- lcore_id = rte_lcore_id();
+ lcore_id = (int)rte_lcore_id();
return lcore_config[lcore_id].core_index;
}
@@ -132,6 +132,36 @@ rte_lcore_index(int lcore_id)
unsigned rte_socket_id(void);
/**
+ * Return number of physical sockets detected on the system.
+ *
+ * Note that the number of nodes need not correspond to their physical IDs:
+ * for example, a system may report two sockets while their actual socket IDs
+ * are 0 and 8.
+ *
+ * @return
+ * the number of physical sockets as recognized by EAL
+ */
+unsigned int __rte_experimental
+rte_socket_count(void);
+
+/**
+ * Return socket id with a particular index.
+ *
+ * This will return the socket id at a particular position in the list of all
+ * detected physical socket ids. For example, on a machine with sockets [0, 8],
+ * passing 1 as a parameter will return 8.
+ *
+ * @param idx
+ * index of physical socket id to return
+ *
+ * @return
+ * - physical socket id as recognized by EAL
+ * - -1 on error, with errno set to EINVAL
+ */
+int __rte_experimental
+rte_socket_id_by_idx(unsigned int idx);
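Taken together, the two functions above allow enumerating the detected sockets; a small sketch (printing chosen for illustration):

    #include <stdio.h>
    #include <rte_lcore.h>

    static void
    print_sockets(void)
    {
        unsigned int i;

        for (i = 0; i < rte_socket_count(); i++)
            printf("socket index %u has physical id %d\n",
                   i, rte_socket_id_by_idx(i));
    }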
+
+/**
* Get the ID of the physical socket of the specified lcore
*
* @param lcore_id
@@ -247,6 +277,32 @@ void rte_thread_get_affinity(rte_cpuset_t *cpusetp);
int rte_thread_setname(pthread_t id, const char *name);
/**
+ * Create a control thread.
+ *
+ * Wrapper to pthread_create(), pthread_setname_np() and
+ * pthread_setaffinity_np(). The dataplane and service lcores are
+ * excluded from the affinity of the new thread.
+ *
+ * @param thread
+ * Filled with the thread id of the newly created thread.
+ * @param name
+ * The name of the control thread (max 16 characters including '\0').
+ * @param attr
+ * Attributes for the new thread.
+ * @param start_routine
+ * Function to be executed by the new thread.
+ * @param arg
+ * Argument passed to start_routine.
+ * @return
+ * On success, returns 0; on error, it returns a negative value
+ * corresponding to the error number.
+ */
+__rte_experimental int
+rte_ctrl_thread_create(pthread_t *thread, const char *name,
+ const pthread_attr_t *attr,
+ void *(*start_routine)(void *), void *arg);
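A sketch of spawning such a control thread; the thread body and name are illustrative only:

    #include <pthread.h>
    #include <rte_lcore.h>

    static void *
    housekeeping(void *arg)
    {
        (void)arg;
        /* runs on CPUs outside the dataplane and service lcore set */
        return NULL;
    }

    static int
    spawn_housekeeping(void)
    {
        pthread_t tid;

        /* thread name is limited to 16 characters including '\0' */
        return rte_ctrl_thread_create(&tid, "housekeeping", NULL,
                                      housekeeping, NULL);
    }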
+
+/**
* Test if the core supplied has a specific role
*
* @param lcore_id
@@ -255,7 +311,7 @@ int rte_thread_setname(pthread_t id, const char *name);
* @param role
* The role to be checked against.
* @return
- * On success, return 0; otherwise return a negative value.
+ * Boolean value: positive if test is true; otherwise returns 0.
*/
int
rte_lcore_has_role(unsigned int lcore_id, enum rte_lcore_role_t role);
diff --git a/lib/librte_eal/common/include/rte_log.h b/lib/librte_eal/common/include/rte_log.h
index 9029c785..2f789cb9 100644
--- a/lib/librte_eal/common/include/rte_log.h
+++ b/lib/librte_eal/common/include/rte_log.h
@@ -20,6 +20,7 @@ extern "C" {
#include <stdint.h>
#include <stdio.h>
#include <stdarg.h>
+#include <sys/queue.h>
#include <rte_common.h>
#include <rte_config.h>
@@ -129,16 +130,28 @@ uint32_t rte_log_get_global_level(void);
int rte_log_get_level(uint32_t logtype);
/**
- * Set the log level for a given type.
+ * Set the log level for a given type based on shell pattern.
*
* @param pattern
- * The regexp identifying the log type.
+ * The match pattern identifying the log type.
+ * @param level
+ * The level to be set.
+ * @return
+ * 0 on success, a negative value if level is invalid.
+ */
+int rte_log_set_level_pattern(const char *pattern, uint32_t level);
+
+/**
+ * Set the log level for a given type based on regular expression.
+ *
+ * @param regex
+ * The regular expression identifying the log type.
* @param level
* The level to be set.
* @return
* 0 on success, a negative value if level is invalid.
*/
-int rte_log_set_level_regexp(const char *pattern, uint32_t level);
+int rte_log_set_level_regexp(const char *regex, uint32_t level);
/**
* Set the log level for a given type.
@@ -195,6 +208,27 @@ int rte_log_cur_msg_logtype(void);
int rte_log_register(const char *name);
/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice
+ *
+ * Register a dynamic log type and try to pick its level from EAL options
+ *
+ * rte_log_register() is called internally. If successful, the function then
+ * searches the list of EAL log level options for a matching regexp and picks
+ * the level from the last matching entry. If nothing from the list can be
+ * applied, the level is set to the user-defined default value.
+ *
+ * @param name
+ * Name for the log type to be registered
+ * @param level_def
+ * Fallback level to be set if the global list has no matching options
+ * @return
+ * - >=0: the newly registered log type
+ * - <0: rte_log_register() error value
+ */
+int rte_log_register_type_and_pick_level(const char *name, uint32_t level_def);
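For instance, an application could register its own log type as sketched below; RTE_LOG_INFO as the fallback level and the type name are assumptions for illustration:

    #include <stdint.h>
    #include <rte_log.h>

    static int app_logtype;

    static void
    init_logging(void)
    {
        /* fall back to RTE_LOG_INFO if no --log-level option matches */
        app_logtype = rte_log_register_type_and_pick_level("app.init",
                                                           RTE_LOG_INFO);
        if (app_logtype < 0)
            app_logtype = RTE_LOGTYPE_USER1; /* fall back to a built-in type */
        rte_log(RTE_LOG_INFO, (uint32_t)app_logtype, "logging initialized\n");
    }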
+
+/**
* Dump log information.
*
* Dump the global level and the registered log types.
diff --git a/lib/librte_eal/common/include/rte_malloc.h b/lib/librte_eal/common/include/rte_malloc.h
index f02a8ba1..a9fb7e45 100644
--- a/lib/librte_eal/common/include/rte_malloc.h
+++ b/lib/librte_eal/common/include/rte_malloc.h
@@ -13,6 +13,7 @@
#include <stdio.h>
#include <stddef.h>
+#include <rte_compat.h>
#include <rte_memory.h>
#ifdef __cplusplus
@@ -278,6 +279,15 @@ void
rte_malloc_dump_stats(FILE *f, const char *type);
/**
+ * Dump contents of all malloc heaps to a file.
+ *
+ * @param f
+ * A pointer to a file for output
+ */
+void __rte_experimental
+rte_malloc_dump_heaps(FILE *f);
+
+/**
* Set the maximum amount of allocated memory for this type.
*
* This is not yet implemented
diff --git a/lib/librte_eal/common/include/rte_malloc_heap.h b/lib/librte_eal/common/include/rte_malloc_heap.h
index ba99ed90..d43fa909 100644
--- a/lib/librte_eal/common/include/rte_malloc_heap.h
+++ b/lib/librte_eal/common/include/rte_malloc_heap.h
@@ -13,12 +13,18 @@
/* Number of free lists per heap, grouped by size. */
#define RTE_HEAP_NUM_FREELISTS 13
+/* dummy definition, for pointers */
+struct malloc_elem;
+
/**
* Structure to hold malloc heap
*/
struct malloc_heap {
rte_spinlock_t lock;
LIST_HEAD(, malloc_elem) free_head[RTE_HEAP_NUM_FREELISTS];
+ struct malloc_elem *volatile first;
+ struct malloc_elem *volatile last;
+
unsigned alloc_count;
size_t total_size;
} __rte_cache_aligned;
diff --git a/lib/librte_eal/common/include/rte_memory.h b/lib/librte_eal/common/include/rte_memory.h
index 302f865b..c4b7f4cf 100644
--- a/lib/librte_eal/common/include/rte_memory.h
+++ b/lib/librte_eal/common/include/rte_memory.h
@@ -20,8 +20,12 @@ extern "C" {
#endif
#include <rte_common.h>
+#include <rte_compat.h>
#include <rte_config.h>
+/* forward declaration for pointers */
+struct rte_memseg_list;
+
__extension__
enum rte_page_sizes {
RTE_PGSIZE_4K = 1ULL << 12,
@@ -79,6 +83,8 @@ typedef uint64_t rte_iova_t;
/**
* Physical memory segment descriptor.
*/
+#define RTE_MEMSEG_FLAG_DO_NOT_FREE (1 << 0)
+/**< Prevent this segment from being freed back to the OS. */
struct rte_memseg {
RTE_STD_C11
union {
@@ -95,6 +101,7 @@ struct rte_memseg {
int32_t socket_id; /**< NUMA socket ID. */
uint32_t nchannel; /**< Number of channels. */
uint32_t nrank; /**< Number of ranks. */
+ uint32_t flags; /**< Memseg-specific flags */
} __rte_packed;
/**
@@ -130,25 +137,192 @@ phys_addr_t rte_mem_virt2phy(const void *virt);
rte_iova_t rte_mem_virt2iova(const void *virt);
/**
- * Get the layout of the available physical memory.
+ * Get virtual memory address corresponding to iova address.
+ *
+ * @note This function read-locks the memory hotplug subsystem, and thus cannot
+ * be used within memory-related callback functions.
+ *
+ * @param iova
+ * The iova address.
+ * @return
+ * Virtual address corresponding to iova address (or NULL if address does not
+ * exist within DPDK memory map).
+ */
+__rte_experimental void *
+rte_mem_iova2virt(rte_iova_t iova);
+
+/**
+ * Get memseg to which a particular virtual address belongs.
+ *
+ * @param virt
+ * The virtual address.
+ * @param msl
+ * The memseg list in which to look up based on ``virt`` address
+ * (can be NULL).
+ * @return
+ * Memseg pointer on success, or NULL on error.
+ */
+__rte_experimental struct rte_memseg *
+rte_mem_virt2memseg(const void *virt, const struct rte_memseg_list *msl);
+
+/**
+ * Get memseg list corresponding to virtual memory address.
+ *
+ * @param virt
+ * The virtual address.
+ * @return
+ * Memseg list to which this virtual address belongs.
+ */
+__rte_experimental struct rte_memseg_list *
+rte_mem_virt2memseg_list(const void *virt);
+
+/**
+ * Memseg walk function prototype.
+ *
+ * Returning 0 will continue walk
+ * Returning 1 will stop the walk
+ * Returning -1 will stop the walk and report error
+ */
+typedef int (*rte_memseg_walk_t)(const struct rte_memseg_list *msl,
+ const struct rte_memseg *ms, void *arg);
+
+/**
+ * Memseg contig walk function prototype. This will trigger a callback on every
+ * VA-contiguous are starting at memseg ``ms``, so total valid VA space at each
+ * callback call will be [``ms->addr``, ``ms->addr + len``).
+ *
+ * Returning 0 will continue walk
+ * Returning 1 will stop the walk
+ * Returning -1 will stop the walk and report error
+ */
+typedef int (*rte_memseg_contig_walk_t)(const struct rte_memseg_list *msl,
+ const struct rte_memseg *ms, size_t len, void *arg);
+
+/**
+ * Memseg list walk function prototype. This will trigger a callback on every
+ * allocated memseg list.
+ *
+ * Returning 0 will continue walk
+ * Returning 1 will stop the walk
+ * Returning -1 will stop the walk and report error
+ */
+typedef int (*rte_memseg_list_walk_t)(const struct rte_memseg_list *msl,
+ void *arg);
+
+/**
+ * Walk list of all memsegs.
+ *
+ * @note This function read-locks the memory hotplug subsystem, and thus cannot
+ * be used within memory-related callback functions.
+ *
+ * @param func
+ * Iterator function
+ * @param arg
+ * Argument passed to iterator
+ * @return
+ * 0 if walked over the entire list
+ * 1 if stopped by the user
+ * -1 if user function reported error
+ */
+int __rte_experimental
+rte_memseg_walk(rte_memseg_walk_t func, void *arg);
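For example, a walk callback that simply counts memsegs, following the return-value convention in the prototype comment above (names are illustrative):

    #include <rte_memory.h>

    static int
    count_cb(const struct rte_memseg_list *msl, const struct rte_memseg *ms,
             void *arg)
    {
        unsigned int *count = arg;

        (void)msl;
        (void)ms;
        (*count)++;
        return 0; /* 0 keeps the walk going */
    }

    static unsigned int
    count_memsegs(void)
    {
        unsigned int n = 0;

        rte_memseg_walk(count_cb, &n);
        return n;
    }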
+
+/**
+ * Walk each VA-contiguous area.
+ *
+ * @note This function read-locks the memory hotplug subsystem, and thus cannot
+ * be used within memory-related callback functions.
+ *
+ * @param func
+ * Iterator function
+ * @param arg
+ * Argument passed to iterator
+ * @return
+ * 0 if walked over the entire list
+ * 1 if stopped by the user
+ * -1 if user function reported error
+ */
+int __rte_experimental
+rte_memseg_contig_walk(rte_memseg_contig_walk_t func, void *arg);
+
+/**
+ * Walk each allocated memseg list.
+ *
+ * @note This function read-locks the memory hotplug subsystem, and thus cannot
+ * be used within memory-related callback functions.
+ *
+ * @param func
+ * Iterator function
+ * @param arg
+ * Argument passed to iterator
+ * @return
+ * 0 if walked over the entire list
+ * 1 if stopped by the user
+ * -1 if user function reported error
+ */
+int __rte_experimental
+rte_memseg_list_walk(rte_memseg_list_walk_t func, void *arg);
+
+/**
+ * Walk list of all memsegs without performing any locking.
+ *
+ * @note This function does not perform any locking, and is only safe to call
+ * from within memory-related callback functions.
+ *
+ * @param func
+ * Iterator function
+ * @param arg
+ * Argument passed to iterator
+ * @return
+ * 0 if walked over the entire list
+ * 1 if stopped by the user
+ * -1 if user function reported error
+ */
+int __rte_experimental
+rte_memseg_walk_thread_unsafe(rte_memseg_walk_t func, void *arg);
+
+/**
+ * Walk each VA-contiguous area without performing any locking.
*
- * It can be useful for an application to have the full physical
- * memory layout to decide the size of a memory zone to reserve. This
- * table is stored in rte_config (see rte_eal_get_configuration()).
+ * @note This function does not perform any locking, and is only safe to call
+ * from within memory-related callback functions.
*
+ * @param func
+ * Iterator function
+ * @param arg
+ * Argument passed to iterator
* @return
- * - On success, return a pointer to a read-only table of struct
- * rte_physmem_desc elements, containing the layout of all
- * addressable physical memory. The last element of the table
- * contains a NULL address.
- * - On error, return NULL. This should not happen since it is a fatal
- * error that will probably cause the entire system to panic.
+ * 0 if walked over the entire list
+ * 1 if stopped by the user
+ * -1 if user function reported error
*/
-const struct rte_memseg *rte_eal_get_physmem_layout(void);
+int __rte_experimental
+rte_memseg_contig_walk_thread_unsafe(rte_memseg_contig_walk_t func, void *arg);
+
+/**
+ * Walk each allocated memseg list without performing any locking.
+ *
+ * @note This function does not perform any locking, and is only safe to call
+ * from within memory-related callback functions.
+ *
+ * @param func
+ * Iterator function
+ * @param arg
+ * Argument passed to iterator
+ * @return
+ * 0 if walked over the entire list
+ * 1 if stopped by the user
+ * -1 if user function reported error
+ */
+int __rte_experimental
+rte_memseg_list_walk_thread_unsafe(rte_memseg_list_walk_t func, void *arg);
/**
* Dump the physical memory layout to a file.
*
+ * @note This function read-locks the memory hotplug subsystem, and thus cannot
+ * be used within memory-related callback functions.
+ *
* @param f
* A pointer to a file for output
*/
@@ -157,6 +331,9 @@ void rte_dump_physmem_layout(FILE *f);
/**
* Get the total amount of available physical memory.
*
+ * @note This function read-locks the memory hotplug subsystem, and thus cannot
+ * be used within memory-related callback functions.
+ *
* @return
* The total amount of available physical memory in bytes.
*/
@@ -191,6 +368,137 @@ unsigned rte_memory_get_nrank(void);
*/
int rte_eal_using_phys_addrs(void);
+
+/**
+ * Enum indicating which kind of memory event has happened. Used by callbacks to
+ * distinguish between memory allocations and deallocations.
+ */
+enum rte_mem_event {
+ RTE_MEM_EVENT_ALLOC = 0, /**< Allocation event. */
+ RTE_MEM_EVENT_FREE, /**< Deallocation event. */
+};
+#define RTE_MEM_EVENT_CALLBACK_NAME_LEN 64
+/**< maximum length of callback name */
+
+/**
+ * Function typedef used to register callbacks for memory events.
+ */
+typedef void (*rte_mem_event_callback_t)(enum rte_mem_event event_type,
+ const void *addr, size_t len, void *arg);
+
+/**
+ * Function used to register callbacks for memory events.
+ *
+ * @note callbacks will happen while memory hotplug subsystem is write-locked,
+ * therefore some functions (e.g. `rte_memseg_walk()`) will cause a
+ * deadlock when called from within such callbacks.
+ *
+ * @note mem event callbacks not being supported is an expected error condition,
+ * so user code needs to handle this situation. In these cases, return
+ * value will be -1, and rte_errno will be set to ENOTSUP.
+ *
+ * @param name
+ * Name associated with specified callback to be added to the list.
+ *
+ * @param clb
+ * Callback function pointer.
+ *
+ * @param arg
+ * Argument to pass to the callback.
+ *
+ * @return
+ * 0 on successful callback register
+ * -1 on unsuccessful callback register, with rte_errno value indicating
+ * reason for failure.
+ */
+int __rte_experimental
+rte_mem_event_callback_register(const char *name, rte_mem_event_callback_t clb,
+ void *arg);
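A sketch of registering for the allocation/deallocation events described above; the callback name is arbitrary and the ENOTSUP handling follows the note above:

    #include <errno.h>
    #include <stdio.h>
    #include <rte_errno.h>
    #include <rte_memory.h>

    static void
    mem_event_cb(enum rte_mem_event event_type, const void *addr, size_t len,
                 void *arg)
    {
        (void)arg;
        printf("%s: %zu bytes at %p\n",
               event_type == RTE_MEM_EVENT_ALLOC ? "alloc" : "free", len, addr);
    }

    static void
    register_mem_events(void)
    {
        if (rte_mem_event_callback_register("app-mem-events", mem_event_cb,
                                            NULL) < 0 &&
            rte_errno == ENOTSUP)
            printf("memory event callbacks not supported in this mode\n");
    }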
+
+/**
+ * Function used to unregister callbacks for memory events.
+ *
+ * @param name
+ * Name associated with specified callback to be removed from the list.
+ *
+ * @param arg
+ * Argument to look for among callbacks with specified callback name.
+ *
+ * @return
+ * 0 on successful callback unregister
+ * -1 on unsuccessful callback unregister, with rte_errno value indicating
+ * reason for failure.
+ */
+int __rte_experimental
+rte_mem_event_callback_unregister(const char *name, void *arg);
+
+
+#define RTE_MEM_ALLOC_VALIDATOR_NAME_LEN 64
+/**< maximum length of alloc validator name */
+/**
+ * Function typedef used to register memory allocation validation callbacks.
+ *
+ * Returning 0 will allow allocation attempt to continue. Returning -1 will
+ * prevent allocation from succeeding.
+ */
+typedef int (*rte_mem_alloc_validator_t)(int socket_id,
+ size_t cur_limit, size_t new_len);
+
+/**
+ * @brief Register validator callback for memory allocations.
+ *
+ * Callbacks registered by this function will be called right before memory
+ * allocator is about to trigger allocation of more pages from the system if
+ * said allocation will bring total memory usage above specified limit on
+ * specified socket. User will be able to cancel pending allocation if callback
+ * returns -1.
+ *
+ * @note callbacks will happen while memory hotplug subsystem is write-locked,
+ * therefore some functions (e.g. `rte_memseg_walk()`) will cause a
+ * deadlock when called from within such callbacks.
+ *
+ * @note validator callbacks not being supported is an expected error condition,
+ * so user code needs to handle this situation. In these cases, return
+ * value will be -1, and rte_errno will be set to ENOTSUP.
+ *
+ * @param name
+ * Name associated with specified callback to be added to the list.
+ *
+ * @param clb
+ * Callback function pointer.
+ *
+ * @param socket_id
+ * Socket ID on which to watch for allocations.
+ *
+ * @param limit
+ * Limit above which to trigger callbacks.
+ *
+ * @return
+ * 0 on successful callback register
+ * -1 on unsuccessful callback register, with rte_errno value indicating
+ * reason for failure.
+ */
+int __rte_experimental
+rte_mem_alloc_validator_register(const char *name,
+ rte_mem_alloc_validator_t clb, int socket_id, size_t limit);
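For illustration, a validator that caps allocations on socket 0 could be registered as sketched below; the socket id and the 1 GB limit are arbitrary values:

    #include <rte_memory.h>

    static int
    cap_socket0(int socket_id, size_t cur_limit, size_t new_len)
    {
        (void)socket_id;
        /* returning -1 rejects an allocation that would exceed the limit */
        return new_len > cur_limit ? -1 : 0;
    }

    static void
    register_mem_cap(void)
    {
        rte_mem_alloc_validator_register("cap-socket0", cap_socket0, 0,
                                         1ULL << 30);
    }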
+
+/**
+ * @brief Unregister validator callback for memory allocations.
+ *
+ * @param name
+ * Name associated with specified callback to be removed from the list.
+ *
+ * @param socket_id
+ * Socket ID on which to watch for allocations.
+ *
+ * @return
+ * 0 on successful callback unregister
+ * -1 on unsuccessful callback unregister, with rte_errno value indicating
+ * reason for failure.
+ */
+int __rte_experimental
+rte_mem_alloc_validator_unregister(const char *name, int socket_id);
+
#ifdef __cplusplus
}
#endif
diff --git a/lib/librte_eal/common/include/rte_memzone.h b/lib/librte_eal/common/include/rte_memzone.h
index 2bfb2731..f478fa9e 100644
--- a/lib/librte_eal/common/include/rte_memzone.h
+++ b/lib/librte_eal/common/include/rte_memzone.h
@@ -23,6 +23,7 @@
*/
#include <stdio.h>
+#include <rte_compat.h>
#include <rte_memory.h>
#include <rte_common.h>
@@ -39,6 +40,7 @@ extern "C" {
#define RTE_MEMZONE_512MB 0x00040000 /**< Use 512MB pages. */
#define RTE_MEMZONE_4GB 0x00080000 /**< Use 4GB pages. */
#define RTE_MEMZONE_SIZE_HINT_ONLY 0x00000004 /**< Use available page size */
+#define RTE_MEMZONE_IOVA_CONTIG 0x00100000 /**< Ask for IOVA-contiguous memzone. */
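As an illustration of the new flag, a reservation for hardware-ring memory could be sketched as below; the zone name, size, and socket choice are assumptions for the example:

    #include <stdio.h>
    #include <rte_errno.h>
    #include <rte_lcore.h>
    #include <rte_memzone.h>

    static const struct rte_memzone *
    reserve_ring_memory(void)
    {
        /* request IOVA-contiguous backing, e.g. for a hardware ring */
        const struct rte_memzone *mz = rte_memzone_reserve("hw_ring", 16384,
                (int)rte_socket_id(), RTE_MEMZONE_IOVA_CONTIG);

        if (mz == NULL)
            printf("reservation failed: %s\n", rte_strerror(rte_errno));
        return mz;
    }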
/**
* A structure describing a memzone, which is a contiguous portion of
@@ -66,7 +68,6 @@ struct rte_memzone {
int32_t socket_id; /**< NUMA socket ID. */
uint32_t flags; /**< Characteristics of this memzone. */
- uint32_t memseg_id; /**< Memseg it belongs. */
} __attribute__((__packed__));
/**
@@ -76,6 +77,17 @@ struct rte_memzone {
* correctly filled memzone descriptor. If the allocation cannot be
* done, return NULL.
*
+ * @note Reserving memzones with len set to 0 will only attempt to allocate
+ * memzones from memory that is already available. It will not trigger any
+ * new allocations.
+ *
+ * @note When reserving memzones with len set to 0, it is preferable to also
+ * set a valid socket_id. Setting socket_id to SOCKET_ID_ANY is supported, but
+ * will likely not yield expected results. Specifically, the resulting memzone
+ * may not necessarily be the biggest memzone available, but rather the biggest
+ * memzone available on the socket corresponding to the lcore from which the
+ * reservation was made.
+ *
* @param name
* The name of the memzone. If it already exists, the function will
* fail and return NULL.
@@ -102,6 +114,9 @@ struct rte_memzone {
* If this flag is not set, the function
* will return error on an unavailable size
* request.
+ * - RTE_MEMZONE_IOVA_CONTIG - Ensure reserved memzone is IOVA-contiguous.
+ * This option should be used when allocating
+ * memory intended for hardware rings etc.
* @return
* A pointer to a correctly-filled read-only memzone descriptor, or NULL
* on error.
@@ -126,6 +141,17 @@ const struct rte_memzone *rte_memzone_reserve(const char *name,
* descriptor. If the allocation cannot be done or if the alignment
* is not a power of 2, returns NULL.
*
+ * @note Reserving memzones with len set to 0 will only attempt to allocate
+ * memzones from memory that is already available. It will not trigger any
+ * new allocations.
+ *
+ * @note When reserving memzones with len set to 0, it is preferable to also
+ * set a valid socket_id. Setting socket_id to SOCKET_ID_ANY is supported, but
+ * will likely not yield expected results. Specifically, the resulting memzone
+ * may not necessarily be the biggest memzone available, but rather the biggest
+ * memzone available on the socket corresponding to the lcore from which the
+ * reservation was made.
+ *
* @param name
* The name of the memzone. If it already exists, the function will
* fail and return NULL.
@@ -152,6 +178,9 @@ const struct rte_memzone *rte_memzone_reserve(const char *name,
* If this flag is not set, the function
* will return error on an unavailable size
* request.
+ * - RTE_MEMZONE_IOVA_CONTIG - Ensure reserved memzone is IOVA-contiguous.
+ * This option should be used when allocating
+ * memory intended for hardware rings etc.
* @param align
* Alignment for resulting memzone. Must be a power of 2.
* @return
@@ -181,6 +210,17 @@ const struct rte_memzone *rte_memzone_reserve_aligned(const char *name,
* boundary. That implies that requested length should be less or equal
* then boundary.
*
+ * @note Reserving memzones with len set to 0 will only attempt to allocate
+ * memzones from memory that is already available. It will not trigger any
+ * new allocations.
+ *
+ * @note When reserving memzones with len set to 0, it is preferable to also
+ * set a valid socket_id. Setting socket_id to SOCKET_ID_ANY is supported, but
+ * will likely not yield expected results. Specifically, the resulting memzone
+ * may not necessarily be the biggest memzone available, but rather the biggest
+ * memzone available on the socket corresponding to the lcore from which the
+ * reservation was made.
+ *
* @param name
* The name of the memzone. If it already exists, the function will
* fail and return NULL.
@@ -207,6 +247,9 @@ const struct rte_memzone *rte_memzone_reserve_aligned(const char *name,
* If this flag is not set, the function
* will return error on an unavailable size
* request.
+ * - RTE_MEMZONE_IOVA_CONTIG - Ensure reserved memzone is IOVA-contiguous.
+ * This option should be used when allocating
+ * memory intended for hardware rings etc.
* @param align
* Alignment for resulting memzone. Must be a power of 2.
* @param bound
diff --git a/lib/librte_eal/common/include/rte_pci_dev_feature_defs.h b/lib/librte_eal/common/include/rte_pci_dev_feature_defs.h
index 08222510..e12c2208 100644
--- a/lib/librte_eal/common/include/rte_pci_dev_feature_defs.h
+++ b/lib/librte_eal/common/include/rte_pci_dev_feature_defs.h
@@ -1,59 +1,5 @@
-/*-
- * This file is provided under a dual BSD/GPLv2 license. When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
- * The full GNU General Public License is included in this distribution
- * in the file called LICENSE.GPL.
- *
- * Contact Information:
- * Intel Corporation
- *
- * BSD LICENSE
- *
- * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * * Neither the name of Intel Corporation nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+/* SPDX-License-Identifier: (BSD-3-Clause OR GPL-2.0)
+ * Copyright(c) 2010-2014 Intel Corporation
*/
#ifndef _RTE_PCI_DEV_DEFS_H_
diff --git a/lib/librte_eal/common/include/rte_pci_dev_features.h b/lib/librte_eal/common/include/rte_pci_dev_features.h
index 67b986a6..6104123d 100644
--- a/lib/librte_eal/common/include/rte_pci_dev_features.h
+++ b/lib/librte_eal/common/include/rte_pci_dev_features.h
@@ -1,59 +1,5 @@
-/*-
- * This file is provided under a dual BSD/GPLv2 license. When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
- * The full GNU General Public License is included in this distribution
- * in the file called LICENSE.GPL.
- *
- * Contact Information:
- * Intel Corporation
- *
- * BSD LICENSE
- *
- * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * * Neither the name of Intel Corporation nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+/* SPDX-License-Identifier: (BSD-3-Clause OR GPL-2.0)
+ * Copyright(c) 2010-2014 Intel Corporation
*/
#ifndef _RTE_PCI_DEV_FEATURES_H
diff --git a/lib/librte_eal/common/include/rte_random.h b/lib/librte_eal/common/include/rte_random.h
index 63bb2808..b2ca1c20 100644
--- a/lib/librte_eal/common/include/rte_random.h
+++ b/lib/librte_eal/common/include/rte_random.h
@@ -31,7 +31,7 @@ extern "C" {
static inline void
rte_srand(uint64_t seedval)
{
- srand48((long unsigned int)seedval);
+ srand48((long)seedval);
}
/**
@@ -48,9 +48,9 @@ static inline uint64_t
rte_rand(void)
{
uint64_t val;
- val = lrand48();
+ val = (uint64_t)lrand48();
val <<= 32;
- val += lrand48();
+ val += (uint64_t)lrand48();
return val;
}
diff --git a/lib/librte_eal/common/include/rte_service.h b/lib/librte_eal/common/include/rte_service.h
index 211eb376..34b41aff 100644
--- a/lib/librte_eal/common/include/rte_service.h
+++ b/lib/librte_eal/common/include/rte_service.h
@@ -47,9 +47,6 @@ extern "C" {
#define RTE_SERVICE_CAP_MT_SAFE (1 << 0)
/**
- * @warning
- * @b EXPERIMENTAL: this API may change without prior notice
- *
* Return the number of services registered.
*
* The number of services registered can be passed to *rte_service_get_by_id*,
@@ -57,12 +54,9 @@ extern "C" {
*
* @return The number of services registered.
*/
-uint32_t __rte_experimental rte_service_get_count(void);
+uint32_t rte_service_get_count(void);
/**
- * @warning
- * @b EXPERIMENTAL: this API may change without prior notice
- *
* Return the id of a service by name.
*
* This function provides the id of the service using the service name as
@@ -84,24 +78,17 @@ uint32_t __rte_experimental rte_service_get_count(void);
* @retval -EINVAL Null *service_id* pointer provided
* @retval -ENODEV No such service registered
*/
-int32_t __rte_experimental rte_service_get_by_name(const char *name,
- uint32_t *service_id);
+int32_t rte_service_get_by_name(const char *name, uint32_t *service_id);
/**
- * @warning
- * @b EXPERIMENTAL: this API may change without prior notice
- *
* Return the name of the service.
*
* @return A pointer to the name of the service. The returned pointer remains
* in ownership of the service, and the application must not free it.
*/
-const char __rte_experimental *rte_service_get_name(uint32_t id);
+const char *rte_service_get_name(uint32_t id);
/**
- * @warning
- * @b EXPERIMENTAL: this API may change without prior notice
- *
* Check if a service has a specific capability.
*
* This function returns whether *service* implements *capability*.
@@ -109,13 +96,9 @@ const char __rte_experimental *rte_service_get_name(uint32_t id);
* @retval 1 Capability supported by this service instance
* @retval 0 Capability not supported by this service instance
*/
-int32_t __rte_experimental rte_service_probe_capability(uint32_t id,
- uint32_t capability);
+int32_t rte_service_probe_capability(uint32_t id, uint32_t capability);
/**
- * @warning
- * @b EXPERIMENTAL: this API may change without prior notice
- *
* Map or unmap a lcore to a service.
*
* Each core can be added or removed from running a specific service. This
@@ -134,13 +117,10 @@ int32_t __rte_experimental rte_service_probe_capability(uint32_t id,
* @retval 0 lcore map updated successfully
* @retval -EINVAL An invalid service or lcore was provided.
*/
-int32_t __rte_experimental rte_service_map_lcore_set(uint32_t service_id,
- uint32_t lcore, uint32_t enable);
+int32_t rte_service_map_lcore_set(uint32_t service_id, uint32_t lcore,
+ uint32_t enable);
/**
- * @warning
- * @b EXPERIMENTAL: this API may change without prior notice
- *
* Retrieve the mapping of an lcore to a service.
*
* @param service_id the service to apply the lcore to
@@ -150,13 +130,9 @@ int32_t __rte_experimental rte_service_map_lcore_set(uint32_t service_id,
* @retval 0 lcore is not mapped to service
* @retval -EINVAL An invalid service or lcore was provided.
*/
-int32_t __rte_experimental rte_service_map_lcore_get(uint32_t service_id,
- uint32_t lcore);
+int32_t rte_service_map_lcore_get(uint32_t service_id, uint32_t lcore);
/**
- * @warning
- * @b EXPERIMENTAL: this API may change without prior notice
- *
* Set the runstate of the service.
*
* Each service is either running or stopped. Setting a non-zero runstate
@@ -168,12 +144,9 @@ int32_t __rte_experimental rte_service_map_lcore_get(uint32_t service_id,
* @retval 0 The service was successfully started
* @retval -EINVAL Invalid service id
*/
-int32_t __rte_experimental rte_service_runstate_set(uint32_t id, uint32_t runstate);
+int32_t rte_service_runstate_set(uint32_t id, uint32_t runstate);
/**
- * @warning
- * @b EXPERIMENTAL: this API may change without prior notice
- *
* Get the runstate for the service with *id*. See *rte_service_runstate_set*
* for details of runstates. A service can call this function to ensure that
* the application has indicated that it will receive CPU cycles. Either a
@@ -186,12 +159,29 @@ int32_t __rte_experimental rte_service_runstate_set(uint32_t id, uint32_t runsta
* @retval 0 Service is stopped
* @retval -EINVAL Invalid service id
*/
-int32_t __rte_experimental rte_service_runstate_get(uint32_t id);
+int32_t rte_service_runstate_get(uint32_t id);
/**
* @warning
- * @b EXPERIMENTAL: this API may change without prior notice
+ * @b EXPERIMENTAL: this API may change, or be removed, without prior notice
+ *
+ * This function returns whether the service may be currently executing on
+ * at least one lcore, or definitely is not. This function can be used to
+ * determine if, after setting the service runstate to stopped, the service
+ * is still executing on a service lcore.
*
+ * Care must be taken if calling this function when the service runstate is
+ * running, since the result of this function may be incorrect by the time the
+ * function returns due to service cores running in parallel.
+ *
+ * @retval 1 Service may be running on one or more lcores
+ * @retval 0 Service is not running on any lcore
+ * @retval -EINVAL Invalid service id
+ */
+int32_t __rte_experimental
+rte_service_may_be_active(uint32_t id);
+
+/**
* Enable or disable the check for a service-core being mapped to the service.
 * An application can disable the check when it takes the responsibility to run a
* service itself using *rte_service_run_iter_on_app_lcore*.
@@ -202,13 +192,9 @@ int32_t __rte_experimental rte_service_runstate_get(uint32_t id);
* @retval 0 Success
* @retval -EINVAL Invalid service ID
*/
-int32_t __rte_experimental rte_service_set_runstate_mapped_check(uint32_t id,
- int32_t enable);
+int32_t rte_service_set_runstate_mapped_check(uint32_t id, int32_t enable);
/**
- * @warning
- * @b EXPERIMENTAL: this API may change without prior notice
- *
* This function runs a service callback from a non-service lcore.
*
* This function is designed to enable gradual porting to service cores, and
@@ -241,13 +227,10 @@ int32_t __rte_experimental rte_service_set_runstate_mapped_check(uint32_t id,
* @retval -ENOEXEC Service is not in a run-able state
* @retval -EINVAL Invalid service id
*/
-int32_t __rte_experimental rte_service_run_iter_on_app_lcore(uint32_t id,
+int32_t rte_service_run_iter_on_app_lcore(uint32_t id,
uint32_t serialize_multithread_unsafe);
/**
- * @warning
- * @b EXPERIMENTAL: this API may change without prior notice
- *
* Start a service core.
*
* Starting a core makes the core begin polling. Any services assigned to it
@@ -259,12 +242,9 @@ int32_t __rte_experimental rte_service_run_iter_on_app_lcore(uint32_t id,
* @retval -EINVAL Failed to start core. The *lcore_id* passed in is not
* currently assigned to be a service core.
*/
-int32_t __rte_experimental rte_service_lcore_start(uint32_t lcore_id);
+int32_t rte_service_lcore_start(uint32_t lcore_id);
/**
- * @warning
- * @b EXPERIMENTAL: this API may change without prior notice
- *
* Stop a service core.
*
* Stopping a core makes the core become idle, but remains assigned as a
@@ -278,12 +258,9 @@ int32_t __rte_experimental rte_service_lcore_start(uint32_t lcore_id);
* The application must stop the service first, and then stop the
* lcore.
*/
-int32_t __rte_experimental rte_service_lcore_stop(uint32_t lcore_id);
+int32_t rte_service_lcore_stop(uint32_t lcore_id);
/**
- * @warning
- * @b EXPERIMENTAL: this API may change without prior notice
- *
* Adds lcore to the list of service cores.
*
 * This function can be used at runtime in order to modify the service core
@@ -294,12 +271,9 @@ int32_t __rte_experimental rte_service_lcore_stop(uint32_t lcore_id);
* @retval -EALREADY lcore is already added to the service core list
* @retval -EINVAL Invalid lcore provided
*/
-int32_t __rte_experimental rte_service_lcore_add(uint32_t lcore);
+int32_t rte_service_lcore_add(uint32_t lcore);
/**
- * @warning
- * @b EXPERIMENTAL: this API may change without prior notice
- *
* Removes lcore from the list of service cores.
*
* This can fail if the core is not stopped, see *rte_service_core_stop*.
@@ -308,12 +282,9 @@ int32_t __rte_experimental rte_service_lcore_add(uint32_t lcore);
* @retval -EBUSY Lcore is not stopped, stop service core before removing.
* @retval -EINVAL failed to add lcore to service core mask.
*/
-int32_t __rte_experimental rte_service_lcore_del(uint32_t lcore);
+int32_t rte_service_lcore_del(uint32_t lcore);
/**
- * @warning
- * @b EXPERIMENTAL: this API may change without prior notice
- *
* Retrieve the number of service cores currently available.
*
* This function returns the integer count of service cores available. The
@@ -325,24 +296,18 @@ int32_t __rte_experimental rte_service_lcore_del(uint32_t lcore);
*
* @return The number of service cores currently configured.
*/
-int32_t __rte_experimental rte_service_lcore_count(void);
+int32_t rte_service_lcore_count(void);
/**
- * @warning
- * @b EXPERIMENTAL: this API may change without prior notice
- *
* Resets all service core mappings. This does not remove the service cores
* from duty, just unmaps all services / cores, and stops() the service cores.
* The runstate of services is not modified.
*
* @retval 0 Success
*/
-int32_t __rte_experimental rte_service_lcore_reset_all(void);
+int32_t rte_service_lcore_reset_all(void);
/**
- * @warning
- * @b EXPERIMENTAL: this API may change without prior notice
- *
* Enable or disable statistics collection for *service*.
*
* This function enables per core, per-service cycle count collection.
@@ -351,13 +316,9 @@ int32_t __rte_experimental rte_service_lcore_reset_all(void);
* @retval 0 Success
* @retval -EINVAL Invalid service pointer passed
*/
-int32_t __rte_experimental rte_service_set_stats_enable(uint32_t id,
- int32_t enable);
+int32_t rte_service_set_stats_enable(uint32_t id, int32_t enable);
/**
- * @warning
- * @b EXPERIMENTAL: this API may change without prior notice
- *
* Retrieve the list of currently enabled service cores.
*
* This function fills in an application supplied array, with each element
@@ -373,12 +334,9 @@ int32_t __rte_experimental rte_service_set_stats_enable(uint32_t id,
* service core list. No items have been populated, call this function
* with a size of at least *rte_service_core_count* items.
*/
-int32_t __rte_experimental rte_service_lcore_list(uint32_t array[], uint32_t n);
+int32_t rte_service_lcore_list(uint32_t array[], uint32_t n);
/**
- * @warning
- * @b EXPERIMENTAL: this API may change without prior notice
- *
 * Get the number of services running on the supplied lcore.
*
* @param lcore Id of the service core.
@@ -386,19 +344,16 @@ int32_t __rte_experimental rte_service_lcore_list(uint32_t array[], uint32_t n);
* @retval -EINVAL Invalid lcore provided
* @retval -ENOTSUP The provided lcore is not a service core.
*/
-int32_t __rte_experimental rte_service_lcore_count_services(uint32_t lcore);
+int32_t rte_service_lcore_count_services(uint32_t lcore);
/**
- * @warning
- * @b EXPERIMENTAL: this API may change without prior notice
- *
* Dumps any information available about the service. When id is UINT32_MAX,
* this function dumps info for all services.
*
* @retval 0 Statistics have been successfully dumped
* @retval -EINVAL Invalid service id provided
*/
-int32_t __rte_experimental rte_service_dump(FILE *f, uint32_t id);
+int32_t rte_service_dump(FILE *f, uint32_t id);
/**
* Returns the number of cycles that this service has consumed
@@ -411,28 +366,58 @@ int32_t __rte_experimental rte_service_dump(FILE *f, uint32_t id);
#define RTE_SERVICE_ATTR_CALL_COUNT 1
/**
- * @warning
- * @b EXPERIMENTAL: this API may change without prior notice
- *
* Get an attribute from a service.
*
* @retval 0 Success, the attribute value has been written to *attr_value*.
* -EINVAL Invalid id, attr_id or attr_value was NULL.
*/
-int32_t __rte_experimental rte_service_attr_get(uint32_t id, uint32_t attr_id,
+int32_t rte_service_attr_get(uint32_t id, uint32_t attr_id,
uint32_t *attr_value);
/**
+ * Reset all attribute values of a service.
+ *
+ * @param id The service to reset all statistics of
+ * @retval 0 Successfully reset attributes
+ * -EINVAL Invalid service id provided
+ */
+int32_t rte_service_attr_reset_all(uint32_t id);
+
+/**
+ * Returns the number of times the service runner has looped.
+ */
+#define RTE_SERVICE_LCORE_ATTR_LOOPS 0
+
+/**
* @warning
* @b EXPERIMENTAL: this API may change without prior notice
*
- * Reset all attribute values of a service.
+ * Get an attribute from a service core.
*
- * @param id The service to reset all statistics of
+ * @param lcore Id of the service core.
+ * @param attr_id Id of the attribute to be retrieved.
+ * @param [out] attr_value Pointer to storage in which to write retrieved value.
+ * @retval 0 Success, the attribute value has been written to *attr_value*.
+ * -EINVAL Invalid lcore, attr_id or attr_value was NULL.
+ * -ENOTSUP lcore is not a service core.
+ */
+int32_t __rte_experimental
+rte_service_lcore_attr_get(uint32_t lcore, uint32_t attr_id,
+ uint64_t *attr_value);
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice
+ *
+ * Reset all attribute values of a service core.
+ *
+ * @param lcore The service core to reset all the statistics of
* @retval 0 Successfully reset attributes
* -EINVAL Invalid service id provided
+ * -ENOTSUP lcore is not a service core.
*/
-int32_t __rte_experimental rte_service_attr_reset_all(uint32_t id);
+int32_t __rte_experimental
+rte_service_lcore_attr_reset_all(uint32_t lcore);
#ifdef __cplusplus
}
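
With the bulk of the service API now stable, only the two additions above remain experimental. A hedged sketch of how they might be used together when quiescing a service; service_id and lcore_id are hypothetical values the application obtained earlier:

    #include <stdio.h>
    #include <inttypes.h>
    #include <rte_pause.h>
    #include <rte_service.h>

    /* Sketch: stop a service, wait until no service lcore can still be
     * executing it, then read the per-lcore loop counter.
     */
    static void
    quiesce_service(uint32_t service_id, uint32_t lcore_id)
    {
        uint64_t loops = 0;

        rte_service_runstate_set(service_id, 0);
        while (rte_service_may_be_active(service_id) == 1)
            rte_pause();    /* a core may still be mid-iteration */

        if (rte_service_lcore_attr_get(lcore_id,
                RTE_SERVICE_LCORE_ATTR_LOOPS, &loops) == 0)
            printf("lcore %u looped %" PRIu64 " times\n", lcore_id, loops);
    }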
diff --git a/lib/librte_eal/common/include/rte_service_component.h b/lib/librte_eal/common/include/rte_service_component.h
index 9ba4aa29..c12adbc2 100644
--- a/lib/librte_eal/common/include/rte_service_component.h
+++ b/lib/librte_eal/common/include/rte_service_component.h
@@ -13,17 +13,11 @@
#include <rte_service.h>
/**
- * @warning
- * @b EXPERIMENTAL: this API may change without prior notice
- *
* Signature of callback function to run a service.
*/
typedef int32_t (*rte_service_func)(void *args);
/**
- * @warning
- * @b EXPERIMENTAL: this API may change without prior notice
- *
* The specification of a service.
*
* This struct contains metadata about the service itself, the callback
@@ -47,9 +41,6 @@ struct rte_service_spec {
};
/**
- * @warning
- * @b EXPERIMENTAL: this API may change without prior notice
- *
* Register a new service.
*
 * A service represents a component that requires CPU time periodically to
@@ -73,14 +64,10 @@ struct rte_service_spec {
* -EINVAL Attempted to register an invalid service (eg, no callback
* set)
*/
-int32_t __rte_experimental
-rte_service_component_register(const struct rte_service_spec *spec,
- uint32_t *service_id);
+int32_t rte_service_component_register(const struct rte_service_spec *spec,
+ uint32_t *service_id);
/**
- * @warning
- * @b EXPERIMENTAL: this API may change without prior notice
- *
* Unregister a service component.
*
* The service being removed must be stopped before calling this function.
@@ -89,12 +76,9 @@ rte_service_component_register(const struct rte_service_spec *spec,
* @retval -EBUSY The service is currently running, stop the service before
* calling unregister. No action has been taken.
*/
-int32_t __rte_experimental rte_service_component_unregister(uint32_t id);
+int32_t rte_service_component_unregister(uint32_t id);
/**
- * @warning
- * @b EXPERIMENTAL: this API may change without prior notice
- *
 * Private function to allow EAL to initialize default mappings.
 *
 * This function iterates all the services, and maps them to the available
@@ -107,12 +91,9 @@ int32_t __rte_experimental rte_service_component_unregister(uint32_t id);
* @retval -ENODEV Error in enabling service lcore on a service
* @retval -ENOEXEC Error when starting services
*/
-int32_t __rte_experimental rte_service_start_with_defaults(void);
+int32_t rte_service_start_with_defaults(void);
/**
- * @warning
- * @b EXPERIMENTAL: this API may change without prior notice
- *
* Set the backend runstate of a component.
*
* This function allows services to be registered at startup, but not yet
@@ -124,13 +105,9 @@ int32_t __rte_experimental rte_service_start_with_defaults(void);
*
* @retval 0 Success
*/
-int32_t __rte_experimental rte_service_component_runstate_set(uint32_t id,
- uint32_t runstate);
+int32_t rte_service_component_runstate_set(uint32_t id, uint32_t runstate);
/**
- * @warning
- * @b EXPERIMENTAL: this API may change without prior notice
- *
* Initialize the service library.
*
* In order to use the service library, it must be initialized. EAL initializes
@@ -142,14 +119,11 @@ int32_t __rte_experimental rte_service_component_runstate_set(uint32_t id,
int32_t rte_service_init(void);
/**
- * @warning
- * @b EXPERIMENTAL: this API may change without prior notice
- *
* @internal Free up the memory that has been initialized.
* This routine is to be invoked prior to process termination.
*
* @retval None
*/
-void __rte_experimental rte_service_finalize(void);
+void rte_service_finalize(void);
#endif /* _RTE_SERVICE_PRIVATE_H_ */
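
The component-registration path loses its experimental tag as well. A sketch of registering a component with the stabilised API (my_poll_cb and the service name are hypothetical):

    #include <stdio.h>
    #include <string.h>
    #include <rte_service_component.h>

    static int32_t
    my_poll_cb(void *args)
    {
        (void)args;     /* hypothetical per-iteration work would go here */
        return 0;
    }

    /* Sketch: register the component, then mark its backend runstate as
     * runnable; the application still controls the public runstate.
     */
    static int
    register_my_service(uint32_t *service_id)
    {
        struct rte_service_spec spec;

        memset(&spec, 0, sizeof(spec));
        snprintf(spec.name, sizeof(spec.name), "my_poller");
        spec.callback = my_poll_cb;
        spec.socket_id = 0;

        if (rte_service_component_register(&spec, service_id) != 0)
            return -1;
        return rte_service_component_runstate_set(*service_id, 1);
    }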
diff --git a/lib/librte_eal/common/include/rte_string_fns.h b/lib/librte_eal/common/include/rte_string_fns.h
index e97047a4..97597a14 100644
--- a/lib/librte_eal/common/include/rte_string_fns.h
+++ b/lib/librte_eal/common/include/rte_string_fns.h
@@ -15,6 +15,8 @@
extern "C" {
#endif
+#include <stdio.h>
+
/**
* Takes string "string" parameter and splits it at character "delim"
* up to maxtokens-1 times - to give "maxtokens" resulting tokens. Like
@@ -45,6 +47,35 @@ int
rte_strsplit(char *string, int stringlen,
char **tokens, int maxtokens, char delim);
+/**
+ * @internal
+ * DPDK-specific version of strlcpy for systems without
+ * libc or libbsd copies of the function
+ */
+static inline size_t
+rte_strlcpy(char *dst, const char *src, size_t size)
+{
+ return (size_t)snprintf(dst, size, "%s", src);
+}
+
+/* pull in a strlcpy function */
+#ifdef RTE_EXEC_ENV_BSDAPP
+#include <string.h>
+#ifndef __BSD_VISIBLE /* non-standard functions are hidden */
+#define strlcpy(dst, src, size) rte_strlcpy(dst, src, size)
+#endif
+
+
+#else /* non-BSD platforms */
+#ifdef RTE_USE_LIBBSD
+#include <bsd/string.h>
+
+#else /* no BSD header files, create own */
+#define strlcpy(dst, src, size) rte_strlcpy(dst, src, size)
+
+#endif /* RTE_USE_LIBBSD */
+#endif /* BSDAPP */
+
#ifdef __cplusplus
}
#endif
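
Like BSD strlcpy(), the snprintf()-based fallback returns the full length of the source string, so truncation is still detectable. A brief sketch (copy_name is a hypothetical helper):

    #include <rte_string_fns.h>

    /* Sketch: a return value >= the destination size means the copy was
     * truncated, whether strlcpy() came from libc, libbsd or rte_strlcpy().
     */
    static int
    copy_name(char *dst, size_t dst_size, const char *src)
    {
        if (strlcpy(dst, src, dst_size) >= dst_size)
            return -1;  /* truncated */
        return 0;
    }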
diff --git a/lib/librte_eal/common/include/rte_tailq.h b/lib/librte_eal/common/include/rte_tailq.h
index 8dccaefc..9b01abb2 100644
--- a/lib/librte_eal/common/include/rte_tailq.h
+++ b/lib/librte_eal/common/include/rte_tailq.h
@@ -119,8 +119,7 @@ struct rte_tailq_head *rte_eal_tailq_lookup(const char *name);
int rte_eal_tailq_register(struct rte_tailq_elem *t);
#define EAL_REGISTER_TAILQ(t) \
-RTE_INIT(tailqinitfn_ ##t); \
-static void tailqinitfn_ ##t(void) \
+RTE_INIT(tailqinitfn_ ##t) \
{ \
if (rte_eal_tailq_register(&t) < 0) \
rte_panic("Cannot initialize tailq: %s\n", t.name); \
diff --git a/lib/librte_eal/common/include/rte_uuid.h b/lib/librte_eal/common/include/rte_uuid.h
new file mode 100644
index 00000000..2c846b5f
--- /dev/null
+++ b/lib/librte_eal/common/include/rte_uuid.h
@@ -0,0 +1,129 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright (C) 1996, 1997, 1998 Theodore Ts'o.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, and the entire permission notice in its entirety,
+ * including the disclaimer of warranties.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. The name of the author may not be used to endorse or promote
+ * products derived from this software without specific prior
+ * written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ALL OF
+ * WHICH ARE HEREBY DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT
+ * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
+ * USE OF THIS SOFTWARE, EVEN IF NOT ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ */
+/**
+ * @file
+ *
+ * UUID related functions originally from libuuid
+ */
+
+#ifndef _RTE_UUID_H_
+#define _RTE_UUID_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <stdbool.h>
+
+/**
+ * Struct describing a Universally Unique Identifier
+ */
+typedef unsigned char rte_uuid_t[16];
+
+/**
+ * Helper for defining UUID values for id tables.
+ */
+#define RTE_UUID_INIT(a, b, c, d, e) { \
+ ((a) >> 24) & 0xff, ((a) >> 16) & 0xff, \
+ ((a) >> 8) & 0xff, (a) & 0xff, \
+ ((b) >> 8) & 0xff, (b) & 0xff, \
+ ((c) >> 8) & 0xff, (c) & 0xff, \
+ ((d) >> 8) & 0xff, (d) & 0xff, \
+ ((e) >> 40) & 0xff, ((e) >> 32) & 0xff, \
+ ((e) >> 24) & 0xff, ((e) >> 16) & 0xff, \
+ ((e) >> 8) & 0xff, (e) & 0xff \
+}
+
+/**
+ * Test if UUID is all zeros.
+ *
+ * @param uu
+ * The uuid to check.
+ * @return
+ *   true if the UUID is the null value, false otherwise
+ */
+bool rte_uuid_is_null(const rte_uuid_t uu);
+
+/**
+ * Copy uuid.
+ *
+ * @param dst
+ * Destination uuid
+ * @param src
+ * Source uuid
+ */
+static inline void rte_uuid_copy(rte_uuid_t dst, const rte_uuid_t src)
+{
+ memcpy(dst, src, sizeof(rte_uuid_t));
+}
+
+/**
+ * Compare two UUIDs
+ *
+ * @param a
+ * A UUID to compare
+ * @param b
+ * A UUID to compare
+ * @return
+ *   returns an integer less than, equal to, or greater than zero if UUID a
+ *   is less than, equal to, or greater than UUID b.
+ */
+int rte_uuid_compare(const rte_uuid_t a, const rte_uuid_t b);
+
+/**
+ * Extract UUID from string
+ *
+ * @param in
+ * Pointer to string of characters to convert
+ * @param uu
+ * Destination UUID
+ * @return
+ *   Returns 0 on success, and -1 if the string is not a valid UUID.
+ */
+int rte_uuid_parse(const char *in, rte_uuid_t uu);
+
+/**
+ * Convert UUID to string
+ *
+ * @param uu
+ * UUID to format
+ * @param out
+ * Resulting string buffer
+ * @param len
+ *   Size of the available string buffer
+ */
+#define RTE_UUID_STRLEN (36 + 1)
+void rte_uuid_unparse(const rte_uuid_t uu, char *out, size_t len);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* RTE_UUID_H */
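
A sketch of round-tripping a UUID string through the new helpers (print_uuid and its caller-provided string are hypothetical):

    #include <stdio.h>
    #include <rte_uuid.h>

    static int
    print_uuid(const char *text)
    {
        rte_uuid_t uu;
        char buf[RTE_UUID_STRLEN];

        if (rte_uuid_parse(text, uu) != 0)
            return -1;      /* not a valid UUID string */

        rte_uuid_unparse(uu, buf, sizeof(buf));
        printf("parsed UUID: %s\n", buf);
        return rte_uuid_is_null(uu) ? 1 : 0;
    }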
diff --git a/lib/librte_eal/common/include/rte_version.h b/lib/librte_eal/common/include/rte_version.h
index 8173802b..7c6714a2 100644
--- a/lib/librte_eal/common/include/rte_version.h
+++ b/lib/librte_eal/common/include/rte_version.h
@@ -32,7 +32,7 @@ extern "C" {
/**
* Minor version/month number i.e. the mm in yy.mm.z
*/
-#define RTE_VER_MONTH 02
+#define RTE_VER_MONTH 8
/**
* Patch level number i.e. the z in yy.mm.z
diff --git a/lib/librte_eal/common/include/rte_vfio.h b/lib/librte_eal/common/include/rte_vfio.h
index e981a622..5ca13fcc 100644
--- a/lib/librte_eal/common/include/rte_vfio.h
+++ b/lib/librte_eal/common/include/rte_vfio.h
@@ -5,6 +5,15 @@
#ifndef _RTE_VFIO_H_
#define _RTE_VFIO_H_
+/**
+ * @file
+ * RTE VFIO. This library provides various VFIO related utility functions.
+ */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
/*
* determine if VFIO is present on the system
*/
@@ -28,6 +37,20 @@
#define VFIO_NOIOMMU_MODE \
"/sys/module/vfio/parameters/enable_unsafe_noiommu_mode"
+/* NOIOMMU is defined from kernel version 4.5 onwards */
+#ifdef VFIO_NOIOMMU_IOMMU
+#define RTE_VFIO_NOIOMMU VFIO_NOIOMMU_IOMMU
+#else
+#define RTE_VFIO_NOIOMMU 8
+#endif
+
+#else /* not VFIO_PRESENT */
+
+/* we don't need an actual definition, only pointer is used */
+struct vfio_device_info;
+
+#endif /* VFIO_PRESENT */
+
/**
* Setup vfio_cfg for the device identified by its address.
* It discovers the configured I/O MMU groups or sets a new one for the device.
@@ -119,10 +142,226 @@ int rte_vfio_is_enabled(const char *modname);
*/
int rte_vfio_noiommu_is_enabled(void);
-/* remove group fd from internal VFIO group fd array */
+/**
+ * Remove group fd from internal VFIO group fd array.
+ *
+ * This function is only relevant to linux and will return
+ * an error on BSD.
+ *
+ * @param vfio_group_fd
+ *   VFIO Group FD.
+ *
+ * @return
+ * 0 on success.
+ * <0 on failure.
+ */
int
rte_vfio_clear_group(int vfio_group_fd);
-#endif /* VFIO_PRESENT */
+/**
+ * Map memory region for use with VFIO.
+ *
+ * @note Requires at least one device to be attached at the time of
+ *       mapping. DMA maps done via this API will only apply to the default
+ *       container and will not apply to any of the containers created
+ * via rte_vfio_container_create().
+ *
+ * @param vaddr
+ * Starting virtual address of memory to be mapped.
+ *
+ * @param iova
+ * Starting IOVA address of memory to be mapped.
+ *
+ * @param len
+ * Length of memory segment being mapped.
+ *
+ * @return
+ * 0 if success.
+ * -1 on error.
+ */
+int
+rte_vfio_dma_map(uint64_t vaddr, uint64_t iova, uint64_t len);
+
+
+/**
+ * Unmap memory region from VFIO.
+ *
+ * @param vaddr
+ * Starting virtual address of memory to be unmapped.
+ *
+ * @param iova
+ * Starting IOVA address of memory to be unmapped.
+ *
+ * @param len
+ * Length of memory segment being unmapped.
+ *
+ * @return
+ * 0 if success.
+ * -1 on error.
+ */
+
+int
+rte_vfio_dma_unmap(uint64_t vaddr, uint64_t iova, uint64_t len);
+/**
+ * Parse IOMMU group number for a device
+ *
+ * This function is only relevant to linux and will return
+ * an error on BSD.
+ *
+ * @param sysfs_base
+ * sysfs path prefix.
+ *
+ * @param dev_addr
+ * device location.
+ *
+ * @param iommu_group_num
+ * iommu group number
+ *
+ * @return
+ * >0 on success
+ * 0 for non-existent group or VFIO
+ * <0 for errors
+ */
+int
+rte_vfio_get_group_num(const char *sysfs_base,
+ const char *dev_addr, int *iommu_group_num);
+
+/**
+ * Open VFIO container fd or get an existing one
+ *
+ * This function is only relevant to linux and will return
+ * an error on BSD.
+ *
+ * @return
+ * > 0 container fd
+ * < 0 for errors
+ */
+int
+rte_vfio_get_container_fd(void);
+
+/**
+ * Open VFIO group fd or get an existing one
+ *
+ * This function is only relevant to linux and will return
+ * an error on BSD.
+ *
+ * @param iommu_group_num
+ * iommu group number
+ *
+ * @return
+ * > 0 group fd
+ * < 0 for errors
+ */
+int
+rte_vfio_get_group_fd(int iommu_group_num);
+
+/**
+ * Create a new container for device binding.
+ *
+ * @note Any newly allocated DPDK memory will not be mapped into these
+ *       containers by default; the user needs to manage DMA mappings for
+ * any container created by this API.
+ *
+ * @return
+ * the container fd if successful
+ * <0 if failed
+ */
+int
+rte_vfio_container_create(void);
+
+/**
+ * Destroy the container, unbind all vfio groups within it.
+ *
+ * @param container_fd
+ * the container fd to destroy
+ *
+ * @return
+ * 0 if successful
+ * <0 if failed
+ */
+int
+rte_vfio_container_destroy(int container_fd);
+
+/**
+ * Bind an IOMMU group to a container.
+ *
+ * @param container_fd
+ * the container's fd
+ *
+ * @param iommu_group_num
+ * the iommu group number to bind to container
+ *
+ * @return
+ * group fd if successful
+ * <0 if failed
+ */
+int
+rte_vfio_container_group_bind(int container_fd, int iommu_group_num);
+
+/**
+ * Unbind an IOMMU group from a container.
+ *
+ * @param container_fd
+ * the container fd of container
+ *
+ * @param iommu_group_num
+ * the iommu group number to delete from container
+ *
+ * @return
+ * 0 if successful
+ * <0 if failed
+ */
+int
+rte_vfio_container_group_unbind(int container_fd, int iommu_group_num);
+
+/**
+ * Perform DMA mapping for devices in a container.
+ *
+ * @param container_fd
+ * the specified container fd
+ *
+ * @param vaddr
+ * Starting virtual address of memory to be mapped.
+ *
+ * @param iova
+ * Starting IOVA address of memory to be mapped.
+ *
+ * @param len
+ * Length of memory segment being mapped.
+ *
+ * @return
+ * 0 if successful
+ * <0 if failed
+ */
+int
+rte_vfio_container_dma_map(int container_fd, uint64_t vaddr,
+ uint64_t iova, uint64_t len);
+
+/**
+ * Perform DMA unmapping for devices in a container.
+ *
+ * @param container_fd
+ * the specified container fd
+ *
+ * @param vaddr
+ * Starting virtual address of memory to be unmapped.
+ *
+ * @param iova
+ * Starting IOVA address of memory to be unmapped.
+ *
+ * @param len
+ * Length of memory segment being unmapped.
+ *
+ * @return
+ * 0 if successful
+ * <0 if failed
+ */
+int
+rte_vfio_container_dma_unmap(int container_fd, uint64_t vaddr,
+ uint64_t iova, uint64_t len);
+
+#ifdef __cplusplus
+}
+#endif
#endif /* _RTE_VFIO_H_ */
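
A hedged sketch stitching the new container calls together in the order they are typically used (all parameters are hypothetical; on BSD every call simply returns an error):

    #include <rte_vfio.h>

    static int
    map_for_device(const char *sysfs_base, const char *dev_addr,
            uint64_t vaddr, uint64_t iova, uint64_t len)
    {
        int container_fd, iommu_group_num;

        container_fd = rte_vfio_container_create();
        if (container_fd < 0)
            return -1;

        if (rte_vfio_get_group_num(sysfs_base, dev_addr,
                &iommu_group_num) <= 0)
            goto err;

        if (rte_vfio_container_group_bind(container_fd, iommu_group_num) < 0)
            goto err;

        /* DMA maps on a user-created container must be managed explicitly */
        if (rte_vfio_container_dma_map(container_fd, vaddr, iova, len) < 0)
            goto err;

        return container_fd;
    err:
        rte_vfio_container_destroy(container_fd);
        return -1;
    }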
diff --git a/lib/librte_eal/common/malloc_elem.c b/lib/librte_eal/common/malloc_elem.c
index 0cadc8af..e0a8ed15 100644
--- a/lib/librte_eal/common/malloc_elem.c
+++ b/lib/librte_eal/common/malloc_elem.c
@@ -1,10 +1,12 @@
/* SPDX-License-Identifier: BSD-3-Clause
* Copyright(c) 2010-2014 Intel Corporation
*/
+#include <inttypes.h>
#include <stdint.h>
#include <stddef.h>
#include <stdio.h>
#include <string.h>
+#include <unistd.h>
#include <sys/queue.h>
#include <rte_memory.h>
@@ -16,21 +18,100 @@
#include <rte_common.h>
#include <rte_spinlock.h>
+#include "eal_internal_cfg.h"
+#include "eal_memalloc.h"
#include "malloc_elem.h"
#include "malloc_heap.h"
-#define MIN_DATA_SIZE (RTE_CACHE_LINE_SIZE)
+size_t
+malloc_elem_find_max_iova_contig(struct malloc_elem *elem, size_t align)
+{
+ void *cur_page, *contig_seg_start, *page_end, *cur_seg_end;
+ void *data_start, *data_end;
+ rte_iova_t expected_iova;
+ struct rte_memseg *ms;
+ size_t page_sz, cur, max;
+
+ page_sz = (size_t)elem->msl->page_sz;
+ data_start = RTE_PTR_ADD(elem, MALLOC_ELEM_HEADER_LEN);
+ data_end = RTE_PTR_ADD(elem, elem->size - MALLOC_ELEM_TRAILER_LEN);
+ /* segment must start after header and with specified alignment */
+ contig_seg_start = RTE_PTR_ALIGN_CEIL(data_start, align);
+
+ /* if we're in IOVA as VA mode, or if we're in legacy mode with
+ * hugepages, all elements are IOVA-contiguous.
+ */
+ if (rte_eal_iova_mode() == RTE_IOVA_VA ||
+ (internal_config.legacy_mem && rte_eal_has_hugepages()))
+ return RTE_PTR_DIFF(data_end, contig_seg_start);
+
+ cur_page = RTE_PTR_ALIGN_FLOOR(contig_seg_start, page_sz);
+ ms = rte_mem_virt2memseg(cur_page, elem->msl);
+
+ /* do first iteration outside the loop */
+ page_end = RTE_PTR_ADD(cur_page, page_sz);
+ cur_seg_end = RTE_MIN(page_end, data_end);
+ cur = RTE_PTR_DIFF(cur_seg_end, contig_seg_start) -
+ MALLOC_ELEM_TRAILER_LEN;
+ max = cur;
+ expected_iova = ms->iova + page_sz;
+ /* memsegs are contiguous in memory */
+ ms++;
+
+ cur_page = RTE_PTR_ADD(cur_page, page_sz);
+
+ while (cur_page < data_end) {
+ page_end = RTE_PTR_ADD(cur_page, page_sz);
+ cur_seg_end = RTE_MIN(page_end, data_end);
+
+ /* reset start of contiguous segment if unexpected iova */
+ if (ms->iova != expected_iova) {
+ /* next contiguous segment must start at specified
+ * alignment.
+ */
+ contig_seg_start = RTE_PTR_ALIGN(cur_page, align);
+ /* new segment start may be on a different page, so find
+ * the page and skip to next iteration to make sure
+ * we're not blowing past data end.
+ */
+ ms = rte_mem_virt2memseg(contig_seg_start, elem->msl);
+ cur_page = ms->addr;
+ /* don't trigger another recalculation */
+ expected_iova = ms->iova;
+ continue;
+ }
+ /* cur_seg_end ends on a page boundary or on data end. if we're
+ * looking at data end, then malloc trailer is already included
+ * in the calculations. if we're looking at page end, then we
+ * know there's more data past this page and thus there's space
+ * for malloc element trailer, so don't count it here.
+ */
+ cur = RTE_PTR_DIFF(cur_seg_end, contig_seg_start);
+ /* update max if cur value is bigger */
+ if (cur > max)
+ max = cur;
+
+ /* move to next page */
+ cur_page = page_end;
+ expected_iova = ms->iova + page_sz;
+ /* memsegs are contiguous in memory */
+ ms++;
+ }
+
+ return max;
+}
/*
* Initialize a general malloc_elem header structure
*/
void
-malloc_elem_init(struct malloc_elem *elem,
- struct malloc_heap *heap, const struct rte_memseg *ms, size_t size)
+malloc_elem_init(struct malloc_elem *elem, struct malloc_heap *heap,
+ struct rte_memseg_list *msl, size_t size)
{
elem->heap = heap;
- elem->ms = ms;
+ elem->msl = msl;
elem->prev = NULL;
+ elem->next = NULL;
memset(&elem->free_list, 0, sizeof(elem->free_list));
elem->state = ELEM_FREE;
elem->size = size;
@@ -39,15 +120,74 @@ malloc_elem_init(struct malloc_elem *elem,
set_trailer(elem);
}
+void
+malloc_elem_insert(struct malloc_elem *elem)
+{
+ struct malloc_elem *prev_elem, *next_elem;
+ struct malloc_heap *heap = elem->heap;
+
+ /* first and last elements must be both NULL or both non-NULL */
+ if ((heap->first == NULL) != (heap->last == NULL)) {
+ RTE_LOG(ERR, EAL, "Heap is probably corrupt\n");
+ return;
+ }
+
+ if (heap->first == NULL && heap->last == NULL) {
+ /* if empty heap */
+ heap->first = elem;
+ heap->last = elem;
+ prev_elem = NULL;
+ next_elem = NULL;
+ } else if (elem < heap->first) {
+ /* if lower than start */
+ prev_elem = NULL;
+ next_elem = heap->first;
+ heap->first = elem;
+ } else if (elem > heap->last) {
+ /* if higher than end */
+ prev_elem = heap->last;
+ next_elem = NULL;
+ heap->last = elem;
+ } else {
+		/* the new memory is somewhere in between start and end */
+ uint64_t dist_from_start, dist_from_end;
+
+ dist_from_end = RTE_PTR_DIFF(heap->last, elem);
+ dist_from_start = RTE_PTR_DIFF(elem, heap->first);
+
+ /* check which is closer, and find closest list entries */
+ if (dist_from_start < dist_from_end) {
+ prev_elem = heap->first;
+ while (prev_elem->next < elem)
+ prev_elem = prev_elem->next;
+ next_elem = prev_elem->next;
+ } else {
+ next_elem = heap->last;
+ while (next_elem->prev > elem)
+ next_elem = next_elem->prev;
+ prev_elem = next_elem->prev;
+ }
+ }
+
+ /* insert new element */
+ elem->prev = prev_elem;
+ elem->next = next_elem;
+ if (prev_elem)
+ prev_elem->next = elem;
+ if (next_elem)
+ next_elem->prev = elem;
+}
+
/*
- * Initialize a dummy malloc_elem header for the end-of-memseg marker
+ * Attempt to find enough physically contiguous memory in this block to store
+ * our data. Assume that the element has at least enough space to fit the data,
+ * so we just check the page addresses.
*/
-void
-malloc_elem_mkend(struct malloc_elem *elem, struct malloc_elem *prev)
+static bool
+elem_check_phys_contig(const struct rte_memseg_list *msl,
+ void *start, size_t size)
{
- malloc_elem_init(elem, prev->heap, prev->ms, 0);
- elem->prev = prev;
- elem->state = ELEM_BUSY; /* mark busy so its never merged */
+ return eal_memalloc_is_contig(msl, start, size);
}
/*
@@ -57,27 +197,59 @@ malloc_elem_mkend(struct malloc_elem *elem, struct malloc_elem *prev)
*/
static void *
elem_start_pt(struct malloc_elem *elem, size_t size, unsigned align,
- size_t bound)
+ size_t bound, bool contig)
{
- const size_t bmask = ~(bound - 1);
- uintptr_t end_pt = (uintptr_t)elem +
- elem->size - MALLOC_ELEM_TRAILER_LEN;
- uintptr_t new_data_start = RTE_ALIGN_FLOOR((end_pt - size), align);
- uintptr_t new_elem_start;
-
- /* check boundary */
- if ((new_data_start & bmask) != ((end_pt - 1) & bmask)) {
- end_pt = RTE_ALIGN_FLOOR(end_pt, bound);
- new_data_start = RTE_ALIGN_FLOOR((end_pt - size), align);
- end_pt = new_data_start + size;
- if (((end_pt - 1) & bmask) != (new_data_start & bmask))
- return NULL;
- }
+ size_t elem_size = elem->size;
+
+ /*
+ * we're allocating from the end, so adjust the size of element by
+ * alignment size.
+ */
+ while (elem_size >= size) {
+ const size_t bmask = ~(bound - 1);
+ uintptr_t end_pt = (uintptr_t)elem +
+ elem_size - MALLOC_ELEM_TRAILER_LEN;
+ uintptr_t new_data_start = RTE_ALIGN_FLOOR((end_pt - size),
+ align);
+ uintptr_t new_elem_start;
+
+ /* check boundary */
+ if ((new_data_start & bmask) != ((end_pt - 1) & bmask)) {
+ end_pt = RTE_ALIGN_FLOOR(end_pt, bound);
+ new_data_start = RTE_ALIGN_FLOOR((end_pt - size),
+ align);
+ end_pt = new_data_start + size;
+
+ if (((end_pt - 1) & bmask) != (new_data_start & bmask))
+ return NULL;
+ }
+
+ new_elem_start = new_data_start - MALLOC_ELEM_HEADER_LEN;
- new_elem_start = new_data_start - MALLOC_ELEM_HEADER_LEN;
+		/* if the new start point is before the existing start,
+ * it won't fit
+ */
+ if (new_elem_start < (uintptr_t)elem)
+ return NULL;
- /* if the new start point is before the exist start, it won't fit */
- return (new_elem_start < (uintptr_t)elem) ? NULL : (void *)new_elem_start;
+ if (contig) {
+ size_t new_data_size = end_pt - new_data_start;
+
+ /*
+ * if physical contiguousness was requested and we
+ * couldn't fit all data into one physically contiguous
+ * block, try again with lower addresses.
+ */
+ if (!elem_check_phys_contig(elem->msl,
+ (void *)new_data_start,
+ new_data_size)) {
+ elem_size -= align;
+ continue;
+ }
+ }
+ return (void *)new_elem_start;
+ }
+ return NULL;
}
/*
@@ -86,9 +258,9 @@ elem_start_pt(struct malloc_elem *elem, size_t size, unsigned align,
*/
int
malloc_elem_can_hold(struct malloc_elem *elem, size_t size, unsigned align,
- size_t bound)
+ size_t bound, bool contig)
{
- return elem_start_pt(elem, size, align, bound) != NULL;
+ return elem_start_pt(elem, size, align, bound, contig) != NULL;
}
/*
@@ -98,18 +270,58 @@ malloc_elem_can_hold(struct malloc_elem *elem, size_t size, unsigned align,
static void
split_elem(struct malloc_elem *elem, struct malloc_elem *split_pt)
{
- struct malloc_elem *next_elem = RTE_PTR_ADD(elem, elem->size);
+ struct malloc_elem *next_elem = elem->next;
const size_t old_elem_size = (uintptr_t)split_pt - (uintptr_t)elem;
const size_t new_elem_size = elem->size - old_elem_size;
- malloc_elem_init(split_pt, elem->heap, elem->ms, new_elem_size);
+ malloc_elem_init(split_pt, elem->heap, elem->msl, new_elem_size);
split_pt->prev = elem;
- next_elem->prev = split_pt;
+ split_pt->next = next_elem;
+ if (next_elem)
+ next_elem->prev = split_pt;
+ else
+ elem->heap->last = split_pt;
+ elem->next = split_pt;
elem->size = old_elem_size;
set_trailer(elem);
}
/*
+ * our malloc heap is a doubly linked list, so doubly remove our element.
+ */
+static void __rte_unused
+remove_elem(struct malloc_elem *elem)
+{
+ struct malloc_elem *next, *prev;
+ next = elem->next;
+ prev = elem->prev;
+
+ if (next)
+ next->prev = prev;
+ else
+ elem->heap->last = prev;
+ if (prev)
+ prev->next = next;
+ else
+ elem->heap->first = next;
+
+ elem->prev = NULL;
+ elem->next = NULL;
+}
+
+static int
+next_elem_is_adjacent(struct malloc_elem *elem)
+{
+ return elem->next == RTE_PTR_ADD(elem, elem->size);
+}
+
+static int
+prev_elem_is_adjacent(struct malloc_elem *elem)
+{
+ return elem == RTE_PTR_ADD(elem->prev, elem->prev->size);
+}
+
+/*
* Given an element size, compute its freelist index.
* We free an element into the freelist containing similarly-sized elements.
* We try to allocate elements starting with the freelist containing
@@ -162,8 +374,8 @@ malloc_elem_free_list_insert(struct malloc_elem *elem)
/*
* Remove the specified element from its heap's free list.
*/
-static void
-elem_free_list_remove(struct malloc_elem *elem)
+void
+malloc_elem_free_list_remove(struct malloc_elem *elem)
{
LIST_REMOVE(elem, free_list);
}
@@ -176,14 +388,15 @@ elem_free_list_remove(struct malloc_elem *elem)
*/
struct malloc_elem *
malloc_elem_alloc(struct malloc_elem *elem, size_t size, unsigned align,
- size_t bound)
+ size_t bound, bool contig)
{
- struct malloc_elem *new_elem = elem_start_pt(elem, size, align, bound);
+ struct malloc_elem *new_elem = elem_start_pt(elem, size, align, bound,
+ contig);
const size_t old_elem_size = (uintptr_t)new_elem - (uintptr_t)elem;
const size_t trailer_size = elem->size - old_elem_size - size -
MALLOC_ELEM_OVERHEAD;
- elem_free_list_remove(elem);
+ malloc_elem_free_list_remove(elem);
if (trailer_size > MALLOC_ELEM_OVERHEAD + MIN_DATA_SIZE) {
/* split it, too much free space after elem */
@@ -192,6 +405,9 @@ malloc_elem_alloc(struct malloc_elem *elem, size_t size, unsigned align,
split_elem(elem, new_free_elem);
malloc_elem_free_list_insert(new_free_elem);
+
+ if (elem == elem->heap->last)
+ elem->heap->last = new_free_elem;
}
if (old_elem_size < MALLOC_ELEM_OVERHEAD + MIN_DATA_SIZE) {
@@ -230,9 +446,66 @@ malloc_elem_alloc(struct malloc_elem *elem, size_t size, unsigned align,
static inline void
join_elem(struct malloc_elem *elem1, struct malloc_elem *elem2)
{
- struct malloc_elem *next = RTE_PTR_ADD(elem2, elem2->size);
+ struct malloc_elem *next = elem2->next;
elem1->size += elem2->size;
- next->prev = elem1;
+ if (next)
+ next->prev = elem1;
+ else
+ elem1->heap->last = elem1;
+ elem1->next = next;
+}
+
+struct malloc_elem *
+malloc_elem_join_adjacent_free(struct malloc_elem *elem)
+{
+ /*
+ * check if next element exists, is adjacent and is free, if so join
+ * with it, need to remove from free list.
+ */
+ if (elem->next != NULL && elem->next->state == ELEM_FREE &&
+ next_elem_is_adjacent(elem)) {
+ void *erase;
+ size_t erase_len;
+
+ /* we will want to erase the trailer and header */
+ erase = RTE_PTR_SUB(elem->next, MALLOC_ELEM_TRAILER_LEN);
+ erase_len = MALLOC_ELEM_OVERHEAD + elem->next->pad;
+
+ /* remove from free list, join to this one */
+ malloc_elem_free_list_remove(elem->next);
+ join_elem(elem, elem->next);
+
+ /* erase header, trailer and pad */
+ memset(erase, 0, erase_len);
+ }
+
+ /*
+ * check if prev element exists, is adjacent and is free, if so join
+ * with it, need to remove from free list.
+ */
+ if (elem->prev != NULL && elem->prev->state == ELEM_FREE &&
+ prev_elem_is_adjacent(elem)) {
+ struct malloc_elem *new_elem;
+ void *erase;
+ size_t erase_len;
+
+ /* we will want to erase trailer and header */
+ erase = RTE_PTR_SUB(elem, MALLOC_ELEM_TRAILER_LEN);
+ erase_len = MALLOC_ELEM_OVERHEAD + elem->pad;
+
+ /* remove from free list, join to this one */
+ malloc_elem_free_list_remove(elem->prev);
+
+ new_elem = elem->prev;
+ join_elem(new_elem, elem);
+
+ /* erase header, trailer and pad */
+ memset(erase, 0, erase_len);
+
+ elem = new_elem;
+ }
+
+ return elem;
}
/*
@@ -240,43 +513,74 @@ join_elem(struct malloc_elem *elem1, struct malloc_elem *elem2)
* blocks either immediately before or immediately after newly freed block
* are also free, the blocks are merged together.
*/
-int
+struct malloc_elem *
malloc_elem_free(struct malloc_elem *elem)
{
- if (!malloc_elem_cookies_ok(elem) || elem->state != ELEM_BUSY)
- return -1;
+ void *ptr;
+ size_t data_len;
- rte_spinlock_lock(&(elem->heap->lock));
- size_t sz = elem->size - sizeof(*elem) - MALLOC_ELEM_TRAILER_LEN;
- uint8_t *ptr = (uint8_t *)&elem[1];
- struct malloc_elem *next = RTE_PTR_ADD(elem, elem->size);
- if (next->state == ELEM_FREE){
- /* remove from free list, join to this one */
- elem_free_list_remove(next);
- join_elem(elem, next);
- sz += (sizeof(*elem) + MALLOC_ELEM_TRAILER_LEN);
- }
+ ptr = RTE_PTR_ADD(elem, MALLOC_ELEM_HEADER_LEN);
+ data_len = elem->size - MALLOC_ELEM_OVERHEAD;
+
+ elem = malloc_elem_join_adjacent_free(elem);
- /* check if previous element is free, if so join with it and return,
- * need to re-insert in free list, as that element's size is changing
- */
- if (elem->prev != NULL && elem->prev->state == ELEM_FREE) {
- elem_free_list_remove(elem->prev);
- join_elem(elem->prev, elem);
- sz += (sizeof(*elem) + MALLOC_ELEM_TRAILER_LEN);
- ptr -= (sizeof(*elem) + MALLOC_ELEM_TRAILER_LEN);
- elem = elem->prev;
- }
malloc_elem_free_list_insert(elem);
+ elem->pad = 0;
+
/* decrease heap's count of allocated elements */
elem->heap->alloc_count--;
- memset(ptr, 0, sz);
+ memset(ptr, 0, data_len);
- rte_spinlock_unlock(&(elem->heap->lock));
+ return elem;
+}
- return 0;
+/* assume all checks were already done */
+void
+malloc_elem_hide_region(struct malloc_elem *elem, void *start, size_t len)
+{
+ struct malloc_elem *hide_start, *hide_end, *prev, *next;
+ size_t len_before, len_after;
+
+ hide_start = start;
+ hide_end = RTE_PTR_ADD(start, len);
+
+ prev = elem->prev;
+ next = elem->next;
+
+ /* we cannot do anything with non-adjacent elements */
+ if (next && next_elem_is_adjacent(elem)) {
+ len_after = RTE_PTR_DIFF(next, hide_end);
+ if (len_after >= MALLOC_ELEM_OVERHEAD + MIN_DATA_SIZE) {
+ /* split after */
+ split_elem(elem, hide_end);
+
+ malloc_elem_free_list_insert(hide_end);
+ } else if (len_after > 0) {
+ RTE_LOG(ERR, EAL, "Unaligned element, heap is probably corrupt\n");
+ return;
+ }
+ }
+
+ /* we cannot do anything with non-adjacent elements */
+ if (prev && prev_elem_is_adjacent(elem)) {
+ len_before = RTE_PTR_DIFF(hide_start, elem);
+ if (len_before >= MALLOC_ELEM_OVERHEAD + MIN_DATA_SIZE) {
+ /* split before */
+ split_elem(elem, hide_start);
+
+ prev = elem;
+ elem = hide_start;
+
+ malloc_elem_free_list_insert(prev);
+ } else if (len_before > 0) {
+ RTE_LOG(ERR, EAL, "Unaligned element, heap is probably corrupt\n");
+ return;
+ }
+ }
+
+ remove_elem(elem);
}
/*
@@ -287,22 +591,23 @@ int
malloc_elem_resize(struct malloc_elem *elem, size_t size)
{
const size_t new_size = size + elem->pad + MALLOC_ELEM_OVERHEAD;
+
/* if we request a smaller size, then always return ok */
if (elem->size >= new_size)
return 0;
- struct malloc_elem *next = RTE_PTR_ADD(elem, elem->size);
- rte_spinlock_lock(&elem->heap->lock);
- if (next ->state != ELEM_FREE)
- goto err_return;
- if (elem->size + next->size < new_size)
- goto err_return;
+ /* check if there is a next element, it's free and adjacent */
+ if (!elem->next || elem->next->state != ELEM_FREE ||
+ !next_elem_is_adjacent(elem))
+ return -1;
+ if (elem->size + elem->next->size < new_size)
+ return -1;
/* we now know the element fits, so remove from free list,
* join the two
*/
- elem_free_list_remove(next);
- join_elem(elem, next);
+ malloc_elem_free_list_remove(elem->next);
+ join_elem(elem, elem->next);
if (elem->size - new_size >= MIN_DATA_SIZE + MALLOC_ELEM_OVERHEAD) {
/* now we have a big block together. Lets cut it down a bit, by splitting */
@@ -311,10 +616,28 @@ malloc_elem_resize(struct malloc_elem *elem, size_t size)
split_elem(elem, split_pt);
malloc_elem_free_list_insert(split_pt);
}
- rte_spinlock_unlock(&elem->heap->lock);
return 0;
+}
-err_return:
- rte_spinlock_unlock(&elem->heap->lock);
- return -1;
+static inline const char *
+elem_state_to_str(enum elem_state state)
+{
+ switch (state) {
+ case ELEM_PAD:
+ return "PAD";
+ case ELEM_BUSY:
+ return "BUSY";
+ case ELEM_FREE:
+ return "FREE";
+ }
+ return "ERROR";
+}
+
+void
+malloc_elem_dump(const struct malloc_elem *elem, FILE *f)
+{
+ fprintf(f, "Malloc element at %p (%s)\n", elem,
+ elem_state_to_str(elem->state));
+ fprintf(f, " len: 0x%zx pad: 0x%" PRIx32 "\n", elem->size, elem->pad);
+ fprintf(f, " prev: %p next: %p\n", elem->prev, elem->next);
}
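
With the end-of-memseg dummy marker gone, each element now carries explicit prev/next links and the heap tracks its first/last elements, so adjacency is checked rather than assumed. A minimal internal-API sketch of what that buys ('heap' is a hypothetical initialised heap):

    #include <stdio.h>
    #include "malloc_elem.h"
    #include "malloc_heap.h"

    /* Sketch: walking every element of a heap is now a plain pointer chase
     * over the doubly linked list maintained by malloc_elem_insert().
     */
    static void
    dump_heap_elems(struct malloc_heap *heap, FILE *f)
    {
        struct malloc_elem *elem;

        for (elem = heap->first; elem != NULL; elem = elem->next)
            malloc_elem_dump(elem, f);
    }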
diff --git a/lib/librte_eal/common/malloc_elem.h b/lib/librte_eal/common/malloc_elem.h
index f4c1c7a9..e2bda4c0 100644
--- a/lib/librte_eal/common/malloc_elem.h
+++ b/lib/librte_eal/common/malloc_elem.h
@@ -5,7 +5,11 @@
#ifndef MALLOC_ELEM_H_
#define MALLOC_ELEM_H_
-#include <rte_memory.h>
+#include <stdbool.h>
+
+#include <rte_eal_memconfig.h>
+
+#define MIN_DATA_SIZE (RTE_CACHE_LINE_SIZE)
/* dummy definition of struct so we can use pointers to it in malloc_elem struct */
struct malloc_heap;
@@ -18,9 +22,13 @@ enum elem_state {
struct malloc_elem {
struct malloc_heap *heap;
- struct malloc_elem *volatile prev; /* points to prev elem in memseg */
- LIST_ENTRY(malloc_elem) free_list; /* list of free elements in heap */
- const struct rte_memseg *ms;
+ struct malloc_elem *volatile prev;
+ /**< points to prev elem in memseg */
+ struct malloc_elem *volatile next;
+ /**< points to next elem in memseg */
+ LIST_ENTRY(malloc_elem) free_list;
+ /**< list of free elements in heap */
+ struct rte_memseg_list *msl;
volatile enum elem_state state;
uint32_t pad;
size_t size;
@@ -107,15 +115,11 @@ malloc_elem_from_data(const void *data)
void
malloc_elem_init(struct malloc_elem *elem,
struct malloc_heap *heap,
- const struct rte_memseg *ms,
+ struct rte_memseg_list *msl,
size_t size);
-/*
- * initialise a dummy malloc_elem header for the end-of-memseg marker
- */
void
-malloc_elem_mkend(struct malloc_elem *elem,
- struct malloc_elem *prev_free);
+malloc_elem_insert(struct malloc_elem *elem);
/*
* return true if the current malloc_elem can hold a block of data
@@ -123,7 +127,7 @@ malloc_elem_mkend(struct malloc_elem *elem,
*/
int
malloc_elem_can_hold(struct malloc_elem *elem, size_t size,
- unsigned align, size_t bound);
+ unsigned int align, size_t bound, bool contig);
/*
* reserve a block of data in an existing malloc_elem. If the malloc_elem
@@ -131,16 +135,19 @@ malloc_elem_can_hold(struct malloc_elem *elem, size_t size,
*/
struct malloc_elem *
malloc_elem_alloc(struct malloc_elem *elem, size_t size,
- unsigned align, size_t bound);
+ unsigned int align, size_t bound, bool contig);
/*
* free a malloc_elem block by adding it to the free list. If the
* blocks either immediately before or immediately after newly freed block
* are also free, the blocks are merged together.
*/
-int
+struct malloc_elem *
malloc_elem_free(struct malloc_elem *elem);
+struct malloc_elem *
+malloc_elem_join_adjacent_free(struct malloc_elem *elem);
+
/*
* attempt to resize a malloc_elem by expanding into any free space
* immediately after it in memory.
@@ -148,6 +155,18 @@ malloc_elem_free(struct malloc_elem *elem);
int
malloc_elem_resize(struct malloc_elem *elem, size_t size);
+void
+malloc_elem_hide_region(struct malloc_elem *elem, void *start, size_t len);
+
+void
+malloc_elem_free_list_remove(struct malloc_elem *elem);
+
+/*
+ * dump contents of malloc elem to a file.
+ */
+void
+malloc_elem_dump(const struct malloc_elem *elem, FILE *f);
+
/*
* Given an element size, compute its freelist index.
*/
@@ -160,4 +179,10 @@ malloc_elem_free_list_index(size_t size);
void
malloc_elem_free_list_insert(struct malloc_elem *elem);
+/*
+ * Find biggest IOVA-contiguous zone within an element with specified alignment.
+ */
+size_t
+malloc_elem_find_max_iova_contig(struct malloc_elem *elem, size_t align);
+
#endif /* MALLOC_ELEM_H_ */
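
The new bool contig parameter threads through can_hold/alloc so callers can demand IOVA-contiguous space. A brief internal-API sketch (try_take and its cache-line alignment are assumptions for illustration):

    #include <stdbool.h>
    #include <rte_common.h>
    #include "malloc_elem.h"

    /* Sketch: ask whether an element can satisfy an IOVA-contiguous request
     * before committing to the allocation.
     */
    static struct malloc_elem *
    try_take(struct malloc_elem *elem, size_t size)
    {
        if (!malloc_elem_can_hold(elem, size, RTE_CACHE_LINE_SIZE, 0, true))
            return NULL;
        return malloc_elem_alloc(elem, size, RTE_CACHE_LINE_SIZE, 0, true);
    }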
diff --git a/lib/librte_eal/common/malloc_heap.c b/lib/librte_eal/common/malloc_heap.c
index 7aafc880..12aaf2d7 100644
--- a/lib/librte_eal/common/malloc_heap.c
+++ b/lib/librte_eal/common/malloc_heap.c
@@ -10,6 +10,7 @@
#include <sys/queue.h>
#include <rte_memory.h>
+#include <rte_errno.h>
#include <rte_eal.h>
#include <rte_eal_memconfig.h>
#include <rte_launch.h>
@@ -20,9 +21,13 @@
#include <rte_spinlock.h>
#include <rte_memcpy.h>
#include <rte_atomic.h>
+#include <rte_fbarray.h>
+#include "eal_internal_cfg.h"
+#include "eal_memalloc.h"
#include "malloc_elem.h"
#include "malloc_heap.h"
+#include "malloc_mp.h"
static unsigned
check_hugepage_sz(unsigned flags, uint64_t hugepage_sz)
@@ -62,26 +67,51 @@ check_hugepage_sz(unsigned flags, uint64_t hugepage_sz)
}
/*
- * Expand the heap with a memseg.
- * This reserves the zone and sets a dummy malloc_elem header at the end
- * to prevent overflow. The rest of the zone is added to free list as a single
- * large free block
+ * Expand the heap with a memory area.
*/
-static void
-malloc_heap_add_memseg(struct malloc_heap *heap, struct rte_memseg *ms)
+static struct malloc_elem *
+malloc_heap_add_memory(struct malloc_heap *heap, struct rte_memseg_list *msl,
+ void *start, size_t len)
+{
+ struct malloc_elem *elem = start;
+
+ malloc_elem_init(elem, heap, msl, len);
+
+ malloc_elem_insert(elem);
+
+ elem = malloc_elem_join_adjacent_free(elem);
+
+ malloc_elem_free_list_insert(elem);
+
+ return elem;
+}
+
+static int
+malloc_add_seg(const struct rte_memseg_list *msl,
+ const struct rte_memseg *ms, size_t len, void *arg __rte_unused)
{
- /* allocate the memory block headers, one at end, one at start */
- struct malloc_elem *start_elem = (struct malloc_elem *)ms->addr;
- struct malloc_elem *end_elem = RTE_PTR_ADD(ms->addr,
- ms->len - MALLOC_ELEM_OVERHEAD);
- end_elem = RTE_PTR_ALIGN_FLOOR(end_elem, RTE_CACHE_LINE_SIZE);
- const size_t elem_size = (uintptr_t)end_elem - (uintptr_t)start_elem;
+ struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
+ struct rte_memseg_list *found_msl;
+ struct malloc_heap *heap;
+ int msl_idx;
+
+ heap = &mcfg->malloc_heaps[msl->socket_id];
+
+ /* msl is const, so find it */
+ msl_idx = msl - mcfg->memsegs;
+
+ if (msl_idx < 0 || msl_idx >= RTE_MAX_MEMSEG_LISTS)
+ return -1;
- malloc_elem_init(start_elem, heap, ms, elem_size);
- malloc_elem_mkend(end_elem, start_elem);
- malloc_elem_free_list_insert(start_elem);
+ found_msl = &mcfg->memsegs[msl_idx];
- heap->total_size += elem_size;
+ malloc_heap_add_memory(heap, found_msl, ms->addr, len);
+
+ heap->total_size += len;
+
+ RTE_LOG(DEBUG, EAL, "Added %zuM to heap on socket %i\n", len >> 20,
+ msl->socket_id);
+ return 0;
}
/*
@@ -92,7 +122,7 @@ malloc_heap_add_memseg(struct malloc_heap *heap, struct rte_memseg *ms)
*/
static struct malloc_elem *
find_suitable_element(struct malloc_heap *heap, size_t size,
- unsigned flags, size_t align, size_t bound)
+ unsigned int flags, size_t align, size_t bound, bool contig)
{
size_t idx;
struct malloc_elem *elem, *alt_elem = NULL;
@@ -101,8 +131,10 @@ find_suitable_element(struct malloc_heap *heap, size_t size,
idx < RTE_HEAP_NUM_FREELISTS; idx++) {
for (elem = LIST_FIRST(&heap->free_head[idx]);
!!elem; elem = LIST_NEXT(elem, free_list)) {
- if (malloc_elem_can_hold(elem, size, align, bound)) {
- if (check_hugepage_sz(flags, elem->ms->hugepage_sz))
+ if (malloc_elem_can_hold(elem, size, align, bound,
+ contig)) {
+ if (check_hugepage_sz(flags,
+ elem->msl->page_sz))
return elem;
if (alt_elem == NULL)
alt_elem = elem;
@@ -117,34 +149,770 @@ find_suitable_element(struct malloc_heap *heap, size_t size,
}
/*
+ * Iterates through the freelist for a heap to find a free element with the
+ * biggest size and requested alignment. Will also set size to whatever element
+ * size was found.
+ * Returns null on failure, or pointer to element on success.
+ */
+static struct malloc_elem *
+find_biggest_element(struct malloc_heap *heap, size_t *size,
+ unsigned int flags, size_t align, bool contig)
+{
+ struct malloc_elem *elem, *max_elem = NULL;
+ size_t idx, max_size = 0;
+
+ for (idx = 0; idx < RTE_HEAP_NUM_FREELISTS; idx++) {
+ for (elem = LIST_FIRST(&heap->free_head[idx]);
+ !!elem; elem = LIST_NEXT(elem, free_list)) {
+ size_t cur_size;
+ if (!check_hugepage_sz(flags, elem->msl->page_sz))
+ continue;
+ if (contig) {
+ cur_size =
+ malloc_elem_find_max_iova_contig(elem,
+ align);
+ } else {
+ void *data_start = RTE_PTR_ADD(elem,
+ MALLOC_ELEM_HEADER_LEN);
+ void *data_end = RTE_PTR_ADD(elem, elem->size -
+ MALLOC_ELEM_TRAILER_LEN);
+ void *aligned = RTE_PTR_ALIGN_CEIL(data_start,
+ align);
+ /* check if aligned data start is beyond end */
+ if (aligned >= data_end)
+ continue;
+ cur_size = RTE_PTR_DIFF(data_end, aligned);
+ }
+ if (cur_size > max_size) {
+ max_size = cur_size;
+ max_elem = elem;
+ }
+ }
+ }
+
+ *size = max_size;
+ return max_elem;
+}
+
+/*
* Main function to allocate a block of memory from the heap.
* It locks the free list, scans it, and adds a new memseg if the
* scan fails. Once the new memseg is added, it re-scans and should return
* the new element after releasing the lock.
*/
-void *
-malloc_heap_alloc(struct malloc_heap *heap,
- const char *type __attribute__((unused)), size_t size, unsigned flags,
- size_t align, size_t bound)
+static void *
+heap_alloc(struct malloc_heap *heap, const char *type __rte_unused, size_t size,
+ unsigned int flags, size_t align, size_t bound, bool contig)
{
struct malloc_elem *elem;
size = RTE_CACHE_LINE_ROUNDUP(size);
align = RTE_CACHE_LINE_ROUNDUP(align);
- rte_spinlock_lock(&heap->lock);
+ elem = find_suitable_element(heap, size, flags, align, bound, contig);
+ if (elem != NULL) {
+ elem = malloc_elem_alloc(elem, size, align, bound, contig);
+
+ /* increase heap's count of allocated elements */
+ heap->alloc_count++;
+ }
- elem = find_suitable_element(heap, size, flags, align, bound);
+ return elem == NULL ? NULL : (void *)(&elem[1]);
+}
+
+static void *
+heap_alloc_biggest(struct malloc_heap *heap, const char *type __rte_unused,
+ unsigned int flags, size_t align, bool contig)
+{
+ struct malloc_elem *elem;
+ size_t size;
+
+ align = RTE_CACHE_LINE_ROUNDUP(align);
+
+ elem = find_biggest_element(heap, &size, flags, align, contig);
if (elem != NULL) {
- elem = malloc_elem_alloc(elem, size, align, bound);
+ elem = malloc_elem_alloc(elem, size, align, 0, contig);
+
/* increase heap's count of allocated elements */
heap->alloc_count++;
}
- rte_spinlock_unlock(&heap->lock);
return elem == NULL ? NULL : (void *)(&elem[1]);
}
+/* this function is exposed in malloc_mp.h */
+void
+rollback_expand_heap(struct rte_memseg **ms, int n_segs,
+ struct malloc_elem *elem, void *map_addr, size_t map_len)
+{
+ if (elem != NULL) {
+ malloc_elem_free_list_remove(elem);
+ malloc_elem_hide_region(elem, map_addr, map_len);
+ }
+
+ eal_memalloc_free_seg_bulk(ms, n_segs);
+}
+
+/* this function is exposed in malloc_mp.h */
+struct malloc_elem *
+alloc_pages_on_heap(struct malloc_heap *heap, uint64_t pg_sz, size_t elt_size,
+ int socket, unsigned int flags, size_t align, size_t bound,
+ bool contig, struct rte_memseg **ms, int n_segs)
+{
+ struct rte_memseg_list *msl;
+ struct malloc_elem *elem = NULL;
+ size_t alloc_sz;
+ int allocd_pages;
+ void *ret, *map_addr;
+
+ alloc_sz = (size_t)pg_sz * n_segs;
+
+ /* first, check if we're allowed to allocate this memory */
+ if (eal_memalloc_mem_alloc_validate(socket,
+ heap->total_size + alloc_sz) < 0) {
+ RTE_LOG(DEBUG, EAL, "User has disallowed allocation\n");
+ return NULL;
+ }
+
+ allocd_pages = eal_memalloc_alloc_seg_bulk(ms, n_segs, pg_sz,
+ socket, true);
+
+ /* make sure we've allocated our pages... */
+ if (allocd_pages < 0)
+ return NULL;
+
+ map_addr = ms[0]->addr;
+ msl = rte_mem_virt2memseg_list(map_addr);
+
+ /* check if we wanted contiguous memory but didn't get it */
+ if (contig && !eal_memalloc_is_contig(msl, map_addr, alloc_sz)) {
+ RTE_LOG(DEBUG, EAL, "%s(): couldn't allocate physically contiguous space\n",
+ __func__);
+ goto fail;
+ }
+
+ /* add newly minted memsegs to malloc heap */
+ elem = malloc_heap_add_memory(heap, msl, map_addr, alloc_sz);
+
+ /* try once more, as now we have allocated new memory */
+ ret = find_suitable_element(heap, elt_size, flags, align, bound,
+ contig);
+
+ if (ret == NULL)
+ goto fail;
+
+ return elem;
+
+fail:
+ rollback_expand_heap(ms, n_segs, elem, map_addr, alloc_sz);
+ return NULL;
+}
+
+static int
+try_expand_heap_primary(struct malloc_heap *heap, uint64_t pg_sz,
+ size_t elt_size, int socket, unsigned int flags, size_t align,
+ size_t bound, bool contig)
+{
+ struct malloc_elem *elem;
+ struct rte_memseg **ms;
+ void *map_addr;
+ size_t alloc_sz;
+ int n_segs;
+ bool callback_triggered = false;
+
+ alloc_sz = RTE_ALIGN_CEIL(align + elt_size +
+ MALLOC_ELEM_TRAILER_LEN, pg_sz);
+ n_segs = alloc_sz / pg_sz;
+
+ /* we can't know in advance how many pages we'll need, so we malloc */
+	ms = malloc(sizeof(*ms) * n_segs);
+	if (ms == NULL)
+		return -1;
+
+	memset(ms, 0, sizeof(*ms) * n_segs);
+
+
+ elem = alloc_pages_on_heap(heap, pg_sz, elt_size, socket, flags, align,
+ bound, contig, ms, n_segs);
+
+ if (elem == NULL)
+ goto free_ms;
+
+ map_addr = ms[0]->addr;
+
+ /* notify user about changes in memory map */
+ eal_memalloc_mem_event_notify(RTE_MEM_EVENT_ALLOC, map_addr, alloc_sz);
+
+ /* notify other processes that this has happened */
+ if (request_sync()) {
+ /* we couldn't ensure all processes have mapped memory,
+ * so free it back and notify everyone that it's been
+ * freed back.
+ *
+ * technically, we could've avoided adding memory addresses to
+ * the map, but that would've led to inconsistent behavior
+ * between primary and secondary processes, as those get
+ * callbacks during sync. therefore, force primary process to
+ * do alloc-and-rollback syncs as well.
+ */
+ callback_triggered = true;
+ goto free_elem;
+ }
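+
+ /* all processes have now mapped the new pages, so account for them */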
+ heap->total_size += alloc_sz;
+
+ RTE_LOG(DEBUG, EAL, "Heap on socket %d was expanded by %zdMB\n",
+ socket, alloc_sz >> 20ULL);
+
+ free(ms);
+
+ return 0;
+
+free_elem:
+ if (callback_triggered)
+ eal_memalloc_mem_event_notify(RTE_MEM_EVENT_FREE,
+ map_addr, alloc_sz);
+
+ rollback_expand_heap(ms, n_segs, elem, map_addr, alloc_sz);
+
+ request_sync();
+free_ms:
+ free(ms);
+
+ return -1;
+}
+
+static int
+try_expand_heap_secondary(struct malloc_heap *heap, uint64_t pg_sz,
+ size_t elt_size, int socket, unsigned int flags, size_t align,
+ size_t bound, bool contig)
+{
+ struct malloc_mp_req req;
+ int req_result;
+
+ memset(&req, 0, sizeof(req));
+
+ req.t = REQ_TYPE_ALLOC;
+ req.alloc_req.align = align;
+ req.alloc_req.bound = bound;
+ req.alloc_req.contig = contig;
+ req.alloc_req.flags = flags;
+ req.alloc_req.elt_size = elt_size;
+ req.alloc_req.page_sz = pg_sz;
+ req.alloc_req.socket = socket;
+ req.alloc_req.heap = heap; /* it's in shared memory */
+
+ req_result = request_to_primary(&req);
+
+ if (req_result != 0)
+ return -1;
+
+ if (req.result != REQ_RESULT_SUCCESS)
+ return -1;
+
+ return 0;
+}
+
+static int
+try_expand_heap(struct malloc_heap *heap, uint64_t pg_sz, size_t elt_size,
+ int socket, unsigned int flags, size_t align, size_t bound,
+ bool contig)
+{
+ struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
+ int ret;
+
+ rte_rwlock_write_lock(&mcfg->memory_hotplug_lock);
+
+ if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
+ ret = try_expand_heap_primary(heap, pg_sz, elt_size, socket,
+ flags, align, bound, contig);
+ } else {
+ ret = try_expand_heap_secondary(heap, pg_sz, elt_size, socket,
+ flags, align, bound, contig);
+ }
+
+ rte_rwlock_write_unlock(&mcfg->memory_hotplug_lock);
+ return ret;
+}
+
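+/* qsort comparator: order memseg lists by ascending page size */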
+static int
+compare_pagesz(const void *a, const void *b)
+{
+ const struct rte_memseg_list * const*mpa = a;
+ const struct rte_memseg_list * const*mpb = b;
+ const struct rte_memseg_list *msla = *mpa;
+ const struct rte_memseg_list *mslb = *mpb;
+ uint64_t pg_sz_a = msla->page_sz;
+ uint64_t pg_sz_b = mslb->page_sz;
+
+ if (pg_sz_a < pg_sz_b)
+ return -1;
+ if (pg_sz_a > pg_sz_b)
+ return 1;
+ return 0;
+}
+
+static int
+alloc_more_mem_on_socket(struct malloc_heap *heap, size_t size, int socket,
+ unsigned int flags, size_t align, size_t bound, bool contig)
+{
+ struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
+ struct rte_memseg_list *requested_msls[RTE_MAX_MEMSEG_LISTS];
+ struct rte_memseg_list *other_msls[RTE_MAX_MEMSEG_LISTS];
+ uint64_t requested_pg_sz[RTE_MAX_MEMSEG_LISTS];
+ uint64_t other_pg_sz[RTE_MAX_MEMSEG_LISTS];
+ uint64_t prev_pg_sz;
+ int i, n_other_msls, n_other_pg_sz, n_requested_msls, n_requested_pg_sz;
+ bool size_hint = (flags & RTE_MEMZONE_SIZE_HINT_ONLY) > 0;
+ unsigned int size_flags = flags & ~RTE_MEMZONE_SIZE_HINT_ONLY;
+ void *ret;
+
+ memset(requested_msls, 0, sizeof(requested_msls));
+ memset(other_msls, 0, sizeof(other_msls));
+ memset(requested_pg_sz, 0, sizeof(requested_pg_sz));
+ memset(other_pg_sz, 0, sizeof(other_pg_sz));
+
+ /*
+ * go through memseg list and take note of all the page sizes available,
+ * and if any of them were specifically requested by the user.
+ */
+ n_requested_msls = 0;
+ n_other_msls = 0;
+ for (i = 0; i < RTE_MAX_MEMSEG_LISTS; i++) {
+ struct rte_memseg_list *msl = &mcfg->memsegs[i];
+
+ if (msl->socket_id != socket)
+ continue;
+
+ if (msl->base_va == NULL)
+ continue;
+
+ /* if pages of specific size were requested */
+ if (size_flags != 0 && check_hugepage_sz(size_flags,
+ msl->page_sz))
+ requested_msls[n_requested_msls++] = msl;
+ else if (size_flags == 0 || size_hint)
+ other_msls[n_other_msls++] = msl;
+ }
+
+ /* sort the lists, smallest first */
+ qsort(requested_msls, n_requested_msls, sizeof(requested_msls[0]),
+ compare_pagesz);
+ qsort(other_msls, n_other_msls, sizeof(other_msls[0]),
+ compare_pagesz);
+
+ /* now, extract page sizes we are supposed to try */
+ prev_pg_sz = 0;
+ n_requested_pg_sz = 0;
+ for (i = 0; i < n_requested_msls; i++) {
+ uint64_t pg_sz = requested_msls[i]->page_sz;
+
+ if (prev_pg_sz != pg_sz) {
+ requested_pg_sz[n_requested_pg_sz++] = pg_sz;
+ prev_pg_sz = pg_sz;
+ }
+ }
+ prev_pg_sz = 0;
+ n_other_pg_sz = 0;
+ for (i = 0; i < n_other_msls; i++) {
+ uint64_t pg_sz = other_msls[i]->page_sz;
+
+ if (prev_pg_sz != pg_sz) {
+ other_pg_sz[n_other_pg_sz++] = pg_sz;
+ prev_pg_sz = pg_sz;
+ }
+ }
+
+ /* finally, try allocating memory of specified page sizes, starting from
+ * the smallest sizes
+ */
+ for (i = 0; i < n_requested_pg_sz; i++) {
+ uint64_t pg_sz = requested_pg_sz[i];
+
+ /*
+ * do not pass the size hint here, as user expects other page
+ * sizes first, before resorting to best effort allocation.
+ */
+ if (!try_expand_heap(heap, pg_sz, size, socket, size_flags,
+ align, bound, contig))
+ return 0;
+ }
+ if (n_other_pg_sz == 0)
+ return -1;
+
+ /* now, check if we can reserve anything with size hint */
+ ret = find_suitable_element(heap, size, flags, align, bound, contig);
+ if (ret != NULL)
+ return 0;
+
+ /*
+ * we still couldn't reserve memory, so try expanding heap with other
+ * page sizes, if there are any
+ */
+ for (i = 0; i < n_other_pg_sz; i++) {
+ uint64_t pg_sz = other_pg_sz[i];
+
+ if (!try_expand_heap(heap, pg_sz, size, socket, flags,
+ align, bound, contig))
+ return 0;
+ }
+ return -1;
+}
+
+/* this will try lower page sizes first */
+static void *
+heap_alloc_on_socket(const char *type, size_t size, int socket,
+ unsigned int flags, size_t align, size_t bound, bool contig)
+{
+ struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
+ struct malloc_heap *heap = &mcfg->malloc_heaps[socket];
+ unsigned int size_flags = flags & ~RTE_MEMZONE_SIZE_HINT_ONLY;
+ void *ret;
+
+ rte_spinlock_lock(&(heap->lock));
+
+ align = align == 0 ? 1 : align;
+
+ /* for legacy mode, try once and with all flags */
+ if (internal_config.legacy_mem) {
+ ret = heap_alloc(heap, type, size, flags, align, bound, contig);
+ goto alloc_unlock;
+ }
+
+ /*
+ * we do not pass the size hint here, because even if allocation fails,
+ * we may still be able to allocate memory from appropriate page sizes,
+ * we just need to request more memory first.
+ */
+ ret = heap_alloc(heap, type, size, size_flags, align, bound, contig);
+ if (ret != NULL)
+ goto alloc_unlock;
+
+ if (!alloc_more_mem_on_socket(heap, size, socket, flags, align, bound,
+ contig)) {
+ ret = heap_alloc(heap, type, size, flags, align, bound, contig);
+
+ /* this should have succeeded */
+ if (ret == NULL)
+ RTE_LOG(ERR, EAL, "Error allocating from heap\n");
+ }
+alloc_unlock:
+ rte_spinlock_unlock(&(heap->lock));
+ return ret;
+}
+
+void *
+malloc_heap_alloc(const char *type, size_t size, int socket_arg,
+ unsigned int flags, size_t align, size_t bound, bool contig)
+{
+ int socket, i, cur_socket;
+ void *ret;
+
+ /* return NULL if size is 0 or alignment is not power-of-2 */
+ if (size == 0 || (align && !rte_is_power_of_2(align)))
+ return NULL;
+
+ if (!rte_eal_has_hugepages())
+ socket_arg = SOCKET_ID_ANY;
+
+ if (socket_arg == SOCKET_ID_ANY)
+ socket = malloc_get_numa_socket();
+ else
+ socket = socket_arg;
+
+ /* Check socket parameter */
+ if (socket >= RTE_MAX_NUMA_NODES)
+ return NULL;
+
+ ret = heap_alloc_on_socket(type, size, socket, flags, align, bound,
+ contig);
+ if (ret != NULL || socket_arg != SOCKET_ID_ANY)
+ return ret;
+
+ /* try other heaps */
+ for (i = 0; i < (int) rte_socket_count(); i++) {
+ cur_socket = rte_socket_id_by_idx(i);
+ if (cur_socket == socket)
+ continue;
+ ret = heap_alloc_on_socket(type, size, cur_socket, flags,
+ align, bound, contig);
+ if (ret != NULL)
+ return ret;
+ }
+ return NULL;
+}
+
+static void *
+heap_alloc_biggest_on_socket(const char *type, int socket, unsigned int flags,
+ size_t align, bool contig)
+{
+ struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
+ struct malloc_heap *heap = &mcfg->malloc_heaps[socket];
+ void *ret;
+
+ rte_spinlock_lock(&(heap->lock));
+
+ align = align == 0 ? 1 : align;
+
+ ret = heap_alloc_biggest(heap, type, flags, align, contig);
+
+ rte_spinlock_unlock(&(heap->lock));
+
+ return ret;
+}
+
+void *
+malloc_heap_alloc_biggest(const char *type, int socket_arg, unsigned int flags,
+ size_t align, bool contig)
+{
+ int socket, i, cur_socket;
+ void *ret;
+
+ /* return NULL if align is not power-of-2 */
+ if ((align && !rte_is_power_of_2(align)))
+ return NULL;
+
+ if (!rte_eal_has_hugepages())
+ socket_arg = SOCKET_ID_ANY;
+
+ if (socket_arg == SOCKET_ID_ANY)
+ socket = malloc_get_numa_socket();
+ else
+ socket = socket_arg;
+
+ /* Check socket parameter */
+ if (socket >= RTE_MAX_NUMA_NODES)
+ return NULL;
+
+ ret = heap_alloc_biggest_on_socket(type, socket, flags, align,
+ contig);
+ if (ret != NULL || socket_arg != SOCKET_ID_ANY)
+ return ret;
+
+ /* try other heaps */
+ for (i = 0; i < (int) rte_socket_count(); i++) {
+ cur_socket = rte_socket_id_by_idx(i);
+ if (cur_socket == socket)
+ continue;
+ ret = heap_alloc_biggest_on_socket(type, cur_socket, flags,
+ align, contig);
+ if (ret != NULL)
+ return ret;
+ }
+ return NULL;
+}
+
+/* this function is exposed in malloc_mp.h */
+int
+malloc_heap_free_pages(void *aligned_start, size_t aligned_len)
+{
+ int n_segs, seg_idx, max_seg_idx;
+ struct rte_memseg_list *msl;
+ size_t page_sz;
+
+ msl = rte_mem_virt2memseg_list(aligned_start);
+ if (msl == NULL)
+ return -1;
+
+ page_sz = (size_t)msl->page_sz;
+ n_segs = aligned_len / page_sz;
+ seg_idx = RTE_PTR_DIFF(aligned_start, msl->base_va) / page_sz;
+ max_seg_idx = seg_idx + n_segs;
+
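+ /* walk every segment covering the range and return its page to the system */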
+ for (; seg_idx < max_seg_idx; seg_idx++) {
+ struct rte_memseg *ms;
+
+ ms = rte_fbarray_get(&msl->memseg_arr, seg_idx);
+ eal_memalloc_free_seg(ms);
+ }
+ return 0;
+}
+
+int
+malloc_heap_free(struct malloc_elem *elem)
+{
+ struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
+ struct malloc_heap *heap;
+ void *start, *aligned_start, *end, *aligned_end;
+ size_t len, aligned_len, page_sz;
+ struct rte_memseg_list *msl;
+ unsigned int i, n_segs, before_space, after_space;
+ int ret;
+
+ if (!malloc_elem_cookies_ok(elem) || elem->state != ELEM_BUSY)
+ return -1;
+
+ /* elem may be merged with previous element, so keep heap address */
+ heap = elem->heap;
+ msl = elem->msl;
+ page_sz = (size_t)msl->page_sz;
+
+ rte_spinlock_lock(&(heap->lock));
+
+ /* mark element as free */
+ elem->state = ELEM_FREE;
+
+ elem = malloc_elem_free(elem);
+
+ /* anything after this is a bonus */
+ ret = 0;
+
+ /* ...of which we can't avail if we are in legacy mode */
+ if (internal_config.legacy_mem)
+ goto free_unlock;
+
+ /* check if we can free any memory back to the system */
+ if (elem->size < page_sz)
+ goto free_unlock;
+
+ /* probably, but let's make sure, as we may not be using up full page */
+ start = elem;
+ len = elem->size;
+ aligned_start = RTE_PTR_ALIGN_CEIL(start, page_sz);
+ end = RTE_PTR_ADD(elem, len);
+ aligned_end = RTE_PTR_ALIGN_FLOOR(end, page_sz);
+
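+ /* only the page-aligned interior of the element can be unmapped; any
+ * unaligned head and tail must remain part of the heap
+ */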
+ aligned_len = RTE_PTR_DIFF(aligned_end, aligned_start);
+
+ /* can't free anything */
+ if (aligned_len < page_sz)
+ goto free_unlock;
+
+ /* we can free something. however, some of these pages may be marked as
+ * unfreeable, so check for that as well
+ */
+ n_segs = aligned_len / page_sz;
+ for (i = 0; i < n_segs; i++) {
+ const struct rte_memseg *tmp =
+ rte_mem_virt2memseg(aligned_start, msl);
+
+ if (tmp->flags & RTE_MEMSEG_FLAG_DO_NOT_FREE) {
+ /* this is an unfreeable segment, so move start */
+ aligned_start = RTE_PTR_ADD(tmp->addr, tmp->len);
+ }
+ }
+
+ /* recalculate length and number of segments */
+ aligned_len = RTE_PTR_DIFF(aligned_end, aligned_start);
+ n_segs = aligned_len / page_sz;
+
+ /* check if we can still free some pages */
+ if (n_segs == 0)
+ goto free_unlock;
+
+ /* We're not done yet. We also have to check if by freeing space we will
+ * be leaving free elements that are too small to store new elements.
+ * Check if we have enough space in the beginning and at the end, or if
+ * start/end are exactly page aligned.
+ */
+ before_space = RTE_PTR_DIFF(aligned_start, elem);
+ after_space = RTE_PTR_DIFF(end, aligned_end);
+ if (before_space != 0 &&
+ before_space < MALLOC_ELEM_OVERHEAD + MIN_DATA_SIZE) {
+ /* There is not enough space before start, but we may be able to
+ * move the start forward by one page.
+ */
+ if (n_segs == 1)
+ goto free_unlock;
+
+ /* move start */
+ aligned_start = RTE_PTR_ADD(aligned_start, page_sz);
+ aligned_len -= page_sz;
+ n_segs--;
+ }
+ if (after_space != 0 && after_space <
+ MALLOC_ELEM_OVERHEAD + MIN_DATA_SIZE) {
+ /* There is not enough space after end, but we may be able to
+ * move the end backwards by one page.
+ */
+ if (n_segs == 1)
+ goto free_unlock;
+
+ /* move end */
+ aligned_end = RTE_PTR_SUB(aligned_end, page_sz);
+ aligned_len -= page_sz;
+ n_segs--;
+ }
+
+ /* now we can finally free us some pages */
+
+ rte_rwlock_write_lock(&mcfg->memory_hotplug_lock);
+
+ /*
+ * we allow secondary processes to clear the heap of this allocated
+ * memory because it is safe to do so: even if notifications about
+ * unmapped pages don't make it to other processes, the heap is shared
+ * across all processes, so this memory will be gone from it anyway,
+ * and nothing can allocate it back unless the primary process manages
+ * to deliver an allocation message to every single running process.
+ */
+
+ malloc_elem_free_list_remove(elem);
+
+ malloc_elem_hide_region(elem, (void *) aligned_start, aligned_len);
+
+ heap->total_size -= aligned_len;
+
+ if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
+ /* notify user about changes in memory map */
+ eal_memalloc_mem_event_notify(RTE_MEM_EVENT_FREE,
+ aligned_start, aligned_len);
+
+ /* don't care if any of this fails */
+ malloc_heap_free_pages(aligned_start, aligned_len);
+
+ request_sync();
+ } else {
+ struct malloc_mp_req req;
+
+ memset(&req, 0, sizeof(req));
+
+ req.t = REQ_TYPE_FREE;
+ req.free_req.addr = aligned_start;
+ req.free_req.len = aligned_len;
+
+ /*
+ * we request primary to deallocate pages, but we don't do it
+ * in this thread. instead, we notify primary that we would like
+ * to deallocate pages, and this process will receive another
+ * request (in parallel) that will do it for us on another
+ * thread.
+ *
+ * we also don't really care if this succeeds - the data is
+ * already removed from the heap, so it is, for all intents and
+ * purposes, hidden from the rest of DPDK even if some other
+ * process (including this one) may have these pages mapped.
+ *
+ * notifications about deallocated memory happen during sync.
+ */
+ request_to_primary(&req);
+ }
+
+ RTE_LOG(DEBUG, EAL, "Heap on socket %d was shrunk by %zdMB\n",
+ msl->socket_id, aligned_len >> 20ULL);
+
+ rte_rwlock_write_unlock(&mcfg->memory_hotplug_lock);
+free_unlock:
+ rte_spinlock_unlock(&(heap->lock));
+ return ret;
+}
+
+int
+malloc_heap_resize(struct malloc_elem *elem, size_t size)
+{
+ int ret;
+
+ if (!malloc_elem_cookies_ok(elem) || elem->state != ELEM_BUSY)
+ return -1;
+
+ rte_spinlock_lock(&(elem->heap->lock));
+
+ ret = malloc_elem_resize(elem, size);
+
+ rte_spinlock_unlock(&(elem->heap->lock));
+
+ return ret;
+}
+
/*
* Function to retrieve data for heap on given socket
*/
@@ -183,21 +951,49 @@ malloc_heap_get_stats(struct malloc_heap *heap,
return 0;
}
+/*
+ * Function to dump contents of heap on given socket
+ */
+void
+malloc_heap_dump(struct malloc_heap *heap, FILE *f)
+{
+ struct malloc_elem *elem;
+
+ rte_spinlock_lock(&heap->lock);
+
+ fprintf(f, "Heap size: 0x%zx\n", heap->total_size);
+ fprintf(f, "Heap alloc count: %u\n", heap->alloc_count);
+
+ elem = heap->first;
+ while (elem) {
+ malloc_elem_dump(elem, f);
+ elem = elem->next;
+ }
+
+ rte_spinlock_unlock(&heap->lock);
+}
+
int
rte_eal_malloc_heap_init(void)
{
struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
- unsigned ms_cnt;
- struct rte_memseg *ms;
- if (mcfg == NULL)
+ if (register_mp_requests()) {
+ RTE_LOG(ERR, EAL, "Couldn't register malloc multiprocess actions\n");
+ rte_rwlock_read_unlock(&mcfg->memory_hotplug_lock);
return -1;
-
- for (ms = &mcfg->memseg[0], ms_cnt = 0;
- (ms_cnt < RTE_MAX_MEMSEG) && (ms->len > 0);
- ms_cnt++, ms++) {
- malloc_heap_add_memseg(&mcfg->malloc_heaps[ms->socket_id], ms);
}
- return 0;
+ /* unlock mem hotplug here. it's safe for the primary as no requests can
+ * come in before the primary itself is fully initialized, and secondaries
+ * do not need to initialize the heap.
+ */
+ rte_rwlock_read_unlock(&mcfg->memory_hotplug_lock);
+
+ /* secondary process does not need to initialize anything */
+ if (rte_eal_process_type() != RTE_PROC_PRIMARY)
+ return 0;
+
+ /* add all IOVA-contiguous areas to the heap */
+ return rte_memseg_contig_walk(malloc_add_seg, NULL);
}
diff --git a/lib/librte_eal/common/malloc_heap.h b/lib/librte_eal/common/malloc_heap.h
index e0defa70..f52cb555 100644
--- a/lib/librte_eal/common/malloc_heap.h
+++ b/lib/librte_eal/common/malloc_heap.h
@@ -5,6 +5,8 @@
#ifndef MALLOC_HEAP_H_
#define MALLOC_HEAP_H_
+#include <stdbool.h>
+
#include <rte_malloc.h>
#include <rte_malloc_heap.h>
@@ -24,13 +26,26 @@ malloc_get_numa_socket(void)
}
void *
-malloc_heap_alloc(struct malloc_heap *heap, const char *type, size_t size,
- unsigned flags, size_t align, size_t bound);
+malloc_heap_alloc(const char *type, size_t size, int socket, unsigned int flags,
+ size_t align, size_t bound, bool contig);
+
+void *
+malloc_heap_alloc_biggest(const char *type, int socket, unsigned int flags,
+ size_t align, bool contig);
+
+int
+malloc_heap_free(struct malloc_elem *elem);
+
+int
+malloc_heap_resize(struct malloc_elem *elem, size_t size);
int
malloc_heap_get_stats(struct malloc_heap *heap,
struct rte_malloc_socket_stats *socket_stats);
+void
+malloc_heap_dump(struct malloc_heap *heap, FILE *f);
+
int
rte_eal_malloc_heap_init(void);
diff --git a/lib/librte_eal/common/malloc_mp.c b/lib/librte_eal/common/malloc_mp.c
new file mode 100644
index 00000000..931c14bc
--- /dev/null
+++ b/lib/librte_eal/common/malloc_mp.c
@@ -0,0 +1,743 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2018 Intel Corporation
+ */
+
+#include <string.h>
+#include <sys/time.h>
+
+#include <rte_alarm.h>
+#include <rte_errno.h>
+#include <rte_string_fns.h>
+
+#include "eal_memalloc.h"
+
+#include "malloc_elem.h"
+#include "malloc_mp.h"
+
+#define MP_ACTION_SYNC "mp_malloc_sync"
+/**< request sent by primary process to notify of changes in memory map */
+#define MP_ACTION_ROLLBACK "mp_malloc_rollback"
+/**< request sent by primary process to notify of changes in memory map. this is
+ * essentially a regular sync request, but we cannot send sync requests while
+ * another one is in progress, and we might have to - therefore, we do this as
+ * a separate callback.
+ */
+#define MP_ACTION_REQUEST "mp_malloc_request"
+/**< request sent by secondary process to ask for allocation/deallocation */
+#define MP_ACTION_RESPONSE "mp_malloc_response"
+/**< response sent to secondary process to indicate result of request */
+
+/* forward declarations */
+static int
+handle_sync_response(const struct rte_mp_msg *request,
+ const struct rte_mp_reply *reply);
+static int
+handle_rollback_response(const struct rte_mp_msg *request,
+ const struct rte_mp_reply *reply);
+
+#define MP_TIMEOUT_S 5 /**< 5 seconds timeouts */
+
+/* when we're allocating, we need to store some state to ensure that we can
+ * roll back later
+ */
+struct primary_alloc_req_state {
+ struct malloc_heap *heap;
+ struct rte_memseg **ms;
+ int ms_len;
+ struct malloc_elem *elem;
+ void *map_addr;
+ size_t map_len;
+};
+
+enum req_state {
+ REQ_STATE_INACTIVE = 0,
+ REQ_STATE_ACTIVE,
+ REQ_STATE_COMPLETE
+};
+
+struct mp_request {
+ TAILQ_ENTRY(mp_request) next;
+ struct malloc_mp_req user_req; /**< contents of request */
+ pthread_cond_t cond; /**< variable we use to time out on this request */
+ enum req_state state; /**< indicate status of this request */
+ struct primary_alloc_req_state alloc_state;
+};
+
+/*
+ * We could've used just a single request, but it may be possible for
+ * secondaries to time out earlier than the primary, and send a new request while
+ * the primary is still expecting replies to the old one. Therefore, each new
+ * request will get assigned a new ID, which is how we will distinguish between
+ * expected and unexpected messages.
+ */
+TAILQ_HEAD(mp_request_list, mp_request);
+static struct {
+ struct mp_request_list list;
+ pthread_mutex_t lock;
+} mp_request_list = {
+ .list = TAILQ_HEAD_INITIALIZER(mp_request_list.list),
+ .lock = PTHREAD_MUTEX_INITIALIZER
+};
+
+/**
+ * General workflow is the following:
+ *
+ * Allocation:
+ * S: send request to primary
+ * P: attempt to allocate memory
+ * if failed, sendmsg failure
+ * if success, send sync request
+ * S: if received msg of failure, quit
+ * if received sync request, synchronize memory map and reply with result
+ * P: if received sync request result
+ * if success, sendmsg success
+ * if failure, roll back allocation and send a rollback request
+ * S: if received msg of success, quit
+ * if received rollback request, synchronize memory map and reply with result
+ * P: if received sync request result
+ * sendmsg sync request result
+ * S: if received msg, quit
+ *
+ * Aside from timeouts, there are three points where we can quit:
+ * - if allocation failed straight away
+ * - if allocation and sync request succeeded
+ * - if allocation succeeded, sync request failed, allocation rolled back and
+ * rollback request received (irrespective of whether it succeeded or failed)
+ *
+ * Deallocation:
+ * S: send request to primary
+ * P: attempt to deallocate memory
+ * if failed, sendmsg failure
+ * if success, send sync request
+ * S: if received msg of failure, quit
+ * if received sync request, synchronize memory map and reply with result
+ * P: if received sync request result
+ * sendmsg sync request result
+ * S: if received msg, quit
+ *
+ * There is no "rollback" from deallocation, as it's safe to have some memory
+ * mapped in some processes - it's absent from the heap, so it won't get used.
+ */
+
+static struct mp_request *
+find_request_by_id(uint64_t id)
+{
+ struct mp_request *req;
+ TAILQ_FOREACH(req, &mp_request_list.list, next) {
+ if (req->user_req.id == id)
+ break;
+ }
+ return req;
+}
+
+/* this ID is, like, totally guaranteed to be absolutely unique. pinky swear. */
+static uint64_t
+get_unique_id(void)
+{
+ uint64_t id;
+ do {
+ id = rte_rand();
+ } while (find_request_by_id(id) != NULL);
+ return id;
+}
+
+/* secondary will respond to sync requests thusly */
+static int
+handle_sync(const struct rte_mp_msg *msg, const void *peer)
+{
+ struct rte_mp_msg reply;
+ const struct malloc_mp_req *req =
+ (const struct malloc_mp_req *)msg->param;
+ struct malloc_mp_req *resp =
+ (struct malloc_mp_req *)reply.param;
+ int ret;
+
+ if (req->t != REQ_TYPE_SYNC) {
+ RTE_LOG(ERR, EAL, "Unexpected request from primary\n");
+ return -1;
+ }
+
+ memset(&reply, 0, sizeof(reply));
+
+ reply.num_fds = 0;
+ strlcpy(reply.name, msg->name, sizeof(reply.name));
+ reply.len_param = sizeof(*resp);
+
+ ret = eal_memalloc_sync_with_primary();
+
+ resp->t = REQ_TYPE_SYNC;
+ resp->id = req->id;
+ resp->result = ret == 0 ? REQ_RESULT_SUCCESS : REQ_RESULT_FAIL;
+
+ rte_mp_reply(&reply, peer);
+
+ return 0;
+}
+
+static int
+handle_alloc_request(const struct malloc_mp_req *m,
+ struct mp_request *req)
+{
+ const struct malloc_req_alloc *ar = &m->alloc_req;
+ struct malloc_heap *heap;
+ struct malloc_elem *elem;
+ struct rte_memseg **ms;
+ size_t alloc_sz;
+ int n_segs;
+ void *map_addr;
+
+ alloc_sz = RTE_ALIGN_CEIL(ar->align + ar->elt_size +
+ MALLOC_ELEM_TRAILER_LEN, ar->page_sz);
+ n_segs = alloc_sz / ar->page_sz;
+
+ heap = ar->heap;
+
+ /* we can't know in advance how many pages we'll need, so we malloc */
+ ms = malloc(sizeof(*ms) * n_segs);
+ if (ms == NULL) {
+ RTE_LOG(ERR, EAL, "Couldn't allocate memory for request state\n");
+ goto fail;
+ }
+
+ memset(ms, 0, sizeof(*ms) * n_segs);
+
+ elem = alloc_pages_on_heap(heap, ar->page_sz, ar->elt_size, ar->socket,
+ ar->flags, ar->align, ar->bound, ar->contig, ms,
+ n_segs);
+
+ if (elem == NULL)
+ goto fail;
+
+ map_addr = ms[0]->addr;
+
+ /* we have succeeded in allocating memory, but we still need to sync
+ * with other processes. however, since DPDK IPC is single-threaded, we
+ * send an asynchronous request and exit this callback.
+ */
+
+ req->alloc_state.ms = ms;
+ req->alloc_state.ms_len = n_segs;
+ req->alloc_state.map_addr = map_addr;
+ req->alloc_state.map_len = alloc_sz;
+ req->alloc_state.elem = elem;
+ req->alloc_state.heap = heap;
+
+ return 0;
+fail:
+ free(ms);
+ return -1;
+}
+
+/* first stage of primary handling requests from secondary */
+static int
+handle_request(const struct rte_mp_msg *msg, const void *peer __rte_unused)
+{
+ const struct malloc_mp_req *m =
+ (const struct malloc_mp_req *)msg->param;
+ struct mp_request *entry;
+ int ret;
+
+ /* lock access to request */
+ pthread_mutex_lock(&mp_request_list.lock);
+
+ /* make sure it's not a dupe */
+ entry = find_request_by_id(m->id);
+ if (entry != NULL) {
+ RTE_LOG(ERR, EAL, "Duplicate request id\n");
+ goto fail;
+ }
+
+ entry = malloc(sizeof(*entry));
+ if (entry == NULL) {
+ RTE_LOG(ERR, EAL, "Unable to allocate memory for request\n");
+ goto fail;
+ }
+
+ /* erase all data */
+ memset(entry, 0, sizeof(*entry));
+
+ if (m->t == REQ_TYPE_ALLOC) {
+ ret = handle_alloc_request(m, entry);
+ } else if (m->t == REQ_TYPE_FREE) {
+ ret = malloc_heap_free_pages(m->free_req.addr,
+ m->free_req.len);
+ } else {
+ RTE_LOG(ERR, EAL, "Unexpected request from secondary\n");
+ goto fail;
+ }
+
+ if (ret != 0) {
+ struct rte_mp_msg resp_msg;
+ struct malloc_mp_req *resp =
+ (struct malloc_mp_req *)resp_msg.param;
+
+ /* send failure message straight away */
+ resp_msg.num_fds = 0;
+ resp_msg.len_param = sizeof(*resp);
+ strlcpy(resp_msg.name, MP_ACTION_RESPONSE,
+ sizeof(resp_msg.name));
+
+ resp->t = m->t;
+ resp->result = REQ_RESULT_FAIL;
+ resp->id = m->id;
+
+ if (rte_mp_sendmsg(&resp_msg)) {
+ RTE_LOG(ERR, EAL, "Couldn't send response\n");
+ goto fail;
+ }
+ /* we did not modify the request */
+ free(entry);
+ } else {
+ struct rte_mp_msg sr_msg;
+ struct malloc_mp_req *sr =
+ (struct malloc_mp_req *)sr_msg.param;
+ struct timespec ts;
+
+ memset(&sr_msg, 0, sizeof(sr_msg));
+
+ /* we can do something, so send sync request asynchronously */
+ sr_msg.num_fds = 0;
+ sr_msg.len_param = sizeof(*sr);
+ strlcpy(sr_msg.name, MP_ACTION_SYNC, sizeof(sr_msg.name));
+
+ ts.tv_nsec = 0;
+ ts.tv_sec = MP_TIMEOUT_S;
+
+ /* sync requests carry no data */
+ sr->t = REQ_TYPE_SYNC;
+ sr->id = m->id;
+
+ /* there may be stray timeout still waiting */
+ do {
+ ret = rte_mp_request_async(&sr_msg, &ts,
+ handle_sync_response);
+ } while (ret != 0 && rte_errno == EEXIST);
+ if (ret != 0) {
+ RTE_LOG(ERR, EAL, "Couldn't send sync request\n");
+ if (m->t == REQ_TYPE_ALLOC)
+ free(entry->alloc_state.ms);
+ goto fail;
+ }
+
+ /* mark request as in progress */
+ memcpy(&entry->user_req, m, sizeof(*m));
+ entry->state = REQ_STATE_ACTIVE;
+
+ TAILQ_INSERT_TAIL(&mp_request_list.list, entry, next);
+ }
+ pthread_mutex_unlock(&mp_request_list.lock);
+ return 0;
+fail:
+ pthread_mutex_unlock(&mp_request_list.lock);
+ free(entry);
+ return -1;
+}
+
+/* callback for asynchronous sync requests for primary. this will either do a
+ * sendmsg with results, or trigger rollback request.
+ */
+static int
+handle_sync_response(const struct rte_mp_msg *request,
+ const struct rte_mp_reply *reply)
+{
+ enum malloc_req_result result;
+ struct mp_request *entry;
+ const struct malloc_mp_req *mpreq =
+ (const struct malloc_mp_req *)request->param;
+ int i;
+
+ /* lock the request */
+ pthread_mutex_lock(&mp_request_list.lock);
+
+ entry = find_request_by_id(mpreq->id);
+ if (entry == NULL) {
+ RTE_LOG(ERR, EAL, "Wrong request ID\n");
+ goto fail;
+ }
+
+ result = REQ_RESULT_SUCCESS;
+
+ if (reply->nb_received != reply->nb_sent)
+ result = REQ_RESULT_FAIL;
+
+ for (i = 0; i < reply->nb_received; i++) {
+ struct malloc_mp_req *resp =
+ (struct malloc_mp_req *)reply->msgs[i].param;
+
+ if (resp->t != REQ_TYPE_SYNC) {
+ RTE_LOG(ERR, EAL, "Unexpected response to sync request\n");
+ result = REQ_RESULT_FAIL;
+ break;
+ }
+ if (resp->id != entry->user_req.id) {
+ RTE_LOG(ERR, EAL, "Response to wrong sync request\n");
+ result = REQ_RESULT_FAIL;
+ break;
+ }
+ if (resp->result == REQ_RESULT_FAIL) {
+ result = REQ_RESULT_FAIL;
+ break;
+ }
+ }
+
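+ /* depending on the original request type and the sync outcome, either
+ * report the result back to the secondary, or roll back the allocation
+ * and ask secondaries to sync again
+ */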
+ if (entry->user_req.t == REQ_TYPE_FREE) {
+ struct rte_mp_msg msg;
+ struct malloc_mp_req *resp = (struct malloc_mp_req *)msg.param;
+
+ memset(&msg, 0, sizeof(msg));
+
+ /* this is a free request, just sendmsg result */
+ resp->t = REQ_TYPE_FREE;
+ resp->result = result;
+ resp->id = entry->user_req.id;
+ msg.num_fds = 0;
+ msg.len_param = sizeof(*resp);
+ strlcpy(msg.name, MP_ACTION_RESPONSE, sizeof(msg.name));
+
+ if (rte_mp_sendmsg(&msg))
+ RTE_LOG(ERR, EAL, "Could not send message to secondary process\n");
+
+ TAILQ_REMOVE(&mp_request_list.list, entry, next);
+ free(entry);
+ } else if (entry->user_req.t == REQ_TYPE_ALLOC &&
+ result == REQ_RESULT_SUCCESS) {
+ struct malloc_heap *heap = entry->alloc_state.heap;
+ struct rte_mp_msg msg;
+ struct malloc_mp_req *resp =
+ (struct malloc_mp_req *)msg.param;
+
+ memset(&msg, 0, sizeof(msg));
+
+ heap->total_size += entry->alloc_state.map_len;
+
+ /* result is success, so just notify secondary about this */
+ resp->t = REQ_TYPE_ALLOC;
+ resp->result = result;
+ resp->id = entry->user_req.id;
+ msg.num_fds = 0;
+ msg.len_param = sizeof(*resp);
+ strlcpy(msg.name, MP_ACTION_RESPONSE, sizeof(msg.name));
+
+ if (rte_mp_sendmsg(&msg))
+ RTE_LOG(ERR, EAL, "Could not send message to secondary process\n");
+
+ TAILQ_REMOVE(&mp_request_list.list, entry, next);
+ free(entry->alloc_state.ms);
+ free(entry);
+ } else if (entry->user_req.t == REQ_TYPE_ALLOC &&
+ result == REQ_RESULT_FAIL) {
+ struct rte_mp_msg rb_msg;
+ struct malloc_mp_req *rb =
+ (struct malloc_mp_req *)rb_msg.param;
+ struct timespec ts;
+ struct primary_alloc_req_state *state =
+ &entry->alloc_state;
+ int ret;
+
+ memset(&rb_msg, 0, sizeof(rb_msg));
+
+ /* we've failed to sync, so do a rollback */
+ rollback_expand_heap(state->ms, state->ms_len, state->elem,
+ state->map_addr, state->map_len);
+
+ /* send rollback request */
+ rb_msg.num_fds = 0;
+ rb_msg.len_param = sizeof(*rb);
+ strlcpy(rb_msg.name, MP_ACTION_ROLLBACK, sizeof(rb_msg.name));
+
+ ts.tv_nsec = 0;
+ ts.tv_sec = MP_TIMEOUT_S;
+
+ /* sync requests carry no data */
+ rb->t = REQ_TYPE_SYNC;
+ rb->id = entry->user_req.id;
+
+ /* there may be stray timeout still waiting */
+ do {
+ ret = rte_mp_request_async(&rb_msg, &ts,
+ handle_rollback_response);
+ } while (ret != 0 && rte_errno == EEXIST);
+ if (ret != 0) {
+ RTE_LOG(ERR, EAL, "Could not send rollback request to secondary process\n");
+
+ /* we couldn't send rollback request, but that's OK -
+ * secondary will time out, and memory has been removed
+ * from heap anyway.
+ */
+ TAILQ_REMOVE(&mp_request_list.list, entry, next);
+ free(state->ms);
+ free(entry);
+ goto fail;
+ }
+ } else {
+ RTE_LOG(ERR, EAL, " to sync request of unknown type\n");
+ goto fail;
+ }
+
+ pthread_mutex_unlock(&mp_request_list.lock);
+ return 0;
+fail:
+ pthread_mutex_unlock(&mp_request_list.lock);
+ return -1;
+}
+
+static int
+handle_rollback_response(const struct rte_mp_msg *request,
+ const struct rte_mp_reply *reply __rte_unused)
+{
+ struct rte_mp_msg msg;
+ struct malloc_mp_req *resp = (struct malloc_mp_req *)msg.param;
+ const struct malloc_mp_req *mpreq =
+ (const struct malloc_mp_req *)request->param;
+ struct mp_request *entry;
+
+ /* lock the request */
+ pthread_mutex_lock(&mp_request_list.lock);
+
+ memset(&msg, 0, sizeof(msg));
+
+ entry = find_request_by_id(mpreq->id);
+ if (entry == NULL) {
+ RTE_LOG(ERR, EAL, "Wrong request ID\n");
+ goto fail;
+ }
+
+ if (entry->user_req.t != REQ_TYPE_ALLOC) {
+ RTE_LOG(ERR, EAL, "Unexpected active request\n");
+ goto fail;
+ }
+
+ /* we don't care if rollback succeeded, request still failed */
+ resp->t = REQ_TYPE_ALLOC;
+ resp->result = REQ_RESULT_FAIL;
+ resp->id = mpreq->id;
+ msg.num_fds = 0;
+ msg.len_param = sizeof(*resp);
+ strlcpy(msg.name, MP_ACTION_RESPONSE, sizeof(msg.name));
+
+ if (rte_mp_sendmsg(&msg))
+ RTE_LOG(ERR, EAL, "Could not send message to secondary process\n");
+
+ /* clean up */
+ TAILQ_REMOVE(&mp_request_list.list, entry, next);
+ free(entry->alloc_state.ms);
+ free(entry);
+
+ pthread_mutex_unlock(&mp_request_list.lock);
+ return 0;
+fail:
+ pthread_mutex_unlock(&mp_request_list.lock);
+ return -1;
+}
+
+/* final stage of the request from secondary */
+static int
+handle_response(const struct rte_mp_msg *msg, const void *peer __rte_unused)
+{
+ const struct malloc_mp_req *m =
+ (const struct malloc_mp_req *)msg->param;
+ struct mp_request *entry;
+
+ pthread_mutex_lock(&mp_request_list.lock);
+
+ entry = find_request_by_id(m->id);
+ if (entry != NULL) {
+ /* update request status */
+ entry->user_req.result = m->result;
+
+ entry->state = REQ_STATE_COMPLETE;
+
+ /* trigger thread wakeup */
+ pthread_cond_signal(&entry->cond);
+ }
+
+ pthread_mutex_unlock(&mp_request_list.lock);
+
+ return 0;
+}
+
+/* synchronously request memory map sync, this is only called whenever primary
+ * process initiates the allocation.
+ */
+int
+request_sync(void)
+{
+ struct rte_mp_msg msg;
+ struct rte_mp_reply reply;
+ struct malloc_mp_req *req = (struct malloc_mp_req *)msg.param;
+ struct timespec ts;
+ int i, ret;
+
+ memset(&msg, 0, sizeof(msg));
+ memset(&reply, 0, sizeof(reply));
+
+ /* no need to create tailq entries as this is entirely synchronous */
+
+ msg.num_fds = 0;
+ msg.len_param = sizeof(*req);
+ strlcpy(msg.name, MP_ACTION_SYNC, sizeof(msg.name));
+
+ /* sync request carries no data */
+ req->t = REQ_TYPE_SYNC;
+ req->id = get_unique_id();
+
+ ts.tv_nsec = 0;
+ ts.tv_sec = MP_TIMEOUT_S;
+
+ /* there may be stray timeout still waiting */
+ do {
+ ret = rte_mp_request_sync(&msg, &reply, &ts);
+ } while (ret != 0 && rte_errno == EEXIST);
+ if (ret != 0) {
+ RTE_LOG(ERR, EAL, "Could not send sync request to secondary process\n");
+ ret = -1;
+ goto out;
+ }
+
+ if (reply.nb_received != reply.nb_sent) {
+ RTE_LOG(ERR, EAL, "Not all secondaries have responded\n");
+ ret = -1;
+ goto out;
+ }
+
+ for (i = 0; i < reply.nb_received; i++) {
+ struct malloc_mp_req *resp =
+ (struct malloc_mp_req *)reply.msgs[i].param;
+ if (resp->t != REQ_TYPE_SYNC) {
+ RTE_LOG(ERR, EAL, "Unexpected response from secondary\n");
+ ret = -1;
+ goto out;
+ }
+ if (resp->id != req->id) {
+ RTE_LOG(ERR, EAL, "Wrong request ID\n");
+ ret = -1;
+ goto out;
+ }
+ if (resp->result != REQ_RESULT_SUCCESS) {
+ RTE_LOG(ERR, EAL, "Secondary process failed to synchronize\n");
+ ret = -1;
+ goto out;
+ }
+ }
+
+ ret = 0;
+out:
+ free(reply.msgs);
+ return ret;
+}
+
+/* this is a synchronous wrapper around a bunch of asynchronous requests to
+ * primary process. this will initiate a request and wait until responses come.
+ */
+int
+request_to_primary(struct malloc_mp_req *user_req)
+{
+ struct rte_mp_msg msg;
+ struct malloc_mp_req *msg_req = (struct malloc_mp_req *)msg.param;
+ struct mp_request *entry;
+ struct timespec ts;
+ struct timeval now;
+ int ret;
+
+ memset(&msg, 0, sizeof(msg));
+ memset(&ts, 0, sizeof(ts));
+
+ pthread_mutex_lock(&mp_request_list.lock);
+
+ entry = malloc(sizeof(*entry));
+ if (entry == NULL) {
+ RTE_LOG(ERR, EAL, "Cannot allocate memory for request\n");
+ goto fail;
+ }
+
+ memset(entry, 0, sizeof(*entry));
+
+ if (gettimeofday(&now, NULL) < 0) {
+ RTE_LOG(ERR, EAL, "Cannot get current time\n");
+ goto fail;
+ }
+
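+ /* pthread_cond_timedwait() expects an absolute deadline, so convert
+ * "now + timeout" into a timespec, carrying nanosecond overflow into
+ * the seconds field
+ */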
+ ts.tv_nsec = (now.tv_usec * 1000) % 1000000000;
+ ts.tv_sec = now.tv_sec + MP_TIMEOUT_S +
+ (now.tv_usec * 1000) / 1000000000;
+
+ /* initialize the request */
+ pthread_cond_init(&entry->cond, NULL);
+
+ msg.num_fds = 0;
+ msg.len_param = sizeof(*msg_req);
+ strlcpy(msg.name, MP_ACTION_REQUEST, sizeof(msg.name));
+
+ /* (attempt to) get a unique id */
+ user_req->id = get_unique_id();
+
+ /* copy contents of user request into the message */
+ memcpy(msg_req, user_req, sizeof(*msg_req));
+
+ if (rte_mp_sendmsg(&msg)) {
+ RTE_LOG(ERR, EAL, "Cannot send message to primary\n");
+ goto fail;
+ }
+
+ /* copy contents of user request into active request */
+ memcpy(&entry->user_req, user_req, sizeof(*user_req));
+
+ /* mark request as in progress */
+ entry->state = REQ_STATE_ACTIVE;
+
+ TAILQ_INSERT_TAIL(&mp_request_list.list, entry, next);
+
+ /* finally, wait on timeout */
+ do {
+ ret = pthread_cond_timedwait(&entry->cond,
+ &mp_request_list.lock, &ts);
+ } while (ret != 0 && ret != ETIMEDOUT);
+
+ if (entry->state != REQ_STATE_COMPLETE) {
+ RTE_LOG(ERR, EAL, "Request timed out\n");
+ ret = -1;
+ } else {
+ ret = 0;
+ user_req->result = entry->user_req.result;
+ }
+ TAILQ_REMOVE(&mp_request_list.list, entry, next);
+ free(entry);
+
+ pthread_mutex_unlock(&mp_request_list.lock);
+ return ret;
+fail:
+ pthread_mutex_unlock(&mp_request_list.lock);
+ free(entry);
+ return -1;
+}
+
+int
+register_mp_requests(void)
+{
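+ /* the primary only services allocation/free requests from secondaries;
+ * secondaries handle sync/rollback requests and final responses
+ */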
+ if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
+ if (rte_mp_action_register(MP_ACTION_REQUEST, handle_request)) {
+ RTE_LOG(ERR, EAL, "Couldn't register '%s' action\n",
+ MP_ACTION_REQUEST);
+ return -1;
+ }
+ } else {
+ if (rte_mp_action_register(MP_ACTION_SYNC, handle_sync)) {
+ RTE_LOG(ERR, EAL, "Couldn't register '%s' action\n",
+ MP_ACTION_SYNC);
+ return -1;
+ }
+ if (rte_mp_action_register(MP_ACTION_ROLLBACK, handle_sync)) {
+ RTE_LOG(ERR, EAL, "Couldn't register '%s' action\n",
+ MP_ACTION_ROLLBACK);
+ return -1;
+ }
+ if (rte_mp_action_register(MP_ACTION_RESPONSE,
+ handle_response)) {
+ RTE_LOG(ERR, EAL, "Couldn't register '%s' action\n",
+ MP_ACTION_RESPONSE);
+ return -1;
+ }
+ }
+ return 0;
+}
diff --git a/lib/librte_eal/common/malloc_mp.h b/lib/librte_eal/common/malloc_mp.h
new file mode 100644
index 00000000..2b86b76f
--- /dev/null
+++ b/lib/librte_eal/common/malloc_mp.h
@@ -0,0 +1,86 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2018 Intel Corporation
+ */
+
+#ifndef MALLOC_MP_H
+#define MALLOC_MP_H
+
+#include <stdbool.h>
+#include <stdint.h>
+
+#include <rte_common.h>
+#include <rte_random.h>
+#include <rte_spinlock.h>
+#include <rte_tailq.h>
+
+/* forward declarations */
+struct malloc_heap;
+struct rte_memseg;
+
+/* multiprocess synchronization structures for malloc */
+enum malloc_req_type {
+ REQ_TYPE_ALLOC, /**< ask primary to allocate */
+ REQ_TYPE_FREE, /**< ask primary to free */
+ REQ_TYPE_SYNC /**< ask secondary to synchronize its memory map */
+};
+
+enum malloc_req_result {
+ REQ_RESULT_SUCCESS,
+ REQ_RESULT_FAIL
+};
+
+struct malloc_req_alloc {
+ struct malloc_heap *heap;
+ uint64_t page_sz;
+ size_t elt_size;
+ int socket;
+ unsigned int flags;
+ size_t align;
+ size_t bound;
+ bool contig;
+};
+
+struct malloc_req_free {
+ RTE_STD_C11
+ union {
+ void *addr;
+ uint64_t addr_64;
+ };
+ uint64_t len;
+};
+
+struct malloc_mp_req {
+ enum malloc_req_type t;
+ RTE_STD_C11
+ union {
+ struct malloc_req_alloc alloc_req;
+ struct malloc_req_free free_req;
+ };
+ uint64_t id; /**< not to be populated by caller */
+ enum malloc_req_result result;
+};
+
+int
+register_mp_requests(void);
+
+int
+request_to_primary(struct malloc_mp_req *req);
+
+/* synchronous memory map sync request */
+int
+request_sync(void);
+
+/* functions from malloc_heap exposed here */
+int
+malloc_heap_free_pages(void *aligned_start, size_t aligned_len);
+
+struct malloc_elem *
+alloc_pages_on_heap(struct malloc_heap *heap, uint64_t pg_sz, size_t elt_size,
+ int socket, unsigned int flags, size_t align, size_t bound,
+ bool contig, struct rte_memseg **ms, int n_segs);
+
+void
+rollback_expand_heap(struct rte_memseg **ms, int n_segs,
+ struct malloc_elem *elem, void *map_addr, size_t map_len);
+
+#endif /* MALLOC_MP_H */
diff --git a/lib/librte_eal/common/meson.build b/lib/librte_eal/common/meson.build
index 82b8910f..56005bea 100644
--- a/lib/librte_eal/common/meson.build
+++ b/lib/librte_eal/common/meson.build
@@ -8,13 +8,16 @@ common_objs = []
common_sources = files(
'eal_common_bus.c',
'eal_common_cpuflags.c',
+ 'eal_common_class.c',
'eal_common_devargs.c',
'eal_common_dev.c',
'eal_common_errno.c',
+ 'eal_common_fbarray.c',
'eal_common_hexdump.c',
'eal_common_launch.c',
'eal_common_lcore.c',
'eal_common_log.c',
+ 'eal_common_memalloc.c',
'eal_common_memory.c',
'eal_common_memzone.c',
'eal_common_options.c',
@@ -23,8 +26,10 @@ common_sources = files(
'eal_common_tailqs.c',
'eal_common_thread.c',
'eal_common_timer.c',
+ 'eal_common_uuid.c',
'malloc_elem.c',
'malloc_heap.c',
+ 'malloc_mp.c',
'rte_keepalive.c',
'rte_malloc.c',
'rte_reciprocal.c',
@@ -43,6 +48,7 @@ common_headers = files(
'include/rte_branch_prediction.h',
'include/rte_bus.h',
'include/rte_bitmap.h',
+ 'include/rte_class.h',
'include/rte_common.h',
'include/rte_debug.h',
'include/rte_devargs.h',
@@ -51,6 +57,7 @@ common_headers = files(
'include/rte_eal_memconfig.h',
'include/rte_eal_interrupts.h',
'include/rte_errno.h',
+ 'include/rte_fbarray.h',
'include/rte_hexdump.h',
'include/rte_interrupts.h',
'include/rte_keepalive.h',
@@ -71,6 +78,7 @@ common_headers = files(
'include/rte_string_fns.h',
'include/rte_tailq.h',
'include/rte_time.h',
+ 'include/rte_uuid.h',
'include/rte_version.h')
# special case install the generic headers, since they go in a subdir
diff --git a/lib/librte_eal/common/rte_malloc.c b/lib/librte_eal/common/rte_malloc.c
index e0e0d0b3..b51a6d11 100644
--- a/lib/librte_eal/common/rte_malloc.c
+++ b/lib/librte_eal/common/rte_malloc.c
@@ -29,20 +29,17 @@
void rte_free(void *addr)
{
if (addr == NULL) return;
- if (malloc_elem_free(malloc_elem_from_data(addr)) < 0)
- rte_panic("Fatal error: Invalid memory\n");
+ if (malloc_heap_free(malloc_elem_from_data(addr)) < 0)
+ RTE_LOG(ERR, EAL, "Error: Invalid memory\n");
}
/*
* Allocate memory on specified heap.
*/
void *
-rte_malloc_socket(const char *type, size_t size, unsigned align, int socket_arg)
+rte_malloc_socket(const char *type, size_t size, unsigned int align,
+ int socket_arg)
{
- struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
- int socket, i;
- void *ret;
-
/* return NULL if size is 0 or alignment is not power-of-2 */
if (size == 0 || (align && !rte_is_power_of_2(align)))
return NULL;
@@ -50,33 +47,12 @@ rte_malloc_socket(const char *type, size_t size, unsigned align, int socket_arg)
if (!rte_eal_has_hugepages())
socket_arg = SOCKET_ID_ANY;
- if (socket_arg == SOCKET_ID_ANY)
- socket = malloc_get_numa_socket();
- else
- socket = socket_arg;
-
/* Check socket parameter */
- if (socket >= RTE_MAX_NUMA_NODES)
+ if (socket_arg >= RTE_MAX_NUMA_NODES)
return NULL;
- ret = malloc_heap_alloc(&mcfg->malloc_heaps[socket], type,
- size, 0, align == 0 ? 1 : align, 0);
- if (ret != NULL || socket_arg != SOCKET_ID_ANY)
- return ret;
-
- /* try other heaps */
- for (i = 0; i < RTE_MAX_NUMA_NODES; i++) {
- /* we already tried this one */
- if (i == socket)
- continue;
-
- ret = malloc_heap_alloc(&mcfg->malloc_heaps[i], type,
- size, 0, align == 0 ? 1 : align, 0);
- if (ret != NULL)
- return ret;
- }
-
- return NULL;
+ return malloc_heap_alloc(type, size, socket_arg, 0,
+ align == 0 ? 1 : align, 0, false);
}
/*
@@ -134,13 +110,15 @@ rte_realloc(void *ptr, size_t size, unsigned align)
return rte_malloc(NULL, size, align);
struct malloc_elem *elem = malloc_elem_from_data(ptr);
- if (elem == NULL)
- rte_panic("Fatal error: memory corruption detected\n");
+ if (elem == NULL) {
+ RTE_LOG(ERR, EAL, "Error: memory corruption detected\n");
+ return NULL;
+ }
size = RTE_CACHE_LINE_ROUNDUP(size), align = RTE_CACHE_LINE_ROUNDUP(align);
/* check alignment matches first, and if ok, see if we can resize block */
if (RTE_PTR_ALIGN(ptr,align) == ptr &&
- malloc_elem_resize(elem, size) == 0)
+ malloc_heap_resize(elem, size) == 0)
return ptr;
/* either alignment is off, or we have no room to expand,
@@ -182,6 +160,23 @@ rte_malloc_get_socket_stats(int socket,
}
/*
+ * Function to dump contents of all heaps
+ */
+void __rte_experimental
+rte_malloc_dump_heaps(FILE *f)
+{
+ struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
+ unsigned int idx;
+
+ for (idx = 0; idx < rte_socket_count(); idx++) {
+ unsigned int socket = rte_socket_id_by_idx(idx);
+ fprintf(f, "Heap on socket %i:\n", socket);
+ malloc_heap_dump(&mcfg->malloc_heaps[socket], f);
+ }
+
+}
+
+/*
* Print stats on memory type. If type is NULL, info on all types is printed
*/
void
@@ -222,17 +217,21 @@ rte_malloc_set_limit(__rte_unused const char *type,
rte_iova_t
rte_malloc_virt2iova(const void *addr)
{
- rte_iova_t iova;
- const struct malloc_elem *elem = malloc_elem_from_data(addr);
+ const struct rte_memseg *ms;
+ struct malloc_elem *elem = malloc_elem_from_data(addr);
+
if (elem == NULL)
return RTE_BAD_IOVA;
- if (elem->ms->iova == RTE_BAD_IOVA)
- return RTE_BAD_IOVA;
if (rte_eal_iova_mode() == RTE_IOVA_VA)
- iova = (uintptr_t)addr;
- else
- iova = elem->ms->iova +
- RTE_PTR_DIFF(addr, elem->ms->addr);
- return iova;
+ return (uintptr_t) addr;
+
+ ms = rte_mem_virt2memseg(addr, elem->msl);
+ if (ms == NULL)
+ return RTE_BAD_IOVA;
+
+ if (ms->iova == RTE_BAD_IOVA)
+ return RTE_BAD_IOVA;
+
+ return ms->iova + RTE_PTR_DIFF(addr, ms->addr);
}
diff --git a/lib/librte_eal/common/rte_service.c b/lib/librte_eal/common/rte_service.c
index be9b5e6d..8767c722 100644
--- a/lib/librte_eal/common/rte_service.c
+++ b/lib/librte_eal/common/rte_service.c
@@ -52,6 +52,7 @@ struct rte_service_spec_impl {
rte_atomic32_t num_mapped_cores;
uint64_t calls;
uint64_t cycles_spent;
+ uint8_t active_on_lcore[RTE_MAX_LCORE];
} __rte_cache_aligned;
/* the internal values of a service core */
@@ -61,7 +62,7 @@ struct core_state {
uint8_t runstate; /* running or stopped */
uint8_t is_service_core; /* set if core is currently a service core */
- /* extreme statistics */
+ uint64_t loops;
uint64_t calls_per_service[RTE_SERVICE_NUM_MAX];
} __rte_cache_aligned;
@@ -115,7 +116,7 @@ fail_mem:
return -ENOMEM;
}
-void __rte_experimental
+void
rte_service_finalize(void)
{
if (!rte_service_library_initialized)
@@ -161,7 +162,7 @@ service_mt_safe(struct rte_service_spec_impl *s)
return !!(s->spec.capabilities & RTE_SERVICE_CAP_MT_SAFE);
}
-int32_t __rte_experimental
+int32_t
rte_service_set_stats_enable(uint32_t id, int32_t enabled)
{
struct rte_service_spec_impl *s;
@@ -175,7 +176,7 @@ rte_service_set_stats_enable(uint32_t id, int32_t enabled)
return 0;
}
-int32_t __rte_experimental
+int32_t
rte_service_set_runstate_mapped_check(uint32_t id, int32_t enabled)
{
struct rte_service_spec_impl *s;
@@ -189,13 +190,13 @@ rte_service_set_runstate_mapped_check(uint32_t id, int32_t enabled)
return 0;
}
-uint32_t __rte_experimental
+uint32_t
rte_service_get_count(void)
{
return rte_service_count;
}
-int32_t __rte_experimental
+int32_t
rte_service_get_by_name(const char *name, uint32_t *service_id)
{
if (!service_id)
@@ -213,7 +214,7 @@ rte_service_get_by_name(const char *name, uint32_t *service_id)
return -ENODEV;
}
-const char * __rte_experimental
+const char *
rte_service_get_name(uint32_t id)
{
struct rte_service_spec_impl *s;
@@ -221,7 +222,7 @@ rte_service_get_name(uint32_t id)
return s->spec.name;
}
-int32_t __rte_experimental
+int32_t
rte_service_probe_capability(uint32_t id, uint32_t capability)
{
struct rte_service_spec_impl *s;
@@ -229,7 +230,7 @@ rte_service_probe_capability(uint32_t id, uint32_t capability)
return !!(s->spec.capabilities & capability);
}
-int32_t __rte_experimental
+int32_t
rte_service_component_register(const struct rte_service_spec *spec,
uint32_t *id_ptr)
{
@@ -262,7 +263,7 @@ rte_service_component_register(const struct rte_service_spec *spec,
return 0;
}
-int32_t __rte_experimental
+int32_t
rte_service_component_unregister(uint32_t id)
{
uint32_t i;
@@ -283,7 +284,7 @@ rte_service_component_unregister(uint32_t id)
return 0;
}
-int32_t __rte_experimental
+int32_t
rte_service_component_runstate_set(uint32_t id, uint32_t runstate)
{
struct rte_service_spec_impl *s;
@@ -298,7 +299,7 @@ rte_service_component_runstate_set(uint32_t id, uint32_t runstate)
return 0;
}
-int32_t __rte_experimental
+int32_t
rte_service_runstate_set(uint32_t id, uint32_t runstate)
{
struct rte_service_spec_impl *s;
@@ -313,7 +314,7 @@ rte_service_runstate_set(uint32_t id, uint32_t runstate)
return 0;
}
-int32_t __rte_experimental
+int32_t
rte_service_runstate_get(uint32_t id)
{
struct rte_service_spec_impl *s;
@@ -347,15 +348,19 @@ rte_service_runner_do_callback(struct rte_service_spec_impl *s,
static inline int32_t
-service_run(uint32_t i, struct core_state *cs, uint64_t service_mask)
+service_run(uint32_t i, int lcore, struct core_state *cs, uint64_t service_mask)
{
if (!service_valid(i))
return -EINVAL;
struct rte_service_spec_impl *s = &rte_services[i];
if (s->comp_runstate != RUNSTATE_RUNNING ||
s->app_runstate != RUNSTATE_RUNNING ||
- !(service_mask & (UINT64_C(1) << i)))
+ !(service_mask & (UINT64_C(1) << i))) {
+ s->active_on_lcore[lcore] = 0;
return -ENOEXEC;
+ }
+
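+ /* record that the service ran on this lcore; reported later via
+ * rte_service_may_be_active()
+ */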
+ s->active_on_lcore[lcore] = 1;
/* check do we need cmpset, if MT safe or <= 1 core
* mapped, atomic ops are not required.
@@ -374,7 +379,26 @@ service_run(uint32_t i, struct core_state *cs, uint64_t service_mask)
return 0;
}
-int32_t __rte_experimental rte_service_run_iter_on_app_lcore(uint32_t id,
+int32_t __rte_experimental
+rte_service_may_be_active(uint32_t id)
+{
+ uint32_t ids[RTE_MAX_LCORE] = {0};
+ struct rte_service_spec_impl *s = &rte_services[id];
+ int32_t lcore_count = rte_service_lcore_list(ids, RTE_MAX_LCORE);
+ int i;
+
+ if (!service_valid(id))
+ return -EINVAL;
+
+ for (i = 0; i < lcore_count; i++) {
+ if (s->active_on_lcore[ids[i]])
+ return 1;
+ }
+
+ return 0;
+}
+
+int32_t rte_service_run_iter_on_app_lcore(uint32_t id,
uint32_t serialize_mt_unsafe)
{
/* run service on calling core, using all-ones as the service mask */
@@ -398,7 +422,7 @@ int32_t __rte_experimental rte_service_run_iter_on_app_lcore(uint32_t id,
return -EBUSY;
}
- int ret = service_run(id, cs, UINT64_MAX);
+ int ret = service_run(id, rte_lcore_id(), cs, UINT64_MAX);
if (serialize_mt_unsafe)
rte_atomic32_dec(&s->num_mapped_cores);
@@ -419,9 +443,11 @@ rte_service_runner_func(void *arg)
for (i = 0; i < RTE_SERVICE_NUM_MAX; i++) {
/* return value ignored as no change to code flow */
- service_run(i, cs, service_mask);
+ service_run(i, lcore, cs, service_mask);
}
+ cs->loops++;
+
rte_smp_rmb();
}
@@ -430,7 +456,7 @@ rte_service_runner_func(void *arg)
return 0;
}
-int32_t __rte_experimental
+int32_t
rte_service_lcore_count(void)
{
int32_t count = 0;
@@ -440,7 +466,7 @@ rte_service_lcore_count(void)
return count;
}
-int32_t __rte_experimental
+int32_t
rte_service_lcore_list(uint32_t array[], uint32_t n)
{
uint32_t count = rte_service_lcore_count();
@@ -463,7 +489,7 @@ rte_service_lcore_list(uint32_t array[], uint32_t n)
return count;
}
-int32_t __rte_experimental
+int32_t
rte_service_lcore_count_services(uint32_t lcore)
{
if (lcore >= RTE_MAX_LCORE)
@@ -476,7 +502,7 @@ rte_service_lcore_count_services(uint32_t lcore)
return __builtin_popcountll(cs->service_mask);
}
-int32_t __rte_experimental
+int32_t
rte_service_start_with_defaults(void)
{
/* create a default mapping from cores to services, then start the
@@ -562,7 +588,7 @@ service_update(struct rte_service_spec *service, uint32_t lcore,
return 0;
}
-int32_t __rte_experimental
+int32_t
rte_service_map_lcore_set(uint32_t id, uint32_t lcore, uint32_t enabled)
{
struct rte_service_spec_impl *s;
@@ -571,7 +597,7 @@ rte_service_map_lcore_set(uint32_t id, uint32_t lcore, uint32_t enabled)
return service_update(&s->spec, lcore, &on, 0);
}
-int32_t __rte_experimental
+int32_t
rte_service_map_lcore_get(uint32_t id, uint32_t lcore)
{
struct rte_service_spec_impl *s;
@@ -597,7 +623,7 @@ set_lcore_state(uint32_t lcore, int32_t state)
lcore_states[lcore].is_service_core = (state == ROLE_SERVICE);
}
-int32_t __rte_experimental
+int32_t
rte_service_lcore_reset_all(void)
{
/* loop over cores, reset all to mask 0 */
@@ -617,7 +643,7 @@ rte_service_lcore_reset_all(void)
return 0;
}
-int32_t __rte_experimental
+int32_t
rte_service_lcore_add(uint32_t lcore)
{
if (lcore >= RTE_MAX_LCORE)
@@ -636,7 +662,7 @@ rte_service_lcore_add(uint32_t lcore)
return rte_eal_wait_lcore(lcore);
}
-int32_t __rte_experimental
+int32_t
rte_service_lcore_del(uint32_t lcore)
{
if (lcore >= RTE_MAX_LCORE)
@@ -655,7 +681,7 @@ rte_service_lcore_del(uint32_t lcore)
return 0;
}
-int32_t __rte_experimental
+int32_t
rte_service_lcore_start(uint32_t lcore)
{
if (lcore >= RTE_MAX_LCORE)
@@ -678,7 +704,7 @@ rte_service_lcore_start(uint32_t lcore)
return ret;
}
-int32_t __rte_experimental
+int32_t
rte_service_lcore_stop(uint32_t lcore)
{
if (lcore >= RTE_MAX_LCORE)
@@ -708,7 +734,7 @@ rte_service_lcore_stop(uint32_t lcore)
return 0;
}
-int32_t __rte_experimental
+int32_t
rte_service_attr_get(uint32_t id, uint32_t attr_id, uint32_t *attr_value)
{
struct rte_service_spec_impl *s;
@@ -729,6 +755,28 @@ rte_service_attr_get(uint32_t id, uint32_t attr_id, uint32_t *attr_value)
}
}
+int32_t __rte_experimental
+rte_service_lcore_attr_get(uint32_t lcore, uint32_t attr_id,
+ uint64_t *attr_value)
+{
+ struct core_state *cs;
+
+ if (lcore >= RTE_MAX_LCORE || !attr_value)
+ return -EINVAL;
+
+ cs = &lcore_states[lcore];
+ if (!cs->is_service_core)
+ return -ENOTSUP;
+
+ switch (attr_id) {
+ case RTE_SERVICE_LCORE_ATTR_LOOPS:
+ *attr_value = cs->loops;
+ return 0;
+ default:
+ return -EINVAL;
+ }
+}
+
static void
rte_service_dump_one(FILE *f, struct rte_service_spec_impl *s,
uint64_t all_cycles, uint32_t reset)
@@ -753,7 +801,7 @@ rte_service_dump_one(FILE *f, struct rte_service_spec_impl *s,
s->cycles_spent, s->cycles_spent / calls);
}
-int32_t __rte_experimental
+int32_t
rte_service_attr_reset_all(uint32_t id)
{
struct rte_service_spec_impl *s;
@@ -764,6 +812,23 @@ rte_service_attr_reset_all(uint32_t id)
return 0;
}
+int32_t __rte_experimental
+rte_service_lcore_attr_reset_all(uint32_t lcore)
+{
+ struct core_state *cs;
+
+ if (lcore >= RTE_MAX_LCORE)
+ return -EINVAL;
+
+ cs = &lcore_states[lcore];
+ if (!cs->is_service_core)
+ return -ENOTSUP;
+
+ cs->loops = 0;
+
+ return 0;
+}
+
static void
service_dump_calls_per_lcore(FILE *f, uint32_t lcore, uint32_t reset)
{
@@ -781,7 +846,8 @@ service_dump_calls_per_lcore(FILE *f, uint32_t lcore, uint32_t reset)
fprintf(f, "\n");
}
-int32_t __rte_experimental rte_service_dump(FILE *f, uint32_t id)
+int32_t
+rte_service_dump(FILE *f, uint32_t id)
{
uint32_t i;
int print_one = (id != UINT32_MAX);