aboutsummaryrefslogtreecommitdiffstats
path: root/lib/librte_eal/common
diff options
context:
space:
mode:
Diffstat (limited to 'lib/librte_eal/common')
-rw-r--r--lib/librte_eal/common/Makefile4
-rw-r--r--lib/librte_eal/common/eal_common_class.c64
-rw-r--r--lib/librte_eal/common/eal_common_dev.c247
-rw-r--r--lib/librte_eal/common/eal_common_devargs.c193
-rw-r--r--lib/librte_eal/common/eal_common_fbarray.c564
-rw-r--r--lib/librte_eal/common/eal_common_log.c4
-rw-r--r--lib/librte_eal/common/eal_common_memory.c504
-rw-r--r--lib/librte_eal/common/eal_common_memzone.c70
-rw-r--r--lib/librte_eal/common/eal_common_options.c78
-rw-r--r--lib/librte_eal/common/eal_common_proc.c222
-rw-r--r--lib/librte_eal/common/eal_common_thread.c9
-rw-r--r--lib/librte_eal/common/eal_common_uuid.c193
-rw-r--r--lib/librte_eal/common/eal_filesystem.h10
-rw-r--r--lib/librte_eal/common/eal_internal_cfg.h6
-rw-r--r--lib/librte_eal/common/eal_options.h4
-rw-r--r--lib/librte_eal/common/eal_private.h46
-rw-r--r--lib/librte_eal/common/include/rte_bitmap.h8
-rw-r--r--lib/librte_eal/common/include/rte_bus.h4
-rw-r--r--lib/librte_eal/common/include/rte_class.h134
-rw-r--r--lib/librte_eal/common/include/rte_common.h29
-rw-r--r--lib/librte_eal/common/include/rte_dev.h103
-rw-r--r--lib/librte_eal/common/include/rte_devargs.h53
-rw-r--r--lib/librte_eal/common/include/rte_eal.h16
-rw-r--r--lib/librte_eal/common/include/rte_fbarray.h114
-rw-r--r--lib/librte_eal/common/include/rte_memory.h54
-rw-r--r--lib/librte_eal/common/include/rte_memzone.h24
-rw-r--r--lib/librte_eal/common/include/rte_service.h56
-rw-r--r--lib/librte_eal/common/include/rte_tailq.h3
-rw-r--r--lib/librte_eal/common/include/rte_uuid.h129
-rw-r--r--lib/librte_eal/common/include/rte_version.h2
-rw-r--r--lib/librte_eal/common/include/rte_vfio.h40
-rw-r--r--lib/librte_eal/common/malloc_elem.c93
-rw-r--r--lib/librte_eal/common/malloc_elem.h6
-rw-r--r--lib/librte_eal/common/malloc_heap.c126
-rw-r--r--lib/librte_eal/common/malloc_heap.h4
-rw-r--r--lib/librte_eal/common/meson.build4
-rw-r--r--lib/librte_eal/common/rte_service.c75
37 files changed, 2452 insertions, 843 deletions
diff --git a/lib/librte_eal/common/Makefile b/lib/librte_eal/common/Makefile
index 48f870f2..cca68826 100644
--- a/lib/librte_eal/common/Makefile
+++ b/lib/librte_eal/common/Makefile
@@ -11,12 +11,12 @@ INC += rte_per_lcore.h rte_random.h
INC += rte_tailq.h rte_interrupts.h rte_alarm.h
INC += rte_string_fns.h rte_version.h
INC += rte_eal_memconfig.h rte_malloc_heap.h
-INC += rte_hexdump.h rte_devargs.h rte_bus.h rte_dev.h
+INC += rte_hexdump.h rte_devargs.h rte_bus.h rte_dev.h rte_class.h
INC += rte_pci_dev_feature_defs.h rte_pci_dev_features.h
INC += rte_malloc.h rte_keepalive.h rte_time.h
INC += rte_service.h rte_service_component.h
INC += rte_bitmap.h rte_vfio.h rte_hypervisor.h rte_test.h
-INC += rte_reciprocal.h rte_fbarray.h
+INC += rte_reciprocal.h rte_fbarray.h rte_uuid.h
GENERIC_INC := rte_atomic.h rte_byteorder.h rte_cycles.h rte_prefetch.h
GENERIC_INC += rte_spinlock.h rte_memcpy.h rte_cpuflags.h rte_rwlock.h
diff --git a/lib/librte_eal/common/eal_common_class.c b/lib/librte_eal/common/eal_common_class.c
new file mode 100644
index 00000000..404a9065
--- /dev/null
+++ b/lib/librte_eal/common/eal_common_class.c
@@ -0,0 +1,64 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright 2018 Gaƫtan Rivet
+ */
+
+#include <stdio.h>
+#include <string.h>
+#include <sys/queue.h>
+
+#include <rte_class.h>
+#include <rte_debug.h>
+
+struct rte_class_list rte_class_list =
+ TAILQ_HEAD_INITIALIZER(rte_class_list);
+
+__rte_experimental void
+rte_class_register(struct rte_class *class)
+{
+ RTE_VERIFY(class);
+ RTE_VERIFY(class->name && strlen(class->name));
+
+ TAILQ_INSERT_TAIL(&rte_class_list, class, next);
+ RTE_LOG(DEBUG, EAL, "Registered [%s] device class.\n", class->name);
+}
+
+__rte_experimental void
+rte_class_unregister(struct rte_class *class)
+{
+ TAILQ_REMOVE(&rte_class_list, class, next);
+ RTE_LOG(DEBUG, EAL, "Unregistered [%s] device class.\n", class->name);
+}
+
+__rte_experimental
+struct rte_class *
+rte_class_find(const struct rte_class *start, rte_class_cmp_t cmp,
+ const void *data)
+{
+ struct rte_class *cls;
+
+ if (start != NULL)
+ cls = TAILQ_NEXT(start, next);
+ else
+ cls = TAILQ_FIRST(&rte_class_list);
+ while (cls != NULL) {
+ if (cmp(cls, data) == 0)
+ break;
+ cls = TAILQ_NEXT(cls, next);
+ }
+ return cls;
+}
+
+static int
+cmp_class_name(const struct rte_class *class, const void *_name)
+{
+ const char *name = _name;
+
+ return strcmp(class->name, name);
+}
+
+__rte_experimental
+struct rte_class *
+rte_class_find_by_name(const char *name)
+{
+ return rte_class_find(NULL, cmp_class_name, (const void *)name);
+}
diff --git a/lib/librte_eal/common/eal_common_dev.c b/lib/librte_eal/common/eal_common_dev.c
index 61cb3b16..678dbcac 100644
--- a/lib/librte_eal/common/eal_common_dev.c
+++ b/lib/librte_eal/common/eal_common_dev.c
@@ -10,9 +10,12 @@
#include <rte_compat.h>
#include <rte_bus.h>
+#include <rte_class.h>
#include <rte_dev.h>
#include <rte_devargs.h>
#include <rte_debug.h>
+#include <rte_errno.h>
+#include <rte_kvargs.h>
#include <rte_log.h>
#include <rte_spinlock.h>
#include <rte_malloc.h>
@@ -42,17 +45,27 @@ static struct dev_event_cb_list dev_event_cbs;
/* spinlock for device callbacks */
static rte_spinlock_t dev_event_lock = RTE_SPINLOCK_INITIALIZER;
-static int cmp_detached_dev_name(const struct rte_device *dev,
- const void *_name)
-{
- const char *name = _name;
+struct dev_next_ctx {
+ struct rte_dev_iterator *it;
+ const char *bus_str;
+ const char *cls_str;
+};
- /* skip attached devices */
- if (dev->driver != NULL)
- return 1;
+#define CTX(it, bus_str, cls_str) \
+ (&(const struct dev_next_ctx){ \
+ .it = it, \
+ .bus_str = bus_str, \
+ .cls_str = cls_str, \
+ })
- return strcmp(dev->name, name);
-}
+#define ITCTX(ptr) \
+ (((struct dev_next_ctx *)(intptr_t)ptr)->it)
+
+#define BUSCTX(ptr) \
+ (((struct dev_next_ctx *)(intptr_t)ptr)->bus_str)
+
+#define CLSCTX(ptr) \
+ (((struct dev_next_ctx *)(intptr_t)ptr)->cls_str)
static int cmp_dev_name(const struct rte_device *dev, const void *_name)
{
@@ -138,8 +151,8 @@ int __rte_experimental rte_eal_hotplug_add(const char *busname, const char *devn
if (da == NULL)
return -ENOMEM;
- ret = rte_devargs_parse(da, "%s:%s,%s",
- busname, devname, devargs);
+ ret = rte_devargs_parsef(da, "%s:%s,%s",
+ busname, devname, devargs);
if (ret)
goto err_devarg;
@@ -151,14 +164,19 @@ int __rte_experimental rte_eal_hotplug_add(const char *busname, const char *devn
if (ret)
goto err_devarg;
- dev = bus->find_device(NULL, cmp_detached_dev_name, devname);
+ dev = bus->find_device(NULL, cmp_dev_name, devname);
if (dev == NULL) {
- RTE_LOG(ERR, EAL, "Cannot find unplugged device (%s)\n",
+ RTE_LOG(ERR, EAL, "Cannot find device (%s)\n",
devname);
ret = -ENODEV;
goto err_devarg;
}
+ if (dev->driver != NULL) {
+ RTE_LOG(ERR, EAL, "Device is already plugged\n");
+ return -EEXIST;
+ }
+
ret = bus->plug(dev);
if (ret) {
RTE_LOG(ERR, EAL, "Driver cannot attach the device (%s)\n",
@@ -200,6 +218,11 @@ rte_eal_hotplug_remove(const char *busname, const char *devname)
return -EINVAL;
}
+ if (dev->driver == NULL) {
+ RTE_LOG(ERR, EAL, "Device is already unplugged\n");
+ return -ENOENT;
+ }
+
ret = bus->unplug(dev);
if (ret)
RTE_LOG(ERR, EAL, "Driver cannot detach the device (%s)\n",
@@ -343,3 +366,201 @@ dev_callback_process(char *device_name, enum rte_dev_event_type event)
}
rte_spinlock_unlock(&dev_event_lock);
}
+
+__rte_experimental
+int
+rte_dev_iterator_init(struct rte_dev_iterator *it,
+ const char *dev_str)
+{
+ struct rte_devargs devargs;
+ struct rte_class *cls = NULL;
+ struct rte_bus *bus = NULL;
+
+ /* Having both bus_str and cls_str NULL is illegal,
+ * marking this iterator as invalid unless
+ * everything goes well.
+ */
+ it->bus_str = NULL;
+ it->cls_str = NULL;
+
+ devargs.data = dev_str;
+ if (rte_devargs_layers_parse(&devargs, dev_str))
+ goto get_out;
+
+ bus = devargs.bus;
+ cls = devargs.cls;
+ /* The string should have at least
+ * one layer specified.
+ */
+ if (bus == NULL && cls == NULL) {
+ RTE_LOG(ERR, EAL,
+ "Either bus or class must be specified.\n");
+ rte_errno = EINVAL;
+ goto get_out;
+ }
+ if (bus != NULL && bus->dev_iterate == NULL) {
+ RTE_LOG(ERR, EAL, "Bus %s not supported\n", bus->name);
+ rte_errno = ENOTSUP;
+ goto get_out;
+ }
+ if (cls != NULL && cls->dev_iterate == NULL) {
+ RTE_LOG(ERR, EAL, "Class %s not supported\n", cls->name);
+ rte_errno = ENOTSUP;
+ goto get_out;
+ }
+ it->bus_str = devargs.bus_str;
+ it->cls_str = devargs.cls_str;
+ it->dev_str = dev_str;
+ it->bus = bus;
+ it->cls = cls;
+ it->device = NULL;
+ it->class_device = NULL;
+get_out:
+ return -rte_errno;
+}
+
+static char *
+dev_str_sane_copy(const char *str)
+{
+ size_t end;
+ char *copy;
+
+ end = strcspn(str, ",/");
+ if (str[end] == ',') {
+ copy = strdup(&str[end + 1]);
+ } else {
+ /* '/' or '\0' */
+ copy = strdup("");
+ }
+ if (copy == NULL) {
+ rte_errno = ENOMEM;
+ } else {
+ char *slash;
+
+ slash = strchr(copy, '/');
+ if (slash != NULL)
+ slash[0] = '\0';
+ }
+ return copy;
+}
+
+static int
+class_next_dev_cmp(const struct rte_class *cls,
+ const void *ctx)
+{
+ struct rte_dev_iterator *it;
+ const char *cls_str = NULL;
+ void *dev;
+
+ if (cls->dev_iterate == NULL)
+ return 1;
+ it = ITCTX(ctx);
+ cls_str = CLSCTX(ctx);
+ dev = it->class_device;
+ /* it->cls_str != NULL means a class
+ * was specified in the devstr.
+ */
+ if (it->cls_str != NULL && cls != it->cls)
+ return 1;
+ /* If an error occurred previously,
+ * no need to test further.
+ */
+ if (rte_errno != 0)
+ return -1;
+ dev = cls->dev_iterate(dev, cls_str, it);
+ it->class_device = dev;
+ return dev == NULL;
+}
+
+static int
+bus_next_dev_cmp(const struct rte_bus *bus,
+ const void *ctx)
+{
+ struct rte_device *dev = NULL;
+ struct rte_class *cls = NULL;
+ struct rte_dev_iterator *it;
+ const char *bus_str = NULL;
+
+ if (bus->dev_iterate == NULL)
+ return 1;
+ it = ITCTX(ctx);
+ bus_str = BUSCTX(ctx);
+ dev = it->device;
+ /* it->bus_str != NULL means a bus
+ * was specified in the devstr.
+ */
+ if (it->bus_str != NULL && bus != it->bus)
+ return 1;
+ /* If an error occurred previously,
+ * no need to test further.
+ */
+ if (rte_errno != 0)
+ return -1;
+ if (it->cls_str == NULL) {
+ dev = bus->dev_iterate(dev, bus_str, it);
+ goto end;
+ }
+ /* cls_str != NULL */
+ if (dev == NULL) {
+next_dev_on_bus:
+ dev = bus->dev_iterate(dev, bus_str, it);
+ it->device = dev;
+ }
+ if (dev == NULL)
+ return 1;
+ if (it->cls != NULL)
+ cls = TAILQ_PREV(it->cls, rte_class_list, next);
+ cls = rte_class_find(cls, class_next_dev_cmp, ctx);
+ if (cls != NULL) {
+ it->cls = cls;
+ goto end;
+ }
+ goto next_dev_on_bus;
+end:
+ it->device = dev;
+ return dev == NULL;
+}
+__rte_experimental
+struct rte_device *
+rte_dev_iterator_next(struct rte_dev_iterator *it)
+{
+ struct rte_bus *bus = NULL;
+ int old_errno = rte_errno;
+ char *bus_str = NULL;
+ char *cls_str = NULL;
+
+ rte_errno = 0;
+ if (it->bus_str == NULL && it->cls_str == NULL) {
+ /* Invalid iterator. */
+ rte_errno = EINVAL;
+ return NULL;
+ }
+ if (it->bus != NULL)
+ bus = TAILQ_PREV(it->bus, rte_bus_list, next);
+ if (it->bus_str != NULL) {
+ bus_str = dev_str_sane_copy(it->bus_str);
+ if (bus_str == NULL)
+ goto out;
+ }
+ if (it->cls_str != NULL) {
+ cls_str = dev_str_sane_copy(it->cls_str);
+ if (cls_str == NULL)
+ goto out;
+ }
+ while ((bus = rte_bus_find(bus, bus_next_dev_cmp,
+ CTX(it, bus_str, cls_str)))) {
+ if (it->device != NULL) {
+ it->bus = bus;
+ goto out;
+ }
+ if (it->bus_str != NULL ||
+ rte_errno != 0)
+ break;
+ }
+ if (rte_errno == 0)
+ rte_errno = old_errno;
+out:
+ free(bus_str);
+ free(cls_str);
+ return it->device;
+}
diff --git a/lib/librte_eal/common/eal_common_devargs.c b/lib/librte_eal/common/eal_common_devargs.c
index b0434158..dac2402a 100644
--- a/lib/librte_eal/common/eal_common_devargs.c
+++ b/lib/librte_eal/common/eal_common_devargs.c
@@ -13,9 +13,14 @@
#include <string.h>
#include <stdarg.h>
+#include <rte_bus.h>
+#include <rte_class.h>
#include <rte_compat.h>
#include <rte_dev.h>
#include <rte_devargs.h>
+#include <rte_errno.h>
+#include <rte_kvargs.h>
+#include <rte_log.h>
#include <rte_tailq.h>
#include "eal_private.h"
@@ -56,30 +61,164 @@ rte_eal_parse_devargs_str(const char *devargs_str,
return 0;
}
+static size_t
+devargs_layer_count(const char *s)
+{
+ size_t i = s ? 1 : 0;
+
+ while (s != NULL && s[0] != '\0') {
+ i += s[0] == '/';
+ s++;
+ }
+ return i;
+}
+
+int
+rte_devargs_layers_parse(struct rte_devargs *devargs,
+ const char *devstr)
+{
+ struct {
+ const char *key;
+ const char *str;
+ struct rte_kvargs *kvlist;
+ } layers[] = {
+ { "bus=", NULL, NULL, },
+ { "class=", NULL, NULL, },
+ { "driver=", NULL, NULL, },
+ };
+ struct rte_kvargs_pair *kv = NULL;
+ struct rte_class *cls = NULL;
+ struct rte_bus *bus = NULL;
+ const char *s = devstr;
+ size_t nblayer;
+ size_t i = 0;
+ int ret = 0;
+
+ /* Split each sub-lists. */
+ nblayer = devargs_layer_count(devstr);
+ if (nblayer > RTE_DIM(layers)) {
+ RTE_LOG(ERR, EAL, "Invalid format: too many layers (%zu)\n",
+ nblayer);
+ ret = -E2BIG;
+ goto get_out;
+ }
+
+ /* If the devargs points the devstr
+ * as source data, then it should not allocate
+ * anything and keep referring only to it.
+ */
+ if (devargs->data != devstr) {
+ devargs->data = strdup(devstr);
+ if (devargs->data == NULL) {
+ RTE_LOG(ERR, EAL, "OOM\n");
+ ret = -ENOMEM;
+ goto get_out;
+ }
+ s = devargs->data;
+ }
+
+ while (s != NULL) {
+ if (i >= RTE_DIM(layers)) {
+ RTE_LOG(ERR, EAL, "Unrecognized layer %s\n", s);
+ ret = -EINVAL;
+ goto get_out;
+ }
+ /*
+ * The last layer is free-form.
+ * The "driver" key is not required (but accepted).
+ */
+ if (strncmp(layers[i].key, s, strlen(layers[i].key)) &&
+ i != RTE_DIM(layers) - 1)
+ goto next_layer;
+ layers[i].str = s;
+ layers[i].kvlist = rte_kvargs_parse_delim(s, NULL, "/");
+ if (layers[i].kvlist == NULL) {
+ RTE_LOG(ERR, EAL, "Could not parse %s\n", s);
+ ret = -EINVAL;
+ goto get_out;
+ }
+ s = strchr(s, '/');
+ if (s != NULL)
+ s++;
+next_layer:
+ i++;
+ }
+
+ /* Parse each sub-list. */
+ for (i = 0; i < RTE_DIM(layers); i++) {
+ if (layers[i].kvlist == NULL)
+ continue;
+ kv = &layers[i].kvlist->pairs[0];
+ if (strcmp(kv->key, "bus") == 0) {
+ bus = rte_bus_find_by_name(kv->value);
+ if (bus == NULL) {
+ RTE_LOG(ERR, EAL, "Could not find bus \"%s\"\n",
+ kv->value);
+ ret = -EFAULT;
+ goto get_out;
+ }
+ } else if (strcmp(kv->key, "class") == 0) {
+ cls = rte_class_find_by_name(kv->value);
+ if (cls == NULL) {
+ RTE_LOG(ERR, EAL, "Could not find class \"%s\"\n",
+ kv->value);
+ ret = -EFAULT;
+ goto get_out;
+ }
+ } else if (strcmp(kv->key, "driver") == 0) {
+ /* Ignore */
+ continue;
+ }
+ }
+
+ /* Fill devargs fields. */
+ devargs->bus_str = layers[0].str;
+ devargs->cls_str = layers[1].str;
+ devargs->drv_str = layers[2].str;
+ devargs->bus = bus;
+ devargs->cls = cls;
+
+ /* If we own the data, clean up a bit
+ * the several layers string, to ease
+ * their parsing afterward.
+ */
+ if (devargs->data != devstr) {
+ char *s = (void *)(intptr_t)(devargs->data);
+
+ while ((s = strchr(s, '/'))) {
+ *s = '\0';
+ s++;
+ }
+ }
+
+get_out:
+ for (i = 0; i < RTE_DIM(layers); i++) {
+ if (layers[i].kvlist)
+ rte_kvargs_free(layers[i].kvlist);
+ }
+ if (ret != 0)
+ rte_errno = -ret;
+ return ret;
+}
+
static int
bus_name_cmp(const struct rte_bus *bus, const void *name)
{
return strncmp(bus->name, name, strlen(bus->name));
}
-int __rte_experimental
-rte_devargs_parse(struct rte_devargs *da, const char *format, ...)
+__rte_experimental
+int
+rte_devargs_parse(struct rte_devargs *da, const char *dev)
{
struct rte_bus *bus = NULL;
- va_list ap;
- va_start(ap, format);
- char dev[vsnprintf(NULL, 0, format, ap) + 1];
const char *devname;
const size_t maxlen = sizeof(da->name);
size_t i;
- va_end(ap);
if (da == NULL)
return -EINVAL;
- va_start(ap, format);
- vsnprintf(dev, sizeof(dev), format, ap);
- va_end(ap);
/* Retrieve eventual bus info */
do {
devname = dev;
@@ -96,7 +235,7 @@ rte_devargs_parse(struct rte_devargs *da, const char *format, ...)
da->name[i] = devname[i];
i++;
if (i == maxlen) {
- fprintf(stderr, "WARNING: Parsing \"%s\": device name should be shorter than %zu\n",
+ RTE_LOG(WARNING, EAL, "Parsing \"%s\": device name should be shorter than %zu\n",
dev, maxlen);
da->name[i - 1] = '\0';
return -EINVAL;
@@ -106,7 +245,7 @@ rte_devargs_parse(struct rte_devargs *da, const char *format, ...)
if (bus == NULL) {
bus = rte_bus_find_by_device_name(da->name);
if (bus == NULL) {
- fprintf(stderr, "ERROR: failed to parse device \"%s\"\n",
+ RTE_LOG(ERR, EAL, "failed to parse device \"%s\"\n",
da->name);
return -EFAULT;
}
@@ -118,12 +257,40 @@ rte_devargs_parse(struct rte_devargs *da, const char *format, ...)
else
da->args = strdup("");
if (da->args == NULL) {
- fprintf(stderr, "ERROR: not enough memory to parse arguments\n");
+ RTE_LOG(ERR, EAL, "not enough memory to parse arguments\n");
return -ENOMEM;
}
return 0;
}
+__rte_experimental
+int
+rte_devargs_parsef(struct rte_devargs *da, const char *format, ...)
+{
+ va_list ap;
+ size_t len;
+ char *dev;
+
+ if (da == NULL)
+ return -EINVAL;
+
+ va_start(ap, format);
+ len = vsnprintf(NULL, 0, format, ap);
+ va_end(ap);
+
+ dev = calloc(1, len + 1);
+ if (dev == NULL) {
+ RTE_LOG(ERR, EAL, "not enough memory to parse device\n");
+ return -ENOMEM;
+ }
+
+ va_start(ap, format);
+ vsnprintf(dev, len + 1, format, ap);
+ va_end(ap);
+
+ return rte_devargs_parse(da, dev);
+}
+
int __rte_experimental
rte_devargs_insert(struct rte_devargs *da)
{
@@ -150,7 +317,7 @@ rte_devargs_add(enum rte_devtype devtype, const char *devargs_str)
if (devargs == NULL)
goto fail;
- if (rte_devargs_parse(devargs, "%s", dev))
+ if (rte_devargs_parse(devargs, dev))
goto fail;
devargs->type = devtype;
bus = devargs->bus;
diff --git a/lib/librte_eal/common/eal_common_fbarray.c b/lib/librte_eal/common/eal_common_fbarray.c
index 019f84c1..43caf3ce 100644
--- a/lib/librte_eal/common/eal_common_fbarray.c
+++ b/lib/librte_eal/common/eal_common_fbarray.c
@@ -231,7 +231,7 @@ find_next_n(const struct rte_fbarray *arr, unsigned int start, unsigned int n,
return MASK_GET_IDX(msk_idx, run_start);
}
/* we didn't find anything */
- rte_errno = used ? -ENOENT : -ENOSPC;
+ rte_errno = used ? ENOENT : ENOSPC;
return -1;
}
@@ -287,7 +287,7 @@ find_next(const struct rte_fbarray *arr, unsigned int start, bool used)
return MASK_GET_IDX(idx, found);
}
/* we didn't find anything */
- rte_errno = used ? -ENOENT : -ENOSPC;
+ rte_errno = used ? ENOENT : ENOSPC;
return -1;
}
@@ -353,6 +353,277 @@ find_contig(const struct rte_fbarray *arr, unsigned int start, bool used)
}
static int
+find_prev_n(const struct rte_fbarray *arr, unsigned int start, unsigned int n,
+ bool used)
+{
+ const struct used_mask *msk = get_used_mask(arr->data, arr->elt_sz,
+ arr->len);
+ unsigned int msk_idx, lookbehind_idx, first, first_mod;
+ uint64_t ignore_msk;
+
+ /*
+ * mask only has granularity of MASK_ALIGN, but start may not be aligned
+ * on that boundary, so construct a special mask to exclude anything we
+ * don't want to see to avoid confusing ctz.
+ */
+ first = MASK_LEN_TO_IDX(start);
+ first_mod = MASK_LEN_TO_MOD(start);
+ /* we're going backwards, so mask must start from the top */
+ ignore_msk = first_mod == MASK_ALIGN - 1 ?
+ -1ULL : /* prevent overflow */
+ ~(-1ULL << (first_mod + 1));
+
+ /* go backwards, include zero */
+ msk_idx = first;
+ do {
+ uint64_t cur_msk, lookbehind_msk;
+ unsigned int run_start, run_end, ctz, left;
+ bool found = false;
+ /*
+ * The process of getting n consecutive bits from the top for
+ * arbitrary n is a bit involved, but here it is in a nutshell:
+ *
+ * 1. let n be the number of consecutive bits we're looking for
+ * 2. check if n can fit in one mask, and if so, do n-1
+ * lshift-ands to see if there is an appropriate run inside
+ * our current mask
+ * 2a. if we found a run, bail out early
+ * 2b. if we didn't find a run, proceed
+ * 3. invert the mask and count trailing zeroes (that is, count
+ * how many consecutive set bits we had starting from the
+ * start of current mask) as k
+ * 3a. if k is 0, continue to next mask
+ * 3b. if k is not 0, we have a potential run
+ * 4. to satisfy our requirements, next mask must have n-k
+ * consecutive set bits at the end, so we will do (n-k-1)
+ * lshift-ands and check if last bit is set.
+ *
+ * Step 4 will need to be repeated if (n-k) > MASK_ALIGN until
+ * we either run out of masks, lose the run, or find what we
+ * were looking for.
+ */
+ cur_msk = msk->data[msk_idx];
+ left = n;
+
+ /* if we're looking for free spaces, invert the mask */
+ if (!used)
+ cur_msk = ~cur_msk;
+
+ /* if we have an ignore mask, ignore once */
+ if (ignore_msk) {
+ cur_msk &= ignore_msk;
+ ignore_msk = 0;
+ }
+
+ /* if n can fit in within a single mask, do a search */
+ if (n <= MASK_ALIGN) {
+ uint64_t tmp_msk = cur_msk;
+ unsigned int s_idx;
+ for (s_idx = 0; s_idx < n - 1; s_idx++)
+ tmp_msk &= tmp_msk << 1ULL;
+ /* we found what we were looking for */
+ if (tmp_msk != 0) {
+ /* clz will give us offset from end of mask, and
+ * we only get the end of our run, not start,
+ * so adjust result to point to where start
+ * would have been.
+ */
+ run_start = MASK_ALIGN -
+ __builtin_clzll(tmp_msk) - n;
+ return MASK_GET_IDX(msk_idx, run_start);
+ }
+ }
+
+ /*
+ * we didn't find our run within the mask, or n > MASK_ALIGN,
+ * so we're going for plan B.
+ */
+
+ /* count trailing zeroes on inverted mask */
+ if (~cur_msk == 0)
+ ctz = sizeof(cur_msk) * 8;
+ else
+ ctz = __builtin_ctzll(~cur_msk);
+
+ /* if there aren't any runs at the start either, just
+ * continue
+ */
+ if (ctz == 0)
+ continue;
+
+ /* we have a partial run at the start, so try looking behind */
+ run_end = MASK_GET_IDX(msk_idx, ctz);
+ left -= ctz;
+
+ /* go backwards, include zero */
+ lookbehind_idx = msk_idx - 1;
+
+ /* we can't lookbehind as we've run out of masks, so stop */
+ if (msk_idx == 0)
+ break;
+
+ do {
+ const uint64_t last_bit = 1ULL << (MASK_ALIGN - 1);
+ unsigned int s_idx, need;
+
+ lookbehind_msk = msk->data[lookbehind_idx];
+
+ /* if we're looking for free space, invert the mask */
+ if (!used)
+ lookbehind_msk = ~lookbehind_msk;
+
+ /* figure out how many consecutive bits we need here */
+ need = RTE_MIN(left, MASK_ALIGN);
+
+ for (s_idx = 0; s_idx < need - 1; s_idx++)
+ lookbehind_msk &= lookbehind_msk << 1ULL;
+
+ /* if last bit is not set, we've lost the run */
+ if ((lookbehind_msk & last_bit) == 0) {
+ /*
+ * we've scanned this far, so we know there are
+ * no runs in the space we've lookbehind-scanned
+ * as well, so skip that on next iteration.
+ */
+ ignore_msk = -1ULL << need;
+ msk_idx = lookbehind_idx;
+ break;
+ }
+
+ left -= need;
+
+ /* check if we've found what we were looking for */
+ if (left == 0) {
+ found = true;
+ break;
+ }
+ } while ((lookbehind_idx--) != 0); /* decrement after check to
+ * include zero
+ */
+
+ /* we didn't find anything, so continue */
+ if (!found)
+ continue;
+
+ /* we've found what we were looking for, but we only know where
+ * the run ended, so calculate start position.
+ */
+ return run_end - n;
+ } while (msk_idx-- != 0); /* decrement after check to include zero */
+ /* we didn't find anything */
+ rte_errno = used ? ENOENT : ENOSPC;
+ return -1;
+}
+
+static int
+find_prev(const struct rte_fbarray *arr, unsigned int start, bool used)
+{
+ const struct used_mask *msk = get_used_mask(arr->data, arr->elt_sz,
+ arr->len);
+ unsigned int idx, first, first_mod;
+ uint64_t ignore_msk;
+
+ /*
+ * mask only has granularity of MASK_ALIGN, but start may not be aligned
+ * on that boundary, so construct a special mask to exclude anything we
+ * don't want to see to avoid confusing clz.
+ */
+ first = MASK_LEN_TO_IDX(start);
+ first_mod = MASK_LEN_TO_MOD(start);
+ /* we're going backwards, so mask must start from the top */
+ ignore_msk = first_mod == MASK_ALIGN - 1 ?
+ -1ULL : /* prevent overflow */
+ ~(-1ULL << (first_mod + 1));
+
+ /* go backwards, include zero */
+ idx = first;
+ do {
+ uint64_t cur = msk->data[idx];
+ int found;
+
+ /* if we're looking for free entries, invert mask */
+ if (!used)
+ cur = ~cur;
+
+ /* ignore everything before start on first iteration */
+ if (idx == first)
+ cur &= ignore_msk;
+
+ /* check if we have any entries */
+ if (cur == 0)
+ continue;
+
+ /*
+ * find last set bit - that will correspond to whatever it is
+ * that we're looking for. we're counting trailing zeroes, thus
+ * the value we get is counted from end of mask, so calculate
+ * position from start of mask.
+ */
+ found = MASK_ALIGN - __builtin_clzll(cur) - 1;
+
+ return MASK_GET_IDX(idx, found);
+ } while (idx-- != 0); /* decrement after check to include zero*/
+
+ /* we didn't find anything */
+ rte_errno = used ? ENOENT : ENOSPC;
+ return -1;
+}
+
+static int
+find_rev_contig(const struct rte_fbarray *arr, unsigned int start, bool used)
+{
+ const struct used_mask *msk = get_used_mask(arr->data, arr->elt_sz,
+ arr->len);
+ unsigned int idx, first, first_mod;
+ unsigned int need_len, result = 0;
+
+ first = MASK_LEN_TO_IDX(start);
+ first_mod = MASK_LEN_TO_MOD(start);
+
+ /* go backwards, include zero */
+ idx = first;
+ do {
+ uint64_t cur = msk->data[idx];
+ unsigned int run_len;
+
+ need_len = MASK_ALIGN;
+
+ /* if we're looking for free entries, invert mask */
+ if (!used)
+ cur = ~cur;
+
+ /* ignore everything after start on first iteration */
+ if (idx == first) {
+ unsigned int end_len = MASK_ALIGN - first_mod - 1;
+ cur <<= end_len;
+ /* at the start, we don't need the full mask len */
+ need_len -= end_len;
+ }
+
+ /* we will be looking for zeroes, so invert the mask */
+ cur = ~cur;
+
+ /* if mask is zero, we have a complete run */
+ if (cur == 0)
+ goto endloop;
+
+ /*
+ * see where run ends, starting from the end.
+ */
+ run_len = __builtin_clzll(cur);
+
+ /* add however many zeroes we've had in the last run and quit */
+ if (run_len < need_len) {
+ result += run_len;
+ break;
+ }
+endloop:
+ result += need_len;
+ } while (idx-- != 0); /* decrement after check to include zero */
+ return result;
+}
+
+static int
set_used(struct rte_fbarray *arr, unsigned int idx, bool used)
{
struct used_mask *msk;
@@ -434,39 +705,52 @@ rte_fbarray_init(struct rte_fbarray *arr, const char *name, unsigned int len,
if (data == NULL)
goto fail;
- eal_get_fbarray_path(path, sizeof(path), name);
+ if (internal_config.no_shconf) {
+ /* remap virtual area as writable */
+ void *new_data = mmap(data, mmap_len, PROT_READ | PROT_WRITE,
+ MAP_FIXED | MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+ if (new_data == MAP_FAILED) {
+ RTE_LOG(DEBUG, EAL, "%s(): couldn't remap anonymous memory: %s\n",
+ __func__, strerror(errno));
+ goto fail;
+ }
+ } else {
+ eal_get_fbarray_path(path, sizeof(path), name);
- /*
- * Each fbarray is unique to process namespace, i.e. the filename
- * depends on process prefix. Try to take out a lock and see if we
- * succeed. If we don't, someone else is using it already.
- */
- fd = open(path, O_CREAT | O_RDWR, 0600);
- if (fd < 0) {
- RTE_LOG(DEBUG, EAL, "%s(): couldn't open %s: %s\n", __func__,
- path, strerror(errno));
- rte_errno = errno;
- goto fail;
- } else if (flock(fd, LOCK_EX | LOCK_NB)) {
- RTE_LOG(DEBUG, EAL, "%s(): couldn't lock %s: %s\n", __func__,
- path, strerror(errno));
- rte_errno = EBUSY;
- goto fail;
- }
+ /*
+ * Each fbarray is unique to process namespace, i.e. the
+ * filename depends on process prefix. Try to take out a lock
+ * and see if we succeed. If we don't, someone else is using it
+ * already.
+ */
+ fd = open(path, O_CREAT | O_RDWR, 0600);
+ if (fd < 0) {
+ RTE_LOG(DEBUG, EAL, "%s(): couldn't open %s: %s\n",
+ __func__, path, strerror(errno));
+ rte_errno = errno;
+ goto fail;
+ } else if (flock(fd, LOCK_EX | LOCK_NB)) {
+ RTE_LOG(DEBUG, EAL, "%s(): couldn't lock %s: %s\n",
+ __func__, path, strerror(errno));
+ rte_errno = EBUSY;
+ goto fail;
+ }
- /* take out a non-exclusive lock, so that other processes could still
- * attach to it, but no other process could reinitialize it.
- */
- if (flock(fd, LOCK_SH | LOCK_NB)) {
- rte_errno = errno;
- goto fail;
- }
+ /* take out a non-exclusive lock, so that other processes could
+ * still attach to it, but no other process could reinitialize
+ * it.
+ */
+ if (flock(fd, LOCK_SH | LOCK_NB)) {
+ rte_errno = errno;
+ goto fail;
+ }
- if (resize_and_map(fd, data, mmap_len))
- goto fail;
+ if (resize_and_map(fd, data, mmap_len))
+ goto fail;
- /* we've mmap'ed the file, we can now close the fd */
- close(fd);
+ /* we've mmap'ed the file, we can now close the fd */
+ close(fd);
+ }
/* initialize the data */
memset(data, 0, mmap_len);
@@ -675,8 +959,8 @@ rte_fbarray_is_used(struct rte_fbarray *arr, unsigned int idx)
return ret;
}
-int __rte_experimental
-rte_fbarray_find_next_free(struct rte_fbarray *arr, unsigned int start)
+static int
+fbarray_find(struct rte_fbarray *arr, unsigned int start, bool next, bool used)
{
int ret = -1;
@@ -688,36 +972,106 @@ rte_fbarray_find_next_free(struct rte_fbarray *arr, unsigned int start)
/* prevent array from changing under us */
rte_rwlock_read_lock(&arr->rwlock);
- if (arr->len == arr->count) {
- rte_errno = ENOSPC;
- goto out;
+ /* cheap checks to prevent doing useless work */
+ if (!used) {
+ if (arr->len == arr->count) {
+ rte_errno = ENOSPC;
+ goto out;
+ }
+ if (arr->count == 0) {
+ ret = start;
+ goto out;
+ }
+ } else {
+ if (arr->count == 0) {
+ rte_errno = ENOENT;
+ goto out;
+ }
+ if (arr->len == arr->count) {
+ ret = start;
+ goto out;
+ }
}
-
- ret = find_next(arr, start, false);
+ if (next)
+ ret = find_next(arr, start, used);
+ else
+ ret = find_prev(arr, start, used);
out:
rte_rwlock_read_unlock(&arr->rwlock);
return ret;
}
int __rte_experimental
+rte_fbarray_find_next_free(struct rte_fbarray *arr, unsigned int start)
+{
+ return fbarray_find(arr, start, true, false);
+}
+
+int __rte_experimental
rte_fbarray_find_next_used(struct rte_fbarray *arr, unsigned int start)
{
+ return fbarray_find(arr, start, true, true);
+}
+
+int __rte_experimental
+rte_fbarray_find_prev_free(struct rte_fbarray *arr, unsigned int start)
+{
+ return fbarray_find(arr, start, false, false);
+}
+
+int __rte_experimental
+rte_fbarray_find_prev_used(struct rte_fbarray *arr, unsigned int start)
+{
+ return fbarray_find(arr, start, false, true);
+}
+
+static int
+fbarray_find_n(struct rte_fbarray *arr, unsigned int start, unsigned int n,
+ bool next, bool used)
+{
int ret = -1;
- if (arr == NULL || start >= arr->len) {
+ if (arr == NULL || start >= arr->len || n > arr->len || n == 0) {
rte_errno = EINVAL;
return -1;
}
+ if (next && (arr->len - start) < n) {
+ rte_errno = used ? ENOENT : ENOSPC;
+ return -1;
+ }
+ if (!next && start < (n - 1)) {
+ rte_errno = used ? ENOENT : ENOSPC;
+ return -1;
+ }
/* prevent array from changing under us */
rte_rwlock_read_lock(&arr->rwlock);
- if (arr->count == 0) {
- rte_errno = ENOENT;
- goto out;
+ /* cheap checks to prevent doing useless work */
+ if (!used) {
+ if (arr->len == arr->count || arr->len - arr->count < n) {
+ rte_errno = ENOSPC;
+ goto out;
+ }
+ if (arr->count == 0) {
+ ret = next ? start : start - n + 1;
+ goto out;
+ }
+ } else {
+ if (arr->count < n) {
+ rte_errno = ENOENT;
+ goto out;
+ }
+ if (arr->count == arr->len) {
+ ret = next ? start : start - n + 1;
+ goto out;
+ }
}
- ret = find_next(arr, start, true);
+ if (next)
+ ret = find_next_n(arr, start, n, used);
+ else
+ ret = find_prev_n(arr, start, n, used);
out:
rte_rwlock_read_unlock(&arr->rwlock);
return ret;
@@ -727,54 +1081,33 @@ int __rte_experimental
rte_fbarray_find_next_n_free(struct rte_fbarray *arr, unsigned int start,
unsigned int n)
{
- int ret = -1;
-
- if (arr == NULL || start >= arr->len || n > arr->len) {
- rte_errno = EINVAL;
- return -1;
- }
-
- /* prevent array from changing under us */
- rte_rwlock_read_lock(&arr->rwlock);
-
- if (arr->len == arr->count || arr->len - arr->count < n) {
- rte_errno = ENOSPC;
- goto out;
- }
-
- ret = find_next_n(arr, start, n, false);
-out:
- rte_rwlock_read_unlock(&arr->rwlock);
- return ret;
+ return fbarray_find_n(arr, start, n, true, false);
}
int __rte_experimental
rte_fbarray_find_next_n_used(struct rte_fbarray *arr, unsigned int start,
unsigned int n)
{
- int ret = -1;
-
- if (arr == NULL || start >= arr->len || n > arr->len) {
- rte_errno = EINVAL;
- return -1;
- }
-
- /* prevent array from changing under us */
- rte_rwlock_read_lock(&arr->rwlock);
-
- if (arr->count < n) {
- rte_errno = ENOENT;
- goto out;
- }
+ return fbarray_find_n(arr, start, n, true, true);
+}
- ret = find_next_n(arr, start, n, true);
-out:
- rte_rwlock_read_unlock(&arr->rwlock);
- return ret;
+int __rte_experimental
+rte_fbarray_find_prev_n_free(struct rte_fbarray *arr, unsigned int start,
+ unsigned int n)
+{
+ return fbarray_find_n(arr, start, n, false, false);
}
int __rte_experimental
-rte_fbarray_find_contig_free(struct rte_fbarray *arr, unsigned int start)
+rte_fbarray_find_prev_n_used(struct rte_fbarray *arr, unsigned int start,
+ unsigned int n)
+{
+ return fbarray_find_n(arr, start, n, false, true);
+}
+
+static int
+fbarray_find_contig(struct rte_fbarray *arr, unsigned int start, bool next,
+ bool used)
{
int ret = -1;
@@ -786,39 +1119,66 @@ rte_fbarray_find_contig_free(struct rte_fbarray *arr, unsigned int start)
/* prevent array from changing under us */
rte_rwlock_read_lock(&arr->rwlock);
- if (arr->len == arr->count) {
- rte_errno = ENOSPC;
- goto out;
- }
-
- if (arr->count == 0) {
- ret = arr->len - start;
- goto out;
+ /* cheap checks to prevent doing useless work */
+ if (used) {
+ if (arr->count == 0) {
+ ret = 0;
+ goto out;
+ }
+ if (next && arr->count == arr->len) {
+ ret = arr->len - start;
+ goto out;
+ }
+ if (!next && arr->count == arr->len) {
+ ret = start + 1;
+ goto out;
+ }
+ } else {
+ if (arr->len == arr->count) {
+ ret = 0;
+ goto out;
+ }
+ if (next && arr->count == 0) {
+ ret = arr->len - start;
+ goto out;
+ }
+ if (!next && arr->count == 0) {
+ ret = start + 1;
+ goto out;
+ }
}
- ret = find_contig(arr, start, false);
+ if (next)
+ ret = find_contig(arr, start, used);
+ else
+ ret = find_rev_contig(arr, start, used);
out:
rte_rwlock_read_unlock(&arr->rwlock);
return ret;
}
int __rte_experimental
-rte_fbarray_find_contig_used(struct rte_fbarray *arr, unsigned int start)
+rte_fbarray_find_contig_free(struct rte_fbarray *arr, unsigned int start)
{
- int ret = -1;
-
- if (arr == NULL || start >= arr->len) {
- rte_errno = EINVAL;
- return -1;
- }
+ return fbarray_find_contig(arr, start, true, false);
+}
- /* prevent array from changing under us */
- rte_rwlock_read_lock(&arr->rwlock);
+int __rte_experimental
+rte_fbarray_find_contig_used(struct rte_fbarray *arr, unsigned int start)
+{
+ return fbarray_find_contig(arr, start, true, true);
+}
- ret = find_contig(arr, start, true);
+int __rte_experimental
+rte_fbarray_find_rev_contig_free(struct rte_fbarray *arr, unsigned int start)
+{
+ return fbarray_find_contig(arr, start, false, false);
+}
- rte_rwlock_read_unlock(&arr->rwlock);
- return ret;
+int __rte_experimental
+rte_fbarray_find_rev_contig_used(struct rte_fbarray *arr, unsigned int start)
+{
+ return fbarray_find_contig(arr, start, false, true);
}
int __rte_experimental
diff --git a/lib/librte_eal/common/eal_common_log.c b/lib/librte_eal/common/eal_common_log.c
index 81811894..c714a4bd 100644
--- a/lib/librte_eal/common/eal_common_log.c
+++ b/lib/librte_eal/common/eal_common_log.c
@@ -335,9 +335,7 @@ static const struct logtype logtype_strings[] = {
};
/* Logging should be first initializer (before drivers and bus) */
-RTE_INIT_PRIO(rte_log_init, LOG);
-static void
-rte_log_init(void)
+RTE_INIT_PRIO(rte_log_init, LOG)
{
uint32_t i;
diff --git a/lib/librte_eal/common/eal_common_memory.c b/lib/librte_eal/common/eal_common_memory.c
index 4f0688f9..fbfb1b05 100644
--- a/lib/librte_eal/common/eal_common_memory.c
+++ b/lib/librte_eal/common/eal_common_memory.c
@@ -34,7 +34,7 @@
#define MEMSEG_LIST_FMT "memseg-%" PRIu64 "k-%i-%i"
-static uint64_t baseaddr_offset;
+static void *next_baseaddr;
static uint64_t system_page_sz;
void *
@@ -56,21 +56,27 @@ eal_get_virtual_area(void *requested_addr, size_t *size,
allow_shrink = (flags & EAL_VIRTUAL_AREA_ALLOW_SHRINK) > 0;
unmap = (flags & EAL_VIRTUAL_AREA_UNMAP) > 0;
- if (requested_addr == NULL && internal_config.base_virtaddr != 0) {
- requested_addr = (void *) (internal_config.base_virtaddr +
- (size_t)baseaddr_offset);
+ if (next_baseaddr == NULL && internal_config.base_virtaddr != 0 &&
+ rte_eal_process_type() == RTE_PROC_PRIMARY)
+ next_baseaddr = (void *) internal_config.base_virtaddr;
+
+ if (requested_addr == NULL && next_baseaddr != NULL) {
+ requested_addr = next_baseaddr;
requested_addr = RTE_PTR_ALIGN(requested_addr, page_sz);
addr_is_hint = true;
}
- /* if requested address is not aligned by page size, or if requested
- * address is NULL, add page size to requested length as we may get an
- * address that's aligned by system page size, which can be smaller than
- * our requested page size. additionally, we shouldn't try to align if
- * system page size is the same as requested page size.
+ /* we don't need alignment of resulting pointer in the following cases:
+ *
+ * 1. page size is equal to system size
+ * 2. we have a requested address, and it is page-aligned, and we will
+ * be discarding the address if we get a different one.
+ *
+ * for all other cases, alignment is potentially necessary.
*/
no_align = (requested_addr != NULL &&
- ((uintptr_t)requested_addr & (page_sz - 1)) == 0) ||
+ requested_addr == RTE_PTR_ALIGN(requested_addr, page_sz) &&
+ !addr_is_hint) ||
page_sz == system_page_sz;
do {
@@ -116,6 +122,8 @@ eal_get_virtual_area(void *requested_addr, size_t *size,
RTE_LOG(WARNING, EAL, "WARNING! Base virtual address hint (%p != %p) not respected!\n",
requested_addr, aligned_addr);
RTE_LOG(WARNING, EAL, " This may cause issues with mapping memory into secondary processes\n");
+ } else if (next_baseaddr != NULL) {
+ next_baseaddr = RTE_PTR_ADD(aligned_addr, *size);
}
RTE_LOG(DEBUG, EAL, "Virtual area found at %p (size = 0x%zx)\n",
@@ -148,387 +156,9 @@ eal_get_virtual_area(void *requested_addr, size_t *size,
munmap(aligned_end, after_len);
}
- baseaddr_offset += *size;
-
return aligned_addr;
}
-static uint64_t
-get_mem_amount(uint64_t page_sz, uint64_t max_mem)
-{
- uint64_t area_sz, max_pages;
-
- /* limit to RTE_MAX_MEMSEG_PER_LIST pages or RTE_MAX_MEM_MB_PER_LIST */
- max_pages = RTE_MAX_MEMSEG_PER_LIST;
- max_mem = RTE_MIN((uint64_t)RTE_MAX_MEM_MB_PER_LIST << 20, max_mem);
-
- area_sz = RTE_MIN(page_sz * max_pages, max_mem);
-
- /* make sure the list isn't smaller than the page size */
- area_sz = RTE_MAX(area_sz, page_sz);
-
- return RTE_ALIGN(area_sz, page_sz);
-}
-
-static int
-free_memseg_list(struct rte_memseg_list *msl)
-{
- if (rte_fbarray_destroy(&msl->memseg_arr)) {
- RTE_LOG(ERR, EAL, "Cannot destroy memseg list\n");
- return -1;
- }
- memset(msl, 0, sizeof(*msl));
- return 0;
-}
-
-static int
-alloc_memseg_list(struct rte_memseg_list *msl, uint64_t page_sz,
- uint64_t max_mem, int socket_id, int type_msl_idx)
-{
- char name[RTE_FBARRAY_NAME_LEN];
- uint64_t mem_amount;
- int max_segs;
-
- mem_amount = get_mem_amount(page_sz, max_mem);
- max_segs = mem_amount / page_sz;
-
- snprintf(name, sizeof(name), MEMSEG_LIST_FMT, page_sz >> 10, socket_id,
- type_msl_idx);
- if (rte_fbarray_init(&msl->memseg_arr, name, max_segs,
- sizeof(struct rte_memseg))) {
- RTE_LOG(ERR, EAL, "Cannot allocate memseg list: %s\n",
- rte_strerror(rte_errno));
- return -1;
- }
-
- msl->page_sz = page_sz;
- msl->socket_id = socket_id;
- msl->base_va = NULL;
-
- RTE_LOG(DEBUG, EAL, "Memseg list allocated: 0x%zxkB at socket %i\n",
- (size_t)page_sz >> 10, socket_id);
-
- return 0;
-}
-
-static int
-alloc_va_space(struct rte_memseg_list *msl)
-{
- uint64_t page_sz;
- size_t mem_sz;
- void *addr;
- int flags = 0;
-
-#ifdef RTE_ARCH_PPC_64
- flags |= MAP_HUGETLB;
-#endif
-
- page_sz = msl->page_sz;
- mem_sz = page_sz * msl->memseg_arr.len;
-
- addr = eal_get_virtual_area(msl->base_va, &mem_sz, page_sz, 0, flags);
- if (addr == NULL) {
- if (rte_errno == EADDRNOTAVAIL)
- RTE_LOG(ERR, EAL, "Could not mmap %llu bytes at [%p] - please use '--base-virtaddr' option\n",
- (unsigned long long)mem_sz, msl->base_va);
- else
- RTE_LOG(ERR, EAL, "Cannot reserve memory\n");
- return -1;
- }
- msl->base_va = addr;
-
- return 0;
-}
-
-static int __rte_unused
-memseg_primary_init_32(void)
-{
- struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
- int active_sockets, hpi_idx, msl_idx = 0;
- unsigned int socket_id, i;
- struct rte_memseg_list *msl;
- uint64_t extra_mem_per_socket, total_extra_mem, total_requested_mem;
- uint64_t max_mem;
-
- /* no-huge does not need this at all */
- if (internal_config.no_hugetlbfs)
- return 0;
-
- /* this is a giant hack, but desperate times call for desperate
- * measures. in legacy 32-bit mode, we cannot preallocate VA space,
- * because having upwards of 2 gigabytes of VA space already mapped will
- * interfere with our ability to map and sort hugepages.
- *
- * therefore, in legacy 32-bit mode, we will be initializing memseg
- * lists much later - in eal_memory.c, right after we unmap all the
- * unneeded pages. this will not affect secondary processes, as those
- * should be able to mmap the space without (too many) problems.
- */
- if (internal_config.legacy_mem)
- return 0;
-
- /* 32-bit mode is a very special case. we cannot know in advance where
- * the user will want to allocate their memory, so we have to do some
- * heuristics.
- */
- active_sockets = 0;
- total_requested_mem = 0;
- if (internal_config.force_sockets)
- for (i = 0; i < rte_socket_count(); i++) {
- uint64_t mem;
-
- socket_id = rte_socket_id_by_idx(i);
- mem = internal_config.socket_mem[socket_id];
-
- if (mem == 0)
- continue;
-
- active_sockets++;
- total_requested_mem += mem;
- }
- else
- total_requested_mem = internal_config.memory;
-
- max_mem = (uint64_t)RTE_MAX_MEM_MB << 20;
- if (total_requested_mem > max_mem) {
- RTE_LOG(ERR, EAL, "Invalid parameters: 32-bit process can at most use %uM of memory\n",
- (unsigned int)(max_mem >> 20));
- return -1;
- }
- total_extra_mem = max_mem - total_requested_mem;
- extra_mem_per_socket = active_sockets == 0 ? total_extra_mem :
- total_extra_mem / active_sockets;
-
- /* the allocation logic is a little bit convoluted, but here's how it
- * works, in a nutshell:
- * - if user hasn't specified on which sockets to allocate memory via
- * --socket-mem, we allocate all of our memory on master core socket.
- * - if user has specified sockets to allocate memory on, there may be
- * some "unused" memory left (e.g. if user has specified --socket-mem
- * such that not all memory adds up to 2 gigabytes), so add it to all
- * sockets that are in use equally.
- *
- * page sizes are sorted by size in descending order, so we can safely
- * assume that we dispense with bigger page sizes first.
- */
-
- /* create memseg lists */
- for (i = 0; i < rte_socket_count(); i++) {
- int hp_sizes = (int) internal_config.num_hugepage_sizes;
- uint64_t max_socket_mem, cur_socket_mem;
- unsigned int master_lcore_socket;
- struct rte_config *cfg = rte_eal_get_configuration();
- bool skip;
-
- socket_id = rte_socket_id_by_idx(i);
-
-#ifndef RTE_EAL_NUMA_AWARE_HUGEPAGES
- if (socket_id > 0)
- break;
-#endif
-
- /* if we didn't specifically request memory on this socket */
- skip = active_sockets != 0 &&
- internal_config.socket_mem[socket_id] == 0;
- /* ...or if we didn't specifically request memory on *any*
- * socket, and this is not master lcore
- */
- master_lcore_socket = rte_lcore_to_socket_id(cfg->master_lcore);
- skip |= active_sockets == 0 && socket_id != master_lcore_socket;
-
- if (skip) {
- RTE_LOG(DEBUG, EAL, "Will not preallocate memory on socket %u\n",
- socket_id);
- continue;
- }
-
- /* max amount of memory on this socket */
- max_socket_mem = (active_sockets != 0 ?
- internal_config.socket_mem[socket_id] :
- internal_config.memory) +
- extra_mem_per_socket;
- cur_socket_mem = 0;
-
- for (hpi_idx = 0; hpi_idx < hp_sizes; hpi_idx++) {
- uint64_t max_pagesz_mem, cur_pagesz_mem = 0;
- uint64_t hugepage_sz;
- struct hugepage_info *hpi;
- int type_msl_idx, max_segs, total_segs = 0;
-
- hpi = &internal_config.hugepage_info[hpi_idx];
- hugepage_sz = hpi->hugepage_sz;
-
- /* check if pages are actually available */
- if (hpi->num_pages[socket_id] == 0)
- continue;
-
- max_segs = RTE_MAX_MEMSEG_PER_TYPE;
- max_pagesz_mem = max_socket_mem - cur_socket_mem;
-
- /* make it multiple of page size */
- max_pagesz_mem = RTE_ALIGN_FLOOR(max_pagesz_mem,
- hugepage_sz);
-
- RTE_LOG(DEBUG, EAL, "Attempting to preallocate "
- "%" PRIu64 "M on socket %i\n",
- max_pagesz_mem >> 20, socket_id);
-
- type_msl_idx = 0;
- while (cur_pagesz_mem < max_pagesz_mem &&
- total_segs < max_segs) {
- if (msl_idx >= RTE_MAX_MEMSEG_LISTS) {
- RTE_LOG(ERR, EAL,
- "No more space in memseg lists, please increase %s\n",
- RTE_STR(CONFIG_RTE_MAX_MEMSEG_LISTS));
- return -1;
- }
-
- msl = &mcfg->memsegs[msl_idx];
-
- if (alloc_memseg_list(msl, hugepage_sz,
- max_pagesz_mem, socket_id,
- type_msl_idx)) {
- /* failing to allocate a memseg list is
- * a serious error.
- */
- RTE_LOG(ERR, EAL, "Cannot allocate memseg list\n");
- return -1;
- }
-
- if (alloc_va_space(msl)) {
- /* if we couldn't allocate VA space, we
- * can try with smaller page sizes.
- */
- RTE_LOG(ERR, EAL, "Cannot allocate VA space for memseg list, retrying with different page size\n");
- /* deallocate memseg list */
- if (free_memseg_list(msl))
- return -1;
- break;
- }
-
- total_segs += msl->memseg_arr.len;
- cur_pagesz_mem = total_segs * hugepage_sz;
- type_msl_idx++;
- msl_idx++;
- }
- cur_socket_mem += cur_pagesz_mem;
- }
- if (cur_socket_mem == 0) {
- RTE_LOG(ERR, EAL, "Cannot allocate VA space on socket %u\n",
- socket_id);
- return -1;
- }
- }
-
- return 0;
-}
-
-static int __rte_unused
-memseg_primary_init(void)
-{
- struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
- int i, socket_id, hpi_idx, msl_idx = 0;
- struct rte_memseg_list *msl;
- uint64_t max_mem, total_mem;
-
- /* no-huge does not need this at all */
- if (internal_config.no_hugetlbfs)
- return 0;
-
- max_mem = (uint64_t)RTE_MAX_MEM_MB << 20;
- total_mem = 0;
-
- /* create memseg lists */
- for (hpi_idx = 0; hpi_idx < (int) internal_config.num_hugepage_sizes;
- hpi_idx++) {
- struct hugepage_info *hpi;
- uint64_t hugepage_sz;
-
- hpi = &internal_config.hugepage_info[hpi_idx];
- hugepage_sz = hpi->hugepage_sz;
-
- for (i = 0; i < (int) rte_socket_count(); i++) {
- uint64_t max_type_mem, total_type_mem = 0;
- int type_msl_idx, max_segs, total_segs = 0;
-
- socket_id = rte_socket_id_by_idx(i);
-
-#ifndef RTE_EAL_NUMA_AWARE_HUGEPAGES
- if (socket_id > 0)
- break;
-#endif
-
- if (total_mem >= max_mem)
- break;
-
- max_type_mem = RTE_MIN(max_mem - total_mem,
- (uint64_t)RTE_MAX_MEM_MB_PER_TYPE << 20);
- max_segs = RTE_MAX_MEMSEG_PER_TYPE;
-
- type_msl_idx = 0;
- while (total_type_mem < max_type_mem &&
- total_segs < max_segs) {
- uint64_t cur_max_mem;
- if (msl_idx >= RTE_MAX_MEMSEG_LISTS) {
- RTE_LOG(ERR, EAL,
- "No more space in memseg lists, please increase %s\n",
- RTE_STR(CONFIG_RTE_MAX_MEMSEG_LISTS));
- return -1;
- }
-
- msl = &mcfg->memsegs[msl_idx++];
-
- cur_max_mem = max_type_mem - total_type_mem;
- if (alloc_memseg_list(msl, hugepage_sz,
- cur_max_mem, socket_id,
- type_msl_idx))
- return -1;
-
- total_segs += msl->memseg_arr.len;
- total_type_mem = total_segs * hugepage_sz;
- type_msl_idx++;
-
- if (alloc_va_space(msl)) {
- RTE_LOG(ERR, EAL, "Cannot allocate VA space for memseg list\n");
- return -1;
- }
- }
- total_mem += total_type_mem;
- }
- }
- return 0;
-}
-
-static int
-memseg_secondary_init(void)
-{
- struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
- int msl_idx = 0;
- struct rte_memseg_list *msl;
-
- for (msl_idx = 0; msl_idx < RTE_MAX_MEMSEG_LISTS; msl_idx++) {
-
- msl = &mcfg->memsegs[msl_idx];
-
- /* skip empty memseg lists */
- if (msl->memseg_arr.len == 0)
- continue;
-
- if (rte_fbarray_attach(&msl->memseg_arr)) {
- RTE_LOG(ERR, EAL, "Cannot attach to primary process memseg lists\n");
- return -1;
- }
-
- /* preallocate VA space */
- if (alloc_va_space(msl)) {
- RTE_LOG(ERR, EAL, "Cannot preallocate VA space for hugepage memory\n");
- return -1;
- }
- }
-
- return 0;
-}
-
static struct rte_memseg *
virt2memseg(const void *addr, const struct rte_memseg_list *msl)
{
@@ -536,6 +166,9 @@ virt2memseg(const void *addr, const struct rte_memseg_list *msl)
void *start, *end;
int ms_idx;
+ if (msl == NULL)
+ return NULL;
+
/* a memseg list was specified, check if it's the right one */
start = msl->base_va;
end = RTE_PTR_ADD(start, (size_t)msl->page_sz * msl->memseg_arr.len);
@@ -788,14 +421,11 @@ rte_mem_lock_page(const void *virt)
}
int __rte_experimental
-rte_memseg_contig_walk(rte_memseg_contig_walk_t func, void *arg)
+rte_memseg_contig_walk_thread_unsafe(rte_memseg_contig_walk_t func, void *arg)
{
struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
int i, ms_idx, ret = 0;
- /* do not allow allocations/frees/init while we iterate */
- rte_rwlock_read_lock(&mcfg->memory_hotplug_lock);
-
for (i = 0; i < RTE_MAX_MEMSEG_LISTS; i++) {
struct rte_memseg_list *msl = &mcfg->memsegs[i];
const struct rte_memseg *ms;
@@ -820,30 +450,34 @@ rte_memseg_contig_walk(rte_memseg_contig_walk_t func, void *arg)
len = n_segs * msl->page_sz;
ret = func(msl, ms, len, arg);
- if (ret < 0) {
- ret = -1;
- goto out;
- } else if (ret > 0) {
- ret = 1;
- goto out;
- }
+ if (ret)
+ return ret;
ms_idx = rte_fbarray_find_next_used(arr,
ms_idx + n_segs);
}
}
-out:
- rte_rwlock_read_unlock(&mcfg->memory_hotplug_lock);
- return ret;
+ return 0;
}
int __rte_experimental
-rte_memseg_walk(rte_memseg_walk_t func, void *arg)
+rte_memseg_contig_walk(rte_memseg_contig_walk_t func, void *arg)
{
struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
- int i, ms_idx, ret = 0;
+ int ret = 0;
/* do not allow allocations/frees/init while we iterate */
rte_rwlock_read_lock(&mcfg->memory_hotplug_lock);
+ ret = rte_memseg_contig_walk_thread_unsafe(func, arg);
+ rte_rwlock_read_unlock(&mcfg->memory_hotplug_lock);
+
+ return ret;
+}
+
+int __rte_experimental
+rte_memseg_walk_thread_unsafe(rte_memseg_walk_t func, void *arg)
+{
+ struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
+ int i, ms_idx, ret = 0;
for (i = 0; i < RTE_MAX_MEMSEG_LISTS; i++) {
struct rte_memseg_list *msl = &mcfg->memsegs[i];
@@ -859,29 +493,33 @@ rte_memseg_walk(rte_memseg_walk_t func, void *arg)
while (ms_idx >= 0) {
ms = rte_fbarray_get(arr, ms_idx);
ret = func(msl, ms, arg);
- if (ret < 0) {
- ret = -1;
- goto out;
- } else if (ret > 0) {
- ret = 1;
- goto out;
- }
+ if (ret)
+ return ret;
ms_idx = rte_fbarray_find_next_used(arr, ms_idx + 1);
}
}
-out:
- rte_rwlock_read_unlock(&mcfg->memory_hotplug_lock);
- return ret;
+ return 0;
}
int __rte_experimental
-rte_memseg_list_walk(rte_memseg_list_walk_t func, void *arg)
+rte_memseg_walk(rte_memseg_walk_t func, void *arg)
{
struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
- int i, ret = 0;
+ int ret = 0;
/* do not allow allocations/frees/init while we iterate */
rte_rwlock_read_lock(&mcfg->memory_hotplug_lock);
+ ret = rte_memseg_walk_thread_unsafe(func, arg);
+ rte_rwlock_read_unlock(&mcfg->memory_hotplug_lock);
+
+ return ret;
+}
+
+int __rte_experimental
+rte_memseg_list_walk_thread_unsafe(rte_memseg_list_walk_t func, void *arg)
+{
+ struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
+ int i, ret = 0;
for (i = 0; i < RTE_MAX_MEMSEG_LISTS; i++) {
struct rte_memseg_list *msl = &mcfg->memsegs[i];
@@ -890,17 +528,23 @@ rte_memseg_list_walk(rte_memseg_list_walk_t func, void *arg)
continue;
ret = func(msl, arg);
- if (ret < 0) {
- ret = -1;
- goto out;
- }
- if (ret > 0) {
- ret = 1;
- goto out;
- }
+ if (ret)
+ return ret;
}
-out:
+ return 0;
+}
+
+int __rte_experimental
+rte_memseg_list_walk(rte_memseg_list_walk_t func, void *arg)
+{
+ struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
+ int ret = 0;
+
+ /* do not allow allocations/frees/init while we iterate */
+ rte_rwlock_read_lock(&mcfg->memory_hotplug_lock);
+ ret = rte_memseg_list_walk_thread_unsafe(func, arg);
rte_rwlock_read_unlock(&mcfg->memory_hotplug_lock);
+
return ret;
}
@@ -918,15 +562,7 @@ rte_eal_memory_init(void)
/* lock mem hotplug here, to prevent races while we init */
rte_rwlock_read_lock(&mcfg->memory_hotplug_lock);
- retval = rte_eal_process_type() == RTE_PROC_PRIMARY ?
-#ifndef RTE_ARCH_64
- memseg_primary_init_32() :
-#else
- memseg_primary_init() :
-#endif
- memseg_secondary_init();
-
- if (retval < 0)
+ if (rte_eal_memseg_init() < 0)
goto fail;
if (eal_memalloc_init() < 0)
diff --git a/lib/librte_eal/common/eal_common_memzone.c b/lib/librte_eal/common/eal_common_memzone.c
index faa3b061..7300fe05 100644
--- a/lib/librte_eal/common/eal_common_memzone.c
+++ b/lib/librte_eal/common/eal_common_memzone.c
@@ -52,38 +52,6 @@ memzone_lookup_thread_unsafe(const char *name)
return NULL;
}
-
-/* This function will return the greatest free block if a heap has been
- * specified. If no heap has been specified, it will return the heap and
- * length of the greatest free block available in all heaps */
-static size_t
-find_heap_max_free_elem(int *s, unsigned align)
-{
- struct rte_mem_config *mcfg;
- struct rte_malloc_socket_stats stats;
- int i, socket = *s;
- size_t len = 0;
-
- /* get pointer to global configuration */
- mcfg = rte_eal_get_configuration()->mem_config;
-
- for (i = 0; i < RTE_MAX_NUMA_NODES; i++) {
- if ((socket != SOCKET_ID_ANY) && (socket != i))
- continue;
-
- malloc_heap_get_stats(&mcfg->malloc_heaps[i], &stats);
- if (stats.greatest_free_size > len) {
- len = stats.greatest_free_size;
- *s = i;
- }
- }
-
- if (len < MALLOC_ELEM_OVERHEAD + align)
- return 0;
-
- return len - MALLOC_ELEM_OVERHEAD - align;
-}
-
static const struct rte_memzone *
memzone_reserve_aligned_thread_unsafe(const char *name, size_t len,
int socket_id, unsigned int flags, unsigned int align,
@@ -92,6 +60,7 @@ memzone_reserve_aligned_thread_unsafe(const char *name, size_t len,
struct rte_memzone *mz;
struct rte_mem_config *mcfg;
struct rte_fbarray *arr;
+ void *mz_addr;
size_t requested_len;
int mz_idx;
bool contig;
@@ -140,8 +109,7 @@ memzone_reserve_aligned_thread_unsafe(const char *name, size_t len,
return NULL;
}
- len += RTE_CACHE_LINE_MASK;
- len &= ~((size_t) RTE_CACHE_LINE_MASK);
+ len = RTE_ALIGN_CEIL(len, RTE_CACHE_LINE_SIZE);
/* save minimal requested length */
requested_len = RTE_MAX((size_t)RTE_CACHE_LINE_SIZE, len);
@@ -165,27 +133,18 @@ memzone_reserve_aligned_thread_unsafe(const char *name, size_t len,
/* malloc only cares about size flags, remove contig flag from flags */
flags &= ~RTE_MEMZONE_IOVA_CONTIG;
- if (len == 0) {
- /* len == 0 is only allowed for non-contiguous zones */
- if (contig) {
- RTE_LOG(DEBUG, EAL, "Reserving zero-length contiguous memzones is not supported\n");
- rte_errno = EINVAL;
- return NULL;
- }
- if (bound != 0)
+ if (len == 0 && bound == 0) {
+ /* no size constraints were placed, so use malloc elem len */
+ requested_len = 0;
+ mz_addr = malloc_heap_alloc_biggest(NULL, socket_id, flags,
+ align, contig);
+ } else {
+ if (len == 0)
requested_len = bound;
- else {
- requested_len = find_heap_max_free_elem(&socket_id, align);
- if (requested_len == 0) {
- rte_errno = ENOMEM;
- return NULL;
- }
- }
+ /* allocate memory on heap */
+ mz_addr = malloc_heap_alloc(NULL, requested_len, socket_id,
+ flags, align, bound, contig);
}
-
- /* allocate memory on heap */
- void *mz_addr = malloc_heap_alloc(NULL, requested_len, socket_id, flags,
- align, bound, contig);
if (mz_addr == NULL) {
rte_errno = ENOMEM;
return NULL;
@@ -213,8 +172,9 @@ memzone_reserve_aligned_thread_unsafe(const char *name, size_t len,
snprintf(mz->name, sizeof(mz->name), "%s", name);
mz->iova = rte_malloc_virt2iova(mz_addr);
mz->addr = mz_addr;
- mz->len = (requested_len == 0 ?
- (elem->size - MALLOC_ELEM_OVERHEAD) : requested_len);
+ mz->len = requested_len == 0 ?
+ elem->size - elem->pad - MALLOC_ELEM_OVERHEAD :
+ requested_len;
mz->hugepage_sz = elem->msl->page_sz;
mz->socket_id = elem->msl->socket_id;
mz->flags = 0;
diff --git a/lib/librte_eal/common/eal_common_options.c b/lib/librte_eal/common/eal_common_options.c
index ecebb292..dd5f9740 100644
--- a/lib/librte_eal/common/eal_common_options.c
+++ b/lib/librte_eal/common/eal_common_options.c
@@ -66,10 +66,12 @@ eal_long_options[] = {
{OPT_NO_HUGE, 0, NULL, OPT_NO_HUGE_NUM },
{OPT_NO_PCI, 0, NULL, OPT_NO_PCI_NUM },
{OPT_NO_SHCONF, 0, NULL, OPT_NO_SHCONF_NUM },
+ {OPT_IN_MEMORY, 0, NULL, OPT_IN_MEMORY_NUM },
{OPT_PCI_BLACKLIST, 1, NULL, OPT_PCI_BLACKLIST_NUM },
{OPT_PCI_WHITELIST, 1, NULL, OPT_PCI_WHITELIST_NUM },
{OPT_PROC_TYPE, 1, NULL, OPT_PROC_TYPE_NUM },
{OPT_SOCKET_MEM, 1, NULL, OPT_SOCKET_MEM_NUM },
+ {OPT_SOCKET_LIMIT, 1, NULL, OPT_SOCKET_LIMIT_NUM },
{OPT_SYSLOG, 1, NULL, OPT_SYSLOG_NUM },
{OPT_VDEV, 1, NULL, OPT_VDEV_NUM },
{OPT_VFIO_INTR, 1, NULL, OPT_VFIO_INTR_NUM },
@@ -179,6 +181,10 @@ eal_reset_internal_config(struct internal_config *internal_cfg)
/* zero out the NUMA config */
for (i = 0; i < RTE_MAX_NUMA_NODES; i++)
internal_cfg->socket_mem[i] = 0;
+ internal_cfg->force_socket_limits = 0;
+ /* zero out the NUMA limits config */
+ for (i = 0; i < RTE_MAX_NUMA_NODES; i++)
+ internal_cfg->socket_limit[i] = 0;
/* zero out hugedir descriptors */
for (i = 0; i < MAX_HUGEPAGE_SIZES; i++) {
memset(&internal_cfg->hugepage_info[i], 0,
@@ -315,6 +321,7 @@ eal_parse_service_coremask(const char *coremask)
unsigned int count = 0;
char c;
int val;
+ uint32_t taken_lcore_count = 0;
if (coremask == NULL)
return -1;
@@ -348,7 +355,7 @@ eal_parse_service_coremask(const char *coremask)
if (master_lcore_parsed &&
cfg->master_lcore == lcore) {
RTE_LOG(ERR, EAL,
- "Error: lcore %u is master lcore, cannot use as service core\n",
+ "lcore %u is master lcore, cannot use as service core\n",
idx);
return -1;
}
@@ -358,6 +365,10 @@ eal_parse_service_coremask(const char *coremask)
"lcore %u unavailable\n", idx);
return -1;
}
+
+ if (cfg->lcore_role[idx] == ROLE_RTE)
+ taken_lcore_count++;
+
lcore_config[idx].core_role = ROLE_SERVICE;
count++;
}
@@ -374,11 +385,28 @@ eal_parse_service_coremask(const char *coremask)
if (count == 0)
return -1;
+ if (core_parsed && taken_lcore_count != count) {
+ RTE_LOG(WARNING, EAL,
+ "Not all service cores are in the coremask. "
+ "Please ensure -c or -l includes service cores\n");
+ }
+
cfg->service_lcore_count = count;
return 0;
}
static int
+eal_service_cores_parsed(void)
+{
+ int idx;
+ for (idx = 0; idx < RTE_MAX_LCORE; idx++) {
+ if (lcore_config[idx].core_role == ROLE_SERVICE)
+ return 1;
+ }
+ return 0;
+}
+
+static int
eal_parse_coremask(const char *coremask)
{
struct rte_config *cfg = rte_eal_get_configuration();
@@ -387,6 +415,11 @@ eal_parse_coremask(const char *coremask)
char c;
int val;
+ if (eal_service_cores_parsed())
+ RTE_LOG(WARNING, EAL,
+ "Service cores parsed before dataplane cores. "
+ "Please ensure -c is before -s or -S\n");
+
if (coremask == NULL)
return -1;
/* Remove all blank characters ahead and after .
@@ -418,6 +451,7 @@ eal_parse_coremask(const char *coremask)
"unavailable\n", idx);
return -1;
}
+
cfg->lcore_role[idx] = ROLE_RTE;
lcore_config[idx].core_index = count;
count++;
@@ -449,6 +483,7 @@ eal_parse_service_corelist(const char *corelist)
unsigned count = 0;
char *end = NULL;
int min, max;
+ uint32_t taken_lcore_count = 0;
if (corelist == NULL)
return -1;
@@ -490,6 +525,9 @@ eal_parse_service_corelist(const char *corelist)
idx);
return -1;
}
+ if (cfg->lcore_role[idx] == ROLE_RTE)
+ taken_lcore_count++;
+
lcore_config[idx].core_role =
ROLE_SERVICE;
count++;
@@ -504,6 +542,12 @@ eal_parse_service_corelist(const char *corelist)
if (count == 0)
return -1;
+ if (core_parsed && taken_lcore_count != count) {
+ RTE_LOG(WARNING, EAL,
+ "Not all service cores were in the coremask. "
+ "Please ensure -c or -l includes service cores\n");
+ }
+
return 0;
}
@@ -516,6 +560,11 @@ eal_parse_corelist(const char *corelist)
char *end = NULL;
int min, max;
+ if (eal_service_cores_parsed())
+ RTE_LOG(WARNING, EAL,
+ "Service cores parsed before dataplane cores. "
+ "Please ensure -l is before -s or -S\n");
+
if (corelist == NULL)
return -1;
@@ -590,7 +639,8 @@ eal_parse_master_lcore(const char *arg)
/* ensure master core is not used as service core */
if (lcore_config[cfg->master_lcore].core_role == ROLE_SERVICE) {
- RTE_LOG(ERR, EAL, "Error: Master lcore is used as a service core.\n");
+ RTE_LOG(ERR, EAL,
+ "Error: Master lcore is used as a service core\n");
return -1;
}
@@ -1165,6 +1215,13 @@ eal_parse_common_option(int opt, const char *optarg,
conf->no_shconf = 1;
break;
+ case OPT_IN_MEMORY_NUM:
+ conf->in_memory = 1;
+ /* in-memory is a superset of noshconf and huge-unlink */
+ conf->no_shconf = 1;
+ conf->hugepage_unlink = 1;
+ break;
+
case OPT_PROC_TYPE_NUM:
conf->process_type = eal_parse_proc_type(optarg);
break;
@@ -1316,12 +1373,23 @@ eal_check_common_options(struct internal_config *internal_cfg)
"be specified together with --"OPT_NO_HUGE"\n");
return -1;
}
-
- if (internal_cfg->no_hugetlbfs && internal_cfg->hugepage_unlink) {
+ if (internal_cfg->no_hugetlbfs && internal_cfg->hugepage_unlink &&
+ !internal_cfg->in_memory) {
RTE_LOG(ERR, EAL, "Option --"OPT_HUGE_UNLINK" cannot "
"be specified together with --"OPT_NO_HUGE"\n");
return -1;
}
+ if (internal_config.force_socket_limits && internal_config.legacy_mem) {
+ RTE_LOG(ERR, EAL, "Option --"OPT_SOCKET_LIMIT
+ " is only supported in non-legacy memory mode\n");
+ }
+ if (internal_cfg->single_file_segments &&
+ internal_cfg->hugepage_unlink) {
+ RTE_LOG(ERR, EAL, "Option --"OPT_SINGLE_FILE_SEGMENTS" is "
+ "not compatible with neither --"OPT_IN_MEMORY" nor "
+ "--"OPT_HUGE_UNLINK"\n");
+ return -1;
+ }
return 0;
}
@@ -1370,6 +1438,8 @@ eal_common_usage(void)
" Set specific log level\n"
" -v Display version information on startup\n"
" -h, --help This help\n"
+ " --"OPT_IN_MEMORY" Operate entirely in memory. This will\n"
+ " disable secondary process support\n"
"\nEAL options for DEBUG use only:\n"
" --"OPT_HUGE_UNLINK" Unlink hugepage files after init\n"
" --"OPT_NO_HUGE" Use malloc instead of hugetlbfs\n"
diff --git a/lib/librte_eal/common/eal_common_proc.c b/lib/librte_eal/common/eal_common_proc.c
index 707d8ab3..9fcb9121 100644
--- a/lib/librte_eal/common/eal_common_proc.c
+++ b/lib/librte_eal/common/eal_common_proc.c
@@ -20,6 +20,7 @@
#include <sys/un.h>
#include <unistd.h>
+#include <rte_alarm.h>
#include <rte_common.h>
#include <rte_cycles.h>
#include <rte_eal.h>
@@ -94,11 +95,9 @@ TAILQ_HEAD(pending_request_list, pending_request);
static struct {
struct pending_request_list requests;
pthread_mutex_t lock;
- pthread_cond_t async_cond;
} pending_requests = {
.requests = TAILQ_HEAD_INITIALIZER(pending_requests.requests),
.lock = PTHREAD_MUTEX_INITIALIZER,
- .async_cond = PTHREAD_COND_INITIALIZER
/**< used in async requests only */
};
@@ -106,6 +105,16 @@ static struct {
static int
mp_send(struct rte_mp_msg *msg, const char *peer, int type);
+/* for use with alarm callback */
+static void
+async_reply_handle(void *arg);
+
+/* for use with process_msg */
+static struct pending_request *
+async_reply_handle_thread_unsafe(void *arg);
+
+static void
+trigger_async_action(struct pending_request *req);
static struct pending_request *
find_pending_request(const char *dst, const char *act_name)
@@ -290,6 +299,8 @@ process_msg(struct mp_msg_internal *m, struct sockaddr_un *s)
RTE_LOG(DEBUG, EAL, "msg: %s\n", msg->name);
if (m->type == MP_REP || m->type == MP_IGN) {
+ struct pending_request *req = NULL;
+
pthread_mutex_lock(&pending_requests.lock);
pending_req = find_pending_request(s->sun_path, msg->name);
if (pending_req) {
@@ -301,11 +312,14 @@ process_msg(struct mp_msg_internal *m, struct sockaddr_un *s)
if (pending_req->type == REQUEST_TYPE_SYNC)
pthread_cond_signal(&pending_req->sync.cond);
else if (pending_req->type == REQUEST_TYPE_ASYNC)
- pthread_cond_signal(
- &pending_requests.async_cond);
+ req = async_reply_handle_thread_unsafe(
+ pending_req);
} else
RTE_LOG(ERR, EAL, "Drop mp reply: %s\n", msg->name);
pthread_mutex_unlock(&pending_requests.lock);
+
+ if (req != NULL)
+ trigger_async_action(req);
return;
}
@@ -365,7 +379,6 @@ timespec_cmp(const struct timespec *a, const struct timespec *b)
}
enum async_action {
- ACTION_NONE, /**< don't do anything */
ACTION_FREE, /**< free the action entry, but don't trigger callback */
ACTION_TRIGGER /**< trigger callback, then free action entry */
};
@@ -375,7 +388,7 @@ process_async_request(struct pending_request *sr, const struct timespec *now)
{
struct async_request_param *param;
struct rte_mp_reply *reply;
- bool timeout, received, last_msg;
+ bool timeout, last_msg;
param = sr->async.param;
reply = &param->user_reply;
@@ -383,13 +396,6 @@ process_async_request(struct pending_request *sr, const struct timespec *now)
/* did we timeout? */
timeout = timespec_cmp(&param->end, now) <= 0;
- /* did we receive a response? */
- received = sr->reply_received != 0;
-
- /* if we didn't time out, and we didn't receive a response, ignore */
- if (!timeout && !received)
- return ACTION_NONE;
-
/* if we received a response, adjust relevant data and copy mesasge. */
if (sr->reply_received == 1 && sr->reply) {
struct rte_mp_msg *msg, *user_msgs, *tmp;
@@ -448,118 +454,58 @@ trigger_async_action(struct pending_request *sr)
free(sr->async.param->user_reply.msgs);
free(sr->async.param);
free(sr->request);
+ free(sr);
}
static struct pending_request *
-check_trigger(struct timespec *ts)
+async_reply_handle_thread_unsafe(void *arg)
{
- struct pending_request *next, *cur, *trigger = NULL;
-
- TAILQ_FOREACH_SAFE(cur, &pending_requests.requests, next, next) {
- enum async_action action;
- if (cur->type != REQUEST_TYPE_ASYNC)
- continue;
+ struct pending_request *req = (struct pending_request *)arg;
+ enum async_action action;
+ struct timespec ts_now;
+ struct timeval now;
- action = process_async_request(cur, ts);
- if (action == ACTION_FREE) {
- TAILQ_REMOVE(&pending_requests.requests, cur, next);
- free(cur);
- } else if (action == ACTION_TRIGGER) {
- TAILQ_REMOVE(&pending_requests.requests, cur, next);
- trigger = cur;
- break;
- }
+ if (gettimeofday(&now, NULL) < 0) {
+ RTE_LOG(ERR, EAL, "Cannot get current time\n");
+ goto no_trigger;
}
- return trigger;
-}
+ ts_now.tv_nsec = now.tv_usec * 1000;
+ ts_now.tv_sec = now.tv_sec;
-static void
-wait_for_async_messages(void)
-{
- struct pending_request *sr;
- struct timespec timeout;
- bool timedwait = false;
- bool nowait = false;
- int ret;
+ action = process_async_request(req, &ts_now);
- /* scan through the list and see if there are any timeouts that
- * are earlier than our current timeout.
- */
- TAILQ_FOREACH(sr, &pending_requests.requests, next) {
- if (sr->type != REQUEST_TYPE_ASYNC)
- continue;
- if (!timedwait || timespec_cmp(&sr->async.param->end,
- &timeout) < 0) {
- memcpy(&timeout, &sr->async.param->end,
- sizeof(timeout));
- timedwait = true;
- }
+ TAILQ_REMOVE(&pending_requests.requests, req, next);
- /* sometimes, we don't even wait */
- if (sr->reply_received) {
- nowait = true;
- break;
+ if (rte_eal_alarm_cancel(async_reply_handle, req) < 0) {
+ /* if we failed to cancel the alarm because it's already in
+ * progress, don't proceed because otherwise we will end up
+ * handling the same message twice.
+ */
+ if (rte_errno == EINPROGRESS) {
+ RTE_LOG(DEBUG, EAL, "Request handling is already in progress\n");
+ goto no_trigger;
}
+ RTE_LOG(ERR, EAL, "Failed to cancel alarm\n");
}
- if (nowait)
- return;
-
- do {
- ret = timedwait ?
- pthread_cond_timedwait(
- &pending_requests.async_cond,
- &pending_requests.lock,
- &timeout) :
- pthread_cond_wait(
- &pending_requests.async_cond,
- &pending_requests.lock);
- } while (ret != 0 && ret != ETIMEDOUT);
-
- /* we've been woken up or timed out */
+ if (action == ACTION_TRIGGER)
+ return req;
+no_trigger:
+ free(req);
+ return NULL;
}
-static void *
-async_reply_handle(void *arg __rte_unused)
+static void
+async_reply_handle(void *arg)
{
- struct timeval now;
- struct timespec ts_now;
- while (1) {
- struct pending_request *trigger = NULL;
-
- pthread_mutex_lock(&pending_requests.lock);
+ struct pending_request *req;
- /* we exit this function holding the lock */
- wait_for_async_messages();
-
- if (gettimeofday(&now, NULL) < 0) {
- pthread_mutex_unlock(&pending_requests.lock);
- RTE_LOG(ERR, EAL, "Cannot get current time\n");
- break;
- }
- ts_now.tv_nsec = now.tv_usec * 1000;
- ts_now.tv_sec = now.tv_sec;
-
- do {
- trigger = check_trigger(&ts_now);
- /* unlock request list */
- pthread_mutex_unlock(&pending_requests.lock);
-
- if (trigger) {
- trigger_async_action(trigger);
- free(trigger);
-
- /* we've triggered a callback, but there may be
- * more, so lock the list and check again.
- */
- pthread_mutex_lock(&pending_requests.lock);
- }
- } while (trigger);
- }
-
- RTE_LOG(ERR, EAL, "ERROR: asynchronous requests disabled\n");
+ pthread_mutex_lock(&pending_requests.lock);
+ req = async_reply_handle_thread_unsafe(arg);
+ pthread_mutex_unlock(&pending_requests.lock);
- return NULL;
+ if (req != NULL)
+ trigger_async_action(req);
}
static int
@@ -624,7 +570,15 @@ rte_mp_channel_init(void)
{
char path[PATH_MAX];
int dir_fd;
- pthread_t mp_handle_tid, async_reply_handle_tid;
+ pthread_t mp_handle_tid;
+
+ /* in no shared files mode, we do not have secondary processes support,
+ * so no need to initialize IPC.
+ */
+ if (internal_config.no_shconf) {
+ RTE_LOG(DEBUG, EAL, "No shared files mode enabled, IPC will be disabled\n");
+ return 0;
+ }
/* create filter path */
create_socket_path("*", path, sizeof(path));
@@ -671,17 +625,6 @@ rte_mp_channel_init(void)
return -1;
}
- if (rte_ctrl_thread_create(&async_reply_handle_tid,
- "rte_mp_async", NULL,
- async_reply_handle, NULL) < 0) {
- RTE_LOG(ERR, EAL, "failed to create mp thead: %s\n",
- strerror(errno));
- close(mp_fd);
- close(dir_fd);
- mp_fd = -1;
- return -1;
- }
-
/* unlock the directory */
flock(dir_fd, LOCK_UN);
close(dir_fd);
@@ -786,7 +729,7 @@ mp_send(struct rte_mp_msg *msg, const char *peer, int type)
dir_fd = dirfd(mp_dir);
/* lock the directory to prevent processes spinning up while we send */
- if (flock(dir_fd, LOCK_EX)) {
+ if (flock(dir_fd, LOCK_SH)) {
RTE_LOG(ERR, EAL, "Unable to lock directory %s\n",
mp_dir_path);
rte_errno = errno;
@@ -853,7 +796,7 @@ rte_mp_sendmsg(struct rte_mp_msg *msg)
static int
mp_request_async(const char *dst, struct rte_mp_msg *req,
- struct async_request_param *param)
+ struct async_request_param *param, const struct timespec *ts)
{
struct rte_mp_msg *reply_msg;
struct pending_request *pending_req, *exist;
@@ -898,6 +841,13 @@ mp_request_async(const char *dst, struct rte_mp_msg *req,
param->user_reply.nb_sent++;
+ if (rte_eal_alarm_set(ts->tv_sec * 1000000 + ts->tv_nsec / 1000,
+ async_reply_handle, pending_req) < 0) {
+ RTE_LOG(ERR, EAL, "Fail to set alarm for request %s:%s\n",
+ dst, req->name);
+ rte_panic("Fix the above shit to properly free all memory\n");
+ }
+
return 0;
fail:
free(pending_req);
@@ -988,6 +938,12 @@ rte_mp_request_sync(struct rte_mp_msg *req, struct rte_mp_reply *reply,
if (check_input(req) == false)
return -1;
+
+ if (internal_config.no_shconf) {
+ RTE_LOG(DEBUG, EAL, "No shared files mode enabled, IPC is disabled\n");
+ return 0;
+ }
+
if (gettimeofday(&now, NULL) < 0) {
RTE_LOG(ERR, EAL, "Faile to get current time\n");
rte_errno = errno;
@@ -1020,7 +976,7 @@ rte_mp_request_sync(struct rte_mp_msg *req, struct rte_mp_reply *reply,
dir_fd = dirfd(mp_dir);
/* lock the directory to prevent processes spinning up while we send */
- if (flock(dir_fd, LOCK_EX)) {
+ if (flock(dir_fd, LOCK_SH)) {
RTE_LOG(ERR, EAL, "Unable to lock directory %s\n",
mp_dir_path);
closedir(mp_dir);
@@ -1072,6 +1028,12 @@ rte_mp_request_async(struct rte_mp_msg *req, const struct timespec *ts,
if (check_input(req) == false)
return -1;
+
+ if (internal_config.no_shconf) {
+ RTE_LOG(DEBUG, EAL, "No shared files mode enabled, IPC is disabled\n");
+ return 0;
+ }
+
if (gettimeofday(&now, NULL) < 0) {
RTE_LOG(ERR, EAL, "Faile to get current time\n");
rte_errno = errno;
@@ -1119,7 +1081,7 @@ rte_mp_request_async(struct rte_mp_msg *req, const struct timespec *ts,
/* for secondary process, send request to the primary process only */
if (rte_eal_process_type() == RTE_PROC_SECONDARY) {
- ret = mp_request_async(eal_mp_socket_path(), copy, param);
+ ret = mp_request_async(eal_mp_socket_path(), copy, param, ts);
/* if we didn't send anything, put dummy request on the queue */
if (ret == 0 && reply->nb_sent == 0) {
@@ -1146,7 +1108,7 @@ rte_mp_request_async(struct rte_mp_msg *req, const struct timespec *ts,
dir_fd = dirfd(mp_dir);
/* lock the directory to prevent processes spinning up while we send */
- if (flock(dir_fd, LOCK_EX)) {
+ if (flock(dir_fd, LOCK_SH)) {
RTE_LOG(ERR, EAL, "Unable to lock directory %s\n",
mp_dir_path);
rte_errno = errno;
@@ -1162,7 +1124,7 @@ rte_mp_request_async(struct rte_mp_msg *req, const struct timespec *ts,
snprintf(path, sizeof(path), "%s/%s", mp_dir_path,
ent->d_name);
- if (mp_request_async(path, copy, param))
+ if (mp_request_async(path, copy, param, ts))
ret = -1;
}
/* if we didn't send anything, put dummy request on the queue */
@@ -1171,9 +1133,6 @@ rte_mp_request_async(struct rte_mp_msg *req, const struct timespec *ts,
dummy_used = true;
}
- /* trigger async request thread wake up */
- pthread_cond_signal(&pending_requests.async_cond);
-
/* finally, unlock the queue */
pthread_mutex_unlock(&pending_requests.lock);
@@ -1213,5 +1172,10 @@ rte_mp_reply(struct rte_mp_msg *msg, const char *peer)
return -1;
}
+ if (internal_config.no_shconf) {
+ RTE_LOG(DEBUG, EAL, "No shared files mode enabled, IPC is disabled\n");
+ return 0;
+ }
+
return mp_send(msg, peer, MP_REP);
}
diff --git a/lib/librte_eal/common/eal_common_thread.c b/lib/librte_eal/common/eal_common_thread.c
index 42398630..48ef4d6d 100644
--- a/lib/librte_eal/common/eal_common_thread.c
+++ b/lib/librte_eal/common/eal_common_thread.c
@@ -175,7 +175,7 @@ rte_ctrl_thread_create(pthread_t *thread, const char *name,
params = malloc(sizeof(*params));
if (!params)
- return -1;
+ return -ENOMEM;
params->start_routine = start_routine;
params->arg = arg;
@@ -185,13 +185,14 @@ rte_ctrl_thread_create(pthread_t *thread, const char *name,
ret = pthread_create(thread, attr, rte_thread_init, (void *)params);
if (ret != 0) {
free(params);
- return ret;
+ return -ret;
}
if (name != NULL) {
ret = rte_thread_setname(*thread, name);
if (ret < 0)
- goto fail;
+ RTE_LOG(DEBUG, EAL,
+ "Cannot set name for ctrl thread\n");
}
cpu_found = 0;
@@ -227,5 +228,5 @@ fail:
}
pthread_cancel(*thread);
pthread_join(*thread, NULL);
- return ret;
+ return -ret;
}
diff --git a/lib/librte_eal/common/eal_common_uuid.c b/lib/librte_eal/common/eal_common_uuid.c
new file mode 100644
index 00000000..1b93c5b3
--- /dev/null
+++ b/lib/librte_eal/common/eal_common_uuid.c
@@ -0,0 +1,193 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright (C) 1996, 1997 Theodore Ts'o.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, and the entire permission notice in its entirety,
+ * including the disclaimer of warranties.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. The name of the author may not be used to endorse or promote
+ * products derived from this software without specific prior
+ * written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ALL OF
+ * WHICH ARE HEREBY DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT
+ * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
+ * USE OF THIS SOFTWARE, EVEN IF NOT ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ */
+
+#include <stdio.h>
+#include <string.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <ctype.h>
+
+#include <rte_uuid.h>
+
+/* UUID packed form */
+struct uuid {
+ uint32_t time_low;
+ uint16_t time_mid;
+ uint16_t time_hi_and_version;
+ uint16_t clock_seq;
+ uint8_t node[6];
+};
+
+static void uuid_pack(const struct uuid *uu, rte_uuid_t ptr)
+{
+ uint32_t tmp;
+ uint8_t *out = ptr;
+
+ tmp = uu->time_low;
+ out[3] = (uint8_t) tmp;
+ tmp >>= 8;
+ out[2] = (uint8_t) tmp;
+ tmp >>= 8;
+ out[1] = (uint8_t) tmp;
+ tmp >>= 8;
+ out[0] = (uint8_t) tmp;
+
+ tmp = uu->time_mid;
+ out[5] = (uint8_t) tmp;
+ tmp >>= 8;
+ out[4] = (uint8_t) tmp;
+
+ tmp = uu->time_hi_and_version;
+ out[7] = (uint8_t) tmp;
+ tmp >>= 8;
+ out[6] = (uint8_t) tmp;
+
+ tmp = uu->clock_seq;
+ out[9] = (uint8_t) tmp;
+ tmp >>= 8;
+ out[8] = (uint8_t) tmp;
+
+ memcpy(out+10, uu->node, 6);
+}
+
+static void uuid_unpack(const rte_uuid_t in, struct uuid *uu)
+{
+ const uint8_t *ptr = in;
+ uint32_t tmp;
+
+ tmp = *ptr++;
+ tmp = (tmp << 8) | *ptr++;
+ tmp = (tmp << 8) | *ptr++;
+ tmp = (tmp << 8) | *ptr++;
+ uu->time_low = tmp;
+
+ tmp = *ptr++;
+ tmp = (tmp << 8) | *ptr++;
+ uu->time_mid = tmp;
+
+ tmp = *ptr++;
+ tmp = (tmp << 8) | *ptr++;
+ uu->time_hi_and_version = tmp;
+
+ tmp = *ptr++;
+ tmp = (tmp << 8) | *ptr++;
+ uu->clock_seq = tmp;
+
+ memcpy(uu->node, ptr, 6);
+}
+
+bool rte_uuid_is_null(const rte_uuid_t uu)
+{
+ const uint8_t *cp = uu;
+ int i;
+
+ for (i = 0; i < 16; i++)
+ if (*cp++)
+ return false;
+ return true;
+}
+
+/*
+ * rte_uuid_compare() - compare two UUIDs.
+ */
+int rte_uuid_compare(const rte_uuid_t uu1, const rte_uuid_t uu2)
+{
+ struct uuid uuid1, uuid2;
+
+ uuid_unpack(uu1, &uuid1);
+ uuid_unpack(uu2, &uuid2);
+
+#define UUCMP(u1, u2) \
+ do { if (u1 != u2) return (u1 < u2) ? -1 : 1; } while (0)
+
+ UUCMP(uuid1.time_low, uuid2.time_low);
+ UUCMP(uuid1.time_mid, uuid2.time_mid);
+ UUCMP(uuid1.time_hi_and_version, uuid2.time_hi_and_version);
+ UUCMP(uuid1.clock_seq, uuid2.clock_seq);
+#undef UUCMP
+
+ return memcmp(uuid1.node, uuid2.node, 6);
+}
+
+int rte_uuid_parse(const char *in, rte_uuid_t uu)
+{
+ struct uuid uuid;
+ int i;
+ const char *cp;
+ char buf[3];
+
+ if (strlen(in) != 36)
+ return -1;
+
+ for (i = 0, cp = in; i <= 36; i++, cp++) {
+ if ((i == 8) || (i == 13) || (i == 18) ||
+ (i == 23)) {
+ if (*cp == '-')
+ continue;
+ else
+ return -1;
+ }
+ if (i == 36)
+ if (*cp == 0)
+ continue;
+ if (!isxdigit(*cp))
+ return -1;
+ }
+
+ uuid.time_low = strtoul(in, NULL, 16);
+ uuid.time_mid = strtoul(in+9, NULL, 16);
+ uuid.time_hi_and_version = strtoul(in+14, NULL, 16);
+ uuid.clock_seq = strtoul(in+19, NULL, 16);
+ cp = in+24;
+ buf[2] = 0;
+
+ for (i = 0; i < 6; i++) {
+ buf[0] = *cp++;
+ buf[1] = *cp++;
+ uuid.node[i] = strtoul(buf, NULL, 16);
+ }
+
+ uuid_pack(&uuid, uu);
+ return 0;
+}
+
+void rte_uuid_unparse(const rte_uuid_t uu, char *out, size_t len)
+{
+ struct uuid uuid;
+
+ uuid_unpack(uu, &uuid);
+
+ snprintf(out, len,
+ "%08x-%04x-%04x-%02x%02x-%02x%02x%02x%02x%02x%02x",
+ uuid.time_low, uuid.time_mid, uuid.time_hi_and_version,
+ uuid.clock_seq >> 8, uuid.clock_seq & 0xFF,
+ uuid.node[0], uuid.node[1], uuid.node[2],
+ uuid.node[3], uuid.node[4], uuid.node[5]);
+}
diff --git a/lib/librte_eal/common/eal_filesystem.h b/lib/librte_eal/common/eal_filesystem.h
index 364f38d1..de05febf 100644
--- a/lib/librte_eal/common/eal_filesystem.h
+++ b/lib/librte_eal/common/eal_filesystem.h
@@ -12,7 +12,6 @@
#define EAL_FILESYSTEM_H
/** Path of rte config file. */
-#define RUNTIME_CONFIG_FMT "%s/.%s_config"
#include <stdint.h>
#include <limits.h>
@@ -30,17 +29,14 @@ eal_create_runtime_dir(void);
const char *
eal_get_runtime_dir(void);
+#define RUNTIME_CONFIG_FNAME "config"
static inline const char *
eal_runtime_config_path(void)
{
static char buffer[PATH_MAX]; /* static so auto-zeroed */
- const char *directory = "/var/run";
- const char *home_dir = getenv("HOME");
- if (getuid() != 0 && home_dir != NULL)
- directory = home_dir;
- snprintf(buffer, sizeof(buffer) - 1, RUNTIME_CONFIG_FMT, directory,
- internal_config.hugefile_prefix);
+ snprintf(buffer, sizeof(buffer) - 1, "%s/%s", eal_get_runtime_dir(),
+ RUNTIME_CONFIG_FNAME);
return buffer;
}
diff --git a/lib/librte_eal/common/eal_internal_cfg.h b/lib/librte_eal/common/eal_internal_cfg.h
index c4cbf3ac..00ee6e06 100644
--- a/lib/librte_eal/common/eal_internal_cfg.h
+++ b/lib/librte_eal/common/eal_internal_cfg.h
@@ -41,11 +41,17 @@ struct internal_config {
volatile unsigned vmware_tsc_map; /**< true to use VMware TSC mapping
* instead of native TSC */
volatile unsigned no_shconf; /**< true if there is no shared config */
+ volatile unsigned in_memory;
+ /**< true if DPDK should operate entirely in-memory and not create any
+ * shared files or runtime data.
+ */
volatile unsigned create_uio_dev; /**< true to create /dev/uioX devices */
volatile enum rte_proc_type_t process_type; /**< multi-process proc type */
/** true to try allocating memory on specific sockets */
volatile unsigned force_sockets;
volatile uint64_t socket_mem[RTE_MAX_NUMA_NODES]; /**< amount of memory per socket */
+ volatile unsigned force_socket_limits;
+ volatile uint64_t socket_limit[RTE_MAX_NUMA_NODES]; /**< limit amount of memory per socket */
uintptr_t base_virtaddr; /**< base address to try and reserve memory from */
volatile unsigned legacy_mem;
/**< true to enable legacy memory behavior (no dynamic allocation,
diff --git a/lib/librte_eal/common/eal_options.h b/lib/librte_eal/common/eal_options.h
index 211ae06a..96e16678 100644
--- a/lib/librte_eal/common/eal_options.h
+++ b/lib/librte_eal/common/eal_options.h
@@ -45,8 +45,12 @@ enum {
OPT_NO_PCI_NUM,
#define OPT_NO_SHCONF "no-shconf"
OPT_NO_SHCONF_NUM,
+#define OPT_IN_MEMORY "in-memory"
+ OPT_IN_MEMORY_NUM,
#define OPT_SOCKET_MEM "socket-mem"
OPT_SOCKET_MEM_NUM,
+#define OPT_SOCKET_LIMIT "socket-limit"
+ OPT_SOCKET_LIMIT_NUM,
#define OPT_SYSLOG "syslog"
OPT_SYSLOG_NUM,
#define OPT_VDEV "vdev"
diff --git a/lib/librte_eal/common/eal_private.h b/lib/librte_eal/common/eal_private.h
index bdadc4d5..4f809a83 100644
--- a/lib/librte_eal/common/eal_private.h
+++ b/lib/librte_eal/common/eal_private.h
@@ -47,6 +47,18 @@ void eal_log_set_default(FILE *default_log);
int rte_eal_cpu_init(void);
/**
+ * Create memseg lists
+ *
+ * This function is private to EAL.
+ *
+ * Preallocate virtual memory.
+ *
+ * @return
+ * 0 on success, negative on error
+ */
+int rte_eal_memseg_init(void);
+
+/**
* Map memory
*
* This function is private to EAL.
@@ -258,4 +270,38 @@ int rte_mp_channel_init(void);
*/
void dev_callback_process(char *device_name, enum rte_dev_event_type event);
+/**
+ * @internal
+ * Parse a device string and store its information in an
+ * rte_devargs structure.
+ *
+ * A device description is split by layers of abstraction of the device:
+ * bus, class and driver. Each layer will offer a set of properties that
+ * can be applied either to configure or recognize a device.
+ *
+ * This function will parse those properties and prepare the rte_devargs
+ * to be given to each layers for processing.
+ *
+ * Note: if the "data" field of the devargs points to devstr,
+ * then no dynamic allocation is performed and the rte_devargs
+ * can be safely discarded.
+ *
+ * Otherwise ``data`` will hold a workable copy of devstr, that will be
+ * used by layers descriptors within rte_devargs. In this case,
+ * any rte_devargs should be cleaned-up before being freed.
+ *
+ * @param da
+ * rte_devargs structure to fill.
+ *
+ * @param devstr
+ * Device string.
+ *
+ * @return
+ * 0 on success.
+ * Negative errno values on error (rte_errno is set).
+ */
+int
+rte_devargs_layers_parse(struct rte_devargs *devargs,
+ const char *devstr);
+
#endif /* _EAL_PRIVATE_H_ */
diff --git a/lib/librte_eal/common/include/rte_bitmap.h b/lib/librte_eal/common/include/rte_bitmap.h
index 7d4935fc..d9facc64 100644
--- a/lib/librte_eal/common/include/rte_bitmap.h
+++ b/lib/librte_eal/common/include/rte_bitmap.h
@@ -198,12 +198,12 @@ rte_bitmap_get_memory_footprint(uint32_t n_bits) {
/**
* Bitmap initialization
*
- * @param mem_size
- * Minimum expected size of bitmap.
+ * @param n_bits
+ * Number of pre-allocated bits in array2.
* @param mem
* Base address of array1 and array2.
- * @param n_bits
- * Number of pre-allocated bits in array2. Must be non-zero and multiple of 512.
+ * @param mem_size
+ * Minimum expected size of bitmap.
* @return
* Handle to bitmap instance.
*/
diff --git a/lib/librte_eal/common/include/rte_bus.h b/lib/librte_eal/common/include/rte_bus.h
index eb9eded4..b7b5b084 100644
--- a/lib/librte_eal/common/include/rte_bus.h
+++ b/lib/librte_eal/common/include/rte_bus.h
@@ -211,6 +211,7 @@ struct rte_bus {
rte_bus_parse_t parse; /**< Parse a device name */
struct rte_bus_conf conf; /**< Bus configuration */
rte_bus_get_iommu_class_t get_iommu_class; /**< Get iommu class */
+ rte_dev_iterate_t dev_iterate; /**< Device iterator. */
};
/**
@@ -325,8 +326,7 @@ enum rte_iova_mode rte_bus_get_iommu_class(void);
* The constructor has higher priority than PMD constructors.
*/
#define RTE_REGISTER_BUS(nm, bus) \
-RTE_INIT_PRIO(businitfn_ ##nm, BUS); \
-static void businitfn_ ##nm(void) \
+RTE_INIT_PRIO(businitfn_ ##nm, BUS) \
{\
(bus).name = RTE_STR(nm);\
rte_bus_register(&bus); \
diff --git a/lib/librte_eal/common/include/rte_class.h b/lib/librte_eal/common/include/rte_class.h
new file mode 100644
index 00000000..276c91e9
--- /dev/null
+++ b/lib/librte_eal/common/include/rte_class.h
@@ -0,0 +1,134 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright 2018 Gaƫtan Rivet
+ */
+
+#ifndef _RTE_CLASS_H_
+#define _RTE_CLASS_H_
+
+/**
+ * @file
+ *
+ * DPDK device class interface.
+ *
+ * This file describes the interface of the device class
+ * abstraction layer.
+ *
+ * A device class defines the type of function a device
+ * will be used for e.g.: Ethernet adapter (eth),
+ * cryptographic coprocessor (crypto), etc.
+ */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <sys/queue.h>
+
+#include <rte_dev.h>
+
+/** Double linked list of classes */
+TAILQ_HEAD(rte_class_list, rte_class);
+
+/**
+ * A structure describing a generic device class.
+ */
+struct rte_class {
+ TAILQ_ENTRY(rte_class) next; /**< Next device class in linked list */
+ const char *name; /**< Name of the class */
+ rte_dev_iterate_t dev_iterate; /**< Device iterator. */
+};
+
+/**
+ * Class comparison function.
+ *
+ * @param cls
+ * Class under test.
+ *
+ * @param data
+ * Data to compare against.
+ *
+ * @return
+ * 0 if the class matches the data.
+ * !0 if the class does not match.
+ * <0 if ordering is possible and the class is lower than the data.
+ * >0 if ordering is possible and the class is greater than the data.
+ */
+typedef int (*rte_class_cmp_t)(const struct rte_class *cls, const void *data);
+
+/**
+ * Class iterator to find a particular class.
+ *
+ * This function compares each registered class to find one that matches
+ * the data passed as parameter.
+ *
+ * If the comparison function returns zero this function will stop iterating
+ * over any more classes. To continue a search the class of a previous search
+ * can be passed via the start parameter.
+ *
+ * @param start
+ * Starting point for the iteration.
+ *
+ * @param cmp
+ * Comparison function.
+ *
+ * @param data
+ * Data to pass to comparison function.
+ *
+ * @return
+ * A pointer to a rte_class structure or NULL in case no class matches
+ */
+__rte_experimental
+struct rte_class *
+rte_class_find(const struct rte_class *start, rte_class_cmp_t cmp,
+ const void *data);
+
+/**
+ * Find the registered class for a given name.
+ */
+__rte_experimental
+struct rte_class *
+rte_class_find_by_name(const char *name);
+
+/**
+ * Register a Class handle.
+ *
+ * @param cls
+ * A pointer to a rte_class structure describing the class
+ * to be registered.
+ */
+__rte_experimental
+void rte_class_register(struct rte_class *cls);
+
+/**
+ * Unregister a Class handle.
+ *
+ * @param cls
+ * A pointer to a rte_class structure describing the class
+ * to be unregistered.
+ */
+__rte_experimental
+void rte_class_unregister(struct rte_class *cls);
+
+/**
+ * Helper for Class registration.
+ * The constructor has lower priority than Bus constructors.
+ * The constructor has higher priority than PMD constructors.
+ */
+#define RTE_REGISTER_CLASS(nm, cls) \
+RTE_INIT_PRIO(classinitfn_ ##nm, CLASS) \
+{\
+ (cls).name = RTE_STR(nm); \
+ rte_class_register(&cls); \
+}
+
+#define RTE_UNREGISTER_CLASS(nm, cls) \
+RTE_FINI_PRIO(classfinifn_ ##nm, CLASS) \
+{ \
+ rte_class_unregister(&cls); \
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_CLASS_H_ */
diff --git a/lib/librte_eal/common/include/rte_common.h b/lib/librte_eal/common/include/rte_common.h
index 434adfd4..069c13ec 100644
--- a/lib/librte_eal/common/include/rte_common.h
+++ b/lib/librte_eal/common/include/rte_common.h
@@ -83,6 +83,7 @@ typedef uint16_t unaligned_uint16_t;
#define RTE_PRIORITY_LOG 101
#define RTE_PRIORITY_BUS 110
+#define RTE_PRIORITY_CLASS 120
#define RTE_PRIORITY_LAST 65535
#define RTE_PRIO(prio) \
@@ -112,6 +113,29 @@ static void __attribute__((constructor(RTE_PRIO(prio)), used)) func(void)
RTE_INIT_PRIO(func, LAST)
/**
+ * Run after main() with low priority.
+ *
+ * @param func
+ * Destructor function name.
+ * @param prio
+ * Priority number must be above 100.
+ * Lowest number is the last to run.
+ */
+#define RTE_FINI_PRIO(func, prio) \
+static void __attribute__((destructor(RTE_PRIO(prio)), used)) func(void)
+
+/**
+ * Run after main() with high priority.
+ *
+ * The destructor will be run *before* prioritized destructors.
+ *
+ * @param func
+ * Destructor function name.
+ */
+#define RTE_FINI(func) \
+ RTE_FINI_PRIO(func, LAST)
+
+/**
* Force a function to be inlined
*/
#define __rte_always_inline inline __attribute__((always_inline))
@@ -294,6 +318,11 @@ rte_combine64ms1b(register uint64_t v)
/*********** Macros to work with powers of 2 ********/
/**
+ * Macro to return 1 if n is a power of 2, 0 otherwise
+ */
+#define RTE_IS_POWER_OF_2(n) ((n) && !(((n) - 1) & (n)))
+
+/**
* Returns true if n is a power of 2
* @param n
* Number to check
diff --git a/lib/librte_eal/common/include/rte_dev.h b/lib/librte_eal/common/include/rte_dev.h
index 3879ff3c..b80a8059 100644
--- a/lib/librte_eal/common/include/rte_dev.h
+++ b/lib/librte_eal/common/include/rte_dev.h
@@ -69,9 +69,7 @@ rte_pmd_debug_trace(const char *func_name, const char *fmt, ...)
* Enable RTE_PMD_DEBUG_TRACE() when at least one component relying on the
* RTE_*_RET() macros defined below is compiled in debug mode.
*/
-#if defined(RTE_LIBRTE_ETHDEV_DEBUG) || \
- defined(RTE_LIBRTE_CRYPTODEV_DEBUG) || \
- defined(RTE_LIBRTE_EVENTDEV_DEBUG)
+#if defined(RTE_LIBRTE_EVENTDEV_DEBUG)
#define RTE_PMD_DEBUG_TRACE(...) \
rte_pmd_debug_trace(__func__, __VA_ARGS__)
#else
@@ -176,6 +174,7 @@ struct rte_device {
* @return
* 0 on success, negative on error.
*/
+__rte_deprecated
int rte_eal_dev_attach(const char *name, const char *devargs);
/**
@@ -186,6 +185,7 @@ int rte_eal_dev_attach(const char *name, const char *devargs);
* @return
* 0 on success, negative on error.
*/
+__rte_deprecated
int rte_eal_dev_detach(struct rte_device *dev);
/**
@@ -285,6 +285,103 @@ __attribute__((used)) = str
static const char DRV_EXP_TAG(name, kmod_dep_export)[] \
__attribute__((used)) = str
+/**
+ * Iteration context.
+ *
+ * This context carries over the current iteration state.
+ */
+struct rte_dev_iterator {
+ const char *dev_str; /**< device string. */
+ const char *bus_str; /**< bus-related part of device string. */
+ const char *cls_str; /**< class-related part of device string. */
+ struct rte_bus *bus; /**< bus handle. */
+ struct rte_class *cls; /**< class handle. */
+ struct rte_device *device; /**< current position. */
+ void *class_device; /**< additional specialized context. */
+};
+
+/**
+ * Device iteration function.
+ *
+ * Find the next device matching properties passed in parameters.
+ * The function takes an additional ``start`` parameter, that is
+ * used as starting context when relevant.
+ *
+ * The function returns the current element in the iteration.
+ * This return value will potentially be used as a start parameter
+ * in subsequent calls to the function.
+ *
+ * The additional iterator parameter is only there if a specific
+ * implementation needs additional context. It must not be modified by
+ * the iteration function itself.
+ *
+ * @param start
+ * Starting iteration context.
+ *
+ * @param devstr
+ * Device description string.
+ *
+ * @param it
+ * Device iterator.
+ *
+ * @return
+ * The address of the current element matching the device description
+ * string.
+ */
+typedef void *(*rte_dev_iterate_t)(const void *start,
+ const char *devstr,
+ const struct rte_dev_iterator *it);
+
+/**
+ * Initializes a device iterator.
+ *
+ * This iterator allows accessing a list of devices matching a criteria.
+ * The device matching is made among all buses and classes currently registered,
+ * filtered by the device description given as parameter.
+ *
+ * This function will not allocate any memory. It is safe to stop the
+ * iteration at any moment and let the iterator go out of context.
+ *
+ * @param it
+ * Device iterator handle.
+ *
+ * @param str
+ * Device description string.
+ *
+ * @return
+ * 0 on successful initialization.
+ * <0 on error.
+ */
+__rte_experimental
+int
+rte_dev_iterator_init(struct rte_dev_iterator *it, const char *str);
+
+/**
+ * Iterates on a device iterator.
+ *
+ * Generates a new rte_device handle corresponding to the next element
+ * in the list described in comprehension by the iterator.
+ *
+ * The next object is returned, and the iterator is updated.
+ *
+ * @param it
+ * Device iterator handle.
+ *
+ * @return
+ * An rte_device handle if found.
+ * NULL if an error occurred (rte_errno is set).
+ * NULL if no device could be found (rte_errno is not set).
+ */
+__rte_experimental
+struct rte_device *
+rte_dev_iterator_next(struct rte_dev_iterator *it);
+
+#define RTE_DEV_FOREACH(dev, devstr, it) \
+ for (rte_dev_iterator_init(it, devstr), \
+ dev = rte_dev_iterator_next(it); \
+ dev != NULL; \
+ dev = rte_dev_iterator_next(it))
+
#ifdef __cplusplus
}
#endif
diff --git a/lib/librte_eal/common/include/rte_devargs.h b/lib/librte_eal/common/include/rte_devargs.h
index 58fbd90a..097a4ce7 100644
--- a/lib/librte_eal/common/include/rte_devargs.h
+++ b/lib/librte_eal/common/include/rte_devargs.h
@@ -51,12 +51,19 @@ struct rte_devargs {
enum rte_devtype type;
/** Device policy. */
enum rte_dev_policy policy;
- /** Bus handle for the device. */
- struct rte_bus *bus;
/** Name of the device. */
char name[RTE_DEV_NAME_MAX_LEN];
+ RTE_STD_C11
+ union {
/** Arguments string as given by user or "" for no argument. */
- char *args;
+ char *args;
+ const char *drv_str;
+ };
+ struct rte_bus *bus; /**< bus handle. */
+ struct rte_class *cls; /**< class handle. */
+ const char *bus_str; /**< bus-related part of device string. */
+ const char *cls_str; /**< class-related part of device string. */
+ const char *data; /**< Device string storage. */
};
/**
@@ -96,6 +103,42 @@ int rte_eal_parse_devargs_str(const char *devargs_str,
* in argument. Store which bus will handle the device, its name
* and the eventual device parameters.
*
+ * The syntax is:
+ *
+ * bus:device_identifier,arg1=val1,arg2=val2
+ *
+ * where "bus:" is the bus name followed by any character separator.
+ * The bus name is optional. If no bus name is specified, each bus
+ * will attempt to recognize the device identifier. The first one
+ * to succeed will be used.
+ *
+ * Examples:
+ *
+ * pci:0000:05.00.0,arg=val
+ * 05.00.0,arg=val
+ * vdev:net_ring0
+ *
+ * @param da
+ * The devargs structure holding the device information.
+ *
+ * @param dev
+ * String describing a device.
+ *
+ * @return
+ * - 0 on success.
+ * - Negative errno on error.
+ */
+__rte_experimental
+int
+rte_devargs_parse(struct rte_devargs *da, const char *dev);
+
+/**
+ * Parse a device string.
+ *
+ * Verify that a bus is capable of handling the device passed
+ * in argument. Store which bus will handle the device, its name
+ * and the eventual device parameters.
+ *
* The device string is built with a printf-like syntax.
*
* The syntax is:
@@ -124,8 +167,8 @@ int rte_eal_parse_devargs_str(const char *devargs_str,
*/
__rte_experimental
int
-rte_devargs_parse(struct rte_devargs *da,
- const char *format, ...)
+rte_devargs_parsef(struct rte_devargs *da,
+ const char *format, ...)
__attribute__((format(printf, 2, 0)));
/**
diff --git a/lib/librte_eal/common/include/rte_eal.h b/lib/librte_eal/common/include/rte_eal.h
index 8de5d69e..e114dcbd 100644
--- a/lib/librte_eal/common/include/rte_eal.h
+++ b/lib/librte_eal/common/include/rte_eal.h
@@ -490,27 +490,13 @@ static inline int rte_gettid(void)
enum rte_iova_mode rte_eal_iova_mode(void);
/**
- * @warning
- * @b EXPERIMENTAL: this API may change without prior notice
- *
* Get user provided pool ops name for mbuf
*
* @return
* returns user provided pool ops name.
*/
-const char * __rte_experimental
-rte_eal_mbuf_user_pool_ops(void);
-
-/**
- * @deprecated
- * Get default pool ops name for mbuf
- *
- * @return
- * returns default pool ops name.
- */
-__rte_deprecated
const char *
-rte_eal_mbuf_default_mempool_ops(void);
+rte_eal_mbuf_user_pool_ops(void);
#ifdef __cplusplus
}
diff --git a/lib/librte_eal/common/include/rte_fbarray.h b/lib/librte_eal/common/include/rte_fbarray.h
index 3e61fffe..5d880551 100644
--- a/lib/librte_eal/common/include/rte_fbarray.h
+++ b/lib/librte_eal/common/include/rte_fbarray.h
@@ -336,6 +336,120 @@ rte_fbarray_find_contig_free(struct rte_fbarray *arr,
int __rte_experimental
rte_fbarray_find_contig_used(struct rte_fbarray *arr, unsigned int start);
+/**
+ * Find index of previous free element, starting at specified index.
+ *
+ * @param arr
+ * Valid pointer to allocated and correctly set up ``rte_fbarray`` structure.
+ *
+ * @param start
+ * Element index to start search from.
+ *
+ * @return
+ * - non-negative integer on success.
+ * - -1 on failure, with ``rte_errno`` indicating reason for failure.
+ */
+int __rte_experimental
+rte_fbarray_find_prev_free(struct rte_fbarray *arr, unsigned int start);
+
+
+/**
+ * Find index of previous used element, starting at specified index.
+ *
+ * @param arr
+ * Valid pointer to allocated and correctly set up ``rte_fbarray`` structure.
+ *
+ * @param start
+ * Element index to start search from.
+ *
+ * @return
+ * - non-negative integer on success.
+ * - -1 on failure, with ``rte_errno`` indicating reason for failure.
+ */
+int __rte_experimental
+rte_fbarray_find_prev_used(struct rte_fbarray *arr, unsigned int start);
+
+
+/**
+ * Find lowest start index of chunk of ``n`` free elements, down from specified
+ * index.
+ *
+ * @param arr
+ * Valid pointer to allocated and correctly set up ``rte_fbarray`` structure.
+ *
+ * @param start
+ * Element index to start search from.
+ *
+ * @param n
+ * Number of free elements to look for.
+ *
+ * @return
+ * - non-negative integer on success.
+ * - -1 on failure, with ``rte_errno`` indicating reason for failure.
+ */
+int __rte_experimental
+rte_fbarray_find_prev_n_free(struct rte_fbarray *arr, unsigned int start,
+ unsigned int n);
+
+
+/**
+ * Find lowest start index of chunk of ``n`` used elements, down from specified
+ * index.
+ *
+ * @param arr
+ * Valid pointer to allocated and correctly set up ``rte_fbarray`` structure.
+ *
+ * @param start
+ * Element index to start search from.
+ *
+ * @param n
+ * Number of used elements to look for.
+ *
+ * @return
+ * - non-negative integer on success.
+ * - -1 on failure, with ``rte_errno`` indicating reason for failure.
+ */
+int __rte_experimental
+rte_fbarray_find_prev_n_used(struct rte_fbarray *arr, unsigned int start,
+ unsigned int n);
+
+
+/**
+ * Find how many more free entries there are before specified index (like
+ * ``rte_fbarray_find_contig_free`` but going in reverse).
+ *
+ * @param arr
+ * Valid pointer to allocated and correctly set up ``rte_fbarray`` structure.
+ *
+ * @param start
+ * Element index to start search from.
+ *
+ * @return
+ * - non-negative integer on success.
+ * - -1 on failure, with ``rte_errno`` indicating reason for failure.
+ */
+int __rte_experimental
+rte_fbarray_find_rev_contig_free(struct rte_fbarray *arr,
+ unsigned int start);
+
+
+/**
+ * Find how many more used entries there are before specified index (like
+ * ``rte_fbarray_find_contig_used`` but going in reverse).
+ *
+ * @param arr
+ * Valid pointer to allocated and correctly set up ``rte_fbarray`` structure.
+ *
+ * @param start
+ * Element index to start search from.
+ *
+ * @return
+ * - non-negative integer on success.
+ * - -1 on failure, with ``rte_errno`` indicating reason for failure.
+ */
+int __rte_experimental
+rte_fbarray_find_rev_contig_used(struct rte_fbarray *arr, unsigned int start);
+
/**
* Dump ``rte_fbarray`` metadata.
diff --git a/lib/librte_eal/common/include/rte_memory.h b/lib/librte_eal/common/include/rte_memory.h
index aab9f6fe..c4b7f4cf 100644
--- a/lib/librte_eal/common/include/rte_memory.h
+++ b/lib/librte_eal/common/include/rte_memory.h
@@ -264,6 +264,60 @@ int __rte_experimental
rte_memseg_list_walk(rte_memseg_list_walk_t func, void *arg);
/**
+ * Walk list of all memsegs without performing any locking.
+ *
+ * @note This function does not perform any locking, and is only safe to call
+ * from within memory-related callback functions.
+ *
+ * @param func
+ * Iterator function
+ * @param arg
+ * Argument passed to iterator
+ * @return
+ * 0 if walked over the entire list
+ * 1 if stopped by the user
+ * -1 if user function reported error
+ */
+int __rte_experimental
+rte_memseg_walk_thread_unsafe(rte_memseg_walk_t func, void *arg);
+
+/**
+ * Walk each VA-contiguous area without performing any locking.
+ *
+ * @note This function does not perform any locking, and is only safe to call
+ * from within memory-related callback functions.
+ *
+ * @param func
+ * Iterator function
+ * @param arg
+ * Argument passed to iterator
+ * @return
+ * 0 if walked over the entire list
+ * 1 if stopped by the user
+ * -1 if user function reported error
+ */
+int __rte_experimental
+rte_memseg_contig_walk_thread_unsafe(rte_memseg_contig_walk_t func, void *arg);
+
+/**
+ * Walk each allocated memseg list without performing any locking.
+ *
+ * @note This function does not perform any locking, and is only safe to call
+ * from within memory-related callback functions.
+ *
+ * @param func
+ * Iterator function
+ * @param arg
+ * Argument passed to iterator
+ * @return
+ * 0 if walked over the entire list
+ * 1 if stopped by the user
+ * -1 if user function reported error
+ */
+int __rte_experimental
+rte_memseg_list_walk_thread_unsafe(rte_memseg_list_walk_t func, void *arg);
+
+/**
* Dump the physical memory layout to a file.
*
* @note This function read-locks the memory hotplug subsystem, and thus cannot
diff --git a/lib/librte_eal/common/include/rte_memzone.h b/lib/librte_eal/common/include/rte_memzone.h
index ef370fa6..f478fa9e 100644
--- a/lib/librte_eal/common/include/rte_memzone.h
+++ b/lib/librte_eal/common/include/rte_memzone.h
@@ -81,8 +81,12 @@ struct rte_memzone {
* memzones from memory that is already available. It will not trigger any
* new allocations.
*
- * @note Reserving IOVA-contiguous memzones with len set to 0 is not currently
- * supported.
+ * @note: When reserving memzones with len set to 0, it is preferable to also
+ * set a valid socket_id. Setting socket_id to SOCKET_ID_ANY is supported, but
+ * will likely not yield expected results. Specifically, the resulting memzone
+ * may not necessarily be the biggest memzone available, but rather biggest
+ * memzone available on socket id corresponding to an lcore from which
+ * reservation was called.
*
* @param name
* The name of the memzone. If it already exists, the function will
@@ -141,8 +145,12 @@ const struct rte_memzone *rte_memzone_reserve(const char *name,
* memzones from memory that is already available. It will not trigger any
* new allocations.
*
- * @note Reserving IOVA-contiguous memzones with len set to 0 is not currently
- * supported.
+ * @note: When reserving memzones with len set to 0, it is preferable to also
+ * set a valid socket_id. Setting socket_id to SOCKET_ID_ANY is supported, but
+ * will likely not yield expected results. Specifically, the resulting memzone
+ * may not necessarily be the biggest memzone available, but rather biggest
+ * memzone available on socket id corresponding to an lcore from which
+ * reservation was called.
*
* @param name
* The name of the memzone. If it already exists, the function will
@@ -206,8 +214,12 @@ const struct rte_memzone *rte_memzone_reserve_aligned(const char *name,
* memzones from memory that is already available. It will not trigger any
* new allocations.
*
- * @note Reserving IOVA-contiguous memzones with len set to 0 is not currently
- * supported.
+ * @note: When reserving memzones with len set to 0, it is preferable to also
+ * set a valid socket_id. Setting socket_id to SOCKET_ID_ANY is supported, but
+ * will likely not yield expected results. Specifically, the resulting memzone
+ * may not necessarily be the biggest memzone available, but rather biggest
+ * memzone available on socket id corresponding to an lcore from which
+ * reservation was called.
*
* @param name
* The name of the memzone. If it already exists, the function will
diff --git a/lib/librte_eal/common/include/rte_service.h b/lib/librte_eal/common/include/rte_service.h
index aea4d91b..34b41aff 100644
--- a/lib/librte_eal/common/include/rte_service.h
+++ b/lib/librte_eal/common/include/rte_service.h
@@ -162,6 +162,26 @@ int32_t rte_service_runstate_set(uint32_t id, uint32_t runstate);
int32_t rte_service_runstate_get(uint32_t id);
/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change, or be removed, without prior notice
+ *
+ * This function returns whether the service may be currently executing on
+ * at least one lcore, or definitely is not. This function can be used to
+ * determine if, after setting the service runstate to stopped, the service
+ * is still executing a service lcore.
+ *
+ * Care must be taken if calling this function when the service runstate is
+ * running, since the result of this function may be incorrect by the time the
+ * function returns due to service cores running in parallel.
+ *
+ * @retval 1 Service may be running on one or more lcores
+ * @retval 0 Service is not running on any lcore
+ * @retval -EINVAL Invalid service id
+ */
+int32_t __rte_experimental
+rte_service_may_be_active(uint32_t id);
+
+/**
* Enable or disable the check for a service-core being mapped to the service.
* An application can disable the check when takes the responsibility to run a
* service itself using *rte_service_run_iter_on_app_lcore*.
@@ -363,6 +383,42 @@ int32_t rte_service_attr_get(uint32_t id, uint32_t attr_id,
*/
int32_t rte_service_attr_reset_all(uint32_t id);
+/**
+ * Returns the number of times the service runner has looped.
+ */
+#define RTE_SERVICE_LCORE_ATTR_LOOPS 0
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice
+ *
+ * Get an attribute from a service core.
+ *
+ * @param lcore Id of the service core.
+ * @param attr_id Id of the attribute to be retrieved.
+ * @param [out] attr_value Pointer to storage in which to write retrieved value.
+ * @retval 0 Success, the attribute value has been written to *attr_value*.
+ * -EINVAL Invalid lcore, attr_id or attr_value was NULL.
+ * -ENOTSUP lcore is not a service core.
+ */
+int32_t __rte_experimental
+rte_service_lcore_attr_get(uint32_t lcore, uint32_t attr_id,
+ uint64_t *attr_value);
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice
+ *
+ * Reset all attribute values of a service core.
+ *
+ * @param lcore The service core to reset all the statistics of
+ * @retval 0 Successfully reset attributes
+ * -EINVAL Invalid service id provided
+ * -ENOTSUP lcore is not a service core.
+ */
+int32_t __rte_experimental
+rte_service_lcore_attr_reset_all(uint32_t lcore);
+
#ifdef __cplusplus
}
#endif
diff --git a/lib/librte_eal/common/include/rte_tailq.h b/lib/librte_eal/common/include/rte_tailq.h
index 8dccaefc..9b01abb2 100644
--- a/lib/librte_eal/common/include/rte_tailq.h
+++ b/lib/librte_eal/common/include/rte_tailq.h
@@ -119,8 +119,7 @@ struct rte_tailq_head *rte_eal_tailq_lookup(const char *name);
int rte_eal_tailq_register(struct rte_tailq_elem *t);
#define EAL_REGISTER_TAILQ(t) \
-RTE_INIT(tailqinitfn_ ##t); \
-static void tailqinitfn_ ##t(void) \
+RTE_INIT(tailqinitfn_ ##t) \
{ \
if (rte_eal_tailq_register(&t) < 0) \
rte_panic("Cannot initialize tailq: %s\n", t.name); \
diff --git a/lib/librte_eal/common/include/rte_uuid.h b/lib/librte_eal/common/include/rte_uuid.h
new file mode 100644
index 00000000..2c846b5f
--- /dev/null
+++ b/lib/librte_eal/common/include/rte_uuid.h
@@ -0,0 +1,129 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright (C) 1996, 1997, 1998 Theodore Ts'o.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, and the entire permission notice in its entirety,
+ * including the disclaimer of warranties.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. The name of the author may not be used to endorse or promote
+ * products derived from this software without specific prior
+ * written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ALL OF
+ * WHICH ARE HEREBY DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT
+ * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
+ * USE OF THIS SOFTWARE, EVEN IF NOT ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ */
+/**
+ * @file
+ *
+ * UUID related functions originally from libuuid
+ */
+
+#ifndef _RTE_UUID_H_
+#define _RTE_UUID_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <stdbool.h>
+
+/**
+ * Struct describing a Universal Unique Identifer
+ */
+typedef unsigned char rte_uuid_t[16];
+
+/**
+ * Helper for defining UUID values for id tables.
+ */
+#define RTE_UUID_INIT(a, b, c, d, e) { \
+ ((a) >> 24) & 0xff, ((a) >> 16) & 0xff, \
+ ((a) >> 8) & 0xff, (a) & 0xff, \
+ ((b) >> 8) & 0xff, (b) & 0xff, \
+ ((c) >> 8) & 0xff, (c) & 0xff, \
+ ((d) >> 8) & 0xff, (d) & 0xff, \
+ ((e) >> 40) & 0xff, ((e) >> 32) & 0xff, \
+ ((e) >> 24) & 0xff, ((e) >> 16) & 0xff, \
+ ((e) >> 8) & 0xff, (e) & 0xff \
+}
+
+/**
+ * Test if UUID is all zeros.
+ *
+ * @param uu
+ * The uuid to check.
+ * @return
+ * true if uuid is NULL value, false otherwise
+ */
+bool rte_uuid_is_null(const rte_uuid_t uu);
+
+/**
+ * Copy uuid.
+ *
+ * @param dst
+ * Destination uuid
+ * @param src
+ * Source uuid
+ */
+static inline void rte_uuid_copy(rte_uuid_t dst, const rte_uuid_t src)
+{
+ memcpy(dst, src, sizeof(rte_uuid_t));
+}
+
+/**
+ * Compare two UUID's
+ *
+ * @param a
+ * A UUID to compare
+ * @param b
+ * A UUID to compare
+ * @return
+ * returns an integer less than, equal to, or greater than zero if UUID a is
+ * is less than, equal, or greater than UUID b.
+ */
+int rte_uuid_compare(const rte_uuid_t a, const rte_uuid_t b);
+
+/**
+ * Extract UUID from string
+ *
+ * @param in
+ * Pointer to string of characters to convert
+ * @param uu
+ * Destination UUID
+ * @return
+ * Returns 0 on succes, and -1 if string is not a valid UUID.
+ */
+int rte_uuid_parse(const char *in, rte_uuid_t uu);
+
+/**
+ * Convert UUID to string
+ *
+ * @param uu
+ * UUID to format
+ * @param out
+ * Resulting string buffer
+ * @param len
+ * Sizeof the available string buffer
+ */
+#define RTE_UUID_STRLEN (36 + 1)
+void rte_uuid_unparse(const rte_uuid_t uu, char *out, size_t len);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* RTE_UUID_H */
diff --git a/lib/librte_eal/common/include/rte_version.h b/lib/librte_eal/common/include/rte_version.h
index 6e2a2362..7c6714a2 100644
--- a/lib/librte_eal/common/include/rte_version.h
+++ b/lib/librte_eal/common/include/rte_version.h
@@ -32,7 +32,7 @@ extern "C" {
/**
* Minor version/month number i.e. the mm in yy.mm.z
*/
-#define RTE_VER_MONTH 05
+#define RTE_VER_MONTH 8
/**
* Patch level number i.e. the z in yy.mm.z
diff --git a/lib/librte_eal/common/include/rte_vfio.h b/lib/librte_eal/common/include/rte_vfio.h
index f90972fa..5ca13fcc 100644
--- a/lib/librte_eal/common/include/rte_vfio.h
+++ b/lib/librte_eal/common/include/rte_vfio.h
@@ -179,7 +179,7 @@ rte_vfio_clear_group(int vfio_group_fd);
* 0 if success.
* -1 on error.
*/
-int __rte_experimental
+int
rte_vfio_dma_map(uint64_t vaddr, uint64_t iova, uint64_t len);
@@ -200,7 +200,7 @@ rte_vfio_dma_map(uint64_t vaddr, uint64_t iova, uint64_t len);
* -1 on error.
*/
-int __rte_experimental
+int
rte_vfio_dma_unmap(uint64_t vaddr, uint64_t iova, uint64_t len);
/**
* Parse IOMMU group number for a device
@@ -222,7 +222,7 @@ rte_vfio_dma_unmap(uint64_t vaddr, uint64_t iova, uint64_t len);
* 0 for non-existent group or VFIO
* <0 for errors
*/
-int __rte_experimental
+int
rte_vfio_get_group_num(const char *sysfs_base,
const char *dev_addr, int *iommu_group_num);
@@ -236,7 +236,7 @@ rte_vfio_get_group_num(const char *sysfs_base,
* > 0 container fd
* < 0 for errors
*/
-int __rte_experimental
+int
rte_vfio_get_container_fd(void);
/**
@@ -252,13 +252,10 @@ rte_vfio_get_container_fd(void);
* > 0 group fd
* < 0 for errors
*/
-int __rte_experimental
+int
rte_vfio_get_group_fd(int iommu_group_num);
/**
- * @warning
- * @b EXPERIMENTAL: this API may change, or be removed, without prior notice
- *
* Create a new container for device binding.
*
* @note Any newly allocated DPDK memory will not be mapped into these
@@ -269,13 +266,10 @@ rte_vfio_get_group_fd(int iommu_group_num);
* the container fd if successful
* <0 if failed
*/
-int __rte_experimental
+int
rte_vfio_container_create(void);
/**
- * @warning
- * @b EXPERIMENTAL: this API may change, or be removed, without prior notice
- *
* Destroy the container, unbind all vfio groups within it.
*
* @param container_fd
@@ -285,13 +279,10 @@ rte_vfio_container_create(void);
* 0 if successful
* <0 if failed
*/
-int __rte_experimental
+int
rte_vfio_container_destroy(int container_fd);
/**
- * @warning
- * @b EXPERIMENTAL: this API may change, or be removed, without prior notice
- *
* Bind a IOMMU group to a container.
*
* @param container_fd
@@ -304,13 +295,10 @@ rte_vfio_container_destroy(int container_fd);
* group fd if successful
* <0 if failed
*/
-int __rte_experimental
+int
rte_vfio_container_group_bind(int container_fd, int iommu_group_num);
/**
- * @warning
- * @b EXPERIMENTAL: this API may change, or be removed, without prior notice
- *
* Unbind a IOMMU group from a container.
*
* @param container_fd
@@ -323,13 +311,10 @@ rte_vfio_container_group_bind(int container_fd, int iommu_group_num);
* 0 if successful
* <0 if failed
*/
-int __rte_experimental
+int
rte_vfio_container_group_unbind(int container_fd, int iommu_group_num);
/**
- * @warning
- * @b EXPERIMENTAL: this API may change, or be removed, without prior notice
- *
* Perform DMA mapping for devices in a container.
*
* @param container_fd
@@ -348,14 +333,11 @@ rte_vfio_container_group_unbind(int container_fd, int iommu_group_num);
* 0 if successful
* <0 if failed
*/
-int __rte_experimental
+int
rte_vfio_container_dma_map(int container_fd, uint64_t vaddr,
uint64_t iova, uint64_t len);
/**
- * @warning
- * @b EXPERIMENTAL: this API may change, or be removed, without prior notice
- *
* Perform DMA unmapping for devices in a container.
*
* @param container_fd
@@ -374,7 +356,7 @@ rte_vfio_container_dma_map(int container_fd, uint64_t vaddr,
* 0 if successful
* <0 if failed
*/
-int __rte_experimental
+int
rte_vfio_container_dma_unmap(int container_fd, uint64_t vaddr,
uint64_t iova, uint64_t len);
diff --git a/lib/librte_eal/common/malloc_elem.c b/lib/librte_eal/common/malloc_elem.c
index 9bfe9b9b..e0a8ed15 100644
--- a/lib/librte_eal/common/malloc_elem.c
+++ b/lib/librte_eal/common/malloc_elem.c
@@ -18,10 +18,89 @@
#include <rte_common.h>
#include <rte_spinlock.h>
+#include "eal_internal_cfg.h"
#include "eal_memalloc.h"
#include "malloc_elem.h"
#include "malloc_heap.h"
+size_t
+malloc_elem_find_max_iova_contig(struct malloc_elem *elem, size_t align)
+{
+ void *cur_page, *contig_seg_start, *page_end, *cur_seg_end;
+ void *data_start, *data_end;
+ rte_iova_t expected_iova;
+ struct rte_memseg *ms;
+ size_t page_sz, cur, max;
+
+ page_sz = (size_t)elem->msl->page_sz;
+ data_start = RTE_PTR_ADD(elem, MALLOC_ELEM_HEADER_LEN);
+ data_end = RTE_PTR_ADD(elem, elem->size - MALLOC_ELEM_TRAILER_LEN);
+ /* segment must start after header and with specified alignment */
+ contig_seg_start = RTE_PTR_ALIGN_CEIL(data_start, align);
+
+ /* if we're in IOVA as VA mode, or if we're in legacy mode with
+ * hugepages, all elements are IOVA-contiguous.
+ */
+ if (rte_eal_iova_mode() == RTE_IOVA_VA ||
+ (internal_config.legacy_mem && rte_eal_has_hugepages()))
+ return RTE_PTR_DIFF(data_end, contig_seg_start);
+
+ cur_page = RTE_PTR_ALIGN_FLOOR(contig_seg_start, page_sz);
+ ms = rte_mem_virt2memseg(cur_page, elem->msl);
+
+ /* do first iteration outside the loop */
+ page_end = RTE_PTR_ADD(cur_page, page_sz);
+ cur_seg_end = RTE_MIN(page_end, data_end);
+ cur = RTE_PTR_DIFF(cur_seg_end, contig_seg_start) -
+ MALLOC_ELEM_TRAILER_LEN;
+ max = cur;
+ expected_iova = ms->iova + page_sz;
+ /* memsegs are contiguous in memory */
+ ms++;
+
+ cur_page = RTE_PTR_ADD(cur_page, page_sz);
+
+ while (cur_page < data_end) {
+ page_end = RTE_PTR_ADD(cur_page, page_sz);
+ cur_seg_end = RTE_MIN(page_end, data_end);
+
+ /* reset start of contiguous segment if unexpected iova */
+ if (ms->iova != expected_iova) {
+ /* next contiguous segment must start at specified
+ * alignment.
+ */
+ contig_seg_start = RTE_PTR_ALIGN(cur_page, align);
+ /* new segment start may be on a different page, so find
+ * the page and skip to next iteration to make sure
+ * we're not blowing past data end.
+ */
+ ms = rte_mem_virt2memseg(contig_seg_start, elem->msl);
+ cur_page = ms->addr;
+ /* don't trigger another recalculation */
+ expected_iova = ms->iova;
+ continue;
+ }
+ /* cur_seg_end ends on a page boundary or on data end. if we're
+ * looking at data end, then malloc trailer is already included
+ * in the calculations. if we're looking at page end, then we
+ * know there's more data past this page and thus there's space
+ * for malloc element trailer, so don't count it here.
+ */
+ cur = RTE_PTR_DIFF(cur_seg_end, contig_seg_start);
+ /* update max if cur value is bigger */
+ if (cur > max)
+ max = cur;
+
+ /* move to next page */
+ cur_page = page_end;
+ expected_iova = ms->iova + page_sz;
+ /* memsegs are contiguous in memory */
+ ms++;
+ }
+
+ return max;
+}
+
/*
* Initialize a general malloc_elem header structure
*/
@@ -386,16 +465,18 @@ malloc_elem_join_adjacent_free(struct malloc_elem *elem)
if (elem->next != NULL && elem->next->state == ELEM_FREE &&
next_elem_is_adjacent(elem)) {
void *erase;
+ size_t erase_len;
/* we will want to erase the trailer and header */
erase = RTE_PTR_SUB(elem->next, MALLOC_ELEM_TRAILER_LEN);
+ erase_len = MALLOC_ELEM_OVERHEAD + elem->next->pad;
/* remove from free list, join to this one */
malloc_elem_free_list_remove(elem->next);
join_elem(elem, elem->next);
- /* erase header and trailer */
- memset(erase, 0, MALLOC_ELEM_OVERHEAD);
+ /* erase header, trailer and pad */
+ memset(erase, 0, erase_len);
}
/*
@@ -406,9 +487,11 @@ malloc_elem_join_adjacent_free(struct malloc_elem *elem)
prev_elem_is_adjacent(elem)) {
struct malloc_elem *new_elem;
void *erase;
+ size_t erase_len;
/* we will want to erase trailer and header */
erase = RTE_PTR_SUB(elem, MALLOC_ELEM_TRAILER_LEN);
+ erase_len = MALLOC_ELEM_OVERHEAD + elem->pad;
/* remove from free list, join to this one */
malloc_elem_free_list_remove(elem->prev);
@@ -416,8 +499,8 @@ malloc_elem_join_adjacent_free(struct malloc_elem *elem)
new_elem = elem->prev;
join_elem(new_elem, elem);
- /* erase header and trailer */
- memset(erase, 0, MALLOC_ELEM_OVERHEAD);
+ /* erase header, trailer and pad */
+ memset(erase, 0, erase_len);
elem = new_elem;
}
@@ -436,7 +519,7 @@ malloc_elem_free(struct malloc_elem *elem)
void *ptr;
size_t data_len;
- ptr = RTE_PTR_ADD(elem, sizeof(*elem));
+ ptr = RTE_PTR_ADD(elem, MALLOC_ELEM_HEADER_LEN);
data_len = elem->size - MALLOC_ELEM_OVERHEAD;
elem = malloc_elem_join_adjacent_free(elem);
diff --git a/lib/librte_eal/common/malloc_elem.h b/lib/librte_eal/common/malloc_elem.h
index 7331af9c..e2bda4c0 100644
--- a/lib/librte_eal/common/malloc_elem.h
+++ b/lib/librte_eal/common/malloc_elem.h
@@ -179,4 +179,10 @@ malloc_elem_free_list_index(size_t size);
void
malloc_elem_free_list_insert(struct malloc_elem *elem);
+/*
+ * Find biggest IOVA-contiguous zone within an element with specified alignment.
+ */
+size_t
+malloc_elem_find_max_iova_contig(struct malloc_elem *elem, size_t align);
+
#endif /* MALLOC_ELEM_H_ */
diff --git a/lib/librte_eal/common/malloc_heap.c b/lib/librte_eal/common/malloc_heap.c
index d6cf3af8..12aaf2d7 100644
--- a/lib/librte_eal/common/malloc_heap.c
+++ b/lib/librte_eal/common/malloc_heap.c
@@ -149,6 +149,52 @@ find_suitable_element(struct malloc_heap *heap, size_t size,
}
/*
+ * Iterates through the freelist for a heap to find a free element with the
+ * biggest size and requested alignment. Will also set size to whatever element
+ * size that was found.
+ * Returns null on failure, or pointer to element on success.
+ */
+static struct malloc_elem *
+find_biggest_element(struct malloc_heap *heap, size_t *size,
+ unsigned int flags, size_t align, bool contig)
+{
+ struct malloc_elem *elem, *max_elem = NULL;
+ size_t idx, max_size = 0;
+
+ for (idx = 0; idx < RTE_HEAP_NUM_FREELISTS; idx++) {
+ for (elem = LIST_FIRST(&heap->free_head[idx]);
+ !!elem; elem = LIST_NEXT(elem, free_list)) {
+ size_t cur_size;
+ if (!check_hugepage_sz(flags, elem->msl->page_sz))
+ continue;
+ if (contig) {
+ cur_size =
+ malloc_elem_find_max_iova_contig(elem,
+ align);
+ } else {
+ void *data_start = RTE_PTR_ADD(elem,
+ MALLOC_ELEM_HEADER_LEN);
+ void *data_end = RTE_PTR_ADD(elem, elem->size -
+ MALLOC_ELEM_TRAILER_LEN);
+ void *aligned = RTE_PTR_ALIGN_CEIL(data_start,
+ align);
+ /* check if aligned data start is beyond end */
+ if (aligned >= data_end)
+ continue;
+ cur_size = RTE_PTR_DIFF(data_end, aligned);
+ }
+ if (cur_size > max_size) {
+ max_size = cur_size;
+ max_elem = elem;
+ }
+ }
+ }
+
+ *size = max_size;
+ return max_elem;
+}
+
+/*
* Main function to allocate a block of memory from the heap.
* It locks the free list, scans it, and adds a new memseg if the
* scan fails. Once the new memseg is added, it re-scans and should return
@@ -174,6 +220,26 @@ heap_alloc(struct malloc_heap *heap, const char *type __rte_unused, size_t size,
return elem == NULL ? NULL : (void *)(&elem[1]);
}
+static void *
+heap_alloc_biggest(struct malloc_heap *heap, const char *type __rte_unused,
+ unsigned int flags, size_t align, bool contig)
+{
+ struct malloc_elem *elem;
+ size_t size;
+
+ align = RTE_CACHE_LINE_ROUNDUP(align);
+
+ elem = find_biggest_element(heap, &size, flags, align, contig);
+ if (elem != NULL) {
+ elem = malloc_elem_alloc(elem, size, align, 0, contig);
+
+ /* increase heap's count of allocated elements */
+ heap->alloc_count++;
+ }
+
+ return elem == NULL ? NULL : (void *)(&elem[1]);
+}
+
/* this function is exposed in malloc_mp.h */
void
rollback_expand_heap(struct rte_memseg **ms, int n_segs,
@@ -575,6 +641,66 @@ malloc_heap_alloc(const char *type, size_t size, int socket_arg,
return NULL;
}
+static void *
+heap_alloc_biggest_on_socket(const char *type, int socket, unsigned int flags,
+ size_t align, bool contig)
+{
+ struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
+ struct malloc_heap *heap = &mcfg->malloc_heaps[socket];
+ void *ret;
+
+ rte_spinlock_lock(&(heap->lock));
+
+ align = align == 0 ? 1 : align;
+
+ ret = heap_alloc_biggest(heap, type, flags, align, contig);
+
+ rte_spinlock_unlock(&(heap->lock));
+
+ return ret;
+}
+
+void *
+malloc_heap_alloc_biggest(const char *type, int socket_arg, unsigned int flags,
+ size_t align, bool contig)
+{
+ int socket, i, cur_socket;
+ void *ret;
+
+ /* return NULL if align is not power-of-2 */
+ if ((align && !rte_is_power_of_2(align)))
+ return NULL;
+
+ if (!rte_eal_has_hugepages())
+ socket_arg = SOCKET_ID_ANY;
+
+ if (socket_arg == SOCKET_ID_ANY)
+ socket = malloc_get_numa_socket();
+ else
+ socket = socket_arg;
+
+ /* Check socket parameter */
+ if (socket >= RTE_MAX_NUMA_NODES)
+ return NULL;
+
+ ret = heap_alloc_biggest_on_socket(type, socket, flags, align,
+ contig);
+ if (ret != NULL || socket_arg != SOCKET_ID_ANY)
+ return ret;
+
+ /* try other heaps */
+ for (i = 0; i < (int) rte_socket_count(); i++) {
+ cur_socket = rte_socket_id_by_idx(i);
+ if (cur_socket == socket)
+ continue;
+ ret = heap_alloc_biggest_on_socket(type, cur_socket, flags,
+ align, contig);
+ if (ret != NULL)
+ return ret;
+ }
+ return NULL;
+}
+
/* this function is exposed in malloc_mp.h */
int
malloc_heap_free_pages(void *aligned_start, size_t aligned_len)
diff --git a/lib/librte_eal/common/malloc_heap.h b/lib/librte_eal/common/malloc_heap.h
index 03b80141..f52cb555 100644
--- a/lib/librte_eal/common/malloc_heap.h
+++ b/lib/librte_eal/common/malloc_heap.h
@@ -29,6 +29,10 @@ void *
malloc_heap_alloc(const char *type, size_t size, int socket, unsigned int flags,
size_t align, size_t bound, bool contig);
+void *
+malloc_heap_alloc_biggest(const char *type, int socket, unsigned int flags,
+ size_t align, bool contig);
+
int
malloc_heap_free(struct malloc_elem *elem);
diff --git a/lib/librte_eal/common/meson.build b/lib/librte_eal/common/meson.build
index 8a3dcfee..56005bea 100644
--- a/lib/librte_eal/common/meson.build
+++ b/lib/librte_eal/common/meson.build
@@ -8,6 +8,7 @@ common_objs = []
common_sources = files(
'eal_common_bus.c',
'eal_common_cpuflags.c',
+ 'eal_common_class.c',
'eal_common_devargs.c',
'eal_common_dev.c',
'eal_common_errno.c',
@@ -25,6 +26,7 @@ common_sources = files(
'eal_common_tailqs.c',
'eal_common_thread.c',
'eal_common_timer.c',
+ 'eal_common_uuid.c',
'malloc_elem.c',
'malloc_heap.c',
'malloc_mp.c',
@@ -46,6 +48,7 @@ common_headers = files(
'include/rte_branch_prediction.h',
'include/rte_bus.h',
'include/rte_bitmap.h',
+ 'include/rte_class.h',
'include/rte_common.h',
'include/rte_debug.h',
'include/rte_devargs.h',
@@ -75,6 +78,7 @@ common_headers = files(
'include/rte_string_fns.h',
'include/rte_tailq.h',
'include/rte_time.h',
+ 'include/rte_uuid.h',
'include/rte_version.h')
# special case install the generic headers, since they go in a subdir
diff --git a/lib/librte_eal/common/rte_service.c b/lib/librte_eal/common/rte_service.c
index 73507aac..8767c722 100644
--- a/lib/librte_eal/common/rte_service.c
+++ b/lib/librte_eal/common/rte_service.c
@@ -52,6 +52,7 @@ struct rte_service_spec_impl {
rte_atomic32_t num_mapped_cores;
uint64_t calls;
uint64_t cycles_spent;
+ uint8_t active_on_lcore[RTE_MAX_LCORE];
} __rte_cache_aligned;
/* the internal values of a service core */
@@ -61,7 +62,7 @@ struct core_state {
uint8_t runstate; /* running or stopped */
uint8_t is_service_core; /* set if core is currently a service core */
- /* extreme statistics */
+ uint64_t loops;
uint64_t calls_per_service[RTE_SERVICE_NUM_MAX];
} __rte_cache_aligned;
@@ -347,15 +348,19 @@ rte_service_runner_do_callback(struct rte_service_spec_impl *s,
static inline int32_t
-service_run(uint32_t i, struct core_state *cs, uint64_t service_mask)
+service_run(uint32_t i, int lcore, struct core_state *cs, uint64_t service_mask)
{
if (!service_valid(i))
return -EINVAL;
struct rte_service_spec_impl *s = &rte_services[i];
if (s->comp_runstate != RUNSTATE_RUNNING ||
s->app_runstate != RUNSTATE_RUNNING ||
- !(service_mask & (UINT64_C(1) << i)))
+ !(service_mask & (UINT64_C(1) << i))) {
+ s->active_on_lcore[lcore] = 0;
return -ENOEXEC;
+ }
+
+ s->active_on_lcore[lcore] = 1;
/* check do we need cmpset, if MT safe or <= 1 core
* mapped, atomic ops are not required.
@@ -374,6 +379,25 @@ service_run(uint32_t i, struct core_state *cs, uint64_t service_mask)
return 0;
}
+int32_t __rte_experimental
+rte_service_may_be_active(uint32_t id)
+{
+ uint32_t ids[RTE_MAX_LCORE] = {0};
+ struct rte_service_spec_impl *s = &rte_services[id];
+ int32_t lcore_count = rte_service_lcore_list(ids, RTE_MAX_LCORE);
+ int i;
+
+ if (!service_valid(id))
+ return -EINVAL;
+
+ for (i = 0; i < lcore_count; i++) {
+ if (s->active_on_lcore[ids[i]])
+ return 1;
+ }
+
+ return 0;
+}
+
int32_t rte_service_run_iter_on_app_lcore(uint32_t id,
uint32_t serialize_mt_unsafe)
{
@@ -398,7 +422,7 @@ int32_t rte_service_run_iter_on_app_lcore(uint32_t id,
return -EBUSY;
}
- int ret = service_run(id, cs, UINT64_MAX);
+ int ret = service_run(id, rte_lcore_id(), cs, UINT64_MAX);
if (serialize_mt_unsafe)
rte_atomic32_dec(&s->num_mapped_cores);
@@ -419,9 +443,11 @@ rte_service_runner_func(void *arg)
for (i = 0; i < RTE_SERVICE_NUM_MAX; i++) {
/* return value ignored as no change to code flow */
- service_run(i, cs, service_mask);
+ service_run(i, lcore, cs, service_mask);
}
+ cs->loops++;
+
rte_smp_rmb();
}
@@ -729,6 +755,28 @@ rte_service_attr_get(uint32_t id, uint32_t attr_id, uint32_t *attr_value)
}
}
+int32_t __rte_experimental
+rte_service_lcore_attr_get(uint32_t lcore, uint32_t attr_id,
+ uint64_t *attr_value)
+{
+ struct core_state *cs;
+
+ if (lcore >= RTE_MAX_LCORE || !attr_value)
+ return -EINVAL;
+
+ cs = &lcore_states[lcore];
+ if (!cs->is_service_core)
+ return -ENOTSUP;
+
+ switch (attr_id) {
+ case RTE_SERVICE_LCORE_ATTR_LOOPS:
+ *attr_value = cs->loops;
+ return 0;
+ default:
+ return -EINVAL;
+ }
+}
+
static void
rte_service_dump_one(FILE *f, struct rte_service_spec_impl *s,
uint64_t all_cycles, uint32_t reset)
@@ -764,6 +812,23 @@ rte_service_attr_reset_all(uint32_t id)
return 0;
}
+int32_t __rte_experimental
+rte_service_lcore_attr_reset_all(uint32_t lcore)
+{
+ struct core_state *cs;
+
+ if (lcore >= RTE_MAX_LCORE)
+ return -EINVAL;
+
+ cs = &lcore_states[lcore];
+ if (!cs->is_service_core)
+ return -ENOTSUP;
+
+ cs->loops = 0;
+
+ return 0;
+}
+
static void
service_dump_calls_per_lcore(FILE *f, uint32_t lcore, uint32_t reset)
{