diff options
Diffstat (limited to 'lib/librte_eal/common')
37 files changed, 2452 insertions, 843 deletions
diff --git a/lib/librte_eal/common/Makefile b/lib/librte_eal/common/Makefile index 48f870f2..cca68826 100644 --- a/lib/librte_eal/common/Makefile +++ b/lib/librte_eal/common/Makefile @@ -11,12 +11,12 @@ INC += rte_per_lcore.h rte_random.h INC += rte_tailq.h rte_interrupts.h rte_alarm.h INC += rte_string_fns.h rte_version.h INC += rte_eal_memconfig.h rte_malloc_heap.h -INC += rte_hexdump.h rte_devargs.h rte_bus.h rte_dev.h +INC += rte_hexdump.h rte_devargs.h rte_bus.h rte_dev.h rte_class.h INC += rte_pci_dev_feature_defs.h rte_pci_dev_features.h INC += rte_malloc.h rte_keepalive.h rte_time.h INC += rte_service.h rte_service_component.h INC += rte_bitmap.h rte_vfio.h rte_hypervisor.h rte_test.h -INC += rte_reciprocal.h rte_fbarray.h +INC += rte_reciprocal.h rte_fbarray.h rte_uuid.h GENERIC_INC := rte_atomic.h rte_byteorder.h rte_cycles.h rte_prefetch.h GENERIC_INC += rte_spinlock.h rte_memcpy.h rte_cpuflags.h rte_rwlock.h diff --git a/lib/librte_eal/common/eal_common_class.c b/lib/librte_eal/common/eal_common_class.c new file mode 100644 index 00000000..404a9065 --- /dev/null +++ b/lib/librte_eal/common/eal_common_class.c @@ -0,0 +1,64 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright 2018 GaĆ«tan Rivet + */ + +#include <stdio.h> +#include <string.h> +#include <sys/queue.h> + +#include <rte_class.h> +#include <rte_debug.h> + +struct rte_class_list rte_class_list = + TAILQ_HEAD_INITIALIZER(rte_class_list); + +__rte_experimental void +rte_class_register(struct rte_class *class) +{ + RTE_VERIFY(class); + RTE_VERIFY(class->name && strlen(class->name)); + + TAILQ_INSERT_TAIL(&rte_class_list, class, next); + RTE_LOG(DEBUG, EAL, "Registered [%s] device class.\n", class->name); +} + +__rte_experimental void +rte_class_unregister(struct rte_class *class) +{ + TAILQ_REMOVE(&rte_class_list, class, next); + RTE_LOG(DEBUG, EAL, "Unregistered [%s] device class.\n", class->name); +} + +__rte_experimental +struct rte_class * +rte_class_find(const struct rte_class *start, rte_class_cmp_t cmp, + const void *data) +{ + struct rte_class *cls; + + if (start != NULL) + cls = TAILQ_NEXT(start, next); + else + cls = TAILQ_FIRST(&rte_class_list); + while (cls != NULL) { + if (cmp(cls, data) == 0) + break; + cls = TAILQ_NEXT(cls, next); + } + return cls; +} + +static int +cmp_class_name(const struct rte_class *class, const void *_name) +{ + const char *name = _name; + + return strcmp(class->name, name); +} + +__rte_experimental +struct rte_class * +rte_class_find_by_name(const char *name) +{ + return rte_class_find(NULL, cmp_class_name, (const void *)name); +} diff --git a/lib/librte_eal/common/eal_common_dev.c b/lib/librte_eal/common/eal_common_dev.c index 61cb3b16..678dbcac 100644 --- a/lib/librte_eal/common/eal_common_dev.c +++ b/lib/librte_eal/common/eal_common_dev.c @@ -10,9 +10,12 @@ #include <rte_compat.h> #include <rte_bus.h> +#include <rte_class.h> #include <rte_dev.h> #include <rte_devargs.h> #include <rte_debug.h> +#include <rte_errno.h> +#include <rte_kvargs.h> #include <rte_log.h> #include <rte_spinlock.h> #include <rte_malloc.h> @@ -42,17 +45,27 @@ static struct dev_event_cb_list dev_event_cbs; /* spinlock for device callbacks */ static rte_spinlock_t dev_event_lock = RTE_SPINLOCK_INITIALIZER; -static int cmp_detached_dev_name(const struct rte_device *dev, - const void *_name) -{ - const char *name = _name; +struct dev_next_ctx { + struct rte_dev_iterator *it; + const char *bus_str; + const char *cls_str; +}; - /* skip attached devices */ - if (dev->driver != NULL) - return 1; +#define CTX(it, bus_str, cls_str) \ + (&(const struct dev_next_ctx){ \ + .it = it, \ + .bus_str = bus_str, \ + .cls_str = cls_str, \ + }) - return strcmp(dev->name, name); -} +#define ITCTX(ptr) \ + (((struct dev_next_ctx *)(intptr_t)ptr)->it) + +#define BUSCTX(ptr) \ + (((struct dev_next_ctx *)(intptr_t)ptr)->bus_str) + +#define CLSCTX(ptr) \ + (((struct dev_next_ctx *)(intptr_t)ptr)->cls_str) static int cmp_dev_name(const struct rte_device *dev, const void *_name) { @@ -138,8 +151,8 @@ int __rte_experimental rte_eal_hotplug_add(const char *busname, const char *devn if (da == NULL) return -ENOMEM; - ret = rte_devargs_parse(da, "%s:%s,%s", - busname, devname, devargs); + ret = rte_devargs_parsef(da, "%s:%s,%s", + busname, devname, devargs); if (ret) goto err_devarg; @@ -151,14 +164,19 @@ int __rte_experimental rte_eal_hotplug_add(const char *busname, const char *devn if (ret) goto err_devarg; - dev = bus->find_device(NULL, cmp_detached_dev_name, devname); + dev = bus->find_device(NULL, cmp_dev_name, devname); if (dev == NULL) { - RTE_LOG(ERR, EAL, "Cannot find unplugged device (%s)\n", + RTE_LOG(ERR, EAL, "Cannot find device (%s)\n", devname); ret = -ENODEV; goto err_devarg; } + if (dev->driver != NULL) { + RTE_LOG(ERR, EAL, "Device is already plugged\n"); + return -EEXIST; + } + ret = bus->plug(dev); if (ret) { RTE_LOG(ERR, EAL, "Driver cannot attach the device (%s)\n", @@ -200,6 +218,11 @@ rte_eal_hotplug_remove(const char *busname, const char *devname) return -EINVAL; } + if (dev->driver == NULL) { + RTE_LOG(ERR, EAL, "Device is already unplugged\n"); + return -ENOENT; + } + ret = bus->unplug(dev); if (ret) RTE_LOG(ERR, EAL, "Driver cannot detach the device (%s)\n", @@ -343,3 +366,201 @@ dev_callback_process(char *device_name, enum rte_dev_event_type event) } rte_spinlock_unlock(&dev_event_lock); } + +__rte_experimental +int +rte_dev_iterator_init(struct rte_dev_iterator *it, + const char *dev_str) +{ + struct rte_devargs devargs; + struct rte_class *cls = NULL; + struct rte_bus *bus = NULL; + + /* Having both bus_str and cls_str NULL is illegal, + * marking this iterator as invalid unless + * everything goes well. + */ + it->bus_str = NULL; + it->cls_str = NULL; + + devargs.data = dev_str; + if (rte_devargs_layers_parse(&devargs, dev_str)) + goto get_out; + + bus = devargs.bus; + cls = devargs.cls; + /* The string should have at least + * one layer specified. + */ + if (bus == NULL && cls == NULL) { + RTE_LOG(ERR, EAL, + "Either bus or class must be specified.\n"); + rte_errno = EINVAL; + goto get_out; + } + if (bus != NULL && bus->dev_iterate == NULL) { + RTE_LOG(ERR, EAL, "Bus %s not supported\n", bus->name); + rte_errno = ENOTSUP; + goto get_out; + } + if (cls != NULL && cls->dev_iterate == NULL) { + RTE_LOG(ERR, EAL, "Class %s not supported\n", cls->name); + rte_errno = ENOTSUP; + goto get_out; + } + it->bus_str = devargs.bus_str; + it->cls_str = devargs.cls_str; + it->dev_str = dev_str; + it->bus = bus; + it->cls = cls; + it->device = NULL; + it->class_device = NULL; +get_out: + return -rte_errno; +} + +static char * +dev_str_sane_copy(const char *str) +{ + size_t end; + char *copy; + + end = strcspn(str, ",/"); + if (str[end] == ',') { + copy = strdup(&str[end + 1]); + } else { + /* '/' or '\0' */ + copy = strdup(""); + } + if (copy == NULL) { + rte_errno = ENOMEM; + } else { + char *slash; + + slash = strchr(copy, '/'); + if (slash != NULL) + slash[0] = '\0'; + } + return copy; +} + +static int +class_next_dev_cmp(const struct rte_class *cls, + const void *ctx) +{ + struct rte_dev_iterator *it; + const char *cls_str = NULL; + void *dev; + + if (cls->dev_iterate == NULL) + return 1; + it = ITCTX(ctx); + cls_str = CLSCTX(ctx); + dev = it->class_device; + /* it->cls_str != NULL means a class + * was specified in the devstr. + */ + if (it->cls_str != NULL && cls != it->cls) + return 1; + /* If an error occurred previously, + * no need to test further. + */ + if (rte_errno != 0) + return -1; + dev = cls->dev_iterate(dev, cls_str, it); + it->class_device = dev; + return dev == NULL; +} + +static int +bus_next_dev_cmp(const struct rte_bus *bus, + const void *ctx) +{ + struct rte_device *dev = NULL; + struct rte_class *cls = NULL; + struct rte_dev_iterator *it; + const char *bus_str = NULL; + + if (bus->dev_iterate == NULL) + return 1; + it = ITCTX(ctx); + bus_str = BUSCTX(ctx); + dev = it->device; + /* it->bus_str != NULL means a bus + * was specified in the devstr. + */ + if (it->bus_str != NULL && bus != it->bus) + return 1; + /* If an error occurred previously, + * no need to test further. + */ + if (rte_errno != 0) + return -1; + if (it->cls_str == NULL) { + dev = bus->dev_iterate(dev, bus_str, it); + goto end; + } + /* cls_str != NULL */ + if (dev == NULL) { +next_dev_on_bus: + dev = bus->dev_iterate(dev, bus_str, it); + it->device = dev; + } + if (dev == NULL) + return 1; + if (it->cls != NULL) + cls = TAILQ_PREV(it->cls, rte_class_list, next); + cls = rte_class_find(cls, class_next_dev_cmp, ctx); + if (cls != NULL) { + it->cls = cls; + goto end; + } + goto next_dev_on_bus; +end: + it->device = dev; + return dev == NULL; +} +__rte_experimental +struct rte_device * +rte_dev_iterator_next(struct rte_dev_iterator *it) +{ + struct rte_bus *bus = NULL; + int old_errno = rte_errno; + char *bus_str = NULL; + char *cls_str = NULL; + + rte_errno = 0; + if (it->bus_str == NULL && it->cls_str == NULL) { + /* Invalid iterator. */ + rte_errno = EINVAL; + return NULL; + } + if (it->bus != NULL) + bus = TAILQ_PREV(it->bus, rte_bus_list, next); + if (it->bus_str != NULL) { + bus_str = dev_str_sane_copy(it->bus_str); + if (bus_str == NULL) + goto out; + } + if (it->cls_str != NULL) { + cls_str = dev_str_sane_copy(it->cls_str); + if (cls_str == NULL) + goto out; + } + while ((bus = rte_bus_find(bus, bus_next_dev_cmp, + CTX(it, bus_str, cls_str)))) { + if (it->device != NULL) { + it->bus = bus; + goto out; + } + if (it->bus_str != NULL || + rte_errno != 0) + break; + } + if (rte_errno == 0) + rte_errno = old_errno; +out: + free(bus_str); + free(cls_str); + return it->device; +} diff --git a/lib/librte_eal/common/eal_common_devargs.c b/lib/librte_eal/common/eal_common_devargs.c index b0434158..dac2402a 100644 --- a/lib/librte_eal/common/eal_common_devargs.c +++ b/lib/librte_eal/common/eal_common_devargs.c @@ -13,9 +13,14 @@ #include <string.h> #include <stdarg.h> +#include <rte_bus.h> +#include <rte_class.h> #include <rte_compat.h> #include <rte_dev.h> #include <rte_devargs.h> +#include <rte_errno.h> +#include <rte_kvargs.h> +#include <rte_log.h> #include <rte_tailq.h> #include "eal_private.h" @@ -56,30 +61,164 @@ rte_eal_parse_devargs_str(const char *devargs_str, return 0; } +static size_t +devargs_layer_count(const char *s) +{ + size_t i = s ? 1 : 0; + + while (s != NULL && s[0] != '\0') { + i += s[0] == '/'; + s++; + } + return i; +} + +int +rte_devargs_layers_parse(struct rte_devargs *devargs, + const char *devstr) +{ + struct { + const char *key; + const char *str; + struct rte_kvargs *kvlist; + } layers[] = { + { "bus=", NULL, NULL, }, + { "class=", NULL, NULL, }, + { "driver=", NULL, NULL, }, + }; + struct rte_kvargs_pair *kv = NULL; + struct rte_class *cls = NULL; + struct rte_bus *bus = NULL; + const char *s = devstr; + size_t nblayer; + size_t i = 0; + int ret = 0; + + /* Split each sub-lists. */ + nblayer = devargs_layer_count(devstr); + if (nblayer > RTE_DIM(layers)) { + RTE_LOG(ERR, EAL, "Invalid format: too many layers (%zu)\n", + nblayer); + ret = -E2BIG; + goto get_out; + } + + /* If the devargs points the devstr + * as source data, then it should not allocate + * anything and keep referring only to it. + */ + if (devargs->data != devstr) { + devargs->data = strdup(devstr); + if (devargs->data == NULL) { + RTE_LOG(ERR, EAL, "OOM\n"); + ret = -ENOMEM; + goto get_out; + } + s = devargs->data; + } + + while (s != NULL) { + if (i >= RTE_DIM(layers)) { + RTE_LOG(ERR, EAL, "Unrecognized layer %s\n", s); + ret = -EINVAL; + goto get_out; + } + /* + * The last layer is free-form. + * The "driver" key is not required (but accepted). + */ + if (strncmp(layers[i].key, s, strlen(layers[i].key)) && + i != RTE_DIM(layers) - 1) + goto next_layer; + layers[i].str = s; + layers[i].kvlist = rte_kvargs_parse_delim(s, NULL, "/"); + if (layers[i].kvlist == NULL) { + RTE_LOG(ERR, EAL, "Could not parse %s\n", s); + ret = -EINVAL; + goto get_out; + } + s = strchr(s, '/'); + if (s != NULL) + s++; +next_layer: + i++; + } + + /* Parse each sub-list. */ + for (i = 0; i < RTE_DIM(layers); i++) { + if (layers[i].kvlist == NULL) + continue; + kv = &layers[i].kvlist->pairs[0]; + if (strcmp(kv->key, "bus") == 0) { + bus = rte_bus_find_by_name(kv->value); + if (bus == NULL) { + RTE_LOG(ERR, EAL, "Could not find bus \"%s\"\n", + kv->value); + ret = -EFAULT; + goto get_out; + } + } else if (strcmp(kv->key, "class") == 0) { + cls = rte_class_find_by_name(kv->value); + if (cls == NULL) { + RTE_LOG(ERR, EAL, "Could not find class \"%s\"\n", + kv->value); + ret = -EFAULT; + goto get_out; + } + } else if (strcmp(kv->key, "driver") == 0) { + /* Ignore */ + continue; + } + } + + /* Fill devargs fields. */ + devargs->bus_str = layers[0].str; + devargs->cls_str = layers[1].str; + devargs->drv_str = layers[2].str; + devargs->bus = bus; + devargs->cls = cls; + + /* If we own the data, clean up a bit + * the several layers string, to ease + * their parsing afterward. + */ + if (devargs->data != devstr) { + char *s = (void *)(intptr_t)(devargs->data); + + while ((s = strchr(s, '/'))) { + *s = '\0'; + s++; + } + } + +get_out: + for (i = 0; i < RTE_DIM(layers); i++) { + if (layers[i].kvlist) + rte_kvargs_free(layers[i].kvlist); + } + if (ret != 0) + rte_errno = -ret; + return ret; +} + static int bus_name_cmp(const struct rte_bus *bus, const void *name) { return strncmp(bus->name, name, strlen(bus->name)); } -int __rte_experimental -rte_devargs_parse(struct rte_devargs *da, const char *format, ...) +__rte_experimental +int +rte_devargs_parse(struct rte_devargs *da, const char *dev) { struct rte_bus *bus = NULL; - va_list ap; - va_start(ap, format); - char dev[vsnprintf(NULL, 0, format, ap) + 1]; const char *devname; const size_t maxlen = sizeof(da->name); size_t i; - va_end(ap); if (da == NULL) return -EINVAL; - va_start(ap, format); - vsnprintf(dev, sizeof(dev), format, ap); - va_end(ap); /* Retrieve eventual bus info */ do { devname = dev; @@ -96,7 +235,7 @@ rte_devargs_parse(struct rte_devargs *da, const char *format, ...) da->name[i] = devname[i]; i++; if (i == maxlen) { - fprintf(stderr, "WARNING: Parsing \"%s\": device name should be shorter than %zu\n", + RTE_LOG(WARNING, EAL, "Parsing \"%s\": device name should be shorter than %zu\n", dev, maxlen); da->name[i - 1] = '\0'; return -EINVAL; @@ -106,7 +245,7 @@ rte_devargs_parse(struct rte_devargs *da, const char *format, ...) if (bus == NULL) { bus = rte_bus_find_by_device_name(da->name); if (bus == NULL) { - fprintf(stderr, "ERROR: failed to parse device \"%s\"\n", + RTE_LOG(ERR, EAL, "failed to parse device \"%s\"\n", da->name); return -EFAULT; } @@ -118,12 +257,40 @@ rte_devargs_parse(struct rte_devargs *da, const char *format, ...) else da->args = strdup(""); if (da->args == NULL) { - fprintf(stderr, "ERROR: not enough memory to parse arguments\n"); + RTE_LOG(ERR, EAL, "not enough memory to parse arguments\n"); return -ENOMEM; } return 0; } +__rte_experimental +int +rte_devargs_parsef(struct rte_devargs *da, const char *format, ...) +{ + va_list ap; + size_t len; + char *dev; + + if (da == NULL) + return -EINVAL; + + va_start(ap, format); + len = vsnprintf(NULL, 0, format, ap); + va_end(ap); + + dev = calloc(1, len + 1); + if (dev == NULL) { + RTE_LOG(ERR, EAL, "not enough memory to parse device\n"); + return -ENOMEM; + } + + va_start(ap, format); + vsnprintf(dev, len + 1, format, ap); + va_end(ap); + + return rte_devargs_parse(da, dev); +} + int __rte_experimental rte_devargs_insert(struct rte_devargs *da) { @@ -150,7 +317,7 @@ rte_devargs_add(enum rte_devtype devtype, const char *devargs_str) if (devargs == NULL) goto fail; - if (rte_devargs_parse(devargs, "%s", dev)) + if (rte_devargs_parse(devargs, dev)) goto fail; devargs->type = devtype; bus = devargs->bus; diff --git a/lib/librte_eal/common/eal_common_fbarray.c b/lib/librte_eal/common/eal_common_fbarray.c index 019f84c1..43caf3ce 100644 --- a/lib/librte_eal/common/eal_common_fbarray.c +++ b/lib/librte_eal/common/eal_common_fbarray.c @@ -231,7 +231,7 @@ find_next_n(const struct rte_fbarray *arr, unsigned int start, unsigned int n, return MASK_GET_IDX(msk_idx, run_start); } /* we didn't find anything */ - rte_errno = used ? -ENOENT : -ENOSPC; + rte_errno = used ? ENOENT : ENOSPC; return -1; } @@ -287,7 +287,7 @@ find_next(const struct rte_fbarray *arr, unsigned int start, bool used) return MASK_GET_IDX(idx, found); } /* we didn't find anything */ - rte_errno = used ? -ENOENT : -ENOSPC; + rte_errno = used ? ENOENT : ENOSPC; return -1; } @@ -353,6 +353,277 @@ find_contig(const struct rte_fbarray *arr, unsigned int start, bool used) } static int +find_prev_n(const struct rte_fbarray *arr, unsigned int start, unsigned int n, + bool used) +{ + const struct used_mask *msk = get_used_mask(arr->data, arr->elt_sz, + arr->len); + unsigned int msk_idx, lookbehind_idx, first, first_mod; + uint64_t ignore_msk; + + /* + * mask only has granularity of MASK_ALIGN, but start may not be aligned + * on that boundary, so construct a special mask to exclude anything we + * don't want to see to avoid confusing ctz. + */ + first = MASK_LEN_TO_IDX(start); + first_mod = MASK_LEN_TO_MOD(start); + /* we're going backwards, so mask must start from the top */ + ignore_msk = first_mod == MASK_ALIGN - 1 ? + -1ULL : /* prevent overflow */ + ~(-1ULL << (first_mod + 1)); + + /* go backwards, include zero */ + msk_idx = first; + do { + uint64_t cur_msk, lookbehind_msk; + unsigned int run_start, run_end, ctz, left; + bool found = false; + /* + * The process of getting n consecutive bits from the top for + * arbitrary n is a bit involved, but here it is in a nutshell: + * + * 1. let n be the number of consecutive bits we're looking for + * 2. check if n can fit in one mask, and if so, do n-1 + * lshift-ands to see if there is an appropriate run inside + * our current mask + * 2a. if we found a run, bail out early + * 2b. if we didn't find a run, proceed + * 3. invert the mask and count trailing zeroes (that is, count + * how many consecutive set bits we had starting from the + * start of current mask) as k + * 3a. if k is 0, continue to next mask + * 3b. if k is not 0, we have a potential run + * 4. to satisfy our requirements, next mask must have n-k + * consecutive set bits at the end, so we will do (n-k-1) + * lshift-ands and check if last bit is set. + * + * Step 4 will need to be repeated if (n-k) > MASK_ALIGN until + * we either run out of masks, lose the run, or find what we + * were looking for. + */ + cur_msk = msk->data[msk_idx]; + left = n; + + /* if we're looking for free spaces, invert the mask */ + if (!used) + cur_msk = ~cur_msk; + + /* if we have an ignore mask, ignore once */ + if (ignore_msk) { + cur_msk &= ignore_msk; + ignore_msk = 0; + } + + /* if n can fit in within a single mask, do a search */ + if (n <= MASK_ALIGN) { + uint64_t tmp_msk = cur_msk; + unsigned int s_idx; + for (s_idx = 0; s_idx < n - 1; s_idx++) + tmp_msk &= tmp_msk << 1ULL; + /* we found what we were looking for */ + if (tmp_msk != 0) { + /* clz will give us offset from end of mask, and + * we only get the end of our run, not start, + * so adjust result to point to where start + * would have been. + */ + run_start = MASK_ALIGN - + __builtin_clzll(tmp_msk) - n; + return MASK_GET_IDX(msk_idx, run_start); + } + } + + /* + * we didn't find our run within the mask, or n > MASK_ALIGN, + * so we're going for plan B. + */ + + /* count trailing zeroes on inverted mask */ + if (~cur_msk == 0) + ctz = sizeof(cur_msk) * 8; + else + ctz = __builtin_ctzll(~cur_msk); + + /* if there aren't any runs at the start either, just + * continue + */ + if (ctz == 0) + continue; + + /* we have a partial run at the start, so try looking behind */ + run_end = MASK_GET_IDX(msk_idx, ctz); + left -= ctz; + + /* go backwards, include zero */ + lookbehind_idx = msk_idx - 1; + + /* we can't lookbehind as we've run out of masks, so stop */ + if (msk_idx == 0) + break; + + do { + const uint64_t last_bit = 1ULL << (MASK_ALIGN - 1); + unsigned int s_idx, need; + + lookbehind_msk = msk->data[lookbehind_idx]; + + /* if we're looking for free space, invert the mask */ + if (!used) + lookbehind_msk = ~lookbehind_msk; + + /* figure out how many consecutive bits we need here */ + need = RTE_MIN(left, MASK_ALIGN); + + for (s_idx = 0; s_idx < need - 1; s_idx++) + lookbehind_msk &= lookbehind_msk << 1ULL; + + /* if last bit is not set, we've lost the run */ + if ((lookbehind_msk & last_bit) == 0) { + /* + * we've scanned this far, so we know there are + * no runs in the space we've lookbehind-scanned + * as well, so skip that on next iteration. + */ + ignore_msk = -1ULL << need; + msk_idx = lookbehind_idx; + break; + } + + left -= need; + + /* check if we've found what we were looking for */ + if (left == 0) { + found = true; + break; + } + } while ((lookbehind_idx--) != 0); /* decrement after check to + * include zero + */ + + /* we didn't find anything, so continue */ + if (!found) + continue; + + /* we've found what we were looking for, but we only know where + * the run ended, so calculate start position. + */ + return run_end - n; + } while (msk_idx-- != 0); /* decrement after check to include zero */ + /* we didn't find anything */ + rte_errno = used ? ENOENT : ENOSPC; + return -1; +} + +static int +find_prev(const struct rte_fbarray *arr, unsigned int start, bool used) +{ + const struct used_mask *msk = get_used_mask(arr->data, arr->elt_sz, + arr->len); + unsigned int idx, first, first_mod; + uint64_t ignore_msk; + + /* + * mask only has granularity of MASK_ALIGN, but start may not be aligned + * on that boundary, so construct a special mask to exclude anything we + * don't want to see to avoid confusing clz. + */ + first = MASK_LEN_TO_IDX(start); + first_mod = MASK_LEN_TO_MOD(start); + /* we're going backwards, so mask must start from the top */ + ignore_msk = first_mod == MASK_ALIGN - 1 ? + -1ULL : /* prevent overflow */ + ~(-1ULL << (first_mod + 1)); + + /* go backwards, include zero */ + idx = first; + do { + uint64_t cur = msk->data[idx]; + int found; + + /* if we're looking for free entries, invert mask */ + if (!used) + cur = ~cur; + + /* ignore everything before start on first iteration */ + if (idx == first) + cur &= ignore_msk; + + /* check if we have any entries */ + if (cur == 0) + continue; + + /* + * find last set bit - that will correspond to whatever it is + * that we're looking for. we're counting trailing zeroes, thus + * the value we get is counted from end of mask, so calculate + * position from start of mask. + */ + found = MASK_ALIGN - __builtin_clzll(cur) - 1; + + return MASK_GET_IDX(idx, found); + } while (idx-- != 0); /* decrement after check to include zero*/ + + /* we didn't find anything */ + rte_errno = used ? ENOENT : ENOSPC; + return -1; +} + +static int +find_rev_contig(const struct rte_fbarray *arr, unsigned int start, bool used) +{ + const struct used_mask *msk = get_used_mask(arr->data, arr->elt_sz, + arr->len); + unsigned int idx, first, first_mod; + unsigned int need_len, result = 0; + + first = MASK_LEN_TO_IDX(start); + first_mod = MASK_LEN_TO_MOD(start); + + /* go backwards, include zero */ + idx = first; + do { + uint64_t cur = msk->data[idx]; + unsigned int run_len; + + need_len = MASK_ALIGN; + + /* if we're looking for free entries, invert mask */ + if (!used) + cur = ~cur; + + /* ignore everything after start on first iteration */ + if (idx == first) { + unsigned int end_len = MASK_ALIGN - first_mod - 1; + cur <<= end_len; + /* at the start, we don't need the full mask len */ + need_len -= end_len; + } + + /* we will be looking for zeroes, so invert the mask */ + cur = ~cur; + + /* if mask is zero, we have a complete run */ + if (cur == 0) + goto endloop; + + /* + * see where run ends, starting from the end. + */ + run_len = __builtin_clzll(cur); + + /* add however many zeroes we've had in the last run and quit */ + if (run_len < need_len) { + result += run_len; + break; + } +endloop: + result += need_len; + } while (idx-- != 0); /* decrement after check to include zero */ + return result; +} + +static int set_used(struct rte_fbarray *arr, unsigned int idx, bool used) { struct used_mask *msk; @@ -434,39 +705,52 @@ rte_fbarray_init(struct rte_fbarray *arr, const char *name, unsigned int len, if (data == NULL) goto fail; - eal_get_fbarray_path(path, sizeof(path), name); + if (internal_config.no_shconf) { + /* remap virtual area as writable */ + void *new_data = mmap(data, mmap_len, PROT_READ | PROT_WRITE, + MAP_FIXED | MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + if (new_data == MAP_FAILED) { + RTE_LOG(DEBUG, EAL, "%s(): couldn't remap anonymous memory: %s\n", + __func__, strerror(errno)); + goto fail; + } + } else { + eal_get_fbarray_path(path, sizeof(path), name); - /* - * Each fbarray is unique to process namespace, i.e. the filename - * depends on process prefix. Try to take out a lock and see if we - * succeed. If we don't, someone else is using it already. - */ - fd = open(path, O_CREAT | O_RDWR, 0600); - if (fd < 0) { - RTE_LOG(DEBUG, EAL, "%s(): couldn't open %s: %s\n", __func__, - path, strerror(errno)); - rte_errno = errno; - goto fail; - } else if (flock(fd, LOCK_EX | LOCK_NB)) { - RTE_LOG(DEBUG, EAL, "%s(): couldn't lock %s: %s\n", __func__, - path, strerror(errno)); - rte_errno = EBUSY; - goto fail; - } + /* + * Each fbarray is unique to process namespace, i.e. the + * filename depends on process prefix. Try to take out a lock + * and see if we succeed. If we don't, someone else is using it + * already. + */ + fd = open(path, O_CREAT | O_RDWR, 0600); + if (fd < 0) { + RTE_LOG(DEBUG, EAL, "%s(): couldn't open %s: %s\n", + __func__, path, strerror(errno)); + rte_errno = errno; + goto fail; + } else if (flock(fd, LOCK_EX | LOCK_NB)) { + RTE_LOG(DEBUG, EAL, "%s(): couldn't lock %s: %s\n", + __func__, path, strerror(errno)); + rte_errno = EBUSY; + goto fail; + } - /* take out a non-exclusive lock, so that other processes could still - * attach to it, but no other process could reinitialize it. - */ - if (flock(fd, LOCK_SH | LOCK_NB)) { - rte_errno = errno; - goto fail; - } + /* take out a non-exclusive lock, so that other processes could + * still attach to it, but no other process could reinitialize + * it. + */ + if (flock(fd, LOCK_SH | LOCK_NB)) { + rte_errno = errno; + goto fail; + } - if (resize_and_map(fd, data, mmap_len)) - goto fail; + if (resize_and_map(fd, data, mmap_len)) + goto fail; - /* we've mmap'ed the file, we can now close the fd */ - close(fd); + /* we've mmap'ed the file, we can now close the fd */ + close(fd); + } /* initialize the data */ memset(data, 0, mmap_len); @@ -675,8 +959,8 @@ rte_fbarray_is_used(struct rte_fbarray *arr, unsigned int idx) return ret; } -int __rte_experimental -rte_fbarray_find_next_free(struct rte_fbarray *arr, unsigned int start) +static int +fbarray_find(struct rte_fbarray *arr, unsigned int start, bool next, bool used) { int ret = -1; @@ -688,36 +972,106 @@ rte_fbarray_find_next_free(struct rte_fbarray *arr, unsigned int start) /* prevent array from changing under us */ rte_rwlock_read_lock(&arr->rwlock); - if (arr->len == arr->count) { - rte_errno = ENOSPC; - goto out; + /* cheap checks to prevent doing useless work */ + if (!used) { + if (arr->len == arr->count) { + rte_errno = ENOSPC; + goto out; + } + if (arr->count == 0) { + ret = start; + goto out; + } + } else { + if (arr->count == 0) { + rte_errno = ENOENT; + goto out; + } + if (arr->len == arr->count) { + ret = start; + goto out; + } } - - ret = find_next(arr, start, false); + if (next) + ret = find_next(arr, start, used); + else + ret = find_prev(arr, start, used); out: rte_rwlock_read_unlock(&arr->rwlock); return ret; } int __rte_experimental +rte_fbarray_find_next_free(struct rte_fbarray *arr, unsigned int start) +{ + return fbarray_find(arr, start, true, false); +} + +int __rte_experimental rte_fbarray_find_next_used(struct rte_fbarray *arr, unsigned int start) { + return fbarray_find(arr, start, true, true); +} + +int __rte_experimental +rte_fbarray_find_prev_free(struct rte_fbarray *arr, unsigned int start) +{ + return fbarray_find(arr, start, false, false); +} + +int __rte_experimental +rte_fbarray_find_prev_used(struct rte_fbarray *arr, unsigned int start) +{ + return fbarray_find(arr, start, false, true); +} + +static int +fbarray_find_n(struct rte_fbarray *arr, unsigned int start, unsigned int n, + bool next, bool used) +{ int ret = -1; - if (arr == NULL || start >= arr->len) { + if (arr == NULL || start >= arr->len || n > arr->len || n == 0) { rte_errno = EINVAL; return -1; } + if (next && (arr->len - start) < n) { + rte_errno = used ? ENOENT : ENOSPC; + return -1; + } + if (!next && start < (n - 1)) { + rte_errno = used ? ENOENT : ENOSPC; + return -1; + } /* prevent array from changing under us */ rte_rwlock_read_lock(&arr->rwlock); - if (arr->count == 0) { - rte_errno = ENOENT; - goto out; + /* cheap checks to prevent doing useless work */ + if (!used) { + if (arr->len == arr->count || arr->len - arr->count < n) { + rte_errno = ENOSPC; + goto out; + } + if (arr->count == 0) { + ret = next ? start : start - n + 1; + goto out; + } + } else { + if (arr->count < n) { + rte_errno = ENOENT; + goto out; + } + if (arr->count == arr->len) { + ret = next ? start : start - n + 1; + goto out; + } } - ret = find_next(arr, start, true); + if (next) + ret = find_next_n(arr, start, n, used); + else + ret = find_prev_n(arr, start, n, used); out: rte_rwlock_read_unlock(&arr->rwlock); return ret; @@ -727,54 +1081,33 @@ int __rte_experimental rte_fbarray_find_next_n_free(struct rte_fbarray *arr, unsigned int start, unsigned int n) { - int ret = -1; - - if (arr == NULL || start >= arr->len || n > arr->len) { - rte_errno = EINVAL; - return -1; - } - - /* prevent array from changing under us */ - rte_rwlock_read_lock(&arr->rwlock); - - if (arr->len == arr->count || arr->len - arr->count < n) { - rte_errno = ENOSPC; - goto out; - } - - ret = find_next_n(arr, start, n, false); -out: - rte_rwlock_read_unlock(&arr->rwlock); - return ret; + return fbarray_find_n(arr, start, n, true, false); } int __rte_experimental rte_fbarray_find_next_n_used(struct rte_fbarray *arr, unsigned int start, unsigned int n) { - int ret = -1; - - if (arr == NULL || start >= arr->len || n > arr->len) { - rte_errno = EINVAL; - return -1; - } - - /* prevent array from changing under us */ - rte_rwlock_read_lock(&arr->rwlock); - - if (arr->count < n) { - rte_errno = ENOENT; - goto out; - } + return fbarray_find_n(arr, start, n, true, true); +} - ret = find_next_n(arr, start, n, true); -out: - rte_rwlock_read_unlock(&arr->rwlock); - return ret; +int __rte_experimental +rte_fbarray_find_prev_n_free(struct rte_fbarray *arr, unsigned int start, + unsigned int n) +{ + return fbarray_find_n(arr, start, n, false, false); } int __rte_experimental -rte_fbarray_find_contig_free(struct rte_fbarray *arr, unsigned int start) +rte_fbarray_find_prev_n_used(struct rte_fbarray *arr, unsigned int start, + unsigned int n) +{ + return fbarray_find_n(arr, start, n, false, true); +} + +static int +fbarray_find_contig(struct rte_fbarray *arr, unsigned int start, bool next, + bool used) { int ret = -1; @@ -786,39 +1119,66 @@ rte_fbarray_find_contig_free(struct rte_fbarray *arr, unsigned int start) /* prevent array from changing under us */ rte_rwlock_read_lock(&arr->rwlock); - if (arr->len == arr->count) { - rte_errno = ENOSPC; - goto out; - } - - if (arr->count == 0) { - ret = arr->len - start; - goto out; + /* cheap checks to prevent doing useless work */ + if (used) { + if (arr->count == 0) { + ret = 0; + goto out; + } + if (next && arr->count == arr->len) { + ret = arr->len - start; + goto out; + } + if (!next && arr->count == arr->len) { + ret = start + 1; + goto out; + } + } else { + if (arr->len == arr->count) { + ret = 0; + goto out; + } + if (next && arr->count == 0) { + ret = arr->len - start; + goto out; + } + if (!next && arr->count == 0) { + ret = start + 1; + goto out; + } } - ret = find_contig(arr, start, false); + if (next) + ret = find_contig(arr, start, used); + else + ret = find_rev_contig(arr, start, used); out: rte_rwlock_read_unlock(&arr->rwlock); return ret; } int __rte_experimental -rte_fbarray_find_contig_used(struct rte_fbarray *arr, unsigned int start) +rte_fbarray_find_contig_free(struct rte_fbarray *arr, unsigned int start) { - int ret = -1; - - if (arr == NULL || start >= arr->len) { - rte_errno = EINVAL; - return -1; - } + return fbarray_find_contig(arr, start, true, false); +} - /* prevent array from changing under us */ - rte_rwlock_read_lock(&arr->rwlock); +int __rte_experimental +rte_fbarray_find_contig_used(struct rte_fbarray *arr, unsigned int start) +{ + return fbarray_find_contig(arr, start, true, true); +} - ret = find_contig(arr, start, true); +int __rte_experimental +rte_fbarray_find_rev_contig_free(struct rte_fbarray *arr, unsigned int start) +{ + return fbarray_find_contig(arr, start, false, false); +} - rte_rwlock_read_unlock(&arr->rwlock); - return ret; +int __rte_experimental +rte_fbarray_find_rev_contig_used(struct rte_fbarray *arr, unsigned int start) +{ + return fbarray_find_contig(arr, start, false, true); } int __rte_experimental diff --git a/lib/librte_eal/common/eal_common_log.c b/lib/librte_eal/common/eal_common_log.c index 81811894..c714a4bd 100644 --- a/lib/librte_eal/common/eal_common_log.c +++ b/lib/librte_eal/common/eal_common_log.c @@ -335,9 +335,7 @@ static const struct logtype logtype_strings[] = { }; /* Logging should be first initializer (before drivers and bus) */ -RTE_INIT_PRIO(rte_log_init, LOG); -static void -rte_log_init(void) +RTE_INIT_PRIO(rte_log_init, LOG) { uint32_t i; diff --git a/lib/librte_eal/common/eal_common_memory.c b/lib/librte_eal/common/eal_common_memory.c index 4f0688f9..fbfb1b05 100644 --- a/lib/librte_eal/common/eal_common_memory.c +++ b/lib/librte_eal/common/eal_common_memory.c @@ -34,7 +34,7 @@ #define MEMSEG_LIST_FMT "memseg-%" PRIu64 "k-%i-%i" -static uint64_t baseaddr_offset; +static void *next_baseaddr; static uint64_t system_page_sz; void * @@ -56,21 +56,27 @@ eal_get_virtual_area(void *requested_addr, size_t *size, allow_shrink = (flags & EAL_VIRTUAL_AREA_ALLOW_SHRINK) > 0; unmap = (flags & EAL_VIRTUAL_AREA_UNMAP) > 0; - if (requested_addr == NULL && internal_config.base_virtaddr != 0) { - requested_addr = (void *) (internal_config.base_virtaddr + - (size_t)baseaddr_offset); + if (next_baseaddr == NULL && internal_config.base_virtaddr != 0 && + rte_eal_process_type() == RTE_PROC_PRIMARY) + next_baseaddr = (void *) internal_config.base_virtaddr; + + if (requested_addr == NULL && next_baseaddr != NULL) { + requested_addr = next_baseaddr; requested_addr = RTE_PTR_ALIGN(requested_addr, page_sz); addr_is_hint = true; } - /* if requested address is not aligned by page size, or if requested - * address is NULL, add page size to requested length as we may get an - * address that's aligned by system page size, which can be smaller than - * our requested page size. additionally, we shouldn't try to align if - * system page size is the same as requested page size. + /* we don't need alignment of resulting pointer in the following cases: + * + * 1. page size is equal to system size + * 2. we have a requested address, and it is page-aligned, and we will + * be discarding the address if we get a different one. + * + * for all other cases, alignment is potentially necessary. */ no_align = (requested_addr != NULL && - ((uintptr_t)requested_addr & (page_sz - 1)) == 0) || + requested_addr == RTE_PTR_ALIGN(requested_addr, page_sz) && + !addr_is_hint) || page_sz == system_page_sz; do { @@ -116,6 +122,8 @@ eal_get_virtual_area(void *requested_addr, size_t *size, RTE_LOG(WARNING, EAL, "WARNING! Base virtual address hint (%p != %p) not respected!\n", requested_addr, aligned_addr); RTE_LOG(WARNING, EAL, " This may cause issues with mapping memory into secondary processes\n"); + } else if (next_baseaddr != NULL) { + next_baseaddr = RTE_PTR_ADD(aligned_addr, *size); } RTE_LOG(DEBUG, EAL, "Virtual area found at %p (size = 0x%zx)\n", @@ -148,387 +156,9 @@ eal_get_virtual_area(void *requested_addr, size_t *size, munmap(aligned_end, after_len); } - baseaddr_offset += *size; - return aligned_addr; } -static uint64_t -get_mem_amount(uint64_t page_sz, uint64_t max_mem) -{ - uint64_t area_sz, max_pages; - - /* limit to RTE_MAX_MEMSEG_PER_LIST pages or RTE_MAX_MEM_MB_PER_LIST */ - max_pages = RTE_MAX_MEMSEG_PER_LIST; - max_mem = RTE_MIN((uint64_t)RTE_MAX_MEM_MB_PER_LIST << 20, max_mem); - - area_sz = RTE_MIN(page_sz * max_pages, max_mem); - - /* make sure the list isn't smaller than the page size */ - area_sz = RTE_MAX(area_sz, page_sz); - - return RTE_ALIGN(area_sz, page_sz); -} - -static int -free_memseg_list(struct rte_memseg_list *msl) -{ - if (rte_fbarray_destroy(&msl->memseg_arr)) { - RTE_LOG(ERR, EAL, "Cannot destroy memseg list\n"); - return -1; - } - memset(msl, 0, sizeof(*msl)); - return 0; -} - -static int -alloc_memseg_list(struct rte_memseg_list *msl, uint64_t page_sz, - uint64_t max_mem, int socket_id, int type_msl_idx) -{ - char name[RTE_FBARRAY_NAME_LEN]; - uint64_t mem_amount; - int max_segs; - - mem_amount = get_mem_amount(page_sz, max_mem); - max_segs = mem_amount / page_sz; - - snprintf(name, sizeof(name), MEMSEG_LIST_FMT, page_sz >> 10, socket_id, - type_msl_idx); - if (rte_fbarray_init(&msl->memseg_arr, name, max_segs, - sizeof(struct rte_memseg))) { - RTE_LOG(ERR, EAL, "Cannot allocate memseg list: %s\n", - rte_strerror(rte_errno)); - return -1; - } - - msl->page_sz = page_sz; - msl->socket_id = socket_id; - msl->base_va = NULL; - - RTE_LOG(DEBUG, EAL, "Memseg list allocated: 0x%zxkB at socket %i\n", - (size_t)page_sz >> 10, socket_id); - - return 0; -} - -static int -alloc_va_space(struct rte_memseg_list *msl) -{ - uint64_t page_sz; - size_t mem_sz; - void *addr; - int flags = 0; - -#ifdef RTE_ARCH_PPC_64 - flags |= MAP_HUGETLB; -#endif - - page_sz = msl->page_sz; - mem_sz = page_sz * msl->memseg_arr.len; - - addr = eal_get_virtual_area(msl->base_va, &mem_sz, page_sz, 0, flags); - if (addr == NULL) { - if (rte_errno == EADDRNOTAVAIL) - RTE_LOG(ERR, EAL, "Could not mmap %llu bytes at [%p] - please use '--base-virtaddr' option\n", - (unsigned long long)mem_sz, msl->base_va); - else - RTE_LOG(ERR, EAL, "Cannot reserve memory\n"); - return -1; - } - msl->base_va = addr; - - return 0; -} - -static int __rte_unused -memseg_primary_init_32(void) -{ - struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config; - int active_sockets, hpi_idx, msl_idx = 0; - unsigned int socket_id, i; - struct rte_memseg_list *msl; - uint64_t extra_mem_per_socket, total_extra_mem, total_requested_mem; - uint64_t max_mem; - - /* no-huge does not need this at all */ - if (internal_config.no_hugetlbfs) - return 0; - - /* this is a giant hack, but desperate times call for desperate - * measures. in legacy 32-bit mode, we cannot preallocate VA space, - * because having upwards of 2 gigabytes of VA space already mapped will - * interfere with our ability to map and sort hugepages. - * - * therefore, in legacy 32-bit mode, we will be initializing memseg - * lists much later - in eal_memory.c, right after we unmap all the - * unneeded pages. this will not affect secondary processes, as those - * should be able to mmap the space without (too many) problems. - */ - if (internal_config.legacy_mem) - return 0; - - /* 32-bit mode is a very special case. we cannot know in advance where - * the user will want to allocate their memory, so we have to do some - * heuristics. - */ - active_sockets = 0; - total_requested_mem = 0; - if (internal_config.force_sockets) - for (i = 0; i < rte_socket_count(); i++) { - uint64_t mem; - - socket_id = rte_socket_id_by_idx(i); - mem = internal_config.socket_mem[socket_id]; - - if (mem == 0) - continue; - - active_sockets++; - total_requested_mem += mem; - } - else - total_requested_mem = internal_config.memory; - - max_mem = (uint64_t)RTE_MAX_MEM_MB << 20; - if (total_requested_mem > max_mem) { - RTE_LOG(ERR, EAL, "Invalid parameters: 32-bit process can at most use %uM of memory\n", - (unsigned int)(max_mem >> 20)); - return -1; - } - total_extra_mem = max_mem - total_requested_mem; - extra_mem_per_socket = active_sockets == 0 ? total_extra_mem : - total_extra_mem / active_sockets; - - /* the allocation logic is a little bit convoluted, but here's how it - * works, in a nutshell: - * - if user hasn't specified on which sockets to allocate memory via - * --socket-mem, we allocate all of our memory on master core socket. - * - if user has specified sockets to allocate memory on, there may be - * some "unused" memory left (e.g. if user has specified --socket-mem - * such that not all memory adds up to 2 gigabytes), so add it to all - * sockets that are in use equally. - * - * page sizes are sorted by size in descending order, so we can safely - * assume that we dispense with bigger page sizes first. - */ - - /* create memseg lists */ - for (i = 0; i < rte_socket_count(); i++) { - int hp_sizes = (int) internal_config.num_hugepage_sizes; - uint64_t max_socket_mem, cur_socket_mem; - unsigned int master_lcore_socket; - struct rte_config *cfg = rte_eal_get_configuration(); - bool skip; - - socket_id = rte_socket_id_by_idx(i); - -#ifndef RTE_EAL_NUMA_AWARE_HUGEPAGES - if (socket_id > 0) - break; -#endif - - /* if we didn't specifically request memory on this socket */ - skip = active_sockets != 0 && - internal_config.socket_mem[socket_id] == 0; - /* ...or if we didn't specifically request memory on *any* - * socket, and this is not master lcore - */ - master_lcore_socket = rte_lcore_to_socket_id(cfg->master_lcore); - skip |= active_sockets == 0 && socket_id != master_lcore_socket; - - if (skip) { - RTE_LOG(DEBUG, EAL, "Will not preallocate memory on socket %u\n", - socket_id); - continue; - } - - /* max amount of memory on this socket */ - max_socket_mem = (active_sockets != 0 ? - internal_config.socket_mem[socket_id] : - internal_config.memory) + - extra_mem_per_socket; - cur_socket_mem = 0; - - for (hpi_idx = 0; hpi_idx < hp_sizes; hpi_idx++) { - uint64_t max_pagesz_mem, cur_pagesz_mem = 0; - uint64_t hugepage_sz; - struct hugepage_info *hpi; - int type_msl_idx, max_segs, total_segs = 0; - - hpi = &internal_config.hugepage_info[hpi_idx]; - hugepage_sz = hpi->hugepage_sz; - - /* check if pages are actually available */ - if (hpi->num_pages[socket_id] == 0) - continue; - - max_segs = RTE_MAX_MEMSEG_PER_TYPE; - max_pagesz_mem = max_socket_mem - cur_socket_mem; - - /* make it multiple of page size */ - max_pagesz_mem = RTE_ALIGN_FLOOR(max_pagesz_mem, - hugepage_sz); - - RTE_LOG(DEBUG, EAL, "Attempting to preallocate " - "%" PRIu64 "M on socket %i\n", - max_pagesz_mem >> 20, socket_id); - - type_msl_idx = 0; - while (cur_pagesz_mem < max_pagesz_mem && - total_segs < max_segs) { - if (msl_idx >= RTE_MAX_MEMSEG_LISTS) { - RTE_LOG(ERR, EAL, - "No more space in memseg lists, please increase %s\n", - RTE_STR(CONFIG_RTE_MAX_MEMSEG_LISTS)); - return -1; - } - - msl = &mcfg->memsegs[msl_idx]; - - if (alloc_memseg_list(msl, hugepage_sz, - max_pagesz_mem, socket_id, - type_msl_idx)) { - /* failing to allocate a memseg list is - * a serious error. - */ - RTE_LOG(ERR, EAL, "Cannot allocate memseg list\n"); - return -1; - } - - if (alloc_va_space(msl)) { - /* if we couldn't allocate VA space, we - * can try with smaller page sizes. - */ - RTE_LOG(ERR, EAL, "Cannot allocate VA space for memseg list, retrying with different page size\n"); - /* deallocate memseg list */ - if (free_memseg_list(msl)) - return -1; - break; - } - - total_segs += msl->memseg_arr.len; - cur_pagesz_mem = total_segs * hugepage_sz; - type_msl_idx++; - msl_idx++; - } - cur_socket_mem += cur_pagesz_mem; - } - if (cur_socket_mem == 0) { - RTE_LOG(ERR, EAL, "Cannot allocate VA space on socket %u\n", - socket_id); - return -1; - } - } - - return 0; -} - -static int __rte_unused -memseg_primary_init(void) -{ - struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config; - int i, socket_id, hpi_idx, msl_idx = 0; - struct rte_memseg_list *msl; - uint64_t max_mem, total_mem; - - /* no-huge does not need this at all */ - if (internal_config.no_hugetlbfs) - return 0; - - max_mem = (uint64_t)RTE_MAX_MEM_MB << 20; - total_mem = 0; - - /* create memseg lists */ - for (hpi_idx = 0; hpi_idx < (int) internal_config.num_hugepage_sizes; - hpi_idx++) { - struct hugepage_info *hpi; - uint64_t hugepage_sz; - - hpi = &internal_config.hugepage_info[hpi_idx]; - hugepage_sz = hpi->hugepage_sz; - - for (i = 0; i < (int) rte_socket_count(); i++) { - uint64_t max_type_mem, total_type_mem = 0; - int type_msl_idx, max_segs, total_segs = 0; - - socket_id = rte_socket_id_by_idx(i); - -#ifndef RTE_EAL_NUMA_AWARE_HUGEPAGES - if (socket_id > 0) - break; -#endif - - if (total_mem >= max_mem) - break; - - max_type_mem = RTE_MIN(max_mem - total_mem, - (uint64_t)RTE_MAX_MEM_MB_PER_TYPE << 20); - max_segs = RTE_MAX_MEMSEG_PER_TYPE; - - type_msl_idx = 0; - while (total_type_mem < max_type_mem && - total_segs < max_segs) { - uint64_t cur_max_mem; - if (msl_idx >= RTE_MAX_MEMSEG_LISTS) { - RTE_LOG(ERR, EAL, - "No more space in memseg lists, please increase %s\n", - RTE_STR(CONFIG_RTE_MAX_MEMSEG_LISTS)); - return -1; - } - - msl = &mcfg->memsegs[msl_idx++]; - - cur_max_mem = max_type_mem - total_type_mem; - if (alloc_memseg_list(msl, hugepage_sz, - cur_max_mem, socket_id, - type_msl_idx)) - return -1; - - total_segs += msl->memseg_arr.len; - total_type_mem = total_segs * hugepage_sz; - type_msl_idx++; - - if (alloc_va_space(msl)) { - RTE_LOG(ERR, EAL, "Cannot allocate VA space for memseg list\n"); - return -1; - } - } - total_mem += total_type_mem; - } - } - return 0; -} - -static int -memseg_secondary_init(void) -{ - struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config; - int msl_idx = 0; - struct rte_memseg_list *msl; - - for (msl_idx = 0; msl_idx < RTE_MAX_MEMSEG_LISTS; msl_idx++) { - - msl = &mcfg->memsegs[msl_idx]; - - /* skip empty memseg lists */ - if (msl->memseg_arr.len == 0) - continue; - - if (rte_fbarray_attach(&msl->memseg_arr)) { - RTE_LOG(ERR, EAL, "Cannot attach to primary process memseg lists\n"); - return -1; - } - - /* preallocate VA space */ - if (alloc_va_space(msl)) { - RTE_LOG(ERR, EAL, "Cannot preallocate VA space for hugepage memory\n"); - return -1; - } - } - - return 0; -} - static struct rte_memseg * virt2memseg(const void *addr, const struct rte_memseg_list *msl) { @@ -536,6 +166,9 @@ virt2memseg(const void *addr, const struct rte_memseg_list *msl) void *start, *end; int ms_idx; + if (msl == NULL) + return NULL; + /* a memseg list was specified, check if it's the right one */ start = msl->base_va; end = RTE_PTR_ADD(start, (size_t)msl->page_sz * msl->memseg_arr.len); @@ -788,14 +421,11 @@ rte_mem_lock_page(const void *virt) } int __rte_experimental -rte_memseg_contig_walk(rte_memseg_contig_walk_t func, void *arg) +rte_memseg_contig_walk_thread_unsafe(rte_memseg_contig_walk_t func, void *arg) { struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config; int i, ms_idx, ret = 0; - /* do not allow allocations/frees/init while we iterate */ - rte_rwlock_read_lock(&mcfg->memory_hotplug_lock); - for (i = 0; i < RTE_MAX_MEMSEG_LISTS; i++) { struct rte_memseg_list *msl = &mcfg->memsegs[i]; const struct rte_memseg *ms; @@ -820,30 +450,34 @@ rte_memseg_contig_walk(rte_memseg_contig_walk_t func, void *arg) len = n_segs * msl->page_sz; ret = func(msl, ms, len, arg); - if (ret < 0) { - ret = -1; - goto out; - } else if (ret > 0) { - ret = 1; - goto out; - } + if (ret) + return ret; ms_idx = rte_fbarray_find_next_used(arr, ms_idx + n_segs); } } -out: - rte_rwlock_read_unlock(&mcfg->memory_hotplug_lock); - return ret; + return 0; } int __rte_experimental -rte_memseg_walk(rte_memseg_walk_t func, void *arg) +rte_memseg_contig_walk(rte_memseg_contig_walk_t func, void *arg) { struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config; - int i, ms_idx, ret = 0; + int ret = 0; /* do not allow allocations/frees/init while we iterate */ rte_rwlock_read_lock(&mcfg->memory_hotplug_lock); + ret = rte_memseg_contig_walk_thread_unsafe(func, arg); + rte_rwlock_read_unlock(&mcfg->memory_hotplug_lock); + + return ret; +} + +int __rte_experimental +rte_memseg_walk_thread_unsafe(rte_memseg_walk_t func, void *arg) +{ + struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config; + int i, ms_idx, ret = 0; for (i = 0; i < RTE_MAX_MEMSEG_LISTS; i++) { struct rte_memseg_list *msl = &mcfg->memsegs[i]; @@ -859,29 +493,33 @@ rte_memseg_walk(rte_memseg_walk_t func, void *arg) while (ms_idx >= 0) { ms = rte_fbarray_get(arr, ms_idx); ret = func(msl, ms, arg); - if (ret < 0) { - ret = -1; - goto out; - } else if (ret > 0) { - ret = 1; - goto out; - } + if (ret) + return ret; ms_idx = rte_fbarray_find_next_used(arr, ms_idx + 1); } } -out: - rte_rwlock_read_unlock(&mcfg->memory_hotplug_lock); - return ret; + return 0; } int __rte_experimental -rte_memseg_list_walk(rte_memseg_list_walk_t func, void *arg) +rte_memseg_walk(rte_memseg_walk_t func, void *arg) { struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config; - int i, ret = 0; + int ret = 0; /* do not allow allocations/frees/init while we iterate */ rte_rwlock_read_lock(&mcfg->memory_hotplug_lock); + ret = rte_memseg_walk_thread_unsafe(func, arg); + rte_rwlock_read_unlock(&mcfg->memory_hotplug_lock); + + return ret; +} + +int __rte_experimental +rte_memseg_list_walk_thread_unsafe(rte_memseg_list_walk_t func, void *arg) +{ + struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config; + int i, ret = 0; for (i = 0; i < RTE_MAX_MEMSEG_LISTS; i++) { struct rte_memseg_list *msl = &mcfg->memsegs[i]; @@ -890,17 +528,23 @@ rte_memseg_list_walk(rte_memseg_list_walk_t func, void *arg) continue; ret = func(msl, arg); - if (ret < 0) { - ret = -1; - goto out; - } - if (ret > 0) { - ret = 1; - goto out; - } + if (ret) + return ret; } -out: + return 0; +} + +int __rte_experimental +rte_memseg_list_walk(rte_memseg_list_walk_t func, void *arg) +{ + struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config; + int ret = 0; + + /* do not allow allocations/frees/init while we iterate */ + rte_rwlock_read_lock(&mcfg->memory_hotplug_lock); + ret = rte_memseg_list_walk_thread_unsafe(func, arg); rte_rwlock_read_unlock(&mcfg->memory_hotplug_lock); + return ret; } @@ -918,15 +562,7 @@ rte_eal_memory_init(void) /* lock mem hotplug here, to prevent races while we init */ rte_rwlock_read_lock(&mcfg->memory_hotplug_lock); - retval = rte_eal_process_type() == RTE_PROC_PRIMARY ? -#ifndef RTE_ARCH_64 - memseg_primary_init_32() : -#else - memseg_primary_init() : -#endif - memseg_secondary_init(); - - if (retval < 0) + if (rte_eal_memseg_init() < 0) goto fail; if (eal_memalloc_init() < 0) diff --git a/lib/librte_eal/common/eal_common_memzone.c b/lib/librte_eal/common/eal_common_memzone.c index faa3b061..7300fe05 100644 --- a/lib/librte_eal/common/eal_common_memzone.c +++ b/lib/librte_eal/common/eal_common_memzone.c @@ -52,38 +52,6 @@ memzone_lookup_thread_unsafe(const char *name) return NULL; } - -/* This function will return the greatest free block if a heap has been - * specified. If no heap has been specified, it will return the heap and - * length of the greatest free block available in all heaps */ -static size_t -find_heap_max_free_elem(int *s, unsigned align) -{ - struct rte_mem_config *mcfg; - struct rte_malloc_socket_stats stats; - int i, socket = *s; - size_t len = 0; - - /* get pointer to global configuration */ - mcfg = rte_eal_get_configuration()->mem_config; - - for (i = 0; i < RTE_MAX_NUMA_NODES; i++) { - if ((socket != SOCKET_ID_ANY) && (socket != i)) - continue; - - malloc_heap_get_stats(&mcfg->malloc_heaps[i], &stats); - if (stats.greatest_free_size > len) { - len = stats.greatest_free_size; - *s = i; - } - } - - if (len < MALLOC_ELEM_OVERHEAD + align) - return 0; - - return len - MALLOC_ELEM_OVERHEAD - align; -} - static const struct rte_memzone * memzone_reserve_aligned_thread_unsafe(const char *name, size_t len, int socket_id, unsigned int flags, unsigned int align, @@ -92,6 +60,7 @@ memzone_reserve_aligned_thread_unsafe(const char *name, size_t len, struct rte_memzone *mz; struct rte_mem_config *mcfg; struct rte_fbarray *arr; + void *mz_addr; size_t requested_len; int mz_idx; bool contig; @@ -140,8 +109,7 @@ memzone_reserve_aligned_thread_unsafe(const char *name, size_t len, return NULL; } - len += RTE_CACHE_LINE_MASK; - len &= ~((size_t) RTE_CACHE_LINE_MASK); + len = RTE_ALIGN_CEIL(len, RTE_CACHE_LINE_SIZE); /* save minimal requested length */ requested_len = RTE_MAX((size_t)RTE_CACHE_LINE_SIZE, len); @@ -165,27 +133,18 @@ memzone_reserve_aligned_thread_unsafe(const char *name, size_t len, /* malloc only cares about size flags, remove contig flag from flags */ flags &= ~RTE_MEMZONE_IOVA_CONTIG; - if (len == 0) { - /* len == 0 is only allowed for non-contiguous zones */ - if (contig) { - RTE_LOG(DEBUG, EAL, "Reserving zero-length contiguous memzones is not supported\n"); - rte_errno = EINVAL; - return NULL; - } - if (bound != 0) + if (len == 0 && bound == 0) { + /* no size constraints were placed, so use malloc elem len */ + requested_len = 0; + mz_addr = malloc_heap_alloc_biggest(NULL, socket_id, flags, + align, contig); + } else { + if (len == 0) requested_len = bound; - else { - requested_len = find_heap_max_free_elem(&socket_id, align); - if (requested_len == 0) { - rte_errno = ENOMEM; - return NULL; - } - } + /* allocate memory on heap */ + mz_addr = malloc_heap_alloc(NULL, requested_len, socket_id, + flags, align, bound, contig); } - - /* allocate memory on heap */ - void *mz_addr = malloc_heap_alloc(NULL, requested_len, socket_id, flags, - align, bound, contig); if (mz_addr == NULL) { rte_errno = ENOMEM; return NULL; @@ -213,8 +172,9 @@ memzone_reserve_aligned_thread_unsafe(const char *name, size_t len, snprintf(mz->name, sizeof(mz->name), "%s", name); mz->iova = rte_malloc_virt2iova(mz_addr); mz->addr = mz_addr; - mz->len = (requested_len == 0 ? - (elem->size - MALLOC_ELEM_OVERHEAD) : requested_len); + mz->len = requested_len == 0 ? + elem->size - elem->pad - MALLOC_ELEM_OVERHEAD : + requested_len; mz->hugepage_sz = elem->msl->page_sz; mz->socket_id = elem->msl->socket_id; mz->flags = 0; diff --git a/lib/librte_eal/common/eal_common_options.c b/lib/librte_eal/common/eal_common_options.c index ecebb292..dd5f9740 100644 --- a/lib/librte_eal/common/eal_common_options.c +++ b/lib/librte_eal/common/eal_common_options.c @@ -66,10 +66,12 @@ eal_long_options[] = { {OPT_NO_HUGE, 0, NULL, OPT_NO_HUGE_NUM }, {OPT_NO_PCI, 0, NULL, OPT_NO_PCI_NUM }, {OPT_NO_SHCONF, 0, NULL, OPT_NO_SHCONF_NUM }, + {OPT_IN_MEMORY, 0, NULL, OPT_IN_MEMORY_NUM }, {OPT_PCI_BLACKLIST, 1, NULL, OPT_PCI_BLACKLIST_NUM }, {OPT_PCI_WHITELIST, 1, NULL, OPT_PCI_WHITELIST_NUM }, {OPT_PROC_TYPE, 1, NULL, OPT_PROC_TYPE_NUM }, {OPT_SOCKET_MEM, 1, NULL, OPT_SOCKET_MEM_NUM }, + {OPT_SOCKET_LIMIT, 1, NULL, OPT_SOCKET_LIMIT_NUM }, {OPT_SYSLOG, 1, NULL, OPT_SYSLOG_NUM }, {OPT_VDEV, 1, NULL, OPT_VDEV_NUM }, {OPT_VFIO_INTR, 1, NULL, OPT_VFIO_INTR_NUM }, @@ -179,6 +181,10 @@ eal_reset_internal_config(struct internal_config *internal_cfg) /* zero out the NUMA config */ for (i = 0; i < RTE_MAX_NUMA_NODES; i++) internal_cfg->socket_mem[i] = 0; + internal_cfg->force_socket_limits = 0; + /* zero out the NUMA limits config */ + for (i = 0; i < RTE_MAX_NUMA_NODES; i++) + internal_cfg->socket_limit[i] = 0; /* zero out hugedir descriptors */ for (i = 0; i < MAX_HUGEPAGE_SIZES; i++) { memset(&internal_cfg->hugepage_info[i], 0, @@ -315,6 +321,7 @@ eal_parse_service_coremask(const char *coremask) unsigned int count = 0; char c; int val; + uint32_t taken_lcore_count = 0; if (coremask == NULL) return -1; @@ -348,7 +355,7 @@ eal_parse_service_coremask(const char *coremask) if (master_lcore_parsed && cfg->master_lcore == lcore) { RTE_LOG(ERR, EAL, - "Error: lcore %u is master lcore, cannot use as service core\n", + "lcore %u is master lcore, cannot use as service core\n", idx); return -1; } @@ -358,6 +365,10 @@ eal_parse_service_coremask(const char *coremask) "lcore %u unavailable\n", idx); return -1; } + + if (cfg->lcore_role[idx] == ROLE_RTE) + taken_lcore_count++; + lcore_config[idx].core_role = ROLE_SERVICE; count++; } @@ -374,11 +385,28 @@ eal_parse_service_coremask(const char *coremask) if (count == 0) return -1; + if (core_parsed && taken_lcore_count != count) { + RTE_LOG(WARNING, EAL, + "Not all service cores are in the coremask. " + "Please ensure -c or -l includes service cores\n"); + } + cfg->service_lcore_count = count; return 0; } static int +eal_service_cores_parsed(void) +{ + int idx; + for (idx = 0; idx < RTE_MAX_LCORE; idx++) { + if (lcore_config[idx].core_role == ROLE_SERVICE) + return 1; + } + return 0; +} + +static int eal_parse_coremask(const char *coremask) { struct rte_config *cfg = rte_eal_get_configuration(); @@ -387,6 +415,11 @@ eal_parse_coremask(const char *coremask) char c; int val; + if (eal_service_cores_parsed()) + RTE_LOG(WARNING, EAL, + "Service cores parsed before dataplane cores. " + "Please ensure -c is before -s or -S\n"); + if (coremask == NULL) return -1; /* Remove all blank characters ahead and after . @@ -418,6 +451,7 @@ eal_parse_coremask(const char *coremask) "unavailable\n", idx); return -1; } + cfg->lcore_role[idx] = ROLE_RTE; lcore_config[idx].core_index = count; count++; @@ -449,6 +483,7 @@ eal_parse_service_corelist(const char *corelist) unsigned count = 0; char *end = NULL; int min, max; + uint32_t taken_lcore_count = 0; if (corelist == NULL) return -1; @@ -490,6 +525,9 @@ eal_parse_service_corelist(const char *corelist) idx); return -1; } + if (cfg->lcore_role[idx] == ROLE_RTE) + taken_lcore_count++; + lcore_config[idx].core_role = ROLE_SERVICE; count++; @@ -504,6 +542,12 @@ eal_parse_service_corelist(const char *corelist) if (count == 0) return -1; + if (core_parsed && taken_lcore_count != count) { + RTE_LOG(WARNING, EAL, + "Not all service cores were in the coremask. " + "Please ensure -c or -l includes service cores\n"); + } + return 0; } @@ -516,6 +560,11 @@ eal_parse_corelist(const char *corelist) char *end = NULL; int min, max; + if (eal_service_cores_parsed()) + RTE_LOG(WARNING, EAL, + "Service cores parsed before dataplane cores. " + "Please ensure -l is before -s or -S\n"); + if (corelist == NULL) return -1; @@ -590,7 +639,8 @@ eal_parse_master_lcore(const char *arg) /* ensure master core is not used as service core */ if (lcore_config[cfg->master_lcore].core_role == ROLE_SERVICE) { - RTE_LOG(ERR, EAL, "Error: Master lcore is used as a service core.\n"); + RTE_LOG(ERR, EAL, + "Error: Master lcore is used as a service core\n"); return -1; } @@ -1165,6 +1215,13 @@ eal_parse_common_option(int opt, const char *optarg, conf->no_shconf = 1; break; + case OPT_IN_MEMORY_NUM: + conf->in_memory = 1; + /* in-memory is a superset of noshconf and huge-unlink */ + conf->no_shconf = 1; + conf->hugepage_unlink = 1; + break; + case OPT_PROC_TYPE_NUM: conf->process_type = eal_parse_proc_type(optarg); break; @@ -1316,12 +1373,23 @@ eal_check_common_options(struct internal_config *internal_cfg) "be specified together with --"OPT_NO_HUGE"\n"); return -1; } - - if (internal_cfg->no_hugetlbfs && internal_cfg->hugepage_unlink) { + if (internal_cfg->no_hugetlbfs && internal_cfg->hugepage_unlink && + !internal_cfg->in_memory) { RTE_LOG(ERR, EAL, "Option --"OPT_HUGE_UNLINK" cannot " "be specified together with --"OPT_NO_HUGE"\n"); return -1; } + if (internal_config.force_socket_limits && internal_config.legacy_mem) { + RTE_LOG(ERR, EAL, "Option --"OPT_SOCKET_LIMIT + " is only supported in non-legacy memory mode\n"); + } + if (internal_cfg->single_file_segments && + internal_cfg->hugepage_unlink) { + RTE_LOG(ERR, EAL, "Option --"OPT_SINGLE_FILE_SEGMENTS" is " + "not compatible with neither --"OPT_IN_MEMORY" nor " + "--"OPT_HUGE_UNLINK"\n"); + return -1; + } return 0; } @@ -1370,6 +1438,8 @@ eal_common_usage(void) " Set specific log level\n" " -v Display version information on startup\n" " -h, --help This help\n" + " --"OPT_IN_MEMORY" Operate entirely in memory. This will\n" + " disable secondary process support\n" "\nEAL options for DEBUG use only:\n" " --"OPT_HUGE_UNLINK" Unlink hugepage files after init\n" " --"OPT_NO_HUGE" Use malloc instead of hugetlbfs\n" diff --git a/lib/librte_eal/common/eal_common_proc.c b/lib/librte_eal/common/eal_common_proc.c index 707d8ab3..9fcb9121 100644 --- a/lib/librte_eal/common/eal_common_proc.c +++ b/lib/librte_eal/common/eal_common_proc.c @@ -20,6 +20,7 @@ #include <sys/un.h> #include <unistd.h> +#include <rte_alarm.h> #include <rte_common.h> #include <rte_cycles.h> #include <rte_eal.h> @@ -94,11 +95,9 @@ TAILQ_HEAD(pending_request_list, pending_request); static struct { struct pending_request_list requests; pthread_mutex_t lock; - pthread_cond_t async_cond; } pending_requests = { .requests = TAILQ_HEAD_INITIALIZER(pending_requests.requests), .lock = PTHREAD_MUTEX_INITIALIZER, - .async_cond = PTHREAD_COND_INITIALIZER /**< used in async requests only */ }; @@ -106,6 +105,16 @@ static struct { static int mp_send(struct rte_mp_msg *msg, const char *peer, int type); +/* for use with alarm callback */ +static void +async_reply_handle(void *arg); + +/* for use with process_msg */ +static struct pending_request * +async_reply_handle_thread_unsafe(void *arg); + +static void +trigger_async_action(struct pending_request *req); static struct pending_request * find_pending_request(const char *dst, const char *act_name) @@ -290,6 +299,8 @@ process_msg(struct mp_msg_internal *m, struct sockaddr_un *s) RTE_LOG(DEBUG, EAL, "msg: %s\n", msg->name); if (m->type == MP_REP || m->type == MP_IGN) { + struct pending_request *req = NULL; + pthread_mutex_lock(&pending_requests.lock); pending_req = find_pending_request(s->sun_path, msg->name); if (pending_req) { @@ -301,11 +312,14 @@ process_msg(struct mp_msg_internal *m, struct sockaddr_un *s) if (pending_req->type == REQUEST_TYPE_SYNC) pthread_cond_signal(&pending_req->sync.cond); else if (pending_req->type == REQUEST_TYPE_ASYNC) - pthread_cond_signal( - &pending_requests.async_cond); + req = async_reply_handle_thread_unsafe( + pending_req); } else RTE_LOG(ERR, EAL, "Drop mp reply: %s\n", msg->name); pthread_mutex_unlock(&pending_requests.lock); + + if (req != NULL) + trigger_async_action(req); return; } @@ -365,7 +379,6 @@ timespec_cmp(const struct timespec *a, const struct timespec *b) } enum async_action { - ACTION_NONE, /**< don't do anything */ ACTION_FREE, /**< free the action entry, but don't trigger callback */ ACTION_TRIGGER /**< trigger callback, then free action entry */ }; @@ -375,7 +388,7 @@ process_async_request(struct pending_request *sr, const struct timespec *now) { struct async_request_param *param; struct rte_mp_reply *reply; - bool timeout, received, last_msg; + bool timeout, last_msg; param = sr->async.param; reply = ¶m->user_reply; @@ -383,13 +396,6 @@ process_async_request(struct pending_request *sr, const struct timespec *now) /* did we timeout? */ timeout = timespec_cmp(¶m->end, now) <= 0; - /* did we receive a response? */ - received = sr->reply_received != 0; - - /* if we didn't time out, and we didn't receive a response, ignore */ - if (!timeout && !received) - return ACTION_NONE; - /* if we received a response, adjust relevant data and copy mesasge. */ if (sr->reply_received == 1 && sr->reply) { struct rte_mp_msg *msg, *user_msgs, *tmp; @@ -448,118 +454,58 @@ trigger_async_action(struct pending_request *sr) free(sr->async.param->user_reply.msgs); free(sr->async.param); free(sr->request); + free(sr); } static struct pending_request * -check_trigger(struct timespec *ts) +async_reply_handle_thread_unsafe(void *arg) { - struct pending_request *next, *cur, *trigger = NULL; - - TAILQ_FOREACH_SAFE(cur, &pending_requests.requests, next, next) { - enum async_action action; - if (cur->type != REQUEST_TYPE_ASYNC) - continue; + struct pending_request *req = (struct pending_request *)arg; + enum async_action action; + struct timespec ts_now; + struct timeval now; - action = process_async_request(cur, ts); - if (action == ACTION_FREE) { - TAILQ_REMOVE(&pending_requests.requests, cur, next); - free(cur); - } else if (action == ACTION_TRIGGER) { - TAILQ_REMOVE(&pending_requests.requests, cur, next); - trigger = cur; - break; - } + if (gettimeofday(&now, NULL) < 0) { + RTE_LOG(ERR, EAL, "Cannot get current time\n"); + goto no_trigger; } - return trigger; -} + ts_now.tv_nsec = now.tv_usec * 1000; + ts_now.tv_sec = now.tv_sec; -static void -wait_for_async_messages(void) -{ - struct pending_request *sr; - struct timespec timeout; - bool timedwait = false; - bool nowait = false; - int ret; + action = process_async_request(req, &ts_now); - /* scan through the list and see if there are any timeouts that - * are earlier than our current timeout. - */ - TAILQ_FOREACH(sr, &pending_requests.requests, next) { - if (sr->type != REQUEST_TYPE_ASYNC) - continue; - if (!timedwait || timespec_cmp(&sr->async.param->end, - &timeout) < 0) { - memcpy(&timeout, &sr->async.param->end, - sizeof(timeout)); - timedwait = true; - } + TAILQ_REMOVE(&pending_requests.requests, req, next); - /* sometimes, we don't even wait */ - if (sr->reply_received) { - nowait = true; - break; + if (rte_eal_alarm_cancel(async_reply_handle, req) < 0) { + /* if we failed to cancel the alarm because it's already in + * progress, don't proceed because otherwise we will end up + * handling the same message twice. + */ + if (rte_errno == EINPROGRESS) { + RTE_LOG(DEBUG, EAL, "Request handling is already in progress\n"); + goto no_trigger; } + RTE_LOG(ERR, EAL, "Failed to cancel alarm\n"); } - if (nowait) - return; - - do { - ret = timedwait ? - pthread_cond_timedwait( - &pending_requests.async_cond, - &pending_requests.lock, - &timeout) : - pthread_cond_wait( - &pending_requests.async_cond, - &pending_requests.lock); - } while (ret != 0 && ret != ETIMEDOUT); - - /* we've been woken up or timed out */ + if (action == ACTION_TRIGGER) + return req; +no_trigger: + free(req); + return NULL; } -static void * -async_reply_handle(void *arg __rte_unused) +static void +async_reply_handle(void *arg) { - struct timeval now; - struct timespec ts_now; - while (1) { - struct pending_request *trigger = NULL; - - pthread_mutex_lock(&pending_requests.lock); + struct pending_request *req; - /* we exit this function holding the lock */ - wait_for_async_messages(); - - if (gettimeofday(&now, NULL) < 0) { - pthread_mutex_unlock(&pending_requests.lock); - RTE_LOG(ERR, EAL, "Cannot get current time\n"); - break; - } - ts_now.tv_nsec = now.tv_usec * 1000; - ts_now.tv_sec = now.tv_sec; - - do { - trigger = check_trigger(&ts_now); - /* unlock request list */ - pthread_mutex_unlock(&pending_requests.lock); - - if (trigger) { - trigger_async_action(trigger); - free(trigger); - - /* we've triggered a callback, but there may be - * more, so lock the list and check again. - */ - pthread_mutex_lock(&pending_requests.lock); - } - } while (trigger); - } - - RTE_LOG(ERR, EAL, "ERROR: asynchronous requests disabled\n"); + pthread_mutex_lock(&pending_requests.lock); + req = async_reply_handle_thread_unsafe(arg); + pthread_mutex_unlock(&pending_requests.lock); - return NULL; + if (req != NULL) + trigger_async_action(req); } static int @@ -624,7 +570,15 @@ rte_mp_channel_init(void) { char path[PATH_MAX]; int dir_fd; - pthread_t mp_handle_tid, async_reply_handle_tid; + pthread_t mp_handle_tid; + + /* in no shared files mode, we do not have secondary processes support, + * so no need to initialize IPC. + */ + if (internal_config.no_shconf) { + RTE_LOG(DEBUG, EAL, "No shared files mode enabled, IPC will be disabled\n"); + return 0; + } /* create filter path */ create_socket_path("*", path, sizeof(path)); @@ -671,17 +625,6 @@ rte_mp_channel_init(void) return -1; } - if (rte_ctrl_thread_create(&async_reply_handle_tid, - "rte_mp_async", NULL, - async_reply_handle, NULL) < 0) { - RTE_LOG(ERR, EAL, "failed to create mp thead: %s\n", - strerror(errno)); - close(mp_fd); - close(dir_fd); - mp_fd = -1; - return -1; - } - /* unlock the directory */ flock(dir_fd, LOCK_UN); close(dir_fd); @@ -786,7 +729,7 @@ mp_send(struct rte_mp_msg *msg, const char *peer, int type) dir_fd = dirfd(mp_dir); /* lock the directory to prevent processes spinning up while we send */ - if (flock(dir_fd, LOCK_EX)) { + if (flock(dir_fd, LOCK_SH)) { RTE_LOG(ERR, EAL, "Unable to lock directory %s\n", mp_dir_path); rte_errno = errno; @@ -853,7 +796,7 @@ rte_mp_sendmsg(struct rte_mp_msg *msg) static int mp_request_async(const char *dst, struct rte_mp_msg *req, - struct async_request_param *param) + struct async_request_param *param, const struct timespec *ts) { struct rte_mp_msg *reply_msg; struct pending_request *pending_req, *exist; @@ -898,6 +841,13 @@ mp_request_async(const char *dst, struct rte_mp_msg *req, param->user_reply.nb_sent++; + if (rte_eal_alarm_set(ts->tv_sec * 1000000 + ts->tv_nsec / 1000, + async_reply_handle, pending_req) < 0) { + RTE_LOG(ERR, EAL, "Fail to set alarm for request %s:%s\n", + dst, req->name); + rte_panic("Fix the above shit to properly free all memory\n"); + } + return 0; fail: free(pending_req); @@ -988,6 +938,12 @@ rte_mp_request_sync(struct rte_mp_msg *req, struct rte_mp_reply *reply, if (check_input(req) == false) return -1; + + if (internal_config.no_shconf) { + RTE_LOG(DEBUG, EAL, "No shared files mode enabled, IPC is disabled\n"); + return 0; + } + if (gettimeofday(&now, NULL) < 0) { RTE_LOG(ERR, EAL, "Faile to get current time\n"); rte_errno = errno; @@ -1020,7 +976,7 @@ rte_mp_request_sync(struct rte_mp_msg *req, struct rte_mp_reply *reply, dir_fd = dirfd(mp_dir); /* lock the directory to prevent processes spinning up while we send */ - if (flock(dir_fd, LOCK_EX)) { + if (flock(dir_fd, LOCK_SH)) { RTE_LOG(ERR, EAL, "Unable to lock directory %s\n", mp_dir_path); closedir(mp_dir); @@ -1072,6 +1028,12 @@ rte_mp_request_async(struct rte_mp_msg *req, const struct timespec *ts, if (check_input(req) == false) return -1; + + if (internal_config.no_shconf) { + RTE_LOG(DEBUG, EAL, "No shared files mode enabled, IPC is disabled\n"); + return 0; + } + if (gettimeofday(&now, NULL) < 0) { RTE_LOG(ERR, EAL, "Faile to get current time\n"); rte_errno = errno; @@ -1119,7 +1081,7 @@ rte_mp_request_async(struct rte_mp_msg *req, const struct timespec *ts, /* for secondary process, send request to the primary process only */ if (rte_eal_process_type() == RTE_PROC_SECONDARY) { - ret = mp_request_async(eal_mp_socket_path(), copy, param); + ret = mp_request_async(eal_mp_socket_path(), copy, param, ts); /* if we didn't send anything, put dummy request on the queue */ if (ret == 0 && reply->nb_sent == 0) { @@ -1146,7 +1108,7 @@ rte_mp_request_async(struct rte_mp_msg *req, const struct timespec *ts, dir_fd = dirfd(mp_dir); /* lock the directory to prevent processes spinning up while we send */ - if (flock(dir_fd, LOCK_EX)) { + if (flock(dir_fd, LOCK_SH)) { RTE_LOG(ERR, EAL, "Unable to lock directory %s\n", mp_dir_path); rte_errno = errno; @@ -1162,7 +1124,7 @@ rte_mp_request_async(struct rte_mp_msg *req, const struct timespec *ts, snprintf(path, sizeof(path), "%s/%s", mp_dir_path, ent->d_name); - if (mp_request_async(path, copy, param)) + if (mp_request_async(path, copy, param, ts)) ret = -1; } /* if we didn't send anything, put dummy request on the queue */ @@ -1171,9 +1133,6 @@ rte_mp_request_async(struct rte_mp_msg *req, const struct timespec *ts, dummy_used = true; } - /* trigger async request thread wake up */ - pthread_cond_signal(&pending_requests.async_cond); - /* finally, unlock the queue */ pthread_mutex_unlock(&pending_requests.lock); @@ -1213,5 +1172,10 @@ rte_mp_reply(struct rte_mp_msg *msg, const char *peer) return -1; } + if (internal_config.no_shconf) { + RTE_LOG(DEBUG, EAL, "No shared files mode enabled, IPC is disabled\n"); + return 0; + } + return mp_send(msg, peer, MP_REP); } diff --git a/lib/librte_eal/common/eal_common_thread.c b/lib/librte_eal/common/eal_common_thread.c index 42398630..48ef4d6d 100644 --- a/lib/librte_eal/common/eal_common_thread.c +++ b/lib/librte_eal/common/eal_common_thread.c @@ -175,7 +175,7 @@ rte_ctrl_thread_create(pthread_t *thread, const char *name, params = malloc(sizeof(*params)); if (!params) - return -1; + return -ENOMEM; params->start_routine = start_routine; params->arg = arg; @@ -185,13 +185,14 @@ rte_ctrl_thread_create(pthread_t *thread, const char *name, ret = pthread_create(thread, attr, rte_thread_init, (void *)params); if (ret != 0) { free(params); - return ret; + return -ret; } if (name != NULL) { ret = rte_thread_setname(*thread, name); if (ret < 0) - goto fail; + RTE_LOG(DEBUG, EAL, + "Cannot set name for ctrl thread\n"); } cpu_found = 0; @@ -227,5 +228,5 @@ fail: } pthread_cancel(*thread); pthread_join(*thread, NULL); - return ret; + return -ret; } diff --git a/lib/librte_eal/common/eal_common_uuid.c b/lib/librte_eal/common/eal_common_uuid.c new file mode 100644 index 00000000..1b93c5b3 --- /dev/null +++ b/lib/librte_eal/common/eal_common_uuid.c @@ -0,0 +1,193 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright (C) 1996, 1997 Theodore Ts'o. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, and the entire permission notice in its entirety, + * including the disclaimer of warranties. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. The name of the author may not be used to endorse or promote + * products derived from this software without specific prior + * written permission. + * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED + * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ALL OF + * WHICH ARE HEREBY DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT + * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE + * USE OF THIS SOFTWARE, EVEN IF NOT ADVISED OF THE POSSIBILITY OF SUCH + * DAMAGE. + */ + +#include <stdio.h> +#include <string.h> +#include <stdint.h> +#include <stdlib.h> +#include <ctype.h> + +#include <rte_uuid.h> + +/* UUID packed form */ +struct uuid { + uint32_t time_low; + uint16_t time_mid; + uint16_t time_hi_and_version; + uint16_t clock_seq; + uint8_t node[6]; +}; + +static void uuid_pack(const struct uuid *uu, rte_uuid_t ptr) +{ + uint32_t tmp; + uint8_t *out = ptr; + + tmp = uu->time_low; + out[3] = (uint8_t) tmp; + tmp >>= 8; + out[2] = (uint8_t) tmp; + tmp >>= 8; + out[1] = (uint8_t) tmp; + tmp >>= 8; + out[0] = (uint8_t) tmp; + + tmp = uu->time_mid; + out[5] = (uint8_t) tmp; + tmp >>= 8; + out[4] = (uint8_t) tmp; + + tmp = uu->time_hi_and_version; + out[7] = (uint8_t) tmp; + tmp >>= 8; + out[6] = (uint8_t) tmp; + + tmp = uu->clock_seq; + out[9] = (uint8_t) tmp; + tmp >>= 8; + out[8] = (uint8_t) tmp; + + memcpy(out+10, uu->node, 6); +} + +static void uuid_unpack(const rte_uuid_t in, struct uuid *uu) +{ + const uint8_t *ptr = in; + uint32_t tmp; + + tmp = *ptr++; + tmp = (tmp << 8) | *ptr++; + tmp = (tmp << 8) | *ptr++; + tmp = (tmp << 8) | *ptr++; + uu->time_low = tmp; + + tmp = *ptr++; + tmp = (tmp << 8) | *ptr++; + uu->time_mid = tmp; + + tmp = *ptr++; + tmp = (tmp << 8) | *ptr++; + uu->time_hi_and_version = tmp; + + tmp = *ptr++; + tmp = (tmp << 8) | *ptr++; + uu->clock_seq = tmp; + + memcpy(uu->node, ptr, 6); +} + +bool rte_uuid_is_null(const rte_uuid_t uu) +{ + const uint8_t *cp = uu; + int i; + + for (i = 0; i < 16; i++) + if (*cp++) + return false; + return true; +} + +/* + * rte_uuid_compare() - compare two UUIDs. + */ +int rte_uuid_compare(const rte_uuid_t uu1, const rte_uuid_t uu2) +{ + struct uuid uuid1, uuid2; + + uuid_unpack(uu1, &uuid1); + uuid_unpack(uu2, &uuid2); + +#define UUCMP(u1, u2) \ + do { if (u1 != u2) return (u1 < u2) ? -1 : 1; } while (0) + + UUCMP(uuid1.time_low, uuid2.time_low); + UUCMP(uuid1.time_mid, uuid2.time_mid); + UUCMP(uuid1.time_hi_and_version, uuid2.time_hi_and_version); + UUCMP(uuid1.clock_seq, uuid2.clock_seq); +#undef UUCMP + + return memcmp(uuid1.node, uuid2.node, 6); +} + +int rte_uuid_parse(const char *in, rte_uuid_t uu) +{ + struct uuid uuid; + int i; + const char *cp; + char buf[3]; + + if (strlen(in) != 36) + return -1; + + for (i = 0, cp = in; i <= 36; i++, cp++) { + if ((i == 8) || (i == 13) || (i == 18) || + (i == 23)) { + if (*cp == '-') + continue; + else + return -1; + } + if (i == 36) + if (*cp == 0) + continue; + if (!isxdigit(*cp)) + return -1; + } + + uuid.time_low = strtoul(in, NULL, 16); + uuid.time_mid = strtoul(in+9, NULL, 16); + uuid.time_hi_and_version = strtoul(in+14, NULL, 16); + uuid.clock_seq = strtoul(in+19, NULL, 16); + cp = in+24; + buf[2] = 0; + + for (i = 0; i < 6; i++) { + buf[0] = *cp++; + buf[1] = *cp++; + uuid.node[i] = strtoul(buf, NULL, 16); + } + + uuid_pack(&uuid, uu); + return 0; +} + +void rte_uuid_unparse(const rte_uuid_t uu, char *out, size_t len) +{ + struct uuid uuid; + + uuid_unpack(uu, &uuid); + + snprintf(out, len, + "%08x-%04x-%04x-%02x%02x-%02x%02x%02x%02x%02x%02x", + uuid.time_low, uuid.time_mid, uuid.time_hi_and_version, + uuid.clock_seq >> 8, uuid.clock_seq & 0xFF, + uuid.node[0], uuid.node[1], uuid.node[2], + uuid.node[3], uuid.node[4], uuid.node[5]); +} diff --git a/lib/librte_eal/common/eal_filesystem.h b/lib/librte_eal/common/eal_filesystem.h index 364f38d1..de05febf 100644 --- a/lib/librte_eal/common/eal_filesystem.h +++ b/lib/librte_eal/common/eal_filesystem.h @@ -12,7 +12,6 @@ #define EAL_FILESYSTEM_H /** Path of rte config file. */ -#define RUNTIME_CONFIG_FMT "%s/.%s_config" #include <stdint.h> #include <limits.h> @@ -30,17 +29,14 @@ eal_create_runtime_dir(void); const char * eal_get_runtime_dir(void); +#define RUNTIME_CONFIG_FNAME "config" static inline const char * eal_runtime_config_path(void) { static char buffer[PATH_MAX]; /* static so auto-zeroed */ - const char *directory = "/var/run"; - const char *home_dir = getenv("HOME"); - if (getuid() != 0 && home_dir != NULL) - directory = home_dir; - snprintf(buffer, sizeof(buffer) - 1, RUNTIME_CONFIG_FMT, directory, - internal_config.hugefile_prefix); + snprintf(buffer, sizeof(buffer) - 1, "%s/%s", eal_get_runtime_dir(), + RUNTIME_CONFIG_FNAME); return buffer; } diff --git a/lib/librte_eal/common/eal_internal_cfg.h b/lib/librte_eal/common/eal_internal_cfg.h index c4cbf3ac..00ee6e06 100644 --- a/lib/librte_eal/common/eal_internal_cfg.h +++ b/lib/librte_eal/common/eal_internal_cfg.h @@ -41,11 +41,17 @@ struct internal_config { volatile unsigned vmware_tsc_map; /**< true to use VMware TSC mapping * instead of native TSC */ volatile unsigned no_shconf; /**< true if there is no shared config */ + volatile unsigned in_memory; + /**< true if DPDK should operate entirely in-memory and not create any + * shared files or runtime data. + */ volatile unsigned create_uio_dev; /**< true to create /dev/uioX devices */ volatile enum rte_proc_type_t process_type; /**< multi-process proc type */ /** true to try allocating memory on specific sockets */ volatile unsigned force_sockets; volatile uint64_t socket_mem[RTE_MAX_NUMA_NODES]; /**< amount of memory per socket */ + volatile unsigned force_socket_limits; + volatile uint64_t socket_limit[RTE_MAX_NUMA_NODES]; /**< limit amount of memory per socket */ uintptr_t base_virtaddr; /**< base address to try and reserve memory from */ volatile unsigned legacy_mem; /**< true to enable legacy memory behavior (no dynamic allocation, diff --git a/lib/librte_eal/common/eal_options.h b/lib/librte_eal/common/eal_options.h index 211ae06a..96e16678 100644 --- a/lib/librte_eal/common/eal_options.h +++ b/lib/librte_eal/common/eal_options.h @@ -45,8 +45,12 @@ enum { OPT_NO_PCI_NUM, #define OPT_NO_SHCONF "no-shconf" OPT_NO_SHCONF_NUM, +#define OPT_IN_MEMORY "in-memory" + OPT_IN_MEMORY_NUM, #define OPT_SOCKET_MEM "socket-mem" OPT_SOCKET_MEM_NUM, +#define OPT_SOCKET_LIMIT "socket-limit" + OPT_SOCKET_LIMIT_NUM, #define OPT_SYSLOG "syslog" OPT_SYSLOG_NUM, #define OPT_VDEV "vdev" diff --git a/lib/librte_eal/common/eal_private.h b/lib/librte_eal/common/eal_private.h index bdadc4d5..4f809a83 100644 --- a/lib/librte_eal/common/eal_private.h +++ b/lib/librte_eal/common/eal_private.h @@ -47,6 +47,18 @@ void eal_log_set_default(FILE *default_log); int rte_eal_cpu_init(void); /** + * Create memseg lists + * + * This function is private to EAL. + * + * Preallocate virtual memory. + * + * @return + * 0 on success, negative on error + */ +int rte_eal_memseg_init(void); + +/** * Map memory * * This function is private to EAL. @@ -258,4 +270,38 @@ int rte_mp_channel_init(void); */ void dev_callback_process(char *device_name, enum rte_dev_event_type event); +/** + * @internal + * Parse a device string and store its information in an + * rte_devargs structure. + * + * A device description is split by layers of abstraction of the device: + * bus, class and driver. Each layer will offer a set of properties that + * can be applied either to configure or recognize a device. + * + * This function will parse those properties and prepare the rte_devargs + * to be given to each layers for processing. + * + * Note: if the "data" field of the devargs points to devstr, + * then no dynamic allocation is performed and the rte_devargs + * can be safely discarded. + * + * Otherwise ``data`` will hold a workable copy of devstr, that will be + * used by layers descriptors within rte_devargs. In this case, + * any rte_devargs should be cleaned-up before being freed. + * + * @param da + * rte_devargs structure to fill. + * + * @param devstr + * Device string. + * + * @return + * 0 on success. + * Negative errno values on error (rte_errno is set). + */ +int +rte_devargs_layers_parse(struct rte_devargs *devargs, + const char *devstr); + #endif /* _EAL_PRIVATE_H_ */ diff --git a/lib/librte_eal/common/include/rte_bitmap.h b/lib/librte_eal/common/include/rte_bitmap.h index 7d4935fc..d9facc64 100644 --- a/lib/librte_eal/common/include/rte_bitmap.h +++ b/lib/librte_eal/common/include/rte_bitmap.h @@ -198,12 +198,12 @@ rte_bitmap_get_memory_footprint(uint32_t n_bits) { /** * Bitmap initialization * - * @param mem_size - * Minimum expected size of bitmap. + * @param n_bits + * Number of pre-allocated bits in array2. * @param mem * Base address of array1 and array2. - * @param n_bits - * Number of pre-allocated bits in array2. Must be non-zero and multiple of 512. + * @param mem_size + * Minimum expected size of bitmap. * @return * Handle to bitmap instance. */ diff --git a/lib/librte_eal/common/include/rte_bus.h b/lib/librte_eal/common/include/rte_bus.h index eb9eded4..b7b5b084 100644 --- a/lib/librte_eal/common/include/rte_bus.h +++ b/lib/librte_eal/common/include/rte_bus.h @@ -211,6 +211,7 @@ struct rte_bus { rte_bus_parse_t parse; /**< Parse a device name */ struct rte_bus_conf conf; /**< Bus configuration */ rte_bus_get_iommu_class_t get_iommu_class; /**< Get iommu class */ + rte_dev_iterate_t dev_iterate; /**< Device iterator. */ }; /** @@ -325,8 +326,7 @@ enum rte_iova_mode rte_bus_get_iommu_class(void); * The constructor has higher priority than PMD constructors. */ #define RTE_REGISTER_BUS(nm, bus) \ -RTE_INIT_PRIO(businitfn_ ##nm, BUS); \ -static void businitfn_ ##nm(void) \ +RTE_INIT_PRIO(businitfn_ ##nm, BUS) \ {\ (bus).name = RTE_STR(nm);\ rte_bus_register(&bus); \ diff --git a/lib/librte_eal/common/include/rte_class.h b/lib/librte_eal/common/include/rte_class.h new file mode 100644 index 00000000..276c91e9 --- /dev/null +++ b/lib/librte_eal/common/include/rte_class.h @@ -0,0 +1,134 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright 2018 GaĆ«tan Rivet + */ + +#ifndef _RTE_CLASS_H_ +#define _RTE_CLASS_H_ + +/** + * @file + * + * DPDK device class interface. + * + * This file describes the interface of the device class + * abstraction layer. + * + * A device class defines the type of function a device + * will be used for e.g.: Ethernet adapter (eth), + * cryptographic coprocessor (crypto), etc. + */ + +#ifdef __cplusplus +extern "C" { +#endif + +#include <sys/queue.h> + +#include <rte_dev.h> + +/** Double linked list of classes */ +TAILQ_HEAD(rte_class_list, rte_class); + +/** + * A structure describing a generic device class. + */ +struct rte_class { + TAILQ_ENTRY(rte_class) next; /**< Next device class in linked list */ + const char *name; /**< Name of the class */ + rte_dev_iterate_t dev_iterate; /**< Device iterator. */ +}; + +/** + * Class comparison function. + * + * @param cls + * Class under test. + * + * @param data + * Data to compare against. + * + * @return + * 0 if the class matches the data. + * !0 if the class does not match. + * <0 if ordering is possible and the class is lower than the data. + * >0 if ordering is possible and the class is greater than the data. + */ +typedef int (*rte_class_cmp_t)(const struct rte_class *cls, const void *data); + +/** + * Class iterator to find a particular class. + * + * This function compares each registered class to find one that matches + * the data passed as parameter. + * + * If the comparison function returns zero this function will stop iterating + * over any more classes. To continue a search the class of a previous search + * can be passed via the start parameter. + * + * @param start + * Starting point for the iteration. + * + * @param cmp + * Comparison function. + * + * @param data + * Data to pass to comparison function. + * + * @return + * A pointer to a rte_class structure or NULL in case no class matches + */ +__rte_experimental +struct rte_class * +rte_class_find(const struct rte_class *start, rte_class_cmp_t cmp, + const void *data); + +/** + * Find the registered class for a given name. + */ +__rte_experimental +struct rte_class * +rte_class_find_by_name(const char *name); + +/** + * Register a Class handle. + * + * @param cls + * A pointer to a rte_class structure describing the class + * to be registered. + */ +__rte_experimental +void rte_class_register(struct rte_class *cls); + +/** + * Unregister a Class handle. + * + * @param cls + * A pointer to a rte_class structure describing the class + * to be unregistered. + */ +__rte_experimental +void rte_class_unregister(struct rte_class *cls); + +/** + * Helper for Class registration. + * The constructor has lower priority than Bus constructors. + * The constructor has higher priority than PMD constructors. + */ +#define RTE_REGISTER_CLASS(nm, cls) \ +RTE_INIT_PRIO(classinitfn_ ##nm, CLASS) \ +{\ + (cls).name = RTE_STR(nm); \ + rte_class_register(&cls); \ +} + +#define RTE_UNREGISTER_CLASS(nm, cls) \ +RTE_FINI_PRIO(classfinifn_ ##nm, CLASS) \ +{ \ + rte_class_unregister(&cls); \ +} + +#ifdef __cplusplus +} +#endif + +#endif /* _RTE_CLASS_H_ */ diff --git a/lib/librte_eal/common/include/rte_common.h b/lib/librte_eal/common/include/rte_common.h index 434adfd4..069c13ec 100644 --- a/lib/librte_eal/common/include/rte_common.h +++ b/lib/librte_eal/common/include/rte_common.h @@ -83,6 +83,7 @@ typedef uint16_t unaligned_uint16_t; #define RTE_PRIORITY_LOG 101 #define RTE_PRIORITY_BUS 110 +#define RTE_PRIORITY_CLASS 120 #define RTE_PRIORITY_LAST 65535 #define RTE_PRIO(prio) \ @@ -112,6 +113,29 @@ static void __attribute__((constructor(RTE_PRIO(prio)), used)) func(void) RTE_INIT_PRIO(func, LAST) /** + * Run after main() with low priority. + * + * @param func + * Destructor function name. + * @param prio + * Priority number must be above 100. + * Lowest number is the last to run. + */ +#define RTE_FINI_PRIO(func, prio) \ +static void __attribute__((destructor(RTE_PRIO(prio)), used)) func(void) + +/** + * Run after main() with high priority. + * + * The destructor will be run *before* prioritized destructors. + * + * @param func + * Destructor function name. + */ +#define RTE_FINI(func) \ + RTE_FINI_PRIO(func, LAST) + +/** * Force a function to be inlined */ #define __rte_always_inline inline __attribute__((always_inline)) @@ -294,6 +318,11 @@ rte_combine64ms1b(register uint64_t v) /*********** Macros to work with powers of 2 ********/ /** + * Macro to return 1 if n is a power of 2, 0 otherwise + */ +#define RTE_IS_POWER_OF_2(n) ((n) && !(((n) - 1) & (n))) + +/** * Returns true if n is a power of 2 * @param n * Number to check diff --git a/lib/librte_eal/common/include/rte_dev.h b/lib/librte_eal/common/include/rte_dev.h index 3879ff3c..b80a8059 100644 --- a/lib/librte_eal/common/include/rte_dev.h +++ b/lib/librte_eal/common/include/rte_dev.h @@ -69,9 +69,7 @@ rte_pmd_debug_trace(const char *func_name, const char *fmt, ...) * Enable RTE_PMD_DEBUG_TRACE() when at least one component relying on the * RTE_*_RET() macros defined below is compiled in debug mode. */ -#if defined(RTE_LIBRTE_ETHDEV_DEBUG) || \ - defined(RTE_LIBRTE_CRYPTODEV_DEBUG) || \ - defined(RTE_LIBRTE_EVENTDEV_DEBUG) +#if defined(RTE_LIBRTE_EVENTDEV_DEBUG) #define RTE_PMD_DEBUG_TRACE(...) \ rte_pmd_debug_trace(__func__, __VA_ARGS__) #else @@ -176,6 +174,7 @@ struct rte_device { * @return * 0 on success, negative on error. */ +__rte_deprecated int rte_eal_dev_attach(const char *name, const char *devargs); /** @@ -186,6 +185,7 @@ int rte_eal_dev_attach(const char *name, const char *devargs); * @return * 0 on success, negative on error. */ +__rte_deprecated int rte_eal_dev_detach(struct rte_device *dev); /** @@ -285,6 +285,103 @@ __attribute__((used)) = str static const char DRV_EXP_TAG(name, kmod_dep_export)[] \ __attribute__((used)) = str +/** + * Iteration context. + * + * This context carries over the current iteration state. + */ +struct rte_dev_iterator { + const char *dev_str; /**< device string. */ + const char *bus_str; /**< bus-related part of device string. */ + const char *cls_str; /**< class-related part of device string. */ + struct rte_bus *bus; /**< bus handle. */ + struct rte_class *cls; /**< class handle. */ + struct rte_device *device; /**< current position. */ + void *class_device; /**< additional specialized context. */ +}; + +/** + * Device iteration function. + * + * Find the next device matching properties passed in parameters. + * The function takes an additional ``start`` parameter, that is + * used as starting context when relevant. + * + * The function returns the current element in the iteration. + * This return value will potentially be used as a start parameter + * in subsequent calls to the function. + * + * The additional iterator parameter is only there if a specific + * implementation needs additional context. It must not be modified by + * the iteration function itself. + * + * @param start + * Starting iteration context. + * + * @param devstr + * Device description string. + * + * @param it + * Device iterator. + * + * @return + * The address of the current element matching the device description + * string. + */ +typedef void *(*rte_dev_iterate_t)(const void *start, + const char *devstr, + const struct rte_dev_iterator *it); + +/** + * Initializes a device iterator. + * + * This iterator allows accessing a list of devices matching a criteria. + * The device matching is made among all buses and classes currently registered, + * filtered by the device description given as parameter. + * + * This function will not allocate any memory. It is safe to stop the + * iteration at any moment and let the iterator go out of context. + * + * @param it + * Device iterator handle. + * + * @param str + * Device description string. + * + * @return + * 0 on successful initialization. + * <0 on error. + */ +__rte_experimental +int +rte_dev_iterator_init(struct rte_dev_iterator *it, const char *str); + +/** + * Iterates on a device iterator. + * + * Generates a new rte_device handle corresponding to the next element + * in the list described in comprehension by the iterator. + * + * The next object is returned, and the iterator is updated. + * + * @param it + * Device iterator handle. + * + * @return + * An rte_device handle if found. + * NULL if an error occurred (rte_errno is set). + * NULL if no device could be found (rte_errno is not set). + */ +__rte_experimental +struct rte_device * +rte_dev_iterator_next(struct rte_dev_iterator *it); + +#define RTE_DEV_FOREACH(dev, devstr, it) \ + for (rte_dev_iterator_init(it, devstr), \ + dev = rte_dev_iterator_next(it); \ + dev != NULL; \ + dev = rte_dev_iterator_next(it)) + #ifdef __cplusplus } #endif diff --git a/lib/librte_eal/common/include/rte_devargs.h b/lib/librte_eal/common/include/rte_devargs.h index 58fbd90a..097a4ce7 100644 --- a/lib/librte_eal/common/include/rte_devargs.h +++ b/lib/librte_eal/common/include/rte_devargs.h @@ -51,12 +51,19 @@ struct rte_devargs { enum rte_devtype type; /** Device policy. */ enum rte_dev_policy policy; - /** Bus handle for the device. */ - struct rte_bus *bus; /** Name of the device. */ char name[RTE_DEV_NAME_MAX_LEN]; + RTE_STD_C11 + union { /** Arguments string as given by user or "" for no argument. */ - char *args; + char *args; + const char *drv_str; + }; + struct rte_bus *bus; /**< bus handle. */ + struct rte_class *cls; /**< class handle. */ + const char *bus_str; /**< bus-related part of device string. */ + const char *cls_str; /**< class-related part of device string. */ + const char *data; /**< Device string storage. */ }; /** @@ -96,6 +103,42 @@ int rte_eal_parse_devargs_str(const char *devargs_str, * in argument. Store which bus will handle the device, its name * and the eventual device parameters. * + * The syntax is: + * + * bus:device_identifier,arg1=val1,arg2=val2 + * + * where "bus:" is the bus name followed by any character separator. + * The bus name is optional. If no bus name is specified, each bus + * will attempt to recognize the device identifier. The first one + * to succeed will be used. + * + * Examples: + * + * pci:0000:05.00.0,arg=val + * 05.00.0,arg=val + * vdev:net_ring0 + * + * @param da + * The devargs structure holding the device information. + * + * @param dev + * String describing a device. + * + * @return + * - 0 on success. + * - Negative errno on error. + */ +__rte_experimental +int +rte_devargs_parse(struct rte_devargs *da, const char *dev); + +/** + * Parse a device string. + * + * Verify that a bus is capable of handling the device passed + * in argument. Store which bus will handle the device, its name + * and the eventual device parameters. + * * The device string is built with a printf-like syntax. * * The syntax is: @@ -124,8 +167,8 @@ int rte_eal_parse_devargs_str(const char *devargs_str, */ __rte_experimental int -rte_devargs_parse(struct rte_devargs *da, - const char *format, ...) +rte_devargs_parsef(struct rte_devargs *da, + const char *format, ...) __attribute__((format(printf, 2, 0))); /** diff --git a/lib/librte_eal/common/include/rte_eal.h b/lib/librte_eal/common/include/rte_eal.h index 8de5d69e..e114dcbd 100644 --- a/lib/librte_eal/common/include/rte_eal.h +++ b/lib/librte_eal/common/include/rte_eal.h @@ -490,27 +490,13 @@ static inline int rte_gettid(void) enum rte_iova_mode rte_eal_iova_mode(void); /** - * @warning - * @b EXPERIMENTAL: this API may change without prior notice - * * Get user provided pool ops name for mbuf * * @return * returns user provided pool ops name. */ -const char * __rte_experimental -rte_eal_mbuf_user_pool_ops(void); - -/** - * @deprecated - * Get default pool ops name for mbuf - * - * @return - * returns default pool ops name. - */ -__rte_deprecated const char * -rte_eal_mbuf_default_mempool_ops(void); +rte_eal_mbuf_user_pool_ops(void); #ifdef __cplusplus } diff --git a/lib/librte_eal/common/include/rte_fbarray.h b/lib/librte_eal/common/include/rte_fbarray.h index 3e61fffe..5d880551 100644 --- a/lib/librte_eal/common/include/rte_fbarray.h +++ b/lib/librte_eal/common/include/rte_fbarray.h @@ -336,6 +336,120 @@ rte_fbarray_find_contig_free(struct rte_fbarray *arr, int __rte_experimental rte_fbarray_find_contig_used(struct rte_fbarray *arr, unsigned int start); +/** + * Find index of previous free element, starting at specified index. + * + * @param arr + * Valid pointer to allocated and correctly set up ``rte_fbarray`` structure. + * + * @param start + * Element index to start search from. + * + * @return + * - non-negative integer on success. + * - -1 on failure, with ``rte_errno`` indicating reason for failure. + */ +int __rte_experimental +rte_fbarray_find_prev_free(struct rte_fbarray *arr, unsigned int start); + + +/** + * Find index of previous used element, starting at specified index. + * + * @param arr + * Valid pointer to allocated and correctly set up ``rte_fbarray`` structure. + * + * @param start + * Element index to start search from. + * + * @return + * - non-negative integer on success. + * - -1 on failure, with ``rte_errno`` indicating reason for failure. + */ +int __rte_experimental +rte_fbarray_find_prev_used(struct rte_fbarray *arr, unsigned int start); + + +/** + * Find lowest start index of chunk of ``n`` free elements, down from specified + * index. + * + * @param arr + * Valid pointer to allocated and correctly set up ``rte_fbarray`` structure. + * + * @param start + * Element index to start search from. + * + * @param n + * Number of free elements to look for. + * + * @return + * - non-negative integer on success. + * - -1 on failure, with ``rte_errno`` indicating reason for failure. + */ +int __rte_experimental +rte_fbarray_find_prev_n_free(struct rte_fbarray *arr, unsigned int start, + unsigned int n); + + +/** + * Find lowest start index of chunk of ``n`` used elements, down from specified + * index. + * + * @param arr + * Valid pointer to allocated and correctly set up ``rte_fbarray`` structure. + * + * @param start + * Element index to start search from. + * + * @param n + * Number of used elements to look for. + * + * @return + * - non-negative integer on success. + * - -1 on failure, with ``rte_errno`` indicating reason for failure. + */ +int __rte_experimental +rte_fbarray_find_prev_n_used(struct rte_fbarray *arr, unsigned int start, + unsigned int n); + + +/** + * Find how many more free entries there are before specified index (like + * ``rte_fbarray_find_contig_free`` but going in reverse). + * + * @param arr + * Valid pointer to allocated and correctly set up ``rte_fbarray`` structure. + * + * @param start + * Element index to start search from. + * + * @return + * - non-negative integer on success. + * - -1 on failure, with ``rte_errno`` indicating reason for failure. + */ +int __rte_experimental +rte_fbarray_find_rev_contig_free(struct rte_fbarray *arr, + unsigned int start); + + +/** + * Find how many more used entries there are before specified index (like + * ``rte_fbarray_find_contig_used`` but going in reverse). + * + * @param arr + * Valid pointer to allocated and correctly set up ``rte_fbarray`` structure. + * + * @param start + * Element index to start search from. + * + * @return + * - non-negative integer on success. + * - -1 on failure, with ``rte_errno`` indicating reason for failure. + */ +int __rte_experimental +rte_fbarray_find_rev_contig_used(struct rte_fbarray *arr, unsigned int start); + /** * Dump ``rte_fbarray`` metadata. diff --git a/lib/librte_eal/common/include/rte_memory.h b/lib/librte_eal/common/include/rte_memory.h index aab9f6fe..c4b7f4cf 100644 --- a/lib/librte_eal/common/include/rte_memory.h +++ b/lib/librte_eal/common/include/rte_memory.h @@ -264,6 +264,60 @@ int __rte_experimental rte_memseg_list_walk(rte_memseg_list_walk_t func, void *arg); /** + * Walk list of all memsegs without performing any locking. + * + * @note This function does not perform any locking, and is only safe to call + * from within memory-related callback functions. + * + * @param func + * Iterator function + * @param arg + * Argument passed to iterator + * @return + * 0 if walked over the entire list + * 1 if stopped by the user + * -1 if user function reported error + */ +int __rte_experimental +rte_memseg_walk_thread_unsafe(rte_memseg_walk_t func, void *arg); + +/** + * Walk each VA-contiguous area without performing any locking. + * + * @note This function does not perform any locking, and is only safe to call + * from within memory-related callback functions. + * + * @param func + * Iterator function + * @param arg + * Argument passed to iterator + * @return + * 0 if walked over the entire list + * 1 if stopped by the user + * -1 if user function reported error + */ +int __rte_experimental +rte_memseg_contig_walk_thread_unsafe(rte_memseg_contig_walk_t func, void *arg); + +/** + * Walk each allocated memseg list without performing any locking. + * + * @note This function does not perform any locking, and is only safe to call + * from within memory-related callback functions. + * + * @param func + * Iterator function + * @param arg + * Argument passed to iterator + * @return + * 0 if walked over the entire list + * 1 if stopped by the user + * -1 if user function reported error + */ +int __rte_experimental +rte_memseg_list_walk_thread_unsafe(rte_memseg_list_walk_t func, void *arg); + +/** * Dump the physical memory layout to a file. * * @note This function read-locks the memory hotplug subsystem, and thus cannot diff --git a/lib/librte_eal/common/include/rte_memzone.h b/lib/librte_eal/common/include/rte_memzone.h index ef370fa6..f478fa9e 100644 --- a/lib/librte_eal/common/include/rte_memzone.h +++ b/lib/librte_eal/common/include/rte_memzone.h @@ -81,8 +81,12 @@ struct rte_memzone { * memzones from memory that is already available. It will not trigger any * new allocations. * - * @note Reserving IOVA-contiguous memzones with len set to 0 is not currently - * supported. + * @note: When reserving memzones with len set to 0, it is preferable to also + * set a valid socket_id. Setting socket_id to SOCKET_ID_ANY is supported, but + * will likely not yield expected results. Specifically, the resulting memzone + * may not necessarily be the biggest memzone available, but rather biggest + * memzone available on socket id corresponding to an lcore from which + * reservation was called. * * @param name * The name of the memzone. If it already exists, the function will @@ -141,8 +145,12 @@ const struct rte_memzone *rte_memzone_reserve(const char *name, * memzones from memory that is already available. It will not trigger any * new allocations. * - * @note Reserving IOVA-contiguous memzones with len set to 0 is not currently - * supported. + * @note: When reserving memzones with len set to 0, it is preferable to also + * set a valid socket_id. Setting socket_id to SOCKET_ID_ANY is supported, but + * will likely not yield expected results. Specifically, the resulting memzone + * may not necessarily be the biggest memzone available, but rather biggest + * memzone available on socket id corresponding to an lcore from which + * reservation was called. * * @param name * The name of the memzone. If it already exists, the function will @@ -206,8 +214,12 @@ const struct rte_memzone *rte_memzone_reserve_aligned(const char *name, * memzones from memory that is already available. It will not trigger any * new allocations. * - * @note Reserving IOVA-contiguous memzones with len set to 0 is not currently - * supported. + * @note: When reserving memzones with len set to 0, it is preferable to also + * set a valid socket_id. Setting socket_id to SOCKET_ID_ANY is supported, but + * will likely not yield expected results. Specifically, the resulting memzone + * may not necessarily be the biggest memzone available, but rather biggest + * memzone available on socket id corresponding to an lcore from which + * reservation was called. * * @param name * The name of the memzone. If it already exists, the function will diff --git a/lib/librte_eal/common/include/rte_service.h b/lib/librte_eal/common/include/rte_service.h index aea4d91b..34b41aff 100644 --- a/lib/librte_eal/common/include/rte_service.h +++ b/lib/librte_eal/common/include/rte_service.h @@ -162,6 +162,26 @@ int32_t rte_service_runstate_set(uint32_t id, uint32_t runstate); int32_t rte_service_runstate_get(uint32_t id); /** + * @warning + * @b EXPERIMENTAL: this API may change, or be removed, without prior notice + * + * This function returns whether the service may be currently executing on + * at least one lcore, or definitely is not. This function can be used to + * determine if, after setting the service runstate to stopped, the service + * is still executing a service lcore. + * + * Care must be taken if calling this function when the service runstate is + * running, since the result of this function may be incorrect by the time the + * function returns due to service cores running in parallel. + * + * @retval 1 Service may be running on one or more lcores + * @retval 0 Service is not running on any lcore + * @retval -EINVAL Invalid service id + */ +int32_t __rte_experimental +rte_service_may_be_active(uint32_t id); + +/** * Enable or disable the check for a service-core being mapped to the service. * An application can disable the check when takes the responsibility to run a * service itself using *rte_service_run_iter_on_app_lcore*. @@ -363,6 +383,42 @@ int32_t rte_service_attr_get(uint32_t id, uint32_t attr_id, */ int32_t rte_service_attr_reset_all(uint32_t id); +/** + * Returns the number of times the service runner has looped. + */ +#define RTE_SERVICE_LCORE_ATTR_LOOPS 0 + +/** + * @warning + * @b EXPERIMENTAL: this API may change without prior notice + * + * Get an attribute from a service core. + * + * @param lcore Id of the service core. + * @param attr_id Id of the attribute to be retrieved. + * @param [out] attr_value Pointer to storage in which to write retrieved value. + * @retval 0 Success, the attribute value has been written to *attr_value*. + * -EINVAL Invalid lcore, attr_id or attr_value was NULL. + * -ENOTSUP lcore is not a service core. + */ +int32_t __rte_experimental +rte_service_lcore_attr_get(uint32_t lcore, uint32_t attr_id, + uint64_t *attr_value); + +/** + * @warning + * @b EXPERIMENTAL: this API may change without prior notice + * + * Reset all attribute values of a service core. + * + * @param lcore The service core to reset all the statistics of + * @retval 0 Successfully reset attributes + * -EINVAL Invalid service id provided + * -ENOTSUP lcore is not a service core. + */ +int32_t __rte_experimental +rte_service_lcore_attr_reset_all(uint32_t lcore); + #ifdef __cplusplus } #endif diff --git a/lib/librte_eal/common/include/rte_tailq.h b/lib/librte_eal/common/include/rte_tailq.h index 8dccaefc..9b01abb2 100644 --- a/lib/librte_eal/common/include/rte_tailq.h +++ b/lib/librte_eal/common/include/rte_tailq.h @@ -119,8 +119,7 @@ struct rte_tailq_head *rte_eal_tailq_lookup(const char *name); int rte_eal_tailq_register(struct rte_tailq_elem *t); #define EAL_REGISTER_TAILQ(t) \ -RTE_INIT(tailqinitfn_ ##t); \ -static void tailqinitfn_ ##t(void) \ +RTE_INIT(tailqinitfn_ ##t) \ { \ if (rte_eal_tailq_register(&t) < 0) \ rte_panic("Cannot initialize tailq: %s\n", t.name); \ diff --git a/lib/librte_eal/common/include/rte_uuid.h b/lib/librte_eal/common/include/rte_uuid.h new file mode 100644 index 00000000..2c846b5f --- /dev/null +++ b/lib/librte_eal/common/include/rte_uuid.h @@ -0,0 +1,129 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright (C) 1996, 1997, 1998 Theodore Ts'o. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, and the entire permission notice in its entirety, + * including the disclaimer of warranties. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. The name of the author may not be used to endorse or promote + * products derived from this software without specific prior + * written permission. + * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED + * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ALL OF + * WHICH ARE HEREBY DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT + * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE + * USE OF THIS SOFTWARE, EVEN IF NOT ADVISED OF THE POSSIBILITY OF SUCH + * DAMAGE. + */ +/** + * @file + * + * UUID related functions originally from libuuid + */ + +#ifndef _RTE_UUID_H_ +#define _RTE_UUID_H_ + +#ifdef __cplusplus +extern "C" { +#endif + +#include <stdbool.h> + +/** + * Struct describing a Universal Unique Identifer + */ +typedef unsigned char rte_uuid_t[16]; + +/** + * Helper for defining UUID values for id tables. + */ +#define RTE_UUID_INIT(a, b, c, d, e) { \ + ((a) >> 24) & 0xff, ((a) >> 16) & 0xff, \ + ((a) >> 8) & 0xff, (a) & 0xff, \ + ((b) >> 8) & 0xff, (b) & 0xff, \ + ((c) >> 8) & 0xff, (c) & 0xff, \ + ((d) >> 8) & 0xff, (d) & 0xff, \ + ((e) >> 40) & 0xff, ((e) >> 32) & 0xff, \ + ((e) >> 24) & 0xff, ((e) >> 16) & 0xff, \ + ((e) >> 8) & 0xff, (e) & 0xff \ +} + +/** + * Test if UUID is all zeros. + * + * @param uu + * The uuid to check. + * @return + * true if uuid is NULL value, false otherwise + */ +bool rte_uuid_is_null(const rte_uuid_t uu); + +/** + * Copy uuid. + * + * @param dst + * Destination uuid + * @param src + * Source uuid + */ +static inline void rte_uuid_copy(rte_uuid_t dst, const rte_uuid_t src) +{ + memcpy(dst, src, sizeof(rte_uuid_t)); +} + +/** + * Compare two UUID's + * + * @param a + * A UUID to compare + * @param b + * A UUID to compare + * @return + * returns an integer less than, equal to, or greater than zero if UUID a is + * is less than, equal, or greater than UUID b. + */ +int rte_uuid_compare(const rte_uuid_t a, const rte_uuid_t b); + +/** + * Extract UUID from string + * + * @param in + * Pointer to string of characters to convert + * @param uu + * Destination UUID + * @return + * Returns 0 on succes, and -1 if string is not a valid UUID. + */ +int rte_uuid_parse(const char *in, rte_uuid_t uu); + +/** + * Convert UUID to string + * + * @param uu + * UUID to format + * @param out + * Resulting string buffer + * @param len + * Sizeof the available string buffer + */ +#define RTE_UUID_STRLEN (36 + 1) +void rte_uuid_unparse(const rte_uuid_t uu, char *out, size_t len); + +#ifdef __cplusplus +} +#endif + +#endif /* RTE_UUID_H */ diff --git a/lib/librte_eal/common/include/rte_version.h b/lib/librte_eal/common/include/rte_version.h index 6e2a2362..7c6714a2 100644 --- a/lib/librte_eal/common/include/rte_version.h +++ b/lib/librte_eal/common/include/rte_version.h @@ -32,7 +32,7 @@ extern "C" { /** * Minor version/month number i.e. the mm in yy.mm.z */ -#define RTE_VER_MONTH 05 +#define RTE_VER_MONTH 8 /** * Patch level number i.e. the z in yy.mm.z diff --git a/lib/librte_eal/common/include/rte_vfio.h b/lib/librte_eal/common/include/rte_vfio.h index f90972fa..5ca13fcc 100644 --- a/lib/librte_eal/common/include/rte_vfio.h +++ b/lib/librte_eal/common/include/rte_vfio.h @@ -179,7 +179,7 @@ rte_vfio_clear_group(int vfio_group_fd); * 0 if success. * -1 on error. */ -int __rte_experimental +int rte_vfio_dma_map(uint64_t vaddr, uint64_t iova, uint64_t len); @@ -200,7 +200,7 @@ rte_vfio_dma_map(uint64_t vaddr, uint64_t iova, uint64_t len); * -1 on error. */ -int __rte_experimental +int rte_vfio_dma_unmap(uint64_t vaddr, uint64_t iova, uint64_t len); /** * Parse IOMMU group number for a device @@ -222,7 +222,7 @@ rte_vfio_dma_unmap(uint64_t vaddr, uint64_t iova, uint64_t len); * 0 for non-existent group or VFIO * <0 for errors */ -int __rte_experimental +int rte_vfio_get_group_num(const char *sysfs_base, const char *dev_addr, int *iommu_group_num); @@ -236,7 +236,7 @@ rte_vfio_get_group_num(const char *sysfs_base, * > 0 container fd * < 0 for errors */ -int __rte_experimental +int rte_vfio_get_container_fd(void); /** @@ -252,13 +252,10 @@ rte_vfio_get_container_fd(void); * > 0 group fd * < 0 for errors */ -int __rte_experimental +int rte_vfio_get_group_fd(int iommu_group_num); /** - * @warning - * @b EXPERIMENTAL: this API may change, or be removed, without prior notice - * * Create a new container for device binding. * * @note Any newly allocated DPDK memory will not be mapped into these @@ -269,13 +266,10 @@ rte_vfio_get_group_fd(int iommu_group_num); * the container fd if successful * <0 if failed */ -int __rte_experimental +int rte_vfio_container_create(void); /** - * @warning - * @b EXPERIMENTAL: this API may change, or be removed, without prior notice - * * Destroy the container, unbind all vfio groups within it. * * @param container_fd @@ -285,13 +279,10 @@ rte_vfio_container_create(void); * 0 if successful * <0 if failed */ -int __rte_experimental +int rte_vfio_container_destroy(int container_fd); /** - * @warning - * @b EXPERIMENTAL: this API may change, or be removed, without prior notice - * * Bind a IOMMU group to a container. * * @param container_fd @@ -304,13 +295,10 @@ rte_vfio_container_destroy(int container_fd); * group fd if successful * <0 if failed */ -int __rte_experimental +int rte_vfio_container_group_bind(int container_fd, int iommu_group_num); /** - * @warning - * @b EXPERIMENTAL: this API may change, or be removed, without prior notice - * * Unbind a IOMMU group from a container. * * @param container_fd @@ -323,13 +311,10 @@ rte_vfio_container_group_bind(int container_fd, int iommu_group_num); * 0 if successful * <0 if failed */ -int __rte_experimental +int rte_vfio_container_group_unbind(int container_fd, int iommu_group_num); /** - * @warning - * @b EXPERIMENTAL: this API may change, or be removed, without prior notice - * * Perform DMA mapping for devices in a container. * * @param container_fd @@ -348,14 +333,11 @@ rte_vfio_container_group_unbind(int container_fd, int iommu_group_num); * 0 if successful * <0 if failed */ -int __rte_experimental +int rte_vfio_container_dma_map(int container_fd, uint64_t vaddr, uint64_t iova, uint64_t len); /** - * @warning - * @b EXPERIMENTAL: this API may change, or be removed, without prior notice - * * Perform DMA unmapping for devices in a container. * * @param container_fd @@ -374,7 +356,7 @@ rte_vfio_container_dma_map(int container_fd, uint64_t vaddr, * 0 if successful * <0 if failed */ -int __rte_experimental +int rte_vfio_container_dma_unmap(int container_fd, uint64_t vaddr, uint64_t iova, uint64_t len); diff --git a/lib/librte_eal/common/malloc_elem.c b/lib/librte_eal/common/malloc_elem.c index 9bfe9b9b..e0a8ed15 100644 --- a/lib/librte_eal/common/malloc_elem.c +++ b/lib/librte_eal/common/malloc_elem.c @@ -18,10 +18,89 @@ #include <rte_common.h> #include <rte_spinlock.h> +#include "eal_internal_cfg.h" #include "eal_memalloc.h" #include "malloc_elem.h" #include "malloc_heap.h" +size_t +malloc_elem_find_max_iova_contig(struct malloc_elem *elem, size_t align) +{ + void *cur_page, *contig_seg_start, *page_end, *cur_seg_end; + void *data_start, *data_end; + rte_iova_t expected_iova; + struct rte_memseg *ms; + size_t page_sz, cur, max; + + page_sz = (size_t)elem->msl->page_sz; + data_start = RTE_PTR_ADD(elem, MALLOC_ELEM_HEADER_LEN); + data_end = RTE_PTR_ADD(elem, elem->size - MALLOC_ELEM_TRAILER_LEN); + /* segment must start after header and with specified alignment */ + contig_seg_start = RTE_PTR_ALIGN_CEIL(data_start, align); + + /* if we're in IOVA as VA mode, or if we're in legacy mode with + * hugepages, all elements are IOVA-contiguous. + */ + if (rte_eal_iova_mode() == RTE_IOVA_VA || + (internal_config.legacy_mem && rte_eal_has_hugepages())) + return RTE_PTR_DIFF(data_end, contig_seg_start); + + cur_page = RTE_PTR_ALIGN_FLOOR(contig_seg_start, page_sz); + ms = rte_mem_virt2memseg(cur_page, elem->msl); + + /* do first iteration outside the loop */ + page_end = RTE_PTR_ADD(cur_page, page_sz); + cur_seg_end = RTE_MIN(page_end, data_end); + cur = RTE_PTR_DIFF(cur_seg_end, contig_seg_start) - + MALLOC_ELEM_TRAILER_LEN; + max = cur; + expected_iova = ms->iova + page_sz; + /* memsegs are contiguous in memory */ + ms++; + + cur_page = RTE_PTR_ADD(cur_page, page_sz); + + while (cur_page < data_end) { + page_end = RTE_PTR_ADD(cur_page, page_sz); + cur_seg_end = RTE_MIN(page_end, data_end); + + /* reset start of contiguous segment if unexpected iova */ + if (ms->iova != expected_iova) { + /* next contiguous segment must start at specified + * alignment. + */ + contig_seg_start = RTE_PTR_ALIGN(cur_page, align); + /* new segment start may be on a different page, so find + * the page and skip to next iteration to make sure + * we're not blowing past data end. + */ + ms = rte_mem_virt2memseg(contig_seg_start, elem->msl); + cur_page = ms->addr; + /* don't trigger another recalculation */ + expected_iova = ms->iova; + continue; + } + /* cur_seg_end ends on a page boundary or on data end. if we're + * looking at data end, then malloc trailer is already included + * in the calculations. if we're looking at page end, then we + * know there's more data past this page and thus there's space + * for malloc element trailer, so don't count it here. + */ + cur = RTE_PTR_DIFF(cur_seg_end, contig_seg_start); + /* update max if cur value is bigger */ + if (cur > max) + max = cur; + + /* move to next page */ + cur_page = page_end; + expected_iova = ms->iova + page_sz; + /* memsegs are contiguous in memory */ + ms++; + } + + return max; +} + /* * Initialize a general malloc_elem header structure */ @@ -386,16 +465,18 @@ malloc_elem_join_adjacent_free(struct malloc_elem *elem) if (elem->next != NULL && elem->next->state == ELEM_FREE && next_elem_is_adjacent(elem)) { void *erase; + size_t erase_len; /* we will want to erase the trailer and header */ erase = RTE_PTR_SUB(elem->next, MALLOC_ELEM_TRAILER_LEN); + erase_len = MALLOC_ELEM_OVERHEAD + elem->next->pad; /* remove from free list, join to this one */ malloc_elem_free_list_remove(elem->next); join_elem(elem, elem->next); - /* erase header and trailer */ - memset(erase, 0, MALLOC_ELEM_OVERHEAD); + /* erase header, trailer and pad */ + memset(erase, 0, erase_len); } /* @@ -406,9 +487,11 @@ malloc_elem_join_adjacent_free(struct malloc_elem *elem) prev_elem_is_adjacent(elem)) { struct malloc_elem *new_elem; void *erase; + size_t erase_len; /* we will want to erase trailer and header */ erase = RTE_PTR_SUB(elem, MALLOC_ELEM_TRAILER_LEN); + erase_len = MALLOC_ELEM_OVERHEAD + elem->pad; /* remove from free list, join to this one */ malloc_elem_free_list_remove(elem->prev); @@ -416,8 +499,8 @@ malloc_elem_join_adjacent_free(struct malloc_elem *elem) new_elem = elem->prev; join_elem(new_elem, elem); - /* erase header and trailer */ - memset(erase, 0, MALLOC_ELEM_OVERHEAD); + /* erase header, trailer and pad */ + memset(erase, 0, erase_len); elem = new_elem; } @@ -436,7 +519,7 @@ malloc_elem_free(struct malloc_elem *elem) void *ptr; size_t data_len; - ptr = RTE_PTR_ADD(elem, sizeof(*elem)); + ptr = RTE_PTR_ADD(elem, MALLOC_ELEM_HEADER_LEN); data_len = elem->size - MALLOC_ELEM_OVERHEAD; elem = malloc_elem_join_adjacent_free(elem); diff --git a/lib/librte_eal/common/malloc_elem.h b/lib/librte_eal/common/malloc_elem.h index 7331af9c..e2bda4c0 100644 --- a/lib/librte_eal/common/malloc_elem.h +++ b/lib/librte_eal/common/malloc_elem.h @@ -179,4 +179,10 @@ malloc_elem_free_list_index(size_t size); void malloc_elem_free_list_insert(struct malloc_elem *elem); +/* + * Find biggest IOVA-contiguous zone within an element with specified alignment. + */ +size_t +malloc_elem_find_max_iova_contig(struct malloc_elem *elem, size_t align); + #endif /* MALLOC_ELEM_H_ */ diff --git a/lib/librte_eal/common/malloc_heap.c b/lib/librte_eal/common/malloc_heap.c index d6cf3af8..12aaf2d7 100644 --- a/lib/librte_eal/common/malloc_heap.c +++ b/lib/librte_eal/common/malloc_heap.c @@ -149,6 +149,52 @@ find_suitable_element(struct malloc_heap *heap, size_t size, } /* + * Iterates through the freelist for a heap to find a free element with the + * biggest size and requested alignment. Will also set size to whatever element + * size that was found. + * Returns null on failure, or pointer to element on success. + */ +static struct malloc_elem * +find_biggest_element(struct malloc_heap *heap, size_t *size, + unsigned int flags, size_t align, bool contig) +{ + struct malloc_elem *elem, *max_elem = NULL; + size_t idx, max_size = 0; + + for (idx = 0; idx < RTE_HEAP_NUM_FREELISTS; idx++) { + for (elem = LIST_FIRST(&heap->free_head[idx]); + !!elem; elem = LIST_NEXT(elem, free_list)) { + size_t cur_size; + if (!check_hugepage_sz(flags, elem->msl->page_sz)) + continue; + if (contig) { + cur_size = + malloc_elem_find_max_iova_contig(elem, + align); + } else { + void *data_start = RTE_PTR_ADD(elem, + MALLOC_ELEM_HEADER_LEN); + void *data_end = RTE_PTR_ADD(elem, elem->size - + MALLOC_ELEM_TRAILER_LEN); + void *aligned = RTE_PTR_ALIGN_CEIL(data_start, + align); + /* check if aligned data start is beyond end */ + if (aligned >= data_end) + continue; + cur_size = RTE_PTR_DIFF(data_end, aligned); + } + if (cur_size > max_size) { + max_size = cur_size; + max_elem = elem; + } + } + } + + *size = max_size; + return max_elem; +} + +/* * Main function to allocate a block of memory from the heap. * It locks the free list, scans it, and adds a new memseg if the * scan fails. Once the new memseg is added, it re-scans and should return @@ -174,6 +220,26 @@ heap_alloc(struct malloc_heap *heap, const char *type __rte_unused, size_t size, return elem == NULL ? NULL : (void *)(&elem[1]); } +static void * +heap_alloc_biggest(struct malloc_heap *heap, const char *type __rte_unused, + unsigned int flags, size_t align, bool contig) +{ + struct malloc_elem *elem; + size_t size; + + align = RTE_CACHE_LINE_ROUNDUP(align); + + elem = find_biggest_element(heap, &size, flags, align, contig); + if (elem != NULL) { + elem = malloc_elem_alloc(elem, size, align, 0, contig); + + /* increase heap's count of allocated elements */ + heap->alloc_count++; + } + + return elem == NULL ? NULL : (void *)(&elem[1]); +} + /* this function is exposed in malloc_mp.h */ void rollback_expand_heap(struct rte_memseg **ms, int n_segs, @@ -575,6 +641,66 @@ malloc_heap_alloc(const char *type, size_t size, int socket_arg, return NULL; } +static void * +heap_alloc_biggest_on_socket(const char *type, int socket, unsigned int flags, + size_t align, bool contig) +{ + struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config; + struct malloc_heap *heap = &mcfg->malloc_heaps[socket]; + void *ret; + + rte_spinlock_lock(&(heap->lock)); + + align = align == 0 ? 1 : align; + + ret = heap_alloc_biggest(heap, type, flags, align, contig); + + rte_spinlock_unlock(&(heap->lock)); + + return ret; +} + +void * +malloc_heap_alloc_biggest(const char *type, int socket_arg, unsigned int flags, + size_t align, bool contig) +{ + int socket, i, cur_socket; + void *ret; + + /* return NULL if align is not power-of-2 */ + if ((align && !rte_is_power_of_2(align))) + return NULL; + + if (!rte_eal_has_hugepages()) + socket_arg = SOCKET_ID_ANY; + + if (socket_arg == SOCKET_ID_ANY) + socket = malloc_get_numa_socket(); + else + socket = socket_arg; + + /* Check socket parameter */ + if (socket >= RTE_MAX_NUMA_NODES) + return NULL; + + ret = heap_alloc_biggest_on_socket(type, socket, flags, align, + contig); + if (ret != NULL || socket_arg != SOCKET_ID_ANY) + return ret; + + /* try other heaps */ + for (i = 0; i < (int) rte_socket_count(); i++) { + cur_socket = rte_socket_id_by_idx(i); + if (cur_socket == socket) + continue; + ret = heap_alloc_biggest_on_socket(type, cur_socket, flags, + align, contig); + if (ret != NULL) + return ret; + } + return NULL; +} + /* this function is exposed in malloc_mp.h */ int malloc_heap_free_pages(void *aligned_start, size_t aligned_len) diff --git a/lib/librte_eal/common/malloc_heap.h b/lib/librte_eal/common/malloc_heap.h index 03b80141..f52cb555 100644 --- a/lib/librte_eal/common/malloc_heap.h +++ b/lib/librte_eal/common/malloc_heap.h @@ -29,6 +29,10 @@ void * malloc_heap_alloc(const char *type, size_t size, int socket, unsigned int flags, size_t align, size_t bound, bool contig); +void * +malloc_heap_alloc_biggest(const char *type, int socket, unsigned int flags, + size_t align, bool contig); + int malloc_heap_free(struct malloc_elem *elem); diff --git a/lib/librte_eal/common/meson.build b/lib/librte_eal/common/meson.build index 8a3dcfee..56005bea 100644 --- a/lib/librte_eal/common/meson.build +++ b/lib/librte_eal/common/meson.build @@ -8,6 +8,7 @@ common_objs = [] common_sources = files( 'eal_common_bus.c', 'eal_common_cpuflags.c', + 'eal_common_class.c', 'eal_common_devargs.c', 'eal_common_dev.c', 'eal_common_errno.c', @@ -25,6 +26,7 @@ common_sources = files( 'eal_common_tailqs.c', 'eal_common_thread.c', 'eal_common_timer.c', + 'eal_common_uuid.c', 'malloc_elem.c', 'malloc_heap.c', 'malloc_mp.c', @@ -46,6 +48,7 @@ common_headers = files( 'include/rte_branch_prediction.h', 'include/rte_bus.h', 'include/rte_bitmap.h', + 'include/rte_class.h', 'include/rte_common.h', 'include/rte_debug.h', 'include/rte_devargs.h', @@ -75,6 +78,7 @@ common_headers = files( 'include/rte_string_fns.h', 'include/rte_tailq.h', 'include/rte_time.h', + 'include/rte_uuid.h', 'include/rte_version.h') # special case install the generic headers, since they go in a subdir diff --git a/lib/librte_eal/common/rte_service.c b/lib/librte_eal/common/rte_service.c index 73507aac..8767c722 100644 --- a/lib/librte_eal/common/rte_service.c +++ b/lib/librte_eal/common/rte_service.c @@ -52,6 +52,7 @@ struct rte_service_spec_impl { rte_atomic32_t num_mapped_cores; uint64_t calls; uint64_t cycles_spent; + uint8_t active_on_lcore[RTE_MAX_LCORE]; } __rte_cache_aligned; /* the internal values of a service core */ @@ -61,7 +62,7 @@ struct core_state { uint8_t runstate; /* running or stopped */ uint8_t is_service_core; /* set if core is currently a service core */ - /* extreme statistics */ + uint64_t loops; uint64_t calls_per_service[RTE_SERVICE_NUM_MAX]; } __rte_cache_aligned; @@ -347,15 +348,19 @@ rte_service_runner_do_callback(struct rte_service_spec_impl *s, static inline int32_t -service_run(uint32_t i, struct core_state *cs, uint64_t service_mask) +service_run(uint32_t i, int lcore, struct core_state *cs, uint64_t service_mask) { if (!service_valid(i)) return -EINVAL; struct rte_service_spec_impl *s = &rte_services[i]; if (s->comp_runstate != RUNSTATE_RUNNING || s->app_runstate != RUNSTATE_RUNNING || - !(service_mask & (UINT64_C(1) << i))) + !(service_mask & (UINT64_C(1) << i))) { + s->active_on_lcore[lcore] = 0; return -ENOEXEC; + } + + s->active_on_lcore[lcore] = 1; /* check do we need cmpset, if MT safe or <= 1 core * mapped, atomic ops are not required. @@ -374,6 +379,25 @@ service_run(uint32_t i, struct core_state *cs, uint64_t service_mask) return 0; } +int32_t __rte_experimental +rte_service_may_be_active(uint32_t id) +{ + uint32_t ids[RTE_MAX_LCORE] = {0}; + struct rte_service_spec_impl *s = &rte_services[id]; + int32_t lcore_count = rte_service_lcore_list(ids, RTE_MAX_LCORE); + int i; + + if (!service_valid(id)) + return -EINVAL; + + for (i = 0; i < lcore_count; i++) { + if (s->active_on_lcore[ids[i]]) + return 1; + } + + return 0; +} + int32_t rte_service_run_iter_on_app_lcore(uint32_t id, uint32_t serialize_mt_unsafe) { @@ -398,7 +422,7 @@ int32_t rte_service_run_iter_on_app_lcore(uint32_t id, return -EBUSY; } - int ret = service_run(id, cs, UINT64_MAX); + int ret = service_run(id, rte_lcore_id(), cs, UINT64_MAX); if (serialize_mt_unsafe) rte_atomic32_dec(&s->num_mapped_cores); @@ -419,9 +443,11 @@ rte_service_runner_func(void *arg) for (i = 0; i < RTE_SERVICE_NUM_MAX; i++) { /* return value ignored as no change to code flow */ - service_run(i, cs, service_mask); + service_run(i, lcore, cs, service_mask); } + cs->loops++; + rte_smp_rmb(); } @@ -729,6 +755,28 @@ rte_service_attr_get(uint32_t id, uint32_t attr_id, uint32_t *attr_value) } } +int32_t __rte_experimental +rte_service_lcore_attr_get(uint32_t lcore, uint32_t attr_id, + uint64_t *attr_value) +{ + struct core_state *cs; + + if (lcore >= RTE_MAX_LCORE || !attr_value) + return -EINVAL; + + cs = &lcore_states[lcore]; + if (!cs->is_service_core) + return -ENOTSUP; + + switch (attr_id) { + case RTE_SERVICE_LCORE_ATTR_LOOPS: + *attr_value = cs->loops; + return 0; + default: + return -EINVAL; + } +} + static void rte_service_dump_one(FILE *f, struct rte_service_spec_impl *s, uint64_t all_cycles, uint32_t reset) @@ -764,6 +812,23 @@ rte_service_attr_reset_all(uint32_t id) return 0; } +int32_t __rte_experimental +rte_service_lcore_attr_reset_all(uint32_t lcore) +{ + struct core_state *cs; + + if (lcore >= RTE_MAX_LCORE) + return -EINVAL; + + cs = &lcore_states[lcore]; + if (!cs->is_service_core) + return -ENOTSUP; + + cs->loops = 0; + + return 0; +} + static void service_dump_calls_per_lcore(FILE *f, uint32_t lcore, uint32_t reset) { |