summaryrefslogtreecommitdiffstats
path: root/src/dpdk_lib18/librte_eal/common/eal_common_memzone.c
diff options
context:
space:
mode:
authorHanoh Haim <hhaim@cisco.com>2015-06-24 14:03:29 +0300
committerHanoh Haim <hhaim@cisco.com>2015-06-24 14:03:29 +0300
commit8b52a31ed2c299b759f330c4f976b9c70f5765f4 (patch)
tree9d6da5438b5b56b1d2d57e6c13494b4e65d000e7 /src/dpdk_lib18/librte_eal/common/eal_common_memzone.c
first version
Diffstat (limited to 'src/dpdk_lib18/librte_eal/common/eal_common_memzone.c')
-rwxr-xr-xsrc/dpdk_lib18/librte_eal/common/eal_common_memzone.c533
1 files changed, 533 insertions, 0 deletions
diff --git a/src/dpdk_lib18/librte_eal/common/eal_common_memzone.c b/src/dpdk_lib18/librte_eal/common/eal_common_memzone.c
new file mode 100755
index 00000000..b5a5d727
--- /dev/null
+++ b/src/dpdk_lib18/librte_eal/common/eal_common_memzone.c
@@ -0,0 +1,533 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <stdint.h>
+#include <stdarg.h>
+#include <inttypes.h>
+#include <string.h>
+#include <errno.h>
+#include <sys/queue.h>
+
+#include <rte_log.h>
+#include <rte_memory.h>
+#include <rte_memzone.h>
+#include <rte_tailq.h>
+#include <rte_eal.h>
+#include <rte_eal_memconfig.h>
+#include <rte_per_lcore.h>
+#include <rte_errno.h>
+#include <rte_string_fns.h>
+#include <rte_common.h>
+
+#include "eal_private.h"
+
+/* internal copy of free memory segments */
+static struct rte_memseg *free_memseg = NULL;
+
+static inline const struct rte_memzone *
+memzone_lookup_thread_unsafe(const char *name)
+{
+ const struct rte_mem_config *mcfg;
+ unsigned i = 0;
+
+ /* get pointer to global configuration */
+ mcfg = rte_eal_get_configuration()->mem_config;
+
+ /*
+ * the algorithm is not optimal (linear), but there are few
+ * zones and this function should be called at init only
+ */
+ for (i = 0; i < RTE_MAX_MEMZONE && mcfg->memzone[i].addr != NULL; i++) {
+ if (!strncmp(name, mcfg->memzone[i].name, RTE_MEMZONE_NAMESIZE))
+ return &mcfg->memzone[i];
+ }
+
+ return NULL;
+}
+
+/*
+ * Return a pointer to a correctly filled memzone descriptor. If the
+ * allocation cannot be done, return NULL.
+ */
+const struct rte_memzone *
+rte_memzone_reserve(const char *name, size_t len, int socket_id,
+ unsigned flags)
+{
+ return rte_memzone_reserve_aligned(name,
+ len, socket_id, flags, RTE_CACHE_LINE_SIZE);
+}
+
+/*
+ * Helper function for memzone_reserve_aligned_thread_unsafe().
+ * Calculate address offset from the start of the segment.
+ * Align offset in that way that it satisfy istart alignmnet and
+ * buffer of the requested length would not cross specified boundary.
+ */
+static inline phys_addr_t
+align_phys_boundary(const struct rte_memseg *ms, size_t len, size_t align,
+ size_t bound)
+{
+ phys_addr_t addr_offset, bmask, end, start;
+ size_t step;
+
+ step = RTE_MAX(align, bound);
+ bmask = ~((phys_addr_t)bound - 1);
+
+ /* calculate offset to closest alignment */
+ start = RTE_ALIGN_CEIL(ms->phys_addr, align);
+ addr_offset = start - ms->phys_addr;
+
+ while (addr_offset + len < ms->len) {
+
+ /* check, do we meet boundary condition */
+ end = start + len - (len != 0);
+ if ((start & bmask) == (end & bmask))
+ break;
+
+ /* calculate next offset */
+ start = RTE_ALIGN_CEIL(start + 1, step);
+ addr_offset = start - ms->phys_addr;
+ }
+
+ return (addr_offset);
+}
+
+static const struct rte_memzone *
+memzone_reserve_aligned_thread_unsafe(const char *name, size_t len,
+ int socket_id, unsigned flags, unsigned align, unsigned bound)
+{
+ struct rte_mem_config *mcfg;
+ unsigned i = 0;
+ int memseg_idx = -1;
+ uint64_t addr_offset, seg_offset = 0;
+ size_t requested_len;
+ size_t memseg_len = 0;
+ phys_addr_t memseg_physaddr;
+ void *memseg_addr;
+
+ /* get pointer to global configuration */
+ mcfg = rte_eal_get_configuration()->mem_config;
+
+ /* no more room in config */
+ if (mcfg->memzone_idx >= RTE_MAX_MEMZONE) {
+ RTE_LOG(ERR, EAL, "%s(): No more room in config\n", __func__);
+ rte_errno = ENOSPC;
+ return NULL;
+ }
+
+ /* zone already exist */
+ if ((memzone_lookup_thread_unsafe(name)) != NULL) {
+ RTE_LOG(DEBUG, EAL, "%s(): memzone <%s> already exists\n",
+ __func__, name);
+ rte_errno = EEXIST;
+ return NULL;
+ }
+
+ /* if alignment is not a power of two */
+ if (!rte_is_power_of_2(align)) {
+ RTE_LOG(ERR, EAL, "%s(): Invalid alignment: %u\n", __func__,
+ align);
+ rte_errno = EINVAL;
+ return NULL;
+ }
+
+ /* alignment less than cache size is not allowed */
+ if (align < RTE_CACHE_LINE_SIZE)
+ align = RTE_CACHE_LINE_SIZE;
+
+
+ /* align length on cache boundary. Check for overflow before doing so */
+ if (len > SIZE_MAX - RTE_CACHE_LINE_MASK) {
+ rte_errno = EINVAL; /* requested size too big */
+ return NULL;
+ }
+
+ len += RTE_CACHE_LINE_MASK;
+ len &= ~((size_t) RTE_CACHE_LINE_MASK);
+
+ /* save minimal requested length */
+ requested_len = RTE_MAX((size_t)RTE_CACHE_LINE_SIZE, len);
+
+ /* check that boundary condition is valid */
+ if (bound != 0 &&
+ (requested_len > bound || !rte_is_power_of_2(bound))) {
+ rte_errno = EINVAL;
+ return NULL;
+ }
+
+ /* find the smallest segment matching requirements */
+ for (i = 0; i < RTE_MAX_MEMSEG; i++) {
+ /* last segment */
+ if (free_memseg[i].addr == NULL)
+ break;
+
+ /* empty segment, skip it */
+ if (free_memseg[i].len == 0)
+ continue;
+
+ /* bad socket ID */
+ if (socket_id != SOCKET_ID_ANY &&
+ free_memseg[i].socket_id != SOCKET_ID_ANY &&
+ socket_id != free_memseg[i].socket_id)
+ continue;
+
+ /*
+ * calculate offset to closest alignment that
+ * meets boundary conditions.
+ */
+ addr_offset = align_phys_boundary(free_memseg + i,
+ requested_len, align, bound);
+
+ /* check len */
+ if ((requested_len + addr_offset) > free_memseg[i].len)
+ continue;
+
+ /* check flags for hugepage sizes */
+ if ((flags & RTE_MEMZONE_2MB) &&
+ free_memseg[i].hugepage_sz == RTE_PGSIZE_1G)
+ continue;
+ if ((flags & RTE_MEMZONE_1GB) &&
+ free_memseg[i].hugepage_sz == RTE_PGSIZE_2M)
+ continue;
+ if ((flags & RTE_MEMZONE_16MB) &&
+ free_memseg[i].hugepage_sz == RTE_PGSIZE_16G)
+ continue;
+ if ((flags & RTE_MEMZONE_16GB) &&
+ free_memseg[i].hugepage_sz == RTE_PGSIZE_16M)
+ continue;
+
+ /* this segment is the best until now */
+ if (memseg_idx == -1) {
+ memseg_idx = i;
+ memseg_len = free_memseg[i].len;
+ seg_offset = addr_offset;
+ }
+ /* find the biggest contiguous zone */
+ else if (len == 0) {
+ if (free_memseg[i].len > memseg_len) {
+ memseg_idx = i;
+ memseg_len = free_memseg[i].len;
+ seg_offset = addr_offset;
+ }
+ }
+ /*
+ * find the smallest (we already checked that current
+ * zone length is > len
+ */
+ else if (free_memseg[i].len + align < memseg_len ||
+ (free_memseg[i].len <= memseg_len + align &&
+ addr_offset < seg_offset)) {
+ memseg_idx = i;
+ memseg_len = free_memseg[i].len;
+ seg_offset = addr_offset;
+ }
+ }
+
+ /* no segment found */
+ if (memseg_idx == -1) {
+ /*
+ * If RTE_MEMZONE_SIZE_HINT_ONLY flag is specified,
+ * try allocating again without the size parameter otherwise -fail.
+ */
+ if ((flags & RTE_MEMZONE_SIZE_HINT_ONLY) &&
+ ((flags & RTE_MEMZONE_1GB) || (flags & RTE_MEMZONE_2MB)
+ || (flags & RTE_MEMZONE_16MB) || (flags & RTE_MEMZONE_16GB)))
+ return memzone_reserve_aligned_thread_unsafe(name,
+ len, socket_id, 0, align, bound);
+
+ rte_errno = ENOMEM;
+ return NULL;
+ }
+
+ /* save aligned physical and virtual addresses */
+ memseg_physaddr = free_memseg[memseg_idx].phys_addr + seg_offset;
+ memseg_addr = RTE_PTR_ADD(free_memseg[memseg_idx].addr,
+ (uintptr_t) seg_offset);
+
+ /* if we are looking for a biggest memzone */
+ if (len == 0) {
+ if (bound == 0)
+ requested_len = memseg_len - seg_offset;
+ else
+ requested_len = RTE_ALIGN_CEIL(memseg_physaddr + 1,
+ bound) - memseg_physaddr;
+ }
+
+ /* set length to correct value */
+ len = (size_t)seg_offset + requested_len;
+
+ /* update our internal state */
+ free_memseg[memseg_idx].len -= len;
+ free_memseg[memseg_idx].phys_addr += len;
+ free_memseg[memseg_idx].addr =
+ (char *)free_memseg[memseg_idx].addr + len;
+
+ /* fill the zone in config */
+ struct rte_memzone *mz = &mcfg->memzone[mcfg->memzone_idx++];
+ snprintf(mz->name, sizeof(mz->name), "%s", name);
+ mz->phys_addr = memseg_physaddr;
+ mz->addr = memseg_addr;
+ mz->len = requested_len;
+ mz->hugepage_sz = free_memseg[memseg_idx].hugepage_sz;
+ mz->socket_id = free_memseg[memseg_idx].socket_id;
+ mz->flags = 0;
+ mz->memseg_id = memseg_idx;
+
+ return mz;
+}
+
+/*
+ * Return a pointer to a correctly filled memzone descriptor (with a
+ * specified alignment). If the allocation cannot be done, return NULL.
+ */
+const struct rte_memzone *
+rte_memzone_reserve_aligned(const char *name, size_t len,
+ int socket_id, unsigned flags, unsigned align)
+{
+ struct rte_mem_config *mcfg;
+ const struct rte_memzone *mz = NULL;
+
+ /* both sizes cannot be explicitly called for */
+ if (((flags & RTE_MEMZONE_1GB) && (flags & RTE_MEMZONE_2MB))
+ || ((flags & RTE_MEMZONE_16MB) && (flags & RTE_MEMZONE_16GB))) {
+ rte_errno = EINVAL;
+ return NULL;
+ }
+
+ /* get pointer to global configuration */
+ mcfg = rte_eal_get_configuration()->mem_config;
+
+ rte_rwlock_write_lock(&mcfg->mlock);
+
+ mz = memzone_reserve_aligned_thread_unsafe(
+ name, len, socket_id, flags, align, 0);
+
+ rte_rwlock_write_unlock(&mcfg->mlock);
+
+ return mz;
+}
+
+/*
+ * Return a pointer to a correctly filled memzone descriptor (with a
+ * specified alignment and boundary).
+ * If the allocation cannot be done, return NULL.
+ */
+const struct rte_memzone *
+rte_memzone_reserve_bounded(const char *name, size_t len,
+ int socket_id, unsigned flags, unsigned align, unsigned bound)
+{
+ struct rte_mem_config *mcfg;
+ const struct rte_memzone *mz = NULL;
+
+ /* both sizes cannot be explicitly called for */
+ if (((flags & RTE_MEMZONE_1GB) && (flags & RTE_MEMZONE_2MB))
+ || ((flags & RTE_MEMZONE_16MB) && (flags & RTE_MEMZONE_16GB))) {
+ rte_errno = EINVAL;
+ return NULL;
+ }
+
+ /* get pointer to global configuration */
+ mcfg = rte_eal_get_configuration()->mem_config;
+
+ rte_rwlock_write_lock(&mcfg->mlock);
+
+ mz = memzone_reserve_aligned_thread_unsafe(
+ name, len, socket_id, flags, align, bound);
+
+ rte_rwlock_write_unlock(&mcfg->mlock);
+
+ return mz;
+}
+
+
+/*
+ * Lookup for the memzone identified by the given name
+ */
+const struct rte_memzone *
+rte_memzone_lookup(const char *name)
+{
+ struct rte_mem_config *mcfg;
+ const struct rte_memzone *memzone = NULL;
+
+ mcfg = rte_eal_get_configuration()->mem_config;
+
+ rte_rwlock_read_lock(&mcfg->mlock);
+
+ memzone = memzone_lookup_thread_unsafe(name);
+
+ rte_rwlock_read_unlock(&mcfg->mlock);
+
+ return memzone;
+}
+
+/* Dump all reserved memory zones on console */
+void
+rte_memzone_dump(FILE *f)
+{
+ struct rte_mem_config *mcfg;
+ unsigned i = 0;
+
+ /* get pointer to global configuration */
+ mcfg = rte_eal_get_configuration()->mem_config;
+
+ rte_rwlock_read_lock(&mcfg->mlock);
+ /* dump all zones */
+ for (i=0; i<RTE_MAX_MEMZONE; i++) {
+ if (mcfg->memzone[i].addr == NULL)
+ break;
+ fprintf(f, "Zone %u: name:<%s>, phys:0x%"PRIx64", len:0x%zx"
+ ", virt:%p, socket_id:%"PRId32", flags:%"PRIx32"\n", i,
+ mcfg->memzone[i].name,
+ mcfg->memzone[i].phys_addr,
+ mcfg->memzone[i].len,
+ mcfg->memzone[i].addr,
+ mcfg->memzone[i].socket_id,
+ mcfg->memzone[i].flags);
+ }
+ rte_rwlock_read_unlock(&mcfg->mlock);
+}
+
+/*
+ * called by init: modify the free memseg list to have cache-aligned
+ * addresses and cache-aligned lengths
+ */
+static int
+memseg_sanitize(struct rte_memseg *memseg)
+{
+ unsigned phys_align;
+ unsigned virt_align;
+ unsigned off;
+
+ phys_align = memseg->phys_addr & RTE_CACHE_LINE_MASK;
+ virt_align = (unsigned long)memseg->addr & RTE_CACHE_LINE_MASK;
+
+ /*
+ * sanity check: phys_addr and addr must have the same
+ * alignment
+ */
+ if (phys_align != virt_align)
+ return -1;
+
+ /* memseg is really too small, don't bother with it */
+ if (memseg->len < (2 * RTE_CACHE_LINE_SIZE)) {
+ memseg->len = 0;
+ return 0;
+ }
+
+ /* align start address */
+ off = (RTE_CACHE_LINE_SIZE - phys_align) & RTE_CACHE_LINE_MASK;
+ memseg->phys_addr += off;
+ memseg->addr = (char *)memseg->addr + off;
+ memseg->len -= off;
+
+ /* align end address */
+ memseg->len &= ~((uint64_t)RTE_CACHE_LINE_MASK);
+
+ return 0;
+}
+
+/*
+ * Init the memzone subsystem
+ */
+int
+rte_eal_memzone_init(void)
+{
+ struct rte_mem_config *mcfg;
+ const struct rte_memseg *memseg;
+ unsigned i = 0;
+
+ /* get pointer to global configuration */
+ mcfg = rte_eal_get_configuration()->mem_config;
+
+ /* mirror the runtime memsegs from config */
+ free_memseg = mcfg->free_memseg;
+
+ /* secondary processes don't need to initialise anything */
+ if (rte_eal_process_type() == RTE_PROC_SECONDARY)
+ return 0;
+
+ memseg = rte_eal_get_physmem_layout();
+ if (memseg == NULL) {
+ RTE_LOG(ERR, EAL, "%s(): Cannot get physical layout\n", __func__);
+ return -1;
+ }
+
+ rte_rwlock_write_lock(&mcfg->mlock);
+
+ /* fill in uninitialized free_memsegs */
+ for (i = 0; i < RTE_MAX_MEMSEG; i++) {
+ if (memseg[i].addr == NULL)
+ break;
+ if (free_memseg[i].addr != NULL)
+ continue;
+ memcpy(&free_memseg[i], &memseg[i], sizeof(struct rte_memseg));
+ }
+
+ /* make all zones cache-aligned */
+ for (i = 0; i < RTE_MAX_MEMSEG; i++) {
+ if (free_memseg[i].addr == NULL)
+ break;
+ if (memseg_sanitize(&free_memseg[i]) < 0) {
+ RTE_LOG(ERR, EAL, "%s(): Sanity check failed\n", __func__);
+ rte_rwlock_write_unlock(&mcfg->mlock);
+ return -1;
+ }
+ }
+
+ /* delete all zones */
+ mcfg->memzone_idx = 0;
+ memset(mcfg->memzone, 0, sizeof(mcfg->memzone));
+
+ rte_rwlock_write_unlock(&mcfg->mlock);
+
+ return 0;
+}
+
+/* Walk all reserved memory zones */
+void rte_memzone_walk(void (*func)(const struct rte_memzone *, void *),
+ void *arg)
+{
+ struct rte_mem_config *mcfg;
+ unsigned i;
+
+ mcfg = rte_eal_get_configuration()->mem_config;
+
+ rte_rwlock_read_lock(&mcfg->mlock);
+ for (i=0; i<RTE_MAX_MEMZONE; i++) {
+ if (mcfg->memzone[i].addr != NULL)
+ (*func)(&mcfg->memzone[i], arg);
+ }
+ rte_rwlock_read_unlock(&mcfg->mlock);
+}