summaryrefslogtreecommitdiffstats
path: root/src
diff options
context:
space:
mode:
authorDamjan Marion <damarion@cisco.com>2020-09-11 22:16:53 +0200
committerDamjan Marion <damarion@cisco.com>2020-09-17 12:38:41 +0200
commit6bfd07670b991c30761ef74fb09f42181dbfd182 (patch)
treef82fa63e1b3ec6e4d31827efb553516936392288 /src
parentc63e2a4f980e09b4274558f0562cee285f9741b5 (diff)
vppinfra: support main heap with different page sizes
Type: improvement Change-Id: I381fc3dec8580208d0e24637d791af69011aa83b Signed-off-by: Damjan Marion <damarion@cisco.com>
Diffstat (limited to 'src')
-rw-r--r--src/plugins/dpdk/main.c2
-rw-r--r--src/vlib/cli.c45
-rw-r--r--src/vlib/linux/pci.c8
-rw-r--r--src/vppinfra/linux/mem.c271
-rw-r--r--src/vppinfra/mem.c50
-rw-r--r--src/vppinfra/mem.h153
-rw-r--r--src/vppinfra/mem_dlmalloc.c30
-rw-r--r--src/vppinfra/pmalloc.c18
-rw-r--r--src/vppinfra/pmalloc.h5
9 files changed, 498 insertions, 84 deletions
diff --git a/src/plugins/dpdk/main.c b/src/plugins/dpdk/main.c
index a1ea9e32c93..e570cfb4d97 100644
--- a/src/plugins/dpdk/main.c
+++ b/src/plugins/dpdk/main.c
@@ -110,7 +110,7 @@ dpdk_early_init (vlib_main_t *vm)
/* check if pagemap is accessible - if we get zero result
dpdk will not be able to get physical memory address and game is over
unless we have IOMMU */
- pt = clib_mem_vm_get_paddr (&pt, min_log2 (sysconf (_SC_PAGESIZE)), 1);
+ pt = clib_mem_vm_get_paddr (&pt, CLIB_MEM_PAGE_SZ_DEFAULT, 1);
if (pt && pt[0])
goto check_hugetlb;
diff --git a/src/vlib/cli.c b/src/vlib/cli.c
index 2bdc98c71ca..223a3b71eea 100644
--- a/src/vlib/cli.c
+++ b/src/vlib/cli.c
@@ -742,6 +742,7 @@ show_memory_usage (vlib_main_t * vm,
clib_mem_main_t *mm = &clib_mem_main;
int verbose __attribute__ ((unused)) = 0;
int api_segment = 0, stats_segment = 0, main_heap = 0, numa_heaps = 0;
+ int map = 0;
clib_error_t *error;
u32 index = 0;
int i;
@@ -761,6 +762,8 @@ show_memory_usage (vlib_main_t * vm,
main_heap = 1;
else if (unformat (input, "numa-heaps"))
numa_heaps = 1;
+ else if (unformat (input, "map"))
+ map = 1;
else
{
error = clib_error_return (0, "unknown input `%U'",
@@ -769,9 +772,10 @@ show_memory_usage (vlib_main_t * vm,
}
}
- if ((api_segment + stats_segment + main_heap + numa_heaps) == 0)
+ if ((api_segment + stats_segment + main_heap + numa_heaps + map) == 0)
return clib_error_return
- (0, "Need one of api-segment, stats-segment, main-heap or numa-heaps");
+ (0, "Need one of api-segment, stats-segment, main-heap, numa-heaps "
+ "or map");
if (api_segment)
{
@@ -871,6 +875,41 @@ show_memory_usage (vlib_main_t * vm,
mm->per_numa_mheaps[index], verbose);
}
}
+ if (map)
+ {
+ clib_mem_page_stats_t stats = { };
+ clib_mem_vm_map_hdr_t *hdr = 0;
+ u8 *s = 0;
+ int numa = -1;
+
+ s = format (s, "\n%-16s%7s%7s%7s",
+ "StartAddr", "size", "PageSz", "Pages");
+ while ((numa = vlib_mem_get_next_numa_node (numa)) != -1)
+ s = format (s, " Numa%u", numa);
+ s = format (s, " NotMap");
+ s = format (s, " Name");
+ vlib_cli_output (vm, "%v", s);
+ vec_reset_length (s);
+
+ while ((hdr = clib_mem_vm_get_next_map_hdr (hdr)))
+ {
+ clib_mem_get_page_stats ((void *) hdr->base_addr,
+ hdr->log2_page_sz, hdr->num_pages,
+ &stats);
+ s = format (s, "%016lx%7U%7U%7lu",
+ hdr->base_addr, format_memory_size,
+ hdr->num_pages << hdr->log2_page_sz,
+ format_log2_page_size, hdr->log2_page_sz,
+ hdr->num_pages);
+ while ((numa = vlib_mem_get_next_numa_node (numa)) != -1)
+ s = format (s, "%6lu", stats.per_numa[numa]);
+ s = format (s, "%7lu", stats.not_mapped);
+ s = format (s, " %s", hdr->name);
+ vlib_cli_output (vm, "%v", s);
+ vec_reset_length (s);
+ }
+ vec_free (s);
+ }
}
return 0;
}
@@ -879,7 +918,7 @@ show_memory_usage (vlib_main_t * vm,
VLIB_CLI_COMMAND (show_memory_usage_command, static) = {
.path = "show memory",
.short_help = "show memory [api-segment][stats-segment][verbose]\n"
- " [numa-heaps]",
+ " [numa-heaps][map]",
.function = show_memory_usage,
};
/* *INDENT-ON* */
diff --git a/src/vlib/linux/pci.c b/src/vlib/linux/pci.c
index 168acde7288..a010e64fa13 100644
--- a/src/vlib/linux/pci.c
+++ b/src/vlib/linux/pci.c
@@ -1132,8 +1132,10 @@ vlib_pci_map_region_int (vlib_main_t * vm, vlib_pci_dev_handle_t h,
if (p->type == LINUX_PCI_DEVICE_TYPE_UIO && addr != 0)
flags |= MAP_FIXED;
- *result = mmap (addr, size, PROT_READ | PROT_WRITE, flags, fd, offset);
- if (*result == (void *) -1)
+ *result = clib_mem_vm_map_shared (addr, size, fd, offset,
+ "PCIe %U region %u", format_vlib_pci_addr,
+ vlib_pci_get_addr (vm, h), bar);
+ if (*result == CLIB_MEM_VM_MAP_FAILED)
{
error = clib_error_return_unix (0, "mmap `BAR%u'", bar);
if (p->type == LINUX_PCI_DEVICE_TYPE_UIO && (fd != -1))
@@ -1337,7 +1339,7 @@ vlib_pci_device_close (vlib_main_t * vm, vlib_pci_dev_handle_t h)
{
if (res->size == 0)
continue;
- munmap (res->addr, res->size);
+ clib_mem_vm_unmap (res->addr);
if (res->fd != -1)
close (res->fd);
}
diff --git a/src/vppinfra/linux/mem.c b/src/vppinfra/linux/mem.c
index 42efc007ceb..96fb0db5b73 100644
--- a/src/vppinfra/linux/mem.c
+++ b/src/vppinfra/linux/mem.c
@@ -50,11 +50,13 @@
#define MFD_HUGETLB 0x0004U
#endif
-uword
-clib_mem_get_page_size (void)
-{
- return getpagesize ();
-}
+#ifndef MAP_HUGE_SHIFT
+#define MAP_HUGE_SHIFT 26
+#endif
+
+#ifndef MAP_FIXED_NOREPLACE
+#define MAP_FIXED_NOREPLACE 0x100000
+#endif
uword
clib_mem_get_default_hugepage_size (void)
@@ -166,10 +168,11 @@ clib_mem_get_fd_page_size (int fd)
return st.st_blksize;
}
-int
+clib_mem_page_sz_t
clib_mem_get_fd_log2_page_size (int fd)
{
- return min_log2 (clib_mem_get_fd_page_size (fd));
+ uword page_size = clib_mem_get_fd_page_size (fd);
+ return page_size ? min_log2 (page_size) : CLIB_MEM_PAGE_SZ_UNKNOWN;
}
void
@@ -414,43 +417,263 @@ clib_mem_vm_ext_free (clib_mem_vm_alloc_t * a)
uword
clib_mem_vm_reserve (uword start, uword size, clib_mem_page_sz_t log2_page_sz)
{
- uword off, pagesize = 1ULL << log2_page_sz;
- int mmap_flags = MAP_PRIVATE | MAP_ANONYMOUS;
- u8 *p;
-
- if (start)
- mmap_flags |= MAP_FIXED;
+ clib_mem_main_t *mm = &clib_mem_main;
+ uword pagesize = 1ULL << log2_page_sz;
+ uword sys_page_sz = 1ULL << mm->log2_page_sz;
+ uword n_bytes;
+ void *base = 0, *p;
size = round_pow2 (size, pagesize);
- p = uword_to_pointer (start, void *);
- p = mmap (p, size + pagesize, PROT_NONE, mmap_flags, -1, 0);
+ /* in addition to the requested reservation, we also reserve one system page
+ * (typically 4K) adjacent to the start of the reservation */
+
+ if (start)
+ {
+ /* start address is provided, so we just need to make sure we are not
+ * replacing existing map */
+ if (start & pow2_mask (log2_page_sz))
+ return ~0;
+
+ base = (void *) start - sys_page_sz;
+ base = mmap (base, size + sys_page_sz, PROT_NONE,
+ MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED_NOREPLACE, -1, 0);
+ return (base == MAP_FAILED) ? ~0 : start;
+ }
+
+ /* to make sure that we get reservation aligned to page_size we need to
+ * request one additional page as mmap will return us address which is
+ * aligned only to system page size */
+ base = mmap (0, size + pagesize, PROT_NONE,
+ MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
- if (p == MAP_FAILED)
+ if (base == MAP_FAILED)
return ~0;
- off = round_pow2 ((uword) p, pagesize) - (uword) p;
+ /* return additional space at the end of allocation */
+ p = base + size + pagesize;
+ n_bytes = (uword) p & pow2_mask (log2_page_sz);
+ if (n_bytes)
+ {
+ p -= n_bytes;
+ munmap (p, n_bytes);
+ }
- /* trim start and end of reservation to be page aligned */
- if (off)
+ /* return additional space at the start of allocation */
+ n_bytes = pagesize - sys_page_sz - n_bytes;
+ if (n_bytes)
{
- munmap (p, off);
- p += off;
+ munmap (base, n_bytes);
+ base += n_bytes;
}
- munmap (p + size, pagesize - off);
+ return (uword) base + sys_page_sz;
+}
- return (uword) p;
+clib_mem_vm_map_hdr_t *
+clib_mem_vm_get_next_map_hdr (clib_mem_vm_map_hdr_t * hdr)
+{
+ clib_mem_main_t *mm = &clib_mem_main;
+ uword sys_page_sz = 1 << mm->log2_page_sz;
+ clib_mem_vm_map_hdr_t *next;
+ if (hdr == 0)
+ {
+ hdr = mm->first_map;
+ if (hdr)
+ mprotect (hdr, sys_page_sz, PROT_READ);
+ return hdr;
+ }
+ next = hdr->next;
+ mprotect (hdr, sys_page_sz, PROT_NONE);
+ if (next)
+ mprotect (next, sys_page_sz, PROT_READ);
+ return next;
}
+void *
+clib_mem_vm_map_internal (void *base, clib_mem_page_sz_t log2_page_sz,
+ uword size, int fd, uword offset, char *name)
+{
+ clib_mem_main_t *mm = &clib_mem_main;
+ clib_mem_vm_map_hdr_t *hdr;
+ uword sys_page_sz = 1 << mm->log2_page_sz;
+ int mmap_flags = MAP_FIXED, is_huge = 0;
+
+ if (fd != -1)
+ {
+ mmap_flags |= MAP_SHARED;
+ log2_page_sz = clib_mem_get_fd_log2_page_size (fd);
+ if (log2_page_sz > mm->log2_page_sz)
+ is_huge = 1;
+ }
+ else
+ {
+ mmap_flags |= MAP_PRIVATE | MAP_ANONYMOUS;
+
+ if (log2_page_sz == mm->log2_page_sz)
+ log2_page_sz = CLIB_MEM_PAGE_SZ_DEFAULT;
+
+ switch (log2_page_sz)
+ {
+ case CLIB_MEM_PAGE_SZ_UNKNOWN:
+ /* will fail later */
+ break;
+ case CLIB_MEM_PAGE_SZ_DEFAULT:
+ log2_page_sz = mm->log2_page_sz;
+ break;
+ case CLIB_MEM_PAGE_SZ_DEFAULT_HUGE:
+ mmap_flags |= MAP_HUGETLB;
+ log2_page_sz = mm->log2_default_hugepage_sz;
+ is_huge = 1;
+ break;
+ default:
+ mmap_flags |= MAP_HUGETLB;
+ mmap_flags |= log2_page_sz << MAP_HUGE_SHIFT;
+ is_huge = 1;
+ }
+ }
+
+ if (log2_page_sz == CLIB_MEM_PAGE_SZ_UNKNOWN)
+ return CLIB_MEM_VM_MAP_FAILED;
+
+ size = round_pow2 (size, 1 << log2_page_sz);
+
+ base = (void *) clib_mem_vm_reserve ((uword) base, size, log2_page_sz);
+
+ if (base == (void *) ~0)
+ return CLIB_MEM_VM_MAP_FAILED;
+
+ base = mmap (base, size, PROT_READ | PROT_WRITE, mmap_flags, fd, offset);
+
+ if (base == MAP_FAILED)
+ return CLIB_MEM_VM_MAP_FAILED;
+
+ if (is_huge && (mlock (base, size) != 0))
+ {
+ munmap (base, size);
+ return CLIB_MEM_VM_MAP_FAILED;
+ }
+
+ hdr = mmap (base - sys_page_sz, sys_page_sz, PROT_READ | PROT_WRITE,
+ MAP_ANONYMOUS | MAP_PRIVATE | MAP_FIXED, -1, 0);
+
+ if (hdr != base - sys_page_sz)
+ {
+ munmap (base, size);
+ return CLIB_MEM_VM_MAP_FAILED;
+ }
+
+ if (mm->last_map)
+ {
+ mprotect (mm->last_map, sys_page_sz, PROT_READ | PROT_WRITE);
+ mm->last_map->next = hdr;
+ mprotect (mm->last_map, sys_page_sz, PROT_NONE);
+ }
+ else
+ mm->first_map = hdr;
+
+ hdr->next = 0;
+ hdr->prev = mm->last_map;
+ mm->last_map = hdr;
+
+ hdr->base_addr = (uword) base;
+ hdr->log2_page_sz = log2_page_sz;
+ hdr->num_pages = size >> log2_page_sz;
+ snprintf (hdr->name, CLIB_VM_MAP_HDR_NAME_MAX_LEN - 1, "%s", (char *) name);
+ hdr->name[CLIB_VM_MAP_HDR_NAME_MAX_LEN - 1] = 0;
+ mprotect (hdr, sys_page_sz, PROT_NONE);
+
+ CLIB_MEM_UNPOISON (base, size);
+ return base;
+}
+
+int
+clib_mem_vm_unmap (void *base)
+{
+ clib_mem_main_t *mm = &clib_mem_main;
+ uword size, sys_page_sz = 1 << mm->log2_page_sz;
+ clib_mem_vm_map_hdr_t *hdr = base - sys_page_sz;;
+
+ if (mprotect (hdr, sys_page_sz, PROT_READ | PROT_WRITE) != 0)
+ return -1;
+
+ size = hdr->num_pages << hdr->log2_page_sz;
+ if (munmap ((void *) hdr->base_addr, size) != 0)
+ return -1;
+
+ if (hdr->next)
+ {
+ mprotect (hdr->next, sys_page_sz, PROT_READ | PROT_WRITE);
+ hdr->next->prev = hdr->prev;
+ mprotect (hdr->next, sys_page_sz, PROT_NONE);
+ }
+ else
+ mm->last_map = hdr->prev;
+
+ if (hdr->prev)
+ {
+ mprotect (hdr->prev, sys_page_sz, PROT_READ | PROT_WRITE);
+ hdr->prev->next = hdr->next;
+ mprotect (hdr->prev, sys_page_sz, PROT_NONE);
+ }
+ else
+ mm->first_map = hdr->next;
+
+ if (munmap (hdr, sys_page_sz) != 0)
+ return -1;
+
+ return 0;
+}
+
+void
+clib_mem_get_page_stats (void *start, clib_mem_page_sz_t log2_page_size,
+ uword n_pages, clib_mem_page_stats_t * stats)
+{
+ int i, *status = 0;
+ void **ptr = 0;
+
+ log2_page_size = clib_mem_log2_page_size_validate (log2_page_size);
+
+ vec_validate (status, n_pages - 1);
+ vec_validate (ptr, n_pages - 1);
+
+ for (i = 0; i < n_pages; i++)
+ ptr[i] = start + (i << log2_page_size);
+
+ clib_memset (stats, 0, sizeof (clib_mem_page_stats_t));
+
+ if (move_pages (0, n_pages, ptr, 0, status, 0) != 0)
+ {
+ stats->unknown = n_pages;
+ return;
+ }
+
+ for (i = 0; i < n_pages; i++)
+ {
+ if (status[i] >= 0 && status[i] < CLIB_MAX_NUMAS)
+ {
+ stats->mapped++;
+ stats->per_numa[status[i]]++;
+ }
+ else if (status[i] == -EFAULT)
+ stats->not_mapped++;
+ else
+ stats->unknown++;
+ }
+}
+
+
u64 *
-clib_mem_vm_get_paddr (void *mem, int log2_page_size, int n_pages)
+clib_mem_vm_get_paddr (void *mem, clib_mem_page_sz_t log2_page_size,
+ int n_pages)
{
int pagesize = sysconf (_SC_PAGESIZE);
int fd;
int i;
u64 *r = 0;
+ log2_page_size = clib_mem_log2_page_size_validate (log2_page_size);
+
if ((fd = open ((char *) "/proc/self/pagemap", O_RDONLY)) == -1)
return 0;
diff --git a/src/vppinfra/mem.c b/src/vppinfra/mem.c
index 3477e5f3c17..b417b8503ad 100644
--- a/src/vppinfra/mem.c
+++ b/src/vppinfra/mem.c
@@ -21,6 +21,56 @@
clib_mem_main_t clib_mem_main;
+void *
+clib_mem_vm_map (void *base, uword size, clib_mem_page_sz_t log2_page_sz,
+ char *fmt, ...)
+{
+ va_list va;
+ void *rv;
+ u8 *s;
+
+ va_start (va, fmt);
+ s = va_format (0, fmt, &va);
+ vec_add1 (s, 0);
+ rv = clib_mem_vm_map_internal (base, log2_page_sz, size, -1, 0, (char *) s);
+ va_end (va);
+ vec_free (s);
+ return rv;
+}
+
+void *
+clib_mem_vm_map_stack (uword size, clib_mem_page_sz_t log2_page_sz,
+ char *fmt, ...)
+{
+ va_list va;
+ void *rv;
+ u8 *s;
+
+ va_start (va, fmt);
+ s = va_format (0, fmt, &va);
+ vec_add1 (s, 0);
+ rv = clib_mem_vm_map_internal (0, log2_page_sz, size, -1, 0, (char *) s);
+ va_end (va);
+ vec_free (s);
+ return rv;
+}
+
+void *
+clib_mem_vm_map_shared (void *base, uword size, int fd, uword offset,
+ char *fmt, ...)
+{
+ va_list va;
+ void *rv;
+ u8 *s;
+ va_start (va, fmt);
+ s = va_format (0, fmt, &va);
+ vec_add1 (s, 0);
+ rv = clib_mem_vm_map_internal (base, 0, size, fd, offset, (char *) s);
+ va_end (va);
+ vec_free (s);
+ return rv;
+}
+
/*
* fd.io coding-style-patch-verification: ON
*
diff --git a/src/vppinfra/mem.h b/src/vppinfra/mem.h
index f3484cea09d..e6f019cd097 100644
--- a/src/vppinfra/mem.h
+++ b/src/vppinfra/mem.h
@@ -52,7 +52,8 @@
#include <vppinfra/sanitizer.h>
#define CLIB_MAX_MHEAPS 256
-#define CLIB_MAX_NUMAS 8
+#define CLIB_MAX_NUMAS 16
+#define CLIB_MEM_VM_MAP_FAILED ((void *) ~0)
typedef enum
{
@@ -71,6 +72,25 @@ typedef enum
CLIB_MEM_PAGE_SZ_16G = 34,
} clib_mem_page_sz_t;
+typedef struct _clib_mem_vm_map_hdr
+{
+ /* base address */
+ uword base_addr;
+
+ /* number of pages */
+ uword num_pages;
+
+ /* page size (log2) */
+ clib_mem_page_sz_t log2_page_sz;
+
+ /* allocation name */
+#define CLIB_VM_MAP_HDR_NAME_MAX_LEN 64
+ char name[CLIB_VM_MAP_HDR_NAME_MAX_LEN];
+
+ /* linked list */
+ struct _clib_mem_vm_map_hdr *prev, *next;
+} clib_mem_vm_map_hdr_t;
+
typedef struct
{
/* log2 system page size */
@@ -87,6 +107,9 @@ typedef struct
/* per NUMA heaps */
void *per_numa_mheaps[CLIB_MAX_NUMAS];
+
+ /* memory maps */
+ clib_mem_vm_map_hdr_t *first_map, *last_map;
} clib_mem_main_t;
extern clib_mem_main_t clib_mem_main;
@@ -305,14 +328,14 @@ clib_mem_set_heap (void *heap)
void clib_mem_main_init ();
void *clib_mem_init (void *heap, uword size);
+void *clib_mem_init_with_page_size (uword memory_size,
+ clib_mem_page_sz_t log2_page_sz);
void *clib_mem_init_thread_safe (void *memory, uword memory_size);
void *clib_mem_init_thread_safe_numa (void *memory, uword memory_size,
u8 numa);
void clib_mem_exit (void);
-uword clib_mem_get_page_size (void);
-
void clib_mem_validate (void);
void clib_mem_trace (int enable);
@@ -374,39 +397,18 @@ clib_mem_vm_free (void *addr, uword size)
munmap (addr, size);
}
-always_inline void *
-clib_mem_vm_unmap (void *addr, uword size)
-{
- void *mmap_addr;
- uword flags = MAP_PRIVATE | MAP_FIXED;
-
- /* To unmap we "map" with no protection. If we actually called
- munmap then other callers could steal the address space. By
- changing to PROT_NONE the kernel can free up the pages which is
- really what we want "unmap" to mean. */
- mmap_addr = mmap (addr, size, PROT_NONE, flags, -1, 0);
- if (mmap_addr == (void *) -1)
- mmap_addr = 0;
- else
- CLIB_MEM_UNPOISON (mmap_addr, size);
-
- return mmap_addr;
-}
-
-always_inline void *
-clib_mem_vm_map (void *addr, uword size)
-{
- void *mmap_addr;
- uword flags = MAP_PRIVATE | MAP_FIXED | MAP_ANONYMOUS;
-
- mmap_addr = mmap (addr, size, (PROT_READ | PROT_WRITE), flags, -1, 0);
- if (mmap_addr == (void *) -1)
- mmap_addr = 0;
- else
- CLIB_MEM_UNPOISON (mmap_addr, size);
+void *clib_mem_vm_map_internal (void *base, clib_mem_page_sz_t log2_page_sz,
+ uword size, int fd, uword offset, char *name);
- return mmap_addr;
-}
+void *clib_mem_vm_map (void *start, uword size,
+ clib_mem_page_sz_t log2_page_size, char *fmt, ...);
+void *clib_mem_vm_map_stack (uword size, clib_mem_page_sz_t log2_page_size,
+ char *fmt, ...);
+void *clib_mem_vm_map_shared (void *start, uword size, int fd, uword offset,
+ char *fmt, ...);
+int clib_mem_vm_unmap (void *base);
+clib_mem_vm_map_hdr_t *clib_mem_vm_get_next_map_hdr (clib_mem_vm_map_hdr_t *
+ hdr);
typedef struct
{
@@ -437,16 +439,36 @@ typedef struct
uword requested_va; /**< Request fixed position mapping */
} clib_mem_vm_alloc_t;
+
+static_always_inline clib_mem_page_sz_t
+clib_mem_get_log2_page_size (void)
+{
+ return clib_mem_main.log2_page_sz;
+}
+
+static_always_inline uword
+clib_mem_get_page_size (void)
+{
+ return 1ULL << clib_mem_main.log2_page_sz;
+}
+
+static_always_inline clib_mem_page_sz_t
+clib_mem_get_log2_default_hugepage_size ()
+{
+ return clib_mem_main.log2_default_hugepage_sz;
+}
+
clib_error_t *clib_mem_create_fd (char *name, int *fdp);
clib_error_t *clib_mem_create_hugetlb_fd (char *name, int *fdp);
clib_error_t *clib_mem_vm_ext_alloc (clib_mem_vm_alloc_t * a);
void clib_mem_vm_ext_free (clib_mem_vm_alloc_t * a);
-u64 clib_mem_get_fd_page_size (int fd);
+uword clib_mem_get_fd_page_size (int fd);
uword clib_mem_get_default_hugepage_size (void);
-int clib_mem_get_fd_log2_page_size (int fd);
+clib_mem_page_sz_t clib_mem_get_fd_log2_page_size (int fd);
uword clib_mem_vm_reserve (uword start, uword size,
clib_mem_page_sz_t log2_page_sz);
-u64 *clib_mem_vm_get_paddr (void *mem, int log2_page_size, int n_pages);
+u64 *clib_mem_vm_get_paddr (void *mem, clib_mem_page_sz_t log2_page_size,
+ int n_pages);
void clib_mem_destroy_mspace (void *mspace);
void clib_mem_destroy (void);
@@ -466,6 +488,61 @@ void mheap_trace (void *v, int enable);
uword clib_mem_trace_enable_disable (uword enable);
void clib_mem_trace (int enable);
+always_inline uword
+clib_mem_round_to_page_size (uword size, clib_mem_page_sz_t log2_page_size)
+{
+ ASSERT (log2_page_size != CLIB_MEM_PAGE_SZ_UNKNOWN);
+
+ if (log2_page_size == CLIB_MEM_PAGE_SZ_DEFAULT)
+ log2_page_size = clib_mem_get_log2_page_size ();
+ else if (log2_page_size == CLIB_MEM_PAGE_SZ_DEFAULT_HUGE)
+ log2_page_size = clib_mem_get_log2_default_hugepage_size ();
+
+ return round_pow2 (size, 1ULL << log2_page_size);
+}
+
+typedef struct
+{
+ uword mapped;
+ uword not_mapped;
+ uword per_numa[CLIB_MAX_NUMAS];
+ uword unknown;
+} clib_mem_page_stats_t;
+
+void clib_mem_get_page_stats (void *start, clib_mem_page_sz_t log2_page_size,
+ uword n_pages, clib_mem_page_stats_t * stats);
+
+static_always_inline int
+vlib_mem_get_next_numa_node (int numa)
+{
+ clib_mem_main_t *mm = &clib_mem_main;
+ u32 bitmap = mm->numa_node_bitmap;
+
+ if (numa >= 0)
+ bitmap &= ~pow2_mask (numa + 1);
+ if (bitmap == 0)
+ return -1;
+
+ return count_trailing_zeros (bitmap);
+}
+
+static_always_inline clib_mem_page_sz_t
+clib_mem_log2_page_size_validate (clib_mem_page_sz_t log2_page_size)
+{
+ if (log2_page_size == CLIB_MEM_PAGE_SZ_DEFAULT)
+ return clib_mem_get_log2_page_size ();
+ if (log2_page_size == CLIB_MEM_PAGE_SZ_DEFAULT_HUGE)
+ return clib_mem_get_log2_default_hugepage_size ();
+ return log2_page_size;
+}
+
+static_always_inline uword
+clib_mem_page_bytes (clib_mem_page_sz_t log2_page_size)
+{
+ return 1 << clib_mem_log2_page_size_validate (log2_page_size);
+}
+
+
#include <vppinfra/error.h> /* clib_panic */
#endif /* _included_clib_mem_h */
diff --git a/src/vppinfra/mem_dlmalloc.c b/src/vppinfra/mem_dlmalloc.c
index 0401df5993e..50dc57a60bd 100644
--- a/src/vppinfra/mem_dlmalloc.c
+++ b/src/vppinfra/mem_dlmalloc.c
@@ -197,7 +197,8 @@ mheap_trace_main_free (mheap_trace_main_t * tm)
/* Initialize CLIB heap based on memory/size given by user.
Set memory to 0 and CLIB will try to allocate its own heap. */
static void *
-clib_mem_init_internal (void *memory, uword memory_size, int set_heap)
+clib_mem_init_internal (void *memory, uword memory_size,
+ clib_mem_page_sz_t log2_page_sz, int set_heap)
{
u8 *heap;
@@ -209,7 +210,18 @@ clib_mem_init_internal (void *memory, uword memory_size, int set_heap)
mspace_disable_expand (heap);
}
else
- heap = create_mspace (memory_size, 1 /* locked */ );
+ {
+ memory_size = round_pow2 (memory_size,
+ clib_mem_page_bytes (log2_page_sz));
+ memory = clib_mem_vm_map_internal (0, log2_page_sz, memory_size, -1, 0,
+ "main heap");
+
+ if (memory == CLIB_MEM_VM_MAP_FAILED)
+ return 0;
+
+ heap = create_mspace_with_base (memory, memory_size, 1 /* locked */ );
+ mspace_disable_expand (heap);
+ }
CLIB_MEM_POISON (mspace_least_addr (heap), mspace_footprint (heap));
@@ -226,6 +238,15 @@ void *
clib_mem_init (void *memory, uword memory_size)
{
return clib_mem_init_internal (memory, memory_size,
+ CLIB_MEM_PAGE_SZ_DEFAULT,
+ 1 /* do clib_mem_set_heap */ );
+}
+
+void *
+clib_mem_init_with_page_size (uword memory_size,
+ clib_mem_page_sz_t log2_page_sz)
+{
+ return clib_mem_init_internal (0, memory_size, log2_page_sz,
1 /* do clib_mem_set_heap */ );
}
@@ -233,6 +254,7 @@ void *
clib_mem_init_thread_safe (void *memory, uword memory_size)
{
return clib_mem_init_internal (memory, memory_size,
+ CLIB_MEM_PAGE_SZ_DEFAULT,
1 /* do clib_mem_set_heap */ );
}
@@ -250,7 +272,10 @@ clib_mem_destroy_mspace (void *mspace)
void
clib_mem_destroy (void)
{
+ void *heap = clib_mem_get_heap ();
+ void *base = mspace_least_addr (heap);
clib_mem_destroy_mspace (clib_mem_get_heap ());
+ clib_mem_vm_unmap (base);
}
void *
@@ -270,6 +295,7 @@ clib_mem_init_thread_safe_numa (void *memory, uword memory_size, u8 numa)
}
heap = clib_mem_init_internal (memory, memory_size,
+ CLIB_MEM_PAGE_SZ_DEFAULT,
0 /* do NOT clib_mem_set_heap */ );
ASSERT (heap);
diff --git a/src/vppinfra/pmalloc.c b/src/vppinfra/pmalloc.c
index 870f3647229..f6171dbf458 100644
--- a/src/vppinfra/pmalloc.c
+++ b/src/vppinfra/pmalloc.c
@@ -70,11 +70,10 @@ clib_pmalloc_init (clib_pmalloc_main_t * pm, uword base_addr, uword size)
pagesize = clib_mem_get_default_hugepage_size ();
pm->def_log2_page_sz = min_log2 (pagesize);
- pm->sys_log2_page_sz = min_log2 (sysconf (_SC_PAGESIZE));
pm->lookup_log2_page_sz = pm->def_log2_page_sz;
/* check if pagemap is accessible */
- pt = clib_mem_vm_get_paddr (&pt, pm->sys_log2_page_sz, 1);
+ pt = clib_mem_vm_get_paddr (&pt, CLIB_MEM_PAGE_SZ_DEFAULT, 1);
if (pt == 0 || pt[0] == 0)
pm->flags |= CLIB_PMALLOC_F_NO_PAGEMAP;
@@ -223,12 +222,12 @@ pmalloc_update_lookup_table (clib_pmalloc_main_t * pm, u32 first, u32 count)
{
va = pointer_to_uword (pm->base) + (p << pm->lookup_log2_page_sz);
pa = 0;
- seek = (va >> pm->sys_log2_page_sz) * sizeof (pa);
+ seek = (va >> clib_mem_get_log2_page_size ()) * sizeof (pa);
if (fd != -1 && lseek (fd, seek, SEEK_SET) == seek &&
read (fd, &pa, sizeof (pa)) == (sizeof (pa)) &&
pa & (1ULL << 63) /* page present bit */ )
{
- pa = (pa & pow2_mask (55)) << pm->sys_log2_page_sz;
+ pa = (pa & pow2_mask (55)) << clib_mem_get_log2_page_size ();
}
pm->lookup_table[p] = va - pa;
p++;
@@ -258,7 +257,7 @@ pmalloc_map_pages (clib_pmalloc_main_t * pm, clib_pmalloc_arena_t * a,
return 0;
}
- if (a->log2_subpage_sz != pm->sys_log2_page_sz)
+ if (a->log2_subpage_sz != clib_mem_get_log2_page_size ())
{
pm->error = clib_sysfs_prealloc_hugepages (numa_node,
a->log2_subpage_sz, n_pages);
@@ -289,7 +288,7 @@ pmalloc_map_pages (clib_pmalloc_main_t * pm, clib_pmalloc_arena_t * a,
if (a->flags & CLIB_PMALLOC_ARENA_F_SHARED_MEM)
{
mmap_flags |= MAP_SHARED;
- if (a->log2_subpage_sz != pm->sys_log2_page_sz)
+ if (a->log2_subpage_sz != clib_mem_get_log2_page_size ())
pm->error = clib_mem_create_hugetlb_fd ((char *) a->name, &a->fd);
else
pm->error = clib_mem_create_fd ((char *) a->name, &a->fd);
@@ -300,7 +299,7 @@ pmalloc_map_pages (clib_pmalloc_main_t * pm, clib_pmalloc_arena_t * a,
}
else
{
- if (a->log2_subpage_sz != pm->sys_log2_page_sz)
+ if (a->log2_subpage_sz != clib_mem_get_log2_page_size ())
mmap_flags |= MAP_HUGETLB;
mmap_flags |= MAP_PRIVATE | MAP_ANONYMOUS;
@@ -318,7 +317,8 @@ pmalloc_map_pages (clib_pmalloc_main_t * pm, clib_pmalloc_arena_t * a,
goto error;
}
- if (a->log2_subpage_sz != pm->sys_log2_page_sz && mlock (va, size) != 0)
+ if (a->log2_subpage_sz != clib_mem_get_log2_page_size () &&
+ mlock (va, size) != 0)
{
pm->error = clib_error_return_unix (0, "Unable to lock pages");
goto error;
@@ -398,7 +398,7 @@ clib_pmalloc_create_shared_arena (clib_pmalloc_main_t * pm, char *name,
if (log2_page_sz == 0)
log2_page_sz = pm->def_log2_page_sz;
else if (log2_page_sz != pm->def_log2_page_sz &&
- log2_page_sz != pm->sys_log2_page_sz)
+ log2_page_sz != clib_mem_get_log2_page_size ())
{
pm->error = clib_error_create ("unsupported page size (%uKB)",
1 << (log2_page_sz - 10));
diff --git a/src/vppinfra/pmalloc.h b/src/vppinfra/pmalloc.h
index 2a3bde2acc4..f7ae5843dbe 100644
--- a/src/vppinfra/pmalloc.h
+++ b/src/vppinfra/pmalloc.h
@@ -67,10 +67,7 @@ typedef struct
u8 *base;
/* default page size - typically 2M */
- u32 def_log2_page_sz;
-
- /* system page size - typically 4K */
- u32 sys_log2_page_sz;
+ clib_mem_page_sz_t def_log2_page_sz;
/* maximum number of pages, limited by VA preallocation size */
u32 max_pages;