From 6bfd07670b991c30761ef74fb09f42181dbfd182 Mon Sep 17 00:00:00 2001 From: Damjan Marion Date: Fri, 11 Sep 2020 22:16:53 +0200 Subject: vppinfra: support main heap with different page sizes Type: improvement Change-Id: I381fc3dec8580208d0e24637d791af69011aa83b Signed-off-by: Damjan Marion --- src/plugins/dpdk/main.c | 2 +- src/vlib/cli.c | 45 +++++++- src/vlib/linux/pci.c | 8 +- src/vppinfra/linux/mem.c | 271 ++++++++++++++++++++++++++++++++++++++++---- src/vppinfra/mem.c | 50 ++++++++ src/vppinfra/mem.h | 153 ++++++++++++++++++------- src/vppinfra/mem_dlmalloc.c | 30 ++++- src/vppinfra/pmalloc.c | 18 +-- src/vppinfra/pmalloc.h | 5 +- 9 files changed, 498 insertions(+), 84 deletions(-) (limited to 'src') diff --git a/src/plugins/dpdk/main.c b/src/plugins/dpdk/main.c index a1ea9e32c93..e570cfb4d97 100644 --- a/src/plugins/dpdk/main.c +++ b/src/plugins/dpdk/main.c @@ -110,7 +110,7 @@ dpdk_early_init (vlib_main_t *vm) /* check if pagemap is accessible - if we get zero result dpdk will not be able to get physical memory address and game is over unless we have IOMMU */ - pt = clib_mem_vm_get_paddr (&pt, min_log2 (sysconf (_SC_PAGESIZE)), 1); + pt = clib_mem_vm_get_paddr (&pt, CLIB_MEM_PAGE_SZ_DEFAULT, 1); if (pt && pt[0]) goto check_hugetlb; diff --git a/src/vlib/cli.c b/src/vlib/cli.c index 2bdc98c71ca..223a3b71eea 100644 --- a/src/vlib/cli.c +++ b/src/vlib/cli.c @@ -742,6 +742,7 @@ show_memory_usage (vlib_main_t * vm, clib_mem_main_t *mm = &clib_mem_main; int verbose __attribute__ ((unused)) = 0; int api_segment = 0, stats_segment = 0, main_heap = 0, numa_heaps = 0; + int map = 0; clib_error_t *error; u32 index = 0; int i; @@ -761,6 +762,8 @@ show_memory_usage (vlib_main_t * vm, main_heap = 1; else if (unformat (input, "numa-heaps")) numa_heaps = 1; + else if (unformat (input, "map")) + map = 1; else { error = clib_error_return (0, "unknown input `%U'", @@ -769,9 +772,10 @@ show_memory_usage (vlib_main_t * vm, } } - if ((api_segment + stats_segment + main_heap + 
numa_heaps) == 0) + if ((api_segment + stats_segment + main_heap + numa_heaps + map) == 0) return clib_error_return - (0, "Need one of api-segment, stats-segment, main-heap or numa-heaps"); + (0, "Need one of api-segment, stats-segment, main-heap, numa-heaps " + "or map"); if (api_segment) { @@ -871,6 +875,41 @@ show_memory_usage (vlib_main_t * vm, mm->per_numa_mheaps[index], verbose); } } + if (map) + { + clib_mem_page_stats_t stats = { }; + clib_mem_vm_map_hdr_t *hdr = 0; + u8 *s = 0; + int numa = -1; + + s = format (s, "\n%-16s%7s%7s%7s", + "StartAddr", "size", "PageSz", "Pages"); + while ((numa = vlib_mem_get_next_numa_node (numa)) != -1) + s = format (s, " Numa%u", numa); + s = format (s, " NotMap"); + s = format (s, " Name"); + vlib_cli_output (vm, "%v", s); + vec_reset_length (s); + + while ((hdr = clib_mem_vm_get_next_map_hdr (hdr))) + { + clib_mem_get_page_stats ((void *) hdr->base_addr, + hdr->log2_page_sz, hdr->num_pages, + &stats); + s = format (s, "%016lx%7U%7U%7lu", + hdr->base_addr, format_memory_size, + hdr->num_pages << hdr->log2_page_sz, + format_log2_page_size, hdr->log2_page_sz, + hdr->num_pages); + while ((numa = vlib_mem_get_next_numa_node (numa)) != -1) + s = format (s, "%6lu", stats.per_numa[numa]); + s = format (s, "%7lu", stats.not_mapped); + s = format (s, " %s", hdr->name); + vlib_cli_output (vm, "%v", s); + vec_reset_length (s); + } + vec_free (s); + } } return 0; } @@ -879,7 +918,7 @@ show_memory_usage (vlib_main_t * vm, VLIB_CLI_COMMAND (show_memory_usage_command, static) = { .path = "show memory", .short_help = "show memory [api-segment][stats-segment][verbose]\n" - " [numa-heaps]", + " [numa-heaps][map]", .function = show_memory_usage, }; /* *INDENT-ON* */ diff --git a/src/vlib/linux/pci.c b/src/vlib/linux/pci.c index 168acde7288..a010e64fa13 100644 --- a/src/vlib/linux/pci.c +++ b/src/vlib/linux/pci.c @@ -1132,8 +1132,10 @@ vlib_pci_map_region_int (vlib_main_t * vm, vlib_pci_dev_handle_t h, if (p->type == LINUX_PCI_DEVICE_TYPE_UIO 
&& addr != 0) flags |= MAP_FIXED; - *result = mmap (addr, size, PROT_READ | PROT_WRITE, flags, fd, offset); - if (*result == (void *) -1) + *result = clib_mem_vm_map_shared (addr, size, fd, offset, + "PCIe %U region %u", format_vlib_pci_addr, + vlib_pci_get_addr (vm, h), bar); + if (*result == CLIB_MEM_VM_MAP_FAILED) { error = clib_error_return_unix (0, "mmap `BAR%u'", bar); if (p->type == LINUX_PCI_DEVICE_TYPE_UIO && (fd != -1)) @@ -1337,7 +1339,7 @@ vlib_pci_device_close (vlib_main_t * vm, vlib_pci_dev_handle_t h) { if (res->size == 0) continue; - munmap (res->addr, res->size); + clib_mem_vm_unmap (res->addr); if (res->fd != -1) close (res->fd); } diff --git a/src/vppinfra/linux/mem.c b/src/vppinfra/linux/mem.c index 42efc007ceb..96fb0db5b73 100644 --- a/src/vppinfra/linux/mem.c +++ b/src/vppinfra/linux/mem.c @@ -50,11 +50,13 @@ #define MFD_HUGETLB 0x0004U #endif -uword -clib_mem_get_page_size (void) -{ - return getpagesize (); -} +#ifndef MAP_HUGE_SHIFT +#define MAP_HUGE_SHIFT 26 +#endif + +#ifndef MAP_FIXED_NOREPLACE +#define MAP_FIXED_NOREPLACE 0x100000 +#endif uword clib_mem_get_default_hugepage_size (void) @@ -166,10 +168,11 @@ clib_mem_get_fd_page_size (int fd) return st.st_blksize; } -int +clib_mem_page_sz_t clib_mem_get_fd_log2_page_size (int fd) { - return min_log2 (clib_mem_get_fd_page_size (fd)); + uword page_size = clib_mem_get_fd_page_size (fd); + return page_size ? 
min_log2 (page_size) : CLIB_MEM_PAGE_SZ_UNKNOWN; } void @@ -414,43 +417,263 @@ clib_mem_vm_ext_free (clib_mem_vm_alloc_t * a) uword clib_mem_vm_reserve (uword start, uword size, clib_mem_page_sz_t log2_page_sz) { - uword off, pagesize = 1ULL << log2_page_sz; - int mmap_flags = MAP_PRIVATE | MAP_ANONYMOUS; - u8 *p; - - if (start) - mmap_flags |= MAP_FIXED; + clib_mem_main_t *mm = &clib_mem_main; + uword pagesize = 1ULL << log2_page_sz; + uword sys_page_sz = 1ULL << mm->log2_page_sz; + uword n_bytes; + void *base = 0, *p; size = round_pow2 (size, pagesize); - p = uword_to_pointer (start, void *); - p = mmap (p, size + pagesize, PROT_NONE, mmap_flags, -1, 0); + /* in addition to the requested reservation, we also reserve one system page + * (typically 4K) adjacent to the start of reservation */ + + if (start) + { + /* start address is provided, so we just need to make sure we are not + * replacing existing map */ + if (start & pow2_mask (log2_page_sz)) + return ~0; + + base = (void *) start - sys_page_sz; + base = mmap (base, size + sys_page_sz, PROT_NONE, + MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED_NOREPLACE, -1, 0); + return (base == MAP_FAILED) ? 
~0 : start; + } + + /* to make sure that we get reservation aligned to page_size we need to + * request one additional page as mmap will return us address which is + * aligned only to system page size */ + base = mmap (0, size + pagesize, PROT_NONE, + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); - if (p == MAP_FAILED) + if (base == MAP_FAILED) return ~0; - off = round_pow2 ((uword) p, pagesize) - (uword) p; + /* return additional space at the end of allocation */ + p = base + size + pagesize; + n_bytes = (uword) p & pow2_mask (log2_page_sz); + if (n_bytes) + { + p -= n_bytes; + munmap (p, n_bytes); + } - /* trim start and end of reservation to be page aligned */ - if (off) + /* return additional space at the start of allocation */ + n_bytes = pagesize - sys_page_sz - n_bytes; + if (n_bytes) { - munmap (p, off); - p += off; + munmap (base, n_bytes); + base += n_bytes; } - munmap (p + size, pagesize - off); + return (uword) base + sys_page_sz; +} - return (uword) p; +clib_mem_vm_map_hdr_t * +clib_mem_vm_get_next_map_hdr (clib_mem_vm_map_hdr_t * hdr) +{ + clib_mem_main_t *mm = &clib_mem_main; + uword sys_page_sz = 1 << mm->log2_page_sz; + clib_mem_vm_map_hdr_t *next; + if (hdr == 0) + { + hdr = mm->first_map; + if (hdr) + mprotect (hdr, sys_page_sz, PROT_READ); + return hdr; + } + next = hdr->next; + mprotect (hdr, sys_page_sz, PROT_NONE); + if (next) + mprotect (next, sys_page_sz, PROT_READ); + return next; } +void * +clib_mem_vm_map_internal (void *base, clib_mem_page_sz_t log2_page_sz, + uword size, int fd, uword offset, char *name) +{ + clib_mem_main_t *mm = &clib_mem_main; + clib_mem_vm_map_hdr_t *hdr; + uword sys_page_sz = 1 << mm->log2_page_sz; + int mmap_flags = MAP_FIXED, is_huge = 0; + + if (fd != -1) + { + mmap_flags |= MAP_SHARED; + log2_page_sz = clib_mem_get_fd_log2_page_size (fd); + if (log2_page_sz > mm->log2_page_sz) + is_huge = 1; + } + else + { + mmap_flags |= MAP_PRIVATE | MAP_ANONYMOUS; + + if (log2_page_sz == mm->log2_page_sz) + log2_page_sz = 
CLIB_MEM_PAGE_SZ_DEFAULT; + + switch (log2_page_sz) + { + case CLIB_MEM_PAGE_SZ_UNKNOWN: + /* will fail later */ + break; + case CLIB_MEM_PAGE_SZ_DEFAULT: + log2_page_sz = mm->log2_page_sz; + break; + case CLIB_MEM_PAGE_SZ_DEFAULT_HUGE: + mmap_flags |= MAP_HUGETLB; + log2_page_sz = mm->log2_default_hugepage_sz; + is_huge = 1; + break; + default: + mmap_flags |= MAP_HUGETLB; + mmap_flags |= log2_page_sz << MAP_HUGE_SHIFT; + is_huge = 1; + } + } + + if (log2_page_sz == CLIB_MEM_PAGE_SZ_UNKNOWN) + return CLIB_MEM_VM_MAP_FAILED; + + size = round_pow2 (size, 1 << log2_page_sz); + + base = (void *) clib_mem_vm_reserve ((uword) base, size, log2_page_sz); + + if (base == (void *) ~0) + return CLIB_MEM_VM_MAP_FAILED; + + base = mmap (base, size, PROT_READ | PROT_WRITE, mmap_flags, fd, offset); + + if (base == MAP_FAILED) + return CLIB_MEM_VM_MAP_FAILED; + + if (is_huge && (mlock (base, size) != 0)) + { + munmap (base, size); + return CLIB_MEM_VM_MAP_FAILED; + } + + hdr = mmap (base - sys_page_sz, sys_page_sz, PROT_READ | PROT_WRITE, + MAP_ANONYMOUS | MAP_PRIVATE | MAP_FIXED, -1, 0); + + if (hdr != base - sys_page_sz) + { + munmap (base, size); + return CLIB_MEM_VM_MAP_FAILED; + } + + if (mm->last_map) + { + mprotect (mm->last_map, sys_page_sz, PROT_READ | PROT_WRITE); + mm->last_map->next = hdr; + mprotect (mm->last_map, sys_page_sz, PROT_NONE); + } + else + mm->first_map = hdr; + + hdr->next = 0; + hdr->prev = mm->last_map; + mm->last_map = hdr; + + hdr->base_addr = (uword) base; + hdr->log2_page_sz = log2_page_sz; + hdr->num_pages = size >> log2_page_sz; + snprintf (hdr->name, CLIB_VM_MAP_HDR_NAME_MAX_LEN - 1, "%s", (char *) name); + hdr->name[CLIB_VM_MAP_HDR_NAME_MAX_LEN - 1] = 0; + mprotect (hdr, sys_page_sz, PROT_NONE); + + CLIB_MEM_UNPOISON (base, size); + return base; +} + +int +clib_mem_vm_unmap (void *base) +{ + clib_mem_main_t *mm = &clib_mem_main; + uword size, sys_page_sz = 1 << mm->log2_page_sz; + clib_mem_vm_map_hdr_t *hdr = base - sys_page_sz;; + + if 
(mprotect (hdr, sys_page_sz, PROT_READ | PROT_WRITE) != 0) + return -1; + + size = hdr->num_pages << hdr->log2_page_sz; + if (munmap ((void *) hdr->base_addr, size) != 0) + return -1; + + if (hdr->next) + { + mprotect (hdr->next, sys_page_sz, PROT_READ | PROT_WRITE); + hdr->next->prev = hdr->prev; + mprotect (hdr->next, sys_page_sz, PROT_NONE); + } + else + mm->last_map = hdr->prev; + + if (hdr->prev) + { + mprotect (hdr->prev, sys_page_sz, PROT_READ | PROT_WRITE); + hdr->prev->next = hdr->next; + mprotect (hdr->prev, sys_page_sz, PROT_NONE); + } + else + mm->first_map = hdr->next; + + if (munmap (hdr, sys_page_sz) != 0) + return -1; + + return 0; +} + +void +clib_mem_get_page_stats (void *start, clib_mem_page_sz_t log2_page_size, + uword n_pages, clib_mem_page_stats_t * stats) +{ + int i, *status = 0; + void **ptr = 0; + + log2_page_size = clib_mem_log2_page_size_validate (log2_page_size); + + vec_validate (status, n_pages - 1); + vec_validate (ptr, n_pages - 1); + + for (i = 0; i < n_pages; i++) + ptr[i] = start + (i << log2_page_size); + + clib_memset (stats, 0, sizeof (clib_mem_page_stats_t)); + + if (move_pages (0, n_pages, ptr, 0, status, 0) != 0) + { + stats->unknown = n_pages; + return; + } + + for (i = 0; i < n_pages; i++) + { + if (status[i] >= 0 && status[i] < CLIB_MAX_NUMAS) + { + stats->mapped++; + stats->per_numa[status[i]]++; + } + else if (status[i] == -EFAULT) + stats->not_mapped++; + else + stats->unknown++; + } +} + + u64 * -clib_mem_vm_get_paddr (void *mem, int log2_page_size, int n_pages) +clib_mem_vm_get_paddr (void *mem, clib_mem_page_sz_t log2_page_size, + int n_pages) { int pagesize = sysconf (_SC_PAGESIZE); int fd; int i; u64 *r = 0; + log2_page_size = clib_mem_log2_page_size_validate (log2_page_size); + if ((fd = open ((char *) "/proc/self/pagemap", O_RDONLY)) == -1) return 0; diff --git a/src/vppinfra/mem.c b/src/vppinfra/mem.c index 3477e5f3c17..b417b8503ad 100644 --- a/src/vppinfra/mem.c +++ b/src/vppinfra/mem.c @@ -21,6 +21,56 @@ 
clib_mem_main_t clib_mem_main; +void * +clib_mem_vm_map (void *base, uword size, clib_mem_page_sz_t log2_page_sz, + char *fmt, ...) +{ + va_list va; + void *rv; + u8 *s; + + va_start (va, fmt); + s = va_format (0, fmt, &va); + vec_add1 (s, 0); + rv = clib_mem_vm_map_internal (base, log2_page_sz, size, -1, 0, (char *) s); + va_end (va); + vec_free (s); + return rv; +} + +void * +clib_mem_vm_map_stack (uword size, clib_mem_page_sz_t log2_page_sz, + char *fmt, ...) +{ + va_list va; + void *rv; + u8 *s; + + va_start (va, fmt); + s = va_format (0, fmt, &va); + vec_add1 (s, 0); + rv = clib_mem_vm_map_internal (0, log2_page_sz, size, -1, 0, (char *) s); + va_end (va); + vec_free (s); + return rv; +} + +void * +clib_mem_vm_map_shared (void *base, uword size, int fd, uword offset, + char *fmt, ...) +{ + va_list va; + void *rv; + u8 *s; + va_start (va, fmt); + s = va_format (0, fmt, &va); + vec_add1 (s, 0); + rv = clib_mem_vm_map_internal (base, 0, size, fd, offset, (char *) s); + va_end (va); + vec_free (s); + return rv; +} + /* * fd.io coding-style-patch-verification: ON * diff --git a/src/vppinfra/mem.h b/src/vppinfra/mem.h index f3484cea09d..e6f019cd097 100644 --- a/src/vppinfra/mem.h +++ b/src/vppinfra/mem.h @@ -52,7 +52,8 @@ #include #define CLIB_MAX_MHEAPS 256 -#define CLIB_MAX_NUMAS 8 +#define CLIB_MAX_NUMAS 16 +#define CLIB_MEM_VM_MAP_FAILED ((void *) ~0) typedef enum { @@ -71,6 +72,25 @@ typedef enum CLIB_MEM_PAGE_SZ_16G = 34, } clib_mem_page_sz_t; +typedef struct _clib_mem_vm_map_hdr +{ + /* base address */ + uword base_addr; + + /* number of pages */ + uword num_pages; + + /* page size (log2) */ + clib_mem_page_sz_t log2_page_sz; + + /* allocation name */ +#define CLIB_VM_MAP_HDR_NAME_MAX_LEN 64 + char name[CLIB_VM_MAP_HDR_NAME_MAX_LEN]; + + /* linked list */ + struct _clib_mem_vm_map_hdr *prev, *next; +} clib_mem_vm_map_hdr_t; + typedef struct { /* log2 system page size */ @@ -87,6 +107,9 @@ typedef struct /* per NUMA heaps */ void 
*per_numa_mheaps[CLIB_MAX_NUMAS]; + + /* memory maps */ + clib_mem_vm_map_hdr_t *first_map, *last_map; } clib_mem_main_t; extern clib_mem_main_t clib_mem_main; @@ -305,14 +328,14 @@ clib_mem_set_heap (void *heap) void clib_mem_main_init (); void *clib_mem_init (void *heap, uword size); +void *clib_mem_init_with_page_size (uword memory_size, + clib_mem_page_sz_t log2_page_sz); void *clib_mem_init_thread_safe (void *memory, uword memory_size); void *clib_mem_init_thread_safe_numa (void *memory, uword memory_size, u8 numa); void clib_mem_exit (void); -uword clib_mem_get_page_size (void); - void clib_mem_validate (void); void clib_mem_trace (int enable); @@ -374,39 +397,18 @@ clib_mem_vm_free (void *addr, uword size) munmap (addr, size); } -always_inline void * -clib_mem_vm_unmap (void *addr, uword size) -{ - void *mmap_addr; - uword flags = MAP_PRIVATE | MAP_FIXED; - - /* To unmap we "map" with no protection. If we actually called - munmap then other callers could steal the address space. By - changing to PROT_NONE the kernel can free up the pages which is - really what we want "unmap" to mean. 
*/ - mmap_addr = mmap (addr, size, PROT_NONE, flags, -1, 0); - if (mmap_addr == (void *) -1) - mmap_addr = 0; - else - CLIB_MEM_UNPOISON (mmap_addr, size); - - return mmap_addr; -} - -always_inline void * -clib_mem_vm_map (void *addr, uword size) -{ - void *mmap_addr; - uword flags = MAP_PRIVATE | MAP_FIXED | MAP_ANONYMOUS; - - mmap_addr = mmap (addr, size, (PROT_READ | PROT_WRITE), flags, -1, 0); - if (mmap_addr == (void *) -1) - mmap_addr = 0; - else - CLIB_MEM_UNPOISON (mmap_addr, size); +void *clib_mem_vm_map_internal (void *base, clib_mem_page_sz_t log2_page_sz, + uword size, int fd, uword offset, char *name); - return mmap_addr; -} +void *clib_mem_vm_map (void *start, uword size, + clib_mem_page_sz_t log2_page_size, char *fmt, ...); +void *clib_mem_vm_map_stack (uword size, clib_mem_page_sz_t log2_page_size, + char *fmt, ...); +void *clib_mem_vm_map_shared (void *start, uword size, int fd, uword offset, + char *fmt, ...); +int clib_mem_vm_unmap (void *base); +clib_mem_vm_map_hdr_t *clib_mem_vm_get_next_map_hdr (clib_mem_vm_map_hdr_t * + hdr); typedef struct { @@ -437,16 +439,36 @@ typedef struct uword requested_va; /**< Request fixed position mapping */ } clib_mem_vm_alloc_t; + +static_always_inline clib_mem_page_sz_t +clib_mem_get_log2_page_size (void) +{ + return clib_mem_main.log2_page_sz; +} + +static_always_inline uword +clib_mem_get_page_size (void) +{ + return 1ULL << clib_mem_main.log2_page_sz; +} + +static_always_inline clib_mem_page_sz_t +clib_mem_get_log2_default_hugepage_size () +{ + return clib_mem_main.log2_default_hugepage_sz; +} + clib_error_t *clib_mem_create_fd (char *name, int *fdp); clib_error_t *clib_mem_create_hugetlb_fd (char *name, int *fdp); clib_error_t *clib_mem_vm_ext_alloc (clib_mem_vm_alloc_t * a); void clib_mem_vm_ext_free (clib_mem_vm_alloc_t * a); -u64 clib_mem_get_fd_page_size (int fd); +uword clib_mem_get_fd_page_size (int fd); uword clib_mem_get_default_hugepage_size (void); -int clib_mem_get_fd_log2_page_size (int fd); 
+clib_mem_page_sz_t clib_mem_get_fd_log2_page_size (int fd); uword clib_mem_vm_reserve (uword start, uword size, clib_mem_page_sz_t log2_page_sz); -u64 *clib_mem_vm_get_paddr (void *mem, int log2_page_size, int n_pages); +u64 *clib_mem_vm_get_paddr (void *mem, clib_mem_page_sz_t log2_page_size, + int n_pages); void clib_mem_destroy_mspace (void *mspace); void clib_mem_destroy (void); @@ -466,6 +488,61 @@ void mheap_trace (void *v, int enable); uword clib_mem_trace_enable_disable (uword enable); void clib_mem_trace (int enable); +always_inline uword +clib_mem_round_to_page_size (uword size, clib_mem_page_sz_t log2_page_size) +{ + ASSERT (log2_page_size != CLIB_MEM_PAGE_SZ_UNKNOWN); + + if (log2_page_size == CLIB_MEM_PAGE_SZ_DEFAULT) + log2_page_size = clib_mem_get_log2_page_size (); + else if (log2_page_size == CLIB_MEM_PAGE_SZ_DEFAULT_HUGE) + log2_page_size = clib_mem_get_log2_default_hugepage_size (); + + return round_pow2 (size, 1ULL << log2_page_size); +} + +typedef struct +{ + uword mapped; + uword not_mapped; + uword per_numa[CLIB_MAX_NUMAS]; + uword unknown; +} clib_mem_page_stats_t; + +void clib_mem_get_page_stats (void *start, clib_mem_page_sz_t log2_page_size, + uword n_pages, clib_mem_page_stats_t * stats); + +static_always_inline int +vlib_mem_get_next_numa_node (int numa) +{ + clib_mem_main_t *mm = &clib_mem_main; + u32 bitmap = mm->numa_node_bitmap; + + if (numa >= 0) + bitmap &= ~pow2_mask (numa + 1); + if (bitmap == 0) + return -1; + + return count_trailing_zeros (bitmap); +} + +static_always_inline clib_mem_page_sz_t +clib_mem_log2_page_size_validate (clib_mem_page_sz_t log2_page_size) +{ + if (log2_page_size == CLIB_MEM_PAGE_SZ_DEFAULT) + return clib_mem_get_log2_page_size (); + if (log2_page_size == CLIB_MEM_PAGE_SZ_DEFAULT_HUGE) + return clib_mem_get_log2_default_hugepage_size (); + return log2_page_size; +} + +static_always_inline uword +clib_mem_page_bytes (clib_mem_page_sz_t log2_page_size) +{ + return 1 << clib_mem_log2_page_size_validate 
(log2_page_size); +} + + #include /* clib_panic */ #endif /* _included_clib_mem_h */ diff --git a/src/vppinfra/mem_dlmalloc.c b/src/vppinfra/mem_dlmalloc.c index 0401df5993e..50dc57a60bd 100644 --- a/src/vppinfra/mem_dlmalloc.c +++ b/src/vppinfra/mem_dlmalloc.c @@ -197,7 +197,8 @@ mheap_trace_main_free (mheap_trace_main_t * tm) /* Initialize CLIB heap based on memory/size given by user. Set memory to 0 and CLIB will try to allocate its own heap. */ static void * -clib_mem_init_internal (void *memory, uword memory_size, int set_heap) +clib_mem_init_internal (void *memory, uword memory_size, + clib_mem_page_sz_t log2_page_sz, int set_heap) { u8 *heap; @@ -209,7 +210,18 @@ clib_mem_init_internal (void *memory, uword memory_size, int set_heap) mspace_disable_expand (heap); } else - heap = create_mspace (memory_size, 1 /* locked */ ); + { + memory_size = round_pow2 (memory_size, + clib_mem_page_bytes (log2_page_sz)); + memory = clib_mem_vm_map_internal (0, log2_page_sz, memory_size, -1, 0, + "main heap"); + + if (memory == CLIB_MEM_VM_MAP_FAILED) + return 0; + + heap = create_mspace_with_base (memory, memory_size, 1 /* locked */ ); + mspace_disable_expand (heap); + } CLIB_MEM_POISON (mspace_least_addr (heap), mspace_footprint (heap)); @@ -226,6 +238,15 @@ void * clib_mem_init (void *memory, uword memory_size) { return clib_mem_init_internal (memory, memory_size, + CLIB_MEM_PAGE_SZ_DEFAULT, + 1 /* do clib_mem_set_heap */ ); +} + +void * +clib_mem_init_with_page_size (uword memory_size, + clib_mem_page_sz_t log2_page_sz) +{ + return clib_mem_init_internal (0, memory_size, log2_page_sz, 1 /* do clib_mem_set_heap */ ); } @@ -233,6 +254,7 @@ void * clib_mem_init_thread_safe (void *memory, uword memory_size) { return clib_mem_init_internal (memory, memory_size, + CLIB_MEM_PAGE_SZ_DEFAULT, 1 /* do clib_mem_set_heap */ ); } @@ -250,7 +272,10 @@ clib_mem_destroy_mspace (void *mspace) void clib_mem_destroy (void) { + void *heap = clib_mem_get_heap (); + void *base = 
mspace_least_addr (heap); clib_mem_destroy_mspace (clib_mem_get_heap ()); + clib_mem_vm_unmap (base); } void * @@ -270,6 +295,7 @@ clib_mem_init_thread_safe_numa (void *memory, uword memory_size, u8 numa) } heap = clib_mem_init_internal (memory, memory_size, + CLIB_MEM_PAGE_SZ_DEFAULT, 0 /* do NOT clib_mem_set_heap */ ); ASSERT (heap); diff --git a/src/vppinfra/pmalloc.c b/src/vppinfra/pmalloc.c index 870f3647229..f6171dbf458 100644 --- a/src/vppinfra/pmalloc.c +++ b/src/vppinfra/pmalloc.c @@ -70,11 +70,10 @@ clib_pmalloc_init (clib_pmalloc_main_t * pm, uword base_addr, uword size) pagesize = clib_mem_get_default_hugepage_size (); pm->def_log2_page_sz = min_log2 (pagesize); - pm->sys_log2_page_sz = min_log2 (sysconf (_SC_PAGESIZE)); pm->lookup_log2_page_sz = pm->def_log2_page_sz; /* check if pagemap is accessible */ - pt = clib_mem_vm_get_paddr (&pt, pm->sys_log2_page_sz, 1); + pt = clib_mem_vm_get_paddr (&pt, CLIB_MEM_PAGE_SZ_DEFAULT, 1); if (pt == 0 || pt[0] == 0) pm->flags |= CLIB_PMALLOC_F_NO_PAGEMAP; @@ -223,12 +222,12 @@ pmalloc_update_lookup_table (clib_pmalloc_main_t * pm, u32 first, u32 count) { va = pointer_to_uword (pm->base) + (p << pm->lookup_log2_page_sz); pa = 0; - seek = (va >> pm->sys_log2_page_sz) * sizeof (pa); + seek = (va >> clib_mem_get_log2_page_size ()) * sizeof (pa); if (fd != -1 && lseek (fd, seek, SEEK_SET) == seek && read (fd, &pa, sizeof (pa)) == (sizeof (pa)) && pa & (1ULL << 63) /* page present bit */ ) { - pa = (pa & pow2_mask (55)) << pm->sys_log2_page_sz; + pa = (pa & pow2_mask (55)) << clib_mem_get_log2_page_size (); } pm->lookup_table[p] = va - pa; p++; @@ -258,7 +257,7 @@ pmalloc_map_pages (clib_pmalloc_main_t * pm, clib_pmalloc_arena_t * a, return 0; } - if (a->log2_subpage_sz != pm->sys_log2_page_sz) + if (a->log2_subpage_sz != clib_mem_get_log2_page_size ()) { pm->error = clib_sysfs_prealloc_hugepages (numa_node, a->log2_subpage_sz, n_pages); @@ -289,7 +288,7 @@ pmalloc_map_pages (clib_pmalloc_main_t * pm, 
clib_pmalloc_arena_t * a, if (a->flags & CLIB_PMALLOC_ARENA_F_SHARED_MEM) { mmap_flags |= MAP_SHARED; - if (a->log2_subpage_sz != pm->sys_log2_page_sz) + if (a->log2_subpage_sz != clib_mem_get_log2_page_size ()) pm->error = clib_mem_create_hugetlb_fd ((char *) a->name, &a->fd); else pm->error = clib_mem_create_fd ((char *) a->name, &a->fd); @@ -300,7 +299,7 @@ pmalloc_map_pages (clib_pmalloc_main_t * pm, clib_pmalloc_arena_t * a, } else { - if (a->log2_subpage_sz != pm->sys_log2_page_sz) + if (a->log2_subpage_sz != clib_mem_get_log2_page_size ()) mmap_flags |= MAP_HUGETLB; mmap_flags |= MAP_PRIVATE | MAP_ANONYMOUS; @@ -318,7 +317,8 @@ pmalloc_map_pages (clib_pmalloc_main_t * pm, clib_pmalloc_arena_t * a, goto error; } - if (a->log2_subpage_sz != pm->sys_log2_page_sz && mlock (va, size) != 0) + if (a->log2_subpage_sz != clib_mem_get_log2_page_size () && + mlock (va, size) != 0) { pm->error = clib_error_return_unix (0, "Unable to lock pages"); goto error; @@ -398,7 +398,7 @@ clib_pmalloc_create_shared_arena (clib_pmalloc_main_t * pm, char *name, if (log2_page_sz == 0) log2_page_sz = pm->def_log2_page_sz; else if (log2_page_sz != pm->def_log2_page_sz && - log2_page_sz != pm->sys_log2_page_sz) + log2_page_sz != clib_mem_get_log2_page_size ()) { pm->error = clib_error_create ("unsupported page size (%uKB)", 1 << (log2_page_sz - 10)); diff --git a/src/vppinfra/pmalloc.h b/src/vppinfra/pmalloc.h index 2a3bde2acc4..f7ae5843dbe 100644 --- a/src/vppinfra/pmalloc.h +++ b/src/vppinfra/pmalloc.h @@ -67,10 +67,7 @@ typedef struct u8 *base; /* default page size - typically 2M */ - u32 def_log2_page_sz; - - /* system page size - typically 4K */ - u32 sys_log2_page_sz; + clib_mem_page_sz_t def_log2_page_sz; /* maximum number of pages, limited by VA preallocation size */ u32 max_pages; -- cgit 1.2.3-korg