From bdbb0c5436b52b4dc6c35d05f227cdf934306d83 Mon Sep 17 00:00:00 2001 From: Damjan Marion Date: Thu, 17 Sep 2020 10:40:44 +0200 Subject: stats: configurable page size Type: improvement Change-Id: I9973bce20a0a2a8a7e227cf96518de5b79374425 Signed-off-by: Damjan Marion --- src/vpp/conf/startup.conf | 1 + src/vpp/stats/stat_segment.c | 34 +++++--- src/vpp/stats/stat_segment.h | 1 + src/vppinfra/linux/mem.c | 188 ++++++++++++++++++++++++++----------------- src/vppinfra/mem.h | 12 ++- src/vppinfra/mem_dlmalloc.c | 13 +++ src/vppinfra/pmalloc.c | 5 +- 7 files changed, 163 insertions(+), 91 deletions(-) diff --git a/src/vpp/conf/startup.conf b/src/vpp/conf/startup.conf index 12679da778c..0be10d5ba19 100644 --- a/src/vpp/conf/startup.conf +++ b/src/vpp/conf/startup.conf @@ -186,6 +186,7 @@ cpu { # socket-name <filename>, name of the stats segment socket # defaults to /run/vpp/stats.sock # size <nnn>[KMG], size of the stats segment, defaults to 32mb + # page-size <nnn>, page size, ie. 2m, defaults to 4k # per-node-counters on | off, defaults to none # update-interval <f64-seconds>, sets the segment scrape / update interval # } diff --git a/src/vpp/stats/stat_segment.c b/src/vpp/stats/stat_segment.c index 8255b16c010..968c0566b3f 100644 --- a/src/vpp/stats/stat_segment.c +++ b/src/vpp/stats/stat_segment.c @@ -316,32 +316,37 @@ vlib_map_stat_segment_init (void) stat_segment_main_t *sm = &stat_segment_main; stat_segment_shared_header_t *shared_header; void *oldheap; - ssize_t memory_size; + uword memory_size, sys_page_sz; int mfd; - char *mem_name = "stat_segment_test"; - void *memaddr; + char *mem_name = "stat segment"; + void *heap, *memaddr; memory_size = sm->memory_size; if (memory_size == 0) memory_size = STAT_SEGMENT_DEFAULT_SIZE; - /* Create shared memory segment */ - if ((mfd = memfd_create (mem_name, 0)) < 0) - return clib_error_return (0, "stat segment memfd_create failure"); + if (sm->log2_page_sz == CLIB_MEM_PAGE_SZ_UNKNOWN) + sm->log2_page_sz = CLIB_MEM_PAGE_SZ_DEFAULT; + + mfd = 
clib_mem_vm_create_fd (sm->log2_page_sz, mem_name); /* Set size */ if ((ftruncate (mfd, memory_size)) == -1) return clib_error_return (0, "stat segment ftruncate failure"); - if ((memaddr = - mmap (NULL, memory_size, PROT_READ | PROT_WRITE, MAP_SHARED, mfd, - 0)) == MAP_FAILED) + if (mfd == -1) + return clib_error_return (0, "stat segment memory fd failure: %U", + format_clib_error, clib_mem_get_last_error ()); + + memaddr = clib_mem_vm_map_shared (0, memory_size, mfd, 0, mem_name); + + if (memaddr == CLIB_MEM_VM_MAP_FAILED) return clib_error_return (0, "stat segment mmap failure"); - void *heap; - heap = - create_mspace_with_base (((u8 *) memaddr) + getpagesize (), - memory_size - getpagesize (), 1 /* locked */ ); + sys_page_sz = clib_mem_get_page_size (); + + heap = create_mspace_with_base (((u8 *) memaddr) + sys_page_sz, memory_size + - sys_page_sz, 1 /* locked */ ); mspace_disable_expand (heap); sm->heap = heap; sm->memfd = mfd; @@ -904,6 +909,9 @@ statseg_config (vlib_main_t * vm, unformat_input_t * input) else if (unformat (input, "size %U", unformat_memory_size, &sm->memory_size)) ; + else if (unformat (input, "page-size %U", + unformat_log2_page_size, &sm->log2_page_sz)) + ; else if (unformat (input, "per-node-counters on")) sm->node_counters_enabled = 1; else if (unformat (input, "per-node-counters off")) diff --git a/src/vpp/stats/stat_segment.h b/src/vpp/stats/stat_segment.h index a88067154f7..a048fa5b8ca 100644 --- a/src/vpp/stats/stat_segment.h +++ b/src/vpp/stats/stat_segment.h @@ -93,6 +93,7 @@ typedef struct clib_socket_t *socket; u8 *socket_name; ssize_t memory_size; + clib_mem_page_sz_t log2_page_sz; u8 node_counters_enabled; void *last; void *heap; diff --git a/src/vppinfra/linux/mem.c b/src/vppinfra/linux/mem.c index ef98b3f360b..76195a21118 100644 --- a/src/vppinfra/linux/mem.c +++ b/src/vppinfra/linux/mem.c @@ -54,6 +54,10 @@ #define MAP_HUGE_SHIFT 26 #endif +#ifndef MFD_HUGE_SHIFT +#define MFD_HUGE_SHIFT 26 +#endif + #ifndef 
MAP_FIXED_NOREPLACE #define MAP_FIXED_NOREPLACE 0x100000 #endif @@ -192,76 +196,10 @@ clib_mem_vm_randomize_va (uword * requested_va, (clib_cpu_time_now () & bit_mask) * (1ull << log2_page_size); } -clib_error_t * -clib_mem_create_fd (char *name, int *fdp) -{ - int fd; - - ASSERT (name); - - if ((fd = memfd_create (name, MFD_ALLOW_SEALING)) == -1) - return clib_error_return_unix (0, "memfd_create"); - - if ((fcntl (fd, F_ADD_SEALS, F_SEAL_SHRINK)) == -1) - { - close (fd); - return clib_error_return_unix (0, "fcntl (F_ADD_SEALS)"); - } - - *fdp = fd; - return 0; -} - -clib_error_t * -clib_mem_create_hugetlb_fd (char *name, int *fdp) -{ - clib_error_t *err = 0; - int fd = -1; - static int memfd_hugetlb_supported = 1; - char *mount_dir; - char template[] = "/tmp/hugepage_mount.XXXXXX"; - u8 *filename; - - ASSERT (name); - - if (memfd_hugetlb_supported) - { - if ((fd = memfd_create (name, MFD_HUGETLB)) != -1) - goto done; - - /* avoid further tries if memfd MFD_HUGETLB is not supported */ - if (errno == EINVAL && strnlen (name, 256) <= 249) - memfd_hugetlb_supported = 0; - } - - mount_dir = mkdtemp (template); - if (mount_dir == 0) - return clib_error_return_unix (0, "mkdtemp \'%s\'", template); - - if (mount ("none", (char *) mount_dir, "hugetlbfs", 0, NULL)) - { - rmdir ((char *) mount_dir); - err = clib_error_return_unix (0, "mount hugetlb directory '%s'", - mount_dir); - } - - filename = format (0, "%s/%s%c", mount_dir, name, 0); - fd = open ((char *) filename, O_CREAT | O_RDWR, 0755); - umount2 ((char *) mount_dir, MNT_DETACH); - rmdir ((char *) mount_dir); - - if (fd == -1) - err = clib_error_return_unix (0, "open"); - -done: - if (fd != -1) - fdp[0] = fd; - return err; -} - clib_error_t * clib_mem_vm_ext_alloc (clib_mem_vm_alloc_t * a) { + clib_mem_main_t *mm = &clib_mem_main; int fd = -1; clib_error_t *err = 0; void *addr = 0; @@ -301,15 +239,16 @@ clib_mem_vm_ext_alloc (clib_mem_vm_alloc_t * a) /* if hugepages are needed we need to create mount point */ if 
(a->flags & CLIB_MEM_VM_F_HUGETLB) { - if ((err = clib_mem_create_hugetlb_fd (a->name, &fd))) - goto error; - + log2_page_size = CLIB_MEM_PAGE_SZ_DEFAULT_HUGE; mmap_flags |= MAP_LOCKED; } else + log2_page_size = CLIB_MEM_PAGE_SZ_DEFAULT; + + if ((fd = clib_mem_vm_create_fd (log2_page_size, "%s", a->name)) == -1) { - if ((err = clib_mem_create_fd (a->name, &fd))) - goto error; + err = clib_error_return (0, "%U", format_clib_error, mm->error); + goto error; } log2_page_size = clib_mem_get_fd_log2_page_size (fd); @@ -414,6 +353,111 @@ clib_mem_vm_ext_free (clib_mem_vm_alloc_t * a) } } +static int +legacy_memfd_create (u8 * name) +{ + clib_mem_main_t *mm = &clib_mem_main; + int fd = -1; + char *mount_dir; + u8 *filename; + + /* create mount directory */ + if ((mount_dir = mkdtemp ("/tmp/hugepage_mount.XXXXXX")) == 0) + { + vec_reset_length (mm->error); + mm->error = clib_error_return_unix (mm->error, "mkdtemp"); + return -1; + } + + if (mount ("none", mount_dir, "hugetlbfs", 0, NULL)) + { + rmdir ((char *) mount_dir); + vec_reset_length (mm->error); + mm->error = clib_error_return_unix (mm->error, "mount"); + return -1; + } + + filename = format (0, "%s/%s%c", mount_dir, name, 0); + + if ((fd = open ((char *) filename, O_CREAT | O_RDWR, 0755)) == -1) + { + vec_reset_length (mm->error); + mm->error = clib_error_return_unix (mm->error, "mkdtemp"); + } + + umount2 ((char *) mount_dir, MNT_DETACH); + rmdir ((char *) mount_dir); + vec_free (filename); + + return fd; +} + +int +clib_mem_vm_create_fd (clib_mem_page_sz_t log2_page_size, char *fmt, ...) 
+{ + clib_mem_main_t *mm = &clib_mem_main; + int fd; + unsigned int memfd_flags; + va_list va; + u8 *s = 0; + + if (log2_page_size == mm->log2_page_sz) + log2_page_size = CLIB_MEM_PAGE_SZ_DEFAULT; + + switch (log2_page_size) + { + case CLIB_MEM_PAGE_SZ_UNKNOWN: + return -1; + case CLIB_MEM_PAGE_SZ_DEFAULT: + memfd_flags = MFD_ALLOW_SEALING; + break; + case CLIB_MEM_PAGE_SZ_DEFAULT_HUGE: + memfd_flags = MFD_HUGETLB; + break; + default: + memfd_flags = MFD_HUGETLB | log2_page_size << MFD_HUGE_SHIFT; + } + + va_start (va, fmt); + s = va_format (0, fmt, &va); + va_end (va); + + /* memfd_create maximum string size is 249 chars without trailing zero */ + if (vec_len (s) > 249) + _vec_len (s) = 249; + vec_add1 (s, 0); + + /* memfd_create introduced in kernel 3.17, we don't support older kernels */ + fd = memfd_create ((char *) s, memfd_flags); + + /* kernel versions < 4.14 does not support memfd_create for huge pages */ + if (fd == -1 && errno == EINVAL && + log2_page_size == CLIB_MEM_PAGE_SZ_DEFAULT_HUGE) + { + fd = legacy_memfd_create (s); + } + else if (fd == -1) + { + vec_reset_length (mm->error); + mm->error = clib_error_return_unix (mm->error, "memfd_create"); + vec_free (s); + return -1; + } + + vec_free (s); + + if ((memfd_flags & MFD_ALLOW_SEALING) && + ((fcntl (fd, F_ADD_SEALS, F_SEAL_SHRINK)) == -1)) + { + vec_reset_length (mm->error); + mm->error = clib_error_return_unix (mm->error, "fcntl (F_ADD_SEALS)"); + close (fd); + return -1; + } + + return fd; +} + uword clib_mem_vm_reserve (uword start, uword size, clib_mem_page_sz_t log2_page_sz) { diff --git a/src/vppinfra/mem.h b/src/vppinfra/mem.h index 9d0aa0997fc..3ba20ad6d3e 100644 --- a/src/vppinfra/mem.h +++ b/src/vppinfra/mem.h @@ -113,6 +113,9 @@ typedef struct /* memory maps */ clib_mem_vm_map_hdr_t *first_map, *last_map; + + /* last error */ + clib_error_t *error; } clib_mem_main_t; extern clib_mem_main_t clib_mem_main; @@ -461,8 +464,7 @@ clib_mem_get_log2_default_hugepage_size () return 
clib_mem_main.log2_default_hugepage_sz; } -clib_error_t *clib_mem_create_fd (char *name, int *fdp); -clib_error_t *clib_mem_create_hugetlb_fd (char *name, int *fdp); +int clib_mem_vm_create_fd (clib_mem_page_sz_t log2_page_size, char *fmt, ...); clib_error_t *clib_mem_vm_ext_alloc (clib_mem_vm_alloc_t * a); void clib_mem_vm_ext_free (clib_mem_vm_alloc_t * a); uword clib_mem_get_fd_page_size (int fd); @@ -545,6 +547,12 @@ clib_mem_page_bytes (clib_mem_page_sz_t log2_page_size) return 1 << clib_mem_log2_page_size_validate (log2_page_size); } +static_always_inline clib_error_t * +clib_mem_get_last_error (void) +{ + return clib_mem_main.error; +} + #include <vppinfra/os.h> /* clib_panic */ diff --git a/src/vppinfra/mem_dlmalloc.c b/src/vppinfra/mem_dlmalloc.c index 50dc57a60bd..10d3c61c77e 100644 --- a/src/vppinfra/mem_dlmalloc.c +++ b/src/vppinfra/mem_dlmalloc.c @@ -543,6 +543,19 @@ mheap_alloc_with_lock (void *memory, uword size, int locked) } } +void * +clib_mem_create_heap (void *base, uword size, char *fmt, ...) +{ + base = clib_mem_vm_map_internal (base, CLIB_MEM_PAGE_SZ_DEFAULT, size, -1, + 0, "str"); + + if (base == 0) + return 0; + + create_mspace_with_base (base, size, 1 /* locked */ ); + return base; +} + /* * fd.io coding-style-patch-verification: ON * diff --git a/src/vppinfra/pmalloc.c b/src/vppinfra/pmalloc.c index f6171dbf458..cb6c7e3ba3b 100644 --- a/src/vppinfra/pmalloc.c +++ b/src/vppinfra/pmalloc.c @@ -288,10 +288,7 @@ pmalloc_map_pages (clib_pmalloc_main_t * pm, clib_pmalloc_arena_t * a, if (a->flags & CLIB_PMALLOC_ARENA_F_SHARED_MEM) { mmap_flags |= MAP_SHARED; - if (a->log2_subpage_sz != clib_mem_get_log2_page_size ()) - pm->error = clib_mem_create_hugetlb_fd ((char *) a->name, &a->fd); - else - pm->error = clib_mem_create_fd ((char *) a->name, &a->fd); + a->fd = clib_mem_vm_create_fd (a->log2_subpage_sz, "%s", a->name); if (a->fd == -1) goto error; if ((ftruncate (a->fd, size)) == -1) -- cgit 1.2.3-korg