diff options
Diffstat (limited to 'src/vppinfra/linux')
-rw-r--r-- | src/vppinfra/linux/mem.c | 123 | ||||
-rw-r--r-- | src/vppinfra/linux/sysfs.c | 46 | ||||
-rw-r--r-- | src/vppinfra/linux/sysfs.h | 5 |
3 files changed, 66 insertions, 108 deletions
diff --git a/src/vppinfra/linux/mem.c b/src/vppinfra/linux/mem.c index 036890f9c8d..17b4412e6c9 100644 --- a/src/vppinfra/linux/mem.c +++ b/src/vppinfra/linux/mem.c @@ -28,9 +28,9 @@ #include <vppinfra/mem.h> #include <vppinfra/lock.h> #include <vppinfra/time.h> +#include <vppinfra/bitmap.h> #include <vppinfra/format.h> #include <vppinfra/clib_error.h> -#include <vppinfra/linux/sysfs.h> #ifndef F_LINUX_SPECIFIC_BASE #define F_LINUX_SPECIFIC_BASE 1024 @@ -75,40 +75,6 @@ map_unlock () clib_atomic_release (&clib_mem_main.map_lock); } -__clib_export uword -clib_mem_get_default_hugepage_size (void) -{ - unformat_input_t input; - static u32 size = 0; - int fd; - - if (size) - goto done; - - /* - * If the kernel doesn't support hugepages, /proc/meminfo won't - * say anything about it. Use the regular page size as a default. - */ - size = clib_mem_get_page_size () / 1024; - - if ((fd = open ("/proc/meminfo", 0)) == -1) - return 0; - - unformat_init_clib_file (&input, fd); - - while (unformat_check_input (&input) != UNFORMAT_END_OF_INPUT) - { - if (unformat (&input, "Hugepagesize:%_%u kB", &size)) - ; - else - unformat_skip_line (&input); - } - unformat_free (&input); - close (fd); -done: - return 1024ULL * size; -} - static clib_mem_page_sz_t legacy_get_log2_default_hugepage_size (void) { @@ -133,18 +99,26 @@ legacy_get_log2_default_hugepage_size (void) } void -clib_mem_main_init () +clib_mem_main_init (void) { + unsigned long nodemask = 0, maxnode = CLIB_MAX_NUMAS; + unsigned long flags = MPOL_F_MEMS_ALLOWED; clib_mem_main_t *mm = &clib_mem_main; + long sysconf_page_size; uword page_size; - void *va; - int fd; + void *va = 0; + int fd, mode; if (mm->log2_page_sz != CLIB_MEM_PAGE_SZ_UNKNOWN) return; /* system page size */ - page_size = sysconf (_SC_PAGESIZE); + sysconf_page_size = sysconf (_SC_PAGESIZE); + if (sysconf_page_size < 0) + { + clib_panic ("Could not determine the page size"); + } + page_size = sysconf_page_size; mm->log2_page_sz = min_log2 (page_size); /* 
default system hugepage size */ @@ -156,24 +130,11 @@ clib_mem_main_init () else /* likely kernel older than 4.14 */ mm->log2_default_hugepage_sz = legacy_get_log2_default_hugepage_size (); - /* numa nodes */ - va = mmap (0, page_size, PROT_READ | PROT_WRITE, MAP_PRIVATE | - MAP_ANONYMOUS, -1, 0); - if (va == MAP_FAILED) - return; + mm->log2_sys_default_hugepage_sz = mm->log2_default_hugepage_sz; - if (mlock (va, page_size)) - goto done; - - for (int i = 0; i < CLIB_MAX_NUMAS; i++) - { - int status; - if (syscall (__NR_move_pages, 0, 1, &va, &i, &status, 0) == 0) - mm->numa_node_bitmap |= 1ULL << i; - } - -done: - munmap (va, page_size); + /* numa nodes */ + if (syscall (__NR_get_mempolicy, &mode, &nodemask, maxnode, va, flags) == 0) + mm->numa_node_bitmap = nodemask; } __clib_export u64 @@ -270,7 +231,7 @@ clib_mem_vm_create_fd (clib_mem_page_sz_t log2_page_size, char *fmt, ...) if (log2_page_size == mm->log2_page_sz) log2_page_size = CLIB_MEM_PAGE_SZ_DEFAULT; - else if (log2_page_size == mm->log2_default_hugepage_sz) + else if (log2_page_size == mm->log2_sys_default_hugepage_sz) log2_page_size = CLIB_MEM_PAGE_SZ_DEFAULT_HUGE; switch (log2_page_size) @@ -293,7 +254,7 @@ clib_mem_vm_create_fd (clib_mem_page_sz_t log2_page_size, char *fmt, ...) 
/* memfd_create maximum string size is 249 chars without trailing zero */ if (vec_len (s) > 249) - _vec_len (s) = 249; + vec_set_len (s, 249); vec_add1 (s, 0); /* memfd_create introduced in kernel 3.17, we don't support older kernels */ @@ -487,14 +448,12 @@ clib_mem_vm_map_internal (void *base, clib_mem_page_sz_t log2_page_sz, else mm->first_map = hdr; - CLIB_MEM_UNPOISON (hdr, sys_page_sz); + clib_mem_unpoison (hdr, sys_page_sz); hdr->next = 0; hdr->prev = mm->last_map; snprintf (hdr->name, CLIB_VM_MAP_HDR_NAME_MAX_LEN - 1, "%s", (char *) name); mm->last_map = hdr; - map_unlock (); - hdr->base_addr = (uword) base; hdr->log2_page_sz = log2_page_sz; hdr->num_pages = size >> log2_page_sz; @@ -502,7 +461,9 @@ clib_mem_vm_map_internal (void *base, clib_mem_page_sz_t log2_page_sz, hdr->name[CLIB_VM_MAP_HDR_NAME_MAX_LEN - 1] = 0; mprotect (hdr, sys_page_sz, PROT_NONE); - CLIB_MEM_UNPOISON (base, size); + map_unlock (); + + clib_mem_unpoison (base, size); return base; } @@ -556,6 +517,7 @@ clib_mem_get_page_stats (void *start, clib_mem_page_sz_t log2_page_size, { int i, *status = 0; void **ptr = 0; + unsigned char incore; log2_page_size = clib_mem_log2_page_size_validate (log2_page_size); @@ -577,6 +539,19 @@ clib_mem_get_page_stats (void *start, clib_mem_page_sz_t log2_page_size, for (i = 0; i < n_pages; i++) { + /* move_pages() returns -ENOENT in status for huge pages on 5.19+ kernel. + * Retry with get_mempolicy() to obtain NUMA node info only if the pages + * are allocated and in memory, which is checked by mincore(). 
*/ + if (status[i] == -ENOENT && + syscall (__NR_mincore, ptr[i], 1, &incore) == 0 && (incore & 1) != 0) + { + if (syscall (__NR_get_mempolicy, &status[i], 0, 0, ptr[i], + MPOL_F_NODE | MPOL_F_ADDR) != 0) + { + /* if get_mempolicy fails, keep the original value in status */ + status[i] = -ENONET; + } + } if (status[i] >= 0 && status[i] < CLIB_MAX_NUMAS) { stats->mapped++; @@ -640,8 +615,8 @@ __clib_export int clib_mem_set_numa_affinity (u8 numa_node, int force) { clib_mem_main_t *mm = &clib_mem_main; - long unsigned int mask[16] = { 0 }; - int mask_len = sizeof (mask) * 8 + 1; + clib_bitmap_t *bmp = 0; + int rv; /* no numa support */ if (mm->numa_node_bitmap == 0) @@ -657,19 +632,21 @@ clib_mem_set_numa_affinity (u8 numa_node, int force) return 0; } - mask[0] = 1 << numa_node; + bmp = clib_bitmap_set (bmp, numa_node, 1); - if (syscall (__NR_set_mempolicy, force ? MPOL_BIND : MPOL_PREFERRED, mask, - mask_len)) - goto error; + rv = syscall (__NR_set_mempolicy, force ? MPOL_BIND : MPOL_PREFERRED, bmp, + vec_len (bmp) * sizeof (bmp[0]) * 8 + 1); + clib_bitmap_free (bmp); vec_reset_length (mm->error); - return 0; -error: - vec_reset_length (mm->error); - mm->error = clib_error_return_unix (mm->error, (char *) __func__); - return CLIB_MEM_ERROR; + if (rv) + { + mm->error = clib_error_return_unix (mm->error, (char *) __func__); + return CLIB_MEM_ERROR; + } + + return 0; } __clib_export int diff --git a/src/vppinfra/linux/sysfs.c b/src/vppinfra/linux/sysfs.c index 758eaa1a86c..61ee6378c8c 100644 --- a/src/vppinfra/linux/sysfs.c +++ b/src/vppinfra/linux/sysfs.c @@ -70,7 +70,7 @@ clib_sysfs_read (char *file_name, char *fmt, ...) return clib_error_return_unix (0, "read `%s'", file_name); } - _vec_len (s) = sz; + vec_set_len (s, sz); unformat_init_vector (&input, s); va_list va; @@ -87,32 +87,6 @@ clib_sysfs_read (char *file_name, char *fmt, ...) 
return 0; } -__clib_export u8 * -clib_sysfs_link_to_name (char *link) -{ - char *p, buffer[64]; - unformat_input_t in; - u8 *s = 0; - int r; - - r = readlink (link, buffer, sizeof (buffer) - 1); - - if (r < 0) - return 0; - - buffer[r] = 0; - p = strrchr (buffer, '/'); - - if (!p) - return 0; - - unformat_init_string (&in, p + 1, strlen (p + 1)); - if (unformat (&in, "%s", &s) != 1) - clib_unix_warning ("no string?"); - unformat_free (&in); - - return s; -} clib_error_t * clib_sysfs_set_nr_hugepages (int numa_node, int log2_page_size, int nr) @@ -154,7 +128,7 @@ clib_sysfs_set_nr_hugepages (int numa_node, int log2_page_size, int nr) goto done; } - _vec_len (p) -= 1; + vec_dec_len (p, 1); p = format (p, "/hugepages/hugepages-%ukB/nr_hugepages%c", page_size, 0); clib_sysfs_write ((char *) p, "%d", nr); @@ -207,7 +181,7 @@ clib_sysfs_get_xxx_hugepages (char *type, int numa_node, goto done; } - _vec_len (p) -= 1; + vec_dec_len (p, 1); p = format (p, "/hugepages/hugepages-%ukB/%s_hugepages%c", page_size, type, 0); error = clib_sysfs_read ((char *) p, "%d", val); @@ -263,13 +237,21 @@ clib_sysfs_prealloc_hugepages (int numa_node, int log2_page_size, int nr) return clib_sysfs_set_nr_hugepages (numa_node, log2_page_size, n + needed); } -__clib_export uword * -clib_sysfs_list_to_bitmap (char *filename) +__clib_export clib_bitmap_t * +clib_sysfs_read_bitmap (char *fmt, ...) 
{ FILE *fp; uword *r = 0; + va_list va; + u8 *filename; + + va_start (va, fmt); + filename = va_format (0, fmt, &va); + va_end (va); + vec_add1 (filename, 0); - fp = fopen (filename, "r"); + fp = fopen ((char *) filename, "r"); + vec_free (filename); if (fp != NULL) { diff --git a/src/vppinfra/linux/sysfs.h b/src/vppinfra/linux/sysfs.h index 9cbc34823dd..f2f822d9741 100644 --- a/src/vppinfra/linux/sysfs.h +++ b/src/vppinfra/linux/sysfs.h @@ -17,13 +17,12 @@ #define included_linux_sysfs_h #include <vppinfra/error.h> +#include <vppinfra/bitmap.h> clib_error_t *clib_sysfs_write (char *file_name, char *fmt, ...); clib_error_t *clib_sysfs_read (char *file_name, char *fmt, ...); -u8 *clib_sysfs_link_to_name (char *link); - clib_error_t *clib_sysfs_set_nr_hugepages (int numa_node, int log2_page_size, int nr); clib_error_t *clib_sysfs_get_nr_hugepages (int numa_node, @@ -35,7 +34,7 @@ clib_error_t *clib_sysfs_get_surplus_hugepages (int numa_node, clib_error_t *clib_sysfs_prealloc_hugepages (int numa_node, int log2_page_size, int nr); -uword *clib_sysfs_list_to_bitmap (char *filename); +uword *clib_sysfs_read_bitmap (char *fmt, ...); #endif /* included_linux_sysfs_h */ |