diff options
Diffstat (limited to 'src/vppinfra')
-rw-r--r-- | src/vppinfra/CMakeLists.txt | 1 | ||||
-rw-r--r-- | src/vppinfra/bihash_template.c | 2 | ||||
-rw-r--r-- | src/vppinfra/bihash_template.h | 2 | ||||
-rw-r--r-- | src/vppinfra/cpu.c | 16 | ||||
-rw-r--r-- | src/vppinfra/cpu.h | 17 | ||||
-rw-r--r-- | src/vppinfra/linux/mem.c | 14 | ||||
-rw-r--r-- | src/vppinfra/linux/syscall.h | 66 | ||||
-rw-r--r-- | src/vppinfra/pmalloc.c | 70 | ||||
-rw-r--r-- | src/vppinfra/unix-formats.c | 2 |
9 files changed, 53 insertions, 137 deletions
diff --git a/src/vppinfra/CMakeLists.txt b/src/vppinfra/CMakeLists.txt
index 3f6e9f591d9..8cebd32ffe3 100644
--- a/src/vppinfra/CMakeLists.txt
+++ b/src/vppinfra/CMakeLists.txt
@@ -192,7 +192,6 @@ set(VPPINFRA_HEADERS
   vector_sse42.h
   warnings.h
   xxhash.h
-  linux/syscall.h
   linux/sysfs.h
 )
 
diff --git a/src/vppinfra/bihash_template.c b/src/vppinfra/bihash_template.c
index 555c2e00cfb..0c6aa2a3e5e 100644
--- a/src/vppinfra/bihash_template.c
+++ b/src/vppinfra/bihash_template.c
@@ -272,7 +272,7 @@ void BV (clib_bihash_initiator_init_svm)
   ASSERT (memory_size < (1ULL << 32));
 
   /* Set up for memfd sharing */
-  if ((fd = memfd_create (name, MFD_ALLOW_SEALING)) == -1)
+  if ((fd = clib_mem_vm_create_fd (CLIB_MEM_PAGE_SZ_DEFAULT, name)) == -1)
     {
       clib_unix_warning ("memfd_create");
       return;
diff --git a/src/vppinfra/bihash_template.h b/src/vppinfra/bihash_template.h
index 1ca0ae447f8..da2f684b685 100644
--- a/src/vppinfra/bihash_template.h
+++ b/src/vppinfra/bihash_template.h
@@ -34,7 +34,5 @@
 #endif
 
 #ifdef BIHASH_32_64_SVM
-#undef HAVE_MEMFD_CREATE
-#include <vppinfra/linux/syscall.h>
 #include <fcntl.h>
 #define F_LINUX_SPECIFIC_BASE 1024
diff --git a/src/vppinfra/cpu.c b/src/vppinfra/cpu.c
index 4631eb3b896..d2edc61cfbf 100644
--- a/src/vppinfra/cpu.c
+++ b/src/vppinfra/cpu.c
@@ -222,7 +222,23 @@ format_cpu_flags (u8 * s, va_list * args)
 #endif
 }
 
+/* id of the cpu the caller currently runs on, 0 if getcpu fails */
+__clib_export u32
+clib_get_current_cpu_id ()
+{
+  unsigned cpu = 0, node = 0;
+  syscall (__NR_getcpu, &cpu, &node, 0);
+  return cpu;
+}
 
+/* numa node the caller currently runs on, 0 if getcpu fails */
+__clib_export u32
+clib_get_current_numa_node ()
+{
+  unsigned cpu = 0, node = 0;
+  syscall (__NR_getcpu, &cpu, &node, 0);
+  return node;
+}
 
 /*
  * fd.io coding-style-patch-verification: ON
diff --git a/src/vppinfra/cpu.h b/src/vppinfra/cpu.h
index bc4ee58b716..6925d584e52 100644
--- a/src/vppinfra/cpu.h
+++ b/src/vppinfra/cpu.h
@@ -167,21 +167,8 @@
   _ (asimddp, 20) \
   _ (sha512, 21) \
   _ (sve, 22)
-static inline u32
-clib_get_current_cpu_id ()
-{
-  unsigned cpu, node;
-  syscall (__NR_getcpu, &cpu, &node, 0);
-  return cpu;
-}
-
-static inline u32
-clib_get_current_numa_node ()
-{
-  unsigned cpu, node;
-  syscall (__NR_getcpu, &cpu, &node, 0);
-  return node;
-}
+u32 clib_get_current_cpu_id ();
+u32 clib_get_current_numa_node ();
 
 #if defined(__x86_64__)
 #include "cpuid.h"
diff --git a/src/vppinfra/linux/mem.c b/src/vppinfra/linux/mem.c
index 11a1e9ee45a..cb46df82552 100644
--- a/src/vppinfra/linux/mem.c
+++ b/src/vppinfra/linux/mem.c
@@ -30,7 +30,6 @@
 #include <vppinfra/time.h>
 #include <vppinfra/format.h>
 #include <vppinfra/clib_error.h>
-#include <vppinfra/linux/syscall.h>
 #include <vppinfra/linux/sysfs.h>
 
 #ifndef F_LINUX_SPECIFIC_BASE
@@ -149,7 +148,7 @@ clib_mem_main_init ()
   mm->log2_page_sz = min_log2 (page_size);
 
   /* default system hugeppage size */
-  if ((fd = memfd_create ("test", MFD_HUGETLB)) != -1)
+  if ((fd = syscall (__NR_memfd_create, "test", MFD_HUGETLB)) != -1)
     {
       mm->log2_default_hugepage_sz = clib_mem_get_fd_log2_page_size (fd);
       close (fd);
@@ -169,7 +168,7 @@ clib_mem_main_init ()
   for (int i = 0; i < CLIB_MAX_NUMAS; i++)
     {
       int status;
-      if (move_pages (0, 1, &va, &i, &status, 0) == 0)
+      if (syscall (__NR_move_pages, 0, 1, &va, &i, &status, 0) == 0)
         mm->numa_node_bitmap |= 1ULL << i;
     }
 
@@ -298,7 +297,7 @@ clib_mem_vm_create_fd (clib_mem_page_sz_t log2_page_size, char *fmt, ...)
   vec_add1 (s, 0);
 
   /* memfd_create introduced in kernel 3.17, we don't support older kernels */
-  fd = memfd_create ((char *) s, memfd_flags);
+  fd = syscall (__NR_memfd_create, (char *) s, memfd_flags);
 
   /* kernel versions < 4.14 does not support memfd_create for huge pages */
   if (fd == -1 && errno == EINVAL &&
@@ -568,7 +567,7 @@ clib_mem_get_page_stats (void *start, clib_mem_page_sz_t log2_page_size,
   stats->total = n_pages;
   stats->log2_page_sz = log2_page_size;
 
-  if (move_pages (0, n_pages, ptr, 0, status, 0) != 0)
+  if (syscall (__NR_move_pages, 0, n_pages, ptr, 0, status, 0) != 0)
     {
       stats->unknown = n_pages;
       goto done;
@@ -658,7 +657,8 @@ clib_mem_set_numa_affinity (u8 numa_node, int force)
 
   mask[0] = 1 << numa_node;
 
-  if (set_mempolicy (force ? MPOL_BIND : MPOL_PREFERRED, mask, mask_len))
+  if (syscall (__NR_set_mempolicy, force ? MPOL_BIND : MPOL_PREFERRED, mask,
+               mask_len))
     goto error;
 
   vec_reset_length (mm->error);
@@ -675,7 +675,7 @@ clib_mem_set_default_numa_affinity ()
 {
   clib_mem_main_t *mm = &clib_mem_main;
 
-  if (set_mempolicy (MPOL_DEFAULT, 0, 0))
+  if (syscall (__NR_set_mempolicy, MPOL_DEFAULT, 0, 0))
     {
       vec_reset_length (mm->error);
       mm->error = clib_error_return_unix (mm->error, (char *) __func__);
diff --git a/src/vppinfra/linux/syscall.h b/src/vppinfra/linux/syscall.h
deleted file mode 100644
index c07cad631bd..00000000000
--- a/src/vppinfra/linux/syscall.h
+++ /dev/null
@@ -1,66 +0,0 @@
-/*
- * Copyright (c) 2017 Cisco and/or its affiliates.
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at:
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef included_linux_syscall_h
-#define included_linux_syscall_h
-
-#include <unistd.h>
-#include <sys/syscall.h>
-
-#ifndef HAVE_GETCPU
-static inline int
-getcpu (unsigned *cpu, unsigned *node)
-{
-  return syscall (__NR_getcpu, cpu, node, 0);
-}
-#endif
-
-static inline long
-set_mempolicy (int mode, const unsigned long *nodemask, unsigned long maxnode)
-{
-  return syscall (__NR_set_mempolicy, mode, nodemask, maxnode);
-}
-
-static inline int
-get_mempolicy (int *mode, unsigned long *nodemask, unsigned long maxnode,
-               void *addr, unsigned long flags)
-{
-  return syscall (__NR_get_mempolicy, mode, nodemask, maxnode, addr, flags);
-}
-
-static inline long
-move_pages (int pid, unsigned long count, void **pages, const int *nodes,
-            int *status, int flags)
-{
-  return syscall (__NR_move_pages, pid, count, pages, nodes, status, flags);
-}
-
-#ifndef HAVE_MEMFD_CREATE
-static inline int
-memfd_create (const char *name, unsigned int flags)
-{
-  return syscall (__NR_memfd_create, name, flags);
-}
-#endif
-
-#endif /* included_linux_syscall_h */
-
-/*
- * fd.io coding-style-patch-verification: ON
- *
- * Local Variables:
- * eval: (c-set-style "gnu")
- * End:
- */
diff --git a/src/vppinfra/pmalloc.c b/src/vppinfra/pmalloc.c
index 546a4fe336b..27738e15f47 100644
--- a/src/vppinfra/pmalloc.c
+++ b/src/vppinfra/pmalloc.c
@@ -19,16 +19,14 @@
 #include <sys/stat.h>
 #include <fcntl.h>
 #include <unistd.h>
-#include <linux/mempolicy.h>
-#include <linux/memfd.h>
 #include <sched.h>
 
 #include <vppinfra/format.h>
-#include <vppinfra/linux/syscall.h>
 #include <vppinfra/linux/sysfs.h>
 #include <vppinfra/mem.h>
 #include <vppinfra/hash.h>
 #include <vppinfra/pmalloc.h>
+#include <vppinfra/cpu.h>
 
 #if __SIZEOF_POINTER__ >= 8
 #define DEFAULT_RESERVED_MB 16384
@@ -48,18 +46,6 @@ pmalloc_size2pages (uword size, u32 log2_page_sz)
   return round_pow2 (size, 1ULL << log2_page_sz) >> log2_page_sz;
 }
 
-static inline int
-pmalloc_validate_numa_node (u32 * numa_node)
-{
-  if (*numa_node == CLIB_PMALLOC_NUMA_LOCAL)
-    {
-      u32 cpu;
-      if (getcpu (&cpu, numa_node) != 0)
-        return 1;
-    }
-  return 0;
-}
-
 __clib_export int
 clib_pmalloc_init (clib_pmalloc_main_t * pm, uword base_addr, uword size)
 {
@@ -241,12 +227,10 @@ static inline clib_pmalloc_page_t *
 pmalloc_map_pages (clib_pmalloc_main_t * pm, clib_pmalloc_arena_t * a,
                    u32 numa_node, u32 n_pages)
 {
+  clib_mem_page_stats_t stats = {};
   clib_pmalloc_page_t *pp = 0;
-  int status, rv, i, mmap_flags;
+  int rv, i, mmap_flags;
   void *va = MAP_FAILED;
-  int old_mpol = -1;
-  long unsigned int mask[16] = { 0 };
-  long unsigned int old_mask[16] = { 0 };
   uword size = (uword) n_pages << pm->def_log2_page_sz;
 
   clib_error_free (pm->error);
@@ -266,17 +250,8 @@ pmalloc_map_pages (clib_pmalloc_main_t * pm, clib_pmalloc_arena_t * a,
       return 0;
     }
 
-  rv = get_mempolicy (&old_mpol, old_mask, sizeof (old_mask) * 8 + 1, 0, 0);
-  /* failure to get mempolicy means we can only proceed with numa 0 maps */
-  if (rv == -1 && numa_node != 0)
-    {
-      pm->error = clib_error_return_unix (0, "failed to get mempolicy");
-      return 0;
-    }
-
-  mask[0] = 1 << numa_node;
-  rv = set_mempolicy (MPOL_BIND, mask, sizeof (mask) * 8 + 1);
-  if (rv == -1 && numa_node != 0)
+  rv = clib_mem_set_numa_affinity (numa_node, /* force */ 1);
+  if (rv == CLIB_MEM_ERROR && numa_node != 0)
     {
       pm->error = clib_error_return_unix (0, "failed to set mempolicy for "
                                           "numa node %u", numa_node);
@@ -323,8 +298,8 @@ pmalloc_map_pages (clib_pmalloc_main_t * pm, clib_pmalloc_arena_t * a,
 
   clib_memset (va, 0, size);
 
-  rv = set_mempolicy (old_mpol, old_mask, sizeof (old_mask) * 8 + 1);
-  if (rv == -1 && numa_node != 0)
+  rv = clib_mem_set_default_numa_affinity ();
+  if (rv == CLIB_MEM_ERROR && numa_node != 0)
     {
       pm->error = clib_error_return_unix (0, "failed to restore mempolicy");
       goto error;
@@ -332,14 +307,23 @@ pmalloc_map_pages (clib_pmalloc_main_t * pm, clib_pmalloc_arena_t * a,
 
   /* we tolerate move_pages failure only if request os for numa node 0
      to support non-numa kernels */
-  rv = move_pages (0, 1, &va, 0, &status, 0);
-  if ((rv == 0 && status != numa_node) || (rv != 0 && numa_node != 0))
+  clib_mem_get_page_stats (va, CLIB_MEM_PAGE_SZ_DEFAULT, 1, &stats);
+
+  if (stats.per_numa[numa_node] != 1 && !(numa_node == 0 && stats.unknown))
     {
-      pm->error = rv == -1 ?
-        clib_error_return_unix (0, "page allocated on wrong node, numa node "
-                                "%u status %d", numa_node, status) :
-        clib_error_return (0, "page allocated on wrong node, numa node "
-                           "%u status %d", numa_node, status);
+      u16 allocated_at = ~0;
+
+      for (u16 n = 0; n < CLIB_MAX_NUMAS; n++)
+        if (stats.per_numa[n] == 1)
+          allocated_at = n;
+
+      /* record the failure in pm->error like the other error paths do */
+      pm->error = stats.unknown ?
+        clib_error_return (0, "unable to get information about numa "
+                           "allocation") :
+        clib_error_return (0, "page allocated on the wrong numa node (%u), "
+                           "expected %u", allocated_at, numa_node);
+
       goto error;
     }
 
@@ -407,8 +391,8 @@ clib_pmalloc_create_shared_arena (clib_pmalloc_main_t * pm, char *name,
   if (n_pages + vec_len (pm->pages) > pm->max_pages)
     return 0;
 
-  if (pmalloc_validate_numa_node (&numa_node))
-    return 0;
+  if (numa_node == CLIB_PMALLOC_NUMA_LOCAL)
+    numa_node = clib_get_current_numa_node ();
 
   pool_get (pm->arenas, a);
   a->index = a - pm->arenas;
@@ -438,8 +422,8 @@ clib_pmalloc_alloc_inline (clib_pmalloc_main_t * pm, clib_pmalloc_arena_t * a,
 
   ASSERT (is_pow2 (align));
 
-  if (pmalloc_validate_numa_node (&numa_node))
-    return 0;
+  if (numa_node == CLIB_PMALLOC_NUMA_LOCAL)
+    numa_node = clib_get_current_numa_node ();
 
   if (a == 0)
     {
diff --git a/src/vppinfra/unix-formats.c b/src/vppinfra/unix-formats.c
index af1eb1aaa7b..cd137e58687 100644
--- a/src/vppinfra/unix-formats.c
+++ b/src/vppinfra/unix-formats.c
@@ -63,8 +63,6 @@
 #include <vppinfra/time.h>
 
 #if __linux__
-#include <vppinfra/linux/syscall.h>
-
 #ifdef AF_NETLINK
 #include <linux/types.h>
 #include <linux/netlink.h>