From a690fdbfe179e0ea65818c03b52535bf9210efd0 Mon Sep 17 00:00:00 2001 From: Dave Barach Date: Tue, 21 Jan 2020 12:34:55 -0500 Subject: vppinfra: numa vector placement support Type: feature Signed-off-by: Dave Barach Change-Id: I7e7d95a089dd849c1f01ecea84529d8dbf239f21 --- src/plugins/http_static/static_server.c | 6 +- src/vlib/cli.c | 68 +++++++++++++++++-- src/vlib/threads.c | 35 ++++++++-- src/vlib/threads.h | 9 ++- src/vlib/threads_cli.c | 4 +- src/vnet/unix/gdb_funcs.c | 15 +++++ src/vpp/api/api.c | 2 +- src/vpp/vnet/main.c | 13 +++- src/vppinfra/CMakeLists.txt | 9 ++- src/vppinfra/bihash_template.c | 1 - src/vppinfra/config.h.in | 1 + src/vppinfra/mem.h | 54 ++++++++++----- src/vppinfra/mem_dlmalloc.c | 64 ++++++++++++++++-- src/vppinfra/os.h | 13 ++++ src/vppinfra/pool.h | 35 +++++++--- src/vppinfra/unix-misc.c | 1 + src/vppinfra/vec.c | 19 +++++- src/vppinfra/vec.h | 113 +++++++++++++++++++++++++------- src/vppinfra/vec_bootstrap.h | 32 ++++++++- 19 files changed, 414 insertions(+), 80 deletions(-) (limited to 'src') diff --git a/src/plugins/http_static/static_server.c b/src/plugins/http_static/static_server.c index 5dfa347dc03..ddc8a9e3165 100644 --- a/src/plugins/http_static/static_server.c +++ b/src/plugins/http_static/static_server.c @@ -133,8 +133,10 @@ http_static_server_session_alloc (u32 thread_index) { http_static_server_main_t *hsm = &http_static_server_main; http_session_t *hs; - pool_get (hsm->sessions[thread_index], hs); - memset (hs, 0, sizeof (*hs)); + pool_get_aligned_zero_numa (hsm->sessions[thread_index], hs, + 0 /* not aligned */ , + 1 /* zero */ , + os_get_numa_index ()); hs->session_index = hs - hsm->sessions[thread_index]; hs->thread_index = thread_index; hs->timer_handle = ~0; diff --git a/src/vlib/cli.c b/src/vlib/cli.c index bb6c5746537..85049884db6 100644 --- a/src/vlib/cli.c +++ b/src/vlib/cli.c @@ -733,9 +733,10 @@ show_memory_usage (vlib_main_t * vm, unformat_input_t * input, vlib_cli_command_t * cmd) { int verbose __attribute__ ((unused)) = 0; - int api_segment = 0, stats_segment = 0, main_heap = 0; + int api_segment = 0, stats_segment = 0, main_heap = 0, numa_heaps = 0; clib_error_t *error; u32 index = 0; + int i; uword clib_mem_trace_enable_disable (uword enable); uword was_enabled; @@ -750,6 +751,8 @@ show_memory_usage (vlib_main_t * vm, stats_segment = 1; else if (unformat (input, "main-heap")) main_heap = 1; + else if (unformat (input, "numa-heaps")) + numa_heaps = 1; else { error = clib_error_return (0, "unknown input `%U'", @@ -758,9 +761,9 @@ show_memory_usage (vlib_main_t * vm, } } - if ((api_segment + stats_segment + main_heap) == 0) + if ((api_segment + stats_segment + main_heap + numa_heaps) == 0) return clib_error_return - (0, "Please supply one of api-segment, stats-segment or main-heap"); + (0, "Need one of api-segment, stats-segment, main-heap or numa-heaps"); if (api_segment) { @@ -801,6 +804,7 @@ show_memory_usage (vlib_main_t * vm, vec_free (s); } + #if USE_DLMALLOC == 0 /* *INDENT-OFF* */ foreach_vlib_main ( @@ -849,6 +853,32 @@ show_memory_usage (vlib_main_t * vm, /* Restore the trace flag */ clib_mem_trace_enable_disable (was_enabled); } + if (numa_heaps) + { + struct dlmallinfo mi; + void *mspace; + + for (i = 0; i < ARRAY_LEN (clib_per_numa_mheaps); i++) + { + if (clib_per_numa_mheaps[i] == 0) + continue; + if (clib_per_numa_mheaps[i] == clib_per_cpu_mheaps[i]) + { + vlib_cli_output (vm, "Numa %d uses the main heap...", i); + continue; + } + was_enabled = clib_mem_trace_enable_disable (0); + mspace = clib_per_numa_mheaps[i]; + + mi = mspace_mallinfo (mspace); + vlib_cli_output (vm, "Numa %d:", i); + vlib_cli_output (vm, " %U\n", format_page_map, + pointer_to_uword (mspace_least_addr (mspace)), + mi.arena); + vlib_cli_output (vm, " %U\n", format_mheap, + clib_per_numa_mheaps[index], verbose); + } + } } #endif /* USE_DLMALLOC */ return 0; @@ -857,7 +887,8 @@ show_memory_usage (vlib_main_t * vm, /* *INDENT-OFF* */ VLIB_CLI_COMMAND (show_memory_usage_command, static) = { .path = "show memory", - .short_help = "show memory [api-segment][stats-segment][verbose]", + .short_help = "show memory [api-segment][stats-segment][verbose]\n" + " [numa-heaps]", .function = show_memory_usage, }; /* *INDENT-ON* */ @@ -905,6 +936,7 @@ enable_disable_memory_trace (vlib_main_t * vm, int api_segment = 0; int stats_segment = 0; int main_heap = 0; + u32 numa_id = ~0; void *oldheap; if (!unformat_user (input, unformat_line_input, line_input)) @@ -920,6 +952,8 @@ enable_disable_memory_trace (vlib_main_t * vm, stats_segment = 1; else if (unformat (line_input, "main-heap")) main_heap = 1; + else if (unformat (line_input, "numa-heap %d", &numa_id)) + ; else { unformat_free (line_input); @@ -928,10 +962,12 @@ enable_disable_memory_trace (vlib_main_t * vm, } unformat_free (line_input); - if ((api_segment + stats_segment + main_heap + (enable == 0)) == 0) + if ((api_segment + stats_segment + main_heap + (enable == 0) + + (numa_id != ~0)) == 0) { return clib_error_return - (0, "Need one of main-heap, stats-segment or api-segment"); + (0, "Need one of main-heap, stats-segment, api-segment,\n" + "numa-heap or disable"); } /* Turn off current trace, if any */ @@ -975,13 +1011,31 @@ enable_disable_memory_trace (vlib_main_t * vm, clib_mem_trace (main_heap); } + if (numa_id != ~0) + { + if (numa_id >= ARRAY_LEN (clib_per_numa_mheaps)) + return clib_error_return (0, "Numa %d out of range", numa_id); + if (clib_per_numa_mheaps[numa_id] == 0) + return clib_error_return (0, "Numa %d heap not active", numa_id); + + if (clib_per_numa_mheaps[numa_id] == clib_mem_get_heap ()) + return clib_error_return (0, "Numa %d uses the main heap...", + numa_id); + current_traced_heap = clib_per_numa_mheaps[numa_id]; + oldheap = clib_mem_set_heap (current_traced_heap); + clib_mem_trace (1); + clib_mem_set_heap (oldheap); + } + + return 0; } /* *INDENT-OFF* */ VLIB_CLI_COMMAND (enable_disable_memory_trace_command, static) = { .path = "memory-trace", - .short_help = "memory-trace on|off [api-segment][stats-segment][main-heap]\n", + .short_help = "memory-trace on|off [api-segment][stats-segment][main-heap]\n" + " [numa-heap ]\n", .function = enable_disable_memory_trace, }; /* *INDENT-ON* */ diff --git a/src/vlib/threads.c b/src/vlib/threads.c index e6733d55b6f..a827e3594e8 100644 --- a/src/vlib/threads.c +++ b/src/vlib/threads.c @@ -577,12 +577,12 @@ vlib_worker_thread_bootstrap_fn (void *arg) return rv; } -static void -vlib_get_thread_core_socket (vlib_worker_thread_t * w, unsigned cpu_id) +void +vlib_get_thread_core_numa (vlib_worker_thread_t * w, unsigned cpu_id) { const char *sys_cpu_path = "/sys/devices/system/cpu/cpu"; u8 *p = 0; - int core_id = -1, socket_id = -1; + int core_id = -1, numa_id = -1; p = format (p, "%s%u/topology/core_id%c", sys_cpu_path, cpu_id, 0); clib_sysfs_read ((char *) p, "%d", &core_id); @@ -590,11 +590,11 @@ vlib_get_thread_core_socket (vlib_worker_thread_t * w, unsigned cpu_id) p = format (p, "%s%u/topology/physical_package_id%c", sys_cpu_path, cpu_id, 0); - clib_sysfs_read ((char *) p, "%d", &socket_id); + clib_sysfs_read ((char *) p, "%d", &numa_id); vec_free (p); w->core_id = core_id; - w->socket_id = socket_id; + w->numa_id = numa_id; } static clib_error_t * @@ -602,9 +602,29 @@ vlib_launch_thread_int (void *fp, vlib_worker_thread_t * w, unsigned cpu_id) { vlib_thread_main_t *tm = &vlib_thread_main; void *(*fp_arg) (void *) = fp; + void *numa_heap; w->cpu_id = cpu_id; - vlib_get_thread_core_socket (w, cpu_id); + vlib_get_thread_core_numa (w, cpu_id); + os_set_numa_index (w->numa_id); + + /* Set up NUMA-bound heap if indicated */ + if (clib_per_numa_mheaps[w->numa_id] == 0) + { + /* If the user requested a NUMA heap, create it... */ + if (tm->numa_heap_size) + { + numa_heap = clib_mem_init_thread_safe_numa + (0 /* DIY */ , tm->numa_heap_size); + clib_per_numa_mheaps[w->numa_id] = numa_heap; + } + else + { + /* Or, use the main heap */ + clib_per_numa_mheaps[w->numa_id] = w->thread_mheap; + } + } + if (tm->cb.vlib_launch_thread_cb && !w->registration->use_pthreads) return tm->cb.vlib_launch_thread_cb (fp, (void *) w, cpu_id); else @@ -1242,6 +1262,9 @@ cpu_config (vlib_main_t * vm, unformat_input_t * input) ; else if (unformat (input, "skip-cores %u", &tm->skip_cores)) ; + else if (unformat (input, "numa-heap-size %U", + unformat_memory_size, &tm->numa_heap_size)) + ; else if (unformat (input, "coremask-%s %U", &name, unformat_bitmap_mask, &bitmap) || unformat (input, "corelist-%s %U", &name, diff --git a/src/vlib/threads.h b/src/vlib/threads.h index 312323c096d..c1188cea933 100644 --- a/src/vlib/threads.h +++ b/src/vlib/threads.h @@ -110,7 +110,7 @@ typedef struct long lwp; int cpu_id; int core_id; - int socket_id; + int numa_id; pthread_t thread_id; } vlib_worker_thread_t; @@ -338,6 +338,10 @@ typedef struct /* callbacks */ vlib_thread_callbacks_t cb; int extern_thread_mgmt; + + /* NUMA-bound heap size */ + uword numa_heap_size; + } vlib_thread_main_t; extern vlib_thread_main_t vlib_thread_main; @@ -613,6 +617,9 @@ void vlib_process_signal_event_mt_helper (vlib_process_signal_event_mt_args_t * args); void vlib_rpc_call_main_thread (void *function, u8 * args, u32 size); +void vlib_get_thread_core_numa (vlib_worker_thread_t * w, unsigned cpu_id); + + #endif /* included_vlib_threads_h */ /* diff --git a/src/vlib/threads_cli.c b/src/vlib/threads_cli.c index 65b3e2a5496..bcb85ec69fb 100644 --- a/src/vlib/threads_cli.c +++ b/src/vlib/threads_cli.c @@ -67,8 +67,8 @@ show_threads_fn (vlib_main_t * vm, if (cpu_id > -1) { int core_id = w->core_id; - int socket_id = w->socket_id; - line = format (line, "%-7u%-7u%-7u%", cpu_id, core_id, socket_id); + int numa_id = w->numa_id; + line = format (line, "%-7u%-7u%-7u%", cpu_id, core_id, numa_id); } else { diff --git a/src/vnet/unix/gdb_funcs.c b/src/vnet/unix/gdb_funcs.c index 6f0dd208caf..29011f05ce1 100644 --- a/src/vnet/unix/gdb_funcs.c +++ b/src/vnet/unix/gdb_funcs.c @@ -37,6 +37,21 @@ vl (void *p) return vec_len (p); } +/** + * @brief GDB callable function: pvh - Return vector header of vector + * + * @param *p - void - address of vector + * + * @return vh - vec_header_t, the vector header + * + */ +vec_header_t * +pvh (void *p) +{ + return _vec_find (p); +} + + /** * @brief GDB callable function: pe - call pool_elts - number of elements in a pool * diff --git a/src/vpp/api/api.c b/src/vpp/api/api.c index 2ed9ceec62c..f652205d583 100644 --- a/src/vpp/api/api.c +++ b/src/vpp/api/api.c @@ -273,7 +273,7 @@ get_thread_data (vl_api_thread_data_t * td, int index) td->pid = htonl (w->lwp); td->cpu_id = htonl (w->cpu_id); td->core = htonl (w->core_id); - td->cpu_socket = htonl (w->socket_id); + td->cpu_socket = htonl (w->numa_id); } static void diff --git a/src/vpp/vnet/main.c b/src/vpp/vnet/main.c index ea8e4f88718..7c7dd9870e9 100644 --- a/src/vpp/vnet/main.c +++ b/src/vpp/vnet/main.c @@ -20,6 +20,7 @@ #include #include #include +#include #include #include #include @@ -107,6 +108,7 @@ main (int argc, char *argv[]) u32 size; int main_core = 1; cpu_set_t cpuset; + void *main_heap; #if __x86_64__ CLIB_UNUSED (const char *msg) @@ -273,8 +275,17 @@ defaulted: vl_msg_api_set_first_available_msg_id (VL_MSG_FIRST_AVAILABLE); /* Allocate main heap */ - if (clib_mem_init_thread_safe (0, main_heap_size)) + if ((main_heap = clib_mem_init_thread_safe (0, main_heap_size))) { + vlib_worker_thread_t tmp; + + /* Figure out which numa runs the main thread */ + vlib_get_thread_core_numa (&tmp, main_core); + __os_numa_index = tmp.numa_id; + + /* and use the main heap as that numa's numa heap */ + clib_mem_set_per_numa_heap (main_heap); + vm->init_functions_called = hash_create (0, /* value bytes */ 0); vpe_main_init (vm); return vlib_unix_main (argc, argv); diff --git a/src/vppinfra/CMakeLists.txt b/src/vppinfra/CMakeLists.txt index 1c234cce234..60e6eeff9fe 100644 --- a/src/vppinfra/CMakeLists.txt +++ b/src/vppinfra/CMakeLists.txt @@ -24,6 +24,13 @@ else(VPP_USE_DLMALLOC) set(DLMALLOC 0) endif(VPP_USE_DLMALLOC) +find_library(NUMA numa) +if (NUMA) + set(NUMA_LIBRARY_FOUND 1) +else(NUMA) + set(NUMA_LIBRARY_FOUND 0) +endif() + configure_file( ${CMAKE_SOURCE_DIR}/vppinfra/config.h.in ${CMAKE_BINARY_DIR}/vppinfra/config.h @@ -210,7 +217,7 @@ endif(VPP_USE_DLMALLOC) add_vpp_library(vppinfra SOURCES ${VPPINFRA_SRCS} - LINK_LIBRARIES m + LINK_LIBRARIES m ${NUMA} INSTALL_HEADERS ${VPPINFRA_HEADERS} COMPONENT libvppinfra ) diff --git a/src/vppinfra/bihash_template.c b/src/vppinfra/bihash_template.c index dda35969058..2b378427ce8 100644 --- a/src/vppinfra/bihash_template.c +++ b/src/vppinfra/bihash_template.c @@ -179,7 +179,6 @@ void BV (clib_bihash_master_init_svm) sizeof (vec_header_t) + BIHASH_FREELIST_LENGTH * sizeof (u64)); freelist_vh->len = BIHASH_FREELIST_LENGTH; - freelist_vh->dlmalloc_header_offset = 0xDEADBEEF; h->sh->freelists_as_u64 = (u64) BV (clib_bihash_get_offset) (h, freelist_vh->vector_data); h->freelists = (void *) (freelist_vh->vector_data); diff --git a/src/vppinfra/config.h.in b/src/vppinfra/config.h.in index a7a22a6a992..b2366630447 100644 --- a/src/vppinfra/config.h.in +++ b/src/vppinfra/config.h.in @@ -21,6 +21,7 @@ #endif #define USE_DLMALLOC @DLMALLOC@ +#define HAVE_NUMA_LIBRARY @NUMA_LIBRARY_FOUND@ #define CLIB_TARGET_TRIPLET "@CMAKE_C_COMPILER_TARGET@" #endif diff --git a/src/vppinfra/mem.h b/src/vppinfra/mem.h index d4819b7f989..5492e106d91 100644 --- a/src/vppinfra/mem.h +++ b/src/vppinfra/mem.h @@ -56,9 +56,46 @@ #include #define CLIB_MAX_MHEAPS 256 +#define CLIB_MAX_NUMAS 8 + +/* Unspecified NUMA socket */ +#define VEC_NUMA_UNSPECIFIED (0xFF) /* Per CPU heaps. */ extern void *clib_per_cpu_mheaps[CLIB_MAX_MHEAPS]; +extern void *clib_per_numa_mheaps[CLIB_MAX_NUMAS]; + +always_inline void * +clib_mem_get_per_cpu_heap (void) +{ + int cpu = os_get_thread_index (); + return clib_per_cpu_mheaps[cpu]; +} + +always_inline void * +clib_mem_set_per_cpu_heap (u8 * new_heap) +{ + int cpu = os_get_thread_index (); + void *old = clib_per_cpu_mheaps[cpu]; + clib_per_cpu_mheaps[cpu] = new_heap; + return old; +} + +always_inline void * +clib_mem_get_per_numa_heap (u32 numa_id) +{ + ASSERT (numa_id >= 0 && numa_id < ARRAY_LEN (clib_per_numa_mheaps)); + return clib_per_numa_mheaps[numa_id]; +} + +always_inline void * +clib_mem_set_per_numa_heap (u8 * new_heap) +{ + int numa = os_get_numa_index (); + void *old = clib_per_numa_mheaps[numa]; + clib_per_numa_mheaps[numa] = new_heap; + return old; +} always_inline void clib_mem_set_thread_index (void) @@ -81,22 +118,6 @@ clib_mem_set_thread_index (void) ASSERT (__os_thread_index > 0); } -always_inline void * -clib_mem_get_per_cpu_heap (void) -{ - int cpu = os_get_thread_index (); - return clib_per_cpu_mheaps[cpu]; -} - -always_inline void * -clib_mem_set_per_cpu_heap (u8 * new_heap) -{ - int cpu = os_get_thread_index (); - void *old = clib_per_cpu_mheaps[cpu]; - clib_per_cpu_mheaps[cpu] = new_heap; - return old; -} - always_inline uword clib_mem_size_nocheck (void *p) { @@ -287,6 +308,7 @@ clib_mem_set_heap (void *heap) void *clib_mem_init (void *heap, uword size); void *clib_mem_init_thread_safe (void *memory, uword memory_size); +void *clib_mem_init_thread_safe_numa (void *memory, uword memory_size); void clib_mem_exit (void); diff --git a/src/vppinfra/mem_dlmalloc.c b/src/vppinfra/mem_dlmalloc.c index 68901a5530a..38226e26f8f 100644 --- a/src/vppinfra/mem_dlmalloc.c +++ b/src/vppinfra/mem_dlmalloc.c @@ -20,8 +20,10 @@ #include #include #include +#include void *clib_per_cpu_mheaps[CLIB_MAX_MHEAPS]; +void *clib_per_numa_mheaps[CLIB_MAX_NUMAS]; typedef struct { @@ -202,8 +204,8 @@ mheap_trace_main_free (mheap_trace_main_t * tm) /* Initialize CLIB heap based on memory/size given by user. Set memory to 0 and CLIB will try to allocate its own heap. */ -void * -clib_mem_init (void *memory, uword memory_size) +static void * +clib_mem_init_internal (void *memory, uword memory_size, int set_heap) { u8 *heap; @@ -217,7 +219,8 @@ clib_mem_init (void *memory, uword memory_size) CLIB_MEM_POISON (mspace_least_addr (heap), mspace_footprint (heap)); - clib_mem_set_heap (heap); + if (set_heap) + clib_mem_set_heap (heap); if (mheap_trace_main.lock == 0) clib_spinlock_init (&mheap_trace_main.lock); @@ -225,10 +228,63 @@ clib_mem_init (void *memory, uword memory_size) return heap; } +void * +clib_mem_init (void *memory, uword memory_size) +{ + return clib_mem_init_internal (memory, memory_size, + 1 /* do clib_mem_set_heap */ ); +} + void * clib_mem_init_thread_safe (void *memory, uword memory_size) { - return clib_mem_init (memory, memory_size); + return clib_mem_init_internal (memory, memory_size, + 1 /* do clib_mem_set_heap */ ); +} + +void * +clib_mem_init_thread_safe_numa (void *memory, uword memory_size) +{ + void *heap; + unsigned long this_numa; + + heap = + clib_mem_init_internal (memory, memory_size, + 0 /* do NOT clib_mem_set_heap */ ); + + ASSERT (heap); + + this_numa = os_get_numa_index (); + +#if HAVE_NUMA_LIBRARY > 0 + unsigned long nodemask = 1 << this_numa; + void *page_base; + unsigned long page_mask; + long rv; + + /* + * Bind the heap to the current thread's NUMA node. + * heap is not naturally page-aligned, so fix it. + */ + + page_mask = ~(clib_mem_get_page_size () - 1); + page_base = (void *) (((unsigned long) heap) & page_mask); + + clib_warning ("Bind heap at %llx size %llx to NUMA numa %d", + page_base, memory_size, this_numa); + + rv = mbind (page_base, memory_size, MPOL_BIND /* mode */ , + &nodemask /* nodemask */ , + BITS (nodemask) /* max node number */ , + MPOL_MF_MOVE /* flags */ ); + + if (rv < 0) + clib_unix_warning ("mbind"); +#else + clib_warning ("mbind unavailable, can't bind to numa %d", this_numa); +#endif + + return heap; } u8 * diff --git a/src/vppinfra/os.h b/src/vppinfra/os.h index 50a4ad97c93..cd3b4289da6 100644 --- a/src/vppinfra/os.h +++ b/src/vppinfra/os.h @@ -57,6 +57,7 @@ void os_out_of_memory (void); f64 os_cpu_clock_frequency (void); extern __thread uword __os_thread_index; +extern __thread uword __os_numa_index; static_always_inline uword os_get_thread_index (void) @@ -70,6 +71,18 @@ os_set_thread_index (uword thread_index) __os_thread_index = thread_index; } +static_always_inline uword +os_get_numa_index (void) +{ + return __os_numa_index; +} + +static_always_inline void +os_set_numa_index (uword numa_index) +{ + __os_numa_index = numa_index; +} + static_always_inline uword os_get_cpu_number (void) __attribute__ ((deprecated)); diff --git a/src/vppinfra/pool.h b/src/vppinfra/pool.h index e6ffe1e874a..db950d27d18 100644 --- a/src/vppinfra/pool.h +++ b/src/vppinfra/pool.h @@ -185,12 +185,13 @@ pool_free_elts (void *v) First search free list. If nothing is free extend vector of objects. */ -#define _pool_get_aligned_internal(P,E,A,Z) \ +#define _pool_get_aligned_internal_numa(P,E,A,Z,N) \ do { \ pool_header_t * _pool_var (p) = pool_header (P); \ uword _pool_var (l); \ \ - STATIC_ASSERT(A==0 || ((A % sizeof(P[0]))==0) || ((sizeof(P[0]) % A) == 0), \ + STATIC_ASSERT(A==0 || ((A % sizeof(P[0]))==0) \ + || ((sizeof(P[0]) % A) == 0), \ "Pool aligned alloc of incorrectly sized object"); \ _pool_var (l) = 0; \ if (P) \ @@ -199,11 +200,12 @@ do { \ if (_pool_var (l) > 0) \ { \ /* Return free element from free list. */ \ - uword _pool_var (i) = _pool_var (p)->free_indices[_pool_var (l) - 1]; \ + uword _pool_var (i) = \ + _pool_var (p)->free_indices[_pool_var (l) - 1]; \ (E) = (P) + _pool_var (i); \ - _pool_var (p)->free_bitmap = \ - clib_bitmap_andnoti_notrim (_pool_var (p)->free_bitmap, \ - _pool_var (i)); \ + _pool_var (p)->free_bitmap = \ + clib_bitmap_andnoti_notrim (_pool_var (p)->free_bitmap, \ + _pool_var (i)); \ _vec_len (_pool_var (p)->free_indices) = _pool_var (l) - 1; \ CLIB_MEM_UNPOISON((E), sizeof((E)[0])); \ } \ @@ -216,17 +218,30 @@ do { \ os_out_of_memory(); \ } \ /* Nothing on free list, make a new element and return it. */ \ - P = _vec_resize (P, \ + P = _vec_resize_numa (P, \ /* length_increment */ 1, \ /* new size */ (vec_len (P) + 1) * sizeof (P[0]), \ pool_aligned_header_bytes, \ - /* align */ (A)); \ + /* align */ (A), \ + /* numa */ (N)); \ E = vec_end (P) - 1; \ - } \ + } \ if (Z) \ - memset(E, 0, sizeof(*E)); \ + memset(E, 0, sizeof(*E)); \ } while (0) +#define pool_get_aligned_zero_numa(P,E,A,Z,S) \ + _pool_get_aligned_internal_numa(P,E,A,Z,S) + +#define pool_get_aligned_numa(P,E,A,S) \ + _pool_get_aligned_internal_numa(P,E,A,0/*zero*/,S) + +#define pool_get_numa(P,E,S) \ + _pool_get_aligned_internal_numa(P,E,0/*align*/,0/*zero*/,S) + +#define _pool_get_aligned_internal(P,E,A,Z) \ + _pool_get_aligned_internal_numa(P,E,A,Z,VEC_NUMA_UNSPECIFIED) + /** Allocate an object E from a pool P with alignment A */ #define pool_get_aligned(P,E,A) _pool_get_aligned_internal(P,E,A,0) diff --git a/src/vppinfra/unix-misc.c b/src/vppinfra/unix-misc.c index f693102c65a..54016ed74f4 100644 --- a/src/vppinfra/unix-misc.c +++ b/src/vppinfra/unix-misc.c @@ -46,6 +46,7 @@ #include /* for sprintf */ __thread uword __os_thread_index = 0; +__thread uword __os_numa_index = 0; clib_error_t * clib_file_n_bytes (char *file, uword * result) diff --git a/src/vppinfra/vec.c b/src/vppinfra/vec.c index 16372e9ef22..2ee78952d19 100644 --- a/src/vppinfra/vec.c +++ b/src/vppinfra/vec.c @@ -44,16 +44,24 @@ void * vec_resize_allocate_memory (void *v, word length_increment, uword data_bytes, - uword header_bytes, uword data_align) + uword header_bytes, uword data_align, + uword numa_id) { vec_header_t *vh = _vec_find (v); uword old_alloc_bytes, new_alloc_bytes; void *old, *new; + void *oldheap; header_bytes = vec_header_bytes (header_bytes); data_bytes += header_bytes; + if (PREDICT_FALSE (numa_id != VEC_NUMA_UNSPECIFIED)) + { + oldheap = clib_mem_get_per_cpu_heap (); + clib_mem_set_per_cpu_heap (clib_mem_get_per_numa_heap (numa_id)); + } + if (!v) { new = clib_mem_alloc_aligned_at_offset (data_bytes, data_align, header_bytes, 1 /* yes, call os_out_of_memory */ @@ -64,6 +72,9 @@ vec_resize_allocate_memory (void *v, CLIB_MEM_POISON (new + data_bytes, new_alloc_bytes - data_bytes); v = new + header_bytes; _vec_len (v) = length_increment; + _vec_numa (v) = numa_id; + if (PREDICT_FALSE (numa_id != VEC_NUMA_UNSPECIFIED)) + clib_mem_set_per_cpu_heap (oldheap); return v; } @@ -79,6 +90,8 @@ vec_resize_allocate_memory (void *v, if (data_bytes <= old_alloc_bytes) { CLIB_MEM_UNPOISON (v, data_bytes); + if (PREDICT_FALSE (numa_id != VEC_NUMA_UNSPECIFIED)) + clib_mem_set_per_cpu_heap (oldheap); return v; } @@ -110,6 +123,10 @@ vec_resize_allocate_memory (void *v, memset (v + old_alloc_bytes, 0, new_alloc_bytes - old_alloc_bytes); CLIB_MEM_POISON (new + data_bytes, new_alloc_bytes - data_bytes); + _vec_numa ((v + header_bytes)) = numa_id; + if (PREDICT_FALSE (numa_id != VEC_NUMA_UNSPECIFIED)) + clib_mem_set_per_cpu_heap (oldheap); + return v + header_bytes; } diff --git a/src/vppinfra/vec.h b/src/vppinfra/vec.h index 021b2295964..9054eaa5e57 100644 --- a/src/vppinfra/vec.h +++ b/src/vppinfra/vec.h @@ -96,12 +96,14 @@ @param data_bytes requested size in bytes @param header_bytes header size in bytes (may be zero) @param data_align alignment (may be zero) + @param numa_id numa id (may be zero) @return v_prime pointer to resized vector, may or may not equal v */ void *vec_resize_allocate_memory (void *v, word length_increment, uword data_bytes, - uword header_bytes, uword data_align); + uword header_bytes, uword data_align, + uword numa_id); /** \brief Low-level vector resize function, usually not called directly @@ -110,19 +112,25 @@ void *vec_resize_allocate_memory (void *v, @param data_bytes requested size in bytes @param header_bytes header size in bytes (may be zero) @param data_align alignment (may be zero) + @param numa_id (may be ~0) @return v_prime pointer to resized vector, may or may not equal v */ -#define _vec_resize(V,L,DB,HB,A) \ - _vec_resize_inline(V,L,DB,HB,clib_max((__alignof__((V)[0])),(A))) +#define _vec_resize_numa(V,L,DB,HB,A,S) \ + _vec_resize_inline(V,L,DB,HB,clib_max((__alignof__((V)[0])),(A)),(S)) + +#define _vec_resize(V,L,DB,HB,A) \ + _vec_resize_numa(V,L,DB,HB,A,VEC_NUMA_UNSPECIFIED) always_inline void * _vec_resize_inline (void *v, word length_increment, - uword data_bytes, uword header_bytes, uword data_align) + uword data_bytes, uword header_bytes, uword data_align, + uword numa_id) { vec_header_t *vh = _vec_find (v); uword new_data_bytes, aligned_header_bytes; + void *oldheap; aligned_header_bytes = vec_header_bytes (header_bytes); @@ -132,6 +140,12 @@ _vec_resize_inline (void *v, { void *p = v - aligned_header_bytes; + if (PREDICT_FALSE (numa_id != VEC_NUMA_UNSPECIFIED)) + { + oldheap = clib_mem_get_per_cpu_heap (); + clib_mem_set_per_cpu_heap (clib_mem_get_per_numa_heap (numa_id)); + } + /* Vector header must start heap object. */ ASSERT (clib_mem_is_heap_object (p)); @@ -140,15 +154,19 @@ _vec_resize_inline (void *v, { CLIB_MEM_UNPOISON (v, data_bytes); vh->len += length_increment; + if (PREDICT_FALSE (numa_id != VEC_NUMA_UNSPECIFIED)) + clib_mem_set_per_cpu_heap (oldheap); return v; } + if (PREDICT_FALSE (numa_id != VEC_NUMA_UNSPECIFIED)) + clib_mem_set_per_cpu_heap (oldheap); } /* Slow path: call helper function. */ return vec_resize_allocate_memory (v, length_increment, data_bytes, header_bytes, clib_max (sizeof (vec_header_t), - data_align)); + data_align), numa_id); } /** \brief Determine if vector will resize with next allocation @@ -221,16 +239,32 @@ clib_mem_is_vec (void *v) @param N number of elements to add @param H header size in bytes (may be zero) @param A alignment (may be zero) + @param S numa_id (may be zero) @return V (value-result macro parameter) */ -#define vec_resize_ha(V,N,H,A) \ -do { \ - word _v(n) = (N); \ - word _v(l) = vec_len (V); \ - V = _vec_resize ((V), _v(n), (_v(l) + _v(n)) * sizeof ((V)[0]), (H), (A)); \ +#define vec_resize_has(V,N,H,A,S) \ +do { \ + word _v(n) = (N); \ + word _v(l) = vec_len (V); \ + V = _vec_resize_numa ((V), _v(n), \ + (_v(l) + _v(n)) * sizeof ((V)[0]), \ + (H), (A),(S)); \ } while (0) +/** \brief Resize a vector (less general version). + Add N elements to end of given vector V, return pointer to start of vector. + Vector will have room for H header bytes and will have user's data aligned + at alignment A (rounded to next power of 2). + + @param V pointer to a vector + @param N number of elements to add + @param H header size in bytes (may be zero) + @param A alignment (may be zero) + @return V (value-result macro parameter) +*/ +#define vec_resize_ha(V,N,H,A) vec_resize_has(V,N,H,A,VEC_NUMA_UNSPECIFIED) + /** \brief Resize a vector (no header, unspecified alignment) Add N elements to end of given vector V, return pointer to start of vector. Vector will have room for H header bytes and will have user's data aligned @@ -352,22 +386,35 @@ do { \ @param V pointer to a vector @param H size of header in bytes @param A alignment (may be zero) + @param S numa (may be VEC_NUMA_UNSPECIFIED) @return Vdup copy of vector */ -#define vec_dup_ha(V,H,A) \ +#define vec_dup_ha_numa(V,H,A,S) \ ({ \ __typeof__ ((V)[0]) * _v(v) = 0; \ uword _v(l) = vec_len (V); \ if (_v(l) > 0) \ { \ - vec_resize_ha (_v(v), _v(l), (H), (A)); \ + vec_resize_has (_v(v), _v(l), (H), (A), (S)); \ clib_memcpy_fast (_v(v), (V), _v(l) * sizeof ((V)[0]));\ } \ _v(v); \ }) +/** \brief Return copy of vector (VEC_NUMA_UNSPECIFIED). + + @param V pointer to a vector + @param H size of header in bytes + @param A alignment (may be zero) + + @return Vdup copy of vector +*/ +#define vec_dup_ha(V,H,A) \ + vec_dup_ha_numa(V,H,A,VEC_NUMA_UNSPECIFIED) + + /** \brief Return copy of vector (no header, no alignment) @param V pointer to a vector @@ -412,24 +459,40 @@ do { \ @param I vector index which will be valid upon return @param H header size in bytes (may be zero) @param A alignment (may be zero) + @param N numa_id (may be zero) @return V (value-result macro parameter) */ -#define vec_validate_ha(V,I,H,A) \ -do { \ - STATIC_ASSERT(A==0 || ((A % sizeof(V[0]))==0) || ((sizeof(V[0]) % A) == 0),\ - "vector validate aligned on incorrectly sized object"); \ - word _v(i) = (I); \ - word _v(l) = vec_len (V); \ - if (_v(i) >= _v(l)) \ - { \ - vec_resize_ha ((V), 1 + (_v(i) - _v(l)), (H), (A)); \ - /* Must zero new space since user may have previously \ - used e.g. _vec_len (v) -= 10 */ \ - clib_memset ((V) + _v(l), 0, (1 + (_v(i) - _v(l))) * sizeof ((V)[0])); \ - } \ +#define vec_validate_han(V,I,H,A,N) \ +do { \ + void *oldheap; \ + STATIC_ASSERT(A==0 || ((A % sizeof(V[0]))==0) \ + || ((sizeof(V[0]) % A) == 0), \ + "vector validate aligned on incorrectly sized object"); \ + word _v(i) = (I); \ + word _v(l) = vec_len (V); \ + if (_v(i) >= _v(l)) \ + { \ + /* switch to the per-numa heap if directed */ \ + if (PREDICT_FALSE(N != VEC_NUMA_UNSPECIFIED)) \ + { \ + oldheap = clib_mem_get_per_cpu_heap(); \ + clib_mem_set_per_cpu_heap (clib_mem_get_per_numa_heap(N)); \ + } \ + \ + vec_resize_ha ((V), 1 + (_v(i) - _v(l)), (H), (A)); \ + /* Must zero new space since user may have previously \ + used e.g. _vec_len (v) -= 10 */ \ + clib_memset ((V) + _v(l), 0, \ + (1 + (_v(i) - _v(l))) * sizeof ((V)[0])); \ + /* Switch back to the global heap */ \ + if (PREDICT_FALSE (N != VEC_NUMA_UNSPECIFIED)) \ + clib_mem_set_per_cpu_heap (oldheap); \ + } \ } while (0) +#define vec_validate_ha(V,I,H,A) vec_validate_han(V,I,H,A,VEC_NUMA_UNSPECIFIED) + /** \brief Make sure vector is long enough for given index (no header, unspecified alignment) diff --git a/src/vppinfra/vec_bootstrap.h b/src/vppinfra/vec_bootstrap.h index fbb01b685ca..879703c8f6a 100644 --- a/src/vppinfra/vec_bootstrap.h +++ b/src/vppinfra/vec_bootstrap.h @@ -58,11 +58,14 @@ typedef struct u64 len; #else u32 len; /**< Number of elements in vector (NOT its allocated length). */ - u32 dlmalloc_header_offset; /**< offset to memory allocator offset */ + u8 numa_id; /**< NUMA id */ + u8 vpad[3]; /**< pad to 8 bytes */ #endif u8 vector_data[0]; /**< Vector data . */ } vec_header_t; +#define VEC_NUMA_UNSPECIFIED (0xFF) + /** \brief Find the vector header Given the user's pointer to a vector, find the corresponding @@ -128,7 +131,7 @@ vec_aligned_header_end (void *v, uword header_bytes, uword align) /** \brief Number of elements in vector (lvalue-capable) - _vec_len (v) does not check for null, but can be used as a lvalue + _vec_len (v) does not check for null, but can be used as an lvalue (e.g. _vec_len (v) = 99). */ @@ -142,6 +145,20 @@ vec_aligned_header_end (void *v, uword header_bytes, uword align) #define vec_len(v) ((v) ? _vec_len(v) : 0) +/** \brief Vector's NUMA id (lvalue-capable) + + _vec_numa(v) does not check for null, but can be used as an lvalue + (e.g. _vec_numa(v) = 1). +*/ + +#define _vec_numa(v) (_vec_find(v)->numa_id) + +/** \brief Return vector's NUMA ID (rvalue-only, NULL tolerant) + vec_numa(v) checks for NULL, but cannot be used as an lvalue. +*/ +#define vec_numa(v) ((v) ? _vec_numa(v) : 0) + + /** \brief Number of data bytes in vector. */ #define vec_bytes(v) (vec_len (v) * sizeof (v[0])) @@ -208,6 +225,17 @@ for (var = vec_end (vec) - 1; var >= (vec); var--) #define vec_foreach_index_backwards(var,v) \ for ((var) = vec_len((v)) - 1; (var) >= 0; (var)--) +/** \brief return the NUMA index for a vector */ +always_inline uword +vec_get_numa (void *v) +{ + vec_header_t *vh; + if (v == 0) + return 0; + vh = _vec_find (v); + return vh->numa_id; +} + #endif /* included_clib_vec_bootstrap_h */ /* -- cgit 1.2.3-korg