diff options
author | Damjan Marion <damarion@cisco.com> | 2020-05-20 22:01:44 +0200 |
---|---|---|
committer | Damjan Marion <damarion@cisco.com> | 2020-05-20 22:01:44 +0200 |
commit | ef58758286e84d227377c447c7cf8fae82bdca94 (patch) | |
tree | 2bf2dd5acd8b2c733487f5749b95faa859bb225f /src | |
parent | ca86c95a3413214110a03b001d45d018385b92dc (diff) |
vlib: mmap process stacks
Instead of allocating process stacks from the main heap, this patch mmaps
stack memory together with a guard page.
This approach reduces main heap usage, and stack memory is faulted in
on demand, so bigger process stacks will have zero impact on memory
usage as long as the additional stack memory is not actually used.
In addition, it fixes an issue on systems which have a bigger default page
size (observed with 65536).
Type: improvement
Change-Id: I593365c603d4702e428967d80fd425fdee2c4a21
Signed-off-by: Damjan Marion <damarion@cisco.com>
Diffstat (limited to 'src')
-rw-r--r-- | src/vlib/CMakeLists.txt | 12 | ||||
-rw-r--r-- | src/vlib/config.h.in | 1 | ||||
-rw-r--r-- | src/vlib/node.c | 59 | ||||
-rw-r--r-- | src/vlib/node.h | 29 |
4 files changed, 42 insertions, 59 deletions
diff --git a/src/vlib/CMakeLists.txt b/src/vlib/CMakeLists.txt index 2846128ce93..8a31af68742 100644 --- a/src/vlib/CMakeLists.txt +++ b/src/vlib/CMakeLists.txt @@ -23,6 +23,18 @@ else() endif() set(PRE_DATA_SIZE 128 CACHE STRING "Buffer headroom size.") + +if (CMAKE_BUILD_TYPE_UC STREQUAL "DEBUG") + set(_ss 16) +else() + set(_ss 15) +endif() +set(VLIB_PROCESS_LOG2_STACK_SIZE + ${_ss} + CACHE + STRING "Process node default stack size (log2)" +) + configure_file( ${CMAKE_SOURCE_DIR}/vlib/config.h.in ${CMAKE_BINARY_DIR}/vlib/config.h diff --git a/src/vlib/config.h.in b/src/vlib/config.h.in index 5b7d5da64e4..19ec10cfcca 100644 --- a/src/vlib/config.h.in +++ b/src/vlib/config.h.in @@ -18,5 +18,6 @@ #define __PRE_DATA_SIZE @PRE_DATA_SIZE@ #define VLIB_BUFFER_ALLOC_FAULT_INJECTOR @BUFFER_ALLOC_FAULT_INJECTOR@ +#define VLIB_PROCESS_LOG2_STACK_SIZE @VLIB_PROCESS_LOG2_STACK_SIZE@ #endif diff --git a/src/vlib/node.c b/src/vlib/node.c index 2bb5bceadbc..2e93b9598eb 100644 --- a/src/vlib/node.c +++ b/src/vlib/node.c @@ -433,35 +433,36 @@ register_node (vlib_main_t * vm, vlib_node_registration_t * r) if (n->type == VLIB_NODE_TYPE_PROCESS) { vlib_process_t *p; - uword log2_n_stack_bytes; + void *map; + uword log2_n_stack_bytes, stack_bytes; + int mmap_flags = MAP_PRIVATE | MAP_ANONYMOUS; - log2_n_stack_bytes = clib_max (r->process_log2_n_stack_bytes, 15); + log2_n_stack_bytes = clib_max (r->process_log2_n_stack_bytes, + VLIB_PROCESS_LOG2_STACK_SIZE); + log2_n_stack_bytes = clib_max (log2_n_stack_bytes, + min_log2 (page_size)); -#ifdef CLIB_UNIX - /* - * Bump the stack size if running over a kernel with a large page size, - * and the stack isn't any too big to begin with. Otherwise, we'll - * trip over the stack guard page for sure. 
- */ - if ((page_size > (4 << 10)) && log2_n_stack_bytes < 19) - { - if ((1 << log2_n_stack_bytes) <= page_size) - log2_n_stack_bytes = min_log2 (page_size) + 1; - else - log2_n_stack_bytes++; - } -#endif + p = clib_mem_alloc_aligned (sizeof (p[0]), CLIB_CACHE_LINE_BYTES); + clib_memset (p, 0, sizeof (p[0])); + p->log2_n_stack_bytes = log2_n_stack_bytes; - p = clib_mem_alloc_aligned_at_offset - (sizeof (p[0]) + (1 << log2_n_stack_bytes), - STACK_ALIGN, STRUCT_OFFSET_OF (vlib_process_t, stack), - 0 /* no, don't call os_out_of_memory */ ); - if (p == 0) + stack_bytes = 1 << log2_n_stack_bytes; + /* map stack size + 2 extra guard pages */ + map = mmap (0, stack_bytes + page_size, PROT_READ | PROT_WRITE, + mmap_flags, -1, 0); + + if (map == MAP_FAILED) clib_panic ("failed to allocate process stack (%d bytes)", - 1 << log2_n_stack_bytes); + stack_bytes); - clib_memset (p, 0, sizeof (p[0])); - p->log2_n_stack_bytes = log2_n_stack_bytes; + /* skip the guard page */ + p->stack = map + page_size; + + mmap_flags |= MAP_FIXED; + map = mmap (map, page_size, PROT_NONE, mmap_flags, -1, 0); + + if (map == MAP_FAILED) + clib_unix_warning ("failed to create stack guard page"); /* Process node's runtime index is really index into process pointer vector. */ @@ -475,16 +476,6 @@ register_node (vlib_main_t * vm, vlib_node_registration_t * r) /* Node runtime is stored inside of process. */ rt = &p->node_runtime; - -#ifdef CLIB_UNIX - /* - * Disallow writes to the bottom page of the stack, to - * catch stack overflows. - */ - if (mprotect (p->stack, page_size, PROT_READ) < 0) - clib_unix_warning ("process stack"); -#endif - } else { diff --git a/src/vlib/node.h b/src/vlib/node.h index b9961f55b56..1bdb3bb7797 100644 --- a/src/vlib/node.h +++ b/src/vlib/node.h @@ -552,6 +552,7 @@ typedef struct typedef struct { + CLIB_CACHE_LINE_ALIGN_MARK (cacheline0); /* Node runtime for this process. 
*/ vlib_node_runtime_t node_runtime; @@ -613,32 +614,10 @@ typedef struct vlib_cli_output_function_t *output_function; uword output_function_arg; -#ifdef CLIB_UNIX - /* Pad to a multiple of the page size so we can mprotect process stacks */ -#define PAGE_SIZE_MULTIPLE 0x1000 -#define ALIGN_ON_MULTIPLE_PAGE_BOUNDARY_FOR_MPROTECT __attribute__ ((aligned (PAGE_SIZE_MULTIPLE))) -#else -#define ALIGN_ON_MULTIPLE_PAGE_BOUNDARY_FOR_MPROTECT -#endif - - /* Process stack. Starts here and extends 2^log2_n_stack_bytes - bytes. */ - + /* Process stack */ #define VLIB_PROCESS_STACK_MAGIC (0xdead7ead) - u32 stack[0] ALIGN_ON_MULTIPLE_PAGE_BOUNDARY_FOR_MPROTECT; -} vlib_process_t __attribute__ ((aligned (CLIB_CACHE_LINE_BYTES))); - -#ifdef CLIB_UNIX - /* Ensure that the stack is aligned on the multiple of the page size */ -typedef char - assert_process_stack_must_be_aligned_exactly_to_page_size_multiple[(sizeof - (vlib_process_t) - - - PAGE_SIZE_MULTIPLE) - == - 0 ? 0 : - -1]; -#endif + u32 *stack; +} vlib_process_t; typedef struct { |