From 9c949e72a473195c10a1c1caf503db9467c93f9a Mon Sep 17 00:00:00 2001 From: Dave Barach Date: Thu, 28 Jun 2018 10:59:05 -0400 Subject: Fix mheap_get_aligned() performance jackpot If non-trivial alignment (e.g. 64) requested, and the object size (e.g. 16) is smaller than (alignment_request - MHEAP_ELT_OVERHEAD_BYTES), round up the size request. This avoids creating remainder chunks, which are false-cache-line-sharing bait to begin with. Change-Id: Ie1a21286d29557d125bb346254b1be2def868b1a Signed-off-by: Dave Barach --- src/vppinfra/mheap.c | 20 ++++++++++++++++++-- src/vppinfra/test_mheap.c | 47 ++++++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 64 insertions(+), 3 deletions(-) (limited to 'src') diff --git a/src/vppinfra/mheap.c b/src/vppinfra/mheap.c index fceca95ff7d..0c72c888498 100644 --- a/src/vppinfra/mheap.c +++ b/src/vppinfra/mheap.c @@ -663,12 +663,28 @@ mheap_get_aligned (void *v, return v; } - /* Round requested size. */ + /* + * Round requested size. + * + * Step 1: round up to the minimum object size. + * Step 2: round up to a multiple of the user data size (e.g. 4) + * Step 3: if non-trivial alignment requested, round up + * so that the object precisely fills a chunk + * as big as the alignment request. + * + * Step 3 prevents the code from going into "bin search hyperspace": + * looking at a huge number of fractional remainder chunks, none of which + * will satisfy the alignment constraint. This fixes an allocator + * performance issue when one requests a large number of 16 byte objects + * aligned to 64 bytes, to name one variation on the theme. 
+ */ n_user_data_bytes = clib_max (n_user_data_bytes, MHEAP_MIN_USER_DATA_BYTES); n_user_data_bytes = round_pow2 (n_user_data_bytes, STRUCT_SIZE_OF (mheap_elt_t, user_data[0])); - + if (align > MHEAP_ELT_OVERHEAD_BYTES) + n_user_data_bytes = clib_max (n_user_data_bytes, + align - MHEAP_ELT_OVERHEAD_BYTES); if (!v) v = mheap_alloc (0, 64 << 20); diff --git a/src/vppinfra/test_mheap.c b/src/vppinfra/test_mheap.c index b48e5273651..f6923a96926 100644 --- a/src/vppinfra/test_mheap.c +++ b/src/vppinfra/test_mheap.c @@ -48,11 +48,47 @@ #include #include #include +#include static int verbose = 0; #define if_verbose(format,args...) \ if (verbose) { clib_warning(format, ## args); } +int +test1 (void) +{ + clib_time_t clib_time; + void *h_mem = clib_mem_alloc (2ULL << 30); + void *h; + uword *objects = 0; + int i; + f64 before, after; + + clib_time_init (&clib_time); + + vec_validate (objects, 2000000 - 1); + + h = mheap_alloc (h_mem, (uword) (2ULL << 30)); /* 2ULL: avoid int overflow */ + + before = clib_time_now (&clib_time); + + for (i = 0; i < vec_len (objects); i++) + { + h = mheap_get_aligned (h, 24 /* size */ , + 64 /* align */ , + 16 /* align at offset */ , &objects[i]); + } + + after = clib_time_now (&clib_time); + + fformat (stdout, "alloc: %u objects in %.2f seconds, %.2f objects/second\n", + vec_len (objects), (after - before), + ((f64) vec_len (objects)) / (after - before)); + + return 0; +} + + int test_mheap_main (unformat_input_t * input) { @@ -70,6 +106,7 @@ test_mheap_main (unformat_input_t * input) #define CHECK_VALIDITY 1 #define CHECK_DATA 2 #define CHECK_ALIGN 4 +#define TEST1 8 n_iterations = 10; seed = 0; @@ -93,7 +130,8 @@ test_mheap_main (unformat_input_t * input) && 0 == unformat (input, "verbose %=", &really_verbose, 1) && 0 == unformat (input, "trace %=", &trace, 1) && 0 == unformat (input, "vm %=", &use_vm, 1) - && 0 == unformat (input, "align %|", &check_mask, CHECK_ALIGN)) + && 0 == unformat (input, "align %|", &check_mask, CHECK_ALIGN) + && 0 == unformat (input, "test1 %|", 
&check_mask, TEST1)) { clib_warning ("unknown input `%U'", format_unformat_error, input); return 1; @@ -104,6 +142,11 @@ test_mheap_main (unformat_input_t * input) if (!seed) seed = random_default_seed (); + if (check_mask & TEST1) + { + return test1 (); + } + if_verbose ("testing %d iterations, %d %saligned objects, max. size %d, seed %d", n_iterations, n_objects, (check_mask & CHECK_ALIGN) ? "randomly " : "un", @@ -224,6 +267,8 @@ main (int argc, char *argv[]) unformat_input_t i; int ret; + clib_mem_init (0, 3ULL << 30); + verbose = (argc > 1); unformat_init_command_line (&i, argv); ret = test_mheap_main (&i); -- cgit 1.2.3-korg