summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Dave Barach <dave@barachs.net> 2018-06-28 10:59:05 -0400
committer Florin Coras <florin.coras@gmail.com> 2018-06-28 16:33:36 +0000
commit 9c949e72a473195c10a1c1caf503db9467c93f9a (patch)
tree f44a2128cd25735f553cc9a494a4b12e2580d085
parent 7e12d949a346d2e69afb7a8029c0099b5f131b25 (diff)
Fix mheap_get_aligned() performance jackpot
If a non-trivial alignment (e.g. 64) is requested, and the object size (e.g. 16) is smaller than (alignment_request - MHEAP_ELT_OVERHEAD_BYTES), round the size request up to (alignment_request - MHEAP_ELT_OVERHEAD_BYTES). This avoids creating remainder chunks, which are false-cache-line-sharing bait to begin with.
Change-Id: Ie1a21286d29557d125bb346254b1be2def868b1a
Signed-off-by: Dave Barach <dave@barachs.net>
-rw-r--r-- src/vppinfra/mheap.c 20
-rw-r--r-- src/vppinfra/test_mheap.c 47
2 files changed, 64 insertions, 3 deletions
diff --git a/src/vppinfra/mheap.c b/src/vppinfra/mheap.c
index fceca95ff7d..0c72c888498 100644
--- a/src/vppinfra/mheap.c
+++ b/src/vppinfra/mheap.c
@@ -663,12 +663,28 @@ mheap_get_aligned (void *v,
return v;
}
- /* Round requested size. */
+ /*
+ * Round requested size.
+ *
+ * Step 1: round up to the minimum object size.
+ * Step 2: round up to a multiple of the user data size (e.g. 4)
+ * Step 3: if non-trivial alignment requested, round up
+ * so that the object precisely fills a chunk
+ * as big as the alignment request.
+ *
+ * Step 3 prevents the code from going into "bin search hyperspace":
+ * looking at a huge number of fractional remainder chunks, none of which
+ * will satisfy the alignment constraint. This fixes an allocator
+ * performance issue when one requests a large number of 16 byte objects
+ * aligned to 64 bytes, to name one variation on the theme.
+ */
n_user_data_bytes = clib_max (n_user_data_bytes, MHEAP_MIN_USER_DATA_BYTES);
n_user_data_bytes =
round_pow2 (n_user_data_bytes,
STRUCT_SIZE_OF (mheap_elt_t, user_data[0]));
-
+ if (align > MHEAP_ELT_OVERHEAD_BYTES)
+ n_user_data_bytes = clib_max (n_user_data_bytes,
+ align - MHEAP_ELT_OVERHEAD_BYTES);
if (!v)
v = mheap_alloc (0, 64 << 20);
diff --git a/src/vppinfra/test_mheap.c b/src/vppinfra/test_mheap.c
index b48e5273651..f6923a96926 100644
--- a/src/vppinfra/test_mheap.c
+++ b/src/vppinfra/test_mheap.c
@@ -48,12 +48,48 @@
#include <vppinfra/mheap.h>
#include <vppinfra/format.h>
#include <vppinfra/random.h>
+#include <vppinfra/time.h>
static int verbose = 0;
#define if_verbose(format,args...) \
if (verbose) { clib_warning(format, ## args); }
+/*
+ * Allocator timing test.
+ *
+ * Builds a private mheap on top of a 2 GiB region obtained from
+ * clib_mem_alloc(), then times 2,000,000 calls to mheap_get_aligned()
+ * requesting 24-byte objects aligned to 64 bytes with an alignment
+ * offset of 16. The elapsed time and the allocation rate are printed
+ * to stdout.
+ *
+ * Returns 0 unconditionally.
+ */
int
+test1 (void)
+{
+  clib_time_t clib_time;
+  /*
+   * One constant for both the backing allocation and the mheap size.
+   * The ULL suffix matters: a plain "2 << 30" is evaluated as int and
+   * overflows before any cast to uword can take effect.
+   */
+  const uword heap_size = 2ULL << 30;
+  void *h_mem = clib_mem_alloc (heap_size);
+  void *h;
+  uword *objects = 0;
+  int i;
+  f64 before, after;
+
+  clib_time_init (&clib_time);
+
+  /* One slot per allocation, to receive the offset handed back. */
+  vec_validate (objects, 2000000 - 1);
+
+  h = mheap_alloc (h_mem, heap_size);
+
+  before = clib_time_now (&clib_time);
+
+  for (i = 0; i < vec_len (objects); i++)
+    {
+      h = mheap_get_aligned (h, 24 /* size */ ,
+			     64 /* align */ ,
+			     16 /* align at offset */ , &objects[i]);
+    }
+
+  after = clib_time_now (&clib_time);
+
+  fformat (stdout, "alloc: %u objects in %.2f seconds, %.2f objects/second\n",
+	   vec_len (objects), (after - before),
+	   ((f64) vec_len (objects)) / (after - before));
+
+  /* Release the offset vector and the region backing the test heap. */
+  vec_free (objects);
+  clib_mem_free (h_mem);
+
+  return 0;
+}
+
+
+int
test_mheap_main (unformat_input_t * input)
{
int i, j, k, n_iterations;
@@ -70,6 +106,7 @@ test_mheap_main (unformat_input_t * input)
#define CHECK_VALIDITY 1
#define CHECK_DATA 2
#define CHECK_ALIGN 4
+#define TEST1 8
n_iterations = 10;
seed = 0;
@@ -93,7 +130,8 @@ test_mheap_main (unformat_input_t * input)
&& 0 == unformat (input, "verbose %=", &really_verbose, 1)
&& 0 == unformat (input, "trace %=", &trace, 1)
&& 0 == unformat (input, "vm %=", &use_vm, 1)
- && 0 == unformat (input, "align %|", &check_mask, CHECK_ALIGN))
+ && 0 == unformat (input, "align %|", &check_mask, CHECK_ALIGN)
+ && 0 == unformat (input, "test1 %|", &check_mask, TEST1))
{
clib_warning ("unknown input `%U'", format_unformat_error, input);
return 1;
@@ -104,6 +142,11 @@ test_mheap_main (unformat_input_t * input)
if (!seed)
seed = random_default_seed ();
+ if (check_mask & TEST1)
+ {
+ return test1 ();
+ }
+
if_verbose
("testing %d iterations, %d %saligned objects, max. size %d, seed %d",
n_iterations, n_objects, (check_mask & CHECK_ALIGN) ? "randomly " : "un",
@@ -224,6 +267,8 @@ main (int argc, char *argv[])
unformat_input_t i;
int ret;
+ clib_mem_init (0, 3ULL << 30);
+
verbose = (argc > 1);
unformat_init_command_line (&i, argv);
ret = test_mheap_main (&i);