From 09fdf9d074430032c27aba9e12e52440c7de2006 Mon Sep 17 00:00:00 2001 From: Damjan Marion Date: Thu, 28 Jun 2018 17:55:50 +0200 Subject: bihash key compare improvements Looks like the CPU doesn't like overlapping loads. In some cases this new code shows a 3-4 clock improvement. Change-Id: Ia1b49976ad95140c573f892fdc0a32eebbfa06c8 Signed-off-by: Damjan Marion --- src/vppinfra/bihash_48_8.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'src/vppinfra/bihash_48_8.h') diff --git a/src/vppinfra/bihash_48_8.h b/src/vppinfra/bihash_48_8.h index 7e7379baab7..e981950b1bd 100644 --- a/src/vppinfra/bihash_48_8.h +++ b/src/vppinfra/bihash_48_8.h @@ -75,9 +75,10 @@ clib_bihash_key_compare_48_8 (u64 * a, u64 * b) u64x8 v = u64x8_load_unaligned (a) ^ u64x8_load_unaligned (b); return (u64x8_is_zero_mask (v) & 0x3f) == 0; #elif defined (CLIB_HAVE_VEC256) - u64x4 v; - v = u64x4_load_unaligned (a) ^ u64x4_load_unaligned (b); - v |= u64x4_load_unaligned (a + 2) ^ u64x4_load_unaligned (b + 2); + u64x4 v = { 0 }; + v = u64x4_insert_lo (v, u64x2_load_unaligned (a + 4) ^ + u64x2_load_unaligned (b + 4)); + v |= u64x4_load_unaligned (a) ^ u64x4_load_unaligned (b); return u64x4_is_all_zero (v); #elif defined(CLIB_HAVE_VEC128) && defined(CLIB_HAVE_VEC128_UNALIGNED_LOAD_STORE) u64x2 v; -- cgit 1.2.3-korg