aboutsummaryrefslogtreecommitdiffstats
path: root/src/vppinfra/bihash_48_8.h
diff options
context:
space:
mode:
authorDamjan Marion <damarion@cisco.com>2018-06-28 17:55:50 +0200
committerMarco Varlese <marco.varlese@suse.de>2018-06-29 07:46:33 +0000
commit09fdf9d074430032c27aba9e12e52440c7de2006 (patch)
treeac0e37d7a0181308d1408ce0f590f4e366faf06a /src/vppinfra/bihash_48_8.h
parent697faeace706337eddf0407e4e28e0bb8d39c20e (diff)
bihash key compare improvements
Looks like the CPU doesn't like overlapping loads. This new code in some cases shows a 3-4 clock improvement. Change-Id: Ia1b49976ad95140c573f892fdc0a32eebbfa06c8 Signed-off-by: Damjan Marion <damarion@cisco.com>
Diffstat (limited to 'src/vppinfra/bihash_48_8.h')
-rw-r--r--src/vppinfra/bihash_48_8.h7
1 files changed, 4 insertions, 3 deletions
diff --git a/src/vppinfra/bihash_48_8.h b/src/vppinfra/bihash_48_8.h
index 7e7379baab7..e981950b1bd 100644
--- a/src/vppinfra/bihash_48_8.h
+++ b/src/vppinfra/bihash_48_8.h
@@ -75,9 +75,10 @@ clib_bihash_key_compare_48_8 (u64 * a, u64 * b)
u64x8 v = u64x8_load_unaligned (a) ^ u64x8_load_unaligned (b);
return (u64x8_is_zero_mask (v) & 0x3f) == 0;
#elif defined (CLIB_HAVE_VEC256)
- u64x4 v;
- v = u64x4_load_unaligned (a) ^ u64x4_load_unaligned (b);
- v |= u64x4_load_unaligned (a + 2) ^ u64x4_load_unaligned (b + 2);
+ u64x4 v = { 0 };
+ v = u64x4_insert_lo (v, u64x2_load_unaligned (a + 4) ^
+ u64x2_load_unaligned (b + 4));
+ v |= u64x4_load_unaligned (a) ^ u64x4_load_unaligned (b);
return u64x4_is_all_zero (v);
#elif defined(CLIB_HAVE_VEC128) && defined(CLIB_HAVE_VEC128_UNALIGNED_LOAD_STORE)
u64x2 v;