diff options
author | Damjan Marion <damarion@cisco.com> | 2018-06-28 17:55:50 +0200 |
---|---|---|
committer | Marco Varlese <marco.varlese@suse.de> | 2018-06-29 07:46:33 +0000 |
commit | 09fdf9d074430032c27aba9e12e52440c7de2006 (patch) | |
tree | ac0e37d7a0181308d1408ce0f590f4e366faf06a | |
parent | 697faeace706337eddf0407e4e28e0bb8d39c20e (diff) |
bihash key compare improvements
Looks like the CPU doesn't like overlapping loads.
This new code shows a 3-4 clock improvement in some cases.
Change-Id: Ia1b49976ad95140c573f892fdc0a32eebbfa06c8
Signed-off-by: Damjan Marion <damarion@cisco.com>
-rw-r--r-- | src/vppinfra/bihash_24_8.h | 5 | ||||
-rw-r--r-- | src/vppinfra/bihash_40_8.h | 10 | ||||
-rw-r--r-- | src/vppinfra/bihash_48_8.h | 7 |
3 files changed, 10 insertions, 12 deletions
diff --git a/src/vppinfra/bihash_24_8.h b/src/vppinfra/bihash_24_8.h index 0c57e35bc47..dcce2a21259 100644 --- a/src/vppinfra/bihash_24_8.h +++ b/src/vppinfra/bihash_24_8.h @@ -72,9 +72,8 @@ clib_bihash_key_compare_24_8 (u64 * a, u64 * b) u64x8 v = u64x8_load_unaligned (a) ^ u64x8_load_unaligned (b); return (u64x8_is_zero_mask (v) & 0x7) == 0; #elif defined(CLIB_HAVE_VEC128) && defined(CLIB_HAVE_VEC128_UNALIGNED_LOAD_STORE) - u64x2 v; - v = u64x2_load_unaligned (a) ^ u64x2_load_unaligned (b); - v |= u64x2_load_unaligned (a + 1) ^ u64x2_load_unaligned (b + 1); + u64x2 v = { a[2] ^ b[2], 0 }; + v |= u64x2_load_unaligned (a) ^ u64x2_load_unaligned (b); return u64x2_is_all_zero (v); #else return ((a[0] ^ b[0]) | (a[1] ^ b[1]) | (a[2] ^ b[2])) == 0; diff --git a/src/vppinfra/bihash_40_8.h b/src/vppinfra/bihash_40_8.h index 9ceecfcfe21..90adb8f5b10 100644 --- a/src/vppinfra/bihash_40_8.h +++ b/src/vppinfra/bihash_40_8.h @@ -74,15 +74,13 @@ clib_bihash_key_compare_40_8 (u64 * a, u64 * b) v = u64x8_load_unaligned (a) ^ u64x8_load_unaligned (b); return (u64x8_is_zero_mask (v) & 0x1f) == 0; #elif defined (CLIB_HAVE_VEC256) - u64x4 v; - v = u64x4_load_unaligned (a) ^ u64x4_load_unaligned (b); - v |= u64x4_load_unaligned (a + 1) ^ u64x4_load_unaligned (b + 1); + u64x4 v = { a[4] ^ b[4], 0, 0, 0 }; + v |= u64x4_load_unaligned (a) ^ u64x4_load_unaligned (b); return u64x4_is_all_zero (v); #elif defined(CLIB_HAVE_VEC128) && defined(CLIB_HAVE_VEC128_UNALIGNED_LOAD_STORE) - u64x2 v; - v = u64x2_load_unaligned (a) ^ u64x2_load_unaligned (b); + u64x2 v = { a[4] ^ b[4], 0 }; + v |= u64x2_load_unaligned (a) ^ u64x2_load_unaligned (b); v |= u64x2_load_unaligned (a + 2) ^ u64x2_load_unaligned (b + 2); - v |= u64x2_load_unaligned (a + 3) ^ u64x2_load_unaligned (b + 3); return u64x2_is_all_zero (v); #else return ((a[0] ^ b[0]) | (a[1] ^ b[1]) | (a[2] ^ b[2]) | (a[3] ^ b[3]) diff --git a/src/vppinfra/bihash_48_8.h b/src/vppinfra/bihash_48_8.h index 7e7379baab7..e981950b1bd 100644 --- 
a/src/vppinfra/bihash_48_8.h +++ b/src/vppinfra/bihash_48_8.h @@ -75,9 +75,10 @@ clib_bihash_key_compare_48_8 (u64 * a, u64 * b) u64x8 v = u64x8_load_unaligned (a) ^ u64x8_load_unaligned (b); return (u64x8_is_zero_mask (v) & 0x3f) == 0; #elif defined (CLIB_HAVE_VEC256) - u64x4 v; - v = u64x4_load_unaligned (a) ^ u64x4_load_unaligned (b); - v |= u64x4_load_unaligned (a + 2) ^ u64x4_load_unaligned (b + 2); + u64x4 v = { 0 }; + v = u64x4_insert_lo (v, u64x2_load_unaligned (a + 4) ^ + u64x2_load_unaligned (b + 4)); + v |= u64x4_load_unaligned (a) ^ u64x4_load_unaligned (b); return u64x4_is_all_zero (v); #elif defined(CLIB_HAVE_VEC128) && defined(CLIB_HAVE_VEC128_UNALIGNED_LOAD_STORE) u64x2 v; |