diff options
author | Damjan Marion <damarion@cisco.com> | 2021-10-27 17:28:26 +0200 |
---|---|---|
committer | Florin Coras <florin.coras@gmail.com> | 2021-10-27 17:08:42 +0000 |
commit | f62ed3f9c1ec3e8db36f63d6a54f46b7bea43723 (patch) | |
tree | dc7dde12d16d802ebb41a513a93a741895bac6da /src/vppinfra | |
parent | bd908d5ac9c3b8b54c5ddaf09cc356e5b4d0ee35 (diff) |
vppinfra: improve clib_array_mask_u32
Type: improvement
Change-Id: Ibdb79a0a1c3ba56f9b2f0f2536aafcdeda5cb6d6
Signed-off-by: Damjan Marion <damarion@cisco.com>
Diffstat (limited to 'src/vppinfra')
-rw-r--r-- | src/vppinfra/vector/array_mask.h | 85 |
1 files changed, 52 insertions, 33 deletions
```diff
diff --git a/src/vppinfra/vector/array_mask.h b/src/vppinfra/vector/array_mask.h
index 778ed3e638f..fa427a6f1a9 100644
--- a/src/vppinfra/vector/array_mask.h
+++ b/src/vppinfra/vector/array_mask.h
@@ -17,61 +17,80 @@
 static_always_inline void
 clib_array_mask_u32 (u32 *src, u32 mask, u32 n_elts)
 {
-  u32 i;
 #if defined(CLIB_HAVE_VEC512)
   u32x16 mask16 = u32x16_splat (mask);
-
-  for (i = 0; i + 16 <= n_elts; i += 16)
-    *((u32x16u *) (src + i)) &= mask16;
-  n_elts -= i;
-  if (n_elts)
+  if (n_elts <= 16)
     {
-      u16 m = pow2_mask (n_elts);
-      u32x16_mask_store (u32x16_mask_load_zero (src + i, m) & mask16, src + i,
-                         m);
+      u32 m = pow2_mask (n_elts);
+      u32x16 r = u32x16_mask_load_zero (src, m);
+      u32x16_mask_store (r & mask16, src, m);
+      return;
     }
-  return;
+  for (int i = 0; i < n_elts; i += 16)
+    *((u32x16u *) (src + i)) &= mask16;
+  *((u32x16u *) (src + n_elts - 16)) &= mask16;
 #elif defined(CLIB_HAVE_VEC256)
   u32x8 mask8 = u32x8_splat (mask);
-
-  for (i = 0; i + 8 <= n_elts; i += 8)
-    *((u32x8u *) (src + i)) &= mask8;
-  n_elts -= i;
-  src += i;
 #if defined(CLIB_HAVE_VEC256_MASK_LOAD_STORE)
-  if (n_elts)
+  if (n_elts <= 8)
     {
-      u8 m = pow2_mask (n_elts);
-      u32x8_mask_store (u32x8_mask_load_zero (src, m) & mask8, src, m);
+      u32 m = pow2_mask (n_elts);
+      u32x8 r = u32x8_mask_load_zero (src, m);
+      u32x8_mask_store (r & mask8, src, m);
+      return;
+    }
+#else
+  if (PREDICT_FALSE (n_elts < 4))
+    {
+      if (n_elts & 2)
+        {
+          src[0] &= mask;
+          src[1] &= mask;
+          src += 2;
+        }
+      if (n_elts & 1)
+        src[0] &= mask;
+      return;
+    }
+  if (n_elts <= 8)
+    {
+      u32x4 mask4 = u32x4_splat (mask);
+      *(u32x4u *) src &= mask4;
+      *(u32x4u *) (src + n_elts - 4) &= mask4;
     }
-  return;
 #endif
+
+  for (int i = 0; i < n_elts; i += 8)
+    *((u32x8u *) (src + i)) &= mask8;
+  *((u32x8u *) (src + n_elts - 8)) &= mask8;
 #elif defined(CLIB_HAVE_VEC128)
   u32x4 mask4 = u32x4_splat (mask);
 
-  for (i = 0; i + 4 <= n_elts; i += 4)
-    *((u32x4u *) (src + i)) &= mask4;
-  n_elts -= i;
-  src += i;
-  switch (n_elts)
+  if (PREDICT_FALSE (n_elts < 4))
     {
-    case 3:
-      src[2] &= mask;
-    case 2:
-      src[1] &= mask;
-    case 1:
-      src[0] &= mask;
-    case 0:
-    default:;
+      if (n_elts & 2)
+        {
+          src[0] &= mask;
+          src[1] &= mask;
+          src += 2;
+        }
+      if (n_elts & 1)
+        src[0] &= mask;
+      return;
     }
+
+  for (int i = 0; i < n_elts; i += 4)
+    *((u32x4u *) (src + i)) &= mask4;
+  *((u32x4u *) (src + n_elts - 4)) &= mask4;
   return;
-#endif
+#else
   while (n_elts > 0)
     {
       src[0] &= mask;
       src++;
       n_elts--;
     }
+#endif
 }
 
 #endif
```