about summary refs log tree commit diff stats
path: root/src/vppinfra/vector/array_mask.h
diff options
context:
space:
mode:
Diffstat (limited to 'src/vppinfra/vector/array_mask.h')
-rw-r--r--  src/vppinfra/vector/array_mask.h  119
1 files changed, 86 insertions, 33 deletions
diff --git a/src/vppinfra/vector/array_mask.h b/src/vppinfra/vector/array_mask.h
index 778ed3e638f..3d4a82ac01b 100644
--- a/src/vppinfra/vector/array_mask.h
+++ b/src/vppinfra/vector/array_mask.h
@@ -17,61 +17,114 @@
static_always_inline void
clib_array_mask_u32 (u32 *src, u32 mask, u32 n_elts)
{
- u32 i;
#if defined(CLIB_HAVE_VEC512)
u32x16 mask16 = u32x16_splat (mask);
-
- for (i = 0; i + 16 <= n_elts; i += 16)
- *((u32x16u *) (src + i)) &= mask16;
- n_elts -= i;
- if (n_elts)
+ if (n_elts <= 16)
{
- u16 m = pow2_mask (n_elts);
- u32x16_mask_store (u32x16_mask_load_zero (src + i, m) & mask16, src + i,
- m);
+ u32 m = pow2_mask (n_elts);
+ u32x16 r = u32x16_mask_load_zero (src, m);
+ u32x16_mask_store (r & mask16, src, m);
+ return;
}
- return;
+ for (; n_elts >= 16; n_elts -= 16, src += 16)
+ *((u32x16u *) src) &= mask16;
+ *((u32x16u *) (src + n_elts - 16)) &= mask16;
#elif defined(CLIB_HAVE_VEC256)
u32x8 mask8 = u32x8_splat (mask);
-
- for (i = 0; i + 8 <= n_elts; i += 8)
- *((u32x8u *) (src + i)) &= mask8;
- n_elts -= i;
- src += i;
#if defined(CLIB_HAVE_VEC256_MASK_LOAD_STORE)
- if (n_elts)
+ if (n_elts <= 8)
{
- u8 m = pow2_mask (n_elts);
- u32x8_mask_store (u32x8_mask_load_zero (src, m) & mask8, src, m);
+ u32 m = pow2_mask (n_elts);
+ u32x8 r = u32x8_mask_load_zero (src, m);
+ u32x8_mask_store (r & mask8, src, m);
+ return;
+ }
+#else
+ if (PREDICT_FALSE (n_elts < 4))
+ {
+ if (n_elts & 2)
+ {
+ src[0] &= mask;
+ src[1] &= mask;
+ src += 2;
+ }
+ if (n_elts & 1)
+ src[0] &= mask;
+ return;
+ }
+ if (n_elts <= 8)
+ {
+ u32x4 mask4 = u32x4_splat (mask);
+ *(u32x4u *) src &= mask4;
+ *(u32x4u *) (src + n_elts - 4) &= mask4;
+ return;
}
- return;
#endif
+
+ for (; n_elts >= 8; n_elts -= 8, src += 8)
+ *((u32x8u *) src) &= mask8;
+ *((u32x8u *) (src + n_elts - 8)) &= mask8;
#elif defined(CLIB_HAVE_VEC128)
u32x4 mask4 = u32x4_splat (mask);
- for (i = 0; i + 4 <= n_elts; i += 4)
- *((u32x4u *) (src + i)) &= mask4;
- n_elts -= i;
- src += i;
- switch (n_elts)
+ if (PREDICT_FALSE (n_elts < 4))
{
- case 3:
- src[2] &= mask;
- case 2:
- src[1] &= mask;
- case 1:
- src[0] &= mask;
- case 0:
- default:;
+ if (n_elts & 2)
+ {
+ src[0] &= mask;
+ src[1] &= mask;
+ src += 2;
+ }
+ if (n_elts & 1)
+ src[0] &= mask;
+ return;
}
+
+ for (; n_elts >= 4; n_elts -= 4, src += 4)
+ *((u32x4u *) src) &= mask4;
+ *((u32x4u *) (src + n_elts - 4)) &= mask4;
return;
-#endif
+#else
while (n_elts > 0)
{
src[0] &= mask;
src++;
n_elts--;
}
+#endif
+}
+
+static_always_inline void
+clib_array_mask_set_u32_x64 (u32 *a, u32 v, uword bmp, int n_elts)
+{
+#if defined(CLIB_HAVE_VEC512_MASK_LOAD_STORE)
+ u32x16 r = u32x16_splat (v);
+ for (; n_elts > 0; n_elts -= 16, a += 16, bmp >>= 16)
+ u32x16_mask_store (r, a, bmp);
+#elif defined(CLIB_HAVE_VEC256_MASK_LOAD_STORE)
+ u32x8 r = u32x8_splat (v);
+ for (; n_elts > 0; n_elts -= 8, a += 8, bmp >>= 8)
+ u32x8_mask_store (r, a, bmp);
+#else
+ while (bmp)
+ {
+ a[get_lowest_set_bit_index (bmp)] = v;
+ bmp = clear_lowest_set_bit (bmp);
+ }
+#endif
+}
+
+static_always_inline void
+clib_array_mask_set_u32 (u32 *a, u32 v, uword *bmp, u32 n_elts)
+{
+ while (n_elts >= uword_bits)
+ {
+ clib_array_mask_set_u32_x64 (a, v, bmp++[0], uword_bits);
+ a += uword_bits;
+ n_elts -= uword_bits;
+ }
+
+ clib_array_mask_set_u32_x64 (a, v, bmp[0] & pow2_mask (n_elts), n_elts);
}
#endif