summaryrefslogtreecommitdiffstats
path: root/src/vppinfra/vector/count_equal.h
diff options
context:
space:
mode:
Diffstat (limited to 'src/vppinfra/vector/count_equal.h')
-rw-r--r--src/vppinfra/vector/count_equal.h238
1 files changed, 238 insertions, 0 deletions
diff --git a/src/vppinfra/vector/count_equal.h b/src/vppinfra/vector/count_equal.h
new file mode 100644
index 00000000000..98770cff7c0
--- /dev/null
+++ b/src/vppinfra/vector/count_equal.h
@@ -0,0 +1,238 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright(c) 2021 Cisco Systems, Inc.
+ */
+
+#ifndef included_vector_count_equal_h
+#define included_vector_count_equal_h
+#include <vppinfra/clib.h>
+
+static_always_inline uword
+clib_count_equal_u64 (u64 *data, uword max_count)
+{
+ uword count;
+ u64 first;
+
+ if (max_count <= 1)
+ return max_count;
+ if (data[0] != data[1])
+ return 1;
+
+ count = 0;
+ first = data[0];
+
+#if defined(CLIB_HAVE_VEC256)
+ u64x4 splat = u64x4_splat (first);
+ while (count + 3 < max_count)
+ {
+ u64 bmp;
+ bmp = u8x32_msb_mask ((u8x32) (u64x4_load_unaligned (data) == splat));
+ if (bmp != 0xffffffff)
+ {
+ count += count_trailing_zeros (~bmp) / 8;
+ return count;
+ }
+
+ data += 4;
+ count += 4;
+ }
+#else
+ count += 2;
+ data += 2;
+ while (count + 3 < max_count && ((data[0] ^ first) | (data[1] ^ first) |
+ (data[2] ^ first) | (data[3] ^ first)) == 0)
+ {
+ data += 4;
+ count += 4;
+ }
+#endif
+ while (count < max_count && (data[0] == first))
+ {
+ data += 1;
+ count += 1;
+ }
+ return count;
+}
+
+static_always_inline uword
+clib_count_equal_u32 (u32 *data, uword max_count)
+{
+ uword count;
+ u32 first;
+
+ if (max_count <= 1)
+ return max_count;
+ if (data[0] != data[1])
+ return 1;
+
+ count = 0;
+ first = data[0];
+
+#if defined(CLIB_HAVE_VEC256)
+ u32x8 splat = u32x8_splat (first);
+ while (count + 7 < max_count)
+ {
+ u64 bmp;
+ bmp = u8x32_msb_mask ((u8x32) (u32x8_load_unaligned (data) == splat));
+ if (bmp != 0xffffffff)
+ {
+ count += count_trailing_zeros (~bmp) / 4;
+ return count;
+ }
+
+ data += 8;
+ count += 8;
+ }
+#elif defined(CLIB_HAVE_VEC128) && defined(CLIB_HAVE_VEC128_MSB_MASK)
+ u32x4 splat = u32x4_splat (first);
+ while (count + 3 < max_count)
+ {
+ u64 bmp;
+ bmp = u8x16_msb_mask ((u8x16) (u32x4_load_unaligned (data) == splat));
+ if (bmp != 0xffff)
+ {
+ count += count_trailing_zeros (~bmp) / 4;
+ return count;
+ }
+
+ data += 4;
+ count += 4;
+ }
+#else
+ count += 2;
+ data += 2;
+ while (count + 3 < max_count && ((data[0] ^ first) | (data[1] ^ first) |
+ (data[2] ^ first) | (data[3] ^ first)) == 0)
+ {
+ data += 4;
+ count += 4;
+ }
+#endif
+ while (count < max_count && (data[0] == first))
+ {
+ data += 1;
+ count += 1;
+ }
+ return count;
+}
+
+static_always_inline uword
+clib_count_equal_u16 (u16 *data, uword max_count)
+{
+ uword count;
+ u16 first;
+
+ if (max_count <= 1)
+ return max_count;
+ if (data[0] != data[1])
+ return 1;
+
+ count = 0;
+ first = data[0];
+
+#if defined(CLIB_HAVE_VEC256)
+ u16x16 splat = u16x16_splat (first);
+ while (count + 15 < max_count)
+ {
+ u64 bmp;
+ bmp = u8x32_msb_mask ((u8x32) (u16x16_load_unaligned (data) == splat));
+ if (bmp != 0xffffffff)
+ {
+ count += count_trailing_zeros (~bmp) / 2;
+ return count;
+ }
+
+ data += 16;
+ count += 16;
+ }
+#elif defined(CLIB_HAVE_VEC128) && defined(CLIB_HAVE_VEC128_MSB_MASK)
+ u16x8 splat = u16x8_splat (first);
+ while (count + 7 < max_count)
+ {
+ u64 bmp;
+ bmp = u8x16_msb_mask ((u8x16) (u16x8_load_unaligned (data) == splat));
+ if (bmp != 0xffff)
+ {
+ count += count_trailing_zeros (~bmp) / 2;
+ return count;
+ }
+
+ data += 8;
+ count += 8;
+ }
+#else
+ count += 2;
+ data += 2;
+ while (count + 3 < max_count && ((data[0] ^ first) | (data[1] ^ first) |
+ (data[2] ^ first) | (data[3] ^ first)) == 0)
+ {
+ data += 4;
+ count += 4;
+ }
+#endif
+ while (count < max_count && (data[0] == first))
+ {
+ data += 1;
+ count += 1;
+ }
+ return count;
+}
+
+static_always_inline uword
+clib_count_equal_u8 (u8 *data, uword max_count)
+{
+ uword count;
+ u8 first;
+
+ if (max_count <= 1)
+ return max_count;
+ if (data[0] != data[1])
+ return 1;
+
+ count = 0;
+ first = data[0];
+
+#if defined(CLIB_HAVE_VEC256)
+ u8x32 splat = u8x32_splat (first);
+ while (count + 31 < max_count)
+ {
+ u64 bmp;
+ bmp = u8x32_msb_mask ((u8x32) (u8x32_load_unaligned (data) == splat));
+ if (bmp != 0xffffffff)
+ return max_count;
+
+ data += 32;
+ count += 32;
+ }
+#elif defined(CLIB_HAVE_VEC128) && defined(CLIB_HAVE_VEC128_MSB_MASK)
+ u8x16 splat = u8x16_splat (first);
+ while (count + 15 < max_count)
+ {
+ u64 bmp;
+ bmp = u8x16_msb_mask ((u8x16) (u8x16_load_unaligned (data) == splat));
+ if (bmp != 0xffff)
+ {
+ count += count_trailing_zeros (~bmp);
+ return count;
+ }
+
+ data += 16;
+ count += 16;
+ }
+#else
+ count += 2;
+ data += 2;
+ while (count + 3 < max_count && ((data[0] ^ first) | (data[1] ^ first) |
+ (data[2] ^ first) | (data[3] ^ first)) == 0)
+ {
+ data += 4;
+ count += 4;
+ }
+#endif
+ while (count < max_count && (data[0] == first))
+ {
+ data += 1;
+ count += 1;
+ }
+ return count;
+}
+#endif