Diffstat (limited to 'src/vppinfra')
-rw-r--r--  src/vppinfra/CMakeLists.txt        |   5
-rw-r--r--  src/vppinfra/string.h              | 233
-rw-r--r--  src/vppinfra/vector/count_equal.h  | 238
3 files changed, 241 insertions, 235 deletions
diff --git a/src/vppinfra/CMakeLists.txt b/src/vppinfra/CMakeLists.txt
index 1114092e246..58ec32fbfeb 100644
--- a/src/vppinfra/CMakeLists.txt
+++ b/src/vppinfra/CMakeLists.txt
@@ -192,9 +192,10 @@ set(VPPINFRA_HEADERS
vector_altivec.h
vector_avx2.h
vector_avx512.h
- vector/mask_compare.h
- vector/compress.h
vector/array_mask.h
+ vector/compress.h
+ vector/count_equal.h
+ vector/mask_compare.h
vector.h
vector_neon.h
vector_sse42.h
diff --git a/src/vppinfra/string.h b/src/vppinfra/string.h
index b0eb29f705e..7f9211b1bd2 100644
--- a/src/vppinfra/string.h
+++ b/src/vppinfra/string.h
@@ -483,239 +483,6 @@ clib_memset_u8 (void *p, u8 val, uword count)
ptr++[0] = val;
}
-static_always_inline uword
-clib_count_equal_u64 (u64 * data, uword max_count)
-{
- uword count;
- u64 first;
-
- if (max_count <= 1)
- return max_count;
- if (data[0] != data[1])
- return 1;
-
- count = 0;
- first = data[0];
-
-#if defined(CLIB_HAVE_VEC256)
- u64x4 splat = u64x4_splat (first);
- while (count + 3 < max_count)
- {
- u64 bmp;
- bmp = u8x32_msb_mask ((u8x32) (u64x4_load_unaligned (data) == splat));
- if (bmp != 0xffffffff)
- {
- count += count_trailing_zeros (~bmp) / 8;
- return count;
- }
-
- data += 4;
- count += 4;
- }
-#else
- count += 2;
- data += 2;
- while (count + 3 < max_count &&
- ((data[0] ^ first) | (data[1] ^ first) |
- (data[2] ^ first) | (data[3] ^ first)) == 0)
- {
- data += 4;
- count += 4;
- }
-#endif
- while (count < max_count && (data[0] == first))
- {
- data += 1;
- count += 1;
- }
- return count;
-}
-
-static_always_inline uword
-clib_count_equal_u32 (u32 * data, uword max_count)
-{
- uword count;
- u32 first;
-
- if (max_count <= 1)
- return max_count;
- if (data[0] != data[1])
- return 1;
-
- count = 0;
- first = data[0];
-
-#if defined(CLIB_HAVE_VEC256)
- u32x8 splat = u32x8_splat (first);
- while (count + 7 < max_count)
- {
- u64 bmp;
- bmp = u8x32_msb_mask ((u8x32) (u32x8_load_unaligned (data) == splat));
- if (bmp != 0xffffffff)
- {
- count += count_trailing_zeros (~bmp) / 4;
- return count;
- }
-
- data += 8;
- count += 8;
- }
-#elif defined(CLIB_HAVE_VEC128) && defined(CLIB_HAVE_VEC128_MSB_MASK)
- u32x4 splat = u32x4_splat (first);
- while (count + 3 < max_count)
- {
- u64 bmp;
- bmp = u8x16_msb_mask ((u8x16) (u32x4_load_unaligned (data) == splat));
- if (bmp != 0xffff)
- {
- count += count_trailing_zeros (~bmp) / 4;
- return count;
- }
-
- data += 4;
- count += 4;
- }
-#else
- count += 2;
- data += 2;
- while (count + 3 < max_count &&
- ((data[0] ^ first) | (data[1] ^ first) |
- (data[2] ^ first) | (data[3] ^ first)) == 0)
- {
- data += 4;
- count += 4;
- }
-#endif
- while (count < max_count && (data[0] == first))
- {
- data += 1;
- count += 1;
- }
- return count;
-}
-
-static_always_inline uword
-clib_count_equal_u16 (u16 * data, uword max_count)
-{
- uword count;
- u16 first;
-
- if (max_count <= 1)
- return max_count;
- if (data[0] != data[1])
- return 1;
-
- count = 0;
- first = data[0];
-
-#if defined(CLIB_HAVE_VEC256)
- u16x16 splat = u16x16_splat (first);
- while (count + 15 < max_count)
- {
- u64 bmp;
- bmp = u8x32_msb_mask ((u8x32) (u16x16_load_unaligned (data) == splat));
- if (bmp != 0xffffffff)
- {
- count += count_trailing_zeros (~bmp) / 2;
- return count;
- }
-
- data += 16;
- count += 16;
- }
-#elif defined(CLIB_HAVE_VEC128) && defined(CLIB_HAVE_VEC128_MSB_MASK)
- u16x8 splat = u16x8_splat (first);
- while (count + 7 < max_count)
- {
- u64 bmp;
- bmp = u8x16_msb_mask ((u8x16) (u16x8_load_unaligned (data) == splat));
- if (bmp != 0xffff)
- {
- count += count_trailing_zeros (~bmp) / 2;
- return count;
- }
-
- data += 8;
- count += 8;
- }
-#else
- count += 2;
- data += 2;
- while (count + 3 < max_count &&
- ((data[0] ^ first) | (data[1] ^ first) |
- (data[2] ^ first) | (data[3] ^ first)) == 0)
- {
- data += 4;
- count += 4;
- }
-#endif
- while (count < max_count && (data[0] == first))
- {
- data += 1;
- count += 1;
- }
- return count;
-}
-
-static_always_inline uword
-clib_count_equal_u8 (u8 * data, uword max_count)
-{
- uword count;
- u8 first;
-
- if (max_count <= 1)
- return max_count;
- if (data[0] != data[1])
- return 1;
-
- count = 0;
- first = data[0];
-
-#if defined(CLIB_HAVE_VEC256)
- u8x32 splat = u8x32_splat (first);
- while (count + 31 < max_count)
- {
- u64 bmp;
- bmp = u8x32_msb_mask ((u8x32) (u8x32_load_unaligned (data) == splat));
- if (bmp != 0xffffffff)
- return max_count;
-
- data += 32;
- count += 32;
- }
-#elif defined(CLIB_HAVE_VEC128) && defined(CLIB_HAVE_VEC128_MSB_MASK)
- u8x16 splat = u8x16_splat (first);
- while (count + 15 < max_count)
- {
- u64 bmp;
- bmp = u8x16_msb_mask ((u8x16) (u8x16_load_unaligned (data) == splat));
- if (bmp != 0xffff)
- {
- count += count_trailing_zeros (~bmp);
- return count;
- }
-
- data += 16;
- count += 16;
- }
-#else
- count += 2;
- data += 2;
- while (count + 3 < max_count &&
- ((data[0] ^ first) | (data[1] ^ first) |
- (data[2] ^ first) | (data[3] ^ first)) == 0)
- {
- data += 4;
- count += 4;
- }
-#endif
- while (count < max_count && (data[0] == first))
- {
- data += 1;
- count += 1;
- }
- return count;
-}
/*
* This macro is to provide smooth mapping from memcmp to memcmp_s.
diff --git a/src/vppinfra/vector/count_equal.h b/src/vppinfra/vector/count_equal.h
new file mode 100644
index 00000000000..98770cff7c0
--- /dev/null
+++ b/src/vppinfra/vector/count_equal.h
@@ -0,0 +1,238 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright(c) 2021 Cisco Systems, Inc.
+ */
+
+#ifndef included_vector_count_equal_h
+#define included_vector_count_equal_h
+#include <vppinfra/clib.h>
+
+static_always_inline uword
+clib_count_equal_u64 (u64 *data, uword max_count)
+{
+ uword count;
+ u64 first;
+
+ if (max_count <= 1)
+ return max_count;
+ if (data[0] != data[1])
+ return 1;
+
+ count = 0;
+ first = data[0];
+
+#if defined(CLIB_HAVE_VEC256)
+ u64x4 splat = u64x4_splat (first);
+ while (count + 3 < max_count)
+ {
+ u64 bmp;
+ bmp = u8x32_msb_mask ((u8x32) (u64x4_load_unaligned (data) == splat));
+ if (bmp != 0xffffffff)
+ {
+ count += count_trailing_zeros (~bmp) / 8;
+ return count;
+ }
+
+ data += 4;
+ count += 4;
+ }
+#else
+ count += 2;
+ data += 2;
+ while (count + 3 < max_count && ((data[0] ^ first) | (data[1] ^ first) |
+ (data[2] ^ first) | (data[3] ^ first)) == 0)
+ {
+ data += 4;
+ count += 4;
+ }
+#endif
+ while (count < max_count && (data[0] == first))
+ {
+ data += 1;
+ count += 1;
+ }
+ return count;
+}
+
+static_always_inline uword
+clib_count_equal_u32 (u32 *data, uword max_count)
+{
+ uword count;
+ u32 first;
+
+ if (max_count <= 1)
+ return max_count;
+ if (data[0] != data[1])
+ return 1;
+
+ count = 0;
+ first = data[0];
+
+#if defined(CLIB_HAVE_VEC256)
+ u32x8 splat = u32x8_splat (first);
+ while (count + 7 < max_count)
+ {
+ u64 bmp;
+ bmp = u8x32_msb_mask ((u8x32) (u32x8_load_unaligned (data) == splat));
+ if (bmp != 0xffffffff)
+ {
+ count += count_trailing_zeros (~bmp) / 4;
+ return count;
+ }
+
+ data += 8;
+ count += 8;
+ }
+#elif defined(CLIB_HAVE_VEC128) && defined(CLIB_HAVE_VEC128_MSB_MASK)
+ u32x4 splat = u32x4_splat (first);
+ while (count + 3 < max_count)
+ {
+ u64 bmp;
+ bmp = u8x16_msb_mask ((u8x16) (u32x4_load_unaligned (data) == splat));
+ if (bmp != 0xffff)
+ {
+ count += count_trailing_zeros (~bmp) / 4;
+ return count;
+ }
+
+ data += 4;
+ count += 4;
+ }
+#else
+ count += 2;
+ data += 2;
+ while (count + 3 < max_count && ((data[0] ^ first) | (data[1] ^ first) |
+ (data[2] ^ first) | (data[3] ^ first)) == 0)
+ {
+ data += 4;
+ count += 4;
+ }
+#endif
+ while (count < max_count && (data[0] == first))
+ {
+ data += 1;
+ count += 1;
+ }
+ return count;
+}
+
+static_always_inline uword
+clib_count_equal_u16 (u16 *data, uword max_count)
+{
+ uword count;
+ u16 first;
+
+ if (max_count <= 1)
+ return max_count;
+ if (data[0] != data[1])
+ return 1;
+
+ count = 0;
+ first = data[0];
+
+#if defined(CLIB_HAVE_VEC256)
+ u16x16 splat = u16x16_splat (first);
+ while (count + 15 < max_count)
+ {
+ u64 bmp;
+ bmp = u8x32_msb_mask ((u8x32) (u16x16_load_unaligned (data) == splat));
+ if (bmp != 0xffffffff)
+ {
+ count += count_trailing_zeros (~bmp) / 2;
+ return count;
+ }
+
+ data += 16;
+ count += 16;
+ }
+#elif defined(CLIB_HAVE_VEC128) && defined(CLIB_HAVE_VEC128_MSB_MASK)
+ u16x8 splat = u16x8_splat (first);
+ while (count + 7 < max_count)
+ {
+ u64 bmp;
+ bmp = u8x16_msb_mask ((u8x16) (u16x8_load_unaligned (data) == splat));
+ if (bmp != 0xffff)
+ {
+ count += count_trailing_zeros (~bmp) / 2;
+ return count;
+ }
+
+ data += 8;
+ count += 8;
+ }
+#else
+ count += 2;
+ data += 2;
+ while (count + 3 < max_count && ((data[0] ^ first) | (data[1] ^ first) |
+ (data[2] ^ first) | (data[3] ^ first)) == 0)
+ {
+ data += 4;
+ count += 4;
+ }
+#endif
+ while (count < max_count && (data[0] == first))
+ {
+ data += 1;
+ count += 1;
+ }
+ return count;
+}
+
+static_always_inline uword
+clib_count_equal_u8 (u8 *data, uword max_count)
+{
+ uword count;
+ u8 first;
+
+ if (max_count <= 1)
+ return max_count;
+ if (data[0] != data[1])
+ return 1;
+
+ count = 0;
+ first = data[0];
+
+#if defined(CLIB_HAVE_VEC256)
+ u8x32 splat = u8x32_splat (first);
+ while (count + 31 < max_count)
+ {
+ u64 bmp;
+ bmp = u8x32_msb_mask ((u8x32) (u8x32_load_unaligned (data) == splat));
+ if (bmp != 0xffffffff)
+ return max_count;
+
+ data += 32;
+ count += 32;
+ }
+#elif defined(CLIB_HAVE_VEC128) && defined(CLIB_HAVE_VEC128_MSB_MASK)
+ u8x16 splat = u8x16_splat (first);
+ while (count + 15 < max_count)
+ {
+ u64 bmp;
+ bmp = u8x16_msb_mask ((u8x16) (u8x16_load_unaligned (data) == splat));
+ if (bmp != 0xffff)
+ {
+ count += count_trailing_zeros (~bmp);
+ return count;
+ }
+
+ data += 16;
+ count += 16;
+ }
+#else
+ count += 2;
+ data += 2;
+ while (count + 3 < max_count && ((data[0] ^ first) | (data[1] ^ first) |
+ (data[2] ^ first) | (data[3] ^ first)) == 0)
+ {
+ data += 4;
+ count += 4;
+ }
+#endif
+ while (count < max_count && (data[0] == first))
+ {
+ data += 1;
+ count += 1;
+ }
+ return count;
+}
+#endif
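
After this change, code that previously picked up the clib_count_equal_* helpers via <vppinfra/string.h> would include the new <vppinfra/vector/count_equal.h> header instead. Below is a minimal, hypothetical usage sketch; the function name first_run_length and the next-index scenario are illustrative only and not part of this commit:

/* Hypothetical usage sketch, not part of the commit. */
#include <vppinfra/clib.h>
#include <vppinfra/vector/count_equal.h>

/* Return the length of the leading run of identical u32 values,
 * capped at n_left. A typical use is batching packets that share
 * the same next-node index. */
static uword
first_run_length (u32 *next_indices, uword n_left)
{
  return clib_count_equal_u32 (next_indices, n_left);
}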