From 4c276f073f0205f67352b5198d412b541a077c5e Mon Sep 17 00:00:00 2001 From: Damjan Marion Date: Sat, 6 Nov 2021 13:17:31 +0100 Subject: vppinfra: move clib_count_equal_* code Type: refactor Change-Id: Ib9e8abdbf745ad6563fc79c9ebb6b2ea65917d08 Signed-off-by: Damjan Marion --- src/vppinfra/CMakeLists.txt | 5 +- src/vppinfra/string.h | 233 ------------------------------------- src/vppinfra/vector/count_equal.h | 238 ++++++++++++++++++++++++++++++++++++++ 3 files changed, 241 insertions(+), 235 deletions(-) create mode 100644 src/vppinfra/vector/count_equal.h (limited to 'src/vppinfra') diff --git a/src/vppinfra/CMakeLists.txt b/src/vppinfra/CMakeLists.txt index 1114092e246..58ec32fbfeb 100644 --- a/src/vppinfra/CMakeLists.txt +++ b/src/vppinfra/CMakeLists.txt @@ -192,9 +192,10 @@ set(VPPINFRA_HEADERS vector_altivec.h vector_avx2.h vector_avx512.h - vector/mask_compare.h - vector/compress.h vector/array_mask.h + vector/compress.h + vector/count_equal.h + vector/mask_compare.h vector.h vector_neon.h vector_sse42.h diff --git a/src/vppinfra/string.h b/src/vppinfra/string.h index b0eb29f705e..7f9211b1bd2 100644 --- a/src/vppinfra/string.h +++ b/src/vppinfra/string.h @@ -483,239 +483,6 @@ clib_memset_u8 (void *p, u8 val, uword count) ptr++[0] = val; } -static_always_inline uword -clib_count_equal_u64 (u64 * data, uword max_count) -{ - uword count; - u64 first; - - if (max_count <= 1) - return max_count; - if (data[0] != data[1]) - return 1; - - count = 0; - first = data[0]; - -#if defined(CLIB_HAVE_VEC256) - u64x4 splat = u64x4_splat (first); - while (count + 3 < max_count) - { - u64 bmp; - bmp = u8x32_msb_mask ((u8x32) (u64x4_load_unaligned (data) == splat)); - if (bmp != 0xffffffff) - { - count += count_trailing_zeros (~bmp) / 8; - return count; - } - - data += 4; - count += 4; - } -#else - count += 2; - data += 2; - while (count + 3 < max_count && - ((data[0] ^ first) | (data[1] ^ first) | - (data[2] ^ first) | (data[3] ^ first)) == 0) - { - data += 4; - count += 4; - 
} -#endif - while (count < max_count && (data[0] == first)) - { - data += 1; - count += 1; - } - return count; -} - -static_always_inline uword -clib_count_equal_u32 (u32 * data, uword max_count) -{ - uword count; - u32 first; - - if (max_count <= 1) - return max_count; - if (data[0] != data[1]) - return 1; - - count = 0; - first = data[0]; - -#if defined(CLIB_HAVE_VEC256) - u32x8 splat = u32x8_splat (first); - while (count + 7 < max_count) - { - u64 bmp; - bmp = u8x32_msb_mask ((u8x32) (u32x8_load_unaligned (data) == splat)); - if (bmp != 0xffffffff) - { - count += count_trailing_zeros (~bmp) / 4; - return count; - } - - data += 8; - count += 8; - } -#elif defined(CLIB_HAVE_VEC128) && defined(CLIB_HAVE_VEC128_MSB_MASK) - u32x4 splat = u32x4_splat (first); - while (count + 3 < max_count) - { - u64 bmp; - bmp = u8x16_msb_mask ((u8x16) (u32x4_load_unaligned (data) == splat)); - if (bmp != 0xffff) - { - count += count_trailing_zeros (~bmp) / 4; - return count; - } - - data += 4; - count += 4; - } -#else - count += 2; - data += 2; - while (count + 3 < max_count && - ((data[0] ^ first) | (data[1] ^ first) | - (data[2] ^ first) | (data[3] ^ first)) == 0) - { - data += 4; - count += 4; - } -#endif - while (count < max_count && (data[0] == first)) - { - data += 1; - count += 1; - } - return count; -} - -static_always_inline uword -clib_count_equal_u16 (u16 * data, uword max_count) -{ - uword count; - u16 first; - - if (max_count <= 1) - return max_count; - if (data[0] != data[1]) - return 1; - - count = 0; - first = data[0]; - -#if defined(CLIB_HAVE_VEC256) - u16x16 splat = u16x16_splat (first); - while (count + 15 < max_count) - { - u64 bmp; - bmp = u8x32_msb_mask ((u8x32) (u16x16_load_unaligned (data) == splat)); - if (bmp != 0xffffffff) - { - count += count_trailing_zeros (~bmp) / 2; - return count; - } - - data += 16; - count += 16; - } -#elif defined(CLIB_HAVE_VEC128) && defined(CLIB_HAVE_VEC128_MSB_MASK) - u16x8 splat = u16x8_splat (first); - while (count + 7 < 
max_count) - { - u64 bmp; - bmp = u8x16_msb_mask ((u8x16) (u16x8_load_unaligned (data) == splat)); - if (bmp != 0xffff) - { - count += count_trailing_zeros (~bmp) / 2; - return count; - } - - data += 8; - count += 8; - } -#else - count += 2; - data += 2; - while (count + 3 < max_count && - ((data[0] ^ first) | (data[1] ^ first) | - (data[2] ^ first) | (data[3] ^ first)) == 0) - { - data += 4; - count += 4; - } -#endif - while (count < max_count && (data[0] == first)) - { - data += 1; - count += 1; - } - return count; -} - -static_always_inline uword -clib_count_equal_u8 (u8 * data, uword max_count) -{ - uword count; - u8 first; - - if (max_count <= 1) - return max_count; - if (data[0] != data[1]) - return 1; - - count = 0; - first = data[0]; - -#if defined(CLIB_HAVE_VEC256) - u8x32 splat = u8x32_splat (first); - while (count + 31 < max_count) - { - u64 bmp; - bmp = u8x32_msb_mask ((u8x32) (u8x32_load_unaligned (data) == splat)); - if (bmp != 0xffffffff) - return max_count; - - data += 32; - count += 32; - } -#elif defined(CLIB_HAVE_VEC128) && defined(CLIB_HAVE_VEC128_MSB_MASK) - u8x16 splat = u8x16_splat (first); - while (count + 15 < max_count) - { - u64 bmp; - bmp = u8x16_msb_mask ((u8x16) (u8x16_load_unaligned (data) == splat)); - if (bmp != 0xffff) - { - count += count_trailing_zeros (~bmp); - return count; - } - - data += 16; - count += 16; - } -#else - count += 2; - data += 2; - while (count + 3 < max_count && - ((data[0] ^ first) | (data[1] ^ first) | - (data[2] ^ first) | (data[3] ^ first)) == 0) - { - data += 4; - count += 4; - } -#endif - while (count < max_count && (data[0] == first)) - { - data += 1; - count += 1; - } - return count; -} /* * This macro is to provide smooth mapping from memcmp to memcmp_s. 
diff --git a/src/vppinfra/vector/count_equal.h b/src/vppinfra/vector/count_equal.h
new file mode 100644
index 00000000000..98770cff7c0
--- /dev/null
+++ b/src/vppinfra/vector/count_equal.h
@@ -0,0 +1,265 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright(c) 2021 Cisco Systems, Inc.
+ */
+
+#ifndef included_vector_count_equal_h
+#define included_vector_count_equal_h
+#include <vppinfra/clib.h>
+
+/*
+ * clib_count_equal_u{8,16,32,64} (data, max_count)
+ *
+ * Return the length of the run of elements at the start of data[] that
+ * are equal to data[0], examining at most max_count elements. A max_count
+ * of 0 or 1 is returned as is, before data[] is read.
+ *
+ * The vector paths compare a whole vector against a splat of data[0] and
+ * reduce the result with u8xN_msb_mask (), presumably to one bit per byte
+ * lane (TODO confirm). The position of the first clear bit, divided by the
+ * element size in bytes, is then used as the number of matching elements
+ * in that vector.
+ */
+
+/* Length of the leading run of u64 elements equal to data[0]. */
+static_always_inline uword
+clib_count_equal_u64 (u64 *data, uword max_count)
+{
+  uword count;
+  u64 first;
+
+  if (max_count <= 1)
+    return max_count;
+  if (data[0] != data[1])
+    return 1;
+
+  count = 0;
+  first = data[0];
+
+#if defined(CLIB_HAVE_VEC256)
+  u64x4 splat = u64x4_splat (first);
+  while (count + 3 < max_count)
+    {
+      u64 bmp;
+      bmp = u8x32_msb_mask ((u8x32) (u64x4_load_unaligned (data) == splat));
+      if (bmp != 0xffffffff)
+        {
+          count += count_trailing_zeros (~bmp) / 8;
+          return count;
+        }
+
+      data += 4;
+      count += 4;
+    }
+#else
+  /* data[0] == data[1] is already known, start at the third element */
+  count += 2;
+  data += 2;
+  while (count + 3 < max_count && ((data[0] ^ first) | (data[1] ^ first) |
+                                   (data[2] ^ first) | (data[3] ^ first)) == 0)
+    {
+      data += 4;
+      count += 4;
+    }
+#endif
+  /* finish element by element */
+  while (count < max_count && (data[0] == first))
+    {
+      data += 1;
+      count += 1;
+    }
+  return count;
+}
+
+/* Length of the leading run of u32 elements equal to data[0]. */
+static_always_inline uword
+clib_count_equal_u32 (u32 *data, uword max_count)
+{
+  uword count;
+  u32 first;
+
+  if (max_count <= 1)
+    return max_count;
+  if (data[0] != data[1])
+    return 1;
+
+  count = 0;
+  first = data[0];
+
+#if defined(CLIB_HAVE_VEC256)
+  u32x8 splat = u32x8_splat (first);
+  while (count + 7 < max_count)
+    {
+      u64 bmp;
+      bmp = u8x32_msb_mask ((u8x32) (u32x8_load_unaligned (data) == splat));
+      if (bmp != 0xffffffff)
+        {
+          count += count_trailing_zeros (~bmp) / 4;
+          return count;
+        }
+
+      data += 8;
+      count += 8;
+    }
+#elif defined(CLIB_HAVE_VEC128) && defined(CLIB_HAVE_VEC128_MSB_MASK)
+  u32x4 splat = u32x4_splat (first);
+  while (count + 3 < max_count)
+    {
+      u64 bmp;
+      bmp = u8x16_msb_mask ((u8x16) (u32x4_load_unaligned (data) == splat));
+      if (bmp != 0xffff)
+        {
+          count += count_trailing_zeros (~bmp) / 4;
+          return count;
+        }
+
+      data += 4;
+      count += 4;
+    }
+#else
+  /* data[0] == data[1] is already known, start at the third element */
+  count += 2;
+  data += 2;
+  while (count + 3 < max_count && ((data[0] ^ first) | (data[1] ^ first) |
+                                   (data[2] ^ first) | (data[3] ^ first)) == 0)
+    {
+      data += 4;
+      count += 4;
+    }
+#endif
+  /* finish element by element */
+  while (count < max_count && (data[0] == first))
+    {
+      data += 1;
+      count += 1;
+    }
+  return count;
+}
+
+/* Length of the leading run of u16 elements equal to data[0]. */
+static_always_inline uword
+clib_count_equal_u16 (u16 *data, uword max_count)
+{
+  uword count;
+  u16 first;
+
+  if (max_count <= 1)
+    return max_count;
+  if (data[0] != data[1])
+    return 1;
+
+  count = 0;
+  first = data[0];
+
+#if defined(CLIB_HAVE_VEC256)
+  u16x16 splat = u16x16_splat (first);
+  while (count + 15 < max_count)
+    {
+      u64 bmp;
+      bmp = u8x32_msb_mask ((u8x32) (u16x16_load_unaligned (data) == splat));
+      if (bmp != 0xffffffff)
+        {
+          count += count_trailing_zeros (~bmp) / 2;
+          return count;
+        }
+
+      data += 16;
+      count += 16;
+    }
+#elif defined(CLIB_HAVE_VEC128) && defined(CLIB_HAVE_VEC128_MSB_MASK)
+  u16x8 splat = u16x8_splat (first);
+  while (count + 7 < max_count)
+    {
+      u64 bmp;
+      bmp = u8x16_msb_mask ((u8x16) (u16x8_load_unaligned (data) == splat));
+      if (bmp != 0xffff)
+        {
+          count += count_trailing_zeros (~bmp) / 2;
+          return count;
+        }
+
+      data += 8;
+      count += 8;
+    }
+#else
+  /* data[0] == data[1] is already known, start at the third element */
+  count += 2;
+  data += 2;
+  while (count + 3 < max_count && ((data[0] ^ first) | (data[1] ^ first) |
+                                   (data[2] ^ first) | (data[3] ^ first)) == 0)
+    {
+      data += 4;
+      count += 4;
+    }
+#endif
+  /* finish element by element */
+  while (count < max_count && (data[0] == first))
+    {
+      data += 1;
+      count += 1;
+    }
+  return count;
+}
+
+/* Length of the leading run of u8 elements equal to data[0]. */
+static_always_inline uword
+clib_count_equal_u8 (u8 *data, uword max_count)
+{
+  uword count;
+  u8 first;
+
+  if (max_count <= 1)
+    return max_count;
+  if (data[0] != data[1])
+    return 1;
+
+  count = 0;
+  first = data[0];
+
+#if defined(CLIB_HAVE_VEC256)
+  u8x32 splat = u8x32_splat (first);
+  while (count + 31 < max_count)
+    {
+      u64 bmp;
+      bmp = u8x32_msb_mask ((u8x32) (u8x32_load_unaligned (data) == splat));
+      /* stop at the first mismatching byte, like the 128-bit path below */
+      if (bmp != 0xffffffff)
+        return count + count_trailing_zeros (~bmp);
+
+      data += 32;
+      count += 32;
+    }
+#elif defined(CLIB_HAVE_VEC128) && defined(CLIB_HAVE_VEC128_MSB_MASK)
+  u8x16 splat = u8x16_splat (first);
+  while (count + 15 < max_count)
+    {
+      u64 bmp;
+      bmp = u8x16_msb_mask ((u8x16) (u8x16_load_unaligned (data) == splat));
+      if (bmp != 0xffff)
+        {
+          count += count_trailing_zeros (~bmp);
+          return count;
+        }
+
+      data += 16;
+      count += 16;
+    }
+#else
+  /* data[0] == data[1] is already known, start at the third element */
+  count += 2;
+  data += 2;
+  while (count + 3 < max_count && ((data[0] ^ first) | (data[1] ^ first) |
+                                   (data[2] ^ first) | (data[3] ^ first)) == 0)
+    {
+      data += 4;
+      count += 4;
+    }
+#endif
+  /* finish element by element */
+  while (count < max_count && (data[0] == first))
+    {
+      data += 1;
+      count += 1;
+    }
+  return count;
+}
+#endif
+-- 
+cgit 1.2.3-korg