/* SPDX-License-Identifier: Apache-2.0 * Copyright(c) 2021 Cisco Systems, Inc. */ #ifndef included_vector_count_equal_h #define included_vector_count_equal_h #include static_always_inline uword clib_count_equal_u64 (u64 *data, uword max_count) { uword count; u64 first; if (max_count <= 1) return max_count; if (data[0] != data[1]) return 1; count = 0; first = data[0]; #if defined(CLIB_HAVE_VEC256) u64x4 splat = u64x4_splat (first); while (count + 3 < max_count) { u64 bmp; bmp = u8x32_msb_mask ((u8x32) (u64x4_load_unaligned (data) == splat)); if (bmp != 0xffffffff) { count += count_trailing_zeros (~bmp) / 8; return count; } data += 4; count += 4; } #else count += 2; data += 2; while (count + 3 < max_count && ((data[0] ^ first) | (data[1] ^ first) | (data[2] ^ first) | (data[3] ^ first)) == 0) { data += 4; count += 4; } #endif while (count < max_count && (data[0] == first)) { data += 1; count += 1; } return count; } static_always_inline uword clib_count_equal_u32 (u32 *data, uword max_count) { uword count; u32 first; if (max_count <= 1) return max_count; if (data[0] != data[1]) return 1; count = 0; first = data[0]; #if defined(CLIB_HAVE_VEC256) u32x8 splat = u32x8_splat (first); while (count + 7 < max_count) { u64 bmp; bmp = u8x32_msb_mask ((u8x32) (u32x8_load_unaligned (data) == splat)); if (bmp != 0xffffffff) { count += count_trailing_zeros (~bmp) / 4; return count; } data += 8; count += 8; } #elif defined(CLIB_HAVE_VEC128) && defined(CLIB_HAVE_VEC128_MSB_MASK) u32x4 splat = u32x4_splat (first); while (count + 3 < max_count) { u64 bmp; bmp = u8x16_msb_mask ((u8x16) (u32x4_load_unaligned (data) == splat)); if (bmp != 0xffff) { count += count_trailing_zeros (~bmp) / 4; return count; } data += 4; count += 4; } #else count += 2; data += 2; while (count + 3 < max_count && ((data[0] ^ first) | (data[1] ^ first) | (data[2] ^ first) | (data[3] ^ first)) == 0) { data += 4; count += 4; } #endif while (count < max_count && (data[0] == first)) { data += 1; count += 1; } return count; } static_always_inline uword clib_count_equal_u16 (u16 *data, uword max_count) { uword count; u16 first; if (max_count <= 1) return max_count; if (data[0] != data[1]) return 1; count = 0; first = data[0]; #if defined(CLIB_HAVE_VEC256) u16x16 splat = u16x16_splat (first); while (count + 15 < max_count) { u64 bmp; bmp = u8x32_msb_mask ((u8x32) (u16x16_load_unaligned (data) == splat)); if (bmp != 0xffffffff) { count += count_trailing_zeros (~bmp) / 2; return count; } data += 16; count += 16; } #elif defined(CLIB_HAVE_VEC128) && defined(CLIB_HAVE_VEC128_MSB_MASK) u16x8 splat = u16x8_splat (first); while (count + 7 < max_count) { u64 bmp; bmp = u8x16_msb_mask ((u8x16) (u16x8_load_unaligned (data) == splat)); if (bmp != 0xffff) { count += count_trailing_zeros (~bmp) / 2; return count; } data += 8; count += 8; } #else count += 2; data += 2; while (count + 3 < max_count && ((data[0] ^ first) | (data[1] ^ first) | (data[2] ^ first) | (data[3] ^ first)) == 0) { data += 4; count += 4; } #endif while (count < max_count && (data[0] == first)) { data += 1; count += 1; } return count; } static_always_inline uword clib_count_equal_u8 (u8 *data, uword max_count) { uword count; u8 first; if (max_count <= 1) return max_count; if (data[0] != data[1]) return 1; count = 0; first = data[0]; #if defined(CLIB_HAVE_VEC256) u8x32 splat = u8x32_splat (first); while (count + 31 < max_count) { u64 bmp; bmp = u8x32_msb_mask ((u8x32) (u8x32_load_unaligned (data) == splat)); if (bmp != 0xffffffff) return max_count; data += 32; count += 32; } #elif defined(CLIB_HAVE_VEC128) && defined(CLIB_HAVE_VEC128_MSB_MASK) u8x16 splat = u8x16_splat (first); while (count + 15 < max_count) { u64 bmp; bmp = u8x16_msb_mask ((u8x16) (u8x16_load_unaligned (data) == splat)); if (bmp != 0xffff) { count += count_trailing_zeros (~bmp); return count; } data += 16; count += 16; } #else count += 2; data += 2; while (count + 3 < max_count && ((data[0] ^ first) | (data[1] ^ first) | (data[2] ^ first) | (data[3] ^ first)) == 0) { data += 4; count += 4; } #endif while (count < max_count && (data[0] == first)) { data += 1; count += 1; } return count; } #endif