diff options
Diffstat (limited to 'src/vppinfra/vector_sse42.h')
-rw-r--r-- | src/vppinfra/vector_sse42.h | 62 |
1 files changed, 62 insertions, 0 deletions
diff --git a/src/vppinfra/vector_sse42.h b/src/vppinfra/vector_sse42.h index 2b8927b6920..7c8e6255385 100644 --- a/src/vppinfra/vector_sse42.h +++ b/src/vppinfra/vector_sse42.h @@ -493,6 +493,68 @@ u8x16_xor3 (u8x16 a, u8x16 b, u8x16 c) return a ^ b ^ c; } +static_always_inline u8x16 +u8x16_load_partial (u8 *data, uword n) +{ + u8x16 r = {}; +#if defined(CLIB_HAVE_VEC128_MASK_LOAD_STORE) + return u8x16_mask_load_zero (data, pow2_mask (n)); +#endif + if (n > 7) + { + u64x2 r; + r[1] = *(u64u *) (data + n - 8); + r >>= (16 - n) * 8; + r[0] = *(u64u *) data; + return (u8x16) r; + } + else if (n > 3) + { + u32x4 r = {}; + r[1] = *(u32u *) (data + n - 4); + r >>= (8 - n) * 8; + r[0] = *(u32u *) data; + return (u8x16) r; + } + else if (n > 1) + { + u16x8 r = {}; + r[1] = *(u16u *) (data + n - 2); + r >>= (4 - n) * 8; + r[0] = *(u16u *) data; + return (u8x16) r; + } + else if (n > 0) + r[0] = *data; + return r; +} + +static_always_inline void +u8x16_store_partial (u8x16 r, u8 *data, uword n) +{ +#if defined(CLIB_HAVE_VEC256_MASK_LOAD_STORE) + u8x16_mask_store (r, data, pow2_mask (n)); +#else + if (n > 7) + { + *(u64u *) (data + n - 8) = ((u64x2) r)[1] << ((16 - n) * 8); + *(u64u *) data = ((u64x2) r)[0]; + } + else if (n > 3) + { + *(u32u *) (data + n - 4) = ((u32x4) r)[1] << ((8 - n) * 8); + *(u32u *) data = ((u32x4) r)[0]; + } + else if (n > 1) + { + *(u16u *) (data + n - 2) = ((u16x8) r)[1] << ((4 - n) * 8); + *(u16u *) data = ((u16x8) r)[0]; + } + else if (n > 0) + data[0] = r[0]; +#endif +} + #endif /* included_vector_sse2_h */ /* |