From 8304933922620cef005b788a36a4d3f2eab45bb5 Mon Sep 17 00:00:00 2001
From: Damjan Marion
Date: Wed, 25 Sep 2019 00:25:36 +0200
Subject: classify: use vector code even when data is not aligned

Type: feature

Change-Id: I8f5f4841965beb13ebc8c2a37ce0dc331c920109
Signed-off-by: Damjan Marion
---
 src/vnet/classify/vnet_classify.h | 268 ++++++++++++++++++--------------------
 src/vnet/l2/l2_rw.c               |  80 +++---------
 src/vppinfra/vector.h             |   2 +
 3 files changed, 147 insertions(+), 203 deletions(-)
(limited to 'src')

diff --git a/src/vnet/classify/vnet_classify.h b/src/vnet/classify/vnet_classify.h
index 620ef9a1052..35a5db3d27b 100644
--- a/src/vnet/classify/vnet_classify.h
+++ b/src/vnet/classify/vnet_classify.h
@@ -28,8 +28,6 @@ extern vlib_node_registration_t ip6_classify_node;
 
 #define CLASSIFY_TRACE 0
 
-#define U32X4_ALIGNED(p) PREDICT_TRUE((((intptr_t)p) & 0xf) == 0)
-
 /*
  * Classify table option to process packets
  *  CLASSIFY_FLAG_USE_CURR_DATA:
@@ -218,62 +216,57 @@ vnet_classify_hash_packet_inline (vnet_classify_table_t * t, u8 * h)
   ASSERT (t);
   mask = t->mask;
 #ifdef CLIB_HAVE_VEC128
-  if (U32X4_ALIGNED (h))
-    {                           //SSE can't handle unaligned data
-      u32x4 *data = (u32x4 *) h;
-      xor_sum.as_u32x4 = data[0 + t->skip_n_vectors] & mask[0];
-      switch (t->match_n_vectors)
-        {
-        case 5:
-          xor_sum.as_u32x4 ^= data[4 + t->skip_n_vectors] & mask[4];
-          /* FALLTHROUGH */
-        case 4:
-          xor_sum.as_u32x4 ^= data[3 + t->skip_n_vectors] & mask[3];
-          /* FALLTHROUGH */
-        case 3:
-          xor_sum.as_u32x4 ^= data[2 + t->skip_n_vectors] & mask[2];
-          /* FALLTHROUGH */
-        case 2:
-          xor_sum.as_u32x4 ^= data[1 + t->skip_n_vectors] & mask[1];
-          /* FALLTHROUGH */
-        case 1:
-          break;
-        default:
-          abort ();
-        }
+  u32x4u *data = (u32x4u *) h;
+  xor_sum.as_u32x4 = data[0 + t->skip_n_vectors] & mask[0];
+  switch (t->match_n_vectors)
+    {
+    case 5:
+      xor_sum.as_u32x4 ^= data[4 + t->skip_n_vectors] & mask[4];
+      /* FALLTHROUGH */
+    case 4:
+      xor_sum.as_u32x4 ^= data[3 + t->skip_n_vectors] & mask[3];
+      /* FALLTHROUGH */
+    case 3:
+      xor_sum.as_u32x4 ^= data[2 + t->skip_n_vectors] & mask[2];
+      /* FALLTHROUGH */
+    case 2:
+      xor_sum.as_u32x4 ^= data[1 + t->skip_n_vectors] & mask[1];
+      /* FALLTHROUGH */
+    case 1:
+      break;
+    default:
+      abort ();
     }
-  else
-#endif /* CLIB_HAVE_VEC128 */
+#else
+  u32 skip_u64 = t->skip_n_vectors * 2;
+  u64 *data64 = (u64 *) h;
+  xor_sum.as_u64[0] = data64[0 + skip_u64] & ((u64 *) mask)[0];
+  xor_sum.as_u64[1] = data64[1 + skip_u64] & ((u64 *) mask)[1];
+  switch (t->match_n_vectors)
     {
-      u32 skip_u64 = t->skip_n_vectors * 2;
-      u64 *data64 = (u64 *) h;
-      xor_sum.as_u64[0] = data64[0 + skip_u64] & ((u64 *) mask)[0];
-      xor_sum.as_u64[1] = data64[1 + skip_u64] & ((u64 *) mask)[1];
-      switch (t->match_n_vectors)
-        {
-        case 5:
-          xor_sum.as_u64[0] ^= data64[8 + skip_u64] & ((u64 *) mask)[8];
-          xor_sum.as_u64[1] ^= data64[9 + skip_u64] & ((u64 *) mask)[9];
-          /* FALLTHROUGH */
-        case 4:
-          xor_sum.as_u64[0] ^= data64[6 + skip_u64] & ((u64 *) mask)[6];
-          xor_sum.as_u64[1] ^= data64[7 + skip_u64] & ((u64 *) mask)[7];
-          /* FALLTHROUGH */
-        case 3:
-          xor_sum.as_u64[0] ^= data64[4 + skip_u64] & ((u64 *) mask)[4];
-          xor_sum.as_u64[1] ^= data64[5 + skip_u64] & ((u64 *) mask)[5];
-          /* FALLTHROUGH */
-        case 2:
-          xor_sum.as_u64[0] ^= data64[2 + skip_u64] & ((u64 *) mask)[2];
-          xor_sum.as_u64[1] ^= data64[3 + skip_u64] & ((u64 *) mask)[3];
-          /* FALLTHROUGH */
-        case 1:
-          break;
-
-        default:
-          abort ();
-        }
+    case 5:
+      xor_sum.as_u64[0] ^= data64[8 + skip_u64] & ((u64 *) mask)[8];
+      xor_sum.as_u64[1] ^= data64[9 + skip_u64] & ((u64 *) mask)[9];
+      /* FALLTHROUGH */
+    case 4:
+      xor_sum.as_u64[0] ^= data64[6 + skip_u64] & ((u64 *) mask)[6];
+      xor_sum.as_u64[1] ^= data64[7 + skip_u64] & ((u64 *) mask)[7];
+      /* FALLTHROUGH */
+    case 3:
+      xor_sum.as_u64[0] ^= data64[4 + skip_u64] & ((u64 *) mask)[4];
+      xor_sum.as_u64[1] ^= data64[5 + skip_u64] & ((u64 *) mask)[5];
+      /* FALLTHROUGH */
+    case 2:
+      xor_sum.as_u64[0] ^= data64[2 + skip_u64] & ((u64 *) mask)[2];
+      xor_sum.as_u64[1] ^= data64[3 + skip_u64] & ((u64 *) mask)[3];
+      /* FALLTHROUGH */
+    case 1:
+      break;
+
+    default:
+      abort ();
     }
+#endif /* CLIB_HAVE_VEC128 */
 
   return clib_xxhash (xor_sum.as_u64[0] ^ xor_sum.as_u64[1]);
 }
@@ -392,107 +385,98 @@ vnet_classify_find_entry_inline (vnet_classify_table_t * t,
   v = vnet_classify_entry_at_index (t, v, value_index);
 
 #ifdef CLIB_HAVE_VEC128
-  if (U32X4_ALIGNED (h))
+  u32x4u *data = (u32x4u *) h;
+  for (i = 0; i < limit; i++)
     {
-      u32x4 *data = (u32x4 *) h;
-      for (i = 0; i < limit; i++)
+      key = v->key;
+      result.as_u32x4 = (data[0 + t->skip_n_vectors] & mask[0]) ^ key[0];
+      switch (t->match_n_vectors)
         {
-          key = v->key;
-          result.as_u32x4 = (data[0 + t->skip_n_vectors] & mask[0]) ^ key[0];
-          switch (t->match_n_vectors)
-            {
-            case 5:
-              result.as_u32x4 |=
-                (data[4 + t->skip_n_vectors] & mask[4]) ^ key[4];
-              /* FALLTHROUGH */
-            case 4:
-              result.as_u32x4 |=
-                (data[3 + t->skip_n_vectors] & mask[3]) ^ key[3];
-              /* FALLTHROUGH */
-            case 3:
-              result.as_u32x4 |=
-                (data[2 + t->skip_n_vectors] & mask[2]) ^ key[2];
-              /* FALLTHROUGH */
-            case 2:
-              result.as_u32x4 |=
-                (data[1 + t->skip_n_vectors] & mask[1]) ^ key[1];
-              /* FALLTHROUGH */
-            case 1:
-              break;
-            default:
-              abort ();
-            }
+        case 5:
+          result.as_u32x4 |= (data[4 + t->skip_n_vectors] & mask[4]) ^ key[4];
+          /* FALLTHROUGH */
+        case 4:
+          result.as_u32x4 |= (data[3 + t->skip_n_vectors] & mask[3]) ^ key[3];
+          /* FALLTHROUGH */
+        case 3:
+          result.as_u32x4 |= (data[2 + t->skip_n_vectors] & mask[2]) ^ key[2];
+          /* FALLTHROUGH */
+        case 2:
+          result.as_u32x4 |= (data[1 + t->skip_n_vectors] & mask[1]) ^ key[1];
+          /* FALLTHROUGH */
+        case 1:
+          break;
+        default:
+          abort ();
+        }
 
-          if (u32x4_zero_byte_mask (result.as_u32x4) == 0xffff)
+      if (u32x4_zero_byte_mask (result.as_u32x4) == 0xffff)
+        {
+          if (PREDICT_TRUE (now))
             {
-              if (PREDICT_TRUE (now))
-                {
-                  v->hits++;
-                  v->last_heard = now;
-                }
-              return (v);
+              v->hits++;
+              v->last_heard = now;
             }
-          v = vnet_classify_entry_at_index (t, v, 1);
+          return (v);
         }
+      v = vnet_classify_entry_at_index (t, v, 1);
     }
-  else
-#endif /* CLIB_HAVE_VEC128 */
+#else
+  u32 skip_u64 = t->skip_n_vectors * 2;
+  u64 *data64 = (u64 *) h;
+  for (i = 0; i < limit; i++)
     {
-      u32 skip_u64 = t->skip_n_vectors * 2;
-      u64 *data64 = (u64 *) h;
-      for (i = 0; i < limit; i++)
-        {
-          key = v->key;
+      key = v->key;
 
-          result.as_u64[0] =
-            (data64[0 + skip_u64] & ((u64 *) mask)[0]) ^ ((u64 *) key)[0];
-          result.as_u64[1] =
-            (data64[1 + skip_u64] & ((u64 *) mask)[1]) ^ ((u64 *) key)[1];
-          switch (t->match_n_vectors)
-            {
-            case 5:
-              result.as_u64[0] |=
-                (data64[8 + skip_u64] & ((u64 *) mask)[8]) ^ ((u64 *) key)[8];
-              result.as_u64[1] |=
-                (data64[9 + skip_u64] & ((u64 *) mask)[9]) ^ ((u64 *) key)[9];
-              /* FALLTHROUGH */
-            case 4:
-              result.as_u64[0] |=
-                (data64[6 + skip_u64] & ((u64 *) mask)[6]) ^ ((u64 *) key)[6];
-              result.as_u64[1] |=
-                (data64[7 + skip_u64] & ((u64 *) mask)[7]) ^ ((u64 *) key)[7];
-              /* FALLTHROUGH */
-            case 3:
-              result.as_u64[0] |=
-                (data64[4 + skip_u64] & ((u64 *) mask)[4]) ^ ((u64 *) key)[4];
-              result.as_u64[1] |=
-                (data64[5 + skip_u64] & ((u64 *) mask)[5]) ^ ((u64 *) key)[5];
-              /* FALLTHROUGH */
-            case 2:
-              result.as_u64[0] |=
-                (data64[2 + skip_u64] & ((u64 *) mask)[2]) ^ ((u64 *) key)[2];
-              result.as_u64[1] |=
-                (data64[3 + skip_u64] & ((u64 *) mask)[3]) ^ ((u64 *) key)[3];
-              /* FALLTHROUGH */
-            case 1:
-              break;
-            default:
-              abort ();
-            }
+      result.as_u64[0] =
+        (data64[0 + skip_u64] & ((u64 *) mask)[0]) ^ ((u64 *) key)[0];
+      result.as_u64[1] =
+        (data64[1 + skip_u64] & ((u64 *) mask)[1]) ^ ((u64 *) key)[1];
+      switch (t->match_n_vectors)
+        {
+        case 5:
+          result.as_u64[0] |=
+            (data64[8 + skip_u64] & ((u64 *) mask)[8]) ^ ((u64 *) key)[8];
+          result.as_u64[1] |=
+            (data64[9 + skip_u64] & ((u64 *) mask)[9]) ^ ((u64 *) key)[9];
+          /* FALLTHROUGH */
+        case 4:
+          result.as_u64[0] |=
+            (data64[6 + skip_u64] & ((u64 *) mask)[6]) ^ ((u64 *) key)[6];
+          result.as_u64[1] |=
+            (data64[7 + skip_u64] & ((u64 *) mask)[7]) ^ ((u64 *) key)[7];
+          /* FALLTHROUGH */
+        case 3:
+          result.as_u64[0] |=
+            (data64[4 + skip_u64] & ((u64 *) mask)[4]) ^ ((u64 *) key)[4];
+          result.as_u64[1] |=
+            (data64[5 + skip_u64] & ((u64 *) mask)[5]) ^ ((u64 *) key)[5];
+          /* FALLTHROUGH */
+        case 2:
+          result.as_u64[0] |=
+            (data64[2 + skip_u64] & ((u64 *) mask)[2]) ^ ((u64 *) key)[2];
+          result.as_u64[1] |=
+            (data64[3 + skip_u64] & ((u64 *) mask)[3]) ^ ((u64 *) key)[3];
+          /* FALLTHROUGH */
+        case 1:
+          break;
+        default:
+          abort ();
+        }
 
-          if (result.as_u64[0] == 0 && result.as_u64[1] == 0)
+      if (result.as_u64[0] == 0 && result.as_u64[1] == 0)
+        {
+          if (PREDICT_TRUE (now))
             {
-              if (PREDICT_TRUE (now))
-                {
-                  v->hits++;
-                  v->last_heard = now;
-                }
-              return (v);
+              v->hits++;
+              v->last_heard = now;
             }
-
-          v = vnet_classify_entry_at_index (t, v, 1);
+          return (v);
         }
+
+      v = vnet_classify_entry_at_index (t, v, 1);
     }
+#endif /* CLIB_HAVE_VEC128 */
 
   return 0;
 }
diff --git a/src/vnet/l2/l2_rw.c b/src/vnet/l2/l2_rw.c
index 85a53aa06ff..e5851404277 100644
--- a/src/vnet/l2/l2_rw.c
+++ b/src/vnet/l2/l2_rw.c
@@ -91,68 +91,26 @@ l2_rw_get_config (u32 sw_if_index)
 static_always_inline void
 l2_rw_rewrite (l2_rw_entry_t * rwe, u8 * h)
 {
-  if (U32X4_ALIGNED (h))
+  u32x4u *d = ((u32x4u *) h) + rwe->skip_n_vectors;
+  switch (rwe->rewrite_n_vectors)
     {
-      u32x4 *d = ((u32x4 *) h) + rwe->skip_n_vectors;
-      switch (rwe->rewrite_n_vectors)
-        {
-        case 5:
-          d[4] = (d[4] & ~rwe->mask[4]) | rwe->value[4];
-          /* FALLTHROUGH */
-        case 4:
-          d[3] = (d[3] & ~rwe->mask[3]) | rwe->value[3];
-          /* FALLTHROUGH */
-        case 3:
-          d[2] = (d[2] & ~rwe->mask[2]) | rwe->value[2];
-          /* FALLTHROUGH */
-        case 2:
-          d[1] = (d[1] & ~rwe->mask[1]) | rwe->value[1];
-          /* FALLTHROUGH */
-        case 1:
-          d[0] = (d[0] & ~rwe->mask[0]) | rwe->value[0];
-          break;
-        default:
-          abort ();
-        }
-    }
-  else
-    {
-      u64 *d = ((u64 *) h) + rwe->skip_n_vectors * 2;
-      switch (rwe->rewrite_n_vectors)
-        {
-        case 5:
-          d[8] =
-            (d[8] & ~(((u64 *) rwe->mask)[8])) | (((u64 *) rwe->value)[8]);
-          d[9] =
-            (d[9] & ~(((u64 *) rwe->mask)[9])) | (((u64 *) rwe->value)[9]);
-          /* FALLTHROUGH */
-        case 4:
-          d[6] =
-            (d[6] & ~(((u64 *) rwe->mask)[6])) | (((u64 *) rwe->value)[6]);
-          d[7] =
-            (d[7] & ~(((u64 *) rwe->mask)[7])) | (((u64 *) rwe->value)[7]);
-          /* FALLTHROUGH */
-        case 3:
-          d[4] =
-            (d[4] & ~(((u64 *) rwe->mask)[4])) | (((u64 *) rwe->value)[4]);
-          d[5] =
-            (d[5] & ~(((u64 *) rwe->mask)[5])) | (((u64 *) rwe->value)[5]);
-          /* FALLTHROUGH */
-        case 2:
-          d[2] =
-            (d[2] & ~(((u64 *) rwe->mask)[2])) | (((u64 *) rwe->value)[2]);
-          d[3] =
-            (d[3] & ~(((u64 *) rwe->mask)[3])) | (((u64 *) rwe->value)[3]);
-          /* FALLTHROUGH */
-        case 1:
-          d[0] =
-            (d[0] & ~(((u64 *) rwe->mask)[0])) | (((u64 *) rwe->value)[0]);
-          d[1] =
-            (d[1] & ~(((u64 *) rwe->mask)[1])) | (((u64 *) rwe->value)[1]);
-          break;
-        default:
-          abort ();
-        }
+    case 5:
+      d[4] = (d[4] & ~rwe->mask[4]) | rwe->value[4];
+      /* FALLTHROUGH */
+    case 4:
+      d[3] = (d[3] & ~rwe->mask[3]) | rwe->value[3];
+      /* FALLTHROUGH */
+    case 3:
+      d[2] = (d[2] & ~rwe->mask[2]) | rwe->value[2];
+      /* FALLTHROUGH */
+    case 2:
+      d[1] = (d[1] & ~rwe->mask[1]) | rwe->value[1];
+      /* FALLTHROUGH */
+    case 1:
+      d[0] = (d[0] & ~rwe->mask[0]) | rwe->value[0];
+      break;
+    default:
+      abort ();
     }
 }
diff --git a/src/vppinfra/vector.h b/src/vppinfra/vector.h
index 2b84cc24869..906d8d8fbfd 100644
--- a/src/vppinfra/vector.h
+++ b/src/vppinfra/vector.h
@@ -76,6 +76,7 @@
 #endif
 
 #define _vector_size(n) __attribute__ ((vector_size (n)))
+#define _vector_size_unaligned(n) __attribute__ ((vector_size (n), __aligned__ (1)))
 
 #define foreach_vec64i _(i,8,8) _(i,16,4) _(i,32,2)
 #define foreach_vec64u _(u,8,8) _(u,16,4) _(u,32,2)
@@ -111,6 +112,7 @@
 /* Type Definitions */
 #define _(t,s,c) \
 typedef t##s t##s##x##c _vector_size (s/8*c); \
+typedef t##s t##s##x##c##u _vector_size_unaligned (s/8*c); \
 typedef union { \
   t##s##x##c as_##t##s##x##c; \
   t##s as_##t##s[c]; \
--
cgit 1.2.3-korg
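
Background note (not part of the patch itself): the new u32x4u typedef added in vppinfra/vector.h is a GCC/clang vector type whose alignment is forced down to 1, so loads and stores through a u32x4u * are emitted as unaligned vector accesses. That is what allows the classifier and l2_rw code above to drop the U32X4_ALIGNED runtime check and the duplicated scalar u64 fallback paths. Below is a minimal, self-contained sketch of the same idea, assuming GCC or clang; the helper names (masked_load, buf) are illustrative only and do not exist in the VPP tree.

/* Standalone illustration of the unaligned vector type technique used by
 * the patch.  Build with e.g. `cc -O2 unaligned_vec.c -o unaligned_vec`. */
#include <stdint.h>
#include <stdio.h>

/* Naturally aligned 16-byte vector of four u32 lanes. */
typedef uint32_t u32x4 __attribute__ ((vector_size (16)));
/* Same lanes, but alignment 1: dereferencing a pointer to this type may be
 * compiled to an unaligned load/store (e.g. movdqu instead of movdqa). */
typedef uint32_t u32x4u __attribute__ ((vector_size (16), __aligned__ (1)));

/* Mask 16 bytes starting at an arbitrary, possibly unaligned pointer, the
 * way the classifier hash/match path computes data[i] & mask[i]. */
static u32x4
masked_load (const uint8_t * p, u32x4 mask)
{
  u32x4u v = *(const u32x4u *) p;	/* unaligned vector load */
  return ((u32x4) v) & mask;
}

int
main (void)
{
  uint8_t buf[20];
  for (int i = 0; i < 20; i++)
    buf[i] = (uint8_t) i;

  u32x4 mask = { 0xff, 0xff, 0xff, 0xff };
  u32x4 r = masked_load (buf + 1, mask);	/* deliberately misaligned */
  printf ("%u %u %u %u\n", r[0], r[1], r[2], r[3]);
  return 0;
}

Because the compiler now handles the unaligned case itself, there is no need for a runtime alignment branch, which is exactly why the patch can delete U32X4_ALIGNED and keep only the vector code path when CLIB_HAVE_VEC128 is defined.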