author     Damjan Marion <damarion@cisco.com>        2019-09-25 00:25:36 +0200
committer  Dave Barach <openvpp@barachs.net>         2019-09-26 11:37:30 +0000
commit     8304933922620cef005b788a36a4d3f2eab45bb5 (patch)
tree       9b873ff3da92bcae785a7a67d7917193ee901b39 /src/vnet/classify
parent     6eb009ac023799e4ee1b75d759d726622c39de78 (diff)
classify: use vector code even when data is not aligned
Type: feature
Change-Id: I8f5f4841965beb13ebc8c2a37ce0dc331c920109
Signed-off-by: Damjan Marion <damarion@cisco.com>
Diffstat (limited to 'src/vnet/classify')
-rw-r--r--   src/vnet/classify/vnet_classify.h | 268
1 file changed, 126 insertions(+), 142 deletions(-)
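The patch below drops the run-time U32X4_ALIGNED() check and instead reads packet data through an unaligned vector pointer type (u32x4u), so the same SIMD path runs regardless of buffer alignment. As a rough illustration of the idea, the stand-alone sketch below declares an unaligned 16-byte vector type with GCC/clang vector extensions; the typedefs and the masked_lane helper are illustrative assumptions, not the actual vppinfra definitions.

/* Sketch (illustrative, not the vppinfra definition): an unaligned
 * counterpart of a 16-byte vector type, built with GCC/clang vector
 * extensions.  Dereferencing the aligned(1) typedef lets the compiler
 * emit unaligned loads (e.g. movdqu on x86), which is why the old
 * alignment branch can be removed. */
#include <stdint.h>

typedef uint32_t u32x4 __attribute__ ((vector_size (16)));
typedef u32x4 u32x4u __attribute__ ((aligned (1), may_alias));

static inline u32x4
masked_lane (uint8_t * h, u32x4 mask)
{
  u32x4u *data = (u32x4u *) h;	/* h may point at any byte offset */
  return data[0] & mask;	/* element-wise AND on the vector type */
}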
diff --git a/src/vnet/classify/vnet_classify.h b/src/vnet/classify/vnet_classify.h
index 620ef9a1052..35a5db3d27b 100644
--- a/src/vnet/classify/vnet_classify.h
+++ b/src/vnet/classify/vnet_classify.h
@@ -28,8 +28,6 @@ extern vlib_node_registration_t ip6_classify_node;
 
 #define CLASSIFY_TRACE 0
 
-#define U32X4_ALIGNED(p) PREDICT_TRUE((((intptr_t)p) & 0xf) == 0)
-
 /*
  * Classify table option to process packets
  *  CLASSIFY_FLAG_USE_CURR_DATA:
@@ -218,62 +216,57 @@ vnet_classify_hash_packet_inline (vnet_classify_table_t * t, u8 * h)
   ASSERT (t);
   mask = t->mask;
 #ifdef CLIB_HAVE_VEC128
-  if (U32X4_ALIGNED (h))
-    {				//SSE can't handle unaligned data
-      u32x4 *data = (u32x4 *) h;
-      xor_sum.as_u32x4 = data[0 + t->skip_n_vectors] & mask[0];
-      switch (t->match_n_vectors)
-        {
-        case 5:
-          xor_sum.as_u32x4 ^= data[4 + t->skip_n_vectors] & mask[4];
-          /* FALLTHROUGH */
-        case 4:
-          xor_sum.as_u32x4 ^= data[3 + t->skip_n_vectors] & mask[3];
-          /* FALLTHROUGH */
-        case 3:
-          xor_sum.as_u32x4 ^= data[2 + t->skip_n_vectors] & mask[2];
-          /* FALLTHROUGH */
-        case 2:
-          xor_sum.as_u32x4 ^= data[1 + t->skip_n_vectors] & mask[1];
-          /* FALLTHROUGH */
-        case 1:
-          break;
-        default:
-          abort ();
-        }
+  u32x4u *data = (u32x4u *) h;
+  xor_sum.as_u32x4 = data[0 + t->skip_n_vectors] & mask[0];
+  switch (t->match_n_vectors)
+    {
+    case 5:
+      xor_sum.as_u32x4 ^= data[4 + t->skip_n_vectors] & mask[4];
+      /* FALLTHROUGH */
+    case 4:
+      xor_sum.as_u32x4 ^= data[3 + t->skip_n_vectors] & mask[3];
+      /* FALLTHROUGH */
+    case 3:
+      xor_sum.as_u32x4 ^= data[2 + t->skip_n_vectors] & mask[2];
+      /* FALLTHROUGH */
+    case 2:
+      xor_sum.as_u32x4 ^= data[1 + t->skip_n_vectors] & mask[1];
+      /* FALLTHROUGH */
+    case 1:
+      break;
+    default:
+      abort ();
     }
-  else
-#endif /* CLIB_HAVE_VEC128 */
+#else
+  u32 skip_u64 = t->skip_n_vectors * 2;
+  u64 *data64 = (u64 *) h;
+  xor_sum.as_u64[0] = data64[0 + skip_u64] & ((u64 *) mask)[0];
+  xor_sum.as_u64[1] = data64[1 + skip_u64] & ((u64 *) mask)[1];
+  switch (t->match_n_vectors)
     {
-      u32 skip_u64 = t->skip_n_vectors * 2;
-      u64 *data64 = (u64 *) h;
-      xor_sum.as_u64[0] = data64[0 + skip_u64] & ((u64 *) mask)[0];
-      xor_sum.as_u64[1] = data64[1 + skip_u64] & ((u64 *) mask)[1];
-      switch (t->match_n_vectors)
-        {
-        case 5:
-          xor_sum.as_u64[0] ^= data64[8 + skip_u64] & ((u64 *) mask)[8];
-          xor_sum.as_u64[1] ^= data64[9 + skip_u64] & ((u64 *) mask)[9];
-          /* FALLTHROUGH */
-        case 4:
-          xor_sum.as_u64[0] ^= data64[6 + skip_u64] & ((u64 *) mask)[6];
-          xor_sum.as_u64[1] ^= data64[7 + skip_u64] & ((u64 *) mask)[7];
-          /* FALLTHROUGH */
-        case 3:
-          xor_sum.as_u64[0] ^= data64[4 + skip_u64] & ((u64 *) mask)[4];
-          xor_sum.as_u64[1] ^= data64[5 + skip_u64] & ((u64 *) mask)[5];
-          /* FALLTHROUGH */
-        case 2:
-          xor_sum.as_u64[0] ^= data64[2 + skip_u64] & ((u64 *) mask)[2];
-          xor_sum.as_u64[1] ^= data64[3 + skip_u64] & ((u64 *) mask)[3];
-          /* FALLTHROUGH */
-        case 1:
-          break;
-
-        default:
-          abort ();
-        }
+    case 5:
+      xor_sum.as_u64[0] ^= data64[8 + skip_u64] & ((u64 *) mask)[8];
+      xor_sum.as_u64[1] ^= data64[9 + skip_u64] & ((u64 *) mask)[9];
+      /* FALLTHROUGH */
+    case 4:
+      xor_sum.as_u64[0] ^= data64[6 + skip_u64] & ((u64 *) mask)[6];
+      xor_sum.as_u64[1] ^= data64[7 + skip_u64] & ((u64 *) mask)[7];
+      /* FALLTHROUGH */
+    case 3:
+      xor_sum.as_u64[0] ^= data64[4 + skip_u64] & ((u64 *) mask)[4];
+      xor_sum.as_u64[1] ^= data64[5 + skip_u64] & ((u64 *) mask)[5];
+      /* FALLTHROUGH */
+    case 2:
+      xor_sum.as_u64[0] ^= data64[2 + skip_u64] & ((u64 *) mask)[2];
+      xor_sum.as_u64[1] ^= data64[3 + skip_u64] & ((u64 *) mask)[3];
+      /* FALLTHROUGH */
+    case 1:
+      break;
+
+    default:
+      abort ();
     }
+#endif /* CLIB_HAVE_VEC128 */
 
   return clib_xxhash (xor_sum.as_u64[0] ^ xor_sum.as_u64[1]);
 }
@@ -392,107 +385,98 @@ vnet_classify_find_entry_inline (vnet_classify_table_t * t,
       v = vnet_classify_entry_at_index (t, v, value_index);
 
 #ifdef CLIB_HAVE_VEC128
-      if (U32X4_ALIGNED (h))
+      u32x4u *data = (u32x4u *) h;
+      for (i = 0; i < limit; i++)
         {
-          u32x4 *data = (u32x4 *) h;
-          for (i = 0; i < limit; i++)
+          key = v->key;
+          result.as_u32x4 = (data[0 + t->skip_n_vectors] & mask[0]) ^ key[0];
+          switch (t->match_n_vectors)
             {
-              key = v->key;
-              result.as_u32x4 = (data[0 + t->skip_n_vectors] & mask[0]) ^ key[0];
-              switch (t->match_n_vectors)
-                {
-                case 5:
-                  result.as_u32x4 |=
-                    (data[4 + t->skip_n_vectors] & mask[4]) ^ key[4];
-                  /* FALLTHROUGH */
-                case 4:
-                  result.as_u32x4 |=
-                    (data[3 + t->skip_n_vectors] & mask[3]) ^ key[3];
-                  /* FALLTHROUGH */
-                case 3:
-                  result.as_u32x4 |=
-                    (data[2 + t->skip_n_vectors] & mask[2]) ^ key[2];
-                  /* FALLTHROUGH */
-                case 2:
-                  result.as_u32x4 |=
-                    (data[1 + t->skip_n_vectors] & mask[1]) ^ key[1];
-                  /* FALLTHROUGH */
-                case 1:
-                  break;
-                default:
-                  abort ();
-                }
+            case 5:
+              result.as_u32x4 |= (data[4 + t->skip_n_vectors] & mask[4]) ^ key[4];
+              /* FALLTHROUGH */
+            case 4:
+              result.as_u32x4 |= (data[3 + t->skip_n_vectors] & mask[3]) ^ key[3];
+              /* FALLTHROUGH */
+            case 3:
+              result.as_u32x4 |= (data[2 + t->skip_n_vectors] & mask[2]) ^ key[2];
+              /* FALLTHROUGH */
+            case 2:
+              result.as_u32x4 |= (data[1 + t->skip_n_vectors] & mask[1]) ^ key[1];
+              /* FALLTHROUGH */
+            case 1:
+              break;
+            default:
+              abort ();
+            }
 
-              if (u32x4_zero_byte_mask (result.as_u32x4) == 0xffff)
+          if (u32x4_zero_byte_mask (result.as_u32x4) == 0xffff)
+            {
+              if (PREDICT_TRUE (now))
                 {
-                  if (PREDICT_TRUE (now))
-                    {
-                      v->hits++;
-                      v->last_heard = now;
-                    }
-                  return (v);
+                  v->hits++;
+                  v->last_heard = now;
                 }
-              v = vnet_classify_entry_at_index (t, v, 1);
+              return (v);
             }
+          v = vnet_classify_entry_at_index (t, v, 1);
         }
-      else
-#endif /* CLIB_HAVE_VEC128 */
+#else
+      u32 skip_u64 = t->skip_n_vectors * 2;
+      u64 *data64 = (u64 *) h;
+      for (i = 0; i < limit; i++)
         {
-          u32 skip_u64 = t->skip_n_vectors * 2;
-          u64 *data64 = (u64 *) h;
-          for (i = 0; i < limit; i++)
-            {
-              key = v->key;
+          key = v->key;
 
-              result.as_u64[0] =
-                (data64[0 + skip_u64] & ((u64 *) mask)[0]) ^ ((u64 *) key)[0];
-              result.as_u64[1] =
-                (data64[1 + skip_u64] & ((u64 *) mask)[1]) ^ ((u64 *) key)[1];
-              switch (t->match_n_vectors)
-                {
-                case 5:
-                  result.as_u64[0] |=
-                    (data64[8 + skip_u64] & ((u64 *) mask)[8]) ^ ((u64 *) key)[8];
-                  result.as_u64[1] |=
-                    (data64[9 + skip_u64] & ((u64 *) mask)[9]) ^ ((u64 *) key)[9];
-                  /* FALLTHROUGH */
-                case 4:
-                  result.as_u64[0] |=
-                    (data64[6 + skip_u64] & ((u64 *) mask)[6]) ^ ((u64 *) key)[6];
-                  result.as_u64[1] |=
-                    (data64[7 + skip_u64] & ((u64 *) mask)[7]) ^ ((u64 *) key)[7];
-                  /* FALLTHROUGH */
-                case 3:
-                  result.as_u64[0] |=
-                    (data64[4 + skip_u64] & ((u64 *) mask)[4]) ^ ((u64 *) key)[4];
-                  result.as_u64[1] |=
-                    (data64[5 + skip_u64] & ((u64 *) mask)[5]) ^ ((u64 *) key)[5];
-                  /* FALLTHROUGH */
-                case 2:
-                  result.as_u64[0] |=
-                    (data64[2 + skip_u64] & ((u64 *) mask)[2]) ^ ((u64 *) key)[2];
-                  result.as_u64[1] |=
-                    (data64[3 + skip_u64] & ((u64 *) mask)[3]) ^ ((u64 *) key)[3];
-                  /* FALLTHROUGH */
-                case 1:
-                  break;
-                default:
-                  abort ();
-                }
+          result.as_u64[0] =
+            (data64[0 + skip_u64] & ((u64 *) mask)[0]) ^ ((u64 *) key)[0];
+          result.as_u64[1] =
+            (data64[1 + skip_u64] & ((u64 *) mask)[1]) ^ ((u64 *) key)[1];
+          switch (t->match_n_vectors)
+            {
+            case 5:
+              result.as_u64[0] |=
+                (data64[8 + skip_u64] & ((u64 *) mask)[8]) ^ ((u64 *) key)[8];
+              result.as_u64[1] |=
+                (data64[9 + skip_u64] & ((u64 *) mask)[9]) ^ ((u64 *) key)[9];
+              /* FALLTHROUGH */
+            case 4:
+              result.as_u64[0] |=
+                (data64[6 + skip_u64] & ((u64 *) mask)[6]) ^ ((u64 *) key)[6];
+              result.as_u64[1] |=
+                (data64[7 + skip_u64] & ((u64 *) mask)[7]) ^ ((u64 *) key)[7];
+              /* FALLTHROUGH */
+            case 3:
+              result.as_u64[0] |=
+                (data64[4 + skip_u64] & ((u64 *) mask)[4]) ^ ((u64 *) key)[4];
+              result.as_u64[1] |=
+                (data64[5 + skip_u64] & ((u64 *) mask)[5]) ^ ((u64 *) key)[5];
+              /* FALLTHROUGH */
+            case 2:
+              result.as_u64[0] |=
+                (data64[2 + skip_u64] & ((u64 *) mask)[2]) ^ ((u64 *) key)[2];
+              result.as_u64[1] |=
+                (data64[3 + skip_u64] & ((u64 *) mask)[3]) ^ ((u64 *) key)[3];
+              /* FALLTHROUGH */
+            case 1:
+              break;
+            default:
+              abort ();
+            }
 
-              if (result.as_u64[0] == 0 && result.as_u64[1] == 0)
+          if (result.as_u64[0] == 0 && result.as_u64[1] == 0)
+            {
+              if (PREDICT_TRUE (now))
                 {
-                  if (PREDICT_TRUE (now))
-                    {
-                      v->hits++;
-                      v->last_heard = now;
-                    }
-                  return (v);
+                  v->hits++;
+                  v->last_heard = now;
                 }
-
-              v = vnet_classify_entry_at_index (t, v, 1);
+              return (v);
             }
+
+          v = vnet_classify_entry_at_index (t, v, 1);
         }
+#endif /* CLIB_HAVE_VEC128 */
 
   return 0;
 }
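In both paths a match is declared when every byte of (data AND mask) XOR key is zero: the vector path checks this with u32x4_zero_byte_mask (result) == 0xffff, the scalar path by testing both 64-bit halves of the result for zero. A portable stand-in for that predicate might look like the sketch below; it is illustrative only and not the vppinfra implementation, and classify_result_is_match is a hypothetical helper name.

/* Sketch (illustrative): "all 16 result bytes are zero" predicate,
 * equivalent in effect to u32x4_zero_byte_mask (result) == 0xffff in the
 * vector path, or result.as_u64[0] == 0 && result.as_u64[1] == 0 in the
 * scalar path.  memcpy keeps the loads alignment-safe. */
#include <stdint.h>
#include <string.h>

static inline int
classify_result_is_match (const void *result16)	/* 16-byte XOR result */
{
  uint64_t lo, hi;
  memcpy (&lo, result16, 8);
  memcpy (&hi, (const uint8_t *) result16 + 8, 8);
  return (lo | hi) == 0;	/* zero everywhere => key matched under mask */
}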