| author | Zhiyong Yang <zhiyong.yang@intel.com> | 2020-07-08 20:28:36 +0000 |
|---|---|---|
| committer | Junfeng Wang <drenfong.wang@intel.com> | 2020-09-04 02:34:03 +0000 |
| commit | 5e52417a2aa3b2063a811c6a9f293a79d73bcb43 (patch) | |
| tree | 4b4cdaccaa682c1dddea8617717af5ac4f520ea0 /src/vnet/ip/vtep.h | |
| parent | 4a433f46084d05a524154db64d3d7d2567305009 (diff) | |
ip: enhance vtep4_check of tunnel by vector way
This patch aims to improve decap performance by reducing expensive
hash_get calls as much as possible, using AVX512 on Xeon,
e.g. for vxlan, vxlan_gpe, geneve, and gtpu.

In the existing code, if the vtep4 of the current packet matches the
last vtep4_key_t, the expensive hash computation can be avoided and
the code returns directly.

This patch greatly improves the multi-flow tunnel decap case by
leveraging a 512-bit vector register on Xeon that accommodates 8
vtep4_keys. It raises the likelihood of skipping the hash computation:
whenever the hash key of the current packet hits any one of the 8
entries in the 512-bit cache, no hash lookup is needed (see the
scalar sketch below).

The oldest element in vtep4_cache_t is replaced in round-robin order.
vlib_get_buffers is leveraged as well.
Type: improvement
Signed-off-by: Zhiyong Yang <zhiyong.yang@intel.com>
Signed-off-by: Ray Kinsella <mdr@ashroe.eu>
Signed-off-by: Junfeng Wang <drenfong.wang@intel.com>
Change-Id: I313103202bd76f2dd638cd942554721b37ddad60
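For readers without the vppinfra vector headers at hand, here is a minimal scalar sketch of the caching scheme the message describes. It is not VPP code: `key64_t`, `cache_sketch_t`, `cache_probe`, and `cache_insert` are invented names, and the 8-iteration loop stands in for the single `u64x8_mask_is_equal` compare that the real code issues on AVX512.

```c
#include <stdint.h>

/* Stand-in for vtep4_key_t: IPv4 address + fib index packed into 64 bits. */
typedef uint64_t key64_t;

typedef struct
{
  key64_t vtep4_cache[8];	/* 8 x 64-bit keys = one 512-bit register */
  int idx;			/* next slot to overwrite, round-robin */
} cache_sketch_t;

/* What u64x8_mask_is_equal does in one instruction on AVX512: compare
 * the key against all 8 lanes at once, returning a bitmask of hits. */
static int
cache_probe (const cache_sketch_t * c, key64_t key)
{
  uint8_t mask = 0;
  for (int i = 0; i < 8; i++)
    if (c->vtep4_cache[i] == key)
      mask |= (uint8_t) (1u << i);
  return mask;			/* nonzero => skip the hash lookup */
}

/* After a successful hash lookup, the key evicts the oldest entry. */
static void
cache_insert (cache_sketch_t * c, key64_t key)
{
  c->vtep4_cache[c->idx] = key;
  c->idx = (c->idx + 1) & 0x7;	/* wrap after slot 7 */
}
```

Round-robin replacement keeps the update to a store plus a masked increment; with only 8 slots, the hit-rate gain from true LRU bookkeeping would likely not repay its cost on the fast path.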
Diffstat (limited to 'src/vnet/ip/vtep.h')
| -rw-r--r-- | src/vnet/ip/vtep.h | 42 |
1 file changed, 42 insertions(+), 0 deletions(-)
```diff
diff --git a/src/vnet/ip/vtep.h b/src/vnet/ip/vtep.h
index 703ace18dba..345b6db1f9b 100644
--- a/src/vnet/ip/vtep.h
+++ b/src/vnet/ip/vtep.h
@@ -112,6 +112,48 @@ vtep4_check (vtep_table_t * t, vlib_buffer_t * b0, ip4_header_t * ip40,
   return VTEP_CHECK_PASS;
 }
 
+typedef struct
+{
+  vtep4_key_t vtep4_cache[8];
+  int idx;
+} vtep4_cache_t;
+
+always_inline u8
+vtep4_check_vector (vtep_table_t * t, vlib_buffer_t * b0, ip4_header_t * ip40,
+		    vtep4_key_t * last_k4, vtep4_cache_t * vtep4_u512)
+{
+  vtep4_key_t k4;
+  k4.addr.as_u32 = ip40->dst_address.as_u32;
+  k4.fib_index = vlib_buffer_get_ip4_fib_index (b0);
+
+  if (PREDICT_TRUE (k4.as_u64 == last_k4->as_u64))
+    return VTEP_CHECK_PASS_UNCHANGED;
+
+#ifdef CLIB_HAVE_VEC512
+  u64x8 k4_u64x8 = u64x8_splat (k4.as_u64);
+  u64x8 cache = u64x8_load_aligned (vtep4_u512->vtep4_cache);
+  u8 result = u64x8_mask_is_equal (cache, k4_u64x8);
+  if (PREDICT_TRUE (result != 0))
+    {
+      k4.as_u64 =
+	vtep4_u512->vtep4_cache[count_trailing_zeros (result)].as_u64;
+      return VTEP_CHECK_PASS_UNCHANGED;
+    }
+#endif
+
+  if (PREDICT_FALSE (!hash_get (t->vtep4, k4.as_u64)))
+    return VTEP_CHECK_FAIL;
+
+  last_k4->as_u64 = k4.as_u64;
+
+#ifdef CLIB_HAVE_VEC512
+  vtep4_u512->vtep4_cache[vtep4_u512->idx].as_u64 = k4.as_u64;
+  vtep4_u512->idx = (vtep4_u512->idx + 1) & 0x7;
+#endif
+
+  return VTEP_CHECK_PASS;
+}
+
 always_inline u8
 vtep6_check (vtep_table_t * t, vlib_buffer_t * b0, ip6_header_t * ip60,
 	     vtep6_key_t * last_k6)
```
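To show how the new helper and vlib_get_buffers fit together, here is a hypothetical caller sketch. Only `vtep4_check_vector`, its argument types, and the vlib calls come from VPP; the function name `my_decap_check`, the sentinel initialization of `last_k4`, the 64-byte alignment attribute (assumed necessary because the probe uses an aligned 512-bit load), and the fail counting are illustrative assumptions, not part of the patch.

```c
#include <vlib/vlib.h>
#include <vnet/ip/vtep.h>

static_always_inline uword
my_decap_check (vlib_main_t * vm, vlib_frame_t * frame, vtep_table_t * t)
{
  vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b = bufs;
  u32 n_left = frame->n_vectors;
  u32 n_fail = 0;

  /* Sentinel assumed not to match any real key, so the first packet
   * falls through to the cache/hash path. */
  vtep4_key_t last_k4;
  last_k4.as_u64 = ~0ULL;

  /* Assumed 64-byte alignment for the aligned u64x8 load in the probe. */
  vtep4_cache_t cache __attribute__ ((aligned (64))) = { 0 };

  /* Translate all buffer indices to pointers in one pass. */
  vlib_get_buffers (vm, vlib_frame_vector_args (frame), bufs, n_left);

  while (n_left > 0)
    {
      /* Assumes the current data pointer sits on the IPv4 header. */
      ip4_header_t *ip40 = vlib_buffer_get_current (b[0]);

      if (vtep4_check_vector (t, b[0], ip40, &last_k4, &cache)
	  == VTEP_CHECK_FAIL)
	n_fail++;		/* not a local VTEP: drop or punt */

      b += 1;
      n_left -= 1;
    }
  return n_fail;
}
```

Note that `last_k4` and the cache are per-frame locals here; the patch only adds the helper and leaves the placement of this state to each decap node.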