summaryrefslogtreecommitdiffstats
path: root/src/vnet/ip
diff options
context:
space:
mode:
authorZhiyong Yang <zhiyong.yang@intel.com>2020-07-08 20:28:36 +0000
committerJunfeng Wang <drenfong.wang@intel.com>2020-09-04 02:34:03 +0000
commit5e52417a2aa3b2063a811c6a9f293a79d73bcb43 (patch)
tree4b4cdaccaa682c1dddea8617717af5ac4f520ea0 /src/vnet/ip
parent4a433f46084d05a524154db64d3d7d2567305009 (diff)
ip: enhance vtep4_check of tunnel by vector way
This patch aims to improve decap performance by reducing expensive hash_get callings as less as possible using AVX512 on XEON. e.g. vxlan, vxlan_gpe, geneve, gtpu. For the existing code, if vtep4 of the current packet match the last vtep4_key_t well, expensive hash computation can be avoided and the code returns directly. This patch improves tunnel decap multiple flows case greatly by leveraging 512bit vector register on XEON accommodating 8 vtep4_keys. It enhances the possiblity of avoiding unnecessary hash computing once hash key of the current packet hits any one of 8 in the 512bit cache. The oldest element in vtep4_cache_t is updated in round-robin order. vlib_get_buffers is also leveraged in the meanwhile. Type: improvement Signed-off-by: Zhiyong Yang <zhiyong.yang@intel.com> Signed-off-by: Ray Kinsella <mdr@ashroe.eu> Signed-off-by: Junfeng Wang <drenfong.wang@intel.com> Change-Id: I313103202bd76f2dd638cd942554721b37ddad60
Diffstat (limited to 'src/vnet/ip')
-rw-r--r--src/vnet/ip/vtep.h42
1 files changed, 42 insertions, 0 deletions
diff --git a/src/vnet/ip/vtep.h b/src/vnet/ip/vtep.h
index 703ace18dba..345b6db1f9b 100644
--- a/src/vnet/ip/vtep.h
+++ b/src/vnet/ip/vtep.h
@@ -112,6 +112,48 @@ vtep4_check (vtep_table_t * t, vlib_buffer_t * b0, ip4_header_t * ip40,
return VTEP_CHECK_PASS;
}
/**
 * Small direct-mapped cache of recently-seen VTEP IPv4 keys.
 *
 * Holds 8 vtep4_key_t entries so a 512-bit (8 x 64-bit) vector compare
 * can test all of them at once; entries are replaced in round-robin
 * order via @c idx.
 *
 * NOTE(review): no explicit 64-byte alignment attribute is present, so
 * an aligned 512-bit load of @c vtep4_cache is not guaranteed safe —
 * confirm alignment at every allocation site before using aligned loads.
 */
typedef struct
{
  vtep4_key_t vtep4_cache[8];	/* cached keys, compared 8-at-a-time */
  int idx;			/* next slot to overwrite (round-robin, 0..7) */
} vtep4_cache_t;
+
+always_inline u8
+vtep4_check_vector (vtep_table_t * t, vlib_buffer_t * b0, ip4_header_t * ip40,
+ vtep4_key_t * last_k4, vtep4_cache_t * vtep4_u512)
+{
+ vtep4_key_t k4;
+ k4.addr.as_u32 = ip40->dst_address.as_u32;
+ k4.fib_index = vlib_buffer_get_ip4_fib_index (b0);
+
+ if (PREDICT_TRUE (k4.as_u64 == last_k4->as_u64))
+ return VTEP_CHECK_PASS_UNCHANGED;
+
+#ifdef CLIB_HAVE_VEC512
+ u64x8 k4_u64x8 = u64x8_splat (k4.as_u64);
+ u64x8 cache = u64x8_load_aligned (vtep4_u512->vtep4_cache);
+ u8 result = u64x8_mask_is_equal (cache, k4_u64x8);
+ if (PREDICT_TRUE (result != 0))
+ {
+ k4.as_u64 =
+ vtep4_u512->vtep4_cache[count_trailing_zeros (result)].as_u64;
+ return VTEP_CHECK_PASS_UNCHANGED;
+ }
+#endif
+
+ if (PREDICT_FALSE (!hash_get (t->vtep4, k4.as_u64)))
+ return VTEP_CHECK_FAIL;
+
+ last_k4->as_u64 = k4.as_u64;
+
+#ifdef CLIB_HAVE_VEC512
+ vtep4_u512->vtep4_cache[vtep4_u512->idx].as_u64 = k4.as_u64;
+ vtep4_u512->idx = (vtep4_u512->idx + 1) & 0x7;
+#endif
+
+ return VTEP_CHECK_PASS;
+}
+
always_inline u8
vtep6_check (vtep_table_t * t, vlib_buffer_t * b0, ip6_header_t * ip60,
vtep6_key_t * last_k6)