summaryrefslogtreecommitdiffstats
path: root/src/plugins/gtpu/gtpu_decap.c
diff options
context:
space:
mode:
authorZhiyong Yang <zhiyong.yang@intel.com>2020-07-08 20:28:36 +0000
committerJunfeng Wang <drenfong.wang@intel.com>2020-09-04 02:34:03 +0000
commit5e52417a2aa3b2063a811c6a9f293a79d73bcb43 (patch)
tree4b4cdaccaa682c1dddea8617717af5ac4f520ea0 /src/plugins/gtpu/gtpu_decap.c
parent4a433f46084d05a524154db64d3d7d2567305009 (diff)
ip: enhance vtep4_check of tunnel by vector way
This patch aims to improve decap performance by reducing expensive hash_get calls as much as possible using AVX512 on XEON, e.g. for vxlan, vxlan_gpe, geneve, gtpu. In the existing code, if the vtep4 of the current packet matches the last vtep4_key_t, the expensive hash computation is avoided and the code returns directly. This patch greatly improves tunnel decap in the multiple-flows case by leveraging a 512-bit vector register on XEON that accommodates 8 vtep4_keys. It increases the possibility of avoiding unnecessary hash computation whenever the hash key of the current packet hits any one of the 8 entries in the 512-bit cache. The oldest element in vtep4_cache_t is updated in round-robin order. vlib_get_buffers is also leveraged at the same time. Type: improvement Signed-off-by: Zhiyong Yang <zhiyong.yang@intel.com> Signed-off-by: Ray Kinsella <mdr@ashroe.eu> Signed-off-by: Junfeng Wang <drenfong.wang@intel.com> Change-Id: I313103202bd76f2dd638cd942554721b37ddad60
Diffstat (limited to 'src/plugins/gtpu/gtpu_decap.c')
-rw-r--r--src/plugins/gtpu/gtpu_decap.c42
1 files changed, 30 insertions, 12 deletions
diff --git a/src/plugins/gtpu/gtpu_decap.c b/src/plugins/gtpu/gtpu_decap.c
index 05c21381d55..7a88aae63a6 100644
--- a/src/plugins/gtpu/gtpu_decap.c
+++ b/src/plugins/gtpu/gtpu_decap.c
@@ -804,10 +804,16 @@ ip_gtpu_bypass_inline (vlib_main_t * vm,
matching a local VTEP address */
vtep6_key_t last_vtep6; /* last IPv6 address / fib index
matching a local VTEP address */
+ vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b = bufs;
+#ifdef CLIB_HAVE_VEC512
+ vtep4_cache_t vtep4_u512;
+ clib_memset (&vtep4_u512, 0, sizeof (vtep4_u512));
+#endif
from = vlib_frame_vector_args (frame);
n_left_from = frame->n_vectors;
next_index = node->cached_next_index;
+ vlib_get_buffers (vm, from, bufs, n_left_from);
if (node->flags & VLIB_NODE_FLAG_TRACE)
ip4_forward_next_trace (vm, node, frame, VLIB_TX);
@@ -835,16 +841,11 @@ ip_gtpu_bypass_inline (vlib_main_t * vm,
/* Prefetch next iteration. */
{
- vlib_buffer_t * p2, * p3;
-
- p2 = vlib_get_buffer (vm, from[2]);
- p3 = vlib_get_buffer (vm, from[3]);
-
- vlib_prefetch_buffer_header (p2, LOAD);
- vlib_prefetch_buffer_header (p3, LOAD);
+ vlib_prefetch_buffer_header (b[2], LOAD);
+ vlib_prefetch_buffer_header (b[3], LOAD);
- CLIB_PREFETCH (p2->data, 2*CLIB_CACHE_LINE_BYTES, LOAD);
- CLIB_PREFETCH (p3->data, 2*CLIB_CACHE_LINE_BYTES, LOAD);
+ CLIB_PREFETCH (b[2]->data, 2*CLIB_CACHE_LINE_BYTES, LOAD);
+ CLIB_PREFETCH (b[3]->data, 2*CLIB_CACHE_LINE_BYTES, LOAD);
}
bi0 = to_next[0] = from[0];
@@ -854,8 +855,9 @@ ip_gtpu_bypass_inline (vlib_main_t * vm,
to_next += 2;
n_left_to_next -= 2;
- b0 = vlib_get_buffer (vm, bi0);
- b1 = vlib_get_buffer (vm, bi1);
+ b0 = b[0];
+ b1 = b[1];
+ b += 2;
if (is_ip4)
{
ip40 = vlib_buffer_get_current (b0);
@@ -899,7 +901,12 @@ ip_gtpu_bypass_inline (vlib_main_t * vm,
/* Validate DIP against VTEPs*/
if (is_ip4)
{
+#ifdef CLIB_HAVE_VEC512
+ if (!vtep4_check_vector
+ (&gtm->vtep_table, b0, ip40, &last_vtep4, &vtep4_u512))
+#else
if (!vtep4_check (&gtm->vtep_table, b0, ip40, &last_vtep4))
+#endif
goto exit0; /* no local VTEP for GTPU packet */
}
else
@@ -973,7 +980,12 @@ ip_gtpu_bypass_inline (vlib_main_t * vm,
/* Validate DIP against VTEPs*/
if (is_ip4)
{
+#ifdef CLIB_HAVE_VEC512
+ if (!vtep4_check_vector
+ (&gtm->vtep_table, b1, ip41, &last_vtep4, &vtep4_u512))
+#else
if (!vtep4_check (&gtm->vtep_table, b1, ip41, &last_vtep4))
+#endif
goto exit1; /* no local VTEP for GTPU packet */
}
else
@@ -1053,7 +1065,8 @@ ip_gtpu_bypass_inline (vlib_main_t * vm,
to_next += 1;
n_left_to_next -= 1;
- b0 = vlib_get_buffer (vm, bi0);
+ b0 = b[0];
+ b++;
if (is_ip4)
ip40 = vlib_buffer_get_current (b0);
else
@@ -1083,7 +1096,12 @@ ip_gtpu_bypass_inline (vlib_main_t * vm,
/* Validate DIP against VTEPs*/
if (is_ip4)
{
+#ifdef CLIB_HAVE_VEC512
+ if (!vtep4_check_vector
+ (&gtm->vtep_table, b0, ip40, &last_vtep4, &vtep4_u512))
+#else
if (!vtep4_check (&gtm->vtep_table, b0, ip40, &last_vtep4))
+#endif
goto exit; /* no local VTEP for GTPU packet */
}
else