From e71492655fab8a70285b3dcf1419420a337750f9 Mon Sep 17 00:00:00 2001
From: Mohammed Hawari <mohammed@hawari.fr>
Date: Wed, 18 May 2022 10:08:47 +0200
Subject: vlib: implement aux data handoff

Type: improvement
Change-Id: I20b41537a249a55f01004e45392b34adaa8fd792
Signed-off-by: Mohammed Hawari <mohammed@hawari.fr>
---
 src/vlib/buffer_funcs.c | 87 ++++++++++++++++++++++++++++++++++++++++++-------
 src/vlib/buffer_funcs.h |  9 +++--
 src/vlib/buffer_node.h  | 14 ++++++++
 src/vlib/main.c         |  8 +++--
 src/vlib/threads.c      | 19 +++++++++++
 src/vlib/threads.h      |  7 +++-
 src/vppinfra/cpu.h      |  3 ++
 7 files changed, 128 insertions(+), 19 deletions(-)

(limited to 'src')

diff --git a/src/vlib/buffer_funcs.c b/src/vlib/buffer_funcs.c
index 32c2d1b8a2f..4ad652b062f 100644
--- a/src/vlib/buffer_funcs.c
+++ b/src/vlib/buffer_funcs.c
@@ -202,7 +202,8 @@ vlib_buffer_enqueue_to_thread_inline (vlib_main_t *vm,
 				      vlib_node_runtime_t *node,
 				      vlib_frame_queue_main_t *fqm,
 				      u32 *buffer_indices, u16 *thread_indices,
-				      u32 n_packets, int drop_on_congestion)
+				      u32 n_packets, int drop_on_congestion,
+				      int with_aux, u32 *aux_data)
 {
   u32 drop_list[VLIB_FRAME_SIZE], n_drop = 0;
   vlib_frame_bitmap_t mask, used_elts = {};
@@ -218,6 +219,9 @@ more:
 
   n_comp = clib_compress_u32 (hf ? hf->buffer_index : drop_list + n_drop,
 			      buffer_indices, mask, n_packets);
+  /* no aux for drops: drop_list + n_drop holds the indices stored above */
+  if (with_aux && hf)
+    clib_compress_u32 (hf->aux_data, aux_data, mask, n_packets);
 
   if (hf)
     {
@@ -269,7 +273,7 @@ CLIB_MULTIARCH_FN (vlib_buffer_enqueue_to_thread_fn)
     {
       n_enq += vlib_buffer_enqueue_to_thread_inline (
 	vm, node, fqm, buffer_indices, thread_indices, VLIB_FRAME_SIZE,
-	drop_on_congestion);
+	drop_on_congestion, 0 /* with_aux */, NULL);
       buffer_indices += VLIB_FRAME_SIZE;
       thread_indices += VLIB_FRAME_SIZE;
       n_packets -= VLIB_FRAME_SIZE;
@@ -278,24 +282,58 @@ CLIB_MULTIARCH_FN (vlib_buffer_enqueue_to_thread_fn)
   if (n_packets == 0)
     return n_enq;
 
-  n_enq += vlib_buffer_enqueue_to_thread_inline (vm, node, fqm, buffer_indices,
-						 thread_indices, n_packets,
-						 drop_on_congestion);
+  n_enq += vlib_buffer_enqueue_to_thread_inline (
+    vm, node, fqm, buffer_indices, thread_indices, n_packets,
+    drop_on_congestion, 0 /* with_aux */, NULL);
+
+  return n_enq;
+}
+
+/* Multiarch enqueue-to-thread variant that passes a u32 of aux data per
+ * buffer index; returns the sum of the inline helper's return values. */
+u32 __clib_section (".vlib_buffer_enqueue_to_thread_with_aux_fn")
+CLIB_MULTIARCH_FN (vlib_buffer_enqueue_to_thread_with_aux_fn)
+(vlib_main_t *vm, vlib_node_runtime_t *node, u32 frame_queue_index,
+ u32 *buffer_indices, u32 *aux, u16 *thread_indices, u32 n_packets,
+ int drop_on_congestion)
+{
+  vlib_thread_main_t *tm = vlib_get_thread_main ();
+  vlib_frame_queue_main_t *fqm =
+    vec_elt_at_index (tm->frame_queue_mains, frame_queue_index);
+  u32 n_enq = 0;
+
+  while (n_packets >= VLIB_FRAME_SIZE)
+    {
+      n_enq += vlib_buffer_enqueue_to_thread_inline (
+	vm, node, fqm, buffer_indices, thread_indices, VLIB_FRAME_SIZE,
+	drop_on_congestion, 1 /* with_aux */, aux);
+      buffer_indices += VLIB_FRAME_SIZE;
+      aux += VLIB_FRAME_SIZE; /* aux[] runs parallel to buffer_indices[] */
+      thread_indices += VLIB_FRAME_SIZE;
+      n_packets -= VLIB_FRAME_SIZE;
+    }
+
+  if (n_packets) /* trailing partial batch */
+    n_enq += vlib_buffer_enqueue_to_thread_inline (
+      vm, node, fqm, buffer_indices, thread_indices, n_packets,
+      drop_on_congestion, 1 /* with_aux */, aux);
 
   return n_enq;
 }
 
 CLIB_MARCH_FN_REGISTRATION (vlib_buffer_enqueue_to_thread_fn);
+CLIB_MARCH_FN_REGISTRATION (vlib_buffer_enqueue_to_thread_with_aux_fn);
 
-u32 __clib_section (".vlib_frame_queue_dequeue_fn")
-CLIB_MULTIARCH_FN (vlib_frame_queue_dequeue_fn)
-(vlib_main_t *vm, vlib_frame_queue_main_t *fqm)
+static_always_inline u32
+vlib_frame_queue_dequeue_inline (vlib_main_t *vm, vlib_frame_queue_main_t *fqm,
+				 u8 with_aux)
 {
   u32 thread_id = vm->thread_index;
   vlib_frame_queue_t *fq = fqm->vlib_frame_queues[thread_id];
   u32 mask = fq->nelts - 1;
   vlib_frame_queue_elt_t *elt;
-  u32 n_free, n_copy, *from, *to = 0, processed = 0, vectors = 0;
+  u32 n_free, n_copy, *from, *from_aux, *to = 0, *to_aux = 0, processed = 0,
+					vectors = 0;
   vlib_frame_t *f = 0;
 
   ASSERT (fq);
@@ -352,13 +390,16 @@ CLIB_MULTIARCH_FN (vlib_frame_queue_dequeue_fn)
 	break;
 
       from = elt->buffer_index + elt->offset;
-
+      if (with_aux)
+	from_aux = elt->aux_data + elt->offset;
       ASSERT (elt->offset + elt->n_vectors <= VLIB_FRAME_SIZE);
 
       if (f == 0)
 	{
 	  f = vlib_get_frame_to_node (vm, fqm->node_index);
 	  to = vlib_frame_vector_args (f);
+	  if (with_aux)
+	    to_aux = vlib_frame_aux_args (f);
 	  n_free = VLIB_FRAME_SIZE;
 	}
 
@@ -369,6 +410,12 @@ CLIB_MULTIARCH_FN (vlib_frame_queue_dequeue_fn)
 
       vlib_buffer_copy_indices (to, from, n_copy);
       to += n_copy;
+      if (with_aux)
+	{
+	  vlib_buffer_copy_indices (to_aux, from_aux, n_copy);
+	  to_aux += n_copy;
+	}
+
       n_free -= n_copy;
       vectors += n_copy;
 
@@ -408,8 +455,24 @@ CLIB_MULTIARCH_FN (vlib_frame_queue_dequeue_fn)
   return processed;
 }
 
+u32 __clib_section (".vlib_frame_queue_dequeue_fn")
+CLIB_MULTIARCH_FN (vlib_frame_queue_dequeue_fn)
+(vlib_main_t *vm, vlib_frame_queue_main_t *fqm)
+{
+  return vlib_frame_queue_dequeue_inline (vm, fqm, 0 /* with_aux */);
+}
+
 CLIB_MARCH_FN_REGISTRATION (vlib_frame_queue_dequeue_fn);
 
+u32 __clib_section (".vlib_frame_queue_dequeue_with_aux_fn")
+CLIB_MULTIARCH_FN (vlib_frame_queue_dequeue_with_aux_fn)
+(vlib_main_t *vm, vlib_frame_queue_main_t *fqm)
+{
+  return vlib_frame_queue_dequeue_inline (vm, fqm, 1 /* with_aux */);
+}
+
+CLIB_MARCH_FN_REGISTRATION (vlib_frame_queue_dequeue_with_aux_fn);
+
 #ifndef CLIB_MARCH_VARIANT
 vlib_buffer_func_main_t vlib_buffer_func_main;
 
@@ -423,8 +486,8 @@ vlib_buffer_funcs_init (vlib_main_t *vm)
     CLIB_MARCH_FN_POINTER (vlib_buffer_enqueue_to_single_next_fn);
   bfm->buffer_enqueue_to_thread_fn =
     CLIB_MARCH_FN_POINTER (vlib_buffer_enqueue_to_thread_fn);
-  bfm->frame_queue_dequeue_fn =
-    CLIB_MARCH_FN_POINTER (vlib_frame_queue_dequeue_fn);
+  bfm->buffer_enqueue_to_thread_with_aux_fn =
+    CLIB_MARCH_FN_POINTER (vlib_buffer_enqueue_to_thread_with_aux_fn);
   return 0;
 }
 
diff --git a/src/vlib/buffer_funcs.h b/src/vlib/buffer_funcs.h
index 30fe23443ab..00dce8033fe 100644
--- a/src/vlib/buffer_funcs.h
+++ b/src/vlib/buffer_funcs.h
@@ -65,15 +65,18 @@ typedef u32 (vlib_buffer_enqueue_to_thread_fn_t) (
   u32 *buffer_indices, u16 *thread_indices, u32 n_packets,
   int drop_on_congestion);
 
-typedef u32 (vlib_frame_queue_dequeue_fn_t) (vlib_main_t *vm,
-					     vlib_frame_queue_main_t *fqm);
+typedef u32 (vlib_buffer_enqueue_to_thread_with_aux_fn_t) (
+  vlib_main_t *vm, vlib_node_runtime_t *node, u32 frame_queue_index,
+  u32 *buffer_indices, u32 *aux, u16 *thread_indices, u32 n_packets,
+  int drop_on_congestion);
 
 typedef struct
 {
   vlib_buffer_enqueue_to_next_fn_t *buffer_enqueue_to_next_fn;
   vlib_buffer_enqueue_to_single_next_fn_t *buffer_enqueue_to_single_next_fn;
   vlib_buffer_enqueue_to_thread_fn_t *buffer_enqueue_to_thread_fn;
-  vlib_frame_queue_dequeue_fn_t *frame_queue_dequeue_fn;
+  vlib_buffer_enqueue_to_thread_with_aux_fn_t
+    *buffer_enqueue_to_thread_with_aux_fn;
 } vlib_buffer_func_main_t;
 
 extern vlib_buffer_func_main_t vlib_buffer_func_main;
diff --git a/src/vlib/buffer_node.h b/src/vlib/buffer_node.h
index 10ebd253c1b..a4c259f715c 100644
--- a/src/vlib/buffer_node.h
+++ b/src/vlib/buffer_node.h
@@ -391,6 +391,20 @@ vlib_buffer_enqueue_to_thread (vlib_main_t *vm, vlib_node_runtime_t *node,
 	       n_packets, drop_on_congestion);
 }
 
+static_always_inline u32
+vlib_buffer_enqueue_to_thread_with_aux (vlib_main_t *vm,
+					vlib_node_runtime_t *node,
+					u32 frame_queue_index,
+					u32 *buffer_indices, u32 *aux,
+					u16 *thread_indices, u32 n_packets,
+					int drop_on_congestion)
+{
+  vlib_buffer_enqueue_to_thread_with_aux_fn_t *fn;
+  fn = vlib_buffer_func_main.buffer_enqueue_to_thread_with_aux_fn;
+  return (fn) (vm, node, frame_queue_index, buffer_indices, aux,
+	       thread_indices, n_packets, drop_on_congestion);
+}
+
 #endif /* included_vlib_buffer_node_h */
 
 /*
diff --git a/src/vlib/main.c b/src/vlib/main.c
index 41d18e2dfa6..9c7d6f58991 100644
--- a/src/vlib/main.c
+++ b/src/vlib/main.c
@@ -1519,8 +1519,7 @@ vlib_main_or_worker_loop (vlib_main_t * vm, int is_main)
       if (PREDICT_FALSE (vm->check_frame_queues + frame_queue_check_counter))
 	{
 	  u32 processed = 0;
-	  vlib_frame_queue_dequeue_fn_t *fn =
-	    vlib_buffer_func_main.frame_queue_dequeue_fn;
+	  vlib_frame_queue_dequeue_fn_t *fn;
 
 	  if (vm->check_frame_queues)
 	    {
@@ -1529,7 +1528,10 @@ vlib_main_or_worker_loop (vlib_main_t * vm, int is_main)
 	    }
 
 	  vec_foreach (fqm, tm->frame_queue_mains)
-	    processed += (fn) (vm, fqm);
+	    {
+	      fn = fqm->frame_queue_dequeue_fn;
+	      processed += (fn) (vm, fqm);
+	    }
 
 	  /* No handoff queue work found? */
 	  if (processed)
diff --git a/src/vlib/threads.c b/src/vlib/threads.c
index 57ba39a00d8..6c39e688b72 100644
--- a/src/vlib/threads.c
+++ b/src/vlib/threads.c
@@ -1587,12 +1587,18 @@ VLIB_REGISTER_THREAD (worker_thread_reg, static) = {
 };
 /* *INDENT-ON* */
 
+extern clib_march_fn_registration
+  *vlib_frame_queue_dequeue_with_aux_fn_march_fn_registrations;
+extern clib_march_fn_registration
+  *vlib_frame_queue_dequeue_fn_march_fn_registrations;
 u32
 vlib_frame_queue_main_init (u32 node_index, u32 frame_queue_nelts)
 {
   vlib_thread_main_t *tm = vlib_get_thread_main ();
+  vlib_main_t *vm = vlib_get_main ();
   vlib_frame_queue_main_t *fqm;
   vlib_frame_queue_t *fq;
+  vlib_node_t *node;
   int i;
   u32 num_threads;
 
@@ -1604,6 +1610,19 @@ vlib_frame_queue_main_init (u32 node_index, u32 frame_queue_nelts)
 
   vec_add2 (tm->frame_queue_mains, fqm, 1);
 
+  node = vlib_get_node (vm, node_index); /* fqm->node_index is set below */
+  ASSERT (node);
+  if (node->aux_offset) /* nonzero: install the aux-copying dequeue */
+    { /* braces required: the macro expands with its own ';' */
+      fqm->frame_queue_dequeue_fn =
+	CLIB_MARCH_FN_VOID_POINTER (vlib_frame_queue_dequeue_with_aux_fn);
+    }
+  else
+    {
+      fqm->frame_queue_dequeue_fn =
+	CLIB_MARCH_FN_VOID_POINTER (vlib_frame_queue_dequeue_fn);
+    }
+
   fqm->node_index = node_index;
   fqm->frame_queue_nelts = frame_queue_nelts;
 
diff --git a/src/vlib/threads.h b/src/vlib/threads.h
index b25d4764168..97df3d253a0 100644
--- a/src/vlib/threads.h
+++ b/src/vlib/threads.h
@@ -75,6 +75,7 @@ typedef struct
 
   CLIB_CACHE_LINE_ALIGN_MARK (cacheline1);
   u32 buffer_index[VLIB_FRAME_SIZE];
+  u32 aux_data[VLIB_FRAME_SIZE];
 }
 vlib_frame_queue_elt_t;
 
@@ -133,7 +134,10 @@ typedef struct
 }
 vlib_frame_queue_t;
 
-typedef struct
+struct vlib_frame_queue_main_t_;
+typedef u32 (vlib_frame_queue_dequeue_fn_t) (
+  vlib_main_t *vm, struct vlib_frame_queue_main_t_ *fqm);
+typedef struct vlib_frame_queue_main_t_
 {
   u32 node_index;
   u32 frame_queue_nelts;
@@ -143,6 +147,7 @@ typedef struct
   /* for frame queue tracing */
   frame_queue_trace_t *frame_queue_traces;
   frame_queue_nelt_counter_t *frame_queue_histogram;
+  vlib_frame_queue_dequeue_fn_t *frame_queue_dequeue_fn;
 } vlib_frame_queue_main_t;
 
 typedef struct
diff --git a/src/vppinfra/cpu.h b/src/vppinfra/cpu.h
index 329e5cc298d..d123f39871d 100644
--- a/src/vppinfra/cpu.h
+++ b/src/vppinfra/cpu.h
@@ -84,6 +84,9 @@ clib_march_select_fn_ptr (clib_march_fn_registration * r)
 #define CLIB_MARCH_FN_POINTER(fn)                                             \
   (__typeof__ (fn) *) clib_march_select_fn_ptr (fn##_march_fn_registrations);
 
+#define CLIB_MARCH_FN_VOID_POINTER(fn)                                        \
+  clib_march_select_fn_ptr (fn##_march_fn_registrations);
+
 #define _CLIB_MARCH_FN_REGISTRATION(fn) \
 static clib_march_fn_registration \
 CLIB_MARCH_SFX(fn##_march_fn_registration) = \
-- 
cgit