From e71492655fab8a70285b3dcf1419420a337750f9 Mon Sep 17 00:00:00 2001
From: Mohammed Hawari
Date: Wed, 18 May 2022 10:08:47 +0200
Subject: vlib: implement aux data handoff

Add vlib_buffer_enqueue_to_thread_with_aux () so that a node can hand
off one u32 of aux data per buffer together with the buffer index.
Frame queue elements gain an aux_data[] array, and each frame queue
main now carries its own dequeue function, selected at init time from
the target node's aux_offset (with or without aux data copy).

Type: improvement

Change-Id: I20b41537a249a55f01004e45392b34adaa8fd792
Signed-off-by: Mohammed Hawari
---
 src/vlib/buffer_funcs.c | 87 ++++++++++++++++++++++++++++++++++++++++++-------
 src/vlib/buffer_funcs.h |  9 +++--
 src/vlib/buffer_node.h  | 14 ++++++++
 src/vlib/main.c         |  8 +++--
 src/vlib/threads.c      | 19 +++++++++++
 src/vlib/threads.h      |  7 +++-
 src/vppinfra/cpu.h      |  3 ++
 7 files changed, 128 insertions(+), 19 deletions(-)

diff --git a/src/vlib/buffer_funcs.c b/src/vlib/buffer_funcs.c
index 32c2d1b8a2f..4ad652b062f 100644
--- a/src/vlib/buffer_funcs.c
+++ b/src/vlib/buffer_funcs.c
@@ -202,7 +202,8 @@ vlib_buffer_enqueue_to_thread_inline (vlib_main_t *vm,
 				      vlib_node_runtime_t *node,
 				      vlib_frame_queue_main_t *fqm,
 				      u32 *buffer_indices, u16 *thread_indices,
-				      u32 n_packets, int drop_on_congestion)
+				      u32 n_packets, int drop_on_congestion,
+				      int with_aux, u32 *aux_data)
 {
   u32 drop_list[VLIB_FRAME_SIZE], n_drop = 0;
   vlib_frame_bitmap_t mask, used_elts = {};
@@ -218,6 +219,8 @@ more:
 
   n_comp = clib_compress_u32 (hf ? hf->buffer_index : drop_list + n_drop,
 			      buffer_indices, mask, n_packets);
+  if (with_aux && hf) /* drop_list holds buffer indices only */
+    clib_compress_u32 (hf->aux_data, aux_data, mask, n_packets);
 
   if (hf)
     {
@@ -269,7 +272,7 @@ CLIB_MULTIARCH_FN (vlib_buffer_enqueue_to_thread_fn)
     {
       n_enq += vlib_buffer_enqueue_to_thread_inline (
 	vm, node, fqm, buffer_indices, thread_indices, VLIB_FRAME_SIZE,
-	drop_on_congestion);
+	drop_on_congestion, 0 /* with_aux */, NULL);
       buffer_indices += VLIB_FRAME_SIZE;
       thread_indices += VLIB_FRAME_SIZE;
       n_packets -= VLIB_FRAME_SIZE;
@@ -278,24 +281,59 @@ CLIB_MULTIARCH_FN (vlib_buffer_enqueue_to_thread_fn)
   if (n_packets == 0)
     return n_enq;
 
-  n_enq += vlib_buffer_enqueue_to_thread_inline (vm, node, fqm, buffer_indices,
-						 thread_indices, n_packets,
-						 drop_on_congestion);
+  n_enq += vlib_buffer_enqueue_to_thread_inline (
+    vm, node, fqm, buffer_indices, thread_indices, n_packets,
+    drop_on_congestion, 0 /* with_aux */, NULL);
+
+  return n_enq;
+}
+
+u32 __clib_section (".vlib_buffer_enqueue_to_thread_with_aux_fn")
+CLIB_MULTIARCH_FN (vlib_buffer_enqueue_to_thread_with_aux_fn)
+(vlib_main_t *vm, vlib_node_runtime_t *node, u32 frame_queue_index,
+ u32 *buffer_indices, u32 *aux, u16 *thread_indices, u32 n_packets,
+ int drop_on_congestion)
+{
+  vlib_thread_main_t *tm = vlib_get_thread_main ();
+  vlib_frame_queue_main_t *fqm;
+  u32 n_enq = 0;
+
+  fqm = vec_elt_at_index (tm->frame_queue_mains, frame_queue_index);
+
+  while (n_packets >= VLIB_FRAME_SIZE)
+    {
+      n_enq += vlib_buffer_enqueue_to_thread_inline (
+	vm, node, fqm, buffer_indices, thread_indices, VLIB_FRAME_SIZE,
+	drop_on_congestion, 1 /* with_aux */, aux);
+      buffer_indices += VLIB_FRAME_SIZE;
+      aux += VLIB_FRAME_SIZE; /* stay in step with buffer_indices */
+      thread_indices += VLIB_FRAME_SIZE;
+      n_packets -= VLIB_FRAME_SIZE;
+    }
+
+  if (n_packets == 0)
+    return n_enq;
+
+  n_enq += vlib_buffer_enqueue_to_thread_inline (
+    vm, node, fqm, buffer_indices, thread_indices, n_packets,
+    drop_on_congestion, 1 /* with_aux */, aux);
 
   return n_enq;
 }
 
 CLIB_MARCH_FN_REGISTRATION (vlib_buffer_enqueue_to_thread_fn);
+CLIB_MARCH_FN_REGISTRATION (vlib_buffer_enqueue_to_thread_with_aux_fn);
 
-u32 __clib_section (".vlib_frame_queue_dequeue_fn")
-CLIB_MULTIARCH_FN (vlib_frame_queue_dequeue_fn)
-(vlib_main_t *vm, vlib_frame_queue_main_t *fqm)
+static_always_inline u32
+vlib_frame_queue_dequeue_inline (vlib_main_t *vm, vlib_frame_queue_main_t *fqm,
+				 u8 with_aux)
 {
   u32 thread_id = vm->thread_index;
   vlib_frame_queue_t *fq = fqm->vlib_frame_queues[thread_id];
   u32 mask = fq->nelts - 1;
   vlib_frame_queue_elt_t *elt;
-  u32 n_free, n_copy, *from, *to = 0, processed = 0, vectors = 0;
+  u32 n_free, n_copy, *from, *from_aux, *to = 0, *to_aux = 0, processed = 0,
+    vectors = 0;
   vlib_frame_t *f = 0;
 
   ASSERT (fq);
@@ -352,13 +390,16 @@ CLIB_MULTIARCH_FN (vlib_frame_queue_dequeue_fn)
 	break;
 
       from = elt->buffer_index + elt->offset;
-
+      if (with_aux)
+	from_aux = elt->aux_data + elt->offset;
       ASSERT (elt->offset + elt->n_vectors <= VLIB_FRAME_SIZE);
 
       if (f == 0)
 	{
 	  f = vlib_get_frame_to_node (vm, fqm->node_index);
 	  to = vlib_frame_vector_args (f);
+	  if (with_aux)
+	    to_aux = vlib_frame_aux_args (f);
 	  n_free = VLIB_FRAME_SIZE;
 	}
 
@@ -369,6 +410,12 @@ CLIB_MULTIARCH_FN (vlib_frame_queue_dequeue_fn)
 
       vlib_buffer_copy_indices (to, from, n_copy);
       to += n_copy;
+      if (with_aux)
+	{
+	  vlib_buffer_copy_indices (to_aux, from_aux, n_copy);
+	  to_aux += n_copy;
+	}
+
       n_free -= n_copy;
       vectors += n_copy;
 
@@ -408,8 +455,24 @@ CLIB_MULTIARCH_FN (vlib_frame_queue_dequeue_fn)
   return processed;
 }
 
+u32 __clib_section (".vlib_frame_queue_dequeue_fn")
+CLIB_MULTIARCH_FN (vlib_frame_queue_dequeue_fn)
+(vlib_main_t *vm, vlib_frame_queue_main_t *fqm)
+{
+  return vlib_frame_queue_dequeue_inline (vm, fqm, 0 /* with_aux */);
+}
+
 CLIB_MARCH_FN_REGISTRATION (vlib_frame_queue_dequeue_fn);
 
+u32 __clib_section (".vlib_frame_queue_dequeue_with_aux_fn")
+CLIB_MULTIARCH_FN (vlib_frame_queue_dequeue_with_aux_fn)
+(vlib_main_t *vm, vlib_frame_queue_main_t *fqm)
+{
+  return vlib_frame_queue_dequeue_inline (vm, fqm, 1 /* with_aux */);
+}
+
+CLIB_MARCH_FN_REGISTRATION (vlib_frame_queue_dequeue_with_aux_fn);
+
 #ifndef CLIB_MARCH_VARIANT
 vlib_buffer_func_main_t vlib_buffer_func_main;
 
@@ -423,8 +486,8 @@ vlib_buffer_funcs_init (vlib_main_t *vm)
     CLIB_MARCH_FN_POINTER (vlib_buffer_enqueue_to_single_next_fn);
   bfm->buffer_enqueue_to_thread_fn =
     CLIB_MARCH_FN_POINTER (vlib_buffer_enqueue_to_thread_fn);
-  bfm->frame_queue_dequeue_fn =
-    CLIB_MARCH_FN_POINTER (vlib_frame_queue_dequeue_fn);
+  bfm->buffer_enqueue_to_thread_with_aux_fn =
+    CLIB_MARCH_FN_POINTER (vlib_buffer_enqueue_to_thread_with_aux_fn);
   return 0;
 }
 
diff --git a/src/vlib/buffer_funcs.h b/src/vlib/buffer_funcs.h
index 30fe23443ab..00dce8033fe 100644
--- a/src/vlib/buffer_funcs.h
+++ b/src/vlib/buffer_funcs.h
@@ -65,15 +65,18 @@ typedef u32 (vlib_buffer_enqueue_to_thread_fn_t) (
   u32 *buffer_indices, u16 *thread_indices, u32 n_packets,
   int drop_on_congestion);
 
-typedef u32 (vlib_frame_queue_dequeue_fn_t) (vlib_main_t *vm,
-					     vlib_frame_queue_main_t *fqm);
+typedef u32 (vlib_buffer_enqueue_to_thread_with_aux_fn_t) (
+  vlib_main_t *vm, vlib_node_runtime_t *node, u32 frame_queue_index,
+  u32 *buffer_indices, u32 *aux, u16 *thread_indices, u32 n_packets,
+  int drop_on_congestion);
 
 typedef struct
 {
   vlib_buffer_enqueue_to_next_fn_t *buffer_enqueue_to_next_fn;
   vlib_buffer_enqueue_to_single_next_fn_t *buffer_enqueue_to_single_next_fn;
   vlib_buffer_enqueue_to_thread_fn_t *buffer_enqueue_to_thread_fn;
-  vlib_frame_queue_dequeue_fn_t *frame_queue_dequeue_fn;
+  vlib_buffer_enqueue_to_thread_with_aux_fn_t
+    *buffer_enqueue_to_thread_with_aux_fn;
 } vlib_buffer_func_main_t;
 
 extern vlib_buffer_func_main_t vlib_buffer_func_main;
diff --git a/src/vlib/buffer_node.h b/src/vlib/buffer_node.h
index 10ebd253c1b..a4c259f715c 100644
--- a/src/vlib/buffer_node.h
+++ b/src/vlib/buffer_node.h
@@ -391,6 +391,20 @@ vlib_buffer_enqueue_to_thread (vlib_main_t *vm, vlib_node_runtime_t *node,
 	       n_packets, drop_on_congestion);
 }
 
+static_always_inline u32
+vlib_buffer_enqueue_to_thread_with_aux (vlib_main_t *vm,
+					vlib_node_runtime_t *node,
+					u32 frame_queue_index,
+					u32 *buffer_indices, u32 *aux,
+					u16 *thread_indices, u32 n_packets,
+					int drop_on_congestion)
+{
+  vlib_buffer_enqueue_to_thread_with_aux_fn_t *fn;
+  fn = vlib_buffer_func_main.buffer_enqueue_to_thread_with_aux_fn;
+  return (fn) (vm, node, frame_queue_index, buffer_indices, aux,
+	       thread_indices, n_packets, drop_on_congestion);
+}
+
 #endif /* included_vlib_buffer_node_h */
 
 /*
diff --git a/src/vlib/main.c b/src/vlib/main.c
index 41d18e2dfa6..9c7d6f58991 100644
--- a/src/vlib/main.c
+++ b/src/vlib/main.c
@@ -1519,8 +1519,7 @@ vlib_main_or_worker_loop (vlib_main_t * vm, int is_main)
       if (PREDICT_FALSE (vm->check_frame_queues + frame_queue_check_counter))
 	{
 	  u32 processed = 0;
-	  vlib_frame_queue_dequeue_fn_t *fn =
-	    vlib_buffer_func_main.frame_queue_dequeue_fn;
+	  vlib_frame_queue_dequeue_fn_t *fn;
 
 	  if (vm->check_frame_queues)
 	    {
@@ -1529,7 +1528,10 @@ vlib_main_or_worker_loop (vlib_main_t * vm, int is_main)
 	    }
 
 	  vec_foreach (fqm, tm->frame_queue_mains)
-	    processed += (fn) (vm, fqm);
+	    {
+	      fn = fqm->frame_queue_dequeue_fn;
+	      processed += (fn) (vm, fqm);
+	    }
 
 	  /* No handoff queue work found? */
 	  if (processed)
diff --git a/src/vlib/threads.c b/src/vlib/threads.c
index 57ba39a00d8..6c39e688b72 100644
--- a/src/vlib/threads.c
+++ b/src/vlib/threads.c
@@ -1587,12 +1587,18 @@ VLIB_REGISTER_THREAD (worker_thread_reg, static) = {
 };
 /* *INDENT-ON* */
 
+extern clib_march_fn_registration
+  *vlib_frame_queue_dequeue_with_aux_fn_march_fn_registrations;
+extern clib_march_fn_registration
+  *vlib_frame_queue_dequeue_fn_march_fn_registrations;
 u32
 vlib_frame_queue_main_init (u32 node_index, u32 frame_queue_nelts)
 {
   vlib_thread_main_t *tm = vlib_get_thread_main ();
+  vlib_main_t *vm = vlib_get_main ();
   vlib_frame_queue_main_t *fqm;
   vlib_frame_queue_t *fq;
+  vlib_node_t *node;
   int i;
   u32 num_threads;
 
@@ -1604,6 +1610,19 @@ vlib_frame_queue_main_init (u32 node_index, u32 frame_queue_nelts)
 
   vec_add2 (tm->frame_queue_mains, fqm, 1);
 
+  node = vlib_get_node (vm, node_index); /* fqm->node_index is set below */
+  ASSERT (node);
+  if (node->aux_offset)
+    {
+      fqm->frame_queue_dequeue_fn =
+	CLIB_MARCH_FN_VOID_POINTER (vlib_frame_queue_dequeue_with_aux_fn);
+    }
+  else
+    {
+      fqm->frame_queue_dequeue_fn =
+	CLIB_MARCH_FN_VOID_POINTER (vlib_frame_queue_dequeue_fn);
+    }
+
   fqm->node_index = node_index;
   fqm->frame_queue_nelts = frame_queue_nelts;
 
diff --git a/src/vlib/threads.h b/src/vlib/threads.h
index b25d4764168..97df3d253a0 100644
--- a/src/vlib/threads.h
+++ b/src/vlib/threads.h
@@ -75,6 +75,7 @@ typedef struct
   CLIB_CACHE_LINE_ALIGN_MARK (cacheline1);
 
   u32 buffer_index[VLIB_FRAME_SIZE];
+  u32 aux_data[VLIB_FRAME_SIZE];
 }
 vlib_frame_queue_elt_t;
 
@@ -133,7 +134,10 @@ typedef struct
 }
 vlib_frame_queue_t;
 
-typedef struct
+struct vlib_frame_queue_main_t_;
+typedef u32 (vlib_frame_queue_dequeue_fn_t) (
+  vlib_main_t *vm, struct vlib_frame_queue_main_t_ *fqm);
+typedef struct vlib_frame_queue_main_t_
 {
   u32 node_index;
   u32 frame_queue_nelts;
@@ -143,6 +147,7 @@ typedef struct
   /* for frame queue tracing */
   frame_queue_trace_t *frame_queue_traces;
   frame_queue_nelt_counter_t *frame_queue_histogram;
+  vlib_frame_queue_dequeue_fn_t *frame_queue_dequeue_fn;
 } vlib_frame_queue_main_t;
 
 typedef struct
diff --git a/src/vppinfra/cpu.h b/src/vppinfra/cpu.h
index 329e5cc298d..d123f39871d 100644
--- a/src/vppinfra/cpu.h
+++ b/src/vppinfra/cpu.h
@@ -84,6 +84,9 @@ clib_march_select_fn_ptr (clib_march_fn_registration * r)
 #define CLIB_MARCH_FN_POINTER(fn)                                             \
   (__typeof__ (fn) *) clib_march_select_fn_ptr (fn##_march_fn_registrations);
 
+#define CLIB_MARCH_FN_VOID_POINTER(fn)                                        \
+  clib_march_select_fn_ptr (fn##_march_fn_registrations);
+
 #define _CLIB_MARCH_FN_REGISTRATION(fn)                                       \
   static clib_march_fn_registration                                           \
     CLIB_MARCH_SFX(fn##_march_fn_registration) =                              \
-- 
cgit 1.2.3-korg