summaryrefslogtreecommitdiffstats
path: root/vlib
diff options
context:
space:
mode:
Diffstat (limited to 'vlib')
-rw-r--r--vlib/Makefile.am1
-rw-r--r--vlib/vlib/node.h18
-rw-r--r--vlib/vlib/threads.c245
-rw-r--r--vlib/vlib/threads.h12
-rw-r--r--vlib/vlib/threads_cli.c362
5 files changed, 584 insertions, 54 deletions
diff --git a/vlib/Makefile.am b/vlib/Makefile.am
index 274756719c1..07a34c563c5 100644
--- a/vlib/Makefile.am
+++ b/vlib/Makefile.am
@@ -41,6 +41,7 @@ libvlib_la_SOURCES = \
vlib/pci/pci.c \
vlib/pci/linux_pci.c \
vlib/threads.c \
+ vlib/threads_cli.c \
vlib/trace.c
if WITH_DPDK
diff --git a/vlib/vlib/node.h b/vlib/vlib/node.h
index a54f4e83e62..e9842275b0e 100644
--- a/vlib/vlib/node.h
+++ b/vlib/vlib/node.h
@@ -663,4 +663,22 @@ typedef struct {
vlib_node_registration_t * node_registrations;
} vlib_node_main_t;
+
+#define FRAME_QUEUE_MAX_NELTS 32 /* capacity of the per-slot arrays below */
+typedef struct { /* snapshot of one frame queue, written by the dequeue path when tracing is on */
+ CLIB_CACHE_LINE_ALIGN_MARK(cacheline0);
+ u64 head; /* copy of the queue's head counter at snapshot time */
+ u64 head_hint; /* copy of the queue's head_hint at snapshot time */
+ u64 tail; /* copy of the queue's tail counter at snapshot time */
+ u32 n_in_use; /* tail - head, clamped to nelts-1 by the writer */
+ u32 nelts; /* copy of the queue's nelts (ring size) */
+ u32 written; /* set to 1 once a snapshot has been stored; reset to 0 by "trace frame-queue" */
+ u32 threshold; /* copy of the queue's vector_threshold */
+ i32 n_vectors[FRAME_QUEUE_MAX_NELTS]; /* n_vectors of each ring slot, slot head+1 first; preset to 0xff bytes by the CLI */
+} frame_queue_trace_t;
+
+typedef struct { /* per-queue histogram of ring occupancy */
+ u64 count[FRAME_QUEUE_MAX_NELTS]; /* count[k] is bumped each time a traced dequeue pass sees n_in_use == k */
+} frame_queue_nelt_counter_t;
+
#endif /* included_vlib_node_h */
diff --git a/vlib/vlib/threads.c b/vlib/vlib/threads.c
index 1808f36fe50..d2ce449f6ce 100644
--- a/vlib/vlib/threads.c
+++ b/vlib/vlib/threads.c
@@ -51,11 +51,6 @@ u32 vl(void *p)
return vec_len (p);
}
-void debug_hex_bytes (u8 *s, u32 n)
-{
- fformat (stderr, "%U\n", format_hex_bytes, s, n);
-}
-
vlib_thread_main_t vlib_thread_main;
uword
@@ -1150,66 +1145,210 @@ void vlib_worker_thread_barrier_release(vlib_main_t * vm)
}
}
-static clib_error_t *
-show_threads_fn (vlib_main_t * vm,
- unformat_input_t * input,
- vlib_cli_command_t * cmd)
+/*
+ * Check the frame queue to see if any frames are available.
+ * If so, pull the packets off the frames and put them to
+ * the handoff node.
+ *
+ * The queue examined is vlib_frame_queues[vm->cpu_index]. Returns the
+ * number of queue elements consumed by this call; returns 0 immediately
+ * while tm->handoff_dispatch_node_index is still ~0.
+ * Dequeueing stops when head == tail, when the next element is not
+ * marked valid, or once the vectors moved reach fq->vector_threshold.
+ * When fq->trace is set, a snapshot of the queue and one histogram
+ * sample are recorded before any element is consumed.
+ */
+static inline int vlib_frame_queue_dequeue_internal (vlib_main_t *vm)
{
- vlib_worker_thread_t * w;
- int i;
+ u32 thread_id = vm->cpu_index;
+ vlib_frame_queue_t *fq = vlib_frame_queues[thread_id];
+ vlib_frame_queue_elt_t *elt;
+ u32 * from, * to;
+ vlib_frame_t * f;
+ int msg_type;
+ int processed = 0; /* queue elements consumed so far; this is the return value */
+ u32 n_left_to_node;
+ u32 vectors = 0; /* buffer indices moved so far, compared against vector_threshold */
+ vlib_thread_main_t *tm = vlib_get_thread_main();
- vlib_cli_output (vm, "%-7s%-20s%-12s%-8s%-7s%-7s%-7s%-10s",
- "ID", "Name", "Type", "LWP",
- "lcore", "Core", "Socket", "State");
+ ASSERT (fq);
+ ASSERT(vm == vlib_mains[thread_id]);
-#if !defined(__powerpc64__)
- for (i = 0; i < vec_len(vlib_worker_threads); i++)
+ if (PREDICT_FALSE (tm->handoff_dispatch_node_index == ~0))
+ return 0; /* index still holds the ~0 value that threads_init stores */
+ /*
+ * Gather trace data for frame queues
+ */
+ if (PREDICT_FALSE(fq->trace))
{
- w = vlib_worker_threads + i;
- u8 * line = NULL;
+ frame_queue_trace_t *fqt;
+ frame_queue_nelt_counter_t *fqh;
+ u32 elix;
+
+ fqt = &tm->frame_queue_traces[thread_id];
+
+ fqt->nelts = fq->nelts;
+ fqt->head = fq->head;
+ fqt->head_hint = fq->head_hint;
+ fqt->tail = fq->tail;
+ fqt->threshold = fq->vector_threshold;
+ fqt->n_in_use = fqt->tail - fqt->head; /* u64 difference stored into a u32 field */
+ if (fqt->n_in_use >= fqt->nelts){
+ // if beyond max then use max
+ fqt->n_in_use = fqt->nelts-1;
+ }
- line = format(line, "%-7d%-20s%-12s%-8d",
- i,
- w->name ? w->name : (u8 *) "",
- w->registration ? w->registration->name : "",
- w->lwp);
+ /* Record the number of elements in use in the histogram */
+ fqh = &tm->frame_queue_histogram[thread_id];
+ fqh->count[ fqt->n_in_use ]++; /* NOTE(review): count[] has FRAME_QUEUE_MAX_NELTS entries; assumes fq->nelts never exceeds that -- confirm */
-#if DPDK==1
- int lcore = w->dpdk_lcore_id;
- if (lcore > -1)
+ /* Record a snapshot of the elements in use */
+ for (elix=0; elix<fqt->nelts; elix++) {
+ elt = fq->elts + ((fq->head+1 + elix) & (fq->nelts-1));
+ if (1 || elt->valid) /* always true: every slot is recorded whether or not it is valid */
+ {
+ fqt->n_vectors[elix] = elt->n_vectors; /* NOTE(review): n_vectors[] also has FRAME_QUEUE_MAX_NELTS entries -- same assumption as above */
+ }
+ }
+ fqt->written = 1; /* tells the show command that this record holds data */
+ }
+
+ while (1)
+ {
+ if (fq->head == fq->tail) /* nothing queued */
{
- line = format(line, "%-7u%-7u%-7u",
- lcore,
- lcore_config[lcore].core_id,
- lcore_config[lcore].socket_id);
+ fq->head_hint = fq->head;
+ return processed;
+ }
- switch(lcore_config[lcore].state)
- {
- case WAIT:
- line = format(line, "wait");
- break;
- case RUNNING:
- line = format(line, "running");
- break;
- case FINISHED:
- line = format(line, "finished");
- break;
- default:
- line = format(line, "unknown");
- }
+ elt = fq->elts + ((fq->head+1) & (fq->nelts-1)); /* next slot to consume; the mask presumably relies on nelts being a power of two -- TODO confirm */
+
+ if (!elt->valid)
+ {
+ fq->head_hint = fq->head;
+ return processed;
+ }
+
+ from = elt->buffer_index;
+ msg_type = elt->msg_type;
+
+ ASSERT (msg_type == VLIB_FRAME_QUEUE_ELT_DISPATCH_FRAME);
+ ASSERT (elt->n_vectors <= VLIB_FRAME_SIZE);
+
+ f = vlib_get_frame_to_node (vm, tm->handoff_dispatch_node_index);
+
+ to = vlib_frame_vector_args (f);
+
+ n_left_to_node = elt->n_vectors;
+
+ while (n_left_to_node >= 4) /* copy buffer indices four at a time */
+ {
+ to[0] = from[0];
+ to[1] = from[1];
+ to[2] = from[2];
+ to[3] = from[3];
+ to += 4;
+ from += 4;
+ n_left_to_node -= 4;
+ }
+
+ while (n_left_to_node > 0) /* copy the remaining 0..3 indices */
+ {
+ to[0] = from[0];
+ to++;
+ from++;
+ n_left_to_node--;
+ }
+
+ vectors += elt->n_vectors;
+ f->n_vectors = elt->n_vectors;
+ vlib_put_frame_to_node (vm, tm->handoff_dispatch_node_index, f);
+
+ elt->valid = 0;
+ elt->n_vectors = 0;
+ elt->msg_type = 0xfefefefe;
+ CLIB_MEMORY_BARRIER(); /* the slot is reset above before head is advanced below */
+ fq->head++;
+ processed++;
+
+ /*
+ * Limit the number of packets pushed into the graph
+ */
+ if (vectors >= fq->vector_threshold)
+ {
+ fq->head_hint = fq->head;
+ return processed;
}
-#endif
- vlib_cli_output(vm, "%v", line);
- vec_free(line);
}
-#endif
+ ASSERT(0); /* not reached: the loop above only exits through return */
+ return processed;
+}
- return 0;
+/*
+ * Worker thread main loop; never returns.
+ *
+ * Each pass: honor a pending barrier, drain this thread's frame queue,
+ * dispatch every input node in polling state, dispatch all pending
+ * frames, then bump the main loop counter.
+ */
+static_always_inline void
+vlib_worker_thread_internal (vlib_main_t *vm)
+{
+  vlib_node_main_t *node_main = &vm->node_main;
+  u64 now = clib_cpu_time_now ();
+
+  for (;;)
+    {
+      vlib_node_runtime_t *input_node;
+
+      vlib_worker_thread_barrier_check ();
+
+      vlib_frame_queue_dequeue_internal (vm);
+
+      vec_foreach (input_node, node_main->nodes_by_type[VLIB_NODE_TYPE_INPUT])
+        {
+          now = dispatch_node (vm, input_node, VLIB_NODE_TYPE_INPUT,
+                               VLIB_NODE_STATE_POLLING, /* frame */ 0,
+                               now);
+        }
+
+      if (_vec_len (node_main->pending_frames))
+        {
+          int pending_ix;
+
+          now = clib_cpu_time_now ();
+          /* The vector base and length are re-read on every iteration. */
+          for (pending_ix = 0;
+               pending_ix < _vec_len (node_main->pending_frames);
+               pending_ix++)
+            {
+              vlib_pending_frame_t *pending =
+                node_main->pending_frames + pending_ix;
+
+              now = dispatch_pending_node (vm, pending, now);
+            }
+          _vec_len (node_main->pending_frames) = 0;
+        }
+      vlib_increment_main_loop_counter (vm);
+
+      /* Refresh the time stamp in case no node was enabled and the
+         calls above left it untouched. */
+      now = clib_cpu_time_now ();
+    }
}
+/*
+ * Entry point of a worker thread.
+ *
+ * arg is the vlib_worker_thread_t of the thread being started. After
+ * per-thread initialization the thread spins on the barrier check until
+ * tm->worker_thread_release becomes non-zero, then enters the worker
+ * main loop.
+ */
+void vlib_worker_thread_fn (void * arg)
+{
+  vlib_worker_thread_t *worker = (vlib_worker_thread_t *) arg;
+  vlib_thread_main_t *thread_main = vlib_get_thread_main();
+  vlib_main_t *vm = vlib_get_main();
+
+  ASSERT(vm->cpu_index == os_get_cpu_number());
+
+  vlib_worker_thread_init (worker);
+  clib_time_init (&vm->clib_time);
+  clib_mem_set_heap (worker->thread_mheap);
+
+  /* Wait until the dpdk init sequence is complete */
+  while (thread_main->worker_thread_release == 0)
+    {
+      vlib_worker_thread_barrier_check ();
+    }
+
+  vlib_worker_thread_internal (vm);
+}
-VLIB_CLI_COMMAND (show_threads_command, static) = {
- .path = "show threads",
- .short_help = "Show threads",
- .function = show_threads_fn,
+VLIB_REGISTER_THREAD (worker_thread_reg, static) = { /* thread registration whose function is vlib_worker_thread_fn */
+ .name = "workers",
+ .short_name = "wk",
+ .function = vlib_worker_thread_fn, /* runs the worker main loop after init */
};
+
+/*
+ * Init function for the threads module: stores ~0 in
+ * handoff_dispatch_node_index, the value the frame queue dequeue path
+ * tests for before doing any work. Always returns 0 (no error).
+ */
+clib_error_t *threads_init (vlib_main_t *vm)
+{
+  vlib_get_thread_main ()->handoff_dispatch_node_index = ~0;
+  return 0;
+}
+
+VLIB_INIT_FUNCTION (threads_init);
diff --git a/vlib/vlib/threads.h b/vlib/vlib/threads.h
index f81be798626..e3b902ae518 100644
--- a/vlib/vlib/threads.h
+++ b/vlib/vlib/threads.h
@@ -298,7 +298,17 @@ typedef struct {
uword * cpu_socket_bitmap;
vlib_efd_t efd;
-
+
+ /* handoff node index */
+ u32 handoff_dispatch_node_index;
+
+ /* for frame queue tracing */
+ frame_queue_trace_t *frame_queue_traces;
+ frame_queue_nelt_counter_t *frame_queue_histogram;
+
+ /* worker thread initialization barrier */
+ volatile u32 worker_thread_release;
+
} vlib_thread_main_t;
vlib_thread_main_t vlib_thread_main;
diff --git a/vlib/vlib/threads_cli.c b/vlib/vlib/threads_cli.c
new file mode 100644
index 00000000000..26cb2213d95
--- /dev/null
+++ b/vlib/vlib/threads_cli.c
@@ -0,0 +1,362 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vppinfra/format.h>
+#include <vlib/vlib.h>
+
+#include <vlib/threads.h>
+
+/*
+ * CLI handler for "show threads".
+ *
+ * Prints a heading row, then one row per entry of vlib_worker_threads
+ * with its index, name, registration name and LWP. With DPDK enabled
+ * and a non-negative lcore id, the lcore, core id, socket id and lcore
+ * state are appended. On powerpc64 only the heading is printed.
+ * Always returns 0 (no error).
+ */
+static clib_error_t *
+show_threads_fn (vlib_main_t * vm,
+                 unformat_input_t * input,
+                 vlib_cli_command_t * cmd)
+{
+  vlib_cli_output (vm, "%-7s%-20s%-12s%-8s%-7s%-7s%-7s%-10s",
+                   "ID", "Name", "Type", "LWP",
+                   "lcore", "Core", "Socket", "State");
+
+#if !defined(__powerpc64__)
+  int thread_index;
+
+  for (thread_index = 0;
+       thread_index < vec_len(vlib_worker_threads);
+       thread_index++)
+    {
+      vlib_worker_thread_t *worker = &vlib_worker_threads[thread_index];
+      u8 *row = NULL;
+
+      row = format(row, "%-7d%-20s%-12s%-8d",
+                   thread_index,
+                   worker->name ? worker->name : (u8 *) "",
+                   worker->registration ? worker->registration->name : "",
+                   worker->lwp);
+
+#if DPDK==1
+      int lcore = worker->dpdk_lcore_id;
+      if (lcore >= 0)
+        {
+          int lcore_state = lcore_config[lcore].state;
+
+          row = format(row, "%-7u%-7u%-7u",
+                       lcore,
+                       lcore_config[lcore].core_id,
+                       lcore_config[lcore].socket_id);
+
+          if (lcore_state == WAIT)
+            row = format(row, "wait");
+          else if (lcore_state == RUNNING)
+            row = format(row, "running");
+          else if (lcore_state == FINISHED)
+            row = format(row, "finished");
+          else
+            row = format(row, "unknown");
+        }
+#endif
+      vlib_cli_output(vm, "%v", row);
+      vec_free(row);
+    }
+#endif
+
+  return 0;
+}
+
+
+VLIB_CLI_COMMAND (show_threads_command, static) = { /* binds "show threads" to show_threads_fn */
+ .path = "show threads",
+ .short_help = "Show threads",
+ .function = show_threads_fn,
+};
+
+/*
+ * CLI handler for "trace frame-queue (on|off)".
+ *
+ * Makes sure one trace record and one histogram exist per frame queue,
+ * clears them, and stores the requested on/off value in every queue's
+ * trace flag. Note that "off" clears the collected data as well.
+ * Returns an error for any argument other than "on" or "off".
+ */
+static clib_error_t *
+trace_frame_queue (vlib_main_t *vm, unformat_input_t *input,
+                   vlib_cli_command_t *cmd)
+{
+  vlib_thread_main_t *thread_main = vlib_get_thread_main();
+  u32 trace_on;
+  u32 queue_count;
+  u32 queue_ix;
+
+  if (unformat(input, "on"))
+    {
+      trace_on = 1;
+    }
+  else if (unformat(input, "off"))
+    {
+      trace_on = 0;
+    }
+  else
+    {
+      return clib_error_return(0, "expecting on or off");
+    }
+
+  queue_count = vec_len(vlib_frame_queues);
+  if (queue_count == 0)
+    {
+      vlib_cli_output(vm, "No frame queues exist\n");
+      return NULL;
+    }
+
+  /* Grow the per-queue storage so index queue_count-1 is valid */
+  vec_validate_aligned(thread_main->frame_queue_traces, queue_count-1,
+                       CLIB_CACHE_LINE_BYTES);
+  vec_validate_aligned(thread_main->frame_queue_histogram, queue_count-1,
+                       CLIB_CACHE_LINE_BYTES);
+
+  for (queue_ix = 0; queue_ix < queue_count; queue_ix++)
+    {
+      frame_queue_trace_t *trace =
+        thread_main->frame_queue_traces + queue_ix;
+      frame_queue_nelt_counter_t *occupancy =
+        thread_main->frame_queue_histogram + queue_ix;
+
+      /* 0xff bytes make every n_vectors entry read back as -1 */
+      memset(trace->n_vectors, 0xff, sizeof(trace->n_vectors));
+      trace->written = 0;
+      memset(occupancy, 0, sizeof(*occupancy));
+      vlib_frame_queues[queue_ix]->trace = trace_on;
+    }
+
+  return NULL;
+}
+
+VLIB_CLI_COMMAND (cmd_trace_frame_queue,static) = { /* binds "trace frame-queue" to trace_frame_queue */
+ .path = "trace frame-queue",
+ .short_help = "trace frame-queue (on|off)",
+ .function = trace_frame_queue,
+ .is_mp_safe = 1, /* NOTE(review): the handler resizes and clears the trace vectors that the dequeue path also writes -- confirm this flag is intended */
+};
+
+
+/*
+ * Sum two adjacent counters and express the sum as a percentage of
+ * total, rounding up so any non-zero sum shows as at least 1%
+ * (e.g. 0.000001 => 1%). Returns 0 when total is 0.
+ */
+static u32
+compute_percent (u64 *two_counters, u64 total)
+{
+  u64 pair_sum;
+
+  if (total == 0)
+    return 0;
+
+  pair_sum = two_counters[0] + two_counters[1];
+
+  /* Adding total-1 before dividing turns the division into a ceiling */
+  return ((pair_sum * 100) + (total-1)) / total;
+}
+
+/*
+ * Display frame queue trace data gathered by threads.
+ *
+ * vm        - vlib main used for CLI output
+ * histogram - non-zero: print each thread's ring occupancy histogram;
+ *             zero: print each thread's most recent ring snapshot
+ *
+ * Threads whose record has not been written yet are reported as having
+ * no trace data. Always returns NULL; nothing here raises an error.
+ */
+static clib_error_t *
+show_frame_queue_internal (vlib_main_t *vm,
+                           u32 histogram)
+{
+  vlib_thread_main_t *tm = vlib_get_thread_main();
+  clib_error_t * error = NULL;
+  frame_queue_trace_t *fqt;
+  frame_queue_nelt_counter_t *fqh;
+  u32 num_fq;
+  u32 fqix;
+
+  num_fq = vec_len(tm->frame_queue_traces);
+  if (num_fq == 0)
+    {
+      vlib_cli_output(vm, "No trace data for frame queues\n");
+      return error;
+    }
+
+  if (histogram)
+    {
+      vlib_cli_output(vm, "0-1 2-3 4-5 6-7 8-9 10-11 12-13 14-15 "
+                      "16-17 18-19 20-21 22-23 24-25 26-27 28-29 30-31\n");
+    }
+
+  for (fqix=0; fqix<num_fq; fqix++) {
+    fqt = &(tm->frame_queue_traces[fqix]);
+
+    vlib_cli_output(vm, "Thread %d %v\n", fqix, vlib_worker_threads[fqix].name);
+
+    if (fqt->written == 0)
+      {
+        vlib_cli_output(vm, " no trace data\n");
+        continue;
+      }
+
+    if (histogram)
+      {
+        fqh = &(tm->frame_queue_histogram[fqix]);
+        u32 nelt;
+        u64 total = 0;
+
+        for (nelt=0; nelt<FRAME_QUEUE_MAX_NELTS; nelt++) {
+          total += fqh->count[nelt];
+        }
+
+        /*
+         * Print in pairs to condense the output.
+         * Allow entries with 0 counts to be clearly identified, by rounding up.
+         * Any non-zero value will be displayed as at least one percent. This
+         * also means the sum of percentages can be > 100, but that is fine. The
+         * histogram is counted from the last time "trace frame-queue on" was
+         * issued.
+         */
+        vlib_cli_output(vm,
+                        "%3d%% %3d%% %3d%% %3d%% %3d%% %3d%% %3d%% %3d%% "
+                        "%3d%% %3d%% %3d%% %3d%% %3d%% %3d%% %3d%% %3d%%\n",
+                        compute_percent(&fqh->count[ 0], total),
+                        compute_percent(&fqh->count[ 2], total),
+                        compute_percent(&fqh->count[ 4], total),
+                        compute_percent(&fqh->count[ 6], total),
+                        compute_percent(&fqh->count[ 8], total),
+                        compute_percent(&fqh->count[10], total),
+                        compute_percent(&fqh->count[12], total),
+                        compute_percent(&fqh->count[14], total),
+                        compute_percent(&fqh->count[16], total),
+                        compute_percent(&fqh->count[18], total),
+                        compute_percent(&fqh->count[20], total),
+                        compute_percent(&fqh->count[22], total),
+                        compute_percent(&fqh->count[24], total),
+                        compute_percent(&fqh->count[26], total),
+                        compute_percent(&fqh->count[28], total),
+                        compute_percent(&fqh->count[30], total));
+      }
+    else
+      {
+        vlib_cli_output(vm, " vector-threshold %d ring size %d in use %d\n",
+                        fqt->threshold, fqt->nelts, fqt->n_in_use);
+        /*
+         * head, head_hint and tail are u64, so they need the 64-bit
+         * length modifier; plain %d consumes them as 32-bit ints.
+         */
+        vlib_cli_output(vm, " head %12Ld head_hint %12Ld tail %12Ld\n",
+                        fqt->head, fqt->head_hint, fqt->tail);
+        vlib_cli_output(vm, " %3d %3d %3d %3d %3d %3d %3d %3d %3d %3d %3d %3d %3d %3d %3d %3d\n",
+                        fqt->n_vectors[0], fqt->n_vectors[1], fqt->n_vectors[2], fqt->n_vectors[3],
+                        fqt->n_vectors[4], fqt->n_vectors[5], fqt->n_vectors[6], fqt->n_vectors[7],
+                        fqt->n_vectors[8], fqt->n_vectors[9], fqt->n_vectors[10], fqt->n_vectors[11],
+                        fqt->n_vectors[12], fqt->n_vectors[13], fqt->n_vectors[14], fqt->n_vectors[15]);
+
+        /* The second row of slots only exists on rings larger than 16 */
+        if (fqt->nelts > 16)
+          {
+            vlib_cli_output(vm, " %3d %3d %3d %3d %3d %3d %3d %3d %3d %3d %3d %3d %3d %3d %3d %3d\n",
+                            fqt->n_vectors[16], fqt->n_vectors[17], fqt->n_vectors[18], fqt->n_vectors[19],
+                            fqt->n_vectors[20], fqt->n_vectors[21], fqt->n_vectors[22], fqt->n_vectors[23],
+                            fqt->n_vectors[24], fqt->n_vectors[25], fqt->n_vectors[26], fqt->n_vectors[27],
+                            fqt->n_vectors[28], fqt->n_vectors[29], fqt->n_vectors[30], fqt->n_vectors[31]);
+          }
+      }
+
+  }
+  return error;
+}
+
+/* CLI handler: print the most recent frame queue snapshot per thread. */
+static clib_error_t *
+show_frame_queue_trace (vlib_main_t *vm, unformat_input_t *input,
+                        vlib_cli_command_t *cmd)
+{
+  const u32 histogram_mode = 0;
+
+  return show_frame_queue_internal (vm, histogram_mode);
+}
+
+/* CLI handler: print the frame queue occupancy histogram per thread. */
+static clib_error_t *
+show_frame_queue_histogram (vlib_main_t *vm, unformat_input_t *input,
+                            vlib_cli_command_t *cmd)
+{
+  const u32 histogram_mode = 1;
+
+  return show_frame_queue_internal (vm, histogram_mode);
+}
+
+VLIB_CLI_COMMAND (cmd_show_frame_queue_trace,static) = { /* binds "show frame-queue" to show_frame_queue_trace */
+ .path = "show frame-queue",
+ .short_help = "show frame-queue trace", /* NOTE(review): help text says "... trace" but the registered path has no "trace" word -- confirm which is intended */
+ .function = show_frame_queue_trace,
+};
+
+VLIB_CLI_COMMAND (cmd_show_frame_queue_histogram,static) = { /* binds "show frame-queue histogram" to show_frame_queue_histogram */
+ .path = "show frame-queue histogram",
+ .short_help = "show frame-queue histogram",
+ .function = show_frame_queue_histogram,
+};
+
+
+/*
+ * CLI handler for "test frame-queue nelts N".
+ *
+ * Stores N in the nelts field of every frame queue. N must be one of
+ * 4, 8, 16 or 32; anything else, including a missing or unparsable
+ * argument (which leaves the value at 0), returns an error.
+ */
+static clib_error_t *
+test_frame_queue_nelts (vlib_main_t *vm, unformat_input_t *input,
+                        vlib_cli_command_t *cmd)
+{
+  u32 requested_nelts = 0;
+  u32 queue_count;
+  u32 queue_ix;
+
+  unformat(input, "%d", &requested_nelts);
+  switch (requested_nelts)
+    {
+    case 4:
+    case 8:
+    case 16:
+    case 32:
+      break;
+    default:
+      return clib_error_return(0, "expecting 4,8,16,32");
+    }
+
+  queue_count = vec_len(vlib_frame_queues);
+  if (queue_count == 0)
+    {
+      vlib_cli_output(vm, "No frame queues exist\n");
+      return NULL;
+    }
+
+  for (queue_ix = 0; queue_ix < queue_count; queue_ix++)
+    {
+      vlib_frame_queues[queue_ix]->nelts = requested_nelts;
+    }
+
+  return NULL;
+}
+
+VLIB_CLI_COMMAND (cmd_test_frame_queue_nelts,static) = { /* binds "test frame-queue nelts" to test_frame_queue_nelts */
+ .path = "test frame-queue nelts",
+ .short_help = "test frame-queue nelts (4,8,16,32)",
+ .function = test_frame_queue_nelts,
+};
+
+
+/*
+ * Modify the max number of packets pulled off the frame queues
+ *
+ * CLI handler for "test frame-queue threshold N". Stores N in the
+ * vector_threshold field of every frame queue; N == 0 is stored as ~0,
+ * i.e. no limit. A missing or unparsable N is returned as an error,
+ * in the same way the other frame-queue commands report bad arguments.
+ */
+static clib_error_t *
+test_frame_queue_threshold (vlib_main_t *vm, unformat_input_t *input,
+                            vlib_cli_command_t *cmd)
+{
+  clib_error_t * error = NULL;
+  u32 num_fq;
+  u32 fqix;
+  u32 threshold = 0;
+
+  if (!unformat(input, "%d", &threshold))
+    {
+      return clib_error_return(0, "expecting threshold value");
+    }
+
+  /* 0 on the command line means "no limit" */
+  if (threshold == 0)
+    threshold = ~0;
+
+  num_fq = vec_len(vlib_frame_queues);
+  if (num_fq == 0)
+    {
+      vlib_cli_output(vm, "No frame queues exist\n");
+      return error;
+    }
+
+  for (fqix=0; fqix<num_fq; fqix++) {
+    vlib_frame_queues[fqix]->vector_threshold = threshold;
+  }
+
+  return error;
+}
+
+VLIB_CLI_COMMAND (cmd_test_frame_queue_threshold,static) = { /* binds "test frame-queue threshold" to test_frame_queue_threshold */
+ .path = "test frame-queue threshold",
+ .short_help = "test frame-queue threshold N (0=no limit)",
+ .function = test_frame_queue_threshold,
+};
+