aboutsummaryrefslogtreecommitdiffstats
path: root/src/vlib
diff options
context:
space:
mode:
Diffstat (limited to 'src/vlib')
-rw-r--r--src/vlib/buffer.c1987
-rw-r--r--src/vlib/buffer.h417
-rw-r--r--src/vlib/buffer_funcs.h755
-rw-r--r--src/vlib/buffer_node.h337
-rw-r--r--src/vlib/cli.c1173
-rw-r--r--src/vlib/cli.h192
-rw-r--r--src/vlib/cli_funcs.h58
-rw-r--r--src/vlib/counter.c151
-rw-r--r--src/vlib/counter.h379
-rw-r--r--src/vlib/defs.h82
-rw-r--r--src/vlib/dir.dox23
-rw-r--r--src/vlib/elog_samples.c122
-rw-r--r--src/vlib/error.c338
-rw-r--r--src/vlib/error.h101
-rw-r--r--src/vlib/error_funcs.h88
-rw-r--r--src/vlib/format.c196
-rw-r--r--src/vlib/format_funcs.h75
-rw-r--r--src/vlib/global_funcs.h45
-rw-r--r--src/vlib/i2c.c231
-rw-r--r--src/vlib/i2c.h67
-rw-r--r--src/vlib/init.c168
-rw-r--r--src/vlib/init.h238
-rw-r--r--src/vlib/lex.c271
-rw-r--r--src/vlib/lex.h145
-rw-r--r--src/vlib/main.c1703
-rw-r--r--src/vlib/main.h333
-rw-r--r--src/vlib/mc.c2609
-rw-r--r--src/vlib/mc.h687
-rw-r--r--src/vlib/node.c631
-rw-r--r--src/vlib/node.h725
-rw-r--r--src/vlib/node_cli.c466
-rw-r--r--src/vlib/node_format.c187
-rw-r--r--src/vlib/node_funcs.h1130
-rw-r--r--src/vlib/parse.c1007
-rw-r--r--src/vlib/parse.h221
-rw-r--r--src/vlib/parse_builtin.c150
-rw-r--r--src/vlib/pci/linux_pci.c642
-rw-r--r--src/vlib/pci/pci.c264
-rw-r--r--src/vlib/pci/pci.h251
-rw-r--r--src/vlib/pci/pci_config.h731
-rw-r--r--src/vlib/physmem.h108
-rw-r--r--src/vlib/threads.c1492
-rw-r--r--src/vlib/threads.h470
-rw-r--r--src/vlib/threads_cli.c579
-rw-r--r--src/vlib/trace.c545
-rw-r--r--src/vlib/trace.h100
-rw-r--r--src/vlib/trace_funcs.h185
-rw-r--r--src/vlib/unix/cj.c271
-rw-r--r--src/vlib/unix/cj.h79
-rw-r--r--src/vlib/unix/cli.c2989
-rw-r--r--src/vlib/unix/dir.dox28
-rw-r--r--src/vlib/unix/input.c265
-rw-r--r--src/vlib/unix/main.c557
-rw-r--r--src/vlib/unix/mc_socket.c1049
-rw-r--r--src/vlib/unix/mc_socket.h137
-rw-r--r--src/vlib/unix/physmem.c470
-rw-r--r--src/vlib/unix/physmem.h65
-rw-r--r--src/vlib/unix/plugin.c260
-rw-r--r--src/vlib/unix/plugin.h98
-rw-r--r--src/vlib/unix/unix.h232
-rw-r--r--src/vlib/unix/util.c231
-rw-r--r--src/vlib/vlib.h86
-rw-r--r--src/vlib/vlib_process_doc.h147
63 files changed, 29819 insertions, 0 deletions
diff --git a/src/vlib/buffer.c b/src/vlib/buffer.c
new file mode 100644
index 00000000000..4bf6d125b21
--- /dev/null
+++ b/src/vlib/buffer.c
@@ -0,0 +1,1987 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * buffer.c: allocate/free network buffers.
+ *
+ * Copyright (c) 2008 Eliot Dresselhaus
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/**
+ * @cond (!DPDK)
+ * @file
+ *
+ * Allocate/free network buffers.
+ */
+
+#if DPDK > 0
+#include <rte_config.h>
+
+#include <rte_common.h>
+#include <rte_log.h>
+#include <rte_memory.h>
+#include <rte_memzone.h>
+#include <rte_tailq.h>
+#include <rte_eal.h>
+#include <rte_per_lcore.h>
+#include <rte_launch.h>
+#include <rte_atomic.h>
+#include <rte_cycles.h>
+#include <rte_prefetch.h>
+#include <rte_lcore.h>
+#include <rte_per_lcore.h>
+#include <rte_branch_prediction.h>
+#include <rte_interrupts.h>
+#include <rte_pci.h>
+#include <rte_random.h>
+#include <rte_debug.h>
+#include <rte_ether.h>
+#include <rte_ethdev.h>
+#include <rte_ring.h>
+#include <rte_mempool.h>
+#include <rte_mbuf.h>
+#include <rte_version.h>
+#endif
+
+#include <vlib/vlib.h>
+
+#if DPDK > 0
+#pragma weak rte_mem_virt2phy
+#pragma weak rte_eal_has_hugepages
+#pragma weak rte_socket_id
+#pragma weak rte_pktmbuf_pool_create
+#endif
+
+/* Walk the chain that starts at b_first, add up current_length of every
+   buffer after the first, store that sum in
+   b_first->total_length_not_including_first_buffer and set
+   VLIB_BUFFER_TOTAL_LENGTH_VALID on b_first.
+   Returns the length of the whole chain, first buffer included. */
+uword
+vlib_buffer_length_in_chain_slow_path (vlib_main_t * vm,
+                                       vlib_buffer_t * b_first)
+{
+  vlib_buffer_t *cur;
+  uword tail_bytes = 0;
+
+  for (cur = b_first; cur->flags & VLIB_BUFFER_NEXT_PRESENT;)
+    {
+      cur = vlib_get_buffer (vm, cur->next_buffer);
+      tail_bytes += cur->current_length;
+    }
+
+  b_first->total_length_not_including_first_buffer = tail_bytes;
+  b_first->flags |= VLIB_BUFFER_TOTAL_LENGTH_VALID;
+
+  return tail_bytes + b_first->current_length;
+}
+
+/* format() helper describing a buffer header.
+   va_arg: vlib_buffer_t *b.
+   DPDK builds additionally print the cached chain length and one line per
+   chained segment; other builds print only the next buffer index. */
+u8 *
+format_vlib_buffer (u8 * s, va_list * args)
+{
+  vlib_buffer_t *b = va_arg (*args, vlib_buffer_t *);
+#if DPDK > 0
+  /* Must be sampled before anything is appended to s. */
+  uword indent = format_get_indent (s);
+#endif
+
+  s = format (s, "current data %d, length %d, free-list %d",
+              b->current_data, b->current_length, b->free_list_index);
+
+#if DPDK > 0
+  if (b->flags & VLIB_BUFFER_TOTAL_LENGTH_VALID)
+    s = format (s, ", totlen-nifb %d",
+                b->total_length_not_including_first_buffer);
+#endif
+
+  if (b->flags & VLIB_BUFFER_IS_TRACED)
+    s = format (s, ", trace 0x%x", b->trace_index);
+
+#if DPDK > 0
+  /* Follow the chain and describe every further segment. */
+  while (b->flags & VLIB_BUFFER_NEXT_PRESENT)
+    {
+      vlib_main_t *vm = vlib_get_main ();
+      u32 next_bi = b->next_buffer;
+
+      b = vlib_get_buffer (vm, next_bi);
+      s = format (s, "\n%Unext-buffer 0x%x, segment length %d",
+                  format_white_space, indent, next_bi, b->current_length);
+    }
+#else
+  if (b->flags & VLIB_BUFFER_NEXT_PRESENT)
+    s = format (s, ", next-buffer 0x%x", b->next_buffer);
+#endif
+
+  return s;
+}
+
+/* format() helper: the buffer header as printed by format_vlib_buffer,
+   then the buffer's current data handed to format_hex_bytes with a
+   byte count of 64.
+   va_arg: vlib_buffer_t *b. */
+u8 *
+format_vlib_buffer_and_data (u8 * s, va_list * args)
+{
+  vlib_buffer_t *buf = va_arg (*args, vlib_buffer_t *);
+
+  return format (s, "%U, %U",
+                 format_vlib_buffer, buf,
+                 format_hex_bytes, vlib_buffer_get_current (buf), 64);
+}
+
+#if DPDK == 0
+/* format() helper: symbolic name of a vlib_buffer_known_state_t value.
+   va_arg: vlib_buffer_known_state_t state.
+   Any value other than the three named states prints as "invalid". */
+static u8 *
+format_vlib_buffer_known_state (u8 * s, va_list * args)
+{
+  vlib_buffer_known_state_t state = va_arg (*args, vlib_buffer_known_state_t);
+  char *label = "invalid";
+
+  if (state == VLIB_BUFFER_UNKNOWN)
+    label = "unknown";
+  else if (state == VLIB_BUFFER_KNOWN_ALLOCATED)
+    label = "known-allocated";
+  else if (state == VLIB_BUFFER_KNOWN_FREE)
+    label = "known-free";
+
+  return format (s, "%s", label);
+}
+#endif
+
+/* format() helper: append the current data of every buffer in a chain
+   to s.
+   va_args: vlib_main_t *vm, vlib_buffer_t *b (head of the chain). */
+u8 *
+format_vlib_buffer_contents (u8 * s, va_list * va)
+{
+  vlib_main_t *vm = va_arg (*va, vlib_main_t *);
+  vlib_buffer_t *seg = va_arg (*va, vlib_buffer_t *);
+
+  for (;;)
+    {
+      vec_add (s, vlib_buffer_get_current (seg), seg->current_length);
+
+      /* Last segment of the chain: done. */
+      if ((seg->flags & VLIB_BUFFER_NEXT_PRESENT) == 0)
+        return s;
+
+      seg = vlib_get_buffer (vm, seg->next_buffer);
+    }
+}
+
+#if DPDK == 0
+/* Sanity-check one buffer and, optionally, the chain hanging off it.
+ *
+ *   vm                  vlib main the buffer index belongs to
+ *   bi                  index of the buffer to check
+ *   follow_buffer_next  non-zero: also validate buffers reached through
+ *                       next_buffer
+ *   unique_hash         optional (may be 0) pointer to a hash of buffer
+ *                       indices already seen; a chained buffer found in it
+ *                       is reported as a duplicate, otherwise it is added
+ *
+ * Returns 0 when nothing is wrong, otherwise a newly formatted vector
+ * describing the first problem found; the caller must vec_free it.
+ */
+static u8 *
+vlib_validate_buffer_helper (vlib_main_t * vm,
+                             u32 bi,
+                             uword follow_buffer_next, uword ** unique_hash)
+{
+  vlib_buffer_t *b = vlib_get_buffer (vm, bi);
+  vlib_buffer_main_t *bm = vm->buffer_main;
+  vlib_buffer_free_list_t *fl;
+
+  if (pool_is_free_index (bm->buffer_free_list_pool, b->free_list_index))
+    return format (0, "unknown free list 0x%x", b->free_list_index);
+
+  fl = pool_elt_at_index (bm->buffer_free_list_pool, b->free_list_index);
+
+  if ((signed) b->current_data < (signed) -VLIB_BUFFER_PRE_DATA_SIZE)
+    return format (0, "current data %d before pre-data", b->current_data);
+
+  /* Compare as signed words.  current_data may legitimately be negative
+     (see the pre-data check above); without the casts a negative sum would
+     be converted to a huge unsigned value when compared against
+     n_data_bytes and be reported as an overrun. */
+  if ((word) b->current_data + (word) b->current_length >
+      (word) fl->n_data_bytes)
+    return format (0, "%d-%d beyond end of buffer %d",
+                   b->current_data, b->current_length, fl->n_data_bytes);
+
+  if (follow_buffer_next && (b->flags & VLIB_BUFFER_NEXT_PRESENT))
+    {
+      vlib_buffer_known_state_t k;
+      u8 *msg, *result;
+
+      k = vlib_buffer_is_known (vm, b->next_buffer);
+      if (k != VLIB_BUFFER_KNOWN_ALLOCATED)
+        return format (0, "next 0x%x: %U",
+                       b->next_buffer, format_vlib_buffer_known_state, k);
+
+      if (unique_hash)
+        {
+          if (hash_get (*unique_hash, b->next_buffer))
+            return format (0, "duplicate buffer 0x%x", b->next_buffer);
+
+          hash_set1 (*unique_hash, b->next_buffer);
+        }
+
+      /* Recurse with the same hash so that duplicates (and loops) are
+         detected at every depth of the chain, not just the first link. */
+      msg = vlib_validate_buffer_helper (vm, b->next_buffer,
+                                         follow_buffer_next, unique_hash);
+      if (msg)
+        {
+          result = format (0, "next 0x%x: %v", b->next_buffer, msg);
+          vec_free (msg);
+          return result;
+        }
+    }
+
+  return 0;
+}
+
+/* Validate one buffer (and its chain when follow_buffer_next is non-zero)
+   without duplicate tracking.  Returns 0 or an error message vector. */
+u8 *
+vlib_validate_buffer (vlib_main_t * vm, u32 bi, uword follow_buffer_next)
+{
+  uword **no_unique_hash = 0;
+
+  return vlib_validate_buffer_helper (vm, bi, follow_buffer_next,
+                                      no_unique_hash);
+}
+
+/* Validate n_buffers buffer indices read from `buffers`, advancing by
+   next_buffer_stride u32 elements between reads.  Each index must be
+   unique, be in `known_state`, and pass vlib_validate_buffer_helper.
+   Returns 0 when all pass, otherwise a message vector prefixed with the
+   offending buffer index; the caller frees it. */
+u8 *
+vlib_validate_buffers (vlib_main_t * vm,
+                       u32 * buffers,
+                       uword next_buffer_stride,
+                       uword n_buffers,
+                       vlib_buffer_known_state_t known_state,
+                       uword follow_buffer_next)
+{
+  uword *seen = hash_create (0, 0);
+  u32 *cursor = buffers;
+  u32 bi = 0;
+  uword idx;
+  u8 *msg = 0, *result = 0;
+
+  /* Stop at the first buffer that produces a message. */
+  for (idx = 0; idx < n_buffers && msg == 0; idx++)
+    {
+      vlib_buffer_known_state_t k;
+
+      bi = cursor[0];
+      cursor += next_buffer_stride;
+
+      if (hash_get (seen, bi))
+        {
+          /* Buffer is not unique. */
+          msg = format (0, "not unique");
+        }
+      else if ((k = vlib_buffer_is_known (vm, bi)) != known_state)
+        {
+          msg = format (0, "is %U; expected %U",
+                        format_vlib_buffer_known_state, k,
+                        format_vlib_buffer_known_state, known_state);
+        }
+      else
+        {
+          msg = vlib_validate_buffer_helper (vm, bi, follow_buffer_next,
+                                             &seen);
+          if (msg == 0)
+            hash_set1 (seen, bi);
+        }
+    }
+
+  if (msg)
+    {
+      result = format (0, "0x%x: %v", bi, msg);
+      vec_free (msg);
+    }
+
+  hash_free (seen);
+  return result;
+}
+#endif
+
+/* Vector of vlib_main_t pointers.  Within this file, element 0 is what the
+   debug validation compares the calling vm against, and elements from
+   index 1 upward get mirrored free-list pool entries in DPDK builds. */
+vlib_main_t **vlib_mains;
+
+#if DPDK == 0
+/* When debugging, validate that the given buffers are all in
+   expected_state (known allocated or known free) and then flip each one to
+   the opposite state.  Does nothing when CLIB_DEBUG == 0. */
+static void
+vlib_buffer_validate_alloc_free (vlib_main_t * vm,
+                                 u32 * buffers,
+                                 uword n_buffers,
+                                 vlib_buffer_known_state_t expected_state)
+{
+  uword idx, freeing;
+
+  if (CLIB_DEBUG == 0)
+    return;
+
+  ASSERT (os_get_cpu_number () == 0);
+
+  /* smp disaster check */
+  if (vlib_mains)
+    ASSERT (vm == vlib_mains[0]);
+
+  /* Buffers expected to be allocated are the ones being freed. */
+  freeing = (expected_state == VLIB_BUFFER_KNOWN_ALLOCATED);
+
+  for (idx = 0; idx < n_buffers; idx++)
+    {
+      uword bi = buffers[idx];
+      vlib_buffer_known_state_t known = vlib_buffer_is_known (vm, bi);
+
+      if (known != expected_state)
+        {
+          ASSERT (0);
+          vlib_panic_with_msg
+            (vm, "%s %U buffer 0x%x",
+             freeing ? "freeing" : "allocating",
+             format_vlib_buffer_known_state, known, bi);
+        }
+
+      vlib_buffer_set_known_state
+        (vm, bi,
+         freeing ? VLIB_BUFFER_KNOWN_FREE : VLIB_BUFFER_KNOWN_ALLOCATED);
+    }
+}
+#endif
+
+/* Number of u32 buffer indices that fit in one vlib_copy_unit_t. */
+#define BUFFERS_PER_COPY (sizeof (vlib_copy_unit_t) / sizeof (u32))
+
+/* Make sure the unaligned vector holds at least n_unaligned_buffers
+   indices by moving whole BUFFERS_PER_COPY-sized groups over from the end
+   of the aligned vector.  The aligned vector must be a multiple of
+   BUFFERS_PER_COPY long and hold at least n_unaligned_buffers entries. */
+static void
+fill_unaligned (vlib_main_t * vm,
+                vlib_buffer_free_list_t * free_list,
+                uword n_unaligned_buffers)
+{
+  word n_aligned = vec_len (free_list->aligned_buffers);
+  word n_unaligned = vec_len (free_list->unaligned_buffers);
+
+  /* Aligned come in aligned copy-sized chunks. */
+  ASSERT (n_aligned % BUFFERS_PER_COPY == 0);
+
+  ASSERT (n_aligned >= n_unaligned_buffers);
+
+  for (; n_unaligned < n_unaligned_buffers; n_unaligned += BUFFERS_PER_COPY)
+    {
+      /* Peel one copy unit worth of indices off the aligned tail. */
+      n_aligned -= BUFFERS_PER_COPY;
+      vec_add (free_list->unaligned_buffers,
+               free_list->aligned_buffers + n_aligned, BUFFERS_PER_COPY);
+    }
+
+  _vec_len (free_list->aligned_buffers) = n_aligned;
+}
+
+/* After frees the aligned vector may no longer be a whole number of copy
+   units.  Fold the unaligned vector back into the aligned one, then move
+   the remainder (length modulo BUFFERS_PER_COPY) from the aligned tail
+   into the unaligned vector. */
+static void
+trim_aligned (vlib_buffer_free_list_t * f)
+{
+  uword n_unaligned = vec_len (f->unaligned_buffers);
+  uword n_aligned, n_excess;
+
+  /* Add unaligned to aligned before trim. */
+  if (n_unaligned > 0)
+    {
+      vec_add_aligned (f->aligned_buffers, f->unaligned_buffers, n_unaligned,
+                       /* align */ sizeof (vlib_copy_unit_t));
+
+      _vec_len (f->unaligned_buffers) = 0;
+    }
+
+  n_aligned = vec_len (f->aligned_buffers);
+  n_excess = n_aligned % BUFFERS_PER_COPY;
+  if (n_excess == 0)
+    return;
+
+  /* Save the odd tail in the unaligned vector, then drop it from aligned. */
+  n_aligned -= n_excess;
+  vec_add (f->unaligned_buffers, f->aligned_buffers + n_aligned, n_excess);
+  _vec_len (f->aligned_buffers) = n_aligned;
+}
+
+/* Move every buffer index held by src (aligned and unaligned vectors)
+   onto the corresponding vectors of dst.  Both lists are trimmed first;
+   src vectors that held entries are freed afterwards. */
+static void
+merge_free_lists (vlib_buffer_free_list_t * dst,
+                  vlib_buffer_free_list_t * src)
+{
+  uword n;
+
+  trim_aligned (src);
+  trim_aligned (dst);
+
+  n = vec_len (src->aligned_buffers);
+  if (n > 0)
+    {
+      u32 *tail;
+
+      vec_add2_aligned (dst->aligned_buffers, tail, n,
+                        /* align */ sizeof (vlib_copy_unit_t));
+      clib_memcpy (tail, src->aligned_buffers, n * sizeof (tail[0]));
+      vec_free (src->aligned_buffers);
+    }
+
+  n = vec_len (src->unaligned_buffers);
+  if (n > 0)
+    {
+      vec_add (dst->unaligned_buffers, src->unaligned_buffers, n);
+      vec_free (src->unaligned_buffers);
+    }
+}
+
+/* Look up the free list registered in bm->free_list_by_size for the
+   rounded buffer size.  Returns its index, or ~0 when none is registered. */
+always_inline u32
+vlib_buffer_get_free_list_with_size (vlib_main_t * vm, u32 size)
+{
+  vlib_buffer_main_t *bm = vm->buffer_main;
+  u32 rounded = vlib_buffer_round_size (size);
+  uword *entry = hash_get (bm->free_list_by_size, rounded);
+
+  if (entry == 0)
+    return ~0;
+
+  return entry[0];
+}
+
+/* Add buffer free list.
+ *
+ *   vm            vlib main whose buffer_main receives the new list
+ *   n_data_bytes  requested data size; stored after vlib_buffer_round_size
+ *   is_public     non-zero: register the list in bm->free_list_by_size
+ *                 (unless a list of that size is already registered)
+ *   is_default    non-zero only for the recursive call that creates the
+ *                 default list
+ *   name          list name.  A heap object is adopted as-is, anything
+ *                 else is copied with format().
+ *
+ * If the pool is still empty the default free list is created first, so
+ * that it lands on VLIB_BUFFER_DEFAULT_FREE_LIST_INDEX.  When the caller
+ * asked for a public list of the default size, that default list is
+ * returned instead of creating a second one.
+ *
+ * In DPDK builds the new pool element is mirrored into the free-list pool
+ * of every vlib_mains[i], i >= 1, with empty buffer vectors.
+ *
+ * Returns the index of the free list.
+ */
+static u32
+vlib_buffer_create_free_list_helper (vlib_main_t * vm,
+                                     u32 n_data_bytes,
+                                     u32 is_public, u32 is_default, u8 * name)
+{
+  vlib_buffer_main_t *bm = vm->buffer_main;
+  vlib_buffer_free_list_t *f;
+#if DPDK > 0
+  int i;
+
+  ASSERT (os_get_cpu_number () == 0);
+#endif
+
+  if (!is_default && pool_elts (bm->buffer_free_list_pool) == 0)
+    {
+      u32 default_free_free_list_index;
+
+      /* *INDENT-OFF* */
+      default_free_free_list_index =
+        vlib_buffer_create_free_list_helper
+        (vm,
+         /* default buffer size */ VLIB_BUFFER_DEFAULT_FREE_LIST_BYTES,
+         /* is_public */ 1,
+         /* is_default */ 1,
+         (u8 *) "default");
+      /* *INDENT-ON* */
+      ASSERT (default_free_free_list_index ==
+              VLIB_BUFFER_DEFAULT_FREE_LIST_INDEX);
+
+      if (n_data_bytes == VLIB_BUFFER_DEFAULT_FREE_LIST_BYTES && is_public)
+        {
+          /* The name is not stored on this path.  A heap-allocated name
+             would otherwise have been adopted by the new list, so release
+             it here instead of leaking it. */
+          if (clib_mem_is_heap_object (name))
+            vec_free (name);
+          return default_free_free_list_index;
+        }
+    }
+
+  pool_get_aligned (bm->buffer_free_list_pool, f, CLIB_CACHE_LINE_BYTES);
+
+  memset (f, 0, sizeof (f[0]));
+  f->index = f - bm->buffer_free_list_pool;
+  f->n_data_bytes = vlib_buffer_round_size (n_data_bytes);
+#if DPDK > 0
+  f->min_n_buffers_each_physmem_alloc = 16;
+#else
+  f->min_n_buffers_each_physmem_alloc = 256;
+#endif
+  f->name = clib_mem_is_heap_object (name) ? name : format (0, "%s", name);
+
+  /* Setup free buffer template. */
+  f->buffer_init_template.free_list_index = f->index;
+
+  if (is_public)
+    {
+      /* First public list of a given size wins the by-size lookup. */
+      uword *p = hash_get (bm->free_list_by_size, f->n_data_bytes);
+      if (!p)
+        hash_set (bm->free_list_by_size, f->n_data_bytes, f->index);
+    }
+
+#if DPDK > 0
+  /* Mirror the new element into every other vlib_main's pool so that the
+     same index is valid there; each mirror starts with no buffers. */
+  for (i = 1; i < vec_len (vlib_mains); i++)
+    {
+      vlib_buffer_main_t *wbm = vlib_mains[i]->buffer_main;
+      vlib_buffer_free_list_t *wf;
+      pool_get_aligned (wbm->buffer_free_list_pool,
+                        wf, CLIB_CACHE_LINE_BYTES);
+      ASSERT (f - bm->buffer_free_list_pool ==
+              wf - wbm->buffer_free_list_pool);
+      wf[0] = f[0];
+      wf->aligned_buffers = 0;
+      wf->unaligned_buffers = 0;
+      wf->n_alloc = 0;
+    }
+#endif
+
+  return f->index;
+}
+
+/* Create a new, non-public free list for buffers with n_data_bytes of
+   data.  fmt and the variadic arguments are formatted into the list name.
+   Returns the index of the free list. */
+u32
+vlib_buffer_create_free_list (vlib_main_t * vm, u32 n_data_bytes,
+                              char *fmt, ...)
+{
+  u8 *list_name;
+  va_list ap;
+
+  va_start (ap, fmt);
+  list_name = va_format (0, fmt, &ap);
+  va_end (ap);
+
+  return vlib_buffer_create_free_list_helper (vm, n_data_bytes,
+                                              /* is_public */ 0,
+                                              /* is_default */ 0,
+                                              list_name);
+}
+
+/* Return the index of the free list registered for n_data_bytes, creating
+   a public one (named from fmt and the variadic arguments) when none is
+   registered yet. */
+u32
+vlib_buffer_get_or_create_free_list (vlib_main_t * vm, u32 n_data_bytes,
+                                     char *fmt, ...)
+{
+  u32 index = vlib_buffer_get_free_list_with_size (vm, n_data_bytes);
+  u8 *list_name;
+  va_list ap;
+
+  /* Already registered: nothing to create. */
+  if (index != ~0)
+    return index;
+
+  va_start (ap, fmt);
+  list_name = va_format (0, fmt, &ap);
+  va_end (ap);
+
+  return vlib_buffer_create_free_list_helper (vm, n_data_bytes,
+                                              /* is_public */ 1,
+                                              /* is_default */ 0,
+                                              list_name);
+}
+
+/* Release everything owned by free list f.
+   DPDK builds hand every buffer still on the list back through
+   rte_pktmbuf_free; other builds release each recorded memory chunk with
+   vm->os_physmem_free.  The name and the index vectors are freed in both
+   cases. */
+static void
+del_free_list (vlib_main_t * vm, vlib_buffer_free_list_t * f)
+{
+  u32 i;
+#if DPDK > 0
+  u32 *index_vectors[2];
+  int which;
+
+  /* Unaligned entries first, then aligned ones. */
+  index_vectors[0] = f->unaligned_buffers;
+  index_vectors[1] = f->aligned_buffers;
+
+  for (which = 0; which < 2; which++)
+    {
+      u32 *indices = index_vectors[which];
+
+      for (i = 0; i < vec_len (indices); i++)
+        {
+          vlib_buffer_t *b = vlib_get_buffer (vm, indices[i]);
+          struct rte_mbuf *mb = rte_mbuf_from_vlib_buffer (b);
+
+          ASSERT (rte_mbuf_refcnt_read (mb) == 1);
+          rte_pktmbuf_free (mb);
+        }
+    }
+#else
+
+  for (i = 0; i < vec_len (f->buffer_memory_allocated); i++)
+    vm->os_physmem_free (f->buffer_memory_allocated[i]);
+  vec_free (f->buffer_memory_allocated);
+#endif
+  vec_free (f->name);
+  vec_free (f->unaligned_buffers);
+  vec_free (f->aligned_buffers);
+}
+
+/* Delete buffer free list.
+   Buffers still on the list are first merged into the list registered for
+   the same data size (when that is a different list); the list is then
+   torn down, its pool element is poisoned with 0xab and returned to the
+   pool.  DPDK builds also poison and return the pool element with the same
+   index in every vlib_mains[i], i >= 1.
+   NOTE(review): in non-DPDK builds del_free_list releases the memory
+   chunks recorded on f after its indices were merged into another list;
+   it looks like those merged indices then refer to released memory --
+   confirm. */
+void
+vlib_buffer_delete_free_list (vlib_main_t * vm, u32 free_list_index)
+{
+  vlib_buffer_main_t *bm = vm->buffer_main;
+  vlib_buffer_free_list_t *f;
+  u32 merge_index;
+#if DPDK > 0
+  int i;
+
+  ASSERT (os_get_cpu_number () == 0);
+#endif
+
+  f = vlib_buffer_get_free_list (vm, free_list_index);
+
+#if DPDK == 0
+  /* Every buffer ever allocated for this list must be back on it. */
+  ASSERT (vec_len (f->unaligned_buffers) + vec_len (f->aligned_buffers) ==
+          f->n_alloc);
+#endif
+
+  merge_index = vlib_buffer_get_free_list_with_size (vm, f->n_data_bytes);
+  if (merge_index != ~0 && merge_index != free_list_index)
+    merge_free_lists (pool_elt_at_index (bm->buffer_free_list_pool,
+                                         merge_index), f);
+
+  del_free_list (vm, f);
+
+  /* Poison it. */
+  memset (f, 0xab, sizeof (f[0]));
+
+  pool_put (bm->buffer_free_list_pool, f);
+
+#if DPDK > 0
+  for (i = 1; i < vec_len (vlib_mains); i++)
+    {
+      vlib_main_t *wvm = vlib_mains[i];
+      vlib_buffer_main_t *wbm = wvm->buffer_main;
+      vlib_buffer_free_list_t *wf =
+        vlib_buffer_get_free_list (wvm, free_list_index);
+
+      memset (wf, 0xab, sizeof (wf[0]));
+      pool_put (wbm->buffer_free_list_pool, wf);
+    }
+#endif
+}
+
+/* Make sure free list has at least given number of free buffers.
+ *
+ * The list is trimmed first; if its aligned vector already holds
+ * min_free_buffers entries, min_free_buffers is returned and nothing is
+ * allocated.  Otherwise the shortfall is rounded up to a multiple of
+ * BUFFERS_PER_COPY and raised to at least
+ * fl->min_n_buffers_each_physmem_alloc, and that many new buffers are
+ * requested.
+ *
+ * Return value when new buffers were requested:
+ *   DPDK build:     the number of buffers obtained in one bulk get, or 0
+ *                   when there is no mempool yet or the bulk get fails.
+ *   non-DPDK build: the number of buffers actually obtained, which is
+ *                   smaller than requested (possibly 0) if a chunk
+ *                   allocation fails part-way.
+ * Note the value counts newly added buffers, not the list's total.
+ */
+static uword
+fill_free_list (vlib_main_t * vm,
+ vlib_buffer_free_list_t * fl, uword min_free_buffers)
+{
+#if DPDK > 0
+ vlib_buffer_t *b;
+ int n, i;
+ u32 bi;
+ /* n_remaining and n_alloc are updated in the loop below but never read
+ in this branch. */
+ u32 n_remaining = 0, n_alloc = 0;
+ /* rte_socket_id is declared weak at the top of the file, hence the
+ null check before calling it. */
+ unsigned socket_id = rte_socket_id ? rte_socket_id () : 0;
+ struct rte_mempool *rmp = vm->buffer_main->pktmbuf_pools[socket_id];
+ struct rte_mbuf *mb;
+
+ /* Too early? */
+ if (PREDICT_FALSE (rmp == 0))
+ return 0;
+
+ trim_aligned (fl);
+
+ /* Already have enough free buffers on free list? */
+ n = min_free_buffers - vec_len (fl->aligned_buffers);
+ if (n <= 0)
+ return min_free_buffers;
+
+ /* Always allocate round number of buffers. */
+ n = round_pow2 (n, BUFFERS_PER_COPY);
+
+ /* Always allocate new buffers in reasonably large sized chunks. */
+ n = clib_max (n, fl->min_n_buffers_each_physmem_alloc);
+
+ /* Scratch vector receiving the mbuf pointers from the bulk get. */
+ vec_validate (vm->mbuf_alloc_list, n - 1);
+
+ if (rte_mempool_get_bulk (rmp, vm->mbuf_alloc_list, n) < 0)
+ return 0;
+
+ _vec_len (vm->mbuf_alloc_list) = n;
+
+ for (i = 0; i < n; i++)
+ {
+ mb = vm->mbuf_alloc_list[i];
+
+ ASSERT (rte_mbuf_refcnt_read (mb) == 0);
+ rte_mbuf_refcnt_set (mb, 1);
+
+ b = vlib_buffer_from_rte_mbuf (mb);
+ bi = vlib_get_buffer_index (vm, b);
+
+ vec_add1_aligned (fl->aligned_buffers, bi, sizeof (vlib_copy_unit_t));
+ n_alloc++;
+ n_remaining--;
+
+ vlib_buffer_init_for_free_list (b, fl);
+
+ /* Optional per-list hook, called once per new buffer here. */
+ if (fl->buffer_init_function)
+ fl->buffer_init_function (vm, fl, &bi, 1);
+ }
+
+ fl->n_alloc += n;
+
+ return n;
+#else
+ vlib_buffer_t *buffers, *b;
+ int n, n_bytes, i;
+ u32 *bi;
+ u32 n_remaining, n_alloc, n_this_chunk;
+
+ trim_aligned (fl);
+
+ /* Already have enough free buffers on free list? */
+ n = min_free_buffers - vec_len (fl->aligned_buffers);
+ if (n <= 0)
+ return min_free_buffers;
+
+ /* Always allocate round number of buffers. */
+ n = round_pow2 (n, BUFFERS_PER_COPY);
+
+ /* Always allocate new buffers in reasonably large sized chunks. */
+ n = clib_max (n, fl->min_n_buffers_each_physmem_alloc);
+
+ n_remaining = n;
+ n_alloc = 0;
+ /* Allocate in chunks of at most 16 buffers each. */
+ while (n_remaining > 0)
+ {
+ n_this_chunk = clib_min (n_remaining, 16);
+
+ /* Each buffer is a vlib_buffer_t header followed by its data bytes. */
+ n_bytes = n_this_chunk * (sizeof (b[0]) + fl->n_data_bytes);
+
+ /* drb: removed power-of-2 ASSERT */
+ buffers = vm->os_physmem_alloc_aligned (&vm->physmem_main,
+ n_bytes,
+ sizeof (vlib_buffer_t));
+ /* Out of memory: report what was obtained so far. */
+ if (!buffers)
+ return n_alloc;
+
+ /* Record chunk as being allocated so we can free it later. */
+ vec_add1 (fl->buffer_memory_allocated, buffers);
+
+ fl->n_alloc += n_this_chunk;
+ n_alloc += n_this_chunk;
+ n_remaining -= n_this_chunk;
+
+ b = buffers;
+ /* bi points at the n_this_chunk new slots at the end of the aligned
+ vector. */
+ vec_add2_aligned (fl->aligned_buffers, bi, n_this_chunk,
+ sizeof (vlib_copy_unit_t));
+ for (i = 0; i < n_this_chunk; i++)
+ {
+ bi[i] = vlib_get_buffer_index (vm, b);
+
+ if (CLIB_DEBUG > 0)
+ vlib_buffer_set_known_state (vm, bi[i], VLIB_BUFFER_KNOWN_FREE);
+ b = vlib_buffer_next_contiguous (b, fl->n_data_bytes);
+ }
+
+ memset (buffers, 0, n_bytes);
+
+ /* Initialize all new buffers. */
+ b = buffers;
+ for (i = 0; i < n_this_chunk; i++)
+ {
+ vlib_buffer_init_for_free_list (b, fl);
+ b = vlib_buffer_next_contiguous (b, fl->n_data_bytes);
+ }
+
+ /* Optional per-list hook, called once per chunk here. */
+ if (fl->buffer_init_function)
+ fl->buffer_init_function (vm, fl, bi, n_this_chunk);
+ }
+ return n_alloc;
+#endif
+}
+
+/* Position of x, counted in u32 elements, within its
+   BUFFERS_PER_COPY-element group; 0 means x sits on a group boundary. */
+always_inline uword
+copy_alignment (u32 * x)
+{
+  uword element_number = pointer_to_uword (x) / sizeof (x[0]);
+
+  return element_number % BUFFERS_PER_COPY;
+}
+
+/* Take up to n_alloc_buffers buffer indices off free_list and store them
+ * in alloc_buffers.
+ *
+ * The destination is filled in three phases: single indices from the
+ * unaligned vector until dst reaches a vlib_copy_unit_t boundary, whole
+ * copy units from the end of the aligned vector, then single indices from
+ * the unaligned vector for whatever is left over.
+ *
+ * Returns the number of indices written.  This is 0 when fill_free_list
+ * reports 0, and is clamped to fill_free_list's return value, so it can
+ * be less than n_alloc_buffers.
+ */
+static u32
+alloc_from_free_list (vlib_main_t * vm,
+ vlib_buffer_free_list_t * free_list,
+ u32 * alloc_buffers, u32 n_alloc_buffers)
+{
+ u32 *dst, *u_src;
+ uword u_len, n_left;
+ uword n_unaligned_start, n_unaligned_end, n_filled;
+
+#if DPDK == 0
+ ASSERT (os_get_cpu_number () == 0);
+
+#endif
+ n_left = n_alloc_buffers;
+ dst = alloc_buffers;
+ /* Number of leading indices to write before dst is copy-unit aligned. */
+ n_unaligned_start = ((BUFFERS_PER_COPY - copy_alignment (dst))
+ & (BUFFERS_PER_COPY - 1));
+
+ n_filled = fill_free_list (vm, free_list, n_alloc_buffers);
+ if (n_filled == 0)
+ return 0;
+
+ /* Never hand out more than fill_free_list reported. */
+ n_left = n_filled < n_left ? n_filled : n_left;
+ n_alloc_buffers = n_left;
+
+ if (n_unaligned_start >= n_left)
+ {
+ n_unaligned_start = n_left;
+ n_unaligned_end = 0;
+ }
+ else
+ /* Trailing indices past the last whole copy unit. */
+ n_unaligned_end = copy_alignment (dst + n_alloc_buffers);
+
+ fill_unaligned (vm, free_list, n_unaligned_start + n_unaligned_end);
+
+ /* Unaligned indices are consumed from the end of the vector backwards. */
+ u_len = vec_len (free_list->unaligned_buffers);
+ u_src = free_list->unaligned_buffers + u_len - 1;
+
+ if (n_unaligned_start)
+ {
+ uword n_copy = n_unaligned_start;
+ if (n_copy > n_left)
+ n_copy = n_left;
+ n_left -= n_copy;
+
+ while (n_copy > 0)
+ {
+ *dst++ = *u_src--;
+ n_copy--;
+ u_len--;
+ }
+
+ /* Now dst should be aligned. */
+ if (n_left > 0)
+ ASSERT (pointer_to_uword (dst) % sizeof (vlib_copy_unit_t) == 0);
+ }
+
+ /* Aligned copy. */
+ {
+ vlib_copy_unit_t *d, *s;
+ uword n_copy;
+
+ /* Hard stop if the aligned vector cannot cover the whole copy units
+ about to be taken from it. */
+ if (vec_len (free_list->aligned_buffers) <
+ ((n_left / BUFFERS_PER_COPY) * BUFFERS_PER_COPY))
+ abort ();
+
+ n_copy = n_left / BUFFERS_PER_COPY;
+ n_left = n_left % BUFFERS_PER_COPY;
+
+ /* Remove buffers from aligned free list. */
+ _vec_len (free_list->aligned_buffers) -= n_copy * BUFFERS_PER_COPY;
+
+ /* The vector was shortened first, so vec_end now points at the first
+ of the units being handed out. */
+ s = (vlib_copy_unit_t *) vec_end (free_list->aligned_buffers);
+ d = (vlib_copy_unit_t *) dst;
+
+ /* Fast path loop. */
+ while (n_copy >= 4)
+ {
+ d[0] = s[0];
+ d[1] = s[1];
+ d[2] = s[2];
+ d[3] = s[3];
+ n_copy -= 4;
+ s += 4;
+ d += 4;
+ }
+
+ while (n_copy >= 1)
+ {
+ d[0] = s[0];
+ n_copy -= 1;
+ s += 1;
+ d += 1;
+ }
+
+ dst = (void *) d;
+ }
+
+ /* Unaligned copy. */
+ ASSERT (n_unaligned_end == n_left);
+ while (n_left > 0)
+ {
+ *dst++ = *u_src--;
+ n_left--;
+ u_len--;
+ }
+
+ /* Write back the new unaligned length; a null vector has no header to
+ update. */
+ if (!free_list->unaligned_buffers)
+ ASSERT (u_len == 0);
+ else
+ _vec_len (free_list->unaligned_buffers) = u_len;
+
+#if DPDK == 0
+ /* Verify that buffers are known free. */
+ vlib_buffer_validate_alloc_free (vm, alloc_buffers,
+ n_alloc_buffers, VLIB_BUFFER_KNOWN_FREE);
+#endif
+
+ return n_alloc_buffers;
+}
+
+/* Allocate a given number of buffers from the default free list into the
+   given array.
+   Returns the number actually allocated: zero when the free list could not
+   be filled, otherwise up to the number requested (it can be fewer when
+   the free list could only be partially filled). */
+u32
+vlib_buffer_alloc (vlib_main_t * vm, u32 * buffers, u32 n_buffers)
+{
+  vlib_buffer_main_t *bm = vm->buffer_main;
+  vlib_buffer_free_list_t *default_fl;
+
+#if DPDK == 0
+  ASSERT (os_get_cpu_number () == 0);
+#endif
+
+  default_fl = pool_elt_at_index (bm->buffer_free_list_pool,
+                                  VLIB_BUFFER_DEFAULT_FREE_LIST_INDEX);
+
+  return alloc_from_free_list (vm, default_fl, buffers, n_buffers);
+}
+
+/* Allocate up to n_buffers buffer indices into `buffers` from the free
+   list with the given pool index.  Returns the number allocated. */
+u32
+vlib_buffer_alloc_from_free_list (vlib_main_t * vm,
+                                  u32 * buffers,
+                                  u32 n_buffers, u32 free_list_index)
+{
+  vlib_buffer_main_t *bm = vm->buffer_main;
+
+  return alloc_from_free_list (vm,
+                               pool_elt_at_index (bm->buffer_free_list_pool,
+                                                  free_list_index),
+                               buffers, n_buffers);
+}
+
+/* Append buffer_index to the aligned vector of free list f.  When do_init
+   is non-zero the buffer is first re-initialised for that free list. */
+always_inline void
+add_buffer_to_free_list (vlib_main_t * vm,
+                         vlib_buffer_free_list_t * f,
+                         u32 buffer_index, u8 do_init)
+{
+  vlib_buffer_t *buf = vlib_get_buffer (vm, buffer_index);
+
+  if (PREDICT_TRUE (do_init))
+    {
+      vlib_buffer_init_for_free_list (buf, f);
+    }
+
+  vec_add1_aligned (f->aligned_buffers, buffer_index,
+                    sizeof (vlib_copy_unit_t));
+}
+
+/* Return the free list buffer b belongs to, and store that list's pool
+   index in *index. */
+always_inline vlib_buffer_free_list_t *
+buffer_get_free_list (vlib_main_t * vm, vlib_buffer_t * b, u32 * index)
+{
+  vlib_buffer_main_t *bm = vm->buffer_main;
+  u32 fl_index = b->free_list_index;
+
+  *index = fl_index;
+
+  return pool_elt_at_index (bm->buffer_free_list_pool, fl_index);
+}
+
+/* Install fp as the buffer free callback of vm's buffer main.
+   Returns the callback that was installed before. */
+void *
+vlib_set_buffer_free_callback (vlib_main_t * vm, void *fp)
+{
+  vlib_buffer_main_t *bm = vm->buffer_main;
+  void *previous;
+
+  previous = bm->buffer_free_callback;
+  bm->buffer_free_callback = fp;
+
+  return previous;
+}
+
+#if DPDK == 0
+/* Empty, weakly-linked definition of vnet_buffer_free_dpdk_mb, compiled
+   only when DPDK == 0.  Presumably a non-weak definition elsewhere takes
+   precedence when one is linked in -- verify against the rest of the
+   tree. */
+void vnet_buffer_free_dpdk_mb (vlib_buffer_t * b) __attribute__ ((weak));
+void
+vnet_buffer_free_dpdk_mb (vlib_buffer_t * b)
+{
+}
+
+#endif
+static_always_inline void
+vlib_buffer_free_inline (vlib_main_t * vm,
+ u32 * buffers, u32 n_buffers, u32 follow_buffer_next)
+{
+#if DPDK > 0
+ vlib_buffer_main_t *bm = vm->buffer_main;
+ vlib_buffer_free_list_t *fl;
+ u32 fi;
+ int i;
+ u32 (*cb) (vlib_main_t * vm, u32 * buffers, u32 n_buffers,
+ u32 follow_buffer_next);
+
+ cb = bm->buffer_free_callback;
+
+ if (PREDICT_FALSE (cb != 0))
+ n_buffers = (*cb) (vm, buffers, n_buffers, follow_buffer_next);
+
+ if (!n_buffers)
+ return;
+
+ for (i = 0; i < n_buffers; i++)
+ {
+ vlib_buffer_t *b;
+ struct rte_mbuf *mb;
+
+ b = vlib_get_buffer (vm, buffers[i]);
+
+ fl = buffer_get_free_list (vm, b, &fi);
+
+ /* The only current use of this callback: multicast recycle */
+ if (PREDICT_FALSE (fl->buffers_added_to_freelist_function != 0))
+ {
+ int j;
+
+ add_buffer_to_free_list
+ (vm, fl, buffers[i], (b->flags & VLIB_BUFFER_RECYCLE) == 0);
+
+ for (j = 0; j < vec_len (bm->announce_list); j++)
+ {
+ if (fl == bm->announce_list[j])
+ goto already_announced;
+ }
+ vec_add1 (bm->announce_list, fl);
+ already_announced:
+ ;
+ }
+ else
+ {
+ if (PREDICT_TRUE ((b->flags & VLIB_BUFFER_RECYCLE) == 0))
+ {
+ mb = rte_mbuf_from_vlib_buffer (b);
+ ASSERT (rte_mbuf_refcnt_read (mb) == 1);
+ rte_pktmbuf_free (mb);
+ }
+ }
+ }
+ if (vec_len (bm->announce_list))
+ {
+ vlib_buffer_free_list_t *fl;
+ for (i = 0; i < vec_len (bm->announce_list); i++)
+ {
+ fl = bm->announce_list[i];
+ fl->buffers_added_to_freelist_function (vm, fl);
+ }
+ _vec_len (bm->announce_list) = 0;
+ }
+#else
+ vlib_buffer_main_t *bm = vm->buffer_main;
+ vlib_buffer_free_list_t *fl;
+ static u32 *next_to_free[2]; /* smp bad */
+ u32 i_next_to_free, *b, *n, *f, fi;
+ uword n_left;
+ int i;
+ static vlib_buffer_free_list_t **announce_list;
+ vlib_buffer_free_list_t *fl0 = 0, *fl1 = 0;
+ u32 bi0 = (u32) ~ 0, bi1 = (u32) ~ 0, fi0, fi1 = (u32) ~ 0;
+ u8 free0, free1 = 0, free_next0, free_next1;
+ u32 (*cb) (vlib_main_t * vm, u32 * buffers, u32 n_buffers,
+ u32 follow_buffer_next);
+
+ ASSERT (os_get_cpu_number () == 0);
+
+ cb = bm->buffer_free_callback;
+
+ if (PREDICT_FALSE (cb != 0))
+ n_buffers = (*cb) (vm, buffers, n_buffers, follow_buffer_next);
+
+ if (!n_buffers)
+ return;
+
+ /* Use first buffer to get default free list. */
+ {
+ u32 bi0 = buffers[0];
+ vlib_buffer_t *b0;
+
+ b0 = vlib_get_buffer (vm, bi0);
+ fl = buffer_get_free_list (vm, b0, &fi);
+ if (fl->buffers_added_to_freelist_function)
+ vec_add1 (announce_list, fl);
+ }
+
+ vec_validate (next_to_free[0], n_buffers - 1);
+ vec_validate (next_to_free[1], n_buffers - 1);
+
+ i_next_to_free = 0;
+ n_left = n_buffers;
+ b = buffers;
+
+again:
+ /* Verify that buffers are known allocated. */
+ vlib_buffer_validate_alloc_free (vm, b,
+ n_left, VLIB_BUFFER_KNOWN_ALLOCATED);
+
+ vec_add2_aligned (fl->aligned_buffers, f, n_left,
+ /* align */ sizeof (vlib_copy_unit_t));
+
+ n = next_to_free[i_next_to_free];
+ while (n_left >= 4)
+ {
+ vlib_buffer_t *b0, *b1, *binit0, *binit1, dummy_buffers[2];
+
+ bi0 = b[0];
+ bi1 = b[1];
+
+ f[0] = bi0;
+ f[1] = bi1;
+ f += 2;
+ b += 2;
+ n_left -= 2;
+
+ /* Prefetch buffers for next iteration. */
+ vlib_prefetch_buffer_with_index (vm, b[0], WRITE);
+ vlib_prefetch_buffer_with_index (vm, b[1], WRITE);
+
+ b0 = vlib_get_buffer (vm, bi0);
+ b1 = vlib_get_buffer (vm, bi1);
+
+ free0 = (b0->flags & VLIB_BUFFER_RECYCLE) == 0;
+ free1 = (b1->flags & VLIB_BUFFER_RECYCLE) == 0;
+
+ /* Must be before init which will over-write buffer flags. */
+ if (follow_buffer_next)
+ {
+ n[0] = b0->next_buffer;
+ free_next0 = free0 && (b0->flags & VLIB_BUFFER_NEXT_PRESENT) != 0;
+ n += free_next0;
+
+ n[0] = b1->next_buffer;
+ free_next1 = free1 && (b1->flags & VLIB_BUFFER_NEXT_PRESENT) != 0;
+ n += free_next1;
+ }
+ else
+ free_next0 = free_next1 = 0;
+
+ /* Must be before init which will over-write buffer free list. */
+ fi0 = b0->free_list_index;
+ fi1 = b1->free_list_index;
+
+ if (PREDICT_FALSE (fi0 != fi || fi1 != fi))
+ goto slow_path_x2;
+
+ binit0 = free0 ? b0 : &dummy_buffers[0];
+ binit1 = free1 ? b1 : &dummy_buffers[1];
+
+ vlib_buffer_init_two_for_free_list (binit0, binit1, fl);
+ continue;
+
+ slow_path_x2:
+ /* Backup speculation. */
+ f -= 2;
+ n -= free_next0 + free_next1;
+
+ _vec_len (fl->aligned_buffers) = f - fl->aligned_buffers;
+
+ fl0 = pool_elt_at_index (bm->buffer_free_list_pool, fi0);
+ fl1 = pool_elt_at_index (bm->buffer_free_list_pool, fi1);
+
+ add_buffer_to_free_list (vm, fl0, bi0, free0);
+ if (PREDICT_FALSE (fl0->buffers_added_to_freelist_function != 0))
+ {
+ int i;
+ for (i = 0; i < vec_len (announce_list); i++)
+ if (fl0 == announce_list[i])
+ goto no_fl0;
+ vec_add1 (announce_list, fl0);
+ }
+ no_fl0:
+ if (PREDICT_FALSE (fl1->buffers_added_to_freelist_function != 0))
+ {
+ int i;
+ for (i = 0; i < vec_len (announce_list); i++)
+ if (fl1 == announce_list[i])
+ goto no_fl1;
+ vec_add1 (announce_list, fl1);
+ }
+
+ no_fl1:
+ add_buffer_to_free_list (vm, fl1, bi1, free1);
+
+ /* Possibly change current free list. */
+ if (fi0 != fi && fi1 != fi)
+ {
+ fi = fi1;
+ fl = pool_elt_at_index (bm->buffer_free_list_pool, fi);
+ }
+
+ vec_add2_aligned (fl->aligned_buffers, f, n_left,
+ /* align */ sizeof (vlib_copy_unit_t));
+ }
+
+ while (n_left >= 1)
+ {
+ vlib_buffer_t *b0, *binit0, dummy_buffers[1];
+
+ bi0 = b[0];
+ f[0] = bi0;
+ f += 1;
+ b += 1;
+ n_left -= 1;
+
+ b0 = vlib_get_buffer (vm, bi0);
+
+ free0 = (b0->flags & VLIB_BUFFER_RECYCLE) == 0;
+
+ /* Must be before init which will over-write buffer flags. */
+ if (follow_buffer_next)
+ {
+ n[0] = b0->next_buffer;
+ free_next0 = free0 && (b0->flags & VLIB_BUFFER_NEXT_PRESENT) != 0;
+ n += free_next0;
+ }
+ else
+ free_next0 = 0;
+
+ /* Must be before init which will over-write buffer free list. */
+ fi0 = b0->free_list_index;
+
+ if (PREDICT_FALSE (fi0 != fi))
+ goto slow_path_x1;
+
+ binit0 = free0 ? b0 : &dummy_buffers[0];
+
+ vlib_buffer_init_for_free_list (binit0, fl);
+ continue;
+
+ slow_path_x1:
+ /* Backup speculation. */
+ f -= 1;
+ n -= free_next0;
+
+ _vec_len (fl->aligned_buffers) = f - fl->aligned_buffers;
+
+ fl0 = pool_elt_at_index (bm->buffer_free_list_pool, fi0);
+
+ add_buffer_to_free_list (vm, fl0, bi0, free0);
+ if (PREDICT_FALSE (fl0->buffers_added_to_freelist_function != 0))
+ {
+ int i;
+ for (i = 0; i < vec_len (announce_list); i++)
+ if (fl0 == announce_list[i])
+ goto no_fl00;
+ vec_add1 (announce_list, fl0);
+ }
+
+ no_fl00:
+ fi = fi0;
+ fl = pool_elt_at_index (bm->buffer_free_list_pool, fi);
+
+ vec_add2_aligned (fl->aligned_buffers, f, n_left,
+ /* align */ sizeof (vlib_copy_unit_t));
+ }
+
+ if (follow_buffer_next && ((n_left = n - next_to_free[i_next_to_free]) > 0))
+ {
+ b = next_to_free[i_next_to_free];
+ i_next_to_free ^= 1;
+ goto again;
+ }
+
+ _vec_len (fl->aligned_buffers) = f - fl->aligned_buffers;
+
+ if (vec_len (announce_list))
+ {
+ vlib_buffer_free_list_t *fl;
+ for (i = 0; i < vec_len (announce_list); i++)
+ {
+ fl = announce_list[i];
+ fl->buffers_added_to_freelist_function (vm, fl);
+ }
+ _vec_len (announce_list) = 0;
+ }
+#endif
+}
+
+/* Release n_buffers buffer indices through the shared free routine,
+   asking it to follow next_buffer links (follow_buffer_next = 1). */
+void
+vlib_buffer_free (vlib_main_t * vm, u32 * buffers, u32 n_buffers)
+{
+  enum
+  { follow_chained_buffers = 1 };
+
+  vlib_buffer_free_inline (vm, buffers, n_buffers, follow_chained_buffers);
+}
+
+/* Release n_buffers buffer indices through the shared free routine
+   without following next_buffer links (follow_buffer_next = 0). */
+void
+vlib_buffer_free_no_next (vlib_main_t * vm, u32 * buffers, u32 n_buffers)
+{
+  enum
+  { follow_chained_buffers = 0 };
+
+  vlib_buffer_free_inline (vm, buffers, n_buffers, follow_chained_buffers);
+}
+
+#if DPDK == 0
+/* Free-list buffer_init_function for packet templates: stamp the template's
+   packet bytes into each of the n_buffers buffers listed in BUFFERS.  The
+   template is recovered from the free list's opaque word. */
+static void
+vlib_packet_template_buffer_init (vlib_main_t * vm,
+                                  vlib_buffer_free_list_t * fl,
+                                  u32 * buffers, u32 n_buffers)
+{
+  vlib_packet_template_t *tmpl;
+  u32 *next_index = buffers;
+  u32 n_remaining = n_buffers;
+
+  tmpl = uword_to_pointer (fl->buffer_init_function_opaque,
+                           vlib_packet_template_t *);
+
+  while (n_remaining > 0)
+    {
+      vlib_buffer_t *buf = vlib_get_buffer (vm, next_index[0]);
+
+      /* The buffer length is expected to already match the template size. */
+      ASSERT (buf->current_length == vec_len (tmpl->packet_data));
+      clib_memcpy (vlib_buffer_get_current (buf), tmpl->packet_data,
+                   buf->current_length);
+
+      next_index += 1;
+      n_remaining -= 1;
+    }
+}
+#endif
+
+/* Initialize packet template T with a private copy of the given packet bytes.
+
+   vm                               - vlib main
+   t                                - template to initialize (zeroed first)
+   packet_data, n_packet_data_bytes - bytes copied into t->packet_data
+   min_n_buffers_each_physmem_alloc - recorded on the template and its free
+                                      list (non-DPDK builds only)
+   fmt, ...                         - format string/arguments for the name
+
+   DPDK builds: only t->packet_data is set up, between a worker thread
+   barrier sync/release pair.  The formatted name has no consumer there and
+   is freed immediately.
+
+   Non-DPDK builds: a public, non-default free list is created with the
+   formatted name, and vlib_packet_template_buffer_init is installed as its
+   buffer init function. */
+void
+vlib_packet_template_init (vlib_main_t * vm,
+                           vlib_packet_template_t * t,
+                           void *packet_data,
+                           uword n_packet_data_bytes,
+                           uword min_n_buffers_each_physmem_alloc,
+                           char *fmt, ...)
+{
+#if DPDK > 0
+  va_list va;
+  u8 *name;
+
+  va_start (va, fmt);
+  name = va_format (0, fmt, &va);
+  va_end (va);
+
+  /* Nothing consumes the name in this configuration; free the vector
+     rather than leaking it on every call. */
+  vec_free (name);
+
+  vlib_worker_thread_barrier_sync (vm);
+  memset (t, 0, sizeof (t[0]));
+
+  vec_add (t->packet_data, packet_data, n_packet_data_bytes);
+
+  vlib_worker_thread_barrier_release (vm);
+#else
+  vlib_buffer_free_list_t *fl;
+  va_list va;
+  u8 *name;
+
+  va_start (va, fmt);
+  name = va_format (0, fmt, &va);
+  va_end (va);
+
+  memset (t, 0, sizeof (t[0]));
+
+  vec_add (t->packet_data, packet_data, n_packet_data_bytes);
+  t->min_n_buffers_each_physmem_alloc = min_n_buffers_each_physmem_alloc;
+
+  /* The name vector is handed to the free-list helper. */
+  t->free_list_index = vlib_buffer_create_free_list_helper
+    (vm, n_packet_data_bytes,
+     /* is_public */ 1,
+     /* is_default */ 0,
+     name);
+
+  ASSERT (t->free_list_index != 0);
+  fl = vlib_buffer_get_free_list (vm, t->free_list_index);
+  fl->min_n_buffers_each_physmem_alloc = t->min_n_buffers_each_physmem_alloc;
+
+  /* Buffers taken from this free list get the template bytes copied in. */
+  fl->buffer_init_function = vlib_packet_template_buffer_init;
+  fl->buffer_init_function_opaque = pointer_to_uword (t);
+
+  fl->buffer_init_template.current_data = 0;
+  fl->buffer_init_template.current_length = n_packet_data_bytes;
+  fl->buffer_init_template.flags = 0;
+#endif
+}
+
+/* Allocate one buffer and fill it with a copy of template T's packet bytes.
+   On success the buffer index is stored in *bi_result and a pointer to the
+   buffer's data[] is returned; returns 0 if no buffer could be allocated
+   (in which case *bi_result is left untouched). */
+void *
+vlib_packet_template_get_packet (vlib_main_t * vm,
+                                 vlib_packet_template_t * t, u32 * bi_result)
+{
+  vlib_buffer_t *pkt;
+  u32 pkt_index;
+
+  if (vlib_buffer_alloc (vm, &pkt_index, 1) != 1)
+    return 0;
+
+  bi_result[0] = pkt_index;
+  pkt = vlib_get_buffer (vm, pkt_index);
+
+  clib_memcpy (vlib_buffer_get_current (pkt),
+               t->packet_data, vec_len (t->packet_data));
+  pkt->current_length = vec_len (t->packet_data);
+
+  return pkt->data;
+}
+
+#if DPDK == 0
+/* Refill template T's (empty) free_buffers vector with up to
+   min_n_buffers_each_physmem_alloc buffers taken from the template's
+   free list.  The vector length is set to the number actually obtained. */
+void
+vlib_packet_template_get_packet_helper (vlib_main_t * vm,
+                                        vlib_packet_template_t * t)
+{
+  word n_wanted = t->min_n_buffers_each_physmem_alloc;
+  word n_template_bytes = vec_len (t->packet_data);
+  word n_obtained;
+
+  ASSERT (n_template_bytes > 0);
+  ASSERT (vec_len (t->free_buffers) == 0);
+
+  /* Make room for the full request, then trim to what was delivered. */
+  vec_validate (t->free_buffers, n_wanted - 1);
+  n_obtained = vlib_buffer_alloc_from_free_list (vm, t->free_buffers,
+                                                 n_wanted,
+                                                 t->free_list_index);
+  _vec_len (t->free_buffers) = n_obtained;
+}
+
+#endif
+/* Append given data to end of buffer, possibly allocating new buffers.
+
+   free_list_index - free list used for every allocation made here, and
+                     whose buffer size bounds each buffer's data area
+   buffer_index    - head of an existing chain, or 0 to allocate a fresh
+                     head buffer
+   data            - bytes to append
+   n_data_bytes    - number of bytes to append
+
+   Data is copied to the tail of the chain; whenever the tail fills up, a
+   new buffer is allocated and linked in.  Returns the head buffer index.
+   When an allocation fails clib_error() is called and the head index held
+   at that point is returned. */
+u32
+vlib_buffer_add_data (vlib_main_t * vm,
+                      u32 free_list_index,
+                      u32 buffer_index, void *data, u32 n_data_bytes)
+{
+  u32 n_buffer_bytes, n_left, n_left_this_buffer, bi;
+  vlib_buffer_t *b;
+  void *d;
+
+  bi = buffer_index;
+  if (bi == 0
+      && 1 != vlib_buffer_alloc_from_free_list (vm, &bi, 1, free_list_index))
+    goto out_of_buffers;
+
+  d = data;
+  n_left = n_data_bytes;
+  n_buffer_bytes = vlib_buffer_free_list_buffer_size (vm, free_list_index);
+
+  /* The chain is about to grow, so any cached total length is stale. */
+  b = vlib_get_buffer (vm, bi);
+  b->flags &= ~VLIB_BUFFER_TOTAL_LENGTH_VALID;
+
+  /* Get to the end of the chain before we try to append data... */
+  while (b->flags & VLIB_BUFFER_NEXT_PRESENT)
+    b = vlib_get_buffer (vm, b->next_buffer);
+
+  while (1)
+    {
+      u32 n;
+
+      /* The unsigned subtraction below must not wrap: the end of the
+         current data (offset plus length) has to lie within the buffer.
+         Compare as signed since current_data may be negative. */
+      ASSERT ((word) n_buffer_bytes >=
+              (word) b->current_data + (word) b->current_length);
+      n_left_this_buffer =
+        n_buffer_bytes - (b->current_data + b->current_length);
+      n = clib_min (n_left_this_buffer, n_left);
+      clib_memcpy (vlib_buffer_get_current (b) + b->current_length, d, n);
+      b->current_length += n;
+      n_left -= n;
+      if (n_left == 0)
+        break;
+
+      /* Tail is full: chain a new buffer and continue copying into it. */
+      d += n;
+      if (1 !=
+          vlib_buffer_alloc_from_free_list (vm, &b->next_buffer, 1,
+                                            free_list_index))
+        goto out_of_buffers;
+
+      b->flags |= VLIB_BUFFER_NEXT_PRESENT;
+
+      b = vlib_get_buffer (vm, b->next_buffer);
+    }
+
+  return bi;
+
+out_of_buffers:
+  clib_error ("out of buffers");
+  return bi;
+}
+
+/* Append data_len bytes from DATA to the buffer chain headed by FIRST whose
+   current tail is *LAST.  When the tail has no room left a new buffer is
+   allocated from free_list_index, chained on, and *LAST is updated.
+   Returns the number of bytes copied, which is less than data_len only if
+   a buffer allocation fails. */
+u16
+vlib_buffer_chain_append_data_with_alloc (vlib_main_t * vm,
+                                          u32 free_list_index,
+                                          vlib_buffer_t * first,
+                                          vlib_buffer_t ** last,
+                                          void *data, u16 data_len)
+{
+  u32 n_buffer_bytes =
+    vlib_buffer_free_list_buffer_size (vm, free_list_index);
+  vlib_buffer_t *tail = *last;
+  u16 n_copied = 0;
+
+  ASSERT (n_buffer_bytes >= tail->current_length + tail->current_data);
+
+  while (data_len != 0)
+    {
+      u16 room, chunk;
+
+      room = n_buffer_bytes - tail->current_length - tail->current_data;
+      if (room == 0)
+        {
+          /* Tail is full: grow the chain by one buffer. */
+          if (vlib_buffer_alloc_from_free_list (vm, &tail->next_buffer, 1,
+                                                free_list_index) != 1)
+            return n_copied;
+
+          tail = vlib_buffer_chain_buffer (vm, first, tail,
+                                           tail->next_buffer);
+          *last = tail;
+          room = n_buffer_bytes - tail->current_length - tail->current_data;
+        }
+
+      chunk = (room < data_len) ? room : data_len;
+      clib_memcpy (vlib_buffer_get_current (tail) + tail->current_length,
+                   data + n_copied, chunk);
+      vlib_buffer_chain_increase_length (first, tail, chunk);
+
+      n_copied += chunk;
+      data_len -= chunk;
+    }
+
+  return n_copied;
+}
+
+#if DPDK > 0
+/* Create (or find a substitute for) the packet mbuf pool of one CPU socket.
+
+   vm        - vlib main; supplies buffer_main and physmem_main
+   num_mbufs - number of mbufs requested for the new pool
+   socket_id - CPU socket the pool is created on; also the index into
+               bm->pktmbuf_pools
+
+   Returns 0 on success (pool already present, newly created, or borrowed
+   from another socket) and a clib error otherwise. */
+clib_error_t *
+vlib_buffer_pool_create (vlib_main_t * vm, unsigned num_mbufs,
+ unsigned socket_id)
+{
+ vlib_buffer_main_t *bm = vm->buffer_main;
+ vlib_physmem_main_t *vpm = &vm->physmem_main;
+ struct rte_mempool *rmp;
+ int i;
+
+ /* A null function address here is reported as "not linked with DPDK". */
+ if (!rte_pktmbuf_pool_create)
+ return clib_error_return (0, "not linked with DPDK");
+
+ /* Make sure the per-socket vector has a slot for socket_id. */
+ vec_validate_aligned (bm->pktmbuf_pools, socket_id, CLIB_CACHE_LINE_BYTES);
+
+ /* pool already exists, nothing to do */
+ if (bm->pktmbuf_pools[socket_id])
+ return 0;
+
+ /* The trailing %c with argument 0 appends a NUL so the vector can be
+ passed below as a C string. */
+ u8 *pool_name = format (0, "mbuf_pool_socket%u%c", socket_id, 0);
+
+ rmp = rte_pktmbuf_pool_create ((char *) pool_name, /* pool name */
+ num_mbufs, /* number of mbufs */
+ 512, /* cache size */
+ VLIB_BUFFER_HDR_SIZE, /* priv size */
+ VLIB_BUFFER_PRE_DATA_SIZE + VLIB_BUFFER_DATA_SIZE, /* dataroom size */
+ socket_id); /* cpu socket */
+
+ if (rmp)
+ {
+ {
+ uword this_pool_end;
+ uword this_pool_start;
+ uword this_pool_size;
+ uword save_vpm_start, save_vpm_end, save_vpm_size;
+ struct rte_mempool_memhdr *memhdr;
+
+ /* Sentinels: any real chunk address will lower start / raise end. */
+ this_pool_start = ~0ULL;
+ this_pool_end = 0LL;
+
+ /* Find the lowest start and highest end address over all memory
+ chunks of the new pool. */
+ STAILQ_FOREACH (memhdr, &rmp->mem_list, next)
+ {
+ if (((uword) (memhdr->addr + memhdr->len)) > this_pool_end)
+ this_pool_end = (uword) (memhdr->addr + memhdr->len);
+ if (((uword) memhdr->addr) < this_pool_start)
+ this_pool_start = (uword) (memhdr->addr);
+ }
+ ASSERT (this_pool_start < ~0ULL && this_pool_end > 0);
+ this_pool_size = this_pool_end - this_pool_start;
+
+ if (CLIB_DEBUG > 1)
+ {
+ clib_warning ("%s: pool start %llx pool end %llx pool size %lld",
+ pool_name, this_pool_start, this_pool_end,
+ this_pool_size);
+ clib_warning
+ ("before: virtual.start %llx virtual.end %llx virtual.size %lld",
+ vpm->virtual.start, vpm->virtual.end, vpm->virtual.size);
+ }
+
+ /* Remember the current physmem virtual range so it can be restored
+ if the widened range turns out to be too large. */
+ save_vpm_start = vpm->virtual.start;
+ save_vpm_end = vpm->virtual.end;
+ save_vpm_size = vpm->virtual.size;
+
+ /* Widen the range to cover the new pool; a start of 0 is treated as
+ "not set yet". */
+ if ((this_pool_start < vpm->virtual.start) || vpm->virtual.start == 0)
+ vpm->virtual.start = this_pool_start;
+ if (this_pool_end > vpm->virtual.end)
+ vpm->virtual.end = this_pool_end;
+
+ vpm->virtual.size = vpm->virtual.end - vpm->virtual.start;
+
+ if (CLIB_DEBUG > 1)
+ {
+ clib_warning
+ ("after: virtual.start %llx virtual.end %llx virtual.size %lld",
+ vpm->virtual.start, vpm->virtual.end, vpm->virtual.size);
+ }
+
+ /* check if fits into buffer index range */
+ if ((u64) vpm->virtual.size >
+ ((u64) 1 << (32 + CLIB_LOG2_CACHE_LINE_BYTES)))
+ {
+ clib_warning ("physmem: virtual size out of range!");
+ vpm->virtual.start = save_vpm_start;
+ vpm->virtual.end = save_vpm_end;
+ vpm->virtual.size = save_vpm_size;
+ /* NOTE(review): the pool created above is dropped here without
+ any visible release call - confirm whether that is intended. */
+ rmp = 0;
+ }
+ }
+ if (rmp)
+ {
+ bm->pktmbuf_pools[socket_id] = rmp;
+ vec_free (pool_name);
+ return 0;
+ }
+ }
+
+ vec_free (pool_name);
+
+ /* no usable pool for this socket, try to use pool from another one */
+ for (i = 0; i < vec_len (bm->pktmbuf_pools); i++)
+ {
+ if (bm->pktmbuf_pools[i])
+ {
+ clib_warning
+ ("WARNING: Failed to allocate mempool for CPU socket %u. "
+ "Threads running on socket %u will use socket %u mempool.",
+ socket_id, socket_id, i);
+ bm->pktmbuf_pools[socket_id] = bm->pktmbuf_pools[i];
+ return 0;
+ }
+ }
+
+ return clib_error_return (0, "failed to allocate mempool on socket %u",
+ socket_id);
+}
+#endif
+
+/* Serialize stream data function for the transmit direction (installed by
+   serialize_open_vlib_helper when is_read is 0).
+
+   m - serialize main header, used only to report allocation errors
+   s - stream; s->current_buffer_index is the number of bytes written into
+       the current stream buffer, s->data_function_opaque carries the
+       vlib_serialize_buffer_main_t
+
+   Depending on state this hands the finished chain to the next node,
+   allocates the first buffer of a chain, or appends a fresh buffer to the
+   chain, and finally points the stream at the last buffer's data. */
+static void
+vlib_serialize_tx (serialize_main_header_t * m, serialize_stream_t * s)
+{
+ vlib_main_t *vm;
+ vlib_serialize_buffer_main_t *sm;
+ uword n, n_bytes_to_write;
+ vlib_buffer_t *last;
+
+ n_bytes_to_write = s->current_buffer_index;
+ sm =
+ uword_to_pointer (s->data_function_opaque,
+ vlib_serialize_buffer_main_t *);
+ vm = sm->vlib_main;
+
+ ASSERT (sm->tx.max_n_data_bytes_per_chain > 0);
+ /* End of stream, or this write would exceed the per-chain byte limit:
+ finish the chain and enqueue it to the next node. */
+ if (serialize_stream_is_end_of_stream (s)
+ || sm->tx.n_total_data_bytes + n_bytes_to_write >
+ sm->tx.max_n_data_bytes_per_chain)
+ {
+ vlib_process_t *p = vlib_get_current_process (vm);
+
+ last = vlib_get_buffer (vm, sm->last_buffer);
+ last->current_length = n_bytes_to_write;
+
+ vlib_set_next_frame_buffer (vm, &p->node_runtime, sm->tx.next_index,
+ sm->first_buffer);
+
+ /* ~0 marks "no chain in progress". */
+ sm->first_buffer = sm->last_buffer = ~0;
+ sm->tx.n_total_data_bytes = 0;
+ }
+
+ /* Nothing written and the stream has no buffer yet: start a new chain. */
+ else if (n_bytes_to_write == 0 && s->n_buffer_bytes == 0)
+ {
+ ASSERT (sm->first_buffer == ~0);
+ ASSERT (sm->last_buffer == ~0);
+ n =
+ vlib_buffer_alloc_from_free_list (vm, &sm->first_buffer, 1,
+ sm->tx.free_list_index);
+ if (n != 1)
+ serialize_error (m,
+ clib_error_create
+ ("vlib_buffer_alloc_from_free_list fails"));
+ sm->last_buffer = sm->first_buffer;
+ s->n_buffer_bytes =
+ vlib_buffer_free_list_buffer_size (vm, sm->tx.free_list_index);
+ }
+
+ /* Bytes were written: allocate a new last buffer and link it after the
+ previous one, whose length is set to the bytes just written. */
+ if (n_bytes_to_write > 0)
+ {
+ /* NOTE(review): if the flush branch above ran, sm->last_buffer is ~0
+ at this point and prev is looked up with that index - confirm this
+ combination cannot occur or is handled by the caller. */
+ vlib_buffer_t *prev = vlib_get_buffer (vm, sm->last_buffer);
+ n =
+ vlib_buffer_alloc_from_free_list (vm, &sm->last_buffer, 1,
+ sm->tx.free_list_index);
+ if (n != 1)
+ serialize_error (m,
+ clib_error_create
+ ("vlib_buffer_alloc_from_free_list fails"));
+ sm->tx.n_total_data_bytes += n_bytes_to_write;
+ prev->current_length = n_bytes_to_write;
+ prev->next_buffer = sm->last_buffer;
+ prev->flags |= VLIB_BUFFER_NEXT_PRESENT;
+ }
+
+ /* Point the stream at the start of the current last buffer's data. */
+ if (sm->last_buffer != ~0)
+ {
+ last = vlib_get_buffer (vm, sm->last_buffer);
+ s->buffer = vlib_buffer_get_current (last);
+ s->current_buffer_index = 0;
+ ASSERT (last->current_data == s->current_buffer_index);
+ }
+}
+
+/* Serialize stream data function for the receive direction (installed by
+   serialize_open_vlib_helper when is_read is non-zero).
+
+   m - serialize main header (not used here)
+   s - stream; on return s->buffer / s->n_buffer_bytes describe the data of
+       the next buffer to unserialize from
+
+   Advances to the next buffer of the current chain, or frees the exhausted
+   chain and takes the next chain head from the rx fifo, waiting on a
+   one-time event while the fifo is empty. */
+static void
+vlib_serialize_rx (serialize_main_header_t * m, serialize_stream_t * s)
+{
+ vlib_main_t *vm;
+ vlib_serialize_buffer_main_t *sm;
+ vlib_buffer_t *last;
+
+ sm =
+ uword_to_pointer (s->data_function_opaque,
+ vlib_serialize_buffer_main_t *);
+ vm = sm->vlib_main;
+
+ if (serialize_stream_is_end_of_stream (s))
+ return;
+
+ /* A chain is in progress: step to its next buffer, or release the whole
+ chain once its last buffer has been consumed. */
+ if (sm->last_buffer != ~0)
+ {
+ last = vlib_get_buffer (vm, sm->last_buffer);
+
+ if (last->flags & VLIB_BUFFER_NEXT_PRESENT)
+ sm->last_buffer = last->next_buffer;
+ else
+ {
+ vlib_buffer_free (vm, &sm->first_buffer, /* count */ 1);
+ sm->first_buffer = sm->last_buffer = ~0;
+ }
+ }
+
+ /* No chain in progress: pull the next chain head from the fifo. */
+ if (sm->last_buffer == ~0)
+ {
+ /* Fifo empty: suspend this process until the one-time event recorded
+ in rx.ready_one_time_event is signalled, then re-check. */
+ while (clib_fifo_elts (sm->rx.buffer_fifo) == 0)
+ {
+ sm->rx.ready_one_time_event =
+ vlib_process_create_one_time_event (vm, vlib_current_process (vm),
+ ~0);
+ vlib_process_wait_for_one_time_event (vm, /* no event data */ 0,
+ sm->rx.ready_one_time_event);
+ }
+
+ clib_fifo_sub1 (sm->rx.buffer_fifo, sm->first_buffer);
+ sm->last_buffer = sm->first_buffer;
+ }
+
+ ASSERT (sm->last_buffer != ~0);
+
+ /* Expose the current buffer's data to the stream. */
+ last = vlib_get_buffer (vm, sm->last_buffer);
+ s->current_buffer_index = 0;
+ s->buffer = vlib_buffer_get_current (last);
+ s->n_buffer_bytes = last->current_length;
+}
+
+/* Common setup for serializing to / unserializing from vlib buffers.
+   Zeroes serialize main M while keeping its overflow buffer vector (reset
+   to length 0) for re-use, resets SM's chain state, and installs the rx or
+   tx data function according to is_read. */
+static void
+serialize_open_vlib_helper (serialize_main_t * m,
+                            vlib_main_t * vm,
+                            vlib_serialize_buffer_main_t * sm, uword is_read)
+{
+  u8 *kept_overflow = m->stream.overflow_buffer;
+
+  /* Wipe everything except the overflow vector, which is emptied. */
+  memset (m, 0, sizeof (m[0]));
+  m->stream.overflow_buffer = kept_overflow;
+  if (kept_overflow)
+    _vec_len (kept_overflow) = 0;
+
+  /* ~0 means no buffer chain is in progress. */
+  sm->first_buffer = sm->last_buffer = ~0;
+
+  if (is_read)
+    clib_fifo_reset (sm->rx.buffer_fifo);
+  else
+    sm->tx.n_total_data_bytes = 0;
+
+  sm->vlib_main = vm;
+
+  if (is_read)
+    m->header.data_function = vlib_serialize_rx;
+  else
+    m->header.data_function = vlib_serialize_tx;
+
+  m->stream.data_function_opaque = pointer_to_uword (sm);
+}
+
+/* Prepare M for serializing into vlib buffers (write direction). */
+void
+serialize_open_vlib_buffer (serialize_main_t * m, vlib_main_t * vm,
+                            vlib_serialize_buffer_main_t * sm)
+{
+  enum
+  { open_for_read = 0 };
+
+  serialize_open_vlib_helper (m, vm, sm, open_for_read);
+}
+
+/* Prepare M for unserializing from vlib buffers (read direction). */
+void
+unserialize_open_vlib_buffer (serialize_main_t * m, vlib_main_t * vm,
+                              vlib_serialize_buffer_main_t * sm)
+{
+  enum
+  { open_for_read = 1 };
+
+  serialize_open_vlib_helper (m, vm, sm, open_for_read);
+}
+
+/* Finish serializing into vlib buffers.
+
+   Records the bytes written so far as the length of the last buffer (when
+   a chain exists), then appends any pending overflow-buffer bytes through
+   vlib_buffer_add_data and empties the overflow vector.
+
+   Returns sm->first_buffer, the head of the serialized chain (~0 when no
+   chain head was recorded). */
+u32
+serialize_close_vlib_buffer (serialize_main_t * m)
+{
+  vlib_serialize_buffer_main_t *sm
+    = uword_to_pointer (m->stream.data_function_opaque,
+                        vlib_serialize_buffer_main_t *);
+  vlib_buffer_t *last;
+  serialize_stream_t *s = &m->stream;
+
+  /* last_buffer is ~0 when no chain is in progress (the overflow path
+     below already allows for that); do not look up or write through an
+     invalid buffer index. */
+  if (sm->last_buffer != ~0)
+    {
+      last = vlib_get_buffer (sm->vlib_main, sm->last_buffer);
+      last->current_length = s->current_buffer_index;
+    }
+
+  if (vec_len (s->overflow_buffer) > 0)
+    {
+      /* Index 0 asks vlib_buffer_add_data to allocate a fresh buffer.
+         NOTE(review): in that case first_buffer stays ~0 while the data
+         lives in the newly allocated buffer - confirm callers cope. */
+      sm->last_buffer
+        = vlib_buffer_add_data (sm->vlib_main, sm->tx.free_list_index,
+                                sm->last_buffer == ~0 ? 0 : sm->last_buffer,
+                                s->overflow_buffer,
+                                vec_len (s->overflow_buffer));
+      _vec_len (s->overflow_buffer) = 0;
+    }
+
+  return sm->first_buffer;
+}
+
+/* Finish unserializing from vlib buffers: free the chain currently held
+   (if any), reset the rx fifo and empty the overflow buffer vector. */
+void
+unserialize_close_vlib_buffer (serialize_main_t * m)
+{
+  serialize_stream_t *stream = &m->stream;
+  vlib_serialize_buffer_main_t *sbm;
+
+  sbm = uword_to_pointer (stream->data_function_opaque,
+                          vlib_serialize_buffer_main_t *);
+
+  if (sbm->first_buffer != ~0)
+    vlib_buffer_free_one (sbm->vlib_main, sbm->first_buffer);
+
+  clib_fifo_reset (sbm->rx.buffer_fifo);
+
+  if (stream->overflow_buffer)
+    _vec_len (stream->overflow_buffer) = 0;
+}
+
+/* Format function for one buffer free list.
+
+   va arguments: (vlib_buffer_free_list_t *) f, and in DPDK builds an
+   additional (u32) thread number.  When f is null the column header line
+   is produced instead of a data row.  Returns the extended vector s. */
+static u8 *
+format_vlib_buffer_free_list (u8 * s, va_list * va)
+{
+ vlib_buffer_free_list_t *f = va_arg (*va, vlib_buffer_free_list_t *);
+#if DPDK > 0
+ /* The thread number is fetched before the null check, so callers pass it
+ for the header line as well. */
+ u32 threadnum = va_arg (*va, u32);
+ uword bytes_alloc, bytes_free, n_free, size;
+
+ if (!f)
+ return format (s, "%=7s%=30s%=12s%=12s%=12s%=12s%=12s%=12s",
+ "Thread", "Name", "Index", "Size", "Alloc", "Free",
+ "#Alloc", "#Free");
+
+ /* Per-buffer footprint: header struct plus data bytes. */
+ size = sizeof (vlib_buffer_t) + f->n_data_bytes;
+ n_free = vec_len (f->aligned_buffers) + vec_len (f->unaligned_buffers);
+ bytes_alloc = size * f->n_alloc;
+ bytes_free = size * n_free;
+
+ /* The format call opened here is completed by the shared argument list
+ after the #endif below. */
+ s = format (s, "%7d%30s%12d%12d%=12U%=12U%=12d%=12d", threadnum,
+#else
+ uword bytes_alloc, bytes_free, n_free, size;
+
+ if (!f)
+ return format (s, "%=30s%=12s%=12s%=12s%=12s%=12s%=12s",
+ "Name", "Index", "Size", "Alloc", "Free", "#Alloc",
+ "#Free");
+
+ /* Per-buffer footprint: header struct plus data bytes. */
+ size = sizeof (vlib_buffer_t) + f->n_data_bytes;
+ n_free = vec_len (f->aligned_buffers) + vec_len (f->unaligned_buffers);
+ bytes_alloc = size * f->n_alloc;
+ bytes_free = size * n_free;
+
+ /* The format call opened here is completed by the shared argument list
+ after the #endif below. */
+ s = format (s, "%30s%12d%12d%=12U%=12U%=12d%=12d",
+#endif
+ f->name, f->index, f->n_data_bytes,
+ format_memory_size, bytes_alloc,
+ format_memory_size, bytes_free, f->n_alloc, n_free);
+
+ return s;
+}
+
+/* CLI handler for "show buffers": prints a header line followed by one
+   line per buffer free list.  DPDK builds walk the free list pools of
+   every entry in vlib_mains (or just vm when that vector is empty) and
+   prefix each line with the thread index.  input and cmd are not used.
+   Always returns 0. */
+static clib_error_t *
+show_buffers (vlib_main_t * vm,
+ unformat_input_t * input, vlib_cli_command_t * cmd)
+{
+#if DPDK > 0
+ vlib_buffer_main_t *bm;
+ vlib_buffer_free_list_t *f;
+ vlib_main_t *curr_vm;
+ u32 vm_index = 0;
+
+ /* Null free list => header line; the second 0 fills the thread slot. */
+ vlib_cli_output (vm, "%U", format_vlib_buffer_free_list, 0, 0);
+
+ /* do/while so the body runs once even when vlib_mains is empty. */
+ do
+ {
+ curr_vm = vec_len (vlib_mains) ? vlib_mains[vm_index] : vm;
+ bm = curr_vm->buffer_main;
+
+ /* *INDENT-OFF* */
+ pool_foreach (f, bm->buffer_free_list_pool, ({
+ vlib_cli_output (vm, "%U", format_vlib_buffer_free_list, f, vm_index);
+ }));
+ /* *INDENT-ON* */
+
+ vm_index++;
+ }
+ while (vm_index < vec_len (vlib_mains));
+
+#else
+ vlib_buffer_main_t *bm = vm->buffer_main;
+ vlib_buffer_free_list_t *f;
+
+ /* Null free list => header line. */
+ vlib_cli_output (vm, "%U", format_vlib_buffer_free_list, 0);
+ /* *INDENT-OFF* */
+ pool_foreach (f, bm->buffer_free_list_pool, ({
+ vlib_cli_output (vm, "%U", format_vlib_buffer_free_list, f);
+ }));
+/* *INDENT-ON* */
+
+#endif
+ return 0;
+}
+
+/* Registers the "show buffers" CLI command with show_buffers as its
+   handler. */
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (show_buffers_command, static) = {
+ .path = "show buffers",
+ .short_help = "Show packet buffer allocation",
+ .function = show_buffers,
+};
+/* *INDENT-ON* */
+
+/* Buffer state validation support; compiled only for DPDK builds with
+   CLIB_DEBUG > 0. */
+#if DPDK > 0
+#if CLIB_DEBUG > 0
+
+/* Lock word, hash table and dedicated heap created by
+   buffer_state_validation_init below. */
+u32 *vlib_buffer_state_validation_lock;
+uword *vlib_buffer_state_validation_hash;
+void *vlib_buffer_state_heap;
+
+/* Init function: creates a separate heap and allocates the validation
+   hash and lock vector from it.  Always returns 0. */
+static clib_error_t *
+buffer_state_validation_init (vlib_main_t * vm)
+{
+ void *oldheap;
+
+ /* 10 << 20 is presumably the heap size in bytes (10 MiB) - TODO confirm
+ against mheap_alloc. */
+ vlib_buffer_state_heap = mheap_alloc (0, 10 << 20);
+
+ /* Allocate the hash and the lock from the new heap, then switch back. */
+ oldheap = clib_mem_set_heap (vlib_buffer_state_heap);
+
+ vlib_buffer_state_validation_hash = hash_create (0, sizeof (uword));
+ vec_validate_aligned (vlib_buffer_state_validation_lock, 0,
+ CLIB_CACHE_LINE_BYTES);
+ clib_mem_set_heap (oldheap);
+ return 0;
+}
+
+VLIB_INIT_FUNCTION (buffer_state_validation_init);
+#endif
+#endif
+
+
+/** @endcond */
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vlib/buffer.h b/src/vlib/buffer.h
new file mode 100644
index 00000000000..5f1e62f08c9
--- /dev/null
+++ b/src/vlib/buffer.h
@@ -0,0 +1,417 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * buffer.h: VLIB buffers
+ *
+ * Copyright (c) 2008 Eliot Dresselhaus
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef included_vlib_buffer_h
+#define included_vlib_buffer_h
+
+#include <vppinfra/types.h>
+#include <vppinfra/cache.h>
+#include <vppinfra/serialize.h>
+#include <vppinfra/vector.h>
+#include <vlib/error.h> /* for vlib_error_t */
+
+#if DPDK > 0
+#include <rte_config.h>
+#define VLIB_BUFFER_DATA_SIZE (2048)
+#define VLIB_BUFFER_PRE_DATA_SIZE RTE_PKTMBUF_HEADROOM
+#else
+#include <vlib/config.h> /* for __PRE_DATA_SIZE */
+#define VLIB_BUFFER_DATA_SIZE (512)
+#define VLIB_BUFFER_PRE_DATA_SIZE __PRE_DATA_SIZE
+#endif
+
+#if defined (CLIB_HAVE_VEC128) || defined (__aarch64__)
+typedef u8x16 vlib_copy_unit_t;
+#else
+typedef u64 vlib_copy_unit_t;
+#endif
+
+/** \file
+ vlib buffer structure definition and a few select
+ access methods. This structure and the buffer allocation
+ mechanism should perhaps live in vnet, but it would take a lot
+ of typing to make it so.
+*/
+
+/* VLIB buffer representation: per-buffer metadata laid out in cache-line
+   marked sections, followed by the pre-data area and the packet data. */
+typedef struct
+{
+ CLIB_CACHE_LINE_ALIGN_MARK (cacheline0);
+ /* Offset within data[] that we are currently processing.
+ If negative current header points into predata area. */
+ i16 current_data; /**< signed offset in data[], pre_data[]
+ that we are currently processing.
+ If negative current header points into predata area.
+ */
+ u16 current_length; /**< Nbytes between current data and
+ the end of this buffer.
+ */
+ u32 flags; /**< buffer flags:
+ <br> VLIB_BUFFER_IS_TRACED: trace this buffer.
+ <br> VLIB_BUFFER_NEXT_PRESENT: this is a multi-chunk buffer.
+ <br> VLIB_BUFFER_TOTAL_LENGTH_VALID: as it says
+ <br> VLIB_BUFFER_REPL_FAIL: packet replication failure
+ <br> VLIB_BUFFER_RECYCLE: as it says
+ <br> VLIB_BUFFER_FLOW_REPORT: buffer is a flow report,
+ set to avoid adding it to a flow report
+ <br> VLIB_BUFFER_FLAG_USER(n): user-defined bit N
+ */
+#define VLIB_BUFFER_IS_TRACED (1 << 0)
+#define VLIB_BUFFER_LOG2_NEXT_PRESENT (1)
+#define VLIB_BUFFER_NEXT_PRESENT (1 << VLIB_BUFFER_LOG2_NEXT_PRESENT)
+#define VLIB_BUFFER_IS_RECYCLED (1 << 2)
+#define VLIB_BUFFER_TOTAL_LENGTH_VALID (1 << 3)
+#define VLIB_BUFFER_REPL_FAIL (1 << 4)
+#define VLIB_BUFFER_RECYCLE (1 << 5)
+#define VLIB_BUFFER_FLOW_REPORT (1 << 6)
+
+ /* User defined buffer flags; user flag n maps to bit (32 - n), i.e. the
+ user flags are numbered downwards from the top bit. */
+ /* NOTE(review): VLIB_BUFFER_FLAG_USER(1) expands to (1 << 31), a shift of
+ a signed int into its sign bit - confirm this is acceptable. */
+#define LOG2_VLIB_BUFFER_FLAG_USER(n) (32 - (n))
+#define VLIB_BUFFER_FLAG_USER(n) (1 << LOG2_VLIB_BUFFER_FLAG_USER(n))
+
+ u32 free_list_index; /**< Buffer free list that this buffer was
+ allocated from and will be freed to.
+ */
+
+ u32 total_length_not_including_first_buffer;
+ /**< Only valid for first buffer in chain. Current length plus
+ total length given here give total number of bytes in buffer chain.
+ */
+
+ u32 next_buffer; /**< Next buffer for this linked-list of buffers.
+ Only valid if VLIB_BUFFER_NEXT_PRESENT flag is set.
+ */
+
+ vlib_error_t error; /**< Error code for buffers to be enqueued
+ to error handler.
+ */
+ u32 current_config_index; /**< Used by feature subgraph arcs to
+ visit enabled feature nodes
+ */
+
+ u8 feature_arc_index; /**< Used to identify feature arcs by intermediate
+ feature node
+ */
+
+ u8 dont_waste_me[3]; /**< Available space in the (precious)
+ first 32 octets of buffer metadata
+ Before allocating any of it, discussion required!
+ */
+
+ u32 opaque[8]; /**< Opaque data used by sub-graphs for their own purposes.
+ See .../vnet/vnet/buffer.h
+ */
+ CLIB_CACHE_LINE_ALIGN_MARK (cacheline1);
+
+ u32 trace_index; /**< Specifies index into trace buffer
+ if VLIB_BUFFER_IS_TRACED flag is set.
+ */
+ u32 recycle_count; /**< Used by L2 path recycle code */
+ u32 opaque2[14]; /**< More opaque data, currently unused */
+
+ /***** end of second cache line */
+ CLIB_CACHE_LINE_ALIGN_MARK (cacheline2);
+ u8 pre_data[VLIB_BUFFER_PRE_DATA_SIZE]; /**< Space for inserting data
+ before buffer start.
+ Packet rewrite string will be
+ rewritten backwards and may extend
+ back before buffer->data[0].
+ Must come directly before packet data.
+ */
+
+ u8 data[0]; /**< Packet data. Hardware DMA here */
+} vlib_buffer_t; /* Must be a multiple of 64B. */
+
+#define VLIB_BUFFER_HDR_SIZE (sizeof(vlib_buffer_t) - VLIB_BUFFER_PRE_DATA_SIZE)
+
+/** \brief Prefetch buffer metadata.
+ The first 64 bytes of buffer contains most header information
+
+ @param b - (vlib_buffer_t *) pointer to the buffer
+ @param type - LOAD, STORE. In most cases, STORE is the right answer
+*/
+
+#define vlib_prefetch_buffer_header(b,type) CLIB_PREFETCH (b, 64, type)
+
+/* Address of the buffer that would follow B in memory when each buffer
+   carries buffer_bytes of data after its header struct. */
+always_inline vlib_buffer_t *
+vlib_buffer_next_contiguous (vlib_buffer_t * b, u32 buffer_bytes)
+{
+  void *end_of_header = (void *) (b + 1);
+
+  return end_of_header + buffer_bytes;
+}
+
+/* Debug check of the vlib_buffer_t layout assumptions. */
+always_inline void
+vlib_buffer_struct_is_sane (vlib_buffer_t * b)
+{
+  /* The struct size must be a whole number of 64-byte units. */
+  ASSERT ((sizeof (b[0]) % 64) == 0);
+
+  /* pre_data[] must end exactly where data[] begins. */
+  ASSERT (b->data == b->pre_data + VLIB_BUFFER_PRE_DATA_SIZE);
+}
+
+/** \brief Return the address of the data currently being processed
+
+    @param b - (vlib_buffer_t *) pointer to the buffer
+    @return - (void *) (b->data + b->current_data)
+*/
+
+always_inline void *
+vlib_buffer_get_current (vlib_buffer_t * b)
+{
+  i16 offset = b->current_data;
+
+  /* A negative offset may reach back into pre_data[], but no further. */
+  ASSERT ((signed) offset >= (signed) -VLIB_BUFFER_PRE_DATA_SIZE);
+
+  return b->data + offset;
+}
+
+/** \brief Move the current data pointer by a signed number of bytes,
+    shrinking (or, for a negative amount, growing) the current length by
+    the same amount
+
+    @param b - (vlib_buffer_t *) pointer to the buffer
+    @param l - (word) signed increment
+*/
+always_inline void
+vlib_buffer_advance (vlib_buffer_t * b, word l)
+{
+  /* Cannot advance past the bytes the buffer currently holds. */
+  ASSERT (b->current_length >= l);
+
+  b->current_length -= l;
+  b->current_data += l;
+}
+
+/** \brief Rewind the current data offset to 0; when the offset was
+    positive the current length grows by that many bytes, putting header
+    and length back to the state they were in when the packet was received.
+
+    @param b - (vlib_buffer_t *) pointer to the buffer
+*/
+
+always_inline void
+vlib_buffer_reset (vlib_buffer_t * b)
+{
+  /* Only bytes that were advanced over (positive offset) are reclaimed. */
+  if (b->current_data > 0)
+    b->current_length += b->current_data;
+
+  b->current_data = 0;
+}
+
+/** \brief Return the address of the buffer's opaque[] array
+
+    @param b - (vlib_buffer_t *) pointer to the buffer
+    @return - (void *) b->opaque
+*/
+always_inline void *
+vlib_get_buffer_opaque (vlib_buffer_t * b)
+{
+  void *opaque_area = b->opaque;
+
+  return opaque_area;
+}
+
+/** \brief Return the address of the buffer's opaque2[] array
+
+    @param b - (vlib_buffer_t *) pointer to the buffer
+    @return - (void *) b->opaque2
+*/
+always_inline void *
+vlib_get_buffer_opaque2 (vlib_buffer_t * b)
+{
+  void *opaque2_area = b->opaque2;
+
+  return opaque2_area;
+}
+
+/* Forward declaration. */
+struct vlib_main_t;
+
+/* One buffer free list: the free buffer indices of a given data size plus
+   the template and callbacks applied to buffers of this list. */
+typedef struct vlib_buffer_free_list_t
+{
+ /* Template buffer used to initialize first 16 bytes of buffers
+ allocated on this free list. */
+ vlib_buffer_t buffer_init_template;
+
+ /* Our index into vlib_buffer_main_t's buffer_free_list_pool. */
+ u32 index;
+
+ /* Number of data bytes for buffers in this free list. */
+ u32 n_data_bytes;
+
+ /* Number of buffers to allocate when we need to allocate new buffers
+ from physmem heap. */
+ u32 min_n_buffers_each_physmem_alloc;
+
+ /* Total number of buffers allocated from this free list. */
+ u32 n_alloc;
+
+ /* Vector of free buffers. Each element is a byte offset into I/O heap.
+ The aligned vector always holds naturally aligned, vlib_copy_unit_t
+ sized chunks of buffer indices. The unaligned vector holds any left
+ over. This is meant to speed up copy routines. */
+ u32 *aligned_buffers, *unaligned_buffers;
+
+ /* Memory chunks allocated for this free list
+ recorded here so they can be freed when free list
+ is deleted. */
+ void **buffer_memory_allocated;
+
+ /* Free list name. */
+ u8 *name;
+
+ /* Callback functions to initialize newly allocated buffers.
+ If null buffers are zeroed. */
+ void (*buffer_init_function) (struct vlib_main_t * vm,
+ struct vlib_buffer_free_list_t * fl,
+ u32 * buffers, u32 n_buffers);
+
+ /* Callback function to announce that buffers have been
+ added to the freelist */
+ void (*buffers_added_to_freelist_function)
+ (struct vlib_main_t * vm, struct vlib_buffer_free_list_t * fl);
+
+ /* Opaque word for use by buffer_init_function. */
+ uword buffer_init_function_opaque;
+} __attribute__ ((aligned (16))) vlib_buffer_free_list_t;
+
+/* Per-vlib_main buffer state: the pool of free lists, lookup tables and,
+   in DPDK builds, the per-socket mbuf pools. */
+typedef struct
+{
+  /* Buffer free callback, for subversive activities */
+  u32 (*buffer_free_callback) (struct vlib_main_t * vm,
+                               u32 * buffers,
+                               u32 n_buffers, u32 follow_buffer_next);
+  /* Pool of buffer free lists.
+     Multiple free lists exist for packet generator which uses
+     separate free lists for each packet stream --- so as to avoid
+     initializing static data for each packet generated. */
+  vlib_buffer_free_list_t *buffer_free_list_pool;
+#define VLIB_BUFFER_DEFAULT_FREE_LIST_INDEX (0)
+#define VLIB_BUFFER_DEFAULT_FREE_LIST_BYTES VLIB_BUFFER_DATA_SIZE
+
+  /* Hash table mapping buffer size (rounded to next unit of
+     sizeof (vlib_buffer_t)) to free list index. */
+  uword *free_list_by_size;
+
+  /* Hash table mapping buffer index into number
+     0 => allocated but free, 1 => allocated and not-free.
+     If buffer index is not in hash table then this buffer
+     has never been allocated. */
+  uword *buffer_known_hash;
+
+  /* List of free-lists needing Blue Light Special announcements */
+  vlib_buffer_free_list_t **announce_list;
+
+  /* Same guard as every user of pktmbuf_pools (DPDK > 0), so the member
+     exists whenever that code is compiled. */
+#if DPDK > 0
+  /* Vector of rte_mempools per socket */
+  struct rte_mempool **pktmbuf_pools;
+#endif
+} vlib_buffer_main_t;
+
+/* State for serializing to / unserializing from vlib buffer chains.
+   The tx and rx members share storage through the anonymous union. */
+typedef struct
+{
+ struct vlib_main_t *vlib_main;
+
+ /* Head and current tail of the buffer chain in progress; both are set
+ to ~0 when no chain is in progress. */
+ u32 first_buffer, last_buffer;
+
+ union
+ {
+ struct
+ {
+ /* Total accumulated bytes in chain starting with first_buffer. */
+ u32 n_total_data_bytes;
+
+ /* Max number of bytes to accumulate in chain starting with first_buffer.
+ As this limit is reached buffers are enqueued to next node. */
+ u32 max_n_data_bytes_per_chain;
+
+ /* Next node to enqueue buffers to relative to current process node. */
+ u32 next_index;
+
+ /* Free list to use to allocate new buffers. */
+ u32 free_list_index;
+ } tx;
+
+ struct
+ {
+ /* CLIB fifo of buffer indices waiting to be unserialized. */
+ u32 *buffer_fifo;
+
+ /* Event type used to signal that RX buffers have been added to fifo. */
+ uword ready_one_time_event;
+ } rx;
+ };
+} vlib_serialize_buffer_main_t;
+
+void serialize_open_vlib_buffer (serialize_main_t * m, struct vlib_main_t *vm,
+ vlib_serialize_buffer_main_t * sm);
+void unserialize_open_vlib_buffer (serialize_main_t * m,
+ struct vlib_main_t *vm,
+ vlib_serialize_buffer_main_t * sm);
+
+u32 serialize_close_vlib_buffer (serialize_main_t * m);
+void unserialize_close_vlib_buffer (serialize_main_t * m);
+void *vlib_set_buffer_free_callback (struct vlib_main_t *vm, void *fp);
+
+/* Number of bytes serialized so far: bytes in the chain already accounted
+   for, plus bytes written to the current stream buffer, plus bytes held in
+   the overflow buffer. */
+always_inline u32
+serialize_vlib_buffer_n_bytes (serialize_main_t * m)
+{
+  serialize_stream_t *stream = &m->stream;
+  vlib_serialize_buffer_main_t *sbm;
+  u32 n_bytes;
+
+  sbm = uword_to_pointer (stream->data_function_opaque,
+                          vlib_serialize_buffer_main_t *);
+
+  n_bytes = sbm->tx.n_total_data_bytes;
+  n_bytes += stream->current_buffer_index;
+  n_bytes += vec_len (stream->overflow_buffer);
+
+  return n_bytes;
+}
+
+#if DPDK > 0
+/* Convert between a vlib_buffer_t and the struct rte_mbuf placed directly
+   in front of it.  The argument is parenthesized so that any pointer
+   expression (e.g. p + 1 or a conditional) converts correctly. */
+#define rte_mbuf_from_vlib_buffer(x) (((struct rte_mbuf *) (x)) - 1)
+#define vlib_buffer_from_rte_mbuf(x) ((vlib_buffer_t *) ((x) + 1))
+#endif
+
+/*
+ */
+
+/** \brief Compile time buffer trajectory tracing option
+ Turn this on if you run into "bad monkey" contexts,
+ and you want to know exactly which nodes they've visited...
+ See vlib/main.c...
+*/
+#define VLIB_BUFFER_TRACE_TRAJECTORY 0
+
+#if VLIB_BUFFER_TRACE_TRAJECTORY > 0
+#define VLIB_BUFFER_TRACE_TRAJECTORY_INIT(b) (b)->pre_data[0]=0
+#else
+#define VLIB_BUFFER_TRACE_TRAJECTORY_INIT(b)
+#endif /* VLIB_BUFFER_TRACE_TRAJECTORY */
+
+#endif /* included_vlib_buffer_h */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vlib/buffer_funcs.h b/src/vlib/buffer_funcs.h
new file mode 100644
index 00000000000..75716eca7f6
--- /dev/null
+++ b/src/vlib/buffer_funcs.h
@@ -0,0 +1,755 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * buffer_funcs.h: VLIB buffer related functions/inlines
+ *
+ * Copyright (c) 2008 Eliot Dresselhaus
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef included_vlib_buffer_funcs_h
+#define included_vlib_buffer_funcs_h
+
+#include <vppinfra/hash.h>
+
+/** \file
+ vlib buffer access methods.
+*/
+
+
+/** \brief Translate buffer index into buffer pointer
+
+ @param vm - (vlib_main_t *) vlib main data structure pointer
+ @param buffer_index - (u32) buffer index
+ @return - (vlib_buffer_t *) buffer pointer
+*/
+always_inline vlib_buffer_t *
+vlib_get_buffer (vlib_main_t * vm, u32 buffer_index)
+{
+  /* A buffer index is a physmem offset expressed in cache-line units;
+     widen it before shifting so the offset cannot wrap in 32 bits. */
+  uword byte_offset = (uword) buffer_index;
+
+  byte_offset <<= CLIB_LOG2_CACHE_LINE_BYTES;
+  return vlib_physmem_at_offset (&vm->physmem_main, byte_offset);
+}
+
+/** \brief Translate buffer pointer into buffer index
+
+ @param vm - (vlib_main_t *) vlib main data structure pointer
+ @param p - (void *) buffer pointer
+ @return - (u32) buffer index
+*/
+always_inline u32
+vlib_get_buffer_index (vlib_main_t * vm, void *p)
+{
+  uword offset;
+
+  offset = vlib_physmem_offset_of (&vm->physmem_main, p);
+
+  /* The index is the offset in cache-line units, so the pointer must
+     sit exactly on a cache-line boundary. */
+  ASSERT ((offset % (1 << CLIB_LOG2_CACHE_LINE_BYTES)) == 0);
+
+  offset >>= CLIB_LOG2_CACHE_LINE_BYTES;
+  return offset;
+}
+
+/** \brief Get next buffer in buffer linklist, or zero for end of list.
+
+ @param vm - (vlib_main_t *) vlib main data structure pointer
+ @param b - (vlib_buffer_t *) buffer pointer
+ @return - (vlib_buffer_t *) next buffer, or NULL
+*/
+always_inline vlib_buffer_t *
+vlib_get_next_buffer (vlib_main_t * vm, vlib_buffer_t * b)
+{
+  /* next_buffer is only meaningful while NEXT_PRESENT is set. */
+  if (!(b->flags & VLIB_BUFFER_NEXT_PRESENT))
+    return 0;
+
+  return vlib_get_buffer (vm, b->next_buffer);
+}
+
+uword vlib_buffer_length_in_chain_slow_path (vlib_main_t * vm,
+ vlib_buffer_t * b_first);
+
+/** \brief Get length in bytes of the buffer chain
+
+ @param vm - (vlib_main_t *) vlib main data structure pointer
+ @param b - (vlib_buffer_t *) buffer pointer
+ @return - (uword) length of buffer chain
+*/
+always_inline uword
+vlib_buffer_length_in_chain (vlib_main_t * vm, vlib_buffer_t * b)
+{
+  /* A chained buffer whose cached total is not flagged as valid has to
+     be measured by the out-of-line slow path. */
+  if (PREDICT_FALSE ((b->flags & (VLIB_BUFFER_NEXT_PRESENT
+				  | VLIB_BUFFER_TOTAL_LENGTH_VALID))
+		     == VLIB_BUFFER_NEXT_PRESENT))
+    return vlib_buffer_length_in_chain_slow_path (vm, b);
+
+  /* Otherwise the first segment plus the cached remainder is the answer. */
+  return b->current_length + b->total_length_not_including_first_buffer;
+}
+
+/** \brief Get length in bytes of the buffer index buffer chain
+
+ @param vm - (vlib_main_t *) vlib main data structure pointer
+ @param bi - (u32) buffer index
+ @return - (uword) length of buffer chain
+*/
+always_inline uword
+vlib_buffer_index_length_in_chain (vlib_main_t * vm, u32 bi)
+{
+  /* Resolve the index, then defer to the pointer-based variant. */
+  return vlib_buffer_length_in_chain (vm, vlib_get_buffer (vm, bi));
+}
+
+/** \brief Copy buffer contents to memory
+
+ @param vm - (vlib_main_t *) vlib main data structure pointer
+ @param buffer_index - (u32) buffer index
+ @param contents - (u8 *) memory, <strong>must be large enough</strong>
+ @return - (uword) length of buffer chain
+*/
+always_inline uword
+vlib_buffer_contents (vlib_main_t * vm, u32 buffer_index, u8 * contents)
+{
+  uword n_copied = 0;
+  vlib_buffer_t *seg = vlib_get_buffer (vm, buffer_index);
+
+  /* Append each segment's current data to the output, following
+     next_buffer until a segment without NEXT_PRESENT is reached. */
+  for (;;)
+    {
+      uword seg_len = seg->current_length;
+
+      clib_memcpy (contents + n_copied, seg->data + seg->current_data,
+		   seg_len);
+      n_copied += seg_len;
+
+      if (!(seg->flags & VLIB_BUFFER_NEXT_PRESENT))
+	break;
+
+      seg = vlib_get_buffer (vm, seg->next_buffer);
+    }
+
+  return n_copied;
+}
+
+/* Return physical address of buffer->data start. */
+always_inline u64
+vlib_get_buffer_data_physical_address (vlib_main_t * vm, u32 buffer_index)
+{
+  /* Physmem offset of the buffer header ... */
+  uword data_offset = ((uword) buffer_index) << CLIB_LOG2_CACHE_LINE_BYTES;
+
+  /* ... advanced to the start of its data[] member. */
+  data_offset += STRUCT_OFFSET_OF (vlib_buffer_t, data);
+
+  return vlib_physmem_offset_to_physical (&vm->physmem_main, data_offset);
+}
+
+/** \brief Prefetch buffer metadata by buffer index
+ The first 64 bytes of the buffer contain most of the header information
+
+ @param vm - (vlib_main_t *) vlib main data structure pointer
+ @param bi - (u32) buffer index
+ @param type - LOAD, STORE. In most cases, STORE is the right answer
+*/
+/* Prefetch buffer header given index.
+   Resolves bi to a buffer pointer with vlib_get_buffer and passes it,
+   together with type, to vlib_prefetch_buffer_header.  vm, bi and type
+   are each expanded exactly once; _b is local to the do/while block. */
+#define vlib_prefetch_buffer_with_index(vm,bi,type) \
+ do { \
+ vlib_buffer_t * _b = vlib_get_buffer (vm, bi); \
+ vlib_prefetch_buffer_header (_b, type); \
+ } while (0)
+
+#if 0
+/* Iterate over known allocated vlib bufs. You probably do not want
+ * to do this!
+ @param vm the vlib_main_t
+ @param bi found allocated buffer index
+ @param body operation to perform on buffer index
+ function executes body for each allocated buffer index
+
+ Walks every pair in buffer_known_hash and runs body, with bi set to the
+ pair's key, for pairs whose value is VLIB_BUFFER_KNOWN_ALLOCATED.
+
+ NOTE(review): this macro is compiled out.  It takes the address of
+ _vmain->buffer_main, whereas the inline functions in this header use
+ vm->buffer_main directly as a vlib_buffer_main_t pointer, so it would
+ need updating before it could be re-enabled.
+ */
+#define vlib_buffer_foreach_allocated(vm,bi,body) \
+do { \
+ vlib_main_t * _vmain = (vm); \
+ vlib_buffer_main_t * _bmain = &_vmain->buffer_main; \
+ hash_pair_t * _vbpair; \
+ hash_foreach_pair(_vbpair, _bmain->buffer_known_hash, ({ \
+ if (VLIB_BUFFER_KNOWN_ALLOCATED == _vbpair->value[0]) { \
+ (bi) = _vbpair->key; \
+ body; \
+ } \
+ })); \
+} while (0)
+#endif
+
+#if DPDK == 0
+
+typedef enum
+{
+ /* Index is unknown: this is what vlib_buffer_is_known reports when the
+ index has no entry in buffer_known_hash. */
+ VLIB_BUFFER_UNKNOWN,
+
+ /* Index is known and free/allocated.  These are the values stored in
+ buffer_known_hash by vlib_buffer_set_known_state. */
+ VLIB_BUFFER_KNOWN_FREE,
+ VLIB_BUFFER_KNOWN_ALLOCATED,
+} vlib_buffer_known_state_t;
+
+/* Look up the recorded state of buffer_index in buffer_known_hash.
+   Returns VLIB_BUFFER_UNKNOWN when the index has no entry.  Asserts
+   that the caller is running on cpu 0. */
+always_inline vlib_buffer_known_state_t
+vlib_buffer_is_known (vlib_main_t * vm, u32 buffer_index)
+{
+  vlib_buffer_main_t *bm = vm->buffer_main;
+  uword *p;
+
+  ASSERT (os_get_cpu_number () == 0);
+
+  p = hash_get (bm->buffer_known_hash, buffer_index);
+  if (!p)
+    return VLIB_BUFFER_UNKNOWN;
+
+  return p[0];
+}
+
+/* Record state for buffer_index in buffer_known_hash, replacing any
+   previous entry.  Asserts that the caller is running on cpu 0. */
+always_inline void
+vlib_buffer_set_known_state (vlib_main_t * vm,
+			     u32 buffer_index,
+			     vlib_buffer_known_state_t state)
+{
+  vlib_buffer_main_t *buffer_main = vm->buffer_main;
+
+  ASSERT (os_get_cpu_number () == 0);
+
+  hash_set (buffer_main->buffer_known_hash, buffer_index, state);
+}
+
+/* Validates sanity of a single buffer.
+ Returns format'ed vector with error message if any. */
+u8 *vlib_validate_buffer (vlib_main_t * vm, u32 buffer_index,
+ uword follow_chain);
+
+#endif /* DPDK == 0 */
+
+clib_error_t *vlib_buffer_pool_create (vlib_main_t * vm, unsigned num_mbufs,
+ unsigned socket_id);
+
+/** \brief Allocate buffers into supplied array
+
+ @param vm - (vlib_main_t *) vlib main data structure pointer
+ @param buffers - (u32 * ) buffer index array
+ @param n_buffers - (u32) number of buffers requested
+ @return - (u32) number of buffers actually allocated, may be
+ less than the number requested or zero
+*/
+u32 vlib_buffer_alloc (vlib_main_t * vm, u32 * buffers, u32 n_buffers);
+
+/* Round size with round_pow2 to a multiple of sizeof (vlib_buffer_t). */
+always_inline u32
+vlib_buffer_round_size (u32 size)
+{
+  u32 rounded = round_pow2 (size, sizeof (vlib_buffer_t));
+
+  return rounded;
+}
+
+/** \brief Allocate buffers from specific freelist into supplied array
+
+ @param vm - (vlib_main_t *) vlib main data structure pointer
+ @param buffers - (u32 * ) buffer index array
+ @param n_buffers - (u32) number of buffers requested
+ @param free_list_index - (u32) index of the free list to allocate from
+ @return - (u32) number of buffers actually allocated, may be
+ less than the number requested or zero
+*/
+u32 vlib_buffer_alloc_from_free_list (vlib_main_t * vm,
+ u32 * buffers,
+ u32 n_buffers, u32 free_list_index);
+
+/** \brief Free buffers
+ Frees the entire buffer chain for each buffer
+
+ @param vm - (vlib_main_t *) vlib main data structure pointer
+ @param buffers - (u32 * ) buffer index array
+ @param n_buffers - (u32) number of buffers to free
+
+*/
+void vlib_buffer_free (vlib_main_t * vm,
+ /* pointer to first buffer */
+ u32 * buffers,
+ /* number of buffers to free */
+ u32 n_buffers);
+
+/** \brief Free buffers, does not free the buffer chain for each buffer
+
+ @param vm - (vlib_main_t *) vlib main data structure pointer
+ @param buffers - (u32 * ) buffer index array
+ @param n_buffers - (u32) number of buffers to free
+
+*/
+void vlib_buffer_free_no_next (vlib_main_t * vm,
+ /* pointer to first buffer */
+ u32 * buffers,
+ /* number of buffers to free */
+ u32 n_buffers);
+
+/** \brief Free one buffer
+ Shorthand to free a single buffer chain.
+
+ @param vm - (vlib_main_t *) vlib main data structure pointer
+ @param buffer_index - (u32) buffer index to free
+*/
+always_inline void
+vlib_buffer_free_one (vlib_main_t * vm, u32 buffer_index)
+{
+  /* Treat the by-value index as a one-element array. */
+  u32 *single = &buffer_index;
+
+  vlib_buffer_free (vm, single, 1);
+}
+
+/* Add/delete buffer free lists. */
+u32 vlib_buffer_create_free_list (vlib_main_t * vm, u32 n_data_bytes,
+ char *fmt, ...);
+void vlib_buffer_delete_free_list (vlib_main_t * vm, u32 free_list_index);
+
+/* Find already existing public free list with given size or create one. */
+u32 vlib_buffer_get_or_create_free_list (vlib_main_t * vm, u32 n_data_bytes,
+ char *fmt, ...);
+
+/* Return the free list stored at free_list_index in the buffer main's
+   buffer_free_list_pool. */
+always_inline vlib_buffer_free_list_t *
+vlib_buffer_get_free_list (vlib_main_t * vm, u32 free_list_index)
+{
+  vlib_buffer_main_t *bm = vm->buffer_main;
+  vlib_buffer_free_list_t *f =
+    pool_elt_at_index (bm->buffer_free_list_pool, free_list_index);
+
+  /* Each free list records its own pool index; the two must agree. */
+  ASSERT (f->index == free_list_index);
+  return f;
+}
+
+/* Return the n_data_bytes recorded for the given free list. */
+always_inline u32
+vlib_buffer_free_list_buffer_size (vlib_main_t * vm, u32 free_list_index)
+{
+  return vlib_buffer_get_free_list (vm, free_list_index)->n_data_bytes;
+}
+
+void vlib_aligned_memcpy (void *_dst, void *_src, int n_bytes);
+
+/* Reasonably fast buffer copy routine. */
+always_inline void
+vlib_copy_buffers (u32 * dst, u32 * src, u32 n)
+{
+  u32 i;
+
+  /* Four indices per iteration while at least four remain.  i never
+     exceeds n here, so n - i cannot underflow. */
+  for (i = 0; n - i >= 4; i += 4)
+    {
+      dst[i + 0] = src[i + 0];
+      dst[i + 1] = src[i + 1];
+      dst[i + 2] = src[i + 2];
+      dst[i + 3] = src[i + 3];
+    }
+
+  /* Up to three leftovers, one at a time. */
+  for (; i < n; i++)
+    dst[i] = src[i];
+}
+
+/* Allocate n_bytes of I/O memory with the requested alignment through
+   the vm's os_physmem_alloc_aligned hook.  *error is always written:
+   0 on success, a freshly created clib error when the hook returns 0.
+   Returns whatever the hook returned. */
+always_inline void *
+vlib_physmem_alloc_aligned (vlib_main_t * vm, clib_error_t ** error,
+			    uword n_bytes, uword alignment)
+{
+  void *mem =
+    vm->os_physmem_alloc_aligned (&vm->physmem_main, n_bytes, alignment);
+
+  *error = mem ? 0 :
+    clib_error_return (0, "failed to allocate %wd bytes of I/O memory",
+		       n_bytes);
+  return mem;
+}
+
+/* By default allocate I/O memory with cache line alignment. */
+always_inline void *
+vlib_physmem_alloc (vlib_main_t * vm, clib_error_t ** error, uword n_bytes)
+{
+  void *mem;
+
+  mem = vlib_physmem_alloc_aligned (vm, error, n_bytes,
+				    CLIB_CACHE_LINE_BYTES);
+  return mem;
+}
+
+/* Hand mem back through the vm's os_physmem_free hook. */
+always_inline void
+vlib_physmem_free (vlib_main_t * vm, void *mem)
+{
+  /* Plain call: a function returning void must not return an expression. */
+  vm->os_physmem_free (mem);
+}
+
+/* Translate a virtual address into a physical one: subtract the start
+   of the physmem virtual region to get an offset, then convert that
+   offset with vlib_physmem_offset_to_physical. */
+always_inline u64
+vlib_physmem_virtual_to_physical (vlib_main_t * vm, void *mem)
+{
+  vlib_physmem_main_t *pm = &vm->physmem_main;
+  uword vaddr = pointer_to_uword (mem);
+  uword offset = vaddr - pm->virtual.start;
+
+  return vlib_physmem_offset_to_physical (pm, offset);
+}
+
+/* Append given data to end of buffer, possibly allocating new buffers. */
+u32 vlib_buffer_add_data (vlib_main_t * vm,
+ u32 free_list_index,
+ u32 buffer_index, void *data, u32 n_data_bytes);
+
+/* Duplicate all buffers in the chain that starts at b.
+ *
+ * One new buffer is allocated per source segment.  For every segment
+ * current_data, current_length, the NEXT_PRESENT / TOTAL_LENGTH_VALID
+ * flags and the current payload bytes are copied; the first segment
+ * additionally receives total_length_not_including_first_buffer and
+ * the opaque area.
+ *
+ * @param vm - (vlib_main_t *) vlib main data structure pointer
+ * @param b - (vlib_buffer_t *) first buffer of the chain to copy
+ * @return - (vlib_buffer_t *) first buffer of the copy, or 0 when
+ *           vlib_buffer_alloc could not supply enough buffers (any
+ *           buffers it did hand out are freed again first)
+ */
+always_inline vlib_buffer_t *
+vlib_buffer_copy (vlib_main_t * vm, vlib_buffer_t * b)
+{
+  vlib_buffer_t *s, *d, *fd;
+  uword n_alloc, n_buffers = 1;
+  u32 *new_buffers = 0;
+  u32 flag_mask = VLIB_BUFFER_NEXT_PRESENT | VLIB_BUFFER_TOTAL_LENGTH_VALID;
+  uword i;
+
+  /* Count the segments in the source chain. */
+  s = b;
+  while (s->flags & VLIB_BUFFER_NEXT_PRESENT)
+    {
+      n_buffers++;
+      s = vlib_get_buffer (vm, s->next_buffer);
+    }
+
+  vec_validate (new_buffers, n_buffers - 1);
+  n_alloc = vlib_buffer_alloc (vm, new_buffers, n_buffers);
+
+  /* vlib_buffer_alloc may return fewer buffers than requested (or none).
+     Give back whatever was obtained instead of copying into indices
+     that were never allocated. */
+  if (PREDICT_FALSE (n_alloc < n_buffers))
+    {
+      if (n_alloc > 0)
+	vlib_buffer_free (vm, new_buffers, n_alloc);
+      vec_free (new_buffers);
+      return 0;
+    }
+
+  /* 1st segment */
+  s = b;
+  fd = d = vlib_get_buffer (vm, new_buffers[0]);
+  d->current_data = s->current_data;
+  d->current_length = s->current_length;
+  d->flags = s->flags & flag_mask;
+  d->total_length_not_including_first_buffer =
+    s->total_length_not_including_first_buffer;
+  clib_memcpy (d->opaque, s->opaque, sizeof (s->opaque));
+  clib_memcpy (vlib_buffer_get_current (d),
+	       vlib_buffer_get_current (s), s->current_length);
+
+  /* next segments */
+  for (i = 1; i < n_buffers; i++)
+    {
+      /* previous */
+      d->next_buffer = new_buffers[i];
+      /* current */
+      s = vlib_get_buffer (vm, s->next_buffer);
+      d = vlib_get_buffer (vm, new_buffers[i]);
+      d->current_data = s->current_data;
+      d->current_length = s->current_length;
+      clib_memcpy (vlib_buffer_get_current (d),
+		   vlib_buffer_get_current (s), s->current_length);
+      d->flags = s->flags & flag_mask;
+    }
+
+  /* The index vector was only scratch space; release it so that it is
+     not leaked on every call. */
+  vec_free (new_buffers);
+
+  return fd;
+}
+
+/*
+ * vlib_buffer_chain_* functions provide a way to create long buffers.
+ * When DPDK is enabled, the 'hidden' DPDK header is taken care of transparently.
+ */
+
+/* Initializes the buffer as an empty packet with no chained buffers. */
+always_inline void
+vlib_buffer_chain_init (vlib_buffer_t * first)
+{
+  /* No payload yet, neither in this buffer nor behind it. */
+  first->current_length = 0;
+  first->total_length_not_including_first_buffer = 0;
+
+  /* Unchained, and the (zero) cached total length is trustworthy. */
+  first->flags = (first->flags & ~VLIB_BUFFER_NEXT_PRESENT)
+    | VLIB_BUFFER_TOTAL_LENGTH_VALID;
+}
+
+/* The provided next_bi buffer index is appended to the end of the packet. */
+always_inline vlib_buffer_t *
+vlib_buffer_chain_buffer (vlib_main_t * vm,
+			  vlib_buffer_t * first,
+			  vlib_buffer_t * last, u32 next_bi)
+{
+  vlib_buffer_t *new_tail = vlib_get_buffer (vm, next_bi);
+
+  /* Link the new buffer behind the current tail ... */
+  last->next_buffer = next_bi;
+  last->flags |= VLIB_BUFFER_NEXT_PRESENT;
+
+  /* ... and start it out empty and unchained.  'first' is not touched. */
+  new_tail->current_length = 0;
+  new_tail->flags &= ~VLIB_BUFFER_NEXT_PRESENT;
+
+  return new_tail;
+}
+
+/* Increases or decreases the packet length.
+ * It does not allocate or deallocate new buffers.
+ * Therefore, the added length must be compatible
+ * with the last buffer. */
+always_inline void
+vlib_buffer_chain_increase_length (vlib_buffer_t * first,
+				   vlib_buffer_t * last, i32 len)
+{
+  last->current_length += len;
+
+  /* Bytes in the first buffer are not part of the "not including first
+     buffer" total, so a single-buffer packet is finished here. */
+  if (first == last)
+    return;
+
+  first->total_length_not_including_first_buffer += len;
+}
+
+/* Copy data to the end of the packet and increases its length.
+ * It does not allocate new buffers.
+ * Returns the number of copied bytes. */
+always_inline u16
+vlib_buffer_chain_append_data (vlib_main_t * vm,
+			       u32 free_list_index,
+			       vlib_buffer_t * first,
+			       vlib_buffer_t * last, void *data, u16 data_len)
+{
+  u32 n_buffer_bytes =
+    vlib_buffer_free_list_buffer_size (vm, free_list_index);
+  u32 room;
+  u16 len;
+
+  ASSERT (n_buffer_bytes >= last->current_length + last->current_data);
+
+  /* Space left behind the data already held in the last buffer. */
+  room = n_buffer_bytes - last->current_length - last->current_data;
+
+  /* Copy as much of the caller's data as fits. */
+  len = clib_min (data_len, room);
+  clib_memcpy (vlib_buffer_get_current (last) + last->current_length, data,
+	       len);
+
+  vlib_buffer_chain_increase_length (first, last, len);
+  return len;
+}
+
+/* Copy data to the end of the packet and increases its length.
+ * Allocates additional buffers from the free list if necessary.
+ * Returns the number of copied bytes.
+ * 'last' value is modified whenever new buffers are allocated and
+ * chained and points to the last buffer in the chain. */
+u16
+vlib_buffer_chain_append_data_with_alloc (vlib_main_t * vm,
+ u32 free_list_index,
+ vlib_buffer_t * first,
+ vlib_buffer_t ** last,
+ void *data, u16 data_len);
+void vlib_buffer_chain_validate (vlib_main_t * vm, vlib_buffer_t * first);
+
+format_function_t format_vlib_buffer, format_vlib_buffer_and_data,
+ format_vlib_buffer_contents;
+
+typedef struct
+{
+ /* Vector of packet data.  Released with vec_free by
+ vlib_packet_template_free. */
+ u8 *packet_data;
+
+ /* Note: the next three fields are unused if DPDK == 1 */
+
+ /* Number of buffers to allocate in each call to physmem
+ allocator. */
+ u32 min_n_buffers_each_physmem_alloc;
+
+ /* Buffer free list for this template. */
+ u32 free_list_index;
+
+ u32 *free_buffers; /* presumably a vector of buffer indices owned by this template -- TODO confirm; it is not released by vlib_packet_template_free */
+} vlib_packet_template_t;
+
+void vlib_packet_template_get_packet_helper (vlib_main_t * vm,
+ vlib_packet_template_t * t);
+
+void vlib_packet_template_init (vlib_main_t * vm,
+ vlib_packet_template_t * t,
+ void *packet_data,
+ uword n_packet_data_bytes,
+ uword min_n_buffers_each_physmem_alloc,
+ char *fmt, ...);
+
+void *vlib_packet_template_get_packet (vlib_main_t * vm,
+ vlib_packet_template_t * t,
+ u32 * bi_result);
+
+always_inline void
+vlib_packet_template_free (vlib_main_t * vm, vlib_packet_template_t * t)
+{
+ vec_free (t->packet_data); /* Only the packet_data vector is released; vm is unused.  NOTE(review): t->free_buffers is left untouched here -- confirm it is released elsewhere. */
+}
+
+/* Count the bytes still available to an unserialize stream backed by
+   vlib buffers: the unread part of the stream's current buffer, the
+   current_length of every segment chained behind last_buffer, and the
+   chain length of every buffer index queued in rx.buffer_fifo. */
+always_inline u32
+unserialize_vlib_buffer_n_bytes (serialize_main_t * m)
+{
+  serialize_stream_t *stream = &m->stream;
+  vlib_serialize_buffer_main_t *sbm
+    = uword_to_pointer (stream->data_function_opaque,
+			vlib_serialize_buffer_main_t *);
+  vlib_main_t *vm = sbm->vlib_main;
+  u32 n_bytes, *fifo_elt;
+
+  /* Unread remainder of the stream's current buffer. */
+  n_bytes = stream->n_buffer_bytes - stream->current_buffer_index;
+
+  /* Segments that follow last_buffer (last_buffer itself is skipped). */
+  if (sbm->last_buffer != ~0)
+    {
+      vlib_buffer_t *seg = vlib_get_buffer (vm, sbm->last_buffer);
+
+      while (seg->flags & VLIB_BUFFER_NEXT_PRESENT)
+	{
+	  seg = vlib_get_buffer (vm, seg->next_buffer);
+	  n_bytes += seg->current_length;
+	}
+    }
+
+  /* Whole chains still waiting in the receive fifo. */
+  /* *INDENT-OFF* */
+  clib_fifo_foreach (fifo_elt, sbm->rx.buffer_fifo, ({
+    n_bytes += vlib_buffer_index_length_in_chain (vm, fifo_elt[0]);
+  }));
+/* *INDENT-ON* */
+
+  return n_bytes;
+}
+
+typedef union
+{
+ vlib_buffer_t b; /* field-wise view of the buffer header */
+ vlib_copy_unit_t i[sizeof (vlib_buffer_t) / sizeof (vlib_copy_unit_t)]; /* the same bytes as an array of copy units; the init-for-free-list helpers copy i[0..2] to reset a header in bulk */
+}
+vlib_buffer_union_t;
+
+/* Set a buffer quickly into "uninitialized" state. We want this to
+ be extremely cheap and arrange for all fields that need to be
+ initialized to be in the first 128 bits of the buffer.
+
+ _dst is the buffer to reset; fl is the free list whose
+ buffer_init_template supplies the values.  Only the leading copy
+ units of the template are copied; everything else in _dst is left
+ untouched.  All checks are ASSERTs. */
+always_inline void
+vlib_buffer_init_for_free_list (vlib_buffer_t * _dst,
+ vlib_buffer_free_list_t * fl)
+{
+ vlib_buffer_union_t *dst = (vlib_buffer_union_t *) _dst;
+ vlib_buffer_union_t *src =
+ (vlib_buffer_union_t *) & fl->buffer_init_template;
+
+ /* Make sure vlib_buffer_t is cacheline aligned and sized */
+ ASSERT (STRUCT_OFFSET_OF (vlib_buffer_t, cacheline0) == 0);
+ ASSERT (STRUCT_OFFSET_OF (vlib_buffer_t, cacheline1) ==
+ CLIB_CACHE_LINE_BYTES);
+ ASSERT (STRUCT_OFFSET_OF (vlib_buffer_t, cacheline2) ==
+ CLIB_CACHE_LINE_BYTES * 2);
+
+ /* Make sure buffer template is sane. */
+ ASSERT (fl->index == fl->buffer_init_template.free_list_index);
+
+ /* Copy template from src->current_data thru src->free_list_index.
+ The copy is done in whole vlib_copy_unit_t words: unit 0 always,
+ and units 1 and 2 only while the bytes copied so far are fewer than
+ 16 (the 128 bits mentioned above).  The sizeof tests are compile-time
+ constants.  NOTE(review): with a 4-byte copy unit this would cover
+ only 12 bytes -- confirm vlib_copy_unit_t is at least 8 bytes wide. */
+ dst->i[0] = src->i[0];
+ if (1 * sizeof (dst->i[0]) < 16)
+ dst->i[1] = src->i[1];
+ if (2 * sizeof (dst->i[0]) < 16)
+ dst->i[2] = src->i[2];
+
+ /* Make sure it really worked: every field the template is meant to
+ initialize must now match, and the chained-length field must be 0. */
+#define _(f) ASSERT (dst->b.f == src->b.f)
+ _(current_data);
+ _(current_length);
+ _(flags);
+ _(free_list_index);
+#undef _
+ ASSERT (dst->b.total_length_not_including_first_buffer == 0);
+}
+
+always_inline void
+vlib_buffer_init_two_for_free_list (vlib_buffer_t * _dst0,
+ vlib_buffer_t * _dst1,
+ vlib_buffer_free_list_t * fl)
+{
+ vlib_buffer_union_t *dst0 = (vlib_buffer_union_t *) _dst0;
+ vlib_buffer_union_t *dst1 = (vlib_buffer_union_t *) _dst1;
+ vlib_buffer_union_t *src =
+ (vlib_buffer_union_t *) & fl->buffer_init_template;
+
+ /* Two-buffer variant of vlib_buffer_init_for_free_list: both _dst0 and
+ _dst1 are reset from the same free list template.
+ Make sure buffer template is sane. */
+ ASSERT (fl->index == fl->buffer_init_template.free_list_index);
+
+ /* Copy template from src->current_data thru src->free_list_index.
+ Each copy unit is stored into both destinations; units 1 and 2 are
+ copied only while the bytes copied so far are fewer than 16. */
+ dst0->i[0] = dst1->i[0] = src->i[0];
+ if (1 * sizeof (dst0->i[0]) < 16)
+ dst0->i[1] = dst1->i[1] = src->i[1];
+ if (2 * sizeof (dst0->i[0]) < 16)
+ dst0->i[2] = dst1->i[2] = src->i[2];
+
+ /* Make sure it really worked: both destinations must match the
+ template field by field. */
+#define _(f) ASSERT (dst0->b.f == src->b.f && dst1->b.f == src->b.f)
+ _(current_data);
+ _(current_length);
+ _(flags);
+ _(free_list_index);
+#undef _
+ ASSERT (dst0->b.total_length_not_including_first_buffer == 0);
+ ASSERT (dst1->b.total_length_not_including_first_buffer == 0);
+}
+
+#if CLIB_DEBUG > 0
+extern u32 *vlib_buffer_state_validation_lock;
+extern uword *vlib_buffer_state_validation_hash;
+extern void *vlib_buffer_state_heap;
+#endif
+
+/* Debug-only (CLIB_DEBUG > 0) check of the recorded in-use state of b.
+ *
+ * The state is kept in vlib_buffer_state_validation_hash, keyed by the
+ * buffer pointer, and is guarded by a test-and-set spin lock.  The hash
+ * is accessed with vlib_buffer_state_heap selected as the current heap.
+ *
+ * - If b has no entry yet, 'expected' is recorded as its state.
+ * - If the recorded state differs from 'expected', cj_stop is called, a
+ *   warning is logged and os_panic is invoked.
+ *
+ * In non-debug builds the function body is empty.
+ */
+static inline void
+vlib_validate_buffer_in_use (vlib_buffer_t * b, u32 expected)
+{
+#if CLIB_DEBUG > 0
+  uword *p;
+  void *oldheap;
+
+  oldheap = clib_mem_set_heap (vlib_buffer_state_heap);
+
+  /* Spin until the validation lock is acquired. */
+  while (__sync_lock_test_and_set (vlib_buffer_state_validation_lock, 1))
+    ;
+
+  p = hash_get (vlib_buffer_state_validation_hash, b);
+
+  /* If we don't know about b, declare it to be in the expected state */
+  if (!p)
+    {
+      hash_set (vlib_buffer_state_validation_hash, b, expected);
+      goto out;
+    }
+
+  if (p[0] != expected)
+    {
+      void cj_stop (void);
+      u32 bi;
+      vlib_main_t *vm = &vlib_global_main;
+
+      cj_stop ();
+
+      bi = vlib_get_buffer_index (vm, b);
+
+      clib_mem_set_heap (oldheap);
+      /* The format takes five arguments: time, buffer address (%llx),
+         buffer index (%d) and the two state strings.  The address must
+         be passed explicitly, otherwise every later conversion reads
+         the wrong argument. */
+      clib_warning ("%.6f buffer %llx (%d): %s, not %s",
+		    vlib_time_now (vm), (u64) pointer_to_uword (b), bi,
+		    p[0] ? "busy" : "free", expected ? "busy" : "free");
+      os_panic ();
+    }
+out:
+  /* Publish the hash update before dropping the lock, then restore the
+     caller's heap. */
+  CLIB_MEMORY_BARRIER ();
+  *vlib_buffer_state_validation_lock = 0;
+  clib_mem_set_heap (oldheap);
+#endif
+}
+
+static inline void
+vlib_validate_buffer_set_in_use (vlib_buffer_t * b, u32 expected)
+{
+#if CLIB_DEBUG > 0 /* debug builds only; otherwise the body is empty */
+ void *oldheap;
+
+ oldheap = clib_mem_set_heap (vlib_buffer_state_heap); /* the hash is updated with this heap selected */
+
+ while (__sync_lock_test_and_set (vlib_buffer_state_validation_lock, 1)) /* spin until the lock is ours */
+ ;
+
+ hash_set (vlib_buffer_state_validation_hash, b, expected); /* unconditionally record 'expected' as the state of b, keyed by pointer */
+
+ CLIB_MEMORY_BARRIER (); /* make the hash update visible before the lock is dropped */
+ *vlib_buffer_state_validation_lock = 0; /* release the lock */
+ clib_mem_set_heap (oldheap); /* restore the caller's heap */
+#endif
+}
+
+#endif /* included_vlib_buffer_funcs_h */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vlib/buffer_node.h b/src/vlib/buffer_node.h
new file mode 100644
index 00000000000..8a779049625
--- /dev/null
+++ b/src/vlib/buffer_node.h
@@ -0,0 +1,337 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * buffer_node.h: VLIB buffer handling node helper macros/inlines
+ *
+ * Copyright (c) 2008 Eliot Dresselhaus
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef included_vlib_buffer_node_h
+#define included_vlib_buffer_node_h
+
+/** \file
+ vlib buffer/node functions
+*/
+
+/** \brief Finish enqueueing two buffers forward in the graph.
+ Standard dual loop boilerplate element. This is a MACRO,
+ with MULTIPLE SIDE EFFECTS. In the ideal case,
+ <code>next_index == next0 == next1</code>,
+ which means that the speculative enqueue at the top of the dual loop
+ has correctly dealt with both packets. In that case, the macro does
+ nothing at all.
+
+ @param vm vlib_main_t pointer, varies by thread
+ @param node current node vlib_node_runtime_t pointer
+ @param next_index speculated next index used for both packets
+ @param to_next speculated vector pointer used for both packets
+ @param n_left_to_next number of slots left in speculated vector
+ @param bi0 first buffer index
+ @param bi1 second buffer index
+ @param next0 actual next index to be used for the first packet
+ @param next1 actual next index to be used for the second packet
+
+ @return @c next_index -- speculative next index to be used for future packets
+ @return @c to_next -- speculative frame to be used for future packets
+ @return @c n_left_to_next -- number of slots left in speculative frame
+*/
+
+#define vlib_validate_buffer_enqueue_x2(vm,node,next_index,to_next,n_left_to_next,bi0,bi1,next0,next1) \
+do { \
+ int enqueue_code = (next0 != next_index) + 2*(next1 != next_index); /* bit 0: bi0 was mis-speculated, bit 1: bi1 was mis-speculated */ \
+ \
+ if (PREDICT_FALSE (enqueue_code != 0)) \
+ { \
+ switch (enqueue_code) \
+ { \
+ case 1: \
+ /* A B A: only bi0 went to the wrong frame.  bi0 was written at to_next[-2] and bi1 at to_next[-1]; slide bi1 down into bi0's slot, give one slot back and send bi0 to next0. */ \
+ to_next[-2] = bi1; \
+ to_next -= 1; \
+ n_left_to_next += 1; \
+ vlib_set_next_frame_buffer (vm, node, next0, bi0); \
+ break; \
+ \
+ case 2: \
+ /* A A B: only bi1 went to the wrong frame.  Give back its slot (the last one written) and send bi1 to next1. */ \
+ to_next -= 1; \
+ n_left_to_next += 1; \
+ vlib_set_next_frame_buffer (vm, node, next1, bi1); \
+ break; \
+ \
+ case 3: \
+ /* A B B or A B C: both went to the wrong frame.  Give back both slots and send each buffer to its own next. */ \
+ to_next -= 2; \
+ n_left_to_next += 2; \
+ vlib_set_next_frame_buffer (vm, node, next0, bi0); \
+ vlib_set_next_frame_buffer (vm, node, next1, bi1); \
+ if (next0 == next1) /* both agree: adopt that next as the new speculation */ \
+ { \
+ vlib_put_next_frame (vm, node, next_index, \
+ n_left_to_next); \
+ next_index = next1; \
+ vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); \
+ } \
+ } \
+ } \
+} while (0)
+
+
+/** \brief Finish enqueueing four buffers forward in the graph.
+ Standard quad loop boilerplate element. This is a MACRO,
+ with MULTIPLE SIDE EFFECTS. In the ideal case,
+ <code>next_index == next0 == next1 == next2 == next3</code>,
+ which means that the speculative enqueue at the top of the quad loop
+ has correctly dealt with all four packets. In that case, the macro does
+ nothing at all.
+
+ @param vm vlib_main_t pointer, varies by thread
+ @param node current node vlib_node_runtime_t pointer
+ @param next_index speculated next index used for all four packets
+ @param to_next speculated vector pointer used for all four packets
+ @param n_left_to_next number of slots left in speculated vector
+ @param bi0 first buffer index
+ @param bi1 second buffer index
+ @param bi2 third buffer index
+ @param bi3 fourth buffer index
+ @param next0 actual next index to be used for the first packet
+ @param next1 actual next index to be used for the second packet
+ @param next2 actual next index to be used for the third packet
+ @param next3 actual next index to be used for the fourth packet
+
+ @return @c next_index -- speculative next index to be used for future packets
+ @return @c to_next -- speculative frame to be used for future packets
+ @return @c n_left_to_next -- number of slots left in speculative frame
+*/
+
+/* Expands to a bare do { ... } while (0) with NO trailing semicolon: the
+   invoking statement supplies it, which keeps the macro usable as the
+   body of an unbraced if/else. */
+#define vlib_validate_buffer_enqueue_x4(vm,node,next_index,to_next,n_left_to_next,bi0,bi1,bi2,bi3,next0,next1,next2,next3) \
+do { \
+  /* After the fact: check the [speculative] enqueue to "next" */ \
+  u32 fix_speculation = next_index != next0 || next_index != next1 \
+    || next_index != next2 || next_index != next3; \
+  if (PREDICT_FALSE(fix_speculation)) \
+    { \
+      /* rewind: give back all four speculatively used slots */ \
+      to_next -= 4; \
+      n_left_to_next += 4; \
+ \
+      /* If bi0 belongs to "next", send it there */ \
+      if (next_index == next0) \
+        { \
+          to_next[0] = bi0; \
+          to_next++; \
+          n_left_to_next --; \
+        } \
+      else /* send it where it needs to go */ \
+        vlib_set_next_frame_buffer (vm, node, next0, bi0); \
+ \
+      /* Same treatment for bi1, bi2 and bi3, in order */ \
+      if (next_index == next1) \
+        { \
+          to_next[0] = bi1; \
+          to_next++; \
+          n_left_to_next --; \
+        } \
+      else \
+        vlib_set_next_frame_buffer (vm, node, next1, bi1); \
+ \
+      if (next_index == next2) \
+        { \
+          to_next[0] = bi2; \
+          to_next++; \
+          n_left_to_next --; \
+        } \
+      else \
+        vlib_set_next_frame_buffer (vm, node, next2, bi2); \
+ \
+      if (next_index == next3) \
+        { \
+          to_next[0] = bi3; \
+          to_next++; \
+          n_left_to_next --; \
+        } \
+      else \
+        vlib_set_next_frame_buffer (vm, node, next3, bi3); \
+ \
+      /* Change speculation: last 2 packets went to the same node */ \
+      if (next2 == next3) \
+        { \
+          vlib_put_next_frame (vm, node, next_index, n_left_to_next); \
+          next_index = next3; \
+          vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); \
+        } \
+    } \
+} while (0)
+
+/** \brief Finish enqueueing one buffer forward in the graph.
+ Standard single loop boilerplate element. This is a MACRO,
+ with MULTIPLE SIDE EFFECTS. In the ideal case,
+ <code>next_index == next0</code>,
+ which means that the speculative enqueue at the top of the single loop
+ has correctly dealt with the packet in hand. In that case, the macro does
+ nothing at all.
+
+ @param vm vlib_main_t pointer, varies by thread
+ @param node current node vlib_node_runtime_t pointer
+ @param next_index speculated next index used for the packet
+ @param to_next speculated vector pointer used for the packet
+ @param n_left_to_next number of slots left in speculated vector
+ @param bi0 first buffer index
+ @param next0 actual next index to be used for the first packet
+
+ @return @c next_index -- speculative next index to be used for future packets
+ @return @c to_next -- speculative frame to be used for future packets
+ @return @c n_left_to_next -- number of slots left in speculative frame
+*/
+#define vlib_validate_buffer_enqueue_x1(vm,node,next_index,to_next,n_left_to_next,bi0,next0) \
+do { \
+ if (PREDICT_FALSE (next0 != next_index)) \
+ { \
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next + 1); /* the + 1 hands back the slot that was speculatively used for bi0 */ \
+ next_index = next0; /* speculate on the actual next from now on */ \
+ vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); \
+ \
+ to_next[0] = bi0; /* enqueue bi0 on the newly obtained frame */ \
+ to_next += 1; \
+ n_left_to_next -= 1; \
+ } \
+} while (0)
+
+/* Generic dual/single-loop driver for buffer-processing nodes.
+ *
+ * Walks the buffer indices held in frame.  Each index is speculatively
+ * enqueued to the frame for next_index, then two_buffers (for pairs) or
+ * one_buffer (for singles) is called to obtain the actual next index and
+ * the vlib_validate_buffer_enqueue_x2 / _x1 macros repair a wrong guess.
+ * opaque1 and opaque2 are handed to the callbacks unchanged.
+ *
+ * If the node has VLIB_NODE_FLAG_TRACE set, the whole frame is first
+ * passed to vlib_trace_frame_buffers_only together with sizeof_trace.
+ *
+ * Returns frame->n_vectors.
+ */
+always_inline uword
+generic_buffer_node_inline (vlib_main_t * vm,
+			    vlib_node_runtime_t * node,
+			    vlib_frame_t * frame,
+			    uword sizeof_trace,
+			    void *opaque1,
+			    uword opaque2,
+			    void (*two_buffers) (vlib_main_t * vm,
+						 void *opaque1,
+						 uword opaque2,
+						 vlib_buffer_t * b0,
+						 vlib_buffer_t * b1,
+						 u32 * next0, u32 * next1),
+			    void (*one_buffer) (vlib_main_t * vm,
+						void *opaque1, uword opaque2,
+						vlib_buffer_t * b0,
+						u32 * next0))
+{
+  u32 *from = vlib_frame_vector_args (frame);
+  u32 n_left_from = frame->n_vectors;
+  u32 next_index = node->cached_next_index;
+  u32 *to_next;
+
+  if (node->flags & VLIB_NODE_FLAG_TRACE)
+    vlib_trace_frame_buffers_only (vm, node, from, frame->n_vectors,
+				   /* stride */ 1, sizeof_trace);
+
+  while (n_left_from > 0)
+    {
+      u32 n_left_to_next;
+
+      vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+      /* Dual loop: two buffers per pass while two more can be prefetched. */
+      while (n_left_from >= 4 && n_left_to_next >= 2)
+	{
+	  vlib_buffer_t *b0, *b1;
+	  u32 bi0, bi1;
+	  u32 next0, next1;
+
+	  /* Prefetch the headers and data of the following pair. */
+	  {
+	    vlib_buffer_t *b2 = vlib_get_buffer (vm, from[2]);
+	    vlib_buffer_t *b3 = vlib_get_buffer (vm, from[3]);
+
+	    vlib_prefetch_buffer_header (b2, LOAD);
+	    vlib_prefetch_buffer_header (b3, LOAD);
+
+	    CLIB_PREFETCH (b2->data, 64, LOAD);
+	    CLIB_PREFETCH (b3->data, 64, LOAD);
+	  }
+
+	  /* Speculatively enqueue both to the current next frame. */
+	  bi0 = to_next[0] = from[0];
+	  bi1 = to_next[1] = from[1];
+	  to_next += 2;
+	  n_left_to_next -= 2;
+	  from += 2;
+	  n_left_from -= 2;
+
+	  b0 = vlib_get_buffer (vm, bi0);
+	  b1 = vlib_get_buffer (vm, bi1);
+
+	  two_buffers (vm, opaque1, opaque2, b0, b1, &next0, &next1);
+
+	  vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
+					   to_next, n_left_to_next,
+					   bi0, bi1, next0, next1);
+	}
+
+      /* Single loop: whatever the dual loop left over. */
+      while (n_left_from > 0 && n_left_to_next > 0)
+	{
+	  vlib_buffer_t *b0;
+	  u32 bi0, next0;
+
+	  bi0 = from[0];
+	  to_next[0] = bi0;
+	  to_next += 1;
+	  n_left_to_next -= 1;
+	  from += 1;
+	  n_left_from -= 1;
+
+	  b0 = vlib_get_buffer (vm, bi0);
+
+	  one_buffer (vm, opaque1, opaque2, b0, &next0);
+
+	  vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+					   to_next, n_left_to_next,
+					   bi0, next0);
+	}
+
+      vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+    }
+
+  return frame->n_vectors;
+}
+
+#endif /* included_vlib_buffer_node_h */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vlib/cli.c b/src/vlib/cli.c
new file mode 100644
index 00000000000..2d141115857
--- /dev/null
+++ b/src/vlib/cli.c
@@ -0,0 +1,1173 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * cli.c: command line interface
+ *
+ * Copyright (c) 2008 Eliot Dresselhaus
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <vlib/vlib.h>
+#include <vppinfra/cpu.h>
+
+/* Root of all show commands: registers the bare "show" path with help
+   text only (no handler function is set). */
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (vlib_cli_show_command, static) = {
+ .path = "show",
+ .short_help = "Show commands",
+};
+/* *INDENT-ON* */
+
+/* Root of all clear commands: registers the bare "clear" path with help
+   text only (no handler function is set). */
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (vlib_cli_clear_command, static) = {
+ .path = "clear",
+ .short_help = "Clear commands",
+};
+/* *INDENT-ON* */
+
+/* Root of all set commands: registers the bare "set" path with help
+   text only (no handler function is set). */
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (vlib_cli_set_command, static) = {
+ .path = "set",
+ .short_help = "Set commands",
+};
+/* *INDENT-ON* */
+
+/* Root of all test commands: registers the bare "test" path with help
+   text only (no handler function is set). */
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (vlib_cli_test_command, static) = {
+ .path = "test",
+ .short_help = "Test commands",
+};
+/* *INDENT-ON* */
+
+/* Returns bitmap of commands which match key.
+
+   Reads the next word from INPUT one character at a time and intersects,
+   position by position, the per-character bitmaps stored in
+   C->sub_command_positions. A bit that is still set in the result
+   identifies a sub-command of C compatible with every character read.
+
+   Returns 0 when nothing matches. A non-zero result is a bitmap built
+   with clib_bitmap_dup; the caller in this file releases it with
+   clib_bitmap_free. */
+static uword *
+vlib_cli_sub_command_match (vlib_cli_command_t * c, unformat_input_t * input)
+{
+ int i, n;
+ uword *match = 0;
+ vlib_cli_parse_position_t *p;
+
+ unformat_skip_white_space (input);
+
+ /* i is the character position within the word being matched. */
+ for (i = 0;; i++)
+ {
+ uword k;
+
+ k = unformat_get_input (input);
+ switch (k)
+ {
+ /* Characters accepted as part of a sub-command name. */
+ case 'a' ... 'z':
+ case 'A' ... 'Z':
+ case '0' ... '9':
+ case '-':
+ case '_':
+ break;
+
+ case ' ':
+ case '\t':
+ case '\r':
+ case '\n':
+ case UNFORMAT_END_OF_INPUT:
+ /* White space or end of input ends the word. If more than one
+ candidate remains, remove every candidate that has some
+ character at this position, i.e. whose name continues past
+ what was typed. */
+ if (i < vec_len (c->sub_command_positions)
+ && clib_bitmap_count_set_bits (match) > 1)
+ {
+ p = vec_elt_at_index (c->sub_command_positions, i);
+ for (n = 0; n < vec_len (p->bitmaps); n++)
+ match = clib_bitmap_andnot (match, p->bitmaps[n]);
+ }
+ goto done;
+
+ default:
+ /* Any other character is not part of the word: push it back and
+ return whatever has matched so far. */
+ unformat_put_input (input);
+ goto done;
+ }
+
+ /* Word is longer than any position recorded for C's sub-commands. */
+ if (i >= vec_len (c->sub_command_positions))
+ {
+ no_match:
+ clib_bitmap_free (match);
+ return 0;
+ }
+
+ p = vec_elt_at_index (c->sub_command_positions, i);
+ if (vec_len (p->bitmaps) == 0)
+ goto no_match;
+
+ /* Bitmaps at a position are indexed by character offset from min_char. */
+ n = k - p->min_char;
+ if (n < 0 || n >= vec_len (p->bitmaps))
+ goto no_match;
+
+ /* First character seeds the match set with a private copy; each later
+ character narrows it. */
+ if (i == 0)
+ match = clib_bitmap_dup (p->bitmaps[n]);
+ else
+ match = clib_bitmap_and (match, p->bitmaps[n]);
+
+ if (clib_bitmap_is_zero (match))
+ goto no_match;
+ }
+
+done:
+ return match;
+}
+
+/* Unformat function matching a braced sub-input: { SUB-INPUT }.
+
+   Argument: unformat_input_t *sub_input, initialized from the parsed
+   vector on success.  Returns 1 when a sub-input was found, else 0. */
+uword
+unformat_vlib_cli_sub_input (unformat_input_t * i, va_list * args)
+{
+  unformat_input_t *sub_input = va_arg (*args, unformat_input_t *);
+  u8 *s;
+  uword c;
+
+  /* Consume leading white space; stop on the first other character. */
+  do
+    c = unformat_get_input (i);
+  while (c == ' ' || c == '\t' || c == '\n' || c == '\r' || c == '\f');
+
+  /* Put the character back so that "%v" sees the opening brace. */
+  if (c != UNFORMAT_END_OF_INPUT)
+    unformat_put_input (i);
+
+  if (c != '{' || !unformat (i, "%v", &s))
+    return 0;
+
+  unformat_init_vector (sub_input, s);
+  return 1;
+}
+
+/* Map sub-command slot SI of PARENT to its entry in cm->commands. */
+static vlib_cli_command_t *
+get_sub_command (vlib_cli_main_t * cm, vlib_cli_command_t * parent, u32 si)
+{
+  vlib_cli_sub_command_t *slot;
+
+  slot = vec_elt_at_index (parent->sub_commands, si);
+  return vec_elt_at_index (cm->commands, slot->index);
+}
+
+/* Unformat function: match the next token of I against the sub-rules and
+   then the sub-commands of a command.
+
+   Arguments (via ARGS): vlib_main_t *vm, vlib_cli_command_t *c (the parent
+   command), vlib_cli_command_t **result.
+
+   Returns 1 and stores the matched command in *result when a parse rule
+   accepts the input or when exactly one sub-command matches; returns 0
+   otherwise. */
+static uword
+unformat_vlib_cli_sub_command (unformat_input_t * i, va_list * args)
+{
+ vlib_main_t *vm = va_arg (*args, vlib_main_t *);
+ vlib_cli_command_t *c = va_arg (*args, vlib_cli_command_t *);
+ vlib_cli_command_t **result = va_arg (*args, vlib_cli_command_t **);
+ vlib_cli_main_t *cm = &vm->cli_main;
+ uword *match_bitmap, is_unique, index;
+
+ /* Parse rules are tried first, in the order they appear in c->sub_rules. */
+ {
+ vlib_cli_sub_rule_t *sr;
+ vlib_cli_parse_rule_t *r;
+ vec_foreach (sr, c->sub_rules)
+ {
+ void **d;
+ r = vec_elt_at_index (cm->parse_rules, sr->rule_index);
+ /* Each attempt appends one slot to cm->parse_rule_data to hold the
+ value parsed by the rule.
+ NOTE(review): the slot is appended even if the rule then fails to
+ match -- confirm readers of parse_rule_data expect that. */
+ vec_add2 (cm->parse_rule_data, d, 1);
+ vec_reset_length (d[0]);
+ /* Non-zero data_size: make room for one element of that size. */
+ if (r->data_size)
+ d[0] = _vec_resize (d[0],
+ /* length increment */ 1,
+ r->data_size,
+ /* header_bytes */ 0,
+ /* data align */ sizeof (uword));
+ if (unformat_user (i, r->unformat_function, vm, d[0]))
+ {
+ *result = vec_elt_at_index (cm->commands, sr->command_index);
+ return 1;
+ }
+ }
+ }
+
+ /* No rule matched: fall back to name matching. Only a unique match
+ (exactly one bit set) is accepted. */
+ match_bitmap = vlib_cli_sub_command_match (c, i);
+ is_unique = clib_bitmap_count_set_bits (match_bitmap) == 1;
+ index = ~0;
+ if (is_unique)
+ {
+ index = clib_bitmap_first_set (match_bitmap);
+ *result = get_sub_command (cm, c, index);
+ }
+ clib_bitmap_free (match_bitmap);
+
+ return is_unique;
+}
+
+/* Format function: append help text for a command.
+
+   Arguments: vlib_cli_command_t *c, int is_long.  Long help is used when
+   requested and present, then short help, and finally a generic
+   "<path> commands" line. */
+static u8 *
+format_vlib_cli_command_help (u8 * s, va_list * args)
+{
+  vlib_cli_command_t *c = va_arg (*args, vlib_cli_command_t *);
+  int is_long = va_arg (*args, int);
+
+  if (is_long && c->long_help)
+    return format (s, "%s", c->long_help);
+
+  if (c->short_help)
+    return format (s, "%s", c->short_help);
+
+  return format (s, "%v commands", c->path);
+}
+
+/* Format function: append a parse rule's name wrapped in angle brackets.
+   Argument: vlib_cli_parse_rule_t *r. */
+static u8 *
+format_vlib_cli_parse_rule_name (u8 * s, va_list * args)
+{
+  vlib_cli_parse_rule_t *rule = va_arg (*args, vlib_cli_parse_rule_t *);
+
+  s = format (s, "<%U>", format_c_identifier, rule->name);
+  return s;
+}
+
+/* Format function: append a command path for display.
+
+   Argument: u8 *path (vector).  An element introduced by '%' is a rule
+   reference and is shown as <RULE>; inside a rule, '_' is shown as a
+   space.  All other characters are copied unchanged. */
+static u8 *
+format_vlib_cli_path (u8 * s, va_list * args)
+{
+  u8 *path = va_arg (*args, u8 *);
+  int pos;
+  int inside_rule = 0;
+
+  for (pos = 0; pos < vec_len (path); pos++)
+    {
+      u8 ch = path[pos];
+
+      if (ch == '%')
+        {
+          /* Start of <RULE>. */
+          inside_rule = 1;
+          vec_add1 (s, '<');
+        }
+      else if (ch == '_')
+        {
+          /* _ -> space in rules. */
+          vec_add1 (s, inside_rule ? ' ' : '_');
+        }
+      else if (ch == ' ')
+        {
+          /* A space closes any open <RULE>. */
+          if (inside_rule)
+            {
+              vec_add1 (s, '>');
+              inside_rule = 0;
+            }
+          vec_add1 (s, ' ');
+        }
+      else
+        {
+          vec_add1 (s, ch);
+        }
+    }
+
+  /* Path ended inside a rule: terminate <RULE>. */
+  if (inside_rule)
+    {
+      vec_add1 (s, '>');
+    }
+
+  return s;
+}
+
+/* Recursively collect, by value, every command at or below COMMAND_INDEX
+   that has a handler function.  Copies are appended to SUBS (which may be
+   0) and the possibly reallocated vector is returned. */
+static vlib_cli_command_t *
+all_subs (vlib_cli_main_t * cm, vlib_cli_command_t * subs, u32 command_index)
+{
+  vlib_cli_command_t *cmd = vec_elt_at_index (cm->commands, command_index);
+  vlib_cli_sub_rule_t *rule;
+  vlib_cli_sub_command_t *child;
+
+  if (cmd->function)
+    {
+      vec_add1 (subs, cmd[0]);
+    }
+
+  /* Rule-based children first, then named children. */
+  vec_foreach (rule, cmd->sub_rules)
+    {
+      subs = all_subs (cm, subs, rule->command_index);
+    }
+
+  vec_foreach (child, cmd->sub_commands)
+    {
+      subs = all_subs (cm, subs, child->index);
+    }
+
+  return subs;
+}
+
+/* Sort comparator: orders vlib_cli_sub_rule_t elements by name. */
+static int
+vlib_cli_cmp_rule (void *a1, void *a2)
+{
+  u8 *lhs = ((vlib_cli_sub_rule_t *) a1)->name;
+  u8 *rhs = ((vlib_cli_sub_rule_t *) a2)->name;
+
+  return vec_cmp (lhs, rhs);
+}
+
+/* Sort comparator: orders vlib_cli_command_t elements by path. */
+static int
+vlib_cli_cmp_command (void *a1, void *a2)
+{
+  char *lhs = ((vlib_cli_command_t *) a1)->path;
+  char *rhs = ((vlib_cli_command_t *) a2)->path;
+
+  return vec_cmp (lhs, rhs);
+}
+
+/* Dispatch one command from INPUT against the sub-commands of the command
+   at PARENT_COMMAND_INDEX in CM.
+
+   Handled in order: "help", then "choices" or "?" (these only when CM is
+   the main CLI of VM), "comment ...", "uncomment { ... }", and finally a
+   regular sub-command.  A regular sub-command is first dispatched
+   recursively to its own children; if that fails or it has no children,
+   its handler function is called.
+
+   Returns 0 on success, otherwise a newly allocated clib_error_t that the
+   caller must free. */
+static clib_error_t *
+vlib_cli_dispatch_sub_commands (vlib_main_t * vm,
+                                vlib_cli_main_t * cm,
+                                unformat_input_t * input,
+                                uword parent_command_index)
+{
+  vlib_cli_command_t *parent, *c;
+  clib_error_t *error = 0;
+  unformat_input_t sub_input;
+  u8 *string;
+  uword is_main_dispatch = cm == &vm->cli_main;
+
+  parent = vec_elt_at_index (cm->commands, parent_command_index);
+  if (is_main_dispatch && unformat (input, "help"))
+    {
+      uword help_at_end_of_line, i;
+
+      help_at_end_of_line =
+        unformat_check_input (input) == UNFORMAT_END_OF_INPUT;
+
+      /* Walk down the command tree as far as the input names
+         sub-commands; c ends up as the deepest command reached. */
+      while (1)
+        {
+          c = parent;
+          if (unformat_user
+              (input, unformat_vlib_cli_sub_command, vm, c, &parent))
+            ;
+
+          else if (!(unformat_check_input (input) == UNFORMAT_END_OF_INPUT))
+            goto unknown;
+
+          else
+            break;
+        }
+
+      /* help SUB-COMMAND => long format help.
+         "help" at end of line: show all commands. */
+      if (!help_at_end_of_line)
+        vlib_cli_output (vm, "%U", format_vlib_cli_command_help, c,
+                         /* is_long */ 1);
+
+      else if (vec_len (c->sub_commands) + vec_len (c->sub_rules) == 0)
+        vlib_cli_output (vm, "%v: no sub-commands", c->path);
+
+      else
+        {
+          vlib_cli_sub_command_t *sc;
+          vlib_cli_sub_rule_t *sr, *subs;
+
+          /* Build one sortable list: a copy of the rules plus one entry
+             per named sub-command (marked by rule_index == ~0). */
+          subs = vec_dup (c->sub_rules);
+
+          vec_foreach (sc, c->sub_commands)
+            {
+              vec_add2 (subs, sr, 1);
+              sr->name = sc->name;
+              sr->command_index = sc->index;
+              sr->rule_index = ~0;
+            }
+
+          vec_sort_with_function (subs, vlib_cli_cmp_rule);
+
+          for (i = 0; i < vec_len (subs); i++)
+            {
+              vlib_cli_command_t *d;
+              vlib_cli_parse_rule_t *r;
+
+              d = vec_elt_at_index (cm->commands, subs[i].command_index);
+              r =
+                subs[i].rule_index != ~0 ? vec_elt_at_index (cm->parse_rules,
+                                                             subs
+                                                             [i].rule_index) :
+                0;
+
+              if (r)
+                vlib_cli_output
+                  (vm, " %-30U %U",
+                   format_vlib_cli_parse_rule_name, r,
+                   format_vlib_cli_command_help, d, /* is_long */ 0);
+              else
+                vlib_cli_output
+                  (vm, " %-30v %U",
+                   subs[i].name,
+                   format_vlib_cli_command_help, d, /* is_long */ 0);
+            }
+
+          vec_free (subs);
+        }
+    }
+
+  else if (is_main_dispatch
+           && (unformat (input, "choices") || unformat (input, "?")))
+    {
+      vlib_cli_command_t *sub, *subs;
+
+      /* List every command with a handler at or below the parent. */
+      subs = all_subs (cm, 0, parent_command_index);
+      vec_sort_with_function (subs, vlib_cli_cmp_command);
+      vec_foreach (sub, subs)
+        vlib_cli_output (vm, " %-40U %U",
+                         format_vlib_cli_path, sub->path,
+                         format_vlib_cli_command_help, sub, /* is_long */ 0);
+      vec_free (subs);
+    }
+
+  else if (unformat (input, "comment %v", &string))
+    {
+      /* Comment text is parsed and discarded. */
+      vec_free (string);
+    }
+
+  else if (unformat (input, "uncomment %U",
+                     unformat_vlib_cli_sub_input, &sub_input))
+    {
+      error =
+        vlib_cli_dispatch_sub_commands (vm, cm, &sub_input,
+                                        parent_command_index);
+      unformat_free (&sub_input);
+    }
+
+  else
+    if (unformat_user (input, unformat_vlib_cli_sub_command, vm, parent, &c))
+    {
+      unformat_input_t *si;
+      uword has_sub_commands =
+        vec_len (c->sub_commands) + vec_len (c->sub_rules) > 0;
+
+      /* Arguments may be given inline or as a braced { ... } sub-input. */
+      si = input;
+      if (unformat_user (input, unformat_vlib_cli_sub_input, &sub_input))
+        si = &sub_input;
+
+      if (has_sub_commands)
+        error = vlib_cli_dispatch_sub_commands (vm, cm, si, c - cm->commands);
+
+      if (has_sub_commands && !error)
+        /* Found valid sub-command. */ ;
+
+      else if (c->function)
+        {
+          clib_error_t *c_error;
+
+          /* Skip white space for benefit of called function. */
+          unformat_skip_white_space (si);
+
+          if (unformat (si, "?"))
+            {
+              vlib_cli_output (vm, " %-40U %U", format_vlib_cli_path, c->path,
+                               format_vlib_cli_command_help, c,
+                               /* is_long */ 0);
+            }
+          else
+            {
+              /* Handlers not marked MP-safe run with workers held at the
+                 barrier. */
+              if (!c->is_mp_safe)
+                vlib_worker_thread_barrier_sync (vm);
+
+              c_error = c->function (vm, si, c);
+
+              if (!c->is_mp_safe)
+                vlib_worker_thread_barrier_release (vm);
+
+              if (c_error)
+                {
+                  /* Release the error left by a failed sub-command
+                     dispatch before replacing it; otherwise it leaks. */
+                  clib_error_free (error);
+
+                  error =
+                    clib_error_return (0, "%v: %v", c->path, c_error->what);
+                  clib_error_free (c_error);
+                  /* Free sub input. */
+                  if (si != input)
+                    unformat_free (si);
+
+                  return error;
+                }
+            }
+
+          /* Free any previous error. */
+          clib_error_free (error);
+        }
+
+      else if (!error)
+        error = clib_error_return (0, "%v: no sub-commands", c->path);
+
+      /* Free sub input. */
+      if (si != input)
+        unformat_free (si);
+    }
+
+  else
+    goto unknown;
+
+  return error;
+
+unknown:
+  if (parent->path)
+    return clib_error_return (0, "%v: unknown input `%U'", parent->path,
+                              format_unformat_error, input);
+  else
+    return clib_error_return (0, "unknown input `%U'", format_unformat_error,
+                              input);
+}
+
+
+/* Weak default for vlib_unix_error_report: does nothing. Declared weak so
+   that another definition of the same symbol can take its place at link
+   time. */
+void vlib_unix_error_report (vlib_main_t *, clib_error_t *)
+ __attribute__ ((weak));
+
+void
+vlib_unix_error_report (vlib_main_t * vm, clib_error_t * error)
+{
+}
+
+/* Process CLI input.
+
+   Runs every command found in INPUT, stopping at end of input or at the
+   first error.  While running, output of the current process is routed
+   to FUNCTION / FUNCTION_ARG; the previous routing is restored before
+   returning.  An error is printed, reported and freed here. */
+void
+vlib_cli_input (vlib_main_t * vm,
+                unformat_input_t * input,
+                vlib_cli_output_function_t * function, uword function_arg)
+{
+  vlib_process_t *proc = vlib_get_current_process (vm);
+  vlib_cli_main_t *cm = &vm->cli_main;
+  vlib_cli_output_function_t *prev_function = proc->output_function;
+  uword prev_function_arg = proc->output_function_arg;
+  clib_error_t *error;
+
+  proc->output_function = function;
+  proc->output_function_arg = function_arg;
+
+  for (;;)
+    {
+      /* Parsed rule data belongs to a single command. */
+      vec_reset_length (cm->parse_rule_data);
+      error = vlib_cli_dispatch_sub_commands (vm, &vm->cli_main, input,
+                                              /* parent */ 0);
+      if (error || unformat (input, "%U", unformat_eof))
+        break;
+    }
+
+  if (error)
+    {
+      vlib_cli_output (vm, "%v", error->what);
+      vlib_unix_error_report (vm, error);
+      clib_error_free (error);
+    }
+
+  proc->output_function = prev_function;
+  proc->output_function_arg = prev_function_arg;
+}
+
+/* Output to current CLI connection.
+
+   Formats FMT and its arguments, makes sure non-empty output ends with a
+   newline, and hands it to the current process's output function.  When
+   there is no current process or no output function, writes to stdout. */
+void
+vlib_cli_output (vlib_main_t * vm, char *fmt, ...)
+{
+  vlib_process_t *proc = vlib_get_current_process (vm);
+  va_list ap;
+  u8 *text;
+  uword n_bytes;
+
+  va_start (ap, fmt);
+  text = va_format (0, fmt, &ap);
+  va_end (ap);
+
+  /* Terminate with \n if not present. */
+  n_bytes = vec_len (text);
+  if (n_bytes > 0 && text[n_bytes - 1] != '\n')
+    vec_add1 (text, '\n');
+
+  if (proc && proc->output_function)
+    proc->output_function (proc->output_function_arg, text, vec_len (text));
+  else
+    fformat (stdout, "%v", text);
+
+  vec_free (text);
+}
+
+/* CLI handler for "show memory": prints, for each vlib main visited by
+   foreach_vlib_main, the thread name and its per-cpu mheap.  The optional
+   keyword "verbose" is passed through to format_mheap. */
+static clib_error_t *
+show_memory_usage (vlib_main_t * vm,
+                   unformat_input_t * input, vlib_cli_command_t * cmd)
+{
+  int verbose = 0;
+  u32 index = 0;
+
+  while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+    {
+      if (!unformat (input, "verbose"))
+        return clib_error_return (0, "unknown input `%U'",
+                                  format_unformat_error, input);
+      verbose = 1;
+    }
+
+  /* *INDENT-OFF* */
+  foreach_vlib_main (
+  ({
+      vlib_cli_output (vm, "Thread %d %v\n", index, vlib_worker_threads[index].name);
+      vlib_cli_output (vm, "%U\n", format_mheap, clib_per_cpu_mheaps[index], verbose);
+      index++;
+  }));
+  /* *INDENT-ON* */
+  return 0;
+}
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (show_memory_usage_command, static) = {
+ .path = "show memory",
+ .short_help = "Show current memory usage",
+ .function = show_memory_usage,
+};
+/* *INDENT-ON* */
+
+/* CLI handler for "show cpu": prints model name, microarchitecture, CPU
+   flags and the base frequency derived from vm->clib_time, one labelled
+   line each. */
+static clib_error_t *
+show_cpu (vlib_main_t * vm, unformat_input_t * input,
+          vlib_cli_command_t * cmd)
+{
+  f64 base_ghz = ((f64) vm->clib_time.clocks_per_second) * 1e-9;
+
+  vlib_cli_output (vm, "%-25s %U", "Model name:", format_cpu_model_name);
+  vlib_cli_output (vm, "%-25s %U", "Microarchitecture:", format_cpu_uarch);
+  vlib_cli_output (vm, "%-25s %U", "Flags:", format_cpu_flags);
+  vlib_cli_output (vm, "%-25s %.2f GHz", "Base frequency:", base_ghz);
+
+  return 0;
+}
+
+/*?
+ * Displays various information about the CPU.
+ *
+ * @cliexpar
+ * @cliexstart{show cpu}
+ * Model name: Intel(R) Xeon(R) CPU E5-2667 v4 @ 3.20GHz
+ * Microarchitecture: Broadwell (Broadwell-EP/EX)
+ * Flags: sse3 ssse3 sse41 sse42 avx avx2 aes
+ * Base frequency: 3.20 GHz
+ * @cliexend
+?*/
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (show_cpu_command, static) = {
+ .path = "show cpu",
+ .short_help = "Show cpu information",
+ .function = show_cpu,
+};
+
+/* *INDENT-ON* */
+/* CLI handler for "memory-trace": parses an enable/disable keyword and
+   passes the result to clib_mem_trace. */
+static clib_error_t *
+enable_disable_memory_trace (vlib_main_t * vm,
+                             unformat_input_t * input,
+                             vlib_cli_command_t * cmd)
+{
+  int enable;
+
+  if (!unformat_user (input, unformat_vlib_enable_disable, &enable))
+    return clib_error_return (0, "expecting enable/on or disable/off");
+
+  clib_mem_trace (enable);
+  return 0;
+}
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (enable_disable_memory_trace_command, static) = {
+ .path = "memory-trace",
+ .short_help = "Enable/disable memory allocation trace",
+ .function = enable_disable_memory_trace,
+};
+/* *INDENT-ON* */
+
+
+/* CLI handler for "test heap-validate <on|off|now>".
+
+ on: set MHEAP_FLAG_VALIDATE and clear MHEAP_FLAG_SMALL_OBJECT_CACHE on
+ every per-cpu heap visited by foreach_vlib_main.
+ off: the reverse of "on".
+ now: call mheap_validate on each of those heaps once.
+
+ Any other input returns an "unknown input" error. */
+static clib_error_t *
+test_heap_validate (vlib_main_t * vm, unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ clib_error_t *error = 0;
+ void *heap;
+ mheap_t *mheap;
+
+ if (unformat (input, "on"))
+ {
+ /* *INDENT-OFF* */
+ foreach_vlib_main({
+ heap = clib_per_cpu_mheaps[this_vlib_main->cpu_index];
+ mheap = mheap_header(heap);
+ mheap->flags |= MHEAP_FLAG_VALIDATE;
+ // Turn off small object cache because it delays detection of errors
+ mheap->flags &= ~MHEAP_FLAG_SMALL_OBJECT_CACHE;
+ });
+ /* *INDENT-ON* */
+
+ }
+ else if (unformat (input, "off"))
+ {
+ /* *INDENT-OFF* */
+ foreach_vlib_main({
+ heap = clib_per_cpu_mheaps[this_vlib_main->cpu_index];
+ mheap = mheap_header(heap);
+ mheap->flags &= ~MHEAP_FLAG_VALIDATE;
+ mheap->flags |= MHEAP_FLAG_SMALL_OBJECT_CACHE;
+ });
+ /* *INDENT-ON* */
+ }
+ else if (unformat (input, "now"))
+ {
+ /* *INDENT-OFF* */
+ foreach_vlib_main({
+ heap = clib_per_cpu_mheaps[this_vlib_main->cpu_index];
+ /* mheap is assigned here but not read in this branch. */
+ mheap = mheap_header(heap);
+ mheap_validate(heap);
+ });
+ /* *INDENT-ON* */
+ vlib_cli_output (vm, "heap validation complete");
+
+ }
+ else
+ {
+ return clib_error_return (0, "unknown input `%U'",
+ format_unformat_error, input);
+ }
+
+ /* error is never set above, so this returns 0 (success). */
+ return error;
+}
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (cmd_test_heap_validate,static) = {
+ .path = "test heap-validate",
+ .short_help = "<on/off/now> validate heap on future allocs/frees or right now",
+ .function = test_heap_validate,
+};
+/* *INDENT-ON* */
+
+#ifdef TEST_CODE
+/*
+ * A trivial test harness to verify the per-process output_function
+ * is working correctly.
+ */
+
+/* Test CLI handler: picks a random id, then ten times waits for an event
+   or a one second timeout and prints a progress line carrying the id. */
+static clib_error_t *
+sleep_ten_seconds (vlib_main_t * vm,
+                   unformat_input_t * input, vlib_cli_command_t * cmd)
+{
+  u16 my_id = rand ();
+  u16 iteration = 0;
+
+  vlib_cli_output (vm, "Starting 10 seconds sleep with id %u\n", my_id);
+
+  while (iteration < 10)
+    {
+      vlib_process_wait_for_event_or_clock (vm, 1.0);
+      vlib_cli_output (vm, "Iteration number %u, my id: %u\n", iteration,
+                       my_id);
+      iteration++;
+    }
+
+  vlib_cli_output (vm, "Done with sleep with id %u\n", my_id);
+  return 0;
+}
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (ping_command, static) = {
+ .path = "test sleep",
+ .function = sleep_ten_seconds,
+ .short_help = "Sleep for 10 seconds",
+};
+/* *INDENT-ON* */
+#endif /* ifdef TEST_CODE */
+
+/* Normalize a command path given as a C string.
+
+   Leading white space is dropped, each run of white space becomes a
+   single space and a trailing space is removed.  The result is stored in
+   *RESULT as a vector (0 when nothing was copied).
+
+   Returns the index in the result of the first character of the last
+   path element that follows a space, or ~0 if there is none. */
+static uword
+vlib_cli_normalize_path (char *input, char **result)
+{
+  char *out = 0;
+  char *cursor;
+  uword n_out = 0;
+  uword index_of_last_space = ~0;
+
+  for (cursor = input; *cursor != 0; cursor++)
+    {
+      u8 ch = *cursor;
+      int is_white = (ch == ' ' || ch == '\t' || ch == '\n' || ch == '\r');
+
+      if (is_white)
+        {
+          /* Multiple white space -> single space; none at the start. */
+          if (n_out > 0 && out[n_out - 1] != ' ')
+            {
+              vec_add1 (out, ' ');
+              n_out++;
+            }
+          continue;
+        }
+
+      /* First character after a space starts a new path element. */
+      if (n_out > 0 && out[n_out - 1] == ' ')
+        index_of_last_space = vec_len (out);
+
+      vec_add1 (out, ch);
+      n_out++;
+    }
+
+  /* Remove any extra space at end. */
+  if (n_out > 0 && out[n_out - 1] == ' ')
+    _vec_len (out) -= 1;
+
+  *result = out;
+  return index_of_last_space;
+}
+
+/* Return the index of the last space in the vector PATH, which is also
+   the length of the parent's path, or ~0 when PATH has no space. */
+always_inline uword
+parent_path_len (char *path)
+{
+  word pos = vec_len (path) - 1;
+
+  while (pos >= 0)
+    {
+      if (path[pos] == ' ')
+        return pos;
+      pos--;
+    }
+
+  return ~0;
+}
+
+/* Link command CHILD_INDEX under command PARENT_INDEX.
+
+   The child's name is the last space-separated element of its path.  A
+   name starting with '%' refers to a parse rule and is recorded in the
+   parent's sub_rules; any other name is recorded in sub_commands and
+   indexed character by character in sub_command_positions for prefix
+   matching.  Adding a name the parent already has is a no-op.
+
+   The temporary name vector is freed on every path that does not store
+   it in the parent. */
+static void
+add_sub_command (vlib_cli_main_t * cm, uword parent_index, uword child_index)
+{
+  vlib_cli_command_t *p, *c;
+  vlib_cli_sub_command_t *sub_c;
+  u8 *sub_name;
+  word i, l;
+
+  p = vec_elt_at_index (cm->commands, parent_index);
+  c = vec_elt_at_index (cm->commands, child_index);
+
+  /* sub_name = copy of the last element of the child's path. */
+  l = parent_path_len (c->path);
+  if (l == ~0)
+    sub_name = vec_dup ((u8 *) c->path);
+  else
+    {
+      ASSERT (l + 1 < vec_len (c->path));
+      sub_name = 0;
+      vec_add (sub_name, c->path + l + 1, vec_len (c->path) - (l + 1));
+    }
+
+  if (sub_name[0] == '%')
+    {
+      uword *q;
+      vlib_cli_sub_rule_t *sr;
+
+      /* Remove %. */
+      vec_delete (sub_name, 1, 0);
+
+      if (!p->sub_rule_index_by_name)
+        p->sub_rule_index_by_name = hash_create_vec ( /* initial length */ 32,
+                                                     sizeof (sub_name[0]),
+                                                     sizeof (uword));
+      q = hash_get_mem (p->sub_rule_index_by_name, sub_name);
+      if (q)
+        {
+          /* Rule already linked: the stored entry keeps its own name
+             vector, so ours is not needed. */
+          sr = vec_elt_at_index (p->sub_rules, q[0]);
+          ASSERT (sr->command_index == child_index);
+          vec_free (sub_name);
+          return;
+        }
+
+      q = hash_get_mem (cm->parse_rule_index_by_name, sub_name);
+      if (!q)
+        {
+          clib_error ("reference to unknown rule `%%%v' in path `%v'",
+                      sub_name, c->path);
+          vec_free (sub_name);
+          return;
+        }
+
+      /* sub_name becomes both the hash key and sr->name. */
+      hash_set_mem (p->sub_rule_index_by_name, sub_name,
+                    vec_len (p->sub_rules));
+      vec_add2 (p->sub_rules, sr, 1);
+      sr->name = sub_name;
+      sr->rule_index = q[0];
+      sr->command_index = child_index;
+      return;
+    }
+
+  if (!p->sub_command_index_by_name)
+    p->sub_command_index_by_name = hash_create_vec ( /* initial length */ 32,
+                                                    sizeof (c->path[0]),
+                                                    sizeof (uword));
+
+  /* Check if sub-command has already been created. */
+  if (hash_get_mem (p->sub_command_index_by_name, sub_name))
+    {
+      vec_free (sub_name);
+      return;
+    }
+
+  vec_add2 (p->sub_commands, sub_c, 1);
+  sub_c->index = child_index;
+  sub_c->name = sub_name;
+  hash_set_mem (p->sub_command_index_by_name, sub_c->name,
+                sub_c - p->sub_commands);
+
+  /* For each character position of the name, set this sub-command's bit
+     in the bitmap for that character. */
+  vec_validate (p->sub_command_positions, vec_len (sub_c->name) - 1);
+  for (i = 0; i < vec_len (sub_c->name); i++)
+    {
+      int n;
+      vlib_cli_parse_position_t *pos;
+
+      pos = vec_elt_at_index (p->sub_command_positions, i);
+
+      /* First name seen at this position defines min_char. */
+      if (!pos->bitmaps)
+        pos->min_char = sub_c->name[i];
+
+      n = sub_c->name[i] - pos->min_char;
+      if (n < 0)
+        {
+          /* Character is below min_char: lower min_char and shift the
+             existing bitmaps up by inserting empty ones at the front. */
+          pos->min_char = sub_c->name[i];
+          vec_insert (pos->bitmaps, -n, 0);
+          n = 0;
+        }
+
+      vec_validate (pos->bitmaps, n);
+      pos->bitmaps[n] =
+        clib_bitmap_ori (pos->bitmaps[n], sub_c - p->sub_commands);
+    }
+}
+
+/* Link command CI to its parent, creating the parent (and, recursively,
+ the parent's ancestors) when no command with the parent's path exists
+ yet. A path without a space is linked directly under the root command
+ at index 0. */
+static void
+vlib_cli_make_parent (vlib_cli_main_t * cm, uword ci)
+{
+ uword p_len, pi, *p;
+ char *p_path;
+ vlib_cli_command_t *c, *parent;
+
+ /* Root command (index 0) should have already been added. */
+ ASSERT (vec_len (cm->commands) > 0);
+
+ c = vec_elt_at_index (cm->commands, ci);
+ p_len = parent_path_len (c->path);
+
+ /* No space? Parent is root command. */
+ if (p_len == ~0)
+ {
+ add_sub_command (cm, 0, ci);
+ return;
+ }
+
+ /* Parent path = child path up to, not including, the last space. */
+ p_path = 0;
+ vec_add (p_path, c->path, p_len);
+
+ p = hash_get_mem (cm->command_index_by_path, p_path);
+
+ /* Parent exists? */
+ if (!p)
+ {
+ /* Parent does not exist; create it. The new entry takes ownership of
+ p_path. c is not used after this point; vec_add2 may have moved
+ cm->commands. */
+ vec_add2 (cm->commands, parent, 1);
+ parent->path = p_path;
+ hash_set_mem (cm->command_index_by_path, parent->path,
+ parent - cm->commands);
+ pi = parent - cm->commands;
+ }
+ else
+ {
+ pi = p[0];
+ vec_free (p_path);
+ }
+
+ add_sub_command (cm, pi, ci);
+
+ /* Create parent's parent. Only needed for a parent created just now. */
+ if (!p)
+ vlib_cli_make_parent (cm, pi);
+}
+
+/* True when C carries no help text and no handler function. */
+always_inline uword
+vlib_cli_command_is_empty (vlib_cli_command_t * c)
+{
+  return (!c->long_help && !c->short_help && !c->function);
+}
+
+/* Register command C with the CLI of VM.
+
+   The path is normalized and looked up.  If a placeholder created by
+   vlib_cli_make_parent already holds that path, it is filled in with the
+   caller's data while keeping all of its internal linkage.  If a real
+   command already holds the path, a "duplicate command" error is
+   returned.  Otherwise a new entry is appended.  In the non-error cases
+   the command is then linked to its parent chain.
+
+   Returns 0 on success or an error the caller must free. */
+clib_error_t *
+vlib_cli_register (vlib_main_t * vm, vlib_cli_command_t * c)
+{
+  vlib_cli_main_t *cm = &vm->cli_main;
+  clib_error_t *error = 0;
+  uword ci, *p;
+  char *normalized_path;
+
+  if ((error = vlib_call_init_function (vm, vlib_cli_init)))
+    return error;
+
+  (void) vlib_cli_normalize_path (c->path, &normalized_path);
+
+  if (!cm->command_index_by_path)
+    cm->command_index_by_path = hash_create_vec ( /* initial length */ 32,
+                                                 sizeof (c->path[0]),
+                                                 sizeof (uword));
+
+  /* See if command already exists with given path. */
+  p = hash_get_mem (cm->command_index_by_path, normalized_path);
+  if (p)
+    {
+      vlib_cli_command_t *d;
+
+      ci = p[0];
+      d = vec_elt_at_index (cm->commands, ci);
+
+      /* If the existing command was created via vlib_cli_make_parent,
+         replace it with the caller's data. */
+      if (vlib_cli_command_is_empty (d))
+        {
+          vlib_cli_command_t save = d[0];
+
+          ASSERT (!vlib_cli_command_is_empty (c));
+
+          /* Copy callers fields. */
+          d[0] = c[0];
+
+          /* Restore every internal field, including the rule-name hash
+             that goes with sub_rules. */
+          d->path = save.path;
+          d->sub_commands = save.sub_commands;
+          d->sub_command_index_by_name = save.sub_command_index_by_name;
+          d->sub_command_positions = save.sub_command_positions;
+          d->sub_rule_index_by_name = save.sub_rule_index_by_name;
+          d->sub_rules = save.sub_rules;
+        }
+      else
+        error =
+          clib_error_return (0, "duplicate command name with path %v",
+                             normalized_path);
+
+      /* The existing entry keeps its own path vector. */
+      vec_free (normalized_path);
+      if (error)
+        return error;
+    }
+  else
+    {
+      /* Command does not exist: create it. */
+
+      /* Add root command (index 0). */
+      if (vec_len (cm->commands) == 0)
+        {
+          /* Create command with index 0; path is empty string. */
+          vec_resize (cm->commands, 1);
+        }
+
+      ci = vec_len (cm->commands);
+      hash_set_mem (cm->command_index_by_path, normalized_path, ci);
+      vec_add1 (cm->commands, c[0]);
+
+      /* From here on c refers to the stored copy, not the registration. */
+      c = vec_elt_at_index (cm->commands, ci);
+      c->path = normalized_path;
+
+      /* Don't inherit from registration. */
+      c->sub_commands = 0;
+      c->sub_command_index_by_name = 0;
+      c->sub_command_positions = 0;
+      c->sub_rule_index_by_name = 0;
+      c->sub_rules = 0;
+    }
+
+  vlib_cli_make_parent (cm, ci);
+  return 0;
+}
+
+/* Register parse rule R_REG with the CLI of VM.
+
+   The rule is copied into cm->parse_rules with its name converted to a
+   vector, and indexed by that name.  Returns an error when a rule of the
+   same name is already registered, 0 otherwise. */
+clib_error_t *
+vlib_cli_register_parse_rule (vlib_main_t * vm, vlib_cli_parse_rule_t * r_reg)
+{
+  vlib_cli_main_t *cm = &vm->cli_main;
+  vlib_cli_parse_rule_t *rule;
+  u8 *name_vec;
+
+  if (!cm->parse_rule_index_by_name)
+    cm->parse_rule_index_by_name = hash_create_vec ( /* initial length */ 32,
+                                                    sizeof (rule->name[0]),
+                                                    sizeof (uword));
+
+  /* Make vector copy of name. */
+  name_vec = format (0, "%s", r_reg->name);
+
+  if (hash_get_mem (cm->parse_rule_index_by_name, name_vec))
+    {
+      vec_free (name_vec);
+      return clib_error_return (0, "duplicate parse rule name `%s'",
+                                r_reg->name);
+    }
+
+  vec_add2 (cm->parse_rules, rule, 1);
+  rule[0] = r_reg[0];
+  rule->name = (char *) name_vec;
+  hash_set_mem (cm->parse_rule_index_by_name, rule->name,
+                rule - cm->parse_rules);
+
+  return 0;
+}
+
+#if 0
+/* $$$ turn back on again someday, maybe */
+/* Disabled code: registers every parse rule in the range [lo, hi),
+ stepping with clib_elf_section_data_next, and stops at the first rule
+ that has no name or fails to register. */
+static clib_error_t *vlib_cli_register_parse_rules (vlib_main_t * vm,
+ vlib_cli_parse_rule_t *
+ lo,
+ vlib_cli_parse_rule_t *
+ hi)
+ __attribute__ ((unused))
+{
+ clib_error_t *error = 0;
+ vlib_cli_parse_rule_t *r;
+
+ for (r = lo; r < hi; r = clib_elf_section_data_next (r, 0))
+ {
+ if (!r->name || strlen (r->name) == 0)
+ {
+ error = clib_error_return (0, "parse rule with no name");
+ goto done;
+ }
+
+ error = vlib_cli_register_parse_rule (vm, r);
+ if (error)
+ goto done;
+ }
+
+done:
+ return error;
+}
+#endif
+
+/* Init function: registers every command on the
+   cm->cli_command_registrations list, stopping at and returning the
+   first error. */
+static clib_error_t *
+vlib_cli_init (vlib_main_t * vm)
+{
+  vlib_cli_main_t *cm = &vm->cli_main;
+  vlib_cli_command_t *reg;
+
+  for (reg = cm->cli_command_registrations; reg; reg = reg->next_cli_command)
+    {
+      clib_error_t *err = vlib_cli_register (vm, reg);
+
+      if (err)
+        return err;
+    }
+
+  return 0;
+}
+
+VLIB_INIT_FUNCTION (vlib_cli_init);
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vlib/cli.h b/src/vlib/cli.h
new file mode 100644
index 00000000000..009c7e82cf7
--- /dev/null
+++ b/src/vlib/cli.h
@@ -0,0 +1,192 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * cli.h: command line interface
+ *
+ * Copyright (c) 2008 Eliot Dresselhaus
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef included_vlib_cli_h
+#define included_vlib_cli_h
+
+#include <vppinfra/format.h>
+
+struct vlib_cli_command_t;
+
+/* Match data for one character position of sub-command names. */
+typedef struct
+{
+ /* Character value that bitmaps[0] corresponds to (the smallest
+ character recorded at this position). */
+ u32 min_char;
+
+ /* Indexed by name[position] - min_char. Each entry is a bitmap of the
+ sub-command indices that have that character at this position. */
+ uword **bitmaps;
+} vlib_cli_parse_position_t;
+
+/* A named child of a command. */
+typedef struct
+{
+ /* Name of the child: last element of its path (vector). */
+ u8 *name;
+
+ /* Index of the child in vlib_cli_main_t.commands. */
+ u32 index;
+} vlib_cli_sub_command_t;
+
+/* A child of a command that is selected by a parse rule rather than by
+ a literal name. */
+typedef struct
+{
+ /* Rule name, without the leading '%' used in paths (vector). */
+ u8 *name;
+
+ /* Index of the rule in vlib_cli_main_t.parse_rules. */
+ u32 rule_index;
+
+ /* Index in vlib_cli_main_t.commands of the command reached when the
+ rule matches. */
+ u32 command_index;
+} vlib_cli_sub_rule_t;
+
+/* A named parse rule that can be referenced from command paths. */
+typedef struct
+{
+ /* Rule name and help strings. */
+ char *name;
+ char *short_help;
+ char *long_help;
+
+ /* Number of bytes in parsed data. Zero for vector. */
+ uword data_size;
+
+ /* Function used to parse input for this rule. */
+ unformat_function_t *unformat_function;
+
+ /* Opaque for unformat function. */
+ uword unformat_function_arg[2];
+} vlib_cli_parse_rule_t;
+
+/* CLI command callback function.
+ vm: the vlib main the command runs on.
+ input: remaining input following the command path.
+ cmd: the command being executed.
+ Returns 0 on success or an error describing the failure. */
+typedef clib_error_t *(vlib_cli_command_function_t)
+ (struct vlib_main_t * vm,
+ unformat_input_t * input, struct vlib_cli_command_t * cmd);
+
+/* A CLI command. path, the help strings, function, function_arg and
+ is_mp_safe are supplied by the registrant; the sub_* fields are
+ maintained internally as commands are registered. */
+typedef struct vlib_cli_command_t
+{
+ /* Command path (e.g. "show something").
+ Spaces delimit elements of path. */
+ char *path;
+
+ /* Short/long help strings. */
+ char *short_help;
+ char *long_help;
+
+ /* Callback function. */
+ vlib_cli_command_function_t *function;
+
+ /* Opaque. */
+ uword function_arg;
+
+ /* Known MP-safe? When zero, the callback is run between a worker
+ thread barrier sync and release. */
+ uword is_mp_safe;
+
+ /* Sub commands for this command. */
+ vlib_cli_sub_command_t *sub_commands;
+
+ /* Hash table mapping name (e.g. last path element) to sub command index. */
+ uword *sub_command_index_by_name;
+
+ /* bitmap[p][c][i] says whether sub-command i has character
+ c in position p. */
+ vlib_cli_parse_position_t *sub_command_positions;
+
+ /* Hash table mapping name (e.g. last path element) to sub rule index. */
+ uword *sub_rule_index_by_name;
+
+ /* Vector of possible parse rules for this path. */
+ vlib_cli_sub_rule_t *sub_rules;
+
+ /* List of CLI commands, built by constructors */
+ struct vlib_cli_command_t *next_cli_command;
+
+} vlib_cli_command_t;
+
+/* Sink for CLI output: receives the opaque argument registered with it,
+ the formatted text and its length in bytes. */
+typedef void (vlib_cli_output_function_t) (uword arg,
+ u8 * buffer, uword buffer_bytes);
+/* Per vlib main CLI state. */
+typedef struct
+{
+ /* Vector of all known commands. Index 0 is the root command. */
+ vlib_cli_command_t *commands;
+
+ /* Hash table mapping normalized path to index into commands. */
+ uword *command_index_by_path;
+
+ /* Vector of all known parse rules. */
+ vlib_cli_parse_rule_t *parse_rules;
+
+ /* Hash table mapping parse rule name to index into parse_rule vector. */
+ uword *parse_rule_index_by_name;
+
+ /* Data parsed for rules. */
+ void **parse_rule_data;
+
+ /* registration list added by constructors */
+ vlib_cli_command_t *cli_command_registrations;
+} vlib_cli_main_t;
+
+/* Declare CLI command x and arrange for it to be registered.
+ Expands to: a declaration of x, a constructor function that pushes &x
+ onto the front of cli_command_registrations of the vlib main returned
+ by vlib_get_main(), and a second declaration of x left open so the
+ caller can follow the macro with "= { ... };". Extra macro arguments
+ (e.g. static) are placed before both declarations. */
+#define VLIB_CLI_COMMAND(x,...) \
+ __VA_ARGS__ vlib_cli_command_t x; \
+static void __vlib_cli_command_registration_##x (void) \
+ __attribute__((__constructor__)) ; \
+static void __vlib_cli_command_registration_##x (void) \
+{ \
+ vlib_main_t * vm = vlib_get_main(); \
+ vlib_cli_main_t *cm = &vm->cli_main; \
+ x.next_cli_command = cm->cli_command_registrations; \
+ cm->cli_command_registrations = &x; \
+} \
+__VA_ARGS__ vlib_cli_command_t x
+/* Declare parse rule x. Unlike VLIB_CLI_COMMAND this only declares the
+ object; the macro itself performs no registration. */
+#define VLIB_CLI_PARSE_RULE(x) \
+ vlib_cli_parse_rule_t x
+/* Output to current CLI connection. */
+void vlib_cli_output (struct vlib_main_t *vm, char *fmt, ...);
+
+/* Process CLI input. Output produced while processing is delivered to
+ function, which is called with function_arg. */
+void vlib_cli_input (struct vlib_main_t *vm,
+ unformat_input_t * input,
+ vlib_cli_output_function_t * function,
+ uword function_arg);
+
+/* Register a command / a parse rule. Both return 0 on success or an
+ error, e.g. when the path or rule name is already registered. */
+clib_error_t *vlib_cli_register (struct vlib_main_t *vm,
+ vlib_cli_command_t * c);
+clib_error_t *vlib_cli_register_parse_rule (struct vlib_main_t *vm,
+ vlib_cli_parse_rule_t * c);
+
+/* Unformat function matching a braced { SUB-INPUT }; takes an
+ unformat_input_t * to initialize with the contents. */
+uword unformat_vlib_cli_sub_input (unformat_input_t * i, va_list * args);
+
+#endif /* included_vlib_cli_h */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vlib/cli_funcs.h b/src/vlib/cli_funcs.h
new file mode 100644
index 00000000000..78aef73ba2d
--- /dev/null
+++ b/src/vlib/cli_funcs.h
@@ -0,0 +1,58 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * cli_funcs.h: VLIB CLI related functions/inlines
+ *
+ * Copyright (c) 2008 Eliot Dresselhaus
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef included_vlib_cli_funcs_h
+#define included_vlib_cli_funcs_h
+
+/* Return the parse-rule data pointer stored at the given index of the
+   CLI main's parse_rule_data vector. */
+always_inline void *
+vlib_cli_get_parse_rule_result (vlib_main_t * vm, uword index)
+{
+  void **rule_results = vm->cli_main.parse_rule_data;
+
+  return vec_elt (rule_results, index);
+}
+
+#endif /* included_vlib_cli_funcs_h */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vlib/counter.c b/src/vlib/counter.c
new file mode 100644
index 00000000000..9f66e04d88e
--- /dev/null
+++ b/src/vlib/counter.c
@@ -0,0 +1,151 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * counter.c: simple and packet/byte counters
+ *
+ * Copyright (c) 2008 Eliot Dresselhaus
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <vlib/vlib.h>
+
+/* Clear a collection of simple counters: fold every per-thread mini
+   counter into the shared wide counter, zero the minis, then snapshot
+   the wide counters into value_at_last_clear. */
+void
+vlib_clear_simple_counters (vlib_simple_counter_main_t * cm)
+{
+  uword thread, idx, n_counters;
+
+  for (thread = 0; thread < vec_len (cm->minis); thread++)
+    {
+      u16 *thread_minis = cm->minis[thread];
+
+      for (idx = 0; idx < vec_len (thread_minis); idx++)
+        {
+          cm->maxi[idx] += thread_minis[idx];
+          thread_minis[idx] = 0;
+        }
+    }
+
+  n_counters = vec_len (cm->maxi);
+  if (n_counters == 0)
+    return;
+
+  /* Make sure the snapshot vector covers every wide counter. */
+  vec_validate (cm->value_at_last_clear, n_counters - 1);
+  for (idx = 0; idx < n_counters; idx++)
+    cm->value_at_last_clear[idx] = cm->maxi[idx];
+}
+
+/* Clear a collection of combined counters: fold every per-thread mini
+   packet/byte pair into the shared wide pair, zero the minis, then
+   snapshot the wide counters into value_at_last_clear. */
+void
+vlib_clear_combined_counters (vlib_combined_counter_main_t * cm)
+{
+  uword thread, idx, n_counters;
+
+  for (thread = 0; thread < vec_len (cm->minis); thread++)
+    {
+      vlib_mini_counter_t *thread_minis = cm->minis[thread];
+
+      for (idx = 0; idx < vec_len (thread_minis); idx++)
+        {
+          vlib_mini_counter_t *mini = &thread_minis[idx];
+          vlib_counter_t *wide = &cm->maxi[idx];
+
+          wide->packets += mini->packets;
+          wide->bytes += mini->bytes;
+          mini->packets = 0;
+          mini->bytes = 0;
+        }
+    }
+
+  n_counters = vec_len (cm->maxi);
+  if (n_counters == 0)
+    return;
+
+  /* Make sure the snapshot vector covers every wide counter. */
+  vec_validate (cm->value_at_last_clear, n_counters - 1);
+  for (idx = 0; idx < n_counters; idx++)
+    *vec_elt_at_index (cm->value_at_last_clear, idx) = cm->maxi[idx];
+}
+
+/* Grow a simple counter collection so that index is valid: one mini
+   vector per vlib main (tm->n_vlib_mains of them), each cache-line
+   aligned, plus the shared wide vector. */
+void
+vlib_validate_simple_counter (vlib_simple_counter_main_t * cm, u32 index)
+{
+  vlib_thread_main_t *tm = vlib_get_thread_main ();
+  int thread = 0;
+
+  vec_validate (cm->minis, tm->n_vlib_mains - 1);
+  while (thread < tm->n_vlib_mains)
+    {
+      vec_validate_aligned (cm->minis[thread], index, CLIB_CACHE_LINE_BYTES);
+      thread++;
+    }
+  vec_validate_aligned (cm->maxi, index, CLIB_CACHE_LINE_BYTES);
+}
+
+/* Grow a combined counter collection so that index is valid: one mini
+   vector per vlib main (tm->n_vlib_mains of them), each cache-line
+   aligned, plus the shared wide vector. */
+void
+vlib_validate_combined_counter (vlib_combined_counter_main_t * cm, u32 index)
+{
+  vlib_thread_main_t *tm = vlib_get_thread_main ();
+  int thread = 0;
+
+  vec_validate (cm->minis, tm->n_vlib_mains - 1);
+  while (thread < tm->n_vlib_mains)
+    {
+      vec_validate_aligned (cm->minis[thread], index, CLIB_CACHE_LINE_BYTES);
+      thread++;
+    }
+  vec_validate_aligned (cm->maxi, index, CLIB_CACHE_LINE_BYTES);
+}
+/* Stub: simple counter serialization is not implemented; both arguments are ignored and a clib_warning is emitted. */
+void
+serialize_vlib_simple_counter_main (serialize_main_t * m, va_list * va)
+{
+ clib_warning ("unimplemented");
+}
+/* Stub: simple counter unserialization is not implemented; both arguments are ignored and a clib_warning is emitted. */
+void
+unserialize_vlib_simple_counter_main (serialize_main_t * m, va_list * va)
+{
+ clib_warning ("unimplemented");
+}
+/* Stub: combined counter serialization is not implemented; both arguments are ignored and a clib_warning is emitted. */
+void
+serialize_vlib_combined_counter_main (serialize_main_t * m, va_list * va)
+{
+ clib_warning ("unimplemented");
+}
+/* Stub: combined counter unserialization is not implemented; both arguments are ignored and a clib_warning is emitted. */
+void
+unserialize_vlib_combined_counter_main (serialize_main_t * m, va_list * va)
+{
+ clib_warning ("unimplemented");
+}
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vlib/counter.h b/src/vlib/counter.h
new file mode 100644
index 00000000000..a79032065d9
--- /dev/null
+++ b/src/vlib/counter.h
@@ -0,0 +1,379 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * counter.h: simple and packet/byte counters
+ *
+ * Copyright (c) 2008 Eliot Dresselhaus
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef included_vlib_counter_h
+#define included_vlib_counter_h
+
+/** \file
+
+ Optimized thread-safe counters.
+
+ Each vlib_[simple|combined]_counter_main_t consists of a single
+ vector of thread-safe / atomically-updated u64 counters [the
+ "maxi" vector], and a (u16 **) per-thread vector [the "minis"
+ vector] of narrow, per-thread counters.
+
+ The idea is to drastically reduce the number of atomic operations.
+ In the case of packet counts, we divide the number of atomic ops
+ by 2**16, etc.
+*/
+
+/** A collection of simple counters: per-thread narrow counters plus one shared wide counter per index */
+
+typedef struct
+{
+ u16 **minis; /**< Per-thread u16 non-atomic counters */
+ u64 *maxi; /**< Shared wide counters, updated with atomic adds on mini overflow */
+ u64 *value_at_last_clear; /**< Counter values as of last clear. */
+ u64 *value_at_last_serialize; /**< Values as of last serialize. */
+ u32 last_incremental_serialize_index; /**< Last counter index
+ serialized incrementally. */
+
+ char *name; /**< The counter collection's name. */
+} vlib_simple_counter_main_t;
+
+/** Increment a simple counter
+    Adds to the calling thread's narrow (u16) counter. When the sum no
+    longer fits in 16 bits, the full sum is atomically added to the shared
+    wide counter and the narrow counter is reset to zero.
+
+    @param cm - (vlib_simple_counter_main_t *) simple counter main pointer
+    @param cpu_index - (u32) the current cpu index
+    @param index - (u32) index of the counter to increment
+    @param increment - (u32) quantity to add to the counter
+*/
+always_inline void
+vlib_increment_simple_counter (vlib_simple_counter_main_t * cm,
+                               u32 cpu_index, u32 index, u32 increment)
+{
+  u16 *thread_minis = cm->minis[cpu_index];
+  u16 *slot = vec_elt_at_index (thread_minis, index);
+  u32 total = (u32) slot[0] + increment;
+
+  slot[0] = total;
+
+  /* The u16 store truncated the sum: spill it to the wide counter. */
+  if (PREDICT_FALSE (slot[0] != total))
+    {
+      __sync_fetch_and_add (&cm->maxi[index], total);
+      slot[0] = 0;
+    }
+}
+
+/** Get the value of a simple counter
+    Scrapes the entire set of mini counters. Inaccurate unless
+    worker threads which might increment the counter are
+    barrier-synchronized
+
+    @param cm - (vlib_simple_counter_main_t *) simple counter main pointer
+    @param index - (u32) index of the counter to fetch
+    @returns - (u64) current counter value, less the value recorded at the
+    last clear when one exists for this index
+*/
+always_inline u64
+vlib_get_simple_counter (vlib_simple_counter_main_t * cm, u32 index)
+{
+  u64 total = 0;
+  int thread;
+
+  ASSERT (index < vec_len (cm->maxi));
+
+  /* Per-thread narrow counters first, then the shared wide counter. */
+  for (thread = 0; thread < vec_len (cm->minis); thread++)
+    total += *vec_elt_at_index (cm->minis[thread], index);
+
+  total += cm->maxi[index];
+
+  if (index >= vec_len (cm->value_at_last_clear))
+    return total;
+
+  ASSERT (total >= cm->value_at_last_clear[index]);
+  return total - cm->value_at_last_clear[index];
+}
+
+/** Clear a simple counter
+    Zeroes the per-thread u16 counters, the u64 counter and, when one
+    exists for this index, the value recorded at the last clear
+
+    @param cm - (vlib_simple_counter_main_t *) simple counter main pointer
+    @param index - (u32) index of the counter to clear
+*/
+always_inline void
+vlib_zero_simple_counter (vlib_simple_counter_main_t * cm, u32 index)
+{
+  int thread;
+
+  ASSERT (index < vec_len (cm->maxi));
+
+  for (thread = 0; thread < vec_len (cm->minis); thread++)
+    cm->minis[thread][index] = 0;
+
+  cm->maxi[index] = 0;
+
+  if (index >= vec_len (cm->value_at_last_clear))
+    return;
+
+  cm->value_at_last_clear[index] = 0;
+}
+
+/** Combined counter to hold both packets and byte differences
+ (one 64-bit packet count paired with one 64-bit byte count).
+ */
+typedef struct
+{
+ u64 packets; /**< packet counter */
+ u64 bytes; /**< byte counter */
+} vlib_counter_t;
+
+/** Add two combined counters, results in the first counter
+    @param [in,out] a - (vlib_counter_t *) dst counter, receives a + b
+    @param b - (vlib_counter_t *) src counter, left unmodified
+*/
+
+always_inline void
+vlib_counter_add (vlib_counter_t * a, vlib_counter_t * b)
+{
+  a->packets = a->packets + b->packets;
+  a->bytes = a->bytes + b->bytes;
+}
+
+/** Subtract combined counters, results in the first counter
+    Asserts that neither field of a is smaller than the matching field of b.
+    @param [in,out] a - (vlib_counter_t *) dst counter, receives a - b
+    @param b - (vlib_counter_t *) src counter, left unmodified
+*/
+always_inline void
+vlib_counter_sub (vlib_counter_t * a, vlib_counter_t * b)
+{
+  ASSERT (a->packets >= b->packets);
+  ASSERT (a->bytes >= b->bytes);
+
+  a->packets = a->packets - b->packets;
+  a->bytes = a->bytes - b->bytes;
+}
+
+/** Clear a combined counter
+    @param a - (vlib_counter_t *) counter whose packets and bytes are set to 0
+*/
+always_inline void
+vlib_counter_zero (vlib_counter_t * a)
+{
+  a->bytes = 0;
+  a->packets = 0;
+}
+
+/** Mini combined counter (narrow per-thread packet/byte pair) */
+typedef struct
+{
+ u16 packets; /**< Packet count */
+ i16 bytes; /**< Byte count; note the signed 16-bit type, unlike packets */
+} vlib_mini_counter_t;
+
+/** A collection of combined counters */
+typedef struct
+{
+ vlib_mini_counter_t **minis; /**< Per-thread narrow (u16 packets / i16 bytes) non-atomic counter pairs */
+ vlib_counter_t *maxi; /**< Shared wide counter pairs */
+ vlib_counter_t *value_at_last_clear; /**< Counter values as of last clear. */
+ vlib_counter_t *value_at_last_serialize; /**< Counter values as of last serialize. */
+ u32 last_incremental_serialize_index; /**< Last counter index serialized incrementally. */
+ char *name; /**< The counter collection's name. */
+} vlib_combined_counter_main_t;
+
+/** Clear a collection of simple counters
+ Folds the per-thread mini counters into the wide counters and records
+ the wide values as the values at last clear.
+ @param cm - (vlib_simple_counter_main_t *) collection to clear
+*/
+void vlib_clear_simple_counters (vlib_simple_counter_main_t * cm);
+
+/** Clear a collection of combined counters
+ Folds the per-thread mini counters into the wide counters and records
+ the wide values as the values at last clear.
+ @param cm - (vlib_combined_counter_main_t *) collection to clear
+*/
+void vlib_clear_combined_counters (vlib_combined_counter_main_t * cm);
+
+/** Increment a combined counter
+    Adds to the calling thread's narrow packet/byte pair. When the byte sum
+    no longer fits the narrow field, both sums are atomically added to the
+    shared wide counter and the narrow pair is reset to zero.
+
+    @param cm - (vlib_combined_counter_main_t *) combined counter main pointer
+    @param cpu_index - (u32) the current cpu index
+    @param index - (u32) index of the counter to increment
+    @param packet_increment - (u32) number of packets to add to the counter
+    @param byte_increment - (u32) number of bytes to add to the counter
+*/
+
+always_inline void
+vlib_increment_combined_counter (vlib_combined_counter_main_t * cm,
+                                 u32 cpu_index,
+                                 u32 index,
+                                 u32 packet_increment, u32 byte_increment)
+{
+  /* Use this CPU's mini counter array */
+  vlib_mini_counter_t *thread_minis = cm->minis[cpu_index];
+  vlib_mini_counter_t *slot = vec_elt_at_index (thread_minis, index);
+  u32 packets_sum = (u32) slot->packets + packet_increment;
+  i32 bytes_sum = (i32) slot->bytes + byte_increment;
+
+  slot->packets = packets_sum;
+  slot->bytes = bytes_sum;
+
+  /* Bytes always overflow before packets.. */
+  if (PREDICT_FALSE (slot->bytes != bytes_sum))
+    {
+      vlib_counter_t *wide = vec_elt_at_index (cm->maxi, index);
+
+      __sync_fetch_and_add (&wide->packets, packets_sum);
+      __sync_fetch_and_add (&wide->bytes, bytes_sum);
+
+      slot->packets = 0;
+      slot->bytes = 0;
+    }
+}
+
+/** Get the value of a combined counter, never called in the speed path
+    Scrapes the entire set of mini counters. Inaccurate unless
+    worker threads which might increment the counter are
+    barrier-synchronized
+
+    @param cm - (vlib_combined_counter_main_t *) combined counter main pointer
+    @param index - (u32) index of the combined counter to fetch
+    @param result [out] - (vlib_counter_t *) result stored here, less the
+    value recorded at the last clear when one exists for this index
+*/
+
+static inline void
+vlib_get_combined_counter (vlib_combined_counter_main_t * cm,
+                           u32 index, vlib_counter_t * result)
+{
+  int thread;
+
+  result->packets = 0;
+  result->bytes = 0;
+
+  /* Per-thread narrow pairs first ... */
+  for (thread = 0; thread < vec_len (cm->minis); thread++)
+    {
+      vlib_mini_counter_t *slot = vec_elt_at_index (cm->minis[thread], index);
+
+      result->packets += slot->packets;
+      result->bytes += slot->bytes;
+    }
+
+  /* ... then the shared wide pair. */
+  vlib_counter_add (result, vec_elt_at_index (cm->maxi, index));
+
+  if (index < vec_len (cm->value_at_last_clear))
+    vlib_counter_sub (result, &cm->value_at_last_clear[index]);
+}
+
+/** Clear a combined counter
+    Zeroes the per-thread narrow counter pairs, the shared vlib_counter_t
+    and, when one exists for this index, the value recorded at the last clear
+
+    @param cm - (vlib_combined_counter_main_t *) combined counter main pointer
+    @param index - (u32) index of the counter to clear
+*/
+always_inline void
+vlib_zero_combined_counter (vlib_combined_counter_main_t * cm, u32 index)
+{
+  int thread;
+
+  for (thread = 0; thread < vec_len (cm->minis); thread++)
+    {
+      vlib_mini_counter_t *slot = vec_elt_at_index (cm->minis[thread], index);
+
+      slot->packets = 0;
+      slot->bytes = 0;
+    }
+
+  vlib_counter_zero (cm->maxi + index);
+
+  if (index < vec_len (cm->value_at_last_clear))
+    vlib_counter_zero (cm->value_at_last_clear + index);
+}
+
+/** Validate a simple counter: grow the collection as needed so that
+ index is a valid counter index
+ @param cm - (vlib_simple_counter_main_t *) pointer to the counter collection
+ @param index - (u32) index of the counter to validate
+*/
+
+void vlib_validate_simple_counter (vlib_simple_counter_main_t * cm,
+ u32 index);
+/** Validate a combined counter: grow the collection as needed so that
+ index is a valid counter index
+ @param cm - (vlib_combined_counter_main_t *) pointer to the counter
+ collection
+ @param index - (u32) index of the counter to validate
+*/
+
+void vlib_validate_combined_counter (vlib_combined_counter_main_t * cm,
+ u32 index);
+
+/** Obtain the number of simple or combined counters allocated.
+ A macro which reduces to vec_len(cm->maxi), the answer in either
+ case.
+
+ @param cm - (vlib_simple_counter_main_t *) or
+ (vlib_combined_counter_main_t *) the counter collection to interrogate
+ (a pointer: the macro dereferences it with ->)
+ @returns vec_len(cm->maxi)
+*/
+#define vlib_counter_len(cm) vec_len((cm)->maxi)
+/* Serialize/unserialize entry points for simple and combined counter collections, declared with the serialize_function_t type. */
+serialize_function_t serialize_vlib_simple_counter_main,
+ unserialize_vlib_simple_counter_main;
+serialize_function_t serialize_vlib_combined_counter_main,
+ unserialize_vlib_combined_counter_main;
+
+#endif /* included_vlib_counter_h */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vlib/defs.h b/src/vlib/defs.h
new file mode 100644
index 00000000000..ad58bc04681
--- /dev/null
+++ b/src/vlib/defs.h
@@ -0,0 +1,82 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * defs.h: VLIB generic C definitions
+ *
+ * Copyright (c) 2008 Eliot Dresselhaus
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef included_vlib_defs_h
+#define included_vlib_defs_h
+
+/* Receive or transmit direction (VLIB_RX = 0, VLIB_TX = 1). */
+typedef enum
+{
+ VLIB_RX,
+ VLIB_TX,
+ VLIB_N_RX_TX = 2, /* Used to size arrays. */
+} vlib_rx_or_tx_t;
+/* Loop header that steps v from 0 up to VLIB_N_RX_TX - 1, i.e. over VLIB_RX then VLIB_TX; the caller supplies the loop body. */
+#define vlib_foreach_rx_tx(v) for (v = 0; v < VLIB_N_RX_TX; v++)
+
+/* Read/write selector (VLIB_READ = 0, VLIB_WRITE = 1 by default enum numbering). */
+typedef enum
+{
+ VLIB_READ,
+ VLIB_WRITE,
+} vlib_read_or_write_t;
+
+/* Up/down selector; the values are explicit so VLIB_DOWN is 0 and VLIB_UP is 1. */
+typedef enum
+{
+ VLIB_DOWN = 0,
+ VLIB_UP = 1,
+} vlib_up_or_down_t;
+
+/* Enable/disable selector; the values are explicit so VLIB_DISABLE is 0 and VLIB_ENABLE is 1. */
+typedef enum
+{
+ VLIB_DISABLE = 0,
+ VLIB_ENABLE = 1,
+} vlib_enable_or_disable_t;
+
+#endif /* included_vlib_defs_h */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vlib/dir.dox b/src/vlib/dir.dox
new file mode 100644
index 00000000000..4806e7a91c6
--- /dev/null
+++ b/src/vlib/dir.dox
@@ -0,0 +1,23 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Copyright (c) 2016 Comcast Cable Communications Management, LLC.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/* Doxygen directory documentation */
+/**
+@dir
+@brief VLIB application library source.
+*/
+/*? %%clicmd:group_label VLIB application library%% ?*/
+
diff --git a/src/vlib/elog_samples.c b/src/vlib/elog_samples.c
new file mode 100644
index 00000000000..a8c800df959
--- /dev/null
+++ b/src/vlib/elog_samples.c
@@ -0,0 +1,122 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vlib/vlib.h>
+#include <vppinfra/elog.h>
+
+/* Sample: log one event carrying four u32 values to the global event log. */
+static inline void
+elog_four_int_sample (u32 * data)
+{
+  ELOG_TYPE_DECLARE (e) =
+  {
+    .format = "four int: first %d second %d third %d fourth %d",
+    .format_args = "i4i4i4i4",
+  };
+  struct
+  {
+    u32 data[4];
+  } *ed;
+  int k;
+
+  ed = ELOG_DATA (&vlib_global_main.elog_main, e);
+  for (k = 0; k < 4; k++)
+    ed->data[k] = data[k];
+}
+
+/* Sample: log one event carrying four u32 values to the global event log
+   on the track declared here as sample_track. */
+static inline void
+elog_four_int_track_sample (u32 * data)
+{
+  ELOG_TYPE_DECLARE (e) =
+  {
+    .format = "four_int_track: first %d second %d third %d fourth %d",
+    .format_args = "i4i4i4i4",
+  };
+  struct
+  {
+    u32 data[4];
+  } *ed;
+  int k;
+  ELOG_TRACK (sample_track);
+
+  ed = ELOG_TRACK_DATA (&vlib_global_main.elog_main, e, sample_track);
+  for (k = 0; k < 4; k++)
+    ed->data[k] = data[k];
+}
+
+/* Sample: log one event whose single u8 datum selects one of the two
+   enum strings declared in the event type. */
+static inline void
+elog_enum_sample (u8 which)
+{
+  ELOG_TYPE_DECLARE (e) =
+  {
+    .format = "my enum: %s",
+    .format_args = "t1",
+    .n_enum_strings = 2,
+    .enum_strings =
+    {
+      "string 1",
+      "string 2",
+    },
+  };
+  struct
+  {
+    u8 which;
+  } *ed;
+
+  ed = ELOG_DATA (&vlib_global_main.elog_main, e);
+  ed->which = which;
+}
+
+/* Sample: log one event carrying a single u32 through the elog() helper. */
+static inline void
+elog_one_datum_sample (u32 data)
+{
+  ELOG_TYPE_DECLARE (e) =
+  {
+    .format = "one datum: %d",
+    .format_args = "i4",
+  };
+
+  elog (&vlib_global_main.elog_main, &e, data);
+}
+
+/* CLI handler: run ten rounds of every sample logger above.  Round r
+   logs the four values r, r + 1, r + 2, r + 3, both enum strings and the
+   single datum r.  Always returns 0 (no error). */
+static clib_error_t *
+test_elog_command_fn (vlib_main_t * vm,
+                      unformat_input_t * input, vlib_cli_command_t * cmd)
+{
+  u32 samples[4];
+  int round, k;
+
+  for (round = 0; round < 10; round++)
+    {
+      for (k = 0; k < 4; k++)
+        samples[k] = round + k;
+
+      elog_four_int_sample (samples);
+      elog_four_int_track_sample (samples);
+      elog_enum_sample (0);
+      elog_enum_sample (1);
+      elog_one_datum_sample (round);
+    }
+
+  return 0;
+}
+/* CLI command "test elog sample", handled by test_elog_command_fn. */
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (test_elog_command, static) = {
+ .path = "test elog sample",
+ .short_help = "test elog sample",
+ .function = test_elog_command_fn,
+};
+/* *INDENT-ON* */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vlib/error.c b/src/vlib/error.c
new file mode 100644
index 00000000000..a2c2317686b
--- /dev/null
+++ b/src/vlib/error.c
@@ -0,0 +1,338 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * error.c: VLIB error handler
+ *
+ * Copyright (c) 2008 Eliot Dresselhaus
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <vlib/vlib.h>
+#include <vppinfra/heap.h>
+/* Tag buffers with a drop error and enqueue them to a next node.
+ *
+ * Builds one vlib_error_t from drop_error_node/drop_error_code, copies the
+ * buffer indices into frames for next_index (as many frames as needed) and
+ * stores the error in each buffer's error field. Returns n_buffers.
+ *
+ * NOTE(review): next_buffer_stride is never referenced in this body; the
+ * buffers array is consumed contiguously. Confirm callers only pass 1.
+ */
+uword
+vlib_error_drop_buffers (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ u32 * buffers,
+ u32 next_buffer_stride,
+ u32 n_buffers,
+ u32 next_index,
+ u32 drop_error_node, u32 drop_error_code)
+{
+ u32 n_left_this_frame, n_buffers_left, *args, n_args_left;
+ vlib_error_t drop_error;
+
+ drop_error = vlib_error_set (drop_error_node, drop_error_code);
+
+ n_buffers_left = n_buffers;
+ while (n_buffers_left > 0)
+ {
+ vlib_get_next_frame (vm, node, next_index, args, n_args_left);
+
+ n_left_this_frame = clib_min (n_buffers_left, n_args_left); /* take only what fits in this frame */
+ n_buffers_left -= n_left_this_frame;
+ n_args_left -= n_left_this_frame;
+
+ while (n_left_this_frame >= 4) /* four buffers per iteration */
+ {
+ u32 bi0, bi1, bi2, bi3;
+ vlib_buffer_t *b0, *b1, *b2, *b3;
+
+ args[0] = bi0 = buffers[0];
+ args[1] = bi1 = buffers[1];
+ args[2] = bi2 = buffers[2];
+ args[3] = bi3 = buffers[3];
+
+ b0 = vlib_get_buffer (vm, bi0);
+ b1 = vlib_get_buffer (vm, bi1);
+ b2 = vlib_get_buffer (vm, bi2);
+ b3 = vlib_get_buffer (vm, bi3);
+
+ b0->error = drop_error;
+ b1->error = drop_error;
+ b2->error = drop_error;
+ b3->error = drop_error;
+
+ buffers += 4;
+ args += 4;
+ n_left_this_frame -= 4;
+ }
+
+ while (n_left_this_frame >= 1) /* remaining buffers, one at a time */
+ {
+ u32 bi0;
+ vlib_buffer_t *b0;
+
+ args[0] = bi0 = buffers[0];
+
+ b0 = vlib_get_buffer (vm, bi0);
+ b0->error = drop_error;
+
+ buffers += 1;
+ args += 1;
+ n_left_this_frame -= 1;
+ }
+
+ vlib_put_next_frame (vm, node, next_index, n_args_left);
+ }
+
+ return n_buffers;
+}
+
+/* Convenience node to drop a vector of buffers with a "misc error":
+ hands the whole frame to vlib_error_drop_buffers with next index 0 and
+ this node's error code 0, and returns its result. */
+static uword
+misc_drop_buffers (vlib_main_t * vm,
+ vlib_node_runtime_t * node, vlib_frame_t * frame)
+{
+ return vlib_error_drop_buffers (vm, node, vlib_frame_args (frame),
+ /* buffer stride */ 1,
+ frame->n_vectors,
+ /* next */ 0,
+ node->node_index,
+ /* error */ 0);
+}
+/* Error strings for the misc-drop-buffers node; code 0 is its only error. */
+static char *misc_drop_buffers_error_strings[] = {
+ [0] = "misc. errors",
+};
+/* Node registration for "misc-drop-buffers": one error code and a single next node, "error-drop". */
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (misc_drop_buffers_node,static) = {
+ .function = misc_drop_buffers,
+ .name = "misc-drop-buffers",
+ .vector_size = sizeof (u32),
+ .n_errors = 1,
+ .n_next_nodes = 1,
+ .next_nodes = {
+ "error-drop",
+ },
+ .error_strings = misc_drop_buffers_error_strings,
+};
+/* *INDENT-ON* */
+
+/* Reserves given number of error codes for given node.
+ Any range previously held by the node is released first. The node's
+ error strings are copied into the shared error_strings_heap, the counter
+ and elog event type vectors are grown to match the heap, and one elog
+ format string is built per error. With n_errors == 0 the node is left
+ with no error codes. */
+void
+vlib_register_errors (vlib_main_t * vm,
+ u32 node_index, u32 n_errors, char *error_strings[])
+{
+ vlib_error_main_t *em = &vm->error_main;
+ vlib_node_t *n = vlib_get_node (vm, node_index);
+ uword l;
+
+ ASSERT (os_get_cpu_number () == 0); /* expected to be called on cpu 0 only */
+
+ /* Free up any previous error strings. */
+ if (n->n_errors > 0)
+ heap_dealloc (em->error_strings_heap, n->error_heap_handle);
+
+ n->n_errors = n_errors;
+ n->error_strings = error_strings;
+
+ if (n_errors == 0)
+ return;
+
+ n->error_heap_index =
+ heap_alloc (em->error_strings_heap, n_errors, n->error_heap_handle);
+
+ l = vec_len (em->error_strings_heap);
+
+ clib_memcpy (vec_elt_at_index (em->error_strings_heap, n->error_heap_index),
+ error_strings, n_errors * sizeof (error_strings[0]));
+
+ /* Allocate a counter/elog type for each error: both vectors are sized
+ to the current length of the strings heap. */
+ vec_validate (em->counters, l - 1);
+ vec_validate (vm->error_elog_event_types, l - 1);
+
+ /* Zero counters for re-registrations of errors. When the last-clear
+ snapshot covers this range, the counters are set equal to it;
+ otherwise they are set to 0. */
+ if (n->error_heap_index + n_errors <= vec_len (em->counters_last_clear))
+ clib_memcpy (em->counters + n->error_heap_index,
+ em->counters_last_clear + n->error_heap_index,
+ n_errors * sizeof (em->counters[0]));
+ else
+ memset (em->counters + n->error_heap_index,
+ 0, n_errors * sizeof (em->counters[0]));
+
+ {
+ elog_event_type_t t;
+ uword i;
+
+ memset (&t, 0, sizeof (t));
+ for (i = 0; i < n_errors; i++)
+ {
+ /* Each error gets its own format string built from the node name and error string. */
+ t.format = (char *) format (0, "%v %s: %%d",
+ n->name, error_strings[i]);
+ vm->error_elog_event_types[n->error_heap_index + i] = t;
+ }
+ }
+}
+
+/* CLI handler for "show errors" and "show node counters".
+ *
+ * Walks every vlib_main and prints, for each error code of each node, the
+ * counter value since the last clear.  Zero counters are skipped unless
+ * the verbose level is 2 or more.  With "verbose" (optionally followed by
+ * a level) each thread gets a heading, rows include the counter index, and
+ * a "Total:" section summed over all vlib_mains is appended.
+ *
+ * Always returns 0 (no error).
+ */
+static clib_error_t *
+show_errors (vlib_main_t * vm,
+             unformat_input_t * input, vlib_cli_command_t * cmd)
+{
+  vlib_error_main_t *em = &vm->error_main;
+  vlib_node_t *n;
+  u32 code, i, ni;
+  u64 c;
+  int index = 0;
+  int verbose = 0;
+  u64 *sums = 0;
+
+  if (unformat (input, "verbose %d", &verbose))
+    ;
+  else if (unformat (input, "verbose"))
+    verbose = 1;
+
+  /* Per-counter totals accumulated across all vlib_mains. */
+  vec_validate (sums, vec_len (em->counters));
+
+  if (verbose)
+    vlib_cli_output (vm, "%=10s%=40s%=20s%=6s", "Count", "Node", "Reason",
+                     "Index");
+  else
+    vlib_cli_output (vm, "%=10s%=40s%=6s", "Count", "Node", "Reason");
+
+
+  /* *INDENT-OFF* */
+  foreach_vlib_main(({
+    em = &this_vlib_main->error_main;
+
+    if (verbose)
+      vlib_cli_output(vm, "Thread %u (%v):", index,
+                      vlib_worker_threads[index].name);
+
+    for (ni = 0; ni < vec_len (this_vlib_main->node_main.nodes); ni++)
+      {
+        n = vlib_get_node (this_vlib_main, ni);
+        for (code = 0; code < n->n_errors; code++)
+          {
+            i = n->error_heap_index + code;
+            c = em->counters[i];
+            /* Report the count relative to the last clear. */
+            if (i < vec_len (em->counters_last_clear))
+              c -= em->counters_last_clear[i];
+            sums[i] += c;
+
+            if (c == 0 && verbose < 2)
+              continue;
+
+            if (verbose)
+              vlib_cli_output (vm, "%10Ld%=40v%=20s%=6d", c, n->name,
+                               em->error_strings_heap[i], i);
+            else
+              /* c is a u64: use the L length modifier, as the verbose
+                 row does, so the count is not consumed as an int. */
+              vlib_cli_output (vm, "%10Ld%=40v%s", c, n->name,
+                               em->error_strings_heap[i]);
+          }
+      }
+    index++;
+  }));
+  /* *INDENT-ON* */
+
+  if (verbose)
+    vlib_cli_output (vm, "Total:");
+
+  for (ni = 0; ni < vec_len (vm->node_main.nodes); ni++)
+    {
+      n = vlib_get_node (vm, ni);
+      for (code = 0; code < n->n_errors; code++)
+        {
+          i = n->error_heap_index + code;
+          if (sums[i])
+            {
+              if (verbose)
+                vlib_cli_output (vm, "%10Ld%=40v%=20s%=10d", sums[i], n->name,
+                                 em->error_strings_heap[i], i);
+            }
+        }
+    }
+
+  vec_free (sums);
+
+  return 0;
+}
+/* CLI command "show errors", handled by show_errors. */
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (cli_show_errors, static) = {
+ .path = "show errors",
+ .short_help = "Show error counts",
+ .function = show_errors,
+};
+/* *INDENT-ON* */
+/* CLI command "show node counters": a second path bound to the same show_errors handler. */
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (cli_show_node_counters, static) = {
+ .path = "show node counters",
+ .short_help = "Show node counters",
+ .function = show_errors,
+};
+/* *INDENT-ON* */
+
+/* CLI handler for "clear errors" and "clear node counters": for every
+   vlib_main, snapshot the current error counters into
+   counters_last_clear.  The counters themselves are left untouched.
+   Always returns 0 (no error). */
+static clib_error_t *
+clear_error_counters (vlib_main_t * vm,
+                      unformat_input_t * input, vlib_cli_command_t * cmd)
+{
+  vlib_error_main_t *em;
+  u32 slot;
+
+  /* *INDENT-OFF* */
+  foreach_vlib_main(({
+    em = &this_vlib_main->error_main;
+    vec_validate (em->counters_last_clear, vec_len (em->counters) - 1);
+    slot = 0;
+    while (slot < vec_len (em->counters))
+      {
+        em->counters_last_clear[slot] = em->counters[slot];
+        slot++;
+      }
+  }));
+  /* *INDENT-ON* */
+  return 0;
+}
+/* CLI command "clear errors", handled by clear_error_counters. */
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (cli_clear_error_counters, static) = {
+ .path = "clear errors",
+ .short_help = "Clear error counters",
+ .function = clear_error_counters,
+};
+/* *INDENT-ON* */
+/* CLI command "clear node counters": a second path bound to the same clear_error_counters handler. */
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (cli_clear_node_counters, static) = {
+ .path = "clear node counters",
+ .short_help = "Clear node counters",
+ .function = clear_error_counters,
+};
+/* *INDENT-ON* */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vlib/error.h b/src/vlib/error.h
new file mode 100644
index 00000000000..df2075c306d
--- /dev/null
+++ b/src/vlib/error.h
@@ -0,0 +1,101 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * error.h: drop/punt error packets
+ *
+ * Copyright (c) 2008 Eliot Dresselhaus
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef included_vlib_error_h
+#define included_vlib_error_h
+
+/* Node index and error code packed into one 32 bit number. The helpers
+ below keep the code in the low 12 bits and the node index in the
+ upper 20 bits. */
+typedef u32 vlib_error_t;
+
+always_inline u32
+vlib_error_get_node (vlib_error_t e)
+{
+ return e >> 12;
+}
+
+always_inline u32
+vlib_error_get_code (vlib_error_t e)
+{
+ return e & 0xfff;
+}
+
+/* Pack node_index (must fit in 20 bits) and code (must fit in 12 bits)
+   into a single vlib_error_t. */
+always_inline vlib_error_t
+vlib_error_set (u32 node_index, u32 code)
+{
+  vlib_error_t packed;
+
+  ASSERT (node_index < (1 << 20));
+  ASSERT (code < (1 << 12));
+
+  packed = node_index << 12;
+  packed |= code;
+  return packed;
+}
+
+/* Return e with its code field set to code.  e is expected to carry a
+   zero code and code must fit in 12 bits (both checked by ASSERT). */
+always_inline vlib_error_t
+vlib_error_set_code (vlib_error_t e, u32 code)
+{
+  ASSERT (vlib_error_get_code (e) == 0);
+  ASSERT (code < (1 << 12));
+
+  return e | code;
+}
+
+/* Error counter state; one instance is embedded in each vlib_main_t as
+ error_main. */
+typedef struct
+{
+ /* Error counters. A node's counter for a given code lives at index
+ (node error_heap_index + code). */
+ u64 *counters;
+
+ /* Counter values as of last counter clear. */
+ u64 *counters_last_clear;
+
+ /* Error name strings in heap. Heap index
+ indexes counter vector. */
+ char **error_strings_heap;
+} vlib_error_main_t;
+
+/* Per node error registration.
+ node_index: node the errors belong to.
+ n_errors: number of entries in error_strings.
+ error_strings: one name per error code.
+ NOTE(review): implementation is not visible from this header; the
+ parameter descriptions above are inferred from the names - confirm. */
+void vlib_register_errors (struct vlib_main_t *vm,
+ u32 node_index,
+ u32 n_errors, char *error_strings[]);
+
+#endif /* included_vlib_error_h */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vlib/error_funcs.h b/src/vlib/error_funcs.h
new file mode 100644
index 00000000000..1a3602e92c6
--- /dev/null
+++ b/src/vlib/error_funcs.h
@@ -0,0 +1,88 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * error_funcs.h: VLIB error handling
+ *
+ * Copyright (c) 2008 Eliot Dresselhaus
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef included_vlib_error_funcs_h
+#define included_vlib_error_funcs_h
+
+#include <vlib/node_funcs.h>
+
+always_inline void
+vlib_error_elog_count (vlib_main_t * vm, uword counter, uword increment)
+{
+ elog_main_t *em = &vm->elog_main;
+ if (VLIB_ELOG_MAIN_LOOP > 0 && increment > 0)
+ elog (em, vec_elt_at_index (vm->error_elog_event_types, counter),
+ increment);
+}
+
+always_inline void
+vlib_error_count (vlib_main_t * vm, uword node_index,
+ uword counter, uword increment)
+{
+ vlib_node_t *n = vlib_get_node (vm, node_index);
+ vlib_error_main_t *em = &vm->error_main;
+
+ ASSERT (counter < n->n_errors);
+ counter += n->error_heap_index;
+
+ ASSERT (counter < vec_len (em->counters));
+ em->counters[counter] += increment;
+
+ vlib_error_elog_count (vm, counter, increment);
+}
+
+/* Drop all buffers in frame with given error code.
+ NOTE(review): the implementation is not part of this header. Presumably
+ buffers holds n_buffers buffer indices spaced next_buffer_stride apart,
+ and each is sent to next index error_next_index tagged with
+ (error_node, error_code) - confirm against the definition. */
+uword
+vlib_error_drop_buffers (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ u32 * buffers,
+ u32 next_buffer_stride,
+ u32 n_buffers,
+ u32 error_next_index,
+ u32 error_node, u32 error_code);
+
+#endif /* included_vlib_error_funcs_h */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vlib/format.c b/src/vlib/format.c
new file mode 100644
index 00000000000..79a4d6866db
--- /dev/null
+++ b/src/vlib/format.c
@@ -0,0 +1,196 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * format.c: generic network formatting/unformating
+ *
+ * Copyright (c) 2008 Eliot Dresselhaus
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <vlib/vlib.h>
+
+/* Format function: append "rx" or "tx" for a vlib_rx_or_tx_t argument,
+   or "INVALID" for any other value.  Returns the (possibly moved)
+   vector s. */
+u8 *
+format_vlib_rx_tx (u8 * s, va_list * args)
+{
+  vlib_rx_or_tx_t direction = va_arg (*args, vlib_rx_or_tx_t);
+  char *label = "INVALID";
+
+  if (direction == VLIB_RX)
+    label = "rx";
+  else if (direction == VLIB_TX)
+    label = "tx";
+
+  vec_add (s, label, strlen (label));
+  return s;
+}
+
+/* Format function: append "read" or "write" for a vlib_read_or_write_t
+   argument, or "INVALID" for any other value.  Returns the (possibly
+   moved) vector s. */
+u8 *
+format_vlib_read_write (u8 * s, va_list * args)
+{
+  /* Fetch the argument with the enum that declares VLIB_READ/VLIB_WRITE
+     rather than the unrelated rx/tx enum. */
+  vlib_read_or_write_t r = va_arg (*args, vlib_read_or_write_t);
+  char *t;
+
+  switch (r)
+    {
+    case VLIB_READ:
+      t = "read";
+      break;
+    case VLIB_WRITE:
+      t = "write";
+      break;
+    default:
+      t = "INVALID";
+      break;
+    }
+
+  vec_add (s, t, strlen (t));
+  return s;
+}
+
+/* Formats buffer data as printable ascii or as hex.
+   Arguments taken from args: u8 *data, u32 n_data_bytes.
+   If every byte is in the range 0x20..0x7e the bytes are appended to s
+   verbatim; otherwise the whole buffer is appended via format_hex_bytes. */
+u8 *
+format_vlib_buffer_data (u8 * s, va_list * args)
+{
+  u8 *data = va_arg (*args, u8 *);
+  u32 n_data_bytes = va_arg (*args, u32);
+  u32 pos;
+
+  /* A single non-printable byte switches the whole output to hex. */
+  for (pos = 0; pos < n_data_bytes; pos++)
+    if (data[pos] < 0x20 || data[pos] >= 0x7f)
+      return format (s, "%U", format_hex_bytes, data, n_data_bytes);
+
+  vec_add (s, data, n_data_bytes);
+  return s;
+}
+
+/* Enable/on => 1; disable/off => 0.
+   Argument taken from args: int *result.  Returns 1 and stores the value
+   on a match; returns 0 and leaves *result untouched otherwise. */
+uword
+unformat_vlib_enable_disable (unformat_input_t * input, va_list * args)
+{
+  int *result = va_arg (*args, int *);
+
+  if (unformat (input, "enable") || unformat (input, "on"))
+    {
+      *result = 1;
+      return 1;
+    }
+
+  if (unformat (input, "disable") || unformat (input, "off"))
+    {
+      *result = 0;
+      return 1;
+    }
+
+  return 0;
+}
+
+/* rx/tx => VLIB_RX/VLIB_TX.
+   Argument taken from args: int *result.  Returns 1 on a match, 0 (with
+   *result untouched) otherwise. */
+uword
+unformat_vlib_rx_tx (unformat_input_t * input, va_list * args)
+{
+  int *result = va_arg (*args, int *);
+
+  if (unformat (input, "rx"))
+    {
+      *result = VLIB_RX;
+      return 1;
+    }
+
+  if (unformat (input, "tx"))
+    {
+      *result = VLIB_TX;
+      return 1;
+    }
+
+  return 0;
+}
+
+/* Parse an int either %d or 0x%x.
+   Argument taken from args: int *result.  The hexadecimal form is tried
+   first.  Returns 1 if either form matched, 0 otherwise. */
+uword
+unformat_vlib_number (unformat_input_t * input, va_list * args)
+{
+  int *result = va_arg (*args, int *);
+
+  if (unformat (input, "0x%x", result))
+    return 1;
+
+  if (unformat (input, "%d", result))
+    return 1;
+
+  return 0;
+}
+
+/* Parse a-zA-Z0-9_ token and hash to value.
+   Arguments taken from args: uword *hash, int *result.
+   The token is looked up as typed first; if that misses, its lower case
+   letters are upper-cased and the lookup is retried.  Returns 1 and
+   stores the hashed value in *result on a hit, 0 otherwise. */
+uword
+unformat_vlib_number_by_name (unformat_input_t * input, va_list * args)
+{
+  uword *hash = va_arg (*args, uword *);
+  int *result = va_arg (*args, int *);
+  uword *hit;
+  u8 *name;
+  int pos;
+
+  if (!unformat_user (input, unformat_token, "a-zA-Z0-9_", &name))
+    return 0;
+
+  /* The hash lookup needs a NUL terminated key. */
+  if (vec_len (name) > 0 && name[vec_len (name) - 1] != 0)
+    vec_add1 (name, 0);
+
+  hit = hash_get_mem (hash, name);
+  if (!hit)
+    {
+      /* No exact match: retry with the token upper-cased in place. */
+      for (pos = 0; pos < vec_len (name); pos++)
+	{
+	  if (name[pos] >= 'a' && name[pos] <= 'z')
+	    name[pos] = 'A' + name[pos] - 'a';
+	}
+      hit = hash_get_mem (hash, name);
+    }
+
+  vec_free (name);
+
+  if (!hit)
+    return 0;
+
+  *result = hit[0];
+  return 1;
+}
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vlib/format_funcs.h b/src/vlib/format_funcs.h
new file mode 100644
index 00000000000..f60b8940d14
--- /dev/null
+++ b/src/vlib/format_funcs.h
@@ -0,0 +1,75 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * format_funcs.h: VLIB formatting/unformating
+ *
+ * Copyright (c) 2008 Eliot Dresselhaus
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef included_vlib_format_h
+#define included_vlib_format_h
+
+/* Format vlib_rx_or_tx_t/vlib_read_or_write_t enum as string.
+ Each takes the enum value as its single format argument and appends
+ "rx"/"tx" or "read"/"write" ("INVALID" for other values). */
+u8 *format_vlib_rx_tx (u8 * s, va_list * args);
+u8 *format_vlib_read_write (u8 * s, va_list * args);
+
+/* Formats buffer data as printable ascii or as hex.
+ Format arguments: u8 *data, u32 n_data_bytes. */
+u8 *format_vlib_buffer_data (u8 * s, va_list * args);
+
+/* Enable/on => 1; disable/off => 0.
+ Unformat argument: int *result. */
+uword unformat_vlib_enable_disable (unformat_input_t * input, va_list * args);
+
+/* rx/tx => VLIB_RX/VLIB_TX.
+ Unformat argument: int *result. */
+uword unformat_vlib_rx_tx (unformat_input_t * input, va_list * args);
+
+/* Parse a-zA-Z0-9_ token and hash to value.
+ Unformat arguments: uword *hash, int *result. */
+uword unformat_vlib_number_by_name (unformat_input_t * input, va_list * args);
+
+/* Parse an int either %d or 0x%x.
+ Unformat argument: int *result. */
+uword unformat_vlib_number (unformat_input_t * input, va_list * args);
+
+/* Flag to format_vlib_*_header functions to tell them not to recurse
+ into the next layer's header. For example, tells format_vlib_ethernet_header
+ not to format ip header. */
+#define FORMAT_VLIB_HEADER_NO_RECURSION (~0)
+
+#endif /* included_vlib_format_h */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vlib/global_funcs.h b/src/vlib/global_funcs.h
new file mode 100644
index 00000000000..bbdbdef50b2
--- /dev/null
+++ b/src/vlib/global_funcs.h
@@ -0,0 +1,45 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * global_funcs.h: global data structure access functions
+ */
+
+#ifndef included_vlib_global_funcs_h_
+#define included_vlib_global_funcs_h_
+
+/* Return the vlib_main_t for the calling CPU: the vlib_mains entry
+   selected by os_get_cpu_number () when vlib_mains is set, otherwise
+   the global vlib_global_main. */
+always_inline vlib_main_t *
+vlib_get_main (void)
+{
+  vlib_main_t *vm = &vlib_global_main;
+
+  if (vlib_mains)
+    vm = vlib_mains[os_get_cpu_number ()];
+
+  ASSERT (vm);
+  return vm;
+}
+
+/* Return the address of the global vlib_thread_main.
+   Declared with (void) so the definition is a real prototype, matching
+   vlib_get_main above. */
+always_inline vlib_thread_main_t *
+vlib_get_thread_main (void)
+{
+  return &vlib_thread_main;
+}
+
+#endif /* included_vlib_global_funcs_h_ */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vlib/i2c.c b/src/vlib/i2c.c
new file mode 100644
index 00000000000..97f5bb21cc7
--- /dev/null
+++ b/src/vlib/i2c.c
@@ -0,0 +1,231 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vlib/vlib.h>
+#include <vlib/i2c.h>
+
+/* Wait for timeout using vlib_time_wait on the current vlib_main_t.
+   The bus argument b is not used. */
+static inline void
+i2c_delay (i2c_bus_t * b, f64 timeout)
+{
+  vlib_time_wait (vlib_get_main (), timeout);
+}
+
+/* Poll the bus until SCL reads non-zero.
+   Polls once per rise_fall_time; if SCL is still low once the
+   accumulated wait reaches hold_time, b->timeout is set to 1. */
+static void
+i2c_wait_for_scl (i2c_bus_t * b)
+{
+  f64 waited;
+
+  for (waited = 0; waited < b->hold_time; waited += b->rise_fall_time)
+    {
+      int scl_level, sda_level;
+
+      i2c_delay (b, b->rise_fall_time);
+      b->get_bits (b, &scl_level, &sda_level);
+
+      if (scl_level)
+	return;
+    }
+
+  b->timeout = 1;
+}
+
+/* Begin a transfer: clear the timeout flag and put a start sequence on
+ the bus. put_bits takes (bus, scl, sda). Returns early, leaving
+ b->timeout set, if SCL does not read high. */
+static void
+i2c_start (i2c_bus_t * b)
+{
+ b->timeout = 0;
+
+ /* Both lines high, then wait until SCL actually reads high. */
+ b->put_bits (b, 1, 1);
+ i2c_wait_for_scl (b);
+
+ if (vlib_i2c_bus_timed_out (b))
+ return;
+
+ /* SDA goes low while SCL is high, then SCL goes low as well. */
+ b->put_bits (b, 1, 0);
+ i2c_delay (b, b->hold_time);
+ b->put_bits (b, 0, 0);
+ i2c_delay (b, b->hold_time);
+}
+
+/* End a transfer by putting a stop sequence on the bus. put_bits takes
+ (bus, scl, sda). Does not touch b->timeout. */
+static void
+i2c_stop (i2c_bus_t * b)
+{
+ /* Start from both lines low. */
+ b->put_bits (b, 0, 0);
+ i2c_delay (b, b->rise_fall_time);
+
+ /* SCL goes high first... */
+ b->put_bits (b, 1, 0);
+ i2c_delay (b, b->hold_time);
+
+ /* ...then SDA goes high while SCL is high. */
+ b->put_bits (b, 1, 1);
+ i2c_delay (b, b->hold_time);
+}
+
+/* Clock one bit out: present sda with SCL low, raise SCL and wait for it
+ to read high, hold, then lower SCL again. i2c_wait_for_scl may set
+ b->timeout; the remaining steps still run in that case. */
+static void
+i2c_write_bit (i2c_bus_t * b, int sda)
+{
+ /* Set up the data value while SCL is low. */
+ b->put_bits (b, 0, sda);
+ i2c_delay (b, b->rise_fall_time);
+
+ /* SCL high with the data value unchanged. */
+ b->put_bits (b, 1, sda);
+ i2c_wait_for_scl (b);
+ i2c_delay (b, b->hold_time);
+
+ /* SCL back low. */
+ b->put_bits (b, 0, sda);
+ i2c_delay (b, b->rise_fall_time);
+}
+
+/* Clock one bit in: raise SCL with SDA written as 1, wait for SCL to
+ read high, sample the lines into *sda, then lower SCL.
+ i2c_wait_for_scl may set b->timeout; the sample is still taken. */
+static void
+i2c_read_bit (i2c_bus_t * b, int *sda)
+{
+ int scl;
+
+ b->put_bits (b, 1, 1);
+ i2c_wait_for_scl (b);
+ i2c_delay (b, b->hold_time);
+
+ /* The sampled SCL value is discarded; only SDA is returned. */
+ b->get_bits (b, &scl, sda);
+
+ b->put_bits (b, 0, 1);
+ i2c_delay (b, b->rise_fall_time);
+}
+
+/* Send one byte, most significant bit first, then read the following
+   bit.  Stops early if a bit write leaves b->timeout set.  A non-zero
+   SDA in the trailing bit is reported by setting b->timeout to 1. */
+static void
+i2c_write_byte (i2c_bus_t * b, u8 data)
+{
+  int bit_pos = 7;
+  int ack_sda;
+
+  while (bit_pos >= 0)
+    {
+      i2c_write_bit (b, (data >> bit_pos) & 1);
+      if (b->timeout)
+	return;
+      bit_pos--;
+    }
+
+  /* SCL low, SDA written as 1 before sampling the trailing bit. */
+  b->put_bits (b, 0, 1);
+  i2c_delay (b, b->rise_fall_time);
+
+  i2c_read_bit (b, &ack_sda);
+  if (ack_sda)
+    b->timeout = 1;
+}
+
+
+/* Receive one byte, most significant bit first, into *data, then write
+   one trailing bit whose value is 0 when ack is non-zero and 1 when ack
+   is zero.  If a bit read leaves b->timeout set the function returns
+   at once, with *data holding only the bits gathered before that read. */
+static void
+i2c_read_byte (i2c_bus_t * b, u8 * data, int ack)
+{
+  int bit_pos = 7;
+  int sda_level;
+
+  *data = 0;
+
+  /* SCL low, SDA written as 1 before sampling. */
+  b->put_bits (b, 0, 1);
+  i2c_delay (b, b->rise_fall_time);
+
+  while (bit_pos >= 0)
+    {
+      i2c_read_bit (b, &sda_level);
+      if (b->timeout)
+	return;
+
+      if (sda_level)
+	*data |= 1 << bit_pos;
+      bit_pos--;
+    }
+
+  i2c_write_bit (b, ack == 0);
+}
+
+
+/* Fill in defaults for any timing field the caller left at zero:
+   clock defaults to 400000, hold_time to 40% of one clock period and
+   rise_fall_time to 10% of one clock period.  Non-zero fields are kept. */
+void
+vlib_i2c_init (i2c_bus_t * b)
+{
+  f64 period;
+
+  if (b->clock == 0)
+    b->clock = 400000;
+
+  period = 1.0 / b->clock;
+
+  /* Spend 40% of time in low and high states */
+  if (b->hold_time == 0)
+    b->hold_time = 0.4 * period;
+
+  /* Spend 10% of time waiting for rise and fall */
+  if (b->rise_fall_time == 0)
+    b->rise_fall_time = 0.1 * period;
+}
+
+/* Run every message in the vector msgs on the bus, in order.
+   For each message: start sequence, address byte (7 bit address shifted
+   left by one, read/write flag in bit 0), then msg->len data bytes read
+   into or written from msg->buffer.  On reads every byte but the last is
+   acknowledged.
+
+   The transfer is abandoned as soon as bus->timeout is set, including
+   after the start sequence and after the address byte, so no data bits
+   are clocked once a step has failed.  A stop sequence is always issued
+   before returning; callers inspect the outcome with
+   vlib_i2c_bus_timed_out. */
+void
+vlib_i2c_xfer (i2c_bus_t * bus, i2c_msg_t * msgs)
+{
+  i2c_msg_t *msg;
+  int i;
+
+  vec_foreach (msg, msgs)
+  {
+    /* One test decides both the address bit and the data direction. */
+    int is_read = (msg->flags & I2C_MSG_FLAG_READ) != 0;
+
+    i2c_start (bus);
+    if (bus->timeout)
+      goto done;
+
+    i2c_write_byte (bus, (msg->addr << 1) + is_read);
+    if (bus->timeout)
+      goto done;
+
+    for (i = 0; i < msg->len; i++)
+      {
+	if (is_read)
+	  i2c_read_byte (bus, &msg->buffer[i],
+			 /* ack */ i + 1 != msg->len);
+	else
+	  i2c_write_byte (bus, msg->buffer[i]);
+
+	if (bus->timeout)
+	  goto done;
+      }
+  }
+
+done:
+  i2c_stop (bus);
+}
+
+/* Read length bytes into data from the device at i2c_addr.
+   Two messages are issued through vlib_i2c_xfer: a one byte write
+   carrying the start offset, then a read of length bytes.
+   NOTE(review): only the low 8 bits of start_addr are sent - confirm
+   that callers never need a wider offset. */
+void
+vlib_i2c_read_eeprom (i2c_bus_t * bus, u8 i2c_addr, u16 start_addr,
+		      u16 length, u8 * data)
+{
+  i2c_msg_t *msgs = 0;
+  i2c_msg_t *set_offset, *read_data;
+  u8 offset_byte = start_addr;
+
+  vec_validate (msgs, 1);
+  set_offset = &msgs[0];
+  read_data = &msgs[1];
+
+  set_offset->addr = i2c_addr;
+  set_offset->flags = I2C_MSG_FLAG_WRITE;
+  set_offset->buffer = &offset_byte;
+  set_offset->len = 1;
+
+  read_data->addr = i2c_addr;
+  read_data->flags = I2C_MSG_FLAG_READ;
+  read_data->buffer = data;
+  read_data->len = length;
+
+  vlib_i2c_xfer (bus, msgs);
+
+  vec_free (msgs);
+}
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vlib/i2c.h b/src/vlib/i2c.h
new file mode 100644
index 00000000000..b79bdc75b81
--- /dev/null
+++ b/src/vlib/i2c.h
@@ -0,0 +1,67 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef included_vlib_i2c_h
+#define included_vlib_i2c_h
+
+#include <vppinfra/types.h>
+
+
+/* Values for i2c_msg_t.flags: direction of the message. */
+#define I2C_MSG_FLAG_WRITE 0
+#define I2C_MSG_FLAG_READ 1
+
+/* One message of an I2C transfer (see vlib_i2c_xfer). */
+typedef struct
+{
+ /* Device address; shifted left by one to form the address byte. */
+ u8 addr;
+ /* I2C_MSG_FLAG_READ or I2C_MSG_FLAG_WRITE. */
+ u8 flags;
+ /* Number of bytes in buffer to transfer. */
+ u16 len;
+ /* Data to write, or destination for data read. */
+ u8 *buffer;
+} i2c_msg_t;
+
+/* Bit-banged I2C bus: line access callbacks plus timing parameters. */
+typedef struct i2c_bus_t
+{
+ /* Callbacks that write and read the SCL and SDA line values. */
+ void (*put_bits) (struct i2c_bus_t * b, int scl, int sda);
+ void (*get_bits) (struct i2c_bus_t * b, int *scl, int *sda);
+
+ /* Non-zero once a transfer step failed; read it with
+ vlib_i2c_bus_timed_out. */
+ int timeout;
+ /* Bus clock; vlib_i2c_init defaults it to 400000 when zero. */
+ u32 clock;
+ /* Delays handed to vlib_time_wait (presumably seconds - confirm);
+ vlib_i2c_init derives defaults from clock when they are zero. */
+ f64 hold_time;
+ f64 rise_fall_time;
+
+ /* Private data */
+ uword private_data;
+
+} i2c_bus_t;
+
+/* Fill in default clock and timing values for fields left at zero. */
+void vlib_i2c_init (i2c_bus_t * bus);
+/* Run the vector of messages msgs on the bus, ending with a stop. */
+void vlib_i2c_xfer (i2c_bus_t * bus, i2c_msg_t * msgs);
+/* Write a one byte start offset to device i2c_addr, then read length
+ bytes into data. */
+void vlib_i2c_read_eeprom (i2c_bus_t * bus, u8 i2c_addr, u16 start_addr,
+ u16 length, u8 * data);
+
+/* Return the bus timeout flag: non-zero when the flag has been set since
+ it was last cleared. */
+static inline int
+vlib_i2c_bus_timed_out (i2c_bus_t * bus)
+{
+ return bus->timeout;
+}
+
+#endif /* included_vlib_i2c_h */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vlib/init.c b/src/vlib/init.c
new file mode 100644
index 00000000000..8d4784513ab
--- /dev/null
+++ b/src/vlib/init.c
@@ -0,0 +1,168 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * init.c: mechanism for functions to be called at init/exit.
+ *
+ * Copyright (c) 2008 Eliot Dresselhaus
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <vlib/vlib.h>
+
+/* Walk the registration list starting at head and call each function.
+
+   call_once != 0: a function already recorded in
+   vm->init_functions_called is skipped; otherwise it is recorded there
+   before being called, so a later walk does not call it again.
+   call_once == 0: every function on the list is called unconditionally.
+   (Previously nothing at all was called in this mode because the
+   call_once test guarded the whole call.)
+
+   Returns the first error a function reports, stopping the walk there,
+   or 0 when every called function succeeded. */
+clib_error_t *
+vlib_call_init_exit_functions (vlib_main_t * vm,
+			       _vlib_init_function_list_elt_t * head,
+			       int call_once)
+{
+  _vlib_init_function_list_elt_t *i;
+
+  for (i = head; i; i = i->next_init_function)
+    {
+      clib_error_t *error;
+
+      if (call_once)
+	{
+	  if (hash_get (vm->init_functions_called, i->f))
+	    continue;
+	  /* Mark before calling so the function is not re-entered. */
+	  hash_set1 (vm->init_functions_called, i->f);
+	}
+
+      error = i->f (vm);
+      if (error)
+	return error;
+    }
+
+  return 0;
+}
+
+/* Call every function on vm->init_function_registrations that has not
+ been called yet. Returns the first error reported, or 0. */
+clib_error_t *
+vlib_call_all_init_functions (vlib_main_t * vm)
+{
+ /* Call dummy functions to make sure purely static modules are
+ linked in. */
+ /* Expands to one vlib_<module>_reference () call per entry of
+ foreach_vlib_module_reference. */
+#define _(f) vlib_##f##_reference ();
+ foreach_vlib_module_reference;
+#undef _
+
+ return vlib_call_init_exit_functions
+ (vm, vm->init_function_registrations, 1 /* call_once */ );
+}
+
+/* Call each registered main-loop-enter function that has not been called
+   yet.  Returns the first error reported, or 0. */
+clib_error_t *
+vlib_call_all_main_loop_enter_functions (vlib_main_t * vm)
+{
+  _vlib_init_function_list_elt_t *head;
+
+  head = vm->main_loop_enter_function_registrations;
+  return vlib_call_init_exit_functions (vm, head, 1 /* call_once */ );
+}
+
+/* Call each registered main-loop-exit function that has not been called
+   yet.  Returns the first error reported, or 0. */
+clib_error_t *
+vlib_call_all_main_loop_exit_functions (vlib_main_t * vm)
+{
+  _vlib_init_function_list_elt_t *head;
+
+  head = vm->main_loop_exit_function_registrations;
+  return vlib_call_init_exit_functions (vm, head, 1 /* call_once */ );
+}
+
+/* Distribute configuration input to the registered config functions and
+   call them.
+
+   input is parsed as a sequence of "<name> <value>" pairs.  Each value is
+   appended (space separated) to the private input buffer of the config
+   function registered under <name>.  Then every registered function whose
+   is_early flag matches is_early, and which is not yet recorded in
+   vm->init_functions_called, is recorded and called with its buffer.
+
+   Returns 0 on success, an "unknown input" error when a pair cannot be
+   parsed or names no registered function, or the first error returned by
+   a config function.  The per-function input buffers are released before
+   returning in every case. */
+clib_error_t *
+vlib_call_all_config_functions (vlib_main_t * vm,
+				unformat_input_t * input, int is_early)
+{
+  clib_error_t *error = 0;
+  vlib_config_function_runtime_t *c, **all;
+  uword *hash = 0, *p;
+  uword i;
+
+  /* name -> index into all[] */
+  hash = hash_create_string (0, sizeof (uword));
+  all = 0;
+
+  c = vm->config_function_registrations;
+
+  while (c)
+    {
+      hash_set_mem (hash, c->name, vec_len (all));
+      vec_add1 (all, c);
+      unformat_init (&c->input, 0, 0);
+      c = c->next_registration;
+    }
+
+  while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+    {
+      /* Start from null so the error message below never formats an
+         indeterminate pointer when the parse fails part way. */
+      u8 *s = 0, *v = 0;
+
+      if (!unformat (input, "%s %v", &s, &v) || !(p = hash_get_mem (hash, s)))
+	{
+	  error = clib_error_create ("unknown input `%s %v'", s, v);
+	  /* Do not leak whatever the parse managed to allocate. */
+	  vec_free (v);
+	  vec_free (s);
+	  goto done;
+	}
+
+      c = all[p[0]];
+      if (vec_len (c->input.buffer) > 0)
+	vec_add1 (c->input.buffer, ' ');
+      vec_add (c->input.buffer, v, vec_len (v));
+      vec_free (v);
+      vec_free (s);
+    }
+
+  for (i = 0; i < vec_len (all); i++)
+    {
+      c = all[i];
+
+      /* Is this an early config? Are we doing early configs? */
+      if (is_early ^ c->is_early)
+	continue;
+
+      /* Already called? */
+      if (hash_get (vm->init_functions_called, c->function))
+	continue;
+      hash_set1 (vm->init_functions_called, c->function);
+
+      error = c->function (vm, &c->input);
+      if (error)
+	goto done;
+    }
+
+done:
+  for (i = 0; i < vec_len (all); i++)
+    {
+      c = all[i];
+      unformat_free (&c->input);
+    }
+  vec_free (all);
+  hash_free (hash);
+  return error;
+}
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vlib/init.h b/src/vlib/init.h
new file mode 100644
index 00000000000..4fa5b304590
--- /dev/null
+++ b/src/vlib/init.h
@@ -0,0 +1,238 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * init.h: mechanism for functions to be called at init/exit.
+ *
+ * Copyright (c) 2008 Eliot Dresselhaus
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef included_vlib_init_h
+#define included_vlib_init_h
+
+#include <vppinfra/error.h>
+#include <vppinfra/format.h>
+#include <vppinfra/hash.h>
+
+/* Init/exit functions: called at start/end of main routine. Init
+ functions are typically used to register and setup packet
+ processing nodes. */
+
+/* Signature shared by init, main-loop-enter and main-loop-exit
+ functions; a non-null return value is an error. */
+typedef clib_error_t *(vlib_init_function_t) (struct vlib_main_t * vm);
+
+/* Node of a singly linked list of registered functions. */
+typedef struct _vlib_init_function_list_elt
+{
+ /* Next registration, or null at the end of the list. */
+ struct _vlib_init_function_list_elt *next_init_function;
+ /* Function to call. */
+ vlib_init_function_t *f;
+} _vlib_init_function_list_elt_t;
+
+/* Configuration functions: called with configuration input just before
+ main polling loop starts. */
+typedef clib_error_t *(vlib_config_function_t) (struct vlib_main_t * vm,
+ unformat_input_t * input);
+
+/* One registered configuration function. */
+typedef struct vlib_config_function_runtime_t
+{
+ /* Function to call. The call sites in this header and in
+ vlib_call_all_config_functions record it in vm->init_functions_called
+ so that it runs at most once.
+ NOTE(review): an earlier comment said this is set to null once the
+ function has been called; no such store is visible - confirm. */
+ vlib_config_function_t *function;
+
+ /* Input for function. */
+ unformat_input_t input;
+
+ /* next config function registration */
+ struct vlib_config_function_runtime_t *next_registration;
+
+ /* To be invoked as soon as the clib heap is available */
+ u8 is_early;
+
+ /* Name used to distinguish input on command line. */
+ char name[32];
+} vlib_config_function_runtime_t;
+
+/* Name of the global symbol created for function x registered with the
+ given kind: _vlib_<type>_function_<x>. */
+#define _VLIB_INIT_FUNCTION_SYMBOL(x, type) \
+ _vlib_##type##_function_##x
+
+/* Per-kind shorthands for the symbol name above. */
+#define VLIB_INIT_FUNCTION_SYMBOL(x) \
+ _VLIB_INIT_FUNCTION_SYMBOL(x, init)
+#define VLIB_MAIN_LOOP_ENTER_FUNCTION_SYMBOL(x) \
+ _VLIB_INIT_FUNCTION_SYMBOL(x, main_loop_enter)
+#define VLIB_MAIN_LOOP_EXIT_FUNCTION_SYMBOL(x) \
+ _VLIB_INIT_FUNCTION_SYMBOL(x, main_loop_exit)
+#define VLIB_CONFIG_FUNCTION_SYMBOL(x) \
+ _VLIB_INIT_FUNCTION_SYMBOL(x, config)
+
+/* Declaration is global (e.g. not static) so that init functions can
+ be called from other modules to resolve init function depend. */
+
+/* Registers function x on the vm-><tag>_function_registrations list.
+ Defines the global function pointer _vlib_<tag>_function_<x> and a
+ constructor-attributed function that pushes a static list element
+ for x onto the head of that list. */
+#define VLIB_DECLARE_INIT_FUNCTION(x, tag) \
+vlib_init_function_t * _VLIB_INIT_FUNCTION_SYMBOL (x, tag) = x; \
+static void __vlib_add_##tag##_function_##x (void) \
+ __attribute__((__constructor__)) ; \
+static void __vlib_add_##tag##_function_##x (void) \
+{ \
+ vlib_main_t * vm = vlib_get_main(); \
+ static _vlib_init_function_list_elt_t _vlib_init_function; \
+ _vlib_init_function.next_init_function \
+ = vm->tag##_function_registrations; \
+ vm->tag##_function_registrations = &_vlib_init_function; \
+ _vlib_init_function.f = &x; \
+}
+
+/* Register x as an init function. */
+#define VLIB_INIT_FUNCTION(x) VLIB_DECLARE_INIT_FUNCTION(x,init)
+
+/* Register x to be called on main loop entry / exit. */
+#define VLIB_MAIN_LOOP_ENTER_FUNCTION(x) \
+ VLIB_DECLARE_INIT_FUNCTION(x,main_loop_enter)
+#define VLIB_MAIN_LOOP_EXIT_FUNCTION(x) \
+VLIB_DECLARE_INIT_FUNCTION(x,main_loop_exit)
+
+/* Register configuration function x under the command line name n.
+ Defines the vlib_config_function_runtime_t object for x (is_early = 0)
+ and a constructor-attributed function that pushes it onto the head of
+ vm->config_function_registrations. Any extra arguments are placed in
+ front of the forward declaration of that object only. The expansion
+ ends without a semicolon, so the use site supplies it. */
+#define VLIB_CONFIG_FUNCTION(x,n,...) \
+ __VA_ARGS__ vlib_config_function_runtime_t \
+ VLIB_CONFIG_FUNCTION_SYMBOL(x); \
+static void __vlib_add_config_function_##x (void) \
+ __attribute__((__constructor__)) ; \
+static void __vlib_add_config_function_##x (void) \
+{ \
+ vlib_main_t * vm = vlib_get_main(); \
+ VLIB_CONFIG_FUNCTION_SYMBOL(x).next_registration \
+ = vm->config_function_registrations; \
+ vm->config_function_registrations \
+ = &VLIB_CONFIG_FUNCTION_SYMBOL(x); \
+} \
+ vlib_config_function_runtime_t \
+ VLIB_CONFIG_FUNCTION_SYMBOL (x) \
+ = { \
+ .name = n, \
+ .function = x, \
+ .is_early = 0, \
+ }
+
+/* Same registration as VLIB_CONFIG_FUNCTION except that the object is
+ initialised with is_early = 1, so it is selected when
+ vlib_call_all_config_functions runs with is_early set. */
+#define VLIB_EARLY_CONFIG_FUNCTION(x,n,...) \
+ __VA_ARGS__ vlib_config_function_runtime_t \
+ VLIB_CONFIG_FUNCTION_SYMBOL(x); \
+static void __vlib_add_config_function_##x (void) \
+ __attribute__((__constructor__)) ; \
+static void __vlib_add_config_function_##x (void) \
+{ \
+ vlib_main_t * vm = vlib_get_main(); \
+ VLIB_CONFIG_FUNCTION_SYMBOL(x).next_registration \
+ = vm->config_function_registrations; \
+ vm->config_function_registrations \
+ = &VLIB_CONFIG_FUNCTION_SYMBOL(x); \
+} \
+ vlib_config_function_runtime_t \
+ VLIB_CONFIG_FUNCTION_SYMBOL (x) \
+ = { \
+ .name = n, \
+ .function = x, \
+ .is_early = 1, \
+ }
+
+/* Call given init function: used for init function dependencies.
+ Evaluates to the clib_error_t * returned by x, or to 0 when x is
+ already recorded in vm->init_functions_called (in which case it is not
+ called again). x is recorded before it is called. */
+#define vlib_call_init_function(vm, x) \
+ ({ \
+ extern vlib_init_function_t * VLIB_INIT_FUNCTION_SYMBOL (x); \
+ vlib_init_function_t * _f = VLIB_INIT_FUNCTION_SYMBOL (x); \
+ clib_error_t * _error = 0; \
+ if (! hash_get (vm->init_functions_called, _f)) \
+ { \
+ hash_set1 (vm->init_functions_called, _f); \
+ _error = _f (vm); \
+ } \
+ _error; \
+ })
+
+/* Don't call given init function: used to suppress parts of the netstack.
+ Records x in vm->init_functions_called without calling it, so later
+ call-once paths skip it. */
+#define vlib_mark_init_function_complete(vm, x) \
+ ({ \
+ extern vlib_init_function_t * VLIB_INIT_FUNCTION_SYMBOL (x); \
+ vlib_init_function_t * _f = VLIB_INIT_FUNCTION_SYMBOL (x); \
+ hash_set1 (vm->init_functions_called, _f); \
+ })
+
+/* Call-once wrapper with the same shape as vlib_call_init_function, but
+ resolving x through VLIB_POST_GRAPH_INIT_FUNCTION_SYMBOL.
+ NOTE(review): VLIB_POST_GRAPH_INIT_FUNCTION_SYMBOL is not defined
+ alongside the other *_FUNCTION_SYMBOL macros in this header - confirm
+ it exists elsewhere before using this macro. */
+#define vlib_call_post_graph_init_function(vm, x) \
+ ({ \
+ extern vlib_init_function_t * VLIB_POST_GRAPH_INIT_FUNCTION_SYMBOL (x); \
+ vlib_init_function_t * _f = VLIB_POST_GRAPH_INIT_FUNCTION_SYMBOL (x); \
+ clib_error_t * _error = 0; \
+ if (! hash_get (vm->init_functions_called, _f)) \
+ { \
+ hash_set1 (vm->init_functions_called, _f); \
+ _error = _f (vm); \
+ } \
+ _error; \
+ })
+
+/* Call configuration function x with the input stored in its registration
+ object, unless it is already recorded in vm->init_functions_called.
+ Evaluates to the error returned by x, or to 0 when x was skipped. */
+#define vlib_call_config_function(vm, x) \
+ ({ \
+ vlib_config_function_runtime_t * _r; \
+ clib_error_t * _error = 0; \
+ extern vlib_config_function_runtime_t \
+ VLIB_CONFIG_FUNCTION_SYMBOL (x); \
+ \
+ _r = &VLIB_CONFIG_FUNCTION_SYMBOL (x); \
+ if (! hash_get (vm->init_functions_called, _r->function)) \
+ { \
+ hash_set1 (vm->init_functions_called, _r->function); \
+ _error = _r->function (vm, &_r->input); \
+ } \
+ _error; \
+ })
+
+/* External functions. Each returns the first error reported by a called
+ function, or 0 when all succeed. */
+clib_error_t *vlib_call_all_init_functions (struct vlib_main_t *vm);
+/* is_early selects which registrations run: those whose is_early flag
+ matches. */
+clib_error_t *vlib_call_all_config_functions (struct vlib_main_t *vm,
+ unformat_input_t * input,
+ int is_early);
+clib_error_t *vlib_call_all_main_loop_enter_functions (struct vlib_main_t
+ *vm);
+clib_error_t *vlib_call_all_main_loop_exit_functions (struct vlib_main_t *vm);
+/* Walks the list at head; see the definition for call_once handling. */
+clib_error_t *vlib_call_init_exit_functions (struct vlib_main_t *vm,
+ _vlib_init_function_list_elt_t *
+ head, int call_once);
+
+/* X-macro list of modules that provide a vlib_<module>_reference ()
+ function; expanded with a caller-supplied definition of _(). */
+#define foreach_vlib_module_reference \
+ _ (node_cli) \
+ _ (trace_cli)
+
+/* Dummy function to get node_cli.c linked in. */
+/* Declares one such function per module in the list above. */
+#define _(x) void vlib_##x##_reference (void);
+foreach_vlib_module_reference
+#undef _
+#endif /* included_vlib_init_h */
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vlib/lex.c b/src/vlib/lex.c
new file mode 100644
index 00000000000..1cc8f1678d2
--- /dev/null
+++ b/src/vlib/lex.c
@@ -0,0 +1,271 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include <vlib/vlib.h>
+#include <vlib/lex.h>
+
+vlib_lex_main_t vlib_lex_main;
+
+#define LEX_DEBUG 0
+
+/* format() callback printing one lexer token.
+   Varargs: the vlib_lex_main_t * that owns the token names, followed by
+   the vlib_lex_token_t * to print.  Returns the extended vector s. */
+u8 *
+format_vlib_lex_token (u8 * s, va_list * args)
+{
+  vlib_lex_main_t *lex = va_arg (*args, vlib_lex_main_t *);
+  vlib_lex_token_t *tok = va_arg (*args, vlib_lex_token_t *);
+
+  /* Everything but a word is printed through its registered name. */
+  if (tok->token != VLIB_LEX_word)
+    return format (s, "%s", lex->lex_token_names[tok->token]);
+
+  /* A word carries its own text. */
+  return format (s, "%s", tok->value.as_pointer);
+}
+
+/* Fetch the next token into rv.
+
+   Pushed-back tokens (lm->pushback_sp >= 0) are returned first.  Otherwise
+   characters are read from lm->input_vector starting at lm->current_index
+   and run through the table-driven state machine: each character (masked
+   to 7 bits) selects an entry in the current table, which names the next
+   table and the action to take.  Running off the end of the input yields
+   VLIB_LEX_eof.
+
+   For VLIB_LEX_word tokens rv->value.as_pointer is a fresh vec_dup () of
+   the NUL-terminated word text; vlib_lex_cleanup_token () frees it. */
+void
+vlib_lex_get_token (vlib_lex_main_t * lm, vlib_lex_token_t * rv)
+{
+  u8 c;
+  vlib_lex_table_t *t;
+  vlib_lex_table_entry_t *e;
+  uword tv;
+
+  /* Pop the pushback stack before looking at any input. */
+  if (PREDICT_FALSE (lm->pushback_sp >= 0))
+    {
+      rv[0] = lm->pushback_vector[lm->pushback_sp--];
+      return;
+    }
+
+  rv->value.as_uword = ~0;
+
+  while (1)
+    {
+      if (PREDICT_FALSE (lm->current_index >= vec_len (lm->input_vector)))
+        {
+          rv->token = VLIB_LEX_eof;
+          return;
+        }
+
+      t = vec_elt_at_index (lm->lex_tables, lm->current_table_index);
+      /* Tables have 128 entries: only the low 7 bits select an entry. */
+      c = (lm->input_vector[lm->current_index++]) & 0x7f;
+      e = &t->entries[c];
+      lm->current_table_index = e->next_table_index;
+
+      switch (e->action)
+        {
+        case VLIB_LEX_IGNORE:
+          continue;
+
+        case VLIB_LEX_START_NUMBER:
+          lm->current_token_value = 0;
+          /* fallthru */
+
+        case VLIB_LEX_ADD_TO_NUMBER:
+          /* For number actions the entry's token field holds the base. */
+          lm->current_number_base = e->token;
+          lm->current_token_value *= lm->current_number_base;
+          /* Try the character as a decimal digit, then as an upper-case
+             and finally as a lower-case letter digit. */
+          tv = c - '0';
+          if (tv >= lm->current_number_base)
+            {
+              tv = 10 + c - 'A';
+              if (tv >= lm->current_number_base)
+                tv = 10 + c - 'a';
+            }
+          lm->current_token_value += tv;
+          continue;
+
+        case VLIB_LEX_ADD_TO_TOKEN:
+          vec_add1 (lm->token_buffer, c);
+          continue;
+
+        case VLIB_LEX_KEYWORD_CHECK:
+          {
+            uword *p;
+
+            /* NUL-terminate so the buffer can be hashed as a C string. */
+            vec_add1 (lm->token_buffer, 0);
+
+            /* It's either a keyword or just a word. */
+            p = hash_get_mem (lm->lex_keywords, lm->token_buffer);
+            if (p)
+              {
+                rv->token = p[0];
+                if (LEX_DEBUG > 0)
+                  clib_warning ("keyword '%s' token %s",
+                                lm->token_buffer,
+                                lm->lex_token_names[rv->token]);
+              }
+            else
+              {
+                /* it's a WORD */
+                rv->token = VLIB_LEX_word;
+                rv->value.as_pointer = vec_dup (lm->token_buffer);
+                if (LEX_DEBUG > 0)
+                  clib_warning ("%s, value '%s'",
+                                lm->lex_token_names[VLIB_LEX_word],
+                                rv->value.as_pointer);
+              }
+            /* Keep the allocation, drop the contents, for the next token. */
+            _vec_len (lm->token_buffer) = 0;
+
+            /* Rescan the character which terminated the keyword/word. */
+            lm->current_index--;
+            return;
+          }
+
+        case VLIB_LEX_RETURN_AND_RESCAN:
+          ASSERT (lm->current_index);
+          lm->current_index--;
+          /* note flow-through */
+
+        case VLIB_LEX_RETURN:
+          rv->token = e->token;
+          rv->value.as_uword = lm->current_token_value;
+          lm->current_token_value = ~0;
+          if (LEX_DEBUG > 0)
+            {
+              clib_warning
+                ("table %s char '%c'(0x%02x) next table %s return %s",
+                 t->name, c, c, lm->lex_tables[e->next_table_index].name,
+                 lm->lex_token_names[e->token]);
+              /* Pass the numeric member, not the whole union, and use
+                 word-sized conversions to match its uword type. */
+              if (rv->token == VLIB_LEX_number)
+                clib_warning (" numeric value 0x%wx (%wd)",
+                              rv->value.as_uword, rv->value.as_uword);
+            }
+          return;
+        }
+    }
+}
+
+/* Return the token index registered for token_name, registering the name
+   with the next free index when it is not known yet.  The name pointer
+   itself is stored in the hash and in lm->lex_token_names. */
+u16
+vlib_lex_add_token (vlib_lex_main_t * lm, char *token_name)
+{
+  uword *known = hash_get_mem (lm->lex_tokens_by_name, token_name);
+  u16 new_index;
+
+  if (known)
+    return known[0];
+
+  /* New tokens are numbered by their position in the name vector. */
+  new_index = vec_len (lm->lex_token_names);
+  hash_set_mem (lm->lex_tokens_by_name, token_name, new_index);
+  vec_add1 (lm->lex_token_names, token_name);
+
+  return new_index;
+}
+
+/* Map keyword to the token called token_name (created on demand) and
+   return that token index.  The keyword must not be registered yet. */
+static u16
+add_keyword (vlib_lex_main_t * lm, char *keyword, char *token_name)
+{
+  uword *already = hash_get_mem (lm->lex_keywords, keyword);
+  u16 tok;
+
+  ASSERT (already == 0);
+
+  tok = vlib_lex_add_token (lm, token_name);
+  hash_set_mem (lm->lex_keywords, keyword, tok);
+
+  return tok;
+}
+
+/* Return the token index of keyword, registering the keyword under
+   token_name first when it is not known yet. */
+u16
+vlib_lex_find_or_add_keyword (vlib_lex_main_t * lm, char *keyword,
+                              char *token_name)
+{
+  uword *hit = hash_get_mem (lm->lex_keywords, keyword);
+
+  if (hit)
+    return hit[0];
+
+  return add_keyword (lm, keyword, token_name);
+}
+
+/* Program every entry for the characters lo..hi (inclusive) of the table
+   table_index in vlib_lex_main with the same action, token and next
+   table. */
+void
+vlib_lex_set_action_range (u32 table_index, u8 lo, u8 hi, u16 action,
+                           u16 token, u32 next_table_index)
+{
+  vlib_lex_main_t *lm = &vlib_lex_main;
+  vlib_lex_table_t *table = pool_elt_at_index (lm->lex_tables, table_index);
+  int ch;
+
+  /* int index: a u8 counter could never exceed hi == 0xff. */
+  for (ch = lo; ch <= hi; ch++)
+    {
+      vlib_lex_table_entry_t *entry;
+
+      ASSERT (ch < ARRAY_LEN (table->entries));
+      entry = &table->entries[ch];
+      entry->action = action;
+      entry->token = token;
+      entry->next_table_index = next_table_index;
+    }
+}
+
+/* Allocate a new lexer table called name in vlib_lex_main and return its
+   pool index.  The name must not be in use.  The fresh table ignores
+   characters 1..0x7F and returns VLIB_LEX_eof on character 0, staying in
+   the same table in both cases. */
+u16
+vlib_lex_add_table (char *name)
+{
+  vlib_lex_main_t *lm = &vlib_lex_main;
+  vlib_lex_table_t *table;
+  uword *existing = hash_get_mem (lm->lex_tables_by_name, name);
+  uword self;
+
+  ASSERT (existing == 0);
+
+  pool_get_aligned (lm->lex_tables, table, CLIB_CACHE_LINE_BYTES);
+  table->name = name;
+
+  self = table - lm->lex_tables;
+  hash_set_mem (lm->lex_tables_by_name, name, self);
+
+  /* Defaults: skip everything, end of input on NUL. */
+  vlib_lex_set_action_range (self, 1, 0x7F, VLIB_LEX_IGNORE, ~0, self);
+  vlib_lex_set_action_range (self, 0, 0, VLIB_LEX_RETURN, VLIB_LEX_eof,
+                             self);
+
+  return self;
+}
+
+/* Point the lexer at a new input vector: scanning restarts at index 0 and
+   the pushback stack is emptied. */
+void
+vlib_lex_reset (vlib_lex_main_t * lm, u8 * input_vector)
+{
+  lm->input_vector = input_vector;
+  lm->current_index = 0;
+
+  lm->pushback_sp = -1;
+  if (lm->pushback_vector)
+    _vec_len (lm->pushback_vector) = 0;
+}
+
+/* One-time setup of vlib_lex_main: empty pushback stack, the three
+   string-keyed hashes, the built-in tokens (whose indices must match the
+   VLIB_LEX_<name> enum values) and a pre-sized, empty token buffer. */
+static clib_error_t *
+lex_onetime_init (vlib_main_t * vm)
+{
+  vlib_lex_main_t *lm = &vlib_lex_main;
+
+  lm->pushback_sp = -1;
+
+  lm->lex_tables_by_name = hash_create_string (0, sizeof (uword));
+  lm->lex_tokens_by_name = hash_create_string (0, sizeof (uword));
+  lm->lex_keywords = hash_create_string (0, sizeof (uword));
+
+  /* Register the global tokens in enum order. */
+#define _(f) { u16 tmp = vlib_lex_add_token (lm, #f); ASSERT (tmp == VLIB_LEX_##f); }
+  foreach_vlib_lex_global_token;
+#undef _
+
+  /* Allocate room for 128 characters up front, then mark it empty. */
+  vec_validate (lm->token_buffer, 127);
+  _vec_len (lm->token_buffer) = 0;
+
+  return 0;
+}
+
+VLIB_INIT_FUNCTION (lex_onetime_init);
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vlib/lex.h b/src/vlib/lex.h
new file mode 100644
index 00000000000..4ae58f468c1
--- /dev/null
+++ b/src/vlib/lex.h
@@ -0,0 +1,145 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef included_vlib_lex_h
+#define included_vlib_lex_h
+
+#include <vppinfra/hash.h>
+#include <vppinfra/bitmap.h>
+#include <vppinfra/error.h>
+#include <vppinfra/pool.h>
+
+/* X-macro list of the built-in token names.  The enum below expands it
+   into one VLIB_LEX_<name> constant per entry, in this order. */
+#define foreach_vlib_lex_global_token \
+ _ (invalid) \
+ _ (eof) \
+ _ (word) \
+ _ (number) \
+ _ (lt) \
+ _ (gt) \
+ _ (dot) \
+ _ (slash) \
+ _ (qmark) \
+ _ (equals) \
+ _ (plus) \
+ _ (minus) \
+ _ (star) \
+ _ (lpar) \
+ _ (rpar)
+
+/* Token indices of the built-in tokens (VLIB_LEX_invalid is 0). */
+typedef enum
+{
+#define _(f) VLIB_LEX_##f,
+ foreach_vlib_lex_global_token
+#undef _
+} vlib_lex_global_token_t;
+
+/* Actions a table entry can request from vlib_lex_get_token (). */
+typedef enum
+{
+ /* Consume the character and keep scanning. */
+ VLIB_LEX_IGNORE,
+ /* Append the character to the token buffer. */
+ VLIB_LEX_ADD_TO_TOKEN,
+ /* Return the entry's token together with the accumulated value. */
+ VLIB_LEX_RETURN,
+ /* As VLIB_LEX_RETURN, but back up so the character is scanned again. */
+ VLIB_LEX_RETURN_AND_RESCAN,
+ /* Finish the buffered text: return its keyword token or a word token,
+ and scan the terminating character again. */
+ VLIB_LEX_KEYWORD_CHECK,
+ /* Zero the accumulated value, then act as VLIB_LEX_ADD_TO_NUMBER. */
+ VLIB_LEX_START_NUMBER,
+ /* Multiply the accumulated value by the base and add this digit. */
+ VLIB_LEX_ADD_TO_NUMBER,
+} vlib_lex_action_t;
+
+/* What to do for one input character while in a given table. */
+typedef struct
+{
+ /* A vlib_lex_action_t value. */
+ u16 action;
+ /* Table to switch to after this character. */
+ u16 next_table_index;
+ /* Token returned by the return actions; the number actions read it as
+ the number base. */
+ u16 token;
+} vlib_lex_table_entry_t;
+
+/* One lexer state: a named table with an entry per 7-bit character. */
+typedef struct
+{
+ char *name;
+ vlib_lex_table_entry_t entries[128];
+} vlib_lex_table_t;
+
+/* A token produced by the lexer. */
+typedef struct
+{
+ /* Token index (a VLIB_LEX_<name> value or a dynamically added one). */
+ u32 token;
+
+ /* Payload: as_uword holds the accumulated numeric value; for
+ VLIB_LEX_word tokens as_pointer holds a vector with the word text. */
+ union
+ {
+ uword as_uword;
+ void *as_pointer;
+ char *as_string;
+ } value;
+} vlib_lex_token_t;
+
+/* Complete lexer state. */
+typedef struct
+{
+ /* Pool of tables and hash mapping table name to pool index. */
+ vlib_lex_table_t *lex_tables;
+ uword *lex_tables_by_name;
+
+ /* Vector of token strings. */
+ char **lex_token_names;
+
+ /* Hash mapping c string name to token index. */
+ uword *lex_tokens_by_name;
+
+ /* Hash mapping c string keyword name to token index. */
+ uword *lex_keywords;
+
+ /* Stack of pushed-back tokens, returned before new input is read. */
+ vlib_lex_token_t *pushback_vector;
+
+ /* Index of the top pushed-back token, -1 when there is none. */
+ i32 pushback_sp;
+
+ /* Table the next input character is looked up in. */
+ u32 current_table_index;
+
+ /* Numeric value accumulated so far. */
+ uword current_token_value;
+
+ /* Base used for the most recent digit. */
+ uword current_number_base;
+
+ /* Input string we are lex-ing. */
+ u8 *input_vector;
+
+ /* Current index into input vector. */
+ u32 current_index;
+
+ /* Re-used vector for forming token strings and hashing them. */
+ u8 *token_buffer;
+} vlib_lex_main_t;
+
+/* The single lexer instance.  It is defined in lex.c; the header only
+   declares it so that including files do not each emit their own copy. */
+extern vlib_lex_main_t vlib_lex_main;
+
+/* Release the storage attached to a token.  Only VLIB_LEX_word tokens own
+   storage: the vector holding the word text.  The pointer kept in t is
+   left untouched. */
+always_inline void
+vlib_lex_cleanup_token (vlib_lex_token_t * t)
+{
+  u8 *word_text;
+
+  if (t->token != VLIB_LEX_word)
+    return;
+
+  word_text = t->value.as_pointer;
+  vec_free (word_text);
+}
+
+/* Create a new, named lexer table; returns its index. */
+u16 vlib_lex_add_table (char *name);
+/* Fetch the next token (pushed-back tokens first) into result. */
+void vlib_lex_get_token (vlib_lex_main_t * lm, vlib_lex_token_t * result);
+/* Find or register a token name; returns its token index. */
+u16 vlib_lex_add_token (vlib_lex_main_t * lm, char *token_name);
+/* Find a keyword, or register it under token_name; returns its token
+   index. */
+u16 vlib_lex_find_or_add_keyword (vlib_lex_main_t * lm, char *keyword,
+                                  char *token_name);
+/* Set action, token and next table for characters lo..hi of a table. */
+void vlib_lex_set_action_range (u32 table_index, u8 lo, u8 hi, u16 action,
+                                u16 token, u32 next_table_index);
+/* Restart lexing on a new input vector. */
+void vlib_lex_reset (vlib_lex_main_t * lm, u8 * input_vector);
+/* format() callback; arguments: vlib_lex_main_t *, vlib_lex_token_t *. */
+format_function_t format_vlib_lex_token;
+
+#endif /* included_vlib_lex_h */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vlib/main.c b/src/vlib/main.c
new file mode 100644
index 00000000000..6c6cad98bba
--- /dev/null
+++ b/src/vlib/main.c
@@ -0,0 +1,1703 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * main.c: main vector processing loop
+ *
+ * Copyright (c) 2008 Eliot Dresselhaus
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <math.h>
+#include <vppinfra/format.h>
+#include <vlib/vlib.h>
+#include <vlib/threads.h>
+
+#include <vlib/unix/cj.h>
+
+CJ_GLOBAL_LOG_PROTOTYPE;
+
+/* Actually allocate a few extra slots of vector data to support
+ speculative vector enqueues which overflow vector data in next frame. */
+#define VLIB_FRAME_SIZE_ALLOC (VLIB_FRAME_SIZE + 4)
+
+u32 wraps;
+
+/* Size in bytes of a frame carrying n_scalar_bytes of scalar arguments and
+   vector elements of n_vector_bytes each, rounded up to a cache line. */
+always_inline u32
+vlib_frame_bytes (u32 n_scalar_bytes, u32 n_vector_bytes)
+{
+  /* A few extra vector slots are allocated to support speculative vector
+     enqueues which overflow vector data in next frame. */
+#define VLIB_FRAME_SIZE_EXTRA 4
+
+  /* Magic number is first 32bit number after vector data.
+     Used to make sure that vector data is never overrun. */
+#define VLIB_FRAME_MAGIC (0xabadc0ed)
+
+  /* vlib_frame_t plus scalar arguments. */
+  u32 header_bytes = vlib_frame_vector_byte_offset (n_scalar_bytes);
+
+  /* Vector arguments, including the extra slots. */
+  u32 vector_bytes =
+    (VLIB_FRAME_SIZE + VLIB_FRAME_SIZE_EXTRA) * n_vector_bytes;
+
+  /* Header, vectors and the trailing magic word. */
+  u32 total_bytes = header_bytes + vector_bytes + sizeof (u32);
+
+  /* Pad to cache line. */
+  return round_pow2 (total_bytes, CLIB_CACHE_LINE_BYTES);
+}
+
+/* Address of the magic word of frame f, computed from the scalar and
+   vector sizes of node: it sits right after the vector data, extra slots
+   included. */
+always_inline u32 *
+vlib_frame_find_magic (vlib_frame_t * f, vlib_node_t * node)
+{
+  u8 *cursor = (u8 *) f;
+
+  /* Skip the frame header and the scalar arguments. */
+  cursor += vlib_frame_vector_byte_offset (node->scalar_size);
+
+  /* Skip the vector data. */
+  cursor += (VLIB_FRAME_SIZE + VLIB_FRAME_SIZE_EXTRA) * node->vector_size;
+
+  return (u32 *) cursor;
+}
+
+/* Return the frame-size record for the given scalar/vector byte sizes,
+   appending a new record to nm->frame_sizes the first time a size pair is
+   seen.  Records are found through nm->frame_size_hash. */
+static vlib_frame_size_t *
+get_frame_size_info (vlib_node_main_t * nm,
+                     u32 n_scalar_bytes, u32 n_vector_bytes)
+{
+  uword key = (n_scalar_bytes << 16) | n_vector_bytes;
+  uword *slot = hash_get (nm->frame_size_hash, key);
+  uword index;
+
+  if (!slot)
+    {
+      /* First use of this size pair: grow the vector by one record. */
+      index = vec_len (nm->frame_sizes);
+      vec_validate (nm->frame_sizes, index);
+      hash_set (nm->frame_size_hash, key, index);
+    }
+  else
+    index = slot[0];
+
+  return vec_elt_at_index (nm->frame_sizes, index);
+}
+
+/* Allocate a frame sized for the node to_node_index and return its frame
+   index.  A frame is taken from the end of the matching free list when one
+   is available, otherwise fresh memory is allocated.  The frame comes back
+   empty (n_vectors == 0), flagged VLIB_FRAME_IS_ALLOCATED | frame_flags,
+   with the magic word written behind its vector data. */
+static u32
+vlib_frame_alloc_to_node (vlib_main_t * vm, u32 to_node_index,
+ u32 frame_flags)
+{
+ vlib_node_main_t *nm = &vm->node_main;
+ vlib_frame_size_t *fs;
+ vlib_node_t *to_node;
+ vlib_frame_t *f;
+ u32 fi, l, n, scalar_size, vector_size;
+
+ to_node = vlib_get_node (vm, to_node_index);
+
+ scalar_size = to_node->scalar_size;
+ vector_size = to_node->vector_size;
+
+ fs = get_frame_size_info (nm, scalar_size, vector_size);
+ n = vlib_frame_bytes (scalar_size, vector_size);
+ if ((l = vec_len (fs->free_frame_indices)) > 0)
+ {
+ /* Allocate from end of free list. */
+ fi = fs->free_frame_indices[l - 1];
+ f = vlib_get_frame_no_check (vm, fi);
+ _vec_len (fs->free_frame_indices) = l - 1;
+ }
+ else
+ {
+ /* Free list is empty: get new memory and stamp the owning cpu. */
+ f = clib_mem_alloc_aligned_no_fail (n, VLIB_FRAME_ALIGN);
+ f->cpu_index = vm->cpu_index;
+ fi = vlib_frame_index_no_check (vm, f);
+ }
+
+ /* Poison frame when debugging. */
+ if (CLIB_DEBUG > 0)
+ {
+ /* cpu_index must survive the poisoning. */
+ u32 save_cpu_index = f->cpu_index;
+
+ memset (f, 0xfe, n);
+
+ f->cpu_index = save_cpu_index;
+ }
+
+ /* Insert magic number. */
+ {
+ u32 *magic;
+
+ magic = vlib_frame_find_magic (f, to_node);
+ *magic = VLIB_FRAME_MAGIC;
+ }
+
+ /* Initialize the header after the poisoning above. */
+ f->flags = VLIB_FRAME_IS_ALLOCATED | frame_flags;
+ f->n_vectors = 0;
+ f->scalar_size = scalar_size;
+ f->vector_size = vector_size;
+
+ fs->n_alloc_frames += 1;
+
+ return fi;
+}
+
+/* Allocate a frame from FROM_NODE to TO_NODE via TO_NEXT_INDEX.
+   Returns frame index. */
+static u32
+vlib_frame_alloc (vlib_main_t * vm, vlib_node_runtime_t * from_node_runtime,
+                  u32 to_next_index)
+{
+  vlib_node_t *src = vlib_get_node (vm, from_node_runtime->node_index);
+
+  ASSERT (to_next_index < vec_len (src->next_nodes));
+
+  /* No extra frame flags for frames allocated on a next arc. */
+  return vlib_frame_alloc_to_node (vm, src->next_nodes[to_next_index], 0);
+}
+
+/* Allocate a frame destined for the node to_node_index and return a
+   pointer to it.  The frame is flagged VLIB_FRAME_FREE_AFTER_DISPATCH. */
+vlib_frame_t *
+vlib_get_frame_to_node (vlib_main_t * vm, u32 to_node_index)
+{
+  u32 frame_index;
+
+  frame_index = vlib_frame_alloc_to_node (vm, to_node_index,
+                                          VLIB_FRAME_FREE_AFTER_DISPATCH);
+
+  return vlib_get_frame (vm, frame_index);
+}
+
+/* Queue frame f for dispatch to the node to_node_index by appending it to
+   the pending-frames vector.  Frames without vectors are not queued. */
+void
+vlib_put_frame_to_node (vlib_main_t * vm, u32 to_node_index, vlib_frame_t * f)
+{
+  vlib_pending_frame_t *pending;
+  vlib_node_t *dest;
+
+  if (!f->n_vectors)
+    return;
+
+  dest = vlib_get_node (vm, to_node_index);
+
+  vec_add2 (vm->node_main.pending_frames, pending, 1);
+
+  f->flags |= VLIB_FRAME_PENDING;
+
+  pending->frame_index = vlib_frame_index (vm, f);
+  pending->node_runtime_index = dest->runtime_index;
+  /* The frame is not attached to any next-frame slot. */
+  pending->next_frame_index = VLIB_PENDING_FRAME_NO_NEXT_FRAME;
+}
+
+/* Free given frame.
+   The frame index is pushed on the free list of the frame-size record
+   matching the scalar/vector sizes of the node behind runtime r, and the
+   frame loses its VLIB_FRAME_IS_ALLOCATED flag. */
+void
+vlib_frame_free (vlib_main_t * vm, vlib_node_runtime_t * r, vlib_frame_t * f)
+{
+  vlib_node_main_t *nm = &vm->node_main;
+  vlib_node_t *node;
+  vlib_frame_size_t *fs;
+  u32 frame_index;
+
+  /* Only allocated frames may be freed. */
+  ASSERT (f->flags & VLIB_FRAME_IS_ALLOCATED);
+
+  node = vlib_get_node (vm, r->node_index);
+  fs = get_frame_size_info (nm, node->scalar_size, node->vector_size);
+
+  frame_index = vlib_frame_index (vm, f);
+
+  /* No next frames may point to freed frame. */
+  if (CLIB_DEBUG > 0)
+    {
+      vlib_next_frame_t *nf;
+      vec_foreach (nf, vm->node_main.next_frames)
+        ASSERT (nf->frame_index != frame_index);
+    }
+
+  f->flags &= ~VLIB_FRAME_IS_ALLOCATED;
+
+  vec_add1 (fs->free_frame_indices, frame_index);
+  ASSERT (fs->n_alloc_frames > 0);
+  fs->n_alloc_frames -= 1;
+}
+
+/* CLI handler: print, for every frame-size record that has allocated or
+   free frames, its index and both counts. */
+static clib_error_t *
+show_frame_stats (vlib_main_t * vm,
+                  unformat_input_t * input, vlib_cli_command_t * cmd)
+{
+  vlib_node_main_t *nm = &vm->node_main;
+  vlib_frame_size_t *fs;
+
+  vlib_cli_output (vm, "%=6s%=12s%=12s", "Size", "# Alloc", "# Free");
+
+  vec_foreach (fs, nm->frame_sizes)
+  {
+    u32 n_free = vec_len (fs->free_frame_indices);
+    u32 n_alloc = fs->n_alloc_frames;
+
+    /* Skip records that never held a frame. */
+    if (n_alloc + n_free == 0)
+      continue;
+
+    vlib_cli_output (vm, "%=6d%=12d%=12d",
+                     fs - nm->frame_sizes, n_alloc, n_free);
+  }
+
+  return 0;
+}
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (show_frame_stats_cli, static) = {
+  .path = "show vlib frame-allocation",
+  .short_help = "Show node dispatch frame statistics",
+  .function = show_frame_stats,
+};
+/* *INDENT-ON* */
+
+/* Change ownership of enqueue rights to given next node.
+   After the call the next frame (node_runtime, next_index) carries
+   VLIB_FRAME_OWNER and the next node records this node/next index as its
+   owner.  When another next frame owned the rights, the two next-frame
+   structures are swapped. */
+static void
+vlib_next_frame_change_ownership (vlib_main_t * vm,
+ vlib_node_runtime_t * node_runtime,
+ u32 next_index)
+{
+ vlib_node_main_t *nm = &vm->node_main;
+ vlib_next_frame_t *next_frame;
+ vlib_node_t *node, *next_node;
+
+ node = vec_elt (nm->nodes, node_runtime->node_index);
+
+ /* Only internal, input and process nodes are allowed to call other
+ nodes. */
+ ASSERT (node->type == VLIB_NODE_TYPE_INTERNAL
+ || node->type == VLIB_NODE_TYPE_INPUT
+ || node->type == VLIB_NODE_TYPE_PROCESS);
+
+ ASSERT (vec_len (node->next_nodes) == node_runtime->n_next_nodes);
+
+ next_frame =
+ vlib_node_runtime_get_next_frame (vm, node_runtime, next_index);
+ next_node = vec_elt (nm->nodes, node->next_nodes[next_index]);
+
+ if (next_node->owner_node_index != VLIB_INVALID_NODE_INDEX)
+ {
+ /* Get frame from previous owner. */
+ vlib_next_frame_t *owner_next_frame;
+ vlib_next_frame_t tmp;
+
+ owner_next_frame =
+ vlib_node_get_next_frame (vm,
+ next_node->owner_node_index,
+ next_node->owner_next_index);
+
+ /* Swap target next frame with owner's. */
+ tmp = owner_next_frame[0];
+ owner_next_frame[0] = next_frame[0];
+ next_frame[0] = tmp;
+
+ /*
+ * If next_frame is already pending, we have to track down
+ * all pending frames and fix their next_frame_index fields.
+ */
+ if (next_frame->flags & VLIB_FRAME_PENDING)
+ {
+ vlib_pending_frame_t *p;
+ if (next_frame->frame_index != ~0)
+ {
+ vec_foreach (p, nm->pending_frames)
+ {
+ if (p->frame_index == next_frame->frame_index)
+ {
+ p->next_frame_index =
+ next_frame - vm->node_main.next_frames;
+ }
+ }
+ }
+ }
+ }
+ else
+ {
+ /* No previous owner. Take ownership. */
+ next_frame->flags |= VLIB_FRAME_OWNER;
+ }
+
+ /* Record new owner. */
+ next_node->owner_node_index = node->index;
+ next_node->owner_next_index = next_index;
+
+ /* Now we should be owner. */
+ ASSERT (next_frame->flags & VLIB_FRAME_OWNER);
+}
+
+/* Assert that the magic word behind the vector data of f is intact.
+   The word is located with the sizes of the node reached from n through
+   next_index; a mismatch most likely means the caller has overrun the
+   frame arguments. */
+always_inline void
+validate_frame_magic (vlib_main_t * vm,
+                      vlib_frame_t * f, vlib_node_t * n, uword next_index)
+{
+  vlib_node_t *target = vlib_get_node (vm, n->next_nodes[next_index]);
+  u32 *guard = vlib_frame_find_magic (f, target);
+
+  ASSERT (VLIB_FRAME_MAGIC == guard[0]);
+}
+
+/* Return the frame that node should enqueue into for next_index.
+   Takes over the enqueue rights when this next frame does not own them,
+   allocates a frame when the slot has none, recycles a frame that has
+   finished dispatching, and replaces the frame by a new one when it is
+   full or when allocate_new_next_frame is set and it already holds
+   vectors.  The returned frame has fewer than VLIB_FRAME_SIZE vectors. */
+vlib_frame_t *
+vlib_get_next_frame_internal (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ u32 next_index, u32 allocate_new_next_frame)
+{
+ vlib_frame_t *f;
+ vlib_next_frame_t *nf;
+ u32 n_used;
+
+ nf = vlib_node_runtime_get_next_frame (vm, node, next_index);
+
+ /* Make sure this next frame owns right to enqueue to destination frame. */
+ if (PREDICT_FALSE (!(nf->flags & VLIB_FRAME_OWNER)))
+ vlib_next_frame_change_ownership (vm, node, next_index);
+
+ /* ??? Don't need valid flag: can use frame_index == ~0 */
+ if (PREDICT_FALSE (!(nf->flags & VLIB_FRAME_IS_ALLOCATED)))
+ {
+ nf->frame_index = vlib_frame_alloc (vm, node, next_index);
+ nf->flags |= VLIB_FRAME_IS_ALLOCATED;
+ }
+
+ f = vlib_get_frame (vm, nf->frame_index);
+
+ /* Has frame been removed from pending vector (e.g. finished dispatching)?
+ If so we can reuse frame. */
+ if ((nf->flags & VLIB_FRAME_PENDING) && !(f->flags & VLIB_FRAME_PENDING))
+ {
+ nf->flags &= ~VLIB_FRAME_PENDING;
+ f->n_vectors = 0;
+ }
+
+ /* Allocate new frame if current one is already full. */
+ n_used = f->n_vectors;
+ if (n_used >= VLIB_FRAME_SIZE || (allocate_new_next_frame && n_used > 0))
+ {
+ /* Old frame may need to be freed after dispatch, since we'll have
+ two redundant frames from node -> next node. */
+ if (!(nf->flags & VLIB_FRAME_NO_FREE_AFTER_DISPATCH))
+ {
+ vlib_frame_t *f_old = vlib_get_frame (vm, nf->frame_index);
+ f_old->flags |= VLIB_FRAME_FREE_AFTER_DISPATCH;
+ }
+
+ /* Allocate new frame to replace full one. */
+ nf->frame_index = vlib_frame_alloc (vm, node, next_index);
+ f = vlib_get_frame (vm, nf->frame_index);
+ n_used = f->n_vectors;
+ }
+
+ /* Should have free vectors in frame now. */
+ ASSERT (n_used < VLIB_FRAME_SIZE);
+
+ /* Debug images also check that the frame's magic word is intact. */
+ if (CLIB_DEBUG > 0)
+ {
+ validate_frame_magic (vm, f,
+ vlib_get_node (vm, node->node_index), next_index);
+ }
+
+ return f;
+}
+
+/* Debug helper for vlib_put_next_frame (): checks that the vector count
+   implied by n_vectors_left did not shrink below the frame's current
+   count, and runs the next node's validate_frame callback, if it has one,
+   whenever the frame is non-empty.  A message returned by the callback is
+   logged and then trips an ASSERT. */
+static void
+vlib_put_next_frame_validate (vlib_main_t * vm,
+ vlib_node_runtime_t * rt,
+ u32 next_index, u32 n_vectors_left)
+{
+ vlib_node_main_t *nm = &vm->node_main;
+ vlib_next_frame_t *nf;
+ vlib_frame_t *f;
+ vlib_node_runtime_t *next_rt;
+ vlib_node_t *next_node;
+ u32 n_before, n_after;
+
+ nf = vlib_node_runtime_get_next_frame (vm, rt, next_index);
+ f = vlib_get_frame (vm, nf->frame_index);
+
+ ASSERT (n_vectors_left <= VLIB_FRAME_SIZE);
+ /* n_after: vectors in the frame after this put; n_before: before it. */
+ n_after = VLIB_FRAME_SIZE - n_vectors_left;
+ n_before = f->n_vectors;
+
+ ASSERT (n_after >= n_before);
+
+ /* The next node's runtime is looked up among the internal nodes. */
+ next_rt = vec_elt_at_index (nm->nodes_by_type[VLIB_NODE_TYPE_INTERNAL],
+ nf->node_runtime_index);
+ next_node = vlib_get_node (vm, next_rt->node_index);
+ if (n_after > 0 && next_node->validate_frame)
+ {
+ u8 *msg = next_node->validate_frame (vm, rt, f);
+ if (msg)
+ {
+ clib_warning ("%v", msg);
+ ASSERT (0);
+ }
+ vec_free (msg);
+ }
+}
+
+/* Finish enqueueing to the next frame (r, next_index).
+   n_vectors_left is the number of unused vector slots; the frame's vector
+   count is set to VLIB_FRAME_SIZE - n_vectors_left.  A non-empty frame
+   that is not pending yet is appended to the pending-frames vector, and
+   the per-next vector counter is advanced. */
+void
+vlib_put_next_frame (vlib_main_t * vm,
+ vlib_node_runtime_t * r,
+ u32 next_index, u32 n_vectors_left)
+{
+ vlib_node_main_t *nm = &vm->node_main;
+ vlib_next_frame_t *nf;
+ vlib_frame_t *f;
+ u32 n_vectors_in_frame;
+
+ /* Extra validation runs only in debug images built without DPDK. */
+ if (DPDK == 0 && CLIB_DEBUG > 0)
+ vlib_put_next_frame_validate (vm, r, next_index, n_vectors_left);
+
+ nf = vlib_node_runtime_get_next_frame (vm, r, next_index);
+ f = vlib_get_frame (vm, nf->frame_index);
+
+ /* Make sure that magic number is still there. Otherwise, caller
+ has overrun frame meta data. */
+ if (CLIB_DEBUG > 0)
+ {
+ vlib_node_t *node = vlib_get_node (vm, r->node_index);
+ validate_frame_magic (vm, f, node, next_index);
+ }
+
+ /* Convert # of vectors left -> number of vectors there. */
+ ASSERT (n_vectors_left <= VLIB_FRAME_SIZE);
+ n_vectors_in_frame = VLIB_FRAME_SIZE - n_vectors_left;
+
+ f->n_vectors = n_vectors_in_frame;
+
+ /* If vectors were added to frame, add to pending vector. */
+ if (PREDICT_TRUE (n_vectors_in_frame > 0))
+ {
+ vlib_pending_frame_t *p;
+ u32 v0, v1;
+
+ /* Remember the next index that was just used. */
+ r->cached_next_index = next_index;
+
+ if (!(f->flags & VLIB_FRAME_PENDING))
+ {
+ __attribute__ ((unused)) vlib_node_t *node;
+ vlib_node_t *next_node;
+ vlib_node_runtime_t *next_runtime;
+
+ node = vlib_get_node (vm, r->node_index);
+ next_node = vlib_get_next_node (vm, r->node_index, next_index);
+ next_runtime = vlib_node_get_runtime (vm, next_node->index);
+
+ vec_add2 (nm->pending_frames, p, 1);
+
+ p->frame_index = nf->frame_index;
+ p->node_runtime_index = nf->node_runtime_index;
+ p->next_frame_index = nf - nm->next_frames;
+ nf->flags |= VLIB_FRAME_PENDING;
+ f->flags |= VLIB_FRAME_PENDING;
+
+ /*
+ * If we're going to dispatch this frame on another thread,
+ * force allocation of a new frame. Otherwise, we create
+ * a dangling frame reference. Each thread has its own copy of
+ * the next_frames vector.
+ */
+ /* The leading "0 &&" keeps this branch disabled. */
+ if (0 && r->cpu_index != next_runtime->cpu_index)
+ {
+ nf->frame_index = ~0;
+ nf->flags &= ~(VLIB_FRAME_PENDING | VLIB_FRAME_IS_ALLOCATED);
+ }
+ }
+
+ /* Copy trace flag from next_frame and from runtime. */
+ /* NOTE(review): OR-ing nf->flags with its own trace bit changes
+ nothing; only the runtime's trace bit has an effect here. */
+ nf->flags |=
+ (nf->flags & VLIB_NODE_FLAG_TRACE) | (r->
+ flags & VLIB_NODE_FLAG_TRACE);
+
+ /* 32-bit counter: when it wraps, the pre-wrap value v0 is added to
+ the node's per-next total. */
+ v0 = nf->vectors_since_last_overflow;
+ v1 = v0 + n_vectors_in_frame;
+ nf->vectors_since_last_overflow = v1;
+ if (PREDICT_FALSE (v1 < v0))
+ {
+ vlib_node_t *node = vlib_get_node (vm, r->node_index);
+ vec_elt (node->n_vectors_by_next_node, next_index) += v0;
+ }
+ }
+}
+
+/* Sync up runtime (32 bit counters) and main node stats (64 bit counters).
+   The runtime's counters since the last overflow, plus the extra n_calls,
+   n_vectors and n_clocks, are added to the node totals; max_clock and
+   max_clock_n are copied over and the runtime counters are zeroed. */
+never_inline void
+vlib_node_runtime_sync_stats (vlib_main_t * vm,
+                              vlib_node_runtime_t * r,
+                              uword n_calls, uword n_vectors, uword n_clocks)
+{
+  vlib_node_t *node = vlib_get_node (vm, r->node_index);
+
+  /* Fold the running counters into the totals. */
+  node->stats_total.calls += n_calls + r->calls_since_last_overflow;
+  node->stats_total.vectors += n_vectors + r->vectors_since_last_overflow;
+  node->stats_total.clocks += n_clocks + r->clocks_since_last_overflow;
+
+  /* Maxima are copied, not accumulated. */
+  node->stats_total.max_clock = r->max_clock;
+  node->stats_total.max_clock_n = r->max_clock_n;
+
+  /* Restart the running counters. */
+  r->calls_since_last_overflow = 0;
+  r->vectors_since_last_overflow = 0;
+  r->clocks_since_last_overflow = 0;
+}
+
+/* Sync the statistics of process p: its runtime counters go through
+   vlib_node_runtime_sync_stats (), then its suspend count is added to the
+   node total and cleared. */
+always_inline void __attribute__ ((unused))
+vlib_process_sync_stats (vlib_main_t * vm,
+                         vlib_process_t * p,
+                         uword n_calls, uword n_vectors, uword n_clocks)
+{
+  vlib_node_runtime_t *runtime = &p->node_runtime;
+  vlib_node_t *node = vlib_get_node (vm, runtime->node_index);
+
+  vlib_node_runtime_sync_stats (vm, runtime, n_calls, n_vectors, n_clocks);
+
+  node->stats_total.suspends += p->n_suspends;
+  p->n_suspends = 0;
+}
+
+/* Bring the totals of node n up to date with its runtime counters:
+   calls/vectors/clocks, the suspend count for process nodes, and the
+   per-next-node vector counters.  Process nodes are skipped unless vm is
+   the global main. */
+void
+vlib_node_sync_stats (vlib_main_t * vm, vlib_node_t * n)
+{
+ vlib_node_runtime_t *rt;
+
+ if (n->type == VLIB_NODE_TYPE_PROCESS)
+ {
+ /* Nothing to do for PROCESS nodes except in main thread */
+ if (vm != &vlib_global_main)
+ return;
+
+ vlib_process_t *p = vlib_get_process_from_node (vm, n);
+ n->stats_total.suspends += p->n_suspends;
+ p->n_suspends = 0;
+ rt = &p->node_runtime;
+ }
+ else
+ rt =
+ vec_elt_at_index (vm->node_main.nodes_by_type[n->type],
+ n->runtime_index);
+
+ /* No extra deltas: just flush what the runtime has accumulated. */
+ vlib_node_runtime_sync_stats (vm, rt, 0, 0, 0);
+
+ /* Sync up runtime next frame vector counters with main node structure. */
+ {
+ vlib_next_frame_t *nf;
+ uword i;
+ for (i = 0; i < rt->n_next_nodes; i++)
+ {
+ nf = vlib_node_runtime_get_next_frame (vm, rt, i);
+ vec_elt (n->n_vectors_by_next_node, i) +=
+ nf->vectors_since_last_overflow;
+ nf->vectors_since_last_overflow = 0;
+ }
+ }
+}
+
+/* Add one dispatch worth of statistics (n_calls, n_vectors, n_clocks) to
+   the 32-bit runtime counters of node and track the largest n_clocks
+   seen.  When any counter wraps, the previous values are restored and the
+   deltas are flushed into the node totals instead.  Returns the result of
+   vlib_node_runtime_update_main_loop_vector_stats (). */
+always_inline u32
+vlib_node_runtime_update_stats (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ uword n_calls,
+ uword n_vectors, uword n_clocks)
+{
+ u32 ca0, ca1, v0, v1, cl0, cl1, r;
+
+ /* x0: counter before this update, x1: counter after it. */
+ cl0 = cl1 = node->clocks_since_last_overflow;
+ ca0 = ca1 = node->calls_since_last_overflow;
+ v0 = v1 = node->vectors_since_last_overflow;
+
+ ca1 = ca0 + n_calls;
+ v1 = v0 + n_vectors;
+ cl1 = cl0 + n_clocks;
+
+ node->calls_since_last_overflow = ca1;
+ node->clocks_since_last_overflow = cl1;
+ node->vectors_since_last_overflow = v1;
+ /* max_clock_n must be updated first: it compares against the old
+ max_clock. */
+ node->max_clock_n = node->max_clock > n_clocks ?
+ node->max_clock_n : n_vectors;
+ node->max_clock = node->max_clock > n_clocks ? node->max_clock : n_clocks;
+
+ r = vlib_node_runtime_update_main_loop_vector_stats (vm, node, n_vectors);
+
+ /* A sum smaller than its starting value means the 32-bit counter
+ wrapped. */
+ if (PREDICT_FALSE (ca1 < ca0 || v1 < v0 || cl1 < cl0))
+ {
+ node->calls_since_last_overflow = ca0;
+ node->clocks_since_last_overflow = cl0;
+ node->vectors_since_last_overflow = v0;
+ vlib_node_runtime_sync_stats (vm, node, n_calls, n_vectors, n_clocks);
+ }
+
+ return r;
+}
+
+/* Update the runtime statistics of process p; the value returned by the
+   node-runtime variant is not used. */
+always_inline void
+vlib_process_update_stats (vlib_main_t * vm,
+                           vlib_process_t * p,
+                           uword n_calls, uword n_vectors, uword n_clocks)
+{
+  vlib_node_runtime_t *runtime = &p->node_runtime;
+
+  (void) vlib_node_runtime_update_stats (vm, runtime,
+                                         n_calls, n_vectors, n_clocks);
+}
+
+/* CLI handler: reset the event log buffer of vm. */
+static clib_error_t *
+vlib_cli_elog_clear (vlib_main_t * vm,
+                     unformat_input_t * input, vlib_cli_command_t * cmd)
+{
+  elog_main_t *em = &vm->elog_main;
+
+  elog_reset_buffer (em);
+
+  return 0;
+}
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (elog_clear_cli, static) = {
+  .path = "event-logger clear",
+  .short_help = "Clear the event log",
+  .function = vlib_cli_elog_clear,
+};
+/* *INDENT-ON* */
+
+#ifdef CLIB_UNIX
+/* CLI handler: write the event log to /tmp/<filename>.
+   The file name is taken from the command input and rejected when it
+   contains ".." or '/'.  Worker threads are held at the barrier while the
+   file is written.  Returns the error from elog_write_file (), or 0 after
+   reporting a bad or missing file name on the CLI. */
+static clib_error_t *
+elog_save_buffer (vlib_main_t * vm,
+                  unformat_input_t * input, vlib_cli_command_t * cmd)
+{
+  elog_main_t *em = &vm->elog_main;
+  char *file = 0, *chroot_file;
+  clib_error_t *error = 0;
+
+  if (!unformat (input, "%s", &file))
+    {
+      vlib_cli_output (vm, "expected file name, got `%U'",
+                       format_unformat_error, input);
+      return 0;
+    }
+
+  /* It's fairly hard to get "../oopsie" through unformat; just in case */
+  if (strstr (file, "..") || strchr (file, '/'))
+    {
+      vlib_cli_output (vm, "illegal characters in filename '%s'", file);
+      /* The parsed name is a vector owned by this function. */
+      vec_free (file);
+      return 0;
+    }
+
+  /* NUL-terminated path vector; freed below. */
+  chroot_file = (char *) format (0, "/tmp/%s%c", file, 0);
+
+  vec_free (file);
+
+  vlib_cli_output (vm, "Saving %wd of %wd events to %s",
+                   elog_n_events_in_buffer (em),
+                   elog_buffer_capacity (em), chroot_file);
+
+  vlib_worker_thread_barrier_sync (vm);
+  error = elog_write_file (em, chroot_file);
+  vlib_worker_thread_barrier_release (vm);
+  vec_free (chroot_file);
+  return error;
+}
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (elog_save_cli, static) = {
+  .path = "event-logger save",
+  .short_help = "event-logger save <filename> (saves log in /tmp/<filename>)",
+  .function = elog_save_buffer,
+};
+/* *INDENT-ON* */
+
+/* CLI handler: stop the event logger by setting its disable limit to the
+   number of events logged so far. */
+static clib_error_t *
+elog_stop (vlib_main_t * vm,
+           unformat_input_t * input, vlib_cli_command_t * cmd)
+{
+  elog_main_t *logger = &vm->elog_main;
+
+  logger->n_total_events_disable_limit = logger->n_total_events;
+  vlib_cli_output (vm, "Stopped the event logger...");
+
+  return 0;
+}
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (elog_stop_cli, static) = {
+  .path = "event-logger stop",
+  .short_help = "Stop the event-logger",
+  .function = elog_stop,
+};
+/* *INDENT-ON* */
+
+/* CLI handler: restart the event logger by raising its disable limit to
+   the maximum value. */
+static clib_error_t *
+elog_restart (vlib_main_t * vm,
+              unformat_input_t * input, vlib_cli_command_t * cmd)
+{
+  elog_main_t *logger = &vm->elog_main;
+
+  logger->n_total_events_disable_limit = ~0;
+  vlib_cli_output (vm, "Restarted the event logger...");
+
+  return 0;
+}
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (elog_restart_cli, static) = {
+  .path = "event-logger restart",
+  .short_help = "Restart the event-logger",
+  .function = elog_restart,
+};
+/* *INDENT-ON* */
+
+/* CLI handler: resize the event ring to the number of events given on the
+   command line and re-enable logging.  The buffer is reset before the
+   argument is parsed, so it is cleared even when the argument is missing
+   and an error is returned. */
+static clib_error_t *
+elog_resize (vlib_main_t * vm,
+             unformat_input_t * input, vlib_cli_command_t * cmd)
+{
+  elog_main_t *em = &vm->elog_main;
+  u32 n_events;
+
+  /* Stop the parade */
+  elog_reset_buffer (em);
+
+  if (!unformat (input, "%d", &n_events))
+    return clib_error_return (0, "Must specify how many events in the ring");
+
+  elog_alloc (em, n_events);
+  em->n_total_events_disable_limit = ~0;
+
+  vlib_cli_output (vm, "Resized ring and restarted the event logger...");
+  return 0;
+}
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (elog_resize_cli, static) = {
+  .path = "event-logger resize",
+  .short_help = "event-logger resize <nnn>",
+  .function = elog_resize,
+};
+/* *INDENT-ON* */
+
+#endif /* CLIB_UNIX */
+
+/* Print a summary line followed by at most n_events_to_show events from
+   the event log of vm.  A count of 0 prints the summary only; ~0 is what
+   callers pass to mean "all".  Event times are shifted so that they are
+   relative to the VLIB clock. */
+static void
+elog_show_buffer_internal (vlib_main_t * vm, u32 n_events_to_show)
+{
+  elog_main_t *em = &vm->elog_main;
+  elog_event_t *e, *es;
+  f64 dt;
+
+  /* Show events in VLIB time since log clock starts after VLIB clock. */
+  dt = (em->init_time.cpu - vm->clib_time.init_cpu_time)
+    * vm->clib_time.seconds_per_clock;
+
+  es = elog_peek_events (em);
+  vlib_cli_output (vm, "%d of %d events in buffer, logger %s", vec_len (es),
+                   em->event_ring_size,
+                   em->n_total_events < em->n_total_events_disable_limit ?
+                   "running" : "stopped");
+  vec_foreach (e, es)
+  {
+    /* Test before printing: decrementing a count of 0 first would wrap
+       it to ~0 and print every event. */
+    if (n_events_to_show == 0)
+      break;
+    vlib_cli_output (vm, "%18.9f: %U",
+                     e->time + dt, format_elog_event, em, e);
+    n_events_to_show--;
+  }
+  vec_free (es);
+}
+
+/* CLI handler for "show event-logger".  Accepts an optional event count
+   or the keyword "all"; without either, up to 250 events are shown.
+   Returns a parse error for any other input, otherwise 0. */
+static clib_error_t *
+elog_show_buffer (vlib_main_t * vm,
+                  unformat_input_t * input, vlib_cli_command_t * cmd)
+{
+  u32 max_events = 250;
+
+  for (;;)
+    {
+      if (unformat_check_input (input) == UNFORMAT_END_OF_INPUT)
+        break;
+
+      if (unformat (input, "%d", &max_events))
+        continue;
+
+      if (!unformat (input, "all"))
+        return unformat_parse_error (input);
+
+      max_events = ~0;
+    }
+
+  elog_show_buffer_internal (vm, max_events);
+  return 0;
+}
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (elog_show_cli, static) = {
+  .function = elog_show_buffer,
+  .path = "show event-logger",
+  .short_help = "Show event logger info",
+};
+/* *INDENT-ON* */
+
+/* Dump the event log of the main returned by vlib_get_main() with the
+   largest possible event limit.  Takes no arguments so it can be called
+   by hand (the name suggests from gdb). */
+void
+vlib_gdb_show_event_log (void)
+{
+  vlib_main_t *vm = vlib_get_main ();
+
+  elog_show_buffer_internal (vm, (u32) ~ 0);
+}
+
+/* Log a node call (is_return == 0) or node return (is_return != 0) event
+   for node_index, carrying n_vectors as event data.  Nothing is logged
+   when VLIB_ELOG_MAIN_LOOP is 0 or n_vectors is 0.  The event is written
+   to the global main's event log; the track is the default one when
+   vm->cpu_index is 0 and the per-worker-thread track otherwise.  The time
+   argument is not used. */
+static inline void
+vlib_elog_main_loop_event (vlib_main_t * vm,
+                           u32 node_index,
+                           u64 time, u32 n_vectors, u32 is_return)
+{
+  vlib_main_t *evm = &vlib_global_main;
+  elog_main_t *em = &evm->elog_main;
+  elog_event_type_t *event_types;
+
+  if (!(VLIB_ELOG_MAIN_LOOP && n_vectors))
+    return;
+
+  if (is_return)
+    event_types = evm->node_return_elog_event_types;
+  else
+    event_types = evm->node_call_elog_event_types;
+
+  elog_track (em,
+              /* event type */
+              vec_elt_at_index (event_types, node_index),
+              /* track */
+              (vm->cpu_index ? &vlib_worker_threads[vm->cpu_index].
+               elog_track : &em->default_track),
+              /* data to log */ n_vectors);
+}
+
+/* Debug aid: print to stderr the graph nodes that buffer index bi has
+ * visited.
+ *
+ * Only useful when VLIB_BUFFER_TRACE_TRAJECTORY is enabled.  In that mode
+ * dispatch_node() keeps a visit count in pre_data[0] and stores one node
+ * index in the following pre_data byte for every node the buffer enters.
+ * When the option is off, a hint on how to enable it is printed instead.
+ *
+ * A visit count of 0 or more than 20 is reported as unreasonable and
+ * nothing else is printed.  Recorded indices that do not refer to an
+ * existing node are skipped.
+ */
+void
+vlib_dump_context_trace (vlib_main_t * vm, u32 bi)
+{
+  vlib_node_main_t *vnm = &vm->node_main;
+  vlib_buffer_t *b;
+  u8 i, n;
+
+  if (!VLIB_BUFFER_TRACE_TRAJECTORY)
+    {
+      fformat (stderr,
+               "in vlib/buffer.h, #define VLIB_BUFFER_TRACE_TRAJECTORY 1\n");
+      return;
+    }
+
+  b = vlib_get_buffer (vm, bi);
+  n = b->pre_data[0];
+
+  fformat (stderr, "Context trace for bi %d b 0x%llx, visited %d\n",
+           bi, b, n);
+
+  if (n == 0 || n > 20)
+    {
+      fformat (stderr, "n is unreasonable\n");
+      return;
+    }
+
+  for (i = 0; i < n; i++)
+    {
+      u32 node_index = b->pre_data[i + 1];
+
+      /* Valid indices are 0 .. vec_len - 1; an index equal to vec_len
+         would read one element past the end of the node vector. */
+      if (node_index >= vec_len (vnm->nodes))
+        {
+          fformat (stderr, "Skip bogus node index %d\n", node_index);
+          continue;
+        }
+
+      fformat (stderr, "%v (%d)\n", vnm->nodes[node_index]->name,
+               node_index);
+    }
+}
+
+
+/* Run NODE's function once on FRAME and account for the call.
+ * Returns LAST_TIME_STAMP unchanged when the dispatch is skipped (a
+ * non-internal node whose state differs from DISPATCH_STATE, or a
+ * pre-input/input node dispatched in a non-interrupt state whose
+ * input_main_loops_per_call count is still non-zero).  Otherwise returns
+ * the CPU time stamp read right after the node function returned.
+ * static_always_inline */ u64
+dispatch_node (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_node_type_t type,
+ vlib_node_state_t dispatch_state,
+ vlib_frame_t * frame, u64 last_time_stamp)
+{
+ uword n, v;
+ u64 t;
+ vlib_node_main_t *nm = &vm->node_main;
+ vlib_next_frame_t *nf;
+
+ if (CLIB_DEBUG > 0)
+ {
+ vlib_node_t *n = vlib_get_node (vm, node->node_index);
+ ASSERT (n->type == type);
+ }
+
+ /* Only non-internal nodes may be disabled. */
+ if (type != VLIB_NODE_TYPE_INTERNAL && node->state != dispatch_state)
+ {
+ ASSERT (type != VLIB_NODE_TYPE_INTERNAL);
+ return last_time_stamp;
+ }
+
+ if ((type == VLIB_NODE_TYPE_PRE_INPUT || type == VLIB_NODE_TYPE_INPUT)
+ && dispatch_state != VLIB_NODE_STATE_INTERRUPT)
+ {
+ u32 c = node->input_main_loops_per_call;
+ /* Only call node when count reaches zero.
+ A non-zero count is decremented and this dispatch is skipped. */
+ if (c)
+ {
+ node->input_main_loops_per_call = c - 1;
+ return last_time_stamp;
+ }
+ }
+
+ /* Speculatively prefetch next frames. */
+ if (node->n_next_nodes > 0)
+ {
+ nf = vec_elt_at_index (nm->next_frames, node->next_frame_index);
+ CLIB_PREFETCH (nf, 4 * sizeof (nf[0]), WRITE);
+ }
+
+ vm->cpu_time_last_node_dispatch = last_time_stamp;
+
+ if (1 /* || vm->cpu_index == node->cpu_index */ )
+ {
+ vlib_main_t *stat_vm;
+
+ stat_vm = /* vlib_mains ? vlib_mains[0] : */ vm;
+
+ vlib_elog_main_loop_event (vm, node->node_index,
+ last_time_stamp,
+ frame ? frame->n_vectors : 0,
+ /* is_after */ 0);
+
+ /*
+ * Turn this on if you run into
+ * "bad monkey" contexts, and you want to know exactly
+ * which nodes they've visited... See ixge.c...
+ */
+ if (VLIB_BUFFER_TRACE_TRAJECTORY && frame)
+ {
+ int i;
+ int log_index;
+ u32 *from;
+ from = vlib_frame_vector_args (frame);
+ for (i = 0; i < frame->n_vectors; i++)
+ {
+ vlib_buffer_t *b = vlib_get_buffer (vm, from[i]);
+ ASSERT (b->pre_data[0] < 32);
+ log_index = b->pre_data[0]++ + 1; /* pre_data[0] is the visit count; slot count+1 receives this node's index */
+ b->pre_data[log_index] = node->node_index;
+ }
+ n = node->function (vm, node, frame);
+ }
+ else
+ n = node->function (vm, node, frame);
+
+ t = clib_cpu_time_now ();
+
+ vlib_elog_main_loop_event (vm, node->node_index, t, n, /* is_after */
+ 1);
+
+ vm->main_loop_vectors_processed += n;
+ vm->main_loop_nodes_processed += n > 0;
+
+ v = vlib_node_runtime_update_stats (stat_vm, node,
+ /* n_calls */ 1,
+ /* n_vectors */ n,
+ /* n_clocks */ t - last_time_stamp);
+
+ /* When in interrupt mode and vector rate crosses threshold switch to
+ polling mode.  This whole section is compiled out of the decision
+ when DPDK is non-zero.  A node dispatched in polling state is only
+ considered for the switch back to interrupt mode if it carries
+ VLIB_NODE_FLAG_SWITCH_FROM_INTERRUPT_TO_POLLING_MODE. */
+ if ((DPDK == 0 && dispatch_state == VLIB_NODE_STATE_INTERRUPT)
+ || (DPDK == 0 && dispatch_state == VLIB_NODE_STATE_POLLING
+ && (node->flags
+ & VLIB_NODE_FLAG_SWITCH_FROM_INTERRUPT_TO_POLLING_MODE)))
+ {
+ ELOG_TYPE_DECLARE (e) =
+ {
+ .function = (char *) __FUNCTION__,.format =
+ "%s vector length %d, switching to %s",.format_args =
+ "T4i4t4",.n_enum_strings = 2,.enum_strings =
+ {
+ "interrupt", "polling",},};
+ struct
+ {
+ u32 node_name, vector_length, is_polling;
+ } *ed;
+
+ if (dispatch_state == VLIB_NODE_STATE_INTERRUPT
+ && v >= nm->polling_threshold_vector_length)
+ {
+ vlib_node_t *n = vlib_get_node (vm, node->node_index);
+ n->state = VLIB_NODE_STATE_POLLING;
+ node->state = VLIB_NODE_STATE_POLLING;
+ ASSERT (!
+ (node->flags &
+ VLIB_NODE_FLAG_SWITCH_FROM_INTERRUPT_TO_POLLING_MODE));
+ node->flags &=
+ ~VLIB_NODE_FLAG_SWITCH_FROM_POLLING_TO_INTERRUPT_MODE;
+ node->flags |=
+ VLIB_NODE_FLAG_SWITCH_FROM_INTERRUPT_TO_POLLING_MODE;
+ nm->input_node_counts_by_state[VLIB_NODE_STATE_INTERRUPT] -= 1;
+ nm->input_node_counts_by_state[VLIB_NODE_STATE_POLLING] += 1;
+
+ ed = ELOG_DATA (&vm->elog_main, e);
+ ed->node_name = n->name_elog_string;
+ ed->vector_length = v;
+ ed->is_polling = 1;
+ }
+ else if (dispatch_state == VLIB_NODE_STATE_POLLING
+ && v <= nm->interrupt_threshold_vector_length)
+ {
+ vlib_node_t *n = vlib_get_node (vm, node->node_index);
+ if (node->flags &
+ VLIB_NODE_FLAG_SWITCH_FROM_POLLING_TO_INTERRUPT_MODE)
+ {
+ /* Switch to interrupt mode after dispatch in polling one more time.
+ This allows driver to re-enable interrupts. */
+ n->state = VLIB_NODE_STATE_INTERRUPT;
+ node->state = VLIB_NODE_STATE_INTERRUPT;
+ node->flags &=
+ ~VLIB_NODE_FLAG_SWITCH_FROM_INTERRUPT_TO_POLLING_MODE;
+ nm->input_node_counts_by_state[VLIB_NODE_STATE_POLLING] -=
+ 1;
+ nm->input_node_counts_by_state[VLIB_NODE_STATE_INTERRUPT] +=
+ 1;
+
+ }
+ else
+ {
+ node->flags |=
+ VLIB_NODE_FLAG_SWITCH_FROM_POLLING_TO_INTERRUPT_MODE; /* first low-rate dispatch: only mark and log; the state changes on a later one */
+ ed = ELOG_DATA (&vm->elog_main, e);
+ ed->node_name = n->name_elog_string;
+ ed->vector_length = v;
+ ed->is_polling = 0;
+ }
+ }
+ }
+ }
+
+ return t;
+}
+
+/* Dispatch the pending frame P to the internal node it is queued for.
+ * Before the call the frame is detached from its next-frame slot (if it
+ * is the slot's current frame) so that the node allocates a new one;
+ * afterwards the frame is restored to the slot, freed, or left alone
+ * depending on the frame and node flags.  Returns the time stamp
+ * produced by dispatch_node().
+ * static */ u64
+dispatch_pending_node (vlib_main_t * vm,
+ vlib_pending_frame_t * p, u64 last_time_stamp)
+{
+ vlib_node_main_t *nm = &vm->node_main;
+ vlib_frame_t *f;
+ vlib_next_frame_t *nf, nf_dummy;
+ vlib_node_runtime_t *n;
+ u32 restore_frame_index;
+
+ n = vec_elt_at_index (nm->nodes_by_type[VLIB_NODE_TYPE_INTERNAL],
+ p->node_runtime_index);
+
+ f = vlib_get_frame (vm, p->frame_index);
+ if (p->next_frame_index == VLIB_PENDING_FRAME_NO_NEXT_FRAME)
+ {
+ /* No next frame: so use dummy on stack. */
+ nf = &nf_dummy;
+ nf->flags = f->flags & VLIB_NODE_FLAG_TRACE;
+ nf->frame_index = ~p->frame_index; /* complement: can never equal p->frame_index, so no restore is attempted below */
+ }
+ else
+ nf = vec_elt_at_index (nm->next_frames, p->next_frame_index);
+
+ ASSERT (f->flags & VLIB_FRAME_IS_ALLOCATED);
+
+ /* Force allocation of new frame while current frame is being
+ dispatched. */
+ restore_frame_index = ~0;
+ if (nf->frame_index == p->frame_index)
+ {
+ nf->frame_index = ~0;
+ nf->flags &= ~VLIB_FRAME_IS_ALLOCATED;
+ if (!(n->flags & VLIB_NODE_FLAG_FRAME_NO_FREE_AFTER_DISPATCH))
+ restore_frame_index = p->frame_index;
+ }
+
+ /* Frame must be pending. */
+ ASSERT (f->flags & VLIB_FRAME_PENDING);
+ ASSERT (f->n_vectors > 0);
+
+ /* Copy trace flag from next frame to node.
+ Trace flag indicates that at least one vector in the dispatched
+ frame is traced. */
+ n->flags &= ~VLIB_NODE_FLAG_TRACE;
+ n->flags |= (nf->flags & VLIB_FRAME_TRACE) ? VLIB_NODE_FLAG_TRACE : 0;
+ nf->flags &= ~VLIB_FRAME_TRACE;
+
+ last_time_stamp = dispatch_node (vm, n,
+ VLIB_NODE_TYPE_INTERNAL,
+ VLIB_NODE_STATE_POLLING,
+ f, last_time_stamp);
+
+ f->flags &= ~VLIB_FRAME_PENDING;
+
+ /* Frame is ready to be used again, so restore it. */
+ if (restore_frame_index != ~0)
+ {
+ /* We mustn't restore a frame that is flagged to be freed. This shouldn't
+ happen since frames to be freed post dispatch are those used
+ when the to-node frame becomes full i.e. they form a sort of queue of
+ frames to a single node. If we get here then the to-node frame and the
+ pending frame *were* the same, and so we removed the to-node frame.
+ Therefore this frame is no longer part of the queue for that node
+ and hence it cannot be its overspill.
+ */
+ ASSERT (!(f->flags & VLIB_FRAME_FREE_AFTER_DISPATCH));
+
+ /* p->next_frame_index can change during node dispatch if node
+ function decides to change graph hook up. */
+ nf = vec_elt_at_index (nm->next_frames, p->next_frame_index);
+ nf->flags |= VLIB_FRAME_IS_ALLOCATED;
+
+ if (~0 == nf->frame_index)
+ {
+ /* no new frame has been assigned to this node, use the saved one */
+ nf->frame_index = restore_frame_index;
+ f->n_vectors = 0;
+ }
+ else
+ {
+ /* The node has gained a frame, implying packets from the current frame
+ were re-queued to this same node. we don't need the saved one
+ anymore */
+ vlib_frame_free (vm, n, f);
+ }
+ }
+ else
+ {
+ if (f->flags & VLIB_FRAME_FREE_AFTER_DISPATCH)
+ {
+ ASSERT (!(n->flags & VLIB_NODE_FLAG_FRAME_NO_FREE_AFTER_DISPATCH));
+ vlib_frame_free (vm, n, f);
+ }
+ }
+
+ return last_time_stamp;
+}
+
+/* Return non-zero when the first word of the process stack still holds
+   VLIB_PROCESS_STACK_MAGIC. */
+always_inline uword
+vlib_process_stack_is_valid (vlib_process_t * p)
+{
+  if (p->stack[0] == VLIB_PROCESS_STACK_MAGIC)
+    return 1;
+  return 0;
+}
+
+typedef struct /* arguments handed from vlib_process_startup() to vlib_process_bootstrap() */
+{
+ vlib_main_t *vm; /* first argument of the process node function */
+ vlib_process_t *process; /* process being started; its node_runtime supplies the function */
+ vlib_frame_t *frame; /* frame argument of the node function (may be 0) */
+} vlib_process_bootstrap_args_t;
+
+/* Called in process stack.
+   Entry point given to clib_calljmp(): runs the process node function and,
+   once it returns, jumps back through the process' return_longjmp with
+   the function's result.  _a is a vlib_process_bootstrap_args_t pointer
+   passed as a uword. */
+static uword
+vlib_process_bootstrap (uword _a)
+{
+  vlib_process_bootstrap_args_t *args =
+    uword_to_pointer (_a, vlib_process_bootstrap_args_t *);
+  /* Copy every field out of *args before calling the node function;
+     nothing below dereferences args again. */
+  vlib_main_t *main_ptr = args->vm;
+  vlib_process_t *proc = args->process;
+  vlib_frame_t *frame = args->frame;
+  vlib_node_runtime_t *rt = &proc->node_runtime;
+  uword result;
+
+  result = rt->function (main_ptr, rt, frame);
+
+  ASSERT (vlib_process_stack_is_valid (proc));
+
+  clib_longjmp (&proc->return_longjmp, result);
+
+  return result;
+}
+
+/* Called in main stack.
+   Arms the process' return_longjmp and then starts the process function
+   on the process' own stack via clib_calljmp().  Returns whatever value
+   arrives through return_longjmp, or the clib_calljmp() result. */
+static_always_inline uword
+vlib_process_startup (vlib_main_t * vm, vlib_process_t * p, vlib_frame_t * f)
+{
+  vlib_process_bootstrap_args_t args;
+  uword rv;
+
+  args.frame = f;
+  args.process = p;
+  args.vm = vm;
+
+  rv = clib_setjmp (&p->return_longjmp, VLIB_PROCESS_RETURN_LONGJMP_RETURN);
+  if (rv != VLIB_PROCESS_RETURN_LONGJMP_RETURN)
+    return rv;
+
+  /* First pass through the setjmp: run the bootstrap at the top of the
+     process stack (base + 2^log2_n_stack_bytes). */
+  rv = clib_calljmp (vlib_process_bootstrap, pointer_to_uword (&args),
+                     (void *) p->stack + (1 << p->log2_n_stack_bytes));
+  return rv;
+}
+
+/* Clear the suspended/resume-pending flags of P, arm its return_longjmp
+   and jump into the process through resume_longjmp.  Returns the value
+   that later arrives through return_longjmp. */
+static_always_inline uword
+vlib_process_resume (vlib_process_t * p)
+{
+  uword rv;
+
+  p->flags &= ~(VLIB_PROCESS_RESUME_PENDING
+                | VLIB_PROCESS_IS_SUSPENDED_WAITING_FOR_EVENT
+                | VLIB_PROCESS_IS_SUSPENDED_WAITING_FOR_CLOCK);
+
+  rv = clib_setjmp (&p->return_longjmp, VLIB_PROCESS_RETURN_LONGJMP_RETURN);
+  if (rv != VLIB_PROCESS_RETURN_LONGJMP_RETURN)
+    return rv;
+
+  clib_longjmp (&p->resume_longjmp, VLIB_PROCESS_RESUME_LONGJMP_RESUME);
+  return rv;
+}
+
+/* Start process P with frame F (may be 0) and account for the run.
+   Returns LAST_TIME_STAMP unchanged when the process node is not in
+   polling state or the process is already suspended; otherwise returns
+   the CPU time read after the process returned or suspended.  When the
+   process suspends, a suspended-process frame record is allocated and,
+   for clock waits, the process is inserted into the timing wheel. */
+static u64
+dispatch_process (vlib_main_t * vm,
+                  vlib_process_t * p, vlib_frame_t * f, u64 last_time_stamp)
+{
+  vlib_node_main_t *nm = &vm->node_main;
+  vlib_node_runtime_t *rt = &p->node_runtime;
+  vlib_node_t *node = vlib_get_node (vm, rt->node_index);
+  uword n_vectors, is_suspend;
+  u64 now;
+
+  if (node->state != VLIB_NODE_STATE_POLLING)
+    return last_time_stamp;
+  if (p->flags & (VLIB_PROCESS_IS_SUSPENDED_WAITING_FOR_CLOCK
+                  | VLIB_PROCESS_IS_SUSPENDED_WAITING_FOR_EVENT))
+    return last_time_stamp;
+
+  p->flags |= VLIB_PROCESS_IS_RUNNING;
+
+  vlib_elog_main_loop_event (vm, rt->node_index, last_time_stamp,
+                             f ? f->n_vectors : 0, /* is_after */ 0);
+
+  /* Save away current process for suspend. */
+  nm->current_process_index = node->runtime_index;
+
+  n_vectors = vlib_process_startup (vm, p, f);
+
+  nm->current_process_index = ~0;
+
+  ASSERT (n_vectors != VLIB_PROCESS_RETURN_LONGJMP_RETURN);
+  is_suspend = (n_vectors == VLIB_PROCESS_RETURN_LONGJMP_SUSPEND);
+  if (!is_suspend)
+    {
+      p->flags &= ~VLIB_PROCESS_IS_RUNNING;
+    }
+  else
+    {
+      vlib_pending_frame_t *pf;
+
+      /* A suspend return code is not a vector count. */
+      n_vectors = 0;
+      pool_get (nm->suspended_process_frames, pf);
+      pf->node_runtime_index = node->runtime_index;
+      pf->frame_index = f ? vlib_frame_index (vm, f) : ~0;
+      pf->next_frame_index = ~0;
+
+      p->n_suspends += 1;
+      p->suspended_process_frame_index = pf - nm->suspended_process_frames;
+
+      if (p->flags & VLIB_PROCESS_IS_SUSPENDED_WAITING_FOR_CLOCK)
+        timing_wheel_insert (&nm->timing_wheel, p->resume_cpu_time,
+                             vlib_timing_wheel_data_set_suspended_process
+                             (node->runtime_index));
+    }
+
+  now = clib_cpu_time_now ();
+
+  vlib_elog_main_loop_event (vm, rt->node_index, now, is_suspend,
+                             /* is_after */ 1);
+
+  vlib_process_update_stats (vm, p,
+                             /* n_calls */ !is_suspend,
+                             /* n_vectors */ n_vectors,
+                             /* n_clocks */ now - last_time_stamp);
+
+  return now;
+}
+
+/* Start the process stored at process_index in the node main's process
+   vector, with no frame and a last time stamp of 0.  The time stamp
+   returned by dispatch_process() is discarded. */
+void
+vlib_start_process (vlib_main_t * vm, uword process_index)
+{
+  vlib_process_t *proc = vec_elt (vm->node_main.processes, process_index);
+
+  dispatch_process (vm, proc, /* frame */ 0, /* cpu_time_now */ 0);
+}
+
+/* Resume the suspended process at process_index and account for the run.
+   Returns LAST_TIME_STAMP unchanged when the process is not marked
+   running; otherwise returns the CPU time read after the process yielded
+   again.  If the process suspends again on a clock it is re-inserted into
+   the timing wheel; if it finishes, its suspended-frame record is
+   returned to the pool. */
+static u64
+dispatch_suspended_process (vlib_main_t * vm,
+                            uword process_index, u64 last_time_stamp)
+{
+  vlib_node_main_t *nm = &vm->node_main;
+  vlib_process_t *p = vec_elt (nm->processes, process_index);
+  vlib_node_runtime_t *rt;
+  vlib_node_t *node;
+  vlib_pending_frame_t *pf;
+  vlib_frame_t *f = 0;
+  u64 now, n_vectors, is_suspend;
+
+  if (PREDICT_FALSE (!(p->flags & VLIB_PROCESS_IS_RUNNING)))
+    return last_time_stamp;
+
+  ASSERT (p->flags & (VLIB_PROCESS_IS_SUSPENDED_WAITING_FOR_CLOCK
+                      | VLIB_PROCESS_IS_SUSPENDED_WAITING_FOR_EVENT));
+
+  pf = pool_elt_at_index (nm->suspended_process_frames,
+                          p->suspended_process_frame_index);
+
+  rt = &p->node_runtime;
+  node = vlib_get_node (vm, rt->node_index);
+  if (pf->frame_index != ~0)
+    f = vlib_get_frame (vm, pf->frame_index);
+
+  vlib_elog_main_loop_event (vm, rt->node_index, last_time_stamp,
+                             f ? f->n_vectors : 0, /* is_after */ 0);
+
+  /* Save away current process for suspend. */
+  nm->current_process_index = node->runtime_index;
+
+  n_vectors = vlib_process_resume (p);
+  now = clib_cpu_time_now ();
+
+  nm->current_process_index = ~0;
+
+  is_suspend = (n_vectors == VLIB_PROCESS_RETURN_LONGJMP_SUSPEND);
+  if (!is_suspend)
+    {
+      /* Process ran to completion: release its suspend bookkeeping. */
+      p->flags &= ~VLIB_PROCESS_IS_RUNNING;
+      p->suspended_process_frame_index = ~0;
+      pool_put (nm->suspended_process_frames, pf);
+    }
+  else
+    {
+      /* Suspend it again. */
+      n_vectors = 0;
+      p->n_suspends += 1;
+      if (p->flags & VLIB_PROCESS_IS_SUSPENDED_WAITING_FOR_CLOCK)
+        timing_wheel_insert (&nm->timing_wheel, p->resume_cpu_time,
+                             vlib_timing_wheel_data_set_suspended_process
+                             (node->runtime_index));
+    }
+
+  /* The time stamp is read a second time here, after the bookkeeping. */
+  now = clib_cpu_time_now ();
+  vlib_elog_main_loop_event (vm, rt->node_index, now, !is_suspend,
+                             /* is_after */ 1);
+
+  vlib_process_update_stats (vm, p,
+                             /* n_calls */ !is_suspend,
+                             /* n_vectors */ n_vectors,
+                             /* n_clocks */ now - last_time_stamp);
+
+  return now;
+}
+
+static void /* Scheduler loop.  After one-time setup (pending-frame vector,
+ timing wheel, default interrupt/polling thresholds, starting every
+ process) each iteration dispatches pre-input nodes, input nodes in
+ polling state, pending interrupt nodes, expired timing-wheel entries and
+ finally all pending frames.  The while (1) loop contains no break or
+ return statement. */
+vlib_main_loop (vlib_main_t * vm)
+{
+ vlib_node_main_t *nm = &vm->node_main;
+ uword i;
+ u64 cpu_time_now;
+
+ /* Initialize pending node vector. */
+ vec_resize (nm->pending_frames, 32);
+ _vec_len (nm->pending_frames) = 0;
+
+ /* Mark time of main loop start. */
+ cpu_time_now = vm->clib_time.last_cpu_time;
+ vm->cpu_time_main_loop_start = cpu_time_now;
+
+ /* Arrange for first level of timing wheel to cover times we care
+ most about. */
+ nm->timing_wheel.min_sched_time = 10e-6;
+ nm->timing_wheel.max_sched_time = 10e-3;
+ timing_wheel_init (&nm->timing_wheel,
+ cpu_time_now, vm->clib_time.clocks_per_second);
+
+ /* Pre-allocate expired nodes. */
+ vec_alloc (nm->data_from_advancing_timing_wheel, 32);
+ vec_alloc (nm->pending_interrupt_node_runtime_indices, 32);
+
+ if (!nm->polling_threshold_vector_length) /* defaults apply only when the thresholds are still zero */
+ nm->polling_threshold_vector_length = 10;
+ if (!nm->interrupt_threshold_vector_length)
+ nm->interrupt_threshold_vector_length = 5;
+
+ nm->current_process_index = ~0;
+
+ /* Start all processes. */
+ {
+ uword i;
+ for (i = 0; i < vec_len (nm->processes); i++)
+ cpu_time_now =
+ dispatch_process (vm, nm->processes[i], /* frame */ 0, cpu_time_now);
+ }
+
+ while (1)
+ {
+ vlib_node_runtime_t *n;
+
+ /* Process pre-input nodes. */
+ vec_foreach (n, nm->nodes_by_type[VLIB_NODE_TYPE_PRE_INPUT])
+ cpu_time_now = dispatch_node (vm, n,
+ VLIB_NODE_TYPE_PRE_INPUT,
+ VLIB_NODE_STATE_POLLING,
+ /* frame */ 0,
+ cpu_time_now);
+
+ /* Next process input nodes. */
+ vec_foreach (n, nm->nodes_by_type[VLIB_NODE_TYPE_INPUT])
+ cpu_time_now = dispatch_node (vm, n,
+ VLIB_NODE_TYPE_INPUT,
+ VLIB_NODE_STATE_POLLING,
+ /* frame */ 0,
+ cpu_time_now);
+
+ if (PREDICT_TRUE (vm->queue_signal_pending == 0))
+ vm->queue_signal_callback (vm);
+
+ /* Next handle interrupts.
+ The pending list length is captured and the vector reset to empty
+ before any of the listed nodes is dispatched. */
+ {
+ uword l = _vec_len (nm->pending_interrupt_node_runtime_indices);
+ uword i;
+ if (l > 0)
+ {
+ _vec_len (nm->pending_interrupt_node_runtime_indices) = 0;
+ for (i = 0; i < l; i++)
+ {
+ n = vec_elt_at_index (nm->nodes_by_type[VLIB_NODE_TYPE_INPUT],
+ nm->
+ pending_interrupt_node_runtime_indices
+ [i]);
+ cpu_time_now =
+ dispatch_node (vm, n, VLIB_NODE_TYPE_INPUT,
+ VLIB_NODE_STATE_INTERRUPT,
+ /* frame */ 0,
+ cpu_time_now);
+ }
+ }
+ }
+
+ /* Check if process nodes have expired from timing wheel. */
+ nm->data_from_advancing_timing_wheel
+ = timing_wheel_advance (&nm->timing_wheel, cpu_time_now,
+ nm->data_from_advancing_timing_wheel,
+ &nm->cpu_time_next_process_ready);
+
+ ASSERT (nm->data_from_advancing_timing_wheel != 0);
+ if (PREDICT_FALSE (_vec_len (nm->data_from_advancing_timing_wheel) > 0))
+ {
+ uword i;
+
+ processes_timing_wheel_data: /* also entered by the goto after the pending-frame pass below */
+ for (i = 0; i < _vec_len (nm->data_from_advancing_timing_wheel);
+ i++)
+ {
+ u32 d = nm->data_from_advancing_timing_wheel[i];
+ u32 di = vlib_timing_wheel_data_get_index (d);
+
+ if (vlib_timing_wheel_data_is_timed_event (d))
+ {
+ vlib_signal_timed_event_data_t *te =
+ pool_elt_at_index (nm->signal_timed_event_data_pool, di);
+ vlib_node_t *n = vlib_get_node (vm, te->process_node_index);
+ vlib_process_t *p =
+ vec_elt (nm->processes, n->runtime_index);
+ void *data;
+ data =
+ vlib_process_signal_event_helper (nm, n, p,
+ te->event_type_index,
+ te->n_data_elts,
+ te->n_data_elt_bytes);
+ if (te->n_data_bytes < sizeof (te->inline_event_data))
+ clib_memcpy (data, te->inline_event_data,
+ te->n_data_bytes);
+ else
+ {
+ clib_memcpy (data, te->event_data_as_vector,
+ te->n_data_bytes);
+ vec_free (te->event_data_as_vector);
+ }
+ pool_put (nm->signal_timed_event_data_pool, te);
+ }
+ else
+ {
+ cpu_time_now = clib_cpu_time_now ();
+ cpu_time_now =
+ dispatch_suspended_process (vm, di, cpu_time_now);
+ }
+ }
+
+ /* Reset vector. */
+ _vec_len (nm->data_from_advancing_timing_wheel) = 0;
+ }
+
+ /* Input nodes may have added work to the pending vector.
+ Process pending vector until there is nothing left.
+ All pending vectors will be processed from input -> output.
+ The length is re-read on every iteration, so frames appended while
+ dispatching are handled in this same pass. */
+ for (i = 0; i < _vec_len (nm->pending_frames); i++)
+ cpu_time_now = dispatch_pending_node (vm, nm->pending_frames + i,
+ cpu_time_now);
+ /* Reset pending vector for next iteration. */
+ _vec_len (nm->pending_frames) = 0;
+
+ /* Pending internal nodes may resume processes. */
+ if (_vec_len (nm->data_from_advancing_timing_wheel) > 0)
+ goto processes_timing_wheel_data;
+
+ vlib_increment_main_loop_counter (vm);
+
+ /* Record time stamp in case there are no enabled nodes and above
+ calls do not update time stamp. */
+ cpu_time_now = clib_cpu_time_now ();
+ }
+}
+
+vlib_main_t vlib_global_main; /* Global main structure; declared extern in main.h. */
+
+/* Early config handler for the "vlib" section.  Recognizes
+   "memory-trace" and "elog-events <n>"; anything else is a parse error.
+   The input is freed once fully consumed.  Returns 0 on success. */
+static clib_error_t *
+vlib_main_configure (vlib_main_t * vm, unformat_input_t * input)
+{
+  int want_mem_trace = 0;
+
+  for (;;)
+    {
+      if (unformat_check_input (input) == UNFORMAT_END_OF_INPUT)
+        break;
+
+      if (unformat (input, "memory-trace"))
+        {
+          want_mem_trace = 1;
+          continue;
+        }
+
+      if (unformat (input, "elog-events %d",
+                    &vm->elog_main.event_ring_size))
+        continue;
+
+      return unformat_parse_error (input);
+    }
+
+  unformat_free (input);
+
+  /* Enable memory trace as early as possible. */
+  if (want_mem_trace)
+    clib_mem_trace (1);
+
+  return 0;
+}
+
+VLIB_EARLY_CONFIG_FUNCTION (vlib_main_configure, "vlib");
+
+/* No-op queue signal callback; vlib_main() installs it as the initial
+   value of vm->queue_signal_callback. */
+static void
+dummy_queue_signal_callback (vlib_main_t * vm)
+{
+  return;
+}
+
+/* Main function.
+ * Initializes timing, the event log, threads, static nodes, the random
+ * buffer, the node graph and all init functions, arms the main_loop_exit
+ * setjmp, runs the non-early config functions and the main-loop-enter
+ * functions, and finally enters vlib_main_loop().  Every failure or loop
+ * exit ends up at `done', which runs the main-loop-exit functions and
+ * reports any error.  Always returns 0. */
+int
+vlib_main (vlib_main_t * volatile vm, unformat_input_t * input)
+{
+ clib_error_t *volatile error;
+
+ vm->queue_signal_callback = dummy_queue_signal_callback;
+
+ clib_time_init (&vm->clib_time);
+
+ /* Turn on event log.
+ The ring size defaults to 128 << 10 events when none was configured. */
+ if (!vm->elog_main.event_ring_size)
+ vm->elog_main.event_ring_size = 128 << 10;
+ elog_init (&vm->elog_main, vm->elog_main.event_ring_size);
+ elog_enable_disable (&vm->elog_main, 1);
+
+ /* Default name. */
+ if (!vm->name)
+ vm->name = "VLIB";
+
+ vec_validate (vm->buffer_main, 0);
+
+ if ((error = vlib_thread_init (vm)))
+ {
+ clib_error_report (error);
+ goto done;
+ }
+
+ /* Register static nodes so that init functions may use them. */
+ vlib_register_all_static_nodes (vm);
+
+ /* Set seed for random number generator.
+ Allow user to specify seed to make random sequence deterministic. */
+ if (!unformat (input, "seed %wd", &vm->random_seed))
+ vm->random_seed = clib_cpu_time_now ();
+ clib_random_buffer_init (&vm->random_buffer, vm->random_seed);
+
+ /* Initialize node graph. */
+ if ((error = vlib_node_main_init (vm)))
+ {
+ /* Arrange for graph hook up error to not be fatal when debugging. */
+ if (CLIB_DEBUG > 0)
+ clib_error_report (error);
+ else
+ goto done;
+ }
+
+ /* See unix/main.c; most likely already set up */
+ if (vm->init_functions_called == 0)
+ vm->init_functions_called = hash_create (0, /* value bytes */ 0);
+ if ((error = vlib_call_all_init_functions (vm)))
+ goto done;
+
+ /* Create default buffer free list. */
+ vlib_buffer_get_or_create_free_list (vm,
+ VLIB_BUFFER_DEFAULT_FREE_LIST_BYTES,
+ "default");
+
+ switch (clib_setjmp (&vm->main_loop_exit, VLIB_MAIN_LOOP_EXIT_NONE)) /* later clib_longjmp calls on main_loop_exit come back here with their exit code */
+ {
+ case VLIB_MAIN_LOOP_EXIT_NONE:
+ vm->main_loop_exit_set = 1;
+ break;
+
+ case VLIB_MAIN_LOOP_EXIT_CLI:
+ goto done;
+
+ default:
+ error = vm->main_loop_error;
+ goto done;
+ }
+
+ if ((error = vlib_call_all_config_functions (vm, input, 0 /* is_early */ )))
+ goto done;
+
+ /* Call all main loop enter functions. */
+ {
+ clib_error_t *sub_error;
+ sub_error = vlib_call_all_main_loop_enter_functions (vm);
+ if (sub_error)
+ clib_error_report (sub_error);
+ }
+
+ vlib_main_loop (vm);
+
+done:
+ /* Call all exit functions. */
+ {
+ clib_error_t *sub_error;
+ sub_error = vlib_call_all_main_loop_exit_functions (vm);
+ if (sub_error)
+ clib_error_report (sub_error);
+ }
+
+ if (error)
+ clib_error_report (error);
+
+ return 0; /* errors are reported above, not returned to the caller */
+}
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vlib/main.h b/src/vlib/main.h
new file mode 100644
index 00000000000..d9ac1445ddd
--- /dev/null
+++ b/src/vlib/main.h
@@ -0,0 +1,333 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * main.h: VLIB main data structure
+ *
+ * Copyright (c) 2008 Eliot Dresselhaus
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef included_vlib_main_h
+#define included_vlib_main_h
+
+#include <vppinfra/elog.h>
+#include <vppinfra/format.h>
+#include <vppinfra/longjmp.h>
+#include <vppinfra/pool.h>
+#include <vppinfra/random_buffer.h>
+#include <vppinfra/time.h>
+
+#include <pthread.h>
+
+
+/* By default turn off node/error event logging.
+ Override with -DVLIB_ELOG_MAIN_LOOP */
+#ifndef VLIB_ELOG_MAIN_LOOP
+#define VLIB_ELOG_MAIN_LOOP 0
+#endif
+
+typedef struct vlib_main_t
+{
+ /* Instruction level timing state. */
+ clib_time_t clib_time;
+
+ /* Time stamp of last node dispatch. */
+ u64 cpu_time_last_node_dispatch;
+
+ /* Time stamp when main loop was entered (time 0). */
+ u64 cpu_time_main_loop_start;
+
+ /* Incremented once for each main loop. */
+ u32 main_loop_count;
+
+ /* Count of vectors processed this main loop, and count of node
+ dispatches this main loop that returned at least one vector.
+ vlib_increment_main_loop_counter() folds both into the arrays below
+ and zeroes them. */
+ u32 main_loop_vectors_processed;
+ u32 main_loop_nodes_processed;
+
+ /* Circular buffer of input node vector counts.
+ Indexed by low bits of
+ (main_loop_count >> VLIB_LOG2_MAIN_LOOPS_PER_STATS_UPDATE). */
+ u32 vector_counts_per_main_loop[2];
+ u32 node_counts_per_main_loop[2];
+
+ /* Every so often we switch to the next counter. */
+#define VLIB_LOG2_MAIN_LOOPS_PER_STATS_UPDATE 7
+
+ /* Jump target to exit main loop with given code. */
+ u32 main_loop_exit_set;
+ clib_longjmp_t main_loop_exit;
+#define VLIB_MAIN_LOOP_EXIT_NONE 0
+#define VLIB_MAIN_LOOP_EXIT_PANIC 1
+ /* Exit via CLI. */
+#define VLIB_MAIN_LOOP_EXIT_CLI 2
+
+ /* Error marker to use when exiting main loop. */
+ clib_error_t *main_loop_error;
+
+ /* Name for e.g. syslog. */
+ char *name;
+
+ /* Start and size of CLIB heap. */
+ void *heap_base;
+ uword heap_size;
+
+ vlib_buffer_main_t *buffer_main;
+
+ vlib_physmem_main_t physmem_main;
+
+ /* Allocate/free buffer memory for DMA transfers, descriptor rings, etc.
+ buffer memory is guaranteed to be cache-aligned. */
+ void *(*os_physmem_alloc_aligned) (vlib_physmem_main_t * pm,
+ uword n_bytes, uword alignment);
+ void (*os_physmem_free) (void *x);
+
+ /* Node graph main structure. */
+ vlib_node_main_t node_main;
+
+ /* Command line interface. */
+ vlib_cli_main_t cli_main;
+
+ /* Packet trace buffer. */
+ vlib_trace_main_t trace_main;
+
+ /* Error handling. */
+ vlib_error_main_t error_main;
+
+ /* Punt packets to underlying operating system for when fast switching
+ code does not know what to do. */
+ void (*os_punt_frame) (struct vlib_main_t * vm,
+ struct vlib_node_runtime_t * node,
+ vlib_frame_t * frame);
+
+ /* Multicast distribution. Set to zero for MC disabled. */
+ mc_main_t *mc_main;
+
+ /* Stream index to use for distribution when MC is enabled. */
+ u32 mc_stream_index;
+
+ vlib_one_time_waiting_process_t *procs_waiting_for_mc_stream_join;
+
+ /* Event logger. */
+ elog_main_t elog_main;
+
+ /* Node call and return event types. */
+ elog_event_type_t *node_call_elog_event_types;
+ elog_event_type_t *node_return_elog_event_types;
+
+ elog_event_type_t *error_elog_event_types;
+
+ /* Seed for random number generator. */
+ uword random_seed;
+
+ /* Buffer of random data for various uses. */
+ clib_random_buffer_t random_buffer;
+
+ /* Hash table to record which init functions have been called. */
+ uword *init_functions_called;
+
+ /* to compare with node runtime */
+ u32 cpu_index;
+
+ void **mbuf_alloc_list;
+
+ /* List of init functions to call, setup by constructors */
+ _vlib_init_function_list_elt_t *init_function_registrations;
+ _vlib_init_function_list_elt_t *main_loop_enter_function_registrations;
+ _vlib_init_function_list_elt_t *main_loop_exit_function_registrations;
+ _vlib_init_function_list_elt_t *api_init_function_registrations;
+ vlib_config_function_runtime_t *config_function_registrations;
+ mc_serialize_msg_t *mc_msg_registrations; /* mc_main is a pointer... */
+
+ /* control-plane API queue signal pending, length indication */
+ volatile u32 queue_signal_pending;
+ volatile u32 api_queue_nonempty;
+ void (*queue_signal_callback) (struct vlib_main_t *);
+ u8 **argv;
+} vlib_main_t;
+
+/* Global main structure. */
+extern vlib_main_t vlib_global_main;
+
+/* Current time as reported by clib_time_now() for this main's clock. */
+always_inline f64
+vlib_time_now (vlib_main_t * vm)
+{
+  clib_time_t *clock = &vm->clib_time;
+
+  return clib_time_now (clock);
+}
+
+/* Pass N through clib_time_now_internal() using this main's clock
+   state and return the result. */
+always_inline f64
+vlib_time_now_ticks (vlib_main_t * vm, u64 n)
+{
+  clib_time_t *clock = &vm->clib_time;
+
+  return clib_time_now_internal (clock, n);
+}
+
+/* Busy wait for specified time.
+   Spins on vlib_time_now() until it reaches the start time plus WAIT;
+   returns after the first reading when WAIT is not positive. */
+always_inline void
+vlib_time_wait (vlib_main_t * vm, f64 wait)
+{
+  f64 now = vlib_time_now (vm);
+  f64 deadline = now + wait;
+
+  for (; now < deadline; now = vlib_time_now (vm))
+    ;
+}
+
+/* Time a piece of code: run BODY once between two vlib_time_now() reads
+ and report the difference through clib_warning() in "%.7e" format. */
+#define vlib_time_code(vm,body) \
+do { \
+ f64 _t[2]; \
+ _t[0] = vlib_time_now (vm); \
+ do { body; } while (0); \
+ _t[1] = vlib_time_now (vm); \
+ clib_warning ("%.7e", _t[1] - _t[0]); \
+} while (0)
+
+/* Poll TEST until it becomes non-zero or about TIMEOUT_TIME has passed,
+ * calling vlib_process_suspend (VM, SUSPEND_TIME) between polls.
+ *
+ * Elapsed time is estimated by summing SUSPEND_TIME per iteration; it is
+ * not measured.  The expression evaluates to the last value of TEST, so
+ * it is zero when the wait timed out.
+ *
+ * TEST and TIMEOUT_TIME are evaluated on every iteration, and VM and
+ * SUSPEND_TIME are evaluated on every suspend (SUSPEND_TIME twice per
+ * iteration), so arguments should be free of side effects other than
+ * those intended for TEST.  Every argument is parenthesized so that
+ * arbitrary expressions can be passed safely.
+ */
+#define vlib_wait_with_timeout(vm,suspend_time,timeout_time,test) \
+({ \
+  uword __vlib_wait_with_timeout = 0; \
+  f64 __vlib_wait_time = 0; \
+  while (! (__vlib_wait_with_timeout = (test)) \
+         && __vlib_wait_time < (timeout_time)) \
+    { \
+      vlib_process_suspend ((vm), (suspend_time)); \
+      __vlib_wait_time += (suspend_time); \
+    } \
+  __vlib_wait_with_timeout; \
+})
+
+always_inline void
+vlib_panic_with_error (vlib_main_t * vm, clib_error_t * error)
+{
+ vm->main_loop_error = error;
+ clib_longjmp (&vm->main_loop_exit, VLIB_MAIN_LOOP_EXIT_PANIC);
+}
+
+#define vlib_panic_with_msg(vm,args...) /* build an error from ARGS with clib_error_return() and panic with it */ \
+ vlib_panic_with_error (vm, clib_error_return (0, args))
+
+always_inline void
+vlib_panic (vlib_main_t * vm)
+{
+ vlib_panic_with_error (vm, 0);
+}
+
+/* Index into the per-period statistics arrays: the current stats period
+   (main_loop_count >> VLIB_LOG2_MAIN_LOOPS_PER_STATS_UPDATE) offset by
+   DELTA, wrapped to the array length (which must be a power of two). */
+always_inline u32
+vlib_vector_input_stats_index (vlib_main_t * vm, word delta)
+{
+  u32 period = vm->main_loop_count >> VLIB_LOG2_MAIN_LOOPS_PER_STATS_UPDATE;
+
+  ASSERT (is_pow2 (ARRAY_LEN (vm->vector_counts_per_main_loop)));
+
+  return (period + delta) & (ARRAY_LEN (vm->vector_counts_per_main_loop) - 1);
+}
+
+/* Estimate input rate based on previous
+   2^VLIB_LOG2_MAIN_LOOPS_PER_STATS_UPDATE
+   samples: the vector count of the previous stats slot divided (by
+   shifting) by the number of main loops per stats period. */
+always_inline u32
+vlib_last_vectors_per_main_loop (vlib_main_t * vm)
+{
+  u32 prev_slot = vlib_vector_input_stats_index (vm, -1);
+  u32 total_vectors = vm->vector_counts_per_main_loop[prev_slot];
+
+  return total_vectors >> VLIB_LOG2_MAIN_LOOPS_PER_STATS_UPDATE;
+}
+
+/* Total average vector count per iteration of main loop, computed in
+   floating point from the previous stats slot. */
+always_inline f64
+vlib_last_vectors_per_main_loop_as_f64 (vlib_main_t * vm)
+{
+  u32 prev_slot = vlib_vector_input_stats_index (vm, -1);
+  f64 total_vectors = (f64) vm->vector_counts_per_main_loop[prev_slot];
+  f64 loops_per_period = (f64) (1 << VLIB_LOG2_MAIN_LOOPS_PER_STATS_UPDATE);
+
+  return total_vectors / loops_per_period;
+}
+
+/* Total average vectors per node dispatch over the previous stats slot;
+   0 when no node dispatch was counted in that slot. */
+always_inline f64
+vlib_last_vector_length_per_node (vlib_main_t * vm)
+{
+  u32 prev_slot = vlib_vector_input_stats_index (vm, -1);
+  u32 total_vectors = vm->vector_counts_per_main_loop[prev_slot];
+  u32 total_nodes = vm->node_counts_per_main_loop[prev_slot];
+
+  if (total_nodes == 0)
+    return 0;
+
+  return (f64) total_vectors / (f64) total_nodes;
+}
+
+extern u32 wraps;
+
+/* Advance main_loop_count and fold this iteration's vector and node
+   counts into the per-period statistics arrays, then zero the
+   per-iteration counters.  When the pre-increment count is a multiple of
+   2^VLIB_LOG2_MAIN_LOOPS_PER_STATS_UPDATE the global `wraps' counter is
+   bumped and the selected slot is restarted from zero instead of being
+   accumulated into. */
+always_inline void
+vlib_increment_main_loop_counter (vlib_main_t * vm)
+{
+  u32 count_before = vm->main_loop_count++;
+  u32 is_wrap =
+    (count_before & pow2_mask (VLIB_LOG2_MAIN_LOOPS_PER_STATS_UPDATE)) == 0;
+  u32 slot;
+  u32 vectors = 0;
+  u32 nodes = 0;
+
+  if (is_wrap)
+    wraps++;
+
+  slot = vlib_vector_input_stats_index (vm, /* delta */ is_wrap);
+
+  if (!is_wrap)
+    {
+      vectors = vm->vector_counts_per_main_loop[slot];
+      nodes = vm->node_counts_per_main_loop[slot];
+    }
+
+  vectors += vm->main_loop_vectors_processed;
+  nodes += vm->main_loop_nodes_processed;
+
+  vm->main_loop_vectors_processed = 0;
+  vm->main_loop_nodes_processed = 0;
+
+  vm->vector_counts_per_main_loop[slot] = vectors;
+  vm->node_counts_per_main_loop[slot] = nodes;
+}
+
+/* Install FP as the main's queue signal callback. */
+always_inline void
+vlib_set_queue_signal_callback (vlib_main_t * vm, void (*fp) (vlib_main_t *))
+{
+  vm->queue_signal_callback = fp;
+}
+
+/* Main routine. */
+int vlib_main (vlib_main_t * vm, unformat_input_t * input);
+
+/* Thread stacks, for os_get_cpu_number */
+extern u8 **vlib_thread_stacks;
+
+/* Number of thread stacks that the application needs */
+u32 vlib_app_num_thread_stacks_needed (void) __attribute__ ((weak));
+
+extern void vlib_node_sync_stats (vlib_main_t * vm, vlib_node_t * n);
+
+#endif /* included_vlib_main_h */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vlib/mc.c b/src/vlib/mc.c
new file mode 100644
index 00000000000..8fde091389e
--- /dev/null
+++ b/src/vlib/mc.c
@@ -0,0 +1,2609 @@
+/*
+ * mc.c: vlib reliable sequenced multicast distributed applications
+ *
+ * Copyright (c) 2010 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vlib/vlib.h>
+
+/*
+ * 1 to enable msg id training wheels, which are useful for tracking
+ * down catchup and/or partitioned network problems
+ */
+#define MSG_ID_DEBUG 0
+
+static format_function_t format_mc_stream_state;
+
+/* Map PEER_ID to its event-log string id.  The string is produced with
+   the transport's peer id formatter on first use and cached in
+   m->elog_id_by_peer_id afterwards. */
+static u32
+elog_id_for_peer_id (mc_main_t * m, u64 peer_id)
+{
+  mhash_t *cache = &m->elog_id_by_peer_id;
+  uword *hit;
+  uword string_id;
+
+  /* Create the cache lazily on the first call. */
+  if (!cache->hash)
+    mhash_init (cache, sizeof (uword), sizeof (mc_peer_id_t));
+
+  hit = mhash_get (cache, &peer_id);
+  if (!hit)
+    {
+      string_id = elog_string (m->elog_main, "%U",
+                               m->transport.format_peer_id, peer_id);
+      mhash_set (cache, &peer_id, string_id, /* old_value */ 0);
+      return string_id;
+    }
+
+  return hit[0];
+}
+
+/* Map MSG_NAME to its event-log string id, creating the string and
+   caching it in m->elog_id_by_msg_name the first time a name is seen. */
+static u32
+elog_id_for_msg_name (mc_main_t * m, char *msg_name)
+{
+  uword *table = m->elog_id_by_msg_name;
+  uword *hit, string_id;
+  u8 *key;
+
+  /* Create the string-keyed table lazily. */
+  if (!table)
+    table = m->elog_id_by_msg_name = hash_create_string (0, sizeof (uword));
+
+  hit = hash_get_mem (table, msg_name);
+  if (hit)
+    return hit[0];
+
+  string_id = elog_string (m->elog_main, "%s", msg_name);
+
+  /* Hash a private, explicitly NUL-terminated copy of the name rather
+     than the caller's pointer. */
+  key = format (0, "%s%c", msg_name, 0);
+  hash_set_mem (table, key, string_id);
+
+  /* Store back the (possibly updated) table pointer. */
+  m->elog_id_by_msg_name = table;
+
+  return string_id;
+}
+
+/* Log a "tx-msg" event recording the stream, local sequence number and
+   attempt count of a transmitted message.  Does nothing unless
+   MC_EVENT_LOGGING is enabled. */
+static void
+elog_tx_msg (mc_main_t * m, u32 stream_id, u32 local_sequence,
+             u32 retry_count)
+{
+  if (MC_EVENT_LOGGING <= 0)
+    return;
+
+  /* *INDENT-OFF* */
+  ELOG_TYPE_DECLARE (e) =
+    {
+      .format = "tx-msg: stream %d local seq %d attempt %d",
+      .format_args = "i4i4i4",
+    };
+  /* *INDENT-ON* */
+  struct
+  {
+    u32 stream_id, local_sequence, retry_count;
+  } *rec;
+
+  rec = ELOG_DATA (m->elog_main, e);
+  rec->retry_count = retry_count;
+  rec->local_sequence = local_sequence;
+  rec->stream_id = stream_id;
+}
+
+/*
+ * seq_cmp
+ * correctly compare two unsigned sequence numbers.
+ * This function works so long as x and y are within 2**(n-1) of each
+ * other, where n = bits(x, y).
+ *
+ * Magic decoder ring:
+ * seq_cmp == 0 => x and y are equal
+ * seq_cmp < 0 => x is "in the past" with respect to y
+ * seq_cmp > 0 => x is "in the future" with respect to y
+ *
+ * The subtraction is carried out on the unsigned operands, where
+ * wrap-around is well defined, and only the difference is converted to
+ * a signed value.  Subtracting two values already cast to i32 could
+ * overflow signed arithmetic, which is undefined behaviour.
+ */
+always_inline i32
+mc_seq_cmp (u32 x, u32 y)
+{
+  return (i32) (x - y);
+}
+
+/* Allocate one vlib buffer, set its current length to N_BYTES, store
+   the buffer index in *BI_RETURN and return a pointer to the buffer's
+   data area.  Allocation failure is only caught by ASSERT. */
+void *
+mc_get_vlib_buffer (vlib_main_t * vm, u32 n_bytes, u32 * bi_return)
+{
+  u32 buffer_index;
+  u32 n_got = vlib_buffer_alloc (vm, &buffer_index, 1);
+  vlib_buffer_t *buf;
+
+  ASSERT (n_got == 1);
+
+  buf = vlib_get_buffer (vm, buffer_index);
+  buf->current_length = n_bytes;
+  bi_return[0] = buffer_index;
+
+  return (void *) buf->data;
+}
+
+/* Remove the peer at pool INDEX from stream S's all_peer_bitmap,
+   optionally telling the application through the peer_died callback.
+   The pool and hash entries are deliberately kept. */
+static void
+delete_peer_with_index (mc_main_t * mcm, mc_stream_t * s,
+                        uword index, int notify_application)
+{
+  mc_stream_peer_t *peer = pool_elt_at_index (s->peers, index);
+
+  ASSERT (peer != 0);
+
+  if (notify_application && s->config.peer_died)
+    s->config.peer_died (mcm, s, peer->id);
+
+  s->all_peer_bitmap =
+    clib_bitmap_andnoti (s->all_peer_bitmap, peer - s->peers);
+
+  if (MC_EVENT_LOGGING > 0)
+    {
+      /* *INDENT-OFF* */
+      ELOG_TYPE_DECLARE (e) =
+        {
+          .format = "delete peer %s from all_peer_bitmap",
+          .format_args = "T4",
+        };
+      /* *INDENT-ON* */
+      struct
+      {
+        u32 peer;
+      } *rec = 0;
+
+      rec = ELOG_DATA (mcm->elog_main, e);
+      rec->peer = elog_id_for_peer_id (mcm, peer->id.as_u64);
+    }
+
+  /* The pool / hash table entries stay in place: deleting them would
+     lose the peer's sequence number state. */
+}
+
+/* Look up the peer with the given ID on stream S, creating a zeroed
+   pool entry (last_sequence_received = ~0) when it is not yet known.
+   *CREATED, when non-null, is set to 1 only if a new peer was made; it
+   is left untouched otherwise.  In both cases the peer's bit is set in
+   the stream's all_peer_bitmap before returning. */
+static mc_stream_peer_t *
+get_or_create_peer_with_id (mc_main_t * mcm,
+                            mc_stream_t * s, mc_peer_id_t id, int *created)
+{
+  uword *known = mhash_get (&s->peer_index_by_id, &id);
+  u32 is_new = known ? 0 : 1;
+  mc_stream_peer_t *peer;
+
+  if (known)
+    peer = pool_elt_at_index (s->peers, known[0]);
+  else
+    {
+      pool_get (s->peers, peer);
+      memset (peer, 0, sizeof (peer[0]));
+      peer->id = id;
+      peer->last_sequence_received = ~0;
+      mhash_set (&s->peer_index_by_id, &id, peer - s->peers,
+                 /* old_value */ 0);
+      if (created)
+        *created = 1;
+    }
+
+  if (MC_EVENT_LOGGING > 0)
+    {
+      /* *INDENT-OFF* */
+      ELOG_TYPE_DECLARE (e) =
+        {
+          .format = "get_or_create %s peer %s stream %d seq %d",
+          .format_args = "t4T4i4i4",
+          .n_enum_strings = 2,
+          .enum_strings = {
+            "old", "new",
+          },
+        };
+      /* *INDENT-ON* */
+      struct
+      {
+        u32 is_new, peer, stream_index, rx_sequence;
+      } *rec = 0;
+
+      rec = ELOG_DATA (mcm->elog_main, e);
+      rec->is_new = is_new;
+      rec->peer = elog_id_for_peer_id (mcm, peer->id.as_u64);
+      rec->stream_index = s->index;
+      rec->rx_sequence = peer->last_sequence_received;
+    }
+
+  /* $$$$ Enable or reenable this peer */
+  s->all_peer_bitmap = clib_bitmap_ori (s->all_peer_bitmap, peer - s->peers);
+  return peer;
+}
+
+/* If STREAM's retry pool holds fewer entries than the configured window
+   size, signal every process waiting for the window to open and empty
+   the waiter vector. */
+static void
+maybe_send_window_open_event (vlib_main_t * vm, mc_stream_t * stream)
+{
+  vlib_one_time_waiting_process_t *waiter;
+
+  if (pool_elts (stream->retry_pool) < stream->config.window_size)
+    {
+      vec_foreach (waiter, stream->procs_waiting_for_open_window)
+        vlib_signal_one_time_waiting_process (vm, waiter);
+
+      if (stream->procs_waiting_for_open_window)
+        _vec_len (stream->procs_waiting_for_open_window) = 0;
+    }
+}
+
+/* Retire retry R: reset its unacked bitmap and hand its buffer over to
+   the stream's retired fifo.  Once that fifo holds 2 * window_size
+   entries, the oldest one is dropped and its buffer freed first. */
+static void
+mc_retry_free (mc_main_t * mcm, mc_stream_t * s, mc_retry_t * r)
+{
+  mc_retry_t evicted, *slot;
+
+  if (r->unacked_by_peer_bitmap)
+    _vec_len (r->unacked_by_peer_bitmap) = 0;
+
+  /* Make room by evicting the oldest retired entry. */
+  if (clib_fifo_elts (s->retired_fifo) >= 2 * s->config.window_size)
+    {
+      clib_fifo_sub1 (s->retired_fifo, evicted);
+      vlib_buffer_free_one (mcm->vlib_main, evicted.buffer_index);
+    }
+
+  clib_fifo_add2 (s->retired_fifo, slot);
+
+  slot->local_sequence = r->local_sequence;
+  slot->buffer_index = r->buffer_index;
+
+  /* The buffer now belongs to the retired fifo; poison our copy. */
+  r->buffer_index = ~0;
+}
+
+/* Search stream S's retired fifo for the message with the given
+   LOCAL_SEQUENCE and, if it is still there, transmit its buffer again.
+   Both the search and a failed search are event-logged. */
+static void
+mc_resend_retired (mc_main_t * mcm, mc_stream_t * s, u32 local_sequence)
+{
+  mc_retry_t *old;
+
+  if (MC_EVENT_LOGGING > 0)
+    {
+      /* *INDENT-OFF* */
+      ELOG_TYPE_DECLARE (e) =
+        {
+          .format = "resend-retired: search for local seq %d",
+          .format_args = "i4",
+        };
+      /* *INDENT-ON* */
+      struct
+      {
+        u32 local_sequence;
+      } *rec;
+
+      rec = ELOG_DATA (mcm->elog_main, e);
+      rec->local_sequence = local_sequence;
+    }
+
+  /* *INDENT-OFF* */
+  clib_fifo_foreach (old, s->retired_fifo,
+  ({
+    if (old->local_sequence == local_sequence)
+      {
+        elog_tx_msg (mcm, s->index, old->local_sequence, -13);
+        mcm->transport.tx_buffer (mcm->transport.opaque,
+                                  MC_TRANSPORT_USER_REQUEST_TO_RELAY,
+                                  old->buffer_index);
+        /* Found and resent; nothing more to do. */
+        return;
+      }
+  }));
+  /* *INDENT-ON* */
+
+  /* Only reached when the message is no longer in the retired fifo. */
+  if (MC_EVENT_LOGGING > 0)
+    {
+      /* *INDENT-OFF* */
+      ELOG_TYPE_DECLARE (e) =
+        {
+          .format = "resend-retired: FAILED search for local seq %d",
+          .format_args = "i4",
+        };
+      /* *INDENT-ON* */
+      struct
+      {
+        u32 local_sequence;
+      } *rec;
+
+      rec = ELOG_DATA (mcm->elog_main, e);
+      rec->local_sequence = local_sequence;
+    }
+}
+
+/* Give up on retry R of STREAM.  Every peer whose bit is still set in
+   R's unacked bitmap is added to DEAD_PEER_BITMAP; R is then removed
+   from the retry-by-sequence hash and retired via mc_retry_free.
+   Returns the (possibly reallocated) dead peer bitmap.  The caller is
+   responsible for unlinking R from the retry pool. */
+static uword *
+delete_retry_fifo_elt (mc_main_t * mcm,
+                       mc_stream_t * stream,
+                       mc_retry_t * r, uword * dead_peer_bitmap)
+{
+  mc_stream_peer_t *p;
+
+  /* *INDENT-OFF* */
+  pool_foreach (p, stream->peers, ({
+    uword pi = p - stream->peers;
+    /* A peer counts as alive when it has acked this message. */
+    uword is_alive = 0 == clib_bitmap_get (r->unacked_by_peer_bitmap, pi);
+
+    if (! is_alive)
+      dead_peer_bitmap = clib_bitmap_ori (dead_peer_bitmap, pi);
+
+    if (MC_EVENT_LOGGING > 0)
+      {
+        ELOG_TYPE_DECLARE (e) = {
+          .format = "delete_retry_fifo_elt: peer %s is %s",
+          .format_args = "T4t4",
+          .n_enum_strings = 2,
+          .enum_strings = { "alive", "dead", },
+        };
+        struct { u32 peer, is_dead; } * ed;
+        ed = ELOG_DATA (mcm->elog_main, e);
+        ed->peer = elog_id_for_peer_id (mcm, p->id.as_u64);
+        /* The field indexes enum_strings: 0 => "alive", 1 => "dead".
+           Logging is_alive directly would print the opposite state. */
+        ed->is_dead = ! is_alive;
+      }
+  }));
+  /* *INDENT-ON* */
+
+  hash_unset (stream->retry_index_by_local_sequence, r->local_sequence);
+  mc_retry_free (mcm, stream, r);
+
+  return dead_peer_bitmap;
+}
+
+/* Retry linked before R in stream S's retry list, or 0 when R's
+   prev_index is ~0. */
+always_inline mc_retry_t *
+prev_retry (mc_stream_t * s, mc_retry_t * r)
+{
+  if (r->prev_index == ~0)
+    return 0;
+
+  return pool_elt_at_index (s->retry_pool, r->prev_index);
+}
+
+/* Retry linked after R in stream S's retry list, or 0 when R's
+   next_index is ~0. */
+always_inline mc_retry_t *
+next_retry (mc_stream_t * s, mc_retry_t * r)
+{
+  if (r->next_index == ~0)
+    return 0;
+
+  return pool_elt_at_index (s->retry_pool, r->next_index);
+}
+
+/* Unlink R from stream S's doubly linked retry list, fixing up the
+   head/tail indices when R was first/last, then return R to the pool. */
+always_inline void
+remove_retry_from_pool (mc_stream_t * s, mc_retry_t * r)
+{
+  mc_retry_t *before = prev_retry (s, r);
+  mc_retry_t *after = next_retry (s, r);
+
+  if (!before)
+    s->retry_head_index = r->next_index;
+  else
+    before->next_index = r->next_index;
+
+  if (!after)
+    s->retry_tail_index = r->prev_index;
+  else
+    after->prev_index = r->prev_index;
+
+  pool_put_index (s->retry_pool, r - s->retry_pool);
+}
+
+/* Walk stream S's retry list.  Entries whose retry interval has elapsed
+   are either retransmitted or, once they exceed the retry limit,
+   abandoned; peers that never acked an abandoned entry are then deleted
+   (with application notification) and cleared from every remaining
+   retry's unacked bitmap. */
+static void
+check_retry (mc_main_t * mcm, mc_stream_t * s)
+{
+  vlib_main_t *vm = mcm->vlib_main;
+  f64 now = vlib_time_now (vm);
+  uword *dead_peer_bitmap = 0;
+  mc_retry_t *r;
+  u32 ri = s->retry_head_index;
+
+  while (ri != ~0)
+    {
+      r = pool_elt_at_index (s->retry_pool, ri);
+      /* Advance first: R may be returned to the pool below. */
+      ri = r->next_index;
+
+      /* Not due for a retry yet. */
+      if (now < r->sent_at + s->config.retry_interval)
+        continue;
+
+      r->n_retries += 1;
+      if (r->n_retries > s->config.retry_limit)
+        {
+          /* Too many attempts: give up and note who never answered. */
+          dead_peer_bitmap =
+            delete_retry_fifo_elt (mcm, s, r, dead_peer_bitmap);
+          remove_retry_from_pool (s, r);
+          continue;
+        }
+
+      if (MC_EVENT_LOGGING > 0)
+        {
+          mc_stream_peer_t *p;
+
+          /* *INDENT-OFF* */
+          ELOG_TYPE_DECLARE (t) =
+            {
+              .format = "resend local seq %d attempt %d",
+              .format_args = "i4i4",
+            };
+          /* *INDENT-ON* */
+
+          /* One event per peer that has not acked yet. */
+          /* *INDENT-OFF* */
+          pool_foreach (p, s->peers, ({
+            if (clib_bitmap_get (r->unacked_by_peer_bitmap, p - s->peers))
+              {
+                ELOG_TYPE_DECLARE (ev) = {
+                  .format = "resend: needed by peer %s local seq %d",
+                  .format_args = "T4i4",
+                };
+                struct { u32 peer, rx_sequence; } * ed;
+                ed = ELOG_DATA (mcm->elog_main, ev);
+                ed->peer = elog_id_for_peer_id (mcm, p->id.as_u64);
+                ed->rx_sequence = r->local_sequence;
+              }
+          }));
+          /* *INDENT-ON* */
+
+          struct
+          {
+            u32 sequence;
+            u32 trail;
+          } *ed;
+          ed = ELOG_DATA (mcm->elog_main, t);
+          ed->sequence = r->local_sequence;
+          ed->trail = r->n_retries;
+        }
+
+      /* Retransmit and restart this entry's retry timer. */
+      r->sent_at = vlib_time_now (vm);
+      s->stats.n_retries += 1;
+
+      elog_tx_msg (mcm, s->index, r->local_sequence, r->n_retries);
+
+      mcm->transport.tx_buffer
+        (mcm->transport.opaque,
+         MC_TRANSPORT_USER_REQUEST_TO_RELAY, r->buffer_index);
+    }
+
+  maybe_send_window_open_event (mcm->vlib_main, s);
+
+  /* Nothing more to do unless dead peers were found. */
+  if (clib_bitmap_is_zero (dead_peer_bitmap))
+    return;
+
+  {
+    uword i;
+
+    /* *INDENT-OFF* */
+    clib_bitmap_foreach (i, dead_peer_bitmap, ({
+      delete_peer_with_index (mcm, s, i, /* notify_application */ 1);
+
+      /* Delete any references to just deleted peer in retry pool. */
+      pool_foreach (r, s->retry_pool, ({
+        r->unacked_by_peer_bitmap =
+          clib_bitmap_andnoti (r->unacked_by_peer_bitmap, i);
+      }));
+    }));
+    /* *INDENT-ON* */
+    clib_bitmap_free (dead_peer_bitmap);
+  }
+}
+
+/* Fetch the mc_main_t pointer stored at the start of NODE's runtime
+   data. */
+always_inline mc_main_t *
+mc_node_get_main (vlib_node_runtime_t * node)
+{
+  mc_main_t **slot = (void *) node->runtime_data;
+
+  return *slot;
+}
+
+/* Process node body: after each 1.0 suspend, run check_retry on every
+   stream whose state is not MC_STREAM_STATE_invalid.  Loops forever. */
+static uword
+mc_retry_process (vlib_main_t * vm,
+                  vlib_node_runtime_t * node, vlib_frame_t * f)
+{
+  mc_main_t *mcm = mc_node_get_main (node);
+  mc_stream_t *stream;
+
+  for (;;)
+    {
+      vlib_process_suspend (vm, 1.0);
+
+      vec_foreach (stream, mcm->stream_vector)
+      {
+        if (stream->state == MC_STREAM_STATE_invalid)
+          continue;
+        check_retry (mcm, stream);
+      }
+    }
+
+  return 0;                     /* not likely */
+}
+
+/* Build a join (IS_JOIN != 0) or leave request for STREAM_INDEX in a
+   fresh buffer, byte swap it and transmit it with the
+   MC_TRANSPORT_JOIN transport type. */
+static void
+send_join_or_leave_request (mc_main_t * mcm, u32 stream_index, u32 is_join)
+{
+  mc_msg_join_or_leave_request_t *req;
+  u32 buffer_index;
+
+  req = mc_get_vlib_buffer (mcm->vlib_main, sizeof (req[0]), &buffer_index);
+  memset (req, 0, sizeof (*req));
+
+  req->type = MC_MSG_TYPE_join_or_leave_request;
+  req->peer_id = mcm->transport.our_ack_peer_id;
+  req->stream_index = stream_index;
+  req->is_join = is_join;
+
+  mc_byte_swap_msg_join_or_leave_request (req);
+
+  /*
+   * These msgs are unnumbered and unordered, so they go out with the
+   * MC_TRANSPORT_JOIN transport type.
+   */
+  mcm->transport.tx_buffer (mcm->transport.opaque, MC_TRANSPORT_JOIN,
+                            buffer_index);
+}
+
+/* Process node body that ages pending stream joins.  Every .5 suspend,
+   while joins are in progress, each stream in the join_in_progress
+   state either times out (it becomes ready, we register ourselves as a
+   peer and wake the processes waiting for the join) or has its join
+   request sent again. */
+static uword
+mc_join_ager_process (vlib_main_t * vm,
+                      vlib_node_runtime_t * node, vlib_frame_t * f)
+{
+  mc_main_t *mcm = mc_node_get_main (node);
+
+  for (;;)
+    {
+      if (mcm->joins_in_progress)
+        {
+          vlib_one_time_waiting_process_t *waiter;
+          mc_stream_t *s;
+          f64 now = vlib_time_now (vm);
+
+          vec_foreach (s, mcm->stream_vector)
+          {
+            if (s->state != MC_STREAM_STATE_join_in_progress)
+              continue;
+
+            if (!(now > s->join_timeout))
+              {
+                /* Resend join request which may have been lost. */
+                send_join_or_leave_request (mcm, s->index,
+                                            1 /* is_join */ );
+
+                /* We're *not* alone, retry for as long as it takes */
+                if (mcm->relay_state == MC_RELAY_STATE_SLAVE)
+                  s->join_timeout = vlib_time_now (vm) + 2.0;
+
+                if (MC_EVENT_LOGGING > 0)
+                  {
+                    /* *INDENT-OFF* */
+                    ELOG_TYPE_DECLARE (e) =
+                      {
+                        .format = "stream %d resend join request",
+                      };
+                    /* *INDENT-ON* */
+                    ELOG (mcm->elog_main, e, s->index);
+                  }
+                continue;
+              }
+
+            /* Join timed out: nobody answered, so the stream is ready. */
+            s->state = MC_STREAM_STATE_ready;
+
+            if (MC_EVENT_LOGGING > 0)
+              {
+                /* *INDENT-OFF* */
+                ELOG_TYPE_DECLARE (e) =
+                  {
+                    .format = "stream %d join timeout",
+                  };
+                /* *INDENT-ON* */
+                ELOG (mcm->elog_main, e, s->index);
+              }
+
+            /* Make sure that this app instance exists as a stream peer,
+               or we may answer a catchup request with a NULL
+               all_peer_bitmap... */
+            (void) get_or_create_peer_with_id
+              (mcm, s, mcm->transport.our_ack_peer_id, /* created */ 0);
+
+            vec_foreach (waiter, s->procs_waiting_for_join_done)
+              vlib_signal_one_time_waiting_process (vm, waiter);
+            if (s->procs_waiting_for_join_done)
+              _vec_len (s->procs_waiting_for_join_done) = 0;
+
+            mcm->joins_in_progress--;
+            ASSERT (mcm->joins_in_progress >= 0);
+          }
+        }
+
+      vlib_process_suspend (vm, .5);
+    }
+
+  return 0;                     /* not likely */
+}
+
+/* Serialize callback: takes one `char *' argument from VA and writes
+   it to M as a C string. */
+static void
+serialize_mc_register_stream_name (serialize_main_t * m, va_list * va)
+{
+  char *stream_name = va_arg (*va, char *);
+
+  serialize_cstring (m, stream_name);
+}
+
+/* Copy stream name V into the fixed-size event-log field BUF of
+   N_BUF_BYTES bytes, truncating to N_BUF_BYTES - 1 characters.
+
+   V is treated as a NUL-terminated C string rather than as a vector:
+   callers pass both unserialized names and plain C strings (the
+   internal stream's name is a string literal), and taking vec_len ()
+   of a non-vector reads memory in front of the string.  The unused
+   tail of BUF is zero filled so that a short name is never followed by
+   stale event data.  A null V yields an empty string. */
+static void
+elog_stream_name (char *buf, int n_buf_bytes, char *v)
+{
+  int i = 0;
+
+  if (n_buf_bytes <= 0)
+    return;
+
+  if (v)
+    for (; i < n_buf_bytes - 1 && v[i] != 0; i++)
+      buf[i] = v[i];
+
+  /* Terminate and clear whatever is left of the field. */
+  for (; i < n_buf_bytes; i++)
+    buf[i] = 0;
+}
+
+/* Unserialize callback for the "register stream name" message.
+
+   Reads a stream name from M.  If the name is already mapped to a
+   stream index the duplicate is logged and the name freed.  Otherwise a
+   new stream is appended to mcm->stream_vector in the name_known state,
+   the name -> index mapping is recorded (the stream keeps the name
+   vector), and every process waiting for this name is signalled. */
+static void
+unserialize_mc_register_stream_name (serialize_main_t * m, va_list * va)
+{
+  mc_main_t *mcm = va_arg (*va, mc_main_t *);
+  char *name;
+  mc_stream_t *s;
+  uword *p;
+
+  unserialize_cstring (m, &name);
+
+  if ((p = hash_get_mem (mcm->stream_index_by_name, name)))
+    {
+      if (MC_EVENT_LOGGING > 0)
+        {
+          /* *INDENT-OFF* */
+          ELOG_TYPE_DECLARE (e) =
+            {
+              .format = "stream index %d already named %s",
+              .format_args = "i4s16",
+            };
+          /* *INDENT-ON* */
+          struct
+          {
+            u32 stream_index;
+            char name[16];
+          } *ed;
+          ed = ELOG_DATA (mcm->elog_main, e);
+          ed->stream_index = p[0];
+          elog_stream_name (ed->name, sizeof (ed->name), name);
+        }
+
+      /* Name already registered; our copy is not needed. */
+      vec_free (name);
+      return;
+    }
+
+  vec_add2 (mcm->stream_vector, s, 1);
+  mc_stream_init (s);
+  s->state = MC_STREAM_STATE_name_known;
+  s->index = s - mcm->stream_vector;
+  s->config.name = name;
+
+  if (MC_EVENT_LOGGING > 0)
+    {
+      /* *INDENT-OFF* */
+      ELOG_TYPE_DECLARE (e) =
+        {
+          .format = "stream index %d named %s",
+          .format_args = "i4s16",
+        };
+      /* *INDENT-ON* */
+      struct
+      {
+        u32 stream_index;
+        char name[16];
+      } *ed;
+      ed = ELOG_DATA (mcm->elog_main, e);
+      ed->stream_index = s->index;
+      elog_stream_name (ed->name, sizeof (ed->name), name);
+    }
+
+  hash_set_mem (mcm->stream_index_by_name, name, s->index);
+
+  /* The waiter table is keyed by string, so look it up with the _mem
+     accessor like every other user of this table. */
+  p = hash_get_mem (mcm->procs_waiting_for_stream_name_by_name, name);
+  if (p)
+    {
+      vlib_one_time_waiting_process_t *wp, **w;
+      w = pool_elt_at_index (mcm->procs_waiting_for_stream_name_pool, p[0]);
+      vec_foreach (wp, w[0])
+        vlib_signal_one_time_waiting_process (mcm->vlib_main, wp);
+      /* Release the waiter vector before giving the slot back to the
+         pool; the slot is reset to 0 when reused, which would otherwise
+         leak it. */
+      vec_free (w[0]);
+      pool_put (mcm->procs_waiting_for_stream_name_pool, w);
+      hash_unset_mem (mcm->procs_waiting_for_stream_name_by_name, name);
+    }
+}
+
+/* *INDENT-OFF* */
+MC_SERIALIZE_MSG (mc_register_stream_name_msg, static) =
+{
+ .name = "mc_register_stream_name",	/* message name string */
+ .serialize = serialize_mc_register_stream_name,	/* writes the stream name as a C string */
+ .unserialize = unserialize_mc_register_stream_name,	/* registers the named stream and wakes waiters */
+};
+/* *INDENT-ON* */
+
+/* rx_buffer callback that hands BUFFER_INDEX to mc_unserialize for
+   STREAM.  PEER_ID is accepted to match the callback signature but is
+   not used.
+
+   The call is made as a plain statement: ISO C does not allow
+   `return expr;' in a function returning void. */
+void
+mc_rx_buffer_unserialize (mc_main_t * mcm,
+                          mc_stream_t * stream,
+                          mc_peer_id_t peer_id, u32 buffer_index)
+{
+  mc_unserialize (mcm, stream, buffer_index);
+}
+
+/* catchup_snapshot callback of the internal stream: appends the
+   serialized mc_main state to DATA_VECTOR and returns the resulting
+   vector.  LAST_GLOBAL_SEQUENCE_PROCESSED is not used here. */
+static u8 *
+mc_internal_catchup_snapshot (mc_main_t * mcm,
+                              u8 * data_vector,
+                              u32 last_global_sequence_processed)
+{
+  serialize_main_t sm;
+
+  serialize_open_vector (&sm, data_vector);
+  /* Start writing after the bytes already in the vector. */
+  sm.stream.current_buffer_index = vec_len (data_vector);
+
+  serialize (&sm, serialize_mc_main, mcm);
+
+  return serialize_close_vector (&sm);
+}
+
+/* catchup callback of the internal stream: unserializes mc_main state
+   from the N_DATA_BYTES bytes at DATA. */
+static void
+mc_internal_catchup (mc_main_t * mcm, u8 * data, u32 n_data_bytes)
+{
+  serialize_main_t sm;
+
+  unserialize_open_data (&sm, data, n_data_bytes);
+  unserialize (&sm, unserialize_mc_main, mcm);
+}
+
+/* Overridden from the application layer, not actually used here.
+   This is the default definition: it is declared weak and does
+   nothing. */
+void mc_stream_join_process_hold (void) __attribute__ ((weak));
+void
+mc_stream_join_process_hold (void)
+{
+}
+
+/* Join the stream described by CONFIG and suspend the calling process
+   until the join is done.  Returns the stream index.
+
+   For a non-internal stream (IS_INTERNAL == 0):
+   - returns at once if a stream of that name is already ready;
+   - joins the MC internal stream first if that has not happened yet;
+   - if the name is still unknown, sends a register-stream-name message
+     on the internal stream and waits until the name has been assigned
+     an index, so that all peers agree on name to index mappings.
+
+   Zero window_size, retry_interval and retry_limit values in CONFIG
+   are replaced by 8, 1.0 and 7 respectively. */
+static u32
+mc_stream_join_helper (mc_main_t * mcm,
+                       mc_stream_config_t * config, u32 is_internal)
+{
+  mc_stream_t *s;
+  vlib_main_t *vm = mcm->vlib_main;
+
+  s = 0;
+  if (!is_internal)
+    {
+      uword *p;
+
+      /* Already have a stream with given name? */
+      if ((s = mc_stream_by_name (mcm, config->name)))
+        {
+          /* Already joined and ready? */
+          if (s->state == MC_STREAM_STATE_ready)
+            return s->index;
+        }
+
+      /* First join MC internal stream. */
+      if (!mcm->stream_vector
+          || (mcm->stream_vector[MC_STREAM_INDEX_INTERNAL].state
+              == MC_STREAM_STATE_invalid))
+        {
+          static mc_stream_config_t c = {
+            .name = "mc-internal",
+            .rx_buffer = mc_rx_buffer_unserialize,
+            .catchup = mc_internal_catchup,
+            .catchup_snapshot = mc_internal_catchup_snapshot,
+          };
+
+          c.save_snapshot = config->save_snapshot;
+
+          mc_stream_join_helper (mcm, &c, /* is_internal */ 1);
+        }
+
+      /* If stream is still unknown register this name and wait for
+         sequenced message to name stream.  This way all peers agree
+         on stream name to index mappings. */
+      s = mc_stream_by_name (mcm, config->name);
+      if (!s)
+        {
+          vlib_one_time_waiting_process_t *wp, **w;
+          /* The copy is used as a key in a string hash, so it must be
+             NUL terminated; "%s" alone does not append a terminator to
+             the vector. */
+          u8 *name_copy = format (0, "%s%c", config->name, 0);
+
+          mc_serialize_stream (mcm,
+                               MC_STREAM_INDEX_INTERNAL,
+                               &mc_register_stream_name_msg, config->name);
+
+          /* Wait for this stream to be named. */
+          p =
+            hash_get_mem (mcm->procs_waiting_for_stream_name_by_name,
+                          name_copy);
+          if (p)
+            w =
+              pool_elt_at_index (mcm->procs_waiting_for_stream_name_pool,
+                                 p[0]);
+          else
+            {
+              /* First waiter for this name: create its waiter list. */
+              pool_get (mcm->procs_waiting_for_stream_name_pool, w);
+              if (!mcm->procs_waiting_for_stream_name_by_name)
+                mcm->procs_waiting_for_stream_name_by_name =
+                  hash_create_string ( /* elts */ 0,
+                                      /* value size */ sizeof (uword));
+              hash_set_mem (mcm->procs_waiting_for_stream_name_by_name,
+                            name_copy,
+                            w - mcm->procs_waiting_for_stream_name_pool);
+              w[0] = 0;
+            }
+
+          vec_add2 (w[0], wp, 1);
+          vlib_current_process_wait_for_one_time_event (vm, wp);
+          vec_free (name_copy);
+        }
+
+      /* Name should be known now. */
+      s = mc_stream_by_name (mcm, config->name);
+      ASSERT (s != 0);
+      ASSERT (s->state == MC_STREAM_STATE_name_known);
+    }
+
+  /* Only the internal stream gets here without a stream entry. */
+  if (!s)
+    {
+      vec_add2 (mcm->stream_vector, s, 1);
+      mc_stream_init (s);
+      s->index = s - mcm->stream_vector;
+    }
+
+  {
+    /* Save name since we could have already used it as hash key. */
+    char *name_save = s->config.name;
+
+    s->config = config[0];
+
+    if (name_save)
+      s->config.name = name_save;
+  }
+
+  if (s->config.window_size == 0)
+    s->config.window_size = 8;
+
+  if (s->config.retry_interval == 0.0)
+    s->config.retry_interval = 1.0;
+
+  /* Sanity. */
+  ASSERT (s->config.retry_interval < 30);
+
+  if (s->config.retry_limit == 0)
+    s->config.retry_limit = 7;
+
+  s->state = MC_STREAM_STATE_join_in_progress;
+  if (!s->peer_index_by_id.hash)
+    mhash_init (&s->peer_index_by_id, sizeof (uword), sizeof (mc_peer_id_t));
+
+  /* If we don't hear from someone in 5 seconds, we're alone */
+  s->join_timeout = vlib_time_now (vm) + 5.0;
+  mcm->joins_in_progress++;
+
+  if (MC_EVENT_LOGGING > 0)
+    {
+      /* *INDENT-OFF* */
+      ELOG_TYPE_DECLARE (e) =
+        {
+          .format = "stream index %d join request %s",
+          .format_args = "i4s16",
+        };
+      /* *INDENT-ON* */
+      struct
+      {
+        u32 stream_index;
+        char name[16];
+      } *ed;
+      ed = ELOG_DATA (mcm->elog_main, e);
+      ed->stream_index = s->index;
+      elog_stream_name (ed->name, sizeof (ed->name), s->config.name);
+    }
+
+  send_join_or_leave_request (mcm, s->index, 1 /* join */ );
+
+  /* Suspend until the join completes or times out. */
+  vlib_current_process_wait_for_one_time_event_vector
+    (vm, &s->procs_waiting_for_join_done);
+
+  if (MC_EVENT_LOGGING)
+    {
+      ELOG_TYPE (e, "join complete stream %d");
+      ELOG (mcm->elog_main, e, s->index);
+    }
+
+  return s->index;
+}
+
+/* Public entry point for joining an application (non-internal) stream.
+   Returns the stream index reported by mc_stream_join_helper. */
+u32
+mc_stream_join (mc_main_t * mcm, mc_stream_config_t * config)
+{
+  u32 stream_index;
+
+  stream_index = mc_stream_join_helper (mcm, config, /* is_internal */ 0);
+  return stream_index;
+}
+
+/* Leave the stream with the given index: send a leave request, free
+   the stream's state and put it back in the name_known state.  Unknown
+   stream indices are ignored. */
+void
+mc_stream_leave (mc_main_t * mcm, u32 stream_index)
+{
+  mc_stream_t *stream = mc_stream_by_index (mcm, stream_index);
+
+  if (stream == 0)
+    return;
+
+  if (MC_EVENT_LOGGING)
+    {
+      /* *INDENT-OFF* */
+      ELOG_TYPE_DECLARE (t) =
+        {
+          .format = "leave-stream: %d",.format_args = "i4",
+        };
+      /* *INDENT-ON* */
+      struct
+      {
+        u32 index;
+      } *rec;
+
+      rec = ELOG_DATA (mcm->elog_main, t);
+      rec->index = stream_index;
+    }
+
+  send_join_or_leave_request (mcm, stream_index, 0 /* is_join */ );
+  mc_stream_free (stream);
+  stream->state = MC_STREAM_STATE_name_known;
+}
+
+/* Handle a join-or-leave request from a peer.  Requests for streams
+   that are unknown or not ready are ignored.  A leave request is
+   reported through the stream's peer_died callback.  A join request is
+   ignored while any local join is still in progress; otherwise the
+   peer is created or re-enabled and a join reply is sent. */
+void
+mc_msg_join_or_leave_request_handler (mc_main_t * mcm,
+                                      mc_msg_join_or_leave_request_t * req,
+                                      u32 buffer_index)
+{
+  mc_stream_t *s;
+  mc_stream_t *this_s;
+  mc_msg_join_reply_t *reply;
+  u32 reply_bi;
+
+  mc_byte_swap_msg_join_or_leave_request (req);
+
+  s = mc_stream_by_index (mcm, req->stream_index);
+  if (!s || s->state != MC_STREAM_STATE_ready)
+    return;
+
+  /* Peer is leaving. */
+  if (!req->is_join)
+    {
+      if (s->config.peer_died)
+        s->config.peer_died (mcm, s, req->peer_id);
+      return;
+    }
+
+  /* We're not in a position to catch up a peer until all
+     stream joins are complete. */
+  if (0)
+    {
+      /* XXX This per-stream check is hard to test, so it is compiled
+         out in favour of the joins_in_progress test below. */
+      vec_foreach (this_s, mcm->stream_vector)
+      {
+        if (this_s->state != MC_STREAM_STATE_ready
+            && this_s->state != MC_STREAM_STATE_name_known)
+          return;
+      }
+    }
+  else if (mcm->joins_in_progress > 0)
+    return;
+
+  /* The peer is joining: create it. */
+  (void) get_or_create_peer_with_id (mcm, s, req->peer_id,
+                                     /* created */ 0);
+
+  reply = mc_get_vlib_buffer (mcm->vlib_main, sizeof (reply[0]), &reply_bi);
+  memset (reply, 0, sizeof (reply[0]));
+  reply->type = MC_MSG_TYPE_join_reply;
+  reply->stream_index = req->stream_index;
+
+  mc_byte_swap_msg_join_reply (reply);
+  /* These two are already in network byte order... */
+  reply->peer_id = mcm->transport.our_ack_peer_id;
+  reply->catchup_peer_id = mcm->transport.our_catchup_peer_id;
+
+  mcm->transport.tx_buffer (mcm->transport.opaque, MC_TRANSPORT_JOIN,
+                            reply_bi);
+}
+
+/* Handle a join reply.  Only the first reply for a stream whose join is
+   in progress is acted upon: the stream moves to the catchup state, the
+   pending join count drops, and a catchup is requested from the peer
+   named in the reply. */
+void
+mc_msg_join_reply_handler (mc_main_t * mcm,
+                           mc_msg_join_reply_t * mp, u32 buffer_index)
+{
+  mc_stream_t *stream;
+
+  mc_byte_swap_msg_join_reply (mp);
+
+  stream = mc_stream_by_index (mcm, mp->stream_index);
+  if (stream == 0)
+    return;
+  if (stream->state != MC_STREAM_STATE_join_in_progress)
+    return;
+
+  /* Switch to catchup state; next join reply
+     for this stream will be ignored. */
+  stream->state = MC_STREAM_STATE_catchup;
+  mcm->joins_in_progress--;
+
+  mcm->transport.catchup_request_fun (mcm->transport.opaque,
+                                      mp->stream_index,
+                                      mp->catchup_peer_id);
+}
+
+/* Suspend the calling process until the stream called STREAM_NAME can
+   accept messages: poll every .1 until the name is known, then, unless
+   the stream is already in the catchup or ready state, wait for its
+   join to finish. */
+void
+mc_wait_for_stream_ready (mc_main_t * m, char *stream_name)
+{
+  mc_stream_t *stream;
+
+  while (!(stream = mc_stream_by_name (m, stream_name)))
+    vlib_process_suspend (m->vlib_main, .1);
+
+  /* It's OK to send a message in catchup and ready states;
+     otherwise we are waiting for a join to finish. */
+  if (stream->state != MC_STREAM_STATE_catchup
+      && stream->state != MC_STREAM_STATE_ready)
+    vlib_current_process_wait_for_one_time_event_vector
+      (m->vlib_main, &stream->procs_waiting_for_join_done);
+}
+
+/* Send the user message in BUFFER_INDEX on stream STREAM_INDEX.
+
+   The calling process is suspended until the stream is ready and the
+   retry window has room.  A retry entry is appended to the stream's
+   retry list, a user-request header is prepended to the buffer, and
+   the buffer is transmitted.  Returns the number of window slots still
+   free, or 0 when the stream does not exist. */
+u32
+mc_stream_send (mc_main_t * mcm, u32 stream_index, u32 buffer_index)
+{
+  vlib_main_t *vm = mcm->vlib_main;
+  mc_stream_t *s = mc_stream_by_index (mcm, stream_index);
+  vlib_buffer_t *b = vlib_get_buffer (vm, buffer_index);
+  mc_msg_user_request_t *hdr;
+  mc_retry_t *r;
+  u32 ri;
+
+  if (!s)
+    return 0;
+
+  if (s->state != MC_STREAM_STATE_ready)
+    vlib_current_process_wait_for_one_time_event_vector
+      (vm, &s->procs_waiting_for_join_done);
+
+  /* Block while the send window is full. */
+  while (pool_elts (s->retry_pool) >= s->config.window_size)
+    vlib_current_process_wait_for_one_time_event_vector
+      (vm, &s->procs_waiting_for_open_window);
+
+  pool_get (s->retry_pool, r);
+  ri = r - s->retry_pool;
+
+  /* Append the new retry at the tail of the list. */
+  r->next_index = ~0;
+  r->prev_index = s->retry_tail_index;
+  s->retry_tail_index = ri;
+
+  if (r->prev_index != ~0)
+    {
+      mc_retry_t *old_tail = pool_elt_at_index (s->retry_pool, r->prev_index);
+      old_tail->next_index = ri;
+    }
+  else
+    s->retry_head_index = ri;
+
+  /* Make room for, and fill in, the user request header. */
+  vlib_buffer_advance (b, -sizeof (hdr[0]));
+  hdr = vlib_buffer_get_current (b);
+
+  hdr->peer_id = mcm->transport.our_ack_peer_id;
+  /* hdr->transport.global_sequence set by relay agent. */
+  hdr->global_sequence = 0xdeadbeef;
+  hdr->stream_index = s->index;
+  hdr->local_sequence = s->our_local_sequence++;
+  hdr->n_data_bytes =
+    vlib_buffer_index_length_in_chain (vm, buffer_index) - sizeof (hdr[0]);
+
+  r->buffer_index = buffer_index;
+  r->local_sequence = hdr->local_sequence;
+  r->sent_at = vlib_time_now (vm);
+  r->n_retries = 0;
+
+  /* Retry will be freed when all currently known peers have acked. */
+  vec_validate (r->unacked_by_peer_bitmap, vec_len (s->all_peer_bitmap) - 1);
+  vec_copy (r->unacked_by_peer_bitmap, s->all_peer_bitmap);
+
+  hash_set (s->retry_index_by_local_sequence, r->local_sequence,
+            r - s->retry_pool);
+
+  elog_tx_msg (mcm, s->index, hdr->local_sequence, r->n_retries);
+
+  mc_byte_swap_msg_user_request (hdr);
+
+  mcm->transport.tx_buffer (mcm->transport.opaque,
+                            MC_TRANSPORT_USER_REQUEST_TO_RELAY, buffer_index);
+
+  s->user_requests_sent++;
+
+  /* return amount of window remaining */
+  return s->config.window_size - pool_elts (s->retry_pool);
+}
+
+/* Handle a user request received on a stream.
+
+   Requests for streams we have not joined (or that are not ready) are
+   freed.  Otherwise the sender's sequence number is compared with the
+   next one expected from that peer, an ack carrying the comparison
+   result is always sent back, and:
+   - an in-sequence message is delivered to the stream's rx_buffer
+     callback;
+   - a message from the past is freed;
+   - a message from the future is only counted.
+     NOTE(review): its buffer is neither freed nor queued here --
+     confirm who owns it. */
+void
+mc_msg_user_request_handler (mc_main_t * mcm, mc_msg_user_request_t * mp,
+                             u32 buffer_index)
+{
+  vlib_main_t *vm = mcm->vlib_main;
+  mc_stream_t *s;
+  mc_stream_peer_t *peer;
+  mc_msg_user_ack_t *ack;
+  vlib_buffer_t *b;
+  u32 ack_bi;
+  i32 seq_cmp_result;
+  static int once = 0;
+
+  mc_byte_swap_msg_user_request (mp);
+
+  s = mc_stream_by_index (mcm, mp->stream_index);
+
+  /* Not signed up for this stream? Turf-o-matic */
+  if (!s || s->state != MC_STREAM_STATE_ready)
+    {
+      vlib_buffer_free_one (vm, buffer_index);
+      return;
+    }
+
+  /* Find peer, including ourselves. */
+  peer = get_or_create_peer_with_id (mcm, s, mp->peer_id,
+                                     /* created */ 0);
+
+  seq_cmp_result = mc_seq_cmp (mp->local_sequence,
+                               peer->last_sequence_received + 1);
+
+  if (MC_EVENT_LOGGING > 0)
+    {
+      /* *INDENT-OFF* */
+      ELOG_TYPE_DECLARE (e) =
+        {
+          .format = "rx-msg: peer %s stream %d rx seq %d seq_cmp %d",
+          .format_args = "T4i4i4i4",
+        };
+      /* *INDENT-ON* */
+      struct
+      {
+        u32 peer, stream_index, rx_sequence;
+        i32 seq_cmp_result;
+      } *ed;
+      ed = ELOG_DATA (mcm->elog_main, e);
+      ed->peer = elog_id_for_peer_id (mcm, peer->id.as_u64);
+      ed->stream_index = mp->stream_index;
+      ed->rx_sequence = mp->local_sequence;
+      ed->seq_cmp_result = seq_cmp_result;
+    }
+
+  /* Compiled-out test hook that drops one message on stream 1. */
+  if (0 && mp->stream_index == 1 && once == 0)
+    {
+      once = 1;
+      ELOG_TYPE (e, "FAKE lost msg on stream 1");
+      ELOG (mcm->elog_main, e, 0);
+      return;
+    }
+
+  /* Only an in-sequence message advances the peer's sequence. */
+  if (seq_cmp_result == 0)
+    peer->last_sequence_received += 1;
+  s->user_requests_received++;
+
+  if (seq_cmp_result > 0)
+    peer->stats.n_msgs_from_future += 1;
+
+  /* Send ack even if msg from future */
+  ack = mc_get_vlib_buffer (vm, sizeof (ack[0]), &ack_bi);
+  ack->peer_id = mcm->transport.our_ack_peer_id;
+  ack->stream_index = s->index;
+  ack->local_sequence = mp->local_sequence;
+  ack->seq_cmp_result = seq_cmp_result;
+
+  if (MC_EVENT_LOGGING > 0)
+    {
+      /* *INDENT-OFF* */
+      ELOG_TYPE_DECLARE (e) =
+        {
+          .format = "tx-ack: stream %d local seq %d",
+          .format_args = "i4i4",
+        };
+      /* *INDENT-ON* */
+      struct
+      {
+        u32 stream_index;
+        u32 local_sequence;
+      } *ed;
+      ed = ELOG_DATA (mcm->elog_main, e);
+      ed->stream_index = ack->stream_index;
+      ed->local_sequence = ack->local_sequence;
+    }
+
+  mc_byte_swap_msg_user_ack (ack);
+
+  mcm->transport.tx_ack (mcm->transport.opaque, mp->peer_id, ack_bi);
+
+  /* Msg from past? If so, free the buffer... */
+  if (seq_cmp_result < 0)
+    {
+      vlib_buffer_free_one (vm, buffer_index);
+      peer->stats.n_msgs_from_past += 1;
+    }
+
+  /* Everything below applies to in-sequence messages only. */
+  if (seq_cmp_result != 0)
+    return;
+
+  b = vlib_get_buffer (vm, buffer_index);
+  switch (s->state)
+    {
+    case MC_STREAM_STATE_ready:
+      /* Strip our header and hand the payload to the application. */
+      vlib_buffer_advance (b, sizeof (mp[0]));
+      s->config.rx_buffer (mcm, s, mp->peer_id, buffer_index);
+
+      /* Stream vector can change address via rx callback for mc-internal
+         stream. */
+      s = mc_stream_by_index (mcm, mp->stream_index);
+      ASSERT (s != 0);
+      s->last_global_sequence_processed = mp->global_sequence;
+      break;
+
+    case MC_STREAM_STATE_catchup:
+      clib_fifo_add1 (s->catchup_fifo, buffer_index);
+      break;
+
+    default:
+      clib_warning ("stream in unknown state %U",
+                    format_mc_stream_state, s->state);
+      break;
+    }
+}
+
+/* Handle an ack for one of our user requests.
+
+   - An ack reporting a message "from the future" means the peer missed
+     an earlier message; if that message is no longer in the retry
+     table it is resent from the retired fifo.
+   - Otherwise the acking peer's bit is cleared in the matching retry's
+     unacked bitmap, and once no peer is outstanding the retry is
+     retired and the send window possibly reopened.
+   A peer first seen through this ack is added to the unacked bitmap of
+   every retry queued after the acked one. */
+void
+mc_msg_user_ack_handler (mc_main_t * mcm, mc_msg_user_ack_t * mp,
+                         u32 buffer_index)
+{
+  vlib_main_t *vm = mcm->vlib_main;
+  mc_stream_t *s;
+  mc_stream_peer_t *peer;
+  mc_retry_t *r;
+  uword *hit;
+  int peer_created = 0;
+
+  mc_byte_swap_msg_user_ack (mp);
+
+  s = mc_stream_by_index (mcm, mp->stream_index);
+
+  if (MC_EVENT_LOGGING > 0)
+    {
+      /* *INDENT-OFF* */
+      ELOG_TYPE_DECLARE (t) =
+        {
+          .format = "rx-ack: local seq %d peer %s seq_cmp_result %d",
+          .format_args = "i4T4i4",
+        };
+      /* *INDENT-ON* */
+
+      struct
+      {
+        u32 local_sequence;
+        u32 peer;
+        i32 seq_cmp_result;
+      } *ed;
+      ed = ELOG_DATA (mcm->elog_main, t);
+      ed->local_sequence = mp->local_sequence;
+      ed->peer = elog_id_for_peer_id (mcm, mp->peer_id.as_u64);
+      ed->seq_cmp_result = mp->seq_cmp_result;
+    }
+
+  /* Unknown stream? */
+  if (!s)
+    return;
+
+  /* Find the peer which just ack'ed. */
+  peer = get_or_create_peer_with_id (mcm, s, mp->peer_id,
+                                     /* created */ &peer_created);
+
+  /*
+   * Peer reports message from the future. If it's not in the retry
+   * fifo, look for a retired message.
+   */
+  if (mp->seq_cmp_result > 0)
+    {
+      u32 missed_sequence = mp->local_sequence - mp->seq_cmp_result;
+
+      hit = hash_get (s->retry_index_by_local_sequence, missed_sequence);
+      if (hit == 0)
+        mc_resend_retired (mcm, s, missed_sequence);
+
+      /* Normal retry should fix it... */
+      return;
+    }
+
+  /*
+   * Pointer to the indicated retry fifo entry.
+   * Worth hashing because we could use a window size of 100 or 1000.
+   */
+  hit = hash_get (s->retry_index_by_local_sequence, mp->local_sequence);
+
+  /*
+   * Is this a duplicate ACK, received after we've retired the
+   * fifo entry. This can happen when learning about new
+   * peers.
+   */
+  if (hit == 0)
+    {
+      if (MC_EVENT_LOGGING > 0)
+        {
+          /* *INDENT-OFF* */
+          ELOG_TYPE_DECLARE (t) =
+            {
+              .format = "ack: for seq %d from peer %s no fifo elt",
+              .format_args = "i4T4",
+            };
+          /* *INDENT-ON* */
+
+          struct
+          {
+            u32 seq;
+            u32 peer;
+          } *ed;
+          ed = ELOG_DATA (mcm->elog_main, t);
+          ed->seq = mp->local_sequence;
+          ed->peer = elog_id_for_peer_id (mcm, mp->peer_id.as_u64);
+        }
+
+      return;
+    }
+
+  r = pool_elt_at_index (s->retry_pool, hit[0]);
+
+  /* Make sure that this new peer ACKs our msgs from now on */
+  if (peer_created)
+    {
+      mc_retry_t *later;
+
+      for (later = next_retry (s, r); later; later = next_retry (s, later))
+        later->unacked_by_peer_bitmap =
+          clib_bitmap_ori (later->unacked_by_peer_bitmap, peer - s->peers);
+    }
+
+  ASSERT (mp->local_sequence == r->local_sequence);
+
+  /* If we weren't expecting to hear from this peer */
+  if (!peer_created &&
+      !clib_bitmap_get (r->unacked_by_peer_bitmap, peer - s->peers))
+    {
+      if (MC_EVENT_LOGGING > 0)
+        {
+          /* *INDENT-OFF* */
+          ELOG_TYPE_DECLARE (t) =
+            {
+              .format = "dup-ack: for seq %d from peer %s",
+              .format_args = "i4T4",
+            };
+          /* *INDENT-ON* */
+          struct
+          {
+            u32 seq;
+            u32 peer;
+          } *ed;
+          ed = ELOG_DATA (mcm->elog_main, t);
+          ed->seq = r->local_sequence;
+          ed->peer = elog_id_for_peer_id (mcm, peer->id.as_u64);
+        }
+      /* Others are still outstanding; a duplicate changes nothing. */
+      if (!clib_bitmap_is_zero (r->unacked_by_peer_bitmap))
+        return;
+    }
+
+  if (MC_EVENT_LOGGING > 0)
+    {
+      /* *INDENT-OFF* */
+      ELOG_TYPE_DECLARE (t) =
+        {
+          .format = "ack: for seq %d from peer %s",
+          .format_args = "i4T4",
+        };
+      /* *INDENT-ON* */
+      struct
+      {
+        u32 seq;
+        u32 peer;
+      } *ed;
+      ed = ELOG_DATA (mcm->elog_main, t);
+      ed->seq = mp->local_sequence;
+      ed->peer = elog_id_for_peer_id (mcm, peer->id.as_u64);
+    }
+
+  /* Record the ack. */
+  r->unacked_by_peer_bitmap =
+    clib_bitmap_andnoti (r->unacked_by_peer_bitmap, peer - s->peers);
+
+  /* Not all clients have ack'ed */
+  if (!clib_bitmap_is_zero (r->unacked_by_peer_bitmap))
+    return;
+
+  if (MC_EVENT_LOGGING > 0)
+    {
+      /* *INDENT-OFF* */
+      ELOG_TYPE_DECLARE (t) =
+        {
+          .format = "ack: retire fifo elt loc seq %d after %d acks",
+          .format_args = "i4i4",
+        };
+      /* *INDENT-ON* */
+      struct
+      {
+        u32 seq;
+        u32 npeers;
+      } *ed;
+      ed = ELOG_DATA (mcm->elog_main, t);
+      ed->seq = r->local_sequence;
+      ed->npeers = pool_elts (s->peers);
+    }
+
+  /* Everybody has acked: retire the retry and maybe reopen the window. */
+  hash_unset (s->retry_index_by_local_sequence, mp->local_sequence);
+  mc_retry_free (mcm, s, r);
+  remove_retry_from_pool (s, r);
+  maybe_send_window_open_event (vm, s);
+}
+
+#define EVENT_MC_SEND_CATCHUP_DATA 0
+
+/*
+ * Process body that sends prepared catchup replies.
+ *
+ * Waits for EVENT_MC_SEND_CATCHUP_DATA events.  Each event datum is an
+ * index into mcm->catchup_process_args; the referenced snapshot is passed
+ * to the transport's catchup_send_fun and the args element is returned to
+ * the pool.  The loop never terminates.
+ */
+static uword
+mc_catchup_process (vlib_main_t * vm,
+		    vlib_node_runtime_t * node, vlib_frame_t * f)
+{
+  mc_main_t *mcm = mc_node_get_main (node);
+  uword *pending = 0;
+
+  for (;;)
+    {
+      int k;
+
+      /* Reuse the event vector from the previous wakeup. */
+      if (pending)
+	_vec_len (pending) = 0;
+
+      vlib_process_wait_for_event_with_type (vm, &pending,
+					     EVENT_MC_SEND_CATCHUP_DATA);
+
+      for (k = 0; k < vec_len (pending); k++)
+	{
+	  mc_catchup_process_arg_t *a =
+	    pool_elt_at_index (mcm->catchup_process_args, pending[k]);
+
+	  mcm->transport.catchup_send_fun (mcm->transport.opaque,
+					   a->catchup_opaque,
+					   a->catchup_snapshot);
+
+	  /* Send function will free snapshot data vector. */
+	  pool_put (mcm->catchup_process_args, a);
+	}
+    }
+
+  return 0;			/* not likely */
+}
+
+/*
+ * Serialize a stream's peer state.
+ *
+ * Output layout: peer count (u32), then for every peer in the pool its id
+ * bytes followed by last_sequence_received, then all_peer_bitmap.
+ * Varargs: the mc_stream_t * to serialize.
+ */
+static void
+serialize_mc_stream (serialize_main_t * m, va_list * va)
+{
+  mc_stream_t *stream = va_arg (*va, mc_stream_t *);
+  mc_stream_peer_t *peer;
+
+  /* Count goes first so the reader knows how many records follow. */
+  serialize_integer (m, pool_elts (stream->peers), sizeof (u32));
+
+  /* *INDENT-OFF* */
+  pool_foreach (peer, stream->peers, ({
+    u8 *id_bytes = serialize_get (m, sizeof (peer->id));
+    clib_memcpy (id_bytes, peer->id.as_u8, sizeof (peer->id));
+    serialize_integer (m, peer->last_sequence_received,
+                       sizeof (peer->last_sequence_received));
+  }));
+  /* *INDENT-ON* */
+
+  serialize_bitmap (m, stream->all_peer_bitmap);
+}
+
+/*
+ * Inverse of serialize_mc_stream: read the peer count, then that many
+ * (id, last_sequence_received) records into the stream's peer pool,
+ * indexing each new peer by id, and finally read all_peer_bitmap.
+ * Varargs: the mc_stream_t * to fill in.
+ */
+void
+unserialize_mc_stream (serialize_main_t * m, va_list * va)
+{
+  mc_stream_t *stream = va_arg (*va, mc_stream_t *);
+  mc_stream_peer_t *peer;
+  u32 n_remaining;
+
+  unserialize_integer (m, &n_remaining, sizeof (u32));
+  mhash_init (&stream->peer_index_by_id, sizeof (uword),
+	      sizeof (mc_peer_id_t));
+
+  while (n_remaining-- > 0)
+    {
+      u8 *id_bytes;
+
+      pool_get (stream->peers, peer);
+      id_bytes = unserialize_get (m, sizeof (peer->id));
+      clib_memcpy (peer->id.as_u8, id_bytes, sizeof (peer->id));
+      unserialize_integer (m, &peer->last_sequence_received,
+			   sizeof (peer->last_sequence_received));
+      mhash_set (&stream->peer_index_by_id, &peer->id,
+		 peer - stream->peers, /* old_value */ 0);
+    }
+
+  stream->all_peer_bitmap = unserialize_bitmap (m);
+
+  /* A missing bitmap is only reported; nothing is repaired here. */
+  if (stream->all_peer_bitmap == 0)
+    clib_warning ("BUG: stream %s all_peer_bitmap NULL", stream->config.name);
+}
+
+/*
+ * Handle a catchup request for one of our streams.
+ *
+ * Builds a catchup reply whose payload is the serialized stream peer state
+ * followed by the application's snapshot, then signals
+ * mcm->catchup_process to send it.  Requests for unknown streams, or for
+ * streams that are not in the ready state, are ignored.
+ *
+ * mcm            - multicast main
+ * req            - request message; byte-swapped in place
+ * catchup_opaque - stored in the queued args and later passed to the
+ *                  transport's catchup_send_fun
+ */
+void
+mc_msg_catchup_request_handler (mc_main_t * mcm,
+				mc_msg_catchup_request_t * req,
+				u32 catchup_opaque)
+{
+  vlib_main_t *vm = mcm->vlib_main;
+  mc_stream_t *s;
+  mc_catchup_process_arg_t *args;
+
+  mc_byte_swap_msg_catchup_request (req);
+
+  s = mc_stream_by_index (mcm, req->stream_index);
+  if (!s || s->state != MC_STREAM_STATE_ready)
+    return;
+
+  if (MC_EVENT_LOGGING > 0)
+    {
+      /* *INDENT-OFF* */
+      ELOG_TYPE_DECLARE (t) =
+        {
+          .format = "catchup-request: from %s stream %d",
+          .format_args = "T4i4",
+        };
+      /* *INDENT-ON* */
+      struct
+      {
+	u32 peer, stream;
+      } *ed;
+      ed = ELOG_DATA (mcm->elog_main, t);
+      ed->peer = elog_id_for_peer_id (mcm, req->peer_id.as_u64);
+      ed->stream = req->stream_index;
+    }
+
+  /*
+   * The application has to snapshot its data structures right
+   * here, right now.  If we process any messages after
+   * noting the last global sequence we've processed, the client
+   * won't be able to accurately reconstruct our data structures.
+   *
+   * Once the data structures are e.g. vec_dup()'ed, we
+   * send the resulting messages from a separate process, to
+   * make sure that we don't cause a bunch of message retransmissions
+   */
+  pool_get (mcm->catchup_process_args, args);
+
+  args->stream_index = s - mcm->stream_vector;
+  args->catchup_opaque = catchup_opaque;
+  args->catchup_snapshot = 0;
+
+  /* Construct catchup reply and snapshot state for stream to send as
+     catchup reply payload. */
+  {
+    mc_msg_catchup_reply_t *rep;
+    serialize_main_t m;
+
+    /* Reserve room for the reply header at the front of the vector. */
+    vec_resize (args->catchup_snapshot, sizeof (rep[0]));
+
+    rep = (void *) args->catchup_snapshot;
+
+    rep->peer_id = req->peer_id;
+    rep->stream_index = req->stream_index;
+    rep->last_global_sequence_included = s->last_global_sequence_processed;
+
+    /* Setup for serialize to append to catchup snapshot. */
+    serialize_open_vector (&m, args->catchup_snapshot);
+    m.stream.current_buffer_index = vec_len (m.stream.buffer);
+
+    serialize (&m, serialize_mc_stream, s);
+
+    args->catchup_snapshot = serialize_close_vector (&m);
+
+    /*
+     * Actually copy internal state.
+     *
+     * Appending the serialized stream may have reallocated the vector, so
+     * the `rep' pointer taken above must not be dereferenced here.  Pass
+     * the sequence number straight from the stream instead; it is the
+     * value that was stored into the reply header above.
+     */
+    args->catchup_snapshot = s->config.catchup_snapshot
+      (mcm, args->catchup_snapshot, s->last_global_sequence_processed);
+
+    /* Re-derive the header pointer from the final vector address. */
+    rep = (void *) args->catchup_snapshot;
+    rep->n_data_bytes = vec_len (args->catchup_snapshot) - sizeof (rep[0]);
+
+    mc_byte_swap_msg_catchup_reply (rep);
+  }
+
+  /* now go send it... */
+  vlib_process_signal_event (vm, mcm->catchup_process,
+			     EVENT_MC_SEND_CATCHUP_DATA,
+			     args - mcm->catchup_process_args);
+}
+
+#define EVENT_MC_UNSERIALIZE_BUFFER 0
+#define EVENT_MC_UNSERIALIZE_CATCHUP 1
+
+/*
+ * Queue a received catchup reply for the unserialize process.
+ *
+ * The message pointer itself travels as the event datum of an
+ * EVENT_MC_UNSERIALIZE_CATCHUP event.  catchup_opaque is not used here.
+ */
+void
+mc_msg_catchup_reply_handler (mc_main_t * mcm, mc_msg_catchup_reply_t * mp,
+			      u32 catchup_opaque)
+{
+  vlib_main_t *vm = mcm->vlib_main;
+  uword reply_datum = pointer_to_uword (mp);
+
+  vlib_process_signal_event (vm, mcm->unserialize_process,
+			     EVENT_MC_UNSERIALIZE_CATCHUP, reply_datum);
+}
+
+/*
+ * Apply a catchup reply to a stream that is still joining.
+ *
+ * Steps: byte-swap the reply; unserialize the stream peer state from the
+ * front of the payload; pass the remaining bytes to the stream's catchup
+ * callback; drain catchup_fifo, replaying buffers whose global sequence is
+ * newer than the snapshot and freeing the rest; mark the stream ready and
+ * wake every process waiting for the join to finish.
+ *
+ * Replies for unknown streams, or streams already ready, are ignored.
+ */
+static void
+perform_catchup (mc_main_t * mcm, mc_msg_catchup_reply_t * mp)
+{
+  mc_stream_t *s;
+  i32 seq_cmp_result;
+
+  mc_byte_swap_msg_catchup_reply (mp);
+
+  s = mc_stream_by_index (mcm, mp->stream_index);
+
+  /* Nothing to do for a stream we never heard of or already caught up. */
+  if (s == 0 || s->state == MC_STREAM_STATE_ready)
+    return;
+
+  {
+    serialize_main_t m;
+    mc_stream_peer_t *peer;
+    u32 n_stream_bytes;
+
+    /* For offline sim replay: save the entire catchup snapshot... */
+    if (s->config.save_snapshot)
+      s->config.save_snapshot (mcm, /* is_catchup */ 1, mp->data,
+			       mp->n_data_bytes);
+
+    unserialize_open_data (&m, mp->data, mp->n_data_bytes);
+    unserialize (&m, unserialize_mc_stream, s);
+
+    /* Continue our own numbering from what the snapshot recorded for us. */
+    /* *INDENT-OFF* */
+    pool_foreach (peer, s->peers, ({
+      if (peer->id.as_u64 == mcm->transport.our_ack_peer_id.as_u64)
+        s->our_local_sequence = peer->last_sequence_received + 1;
+    }));
+    /* *INDENT-ON* */
+
+    /* Bytes consumed by the stream state; no unserialize close needed,
+       there is nothing to free. */
+    n_stream_bytes = m.stream.current_buffer_index;
+
+    /* Whatever follows the serialized stream is the user's catchup data. */
+    s->config.catchup (mcm, mp->data + n_stream_bytes,
+		       mp->n_data_bytes - n_stream_bytes);
+  }
+
+  /* Vector could have been moved by catchup.
+     This can only happen for mc-internal stream. */
+  s = mc_stream_by_index (mcm, mp->stream_index);
+
+  s->last_global_sequence_processed = mp->last_global_sequence_included;
+
+  while (clib_fifo_elts (s->catchup_fifo) != 0)
+    {
+      mc_msg_user_request_t *gp;
+      vlib_buffer_t *b;
+      u32 bi;
+
+      clib_fifo_sub1 (s->catchup_fifo, bi);
+
+      b = vlib_get_buffer (mcm->vlib_main, bi);
+      gp = vlib_buffer_get_current (b);
+
+      /* Only messages newer than the snapshot are worth replaying. */
+      seq_cmp_result = mc_seq_cmp (gp->global_sequence,
+				   mp->last_global_sequence_included);
+
+      if (seq_cmp_result <= 0)
+	{
+	  /* Already covered by the snapshot: log and drop. */
+	  if (MC_EVENT_LOGGING)
+	    {
+	      /* *INDENT-OFF* */
+	      ELOG_TYPE_DECLARE (t) =
+                {
+                  .format = "catchup discard local sequence 0x%x",
+                  .format_args = "i4",
+                };
+	      /* *INDENT-ON* */
+	      struct
+	      {
+		u32 local_sequence;
+	      } *ed;
+	      ed = ELOG_DATA (mcm->elog_main, t);
+	      ed->local_sequence = gp->local_sequence;
+	    }
+
+	  vlib_buffer_free_one (mcm->vlib_main, bi);
+	  continue;
+	}
+
+      /* Newer than the snapshot: strip the header and deliver. */
+      vlib_buffer_advance (b, sizeof (gp[0]));
+      s->config.rx_buffer (mcm, s, gp->peer_id, bi);
+      s->last_global_sequence_processed = gp->global_sequence;
+
+      if (MC_EVENT_LOGGING)
+	{
+	  /* *INDENT-OFF* */
+	  ELOG_TYPE_DECLARE (t) =
+            {
+              .format = "catchup replay local sequence 0x%x",
+              .format_args = "i4",
+            };
+	  /* *INDENT-ON* */
+	  struct
+	  {
+	    u32 local_sequence;
+	  } *ed;
+	  ed = ELOG_DATA (mcm->elog_main, t);
+	  ed->local_sequence = gp->local_sequence;
+	}
+    }
+
+  s->state = MC_STREAM_STATE_ready;
+
+  /* Now that we are caught up wake up joining process. */
+  {
+    vlib_one_time_waiting_process_t *waiter;
+
+    vec_foreach (waiter, s->procs_waiting_for_join_done)
+      vlib_signal_one_time_waiting_process (mcm->vlib_main, waiter);
+
+    if (s->procs_waiting_for_join_done)
+      _vec_len (s->procs_waiting_for_join_done) = 0;
+  }
+}
+
+/*
+ * Mastership loop used while relay_state is NEGOTIATE or MASTER
+ * (see mc_mastership_process).
+ *
+ * At most once per second a master-assert message carrying our peer id
+ * and relay_global_sequence is sent on MC_TRANSPORT_MASTERSHIP.  The
+ * process then waits up to one second for an event.
+ *
+ * Returns when:
+ *  - we_can_be_relay_master is clear: relay_state becomes SLAVE;
+ *  - we were not already master on entry and the wait has returned event
+ *    type ~0 (handled here as a timeout) more than three times:
+ *    relay_state becomes MASTER with ourselves as relay master;
+ *  - a MC_RELAY_STATE_SLAVE event arrives: relay_state becomes SLAVE.
+ * Any other event type just restarts the loop.
+ */
+static void
+this_node_maybe_master (mc_main_t * mcm)
+{
+  vlib_main_t *vm = mcm->vlib_main;
+  mc_msg_master_assert_t *mp;
+  uword event_type;
+  int timeouts = 0;
+  int is_master = mcm->relay_state == MC_RELAY_STATE_MASTER;
+  clib_error_t *error;
+  f64 now, time_last_master_assert = -1;
+  u32 bi;
+
+  while (1)
+    {
+      if (!mcm->we_can_be_relay_master)
+	{
+	  mcm->relay_state = MC_RELAY_STATE_SLAVE;
+	  if (MC_EVENT_LOGGING)
+	    {
+	      ELOG_TYPE (e, "become slave (config)");
+	      ELOG (mcm->elog_main, e, 0);
+	    }
+	  return;
+	}
+
+      /* Rate-limit master asserts to one per second. */
+      now = vlib_time_now (vm);
+      if (now >= time_last_master_assert + 1)
+	{
+	  time_last_master_assert = now;
+	  mp = mc_get_vlib_buffer (mcm->vlib_main, sizeof (mp[0]), &bi);
+
+	  mp->peer_id = mcm->transport.our_ack_peer_id;
+	  mp->global_sequence = mcm->relay_global_sequence;
+
+	  /*
+	   * these messages clog the event log, set MC_EVENT_LOGGING higher
+	   * if you want them
+	   */
+	  if (MC_EVENT_LOGGING > 1)
+	    {
+	      /* *INDENT-OFF* */
+	      ELOG_TYPE_DECLARE (e) =
+                {
+                  .format = "tx-massert: peer %s global seq %u",
+                  .format_args = "T4i4",
+                };
+	      /* *INDENT-ON* */
+	      struct
+	      {
+		u32 peer, global_sequence;
+	      } *ed;
+	      ed = ELOG_DATA (mcm->elog_main, e);
+	      ed->peer = elog_id_for_peer_id (mcm, mp->peer_id.as_u64);
+	      ed->global_sequence = mp->global_sequence;
+	    }
+
+	  mc_byte_swap_msg_master_assert (mp);
+
+	  error =
+	    mcm->transport.tx_buffer (mcm->transport.opaque,
+				      MC_TRANSPORT_MASTERSHIP, bi);
+	  if (error)
+	    clib_error_report (error);
+	}
+
+      vlib_process_wait_for_event_or_clock (vm, 1.0);
+      event_type = vlib_process_get_events (vm, /* no event data */ 0);
+
+      switch (event_type)
+	{
+	case ~0:
+	  /* The counter is never reset, so timeouts accumulate. */
+	  if (!is_master && timeouts++ > 2)
+	    {
+	      mcm->relay_state = MC_RELAY_STATE_MASTER;
+	      mcm->relay_master_peer_id =
+		mcm->transport.our_ack_peer_id.as_u64;
+	      if (MC_EVENT_LOGGING)
+		{
+		  ELOG_TYPE (e, "become master (was maybe_master)");
+		  ELOG (mcm->elog_main, e, 0);
+		}
+	      return;
+	    }
+	  break;
+
+	case MC_RELAY_STATE_SLAVE:
+	  /*
+	   * Log the transition BEFORE overwriting relay_state.  Testing
+	   * the state after the assignment can never be true, which
+	   * silently disabled this event.
+	   */
+	  if (MC_EVENT_LOGGING && mcm->relay_state != MC_RELAY_STATE_SLAVE)
+	    {
+	      ELOG_TYPE (e, "become slave (was maybe_master)");
+	      ELOG (mcm->elog_main, e, 0);
+	    }
+	  mcm->relay_state = MC_RELAY_STATE_SLAVE;
+	  return;
+
+	default:
+	  /* Other event types need no action here; loop again. */
+	  break;
+	}
+    }
+}
+
+/*
+ * Mastership loop used while relay_state is SLAVE.
+ *
+ * Waits up to one second per iteration.  A MC_RELAY_STATE_SLAVE event
+ * resets the timeout counter.  Once event type ~0 (handled here as a
+ * timeout) has been seen more than three times since the last reset,
+ * the relay master id is cleared, relay_state becomes NEGOTIATE and the
+ * function returns.
+ */
+static void
+this_node_slave (mc_main_t * mcm)
+{
+  vlib_main_t *vm = mcm->vlib_main;
+  int n_timeouts = 0;
+
+  if (MC_EVENT_LOGGING)
+    {
+      ELOG_TYPE (e, "become slave");
+      ELOG (mcm->elog_main, e, 0);
+    }
+
+  for (;;)
+    {
+      uword what;
+
+      vlib_process_wait_for_event_or_clock (vm, 1.0);
+      what = vlib_process_get_events (vm, /* no event data */ 0);
+
+      if (what == ~0)
+	{
+	  if (n_timeouts++ > 2)
+	    {
+	      mcm->relay_state = MC_RELAY_STATE_NEGOTIATE;
+	      mcm->relay_master_peer_id = ~0ULL;
+	      if (MC_EVENT_LOGGING)
+		{
+		  ELOG_TYPE (e, "timeouts; negoitate mastership");
+		  ELOG (mcm->elog_main, e, 0);
+		}
+	      return;
+	    }
+	}
+      else if (what == MC_RELAY_STATE_SLAVE)
+	{
+	  /* Stay slave and start counting timeouts afresh. */
+	  mcm->relay_state = MC_RELAY_STATE_SLAVE;
+	  n_timeouts = 0;
+	}
+    }
+}
+
+/*
+ * Process body for mastership negotiation: repeatedly runs the loop that
+ * matches the current relay_state.  Each helper returns after it has
+ * changed relay_state, so this outer loop never terminates.
+ */
+static uword
+mc_mastership_process (vlib_main_t * vm,
+		       vlib_node_runtime_t * node, vlib_frame_t * f)
+{
+  mc_main_t *mcm = mc_node_get_main (node);
+
+  for (;;)
+    {
+      mc_relay_state_t state = mcm->relay_state;
+
+      if (state == MC_RELAY_STATE_SLAVE)
+	this_node_slave (mcm);
+      else if (state == MC_RELAY_STATE_NEGOTIATE
+	       || state == MC_RELAY_STATE_MASTER)
+	this_node_maybe_master (mcm);
+    }
+
+  return 0;			/* not likely */
+}
+
+/*
+ * Set whether this node may become relay master.  When the setting
+ * actually changes, the mastership process is signalled with a
+ * MC_RELAY_STATE_NEGOTIATE event; otherwise nothing happens.
+ */
+void
+mc_enable_disable_mastership (mc_main_t * mcm, int we_can_be_master)
+{
+  /* No change, no signal. */
+  if (we_can_be_master == mcm->we_can_be_relay_master)
+    return;
+
+  mcm->we_can_be_relay_master = we_can_be_master;
+  vlib_process_signal_event (mcm->vlib_main,
+			     mcm->mastership_process,
+			     MC_RELAY_STATE_NEGOTIATE, 0);
+}
+
+/*
+ * Handle a master-assert message from a peer.
+ *
+ *  - If the sender's peer id compares lower than ours and its global
+ *    sequence is not behind ours, signal the mastership process with
+ *    MC_RELAY_STATE_SLAVE.
+ *  - If the sender's global sequence is ahead of ours, adopt it.
+ *  - Record the time of this assert in the per-peer mastership table,
+ *    creating the peer's entry on first contact.
+ *
+ * mp is byte-swapped in place.  buffer_index is not used here.
+ */
+void
+mc_msg_master_assert_handler (mc_main_t * mcm, mc_msg_master_assert_t * mp,
+			      u32 buffer_index)
+{
+  mc_peer_id_t his_peer_id, our_peer_id;
+  mc_mastership_peer_t *entry;
+  uword *found;
+  i32 seq_cmp_result;
+  u8 signal_slave = 0;
+  u8 update_global_sequence = 0;
+
+  mc_byte_swap_msg_master_assert (mp);
+
+  his_peer_id = mp->peer_id;
+  our_peer_id = mcm->transport.our_ack_peer_id;
+
+  /* >0: sender is ahead of us, 0: equal, <0: sender is behind. */
+  seq_cmp_result = mc_seq_cmp (mp->global_sequence,
+			       mcm->relay_global_sequence);
+
+  /* Lower peer id with an up-to-date sequence outranks us. */
+  if (mc_peer_id_compare (his_peer_id, our_peer_id) < 0
+      && seq_cmp_result >= 0)
+    {
+      vlib_process_signal_event (mcm->vlib_main,
+				 mcm->mastership_process,
+				 MC_RELAY_STATE_SLAVE, 0);
+      signal_slave = 1;
+    }
+
+  /* Adopt a newer global sequence. */
+  if (seq_cmp_result > 0)
+    {
+      mcm->relay_global_sequence = mp->global_sequence;
+      update_global_sequence = 1;
+    }
+
+  /* Find, or create, the bookkeeping entry for the sender. */
+  found = mhash_get (&mcm->mastership_peer_index_by_id, &his_peer_id);
+  if (!found)
+    {
+      vec_add2 (mcm->mastership_peers, entry, 1);
+      entry->peer_id = his_peer_id;
+      mhash_set (&mcm->mastership_peer_index_by_id, &entry->peer_id,
+		 entry - mcm->mastership_peers,
+		 /* old_value */ 0);
+    }
+  else
+    entry = vec_elt_at_index (mcm->mastership_peers, found[0]);
+
+  entry->time_last_master_assert_received = vlib_time_now (mcm->vlib_main);
+
+  /*
+   * these messages clog the event log, set MC_EVENT_LOGGING higher
+   * if you want them.
+   */
+  if (MC_EVENT_LOGGING > 1)
+    {
+      /* *INDENT-OFF* */
+      ELOG_TYPE_DECLARE (e) =
+        {
+          .format = "rx-massert: peer %s global seq %u upd %d slave %d",
+          .format_args = "T4i4i1i1",
+        };
+      /* *INDENT-ON* */
+
+      struct
+      {
+	u32 peer;
+	u32 global_sequence;
+	u8 update_sequence;
+	u8 slave;
+      } *ed;
+      ed = ELOG_DATA (mcm->elog_main, e);
+      ed->peer = elog_id_for_peer_id (mcm, his_peer_id.as_u64);
+      ed->global_sequence = mp->global_sequence;
+      ed->update_sequence = update_global_sequence;
+      ed->slave = signal_slave;
+    }
+}
+
+/*
+ * Build the global message tables from the registration list hanging off
+ * vlib_main: each registered message gets the next global index, is
+ * entered into global_msg_index_by_name under its name, and is appended
+ * to global_msgs.
+ */
+static void
+mc_serialize_init (mc_main_t * mcm)
+{
+  vlib_main_t *vm = vlib_get_main ();
+  mc_serialize_msg_t *reg;
+
+  mcm->global_msg_index_by_name
+    = hash_create_string ( /* elts */ 0, sizeof (uword));
+
+  for (reg = vm->mc_msg_registrations; reg; reg = reg->next_registration)
+    {
+      /* The index is the message's position in global_msgs. */
+      reg->global_index = vec_len (mcm->global_msgs);
+      hash_set_mem (mcm->global_msg_index_by_name, reg->name,
+		    reg->global_index);
+      vec_add1 (mcm->global_msgs, reg);
+    }
+}
+
+/*
+ * Serialize one message into the TX serialize buffer and, when required,
+ * close the buffer and send it on the stream.
+ *
+ * The message is prefixed with its stream-local index; while that index
+ * is still unknown (~0), or when MSG_ID_DEBUG is set, the message name is
+ * sent as well.
+ *
+ * The buffer is flushed when batching is not requested, when the name had
+ * to be sent, or when another message of this type might no longer fit in
+ * transport.max_packet_size.
+ *
+ * Returns the error from va_serialize, if any; on error a closed buffer
+ * is freed instead of being sent.
+ */
+clib_error_t *
+mc_serialize_va (mc_main_t * mc,
+		 u32 stream_index,
+		 u32 multiple_messages_per_vlib_buffer,
+		 mc_serialize_msg_t * msg, va_list * va)
+{
+  serialize_main_t *m = &mc->serialize_mains[VLIB_TX];
+  vlib_serialize_buffer_main_t *sbm = &mc->serialize_buffer_mains[VLIB_TX];
+  mc_stream_t *stream;
+  clib_error_t *err;
+  u32 bytes_before, bytes_after, bytes_this_msg, bytes_total;
+  u32 global_idx, stream_idx;
+  u32 bi;
+
+  /* First use of the TX buffer main: set transmit parameters. */
+  if (!sbm->vlib_main)
+    {
+      sbm->tx.max_n_data_bytes_per_chain = 4096;
+      sbm->tx.free_list_index = VLIB_BUFFER_DEFAULT_FREE_LIST_INDEX;
+    }
+
+  /* Open a fresh buffer unless one is already being filled. */
+  if (sbm->first_buffer == 0)
+    serialize_open_vlib_buffer (m, mc->vlib_main, sbm);
+
+  bytes_before = serialize_vlib_buffer_n_bytes (m);
+
+  stream = mc_stream_by_index (mc, stream_index);
+  global_idx = msg->global_index;
+  ASSERT (msg == vec_elt (mc->global_msgs, global_idx));
+
+  /* Translate the global index into this stream's index, if bound. */
+  stream_idx = ~0;
+  if (global_idx < vec_len (stream->stream_msg_index_by_global_index))
+    stream_idx = stream->stream_msg_index_by_global_index[global_idx];
+
+  serialize_likely_small_unsigned_integer (m, stream_idx);
+
+  /* For first time message is sent, use name to identify message. */
+  if (stream_idx == ~0 || MSG_ID_DEBUG)
+    serialize_cstring (m, msg->name);
+
+  if (MSG_ID_DEBUG && MC_EVENT_LOGGING > 0)
+    {
+      /* *INDENT-OFF* */
+      ELOG_TYPE_DECLARE (e) =
+        {
+          .format = "serialize-msg: %s index %d",
+          .format_args = "T4i4",
+        };
+      /* *INDENT-ON* */
+      struct
+      {
+	u32 c[2];
+      } *ed;
+      ed = ELOG_DATA (mc->elog_main, e);
+      ed->c[0] = elog_id_for_msg_name (mc, msg->name);
+      ed->c[1] = stream_idx;
+    }
+
+  err = va_serialize (m, va);
+
+  bytes_after = serialize_vlib_buffer_n_bytes (m);
+  bytes_this_msg = bytes_after - bytes_before;
+  bytes_total = bytes_after + sizeof (mc_msg_user_request_t);
+
+  /* For max message size ignore first message where string name is sent. */
+  if (stream_idx != ~0)
+    msg->max_n_bytes_serialized =
+      clib_max (msg->max_n_bytes_serialized, bytes_this_msg);
+
+  /* Keep batching only while all three flush conditions are false. */
+  if (multiple_messages_per_vlib_buffer
+      && stream_idx != ~0
+      && bytes_total + msg->max_n_bytes_serialized <=
+      mc->transport.max_packet_size)
+    return err;
+
+  bi = serialize_close_vlib_buffer (m);
+  sbm->first_buffer = 0;
+
+  if (err)
+    {
+      if (bi != ~0)
+	vlib_buffer_free_one (mc->vlib_main, bi);
+    }
+  else
+    mc_stream_send (mc, stream_index, bi);
+
+  return err;
+}
+
+/*
+ * Varargs front end for mc_serialize_va.
+ *
+ * A stream_index of ~0 selects vm->mc_stream_index.  If vm->mc_main is
+ * set and that index is itself still ~0, the calling process first waits
+ * on procs_waiting_for_mc_stream_join.
+ *
+ * Returns whatever mc_serialize_va returns.
+ */
+clib_error_t *
+mc_serialize_internal (mc_main_t * mc,
+		       u32 stream_index,
+		       u32 multiple_messages_per_vlib_buffer,
+		       mc_serialize_msg_t * msg, ...)
+{
+  vlib_main_t *vm = mc->vlib_main;
+  clib_error_t *result;
+  va_list ap;
+
+  if (stream_index == ~0)
+    {
+      int must_wait = vm->mc_main && vm->mc_stream_index == ~0;
+
+      if (must_wait)
+	vlib_current_process_wait_for_one_time_event_vector
+	  (vm, &vm->procs_waiting_for_mc_stream_join);
+
+      /* Read the index only after any wait has completed. */
+      stream_index = vm->mc_stream_index;
+    }
+
+  va_start (ap, msg);
+  result = mc_serialize_va (mc, stream_index,
+			    multiple_messages_per_vlib_buffer, msg, &ap);
+  va_end (ap);
+
+  return result;
+}
+
+/*
+ * Unserialize one message from m for stream s.
+ *
+ * The message starts with its stream-local index.  If that index is ~0
+ * (or MSG_ID_DEBUG is set) a message name follows; the name is looked up
+ * in the global table and, if the stream has no local index for it yet,
+ * one is created.  The registered unserialize function is then invoked.
+ *
+ * Returns non-zero when the message was recognised, 0 when the name is
+ * unknown or the stream-local slot carries no global index.
+ */
+uword
+mc_unserialize_message (mc_main_t * mcm,
+			mc_stream_t * s, serialize_main_t * m)
+{
+  mc_serialize_stream_msg_t *sm;
+  u32 gi, si;
+
+  si = unserialize_likely_small_unsigned_integer (m);
+
+  if (si == ~0 || MSG_ID_DEBUG)
+    {
+      char *name;
+      uword *slot;
+
+      unserialize_cstring (m, &name);
+
+      if (MSG_ID_DEBUG && MC_EVENT_LOGGING > 0)
+	{
+	  /* *INDENT-OFF* */
+	  ELOG_TYPE_DECLARE (e) =
+            {
+              .format = "unserialize-msg: %s rx index %d",
+              .format_args = "T4i4",
+            };
+	  /* *INDENT-ON* */
+	  struct
+	  {
+	    u32 c[2];
+	  } *ed;
+	  ed = ELOG_DATA (mcm->elog_main, e);
+	  ed->c[0] = elog_id_for_msg_name (mcm, name);
+	  ed->c[1] = si;
+	}
+
+      slot = hash_get_mem (mcm->global_msg_index_by_name, name);
+      gi = slot ? slot[0] : ~0;
+
+      /* Unknown message name: nothing more can be parsed. */
+      if (gi == ~0)
+	{
+	  vec_free (name);
+	  return 0;
+	}
+
+      vec_validate_init_empty (s->stream_msg_index_by_global_index, gi, ~0);
+      si = s->stream_msg_index_by_global_index[gi];
+
+      if (si != ~0)
+	{
+	  /* Already bound; only report a mismatch, do not repair it. */
+	  sm = vec_elt_at_index (s->stream_msgs, si);
+	  if (gi != sm->global_index && MC_EVENT_LOGGING > 0)
+	    {
+	      /* *INDENT-OFF* */
+	      ELOG_TYPE_DECLARE (e) =
+                {
+                  .format = "msg-id-ERROR: %s index %d expected %d",
+                  .format_args = "T4i4i4",
+                };
+	      /* *INDENT-ON* */
+	      struct
+	      {
+		u32 c[3];
+	      } *ed;
+	      ed = ELOG_DATA (mcm->elog_main, e);
+	      ed->c[0] = elog_id_for_msg_name (mcm, name);
+	      ed->c[1] = si;
+	      ed->c[2] = ~0;
+	      if (sm->global_index <
+		  vec_len (s->stream_msg_index_by_global_index))
+		ed->c[2] =
+		  s->stream_msg_index_by_global_index[sm->global_index];
+	    }
+	}
+      else
+	{
+	  /* Stream local index unknown: allocate the next slot for it. */
+	  vec_add2 (s->stream_msgs, sm, 1);
+
+	  si = sm - s->stream_msgs;
+	  sm->global_index = gi;
+	  s->stream_msg_index_by_global_index[gi] = si;
+
+	  if (MC_EVENT_LOGGING > 0)
+	    {
+	      /* *INDENT-OFF* */
+	      ELOG_TYPE_DECLARE (e) =
+                {
+                  .format = "msg-bind: stream %d %s to index %d",
+                  .format_args = "i4T4i4",
+                };
+	      /* *INDENT-ON* */
+	      struct
+	      {
+		u32 c[3];
+	      } *ed;
+	      ed = ELOG_DATA (mcm->elog_main, e);
+	      ed->c[0] = s->index;
+	      ed->c[1] = elog_id_for_msg_name (mcm, name);
+	      ed->c[2] = si;
+	    }
+	}
+
+      vec_free (name);
+    }
+  else
+    {
+      /* Sender used a stream-local index we are expected to know. */
+      sm = vec_elt_at_index (s->stream_msgs, si);
+      gi = sm->global_index;
+    }
+
+  if (gi != ~0)
+    {
+      mc_serialize_msg_t *msg = vec_elt (mcm->global_msgs, gi);
+      unserialize (m, msg->unserialize, mcm);
+    }
+
+  return gi != ~0;
+}
+
+/*
+ * Unserialize every message contained in one received buffer.
+ *
+ * stream_and_buffer_index selects an element of
+ * mc_unserialize_stream_and_buffers naming the stream and the buffer; the
+ * element is returned to the pool once both values have been read.
+ *
+ * If the stream has a save_snapshot hook, the buffer chain contents are
+ * first copied into a static scratch vector and passed to the hook.
+ * Must run in process context (asserted).
+ */
+void
+mc_unserialize_internal (mc_main_t * mcm, u32 stream_and_buffer_index)
+{
+  vlib_main_t *vm = mcm->vlib_main;
+  serialize_main_t *m = &mcm->serialize_mains[VLIB_RX];
+  vlib_serialize_buffer_main_t *sbm = &mcm->serialize_buffer_mains[VLIB_RX];
+  mc_stream_and_buffer_t *sb;
+  mc_stream_t *stream;
+  u32 buffer_index;
+
+  sb =
+    pool_elt_at_index (mcm->mc_unserialize_stream_and_buffers,
+		       stream_and_buffer_index);
+  /* Copy out what we need before giving the element back. */
+  buffer_index = sb->buffer_index;
+  stream = vec_elt_at_index (mcm->stream_vector, sb->stream_index);
+  pool_put (mcm->mc_unserialize_stream_and_buffers, sb);
+
+  if (stream->config.save_snapshot)
+    {
+      u32 n_bytes = vlib_buffer_index_length_in_chain (vm, buffer_index);
+      /* Scratch vector kept across calls to avoid reallocating. */
+      static u8 *contents;
+      vec_reset_length (contents);
+      /*
+       * Only size and fill the scratch vector for a non-empty chain:
+       * with n_bytes == 0 the unsigned expression n_bytes - 1 wraps and
+       * would request an enormous vector.  The hook is still called,
+       * with a length of zero.
+       */
+      if (n_bytes > 0)
+	{
+	  vec_validate (contents, n_bytes - 1);
+	  vlib_buffer_contents (vm, buffer_index, contents);
+	}
+      stream->config.save_snapshot (mcm, /* is_catchup */ 0, contents,
+				    n_bytes);
+    }
+
+  ASSERT (vlib_in_process_context (vm));
+
+  unserialize_open_vlib_buffer (m, vm, sbm);
+
+  clib_fifo_add1 (sbm->rx.buffer_fifo, buffer_index);
+
+  /* A buffer may carry several messages back to back. */
+  while (unserialize_vlib_buffer_n_bytes (m) > 0)
+    mc_unserialize_message (mcm, stream, m);
+
+  /* Frees buffer. */
+  unserialize_close_vlib_buffer (m);
+}
+
+/*
+ * Queue a received buffer for the unserialize process.  The (stream,
+ * buffer) pair is stored in a pool element whose index is sent as the
+ * datum of an EVENT_MC_UNSERIALIZE_BUFFER event.
+ */
+void
+mc_unserialize (mc_main_t * mcm, mc_stream_t * s, u32 buffer_index)
+{
+  mc_stream_and_buffer_t *work;
+
+  pool_get (mcm->mc_unserialize_stream_and_buffers, work);
+  work->stream_index = s->index;
+  work->buffer_index = buffer_index;
+
+  vlib_process_signal_event (mcm->vlib_main, mcm->unserialize_process,
+			     EVENT_MC_UNSERIALIZE_BUFFER,
+			     work - mcm->mc_unserialize_stream_and_buffers);
+}
+
+/*
+ * Process body that performs unserialization work.
+ *
+ * EVENT_MC_UNSERIALIZE_BUFFER data are pool indices handled by
+ * mc_unserialize_internal.  EVENT_MC_UNSERIALIZE_CATCHUP data are message
+ * pointers handled by perform_catchup and then freed with vec_free.
+ * Other event types are ignored.  The loop never terminates.
+ */
+static uword
+mc_unserialize_process (vlib_main_t * vm,
+			vlib_node_runtime_t * node, vlib_frame_t * f)
+{
+  mc_main_t *mcm = mc_node_get_main (node);
+  uword *data = 0;
+
+  for (;;)
+    {
+      uword kind;
+      int k;
+
+      /* Reuse the event vector from the previous wakeup. */
+      if (data)
+	_vec_len (data) = 0;
+
+      vlib_process_wait_for_event (vm);
+      kind = vlib_process_get_events (vm, &data);
+
+      if (kind == EVENT_MC_UNSERIALIZE_BUFFER)
+	{
+	  for (k = 0; k < vec_len (data); k++)
+	    mc_unserialize_internal (mcm, data[k]);
+	}
+      else if (kind == EVENT_MC_UNSERIALIZE_CATCHUP)
+	{
+	  for (k = 0; k < vec_len (data); k++)
+	    {
+	      u8 *reply = uword_to_pointer (data[k], u8 *);
+	      perform_catchup (mcm, (void *) reply);
+	      vec_free (reply);
+	    }
+	}
+    }
+
+  return 0;			/* not likely */
+}
+
+/*
+ * Serialize the stream table of an mc_main_t.
+ *
+ * Output layout: stream count (u32); per stream its name, the number of
+ * stream-local messages (u32) and the global name of each such message in
+ * stream-local index order.
+ * Varargs: the mc_main_t * to serialize.
+ */
+void
+serialize_mc_main (serialize_main_t * m, va_list * va)
+{
+  mc_main_t *mcm = va_arg (*va, mc_main_t *);
+  uword i, j;
+
+  serialize_integer (m, vec_len (mcm->stream_vector), sizeof (u32));
+
+  for (i = 0; i < vec_len (mcm->stream_vector); i++)
+    {
+      mc_stream_t *stream = &mcm->stream_vector[i];
+
+      /* Stream name. */
+      serialize_cstring (m, stream->config.name);
+
+      /* Serialize global names for all sent messages. */
+      serialize_integer (m, vec_len (stream->stream_msgs), sizeof (u32));
+      for (j = 0; j < vec_len (stream->stream_msgs); j++)
+	{
+	  mc_serialize_stream_msg_t *sm = &stream->stream_msgs[j];
+	  mc_serialize_msg_t *msg =
+	    vec_elt (mcm->global_msgs, sm->global_index);
+
+	  serialize_cstring (m, msg->name);
+	}
+    }
+}
+
+/*
+ * Inverse of serialize_mc_main: rebuild the stream table and each
+ * stream's message-index bindings from serialized form.
+ *
+ * For every serialized stream: if its index is not the internal stream
+ * and no stream with that name exists yet, a new stream is initialised at
+ * that index in state name_known and registered by name.  The stream's
+ * existing message bindings are then discarded and rebuilt from the
+ * serialized message names; names missing from the global table are
+ * recorded with global index ~0.
+ * Varargs: the mc_main_t * to fill in.
+ */
+void
+unserialize_mc_main (serialize_main_t * m, va_list * va)
+{
+  mc_main_t *mcm = va_arg (*va, mc_main_t *);
+  u32 i, n_streams, n_stream_msgs;
+  char *name;
+  mc_stream_t *s;
+  mc_serialize_stream_msg_t *sm;
+
+  unserialize_integer (m, &n_streams, sizeof (u32));
+  for (i = 0; i < n_streams; i++)
+    {
+      unserialize_cstring (m, &name);
+      if (i != MC_STREAM_INDEX_INTERNAL && !mc_stream_by_name (mcm, name))
+	{
+	  vec_validate (mcm->stream_vector, i);
+	  s = vec_elt_at_index (mcm->stream_vector, i);
+	  mc_stream_init (s);
+	  s->index = s - mcm->stream_vector;
+	  /* The stream takes ownership of the name vector. */
+	  s->config.name = name;
+	  s->state = MC_STREAM_STATE_name_known;
+	  hash_set_mem (mcm->stream_index_by_name, s->config.name, s->index);
+	}
+      else
+	vec_free (name);
+
+      s = vec_elt_at_index (mcm->stream_vector, i);
+
+      /* Drop any previous bindings; they are rebuilt below. */
+      vec_free (s->stream_msgs);
+      vec_free (s->stream_msg_index_by_global_index);
+
+      unserialize_integer (m, &n_stream_msgs, sizeof (u32));
+      vec_resize (s->stream_msgs, n_stream_msgs);
+      vec_foreach (sm, s->stream_msgs)
+      {
+	uword *p;
+	u32 si, gi;
+
+	unserialize_cstring (m, &name);
+	/*
+	 * The table is keyed by string (hash_create_string, filled with
+	 * hash_set_mem), so use the matching *_mem lookup, as
+	 * mc_unserialize_message does.
+	 */
+	p = hash_get_mem (mcm->global_msg_index_by_name, name);
+	gi = p ? p[0] : ~0;
+	si = sm - s->stream_msgs;
+
+	if (MC_EVENT_LOGGING > 0)
+	  {
+	    /* *INDENT-OFF* */
+	    ELOG_TYPE_DECLARE (e) =
+              {
+                .format = "catchup-bind: %s to %d global index %d stream %d",
+                .format_args = "T4i4i4i4",
+              };
+	    /* *INDENT-ON* */
+
+	    struct
+	    {
+	      u32 c[4];
+	    } *ed;
+	    ed = ELOG_DATA (mcm->elog_main, e);
+	    ed->c[0] = elog_id_for_msg_name (mcm, name);
+	    ed->c[1] = si;
+	    ed->c[2] = gi;
+	    ed->c[3] = s->index;
+	  }
+
+	vec_free (name);
+
+	sm->global_index = gi;
+	/* Only known messages get a reverse (global -> stream) mapping. */
+	if (gi != ~0)
+	  {
+	    vec_validate_init_empty (s->stream_msg_index_by_global_index,
+				     gi, ~0);
+	    s->stream_msg_index_by_global_index[gi] = si;
+	  }
+      }
+    }
+}
+
+/*
+ * Initialise an mc_main_t.
+ *
+ * Sets the initial relay state (NEGOTIATE, no relay master), creates the
+ * stream name hash, registers the five process nodes (mastership,
+ * join-ager, retry, catchup, unserialize) with names suffixed by tag,
+ * initialises the peer hashes and builds the global message tables.
+ */
+void
+mc_main_init (mc_main_t * mcm, char *tag)
+{
+  vlib_main_t *vm = vlib_get_main ();
+  vlib_node_registration_t reg;
+
+  mcm->vlib_main = vm;
+  mcm->elog_main = &vm->elog_main;
+
+  mcm->relay_master_peer_id = ~0ULL;
+  mcm->relay_state = MC_RELAY_STATE_NEGOTIATE;
+
+  mcm->stream_index_by_name
+    = hash_create_string ( /* elts */ 0, /* value size */ sizeof (uword));
+
+  /* One registration template is reused for all five process nodes. */
+  memset (&reg, 0, sizeof (reg));
+  reg.type = VLIB_NODE_TYPE_PROCESS;
+
+  /* Runtime data is a copy of the mcm pointer itself. */
+  reg.runtime_data = &mcm;
+  reg.runtime_data_bytes = sizeof (&mcm);
+
+  reg.function = mc_mastership_process;
+  reg.name = (char *) format (0, "mc-mastership-%s", tag);
+  mcm->mastership_process = vlib_register_node (vm, &reg);
+
+  reg.function = mc_join_ager_process;
+  reg.name = (char *) format (0, "mc-join-ager-%s", tag);
+  mcm->join_ager_process = vlib_register_node (vm, &reg);
+
+  reg.function = mc_retry_process;
+  reg.name = (char *) format (0, "mc-retry-%s", tag);
+  mcm->retry_process = vlib_register_node (vm, &reg);
+
+  reg.function = mc_catchup_process;
+  reg.name = (char *) format (0, "mc-catchup-%s", tag);
+  mcm->catchup_process = vlib_register_node (vm, &reg);
+
+  reg.function = mc_unserialize_process;
+  reg.name = (char *) format (0, "mc-unserialize-%s", tag);
+  mcm->unserialize_process = vlib_register_node (vm, &reg);
+
+  /* The elog peer-id table is only needed when event logging is on. */
+  if (MC_EVENT_LOGGING > 0)
+    mhash_init (&mcm->elog_id_by_peer_id, sizeof (uword),
+		sizeof (mc_peer_id_t));
+
+  mhash_init (&mcm->mastership_peer_index_by_id, sizeof (uword),
+	      sizeof (mc_peer_id_t));
+  mc_serialize_init (mcm);
+}
+
+/*
+ * Format function for mc_relay_state_t: appends "negotiate", "master" or
+ * "slave", or "unknown 0x<value>" for any other value.
+ */
+static u8 *
+format_mc_relay_state (u8 * s, va_list * args)
+{
+  mc_relay_state_t state = va_arg (*args, mc_relay_state_t);
+  char *label;
+
+  if (state == MC_RELAY_STATE_NEGOTIATE)
+    label = "negotiate";
+  else if (state == MC_RELAY_STATE_MASTER)
+    label = "master";
+  else if (state == MC_RELAY_STATE_SLAVE)
+    label = "slave";
+  else
+    return format (s, "unknown 0x%x", state);
+
+  return format (s, "%s", label);
+}
+
+/*
+ * Format function for mc_stream_state_t: appends the state's name as
+ * listed in foreach_mc_stream_state, or "unknown 0x<value>" for a value
+ * not in that list.
+ */
+static u8 *
+format_mc_stream_state (u8 * s, va_list * args)
+{
+  mc_stream_state_t state = va_arg (*args, mc_stream_state_t);
+
+  switch (state)
+    {
+      /* One case per state, generated from the state list. */
+#define _(f) case MC_STREAM_STATE_##f: return format (s, "%s", #f);
+      foreach_mc_stream_state
+#undef _
+    default:
+      break;
+    }
+
+  return format (s, "unknown 0x%x", state);
+}
+
+/*
+ * Sort comparator for mc_stream_peer_t elements: orders peers by id
+ * using mc_peer_id_compare.
+ */
+static int
+mc_peer_comp (void *a1, void *a2)
+{
+  mc_stream_peer_t *lhs = (mc_stream_peer_t *) a1;
+  mc_stream_peer_t *rhs = (mc_stream_peer_t *) a2;
+  int order = mc_peer_id_compare (lhs->id, rhs->id);
+
+  return order;
+}
+
+/*
+ * Format function for an mc_main_t *.
+ *
+ * Output: a summary line (relay state, stream count, global sequence),
+ * the mastership peers with the time since each one's last master
+ * assert, and per stream its name, index, state, retry configuration and
+ * counters, request counters, sequence numbers and a table of the peers
+ * that are set in all_peer_bitmap, sorted by peer id.
+ */
+u8 *
+format_mc_main (u8 * s, va_list * args)
+{
+  mc_main_t *mcm = va_arg (*args, mc_main_t *);
+  mc_stream_t *stream;
+  mc_stream_peer_t *peer, *row, *sorted;
+  uword indent = format_get_indent (s);
+
+  s = format (s, "MC state %U, %d streams joined, global sequence 0x%x",
+	      format_mc_relay_state, mcm->relay_state,
+	      vec_len (mcm->stream_vector), mcm->relay_global_sequence);
+
+  {
+    mc_mastership_peer_t *mpeer;
+    f64 now = vlib_time_now (mcm->vlib_main);
+
+    s = format (s, "\n%UMost recent mastership peers:",
+		format_white_space, indent + 2);
+
+    /* Second column is the age of the last assert. */
+    vec_foreach (mpeer, mcm->mastership_peers)
+    {
+      s = format (s, "\n%U%-30U%.4e",
+		  format_white_space, indent + 4,
+		  mcm->transport.format_peer_id, mpeer->peer_id,
+		  now - mpeer->time_last_master_assert_received);
+    }
+  }
+
+  vec_foreach (stream, mcm->stream_vector)
+  {
+    s = format (s, "\n%Ustream `%s' index %d",
+		format_white_space, indent + 2,
+		stream->config.name, stream->index);
+
+    s = format (s, "\n%Ustate %U",
+		format_white_space, indent + 4,
+		format_mc_stream_state, stream->state);
+
+    /* Counters are shown relative to the last clear. */
+    s =
+      format (s,
+	      "\n%Uretries: interval %.0f sec, limit %d, pool elts %d, %Ld sent",
+	      format_white_space, indent + 4, stream->config.retry_interval,
+	      stream->config.retry_limit, pool_elts (stream->retry_pool),
+	      stream->stats.n_retries - stream->stats_last_clear.n_retries);
+
+    s = format (s, "\n%U%Ld/%Ld user requests sent/received",
+		format_white_space, indent + 4,
+		stream->user_requests_sent, stream->user_requests_received);
+
+    s = format (s, "\n%U%d peers, local/global sequence 0x%x/0x%x",
+		format_white_space, indent + 4,
+		pool_elts (stream->peers),
+		stream->our_local_sequence,
+		stream->last_global_sequence_processed);
+
+    /* Copy the peers set in all_peer_bitmap so they can be sorted. */
+    sorted = 0;
+    /* *INDENT-OFF* */
+    pool_foreach (peer, stream->peers,
+    ({
+      if (clib_bitmap_get (stream->all_peer_bitmap, peer - stream->peers))
+        vec_add1 (sorted, peer[0]);
+    }));
+    /* *INDENT-ON* */
+    vec_sort_with_function (sorted, mc_peer_comp);
+
+    s = format (s, "\n%U%=30s%10s%16s%16s",
+		format_white_space, indent + 6,
+		"Peer", "Last seq", "Retries", "Future");
+
+    vec_foreach (row, sorted)
+    {
+      s = format (s, "\n%U%-30U0x%08x%16Ld%16Ld%s",
+		  format_white_space, indent + 6,
+		  mcm->transport.format_peer_id, row->id.as_u64,
+		  row->last_sequence_received,
+		  row->stats.n_msgs_from_past -
+		  row->stats_last_clear.n_msgs_from_past,
+		  row->stats.n_msgs_from_future -
+		  row->stats_last_clear.n_msgs_from_future,
+		  (mcm->transport.our_ack_peer_id.as_u64 ==
+		   row->id.as_u64 ? " (self)" : ""));
+    }
+    vec_free (sorted);
+  }
+
+  return s;
+}
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vlib/mc.h b/src/vlib/mc.h
new file mode 100644
index 00000000000..dc95b0e9074
--- /dev/null
+++ b/src/vlib/mc.h
@@ -0,0 +1,687 @@
+/*
+ * mc.h: vlib reliable sequenced multicast distributed applications
+ *
+ * Copyright (c) 2010 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef included_vlib_mc_h
+#define included_vlib_mc_h
+
+#include <vppinfra/elog.h>
+#include <vppinfra/fifo.h>
+#include <vppinfra/mhash.h>
+#include <vlib/node.h>
+
+#ifndef MC_EVENT_LOGGING
+#define MC_EVENT_LOGGING 1
+#endif
+
+/* Tell the mc_byte_swap_* helpers below whether to swap multi-byte
+ fields: the result is CLIB_ARCH_IS_LITTLE_ENDIAN. */
+always_inline uword
+mc_need_byte_swap (void)
+{
+ return CLIB_ARCH_IS_LITTLE_ENDIAN;
+}
+
+/*
+ * Used to uniquely identify hosts.
+ * For IP4 this would be ip4_address plus tcp/udp port.
+ *
+ * The same 8 bytes can be read either byte-wise (as_u8) or as a single
+ * 64 bit value (as_u64).
+ */
+typedef union
+{
+ u8 as_u8[8];
+ u64 as_u64;
+} mc_peer_id_t;
+
+/* Byte-swap hook for peer ids. Returns its argument unchanged; it exists
+ so the per-message swap helpers can treat every field uniformly. */
+always_inline mc_peer_id_t
+mc_byte_swap_peer_id (mc_peer_id_t i)
+{
+ /* Peer id is already in network byte order. */
+ return i;
+}
+
+/* Order two peer ids by comparing their 8 raw bytes with memcmp.
+ Returns the memcmp result: negative, zero or positive. */
+always_inline int
+mc_peer_id_compare (mc_peer_id_t a, mc_peer_id_t b)
+{
+ return memcmp (a.as_u8, b.as_u8, sizeof (a.as_u8));
+}
+
+/* Assert mastership. Lowest peer_id among all peers wins mastership.
+ Only sent/received over mastership channel (MC_TRANSPORT_MASTERSHIP).
+ So, we don't need a message opcode.
+ The struct is packed (CLIB_PACKED). */
+typedef CLIB_PACKED (struct
+ {
+ /* Peer id asserting mastership. */
+ mc_peer_id_t peer_id;
+ /* Global sequence number asserted. */
+ u32 global_sequence;}) mc_msg_master_assert_t;
+
+/* Byte-swap the fields of a master-assert message in place.
+   Does nothing unless mc_need_byte_swap () is true. */
+always_inline void
+mc_byte_swap_msg_master_assert (mc_msg_master_assert_t * r)
+{
+  if (!mc_need_byte_swap ())
+    return;
+
+  r->peer_id = mc_byte_swap_peer_id (r->peer_id);
+  r->global_sequence = clib_byte_swap_u32 (r->global_sequence);
+}
+
+/* X-macro listing every mc message kind. Each user supplies its own
+ definition of _ (f) before expanding the list. */
+#define foreach_mc_msg_type \
+ _ (master_assert) \
+ _ (join_or_leave_request) \
+ _ (join_reply) \
+ _ (user_request) \
+ _ (user_ack) \
+ _ (catchup_request) \
+ _ (catchup_reply)
+
+/* One MC_MSG_TYPE_<name> constant per entry of foreach_mc_msg_type,
+ numbered from 0 in list order. */
+typedef enum
+{
+#define _(f) MC_MSG_TYPE_##f,
+ foreach_mc_msg_type
+#undef _
+} mc_relay_msg_type_t;
+
+/* Request to join a given stream. Multicast over MC_TRANSPORT_JOIN.
+ Packed struct; the message type is stored as a 32 bit bit-field. */
+typedef CLIB_PACKED (struct
+ {
+mc_peer_id_t peer_id; mc_relay_msg_type_t type:32;
+ /* MC_MSG_TYPE_join_or_leave_request */
+ /* Stream to join or leave. */
+ u32 stream_index;
+ /* join = 1, leave = 0 */
+ u8 is_join;}) mc_msg_join_or_leave_request_t;
+
+/* Byte-swap the multi-byte fields of a join/leave request in place.
+   Does nothing unless mc_need_byte_swap () is true; the single-byte
+   is_join flag is never touched. */
+always_inline void
+mc_byte_swap_msg_join_or_leave_request (mc_msg_join_or_leave_request_t * r)
+{
+  if (!mc_need_byte_swap ())
+    return;
+
+  r->peer_id = mc_byte_swap_peer_id (r->peer_id);
+  r->type = clib_byte_swap_u32 (r->type);
+  r->stream_index = clib_byte_swap_u32 (r->stream_index);
+}
+
+/* Join reply. Multicast over MC_TRANSPORT_JOIN.
+ Packed struct; the message type is stored as a 32 bit bit-field. */
+typedef CLIB_PACKED (struct
+ {
+mc_peer_id_t peer_id; mc_relay_msg_type_t type:32;
+ /* MC_MSG_TYPE_join_reply */
+ u32 stream_index;
+ /* Peer ID to contact to catchup with this stream. */
+ mc_peer_id_t catchup_peer_id;}) mc_msg_join_reply_t;
+
+/* Byte-swap the fields of a join reply in place.
+   Does nothing unless mc_need_byte_swap () is true. */
+always_inline void
+mc_byte_swap_msg_join_reply (mc_msg_join_reply_t * r)
+{
+  if (!mc_need_byte_swap ())
+    return;
+
+  r->peer_id = mc_byte_swap_peer_id (r->peer_id);
+  r->type = clib_byte_swap_u32 (r->type);
+  r->stream_index = clib_byte_swap_u32 (r->stream_index);
+  r->catchup_peer_id = mc_byte_swap_peer_id (r->catchup_peer_id);
+}
+
+/* Generic (application) request. Multicast over MC_TRANSPORT_USER_REQUEST_TO_RELAY and then
+ relayed by relay master after filling in global sequence number.
+ Packed header followed by n_data_bytes of payload in data[]. */
+typedef CLIB_PACKED (struct
+ {
+ mc_peer_id_t peer_id; u32 stream_index;
+ /* Global sequence number as filled in by relay master. */
+ u32 global_sequence;
+ /* Local sequence number as filled in by peer sending message. */
+ u32 local_sequence;
+ /* Size of request data. */
+ u32 n_data_bytes;
+ /* Opaque request data. */
+ u8 data[0];}) mc_msg_user_request_t;
+
+/* Byte-swap the header fields of a user request in place; the trailing
+   data[] payload is left untouched.
+   Does nothing unless mc_need_byte_swap () is true. */
+always_inline void
+mc_byte_swap_msg_user_request (mc_msg_user_request_t * r)
+{
+  if (!mc_need_byte_swap ())
+    return;
+
+  r->peer_id = mc_byte_swap_peer_id (r->peer_id);
+  r->stream_index = clib_byte_swap_u32 (r->stream_index);
+  r->global_sequence = clib_byte_swap_u32 (r->global_sequence);
+  r->local_sequence = clib_byte_swap_u32 (r->local_sequence);
+  r->n_data_bytes = clib_byte_swap_u32 (r->n_data_bytes);
+}
+
+/* Sent unicast over ACK channel.
+ Packed struct. seq_cmp_result is signed; presumably it carries the
+ result of a sequence comparison made by the acking peer - confirm
+ against the sender in mc.c. */
+typedef CLIB_PACKED (struct
+ {
+ mc_peer_id_t peer_id;
+ u32 global_sequence; u32 stream_index;
+ u32 local_sequence;
+ i32 seq_cmp_result;}) mc_msg_user_ack_t;
+
+/* Byte-swap the fields of a user ack in place.
+   Does nothing unless mc_need_byte_swap () is true. */
+always_inline void
+mc_byte_swap_msg_user_ack (mc_msg_user_ack_t * r)
+{
+  if (!mc_need_byte_swap ())
+    return;
+
+  r->peer_id = mc_byte_swap_peer_id (r->peer_id);
+  r->stream_index = clib_byte_swap_u32 (r->stream_index);
+  r->global_sequence = clib_byte_swap_u32 (r->global_sequence);
+  r->local_sequence = clib_byte_swap_u32 (r->local_sequence);
+  /* The comparison result is signed, hence the i32 variant. */
+  r->seq_cmp_result = clib_byte_swap_i32 (r->seq_cmp_result);
+}
+
+/* Sent/received unicast over catchup channel (e.g. using TCP).
+ Packed struct carrying a peer id and the index of the stream
+ concerned. */
+typedef CLIB_PACKED (struct
+ {
+ mc_peer_id_t peer_id;
+ u32 stream_index;}) mc_msg_catchup_request_t;
+
+/* Byte-swap the fields of a catchup request in place.
+   Does nothing unless mc_need_byte_swap () is true. */
+always_inline void
+mc_byte_swap_msg_catchup_request (mc_msg_catchup_request_t * r)
+{
+  if (!mc_need_byte_swap ())
+    return;
+
+  r->peer_id = mc_byte_swap_peer_id (r->peer_id);
+  r->stream_index = clib_byte_swap_u32 (r->stream_index);
+}
+
+/* Sent/received unicast over catchup channel.
+ Packed header followed by n_data_bytes of catchup data in data[]. */
+typedef CLIB_PACKED (struct
+ {
+ mc_peer_id_t peer_id; u32 stream_index;
+ /* Last global sequence number included in catchup data. */
+ u32 last_global_sequence_included;
+ /* Size of catchup data. */
+ u32 n_data_bytes;
+ /* Catchup data. */
+ u8 data[0];}) mc_msg_catchup_reply_t;
+
+/* Byte-swap the header fields of a catchup reply in place; the trailing
+   data[] payload is left untouched.
+   Does nothing unless mc_need_byte_swap () is true. */
+always_inline void
+mc_byte_swap_msg_catchup_reply (mc_msg_catchup_reply_t * r)
+{
+  if (!mc_need_byte_swap ())
+    return;
+
+  r->peer_id = mc_byte_swap_peer_id (r->peer_id);
+  r->stream_index = clib_byte_swap_u32 (r->stream_index);
+  r->last_global_sequence_included =
+    clib_byte_swap_u32 (r->last_global_sequence_included);
+  r->n_data_bytes = clib_byte_swap_u32 (r->n_data_bytes);
+}
+
+/* Descriptor for one serializable message type. Instances are chained
+ through next_registration (see MC_SERIALIZE_MSG). */
+typedef struct _mc_serialize_msg
+{
+ /* Name for this type. */
+ char *name;
+
+ /* Functions to serialize/unserialize data. */
+ serialize_function_t *serialize;
+ serialize_function_t *unserialize;
+
+ /* Maximum message size in bytes when serialized.
+ If zero then this will be set to the largest sent message. */
+ u32 max_n_bytes_serialized;
+
+ /* Opaque to use for first argument to serialize/unserialize function. */
+ u32 opaque;
+
+ /* Index in global message vector. */
+ u32 global_index;
+
+ /* Registration list */
+ struct _mc_serialize_msg *next_registration;
+} mc_serialize_msg_t;
+
+/* Per-stream record of a message type; it only stores where the type
+ lives in the global message vector. */
+typedef struct
+{
+ /* Index into global message vector. */
+ u32 global_index;
+} mc_serialize_stream_msg_t;
+
+/* Declare message descriptor x and register it at load time.
+ Expands to: a declaration of x, a constructor function
+ (__attribute__ ((__constructor__))) that pushes &x onto
+ vlib_get_main ()->mc_msg_registrations, and finally a second
+ declaration of x with no terminator - presumably so the use site can
+ follow the macro with "= { ... };".
+ Any extra arguments (__VA_ARGS__) are placed before both
+ declarations, e.g. a storage-class specifier. */
+#define MC_SERIALIZE_MSG(x,...) \
+ __VA_ARGS__ mc_serialize_msg_t x; \
+static void __mc_serialize_msg_registration_##x (void) \
+ __attribute__((__constructor__)) ; \
+static void __mc_serialize_msg_registration_##x (void) \
+{ \
+ vlib_main_t * vm = vlib_get_main(); \
+ x.next_registration = vm->mc_msg_registrations; \
+ vm->mc_msg_registrations = &x; \
+} \
+__VA_ARGS__ mc_serialize_msg_t x
+
+/* Channels a transport can send a buffer on (see mc_transport_t.tx_buffer).
+ MC_N_TRANSPORT_TYPE is the count of real entries, not a channel. */
+typedef enum
+{
+ MC_TRANSPORT_MASTERSHIP,
+ MC_TRANSPORT_JOIN,
+ MC_TRANSPORT_USER_REQUEST_TO_RELAY,
+ MC_TRANSPORT_USER_REQUEST_FROM_RELAY,
+ MC_N_TRANSPORT_TYPE,
+} mc_transport_type_t;
+
+/* Callback table and parameters supplied by a transport implementation.
+ Every callback receives the `opaque' member as its first argument. */
+typedef struct
+{
+ /* Send the buffer with the given index on the given channel. */
+ clib_error_t *(*tx_buffer) (void *opaque, mc_transport_type_t type,
+ u32 buffer_index);
+
+ /* Send the buffer with the given index as an ack to one peer. */
+ clib_error_t *(*tx_ack) (void *opaque, mc_peer_id_t peer_id,
+ u32 buffer_index);
+
+ /* Returns catchup opaque. */
+ uword (*catchup_request_fun) (void *opaque, u32 stream_index,
+ mc_peer_id_t catchup_peer_id);
+
+ /* Send a vector of catchup data; catchup_opaque presumably identifies
+ the catchup connection - confirm against the transport. */
+ void (*catchup_send_fun) (void *opaque, uword catchup_opaque,
+ u8 * data_vector);
+
+ /* Opaque passed to callbacks. */
+ void *opaque;
+
+ /* Our own peer ids on the ack and catchup channels. */
+ mc_peer_id_t our_ack_peer_id;
+ mc_peer_id_t our_catchup_peer_id;
+
+ /* Max packet size (MTU) for this transport.
+ For IP this is interface MTU less IP + UDP header size. */
+ u32 max_packet_size;
+
+ /* Formatter used to print a peer id. */
+ format_function_t *format_peer_id;
+} mc_transport_t;
+
+/* Per-peer message counters for one stream. */
+typedef struct
+{
+ /* Count of messages received from this peer from the past/future
+ (with seq_cmp != 0). */
+ u64 n_msgs_from_past;
+ u64 n_msgs_from_future;
+} mc_stream_peer_stats_t;
+
+/* State kept for one peer of a stream. */
+typedef struct
+{
+ /* ID of this peer. */
+ mc_peer_id_t id;
+
+ /* The last sequence we received from this peer. */
+ u32 last_sequence_received;
+
+ /* Running counters, and the snapshot of them taken by
+ mc_clear_stream_stats. */
+ mc_stream_peer_stats_t stats, stats_last_clear;
+} mc_stream_peer_t;
+
+/* One message awaiting acknowledgement; an element of a stream's
+ retry pool and retired fifo. */
+typedef struct
+{
+ u32 buffer_index;
+
+ /* Cached copy of local sequence number from buffer. */
+ u32 local_sequence;
+
+ /* Number of times this buffer has been sent (retried). */
+ u32 n_retries;
+
+ /* Previous/next retries in doubly-linked list. */
+ u32 prev_index, next_index;
+
+ /* Bitmap of peers for this msg. NOTE(review): the field name says
+ "unacked" (peers which have NOT yet acked), while the original
+ comment read "all peers which have acked this msg" - confirm the
+ polarity against the users in mc.c. */
+ uword *unacked_by_peer_bitmap;
+
+ /* Message send or resend time */
+ f64 sent_at;
+} mc_retry_t;
+
+/* Per-stream counters. */
+typedef struct
+{
+ /* Number of retries sent for this stream. */
+ u64 n_retries;
+} mc_stream_stats_t;
+
+/* Forward declarations so the callback prototypes below can refer to
+ these structs before they are defined. */
+struct mc_main_t;
+struct mc_stream_t;
+
+/* User-supplied configuration and callbacks for one stream. */
+typedef struct
+{
+ /* Stream name. */
+ char *name;
+
+ /* Number of outstanding messages. */
+ u32 window_size;
+
+ /* Retry interval, in seconds */
+ f64 retry_interval;
+
+ /* Retry limit */
+ u32 retry_limit;
+
+ /* User rx buffer callback */
+ void (*rx_buffer) (struct mc_main_t * mc_main,
+ struct mc_stream_t * stream,
+ mc_peer_id_t peer_id, u32 buffer_index);
+
+ /* User callback to create a snapshot */
+ u8 *(*catchup_snapshot) (struct mc_main_t * mc_main,
+ u8 * snapshot_vector,
+ u32 last_global_sequence_included);
+
+ /* User callback to replay a snapshot */
+ void (*catchup) (struct mc_main_t * mc_main,
+ u8 * snapshot_data, u32 n_snapshot_data_bytes);
+
+ /* Callback to save a snapshot for offline replay */
+ void (*save_snapshot) (struct mc_main_t * mc_main,
+ u32 is_catchup,
+ u8 * snapshot_data, u32 n_snapshot_data_bytes);
+
+ /* Called when a peer dies */
+ void (*peer_died) (struct mc_main_t * mc_main,
+ struct mc_stream_t * stream, mc_peer_id_t peer_id);
+} mc_stream_config_t;
+
+/* X-macro listing the stream states. `invalid' comes first and so
+ gets the value 0 in the enum below. */
+#define foreach_mc_stream_state \
+ _ (invalid) \
+ _ (name_known) \
+ _ (join_in_progress) \
+ _ (catchup) \
+ _ (ready)
+
+/* One MC_STREAM_STATE_<name> constant per entry of
+ foreach_mc_stream_state, numbered from 0 in list order. */
+typedef enum
+{
+#define _(f) MC_STREAM_STATE_##f,
+ foreach_mc_stream_state
+#undef _
+} mc_stream_state_t;
+
+/* All state for one stream: configuration, retry bookkeeping, peers,
+ sequence numbers and per-stream message tables. */
+typedef struct mc_stream_t
+{
+ mc_stream_config_t config;
+
+ mc_stream_state_t state;
+
+ /* Index in stream pool. */
+ u32 index;
+
+ /* Stream index 0 is always for MC internal use. */
+#define MC_STREAM_INDEX_INTERNAL 0
+
+ mc_retry_t *retry_pool;
+
+ /* Head and tail index of retry pool. */
+ u32 retry_head_index, retry_tail_index;
+
+ /*
+ * Country club for recently retired messages
+ * If the set of peers is expanding and a new peer
+ * misses a message, we can easily retire the FIFO
+ * element before we even know about the new peer
+ */
+ mc_retry_t *retired_fifo;
+
+ /* Hash mapping local sequence to retry pool index. */
+ uword *retry_index_by_local_sequence;
+
+ /* catch-up fifo of VLIB buffer indices.
+ start recording when catching up. */
+ u32 *catchup_fifo;
+
+ /* Running counters, and the snapshot of them taken by
+ mc_clear_stream_stats. */
+ mc_stream_stats_t stats, stats_last_clear;
+
+ /* Peer pool. */
+ mc_stream_peer_t *peers;
+
+ /* Bitmap with ones for all peers in peer pool. */
+ uword *all_peer_bitmap;
+
+ /* Map of 64 bit id to pool index. NOTE(review): the original comment
+ said "index in stream pool"; given the field name this presumably
+ indexes the peer pool (`peers') - confirm. */
+ mhash_t peer_index_by_id;
+
+ /* Timeout, in case we're alone in the world */
+ f64 join_timeout;
+
+ vlib_one_time_waiting_process_t *procs_waiting_for_join_done;
+
+ vlib_one_time_waiting_process_t *procs_waiting_for_open_window;
+
+ /* Next sequence number to use */
+ u32 our_local_sequence;
+
+ /*
+ * Last global sequence we processed.
+ * When supplying catchup data, we need to tell
+ * the client precisely where to start replaying
+ */
+ u32 last_global_sequence_processed;
+
+ /* Vector of unique messages we've sent on this stream. */
+ mc_serialize_stream_msg_t *stream_msgs;
+
+ /* Vector global message index into per stream message index. */
+ u32 *stream_msg_index_by_global_index;
+
+ /* Hashed by message name. */
+ uword *stream_msg_index_by_name;
+
+ /* Counts of user requests sent and received on this stream. */
+ u64 user_requests_sent;
+ u64 user_requests_received;
+} mc_stream_t;
+
+/* Release the dynamically allocated members of a stream: the retry pool
+ and its sequence hash, the catchup fifo, the peer pool and its id map,
+ and both waiting-process vectors. The mc_stream_t itself is not freed.
+ NOTE(review): retired_fifo, all_peer_bitmap, stream_msgs,
+ stream_msg_index_by_global_index and stream_msg_index_by_name are not
+ released here - confirm whether that is intentional or a leak. */
+always_inline void
+mc_stream_free (mc_stream_t * s)
+{
+ pool_free (s->retry_pool);
+ hash_free (s->retry_index_by_local_sequence);
+ clib_fifo_free (s->catchup_fifo);
+ pool_free (s->peers);
+ mhash_free (&s->peer_index_by_id);
+ vec_free (s->procs_waiting_for_join_done);
+ vec_free (s->procs_waiting_for_open_window);
+}
+
+/* Reset a stream to its initial state: every byte zeroed, then the
+   retry list head and tail set to ~0 (presumably the "no entry"
+   marker). */
+always_inline void
+mc_stream_init (mc_stream_t * s)
+{
+  memset (s, 0, sizeof (*s));
+  s->retry_tail_index = ~0;
+  s->retry_head_index = ~0;
+}
+
+/* Argument record for the catchup process.
+ NOTE(review): catchup_opaque is u32 here, whereas the transport
+ callbacks in mc_transport_t produce and consume a uword - confirm no
+ truncation matters on 64 bit targets. */
+typedef struct
+{
+ u32 stream_index;
+ u32 catchup_opaque;
+ u8 *catchup_snapshot;
+} mc_catchup_process_arg_t;
+
+/* Relay role of this instance (see mc_main_t.relay_state).
+ NEGOTIATE is the zero value. */
+typedef enum
+{
+ MC_RELAY_STATE_NEGOTIATE,
+ MC_RELAY_STATE_MASTER,
+ MC_RELAY_STATE_SLAVE,
+} mc_relay_state_t;
+
+/* A peer seen on the mastership channel, with the time its most recent
+ master assert was received. */
+typedef struct
+{
+ mc_peer_id_t peer_id;
+
+ f64 time_last_master_assert_received;
+} mc_mastership_peer_t;
+
+/* Pairs a stream index with a buffer index; element type of
+ mc_main_t.mc_unserialize_stream_and_buffers. */
+typedef struct
+{
+ u32 stream_index;
+ u32 buffer_index;
+} mc_stream_and_buffer_t;
+
+/* Top-level state of the reliable multicast layer: relay mastership,
+ transport, streams, helper process node indices and serialization
+ state. */
+typedef struct mc_main_t
+{
+ mc_relay_state_t relay_state;
+
+ /* Mastership */
+ u32 we_can_be_relay_master;
+
+ u64 relay_master_peer_id;
+
+ mc_mastership_peer_t *mastership_peers;
+
+ /* Map of 64 bit id to index. NOTE(review): the original comment said
+ "index in stream pool"; given the field name this presumably
+ indexes mastership_peers - confirm. */
+ mhash_t mastership_peer_index_by_id;
+
+ /* The transport we're using. */
+ mc_transport_t transport;
+
+ /* Last-used global sequence number. */
+ u32 relay_global_sequence;
+
+ /* Vector of streams. */
+ mc_stream_t *stream_vector;
+
+ /* Hash table mapping stream name to pool index. */
+ uword *stream_index_by_name;
+
+ uword *procs_waiting_for_stream_name_by_name;
+
+ vlib_one_time_waiting_process_t **procs_waiting_for_stream_name_pool;
+
+ int joins_in_progress;
+
+ mc_catchup_process_arg_t *catchup_process_args;
+
+ /* Node indices for mastership, join ager,
+ retry and catchup processes. */
+ u32 mastership_process;
+ u32 join_ager_process;
+ u32 retry_process;
+ u32 catchup_process;
+ u32 unserialize_process;
+
+ /* Global vector of messages. */
+ mc_serialize_msg_t **global_msgs;
+
+ /* Hash table mapping message name to index. */
+ uword *global_msg_index_by_name;
+
+ /* Shared serialize/unserialize main. */
+ serialize_main_t serialize_mains[VLIB_N_RX_TX];
+
+ vlib_serialize_buffer_main_t serialize_buffer_mains[VLIB_N_RX_TX];
+
+ /* Convenience variables */
+ struct vlib_main_t *vlib_main;
+ elog_main_t *elog_main;
+
+ /* Maps 64 bit peer id to elog string table offset for this formatted peer id. */
+ mhash_t elog_id_by_peer_id;
+
+ uword *elog_id_by_msg_name;
+
+ /* For mc_unserialize. */
+ mc_stream_and_buffer_t *mc_unserialize_stream_and_buffers;
+} mc_main_t;
+
+/* Look a stream up by name in m->stream_index_by_name.
+   Returns a pointer into m->stream_vector, or 0 when the name has no
+   hash entry. */
+always_inline mc_stream_t *
+mc_stream_by_name (mc_main_t * m, char *name)
+{
+  uword *index_p = hash_get (m->stream_index_by_name, name);
+
+  if (!index_p)
+    return 0;
+
+  return vec_elt_at_index (m->stream_vector, index_p[0]);
+}
+
+/* Return the stream at index i of m->stream_vector, or 0 when i is not
+   below the vector length. */
+always_inline mc_stream_t *
+mc_stream_by_index (mc_main_t * m, u32 i)
+{
+  if (i >= vec_len (m->stream_vector))
+    return 0;
+
+  return m->stream_vector + i;
+}
+
+/* "Clear" the statistics of every stream and every peer by copying the
+ current counters into the matching stats_last_clear snapshot. The
+ running counters themselves are not reset; reporting code elsewhere in
+ this file prints stats minus stats_last_clear. */
+always_inline void
+mc_clear_stream_stats (mc_main_t * m)
+{
+ mc_stream_t *s;
+ mc_stream_peer_t *p;
+ vec_foreach (s, m->stream_vector)
+ {
+ s->stats_last_clear = s->stats;
+ /* *INDENT-OFF* */
+ pool_foreach (p, s->peers, ({
+ p->stats_last_clear = p->stats;
+ }));
+ /* *INDENT-ON* */
+ }
+}
+
+/* Declare all message handlers: one
+ void mc_msg_<name>_handler (mc_main_t *, mc_msg_<name>_t *, u32 buffer_index)
+ prototype per entry of foreach_mc_msg_type. */
+#define _(f) void mc_msg_##f##_handler (mc_main_t * mcm, mc_msg_##f##_t * msg, u32 buffer_index);
+foreach_mc_msg_type
+#undef _
+/* Join a stream described by the given configuration; returns a u32
+ (presumably the stream index - confirm in mc.c). */
+ u32 mc_stream_join (mc_main_t * mcm, mc_stream_config_t *);
+
+/* Leave the stream with the given index. */
+void mc_stream_leave (mc_main_t * mcm, u32 stream_index);
+
+void mc_wait_for_stream_ready (mc_main_t * m, char *stream_name);
+
+/* Send the buffer with the given index on a stream. */
+u32 mc_stream_send (mc_main_t * mcm, u32 stream_index, u32 buffer_index);
+
+void mc_main_init (mc_main_t * mcm, char *tag);
+
+void mc_enable_disable_mastership (mc_main_t * mcm, int we_can_be_master);
+
+/* Obtain a vlib buffer for n_bytes; the buffer index is stored through
+ bi_return. */
+void *mc_get_vlib_buffer (struct vlib_main_t *vm, u32 n_bytes,
+ u32 * bi_return);
+
+/* format () callback that prints an mc_main_t. */
+format_function_t format_mc_main;
+
+/* Variadic entry point used by the mc_serialize* macros below. */
+clib_error_t *mc_serialize_internal (mc_main_t * mc,
+ u32 stream_index,
+ u32 multiple_messages_per_vlib_buffer,
+ mc_serialize_msg_t * msg, ...);
+
+/* va_list flavour of mc_serialize_internal. */
+clib_error_t *mc_serialize_va (mc_main_t * mc,
+ u32 stream_index,
+ u32 multiple_messages_per_vlib_buffer,
+ mc_serialize_msg_t * msg, va_list * va);
+
+/* Convenience wrappers around mc_serialize_internal. Each passes
+ (msg)->serialize as the first variadic argument, followed by the
+ caller's args. mc_serialize_stream targets stream `si'; the other two
+ pass ~0 as the stream index. mc_serialize2 forwards `add' as
+ multiple_messages_per_vlib_buffer, the others pass 0.
+ NOTE(review): these use the GNU named-variadic form (args...) without
+ ## comma pasting, so at least one extra argument appears to be
+ required - confirm. */
+#define mc_serialize_stream(mc,si,msg,args...) \
+ mc_serialize_internal((mc),(si),(0),(msg),(msg)->serialize,args)
+
+#define mc_serialize(mc,msg,args...) \
+ mc_serialize_internal((mc),(~0),(0),(msg),(msg)->serialize,args)
+
+#define mc_serialize2(mc,add,msg,args...) \
+ mc_serialize_internal((mc),(~0),(add),(msg),(msg)->serialize,args)
+
+void mc_unserialize (mc_main_t * mcm, mc_stream_t * s, u32 buffer_index);
+uword mc_unserialize_message (mc_main_t * mcm, mc_stream_t * s,
+ serialize_main_t * m);
+
+/* Serialize / unserialize callbacks for the whole mc_main_t. */
+serialize_function_t serialize_mc_main, unserialize_mc_main;
+
+/* Largest user payload that fits in one transport packet: the
+   transport's max_packet_size less the mc_msg_user_request_t header.
+   NOTE(review): the subtraction is unsigned, so a max_packet_size
+   smaller than the header would wrap around - confirm callers
+   guarantee a sane MTU. */
+always_inline uword
+mc_max_message_size_in_bytes (mc_main_t * mcm)
+{
+  uword header_bytes = sizeof (mc_msg_user_request_t);
+
+  return mcm->transport.max_packet_size - header_bytes;
+}
+
+/* Bytes still available for the message being serialized through m:
+   the maximum message size less serialize_vlib_buffer_n_bytes (m).
+   The result is signed. */
+always_inline word
+mc_serialize_n_bytes_left (mc_main_t * mcm, serialize_main_t * m)
+{
+  uword limit = mc_max_message_size_in_bytes (mcm);
+
+  return limit - serialize_vlib_buffer_n_bytes (m);
+}
+
+/* Unserialize callback for a stream; the signature matches the
+ (serialize_main_t *, va_list *) shape used by serialize functions. */
+void unserialize_mc_stream (serialize_main_t * m, va_list * va);
+/* Defined in mc.c; purpose not visible from this header. */
+void mc_stream_join_process_hold (void);
+
+#endif /* included_vlib_mc_h */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vlib/node.c b/src/vlib/node.c
new file mode 100644
index 00000000000..c419a13a487
--- /dev/null
+++ b/src/vlib/node.c
@@ -0,0 +1,631 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * node.c: VLIB processing nodes
+ *
+ * Copyright (c) 2008 Eliot Dresselhaus
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <vlib/vlib.h>
+#include <vlib/threads.h>
+
+/* Query node given name.
+
+   `name' may be either a vppinfra vector or a plain NUL-terminated C
+   string. The node_by_name hash is keyed by vectors, so a C string is
+   first copied into a temporary vector, which is freed again before
+   returning.
+
+   Returns the node, or 0 when no node has that name. */
+vlib_node_t *
+vlib_get_node_by_name (vlib_main_t * vm, u8 * name)
+{
+  vlib_node_main_t *nm = &vm->node_main;
+  uword *p;
+  u8 *key = name;
+
+  /* Probe the vector header, exactly as register_node does for r->name.
+     Testing the user pointer itself can mis-classify a real vector and
+     send it through "%s", which would read past the end of a name that
+     carries no trailing NUL. */
+  if (!clib_mem_is_heap_object (vec_header (key, 0)))
+    key = format (0, "%s", key);
+
+  p = hash_get (nm->node_by_name, key);
+
+  /* Only free the key if we made the temporary copy above. */
+  if (key != name)
+    vec_free (key);
+
+  return p ? vec_elt (nm->nodes, p[0]) : 0;
+}
+
+/* (Re)build the event-log strings derived from a node's name: the
+   "<name>-call" and "<name>-return" event type formats, and the node's
+   elog string. Any previous format vectors are freed first. */
+static void
+node_set_elog_name (vlib_main_t * vm, uword node_index)
+{
+  vlib_node_t *node = vlib_get_node (vm, node_index);
+  elog_event_type_t *call_type, *return_type;
+
+  call_type = vec_elt_at_index (vm->node_call_elog_event_types, node_index);
+  vec_free (call_type->format);
+  call_type->format = (char *) format (0, "%v-call: %%d%c", node->name, 0);
+
+  return_type =
+    vec_elt_at_index (vm->node_return_elog_event_types, node_index);
+  vec_free (return_type->format);
+  return_type->format =
+    (char *) format (0, "%v-return: %%d%c", node->name, 0);
+
+  node->name_elog_string =
+    elog_string (&vm->elog_main, "%v%c", node->name, 0);
+}
+
+/* Give a node a new, printf-style formatted name. The old name is
+   removed from the name hash and freed, the new one is formatted and
+   inserted, and the node's event-log strings are regenerated. */
+void
+vlib_node_rename (vlib_main_t * vm, u32 node_index, char *fmt, ...)
+{
+  vlib_node_main_t *nm = &vm->node_main;
+  vlib_node_t *node = vlib_get_node (vm, node_index);
+  va_list args;
+
+  /* Unhash the old name while its storage is still valid. */
+  hash_unset (nm->node_by_name, node->name);
+  vec_free (node->name);
+
+  va_start (args, fmt);
+  node->name = va_format (0, fmt, &args);
+  va_end (args);
+
+  hash_set (nm->node_by_name, node->name, node->index);
+
+  node_set_elog_name (vm, node_index);
+}
+
+/* Bring a node's runtime in line with its next_nodes vector after a
+ next has been added at slot next_index.
+ If the node now has more nexts than its runtime knows about, room is
+ inserted in nm->next_frames and every next-frame index at or above
+ the insertion point - in other node runtimes, in pending frames and
+ in suspended process frames - is shifted up by the same amount.
+ Finally the next frame for next_index is pointed at the next node's
+ runtime. The whole update runs between
+ vlib_worker_thread_barrier_sync and _release, and asserts it is
+ called on cpu 0. */
+static void
+vlib_node_runtime_update (vlib_main_t * vm, u32 node_index, u32 next_index)
+{
+ vlib_node_main_t *nm = &vm->node_main;
+ vlib_node_runtime_t *r, *s;
+ vlib_node_t *node, *next_node;
+ vlib_next_frame_t *nf;
+ vlib_pending_frame_t *pf;
+ i32 i, j, n_insert;
+
+ ASSERT (os_get_cpu_number () == 0);
+
+ vlib_worker_thread_barrier_sync (vm);
+
+ node = vec_elt (nm->nodes, node_index);
+ r = vlib_node_get_runtime (vm, node_index);
+
+ /* Number of next slots the runtime does not yet have. */
+ n_insert = vec_len (node->next_nodes) - r->n_next_nodes;
+ if (n_insert > 0)
+ {
+ /* Insert right after this node's existing next frames. */
+ i = r->next_frame_index + r->n_next_nodes;
+ vec_insert (nm->next_frames, n_insert, i);
+
+ /* Initialize newly inserted next frames. */
+ for (j = 0; j < n_insert; j++)
+ vlib_next_frame_init (nm->next_frames + i + j);
+
+ /* Relocate other next frames at higher indices. */
+ for (j = 0; j < vec_len (nm->nodes); j++)
+ {
+ s = vlib_node_get_runtime (vm, j);
+ if (j != node_index && s->next_frame_index >= i)
+ s->next_frame_index += n_insert;
+ }
+
+ /* Pending frames may need to be relocated also. */
+ vec_foreach (pf, nm->pending_frames)
+ {
+ if (pf->next_frame_index != VLIB_PENDING_FRAME_NO_NEXT_FRAME
+ && pf->next_frame_index >= i)
+ pf->next_frame_index += n_insert;
+ }
+ /* NOTE(review): this loop tests against ~0 where the loop above uses
+ VLIB_PENDING_FRAME_NO_NEXT_FRAME; presumably the same value -
+ confirm. */
+ /* *INDENT-OFF* */
+ pool_foreach (pf, nm->suspended_process_frames, ({
+ if (pf->next_frame_index != ~0 && pf->next_frame_index >= i)
+ pf->next_frame_index += n_insert;
+ }));
+ /* *INDENT-ON* */
+
+ r->n_next_nodes = vec_len (node->next_nodes);
+ }
+
+ /* Set frame's node runtime index. */
+ next_node = vlib_get_node (vm, node->next_nodes[next_index]);
+ nf = nm->next_frames + r->next_frame_index + next_index;
+ nf->node_runtime_index = next_node->runtime_index;
+
+ vlib_worker_thread_node_runtime_update ();
+
+ vlib_worker_thread_barrier_release (vm);
+}
+
+/* Add next node to given node in given slot.
+
+ slot == ~0 means "append": the first unused index of next_nodes is
+ used. If next_node_index is already a next of the node, its existing
+ slot is returned (and, when a slot was requested, asserted to match).
+ Otherwise the node's vectors and hash are extended, the runtime is
+ updated, the next node's prev_node_bitmap gains this node, and the
+ same next is added at the same slot to every sibling of the node.
+
+ Returns the slot used. Asserts that the node runtime has been
+ started. */
+uword
+vlib_node_add_next_with_slot (vlib_main_t * vm,
+ uword node_index,
+ uword next_node_index, uword slot)
+{
+ vlib_node_main_t *nm = &vm->node_main;
+ vlib_node_t *node, *next;
+ uword *p;
+
+ node = vec_elt (nm->nodes, node_index);
+ next = vec_elt (nm->nodes, next_node_index);
+
+ /* Runtime has to be initialized. */
+ ASSERT (nm->flags & VLIB_NODE_MAIN_RUNTIME_STARTED);
+
+ if ((p = hash_get (node->next_slot_by_node, next_node_index)))
+ {
+ /* Next already exists: slot must match. */
+ if (slot != ~0)
+ ASSERT (slot == p[0]);
+ return p[0];
+ }
+
+ if (slot == ~0)
+ slot = vec_len (node->next_nodes);
+
+ /* Grow both per-next vectors so that `slot' is a valid index; new
+ next_nodes entries are filled with ~0. */
+ vec_validate_init_empty (node->next_nodes, slot, ~0);
+ vec_validate (node->n_vectors_by_next_node, slot);
+
+ node->next_nodes[slot] = next_node_index;
+ hash_set (node->next_slot_by_node, next_node_index, slot);
+
+ vlib_node_runtime_update (vm, node_index, slot);
+
+ next->prev_node_bitmap = clib_bitmap_ori (next->prev_node_bitmap,
+ node_index);
+
+ /* Siblings all get same node structure. */
+ {
+ uword sib_node_index, sib_slot;
+ vlib_node_t *sib_node;
+ /* *INDENT-OFF* */
+ clib_bitmap_foreach (sib_node_index, node->sibling_bitmap, ({
+ sib_node = vec_elt (nm->nodes, sib_node_index);
+ if (sib_node != node)
+ {
+ /* The recursion ends via the "already exists" early return above. */
+ sib_slot = vlib_node_add_next_with_slot (vm, sib_node_index, next_node_index, slot);
+ ASSERT (sib_slot == slot);
+ }
+ }));
+ /* *INDENT-ON* */
+ }
+
+ return slot;
+}
+
+/* Add named next node to given node in given slot.
+
+   When a node called `name' already exists the work is delegated to
+   vlib_node_add_next_with_slot. Otherwise:
+     - once the runtime has started, the lookup failure is reported by
+       returning ~0;
+     - before that, the name is parked in the node's next_node_names
+       vector at `slot' (or, for slot == ~0, at the first index past
+       both next_node_names and next_nodes) and that slot is returned.
+   The `name' pointer is stored as-is, not copied. */
+uword
+vlib_node_add_named_next_with_slot (vlib_main_t * vm,
+				    uword node, char *name, uword slot)
+{
+  vlib_node_main_t *nm = &vm->node_main;
+  vlib_node_t *n = vlib_get_node (vm, node);
+  vlib_node_t *n_next = vlib_get_node_by_name (vm, (u8 *) name);
+
+  if (n_next)
+    return vlib_node_add_next_with_slot (vm, node, n_next->index, slot);
+
+  if (nm->flags & VLIB_NODE_MAIN_RUNTIME_STARTED)
+    return ~0;
+
+  if (slot == ~0)
+    slot = clib_max (vec_len (n->next_node_names),
+		     vec_len (n->next_nodes));
+
+  vec_validate (n->next_node_names, slot);
+  n->next_node_names[slot] = name;
+
+  return slot;
+}
+
+/* Create the two event-log event types owned by node `ni' - one logged
+   when the node function is called, one when it returns - as zeroed
+   entries, then fill in their names via node_set_elog_name. */
+static void
+node_elog_init (vlib_main_t * vm, uword ni)
+{
+  elog_event_type_t blank;
+
+  memset (&blank, 0, sizeof (blank));
+
+  /* Make sure index ni exists in both per-node vectors. */
+  vec_validate (vm->node_call_elog_event_types, ni);
+  vec_validate (vm->node_return_elog_event_types, ni);
+
+  vm->node_call_elog_event_types[ni] = blank;
+  vm->node_return_elog_event_types[ni] = blank;
+
+  node_set_elog_name (vm, ni);
+}
+
+/* Alignment used for process stacks in register_node: the memory page
+ size under CLIB_UNIX (where the bottom page is write-protected with
+ mprotect), a cache line otherwise. */
+#ifdef CLIB_UNIX
+#define STACK_ALIGN (clib_mem_get_page_size())
+#else
+#define STACK_ALIGN CLIB_CACHE_LINE_BYTES
+#endif
+
+/* Create a vlib_node_t from a registration and append it to nm->nodes.
+
+ Steps, in order: allocate and zero the node; give it a vector name and
+ enter it in the node_by_name hash (duplicate names are reported with
+ clib_error); copy the simple fields from the registration; register
+ its error counters and elog event types; stage runtime data; record
+ the names of its next nodes for later resolution; then build the node
+ runtime. For process nodes the runtime lives inside a freshly
+ allocated vlib_process_t that also holds the stack; for all other
+ types it is appended to nm->nodes_by_type[type].
+
+ The node index is written back to r->index. */
+static void
+register_node (vlib_main_t * vm, vlib_node_registration_t * r)
+{
+ vlib_node_main_t *nm = &vm->node_main;
+ vlib_node_t *n;
+ u32 page_size = clib_mem_get_page_size ();
+ int i;
+
+ if (CLIB_DEBUG > 0)
+ {
+ /* Default (0) type should match INTERNAL. */
+ vlib_node_t zero = { 0 };
+ ASSERT (VLIB_NODE_TYPE_INTERNAL == zero.type);
+ }
+
+ ASSERT (r->function != 0);
+
+ n = clib_mem_alloc_no_fail (sizeof (n[0]));
+ memset (n, 0, sizeof (n[0]));
+ n->index = vec_len (nm->nodes);
+
+ vec_add1 (nm->nodes, n);
+
+ /* Name is always a vector so it can be formatted with %v. */
+ /* A registration name that is already a vector is duplicated; a plain
+ C string is converted with format (no trailing NUL is added). */
+ if (clib_mem_is_heap_object (vec_header (r->name, 0)))
+ n->name = vec_dup ((u8 *) r->name);
+ else
+ n->name = format (0, "%s", r->name);
+
+ if (!nm->node_by_name)
+ nm->node_by_name = hash_create_vec ( /* size */ 32,
+ sizeof (n->name[0]), sizeof (uword));
+
+ /* Node names must be unique. */
+ {
+ vlib_node_t *o = vlib_get_node_by_name (vm, n->name);
+ if (o)
+ clib_error ("more than one node named `%v'", n->name);
+ }
+
+ hash_set (nm->node_by_name, n->name, n->index);
+
+ r->index = n->index; /* save index in registration */
+ n->function = r->function;
+
+ /* Node index of next sibling will be filled in by vlib_node_main_init. */
+ n->sibling_of = r->sibling_of;
+ if (r->sibling_of && r->n_next_nodes > 0)
+ clib_error ("sibling node should not have any next nodes `%v'", n->name);
+
+ if (r->type == VLIB_NODE_TYPE_INTERNAL)
+ ASSERT (r->vector_size > 0);
+
+ /* Copy field f from the registration to the node.
+ NOTE(review): this helper macro is not #undef'd anywhere in this
+ function. */
+#define _(f) n->f = r->f
+
+ _(type);
+ _(flags);
+ _(state);
+ _(scalar_size);
+ _(vector_size);
+ _(format_buffer);
+ _(unformat_buffer);
+ _(format_trace);
+ _(validate_frame);
+
+ /* Register error counters. */
+ vlib_register_errors (vm, n->index, r->n_errors, r->error_strings);
+ node_elog_init (vm, n->index);
+
+ /* Stage the registration's runtime data in n->runtime_data; it is
+ copied into the node runtime and freed at the end of this function. */
+ _(runtime_data_bytes);
+ if (r->runtime_data_bytes > 0)
+ {
+ vec_resize (n->runtime_data, r->runtime_data_bytes);
+ if (r->runtime_data)
+ clib_memcpy (n->runtime_data, r->runtime_data, r->runtime_data_bytes);
+ }
+
+ /* Remember next node names; vlib_node_main_init resolves them. */
+ vec_resize (n->next_node_names, r->n_next_nodes);
+ for (i = 0; i < r->n_next_nodes; i++)
+ n->next_node_names[i] = r->next_nodes[i];
+
+ vec_validate_init_empty (n->next_nodes, r->n_next_nodes - 1, ~0);
+ vec_validate (n->n_vectors_by_next_node, r->n_next_nodes - 1);
+
+ n->owner_node_index = n->owner_next_index = ~0;
+
+ /* Initialize node runtime. */
+ {
+ vlib_node_runtime_t *rt;
+ /* Note: this i shadows the function-level int i. */
+ u32 i;
+
+ if (n->type == VLIB_NODE_TYPE_PROCESS)
+ {
+ vlib_process_t *p;
+ uword log2_n_stack_bytes;
+
+ /* Stack is at least 2^15 bytes. */
+ log2_n_stack_bytes = clib_max (r->process_log2_n_stack_bytes, 15);
+
+#ifdef CLIB_UNIX
+ /*
+ * Bump the stack size if running over a kernel with a large page size,
+ * and the stack isn't any too big to begin with. Otherwise, we'll
+ * trip over the stack guard page for sure.
+ */
+ if ((page_size > (4 << 10)) && log2_n_stack_bytes < 19)
+ {
+ if ((1 << log2_n_stack_bytes) <= page_size)
+ log2_n_stack_bytes = min_log2 (page_size) + 1;
+ else
+ log2_n_stack_bytes++;
+ }
+#endif
+
+ /* Allocate process header plus stack in one piece, with the stack
+ member placed on a STACK_ALIGN boundary. */
+ p = clib_mem_alloc_aligned_at_offset
+ (sizeof (p[0]) + (1 << log2_n_stack_bytes),
+ STACK_ALIGN, STRUCT_OFFSET_OF (vlib_process_t, stack),
+ 0 /* no, don't call os_out_of_memory */ );
+ if (p == 0)
+ clib_panic ("failed to allocate process stack (%d bytes)",
+ 1 << log2_n_stack_bytes);
+
+ /* Only the header is zeroed, not the stack. */
+ memset (p, 0, sizeof (p[0]));
+ p->log2_n_stack_bytes = log2_n_stack_bytes;
+
+ /* Process node's runtime index is really index into process
+ pointer vector. */
+ n->runtime_index = vec_len (nm->processes);
+
+ vec_add1 (nm->processes, p);
+
+ /* Paint first stack word with magic number so we can at least
+ detect process stack overruns. */
+ p->stack[0] = VLIB_PROCESS_STACK_MAGIC;
+
+ /* Node runtime is stored inside of process. */
+ rt = &p->node_runtime;
+
+#ifdef CLIB_UNIX
+ /*
+ * Disallow writes to the bottom page of the stack, to
+ * catch stack overflows.
+ */
+ if (mprotect (p->stack, page_size, PROT_READ) < 0)
+ clib_unix_warning ("process stack");
+#endif
+
+ }
+ else
+ {
+ /* Non-process nodes: runtime is an element of the per-type vector
+ and runtime_index is its position there. */
+ vec_add2_aligned (nm->nodes_by_type[n->type], rt, 1,
+ /* align */ CLIB_CACHE_LINE_BYTES);
+ n->runtime_index = rt - nm->nodes_by_type[n->type];
+ }
+
+ if (n->type == VLIB_NODE_TYPE_INPUT)
+ nm->input_node_counts_by_state[n->state] += 1;
+
+ rt->function = n->function;
+ rt->flags = n->flags;
+ rt->state = n->state;
+ rt->node_index = n->index;
+
+ /* Reserve and initialize one next frame per registered next node at
+ the end of nm->next_frames. */
+ rt->n_next_nodes = r->n_next_nodes;
+ rt->next_frame_index = vec_len (nm->next_frames);
+
+ vec_resize (nm->next_frames, rt->n_next_nodes);
+ for (i = 0; i < rt->n_next_nodes; i++)
+ vlib_next_frame_init (nm->next_frames + rt->next_frame_index + i);
+
+ vec_resize (rt->errors, r->n_errors);
+ for (i = 0; i < vec_len (rt->errors); i++)
+ rt->errors[i] = vlib_error_set (n->index, i);
+
+ /* Runtime data must fit in the space that follows the fixed fields of
+ the 128 byte vlib_node_runtime_t. */
+ STATIC_ASSERT_SIZEOF (vlib_node_runtime_t, 128);
+ ASSERT (vec_len (n->runtime_data) <=
+ sizeof (vlib_node_runtime_t) -
+ STRUCT_OFFSET_OF (vlib_node_runtime_t, runtime_data));
+
+ if (vec_len (n->runtime_data) > 0)
+ clib_memcpy (rt->runtime_data, n->runtime_data,
+ vec_len (n->runtime_data));
+
+ vec_free (n->runtime_data);
+ }
+}
+
+/* Register new packet processing node.
+ Thin public wrapper over register_node; returns the index that
+ register_node stored in r->index. */
+u32
+vlib_register_node (vlib_main_t * vm, vlib_node_registration_t * r)
+{
+ register_node (vm, r);
+ return r->index;
+}
+
+/* Node function of the "null-node": count every buffer in the frame
+   against the node's error counter 0, free the buffers, free the frame,
+   and report how many vectors were handled. */
+static uword
+null_node_fn (vlib_main_t * vm,
+	      vlib_node_runtime_t * node, vlib_frame_t * frame)
+{
+  /* Capture the count before the frame is released below. */
+  u16 n_dropped = frame->n_vectors;
+
+  vlib_node_increment_counter (vm, node->node_index, 0, n_dropped);
+  vlib_buffer_free (vm, vlib_frame_args (frame), n_dropped);
+  vlib_frame_free (vm, node, frame);
+
+  return n_dropped;
+}
+
+/* Register the built-in "null-node" first, then every node on the
+   vm->node_main.node_registrations list, in list order. */
+void
+vlib_register_all_static_nodes (vlib_main_t * vm)
+{
+  static char *null_node_error_strings[] = {
+    "blackholed packets",
+  };
+
+  static vlib_node_registration_t null_node_reg = {
+    .function = null_node_fn,
+    .vector_size = sizeof (u32),
+    .name = "null-node",
+    .n_errors = 1,
+    .error_strings = null_node_error_strings,
+  };
+
+  vlib_node_registration_t *r;
+
+  /* make sure that node index 0 is not used by
+     real node */
+  register_node (vm, &null_node_reg);
+
+  for (r = vm->node_main.node_registrations; r; r = r->next_registration)
+    register_node (vm, r);
+}
+
+/* Finish node graph construction once all nodes are registered.
+
+ Marks the runtime as started, then in four passes:
+ 1. builds the sibling bitmaps from each node's sibling_of name;
+ 2. resolves every pending next-node name into a next index;
+ 3. fills in prev_node_bitmap for every next node;
+ 4. for internal nodes, checks each next frame's runtime index and
+ sets its flags.
+
+ Returns 0 on success, or an error when a sibling or next node name
+ cannot be found. */
+clib_error_t *
+vlib_node_main_init (vlib_main_t * vm)
+{
+ vlib_node_main_t *nm = &vm->node_main;
+ clib_error_t *error = 0;
+ vlib_node_t *n;
+ uword ni;
+
+ /* Set before resolving names: vlib_node_add_next_with_slot asserts
+ this flag. */
+ nm->flags |= VLIB_NODE_MAIN_RUNTIME_STARTED;
+
+ /* Generate sibling relationships */
+ {
+ /* Note: this n shadows the function-level n. */
+ vlib_node_t *n, *sib;
+ uword si;
+
+ for (ni = 0; ni < vec_len (nm->nodes); ni++)
+ {
+ n = vec_elt (nm->nodes, ni);
+
+ if (!n->sibling_of)
+ continue;
+
+ sib = vlib_get_node_by_name (vm, (u8 *) n->sibling_of);
+ if (!sib)
+ {
+ error = clib_error_create ("sibling `%s' not found for node `%v'",
+ n->sibling_of, n->name);
+ goto done;
+ }
+
+ /* *INDENT-OFF* */
+ clib_bitmap_foreach (si, sib->sibling_bitmap, ({
+ vlib_node_t * m = vec_elt (nm->nodes, si);
+
+ /* Connect all of sibling's siblings to us. */
+ m->sibling_bitmap = clib_bitmap_ori (m->sibling_bitmap, n->index);
+
+ /* Connect us to all of sibling's siblings. */
+ n->sibling_bitmap = clib_bitmap_ori (n->sibling_bitmap, si);
+ }));
+ /* *INDENT-ON* */
+
+ /* Connect sibling to us. */
+ sib->sibling_bitmap = clib_bitmap_ori (sib->sibling_bitmap, n->index);
+
+ /* Connect us to sibling. */
+ n->sibling_bitmap = clib_bitmap_ori (n->sibling_bitmap, sib->index);
+ }
+ }
+
+ /* Resolve next names into next indices. */
+ for (ni = 0; ni < vec_len (nm->nodes); ni++)
+ {
+ uword i;
+
+ n = vec_elt (nm->nodes, ni);
+
+ for (i = 0; i < vec_len (n->next_node_names); i++)
+ {
+ char *a = n->next_node_names[i];
+
+ /* Empty slots carry no name to resolve. */
+ if (!a)
+ continue;
+
+ /* With the runtime started, ~0 means the named node does not exist. */
+ if (~0 == vlib_node_add_named_next_with_slot (vm, n->index, a, i))
+ {
+ error = clib_error_create
+ ("node `%v' refers to unknown node `%s'", n->name, a);
+ goto done;
+ }
+ }
+
+ /* Names are no longer needed once resolved. */
+ vec_free (n->next_node_names);
+ }
+
+ /* Set previous node pointers. */
+ for (ni = 0; ni < vec_len (nm->nodes); ni++)
+ {
+ vlib_node_t *n_next;
+ uword i;
+
+ n = vec_elt (nm->nodes, ni);
+
+ for (i = 0; i < vec_len (n->next_nodes); i++)
+ {
+ /* Skip slots that do not hold a valid node index (e.g. still ~0). */
+ if (n->next_nodes[i] >= vec_len (nm->nodes))
+ continue;
+
+ n_next = vec_elt (nm->nodes, n->next_nodes[i]);
+ n_next->prev_node_bitmap =
+ clib_bitmap_ori (n_next->prev_node_bitmap, n->index);
+ }
+ }
+
+ /* Validate and flag the next frames of every internal node. */
+ {
+ vlib_next_frame_t *nf;
+ vlib_node_runtime_t *r;
+ vlib_node_t *next;
+ uword i;
+
+ vec_foreach (r, nm->nodes_by_type[VLIB_NODE_TYPE_INTERNAL])
+ {
+ if (r->n_next_nodes == 0)
+ continue;
+
+ n = vlib_get_node (vm, r->node_index);
+ nf = vec_elt_at_index (nm->next_frames, r->next_frame_index);
+
+ for (i = 0; i < vec_len (n->next_nodes); i++)
+ {
+ next = vlib_get_node (vm, n->next_nodes[i]);
+
+ /* Validate node runtime indices are correctly initialized. */
+ ASSERT (nf[i].node_runtime_index == next->runtime_index);
+
+ nf[i].flags = 0;
+ if (next->flags & VLIB_NODE_FLAG_FRAME_NO_FREE_AFTER_DISPATCH)
+ nf[i].flags |= VLIB_FRAME_NO_FREE_AFTER_DISPATCH;
+ }
+ }
+ }
+
+done:
+ return error;
+}
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vlib/node.h b/src/vlib/node.h
new file mode 100644
index 00000000000..b624e9d636d
--- /dev/null
+++ b/src/vlib/node.h
@@ -0,0 +1,725 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * node.h: VLIB processing nodes
+ *
+ * Copyright (c) 2008 Eliot Dresselhaus
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef included_vlib_node_h
+#define included_vlib_node_h
+
+#include <vppinfra/cpu.h>
+#include <vppinfra/longjmp.h>
+#include <vppinfra/timing_wheel.h>
+#include <vlib/trace.h> /* for vlib_trace_filter_t */
+
+/* Forward declaration. */
+struct vlib_node_runtime_t;
+struct vlib_frame_t;
+
+/* Internal nodes (including output nodes) move data from node to
+ node (or out of the graph for output nodes). */
+typedef uword (vlib_node_function_t) (struct vlib_main_t * vm,
+ struct vlib_node_runtime_t * node,
+ struct vlib_frame_t * frame);
+
+typedef enum
+{
+ /* An internal node on the call graph (could be output). */
+ VLIB_NODE_TYPE_INTERNAL,
+
+ /* Nodes which input data into the processing graph.
+ Input nodes are called for each iteration of main loop. */
+ VLIB_NODE_TYPE_INPUT,
+
+ /* Nodes to be called before all input nodes.
+ Used, for example, to clean out driver TX rings before
+ processing input. */
+ VLIB_NODE_TYPE_PRE_INPUT,
+
+ /* "Process" nodes which can be suspended and later resumed. */
+ VLIB_NODE_TYPE_PROCESS,
+ /* Number of node types (not a type itself); sizes per-type arrays such as nodes_by_type[]. */
+ VLIB_N_NODE_TYPE,
+} vlib_node_type_t;
+
+typedef struct _vlib_node_registration
+{
+ /* Vector processing function for this node. */
+ vlib_node_function_t *function;
+
+ /* Node name. */
+ char *name;
+
+ /* Name of sibling (if applicable). */
+ char *sibling_of;
+
+ /* Node index filled in by registration. */
+ u32 index;
+
+ /* Type of this node. */
+ vlib_node_type_t type;
+
+ /* Error strings indexed by error code for this node. */
+ char **error_strings;
+
+ /* Buffer format/unformat for this node. */
+ format_function_t *format_buffer;
+ unformat_function_t *unformat_buffer;
+
+ /* Trace format/unformat for this node. */
+ format_function_t *format_trace;
+ unformat_function_t *unformat_trace;
+
+ /* Function to validate incoming frames. */
+ u8 *(*validate_frame) (struct vlib_main_t * vm,
+ struct vlib_node_runtime_t *,
+ struct vlib_frame_t * f);
+
+ /* Per-node runtime data. */
+ void *runtime_data;
+
+ /* Process stack size, expressed as log2 of the number of bytes. */
+ u16 process_log2_n_stack_bytes;
+
+ /* Number of bytes of per-node run time data. */
+ u8 runtime_data_bytes;
+
+ /* State for input nodes. */
+ u8 state;
+
+ /* Node flags. */
+ u16 flags;
+
+ /* Size of scalar and vector arguments in bytes. */
+ u16 scalar_size, vector_size;
+
+ /* Number of error codes used by this node. */
+ u16 n_errors;
+
+ /* Number of next node names that follow. */
+ u16 n_next_nodes;
+
+ /* Next entry in the singly-linked list of registrations built by the VLIB_REGISTER_NODE constructors. */
+ struct _vlib_node_registration *next_registration;
+
+ /* Names of next nodes which this node feeds into (flexible array, n_next_nodes entries). */
+ char *next_nodes[];
+
+} vlib_node_registration_t;
+/* Declares registration x plus a constructor that pushes &x onto vm->node_main.node_registrations; __VA_ARGS__ precedes the type (e.g. a storage class) and the macro ends in a declaration of x with no semicolon. */
+#define VLIB_REGISTER_NODE(x,...) \
+ __VA_ARGS__ vlib_node_registration_t x; \
+static void __vlib_add_node_registration_##x (void) \
+ __attribute__((__constructor__)) ; \
+static void __vlib_add_node_registration_##x (void) \
+{ \
+ vlib_main_t * vm = vlib_get_main(); \
+ x.next_registration = vm->node_main.node_registrations; \
+ vm->node_main.node_registrations = &x; \
+} \
+__VA_ARGS__ vlib_node_registration_t x
+
+#if CLIB_DEBUG > 0
+/* Debug builds: no per-architecture clones are generated, so all three
+   macros expand to nothing.  Their parameter lists match the release
+   variants below so that any use compiles identically in both builds. */
+#define VLIB_NODE_FUNCTION_CLONE_TEMPLATE(arch, fn, tgt)
+#define VLIB_NODE_FUNCTION_MULTIARCH_CLONE(fn)
+#define VLIB_NODE_FUNCTION_MULTIARCH(node, fn)
+#else
+/* Defines fn_<arch>: a wrapper with the node-function signature that
+   simply calls fn, compiled with __attribute__ ((target (tgt))) and
+   flattened so that fn's body is inlined into the clone. */
+#define VLIB_NODE_FUNCTION_CLONE_TEMPLATE(arch, fn, tgt)                \
+  uword                                                                 \
+  __attribute__ ((flatten))                                             \
+  __attribute__ ((target (tgt)))                                        \
+  CLIB_CPU_OPTIMIZED                                                    \
+  fn ## _ ## arch ( struct vlib_main_t * vm,                            \
+                   struct vlib_node_runtime_t * node,                   \
+                   struct vlib_frame_t * frame)                         \
+  { return fn (vm, node, frame); }
+
+/* Instantiates the clone template once per entry of foreach_march_variant. */
+#define VLIB_NODE_FUNCTION_MULTIARCH_CLONE(fn)                          \
+  foreach_march_variant(VLIB_NODE_FUNCTION_CLONE_TEMPLATE, fn)
+
+/* Generates the clones of fn and a constructor that stores the result of
+   fn##_multiarch_select () into node.function (the selector is presumably
+   defined by CLIB_MULTIARCH_SELECT_FN -- confirm in vppinfra/cpu.h). */
+#define VLIB_NODE_FUNCTION_MULTIARCH(node, fn)                          \
+  VLIB_NODE_FUNCTION_MULTIARCH_CLONE(fn)                                \
+  CLIB_MULTIARCH_SELECT_FN(fn, static inline)                           \
+  static void __attribute__((__constructor__))                          \
+  __vlib_node_function_multiarch_select_##node (void)                   \
+  { node.function = fn ## _multiarch_select(); }
+#endif
+
+always_inline vlib_node_registration_t *
+vlib_node_next_registered (vlib_node_registration_t * c)
+{
+  /* The next_nodes[] name pointers trail the fixed part of the
+     registration, so they are counted as extra bytes to step over. */
+  uword n_trailing_bytes = c->n_next_nodes * sizeof (c->next_nodes[0]);
+
+  return clib_elf_section_data_next (c, n_trailing_bytes);
+}
+
+typedef struct
+{
+ /* Total calls, clock ticks and vector elements processed for this node. */
+ u64 calls, vectors, clocks, suspends;
+ u64 max_clock; /* Presumably mirrors vlib_node_runtime_t.max_clock (max clocks for one invocation) -- confirm. */
+ u64 max_clock_n; /* Vector count paired with max_clock; format_vlib_node_stats divides max_clock by it. */
+} vlib_node_stats_t;
+
+#define foreach_vlib_node_state \
+ /* Input node is called each iteration of main loop. \
+ This is the default (zero). */ \
+ _ (POLLING) \
+ /* Input node is called when device signals an interrupt. */ \
+ _ (INTERRUPT) \
+ /* Input node is never called. */ \
+ _ (DISABLED)
+/* Input-node states generated from foreach_vlib_node_state (VLIB_NODE_STATE_POLLING, ...); VLIB_N_NODE_STATE is the count. */
+typedef enum
+{
+#define _(f) VLIB_NODE_STATE_##f,
+ foreach_vlib_node_state
+#undef _
+ VLIB_N_NODE_STATE,
+} vlib_node_state_t;
+
+typedef struct vlib_node_t
+{
+ /* Vector processing function for this node. */
+ vlib_node_function_t *function;
+
+ /* Node name (a vector: compared with vec_cmp and printed with %v). */
+ u8 *name;
+
+ /* Node name index in elog string table. */
+ u32 name_elog_string;
+
+ /* Total statistics for this node. */
+ vlib_node_stats_t stats_total;
+
+ /* Saved values as of last clear (or zero if never cleared).
+ Current values are always stats_total - stats_last_clear. */
+ vlib_node_stats_t stats_last_clear;
+
+ /* Type of this node. */
+ vlib_node_type_t type;
+
+ /* Node index. */
+ u32 index;
+
+ /* Index of corresponding node runtime. */
+ u32 runtime_index;
+
+ /* Runtime data for this node. */
+ void *runtime_data;
+
+ /* Node flags. */
+ u16 flags;
+
+ /* Processing function keeps frame. Tells node dispatching code not
+ to free frame after dispatch is done. */
+#define VLIB_NODE_FLAG_FRAME_NO_FREE_AFTER_DISPATCH (1 << 0)
+
+ /* Node counts as output/drop/punt node for stats purposes. */
+#define VLIB_NODE_FLAG_IS_OUTPUT (1 << 1)
+#define VLIB_NODE_FLAG_IS_DROP (1 << 2)
+#define VLIB_NODE_FLAG_IS_PUNT (1 << 3)
+#define VLIB_NODE_FLAG_IS_HANDOFF (1 << 4)
+
+ /* Set if current node runtime has traced vectors. */
+#define VLIB_NODE_FLAG_TRACE (1 << 5)
+
+#define VLIB_NODE_FLAG_SWITCH_FROM_INTERRUPT_TO_POLLING_MODE (1 << 6)
+#define VLIB_NODE_FLAG_SWITCH_FROM_POLLING_TO_INTERRUPT_MODE (1 << 7)
+
+ /* State for input nodes. */
+ u8 state;
+
+ /* Number of bytes of run time data. */
+ u8 runtime_data_bytes;
+
+ /* Number of error codes used by this node. */
+ u16 n_errors;
+
+ /* Size of scalar and vector arguments in bytes. */
+ u16 scalar_size, vector_size;
+
+ /* Handle/index in error heap for this node. */
+ u32 error_heap_handle;
+ u32 error_heap_index;
+
+ /* Error strings indexed by error code for this node. */
+ char **error_strings;
+
+ /* Vector of next node names.
+ Only used before next_nodes array is initialized. */
+ char **next_node_names;
+
+ /* Next node indices for this node. */
+ u32 *next_nodes;
+
+ /* Name of node that we are sibling of. */
+ char *sibling_of;
+
+ /* Bitmap of all of this node's siblings. */
+ uword *sibling_bitmap;
+
+ /* Total number of vectors sent to each next node. */
+ u64 *n_vectors_by_next_node;
+
+ /* Hash table mapping next node index into slot in
+ next_nodes vector. Quickly determines whether this node
+ is connected to given next node and, if so, with which slot. */
+ uword *next_slot_by_node;
+
+ /* Bitmap of node indices which feed this node. */
+ uword *prev_node_bitmap;
+
+ /* Node/next-index which owns enqueue rights to this node. */
+ u32 owner_node_index, owner_next_index;
+
+ /* Buffer format/unformat for this node. */
+ format_function_t *format_buffer;
+ unformat_function_t *unformat_buffer;
+
+ /* Trace format function for this node. */
+ format_function_t *format_trace;
+
+ /* Function to validate incoming frames. */
+ u8 *(*validate_frame) (struct vlib_main_t * vm,
+ struct vlib_node_runtime_t *,
+ struct vlib_frame_t * f);
+ /* for pretty-printing, not typically valid */
+ u8 *state_string;
+} vlib_node_t;
+
+#define VLIB_INVALID_NODE_INDEX ((u32) ~0)
+
+/* Max number of vector elements to process at once per node. */
+#define VLIB_FRAME_SIZE 256
+#define VLIB_FRAME_ALIGN VLIB_MAX_CPUS
+
+/* Calling frame (think stack frame) for a node. */
+typedef struct vlib_frame_t
+{
+ /* Frame flags. */
+ u16 flags;
+
+ /* Number of scalar bytes in arguments. */
+ u8 scalar_size;
+
+ /* Number of bytes per vector argument. */
+ u8 vector_size;
+ /* NOTE(review): the two sizes above are u8 here but u16 in vlib_node_t; confirm values always fit. */
+ /* Number of vector elements currently in frame. */
+ u16 n_vectors;
+
+ /* Owner cpuid / heap id */
+ u16 cpu_index;
+
+ /* Scalar and vector arguments to next node (zero-length array: the data follows the struct). */
+ u8 arguments[0];
+} vlib_frame_t;
+
+typedef struct
+{
+ /* Frame index. */
+ u32 frame_index;
+
+ /* Node runtime for this next. */
+ u32 node_runtime_index;
+
+ /* Next frame flags. */
+ u32 flags;
+ /* NOTE: except for VLIB_FRAME_OWNER, the VLIB_FRAME_* values below are aliases of VLIB_NODE_FLAG_* bit values. */
+ /* Reflects node frame-used flag for this next. */
+#define VLIB_FRAME_NO_FREE_AFTER_DISPATCH \
+ VLIB_NODE_FLAG_FRAME_NO_FREE_AFTER_DISPATCH
+
+ /* This next frame owns enqueue to node
+ corresponding to node_runtime_index. */
+#define VLIB_FRAME_OWNER (1 << 15)
+
+ /* Set when frame has been allocated for this next. */
+#define VLIB_FRAME_IS_ALLOCATED VLIB_NODE_FLAG_IS_OUTPUT
+
+ /* Set when frame has been added to pending vector. */
+#define VLIB_FRAME_PENDING VLIB_NODE_FLAG_IS_DROP
+
+ /* Set when frame is to be freed after dispatch. */
+#define VLIB_FRAME_FREE_AFTER_DISPATCH VLIB_NODE_FLAG_IS_PUNT
+
+ /* Set when frame has traced packets. */
+#define VLIB_FRAME_TRACE VLIB_NODE_FLAG_TRACE
+
+ /* Number of vectors enqueued to this next since last overflow. */
+ u32 vectors_since_last_overflow;
+} vlib_next_frame_t;
+
+always_inline void
+vlib_next_frame_init (vlib_next_frame_t * nf)
+{
+  /* Start from an all-zero next frame (flags and counter cleared)... */
+  memset (nf, 0, sizeof (*nf));
+
+  /* ...then mark both indices as "none" (all ones). */
+  nf->node_runtime_index = ~0;
+  nf->frame_index = ~0;
+}
+
+/* A frame pending dispatch by main loop. */
+typedef struct
+{
+ /* Node and runtime for this frame. */
+ u32 node_runtime_index;
+
+ /* Frame index (in the heap). */
+ u32 frame_index;
+
+ /* Start of next frames for this node. */
+ u32 next_frame_index;
+
+ /* Special value for next_frame_index when there is no next frame. */
+#define VLIB_PENDING_FRAME_NO_NEXT_FRAME ((u32) ~0)
+} vlib_pending_frame_t;
+
+typedef struct vlib_node_runtime_t
+{
+ CLIB_CACHE_LINE_ALIGN_MARK (cacheline0);
+ /* Node function to call. */
+ vlib_node_function_t *function;
+
+ /* Vector of errors for this node. */
+ vlib_error_t *errors;
+
+ /* Number of clock cycles. */
+ u32 clocks_since_last_overflow;
+
+ /* Maximum clock cycle for an invocation. */
+ u32 max_clock;
+
+ /* Number of vectors in the recorded max_clock. */
+ u32 max_clock_n;
+
+ /* Number of calls. */
+ u32 calls_since_last_overflow;
+
+ /* Number of vector elements processed by this node. */
+ u32 vectors_since_last_overflow;
+
+ /* Start of next frames for this node. */
+ u32 next_frame_index;
+
+ /* Node index. */
+ u32 node_index;
+
+ /* For input nodes: decremented on each main loop iteration until it reaches zero
+ and function is called. Allows some input nodes to be called
+ more than others. */
+ u32 input_main_loops_per_call;
+
+ /* Saved main loop counter of last dispatch of this node. */
+ u32 main_loop_count_last_dispatch;
+
+ u32 main_loop_vector_stats[2];
+
+ /* Copy of main node flags. */
+ u16 flags;
+
+ /* Input node state. */
+ u16 state;
+
+ u16 n_next_nodes;
+
+ /* Next frame index that vector arguments were last enqueued to
+ last time this node ran. Set to zero before first run
+ of this node. */
+ u16 cached_next_index;
+
+ /* CPU this node runs on */
+ u16 cpu_index;
+
+ /* Function dependent node-runtime. */
+ u8 runtime_data[0];
+}
+vlib_node_runtime_t;
+
+typedef struct
+{
+ /* Number of allocated frames for this scalar/vector size. */
+ u32 n_alloc_frames;
+
+ /* Vector of free frame indices for this scalar/vector size. */
+ u32 *free_frame_indices;
+} vlib_frame_size_t;
+
+typedef struct
+{
+ /* Users opaque value for event type. */
+ uword opaque;
+} vlib_process_event_type_t;
+
+typedef struct
+{
+ /* Node runtime for this process. */
+ vlib_node_runtime_t node_runtime;
+
+ /* Where to longjmp when process is done. */
+ clib_longjmp_t return_longjmp;
+
+#define VLIB_PROCESS_RETURN_LONGJMP_RETURN ((uword) ~0 - 0)
+#define VLIB_PROCESS_RETURN_LONGJMP_SUSPEND ((uword) ~0 - 1)
+
+ /* Where to longjmp to resume node after suspend. */
+ clib_longjmp_t resume_longjmp;
+#define VLIB_PROCESS_RESUME_LONGJMP_SUSPEND 0
+#define VLIB_PROCESS_RESUME_LONGJMP_RESUME 1
+
+ u16 flags;
+#define VLIB_PROCESS_IS_SUSPENDED_WAITING_FOR_CLOCK (1 << 0)
+#define VLIB_PROCESS_IS_SUSPENDED_WAITING_FOR_EVENT (1 << 1)
+ /* Set to indicate that this process has been added to resume vector. */
+#define VLIB_PROCESS_RESUME_PENDING (1 << 2)
+
+ /* Process function is currently running. */
+#define VLIB_PROCESS_IS_RUNNING (1 << 3)
+
+ /* Size of process stack. */
+ u16 log2_n_stack_bytes;
+
+ u32 suspended_process_frame_index;
+
+ /* Number of times this process was suspended. */
+ u32 n_suspends;
+
+ /* Vectors of pending event data indexed by event type index. */
+ void **pending_event_data_by_type_index;
+
+ /* Bitmap of event type-indices with non-empty vectors. */
+ uword *non_empty_event_type_bitmap;
+
+ /* Bitmap of event type-indices which are one time events. */
+ uword *one_time_event_type_bitmap;
+
+ /* Type is opaque pointer -- typically a pointer to an event handler
+ function. Hash table to map opaque to a type index. */
+ uword *event_type_index_by_type_opaque;
+
+ /* Pool of currently valid event types. */
+ vlib_process_event_type_t *event_type_pool;
+
+ /* When suspending saves cpu cycle counter when process is to be resumed. */
+ u64 resume_cpu_time;
+
+ /* Default output function and its argument for any CLI outputs
+ within the process. */
+ vlib_cli_output_function_t *output_function;
+ uword output_function_arg;
+
+#ifdef CLIB_UNIX
+ /* Pad to a multiple of the page size so we can mprotect process stacks */
+#define PAGE_SIZE_MULTIPLE 0x1000
+#define ALIGN_ON_MULTIPLE_PAGE_BOUNDARY_FOR_MPROTECT __attribute__ ((aligned (PAGE_SIZE_MULTIPLE)))
+#else
+#define ALIGN_ON_MULTIPLE_PAGE_BOUNDARY_FOR_MPROTECT
+#endif
+
+ /* Process stack. Starts here and extends 2^log2_n_stack_bytes
+ bytes. */
+
+#define VLIB_PROCESS_STACK_MAGIC (0xdead7ead)
+ u32 stack[0] ALIGN_ON_MULTIPLE_PAGE_BOUNDARY_FOR_MPROTECT;
+} vlib_process_t __attribute__ ((aligned (CLIB_CACHE_LINE_BYTES)));
+
+#ifdef CLIB_UNIX
+ /* Compile-time check: sizeof (vlib_process_t) must equal PAGE_SIZE_MULTIPLE, otherwise the array size below is -1 and compilation fails. */
+typedef char
+ assert_process_stack_must_be_aligned_exactly_to_page_size_multiple[(sizeof
+ (vlib_process_t)
+ -
+ PAGE_SIZE_MULTIPLE)
+ ==
+ 0 ? 0 :
+ -1];
+#endif
+
+typedef struct
+{
+ u32 node_index;
+
+ u32 one_time_event;
+} vlib_one_time_waiting_process_t;
+
+typedef struct
+{
+ u16 n_data_elts;
+
+ u16 n_data_elt_bytes;
+
+ /* n_data_elts * n_data_elt_bytes */
+ u32 n_data_bytes;
+
+ /* Process node & event type to be used to signal event. */
+ u32 process_node_index;
+
+ u32 event_type_index;
+ /* Inline buffer is 64 bytes minus the five header fields above, so header plus union come to 64 bytes. */
+ union
+ {
+ u8 inline_event_data[64 - 3 * sizeof (u32) - 2 * sizeof (u16)];
+
+ /* Vector of event data used only when data does not fit inline. */
+ u8 *event_data_as_vector;
+ };
+}
+vlib_signal_timed_event_data_t;
+
+always_inline uword
+vlib_timing_wheel_data_is_timed_event (u32 d)
+{
+  /* The low bit is the tag: set for timed events, clear otherwise. */
+  return (d & 1) != 0;
+}
+
+always_inline u32
+vlib_timing_wheel_data_set_suspended_process (u32 i)
+{
+  /* Index goes in the upper bits; the low tag bit stays 0. */
+  return i << 1;
+}
+
+always_inline u32
+vlib_timing_wheel_data_set_timed_event (u32 i)
+{
+  /* Index goes in the upper bits; the low tag bit is set to 1. */
+  return (i << 1) | 1;
+}
+
+always_inline uword
+vlib_timing_wheel_data_get_index (u32 d)
+{
+  /* Drop the low tag bit to recover the index. */
+  return d >> 1;
+}
+
+typedef struct
+{
+ /* Public nodes. */
+ vlib_node_t **nodes;
+
+ /* Node index hashed by node name. */
+ uword *node_by_name;
+
+ u32 flags;
+#define VLIB_NODE_MAIN_RUNTIME_STARTED (1 << 0)
+
+ /* Nodes segregated by type for cache locality.
+ Does not apply to nodes of type VLIB_NODE_TYPE_INTERNAL. NOTE(review): node.c iterates nodes_by_type[VLIB_NODE_TYPE_INTERNAL]; confirm what this exclusion means. */
+ vlib_node_runtime_t *nodes_by_type[VLIB_N_NODE_TYPE];
+
+ /* Node runtime indices for input nodes with pending interrupts. */
+ u32 *pending_interrupt_node_runtime_indices;
+
+ /* Input nodes are switched from/to interrupt to/from polling mode
+ when average vector length goes above/below polling/interrupt
+ thresholds. */
+ u32 polling_threshold_vector_length;
+ u32 interrupt_threshold_vector_length;
+
+ /* Vector of next frames. */
+ vlib_next_frame_t *next_frames;
+
+ /* Vector of internal node's frames waiting to be called. */
+ vlib_pending_frame_t *pending_frames;
+
+ /* Timing wheel for scheduling time-based node dispatch. */
+ timing_wheel_t timing_wheel;
+
+ vlib_signal_timed_event_data_t *signal_timed_event_data_pool;
+
+ /* Opaque data vector added via timing_wheel_advance. */
+ u32 *data_from_advancing_timing_wheel;
+
+ /* CPU time of next process to be ready on timing wheel. */
+ u64 cpu_time_next_process_ready;
+
+ /* Vector of process nodes.
+ One for each node of type VLIB_NODE_TYPE_PROCESS. */
+ vlib_process_t **processes;
+
+ /* Current running process or ~0 if no process running. */
+ u32 current_process_index;
+
+ /* Pool of pending process frames. */
+ vlib_pending_frame_t *suspended_process_frames;
+
+ /* Vector of event data vectors pending recycle. */
+ void **recycled_event_data_vectors;
+
+ /* Current counts of nodes in each state. */
+ u32 input_node_counts_by_state[VLIB_N_NODE_STATE];
+
+ /* Hash of (scalar_size,vector_size) to frame_sizes index. */
+ uword *frame_size_hash;
+
+ /* Per-size frame allocation information. */
+ vlib_frame_size_t *frame_sizes;
+
+ /* Time of last node runtime stats clear. */
+ f64 time_last_runtime_stats_clear;
+
+ /* Node registrations added by constructors */
+ vlib_node_registration_t *node_registrations;
+} vlib_node_main_t;
+
+
+#define FRAME_QUEUE_MAX_NELTS 32
+typedef struct
+{
+ CLIB_CACHE_LINE_ALIGN_MARK (cacheline0);
+ u64 head;
+ u64 head_hint;
+ u64 tail;
+ u32 n_in_use;
+ u32 nelts;
+ u32 written;
+ u32 threshold;
+ i32 n_vectors[FRAME_QUEUE_MAX_NELTS];
+} frame_queue_trace_t;
+
+typedef struct
+{
+ u64 count[FRAME_QUEUE_MAX_NELTS];
+} frame_queue_nelt_counter_t;
+
+#endif /* included_vlib_node_h */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vlib/node_cli.c b/src/vlib/node_cli.c
new file mode 100644
index 00000000000..05d0f0b5a95
--- /dev/null
+++ b/src/vlib/node_cli.c
@@ -0,0 +1,466 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * node_cli.c: node CLI
+ *
+ * Copyright (c) 2008 Eliot Dresselhaus
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <vlib/vlib.h>
+#include <vlib/threads.h>
+
+static int
+node_cmp (void *a1, void *a2)
+{
+  /* Each argument points at a vlib_node_t pointer; order by node name. */
+  vlib_node_t *lhs = *(vlib_node_t **) a1;
+  vlib_node_t *rhs = *(vlib_node_t **) a2;
+
+  return vec_cmp (lhs->name, rhs->name);
+}
+
+/* CLI handler for "show vlib graph".
+   First formats the graph with a null node, then either the single node
+   named on the command line or, when no node is given, every node sorted
+   by name.  Always returns 0 (no error). */
+static clib_error_t *
+show_node_graph (vlib_main_t * vm,
+                 unformat_input_t * input, vlib_cli_command_t * cmd)
+{
+  vlib_node_main_t *nm = &vm->node_main;
+  vlib_node_t *n;
+  u32 node_index;
+
+  /* format_vlib_node_graph fetches its second argument with
+     va_arg (*va, vlib_node_t *), so the "no node" value must be passed as
+     a pointer; a bare int 0 is not a matching type for a pointer va_arg. */
+  vlib_cli_output (vm, "%U\n", format_vlib_node_graph, nm,
+                   (vlib_node_t *) 0);
+
+  if (unformat (input, "%U", unformat_vlib_node, vm, &node_index))
+    {
+      n = vlib_get_node (vm, node_index);
+      vlib_cli_output (vm, "%U\n", format_vlib_node_graph, nm, n);
+    }
+  else
+    {
+      /* Sort a copy so the node vector itself keeps its index order. */
+      vlib_node_t **nodes = vec_dup (nm->nodes);
+      uword i;
+
+      vec_sort_with_function (nodes, node_cmp);
+
+      for (i = 0; i < vec_len (nodes); i++)
+        vlib_cli_output (vm, "%U\n\n", format_vlib_node_graph, nm, nodes[i]);
+
+      vec_free (nodes);
+    }
+
+  return 0;
+}
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (show_node_graph_command, static) = {
+ .path = "show vlib graph",
+ .short_help = "Show packet processing node graph",
+ .function = show_node_graph,
+};
+/* *INDENT-ON* */
+
+/* Format one row of node runtime statistics.
+   va args: vlib_main_t *vm, vlib_node_t *n, int max.
+   A null n produces the column-header row.  A non-zero max selects the
+   max-clock layout; otherwise state, calls, vectors, suspends, clocks
+   and vectors/call are printed.  Counters are reported relative to the
+   last clear (stats_total - stats_last_clear); the max-clock values are
+   taken from stats_total as is.  Returns the extended vector s. */
+static u8 *
+format_vlib_node_stats (u8 * s, va_list * va)
+{
+  vlib_main_t *vm = va_arg (*va, vlib_main_t *);
+  vlib_node_t *n = va_arg (*va, vlib_node_t *);
+  int max = va_arg (*va, int);
+  f64 v;
+  char *state;
+  u8 *misc_info = 0;
+  u64 c, p, l, d;
+  f64 x;
+  f64 maxc, maxcn;
+  u32 maxn;
+  uword indent;
+
+  if (!n)
+    {
+      if (max)
+        return format (s,
+                       "%=30s%=17s%=16s%=16s%=16s%=16s",
+                       "Name", "Max Node Clocks", "Vectors at Max",
+                       "Max Clocks", "Avg Clocks", "Avg Vectors/Call");
+      else
+        return format (s,
+                       "%=30s%=12s%=16s%=16s%=16s%=16s%=16s",
+                       "Name", "State", "Calls", "Vectors", "Suspends",
+                       "Clocks", "Vectors/Call");
+    }
+
+  indent = format_get_indent (s);
+
+  l = n->stats_total.clocks - n->stats_last_clear.clocks;
+  c = n->stats_total.calls - n->stats_last_clear.calls;
+  p = n->stats_total.vectors - n->stats_last_clear.vectors;
+  d = n->stats_total.suspends - n->stats_last_clear.suspends;
+  maxc = (f64) n->stats_total.max_clock;
+  maxn = n->stats_total.max_clock_n;
+  if (n->stats_total.max_clock_n)
+    maxcn = (f64) n->stats_total.max_clock / (f64) maxn;
+  else
+    maxcn = 0.0;
+
+  /* Clocks per packet, per call or per suspend. */
+  x = 0;
+  if (p > 0)
+    x = (f64) l / (f64) p;
+  else if (c > 0)
+    x = (f64) l / (f64) c;
+  else if (d > 0)
+    x = (f64) l / (f64) d;
+
+  if (c > 0)
+    v = (double) p / (double) c;
+  else
+    v = 0;
+
+  state = "active";
+  if (n->type == VLIB_NODE_TYPE_PROCESS)
+    {
+      /* Named proc (not p) so it does not shadow the vector count above. */
+      vlib_process_t *proc = vlib_get_process_from_node (vm, n);
+
+      /* Show processes with events pending.  This helps spot bugs where events are not
+         being handled. */
+      if (!clib_bitmap_is_zero (proc->non_empty_event_type_bitmap))
+        misc_info = format (misc_info, "events pending, ");
+
+      switch (proc->flags & (VLIB_PROCESS_IS_SUSPENDED_WAITING_FOR_CLOCK
+                             | VLIB_PROCESS_IS_SUSPENDED_WAITING_FOR_EVENT))
+        {
+        default:
+          /* Not suspended: "active" unless the process is not running. */
+          if (!(proc->flags & VLIB_PROCESS_IS_RUNNING))
+            state = "done";
+          break;
+
+        case VLIB_PROCESS_IS_SUSPENDED_WAITING_FOR_CLOCK:
+          state = "time wait";
+          break;
+
+        case VLIB_PROCESS_IS_SUSPENDED_WAITING_FOR_EVENT:
+          state = "event wait";
+          break;
+
+        case (VLIB_PROCESS_IS_SUSPENDED_WAITING_FOR_EVENT
+              | VLIB_PROCESS_IS_SUSPENDED_WAITING_FOR_CLOCK):
+          state = "any wait";
+          break;
+        }
+    }
+  else if (n->type != VLIB_NODE_TYPE_INTERNAL)
+    {
+      state = "polling";
+      if (n->state == VLIB_NODE_STATE_DISABLED)
+        state = "disabled";
+      else if (n->state == VLIB_NODE_STATE_INTERRUPT)
+        state = "interrupt wait";
+    }
+
+  if (max)
+    s = format (s, "%-30v%=17.2e%=16d%=16.2e%=16.2e%=16.2e",
+                n->name, maxc, maxn, maxcn, x, v);
+  else
+    s = format (s, "%-30v%=12s%16Ld%16Ld%16Ld%16.2e%16.2f", n->name, state,
+                c, p, d, x, v);
+
+  if (misc_info)
+    {
+      s = format (s, "\n%U%v", format_white_space, indent + 4, misc_info);
+      vec_free (misc_info);
+    }
+
+  return s;
+}
+
+/* CLI handler for "show runtime".
+   With a node name: syncs and prints the statistics of that one node.
+   Otherwise: for every non-null entry of vlib_mains (or just vm when
+   that vector is empty) prints a per-thread summary followed by one row
+   per node.  Options: "brief"/"b" (default) hides nodes with no calls
+   and no suspends since the last clear, "verbose"/"v" shows all nodes,
+   "max"/"m" selects the max-clock layout.  Always returns 0. */
+static clib_error_t *
+show_node_runtime (vlib_main_t * vm,
+                   unformat_input_t * input, vlib_cli_command_t * cmd)
+{
+  vlib_node_main_t *nm = &vm->node_main;
+  vlib_node_t *n;
+  f64 time_now;
+  u32 node_index;
+  vlib_node_t ***node_dups = 0;
+  f64 *vectors_per_main_loop = 0;
+  f64 *last_vector_length_per_node = 0;
+
+  time_now = vlib_time_now (vm);
+
+  if (unformat (input, "%U", unformat_vlib_node, vm, &node_index))
+    {
+      n = vlib_get_node (vm, node_index);
+      vlib_node_sync_stats (vm, n);
+      /* The header row is requested with a null node.  The formatter
+         reads that argument as a vlib_node_t * via va_arg, so pass a
+         typed null pointer rather than a bare int 0. */
+      vlib_cli_output (vm, "%U\n", format_vlib_node_stats, vm,
+                       (vlib_node_t *) 0, 0);
+      vlib_cli_output (vm, "%U\n", format_vlib_node_stats, vm, n, 0);
+    }
+  else
+    {
+      vlib_node_t **nodes;
+      uword i, j;
+      f64 dt;
+      u64 n_input, n_output, n_drop, n_punt;
+      u64 n_internal_vectors, n_internal_calls;
+      u64 n_clocks, l, v, c, d;
+      int brief = 1;
+      int max = 0;
+      vlib_main_t **stat_vms = 0, *stat_vm;
+
+      /* Suppress nodes with zero calls since last clear */
+      if (unformat (input, "brief") || unformat (input, "b"))
+        brief = 1;
+      if (unformat (input, "verbose") || unformat (input, "v"))
+        brief = 0;
+      if (unformat (input, "max") || unformat (input, "m"))
+        max = 1;
+
+      if (vec_len (vlib_mains) == 0)
+        vec_add1 (stat_vms, vm);
+      else
+        {
+          for (i = 0; i < vec_len (vlib_mains); i++)
+            {
+              stat_vm = vlib_mains[i];
+              if (stat_vm)
+                vec_add1 (stat_vms, stat_vm);
+            }
+        }
+
+      /*
+       * Barrier sync across stats scraping.
+       * Otherwise, the counts will be grossly inaccurate.
+       */
+      vlib_worker_thread_barrier_sync (vm);
+
+      for (j = 0; j < vec_len (stat_vms); j++)
+        {
+          stat_vm = stat_vms[j];
+          nm = &stat_vm->node_main;
+
+          for (i = 0; i < vec_len (nm->nodes); i++)
+            {
+              n = nm->nodes[i];
+              vlib_node_sync_stats (stat_vm, n);
+            }
+
+          /* Keep a private copy of the node pointer vector per thread so
+             it can be sorted after the barrier is released. */
+          nodes = vec_dup (nm->nodes);
+
+          vec_add1 (node_dups, nodes);
+          vec_add1 (vectors_per_main_loop,
+                    vlib_last_vectors_per_main_loop_as_f64 (stat_vm));
+          vec_add1 (last_vector_length_per_node,
+                    vlib_last_vector_length_per_node (stat_vm));
+        }
+      vlib_worker_thread_barrier_release (vm);
+
+      for (j = 0; j < vec_len (stat_vms); j++)
+        {
+          stat_vm = stat_vms[j];
+          nodes = node_dups[j];
+          /* Re-point nm at this thread's node main; otherwise it would
+             still refer to the last thread visited by the loop above and
+             dt below would use that thread's clear time. */
+          nm = &stat_vm->node_main;
+
+          vec_sort_with_function (nodes, node_cmp);
+
+          n_input = n_output = n_drop = n_punt = n_clocks = 0;
+          n_internal_vectors = n_internal_calls = 0;
+          for (i = 0; i < vec_len (nodes); i++)
+            {
+              n = nodes[i];
+
+              l = n->stats_total.clocks - n->stats_last_clear.clocks;
+              n_clocks += l;
+
+              v = n->stats_total.vectors - n->stats_last_clear.vectors;
+              c = n->stats_total.calls - n->stats_last_clear.calls;
+
+              switch (n->type)
+                {
+                default:
+                  continue;
+
+                case VLIB_NODE_TYPE_INTERNAL:
+                  n_output += (n->flags & VLIB_NODE_FLAG_IS_OUTPUT) ? v : 0;
+                  n_drop += (n->flags & VLIB_NODE_FLAG_IS_DROP) ? v : 0;
+                  n_punt += (n->flags & VLIB_NODE_FLAG_IS_PUNT) ? v : 0;
+                  if (!(n->flags & VLIB_NODE_FLAG_IS_OUTPUT))
+                    {
+                      n_internal_vectors += v;
+                      n_internal_calls += c;
+                    }
+                  if (n->flags & VLIB_NODE_FLAG_IS_HANDOFF)
+                    n_input += v;
+                  break;
+
+                case VLIB_NODE_TYPE_INPUT:
+                  n_input += v;
+                  break;
+                }
+            }
+
+          if (vec_len (vlib_mains))
+            {
+              vlib_worker_thread_t *w = vlib_worker_threads + j;
+              if (j > 0)
+                vlib_cli_output (vm, "---------------");
+
+              if (w->lcore_id > -1)
+                vlib_cli_output (vm, "Thread %d %s (lcore %u)", j, w->name,
+                                 w->lcore_id);
+              else
+                vlib_cli_output (vm, "Thread %d %s", j, w->name);
+            }
+
+          dt = time_now - nm->time_last_runtime_stats_clear;
+          vlib_cli_output
+            (vm,
+             "Time %.1f, average vectors/node %.2f, last %d main loops %.2f per node %.2f"
+             "\n  vector rates in %.4e, out %.4e, drop %.4e, punt %.4e",
+             dt,
+             (n_internal_calls > 0
+              ? (f64) n_internal_vectors / (f64) n_internal_calls
+              : 0),
+             1 << VLIB_LOG2_MAIN_LOOPS_PER_STATS_UPDATE,
+             vectors_per_main_loop[j],
+             last_vector_length_per_node[j],
+             (f64) n_input / dt,
+             (f64) n_output / dt, (f64) n_drop / dt, (f64) n_punt / dt);
+
+          /* Header row: typed null node pointer, see note above. */
+          vlib_cli_output (vm, "%U", format_vlib_node_stats, stat_vm,
+                           (vlib_node_t *) 0, max);
+          for (i = 0; i < vec_len (nodes); i++)
+            {
+              c =
+                nodes[i]->stats_total.calls -
+                nodes[i]->stats_last_clear.calls;
+              d =
+                nodes[i]->stats_total.suspends -
+                nodes[i]->stats_last_clear.suspends;
+              if (c || d || !brief)
+                {
+                  vlib_cli_output (vm, "%U", format_vlib_node_stats, stat_vm,
+                                   nodes[i], max);
+                }
+            }
+          vec_free (nodes);
+        }
+      vec_free (stat_vms);
+      vec_free (node_dups);
+      vec_free (vectors_per_main_loop);
+      vec_free (last_vector_length_per_node);
+    }
+
+  return 0;
+}
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (show_node_runtime_command, static) = {
+ .path = "show runtime",
+ .short_help = "Show packet processing runtime",
+ .function = show_node_runtime,
+ .is_mp_safe = 1,
+};
+/* *INDENT-ON* */
+
+/* CLI handler for "clear runtime": under the worker barrier, syncs every
+   node's statistics, snapshots them as the new "last clear" baseline,
+   zeroes each runtime's max_clock and records the clear time per node
+   main.  Always returns 0. */
+static clib_error_t *
+clear_node_runtime (vlib_main_t * vm,
+                    unformat_input_t * input, vlib_cli_command_t * cmd)
+{
+  vlib_main_t **mains = 0;
+  int t, k;
+
+  /* Mains to clear: every non-null entry of vlib_mains, or only the
+     caller's main when that vector is empty. */
+  for (t = 0; t < vec_len (vlib_mains); t++)
+    {
+      vlib_main_t *candidate = vlib_mains[t];
+
+      if (candidate)
+        {
+          vec_add1 (mains, candidate);
+        }
+    }
+  if (vec_len (vlib_mains) == 0)
+    {
+      vec_add1 (mains, vm);
+    }
+
+  vlib_worker_thread_barrier_sync (vm);
+
+  for (t = 0; t < vec_len (mains); t++)
+    {
+      vlib_main_t *this_vm = mains[t];
+      vlib_node_main_t *this_nm = &this_vm->node_main;
+
+      for (k = 0; k < vec_len (this_nm->nodes); k++)
+        {
+          vlib_node_t *node = this_nm->nodes[k];
+          vlib_node_runtime_t *rt;
+
+          vlib_node_sync_stats (this_vm, node);
+          node->stats_last_clear = node->stats_total;
+
+          rt = vlib_node_get_runtime (this_vm, node->index);
+          rt->max_clock = 0;
+        }
+      /* Note: input/output rates computed using vlib_global_main */
+      this_nm->time_last_runtime_stats_clear = vlib_time_now (vm);
+    }
+
+  vlib_worker_thread_barrier_release (vm);
+
+  vec_free (mains);
+
+  return 0;
+}
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (clear_node_runtime_command, static) = {
+ .path = "clear runtime",
+ .short_help = "Clear packet processing runtime statistics",
+ .function = clear_node_runtime,
+};
+/* *INDENT-ON* */
+
+/* Intentionally empty: exists only so that other code can reference a symbol in this file and get it linked in. */
+void
+vlib_node_cli_reference (void)
+{
+}
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vlib/node_format.c b/src/vlib/node_format.c
new file mode 100644
index 00000000000..e9dde40fa70
--- /dev/null
+++ b/src/vlib/node_format.c
@@ -0,0 +1,187 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * node_format.c: node formatting
+ *
+ * Copyright (c) 2008 Eliot Dresselhaus
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <vlib/vlib.h>
+
+/* Format the graph neighbourhood of one node as table rows.
+ *
+ * Varargs: vlib_node_main_t *nm, vlib_node_t *n.
+ * When n is NULL only the three-column header ("Name", "Next", "Previous")
+ * is appended.  Otherwise the node name is appended, followed by one row
+ * per table line pairing the i-th valid next node (with its slot number)
+ * with the i-th previous node taken from n->prev_node_bitmap.
+ * Returns the (possibly reallocated) output vector s. */
+u8 *
+format_vlib_node_graph (u8 * s, va_list * va)
+{
+ vlib_node_main_t *nm = va_arg (*va, vlib_node_main_t *);
+ vlib_node_t *n = va_arg (*va, vlib_node_t *);
+ int i, j;
+ uword indent;
+ /* One output row: a next-node entry and/or a previous-node entry. */
+ typedef struct
+ {
+ u32 next_node;
+ u32 next_slot;
+ u32 prev_node;
+ } tmp_t;
+ tmp_t *tmps = 0;
+ /* ~0 marks a column that has no entry in a given row. */
+ tmp_t empty = {.next_node = ~0,.prev_node = ~0 };
+
+ if (!n)
+ return format (s, "%=26s%=26s%=26s", "Name", "Next", "Previous");
+
+ s = format (s, "%-26v", n->name);
+
+ /* Remember the current column so continuation rows line up under it. */
+ indent = format_get_indent (s);
+
+ /* Collect next nodes, skipping unused slots but keeping the slot index. */
+ for (i = j = 0; i < vec_len (n->next_nodes); i++)
+ {
+ if (n->next_nodes[i] == VLIB_INVALID_NODE_INDEX)
+ continue;
+ vec_validate_init_empty (tmps, j, empty);
+ tmps[j].next_node = n->next_nodes[i];
+ tmps[j].next_slot = i;
+ j++;
+ }
+
+ /* Restart at row 0 so previous nodes share rows with the next nodes;
+ rows are added (initialized to empty) if there are more previous
+ nodes than next nodes. */
+ j = 0;
+ /* *INDENT-OFF* */
+ clib_bitmap_foreach (i, n->prev_node_bitmap, ({
+ vec_validate_init_empty (tmps, j, empty);
+ tmps[j].prev_node = i;
+ j++;
+ }));
+ /* *INDENT-ON* */
+
+ for (i = 0; i < vec_len (tmps); i++)
+ {
+ /* Every row after the first starts on a new, indented line. */
+ if (i > 0)
+ s = format (s, "\n%U", format_white_space, indent);
+
+ if (tmps[i].next_node != ~0)
+ {
+ vlib_node_t *x;
+ u8 *t = 0;
+
+ /* Build "<name> [<slot>]" in a temporary so it can be centered. */
+ x = vec_elt (nm->nodes, tmps[i].next_node);
+ t = format (t, "%v [%d]", x->name, tmps[i].next_slot);
+ s = format (s, "%=26v", t);
+ vec_free (t);
+ }
+ else
+ /* No next node in this row: pad the column. */
+ s = format (s, "%26s", "");
+
+ if (tmps[i].prev_node != ~0)
+ {
+ vlib_node_t *x;
+ x = vec_elt (nm->nodes, tmps[i].prev_node);
+ s = format (s, "%=26v", x->name);
+ }
+ }
+
+ vec_free (tmps);
+
+ return s;
+}
+
+/* Format "<node name> -> <next node name>" for one graph arc.
+ * Varargs: vlib_main_t *vm, vlib_node_t *n, u32 next_index, where
+ * next_index selects an entry of n->next_nodes. */
+u8 *
+format_vlib_node_and_next (u8 * s, va_list * va)
+{
+  vlib_main_t *vm = va_arg (*va, vlib_main_t *);
+  vlib_node_t *from = va_arg (*va, vlib_node_t *);
+  u32 arc = va_arg (*va, u32);
+  u32 *to_index = vec_elt_at_index (from->next_nodes, arc);
+  vlib_node_t *to = vlib_get_node (vm, to_index[0]);
+
+  return format (s, "%v -> %v", from->name, to->name);
+}
+
+/* Format the name of the node with the given index.
+ * Varargs: vlib_main_t *vm, u32 node_index. */
+u8 *
+format_vlib_node_name (u8 * s, va_list * va)
+{
+  vlib_main_t *vm = va_arg (*va, vlib_main_t *);
+  u32 index = va_arg (*va, u32);
+
+  return format (s, "%v", vlib_get_node (vm, index)->name);
+}
+
+/* Format the name of the node reached from node_index via arc next_index.
+ * Varargs: vlib_main_t *vm, u32 node_index, u32 next_index. */
+u8 *
+format_vlib_next_node_name (u8 * s, va_list * va)
+{
+  vlib_main_t *vm = va_arg (*va, vlib_main_t *);
+  u32 from_index = va_arg (*va, u32);
+  u32 arc = va_arg (*va, u32);
+  vlib_node_t *target;
+
+  target = vlib_get_next_node (vm, from_index, arc);
+  return format (s, "%v", target->name);
+}
+
+/* Parse a node name from the input and store the matching node index.
+ * Varargs: vlib_main_t *vm, u32 *result.  The lookup is delegated to
+ * unformat_hash_vec_string over vm->node_main.node_by_name; its return
+ * value is passed straight back to the caller. */
+uword
+unformat_vlib_node (unformat_input_t * input, va_list * args)
+{
+  vlib_main_t *vm = va_arg (*args, vlib_main_t *);
+  u32 *node_index_out = va_arg (*args, u32 *);
+  uword matched;
+
+  matched = unformat_user (input, unformat_hash_vec_string,
+                           vm->node_main.node_by_name, node_index_out);
+  return matched;
+}
+
+/* Format a time value with the "%12.4f" conversion.
+ * Varargs: vlib_main_t *vm (consumed but not used), f64 time. */
+u8 *
+format_vlib_time (u8 * s, va_list * va)
+{
+  f64 seconds;
+
+  /* The vm argument must still be consumed to reach the time value. */
+  (void) va_arg (*va, vlib_main_t *);
+  seconds = va_arg (*va, f64);
+
+  return format (s, "%12.4f", seconds);
+}
+
+/* Format a CPU clock reading as a time via format_vlib_time.
+ * Varargs: vlib_main_t *vm, u64 cpu_time.  The reading is made relative to
+ * vm->clib_time.init_cpu_time and scaled by
+ * vm->clib_time.seconds_per_clock. */
+u8 *
+format_vlib_cpu_time (u8 * s, va_list * va)
+{
+  vlib_main_t *vm = va_arg (*va, vlib_main_t *);
+  u64 ticks = va_arg (*va, u64);
+  f64 seconds =
+    (ticks - vm->clib_time.init_cpu_time) * vm->clib_time.seconds_per_clock;
+
+  return format (s, "%U", format_vlib_time, vm, seconds);
+}
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vlib/node_funcs.h b/src/vlib/node_funcs.h
new file mode 100644
index 00000000000..2116739602e
--- /dev/null
+++ b/src/vlib/node_funcs.h
@@ -0,0 +1,1130 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * node_funcs.h: processing nodes global functions/inlines
+ *
+ * Copyright (c) 2008 Eliot Dresselhaus
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/** \file
+ vlib node functions
+*/
+
+
+#ifndef included_vlib_node_funcs_h
+#define included_vlib_node_funcs_h
+
+#include <vppinfra/fifo.h>
+
+/** \brief Look up a vlib node by its index.
+ @warning This function will ASSERT if @c i is out of range.
+ @param vm vlib_main_t pointer, varies by thread
+ @param i node index.
+ @return pointer to the vlib_node_t stored at index @c i of
+ vm->node_main.nodes.
+*/
+
+always_inline vlib_node_t *
+vlib_get_node (vlib_main_t * vm, u32 i)
+{
+  vlib_node_main_t *nm = &vm->node_main;
+  vlib_node_t *node = vec_elt (nm->nodes, i);
+
+  return node;
+}
+
+/** \brief Follow a graph arc and return the node at its far end.
+ @param vm vlib_main_t pointer, varies by thread
+ @param node_index index of original node
+ @param next_index graph arc index; asserted to be within the original
+ node's next_nodes vector
+ @return pointer to the vlib_node_t at the end of the indicated arc
+*/
+
+always_inline vlib_node_t *
+vlib_get_next_node (vlib_main_t * vm, u32 node_index, u32 next_index)
+{
+  vlib_node_t *from = vec_elt (vm->node_main.nodes, node_index);
+  u32 to_index;
+
+  ASSERT (next_index < vec_len (from->next_nodes));
+  to_index = from->next_nodes[next_index];
+
+  return vlib_get_node (vm, to_index);
+}
+
+/** \brief Get node runtime by node index.
+ Process nodes keep their runtime inside the vlib_process_t found in
+ nm->processes; every other node type keeps it in
+ nm->nodes_by_type[type].  Both are indexed by the node's runtime_index.
+ @param vm vlib_main_t pointer, varies by thread
+ @param node_index index of node
+ @return pointer to the indicated vlib_node_runtime_t
+*/
+
+always_inline vlib_node_runtime_t *
+vlib_node_get_runtime (vlib_main_t * vm, u32 node_index)
+{
+  vlib_node_main_t *nm = &vm->node_main;
+  vlib_node_t *node = vec_elt (nm->nodes, node_index);
+
+  if (node->type == VLIB_NODE_TYPE_PROCESS)
+    {
+      vlib_process_t *proc = vec_elt (nm->processes, node->runtime_index);
+      return &proc->node_runtime;
+    }
+
+  return vec_elt_at_index (nm->nodes_by_type[node->type],
+                           node->runtime_index);
+}
+
+/** \brief Get node runtime private data by node index.
+ @param vm vlib_main_t pointer, varies by thread
+ @param node_index index of the node
+ @return pointer to the runtime_data member of the node's
+ vlib_node_runtime_t
+*/
+
+always_inline void *
+vlib_node_get_runtime_data (vlib_main_t * vm, u32 node_index)
+{
+  return vlib_node_get_runtime (vm, node_index)->runtime_data;
+}
+
+/** \brief Set node runtime private data.
+ The bytes are stored twice: as a vector on the vlib_node_t
+ (n->runtime_data, replacing any previous copy) and, when non-empty,
+ copied into the runtime_data area of the node's vlib_node_runtime_t.
+ @param vm vlib_main_t pointer, varies by thread
+ @param node_index index of the node
+ @param runtime_data arbitrary runtime private data
+ @param n_runtime_data_bytes size of runtime private data
+*/
+
+always_inline void
+vlib_node_set_runtime_data (vlib_main_t * vm, u32 node_index,
+ void *runtime_data, u32 n_runtime_data_bytes)
+{
+ vlib_node_t *n = vlib_get_node (vm, node_index);
+ vlib_node_runtime_t *r = vlib_node_get_runtime (vm, node_index);
+
+ /* Replace the node's own copy of the private data. */
+ n->runtime_data_bytes = n_runtime_data_bytes;
+ vec_free (n->runtime_data);
+ vec_add (n->runtime_data, runtime_data, n_runtime_data_bytes);
+
+ /* The data must fit in the space between the runtime_data member and
+ the end of vlib_node_runtime_t. */
+ ASSERT (vec_len (n->runtime_data) <= sizeof (vlib_node_runtime_t) -
+ STRUCT_OFFSET_OF (vlib_node_runtime_t, runtime_data));
+
+ /* Mirror the data into the runtime structure. */
+ if (vec_len (n->runtime_data) > 0)
+ clib_memcpy (r->runtime_data, n->runtime_data, vec_len (n->runtime_data));
+}
+
+/** \brief Set node dispatch state.
+ Writes the new state to both the vlib_node_t and its runtime.  For input
+ nodes the per-state counters in nm->input_node_counts_by_state are moved
+ from the old state to the new one.  For process nodes the resume-pending
+ and suspended-waiting flags are cleared.
+ @param vm vlib_main_t pointer, varies by thread
+ @param node_index index of the node
+ @param new_state new state for node, see vlib_node_state_t
+*/
+always_inline void
+vlib_node_set_state (vlib_main_t * vm, u32 node_index,
+ vlib_node_state_t new_state)
+{
+ vlib_node_main_t *nm = &vm->node_main;
+ vlib_node_t *n;
+ vlib_node_runtime_t *r;
+
+ n = vec_elt (nm->nodes, node_index);
+ if (n->type == VLIB_NODE_TYPE_PROCESS)
+ {
+ /* Process runtimes live inside the vlib_process_t. */
+ vlib_process_t *p = vec_elt (nm->processes, n->runtime_index);
+ r = &p->node_runtime;
+
+ /* When disabling make sure flags are cleared. */
+ /* NOTE(review): the flags are cleared for every new_state, not only
+ when disabling -- confirm that this is intended. */
+ p->flags &= ~(VLIB_PROCESS_RESUME_PENDING
+ | VLIB_PROCESS_IS_SUSPENDED_WAITING_FOR_CLOCK
+ | VLIB_PROCESS_IS_SUSPENDED_WAITING_FOR_EVENT);
+ }
+ else
+ r = vec_elt_at_index (nm->nodes_by_type[n->type], n->runtime_index);
+
+ ASSERT (new_state < VLIB_N_NODE_STATE);
+
+ if (n->type == VLIB_NODE_TYPE_INPUT)
+ {
+ /* Move this node from the old state's count to the new state's. */
+ ASSERT (nm->input_node_counts_by_state[n->state] > 0);
+ nm->input_node_counts_by_state[n->state] -= 1;
+ nm->input_node_counts_by_state[new_state] += 1;
+ }
+
+ n->state = new_state;
+ r->state = new_state;
+}
+
+/** \brief Mark an input node as having a pending interrupt.
+ Appends the node's runtime index to
+ nm->pending_interrupt_node_runtime_indices.  Asserts that the node is of
+ type VLIB_NODE_TYPE_INPUT.
+ @param vm vlib_main_t pointer, varies by thread
+ @param node_index index of the input node
+*/
+always_inline void
+vlib_node_set_interrupt_pending (vlib_main_t * vm, u32 node_index)
+{
+  vlib_node_main_t *nm = &vm->node_main;
+  vlib_node_t *input_node = vec_elt (nm->nodes, node_index);
+
+  ASSERT (input_node->type == VLIB_NODE_TYPE_INPUT);
+  vec_add1 (nm->pending_interrupt_node_runtime_indices,
+            input_node->runtime_index);
+}
+
+/** \brief Get the vlib_process_t that backs a process node.
+ Asserts that the node is of type VLIB_NODE_TYPE_PROCESS.
+ @param vm vlib_main_t pointer, varies by thread
+ @param node process node
+ @return entry of nm->processes selected by the node's runtime_index
+*/
+always_inline vlib_process_t *
+vlib_get_process_from_node (vlib_main_t * vm, vlib_node_t * node)
+{
+  vlib_node_main_t *nm = &vm->node_main;
+  vlib_process_t *proc;
+
+  ASSERT (node->type == VLIB_NODE_TYPE_PROCESS);
+  proc = vec_elt (nm->processes, node->runtime_index);
+
+  return proc;
+}
+
+/* Fetches frame with given handle, without checking that it is allocated.
+   The handle bits selected by VLIB_CPU_MASK pick the owning vlib_main_t
+   out of vlib_mains (vm itself is used when vlib_mains is not set); the
+   bits selected by VLIB_OFFSET_MASK are a byte offset from that main's
+   heap_base. */
+always_inline vlib_frame_t *
+vlib_get_frame_no_check (vlib_main_t * vm, uword frame_index)
+{
+  u32 cpu = frame_index & VLIB_CPU_MASK;
+  u32 byte_offset = frame_index & VLIB_OFFSET_MASK;
+  vlib_main_t *owner = vlib_mains ? vlib_mains[cpu] : vm;
+  vlib_frame_t *frame = owner->heap_base + byte_offset;
+
+  return frame;
+}
+
+/* Computes the handle of a frame without validating the result: the byte
+   offset of f from the heap_base of its owning vlib_main_t (chosen by
+   f->cpu_index when vlib_mains is set, else vm), OR-ed with f->cpu_index.
+   Asserts that the frame address has no bits set under VLIB_CPU_MASK. */
+always_inline u32
+vlib_frame_index_no_check (vlib_main_t * vm, vlib_frame_t * f)
+{
+  vlib_main_t *owner;
+  u32 byte_offset;
+
+  ASSERT (((uword) f & VLIB_CPU_MASK) == 0);
+
+  owner = vlib_mains ? vlib_mains[f->cpu_index] : vm;
+  byte_offset = ((u8 *) f - (u8 *) owner->heap_base);
+
+  return byte_offset | f->cpu_index;
+}
+
+/* Fetches the frame with the given handle and asserts that its
+   VLIB_FRAME_IS_ALLOCATED flag is set. */
+always_inline vlib_frame_t *
+vlib_get_frame (vlib_main_t * vm, uword frame_index)
+{
+  vlib_frame_t *frame = vlib_get_frame_no_check (vm, frame_index);
+
+  ASSERT (frame->flags & VLIB_FRAME_IS_ALLOCATED);
+
+  return frame;
+}
+
+/* Returns the handle of frame f and asserts that looking the handle up
+   again with vlib_get_frame yields the same frame. */
+always_inline u32
+vlib_frame_index (vlib_main_t * vm, vlib_frame_t * f)
+{
+  uword handle = vlib_frame_index_no_check (vm, f);
+
+  ASSERT (vlib_get_frame (vm, handle) == f);
+
+  return handle;
+}
+
+/* Byte alignment for vector arguments. */
+#define VLIB_FRAME_VECTOR_ALIGN (1 << 4)
+
+/* Byte offset from the start of a frame to its vector data: the frame
+   header plus scalar_size bytes of scalar data, rounded with round_pow2
+   to VLIB_FRAME_VECTOR_ALIGN. */
+always_inline u32
+vlib_frame_vector_byte_offset (u32 scalar_size)
+{
+  uword unaligned_bytes = sizeof (vlib_frame_t) + scalar_size;
+
+  return round_pow2 (unaligned_bytes, VLIB_FRAME_VECTOR_ALIGN);
+}
+
+/** \brief Get pointer to frame vector data.
+ The vector data starts vlib_frame_vector_byte_offset (f->scalar_size)
+ bytes past the start of the frame.
+ @param f vlib_frame_t pointer
+ @return pointer to first vector element in frame
+*/
+always_inline void *
+vlib_frame_vector_args (vlib_frame_t * f)
+{
+  u32 vector_offset = vlib_frame_vector_byte_offset (f->scalar_size);
+
+  return (void *) f + vector_offset;
+}
+
+/** \brief Get pointer to frame scalar data.
+
+ The scalar data occupies the f->scalar_size bytes that end where the
+ vector data begins.
+
+ @warning This is almost certainly not the function you wish to call.
+ See @ref vlib_frame_vector_args instead.
+
+ @param f vlib_frame_t pointer
+
+ @return arbitrary node scalar data
+
+ @sa vlib_frame_vector_args
+*/
+always_inline void *
+vlib_frame_args (vlib_frame_t * f)
+{
+  void *vector_start = vlib_frame_vector_args (f);
+
+  return vector_start - f->scalar_size;
+}
+
+/** \brief Get the next-frame record for a node runtime and graph arc.
+ Asserts that @c next_index is below n->n_next_nodes, then returns the
+ element of nm->next_frames at n->next_frame_index + next_index.
+ @param vm vlib_main_t pointer, varies by thread
+ @param n node runtime whose arc is being looked up
+ @param next_index graph arc index
+ @return pointer to the requested vlib_next_frame_t
+*/
+always_inline vlib_next_frame_t *
+vlib_node_runtime_get_next_frame (vlib_main_t * vm,
+ vlib_node_runtime_t * n, u32 next_index)
+{
+ vlib_node_main_t *nm = &vm->node_main;
+ vlib_next_frame_t *nf;
+
+ ASSERT (next_index < n->n_next_nodes);
+ nf = vec_elt_at_index (nm->next_frames, n->next_frame_index + next_index);
+
+ /* Debug builds only: cross-check that the next frame records the
+ runtime index of the node this arc points at. */
+ if (CLIB_DEBUG > 0)
+ {
+ vlib_node_t *node, *next;
+ node = vec_elt (nm->nodes, n->node_index);
+ next = vec_elt (nm->nodes, node->next_nodes[next_index]);
+ ASSERT (nf->node_runtime_index == next->runtime_index);
+ }
+
+ return nf;
+}
+
+/** \brief Get pointer to frame by (@c node_index, @c next_index).
+
+ The node's runtime is resolved with vlib_node_get_runtime (), so process
+ nodes (whose runtime lives in nm->processes rather than in
+ nm->nodes_by_type[]) are handled the same way as every other node type.
+
+ @warning This is not a function that you should call directly.
+ See @ref vlib_get_next_frame instead.
+
+ @param vm vlib_main_t pointer, varies by thread
+ @param node_index index of the node
+ @param next_index graph arc index
+
+ @return pointer to the requested vlib_next_frame_t
+
+ @sa vlib_get_next_frame
+*/
+
+always_inline vlib_next_frame_t *
+vlib_node_get_next_frame (vlib_main_t * vm, u32 node_index, u32 next_index)
+{
+  /* Use the shared accessor instead of indexing nodes_by_type[] directly:
+     it picks the right container for process and non-process nodes. */
+  vlib_node_runtime_t *r = vlib_node_get_runtime (vm, node_index);
+
+  return vlib_node_runtime_get_next_frame (vm, r, next_index);
+}
+
+/* Returns the frame used for arc next_index of the given node runtime.
+ Declaration only; NOTE(review): alloc_new_frame presumably forces a
+ fresh frame instead of reusing the current one -- confirm against the
+ definition, which is not in this header. */
+vlib_frame_t *vlib_get_next_frame_internal (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ u32 next_index,
+ u32 alloc_new_frame);
+
+/* Common body of vlib_get_next_frame / vlib_get_new_next_frame.
+ Fetches the frame for the arc, then sets the caller's lvalues:
+ vectors := address of the first unused element (n_vectors elements
+ past the start of the frame's vector data, element size
+ taken from the type of vectors);
+ n_vectors_left := VLIB_FRAME_SIZE minus the frame's n_vectors.
+ Arguments are macro parameters and vectors is evaluated more than
+ once, so avoid side effects in them. */
+#define vlib_get_next_frame_macro(vm,node,next_index,vectors,n_vectors_left,alloc_new_frame) \
+do { \
+ vlib_frame_t * _f \
+ = vlib_get_next_frame_internal ((vm), (node), (next_index), \
+ (alloc_new_frame)); \
+ u32 _n = _f->n_vectors; \
+ (vectors) = vlib_frame_vector_args (_f) + _n * sizeof ((vectors)[0]); \
+ (n_vectors_left) = VLIB_FRAME_SIZE - _n; \
+} while (0)
+
+
+/** \brief Get pointer to next frame vector data by
+ (@c vlib_node_runtime_t, @c next_index).
+ Standard single/dual loop boilerplate element.
+ Expands to vlib_get_next_frame_macro with alloc_new_frame set to 0.
+ @attention This is a MACRO, with SIDE EFFECTS.
+
+ @param vm vlib_main_t pointer, varies by thread
+ @param node current node vlib_node_runtime_t pointer
+ @param next_index requested graph arc index
+
+ @return @c vectors -- pointer to next available vector slot
+ @return @c n_vectors_left -- number of vector slots available
+*/
+#define vlib_get_next_frame(vm,node,next_index,vectors,n_vectors_left) \
+ vlib_get_next_frame_macro (vm, node, next_index, \
+ vectors, n_vectors_left, \
+ /* alloc new frame */ 0)
+
+/** \brief Same as @ref vlib_get_next_frame, but expands to
+ vlib_get_next_frame_macro with alloc_new_frame set to 1.
+ @attention This is a MACRO, with SIDE EFFECTS.
+*/
+#define vlib_get_new_next_frame(vm,node,next_index,vectors,n_vectors_left) \
+ vlib_get_next_frame_macro (vm, node, next_index, \
+ vectors, n_vectors_left, \
+ /* alloc new frame */ 1)
+
+/** \brief Release pointer to next frame vector data.
+ Standard single/dual loop boilerplate element.
+ @param vm vlib_main_t pointer, varies by thread
+ @param r current node vlib_node_runtime_t pointer
+ @param next_index graph arc index
+ @param n_packets_left number of slots still available in vector
+*/
+void
+vlib_put_next_frame (vlib_main_t * vm,
+ vlib_node_runtime_t * r,
+ u32 next_index, u32 n_packets_left);
+
+/* Combination get plus put. Returns vector argument just added.
+ Reserves exactly one slot on arc next_index: gets the next free slot
+ into v, asserts that at least one slot was available, then puts the
+ frame back with one slot fewer remaining.  The caller writes the
+ element through the returned pointer afterwards.  This is a GNU
+ statement-expression macro; v must be an assignable pointer. */
+#define vlib_set_next_frame(vm,node,next_index,v) \
+({ \
+ uword _n_left; \
+ vlib_get_next_frame ((vm), (node), (next_index), (v), _n_left); \
+ ASSERT (_n_left > 0); \
+ vlib_put_next_frame ((vm), (node), (next_index), _n_left - 1); \
+ (v); \
+})
+
+/** \brief Enqueue a single buffer index on a graph arc.
+ Reserves one slot with vlib_set_next_frame and stores @c buffer_index
+ in it.
+ @param vm vlib_main_t pointer, varies by thread
+ @param node current node vlib_node_runtime_t pointer
+ @param next_index graph arc index
+ @param buffer_index buffer index to store in the reserved slot
+*/
+always_inline void
+vlib_set_next_frame_buffer (vlib_main_t * vm,
+                            vlib_node_runtime_t * node,
+                            u32 next_index, u32 buffer_index)
+{
+  u32 *slot;
+
+  slot = vlib_set_next_frame (vm, node, next_index, slot);
+  *slot = buffer_index;
+}
+
+/* Frame hand-off addressed by destination node index rather than by graph
+ arc.  Declarations only; NOTE(review): presumably get returns a frame
+ for the caller to fill and put delivers it to to_node_index -- confirm
+ against the definitions, which are not in this header. */
+vlib_frame_t *vlib_get_frame_to_node (vlib_main_t * vm, u32 to_node_index);
+void vlib_put_frame_to_node (vlib_main_t * vm, u32 to_node_index,
+ vlib_frame_t * f);
+
+/** \brief Get the process selected by nm->current_process_index.
+ @param vm vlib_main_t pointer, varies by thread
+ @return entry of nm->processes at nm->current_process_index
+*/
+always_inline vlib_process_t *
+vlib_get_current_process (vlib_main_t * vm)
+{
+  return vec_elt (vm->node_main.processes,
+                  vm->node_main.current_process_index);
+}
+
+/** \brief Test whether a current process is recorded.
+ @param vm vlib_main_t pointer, varies by thread
+ @return non-zero when nm->current_process_index is not ~0
+*/
+always_inline uword
+vlib_in_process_context (vlib_main_t * vm)
+{
+  vlib_node_main_t *nm = &vm->node_main;
+
+  return nm->current_process_index != ~0;
+}
+
+/** \brief Get the node index of the current process.
+ @param vm vlib_main_t pointer, varies by thread
+ @return node_runtime.node_index of the process returned by
+ vlib_get_current_process
+*/
+always_inline uword
+vlib_current_process (vlib_main_t * vm)
+{
+  vlib_process_t *current = vlib_get_current_process (vm);
+
+  return current->node_runtime.node_index;
+}
+
+/** Tells whether a suspend interval is too short to be worth suspending
+ for, i.e. less than 1us.
+ @param dt - remaining poll time in seconds
+ @returns 1 if dt < 1e-6, 0 otherwise
+*/
+always_inline uword
+vlib_process_suspend_time_is_zero (f64 dt)
+{
+  uword is_zero = dt < 1e-6;
+
+  return is_zero;
+}
+
+/** Suspend a vlib cooperative multi-tasking thread for a period of time
+ Intervals that vlib_process_suspend_time_is_zero () treats as zero
+ (including negative ones) return immediately without suspending.
+ Otherwise the current process is flagged as waiting for the clock, its
+ resume time is set to now + dt and control is handed back through
+ p->return_longjmp; the function returns once the process is resumed
+ through p->resume_longjmp.
+ @param vm - vlib_main_t *
+ @param dt - suspend interval in seconds
+ @returns VLIB_PROCESS_RESUME_LONGJMP_RESUME, routinely ignored
+*/
+
+always_inline uword
+vlib_process_suspend (vlib_main_t * vm, f64 dt)
+{
+  uword r;
+  vlib_node_main_t *nm = &vm->node_main;
+  vlib_process_t *p = vec_elt (nm->processes, nm->current_process_index);
+  u64 dt_cpu;
+
+  if (vlib_process_suspend_time_is_zero (dt))
+    return VLIB_PROCESS_RESUME_LONGJMP_RESUME;
+
+  /* Convert to clock ticks only after the guard above: dt is known to be
+     at least 1e-6 here, so a negative interval is never converted to the
+     unsigned tick count (an out-of-range float-to-integer conversion is
+     undefined behavior in C). */
+  dt_cpu = dt * vm->clib_time.clocks_per_second;
+
+  p->flags |= VLIB_PROCESS_IS_SUSPENDED_WAITING_FOR_CLOCK;
+  r = clib_setjmp (&p->resume_longjmp, VLIB_PROCESS_RESUME_LONGJMP_SUSPEND);
+  if (r == VLIB_PROCESS_RESUME_LONGJMP_SUSPEND)
+    {
+      /* First pass: record the wake-up time and yield. */
+      p->resume_cpu_time = clib_cpu_time_now () + dt_cpu;
+      clib_longjmp (&p->return_longjmp, VLIB_PROCESS_RETURN_LONGJMP_SUSPEND);
+    }
+
+  return r;
+}
+
+/* Returns event type index t to p->event_type_pool (asserting it is
+   currently allocated) and, for one-time event types, also clears its bit
+   in p->one_time_event_type_bitmap. */
+always_inline void
+vlib_process_free_event_type (vlib_process_t * p, uword t,
+                              uword is_one_time_event)
+{
+  ASSERT (!pool_is_free_index (p->event_type_pool, t));
+  pool_put_index (p->event_type_pool, t);
+
+  if (!is_one_time_event)
+    return;
+
+  p->one_time_event_type_bitmap =
+    clib_bitmap_andnoti (p->one_time_event_type_bitmap, t);
+}
+
+/* Frees event type index t only when it is marked in
+   p->one_time_event_type_bitmap; other event types are left allocated.
+   Asserts that t is currently allocated in p->event_type_pool. */
+always_inline void
+vlib_process_maybe_free_event_type (vlib_process_t * p, uword t)
+{
+  uword is_one_time;
+
+  ASSERT (!pool_is_free_index (p->event_type_pool, t));
+
+  is_one_time = clib_bitmap_get (p->one_time_event_type_bitmap, t);
+  if (is_one_time)
+    vlib_process_free_event_type (p, t, /* is_one_time_event */ 1);
+}
+
+/** Take the pending event data vector of the first event type that has
+ events ready for the current process.
+
+ Unlike vlib_process_get_events, the vector itself is handed to the
+ caller (the process's slot for that type is reset to 0) instead of being
+ copied.  NOTE(review): the caller is presumably expected to give it back
+ with vlib_process_put_event_data -- confirm against callers.
+
+ @param vm - vlib_main_t pointer
+ @param return_event_type_opaque - receives the opaque value of the event
+ type; not written when nothing is pending
+ @returns the event data vector, or 0 when no event type has data pending
+*/
+always_inline void *
+vlib_process_get_event_data (vlib_main_t * vm,
+ uword * return_event_type_opaque)
+{
+ vlib_node_main_t *nm = &vm->node_main;
+ vlib_process_t *p;
+ vlib_process_event_type_t *et;
+ uword t, l;
+ void *event_data_vector;
+
+ p = vec_elt (nm->processes, nm->current_process_index);
+
+ /* Find first type with events ready.
+ Return invalid type when there's nothing there. */
+ t = clib_bitmap_first_set (p->non_empty_event_type_bitmap);
+ if (t == ~0)
+ return 0;
+
+ /* This type is about to be drained: clear its "non-empty" bit. */
+ p->non_empty_event_type_bitmap =
+ clib_bitmap_andnoti (p->non_empty_event_type_bitmap, t);
+
+ l = _vec_len (p->pending_event_data_by_type_index[t]);
+ ASSERT (l > 0);
+ /* Detach the vector from the process; the caller now holds it. */
+ event_data_vector = p->pending_event_data_by_type_index[t];
+ p->pending_event_data_by_type_index[t] = 0;
+
+ et = pool_elt_at_index (p->event_type_pool, t);
+
+ /* Return user's opaque value and possibly index. */
+ *return_event_type_opaque = et->opaque;
+
+ /* One-time event types are released once delivered. */
+ vlib_process_maybe_free_event_type (p, t);
+
+ return event_data_vector;
+}
+
+/* Hand an event data vector back for later reuse by appending it to
+   nm->recycled_event_data_vectors.  Reusing event data avoids repeatedly
+   allocating event vectors in cases where we care about speed. */
+always_inline void
+vlib_process_put_event_data (vlib_main_t * vm, void *event_data)
+{
+  vec_add1 (vm->node_main.recycled_event_data_vectors, event_data);
+}
+
+/** Return the first event type which has occurred and a vector of per-event
+ data of that type, or a timeout indication
+
+ The pending data is appended to @c *data_vector (when data_vector is
+ non-NULL) and the process's own pending vector for that type is then
+ truncated to length 0, so the data is dropped if data_vector is NULL.
+
+ @param vm - vlib_main_t pointer
+ @param data_vector - pointer to a (uword *) vector to receive event data
+ @returns either an event type and a vector of per-event instance data,
+ or ~0 to indicate a timeout.
+*/
+
+always_inline uword
+vlib_process_get_events (vlib_main_t * vm, uword ** data_vector)
+{
+ vlib_node_main_t *nm = &vm->node_main;
+ vlib_process_t *p;
+ vlib_process_event_type_t *et;
+ uword r, t, l;
+
+ p = vec_elt (nm->processes, nm->current_process_index);
+
+ /* Find first type with events ready.
+ Return invalid type when there's nothing there. */
+ t = clib_bitmap_first_set (p->non_empty_event_type_bitmap);
+ if (t == ~0)
+ return t;
+
+ /* This type is about to be drained: clear its "non-empty" bit. */
+ p->non_empty_event_type_bitmap =
+ clib_bitmap_andnoti (p->non_empty_event_type_bitmap, t);
+
+ /* Copy out the pending data, then empty the pending vector in place
+ (its storage is kept). */
+ l = _vec_len (p->pending_event_data_by_type_index[t]);
+ if (data_vector)
+ vec_add (*data_vector, p->pending_event_data_by_type_index[t], l);
+ _vec_len (p->pending_event_data_by_type_index[t]) = 0;
+
+ et = pool_elt_at_index (p->event_type_pool, t);
+
+ /* Return user's opaque value. */
+ r = et->opaque;
+
+ /* One-time event types are released once delivered. */
+ vlib_process_maybe_free_event_type (p, t);
+
+ return r;
+}
+
+/* Drains the pending events of event type index t for process p: clears
+   the type's bit in p->non_empty_event_type_bitmap, appends the pending
+   data to *data_vector when data_vector is non-NULL, truncates the pending
+   vector to length 0 and frees the type if it is a one-time event type.
+   Returns the number of events that were pending. */
+always_inline uword
+vlib_process_get_events_helper (vlib_process_t * p, uword t,
+                                uword ** data_vector)
+{
+  uword n_pending;
+
+  p->non_empty_event_type_bitmap =
+    clib_bitmap_andnoti (p->non_empty_event_type_bitmap, t);
+
+  n_pending = _vec_len (p->pending_event_data_by_type_index[t]);
+
+  if (data_vector)
+    vec_add (*data_vector, p->pending_event_data_by_type_index[t],
+             n_pending);
+
+  _vec_len (p->pending_event_data_by_type_index[t]) = 0;
+
+  vlib_process_maybe_free_event_type (p, t);
+
+  return n_pending;
+}
+
+/* Like vlib_process_get_events, but only for the event type registered
+   under the opaque value with_type_opaque.  Returns the number of events
+   found; 0 when that type has never been signaled to the current process
+   or has nothing pending. */
+always_inline uword
+vlib_process_get_events_with_type (vlib_main_t * vm, uword ** data_vector,
+                                   uword with_type_opaque)
+{
+  vlib_node_main_t *nm = &vm->node_main;
+  vlib_process_t *p = vec_elt (nm->processes, nm->current_process_index);
+  uword *type_index;
+
+  type_index =
+    hash_get (p->event_type_index_by_type_opaque, with_type_opaque);
+
+  /* A missing hash entry can happen when an event has not yet been
+     signaled with the given opaque type. */
+  if (!type_index
+      || !clib_bitmap_get (p->non_empty_event_type_bitmap, type_index[0]))
+    return 0;
+
+  return vlib_process_get_events_helper (p, type_index[0], data_vector);
+}
+
+/** Suspend the current process until an event is pending for it.
+ Returns immediately when some event type already has data pending.
+ @param vm - vlib_main_t pointer
+ @returns the process's non_empty_event_type_bitmap
+*/
+always_inline uword *
+vlib_process_wait_for_event (vlib_main_t * vm)
+{
+ vlib_node_main_t *nm = &vm->node_main;
+ vlib_process_t *p;
+ uword r;
+
+ p = vec_elt (nm->processes, nm->current_process_index);
+ if (clib_bitmap_is_zero (p->non_empty_event_type_bitmap))
+ {
+ p->flags |= VLIB_PROCESS_IS_SUSPENDED_WAITING_FOR_EVENT;
+ /* Save the resume point.  On the initial pass r equals the SUSPEND
+ value and we yield through return_longjmp; when the process is
+ resumed via resume_longjmp, r differs and execution falls through. */
+ r =
+ clib_setjmp (&p->resume_longjmp, VLIB_PROCESS_RESUME_LONGJMP_SUSPEND);
+ if (r == VLIB_PROCESS_RESUME_LONGJMP_SUSPEND)
+ clib_longjmp (&p->return_longjmp,
+ VLIB_PROCESS_RETURN_LONGJMP_SUSPEND);
+ }
+
+ return p->non_empty_event_type_bitmap;
+}
+
+/** Suspend the current process until the event type with pool index
+ @c with_type_index has data pending, then drain it.
+ Asserts that the index is currently allocated in the process's
+ event_type_pool.
+ @param vm - vlib_main_t pointer
+ @param data_vector - vector to append the event data to, may be NULL
+ @param with_type_index - event type pool index (not the opaque value)
+ @returns number of events drained
+*/
+always_inline uword
+vlib_process_wait_for_one_time_event (vlib_main_t * vm,
+ uword ** data_vector,
+ uword with_type_index)
+{
+ vlib_node_main_t *nm = &vm->node_main;
+ vlib_process_t *p;
+ uword r;
+
+ p = vec_elt (nm->processes, nm->current_process_index);
+ ASSERT (!pool_is_free_index (p->event_type_pool, with_type_index));
+ /* Re-check after every resume: the wake-up may have been caused by a
+ different event type. */
+ while (!clib_bitmap_get (p->non_empty_event_type_bitmap, with_type_index))
+ {
+ p->flags |= VLIB_PROCESS_IS_SUSPENDED_WAITING_FOR_EVENT;
+ /* Save the resume point; yield on the initial pass only. */
+ r =
+ clib_setjmp (&p->resume_longjmp, VLIB_PROCESS_RESUME_LONGJMP_SUSPEND);
+ if (r == VLIB_PROCESS_RESUME_LONGJMP_SUSPEND)
+ clib_longjmp (&p->return_longjmp,
+ VLIB_PROCESS_RETURN_LONGJMP_SUSPEND);
+ }
+
+ return vlib_process_get_events_helper (p, with_type_index, data_vector);
+}
+
+/** Suspend the current process until an event registered under the opaque
+ value @c with_type_opaque has data pending, then drain it.
+ The opaque value may be unknown on entry; it is looked up again after
+ each resume until it has been registered.
+ @param vm - vlib_main_t pointer
+ @param data_vector - vector to append the event data to, may be NULL
+ @param with_type_opaque - opaque event type value to wait for
+ @returns number of events drained
+*/
+always_inline uword
+vlib_process_wait_for_event_with_type (vlib_main_t * vm,
+ uword ** data_vector,
+ uword with_type_opaque)
+{
+ vlib_node_main_t *nm = &vm->node_main;
+ vlib_process_t *p;
+ uword r, *h;
+
+ p = vec_elt (nm->processes, nm->current_process_index);
+ h = hash_get (p->event_type_index_by_type_opaque, with_type_opaque);
+ /* Keep suspending while the type is unknown or has nothing pending. */
+ while (!h || !clib_bitmap_get (p->non_empty_event_type_bitmap, h[0]))
+ {
+ p->flags |= VLIB_PROCESS_IS_SUSPENDED_WAITING_FOR_EVENT;
+ /* Save the resume point; yield on the initial pass only. */
+ r =
+ clib_setjmp (&p->resume_longjmp, VLIB_PROCESS_RESUME_LONGJMP_SUSPEND);
+ if (r == VLIB_PROCESS_RESUME_LONGJMP_SUSPEND)
+ clib_longjmp (&p->return_longjmp,
+ VLIB_PROCESS_RETURN_LONGJMP_SUSPEND);
+
+ /* See if unknown event type has been signaled now. */
+ if (!h)
+ h = hash_get (p->event_type_index_by_type_opaque, with_type_opaque);
+ }
+
+ return vlib_process_get_events_helper (p, h[0], data_vector);
+}
+
+/** Suspend a cooperative multi-tasking thread
+ Waits for an event, or for the indicated number of seconds to elapse.
+ Returns @c dt unchanged, without suspending, when dt is effectively zero
+ or an event is already pending.
+ @param vm - vlib_main_t pointer
+ @param dt - timeout, in seconds.
+ @returns the remaining time interval
+*/
+
+always_inline f64
+vlib_process_wait_for_event_or_clock (vlib_main_t * vm, f64 dt)
+{
+ vlib_node_main_t *nm = &vm->node_main;
+ vlib_process_t *p;
+ f64 wakeup_time;
+ uword r;
+
+ p = vec_elt (nm->processes, nm->current_process_index);
+
+ /* Nothing to wait for: timeout too short or event already queued. */
+ if (vlib_process_suspend_time_is_zero (dt)
+ || !clib_bitmap_is_zero (p->non_empty_event_type_bitmap))
+ return dt;
+
+ wakeup_time = vlib_time_now (vm) + dt;
+
+ /* Suspend waiting for both clock and event to occur. */
+ p->flags |= (VLIB_PROCESS_IS_SUSPENDED_WAITING_FOR_EVENT
+ | VLIB_PROCESS_IS_SUSPENDED_WAITING_FOR_CLOCK);
+
+ /* Save the resume point; on the initial pass record the wake-up time in
+ clock ticks and yield. */
+ r = clib_setjmp (&p->resume_longjmp, VLIB_PROCESS_RESUME_LONGJMP_SUSPEND);
+ if (r == VLIB_PROCESS_RESUME_LONGJMP_SUSPEND)
+ {
+ p->resume_cpu_time = (clib_cpu_time_now ()
+ + (dt * vm->clib_time.clocks_per_second));
+ clib_longjmp (&p->return_longjmp, VLIB_PROCESS_RETURN_LONGJMP_SUSPEND);
+ }
+
+ /* Return amount of time still left to sleep.
+ If <= 0 then we've been woken up by the clock (and not an event). */
+ return wakeup_time - vlib_time_now (vm);
+}
+
+/* Allocates a new entry in p->event_type_pool, stores with_type_opaque in
+   its opaque field and returns a pointer to the entry. */
+always_inline vlib_process_event_type_t *
+vlib_process_new_event_type (vlib_process_t * p, uword with_type_opaque)
+{
+  vlib_process_event_type_t *new_type;
+
+  pool_get (p->event_type_pool, new_type);
+  new_type->opaque = with_type_opaque;
+
+  return new_type;
+}
+
+/** Create a one-time event type on the process behind @c node_index.
+ A new event type carrying @c with_type_opaque is allocated and its index
+ is marked in the process's one_time_event_type_bitmap.
+ @param vm - vlib_main_t pointer
+ @param node_index - index of the process node
+ @param with_type_opaque - opaque value to attach to the event type
+ @returns the event type's index in the process's event_type_pool
+*/
+always_inline uword
+vlib_process_create_one_time_event (vlib_main_t * vm, uword node_index,
+                                    uword with_type_opaque)
+{
+  vlib_node_main_t *nm = &vm->node_main;
+  vlib_node_t *node = vlib_get_node (vm, node_index);
+  vlib_process_t *proc = vec_elt (nm->processes, node->runtime_index);
+  vlib_process_event_type_t *new_type;
+  uword type_index;
+
+  new_type = vlib_process_new_event_type (proc, with_type_opaque);
+  type_index = new_type - proc->event_type_pool;
+
+  proc->one_time_event_type_bitmap =
+    clib_bitmap_ori (proc->one_time_event_type_bitmap, type_index);
+
+  return type_index;
+}
+
+/** Delete a one-time event type from the process behind @c node_index.
+ Asserts that @c t is marked in the process's one_time_event_type_bitmap,
+ then frees it with vlib_process_free_event_type.
+ @param vm - vlib_main_t pointer
+ @param node_index - index of the process node
+ @param t - event type index, as returned by
+ vlib_process_create_one_time_event
+*/
+always_inline void
+vlib_process_delete_one_time_event (vlib_main_t * vm, uword node_index,
+                                    uword t)
+{
+  vlib_node_main_t *nm = &vm->node_main;
+  vlib_node_t *node = vlib_get_node (vm, node_index);
+  vlib_process_t *proc = vec_elt (nm->processes, node->runtime_index);
+
+  ASSERT (clib_bitmap_get (proc->one_time_event_type_bitmap, t));
+  vlib_process_free_event_type (proc, t, /* is_one_time_event */ 1);
+}
+
+/* Core of event signaling: reserves room for n_data_elts elements of
+ n_data_elt_bytes each in process p's pending data vector for event type
+ index t, marks the type as non-empty and, when appropriate, schedules
+ the process to be resumed.
+
+ nm - node main that owns the process
+ n - the process's vlib_node_t
+ p - the process being signaled
+ t - event type index; asserted to be allocated in
+ p->event_type_pool
+ n_data_elts - number of elements to reserve
+ n_data_elt_bytes - size of one element in bytes
+
+ Returns a pointer to the reserved (not yet written) space; the caller
+ fills it in. */
+always_inline void *
+vlib_process_signal_event_helper (vlib_node_main_t * nm,
+ vlib_node_t * n,
+ vlib_process_t * p,
+ uword t,
+ uword n_data_elts, uword n_data_elt_bytes)
+{
+ uword p_flags, add_to_pending, delete_from_wheel;
+ void *data_to_be_written_by_caller;
+
+ ASSERT (!pool_is_free_index (p->event_type_pool, t));
+
+ vec_validate (p->pending_event_data_by_type_index, t);
+
+ /* Resize data vector and return caller's data to be written. */
+ {
+ void *data_vec = p->pending_event_data_by_type_index[t];
+ uword l;
+
+ /* No vector yet for this type: reuse a recycled one if available. */
+ if (!data_vec && vec_len (nm->recycled_event_data_vectors))
+ {
+ data_vec = vec_pop (nm->recycled_event_data_vectors);
+ _vec_len (data_vec) = 0;
+ }
+
+ l = vec_len (data_vec);
+
+ data_vec = _vec_resize (data_vec,
+ /* length_increment */ n_data_elts,
+ /* total size after increment */
+ (l + n_data_elts) * n_data_elt_bytes,
+ /* header_bytes */ 0, /* data_align */ 0);
+
+ /* The resize may have moved the vector; store it back before taking
+ the address of the new elements. */
+ p->pending_event_data_by_type_index[t] = data_vec;
+ data_to_be_written_by_caller = data_vec + l * n_data_elt_bytes;
+ }
+
+ p->non_empty_event_type_bitmap =
+ clib_bitmap_ori (p->non_empty_event_type_bitmap, t);
+
+ p_flags = p->flags;
+
+ /* Event was already signalled? */
+ add_to_pending = (p_flags & VLIB_PROCESS_RESUME_PENDING) == 0;
+
+ /* Process will resume when suspend time elapses? */
+ delete_from_wheel = 0;
+ if (p_flags & VLIB_PROCESS_IS_SUSPENDED_WAITING_FOR_CLOCK)
+ {
+ /* Waiting for both event and clock? */
+ if (p_flags & VLIB_PROCESS_IS_SUSPENDED_WAITING_FOR_EVENT)
+ delete_from_wheel = 1;
+ else
+ /* Waiting only for clock. Event will be queued and may be
+ handled when timer expires. */
+ add_to_pending = 0;
+ }
+
+ /* Never add current process to pending vector since current process is
+ already running. */
+ add_to_pending &= nm->current_process_index != n->runtime_index;
+
+ if (add_to_pending)
+ {
+ /* Queue the process for resumption and, if it was also waiting on
+ the clock, remove its timing wheel entry. */
+ u32 x = vlib_timing_wheel_data_set_suspended_process (n->runtime_index);
+ p->flags = p_flags | VLIB_PROCESS_RESUME_PENDING;
+ vec_add1 (nm->data_from_advancing_timing_wheel, x);
+ if (delete_from_wheel)
+ timing_wheel_delete (&nm->timing_wheel, x);
+ }
+
+ return data_to_be_written_by_caller;
+}
+
+/** Signal an event to a process and reserve space for its data.
+ The event type is looked up by @c type_opaque in the process's
+ event_type_index_by_type_opaque hash and created (and added to the hash)
+ on first use.
+ @param vm - vlib_main_t pointer
+ @param node_index - index of the process node to signal
+ @param type_opaque - opaque event type value
+ @param n_data_elts - number of data elements to reserve
+ @param n_data_elt_bytes - size of one data element in bytes
+ @returns pointer to the reserved space, to be written by the caller
+*/
+always_inline void *
+vlib_process_signal_event_data (vlib_main_t * vm,
+                                uword node_index,
+                                uword type_opaque,
+                                uword n_data_elts, uword n_data_elt_bytes)
+{
+  vlib_node_main_t *nm = &vm->node_main;
+  vlib_node_t *node = vlib_get_node (vm, node_index);
+  vlib_process_t *proc = vec_elt (nm->processes, node->runtime_index);
+  uword *known_index, type_index;
+
+  known_index = hash_get (proc->event_type_index_by_type_opaque, type_opaque);
+  if (known_index)
+    type_index = known_index[0];
+  else
+    {
+      /* First event with this opaque value: register a new type. */
+      vlib_process_event_type_t *new_type =
+        vlib_process_new_event_type (proc, type_opaque);
+      type_index = new_type - proc->event_type_pool;
+      hash_set (proc->event_type_index_by_type_opaque, type_opaque,
+                type_index);
+    }
+
+  return vlib_process_signal_event_helper (nm, node, proc, type_index,
+                                           n_data_elts, n_data_elt_bytes);
+}
+
+/** Signal an event to a process after a delay and reserve space for its
+ data.
+ When @c dt is effectively zero the event is signaled immediately, exactly
+ as vlib_process_signal_event_data would.  Otherwise a timed-event record
+ is allocated from nm->signal_timed_event_data_pool and inserted in the
+ timing wheel at now + dt.
+ @param vm - vlib_main_t pointer
+ @param dt - delay in seconds
+ @param node_index - index of the process node to signal
+ @param type_opaque - opaque event type value
+ @param n_data_elts - number of data elements to reserve
+ @param n_data_elt_bytes - size of one data element in bytes
+ @returns pointer to the space the caller must write the event data to
+*/
+always_inline void *
+vlib_process_signal_event_at_time (vlib_main_t * vm,
+ f64 dt,
+ uword node_index,
+ uword type_opaque,
+ uword n_data_elts, uword n_data_elt_bytes)
+{
+ vlib_node_main_t *nm = &vm->node_main;
+ vlib_node_t *n = vlib_get_node (vm, node_index);
+ vlib_process_t *p = vec_elt (nm->processes, n->runtime_index);
+ uword *h, t;
+
+ /* Map the opaque value to an event type index, creating it on first
+ use. */
+ h = hash_get (p->event_type_index_by_type_opaque, type_opaque);
+ if (!h)
+ {
+ vlib_process_event_type_t *et =
+ vlib_process_new_event_type (p, type_opaque);
+ t = et - p->event_type_pool;
+ hash_set (p->event_type_index_by_type_opaque, type_opaque, t);
+ }
+ else
+ t = h[0];
+
+ if (vlib_process_suspend_time_is_zero (dt))
+ return vlib_process_signal_event_helper (nm, n, p, t, n_data_elts,
+ n_data_elt_bytes);
+ else
+ {
+ vlib_signal_timed_event_data_t *te;
+ u64 dt_cpu = dt * vm->clib_time.clocks_per_second;
+
+ pool_get_aligned (nm->signal_timed_event_data_pool, te, sizeof (te[0]));
+
+ te->n_data_elts = n_data_elts;
+ te->n_data_elt_bytes = n_data_elt_bytes;
+ te->n_data_bytes = n_data_elts * n_data_elt_bytes;
+
+ /* Assert that structure fields are big enough. */
+ ASSERT (te->n_data_elts == n_data_elts);
+ ASSERT (te->n_data_elt_bytes == n_data_elt_bytes);
+ ASSERT (te->n_data_bytes == n_data_elts * n_data_elt_bytes);
+
+ /* Note: despite the field name, this stores the process's runtime
+ index. */
+ te->process_node_index = n->runtime_index;
+ te->event_type_index = t;
+
+ /* Schedule the record by its pool index. */
+ timing_wheel_insert (&nm->timing_wheel, clib_cpu_time_now () + dt_cpu,
+ vlib_timing_wheel_data_set_timed_event (te -
+ nm->
+ signal_timed_event_data_pool));
+
+ /* Inline data big enough to hold event? */
+ if (te->n_data_bytes < sizeof (te->inline_event_data))
+ return te->inline_event_data;
+ else
+ {
+ /* Too large for the inline buffer: use a separate byte vector. */
+ te->event_data_as_vector = 0;
+ vec_resize (te->event_data_as_vector, te->n_data_bytes);
+ return te->event_data_as_vector;
+ }
+ }
+}
+
+/* Signal the event whose type index is already known (TYPE_INDEX) to
+   process node NODE_INDEX.  Returns the buffer the caller must fill
+   with N_DATA_ELTS elements of N_DATA_ELT_BYTES bytes each. */
+always_inline void *
+vlib_process_signal_one_time_event_data (vlib_main_t * vm,
+                                         uword node_index,
+                                         uword type_index,
+                                         uword n_data_elts,
+                                         uword n_data_elt_bytes)
+{
+  vlib_node_main_t *node_main = &vm->node_main;
+  vlib_node_t *node = vlib_get_node (vm, node_index);
+  vlib_process_t *process =
+    vec_elt (node_main->processes, node->runtime_index);
+
+  return vlib_process_signal_event_helper (node_main, node, process,
+                                           type_index, n_data_elts,
+                                           n_data_elt_bytes);
+}
+
+/* Signal an event of type TYPE_OPAQUE carrying the single word DATA to
+   process node NODE_INDEX. */
+always_inline void
+vlib_process_signal_event (vlib_main_t * vm,
+                           uword node_index, uword type_opaque, uword data)
+{
+  uword *slot;
+
+  slot = vlib_process_signal_event_data (vm, node_index, type_opaque,
+                                         1 /* elts */ , sizeof (uword));
+  slot[0] = data;
+}
+
+/* Signal an event of type TYPE_OPAQUE carrying the single pointer DATA
+   to process node NODE_INDEX. */
+always_inline void
+vlib_process_signal_event_pointer (vlib_main_t * vm,
+                                   uword node_index,
+                                   uword type_opaque, void *data)
+{
+  void **slot;
+
+  slot = vlib_process_signal_event_data (vm, node_index, type_opaque,
+                                         1 /* elts */ , sizeof (data));
+  slot[0] = data;
+}
+
+/* Signal the event with type index TYPE_INDEX, carrying the single word
+   DATA, to process node NODE_INDEX. */
+always_inline void
+vlib_process_signal_one_time_event (vlib_main_t * vm,
+                                    uword node_index,
+                                    uword type_index, uword data)
+{
+  uword *slot;
+
+  slot = vlib_process_signal_one_time_event_data (vm, node_index, type_index,
+                                                  1 /* elts */ ,
+                                                  sizeof (uword));
+  slot[0] = data;
+}
+
+/* Send the one-time event recorded in P to the process node recorded in
+   P, with all-ones event data, then fill P with 0xff bytes. */
+always_inline void
+vlib_signal_one_time_waiting_process (vlib_main_t * vm,
+                                      vlib_one_time_waiting_process_t * p)
+{
+  uword event_data = ~0;
+
+  vlib_process_signal_one_time_event (vm, p->node_index, p->one_time_event,
+                                      event_data);
+  memset (p, ~0, sizeof (*p));
+}
+
+/* Signal every waiter stored in the vector *WPS, then free the vector. */
+always_inline void
+vlib_signal_one_time_waiting_process_vector (vlib_main_t * vm,
+                                             vlib_one_time_waiting_process_t
+                                             ** wps)
+{
+  vlib_one_time_waiting_process_t *waiter;
+
+  vec_foreach (waiter, *wps)
+  {
+    vlib_signal_one_time_waiting_process (vm, waiter);
+  }
+
+  vec_free (*wps);
+}
+
+/* Record the current process in P, create a one-time event for it (with
+   an all-ones opaque type), and wait for that event.  The event data is
+   not collected. */
+always_inline void
+vlib_current_process_wait_for_one_time_event (vlib_main_t * vm,
+                                              vlib_one_time_waiting_process_t
+                                              * p)
+{
+  p->node_index = vlib_current_process (vm);
+
+  p->one_time_event =
+    vlib_process_create_one_time_event (vm, p->node_index,
+                                        /* type opaque */ ~0);
+
+  vlib_process_wait_for_one_time_event (vm, /* don't care about data */ 0,
+                                        p->one_time_event);
+}
+
+/* Append a new waiter slot to the vector *WPS and have the current
+   process wait on it. */
+always_inline void
+vlib_current_process_wait_for_one_time_event_vector (vlib_main_t * vm,
+                                                     vlib_one_time_waiting_process_t
+                                                     ** wps)
+{
+  vlib_one_time_waiting_process_t *slot;
+
+  vec_add2 (*wps, slot, 1);
+  vlib_current_process_wait_for_one_time_event (vm, slot);
+}
+
+/* Add N_VECTORS to NODE's per-interval vector statistics and return the
+   counter stored in the neighbouring slot (index ^ 1) of the stats
+   array.  An interval is 2^VLIB_LOG2_MAIN_LOOPS_PER_STATS_UPDATE main
+   loop iterations.  The current slot is restarted from zero when the
+   node was last dispatched in a different interval, and the neighbouring
+   slot is cleared when more than one interval has passed. */
+always_inline u32
+vlib_node_runtime_update_main_loop_vector_stats (vlib_main_t * vm,
+                                                 vlib_node_runtime_t * node,
+                                                 uword n_vectors)
+{
+  u32 slot, cur_slot, prev_slot;
+  u32 intervals_elapsed;
+  u32 cur_count, prev_count;
+
+  ASSERT (is_pow2 (ARRAY_LEN (node->main_loop_vector_stats)));
+
+  /* Slot for the current interval, wrapped to the array size. */
+  slot = ((vm->main_loop_count >> VLIB_LOG2_MAIN_LOOPS_PER_STATS_UPDATE)
+          & (ARRAY_LEN (node->main_loop_vector_stats) - 1));
+  cur_slot = slot ^ 0;
+  prev_slot = slot ^ 1;
+
+  /* Whole intervals elapsed since this node was last dispatched. */
+  intervals_elapsed =
+    ((vm->main_loop_count >> VLIB_LOG2_MAIN_LOOPS_PER_STATS_UPDATE)
+     -
+     (node->main_loop_count_last_dispatch >>
+      VLIB_LOG2_MAIN_LOOPS_PER_STATS_UPDATE));
+
+  cur_count = node->main_loop_vector_stats[cur_slot];
+  prev_count = node->main_loop_vector_stats[prev_slot];
+
+  if (intervals_elapsed != 0)
+    cur_count = 0;
+  if (intervals_elapsed > 1)
+    prev_count = 0;
+
+  cur_count += n_vectors;
+
+  node->main_loop_vector_stats[cur_slot] = cur_count;
+  node->main_loop_vector_stats[prev_slot] = prev_count;
+  node->main_loop_count_last_dispatch = vm->main_loop_count;
+
+  /* Return previous counter. */
+  return node->main_loop_vector_stats[prev_slot];
+}
+
+/* Return the previous-interval vector counter of node NODE_INDEX divided
+   by the number of main loops per stats update, as a float.  Updates the
+   stats with zero new vectors as a side effect. */
+always_inline f64
+vlib_node_vectors_per_main_loop_as_float (vlib_main_t * vm, u32 node_index)
+{
+  vlib_node_runtime_t *rt = vlib_node_get_runtime (vm, node_index);
+  u32 previous;
+
+  previous =
+    vlib_node_runtime_update_main_loop_vector_stats (vm, rt,
+                                                     /* n_vectors */ 0);
+
+  return (f64) previous / (1 << VLIB_LOG2_MAIN_LOOPS_PER_STATS_UPDATE);
+}
+
+/* Integer variant of the above: the previous-interval vector counter of
+   node NODE_INDEX shifted right by
+   VLIB_LOG2_MAIN_LOOPS_PER_STATS_UPDATE.  Updates the stats with zero
+   new vectors as a side effect. */
+always_inline u32
+vlib_node_vectors_per_main_loop_as_integer (vlib_main_t * vm, u32 node_index)
+{
+  vlib_node_runtime_t *rt = vlib_node_get_runtime (vm, node_index);
+  u32 previous;
+
+  previous =
+    vlib_node_runtime_update_main_loop_vector_stats (vm, rt,
+                                                     /* n_vectors */ 0);
+
+  return previous >> VLIB_LOG2_MAIN_LOOPS_PER_STATS_UPDATE;
+}
+
+void
+vlib_frame_free (vlib_main_t * vm, vlib_node_runtime_t * r, vlib_frame_t * f);
+
+/* Add next node to given node in given slot. */
+uword
+vlib_node_add_next_with_slot (vlib_main_t * vm,
+ uword node, uword next_node, uword slot);
+
+/* As above but adds to end of node's next vector. */
+always_inline uword
+vlib_node_add_next (vlib_main_t * vm, uword node, uword next_node)
+{
+  /* An all-ones slot asks for the next free slot at the end. */
+  uword append_slot = ~0;
+
+  return vlib_node_add_next_with_slot (vm, node, next_node, append_slot);
+}
+
+/* Add named next node to given node in given slot. */
+uword
+vlib_node_add_named_next_with_slot (vlib_main_t * vm,
+ uword node, char *next_name, uword slot);
+
+/* As above but adds to end of node's next vector. */
+always_inline uword
+vlib_node_add_named_next (vlib_main_t * vm, uword node, char *name)
+{
+  /* An all-ones slot asks for the next free slot at the end. */
+  uword append_slot = ~0;
+
+  return vlib_node_add_named_next_with_slot (vm, node, name, append_slot);
+}
+
+/* Query node given name. */
+vlib_node_t *vlib_get_node_by_name (vlib_main_t * vm, u8 * name);
+
+/* Rename a node. */
+void vlib_node_rename (vlib_main_t * vm, u32 node_index, char *fmt, ...);
+
+/* Register new packet processing node. Nodes can be registered
+ dynamically via this call or statically via the VLIB_REGISTER_NODE
+ macro. */
+u32 vlib_register_node (vlib_main_t * vm, vlib_node_registration_t * r);
+
+/* Register all static nodes registered via VLIB_REGISTER_NODE. */
+void vlib_register_all_static_nodes (vlib_main_t * vm);
+
+/* Start a process. */
+void vlib_start_process (vlib_main_t * vm, uword process_index);
+
+/* Sync up runtime and main node stats. */
+void vlib_node_sync_stats (vlib_main_t * vm, vlib_node_t * n);
+
+/* Node graph initialization function. */
+clib_error_t *vlib_node_main_init (vlib_main_t * vm);
+
+format_function_t format_vlib_node_graph;
+format_function_t format_vlib_node_name;
+format_function_t format_vlib_next_node_name;
+format_function_t format_vlib_node_and_next;
+format_function_t format_vlib_cpu_time;
+format_function_t format_vlib_time;
+/* Parse node name -> node index. */
+unformat_function_t unformat_vlib_node;
+
+/* Add INCREMENT to counter COUNTER_INDEX of node NODE_INDEX.  Node
+   counters live in the error main's counter array, starting at the
+   node's error_heap_index. */
+always_inline void
+vlib_node_increment_counter (vlib_main_t * vm, u32 node_index,
+                             u32 counter_index, u64 increment)
+{
+  vlib_error_main_t *em = &vm->error_main;
+  vlib_node_t *node = vlib_get_node (vm, node_index);
+  u32 base = node->error_heap_index;
+
+  em->counters[base + counter_index] += increment;
+}
+
+#endif /* included_vlib_node_funcs_h */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vlib/parse.c b/src/vlib/parse.c
new file mode 100644
index 00000000000..1c4500ce85a
--- /dev/null
+++ b/src/vlib/parse.c
@@ -0,0 +1,1007 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include <vlib/parse.h>
+
+#define PARSE_DEBUG 0
+
+/* Parse type indices of the built-in token types; filled in by
+   parse_type_and_graph_init () via parse_type_find_by_name (). */
+u16 word_type_index;
+u16 number_type_index;
+u16 eof_type_index;
+u16 rule_eof_type_index;
+u16 plus_type_index;
+u16 minus_type_index;
+u16 star_type_index;
+u16 slash_type_index;
+u16 lpar_type_index;
+u16 rpar_type_index;
+
+/* format () helper: print every entry of pm->parse_value, one per line,
+   using the owning type's format_value function when it has one.
+   Takes one argument: vlib_parse_main_t *. */
+u8 *
+format_vlib_parse_value (u8 * s, va_list * args)
+{
+  vlib_parse_main_t *pm = va_arg (*args, vlib_parse_main_t *);
+  vlib_parse_type_t *type;
+  vlib_parse_value_t *v;
+
+  s = format (s, "%d items:\n", vec_len (pm->parse_value));
+
+  vec_foreach (v, pm->parse_value)
+  {
+    u16 type_index = v->type;
+
+    type = pool_elt_at_index (pm->parse_types, type_index);
+    if (!type->format_value)
+      s = format (s, "[%d]: (nofun)\n", v - pm->parse_value);
+    else
+      s = format (s, "[%d]: %U\n", v - pm->parse_value,
+                  type->format_value, v);
+  }
+
+  return s;
+}
+
+/* format () helper: print the symbolic name of a vlib_parse_match_t, or
+   "unknown 0x..." for a value outside foreach_parse_match_type. */
+static u8 *
+format_vlib_parse_match (u8 * s, va_list * args)
+{
+  vlib_parse_match_t m = va_arg (*args, vlib_parse_match_t);
+  char *name;
+
+  switch (m)
+    {
+#define _(a) case VLIB_PARSE_##a: name = #a; break;
+      foreach_parse_match_type
+#undef _
+    default:
+      name = 0;
+      break;
+    }
+
+  if (!name)
+    return format (s, "unknown 0x%x", m);
+
+  return format (s, "%s", name);
+}
+
+/* format () helper: print a parse item.  Word items print their text;
+   every other item prints its type name in angle brackets.
+   Takes two arguments: vlib_parse_main_t *, vlib_parse_item_t *. */
+static u8 *
+format_vlib_parse_item (u8 * s, va_list * args)
+{
+  vlib_parse_main_t *pm = va_arg (*args, vlib_parse_main_t *);
+  vlib_parse_item_t *item = va_arg (*args, vlib_parse_item_t *);
+  vlib_parse_type_t *type = pool_elt_at_index (pm->parse_types, item->type);
+
+  if (item->type != word_type_index)
+    return format (s, "<%s>", type->name);
+
+  return format (s, "%s", item->value.as_pointer);
+}
+
+/* format () helper: print one parse graph node as
+   "[index] item peer N deeper N".  A heading line is printed first for
+   each type whose rule starts at this node, and for the root node.
+   Takes two arguments: vlib_parse_main_t *, vlib_parse_graph_t *. */
+static u8 *
+format_vlib_parse_graph (u8 * s, va_list * args)
+{
+  vlib_parse_main_t *pm = va_arg (*args, vlib_parse_main_t *);
+  vlib_parse_graph_t *node = va_arg (*args, vlib_parse_graph_t *);
+  vlib_parse_item_t *item;
+  vlib_parse_type_t *type;
+
+  /* Linear scan over all types.  $$$ hash table */
+  /* *INDENT-OFF* */
+  pool_foreach (type, pm->parse_types,
+  ({
+    if (type->rule_index == node - pm->parse_graph)
+      s = format (s, "\n<%s>\n", type->name);
+  }));
+  /* *INDENT-ON* */
+
+  if (pm->root_index == (node - pm->parse_graph))
+    s = format (s, "\n<root>\n");
+
+  item = pool_elt_at_index (pm->parse_items, node->item);
+
+  s = format (s, "[%d] %U ", node - pm->parse_graph,
+              format_vlib_parse_item, pm, item);
+
+  /* An all-ones link means "no such neighbour". */
+  if (node->peer != (u32) ~ 0)
+    s = format (s, "peer %4u ", node->peer);
+  else
+    s = format (s, "peer nil ");
+
+  if (node->deeper != (u32) ~ 0)
+    s = format (s, "deeper %4u ", node->deeper);
+  else
+    s = format (s, "deeper nil ");
+
+  return s;
+}
+
+/* Debug aid: print every node of the global parse graph to stdout. */
+void
+dump_parse_graph (void)
+{
+  vlib_parse_main_t *pm = &vlib_parse_main;
+  vlib_parse_graph_t *g;
+
+  /* *INDENT-OFF* */
+  pool_foreach (g, pm->parse_graph,
+  ({
+    fformat (stdout, "%U\n", format_vlib_parse_graph, pm, g);
+  }));
+  /* *INDENT-ON* */
+}
+
+/* Run the cleanup function of PV's type on PV, if the type has one. */
+always_inline void
+parse_cleanup_value (vlib_parse_main_t * pm, vlib_parse_value_t * pv)
+{
+  vlib_parse_type_t *type = pool_elt_at_index (pm->parse_types, pv->type);
+
+  if (!type->value_cleanup_function)
+    return;
+
+  type->value_cleanup_function (pv);
+}
+
+/* Prepare for a new parse of INPUT: reset the lexer, release the tokens
+   and values left over from the previous parse, and empty both vectors
+   (their storage is kept for reuse). */
+static void
+parse_reset (vlib_parse_main_t * pm, u8 * input)
+{
+  vlib_lex_token_t *tok;
+  vlib_parse_value_t *val;
+
+  vlib_lex_reset (pm->lex_main, input);
+
+  vec_foreach (tok, pm->tokens)
+  {
+    vlib_lex_cleanup_token (tok);
+  }
+
+  vec_foreach (val, pm->parse_value)
+  {
+    parse_cleanup_value (pm, val);
+  }
+
+  _vec_len (pm->parse_value) = 0;
+  _vec_len (pm->tokens) = 0;
+  pm->current_token_index = 0;
+}
+
+/* Print context help for a '?' in the input.  The text typed so far
+   (input up to the last '?', with trailing blanks removed) is echoed
+   once per peer of graph node INDEX, followed by what that peer accepts:
+   the literal word, or the type name in angle brackets. */
+static void
+parse_help (vlib_parse_main_t * pm, u32 index)
+{
+  vlib_parse_graph_t *node;
+  vlib_parse_item_t *item;
+  vlib_parse_type_t *type;
+  vlib_main_t *vm = pm->vlib_main;
+  u8 *help_input;
+  int i;
+
+  help_input = vec_dup (pm->lex_main->input_vector);
+
+  /* Cut the copy at the last '?'; the NUL written here also terminates
+     the string for the %s conversions below. */
+  for (i = vec_len (help_input) - 1; i >= 0; i--)
+    if (help_input[i] == '?')
+      {
+        help_input[i] = 0;
+        _vec_len (help_input) = i;
+        break;
+      }
+
+  /* Strip every trailing blank.  The loop stops on the last non-blank
+     character (or at -1), so i + 1 is the new length.  The original
+     broke out after the first blank, leaving the rest in place. */
+  for (i = vec_len (help_input) - 1; i >= 0; i--)
+    {
+      if (help_input[i] != ' ' && help_input[i] != '\t')
+        break;
+      help_input[i] = 0;
+    }
+  /* A NULL vector has no header to update. */
+  if (help_input)
+    _vec_len (help_input) = i + 1;
+
+  while (index != (u32) ~ 0)
+    {
+      node = pool_elt_at_index (pm->parse_graph, index);
+      item = pool_elt_at_index (pm->parse_items, node->item);
+      type = pool_elt_at_index (pm->parse_types, item->type);
+
+      if (item->type == eof_type_index && vec_len (pm->match_items) == 0)
+        /* do nothing */ ;
+      else if (item->type == word_type_index)
+        vlib_cli_output (vm, "%s %s\n", help_input, item->value.as_pointer);
+      else
+        vlib_cli_output (vm, "%s <%s>\n", help_input, type->name);
+      index = node->peer;
+    }
+  vec_free (help_input);
+}
+
+/* Match the token at pm->current_token_index against parse graph node
+   INDEX and its chain of peers, recursing into node->deeper after each
+   successful match.
+
+   Returns the match function's / action function's result when a path
+   completes, VLIB_PARSE_MATCH_FAIL when no peer matches (or the tokens
+   are exhausted), and VLIB_PARSE_MATCH_AMBIGUOUS when more than one peer
+   matched only partially.  A single partial match is accepted as if it
+   were a full match.
+
+   Fixes relative to the original: had_value is reset for every
+   candidate, so a failed value-less sibling no longer pops a value it
+   did not push; and the partial_matches vector is freed on every early
+   return. */
+static vlib_parse_match_t
+parse_eval_internal (vlib_parse_main_t * pm, u32 index)
+{
+  vlib_parse_graph_t *node;
+  vlib_parse_item_t *item;
+  vlib_parse_type_t *type;
+  vlib_parse_value_t value, *pv;
+  vlib_parse_match_t rv;
+  u32 *partial_matches = 0;
+  vlib_lex_token_t *t;
+  u32 save_token_index = (u32) ~ 0, save_match_items = 0;
+  int had_value = 0;
+
+  if (pm->current_token_index >= vec_len (pm->tokens))
+    return VLIB_PARSE_MATCH_FAIL;
+
+  /* current token */
+  t = vec_elt_at_index (pm->tokens, pm->current_token_index);
+
+  /* Help ? */
+  if (PREDICT_FALSE (t->token == VLIB_LEX_qmark))
+    {
+      parse_help (pm, index);
+      _vec_len (pm->match_items) = 0;
+      return VLIB_PARSE_MATCH_DONE;
+    }
+
+  /* Across all peers at this level of the parse graph */
+  while (index != (u32) ~ 0)
+    {
+      node = pool_elt_at_index (pm->parse_graph, index);
+      item = pool_elt_at_index (pm->parse_items, node->item);
+      type = pool_elt_at_index (pm->parse_types, item->type);
+
+      /* Each candidate starts without a pushed value. */
+      had_value = 0;
+
+      /*
+       * Save the token index. We may have to back up several
+       * trie plies. Type-specific match functions can consume
+       * multiple tokens, and they may not be optimally careful
+       */
+      save_token_index = pm->current_token_index;
+      save_match_items = vec_len (pm->match_items);
+      vec_add1 (pm->match_items, node->item);
+
+      if (PARSE_DEBUG > 1)
+        clib_warning ("Try to match token %U against node %d",
+                      format_vlib_lex_token, pm->lex_main, t, index);
+
+      /* Call the type-specific match function */
+      rv = type->match_function (pm, type, t, &value);
+
+      if (PARSE_DEBUG > 1)
+        clib_warning ("returned %U", format_vlib_parse_match, rv);
+
+      switch (rv)
+        {
+        case VLIB_PARSE_MATCH_VALUE:
+          /*
+           * Matched, and returned a value to append to the
+           * set of args passed to the action function
+           */
+          value.type = item->type;
+          vec_add1 (pm->parse_value, value);
+          had_value = 1;
+          /* fallthrough */
+
+        case VLIB_PARSE_MATCH_FULL:
+        unambiguous_partial_match:
+          /* Consume the matched token */
+          pm->current_token_index++;
+
+          /* continue matching along this path */
+          rv = parse_eval_internal (pm, node->deeper);
+
+          /* this is not the right path */
+          if (rv == VLIB_PARSE_MATCH_FAIL)
+            {
+              if (had_value)
+                {
+                  /* Delete the value */
+                  value = pm->parse_value[vec_len (pm->parse_value) - 1];
+                  parse_cleanup_value (pm, &value);
+                  _vec_len (pm->parse_value) -= 1;
+                }
+              /* Continue with the next sibling */
+              pm->current_token_index = save_token_index;
+              _vec_len (pm->match_items) = save_match_items;
+              index = node->peer;
+              break;
+            }
+          vec_free (partial_matches);
+          return rv;
+
+        case VLIB_PARSE_MATCH_PARTIAL:
+          /* Partial (substring) match, remember it but keep going */
+          vec_add1 (partial_matches, node - pm->parse_graph);
+          index = node->peer;
+          break;
+
+        case VLIB_PARSE_MATCH_FAIL:
+          /* Continue with the next sibling */
+          index = node->peer;
+          _vec_len (pm->match_items) = save_match_items;
+          break;
+
+        case VLIB_PARSE_MATCH_DONE:
+          /* Parse complete, invoke the action function */
+          if (PARSE_DEBUG > 0)
+            clib_warning ("parse_value: %U", format_vlib_parse_value, pm);
+
+          {
+            vlib_parse_eval_function_t *f = item->value.as_pointer;
+            if (f)
+              rv = f (pm, item, pm->parse_value);
+          }
+
+          vec_foreach (pv, pm->parse_value) parse_cleanup_value (pm, pv);
+          _vec_len (pm->parse_value) = 0;
+          _vec_len (pm->match_items) = 0;
+          vec_free (partial_matches);
+          return rv;
+
+        case VLIB_PARSE_MATCH_AMBIGUOUS:
+        case VLIB_PARSE_MATCH_EVAL_FAIL:
+        case VLIB_PARSE_MATCH_RULE:
+          _vec_len (pm->match_items) = save_match_items;
+          vec_free (partial_matches);
+          return rv;
+        }
+    }
+
+  /*
+   * Out of siblings. If we have exactly one partial match
+   * we win
+   */
+  if (vec_len (partial_matches) == 1)
+    {
+      index = partial_matches[0];
+      node = pool_elt_at_index (pm->parse_graph, index);
+      vec_free (partial_matches);
+      /* A partial match never pushed a value of its own. */
+      had_value = 0;
+      /* Jumps back into the switch above; on failure the loop then
+         resumes with the peers after this node. */
+      goto unambiguous_partial_match;
+    }
+
+  /* Ordinary loser */
+  rv = VLIB_PARSE_MATCH_FAIL;
+
+  /* Ambiguous loser */
+  if (vec_len (partial_matches) > 1)
+    {
+      vec_free (partial_matches);
+      rv = VLIB_PARSE_MATCH_AMBIGUOUS;
+    }
+
+  _vec_len (pm->match_items) = save_match_items;
+  return rv;
+}
+
+/* Match function for rule types: evaluate the sub-graph rooted at
+   type->rule_index.  VLIB_PARSE_MATCH_RULE from the sub-graph is
+   translated to VLIB_PARSE_MATCH_FULL for the caller; any other result
+   is passed through unchanged.  T and VALUEP are not used here. */
+vlib_parse_match_t
+rule_match (vlib_parse_main_t * pm, vlib_parse_type_t * type,
+            vlib_lex_token_t * t, vlib_parse_value_t * valuep)
+{
+  /* Nesting depth, used only in debug messages. */
+  static int recursion_level;
+  vlib_parse_match_t rv;
+
+  if (PARSE_DEBUG > 1)
+    clib_warning ("[%d]: try to match type %s graph index %d",
+                  recursion_level, type->name, type->rule_index);
+
+  recursion_level++;
+  rv = parse_eval_internal (pm, type->rule_index);
+  recursion_level--;
+
+  if (rv != VLIB_PARSE_MATCH_RULE)
+    {
+      if (PARSE_DEBUG > 1)
+        clib_warning ("[%d]: type %s returns %U", recursion_level,
+                      type->name, format_vlib_parse_match, rv);
+      return rv;
+    }
+
+  /* Break the recursive unwind here... */
+  if (PARSE_DEBUG > 1)
+    clib_warning ("[%d]: type %s matched", recursion_level, type->name);
+
+  return VLIB_PARSE_MATCH_FULL;
+}
+
+/* Parse INPUT: reset the parser state, tokenize the whole input
+   (including the terminating eof token), then evaluate the tokens
+   against the root of the parse graph. */
+static int
+parse_eval (vlib_parse_main_t * pm, u8 * input)
+{
+  vlib_lex_token_t *tok;
+
+  parse_reset (pm, input);
+
+  /* Tokenize the entire input vector */
+  for (;;)
+    {
+      vec_add2 (pm->tokens, tok, 1);
+      vlib_lex_get_token (pm->lex_main, tok);
+      if (tok->token == VLIB_LEX_eof)
+        break;
+    }
+
+  /* Feed it to the parser */
+  return parse_eval_internal (pm, pm->root_index);
+}
+
+/* Temporary vlib stub: parse INPUT with the global parser instance. */
+vlib_parse_match_t
+vlib_parse_eval (u8 * input)
+{
+  vlib_parse_main_t *pm = &vlib_parse_main;
+
+  return parse_eval (pm, input);
+}
+
+/* Return the index of the parse type named t->name.  If no such type is
+   registered yet, a copy of *T is added to the type pool with no rule
+   attached (rule_index all-ones) and entered in the by-name hash under a
+   private, NUL-terminated copy of the name. */
+u16
+parse_type_find_or_create (vlib_parse_main_t * pm, vlib_parse_type_t * t)
+{
+  vlib_parse_type_t *fresh;
+  uword *hit;
+  u8 *key;
+
+  hit = hash_get_mem (pm->parse_type_by_name_hash, t->name);
+  if (hit)
+    return hit[0];
+
+  pool_get (pm->parse_types, fresh);
+  *fresh = *t;
+  fresh->rule_index = (u32) ~ 0;
+
+  key = format (0, "%s%c", fresh->name, 0);
+  hash_set_mem (pm->parse_type_by_name_hash, key, fresh - pm->parse_types);
+
+  return fresh - pm->parse_types;
+}
+
+/* Return the index of the parse type called NAME, or (u16) ~0 when no
+   such type is registered. */
+u16
+parse_type_find_by_name (vlib_parse_main_t * pm, char *name)
+{
+  uword *hit = hash_get_mem (pm->parse_type_by_name_hash, name);
+
+  if (!hit)
+    return (u16) ~ 0;
+
+  return hit[0];
+}
+
+/* Return the index of the pooled parse item equal to *ITEM, adding a
+   copy to the pool when there is none.  The lookup key is the entire
+   item structure. */
+u32
+parse_item_find_or_create (vlib_parse_main_t * pm, vlib_parse_item_t * item)
+{
+  vlib_parse_item_t *fresh;
+  uword *hit;
+
+  /* Exact match the entire item */
+  hit = mhash_get (&pm->parse_item_hash, item);
+  if (hit)
+    return hit[0];
+
+  pool_get (pm->parse_items, fresh);
+  *fresh = *item;
+  mhash_set (&pm->parse_item_hash, fresh, fresh - pm->parse_items, 0);
+
+  return fresh - pm->parse_items;
+}
+
+/* Look up the indices of the built-in token types into the
+   *_type_index globals, make sure the plain eof item exists in the item
+   pool, and mark the main parse graph as empty.  A type that is not
+   registered leaves its global at (u16) ~0, the value
+   parse_type_find_by_name () returns for unknown names. */
+static void
+parse_type_and_graph_init (vlib_parse_main_t * pm)
+{
+  vlib_parse_item_t item;
+
+#define foreach_token_type                      \
+  _ (eof)                                       \
+  _ (rule_eof)                                  \
+  _ (word)                                      \
+  _ (number)                                    \
+  _ (plus)                                      \
+  _ (minus)                                     \
+  _ (star)                                      \
+  _ (slash)                                     \
+  _ (lpar)                                      \
+  _ (rpar)
+
+#define _(a) a##_type_index = parse_type_find_by_name (pm, #a);
+  foreach_token_type
+#undef _
+
+  memset (&item, 0, sizeof (item));
+  item.type = eof_type_index;
+
+  /* Only the side effect (creating the item) matters here. */
+  (void) parse_item_find_or_create (pm, &item);
+
+  /* No root node yet; the first registered rule becomes the root. */
+  pm->root_index = (u32) ~ 0;
+}
+
+
+
+/* Tokenize the rule text pr->initializer into pm->tokens (the eof token
+   included).  pm->register_input is used as a scratch buffer for the
+   NUL-terminated copy handed to the lexer and is emptied again before
+   returning. */
+static void
+tokenize (vlib_parse_main_t * pm, parse_registration_t * pr)
+{
+  vlib_lex_token_t *tok;
+
+  pm->register_input = format (pm->register_input,
+                               "%s%c", pr->initializer, 0);
+
+  parse_reset (pm, pm->register_input);
+
+  for (;;)
+    {
+      vec_add2 (pm->tokens, tok, 1);
+      vlib_lex_get_token (pm->lex_main, tok);
+      if (tok->token == VLIB_LEX_eof)
+        break;
+    }
+
+  _vec_len (pm->register_input) = 0;
+}
+
+/* Return 1 when the current token vector starts with "< word > =",
+   i.e. the rule has the form "<mytype> = blah blah blah"; 0 otherwise. */
+static int
+is_typed_rule (vlib_parse_main_t * pm)
+{
+  vlib_lex_token_t *t = vec_elt_at_index (pm->tokens, 0);
+
+  if (vec_len (pm->tokens) < 4)
+    return 0;
+  if (t[0].token != VLIB_LEX_lt)
+    return 0;
+  if (t[1].token != VLIB_LEX_word)
+    return 0;
+  if (t[2].token != VLIB_LEX_gt)
+    return 0;
+  if (t[3].token != VLIB_LEX_equals)
+    return 0;
+
+  return 1;
+}
+
+/* Decide whether the rule token(s) starting at T describe the same
+   thing as the existing graph item ITEM.
+
+   Returns 1 on a match and stores in *TOKEN_INCREMENT how many tokens
+   were covered: 1 for a literal word, 3 for "< type-name >".  Returns 0
+   otherwise; *TOKEN_INCREMENT is then left untouched.  NODE and TYPE are
+   not used. */
+static int
+token_matches_graph_node (vlib_parse_main_t * pm,
+                          vlib_lex_token_t * t,
+                          vlib_parse_graph_t * node,
+                          vlib_parse_item_t * item,
+                          vlib_parse_type_t * type, u32 * token_increment)
+{
+  /* EOFs don't match */
+  if (t->token == VLIB_LEX_eof)
+    return 0;
+
+  /* New chain element is a word */
+  if (t->token == VLIB_LEX_word)
+    {
+      /* but the item in hand is not a word */
+      if (item->type != word_type_index)
+        return 0;
+
+      /* Or it's not this particular word */
+      if (strcmp (t->value.as_pointer, item->value.as_pointer))
+        return 0;
+      *token_increment = 1;
+      return 1;
+    }
+  /* New chain element is a type-name: < TYPE-NAME > */
+  if (t->token == VLIB_LEX_lt)
+    {
+      u16 token_type_index;
+
+      /* < TYPE > */
+      if (t[1].token != VLIB_LEX_word || t[2].token != VLIB_LEX_gt)
+        {
+          /* clib_warning takes the format string first; the original
+             passed a stray 0 there, which hid the message. */
+          clib_warning ("broken type name in '%s'", pm->register_input);
+          return 0;
+        }
+
+      token_type_index = parse_type_find_by_name (pm, t[1].value.as_pointer);
+      if (token_type_index == (u16) ~ 0)
+        {
+          clib_warning ("unknown type '%s'", t[1].value.as_pointer);
+          return 0;
+        }
+
+      /* Its a known type but does not match. */
+      if (item->type != token_type_index)
+        return 0;
+
+      *token_increment = 3;
+      return 1;
+    }
+  clib_warning ("BUG: t->token = %d", t->token);
+  return 0;
+}
+
+/* Build a linear chain of new graph nodes (linked through ->deeper)
+   from the tokens starting at T up to the end of pm->tokens.
+
+   Words become word items, "< name >" becomes an item of the named
+   type, and eof becomes an eof item (rule_eof when NOT_A_RULE is 0)
+   carrying pr->eof_match.  Malformed input is reported and also turned
+   into such an eof item.
+
+   Returns the graph index of the first new node, or (u32) ~0 when no
+   node was created; the number of nodes created is stored in
+   *NEW_SUBGRAPH_DEPTH. */
+u32
+generate_subgraph_from_tokens (vlib_parse_main_t * pm,
+                               vlib_lex_token_t * t,
+                               u32 * new_subgraph_depth,
+                               parse_registration_t * pr, int not_a_rule)
+{
+  vlib_parse_graph_t *fresh, *tail;
+  vlib_parse_item_t candidate;
+  u32 head_index = (u32) ~ 0, tail_index = (u32) ~ 0;
+  u32 item_index;
+  u16 named_type;
+  u32 n_nodes = 0;
+
+  while (t < pm->tokens + vec_len (pm->tokens))
+    {
+      memset (&candidate, 0, sizeof (candidate));
+
+      if (t->token == VLIB_LEX_word)
+        {
+          candidate.type = word_type_index;
+          candidate.value.as_pointer = vec_dup ((u8 *) t->value.as_pointer);
+          item_index = parse_item_find_or_create (pm, &candidate);
+          t++;
+        }
+      else if (t->token == VLIB_LEX_lt)
+        {
+          if (t[1].token != VLIB_LEX_word || t[2].token != VLIB_LEX_gt)
+            {
+              clib_warning ("broken type name in '%s'", pm->register_input);
+              goto screwed;
+            }
+          named_type = parse_type_find_by_name (pm, t[1].value.as_pointer);
+          if (named_type == (u16) ~ 0)
+            {
+              clib_warning ("unknown type 2 '%s'", t[1].value.as_pointer);
+              goto screwed;
+            }
+
+          candidate.type = named_type;
+          candidate.value.as_pointer = 0;
+          item_index = parse_item_find_or_create (pm, &candidate);
+          t += 3;               /* skip < <type-name> and > */
+        }
+      else if (t->token == VLIB_LEX_eof)
+        {
+          /* Error paths jump here too: the offending token is replaced
+             by an eof item and only that one token is skipped. */
+        screwed:
+          candidate.type =
+            not_a_rule ? eof_type_index : rule_eof_type_index;
+          candidate.value.as_pointer = pr->eof_match;
+          item_index = parse_item_find_or_create (pm, &candidate);
+          t++;
+        }
+      else
+        {
+          clib_warning ("unexpected token %U index %d in '%s'",
+                        format_vlib_lex_token, pm->lex_main, t,
+                        t - pm->tokens, pm->register_input);
+          goto screwed;
+        }
+
+      /* New node with all links set to "none" (all-ones). */
+      pool_get (pm->parse_graph, fresh);
+      memset (fresh, 0xff, sizeof (*fresh));
+      fresh->item = item_index;
+      n_nodes++;
+
+      if (head_index == (u32) ~ 0)
+        {
+          head_index = fresh - pm->parse_graph;
+          tail_index = head_index;
+        }
+      else
+        {
+          /* Look the tail up by index after pool_get above. */
+          tail = pool_elt_at_index (pm->parse_graph, tail_index);
+          tail_index = tail->deeper = fresh - pm->parse_graph;
+        }
+    }
+
+  *new_subgraph_depth = n_nodes;
+  return head_index;
+}
+
+/* Return the depth of the graph below and including the level that
+   starts at node INDEX: 0 for "no node", 1 when the first node of the
+   level is a plain eof item, otherwise one more than the deepest
+   ->deeper sub-graph found among the node and its peers. */
+static u32
+measure_depth (vlib_parse_main_t * pm, u32 index)
+{
+  vlib_parse_graph_t *node;
+  vlib_parse_item_t *item;
+  u32 deepest = 0;
+
+  if (index == (u32) ~ 0)
+    return 0;
+
+  node = pool_elt_at_index (pm->parse_graph, index);
+  item = pool_elt_at_index (pm->parse_items, node->item);
+  if (item->type == eof_type_index)
+    return 1;
+
+  for (; index != (u32) ~ 0; index = node->peer)
+    {
+      u32 below;
+
+      node = pool_elt_at_index (pm->parse_graph, index);
+      below = measure_depth (pm, node->deeper);
+      if (below > deepest)
+        deepest = below;
+    }
+
+  return deepest + 1;
+}
+
+/* Link the new sub-graph NEW_SUBGRAPH_INDEX (of depth
+   NEW_SUBGRAPH_DEPTH) into the existing graph.
+
+   With no matching prefix (LAST_MATCHING_INDEX all-ones) the sub-graph
+   joins the peer chain that starts at GRAPH_ROOT_INDEX.  Otherwise it
+   joins the children of node LAST_MATCHING_INDEX.  In both cases the
+   peers are walked, comparing each peer's measured depth with the new
+   depth, to choose the insertion point. */
+static void
+add_subgraph_to_graph (vlib_parse_main_t * pm,
+                       u32 last_matching_index,
+                       u32 graph_root_index,
+                       u32 new_subgraph_index, u32 new_subgraph_depth)
+{
+  vlib_parse_graph_t *new_subgraph_node =
+    pool_elt_at_index (pm->parse_graph, new_subgraph_index);
+  vlib_parse_graph_t *current_peer = 0;
+  vlib_parse_graph_t *parent_node;
+  int passed_a_peer = 0;
+  u32 walk;
+
+  /*
+   * Case 1: top-level peer. Splice into the top-level
+   * peer chain according to rule depth
+   */
+  if (last_matching_index == (u32) ~ 0)
+    {
+      /* Stop at the first shallower peer, or at the last peer. */
+      for (walk = graph_root_index;; walk = current_peer->peer)
+        {
+          current_peer = pool_elt_at_index (pm->parse_graph, walk);
+          if (measure_depth (pm, walk) < new_subgraph_depth
+              || current_peer->peer == (u32) ~ 0)
+            break;
+        }
+
+      /* Insert right after the peer where the walk stopped. */
+      new_subgraph_node->peer = current_peer->peer;
+      current_peer->peer = new_subgraph_index;
+      return;
+    }
+
+  /* Case 2: becomes a child of the last node that matched. */
+  parent_node = pool_elt_at_index (pm->parse_graph, last_matching_index);
+
+  for (walk = parent_node->deeper; walk != (u32) ~ 0;
+       walk = current_peer->peer)
+    {
+      current_peer = pool_elt_at_index (pm->parse_graph, walk);
+      if (measure_depth (pm, walk) < new_subgraph_depth)
+        break;
+      passed_a_peer = 1;
+    }
+
+  ASSERT (current_peer);
+
+  if (!passed_a_peer)
+    {
+      /* Nothing at least as deep precedes it: new head of the list. */
+      new_subgraph_node->peer = parent_node->deeper;
+      parent_node->deeper = new_subgraph_index;
+    }
+  else
+    {
+      /* Insert right after the peer where the walk stopped. */
+      new_subgraph_node->peer = current_peer->peer;
+      current_peer->peer = new_subgraph_index;
+    }
+}
+
+/* Add the rule described by PR to the parse graph.
+
+   The rule text is tokenized.  A rule of the form "<type> = ..." extends
+   the sub-graph of that type, anything else extends the main graph.  The
+   leading tokens already present in the chosen graph are skipped and a
+   new sub-graph is generated for the remainder, which is then linked in.
+
+   Returns 0 on success, or an error when a typed rule names a type that
+   has not been registered. */
+static clib_error_t *
+parse_register_one (vlib_parse_main_t * pm, parse_registration_t * pr)
+{
+  u32 graph_root_index;
+  u16 subgraph_type_index = (u16) ~ 0;
+  vlib_parse_type_t *subgraph_type = 0;
+  vlib_lex_token_t *t;
+  vlib_parse_graph_t *node;
+  u32 node_index, token_increment, new_subgraph_index;
+  u32 new_subgraph_depth, last_matching_index;
+  vlib_parse_item_t *item;
+  vlib_parse_type_t *type;
+
+  int use_main_graph = 1;
+
+  tokenize (pm, pr);
+
+  /* A typed rule? */
+  if (is_typed_rule (pm))
+    {
+      /* Get the type and its current subgraph root, if any */
+      t = vec_elt_at_index (pm->tokens, 1);
+      subgraph_type_index = parse_type_find_by_name (pm, t->value.as_pointer);
+      if (subgraph_type_index == (u16) ~ 0)
+        return clib_error_return (0, "undeclared type '%s'",
+                                  t->value.as_pointer);
+      subgraph_type =
+        pool_elt_at_index (pm->parse_types, subgraph_type_index);
+      graph_root_index = subgraph_type->rule_index;
+      /* Skip "mytype> =": t moves from the type name to the rule body */
+      t += 3;
+      use_main_graph = 0;
+    }
+  else
+    {
+      /* top-level graph */
+      graph_root_index = pm->root_index;
+      t = vec_elt_at_index (pm->tokens, 0);
+    }
+
+  last_matching_index = (u32) ~ 0;
+  node_index = graph_root_index;
+
+  /* Find the first token which isn't already being parsed */
+  while (t < pm->tokens + vec_len (pm->tokens) && node_index != (u32) ~ 0)
+    {
+      node = pool_elt_at_index (pm->parse_graph, node_index);
+      item = pool_elt_at_index (pm->parse_items, node->item);
+      type = pool_elt_at_index (pm->parse_types, item->type);
+
+      if (token_matches_graph_node
+          (pm, t, node, item, type, &token_increment))
+        {
+          /* Shared prefix: consume the token(s) and descend. */
+          t += token_increment;
+          last_matching_index = node_index;
+          node_index = node->deeper;
+        }
+      else
+        node_index = node->peer;
+    }
+
+  new_subgraph_index =
+    generate_subgraph_from_tokens (pm, t, &new_subgraph_depth, pr,
+                                   use_main_graph);
+
+  /* trivial cases: first graph node or first type rule */
+  if (graph_root_index == (u32) ~ 0)
+    {
+      if (use_main_graph)
+        pm->root_index = new_subgraph_index;
+      else
+        subgraph_type->rule_index = new_subgraph_index;
+      return 0;
+    }
+
+  add_subgraph_to_graph (pm, last_matching_index, graph_root_index,
+                         new_subgraph_index, new_subgraph_depth);
+  return 0;
+}
+
+/* Append a pointer to every registration in the ELF section range
+   [LO, HI) to pm->parse_registrations.  VM is not used.  Always
+   returns 0. */
+static clib_error_t *
+parse_register (vlib_main_t * vm,
+                parse_registration_t * lo,
+                parse_registration_t * hi, vlib_parse_main_t * pm)
+{
+  parse_registration_t *pr = lo;
+
+  while (pr < hi)
+    {
+      vec_add1 (pm->parse_registrations, pr);
+      pr = vlib_elf_section_data_next (pr, 0);
+    }
+
+  return 0;
+}
+
+/* Make sure the parse type *RP is present in the type pool.  Always
+   returns 0. */
+static clib_error_t *
+parse_register_one_type (vlib_parse_main_t * pm, vlib_parse_type_t * rp)
+{
+  parse_type_find_or_create (pm, rp);
+
+  return 0;
+}
+
+/* Register every parse type in the ELF section range [LO, HI).  Stops
+   at, and returns, the first error; returns 0 when all types were
+   registered.  VM is not used. */
+static clib_error_t *
+parse_type_register (vlib_main_t * vm,
+                     vlib_parse_type_t * lo,
+                     vlib_parse_type_t * hi, vlib_parse_main_t * pm)
+{
+  vlib_parse_type_t *ptr;
+
+  for (ptr = lo; ptr < hi; ptr = vlib_elf_section_data_next (ptr, 0))
+    {
+      clib_error_t *error = parse_register_one_type (pm, ptr);
+
+      if (error)
+        return error;
+    }
+
+  return 0;
+}
+
+/* Default lexer setup: add the "ignore_everything" lex table.  Declared
+   weak, so a non-weak definition elsewhere takes precedence at link
+   time.  Always returns 0. */
+clib_error_t *vlib_stdlex_init (vlib_main_t * vm) __attribute__ ((weak));
+
+clib_error_t *
+vlib_stdlex_init (vlib_main_t * vm)
+{
+  /* The returned table index is not needed here. */
+  vlib_lex_add_table ("ignore_everything");
+
+  return 0;
+}
+
+/* Return the length of rule R, computing and caching it in
+   r->rule_length on first use.  The length is the token count of the
+   rule text, minus 2 for a typed rule, minus 1 for every '<' and '>'
+   token.  Re-tokenizes into the global parser state as a side effect. */
+static int
+compute_rule_length (parse_registration_t * r)
+{
+  vlib_parse_main_t *pm = &vlib_parse_main;
+  int length, i;
+
+  /* Already computed. */
+  if (r->rule_length)
+    return r->rule_length;
+
+  tokenize (pm, r);
+  length = vec_len (pm->tokens);
+
+  /* Account for "<foo> = " in "<foo> = bar" etc. */
+  if (is_typed_rule (pm))
+    length -= 2;
+
+  for (i = 0; i < vec_len (pm->tokens); i++)
+    {
+      if (pm->tokens[i].token == VLIB_LEX_lt
+          || pm->tokens[i].token == VLIB_LEX_gt)
+        length -= 1;
+    }
+
+  ASSERT (length > 0);
+  r->rule_length = length;
+
+  return length;
+}
+
+/* qsort-style comparator for pm->parse_registrations: orders rules by
+   descending rule length.
+
+   The registrations vector holds pointers, and the sort passes the
+   address of each vector element, so R1 and R2 are pointers to
+   (parse_registration_t *).  The original took them as pointers to the
+   registrations themselves and so read and wrote the vector's own
+   storage as if it were a registration. */
+static int
+rule_length_compare (void *r1, void *r2)
+{
+  parse_registration_t *a = *(parse_registration_t **) r1;
+  parse_registration_t *b = *(parse_registration_t **) r2;
+
+  /* Make sure both cached lengths are filled in. */
+  compute_rule_length (a);
+  compute_rule_length (b);
+
+  /* Descending sort */
+  return b->rule_length - a->rule_length;
+}
+
+
+/* Parser init function: bring up the lexer and the built-in parse
+   types, create the parser's hashes and scratch vectors, register all
+   types and rules found in the "parse_type_registrations" and
+   "parse_registrations" ELF sections, and build the parse graph from the
+   rules, longest rule first.
+
+   Returns the first error from the lexer / type / rule collection
+   steps.  A rule that cannot be added to the graph is reported and
+   skipped; it does not fail initialization. */
+static clib_error_t *
+parse_init (vlib_main_t * vm)
+{
+  vlib_parse_main_t *pm = &vlib_parse_main;
+  vlib_lex_main_t *lm = &vlib_lex_main;
+  vlib_elf_section_bounds_t *b, *bounds;
+  clib_error_t *error = 0;
+  parse_registration_t *rule;
+  int i;
+
+  if ((error = vlib_call_init_function (vm, lex_onetime_init)))
+    return error;
+
+  if ((error = vlib_stdlex_init (vm)))
+    return error;
+
+  if ((error = vlib_call_init_function (vm, parse_builtin_init)))
+    return error;
+
+  pm->vlib_main = vm;
+  pm->lex_main = lm;
+
+  mhash_init (&pm->parse_item_hash, sizeof (u32), sizeof (vlib_parse_item_t));
+  pm->parse_type_by_name_hash = hash_create_string (0, sizeof (u32));
+
+  /* Pre-size the scratch vectors, then mark them empty. */
+  vec_validate (pm->parse_value, 16);
+  vec_validate (pm->tokens, 16);
+  vec_validate (pm->register_input, 32);
+  vec_validate (pm->match_items, 16);
+
+  _vec_len (pm->parse_value) = 0;
+  _vec_len (pm->tokens) = 0;
+  _vec_len (pm->register_input) = 0;
+  _vec_len (pm->match_items) = 0;
+
+  bounds = vlib_get_elf_section_bounds (vm, "parse_type_registrations");
+  vec_foreach (b, bounds)
+  {
+    error = parse_type_register (vm, b->lo, b->hi, pm);
+    if (error)
+      break;
+  }
+  vec_free (bounds);
+  /* Do not let a later step overwrite (and leak) this error. */
+  if (error)
+    return error;
+
+  parse_type_and_graph_init (pm);
+
+  bounds = vlib_get_elf_section_bounds (vm, "parse_registrations");
+  vec_foreach (b, bounds)
+  {
+    error = parse_register (vm, b->lo, b->hi, pm);
+    if (error)
+      break;
+  }
+  vec_free (bounds);
+  if (error)
+    return error;
+
+  vec_sort_with_function (pm->parse_registrations, rule_length_compare);
+
+  for (i = 0; i < vec_len (pm->parse_registrations); i++)
+    {
+      rule = pm->parse_registrations[i];
+      /* Best effort: report (and free) the error, keep the other rules. */
+      clib_error_report (parse_register_one (pm, rule));
+    }
+
+  return 0;
+}
+
+VLIB_INIT_FUNCTION (parse_init);
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vlib/parse.h b/src/vlib/parse.h
new file mode 100644
index 00000000000..036e744723b
--- /dev/null
+++ b/src/vlib/parse.h
@@ -0,0 +1,221 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef included_vlib_parse_h
+#define included_vlib_parse_h
+
+#include <vlib/vlib.h>
+#include <vlib/lex.h>
+#include <vppinfra/mhash.h>
+
+/*
+ * A parsed value tagged with a 16-bit type.
+ * The struct is packed: the value union holds 32 - sizeof (u16) bytes
+ * and is immediately followed by the type field.
+ */
+typedef struct
+{
+ /* Word aligned value. */
+ union
+ {
+ u8 as_u8[32 - 1 * sizeof (u16)];
+ void *as_pointer;
+ uword as_uword;
+ word as_word;
+ u64 as_u64;
+ } value;
+
+ /* 16 bit type at end so that 30 bytes of value are aligned. */
+ u16 type;
+} __attribute ((packed))
+ vlib_parse_value_t;
+
+/*
+ * Instance of a type: a (type, origin, help, value) tuple.
+ * The value is stored either as a pointer or as a machine word.
+ */
+typedef struct
+{
+  u32 type;
+  u32 origin;
+  u32 help_index;
+
+  union
+  {
+    void *as_pointer;
+    uword as_uword;
+  } value;
+} vlib_parse_item_t;
+
+ /*
+  * Parse graph node: an (item, peer, deeper) tuple.
+  */
+ typedef struct
+ {
+   u32 item;			/* Index of item for this node. */
+   u32 peer;			/* Graph index of peer (sibling) node
+				   (linked list of peers). */
+   u32 deeper;			/* Graph index of deeper (child) node
+				   (linked list of children). */
+ } vlib_parse_graph_t;
+
+/*
+ * X-macro listing every match result.  Each _(NAME) entry expands to the
+ * enumerator VLIB_PARSE_NAME in vlib_parse_match_t below, in list order.
+ */
+#define foreach_parse_match_type \
+ _(MATCH_DONE) \
+ _(MATCH_RULE) \
+ _(MATCH_FAIL) \
+ _(MATCH_FULL) \
+ _(MATCH_VALUE) \
+ _(MATCH_PARTIAL) \
+ _(MATCH_AMBIGUOUS) \
+ _(MATCH_EVAL_FAIL)
+
+ /* Result code returned by match and eval functions. */
+ typedef enum
+ {
+#define _(a) VLIB_PARSE_##a,
+ foreach_parse_match_type
+#undef _
+ } vlib_parse_match_t;
+
+ /* Forward declarations so the function types below can name them. */
+ struct vlib_parse_type;
+ struct vlib_parse_main;
+
+ /* Match function: given the parser, the type being matched and the
+    current token, returns a match result; may fill in the value. */
+ typedef
+ vlib_parse_match_t (vlib_parse_match_function_t)
+ (struct vlib_parse_main *,
+ struct vlib_parse_type *, vlib_lex_token_t *, vlib_parse_value_t *);
+ /* Cleanup function invoked on a parse value. */
+ typedef void (vlib_parse_value_cleanup_function_t) (vlib_parse_value_t
+ *);
+
+ /*
+  * Description of one parse type.  Instances are normally emitted by
+  * PARSE_TYPE_INIT, which fills in everything except rule_index.
+  */
+ typedef struct vlib_parse_type
+ {
+   char *name;			/* Type name. */
+   vlib_parse_match_function_t *match_function;
+   vlib_parse_value_cleanup_function_t *value_cleanup_function;
+   format_function_t *format_value;
+   u32 rule_index;
+ } vlib_parse_type_t;
+
+ /*
+  * One parse rule registration, normally emitted by PARSE_INIT, which
+  * sets initializer and eof_match.  rule_length is the key used to sort
+  * registrations (descending) at init time.
+  */
+ typedef struct
+ {
+   char *initializer;
+   void *eof_match;
+   int rule_length;
+ } parse_registration_t;
+
+ /* Global parser state. */
+ typedef struct vlib_parse_main
+ {
+   /* (type, origin, help, value) tuples */
+   vlib_parse_item_t *parse_items;
+   mhash_t parse_item_hash;
+
+   /* (item, peer, deeper) tuples */
+   vlib_parse_graph_t *parse_graph;
+   u32 root_index;
+
+   u8 *register_input;
+
+   /* parser types */
+   vlib_parse_type_t *parse_types;
+   uword *parse_type_by_name_hash;
+
+   /* Vector of MATCH_VALUEs */
+   vlib_parse_value_t *parse_value;
+   u32 *match_items;
+
+   /* Parse registrations */
+   parse_registration_t **parse_registrations;
+
+   /* Token vector */
+   vlib_lex_token_t *tokens;
+   u32 current_token_index;
+
+   /* Back pointers to the lexer and to vlib. */
+   vlib_lex_main_t *lex_main;
+   vlib_main_t *vlib_main;
+ } vlib_parse_main_t;
+
+ /* The parser's global state.
+    NOTE(review): this is a definition (not an extern declaration) in a
+    header, so every including file gets a tentative definition; confirm
+    the build relies on common-symbol merging or move the definition. */
+ vlib_parse_main_t
+ vlib_parse_main;
+
+ /* Eval function type: takes the parser, an item and a value, and
+    returns a match result. */
+ typedef
+ vlib_parse_match_t (vlib_parse_eval_function_t)
+ (vlib_parse_main_t *, vlib_parse_item_t *, vlib_parse_value_t *);
+
+/* Parse the given input and return the resulting match code. */
+vlib_parse_match_t
+vlib_parse_eval (u8 * input);
+
+ format_function_t format_vlib_parse_value;
+
+/* FIXME need these to be global? */
+ vlib_parse_match_function_t rule_match, eof_match, word_match,
+ number_match;
+
+/* Declares a parse_registration_t named x_registration placed in the
+   "parse_registrations" ELF section. */
+#define _PARSE_REGISTRATION_DATA(x) \
+VLIB_ELF_SECTION_DATA(x##_registration,parse_registration_t,parse_registrations)
+
+/* Register a parse rule: x names the registration, s is stored as the
+   initializer and e as the eof_match pointer. */
+#define PARSE_INIT(x, s, e) \
+static _PARSE_REGISTRATION_DATA(x) = { \
+ .initializer = s, \
+ .eof_match = e, \
+};
+
+/* Declares a vlib_parse_type_t named x_type_registration placed in the
+   "parse_type_registrations" ELF section. */
+#define _PARSE_TYPE_REGISTRATION_DATA(x) \
+VLIB_ELF_SECTION_DATA(x##_type_registration,vlib_parse_type_t, \
+parse_type_registrations)
+
+/* Register a parse type: n is the type name (stringified into .name),
+   m the match function, c the value cleanup function and f the value
+   format function. */
+#define PARSE_TYPE_INIT(n, m, c, f) \
+static _PARSE_TYPE_REGISTRATION_DATA(n) = { \
+ .name = #n, \
+ .match_function = m, \
+ .value_cleanup_function = c, \
+ .format_value = f, \
+};
+
+#endif /* included_vlib_parse_h */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vlib/parse_builtin.c b/src/vlib/parse_builtin.c
new file mode 100644
index 00000000000..0ce716b539e
--- /dev/null
+++ b/src/vlib/parse_builtin.c
@@ -0,0 +1,150 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include <vlib/parse.h>
+
+/*
+ * Return the pointer value of the parse item referenced by the last
+ * entry of pm->match_items.
+ */
+always_inline void *
+parse_last_match_value (vlib_parse_main_t * pm)
+{
+  u32 item_index = vec_elt (pm->match_items, vec_len (pm->match_items) - 1);
+  vlib_parse_item_t *item = pool_elt_at_index (pm->parse_items, item_index);
+
+  return item->value.as_pointer;
+}
+
+/*
+ * Match the end-of-input token: MATCH_DONE on VLIB_LEX_eof, otherwise
+ * MATCH_FAIL.  pm, type and valuep are unused.
+ */
+vlib_parse_match_t
+eof_match (vlib_parse_main_t * pm, vlib_parse_type_t * type,
+	   vlib_lex_token_t * t, vlib_parse_value_t * valuep)
+{
+  if (t->token == VLIB_LEX_eof)
+    return VLIB_PARSE_MATCH_DONE;
+
+  return VLIB_PARSE_MATCH_FAIL;
+}
+
+PARSE_TYPE_INIT (eof, eof_match, 0 /* cleanup value */ ,
+		 0 /* format value */ );
+
+/*
+ * End-of-rule matcher.  Fetches the function pointer stored in the last
+ * matched item, steps the current token index back by one, then either
+ * delegates to that function or, if none is set, reports MATCH_RULE.
+ */
+vlib_parse_match_t
+rule_eof_match (vlib_parse_main_t * pm, vlib_parse_type_t * type,
+		vlib_lex_token_t * t, vlib_parse_value_t * valuep)
+{
+  vlib_parse_match_function_t *eof_fn = parse_last_match_value (pm);
+
+  pm->current_token_index--;
+
+  if (eof_fn == 0)
+    return VLIB_PARSE_MATCH_RULE;
+
+  return eof_fn (pm, type, t, valuep);
+}
+
+PARSE_TYPE_INIT (rule_eof, rule_eof_match, 0, 0);
+
+/*
+ * Match a word token against the string stored in the last matched item.
+ * Returns MATCH_FAIL if the token is not a word or differs from the item
+ * string, MATCH_FULL if both strings are identical, and MATCH_PARTIAL if
+ * the token is a proper prefix of the item string.
+ */
+vlib_parse_match_t
+word_match (vlib_parse_main_t * pm, vlib_parse_type_t * type,
+	    vlib_lex_token_t * t, vlib_parse_value_t * valuep)
+{
+  u8 *token_text, *item_text;
+  int pos = 0;
+
+  if (t->token != VLIB_LEX_word)
+    return VLIB_PARSE_MATCH_FAIL;
+
+  token_text = t->value.as_pointer;
+  item_text = parse_last_match_value (pm);
+
+  /* Walk the token; any mismatch (including running off the end of
+     the item string) is a failure. */
+  while (token_text[pos])
+    {
+      if (token_text[pos] != item_text[pos])
+	return VLIB_PARSE_MATCH_FAIL;
+      pos++;
+    }
+
+  if (item_text[pos] == 0)
+    return VLIB_PARSE_MATCH_FULL;
+
+  return VLIB_PARSE_MATCH_PARTIAL;
+}
+
+PARSE_TYPE_INIT (word, word_match, 0 /* clnup value */ ,
+		 0 /* format value */ );
+
+/*
+ * Match a number token.  On success the token's value is copied into
+ * valuep and MATCH_VALUE is returned; otherwise MATCH_FAIL.
+ */
+vlib_parse_match_t
+number_match (vlib_parse_main_t * pm, vlib_parse_type_t * type,
+	      vlib_lex_token_t * t, vlib_parse_value_t * valuep)
+{
+  if (t->token != VLIB_LEX_number)
+    return VLIB_PARSE_MATCH_FAIL;
+
+  valuep->value.as_uword = t->value.as_uword;
+  return VLIB_PARSE_MATCH_VALUE;
+}
+
+/*
+ * Format function for number values: prints the value in decimal
+ * followed by its hexadecimal form in parentheses.  The conversion
+ * width is chosen from the size of uword.
+ */
+static u8 *
+format_value_number (u8 * s, va_list * args)
+{
+  vlib_parse_value_t *v = va_arg (*args, vlib_parse_value_t *);
+  uword number = v->value.as_uword;
+
+  if (BITS (uword) != 64)
+    return format (s, "%ld(0x%lx)", number, number);
+
+  return format (s, "%lld(0x%llx)", number, number);
+}
+
+PARSE_TYPE_INIT (number, number_match, 0 /* cln value */ ,
+		 format_value_number /* fmt value */ );
+
+
+/*
+ * Lexer tokens that need no special handling: for each name listed here
+ * the _() macro below generates a NAME_match function that returns
+ * MATCH_FULL when the token is VLIB_LEX_NAME and MATCH_FAIL otherwise,
+ * and registers a parse type called NAME using that function.
+ */
+#define foreach_vanilla_lex_match_function \
+ _(plus) \
+ _(minus) \
+ _(star) \
+ _(slash) \
+ _(lpar) \
+ _(rpar)
+
+/* Set above 0 to log every generated match function's decision. */
+#define LEX_MATCH_DEBUG 0
+
+#define _(name) \
+vlib_parse_match_t name##_match (vlib_parse_main_t *pm, \
+ vlib_parse_type_t *type, \
+ vlib_lex_token_t *t, \
+ vlib_parse_value_t *valuep) \
+{ \
+ if (LEX_MATCH_DEBUG > 0) \
+ clib_warning ("against %U returns %s", \
+ format_vlib_lex_token, pm->lex_main, t, \
+ (t->token == VLIB_LEX_##name) \
+ ? "VLIB_PARSE_MATCH_FULL" : \
+ "VLIB_PARSE_MATCH_FAIL"); \
+ if (t->token == VLIB_LEX_##name) \
+ return VLIB_PARSE_MATCH_FULL; \
+ return VLIB_PARSE_MATCH_FAIL; \
+} \
+ \
+PARSE_TYPE_INIT (name, name##_match, 0 /* cln value */, \
+ 0 /* fmt val */);
+
+/* Instantiate the match function and type registration for each name. */
+foreach_vanilla_lex_match_function
+#undef _
+/*
+ * Empty init function.  It exists only so that other init code can
+ * reference it, which forces this object file (and the type
+ * registrations above) to be linked in.
+ */
+static clib_error_t *
+parse_builtin_init (vlib_main_t * vm)
+{
+  clib_error_t *no_error = 0;
+
+  return no_error;
+}
+
+VLIB_INIT_FUNCTION (parse_builtin_init);
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vlib/pci/linux_pci.c b/src/vlib/pci/linux_pci.c
new file mode 100644
index 00000000000..f9ee47ac145
--- /dev/null
+++ b/src/vlib/pci/linux_pci.c
@@ -0,0 +1,642 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * linux_pci.c: Linux user space PCI bus management.
+ *
+ * Copyright (c) 2008 Eliot Dresselhaus
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <vlib/vlib.h>
+#include <vlib/pci/pci.h>
+#include <vlib/unix/unix.h>
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <dirent.h>
+#include <sys/ioctl.h>
+#include <net/if.h>
+#include <linux/ethtool.h>
+#include <linux/sockios.h>
+
+/*
+ * Linux-specific state kept for each PCI device that has been handed to
+ * a registered driver (see add_device).  Descriptors are released by
+ * linux_pci_device_free, which only closes values greater than zero.
+ */
+typedef struct
+{
+ /* /sys/bus/pci/devices/... directory name for this device. */
+ u8 *dev_dir_name;
+
+ /* Resource file descriptors. */
+ int *resource_fds;
+
+ /* File descriptor for config space read/write. */
+ int config_fd;
+
+ /* File descriptor for /dev/uio%d */
+ int uio_fd;
+
+ /* Minor device for uio device. */
+ u32 uio_minor;
+
+ /* Index given by unix_file_add. */
+ u32 unix_file_index;
+
+} linux_pci_device_t;
+
+/* Pool of PCI devices. */
+typedef struct
+{
+ vlib_main_t *vlib_main;
+ /* Pool of per-device Linux state; vlib_pci_device_t.os_handle is an
+    index into this pool. */
+ linux_pci_device_t *linux_pci_devices;
+} linux_pci_main_t;
+
+extern linux_pci_main_t linux_pci_main;
+
+/* Call to allocate/initialize the pci subsystem.
+ This is not an init function so that users can explicitly enable
+ pci only when it's needed. */
+clib_error_t *pci_bus_init (vlib_main_t * vm);
+
+/* Rebind device d to the named UIO driver (defined below). */
+clib_error_t *vlib_pci_bind_to_uio (vlib_pci_device_t * d,
+ char *uio_driver_name);
+
+/* The single instance of the Linux PCI state. */
+linux_pci_main_t linux_pci_main;
+
+/*
+ * Bind PCI device d to the UIO driver named uio_driver_name.
+ *
+ * Nothing is done if the device's sysfs directory already contains a
+ * "uio" entry.  Otherwise every host network interface is inspected; if
+ * the interface that sits on this PCI address is administratively up the
+ * device is skipped and an error is returned.  If not, the device is
+ * unbound from its current driver, its vendor/device id is added to the
+ * UIO driver and the device is bound to it through sysfs.
+ *
+ * Returns 0 on success (or when already bound), otherwise an error.
+ */
+clib_error_t *
+vlib_pci_bind_to_uio (vlib_pci_device_t * d, char *uio_driver_name)
+{
+  clib_error_t *error = 0;
+  u8 *s = 0;
+  DIR *dir = 0;
+  struct dirent *e;
+  int fd = -1;
+  u8 *dev_dir_name = format (0, "/sys/bus/pci/devices/%U",
+			     format_vlib_pci_addr, &d->bus_address);
+
+  /* if uio sub-directory exists, we are fine, device is
+     already bound to UIO driver */
+  s = format (s, "%v/uio%c", dev_dir_name, 0);
+  if (access ((char *) s, F_OK) == 0)
+    goto done;
+  vec_reset_length (s);
+
+  /* walk through all linux interfaces and if an interface belonging to
+     this device is found check if the interface is admin up */
+  dir = opendir ("/sys/class/net");
+  s = format (s, "%U%c", format_vlib_pci_addr, &d->bus_address, 0);
+
+  if (!dir)
+    {
+      error = clib_error_return (0, "Skipping PCI device %U: failed to "
+				 "read /sys/class/net",
+				 format_vlib_pci_addr, &d->bus_address);
+      goto done;
+    }
+
+  fd = socket (PF_INET, SOCK_DGRAM, 0);
+  if (fd < 0)
+    {
+      error = clib_error_return_unix (0, "socket");
+      goto done;
+    }
+
+  while ((e = readdir (dir)))
+    {
+      struct ifreq ifr;
+      struct ethtool_drvinfo drvinfo;
+
+      if (e->d_name[0] == '.')	/* skip . and .. */
+	continue;
+
+      /* ifr is zeroed first, so copying at most IFNAMSIZ - 1 bytes
+         always leaves ifr_name NUL terminated. */
+      memset (&ifr, 0, sizeof ifr);
+      memset (&drvinfo, 0, sizeof drvinfo);
+      ifr.ifr_data = (char *) &drvinfo;
+      strncpy (ifr.ifr_name, e->d_name, IFNAMSIZ - 1);
+      drvinfo.cmd = ETHTOOL_GDRVINFO;
+      if (ioctl (fd, SIOCETHTOOL, &ifr) < 0)
+	{
+	  /* Some interfaces (eg "lo") don't support this ioctl */
+	  if ((errno != ENOTSUP) && (errno != ENODEV))
+	    clib_unix_warning ("ioctl fetch intf %s bus info error",
+			       e->d_name);
+	  continue;
+	}
+
+      /* Not the interface that lives on this PCI address. */
+      if (strcmp ((char *) s, drvinfo.bus_info))
+	continue;
+
+      memset (&ifr, 0, sizeof (ifr));
+      strncpy (ifr.ifr_name, e->d_name, IFNAMSIZ - 1);
+      if (ioctl (fd, SIOCGIFFLAGS, &ifr) < 0)
+	{
+	  error = clib_error_return_unix (0, "ioctl fetch intf %s flags",
+					  e->d_name);
+	  goto done;
+	}
+
+      if (ifr.ifr_flags & IFF_UP)
+	{
+	  error = clib_error_return (0, "Skipping PCI device %U as host "
+				     "interface %s is up",
+				     format_vlib_pci_addr, &d->bus_address,
+				     e->d_name);
+	  goto done;
+	}
+    }
+
+  /* The socket is no longer needed once the interface scan is over. */
+  close (fd);
+  fd = -1;
+  vec_reset_length (s);
+
+  s = format (s, "%v/driver/unbind%c", dev_dir_name, 0);
+  vlib_sysfs_write ((char *) s, "%U", format_vlib_pci_addr, &d->bus_address);
+  vec_reset_length (s);
+
+  s = format (s, "/sys/bus/pci/drivers/%s/new_id%c", uio_driver_name, 0);
+  vlib_sysfs_write ((char *) s, "0x%04x 0x%04x", d->vendor_id, d->device_id);
+  vec_reset_length (s);
+
+  s = format (s, "/sys/bus/pci/drivers/%s/bind%c", uio_driver_name, 0);
+  vlib_sysfs_write ((char *) s, "%U", format_vlib_pci_addr, &d->bus_address);
+
+done:
+  /* Single cleanup point: both handles may still be unset here (device
+     already bound, or opendir/socket failed), so guard each release. */
+  if (fd >= 0)
+    close (fd);
+  if (dir)
+    closedir (dir);
+  vec_free (s);
+  vec_free (dev_dir_name);
+  return error;
+}
+
+
+/*
+ * foreach_directory_file callback for a device's "uio" directory.
+ * Parses the minor number out of an entry named "uio<N>" and stores it
+ * in the linux_pci_device_t passed as arg.  Aborts the process if the
+ * entry name does not have that form.  Always returns 0.
+ */
+static clib_error_t *
+scan_uio_dir (void *arg, u8 * path_name, u8 * file_name)
+{
+  linux_pci_device_t *l = arg;
+  unformat_input_t input;
+  uword matched;
+
+  unformat_init_string (&input, (char *) file_name, vec_len (file_name));
+  matched = unformat (&input, "uio%d", &l->uio_minor);
+
+  if (matched == 0)
+    abort ();
+
+  unformat_free (&input);
+  return 0;
+}
+
+/*
+ * Read-ready callback for a device's /dev/uio descriptor.
+ * Consumes the 4-byte count from the descriptor, calls the device's
+ * interrupt handler if one is set, then re-enables interrupts on the
+ * device.  The device is looked up from the file's private_data, which
+ * add_device sets to the device's index in pci_main.pci_devs.
+ */
+static clib_error_t *
+linux_pci_uio_read_ready (unix_file_t * uf)
+{
+  vlib_pci_main_t *pm = &pci_main;
+  vlib_pci_device_t *d;
+  u32 icount;
+  /* The read result is deliberately ignored. */
+  int __attribute__ ((unused)) rv;
+
+  rv = read (uf->file_descriptor, &icount, 4);
+
+  d = pool_elt_at_index (pm->pci_devs, uf->private_data);
+
+  if (d->interrupt_handler != 0)
+    d->interrupt_handler (d);
+
+  vlib_pci_intr_enable (d);
+
+  /* no error */
+  return 0;
+}
+
+/*
+ * Error callback for a device's /dev/uio descriptor: returns an error
+ * naming the device index stored in the file's private_data.
+ */
+static clib_error_t *
+linux_pci_uio_error_ready (unix_file_t * uf)
+{
+  u32 dev_index = (u32) uf->private_data;
+  clib_error_t *err =
+    clib_error_return (0, "pci device %d: error", dev_index);
+
+  return err;
+}
+
+/*
+ * Attach Linux state to PCI device dev.
+ *
+ * Copies *pdev into a fresh entry of the linux_pci_devices pool (with its
+ * own copy of the directory name), records the pool index in
+ * dev->os_handle, discovers the UIO minor number from the device's "uio"
+ * sysfs directory, opens /dev/uio<minor> and registers that descriptor
+ * with the unix file poller.
+ */
+static void
+add_device (vlib_pci_device_t * dev, linux_pci_device_t * pdev)
+{
+  vlib_pci_main_t *pm = &pci_main;
+  linux_pci_main_t *lpm = &linux_pci_main;
+  linux_pci_device_t *l;
+
+  pool_get (lpm->linux_pci_devices, l);
+  l[0] = pdev[0];
+
+  /* The caller's name vector is not ours to keep; take a private copy. */
+  l->dev_dir_name = vec_dup (l->dev_dir_name);
+
+  dev->os_handle = l - lpm->linux_pci_devices;
+
+  {
+    /* dev_dir_name is a length-counted vector, so format it with %v and
+       NUL terminate the result explicitly: it is handed to
+       foreach_directory_file as a C string. */
+    u8 *uio_dir = format (0, "%v/uio%c", l->dev_dir_name, 0);
+    foreach_directory_file ((char *) uio_dir, scan_uio_dir, l,	/* scan_dirs */
+			    1);
+    vec_free (uio_dir);
+  }
+
+  {
+    char *uio_name = (char *) format (0, "/dev/uio%d%c", l->uio_minor, 0);
+    l->uio_fd = open (uio_name, O_RDWR);
+    if (l->uio_fd < 0)
+      clib_unix_error ("open `%s'", uio_name);
+    vec_free (uio_name);
+  }
+
+  {
+    unix_file_t template = { 0 };
+    unix_main_t *um = &unix_main;
+
+    template.read_function = linux_pci_uio_read_ready;
+    template.file_descriptor = l->uio_fd;
+    template.error_function = linux_pci_uio_error_ready;
+    /* The callbacks find the device again through this index. */
+    template.private_data = dev - pm->pci_devs;
+
+    l->unix_file_index = unix_file_add (um, &template);
+  }
+}
+
+/*
+ * Release everything owned by l: close every resource, config and uio
+ * descriptor whose value is greater than zero, then free the descriptor
+ * vector and the directory name.  The pool entry itself is not freed.
+ */
+static void
+linux_pci_device_free (linux_pci_device_t * l)
+{
+  int n_fds = vec_len (l->resource_fds);
+  int idx = 0;
+
+  while (idx < n_fds)
+    {
+      int fd = l->resource_fds[idx];
+      if (fd > 0)
+	close (fd);
+      idx++;
+    }
+
+  if (l->config_fd > 0)
+    close (l->config_fd);
+
+  if (l->uio_fd > 0)
+    close (l->uio_fd);
+
+  vec_free (l->resource_fds);
+  vec_free (l->dev_dir_name);
+}
+
+/*
+ * Read or write n_bytes of PCI configuration space at offset address,
+ * through the device's config descriptor.  Returns 0 when exactly
+ * n_bytes were transferred, otherwise a unix error tagged "read" or
+ * "write".
+ */
+clib_error_t *
+vlib_pci_read_write_config (vlib_pci_device_t * dev,
+			    vlib_read_or_write_t read_or_write,
+			    uword address, void *data, u32 n_bytes)
+{
+  linux_pci_main_t *lpm = &linux_pci_main;
+  linux_pci_device_t *p =
+    pool_elt_at_index (lpm->linux_pci_devices, dev->os_handle);
+  int is_read = (read_or_write == VLIB_READ);
+  int n;
+
+  n = is_read
+    ? pread (p->config_fd, data, n_bytes, address)
+    : pwrite (p->config_fd, data, n_bytes, address);
+
+  if (n == n_bytes)
+    return 0;
+
+  return clib_error_return_unix (0, "%s", is_read ? "read" : "write");
+}
+
+/*
+ * Map PCI resource number `resource' of the device identified by
+ * os_handle into memory.
+ *
+ * Opens <dev_dir>/resource<N> read/write and maps the whole file
+ * MAP_SHARED.  If addr is non-zero the mapping is placed exactly there
+ * (MAP_FIXED).  On success *result holds the mapping address and the
+ * descriptor is remembered in the device's resource_fds so that it is
+ * closed when the device is freed.  On failure the descriptor is closed
+ * and an error is returned.
+ */
+static clib_error_t *
+os_map_pci_resource_internal (uword os_handle,
+			      u32 resource, u8 * addr, void **result)
+{
+  linux_pci_main_t *pm = &linux_pci_main;
+  linux_pci_device_t *p;
+  struct stat stat_buf;
+  u8 *file_name;
+  int fd;
+  clib_error_t *error;
+  int flags = MAP_SHARED;
+
+  error = 0;
+  p = pool_elt_at_index (pm->linux_pci_devices, os_handle);
+
+  file_name = format (0, "%v/resource%d%c", p->dev_dir_name, resource, 0);
+  fd = open ((char *) file_name, O_RDWR);
+  if (fd < 0)
+    {
+      error = clib_error_return_unix (0, "open `%s'", file_name);
+      goto done;
+    }
+
+  /* The size of the sysfs resource file is the size of the mapping. */
+  if (fstat (fd, &stat_buf) < 0)
+    {
+      error = clib_error_return_unix (0, "fstat `%s'", file_name);
+      goto done;
+    }
+
+  if (addr != 0)
+    flags |= MAP_FIXED;
+
+  *result = mmap (addr,
+		  /* size */ stat_buf.st_size,
+		  PROT_READ | PROT_WRITE, flags,
+		  /* file */ fd,
+		  /* offset */ 0);
+  if (*result == MAP_FAILED)
+    {
+      error = clib_error_return_unix (0, "mmap `%s'", file_name);
+      goto done;
+    }
+
+  /* Record the descriptor only once the mapping exists.  Recording it
+     earlier would leave a closed descriptor in resource_fds when mmap
+     fails, and linux_pci_device_free would close it a second time. */
+  vec_validate (p->resource_fds, resource);
+  p->resource_fds[resource] = fd;
+
+done:
+  if (error)
+    {
+      if (fd >= 0)
+	close (fd);
+    }
+  vec_free (file_name);
+  return error;
+}
+
+/*
+ * Map a PCI resource of dev at an address chosen by the kernel.
+ * On success *result receives the mapping address.
+ */
+clib_error_t *
+vlib_pci_map_resource (vlib_pci_device_t * dev, u32 resource, void **result)
+{
+  u8 *any_address = 0;
+
+  return os_map_pci_resource_internal (dev->os_handle, resource,
+				       any_address, result);
+}
+
+/*
+ * Map a PCI resource of dev at the caller-supplied address addr.
+ * On success *result receives the mapping address.
+ */
+clib_error_t *
+vlib_pci_map_resource_fixed (vlib_pci_device_t * dev,
+			     u32 resource, u8 * addr, void **result)
+{
+  uword handle = dev->os_handle;
+
+  return os_map_pci_resource_internal (handle, resource, addr, result);
+}
+
+/*
+ * Release the Linux state attached to dev: free its resources and return
+ * its entry to the linux_pci_devices pool.
+ */
+void
+vlib_pci_free_device (vlib_pci_device_t * dev)
+{
+  linux_pci_main_t *pm = &linux_pci_main;
+  linux_pci_device_t *l =
+    pool_elt_at_index (pm->linux_pci_devices, dev->os_handle);
+
+  linux_pci_device_free (l);
+  pool_put (pm->linux_pci_devices, l);
+}
+
+/*
+ * Step from registration r to the one that follows it in the ELF
+ * section.  The supported_devices list of r is counted up to (not
+ * including) its terminating entry and that many entries are passed to
+ * clib_elf_section_data_next as extra bytes to skip.
+ */
+pci_device_registration_t * __attribute__ ((unused))
+pci_device_next_registered (pci_device_registration_t * r)
+{
+  uword n_ids = 0;
+
+  /* Null vendor id marks end of initialized list. */
+  while (r->supported_devices[n_ids].vendor_id != 0)
+    n_ids++;
+
+  return clib_elf_section_data_next (r,
+				     n_ids * sizeof (r->supported_devices[0]));
+}
+
+/*
+ * Look for a registered driver that supports dev and, if one is found,
+ * hand the device over to it.
+ *
+ * Walks the registration list; for the first supported (vendor, device)
+ * id that matches and can be bound to "uio_pci_generic", attaches the
+ * Linux state (add_device), installs the driver's interrupt handler and
+ * returns the result of the driver's init function.
+ *
+ * If nothing claims the device, the config-space descriptor in pdev is
+ * closed and 0 is returned.
+ */
+static clib_error_t *
+init_device_from_registered (vlib_main_t * vm,
+ vlib_pci_device_t * dev,
+ linux_pci_device_t * pdev)
+{
+ vlib_pci_main_t *pm = &pci_main;
+ pci_device_registration_t *r;
+ pci_device_id_t *i;
+ clib_error_t *error;
+
+ r = pm->pci_device_registrations;
+
+ while (r)
+ {
+ /* The id list of each registration ends with a zero vendor id. */
+ for (i = r->supported_devices; i->vendor_id != 0; i++)
+ if (i->vendor_id == dev->vendor_id && i->device_id == dev->device_id)
+ {
+ error = vlib_pci_bind_to_uio (dev, "uio_pci_generic");
+ if (error)
+ {
+ /* Binding failed: report it and keep scanning the remaining
+ ids (this continues the inner for loop). */
+ clib_error_report (error);
+ continue;
+ }
+
+ add_device (dev, pdev);
+ dev->interrupt_handler = r->interrupt_handler;
+ return r->init_function (vm, dev);
+ }
+ r = r->next_registration;
+ }
+ /* No driver, close the PCI config-space FD */
+ close (pdev->config_fd);
+ return 0;
+}
+
+/*
+ * Initialize a scanned device.  Currently this only tries the registered
+ * drivers and returns whatever that attempt returns.
+ */
+static clib_error_t *
+init_device (vlib_main_t * vm,
+	     vlib_pci_device_t * dev, linux_pci_device_t * pdev)
+{
+  clib_error_t *error = init_device_from_registered (vm, dev, pdev);
+
+  return error;
+}
+
+/*
+ * foreach_directory_file callback invoked for each entry of
+ * /sys/bus/pci/devices.
+ *
+ * arg is the vlib_main_t; dev_dir_name is the device's sysfs directory
+ * (a vector, not NUL terminated here since it is always formatted with
+ * %v).  The function allocates a vlib_pci_device_t, reads the device's
+ * config space, parses its PCI address from the directory name, indexes
+ * it by address, offers it to the registered drivers, and finally fills
+ * in VPD data, driver name, NUMA node, class, vendor and device ids from
+ * sysfs.
+ *
+ * Returns an error if the config file cannot be opened or read, if the
+ * config header is all ones, or whatever init_device returns.
+ */
+static clib_error_t *
+scan_device (void *arg, u8 * dev_dir_name, u8 * ignored)
+{
+ vlib_main_t *vm = arg;
+ vlib_pci_main_t *pm = &pci_main;
+ int fd;
+ u8 *f;
+ clib_error_t *error = 0;
+ vlib_pci_device_t *dev;
+ linux_pci_device_t pdev = { 0 };
+ u32 tmp;
+
+ f = format (0, "%v/config%c", dev_dir_name, 0);
+ fd = open ((char *) f, O_RDWR);
+
+ /* Try read-only access if write fails. */
+ if (fd < 0)
+ fd = open ((char *) f, O_RDONLY);
+
+ if (fd < 0)
+ {
+ error = clib_error_return_unix (0, "open `%s'", f);
+ goto done;
+ }
+
+ pool_get (pm->pci_devs, dev);
+
+ /* You can only read more than 64 bytes of config space as root; so we try to
+ read the full space but fall back to just the first 64 bytes. */
+ /* NOTE(review): the fallback read() is issued on the same descriptor
+ without rewinding it after the first, short read -- confirm that it
+ still starts at offset 0 as the comment above intends. */
+ if (read (fd, &dev->config_data, sizeof (dev->config_data)) !=
+ sizeof (dev->config_data)
+ && read (fd, &dev->config0,
+ sizeof (dev->config0)) != sizeof (dev->config0))
+ {
+ pool_put (pm->pci_devs, dev);
+ error = clib_error_return_unix (0, "read `%s'", f);
+ close (fd);
+ goto done;
+ }
+
+ {
+ /* Lazily built all-ones header used to detect an invalid config read;
+ vendor_id is still 0 only before the first fill. */
+ static pci_config_header_t all_ones;
+ if (all_ones.vendor_id == 0)
+ memset (&all_ones, ~0, sizeof (all_ones));
+
+ if (!memcmp (&dev->config0.header, &all_ones, sizeof (all_ones)))
+ {
+ pool_put (pm->pci_devs, dev);
+ error = clib_error_return (0, "invalid PCI config for `%s'", f);
+ close (fd);
+ goto done;
+ }
+ }
+
+ /* Convert the config registers to host byte order according to the
+ header type. */
+ if (dev->config0.header.header_type == 0)
+ pci_config_type0_little_to_host (&dev->config0);
+ else
+ pci_config_type1_little_to_host (&dev->config1);
+
+ /* Parse bus, dev, function from directory name. */
+ {
+ unformat_input_t input;
+
+ unformat_init_string (&input, (char *) dev_dir_name,
+ vec_len (dev_dir_name));
+
+ if (!unformat (&input, "/sys/bus/pci/devices/%U",
+ unformat_vlib_pci_addr, &dev->bus_address))
+ abort ();
+
+ unformat_free (&input);
+
+ }
+
+
+ pdev.config_fd = fd;
+ pdev.dev_dir_name = dev_dir_name;
+
+ hash_set (pm->pci_dev_index_by_pci_addr, dev->bus_address.as_u32,
+ dev - pm->pci_devs);
+
+ /* NOTE(review): driver matching in init_device compares dev->vendor_id
+ and dev->device_id, which this function assigns only further down --
+ confirm those fields are already valid at this point. */
+ error = init_device (vm, dev, &pdev);
+
+ /* Read the Vital Product Data, if the device exposes it.  Each record
+ is a 3-byte tag (id byte followed by a 16-bit little-endian length)
+ and then `len' bytes of data. */
+ vec_reset_length (f);
+ f = format (f, "%v/vpd%c", dev_dir_name, 0);
+ fd = open ((char *) f, O_RDONLY);
+ if (fd >= 0)
+ {
+ while (1)
+ {
+ u8 tag[3];
+ u8 *data = 0;
+ int len;
+
+ if (read (fd, &tag, 3) != 3)
+ break;
+
+ /* Only these three tags are stored; anything else ends the scan. */
+ if (tag[0] != 0x82 && tag[0] != 0x90 && tag[0] != 0x91)
+ break;
+
+ len = (tag[2] << 8) | tag[1];
+ vec_validate (data, len);
+
+ if (read (fd, data, len) != len)
+ {
+ vec_free (data);
+ break;
+ }
+ /* Ownership of data passes to the device. */
+ if (tag[0] == 0x82)
+ dev->product_name = data;
+ else if (tag[0] == 0x90)
+ dev->vpd_r = data;
+ else if (tag[0] == 0x91)
+ dev->vpd_w = data;
+
+ data = 0;
+ }
+ close (fd);
+ }
+
+ vec_reset_length (f);
+ f = format (f, "%v/driver%c", dev_dir_name, 0);
+ dev->driver_name = vlib_sysfs_link_to_name ((char *) f);
+
+ /* -1 is kept when the numa_node file cannot be read. */
+ dev->numa_node = -1;
+ vec_reset_length (f);
+ f = format (f, "%v/numa_node%c", dev_dir_name, 0);
+ vlib_sysfs_read ((char *) f, "%u", &dev->numa_node);
+
+ /* The low 8 bits of the sysfs class value are dropped. */
+ vec_reset_length (f);
+ f = format (f, "%v/class%c", dev_dir_name, 0);
+ vlib_sysfs_read ((char *) f, "0x%x", &tmp);
+ dev->device_class = tmp >> 8;
+
+ vec_reset_length (f);
+ f = format (f, "%v/vendor%c", dev_dir_name, 0);
+ vlib_sysfs_read ((char *) f, "0x%x", &tmp);
+ dev->vendor_id = tmp;
+
+ vec_reset_length (f);
+ f = format (f, "%v/device%c", dev_dir_name, 0);
+ vlib_sysfs_read ((char *) f, "0x%x", &tmp);
+ dev->device_id = tmp;
+
+done:
+ vec_free (f);
+ return error;
+}
+
+/*
+ * Init function for the Linux PCI layer: makes sure unix input is
+ * initialized, creates the address-to-device hash and scans
+ * /sys/bus/pci/devices, calling scan_device for each entry.
+ *
+ * NOTE(review): a scan error is reported and then also returned --
+ * confirm the error is still valid after clib_error_report and that
+ * returning it matches the "complain and continue" intent.
+ */
+clib_error_t *
+linux_pci_init (vlib_main_t * vm)
+{
+  vlib_pci_main_t *pm = &pci_main;
+  clib_error_t *error;
+
+  pm->vlib_main = vm;
+
+  error = vlib_call_init_function (vm, unix_input_init);
+  if (error)
+    return error;
+
+  /* PCI addresses are used directly as u32 hash keys. */
+  ASSERT (sizeof (vlib_pci_addr_t) == sizeof (u32));
+  pm->pci_dev_index_by_pci_addr = hash_create (0, sizeof (uword));
+
+  error = foreach_directory_file ("/sys/bus/pci/devices", scan_device, vm,
+				  /* scan_dirs */ 0);
+
+  /* Complain and continue. might not be root, etc. */
+  if (error != 0)
+    clib_error_report (error);
+
+  return error;
+}
+
+VLIB_INIT_FUNCTION (linux_pci_init);
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vlib/pci/pci.c b/src/vlib/pci/pci.c
new file mode 100644
index 00000000000..7100064df42
--- /dev/null
+++ b/src/vlib/pci/pci.c
@@ -0,0 +1,264 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * pci.c: Linux user space PCI bus management.
+ *
+ * Copyright (c) 2008 Eliot Dresselhaus
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <vlib/vlib.h>
+#include <vlib/pci/pci.h>
+#include <vlib/unix/unix.h>
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <dirent.h>
+#include <sys/ioctl.h>
+#include <net/if.h>
+#include <linux/ethtool.h>
+#include <linux/sockios.h>
+
+/* Global PCI state shared by the generic and the Linux-specific code. */
+vlib_pci_main_t pci_main;
+
+/*
+ * Look up a PCI device by address.  Returns the device, or 0 if no
+ * device with that address has been indexed.
+ */
+vlib_pci_device_t *
+vlib_get_pci_device (vlib_pci_addr_t * addr)
+{
+  vlib_pci_main_t *pm = &pci_main;
+  uword *dev_index = hash_get (pm->pci_dev_index_by_pci_addr, addr->as_u32);
+
+  if (dev_index != 0)
+    return vec_elt_at_index (pm->pci_devs, dev_index[0]);
+
+  return 0;
+}
+
+/*
+ * CLI handler for "show pci [all]".
+ *
+ * Prints one line per PCI device with its address, NUMA node, vendor and
+ * device id, link speed, driver name, product name and read-only vital
+ * product data.  Without "all", only devices of class
+ * PCI_CLASS_NETWORK_ETHERNET are listed.  Any other argument yields an
+ * "unknown input" error.
+ */
+static clib_error_t *
+show_pci_fn (vlib_main_t * vm,
+ unformat_input_t * input, vlib_cli_command_t * cmd)
+{
+ vlib_pci_main_t *pm = &pci_main;
+ vlib_pci_device_t *d;
+ int show_all = 0;
+ u8 *s = 0;
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "all"))
+ show_all = 1;
+ else
+ return clib_error_return (0, "unknown input `%U'",
+ format_unformat_error, input);
+ }
+
+ /* Column header; the widths match the per-device line below. */
+ vlib_cli_output (vm, "%-13s%-5s%-12s%-13s%-16s%-32s%s",
+ "Address", "Sock", "VID:PID", "Link Speed", "Driver",
+ "Product Name", "Vital Product Data");
+
+ /* *INDENT-OFF* */
+ pool_foreach (d, pm->pci_devs, ({
+
+ if (d->device_class != PCI_CLASS_NETWORK_ETHERNET && !show_all)
+ continue;
+
+ /* s is reused for the "Sock" column; it stays empty when the NUMA
+ node is unknown (negative). */
+ vec_reset_length (s);
+
+ if (d->numa_node >= 0)
+ s = format (s, " %d", d->numa_node);
+
+ vlib_cli_output (vm, "%-13U%-5v%04x:%04x %-13U%-16s%-32v%U",
+ format_vlib_pci_addr, &d->bus_address, s,
+ d->vendor_id, d->device_id,
+ format_vlib_pci_link_speed, d,
+ d->driver_name ? (char *) d->driver_name : "",
+ d->product_name,
+ format_vlib_pci_vpd, d->vpd_r, 0);
+ }));
+/* *INDENT-ON* */
+
+ vec_free (s);
+ return 0;
+}
+
+/*
+ * Unformat function parsing a PCI address written as
+ * "domain:bus:slot.function" (all hexadecimal) into a vlib_pci_addr_t.
+ * Returns 1 on success and 0 if the input does not match, in which case
+ * the address is left untouched.
+ */
+uword
+unformat_vlib_pci_addr (unformat_input_t * input, va_list * args)
+{
+  vlib_pci_addr_t *addr = va_arg (*args, vlib_pci_addr_t *);
+  u32 domain, bus, slot, function;
+
+  if (!unformat (input, "%x:%x:%x.%x", &domain, &bus, &slot, &function))
+    return 0;
+
+  addr->domain = domain;
+  addr->bus = bus;
+  addr->slot = slot;
+  addr->function = function;
+
+  return 1;
+}
+
+/*
+ * Format function printing a PCI address as
+ * "dddd:bb:ss.f" (domain:bus:slot.function, hexadecimal).
+ */
+u8 *
+format_vlib_pci_addr (u8 * s, va_list * va)
+{
+  vlib_pci_addr_t *addr = va_arg (*va, vlib_pci_addr_t *);
+
+  s = format (s, "%04x:%02x:%02x.%x", addr->domain, addr->bus,
+	      addr->slot, addr->function);
+  return s;
+}
+
+/*
+ * Format function printing a PCI address in the short
+ * "bus/slot/function" form (hexadecimal, domain omitted).
+ */
+u8 *
+format_vlib_pci_handle (u8 * s, va_list * va)
+{
+  vlib_pci_addr_t *addr = va_arg (*va, vlib_pci_addr_t *);
+
+  s = format (s, "%x/%x/%x", addr->bus, addr->slot, addr->function);
+  return s;
+}
+
+/*
+ * Format function printing the PCIe link speed and width of a device,
+ * e.g. "5.0 GT/s x8".  Prints "unknown" when the device has no PCIe
+ * capability or reports a speed code other than 1, 2 or 3.
+ */
+u8 *
+format_vlib_pci_link_speed (u8 * s, va_list * va)
+{
+  vlib_pci_device_t *d = va_arg (*va, vlib_pci_device_t *);
+  pcie_config_regs_t *r =
+    pci_config_find_capability (&d->config0, PCI_CAP_ID_PCIE);
+  int width;
+
+  if (r == 0)
+    return format (s, "unknown");
+
+  /* Link status: speed code in bits 0-3, width in bits 4-9. */
+  width = (r->link_status >> 4) & 0x3f;
+
+  switch (r->link_status & 0xf)
+    {
+    case 1:
+      return format (s, "2.5 GT/s x%u", width);
+    case 2:
+      return format (s, "5.0 GT/s x%u", width);
+    case 3:
+      return format (s, "8.0 GT/s x%u", width);
+    default:
+      return format (s, "unknown");
+    }
+}
+
+/*
+ * Format function for a Vital Product Data block.
+ *
+ * Arguments (from the va_list): `data', a vector of VPD fields, and
+ * `id', either 0 or a pointer to a two-character keyword.
+ *
+ * Each field is a 2-byte keyword, a 1-byte length and `length' bytes of
+ * payload.  With id == 0 every non-empty field is printed on its own
+ * line as "KW: value": keywords PN, EC, SN and MN are printed as text,
+ * all others as at most 8 hex bytes.  With a keyword given, only the raw
+ * payload of the first field with that keyword is appended.
+ *
+ * Parsing stops at a field whose keyword is two zero bytes or whose
+ * payload would run past the end of the vector.
+ */
+u8 *
+format_vlib_pci_vpd (u8 * s, va_list * args)
+{
+  u8 *data = va_arg (*args, u8 *);
+  u8 *id = va_arg (*args, u8 *);
+  uword indent = format_get_indent (s);
+  char *string_types[] = { "PN", "EC", "SN", "MN", 0 };
+  uword p = 0;
+  int first_line = 1;
+
+  if (vec_len (data) < 3)
+    return s;
+
+  while (p + 3 < vec_len (data))
+    {
+      u8 len;
+
+      if (data[p] == 0 && data[p + 1] == 0)
+	return s;
+
+      len = data[p + 2];
+
+      /* The payload starts after the 3-byte field header, so the header
+         has to be counted when checking that the field fits. */
+      if (p + 3 + len > vec_len (data))
+	return s;
+
+      if (id == 0)
+	{
+	  int is_string = 0;
+	  char **c;
+
+	  /* Compare keywords byte by byte: data + p has no particular
+	     alignment, so it must not be read through a u16 pointer. */
+	  for (c = string_types; c[0]; c++)
+	    if (data[p] == (u8) c[0][0] && data[p + 1] == (u8) c[0][1])
+	      {
+		is_string = 1;
+		break;
+	      }
+
+	  if (len)
+	    {
+	      if (!first_line)
+		s = format (s, "\n%U", format_white_space, indent);
+	      else
+		{
+		  first_line = 0;
+		  s = format (s, " ");
+		}
+
+	      s = format (s, "%c%c: ", data[p], data[p + 1]);
+	      if (is_string)
+		vec_add (s, data + p + 3, len);
+	      else
+		{
+		  int i;
+		  const int max_bytes = 8;
+		  s = format (s, "0x");
+		  for (i = 0; i < clib_min (len, max_bytes); i++)
+		    s = format (s, " %02x", data[p + 3 + i]);
+
+		  if (len > max_bytes)
+		    s = format (s, " ...");
+		}
+	    }
+	}
+      else if (data[p] == id[0] && data[p + 1] == id[1])
+	{
+	  vec_add (s, data + p + 3, len);
+	  return s;
+	}
+
+      p += 3 + len;
+    }
+
+  return s;
+}
+
+
+/* CLI registration: "show pci [all]" is handled by show_pci_fn. */
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (show_pci_command, static) = {
+ .path = "show pci",
+ .short_help = "show pci [all]",
+ .function = show_pci_fn,
+};
+/* *INDENT-ON* */
+
+/*
+ * Init function for the generic PCI layer.  There is nothing to set up
+ * here; it always succeeds.
+ */
+clib_error_t *
+pci_bus_init (vlib_main_t * vm)
+{
+  clib_error_t *no_error = 0;
+
+  return no_error;
+}
+
+VLIB_INIT_FUNCTION (pci_bus_init);
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vlib/pci/pci.h b/src/vlib/pci/pci.h
new file mode 100644
index 00000000000..811a6ff2336
--- /dev/null
+++ b/src/vlib/pci/pci.h
@@ -0,0 +1,251 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * pci.h: PCI definitions.
+ *
+ * Copyright (c) 2008 Eliot Dresselhaus
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef included_vlib_pci_h
+#define included_vlib_pci_h
+
+#include <vlib/vlib.h>
+#include <vlib/pci/pci_config.h>
+
+/*
+ * PCI device address: 16-bit domain, 8-bit bus, 5-bit slot and 3-bit
+ * function. The fields add up to 32 bits, so the whole address can also
+ * be read, written or compared as a single value through as_u32.
+ */
+typedef CLIB_PACKED (union
+ {
+ struct
+ {
+u16 domain; u8 bus; u8 slot: 5; u8 function:3;};
+ u32 as_u32;}) vlib_pci_addr_t;
+
+/* Description of one PCI device. */
+typedef struct vlib_pci_device
+{
+ /* Operating system handle for this device. */
+ uword os_handle;
+
+ /* Address (domain/bus/slot/function) of the device. */
+ vlib_pci_addr_t bus_address;
+
+ /*
+  * Configuration space image: 256 raw bytes in config_data, which can
+  * also be accessed through the type 0 / type 1 register layouts.
+  */
+ union
+ {
+ pci_config_type0_regs_t config0;
+ pci_config_type1_regs_t config1;
+ u8 config_data[256];
+ };
+
+ /* Interrupt handler */
+ void (*interrupt_handler) (struct vlib_pci_device * dev);
+
+ /* Driver name */
+ u8 *driver_name;
+
+ /* Numa Node */
+ int numa_node;
+
+ /* Device data */
+ u16 device_class;
+ u16 vendor_id;
+ u16 device_id;
+
+ /* Vital Product Data */
+ u8 *product_name;
+ u8 *vpd_r;
+ u8 *vpd_w;
+
+ /* Private data */
+ uword private_data;
+
+} vlib_pci_device_t;
+
+/* A vendor id / device id pair identifying one kind of PCI device. */
+typedef struct
+{
+ u16 vendor_id, device_id;
+} pci_device_id_t;
+
+/*
+ * Registration record for a PCI device driver. Records are chained into
+ * a singly linked list through next_registration (see
+ * PCI_REGISTER_DEVICE).
+ */
+typedef struct _pci_device_registration
+{
+ /* Driver init function. */
+ clib_error_t *(*init_function) (vlib_main_t * vm, vlib_pci_device_t * dev);
+
+ /* Interrupt handler */
+ void (*interrupt_handler) (vlib_pci_device_t * dev);
+
+ /* List of registrations */
+ struct _pci_device_registration *next_registration;
+
+ /*
+  * Vendor/device ids supported by this driver.
+  * NOTE(review): this is a flexible array member and no element count is
+  * stored; presumably the list ends with a sentinel entry - confirm
+  * against the code that walks it.
+  */
+ pci_device_id_t supported_devices[];
+} pci_device_registration_t;
+
+/*
+ * Global PCI state: the owning vlib main, the known PCI devices, the head
+ * of the list of driver registrations and a lookup table from PCI address
+ * to device index (going by the field name).
+ */
+typedef struct
+{
+ vlib_main_t *vlib_main;
+ vlib_pci_device_t *pci_devs;
+ pci_device_registration_t *pci_device_registrations;
+ uword *pci_dev_index_by_pci_addr;
+} vlib_pci_main_t;
+
+extern vlib_pci_main_t pci_main;
+
+/*
+ * Declare and register a PCI device driver registration named x.
+ *
+ * The macro forward-declares x, defines a constructor function that
+ * pushes &x onto the head of pci_main.pci_device_registrations, and ends
+ * with a second declaration of x so that the caller can append
+ * "= { ... };" to initialize it. Any extra macro arguments are placed in
+ * front of the type in both declarations (presumably storage-class
+ * specifiers such as static).
+ */
+#define PCI_REGISTER_DEVICE(x,...) \
+ __VA_ARGS__ pci_device_registration_t x; \
+static void __vlib_add_pci_device_registration_##x (void) \
+ __attribute__((__constructor__)) ; \
+static void __vlib_add_pci_device_registration_##x (void) \
+{ \
+ vlib_pci_main_t * pm = &pci_main; \
+ x.next_registration = pm->pci_device_registrations; \
+ pm->pci_device_registrations = &x; \
+} \
+__VA_ARGS__ pci_device_registration_t x
+
+clib_error_t *vlib_pci_bind_to_uio (vlib_pci_device_t * d,
+ char *uio_driver_name);
+
+/* Configuration space read/write. */
+clib_error_t *vlib_pci_read_write_config (vlib_pci_device_t * dev,
+ vlib_read_or_write_t read_or_write,
+ uword address,
+ void *data, u32 n_bytes);
+
+/*
+ * Generate the typed configuration space read helpers
+ * vlib_pci_read_config_{u32,u16,u8} (dev, address, data). Each helper
+ * calls vlib_pci_read_write_config with VLIB_READ, passing data as the
+ * buffer and the size of the type as the byte count, and returns that
+ * call's result.
+ */
+#define _(t) \
+static inline clib_error_t * \
+vlib_pci_read_config_##t (vlib_pci_device_t * dev, \
+ uword address, t * data) \
+{ \
+ return vlib_pci_read_write_config (dev, VLIB_READ,address, data, \
+ sizeof (data[0])); \
+}
+
+_(u32);
+_(u16);
+_(u8);
+
+#undef _
+
+/*
+ * Generate the typed configuration space write helpers
+ * vlib_pci_write_config_{u32,u16,u8} (dev, address, data). Each helper
+ * calls vlib_pci_read_write_config with VLIB_WRITE, passing data as the
+ * buffer and the size of the type as the byte count, and returns that
+ * call's result.
+ */
+#define _(t) \
+static inline clib_error_t * \
+vlib_pci_write_config_##t (vlib_pci_device_t * dev, uword address, \
+ t * data) \
+{ \
+ return vlib_pci_read_write_config (dev, VLIB_WRITE, \
+ address, data, sizeof (data[0])); \
+}
+
+_(u32);
+_(u16);
+_(u8);
+
+#undef _
+
+/*
+ * Clear the INTx-disable bit in the 16-bit register at configuration
+ * space offset 4 (the command register) using a read-modify-write.
+ * Returns the error, if any, reported by the config space read or write.
+ */
+static inline clib_error_t *
+vlib_pci_intr_enable (vlib_pci_device_t * dev)
+{
+  u16 cmd_reg;
+  clib_error_t *error = vlib_pci_read_config_u16 (dev, 4, &cmd_reg);
+
+  if (error == 0)
+    {
+      cmd_reg &= ~PCI_COMMAND_INTX_DISABLE;
+      error = vlib_pci_write_config_u16 (dev, 4, &cmd_reg);
+    }
+
+  return error;
+}
+
+/*
+ * Set the INTx-disable bit in the 16-bit register at configuration space
+ * offset 4 (the command register) using a read-modify-write.
+ * Returns the error, if any, reported by the config space read or write.
+ */
+static inline clib_error_t *
+vlib_pci_intr_disable (vlib_pci_device_t * dev)
+{
+  u16 cmd_reg;
+  clib_error_t *error = vlib_pci_read_config_u16 (dev, 4, &cmd_reg);
+
+  if (error == 0)
+    {
+      cmd_reg |= PCI_COMMAND_INTX_DISABLE;
+      error = vlib_pci_write_config_u16 (dev, 4, &cmd_reg);
+    }
+
+  return error;
+}
+
+/*
+ * Enable bus mastering for a device by setting the bus master bit in the
+ * command register (configuration space offset 4). The register is only
+ * written when the bit is not already set.
+ * Returns 0 when nothing had to be done, otherwise the result of the
+ * config space access (an error from the read, or the write's result).
+ */
+static inline clib_error_t *
+vlib_pci_bus_master_enable (vlib_pci_device_t * dev)
+{
+  clib_error_t *err;
+  u16 command;
+
+  /* Set bus master enable (BME) */
+  err = vlib_pci_read_config_u16 (dev, 4, &command);
+
+  if (err)
+    return err;
+
+  /* Nothing to do if bus mastering is already enabled. */
+  if (command & PCI_COMMAND_BUS_MASTER)
+    return 0;
+
+  command |= PCI_COMMAND_BUS_MASTER;
+
+  return vlib_pci_write_config_u16 (dev, 4, &command);
+}
+
+clib_error_t *vlib_pci_map_resource (vlib_pci_device_t * dev, u32 resource,
+ void **result);
+
+clib_error_t *vlib_pci_map_resource_fixed (vlib_pci_device_t * dev,
+ u32 resource, u8 * addr,
+ void **result);
+
+vlib_pci_device_t *vlib_get_pci_device (vlib_pci_addr_t * addr);
+/* Frees the device. */
+void vlib_pci_free_device (vlib_pci_device_t * dev);
+
+unformat_function_t unformat_vlib_pci_addr;
+format_function_t format_vlib_pci_addr;
+format_function_t format_vlib_pci_handle;
+format_function_t format_vlib_pci_link_speed;
+format_function_t format_vlib_pci_vpd;
+
+#endif /* included_vlib_pci_h */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vlib/pci/pci_config.h b/src/vlib/pci/pci_config.h
new file mode 100644
index 00000000000..92e56af6d57
--- /dev/null
+++ b/src/vlib/pci/pci_config.h
@@ -0,0 +1,731 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * pci_config.h: PCI configuration space definitions.
+ *
+ * Copyright (c) 2008 Eliot Dresselhaus
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef included_vlib_pci_config_h
+#define included_vlib_pci_config_h
+
+#include <vppinfra/byte_order.h>
+#include <vppinfra/error.h>
+
+/*
+ * PCI device class codes. The upper byte selects the base class and the
+ * lower byte the sub-class within it. Most groups end with an alias for
+ * the base class itself (e.g. PCI_CLASS_NETWORK), which has the same
+ * value as the group's xx00 entry.
+ */
+typedef enum
+{
+ PCI_CLASS_NOT_DEFINED = 0x0000,
+ PCI_CLASS_NOT_DEFINED_VGA = 0x0001,
+
+ PCI_CLASS_STORAGE_SCSI = 0x0100,
+ PCI_CLASS_STORAGE_IDE = 0x0101,
+ PCI_CLASS_STORAGE_FLOPPY = 0x0102,
+ PCI_CLASS_STORAGE_IPI = 0x0103,
+ PCI_CLASS_STORAGE_RAID = 0x0104,
+ PCI_CLASS_STORAGE_OTHER = 0x0180,
+ PCI_CLASS_STORAGE = 0x0100,
+
+ PCI_CLASS_NETWORK_ETHERNET = 0x0200,
+ PCI_CLASS_NETWORK_TOKEN_RING = 0x0201,
+ PCI_CLASS_NETWORK_FDDI = 0x0202,
+ PCI_CLASS_NETWORK_ATM = 0x0203,
+ PCI_CLASS_NETWORK_OTHER = 0x0280,
+ PCI_CLASS_NETWORK = 0x0200,
+
+ PCI_CLASS_DISPLAY_VGA = 0x0300,
+ PCI_CLASS_DISPLAY_XGA = 0x0301,
+ PCI_CLASS_DISPLAY_3D = 0x0302,
+ PCI_CLASS_DISPLAY_OTHER = 0x0380,
+ PCI_CLASS_DISPLAY = 0x0300,
+
+ PCI_CLASS_MULTIMEDIA_VIDEO = 0x0400,
+ PCI_CLASS_MULTIMEDIA_AUDIO = 0x0401,
+ PCI_CLASS_MULTIMEDIA_PHONE = 0x0402,
+ PCI_CLASS_MULTIMEDIA_OTHER = 0x0480,
+ PCI_CLASS_MULTIMEDIA = 0x0400,
+
+ PCI_CLASS_MEMORY_RAM = 0x0500,
+ PCI_CLASS_MEMORY_FLASH = 0x0501,
+ PCI_CLASS_MEMORY_OTHER = 0x0580,
+ PCI_CLASS_MEMORY = 0x0500,
+
+ PCI_CLASS_BRIDGE_HOST = 0x0600,
+ PCI_CLASS_BRIDGE_ISA = 0x0601,
+ PCI_CLASS_BRIDGE_EISA = 0x0602,
+ PCI_CLASS_BRIDGE_MC = 0x0603,
+ PCI_CLASS_BRIDGE_PCI = 0x0604,
+ PCI_CLASS_BRIDGE_PCMCIA = 0x0605,
+ PCI_CLASS_BRIDGE_NUBUS = 0x0606,
+ PCI_CLASS_BRIDGE_CARDBUS = 0x0607,
+ PCI_CLASS_BRIDGE_RACEWAY = 0x0608,
+ PCI_CLASS_BRIDGE_OTHER = 0x0680,
+ PCI_CLASS_BRIDGE = 0x0600,
+
+ PCI_CLASS_COMMUNICATION_SERIAL = 0x0700,
+ PCI_CLASS_COMMUNICATION_PARALLEL = 0x0701,
+ PCI_CLASS_COMMUNICATION_MULTISERIAL = 0x0702,
+ PCI_CLASS_COMMUNICATION_MODEM = 0x0703,
+ PCI_CLASS_COMMUNICATION_OTHER = 0x0780,
+ PCI_CLASS_COMMUNICATION = 0x0700,
+
+ PCI_CLASS_SYSTEM_PIC = 0x0800,
+ PCI_CLASS_SYSTEM_DMA = 0x0801,
+ PCI_CLASS_SYSTEM_TIMER = 0x0802,
+ PCI_CLASS_SYSTEM_RTC = 0x0803,
+ PCI_CLASS_SYSTEM_PCI_HOTPLUG = 0x0804,
+ PCI_CLASS_SYSTEM_OTHER = 0x0880,
+ PCI_CLASS_SYSTEM = 0x0800,
+
+ PCI_CLASS_INPUT_KEYBOARD = 0x0900,
+ PCI_CLASS_INPUT_PEN = 0x0901,
+ PCI_CLASS_INPUT_MOUSE = 0x0902,
+ PCI_CLASS_INPUT_SCANNER = 0x0903,
+ PCI_CLASS_INPUT_GAMEPORT = 0x0904,
+ PCI_CLASS_INPUT_OTHER = 0x0980,
+ PCI_CLASS_INPUT = 0x0900,
+
+ PCI_CLASS_DOCKING_GENERIC = 0x0a00,
+ PCI_CLASS_DOCKING_OTHER = 0x0a80,
+ PCI_CLASS_DOCKING = 0x0a00,
+
+ PCI_CLASS_PROCESSOR_386 = 0x0b00,
+ PCI_CLASS_PROCESSOR_486 = 0x0b01,
+ PCI_CLASS_PROCESSOR_PENTIUM = 0x0b02,
+ PCI_CLASS_PROCESSOR_ALPHA = 0x0b10,
+ PCI_CLASS_PROCESSOR_POWERPC = 0x0b20,
+ PCI_CLASS_PROCESSOR_MIPS = 0x0b30,
+ PCI_CLASS_PROCESSOR_CO = 0x0b40,
+ PCI_CLASS_PROCESSOR = 0x0b00,
+
+ PCI_CLASS_SERIAL_FIREWIRE = 0x0c00,
+ PCI_CLASS_SERIAL_ACCESS = 0x0c01,
+ PCI_CLASS_SERIAL_SSA = 0x0c02,
+ PCI_CLASS_SERIAL_USB = 0x0c03,
+ PCI_CLASS_SERIAL_FIBER = 0x0c04,
+ PCI_CLASS_SERIAL_SMBUS = 0x0c05,
+ PCI_CLASS_SERIAL = 0x0c00,
+
+ PCI_CLASS_INTELLIGENT_I2O = 0x0e00,
+ PCI_CLASS_INTELLIGENT = 0x0e00,
+
+ PCI_CLASS_SATELLITE_TV = 0x0f00,
+ PCI_CLASS_SATELLITE_AUDIO = 0x0f01,
+ PCI_CLASS_SATELLITE_VOICE = 0x0f03,
+ PCI_CLASS_SATELLITE_DATA = 0x0f04,
+ PCI_CLASS_SATELLITE = 0x0f00,
+
+ PCI_CLASS_CRYPT_NETWORK = 0x1000,
+ PCI_CLASS_CRYPT_ENTERTAINMENT = 0x1001,
+ PCI_CLASS_CRYPT_OTHER = 0x1080,
+ PCI_CLASS_CRYPT = 0x1000,
+
+ PCI_CLASS_SP_DPIO = 0x1100,
+ PCI_CLASS_SP_OTHER = 0x1180,
+ PCI_CLASS_SP = 0x1100,
+} pci_device_class_t;
+
+/*
+ * Return the base class of a device class code, i.e. the code with its
+ * low (sub-class) byte cleared.
+ */
+static inline pci_device_class_t
+pci_device_class_base (pci_device_class_t c)
+{
+  const int subclass_mask = 0xff;
+
+  return c & ~subclass_mask;
+}
+
+/*
+ * Under PCI, each device has 256 bytes of configuration address space,
+ * of which the first 64 bytes are standardized. This structure describes
+ * the registers at the very start of that space; the type 0 and type 1
+ * register layouts both begin with it. The #defines placed after a field
+ * name the bits or values of that field.
+ */
+typedef struct
+{
+ u16 vendor_id;
+ u16 device_id;
+
+ u16 command;
+#define PCI_COMMAND_IO (1 << 0) /* Enable response in I/O space */
+#define PCI_COMMAND_MEMORY (1 << 1) /* Enable response in Memory space */
+#define PCI_COMMAND_BUS_MASTER (1 << 2) /* Enable bus mastering */
+#define PCI_COMMAND_SPECIAL (1 << 3) /* Enable response to special cycles */
+#define PCI_COMMAND_WRITE_INVALIDATE (1 << 4) /* Use memory write and invalidate */
+#define PCI_COMMAND_VGA_PALETTE_SNOOP (1 << 5)
+#define PCI_COMMAND_PARITY (1 << 6)
+#define PCI_COMMAND_WAIT (1 << 7) /* Enable address/data stepping */
+#define PCI_COMMAND_SERR (1 << 8) /* Enable SERR */
+#define PCI_COMMAND_BACK_TO_BACK_WRITE (1 << 9)
+#define PCI_COMMAND_INTX_DISABLE (1 << 10) /* INTx Emulation Disable */
+
+ u16 status;
+#define PCI_STATUS_INTX_PENDING (1 << 3)
+#define PCI_STATUS_CAPABILITY_LIST (1 << 4)
+#define PCI_STATUS_66MHZ (1 << 5) /* Support 66 Mhz PCI 2.1 bus */
+#define PCI_STATUS_UDF (1 << 6) /* Support User Definable Features (obsolete) */
+#define PCI_STATUS_BACK_TO_BACK_WRITE (1 << 7) /* Accept fast-back to back */
+#define PCI_STATUS_PARITY_ERROR (1 << 8) /* Detected parity error */
+#define PCI_STATUS_DEVSEL_GET(x) ((x >> 9) & 3) /* DEVSEL timing */
+#define PCI_STATUS_DEVSEL_FAST (0 << 9)
+#define PCI_STATUS_DEVSEL_MEDIUM (1 << 9)
+#define PCI_STATUS_DEVSEL_SLOW (2 << 9)
+#define PCI_STATUS_SIG_TARGET_ABORT (1 << 11) /* Set on target abort */
+#define PCI_STATUS_REC_TARGET_ABORT (1 << 12) /* Master ack of " */
+#define PCI_STATUS_REC_MASTER_ABORT (1 << 13) /* Set on master abort */
+#define PCI_STATUS_SIG_SYSTEM_ERROR (1 << 14) /* Set when we drive SERR */
+#define PCI_STATUS_DETECTED_PARITY_ERROR (1 << 15)
+
+ u8 revision_id;
+ u8 programming_interface_class; /* Reg. Level Programming Interface */
+
+ /* 16-bit class code; values are listed in pci_device_class_t. */
+ pci_device_class_t device_class:16;
+
+ u8 cache_size;
+ u8 latency_timer;
+
+ u8 header_type;
+#define PCI_HEADER_TYPE_NORMAL 0
+#define PCI_HEADER_TYPE_BRIDGE 1
+#define PCI_HEADER_TYPE_CARDBUS 2
+
+ u8 bist;
+#define PCI_BIST_CODE_MASK 0x0f /* Return result */
+#define PCI_BIST_START 0x40 /* 1 to start BIST, 2 secs or less */
+#define PCI_BIST_CAPABLE 0x80 /* 1 if BIST capable */
+} pci_config_header_t;
+
+/*
+ * Byte swap the 16-bit fields of a config header in place. This is a
+ * no-op unless the build targets a big-endian architecture.
+ */
+always_inline void
+pci_config_header_little_to_host (pci_config_header_t * r)
+{
+  if (CLIB_ARCH_IS_BIG_ENDIAN)
+    {
+      r->vendor_id = clib_byte_swap_u16 (r->vendor_id);
+      r->device_id = clib_byte_swap_u16 (r->device_id);
+      r->command = clib_byte_swap_u16 (r->command);
+      r->status = clib_byte_swap_u16 (r->status);
+      r->device_class = clib_byte_swap_u16 (r->device_class);
+    }
+}
+
+/*
+ * Header type 0 (normal devices): the common header followed by the
+ * type 0 specific registers of the standardized configuration space.
+ */
+typedef struct
+{
+  pci_config_header_t header;
+
+  /*
+   * Base addresses specify locations in memory or I/O space.
+   * Decoded size can be determined by writing a value of
+   * 0xffffffff to the register, and reading it back. Only
+   * 1 bits are decoded.
+   */
+  u32 base_address[6];
+
+  /*
+   * CardBus CIS pointer. The register is 32 bits wide (offset 0x28);
+   * declaring it narrower shifts the subsystem ids below away from their
+   * hardware offsets 0x2c and 0x2e.
+   */
+  u32 cardbus_cis;
+
+  u16 subsystem_vendor_id;
+  u16 subsystem_id;
+
+  u32 rom_address;
+#define PCI_ROM_ADDRESS 0x30 /* Bits 31..11 are address, 10..1 reserved */
+#define PCI_ROM_ADDRESS_ENABLE 0x01
+#define PCI_ROM_ADDRESS_MASK (~0x7ffUL)
+
+  /* Offset of the first entry of the capability list. */
+  u8 first_capability_offset;
+  CLIB_PAD_FROM_TO (0x35, 0x3c);
+
+  u8 interrupt_line;
+  u8 interrupt_pin;
+  u8 min_grant;
+  u8 max_latency;
+
+  /* Start of the device-specific part, where capabilities live. */
+  u8 capability_data[0];
+} pci_config_type0_regs_t;
+
+/*
+ * Byte swap the multi-byte fields of a type 0 register block in place.
+ * This is a no-op unless the build targets a big-endian architecture.
+ */
+always_inline void
+pci_config_type0_little_to_host (pci_config_type0_regs_t * r)
+{
+  int i;
+  if (!CLIB_ARCH_IS_BIG_ENDIAN)
+    return;
+  pci_config_header_little_to_host (&r->header);
+#define _(f,t) r->f = clib_byte_swap_##t (r->f)
+  for (i = 0; i < ARRAY_LEN (r->base_address); i++)
+    _(base_address[i], u32);
+  _(cardbus_cis, u32);
+  _(subsystem_vendor_id, u16);
+  _(subsystem_id, u16);
+  _(rom_address, u32);
+#undef _
+}
+
+/*
+ * Header type 1 (PCI-to-PCI bridges): the common header followed by the
+ * bridge specific registers. The #defines placed after a field name the
+ * bits or masks of that field.
+ */
+typedef struct
+{
+ pci_config_header_t header;
+
+ u32 base_address[2];
+
+ /* Primary/secondary bus number. */
+ u8 primary_bus;
+ u8 secondary_bus;
+
+ /* Highest bus number behind the bridge */
+ u8 subordinate_bus;
+
+ u8 secondary_bus_latency_timer;
+
+ /* I/O range behind bridge. */
+ u8 io_base, io_limit;
+
+ /* Secondary status register, only bit 14 used */
+ u16 secondary_status;
+
+ /* Memory range behind bridge in units of 64k bytes. */
+ u16 memory_base, memory_limit;
+#define PCI_MEMORY_RANGE_TYPE_MASK 0x0fUL
+#define PCI_MEMORY_RANGE_MASK (~0x0fUL)
+
+ u16 prefetchable_memory_base, prefetchable_memory_limit;
+#define PCI_PREF_RANGE_TYPE_MASK 0x0fUL
+#define PCI_PREF_RANGE_TYPE_32 0x00
+#define PCI_PREF_RANGE_TYPE_64 0x01
+#define PCI_PREF_RANGE_MASK (~0x0fUL)
+
+ u32 prefetchable_memory_base_upper_32bits;
+ u32 prefetchable_memory_limit_upper_32bits;
+ u16 io_base_upper_16bits;
+ u16 io_limit_upper_16bits;
+
+ /* Same as for type 0. */
+ u8 capability_list_offset;
+ CLIB_PAD_FROM_TO (0x35, 0x37);
+
+ u32 rom_address;
+ CLIB_PAD_FROM_TO (0x3c, 0x3e);
+
+ u16 bridge_control;
+#define PCI_BRIDGE_CTL_PARITY 0x01 /* Enable parity detection on secondary interface */
+#define PCI_BRIDGE_CTL_SERR 0x02 /* The same for SERR forwarding */
+#define PCI_BRIDGE_CTL_NO_ISA 0x04 /* Disable bridging of ISA ports */
+#define PCI_BRIDGE_CTL_VGA 0x08 /* Forward VGA addresses */
+#define PCI_BRIDGE_CTL_MASTER_ABORT 0x20 /* Report master aborts */
+#define PCI_BRIDGE_CTL_BUS_RESET 0x40 /* Secondary bus reset */
+#define PCI_BRIDGE_CTL_FAST_BACK 0x80 /* Fast Back2Back enabled on secondary interface */
+
+ /* Start of the area following the standardized registers. */
+ u8 capability_data[0];
+} pci_config_type1_regs_t;
+
+/*
+ * Byte swap the multi-byte fields of a type 1 (bridge) register block in
+ * place. This is a no-op unless the build targets a big-endian
+ * architecture.
+ */
+always_inline void
+pci_config_type1_little_to_host (pci_config_type1_regs_t * r)
+{
+  int bar;
+
+  if (!CLIB_ARCH_IS_BIG_ENDIAN)
+    return;
+
+  pci_config_header_little_to_host (&r->header);
+
+  for (bar = 0; bar < ARRAY_LEN (r->base_address); bar++)
+    r->base_address[bar] = clib_byte_swap_u32 (r->base_address[bar]);
+
+  r->secondary_status = clib_byte_swap_u16 (r->secondary_status);
+  r->memory_base = clib_byte_swap_u16 (r->memory_base);
+  r->memory_limit = clib_byte_swap_u16 (r->memory_limit);
+  r->prefetchable_memory_base =
+    clib_byte_swap_u16 (r->prefetchable_memory_base);
+  r->prefetchable_memory_limit =
+    clib_byte_swap_u16 (r->prefetchable_memory_limit);
+  r->prefetchable_memory_base_upper_32bits =
+    clib_byte_swap_u32 (r->prefetchable_memory_base_upper_32bits);
+  r->prefetchable_memory_limit_upper_32bits =
+    clib_byte_swap_u32 (r->prefetchable_memory_limit_upper_32bits);
+  r->io_base_upper_16bits = clib_byte_swap_u16 (r->io_base_upper_16bits);
+  r->io_limit_upper_16bits = clib_byte_swap_u16 (r->io_limit_upper_16bits);
+  r->rom_address = clib_byte_swap_u32 (r->rom_address);
+  r->bridge_control = clib_byte_swap_u16 (r->bridge_control);
+}
+
+/*
+ * Capability ids, as stored in the 8-bit type field of
+ * pci_capability_regs_t.
+ */
+typedef enum pci_capability_type
+{
+ /* Power Management */
+ PCI_CAP_ID_PM = 1,
+
+ /* Accelerated Graphics Port */
+ PCI_CAP_ID_AGP = 2,
+
+ /* Vital Product Data */
+ PCI_CAP_ID_VPD = 3,
+
+ /* Slot Identification */
+ PCI_CAP_ID_SLOTID = 4,
+
+ /* Message Signalled Interrupts */
+ PCI_CAP_ID_MSI = 5,
+
+ /* CompactPCI HotSwap */
+ PCI_CAP_ID_CHSWP = 6,
+
+ /* PCI-X */
+ PCI_CAP_ID_PCIX = 7,
+
+ /* Hypertransport. */
+ PCI_CAP_ID_HYPERTRANSPORT = 8,
+
+ /* PCI Standard Hot-Plug Controller */
+ PCI_CAP_ID_SHPC = 0xc,
+
+ /* PCI Express */
+ PCI_CAP_ID_PCIE = 0x10,
+
+ /* MSI-X */
+ PCI_CAP_ID_MSIX = 0x11,
+} pci_capability_type_t;
+
+/*
+ * Common header for capabilities: an 8-bit capability type followed by
+ * the 8-bit configuration space offset of the next capability.
+ */
+/* *INDENT-OFF* */
+typedef CLIB_PACKED (struct
+ {
+ enum pci_capability_type type:8;
+ u8 next_offset;}) pci_capability_regs_t;
+/* *INDENT-ON* */
+
+/*
+ * Search the capability list of a configuration space image for the
+ * first capability whose type equals cap_type.
+ *
+ * Returns a pointer to the capability header inside the image, or 0 when
+ * the status register does not advertise a capability list or no match
+ * is found. The walk ends when the next offset is below 0x40, when a
+ * type byte of 0xff is read, or after 48 entries have been examined.
+ * Offsets are rounded down to a multiple of 4 before use.
+ */
+always_inline void *
+pci_config_find_capability (pci_config_type0_regs_t * t, int cap_type)
+{
+  pci_capability_regs_t *cap;
+  u32 offset;
+  u32 budget;
+
+  if ((t->header.status & PCI_STATUS_CAPABILITY_LIST) == 0)
+    return 0;
+
+  offset = t->first_capability_offset;
+  for (budget = 48; budget != 0 && offset >= 0x40; budget--)
+    {
+      cap = (void *) t + (offset & ~3);
+
+      if ((u8) cap->type == 0xff)
+        return 0;
+
+      if (cap->type == cap_type)
+        return cap;
+
+      offset = cap->next_offset;
+    }
+
+  return 0;
+}
+
+/*
+ * Power Management Registers: capability header, 16-bit capabilities and
+ * control words, then two single-byte registers. The #defines placed
+ * after a field name the bits or masks of that field.
+ */
+/* *INDENT-OFF* */
+typedef CLIB_PACKED (struct
+ {
+ pci_capability_regs_t header; u16 capabilities;
+#define PCI_PM_CAP_VER_MASK 0x0007 /* Version */
+#define PCI_PM_CAP_PME_CLOCK 0x0008 /* PME clock required */
+#define PCI_PM_CAP_RESERVED 0x0010 /* Reserved field */
+#define PCI_PM_CAP_DSI 0x0020 /* Device specific initialization */
+#define PCI_PM_CAP_AUX_POWER 0x01C0 /* Auxiliary power support mask */
+#define PCI_PM_CAP_D1 0x0200 /* D1 power state support */
+#define PCI_PM_CAP_D2 0x0400 /* D2 power state support */
+#define PCI_PM_CAP_PME 0x0800 /* PME pin supported */
+#define PCI_PM_CAP_PME_MASK 0xF800 /* PME Mask of all supported states */
+#define PCI_PM_CAP_PME_D0 0x0800 /* PME# from D0 */
+#define PCI_PM_CAP_PME_D1 0x1000 /* PME# from D1 */
+#define PCI_PM_CAP_PME_D2 0x2000 /* PME# from D2 */
+#define PCI_PM_CAP_PME_D3 0x4000 /* PME# from D3 (hot) */
+#define PCI_PM_CAP_PME_D3cold 0x8000 /* PME# from D3 (cold) */
+ u16 control;
+#define PCI_PM_CTRL_STATE_MASK 0x0003 /* Current power state (D0 to D3) */
+#define PCI_PM_CTRL_PME_ENABLE 0x0100 /* PME pin enable */
+#define PCI_PM_CTRL_DATA_SEL_MASK 0x1e00 /* Data select (??) */
+#define PCI_PM_CTRL_DATA_SCALE_MASK 0x6000 /* Data scale (??) */
+#define PCI_PM_CTRL_PME_STATUS 0x8000 /* PME pin status */
+ u8 extensions;
+#define PCI_PM_PPB_B2_B3 0x40 /* Stop clock when in D3hot (??) */
+#define PCI_PM_BPCC_ENABLE 0x80 /* Bus power/clock control enable (??) */
+ u8 data;}) pci_power_management_regs_t;
+/* *INDENT-ON* */
+
+/*
+ * AGP registers: capability header, version byte, one further byte, then
+ * 32-bit status and command registers. The #defines below apply to both
+ * the status and the command register.
+ */
+/* *INDENT-OFF* */
+typedef CLIB_PACKED (struct
+ {
+ pci_capability_regs_t header; u8 version;
+ u8 rest_of_capability_flags; u32 status; u32 command;
+ /* Command & status common bits. */
+#define PCI_AGP_RQ_MASK 0xff000000 /* Maximum number of requests - 1 */
+#define PCI_AGP_SBA 0x0200 /* Sideband addressing supported */
+#define PCI_AGP_64BIT 0x0020 /* 64-bit addressing supported */
+#define PCI_AGP_ALLOW_TRANSACTIONS 0x0100 /* Allow processing of AGP transactions */
+#define PCI_AGP_FW 0x0010 /* FW transfers supported/forced */
+#define PCI_AGP_RATE4 0x0004 /* 4x transfer rate supported */
+#define PCI_AGP_RATE2 0x0002 /* 2x transfer rate supported */
+#define PCI_AGP_RATE1 0x0001 /* 1x transfer rate supported */
+ }) pci_agp_regs_t;
+/* *INDENT-ON* */
+
+/*
+ * Vital Product Data capability: capability header, a 16-bit address
+ * register (15 address bits plus the flag bit PCI_VPD_ADDR_F) and a
+ * 32-bit data register.
+ */
+/* *INDENT-OFF* */
+typedef CLIB_PACKED (struct
+ {
+ pci_capability_regs_t header; u16 address;
+#define PCI_VPD_ADDR_MASK 0x7fff /* Address mask */
+#define PCI_VPD_ADDR_F 0x8000 /* Write 0, 1 indicates completion */
+ u32 data;}) pci_vpd_regs_t;
+/* *INDENT-ON* */
+
+/*
+ * Slot Identification capability: capability header, the esr byte (slot
+ * count in the low 5 bits plus the first-in-chassis flag) and the
+ * chassis byte.
+ */
+/* *INDENT-OFF* */
+typedef CLIB_PACKED (struct
+ {
+ pci_capability_regs_t header; u8 esr;
+#define PCI_SID_ESR_NSLOTS 0x1f /* Number of expansion slots available */
+#define PCI_SID_ESR_FIC 0x20 /* First In Chassis Flag */
+ u8 chassis;}) pci_sid_regs_t;
+/* *INDENT-ON* */
+
+/*
+ * Message Signalled Interrupts registers, layout with a single 32-bit
+ * message address. The PCI_MSI_FLAGS_* definitions describe the flags
+ * field.
+ */
+/* *INDENT-OFF* */
+typedef CLIB_PACKED (struct
+ {
+ pci_capability_regs_t header; u16 flags;
+#define PCI_MSI_FLAGS_ENABLE (1 << 0) /* MSI feature enabled */
+#define PCI_MSI_FLAGS_GET_MAX_QUEUE_SIZE(x) ((x >> 1) & 0x7)
+#define PCI_MSI_FLAGS_MAX_QUEUE_SIZE(x) (((x) & 0x7) << 1)
+#define PCI_MSI_FLAGS_GET_QUEUE_SIZE(x) ((x >> 4) & 0x7)
+#define PCI_MSI_FLAGS_QUEUE_SIZE(x) (((x) & 0x7) << 4)
+#define PCI_MSI_FLAGS_64BIT (1 << 7) /* 64-bit addresses allowed */
+#define PCI_MSI_FLAGS_MASKBIT (1 << 8) /* Per-vector masking capable */
+ u32 address; u32 data; u32 mask_bits;}) pci_msi32_regs_t;
+/* *INDENT-ON* */
+
+/*
+ * Message Signalled Interrupts registers, layout with the message
+ * address held in two 32-bit words (address[2]).
+ */
+/* *INDENT-OFF* */
+typedef CLIB_PACKED (struct
+ {
+ pci_capability_regs_t header; u16 flags;
+ u32 address[2];
+ u32 data; u32 mask_bits;}) pci_msi64_regs_t;
+/* *INDENT-ON* */
+
+/*
+ * CompactPCI Hotswap Register: capability header followed by a 16-bit
+ * control/status word whose bits are named by the PCI_CHSWP_* defines.
+ */
+/* *INDENT-OFF* */
+typedef CLIB_PACKED (struct
+ {
+ pci_capability_regs_t header; u16 control_status;
+#define PCI_CHSWP_DHA 0x01 /* Device Hiding Arm */
+#define PCI_CHSWP_EIM 0x02 /* ENUM# Signal Mask */
+#define PCI_CHSWP_PIE 0x04 /* Pending Insert or Extract */
+#define PCI_CHSWP_LOO 0x08 /* LED On / Off */
+#define PCI_CHSWP_PI 0x30 /* Programming Interface */
+#define PCI_CHSWP_EXT 0x40 /* ENUM# status - extraction */
+#define PCI_CHSWP_INS 0x80 /* ENUM# status - insertion */
+ }) pci_chswp_regs_t;
+/* *INDENT-ON* */
+
+/*
+ * PCIX registers: capability header, 16-bit command register and 32-bit
+ * status register. The PCIX_CMD_* definitions describe the command field
+ * and the PCIX_STATUS_* definitions the status field.
+ */
+/* *INDENT-OFF* */
+typedef CLIB_PACKED (struct
+ {
+ pci_capability_regs_t header; u16 command;
+#define PCIX_CMD_DPERR_E 0x0001 /* Data Parity Error Recovery Enable */
+#define PCIX_CMD_ERO 0x0002 /* Enable Relaxed Ordering */
+#define PCIX_CMD_MAX_READ 0x000c /* Max Memory Read Byte Count */
+#define PCIX_CMD_MAX_SPLIT 0x0070 /* Max Outstanding Split Transactions */
+#define PCIX_CMD_VERSION(x) (((x) >> 12) & 3) /* Version */
+ u32 status;
+#define PCIX_STATUS_DEVFN 0x000000ff /* A copy of devfn */
+#define PCIX_STATUS_BUS 0x0000ff00 /* A copy of bus nr */
+#define PCIX_STATUS_64BIT 0x00010000 /* 64-bit device */
+#define PCIX_STATUS_133MHZ 0x00020000 /* 133 MHz capable */
+#define PCIX_STATUS_SPL_DISC 0x00040000 /* Split Completion Discarded */
+#define PCIX_STATUS_UNX_SPL 0x00080000 /* Unexpected Split Completion */
+#define PCIX_STATUS_COMPLEX 0x00100000 /* Device Complexity */
+#define PCIX_STATUS_MAX_READ 0x00600000 /* Designed Max Memory Read Count */
+#define PCIX_STATUS_MAX_SPLIT 0x03800000 /* Designed Max Outstanding Split Transactions */
+#define PCIX_STATUS_MAX_CUM 0x1c000000 /* Designed Max Cumulative Read Size */
+#define PCIX_STATUS_SPL_ERR 0x20000000 /* Rcvd Split Completion Error Msg */
+#define PCIX_STATUS_266MHZ 0x40000000 /* 266 MHz capable */
+#define PCIX_STATUS_533MHZ 0x80000000 /* 533 MHz capable */
+ }) pcix_config_regs_t;
+/* *INDENT-ON* */
+
+/*
+ * Convert a size in bytes to its size code, min_log2 (bytes) - 7.
+ * The size is asserted to be a power of two no larger than 4096.
+ */
+static inline int
+pcie_size_to_code (int bytes)
+{
+  int code;
+
+  ASSERT (is_pow2 (bytes));
+  ASSERT (bytes <= 4096);
+
+  code = min_log2 (bytes) - 7;
+  return code;
+}
+
+/*
+ * Convert a size code to a size in bytes, 1 << (code + 7), so code 0
+ * yields 128. The result is asserted not to exceed 4096.
+ */
+static inline int
+pcie_code_to_size (int code)
+{
+  int shift = code + 7;
+  int size = 1 << shift;
+
+  ASSERT (size <= 4096);
+  return size;
+}
+
+/*
+ * PCI Express capability registers: capability header followed by the
+ * capability, device, link, slot and root register groups and their
+ * "2" counterparts. The #defines placed after a field name the bits,
+ * masks or accessors of that field.
+ */
+/* *INDENT-OFF* */
+typedef CLIB_PACKED (struct
+ {
+ pci_capability_regs_t header; u16 pcie_capabilities;
+#define PCIE_CAP_VERSION(x) (((x) >> 0) & 0xf)
+#define PCIE_CAP_DEVICE_TYPE(x) (((x) >> 4) & 0xf)
+#define PCIE_DEVICE_TYPE_ENDPOINT 0
+#define PCIE_DEVICE_TYPE_LEGACY_ENDPOINT 1
+#define PCIE_DEVICE_TYPE_ROOT_PORT 4
+ /* Upstream/downstream port of PCI Express switch. */
+#define PCIE_DEVICE_TYPE_SWITCH_UPSTREAM 5
+#define PCIE_DEVICE_TYPE_SWITCH_DOWNSTREAM 6
+#define PCIE_DEVICE_TYPE_PCIE_TO_PCI_BRIDGE 7
+#define PCIE_DEVICE_TYPE_PCI_TO_PCIE_BRIDGE 8
+ /* Root complex integrated endpoint. */
+#define PCIE_DEVICE_TYPE_ROOT_COMPLEX_ENDPOINT 9
+#define PCIE_DEVICE_TYPE_ROOT_COMPLEX_EVENT_COLLECTOR 10
+ /* NOTE(review): "SLOW" below looks like a typo for "SLOT" - confirm. */
+#define PCIE_CAP_SLOW_IMPLEMENTED (1 << 8)
+#define PCIE_CAP_MSI_IRQ(x) (((x) >> 9) & 0x1f)
+ u32 dev_capabilities;
+#define PCIE_DEVCAP_MAX_PAYLOAD(x) (128 << (((x) >> 0) & 0x7))
+#define PCIE_DEVCAP_PHANTOM_BITS(x) (((x) >> 3) & 0x3)
+#define PCIE_DEVCAP_EXTENTED_TAG (1 << 5)
+#define PCIE_DEVCAP_L0S 0x1c0 /* L0s Acceptable Latency */
+#define PCIE_DEVCAP_L1 0xe00 /* L1 Acceptable Latency */
+#define PCIE_DEVCAP_ATN_BUT 0x1000 /* Attention Button Present */
+#define PCIE_DEVCAP_ATN_IND 0x2000 /* Attention Indicator Present */
+#define PCIE_DEVCAP_PWR_IND 0x4000 /* Power Indicator Present */
+#define PCIE_DEVCAP_PWR_VAL 0x3fc0000 /* Slot Power Limit Value */
+#define PCIE_DEVCAP_PWR_SCL 0xc000000 /* Slot Power Limit Scale */
+ u16 dev_control;
+#define PCIE_CTRL_CERE 0x0001 /* Correctable Error Reporting En. */
+#define PCIE_CTRL_NFERE 0x0002 /* Non-Fatal Error Reporting Enable */
+#define PCIE_CTRL_FERE 0x0004 /* Fatal Error Reporting Enable */
+#define PCIE_CTRL_URRE 0x0008 /* Unsupported Request Reporting En. */
+#define PCIE_CTRL_RELAX_EN 0x0010 /* Enable relaxed ordering */
+#define PCIE_CTRL_MAX_PAYLOAD(n) (((n) & 7) << 5)
+#define PCIE_CTRL_EXT_TAG 0x0100 /* Extended Tag Field Enable */
+#define PCIE_CTRL_PHANTOM 0x0200 /* Phantom Functions Enable */
+#define PCIE_CTRL_AUX_PME 0x0400 /* Auxiliary Power PM Enable */
+#define PCIE_CTRL_NOSNOOP_EN 0x0800 /* Enable No Snoop */
+#define PCIE_CTRL_MAX_READ_REQUEST(n) (((n) & 7) << 12)
+ u16 dev_status;
+#define PCIE_DEVSTA_AUXPD 0x10 /* AUX Power Detected */
+#define PCIE_DEVSTA_TRPND 0x20 /* Transactions Pending */
+ u32 link_capabilities; u16 link_control; u16 link_status;
+ u32 slot_capabilities;
+ u16 slot_control; u16 slot_status; u16 root_control;
+#define PCIE_RTCTL_SECEE 0x01 /* System Error on Correctable Error */
+#define PCIE_RTCTL_SENFEE 0x02 /* System Error on Non-Fatal Error */
+#define PCIE_RTCTL_SEFEE 0x04 /* System Error on Fatal Error */
+#define PCIE_RTCTL_PMEIE 0x08 /* PME Interrupt Enable */
+#define PCIE_RTCTL_CRSSVE 0x10 /* CRS Software Visibility Enable */
+ u16 root_capabilities;
+ u32 root_status;
+ u32 dev_capabilities2;
+ u16 dev_control2;
+ u16 dev_status2;
+ u32 link_capabilities2;
+ u16 link_control2;
+ u16 link_status2;
+ u32 slot_capabilities2; u16 slot_control2;
+ u16 slot_status2;}) pcie_config_regs_t;
+/* *INDENT-ON* */
+
+/*
+ * PCI express extended capability ids, as stored in the 16-bit type
+ * field of pcie_capability_regs_t.
+ */
+typedef enum pcie_capability_type
+{
+ PCIE_CAP_ADVANCED_ERROR = 1,
+ PCIE_CAP_VC = 2,
+ PCIE_CAP_DSN = 3,
+ PCIE_CAP_PWR = 4,
+} pcie_capability_type_t;
+
+/*
+ * Common header for PCI express extended capabilities: a 16-bit
+ * capability type, a 4-bit version and a 12-bit next_capability field.
+ */
+/* *INDENT-OFF* */
+typedef CLIB_PACKED (struct
+ {
+enum pcie_capability_type type:16; u16 version: 4; u16 next_capability:12;})
+ /* *INDENT-ON* */
+pcie_capability_regs_t;
+
+/*
+ * Advanced error reporting registers: extended capability header,
+ * uncorrectable and correctable error status/mask registers, control,
+ * a four-word log and the root command/status and error source
+ * registers. The PCIE_ERROR_UNC_* definitions name bits of
+ * uncorrectable_status and the PCIE_ERROR_COR_* definitions bits of
+ * correctable_status.
+ */
+/* *INDENT-OFF* */
+typedef CLIB_PACKED (struct
+ {
+ pcie_capability_regs_t header; u32 uncorrectable_status;
+#define PCIE_ERROR_UNC_LINK_TRAINING (1 << 0)
+#define PCIE_ERROR_UNC_DATA_LINK_PROTOCOL (1 << 4)
+#define PCIE_ERROR_UNC_SURPRISE_DOWN (1 << 5)
+#define PCIE_ERROR_UNC_POISONED_TLP (1 << 12)
+#define PCIE_ERROR_UNC_FLOW_CONTROL (1 << 13)
+#define PCIE_ERROR_UNC_COMPLETION_TIMEOUT (1 << 14)
+#define PCIE_ERROR_UNC_COMPLETER_ABORT (1 << 15)
+#define PCIE_ERROR_UNC_UNEXPECTED_COMPLETION (1 << 16)
+#define PCIE_ERROR_UNC_RX_OVERFLOW (1 << 17)
+#define PCIE_ERROR_UNC_MALFORMED_TLP (1 << 18)
+#define PCIE_ERROR_UNC_CRC_ERROR (1 << 19)
+#define PCIE_ERROR_UNC_UNSUPPORTED_REQUEST (1 << 20)
+ u32 uncorrectable_mask;
+ u32 uncorrectable_severity; u32 correctable_status;
+#define PCIE_ERROR_COR_RX_ERROR (1 << 0)
+#define PCIE_ERROR_COR_BAD_TLP (1 << 6)
+#define PCIE_ERROR_COR_BAD_DLLP (1 << 7)
+#define PCIE_ERROR_COR_REPLAY_ROLLOVER (1 << 8)
+#define PCIE_ERROR_COR_REPLAY_TIMER (1 << 12)
+#define PCIE_ERROR_COR_ADVISORY (1 << 13)
+ u32 correctable_mask;
+ u32 control;
+ u32 log[4];
+ u32 root_command;
+ u32 root_status; u16 correctable_error_source;
+ u16 error_source;}) pcie_advanced_error_regs_t;
+/* *INDENT-ON* */
+
+/*
+ * Virtual Channel
+ * NOTE(review): these look like byte offsets of registers within the
+ * Virtual Channel extended capability - confirm before relying on it.
+ */
+#define PCI_VC_PORT_REG1 4
+#define PCI_VC_PORT_REG2 8
+#define PCI_VC_PORT_CTRL 12
+#define PCI_VC_PORT_STATUS 14
+#define PCI_VC_RES_CAP 16
+#define PCI_VC_RES_CTRL 20
+#define PCI_VC_RES_STATUS 26
+
+/*
+ * Power Budgeting
+ * PCI_PWR_DSR, PCI_PWR_DATA and PCI_PWR_CAP name the registers; the
+ * PCI_PWR_DATA_* and PCI_PWR_CAP_BUDGET macros extract fields from a
+ * value read from the data and capability registers respectively.
+ */
+#define PCI_PWR_DSR 4 /* Data Select Register */
+#define PCI_PWR_DATA 8 /* Data Register */
+#define PCI_PWR_DATA_BASE(x) ((x) & 0xff) /* Base Power */
+#define PCI_PWR_DATA_SCALE(x) (((x) >> 8) & 3) /* Data Scale */
+#define PCI_PWR_DATA_PM_SUB(x) (((x) >> 10) & 7) /* PM Sub State */
+#define PCI_PWR_DATA_PM_STATE(x) (((x) >> 13) & 3) /* PM State */
+#define PCI_PWR_DATA_TYPE(x) (((x) >> 15) & 7) /* Type */
+#define PCI_PWR_DATA_RAIL(x) (((x) >> 18) & 7) /* Power Rail */
+#define PCI_PWR_CAP 12 /* Capability */
+#define PCI_PWR_CAP_BUDGET(x) ((x) & 1) /* Included in system budget */
+
+#endif /* included_vlib_pci_config_h */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vlib/physmem.h b/src/vlib/physmem.h
new file mode 100644
index 00000000000..9e7d52a6226
--- /dev/null
+++ b/src/vlib/physmem.h
@@ -0,0 +1,108 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * physmem.h: virtual <-> physical memory mapping for VLIB buffers
+ *
+ * Copyright (c) 2008 Eliot Dresselhaus
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef included_vlib_physmem_h
+#define included_vlib_physmem_h
+
+/* Address range used by the physmem helpers in this header.  The helpers
+ * treat it as the half-open interval [start, end) and use size as the
+ * upper bound for offsets; presumably size == end - start, which is not
+ * enforced here. */
+typedef struct
+{
+ uword start, end, size;
+} vlib_physmem_region_t;
+
+/* State needed to translate between virtual addresses, region offsets and
+ * physical addresses. */
+typedef struct
+{
+ /* Virtual address range covered by this physmem instance. */
+ vlib_physmem_region_t virtual;
+
+ /* log2 of the page size in bytes; offsets are shifted right by this
+ * amount to obtain a page index. */
+ uword log2_n_bytes_per_page;
+
+ /* (1 << log2_n_bytes_per_page) - 1: mask selecting the offset within a page. */
+ uword page_mask;
+
+ /* Vector indexed by page index; each entry is added to the in-page
+ * offset to form a physical address.  An entry of 0 is rejected by the
+ * ASSERT in vlib_physmem_offset_to_physical. */
+ u64 *page_table;
+
+ /* is fake physmem */
+ u8 is_fake;
+} vlib_physmem_main_t;
+
+/* Translate byte offset O within the physmem region into a physical
+   address: the page table entry for O's page plus O's offset inside
+   that page.  Debug builds assert that O is inside the region and that
+   the page has a non-zero table entry. */
+always_inline u64
+vlib_physmem_offset_to_physical (vlib_physmem_main_t * pm, uword o)
+{
+  uword pi, within_page;
+
+  ASSERT (o < pm->virtual.size);
+
+  pi = o >> pm->log2_n_bytes_per_page;
+  ASSERT (pm->page_table[pi] != 0);
+
+  within_page = o & pm->page_mask;
+  return vec_elt (pm->page_table, pi) + within_page;
+}
+
+/* Return non-zero when address P lies in [virtual.start, virtual.end). */
+always_inline int
+vlib_physmem_is_virtual (vlib_physmem_main_t * pm, uword p)
+{
+  if (p < pm->virtual.start)
+    return 0;
+  return p < pm->virtual.end;
+}
+
+/* Return the byte offset of pointer P from the start of the physmem
+   virtual region.  Debug builds assert that P lies inside the region.
+
+   The former second ASSERT compared the uword offset with the very
+   expression it had just been assigned from, so it could never fire,
+   and its "must fit in 32 bits" comment did not match the uword return
+   type; both have been dropped. */
+always_inline uword
+vlib_physmem_offset_of (vlib_physmem_main_t * pm, void *p)
+{
+  uword a = pointer_to_uword (p);
+
+  ASSERT (vlib_physmem_is_virtual (pm, a));
+
+  return a - pm->virtual.start;
+}
+
+/* Inverse of vlib_physmem_offset_of: turn a region-relative byte OFFSET
+   back into a pointer.  Debug builds assert the offset is in range. */
+always_inline void *
+vlib_physmem_at_offset (vlib_physmem_main_t * pm, uword offset)
+{
+  uword va;
+
+  ASSERT (offset < pm->virtual.size);
+
+  va = pm->virtual.start + offset;
+  return uword_to_pointer (va, void *);
+}
+
+#endif /* included_vlib_physmem_h */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vlib/threads.c b/src/vlib/threads.c
new file mode 100644
index 00000000000..c5e58bc001a
--- /dev/null
+++ b/src/vlib/threads.c
@@ -0,0 +1,1492 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#define _GNU_SOURCE
+
+#include <signal.h>
+#include <math.h>
+#include <vppinfra/format.h>
+#include <vlib/vlib.h>
+
+#include <vlib/threads.h>
+#include <vlib/unix/cj.h>
+
+
+#if DPDK==1
+#include <rte_config.h>
+#include <rte_common.h>
+#include <rte_eal.h>
+#include <rte_launch.h>
+#include <rte_lcore.h>
+#endif
+DECLARE_CJ_GLOBAL_LOG;
+
+#define FRAME_QUEUE_NELTS 32
+
+
+#if DPDK==1
+/*
+ * Weak definitions of DPDK symbols used in this file.
+ * Needed for linking test programs without DPDK libs.
+ */
+unsigned __thread __attribute__ ((weak)) RTE_PER_LCORE (_lcore_id);
+struct lcore_config __attribute__ ((weak)) lcore_config[];
+unsigned __attribute__ ((weak)) rte_socket_id ();
+int __attribute__ ((weak)) rte_eal_remote_launch ();
+#endif
+/* Return the length of vector P as reported by vec_len.
+   NOTE(review): presumably a short-named helper for use from a debugger. */
+u32
+vl (void *p)
+{
+  u32 n_elts = vec_len (p);
+
+  return n_elts;
+}
+
+vlib_worker_thread_t *vlib_worker_threads;
+vlib_thread_main_t vlib_thread_main;
+
+/* Derive the calling thread's index from the address of a local variable:
+   the distance from vlib_thread_stacks[0] is divided by the per-thread
+   stack size.  Returns 0 when no thread stacks exist or when the
+   computed index is out of range. */
+uword
+os_get_cpu_number (void)
+{
+  void *stack_addr;
+  uword index;
+  u32 n_stacks = vec_len (vlib_thread_stacks);
+
+  if (n_stacks == 0)
+    return 0;
+
+  /* The address of any local lies on the current stack. */
+  stack_addr = &stack_addr;
+
+  index = ((uword) stack_addr - (uword) vlib_thread_stacks[0])
+    >> VLIB_LOG2_THREAD_STACK_SIZE;
+
+  /* "processes" have their own stacks, and they always run in thread 0 */
+  if (index >= n_stacks)
+    return 0;
+
+  return index;
+}
+
+/* Number of thread stacks allocated, or 1 when none have been. */
+uword
+os_get_ncpus (void)
+{
+  u32 n_stacks = vec_len (vlib_thread_stacks);
+
+  return n_stacks ? n_stacks : 1;
+}
+
+/* Set the calling thread's name to NAME via pthread_setname_np; a
+   non-zero result is reported with clib_warning and otherwise ignored. */
+void
+vlib_set_thread_name (char *name)
+{
+  int pthread_setname_np (pthread_t __target_thread, const char *__name);
+  pthread_t self = pthread_self ();
+  int err;
+
+  if (!self)
+    return;
+
+  err = pthread_setname_np (self, name);
+  if (err != 0)
+    clib_warning ("pthread_setname_np returned %d", err);
+}
+
+/* vec_sort_with_function comparator over vlib_thread_registration_t
+   pointers: orders by ascending no_data_structure_clone. */
+static int
+sort_registrations_by_no_clone (void *a0, void *a1)
+{
+  vlib_thread_registration_t *r0 = *(vlib_thread_registration_t **) a0;
+  vlib_thread_registration_t *r1 = *(vlib_thread_registration_t **) a1;
+  i32 k0 = (i32) r0->no_data_structure_clone;
+  i32 k1 = (i32) r1->no_data_structure_clone;
+
+  return k0 - k1;
+}
+
+/* Read the first line (at most 255 characters) of FILENAME and parse it
+   with unformat_bitmap_list.  Returns the resulting bitmap, or 0 when
+   the file cannot be opened or read; a parse failure is reported with
+   clib_warning. */
+static uword *
+vlib_sysfs_list_to_bitmap (char *filename)
+{
+  uword *bitmap = 0;
+  u8 *line = 0;
+  FILE *f = fopen (filename, "r");
+
+  if (f == NULL)
+    return bitmap;
+
+  vec_validate (line, 256 - 1);
+  if (fgets ((char *) line, 256, f))
+    {
+      unformat_input_t input;
+
+      unformat_init_string (&input, (char *) line, strlen ((char *) line));
+      if (unformat (&input, "%U", unformat_bitmap_list, &bitmap) != 1)
+        clib_warning ("unformat_bitmap_list failed");
+      unformat_free (&input);
+    }
+
+  vec_free (line);
+  fclose (f);
+  return bitmap;
+}
+
+
+/* Called early in the init sequence */
+
+/*
+ * Early thread setup.  Reads the online cpu and node lists from sysfs,
+ * removes skip_cores cpus from the available set, selects the main thread
+ * cpu, fills in vlib_worker_threads[0] for the calling thread, applies the
+ * optional scheduler policy/priority, then walks the registered thread
+ * types assigning cpus to each and sizing vlib_worker_threads.
+ *
+ * vm is not referenced in this function.
+ * Returns 0 on success, or a clib_error_t describing the cpu that could
+ * not be allocated.
+ *
+ * NOTE(review): the error returns below leave avail_cpu allocated.
+ */
+clib_error_t *
+vlib_thread_init (vlib_main_t * vm)
+{
+ vlib_thread_main_t *tm = &vlib_thread_main;
+ vlib_worker_thread_t *w;
+ vlib_thread_registration_t *tr;
+ u32 n_vlib_mains = 1;
+ u32 first_index = 1;
+ u32 i;
+ uword *avail_cpu;
+
+ /* get bitmaps of active cpu cores and sockets */
+ tm->cpu_core_bitmap =
+ vlib_sysfs_list_to_bitmap ("/sys/devices/system/cpu/online");
+ tm->cpu_socket_bitmap =
+ vlib_sysfs_list_to_bitmap ("/sys/devices/system/node/online");
+
+ /* Working copy: a cpu's bit is cleared as soon as it is handed out. */
+ avail_cpu = clib_bitmap_dup (tm->cpu_core_bitmap);
+
+ /* skip cores */
+ for (i = 0; i < tm->skip_cores; i++)
+ {
+ uword c = clib_bitmap_first_set (avail_cpu);
+ if (c == ~0)
+ return clib_error_return (0, "no available cpus to skip");
+
+ avail_cpu = clib_bitmap_set (avail_cpu, c, 0);
+ }
+
+ /* grab cpu for main thread */
+ /* A main_lcore of 0 is treated as "not configured": the first cpu still
+ * available is used instead. */
+ if (!tm->main_lcore)
+ {
+ tm->main_lcore = clib_bitmap_first_set (avail_cpu);
+ /* NOTE(review): the (u8) cast suggests main_lcore is an 8-bit field --
+ * confirm against its declaration. */
+ if (tm->main_lcore == (u8) ~ 0)
+ return clib_error_return (0, "no available cpus to be used for the"
+ " main thread");
+ }
+ else
+ {
+ if (clib_bitmap_get (avail_cpu, tm->main_lcore) == 0)
+ return clib_error_return (0, "cpu %u is not available to be used"
+ " for the main thread", tm->main_lcore);
+ }
+ avail_cpu = clib_bitmap_set (avail_cpu, tm->main_lcore, 0);
+
+ /* assume that there is socket 0 only if there is no data from sysfs */
+ if (!tm->cpu_socket_bitmap)
+ tm->cpu_socket_bitmap = clib_bitmap_set (0, 0, 1);
+
+ /* pin main thread to main_lcore */
+ /* Only done in non-DPDK builds; the result of pthread_setaffinity_np is
+ * not checked. */
+#if DPDK==0
+ {
+ cpu_set_t cpuset;
+ CPU_ZERO (&cpuset);
+ CPU_SET (tm->main_lcore, &cpuset);
+ pthread_setaffinity_np (pthread_self (), sizeof (cpu_set_t), &cpuset);
+ }
+#endif
+
+ /* as many threads as stacks... */
+ vec_validate_aligned (vlib_worker_threads, vec_len (vlib_thread_stacks) - 1,
+ CLIB_CACHE_LINE_BYTES);
+
+ /* Preallocate thread 0 */
+ _vec_len (vlib_worker_threads) = 1;
+ w = vlib_worker_threads;
+ w->thread_mheap = clib_mem_get_heap ();
+ w->thread_stack = vlib_thread_stacks[0];
+ w->lcore_id = tm->main_lcore;
+ w->lwp = syscall (SYS_gettid);
+ w->thread_id = pthread_self ();
+ tm->n_vlib_mains = 1;
+
+ /* ~0 means "not configured" for both sched_policy and sched_priority. */
+ if (tm->sched_policy != ~0)
+ {
+ struct sched_param sched_param;
+ if (!sched_getparam (w->lwp, &sched_param))
+ {
+ if (tm->sched_priority != ~0)
+ sched_param.sched_priority = tm->sched_priority;
+ sched_setscheduler (w->lwp, tm->sched_policy, &sched_param);
+ }
+ }
+
+ /* assign threads to cores and set n_vlib_mains */
+ tr = tm->next;
+
+ /* Flatten the registration linked list into a vector so it can be sorted. */
+ while (tr)
+ {
+ vec_add1 (tm->registrations, tr);
+ tr = tr->next;
+ }
+
+ vec_sort_with_function (tm->registrations, sort_registrations_by_no_clone);
+
+ for (i = 0; i < vec_len (tm->registrations); i++)
+ {
+ int j;
+ tr = tm->registrations[i];
+ /* Each registration owns tr->count consecutive worker thread indices
+ * starting at first_index. */
+ tr->first_index = first_index;
+ first_index += tr->count;
+ n_vlib_mains += (tr->no_data_structure_clone == 0) ? tr->count : 0;
+
+ /* construct coremask */
+ if (tr->use_pthreads || !tr->count)
+ continue;
+
+ /* An already-set coremask must consist of cpus that are still
+ * available; otherwise the first tr->count available cpus are taken. */
+ if (tr->coremask)
+ {
+ uword c;
+ /* *INDENT-OFF* */
+ clib_bitmap_foreach (c, tr->coremask, ({
+ if (clib_bitmap_get(avail_cpu, c) == 0)
+ return clib_error_return (0, "cpu %u is not available to be used"
+ " for the '%s' thread",c, tr->name);
+
+ avail_cpu = clib_bitmap_set(avail_cpu, c, 0);
+ }));
+/* *INDENT-ON* */
+
+ }
+ else
+ {
+ for (j = 0; j < tr->count; j++)
+ {
+ uword c = clib_bitmap_first_set (avail_cpu);
+ if (c == ~0)
+ return clib_error_return (0,
+ "no available cpus to be used for"
+ " the '%s' thread", tr->name);
+
+ avail_cpu = clib_bitmap_set (avail_cpu, c, 0);
+ tr->coremask = clib_bitmap_set (tr->coremask, c, 1);
+ }
+ }
+ }
+
+ clib_bitmap_free (avail_cpu);
+
+ tm->n_vlib_mains = n_vlib_mains;
+
+ /* Make room for the main thread plus every registered thread. */
+ vec_validate_aligned (vlib_worker_threads, first_index - 1,
+ CLIB_CACHE_LINE_BYTES);
+
+ return 0;
+}
+
+/* Append one entry to vlib_worker_threads and give it the thread stack
+   with the same index.  Exits the process when there are no more stacks
+   than existing worker entries.  VM is not referenced. */
+vlib_worker_thread_t *
+vlib_alloc_thread (vlib_main_t * vm)
+{
+  vlib_worker_thread_t *new_thread;
+
+  if (vec_len (vlib_thread_stacks) <= vec_len (vlib_worker_threads))
+    {
+      clib_warning ("out of worker threads... Quitting...");
+      exit (1);
+    }
+
+  vec_add2 (vlib_worker_threads, new_thread, 1);
+  new_thread->thread_stack =
+    vlib_thread_stacks[new_thread - vlib_worker_threads];
+
+  return new_thread;
+}
+
+/*
+ * Allocate and zero a cache-line aligned frame queue with NELTS ring
+ * slots.
+ *
+ * NELTS must be a positive power of two: the ring code masks indices
+ * with (nelts - 1), as the disabled reference enqueue/dequeue code in
+ * this file shows.  The check now runs before anything is allocated and
+ * also rejects values <= 0, which previously slipped through the
+ * power-of-two test (0 & -1 == 0).  An invalid NELTS aborts the process,
+ * as before.
+ *
+ * Layout problems (unaligned head/tail/elts, element size not a multiple
+ * of the cache line) are only reported on stderr.
+ */
+vlib_frame_queue_t *
+vlib_frame_queue_alloc (int nelts)
+{
+  vlib_frame_queue_t *fq;
+
+  if (nelts <= 0 || (nelts & (nelts - 1)))
+    {
+      fformat (stderr, "FATAL: nelts MUST be a power of 2\n");
+      abort ();
+    }
+
+  fq = clib_mem_alloc_aligned (sizeof (*fq), CLIB_CACHE_LINE_BYTES);
+  memset (fq, 0, sizeof (*fq));
+  fq->nelts = nelts;
+  fq->vector_threshold = 128;	/* packets */
+  vec_validate_aligned (fq->elts, nelts - 1, CLIB_CACHE_LINE_BYTES);
+
+  if (((uword) & fq->tail) & (CLIB_CACHE_LINE_BYTES - 1))
+    fformat (stderr, "WARNING: fq->tail unaligned\n");
+  if (((uword) & fq->head) & (CLIB_CACHE_LINE_BYTES - 1))
+    fformat (stderr, "WARNING: fq->head unaligned\n");
+  if (((uword) fq->elts) & (CLIB_CACHE_LINE_BYTES - 1))
+    fformat (stderr, "WARNING: fq->elts unaligned\n");
+
+  /* sizeof yields size_t; cast so the argument matches the %d directive. */
+  if (sizeof (fq->elts[0]) % CLIB_CACHE_LINE_BYTES)
+    fformat (stderr, "WARNING: fq->elts[0] size %d\n",
+             (int) sizeof (fq->elts[0]));
+
+  return (fq);
+}
+
+/* Weak, empty default definition.  Being weak, it is replaced at link
+ * time by any non-weak definition of the same symbol. */
+void vl_msg_api_handler_no_free (void *) __attribute__ ((weak));
+void
+vl_msg_api_handler_no_free (void *v)
+{
+}
+
+/* Turned off, save as reference material... */
+#if 0
+/*
+ * (Compiled out by the enclosing #if 0.)
+ * Drain the frame queue belonging to thread_id: for each valid element
+ * after fq->head, act on its message type, mark the slot invalid and
+ * advance head.  Returns the number of elements processed; stops at the
+ * first empty or not-yet-valid slot.
+ */
+static inline int
+vlib_frame_queue_dequeue_internal (int thread_id,
+ vlib_main_t * vm, vlib_node_main_t * nm)
+{
+ vlib_frame_queue_t *fq = vlib_frame_queues[thread_id];
+ vlib_frame_queue_elt_t *elt;
+ vlib_frame_t *f;
+ vlib_pending_frame_t *p;
+ vlib_node_runtime_t *r;
+ u32 node_runtime_index;
+ int msg_type;
+ u64 before;
+ int processed = 0;
+
+ ASSERT (vm == vlib_mains[thread_id]);
+
+ while (1)
+ {
+ /* head == tail: nothing queued */
+ if (fq->head == fq->tail)
+ return processed;
+
+ /* Next slot to consume; nelts - 1 is used as an index mask. */
+ elt = fq->elts + ((fq->head + 1) & (fq->nelts - 1));
+
+ /* Slot reserved but not yet published by the enqueuer. */
+ if (!elt->valid)
+ return processed;
+
+ before = clib_cpu_time_now ();
+
+ f = elt->frame;
+ node_runtime_index = elt->node_runtime_index;
+ msg_type = elt->msg_type;
+
+ switch (msg_type)
+ {
+ case VLIB_FRAME_QUEUE_ELT_FREE_BUFFERS:
+ vlib_buffer_free (vm, vlib_frame_vector_args (f), f->n_vectors);
+ /* note fallthrough... */
+ case VLIB_FRAME_QUEUE_ELT_FREE_FRAME:
+ r = vec_elt_at_index (nm->nodes_by_type[VLIB_NODE_TYPE_INTERNAL],
+ node_runtime_index);
+ vlib_frame_free (vm, r, f);
+ break;
+ case VLIB_FRAME_QUEUE_ELT_DISPATCH_FRAME:
+ /* Append a pending-frame entry for this frame. */
+ vec_add2 (vm->node_main.pending_frames, p, 1);
+ f->flags |= (VLIB_FRAME_PENDING | VLIB_FRAME_FREE_AFTER_DISPATCH);
+ p->node_runtime_index = elt->node_runtime_index;
+ p->frame_index = vlib_frame_index (vm, f);
+ p->next_frame_index = VLIB_PENDING_FRAME_NO_NEXT_FRAME;
+ fq->dequeue_vectors += (u64) f->n_vectors;
+ break;
+ case VLIB_FRAME_QUEUE_ELT_API_MSG:
+ vl_msg_api_handler_no_free (f);
+ break;
+ default:
+ clib_warning ("bogus frame queue message, type %d", msg_type);
+ break;
+ }
+ elt->valid = 0;
+ fq->dequeues++;
+ fq->dequeue_ticks += clib_cpu_time_now () - before;
+ /* Barrier is issued before head is advanced. */
+ CLIB_MEMORY_BARRIER ();
+ fq->head++;
+ processed++;
+ }
+ /* Not reached: the loop above only exits via return. */
+ ASSERT (0);
+ return processed;
+}
+
+/* (Compiled out by the enclosing #if 0.)  Non-static wrapper that forwards
+ * to vlib_frame_queue_dequeue_internal and returns its result. */
+int
+vlib_frame_queue_dequeue (int thread_id,
+ vlib_main_t * vm, vlib_node_main_t * nm)
+{
+ return vlib_frame_queue_dequeue_internal (thread_id, vm, nm);
+}
+
+/*
+ * (Compiled out by the enclosing #if 0.)
+ * Reserve the next ring slot of frame queue frame_queue_index with an
+ * atomic increment of fq->tail, busy-wait until that slot is usable, fill
+ * it in and publish it by setting elt->valid.  Returns frame->n_vectors as
+ * sampled before the element was published.
+ */
+int
+vlib_frame_queue_enqueue (vlib_main_t * vm, u32 node_runtime_index,
+ u32 frame_queue_index, vlib_frame_t * frame,
+ vlib_frame_queue_msg_type_t type)
+{
+ vlib_frame_queue_t *fq = vlib_frame_queues[frame_queue_index];
+ vlib_frame_queue_elt_t *elt;
+ u32 save_count;
+ u64 new_tail;
+ u64 before = clib_cpu_time_now ();
+
+ ASSERT (fq);
+
+ new_tail = __sync_add_and_fetch (&fq->tail, 1);
+
+ /* Wait until a ring slot is available */
+ while (new_tail >= fq->head + fq->nelts)
+ {
+ f64 b4 = vlib_time_now_ticks (vm, before);
+ vlib_worker_thread_barrier_check (vm, b4);
+ /* Bad idea. Dequeue -> enqueue -> dequeue -> trouble */
+ // vlib_frame_queue_dequeue (vm->cpu_index, vm, nm);
+ }
+
+ elt = fq->elts + (new_tail & (fq->nelts - 1));
+
+ /* this would be very bad... */
+ /* Spins for as long as the reserved slot is still marked valid. */
+ while (elt->valid)
+ {
+ }
+
+ /* Once we enqueue the frame, frame->n_vectors is owned elsewhere... */
+ save_count = frame->n_vectors;
+
+ elt->frame = frame;
+ elt->node_runtime_index = node_runtime_index;
+ elt->msg_type = type;
+ /* Barrier is issued after the payload stores and before valid is set. */
+ CLIB_MEMORY_BARRIER ();
+ elt->valid = 1;
+
+ return save_count;
+}
+#endif /* 0 */
+
+/*
+ * Per-thread startup hook, to be called by each vlib worker thread:
+ * switches to the worker's heap, names the thread
+ * "<prefix>_<short_name>_<instance>" when both a prefix and a short name
+ * exist, and - unless the registration uses pthreads - checks in at the
+ * initial barrier and spins until it is lifted.
+ */
+void
+vlib_worker_thread_init (vlib_worker_thread_t * w)
+{
+  vlib_thread_main_t *tm = vlib_get_thread_main ();
+  vlib_thread_registration_t *reg;
+
+  /*
+   * Note: disabling signals in worker threads as follows
+   * prevents the api post-mortem dump scheme from working
+   * {
+   *    sigset_t s;
+   *    sigfillset (&s);
+   *    pthread_sigmask (SIG_SETMASK, &s, 0);
+   * }
+   */
+
+  clib_mem_set_heap (w->thread_mheap);
+
+  reg = w->registration;
+
+  if (vec_len (tm->thread_prefix) && reg->short_name)
+    {
+      w->name = format (0, "%v_%s_%d%c", tm->thread_prefix,
+                        reg->short_name, w->instance_id, '\0');
+      vlib_set_thread_name ((char *) w->name);
+    }
+
+  if (reg->use_pthreads)
+    return;
+
+  /* Initial barrier sync, for both worker and i/o threads */
+  clib_smp_atomic_add (vlib_worker_threads->workers_at_barrier, 1);
+
+  while (*vlib_worker_threads->wait_at_barrier)
+    ;
+
+  clib_smp_atomic_add (vlib_worker_threads->workers_at_barrier, -1);
+}
+
+/* Thread entry point: records the kernel thread id and pthread handle in
+   the worker ARG points to, then runs the worker's thread_function on
+   its own stack through clib_calljmp, starting from the top of that
+   stack. */
+void *
+vlib_worker_thread_bootstrap_fn (void *arg)
+{
+  vlib_worker_thread_t *w = arg;
+  void *stack_top;
+  void *result;
+
+  w->lwp = syscall (SYS_gettid);
+  w->thread_id = pthread_self ();
+
+  stack_top = w->thread_stack + VLIB_THREAD_STACK_SIZE;
+  result = (void *) clib_calljmp ((uword (*)(uword)) w->thread_function,
+                                  (uword) arg, stack_top);
+
+  /* NOTREACHED, we hope */
+  return result;
+}
+
+/*
+ * Start thread function FP for worker W on cpu LCORE_ID.
+ *
+ * In DPDK builds, registrations that do not use pthreads are launched
+ * through rte_eal_remote_launch; that symbol is declared weak in this
+ * file, so a null address means DPDK is not linked in and -1 is returned.
+ * Otherwise a pthread is created and then bound to LCORE_ID.
+ *
+ * Returns 0 on success and a negative value on failure.  The callers in
+ * this file test the result with "< 0", but pthread_create and
+ * pthread_setaffinity_np report failure with a POSITIVE error number, so
+ * that value is negated here; previously such failures went unnoticed.
+ */
+static int
+vlib_launch_thread (void *fp, vlib_worker_thread_t * w, unsigned lcore_id)
+{
+  void *(*fp_arg) (void *) = fp;
+
+  w->lcore_id = lcore_id;
+#if DPDK==1
+  if (!w->registration->use_pthreads)
+    {
+      if (rte_eal_remote_launch)	/* do we have dpdk linked */
+        return rte_eal_remote_launch (fp, (void *) w, lcore_id);
+      return -1;
+    }
+#endif
+  {
+    int ret;
+    pthread_t worker;
+    cpu_set_t cpuset;
+
+    CPU_ZERO (&cpuset);
+    CPU_SET (lcore_id, &cpuset);
+
+    ret = pthread_create (&worker, NULL /* attr */ , fp_arg, (void *) w);
+    if (ret == 0)
+      ret = pthread_setaffinity_np (worker, sizeof (cpu_set_t), &cpuset);
+
+    /* Map the positive pthread error number onto the negative-on-failure
+       convention the callers rely on. */
+    return ret > 0 ? -ret : ret;
+  }
+}
+
+/*
+ * Main-loop-enter hook (registered with VLIB_MAIN_LOOP_ENTER_FUNCTION right
+ * after this function).  Rebuilds vlib_worker_threads starting with the
+ * main thread, creates one worker entry per registered thread instance,
+ * clones the main vlib_main_t state for every instance whose registration
+ * does not set no_data_structure_clone, launches all threads, and finally
+ * performs one barrier sync/release.  Always returns 0; launch failures
+ * are only reported with clib_warning.
+ */
+static clib_error_t *
+start_workers (vlib_main_t * vm)
+{
+ int i, j;
+ vlib_worker_thread_t *w;
+ vlib_main_t *vm_clone;
+ void *oldheap;
+ vlib_thread_main_t *tm = &vlib_thread_main;
+ vlib_thread_registration_t *tr;
+ vlib_node_runtime_t *rt;
+ u32 n_vlib_mains = tm->n_vlib_mains;
+ u32 worker_thread_index;
+ u8 *main_heap = clib_mem_get_per_cpu_heap ();
+ mheap_t *main_heap_header = mheap_header (main_heap);
+
+ vec_reset_length (vlib_worker_threads);
+
+ /* Set up the main thread */
+ vec_add2_aligned (vlib_worker_threads, w, 1, CLIB_CACHE_LINE_BYTES);
+ w->elog_track.name = "main thread";
+ elog_track_register (&vm->elog_main, &w->elog_track);
+
+ if (vec_len (tm->thread_prefix))
+ {
+ w->name = format (0, "%v_main%c", tm->thread_prefix, '\0');
+ vlib_set_thread_name ((char *) w->name);
+ }
+
+ /*
+ * Truth of the matter: we always use at least two
+ * threads. So, make the main heap thread-safe
+ * and make the event log thread-safe.
+ */
+ main_heap_header->flags |= MHEAP_FLAG_THREAD_SAFE;
+ vm->elog_main.lock =
+ clib_mem_alloc_aligned (CLIB_CACHE_LINE_BYTES, CLIB_CACHE_LINE_BYTES);
+ vm->elog_main.lock[0] = 0;
+
+ /* n_vlib_mains > 1 means at least one thread needs its own vlib_main_t
+ * clone. */
+ if (n_vlib_mains > 1)
+ {
+ /* Reserve space for all clones, then start with just the main vm at
+ * index 0. */
+ vec_validate (vlib_mains, tm->n_vlib_mains - 1);
+ _vec_len (vlib_mains) = 0;
+ vec_add1 (vlib_mains, vm);
+
+ vlib_worker_threads->wait_at_barrier =
+ clib_mem_alloc_aligned (sizeof (u32), CLIB_CACHE_LINE_BYTES);
+ vlib_worker_threads->workers_at_barrier =
+ clib_mem_alloc_aligned (sizeof (u32), CLIB_CACHE_LINE_BYTES);
+
+ /* Ask for an initial barrier sync */
+ *vlib_worker_threads->workers_at_barrier = 0;
+ *vlib_worker_threads->wait_at_barrier = 1;
+
+ worker_thread_index = 1;
+
+ for (i = 0; i < vec_len (tm->registrations); i++)
+ {
+ vlib_node_main_t *nm, *nm_clone;
+ vlib_buffer_main_t *bm_clone;
+ vlib_buffer_free_list_t *fl_clone, *fl_orig;
+ vlib_buffer_free_list_t *orig_freelist_pool;
+ int k;
+
+ tr = tm->registrations[i];
+
+ if (tr->count == 0)
+ continue;
+
+ for (k = 0; k < tr->count; k++)
+ {
+ vec_add2 (vlib_worker_threads, w, 1);
+ /* A registration with a non-zero mheap_size gets a private heap;
+ * otherwise the thread shares the main heap. */
+ if (tr->mheap_size)
+ w->thread_mheap =
+ mheap_alloc (0 /* use VM */ , tr->mheap_size);
+ else
+ w->thread_mheap = main_heap;
+ w->thread_stack = vlib_thread_stacks[w - vlib_worker_threads];
+ w->thread_function = tr->function;
+ w->thread_function_arg = w;
+ w->instance_id = k;
+ w->registration = tr;
+
+ w->elog_track.name =
+ (char *) format (0, "%s %d", tr->name, k + 1);
+ vec_add1 (w->elog_track.name, 0);
+ elog_track_register (&vm->elog_main, &w->elog_track);
+
+ /* Threads that do not need a vlib_main_t clone stop here;
+ * worker_thread_index is not advanced for them. */
+ if (tr->no_data_structure_clone)
+ continue;
+
+ /* Fork vlib_global_main et al. Look for bugs here */
+ oldheap = clib_mem_set_heap (w->thread_mheap);
+
+ vm_clone = clib_mem_alloc (sizeof (*vm_clone));
+ clib_memcpy (vm_clone, vlib_mains[0], sizeof (*vm_clone));
+
+ vm_clone->cpu_index = worker_thread_index;
+ vm_clone->heap_base = w->thread_mheap;
+ vm_clone->mbuf_alloc_list = 0;
+ memset (&vm_clone->random_buffer, 0,
+ sizeof (vm_clone->random_buffer));
+
+ nm = &vlib_mains[0]->node_main;
+ nm_clone = &vm_clone->node_main;
+ /* fork next frames array, preserving node runtime indices */
+ nm_clone->next_frames = vec_dup (nm->next_frames);
+ for (j = 0; j < vec_len (nm_clone->next_frames); j++)
+ {
+ vlib_next_frame_t *nf = &nm_clone->next_frames[j];
+ u32 save_node_runtime_index;
+ u32 save_flags;
+
+ /* Re-initialize the entry but keep its node runtime index and
+ * the NO_FREE_AFTER_DISPATCH flag bit. */
+ save_node_runtime_index = nf->node_runtime_index;
+ save_flags = nf->flags & VLIB_FRAME_NO_FREE_AFTER_DISPATCH;
+ vlib_next_frame_init (nf);
+ nf->node_runtime_index = save_node_runtime_index;
+ nf->flags = save_flags;
+ }
+
+ /* fork the frame dispatch queue */
+ nm_clone->pending_frames = 0;
+ vec_validate (nm_clone->pending_frames, 10); /* $$$$$?????? */
+ _vec_len (nm_clone->pending_frames) = 0;
+
+ /* fork nodes */
+ nm_clone->nodes = 0;
+ for (j = 0; j < vec_len (nm->nodes); j++)
+ {
+ vlib_node_t *n;
+ n = clib_mem_alloc_no_fail (sizeof (*n));
+ clib_memcpy (n, nm->nodes[j], sizeof (*n));
+ /* none of the copied nodes have enqueue rights given out */
+ n->owner_node_index = VLIB_INVALID_NODE_INDEX;
+ memset (&n->stats_total, 0, sizeof (n->stats_total));
+ memset (&n->stats_last_clear, 0,
+ sizeof (n->stats_last_clear));
+ vec_add1 (nm_clone->nodes, n);
+ }
+ nm_clone->nodes_by_type[VLIB_NODE_TYPE_INTERNAL] =
+ vec_dup (nm->nodes_by_type[VLIB_NODE_TYPE_INTERNAL]);
+
+ /* Input node runtimes are duplicated and re-tagged with the
+ * clone's cpu index. */
+ nm_clone->nodes_by_type[VLIB_NODE_TYPE_INPUT] =
+ vec_dup (nm->nodes_by_type[VLIB_NODE_TYPE_INPUT]);
+ vec_foreach (rt, nm_clone->nodes_by_type[VLIB_NODE_TYPE_INPUT])
+ rt->cpu_index = vm_clone->cpu_index;
+
+ nm_clone->processes = vec_dup (nm->processes);
+
+ /* zap the (per worker) frame freelists, etc */
+ nm_clone->frame_sizes = 0;
+ nm_clone->frame_size_hash = 0;
+
+ /* Packet trace buffers are guaranteed to be empty, nothing to do here */
+
+ /* Everything from here on is allocated from the previous heap
+ * again. */
+ clib_mem_set_heap (oldheap);
+ vec_add1 (vlib_mains, vm_clone);
+
+ vm_clone->error_main.counters =
+ vec_dup (vlib_mains[0]->error_main.counters);
+ vm_clone->error_main.counters_last_clear =
+ vec_dup (vlib_mains[0]->error_main.counters_last_clear);
+
+ /* Fork the vlib_buffer_main_t free lists, etc. */
+ bm_clone = vec_dup (vm_clone->buffer_main);
+ vm_clone->buffer_main = bm_clone;
+
+ orig_freelist_pool = bm_clone->buffer_free_list_pool;
+ bm_clone->buffer_free_list_pool = 0;
+
+ /* Rebuild the free list pool entry by entry; the ASSERT checks
+ * that each clone lands at the same pool index as its original.
+ * Buffer vectors and the allocation count start out empty. */
+ /* *INDENT-OFF* */
+ pool_foreach (fl_orig, orig_freelist_pool,
+ ({
+ pool_get_aligned (bm_clone->buffer_free_list_pool,
+ fl_clone, CLIB_CACHE_LINE_BYTES);
+ ASSERT (fl_orig - orig_freelist_pool
+ == fl_clone - bm_clone->buffer_free_list_pool);
+
+ fl_clone[0] = fl_orig[0];
+ fl_clone->aligned_buffers = 0;
+ fl_clone->unaligned_buffers = 0;
+ fl_clone->n_alloc = 0;
+ }));
+/* *INDENT-ON* */
+
+ worker_thread_index++;
+ }
+ }
+ }
+ else
+ {
+ /* only have non-data-structure copy threads to create... */
+ for (i = 0; i < vec_len (tm->registrations); i++)
+ {
+ tr = tm->registrations[i];
+
+ for (j = 0; j < tr->count; j++)
+ {
+ vec_add2 (vlib_worker_threads, w, 1);
+ if (tr->mheap_size)
+ w->thread_mheap =
+ mheap_alloc (0 /* use VM */ , tr->mheap_size);
+ else
+ w->thread_mheap = main_heap;
+ w->thread_stack = vlib_thread_stacks[w - vlib_worker_threads];
+ w->thread_function = tr->function;
+ w->thread_function_arg = w;
+ w->instance_id = j;
+ w->elog_track.name =
+ (char *) format (0, "%s %d", tr->name, j + 1);
+ w->registration = tr;
+ vec_add1 (w->elog_track.name, 0);
+ elog_track_register (&vm->elog_main, &w->elog_track);
+ }
+ }
+ }
+
+ /* Launch phase: walk the registrations in the same order as above so
+ * that worker_thread_index lines up with the entries just created.
+ * vlib_launch_thread is expected to signal failure with a negative
+ * result. */
+ worker_thread_index = 1;
+
+ for (i = 0; i < vec_len (tm->registrations); i++)
+ {
+ int j;
+
+ tr = tm->registrations[i];
+
+ /* pthread-style threads are all launched with lcore id 0; the others
+ * get one thread per cpu set in the registration's coremask. */
+ if (tr->use_pthreads || tm->use_pthreads)
+ {
+ for (j = 0; j < tr->count; j++)
+ {
+ w = vlib_worker_threads + worker_thread_index++;
+ if (vlib_launch_thread (vlib_worker_thread_bootstrap_fn, w, 0) <
+ 0)
+ clib_warning ("Couldn't start '%s' pthread ", tr->name);
+ }
+ }
+ else
+ {
+ uword c;
+ /* *INDENT-OFF* */
+ clib_bitmap_foreach (c, tr->coremask, ({
+ w = vlib_worker_threads + worker_thread_index++;
+ if (vlib_launch_thread (vlib_worker_thread_bootstrap_fn, w, c) < 0)
+ clib_warning ("Couldn't start DPDK lcore %d", c);
+
+ }));
+/* *INDENT-ON* */
+ }
+ }
+ vlib_worker_thread_barrier_sync (vm);
+ vlib_worker_thread_barrier_release (vm);
+ return 0;
+}
+
+VLIB_MAIN_LOOP_ENTER_FUNCTION (start_workers);
+
+/*
+ * Bring every worker's cloned node graph back in line with the main
+ * thread's (vlib_mains[0]).
+ *
+ * Must run on thread 0 while the workers are held at the barrier (both
+ * are asserted).  Does nothing when vlib_mains is empty.
+ *
+ * Steps:
+ *   1. sync node statistics on the main thread and on each clone so no
+ *      pending counts are lost;
+ *   2. for each clone, on that worker's heap: re-copy error_main while
+ *      keeping (and growing) the clone's own counter vectors, re-duplicate
+ *      next_frames, re-fork all nodes carrying over per-node statistics
+ *      and state where an old clone exists, and re-duplicate the internal
+ *      and input node runtimes, restoring the previous input runtime
+ *      state.
+ *
+ * Fix relative to the previous version: the old clone pointer is now
+ * fetched only for indices that exist in the old clone vector.  Before,
+ * old_nodes_clone[j] was read ahead of the bounds check, i.e. past the
+ * end of the vector whenever nodes had been added since the last fork.
+ */
+void
+vlib_worker_thread_node_runtime_update (void)
+{
+  int i, j;
+  vlib_worker_thread_t *w;
+  vlib_main_t *vm;
+  vlib_node_main_t *nm, *nm_clone;
+  vlib_node_t **old_nodes_clone;
+  vlib_main_t *vm_clone;
+  vlib_node_runtime_t *rt, *old_rt;
+  void *oldheap;
+  never_inline void
+    vlib_node_runtime_sync_stats (vlib_main_t * vm,
+                                  vlib_node_runtime_t * r,
+                                  uword n_calls,
+                                  uword n_vectors, uword n_clocks);
+
+  ASSERT (os_get_cpu_number () == 0);
+
+  if (vec_len (vlib_mains) == 0)
+    return;
+
+  vm = vlib_mains[0];
+  nm = &vm->node_main;
+
+  ASSERT (*vlib_worker_threads->wait_at_barrier == 1);
+
+  /*
+   * Scrape all runtime stats, so we don't lose node runtime(s) with
+   * pending counts, or throw away worker / io thread counts.
+   */
+  for (j = 0; j < vec_len (nm->nodes); j++)
+    vlib_node_sync_stats (vm, nm->nodes[j]);
+
+  for (i = 1; i < vec_len (vlib_mains); i++)
+    {
+      vm_clone = vlib_mains[i];
+      nm_clone = &vm_clone->node_main;
+
+      for (j = 0; j < vec_len (nm_clone->nodes); j++)
+        {
+          vlib_node_t *n = nm_clone->nodes[j];
+
+          rt = vlib_node_get_runtime (vm_clone, n->index);
+          vlib_node_runtime_sync_stats (vm_clone, rt, 0, 0, 0);
+        }
+    }
+
+  for (i = 1; i < vec_len (vlib_mains); i++)
+    {
+      u64 *old_counters;
+      u64 *old_counters_all_clear;
+
+      w = vlib_worker_threads + i;
+      oldheap = clib_mem_set_heap (w->thread_mheap);
+
+      vm_clone = vlib_mains[i];
+
+      /* Re-clone error heap: copy the main error_main wholesale, but keep
+         this clone's own counter vectors, grown to the main length. */
+      old_counters = vm_clone->error_main.counters;
+      old_counters_all_clear = vm_clone->error_main.counters_last_clear;
+      clib_memcpy (&vm_clone->error_main, &vm->error_main,
+                   sizeof (vm->error_main));
+      j = vec_len (vm->error_main.counters) - 1;
+      vec_validate_aligned (old_counters, j, CLIB_CACHE_LINE_BYTES);
+      vec_validate_aligned (old_counters_all_clear, j,
+                            CLIB_CACHE_LINE_BYTES);
+      vm_clone->error_main.counters = old_counters;
+      vm_clone->error_main.counters_last_clear = old_counters_all_clear;
+
+      nm_clone = &vm_clone->node_main;
+      vec_free (nm_clone->next_frames);
+      nm_clone->next_frames = vec_dup (nm->next_frames);
+
+      for (j = 0; j < vec_len (nm_clone->next_frames); j++)
+        {
+          vlib_next_frame_t *nf = &nm_clone->next_frames[j];
+          u32 save_node_runtime_index;
+          u32 save_flags;
+
+          /* Re-initialize, preserving the runtime index and the
+             NO_FREE_AFTER_DISPATCH flag bit. */
+          save_node_runtime_index = nf->node_runtime_index;
+          save_flags = nf->flags & VLIB_FRAME_NO_FREE_AFTER_DISPATCH;
+          vlib_next_frame_init (nf);
+          nf->node_runtime_index = save_node_runtime_index;
+          nf->flags = save_flags;
+        }
+
+      old_nodes_clone = nm_clone->nodes;
+      nm_clone->nodes = 0;
+
+      /* re-fork nodes */
+      for (j = 0; j < vec_len (nm->nodes); j++)
+        {
+          vlib_node_t *new_n = nm->nodes[j];
+          vlib_node_t *new_n_clone;
+
+          new_n_clone = clib_mem_alloc_no_fail (sizeof (*new_n_clone));
+          clib_memcpy (new_n_clone, new_n, sizeof (*new_n));
+          /* none of the copied nodes have enqueue rights given out */
+          new_n_clone->owner_node_index = VLIB_INVALID_NODE_INDEX;
+
+          if (j >= vec_len (old_nodes_clone))
+            {
+              /* new node, set to zero */
+              memset (&new_n_clone->stats_total, 0,
+                      sizeof (new_n_clone->stats_total));
+              memset (&new_n_clone->stats_last_clear, 0,
+                      sizeof (new_n_clone->stats_last_clear));
+            }
+          else
+            {
+              /* Only index the old vector once j is known to be in range. */
+              vlib_node_t *old_n_clone = old_nodes_clone[j];
+
+              /* Copy stats if the old data is valid */
+              clib_memcpy (&new_n_clone->stats_total,
+                           &old_n_clone->stats_total,
+                           sizeof (new_n_clone->stats_total));
+              clib_memcpy (&new_n_clone->stats_last_clear,
+                           &old_n_clone->stats_last_clear,
+                           sizeof (new_n_clone->stats_last_clear));
+
+              /* keep previous node state */
+              new_n_clone->state = old_n_clone->state;
+            }
+          vec_add1 (nm_clone->nodes, new_n_clone);
+        }
+
+      /* Free the old node clone */
+      for (j = 0; j < vec_len (old_nodes_clone); j++)
+        clib_mem_free (old_nodes_clone[j]);
+      vec_free (old_nodes_clone);
+
+      vec_free (nm_clone->nodes_by_type[VLIB_NODE_TYPE_INTERNAL]);
+
+      nm_clone->nodes_by_type[VLIB_NODE_TYPE_INTERNAL] =
+        vec_dup (nm->nodes_by_type[VLIB_NODE_TYPE_INTERNAL]);
+
+      /* clone input node runtime */
+      old_rt = nm_clone->nodes_by_type[VLIB_NODE_TYPE_INPUT];
+
+      nm_clone->nodes_by_type[VLIB_NODE_TYPE_INPUT] =
+        vec_dup (nm->nodes_by_type[VLIB_NODE_TYPE_INPUT]);
+
+      vec_foreach (rt, nm_clone->nodes_by_type[VLIB_NODE_TYPE_INPUT])
+      {
+        rt->cpu_index = vm_clone->cpu_index;
+      }
+
+      /* Carry the previous per-worker input node state over to the fresh
+         runtimes. */
+      for (j = 0; j < vec_len (old_rt); j++)
+        {
+          rt = vlib_node_get_runtime (vm_clone, old_rt[j].node_index);
+          rt->state = old_rt[j].state;
+        }
+
+      vec_free (old_rt);
+
+      nm_clone->processes = vec_dup (nm->processes);
+
+      clib_mem_set_heap (oldheap);
+
+      // vnet_main_fork_fixup (i);
+    }
+}
+
+/*
+ * unformat helper: matches the input against each policy string listed by
+ * foreach_sched_policy and stores the corresponding SCHED_POLICY_* value
+ * through the u32 pointer taken from args.  Returns 1 on a match and 0
+ * otherwise.
+ */
+u32
+unformat_sched_policy (unformat_input_t * input, va_list * args)
+{
+ u32 *r = va_arg (*args, u32 *);
+
+ /* "if (0);" opens an if/else chain; each expansion of _() below appends
+ * one "else if" arm and the trailing "else" handles no match. */
+ if (0);
+#define _(v,f,s) else if (unformat (input, s)) *r = SCHED_POLICY_##f;
+ foreach_sched_policy
+#undef _
+ else
+ return 0;
+ return 1;
+}
+
+/*
+ * Early config handler for the "cpu" section (registered with
+ * VLIB_EARLY_CONFIG_FUNCTION right after this function).  Builds a
+ * name -> registration hash, parses the options below into
+ * vlib_thread_main and the per-type registrations, validates the
+ * scheduler priority, and finally totals the thread counts.
+ *
+ * Returns 0 on success or a clib_error_t for an unknown thread type, a
+ * setting that is not allowed for that type, or a priority given without
+ * a FIFO/RR policy.  Parsing stops silently at the first unrecognized
+ * token.
+ *
+ * NOTE(review): the name vectors produced by %s are not freed here.
+ */
+static clib_error_t *
+cpu_config (vlib_main_t * vm, unformat_input_t * input)
+{
+ vlib_thread_registration_t *tr;
+ uword *p;
+ vlib_thread_main_t *tm = &vlib_thread_main;
+ u8 *name;
+ u64 coremask;
+ uword *bitmap;
+ u32 count;
+
+ tm->thread_registrations_by_name = hash_create_string (0, sizeof (uword));
+
+ tm->n_thread_stacks = 1; /* account for main thread */
+ /* ~0 marks policy and priority as "not configured". */
+ tm->sched_policy = ~0;
+ tm->sched_priority = ~0;
+
+ tr = tm->next;
+
+ /* Index every registered thread type by name for the lookups below. */
+ while (tr)
+ {
+ hash_set_mem (tm->thread_registrations_by_name, tr->name, (uword) tr);
+ tr = tr->next;
+ }
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "use-pthreads"))
+ tm->use_pthreads = 1;
+ else if (unformat (input, "thread-prefix %v", &tm->thread_prefix))
+ ;
+ /* NOTE(review): %u stores through &tm->main_lcore; vlib_thread_init
+ * compares that field against (u8) ~0, so confirm the field is wide
+ * enough for a %u store. */
+ else if (unformat (input, "main-core %u", &tm->main_lcore))
+ ;
+ else if (unformat (input, "skip-cores %u", &tm->skip_cores))
+ ;
+ /* coremask-<type> <hex>: merge a 64-bit mask into the type's coremask,
+ * starting at bit 0, and derive the thread count from it. */
+ else if (unformat (input, "coremask-%s %llx", &name, &coremask))
+ {
+ p = hash_get_mem (tm->thread_registrations_by_name, name);
+ if (p == 0)
+ return clib_error_return (0, "no such thread type '%s'", name);
+
+ tr = (vlib_thread_registration_t *) p[0];
+
+ if (tr->use_pthreads)
+ return clib_error_return (0,
+ "coremask cannot be set for '%s' threads",
+ name);
+
+ tr->coremask = clib_bitmap_set_multiple
+ (tr->coremask, 0, coremask, BITS (coremask));
+ tr->count = clib_bitmap_count_set_bits (tr->coremask);
+ }
+ /* corelist-<type> <list>: replace the type's coremask with the parsed
+ * bitmap and derive the thread count from it. */
+ else if (unformat (input, "corelist-%s %U", &name, unformat_bitmap_list,
+ &bitmap))
+ {
+ p = hash_get_mem (tm->thread_registrations_by_name, name);
+ if (p == 0)
+ return clib_error_return (0, "no such thread type '%s'", name);
+
+ tr = (vlib_thread_registration_t *) p[0];
+
+ if (tr->use_pthreads)
+ return clib_error_return (0,
+ "corelist cannot be set for '%s' threads",
+ name);
+
+ tr->coremask = bitmap;
+ tr->count = clib_bitmap_count_set_bits (tr->coremask);
+ }
+ else
+ if (unformat
+ (input, "scheduler-policy %U", unformat_sched_policy,
+ &tm->sched_policy))
+ ;
+ else if (unformat (input, "scheduler-priority %u", &tm->sched_priority))
+ ;
+ /* <type> <count>: generic form, tried only after all keywords above
+ * have failed to match. */
+ else if (unformat (input, "%s %u", &name, &count))
+ {
+ p = hash_get_mem (tm->thread_registrations_by_name, name);
+ if (p == 0)
+ return clib_error_return (0, "no such thread type 3 '%s'", name);
+
+ tr = (vlib_thread_registration_t *) p[0];
+ if (tr->fixed_count)
+ return clib_error_return
+ (0, "number of %s threads not configurable", tr->name);
+ tr->count = count;
+ }
+ else
+ break;
+ }
+
+ /* A priority is only accepted together with SCHED_FIFO or SCHED_RR, and
+ * is clamped to the range reported for that policy. */
+ if (tm->sched_priority != ~0)
+ {
+ if (tm->sched_policy == SCHED_FIFO || tm->sched_policy == SCHED_RR)
+ {
+ u32 prio_max = sched_get_priority_max (tm->sched_policy);
+ u32 prio_min = sched_get_priority_min (tm->sched_policy);
+ if (tm->sched_priority > prio_max)
+ tm->sched_priority = prio_max;
+ if (tm->sched_priority < prio_min)
+ tm->sched_priority = prio_min;
+ }
+ else
+ {
+ return clib_error_return
+ (0,
+ "scheduling priority (%d) is not allowed for `normal` scheduling policy",
+ tm->sched_priority);
+ }
+ }
+ tr = tm->next;
+
+ /* Default thread name prefix. */
+ if (!tm->thread_prefix)
+ tm->thread_prefix = format (0, "vpp");
+
+ /* Total up stacks, pthreads and non-pthread ("eal") threads over all
+ * registrations. */
+ while (tr)
+ {
+ tm->n_thread_stacks += tr->count;
+ tm->n_pthreads += tr->count * tr->use_pthreads;
+ tm->n_eal_threads += tr->count * (tr->use_pthreads == 0);
+ tr = tr->next;
+ }
+
+ return 0;
+}
+
+VLIB_EARLY_CONFIG_FUNCTION (cpu_config, "cpu");
+
+#if !defined (__x86_64__) && !defined (__aarch64__) && !defined (__powerpc64__) && !defined(__arm__)
+/*
+ * Placeholder definitions of the 8-byte __sync builtins for targets that
+ * are not in the list above.  Reaching either one reports the name of the
+ * builtin on stderr and aborts the process.
+ */
+static void
+sync_builtin_8_unavailable (const char *builtin_name)
+{
+  fformat (stderr, "%s called\n", builtin_name);
+  abort ();
+}
+
+void
+__sync_fetch_and_add_8 (void)
+{
+  sync_builtin_8_unavailable (__FUNCTION__);
+}
+
+void
+__sync_add_and_fetch_8 (void)
+{
+  sync_builtin_8_unavailable (__FUNCTION__);
+}
+#endif
+
+/*
+ * Weak, do-nothing default for the fixup hook invoked from
+ * vlib_worker_thread_fork_fixup(); a non-weak definition elsewhere
+ * replaces it at link time.
+ */
+__attribute__ ((weak)) void vnet_main_fixup (vlib_fork_fixup_t which);
+
+void
+vnet_main_fixup (vlib_fork_fixup_t which)
+{
+  /* intentionally empty */
+  return;
+}
+
+/*
+ * Run the requested fixup between a barrier sync and a barrier release.
+ * Does nothing when vlib_mains has not been set up.
+ */
+void
+vlib_worker_thread_fork_fixup (vlib_fork_fixup_t which)
+{
+  vlib_main_t *main_vm = vlib_get_main ();
+
+  if (vlib_mains != 0)
+    {
+      ASSERT (os_get_cpu_number () == 0);
+      vlib_worker_thread_barrier_sync (main_vm);
+
+      if (which == VLIB_WORKER_THREAD_FORK_FIXUP_NEW_SW_IF_INDEX)
+        {
+          vnet_main_fixup (VLIB_WORKER_THREAD_FORK_FIXUP_NEW_SW_IF_INDEX);
+        }
+      else
+        {
+          /* unknown fixup request */
+          ASSERT (0);
+        }
+
+      vlib_worker_thread_barrier_release (main_vm);
+    }
+}
+/*
+ * Request a barrier: raise the shared wait_at_barrier flag and spin until
+ * workers_at_barrier equals vec_len (vlib_mains) - 1.
+ *
+ * Returns immediately when vlib_mains is not set.  Nested calls only bump
+ * recursion_level.  If the workers have not all checked in before
+ * vlib_time_now() passes the BARRIER_SYNC_TIMEOUT deadline, a message is
+ * written to stderr and os_panic() is called.
+ */
+void
+vlib_worker_thread_barrier_sync (vlib_main_t * vm)
+{
+ f64 deadline;
+ u32 count;
+
+ if (!vlib_mains)
+ return;
+
+ count = vec_len (vlib_mains) - 1; /* every vlib_main except index 0 */
+
+ /* Tolerate recursive calls */
+ if (++vlib_worker_threads[0].recursion_level > 1)
+ return;
+
+ vlib_worker_threads[0].barrier_sync_count++;
+
+ ASSERT (os_get_cpu_number () == 0);
+
+ deadline = vlib_time_now (vm) + BARRIER_SYNC_TIMEOUT;
+
+ *vlib_worker_threads->wait_at_barrier = 1; /* polled by vlib_worker_thread_barrier_check() */
+ while (*vlib_worker_threads->workers_at_barrier != count)
+ {
+ if (vlib_time_now (vm) > deadline)
+ {
+ fformat (stderr, "%s: worker thread deadlock\n", __FUNCTION__);
+ os_panic ();
+ }
+ }
+}
+/*
+ * Undo vlib_worker_thread_barrier_sync(): clear wait_at_barrier and spin
+ * until workers_at_barrier has dropped back to zero.
+ *
+ * Returns immediately when vlib_mains is not set, or while recursion_level
+ * is still positive after the decrement (i.e. an outer sync is still
+ * active).  Calls os_panic() if the count has not reached zero before the
+ * BARRIER_SYNC_TIMEOUT deadline.
+ */
+void
+vlib_worker_thread_barrier_release (vlib_main_t * vm)
+{
+ f64 deadline;
+
+ if (!vlib_mains)
+ return;
+
+ if (--vlib_worker_threads[0].recursion_level > 0)
+ return;
+
+ deadline = vlib_time_now (vm) + BARRIER_SYNC_TIMEOUT;
+
+ *vlib_worker_threads->wait_at_barrier = 0;
+
+ while (*vlib_worker_threads->workers_at_barrier > 0)
+ {
+ if (vlib_time_now (vm) > deadline)
+ {
+ fformat (stderr, "%s: worker thread deadlock\n", __FUNCTION__);
+ os_panic ();
+ }
+ }
+}
+
+/*
+ * Check the frame queue to see if any frames are available.
+ * If so, pull the packets off the frames and put them to
+ * the handoff node.
+ *
+ * The queue drained is fqm->vlib_frame_queues[vm->cpu_index].  Each valid
+ * element's buffer indices are copied into a fresh frame obtained for
+ * fqm->node_index, after which the element is reset and head is advanced.
+ * The loop stops when head == tail, when the next element is not marked
+ * valid, or once at least fq->vector_threshold vectors have been moved;
+ * in all three cases head_hint is refreshed from head.
+ *
+ * When fq->trace is set, a snapshot of the ring state is first stored in
+ * fqm->frame_queue_traces[] and fqm->frame_queue_histogram[].
+ *
+ * Returns the number of queue elements consumed; 0 when fqm->node_index
+ * is ~0.
+ */
+static inline int
+vlib_frame_queue_dequeue_internal (vlib_main_t * vm,
+ vlib_frame_queue_main_t * fqm)
+{
+ u32 thread_id = vm->cpu_index;
+ vlib_frame_queue_t *fq = fqm->vlib_frame_queues[thread_id];
+ vlib_frame_queue_elt_t *elt;
+ u32 *from, *to;
+ vlib_frame_t *f;
+ int msg_type;
+ int processed = 0;
+ u32 n_left_to_node;
+ u32 vectors = 0;
+
+ ASSERT (fq);
+ ASSERT (vm == vlib_mains[thread_id]);
+
+ if (PREDICT_FALSE (fqm->node_index == ~0))
+ return 0;
+ /*
+ * Gather trace data for frame queues
+ */
+ if (PREDICT_FALSE (fq->trace))
+ {
+ frame_queue_trace_t *fqt;
+ frame_queue_nelt_counter_t *fqh;
+ u32 elix;
+
+ fqt = &fqm->frame_queue_traces[thread_id];
+
+ fqt->nelts = fq->nelts;
+ fqt->head = fq->head;
+ fqt->head_hint = fq->head_hint;
+ fqt->tail = fq->tail;
+ fqt->threshold = fq->vector_threshold;
+ fqt->n_in_use = fqt->tail - fqt->head;
+ if (fqt->n_in_use >= fqt->nelts)
+ {
+ // if beyond max then use max
+ fqt->n_in_use = fqt->nelts - 1;
+ }
+
+ /* Record the number of elements in use in the histogram */
+ fqh = &fqm->frame_queue_histogram[thread_id];
+ fqh->count[fqt->n_in_use]++; /* NOTE(review): assumes count[] has at least nelts entries - confirm */
+
+ /* Record a snapshot of the elements in use */
+ for (elix = 0; elix < fqt->nelts; elix++) /* NOTE(review): assumes n_vectors[] has at least nelts entries - confirm */
+ {
+ elt = fq->elts + ((fq->head + 1 + elix) & (fq->nelts - 1));
+ if (1 || elt->valid) /* always true: every slot is recorded, valid or not */
+ {
+ fqt->n_vectors[elix] = elt->n_vectors;
+ }
+ }
+ fqt->written = 1;
+ }
+
+ while (1)
+ {
+ if (fq->head == fq->tail)
+ {
+ fq->head_hint = fq->head;
+ return processed;
+ }
+
+ elt = fq->elts + ((fq->head + 1) & (fq->nelts - 1));
+
+ if (!elt->valid)
+ {
+ fq->head_hint = fq->head;
+ return processed;
+ }
+
+ from = elt->buffer_index;
+ msg_type = elt->msg_type;
+
+ ASSERT (msg_type == VLIB_FRAME_QUEUE_ELT_DISPATCH_FRAME);
+ ASSERT (elt->n_vectors <= VLIB_FRAME_SIZE);
+
+ f = vlib_get_frame_to_node (vm, fqm->node_index);
+
+ to = vlib_frame_vector_args (f);
+
+ n_left_to_node = elt->n_vectors;
+
+ while (n_left_to_node >= 4) /* copy buffer indices four at a time */
+ {
+ to[0] = from[0];
+ to[1] = from[1];
+ to[2] = from[2];
+ to[3] = from[3];
+ to += 4;
+ from += 4;
+ n_left_to_node -= 4;
+ }
+
+ while (n_left_to_node > 0) /* then the remaining 0..3 */
+ {
+ to[0] = from[0];
+ to++;
+ from++;
+ n_left_to_node--;
+ }
+
+ vectors += elt->n_vectors;
+ f->n_vectors = elt->n_vectors;
+ vlib_put_frame_to_node (vm, fqm->node_index, f);
+
+ elt->valid = 0;
+ elt->n_vectors = 0;
+ elt->msg_type = 0xfefefefe;
+ CLIB_MEMORY_BARRIER (); /* the slot is reset before head is advanced */
+ fq->head++;
+ processed++;
+
+ /*
+ * Limit the number of packets pushed into the graph
+ */
+ if (vectors >= fq->vector_threshold)
+ {
+ fq->head_hint = fq->head;
+ return processed;
+ }
+ }
+ ASSERT (0); /* not reached: the loop above only exits through return */
+ return processed;
+}
+/*
+ * Dispatch loop run by a worker thread; it never returns.
+ *
+ * Each iteration, in order: checks for a barrier request, drains every
+ * entry of tm->frame_queue_mains, dispatches all nodes of type
+ * VLIB_NODE_TYPE_INPUT in polling state, dispatches the input nodes listed
+ * in pending_interrupt_node_runtime_indices in interrupt state, dispatches
+ * all pending frames, and increments the main loop counter.
+ */
+static_always_inline void
+vlib_worker_thread_internal (vlib_main_t * vm)
+{
+ vlib_node_main_t *nm = &vm->node_main;
+ vlib_thread_main_t *tm = vlib_get_thread_main ();
+ u64 cpu_time_now = clib_cpu_time_now ();
+ vlib_frame_queue_main_t *fqm;
+
+ vec_alloc (nm->pending_interrupt_node_runtime_indices, 32);
+
+ while (1)
+ {
+ vlib_worker_thread_barrier_check ();
+
+ vec_foreach (fqm, tm->frame_queue_mains)
+ vlib_frame_queue_dequeue_internal (vm, fqm);
+
+ vlib_node_runtime_t *n;
+ vec_foreach (n, nm->nodes_by_type[VLIB_NODE_TYPE_INPUT])
+ {
+ cpu_time_now = dispatch_node (vm, n, VLIB_NODE_TYPE_INPUT,
+ VLIB_NODE_STATE_POLLING, /* frame */ 0,
+ cpu_time_now);
+ }
+
+ /* Next handle interrupts. */
+ {
+ uword l = _vec_len (nm->pending_interrupt_node_runtime_indices);
+ uword i;
+ if (l > 0)
+ {
+ _vec_len (nm->pending_interrupt_node_runtime_indices) = 0; /* length is cleared before the saved l entries are dispatched */
+ for (i = 0; i < l; i++)
+ {
+ n = vec_elt_at_index (nm->nodes_by_type[VLIB_NODE_TYPE_INPUT],
+ nm->
+ pending_interrupt_node_runtime_indices
+ [i]);
+ cpu_time_now =
+ dispatch_node (vm, n, VLIB_NODE_TYPE_INPUT,
+ VLIB_NODE_STATE_INTERRUPT,
+ /* frame */ 0,
+ cpu_time_now);
+ }
+ }
+ }
+
+ if (_vec_len (nm->pending_frames))
+ {
+ int i;
+ cpu_time_now = clib_cpu_time_now ();
+ for (i = 0; i < _vec_len (nm->pending_frames); i++) /* length is re-read on every pass */
+ {
+ vlib_pending_frame_t *p;
+
+ p = nm->pending_frames + i;
+
+ cpu_time_now = dispatch_pending_node (vm, p, cpu_time_now);
+ }
+ _vec_len (nm->pending_frames) = 0;
+ }
+ vlib_increment_main_loop_counter (vm);
+
+ /* Record time stamp in case there are no enabled nodes and above
+ calls do not update time stamp. */
+ cpu_time_now = clib_cpu_time_now ();
+ }
+}
+
+/*
+ * Thread function registered for the "workers" thread type.  arg is the
+ * vlib_worker_thread_t of the calling thread.  Performs per-thread
+ * initialisation, then enters vlib_worker_thread_internal().
+ */
+void
+vlib_worker_thread_fn (void *arg)
+{
+  vlib_worker_thread_t *worker = (vlib_worker_thread_t *) arg;
+  vlib_main_t *vm = vlib_get_main ();
+
+  ASSERT (vm->cpu_index == os_get_cpu_number ());
+
+  vlib_worker_thread_init (worker);
+  clib_time_init (&vm->clib_time);
+  clib_mem_set_heap (worker->thread_mheap);
+
+#if DPDK > 0
+  {
+    /* Wait until the dpdk init sequence is complete */
+    vlib_thread_main_t *tm = vlib_get_thread_main ();
+
+    while (tm->worker_thread_release == 0)
+      vlib_worker_thread_barrier_check ();
+  }
+#endif
+
+  vlib_worker_thread_internal (vm);
+}
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_THREAD (worker_thread_reg, static) = { /* registration of the "workers" thread type */
+ .name = "workers",
+ .short_name = "wk",
+ .function = vlib_worker_thread_fn, /* thread entry point, defined above */
+};
+/* *INDENT-ON* */
+
+/*
+ * Append a new vlib_frame_queue_main_t for node_index to
+ * tm->frame_queue_mains and give it one frame queue per vlib_main.
+ * A frame_queue_nelts of 0 selects FRAME_QUEUE_NELTS.
+ * Returns the index of the new entry within tm->frame_queue_mains.
+ */
+u32
+vlib_frame_queue_main_init (u32 node_index, u32 frame_queue_nelts)
+{
+  vlib_thread_main_t *tm = vlib_get_thread_main ();
+  vlib_frame_queue_main_t *new_fqm;
+  vlib_frame_queue_t *queue;
+  int thread_ix;
+
+  if (frame_queue_nelts == 0)
+    frame_queue_nelts = FRAME_QUEUE_NELTS;
+
+  vec_add2 (tm->frame_queue_mains, new_fqm, 1);
+  new_fqm->node_index = node_index;
+
+  /* Size the vector up front, then rewind its length so that the
+     vec_add1 calls below fill it from slot 0. */
+  vec_validate (new_fqm->vlib_frame_queues, tm->n_vlib_mains - 1);
+  _vec_len (new_fqm->vlib_frame_queues) = 0;
+
+  thread_ix = 0;
+  while (thread_ix < tm->n_vlib_mains)
+    {
+      queue = vlib_frame_queue_alloc (frame_queue_nelts);
+      vec_add1 (new_fqm->vlib_frame_queues, queue);
+      thread_ix++;
+    }
+
+  return (new_fqm - tm->frame_queue_mains);
+}
+
+/* Init-function hook for this file; performs no work and reports success. */
+clib_error_t *
+threads_init (vlib_main_t * vm)
+{
+  clib_error_t *no_error = 0;
+
+  return no_error;
+}
+
+VLIB_INIT_FUNCTION (threads_init);
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vlib/threads.h b/src/vlib/threads.h
new file mode 100644
index 00000000000..34ab5be8650
--- /dev/null
+++ b/src/vlib/threads.h
@@ -0,0 +1,470 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef included_vlib_threads_h
+#define included_vlib_threads_h
+
+#include <vlib/main.h>
+#include <linux/sched.h>
+
+extern vlib_main_t **vlib_mains;
+
+void vlib_set_thread_name (char *name);
+
+/* arg is actually a vlib_worker_thread_t * */
+typedef void (vlib_thread_function_t) (void *arg);
+/*
+ * Describes one type of thread.  Instances are declared with
+ * VLIB_REGISTER_THREAD, whose constructor pushes them onto the list
+ * linked through 'next'.
+ */
+typedef struct vlib_thread_registration_
+{
+ /* constructor generated list of thread registrations */
+ struct vlib_thread_registration_ *next;
+
+ /* config parameters */
+ char *name; /* e.g. "workers" */
+ char *short_name; /* e.g. "wk" */
+ vlib_thread_function_t *function; /* thread entry point */
+ uword mheap_size;
+ int fixed_count; /* non-zero: the "cpu" config section refuses to change count */
+ u32 count; /* number of threads of this type */
+ int no_data_structure_clone;
+ u32 frame_queue_nelts;
+
+ /* All threads of this type run on pthreads */
+ int use_pthreads; /* when set, coremask / corelist configuration is rejected */
+ u32 first_index;
+ uword *coremask; /* bitmap; count is set to its number of set bits when configured */
+} vlib_thread_registration_t;
+
+/*
+ * Frames have their cpu / vlib_main_t index in the low-order N bits
+ * Make VLIB_MAX_CPUS a power-of-two, please...
+ */
+
+#ifndef VLIB_MAX_CPUS
+#define VLIB_MAX_CPUS 256
+#endif
+
+#if VLIB_MAX_CPUS > CLIB_MAX_MHEAPS
+#error Please increase number of per-cpu mheaps
+#endif
+
+#define VLIB_CPU_MASK (VLIB_MAX_CPUS - 1) /* 0xff with the default VLIB_MAX_CPUS of 256 */
+#define VLIB_OFFSET_MASK (~VLIB_CPU_MASK)
+
+#define VLIB_LOG2_THREAD_STACK_SIZE (20)
+#define VLIB_THREAD_STACK_SIZE (1<<VLIB_LOG2_THREAD_STACK_SIZE)
+
+typedef enum
+{
+ VLIB_FRAME_QUEUE_ELT_DISPATCH_FRAME,
+} vlib_frame_queue_msg_type_t;
+/*
+ * One slot of a frame queue ring.  The enqueue side fills the slot and then
+ * sets 'valid' (see vlib_put_frame_queue_elt); the dequeue side clears it
+ * once the buffer indices have been copied out.
+ */
+typedef struct
+{
+ CLIB_CACHE_LINE_ALIGN_MARK (cacheline0);
+ volatile u32 valid; /* non-zero while the slot holds data for the dequeue side */
+ u32 msg_type; /* a vlib_frame_queue_msg_type_t value */
+ u32 n_vectors; /* number of entries used in buffer_index[] */
+ u32 last_n_vectors;
+
+ /* 256 * 4 = 1024 bytes, even mult of cache line size */
+ /* NOTE(review): the arithmetic above assumes VLIB_FRAME_SIZE is 256 - confirm */
+ u32 buffer_index[VLIB_FRAME_SIZE];
+}
+vlib_frame_queue_elt_t;
+/*
+ * Per-thread bookkeeping; one entry per element of vlib_worker_threads.
+ * The barrier code uses entry 0 for the shared barrier state.
+ */
+typedef struct
+{
+ /* First cache line */
+ CLIB_CACHE_LINE_ALIGN_MARK (cacheline0);
+ volatile u32 *wait_at_barrier; /* set to 1 by barrier_sync, cleared by barrier_release */
+ volatile u32 *workers_at_barrier; /* number of threads currently parked in barrier_check */
+
+ /* Second Cache Line */
+ CLIB_CACHE_LINE_ALIGN_MARK (cacheline1);
+ void *thread_mheap; /* installed with clib_mem_set_heap() by the worker thread function */
+ u8 *thread_stack;
+ void (*thread_function) (void *);
+ void *thread_function_arg;
+ i64 recursion_level; /* nesting depth of barrier_sync / barrier_release calls */
+ elog_track_t elog_track;
+ u32 instance_id;
+ vlib_thread_registration_t *registration;
+ u8 *name;
+ u64 barrier_sync_count; /* incremented once per outermost barrier_sync */
+
+ long lwp; /* id handed to sched_getscheduler() / sched_getparam() by "show threads" */
+ int lcore_id; /* reported by "show threads" when the affinity query fails */
+ pthread_t thread_id;
+} vlib_worker_thread_t;
+
+extern vlib_worker_thread_t *vlib_worker_threads;
+/*
+ * Ring of vlib_frame_queue_elt_t used to hand frames to another thread.
+ * Slots are addressed as (counter & (nelts - 1)), so nelts is assumed to be
+ * a power of two.  The ring is empty when head == tail.
+ */
+typedef struct
+{
+ /* enqueue side */
+ CLIB_CACHE_LINE_ALIGN_MARK (cacheline0);
+ volatile u64 tail; /* advanced atomically by vlib_get_frame_queue_elt() */
+ u64 enqueues;
+ u64 enqueue_ticks;
+ u64 enqueue_vectors;
+ u32 enqueue_full_events; /* bumped when the queue is found congested */
+
+ /* dequeue side */
+ CLIB_CACHE_LINE_ALIGN_MARK (cacheline1);
+ volatile u64 head; /* advanced by the dequeue side after a slot is consumed */
+ u64 dequeues;
+ u64 dequeue_ticks;
+ u64 dequeue_vectors;
+ u64 trace; /* non-zero: dequeue records trace and histogram data */
+ u64 vector_threshold; /* dequeue stops once this many vectors were moved in one call */
+
+ /* dequeue hint to enqueue side */
+ CLIB_CACHE_LINE_ALIGN_MARK (cacheline2);
+ volatile u64 head_hint; /* copy of head published whenever dequeue returns */
+
+ /* read-only, constant, shared */
+ /* NOTE(review): "test frame-queue nelts" writes nelts at run time - confirm the "constant" claim */
+ CLIB_CACHE_LINE_ALIGN_MARK (cacheline3);
+ vlib_frame_queue_elt_t *elts;
+ u32 nelts;
+}
+vlib_frame_queue_t;
+/*
+ * One set of handoff queues feeding a single graph node; created by
+ * vlib_frame_queue_main_init().
+ */
+typedef struct
+{
+ u32 node_index; /* node that dequeued frames are sent to; ~0 disables dequeue */
+ vlib_frame_queue_t **vlib_frame_queues; /* one queue per vlib_main */
+
+ /* for frame queue tracing */
+ frame_queue_trace_t *frame_queue_traces; /* allocated by "trace frame-queue" */
+ frame_queue_nelt_counter_t *frame_queue_histogram;
+} vlib_frame_queue_main_t;
+
+/* Called early, in thread 0's context */
+clib_error_t *vlib_thread_init (vlib_main_t * vm);
+
+vlib_worker_thread_t *vlib_alloc_thread (vlib_main_t * vm);
+
+int vlib_frame_queue_enqueue (vlib_main_t * vm, u32 node_runtime_index,
+ u32 frame_queue_index, vlib_frame_t * frame,
+ vlib_frame_queue_msg_type_t type);
+
+int vlib_frame_queue_dequeue (int thread_id,
+ vlib_main_t * vm, vlib_node_main_t * nm);
+
+u64 dispatch_node (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_node_type_t type,
+ vlib_node_state_t dispatch_state,
+ vlib_frame_t * frame, u64 last_time_stamp);
+
+u64 dispatch_pending_node (vlib_main_t * vm,
+ vlib_pending_frame_t * p, u64 last_time_stamp);
+
+void vlib_worker_thread_node_runtime_update (void);
+
+void vlib_create_worker_threads (vlib_main_t * vm, int n,
+ void (*thread_function) (void *));
+
+void vlib_worker_thread_init (vlib_worker_thread_t * w);
+u32 vlib_frame_queue_main_init (u32 node_index, u32 frame_queue_nelts);
+
+/* Check for a barrier sync request every 30ms */
+#define BARRIER_SYNC_DELAY (0.030000)
+
+#if CLIB_DEBUG > 0
+/* long barrier timeout, for gdb... */
+#define BARRIER_SYNC_TIMEOUT (600.1)
+#else
+#define BARRIER_SYNC_TIMEOUT (1.0)
+#endif
+
+void vlib_worker_thread_barrier_sync (vlib_main_t * vm);
+void vlib_worker_thread_barrier_release (vlib_main_t * vm);
+
+/*
+ * In debug images (CLIB_DEBUG > 0), print a warning to stderr when called
+ * on any cpu other than cpu 0.  Does nothing otherwise.
+ */
+always_inline void
+vlib_smp_unsafe_warning (void)
+{
+  if (CLIB_DEBUG > 0 && os_get_cpu_number ())
+    fformat (stderr, "%s: SMP unsafe warning...\n", __FUNCTION__);
+}
+/* Selects the fixup performed by vlib_worker_thread_fork_fixup(). */
+typedef enum
+{
+ VLIB_WORKER_THREAD_FORK_FIXUP_ILLEGAL = 0,
+ VLIB_WORKER_THREAD_FORK_FIXUP_NEW_SW_IF_INDEX, /* forwarded to vnet_main_fixup() */
+} vlib_fork_fixup_t;
+
+void vlib_worker_thread_fork_fixup (vlib_fork_fixup_t which);
+/*
+ * Worker-side half of the barrier.  If wait_at_barrier is set, atomically
+ * increment workers_at_barrier, spin until wait_at_barrier is cleared, then
+ * atomically decrement workers_at_barrier again.
+ */
+static inline void
+vlib_worker_thread_barrier_check (void)
+{
+ if (PREDICT_FALSE (*vlib_worker_threads->wait_at_barrier))
+ {
+ clib_smp_atomic_add (vlib_worker_threads->workers_at_barrier, 1);
+ while (*vlib_worker_threads->wait_at_barrier)
+ ;
+ clib_smp_atomic_add (vlib_worker_threads->workers_at_barrier, -1);
+ }
+}
+/*
+ * Evaluate 'body' once for every non-NULL entry of vlib_mains, or once for
+ * &vlib_global_main when vlib_mains is empty.  The current entry is
+ * available to 'body' as this_vlib_main.  The list is copied into a
+ * temporary vector first and freed afterwards.
+ *
+ * Note: the expansion already ends in "while (0);", so an invocation
+ * followed by ';' expands to two statements.
+ */
+#define foreach_vlib_main(body) \
+do { \
+ vlib_main_t ** __vlib_mains = 0, *this_vlib_main; \
+ int ii; \
+ \
+ if (vec_len (vlib_mains) == 0) \
+ vec_add1 (__vlib_mains, &vlib_global_main); \
+ else \
+ { \
+ for (ii = 0; ii < vec_len (vlib_mains); ii++) \
+ { \
+ this_vlib_main = vlib_mains[ii]; \
+ if (this_vlib_main) \
+ vec_add1 (__vlib_mains, this_vlib_main); \
+ } \
+ } \
+ \
+ for (ii = 0; ii < vec_len (__vlib_mains); ii++) \
+ { \
+ this_vlib_main = __vlib_mains[ii]; \
+ /* body uses this_vlib_main... */ \
+ (body); \
+ } \
+ vec_free (__vlib_mains); \
+} while (0);
+/*
+ * X-macro listing the known scheduling policies as
+ * _(system constant, enum suffix, display string).
+ */
+#define foreach_sched_policy \
+ _(SCHED_OTHER, OTHER, "other") \
+ _(SCHED_BATCH, BATCH, "batch") \
+ _(SCHED_IDLE, IDLE, "idle") \
+ _(SCHED_FIFO, FIFO, "fifo") \
+ _(SCHED_RR, RR, "rr")
+
+typedef enum
+{
+#define _(v,f,s) SCHED_POLICY_##f = v, /* each enumerator takes the value of its SCHED_* constant */
+ foreach_sched_policy
+#undef _
+ SCHED_POLICY_N,
+} sched_policy_t;
+/* Global thread configuration and state; the single instance is vlib_thread_main. */
+typedef struct
+{
+ /* Link list of registrations, built by constructors */
+ vlib_thread_registration_t *next;
+
+ /* Vector of registrations, w/ non-data-structure clones at the top */
+ vlib_thread_registration_t **registrations;
+
+ /* Hash used by the "cpu" config section to find a registration by name */
+ uword *thread_registrations_by_name;
+
+ vlib_worker_thread_t *worker_threads;
+
+ /*
+ * Launch all threads as pthreads,
+ * not eal_rte_launch (strict affinity) threads
+ */
+ int use_pthreads;
+
+ /* Number of vlib_main / vnet_main clones */
+ u32 n_vlib_mains;
+
+ /* Number of thread stacks to create */
+ u32 n_thread_stacks;
+
+ /* Number of pthreads */
+ u32 n_pthreads;
+
+ /* Number of DPDK eal threads */
+ u32 n_eal_threads;
+
+ /* Number of cores to skip, must match the core mask */
+ u32 skip_cores;
+
+ /* Thread prefix name */
+ u8 *thread_prefix;
+
+ /* main thread lcore */
+ u8 main_lcore;
+
+ /* Bitmap of available CPU cores */
+ uword *cpu_core_bitmap;
+
+ /* Bitmap of available CPU sockets (NUMA nodes) */
+ uword *cpu_socket_bitmap;
+
+ /* Worker handoff queues */
+ vlib_frame_queue_main_t *frame_queue_mains;
+
+ /* worker thread initialization barrier */
+ volatile u32 worker_thread_release;
+
+ /* scheduling policy */
+ u32 sched_policy;
+
+ /* scheduling policy priority; ~0 is treated as "not configured" */
+ u32 sched_priority;
+
+} vlib_thread_main_t;
+
+extern vlib_thread_main_t vlib_thread_main;
+/*
+ * Declare a vlib_thread_registration_t named x (the optional extra
+ * arguments are placed before the type, e.g. 'static') together with a
+ * constructor function that pushes &x onto the front of
+ * vlib_thread_main.next.  The macro ends with the declaration of x so the
+ * caller can follow it with "= { ... };".
+ */
+#define VLIB_REGISTER_THREAD(x,...) \
+ __VA_ARGS__ vlib_thread_registration_t x; \
+static void __vlib_add_thread_registration_##x (void) \
+ __attribute__((__constructor__)) ; \
+static void __vlib_add_thread_registration_##x (void) \
+{ \
+ vlib_thread_main_t * tm = &vlib_thread_main; \
+ x.next = tm->next; \
+ tm->next = &x; \
+} \
+__VA_ARGS__ vlib_thread_registration_t x
+
+/* Number of vlib_mains minus one. */
+always_inline u32
+vlib_num_workers ()
+{
+  u32 n_mains = vlib_thread_main.n_vlib_mains;
+
+  return n_mains - 1;
+}
+
+/* Map a worker index to its cpu index (worker index + 1). */
+always_inline u32
+vlib_get_worker_cpu_index (u32 worker_index)
+{
+  u32 cpu_index = worker_index + 1;
+
+  return cpu_index;
+}
+
+/* Map a cpu index to its worker index (cpu index - 1). */
+always_inline u32
+vlib_get_worker_index (u32 cpu_index)
+{
+  u32 worker_index = cpu_index - 1;
+
+  return worker_index;
+}
+
+/* Worker index of the calling thread: os_get_cpu_number () - 1. */
+always_inline u32
+vlib_get_current_worker_index ()
+{
+  return (os_get_cpu_number () - 1);
+}
+
+/*
+ * Return the vlib_main_t stored at vlib_mains[worker_index + 1].
+ * Asserts that worker_index is below n_vlib_mains - 1 and that the
+ * entry is non-NULL.
+ */
+always_inline vlib_main_t *
+vlib_get_worker_vlib_main (u32 worker_index)
+{
+  vlib_thread_main_t *tm = &vlib_thread_main;
+  vlib_main_t *worker_vm;
+
+  ASSERT (worker_index < tm->n_vlib_mains - 1);
+
+  worker_vm = vlib_mains[worker_index + 1];
+  ASSERT (worker_vm);
+
+  return worker_vm;
+}
+/*
+ * Publish a filled queue element: issue a memory barrier, then mark the
+ * element valid so the dequeue side will pick it up.
+ */
+static inline void
+vlib_put_frame_queue_elt (vlib_frame_queue_elt_t * hf)
+{
+ CLIB_MEMORY_BARRIER ();
+ hf->valid = 1;
+}
+/*
+ * Claim the next slot of queue 'index' within frame queue main
+ * 'frame_queue_index'.  The tail counter is advanced atomically; the call
+ * then spins (servicing barrier requests) until the claimed position is
+ * within nelts of head_hint, and spins again until the slot is no longer
+ * marked valid.  The returned element has msg_type set to
+ * VLIB_FRAME_QUEUE_ELT_DISPATCH_FRAME and its vector counts zeroed; the
+ * caller fills it and publishes it with vlib_put_frame_queue_elt().
+ */
+static inline vlib_frame_queue_elt_t *
+vlib_get_frame_queue_elt (u32 frame_queue_index, u32 index)
+{
+ vlib_frame_queue_t *fq;
+ vlib_frame_queue_elt_t *elt;
+ vlib_thread_main_t *tm = &vlib_thread_main;
+ vlib_frame_queue_main_t *fqm =
+ vec_elt_at_index (tm->frame_queue_mains, frame_queue_index);
+ u64 new_tail;
+
+ fq = fqm->vlib_frame_queues[index];
+ ASSERT (fq);
+
+ new_tail = __sync_add_and_fetch (&fq->tail, 1);
+
+ /* Wait until a ring slot is available */
+ while (new_tail >= fq->head_hint + fq->nelts)
+ vlib_worker_thread_barrier_check ();
+
+ elt = fq->elts + (new_tail & (fq->nelts - 1));
+
+ /* this would be very bad... */
+ while (elt->valid) /* spins with no barrier check while the slot is still marked valid */
+ ;
+
+ elt->msg_type = VLIB_FRAME_QUEUE_ELT_DISPATCH_FRAME;
+ elt->last_n_vectors = elt->n_vectors = 0;
+
+ return elt;
+}
+
+/*
+ * Report whether queue 'index' of frame queue main 'frame_queue_index' is
+ * congested.
+ *
+ * handoff_queue_by_worker_index[] caches the answer: an entry other than
+ * (vlib_frame_queue_t *) ~0 is returned as-is.  Otherwise the queue is
+ * considered congested when tail >= head_hint + queue_hi_thresh; in that
+ * case the queue is stored in the cache, its enqueue_full_events counter
+ * is incremented and the queue is returned.  Returns NULL when the queue
+ * is not congested.
+ */
+static inline vlib_frame_queue_t *
+is_vlib_frame_queue_congested (u32 frame_queue_index,
+                               u32 index,
+                               u32 queue_hi_thresh,
+                               vlib_frame_queue_t **
+                               handoff_queue_by_worker_index)
+{
+  vlib_thread_main_t *tm = &vlib_thread_main;
+  vlib_frame_queue_main_t *fqm =
+    vec_elt_at_index (tm->frame_queue_mains, frame_queue_index);
+  vlib_frame_queue_t *cached = handoff_queue_by_worker_index[index];
+  vlib_frame_queue_t *queue;
+
+  /* A previous call already recorded a result for this worker */
+  if (cached != (vlib_frame_queue_t *) (~0))
+    return cached;
+
+  queue = fqm->vlib_frame_queues[index];
+  ASSERT (queue);
+
+  if (PREDICT_FALSE (queue->tail >= (queue->head_hint + queue_hi_thresh)))
+    {
+      /* Caching the queue pointer marks this worker as congested */
+      handoff_queue_by_worker_index[index] = queue;
+      queue->enqueue_full_events++;
+      return queue;
+    }
+
+  return NULL;
+}
+
+/*
+ * Return the queue element currently being filled for vlib_worker_index,
+ * claiming a new one with vlib_get_frame_queue_elt() and caching it in
+ * handoff_queue_elt_by_worker_index[] when there is none yet.
+ */
+static inline vlib_frame_queue_elt_t *
+vlib_get_worker_handoff_queue_elt (u32 frame_queue_index,
+                                   u32 vlib_worker_index,
+                                   vlib_frame_queue_elt_t **
+                                   handoff_queue_elt_by_worker_index)
+{
+  vlib_frame_queue_elt_t **slot =
+    &handoff_queue_elt_by_worker_index[vlib_worker_index];
+
+  if (!*slot)
+    *slot = vlib_get_frame_queue_elt (frame_queue_index, vlib_worker_index);
+
+  return *slot;
+}
+
+#endif /* included_vlib_threads_h */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vlib/threads_cli.c b/src/vlib/threads_cli.c
new file mode 100644
index 00000000000..ee632279db5
--- /dev/null
+++ b/src/vlib/threads_cli.c
@@ -0,0 +1,579 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#define _GNU_SOURCE
+
+#include <vppinfra/format.h>
+#include <vlib/vlib.h>
+
+#include <vlib/threads.h>
+#include <vlib/unix/unix.h>
+
+#if DPDK==1
+#include <rte_config.h>
+#include <rte_common.h>
+#include <rte_eal.h>
+#include <rte_launch.h>
+#include <rte_lcore.h>
+#endif
+
+/*
+ * format() helper taking one 'long' argument that is passed to
+ * sched_getscheduler() and sched_getparam().  Appends the policy name
+ * followed by the priority in parentheses, or "(n/a)" when
+ * sched_getparam() fails.
+ */
+static u8 *
+format_sched_policy_and_priority (u8 * s, va_list * args)
+{
+  long lwp = va_arg (*args, long);
+  struct sched_param param;
+  u8 *policy_name = 0;
+  int policy = sched_getscheduler (lwp);
+
+  switch (policy)
+    {
+#define _(v,f,str) case SCHED_POLICY_##f: policy_name = (u8 *) str; break;
+      foreach_sched_policy
+#undef _
+    }
+
+  if (sched_getparam (lwp, &param) != 0)
+    return format (s, "%s (n/a)", policy_name);
+
+  return format (s, "%s (%d)", policy_name, param.sched_priority);
+}
+
+/*
+ * CLI handler for "show threads".
+ *
+ * Prints a header followed by one row per entry of vlib_worker_threads:
+ * index, name, registration name, LWP, scheduling policy and priority,
+ * and the lcore / core / socket the thread is bound to.  The lcore is the
+ * single cpu in the thread's affinity set; "M" is shown when the set holds
+ * more than one cpu, and w->lcore_id is used when the affinity query
+ * fails.  Core and socket ids are read from sysfs.  With DPDK the lcore
+ * state is appended.  Nothing but the header is printed on powerpc64.
+ *
+ * Always returns 0.
+ */
+static clib_error_t *
+show_threads_fn (vlib_main_t * vm,
+                 unformat_input_t * input, vlib_cli_command_t * cmd)
+{
+  vlib_worker_thread_t *w;
+  int i;
+
+  vlib_cli_output (vm, "%-7s%-20s%-12s%-8s%-25s%-7s%-7s%-7s%-10s",
+                   "ID", "Name", "Type", "LWP", "Sched Policy (Priority)",
+                   "lcore", "Core", "Socket", "State");
+
+#if !defined(__powerpc64__)
+  for (i = 0; i < vec_len (vlib_worker_threads); i++)
+    {
+      w = vlib_worker_threads + i;
+      u8 *line = NULL;
+
+      line = format (line, "%-7d%-20s%-12s%-8d",
+                     i,
+                     w->name ? w->name : (u8 *) "",
+                     w->registration ? w->registration->name : "", w->lwp);
+
+      line = format (line, "%-25U", format_sched_policy_and_priority, w->lwp);
+
+      int lcore = -1;
+      cpu_set_t cpuset;
+      CPU_ZERO (&cpuset);
+      int ret = -1;
+
+      ret =
+        pthread_getaffinity_np (w->thread_id, sizeof (cpu_set_t), &cpuset);
+      if (!ret)
+        {
+          int c;
+          /* lcore ends up as the only cpu in the set, or -2 if there
+             is more than one */
+          for (c = 0; c < CPU_SETSIZE; c++)
+            if (CPU_ISSET (c, &cpuset))
+              {
+                if (lcore > -1)
+                  {
+                    lcore = -2;
+                    break;
+                  }
+                lcore = c;
+              }
+        }
+      else
+        {
+          lcore = w->lcore_id;
+        }
+
+      if (lcore > -1)
+        {
+          const char *sys_cpu_path = "/sys/devices/system/cpu/cpu";
+          int socket_id = -1;
+          int core_id = -1;
+          u8 *p = 0;
+
+          p = format (p, "%s%u/topology/core_id%c", sys_cpu_path, lcore, 0);
+          vlib_sysfs_read ((char *) p, "%d", &core_id);
+
+          vec_reset_length (p);
+          p =
+            format (p,
+                    "%s%u/topology/physical_package_id%c",
+                    sys_cpu_path, lcore, 0);
+          vlib_sysfs_read ((char *) p, "%d", &socket_id);
+          vec_free (p);
+
+          /* no trailing '%': the format string ends after the third column */
+          line = format (line, "%-7u%-7u%-7u", lcore, core_id, socket_id);
+#if DPDK==1
+          /* lcore_config[] has RTE_MAX_LCORE entries, so the index must be
+             strictly below RTE_MAX_LCORE */
+          ASSERT (lcore < RTE_MAX_LCORE);
+          switch (lcore_config[lcore].state)
+            {
+            case WAIT:
+              line = format (line, "wait");
+              break;
+            case RUNNING:
+              line = format (line, "running");
+              break;
+            case FINISHED:
+              line = format (line, "finished");
+              break;
+            default:
+              line = format (line, "unknown");
+            }
+#endif
+        }
+      else
+        {
+          line =
+            format (line, "%-7s%-7s%-7s", (lcore == -2) ? "M" : "n/a", "n/a",
+                    "n/a");
+        }
+
+      vlib_cli_output (vm, "%v", line);
+      vec_free (line);
+    }
+#endif
+
+  return 0;
+}
+
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (show_threads_command, static) = {
+  .path = "show threads",
+  .short_help = "Show threads",
+  .function = show_threads_fn,
+};
+/* *INDENT-ON* */
+
+/*
+ * Trigger threads to grab frame queue trace data
+ *
+ * CLI handler for "trace frame-queue (on|off) index <n>".  <n> selects an
+ * entry of tm->frame_queue_mains.  Trace and histogram storage for every
+ * queue of that entry is (re)allocated and cleared, and each queue's
+ * 'trace' flag is set to the requested state.
+ *
+ * Returns 0 on success or when no input line is present, and a
+ * clib_error_t for a parse error, a missing on/off, no handoff queues, or
+ * an out-of-range index.
+ */
+static clib_error_t *
+trace_frame_queue (vlib_main_t * vm, unformat_input_t * input,
+                   vlib_cli_command_t * cmd)
+{
+  unformat_input_t _line_input, *line_input = &_line_input;
+  clib_error_t *error = NULL;
+  frame_queue_trace_t *fqt;
+  frame_queue_nelt_counter_t *fqh;
+  vlib_thread_main_t *tm = vlib_get_thread_main ();
+  vlib_frame_queue_main_t *fqm;
+  u32 num_fq;
+  u32 fqix;
+  u32 enable = 2;               /* 2 == neither "on" nor "off" seen */
+  u32 index = ~(u32) 0;
+
+  if (!unformat_user (input, unformat_line_input, line_input))
+    return 0;
+
+  while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+    {
+      if (unformat (line_input, "on"))
+        enable = 1;
+      else if (unformat (line_input, "off"))
+        enable = 0;
+      else if (unformat (line_input, "index %u", &index))
+        ;
+      else
+        {
+          /* build the message before line_input is released below */
+          error = clib_error_return (0, "parse error: '%U'",
+                                     format_unformat_error, line_input);
+          break;
+        }
+    }
+
+  unformat_free (line_input);
+
+  if (error)
+    return error;
+
+  if (enable > 1)
+    return clib_error_return (0, "expecting on or off");
+
+  if (vec_len (tm->frame_queue_mains) == 0)
+    return clib_error_return (0, "no worker handoffs exist");
+
+  if (index >= vec_len (tm->frame_queue_mains))
+    return clib_error_return (0,
+                              "expecting valid worker handoff queue index");
+
+  fqm = vec_elt_at_index (tm->frame_queue_mains, index);
+
+  num_fq = vec_len (fqm->vlib_frame_queues);
+  if (num_fq == 0)
+    {
+      vlib_cli_output (vm, "No frame queues exist\n");
+      return error;
+    }
+
+  // Allocate storage for trace if necessary
+  vec_validate_aligned (fqm->frame_queue_traces, num_fq - 1,
+                        CLIB_CACHE_LINE_BYTES);
+  vec_validate_aligned (fqm->frame_queue_histogram, num_fq - 1,
+                        CLIB_CACHE_LINE_BYTES);
+
+  for (fqix = 0; fqix < num_fq; fqix++)
+    {
+      fqt = &fqm->frame_queue_traces[fqix];
+      fqh = &fqm->frame_queue_histogram[fqix];
+
+      /* start from a clean slate each time tracing is switched */
+      memset (fqt->n_vectors, 0xff, sizeof (fqt->n_vectors));
+      fqt->written = 0;
+      memset (fqh, 0, sizeof (*fqh));
+      fqm->vlib_frame_queues[fqix]->trace = enable;
+    }
+  return error;
+}
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (cmd_trace_frame_queue,static) = {
+    .path = "trace frame-queue",
+    .short_help = "trace frame-queue (on|off) index <n>",
+    .function = trace_frame_queue,
+    .is_mp_safe = 1,
+};
+/* *INDENT-ON* */
+
+
+/*
+ * Sum two adjacent counters and express the sum as a percentage of
+ * 'total', rounded up (so any non-zero share shows as at least 1%).
+ * Yields 0 when 'total' is 0.
+ */
+static u32
+compute_percent (u64 * two_counters, u64 total)
+{
+  u64 pair_sum;
+
+  if (total == 0)
+    return 0;
+
+  pair_sum = two_counters[0] + two_counters[1];
+
+  /* adding (total - 1) before dividing rounds the quotient up */
+  return (pair_sum * 100 + (total - 1)) / total;
+}
+
+/*
+ * Display frame queue trace data gathered by threads.
+ *
+ * Walks fqm->frame_queue_traces and prints one section per entry.  With
+ * 'histogram' non-zero the in-use histogram is printed as 16 percentages
+ * (counters are paired); otherwise the recorded ring state and the first
+ * 16 (and, for rings larger than 16, the next 16) per-slot vector counts
+ * are printed.  Entries that were never written are reported as
+ * "no trace data".  Always returns NULL.
+ */
+static clib_error_t *
+show_frame_queue_internal (vlib_main_t * vm,
+ vlib_frame_queue_main_t * fqm, u32 histogram)
+{
+ clib_error_t *error = NULL;
+ frame_queue_trace_t *fqt;
+ frame_queue_nelt_counter_t *fqh;
+ u32 num_fq;
+ u32 fqix;
+
+ num_fq = vec_len (fqm->frame_queue_traces);
+ if (num_fq == 0)
+ {
+ vlib_cli_output (vm, "No trace data for frame queues\n");
+ return error;
+ }
+
+ if (histogram)
+ {
+ vlib_cli_output (vm, "0-1 2-3 4-5 6-7 8-9 10-11 12-13 14-15 "
+ "16-17 18-19 20-21 22-23 24-25 26-27 28-29 30-31\n");
+ }
+
+ for (fqix = 0; fqix < num_fq; fqix++)
+ {
+ fqt = &(fqm->frame_queue_traces[fqix]);
+
+ vlib_cli_output (vm, "Thread %d %v\n", fqix,
+ vlib_worker_threads[fqix].name);
+
+ if (fqt->written == 0)
+ {
+ vlib_cli_output (vm, " no trace data\n");
+ continue;
+ }
+
+ if (histogram)
+ {
+ fqh = &(fqm->frame_queue_histogram[fqix]);
+ u32 nelt;
+ u64 total = 0;
+
+ for (nelt = 0; nelt < FRAME_QUEUE_MAX_NELTS; nelt++)
+ {
+ total += fqh->count[nelt];
+ }
+
+ /*
+ * Print in pairs to condense the output.
+ * Allow entries with 0 counts to be clearly identified, by rounding up.
+ * Any non-zero value will be displayed as at least one percent. This
+ * also means the sum of percentages can be > 100, but that is fine. The
+ * histogram is counted from the last time "trace frame on" was issued.
+ */
+ vlib_cli_output (vm,
+ "%3d%% %3d%% %3d%% %3d%% %3d%% %3d%% %3d%% %3d%% "
+ "%3d%% %3d%% %3d%% %3d%% %3d%% %3d%% %3d%% %3d%%\n",
+ compute_percent (&fqh->count[0], total),
+ compute_percent (&fqh->count[2], total),
+ compute_percent (&fqh->count[4], total),
+ compute_percent (&fqh->count[6], total),
+ compute_percent (&fqh->count[8], total),
+ compute_percent (&fqh->count[10], total),
+ compute_percent (&fqh->count[12], total),
+ compute_percent (&fqh->count[14], total),
+ compute_percent (&fqh->count[16], total),
+ compute_percent (&fqh->count[18], total),
+ compute_percent (&fqh->count[20], total),
+ compute_percent (&fqh->count[22], total),
+ compute_percent (&fqh->count[24], total),
+ compute_percent (&fqh->count[26], total),
+ compute_percent (&fqh->count[28], total),
+ compute_percent (&fqh->count[30], total));
+ }
+ else
+ {
+ vlib_cli_output (vm,
+ " vector-threshold %d ring size %d in use %d\n",
+ fqt->threshold, fqt->nelts, fqt->n_in_use);
+ vlib_cli_output (vm, " head %12d head_hint %12d tail %12d\n",
+ fqt->head, fqt->head_hint, fqt->tail);
+ vlib_cli_output (vm, /* slots 0-15 are printed even when the ring is smaller */
+ " %3d %3d %3d %3d %3d %3d %3d %3d %3d %3d %3d %3d %3d %3d %3d %3d\n",
+ fqt->n_vectors[0], fqt->n_vectors[1],
+ fqt->n_vectors[2], fqt->n_vectors[3],
+ fqt->n_vectors[4], fqt->n_vectors[5],
+ fqt->n_vectors[6], fqt->n_vectors[7],
+ fqt->n_vectors[8], fqt->n_vectors[9],
+ fqt->n_vectors[10], fqt->n_vectors[11],
+ fqt->n_vectors[12], fqt->n_vectors[13],
+ fqt->n_vectors[14], fqt->n_vectors[15]);
+
+ if (fqt->nelts > 16)
+ {
+ vlib_cli_output (vm,
+ " %3d %3d %3d %3d %3d %3d %3d %3d %3d %3d %3d %3d %3d %3d %3d %3d\n",
+ fqt->n_vectors[16], fqt->n_vectors[17],
+ fqt->n_vectors[18], fqt->n_vectors[19],
+ fqt->n_vectors[20], fqt->n_vectors[21],
+ fqt->n_vectors[22], fqt->n_vectors[23],
+ fqt->n_vectors[24], fqt->n_vectors[25],
+ fqt->n_vectors[26], fqt->n_vectors[27],
+ fqt->n_vectors[28], fqt->n_vectors[29],
+ fqt->n_vectors[30], fqt->n_vectors[31]);
+ }
+ }
+
+ }
+ return error;
+}
+
+/*
+ * CLI handler for "show frame-queue": for every worker handoff queue set,
+ * print a heading and then the recorded trace data.  Stops at, and
+ * returns, the first error reported by show_frame_queue_internal().
+ */
+static clib_error_t *
+show_frame_queue_trace (vlib_main_t * vm, unformat_input_t * input,
+                        vlib_cli_command_t * cmd)
+{
+  vlib_thread_main_t *tm = vlib_get_thread_main ();
+  vlib_frame_queue_main_t *fqm;
+  clib_error_t *err;
+
+  vec_foreach (fqm, tm->frame_queue_mains)
+  {
+    vlib_cli_output (vm, "Worker handoff queue index %u (next node '%U'):",
+                     fqm - tm->frame_queue_mains,
+                     format_vlib_node_name, vm, fqm->node_index);
+
+    err = show_frame_queue_internal (vm, fqm, /* histogram */ 0);
+    if (err != 0)
+      return err;
+  }
+
+  return 0;
+}
+
+/*
+ * CLI handler for "show frame-queue histogram": for every worker handoff
+ * queue set, print a heading and then the in-use histogram.  Stops at, and
+ * returns, the first error reported by show_frame_queue_internal().
+ */
+static clib_error_t *
+show_frame_queue_histogram (vlib_main_t * vm, unformat_input_t * input,
+                            vlib_cli_command_t * cmd)
+{
+  vlib_thread_main_t *tm = vlib_get_thread_main ();
+  vlib_frame_queue_main_t *fqm;
+  clib_error_t *err;
+
+  vec_foreach (fqm, tm->frame_queue_mains)
+  {
+    vlib_cli_output (vm, "Worker handoff queue index %u (next node '%U'):",
+                     fqm - tm->frame_queue_mains,
+                     format_vlib_node_name, vm, fqm->node_index);
+
+    err = show_frame_queue_internal (vm, fqm, /* histogram */ 1);
+    if (err != 0)
+      return err;
+  }
+
+  return 0;
+}
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (cmd_show_frame_queue_trace,static) = { /* prints the trace snapshot of every handoff queue */
+ .path = "show frame-queue",
+ .short_help = "show frame-queue trace", /* NOTE(review): help text has a trailing "trace" that .path does not - confirm intended */
+ .function = show_frame_queue_trace,
+};
+/* *INDENT-ON* */
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (cmd_show_frame_queue_histogram,static) = { /* prints the in-use histogram of every handoff queue */
+ .path = "show frame-queue histogram",
+ .short_help = "show frame-queue histogram",
+ .function = show_frame_queue_histogram,
+};
+/* *INDENT-ON* */
+
+
+/*
+ * Modify the number of elements on the frame_queues
+ *
+ * CLI handler for "test frame-queue nelts (4,8,16,32) index <n>".  <n>
+ * selects an entry of tm->frame_queue_mains; the nelts field of every
+ * queue of that entry is overwritten with the given value.
+ *
+ * Returns 0 on success or when no input line is present, and a
+ * clib_error_t for a parse error, no handoff queues, an out-of-range
+ * index, or an unsupported nelts value.
+ */
+static clib_error_t *
+test_frame_queue_nelts (vlib_main_t * vm, unformat_input_t * input,
+                        vlib_cli_command_t * cmd)
+{
+  unformat_input_t _line_input, *line_input = &_line_input;
+  vlib_thread_main_t *tm = vlib_get_thread_main ();
+  vlib_frame_queue_main_t *fqm;
+  clib_error_t *error = NULL;
+  u32 num_fq;
+  u32 fqix;
+  u32 nelts = 0;
+  u32 index = ~(u32) 0;
+
+  if (!unformat_user (input, unformat_line_input, line_input))
+    return 0;
+
+  while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+    {
+      if (unformat (line_input, "nelts %u", &nelts))
+        ;
+      else if (unformat (line_input, "index %u", &index))
+        ;
+      else
+        {
+          /* build the message before line_input is released below */
+          error = clib_error_return (0, "parse error: '%U'",
+                                     format_unformat_error, line_input);
+          break;
+        }
+    }
+
+  unformat_free (line_input);
+
+  if (error)
+    return error;
+
+  /* Checked first so the index test below cannot wrap on an empty vector */
+  if (vec_len (tm->frame_queue_mains) == 0)
+    return clib_error_return (0, "no worker handoffs exist");
+
+  if (index >= vec_len (tm->frame_queue_mains))
+    return clib_error_return (0,
+                              "expecting valid worker handoff queue index");
+
+  fqm = vec_elt_at_index (tm->frame_queue_mains, index);
+
+  if ((nelts != 4) && (nelts != 8) && (nelts != 16) && (nelts != 32))
+    {
+      return clib_error_return (0, "expecting 4,8,16,32");
+    }
+
+  num_fq = vec_len (fqm->vlib_frame_queues);
+  if (num_fq == 0)
+    {
+      vlib_cli_output (vm, "No frame queues exist\n");
+      return error;
+    }
+
+  /* NOTE(review): only the nelts field changes; the elts storage is not
+     reallocated here - confirm every queue was allocated with at least
+     this many elements */
+  for (fqix = 0; fqix < num_fq; fqix++)
+    {
+      fqm->vlib_frame_queues[fqix]->nelts = nelts;
+    }
+
+  return error;
+}
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (cmd_test_frame_queue_nelts,static) = {
+    .path = "test frame-queue nelts",
+    .short_help = "test frame-queue nelts (4,8,16,32) index <n>",
+    .function = test_frame_queue_nelts,
+};
+/* *INDENT-ON* */
+
+
+/*
+ * Modify the max number of packets pulled off the frame queues
+ *
+ * CLI handler for "test frame-queue threshold N index <n>".  <n> selects
+ * an entry of tm->frame_queue_mains; vector_threshold of every queue of
+ * that entry is set to N.  N == 0 means "no limit" and is stored as ~0.
+ *
+ * Returns 0 on success, when no input line is present, or when no
+ * threshold was given (a message is printed in that case), and a
+ * clib_error_t for a parse error, no handoff queues, or an out-of-range
+ * index.
+ */
+static clib_error_t *
+test_frame_queue_threshold (vlib_main_t * vm, unformat_input_t * input,
+                            vlib_cli_command_t * cmd)
+{
+  unformat_input_t _line_input, *line_input = &_line_input;
+  vlib_thread_main_t *tm = vlib_get_thread_main ();
+  vlib_frame_queue_main_t *fqm;
+  clib_error_t *error = NULL;
+  u32 num_fq;
+  u32 fqix;
+  u32 threshold = ~(u32) 0;     /* ~0 == no threshold given */
+  u32 index = ~(u32) 0;
+
+  if (!unformat_user (input, unformat_line_input, line_input))
+    return 0;
+
+  while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+    {
+      if (unformat (line_input, "threshold %u", &threshold))
+        ;
+      else if (unformat (line_input, "index %u", &index))
+        ;
+      else
+        {
+          /* build the message before line_input is released below */
+          error = clib_error_return (0, "parse error: '%U'",
+                                     format_unformat_error, line_input);
+          break;
+        }
+    }
+
+  unformat_free (line_input);
+
+  if (error)
+    return error;
+
+  /* Checked first so the index test below cannot wrap on an empty vector */
+  if (vec_len (tm->frame_queue_mains) == 0)
+    return clib_error_return (0, "no worker handoffs exist");
+
+  if (index >= vec_len (tm->frame_queue_mains))
+    return clib_error_return (0,
+                              "expecting valid worker handoff queue index");
+
+  fqm = vec_elt_at_index (tm->frame_queue_mains, index);
+
+
+  if (threshold == ~(u32) 0)
+    {
+      vlib_cli_output (vm, "expecting threshold value\n");
+      return error;
+    }
+
+  if (threshold == 0)
+    threshold = ~0;
+
+  num_fq = vec_len (fqm->vlib_frame_queues);
+  if (num_fq == 0)
+    {
+      vlib_cli_output (vm, "No frame queues exist\n");
+      return error;
+    }
+
+  for (fqix = 0; fqix < num_fq; fqix++)
+    {
+      fqm->vlib_frame_queues[fqix]->vector_threshold = threshold;
+    }
+
+  return error;
+}
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (cmd_test_frame_queue_threshold,static) = {
+    .path = "test frame-queue threshold",
+    .short_help = "test frame-queue threshold N (0=no limit) index <n>",
+    .function = test_frame_queue_threshold,
+};
+/* *INDENT-ON* */
+
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vlib/trace.c b/src/vlib/trace.c
new file mode 100644
index 00000000000..dcdb837f16c
--- /dev/null
+++ b/src/vlib/trace.c
@@ -0,0 +1,545 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * trace.c: VLIB trace buffer.
+ *
+ * Copyright (c) 2008 Eliot Dresselhaus
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <vlib/vlib.h>
+#include <vlib/threads.h>
+
+/*
+ * If the buffer with index bi is flagged as traced, append a trace record
+ * for this node and copy n_bytes of the buffer's current data into it.
+ */
+always_inline void
+trace_buffer_data_only (vlib_main_t * vm, vlib_node_runtime_t * node,
+                        u32 bi, uword n_bytes)
+{
+  vlib_buffer_t *b = vlib_get_buffer (vm, bi);
+
+  if (b->flags & VLIB_BUFFER_IS_TRACED)
+    {
+      u8 *rec = vlib_add_trace (vm, node, b, n_bytes);
+      clib_memcpy (rec, b->data + b->current_data, n_bytes);
+    }
+}
+
+/*
+ * Helper function for nodes which only trace buffer data.
+ *
+ * Walks n_buffers buffer indices starting at buffers and, for each buffer
+ * flagged VLIB_BUFFER_IS_TRACED, records n_buffer_data_bytes_in_trace bytes
+ * of its data.  next_buffer_stride is accepted but not used here.
+ */
+void
+vlib_trace_frame_buffers_only (vlib_main_t * vm,
+                               vlib_node_runtime_t * node,
+                               u32 * buffers,
+                               uword n_buffers,
+                               uword next_buffer_stride,
+                               uword n_buffer_data_bytes_in_trace)
+{
+  u32 *bi = buffers;
+  u32 remaining = n_buffers;
+
+  /* Two buffers per pass while at least two more are available to prefetch. */
+  for (; remaining >= 4; bi += 2, remaining -= 2)
+    {
+      /* Prefetch next iteration. */
+      vlib_prefetch_buffer_with_index (vm, bi[2], LOAD);
+      vlib_prefetch_buffer_with_index (vm, bi[3], LOAD);
+
+      trace_buffer_data_only (vm, node, bi[0], n_buffer_data_bytes_in_trace);
+      trace_buffer_data_only (vm, node, bi[1], n_buffer_data_bytes_in_trace);
+    }
+
+  /* Remaining buffers, one at a time. */
+  for (; remaining >= 1; bi += 1, remaining -= 1)
+    trace_buffer_data_only (vm, node, bi[0], n_buffer_data_bytes_in_trace);
+}
+
+/*
+ * Free up all trace buffer memory.
+ *
+ * For every vlib main: clears trace_active_hint, frees the trace vector
+ * held in each in-use pool slot, then frees the pool itself.
+ */
+always_inline void
+clear_trace_buffer (void)
+{
+ int i;
+ vlib_trace_main_t *tm;
+
+ /* *INDENT-OFF* */
+ foreach_vlib_main (
+ ({
+ void *mainheap;
+
+ tm = &this_vlib_main->trace_main;
+ /* Work on this vlib main's heap; the previous heap is restored below. */
+ mainheap = clib_mem_set_heap (this_vlib_main->heap_base);
+
+ tm->trace_active_hint = 0;
+
+ /*
+ * Only slots currently in use are freed.
+ * NOTE(review): slots already returned to the pool are skipped; confirm
+ * their trace vectors are not left allocated.
+ */
+ for (i = 0; i < vec_len (tm->trace_buffer_pool); i++)
+ if (! pool_is_free_index (tm->trace_buffer_pool, i))
+ vec_free (tm->trace_buffer_pool[i]);
+ pool_free (tm->trace_buffer_pool);
+ clib_mem_set_heap (mainheap);
+ }));
+ /* *INDENT-ON* */
+}
+
+/*
+ * format() callback that renders one packet trace.
+ *
+ * Varargs: vlib_main_t *vm, then the trace vector (vlib_trace_header_t *).
+ * Records are visited in order.  A "time: node-name" heading is emitted
+ * whenever the node differs from the previous record's node, followed by
+ * the record data rendered with the node's format_trace callback, or its
+ * format_buffer callback when format_trace is not set.
+ */
+static u8 *
+format_vlib_trace (u8 * s, va_list * va)
+{
+  vlib_main_t *vm = va_arg (*va, vlib_main_t *);
+  vlib_trace_header_t *rec = va_arg (*va, vlib_trace_header_t *);
+  vlib_trace_header_t *end = vec_end (rec);
+  clib_time_t *ct = &vm->clib_time;
+  vlib_node_t *shown_node = 0;
+
+  for (; rec < end; rec = vlib_trace_header_next (rec))
+    {
+      vlib_node_t *node = vlib_get_node (vm, rec->node_index);
+
+      if (node != shown_node)
+        {
+          /* Time stamp relative to main loop start, converted to seconds. */
+          f64 elapsed =
+            (rec->time - vm->cpu_time_main_loop_start) * ct->seconds_per_clock;
+
+          s = format (s, "\n%U: %v", format_time_interval, "h:m:s:u",
+                      elapsed, node->name);
+          shown_node = node;
+        }
+
+      s = node->format_trace
+        ? format (s, "\n %U", node->format_trace, vm, node, rec->data)
+        : format (s, "\n %U", node->format_buffer, rec->data);
+    }
+
+  return s;
+}
+
+/*
+ * Root of all trace cli commands.
+ * Only a path and help text are registered; no .function is set here.
+ */
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (trace_cli_command,static) = {
+ .path = "trace",
+ .short_help = "Packet tracer commands",
+};
+/* *INDENT-ON* */
+
+/*
+ * Sort comparator for vectors of trace pointers: orders two traces by the
+ * time stamp of their first record.  a1 and a2 point at vector elements
+ * (vlib_trace_header_t *).  Returns -1, 0 or +1.
+ */
+static int
+trace_cmp (void *a1, void *a2)
+{
+  vlib_trace_header_t *lhs = *(vlib_trace_header_t **) a1;
+  vlib_trace_header_t *rhs = *(vlib_trace_header_t **) a2;
+  i64 delta = lhs->time - rhs->time;
+
+  if (delta < 0)
+    return -1;
+  if (delta > 0)
+    return +1;
+  return 0;
+}
+
+/*
+ * Return 1 if this packet passes the trace filter, or 0 otherwise.
+ *
+ * tm supplies the filter (filter_flag, filter_node_index); h is the trace
+ * vector of one packet.  With no filter every trace passes.  An include
+ * filter passes traces that contain a record from the filter node; any
+ * other filter value passes traces that do not.
+ */
+u32
+filter_accept (vlib_trace_main_t * tm, vlib_trace_header_t * h)
+{
+  vlib_trace_header_t *end = vec_end (h);
+  u32 node_seen = 0;
+
+  if (tm->filter_flag == FILTER_FLAG_NONE)
+    return 1;
+
+  /* Look for the first record produced by the filter node. */
+  for (; h < end; h = vlib_trace_header_next (h))
+    {
+      if (h->node_index == tm->filter_node_index)
+        {
+          node_seen = 1;
+          break;
+        }
+    }
+
+  if (tm->filter_flag == FILTER_FLAG_INCLUDE)
+    return node_seen;
+
+  /* FILTER_FLAG_EXCLUDE */
+  return !node_seen;
+}
+
+/*
+ * Remove traces from the trace buffer pool that don't pass the filter.
+ *
+ * Operates on vm's own trace main.  At most tm->filter_count traces that
+ * pass filter_accept() are kept; every other in-use pool slot has its
+ * trace vector length reset to 0 and is returned to the pool.  Does
+ * nothing when no filter is configured.
+ */
+void
+trace_apply_filter (vlib_main_t * vm)
+{
+ vlib_trace_main_t *tm = &vm->trace_main;
+ vlib_trace_header_t **h;
+ vlib_trace_header_t ***traces_to_remove = 0;
+ u32 index;
+ u32 trace_index;
+ u32 n_accepted;
+
+ u32 accept;
+
+ if (tm->filter_flag == FILTER_FLAG_NONE)
+ return;
+
+ /*
+ * Ideally we would retain the first N traces that pass the filter instead
+ * of any N traces.
+ */
+ n_accepted = 0;
+ /* First pass: only collect the slots to drop; the pool is not modified. */
+ /* *INDENT-OFF* */
+ pool_foreach (h, tm->trace_buffer_pool,
+ ({
+ accept = filter_accept(tm, h[0]);
+
+ /* Drop when the quota is already full or the trace fails the filter. */
+ if ((n_accepted == tm->filter_count) || !accept)
+ vec_add1 (traces_to_remove, h);
+ else
+ n_accepted++;
+ }));
+ /* *INDENT-ON* */
+
+ /* remove all traces that we don't want to keep */
+ for (index = 0; index < vec_len (traces_to_remove); index++)
+ {
+ /* Convert the saved slot pointer back into a pool index. */
+ trace_index = traces_to_remove[index] - tm->trace_buffer_pool;
+ /* The trace vector is emptied, not freed, before the slot is released. */
+ _vec_len (tm->trace_buffer_pool[trace_index]) = 0;
+ pool_put_index (tm->trace_buffer_pool, trace_index);
+ }
+
+ vec_free (traces_to_remove);
+}
+
+/*
+ * CLI handler for "show trace [max COUNT]".
+ *
+ * For every vlib main: applies the trace filter, gathers the in-use
+ * traces, sorts them by time and formats up to max of them (default 50)
+ * into one output vector that is printed at the end.  Returns 0, or a
+ * clib error when the arguments do not parse.
+ */
+static clib_error_t *
+cli_show_trace_buffer (vlib_main_t * vm,
+ unformat_input_t * input, vlib_cli_command_t * cmd)
+{
+ vlib_trace_main_t *tm;
+ vlib_trace_header_t **h, **traces;
+ u32 i, index = 0;
+ char *fmt;
+ u8 *s = 0;
+ u32 max;
+
+ /*
+ * By default display only this many traces. To display more, explicitly
+ * specify a max. This prevents unexpectedly huge outputs.
+ */
+ max = 50;
+ while (unformat_check_input (input) != (uword) UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "max %d", &max))
+ ;
+ else
+ return clib_error_create ("expected 'max COUNT', got `%U'",
+ format_unformat_error, input);
+ }
+
+
+ /* Get active traces from pool. */
+
+ /* *INDENT-OFF* */
+ foreach_vlib_main (
+ ({
+ void *mainheap;
+
+ /* index counts the vlib mains visited so far and selects the thread name. */
+ fmt = "------------------- Start of thread %d %s -------------------\n";
+ s = format (s, fmt, index, vlib_worker_threads[index].name);
+
+ tm = &this_vlib_main->trace_main;
+
+ /* Pool and traces vector are handled on this vlib main's heap. */
+ mainheap = clib_mem_set_heap (this_vlib_main->heap_base);
+
+ trace_apply_filter(this_vlib_main);
+
+ traces = 0;
+ pool_foreach (h, tm->trace_buffer_pool,
+ ({
+ vec_add1 (traces, h[0]);
+ }));
+
+ if (vec_len (traces) == 0)
+ {
+ /* Switch back before appending to s, which is built on the caller's heap. */
+ clib_mem_set_heap (mainheap);
+ s = format (s, "No packets in trace buffer\n");
+ goto done;
+ }
+
+ /* Sort them by increasing time. */
+ vec_sort_with_function (traces, trace_cmp);
+
+ for (i = 0; i < vec_len (traces); i++)
+ {
+ if (i == max)
+ {
+ /* This notice is printed immediately rather than appended to s. */
+ vlib_cli_output (vm, "Limiting display to %d packets."
+ " To display more specify max.", max);
+ goto done;
+ }
+
+ /* Format onto s with the caller's heap active, then switch back. */
+ clib_mem_set_heap (mainheap);
+
+ s = format (s, "Packet %d\n%U\n\n", i + 1,
+ format_vlib_trace, vm, traces[i]);
+
+ mainheap = clib_mem_set_heap (this_vlib_main->heap_base);
+ }
+
+ done:
+ vec_free (traces);
+ clib_mem_set_heap (mainheap);
+
+ index++;
+ }));
+ /* *INDENT-ON* */
+
+ vlib_cli_output (vm, "%v", s);
+ vec_free (s);
+ return 0;
+}
+
+/* *INDENT-OFF* */
+/* Debug CLI registration: "show trace" is handled by cli_show_trace_buffer. */
+VLIB_CLI_COMMAND (show_trace_cli,static) = {
+ .path = "show trace",
+ .short_help = "Show trace buffer [max COUNT]",
+ .function = cli_show_trace_buffer,
+};
+/* *INDENT-ON* */
+
+/*
+ * CLI handler for "trace add NODE COUNT [verbose]".
+ *
+ * On every vlib main: sets trace_active_hint, stores the verbose setting
+ * and raises the trace limit of NODE by COUNT.  Returns 0 on success or
+ * when the command line is empty, and a clib error when the arguments do
+ * not parse or no NODE COUNT pair was given.
+ */
+static clib_error_t *
+cli_add_trace_buffer (vlib_main_t * vm,
+                      unformat_input_t * input, vlib_cli_command_t * cmd)
+{
+  unformat_input_t _line_input, *line_input = &_line_input;
+  vlib_trace_main_t *tm;
+  vlib_trace_node_t *tn;
+  /* ~0 marks "no node parsed" so a bare "verbose" cannot index tm->nodes. */
+  u32 node_index = ~(u32) 0, add = 0;
+  u8 verbose = 0;
+  clib_error_t *error = 0;
+
+  if (!unformat_user (input, unformat_line_input, line_input))
+    return 0;
+
+  while (unformat_check_input (line_input) != (uword) UNFORMAT_END_OF_INPUT)
+    {
+      if (unformat (line_input, "%U %d",
+                    unformat_vlib_node, vm, &node_index, &add))
+        ;
+      else if (unformat (line_input, "verbose"))
+        verbose = 1;
+      else
+        {
+          error = clib_error_create ("expected NODE COUNT, got `%U'",
+                                     format_unformat_error, line_input);
+          goto done;
+        }
+    }
+
+  if (node_index == ~(u32) 0)
+    {
+      error = clib_error_create ("expected NODE COUNT");
+      goto done;
+    }
+
+  /* *INDENT-OFF* */
+  foreach_vlib_main ((
+    {
+      void *oldheap;
+      tm = &this_vlib_main->trace_main;
+      tm->trace_active_hint = 1;
+      tm->verbose = verbose;
+      /* Grow the per-node table on this vlib main's heap. */
+      oldheap =
+        clib_mem_set_heap (this_vlib_main->heap_base);
+      vec_validate (tm->nodes, node_index);
+      tn = tm->nodes + node_index;
+      tn->limit += add;
+      clib_mem_set_heap (oldheap);
+    }));
+  /* *INDENT-ON* */
+
+done:
+  /* line_input was set up by unformat_user; release it on every path. */
+  unformat_free (line_input);
+  return error;
+}
+
+/* *INDENT-OFF* */
+/* Debug CLI registration: "trace add" is handled by cli_add_trace_buffer. */
+VLIB_CLI_COMMAND (add_trace_cli,static) = {
+ .path = "trace add",
+ .short_help = "Trace given number of packets",
+ .function = cli_add_trace_buffer,
+};
+/* *INDENT-ON* */
+
+
+/*
+ * Configure a filter for packet traces.
+ *
+ * This supplements the packet trace feature so that only packets matching
+ * the filter are included in the trace. Currently the only filter is to
+ * keep packets that include a certain node in the trace or exclude a certain
+ * node in the trace.
+ *
+ * The count of traced packets in the "trace add" command is still used to
+ * create a certain number of traces. The "trace filter" command specifies
+ * how many of those packets should be retained in the trace.
+ *
+ * For example, 1Mpps of traffic is arriving and one of those packets is being
+ * dropped. To capture the trace for only that dropped packet, you can do:
+ * trace filter include error-drop 1
+ * trace add dpdk-input 1000000
+ * <wait one second>
+ * show trace
+ *
+ * Note that the filter could be implemented by capturing all traces and just
+ * reducing traces displayed by the "show trace" function. But that would
+ * require a lot of memory for storing the traces, making that infeasible.
+ *
+ * To remove traces from the trace pool that do not include a certain node
+ * requires that the trace be "complete" before applying the filter. To
+ * accomplish this, the trace pool is filtered upon each iteration of the
+ * main vlib loop. Doing so keeps the number of allocated traces down to a
+ * reasonably low number. This requires that tracing for a buffer is not
+ * performed after the vlib main loop iteration completes. i.e. you can't
+ * save away a buffer temporarily then inject it back into the graph and
+ * expect that the trace_index is still valid (such as a traffic manager might
+ * do). A new trace buffer should be allocated for those types of packets.
+ *
+ * The filter can be extended to support multiple nodes and other match
+ * criteria (e.g. input sw_if_index, mac address) but for now just checks if
+ * a specified node is in the trace or not in the trace.
+ */
+/*
+ * CLI handler for "trace filter".  Accepts "include NODE COUNT",
+ * "exclude NODE COUNT" or "none", stores the resulting filter in the
+ * trace main of every vlib main and frees each per-node trace table.
+ * Returns 0, or a clib error when the arguments do not parse.
+ */
+static clib_error_t *
+cli_filter_trace (vlib_main_t * vm,
+ unformat_input_t * input, vlib_cli_command_t * cmd)
+{
+ vlib_trace_main_t *tm = &vm->trace_main;
+ u32 filter_node_index;
+ u32 filter_flag;
+ u32 filter_count;
+ void *mainheap;
+
+ if (unformat (input, "include %U %d",
+ unformat_vlib_node, vm, &filter_node_index, &filter_count))
+ {
+ filter_flag = FILTER_FLAG_INCLUDE;
+ }
+ else if (unformat (input, "exclude %U %d",
+ unformat_vlib_node, vm, &filter_node_index,
+ &filter_count))
+ {
+ filter_flag = FILTER_FLAG_EXCLUDE;
+ }
+ else if (unformat (input, "none"))
+ {
+ /* "none" removes the filter; node and count are reset to 0. */
+ filter_flag = FILTER_FLAG_NONE;
+ filter_node_index = 0;
+ filter_count = 0;
+ }
+ else
+ return
+ clib_error_create
+ ("expected 'include NODE COUNT' or 'exclude NODE COUNT' or 'none', got `%U'",
+ format_unformat_error, input);
+
+ /* *INDENT-OFF* */
+ foreach_vlib_main (
+ ({
+ tm = &this_vlib_main->trace_main;
+ tm->filter_node_index = filter_node_index;
+ tm->filter_flag = filter_flag;
+ tm->filter_count = filter_count;
+
+ /*
+ * Clear the trace limits to stop any in-progress tracing
+ * Prevents runaway trace allocations when the filter changes (or is removed)
+ */
+ /* tm->nodes is freed on this vlib main's heap. */
+ mainheap = clib_mem_set_heap (this_vlib_main->heap_base);
+ vec_free (tm->nodes);
+ clib_mem_set_heap (mainheap);
+ }));
+ /* *INDENT-ON* */
+
+ return 0;
+}
+
+/* *INDENT-OFF* */
+/* Debug CLI registration: "trace filter" is handled by cli_filter_trace. */
+VLIB_CLI_COMMAND (filter_trace_cli,static) = {
+ .path = "trace filter",
+ .short_help = "filter trace output - include NODE COUNT | exclude NODE COUNT | none",
+ .function = cli_filter_trace,
+};
+/* *INDENT-ON* */
+
+/*
+ * CLI handler for "clear trace": releases all trace buffer memory via
+ * clear_trace_buffer().  Takes no arguments and always returns 0.
+ */
+static clib_error_t *
+cli_clear_trace_buffer (vlib_main_t * vm,
+                        unformat_input_t * input, vlib_cli_command_t * cmd)
+{
+  /* None of the handler arguments are consulted. */
+  (void) vm;
+  (void) input;
+  (void) cmd;
+
+  clear_trace_buffer ();
+
+  return 0;
+}
+
+/* *INDENT-OFF* */
+/* Debug CLI registration: "clear trace" is handled by cli_clear_trace_buffer. */
+VLIB_CLI_COMMAND (clear_trace_cli,static) = {
+ .path = "clear trace",
+ .short_help = "Clear trace buffer and free memory",
+ .function = cli_clear_trace_buffer,
+};
+/* *INDENT-ON* */
+
+/*
+ * Dummy function to get us linked in.
+ * Intentionally empty: it exists only as a symbol other code can reference.
+ */
+void
+vlib_trace_cli_reference (void)
+{
+}
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vlib/trace.h b/src/vlib/trace.h
new file mode 100644
index 00000000000..fc0fc5c8ed4
--- /dev/null
+++ b/src/vlib/trace.h
@@ -0,0 +1,100 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * trace.h: VLIB trace buffer.
+ *
+ * Copyright (c) 2008 Eliot Dresselhaus
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef included_vlib_trace_h
+#define included_vlib_trace_h
+
+#include <vppinfra/pool.h>
+
+/*
+ * Header of one trace record.  A packet's trace is a vector of these
+ * records laid end to end, each followed by its variable-length data.
+ */
+typedef struct
+{
+ /* CPU time stamp trace was made. */
+ u64 time;
+
+ /* Node which generated this trace. */
+ u32 node_index;
+
+ /*
+ * Number of data words in this trace.  A "word" here is one
+ * sizeof (vlib_trace_header_t) unit: vlib_add_trace rounds the payload
+ * up to that size and vlib_trace_header_next steps by 1 + n_data headers.
+ */
+ u32 n_data;
+
+ /* Trace data follows. */
+ u8 data[0];
+} vlib_trace_header_t;
+
+/* Per-node trace accounting; the remaining quota is limit - count. */
+typedef struct
+{
+ /* Current number of traces in buffer. */
+ u32 count;
+
+ /* Max. number of traces to be added to buffer. */
+ u32 limit;
+} vlib_trace_node_t;
+
+/* Trace state kept in each vlib main (accessed as vm->trace_main). */
+typedef struct
+{
+ /* Pool of trace buffers. */
+ vlib_trace_header_t **trace_buffer_pool;
+
+ /*
+ * Main loop count seen when the filter was last applied; compared with
+ * vm->main_loop_count so filtering runs once per main loop iteration.
+ */
+ u32 last_main_loop_count;
+ /* Node index the filter looks for in each trace. */
+ u32 filter_node_index;
+ /* One of the FILTER_FLAG_* values below. */
+ u32 filter_flag;
+#define FILTER_FLAG_NONE 0
+#define FILTER_FLAG_INCLUDE 1
+#define FILTER_FLAG_EXCLUDE 2
+ /* Maximum number of traces kept when a filter is applied. */
+ u32 filter_count;
+
+ /* set on trace add, cleared on clear trace */
+ u32 trace_active_hint;
+
+ /* Per node trace counts. */
+ vlib_trace_node_t *nodes;
+
+ /* verbosity */
+ int verbose;
+} vlib_trace_main_t;
+
+#endif /* included_vlib_trace_h */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vlib/trace_funcs.h b/src/vlib/trace_funcs.h
new file mode 100644
index 00000000000..5280eae9904
--- /dev/null
+++ b/src/vlib/trace_funcs.h
@@ -0,0 +1,185 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * trace_funcs.h: VLIB trace buffer.
+ *
+ * Copyright (c) 2008 Eliot Dresselhaus
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef included_vlib_trace_funcs_h
+#define included_vlib_trace_funcs_h
+
+/*
+ * Debug check that buffer b refers to an in-use slot of the trace pool.
+ * The first ASSERT is deliberately neutralised by the leading "1 ||".
+ */
+always_inline void
+vlib_validate_trace (vlib_trace_main_t * tm, vlib_buffer_t * b)
+{
+ /*
+ * this assert seems right, but goes off constantly.
+ * disabling it appears to make the pain go away
+ */
+ ASSERT (1 || b->flags & VLIB_BUFFER_IS_TRACED);
+ ASSERT (!pool_is_free_index (tm->trace_buffer_pool, b->trace_index));
+}
+
+/*
+ * Append a trace record for node runtime r to the trace of buffer b.
+ *
+ * The payload size is rounded up to a multiple of the record header size
+ * and the record is appended to the vector held in the pool slot named by
+ * b->trace_index.  Returns a pointer to the record's data area, where the
+ * caller stores up to n_data_bytes of trace data.
+ */
+always_inline void *
+vlib_add_trace (vlib_main_t * vm,
+                vlib_node_runtime_t * r, vlib_buffer_t * b, u32 n_data_bytes)
+{
+  vlib_trace_main_t *tm = &vm->trace_main;
+  vlib_trace_header_t *h;
+  u32 padded_bytes;
+  u32 n_data_words;
+
+  vlib_validate_trace (tm, b);
+
+  /* Payload is accounted in units of one header. */
+  padded_bytes = round_pow2 (n_data_bytes, sizeof (h[0]));
+  n_data_words = padded_bytes / sizeof (h[0]);
+
+  /* One header plus the payload units, header-aligned. */
+  vec_add2_aligned (tm->trace_buffer_pool[b->trace_index], h,
+                    1 + n_data_words, sizeof (h[0]));
+
+  h->node_index = r->node_index;
+  h->n_data = n_data_words;
+  h->time = vm->cpu_time_last_node_dispatch;
+
+  return h->data;
+}
+
+/*
+ * Return the record that follows h in a trace vector: skip h's own
+ * header plus its n_data header-sized payload units.
+ */
+always_inline vlib_trace_header_t *
+vlib_trace_header_next (vlib_trace_header_t * h)
+{
+  return &h[1 + h->n_data];
+}
+
+/*
+ * Release the trace attached to buffer b: the trace vector in the pool
+ * slot b->trace_index is emptied (length set to 0, not freed) and the
+ * slot is returned to the pool.
+ */
+always_inline void
+vlib_free_trace (vlib_main_t * vm, vlib_buffer_t * b)
+{
+ vlib_trace_main_t *tm = &vm->trace_main;
+ vlib_validate_trace (tm, b);
+ _vec_len (tm->trace_buffer_pool[b->trace_index]) = 0;
+ pool_put_index (tm->trace_buffer_pool, b->trace_index);
+}
+
+/* Set VLIB_FRAME_TRACE on the next frame that r uses for next_index. */
+always_inline void
+vlib_trace_next_frame (vlib_main_t * vm,
+                       vlib_node_runtime_t * r, u32 next_index)
+{
+  vlib_next_frame_t *next_frame =
+    vlib_node_runtime_get_next_frame (vm, r, next_index);
+
+  next_frame->flags = next_frame->flags | VLIB_FRAME_TRACE;
+}
+
+void trace_apply_filter (vlib_main_t * vm);
+
+/*
+ * Mark buffer as traced and allocate trace buffer.
+ *
+ * Also flags the next frame for next_index as traced.  With follow_chain
+ * set, every buffer reached through vlib_get_next_buffer gets the same
+ * flag and trace index.
+ */
+always_inline void
+vlib_trace_buffer (vlib_main_t * vm,
+ vlib_node_runtime_t * r,
+ u32 next_index, vlib_buffer_t * b, int follow_chain)
+{
+ vlib_trace_main_t *tm = &vm->trace_main;
+ vlib_trace_header_t **h;
+
+ /*
+ * Apply filter to existing traces to keep number of allocated traces low.
+ * Performed each time around the main loop.
+ */
+ if (tm->last_main_loop_count != vm->main_loop_count)
+ {
+ tm->last_main_loop_count = vm->main_loop_count;
+ trace_apply_filter (vm);
+ }
+
+ vlib_trace_next_frame (vm, r, next_index);
+
+ /* Take a pool slot after filtering, which may have released slots. */
+ pool_get (tm->trace_buffer_pool, h);
+
+ do
+ {
+ b->flags |= VLIB_BUFFER_IS_TRACED;
+ /* The trace index is the slot's position within the pool. */
+ b->trace_index = h - tm->trace_buffer_pool;
+ }
+ while (follow_chain && (b = vlib_get_next_buffer (vm, b)));
+}
+
+/*
+ * Propagate trace state from buffer b to the buffer with index bi_target:
+ * the target gains VLIB_BUFFER_IS_TRACED if b has it, and always receives
+ * b's trace_index.
+ */
+always_inline void
+vlib_buffer_copy_trace_flag (vlib_main_t * vm, vlib_buffer_t * b,
+ u32 bi_target)
+{
+ vlib_buffer_t *b_target = vlib_get_buffer (vm, bi_target);
+ b_target->flags |= b->flags & VLIB_BUFFER_IS_TRACED;
+ b_target->trace_index = b->trace_index;
+}
+
+/*
+ * Return how many more traces node runtime rt may still add
+ * (limit - count), or 0 when the node has no entry in the per-node table.
+ */
+always_inline u32
+vlib_get_trace_count (vlib_main_t * vm, vlib_node_runtime_t * rt)
+{
+  vlib_trace_main_t *tm = &vm->trace_main;
+  int remaining = 0;
+
+  if (rt->node_index < vec_len (tm->nodes))
+    {
+      vlib_trace_node_t *tn = &tm->nodes[rt->node_index];
+
+      remaining = tn->limit - tn->count;
+      ASSERT (remaining >= 0);
+    }
+
+  return remaining;
+}
+
+/*
+ * Set the remaining trace quota of node runtime rt to count by storing
+ * limit - count as the node's current trace count.  count must not
+ * exceed the node's limit.
+ */
+always_inline void
+vlib_set_trace_count (vlib_main_t * vm, vlib_node_runtime_t * rt, u32 count)
+{
+  vlib_trace_main_t *tm = &vm->trace_main;
+  vlib_trace_node_t *node_counts;
+
+  node_counts = vec_elt_at_index (tm->nodes, rt->node_index);
+
+  ASSERT (count <= node_counts->limit);
+  node_counts->count = node_counts->limit - count;
+}
+
+/* Helper function for nodes which only trace buffer data. */
+void
+vlib_trace_frame_buffers_only (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ u32 * buffers,
+ uword n_buffers,
+ uword next_buffer_stride,
+ uword n_buffer_data_bytes_in_trace);
+
+#endif /* included_vlib_trace_funcs_h */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vlib/unix/cj.c b/src/vlib/unix/cj.c
new file mode 100644
index 00000000000..33ba163abca
--- /dev/null
+++ b/src/vlib/unix/cj.c
@@ -0,0 +1,271 @@
+/*
+ *------------------------------------------------------------------
+ * cj.c
+ *
+ * Copyright (c) 2013 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *------------------------------------------------------------------
+ */
+
+/**
+ * @file
+ * Circular journal diagnostic mechanism.
+ *
+ * The @c cj thread-safe circular log buffer scheme is occasionally useful
+ * when chasing bugs. Calls to it should not be checked in.
+ */
+/*? %%clicmd:group_label Circular Journal %% ?*/
+/*? %%syscfg:group_label Circular Journal %% ?*/
+
+#include <stdio.h>
+#include <vlib/vlib.h>
+
+#include <vlib/unix/cj.h>
+
+cj_main_t cj_main;
+
+/*
+ * Append one record to the circular journal.  Does nothing while the
+ * journal is disabled.  The tail counter is advanced atomically and the
+ * slot is chosen by masking it with num_records - 1.
+ */
+void
+cj_log (u32 type, void *data0, void *data1)
+{
+  cj_main_t *cjm = &cj_main;
+  cj_record_t *rec;
+  u64 claimed;
+
+  if (!cjm->enable)
+    return;
+
+  claimed = __sync_add_and_fetch (&cjm->tail, 1);
+  rec = &cjm->records[claimed & (cjm->num_records - 1)];
+
+  rec->time = vlib_time_now (cjm->vlib_main);
+  rec->cpu = os_get_cpu_number ();
+  rec->type = type;
+  rec->data[0] = pointer_to_uword (data0);
+  rec->data[1] = pointer_to_uword (data1);
+}
+
+/* Stop journal collection by clearing the enable flag. */
+void
+cj_stop (void)
+{
+  cj_main.enable = 0;
+}
+
+
+/* Init function: remember the vlib main in cj_main.  Always returns 0. */
+clib_error_t *
+cj_init (vlib_main_t * vm)
+{
+  cj_main.vlib_main = vm;
+
+  return 0;
+}
+
+VLIB_INIT_FUNCTION (cj_init);
+
+/*
+ * Startup-config handler for the "cj" section.
+ *
+ *   records <n>  number of journal records; rounded with max_pow2 so the
+ *                record index can be formed by masking
+ *   on           start collecting as soon as the journal is allocated
+ *
+ * Without a "records" entry nothing is allocated and the function returns
+ * 0.  Returns a clib error for unknown input or a record count of 0.
+ */
+static clib_error_t *
+cj_config (vlib_main_t * vm, unformat_input_t * input)
+{
+  cj_main_t *cjm = &cj_main;
+  int matched = 0;
+  int enable = 0;
+
+  while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+    {
+      if (unformat (input, "records %d", &cjm->num_records))
+        matched = 1;
+      else if (unformat (input, "on"))
+        enable = 1;
+      else
+        return clib_error_return (0, "cj_config: unknown input '%U'",
+                                  format_unformat_error, input);
+    }
+
+  if (matched == 0)
+    return 0;
+
+  /*
+   * A zero count would make the "num_records - 1" vector index and mask
+   * below wrap around, so refuse it instead of sizing the journal from it.
+   */
+  if (cjm->num_records == 0)
+    return clib_error_return (0, "cj_config: records must be non-zero");
+
+  cjm->num_records = max_pow2 (cjm->num_records);
+  vec_validate (cjm->records, cjm->num_records - 1);
+  /* All-ones fill: cj_dump_internal treats cpu == ~0 as "never written". */
+  memset (cjm->records, 0xff, cjm->num_records * sizeof (cj_record_t));
+  /* cj_log pre-increments tail, so ~0 makes the first record land in slot 0. */
+  cjm->tail = ~0;
+  cjm->enable = enable;
+
+  return 0;
+}
+
+/*?
+ * Configure the circular journal diagnostic mechanism. This is only useful
+ * if you, the developer, have written code to make use of the circular
+ * journal.
+ *
+ * @cfgcmd{records, &lt;number&gt;}
+ * Configure the number of records to allocate for the circular journal.
+ *
+ * @cfgcmd{on}
+ * Enable the collection of records in the circular journal at the
+ * earliest opportunity.
+?*/
+VLIB_CONFIG_FUNCTION (cj_config, "cj");
+
+/*
+ * Turn journal collection on or off.  When no records have been
+ * configured the flag is left alone and a notice is printed instead.
+ */
+void
+cj_enable_disable (int is_enable)
+{
+  cj_main_t *cjm = &cj_main;
+
+  if (cjm->num_records == 0)
+    {
+      vlib_cli_output (cjm->vlib_main, "CJ not configured...");
+      return;
+    }
+
+  cjm->enable = is_enable;
+}
+
+/*
+ * Print one journal record to stderr as a single line: cpu, time, type
+ * and the two data words in hexadecimal.
+ */
+static inline void
+cj_dump_one_record (cj_record_t * r)
+{
+ fprintf (stderr, "[%d]: %10.6f T%02d %llx %llx\n",
+ r->cpu, r->time, r->type, (long long unsigned int) r->data[0],
+ (long long unsigned int) r->data[1]);
+}
+
+/* Return non-zero when record r passes every enabled data[] filter. */
+static inline int
+cj_record_matches (cj_record_t * r, u8 filter0_enable, u64 filter0,
+                   u8 filter1_enable, u64 filter1)
+{
+  if (filter0_enable && (r->data[0] != filter0))
+    return 0;
+  if (filter1_enable && (r->data[1] != filter1))
+    return 0;
+  return 1;
+}
+
+/*
+ * Dump the journal to stderr, oldest record first.
+ *
+ * filter0_enable / filter1_enable select whether a record's data[0] /
+ * data[1] must equal filter0 / filter1 to be printed.  Prints a notice and
+ * returns when the journal is not configured or holds no data.
+ */
+static void
+cj_dump_internal (u8 filter0_enable, u64 filter0,
+                  u8 filter1_enable, u64 filter1)
+{
+  cj_main_t *cjm = &cj_main;
+  cj_record_t *r;
+  u64 tail;
+  u32 mask, first, count, i;
+
+  if (cjm->num_records == 0)
+    {
+      fprintf (stderr, "CJ not configured...\n");
+      return;
+    }
+
+  /* Read the volatile tail once so every index below is consistent. */
+  tail = cjm->tail;
+
+  if (tail == (u64) ~ 0)
+    {
+      fprintf (stderr, "No data collected...\n");
+      return;
+    }
+
+  /*
+   * tail is a free-running counter: cj_log only masks it when choosing a
+   * slot, so it has to be masked here before it is used as an index.
+   */
+  mask = cjm->num_records - 1;
+  first = (tail + 1) & mask;
+
+  /* Has the trace wrapped?  Unwritten records still carry the 0xff fill. */
+  if (cjm->records[first].cpu != (u32) ~ 0)
+    {
+      /* Yes: every slot holds data and the oldest one follows the tail. */
+      count = cjm->num_records;
+    }
+  else
+    {
+      /* No: valid records run from slot 0 through the tail slot. */
+      count = (tail & mask) + 1;
+      first = 0;
+    }
+
+  for (i = 0; i < count; i++)
+    {
+      r = &cjm->records[(first + i) & mask];
+      if (cj_record_matches (r, filter0_enable, filter0,
+                             filter1_enable, filter1))
+        cj_dump_one_record (r);
+    }
+}
+
+/* Dump the journal with both data filters disabled. */
+void
+cj_dump (void)
+{
+ cj_dump_internal (0, 0, 0, 0);
+}
+
+/* Dump only the records whose data[0] equals filter0. */
+void
+cj_dump_filter_data0 (u64 filter0)
+{
+ cj_dump_internal (1 /* enable f0 */ , filter0, 0, 0);
+}
+
+/* Dump only the records whose data[1] equals filter1. */
+void
+cj_dump_filter_data1 (u64 filter1)
+{
+ cj_dump_internal (0, 0, 1 /* enable f1 */ , filter1);
+}
+
+/* Dump only the records whose data[0] equals filter0 and data[1] equals filter1. */
+void
+cj_dump_filter_data12 (u64 filter0, u64 filter1)
+{
+ cj_dump_internal (1, filter0, 1, filter1);
+}
+
+/*
+ * CLI handler for "cj".  Accepts any mix of enable|on, disable|off and
+ * dump.  The enable state is applied first (when one was given), then the
+ * journal is dumped if requested.  Returns 0, or a clib error on unknown
+ * input.
+ */
+static clib_error_t *
+cj_command_fn (vlib_main_t * vm,
+               unformat_input_t * input, vlib_cli_command_t * cmd)
+{
+  int want_enable = -1;  /* -1: leave the enable state untouched */
+  int want_dump = 0;
+
+  while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+    {
+      if (unformat (input, "enable") || unformat (input, "on"))
+        {
+          want_enable = 1;
+          continue;
+        }
+      if (unformat (input, "disable") || unformat (input, "off"))
+        {
+          want_enable = 0;
+          continue;
+        }
+      if (unformat (input, "dump"))
+        {
+          want_dump = 1;
+          continue;
+        }
+      return clib_error_return (0, "unknown input `%U'",
+                                format_unformat_error, input);
+    }
+
+  if (want_enable != -1)
+    cj_enable_disable (want_enable);
+
+  if (want_dump)
+    cj_dump ();
+
+  return 0;
+}
+
+/*?
+ * Enable, disable the collection of diagnostic data into a
+ * circular journal or dump the circular journal diagnostic data.
+ * This is only useful if you, the developer, have written code to make
+ * use of the circular journal.
+ *
+ * When dumping the data it is formatted and sent to @c stderr of the
+ * VPP process; when running VPP in <code>unix interactive</code> mode
+ * this is typically the same place as the Debug CLI.
+?*/
+
+/* *INDENT-OFF* */
+/* Debug CLI registration: "cj" is handled by cj_command_fn. */
+VLIB_CLI_COMMAND (cj_command,static) = {
+ .path = "cj",
+ .short_help = "cj <enable | disable | dump>",
+ .function = cj_command_fn,
+};
+/* *INDENT-ON* */
+
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vlib/unix/cj.h b/src/vlib/unix/cj.h
new file mode 100644
index 00000000000..67626afee2b
--- /dev/null
+++ b/src/vlib/unix/cj.h
@@ -0,0 +1,79 @@
+/*
+ *------------------------------------------------------------------
+ * cj.h
+ *
+ * Copyright (c) 2013 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *------------------------------------------------------------------
+ */
+
+#ifndef __included_cj_h__
+#define __included_cj_h__
+
+/* One circular journal entry, as filled in by cj_log. */
+typedef struct
+{
+ /* Time stamp of the entry */
+ f64 time;
+ /* CPU number that logged the entry */
+ u32 cpu;
+ /* Caller-supplied record type */
+ u32 type;
+ /* The two caller-supplied data values */
+ u64 data[2];
+} cj_record_t;
+
+/* Circular journal state. */
+typedef struct
+{
+ /* Counter of the most recently claimed record */
+ volatile u64 tail;
+ /* Record storage */
+ cj_record_t *records;
+ /* Number of entries in records */
+ u32 num_records;
+ /* Non-zero while logging is enabled */
+ volatile u32 enable;
+
+ /* vlib main used for time stamps and CLI output */
+ vlib_main_t *vlib_main;
+} cj_main_t;
+
+void cj_log (u32 type, void *data0, void *data1);
+
+/*
+ * Supply in application main, so we can log from any library...
+ * Declare a weak reference in the library, off you go.
+ */
+
+/*
+ * Expands to a weak declaration of cj_global_log, three globals
+ * (__cj_type, __cj_data0, __cj_data1) and a cj_global_log definition that
+ * only stores its arguments in those globals.
+ */
+#define DECLARE_CJ_GLOBAL_LOG \
+void cj_global_log (unsigned type, void * data0, void * data1) \
+ __attribute__ ((weak)); \
+ \
+unsigned __cj_type; \
+void * __cj_data0; \
+void * __cj_data1; \
+ \
+void \
+cj_global_log (unsigned type, void * data0, void * data1) \
+{ \
+ __cj_type = type; \
+ __cj_data0 = data0; \
+ __cj_data1 = data1; \
+}
+
+/*
+ * CJ_GLOBAL_LOG_PROTOTYPE is defined with an empty expansion; the weak
+ * cj_global_log prototype below is an ordinary declaration that every
+ * includer of this header sees.
+ * NOTE(review): the macro name suggests the prototype was meant to be its
+ * expansion (missing line continuations?) -- confirm before relying on it.
+ */
+#define CJ_GLOBAL_LOG_PROTOTYPE
+void
+cj_global_log (unsigned type, void *data0, void *data1)
+__attribute__ ((weak));
+
+void cj_stop (void);
+
+#endif /* __included_cj_h__ */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vlib/unix/cli.c b/src/vlib/unix/cli.c
new file mode 100644
index 00000000000..69fca6ec7bc
--- /dev/null
+++ b/src/vlib/unix/cli.c
@@ -0,0 +1,2989 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * cli.c: Unix stdin/socket CLI.
+ *
+ * Copyright (c) 2008 Eliot Dresselhaus
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+/**
+ * @file
+ * @brief Unix stdin/socket command line interface.
+ * Provides a command line interface so humans can interact with VPP.
+ * This is predominantly a debugging and testing mechanism.
+ */
+/*? %%clicmd:group_label Command line session %% ?*/
+/*? %%syscfg:group_label Command line session %% ?*/
+
+#include <vlib/vlib.h>
+#include <vlib/unix/unix.h>
+#include <vppinfra/timer.h>
+
+#include <ctype.h>
+#include <fcntl.h>
+#include <sys/stat.h>
+#include <termios.h>
+#include <signal.h>
+#include <unistd.h>
+#include <arpa/telnet.h>
+#include <sys/ioctl.h>
+
+/** ANSI escape code. */
+#define ESC "\x1b"
+
+/** ANSI Control Sequence Introducer. */
+#define CSI ESC "["
+
+/** ANSI clear screen, then move the cursor to row 1, column 1. */
+#define ANSI_CLEAR CSI "2J" CSI "1;1H"
+/** ANSI reset color settings. */
+#define ANSI_RESET CSI "0m"
+/** ANSI Start bold text. */
+#define ANSI_BOLD CSI "1m"
+/** ANSI Start dim (faint) text. */
+#define ANSI_DIM CSI "2m"
+/** ANSI Start dark red text. */
+#define ANSI_DRED ANSI_DIM CSI "31m"
+/** ANSI Start bright red text. */
+#define ANSI_BRED ANSI_BOLD CSI "31m"
+/** ANSI clear line cursor is on. */
+#define ANSI_CLEARLINE CSI "2K"
+/** ANSI scroll screen down one line. */
+#define ANSI_SCROLLDN CSI "1T"
+/** ANSI save cursor position. */
+#define ANSI_SAVECURSOR CSI "s"
+/** ANSI restore cursor position if previously saved. */
+#define ANSI_RESTCURSOR CSI "u"
+
+/** Maximum depth into a byte stream from which to compile a Telnet
+ * protocol message. This is a safety measure. */
+#define UNIX_CLI_MAX_DEPTH_TELNET 24
+
+/** Unix standard in */
+#define UNIX_CLI_STDIN_FD 0
+
+
+/** A CLI banner line.
+ * Entries are emitted one at a time by unix_cli_file_welcome() using
+ * @c line and @c length.
+ */
+typedef struct
+{
+ u8 *line; /**< The line to print. */
+ u32 length; /**< The length of the line without terminating NUL. */
+} unix_cli_banner_t;
+
+/* Build a banner entry from a string literal; sizeof(a) - 1 drops the
+ * terminating NUL from the stored length. */
+#define _(a) { .line = (u8 *)(a), .length = sizeof(a) - 1 }
+/** Plain welcome banner. */
+static unix_cli_banner_t unix_cli_banner[] = {
+ _(" _______ _ _ _____ ___ \n"),
+ _(" __/ __/ _ \\ (_)__ | | / / _ \\/ _ \\\n"),
+ _(" _/ _// // / / / _ \\ | |/ / ___/ ___/\n"),
+ _(" /_/ /____(_)_/\\___/ |___/_/ /_/ \n"),
+ _("\n")
+};
+
+/** ANSI color welcome banner.
+ * Same art as the plain banner; the left part of each row is wrapped in
+ * ANSI_BRED ... ANSI_RESET. */
+static unix_cli_banner_t unix_cli_banner_color[] = {
+ _(ANSI_BRED " _______ _ " ANSI_RESET " _ _____ ___ \n"),
+ _(ANSI_BRED " __/ __/ _ \\ (_)__ " ANSI_RESET " | | / / _ \\/ _ \\\n"),
+ _(ANSI_BRED " _/ _// // / / / _ \\" ANSI_RESET " | |/ / ___/ ___/\n"),
+ _(ANSI_BRED " /_/ /____(_)_/\\___/" ANSI_RESET " |___/_/ /_/ \n"),
+ _("\n")
+};
+
+#undef _
+
+/** Pager line index.
+ * One entry describes one displayed fragment of a buffered string: which
+ * string in the pager buffer it belongs to, where the fragment starts in
+ * that string and how many bytes long it is. Entries are created by
+ * unix_cli_pager_add_line().
+ */
+typedef struct
+{
+ /** Index into pager_vector */
+ u32 line;
+
+ /** Offset of the string in the line */
+ u32 offset;
+
+ /** Length of the string in the line */
+ u32 length;
+} unix_cli_pager_index_t;
+
+
+/** Unix CLI session.
+ * Per-connection state: pending I/O buffers, command history, line-editing
+ * state, terminal capabilities and the pager buffer with its index.
+ */
+typedef struct
+{
+ /** The file index held by unix.c */
+ u32 unix_file_index;
+
+ /** Vector of output pending write to file descriptor. */
+ u8 *output_vector;
+
+ /** Vector of input saved by Unix input node to be processed by
+ CLI process. */
+ u8 *input_vector;
+
+ /** This session has command history. */
+ u8 has_history;
+ /** Array of vectors of commands in the history. */
+ u8 **command_history;
+ /** The command currently pointed at by the history cursor. */
+ u8 *current_command;
+ /** How far from the end of the history array the user has browsed. */
+ i32 excursion;
+
+ /** Maximum number of history entries this session will store. */
+ u32 history_limit;
+
+ /** Current command line counter */
+ u32 command_number;
+
+ /** The string being searched for in the history. */
+ u8 *search_key;
+ /** If non-zero then the CLI is searching in the history array.
+ * - @c -1 means search backwards.
+ * - @c 1 means search forwards.
+ */
+ int search_mode;
+
+ /** Position of the insert cursor on the current input line */
+ u32 cursor;
+
+ /** Line mode or char mode */
+ u8 line_mode;
+
+ /** Set if the CRLF mode wants CR + LF */
+ u8 crlf_mode;
+
+ /** Can we do ANSI output? */
+ u8 ansi_capable;
+
+ /** Has the session started? Set to 1 by unix_cli_file_welcome(). */
+ u8 started;
+
+ /** Disable the pager? unix_vlib_cli_output() sets this to 2 when the
+ * pager buffer limit is exceeded. */
+ u8 no_pager;
+
+ /** Pager buffer */
+ u8 **pager_vector;
+
+ /** Index of line fragments in the pager buffer */
+ unix_cli_pager_index_t *pager_index;
+
+ /** Line number of top of page */
+ u32 pager_start;
+
+ /** Terminal width */
+ u32 width;
+
+ /** Terminal height */
+ u32 height;
+
+ /** Process node identifier */
+ u32 process_node_index;
+} unix_cli_file_t;
+
+/** Discard everything held by a session's pager.
+ * Frees every buffered line, the buffer that held them and the index of
+ * line fragments, and rewinds the top-of-page position to zero.
+ * @param f The CLI session whose pager needs to be reset.
+ */
+always_inline void
+unix_cli_pager_reset (unix_cli_file_t * f)
+{
+  uword n;
+
+  /* Release each buffered line, then the vector that held them. */
+  for (n = 0; n < vec_len (f->pager_vector); n++)
+    vec_free (f->pager_vector[n]);
+  vec_free (f->pager_vector);
+  f->pager_vector = 0;
+
+  /* Drop the index of line fragments. */
+  vec_free (f->pager_index);
+  f->pager_index = 0;
+
+  f->pager_start = 0;
+}
+
+/** Free the buffers owned by a CLI session.
+ * Releases the pager state plus the pending input and output vectors.
+ * @param f The CLI session whose storage needs to be released.
+ */
+always_inline void
+unix_cli_file_free (unix_cli_file_t * f)
+{
+  unix_cli_pager_reset (f);
+  vec_free (f->input_vector);
+  vec_free (f->output_vector);
+}
+
+/** CLI actions.
+ * Values are attached to input byte sequences in the parse tables
+ * (unix_cli_parse_strings, unix_cli_parse_pager) and returned by
+ * unix_cli_match_action(). The final two values are match results rather
+ * than actions bound to a key.
+ */
+typedef enum
+{
+ UNIX_CLI_PARSE_ACTION_NOACTION = 0, /**< No action */
+ UNIX_CLI_PARSE_ACTION_CRLF, /**< Carriage return, newline or enter */
+ UNIX_CLI_PARSE_ACTION_TAB, /**< Tab key */
+ UNIX_CLI_PARSE_ACTION_ERASE, /**< Erase cursor left */
+ UNIX_CLI_PARSE_ACTION_ERASERIGHT, /**< Erase cursor right */
+ UNIX_CLI_PARSE_ACTION_UP, /**< Up arrow */
+ UNIX_CLI_PARSE_ACTION_DOWN, /**< Down arrow */
+ UNIX_CLI_PARSE_ACTION_LEFT, /**< Left arrow */
+ UNIX_CLI_PARSE_ACTION_RIGHT, /**< Right arrow */
+ UNIX_CLI_PARSE_ACTION_HOME, /**< Home key (jump to start of line) */
+ UNIX_CLI_PARSE_ACTION_END, /**< End key (jump to end of line) */
+ UNIX_CLI_PARSE_ACTION_WORDLEFT, /**< Jump cursor to start of left word */
+ UNIX_CLI_PARSE_ACTION_WORDRIGHT, /**< Jump cursor to start of right word */
+ UNIX_CLI_PARSE_ACTION_ERASELINELEFT, /**< Erase line to left of cursor */
+ UNIX_CLI_PARSE_ACTION_ERASELINERIGHT, /**< Erase line to right & including cursor */
+ UNIX_CLI_PARSE_ACTION_CLEAR, /**< Clear the terminal */
+ UNIX_CLI_PARSE_ACTION_REVSEARCH, /**< Search backwards in command history */
+ UNIX_CLI_PARSE_ACTION_FWDSEARCH, /**< Search forwards in command history */
+ UNIX_CLI_PARSE_ACTION_YANK, /**< Undo last erase action */
+ UNIX_CLI_PARSE_ACTION_TELNETIAC, /**< Telnet control code */
+
+ UNIX_CLI_PARSE_ACTION_PAGER_CRLF, /**< Enter pressed (CR, CRLF, LF, etc) */
+ UNIX_CLI_PARSE_ACTION_PAGER_QUIT, /**< Exit the pager session */
+ UNIX_CLI_PARSE_ACTION_PAGER_NEXT, /**< Scroll to next page */
+ UNIX_CLI_PARSE_ACTION_PAGER_DN, /**< Scroll to next line */
+ UNIX_CLI_PARSE_ACTION_PAGER_UP, /**< Scroll to previous line */
+ UNIX_CLI_PARSE_ACTION_PAGER_TOP, /**< Scroll to first line */
+ UNIX_CLI_PARSE_ACTION_PAGER_BOTTOM, /**< Scroll to last line */
+ UNIX_CLI_PARSE_ACTION_PAGER_PGDN, /**< Scroll to next page */
+ UNIX_CLI_PARSE_ACTION_PAGER_PGUP, /**< Scroll to previous page */
+ UNIX_CLI_PARSE_ACTION_PAGER_REDRAW, /**< Clear and redraw the page on the terminal */
+ UNIX_CLI_PARSE_ACTION_PAGER_SEARCH, /**< Search the pager buffer */
+
+ UNIX_CLI_PARSE_ACTION_PARTIALMATCH, /**< Action parser found a partial match */
+ UNIX_CLI_PARSE_ACTION_NOMATCH /**< Action parser did not find any match */
+} unix_cli_parse_action_t;
+
+/** @brief Mapping of input buffer strings to action values.
+ * @note This won't work as a hash since we need to be able to do
+ * partial matches on the string.
+ * @note Tables of these entries are terminated by an entry whose
+ * @c input is @c NULL; unix_cli_match_action() stops there.
+ */
+typedef struct
+{
+ u8 *input; /**< Input string to match. */
+ u32 len; /**< Length of input without final NUL. */
+ unix_cli_parse_action_t action; /**< Action to take when matched. */
+} unix_cli_parse_actions_t;
+
+/** @brief Given a capital ASCII letter character return a @c NUL terminated
+ * string with the control code for that letter.
+ *
+ * The control code is computed as the letter minus @c '@', so @c 'A'
+ * yields 1, @c 'B' yields 2 and so on.
+ *
+ * @param c An ASCII character.
+ * @return A @c NUL terminated string of type @c u8[].
+ *
+ * @par Example
+ * @c CTL('A') returns <code>{ 0x01, 0x00 }</code> as a @c u8[].
+ */
+#define CTL(c) (u8[]){ (c) - '@', 0 }
+
+/* Build a table entry from a byte string and an action; sizeof(a) - 1
+ * drops the terminating NUL from the stored length. */
+#define _(a,b) { .input = (u8 *)(a), .len = sizeof(a) - 1, .action = (b) }
+/**
+ * Patterns to match on a CLI input stream.
+ * unix_cli_match_action() walks the table in order and returns the first
+ * complete match, so the order of entries sharing a prefix matters.
+ * @showinitializer
+ */
+static unix_cli_parse_actions_t unix_cli_parse_strings[] = {
+ /* Line handling */
+ _("\r\n", UNIX_CLI_PARSE_ACTION_CRLF), /* Must be before '\r' */
+ _("\n", UNIX_CLI_PARSE_ACTION_CRLF),
+ _("\r\0", UNIX_CLI_PARSE_ACTION_CRLF), /* Telnet does this */
+ _("\r", UNIX_CLI_PARSE_ACTION_CRLF),
+
+ /* Unix shell control codes */
+ _(CTL ('B'), UNIX_CLI_PARSE_ACTION_LEFT),
+ _(CTL ('F'), UNIX_CLI_PARSE_ACTION_RIGHT),
+ _(CTL ('P'), UNIX_CLI_PARSE_ACTION_UP),
+ _(CTL ('N'), UNIX_CLI_PARSE_ACTION_DOWN),
+ _(CTL ('A'), UNIX_CLI_PARSE_ACTION_HOME),
+ _(CTL ('E'), UNIX_CLI_PARSE_ACTION_END),
+ _(CTL ('D'), UNIX_CLI_PARSE_ACTION_ERASERIGHT),
+ _(CTL ('U'), UNIX_CLI_PARSE_ACTION_ERASELINELEFT),
+ _(CTL ('K'), UNIX_CLI_PARSE_ACTION_ERASELINERIGHT),
+ _(CTL ('Y'), UNIX_CLI_PARSE_ACTION_YANK),
+ _(CTL ('L'), UNIX_CLI_PARSE_ACTION_CLEAR),
+ _(ESC "b", UNIX_CLI_PARSE_ACTION_WORDLEFT), /* Alt-B */
+ _(ESC "f", UNIX_CLI_PARSE_ACTION_WORDRIGHT), /* Alt-F */
+ _("\b", UNIX_CLI_PARSE_ACTION_ERASE), /* ^H */
+ _("\x7f", UNIX_CLI_PARSE_ACTION_ERASE), /* Backspace */
+ _("\t", UNIX_CLI_PARSE_ACTION_TAB), /* ^I */
+
+ /* VT100 Normal mode - Broadest support */
+ _(CSI "A", UNIX_CLI_PARSE_ACTION_UP),
+ _(CSI "B", UNIX_CLI_PARSE_ACTION_DOWN),
+ _(CSI "C", UNIX_CLI_PARSE_ACTION_RIGHT),
+ _(CSI "D", UNIX_CLI_PARSE_ACTION_LEFT),
+ _(CSI "H", UNIX_CLI_PARSE_ACTION_HOME),
+ _(CSI "F", UNIX_CLI_PARSE_ACTION_END),
+ _(CSI "3~", UNIX_CLI_PARSE_ACTION_ERASERIGHT), /* Delete */
+ _(CSI "1;5D", UNIX_CLI_PARSE_ACTION_WORDLEFT), /* C-Left */
+ _(CSI "1;5C", UNIX_CLI_PARSE_ACTION_WORDRIGHT), /* C-Right */
+
+ /* VT100 Application mode - Some Gnome Terminal functions use these */
+ _(ESC "OA", UNIX_CLI_PARSE_ACTION_UP),
+ _(ESC "OB", UNIX_CLI_PARSE_ACTION_DOWN),
+ _(ESC "OC", UNIX_CLI_PARSE_ACTION_RIGHT),
+ _(ESC "OD", UNIX_CLI_PARSE_ACTION_LEFT),
+ _(ESC "OH", UNIX_CLI_PARSE_ACTION_HOME),
+ _(ESC "OF", UNIX_CLI_PARSE_ACTION_END),
+
+ /* ANSI X3.41-1974 - sent by Microsoft Telnet and PuTTY */
+ _(CSI "1~", UNIX_CLI_PARSE_ACTION_HOME),
+ _(CSI "4~", UNIX_CLI_PARSE_ACTION_END),
+
+ /* Emacs-ish history search */
+ _(CTL ('S'), UNIX_CLI_PARSE_ACTION_FWDSEARCH),
+ _(CTL ('R'), UNIX_CLI_PARSE_ACTION_REVSEARCH),
+
+ /* Other protocol things */
+ _("\xff", UNIX_CLI_PARSE_ACTION_TELNETIAC), /* IAC */
+ _("\0", UNIX_CLI_PARSE_ACTION_NOACTION), /* NUL */
+ /* Terminator: a NULL input ends the table walk. */
+ _(NULL, UNIX_CLI_PARSE_ACTION_NOMATCH)
+};
+
+/**
+ * Patterns to match when a CLI session is in the pager.
+ * Same entry layout and NULL-input terminator as the regular input table,
+ * but bound to the PAGER_* actions.
+ * @showinitializer
+ */
+static unix_cli_parse_actions_t unix_cli_parse_pager[] = {
+ /* Line handling */
+ _("\r\n", UNIX_CLI_PARSE_ACTION_PAGER_CRLF), /* Must be before '\r' */
+ _("\n", UNIX_CLI_PARSE_ACTION_PAGER_CRLF),
+ _("\r\0", UNIX_CLI_PARSE_ACTION_PAGER_CRLF), /* Telnet does this */
+ _("\r", UNIX_CLI_PARSE_ACTION_PAGER_CRLF),
+
+ /* Pager commands */
+ _(" ", UNIX_CLI_PARSE_ACTION_PAGER_NEXT),
+ _("q", UNIX_CLI_PARSE_ACTION_PAGER_QUIT),
+ _(CTL ('L'), UNIX_CLI_PARSE_ACTION_PAGER_REDRAW),
+ _(CTL ('R'), UNIX_CLI_PARSE_ACTION_PAGER_REDRAW),
+ _("/", UNIX_CLI_PARSE_ACTION_PAGER_SEARCH),
+
+ /* VT100 */
+ _(CSI "A", UNIX_CLI_PARSE_ACTION_PAGER_UP),
+ _(CSI "B", UNIX_CLI_PARSE_ACTION_PAGER_DN),
+ _(CSI "H", UNIX_CLI_PARSE_ACTION_PAGER_TOP),
+ _(CSI "F", UNIX_CLI_PARSE_ACTION_PAGER_BOTTOM),
+
+ /* VT100 Application mode */
+ _(ESC "OA", UNIX_CLI_PARSE_ACTION_PAGER_UP),
+ _(ESC "OB", UNIX_CLI_PARSE_ACTION_PAGER_DN),
+ _(ESC "OH", UNIX_CLI_PARSE_ACTION_PAGER_TOP),
+ _(ESC "OF", UNIX_CLI_PARSE_ACTION_PAGER_BOTTOM),
+
+ /* ANSI X3.41-1974 */
+ _(CSI "1~", UNIX_CLI_PARSE_ACTION_PAGER_TOP),
+ _(CSI "4~", UNIX_CLI_PARSE_ACTION_PAGER_BOTTOM),
+ _(CSI "5~", UNIX_CLI_PARSE_ACTION_PAGER_PGUP),
+ _(CSI "6~", UNIX_CLI_PARSE_ACTION_PAGER_PGDN),
+
+ /* Other protocol things */
+ _("\xff", UNIX_CLI_PARSE_ACTION_TELNETIAC), /* IAC */
+ _("\0", UNIX_CLI_PARSE_ACTION_NOACTION), /* NUL */
+ /* Terminator: a NULL input ends the table walk. */
+ _(NULL, UNIX_CLI_PARSE_ACTION_NOMATCH)
+};
+
+#undef _
+
+/** CLI session events.
+ * NOTE(review): presumably delivered to the per-session CLI process; the
+ * sender and receiver are not visible here.
+ */
+typedef enum
+{
+ UNIX_CLI_PROCESS_EVENT_READ_READY, /**< A file descriptor has data to be read. */
+ UNIX_CLI_PROCESS_EVENT_QUIT, /**< A CLI session wants to close. */
+} unix_cli_process_event_type_t;
+
+/** CLI global state. */
+typedef struct
+{
+ /** Prompt string for CLI. */
+ u8 *cli_prompt;
+
+ /** Vec pool of CLI sessions. */
+ unix_cli_file_t *cli_file_pool;
+
+ /** Vec pool of unused session indices.
+ * NOTE(review): the field name suggests these are process node indices
+ * available for reuse -- confirm against the code that fills it. */
+ u32 *unused_cli_process_node_indices;
+
+ /** The session index of the stdin cli */
+ u32 stdin_cli_file_index;
+
+ /** File pool index of current input. */
+ u32 current_input_file_index;
+} unix_cli_main_t;
+
+/** CLI global state; the single file-scope instance used by this file. */
+static unix_cli_main_t unix_cli_main;
+
+/**
+ * @brief Look up the start of an input buffer in an action table.
+ *
+ * Walks the @c NULL-terminated table @a a in order. The first entry whose
+ * whole pattern equals the start of @a input wins: its length is stored in
+ * @a *matched and its action is returned. Entries longer than the input
+ * cannot match yet; if the input equals the start of any such entry the
+ * result is a partial match, telling the caller to collect more bytes.
+ *
+ * @param[in] a Actions list to search within.
+ * @param[in] input String fragment to search for.
+ * @param[in] ilen Length of the string in 'input'.
+ * @param[out] matched Receives the number of bytes matched; written only
+ * when a complete match is found.
+ *
+ * @return The action of the first completely matching entry, otherwise
+ * @ref UNIX_CLI_PARSE_ACTION_PARTIALMATCH when the whole input is
+ * a prefix of at least one entry, otherwise
+ * @ref UNIX_CLI_PARSE_ACTION_NOMATCH.
+ */
+static unix_cli_parse_action_t
+unix_cli_match_action (unix_cli_parse_actions_t * a,
+                       u8 * input, u32 ilen, i32 * matched)
+{
+  unix_cli_parse_actions_t *entry;
+  u8 saw_prefix = 0;
+
+  for (entry = a; entry->input; entry++)
+    {
+      if (ilen < entry->len)
+        {
+          /* Input is shorter than this pattern: it can only be a prefix
+           * of it, in which case more bytes may complete the match. */
+          if (memcmp (input, entry->input, ilen) == 0)
+            saw_prefix = 1;
+          continue;
+        }
+
+      /* Enough input for a full comparison against this pattern. */
+      if (memcmp (input, entry->input, entry->len) == 0)
+        {
+          *matched = entry->len;
+          return entry->action;
+        }
+    }
+
+  if (saw_prefix)
+    return UNIX_CLI_PARSE_ACTION_PARTIALMATCH;
+
+  return UNIX_CLI_PARSE_ACTION_NOMATCH;
+}
+
+
+/** Append bytes to the session's pending output and, if anything is now
+ * pending, flag the I/O system that bytes are available to be sent.
+ * The file update callback is only invoked when the flag was not already
+ * set.
+ */
+static void
+unix_cli_add_pending_output (unix_file_t * uf,
+                             unix_cli_file_t * cf,
+                             u8 * buffer, uword buffer_bytes)
+{
+  unix_main_t *um = &unix_main;
+
+  vec_add (cf->output_vector, buffer, buffer_bytes);
+
+  /* Nothing queued: leave the write-ready flag alone. */
+  if (vec_len (cf->output_vector) == 0)
+    return;
+
+  /* Already flagged: the I/O system knows, no update needed. */
+  if (uf->flags & UNIX_FILE_DATA_AVAILABLE_TO_WRITE)
+    return;
+
+  uf->flags |= UNIX_FILE_DATA_AVAILABLE_TO_WRITE;
+  um->file_update (uf, UNIX_FILE_UPDATE_MODIFY);
+}
+
+/** Remove @c n_bytes from the front of the pending output and, once
+ * nothing is left, flag the I/O system that no more bytes are available
+ * to be sent. The file update callback is only invoked when the flag was
+ * actually set.
+ */
+static void
+unix_cli_del_pending_output (unix_file_t * uf,
+                             unix_cli_file_t * cf, uword n_bytes)
+{
+  unix_main_t *um = &unix_main;
+
+  vec_delete (cf->output_vector, n_bytes, 0);
+
+  /* Still data to send: keep the write-ready flag as it is. */
+  if (vec_len (cf->output_vector) > 0)
+    return;
+
+  /* Flag already clear: nothing to tell the I/O system. */
+  if (!(uf->flags & UNIX_FILE_DATA_AVAILABLE_TO_WRITE))
+    return;
+
+  uf->flags &= ~UNIX_FILE_DATA_AVAILABLE_TO_WRITE;
+  um->file_update (uf, UNIX_FILE_UPDATE_MODIFY);
+}
+
+/** @brief Bounded search for a byte, a bit like strchr with a length.
+ * Scans at most @c len bytes of @c str for the first byte equal to
+ * @c chr.
+ *
+ * Unlike strchr, a miss is reported by returning the buffer length
+ * rather than a null pointer.
+ *
+ * @param chr The byte value to search for.
+ * @param str The buffer in which to search for the value.
+ * @param len The depth into the buffer to search.
+ *
+ * @return The index of the first occurrence of \c chr. If \c chr is not
+ * found then \c len instead.
+ */
+always_inline word
+unix_vlib_findchr (u8 chr, u8 * str, word len)
+{
+  word pos;
+
+  for (pos = 0; pos < len; pos++)
+    if (str[pos] == chr)
+      return pos;
+
+  return len;
+}
+
+/** @brief Write a buffer to the CLI stream now, or queue what is left.
+ * If the session has no pending output, the buffer is written straight to
+ * the session's file descriptor. Whatever was not written - because output
+ * was already pending, the write was short, or the write reported
+ * @c EAGAIN - is appended to the pending output buffer. Any other write
+ * error is reported with a warning and the buffer is not queued.
+ *
+ * This is typically called only from \c unix_vlib_cli_output_cooked since
+ * that is where CRLF handling occurs or from places where we explicitly do
+ * not want cooked handling.
+ *
+ * @param cf Unix CLI session of the desired stream to write to.
+ * @param uf The Unix file structure of the desired stream to write to.
+ * @param buffer Pointer to the buffer that needs to be written.
+ * @param buffer_bytes The number of bytes from \c buffer to write.
+ */
+static void
+unix_vlib_cli_output_raw (unix_cli_file_t * cf,
+                          unix_file_t * uf, u8 * buffer, uword buffer_bytes)
+{
+  int written = 0;
+
+  /* Only write directly when nothing is queued ahead of this buffer. */
+  if (vec_len (cf->output_vector) == 0)
+    written = write (uf->file_descriptor, buffer, buffer_bytes);
+
+  if (written < 0 && errno != EAGAIN)
+    {
+      clib_unix_warning ("write");
+      return;
+    }
+
+  /* Everything went out: nothing to queue. */
+  if ((word) written >= (word) buffer_bytes)
+    return;
+
+  /* EAGAIN counts as zero bytes sent; queue the unsent remainder. */
+  if (written < 0)
+    written = 0;
+  unix_cli_add_pending_output (uf, cf, buffer + written,
+                               buffer_bytes - written);
+}
+
+/** @brief Output a buffer to the CLI, applying CRLF handling if enabled.
+ * When the session is not in CRLF mode the buffer is passed through in one
+ * piece. In CRLF mode the buffer is cut at every newline and each newline
+ * is sent as a carriage return plus newline pair.
+ *
+ * @param cf Unix CLI session of the desired stream to write to.
+ * @param uf The Unix file structure of the desired stream to write to.
+ * @param buffer Pointer to the buffer that needs to be written.
+ * @param buffer_bytes The number of bytes from \c buffer to write.
+ */
+static void
+unix_vlib_cli_output_cooked (unix_cli_file_t * cf,
+                             unix_file_t * uf,
+                             u8 * buffer, uword buffer_bytes)
+{
+  word pos = 0;
+
+  if (!cf->crlf_mode)
+    {
+      /* No translation needed: send the whole buffer, if there is one. */
+      if (pos < buffer_bytes)
+        unix_vlib_cli_output_raw (cf, uf, buffer, buffer_bytes);
+      return;
+    }
+
+  while (pos < buffer_bytes)
+    {
+      /* Locate the next \n (or the end of the buffer) and send the text
+       * that precedes it. */
+      word nl = pos + unix_vlib_findchr ('\n', buffer + pos,
+                                         buffer_bytes - pos);
+
+      unix_vlib_cli_output_raw (cf, uf, buffer + pos, nl - pos);
+
+      if (nl < buffer_bytes)
+        {
+          /* Replace the \n with \r\n and step over the original byte. */
+          unix_vlib_cli_output_raw (cf, uf, (u8 *) "\r\n", 2);
+          nl++;
+        }
+
+      pos = nl;
+    }
+}
+
+/** @brief Send the configured CLI prompt, uncooked, to a session. */
+static void
+unix_cli_cli_prompt (unix_cli_file_t * cf, unix_file_t * uf)
+{
+  u8 *prompt = unix_cli_main.cli_prompt;
+
+  unix_vlib_cli_output_raw (cf, uf, prompt, vec_len (prompt));
+}
+
+/** @brief Output the pager "more" prompt.
+ * The prompt shows the range of index lines on the current page and the
+ * total number of lines in the pager index; it is bold when the session
+ * is ANSI capable.
+ */
+static void
+unix_cli_pager_prompt (unix_cli_file_t * cf, unix_file_t * uf)
+{
+  char *bold_on = cf->ansi_capable ? ANSI_BOLD : "";
+  char *bold_off = cf->ansi_capable ? ANSI_RESET : "";
+  u32 last_shown = cf->pager_start + (cf->height - 1);
+  u8 *text;
+
+  /* The last page may be shorter than the terminal. */
+  if (last_shown > vec_len (cf->pager_index))
+    last_shown = vec_len (cf->pager_index);
+
+  text = format (0, "\r%s-- more -- (%d-%d/%d)%s",
+                 bold_on,
+                 cf->pager_start + 1,
+                 last_shown, vec_len (cf->pager_index), bold_off);
+
+  unix_vlib_cli_output_cooked (cf, uf, text, vec_len (text));
+
+  vec_free (text);
+}
+
+/** @brief Output a pager status message.
+ * Prints @c message between "-- " and " --" at the start of the line,
+ * bold when the session is ANSI capable, followed by @c postfix.
+ */
+static void
+unix_cli_pager_message (unix_cli_file_t * cf, unix_file_t * uf,
+                        char *message, char *postfix)
+{
+  char *bold_on = cf->ansi_capable ? ANSI_BOLD : "";
+  char *bold_off = cf->ansi_capable ? ANSI_RESET : "";
+  u8 *text;
+
+  text = format (0, "\r%s-- %s --%s%s", bold_on, message, bold_off, postfix);
+
+  unix_vlib_cli_output_cooked (cf, uf, text, vec_len (text));
+
+  vec_free (text);
+}
+
+/** @brief Erase the printed pager prompt.
+ * Returns the cursor to the start of the line and clears it: with the ANSI
+ * clear-line sequence when the session is ANSI capable, otherwise by
+ * overwriting width - 1 columns with spaces and returning to column 0.
+ *
+ * A terminal width of 0 is treated as "nothing to overwrite"; computing
+ * width - 1 on the unsigned width would otherwise wrap around and emit
+ * billions of spaces.
+ *
+ * @param cf CLI session whose pager prompt should be erased.
+ * @param uf Unix file of the CLI session.
+ */
+static void
+unix_cli_pager_prompt_erase (unix_cli_file_t * cf, unix_file_t * uf)
+{
+  u32 n_spaces;
+
+  /* Both variants start by returning to column 0. */
+  unix_vlib_cli_output_cooked (cf, uf, (u8 *) "\r", 1);
+
+  if (cf->ansi_capable)
+    {
+      unix_vlib_cli_output_cooked (cf, uf,
+                                   (u8 *) ANSI_CLEARLINE,
+                                   sizeof (ANSI_CLEARLINE) - 1);
+      return;
+    }
+
+  /* Guard the subtraction: width is unsigned and may be 0. */
+  n_spaces = cf->width > 0 ? cf->width - 1 : 0;
+  while (n_spaces > 0)
+    {
+      unix_vlib_cli_output_cooked (cf, uf, (u8 *) " ", 1);
+      n_spaces--;
+    }
+  unix_vlib_cli_output_cooked (cf, uf, (u8 *) "\r", 1);
+}
+
+/** @brief Move the cursor with an ANSI escape sequence.
+ * Emits CSI followed by "<y>;<x>H".
+ */
+static void
+unix_cli_ansi_cursor (unix_cli_file_t * cf, unix_file_t * uf, u16 x, u16 y)
+{
+  u8 *sequence = format (0, "%s%d;%dH", CSI, y, x);
+
+  unix_vlib_cli_output_cooked (cf, uf, sequence, vec_len (sequence));
+
+  vec_free (sequence);
+}
+
+/** Repaint the page of pager text currently on screen.
+ * Does nothing when the pager index is empty. Otherwise the screen is
+ * cleared (ANSI) or the prompt line is blanked (non-ANSI), up to
+ * height - 1 index lines starting at the page start are re-sent, and the
+ * pager prompt is printed again.
+ * @param cf CLI session to redraw the pager buffer of.
+ * @param uf Unix file of the CLI session.
+ */
+static void
+unix_cli_pager_redraw (unix_cli_file_t * cf, unix_file_t * uf)
+{
+  unix_cli_pager_index_t *last = NULL;
+  u8 *text = NULL;
+  word row;
+
+  /* No active pager? Do nothing. */
+  if (vec_len (cf->pager_index) == 0)
+    return;
+
+  if (!cf->ansi_capable)
+    {
+      /* Without ANSI the best we can do is start from a blank line */
+      unix_cli_pager_prompt_erase (cf, uf);
+    }
+  else
+    {
+      /* With ANSI, clear the whole screen */
+      unix_vlib_cli_output_cooked (cf, uf,
+                                   (u8 *) ANSI_CLEAR,
+                                   sizeof (ANSI_CLEAR) - 1);
+    }
+
+  /* (Re-)send the current page of content */
+  row = 0;
+  while (row < cf->height - 1 &&
+         row + cf->pager_start < vec_len (cf->pager_index))
+    {
+      last = &cf->pager_index[cf->pager_start + row];
+      text = cf->pager_vector[last->line] + last->offset;
+
+      unix_vlib_cli_output_cooked (cf, uf, text, last->length);
+      row++;
+    }
+
+  /* Make sure the prompt starts on its own line */
+  if (last != NULL && text[last->length - 1] != '\n')
+    unix_vlib_cli_output_cooked (cf, uf, (u8 *) "\n", 1);
+
+  unix_cli_pager_prompt (cf, uf);
+}
+
+/** @brief Process and add a line to the pager index.
+ * In normal operation this function takes the character string found in
+ * @c line, of length @c len_or_index, and iterates over its contents,
+ * adding each line of text discovered within it to the pager index. Lines
+ * are identified by newlines ("<code>\\n</code>") and by strings longer
+ * than the width of the terminal.
+ *
+ * If instead @c line is @c NULL then @c len_or_index is taken to mean the
+ * index of an existing line in the pager buffer; this simply means that the
+ * input line does not need to be cloned since we already have it. This is
+ * typical if we are reindexing the pager buffer.
+ *
+ * A terminal width of 0 is treated as "do not wrap": fragments are then
+ * split on newlines only. Without this a zero width produced zero-length
+ * fragments and the indexing loop never advanced.
+ *
+ * @param cf The CLI session whose pager we are adding to.
+ * @param line The string of text to be indexed into the pager buffer.
+ * If @c line is @c NULL then the mode of operation
+ * changes slightly; see the description above.
+ * @param len_or_index If @c line is a pointer to a string then this parameter
+ * indicates the length of that string; Otherwise this
+ * value provides the index in the pager buffer of an
+ * existing string to be indexed.
+ */
+static void
+unix_cli_pager_add_line (unix_cli_file_t * cf, u8 * line, word len_or_index)
+{
+  u8 *p;
+  word i, j, k;
+  word line_index, len;
+  u32 width = cf->width;
+  unix_cli_pager_index_t *pi;
+
+  if (line == NULL)
+    {
+      /* Use a line already in the pager buffer */
+      line_index = len_or_index;
+      p = cf->pager_vector[line_index];
+      len = vec_len (p);
+    }
+  else
+    {
+      len = len_or_index;
+      /* Add a copy of the raw string to the pager buffer */
+      p = vec_new (u8, len);
+      clib_memcpy (p, line, len);
+
+      /* store in pager buffer */
+      line_index = vec_len (cf->pager_vector);
+      vec_add1 (cf->pager_vector, p);
+    }
+
+  i = 0;
+  while (i < len)
+    {
+      /* Find the next line, or run to terminal width, or run to EOL */
+      word l = len - i;
+      word limit = l;
+
+      /* Wrap at the terminal width, unless the width is unknown (0) */
+      if (width != 0 && (word) width < l)
+        limit = width;
+
+      j = unix_vlib_findchr ((u8) '\n', p, limit);
+
+      if (j < l && p[j] == '\n')	/* incl \n */
+        j++;
+
+      /* Add the line to the index; limit >= 1 so j >= 1 here and the
+       * loop always makes progress. */
+      k = vec_len (cf->pager_index);
+      vec_validate (cf->pager_index, k);
+      pi = &cf->pager_index[k];
+
+      pi->line = line_index;
+      pi->offset = i;
+      pi->length = j;
+
+      i += j;
+      p += j;
+    }
+}
+
+/** @brief Reindex entire pager buffer.
+ * Resets the current pager index and then re-adds the lines in the pager
+ * buffer to the index.
+ *
+ * Additionally this function attempts to retain the current page start
+ * line offset: it remembers which buffered string, and which offset within
+ * it, was at the top of the screen and, after reindexing, selects the
+ * fragment of that string which contains that offset.
+ *
+ * If the page start does not refer to a valid index entry beforehand
+ * (for example the index is empty) nothing is remembered and the page
+ * start is only clamped to the new index.
+ *
+ * @param cf The CLI session whose pager buffer should be reindexed.
+ */
+static void
+unix_cli_pager_reindex (unix_cli_file_t * cf)
+{
+  word i, old_line = 0, old_offset = 0;
+  unix_cli_pager_index_t *pi;
+  u8 have_old_start = 0;
+
+  /* If there is nothing in the pager buffer then make sure the index
+   * is empty and move on.
+   */
+  if (cf->pager_vector == 0)
+    {
+      vec_reset_length (cf->pager_index);
+      return;
+    }
+
+  /* Retain the position of the current page start line so we can
+   * find it later; only read the index entry if it actually exists.
+   */
+  if (cf->pager_start < vec_len (cf->pager_index))
+    {
+      pi = &cf->pager_index[cf->pager_start];
+      old_line = pi->line;
+      old_offset = pi->offset;
+      have_old_start = 1;
+    }
+
+  /* Re-add the buffered lines to the index */
+  vec_reset_length (cf->pager_index);
+  vec_foreach_index (i, cf->pager_vector)
+  {
+    unix_cli_pager_add_line (cf, NULL, i);
+  }
+
+  /* Attempt to re-locate the previously stored page start line */
+  if (have_old_start)
+    {
+      vec_foreach_index (i, cf->pager_index)
+      {
+        pi = &cf->pager_index[i];
+
+        /* The wanted fragment is the one whose byte range
+         * [offset, offset + length) contains the old offset. */
+        if (pi->line == old_line &&
+            pi->offset <= old_offset &&
+            pi->offset + pi->length > old_offset)
+          {
+            /* Found it! */
+            cf->pager_start = i;
+            break;
+          }
+      }
+    }
+
+  /* In case the start line was not found (rare), ensure the pager start
+   * index is within bounds
+   */
+  if (cf->pager_start >= vec_len (cf->pager_index))
+    {
+      if (!cf->height || vec_len (cf->pager_index) < (cf->height - 1))
+        cf->pager_start = 0;
+      else
+        cf->pager_start = vec_len (cf->pager_index) - (cf->height - 1);
+    }
+}
+
+/** VLIB CLI output function.
+ *
+ * If the terminal has a pager configured then this function takes care
+ * of collating output into the pager buffer; ensuring only the first page
+ * is displayed and any lines in excess of the first page are buffered.
+ *
+ * If the maximum number of index lines in the buffer is exceeded then the
+ * pager is cancelled and the contents of the current buffer are sent to the
+ * terminal.
+ *
+ * If there is no pager configured then the output is sent directly to the
+ * terminal. The pager is bypassed when the session has @c no_pager set,
+ * when the global pager buffer limit is 0, or when the terminal height
+ * is 0.
+ *
+ * @param cli_file_index Index of the CLI session where this output is
+ * directed.
+ * @param buffer String of printable bytes to be output.
+ * @param buffer_bytes The number of bytes in @c buffer to be output.
+ */
+static void
+unix_vlib_cli_output (uword cli_file_index, u8 * buffer, uword buffer_bytes)
+{
+ unix_main_t *um = &unix_main;
+ unix_cli_main_t *cm = &unix_cli_main;
+ unix_cli_file_t *cf;
+ unix_file_t *uf;
+
+ cf = pool_elt_at_index (cm->cli_file_pool, cli_file_index);
+ uf = pool_elt_at_index (um->file_pool, cf->unix_file_index);
+
+ if (cf->no_pager || um->cli_pager_buffer_limit == 0 || cf->height == 0)
+ {
+ unix_vlib_cli_output_cooked (cf, uf, buffer, buffer_bytes);
+ }
+ else
+ {
+ /* First index row that this call will add. */
+ word row = vec_len (cf->pager_index);
+ u8 *line;
+ unix_cli_pager_index_t *pi;
+
+ /* Index and add the output lines to the pager buffer. */
+ unix_cli_pager_add_line (cf, buffer, buffer_bytes);
+
+ /* Now iterate what was added to display the lines.
+ * If we reach the bottom of the page, display a prompt.
+ */
+ while (row < vec_len (cf->pager_index))
+ {
+ /* Rows on the first page (height - 1 lines) are shown right away;
+ * later rows stay buffered only. */
+ if (row < cf->height - 1)
+ {
+ /* output this line */
+ pi = &cf->pager_index[row];
+ line = cf->pager_vector[pi->line] + pi->offset;
+ unix_vlib_cli_output_cooked (cf, uf, line, pi->length);
+
+ /* if the last line didn't end in newline, and we're at the
+ * bottom of the page, add a newline */
+ if (line[pi->length - 1] != '\n' && row == cf->height - 2)
+ unix_vlib_cli_output_cooked (cf, uf, (u8 *) "\n", 1);
+ }
+ else
+ {
+ /* Display the pager prompt every 10 lines, i.e. whenever the row
+ * number is a multiple of 10 */
+ if (!(row % 10))
+ unix_cli_pager_prompt (cf, uf);
+ }
+ row++;
+ }
+
+ /* Check if we went over the pager buffer limit */
+ if (vec_len (cf->pager_index) > um->cli_pager_buffer_limit)
+ {
+ /* Stop using the pager for the remainder of this CLI command */
+ cf->no_pager = 2;
+
+ /* If we likely printed the prompt, erase it */
+ if (vec_len (cf->pager_index) > cf->height - 1)
+ unix_cli_pager_prompt_erase (cf, uf);
+
+ /* Dump out the contents of the buffer, starting with the first row
+ * after the page that is already displayed */
+ for (row = cf->pager_start + (cf->height - 1);
+ row < vec_len (cf->pager_index); row++)
+ {
+ pi = &cf->pager_index[row];
+ line = cf->pager_vector[pi->line] + pi->offset;
+ unix_vlib_cli_output_cooked (cf, uf, line, pi->length);
+ }
+
+ /* The buffered text has been sent; release the pager storage. */
+ unix_cli_pager_reset (cf);
+ }
+ }
+}
+
+/** Identify whether a terminal type is ANSI capable.
+ *
+ * Compares the string given in @c term, case-insensitively, with a list of
+ * terminal types known to support ANSI escape sequences.
+ *
+ * This list contains, for example, @c xterm, @c screen and @c ansi.
+ *
+ * The whole of @c term must equal a known name: its length has to match
+ * the name's length exactly. Bounding the comparison by @c len alone
+ * would let an empty string, or any prefix of a known name such as "x",
+ * be reported as ANSI capable.
+ *
+ * @param term A string with a terminal type in it; it does not need to be
+ * NUL terminated.
+ * @param len The length of the string in @c term.
+ *
+ * @return @c 1 if the terminal type is recognized as supporting ANSI
+ * terminal sequences; @c 0 otherwise.
+ */
+static u8
+unix_cli_terminal_type (u8 * term, uword len)
+{
+  /* This may later be better done as a hash of some sort. */
+#define _(a) do { \
+    if (len == sizeof (a) - 1 && \
+        strncasecmp (a, (char *) term, sizeof (a) - 1) == 0) \
+      return 1; \
+  } while (0)
+
+  _("xterm");
+  _("xterm-color");
+  _("xterm-256color");	/* iTerm on Mac */
+  _("screen");
+  _("ansi");	/* Microsoft Telnet */
+#undef _
+
+  return 0;
+}
+
+/** @brief Send the welcome banner (unless disabled) and the first prompt
+ * on a connection, then mark the session as started. */
+static void
+unix_cli_file_welcome (unix_cli_main_t * cm, unix_cli_file_t * cf)
+{
+  unix_main_t *um = &unix_main;
+  unix_file_t *uf = pool_elt_at_index (um->file_pool, cf->unix_file_index);
+
+  /*
+   * Put the first bytes directly into the buffer so that further output is
+   * queued until everything is ready. (otherwise initial prompt can appear
+   * mid way through VPP initialization)
+   */
+  unix_cli_add_pending_output (uf, cf, (u8 *) "\r", 1);
+
+  if (!um->cli_no_banner)
+    {
+      unix_cli_banner_t *row, *end;
+
+      /* Pick the colored art for ANSI terminals, plain art otherwise. */
+      if (cf->ansi_capable)
+        {
+          row = unix_cli_banner_color;
+          end = row + ARRAY_LEN (unix_cli_banner_color);
+        }
+      else
+        {
+          row = unix_cli_banner;
+          end = row + ARRAY_LEN (unix_cli_banner);
+        }
+
+      for (; row < end; row++)
+        unix_vlib_cli_output_cooked (cf, uf, row->line, row->length);
+    }
+
+  /* Prompt. */
+  unix_cli_cli_prompt (cf, uf);
+
+  cf->started = 1;
+}
+
+/** @brief Timer callback acting as a failsafe: it makes sure the welcome
+ * banner and prompt are sent to telnet sessions that never negotiate a
+ * terminal type.
+ *
+ * @param arg   Index of the session in the CLI file pool.
+ * @param delay Unused.
+ */
+static void
+unix_cli_file_welcome_timer (any arg, f64 delay)
+{
+  unix_cli_main_t *cm = &unix_cli_main;
+  uword session_index = (uword) arg;
+  (void) delay;
+
+  /* Only act if the connection is still open */
+  if (!pool_is_free_index (cm->cli_file_pool, session_index))
+    {
+      unix_cli_file_t *cf =
+        pool_elt_at_index (cm->cli_file_pool, session_index);
+
+      /* Nothing to do if the welcome was already sent */
+      if (!cf->started)
+        unix_cli_file_welcome (cm, cf);
+    }
+}
+
+/** @brief A mostly no-op Telnet state machine.
+ * Process Telnet command bytes in a way that ensures we're mostly
+ * transparent to the Telnet protocol. That is, it's mostly a no-op.
+ *
+ * The terminal type and window size sub-options are the exceptions: they
+ * update the session's ANSI capability and its width/height.
+ *
+ * @param um           The Unix main structure (not used here).
+ * @param cf           The CLI session the bytes were received on.
+ * @param uf           The Unix file used for any output that is generated.
+ * @param input_vector Pointer to the IAC byte that starts the command. This
+ *                     may point into the middle of a vector, so vec_len()
+ *                     must not be applied to it; use @c len instead.
+ * @param len          Number of bytes available starting at @c input_vector.
+ *
+ * @return -1 if we need more bytes, otherwise the number of bytes to
+ *         consume from the input_vector, not including the initial
+ *         IAC byte. This is 0 for an unrecognised command, in which case
+ *         only the IAC byte itself ends up being skipped.
+ */
+static i32
+unix_cli_process_telnet (unix_main_t * um,
+                         unix_cli_file_t * cf,
+                         unix_file_t * uf, u8 * input_vector, uword len)
+{
+  /* Input_vector starts at IAC byte.
+   * See if we have a complete message; if not, return -1 so we wait for more.
+   * if we have a complete message, consume those bytes from the vector.
+   */
+  i32 consume = 0;
+
+  if (len == 1)
+    return -1; /* want more bytes */
+
+  switch (input_vector[1])
+    {
+    case IAC:
+      /* two IAC's in a row means to pass through 0xff.
+       * since that makes no sense here, just consume it.
+       */
+      consume = 1;
+      break;
+
+    case WILL:
+    case WONT:
+    case DO:
+    case DONT:
+      /* Expect 3 bytes. Use len, not vec_len(): input_vector is not
+       * necessarily the start of a vector. */
+      if (len < 3)
+        return -1; /* want more bytes */
+
+      consume = 2;
+      break;
+
+    case SB:
+      {
+        /* Sub option - search ahead for IAC SE to end it */
+        i32 i;
+        for (i = 3; i < len && i < UNIX_CLI_MAX_DEPTH_TELNET; i++)
+          {
+            if (input_vector[i - 1] == IAC && input_vector[i] == SE)
+              {
+                /* We have a complete message; see if we care about it */
+                switch (input_vector[2])
+                  {
+                  case TELOPT_TTYPE:
+                    if (input_vector[3] != 0)
+                      break;
+                    /* See if the terminal type is ANSI capable.
+                     * The name sits between the 4 byte header and the
+                     * trailing IAC SE, hence i - 5 bytes long. */
+                    cf->ansi_capable =
+                      unix_cli_terminal_type (input_vector + 4, i - 5);
+                    /* If session not started, we can release the pause */
+                    if (!cf->started)
+                      /* Send the welcome banner and initial prompt */
+                      unix_cli_file_welcome (&unix_cli_main, cf);
+                    break;
+
+                  case TELOPT_NAWS:
+                    {
+                      u16 net_width, net_height;
+
+                      /* Window size */
+                      if (i != 8) /* check message is correct size */
+                        break;
+                      /* The fields sit at odd offsets; copy them out
+                       * rather than dereferencing a misaligned u16 *. */
+                      clib_memcpy (&net_width, input_vector + 3,
+                                   sizeof (net_width));
+                      clib_memcpy (&net_height, input_vector + 5,
+                                   sizeof (net_height));
+                      cf->width = clib_net_to_host_u16 (net_width);
+                      cf->height = clib_net_to_host_u16 (net_height);
+                      /* reindex pager buffer */
+                      unix_cli_pager_reindex (cf);
+                      /* redraw page */
+                      unix_cli_pager_redraw (cf, uf);
+                    }
+                    break;
+
+                  default:
+                    break;
+                  }
+                /* Consume it all */
+                consume = i;
+                break;
+              }
+          }
+
+        if (i == UNIX_CLI_MAX_DEPTH_TELNET)
+          consume = 1; /* hit max search depth, advance one byte */
+
+        if (consume == 0)
+          return -1; /* want more bytes */
+
+        break;
+      }
+
+    case GA:
+    case EL:
+    case EC:
+    case AO:
+    case IP:
+    case BREAK:
+    case DM:
+    case NOP:
+    case SE:
+    case EOR:
+    case ABORT:
+    case SUSP:
+    case xEOF:
+      /* Simple one-byte messages */
+      consume = 1;
+      break;
+
+    case AYT:
+      /* Are You There - trigger a visible response */
+      consume = 1;
+      unix_vlib_cli_output_cooked (cf, uf, (u8 *) "fd.io VPP\n", 10);
+      break;
+
+    default:
+      /* Unknown command! Eat the IAC byte */
+      break;
+    }
+
+  return consume;
+}
+
+/** @brief Process actionable input.
+ * Based on the \c action process the input; this typically involves
+ * searching the command history or editing the current command line.
+ *
+ * When the session's pager is active the pager actions scroll through
+ * the buffered output instead.
+ *
+ * @param cm     The CLI main structure; supplies the prompt and the
+ *               session pool.
+ * @param um     The Unix main structure; supplies the vlib main used to
+ *               signal a quit event.
+ * @param cf     The CLI session whose command line, history and pager
+ *               state are read and updated.
+ * @param uf     The Unix file that echo and redraw output is sent to.
+ * @param input  The input byte that led to \c action; it is inserted
+ *               into the command line or search key when printable.
+ * @param action The action to carry out.
+ *
+ * @return 0 when a line has been completed (the CRLF action, or a history
+ *         search that found nothing); 1 in all other cases.
+ */
+static int
+unix_cli_line_process_one (unix_cli_main_t * cm,
+ unix_main_t * um,
+ unix_cli_file_t * cf,
+ unix_file_t * uf,
+ u8 input, unix_cli_parse_action_t action)
+{
+ u8 *prev;
+ int j, delta;
+
+ switch (action)
+ {
+ case UNIX_CLI_PARSE_ACTION_NOACTION:
+ break;
+
+ case UNIX_CLI_PARSE_ACTION_REVSEARCH:
+ case UNIX_CLI_PARSE_ACTION_FWDSEARCH:
+ if (!cf->has_history || !cf->history_limit)
+ break;
+ if (cf->search_mode == 0)
+ {
+ /* Erase the current command (if any) from the display; the
+ * command and search key buffers are reset just below */
+ for (j = 0; j < (vec_len (cf->current_command)); j++)
+ unix_vlib_cli_output_cooked (cf, uf, (u8 *) "\b \b", 3);
+
+ vec_reset_length (cf->search_key);
+ vec_reset_length (cf->current_command);
+ if (action == UNIX_CLI_PARSE_ACTION_REVSEARCH)
+ cf->search_mode = -1;
+ else
+ cf->search_mode = 1;
+ cf->cursor = 0;
+ }
+ else
+ {
+ if (action == UNIX_CLI_PARSE_ACTION_REVSEARCH)
+ cf->search_mode = -1;
+ else
+ cf->search_mode = 1;
+
+ cf->excursion += cf->search_mode;
+ goto search_again;
+ }
+ break;
+
+ case UNIX_CLI_PARSE_ACTION_ERASELINELEFT:
+ /* Erase the command from the cursor to the start */
+
+ /* Shimmy forwards to the new end of line position */
+ delta = vec_len (cf->current_command) - cf->cursor;
+ for (j = cf->cursor; j > delta; j--)
+ unix_vlib_cli_output_cooked (cf, uf, (u8 *) "\b", 1);
+ /* Zap from here to the end of what is currently displayed */
+ for (; j < (vec_len (cf->current_command)); j++)
+ unix_vlib_cli_output_cooked (cf, uf, (u8 *) " ", 1);
+ /* Get back to the start of the line */
+ for (j = 0; j < (vec_len (cf->current_command)); j++)
+ unix_vlib_cli_output_cooked (cf, uf, (u8 *) "\b", 1);
+
+ j = vec_len (cf->current_command) - cf->cursor;
+ memmove (cf->current_command, cf->current_command + cf->cursor, j);
+ _vec_len (cf->current_command) = j;
+
+ /* Print the new contents */
+ unix_vlib_cli_output_cooked (cf, uf, cf->current_command, j);
+ /* Shimmy back to the start */
+ for (j = 0; j < (vec_len (cf->current_command)); j++)
+ unix_vlib_cli_output_cooked (cf, uf, (u8 *) "\b", 1);
+ cf->cursor = 0;
+
+ cf->search_mode = 0;
+ break;
+
+ case UNIX_CLI_PARSE_ACTION_ERASELINERIGHT:
+ /* Erase the command from the cursor to the end */
+
+ /* Zap from cursor to end of what is currently displayed */
+ for (j = cf->cursor; j < (vec_len (cf->current_command)); j++)
+ unix_vlib_cli_output_cooked (cf, uf, (u8 *) " ", 1);
+ /* Get back to where we were */
+ for (j = cf->cursor; j < (vec_len (cf->current_command)); j++)
+ unix_vlib_cli_output_cooked (cf, uf, (u8 *) "\b", 1);
+
+ /* Truncate the line at the cursor */
+ _vec_len (cf->current_command) = cf->cursor;
+
+ cf->search_mode = 0;
+ break;
+
+ case UNIX_CLI_PARSE_ACTION_LEFT:
+ if (cf->cursor > 0)
+ {
+ unix_vlib_cli_output_cooked (cf, uf, (u8 *) "\b", 1);
+ cf->cursor--;
+ }
+
+ cf->search_mode = 0;
+ break;
+
+ case UNIX_CLI_PARSE_ACTION_RIGHT:
+ if (cf->cursor < vec_len (cf->current_command))
+ {
+ /* have to emit the character under the cursor */
+ unix_vlib_cli_output_cooked (cf, uf,
+ cf->current_command + cf->cursor, 1);
+ cf->cursor++;
+ }
+
+ cf->search_mode = 0;
+ break;
+
+ case UNIX_CLI_PARSE_ACTION_UP:
+ case UNIX_CLI_PARSE_ACTION_DOWN:
+ if (!cf->has_history || !cf->history_limit)
+ break;
+ cf->search_mode = 0;
+ /* Erase the command */
+ for (j = cf->cursor; j < (vec_len (cf->current_command)); j++)
+ unix_vlib_cli_output_cooked (cf, uf, (u8 *) " ", 1);
+ for (j = 0; j < (vec_len (cf->current_command)); j++)
+ unix_vlib_cli_output_cooked (cf, uf, (u8 *) "\b \b", 3);
+ vec_reset_length (cf->current_command);
+ if (vec_len (cf->command_history))
+ {
+ if (action == UNIX_CLI_PARSE_ACTION_UP)
+ delta = -1;
+ else
+ delta = 1;
+
+ cf->excursion += delta;
+
+ if (cf->excursion == vec_len (cf->command_history))
+ {
+ /* down-arrowed to last entry - want a blank line */
+ _vec_len (cf->current_command) = 0;
+ }
+ else if (cf->excursion < 0)
+ {
+ /* up-arrowed over the start to the end, want a blank line */
+ cf->excursion = vec_len (cf->command_history);
+ _vec_len (cf->current_command) = 0;
+ }
+ else
+ {
+ if (cf->excursion > (i32) vec_len (cf->command_history) - 1)
+ /* down-arrowed past end - wrap to start */
+ cf->excursion = 0;
+
+ /* Print the command at the current position */
+ prev = cf->command_history[cf->excursion];
+ vec_validate (cf->current_command, vec_len (prev) - 1);
+
+ clib_memcpy (cf->current_command, prev, vec_len (prev));
+ _vec_len (cf->current_command) = vec_len (prev);
+ unix_vlib_cli_output_cooked (cf, uf, cf->current_command,
+ vec_len (cf->current_command));
+ }
+ cf->cursor = vec_len (cf->current_command);
+
+ break;
+ }
+ break;
+
+ case UNIX_CLI_PARSE_ACTION_HOME:
+ if (vec_len (cf->current_command) && cf->cursor > 0)
+ {
+ while (cf->cursor)
+ {
+ unix_vlib_cli_output_cooked (cf, uf, (u8 *) "\b", 1);
+ cf->cursor--;
+ }
+ }
+
+ cf->search_mode = 0;
+ break;
+
+ case UNIX_CLI_PARSE_ACTION_END:
+ if (vec_len (cf->current_command) &&
+ cf->cursor < vec_len (cf->current_command))
+ {
+ unix_vlib_cli_output_cooked (cf, uf,
+ cf->current_command + cf->cursor,
+ vec_len (cf->current_command) -
+ cf->cursor);
+ cf->cursor = vec_len (cf->current_command);
+ }
+
+ cf->search_mode = 0;
+ break;
+
+ case UNIX_CLI_PARSE_ACTION_WORDLEFT:
+ if (vec_len (cf->current_command) && cf->cursor > 0)
+ {
+ j = cf->cursor;
+
+ unix_vlib_cli_output_cooked (cf, uf, (u8 *) "\b", 1);
+ j--;
+
+ while (j && isspace (cf->current_command[j]))
+ {
+ unix_vlib_cli_output_cooked (cf, uf, (u8 *) "\b", 1);
+ j--;
+ }
+ while (j && !isspace (cf->current_command[j]))
+ {
+ if (isspace (cf->current_command[j - 1]))
+ break;
+ unix_vlib_cli_output_cooked (cf, uf, (u8 *) "\b", 1);
+ j--;
+ }
+
+ cf->cursor = j;
+ }
+
+ cf->search_mode = 0;
+ break;
+
+ case UNIX_CLI_PARSE_ACTION_WORDRIGHT:
+ if (vec_len (cf->current_command) &&
+ cf->cursor < vec_len (cf->current_command))
+ {
+ int e = vec_len (cf->current_command);
+ j = cf->cursor;
+ while (j < e && !isspace (cf->current_command[j]))
+ j++;
+ while (j < e && isspace (cf->current_command[j]))
+ j++;
+ unix_vlib_cli_output_cooked (cf, uf,
+ cf->current_command + cf->cursor,
+ j - cf->cursor);
+ cf->cursor = j;
+ }
+
+ cf->search_mode = 0;
+ break;
+
+
+ case UNIX_CLI_PARSE_ACTION_ERASE:
+ if (vec_len (cf->current_command))
+ {
+ if (cf->cursor == vec_len (cf->current_command))
+ {
+ unix_vlib_cli_output_cooked (cf, uf, (u8 *) "\b \b", 3);
+ _vec_len (cf->current_command)--;
+ cf->cursor--;
+ }
+ else if (cf->cursor > 0)
+ {
+ /* shift everything at & to the right of the cursor left by 1 */
+ j = vec_len (cf->current_command) - cf->cursor;
+ memmove (cf->current_command + cf->cursor - 1,
+ cf->current_command + cf->cursor, j);
+ _vec_len (cf->current_command)--;
+ cf->cursor--;
+ /* redraw the rest of the line */
+ unix_vlib_cli_output_cooked (cf, uf, (u8 *) "\b", 1);
+ unix_vlib_cli_output_cooked (cf, uf,
+ cf->current_command + cf->cursor,
+ j);
+ unix_vlib_cli_output_cooked (cf, uf, (u8 *) " \b\b", 3);
+ /* and shift the terminal cursor back where it should be */
+ while (--j)
+ unix_vlib_cli_output_cooked (cf, uf, (u8 *) "\b", 1);
+ }
+ }
+ cf->search_mode = 0;
+ cf->excursion = 0;
+ vec_reset_length (cf->search_key);
+ break;
+
+ case UNIX_CLI_PARSE_ACTION_ERASERIGHT:
+ if (vec_len (cf->current_command))
+ {
+ if (cf->cursor < vec_len (cf->current_command))
+ {
+ /* shift everything to the right of the cursor left by 1 */
+ j = vec_len (cf->current_command) - cf->cursor - 1;
+ memmove (cf->current_command + cf->cursor,
+ cf->current_command + cf->cursor + 1, j);
+ _vec_len (cf->current_command)--;
+ /* redraw the rest of the line */
+ unix_vlib_cli_output_cooked (cf, uf,
+ cf->current_command + cf->cursor,
+ j);
+ unix_vlib_cli_output_cooked (cf, uf, (u8 *) " \b", 2);
+ /* and shift the terminal cursor back where it should be */
+ if (j)
+ {
+ unix_vlib_cli_output_cooked (cf, uf, (u8 *) "\b", 1);
+ while (--j)
+ unix_vlib_cli_output_cooked (cf, uf, (u8 *) "\b", 1);
+ }
+ }
+ }
+ else if (input == 'D' - '@')
+ {
+ /* ^D with no command entered = quit */
+ unix_vlib_cli_output_cooked (cf, uf, (u8 *) "quit\n", 5);
+ vlib_process_signal_event (um->vlib_main,
+ vlib_current_process (um->vlib_main),
+ UNIX_CLI_PROCESS_EVENT_QUIT,
+ cf - cm->cli_file_pool);
+ }
+ cf->search_mode = 0;
+ cf->excursion = 0;
+ vec_reset_length (cf->search_key);
+ break;
+
+ case UNIX_CLI_PARSE_ACTION_CLEAR:
+ /* If we're in ANSI mode, clear the screen.
+ * Then redraw the prompt and any existing command input, then put
+ * the cursor back where it was in that line.
+ */
+ if (cf->ansi_capable)
+ unix_vlib_cli_output_cooked (cf, uf,
+ (u8 *) ANSI_CLEAR,
+ sizeof (ANSI_CLEAR) - 1);
+ else
+ unix_vlib_cli_output_cooked (cf, uf, (u8 *) "\n", 1);
+
+ unix_vlib_cli_output_raw (cf, uf,
+ cm->cli_prompt, vec_len (cm->cli_prompt));
+ unix_vlib_cli_output_raw (cf, uf,
+ cf->current_command,
+ vec_len (cf->current_command));
+ for (j = cf->cursor; j < vec_len (cf->current_command); j++)
+ unix_vlib_cli_output_cooked (cf, uf, (u8 *) "\b", 1);
+
+ break;
+
+ case UNIX_CLI_PARSE_ACTION_TAB:
+ case UNIX_CLI_PARSE_ACTION_YANK:
+ /* TODO */
+ break;
+
+
+ case UNIX_CLI_PARSE_ACTION_PAGER_QUIT:
+ pager_quit:
+ unix_cli_pager_prompt_erase (cf, uf);
+ unix_cli_pager_reset (cf);
+ unix_cli_cli_prompt (cf, uf);
+ break;
+
+ case UNIX_CLI_PARSE_ACTION_PAGER_NEXT:
+ case UNIX_CLI_PARSE_ACTION_PAGER_PGDN:
+ /* show next page of the buffer */
+ if (cf->height + cf->pager_start < vec_len (cf->pager_index))
+ {
+ u8 *line = NULL;
+ unix_cli_pager_index_t *pi = NULL;
+
+ int m = cf->pager_start + (cf->height - 1);
+ unix_cli_pager_prompt_erase (cf, uf);
+ for (j = m;
+ j < vec_len (cf->pager_index) && cf->pager_start < m;
+ j++, cf->pager_start++)
+ {
+ pi = &cf->pager_index[j];
+ line = cf->pager_vector[pi->line] + pi->offset;
+ unix_vlib_cli_output_cooked (cf, uf, line, pi->length);
+ }
+ /* if the last line didn't end in newline, add a newline */
+ if (pi && line[pi->length - 1] != '\n')
+ unix_vlib_cli_output_cooked (cf, uf, (u8 *) "\n", 1);
+ unix_cli_pager_prompt (cf, uf);
+ }
+ else
+ {
+ if (action == UNIX_CLI_PARSE_ACTION_PAGER_NEXT)
+ /* no more in buffer, exit, but only if it was <space> */
+ goto pager_quit;
+ }
+ break;
+
+ case UNIX_CLI_PARSE_ACTION_PAGER_DN:
+ case UNIX_CLI_PARSE_ACTION_PAGER_CRLF:
+ /* display the next line of the buffer */
+ if (cf->pager_start < vec_len (cf->pager_index) - (cf->height - 1))
+ {
+ u8 *line;
+ unix_cli_pager_index_t *pi;
+
+ unix_cli_pager_prompt_erase (cf, uf);
+ pi = &cf->pager_index[cf->pager_start + (cf->height - 1)];
+ line = cf->pager_vector[pi->line] + pi->offset;
+ unix_vlib_cli_output_cooked (cf, uf, line, pi->length);
+ cf->pager_start++;
+ /* if the last line didn't end in newline, add a newline */
+ if (line[pi->length - 1] != '\n')
+ unix_vlib_cli_output_cooked (cf, uf, (u8 *) "\n", 1);
+ unix_cli_pager_prompt (cf, uf);
+ }
+ else
+ {
+ if (action == UNIX_CLI_PARSE_ACTION_PAGER_CRLF)
+ /* no more in buffer, exit, but only if it was <enter> */
+ goto pager_quit;
+ }
+
+ break;
+
+ case UNIX_CLI_PARSE_ACTION_PAGER_UP:
+ /* scroll the page back one line */
+ if (cf->pager_start > 0)
+ {
+ u8 *line = NULL;
+ unix_cli_pager_index_t *pi = NULL;
+
+ cf->pager_start--;
+ if (cf->ansi_capable)
+ {
+ pi = &cf->pager_index[cf->pager_start];
+ line = cf->pager_vector[pi->line] + pi->offset;
+ unix_cli_pager_prompt_erase (cf, uf);
+ unix_vlib_cli_output_cooked (cf, uf, (u8 *) ANSI_SCROLLDN,
+ sizeof (ANSI_SCROLLDN) - 1);
+ unix_vlib_cli_output_cooked (cf, uf, (u8 *) ANSI_SAVECURSOR,
+ sizeof (ANSI_SAVECURSOR) - 1);
+ unix_cli_ansi_cursor (cf, uf, 1, 1);
+ unix_vlib_cli_output_cooked (cf, uf, (u8 *) ANSI_CLEARLINE,
+ sizeof (ANSI_CLEARLINE) - 1);
+ unix_vlib_cli_output_cooked (cf, uf, line, pi->length);
+ unix_vlib_cli_output_cooked (cf, uf, (u8 *) ANSI_RESTCURSOR,
+ sizeof (ANSI_RESTCURSOR) - 1);
+ unix_cli_pager_prompt_erase (cf, uf);
+ unix_cli_pager_prompt (cf, uf);
+ }
+ else
+ {
+ int m = cf->pager_start + (cf->height - 1);
+ unix_cli_pager_prompt_erase (cf, uf);
+ for (j = cf->pager_start;
+ j < vec_len (cf->pager_index) && j < m; j++)
+ {
+ pi = &cf->pager_index[j];
+ line = cf->pager_vector[pi->line] + pi->offset;
+ unix_vlib_cli_output_cooked (cf, uf, line, pi->length);
+ }
+ /* if the last line didn't end in newline, add a newline */
+ if (pi && line[pi->length - 1] != '\n')
+ unix_vlib_cli_output_cooked (cf, uf, (u8 *) "\n", 1);
+ unix_cli_pager_prompt (cf, uf);
+ }
+ }
+ break;
+
+ case UNIX_CLI_PARSE_ACTION_PAGER_TOP:
+ /* back to the first page of the buffer */
+ if (cf->pager_start > 0)
+ {
+ u8 *line = NULL;
+ unix_cli_pager_index_t *pi = NULL;
+
+ cf->pager_start = 0;
+ int m = cf->pager_start + (cf->height - 1);
+ unix_cli_pager_prompt_erase (cf, uf);
+ for (j = cf->pager_start; j < vec_len (cf->pager_index) && j < m;
+ j++)
+ {
+ pi = &cf->pager_index[j];
+ line = cf->pager_vector[pi->line] + pi->offset;
+ unix_vlib_cli_output_cooked (cf, uf, line, pi->length);
+ }
+ /* if the last line didn't end in newline, add a newline */
+ if (pi && line[pi->length - 1] != '\n')
+ unix_vlib_cli_output_cooked (cf, uf, (u8 *) "\n", 1);
+ unix_cli_pager_prompt (cf, uf);
+ }
+ break;
+
+ case UNIX_CLI_PARSE_ACTION_PAGER_BOTTOM:
+ /* skip to the last page of the buffer */
+ if (cf->pager_start < vec_len (cf->pager_index) - (cf->height - 1))
+ {
+ u8 *line = NULL;
+ unix_cli_pager_index_t *pi = NULL;
+
+ cf->pager_start = vec_len (cf->pager_index) - (cf->height - 1);
+ unix_cli_pager_prompt_erase (cf, uf);
+ unix_cli_pager_message (cf, uf, "skipping", "\n");
+ for (j = cf->pager_start; j < vec_len (cf->pager_index); j++)
+ {
+ pi = &cf->pager_index[j];
+ line = cf->pager_vector[pi->line] + pi->offset;
+ unix_vlib_cli_output_cooked (cf, uf, line, pi->length);
+ }
+ /* if the last line didn't end in newline, add a newline */
+ if (pi && line[pi->length - 1] != '\n')
+ unix_vlib_cli_output_cooked (cf, uf, (u8 *) "\n", 1);
+ unix_cli_pager_prompt (cf, uf);
+ }
+ break;
+
+ case UNIX_CLI_PARSE_ACTION_PAGER_PGUP:
+ /* wander back one page in the buffer */
+ if (cf->pager_start > 0)
+ {
+ u8 *line = NULL;
+ unix_cli_pager_index_t *pi = NULL;
+ int m;
+
+ if (cf->pager_start >= cf->height)
+ cf->pager_start -= cf->height - 1;
+ else
+ cf->pager_start = 0;
+ m = cf->pager_start + cf->height - 1;
+ unix_cli_pager_prompt_erase (cf, uf);
+ for (j = cf->pager_start; j < vec_len (cf->pager_index) && j < m;
+ j++)
+ {
+ pi = &cf->pager_index[j];
+ line = cf->pager_vector[pi->line] + pi->offset;
+ unix_vlib_cli_output_cooked (cf, uf, line, pi->length);
+ }
+ /* if the last line didn't end in newline, add a newline */
+ if (pi && line[pi->length - 1] != '\n')
+ unix_vlib_cli_output_cooked (cf, uf, (u8 *) "\n", 1);
+ unix_cli_pager_prompt (cf, uf);
+ }
+ break;
+
+ case UNIX_CLI_PARSE_ACTION_PAGER_REDRAW:
+ /* Redraw the current pager screen */
+ unix_cli_pager_redraw (cf, uf);
+ break;
+
+ case UNIX_CLI_PARSE_ACTION_PAGER_SEARCH:
+ /* search forwards in the buffer (not implemented: currently a no-op) */
+ break;
+
+
+ case UNIX_CLI_PARSE_ACTION_CRLF:
+ crlf:
+ unix_vlib_cli_output_cooked (cf, uf, (u8 *) "\n", 1);
+
+ if (cf->has_history && cf->history_limit)
+ {
+ if (cf->command_history
+ && vec_len (cf->command_history) >= cf->history_limit)
+ {
+ vec_free (cf->command_history[0]);
+ vec_delete (cf->command_history, 1, 0);
+ }
+ /* Don't add blank lines to the cmd history */
+ if (vec_len (cf->current_command))
+ {
+ /* Don't duplicate the previous command */
+ j = vec_len (cf->command_history);
+ if (j == 0 ||
+ (vec_len (cf->current_command) !=
+ vec_len (cf->command_history[j - 1])
+ || memcmp (cf->current_command, cf->command_history[j - 1],
+ vec_len (cf->current_command)) != 0))
+ {
+ /* copy the command to the history */
+ u8 *c = 0;
+ vec_append (c, cf->current_command);
+ vec_add1 (cf->command_history, c);
+ cf->command_number++;
+ }
+ }
+ cf->excursion = vec_len (cf->command_history);
+ }
+
+ cf->search_mode = 0;
+ vec_reset_length (cf->search_key);
+ cf->cursor = 0;
+
+ return 0; /* a complete line is ready for the caller */
+
+ case UNIX_CLI_PARSE_ACTION_PARTIALMATCH:
+ case UNIX_CLI_PARSE_ACTION_NOMATCH:
+ if (vec_len (cf->pager_index))
+ {
+ /* no-op for now */
+ }
+ else if (cf->has_history && cf->search_mode && isprint (input))
+ {
+ int k, limit, offset;
+ u8 *item;
+
+ vec_add1 (cf->search_key, input);
+
+ search_again:
+ for (j = 0; j < vec_len (cf->command_history); j++)
+ {
+ if (cf->excursion > (i32) vec_len (cf->command_history) - 1)
+ cf->excursion = 0;
+ else if (cf->excursion < 0)
+ cf->excursion = vec_len (cf->command_history) - 1;
+
+ item = cf->command_history[cf->excursion];
+
+ limit = (vec_len (cf->search_key) > vec_len (item)) ?
+ vec_len (item) : vec_len (cf->search_key);
+
+ for (offset = 0; offset <= vec_len (item) - limit; offset++)
+ {
+ for (k = 0; k < limit; k++)
+ {
+ if (item[k + offset] != cf->search_key[k])
+ goto next_offset;
+ }
+ goto found_at_offset;
+
+ next_offset:
+ ;
+ }
+ goto next;
+
+ found_at_offset:
+ for (j = 0; j < vec_len (cf->current_command); j++)
+ unix_vlib_cli_output_cooked (cf, uf, (u8 *) "\b \b", 3);
+
+ vec_validate (cf->current_command, vec_len (item) - 1);
+ clib_memcpy (cf->current_command, item, vec_len (item));
+ _vec_len (cf->current_command) = vec_len (item);
+
+ unix_vlib_cli_output_cooked (cf, uf, cf->current_command,
+ vec_len (cf->current_command));
+ cf->cursor = vec_len (cf->current_command);
+ goto found;
+
+ next:
+ cf->excursion += cf->search_mode;
+ }
+
+ unix_vlib_cli_output_cooked (cf, uf, (u8 *) "\nNo match...", 12);
+ vec_reset_length (cf->search_key);
+ vec_reset_length (cf->current_command);
+ cf->search_mode = 0;
+ cf->cursor = 0;
+ goto crlf; /* finish the now empty line via the CRLF handling */
+ }
+ else if (isprint (input)) /* skip any errant control codes */
+ {
+ if (cf->cursor == vec_len (cf->current_command))
+ {
+ /* Append to end */
+ vec_add1 (cf->current_command, input);
+ cf->cursor++;
+
+ /* Echo the character back to the client */
+ unix_vlib_cli_output_raw (cf, uf, &input, 1);
+ }
+ else
+ {
+ /* Insert at cursor: resize +1 byte, move everything over.
+ * The 'A' appended here is only a placeholder to grow the
+ * vector; the memmove overwrites it. */
+ j = vec_len (cf->current_command) - cf->cursor;
+ vec_add1 (cf->current_command, (u8) 'A');
+ memmove (cf->current_command + cf->cursor + 1,
+ cf->current_command + cf->cursor, j);
+ cf->current_command[cf->cursor] = input;
+ /* Redraw the line */
+ j++;
+ unix_vlib_cli_output_raw (cf, uf,
+ cf->current_command + cf->cursor, j);
+ /* Put terminal cursor back */
+ while (--j)
+ unix_vlib_cli_output_raw (cf, uf, (u8 *) "\b", 1);
+ cf->cursor++;
+ }
+ }
+ else
+ {
+ /* no-op - not printable or otherwise not actionable */
+ }
+
+ found:
+
+ break;
+
+ case UNIX_CLI_PARSE_ACTION_TELNETIAC:
+ break;
+ }
+ return 1; /* no complete line yet */
+}
+
+/** @brief Walk the bytes received on a stream, providing line editing and
+ * command history in the CLI.
+ *
+ * @return 0 once a complete command line has been collected, or 1 when
+ *         more input is needed first.
+ */
+static int
+unix_cli_line_edit (unix_cli_main_t * cm,
+                    unix_main_t * um, unix_cli_file_t * cf)
+{
+  unix_file_t *uf = pool_elt_at_index (um->file_pool, cf->unix_file_index);
+  int pos = 0;
+
+  while (pos < vec_len (cf->input_vector))
+    {
+      unix_cli_parse_actions_t *table;
+      unix_cli_parse_action_t action;
+      i32 matched = 0;
+
+      /* The pager has its own set of actions while it is active */
+      if (vec_len (cf->pager_index))
+        table = unix_cli_parse_pager;
+      else
+        table = unix_cli_parse_strings;
+
+      /* Does the input at this position start some sort of control code? */
+      action = unix_cli_match_action (table, &cf->input_vector[pos],
+                                      vec_len (cf->input_vector) - pos,
+                                      &matched);
+
+      /* Telnet options are handled by their own state machine */
+      if (action == UNIX_CLI_PARSE_ACTION_TELNETIAC)
+        matched = unix_cli_process_telnet (um, cf, uf,
+                                           cf->input_vector + pos,
+                                           vec_len (cf->input_vector) - pos);
+
+      if (action == UNIX_CLI_PARSE_ACTION_PARTIALMATCH
+          || (action == UNIX_CLI_PARSE_ACTION_TELNETIAC && matched < 0))
+        {
+          /* The sequence is incomplete, so more bytes are needed than
+           * the input buffer holds right now. Everything before this
+           * position has been dealt with already: drop it so that the
+           * unfinished sequence sits at the start of the buffer.
+           */
+          if (pos)
+            vec_delete (cf->input_vector, pos, 0);
+          return 1; /* wait for more */
+        }
+
+      if (action != UNIX_CLI_PARSE_ACTION_TELNETIAC
+          && !unix_cli_line_process_one (cm, um, cf, uf,
+                                         cf->input_vector[pos], action))
+        {
+          /* CRLF found. Consume the bytes from the input_vector */
+          vec_delete (cf->input_vector, pos + matched, 0);
+          /* And tell our caller to execute cf->input_command */
+          return 0;
+        }
+
+      /* Step over the matched bytes as well as the current one */
+      pos += matched + 1;
+    }
+
+  vec_reset_length (cf->input_vector);
+  return 1;
+}
+
+/** @brief Process input to a CLI session.
+ *
+ * Unless the session is in line mode, the received bytes are first run
+ * through the line editor; the function returns early if that reports
+ * that more input is needed. A completed command is optionally logged,
+ * handed to vlib_cli_input() and followed by either the CLI prompt or
+ * the pager prompt. The whole sequence repeats while bytes remain in
+ * the session's input vector.
+ *
+ * @param cm             The CLI main structure.
+ * @param cli_file_index Index of the session in @c cm->cli_file_pool.
+ */
+static void
+unix_cli_process_input (unix_cli_main_t * cm, uword cli_file_index)
+{
+ unix_main_t *um = &unix_main;
+ unix_file_t *uf;
+ unix_cli_file_t *cf = pool_elt_at_index (cm->cli_file_pool, cli_file_index);
+ unformat_input_t input;
+ int vlib_parse_eval (u8 *);
+
+more:
+ /* Try vlibplex first. Someday...
+ * (the constant 0 in the condition keeps this path disabled) */
+ if (0 && vlib_parse_eval (cf->input_vector) == 0)
+ goto done;
+
+ if (cf->line_mode)
+ {
+ /* just treat whatever we got as a complete line of input;
+ * note that current_command aliases input_vector from here on */
+ cf->current_command = cf->input_vector;
+ }
+ else
+ {
+ /* Line edit, echo, etc. */
+ if (unix_cli_line_edit (cm, um, cf))
+ /* want more input */
+ return;
+ }
+
+ if (um->log_fd)
+ {
+ static u8 *lv;
+ vec_reset_length (lv);
+ lv = format (lv, "%U[%d]: %v",
+ format_timeval, 0 /* current bat-time */ ,
+ 0 /* current bat-format */ ,
+ cli_file_index, cf->input_vector);
+ {
+ int rv __attribute__ ((unused)) =
+ write (um->log_fd, lv, vec_len (lv));
+ }
+ }
+
+ /* Copy our input command to a new string */
+ unformat_init_vector (&input, cf->current_command);
+
+ /* Remove leading white space from input. */
+ (void) unformat (&input, "");
+
+ cm->current_input_file_index = cli_file_index;
+ cf->pager_start = 0; /* start a new pager session */
+
+ if (unformat_check_input (&input) != UNFORMAT_END_OF_INPUT)
+ vlib_cli_input (um->vlib_main, &input, unix_vlib_cli_output,
+ cli_file_index);
+
+ /* Zero buffer since otherwise unformat_free will call vec_free on it. */
+ input.buffer = 0;
+
+ unformat_free (&input);
+
+ /* Re-fetch pointer since pool may have moved. */
+ cf = pool_elt_at_index (cm->cli_file_pool, cli_file_index);
+ uf = pool_elt_at_index (um->file_pool, cf->unix_file_index);
+
+done:
+ /* reset vector; we'll re-use it later */
+ if (cf->line_mode)
+ vec_reset_length (cf->input_vector);
+ else
+ vec_reset_length (cf->current_command);
+
+ if (cf->no_pager == 2)
+ {
+ /* Pager was programmatically disabled; report it and go back
+ * to the configured pager setting */
+ unix_cli_pager_message (cf, uf, "pager buffer overflowed", "\n");
+ cf->no_pager = um->cli_no_pager;
+ }
+
+ if (vec_len (cf->pager_index) == 0
+ || vec_len (cf->pager_index) < cf->height)
+ {
+ /* There was no need for the pager */
+ unix_cli_pager_reset (cf);
+
+ /* Prompt. */
+ unix_cli_cli_prompt (cf, uf);
+ }
+ else
+ {
+ /* Display the pager prompt */
+ unix_cli_pager_prompt (cf, uf);
+ }
+
+ /* Any residual data in the input vector? */
+ if (vec_len (cf->input_vector))
+ goto more;
+}
+
+/** Tear down a CLI session and release what it owns.
+ * @note Destroying the @c stdin session does not return; it instead
+ *       signals the shutdown of VPP by jumping out of the main loop.
+ */
+static void
+unix_cli_kill (unix_cli_main_t * cm, uword cli_file_index)
+{
+  unix_main_t *um = &unix_main;
+  unix_cli_file_t *cf =
+    pool_elt_at_index (cm->cli_file_pool, cli_file_index);
+  unix_file_t *uf = pool_elt_at_index (um->file_pool, cf->unix_file_index);
+  int entry;
+
+  /* Quit/EOF on stdin means quit program. */
+  if (uf->file_descriptor == UNIX_CLI_STDIN_FD)
+    clib_longjmp (&um->vlib_main->main_loop_exit, VLIB_MAIN_LOOP_EXIT_CLI);
+
+  vec_free (cf->current_command);
+  vec_free (cf->search_key);
+
+  /* Each history entry is its own vector; free them before the list */
+  entry = 0;
+  while (entry < vec_len (cf->command_history))
+    {
+      vec_free (cf->command_history[entry]);
+      entry++;
+    }
+  vec_free (cf->command_history);
+
+  unix_file_del (um, uf);
+
+  unix_cli_file_free (cf);
+  pool_put (cm->cli_file_pool, cf);
+}
+
+/** Handle system events.
+ *
+ * Loops waiting for events: read-ready events are passed to the input
+ * processing, a quit event destroys the listed sessions and ends the loop.
+ * The node is then disabled and its index kept for later re-use.
+ */
+static uword
+unix_cli_process (vlib_main_t * vm,
+                  vlib_node_runtime_t * rt, vlib_frame_t * f)
+{
+  unix_cli_main_t *cm = &unix_cli_main;
+  uword *data = 0;
+  uword k;
+  int running = 1;
+
+  while (running)
+    {
+      unix_cli_process_event_type_t event_type;
+
+      vlib_process_wait_for_event (vm);
+      event_type = vlib_process_get_events (vm, &data);
+
+      if (event_type == UNIX_CLI_PROCESS_EVENT_QUIT)
+        {
+          /* Kill this process. */
+          for (k = 0; k < vec_len (data); k++)
+            unix_cli_kill (cm, data[k]);
+          running = 0;
+          continue;
+        }
+
+      if (event_type == UNIX_CLI_PROCESS_EVENT_READ_READY)
+        {
+          for (k = 0; k < vec_len (data); k++)
+            unix_cli_process_input (cm, data[k]);
+        }
+
+      /* Empty the event data vector, keeping it for the next round */
+      if (data)
+        _vec_len (data) = 0;
+    }
+
+  vec_free (data);
+
+  vlib_node_set_state (vm, rt->node_index, VLIB_NODE_STATE_DISABLED);
+
+  /* Add node index so we can re-use this process later. */
+  vec_add1 (cm->unused_cli_process_node_indices, rt->node_index);
+
+  return 0;
+}
+
+/** Called when a CLI session file descriptor can be written to without
+ * blocking. Writes out as much of the pending output as is accepted.
+ *
+ * @return A Unix error if the write failed for a reason other than
+ *         @c EAGAIN; otherwise 0.
+ */
+static clib_error_t *
+unix_cli_write_ready (unix_file_t * uf)
+{
+  unix_cli_main_t *cm = &unix_cli_main;
+  unix_cli_file_t *cf = pool_elt_at_index (cm->cli_file_pool,
+                                           uf->private_data);
+  int n_written;
+
+  /* Flush output vector. */
+  n_written = write (uf->file_descriptor,
+                     cf->output_vector, vec_len (cf->output_vector));
+
+  if (n_written > 0)
+    {
+      /* Drop what was sent from the pending output */
+      unix_cli_del_pending_output (uf, cf, n_written);
+    }
+  else if (n_written < 0 && errno != EAGAIN)
+    {
+      return clib_error_return_unix (0, "write");
+    }
+
+  return /* no error */ 0;
+}
+
+/** Called when a CLI session file descriptor has data to be read.
+ *
+ * Appends everything that can currently be read to the session's input
+ * vector, in chunks, and then signals the session's process node: a quit
+ * event when the final read reported end of file, a read-ready event when
+ * any bytes were collected.
+ *
+ * @param uf The Unix file that became readable; its private data is the
+ *           index of the CLI session.
+ *
+ * @return A Unix error if a read failed for a reason other than
+ *         @c EAGAIN; otherwise 0.
+ */
+static clib_error_t *
+unix_cli_read_ready (unix_file_t * uf)
+{
+  unix_main_t *um = &unix_main;
+  unix_cli_main_t *cm = &unix_cli_main;
+  unix_cli_file_t *cf;
+  uword l, n_total = 0;
+  int n, n_read, n_try;
+
+  cf = pool_elt_at_index (cm->cli_file_pool, uf->private_data);
+
+  /* Keep reading for as long as each read fills the whole chunk */
+  n = n_try = 4096;
+  while (n == n_try)
+    {
+      l = vec_len (cf->input_vector);
+      vec_resize (cf->input_vector, l + n_try);
+
+      n = read (uf->file_descriptor, cf->input_vector + l, n_try);
+
+      /* Error? */
+      if (n < 0 && errno != EAGAIN)
+        {
+          /* Don't leave the unread space counted as input */
+          _vec_len (cf->input_vector) = l;
+          return clib_error_return_unix (0, "read");
+        }
+
+      n_read = n < 0 ? 0 : n;
+      _vec_len (cf->input_vector) = l + n_read;
+      n_total += n_read;
+    }
+
+  if (n == 0)
+    {
+      /* End of file on the final read */
+      vlib_process_signal_event (um->vlib_main,
+                                 cf->process_node_index,
+                                 UNIX_CLI_PROCESS_EVENT_QUIT,
+                                 /* event data */ uf->private_data);
+    }
+  else if (n_total > 0)
+    {
+      /* Bytes were collected. This also covers a full chunk followed by
+       * EAGAIN, where the last read is negative but input is waiting. */
+      vlib_process_signal_event (um->vlib_main,
+                                 cf->process_node_index,
+                                 UNIX_CLI_PROCESS_EVENT_READ_READY,
+                                 /* event data */ uf->private_data);
+    }
+
+  return /* no error */ 0;
+}
+
+/** Create and register a new CLI session.
+ *
+ * A process node is taken from the list of unused CLI process nodes when
+ * one is available; otherwise a new one is registered.
+ *
+ * @param name The name of the session.
+ * @param fd   The file descriptor for the session I/O.
+ * @return The session ID, which is its index in the CLI file pool.
+ */
+static u32
+unix_cli_file_add (unix_cli_main_t * cm, char *name, int fd)
+{
+  unix_main_t *um = &unix_main;
+  vlib_main_t *vm = um->vlib_main;
+  unix_file_t template = { 0 };
+  unix_cli_file_t *cf;
+  vlib_node_t *n;
+  vlib_process_t *p;
+  uword n_unused, session_index;
+
+  name = (char *) format (0, "unix-cli-%s", name);
+
+  n_unused = vec_len (cm->unused_cli_process_node_indices);
+  if (n_unused == 0)
+    {
+      /* Nothing to recycle: register a fresh process node */
+      static vlib_node_registration_t r = {
+        .function = unix_cli_process,
+        .type = VLIB_NODE_TYPE_PROCESS,
+        .process_log2_n_stack_bytes = 16,
+      };
+
+      r.name = name;
+      vlib_register_node (vm, &r);
+      vec_free (name);
+
+      n = vlib_get_node (vm, r.index);
+    }
+  else
+    {
+      /* Find node and give it new name. */
+      n = vlib_get_node (vm,
+                         cm->unused_cli_process_node_indices[n_unused - 1]);
+      vec_free (n->name);
+      n->name = (u8 *) name;
+
+      vlib_node_set_state (vm, n->index, VLIB_NODE_STATE_POLLING);
+
+      /* Take the node off the unused list */
+      _vec_len (cm->unused_cli_process_node_indices) = n_unused - 1;
+    }
+
+  pool_get (cm->cli_file_pool, cf);
+  memset (cf, 0, sizeof (*cf));
+  session_index = cf - cm->cli_file_pool;
+
+  template.read_function = unix_cli_read_ready;
+  template.write_function = unix_cli_write_ready;
+  template.file_descriptor = fd;
+  template.private_data = session_index;
+
+  cf->process_node_index = n->index;
+  cf->unix_file_index = unix_file_add (um, &template);
+  cf->output_vector = 0;
+  cf->input_vector = 0;
+
+  vlib_start_process (vm, n->runtime_index);
+
+  /* Output produced by the session's process goes to this session */
+  p = vlib_get_process_from_node (vm, n);
+  p->output_function = unix_vlib_cli_output;
+  p->output_function_arg = session_index;
+
+  return session_index;
+}
+
+/** Telnet listening socket has a new connection.
+ *
+ * Accepts the pending connection on the CLI listen socket, creates a CLI
+ * session named after the peer address and, unless line mode is
+ * configured, sends the telnet options that switch the client into
+ * character mode and schedules the welcome banner.
+ *
+ * @param uf The unix file for the listening socket.
+ * @return The error from accepting the connection, otherwise 0.
+ */
+static clib_error_t *
+unix_cli_listen_read_ready (unix_file_t * uf)
+{
+ unix_main_t *um = &unix_main;
+ unix_cli_main_t *cm = &unix_cli_main;
+ clib_socket_t *s = &um->cli_listen_socket;
+ clib_socket_t client;
+ char *client_name;
+ clib_error_t *error;
+ unix_cli_file_t *cf;
+ u32 cf_index;
+
+ error = clib_socket_accept (s, &client);
+ if (error)
+ return error;
+
+ /* Session name is the peer address, explicitly NUL terminated */
+ client_name = (char *) format (0, "%U%c", format_sockaddr, &client.peer, 0);
+
+ cf_index = unix_cli_file_add (cm, client_name, client.fd);
+ cf = pool_elt_at_index (cm->cli_file_pool, cf_index);
+
+ /* No longer need CLIB version of socket. */
+ clib_socket_free (&client);
+
+ vec_free (client_name);
+
+ /* if we're supposed to run telnet session in character mode (default) */
+ if (um->cli_line_mode == 0)
+ {
+ /*
+ * Set telnet client character mode, echo on, suppress "go-ahead".
+ * Technically these should be negotiated, but this works.
+ */
+ u8 charmode_option[] = {
+ IAC, WONT, TELOPT_LINEMODE, /* server will do char-by-char */
+ IAC, DONT, TELOPT_LINEMODE, /* client should do char-by-char */
+ IAC, WILL, TELOPT_SGA, /* server will suppress Go Ahead */
+ IAC, DO, TELOPT_SGA, /* client should suppress Go Ahead */
+ IAC, WILL, TELOPT_ECHO, /* server will do echo */
+ IAC, DONT, TELOPT_ECHO, /* client should not echo */
+ IAC, DO, TELOPT_TTYPE, /* client should tell us its term type */
+ IAC, SB, TELOPT_TTYPE, 1, IAC, SE, /* now tell me ttype */
+ IAC, DO, TELOPT_NAWS, /* client should tell us its window sz */
+ IAC, SB, TELOPT_NAWS, 1, IAC, SE, /* now tell me window size */
+ };
+
+ /* Enable history on this CLI */
+ cf->history_limit = um->cli_history_limit;
+ cf->has_history = cf->history_limit != 0;
+
+ /* Make sure this session is in character mode (line mode off) */
+ cf->line_mode = 0;
+
+ /* We need CRLF */
+ cf->crlf_mode = 1;
+
+ /* Setup the pager */
+ cf->no_pager = um->cli_no_pager;
+
+ /* From here on uf refers to the new session's file, not the listener */
+ uf = pool_elt_at_index (um->file_pool, cf->unix_file_index);
+
+ /* Send the telnet options */
+ unix_vlib_cli_output_raw (cf, uf, charmode_option,
+ ARRAY_LEN (charmode_option));
+
+ /* In case the client doesn't negotiate terminal type, use
+ * a timer to kick off the initial prompt. */
+ timer_call (unix_cli_file_welcome_timer, cf_index, 1);
+ }
+
+ /* error is 0 here; a failed accept returned early above */
+ return error;
+}
+
+/** Signal handler invoked when the system terminal reports that the
+ * window size has changed.
+ *
+ * Fetches the new size of the stdin terminal, stores it in the stdin CLI
+ * session and has the pager reindex and redraw for the new dimensions.
+ *
+ * @param signum The signal number; unused.
+ */
+static void
+unix_cli_resize_interrupt (int signum)
+{
+  unix_main_t *um = &unix_main;
+  unix_cli_main_t *cm = &unix_cli_main;
+  unix_cli_file_t *cf;
+  unix_file_t *uf;
+  struct winsize size;
+
+  (void) signum;
+
+  cf = pool_elt_at_index (cm->cli_file_pool, cm->stdin_cli_file_index);
+  uf = pool_elt_at_index (um->file_pool, cf->unix_file_index);
+
+  /* Ask the terminal for its new dimensions */
+  if (ioctl (UNIX_CLI_STDIN_FD, TIOCGWINSZ, &size) < 0)
+    {
+      /* "Should never happen..."; the size contents can't be trusted */
+      clib_unix_warning ("TIOCGWINSZ");
+      return;
+    }
+
+  cf->width = size.ws_col;
+  cf->height = size.ws_row;
+
+  /* Re-flow the pager buffer for the new geometry, then repaint it */
+  unix_cli_pager_reindex (cf);
+  unix_cli_pager_redraw (cf, uf);
+}
+
+/** Handle configuration directives in the @em unix section.
+ *
+ * Runs the @c unix_config function first, then:
+ *  - in interactive mode, makes stdin non-blocking, registers it as a CLI
+ *    session and, when stdin is a tty with a known size and character
+ *    mode is selected, switches the tty to unbuffered, no-echo input;
+ *  - if a CLI listen socket is configured, starts listening on it;
+ *  - installs the default prompt if none has been set.
+ *
+ * @param vm    The vlib main structure.
+ * @param input The configuration input; not read by this function.
+ * @return 0 on success, otherwise the error from @c unix_config or from
+ *         initialising the listen socket.
+ */
+static clib_error_t *
+unix_cli_config (vlib_main_t * vm, unformat_input_t * input)
+{
+  unix_main_t *um = &unix_main;
+  unix_cli_main_t *cm = &unix_cli_main;
+  int flags;
+  clib_error_t *error = 0;
+  unix_cli_file_t *cf;
+  u32 cf_index;
+  struct termios tio;
+  struct sigaction sa;
+  struct winsize ws;
+  u8 *term;
+
+  /* We depend on unix flags being set. */
+  if ((error = vlib_call_config_function (vm, unix_config)))
+    return error;
+
+  if (um->flags & UNIX_FLAG_INTERACTIVE)
+    {
+      /* Set stdin to be non-blocking. */
+      if ((flags = fcntl (UNIX_CLI_STDIN_FD, F_GETFL, 0)) < 0)
+        flags = 0;
+      (void) fcntl (UNIX_CLI_STDIN_FD, F_SETFL, flags | O_NONBLOCK);
+
+      cf_index = unix_cli_file_add (cm, "stdin", UNIX_CLI_STDIN_FD);
+      cf = pool_elt_at_index (cm->cli_file_pool, cf_index);
+      cm->stdin_cli_file_index = cf_index;
+
+      /* If stdin is a tty and we are using character mode, enable
+       * history on the CLI and set the tty line discipline accordingly. */
+      if (isatty (UNIX_CLI_STDIN_FD) && um->cli_line_mode == 0)
+        {
+          /* Capture terminal resize events */
+          memset (&sa, 0, sizeof (sa));
+          sa.sa_handler = unix_cli_resize_interrupt;
+          if (sigaction (SIGWINCH, &sa, 0) < 0)
+            clib_panic ("sigaction");
+
+          /* Retrieve the current terminal size. If the query fails the
+           * contents of ws are indeterminate, so treat that the same as
+           * a terminal that reports no size. */
+          if (ioctl (UNIX_CLI_STDIN_FD, TIOCGWINSZ, &ws) < 0)
+            {
+              clib_unix_warning ("TIOCGWINSZ");
+              memset (&ws, 0, sizeof (ws));
+            }
+          cf->width = ws.ws_col;
+          cf->height = ws.ws_row;
+
+          if (cf->width == 0 || cf->height == 0)
+            /* We have a tty, but no size. Stick to line mode. */
+            goto notty;
+
+          /* Setup the history */
+          cf->history_limit = um->cli_history_limit;
+          cf->has_history = cf->history_limit != 0;
+
+          /* Setup the pager */
+          cf->no_pager = um->cli_no_pager;
+
+          /* We're going to be in char by char mode */
+          cf->line_mode = 0;
+
+          /* Save the original tty state so we can restore it later */
+          tcgetattr (UNIX_CLI_STDIN_FD, &um->tio_stdin);
+          um->tio_isset = 1;
+
+          /* Tweak the tty settings */
+          tio = um->tio_stdin;
+          /* echo off, canonical mode off, ext'd input processing off */
+          tio.c_lflag &= ~(ECHO | ICANON | IEXTEN);
+          tio.c_cc[VMIN] = 1;   /* 1 byte at a time */
+          tio.c_cc[VTIME] = 0;  /* no timer */
+          tcsetattr (UNIX_CLI_STDIN_FD, TCSAFLUSH, &tio);
+
+          /* See if we can do ANSI/VT100 output */
+          term = (u8 *) getenv ("TERM");
+          if (term != NULL)
+            cf->ansi_capable = unix_cli_terminal_type (term,
+                                                       strlen ((char *)
+                                                               term));
+        }
+      else
+        {
+        notty:
+          /* No tty, so make sure these things are off */
+          cf->no_pager = 1;
+          cf->history_limit = 0;
+          cf->has_history = 0;
+          cf->line_mode = 1;
+        }
+
+      /* Send banner and initial prompt */
+      unix_cli_file_welcome (cm, cf);
+    }
+
+  /* If we have socket config, LISTEN, otherwise, don't */
+  clib_socket_t *s = &um->cli_listen_socket;
+  if (s->config && s->config[0] != 0)
+    {
+      /* CLI listen. */
+      unix_file_t template = { 0 };
+
+      s->flags = SOCKET_IS_SERVER;      /* listen, don't connect */
+      error = clib_socket_init (s);
+
+      if (error)
+        return error;
+
+      template.read_function = unix_cli_listen_read_ready;
+      template.file_descriptor = s->fd;
+
+      unix_file_add (um, &template);
+    }
+
+  /* Set CLI prompt. */
+  if (!cm->cli_prompt)
+    cm->cli_prompt = format (0, "VLIB: ");
+
+  return 0;
+}
+
+/*?
+ * This module has no configurable parameters.
+?*/
+VLIB_CONFIG_FUNCTION (unix_cli_config, "unix-cli");
+
+/** Main loop exit hook: puts the system terminal back into the state
+ * that was saved before the CLI changed it, if any state was saved.
+ *
+ * @param vm The vlib main structure; unused.
+ * @return Always 0.
+ */
+static clib_error_t *
+unix_cli_exit (vlib_main_t * vm)
+{
+  unix_main_t *um = &unix_main;
+
+  /* Nothing to restore unless stdin is a tty and its state was saved */
+  if (!isatty (UNIX_CLI_STDIN_FD) || !um->tio_isset)
+    return 0;
+
+  tcsetattr (UNIX_CLI_STDIN_FD, TCSAFLUSH, &um->tio_stdin);
+
+  return 0;
+}
+
+VLIB_MAIN_LOOP_EXIT_FUNCTION (unix_cli_exit);
+
+/** Set the CLI prompt.
+ *
+ * The stored prompt always ends in a space: one is appended unless
+ * @c prompt already ends with one. An empty string therefore yields a
+ * prompt of a single space. A NULL @c prompt leaves the current prompt
+ * unchanged.
+ *
+ * @param prompt The C string to set the prompt to.
+ * @note This setting is global; it impacts all current
+ *       and future CLI sessions.
+ */
+void
+vlib_unix_cli_set_prompt (char *prompt)
+{
+  unix_cli_main_t *cm = &unix_cli_main;
+  size_t len;
+  char *fmt;
+
+  if (prompt == 0)
+    return;
+
+  /* Only look at the last character when there is one; indexing
+   * prompt[len - 1] with len == 0 would read outside the string. */
+  len = strlen (prompt);
+  fmt = (len > 0 && prompt[len - 1] == ' ') ? "%s" : "%s ";
+
+  if (cm->cli_prompt)
+    vec_free (cm->cli_prompt);
+  cm->cli_prompt = format (0, fmt, prompt);
+}
+
+/** CLI command to quit the terminal session.
+ *
+ * Signals the QUIT event to the process that is executing the command,
+ * with the index of the current input session as the event data.
+ *
+ * @note If this is a stdin session then this will
+ *       shutdown VPP also.
+ */
+static clib_error_t *
+unix_cli_quit (vlib_main_t * vm,
+               unformat_input_t * input, vlib_cli_command_t * cmd)
+{
+  unix_cli_main_t *cm = &unix_cli_main;
+  uword process_index = vlib_current_process (vm);
+  uword session_index = cm->current_input_file_index;
+
+  vlib_process_signal_event (vm, process_index,
+                             UNIX_CLI_PROCESS_EVENT_QUIT, session_index);
+
+  return 0;
+}
+
+/*?
+ * Terminates the current CLI session.
+ *
+ * If VPP is running in @em interactive mode and this is the console session
+ * (that is, the session on @c stdin) then this will also terminate VPP.
+?*/
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (unix_cli_quit_command, static) = {
+  .path = "quit",
+  .short_help = "Exit CLI",
+  .function = unix_cli_quit,
+};
+/* *INDENT-ON* */
+
+/** CLI command to execute a VPP command script.
+ *
+ * Parses a file name from the command input, opens it read-only, checks
+ * the type of the opened file and feeds its contents to the CLI.
+ *
+ * @param vm    The vlib main structure.
+ * @param input The command input; expected to hold the file name.
+ * @param cmd   The CLI command structure; unused.
+ * @return 0 on success, otherwise an error describing why the file name
+ *         could not be parsed or the file could not be opened, examined
+ *         or accepted.
+ */
+static clib_error_t *
+unix_cli_exec (vlib_main_t * vm,
+               unformat_input_t * input, vlib_cli_command_t * cmd)
+{
+  char *file_name = 0;
+  int fd = -1;
+  unformat_input_t sub_input;
+  clib_error_t *error = 0;
+
+  if (!unformat (input, "%s", &file_name))
+    {
+      error = clib_error_return (0, "expecting file name, got `%U'",
+                                 format_unformat_error, input);
+      goto done;
+    }
+
+  fd = open (file_name, O_RDONLY);
+  if (fd < 0)
+    {
+      error = clib_error_return_unix (0, "failed to open `%s'", file_name);
+      goto done;
+    }
+
+  /* Make sure its a regular file. */
+  {
+    struct stat s;
+
+    if (fstat (fd, &s) < 0)
+      {
+        error = clib_error_return_unix (0, "failed to stat `%s'", file_name);
+        goto done;
+      }
+
+    if (!(S_ISREG (s.st_mode) || S_ISLNK (s.st_mode)))
+      {
+        error = clib_error_return (0, "not a regular file `%s'", file_name);
+        goto done;
+      }
+  }
+
+  unformat_init_unix_file (&sub_input, fd);
+
+  vlib_cli_input (vm, &sub_input, 0, 0);
+  unformat_free (&sub_input);
+
+done:
+  /* 0 is a valid descriptor (open() returns it when fd 0 is free), so
+   * only the -1 "never opened" marker may skip the close. */
+  if (fd >= 0)
+    close (fd);
+  vec_free (file_name);
+
+  return error;
+}
+
+/*?
+ * Executes a sequence of CLI commands which are read from a file.
+ *
+ * If a command is unrecognised or otherwise invalid then the usual CLI
+ * feedback will be generated, however execution of subsequent commands
+ * from the file will continue.
+?*/
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (cli_exec, static) = {
+  .path = "exec",
+  .short_help = "Execute commands from file",
+  .function = unix_cli_exec,
+  .is_mp_safe = 1,
+};
+/* *INDENT-ON* */
+
+/** CLI command to show various unix error statistics. */
+static clib_error_t *
+unix_show_errors (vlib_main_t * vm,
+ unformat_input_t * input, vlib_cli_command_t * cmd)
+{
+ unix_main_t *um = &unix_main;
+ clib_error_t *error = 0;
+ int i, n_errors_to_show;
+ unix_error_history_t *unix_errors = 0;
+
+ n_errors_to_show = 1 << 30;
+
+ if (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (!unformat (input, "%d", &n_errors_to_show))
+ {
+ error =
+ clib_error_return (0,
+ "expecting integer number of errors to show, got `%U'",
+ format_unformat_error, input);
+ goto done;
+ }
+ }
+
+ n_errors_to_show =
+ clib_min (ARRAY_LEN (um->error_history), n_errors_to_show);
+
+ i =
+ um->error_history_index >
+ 0 ? um->error_history_index - 1 : ARRAY_LEN (um->error_history) - 1;
+
+ while (n_errors_to_show > 0)
+ {
+ unix_error_history_t *eh = um->error_history + i;
+
+ if (!eh->error)
+ break;
+
+ vec_add1 (unix_errors, eh[0]);
+ n_errors_to_show -= 1;
+ if (i == 0)
+ i = ARRAY_LEN (um->error_history) - 1;
+ else
+ i--;
+ }
+
+ if (vec_len (unix_errors) == 0)
+ vlib_cli_output (vm, "no Unix errors so far");
+ else
+ {
+ vlib_cli_output (vm, "%Ld total errors seen", um->n_total_errors);
+ for (i = vec_len (unix_errors) - 1; i >= 0; i--)
+ {
+ unix_error_history_t *eh = vec_elt_at_index (unix_errors, i);
+ vlib_cli_output (vm, "%U: %U",
+ format_time_interval, "h:m:s:u", eh->time,
+ format_clib_error, eh->error);
+ }
+ vlib_cli_output (vm, "%U: time now",
+ format_time_interval, "h:m:s:u", vlib_time_now (vm));
+ }
+
+done:
+ vec_free (unix_errors);
+ return error;
+}
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (cli_unix_show_errors, static) = {
+ .path = "show unix-errors",
+ .short_help = "Show Unix system call error history",
+ .function = unix_show_errors,
+};
+/* *INDENT-ON* */
+
+/** CLI command to show session command history.
+ *
+ * Prints each stored command of the current session with its command
+ * number, or a notice when history is off or its limit is zero.
+ *
+ * @param vm    The vlib main structure.
+ * @param input The command input; unused.
+ * @param cmd   The CLI command structure; unused.
+ * @return Always 0.
+ */
+static clib_error_t *
+unix_cli_show_history (vlib_main_t * vm,
+                       unformat_input_t * input, vlib_cli_command_t * cmd)
+{
+  unix_cli_main_t *cm = &unix_cli_main;
+  unix_cli_file_t *cf;
+  int first, j;
+
+  cf = pool_elt_at_index (cm->cli_file_pool, cm->current_input_file_index);
+
+  if (!cf->has_history || !cf->history_limit)
+    {
+      vlib_cli_output (vm, "History not enabled.\n");
+      return 0;
+    }
+
+  /* Number given to the oldest command still held in the history */
+  first = 1 + cf->command_number - vec_len (cf->command_history);
+
+  for (j = 0; j < vec_len (cf->command_history); j++)
+    vlib_cli_output (vm, "%d %v\n", first + j, cf->command_history[j]);
+
+  return 0;
+}
+
+/*?
+ * Displays the command history for the current session, if any.
+?*/
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (cli_unix_cli_show_history, static) = {
+  .path = "history",
+  .short_help = "Show current session command history",
+  .function = unix_cli_show_history,
+};
+/* *INDENT-ON* */
+
+/** CLI command to show terminal status.
+ *
+ * Prints the name, mode, dimensions, ANSI capability, history, pager and
+ * CRLF settings of the current session.
+ *
+ * @param vm    The vlib main structure.
+ * @param input The command input; unused.
+ * @param cmd   The CLI command structure; unused.
+ * @return Always 0.
+ */
+static clib_error_t *
+unix_cli_show_terminal (vlib_main_t * vm,
+                        unformat_input_t * input, vlib_cli_command_t * cmd)
+{
+  unix_main_t *um = &unix_main;
+  unix_cli_main_t *cm = &unix_cli_main;
+  unix_cli_file_t *cf;
+  vlib_node_t *n;
+  char *history_note = "";
+  char *height_note = "";
+  char *buffer_note = "";
+
+  cf = pool_elt_at_index (cm->cli_file_pool, cm->current_input_file_index);
+  n = vlib_get_node (vm, cf->process_node_index);
+
+  /* History is switched on but a zero limit keeps it from working */
+  if (cf->has_history && !cf->history_limit)
+    history_note = " (disabled by history limit)";
+
+  /* Pager is switched on but other settings keep it from working */
+  if (!cf->no_pager && !cf->height)
+    height_note = " (disabled by terminal height)";
+  if (!cf->no_pager && !um->cli_pager_buffer_limit)
+    buffer_note = " (disabled by buffer limit)";
+
+  vlib_cli_output (vm, "Terminal name: %v\n", n->name);
+  vlib_cli_output (vm, "Terminal mode: %s\n",
+                   cf->line_mode ? "line-by-line" : "char-by-char");
+  vlib_cli_output (vm, "Terminal width: %d\n", cf->width);
+  vlib_cli_output (vm, "Terminal height: %d\n", cf->height);
+  vlib_cli_output (vm, "ANSI capable: %s\n",
+                   cf->ansi_capable ? "yes" : "no");
+
+  vlib_cli_output (vm, "History enabled: %s%s\n",
+                   cf->has_history ? "yes" : "no", history_note);
+  if (cf->has_history)
+    vlib_cli_output (vm, "History limit: %d\n", cf->history_limit);
+
+  vlib_cli_output (vm, "Pager enabled: %s%s%s\n",
+                   cf->no_pager ? "no" : "yes", height_note, buffer_note);
+  if (!cf->no_pager)
+    vlib_cli_output (vm, "Pager limit: %d\n", um->cli_pager_buffer_limit);
+
+  vlib_cli_output (vm, "CRLF mode: %s\n",
+                   cf->crlf_mode ? "CR+LF" : "LF");
+
+  return 0;
+}
+
+/*?
+ * Displays various information about the state of the current terminal
+ * session.
+ *
+ * @cliexpar
+ * @cliexstart{show terminal}
+ * Terminal name: unix-cli-stdin
+ * Terminal mode: char-by-char
+ * Terminal width: 123
+ * Terminal height: 48
+ * ANSI capable: yes
+ * History enabled: yes
+ * History limit: 50
+ * Pager enabled: yes
+ * Pager limit: 100000
+ * CRLF mode: LF
+ * @cliexend
+?*/
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (cli_unix_cli_show_terminal, static) = {
+  .path = "show terminal",
+  .short_help = "Show current session terminal settings",
+  .function = unix_cli_show_terminal,
+};
+/* *INDENT-ON* */
+
+/** CLI command to set terminal pager settings.
+ *
+ * Accepts any sequence of "on", "off" and "limit <lines>". "on"/"off"
+ * affect the current session only; the limit is stored in the unix main
+ * structure and is therefore global.
+ *
+ * @param vm    The vlib main structure.
+ * @param input The command input.
+ * @param cmd   The CLI command structure; unused.
+ * @return 0 on success, or an error naming the first unknown parameter.
+ *         Parameters parsed before the unknown one remain applied.
+ */
+static clib_error_t *
+unix_cli_set_terminal_pager (vlib_main_t * vm,
+                             unformat_input_t * input,
+                             vlib_cli_command_t * cmd)
+{
+  unix_main_t *um = &unix_main;
+  unix_cli_main_t *cm = &unix_cli_main;
+  unix_cli_file_t *cf;
+  unformat_input_t _line_input, *line_input = &_line_input;
+  clib_error_t *error = 0;
+
+  if (!unformat_user (input, unformat_line_input, line_input))
+    return 0;
+
+  cf = pool_elt_at_index (cm->cli_file_pool, cm->current_input_file_index);
+
+  while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+    {
+      if (unformat (line_input, "on"))
+        cf->no_pager = 0;
+      else if (unformat (line_input, "off"))
+        cf->no_pager = 1;
+      else if (unformat (line_input, "limit %u", &um->cli_pager_buffer_limit))
+        vlib_cli_output (vm,
+                         "Pager limit set to %u lines; note, this is global.\n",
+                         um->cli_pager_buffer_limit);
+      else
+        {
+          /* Build the error while line_input is still valid, then fall
+           * through to the common exit so line_input is always freed. */
+          error = clib_error_return (0, "unknown parameter: `%U`",
+                                     format_unformat_error, line_input);
+          break;
+        }
+    }
+
+  unformat_free (line_input);
+
+  return error;
+}
+
+/*?
+ * Enables or disables the terminal pager for this session. Generally
+ * this defaults to enabled.
+ *
+ * Additionally allows the pager buffer size to be set; though note that
+ * this value is set globally and not per session.
+?*/
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (cli_unix_cli_set_terminal_pager, static) = {
+  .path = "set terminal pager",
+  .short_help = "set terminal pager [on|off] [limit <lines>]",
+  .function = unix_cli_set_terminal_pager,
+};
+/* *INDENT-ON* */
+
+/** CLI command to set terminal history settings.
+ *
+ * Accepts any sequence of "on", "off" and "limit <lines>" for the
+ * current session. After each parameter the stored history is trimmed,
+ * oldest entries first, so that it holds no more than the effective
+ * limit (zero when history is off).
+ *
+ * @param vm    The vlib main structure.
+ * @param input The command input.
+ * @param cmd   The CLI command structure; unused.
+ * @return 0 on success, or an error naming the first unknown parameter.
+ *         Parameters parsed before the unknown one remain applied.
+ */
+static clib_error_t *
+unix_cli_set_terminal_history (vlib_main_t * vm,
+                               unformat_input_t * input,
+                               vlib_cli_command_t * cmd)
+{
+  unix_cli_main_t *cm = &unix_cli_main;
+  unix_cli_file_t *cf;
+  unformat_input_t _line_input, *line_input = &_line_input;
+  clib_error_t *error = 0;
+  u32 limit;
+
+  if (!unformat_user (input, unformat_line_input, line_input))
+    return 0;
+
+  cf = pool_elt_at_index (cm->cli_file_pool, cm->current_input_file_index);
+
+  while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+    {
+      if (unformat (line_input, "on"))
+        cf->has_history = 1;
+      else if (unformat (line_input, "off"))
+        cf->has_history = 0;
+      else if (unformat (line_input, "limit %u", &cf->history_limit))
+        ;
+      else
+        {
+          /* Build the error while line_input is still valid, then fall
+           * through to the common exit so line_input is always freed. */
+          error = clib_error_return (0, "unknown parameter: `%U`",
+                                     format_unformat_error, line_input);
+          break;
+        }
+
+      /* If we reduced history size, or turned it off, purge the history */
+      limit = cf->has_history ? cf->history_limit : 0;
+
+      if (limit < vec_len (cf->command_history))
+        {
+          /* Drop exactly the surplus, oldest first. Deleting one entry
+           * at a time "while length >= limit" cannot stop when limit is
+           * zero and would delete from an already empty vector. */
+          u32 n_purge = vec_len (cf->command_history) - limit;
+          u32 i;
+
+          for (i = 0; i < n_purge; i++)
+            vec_free (cf->command_history[i]);
+
+          vec_delete (cf->command_history, n_purge, 0);
+        }
+    }
+
+  unformat_free (line_input);
+
+  return error;
+}
+
+/*?
+ * Enables or disables the command history function of the current
+ * terminal. Generally this defaults to enabled.
+ *
+ * This command also allows the maximum size of the history buffer for
+ * this session to be altered.
+?*/
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (cli_unix_cli_set_terminal_history, static) = {
+  .path = "set terminal history",
+  .short_help = "set terminal history [on|off] [limit <lines>]",
+  .function = unix_cli_set_terminal_history,
+};
+/* *INDENT-ON* */
+
+/** CLI command to set terminal ANSI settings.
+ *
+ * @param vm    The vlib main structure; unused.
+ * @param input The command input; must start with "on" or "off".
+ * @param cmd   The CLI command structure; unused.
+ * @return 0 on success, or an error if the parameter is neither "on"
+ *         nor "off".
+ */
+static clib_error_t *
+unix_cli_set_terminal_ansi (vlib_main_t * vm,
+                            unformat_input_t * input,
+                            vlib_cli_command_t * cmd)
+{
+  unix_cli_main_t *cm = &unix_cli_main;
+  unix_cli_file_t *cf;
+  int enable;
+
+  cf = pool_elt_at_index (cm->cli_file_pool, cm->current_input_file_index);
+
+  if (unformat (input, "on"))
+    enable = 1;
+  else if (unformat (input, "off"))
+    enable = 0;
+  else
+    return clib_error_return (0, "unknown parameter: `%U`",
+                              format_unformat_error, input);
+
+  cf->ansi_capable = enable;
+
+  return 0;
+}
+
+/*?
+ * Enables or disables the use of ANSI control sequences by this terminal.
+ * The default will vary based on terminal detection at the start of the
+ * session.
+ *
+ * ANSI control sequences are used in a small number of places to provide,
+ * for example, color text output and to control the cursor in the pager.
+?*/
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (cli_unix_cli_set_terminal_ansi, static) = {
+  .path = "set terminal ansi",
+  .short_help = "set terminal ansi [on|off]",
+  .function = unix_cli_set_terminal_ansi,
+};
+/* *INDENT-ON* */
+
+/** Init function for the unix CLI; it performs no work.
+ * @param vm The vlib main structure; unused.
+ * @return Always 0.
+ */
+static clib_error_t *
+unix_cli_init (vlib_main_t * vm)
+{
+ return 0;
+}
+
+VLIB_INIT_FUNCTION (unix_cli_init);
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vlib/unix/dir.dox b/src/vlib/unix/dir.dox
new file mode 100644
index 00000000000..1380fa56b37
--- /dev/null
+++ b/src/vlib/unix/dir.dox
@@ -0,0 +1,28 @@
+/*
+ * Copyright (c) 2016 Comcast Cable Communications Management, LLC.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/* Doxygen directory documentation */
+
+/**
+@dir
+@brief VLIB Unix interface
+
+VLIB application library Unix interface layer.
+
+*/
+/*? %%clicmd:group_label Unix Interface %% ?*/
+/*? %%syscfg:group_label Unix Interface %% ?*/
+
diff --git a/src/vlib/unix/input.c b/src/vlib/unix/input.c
new file mode 100644
index 00000000000..07096ed27dc
--- /dev/null
+++ b/src/vlib/unix/input.c
@@ -0,0 +1,265 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * input.c: Unix file input
+ *
+ * Copyright (c) 2008 Eliot Dresselhaus
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <vlib/vlib.h>
+#include <vlib/unix/unix.h>
+#include <signal.h>
+
+/* FIXME autoconf */
+#define HAVE_LINUX_EPOLL
+
+#ifdef HAVE_LINUX_EPOLL
+
+#include <sys/epoll.h>
+
+/** State of the epoll based unix file poller. */
+typedef struct
+{
+ /* Descriptor returned by epoll_create() in linux_epoll_input_init */
+ int epoll_fd;
+ /* Vector that epoll_pwait()/epoll_wait() fills with ready events */
+ struct epoll_event *epoll_events;
+
+ /* Statistics. */
+ /* Sum of the ready-file counts returned by successful waits */
+ u64 epoll_files_ready;
+ /* Number of waits that returned without error */
+ u64 epoll_waits;
+} linux_epoll_main_t;
+
+/* The single instance used by every function in this file */
+static linux_epoll_main_t linux_epoll_main;
+
+/** Add, modify or remove a unix file in the epoll set.
+ *
+ * Input readiness is always requested; output readiness and edge
+ * triggering are requested according to the file's flags. The event's
+ * user data is the file's index in the unix file pool.
+ *
+ * @param f           The unix file to update.
+ * @param update_type ADD or MODIFY; any other value deletes the file
+ *                    from the epoll set.
+ */
+static void
+linux_epoll_file_update (unix_file_t * f, unix_file_update_type_t update_type)
+{
+  unix_main_t *um = &unix_main;
+  linux_epoll_main_t *em = &linux_epoll_main;
+  struct epoll_event e;
+  int op;
+
+  memset (&e, 0, sizeof (e));
+
+  e.events = EPOLLIN;
+  if (f->flags & UNIX_FILE_DATA_AVAILABLE_TO_WRITE)
+    e.events |= EPOLLOUT;
+  if (f->flags & UNIX_FILE_EVENT_EDGE_TRIGGERED)
+    e.events |= EPOLLET;
+  e.data.u32 = f - um->file_pool;
+
+  switch (update_type)
+    {
+    case UNIX_FILE_UPDATE_ADD:
+      op = EPOLL_CTL_ADD;
+      break;
+
+    case UNIX_FILE_UPDATE_MODIFY:
+      op = EPOLL_CTL_MOD;
+      break;
+
+    default:
+      op = EPOLL_CTL_DEL;
+      break;
+    }
+
+  if (epoll_ctl (em->epoll_fd, op, f->file_descriptor, &e) < 0)
+    clib_warning ("epoll_ctl");
+}
+
+/** Pre-input node function: wait for unix file events with epoll and
+ * dispatch them to the files' read/write/error functions.
+ *
+ * The wait time is derived from when the next process is due to run,
+ * clamped to at most 10 ms, and forced to zero when input nodes are
+ * polling or the main loop is busy. Errors returned by the file
+ * functions are recorded with unix_save_error().
+ *
+ * @param vm    The vlib main structure.
+ * @param node  This node's runtime; its input_main_loops_per_call is
+ *              updated here.
+ * @param frame Unused.
+ * @return Always 0.
+ */
+static uword
+linux_epoll_input (vlib_main_t * vm,
+                   vlib_node_runtime_t * node, vlib_frame_t * frame)
+{
+  unix_main_t *um = &unix_main;
+  linux_epoll_main_t *em = &linux_epoll_main;
+  struct epoll_event *e;
+  int n_fds_ready;
+
+  {
+    vlib_node_main_t *nm = &vm->node_main;
+    u64 t = nm->cpu_time_next_process_ready;
+    f64 timeout;
+    int timeout_ms, max_timeout_ms = 10;
+    f64 vector_rate = vlib_last_vectors_per_main_loop (vm);
+
+    if (t == ~0ULL)
+      {
+        /* No process is waiting on a timer; use the maximum wait */
+        timeout = 10e-3;
+        timeout_ms = max_timeout_ms;
+      }
+    else
+      {
+        /* Seconds until the next process is ready */
+        timeout =
+          (((i64) t - (i64) clib_cpu_time_now ())
+           * vm->clib_time.seconds_per_clock)
+          /* subtract off some slop time */  - 50e-6;
+
+        /* timeout is in seconds, so one millisecond is 1e-3. Comparing
+         * against 1e3 is true for any realistic value and would keep
+         * epoll from ever waiting. */
+        if (timeout < 1e-3)
+          {
+            /* We have event happening in less than 1 ms so
+               don't allow epoll to wait */
+            timeout_ms = 0;
+          }
+        else
+          {
+            timeout_ms = timeout * 1e3;
+
+            /* Must be between 1 and 10 ms. */
+            timeout_ms = clib_max (1, timeout_ms);
+            timeout_ms = clib_min (max_timeout_ms, timeout_ms);
+          }
+      }
+
+    /* If we still have input nodes polling (e.g. vnet packet generator)
+       don't sleep. */
+    if (nm->input_node_counts_by_state[VLIB_NODE_STATE_POLLING] > 0)
+      timeout_ms = 0;
+
+    /*
+     * When busy: don't wait & only epoll for input
+     * every 1024 times through main loop.
+     */
+    if (vector_rate > 1 || vm->api_queue_nonempty)
+      {
+        timeout_ms = 0;
+        node->input_main_loops_per_call = 1024;
+      }
+    else
+      /* We're not busy; go to sleep for a while. */
+      node->input_main_loops_per_call = 0;
+
+    /* Allow any signal to wakeup our sleep. */
+    {
+      /* Static storage, so this is a zero-initialised signal mask */
+      static sigset_t unblock_all_signals;
+      n_fds_ready = epoll_pwait (em->epoll_fd,
+                                 em->epoll_events,
+                                 vec_len (em->epoll_events),
+                                 timeout_ms, &unblock_all_signals);
+
+      /* This kludge is necessary to run over absurdly old kernels */
+      if (n_fds_ready < 0 && errno == ENOSYS)
+        {
+          n_fds_ready = epoll_wait (em->epoll_fd,
+                                    em->epoll_events,
+                                    vec_len (em->epoll_events), timeout_ms);
+        }
+    }
+  }
+
+  if (n_fds_ready < 0)
+    {
+      if (unix_error_is_fatal (errno))
+        vlib_panic_with_error (vm, clib_error_return_unix (0, "epoll_wait"));
+
+      /* non fatal error (e.g. EINTR). */
+      return 0;
+    }
+
+  em->epoll_waits += 1;
+  em->epoll_files_ready += n_fds_ready;
+
+  for (e = em->epoll_events; e < em->epoll_events + n_fds_ready; e++)
+    {
+      /* The event data holds the file's index in the unix file pool */
+      u32 i = e->data.u32;
+      unix_file_t *f = pool_elt_at_index (um->file_pool, i);
+      clib_error_t *errors[4];
+      int n_errors = 0;
+
+      if (PREDICT_TRUE (!(e->events & EPOLLERR)))
+        {
+          if (e->events & EPOLLIN)
+            {
+              errors[n_errors] = f->read_function (f);
+              n_errors += errors[n_errors] != 0;
+            }
+          if (e->events & EPOLLOUT)
+            {
+              errors[n_errors] = f->write_function (f);
+              n_errors += errors[n_errors] != 0;
+            }
+        }
+      else
+        {
+          /* Error condition: let the file handle it, or just close it */
+          if (f->error_function)
+            {
+              errors[n_errors] = f->error_function (f);
+              n_errors += errors[n_errors] != 0;
+            }
+          else
+            close (f->file_descriptor);
+        }
+
+      ASSERT (n_errors < ARRAY_LEN (errors));
+      for (i = 0; i < n_errors; i++)
+        {
+          unix_save_error (um, errors[i]);
+        }
+    }
+
+  return 0;
+}
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (linux_epoll_input_node,static) = {
+  .function = linux_epoll_input,
+  .type = VLIB_NODE_TYPE_PRE_INPUT,
+  .name = "unix-epoll-input",
+};
+/* *INDENT-ON* */
+
+/** Set up the epoll based file poller.
+ *
+ * Allocates the event vector, creates the epoll descriptor and, on
+ * success, installs linux_epoll_file_update() as the unix file update
+ * hook.
+ *
+ * @param vm The vlib main structure; unused.
+ * @return 0 on success, or a unix error if epoll_create() fails.
+ */
+clib_error_t *
+linux_epoll_input_init (vlib_main_t * vm)
+{
+  linux_epoll_main_t *em = &linux_epoll_main;
+  unix_main_t *um = &unix_main;
+  clib_error_t *error = 0;
+
+  /* Allocate some events. */
+  vec_resize (em->epoll_events, VLIB_FRAME_SIZE);
+
+  em->epoll_fd = epoll_create (vec_len (em->epoll_events));
+  if (em->epoll_fd >= 0)
+    um->file_update = linux_epoll_file_update;
+  else
+    error = clib_error_return_unix (0, "epoll_create");
+
+  return error;
+}
+
+VLIB_INIT_FUNCTION (linux_epoll_input_init);
+
+#endif /* HAVE_LINUX_EPOLL */
+
+/** Init function for unix file input; it defers to the epoll poller's
+ * init function.
+ *
+ * @param vm The vlib main structure.
+ * @return Whatever calling linux_epoll_input_init yields.
+ */
+static clib_error_t *
+unix_input_init (vlib_main_t * vm)
+{
+  clib_error_t *error;
+
+  error = vlib_call_init_function (vm, linux_epoll_input_init);
+
+  return error;
+}
+
+VLIB_INIT_FUNCTION (unix_input_init);
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vlib/unix/main.c b/src/vlib/unix/main.c
new file mode 100644
index 00000000000..562778e0e5d
--- /dev/null
+++ b/src/vlib/unix/main.c
@@ -0,0 +1,557 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * main.c: Unix main routine
+ *
+ * Copyright (c) 2008 Eliot Dresselhaus
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+#include <vlib/vlib.h>
+#include <vlib/unix/unix.h>
+#include <vlib/unix/plugin.h>
+
+#include <signal.h>
+#include <sys/ucontext.h>
+#include <syslog.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+
+ /** Default CLI pager limit if not configured in startup.conf */
+#define UNIX_CLI_DEFAULT_PAGER_LIMIT 100000
+
+/** Default CLI history depth if not configured in startup.conf */
+#define UNIX_CLI_DEFAULT_HISTORY 50
+
+
+ /* The single global instance of the unix layer state. */
+ unix_main_t unix_main;
+
+ /* Stores the vlib main pointer in unix_main, then runs unix_input_init
+    through the init-function call helper and returns its result. */
+ static clib_error_t *
+ unix_main_init (vlib_main_t * vm)
+ {
+   unix_main.vlib_main = vm;
+
+   return vlib_call_init_function (vm, unix_input_init);
+ }
+
+ VLIB_INIT_FUNCTION (unix_main_init);
+
+ /*
+  * Signal handler installed by setup_signal_handlers.
+  *
+  * Formats a message naming the signal and the PC from the ucontext,
+  * plus the faulting address for SIGSEGV.
+  *  - SIGTERM while main_loop_exit_set is set: logs to syslog and leaves
+  *    the main loop through clib_longjmp (does not return here).
+  *  - SIGTERM otherwise, SIGQUIT, SIGINT, SIGILL, SIGBUS, SIGSEGV, SIGHUP,
+  *    SIGFPE: logs the message to syslog and calls os_exit (1).
+  *  - anything else: emits the message as a clib_warning and returns.
+  */
+ static void
+ unix_signal_handler (int signum, siginfo_t * si, ucontext_t * uc)
+ {
+   uword fatal;
+   u8 *msg = 0;
+
+   msg = format (msg, "received signal %U, PC %U",
+                 format_signal, signum, format_ucontext_pc, uc);
+
+   if (signum == SIGSEGV)
+     msg = format (msg, ", faulting address %p", si->si_addr);
+
+   switch (signum)
+     {
+       /* these (caught) signals cause the application to exit */
+     case SIGTERM:
+       if (unix_main.vlib_main->main_loop_exit_set)
+         {
+           syslog (LOG_ERR | LOG_DAEMON, "received SIGTERM, exiting...");
+
+           /* The longjmp below never comes back to this frame, so the
+              message vector has to be released here or it is leaked. */
+           vec_free (msg);
+
+           clib_longjmp (&unix_main.vlib_main->main_loop_exit,
+                         VLIB_MAIN_LOOP_EXIT_CLI);
+         }
+       /* fall through */
+     case SIGQUIT:
+     case SIGINT:
+     case SIGILL:
+     case SIGBUS:
+     case SIGSEGV:
+     case SIGHUP:
+     case SIGFPE:
+       fatal = 1;
+       break;
+
+       /* by default, print a message and continue */
+     default:
+       fatal = 0;
+       break;
+     }
+
+   /* Null terminate. */
+   vec_add1 (msg, 0);
+
+   if (fatal)
+     {
+       syslog (LOG_ERR | LOG_DAEMON, "%s", msg);
+       os_exit (1);
+     }
+   else
+     clib_warning ("%s", msg);
+
+   vec_free (msg);
+ }
+
+ /*
+  * Install signal dispositions for signal numbers 1 through 31.
+  *
+  * SIGABRT, SIGKILL, SIGSTOP, SIGUSR1 and SIGUSR2 are left untouched;
+  * SIGPIPE and SIGCHLD are set to SIG_IGN; every other signal is routed
+  * to unix_signal_handler with SA_SIGINFO.
+  *
+  * Returns 0, or a unix error naming the signal whose sigaction () call
+  * failed.  The um argument is not used.
+  */
+ static clib_error_t *
+ setup_signal_handlers (unix_main_t * um)
+ {
+   uword signo;
+
+   for (signo = 1; signo < 32; signo++)
+     {
+       struct sigaction action;
+
+       /* these signals take the default action */
+       if (signo == SIGABRT || signo == SIGKILL || signo == SIGSTOP
+           || signo == SIGUSR1 || signo == SIGUSR2)
+         continue;
+
+       memset (&action, 0, sizeof (action));
+       action.sa_flags = SA_SIGINFO;
+
+       if (signo == SIGPIPE || signo == SIGCHLD)
+         /* ignore SIGPIPE, SIGCHLD */
+         action.sa_sigaction = (void *) SIG_IGN;
+       else
+         /* catch and handle all other signals */
+         action.sa_sigaction = (void *) unix_signal_handler;
+
+       if (sigaction (signo, &action, 0) < 0)
+         return clib_error_return_unix (0, "sigaction %U", format_signal,
+                                        signo);
+     }
+
+   return 0;
+ }
+
+ /*
+  * clib error handler registered by unix_config.
+  *
+  * arg     - the unix_main_t passed at registration time.
+  * msg     - message bytes, msg_len of them (not required to be
+  *           NUL terminated).
+  *
+  * In interactive mode the message is written to fd 2 unchanged.
+  * Otherwise everything but the final byte is sent to syslog; the caller's
+  * buffer is no longer modified to do so.  An empty message is ignored
+  * (previously msg[msg_len - 1] was touched even for msg_len == 0).
+  */
+ static void
+ unix_error_handler (void *arg, u8 * msg, int msg_len)
+ {
+   unix_main_t *um = arg;
+
+   /* Nothing to emit, and msg_len - 1 below must not go negative. */
+   if (msg == 0 || msg_len <= 0)
+     return;
+
+   /* Echo to stderr when interactive. */
+   if (um->flags & UNIX_FLAG_INTERACTIVE)
+     {
+       CLIB_UNUSED (int r) = write (2, msg, msg_len);
+     }
+   else
+     {
+       /* The precision bounds the output to the first msg_len - 1 bytes,
+          which is what overwriting the last byte with NUL used to do. */
+       syslog (LOG_ERR | LOG_DAEMON, "%.*s", msg_len - 1, (char *) msg);
+     }
+ }
+
+ /*
+  * Send the text of a clib error to syslog.
+  *
+  * Does nothing in interactive mode, when error is null, or when the
+  * error carries no text (an empty "what" vector previously led to an
+  * access at index -1).  All bytes of error->what except the last one are
+  * logged; the vector itself is left unmodified.  The vm argument is not
+  * used.
+  */
+ void
+ vlib_unix_error_report (vlib_main_t * vm, clib_error_t * error)
+ {
+   unix_main_t *um = &unix_main;
+   u8 *msg;
+   u32 msg_len;
+
+   if ((um->flags & UNIX_FLAG_INTERACTIVE) || error == 0)
+     return;
+
+   msg = error->what;
+   msg_len = vec_len (msg);
+
+   /* No text: nothing to log and no last byte to strip. */
+   if (msg_len == 0)
+     return;
+
+   /* The precision limits output to the first msg_len - 1 bytes, the same
+      bytes the old in-place NUL termination exposed. */
+   syslog (LOG_ERR | LOG_DAEMON, "%.*s", (int) (msg_len - 1), (char *) msg);
+ }
+
+ /*
+  * Process node that executes the startup configuration file.
+  *
+  * Suspends for 2 seconds, then polls every 0.1 seconds until
+  * unix_main.unix_config_complete is set.  If a startup config filename
+  * was configured, the file is read in 4096 byte chunks, optionally
+  * copied to the log fd between start/end banners, and handed to
+  * vlib_cli_input.  Failures to open/stat/read the file only produce
+  * warnings.  Always returns 0.
+  *
+  * Fixes relative to the previous version:
+  *  - the read () result is kept in a signed variable, so a failing read
+  *    is no longer mistaken for a huge positive byte count;
+  *  - the vector length is always trimmed to the bytes actually read, so
+  *    the 4096 byte padding added before each read never reaches the CLI;
+  *  - an allocated but empty buffer is freed.
+  */
+ static uword
+ startup_config_process (vlib_main_t * vm,
+                         vlib_node_runtime_t * rt, vlib_frame_t * f)
+ {
+   unix_main_t *um = &unix_main;
+   unformat_input_t sub_input;
+   struct stat s;
+   u8 *buf = 0;
+   char *fn;
+   int fd;
+
+   vlib_process_suspend (vm, 2.0);
+
+   while (um->unix_config_complete == 0)
+     vlib_process_suspend (vm, 0.1);
+
+   if (um->startup_config_filename == 0)
+     return 0;
+
+   fn = (char *) um->startup_config_filename;
+
+   fd = open (fn, O_RDONLY);
+   if (fd < 0)
+     {
+       clib_warning ("failed to open `%s'", fn);
+       return 0;
+     }
+
+   if (fstat (fd, &s) < 0)
+     {
+       clib_warning ("failed to stat `%s'", fn);
+       goto done;
+     }
+
+   if (!(S_ISREG (s.st_mode) || S_ISLNK (s.st_mode)))
+     {
+       clib_warning ("not a regular file: `%s'", fn);
+       goto done;
+     }
+
+   while (1)
+     {
+       uword l = vec_len (buf);
+       word n;			/* signed: read () returns -1 on error */
+
+       vec_resize (buf, 4096);
+       n = read (fd, buf + l, 4096);
+
+       /* Keep only the bytes that were really read. */
+       _vec_len (buf) = l + (n > 0 ? n : 0);
+
+       if (n < 0)
+         clib_unix_warning ("failed to read `%s'", fn);
+
+       /* Error, end of file, or a short read: stop reading. */
+       if (n < 4096)
+         break;
+     }
+
+   if (vec_len (buf) == 0)
+     {
+       /* Nothing to execute; release the (empty) vector. */
+       vec_free (buf);
+       goto done;
+     }
+
+   if (um->log_fd)
+     {
+       u8 *lv = 0;
+       lv = format (lv, "%U: ***** Startup Config *****\n%v",
+                    format_timeval, 0 /* current bat-time */ ,
+                    0 /* current bat-format */ ,
+                    buf);
+       {
+         int rv __attribute__ ((unused)) =
+           write (um->log_fd, lv, vec_len (lv));
+       }
+       vec_reset_length (lv);
+       lv = format (lv, "%U: ***** End Startup Config *****\n",
+                    format_timeval, 0 /* current bat-time */ ,
+                    0 /* current bat-format */ );
+       {
+         int rv __attribute__ ((unused)) =
+           write (um->log_fd, lv, vec_len (lv));
+       }
+       vec_free (lv);
+     }
+
+   unformat_init_vector (&sub_input, buf);
+   vlib_cli_input (vm, &sub_input, 0, 0);
+   /* frees buf for us */
+   unformat_free (&sub_input);
+
+ done:
+   close (fd);
+   return 0;
+ }
+
+ /* Node registration: startup_config_process runs as a process node. */
+ /* *INDENT-OFF* */
+ VLIB_REGISTER_NODE (startup_config_node,static) = {
+   .name = "startup-config-process",
+   .type = VLIB_NODE_TYPE_PROCESS,
+   .function = startup_config_process,
+ };
+ /* *INDENT-ON* */
+
+ /*
+  * Parser for the "unix { ... }" configuration section.
+  *
+  * Applies the pager/history defaults, then consumes keywords until end
+  * of input; an unrecognised keyword yields an "unknown input" error.
+  * When not interactive it installs the signal handlers, opens syslog,
+  * registers unix_error_handler and, unless "nodaemon" was given, calls
+  * daemon ().  On success unix_config_complete is set and 0 is returned.
+  *
+  * Fix: a failing daemon () call used to build an error object that was
+  * neither returned nor freed; it is now returned to the caller.
+  */
+ static clib_error_t *
+ unix_config (vlib_main_t * vm, unformat_input_t * input)
+ {
+   unix_main_t *um = &unix_main;
+   clib_error_t *error = 0;
+
+   /* Defaults */
+   um->cli_pager_buffer_limit = UNIX_CLI_DEFAULT_PAGER_LIMIT;
+   um->cli_history_limit = UNIX_CLI_DEFAULT_HISTORY;
+
+   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+     {
+       char *cli_prompt;
+       if (unformat (input, "interactive"))
+         um->flags |= UNIX_FLAG_INTERACTIVE;
+       else if (unformat (input, "nodaemon"))
+         um->flags |= UNIX_FLAG_NODAEMON;
+       else if (unformat (input, "cli-prompt %s", &cli_prompt))
+         vlib_unix_cli_set_prompt (cli_prompt);
+       else
+         if (unformat (input, "cli-listen %s", &um->cli_listen_socket.config))
+         ;
+       else if (unformat (input, "cli-line-mode"))
+         um->cli_line_mode = 1;
+       else if (unformat (input, "cli-no-banner"))
+         um->cli_no_banner = 1;
+       else if (unformat (input, "cli-no-pager"))
+         um->cli_no_pager = 1;
+       else if (unformat (input, "cli-pager-buffer-limit %d",
+                          &um->cli_pager_buffer_limit))
+         ;
+       else
+         if (unformat (input, "cli-history-limit %d", &um->cli_history_limit))
+         ;
+       else if (unformat (input, "full-coredump"))
+         {
+           int fd;
+
+           fd = open ("/proc/self/coredump_filter", O_WRONLY);
+           if (fd >= 0)
+             {
+               if (write (fd, "0x6f\n", 5) != 5)
+                 clib_unix_warning ("coredump filter write failed!");
+               close (fd);
+             }
+           else
+             clib_unix_warning ("couldn't open /proc/self/coredump_filter");
+         }
+       else if (unformat (input, "startup-config %s",
+                          &um->startup_config_filename))
+         ;
+       else if (unformat (input, "exec %s", &um->startup_config_filename))
+         ;
+       else if (unformat (input, "log %s", &um->log_filename))
+         {
+           um->log_fd = open ((char *) um->log_filename,
+                              O_CREAT | O_WRONLY | O_APPEND, 0644);
+           if (um->log_fd < 0)
+             {
+               clib_warning ("couldn't open log '%s'\n", um->log_filename);
+               /* log_fd == 0 is what the rest of the file tests for
+                  "no log file". */
+               um->log_fd = 0;
+             }
+           else
+             {
+               u8 *lv = 0;
+               lv = format (0, "%U: ***** Start: PID %d *****\n",
+                            format_timeval, 0 /* current bat-time */ ,
+                            0 /* current bat-format */ ,
+                            getpid ());
+               {
+                 int rv __attribute__ ((unused)) =
+                   write (um->log_fd, lv, vec_len (lv));
+               }
+               vec_free (lv);
+             }
+         }
+       else
+         return clib_error_return (0, "unknown input `%U'",
+                                   format_unformat_error, input);
+     }
+
+   if (!(um->flags & UNIX_FLAG_INTERACTIVE))
+     {
+       error = setup_signal_handlers (um);
+       if (error)
+         return error;
+
+       openlog (vm->name, LOG_CONS | LOG_PERROR | LOG_PID, LOG_DAEMON);
+       clib_error_register_handler (unix_error_handler, um);
+
+       if (!(um->flags & UNIX_FLAG_NODAEMON) && daemon ( /* chdir to / */ 0,
+                                                        /* stdin/stdout/stderr -> /dev/null */
+                                                        0) < 0)
+         /* Report the failure instead of dropping the error object. */
+         return clib_error_return_unix (0, "daemon () fails");
+     }
+   um->unix_config_complete = 1;
+
+   return 0;
+ }
+
+/* unix { ... } configuration. */
+/*?
+ *
+ * @cfgcmd{interactive}
+ * Attach CLI to stdin/out and provide a debugging command line interface.
+ * Implies @c nodaemon.
+ *
+ * @cfgcmd{nodaemon}
+ * Do not fork or background the VPP process. Typically used when invoking
+ * VPP applications from a process monitor.
+ *
+ * @cfgcmd{exec, &lt;filename&gt;}
+ * @par <code>startup-config &lt;filename&gt;</code>
+ * Read startup operational configuration from @c filename.
+ * The contents of the file will be performed as though entered at the CLI.
+ * The two keywords are aliases for the same function; if both are specified,
+ * only the last will have an effect.
+ *
+ * @cfgcmd{log, &lt;filename&gt;}
+ * Logs the startup configuration and all subsequent CLI commands in
+ * @c filename.
+ * Very useful in situations where folks don't remember or can't be bothered
+ * to include CLI commands in bug reports.
+ *
+ * @cfgcmd{full-coredump}
+ * Ask the Linux kernel to dump all memory-mapped address regions, instead
+ * of just text+data+bss.
+ *
+ * @cfgcmd{cli-listen, &lt;address:port&gt;}
+ * Bind the CLI to listen at the address and port given. @c localhost
+ * on TCP port @c 5002, given as <tt>cli-listen localhost:5002</tt>,
+ * is typical.
+ *
+ * @cfgcmd{cli-line-mode}
+ * Disable character-by-character I/O on stdin. Useful when combined with,
+ * for example, <tt>emacs M-x gud-gdb</tt>.
+ *
+ * @cfgcmd{cli-prompt, &lt;string&gt;}
+ * Configure the CLI prompt to be @c string.
+ *
+ * @cfgcmd{cli-history-limit, &lt;nn&gt;}
+ * Limit command history to @c nn lines. A value of @c 0
+ * disables command history. Default value: @c 50
+ *
+ * @cfgcmd{cli-no-banner}
+ * Disable the login banner on stdin and Telnet connections.
+ *
+ * @cfgcmd{cli-no-pager}
+ * Disable the output pager.
+ *
+ * @cfgcmd{cli-pager-buffer-limit, &lt;nn&gt;}
+ * Limit pager buffer to @c nn lines of output.
+ * A value of @c 0 disables the pager. Default value: @c 100000
+?*/
+VLIB_CONFIG_FUNCTION (unix_config, "unix");
+
+ /* Main loop exit hook: closes the syslog connection.  Always returns 0;
+    the vm argument is not used. */
+ static clib_error_t *
+ unix_exit (vlib_main_t * vm)
+ {
+   closelog ();
+
+   return 0;
+ }
+
+ VLIB_MAIN_LOOP_EXIT_FUNCTION (unix_exit);
+
+ /* Vector of per-thread stack base pointers. */
+ u8 **vlib_thread_stacks;
+
+ /*
+  * Body of the first thread: arg is the vlib_main_t pointer cast to uword.
+  * Re-parses the command line from vm->argv, runs vlib_main with it and
+  * returns vlib_main's result after releasing the parsed input.
+  */
+ static uword
+ thread0 (uword arg)
+ {
+   vlib_main_t *vm = (vlib_main_t *) arg;
+   unformat_input_t cmdline;
+   int rv;
+
+   unformat_init_command_line (&cmdline, (char **) vm->argv);
+   rv = vlib_main (vm, &cmdline);
+   unformat_free (&cmdline);
+
+   return rv;
+ }
+
+ /*
+  * Unix entry point for a vlib application.
+  *
+  * Records argv/name/heap in the global vlib main, runs early plugin
+  * init and the early configuration functions, allocates and guards the
+  * thread stacks, then switches to the first stack and runs thread0.
+  *
+  * Returns the plugin init result if non-zero, 1 if an early config
+  * function reports an error, otherwise the value produced by thread0.
+  * The argc argument is not used.
+  *
+  * Fix: the parsed command line is now released before the error check,
+  * so the early-config failure path no longer leaks it.
+  */
+ int
+ vlib_unix_main (int argc, char *argv[])
+ {
+   vlib_main_t *vm = &vlib_global_main;	/* one and only time for this! */
+   vlib_thread_main_t *tm = &vlib_thread_main;
+   unformat_input_t input;
+   u8 *thread_stacks;
+   clib_error_t *e;
+   int i;
+
+   vm->argv = (u8 **) argv;
+   vm->name = argv[0];
+   vm->heap_base = clib_mem_get_heap ();
+   ASSERT (vm->heap_base);
+
+   i = vlib_plugin_early_init (vm);
+   if (i)
+     return i;
+
+   unformat_init_command_line (&input, (char **) vm->argv);
+   if (vm->init_functions_called == 0)
+     vm->init_functions_called = hash_create (0, /* value bytes */ 0);
+   e = vlib_call_all_config_functions (vm, &input, 1 /* early */ );
+   /* Released on both the success and the failure path. */
+   unformat_free (&input);
+   if (e != 0)
+     {
+       clib_error_report (e);
+       return 1;
+     }
+
+   /*
+    * allocate n x VLIB_THREAD_STACK_SIZE stacks, aligned to a
+    * VLIB_THREAD_STACK_SIZE boundary
+    * See also: os_get_cpu_number() in vlib/vlib/threads.c
+    */
+   thread_stacks = clib_mem_alloc_aligned
+     ((uword) tm->n_thread_stacks * VLIB_THREAD_STACK_SIZE,
+      VLIB_THREAD_STACK_SIZE);
+
+   vec_validate (vlib_thread_stacks, tm->n_thread_stacks - 1);
+   for (i = 0; i < vec_len (vlib_thread_stacks); i++)
+     {
+       vlib_thread_stacks[i] = thread_stacks;
+
+       /*
+        * Disallow writes to the bottom page of the stack, to
+        * catch stack overflows.
+        */
+       if (mprotect (thread_stacks, clib_mem_get_page_size (), PROT_READ) < 0)
+         clib_unix_warning ("thread stack");
+
+       thread_stacks += VLIB_THREAD_STACK_SIZE;
+     }
+
+   /* Run thread0 on stack 0; the stack pointer passed is the top (highest
+      address) of that stack. */
+   i = clib_calljmp (thread0, (uword) vm,
+                     (void *) (vlib_thread_stacks[0] +
+                               VLIB_THREAD_STACK_SIZE));
+   return i;
+ }
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vlib/unix/mc_socket.c b/src/vlib/unix/mc_socket.c
new file mode 100644
index 00000000000..9c12ad3b559
--- /dev/null
+++ b/src/vlib/unix/mc_socket.c
@@ -0,0 +1,1049 @@
+/*
+ * mc_socket.c: socket based multicast for vlib mc
+ *
+ * Copyright (c) 2010 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vlib/vlib.h>
+#include <vlib/unix/mc_socket.h>
+
+#include <sys/ioctl.h> /* for FIONBIO */
+#include <netinet/tcp.h> /* for TCP_NODELAY */
+#include <net/if.h> /* for struct ifreq */
+
+ /*
+  * format () helper for a socket peer id.
+  * Takes one u64 vararg (the peer id), and appends
+  * "<network address>:<port as 4 hex digits>" to s.
+  */
+ static u8 *
+ format_socket_peer_id (u8 * s, va_list * args)
+ {
+   mc_peer_id_t id;
+   u32 addr, port;
+
+   id.as_u64 = va_arg (*args, u64);
+   addr = mc_socket_peer_id_get_address (id);
+   port = mc_socket_peer_id_get_port (id);
+
+   return format (s, "%U:%04x", format_network_address, AF_INET, &addr,
+                  ntohs (port));
+ }
+
+ /* Signature of the message handlers invoked through msg_handler. */
+ typedef void (mc_msg_handler_t) (mc_main_t * mcm, void *msg,
+                                  u32 buffer_index);
+
+ /*
+  * Invoke handler _h on the current data of the buffer buffer_index.
+  * Unless handler_frees_buffer is non-zero, the buffer is freed here
+  * after the handler returns.
+  */
+ always_inline void
+ msg_handler (mc_main_t * mcm,
+              u32 buffer_index, u32 handler_frees_buffer, void *_h)
+ {
+   vlib_main_t *vm = mcm->vlib_main;
+   mc_msg_handler_t *handler = _h;
+   void *payload =
+     vlib_buffer_get_current (vlib_get_buffer (vm, buffer_index));
+
+   handler (mcm, payload, buffer_index);
+
+   if (handler_frees_buffer)
+     return;
+
+   vlib_buffer_free_one (vm, buffer_index);
+ }
+
+ /*
+  * Append one iovec per buffer of the chain starting at buffer_index to
+  * the vector *iovs_return.  Each iovec covers the buffer's current data
+  * and current_length.  Returns the summed length of the chain.
+  */
+ static uword
+ append_buffer_index_to_iovec (vlib_main_t * vm,
+                               u32 buffer_index, struct iovec **iovs_return)
+ {
+   u32 total = 0;
+   u32 cur = buffer_index;
+   int more;
+
+   do
+     {
+       vlib_buffer_t *buf = vlib_get_buffer (vm, cur);
+       struct iovec *iov;
+
+       vec_add2 (*iovs_return, iov, 1);
+       iov->iov_base = vlib_buffer_get_current (buf);
+       iov->iov_len = buf->current_length;
+       total += iov->iov_len;
+
+       /* Follow the chain only while the buffer says it has a successor. */
+       more = (buf->flags & VLIB_BUFFER_NEXT_PRESENT) != 0;
+       if (more)
+         cur = buf->next_buffer;
+     }
+   while (more);
+
+   return total;
+ }
+
+ /*
+  * Send the buffer chain buffer_index to tx_addr over socket as a single
+  * sendmsg () call, reusing msm->iovecs as scatter list.
+  *
+  * The call is retried for as long as it fails with EAGAIN; the retry
+  * count is recorded in the event log when non-zero.  Any other outcome
+  * that does not transmit the whole message only produces a warning.
+  * Always returns 0.  The buffer is not freed here.
+  *
+  * Fixes: errno is consulted only when sendmsg () actually failed (a
+  * stale EAGAIN could otherwise keep the loop spinning after a short
+  * send), and the byte count is passed as int to match the %d
+  * conversion.
+  */
+ static clib_error_t *
+ sendmsg_helper (mc_socket_main_t * msm,
+                 int socket, struct sockaddr_in *tx_addr, u32 buffer_index)
+ {
+   vlib_main_t *vm = msm->mc_main.vlib_main;
+   struct msghdr h;
+   word n_bytes, n_bytes_tx, n_retries;
+
+   memset (&h, 0, sizeof (h));
+   h.msg_name = tx_addr;
+   h.msg_namelen = sizeof (tx_addr[0]);
+
+   if (msm->iovecs)
+     _vec_len (msm->iovecs) = 0;
+
+   n_bytes = append_buffer_index_to_iovec (vm, buffer_index, &msm->iovecs);
+   ASSERT (n_bytes <= msm->mc_main.transport.max_packet_size);
+   if (n_bytes > msm->mc_main.transport.max_packet_size)
+     clib_error ("sending packet larger than interace MTU %d bytes",
+                 (int) n_bytes);
+
+   h.msg_iov = msm->iovecs;
+   h.msg_iovlen = vec_len (msm->iovecs);
+
+   n_retries = 0;
+   while (1)
+     {
+       n_bytes_tx = sendmsg (socket, &h, /* flags */ 0);
+
+       /* errno is only meaningful when the call reported failure. */
+       if (n_bytes_tx >= 0 || errno != EAGAIN)
+         break;
+
+       n_retries++;
+     }
+
+   if (n_bytes_tx != n_bytes)
+     {
+       clib_unix_warning ("sendmsg");
+       return 0;
+     }
+   if (n_retries)
+     {
+       ELOG_TYPE_DECLARE (e) =
+       {
+       .format = "sendmsg-helper: %d retries",.format_args = "i4",};
+       struct
+       {
+         u32 retries;
+       } *ed = 0;
+
+       ed = ELOG_DATA (&vm->elog_main, e);
+       ed->retries = n_retries;
+     }
+   return 0;
+ }
+
+ /*
+  * Transport hook: send buffer_index on the multicast socket selected by
+  * type.  The buffer is freed afterwards for every type except
+  * MC_TRANSPORT_USER_REQUEST_TO_RELAY.  Returns sendmsg_helper's result.
+  */
+ static clib_error_t *
+ tx_buffer (void *transport, mc_transport_type_t type, u32 buffer_index)
+ {
+   mc_socket_main_t *msm = (mc_socket_main_t *) transport;
+   vlib_main_t *vm = msm->mc_main.vlib_main;
+   mc_multicast_socket_t *sock = msm->multicast_sockets + type;
+   clib_error_t *err;
+
+   err = sendmsg_helper (msm, sock->socket, &sock->tx_addr, buffer_index);
+
+   if (type == MC_TRANSPORT_USER_REQUEST_TO_RELAY)
+     return err;
+
+   vlib_buffer_free_one (vm, buffer_index);
+   return err;
+ }
+
+ /*
+  * Transport hook: send buffer_index from the ack socket to the
+  * address/port encoded in dest_peer_id, then free the buffer.
+  * Returns sendmsg_helper's result.
+  */
+ static clib_error_t *
+ tx_ack (void *transport, mc_peer_id_t dest_peer_id, u32 buffer_index)
+ {
+   mc_socket_main_t *msm = (mc_socket_main_t *) transport;
+   vlib_main_t *vm = msm->mc_main.vlib_main;
+   struct sockaddr_in dest;
+   clib_error_t *err;
+
+   memset (&dest, 0, sizeof (dest));
+   dest.sin_family = AF_INET;
+   dest.sin_port = mc_socket_peer_id_get_port (dest_peer_id);
+   dest.sin_addr.s_addr = mc_socket_peer_id_get_address (dest_peer_id);
+
+   err = sendmsg_helper (msm, msm->ack_socket, &dest, buffer_index);
+   vlib_buffer_free_one (vm, buffer_index);
+
+   return err;
+ }
+
+ /*
+  * Receive one message from socket into buffers taken from the end of
+  * msm->rx_buffers.
+  *
+  * rx_addr       - when non-null, receives the sender address.
+  * buffer_index  - out: index of the first buffer of the received chain,
+  *                 or ~0 when drop_message is set.
+  * drop_message  - when non-zero the message is read and discarded; no
+  *                 buffers are consumed.
+  *
+  * Returns 0 on success, a unix error when recvmsg () fails, or an error
+  * when fewer than rx_mtu_n_buffers buffers could be made available.
+  * *buffer_index is not written on error.
+  *
+  * Fix: the buffer shortage used to be caught by ASSERT only; without
+  * assertions the index computation below would wrap and read outside
+  * rx_buffers.
+  */
+ static clib_error_t *
+ recvmsg_helper (mc_socket_main_t * msm,
+                 int socket,
+                 struct sockaddr_in *rx_addr,
+                 u32 * buffer_index, u32 drop_message)
+ {
+   vlib_main_t *vm = msm->mc_main.vlib_main;
+   vlib_buffer_t *b;
+   uword n_left, n_alloc, n_mtu, i, i_rx;
+   const uword buffer_size = VLIB_BUFFER_DEFAULT_FREE_LIST_BYTES;
+   word n_bytes_left;
+
+   /* Make sure we have at least a MTU worth of buffers. */
+   n_mtu = msm->rx_mtu_n_buffers;
+   n_left = vec_len (msm->rx_buffers);
+   if (n_left < n_mtu)
+     {
+       uword max_alloc = 8 * n_mtu;
+       vec_validate (msm->rx_buffers, max_alloc - 1);
+       n_alloc =
+         vlib_buffer_alloc (vm, msm->rx_buffers + n_left, max_alloc - n_left);
+       _vec_len (msm->rx_buffers) = n_left + n_alloc;
+     }
+
+   /* Allocation may have come up short; refuse to go on rather than
+      index before the start of rx_buffers. */
+   if (vec_len (msm->rx_buffers) < n_mtu)
+     return clib_error_return (0, "recvmsg: not enough rx buffers");
+
+   vec_validate (msm->iovecs, n_mtu - 1);
+
+   /* Allocate RX buffers from end of rx_buffers.
+      Turn them into iovecs to pass to readv. */
+   i_rx = vec_len (msm->rx_buffers) - 1;
+   for (i = 0; i < n_mtu; i++)
+     {
+       b = vlib_get_buffer (vm, msm->rx_buffers[i_rx - i]);
+       msm->iovecs[i].iov_base = b->data;
+       msm->iovecs[i].iov_len = buffer_size;
+     }
+   _vec_len (msm->iovecs) = n_mtu;
+
+   {
+     struct msghdr h;
+
+     memset (&h, 0, sizeof (h));
+     if (rx_addr)
+       {
+         h.msg_name = rx_addr;
+         h.msg_namelen = sizeof (rx_addr[0]);
+       }
+     h.msg_iov = msm->iovecs;
+     h.msg_iovlen = vec_len (msm->iovecs);
+
+     n_bytes_left = recvmsg (socket, &h, 0);
+     if (n_bytes_left < 0)
+       return clib_error_return_unix (0, "recvmsg");
+   }
+
+   if (drop_message)
+     {
+       /* rx_buffers is left untouched, so the buffers get reused. */
+       *buffer_index = ~0;
+       return 0;
+     }
+
+   /* Walk down from the last buffer, chaining as many buffers as the
+      received byte count needs. */
+   *buffer_index = msm->rx_buffers[i_rx];
+   while (1)
+     {
+       b = vlib_get_buffer (vm, msm->rx_buffers[i_rx]);
+
+       b->flags = 0;
+       b->current_data = 0;
+       b->current_length =
+         n_bytes_left < buffer_size ? n_bytes_left : buffer_size;
+
+       n_bytes_left -= buffer_size;
+
+       if (n_bytes_left <= 0)
+         break;
+
+       i_rx--;
+       b->flags |= VLIB_BUFFER_NEXT_PRESENT;
+       b->next_buffer = msm->rx_buffers[i_rx];
+     }
+
+   /* Entries at i_rx and above now belong to the returned chain. */
+   _vec_len (msm->rx_buffers) = i_rx;
+
+   return 0 /* no error */ ;
+ }
+
+ /*
+  * Read callback of the mastership multicast socket: receives one
+  * message and passes it to mc_msg_master_assert_handler; the buffer is
+  * freed by msg_handler.  Returns the receive error, if any.
+  */
+ static clib_error_t *
+ mastership_socket_read_ready (unix_file_t * uf)
+ {
+   mc_socket_main_t *msm = (mc_socket_main_t *) uf->private_data;
+   mc_multicast_socket_t *ms =
+     &msm->multicast_sockets[MC_TRANSPORT_MASTERSHIP];
+   u32 bi;
+   clib_error_t *err;
+
+   err = recvmsg_helper (msm, ms->socket, /* rx_addr */ 0, &bi,
+                         /* drop_message */ 0);
+   if (err)
+     return err;
+
+   msg_handler (&msm->mc_main, bi,
+                /* handler_frees_buffer */ 0,
+                mc_msg_master_assert_handler);
+   return err;
+ }
+
+ /*
+  * Read callback of the "to relay" multicast socket.
+  *
+  * When this node is not the relay master the message is read and
+  * dropped.  When it is the master, the message gets the next global
+  * sequence number (stored in network byte order), is re-sent on the
+  * "from relay" socket and its buffer is freed.
+  * Returns the receive error or, for the master, the send result.
+  */
+ static clib_error_t *
+ to_relay_socket_read_ready (unix_file_t * uf)
+ {
+   mc_socket_main_t *msm = (mc_socket_main_t *) uf->private_data;
+   mc_main_t *mcm = &msm->mc_main;
+   vlib_main_t *vm = msm->mc_main.vlib_main;
+   mc_multicast_socket_t *rx_sock =
+     &msm->multicast_sockets[MC_TRANSPORT_USER_REQUEST_TO_RELAY];
+   mc_multicast_socket_t *tx_sock =
+     &msm->multicast_sockets[MC_TRANSPORT_USER_REQUEST_FROM_RELAY];
+   u32 is_master = mcm->relay_state == MC_RELAY_STATE_MASTER;
+   mc_msg_user_request_t *req;
+   clib_error_t *err;
+   u32 bi;
+
+   /* Not the ordering master? Turf the msg */
+   err = recvmsg_helper (msm, rx_sock->socket, /* rx_addr */ 0, &bi,
+                         /* drop_message */ !is_master);
+   if (err || !is_master)
+     return err;
+
+   /* We are the master: number and rebroadcast the msg. */
+   req = vlib_buffer_get_current (vlib_get_buffer (vm, bi));
+   req->global_sequence = clib_host_to_net_u32 (mcm->relay_global_sequence);
+   mcm->relay_global_sequence++;
+
+   err = sendmsg_helper (msm, tx_sock->socket, &tx_sock->tx_addr, bi);
+   vlib_buffer_free_one (vm, bi);
+
+   return err;
+ }
+
+ /*
+  * Read callback of the "from relay" multicast socket: receives one
+  * message and passes it to mc_msg_user_request_handler, which is marked
+  * as responsible for freeing the buffer.  Returns the receive error, if
+  * any.
+  */
+ static clib_error_t *
+ from_relay_socket_read_ready (unix_file_t * uf)
+ {
+   mc_socket_main_t *msm = (mc_socket_main_t *) uf->private_data;
+   mc_multicast_socket_t *ms =
+     &msm->multicast_sockets[MC_TRANSPORT_USER_REQUEST_FROM_RELAY];
+   u32 bi;
+   clib_error_t *err;
+
+   err = recvmsg_helper (msm, ms->socket, /* rx_addr */ 0, &bi,
+                         /* drop_message */ 0);
+   if (err)
+     return err;
+
+   msg_handler (&msm->mc_main, bi, /* handler_frees_buffer */ 1,
+                mc_msg_user_request_handler);
+   return err;
+ }
+
+ /*
+  * Read callback of the join multicast socket.
+  *
+  * Receives one message and dispatches on its type field (byte-swapped
+  * from the wire) to the join-or-leave request handler or the join reply
+  * handler; msg_handler frees the buffer afterwards.
+  * Returns the receive error, if any.
+  *
+  * Fix: a message of any other type trips ASSERT (0) and, in builds
+  * where the assertion is compiled out, its buffer is now freed instead
+  * of being leaked.
+  */
+ static clib_error_t *
+ join_socket_read_ready (unix_file_t * uf)
+ {
+   mc_socket_main_t *msm = (mc_socket_main_t *) uf->private_data;
+   mc_main_t *mcm = &msm->mc_main;
+   vlib_main_t *vm = mcm->vlib_main;
+   mc_multicast_socket_t *ms = &msm->multicast_sockets[MC_TRANSPORT_JOIN];
+   clib_error_t *error;
+   u32 bi;
+
+   error = recvmsg_helper (msm, ms->socket, /* rx_addr */ 0, &bi, /* drop_message */
+                           0);
+   if (!error)
+     {
+       vlib_buffer_t *b = vlib_get_buffer (vm, bi);
+       mc_msg_join_or_leave_request_t *mp = vlib_buffer_get_current (b);
+
+       switch (clib_host_to_net_u32 (mp->type))
+         {
+         case MC_MSG_TYPE_join_or_leave_request:
+           msg_handler (mcm, bi, /* handler_frees_buffer */ 0,
+                        mc_msg_join_or_leave_request_handler);
+           break;
+
+         case MC_MSG_TYPE_join_reply:
+           msg_handler (mcm, bi, /* handler_frees_buffer */ 0,
+                        mc_msg_join_reply_handler);
+           break;
+
+         default:
+           ASSERT (0);
+           /* Nobody else owns this buffer; give it back. */
+           vlib_buffer_free_one (vm, bi);
+           break;
+         }
+     }
+   return error;
+ }
+
+ /*
+  * Read callback of the ack socket: receives one message and passes it
+  * to mc_msg_user_ack_handler; the buffer is freed by msg_handler.
+  * Returns the receive error, if any.
+  */
+ static clib_error_t *
+ ack_socket_read_ready (unix_file_t * uf)
+ {
+   mc_socket_main_t *msm = (mc_socket_main_t *) uf->private_data;
+   u32 bi;
+   clib_error_t *err;
+
+   err = recvmsg_helper (msm, msm->ack_socket, /* rx_addr */ 0, &bi,
+                         /* drop_message */ 0);
+   if (err)
+     return err;
+
+   msg_handler (&msm->mc_main, bi, /* handler_frees_buffer */ 0,
+                mc_msg_user_ack_handler);
+   return err;
+ }
+
+ /*
+  * Tear down one catchup connection: frees its input/output vectors,
+  * removes the fd -> catchup mapping, deletes the unix file and returns
+  * the catchup object to its pool.
+  */
+ static void
+ catchup_cleanup (mc_socket_main_t * msm,
+                  mc_socket_catchup_t * c, unix_main_t * um, unix_file_t * uf)
+ {
+   vec_free (c->input_vector);
+   vec_free (c->output_vector);
+
+   /* The descriptor is read from uf, so unmap before uf is deleted. */
+   hash_unset (msm->catchup_index_by_file_descriptor, uf->file_descriptor);
+   unix_file_del (um, uf);
+
+   pool_put (msm->catchups, c);
+ }
+
+ /* Look up the catchup object registered for file_descriptor.
+    Returns 0 when the descriptor has no entry in the hash. */
+ static mc_socket_catchup_t *
+ find_catchup_from_file_descriptor (mc_socket_main_t * msm,
+                                    int file_descriptor)
+ {
+   uword *slot;
+
+   slot = hash_get (msm->catchup_index_by_file_descriptor, file_descriptor);
+   if (slot == 0)
+     return 0;
+
+   return pool_elt_at_index (msm->catchups, slot[0]);
+ }
+
+ /*
+  * Shared read callback for catchup sockets.
+  *
+  * Appends up to 4096 bytes from the socket to the connection's input
+  * vector.  EAGAIN is treated as "no data".  Once read () reports end of
+  * file with data accumulated, the whole input is handed to the catchup
+  * request handler (is_server) or to the catchup reply handler
+  * (!is_server); in the client case the connection is then torn down.
+  *
+  * Returns 0, or a unix error when read () fails; in that case the
+  * connection has been cleaned up.
+  *
+  * Fix: the unix error is now built before catchup_cleanup runs, so the
+  * reported errno is the one from read () and cannot be replaced by
+  * whatever the cleanup calls leave behind.
+  */
+ static clib_error_t *
+ catchup_socket_read_ready (unix_file_t * uf, int is_server)
+ {
+   unix_main_t *um = &unix_main;
+   mc_socket_main_t *msm = (mc_socket_main_t *) uf->private_data;
+   mc_main_t *mcm = &msm->mc_main;
+   mc_socket_catchup_t *c =
+     find_catchup_from_file_descriptor (msm, uf->file_descriptor);
+   word l, n, is_eof;
+
+   /* Make room for one more chunk past the data gathered so far. */
+   l = vec_len (c->input_vector);
+   vec_resize (c->input_vector, 4096);
+   n =
+     read (uf->file_descriptor, c->input_vector + l,
+           vec_len (c->input_vector) - l);
+   is_eof = n == 0;
+
+   if (n < 0)
+     {
+       if (errno == EAGAIN)
+         n = 0;
+       else
+         {
+           /* Capture errno first; the cleanup below may change it. */
+           clib_error_t *error = clib_error_return_unix (0, "read");
+
+           catchup_cleanup (msm, c, um, uf);
+           return error;
+         }
+     }
+
+   /* Trim the vector back to the bytes actually present. */
+   _vec_len (c->input_vector) = l + n;
+
+   if (is_eof && vec_len (c->input_vector) > 0)
+     {
+       if (is_server)
+         {
+           mc_msg_catchup_request_handler (mcm, (void *) c->input_vector,
+                                           c - msm->catchups);
+           _vec_len (c->input_vector) = 0;
+         }
+       else
+         {
+           mc_msg_catchup_reply_handler (mcm, (void *) c->input_vector,
+                                         c - msm->catchups);
+           c->input_vector = 0;	/* reply handler is responsible for freeing vector */
+           catchup_cleanup (msm, c, um, uf);
+         }
+     }
+
+   return 0 /* no error */ ;
+ }
+
+ /* Read callback for server side catchup sockets: the shared read
+    routine with is_server set. */
+ static clib_error_t *
+ catchup_server_read_ready (unix_file_t * uf)
+ {
+   clib_error_t *err = catchup_socket_read_ready (uf, /* is_server */ 1);
+
+   return err;
+ }
+
+ /* Read callback for client side catchup sockets: logs an event when
+    MC_EVENT_LOGGING is enabled, then runs the shared read routine with
+    is_server cleared. */
+ static clib_error_t *
+ catchup_client_read_ready (unix_file_t * uf)
+ {
+   clib_error_t *err;
+
+   if (MC_EVENT_LOGGING)
+     {
+       mc_socket_main_t *msm = (mc_socket_main_t *) uf->private_data;
+       vlib_main_t *vm = msm->mc_main.vlib_main;
+
+       ELOG_TYPE (e, "catchup_client_read_ready");
+       ELOG (&vm->elog_main, e, 0);
+     }
+
+   err = catchup_socket_read_ready (uf, /* is_server */ 0);
+   return err;
+ }
+
+ /*
+  * Shared write callback for catchup sockets.
+  *
+  * On the first call after a non-blocking connect the pending socket
+  * error is checked.  The output vector is then written in chunks of at
+  * most rx_mtu_n_bytes - 64 bytes, retrying each write while it fails
+  * with EAGAIN.
+  *
+  * When everything has been written the client side stops asking for
+  * write events and shuts down its sending direction; the server side
+  * tears the connection down.  Any failure also tears the connection
+  * down and is returned as error.  Returns 0 otherwise.
+  */
+ static clib_error_t *
+ catchup_socket_write_ready (unix_file_t * uf, int is_server)
+ {
+   unix_main_t *um = &unix_main;
+   mc_socket_main_t *msm = (mc_socket_main_t *) uf->private_data;
+   mc_socket_catchup_t *c =
+     find_catchup_from_file_descriptor (msm, uf->file_descriptor);
+   clib_error_t *error = 0;
+   int n;
+
+   if (c->connect_in_progress)
+     {
+       u32 so_error, so_error_len = sizeof (so_error);
+
+       c->connect_in_progress = 0;
+       if (getsockopt (c->socket, SOL_SOCKET, SO_ERROR, &so_error,
+                       &so_error_len) < 0)
+         {
+           error = clib_error_return_unix (0, "getsockopt SO_ERROR");
+           goto teardown;
+         }
+       if (so_error != 0)
+         {
+           error =
+             clib_error_return_code (0, so_error, CLIB_ERROR_ERRNO_VALID,
+                                     "connect fails");
+           goto teardown;
+         }
+     }
+
+   for (;;)
+     {
+       u32 chunk;
+
+       chunk =
+         clib_min (vec_len (c->output_vector) - c->output_vector_n_written,
+                   msm->rx_mtu_n_bytes -
+                   64 /* ip + tcp + option allowance */ );
+
+       /* Nothing left to send. */
+       if (chunk == 0)
+         break;
+
+       do
+         n = write (uf->file_descriptor,
+                    c->output_vector + c->output_vector_n_written, chunk);
+       while (n < 0 && errno == EAGAIN);
+
+       if (n < 0)
+         {
+           error = clib_error_return_unix (0, "write");
+           goto teardown;
+         }
+       c->output_vector_n_written += n;
+     }
+
+   /* Output still pending: wait for the next write event. */
+   if (c->output_vector_n_written < vec_len (c->output_vector))
+     return error;
+
+   /* All sent.  The server side is finished with this connection. */
+   if (is_server)
+     goto teardown;
+
+   uf->flags &= ~UNIX_FILE_DATA_AVAILABLE_TO_WRITE;
+   unix_main.file_update (uf, UNIX_FILE_UPDATE_MODIFY);
+   /* Send EOF to other side. */
+   shutdown (uf->file_descriptor, SHUT_WR);
+   return error;
+
+ teardown:
+   catchup_cleanup (msm, c, um, uf);
+   return error;
+ }
+
+ /* Write callback for server side catchup sockets: the shared write
+    routine with is_server set. */
+ static clib_error_t *
+ catchup_server_write_ready (unix_file_t * uf)
+ {
+   clib_error_t *err = catchup_socket_write_ready (uf, /* is_server */ 1);
+
+   return err;
+ }
+
+ /* Write callback for client side catchup sockets: the shared write
+    routine with is_server cleared. */
+ static clib_error_t *
+ catchup_client_write_ready (unix_file_t * uf)
+ {
+   clib_error_t *err = catchup_socket_write_ready (uf, /* is_server */ 0);
+
+   return err;
+ }
+
+static clib_error_t *
+catchup_socket_error_ready (unix_file_t * uf)
+{
+ unix_main_t *um = &unix_main;
+ mc_socket_main_t *msm = (mc_socket_main_t *) uf->private_data;
+ mc_socket_catchup_t *c =
+ find_catchup_from_file_descriptor (msm, uf->file_descriptor);
+ catchup_cleanup (msm, c, um, uf);
+ return clib_error_return (0, "error");
+}
+
+ /*
+  * Read callback of the catchup listen socket.
+  *
+  * Accepts one connection, allocates a zeroed catchup object for it,
+  * enables TCP_NODELAY (a failure only produces a warning), registers the
+  * new socket with the server-side catchup callbacks and records the
+  * fd -> catchup index mapping.
+  *
+  * Returns 0, or a unix error when accept () fails (the catchup object is
+  * returned to the pool in that case).
+  */
+ static clib_error_t *
+ catchup_listen_read_ready (unix_file_t * uf)
+ {
+   mc_socket_main_t *msm = (mc_socket_main_t *) uf->private_data;
+   struct sockaddr_in peer;
+   int peer_len = sizeof (peer);
+   int nodelay = 1;
+   mc_socket_catchup_t *cu;
+
+   pool_get (msm->catchups, cu);
+   memset (cu, 0, sizeof (cu[0]));
+
+   /* NOTE(review): the original comment says the accepted socket acquires
+      the non-blocking attribute from the server socket; Linux accept ()
+      does not inherit file status flags -- confirm. */
+   cu->socket = accept (uf->file_descriptor,
+                        (struct sockaddr *) &peer, (socklen_t *) & peer_len);
+   if (cu->socket < 0)
+     {
+       pool_put (msm->catchups, cu);
+       return clib_error_return_unix (0, "accept");
+     }
+
+   if (MC_EVENT_LOGGING)
+     {
+       mc_main_t *mcm = &msm->mc_main;
+       vlib_main_t *vm = mcm->vlib_main;
+
+       ELOG_TYPE_DECLARE (e) =
+       {
+       .format = "catchup accepted from 0x%lx",.format_args = "i4",};
+       struct
+       {
+         u32 addr;
+       } *ed = 0;
+
+       ed = ELOG_DATA (&vm->elog_main, e);
+       ed->addr = ntohl (peer.sin_addr.s_addr);
+     }
+
+   /* Disable the Nagle algorithm, ship catchup pkts immediately */
+   if (setsockopt (cu->socket, IPPROTO_TCP, TCP_NODELAY,
+                   (void *) &nodelay, sizeof (nodelay)) < 0)
+     clib_unix_warning ("catchup socket: set TCP_NODELAY");
+
+   {
+     /* Fields not named here start out zeroed. */
+     unix_file_t template = {
+       .read_function = catchup_server_read_ready,
+       .write_function = catchup_server_write_ready,
+       .error_function = catchup_socket_error_ready,
+       .file_descriptor = cu->socket,
+       .private_data = pointer_to_uword (msm),
+     };
+
+     cu->unix_file_index = unix_file_add (&unix_main, &template);
+   }
+
+   hash_set (msm->catchup_index_by_file_descriptor, cu->socket,
+             cu - msm->catchups);
+
+   return 0;
+ }
+
+ /*
+  * Bind sock to the first port at or above port that bind () accepts, on
+  * INADDR_ANY.  Returns the port that was bound (host byte order), or -1
+  * when no port below 65536 could be bound.
+  */
+ static word
+ find_and_bind_to_free_port (word sock, word port)
+ {
+   while (port < 1 << 16)
+     {
+       struct sockaddr_in a;
+
+       memset (&a, 0, sizeof (a));	/* Warnings be gone */
+
+       a.sin_family = PF_INET;
+       a.sin_addr.s_addr = INADDR_ANY;
+       a.sin_port = htons (port);
+
+       if (bind (sock, (struct sockaddr *) &a, sizeof (a)) >= 0)
+         return port;
+
+       port++;
+     }
+
+   return -1;
+ }
+
+ /*
+  * Create and configure one multicast UDP socket.
+  *
+  * msm       - supplies the multicast TTL (defaulted to 1 when zero), the
+  *             multicast group address and the local interface address.
+  * ms        - receives the socket and the group/port tx address.
+  * type      - label used as prefix in error messages.
+  * udp_port  - UDP port in host byte order.
+  *
+  * Steps: set the multicast TTL, SO_REUSEADDR, bind to group:port, join
+  * the group on the configured interface, switch to non-blocking mode and
+  * request a 1 MiB send buffer.
+  *
+  * Returns 0 on success or a unix error naming the failing step.  A
+  * failing SO_SNDBUF request is reported through clib_unix_error instead.
+  */
+ static clib_error_t *
+ setup_mutlicast_socket (mc_socket_main_t * msm,
+                         mc_multicast_socket_t * ms,
+                         char *type, uword udp_port)
+ {
+   int enable = 1;
+   u8 ttl;
+   u32 sndbuf_bytes = 1 << 20;
+   struct ip_mreq membership;
+
+   if (!msm->multicast_ttl)
+     msm->multicast_ttl = 1;
+
+   /* mastership (multicast) TX socket */
+   ms->socket = socket (PF_INET, SOCK_DGRAM, IPPROTO_UDP);
+   if (ms->socket < 0)
+     return clib_error_return_unix (0, "%s socket", type);
+
+   ttl = msm->multicast_ttl;
+   if (setsockopt (ms->socket, IPPROTO_IP, IP_MULTICAST_TTL,
+                   (void *) &ttl, sizeof (ttl)) < 0)
+     return clib_error_return_unix (0, "%s set multicast ttl", type);
+
+   if (setsockopt (ms->socket, SOL_SOCKET, SO_REUSEADDR,
+                   &enable, sizeof (enable)) < 0)
+     return clib_error_return_unix (0, "%s setsockopt SO_REUSEADDR", type);
+
+   /* The group address doubles as bind address and as tx destination. */
+   memset (&ms->tx_addr, 0, sizeof (ms->tx_addr));
+   ms->tx_addr.sin_family = AF_INET;
+   ms->tx_addr.sin_addr.s_addr =
+     htonl (msm->multicast_tx_ip4_address_host_byte_order);
+   ms->tx_addr.sin_port = htons (udp_port);
+
+   if (bind (ms->socket, (struct sockaddr *) &ms->tx_addr,
+             sizeof (ms->tx_addr)) < 0)
+     return clib_error_return_unix (0, "%s bind", type);
+
+   memset (&membership, 0, sizeof (membership));
+   membership.imr_multiaddr.s_addr =
+     htonl (msm->multicast_tx_ip4_address_host_byte_order);
+   membership.imr_interface.s_addr = msm->if_ip4_address_net_byte_order;
+
+   if (setsockopt (ms->socket, IPPROTO_IP, IP_ADD_MEMBERSHIP,
+                   (void *) &membership, sizeof (membership)) < 0)
+     return clib_error_return_unix (0, "%s IP_ADD_MEMBERSHIP setsockopt",
+                                    type);
+
+   if (ioctl (ms->socket, FIONBIO, &enable) < 0)
+     return clib_error_return_unix (0, "%s set FIONBIO", type);
+
+   /* FIXME remove this when we support tx_ready. */
+   if (setsockopt (ms->socket, SOL_SOCKET, SO_SNDBUF,
+                   &sndbuf_bytes, sizeof (sndbuf_bytes)) < 0)
+     clib_unix_error ("setsockopt");
+
+   return 0;
+ }
+
+/*
+ * Create every socket used by the socket-based multicast transport and
+ * register each one with the unix file poller.
+ *
+ * Four multicast sockets (mastership, join, to-relay, from-relay) use
+ * consecutive UDP ports starting at
+ * msm->base_multicast_udp_port_host_byte_order.  When that base is zero
+ * it defaults to 0xffff - (MC_N_TRANSPORT_TYPE + 2 - 1).  The ack (UDP)
+ * and catchup (TCP, listening) sockets are bound to the first free port
+ * found at or above the next two port numbers.
+ *
+ * Returns 0 on success or a clib error naming the failing step.  A
+ * failure to bind the ack or catchup socket is reported instead of
+ * silently recording -1 as the port.
+ * NOTE(review): sockets already created are not closed when a later
+ * step fails.
+ */
+static clib_error_t *
+socket_setup (mc_socket_main_t * msm)
+{
+  int one = 1;
+  clib_error_t *error;
+  u32 port;
+  word bound_port;
+
+  if (!msm->base_multicast_udp_port_host_byte_order)
+    msm->base_multicast_udp_port_host_byte_order =
+      0xffff - ((MC_N_TRANSPORT_TYPE + 2 /* ack socket, catchup socket */ )
+                - 1);
+
+  port = msm->base_multicast_udp_port_host_byte_order;
+
+  error = setup_mutlicast_socket (msm,
+                                  &msm->multicast_sockets
+                                  [MC_TRANSPORT_MASTERSHIP], "mastership",
+                                  port++);
+  if (error)
+    return error;
+
+  error = setup_mutlicast_socket (msm,
+                                  &msm->multicast_sockets[MC_TRANSPORT_JOIN],
+                                  "join", port++);
+  if (error)
+    return error;
+
+  error = setup_mutlicast_socket (msm,
+                                  &msm->multicast_sockets
+                                  [MC_TRANSPORT_USER_REQUEST_TO_RELAY],
+                                  "to relay", port++);
+  if (error)
+    return error;
+
+  error = setup_mutlicast_socket (msm,
+                                  &msm->multicast_sockets
+                                  [MC_TRANSPORT_USER_REQUEST_FROM_RELAY],
+                                  "from relay", port++);
+  if (error)
+    return error;
+
+  /* ACK rx socket */
+  msm->ack_socket = socket (PF_INET, SOCK_DGRAM, IPPROTO_UDP);
+  if (msm->ack_socket < 0)
+    return clib_error_return_unix (0, "ack socket");
+
+  bound_port = find_and_bind_to_free_port (msm->ack_socket, port++);
+  if (bound_port < 0)
+    return clib_error_return_unix (0, "ack socket bind");
+  msm->ack_udp_port = bound_port;
+
+  if (ioctl (msm->ack_socket, FIONBIO, &one) < 0)
+    return clib_error_return_unix (0, "ack socket FIONBIO");
+
+  /* TCP catchup server socket */
+  msm->catchup_server_socket = socket (AF_INET, SOCK_STREAM, 0);
+  if (msm->catchup_server_socket < 0)
+    return clib_error_return_unix (0, "catchup server socket");
+
+  bound_port =
+    find_and_bind_to_free_port (msm->catchup_server_socket, port++);
+  if (bound_port < 0)
+    return clib_error_return_unix (0, "catchup server socket bind");
+  msm->catchup_tcp_port = bound_port;
+
+  if (ioctl (msm->catchup_server_socket, FIONBIO, &one) < 0)
+    return clib_error_return_unix (0, "catchup server socket FIONBIO");
+
+  if (listen (msm->catchup_server_socket, 5) < 0)
+    return clib_error_return_unix (0, "catchup server socket listen");
+
+  /* Register read handlers; every handler gets msm as private data. */
+  {
+    unix_file_t template = { 0 };
+
+    /* epoll setup for multicast mastership socket */
+    template.read_function = mastership_socket_read_ready;
+    template.file_descriptor =
+      msm->multicast_sockets[MC_TRANSPORT_MASTERSHIP].socket;
+    template.private_data = (uword) msm;
+    unix_file_add (&unix_main, &template);
+
+    /* epoll setup for multicast to_relay socket */
+    template.read_function = to_relay_socket_read_ready;
+    template.file_descriptor =
+      msm->multicast_sockets[MC_TRANSPORT_USER_REQUEST_TO_RELAY].socket;
+    template.private_data = (uword) msm;
+    unix_file_add (&unix_main, &template);
+
+    /* epoll setup for multicast from_relay socket */
+    template.read_function = from_relay_socket_read_ready;
+    template.file_descriptor =
+      msm->multicast_sockets[MC_TRANSPORT_USER_REQUEST_FROM_RELAY].socket;
+    template.private_data = (uword) msm;
+    unix_file_add (&unix_main, &template);
+
+    /* epoll setup for multicast join socket */
+    template.read_function = join_socket_read_ready;
+    template.file_descriptor =
+      msm->multicast_sockets[MC_TRANSPORT_JOIN].socket;
+    template.private_data = (uword) msm;
+    unix_file_add (&unix_main, &template);
+
+    /* epoll setup for ack rx socket */
+    template.read_function = ack_socket_read_ready;
+    template.file_descriptor = msm->ack_socket;
+    template.private_data = (uword) msm;
+    unix_file_add (&unix_main, &template);
+
+    /* epoll setup for TCP catchup server */
+    template.read_function = catchup_listen_read_ready;
+    template.file_descriptor = msm->catchup_server_socket;
+    template.private_data = (uword) msm;
+    unix_file_add (&unix_main, &template);
+  }
+
+  return 0;
+}
+
+/*
+ * Queue output on catchup connection C.
+ *
+ * When SET_OUTPUT_VECTOR is non-null it becomes the connection's output
+ * vector and 0 is returned.  Otherwise N_BYTES are appended to the
+ * existing output vector and a pointer to the appended region is
+ * returned for the caller to fill in.  Whenever the output vector ends
+ * up non-empty the file is flagged as having data to write, and the
+ * poller is told about it the first time the flag is raised.
+ */
+static void *
+catchup_add_pending_output (mc_socket_catchup_t * c, uword n_bytes,
+                            u8 * set_output_vector)
+{
+  unix_file_t *file = pool_elt_at_index (unix_main.file_pool,
+                                         c->unix_file_index);
+  u8 *appended = 0;
+
+  if (!set_output_vector)
+    vec_add2 (c->output_vector, appended, n_bytes);
+  else
+    c->output_vector = set_output_vector;
+
+  /* Nothing pending: leave the poller state untouched. */
+  if (vec_len (c->output_vector) == 0)
+    return appended;
+
+  if (!(file->flags & UNIX_FILE_DATA_AVAILABLE_TO_WRITE))
+    {
+      file->flags |= UNIX_FILE_DATA_AVAILABLE_TO_WRITE;
+      unix_main.file_update (file, UNIX_FILE_UPDATE_MODIFY);
+    }
+
+  return appended;
+}
+
+/*
+ * Transport callback: open a non-blocking TCP connection to
+ * CATCHUP_PEER_ID and queue a catchup request for STREAM_INDEX on it.
+ *
+ * Returns the index of the new entry in msm->catchups.  On failure a
+ * warning is logged and 0 is returned; the half-initialized pool entry
+ * and its socket are released so they are not leaked.
+ * NOTE(review): 0 is also a valid pool index, so callers cannot tell a
+ * failure from the first entry by the return value alone.
+ */
+static uword
+catchup_request_fun (void *transport_main,
+                     u32 stream_index, mc_peer_id_t catchup_peer_id)
+{
+  mc_socket_main_t *msm = (mc_socket_main_t *) transport_main;
+  mc_main_t *mcm = &msm->mc_main;
+  vlib_main_t *vm = mcm->vlib_main;
+  mc_socket_catchup_t *c;
+  struct sockaddr_in addr;
+  unix_main_t *um = &unix_main;
+  int one = 1;
+
+  pool_get (msm->catchups, c);
+  memset (c, 0, sizeof (*c));
+
+  c->socket = socket (AF_INET, SOCK_STREAM, 0);
+  if (c->socket < 0)
+    {
+      clib_unix_warning ("socket");
+      goto fail;
+    }
+
+  if (ioctl (c->socket, FIONBIO, &one) < 0)
+    {
+      clib_unix_warning ("FIONBIO");
+      goto fail;
+    }
+
+  /* The peer id getters return values already in network byte order. */
+  memset (&addr, 0, sizeof (addr));
+  addr.sin_family = AF_INET;
+  addr.sin_addr.s_addr = mc_socket_peer_id_get_address (catchup_peer_id);
+  addr.sin_port = mc_socket_peer_id_get_port (catchup_peer_id);
+
+  c->connect_in_progress = 1;
+
+  if (MC_EVENT_LOGGING)
+    {
+      ELOG_TYPE_DECLARE (e) =
+      {
+      .format = "connecting to peer 0x%Lx",.format_args = "i8",};
+      struct
+      {
+        u64 peer;
+      } *ed;
+      ed = ELOG_DATA (&vm->elog_main, e);
+      ed->peer = catchup_peer_id.as_u64;
+    }
+
+  /* The socket is non-blocking, so EINPROGRESS is the normal outcome. */
+  if (connect (c->socket, (const void *) &addr, sizeof (addr))
+      < 0 && errno != EINPROGRESS)
+    {
+      clib_unix_warning ("connect to %U fails",
+                         format_socket_peer_id, catchup_peer_id);
+      goto fail;
+    }
+
+  {
+    unix_file_t template = { 0 };
+
+    template.read_function = catchup_client_read_ready;
+    template.write_function = catchup_client_write_ready;
+    template.error_function = catchup_socket_error_ready;
+    template.file_descriptor = c->socket;
+    template.private_data = (uword) msm;
+    c->unix_file_index = unix_file_add (um, &template);
+
+    hash_set (msm->catchup_index_by_file_descriptor, c->socket,
+              c - msm->catchups);
+  }
+
+  {
+    mc_msg_catchup_request_t *mp;
+    mp = catchup_add_pending_output (c, sizeof (mp[0]),
+                                     /* set_output_vector */ 0);
+    mp->peer_id = msm->mc_main.transport.our_catchup_peer_id;
+    mp->stream_index = stream_index;
+    mc_byte_swap_msg_catchup_request (mp);
+  }
+
+  return c - msm->catchups;
+
+fail:
+  /* The warnings above were issued before this point, so close() cannot
+     change the errno they report. */
+  if (c->socket >= 0)
+    close (c->socket);
+  pool_put (msm->catchups, c);
+  return 0;
+}
+
+/* Transport callback: hand DATA to the catchup connection whose pool
+   index is OPAQUE; DATA becomes that connection's output vector. */
+static void
+catchup_send_fun (void *transport_main, uword opaque, u8 * data)
+{
+  mc_socket_main_t *socket_main = (mc_socket_main_t *) transport_main;
+  mc_socket_catchup_t *catchup;
+
+  catchup = pool_elt_at_index (socket_main->catchups, opaque);
+  catchup_add_pending_output (catchup, /* n_bytes */ 0, data);
+}
+
+/*
+ * Look up the IPv4 address and MTU of interface IF_NAME.
+ *
+ * On success stores the address (as returned by the kernel, i.e. network
+ * byte order) in *IP4_ADDRESS and, when MTU is non-null, the interface
+ * MTU minus the IPv4 and UDP header sizes (20 + 8 bytes) in *MTU.
+ * Returns 0 on success and -1 on failure.
+ */
+static int
+find_interface_ip4_address (char *if_name, u32 * ip4_address, u32 * mtu)
+{
+  int fd;
+  struct ifreq ifr;
+  struct sockaddr_in *sa;
+
+  /* Any IPv4 socket works as a handle for the interface ioctls. */
+  fd = socket (PF_INET, SOCK_DGRAM, 0);
+  if (fd < 0)
+    {
+      clib_unix_error ("socket");
+      return -1;
+    }
+
+  /* Zero the request first: strncpy below copies at most size - 1 bytes,
+     so the interface name is always NUL terminated. */
+  memset (&ifr, 0, sizeof (ifr));
+  ifr.ifr_addr.sa_family = AF_INET;
+  strncpy (ifr.ifr_name, if_name, sizeof (ifr.ifr_name) - 1);
+  if (ioctl (fd, SIOCGIFADDR, &ifr) < 0)
+    {
+      /* NOTE(review): if clib_unix_error is fatal, a probe of an
+         interface without an address never reaches the cleanup below --
+         confirm whether clib_unix_warning was intended. */
+      clib_unix_error ("ioctl(SIOCGIFADDR)");
+      close (fd);
+      return -1;
+    }
+
+  sa = (void *) &ifr.ifr_addr;
+  clib_memcpy (ip4_address, &sa->sin_addr.s_addr, sizeof (ip4_address[0]));
+
+  if (ioctl (fd, SIOCGIFMTU, &ifr) < 0)
+    {
+      close (fd);
+      return -1;
+    }
+  if (mtu)
+    *mtu = ifr.ifr_mtu - ( /* IP4 header */ 20 + /* UDP header */ 8);
+
+  close (fd);
+
+  return 0;
+}
+
+/*
+ * Initialize the socket-based multicast transport MSM.
+ *
+ * Picks the multicast interface (msm->multicast_interface_name when
+ * set, otherwise the first of the N_INTFCS_TO_PROBE names in
+ * INTFC_PROBE_LIST that yields an IPv4 address), derives the receive
+ * MTU from it, creates the sockets, fills in the transport callbacks
+ * and finally calls mc_main_init.  The multicast group defaults to
+ * 239.255.0.7 when none is configured.
+ *
+ * Returns 0 on success, or an error when no interface address can be
+ * found or socket setup fails.
+ */
+clib_error_t *
+mc_socket_main_init (mc_socket_main_t * msm, char **intfc_probe_list,
+                     int n_intfcs_to_probe)
+{
+  mc_main_t *mcm = &msm->mc_main;
+  clib_error_t *error;
+  u32 if_address, mtu, k;
+  u32 found = 0;
+
+  /* 239.255.0.7 */
+  if (!msm->multicast_tx_ip4_address_host_byte_order)
+    msm->multicast_tx_ip4_address_host_byte_order = 0xefff0007;
+
+  if (msm->multicast_interface_name)
+    {
+      found = !find_interface_ip4_address (msm->multicast_interface_name,
+                                           &if_address, &mtu);
+    }
+  else
+    {
+      /* Probe candidates in order and keep the first one that works. */
+      for (k = 0; k < n_intfcs_to_probe && !found; k++)
+        {
+          if (find_interface_ip4_address (intfc_probe_list[k],
+                                          &if_address, &mtu))
+            continue;
+          msm->multicast_interface_name = intfc_probe_list[k];
+          found = 1;
+        }
+    }
+
+  if (!found)
+    return clib_error_return (0, "can't find interface ip4 address");
+
+  msm->if_ip4_address_net_byte_order = if_address;
+
+  /* Number of default-size VLIB buffers needed for one MTU, rounded up. */
+  msm->rx_mtu_n_bytes = mtu;
+  msm->rx_mtu_n_buffers =
+    msm->rx_mtu_n_bytes / VLIB_BUFFER_DEFAULT_FREE_LIST_BYTES;
+  if (msm->rx_mtu_n_bytes % VLIB_BUFFER_DEFAULT_FREE_LIST_BYTES)
+    msm->rx_mtu_n_buffers++;
+
+  error = socket_setup (msm);
+  if (error)
+    return error;
+
+  /* Peer ids combine our interface address with the bound ports. */
+  mcm->transport.our_ack_peer_id =
+    mc_socket_set_peer_id (msm->if_ip4_address_net_byte_order,
+                           msm->ack_udp_port);
+  mcm->transport.our_catchup_peer_id =
+    mc_socket_set_peer_id (msm->if_ip4_address_net_byte_order,
+                           msm->catchup_tcp_port);
+
+  mcm->transport.tx_buffer = tx_buffer;
+  mcm->transport.tx_ack = tx_ack;
+  mcm->transport.catchup_request_fun = catchup_request_fun;
+  mcm->transport.catchup_send_fun = catchup_send_fun;
+  mcm->transport.format_peer_id = format_socket_peer_id;
+  mcm->transport.opaque = msm;
+  mcm->transport.max_packet_size = mtu;
+
+  mc_main_init (mcm, "socket");
+
+  return 0;
+}
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vlib/unix/mc_socket.h b/src/vlib/unix/mc_socket.h
new file mode 100644
index 00000000000..273c9ad430b
--- /dev/null
+++ b/src/vlib/unix/mc_socket.h
@@ -0,0 +1,137 @@
+/*
+ * mc_socket.h: socket based multicast for vlib mc
+ *
+ * Copyright (c) 2010 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __included_mc_socket_h__
+#define __included_mc_socket_h__
+
+#include <vlib/unix/unix.h>
+#include <netinet/in.h>
+
+typedef struct
+{
+ int socket; /* UDP socket descriptor; see setup_mutlicast_socket */
+ struct sockaddr_in tx_addr; /* multicast group address and UDP port the socket is bound to */
+} mc_multicast_socket_t;
+
+/* TCP catchup socket: state of one catchup connection, kept in the
+   mc_socket_main_t catchups pool. */
+typedef struct
+{
+ int socket; /* TCP socket descriptor */
+ u32 unix_file_index; /* index returned by unix_file_add for this socket */
+
+ u8 *input_vector; /* presumably bytes received so far -- TODO confirm against the read handler */
+ u8 *output_vector; /* bytes queued for transmission (see catchup_add_pending_output) */
+ u32 output_vector_n_written; /* presumably how much of output_vector was already written -- TODO confirm */
+
+ u32 connect_in_progress; /* set to 1 before the non-blocking connect() is issued */
+} mc_socket_catchup_t;
+
+typedef struct mc_socket_main_t
+{
+ mc_main_t mc_main; /* generic multicast state; its transport callbacks are filled in by mc_socket_main_init */
+
+ /* Multicast mastership/join/to-relay/from-relay sockets,
+    indexed by MC_TRANSPORT_* type. */
+ mc_multicast_socket_t multicast_sockets[MC_N_TRANSPORT_TYPE];
+
+ /* Unicast UDP ack socket */
+ int ack_socket;
+
+ /* TCP catchup server (listening) socket */
+ int catchup_server_socket;
+
+ /* Pool of stream-private catchup sockets */
+ mc_socket_catchup_t *catchups;
+
+ uword *catchup_index_by_file_descriptor; /* hash: catchup socket descriptor -> index in catchups pool */
+
+ u32 rx_mtu_n_bytes; /* receive MTU in bytes (interface MTU less IP4 and UDP headers) */
+
+ /* Receive MTU expressed in default-size VLIB buffers, rounded up. */
+ u32 rx_mtu_n_buffers;
+
+ /* Vector of RX VLIB buffers. */
+ u32 *rx_buffers;
+ /* Vector of scatter/gather descriptors for sending/receiving VLIB buffers
+ via kernel. */
+ struct iovec *iovecs;
+
+ /* IP address of interface to use for multicast. */
+ u32 if_ip4_address_net_byte_order;
+
+ u32 ack_udp_port; /* port the ack socket was bound to, host byte order */
+ u32 catchup_tcp_port; /* port the catchup server socket was bound to, host byte order */
+
+ /* Interface on which to listen for multicasts. */
+ char *multicast_interface_name;
+
+ /* Multicast address to use (e.g. 0xefff0000).
+ Host byte order.  Defaults to 0xefff0007 (239.255.0.7) when zero. */
+ u32 multicast_tx_ip4_address_host_byte_order;
+
+ /* TTL to use for multicasts; zero is replaced by 1. */
+ u32 multicast_ttl;
+
+ /* Multicast ports for mastership, joins, etc. will be chosen
+ starting at the given port in host byte order.
+ MC_N_TRANSPORT_TYPE consecutive ports are used for the multicast
+ sockets; the ack and catchup sockets search upward from the next two. */
+ u32 base_multicast_udp_port_host_byte_order;
+} mc_socket_main_t;
+
+/* Extract the IPv4 address stored in bytes 0..3 of peer id I and return
+   it in network byte order. */
+always_inline u32
+mc_socket_peer_id_get_address (mc_peer_id_t i)
+{
+  /* Widen each byte to u32 before shifting: a u8 promotes to signed int,
+     and shifting a value >= 0x80 left by 24 would overflow it. */
+  u32 a = (((u32) i.as_u8[0] << 24)
+           | ((u32) i.as_u8[1] << 16)
+           | ((u32) i.as_u8[2] << 8) | ((u32) i.as_u8[3] << 0));
+  return clib_host_to_net_u32 (a);
+}
+
+/* Extract the port stored in bytes 4..5 of peer id I and return it in
+   network byte order. */
+always_inline u32
+mc_socket_peer_id_get_port (mc_peer_id_t i)
+{
+  u32 hi_byte = i.as_u8[4];
+  u32 lo_byte = i.as_u8[5];
+
+  return clib_host_to_net_u16 ((hi_byte << 8) | lo_byte);
+}
+
+/* Build a peer id from an IPv4 address (network byte order) and a port
+   (host byte order): bytes 0..3 hold the address and bytes 4..5 the
+   port, both most significant byte first; bytes 6..7 are zero. */
+static_always_inline mc_peer_id_t
+mc_socket_set_peer_id (u32 address_net_byte_order, u32 port_host_byte_order)
+{
+  mc_peer_id_t id;
+  u32 addr_host = ntohl (address_net_byte_order);
+
+  id.as_u8[0] = (u8) (addr_host >> 24);
+  id.as_u8[1] = (u8) (addr_host >> 16);
+  id.as_u8[2] = (u8) (addr_host >> 8);
+  id.as_u8[3] = (u8) addr_host;
+
+  id.as_u8[4] = (u8) (port_host_byte_order >> 8);
+  id.as_u8[5] = (u8) port_host_byte_order;
+
+  id.as_u8[6] = 0;
+  id.as_u8[7] = 0;
+
+  return id;
+}
+
+clib_error_t *mc_socket_main_init (mc_socket_main_t * msm,
+ char **intfc_probe_list,
+ int n_intfcs_to_probe);
+#endif /* __included_mc_socket_h__ */
+
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vlib/unix/physmem.c b/src/vlib/unix/physmem.c
new file mode 100644
index 00000000000..80ab7b9d6f8
--- /dev/null
+++ b/src/vlib/unix/physmem.c
@@ -0,0 +1,470 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * physmem.c: Unix physical memory
+ *
+ * Copyright (c) 2008 Eliot Dresselhaus
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <vlib/unix/physmem.h>
+
+static physmem_main_t physmem_main;
+
+/*
+ * Allocate N_BYTES from the physmem heap, aligned to at least a cache
+ * line, such that the object does not straddle a page boundary of
+ * 2^vpm->log2_n_bytes_per_page bytes.
+ *
+ * Candidates that would cross a boundary are kept allocated until a
+ * suitable one is found (or the heap is exhausted) and are then handed
+ * back.  Returns the object's address, or 0 when allocation fails.
+ */
+static void *
+unix_physmem_alloc_aligned (vlib_physmem_main_t * vpm, uword n_bytes,
+                            uword alignment)
+{
+  physmem_main_t *pm = &physmem_main;
+  uword first_byte, last_byte;
+  uword *rejected = 0;
+  uword k;
+
+#if DPDK > 0
+  clib_warning ("unsafe alloc!");
+#endif
+
+  /* IO memory is always at least cache aligned. */
+  alignment = clib_max (alignment, CLIB_CACHE_LINE_BYTES);
+
+  for (;;)
+    {
+      mheap_get_aligned (pm->heap, n_bytes,
+                         /* align */ alignment,
+                         /* align offset */ 0,
+                         &first_byte);
+
+      /* Heap exhausted. */
+      if (first_byte == ~0)
+        break;
+
+      /* Accept only objects whose first and last byte share a page. */
+      last_byte = first_byte + n_bytes - 1;
+      if ((first_byte >> vpm->log2_n_bytes_per_page) ==
+          (last_byte >> vpm->log2_n_bytes_per_page))
+        break;
+
+      /* Hold on to the rejected object so the next attempt cannot return
+         the same region; it is released below. */
+      vec_add1 (rejected, first_byte);
+    }
+
+  for (k = 0; k < vec_len (rejected); k++)
+    mheap_put (pm->heap, rejected[k]);
+  vec_free (rejected);
+
+  if (first_byte == ~0)
+    return 0;
+  return pm->heap + first_byte;
+}
+
+/* Return object X, previously obtained from unix_physmem_alloc_aligned,
+   to the physmem heap. */
+static void
+unix_physmem_free (void *x)
+{
+  void *heap = physmem_main.heap;
+
+  /* The heap is addressed by byte offset from its base. */
+  mheap_put (heap, x - heap);
+}
+
+/* atexit handler: mark the huge-page shared memory segment for removal
+   if one is recorded, then forget its id. */
+static void
+htlb_shutdown (void)
+{
+  physmem_main_t *pm = &physmem_main;
+
+  if (pm->shmid)
+    {
+      shmctl (pm->shmid, IPC_RMID, 0);
+      pm->shmid = 0;
+    }
+}
+
+/*
+ * Try to back physmem with huge TLB pages.
+ *
+ * Creates and attaches a SHM_HUGETLB shared memory segment of
+ * pm->mem_size bytes, builds the physmem heap inside it and fills
+ * vpm->page_table with the physical address of every huge page, read
+ * from /proc/self/pagemap.
+ *
+ * Returns 1 on success and 0 on failure.  On failure everything set up
+ * here is undone (segment detached and removed, page table freed, heap
+ * pointer cleared) so that the caller's fallback starts from a clean
+ * state.
+ */
+static int
+htlb_init (vlib_main_t * vm)
+{
+  vlib_physmem_main_t *vpm = &vm->physmem_main;
+  physmem_main_t *pm = &physmem_main;
+  u64 hugepagesize, pagesize;
+  u64 pfn, seek_loc;
+  u64 cur, physaddr, ptbits;
+  int fd = -1, i;
+
+  pm->shmid = shmget (11 /* key, my amp goes to 11 */ , pm->mem_size,
+                      IPC_CREAT | SHM_HUGETLB | SHM_R | SHM_W);
+  if (pm->shmid < 0)
+    {
+      clib_unix_warning ("shmget");
+      return 0;
+    }
+
+  /* shmat reports failure with (void *) -1, not with a null pointer. */
+  pm->mem = shmat (pm->shmid, NULL, 0 /* flags */ );
+  if (pm->mem == (void *) -1)
+    {
+      clib_unix_warning ("shmat");
+      shmctl (pm->shmid, IPC_RMID, 0);
+      pm->mem = MAP_FAILED;
+      return 0;
+    }
+
+  memset (pm->mem, 0, pm->mem_size);
+
+  /* $$$ get page size info from /proc/meminfo */
+  hugepagesize = 2 << 20;
+  pagesize = 4 << 10;
+  vpm->log2_n_bytes_per_page = min_log2 (hugepagesize);
+  vec_resize (vpm->page_table, pm->mem_size / hugepagesize);
+
+  vpm->page_mask = pow2_mask (vpm->log2_n_bytes_per_page);
+  vpm->virtual.start = pointer_to_uword (pm->mem);
+  vpm->virtual.size = pm->mem_size;
+  vpm->virtual.end = vpm->virtual.start + vpm->virtual.size;
+
+  fd = open ("/proc/self/pagemap", O_RDONLY);
+  if (fd < 0)
+    {
+      clib_unix_warning ("open /proc/self/pagemap");
+      goto fail;
+    }
+
+  pm->heap = mheap_alloc_with_flags (pm->mem, pm->mem_size,
+                                     /* Don't want mheap mmap/munmap with IO memory. */
+                                     MHEAP_FLAG_DISABLE_VM);
+
+  cur = pointer_to_uword (pm->mem);
+  i = 0;
+
+  /* One pagemap entry (u64) exists per small page; look up the entry of
+     the first small page of every huge page. */
+  while (cur < pointer_to_uword (pm->mem) + pm->mem_size)
+    {
+      pfn = (u64) cur / pagesize;
+      seek_loc = pfn * sizeof (u64);
+      if (lseek (fd, seek_loc, SEEK_SET) != seek_loc)
+        {
+          clib_unix_warning ("lseek to 0x%llx", seek_loc);
+          goto fail;
+        }
+      if (read (fd, &ptbits, sizeof (ptbits)) != (sizeof (ptbits)))
+        {
+          clib_unix_warning ("read ptbits");
+          goto fail;
+        }
+
+      /* bits 0-54 are the physical page number */
+      physaddr = (ptbits & 0x7fffffffffffffULL) * pagesize;
+      if (CLIB_DEBUG > 1)
+        fformat (stderr, "pm: virtual 0x%llx physical 0x%llx\n",
+                 cur, physaddr);
+      vpm->page_table[i++] = physaddr;
+
+      cur += hugepagesize;
+    }
+  close (fd);
+  atexit (htlb_shutdown);
+  return 1;
+
+fail:
+  /* Undo everything so that no stale page table entries or heap pointer
+     into the detached segment survive. */
+  if (fd >= 0)
+    close (fd);
+  vec_free (vpm->page_table);
+  pm->heap = 0;
+  (void) shmdt (pm->mem);
+  pm->mem = MAP_FAILED;
+  shmctl (pm->shmid, IPC_RMID, 0);
+  return 0;
+}
+
+int vlib_app_physmem_init (vlib_main_t * vm,
+ physmem_main_t * pm, int) __attribute__ ((weak)); /* weak: an application may supply its own definition */
+int
+vlib_app_physmem_init (vlib_main_t * vm, physmem_main_t * pm, int x)
+{
+ return 0; /* default: zero makes unix_physmem_init go on with its own setup; non-zero would make it return at once */
+}
+
+/*
+ * Set up the physmem allocator for VM.
+ *
+ * Installs the alloc/free callbacks, then obtains memory in this order
+ * of preference: the application hook vlib_app_physmem_init, huge TLB
+ * pages (unless disabled by configuration), and finally an anonymous
+ * private mapping treated as one identity-mapped "fake" page.  The
+ * region size defaults to 16 MB.  Only the first call does any work.
+ *
+ * Returns 0 on success or an error when the fallback mmap fails.
+ */
+clib_error_t *
+unix_physmem_init (vlib_main_t * vm, int physical_memory_required)
+{
+  physmem_main_t *pm = &physmem_main;
+  vlib_physmem_main_t *vpm = &vm->physmem_main;
+
+  /* Avoid multiple calls. */
+  if (vm->os_physmem_alloc_aligned)
+    return 0;
+
+  vm->os_physmem_alloc_aligned = unix_physmem_alloc_aligned;
+  vm->os_physmem_free = unix_physmem_free;
+  pm->mem = MAP_FAILED;
+
+  if (pm->mem_size == 0)
+    pm->mem_size = 16 << 20;
+
+  /* OK, Mr. App, you tell us */
+  if (vlib_app_physmem_init (vm, pm, physical_memory_required))
+    return 0;
+
+  if (!pm->no_hugepages && htlb_init (vm))
+    {
+      fformat (stderr, "%s: use huge pages\n", __FUNCTION__);
+      return 0;
+    }
+
+  /* Fallback: ordinary anonymous memory. */
+  pm->mem = mmap (0, pm->mem_size, PROT_READ | PROT_WRITE,
+                  MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+  if (pm->mem == MAP_FAILED)
+    return clib_error_return_unix (0, "mmap");
+
+  pm->heap = mheap_alloc (pm->mem, pm->mem_size);
+
+  /* Identity map with a single page. */
+  vpm->log2_n_bytes_per_page = min_log2 (pm->mem_size);
+  vec_add1 (vpm->page_table, pointer_to_uword (pm->mem));
+
+  vpm->page_mask = pow2_mask (vpm->log2_n_bytes_per_page);
+  vpm->virtual.start = pointer_to_uword (pm->mem);
+  vpm->virtual.size = pm->mem_size;
+  vpm->virtual.end = vpm->virtual.start + vpm->virtual.size;
+  vpm->is_fake = 1;
+
+  fformat (stderr, "%s: use fake dma pages\n", __FUNCTION__);
+
+  return 0;
+}
+
+/* CLI handler for "show physmem": print a verbose dump of the physmem
+   heap, or a note when none exists or DPDK is in use. */
+static clib_error_t *
+show_physmem (vlib_main_t * vm,
+              unformat_input_t * input, vlib_cli_command_t * cmd)
+{
+#if DPDK > 0
+  vlib_cli_output (vm, "Not supported with DPDK drivers.");
+#else
+  void *heap = physmem_main.heap;
+
+  if (!heap)
+    vlib_cli_output (vm, "No physmem allocated.");
+  else
+    vlib_cli_output (vm, "%U", format_mheap, heap, /* verbose */ 1);
+#endif
+  return 0;
+}
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (show_physmem_command, static) = {
+ .path = "show physmem", /* command path that invokes show_physmem */
+ .short_help = "Show physical memory allocation",
+ .function = show_physmem,
+};
+/* *INDENT-ON* */
+
+/*
+ * CLI handler for "show affinity": print the CPUs (0..63) this process
+ * may run on as a comma separated list of single CPUs and inclusive
+ * ranges, e.g. "0-3,5,8-9".
+ */
+static clib_error_t *
+show_affinity (vlib_main_t * vm,
+               unformat_input_t * input, vlib_cli_command_t * cmd)
+{
+  const int n_cpus = 64;
+  cpu_set_t set;
+  u8 *s = 0;
+  int i;
+  int run_start = -1;
+
+  if (sched_getaffinity (0 /* pid, 0 = this proc */ ,
+                         sizeof (set), &set) < 0)
+    {
+      vlib_cli_output (vm, "Couldn't get affinity mask: %s\n",
+                       strerror (errno));
+      return 0;
+    }
+
+  /* i == n_cpus is an extra pass that closes a run reaching the last CPU,
+     so such a run is reported with its true upper bound. */
+  for (i = 0; i <= n_cpus; i++)
+    {
+      int is_set = (i < n_cpus) && CPU_ISSET (i, &set);
+
+      if (is_set)
+        {
+          if (run_start < 0)
+            run_start = i;
+          continue;
+        }
+
+      if (run_start < 0)
+        continue;
+
+      /* CPU i is clear, so the run that just ended covers run_start..i-1. */
+      if (vec_len (s) > 0)
+        s = format (s, ",");
+      if (run_start == i - 1)
+        s = format (s, "%d", run_start);
+      else
+        s = format (s, "%d-%d", run_start, i - 1);
+      run_start = -1;
+    }
+
+  vlib_cli_output (vm, "Process runs on: %v", s);
+  vec_free (s);
+  return 0;
+}
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (show_affinity_command, static) = {
+ .path = "show affinity", /* command path that invokes show_affinity */
+ .short_help = "Show process cpu affinity",
+ .function = show_affinity,
+};
+/* *INDENT-ON* */
+
+/*
+ * CLI handler for "set affinity": parse a comma separated list of CPUs
+ * and inclusive CPU ranges (e.g. "0-3,5"), apply it as this process's
+ * affinity mask and then show the resulting mask.
+ *
+ * Valid CPU numbers are 0..63, the same range show_affinity reports.
+ * Invalid input or a failing sched_setaffinity produces a message on
+ * the CLI; the function always returns 0.
+ */
+static clib_error_t *
+set_affinity (vlib_main_t * vm,
+              unformat_input_t * input, vlib_cli_command_t * cmd)
+{
+  const u32 n_cpus = 64;
+  cpu_set_t set;
+  cpu_set_t *setp = &set;
+  int rv;
+  int another_round;
+  u32 first, last, cpu;
+
+  memset (setp, 0, sizeof (*setp));
+
+  do
+    {
+      int is_range = 1;
+
+      /* A trailing comma means another element follows. */
+      another_round = 0;
+      if (unformat (input, "%d-%d,", &first, &last))
+        another_round = 1;
+      else if (unformat (input, "%d-%d", &first, &last))
+        ;
+      else if (unformat (input, "%d,", &first))
+        {
+          is_range = 0;
+          last = first;
+          another_round = 1;
+        }
+      else if (unformat (input, "%d", &first))
+        {
+          is_range = 0;
+          last = first;
+        }
+      else
+        break;
+
+      if (first >= n_cpus || last >= n_cpus)
+        {
+          if (is_range)
+            vlib_cli_output (vm, "range %d-%d invalid", first, last);
+          else
+            vlib_cli_output (vm, "cpu %d invalid", first);
+          return 0;
+        }
+
+      for (cpu = first; cpu <= last; cpu++)
+        CPU_SET (cpu, setp);
+    }
+  while (another_round);
+
+  rv = sched_setaffinity (0 /* pid, 0 = this proc */ ,
+                          sizeof (*setp), setp);
+
+  if (rv < 0)
+    {
+      vlib_cli_output (vm, "Couldn't set affinity mask: %s\n",
+                       strerror (errno));
+      return 0;
+    }
+  return show_affinity (vm, input, cmd);
+}
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (set_affinity_command, static) = {
+ .path = "set affinity", /* command path that invokes set_affinity */
+ .short_help = "Set process cpu affinity",
+ .function = set_affinity,
+};
+/* *INDENT-ON* */
+
+/*
+ * Early config handler for the "physmem" section.
+ *
+ * Recognized options:
+ *   no-huge | no-huge-pages      do not attempt to use huge TLB pages
+ *   size-in-mb <n> | size <n>    size of the physmem region in megabytes
+ *
+ * Returns 0 on success or a parse error for unrecognized input.
+ */
+static clib_error_t *
+vlib_physmem_configure (vlib_main_t * vm, unformat_input_t * input)
+{
+  physmem_main_t *pm = &physmem_main;
+  u32 size_in_mb;
+
+  while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+    {
+      if (unformat (input, "no-huge") || unformat (input, "no-huge-pages"))
+        pm->no_hugepages = 1;
+
+      else if (unformat (input, "size-in-mb %d", &size_in_mb) ||
+               unformat (input, "size %d", &size_in_mb))
+        /* Widen before shifting so sizes of 4096 MB and above do not
+           overflow 32 bits. */
+        pm->mem_size = ((uword) size_in_mb) << 20;
+      else
+        return unformat_parse_error (input);
+    }
+
+  unformat_free (input);
+  return 0;
+}
+
+VLIB_EARLY_CONFIG_FUNCTION (vlib_physmem_configure, "physmem");
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vlib/unix/physmem.h b/src/vlib/unix/physmem.h
new file mode 100644
index 00000000000..5519a7d6f3e
--- /dev/null
+++ b/src/vlib/unix/physmem.h
@@ -0,0 +1,65 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef __included_physmem_h__
+#define __included_physmem_h__
+
+/* Manage I/O physical memory. */
+#define _GNU_SOURCE
+#include <sched.h>
+#include <vppinfra/cache.h>
+#include <vppinfra/error.h>
+#include <vppinfra/mheap.h>
+#include <vppinfra/os.h>
+
+#include <vlib/vlib.h>
+#include <vlib/unix/unix.h>
+
+#include <sys/fcntl.h> /* for open */
+#include <sys/file.h> /* for flock */
+#include <sys/ioctl.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <sys/ipc.h>
+#include <sys/shm.h>
+
+typedef struct
+{
+ /* Base of the memory region: an attached huge-page shared memory
+    segment or an anonymous mmap; MAP_FAILED while nothing is mapped. */
+ void *mem;
+
+ /* Size in bytes; defaults to 16 MB when left at zero. */
+ uword mem_size;
+
+ /* Heap allocated out of virtual memory. */
+ void *heap;
+
+ /* huge TLB segment id */
+ int shmid;
+
+ /* Non-zero disables the huge TLB attempt
+    (set by the "no-huge" / "no-huge-pages" config options). */
+ int no_hugepages;
+
+} physmem_main_t;
+
+#endif /* __included_physmem_h__ */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vlib/unix/plugin.c b/src/vlib/unix/plugin.c
new file mode 100644
index 00000000000..b3d5be02ed6
--- /dev/null
+++ b/src/vlib/unix/plugin.c
@@ -0,0 +1,260 @@
+/*
+ * plugin.c: plugin handling
+ *
+ * Copyright (c) 2011 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vlib/unix/plugin.h>
+#include <dlfcn.h>
+#include <dirent.h>
+
+plugin_main_t vlib_plugin_main;
+
+/* Record CB as the callback that produces the handoff structure passed
+   to each plugin's registration function. */
+void
+vlib_set_get_handoff_structure_cb (void *cb)
+{
+  vlib_plugin_main.handoff_structure_get_cb = cb;
+}
+
+/* Call the registered handoff-structure callback and return its result;
+   returns 0 when no callback has been registered. */
+static void *
+vnet_get_handoff_structure (void)
+{
+  void *(*get_cb) (void) = vlib_plugin_main.handoff_structure_get_cb;
+
+  if (get_cb != 0)
+    return (*get_cb) ();
+
+  return 0;
+}
+
+/*
+ * dlopen the plugin named by pi->name and run its vlib_plugin_register
+ * entry point with the application's handoff structure.
+ *
+ * Returns 0 when the plugin was loaded and registered, -1 when dlopen
+ * fails, and 1 when the entry point is missing or registration reports
+ * an error (the plugin is dlclose()d again in both of those cases).
+ */
+static int
+load_one_plugin (plugin_main_t * pm, plugin_info_t * pi, int from_early_init)
+{
+  clib_error_t *(*register_fn) (vlib_main_t *, void *, int);
+  clib_error_t *error;
+  void *handle;
+  void *handoff;
+
+  handle = dlopen ((char *) pi->name, RTLD_LAZY);
+  if (!handle)
+    {
+      /*
+       * Note: this can happen if the plugin has an undefined symbol
+       * reference, so print a warning. Otherwise nobody would know what
+       * happened.
+       */
+      clib_warning ("%s", dlerror ());
+      return -1;
+    }
+  pi->handle = handle;
+
+  register_fn = dlsym (pi->handle, "vlib_plugin_register");
+  if (!register_fn)
+    {
+      dlclose (handle);
+      clib_warning ("Plugin missing vlib_plugin_register: %s\n",
+                    (char *) pi->name);
+      return 1;
+    }
+
+  /* Registration needs the handoff structure; without it we fail. */
+  handoff = vnet_get_handoff_structure ();
+  if (handoff != 0)
+    error = (*register_fn) (pm->vlib_main, handoff, from_early_init);
+  else
+    error = clib_error_return (0, "handoff structure callback returned 0");
+
+  if (error)
+    {
+      clib_error_report (error);
+      dlclose (handle);
+      return 1;
+    }
+
+  clib_warning ("Loaded plugin: %s", pi->name);
+  return 0;
+}
+
+/* Split pm->plugin_path at ':' characters.  Returns a vector of newly
+   allocated, NUL terminated directory names that the caller must free. */
+static u8 **
+split_plugin_path (plugin_main_t * pm)
+{
+  u8 **dirs = 0;
+  u8 *current = 0;
+  int k;
+
+  for (k = 0; k < vec_len (pm->plugin_path); k++)
+    {
+      u8 ch = pm->plugin_path[k];
+
+      if (ch == ':')
+        {
+          /* Separator: terminate and emit the directory collected so far. */
+          vec_add1 (current, 0);
+          vec_add1 (dirs, current);
+          current = 0;
+        }
+      else
+        vec_add1 (current, ch);
+    }
+
+  /* Emit whatever follows the last separator. */
+  if (current)
+    {
+      vec_add1 (current, 0);
+      vec_add1 (dirs, current);
+    }
+
+  return dirs;
+}
+
+/*
+ * Scan every directory on pm->plugin_path and load each regular file
+ * ending in ".so" that is not yet recorded in pm->plugin_by_name_hash.
+ *
+ * When pm->plugin_name_filter is set, directory entries whose leading
+ * bytes differ from the filter vector are skipped.  A loaded plugin is
+ * appended to pm->plugin_info and indexed by its full path name;
+ * plugins that fail to load leave no record behind.
+ *
+ * Always returns 0.
+ */
+int
+vlib_load_new_plugins (plugin_main_t * pm, int from_early_init)
+{
+  DIR *dp;
+  struct dirent *entry;
+  struct stat statb;
+  uword *p;
+  plugin_info_t *pi;
+  u8 **plugin_path;
+  int i;
+
+  plugin_path = split_plugin_path (pm);
+
+  for (i = 0; i < vec_len (plugin_path); i++)
+    {
+      dp = opendir ((char *) plugin_path[i]);
+
+      if (dp == 0)
+        {
+          /* Unreadable directory: still release its name. */
+          vec_free (plugin_path[i]);
+          continue;
+        }
+
+      while ((entry = readdir (dp)))
+        {
+          u8 *plugin_name;
+          char *ext;
+
+          if (pm->plugin_name_filter)
+            {
+              int j, mismatch = 0;
+              for (j = 0; j < vec_len (pm->plugin_name_filter); j++)
+                if (entry->d_name[j] != pm->plugin_name_filter[j])
+                  {
+                    mismatch = 1;
+                    break;
+                  }
+              if (mismatch)
+                continue;
+            }
+
+          plugin_name = format (0, "%s/%s%c", plugin_path[i],
+                                entry->d_name, 0);
+
+          /* Only accept readable regular files named *.so; this skips
+             directories and other things which aren't plugins. */
+          ext = strrchr ((const char *) plugin_name, '.');
+          if (!ext || (strcmp (ext, ".so") != 0) ||
+              stat ((char *) plugin_name, &statb) < 0 ||
+              !S_ISREG (statb.st_mode))
+            {
+              vec_free (plugin_name);
+              continue;
+            }
+
+          p = hash_get_mem (pm->plugin_by_name_hash, plugin_name);
+          if (p != 0)
+            {
+              /* Already loaded: the freshly built name is not needed. */
+              vec_free (plugin_name);
+              continue;
+            }
+
+          vec_add2 (pm->plugin_info, pi, 1);
+          pi->name = plugin_name;
+          pi->file_info = statb;
+
+          if (load_one_plugin (pm, pi, from_early_init))
+            {
+              /* Load failed: scrub the slot and drop it from the vector. */
+              vec_free (plugin_name);
+              memset (pi, 0, sizeof (*pi));
+              _vec_len (pm->plugin_info) = vec_len (pm->plugin_info) - 1;
+              continue;
+            }
+
+          /* Keep the filled-in record; the hash key shares pi->name. */
+          hash_set_mem (pm->plugin_by_name_hash, plugin_name,
+                        pi - pm->plugin_info);
+        }
+      closedir (dp);
+      vec_free (plugin_path[i]);
+    }
+  vec_free (plugin_path);
+  return 0;
+}
+
+char *vlib_plugin_path __attribute__ ((weak)); /* weak: an application may define its own plugin search path */
+char *vlib_plugin_path = ""; /* default: empty path */
+char *vlib_plugin_name_filter __attribute__ ((weak)); /* weak: an application may define its own name filter */
+char *vlib_plugin_name_filter = 0; /* default: no filter */
+
+/* Initialize the plugin subsystem for VM from vlib_plugin_path and
+   vlib_plugin_name_filter, then load every plugin found.  Returns the
+   result of vlib_load_new_plugins. */
+int
+vlib_plugin_early_init (vlib_main_t * vm)
+{
+  plugin_main_t *plugins = &vlib_plugin_main;
+
+  /* Private, NUL terminated copy of the search path. */
+  plugins->plugin_path = format (0, "%s%c", vlib_plugin_path, 0);
+
+  clib_warning ("plugin path %s", plugins->plugin_path);
+
+  if (vlib_plugin_name_filter != 0)
+    plugins->plugin_name_filter =
+      format (0, "%s%c", vlib_plugin_name_filter, 0);
+
+  plugins->plugin_by_name_hash = hash_create_string (0, sizeof (uword));
+  plugins->vlib_main = vm;
+
+  return vlib_load_new_plugins (plugins, /* from_early_init */ 1);
+}
+
+/* CLI handler for "show plugins": print the plugin search path, the
+   name filter (when one is set) and a numbered list of loaded plugins. */
+static clib_error_t *
+vlib_plugins_show_cmd_fn (vlib_main_t * vm,
+                          unformat_input_t * input, vlib_cli_command_t * cmd)
+{
+  plugin_main_t *pm = &vlib_plugin_main;
+  u8 *out = 0;
+  u8 *key = 0;
+  uword *value = 0;
+  int index = 1;
+
+  out = format (out, " Plugin path is: %s\n", pm->plugin_path);
+  if (vlib_plugin_name_filter)
+    out = format (out, " Plugin filter: %s\n", vlib_plugin_name_filter);
+
+  out = format (out, " Plugins loaded: \n");
+
+  /* The counter advances for every hash entry, printed or not. */
+  hash_foreach_mem (key, value, pm->plugin_by_name_hash,
+  {
+    if (key != 0)
+      out = format (out, " %d.%s\n", index, key);
+    index++;
+  });
+
+  vlib_cli_output (vm, "%v", out);
+  vec_free (out);
+  return 0;
+}
+
+VLIB_CLI_COMMAND (plugins_show_cmd, static) = /* registers the "show plugins" CLI command */
+{
+.path = "show plugins",.short_help = "show loaded plugins",.function =
+ vlib_plugins_show_cmd_fn,};
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vlib/unix/plugin.h b/src/vlib/unix/plugin.h
new file mode 100644
index 00000000000..c17053bd306
--- /dev/null
+++ b/src/vlib/unix/plugin.h
@@ -0,0 +1,98 @@
+/*
+ * plugin.h: plugin handling
+ *
+ * Copyright (c) 2011 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __included_plugin_h__
+#define __included_plugin_h__
+
+#include <vlib/vlib.h>
+#include <vlib/unix/unix.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <unistd.h>
+
+/*
+ * vlib plugin scheme
+ *
+ * Almost anything which can be made to work in a vlib unix
+ * application will also work in a vlib plugin.
+ *
+ * The elf-section magic which registers static objects
+ * works so long as plugins are present when the vlib unix process
+ * starts. But wait: there's more...
+ *
+ * If an application calls vlib_load_new_plugins() -- possibly after
+ * changing vlib_plugin_main.plugin_path / vlib_plugin_main.plugin_name_filter,
+ * -- new plugins will be loaded. That, in turn, allows considerable
+ * flexibility in terms of adding feature code or fixing bugs without
+ * requiring the data-plane process to restart.
+ *
+ * When the plugin mechanism loads a plugin, it uses dlsym to locate
+ * and call the plugin's function vlib_plugin_register() if it exists.
+ * A plugin which expects to be loaded after the vlib application
+ * starts uses this callback to modify the application. If vlib_plugin_register
+ * returns non-zero, the plugin mechanism dlclose()'s the plugin.
+ *
+ * Applications control the plugin search path and name filter by
+ * declaring the variables vlib_plugin_path and vlib_plugin_name_filter.
+ * libvlib_unix.la supplies weak references for these symbols which
+ * effectively disable the scheme. In order for the elf-section magic to
+ * work, static plugins must be loaded at the earliest possible moment.
+ *
+ * An application can change these parameters at any time and call
+ * vlib_load_new_plugins().
+ */
+
+
+
+typedef struct /* one record per plugin; plugin_main_t.plugin_info is a vector of these */
+{
+ u8 *name; /* plugin name -- presumably a vppinfra vector; TODO confirm in plugin.c */
+ struct stat file_info; /* stat(2) data for the plugin file -- NOTE(review): filled in outside this view */
+ void *handle; /* presumably the dlopen() handle (the scheme comment above mentions dlsym/dlclose) -- confirm */
+} plugin_info_t;
+
+typedef struct /* global plugin-loader state; the single instance is vlib_plugin_main */
+{
+ /* loaded plugin info */
+ plugin_info_t *plugin_info; /* vector of plugin records */
+ uword *plugin_by_name_hash; /* string-keyed hash: plugin name -> index into plugin_info */
+
+ /* path and name filter */
+ u8 *plugin_path; /* NUL-terminated copy of vlib_plugin_path, made by vlib_plugin_early_init() */
+ u8 *plugin_name_filter; /* NUL-terminated copy of vlib_plugin_name_filter; not set when that is NULL */
+
+ /* handoff structure get callback */
+ void *handoff_structure_get_cb; /* NOTE(review): untyped; its signature is not visible from this header */
+
+ /* usual */
+ vlib_main_t *vlib_main; /* back pointer, stored by vlib_plugin_early_init() */
+} plugin_main_t;
+
+extern plugin_main_t vlib_plugin_main; /* the global plugin-loader state */
+
+int vlib_plugin_early_init (vlib_main_t * vm); /* copies path/filter, creates the name hash, then calls vlib_load_new_plugins (pm, 1) */
+int vlib_load_new_plugins (plugin_main_t * pm, int from_early_init); /* loads plugins not yet loaded, per the scheme comment above; from_early_init is 1 only for the startup call */
+
+#endif /* __included_plugin_h__ */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vlib/unix/unix.h b/src/vlib/unix/unix.h
new file mode 100644
index 00000000000..ea0d417b2b1
--- /dev/null
+++ b/src/vlib/unix/unix.h
@@ -0,0 +1,232 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * unix.h: Unix specific main state
+ *
+ * Copyright (c) 2008 Eliot Dresselhaus
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef included_unix_unix_h
+#define included_unix_unix_h
+
+#include <vppinfra/socket.h>
+#include <termios.h>
+
+struct unix_file; /* forward declaration so the callback type below can name it */
+typedef clib_error_t *(unix_file_function_t) (struct unix_file * f); /* callback type for the read/write/error handlers of a polled file */
+
+typedef struct unix_file /* one polled file; elements live in unix_main_t.file_pool */
+{
+ /* Unix file descriptor from open/socket. */
+ u32 file_descriptor; /* unsigned: unix_file_del() stores ~0 here after close() */
+
+ u32 flags; /* bitmask of the UNIX_FILE_* bits defined below */
+#define UNIX_FILE_DATA_AVAILABLE_TO_WRITE (1 << 0) /* toggled by unix_file_set_data_available_to_write() */
+#define UNIX_FILE_EVENT_EDGE_TRIGGERED (1 << 1) /* NOTE(review): consumer of this bit is outside this view */
+
+ /* Data available for function's use. */
+ uword private_data;
+
+ /* Functions to be called when read/write data becomes ready. */
+ unix_file_function_t *read_function, *write_function, *error_function; /* NOTE(review): which of these may be NULL is not visible here */
+} unix_file_t;
+
+typedef struct /* one slot of unix_main_t.error_history */
+{
+ f64 time; /* vlib_time_now() at the moment unix_save_error() stored the error */
+ clib_error_t *error; /* freed with clib_error_free_vector() when the slot is overwritten */
+} unix_error_history_t;
+
+typedef enum /* reason code passed to unix_main_t.file_update */
+{
+ UNIX_FILE_UPDATE_ADD, /* sent by unix_file_add() after the pool element is filled in */
+ UNIX_FILE_UPDATE_MODIFY, /* sent when the DATA_AVAILABLE_TO_WRITE flag changes */
+ UNIX_FILE_UPDATE_DELETE, /* sent by unix_file_del() before the descriptor is closed */
+} unix_file_update_type_t;
+
+typedef struct /* process-wide Unix state; the single instance is unix_main */
+{
+ /* Back pointer to main structure. */
+ vlib_main_t *vlib_main;
+
+ u32 flags; /* bitmask of the UNIX_FLAG_* bits defined below */
+ /* Run interactively or as daemon (background process). */
+#define UNIX_FLAG_INTERACTIVE (1 << 0)
+#define UNIX_FLAG_NODAEMON (1 << 1)
+
+ /* Pool of files to poll for input/output. */
+ unix_file_t *file_pool;
+
+ /* CLI listen socket. */
+ clib_socket_t cli_listen_socket;
+
+ void (*file_update) (unix_file_t * file,
+ unix_file_update_type_t update_type); /* hook called whenever a pooled file is added, modified or deleted */
+
+ /* Circular buffer of last unix errors. */
+ unix_error_history_t error_history[128];
+ u32 error_history_index; /* next slot to overwrite; wraps to 0 at ARRAY_LEN (error_history) */
+ u64 n_total_errors; /* incremented on every unix_save_error() call */
+
+ /* startup-config filename */
+ u8 *startup_config_filename;
+
+ /* unix config complete */
+ volatile int unix_config_complete;
+
+ /* CLI log file. GIGO. */
+ u8 *log_filename;
+ int log_fd;
+
+ /* Don't put CLI connections into character mode */
+ int cli_line_mode;
+
+ /* Maximum amount of command line history to keep per session */
+ u32 cli_history_limit;
+
+ /* Suppress the welcome banner at CLI session start */
+ int cli_no_banner;
+
+ /* Maximum pager buffer size */
+ u32 cli_pager_buffer_limit;
+
+ /* Suppress the pager */
+ int cli_no_pager;
+
+ /* Store the original state of stdin when it's a tty */
+ struct termios tio_stdin;
+ int tio_isset; /* presumably non-zero once tio_stdin holds saved settings -- confirm against the CLI code */
+} unix_main_t;
+
+/* Global main structure. */
+extern unix_main_t unix_main;
+
+/*
+ * Add a file to the poll set.
+ *
+ * Takes a free element from um->file_pool, copies *template into it and
+ * reports the addition through um->file_update.
+ * Returns the pool index of the new element.
+ */
+always_inline uword
+unix_file_add (unix_main_t * um, unix_file_t * template)
+{
+  unix_file_t *slot;
+
+  pool_get (um->file_pool, slot);
+  *slot = *template;
+
+  um->file_update (slot, UNIX_FILE_UPDATE_ADD);
+
+  return slot - um->file_pool;
+}
+
+always_inline void /* remove f from the poll set, close its descriptor and return it to the pool */
+unix_file_del (unix_main_t * um, unix_file_t * f)
+{
+ um->file_update (f, UNIX_FILE_UPDATE_DELETE); /* notify first, while f->file_descriptor is still open */
+ close (f->file_descriptor); /* return value is not checked */
+ f->file_descriptor = ~0; /* overwrite the stale descriptor number */
+ pool_put (um->file_pool, f);
+}
+
+/*
+ * Set or clear UNIX_FILE_DATA_AVAILABLE_TO_WRITE on the pooled file with
+ * index unix_file_index, according to whether is_available is non-zero.
+ *
+ * unix_main.file_update is called with UNIX_FILE_UPDATE_MODIFY only when
+ * the flag actually changes.  Returns 1 if the flag was set before the
+ * call, 0 otherwise.
+ */
+always_inline uword
+unix_file_set_data_available_to_write (u32 unix_file_index,
+                                       uword is_available)
+{
+  unix_file_t *file = pool_elt_at_index (unix_main.file_pool,
+                                         unix_file_index);
+  uword had_flag = (file->flags & UNIX_FILE_DATA_AVAILABLE_TO_WRITE) != 0;
+  uword want_flag = is_available != 0;
+
+  /* Nothing to do when the flag already has the requested state. */
+  if (had_flag == want_flag)
+    return had_flag;
+
+  file->flags ^= UNIX_FILE_DATA_AVAILABLE_TO_WRITE;
+  unix_main.file_update (file, UNIX_FILE_UPDATE_MODIFY);
+  return had_flag;
+}
+
+/*
+ * Record error in the circular error history of um.
+ *
+ * The error previously held in the current slot is freed, the new error
+ * and the current vlib time are stored, the total error count is bumped
+ * and the slot index advances, wrapping at the end of the array.
+ */
+always_inline void
+unix_save_error (unix_main_t * um, clib_error_t * error)
+{
+  unix_error_history_t *slot = &um->error_history[um->error_history_index];
+
+  clib_error_free_vector (slot->error);
+  slot->error = error;
+  slot->time = vlib_time_now (um->vlib_main);
+
+  um->n_total_errors++;
+
+  um->error_history_index++;
+  if (um->error_history_index >= ARRAY_LEN (um->error_history))
+    um->error_history_index = 0;
+}
+
+/* Main function for Unix VLIB. */
+int vlib_unix_main (int argc, char *argv[]);
+
+/* Call to allocate/initialize physical DMA memory subsystem.
+ This is not an init function so that users can explicitly enable/disable
+ physmem when it's not needed. */
+clib_error_t *unix_physmem_init (vlib_main_t * vm,
+ int fail_if_physical_memory_not_present);
+
+/* Return the is_fake field of vm's physmem_main. */
+static inline int
+unix_physmem_is_fake (vlib_main_t * vm)
+{
+  return vm->physmem_main.is_fake;
+}
+
+/* Set prompt for CLI. */
+void vlib_unix_cli_set_prompt (char *prompt);
+
+static inline unix_main_t * /* accessor: returns the address of the global unix_main */
+vlib_unix_get_main (void)
+{
+ return &unix_main;
+}
+
+/* thread stack array; vec_len = max number of threads */
+extern u8 **vlib_thread_stacks;
+
+/* utils */
+
+clib_error_t *vlib_sysfs_write (char *file_name, char *fmt, ...); /* format the arguments and write the result to file_name; 0 on success */
+
+clib_error_t *vlib_sysfs_read (char *file_name, char *fmt, ...); /* read at most 4096 bytes from file_name and unformat them with fmt; 0 on success */
+
+u8 *vlib_sysfs_link_to_name (char *link); /* last path component of the symlink target, or 0 on failure */
+
+int vlib_sysfs_get_free_hugepages (unsigned int numa_node, int page_size); /* free_hugepages value for the node and page size (in kB), or -1 */
+
+clib_error_t *foreach_directory_file (char *dir_name,
+ clib_error_t * (*f) (void *arg,
+ u8 * path_name,
+ u8 * file_name),
+ void *arg, int scan_dirs); /* call f for each entry of dir_name; stops at and returns the first error */
+
+#endif /* included_unix_unix_h */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vlib/unix/util.c b/src/vlib/unix/util.c
new file mode 100644
index 00000000000..edc3e591baf
--- /dev/null
+++ b/src/vlib/unix/util.c
@@ -0,0 +1,231 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * util.c: Unix utility functions (directory iteration and sysfs helpers).
+ *
+ * Copyright (c) 2008 Eliot Dresselhaus
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <vlib/vlib.h>
+#include <vlib/unix/unix.h>
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <dirent.h>
+
+/*
+ * Call f once for each entry of directory dir_name.
+ *
+ * f receives arg, the entry's path ("dir_name/entry") and the bare entry
+ * name.  Both strings are vppinfra vectors without a terminating NUL, and
+ * their storage is reused for the next entry.
+ *
+ * With scan_dirs non-zero, sub-directories are visited too, except "." and
+ * "..".  With scan_dirs zero, every entry of type DT_DIR is skipped.
+ *
+ * Returns 0 when all callbacks succeed or when dir_name does not exist
+ * (ENOENT).  Returns an error when the directory cannot be opened for any
+ * other reason.  Iteration stops at the first error returned by f, and that
+ * error is returned to the caller.
+ */
+clib_error_t *
+foreach_directory_file (char *dir_name,
+                        clib_error_t * (*f) (void *arg, u8 * path_name,
+                                             u8 * file_name), void *arg,
+                        int scan_dirs)
+{
+  DIR *d;
+  struct dirent *e;
+  clib_error_t *error = 0;
+  u8 *s = 0, *t = 0;
+
+  d = opendir (dir_name);
+  if (!d)
+    {
+      /* A missing directory simply has nothing to visit. */
+      if (errno == ENOENT)
+        return 0;
+      return clib_error_return_unix (0, "open `%s'", dir_name);
+    }
+
+  while ((e = readdir (d)) != 0)
+    {
+      if (scan_dirs)
+        {
+          if (e->d_type == DT_DIR
+              && (!strcmp (e->d_name, ".") || !strcmp (e->d_name, "..")))
+            continue;
+        }
+      else
+        {
+          if (e->d_type == DT_DIR)
+            continue;
+        }
+
+      s = format (s, "%s/%s", dir_name, e->d_name);
+      t = format (t, "%s", e->d_name);
+      error = f (arg, s, t);
+
+      /* Keep the allocations and reset the lengths for the next entry. */
+      _vec_len (s) = 0;
+      _vec_len (t) = 0;
+
+      if (error)
+        break;
+    }
+
+  /* Both scratch vectors are ours to release; t used to be leaked. */
+  vec_free (s);
+  vec_free (t);
+  closedir (d);
+
+  return error;
+}
+
+/*
+ * Format the variadic arguments according to fmt and write the resulting
+ * bytes to file_name, which is opened write-only.
+ *
+ * Returns 0 on success, or an error describing the failed open () or
+ * write ().
+ */
+clib_error_t *
+vlib_sysfs_write (char *file_name, char *fmt, ...)
+{
+  clib_error_t *result = 0;
+  va_list args;
+  u8 *text;
+  int fd = open (file_name, O_WRONLY);
+
+  if (fd < 0)
+    return clib_error_return_unix (0, "open `%s'", file_name);
+
+  va_start (args, fmt);
+  text = va_format (0, fmt, &args);
+  va_end (args);
+
+  if (write (fd, text, vec_len (text)) < 0)
+    result = clib_error_return_unix (0, "write `%s'", file_name);
+
+  vec_free (text);
+  close (fd);
+  return result;
+}
+
+/*
+ * Read at most 4096 bytes from file_name and parse them with the unformat
+ * string fmt, storing results through the variadic pointer arguments.
+ *
+ * Returns 0 on success, or an error when the file cannot be opened or
+ * read, or when the contents do not match fmt ("unformat error").
+ */
+clib_error_t *
+vlib_sysfs_read (char *file_name, char *fmt, ...)
+{
+  unformat_input_t input;
+  clib_error_t *error = 0;
+  u8 *s = 0;
+  int fd;
+  ssize_t sz;
+  uword result;
+  va_list va;
+
+  fd = open (file_name, O_RDONLY);
+  if (fd < 0)
+    return clib_error_return_unix (0, "open `%s'", file_name);
+
+  vec_validate (s, 4095);
+
+  sz = read (fd, s, vec_len (s));
+  if (sz < 0)
+    {
+      /* Build the error now: it captures errno, which the cleanup below
+         (close () in particular) could otherwise overwrite. */
+      error = clib_error_return_unix (0, "read `%s'", file_name);
+      goto done;
+    }
+
+  /* Shrink the vector to the bytes actually read before parsing it. */
+  _vec_len (s) = sz;
+  unformat_init_vector (&input, s);
+
+  va_start (va, fmt);
+  result = va_unformat (&input, fmt, &va);
+  va_end (va);
+
+  if (result == 0)
+    error = clib_error_return (0, "unformat error");
+
+done:
+  vec_free (s);
+  close (fd);
+  return error;
+}
+
+/*
+ * Resolve the symbolic link `link' and return the last path component of
+ * its target as a vector.
+ *
+ * Returns 0 when readlink () fails or the target contains no '/'.  At most
+ * 63 bytes of the target are examined.  A warning is logged when no string
+ * can be parsed from the last component.
+ */
+u8 *
+vlib_sysfs_link_to_name (char *link)
+{
+  char target[64];
+  char *last_slash, *leaf;
+  unformat_input_t leaf_input;
+  u8 *name = 0;
+  int n_bytes;
+
+  n_bytes = readlink (link, target, sizeof (target) - 1);
+  if (n_bytes < 0)
+    return 0;
+
+  /* readlink () does not NUL-terminate its result. */
+  target[n_bytes] = 0;
+
+  last_slash = strrchr (target, '/');
+  if (last_slash == 0)
+    return 0;
+  leaf = last_slash + 1;
+
+  unformat_init_string (&leaf_input, leaf, strlen (leaf));
+  if (unformat (&leaf_input, "%s", &name) != 1)
+    clib_unix_warning ("no string?");
+  unformat_free (&leaf_input);
+
+  return name;
+}
+
+/*
+ * Return the number of free huge pages of size page_size (in kB) on NUMA
+ * node numa_node, as reported by sysfs.
+ *
+ * The value is read from
+ *   /sys/devices/system/node/node<N>/hugepages/hugepages-<size>kB/free_hugepages
+ * When the per-node directory cannot be stat ()ed and numa_node is 0, the
+ * lookup falls back to /sys/kernel/mm/hugepages/...
+ *
+ * Returns -1 when no suitable directory exists or the counter cannot be
+ * read or parsed.
+ */
+int
+vlib_sysfs_get_free_hugepages (unsigned int numa_node, int page_size)
+{
+  struct stat sb;
+  clib_error_t *error;
+  u8 *p = 0;
+  int r = -1;
+
+  p = format (p, "/sys/devices/system/node/node%u%c", numa_node, 0);
+
+  if (stat ((char *) p, &sb) == 0)
+    {
+      if (S_ISDIR (sb.st_mode) == 0)
+        goto done;
+    }
+  else if (numa_node == 0)
+    {
+      vec_reset_length (p);
+      p = format (p, "/sys/kernel/mm%c", 0);
+      if (stat ((char *) p, &sb) < 0 || S_ISDIR (sb.st_mode) == 0)
+        goto done;
+    }
+  else
+    goto done;
+
+  /* Drop the NUL terminator so the rest of the path can be appended. */
+  _vec_len (p) -= 1;
+  p = format (p, "/hugepages/hugepages-%ukB/free_hugepages%c", page_size, 0);
+
+  error = vlib_sysfs_read ((char *) p, "%d", &r);
+  if (error)
+    {
+      /* The caller only gets an int, so release the error here instead of
+         leaking it, and report failure as -1. */
+      clib_error_free_vector (error);
+      r = -1;
+    }
+
+done:
+  vec_free (p);
+  return r;
+}
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vlib/vlib.h b/src/vlib/vlib.h
new file mode 100644
index 00000000000..b146a49b7f2
--- /dev/null
+++ b/src/vlib/vlib.h
@@ -0,0 +1,86 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * vlib.h: top-level include file
+ *
+ * Copyright (c) 2008 Eliot Dresselhaus
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef included_vlib_h
+#define included_vlib_h
+
+#include <vppinfra/clib.h>
+#include <vppinfra/elf_clib.h>
+
+/* Generic definitions. */
+#include <vlib/defs.h>
+
+/* Forward declarations of structs to avoid circular dependencies. */
+struct vlib_main_t;
+
+/* All includes in alphabetical order. */
+#include <vlib/buffer.h>
+#include <vlib/cli.h>
+#include <vlib/counter.h>
+#include <vlib/error.h>
+#include <vlib/init.h>
+#include <vlib/mc.h>
+#include <vlib/node.h>
+#include <vlib/physmem.h>
+#include <vlib/trace.h>
+
+/* Main include depends on other vlib/ includes so we put it last. */
+#include <vlib/main.h>
+
+/* Inline/extern function declarations. */
+#include <vlib/threads.h>
+#include <vlib/buffer_funcs.h>
+#include <vlib/cli_funcs.h>
+#include <vlib/error_funcs.h>
+#include <vlib/format_funcs.h>
+#include <vlib/node_funcs.h>
+#include <vlib/trace_funcs.h>
+#include <vlib/global_funcs.h>
+
+#include <vlib/buffer_node.h>
+
+#endif /* included_vlib_h */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vlib/vlib_process_doc.h b/src/vlib/vlib_process_doc.h
new file mode 100644
index 00000000000..a47c5e4bbe4
--- /dev/null
+++ b/src/vlib/vlib_process_doc.h
@@ -0,0 +1,147 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+*/
+
+#error do not #include this file!
+
+/** \file
+
+ Cooperative multi-tasking thread support.
+
+ Vlib provides a lightweight cooperative multi-tasking thread
+ model. Context switching costs a setjmp/longjmp pair. It's not
+ unreasonable to put vlib threads to sleep for 10us.
+
+ The graph node scheduler invokes these processes in much the same
+ way as traditional vector-processing run-to-completion graph
+ nodes; plus-or-minus a setjmp/longjmp pair required to switch
+ stacks. Simply set the vlib_node_registration_t type field to
+ VLIB_NODE_TYPE_PROCESS. Process is a misnomer; these are threads.
+
+ As of this writing, the default stack size is 1<<15;
+ 32kb. Initialize the node registration's
+ process_log2_n_stack_bytes member as needed. The graph node
+ dispatcher makes some effort to detect stack overrun. We map a
+ no-access page below each thread stack.
+
+ Process node dispatch functions are expected to be while(1) { }
+ loops which suspend when not otherwise occupied, and which must
+ not run for unreasonably long periods of time. Unreasonably long
+ is an application-dependent concept. Over the years, we have
+ constructed frame-size sensitive control-plane nodes which will
+ use a much higher fraction of the available CPU bandwidth when the
+ frame size is low. Classic example: modifying forwarding
+ tables. So long as the table-builder leaves the forwarding tables
+ in a valid state, one can suspend the table builder to avoid
+ dropping packets as a result of control-plane activity.
+
+ Process nodes can suspend for fixed amounts of time, or until another
+ entity signals an event, or both. See the example below.
+
+ When running in VLIB process context, one must pay strict attention to
+ loop invariant issues. If one walks a data structure and calls a
+ function which may suspend, one had best know by construction that it
+ cannot change. Often, it's best to simply make a snapshot copy of a
+ data structure, walk the copy at leisure, then free the copy.
+
+ Here's an example:
+
+ <code><pre>
+ \#define EXAMPLE_POLL_PERIOD 10.0
+
+ static uword
+ example_process (vlib_main_t * vm, vlib_node_runtime_t * rt,
+ vlib_frame_t * f)
+ {
+ f64 poll_time_remaining;
+ uword event_type, *event_data = 0;
+
+ poll_time_remaining = EXAMPLE_POLL_PERIOD;
+ while (1)
+ {
+ int i;
+
+ // Sleep until next periodic call due,
+ // or until we receive event(s)
+ //
+ poll_time_remaining =
+ vlib_process_wait_for_event_or_clock (vm, poll_time_remaining);
+
+ event_type = vlib_process_get_events (vm, &event_data);
+ switch (event_type)
+ {
+ case ~0: // no events => timeout
+ break;
+
+ case EVENT1:
+ for (i = 0; i < vec_len (event_data); i++)
+ handle_event1 (vm, event_data[i]);
+ break;
+
+ case EVENT2:
+ for (i = 0; i < vec_len (event_data); i++)
+ handle_event2 (vm, event_data[i]);
+ break;
+
+ // ... and so forth for each event type
+
+ default:
+ // This should never happen...
+ clib_warning ("BUG: unhandled event type %d",
+ event_type);
+ break;
+ }
+ vec_reset_length (event_data);
+
+ // Timer expired, call periodic function
+ if (vlib_process_suspend_time_is_zero (poll_time_remaining))
+ {
+ example_periodic (vm);
+ poll_time_remaining = EXAMPLE_POLL_PERIOD;
+ }
+ }
+ // NOTREACHED
+ return 0;
+ }
+
+ static VLIB_REGISTER_NODE (example_node) = {
+ .function = example_process,
+ .type = VLIB_NODE_TYPE_PROCESS,
+ .name = "example-process",
+ };
+ </pre></code>
+
+ In this example, the VLIB process node waits for an event to
+ occur, or for 10 seconds to elapse. The code demuxes on the event
+ type, calling the appropriate handler function.
+
+ Each call to vlib_process_get_events returns a vector of
+ per-event-type data passed to successive vlib_process_signal_event
+ calls; vec_len (event_data) >= 1. It is an error to process only
+ event_data[0].
+
+ Resetting the event_data vector-length to 0 by calling
+ vec_reset_length (event_data) - instead of calling vec_free (...)
+ - means that the event scheme doesn't burn cycles continuously
+ allocating and freeing the event data vector. This is a common
+ coding pattern, well worth using when appropriate.
+*/
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */