Diffstat (limited to 'vlib/vlib')
-rw-r--r--  vlib/vlib/buffer.c  1435
-rw-r--r--  vlib/vlib/buffer.h  376
-rw-r--r--  vlib/vlib/buffer_funcs.h  602
-rw-r--r--  vlib/vlib/buffer_node.h  197
-rw-r--r--  vlib/vlib/cli.c  1015
-rw-r--r--  vlib/vlib/cli.h  187
-rw-r--r--  vlib/vlib/cli_funcs.h  50
-rw-r--r--  vlib/vlib/counter.c  113
-rw-r--r--  vlib/vlib/counter.h  336
-rw-r--r--  vlib/vlib/defs.h  70
-rw-r--r--  vlib/vlib/dpdk_buffer.c  1206
-rw-r--r--  vlib/vlib/error.c  271
-rw-r--r--  vlib/vlib/error.h  89
-rw-r--r--  vlib/vlib/error_funcs.h  80
-rw-r--r--  vlib/vlib/format.c  171
-rw-r--r--  vlib/vlib/format_funcs.h  67
-rw-r--r--  vlib/vlib/global_funcs.h  37
-rw-r--r--  vlib/vlib/init.c  158
-rw-r--r--  vlib/vlib/init.h  222
-rw-r--r--  vlib/vlib/lex.c  251
-rw-r--r--  vlib/vlib/lex.h  130
-rw-r--r--  vlib/vlib/main.c  1559
-rw-r--r--  vlib/vlib/main.h  315
-rw-r--r--  vlib/vlib/mc.c  2354
-rw-r--r--  vlib/vlib/mc.h  674
-rw-r--r--  vlib/vlib/node.c  566
-rw-r--r--  vlib/vlib/node.h  621
-rw-r--r--  vlib/vlib/node_cli.c  441
-rw-r--r--  vlib/vlib/node_format.c  169
-rw-r--r--  vlib/vlib/node_funcs.h  979
-rw-r--r--  vlib/vlib/parse.c  980
-rw-r--r--  vlib/vlib/parse.h  171
-rw-r--r--  vlib/vlib/parse_builtin.c  132
-rw-r--r--  vlib/vlib/physmem.h  93
-rw-r--r--  vlib/vlib/threads.c  1166
-rw-r--r--  vlib/vlib/threads.h  333
-rw-r--r--  vlib/vlib/trace.c  294
-rw-r--r--  vlib/vlib/trace.h  75
-rw-r--r--  vlib/vlib/trace_funcs.h  167
-rw-r--r--  vlib/vlib/unix/cj.c  218
-rw-r--r--  vlib/vlib/unix/cj.h  68
-rw-r--r--  vlib/vlib/unix/cli.c  900
-rw-r--r--  vlib/vlib/unix/input.c  244
-rw-r--r--  vlib/vlib/unix/main.c  471
-rw-r--r--  vlib/vlib/unix/mc_socket.c  972
-rw-r--r--  vlib/vlib/unix/mc_socket.h  126
-rw-r--r--  vlib/vlib/unix/pci.c  577
-rw-r--r--  vlib/vlib/unix/pci.h  94
-rw-r--r--  vlib/vlib/unix/physmem.c  472
-rw-r--r--  vlib/vlib/unix/physmem.h  59
-rw-r--r--  vlib/vlib/unix/plugin.c  210
-rw-r--r--  vlib/vlib/unix/plugin.h  88
-rw-r--r--  vlib/vlib/unix/unix.h  177
-rw-r--r--  vlib/vlib/vlib.h  78
54 files changed, 22906 insertions, 0 deletions
diff --git a/vlib/vlib/buffer.c b/vlib/vlib/buffer.c
new file mode 100644
index 00000000000..4463f7fdb4f
--- /dev/null
+++ b/vlib/vlib/buffer.c
@@ -0,0 +1,1435 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * buffer.c: allocate/free network buffers.
+ *
+ * Copyright (c) 2008 Eliot Dresselhaus
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <vlib/vlib.h>
+
+uword vlib_buffer_length_in_chain_slow_path (vlib_main_t * vm, vlib_buffer_t * b_first)
+{
+ vlib_buffer_t * b = b_first;
+ uword l_first = b_first->current_length;
+ uword l = 0;
+ while (b->flags & VLIB_BUFFER_NEXT_PRESENT)
+ {
+ b = vlib_get_buffer (vm, b->next_buffer);
+ l += b->current_length;
+ }
+ b_first->total_length_not_including_first_buffer = l;
+ b_first->flags |= VLIB_BUFFER_TOTAL_LENGTH_VALID;
+ return l + l_first;
+}
+
+u8 * format_vlib_buffer (u8 * s, va_list * args)
+{
+ vlib_buffer_t * b = va_arg (*args, vlib_buffer_t *);
+
+ s = format (s, "current data %d, length %d, free-list %d",
+ b->current_data, b->current_length,
+ b->free_list_index);
+
+ if (b->flags & VLIB_BUFFER_IS_TRACED)
+ s = format (s, ", trace 0x%x", b->trace_index);
+
+ if (b->flags & VLIB_BUFFER_NEXT_PRESENT)
+ s = format (s, ", next-buffer 0x%x", b->next_buffer);
+
+ return s;
+}
+
+u8 * format_vlib_buffer_and_data (u8 * s, va_list * args)
+{
+ vlib_buffer_t * b = va_arg (*args, vlib_buffer_t *);
+
+ s = format (s, "%U, %U",
+ format_vlib_buffer, b,
+ format_hex_bytes, vlib_buffer_get_current (b), 64);
+
+ return s;
+}
+
+static u8 * format_vlib_buffer_known_state (u8 * s, va_list * args)
+{
+ vlib_buffer_known_state_t state = va_arg (*args, vlib_buffer_known_state_t);
+ char * t;
+
+ switch (state)
+ {
+ case VLIB_BUFFER_UNKNOWN:
+ t = "unknown";
+ break;
+
+ case VLIB_BUFFER_KNOWN_ALLOCATED:
+ t = "known-allocated";
+ break;
+
+ case VLIB_BUFFER_KNOWN_FREE:
+ t = "known-free";
+ break;
+
+ default:
+ t = "invalid";
+ break;
+ }
+
+ return format (s, "%s", t);
+}
+
+u8 * format_vlib_buffer_contents (u8 * s, va_list * va)
+{
+ vlib_main_t * vm = va_arg (*va, vlib_main_t *);
+ vlib_buffer_t * b = va_arg (*va, vlib_buffer_t *);
+
+ while (1)
+ {
+ vec_add (s, vlib_buffer_get_current (b),
+ b->current_length);
+ if (! (b->flags & VLIB_BUFFER_NEXT_PRESENT))
+ break;
+ b = vlib_get_buffer (vm, b->next_buffer);
+ }
+
+ return s;
+}
+
+static u8 *
+vlib_validate_buffer_helper (vlib_main_t * vm,
+ u32 bi,
+ uword follow_buffer_next,
+ uword ** unique_hash)
+
+{
+ vlib_buffer_t * b = vlib_get_buffer (vm, bi);
+ vlib_buffer_main_t * bm = vm->buffer_main;
+ vlib_buffer_free_list_t * fl;
+
+ if (pool_is_free_index (bm->buffer_free_list_pool,
+ b->free_list_index))
+ return format (0, "unknown free list 0x%x", b->free_list_index);
+
+ fl = pool_elt_at_index (bm->buffer_free_list_pool,
+ b->free_list_index);
+
+ if ((signed) b->current_data < (signed) - VLIB_BUFFER_PRE_DATA_SIZE)
+ return format (0, "current data %d before pre-data", b->current_data);
+#if DPDK == 0
+ if (b->current_data + b->current_length > fl->n_data_bytes)
+ return format (0, "%d-%d beyond end of buffer %d",
+ b->current_data, b->current_length,
+ fl->n_data_bytes);
+#endif
+
+ if (follow_buffer_next
+ && (b->flags & VLIB_BUFFER_NEXT_PRESENT))
+ {
+ vlib_buffer_known_state_t k;
+ u8 * msg, * result;
+
+ k = vlib_buffer_is_known (vm, b->next_buffer);
+ if (k != VLIB_BUFFER_KNOWN_ALLOCATED)
+ return format (0, "next 0x%x: %U",
+ b->next_buffer,
+ format_vlib_buffer_known_state, k);
+
+ if (unique_hash)
+ {
+ if (hash_get (*unique_hash, b->next_buffer))
+ return format (0, "duplicate buffer 0x%x", b->next_buffer);
+
+ hash_set1 (*unique_hash, b->next_buffer);
+ }
+
+ msg = vlib_validate_buffer (vm, b->next_buffer, follow_buffer_next);
+ if (msg)
+ {
+ result = format (0, "next 0x%x: %v", b->next_buffer, msg);
+ vec_free (msg);
+ return result;
+ }
+ }
+
+ return 0;
+}
+
+u8 *
+vlib_validate_buffer (vlib_main_t * vm, u32 bi, uword follow_buffer_next)
+{ return vlib_validate_buffer_helper (vm, bi, follow_buffer_next, /* unique_hash */ 0); }
+
+u8 *
+vlib_validate_buffers (vlib_main_t * vm,
+ u32 * buffers,
+ uword next_buffer_stride,
+ uword n_buffers,
+ vlib_buffer_known_state_t known_state,
+ uword follow_buffer_next)
+{
+ uword i, * hash;
+ u32 bi, * b = buffers;
+ vlib_buffer_known_state_t k;
+ u8 * msg = 0, * result = 0;
+
+ hash = hash_create (0, 0);
+ for (i = 0; i < n_buffers; i++)
+ {
+ bi = b[0];
+ b += next_buffer_stride;
+
+ /* Buffer is not unique. */
+ if (hash_get (hash, bi))
+ {
+ msg = format (0, "not unique");
+ goto done;
+ }
+
+ k = vlib_buffer_is_known (vm, bi);
+ if (k != known_state)
+ {
+ msg = format (0, "is %U; expected %U",
+ format_vlib_buffer_known_state, k,
+ format_vlib_buffer_known_state, known_state);
+ goto done;
+ }
+
+ msg = vlib_validate_buffer_helper (vm, bi, follow_buffer_next, &hash);
+ if (msg)
+ goto done;
+
+ hash_set1 (hash, bi);
+ }
+
+ done:
+ if (msg)
+ {
+ result = format (0, "0x%x: %v", bi, msg);
+ vec_free (msg);
+ }
+ hash_free (hash);
+ return result;
+}
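+
+/* Illustrative usage sketch, not part of the original commit: validate a
+   vector of buffer indices expected to be allocated, e.g. from a debug
+   CLI handler.  The function name is hypothetical. */
+static u8 *
+example_validate_allocated (vlib_main_t * vm, u32 * buffers)
+{
+  /* Returns 0 on success, else a format'ed error vector which the
+     caller must vec_free. */
+  return vlib_validate_buffers (vm, buffers,
+                                /* next_buffer_stride */ 1,
+                                vec_len (buffers),
+                                VLIB_BUFFER_KNOWN_ALLOCATED,
+                                /* follow_buffer_next */ 1);
+}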
+
+vlib_main_t **vlib_mains;
+
+/* When debugging, validate that the given buffers are either known
+   allocated or known free. */
+static void
+vlib_buffer_validate_alloc_free (vlib_main_t * vm,
+ u32 * buffers,
+ uword n_buffers,
+ vlib_buffer_known_state_t expected_state)
+{
+ u32 * b;
+ uword i, bi, is_free;
+
+ if (CLIB_DEBUG == 0)
+ return;
+
+ ASSERT(os_get_cpu_number() == 0);
+
+ /* smp disaster check */
+ if (vlib_mains)
+ ASSERT(vm == vlib_mains[0]);
+
+ is_free = expected_state == VLIB_BUFFER_KNOWN_ALLOCATED;
+ b = buffers;
+ for (i = 0; i < n_buffers; i++)
+ {
+ vlib_buffer_known_state_t known;
+
+ bi = b[0];
+ b += 1;
+ known = vlib_buffer_is_known (vm, bi);
+ if (known != expected_state)
+ {
+ ASSERT (0);
+ vlib_panic_with_msg
+ (vm, "%s %U buffer 0x%x",
+ is_free ? "freeing" : "allocating",
+ format_vlib_buffer_known_state, known,
+ bi);
+ }
+
+ vlib_buffer_set_known_state
+ (vm, bi,
+ is_free ? VLIB_BUFFER_KNOWN_FREE : VLIB_BUFFER_KNOWN_ALLOCATED);
+ }
+}
+
+/* Aligned copy routine. */
+void
+vlib_aligned_memcpy (void * _dst, void * _src, int n_bytes)
+{
+ vlib_copy_unit_t * dst = _dst;
+ vlib_copy_unit_t * src = _src;
+
+ /* Arguments must be naturally aligned. */
+ ASSERT (pointer_to_uword (dst) % sizeof (dst[0]) == 0);
+ ASSERT (pointer_to_uword (src) % sizeof (src[0]) == 0);
+ ASSERT (n_bytes % sizeof (dst[0]) == 0);
+
+ if (4 * sizeof (dst[0]) == CLIB_CACHE_LINE_BYTES)
+ {
+ CLIB_PREFETCH (dst + 0, 4 * sizeof (dst[0]), WRITE);
+ CLIB_PREFETCH (src + 0, 4 * sizeof (src[0]), READ);
+
+ while (n_bytes >= 4 * sizeof (dst[0]))
+ {
+ dst += 4;
+ src += 4;
+ n_bytes -= 4 * sizeof (dst[0]);
+ CLIB_PREFETCH (dst, 4 * sizeof (dst[0]), WRITE);
+ CLIB_PREFETCH (src, 4 * sizeof (src[0]), READ);
+ dst[-4] = src[-4];
+ dst[-3] = src[-3];
+ dst[-2] = src[-2];
+ dst[-1] = src[-1];
+ }
+ }
+ else if (8 * sizeof (dst[0]) == CLIB_CACHE_LINE_BYTES)
+ {
+ CLIB_PREFETCH (dst + 0, 8 * sizeof (dst[0]), WRITE);
+ CLIB_PREFETCH (src + 0, 8 * sizeof (src[0]), READ);
+
+ while (n_bytes >= 8 * sizeof (dst[0]))
+ {
+ dst += 8;
+ src += 8;
+ n_bytes -= 8 * sizeof (dst[0]);
+ CLIB_PREFETCH (dst, 8 * sizeof (dst[0]), WRITE);
+ CLIB_PREFETCH (src, 8 * sizeof (src[0]), READ);
+ dst[-8] = src[-8];
+ dst[-7] = src[-7];
+ dst[-6] = src[-6];
+ dst[-5] = src[-5];
+ dst[-4] = src[-4];
+ dst[-3] = src[-3];
+ dst[-2] = src[-2];
+ dst[-1] = src[-1];
+ }
+ }
+ else
+ /* Cache line size unknown: fall back to slow version. */;
+
+ while (n_bytes > 0)
+ {
+ *dst++ = *src++;
+ n_bytes -= 1 * sizeof (dst[0]);
+ }
+}
+
+#define BUFFERS_PER_COPY (sizeof (vlib_copy_unit_t) / sizeof (u32))
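+
+/* For example: with CLIB_HAVE_VEC128, vlib_copy_unit_t is u8x16, so
+   BUFFERS_PER_COPY = 16 / 4 = 4 buffer indices move per copy unit;
+   on a 64-bit target without vector support it is 8 / 4 = 2. */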
+
+/* Make sure we have at least the given number of unaligned buffers. */
+static void
+fill_unaligned (vlib_main_t * vm,
+ vlib_buffer_free_list_t * free_list,
+ uword n_unaligned_buffers)
+{
+ word la = vec_len (free_list->aligned_buffers);
+ word lu = vec_len (free_list->unaligned_buffers);
+
+ /* Aligned buffers come in copy-unit-sized chunks. */
+ ASSERT (la % BUFFERS_PER_COPY == 0);
+
+ ASSERT (la >= n_unaligned_buffers);
+
+ while (lu < n_unaligned_buffers)
+ {
+ /* Copy BUFFERS_PER_COPY buffers from end of aligned vector to unaligned vector. */
+ vec_add (free_list->unaligned_buffers,
+ free_list->aligned_buffers + la - BUFFERS_PER_COPY,
+ BUFFERS_PER_COPY);
+ la -= BUFFERS_PER_COPY;
+ lu += BUFFERS_PER_COPY;
+ }
+ _vec_len (free_list->aligned_buffers) = la;
+}
+
+/* After frees, the aligned vector may no longer be a multiple of
+   BUFFERS_PER_COPY long; move the remainder to the unaligned vector. */
+static void
+trim_aligned (vlib_buffer_free_list_t * f)
+{
+ uword l, n_trim;
+
+ /* Add unaligned to aligned before trim. */
+ l = vec_len (f->unaligned_buffers);
+ if (l > 0)
+ {
+ vec_add_aligned (f->aligned_buffers, f->unaligned_buffers, l,
+ /* align */ sizeof (vlib_copy_unit_t));
+
+ _vec_len (f->unaligned_buffers) = 0;
+ }
+
+ /* Remove unaligned buffers from end of aligned vector and save for next trim. */
+ l = vec_len (f->aligned_buffers);
+ n_trim = l % BUFFERS_PER_COPY;
+ if (n_trim)
+ {
+ /* Trim aligned -> unaligned. */
+ vec_add (f->unaligned_buffers, f->aligned_buffers + l - n_trim, n_trim);
+
+ /* Remove from aligned. */
+ _vec_len (f->aligned_buffers) = l - n_trim;
+ }
+}
+
+static void
+merge_free_lists (vlib_buffer_free_list_t * dst,
+ vlib_buffer_free_list_t * src)
+{
+ uword l;
+ u32 * d;
+
+ trim_aligned (src);
+ trim_aligned (dst);
+
+ l = vec_len (src->aligned_buffers);
+ if (l > 0)
+ {
+ vec_add2_aligned (dst->aligned_buffers, d, l,
+ /* align */ sizeof (vlib_copy_unit_t));
+ vlib_aligned_memcpy (d, src->aligned_buffers, l * sizeof (d[0]));
+ vec_free (src->aligned_buffers);
+ }
+
+ l = vec_len (src->unaligned_buffers);
+ if (l > 0)
+ {
+ vec_add (dst->unaligned_buffers, src->unaligned_buffers, l);
+ vec_free (src->unaligned_buffers);
+ }
+}
+
+always_inline u32
+vlib_buffer_get_free_list_with_size (vlib_main_t * vm, u32 size)
+{
+ vlib_buffer_main_t * bm = vm->buffer_main;
+
+ size = vlib_buffer_round_size (size);
+ uword * p = hash_get (bm->free_list_by_size, size);
+ return p ? p[0] : ~0;
+}
+
+/* Add buffer free list. */
+static u32
+vlib_buffer_create_free_list_helper (vlib_main_t * vm,
+ u32 n_data_bytes,
+ u32 is_public,
+ u32 is_default,
+ u8 * name)
+{
+ vlib_buffer_main_t * bm = vm->buffer_main;
+ vlib_buffer_free_list_t * f;
+
+ if (! is_default && pool_elts (bm->buffer_free_list_pool) == 0)
+ {
+ u32 default_free_free_list_index;
+
+ default_free_free_list_index =
+ vlib_buffer_create_free_list_helper (vm,
+ /* default buffer size */ VLIB_BUFFER_DEFAULT_FREE_LIST_BYTES,
+ /* is_public */ 1,
+ /* is_default */ 1,
+ (u8 *) "default");
+ ASSERT (default_free_free_list_index == VLIB_BUFFER_DEFAULT_FREE_LIST_INDEX);
+
+ if (n_data_bytes == VLIB_BUFFER_DEFAULT_FREE_LIST_BYTES && is_public)
+ return default_free_free_list_index;
+ }
+
+ pool_get_aligned (bm->buffer_free_list_pool, f, CLIB_CACHE_LINE_BYTES);
+
+ memset (f, 0, sizeof (f[0]));
+ f->index = f - bm->buffer_free_list_pool;
+ f->n_data_bytes = vlib_buffer_round_size (n_data_bytes);
+ f->min_n_buffers_each_physmem_alloc = 256;
+ f->name = clib_mem_is_heap_object (name) ? name : format (0, "%s", name);
+
+ /* Setup free buffer template. */
+ f->buffer_init_template.free_list_index = f->index;
+
+ if (is_public)
+ {
+ uword * p = hash_get (bm->free_list_by_size, f->n_data_bytes);
+ if (! p)
+ hash_set (bm->free_list_by_size, f->n_data_bytes, f->index);
+ }
+
+ return f->index;
+}
+
+u32 vlib_buffer_create_free_list (vlib_main_t * vm, u32 n_data_bytes,
+ char * fmt, ...)
+{
+ va_list va;
+ u8 * name;
+
+ va_start (va, fmt);
+ name = va_format (0, fmt, &va);
+ va_end (va);
+
+ return vlib_buffer_create_free_list_helper (vm, n_data_bytes,
+ /* is_public */ 0,
+ /* is_default */ 0,
+ name);
+}
+
+u32 vlib_buffer_get_or_create_free_list (vlib_main_t * vm, u32 n_data_bytes,
+ char * fmt, ...)
+{
+ u32 i = vlib_buffer_get_free_list_with_size (vm, n_data_bytes);
+
+ if (i == ~0)
+ {
+ va_list va;
+ u8 * name;
+
+ va_start (va, fmt);
+ name = va_format (0, fmt, &va);
+ va_end (va);
+
+ i = vlib_buffer_create_free_list_helper (vm, n_data_bytes,
+ /* is_public */ 1,
+ /* is_default */ 0,
+ name);
+ }
+
+ return i;
+}
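+
+/* Illustrative sketch, not part of the original commit: share a public
+   free list sized for 2048-byte payloads.  The size and name here are
+   hypothetical; an existing public list of equal size is reused. */
+static u32
+example_get_free_list (vlib_main_t * vm)
+{
+  return vlib_buffer_get_or_create_free_list (vm, 2048, "example-2048");
+}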
+
+static void
+del_free_list (vlib_main_t * vm, vlib_buffer_free_list_t * f)
+{
+ u32 i;
+
+ for (i = 0; i < vec_len (f->buffer_memory_allocated); i++)
+ vm->os_physmem_free (f->buffer_memory_allocated[i]);
+ vec_free (f->name);
+ vec_free (f->buffer_memory_allocated);
+ vec_free (f->unaligned_buffers);
+ vec_free (f->aligned_buffers);
+}
+
+/* Delete buffer free list. */
+void vlib_buffer_delete_free_list (vlib_main_t * vm, u32 free_list_index)
+{
+ vlib_buffer_main_t * bm = vm->buffer_main;
+ vlib_buffer_free_list_t * f;
+ u32 merge_index;
+
+ f = vlib_buffer_get_free_list (vm, free_list_index);
+
+ ASSERT (vec_len (f->unaligned_buffers) + vec_len (f->aligned_buffers) == f->n_alloc);
+ merge_index = vlib_buffer_get_free_list_with_size (vm, f->n_data_bytes);
+ if (merge_index != ~0 && merge_index != free_list_index)
+ {
+ merge_free_lists (pool_elt_at_index (bm->buffer_free_list_pool,
+ merge_index), f);
+ }
+
+ del_free_list (vm, f);
+
+ /* Poison it. */
+ memset (f, 0xab, sizeof (f[0]));
+
+ pool_put (bm->buffer_free_list_pool, f);
+}
+
+/* Make sure free list has at least the given number of free buffers. */
+static uword
+fill_free_list (vlib_main_t * vm,
+ vlib_buffer_free_list_t * fl,
+ uword min_free_buffers)
+{
+ vlib_buffer_t * buffers, * b;
+ int n, n_bytes, i;
+ u32 * bi;
+ u32 n_remaining, n_alloc, n_this_chunk;
+
+ trim_aligned (fl);
+
+ /* Already have enough free buffers on free list? */
+ n = min_free_buffers - vec_len (fl->aligned_buffers);
+ if (n <= 0)
+ return min_free_buffers;
+
+ /* Always allocate round number of buffers. */
+ n = round_pow2 (n, BUFFERS_PER_COPY);
+
+ /* Always allocate new buffers in reasonably large sized chunks. */
+ n = clib_max (n, fl->min_n_buffers_each_physmem_alloc);
+
+ n_remaining = n;
+ n_alloc = 0;
+ while (n_remaining > 0)
+ {
+ n_this_chunk = clib_min (n_remaining, 16);
+
+ n_bytes = n_this_chunk * (sizeof (b[0]) + fl->n_data_bytes);
+
+ /* drb: removed power-of-2 ASSERT */
+ buffers = vm->os_physmem_alloc_aligned (&vm->physmem_main,
+ n_bytes, sizeof (vlib_buffer_t));
+ if (! buffers)
+ return n_alloc;
+
+ /* Record chunk as being allocated so we can free it later. */
+ vec_add1 (fl->buffer_memory_allocated, buffers);
+
+ fl->n_alloc += n_this_chunk;
+ n_alloc += n_this_chunk;
+ n_remaining -= n_this_chunk;
+
+ b = buffers;
+ vec_add2_aligned (fl->aligned_buffers, bi, n_this_chunk,
+ sizeof (vlib_copy_unit_t));
+ for (i = 0; i < n_this_chunk; i++)
+ {
+ bi[i] = vlib_get_buffer_index (vm, b);
+
+ if (CLIB_DEBUG > 0)
+ vlib_buffer_set_known_state (vm, bi[i], VLIB_BUFFER_KNOWN_FREE);
+ b = vlib_buffer_next_contiguous (b, fl->n_data_bytes);
+ }
+
+ memset (buffers, 0, n_bytes);
+
+ /* Initialize all new buffers. */
+ b = buffers;
+ for (i = 0; i < n_this_chunk; i++)
+ {
+ vlib_buffer_init_for_free_list (b, fl);
+ b = vlib_buffer_next_contiguous (b, fl->n_data_bytes);
+ }
+
+ if (fl->buffer_init_function)
+ fl->buffer_init_function (vm, fl, bi, n_this_chunk);
+ }
+ return n_alloc;
+}
+
+always_inline uword
+copy_alignment (u32 * x)
+{ return (pointer_to_uword (x) / sizeof (x[0])) % BUFFERS_PER_COPY; }
+
+static u32
+alloc_from_free_list (vlib_main_t * vm,
+ vlib_buffer_free_list_t * free_list,
+ u32 * alloc_buffers,
+ u32 n_alloc_buffers)
+{
+ u32 * dst, * u_src;
+ uword u_len, n_left;
+ uword n_unaligned_start, n_unaligned_end, n_filled;
+
+ ASSERT(os_get_cpu_number() == 0);
+
+ n_left = n_alloc_buffers;
+ dst = alloc_buffers;
+ n_unaligned_start = ((BUFFERS_PER_COPY - copy_alignment (dst))
+ & (BUFFERS_PER_COPY - 1));
+
+ n_filled = fill_free_list (vm, free_list, n_alloc_buffers);
+ if (n_filled == 0)
+ return 0;
+
+ n_left = n_filled < n_left ? n_filled : n_left;
+ n_alloc_buffers = n_left;
+
+ if (n_unaligned_start >= n_left)
+ {
+ n_unaligned_start = n_left;
+ n_unaligned_end = 0;
+ }
+ else
+ n_unaligned_end = copy_alignment (dst + n_alloc_buffers);
+
+ fill_unaligned (vm, free_list, n_unaligned_start + n_unaligned_end);
+
+ u_len = vec_len (free_list->unaligned_buffers);
+ u_src = free_list->unaligned_buffers + u_len - 1;
+
+ if (n_unaligned_start)
+ {
+ uword n_copy = n_unaligned_start;
+ if (n_copy > n_left)
+ n_copy = n_left;
+ n_left -= n_copy;
+
+ while (n_copy > 0)
+ {
+ *dst++ = *u_src--;
+ n_copy--;
+ u_len--;
+ }
+
+ /* Now dst should be aligned. */
+ if (n_left > 0)
+ ASSERT (pointer_to_uword (dst) % sizeof (vlib_copy_unit_t) == 0);
+ }
+
+ /* Aligned copy. */
+ {
+ vlib_copy_unit_t * d, * s;
+ uword n_copy;
+
+ if (vec_len(free_list->aligned_buffers) < ((n_left/BUFFERS_PER_COPY)*BUFFERS_PER_COPY))
+ abort();
+
+ n_copy = n_left / BUFFERS_PER_COPY;
+ n_left = n_left % BUFFERS_PER_COPY;
+
+ /* Remove buffers from aligned free list. */
+ _vec_len (free_list->aligned_buffers) -= n_copy * BUFFERS_PER_COPY;
+
+ s = (vlib_copy_unit_t *) vec_end (free_list->aligned_buffers);
+ d = (vlib_copy_unit_t *) dst;
+
+ /* Fast path loop. */
+ while (n_copy >= 4)
+ {
+ d[0] = s[0];
+ d[1] = s[1];
+ d[2] = s[2];
+ d[3] = s[3];
+ n_copy -= 4;
+ s += 4;
+ d += 4;
+ }
+
+ while (n_copy >= 1)
+ {
+ d[0] = s[0];
+ n_copy -= 1;
+ s += 1;
+ d += 1;
+ }
+
+ dst = (void *) d;
+ }
+
+ /* Unaligned copy. */
+ ASSERT (n_unaligned_end == n_left);
+ while (n_left > 0)
+ {
+ *dst++ = *u_src--;
+ n_left--;
+ u_len--;
+ }
+
+ if (! free_list->unaligned_buffers)
+ ASSERT (u_len == 0);
+ else
+ _vec_len (free_list->unaligned_buffers) = u_len;
+
+ /* Verify that buffers are known free. */
+ vlib_buffer_validate_alloc_free (vm, alloc_buffers,
+ n_alloc_buffers,
+ VLIB_BUFFER_KNOWN_FREE);
+
+ return n_alloc_buffers;
+}
+
+/* Allocate a given number of buffers into given array.
+   Returns the number actually allocated, which may be less than the
+   number requested (possibly zero). */
+u32 vlib_buffer_alloc (vlib_main_t * vm, u32 * buffers, u32 n_buffers)
+{
+ vlib_buffer_main_t * bm = vm->buffer_main;
+ ASSERT(os_get_cpu_number() == 0);
+
+ return alloc_from_free_list
+ (vm,
+ pool_elt_at_index (bm->buffer_free_list_pool,
+ VLIB_BUFFER_DEFAULT_FREE_LIST_INDEX),
+ buffers, n_buffers);
+}
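+
+/* Illustrative sketch, not part of the original commit: a minimal
+   alloc/free round trip against the default free list.  The function
+   name is hypothetical. */
+static void
+example_alloc_free_round_trip (vlib_main_t * vm)
+{
+  u32 bis[32];
+  u32 n_alloc = vlib_buffer_alloc (vm, bis, ARRAY_LEN (bis));
+
+  /* n_alloc may be less than requested if physmem is exhausted. */
+  vlib_buffer_free (vm, bis, n_alloc);
+}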
+
+u32 vlib_buffer_alloc_from_free_list (vlib_main_t * vm,
+ u32 * buffers,
+ u32 n_buffers,
+ u32 free_list_index)
+{
+ vlib_buffer_main_t * bm = vm->buffer_main;
+ vlib_buffer_free_list_t * f;
+ f = pool_elt_at_index (bm->buffer_free_list_pool, free_list_index);
+ return alloc_from_free_list (vm, f, buffers, n_buffers);
+}
+
+always_inline void
+add_buffer_to_free_list (vlib_main_t * vm,
+ vlib_buffer_free_list_t * f,
+ u32 buffer_index, u8 do_init)
+{
+ vlib_buffer_t * b;
+ b = vlib_get_buffer (vm, buffer_index);
+ if (PREDICT_TRUE(do_init))
+ vlib_buffer_init_for_free_list (b, f);
+ vec_add1_aligned (f->aligned_buffers, buffer_index, sizeof (vlib_copy_unit_t));
+}
+
+always_inline vlib_buffer_free_list_t *
+buffer_get_free_list (vlib_main_t * vm, vlib_buffer_t * b, u32 * index)
+{
+ vlib_buffer_main_t * bm = vm->buffer_main;
+ u32 i;
+
+ *index = i = b->free_list_index;
+ return pool_elt_at_index (bm->buffer_free_list_pool, i);
+}
+
+void *vlib_set_buffer_free_callback (vlib_main_t *vm, void *fp)
+{
+ vlib_buffer_main_t * bm = vm->buffer_main;
+ void * rv = bm->buffer_free_callback;
+
+ bm->buffer_free_callback = fp;
+ return rv;
+}
+
+void vnet_buffer_free_dpdk_mb (vlib_buffer_t * b) __attribute__ ((weak));
+void vnet_buffer_free_dpdk_mb (vlib_buffer_t * b) { }
+
+static_always_inline void
+vlib_buffer_free_inline (vlib_main_t * vm,
+ u32 * buffers,
+ u32 n_buffers,
+ u32 follow_buffer_next)
+{
+ vlib_buffer_main_t * bm = vm->buffer_main;
+ vlib_buffer_free_list_t * fl;
+ static u32 * next_to_free[2]; /* smp bad */
+ u32 i_next_to_free, * b, * n, * f, fi;
+ uword n_left;
+ int i;
+ static vlib_buffer_free_list_t ** announce_list;
+ vlib_buffer_free_list_t * fl0 = 0, * fl1 = 0;
+ u32 bi0=(u32)~0, bi1=(u32)~0, fi0, fi1 = (u32)~0;
+ u8 free0, free1=0, free_next0, free_next1;
+ u32 (*cb)(vlib_main_t * vm, u32 * buffers, u32 n_buffers,
+ u32 follow_buffer_next);
+
+ ASSERT(os_get_cpu_number() == 0);
+
+ cb = bm->buffer_free_callback;
+
+ if (PREDICT_FALSE (cb != 0))
+ n_buffers = (*cb)(vm, buffers, n_buffers, follow_buffer_next);
+
+ if (! n_buffers)
+ return;
+
+ /* Use first buffer to get default free list. */
+ {
+ u32 bi0 = buffers[0];
+ vlib_buffer_t * b0;
+
+ b0 = vlib_get_buffer (vm, bi0);
+ fl = buffer_get_free_list (vm, b0, &fi);
+ if (fl->buffers_added_to_freelist_function)
+ vec_add1 (announce_list, fl);
+ }
+
+ vec_validate (next_to_free[0], n_buffers - 1);
+ vec_validate (next_to_free[1], n_buffers - 1);
+
+ i_next_to_free = 0;
+ n_left = n_buffers;
+ b = buffers;
+
+ again:
+ /* Verify that buffers are known allocated. */
+ vlib_buffer_validate_alloc_free (vm, b,
+ n_left,
+ VLIB_BUFFER_KNOWN_ALLOCATED);
+
+ vec_add2_aligned (fl->aligned_buffers, f, n_left,
+ /* align */ sizeof (vlib_copy_unit_t));
+
+ n = next_to_free[i_next_to_free];
+ while (n_left >= 4)
+ {
+ vlib_buffer_t * b0, * b1, * binit0, * binit1, dummy_buffers[2];
+
+ bi0 = b[0];
+ bi1 = b[1];
+
+ f[0] = bi0;
+ f[1] = bi1;
+ f += 2;
+ b += 2;
+ n_left -= 2;
+
+ /* Prefetch buffers for next iteration. */
+ vlib_prefetch_buffer_with_index (vm, b[0], WRITE);
+ vlib_prefetch_buffer_with_index (vm, b[1], WRITE);
+
+ b0 = vlib_get_buffer (vm, bi0);
+ b1 = vlib_get_buffer (vm, bi1);
+
+ free0 = b0->clone_count == 0;
+ free1 = b1->clone_count == 0;
+
+ /* Must be before init which will over-write buffer flags. */
+ if (follow_buffer_next)
+ {
+ n[0] = b0->next_buffer;
+ free_next0 = free0 && (b0->flags & VLIB_BUFFER_NEXT_PRESENT) != 0;
+ n += free_next0;
+
+ n[0] = b1->next_buffer;
+ free_next1 = free1 && (b1->flags & VLIB_BUFFER_NEXT_PRESENT) != 0;
+ n += free_next1;
+ }
+ else
+ free_next0 = free_next1 = 0;
+
+ /* Must be before init which will over-write buffer free list. */
+ fi0 = b0->free_list_index;
+ fi1 = b1->free_list_index;
+
+ if (PREDICT_FALSE (fi0 != fi || fi1 != fi))
+ goto slow_path_x2;
+
+ binit0 = free0 ? b0 : &dummy_buffers[0];
+ binit1 = free1 ? b1 : &dummy_buffers[1];
+
+ vlib_buffer_init_two_for_free_list (binit0, binit1, fl);
+ continue;
+
+ slow_path_x2:
+ /* Backup speculation. */
+ f -= 2;
+ n -= free_next0 + free_next1;
+
+ _vec_len (fl->aligned_buffers) = f - fl->aligned_buffers;
+
+ fl0 = pool_elt_at_index (bm->buffer_free_list_pool, fi0);
+ fl1 = pool_elt_at_index (bm->buffer_free_list_pool, fi1);
+
+ add_buffer_to_free_list (vm, fl0, bi0, free0);
+ if (PREDICT_FALSE(fl0->buffers_added_to_freelist_function != 0))
+ {
+ int i;
+ for (i = 0; i < vec_len (announce_list); i++)
+ if (fl0 == announce_list[i])
+ goto no_fl0;
+ vec_add1(announce_list, fl0);
+ }
+ no_fl0:
+ if (PREDICT_FALSE(fl1->buffers_added_to_freelist_function != 0))
+ {
+ int i;
+ for (i = 0; i < vec_len (announce_list); i++)
+ if (fl1 == announce_list[i])
+ goto no_fl1;
+ vec_add1(announce_list, fl1);
+ }
+
+ no_fl1:
+ add_buffer_to_free_list (vm, fl1, bi1, free1);
+
+ /* Possibly change current free list. */
+ if (fi0 != fi && fi1 != fi)
+ {
+ fi = fi1;
+ fl = pool_elt_at_index (bm->buffer_free_list_pool, fi);
+ }
+
+ vec_add2_aligned (fl->aligned_buffers, f, n_left,
+ /* align */ sizeof (vlib_copy_unit_t));
+ }
+
+ while (n_left >= 1)
+ {
+ vlib_buffer_t * b0, * binit0, dummy_buffers[1];
+
+ bi0 = b[0];
+ f[0] = bi0;
+ f += 1;
+ b += 1;
+ n_left -= 1;
+
+ b0 = vlib_get_buffer (vm, bi0);
+
+ free0 = b0->clone_count == 0;
+
+ /* Must be before init which will over-write buffer flags. */
+ if (follow_buffer_next)
+ {
+ n[0] = b0->next_buffer;
+ free_next0 = free0 && (b0->flags & VLIB_BUFFER_NEXT_PRESENT) != 0;
+ n += free_next0;
+ }
+ else
+ free_next0 = 0;
+
+ /* Must be before init which will over-write buffer free list. */
+ fi0 = b0->free_list_index;
+
+ if (PREDICT_FALSE (fi0 != fi))
+ goto slow_path_x1;
+
+ binit0 = free0 ? b0 : &dummy_buffers[0];
+
+ vlib_buffer_init_for_free_list (binit0, fl);
+ continue;
+
+ slow_path_x1:
+ /* Backup speculation. */
+ f -= 1;
+ n -= free_next0;
+
+ _vec_len (fl->aligned_buffers) = f - fl->aligned_buffers;
+
+ fl0 = pool_elt_at_index (bm->buffer_free_list_pool, fi0);
+
+ add_buffer_to_free_list (vm, fl0, bi0, free0);
+ if (PREDICT_FALSE(fl0->buffers_added_to_freelist_function != 0))
+ {
+ int i;
+ for (i = 0; i < vec_len (announce_list); i++)
+ if (fl0 == announce_list[i])
+ goto no_fl00;
+ vec_add1(announce_list, fl0);
+ }
+
+ no_fl00:
+ fi = fi0;
+ fl = pool_elt_at_index (bm->buffer_free_list_pool, fi);
+
+ vec_add2_aligned (fl->aligned_buffers, f, n_left,
+ /* align */ sizeof (vlib_copy_unit_t));
+ }
+
+ if (follow_buffer_next && ((n_left = n - next_to_free[i_next_to_free]) > 0))
+ {
+ b = next_to_free[i_next_to_free];
+ i_next_to_free ^= 1;
+ goto again;
+ }
+
+ _vec_len (fl->aligned_buffers) = f - fl->aligned_buffers;
+
+ if (vec_len(announce_list))
+ {
+ vlib_buffer_free_list_t * fl;
+ for (i = 0; i < vec_len (announce_list); i++)
+ {
+ fl = announce_list[i];
+ fl->buffers_added_to_freelist_function (vm, fl);
+ }
+ _vec_len(announce_list) = 0;
+ }
+}
+
+void vlib_buffer_free (vlib_main_t * vm,
+ u32 * buffers,
+ u32 n_buffers)
+{
+ vlib_buffer_free_inline (vm, buffers, n_buffers, /* follow_buffer_next */ 1);
+}
+
+void vlib_buffer_free_no_next (vlib_main_t * vm,
+ u32 * buffers,
+ u32 n_buffers)
+{
+ vlib_buffer_free_inline (vm, buffers, n_buffers, /* follow_buffer_next */ 0);
+}
+
+/* Copy template packet data into buffers as they are allocated. */
+static void
+vlib_packet_template_buffer_init (vlib_main_t * vm,
+ vlib_buffer_free_list_t * fl,
+ u32 * buffers,
+ u32 n_buffers)
+{
+ vlib_packet_template_t * t = uword_to_pointer (fl->buffer_init_function_opaque,
+ vlib_packet_template_t *);
+ uword i;
+
+ for (i = 0; i < n_buffers; i++)
+ {
+ vlib_buffer_t * b = vlib_get_buffer (vm, buffers[i]);
+ ASSERT (b->current_length == vec_len (t->packet_data));
+ memcpy (vlib_buffer_get_current (b), t->packet_data, b->current_length);
+ }
+}
+
+void vlib_packet_template_init (vlib_main_t * vm,
+ vlib_packet_template_t * t,
+ void * packet_data,
+ uword n_packet_data_bytes,
+ uword min_n_buffers_each_physmem_alloc,
+ char * fmt,
+ ...)
+{
+ vlib_buffer_free_list_t * fl;
+ va_list va;
+ u8 * name;
+
+ va_start (va, fmt);
+ name = va_format (0, fmt, &va);
+ va_end (va);
+
+ memset (t, 0, sizeof (t[0]));
+
+ vec_add (t->packet_data, packet_data, n_packet_data_bytes);
+ t->min_n_buffers_each_physmem_alloc = min_n_buffers_each_physmem_alloc;
+
+ t->free_list_index = vlib_buffer_create_free_list_helper
+ (vm, n_packet_data_bytes,
+ /* is_public */ 1,
+ /* is_default */ 0,
+ name);
+
+ ASSERT (t->free_list_index != 0);
+ fl = vlib_buffer_get_free_list (vm, t->free_list_index);
+ fl->min_n_buffers_each_physmem_alloc = t->min_n_buffers_each_physmem_alloc;
+
+ fl->buffer_init_function = vlib_packet_template_buffer_init;
+ fl->buffer_init_function_opaque = pointer_to_uword (t);
+
+ fl->buffer_init_template.current_data = 0;
+ fl->buffer_init_template.current_length = n_packet_data_bytes;
+ fl->buffer_init_template.flags = 0;
+}
+
+void *
+vlib_packet_template_get_packet (vlib_main_t * vm,
+ vlib_packet_template_t * t,
+ u32 * bi_result)
+{
+ u32 bi;
+ vlib_buffer_t * b;
+
+ if (vlib_buffer_alloc (vm, &bi, 1) != 1)
+ return 0;
+
+ *bi_result = bi;
+
+ b = vlib_get_buffer (vm, bi);
+ memcpy (vlib_buffer_get_current (b),
+ t->packet_data, vec_len(t->packet_data));
+ b->current_length = vec_len(t->packet_data);
+
+ return b->data;
+}
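+
+/* Illustrative sketch, not part of the original commit: initialize a
+   template from a stack payload once, then stamp out one pre-filled
+   packet.  The 64-byte payload and names are hypothetical. */
+static void
+example_use_packet_template (vlib_main_t * vm, vlib_packet_template_t * t)
+{
+  u8 payload[64] = { 0 };
+  u32 bi;
+  void * p;
+
+  vlib_packet_template_init (vm, t, payload, sizeof (payload),
+                             /* min_n_buffers_each_physmem_alloc */ 256,
+                             "example template");
+  p = vlib_packet_template_get_packet (vm, t, &bi);
+  if (p == 0)
+    clib_warning ("buffer allocation failure");
+}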
+
+void vlib_packet_template_get_packet_helper (vlib_main_t * vm, vlib_packet_template_t * t)
+{
+ word n = t->min_n_buffers_each_physmem_alloc;
+ word l = vec_len (t->packet_data);
+ word n_alloc;
+
+ ASSERT (l > 0);
+ ASSERT (vec_len (t->free_buffers) == 0);
+
+ vec_validate (t->free_buffers, n - 1);
+ n_alloc = vlib_buffer_alloc_from_free_list (vm, t->free_buffers,
+ n, t->free_list_index);
+ _vec_len (t->free_buffers) = n_alloc;
+}
+
+/* Append given data to end of buffer, possibly allocating new buffers. */
+u32 vlib_buffer_add_data (vlib_main_t * vm,
+ u32 free_list_index,
+ u32 buffer_index,
+ void * data, u32 n_data_bytes)
+{
+ u32 n_buffer_bytes, n_left, n_left_this_buffer, bi;
+ vlib_buffer_t * b;
+ void * d;
+
+ bi = buffer_index;
+ if (bi == 0
+ && 1 != vlib_buffer_alloc_from_free_list (vm, &bi, 1, free_list_index))
+ goto out_of_buffers;
+
+ d = data;
+ n_left = n_data_bytes;
+ n_buffer_bytes = vlib_buffer_free_list_buffer_size (vm, free_list_index);
+
+ b = vlib_get_buffer (vm, bi);
+ b->flags &= ~VLIB_BUFFER_TOTAL_LENGTH_VALID;
+
+ /* Get to the end of the chain before we try to append data...*/
+ while (b->flags & VLIB_BUFFER_NEXT_PRESENT)
+ b = vlib_get_buffer (vm, b->next_buffer);
+
+ while (1)
+ {
+ u32 n;
+
+ ASSERT (n_buffer_bytes >= b->current_length);
+ n_left_this_buffer = n_buffer_bytes - (b->current_data + b->current_length);
+ n = clib_min (n_left_this_buffer, n_left);
+ memcpy (vlib_buffer_get_current (b) + b->current_length, d, n);
+ b->current_length += n;
+ n_left -= n;
+ if (n_left == 0)
+ break;
+
+ d += n;
+ if (1 != vlib_buffer_alloc_from_free_list (vm, &b->next_buffer, 1, free_list_index))
+ goto out_of_buffers;
+
+ b->flags |= VLIB_BUFFER_NEXT_PRESENT;
+
+ b = vlib_get_buffer (vm, b->next_buffer);
+ }
+
+ return bi;
+
+ out_of_buffers:
+ clib_error ("out of buffers");
+ return bi;
+}
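+
+/* Illustrative sketch, not part of the original commit: append a payload
+   of arbitrary size, letting vlib_buffer_add_data allocate the head
+   buffer (buffer_index 0) and chain further buffers as needed. */
+static u32
+example_build_chain (vlib_main_t * vm, void * payload, u32 n_bytes)
+{
+  return vlib_buffer_add_data (vm, VLIB_BUFFER_DEFAULT_FREE_LIST_INDEX,
+                               /* buffer_index */ 0, payload, n_bytes);
+}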
+
+static void vlib_serialize_tx (serialize_main_header_t * m, serialize_stream_t * s)
+{
+ vlib_main_t * vm;
+ vlib_serialize_buffer_main_t * sm;
+ uword n, n_bytes_to_write;
+ vlib_buffer_t * last;
+
+ n_bytes_to_write = s->current_buffer_index;
+ sm = uword_to_pointer (s->data_function_opaque, vlib_serialize_buffer_main_t *);
+ vm = sm->vlib_main;
+
+ ASSERT (sm->tx.max_n_data_bytes_per_chain > 0);
+ if (serialize_stream_is_end_of_stream (s)
+ || sm->tx.n_total_data_bytes + n_bytes_to_write > sm->tx.max_n_data_bytes_per_chain)
+ {
+ vlib_process_t * p = vlib_get_current_process (vm);
+
+ last = vlib_get_buffer (vm, sm->last_buffer);
+ last->current_length = n_bytes_to_write;
+
+ vlib_set_next_frame_buffer (vm, &p->node_runtime, sm->tx.next_index, sm->first_buffer);
+
+ sm->first_buffer = sm->last_buffer = ~0;
+ sm->tx.n_total_data_bytes = 0;
+ }
+
+ else if (n_bytes_to_write == 0 && s->n_buffer_bytes == 0)
+ {
+ ASSERT (sm->first_buffer == ~0);
+ ASSERT (sm->last_buffer == ~0);
+ n = vlib_buffer_alloc_from_free_list (vm, &sm->first_buffer, 1, sm->tx.free_list_index);
+ if (n != 1)
+ serialize_error (m, clib_error_create ("vlib_buffer_alloc_from_free_list fails"));
+ sm->last_buffer = sm->first_buffer;
+ s->n_buffer_bytes = vlib_buffer_free_list_buffer_size (vm, sm->tx.free_list_index);
+ }
+
+ if (n_bytes_to_write > 0)
+ {
+ vlib_buffer_t * prev = vlib_get_buffer (vm, sm->last_buffer);
+ n = vlib_buffer_alloc_from_free_list (vm, &sm->last_buffer, 1, sm->tx.free_list_index);
+ if (n != 1)
+ serialize_error (m, clib_error_create ("vlib_buffer_alloc_from_free_list fails"));
+ sm->tx.n_total_data_bytes += n_bytes_to_write;
+ prev->current_length = n_bytes_to_write;
+ prev->next_buffer = sm->last_buffer;
+ prev->flags |= VLIB_BUFFER_NEXT_PRESENT;
+ }
+
+ if (sm->last_buffer != ~0)
+ {
+ last = vlib_get_buffer (vm, sm->last_buffer);
+ s->buffer = vlib_buffer_get_current (last);
+ s->current_buffer_index = 0;
+ ASSERT (last->current_data == s->current_buffer_index);
+ }
+}
+
+static void vlib_serialize_rx (serialize_main_header_t * m, serialize_stream_t * s)
+{
+ vlib_main_t * vm;
+ vlib_serialize_buffer_main_t * sm;
+ vlib_buffer_t * last;
+
+ sm = uword_to_pointer (s->data_function_opaque, vlib_serialize_buffer_main_t *);
+ vm = sm->vlib_main;
+
+ if (serialize_stream_is_end_of_stream (s))
+ return;
+
+ if (sm->last_buffer != ~0)
+ {
+ last = vlib_get_buffer (vm, sm->last_buffer);
+
+ if (last->flags & VLIB_BUFFER_NEXT_PRESENT)
+ sm->last_buffer = last->next_buffer;
+ else
+ {
+ vlib_buffer_free (vm, &sm->first_buffer, /* count */ 1);
+ sm->first_buffer = sm->last_buffer = ~0;
+ }
+ }
+
+ if (sm->last_buffer == ~0)
+ {
+ while (clib_fifo_elts (sm->rx.buffer_fifo) == 0)
+ {
+ sm->rx.ready_one_time_event = vlib_process_create_one_time_event (vm, vlib_current_process (vm), ~0);
+ vlib_process_wait_for_one_time_event (vm, /* no event data */ 0, sm->rx.ready_one_time_event);
+ }
+
+ clib_fifo_sub1 (sm->rx.buffer_fifo, sm->first_buffer);
+ sm->last_buffer = sm->first_buffer;
+ }
+
+ ASSERT (sm->last_buffer != ~0);
+
+ last = vlib_get_buffer (vm, sm->last_buffer);
+ s->current_buffer_index = 0;
+ s->buffer = vlib_buffer_get_current (last);
+ s->n_buffer_bytes = last->current_length;
+}
+
+static void
+serialize_open_vlib_helper (serialize_main_t * m,
+ vlib_main_t * vm,
+ vlib_serialize_buffer_main_t * sm,
+ uword is_read)
+{
+ /* Initialize serialize main but save overflow buffer for re-use between calls. */
+ {
+ u8 * save = m->stream.overflow_buffer;
+ memset (m, 0, sizeof (m[0]));
+ m->stream.overflow_buffer = save;
+ if (save)
+ _vec_len (save) = 0;
+ }
+
+ sm->first_buffer = sm->last_buffer = ~0;
+ if (is_read)
+ clib_fifo_reset (sm->rx.buffer_fifo);
+ else
+ sm->tx.n_total_data_bytes = 0;
+ sm->vlib_main = vm;
+ m->header.data_function = is_read ? vlib_serialize_rx : vlib_serialize_tx;
+ m->stream.data_function_opaque = pointer_to_uword (sm);
+}
+
+void serialize_open_vlib_buffer (serialize_main_t * m, vlib_main_t * vm, vlib_serialize_buffer_main_t * sm)
+{ serialize_open_vlib_helper (m, vm, sm, /* is_read */ 0); }
+
+void unserialize_open_vlib_buffer (serialize_main_t * m, vlib_main_t * vm, vlib_serialize_buffer_main_t * sm)
+{ serialize_open_vlib_helper (m, vm, sm, /* is_read */ 1); }
+
+u32 serialize_close_vlib_buffer (serialize_main_t * m)
+{
+ vlib_serialize_buffer_main_t * sm
+ = uword_to_pointer (m->stream.data_function_opaque, vlib_serialize_buffer_main_t *);
+ vlib_buffer_t * last;
+ serialize_stream_t * s = &m->stream;
+
+ last = vlib_get_buffer (sm->vlib_main, sm->last_buffer);
+ last->current_length = s->current_buffer_index;
+
+ if (vec_len (s->overflow_buffer) > 0)
+ {
+ sm->last_buffer
+ = vlib_buffer_add_data (sm->vlib_main, sm->tx.free_list_index,
+ sm->last_buffer == ~0 ? 0 : sm->last_buffer,
+ s->overflow_buffer,
+ vec_len (s->overflow_buffer));
+ _vec_len (s->overflow_buffer) = 0;
+ }
+
+ return sm->first_buffer;
+}
+
+void unserialize_close_vlib_buffer (serialize_main_t * m)
+{
+ vlib_serialize_buffer_main_t * sm
+ = uword_to_pointer (m->stream.data_function_opaque, vlib_serialize_buffer_main_t *);
+ if (sm->first_buffer != ~0)
+ vlib_buffer_free_one (sm->vlib_main, sm->first_buffer);
+ clib_fifo_reset (sm->rx.buffer_fifo);
+ if (m->stream.overflow_buffer)
+ _vec_len (m->stream.overflow_buffer) = 0;
+}
+
+static u8 * format_vlib_buffer_free_list (u8 * s, va_list * va)
+{
+ vlib_buffer_free_list_t * f = va_arg (*va, vlib_buffer_free_list_t *);
+ uword bytes_alloc, bytes_free, n_free, size;
+
+ if (! f)
+ return format (s, "%=30s%=12s%=12s%=12s%=12s%=12s%=12s",
+ "Name", "Index", "Size", "Alloc", "Free", "#Alloc", "#Free");
+
+ size = sizeof (vlib_buffer_t) + f->n_data_bytes;
+ n_free = vec_len (f->aligned_buffers) + vec_len (f->unaligned_buffers);
+ bytes_alloc = size * f->n_alloc;
+ bytes_free = size * n_free;
+
+ s = format (s, "%30s%12d%12d%=12U%=12U%=12d%=12d",
+ f->name, f->index, f->n_data_bytes,
+ format_memory_size, bytes_alloc,
+ format_memory_size, bytes_free,
+ f->n_alloc, n_free);
+
+ return s;
+}
+
+static clib_error_t *
+show_buffers (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ vlib_buffer_main_t * bm = vm->buffer_main;
+ vlib_buffer_free_list_t * f;
+
+ vlib_cli_output (vm, "%U", format_vlib_buffer_free_list, 0);
+ pool_foreach (f, bm->buffer_free_list_pool, ({
+ vlib_cli_output (vm, "%U", format_vlib_buffer_free_list, f);
+ }));
+
+ return 0;
+}
+
+VLIB_CLI_COMMAND (show_buffers_command, static) = {
+ .path = "show buffers",
+ .short_help = "Show packet buffer allocation",
+ .function = show_buffers,
+};
+
diff --git a/vlib/vlib/buffer.h b/vlib/vlib/buffer.h
new file mode 100644
index 00000000000..6322481b696
--- /dev/null
+++ b/vlib/vlib/buffer.h
@@ -0,0 +1,376 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * buffer.h: VLIB buffers
+ *
+ * Copyright (c) 2008 Eliot Dresselhaus
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef included_vlib_buffer_h
+#define included_vlib_buffer_h
+
+#include <vppinfra/types.h>
+#include <vppinfra/cache.h>
+#include <vppinfra/serialize.h>
+#include <vppinfra/vector.h>
+#include <vlib/error.h> /* for vlib_error_t */
+#include <vlib/config.h> /* for __PRE_DATA_SIZE */
+
+#ifdef CLIB_HAVE_VEC128
+typedef u8x16 vlib_copy_unit_t;
+#else
+typedef uword vlib_copy_unit_t;
+#endif
+
+/** \file
+ vlib buffer structure definition and a few select
+ access methods. This structure and the buffer allocation
+ mechanism should perhaps live in vnet, but it would take a lot
+ of typing to make it so.
+*/
+
+/* VLIB buffer representation. */
+typedef struct {
+ i16 current_data; /**< signed offset in data[], pre_data[]
+ that we are currently processing.
+ If negative current header points into predata area.
+ */
+ u16 current_length; /**< Nbytes between current data and
+ the end of this buffer.
+ */
+ u32 flags; /**< buffer flags:
+ <br> VLIB_BUFFER_IS_TRACED: trace this buffer.
+ <br> VLIB_BUFFER_NEXT_PRESENT: this is a multi-chunk buffer.
+ <br> VLIB_BUFFER_TOTAL_LENGTH_VALID: as it says
+ <br> VLIB_BUFFER_REPL_FAIL: packet replication failure
+ <br> VLIB_BUFFER_FLAG_USER(n): user-defined bit N
+ */
+#define VLIB_BUFFER_IS_TRACED (1 << 0)
+#define VLIB_BUFFER_LOG2_NEXT_PRESENT (1)
+#define VLIB_BUFFER_NEXT_PRESENT (1 << VLIB_BUFFER_LOG2_NEXT_PRESENT)
+#define VLIB_BUFFER_IS_RECYCLED (1 << 2)
+#define VLIB_BUFFER_TOTAL_LENGTH_VALID (1 << 3)
+#define VLIB_BUFFER_HGSHM_USER_INDEX_VALID (1 << 4)
+#define VLIB_BUFFER_REPL_FAIL (1 << 5)
+
+ /* User defined buffer flags. */
+#define LOG2_VLIB_BUFFER_FLAG_USER(n) (32 - (n))
+#define VLIB_BUFFER_FLAG_USER(n) (1 << LOG2_VLIB_BUFFER_FLAG_USER(n))
+
+ u32 free_list_index; /**< Buffer free list that this buffer was
+ allocated from and will be freed to.
+ */
+
+ u32 total_length_not_including_first_buffer;
+ /**< Only valid for first buffer in chain. Current length plus
+ total length given here give total number of bytes in buffer chain.
+ */
+
+
+ u32 next_buffer; /**< Next buffer for this linked-list of buffers.
+ Only valid if VLIB_BUFFER_NEXT_PRESENT flag is set.
+ */
+
+ u32 trace_index; /**< Specifies index into trace buffer
+ if VLIB_BUFFER_IS_TRACED flag is set.
+ */
+
+
+ u32 clone_count; /**< Specifies whether this buffer should be
+ reinitialized when freed. It will be reinitialized
+ if the value is 0. This field can be used
+ as a counter or for other state during packet
+ replication. The buffer free function does not
+ modify this value.
+ */
+
+ vlib_error_t error; /**< Error code for buffers to be enqueued
+ to error handler.
+ */
+
+ u32 opaque[8]; /**< Opaque data used by sub-graphs for their own purposes.
+ See .../vnet/vnet/buffer.h
+ */
+ /***** end of first cache line */
+
+ u32 opaque2[16]; /**< More opaque data, in its own cache line */
+
+ /***** end of second cache line */
+ u8 pre_data [__PRE_DATA_SIZE]; /**< Space for inserting data
+ before buffer start.
+ Packet rewrite string will be
+ rewritten backwards and may extend
+ back before buffer->data[0].
+ Must come directly before packet data.
+ */
+
+#define VLIB_BUFFER_PRE_DATA_SIZE (ARRAY_LEN (((vlib_buffer_t *)0)->pre_data))
+ u8 data[0]; /**< Packet data. Hardware DMA here */
+} vlib_buffer_t; /* Must be a multiple of 64B. */
+
+/** \brief Prefetch buffer metadata.
+ The first 64 bytes of the buffer contain most header information
+
+ @param b - (vlib_buffer_t *) pointer to the buffer
+ @param type - LOAD, STORE. In most cases, STORE is the right answer
+*/
+
+#define vlib_prefetch_buffer_header(b,type) CLIB_PREFETCH (b, 64, type)
+
+always_inline vlib_buffer_t *
+vlib_buffer_next_contiguous (vlib_buffer_t * b, u32 buffer_bytes)
+{ return (void *) (b + 1) + buffer_bytes; }
+
+always_inline void
+vlib_buffer_struct_is_sane (vlib_buffer_t * b)
+{
+ ASSERT (sizeof (b[0]) % 64 == 0);
+
+ /* Rewrite data must be before and contiguous with packet data. */
+ ASSERT (b->pre_data + VLIB_BUFFER_PRE_DATA_SIZE == b->data);
+}
+
+/** \brief Get pointer to current data to process
+
+ @param b - (vlib_buffer_t *) pointer to the buffer
+ @return - (void *) (b->data + b->current_data)
+*/
+
+always_inline void *
+vlib_buffer_get_current (vlib_buffer_t * b)
+{
+ /* Check bounds. */
+ ASSERT ((signed) b->current_data >= (signed) -VLIB_BUFFER_PRE_DATA_SIZE);
+ return b->data + b->current_data;
+}
+
+/** \brief Advance current data pointer by the supplied (signed!) amount
+
+ @param b - (vlib_buffer_t *) pointer to the buffer
+ @param l - (word) signed increment
+*/
+always_inline void
+vlib_buffer_advance (vlib_buffer_t * b, word l)
+{
+ ASSERT (b->current_length >= l);
+ b->current_data += l;
+ b->current_length -= l;
+}
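+
+/* Illustrative sketch, not part of the original commit: advance past a
+   14-byte ethernet header, then rewind with a negative advance. */
+always_inline void
+example_skip_ethernet_header (vlib_buffer_t * b)
+{
+  vlib_buffer_advance (b, 14);   /* current_data += 14, current_length -= 14 */
+  /* ... inspect the L3 header via vlib_buffer_get_current (b) ... */
+  vlib_buffer_advance (b, -14);  /* restore original state */
+}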
+
+/** \brief Reset current header & length to state they were in when
+ packet was received.
+
+ @param b - (vlib_buffer_t *) pointer to the buffer
+*/
+
+always_inline void
+vlib_buffer_reset (vlib_buffer_t * b)
+{
+ b->current_length += clib_max (b->current_data, 0);
+ b->current_data = 0;
+}
+
+/** \brief Get pointer to buffer's opaque data array
+
+ @param b - (vlib_buffer_t *) pointer to the buffer
+ @return - (void *) b->opaque
+*/
+always_inline void *
+vlib_get_buffer_opaque (vlib_buffer_t * b)
+{ return (void *) b->opaque; }
+
+/** \brief Get pointer to buffer's opaque2 data array
+
+ @param b - (vlib_buffer_t *) pointer to the buffer
+ @return - (void *) b->opaque2
+*/
+always_inline void *
+vlib_get_buffer_opaque2 (vlib_buffer_t * b)
+{ return (void *) b->opaque2; }
+
+/* Forward declaration. */
+struct vlib_main_t;
+
+typedef struct vlib_buffer_free_list_t {
+ /* Template buffer used to initialize first 16 bytes of buffers
+ allocated on this free list. */
+ vlib_buffer_t buffer_init_template;
+
+ /* Our index into vlib_main_t's buffer_free_list_pool. */
+ u32 index;
+
+ /* Number of data bytes for buffers in this free list. */
+ u32 n_data_bytes;
+
+ /* Number of buffers to allocate when we need to allocate new buffers
+ from physmem heap. */
+ u32 min_n_buffers_each_physmem_alloc;
+
+ /* Total number of buffers allocated from this free list. */
+ u32 n_alloc;
+
+ /* Vectors of free buffers. Each element is a byte offset into the I/O heap.
+ The aligned vector always holds naturally aligned, vlib_copy_unit_t-sized
+ chunks of buffer indices; the unaligned vector holds any leftovers. This
+ split speeds up the copy routines. */
+ u32 * aligned_buffers, * unaligned_buffers;
+
+ /* Memory chunks allocated for this free list
+ recorded here so they can be freed when free list
+ is deleted. */
+ void ** buffer_memory_allocated;
+
+ /* Free list name. */
+ u8 * name;
+
+ /* Callback function to initialize newly allocated buffers.
+ If null, buffers are zeroed. */
+ void (* buffer_init_function) (struct vlib_main_t * vm,
+ struct vlib_buffer_free_list_t * fl,
+ u32 * buffers, u32 n_buffers);
+
+ /* Callback function to announce that buffers have been
+ added to the freelist */
+ void (* buffers_added_to_freelist_function)
+ (struct vlib_main_t * vm,
+ struct vlib_buffer_free_list_t * fl);
+
+ uword buffer_init_function_opaque;
+} __attribute__ ((aligned (16))) vlib_buffer_free_list_t;
+
+typedef struct {
+ /* Buffer free callback, for subversive activities */
+ u32 (*buffer_free_callback) (struct vlib_main_t *vm,
+ u32 * buffers,
+ u32 n_buffers,
+ u32 follow_buffer_next);
+ /* Pool of buffer free lists.
+ Multiple free lists exist for packet generator which uses
+ separate free lists for each packet stream --- so as to avoid
+ initializing static data for each packet generated. */
+ vlib_buffer_free_list_t * buffer_free_list_pool;
+#define VLIB_BUFFER_DEFAULT_FREE_LIST_INDEX (0)
+
+#if DPDK == 1
+/* must be same as dpdk buffer size */
+#define VLIB_BUFFER_DEFAULT_FREE_LIST_BYTES (2048)
+#else
+#define VLIB_BUFFER_DEFAULT_FREE_LIST_BYTES (512)
+#endif
+
+ /* Hash table mapping buffer size (rounded to next unit of
+ sizeof (vlib_buffer_t)) to free list index. */
+ uword * free_list_by_size;
+
+ /* Hash table mapping buffer index to known state
+ (vlib_buffer_known_state_t: known-free or known-allocated).
+ If a buffer index is not in the hash table then this buffer
+ has never been allocated. */
+ uword * buffer_known_hash;
+
+ /* List of free-lists needing Blue Light Special announcements */
+ vlib_buffer_free_list_t **announce_list;
+
+ /* Vector of rte_mempools per socket */
+#if DPDK == 1
+ struct rte_mempool ** pktmbuf_pools;
+#endif
+} vlib_buffer_main_t;
+
+typedef struct {
+ struct vlib_main_t * vlib_main;
+
+ u32 first_buffer, last_buffer;
+
+ union {
+ struct {
+ /* Total accumulated bytes in chain starting with first_buffer. */
+ u32 n_total_data_bytes;
+
+ /* Max number of bytes to accumulate in a chain starting with first_buffer.
+ When this limit is reached, buffers are enqueued to the next node. */
+ u32 max_n_data_bytes_per_chain;
+
+ /* Next node to enqueue buffers to relative to current process node. */
+ u32 next_index;
+
+ /* Free list to use to allocate new buffers. */
+ u32 free_list_index;
+ } tx;
+
+ struct {
+ /* CLIB fifo of buffer indices waiting to be unserialized. */
+ u32 * buffer_fifo;
+
+ /* Event type used to signal that RX buffers have been added to fifo. */
+ uword ready_one_time_event;
+ } rx;
+ };
+} vlib_serialize_buffer_main_t;
+
+void serialize_open_vlib_buffer (serialize_main_t * m, struct vlib_main_t * vm, vlib_serialize_buffer_main_t * sm);
+void unserialize_open_vlib_buffer (serialize_main_t * m, struct vlib_main_t * vm, vlib_serialize_buffer_main_t * sm);
+
+u32 serialize_close_vlib_buffer (serialize_main_t * m);
+void unserialize_close_vlib_buffer (serialize_main_t * m);
+void *vlib_set_buffer_free_callback (struct vlib_main_t *vm, void *fp);
+
+always_inline u32
+serialize_vlib_buffer_n_bytes (serialize_main_t * m)
+{
+ serialize_stream_t * s = &m->stream;
+ vlib_serialize_buffer_main_t * sm
+ = uword_to_pointer (m->stream.data_function_opaque, vlib_serialize_buffer_main_t *);
+ return sm->tx.n_total_data_bytes + s->current_buffer_index + vec_len (s->overflow_buffer);
+}
+
+
+/** \brief Compile time buffer trajectory tracing option
+ Turn this on if you run into "bad monkey" contexts,
+ and you want to know exactly which nodes they've visited...
+ See vlib/main.c...
+*/
+#define VLIB_BUFFER_TRACE_TRAJECTORY 0
+
+#if VLIB_BUFFER_TRACE_TRAJECTORY > 0
+#define VLIB_BUFFER_TRACE_TRAJECTORY_INIT(b) (b)->pre_data[0]=0
+#else
+#define VLIB_BUFFER_TRACE_TRAJECTORY_INIT(b)
+#endif /* VLIB_BUFFER_TRACE_TRAJECTORY */
+
+#endif /* included_vlib_buffer_h */
diff --git a/vlib/vlib/buffer_funcs.h b/vlib/vlib/buffer_funcs.h
new file mode 100644
index 00000000000..452cdcb26a7
--- /dev/null
+++ b/vlib/vlib/buffer_funcs.h
@@ -0,0 +1,602 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * buffer_funcs.h: VLIB buffer related functions/inlines
+ *
+ * Copyright (c) 2008 Eliot Dresselhaus
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef included_vlib_buffer_funcs_h
+#define included_vlib_buffer_funcs_h
+
+#include <vppinfra/hash.h>
+
+/** \file
+ vlib buffer access methods.
+*/
+
+
+/** \brief Translate buffer index into buffer pointer
+
+ @param vm - (vlib_main_t *) vlib main data structure pointer
+ @param buffer_index - (u32) buffer index
+ @return - (vlib_buffer_t *) buffer pointer
+*/
+always_inline vlib_buffer_t *
+vlib_get_buffer (vlib_main_t * vm, u32 buffer_index)
+{
+ return vlib_physmem_at_offset (&vm->physmem_main, ((uword)buffer_index)
+ << CLIB_LOG2_CACHE_LINE_BYTES);
+}
+
+/** \brief Translate buffer pointer into buffer index
+
+ @param vm - (vlib_main_t *) vlib main data structure pointer
+ @param b - (void *) buffer pointer
+ @return - (u32) buffer index
+*/
+always_inline u32
+vlib_get_buffer_index (vlib_main_t * vm, void * p)
+{
+ uword offset = vlib_physmem_offset_of (&vm->physmem_main, p);
+ ASSERT((offset % (1<<CLIB_LOG2_CACHE_LINE_BYTES)) == 0);
+ return offset >> CLIB_LOG2_CACHE_LINE_BYTES;
+}
+
+/** \brief Get next buffer in buffer linked list, or zero for end of list.
+
+ @param vm - (vlib_main_t *) vlib main data structure pointer
+ @param b - (void *) buffer pointer
+ @return - (vlib_buffer_t *) next buffer, or NULL
+*/
+always_inline vlib_buffer_t *
+vlib_get_next_buffer (vlib_main_t * vm, vlib_buffer_t * b)
+{
+ return (b->flags & VLIB_BUFFER_NEXT_PRESENT
+ ? vlib_get_buffer (vm, b->next_buffer)
+ : 0);
+}
+
+uword vlib_buffer_length_in_chain_slow_path (vlib_main_t * vm, vlib_buffer_t * b_first);
+
+/** \brief Get length in bytes of the buffer chain
+
+ @param vm - (vlib_main_t *) vlib main data structure pointer
+ @param b - (void *) buffer pointer
+ @return - (uword) length of buffer chain
+*/
+always_inline uword
+vlib_buffer_length_in_chain (vlib_main_t * vm, vlib_buffer_t * b)
+{
+ uword l = b->current_length + b->total_length_not_including_first_buffer;
+ if (PREDICT_FALSE ((b->flags & (VLIB_BUFFER_NEXT_PRESENT
+ | VLIB_BUFFER_TOTAL_LENGTH_VALID))
+ == VLIB_BUFFER_NEXT_PRESENT))
+ return vlib_buffer_length_in_chain_slow_path (vm, b);
+ return l;
+}
+
+/** \brief Get length in bytes of the buffer chain, given a buffer index
+
+ @param vm - (vlib_main_t *) vlib main data structure pointer
+ @param bi - (u32) buffer index
+ @return - (uword) length of buffer chain
+*/
+always_inline uword
+vlib_buffer_index_length_in_chain (vlib_main_t * vm, u32 bi)
+{
+ vlib_buffer_t * b = vlib_get_buffer (vm, bi);
+ return vlib_buffer_length_in_chain (vm, b);
+}
+
+/** \brief Copy buffer contents to memory
+
+ @param vm - (vlib_main_t *) vlib main data structure pointer
+ @param bi - (u32) buffer index
+ @param contents - (u8 *) memory, <strong>must be large enough</strong>
+ @return - (uword) length of buffer chain
+*/
+always_inline uword
+vlib_buffer_contents (vlib_main_t * vm, u32 buffer_index, u8 * contents)
+{
+ uword content_len = 0;
+ uword l;
+ vlib_buffer_t * b;
+
+ while (1)
+ {
+ b = vlib_get_buffer (vm, buffer_index);
+ l = b->current_length;
+ memcpy (contents + content_len, b->data + b->current_data, l);
+ content_len += l;
+ if (! (b->flags & VLIB_BUFFER_NEXT_PRESENT))
+ break;
+ buffer_index = b->next_buffer;
+ }
+
+ return content_len;
+}
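+
+/* Usage sketch (editor's illustration): linearize a possibly-chained
+   buffer into a temporary vector sized from the chain length. Assumes
+   `bi' holds a valid index of a non-empty chain.
+
+     vlib_buffer_t * b = vlib_get_buffer (vm, bi);
+     u8 * copy = 0;
+     vec_validate (copy, vlib_buffer_length_in_chain (vm, b) - 1);
+     vlib_buffer_contents (vm, bi, copy);
+     ... use copy ...
+     vec_free (copy);
+*/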
+
+/* Return physical address of buffer->data start. */
+always_inline u64
+vlib_get_buffer_data_physical_address (vlib_main_t * vm, u32 buffer_index)
+{
+ return vlib_physmem_offset_to_physical (&vm->physmem_main,
+ (((uword)buffer_index) <<
+ CLIB_LOG2_CACHE_LINE_BYTES) +
+ STRUCT_OFFSET_OF (vlib_buffer_t, data));
+}
+
+/** \brief Prefetch buffer metadata by buffer index
+ The first 64 bytes of buffer contains most header information
+
+ @param vm - (vlib_main_t *) vlib main data structure pointer
+ @param bi - (u32) buffer index
+ @param type - LOAD, STORE. In most cases, STORE is the right answer
+*/
+#define vlib_prefetch_buffer_with_index(vm,bi,type) \
+ do { \
+ vlib_buffer_t * _b = vlib_get_buffer (vm, bi); \
+ vlib_prefetch_buffer_header (_b, type); \
+ } while (0)
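+
+/* Usage sketch (editor's illustration): in a dual-loop dispatch function,
+   prefetch the headers of buffers two iterations ahead so they are warm
+   by the time they are processed; `from' and `n_left_from' follow the
+   usual dispatch-loop conventions.
+
+     while (n_left_from >= 4)
+       {
+         vlib_prefetch_buffer_with_index (vm, from[2], STORE);
+         vlib_prefetch_buffer_with_index (vm, from[3], STORE);
+         ... process from[0] and from[1] ...
+       }
+*/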
+
+#if 0
+/* Iterate over known allocated vlib bufs. You probably do not want
+ * to do this!
+ @param vm the vlib_main_t
+ @param bi found allocated buffer index
+ @param body operation to perform on buffer index
+ function executes body for each allocated buffer index
+ */
+#define vlib_buffer_foreach_allocated(vm,bi,body) \
+do { \
+ vlib_main_t * _vmain = (vm); \
+ vlib_buffer_main_t * _bmain = &_vmain->buffer_main; \
+ hash_pair_t * _vbpair; \
+ hash_foreach_pair(_vbpair, _bmain->buffer_known_hash, ({ \
+ if (VLIB_BUFFER_KNOWN_ALLOCATED == _vbpair->value[0]) { \
+ (bi) = _vbpair->key; \
+ body; \
+ } \
+ })); \
+} while (0)
+#endif
+
+#if DPDK == 0
+
+typedef enum {
+ /* Index is unknown. */
+ VLIB_BUFFER_UNKNOWN,
+
+ /* Index is known and free/allocated. */
+ VLIB_BUFFER_KNOWN_FREE,
+ VLIB_BUFFER_KNOWN_ALLOCATED,
+} vlib_buffer_known_state_t;
+
+always_inline vlib_buffer_known_state_t
+vlib_buffer_is_known (vlib_main_t * vm, u32 buffer_index)
+{
+ vlib_buffer_main_t * bm = vm->buffer_main;
+ ASSERT(os_get_cpu_number() == 0);
+
+ uword * p = hash_get (bm->buffer_known_hash, buffer_index);
+ return p ? p[0] : VLIB_BUFFER_UNKNOWN;
+}
+
+always_inline void
+vlib_buffer_set_known_state (vlib_main_t * vm,
+ u32 buffer_index,
+ vlib_buffer_known_state_t state)
+{
+ vlib_buffer_main_t * bm = vm->buffer_main;
+ ASSERT(os_get_cpu_number() == 0);
+ hash_set (bm->buffer_known_hash, buffer_index, state);
+}
+
+/* Validates sanity of a single buffer.
+   Returns a formatted vector containing an error message, or 0 if sane. */
+u8 * vlib_validate_buffer (vlib_main_t * vm, u32 buffer_index, uword follow_chain);
+
+/* Validate an array of buffers. As above. */
+u8 * vlib_validate_buffers (vlib_main_t * vm,
+ u32 * buffers,
+ uword next_buffer_stride,
+ uword n_buffers,
+ vlib_buffer_known_state_t known_state,
+ uword follow_chain);
+
+#endif /* DPDK == 0 */
+
+clib_error_t *
+vlib_buffer_pool_create(vlib_main_t * vm, unsigned num_mbufs,
+ unsigned mbuf_size, unsigned socket_id);
+
+/** \brief Allocate buffers into supplied array
+
+ @param vm - (vlib_main_t *) vlib main data structure pointer
+ @param buffers - (u32 * ) buffer index array
+ @param n_buffers - (u32) number of buffers requested
+ @return - (u32) number of buffers actually allocated, may be
+ less than the number requested or zero
+*/
+u32 vlib_buffer_alloc (vlib_main_t * vm, u32 * buffers, u32 n_buffers);
+
+always_inline u32
+vlib_buffer_round_size (u32 size)
+{ return round_pow2 (size, sizeof (vlib_buffer_t)); }
+
+/** \brief Allocate buffers from a specific free list into supplied array
+
+    @param vm - (vlib_main_t *) vlib main data structure pointer
+    @param buffers - (u32 * ) buffer index array
+    @param n_buffers - (u32) number of buffers requested
+    @param free_list_index - (u32) index of the free list to allocate from
+    @return - (u32) number of buffers actually allocated, may be
+    less than the number requested or zero
+*/
+u32 vlib_buffer_alloc_from_free_list (vlib_main_t * vm,
+ u32 * buffers,
+ u32 n_buffers,
+ u32 free_list_index);
+
+/** \brief Free buffers
+ Frees the entire buffer chain for each buffer
+
+ @param vm - (vlib_main_t *) vlib main data structure pointer
+ @param buffers - (u32 * ) buffer index array
+ @param n_buffers - (u32) number of buffers to free
+
+*/
+void vlib_buffer_free (vlib_main_t * vm,
+ /* pointer to first buffer */
+ u32 * buffers,
+ /* number of buffers to free */
+ u32 n_buffers);
+
+/** \brief Free buffers, does not free the buffer chain for each buffer
+
+ @param vm - (vlib_main_t *) vlib main data structure pointer
+ @param buffers - (u32 * ) buffer index array
+ @param n_buffers - (u32) number of buffers to free
+
+*/
+void vlib_buffer_free_no_next (vlib_main_t * vm,
+ /* pointer to first buffer */
+ u32 * buffers,
+ /* number of buffers to free */
+ u32 n_buffers);
+
+/** \brief Free one buffer
+ Shorthand to free a single buffer chain.
+
+ @param vm - (vlib_main_t *) vlib main data structure pointer
+ @param buffer_index - (u32) buffer index to free
+*/
+always_inline void
+vlib_buffer_free_one (vlib_main_t * vm, u32 buffer_index)
+{
+ vlib_buffer_free (vm, &buffer_index, /* n_buffers */ 1);
+}
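+
+/* Usage sketch (editor's illustration): a minimal allocate / free round
+   trip. Allocation may return fewer buffers than requested, so free only
+   what was actually received.
+
+     u32 bis[4];
+     u32 n_alloc = vlib_buffer_alloc (vm, bis, 4);
+     ... fill bis[0] .. bis[n_alloc-1] ...
+     vlib_buffer_free (vm, bis, n_alloc);
+*/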
+
+/* Add/delete buffer free lists. */
+u32 vlib_buffer_create_free_list (vlib_main_t * vm, u32 n_data_bytes, char * fmt, ...);
+void vlib_buffer_delete_free_list (vlib_main_t * vm, u32 free_list_index);
+
+/* Find already existing public free list with given size or create one. */
+u32 vlib_buffer_get_or_create_free_list (vlib_main_t * vm, u32 n_data_bytes, char * fmt, ...);
+
+always_inline vlib_buffer_free_list_t *
+vlib_buffer_get_free_list (vlib_main_t * vm, u32 free_list_index)
+{
+ vlib_buffer_main_t * bm = vm->buffer_main;
+ vlib_buffer_free_list_t * f;
+
+ f = pool_elt_at_index (bm->buffer_free_list_pool, free_list_index);
+
+ /* Sanity: indices must match. */
+ ASSERT (f->index == free_list_index);
+
+ return f;
+}
+
+always_inline u32
+vlib_buffer_free_list_buffer_size (vlib_main_t * vm, u32 free_list_index)
+{
+ vlib_buffer_free_list_t * f = vlib_buffer_get_free_list (vm, free_list_index);
+ return f->n_data_bytes;
+}
+
+void
+vlib_aligned_memcpy (void * _dst, void * _src, int n_bytes);
+
+/* Reasonably fast buffer copy routine. */
+always_inline void
+vlib_copy_buffers (u32 * dst, u32 * src, u32 n)
+{
+ while (n >= 4)
+ {
+ dst[0] = src[0];
+ dst[1] = src[1];
+ dst[2] = src[2];
+ dst[3] = src[3];
+ dst += 4;
+ src += 4;
+ n -= 4;
+ }
+ while (n > 0)
+ {
+ dst[0] = src[0];
+ dst += 1;
+ src += 1;
+ n -= 1;
+ }
+}
+
+always_inline void *
+vlib_physmem_alloc_aligned (vlib_main_t * vm, clib_error_t ** error,
+ uword n_bytes, uword alignment)
+{
+ void * r = vm->os_physmem_alloc_aligned (&vm->physmem_main, n_bytes, alignment);
+ if (! r)
+ *error = clib_error_return (0, "failed to allocate %wd bytes of I/O memory", n_bytes);
+ else
+ *error = 0;
+ return r;
+}
+
+/* By default allocate I/O memory with cache line alignment. */
+always_inline void *
+vlib_physmem_alloc (vlib_main_t * vm, clib_error_t ** error, uword n_bytes)
+{ return vlib_physmem_alloc_aligned (vm, error, n_bytes, CLIB_CACHE_LINE_BYTES); }
+
+always_inline void
+vlib_physmem_free (vlib_main_t * vm, void * mem)
+{ vm->os_physmem_free (mem); }
+
+always_inline u64
+vlib_physmem_virtual_to_physical (vlib_main_t * vm, void * mem)
+{
+ vlib_physmem_main_t * pm = &vm->physmem_main;
+ uword o = pointer_to_uword (mem) - pm->virtual.start;
+ return vlib_physmem_offset_to_physical (pm, o);
+}
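+
+/* Usage sketch (editor's illustration): allocating DMA-able I/O memory
+   and recovering its physical address, following the error convention of
+   vlib_physmem_alloc above; `n_bytes' is hypothetical.
+
+     clib_error_t * error;
+     void * p = vlib_physmem_alloc (vm, &error, n_bytes);
+     if (error)
+       return error;
+     u64 pa = vlib_physmem_virtual_to_physical (vm, p);
+*/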
+
+/* Append given data to end of buffer, possibly allocating new buffers. */
+u32 vlib_buffer_add_data (vlib_main_t * vm,
+ u32 free_list_index,
+ u32 buffer_index,
+ void * data, u32 n_data_bytes);
+
+format_function_t format_vlib_buffer, format_vlib_buffer_and_data, format_vlib_buffer_contents;
+
+typedef struct {
+ /* Vector of packet data. */
+ u8 * packet_data;
+
+#if DPDK == 0
+ /* Number of buffers to allocate in each call to physmem
+ allocator. */
+ u32 min_n_buffers_each_physmem_alloc;
+
+ /* Buffer free list for this template. */
+ u32 free_list_index;
+
+ u32 * free_buffers;
+#endif
+} vlib_packet_template_t;
+
+void vlib_packet_template_get_packet_helper (vlib_main_t * vm,
+ vlib_packet_template_t * t);
+
+void vlib_packet_template_init (vlib_main_t * vm,
+ vlib_packet_template_t * t,
+ void * packet_data,
+ uword n_packet_data_bytes,
+ uword min_n_buffers_each_physmem_alloc,
+ char * fmt, ...);
+
+void *
+vlib_packet_template_get_packet (vlib_main_t * vm,
+ vlib_packet_template_t * t,
+ u32 * bi_result);
+
+always_inline void
+vlib_packet_template_free (vlib_main_t * vm, vlib_packet_template_t * t)
+{
+ vec_free (t->packet_data);
+}
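+
+/* Usage sketch (editor's illustration): a template pre-builds a packet
+   image once at init time, then stamps out copies cheaply on the fast
+   path. The template `t', prototype `pkt', and the choice of 16 buffers
+   per physmem allocation are hypothetical.
+
+     static vlib_packet_template_t t;
+     vlib_packet_template_init (vm, &t, pkt, sizeof (pkt), 16,
+                                "example template");
+     u32 bi;
+     void * p0 = vlib_packet_template_get_packet (vm, &t, &bi);
+*/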
+
+always_inline u32
+unserialize_vlib_buffer_n_bytes (serialize_main_t * m)
+{
+ serialize_stream_t * s = &m->stream;
+ vlib_serialize_buffer_main_t * sm
+ = uword_to_pointer (m->stream.data_function_opaque, vlib_serialize_buffer_main_t *);
+ vlib_main_t * vm = sm->vlib_main;
+ u32 n, * f;
+
+ n = s->n_buffer_bytes - s->current_buffer_index;
+ if (sm->last_buffer != ~0)
+ {
+ vlib_buffer_t * b = vlib_get_buffer (vm, sm->last_buffer);
+ while (b->flags & VLIB_BUFFER_NEXT_PRESENT)
+ {
+ b = vlib_get_buffer (vm, b->next_buffer);
+ n += b->current_length;
+ }
+ }
+
+ clib_fifo_foreach (f, sm->rx.buffer_fifo, ({
+ n += vlib_buffer_index_length_in_chain (vm, f[0]);
+ }));
+
+ return n;
+}
+
+typedef union {
+ vlib_buffer_t b;
+ vlib_copy_unit_t i[sizeof (vlib_buffer_t) / sizeof (vlib_copy_unit_t)];
+} vlib_buffer_union_t;
+
+/* Set a buffer quickly into "uninitialized" state. We want this to
+ be extremely cheap and arrange for all fields that need to be
+ initialized to be in the first 128 bits of the buffer. */
+always_inline void
+vlib_buffer_init_for_free_list (vlib_buffer_t * _dst,
+ vlib_buffer_free_list_t * fl)
+{
+ vlib_buffer_union_t * dst = (vlib_buffer_union_t *) _dst;
+ vlib_buffer_union_t * src = (vlib_buffer_union_t *) &fl->buffer_init_template;
+
+ /* Make sure buffer template is sane. */
+ ASSERT (fl->index == fl->buffer_init_template.free_list_index);
+
+ /* Copy template from src->current_data thru src->free_list_index */
+ dst->i[0] = src->i[0];
+ if (1 * sizeof (dst->i[0]) < 16)
+ dst->i[1] = src->i[1];
+ if (2 * sizeof (dst->i[0]) < 16)
+ dst->i[2] = src->i[2];
+
+ /* Make sure it really worked. */
+#define _(f) ASSERT (dst->b.f == src->b.f)
+ _ (current_data);
+ _ (current_length);
+ _ (flags);
+ _ (free_list_index);
+#undef _
+ ASSERT (dst->b.total_length_not_including_first_buffer == 0);
+}
+
+always_inline void
+vlib_buffer_init_two_for_free_list (vlib_buffer_t * _dst0,
+ vlib_buffer_t * _dst1,
+ vlib_buffer_free_list_t * fl)
+{
+ vlib_buffer_union_t * dst0 = (vlib_buffer_union_t *) _dst0;
+ vlib_buffer_union_t * dst1 = (vlib_buffer_union_t *) _dst1;
+ vlib_buffer_union_t * src = (vlib_buffer_union_t *) &fl->buffer_init_template;
+
+ /* Make sure buffer template is sane. */
+ ASSERT (fl->index == fl->buffer_init_template.free_list_index);
+
+ /* Copy template from src->current_data thru src->free_list_index */
+ dst0->i[0] = dst1->i[0] = src->i[0];
+ if (1 * sizeof (dst0->i[0]) < 16)
+ dst0->i[1] = dst1->i[1] = src->i[1];
+ if (2 * sizeof (dst0->i[0]) < 16)
+ dst0->i[2] = dst1->i[2] = src->i[2];
+
+ /* Make sure it really worked. */
+#define _(f) ASSERT (dst0->b.f == src->b.f && dst1->b.f == src->b.f)
+ _ (current_data);
+ _ (current_length);
+ _ (flags);
+ _ (free_list_index);
+#undef _
+ ASSERT (dst0->b.total_length_not_including_first_buffer == 0);
+ ASSERT (dst1->b.total_length_not_including_first_buffer == 0);
+}
+
+#if CLIB_DEBUG > 0
+u32 * vlib_buffer_state_validation_lock;
+uword * vlib_buffer_state_validation_hash;
+void * vlib_buffer_state_heap;
+#endif
+
+static inline void
+vlib_validate_buffer_in_use (vlib_buffer_t * b, u32 expected)
+{
+#if CLIB_DEBUG > 0
+ uword * p;
+ void * oldheap;
+
+ oldheap = clib_mem_set_heap (vlib_buffer_state_heap);
+
+ while (__sync_lock_test_and_set (vlib_buffer_state_validation_lock, 1))
+ ;
+
+ p = hash_get (vlib_buffer_state_validation_hash, b);
+
+ /* If we don't know about b, declare it to be in the expected state */
+ if (!p)
+ {
+ hash_set (vlib_buffer_state_validation_hash, b, expected);
+ goto out;
+ }
+
+ if (p[0] != expected)
+ {
+ void cj_stop(void);
+ u32 bi;
+ vlib_main_t * vm = &vlib_global_main;
+
+ cj_stop();
+
+ bi = vlib_get_buffer_index (vm, b);
+
+ clib_mem_set_heap (oldheap);
+ clib_warning ("%.6f buffer %llx (%d): %s, not %s",
+ vlib_time_now(vm), bi,
+ p[0] ? "busy" : "free",
+ expected ? "busy" : "free");
+ os_panic();
+ }
+ out:
+ CLIB_MEMORY_BARRIER();
+ *vlib_buffer_state_validation_lock = 0;
+ clib_mem_set_heap (oldheap);
+#endif
+}
+
+static inline void
+vlib_validate_buffer_set_in_use (vlib_buffer_t * b, u32 expected)
+{
+#if CLIB_DEBUG > 0
+ void * oldheap;
+
+ oldheap = clib_mem_set_heap (vlib_buffer_state_heap);
+
+ while (__sync_lock_test_and_set (vlib_buffer_state_validation_lock, 1))
+ ;
+
+ hash_set (vlib_buffer_state_validation_hash, b, expected);
+
+ CLIB_MEMORY_BARRIER();
+ *vlib_buffer_state_validation_lock = 0;
+ clib_mem_set_heap (oldheap);
+#endif
+}
+
+#endif /* included_vlib_buffer_funcs_h */
diff --git a/vlib/vlib/buffer_node.h b/vlib/vlib/buffer_node.h
new file mode 100644
index 00000000000..0fa5c8093ca
--- /dev/null
+++ b/vlib/vlib/buffer_node.h
@@ -0,0 +1,197 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * buffer_node.h: VLIB buffer handling node helper macros/inlines
+ *
+ * Copyright (c) 2008 Eliot Dresselhaus
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef included_vlib_buffer_node_h
+#define included_vlib_buffer_node_h
+
+#define vlib_validate_buffer_enqueue_x2(vm,node,next_index,to_next,n_left_to_next,bi0,bi1,next0,next1) \
+do { \
+ int enqueue_code = (next0 != next_index) + 2*(next1 != next_index); \
+ \
+ if (PREDICT_FALSE (enqueue_code != 0)) \
+ { \
+ switch (enqueue_code) \
+ { \
+ case 1: \
+ /* A B A */ \
+ to_next[-2] = bi1; \
+ to_next -= 1; \
+ n_left_to_next += 1; \
+ vlib_set_next_frame_buffer (vm, node, next0, bi0); \
+ break; \
+ \
+ case 2: \
+ /* A A B */ \
+ to_next -= 1; \
+ n_left_to_next += 1; \
+ vlib_set_next_frame_buffer (vm, node, next1, bi1); \
+ break; \
+ \
+ case 3: \
+ /* A B B or A B C */ \
+ to_next -= 2; \
+ n_left_to_next += 2; \
+ vlib_set_next_frame_buffer (vm, node, next0, bi0); \
+ vlib_set_next_frame_buffer (vm, node, next1, bi1); \
+ if (next0 == next1) \
+ { \
+ vlib_put_next_frame (vm, node, next_index, \
+ n_left_to_next); \
+ next_index = next1; \
+ vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); \
+ } \
+ } \
+ } \
+} while (0)
+
+#define vlib_validate_buffer_enqueue_x1(vm,node,next_index,to_next,n_left_to_next,bi0,next0) \
+do { \
+ if (PREDICT_FALSE (next0 != next_index)) \
+ { \
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next + 1); \
+ next_index = next0; \
+ vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); \
+ \
+ to_next[0] = bi0; \
+ to_next += 1; \
+ n_left_to_next -= 1; \
+ } \
+} while (0)
+
+always_inline uword
+generic_buffer_node_inline (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * frame,
+ uword sizeof_trace,
+ void * opaque1,
+ uword opaque2,
+ void (* two_buffers) (vlib_main_t * vm,
+ void * opaque1,
+ uword opaque2,
+ vlib_buffer_t * b0, vlib_buffer_t * b1,
+ u32 * next0, u32 * next1),
+ void (* one_buffer) (vlib_main_t * vm,
+ void * opaque1,
+ uword opaque2,
+ vlib_buffer_t * b0,
+ u32 * next0))
+{
+ u32 n_left_from, * from, * to_next;
+ u32 next_index;
+
+ from = vlib_frame_vector_args (frame);
+ n_left_from = frame->n_vectors;
+ next_index = node->cached_next_index;
+
+ if (node->flags & VLIB_NODE_FLAG_TRACE)
+ vlib_trace_frame_buffers_only (vm, node, from, frame->n_vectors,
+ /* stride */ 1, sizeof_trace);
+
+ while (n_left_from > 0)
+ {
+ u32 n_left_to_next;
+
+ vlib_get_next_frame (vm, node, next_index,
+ to_next, n_left_to_next);
+
+ while (n_left_from >= 4 && n_left_to_next >= 2)
+ {
+ vlib_buffer_t * p0, * p1;
+ u32 pi0, next0;
+ u32 pi1, next1;
+
+ /* Prefetch next iteration. */
+ {
+ vlib_buffer_t * p2, * p3;
+
+ p2 = vlib_get_buffer (vm, from[2]);
+ p3 = vlib_get_buffer (vm, from[3]);
+
+ vlib_prefetch_buffer_header (p2, LOAD);
+ vlib_prefetch_buffer_header (p3, LOAD);
+
+ CLIB_PREFETCH (p2->data, 64, LOAD);
+ CLIB_PREFETCH (p3->data, 64, LOAD);
+ }
+
+ pi0 = to_next[0] = from[0];
+ pi1 = to_next[1] = from[1];
+ from += 2;
+ to_next += 2;
+ n_left_from -= 2;
+ n_left_to_next -= 2;
+
+ p0 = vlib_get_buffer (vm, pi0);
+ p1 = vlib_get_buffer (vm, pi1);
+
+ two_buffers (vm, opaque1, opaque2, p0, p1, &next0, &next1);
+
+ vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
+ to_next, n_left_to_next,
+ pi0, pi1, next0, next1);
+ }
+
+ while (n_left_from > 0 && n_left_to_next > 0)
+ {
+ vlib_buffer_t * p0;
+ u32 pi0, next0;
+
+ pi0 = from[0];
+ to_next[0] = pi0;
+ from += 1;
+ to_next += 1;
+ n_left_from -= 1;
+ n_left_to_next -= 1;
+
+ p0 = vlib_get_buffer (vm, pi0);
+
+ one_buffer (vm, opaque1, opaque2, p0, &next0);
+
+ vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+ to_next, n_left_to_next,
+ pi0, next0);
+ }
+
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ }
+
+ return frame->n_vectors;
+}
+
+#endif /* included_vlib_buffer_node_h */
diff --git a/vlib/vlib/cli.c b/vlib/vlib/cli.c
new file mode 100644
index 00000000000..e5163f260e1
--- /dev/null
+++ b/vlib/vlib/cli.c
@@ -0,0 +1,1015 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * cli.c: command line interface
+ *
+ * Copyright (c) 2008 Eliot Dresselhaus
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <vlib/vlib.h>
+
+/* Root of all show commands. */
+VLIB_CLI_COMMAND (vlib_cli_show_command, static) = {
+ .path = "show",
+ .short_help = "Show commands",
+};
+
+/* Root of all clear commands. */
+VLIB_CLI_COMMAND (vlib_cli_clear_command, static) = {
+ .path = "clear",
+ .short_help = "Clear commands",
+};
+
+/* Root of all set commands. */
+VLIB_CLI_COMMAND (vlib_cli_set_command, static) = {
+ .path = "set",
+ .short_help = "Set commands",
+};
+
+/* Root of all test commands. */
+VLIB_CLI_COMMAND (vlib_cli_test_command, static) = {
+ .path = "test",
+ .short_help = "Test commands",
+};
+
+/* Returns bitmap of commands which match key. */
+static uword *
+vlib_cli_sub_command_match (vlib_cli_command_t * c, unformat_input_t * input)
+{
+ int i, n;
+ uword * match = 0;
+ vlib_cli_parse_position_t * p;
+
+ unformat_skip_white_space (input);
+
+ for (i = 0; ; i++)
+ {
+ uword k;
+
+ k = unformat_get_input (input);
+ switch (k)
+ {
+ case 'a' ... 'z':
+ case 'A' ... 'Z':
+ case '0' ... '9':
+ case '-': case '_':
+ break;
+
+ case ' ': case '\t': case '\r': case '\n':
+ case UNFORMAT_END_OF_INPUT:
+	  /* White space or end of input eliminates any longer
+	     sub-commands that were still possible matches. */
+ if (i < vec_len (c->sub_command_positions)
+ && clib_bitmap_count_set_bits (match) > 1)
+ {
+ p = vec_elt_at_index (c->sub_command_positions, i);
+ for (n = 0; n < vec_len (p->bitmaps); n++)
+ match = clib_bitmap_andnot (match, p->bitmaps[n]);
+ }
+ goto done;
+
+ default:
+ unformat_put_input (input);
+ goto done;
+ }
+
+ if (i >= vec_len (c->sub_command_positions))
+ {
+ no_match:
+ clib_bitmap_free (match);
+ return 0;
+ }
+
+ p = vec_elt_at_index (c->sub_command_positions, i);
+ if (vec_len (p->bitmaps) == 0)
+ goto no_match;
+
+ n = k - p->min_char;
+ if (n < 0 || n >= vec_len (p->bitmaps))
+ goto no_match;
+
+ if (i == 0)
+ match = clib_bitmap_dup (p->bitmaps[n]);
+ else
+ match = clib_bitmap_and (match, p->bitmaps[n]);
+
+ if (clib_bitmap_is_zero (match))
+ goto no_match;
+ }
+
+ done:
+ return match;
+}
+
+/* Looks for string based sub-input formatted { SUB-INPUT }. */
+static uword unformat_vlib_cli_sub_input (unformat_input_t * i, va_list * args)
+{
+ unformat_input_t * sub_input = va_arg (*args, unformat_input_t *);
+ u8 * s;
+ uword c;
+
+ while (1)
+ {
+ c = unformat_get_input (i);
+ switch (c)
+ {
+ case ' ': case '\t':
+ case '\n': case '\r':
+ case '\f':
+ break;
+
+ case '{':
+ default:
+	  /* Put back the unconsumed character. */
+ if (c != UNFORMAT_END_OF_INPUT)
+ unformat_put_input (i);
+
+ if (c == '{' && unformat (i, "%v", &s))
+ {
+ unformat_init_vector (sub_input, s);
+ return 1;
+ }
+ return 0;
+ }
+ }
+ return 0;
+}
+
+static vlib_cli_command_t *
+get_sub_command (vlib_cli_main_t * cm, vlib_cli_command_t * parent, u32 si)
+{
+ vlib_cli_sub_command_t * s = vec_elt_at_index (parent->sub_commands, si);
+ return vec_elt_at_index (cm->commands, s->index);
+}
+
+static uword unformat_vlib_cli_sub_command (unformat_input_t * i, va_list * args)
+{
+ vlib_main_t * vm = va_arg (*args, vlib_main_t *);
+ vlib_cli_command_t * c = va_arg (*args, vlib_cli_command_t *);
+ vlib_cli_command_t ** result = va_arg (*args, vlib_cli_command_t **);
+ vlib_cli_main_t * cm = &vm->cli_main;
+ uword * match_bitmap, is_unique, index;
+
+ {
+ vlib_cli_sub_rule_t * sr;
+ vlib_cli_parse_rule_t * r;
+ vec_foreach (sr, c->sub_rules)
+ {
+ void ** d;
+ r = vec_elt_at_index (cm->parse_rules, sr->rule_index);
+ vec_add2 (cm->parse_rule_data, d, 1);
+ vec_reset_length (d[0]);
+ if (r->data_size)
+ d[0] = _vec_resize (d[0],
+ /* length increment */ 1,
+ r->data_size,
+ /* header_bytes */ 0,
+ /* data align */ sizeof (uword));
+ if (unformat_user (i, r->unformat_function, vm, d[0]))
+ {
+ *result = vec_elt_at_index (cm->commands, sr->command_index);
+ return 1;
+ }
+ }
+ }
+
+ match_bitmap = vlib_cli_sub_command_match (c, i);
+ is_unique = clib_bitmap_count_set_bits (match_bitmap) == 1;
+ index = ~0;
+ if (is_unique)
+ {
+ index = clib_bitmap_first_set (match_bitmap);
+ *result = get_sub_command (cm, c, index);
+ }
+ clib_bitmap_free (match_bitmap);
+
+ return is_unique;
+}
+
+static u8 * format_vlib_cli_command_help (u8 * s, va_list * args)
+{
+ vlib_cli_command_t * c = va_arg (*args, vlib_cli_command_t *);
+ int is_long = va_arg (*args, int);
+ if (is_long && c->long_help)
+ s = format (s, "%s", c->long_help);
+ else if (c->short_help)
+ s = format (s, "%s", c->short_help);
+ else
+ s = format (s, "%v commands", c->path);
+ return s;
+}
+
+static u8 * format_vlib_cli_parse_rule_name (u8 * s, va_list * args)
+{
+ vlib_cli_parse_rule_t * r = va_arg (*args, vlib_cli_parse_rule_t *);
+ return format (s, "<%U>", format_c_identifier, r->name);
+}
+
+static u8 * format_vlib_cli_path (u8 * s, va_list * args)
+{
+ u8 * path = va_arg (*args, u8 *);
+ int i, in_rule;
+ in_rule = 0;
+ for (i = 0; i < vec_len (path); i++)
+ {
+ switch (path[i])
+ {
+ case '%':
+ in_rule = 1;
+ vec_add1 (s, '<'); /* start of <RULE> */
+ break;
+
+ case '_':
+ /* _ -> space in rules. */
+ vec_add1 (s, in_rule ? ' ' : '_');
+ break;
+
+ case ' ':
+ if (in_rule)
+ {
+ vec_add1 (s, '>'); /* end of <RULE> */
+ in_rule = 0;
+ }
+ vec_add1 (s, ' ');
+ break;
+
+ default:
+ vec_add1 (s, path[i]);
+ break;
+ }
+ }
+
+ if (in_rule)
+ vec_add1 (s, '>'); /* terminate <RULE> */
+
+ return s;
+}
+
+static vlib_cli_command_t *
+all_subs (vlib_cli_main_t * cm,
+ vlib_cli_command_t * subs,
+ u32 command_index)
+{
+ vlib_cli_command_t * c = vec_elt_at_index (cm->commands, command_index);
+ vlib_cli_sub_command_t * sc;
+ vlib_cli_sub_rule_t * sr;
+
+ if (c->function)
+ vec_add1 (subs, c[0]);
+
+ vec_foreach (sr, c->sub_rules)
+ subs = all_subs (cm, subs, sr->command_index);
+ vec_foreach (sc, c->sub_commands)
+ subs = all_subs (cm, subs, sc->index);
+
+ return subs;
+}
+
+static clib_error_t *
+vlib_cli_dispatch_sub_commands (vlib_main_t * vm,
+ vlib_cli_main_t * cm,
+ unformat_input_t * input,
+ uword parent_command_index)
+{
+ vlib_cli_command_t * parent, * c;
+ clib_error_t * error = 0;
+ unformat_input_t sub_input;
+ u8 * string;
+ uword is_main_dispatch = cm == &vm->cli_main;
+
+ parent = vec_elt_at_index (cm->commands, parent_command_index);
+ if (is_main_dispatch && unformat (input, "help"))
+ {
+ uword help_at_end_of_line, i;
+
+ help_at_end_of_line = unformat_check_input (input) == UNFORMAT_END_OF_INPUT;
+ while (1)
+ {
+ c = parent;
+ if (unformat_user (input, unformat_vlib_cli_sub_command, vm, c, &parent))
+ ;
+
+	  else if (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ goto unknown;
+
+ else
+ break;
+ }
+
+ /* help SUB-COMMAND => long format help.
+ "help" at end of line: show all commands. */
+ if (! help_at_end_of_line)
+ vlib_cli_output (vm, "%U", format_vlib_cli_command_help, c, /* is_long */ 1);
+
+ else if (vec_len (c->sub_commands) + vec_len (c->sub_rules) == 0)
+ vlib_cli_output (vm, "%v: no sub-commands", c->path);
+
+ else
+ {
+ vlib_cli_sub_command_t * sc;
+ vlib_cli_sub_rule_t * sr, * subs;
+
+ subs = vec_dup (c->sub_rules);
+
+ /* Add in rules if any. */
+ vec_foreach (sc, c->sub_commands)
+ {
+ vec_add2 (subs, sr, 1);
+ sr->name = sc->name;
+ sr->command_index = sc->index;
+ sr->rule_index = ~0;
+ }
+
+ vec_sort (subs, c1, c2, vec_cmp (c1->name, c2->name));
+
+ for (i = 0; i < vec_len (subs); i++)
+ {
+ vlib_cli_command_t * d;
+ vlib_cli_parse_rule_t * r;
+
+ d = vec_elt_at_index (cm->commands, subs[i].command_index);
+ r = subs[i].rule_index != ~0 ? vec_elt_at_index (cm->parse_rules, subs[i].rule_index) : 0;
+
+ if (r)
+ vlib_cli_output
+ (vm, " %-30U %U",
+ format_vlib_cli_parse_rule_name, r,
+ format_vlib_cli_command_help, d, /* is_long */ 0);
+ else
+ vlib_cli_output
+ (vm, " %-30v %U",
+ subs[i].name,
+ format_vlib_cli_command_help, d, /* is_long */ 0);
+ }
+
+ vec_free (subs);
+ }
+ }
+
+ else if (is_main_dispatch && (unformat (input, "choices") || unformat (input, "?")))
+ {
+ vlib_cli_command_t * sub, * subs;
+
+ subs = all_subs (cm, 0, parent_command_index);
+ vec_sort (subs, c1, c2, vec_cmp (c1->path, c2->path));
+ vec_foreach (sub, subs)
+ vlib_cli_output (vm, " %-40U %U",
+ format_vlib_cli_path, sub->path,
+ format_vlib_cli_command_help, sub, /* is_long */ 0);
+ vec_free (subs);
+ }
+
+ else if (unformat (input, "comment %v", &string))
+ {
+ vec_free (string);
+ }
+
+ else if (unformat (input, "uncomment %U",
+ unformat_vlib_cli_sub_input, &sub_input))
+ {
+ error = vlib_cli_dispatch_sub_commands (vm, cm, &sub_input, parent_command_index);
+ unformat_free (&sub_input);
+ }
+
+ else if (unformat_user (input, unformat_vlib_cli_sub_command, vm, parent, &c))
+ {
+ unformat_input_t * si;
+ uword has_sub_commands = vec_len (c->sub_commands) + vec_len (c->sub_rules) > 0;
+
+ si = input;
+ if (unformat_user (input, unformat_vlib_cli_sub_input, &sub_input))
+ si = &sub_input;
+
+ if (has_sub_commands)
+ error = vlib_cli_dispatch_sub_commands (vm, cm, si, c - cm->commands);
+
+ if (has_sub_commands && ! error)
+ /* Found valid sub-command. */;
+
+ else if (c->function)
+ {
+ clib_error_t * c_error;
+
+ /* Skip white space for benefit of called function. */
+ unformat_skip_white_space (si);
+
+ if (unformat (si, "?"))
+ {
+ vlib_cli_output (vm, " %-40U %U",
+ format_vlib_cli_path, c->path,
+ format_vlib_cli_command_help, c, /* is_long */ 0);
+ }
+ else
+ {
+ if (!c->is_mp_safe)
+ vlib_worker_thread_barrier_sync(vm);
+
+ c_error = c->function (vm, si, c);
+
+ if (!c->is_mp_safe)
+ vlib_worker_thread_barrier_release(vm);
+
+ if (c_error)
+ {
+ error = clib_error_return (0, "%v: %v", c->path, c_error->what);
+ clib_error_free (c_error);
+ /* Free sub input. */
+ if (si != input)
+ unformat_free (si);
+
+ return error;
+ }
+ }
+
+ /* Free any previous error. */
+ clib_error_free (error);
+ }
+
+ else if (! error)
+ error = clib_error_return (0, "%v: no sub-commands", c->path);
+
+ /* Free sub input. */
+ if (si != input)
+ unformat_free (si);
+ }
+
+ else
+ goto unknown;
+
+ return error;
+
+ unknown:
+ if (parent->path)
+ return clib_error_return (0, "%v: unknown input `%U'", parent->path, format_unformat_error, input);
+ else
+ return clib_error_return (0, "unknown input `%U'", format_unformat_error, input);
+}
+
+
+void vlib_unix_error_report (vlib_main_t *, clib_error_t *)
+ __attribute__ ((weak));
+
+void vlib_unix_error_report (vlib_main_t * vm, clib_error_t * error) { }
+
+/* Process CLI input. */
+void vlib_cli_input (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_output_function_t * function,
+ uword function_arg)
+{
+ vlib_cli_main_t * cm = &vm->cli_main;
+ clib_error_t * error;
+ vlib_cli_output_function_t * save_function;
+ uword save_function_arg;
+
+ save_function = cm->output_function;
+ save_function_arg = cm->output_function_arg;
+
+ cm->output_function = function;
+ cm->output_function_arg = function_arg;
+
+ do {
+ vec_reset_length (cm->parse_rule_data);
+ error = vlib_cli_dispatch_sub_commands (vm, &vm->cli_main, input, /* parent */ 0);
+ } while (! error && ! unformat (input, "%U", unformat_eof));
+
+ if (error)
+ {
+ vlib_cli_output (vm, "%v", error->what);
+ vlib_unix_error_report (vm, error);
+ clib_error_free (error);
+ }
+
+ cm->output_function = save_function;
+ cm->output_function_arg = save_function_arg;
+}
+
+/* Output to current CLI connection. */
+void vlib_cli_output (vlib_main_t * vm, char * fmt, ...)
+{
+ vlib_cli_main_t * cm = &vm->cli_main;
+ va_list va;
+ u8 * s;
+
+ va_start (va, fmt);
+ s = va_format (0, fmt, &va);
+ va_end (va);
+
+ /* Terminate with \n if not present. */
+ if (vec_len (s) > 0 && s[vec_len (s)-1] != '\n')
+ vec_add1 (s, '\n');
+
+ if (! cm->output_function)
+ fformat (stdout, "%v", s);
+ else
+ cm->output_function (cm->output_function_arg, s, vec_len (s));
+
+ vec_free (s);
+}
+
+static clib_error_t *
+show_memory_usage (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ int verbose = 0;
+ clib_error_t * error;
+ u32 index = 0;
+
+ while (unformat_check_input(input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "verbose"))
+ verbose = 1;
+ else {
+ error = clib_error_return (0, "unknown input `%U'",
+ format_unformat_error, input);
+ return error;
+ }
+ }
+
+ foreach_vlib_main (
+ ({
+ vlib_cli_output (vm, "Thread %d %v\n", index, vlib_worker_threads[index].name);
+ vlib_cli_output (vm, "%U\n", format_mheap, clib_per_cpu_mheaps[index], verbose);
+ index++;
+ }));
+ return 0;
+}
+
+VLIB_CLI_COMMAND (show_memory_usage_command, static) = {
+ .path = "show memory",
+ .short_help = "Show current memory usage",
+ .function = show_memory_usage,
+};
+
+static clib_error_t *
+enable_disable_memory_trace (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ clib_error_t * error = 0;
+ int enable;
+
+ if (! unformat_user (input, unformat_vlib_enable_disable, &enable))
+ {
+ error = clib_error_return (0, "expecting enable/on or disable/off");
+ goto done;
+ }
+
+ clib_mem_trace (enable);
+
+ done:
+ return error;
+}
+
+VLIB_CLI_COMMAND (enable_disable_memory_trace_command, static) = {
+ .path = "memory-trace",
+ .short_help = "Enable/disable memory allocation trace",
+ .function = enable_disable_memory_trace,
+};
+
+
+static clib_error_t *
+test_heap_validate (vlib_main_t * vm, unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ clib_error_t * error = 0;
+ void * heap;
+ mheap_t *mheap;
+
+ if (unformat(input, "on")) {
+ foreach_vlib_main({
+ heap = clib_per_cpu_mheaps[this_vlib_main->cpu_index];
+ mheap = mheap_header(heap);
+ mheap->flags |= MHEAP_FLAG_VALIDATE;
+      /* Turn off small object cache because it delays detection of errors. */
+ mheap->flags &= ~MHEAP_FLAG_SMALL_OBJECT_CACHE;
+ });
+
+ } else if (unformat(input, "off")) {
+ foreach_vlib_main({
+ heap = clib_per_cpu_mheaps[this_vlib_main->cpu_index];
+ mheap = mheap_header(heap);
+ mheap->flags &= ~MHEAP_FLAG_VALIDATE;
+ mheap->flags |= MHEAP_FLAG_SMALL_OBJECT_CACHE;
+ });
+
+ } else if (unformat(input, "now")) {
+ foreach_vlib_main({
+ heap = clib_per_cpu_mheaps[this_vlib_main->cpu_index];
+ mheap = mheap_header(heap);
+ mheap_validate(heap);
+ });
+ vlib_cli_output(vm, "heap validation complete");
+
+ } else {
+ return clib_error_return(0, "unknown input `%U'",
+ format_unformat_error, input);
+ }
+
+ return error;
+}
+
+VLIB_CLI_COMMAND (cmd_test_heap_validate,static) = {
+ .path = "test heap-validate",
+ .short_help = "<on/off/now> validate heap on future allocs/frees or right now",
+ .function = test_heap_validate,
+};
+
+
+static uword vlib_cli_normalize_path (char * input, char ** result)
+{
+ char * i = input;
+ char * s = 0;
+ uword l = 0;
+ uword index_of_last_space = ~0;
+
+ while (*i != 0)
+ {
+ u8 c = *i++;
+ /* Multiple white space -> single space. */
+ switch (c)
+ {
+ case ' ':
+ case '\t':
+ case '\n':
+ case '\r':
+ if (l > 0 && s[l-1] != ' ')
+ {
+ vec_add1 (s, ' ');
+ l++;
+ }
+ break;
+
+ default:
+ if (l > 0 && s[l-1] == ' ')
+ index_of_last_space = vec_len (s);
+ vec_add1 (s, c);
+ l++;
+ break;
+ }
+ }
+
+ /* Remove any extra space at end. */
+ if (l > 0 && s[l-1] == ' ')
+ _vec_len (s) -= 1;
+
+ *result = s;
+ return index_of_last_space;
+}
+
+always_inline uword
+parent_path_len (char * path)
+{
+ word i;
+ for (i = vec_len (path) - 1; i >= 0; i--)
+ {
+ if (path[i] == ' ')
+ return i;
+ }
+ return ~0;
+}
+
+static void add_sub_command (vlib_cli_main_t * cm,
+ uword parent_index,
+ uword child_index)
+{
+ vlib_cli_command_t * p, * c;
+ vlib_cli_sub_command_t * sub_c;
+ u8 * sub_name;
+ word i, l;
+
+ p = vec_elt_at_index (cm->commands, parent_index);
+ c = vec_elt_at_index (cm->commands, child_index);
+
+ l = parent_path_len (c->path);
+ if (l == ~0)
+ sub_name = vec_dup ((u8 *) c->path);
+ else
+ {
+ ASSERT (l + 1 < vec_len (c->path));
+ sub_name = 0;
+ vec_add (sub_name, c->path + l + 1, vec_len (c->path) - (l + 1));
+ }
+
+ if (sub_name[0] == '%')
+ {
+ uword * q;
+ vlib_cli_sub_rule_t * sr;
+
+ /* Remove %. */
+ vec_delete (sub_name, 1, 0);
+
+ if (! p->sub_rule_index_by_name)
+ p->sub_rule_index_by_name
+ = hash_create_vec (/* initial length */ 32,
+ sizeof (sub_name[0]),
+ sizeof (uword));
+ q = hash_get_mem (p->sub_rule_index_by_name, sub_name);
+ if (q)
+ {
+ sr = vec_elt_at_index (p->sub_rules, q[0]);
+ ASSERT (sr->command_index == child_index);
+ return;
+ }
+
+ q = hash_get_mem (cm->parse_rule_index_by_name, sub_name);
+ if (! q)
+ clib_error ("reference to unknown rule `%%%v' in path `%v'",
+ sub_name, c->path);
+
+ hash_set_mem (p->sub_rule_index_by_name, sub_name, vec_len (p->sub_rules));
+ vec_add2 (p->sub_rules, sr, 1);
+ sr->name = sub_name;
+ sr->rule_index = q[0];
+ sr->command_index = child_index;
+ return;
+ }
+
+ if (! p->sub_command_index_by_name)
+ p->sub_command_index_by_name
+ = hash_create_vec (/* initial length */ 32,
+ sizeof (c->path[0]),
+ sizeof (uword));
+
+ /* Check if sub-command has already been created. */
+ if (hash_get_mem (p->sub_command_index_by_name, sub_name))
+ {
+ vec_free (sub_name);
+ return;
+ }
+
+ vec_add2 (p->sub_commands, sub_c, 1);
+ sub_c->index = child_index;
+ sub_c->name = sub_name;
+ hash_set_mem (p->sub_command_index_by_name, sub_c->name, sub_c - p->sub_commands);
+
+ vec_validate (p->sub_command_positions, vec_len (sub_c->name) - 1);
+ for (i = 0; i < vec_len (sub_c->name); i++)
+ {
+ int n;
+ vlib_cli_parse_position_t * pos;
+
+ pos = vec_elt_at_index (p->sub_command_positions, i);
+
+ if (! pos->bitmaps)
+ pos->min_char = sub_c->name[i];
+
+ n = sub_c->name[i] - pos->min_char;
+ if (n < 0)
+ {
+ pos->min_char = sub_c->name[i];
+ vec_insert (pos->bitmaps, -n, 0);
+ n = 0;
+ }
+
+ vec_validate (pos->bitmaps, n);
+ pos->bitmaps[n] = clib_bitmap_ori (pos->bitmaps[n], sub_c - p->sub_commands);
+ }
+}
+
+static void
+vlib_cli_make_parent (vlib_cli_main_t * cm, uword ci)
+{
+ uword p_len, pi, * p;
+ char * p_path;
+ vlib_cli_command_t * c, * parent;
+
+ /* Root command (index 0) should have already been added. */
+ ASSERT (vec_len (cm->commands) > 0);
+
+ c = vec_elt_at_index (cm->commands, ci);
+ p_len = parent_path_len (c->path);
+
+ /* No space? Parent is root command. */
+ if (p_len == ~0)
+ {
+ add_sub_command (cm, 0, ci);
+ return;
+ }
+
+ p_path = 0;
+ vec_add (p_path, c->path, p_len);
+
+ p = hash_get_mem (cm->command_index_by_path, p_path);
+
+  /* Does parent exist yet? */
+ if (! p)
+ {
+ /* Parent does not exist; create it. */
+ vec_add2 (cm->commands, parent, 1);
+ parent->path = p_path;
+ hash_set_mem (cm->command_index_by_path, parent->path, parent - cm->commands);
+ pi = parent - cm->commands;
+ }
+ else
+ {
+ pi = p[0];
+ vec_free (p_path);
+ }
+
+ add_sub_command (cm, pi, ci);
+
+ /* Create parent's parent. */
+ if (! p)
+ vlib_cli_make_parent (cm, pi);
+}
+
+always_inline uword
+vlib_cli_command_is_empty (vlib_cli_command_t * c)
+{
+ return (c->long_help == 0
+ && c->short_help == 0
+ && c->function == 0);
+}
+
+clib_error_t * vlib_cli_register (vlib_main_t * vm, vlib_cli_command_t * c)
+{
+ vlib_cli_main_t * cm = &vm->cli_main;
+ clib_error_t * error = 0;
+ uword ci, * p;
+ char * normalized_path;
+
+ if ((error = vlib_call_init_function (vm, vlib_cli_init)))
+ return error;
+
+ (void) vlib_cli_normalize_path (c->path, &normalized_path);
+
+ if (! cm->command_index_by_path)
+ cm->command_index_by_path = hash_create_vec (/* initial length */ 32,
+ sizeof (c->path[0]),
+ sizeof (uword));
+
+ /* See if command already exists with given path. */
+ p = hash_get_mem (cm->command_index_by_path, normalized_path);
+ if (p)
+ {
+ vlib_cli_command_t * d;
+
+ ci = p[0];
+ d = vec_elt_at_index (cm->commands, ci);
+
+      /* If the existing command was created via vlib_cli_make_parent,
+	 replace it with the caller's data. */
+ if (vlib_cli_command_is_empty (d))
+ {
+ vlib_cli_command_t save = d[0];
+
+ ASSERT (! vlib_cli_command_is_empty (c));
+
+	  /* Copy caller's fields. */
+ d[0] = c[0];
+
+	  /* Restore internal fields saved above. */
+ d->path = save.path;
+ d->sub_commands = save.sub_commands;
+ d->sub_command_index_by_name = save.sub_command_index_by_name;
+ d->sub_command_positions = save.sub_command_positions;
+ d->sub_rules = save.sub_rules;
+ }
+ else
+ error = clib_error_return (0, "duplicate command name with path %v", normalized_path);
+
+ vec_free (normalized_path);
+ if (error)
+ return error;
+ }
+ else
+ {
+ /* Command does not exist: create it. */
+
+ /* Add root command (index 0). */
+ if (vec_len (cm->commands) == 0)
+ {
+ /* Create command with index 0; path is empty string. */
+ vec_resize (cm->commands, 1);
+ }
+
+ ci = vec_len (cm->commands);
+ hash_set_mem (cm->command_index_by_path, normalized_path, ci);
+ vec_add1 (cm->commands, c[0]);
+
+ c = vec_elt_at_index (cm->commands, ci);
+ c->path = normalized_path;
+
+ /* Don't inherit from registration. */
+ c->sub_commands = 0;
+ c->sub_command_index_by_name = 0;
+ c->sub_command_positions = 0;
+ }
+
+ vlib_cli_make_parent (cm, ci);
+ return 0;
+}
+
+clib_error_t *
+vlib_cli_register_parse_rule (vlib_main_t * vm, vlib_cli_parse_rule_t * r_reg)
+{
+ vlib_cli_main_t * cm = &vm->cli_main;
+ vlib_cli_parse_rule_t * r;
+ clib_error_t * error = 0;
+ u8 * r_name;
+ uword * p;
+
+ if (! cm->parse_rule_index_by_name)
+ cm->parse_rule_index_by_name = hash_create_vec (/* initial length */ 32,
+ sizeof (r->name[0]),
+ sizeof (uword));
+
+ /* Make vector copy of name. */
+ r_name = format (0, "%s", r_reg->name);
+
+ if ((p = hash_get_mem (cm->parse_rule_index_by_name, r_name)))
+ {
+ vec_free (r_name);
+ return clib_error_return (0, "duplicate parse rule name `%s'", r_reg->name);
+ }
+
+ vec_add2 (cm->parse_rules, r, 1);
+ r[0] = r_reg[0];
+ r->name = (char *) r_name;
+ hash_set_mem (cm->parse_rule_index_by_name, r->name, r - cm->parse_rules);
+
+ return error;
+}
+
+#if 0
+/* $$$ turn back on again someday, maybe */
+static clib_error_t *
+vlib_cli_register_parse_rules (vlib_main_t * vm,
+ vlib_cli_parse_rule_t * lo,
+ vlib_cli_parse_rule_t * hi)
+
+ __attribute__((unused))
+{
+ clib_error_t * error = 0;
+ vlib_cli_parse_rule_t * r;
+
+ for (r = lo; r < hi; r = clib_elf_section_data_next (r, 0))
+ {
+ if (! r->name || strlen (r->name) == 0)
+ {
+ error = clib_error_return (0, "parse rule with no name");
+ goto done;
+ }
+
+ error = vlib_cli_register_parse_rule (vm, r);
+ if (error)
+ goto done;
+ }
+
+ done:
+ return error;
+}
+#endif
+
+static clib_error_t * vlib_cli_init (vlib_main_t * vm)
+{
+ vlib_cli_main_t * cm = &vm->cli_main;
+ clib_error_t * error = 0;
+ vlib_cli_command_t * cmd;
+
+ cmd = cm->cli_command_registrations;
+
+ while (cmd)
+ {
+ error = vlib_cli_register (vm, cmd);
+ if (error)
+ return error;
+ cmd = cmd->next_cli_command;
+ }
+ return error;
+}
+
+VLIB_INIT_FUNCTION (vlib_cli_init);
diff --git a/vlib/vlib/cli.h b/vlib/vlib/cli.h
new file mode 100644
index 00000000000..8c802475176
--- /dev/null
+++ b/vlib/vlib/cli.h
@@ -0,0 +1,187 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * cli.h: command line interface
+ *
+ * Copyright (c) 2008 Eliot Dresselhaus
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef included_vlib_cli_h
+#define included_vlib_cli_h
+
+#include <vppinfra/format.h>
+
+struct vlib_cli_command_t;
+
+typedef struct {
+ u32 min_char;
+
+ /* Indexed by name[position] - min_char. */
+ uword ** bitmaps;
+} vlib_cli_parse_position_t;
+
+typedef struct {
+ u8 * name;
+
+ u32 index;
+} vlib_cli_sub_command_t;
+
+typedef struct {
+ u8 * name;
+
+ u32 rule_index;
+
+ u32 command_index;
+} vlib_cli_sub_rule_t;
+
+typedef struct {
+ char * name;
+ char * short_help;
+ char * long_help;
+
+ /* Number of bytes in parsed data. Zero for vector. */
+ uword data_size;
+
+ unformat_function_t * unformat_function;
+
+ /* Opaque for unformat function. */
+ uword unformat_function_arg[2];
+} vlib_cli_parse_rule_t;
+
+/* CLI command callback function. */
+typedef clib_error_t * (vlib_cli_command_function_t)
+ (struct vlib_main_t * vm,
+ unformat_input_t * input,
+ struct vlib_cli_command_t * cmd);
+
+typedef struct vlib_cli_command_t {
+ /* Command path (e.g. "show something").
+ Spaces delimit elements of path. */
+ char * path;
+
+ /* Short/long help strings. */
+ char * short_help;
+ char * long_help;
+
+ /* Callback function. */
+ vlib_cli_command_function_t * function;
+
+ /* Opaque. */
+ uword function_arg;
+
+ /* Known MP-safe? */
+ uword is_mp_safe;
+
+ /* Sub commands for this command. */
+ vlib_cli_sub_command_t * sub_commands;
+
+ /* Hash table mapping name (e.g. last path element) to sub command index. */
+ uword * sub_command_index_by_name;
+
+ /* bitmap[p][c][i] says whether sub-command i has character
+ c in position p. */
+ vlib_cli_parse_position_t * sub_command_positions;
+
+ /* Hash table mapping name (e.g. last path element) to sub rule index. */
+ uword * sub_rule_index_by_name;
+
+ /* Vector of possible parse rules for this path. */
+ vlib_cli_sub_rule_t * sub_rules;
+
+ /* List of CLI commands, built by constructors */
+ struct vlib_cli_command_t * next_cli_command;
+
+} vlib_cli_command_t;
+
+typedef void (vlib_cli_output_function_t) (uword arg,
+ u8 * buffer,
+ uword buffer_bytes);
+typedef struct {
+ /* Current output function. */
+ vlib_cli_output_function_t * output_function;
+
+ /* Opaque data for output function. */
+ uword output_function_arg;
+
+ /* Vector of all known commands. */
+ vlib_cli_command_t * commands;
+
+ /* Hash table mapping normalized path to index into all_commands. */
+ uword * command_index_by_path;
+
+ /* Vector of all known parse rules. */
+ vlib_cli_parse_rule_t * parse_rules;
+
+ /* Hash table mapping parse rule name to index into parse_rule vector. */
+ uword * parse_rule_index_by_name;
+
+ /* Data parsed for rules. */
+ void ** parse_rule_data;
+
+ /* registration list added by constructors */
+ vlib_cli_command_t *cli_command_registrations;
+} vlib_cli_main_t;
+
+#define VLIB_CLI_COMMAND(x,...) \
+ __VA_ARGS__ vlib_cli_command_t x; \
+static void __vlib_cli_command_registration_##x (void) \
+ __attribute__((__constructor__)) ; \
+static void __vlib_cli_command_registration_##x (void) \
+{ \
+ vlib_main_t * vm = vlib_get_main(); \
+ vlib_cli_main_t *cm = &vm->cli_main; \
+ x.next_cli_command = cm->cli_command_registrations; \
+ cm->cli_command_registrations = &x; \
+} \
+__VA_ARGS__ vlib_cli_command_t x
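+
+/* Usage sketch (editor's illustration): registering a command through the
+   constructor macro above; handler name and path are hypothetical.
+
+     static clib_error_t *
+     my_command_fn (vlib_main_t * vm, unformat_input_t * input,
+                    vlib_cli_command_t * cmd)
+     { vlib_cli_output (vm, "hello"); return 0; }
+
+     VLIB_CLI_COMMAND (my_command, static) = {
+       .path = "show example",
+       .short_help = "Show example output",
+       .function = my_command_fn,
+     };
+*/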
+
+
+#define VLIB_CLI_PARSE_RULE(x) \
+ vlib_cli_parse_rule_t x
+
+/* Output to current CLI connection. */
+void vlib_cli_output (struct vlib_main_t * vm, char * fmt, ...);
+
+/* Process CLI input. */
+void vlib_cli_input (struct vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_output_function_t * function,
+ uword function_arg);
+
+clib_error_t * vlib_cli_register (struct vlib_main_t * vm,
+ vlib_cli_command_t * c);
+clib_error_t * vlib_cli_register_parse_rule (struct vlib_main_t * vm,
+ vlib_cli_parse_rule_t * c);
+
+#endif /* included_vlib_cli_h */
diff --git a/vlib/vlib/cli_funcs.h b/vlib/vlib/cli_funcs.h
new file mode 100644
index 00000000000..a43ed20a2c2
--- /dev/null
+++ b/vlib/vlib/cli_funcs.h
@@ -0,0 +1,50 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * cli_funcs.h: VLIB CLI related functions/inlines
+ *
+ * Copyright (c) 2008 Eliot Dresselhaus
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef included_vlib_cli_funcs_h
+#define included_vlib_cli_funcs_h
+
+always_inline void *
+vlib_cli_get_parse_rule_result (vlib_main_t * vm, uword index)
+{
+ vlib_cli_main_t * cm = &vm->cli_main;
+ return vec_elt (cm->parse_rule_data, index);
+}
+
+#endif /* included_vlib_cli_funcs_h */
diff --git a/vlib/vlib/counter.c b/vlib/vlib/counter.c
new file mode 100644
index 00000000000..1b94884e319
--- /dev/null
+++ b/vlib/vlib/counter.c
@@ -0,0 +1,113 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * counter.c: simple and packet/byte counters
+ *
+ * Copyright (c) 2008 Eliot Dresselhaus
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <vlib/vlib.h>
+
+void vlib_clear_simple_counters (vlib_simple_counter_main_t * cm)
+{
+ uword i, j;
+ u16 * my_minis;
+
+ for (i = 0; i < VLIB_COUNTER_MAX_CPUS; i++)
+ {
+ my_minis = cm->minis[i];
+
+ for (j = 0; j < vec_len (my_minis); j++)
+ {
+ cm->maxi[j] += my_minis[j];
+ my_minis[j] = 0;
+ }
+ }
+
+ j = vec_len (cm->maxi);
+ if (j > 0)
+ vec_validate (cm->value_at_last_clear, j - 1);
+ for (i = 0; i < j; i++)
+ cm->value_at_last_clear[i] = cm->maxi[i];
+}
+
+void vlib_clear_combined_counters (vlib_combined_counter_main_t * cm)
+{
+ uword i, j;
+ vlib_mini_counter_t * my_minis;
+
+ for (i = 0; i < VLIB_COUNTER_MAX_CPUS; i++)
+ {
+ my_minis = cm->minis[i];
+
+ for (j = 0; j < vec_len (my_minis); j++)
+ {
+ cm->maxi[j].packets += my_minis[j].packets;
+ cm->maxi[j].bytes += my_minis[j].bytes;
+ my_minis[j].packets = 0;
+ my_minis[j].bytes = 0;
+ }
+ }
+
+ j = vec_len (cm->maxi);
+ if (j > 0)
+ vec_validate (cm->value_at_last_clear, j - 1);
+
+ for (i = 0; i < j; i++)
+ {
+ vlib_counter_t * c = vec_elt_at_index (cm->value_at_last_clear, i);
+
+ c[0] = cm->maxi[i];
+ }
+}
+
+void serialize_vlib_simple_counter_main (serialize_main_t * m, va_list * va)
+{
+ clib_warning ("unimplemented");
+}
+
+void unserialize_vlib_simple_counter_main (serialize_main_t * m, va_list * va)
+{
+ clib_warning ("unimplemented");
+}
+
+void serialize_vlib_combined_counter_main (serialize_main_t * m, va_list * va)
+{
+ clib_warning ("unimplemented");
+}
+
+void unserialize_vlib_combined_counter_main (serialize_main_t * m, va_list * va)
+{
+ clib_warning ("unimplemented");
+}
diff --git a/vlib/vlib/counter.h b/vlib/vlib/counter.h
new file mode 100644
index 00000000000..804757173bb
--- /dev/null
+++ b/vlib/vlib/counter.h
@@ -0,0 +1,336 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * counter.h: simple and packet/byte counters
+ *
+ * Copyright (c) 2008 Eliot Dresselhaus
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef included_vlib_counter_h
+#define included_vlib_counter_h
+
+/*
+ * Annoyingly enough, counters are created long before
+ * the CPU configuration is available, so we have to
+ * preallocate the mini-counter per-cpu vectors.
+ */
+#define VLIB_COUNTER_MAX_CPUS 32
+
+typedef struct {
+ /* Compact counters that (rarely) can overflow. */
+ u16 ** minis;
+
+ /* Counters to hold overflow. */
+ u64 * maxi;
+
+ /* Counter values as of last clear. */
+ u64 * value_at_last_clear;
+
+ /* Values as of last serialize. */
+ u64 * value_at_last_serialize;
+
+ /* Last counter index serialized incrementally. */
+ u32 last_incremental_serialize_index;
+
+ /* Counter name. */
+ char * name;
+} vlib_simple_counter_main_t;
+
+always_inline void
+vlib_increment_simple_counter (vlib_simple_counter_main_t * cm,
+ u32 cpu_index,
+ u32 index,
+ u32 increment)
+{
+ u16 * my_minis;
+ u16 * mini;
+ u32 old, new;
+
+ my_minis = cm->minis[cpu_index];
+ mini = vec_elt_at_index (my_minis, index);
+ old = mini[0];
+ new = old + increment;
+ mini[0] = new;
+
+ if (PREDICT_FALSE (mini[0] != new))
+ {
+ __sync_fetch_and_add (&cm->maxi[index], new);
+ my_minis[index] = 0;
+ }
+}
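+
+/* Usage sketch (hypothetical caller; everything except the counter API
+   below is illustrative):
+
+     vlib_simple_counter_main_t cm = { .name = "rx-packets" };
+     vlib_validate_simple_counter (&cm, sw_if_index);
+     vlib_increment_simple_counter (&cm, cpu_index, sw_if_index, 1);
+
+   The per-cpu u16 mini counter absorbs most increments; only when it
+   wraps does the shared u64 maxi counter take an atomic add. */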
+
+always_inline u64
+vlib_get_simple_counter (vlib_simple_counter_main_t * cm, u32 index)
+{
+ u16 *my_minis, *mini;
+ u64 v;
+ int i;
+
+ ASSERT (index < vec_len (cm->maxi));
+
+ v = 0;
+
+ for (i = 0; i < VLIB_COUNTER_MAX_CPUS; i++)
+ {
+ my_minis = cm->minis[i];
+ mini = vec_elt_at_index (my_minis, index);
+ v += mini[0];
+ }
+
+ v += cm->maxi[index];
+
+ if (index < vec_len (cm->value_at_last_clear))
+ {
+ ASSERT (v >= cm->value_at_last_clear[index]);
+ v -= cm->value_at_last_clear[index];
+ }
+
+ return v;
+}
+
+always_inline void
+vlib_zero_simple_counter (vlib_simple_counter_main_t * cm, u32 index)
+{
+ u16 * my_minis;
+ int i;
+
+ ASSERT (index < vec_len (cm->maxi));
+
+ for (i = 0; i < VLIB_COUNTER_MAX_CPUS; i++)
+ {
+ my_minis = cm->minis[i];
+ my_minis[index] = 0;
+ }
+
+ cm->maxi[index] = 0;
+
+ if (index < vec_len (cm->value_at_last_clear))
+ cm->value_at_last_clear[index] = 0;
+}
+
+/* Combined counters hold both packets and byte differences. */
+/* Maxi-packet/byte counter. */
+typedef struct {
+ u64 packets, bytes;
+} vlib_counter_t;
+
+always_inline void
+vlib_counter_add (vlib_counter_t * a, vlib_counter_t * b)
+{
+ a->packets += b->packets;
+ a->bytes += b->bytes;
+}
+
+always_inline void
+vlib_counter_sub (vlib_counter_t * a, vlib_counter_t * b)
+{
+ ASSERT (a->packets >= b->packets);
+ ASSERT (a->bytes >= b->bytes);
+ a->packets -= b->packets;
+ a->bytes -= b->bytes;
+}
+
+always_inline void
+vlib_counter_zero (vlib_counter_t * a)
+{ a->packets = a->bytes = 0; }
+
+/* Micro-counter: 16 bits of packets and 16 bits of byte difference. */
+typedef struct {
+ /* Packet count. */
+ u16 packets;
+
+ /* The average packet size hack doesn't work in a multi-core config */
+ i16 bytes;
+} vlib_mini_counter_t;
+
+typedef struct {
+ /* Compact counters that (rarely) can overflow. */
+ vlib_mini_counter_t ** minis;
+
+ /* Counters to hold overflow. */
+ vlib_counter_t * maxi;
+
+ /* Debug counters for testing. */
+ vlib_counter_t * debug;
+
+ /* Counter values as of last clear. */
+ vlib_counter_t * value_at_last_clear;
+
+ /* Counter values as of last serialize. */
+ vlib_counter_t * value_at_last_serialize;
+
+ /* Last counter index serialized incrementally. */
+ u32 last_incremental_serialize_index;
+
+ /* Average packet sizes used in mini-counter byte differences. */
+ u32 ave_packet_size;
+
+ /* Current summed packets and bytes for average computation. */
+ u32 ave_packets, ave_bytes;
+
+ /* Counter name. */
+ char * name;
+
+} vlib_combined_counter_main_t;
+
+void vlib_clear_simple_counters (vlib_simple_counter_main_t * cm);
+void vlib_clear_combined_counters (vlib_combined_counter_main_t * cm);
+
+always_inline void
+vlib_increment_combined_counter (vlib_combined_counter_main_t * cm,
+ u32 cpu_index,
+ u32 index,
+ u32 packet_increment,
+ u32 byte_increment)
+{
+ vlib_mini_counter_t * my_minis, * mini;
+ u32 old_packets, new_packets;
+ i32 old_bytes, new_bytes;
+
+ /* Use this CPU's mini counter array */
+ my_minis = cm->minis[cpu_index];
+
+ mini = vec_elt_at_index (my_minis, index);
+ old_packets = mini->packets;
+ old_bytes = mini->bytes;
+
+ new_packets = old_packets + packet_increment;
+ new_bytes = old_bytes + byte_increment;
+
+ mini->packets = new_packets;
+ mini->bytes = new_bytes;
+
+  /* Bytes always overflow before packets do. */
+ if (PREDICT_FALSE (mini->bytes != new_bytes))
+ {
+ vlib_counter_t * maxi = vec_elt_at_index (cm->maxi, index);
+
+ __sync_fetch_and_add (&maxi->packets, new_packets);
+ __sync_fetch_and_add (&maxi->bytes, new_bytes);
+
+ mini->packets = 0;
+ mini->bytes = 0;
+ }
+}
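+
+/* Worked example of the flush path above: with mini->bytes == 30000 and
+   a byte_increment of 9000, new_bytes is 39000, which no longer fits in
+   the i16 mini->bytes. The stored value therefore differs from
+   new_bytes, so the whole (packets, bytes) pair is atomically folded
+   into the maxi counter and the mini counter restarts at zero. */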
+
+/* This is never done in the speed path */
+static inline void
+vlib_get_combined_counter (vlib_combined_counter_main_t * cm,
+ u32 index,
+ vlib_counter_t * result)
+{
+ vlib_mini_counter_t * my_minis, * mini;
+ vlib_counter_t * maxi;
+ int i;
+
+ result->packets = 0;
+ result->bytes = 0;
+
+ for (i = 0; i < VLIB_COUNTER_MAX_CPUS; i++)
+ {
+ my_minis = cm->minis[i];
+
+ mini = vec_elt_at_index (my_minis, index);
+ result->packets += mini->packets;
+ result->bytes += mini->bytes;
+ }
+
+ maxi = vec_elt_at_index (cm->maxi, index);
+ result->packets += maxi->packets;
+ result->bytes += maxi->bytes;
+
+ if (index < vec_len (cm->value_at_last_clear))
+ vlib_counter_sub (result, &cm->value_at_last_clear[index]);
+}
+
+always_inline void
+vlib_zero_combined_counter (vlib_combined_counter_main_t * cm,
+ u32 index)
+{
+ vlib_mini_counter_t * mini, * my_minis;
+ int i;
+
+ for (i = 0; i < VLIB_COUNTER_MAX_CPUS; i++)
+ {
+ my_minis = cm->minis[i];
+
+ mini = vec_elt_at_index (my_minis, index);
+ mini->packets = 0;
+ mini->bytes = 0;
+ }
+
+ vlib_counter_zero (&cm->maxi[index]);
+ if (index < vec_len (cm->value_at_last_clear))
+ vlib_counter_zero (&cm->value_at_last_clear[index]);
+}
+
+/* Initialize/allocate given counter index.
+ Works for both simple and combined counters. */
+#define vlib_validate_counter_DEPRECATED(cm,index) \
+ do { \
+ int i; \
+ \
+ vec_validate ((cm)->minis, VLIB_COUNTER_MAX_CPUS-1); \
+ for (i = 0; i < VLIB_COUNTER_MAX_CPUS; i++) \
+ vec_validate ((cm)->minis[i], (index)); \
+ vec_validate ((cm)->maxi, (index)); \
+ } while (0)
+
+static inline void
+vlib_validate_simple_counter (vlib_simple_counter_main_t *cm, u32 index)
+{
+ int i;
+ vec_validate (cm->minis, VLIB_COUNTER_MAX_CPUS-1);
+ for (i = 0; i < VLIB_COUNTER_MAX_CPUS; i++)
+ vec_validate_aligned (cm->minis[i], index, CLIB_CACHE_LINE_BYTES);
+ vec_validate_aligned (cm->maxi, index, CLIB_CACHE_LINE_BYTES);
+}
+
+static inline void
+vlib_validate_combined_counter (vlib_combined_counter_main_t *cm, u32 index)
+{
+ int i;
+ vec_validate (cm->minis, VLIB_COUNTER_MAX_CPUS-1);
+ for (i = 0; i < VLIB_COUNTER_MAX_CPUS; i++)
+ vec_validate_aligned (cm->minis[i], index, CLIB_CACHE_LINE_BYTES);
+ vec_validate_aligned (cm->maxi, index, CLIB_CACHE_LINE_BYTES);
+}
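+
+/* Both validators follow the same pattern: validate the highest index
+   you will touch, e.g.
+
+     vlib_validate_combined_counter (&cm, n_items - 1);
+
+   which sizes every per-cpu mini vector and the shared maxi vector,
+   cache-line aligned, before any increment runs (cm and n_items are
+   illustrative). */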
+
+/* Number of simple/combined counters allocated. */
+#define vlib_counter_len(cm) vec_len((cm)->maxi)
+
+serialize_function_t serialize_vlib_simple_counter_main, unserialize_vlib_simple_counter_main;
+serialize_function_t serialize_vlib_combined_counter_main, unserialize_vlib_combined_counter_main;
+
+#endif /* included_vlib_counter_h */
diff --git a/vlib/vlib/defs.h b/vlib/vlib/defs.h
new file mode 100644
index 00000000000..ff9046861f3
--- /dev/null
+++ b/vlib/vlib/defs.h
@@ -0,0 +1,70 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * defs.h: VLIB generic C definitions
+ *
+ * Copyright (c) 2008 Eliot Dresselhaus
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef included_vlib_defs_h
+#define included_vlib_defs_h
+
+/* Receive or transmit. */
+typedef enum {
+ VLIB_RX,
+ VLIB_TX,
+ VLIB_N_RX_TX = 2, /* Used to size arrays. */
+} vlib_rx_or_tx_t;
+
+#define vlib_foreach_rx_tx(v) for (v = 0; v < VLIB_N_RX_TX; v++)
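+
+/* Example (`counts' is illustrative); the loop visits VLIB_RX, then
+   VLIB_TX:
+
+     vlib_rx_or_tx_t rt;
+     vlib_foreach_rx_tx (rt)
+       counts[rt] = 0;
+*/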
+
+/* Read/write. */
+typedef enum {
+ VLIB_READ,
+ VLIB_WRITE,
+} vlib_read_or_write_t;
+
+/* Up/down. */
+typedef enum {
+ VLIB_DOWN = 0,
+ VLIB_UP = 1,
+} vlib_up_or_down_t;
+
+/* Enable/disable. */
+typedef enum {
+ VLIB_DISABLE = 0,
+ VLIB_ENABLE = 1,
+} vlib_enable_or_disable_t;
+
+#endif /* included_vlib_defs_h */
diff --git a/vlib/vlib/dpdk_buffer.c b/vlib/vlib/dpdk_buffer.c
new file mode 100644
index 00000000000..dbbd5806fd2
--- /dev/null
+++ b/vlib/vlib/dpdk_buffer.c
@@ -0,0 +1,1206 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * buffer.c: allocate/free network buffers.
+ *
+ * Copyright (c) 2008 Eliot Dresselhaus
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <rte_config.h>
+
+#include <rte_common.h>
+#include <rte_log.h>
+#include <rte_memory.h>
+#include <rte_memcpy.h>
+#include <rte_memzone.h>
+#include <rte_tailq.h>
+#include <rte_eal.h>
+#include <rte_per_lcore.h>
+#include <rte_launch.h>
+#include <rte_atomic.h>
+#include <rte_cycles.h>
+#include <rte_prefetch.h>
+#include <rte_lcore.h>
+#include <rte_per_lcore.h>
+#include <rte_branch_prediction.h>
+#include <rte_interrupts.h>
+#include <rte_pci.h>
+#include <rte_random.h>
+#include <rte_debug.h>
+#include <rte_ether.h>
+#include <rte_ethdev.h>
+#include <rte_ring.h>
+#include <rte_mempool.h>
+#include <rte_mbuf.h>
+
+#include <vlib/vlib.h>
+
+phys_addr_t __attribute__ ((weak)) rte_mem_virt2phy();
+int __attribute__ ((weak)) rte_eal_has_hugepages();
+unsigned __attribute__ ((weak)) rte_socket_id();
+struct rte_mempool * __attribute__ ((weak)) rte_mempool_create();
+void __attribute__ ((weak)) rte_pktmbuf_init();
+void __attribute__ ((weak)) rte_pktmbuf_pool_init();
+
+uword vlib_buffer_length_in_chain_slow_path (vlib_main_t * vm, vlib_buffer_t * b_first)
+{
+ vlib_buffer_t * b = b_first;
+ uword l_first = b_first->current_length;
+ uword l = 0;
+ while (b->flags & VLIB_BUFFER_NEXT_PRESENT)
+ {
+ b = vlib_get_buffer (vm, b->next_buffer);
+ l += b->current_length;
+ }
+ b_first->total_length_not_including_first_buffer = l;
+ b_first->flags |= VLIB_BUFFER_TOTAL_LENGTH_VALID;
+ return l + l_first;
+}
+
+u8 * format_vlib_buffer (u8 * s, va_list * args)
+{
+ vlib_buffer_t * b = va_arg (*args, vlib_buffer_t *);
+ uword indent = format_get_indent (s);
+
+ s = format (s, "current data %d, length %d, free-list %d",
+ b->current_data, b->current_length,
+ b->free_list_index);
+
+ if (b->flags & VLIB_BUFFER_TOTAL_LENGTH_VALID)
+ s = format (s, ", totlen-nifb %d",
+ b->total_length_not_including_first_buffer);
+
+ if (b->flags & VLIB_BUFFER_IS_TRACED)
+ s = format (s, ", trace 0x%x", b->trace_index);
+
+ while (b->flags & VLIB_BUFFER_NEXT_PRESENT)
+ {
+ vlib_main_t * vm = vlib_get_main();
+ u32 next_buffer = b->next_buffer;
+ b = vlib_get_buffer(vm, next_buffer);
+
+ s = format (s, "\n%Unext-buffer 0x%x, segment length %d",
+ format_white_space, indent, next_buffer, b->current_length);
+ }
+
+ return s;
+}
+
+u8 * format_vlib_buffer_and_data (u8 * s, va_list * args)
+{
+ vlib_buffer_t * b = va_arg (*args, vlib_buffer_t *);
+
+ s = format (s, "%U, %U",
+ format_vlib_buffer, b,
+ format_hex_bytes, vlib_buffer_get_current (b), 64);
+
+ return s;
+}
+
+u8 * format_vlib_buffer_contents (u8 * s, va_list * va)
+{
+ vlib_main_t * vm = va_arg (*va, vlib_main_t *);
+ vlib_buffer_t * b = va_arg (*va, vlib_buffer_t *);
+
+ while (1)
+ {
+ vec_add (s, vlib_buffer_get_current (b),
+ b->current_length);
+ if (! (b->flags & VLIB_BUFFER_NEXT_PRESENT))
+ break;
+ b = vlib_get_buffer (vm, b->next_buffer);
+ }
+
+ return s;
+}
+
+vlib_main_t **vlib_mains;
+
+/* Aligned copy routine. */
+void
+vlib_aligned_memcpy (void * _dst, void * _src, int n_bytes)
+{
+ vlib_copy_unit_t * dst = _dst;
+ vlib_copy_unit_t * src = _src;
+
+ /* Arguments must be naturally aligned. */
+ ASSERT (pointer_to_uword (dst) % sizeof (dst[0]) == 0);
+ ASSERT (pointer_to_uword (src) % sizeof (src[0]) == 0);
+ ASSERT (n_bytes % sizeof (dst[0]) == 0);
+
+ if (4 * sizeof (dst[0]) == CLIB_CACHE_LINE_BYTES)
+ {
+ CLIB_PREFETCH (dst + 0, 4 * sizeof (dst[0]), WRITE);
+ CLIB_PREFETCH (src + 0, 4 * sizeof (src[0]), READ);
+
+ while (n_bytes >= 4 * sizeof (dst[0]))
+ {
+ dst += 4;
+ src += 4;
+ n_bytes -= 4 * sizeof (dst[0]);
+ CLIB_PREFETCH (dst, 4 * sizeof (dst[0]), WRITE);
+ CLIB_PREFETCH (src, 4 * sizeof (src[0]), READ);
+ dst[-4] = src[-4];
+ dst[-3] = src[-3];
+ dst[-2] = src[-2];
+ dst[-1] = src[-1];
+ }
+ }
+ else if (8 * sizeof (dst[0]) == CLIB_CACHE_LINE_BYTES)
+ {
+ CLIB_PREFETCH (dst + 0, 8 * sizeof (dst[0]), WRITE);
+ CLIB_PREFETCH (src + 0, 8 * sizeof (src[0]), READ);
+
+ while (n_bytes >= 8 * sizeof (dst[0]))
+ {
+ dst += 8;
+ src += 8;
+ n_bytes -= 8 * sizeof (dst[0]);
+ CLIB_PREFETCH (dst, 8 * sizeof (dst[0]), WRITE);
+ CLIB_PREFETCH (src, 8 * sizeof (src[0]), READ);
+ dst[-8] = src[-8];
+ dst[-7] = src[-7];
+ dst[-6] = src[-6];
+ dst[-5] = src[-5];
+ dst[-4] = src[-4];
+ dst[-3] = src[-3];
+ dst[-2] = src[-2];
+ dst[-1] = src[-1];
+ }
+ }
+ else
+ /* Cache line size unknown: fall back to slow version. */;
+
+ while (n_bytes > 0)
+ {
+ *dst++ = *src++;
+ n_bytes -= 1 * sizeof (dst[0]);
+ }
+}
+
+#define BUFFERS_PER_COPY (sizeof (vlib_copy_unit_t) / sizeof (u32))
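+
+/* With a 64-bit vlib_copy_unit_t this evaluates to 2 (a 128-bit unit
+   would give 4): the number of u32 buffer indices moved per aligned
+   copy, and hence the chunk size the aligned vectors are trimmed to. */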
+
+/* Make sure we have at least given number of unaligned buffers. */
+static void
+fill_unaligned (vlib_main_t * vm,
+ vlib_buffer_free_list_t * free_list,
+ uword n_unaligned_buffers)
+{
+ word la = vec_len (free_list->aligned_buffers);
+ word lu = vec_len (free_list->unaligned_buffers);
+
+  /* Aligned buffers come in copy-sized chunks. */
+ ASSERT (la % BUFFERS_PER_COPY == 0);
+
+ ASSERT (la >= n_unaligned_buffers);
+
+ while (lu < n_unaligned_buffers)
+ {
+      /* Move BUFFERS_PER_COPY buffers from the end of the
+         aligned vector to the unaligned vector. */
+ vec_add (free_list->unaligned_buffers,
+ free_list->aligned_buffers + la - BUFFERS_PER_COPY,
+ BUFFERS_PER_COPY);
+ la -= BUFFERS_PER_COPY;
+ lu += BUFFERS_PER_COPY;
+ }
+ _vec_len (free_list->aligned_buffers) = la;
+}
+
+/* After a free, the aligned vector may no longer hold a whole
+   number of copy-sized chunks. */
+static void
+trim_aligned (vlib_buffer_free_list_t * f)
+{
+ uword l, n_trim;
+
+ /* Add unaligned to aligned before trim. */
+ l = vec_len (f->unaligned_buffers);
+ if (l > 0)
+ {
+ vec_add_aligned (f->aligned_buffers, f->unaligned_buffers, l,
+ /* align */ sizeof (vlib_copy_unit_t));
+
+ _vec_len (f->unaligned_buffers) = 0;
+ }
+
+ /* Remove unaligned buffers from end of aligned vector and save for next trim. */
+ l = vec_len (f->aligned_buffers);
+ n_trim = l % BUFFERS_PER_COPY;
+ if (n_trim)
+ {
+ /* Trim aligned -> unaligned. */
+ vec_add (f->unaligned_buffers, f->aligned_buffers + l - n_trim, n_trim);
+
+ /* Remove from aligned. */
+ _vec_len (f->aligned_buffers) = l - n_trim;
+ }
+}
+
+static void
+merge_free_lists (vlib_buffer_free_list_t * dst,
+ vlib_buffer_free_list_t * src)
+{
+ uword l;
+ u32 * d;
+
+ trim_aligned (src);
+ trim_aligned (dst);
+
+ l = vec_len (src->aligned_buffers);
+ if (l > 0)
+ {
+ vec_add2_aligned (dst->aligned_buffers, d, l,
+ /* align */ sizeof (vlib_copy_unit_t));
+ vlib_aligned_memcpy (d, src->aligned_buffers, l * sizeof (d[0]));
+ vec_free (src->aligned_buffers);
+ }
+
+ l = vec_len (src->unaligned_buffers);
+ if (l > 0)
+ {
+ vec_add (dst->unaligned_buffers, src->unaligned_buffers, l);
+ vec_free (src->unaligned_buffers);
+ }
+}
+
+always_inline u32
+vlib_buffer_get_free_list_with_size (vlib_main_t * vm, u32 size)
+{
+ vlib_buffer_main_t * bm = vm->buffer_main;
+
+ size = vlib_buffer_round_size (size);
+ uword * p = hash_get (bm->free_list_by_size, size);
+ return p ? p[0] : ~0;
+}
+
+/* Add buffer free list. */
+static u32
+vlib_buffer_create_free_list_helper (vlib_main_t * vm,
+ u32 n_data_bytes,
+ u32 is_public,
+ u32 is_default,
+ u8 * name)
+{
+ vlib_buffer_main_t * bm = vm->buffer_main;
+ vlib_buffer_free_list_t * f;
+
+ if (! is_default && pool_elts (bm->buffer_free_list_pool) == 0)
+ {
+ u32 default_free_free_list_index;
+
+ default_free_free_list_index =
+ vlib_buffer_create_free_list_helper (vm,
+ /* default buffer size */ VLIB_BUFFER_DEFAULT_FREE_LIST_BYTES,
+ /* is_public */ 1,
+ /* is_default */ 1,
+ (u8 *) "default");
+ ASSERT (default_free_free_list_index == VLIB_BUFFER_DEFAULT_FREE_LIST_INDEX);
+
+ if (n_data_bytes == VLIB_BUFFER_DEFAULT_FREE_LIST_BYTES && is_public)
+ return default_free_free_list_index;
+ }
+
+ pool_get_aligned (bm->buffer_free_list_pool, f, CLIB_CACHE_LINE_BYTES);
+
+ memset (f, 0, sizeof (f[0]));
+ f->index = f - bm->buffer_free_list_pool;
+ f->n_data_bytes = vlib_buffer_round_size (n_data_bytes);
+ f->min_n_buffers_each_physmem_alloc = 16;
+ f->name = clib_mem_is_heap_object (name) ? name : format (0, "%s", name);
+
+ /* Setup free buffer template. */
+ f->buffer_init_template.free_list_index = f->index;
+
+ if (is_public)
+ {
+ uword * p = hash_get (bm->free_list_by_size, f->n_data_bytes);
+ if (! p)
+ hash_set (bm->free_list_by_size, f->n_data_bytes, f->index);
+ }
+
+ return f->index;
+}
+
+u32 vlib_buffer_create_free_list (vlib_main_t * vm, u32 n_data_bytes,
+ char * fmt, ...)
+{
+ va_list va;
+ u8 * name;
+
+ va_start (va, fmt);
+ name = va_format (0, fmt, &va);
+ va_end (va);
+
+ return vlib_buffer_create_free_list_helper (vm, n_data_bytes,
+ /* is_public */ 0,
+ /* is_default */ 0,
+ name);
+}
+
+u32 vlib_buffer_get_or_create_free_list (vlib_main_t * vm, u32 n_data_bytes,
+ char * fmt, ...)
+{
+ u32 i = vlib_buffer_get_free_list_with_size (vm, n_data_bytes);
+
+ if (i == ~0)
+ {
+ va_list va;
+ u8 * name;
+
+ va_start (va, fmt);
+ name = va_format (0, fmt, &va);
+ va_end (va);
+
+ i = vlib_buffer_create_free_list_helper (vm, n_data_bytes,
+ /* is_public */ 1,
+ /* is_default */ 0,
+ name);
+ }
+
+ return i;
+}
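+
+/* Usage sketch (the size and name are illustrative):
+
+     u32 fl_index = vlib_buffer_get_or_create_free_list
+       (vm, 2048, "my-driver rx");
+
+   This reuses a public free list of matching rounded size when one
+   exists, otherwise it creates one. */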
+
+static void
+del_free_list (vlib_main_t * vm, vlib_buffer_free_list_t * f)
+{
+ u32 i;
+ struct rte_mbuf *mb;
+ vlib_buffer_t *b;
+
+ for (i = 0; i < vec_len (f->unaligned_buffers); i++) {
+ b = vlib_get_buffer (vm, f->unaligned_buffers[i]);
+ mb = ((struct rte_mbuf *)b)-1;
+ rte_pktmbuf_free (mb);
+ }
+ for (i = 0; i < vec_len (f->aligned_buffers); i++) {
+ b = vlib_get_buffer (vm, f->aligned_buffers[i]);
+ mb = ((struct rte_mbuf *)b)-1;
+ rte_pktmbuf_free (mb);
+ }
+ vec_free (f->name);
+ vec_free (f->unaligned_buffers);
+ vec_free (f->aligned_buffers);
+}
+
+/* Delete buffer free list. */
+void vlib_buffer_delete_free_list (vlib_main_t * vm, u32 free_list_index)
+{
+ vlib_buffer_main_t * bm = vm->buffer_main;
+ vlib_buffer_free_list_t * f;
+ u32 merge_index;
+
+ f = vlib_buffer_get_free_list (vm, free_list_index);
+
+ merge_index = vlib_buffer_get_free_list_with_size (vm, f->n_data_bytes);
+ if (merge_index != ~0 && merge_index != free_list_index)
+ {
+ merge_free_lists (pool_elt_at_index (bm->buffer_free_list_pool,
+ merge_index), f);
+ }
+
+ del_free_list (vm, f);
+
+ /* Poison it. */
+ memset (f, 0xab, sizeof (f[0]));
+
+ pool_put (bm->buffer_free_list_pool, f);
+}
+
+/* Make sure free list has at least given number of free buffers. */
+static uword
+fill_free_list (vlib_main_t * vm,
+ vlib_buffer_free_list_t * fl,
+ uword min_free_buffers)
+{
+ vlib_buffer_t * b;
+ int n, i;
+ u32 bi;
+ unsigned socket_id = rte_socket_id ? rte_socket_id() : 0;
+ struct rte_mempool *rmp = vm->buffer_main->pktmbuf_pools[socket_id];
+ struct rte_mbuf *mb;
+
+ /* Too early? */
+ if (PREDICT_FALSE(rmp == 0))
+ return 0;
+
+ trim_aligned (fl);
+
+ /* Already have enough free buffers on free list? */
+ n = min_free_buffers - vec_len (fl->aligned_buffers);
+ if (n <= 0)
+ return min_free_buffers;
+
+  /* Always allocate a round number of buffers. */
+ n = round_pow2 (n, BUFFERS_PER_COPY);
+
+ /* Always allocate new buffers in reasonably large sized chunks. */
+ n = clib_max (n, fl->min_n_buffers_each_physmem_alloc);
+
+ vec_validate (vm->mbuf_alloc_list, n-1);
+
+ if (rte_mempool_get_bulk (rmp, vm->mbuf_alloc_list, n) < 0)
+ return 0;
+
+ _vec_len (vm->mbuf_alloc_list) = n;
+
+ for (i = 0; i < n; i++)
+ {
+ mb = vm->mbuf_alloc_list[i];
+
+ ASSERT(rte_mbuf_refcnt_read(mb) == 0);
+ rte_mbuf_refcnt_set(mb, 1);
+ mb->next = NULL;
+ mb->data_off = RTE_PKTMBUF_HEADROOM;
+ mb->nb_segs = 1;
+
+ b = (vlib_buffer_t *)(mb+1);
+ bi = vlib_get_buffer_index (vm, b);
+
+ vec_add1_aligned (fl->aligned_buffers, bi, sizeof (vlib_copy_unit_t));
+
+ vlib_buffer_init_for_free_list (b, fl);
+
+ if (fl->buffer_init_function)
+ fl->buffer_init_function (vm, fl, &bi, 1);
+ }
+
+ fl->n_alloc += n;
+
+ return n;
+}
+
+always_inline uword
+copy_alignment (u32 * x)
+{ return (pointer_to_uword (x) / sizeof (x[0])) % BUFFERS_PER_COPY; }
+
+static u32
+alloc_from_free_list (vlib_main_t * vm,
+ vlib_buffer_free_list_t * free_list,
+ u32 * alloc_buffers,
+ u32 n_alloc_buffers)
+{
+ u32 * dst, * u_src;
+ uword u_len, n_left;
+ uword n_unaligned_start, n_unaligned_end, n_filled;
+
+ n_left = n_alloc_buffers;
+ dst = alloc_buffers;
+ n_unaligned_start = ((BUFFERS_PER_COPY - copy_alignment (dst))
+ & (BUFFERS_PER_COPY - 1));
+
+ n_filled = fill_free_list (vm, free_list, n_alloc_buffers);
+ if (n_filled == 0)
+ return 0;
+
+ n_left = n_filled < n_left ? n_filled : n_left;
+ n_alloc_buffers = n_left;
+
+ if (n_unaligned_start >= n_left)
+ {
+ n_unaligned_start = n_left;
+ n_unaligned_end = 0;
+ }
+ else
+ n_unaligned_end = copy_alignment (dst + n_alloc_buffers);
+
+ fill_unaligned (vm, free_list, n_unaligned_start + n_unaligned_end);
+
+ u_len = vec_len (free_list->unaligned_buffers);
+ u_src = free_list->unaligned_buffers + u_len - 1;
+
+ if (n_unaligned_start)
+ {
+ uword n_copy = n_unaligned_start;
+ if (n_copy > n_left)
+ n_copy = n_left;
+ n_left -= n_copy;
+
+ while (n_copy > 0)
+ {
+ *dst++ = *u_src--;
+ n_copy--;
+ u_len--;
+ }
+
+ /* Now dst should be aligned. */
+ if (n_left > 0)
+ ASSERT (pointer_to_uword (dst) % sizeof (vlib_copy_unit_t) == 0);
+ }
+
+ /* Aligned copy. */
+ {
+ vlib_copy_unit_t * d, * s;
+ uword n_copy;
+
+ if (vec_len(free_list->aligned_buffers) < ((n_left/BUFFERS_PER_COPY)*BUFFERS_PER_COPY))
+ abort();
+
+ n_copy = n_left / BUFFERS_PER_COPY;
+ n_left = n_left % BUFFERS_PER_COPY;
+
+ /* Remove buffers from aligned free list. */
+ _vec_len (free_list->aligned_buffers) -= n_copy * BUFFERS_PER_COPY;
+
+ s = (vlib_copy_unit_t *) vec_end (free_list->aligned_buffers);
+ d = (vlib_copy_unit_t *) dst;
+
+ /* Fast path loop. */
+ while (n_copy >= 4)
+ {
+ d[0] = s[0];
+ d[1] = s[1];
+ d[2] = s[2];
+ d[3] = s[3];
+ n_copy -= 4;
+ s += 4;
+ d += 4;
+ }
+
+ while (n_copy >= 1)
+ {
+ d[0] = s[0];
+ n_copy -= 1;
+ s += 1;
+ d += 1;
+ }
+
+ dst = (void *) d;
+ }
+
+ /* Unaligned copy. */
+ ASSERT (n_unaligned_end == n_left);
+ while (n_left > 0)
+ {
+ *dst++ = *u_src--;
+ n_left--;
+ u_len--;
+ }
+
+ if (! free_list->unaligned_buffers)
+ ASSERT (u_len == 0);
+ else
+ _vec_len (free_list->unaligned_buffers) = u_len;
+
+ return n_alloc_buffers;
+}
+
+/* Allocate a given number of buffers into the given array.
+   Returns the number actually allocated, which may be less than
+   the number requested if the mempool runs short. */
+u32 vlib_buffer_alloc (vlib_main_t * vm, u32 * buffers, u32 n_buffers)
+{
+ vlib_buffer_main_t * bm = vm->buffer_main;
+
+ return alloc_from_free_list
+ (vm,
+ pool_elt_at_index (bm->buffer_free_list_pool,
+ VLIB_BUFFER_DEFAULT_FREE_LIST_INDEX),
+ buffers, n_buffers);
+}
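+
+/* Usage sketch; a depleted mempool can yield a short or zero count, so
+   check the return value:
+
+     u32 bis[64], n;
+     n = vlib_buffer_alloc (vm, bis, 64);
+     if (n < 64)
+       vlib_buffer_free (vm, bis, n);
+
+   i.e. give back a partial allocation rather than leak it. */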
+
+u32 vlib_buffer_alloc_from_free_list (vlib_main_t * vm,
+ u32 * buffers,
+ u32 n_buffers,
+ u32 free_list_index)
+{
+ vlib_buffer_main_t * bm = vm->buffer_main;
+ vlib_buffer_free_list_t * f;
+ f = pool_elt_at_index (bm->buffer_free_list_pool, free_list_index);
+ return alloc_from_free_list (vm, f, buffers, n_buffers);
+}
+
+always_inline void
+add_buffer_to_free_list (vlib_main_t * vm,
+ vlib_buffer_free_list_t * f,
+ u32 buffer_index, u8 do_init)
+{
+ vlib_buffer_t * b;
+ b = vlib_get_buffer (vm, buffer_index);
+ if (PREDICT_TRUE(do_init))
+ vlib_buffer_init_for_free_list (b, f);
+ vec_add1_aligned (f->aligned_buffers, buffer_index, sizeof (vlib_copy_unit_t));
+}
+
+always_inline vlib_buffer_free_list_t *
+buffer_get_free_list (vlib_main_t * vm, vlib_buffer_t * b, u32 * index)
+{
+ vlib_buffer_main_t * bm = vm->buffer_main;
+ u32 i;
+
+ *index = i = b->free_list_index;
+ return pool_elt_at_index (bm->buffer_free_list_pool, i);
+}
+
+void *vlib_set_buffer_free_callback (vlib_main_t *vm, void *fp)
+{
+ vlib_buffer_main_t * bm = vm->buffer_main;
+ void * rv = bm->buffer_free_callback;
+
+ bm->buffer_free_callback = fp;
+ return rv;
+}
+
+static_always_inline void
+vlib_buffer_free_inline (vlib_main_t * vm,
+ u32 * buffers,
+ u32 n_buffers,
+ u32 follow_buffer_next)
+{
+ vlib_buffer_main_t * bm = vm->buffer_main;
+ vlib_buffer_free_list_t * fl;
+ u32 fi;
+ int i;
+ u32 (*cb)(vlib_main_t * vm, u32 * buffers, u32 n_buffers,
+ u32 follow_buffer_next);
+
+ cb = bm->buffer_free_callback;
+
+ if (PREDICT_FALSE (cb != 0))
+ n_buffers = (*cb)(vm, buffers, n_buffers, follow_buffer_next);
+
+ if (! n_buffers)
+ return;
+
+ for (i = 0; i < n_buffers; i++)
+ {
+ vlib_buffer_t * b;
+ struct rte_mbuf * mb;
+
+ b = vlib_get_buffer (vm, buffers[i]);
+
+ fl = buffer_get_free_list (vm, b, &fi);
+
+ /* The only current use of this callback: multicast recycle */
+ if (PREDICT_FALSE (fl->buffers_added_to_freelist_function != 0))
+ {
+ int j;
+
+ add_buffer_to_free_list (vm, fl, buffers[i], b->clone_count == 0);
+
+ for (j = 0; j < vec_len (bm->announce_list); j++)
+ {
+ if (fl == bm->announce_list[j])
+ goto already_announced;
+ }
+ vec_add1 (bm->announce_list, fl);
+ already_announced:
+ ;
+ }
+ else
+ {
+ mb = ((struct rte_mbuf *)b)-1;
+ rte_pktmbuf_free (mb);
+ }
+ }
+ if (vec_len(bm->announce_list))
+ {
+ vlib_buffer_free_list_t * fl;
+ for (i = 0; i < vec_len (bm->announce_list); i++)
+ {
+ fl = bm->announce_list[i];
+ fl->buffers_added_to_freelist_function (vm, fl);
+ }
+ _vec_len(bm->announce_list) = 0;
+ }
+}
+
+void vlib_buffer_free (vlib_main_t * vm,
+ u32 * buffers,
+ u32 n_buffers)
+{
+ vlib_buffer_free_inline (vm, buffers, n_buffers, /* follow_buffer_next */ 1);
+}
+
+void vlib_buffer_free_no_next (vlib_main_t * vm,
+ u32 * buffers,
+ u32 n_buffers)
+{
+ vlib_buffer_free_inline (vm, buffers, n_buffers, /* follow_buffer_next */ 0);
+}
+
+/* Copy template packet data into buffers as they are allocated. */
+__attribute__((unused)) static void
+vlib_packet_template_buffer_init (vlib_main_t * vm,
+ vlib_buffer_free_list_t * fl,
+ u32 * buffers,
+ u32 n_buffers)
+{
+ vlib_packet_template_t * t = uword_to_pointer (fl->buffer_init_function_opaque,
+ vlib_packet_template_t *);
+ uword i;
+
+ for (i = 0; i < n_buffers; i++)
+ {
+ vlib_buffer_t * b = vlib_get_buffer (vm, buffers[i]);
+ ASSERT (b->current_length == vec_len (t->packet_data));
+ memcpy (vlib_buffer_get_current (b), t->packet_data, b->current_length);
+ }
+}
+
+void vlib_packet_template_init (vlib_main_t * vm,
+ vlib_packet_template_t * t,
+ void * packet_data,
+ uword n_packet_data_bytes,
+ uword min_n_buffers_each_physmem_alloc,
+ char * fmt,
+ ...)
+{
+ va_list va;
+ __attribute__((unused)) u8 * name;
+
+ va_start (va, fmt);
+ name = va_format (0, fmt, &va);
+ va_end (va);
+
+ vlib_worker_thread_barrier_sync(vm);
+ memset (t, 0, sizeof (t[0]));
+
+ vec_add (t->packet_data, packet_data, n_packet_data_bytes);
+
+ vlib_worker_thread_barrier_release(vm);
+}
+
+void *
+vlib_packet_template_get_packet (vlib_main_t * vm,
+ vlib_packet_template_t * t,
+ u32 * bi_result)
+{
+ u32 bi;
+ vlib_buffer_t * b;
+
+ if (vlib_buffer_alloc (vm, &bi, 1) != 1)
+ return 0;
+
+ *bi_result = bi;
+
+ b = vlib_get_buffer (vm, bi);
+ memcpy (vlib_buffer_get_current (b),
+ t->packet_data, vec_len(t->packet_data));
+ b->current_length = vec_len(t->packet_data);
+
+ /* Fix up mbuf header length fields */
+ struct rte_mbuf * mb;
+ mb = ((struct rte_mbuf *)b) - 1;
+ mb->data_len = b->current_length;
+ mb->pkt_len = b->current_length;
+
+ return b->data;
+}
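+
+/* Usage sketch (the template contents and names are illustrative):
+
+     vlib_packet_template_t t;
+     vlib_packet_template_init (vm, &t, &hdr, sizeof (hdr), 16,
+                                "reply template");
+     ...
+     u32 bi;
+     void * p = vlib_packet_template_get_packet (vm, &t, &bi);
+
+   p points at a fresh buffer pre-filled with the template bytes; bi is
+   its buffer index, ready to enqueue. */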
+
+/* Append given data to end of buffer, possibly allocating new buffers. */
+u32 vlib_buffer_add_data (vlib_main_t * vm,
+ u32 free_list_index,
+ u32 buffer_index,
+ void * data, u32 n_data_bytes)
+{
+ u32 n_buffer_bytes, n_left, n_left_this_buffer, bi;
+ vlib_buffer_t * b;
+ void * d;
+
+ bi = buffer_index;
+ if (bi == 0
+ && 1 != vlib_buffer_alloc_from_free_list (vm, &bi, 1, free_list_index))
+ goto out_of_buffers;
+
+ d = data;
+ n_left = n_data_bytes;
+ n_buffer_bytes = vlib_buffer_free_list_buffer_size (vm, free_list_index);
+
+ b = vlib_get_buffer (vm, bi);
+ b->flags &= ~VLIB_BUFFER_TOTAL_LENGTH_VALID;
+
+ /* Get to the end of the chain before we try to append data...*/
+ while (b->flags & VLIB_BUFFER_NEXT_PRESENT)
+ b = vlib_get_buffer (vm, b->next_buffer);
+
+ while (1)
+ {
+ u32 n;
+
+ ASSERT (n_buffer_bytes >= b->current_length);
+ n_left_this_buffer = n_buffer_bytes - (b->current_data + b->current_length);
+ n = clib_min (n_left_this_buffer, n_left);
+ memcpy (vlib_buffer_get_current (b) + b->current_length, d, n);
+ b->current_length += n;
+ n_left -= n;
+ if (n_left == 0)
+ break;
+
+ d += n;
+ if (1 != vlib_buffer_alloc_from_free_list (vm, &b->next_buffer, 1, free_list_index))
+ goto out_of_buffers;
+
+ b->flags |= VLIB_BUFFER_NEXT_PRESENT;
+
+ b = vlib_get_buffer (vm, b->next_buffer);
+ }
+
+ return bi;
+
+ out_of_buffers:
+ clib_error ("out of buffers");
+ return bi;
+}
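+
+/* Usage sketch: passing 0 as buffer_index makes the function allocate
+   the head buffer itself (`payload' is an illustrative byte vector):
+
+     u32 bi = vlib_buffer_add_data
+       (vm, VLIB_BUFFER_DEFAULT_FREE_LIST_INDEX, 0,
+        payload, vec_len (payload));
+*/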
+
+clib_error_t *
+vlib_buffer_pool_create(vlib_main_t * vm, unsigned num_mbufs,
+ unsigned mbuf_size, unsigned socket_id)
+{
+ vlib_buffer_main_t * bm = vm->buffer_main;
+ vlib_physmem_main_t * vpm = &vm->physmem_main;
+ struct rte_mempool * rmp;
+ uword new_start, new_size;
+ int i;
+
+ if (!rte_mempool_create)
+ return clib_error_return (0, "not linked with DPDK");
+
+ vec_validate_aligned(bm->pktmbuf_pools, socket_id, CLIB_CACHE_LINE_BYTES);
+
+ /* pool already exists, nothing to do */
+ if (bm->pktmbuf_pools[socket_id])
+ return 0;
+
+  u8 * pool_name = format (0, "mbuf_pool_socket%u%c", socket_id, 0);
+ rmp = rte_mempool_create((char *) pool_name,
+ num_mbufs, mbuf_size, 512,
+ sizeof(struct rte_pktmbuf_pool_private),
+ rte_pktmbuf_pool_init, NULL,
+ rte_pktmbuf_init, NULL,
+ socket_id, 0);
+ vec_free(pool_name);
+
+ if (rmp)
+ {
+ new_start = pointer_to_uword(rmp);
+ new_size = rmp->elt_va_end - new_start;
+
+ if (vpm->virtual.size > 0)
+ {
+ ASSERT(new_start != vpm->virtual.start);
+ if (new_start < vpm->virtual.start)
+ {
+ new_size = vpm->virtual.size + vpm->virtual.start - new_start;
+ }
+ else
+ {
+ new_size += new_start - vpm->virtual.start;
+ new_start = vpm->virtual.start;
+ }
+
+ /* check if fits into buffer index range */
+ if (new_size > ( (uword) 1 << (32 + CLIB_LOG2_CACHE_LINE_BYTES)))
+ rmp = 0;
+ }
+ }
+
+ if (rmp)
+ {
+ bm->pktmbuf_pools[socket_id] = rmp;
+ vpm->virtual.start = new_start;
+ vpm->virtual.size = new_size;
+ vpm->virtual.end = new_start + new_size;
+ return 0;
+ }
+
+ /* no usable pool for this socket, try to use pool from another one */
+ for (i = 0; i < vec_len(bm->pktmbuf_pools); i++)
+ {
+ if(bm->pktmbuf_pools[i])
+ {
+ clib_warning("WARNING: Failed to allocate mempool for CPU socket %u. "
+ "Threads running on socket %u will use socket %u mempool.",
+ socket_id, socket_id, i);
+ bm->pktmbuf_pools[socket_id] = bm->pktmbuf_pools[i];
+ return 0;
+ }
+ }
+
+ return clib_error_return (0, "failed to allocate mempool on socket %u",
+ socket_id);
+}
+
+
+static void vlib_serialize_tx (serialize_main_header_t * m, serialize_stream_t * s)
+{
+ vlib_main_t * vm;
+ vlib_serialize_buffer_main_t * sm;
+ uword n, n_bytes_to_write;
+ vlib_buffer_t * last;
+
+ n_bytes_to_write = s->current_buffer_index;
+ sm = uword_to_pointer (s->data_function_opaque, vlib_serialize_buffer_main_t *);
+ vm = sm->vlib_main;
+
+ ASSERT (sm->tx.max_n_data_bytes_per_chain > 0);
+ if (serialize_stream_is_end_of_stream (s)
+ || sm->tx.n_total_data_bytes + n_bytes_to_write > sm->tx.max_n_data_bytes_per_chain)
+ {
+ vlib_process_t * p = vlib_get_current_process (vm);
+
+ last = vlib_get_buffer (vm, sm->last_buffer);
+ last->current_length = n_bytes_to_write;
+
+ vlib_set_next_frame_buffer (vm, &p->node_runtime, sm->tx.next_index, sm->first_buffer);
+
+ sm->first_buffer = sm->last_buffer = ~0;
+ sm->tx.n_total_data_bytes = 0;
+ }
+
+ else if (n_bytes_to_write == 0 && s->n_buffer_bytes == 0)
+ {
+ ASSERT (sm->first_buffer == ~0);
+ ASSERT (sm->last_buffer == ~0);
+ n = vlib_buffer_alloc_from_free_list (vm, &sm->first_buffer, 1, sm->tx.free_list_index);
+ if (n != 1)
+ serialize_error (m, clib_error_create ("vlib_buffer_alloc_from_free_list fails"));
+ sm->last_buffer = sm->first_buffer;
+ s->n_buffer_bytes = vlib_buffer_free_list_buffer_size (vm, sm->tx.free_list_index);
+ }
+
+ if (n_bytes_to_write > 0)
+ {
+ vlib_buffer_t * prev = vlib_get_buffer (vm, sm->last_buffer);
+ n = vlib_buffer_alloc_from_free_list (vm, &sm->last_buffer, 1, sm->tx.free_list_index);
+ if (n != 1)
+ serialize_error (m, clib_error_create ("vlib_buffer_alloc_from_free_list fails"));
+ sm->tx.n_total_data_bytes += n_bytes_to_write;
+ prev->current_length = n_bytes_to_write;
+ prev->next_buffer = sm->last_buffer;
+ prev->flags |= VLIB_BUFFER_NEXT_PRESENT;
+ }
+
+ if (sm->last_buffer != ~0)
+ {
+ last = vlib_get_buffer (vm, sm->last_buffer);
+ s->buffer = vlib_buffer_get_current (last);
+ s->current_buffer_index = 0;
+ ASSERT (last->current_data == s->current_buffer_index);
+ }
+}
+
+static void vlib_serialize_rx (serialize_main_header_t * m, serialize_stream_t * s)
+{
+ vlib_main_t * vm;
+ vlib_serialize_buffer_main_t * sm;
+ vlib_buffer_t * last;
+
+ sm = uword_to_pointer (s->data_function_opaque, vlib_serialize_buffer_main_t *);
+ vm = sm->vlib_main;
+
+ if (serialize_stream_is_end_of_stream (s))
+ return;
+
+ if (sm->last_buffer != ~0)
+ {
+ last = vlib_get_buffer (vm, sm->last_buffer);
+
+ if (last->flags & VLIB_BUFFER_NEXT_PRESENT)
+ sm->last_buffer = last->next_buffer;
+ else
+ {
+ vlib_buffer_free (vm, &sm->first_buffer, /* count */ 1);
+ sm->first_buffer = sm->last_buffer = ~0;
+ }
+ }
+
+ if (sm->last_buffer == ~0)
+ {
+ while (clib_fifo_elts (sm->rx.buffer_fifo) == 0)
+ {
+ sm->rx.ready_one_time_event = vlib_process_create_one_time_event (vm, vlib_current_process (vm), ~0);
+ vlib_process_wait_for_one_time_event (vm, /* no event data */ 0, sm->rx.ready_one_time_event);
+ }
+
+ clib_fifo_sub1 (sm->rx.buffer_fifo, sm->first_buffer);
+ sm->last_buffer = sm->first_buffer;
+ }
+
+ ASSERT (sm->last_buffer != ~0);
+
+ last = vlib_get_buffer (vm, sm->last_buffer);
+ s->current_buffer_index = 0;
+ s->buffer = vlib_buffer_get_current (last);
+ s->n_buffer_bytes = last->current_length;
+}
+
+static void
+serialize_open_vlib_helper (serialize_main_t * m,
+ vlib_main_t * vm,
+ vlib_serialize_buffer_main_t * sm,
+ uword is_read)
+{
+ /* Initialize serialize main but save overflow buffer for re-use between calls. */
+ {
+ u8 * save = m->stream.overflow_buffer;
+ memset (m, 0, sizeof (m[0]));
+ m->stream.overflow_buffer = save;
+ if (save)
+ _vec_len (save) = 0;
+ }
+
+ sm->first_buffer = sm->last_buffer = ~0;
+ if (is_read)
+ clib_fifo_reset (sm->rx.buffer_fifo);
+ else
+ sm->tx.n_total_data_bytes = 0;
+ sm->vlib_main = vm;
+ m->header.data_function = is_read ? vlib_serialize_rx : vlib_serialize_tx;
+ m->stream.data_function_opaque = pointer_to_uword (sm);
+}
+
+void serialize_open_vlib_buffer (serialize_main_t * m, vlib_main_t * vm, vlib_serialize_buffer_main_t * sm)
+{ serialize_open_vlib_helper (m, vm, sm, /* is_read */ 0); }
+
+void unserialize_open_vlib_buffer (serialize_main_t * m, vlib_main_t * vm, vlib_serialize_buffer_main_t * sm)
+{ serialize_open_vlib_helper (m, vm, sm, /* is_read */ 1); }
+
+u32 serialize_close_vlib_buffer (serialize_main_t * m)
+{
+ vlib_serialize_buffer_main_t * sm
+ = uword_to_pointer (m->stream.data_function_opaque, vlib_serialize_buffer_main_t *);
+ vlib_buffer_t * last;
+ serialize_stream_t * s = &m->stream;
+
+ last = vlib_get_buffer (sm->vlib_main, sm->last_buffer);
+ last->current_length = s->current_buffer_index;
+
+ if (vec_len (s->overflow_buffer) > 0)
+ {
+ sm->last_buffer
+ = vlib_buffer_add_data (sm->vlib_main, sm->tx.free_list_index,
+ sm->last_buffer == ~0 ? 0 : sm->last_buffer,
+ s->overflow_buffer,
+ vec_len (s->overflow_buffer));
+ _vec_len (s->overflow_buffer) = 0;
+ }
+
+ return sm->first_buffer;
+}
+
+void unserialize_close_vlib_buffer (serialize_main_t * m)
+{
+ vlib_serialize_buffer_main_t * sm
+ = uword_to_pointer (m->stream.data_function_opaque, vlib_serialize_buffer_main_t *);
+ if (sm->first_buffer != ~0)
+ vlib_buffer_free_one (sm->vlib_main, sm->first_buffer);
+ clib_fifo_reset (sm->rx.buffer_fifo);
+ if (m->stream.overflow_buffer)
+ _vec_len (m->stream.overflow_buffer) = 0;
+}
+
+static u8 * format_vlib_buffer_free_list (u8 * s, va_list * va)
+{
+ vlib_buffer_free_list_t * f = va_arg (*va, vlib_buffer_free_list_t *);
+ u32 threadnum= va_arg (*va, u32);
+ uword bytes_alloc, bytes_free, n_free, size;
+
+ if (! f)
+ return format (s, "%=7s%=30s%=12s%=12s%=12s%=12s%=12s%=12s",
+ "Thread", "Name", "Index", "Size", "Alloc", "Free", "#Alloc", "#Free");
+
+ size = sizeof (vlib_buffer_t) + f->n_data_bytes;
+ n_free = vec_len (f->aligned_buffers) + vec_len (f->unaligned_buffers);
+ bytes_alloc = size * f->n_alloc;
+ bytes_free = size * n_free;
+
+ s = format (s, "%7d%30s%12d%12d%=12U%=12U%=12d%=12d",
+ threadnum,
+ f->name, f->index, f->n_data_bytes,
+ format_memory_size, bytes_alloc,
+ format_memory_size, bytes_free,
+ f->n_alloc, n_free);
+
+ return s;
+}
+
+static clib_error_t *
+show_buffers (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ vlib_buffer_main_t * bm;
+ vlib_buffer_free_list_t * f;
+ vlib_main_t *curr_vm;
+ u32 vm_index = 0;
+
+ vlib_cli_output (vm, "%U", format_vlib_buffer_free_list, 0, 0);
+
+ do {
+ curr_vm = vec_len(vlib_mains) ? vlib_mains[vm_index] : vm;
+ bm = curr_vm->buffer_main;
+
+ pool_foreach (f, bm->buffer_free_list_pool, ({
+ vlib_cli_output (vm, "%U", format_vlib_buffer_free_list, f, vm_index);
+ }));
+
+ vm_index++;
+ } while (vm_index < vec_len(vlib_mains));
+
+ return 0;
+}
+
+VLIB_CLI_COMMAND (show_buffers_command, static) = {
+ .path = "show buffers",
+ .short_help = "Show packet buffer allocation",
+ .function = show_buffers,
+};
+
+#if CLIB_DEBUG > 0
+
+u32 * vlib_buffer_state_validation_lock;
+uword * vlib_buffer_state_validation_hash;
+void * vlib_buffer_state_heap;
+
+static clib_error_t *
+buffer_state_validation_init (vlib_main_t * vm)
+{
+ void * oldheap;
+
+ vlib_buffer_state_heap = mheap_alloc (0, 10<<20);
+
+ oldheap = clib_mem_set_heap (vlib_buffer_state_heap);
+
+ vlib_buffer_state_validation_hash = hash_create (0, sizeof(uword));
+ vec_validate_aligned (vlib_buffer_state_validation_lock, 0,
+ CLIB_CACHE_LINE_BYTES);
+ clib_mem_set_heap (oldheap);
+ return 0;
+}
+
+VLIB_INIT_FUNCTION (buffer_state_validation_init);
+#endif
diff --git a/vlib/vlib/error.c b/vlib/vlib/error.c
new file mode 100644
index 00000000000..59b89cefc3a
--- /dev/null
+++ b/vlib/vlib/error.c
@@ -0,0 +1,271 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * error.c: VLIB error handler
+ *
+ * Copyright (c) 2008 Eliot Dresselhaus
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <vlib/vlib.h>
+#include <vppinfra/heap.h>
+
+uword
+vlib_error_drop_buffers (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ u32 * buffers,
+ u32 next_buffer_stride,
+ u32 n_buffers,
+ u32 next_index,
+ u32 drop_error_node,
+ u32 drop_error_code)
+{
+ u32 n_left_this_frame, n_buffers_left, * args, n_args_left;
+ vlib_error_t drop_error;
+
+ drop_error = vlib_error_set (drop_error_node, drop_error_code);
+
+ n_buffers_left = n_buffers;
+ while (n_buffers_left > 0)
+ {
+ vlib_get_next_frame (vm, node, next_index, args, n_args_left);
+
+ n_left_this_frame = clib_min (n_buffers_left, n_args_left);
+ n_buffers_left -= n_left_this_frame;
+ n_args_left -= n_left_this_frame;
+
+ while (n_left_this_frame >= 4)
+ {
+ u32 bi0, bi1, bi2, bi3;
+ vlib_buffer_t * b0, * b1, * b2, * b3;
+
+ args[0] = bi0 = buffers[0];
+ args[1] = bi1 = buffers[1];
+ args[2] = bi2 = buffers[2];
+ args[3] = bi3 = buffers[3];
+
+ b0 = vlib_get_buffer (vm, bi0);
+ b1 = vlib_get_buffer (vm, bi1);
+ b2 = vlib_get_buffer (vm, bi2);
+ b3 = vlib_get_buffer (vm, bi3);
+
+ b0->error = drop_error;
+ b1->error = drop_error;
+ b2->error = drop_error;
+ b3->error = drop_error;
+
+ buffers += 4;
+ args += 4;
+ n_left_this_frame -= 4;
+ }
+
+ while (n_left_this_frame >= 1)
+ {
+ u32 bi0;
+ vlib_buffer_t * b0;
+
+ args[0] = bi0 = buffers[0];
+
+ b0 = vlib_get_buffer (vm, bi0);
+ b0->error = drop_error;
+
+ buffers += 1;
+ args += 1;
+ n_left_this_frame -= 1;
+ }
+
+ vlib_put_next_frame (vm, node, next_index, n_args_left);
+ }
+
+ return n_buffers;
+}
+
+/* Convenience node to drop a vector of buffers with a "misc error". */
+static uword
+misc_drop_buffers (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * frame)
+{
+ return vlib_error_drop_buffers (vm, node,
+ vlib_frame_args (frame),
+ /* buffer stride */ 1,
+ frame->n_vectors,
+ /* next */ 0,
+ node->node_index,
+ /* error */ 0);
+}
+
+static char * misc_drop_buffers_error_strings[] = {
+ [0] = "misc. errors",
+};
+
+VLIB_REGISTER_NODE (misc_drop_buffers_node,static) = {
+ .function = misc_drop_buffers,
+ .name = "misc-drop-buffers",
+ .vector_size = sizeof (u32),
+ .n_errors = 1,
+ .n_next_nodes = 1,
+ .next_nodes = {
+ "error-drop",
+ },
+ .error_strings = misc_drop_buffers_error_strings,
+};
+
+/* Reserves given number of error codes for given node. */
+void vlib_register_errors (vlib_main_t * vm,
+ u32 node_index,
+ u32 n_errors,
+ char * error_strings[])
+{
+ vlib_error_main_t * em = &vm->error_main;
+ vlib_node_t * n = vlib_get_node (vm, node_index);
+ uword l;
+
+ /* Free up any previous error strings. */
+ if (n->n_errors > 0)
+ heap_dealloc (em->error_strings_heap, n->error_heap_handle);
+
+ n->n_errors = n_errors;
+ n->error_strings = error_strings;
+
+ if (n_errors == 0)
+ return;
+
+ n->error_heap_index =
+ heap_alloc (em->error_strings_heap, n_errors,
+ n->error_heap_handle);
+
+ l = vec_len (em->error_strings_heap);
+
+ memcpy (vec_elt_at_index (em->error_strings_heap, n->error_heap_index),
+ error_strings,
+ n_errors * sizeof (error_strings[0]));
+
+ /* Allocate a counter/elog type for each error. */
+ vec_validate (em->counters, l - 1);
+ vec_validate (vm->error_elog_event_types, l - 1);
+
+ /* Zero counters for re-registrations of errors. */
+ if (n->error_heap_index + n_errors <= vec_len (em->counters_last_clear))
+ memcpy (em->counters + n->error_heap_index,
+ em->counters_last_clear + n->error_heap_index,
+ n_errors * sizeof (em->counters[0]));
+ else
+ memset (em->counters + n->error_heap_index,
+ 0,
+ n_errors * sizeof (em->counters[0]));
+
+ {
+ elog_event_type_t t;
+ uword i;
+
+ memset (&t, 0, sizeof (t));
+ for (i = 0; i < n_errors; i++)
+ {
+ t.format = (char *) format (0, "%v %s: %%d",
+ n->name,
+ error_strings[i]);
+ vm->error_elog_event_types[n->error_heap_index + i] = t;
+ }
+ }
+}
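+
+/* Registration sketch (hypothetical node with two error codes):
+
+     static char * my_error_strings[] = {
+       "bad checksum",
+       "packet too short",
+     };
+     vlib_register_errors (vm, my_node_index, 2, my_error_strings);
+
+   The indices into my_error_strings[] are the per-node error codes
+   later passed to vlib_error_count() or vlib_error_set(). */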
+
+static clib_error_t *
+show_errors (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ vlib_error_main_t * em = &vm->error_main;
+ vlib_node_t * n;
+ u32 code, i, ni;
+ u64 c;
+
+ vlib_cli_output (vm, "%=16s%=40s%=20s", "Count", "Node", "Reason");
+
+ for (ni = 0; ni < vec_len (vm->node_main.nodes); ni++)
+ {
+ n = vlib_get_node (vm, ni);
+ for (code = 0; code < n->n_errors; code++)
+ {
+ i = n->error_heap_index + code;
+ c = em->counters[i];
+ if (i < vec_len (em->counters_last_clear))
+ c -= em->counters_last_clear[i];
+
+ if (c == 0)
+ continue;
+
+ vlib_cli_output (vm, "%16Ld%=40v%s", c, n->name, em->error_strings_heap[i]);
+ }
+ }
+
+ return 0;
+}
+
+VLIB_CLI_COMMAND (cli_show_errors, static) = {
+ .path = "show errors",
+ .short_help = "Show error counts",
+ .function = show_errors,
+};
+
+VLIB_CLI_COMMAND (cli_show_node_counters, static) = {
+ .path = "show node counters",
+ .short_help = "Show node counters",
+ .function = show_errors,
+};
+
+static clib_error_t *
+clear_error_counters (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ vlib_error_main_t * em = &vm->error_main;
+ u32 i;
+
+ vec_validate (em->counters_last_clear, vec_len (em->counters) - 1);
+ for (i = 0; i < vec_len (em->counters); i++)
+ em->counters_last_clear[i] = em->counters[i];
+ return 0;
+}
+
+VLIB_CLI_COMMAND (cli_clear_error_counters, static) = {
+ .path = "clear errors",
+ .short_help = "Clear error counters",
+ .function = clear_error_counters,
+};
+
+VLIB_CLI_COMMAND (cli_clear_node_counters, static) = {
+ .path = "clear node counters",
+ .short_help = "Clear node counters",
+ .function = clear_error_counters,
+};
diff --git a/vlib/vlib/error.h b/vlib/vlib/error.h
new file mode 100644
index 00000000000..4bf0b926718
--- /dev/null
+++ b/vlib/vlib/error.h
@@ -0,0 +1,89 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * error.h: drop/punt error packets
+ *
+ * Copyright (c) 2008 Eliot Dresselhaus
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef included_vlib_error_h
+#define included_vlib_error_h
+
+/* Combined 20 bit node index and 12 bit code packed as a 32 bit number. */
+typedef u32 vlib_error_t;
+
+always_inline u32
+vlib_error_get_node (vlib_error_t e)
+{ return e >> 12; }
+
+always_inline u32
+vlib_error_get_code (vlib_error_t e)
+{ return e & 0xfff; }
+
+always_inline vlib_error_t
+vlib_error_set (u32 node_index, u32 code)
+{
+ ASSERT (node_index < (1 << 20));
+ ASSERT (code < (1 << 12));
+ return (node_index << 12) | code;
+}
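+
+/* Worked example: node index 7 with code 3 packs as (7 << 12) | 3,
+   i.e. 0x7003; vlib_error_get_node() recovers 7 and
+   vlib_error_get_code() recovers 3. */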
+
+always_inline vlib_error_t
+vlib_error_set_code (vlib_error_t e, u32 code)
+{
+ ASSERT (vlib_error_get_code (e) == 0);
+ ASSERT (code < (1 << 12));
+ e |= code;
+ return e;
+}
+
+typedef struct {
+ /* Error counters. */
+ u64 * counters;
+
+ /* Counter values as of last counter clear. */
+ u64 * counters_last_clear;
+
+ /* Error name strings in heap. Heap index
+ indexes counter vector. */
+ char ** error_strings_heap;
+} vlib_error_main_t;
+
+/* Per node error registration. */
+void vlib_register_errors (struct vlib_main_t * vm,
+ u32 node_index,
+ u32 n_errors,
+ char * error_strings[]);
+
+#endif /* included_vlib_error_h */
diff --git a/vlib/vlib/error_funcs.h b/vlib/vlib/error_funcs.h
new file mode 100644
index 00000000000..acdd5d2d898
--- /dev/null
+++ b/vlib/vlib/error_funcs.h
@@ -0,0 +1,80 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * error_funcs.h: VLIB error handling
+ *
+ * Copyright (c) 2008 Eliot Dresselhaus
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef included_vlib_error_funcs_h
+#define included_vlib_error_funcs_h
+
+#include <vlib/node_funcs.h>
+
+always_inline void
+vlib_error_elog_count (vlib_main_t * vm, uword counter, uword increment)
+{
+ elog_main_t * em = &vm->elog_main;
+ if (VLIB_ELOG_MAIN_LOOP > 0 && increment > 0)
+ elog (em, vec_elt_at_index (vm->error_elog_event_types, counter), increment);
+}
+
+always_inline void
+vlib_error_count (vlib_main_t * vm, uword node_index,
+ uword counter, uword increment)
+{
+ vlib_node_t * n = vlib_get_node (vm, node_index);
+ vlib_error_main_t * em = &vm->error_main;
+
+ ASSERT (counter < n->n_errors);
+ counter += n->error_heap_index;
+
+ ASSERT (counter < vec_len (em->counters));
+ em->counters[counter] += increment;
+
+ vlib_error_elog_count (vm, counter, increment);
+}
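+
+/* Usage sketch inside a node dispatch function (the error enum value is
+   illustrative):
+
+     vlib_error_count (vm, node->node_index, MY_ERROR_BAD_CHECKSUM, 1);
+
+   where MY_ERROR_BAD_CHECKSUM indexes the error_strings[] array the
+   node registered via vlib_register_errors(). */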
+
+/* Drop all buffers in frame with given error code. */
+uword
+vlib_error_drop_buffers (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ u32 * buffers,
+ u32 next_buffer_stride,
+ u32 n_buffers,
+ u32 error_next_index,
+ u32 error_node,
+ u32 error_code);
+
+#endif /* included_vlib_error_funcs_h */
diff --git a/vlib/vlib/format.c b/vlib/vlib/format.c
new file mode 100644
index 00000000000..3c77d8dbd18
--- /dev/null
+++ b/vlib/vlib/format.c
@@ -0,0 +1,171 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * format.c: generic network formatting/unformatting
+ *
+ * Copyright (c) 2008 Eliot Dresselhaus
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <vlib/vlib.h>
+
+u8 * format_vlib_rx_tx (u8 * s, va_list * args)
+{
+ vlib_rx_or_tx_t r = va_arg (*args, vlib_rx_or_tx_t);
+ char * t;
+
+ switch (r)
+ {
+ case VLIB_RX: t = "rx"; break;
+ case VLIB_TX: t = "tx"; break;
+ default: t = "INVALID"; break;
+ }
+
+ vec_add (s, t, strlen (t));
+ return s;
+}
+
+u8 * format_vlib_read_write (u8 * s, va_list * args)
+{
+ vlib_rx_or_tx_t r = va_arg (*args, vlib_rx_or_tx_t);
+ char * t;
+
+ switch (r)
+ {
+ case VLIB_READ: t = "read"; break;
+ case VLIB_WRITE: t = "write"; break;
+ default: t = "INVALID"; break;
+ }
+
+ vec_add (s, t, strlen (t));
+ return s;
+}
+
+/* Formats buffer data as printable ascii or as hex. */
+u8 * format_vlib_buffer_data (u8 * s, va_list * args)
+{
+ u8 * data = va_arg (*args, u8 *);
+ u32 n_data_bytes = va_arg (*args, u32);
+ u32 i, is_printable;
+
+ is_printable = 1;
+ for (i = 0; i < n_data_bytes && is_printable; i++)
+ {
+ u8 c = data[i];
+ if (c < 0x20)
+ is_printable = 0;
+ else if (c >= 0x7f)
+ is_printable = 0;
+ }
+
+ if (is_printable)
+ vec_add (s, data, n_data_bytes);
+ else
+ s = format (s, "%U", format_hex_bytes, data, n_data_bytes);
+
+ return s;
+}
+
+/* Enable/on => 1; disable/off => 0. */
+uword unformat_vlib_enable_disable (unformat_input_t * input, va_list * args)
+{
+ int * result = va_arg (*args, int *);
+ int enable;
+
+ if (unformat (input, "enable") || unformat (input, "on"))
+ enable = 1;
+ else if (unformat (input, "disable") || unformat (input, "off"))
+ enable = 0;
+ else
+ return 0;
+
+ *result = enable;
+ return 1;
+}
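+
+/* Example (a sketch): typically invoked via %U from a CLI handler:
+
+     int enable;
+     if (unformat (input, "%U", unformat_vlib_enable_disable, &enable))
+       ... enable is now 1 or 0 ...
+*/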
+
+/* rx/tx => VLIB_RX/VLIB_TX. */
+uword unformat_vlib_rx_tx (unformat_input_t * input, va_list * args)
+{
+ int * result = va_arg (*args, int *);
+ if (unformat (input, "rx"))
+ *result = VLIB_RX;
+ else if (unformat (input, "tx"))
+ *result = VLIB_TX;
+ else
+ return 0;
+ return 1;
+}
+
+/* Parse an int as either %d or 0x%x. */
+uword unformat_vlib_number (unformat_input_t * input, va_list * args)
+{
+ int * result = va_arg (*args, int *);
+
+ return (unformat (input, "0x%x", result)
+ || unformat (input, "%d", result));
+}
+
+/* Parse a-zA-Z0-9_ token and hash to value. */
+uword unformat_vlib_number_by_name (unformat_input_t * input, va_list * args)
+{
+ uword * hash = va_arg (*args, uword *);
+ int * result = va_arg (*args, int *);
+ uword * p;
+ u8 * token;
+ int i;
+
+ if (! unformat_user (input, unformat_token, "a-zA-Z0-9_", &token))
+ return 0;
+
+ /* Null terminate. */
+ if (vec_len (token) > 0 &&
+ token[vec_len (token) - 1] != 0)
+ vec_add1 (token, 0);
+
+ /* Check for exact match. */
+ p = hash_get_mem (hash, token);
+ if (p)
+ goto done;
+
+ /* Convert to upper case & try match. */
+ for (i = 0; i < vec_len (token); i++)
+ if (token[i] >= 'a' && token[i] <= 'z')
+ token[i] = 'A' + token[i] - 'a';
+ p = hash_get_mem (hash, token);
+
+ done:
+ vec_free (token);
+ if (p)
+ *result = p[0];
+ return p != 0;
+}
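+
+/* Example (a sketch; color_hash is a hypothetical hash created with
+   hash_create_string and populated with upper-case names):
+
+     int color;
+     if (unformat (input, "%U", unformat_vlib_number_by_name,
+                   color_hash, &color))
+       ... color holds the value stored in the hash ...
+*/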
diff --git a/vlib/vlib/format_funcs.h b/vlib/vlib/format_funcs.h
new file mode 100644
index 00000000000..02d8a555d78
--- /dev/null
+++ b/vlib/vlib/format_funcs.h
@@ -0,0 +1,67 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * format_funcs.h: VLIB formatting/unformatting
+ *
+ * Copyright (c) 2008 Eliot Dresselhaus
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef included_vlib_format_h
+#define included_vlib_format_h
+
+/* Format vlib_rx_or_tx_t/vlib_read_or_write_t enum as string. */
+u8 * format_vlib_rx_tx (u8 * s, va_list * args);
+u8 * format_vlib_read_write (u8 * s, va_list * args);
+
+/* Formats buffer data as printable ascii or as hex. */
+u8 * format_vlib_buffer_data (u8 * s, va_list * args);
+
+/* Enable/on => 1; disable/off => 0. */
+uword unformat_vlib_enable_disable (unformat_input_t * input, va_list * args);
+
+/* rx/tx => VLIB_RX/VLIB_TX. */
+uword unformat_vlib_rx_tx (unformat_input_t * input, va_list * args);
+
+/* Parse a-zA-Z0-9_ token and hash to value. */
+uword unformat_vlib_number_by_name (unformat_input_t * input, va_list * args);
+
+/* Parse an int as either %d or 0x%x. */
+uword unformat_vlib_number (unformat_input_t * input, va_list * args);
+
+/* Flag to tell format_vlib_*_header functions not to recurse into the
+   next layer's header.  For example, it tells format_vlib_ethernet_header
+   not to format the IP header. */
+#define FORMAT_VLIB_HEADER_NO_RECURSION (~0)
+
+#endif /* included_vlib_format_h */
diff --git a/vlib/vlib/global_funcs.h b/vlib/vlib/global_funcs.h
new file mode 100644
index 00000000000..406ce7d71b6
--- /dev/null
+++ b/vlib/vlib/global_funcs.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * global_funcs.h: global data structure access functions
+ */
+
+#ifndef included_vlib_global_funcs_h_
+#define included_vlib_global_funcs_h_
+
+always_inline vlib_main_t *
+vlib_get_main (void)
+{
+ vlib_main_t * vm;
+ vm = vlib_mains ? vlib_mains[os_get_cpu_number()] : &vlib_global_main;
+ ASSERT(vm);
+ return vm;
+}
+
+always_inline vlib_thread_main_t *
+vlib_get_thread_main()
+{
+ return &vlib_thread_main;
+}
+
+#endif /* included_vlib_global_funcs_h_ */
diff --git a/vlib/vlib/init.c b/vlib/vlib/init.c
new file mode 100644
index 00000000000..3991c800147
--- /dev/null
+++ b/vlib/vlib/init.c
@@ -0,0 +1,158 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * init.c: mechanism for functions to be called at init/exit.
+ *
+ * Copyright (c) 2008 Eliot Dresselhaus
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <vlib/vlib.h>
+
+clib_error_t *
+vlib_call_init_exit_functions (vlib_main_t * vm,
+ _vlib_init_function_list_elt_t *head,
+ int call_once)
+{
+ clib_error_t * error = 0;
+ _vlib_init_function_list_elt_t * i;
+
+ i = head;
+ while (i)
+ {
+      /* With call_once set, each function runs at most once; otherwise
+         functions are called unconditionally. */
+      if (! call_once || ! hash_get (vm->init_functions_called, i->f))
+        {
+          if (call_once)
+            hash_set1 (vm->init_functions_called, i->f);
+          error = i->f (vm);
+          if (error)
+            return error;
+        }
+ i = i->next_init_function;
+ }
+ return error;
+}
+
+clib_error_t * vlib_call_all_init_functions (vlib_main_t * vm)
+{
+ /* Call dummy functions to make sure purely static modules are
+ linked in. */
+#define _(f) vlib_##f##_reference ();
+ foreach_vlib_module_reference;
+#undef _
+
+ return vlib_call_init_exit_functions
+ (vm, vm->init_function_registrations, 1 /* call_once */);
+}
+
+clib_error_t * vlib_call_all_main_loop_enter_functions (vlib_main_t * vm)
+{
+ return vlib_call_init_exit_functions
+ (vm, vm->main_loop_enter_function_registrations, 1 /* call_once */);
+}
+
+clib_error_t * vlib_call_all_main_loop_exit_functions (vlib_main_t * vm)
+{
+ return vlib_call_init_exit_functions
+ (vm, vm->main_loop_exit_function_registrations, 1 /* call_once */);
+}
+
+clib_error_t * vlib_call_all_config_functions (vlib_main_t * vm,
+ unformat_input_t * input,
+ int is_early)
+{
+ clib_error_t * error = 0;
+ vlib_config_function_runtime_t * c, ** all;
+ uword * hash = 0, * p;
+ uword i;
+
+ hash = hash_create_string (0, sizeof (uword));
+ all = 0;
+
+ c = vm->config_function_registrations;
+
+ while (c)
+ {
+ hash_set_mem (hash, c->name, vec_len (all));
+ vec_add1 (all, c);
+ unformat_init (&c->input, 0, 0);
+ c = c->next_registration;
+ }
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ u8 * s, * v;
+
+ if (! unformat (input, "%s %v", &s, &v)
+ || ! (p = hash_get_mem (hash, s)))
+ {
+ error = clib_error_create ("unknown input `%s %v'", s, v);
+ goto done;
+ }
+
+ c = all[p[0]];
+ if (vec_len (c->input.buffer) > 0)
+ vec_add1 (c->input.buffer, ' ');
+ vec_add (c->input.buffer, v, vec_len (v));
+ vec_free (v);
+ vec_free (s);
+ }
+
+ for (i = 0; i < vec_len (all); i++)
+ {
+ c = all[i];
+
+ /* Is this an early config? Are we doing early configs? */
+ if (is_early ^ c->is_early)
+ continue;
+
+ /* Already called? */
+ if (hash_get (vm->init_functions_called, c->function))
+ continue;
+ hash_set1 (vm->init_functions_called, c->function);
+
+ error = c->function (vm, &c->input);
+ if (error)
+ goto done;
+ }
+
+ done:
+ for (i = 0; i < vec_len (all); i++)
+ {
+ c = all[i];
+ unformat_free (&c->input);
+ }
+ vec_free (all);
+ hash_free (hash);
+ return error;
+}
diff --git a/vlib/vlib/init.h b/vlib/vlib/init.h
new file mode 100644
index 00000000000..9d940d0745f
--- /dev/null
+++ b/vlib/vlib/init.h
@@ -0,0 +1,222 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * init.h: mechanism for functions to be called at init/exit.
+ *
+ * Copyright (c) 2008 Eliot Dresselhaus
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef included_vlib_init_h
+#define included_vlib_init_h
+
+#include <vppinfra/error.h>
+#include <vppinfra/format.h>
+#include <vppinfra/hash.h>
+
+/* Init/exit functions: called at start/end of main routine. Init
+ functions are typically used to register and setup packet
+ processing nodes. */
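+
+/* For example (a sketch; my_init is hypothetical), a module registers
+   an init function with the VLIB_INIT_FUNCTION macro defined below:
+
+     static clib_error_t * my_init (vlib_main_t * vm)
+     {
+       ... register nodes, allocate state ...
+       return 0;
+     }
+     VLIB_INIT_FUNCTION (my_init);
+*/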
+
+typedef clib_error_t * (vlib_init_function_t) (struct vlib_main_t * vm);
+
+typedef struct _vlib_init_function_list_elt {
+ struct _vlib_init_function_list_elt * next_init_function;
+ vlib_init_function_t * f;
+} _vlib_init_function_list_elt_t;
+
+/* Configuration functions: called with configuration input just before
+ main polling loop starts. */
+typedef clib_error_t * (vlib_config_function_t) (struct vlib_main_t * vm,
+ unformat_input_t * input);
+
+typedef struct vlib_config_function_runtime_t {
+ /* Function to call. Set to null once function has already been called. */
+ vlib_config_function_t * function;
+
+ /* Input for function. */
+ unformat_input_t input;
+
+ /* next config function registration */
+ struct vlib_config_function_runtime_t * next_registration;
+
+ /* To be invoked as soon as the clib heap is available */
+ u8 is_early;
+
+ /* Name used to distinguish input on command line. */
+ char name[32];
+} vlib_config_function_runtime_t;
+
+#define _VLIB_INIT_FUNCTION_SYMBOL(x, type) \
+ _vlib_##type##_function_##x
+
+#define VLIB_INIT_FUNCTION_SYMBOL(x) \
+ _VLIB_INIT_FUNCTION_SYMBOL(x, init)
+#define VLIB_MAIN_LOOP_ENTER_FUNCTION_SYMBOL(x) \
+ _VLIB_INIT_FUNCTION_SYMBOL(x, main_loop_enter)
+#define VLIB_MAIN_LOOP_EXIT_FUNCTION_SYMBOL(x) \
+ _VLIB_INIT_FUNCTION_SYMBOL(x, main_loop_exit)
+#define VLIB_CONFIG_FUNCTION_SYMBOL(x) \
+ _VLIB_INIT_FUNCTION_SYMBOL(x, config)
+
+/* Declaration is global (i.e. not static) so that init functions can
+   be called from other modules to resolve init function dependencies. */
+
+#define VLIB_DECLARE_INIT_FUNCTION(x, tag) \
+vlib_init_function_t * _VLIB_INIT_FUNCTION_SYMBOL (x, tag) = x; \
+static void __vlib_add_##tag##_function_##x (void) \
+ __attribute__((__constructor__)) ; \
+static void __vlib_add_##tag##_function_##x (void) \
+{ \
+ vlib_main_t * vm = vlib_get_main(); \
+ static _vlib_init_function_list_elt_t _vlib_init_function; \
+ _vlib_init_function.next_init_function \
+ = vm->tag##_function_registrations; \
+ vm->tag##_function_registrations = &_vlib_init_function; \
+ _vlib_init_function.f = &x; \
+}
+
+#define VLIB_INIT_FUNCTION(x) VLIB_DECLARE_INIT_FUNCTION(x,init)
+
+#define VLIB_MAIN_LOOP_ENTER_FUNCTION(x) \
+ VLIB_DECLARE_INIT_FUNCTION(x,main_loop_enter)
+#define VLIB_MAIN_LOOP_EXIT_FUNCTION(x) \
+VLIB_DECLARE_INIT_FUNCTION(x,main_loop_exit)
+
+#define VLIB_CONFIG_FUNCTION(x,n,...) \
+ __VA_ARGS__ vlib_config_function_runtime_t \
+ VLIB_CONFIG_FUNCTION_SYMBOL(x); \
+static void __vlib_add_config_function_##x (void) \
+ __attribute__((__constructor__)) ; \
+static void __vlib_add_config_function_##x (void) \
+{ \
+ vlib_main_t * vm = vlib_get_main(); \
+ VLIB_CONFIG_FUNCTION_SYMBOL(x).next_registration \
+ = vm->config_function_registrations; \
+ vm->config_function_registrations \
+ = &VLIB_CONFIG_FUNCTION_SYMBOL(x); \
+} \
+ vlib_config_function_runtime_t \
+ VLIB_CONFIG_FUNCTION_SYMBOL (x) \
+ = { \
+ .name = n, \
+ .function = x, \
+ .is_early = 0, \
+ }
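+
+/* Example (a sketch; names are hypothetical): handles a
+   "my-module { foo 10 }" stanza in the startup configuration:
+
+     static clib_error_t *
+     my_module_config (vlib_main_t * vm, unformat_input_t * input)
+     {
+       u32 foo = 0;
+       while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+         {
+           if (unformat (input, "foo %d", &foo))
+             ;
+           else
+             return clib_error_return (0, "unknown input `%U'",
+                                       format_unformat_error, input);
+         }
+       return 0;
+     }
+     VLIB_CONFIG_FUNCTION (my_module_config, "my-module");
+*/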
+
+#define VLIB_EARLY_CONFIG_FUNCTION(x,n,...) \
+ __VA_ARGS__ vlib_config_function_runtime_t \
+ VLIB_CONFIG_FUNCTION_SYMBOL(x); \
+static void __vlib_add_config_function_##x (void) \
+ __attribute__((__constructor__)) ; \
+static void __vlib_add_config_function_##x (void) \
+{ \
+ vlib_main_t * vm = vlib_get_main(); \
+ VLIB_CONFIG_FUNCTION_SYMBOL(x).next_registration \
+ = vm->config_function_registrations; \
+ vm->config_function_registrations \
+ = &VLIB_CONFIG_FUNCTION_SYMBOL(x); \
+} \
+ vlib_config_function_runtime_t \
+ VLIB_CONFIG_FUNCTION_SYMBOL (x) \
+ = { \
+ .name = n, \
+ .function = x, \
+ .is_early = 1, \
+ }
+
+/* Call given init function: used for init function dependencies. */
+#define vlib_call_init_function(vm, x) \
+ ({ \
+ extern vlib_init_function_t * VLIB_INIT_FUNCTION_SYMBOL (x); \
+ vlib_init_function_t * _f = VLIB_INIT_FUNCTION_SYMBOL (x); \
+ clib_error_t * _error = 0; \
+ if (! hash_get (vm->init_functions_called, _f)) \
+ { \
+ hash_set1 (vm->init_functions_called, _f); \
+ _error = _f (vm); \
+ } \
+ _error; \
+ })
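+
+/* Example (a sketch; a_init and b_init are hypothetical): b_init
+   forces a_init to run first, regardless of constructor order:
+
+     static clib_error_t * b_init (vlib_main_t * vm)
+     {
+       clib_error_t * error;
+       if ((error = vlib_call_init_function (vm, a_init)))
+         return error;
+       ... now safe to use state set up by a_init ...
+       return 0;
+     }
+*/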
+
+#define vlib_call_post_graph_init_function(vm, x) \
+ ({ \
+ extern vlib_init_function_t * VLIB_POST_GRAPH_INIT_FUNCTION_SYMBOL (x); \
+ vlib_init_function_t * _f = VLIB_POST_GRAPH_INIT_FUNCTION_SYMBOL (x); \
+ clib_error_t * _error = 0; \
+ if (! hash_get (vm->init_functions_called, _f)) \
+ { \
+ hash_set1 (vm->init_functions_called, _f); \
+ _error = _f (vm); \
+ } \
+ _error; \
+ })
+
+#define vlib_call_config_function(vm, x) \
+ ({ \
+ vlib_config_function_runtime_t * _r; \
+ clib_error_t * _error = 0; \
+ extern vlib_config_function_runtime_t \
+ VLIB_CONFIG_FUNCTION_SYMBOL (x); \
+ \
+ _r = &VLIB_CONFIG_FUNCTION_SYMBOL (x); \
+ if (! hash_get (vm->init_functions_called, _r->function)) \
+ { \
+ hash_set1 (vm->init_functions_called, _r->function); \
+ _error = _r->function (vm, &_r->input); \
+ } \
+ _error; \
+ })
+
+/* External functions. */
+clib_error_t * vlib_call_all_init_functions (struct vlib_main_t * vm);
+clib_error_t * vlib_call_all_config_functions (struct vlib_main_t * vm,
+ unformat_input_t * input,
+ int is_early);
+clib_error_t * vlib_call_all_main_loop_enter_functions (struct vlib_main_t * vm);
+clib_error_t * vlib_call_all_main_loop_exit_functions (struct vlib_main_t * vm);
+clib_error_t *
+vlib_call_init_exit_functions (struct vlib_main_t * vm,
+ _vlib_init_function_list_elt_t *head,
+ int call_once);
+
+#define foreach_vlib_module_reference \
+ _ (node_cli) \
+ _ (trace_cli)
+
+/* Dummy function declarations to force the modules listed above to be linked in. */
+#define _(x) void vlib_##x##_reference (void);
+foreach_vlib_module_reference
+#undef _
+
+#endif /* included_vlib_init_h */
diff --git a/vlib/vlib/lex.c b/vlib/vlib/lex.c
new file mode 100644
index 00000000000..de650900c11
--- /dev/null
+++ b/vlib/vlib/lex.c
@@ -0,0 +1,251 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include <vlib/vlib.h>
+#include <vlib/lex.h>
+
+vlib_lex_main_t vlib_lex_main;
+
+#define LEX_DEBUG 0
+
+u8 * format_vlib_lex_token (u8 * s, va_list * args)
+{
+ vlib_lex_main_t *lm = va_arg (*args, vlib_lex_main_t *);
+ vlib_lex_token_t *t = va_arg (*args, vlib_lex_token_t *);
+
+ if (t->token == VLIB_LEX_word)
+ s = format (s, "%s", t->value.as_pointer);
+ else
+ s = format (s, "%s", lm->lex_token_names[t->token]);
+ return s;
+}
+
+void vlib_lex_get_token (vlib_lex_main_t * lm, vlib_lex_token_t * rv)
+{
+ u8 c;
+ vlib_lex_table_t *t;
+ vlib_lex_table_entry_t *e;
+ uword tv;
+
+ if (PREDICT_FALSE (lm->pushback_sp >= 0))
+ {
+ rv[0] = lm->pushback_vector [lm->pushback_sp--];
+ return;
+ }
+
+ rv->value.as_uword = ~0;
+
+ while (1)
+ {
+ if (PREDICT_FALSE(lm->current_index >= vec_len (lm->input_vector)))
+ {
+ rv->token = VLIB_LEX_eof;
+ return;
+ }
+
+ t = vec_elt_at_index (lm->lex_tables, lm->current_table_index);
+ c = (lm->input_vector [lm->current_index++]) & 0x7f;
+ e = &t->entries [c];
+ lm->current_table_index = e->next_table_index;
+
+ switch (e->action)
+ {
+ case VLIB_LEX_IGNORE:
+ continue;
+
+ case VLIB_LEX_START_NUMBER:
+ lm->current_token_value = 0;
+ /* fallthru */
+
+        case VLIB_LEX_ADD_TO_NUMBER:
+          /* For number actions, the table entry's token field carries
+             the radix. */
+          lm->current_number_base = e->token;
+ lm->current_token_value *= lm->current_number_base;
+ tv = c - '0';
+ if (tv >= lm->current_number_base)
+ {
+ tv = 10 + c - 'A';
+ if (tv >= lm->current_number_base)
+ tv = 10 + c - 'a';
+ }
+ lm->current_token_value += tv;
+ continue;
+
+ case VLIB_LEX_ADD_TO_TOKEN:
+ vec_add1(lm->token_buffer, c);
+ continue;
+
+ case VLIB_LEX_KEYWORD_CHECK: {
+ uword * p;
+
+ vec_add1 (lm->token_buffer, 0);
+
+ /* It's either a keyword or just a word. */
+ p = hash_get_mem (lm->lex_keywords, lm->token_buffer);
+ if (p)
+ {
+ rv->token = p[0];
+ if (LEX_DEBUG > 0)
+ clib_warning ("keyword '%s' token %s",
+ lm->token_buffer,
+ lm->lex_token_names[rv->token]);
+ }
+ else
+ {
+ /* it's a WORD */
+ rv->token = VLIB_LEX_word;
+ rv->value.as_pointer = vec_dup (lm->token_buffer);
+ if (LEX_DEBUG > 0)
+ clib_warning ("%s, value '%s'",
+ lm->lex_token_names[VLIB_LEX_word],
+ rv->value.as_pointer);
+ }
+ _vec_len (lm->token_buffer) = 0;
+
+ /* Rescan the character which terminated the keyword/word. */
+ lm->current_index--;
+ return;
+ }
+
+ case VLIB_LEX_RETURN_AND_RESCAN:
+ ASSERT(lm->current_index);
+ lm->current_index--;
+ /* note flow-through */
+
+ case VLIB_LEX_RETURN:
+ rv->token = e->token;
+ rv->value.as_uword = lm->current_token_value;
+ lm->current_token_value = ~0;
+ if (LEX_DEBUG > 0)
+ {
+ clib_warning ("table %s char '%c'(0x%02x) next table %s return %s",
+ t->name, c, c, lm->lex_tables[e->next_table_index].name,
+ lm->lex_token_names[e->token]);
+ if (rv->token == VLIB_LEX_number)
+                clib_warning (" numeric value 0x%wx (%wd)",
+                              rv->value.as_uword, rv->value.as_uword);
+ }
+ return;
+ }
+ }
+}
+
+u16 vlib_lex_add_token (vlib_lex_main_t *lm, char *token_name)
+{
+ uword *p;
+ u16 rv;
+
+ p = hash_get_mem (lm->lex_tokens_by_name, token_name);
+
+ if (p)
+ return p[0];
+
+ rv = vec_len (lm->lex_token_names);
+ hash_set_mem (lm->lex_tokens_by_name, token_name, rv);
+ vec_add1 (lm->lex_token_names, token_name);
+
+ return rv;
+}
+
+static u16 add_keyword (vlib_lex_main_t *lm, char *keyword, char *token_name)
+{
+ uword *p;
+ u16 token;
+
+ p = hash_get_mem (lm->lex_keywords, keyword);
+
+ ASSERT (p == 0);
+
+ token = vlib_lex_add_token (lm, token_name);
+
+ hash_set_mem (lm->lex_keywords, keyword, token);
+ return token;
+}
+
+u16 vlib_lex_find_or_add_keyword (vlib_lex_main_t *lm, char *keyword, char *token_name)
+{
+ uword * p = hash_get_mem (lm->lex_keywords, keyword);
+ return p ? p[0] : add_keyword (lm, keyword, token_name);
+}
+
+void vlib_lex_set_action_range (u32 table_index, u8 lo, u8 hi, u16 action,
+ u16 token, u32 next_table_index)
+{
+ int i;
+ vlib_lex_main_t *lm = &vlib_lex_main;
+ vlib_lex_table_t *t = pool_elt_at_index (lm->lex_tables, table_index);
+
+ for (i = lo; i <= hi; i++)
+ {
+ ASSERT (i < ARRAY_LEN (t->entries));
+ t->entries[i].action = action;
+ t->entries[i].token = token;
+ t->entries[i].next_table_index = next_table_index;
+ }
+}
+
+u16 vlib_lex_add_table (char *name)
+{
+ vlib_lex_main_t *lm = &vlib_lex_main;
+ vlib_lex_table_t *t;
+ uword *p;
+
+ p = hash_get_mem (lm->lex_tables_by_name, name);
+
+ ASSERT(p == 0);
+
+ pool_get_aligned (lm->lex_tables, t, CLIB_CACHE_LINE_BYTES);
+
+ t->name = name;
+
+ hash_set_mem (lm->lex_tables_by_name, name, t - lm->lex_tables);
+
+ vlib_lex_set_action_range (t - lm->lex_tables, 1, 0x7F, VLIB_LEX_IGNORE, ~0,
+ t - lm->lex_tables);
+
+ vlib_lex_set_action_range (t - lm->lex_tables, 0, 0, VLIB_LEX_RETURN, VLIB_LEX_eof,
+ t - lm->lex_tables);
+
+ return t - lm->lex_tables;
+}
+
+void vlib_lex_reset (vlib_lex_main_t *lm, u8 *input_vector)
+{
+ if (lm->pushback_vector)
+ _vec_len (lm->pushback_vector) = 0;
+ lm->pushback_sp = -1;
+
+ lm->input_vector = input_vector;
+ lm->current_index = 0;
+}
+
+static clib_error_t * lex_onetime_init (vlib_main_t * vm)
+{
+ vlib_lex_main_t *lm = &vlib_lex_main;
+
+ lm->lex_tables_by_name = hash_create_string (0, sizeof (uword));
+ lm->lex_tokens_by_name = hash_create_string (0, sizeof (uword));
+ lm->lex_keywords = hash_create_string (0, sizeof (uword));
+ lm->pushback_sp = -1;
+
+#define _(f) { u16 tmp = vlib_lex_add_token (lm, #f); ASSERT (tmp == VLIB_LEX_##f); }
+ foreach_vlib_lex_global_token;
+#undef _
+
+ vec_validate (lm->token_buffer, 127);
+ _vec_len (lm->token_buffer) = 0;
+
+ return 0;
+}
+
+VLIB_INIT_FUNCTION (lex_onetime_init);
diff --git a/vlib/vlib/lex.h b/vlib/vlib/lex.h
new file mode 100644
index 00000000000..d5ea509915c
--- /dev/null
+++ b/vlib/vlib/lex.h
@@ -0,0 +1,130 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef included_vlib_lex_h
+#define included_vlib_lex_h
+
+#include <vppinfra/hash.h>
+#include <vppinfra/bitmap.h>
+#include <vppinfra/error.h>
+#include <vppinfra/pool.h>
+
+#define foreach_vlib_lex_global_token \
+ _ (invalid) \
+ _ (eof) \
+ _ (word) \
+ _ (number) \
+ _ (lt) \
+ _ (gt) \
+ _ (dot) \
+ _ (slash) \
+ _ (qmark) \
+ _ (equals) \
+ _ (plus) \
+ _ (minus) \
+ _ (star) \
+ _ (lpar) \
+ _ (rpar)
+
+typedef enum {
+#define _(f) VLIB_LEX_##f,
+ foreach_vlib_lex_global_token
+#undef _
+} vlib_lex_global_token_t;
+
+typedef enum {
+ VLIB_LEX_IGNORE,
+ VLIB_LEX_ADD_TO_TOKEN,
+ VLIB_LEX_RETURN,
+ VLIB_LEX_RETURN_AND_RESCAN,
+ VLIB_LEX_KEYWORD_CHECK,
+ VLIB_LEX_START_NUMBER,
+ VLIB_LEX_ADD_TO_NUMBER,
+} vlib_lex_action_t;
+
+typedef struct {
+ u16 action;
+ u16 next_table_index;
+ u16 token;
+} vlib_lex_table_entry_t;
+
+typedef struct {
+ char *name;
+ vlib_lex_table_entry_t entries [128];
+} vlib_lex_table_t;
+
+typedef struct {
+ u32 token;
+
+ union {
+ uword as_uword;
+ void * as_pointer;
+ char * as_string;
+ } value;
+} vlib_lex_token_t;
+
+typedef struct {
+ vlib_lex_table_t * lex_tables;
+ uword * lex_tables_by_name;
+
+ /* Vector of token strings. */
+ char ** lex_token_names;
+
+ /* Hash mapping c string name to token index. */
+ uword * lex_tokens_by_name;
+
+ /* Hash mapping c string keyword name to token index. */
+ uword * lex_keywords;
+
+ vlib_lex_token_t * pushback_vector;
+
+ i32 pushback_sp;
+
+ u32 current_table_index;
+
+ uword current_token_value;
+
+ uword current_number_base;
+
+ /* Input string we are lex-ing. */
+ u8 *input_vector;
+
+ /* Current index into input vector. */
+ u32 current_index;
+
+ /* Re-used vector for forming token strings and hashing them. */
+ u8 * token_buffer;
+} vlib_lex_main_t;
+
+extern vlib_lex_main_t vlib_lex_main;
+
+always_inline void
+vlib_lex_cleanup_token (vlib_lex_token_t * t)
+{
+ if (t->token == VLIB_LEX_word)
+ {
+ u8 * tv = t->value.as_pointer;
+ vec_free (tv);
+ }
+}
+
+u16 vlib_lex_add_table (char *name);
+void vlib_lex_get_token (vlib_lex_main_t *lm, vlib_lex_token_t * result);
+u16 vlib_lex_add_token (vlib_lex_main_t *lm, char *token_name);
+void vlib_lex_set_action_range (u32 table_index, u8 lo, u8 hi, u16 action,
+ u16 token, u32 next_table_index);
+void vlib_lex_reset (vlib_lex_main_t *lm, u8 *input_vector);
+format_function_t format_vlib_lex_token;
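+
+/* Typical usage (a sketch; the table layout and token names are
+   hypothetical):
+
+     u16 t = vlib_lex_add_table ("start");
+     vlib_lex_set_action_range (t, 'a', 'z', VLIB_LEX_ADD_TO_TOKEN, ~0, t);
+     vlib_lex_set_action_range (t, ' ', ' ', VLIB_LEX_KEYWORD_CHECK, ~0, t);
+
+     vlib_lex_token_t tok;
+     vlib_lex_reset (&vlib_lex_main, input_vector);
+     do {
+       vlib_lex_get_token (&vlib_lex_main, &tok);
+       ... consume tok ...
+     } while (tok.token != VLIB_LEX_eof);
+*/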
+
+#endif /* included_vlib_lex_h */
diff --git a/vlib/vlib/main.c b/vlib/vlib/main.c
new file mode 100644
index 00000000000..64bd3c02b60
--- /dev/null
+++ b/vlib/vlib/main.c
@@ -0,0 +1,1559 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * main.c: main vector processing loop
+ *
+ * Copyright (c) 2008 Eliot Dresselhaus
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <math.h>
+#include <vppinfra/format.h>
+#include <vlib/vlib.h>
+#include <vlib/threads.h>
+
+#include <vlib/unix/cj.h>
+
+CJ_GLOBAL_LOG_PROTOTYPE;
+
+
+//#define VLIB_ELOG_MAIN_LOOP 1
+
+/* Actually allocate a few extra slots of vector data to support
+ speculative vector enqueues which overflow vector data in next frame. */
+#define VLIB_FRAME_SIZE_ALLOC (VLIB_FRAME_SIZE + 4)
+
+always_inline u32
+vlib_frame_bytes (u32 n_scalar_bytes, u32 n_vector_bytes)
+{
+ u32 n_bytes;
+
+ /* Make room for vlib_frame_t plus scalar arguments. */
+ n_bytes = vlib_frame_vector_byte_offset (n_scalar_bytes);
+
+ /* Make room for vector arguments.
+ Allocate a few extra slots of vector data to support
+ speculative vector enqueues which overflow vector data in next frame. */
+#define VLIB_FRAME_SIZE_EXTRA 4
+ n_bytes += (VLIB_FRAME_SIZE + VLIB_FRAME_SIZE_EXTRA) * n_vector_bytes;
+
+ /* Magic number is first 32bit number after vector data.
+ Used to make sure that vector data is never overrun. */
+#define VLIB_FRAME_MAGIC (0xabadc0ed)
+ n_bytes += sizeof (u32);
+
+ /* Pad to cache line. */
+ n_bytes = round_pow2 (n_bytes, CLIB_CACHE_LINE_BYTES);
+
+ return n_bytes;
+}
+
+always_inline u32 *
+vlib_frame_find_magic (vlib_frame_t * f, vlib_node_t * node)
+{
+ void * p = f;
+
+ p += vlib_frame_vector_byte_offset (node->scalar_size);
+
+ p += (VLIB_FRAME_SIZE + VLIB_FRAME_SIZE_EXTRA) * node->vector_size;
+
+ return p;
+}
+
+static vlib_frame_size_t *
+get_frame_size_info (vlib_node_main_t * nm,
+ u32 n_scalar_bytes, u32 n_vector_bytes)
+{
+ uword key = (n_scalar_bytes << 16) | n_vector_bytes;
+ uword * p, i;
+
+ p = hash_get (nm->frame_size_hash, key);
+ if (p)
+ i = p[0];
+ else
+ {
+ i = vec_len (nm->frame_sizes);
+ vec_validate (nm->frame_sizes, i);
+ hash_set (nm->frame_size_hash, key, i);
+ }
+
+ return vec_elt_at_index (nm->frame_sizes, i);
+}
+
+static u32
+vlib_frame_alloc_to_node (vlib_main_t * vm, u32 to_node_index, u32 frame_flags)
+{
+ vlib_node_main_t * nm = &vm->node_main;
+ vlib_frame_size_t * fs;
+ vlib_node_t * to_node;
+ vlib_frame_t * f;
+ u32 fi, l, n, scalar_size, vector_size;
+
+ to_node = vlib_get_node (vm, to_node_index);
+
+ scalar_size = to_node->scalar_size;
+ vector_size = to_node->vector_size;
+
+ fs = get_frame_size_info (nm, scalar_size, vector_size);
+ n = vlib_frame_bytes (scalar_size, vector_size);
+ if ((l = vec_len (fs->free_frame_indices)) > 0)
+ {
+ /* Allocate from end of free list. */
+ fi = fs->free_frame_indices[l - 1];
+ f = vlib_get_frame_no_check (vm, fi);
+ _vec_len (fs->free_frame_indices) = l - 1;
+ }
+ else
+ {
+ f = clib_mem_alloc_aligned_no_fail (n, CLIB_CACHE_LINE_BYTES);
+ f->cpu_index = vm->cpu_index;
+ fi = vlib_frame_index_no_check (vm, f);
+ }
+
+ /* Poison frame when debugging. */
+ if (CLIB_DEBUG > 0)
+ {
+ u32 save_cpu_index = f->cpu_index;
+
+ memset (f, 0xfe, n);
+
+ f->cpu_index = save_cpu_index;
+ }
+
+ /* Insert magic number. */
+ {
+ u32 * magic;
+
+ magic = vlib_frame_find_magic (f, to_node);
+ *magic = VLIB_FRAME_MAGIC;
+ }
+
+ f->flags = VLIB_FRAME_IS_ALLOCATED | frame_flags;
+ f->n_vectors = 0;
+ f->scalar_size = scalar_size;
+ f->vector_size = vector_size;
+
+ fs->n_alloc_frames += 1;
+
+ return fi;
+}
+
+/* Allocate a frame from FROM_NODE to TO_NODE via TO_NEXT_INDEX.
+ Returns frame index. */
+static u32
+vlib_frame_alloc (vlib_main_t * vm, vlib_node_runtime_t * from_node_runtime, u32 to_next_index)
+{
+ vlib_node_t * from_node;
+
+ from_node = vlib_get_node (vm, from_node_runtime->node_index);
+ ASSERT (to_next_index < vec_len (from_node->next_nodes));
+
+ return vlib_frame_alloc_to_node (vm,
+ from_node->next_nodes[to_next_index],
+ /* frame_flags */ 0);
+}
+
+vlib_frame_t *
+vlib_get_frame_to_node (vlib_main_t * vm, u32 to_node_index)
+{
+ u32 fi = vlib_frame_alloc_to_node (vm, to_node_index,
+ /* frame_flags */ VLIB_FRAME_FREE_AFTER_DISPATCH);
+ return vlib_get_frame (vm, fi);
+}
+
+void vlib_put_frame_to_node (vlib_main_t * vm, u32 to_node_index, vlib_frame_t * f)
+{
+ vlib_pending_frame_t * p;
+ vlib_node_t * to_node;
+
+ if (f->n_vectors == 0)
+ return;
+
+ to_node = vlib_get_node (vm, to_node_index);
+
+ vec_add2 (vm->node_main.pending_frames, p, 1);
+
+ f->flags |= VLIB_FRAME_PENDING;
+ p->frame_index = vlib_frame_index (vm, f);
+ p->node_runtime_index = to_node->runtime_index;
+ p->next_frame_index = VLIB_PENDING_FRAME_NO_NEXT_FRAME;
+}
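+
+/* Typical usage (a sketch; to_node_index and bi0 stand for a valid
+   node index and buffer index):
+
+     vlib_frame_t * f = vlib_get_frame_to_node (vm, to_node_index);
+     u32 * to_next = vlib_frame_vector_args (f);
+     to_next[0] = bi0;
+     f->n_vectors = 1;
+     vlib_put_frame_to_node (vm, to_node_index, f);
+*/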
+
+/* Free given frame. */
+void
+vlib_frame_free (vlib_main_t * vm,
+ vlib_node_runtime_t * r,
+ vlib_frame_t * f)
+{
+ vlib_node_main_t * nm = &vm->node_main;
+ vlib_node_t * node;
+ vlib_frame_size_t * fs;
+ u32 frame_index;
+
+ ASSERT (f->flags & VLIB_FRAME_IS_ALLOCATED);
+
+ node = vlib_get_node (vm, r->node_index);
+ fs = get_frame_size_info (nm, node->scalar_size, node->vector_size);
+
+ frame_index = vlib_frame_index (vm, f);
+
+ ASSERT (f->flags & VLIB_FRAME_IS_ALLOCATED);
+
+ /* No next frames may point to freed frame. */
+ if (CLIB_DEBUG > 0)
+ {
+ vlib_next_frame_t * nf;
+ vec_foreach (nf, vm->node_main.next_frames)
+ ASSERT (nf->frame_index != frame_index);
+ }
+
+ f->flags &= ~VLIB_FRAME_IS_ALLOCATED;
+
+ vec_add1 (fs->free_frame_indices, frame_index);
+ ASSERT (fs->n_alloc_frames > 0);
+ fs->n_alloc_frames -= 1;
+}
+
+static clib_error_t *
+show_frame_stats (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ vlib_node_main_t * nm = &vm->node_main;
+ vlib_frame_size_t * fs;
+
+ vlib_cli_output (vm, "%=6s%=12s%=12s", "Size", "# Alloc", "# Free");
+ vec_foreach (fs, nm->frame_sizes)
+ {
+ u32 n_alloc = fs->n_alloc_frames;
+ u32 n_free = vec_len (fs->free_frame_indices);
+
+ if (n_alloc + n_free > 0)
+ vlib_cli_output (vm, "%=6d%=12d%=12d",
+ fs - nm->frame_sizes, n_alloc, n_free);
+ }
+
+ return 0;
+}
+
+VLIB_CLI_COMMAND (show_frame_stats_cli, static) = {
+ .path = "show vlib frame-allocation",
+ .short_help = "Show node dispatch frame statistics",
+ .function = show_frame_stats,
+};
+
+/* Change ownership of enqueue rights to given next node. */
+static void
+vlib_next_frame_change_ownership (vlib_main_t * vm,
+ vlib_node_runtime_t * node_runtime,
+ u32 next_index)
+{
+ vlib_node_main_t * nm = &vm->node_main;
+ vlib_next_frame_t * next_frame;
+ vlib_node_t * node, * next_node;
+
+ node = vec_elt (nm->nodes, node_runtime->node_index);
+
+  /* Only internal, input, and process nodes may enqueue to other nodes. */
+ ASSERT (node->type == VLIB_NODE_TYPE_INTERNAL
+ || node->type == VLIB_NODE_TYPE_INPUT
+ || node->type == VLIB_NODE_TYPE_PROCESS);
+
+ ASSERT (vec_len (node->next_nodes) == node_runtime->n_next_nodes);
+
+ next_frame = vlib_node_runtime_get_next_frame (vm, node_runtime, next_index);
+ next_node = vec_elt (nm->nodes, node->next_nodes[next_index]);
+
+ if (next_node->owner_node_index != VLIB_INVALID_NODE_INDEX)
+ {
+ /* Get frame from previous owner. */
+ vlib_next_frame_t * owner_next_frame;
+ vlib_next_frame_t tmp;
+
+ owner_next_frame =
+ vlib_node_get_next_frame (vm,
+ next_node->owner_node_index,
+ next_node->owner_next_index);
+
+ /* Swap target next frame with owner's. */
+ tmp = owner_next_frame[0];
+ owner_next_frame[0] = next_frame[0];
+ next_frame[0] = tmp;
+
+ /*
+ * If next_frame is already pending, we have to track down
+ * all pending frames and fix their next_frame_index fields.
+ */
+ if (next_frame->flags & VLIB_FRAME_PENDING)
+ {
+ vlib_pending_frame_t * p;
+ if (next_frame->frame_index != ~0)
+ {
+ vec_foreach (p, nm->pending_frames)
+ {
+ if (p->frame_index == next_frame->frame_index)
+ {
+ p->next_frame_index =
+ next_frame - vm->node_main.next_frames;
+ }
+ }
+ }
+ }
+ }
+ else
+ {
+ /* No previous owner. Take ownership. */
+ next_frame->flags |= VLIB_FRAME_OWNER;
+ }
+
+ /* Record new owner. */
+ next_node->owner_node_index = node->index;
+ next_node->owner_next_index = next_index;
+
+ /* Now we should be owner. */
+ ASSERT (next_frame->flags & VLIB_FRAME_OWNER);
+}
+
+/* Make sure that magic number is still there.
+ Otherwise, it is likely that caller has overrun frame arguments. */
+always_inline void
+validate_frame_magic (vlib_main_t * vm,
+ vlib_frame_t * f,
+ vlib_node_t * n,
+ uword next_index)
+{
+ vlib_node_t * next_node = vlib_get_node (vm, n->next_nodes[next_index]);
+ u32 * magic = vlib_frame_find_magic (f, next_node);
+ ASSERT (VLIB_FRAME_MAGIC == magic[0]);
+}
+
+vlib_frame_t *
+vlib_get_next_frame_internal (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ u32 next_index,
+ u32 allocate_new_next_frame)
+{
+ vlib_frame_t * f;
+ vlib_next_frame_t * nf;
+ u32 n_used;
+
+ nf = vlib_node_runtime_get_next_frame (vm, node, next_index);
+
+ /* Make sure this next frame owns right to enqueue to destination frame. */
+ if (PREDICT_FALSE (! (nf->flags & VLIB_FRAME_OWNER)))
+ vlib_next_frame_change_ownership (vm, node, next_index);
+
+ /* ??? Don't need valid flag: can use frame_index == ~0 */
+ if (PREDICT_FALSE (! (nf->flags & VLIB_FRAME_IS_ALLOCATED)))
+ {
+ nf->frame_index = vlib_frame_alloc (vm, node, next_index);
+ nf->flags |= VLIB_FRAME_IS_ALLOCATED;
+ }
+
+ f = vlib_get_frame (vm, nf->frame_index);
+
+ /* Has frame been removed from pending vector (e.g. finished dispatching)?
+ If so we can reuse frame. */
+ if ((nf->flags & VLIB_FRAME_PENDING) && ! (f->flags & VLIB_FRAME_PENDING))
+ {
+ nf->flags &= ~VLIB_FRAME_PENDING;
+ f->n_vectors = 0;
+ }
+
+ /* Allocate new frame if current one is already full. */
+ n_used = f->n_vectors;
+ if (n_used >= VLIB_FRAME_SIZE || (allocate_new_next_frame && n_used > 0))
+ {
+ /* Old frame may need to be freed after dispatch, since we'll have
+ two redundant frames from node -> next node. */
+ if (! (nf->flags & VLIB_FRAME_NO_FREE_AFTER_DISPATCH))
+ {
+ vlib_frame_t * f_old = vlib_get_frame (vm, nf->frame_index);
+ f_old->flags |= VLIB_FRAME_FREE_AFTER_DISPATCH;
+ }
+
+ /* Allocate new frame to replace full one. */
+ nf->frame_index = vlib_frame_alloc (vm, node, next_index);
+ f = vlib_get_frame (vm, nf->frame_index);
+ n_used = f->n_vectors;
+ }
+
+ /* Should have free vectors in frame now. */
+ ASSERT (n_used < VLIB_FRAME_SIZE);
+
+ if (CLIB_DEBUG > 0)
+ {
+ validate_frame_magic (vm, f,
+ vlib_get_node (vm, node->node_index),
+ next_index);
+ }
+
+ return f;
+}
+
+static void
+vlib_put_next_frame_validate (vlib_main_t * vm,
+ vlib_node_runtime_t * rt,
+ u32 next_index,
+ u32 n_vectors_left)
+{
+ vlib_node_main_t * nm = &vm->node_main;
+ vlib_next_frame_t * nf;
+ vlib_frame_t * f;
+ vlib_node_runtime_t * next_rt;
+ vlib_node_t * next_node;
+ u32 n_before, n_after;
+
+ nf = vlib_node_runtime_get_next_frame (vm, rt, next_index);
+ f = vlib_get_frame (vm, nf->frame_index);
+
+ ASSERT (n_vectors_left <= VLIB_FRAME_SIZE);
+ n_after = VLIB_FRAME_SIZE - n_vectors_left;
+ n_before = f->n_vectors;
+
+ ASSERT (n_after >= n_before);
+
+ next_rt = vec_elt_at_index (nm->nodes_by_type[VLIB_NODE_TYPE_INTERNAL],
+ nf->node_runtime_index);
+ next_node = vlib_get_node (vm, next_rt->node_index);
+ if (n_after > 0 && next_node->validate_frame)
+ {
+ u8 * msg = next_node->validate_frame (vm, rt, f);
+ if (msg)
+ {
+ clib_warning ("%v", msg);
+ ASSERT (0);
+ }
+ vec_free (msg);
+ }
+}
+
+void
+vlib_put_next_frame (vlib_main_t * vm,
+ vlib_node_runtime_t * r,
+ u32 next_index,
+ u32 n_vectors_left)
+{
+ vlib_node_main_t * nm = &vm->node_main;
+ vlib_next_frame_t * nf;
+ vlib_frame_t * f;
+ u32 n_vectors_in_frame;
+
+ if (DPDK == 0 && CLIB_DEBUG > 0)
+ vlib_put_next_frame_validate (vm, r, next_index, n_vectors_left);
+
+ nf = vlib_node_runtime_get_next_frame (vm, r, next_index);
+ f = vlib_get_frame (vm, nf->frame_index);
+
+ /* Make sure that magic number is still there. Otherwise, caller
+ has overrun frame meta data. */
+ if (CLIB_DEBUG > 0)
+ {
+ vlib_node_t * node = vlib_get_node (vm, r->node_index);
+ validate_frame_magic (vm, f, node, next_index);
+ }
+
+ /* Convert # of vectors left -> number of vectors there. */
+ ASSERT (n_vectors_left <= VLIB_FRAME_SIZE);
+ n_vectors_in_frame = VLIB_FRAME_SIZE - n_vectors_left;
+
+ f->n_vectors = n_vectors_in_frame;
+
+ /* If vectors were added to frame, add to pending vector. */
+ if (PREDICT_TRUE (n_vectors_in_frame > 0))
+ {
+ vlib_pending_frame_t * p;
+ u32 v0, v1;
+
+ r->cached_next_index = next_index;
+
+ if (!(f->flags & VLIB_FRAME_PENDING))
+ {
+ __attribute__((unused)) vlib_node_t *node;
+ vlib_node_t *next_node;
+ vlib_node_runtime_t *next_runtime;
+
+ node = vlib_get_node (vm, r->node_index);
+ next_node = vlib_get_next_node (vm, r->node_index, next_index);
+ next_runtime = vlib_node_get_runtime (vm, next_node->index);
+
+ vec_add2 (nm->pending_frames, p, 1);
+
+ p->frame_index = nf->frame_index;
+ p->node_runtime_index = nf->node_runtime_index;
+ p->next_frame_index = nf - nm->next_frames;
+ nf->flags |= VLIB_FRAME_PENDING;
+ f->flags |= VLIB_FRAME_PENDING;
+
+ /*
+ * If we're going to dispatch this frame on another thread,
+ * force allocation of a new frame. Otherwise, we create
+ * a dangling frame reference. Each thread has its own copy of
+ * the next_frames vector.
+ */
+ if (0 && r->cpu_index != next_runtime->cpu_index)
+ {
+ nf->frame_index = ~0;
+ nf->flags &= ~(VLIB_FRAME_PENDING | VLIB_FRAME_IS_ALLOCATED);
+ }
+ }
+
+      /* Copy trace flag from the runtime. */
+      nf->flags |= r->flags & VLIB_NODE_FLAG_TRACE;
+
+ v0 = nf->vectors_since_last_overflow;
+ v1 = v0 + n_vectors_in_frame;
+ nf->vectors_since_last_overflow = v1;
+ if (PREDICT_FALSE (v1 < v0))
+ {
+ vlib_node_t * node = vlib_get_node (vm, r->node_index);
+ vec_elt (node->n_vectors_by_next_node, next_index) += v0;
+ }
+ }
+}
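+
+/* Typical node usage (a sketch): the vlib_get_next_frame /
+   vlib_put_next_frame pair from node_funcs.h brackets enqueues
+   to a next node:
+
+     u32 * to_next, n_left_to_next;
+     vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+     to_next[0] = bi0;
+     to_next += 1;
+     n_left_to_next -= 1;
+     vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+*/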
+
+/* Sync up runtime (32 bit counters) and main node stats (64 bit counters). */
+never_inline void
+vlib_node_runtime_sync_stats (vlib_main_t * vm,
+ vlib_node_runtime_t * r,
+ uword n_calls,
+ uword n_vectors,
+ uword n_clocks)
+{
+ vlib_node_t * n = vlib_get_node (vm, r->node_index);
+
+ n->stats_total.calls += n_calls + r->calls_since_last_overflow;
+ n->stats_total.vectors += n_vectors + r->vectors_since_last_overflow;
+ n->stats_total.clocks += n_clocks + r->clocks_since_last_overflow;
+ n->stats_total.max_clock = r->max_clock;
+ n->stats_total.max_clock_n = r->max_clock_n;
+
+ r->calls_since_last_overflow = 0;
+ r->vectors_since_last_overflow = 0;
+ r->clocks_since_last_overflow = 0;
+}
+
+always_inline void
+vlib_process_sync_stats (vlib_main_t * vm,
+ vlib_process_t * p,
+ uword n_calls,
+ uword n_vectors,
+ uword n_clocks)
+{
+ vlib_node_runtime_t * rt = &p->node_runtime;
+ vlib_node_t * n = vlib_get_node (vm, rt->node_index);
+ vlib_node_runtime_sync_stats (vm, rt, n_calls, n_vectors, n_clocks);
+ n->stats_total.suspends += p->n_suspends;
+ p->n_suspends = 0;
+}
+
+void vlib_node_sync_stats (vlib_main_t * vm, vlib_node_t * n)
+{
+ vlib_node_runtime_t * rt;
+
+ if (n->type == VLIB_NODE_TYPE_PROCESS)
+ {
+ /* Nothing to do for PROCESS nodes except in main thread */
+ if (vm != &vlib_global_main) return;
+
+ vlib_process_t * p = vlib_get_process_from_node (vm, n);
+ n->stats_total.suspends += p->n_suspends;
+ p->n_suspends = 0;
+ rt = &p->node_runtime;
+ }
+ else
+ rt = vec_elt_at_index (vm->node_main.nodes_by_type[n->type], n->runtime_index);
+
+ vlib_node_runtime_sync_stats (vm, rt, 0, 0, 0);
+
+ /* Sync up runtime next frame vector counters with main node structure. */
+ {
+ vlib_next_frame_t * nf;
+ uword i;
+ for (i = 0; i < rt->n_next_nodes; i++)
+ {
+ nf = vlib_node_runtime_get_next_frame (vm, rt, i);
+ vec_elt (n->n_vectors_by_next_node, i) += nf->vectors_since_last_overflow;
+ nf->vectors_since_last_overflow = 0;
+ }
+ }
+}
+
+always_inline u32
+vlib_node_runtime_update_stats (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ uword n_calls,
+ uword n_vectors,
+ uword n_clocks)
+{
+ u32 ca0, ca1, v0, v1, cl0, cl1, r;
+
+ cl0 = cl1 = node->clocks_since_last_overflow;
+ ca0 = ca1 = node->calls_since_last_overflow;
+ v0 = v1 = node->vectors_since_last_overflow;
+
+ ca1 = ca0 + n_calls;
+ v1 = v0 + n_vectors;
+ cl1 = cl0 + n_clocks;
+
+ node->calls_since_last_overflow = ca1;
+ node->clocks_since_last_overflow = cl1;
+ node->vectors_since_last_overflow = v1;
+ node->max_clock_n = node->max_clock > n_clocks ?
+ node->max_clock_n : n_vectors;
+ node->max_clock = node->max_clock > n_clocks ?
+ node->max_clock : n_clocks;
+
+ r = vlib_node_runtime_update_main_loop_vector_stats (vm, node, n_vectors);
+
+ if (PREDICT_FALSE (ca1 < ca0 || v1 < v0 || cl1 < cl0))
+ {
+ node->calls_since_last_overflow = ca0;
+ node->clocks_since_last_overflow = cl0;
+ node->vectors_since_last_overflow = v0;
+ vlib_node_runtime_sync_stats (vm, node, n_calls, n_vectors, n_clocks);
+ }
+
+ return r;
+}
+
+always_inline void
+vlib_process_update_stats (vlib_main_t * vm,
+ vlib_process_t * p,
+ uword n_calls,
+ uword n_vectors,
+ uword n_clocks)
+{
+ vlib_node_runtime_update_stats (vm, &p->node_runtime,
+ n_calls, n_vectors, n_clocks);
+}
+
+static clib_error_t *
+vlib_cli_elog_clear (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ elog_reset_buffer (&vm->elog_main);
+ return 0;
+}
+
+VLIB_CLI_COMMAND (elog_clear_cli, static) = {
+ .path = "clear event-logger",
+ .short_help = "Clear current event log",
+ .function = vlib_cli_elog_clear,
+};
+
+#ifdef CLIB_UNIX
+static clib_error_t *
+elog_save_buffer (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ elog_main_t * em = &vm->elog_main;
+ char * file, * chroot_file;
+ clib_error_t * error = 0;
+
+ if (! unformat (input, "%s", &file))
+ {
+ vlib_cli_output (vm, "expected file name, got `%U'",
+ format_unformat_error, input);
+ return 0;
+ }
+
+ /* It's fairly hard to get "../oopsie" through unformat; just in case */
+  if (strstr (file, "..") || strchr (file, '/'))
+ {
+ vlib_cli_output (vm, "illegal characters in filename '%s'", file);
+ return 0;
+ }
+
+ chroot_file = (char *) format (0, "/tmp/%s%c", file, 0);
+
+ vec_free(file);
+
+ vlib_cli_output (vm, "Saving %wd of %wd events to %s",
+ elog_n_events_in_buffer (em),
+ elog_buffer_capacity (em),
+ chroot_file);
+
+ vlib_worker_thread_barrier_sync (vm);
+ error = elog_write_file (em, chroot_file);
+ vlib_worker_thread_barrier_release(vm);
+ vec_free (chroot_file);
+ return error;
+}
+
+VLIB_CLI_COMMAND (elog_save_cli, static) = {
+ .path = "save event-logger",
+ .short_help = "save event-logger <filename> (saves log in /tmp/<filename>)",
+ .function = elog_save_buffer,
+};
+
+#endif /* CLIB_UNIX */
+
+static void elog_show_buffer_internal (vlib_main_t * vm, u32 n_events_to_show)
+{
+ elog_main_t * em = &vm->elog_main;
+ elog_event_t * e, * es;
+ f64 dt;
+
+ /* Show events in VLIB time since log clock starts after VLIB clock. */
+ dt = (em->init_time.cpu - vm->clib_time.init_cpu_time)
+ * vm->clib_time.seconds_per_clock;
+
+ es = elog_peek_events (em);
+ vlib_cli_output (vm, "%d events in buffer", vec_len (es));
+ vec_foreach (e, es)
+ {
+ vlib_cli_output (vm, "%18.9f: %U",
+ e->time + dt,
+ format_elog_event, em, e);
+ n_events_to_show--;
+ if (n_events_to_show == 0)
+ break;
+ }
+ vec_free (es);
+}
+
+static clib_error_t *
+elog_show_buffer (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ u32 n_events_to_show;
+ clib_error_t * error = 0;
+
+ n_events_to_show = 250;
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "%d", &n_events_to_show))
+ ;
+ else if (unformat (input, "all"))
+ n_events_to_show = ~0;
+ else
+ return unformat_parse_error (input);
+ }
+ elog_show_buffer_internal (vm, n_events_to_show);
+ return error;
+}
+
+VLIB_CLI_COMMAND (elog_show_cli, static) = {
+ .path = "show event-logger",
+ .short_help = "Show event logger info",
+ .function = elog_show_buffer,
+};
+
+void vlib_gdb_show_event_log (void)
+{
+ elog_show_buffer_internal (vlib_get_main(), (u32)~0);
+}
+
+always_inline void
+vlib_elog_main_loop_event (vlib_main_t * vm,
+ u32 node_index,
+ u64 time,
+ u32 n_vectors,
+ u32 is_return)
+{
+ elog_main_t * em = &vm->elog_main;
+
+ if (VLIB_ELOG_MAIN_LOOP)
+ elog (em,
+ /* event type */
+ vec_elt_at_index (is_return
+ ? vm->node_return_elog_event_types
+ : vm->node_call_elog_event_types,
+ node_index),
+ /* data to log */ n_vectors);
+}
+
+void vlib_dump_context_trace (vlib_main_t *vm, u32 bi)
+{
+ vlib_node_main_t * vnm = &vm->node_main;
+ vlib_buffer_t * b;
+ u8 i, n;
+
+ if (VLIB_BUFFER_TRACE_TRAJECTORY)
+ {
+ b = vlib_get_buffer (vm, bi);
+ n = b->pre_data[0];
+
+ fformat(stderr, "Context trace for bi %d b 0x%llx, visited %d\n",
+ bi, b, n);
+
+ if (n == 0 || n > 20)
+ {
+ fformat(stderr, "n is unreasonable\n");
+ return;
+ }
+
+
+ for (i = 0; i < n; i++)
+ {
+ u32 node_index;
+
+ node_index = b->pre_data[i+1];
+
+          if (node_index >= vec_len (vnm->nodes))
+ {
+ fformat(stderr, "Skip bogus node index %d\n", node_index);
+ continue;
+ }
+
+ fformat(stderr, "%v (%d)\n", vnm->nodes[node_index]->name,
+ node_index);
+ }
+ }
+ else
+ {
+ fformat(stderr,
+ "in vlib/buffers.h, #define VLIB_BUFFER_TRACE_TRAJECTORY 1\n");
+ }
+}
+
+
+/* static_always_inline */ u64
+dispatch_node (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_node_type_t type,
+ vlib_node_state_t dispatch_state,
+ vlib_frame_t * frame,
+ u64 last_time_stamp)
+{
+ uword n, v;
+ u64 t;
+ vlib_node_main_t * nm = &vm->node_main;
+ vlib_next_frame_t * nf;
+
+ if (CLIB_DEBUG > 0)
+ {
+ vlib_node_t * n = vlib_get_node (vm, node->node_index);
+ ASSERT (n->type == type);
+ }
+
+ /* Only non-internal nodes may be disabled. */
+ if (type != VLIB_NODE_TYPE_INTERNAL && node->state != dispatch_state)
+ {
+ ASSERT (type != VLIB_NODE_TYPE_INTERNAL);
+ return last_time_stamp;
+ }
+
+ if ((type == VLIB_NODE_TYPE_PRE_INPUT || type == VLIB_NODE_TYPE_INPUT)
+ && dispatch_state != VLIB_NODE_STATE_INTERRUPT)
+ {
+ u32 c = node->input_main_loops_per_call;
+ /* Only call node when count reaches zero. */
+ if (c)
+ {
+ node->input_main_loops_per_call = c - 1;
+ return last_time_stamp;
+ }
+ }
+
+ /* Speculatively prefetch next frames. */
+ if (node->n_next_nodes > 0)
+ {
+ nf = vec_elt_at_index (nm->next_frames, node->next_frame_index);
+ CLIB_PREFETCH (nf, 4 * sizeof (nf[0]), WRITE);
+ }
+
+ vm->cpu_time_last_node_dispatch = last_time_stamp;
+
+ if (1 /* || vm->cpu_index == node->cpu_index */)
+ {
+ vlib_main_t *stat_vm;
+
+ stat_vm = /* vlib_mains ? vlib_mains[0] : */ vm;
+
+ vlib_elog_main_loop_event (vm, node->node_index,
+ last_time_stamp,
+ frame ? frame->n_vectors : 0,
+                                 /* is_return */ 0);
+
+ /*
+ * Turn this on if you run into
+ * "bad monkey" contexts, and you want to know exactly
+ * which nodes they've visited... See ixge.c...
+ */
+ if (VLIB_BUFFER_TRACE_TRAJECTORY && frame)
+ {
+ int i;
+ int log_index;
+ u32 * from;
+ from = vlib_frame_vector_args (frame);
+ for (i = 0; i < frame->n_vectors; i++)
+ {
+ vlib_buffer_t *b = vlib_get_buffer (vm, from[i]);
+ ASSERT (b->pre_data[0] < 32);
+ log_index = b->pre_data[0]++ + 1;
+ b->pre_data[log_index] = node->node_index;
+ }
+ n = node->function (vm, node, frame);
+ }
+ else
+ n = node->function (vm, node, frame);
+
+ t = clib_cpu_time_now ();
+
+      vlib_elog_main_loop_event (vm, node->node_index, t, n, /* is_return */ 1);
+
+ vm->main_loop_vectors_processed += n;
+ vm->main_loop_nodes_processed += n > 0;
+
+ v = vlib_node_runtime_update_stats (stat_vm, node,
+ /* n_calls */ 1,
+ /* n_vectors */ n,
+ /* n_clocks */ t - last_time_stamp);
+
+ /* When in interrupt mode and vector rate crosses threshold switch to
+ polling mode. */
+ if ((DPDK == 0 && dispatch_state == VLIB_NODE_STATE_INTERRUPT)
+ || (DPDK == 0 && dispatch_state == VLIB_NODE_STATE_POLLING
+ && (node->flags
+ & VLIB_NODE_FLAG_SWITCH_FROM_INTERRUPT_TO_POLLING_MODE)))
+ {
+ ELOG_TYPE_DECLARE (e) = {
+ .function = (char *) __FUNCTION__,
+ .format = "%s vector length %d, switching to %s",
+ .format_args = "T4i4t4",
+ .n_enum_strings = 2,
+ .enum_strings = {
+ "interrupt", "polling",
+ },
+ };
+ struct { u32 node_name, vector_length, is_polling; } * ed;
+
+ if (dispatch_state == VLIB_NODE_STATE_INTERRUPT
+ && v >= nm->polling_threshold_vector_length)
+ {
+ vlib_node_t * n = vlib_get_node (vm, node->node_index);
+ n->state = VLIB_NODE_STATE_POLLING;
+ node->state = VLIB_NODE_STATE_POLLING;
+ ASSERT (! (node->flags & VLIB_NODE_FLAG_SWITCH_FROM_INTERRUPT_TO_POLLING_MODE));
+ node->flags &= ~VLIB_NODE_FLAG_SWITCH_FROM_POLLING_TO_INTERRUPT_MODE;
+ node->flags |= VLIB_NODE_FLAG_SWITCH_FROM_INTERRUPT_TO_POLLING_MODE;
+ nm->input_node_counts_by_state[VLIB_NODE_STATE_INTERRUPT] -= 1;
+ nm->input_node_counts_by_state[VLIB_NODE_STATE_POLLING] += 1;
+
+ ed = ELOG_DATA (&vm->elog_main, e);
+ ed->node_name = n->name_elog_string;
+ ed->vector_length = v;
+ ed->is_polling = 1;
+ }
+ else if (dispatch_state == VLIB_NODE_STATE_POLLING
+ && v <= nm->interrupt_threshold_vector_length)
+ {
+ vlib_node_t * n = vlib_get_node (vm, node->node_index);
+ if (node->flags & VLIB_NODE_FLAG_SWITCH_FROM_POLLING_TO_INTERRUPT_MODE)
+ {
+		  /* Switch to interrupt mode only after dispatching in
+		     polling mode one more time; this gives the driver a
+		     chance to re-enable interrupts. */
+ n->state = VLIB_NODE_STATE_INTERRUPT;
+ node->state = VLIB_NODE_STATE_INTERRUPT;
+ node->flags &= ~VLIB_NODE_FLAG_SWITCH_FROM_INTERRUPT_TO_POLLING_MODE;
+ nm->input_node_counts_by_state[VLIB_NODE_STATE_POLLING] -= 1;
+ nm->input_node_counts_by_state[VLIB_NODE_STATE_INTERRUPT] += 1;
+
+ }
+ else
+ {
+ node->flags |= VLIB_NODE_FLAG_SWITCH_FROM_POLLING_TO_INTERRUPT_MODE;
+ ed = ELOG_DATA (&vm->elog_main, e);
+ ed->node_name = n->name_elog_string;
+ ed->vector_length = v;
+ ed->is_polling = 0;
+ }
+ }
+ }
+ }
+
+ return t;
+}
+
+/* static */ u64
+dispatch_pending_node (vlib_main_t * vm,
+ vlib_pending_frame_t * p,
+ u64 last_time_stamp)
+{
+ vlib_node_main_t * nm = &vm->node_main;
+ vlib_frame_t * f;
+ vlib_next_frame_t * nf, nf_dummy;
+ vlib_node_runtime_t * n;
+ u32 restore_frame_index;
+
+ n = vec_elt_at_index (nm->nodes_by_type[VLIB_NODE_TYPE_INTERNAL],
+ p->node_runtime_index);
+
+ f = vlib_get_frame (vm, p->frame_index);
+ if (p->next_frame_index == VLIB_PENDING_FRAME_NO_NEXT_FRAME)
+ {
+      /* No next frame: use a dummy on the stack. */
+ nf = &nf_dummy;
+ nf->flags = f->flags & VLIB_NODE_FLAG_TRACE;
+ nf->frame_index = ~p->frame_index;
+ }
+ else
+ nf = vec_elt_at_index (nm->next_frames, p->next_frame_index);
+
+ ASSERT (f->flags & VLIB_FRAME_IS_ALLOCATED);
+
+ /* Force allocation of new frame while current frame is being
+ dispatched. */
+ restore_frame_index = ~0;
+ if (nf->frame_index == p->frame_index)
+ {
+ nf->frame_index = ~0;
+ nf->flags &= ~VLIB_FRAME_IS_ALLOCATED;
+ if (! (n->flags & VLIB_NODE_FLAG_FRAME_NO_FREE_AFTER_DISPATCH))
+ restore_frame_index = p->frame_index;
+ }
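+  /* If the node adds new work to this same next frame while running,
+     clearing nf->frame_index above forces a brand new frame to be
+     allocated instead of appending to the frame currently being
+     dispatched. */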
+
+ /* Frame must be pending. */
+ ASSERT (f->flags & VLIB_FRAME_PENDING);
+ ASSERT (f->n_vectors > 0);
+
+ /* Copy trace flag from next frame to node.
+ Trace flag indicates that at least one vector in the dispatched
+ frame is traced. */
+ n->flags &= ~VLIB_NODE_FLAG_TRACE;
+ n->flags |= (nf->flags & VLIB_FRAME_TRACE) ? VLIB_NODE_FLAG_TRACE : 0;
+ nf->flags &= ~VLIB_FRAME_TRACE;
+
+ last_time_stamp = dispatch_node (vm, n,
+ VLIB_NODE_TYPE_INTERNAL,
+ VLIB_NODE_STATE_POLLING,
+ f, last_time_stamp);
+
+ f->flags &= ~VLIB_FRAME_PENDING;
+
+ /* Frame is ready to be used again, so restore it. */
+ if (restore_frame_index != ~0)
+ {
+      /* p->next_frame_index can change during node dispatch if the node
+	 function decides to change the graph hookup. */
+ nf = vec_elt_at_index (nm->next_frames, p->next_frame_index);
+ nf->frame_index = restore_frame_index;
+ nf->flags |= VLIB_FRAME_IS_ALLOCATED;
+ }
+
+ if (f->flags & VLIB_FRAME_FREE_AFTER_DISPATCH)
+ {
+ ASSERT (! (n->flags & VLIB_NODE_FLAG_FRAME_NO_FREE_AFTER_DISPATCH));
+ vlib_frame_free (vm, n, f);
+ }
+
+ return last_time_stamp;
+}
+
+always_inline uword
+vlib_process_stack_is_valid (vlib_process_t * p)
+{ return p->stack[0] == VLIB_PROCESS_STACK_MAGIC; }
+
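+/*
+ * Processes are cooperative "threads" multiplexed on the main thread.
+ * Each runs on a private stack guarded by VLIB_PROCESS_STACK_MAGIC at
+ * stack[0].  A rough sketch of the control transfers, assuming the
+ * clib_setjmp/clib_longjmp/clib_calljmp primitives behave like their
+ * libc namesakes plus an explicit stack switch:
+ *
+ *   dispatch: clib_setjmp (&p->return_longjmp, ...RETURN);
+ *             clib_calljmp (vlib_process_bootstrap, &a, top_of_stack);
+ *   suspend:  clib_longjmp (&p->return_longjmp, ...SUSPEND);
+ *   resume:   clib_longjmp (&p->resume_longjmp, ...RESUME);
+ *   return:   clib_longjmp (&p->return_longjmp, n_vectors);
+ */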
+typedef struct {
+ vlib_main_t * vm;
+ vlib_process_t * process;
+ vlib_frame_t * frame;
+} vlib_process_bootstrap_args_t;
+
+/* Called in process stack. */
+static uword vlib_process_bootstrap (uword _a)
+{
+ vlib_process_bootstrap_args_t * a;
+ vlib_main_t * vm;
+ vlib_node_runtime_t * node;
+ vlib_frame_t * f;
+ vlib_process_t * p;
+ uword n;
+
+ a = uword_to_pointer (_a, vlib_process_bootstrap_args_t *);
+
+ vm = a->vm;
+ p = a->process;
+ f = a->frame;
+ node = &p->node_runtime;
+
+ n = node->function (vm, node, f);
+
+ ASSERT (vlib_process_stack_is_valid (p));
+
+ clib_longjmp (&p->return_longjmp, n);
+
+ return n;
+}
+
+/* Called in main stack. */
+static_always_inline uword
+vlib_process_startup (vlib_main_t * vm,
+ vlib_process_t * p,
+ vlib_frame_t * f)
+{
+ vlib_process_bootstrap_args_t a;
+ uword r;
+
+ a.vm = vm;
+ a.process = p;
+ a.frame = f;
+
+ r = clib_setjmp (&p->return_longjmp, VLIB_PROCESS_RETURN_LONGJMP_RETURN);
+ if (r == VLIB_PROCESS_RETURN_LONGJMP_RETURN)
+ r = clib_calljmp (vlib_process_bootstrap, pointer_to_uword (&a),
+ (void *) p->stack + (1 << p->log2_n_stack_bytes));
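+  /* Here r is either the process function's return value (delivered by
+     the longjmp in vlib_process_bootstrap when the function finishes)
+     or VLIB_PROCESS_RETURN_LONGJMP_SUSPEND if the process suspended;
+     dispatch_process() distinguishes the two cases. */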
+
+ return r;
+}
+
+static_always_inline uword
+vlib_process_resume (vlib_process_t * p)
+{
+ uword r;
+ p->flags &= ~(VLIB_PROCESS_IS_SUSPENDED_WAITING_FOR_CLOCK
+ | VLIB_PROCESS_IS_SUSPENDED_WAITING_FOR_EVENT
+ | VLIB_PROCESS_RESUME_PENDING);
+ r = clib_setjmp (&p->return_longjmp, VLIB_PROCESS_RETURN_LONGJMP_RETURN);
+ if (r == VLIB_PROCESS_RETURN_LONGJMP_RETURN)
+ clib_longjmp (&p->resume_longjmp, VLIB_PROCESS_RESUME_LONGJMP_RESUME);
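+  /* The suspend half (in node_funcs.h) presumably mirrors this
+     handshake: it clib_setjmp's p->resume_longjmp and then longjmps
+     back to p->return_longjmp, so the jump above resumes the process
+     body exactly where it left off. */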
+ return r;
+}
+
+static u64
+dispatch_process (vlib_main_t * vm,
+ vlib_process_t * p,
+ vlib_frame_t * f,
+ u64 last_time_stamp)
+{
+ vlib_node_main_t * nm = &vm->node_main;
+ vlib_node_runtime_t * node_runtime = &p->node_runtime;
+ vlib_node_t * node = vlib_get_node (vm, node_runtime->node_index);
+ u64 t;
+ uword n_vectors, is_suspend;
+
+ if (node->state != VLIB_NODE_STATE_POLLING
+ || (p->flags & (VLIB_PROCESS_IS_SUSPENDED_WAITING_FOR_CLOCK
+ | VLIB_PROCESS_IS_SUSPENDED_WAITING_FOR_EVENT)))
+ return last_time_stamp;
+
+ p->flags |= VLIB_PROCESS_IS_RUNNING;
+
+ t = last_time_stamp;
+ vlib_elog_main_loop_event (vm, node_runtime->node_index, t,
+ f ? f->n_vectors : 0, /* is_after */ 0);
+
+ /* Save away current process for suspend. */
+ nm->current_process_index = node->runtime_index;
+
+ n_vectors = vlib_process_startup (vm, p, f);
+
+ nm->current_process_index = ~0;
+
+ ASSERT (n_vectors != VLIB_PROCESS_RETURN_LONGJMP_RETURN);
+ is_suspend = n_vectors == VLIB_PROCESS_RETURN_LONGJMP_SUSPEND;
+ if (is_suspend)
+ {
+ vlib_pending_frame_t * pf;
+
+ n_vectors = 0;
+ pool_get (nm->suspended_process_frames, pf);
+ pf->node_runtime_index = node->runtime_index;
+ pf->frame_index = f ? vlib_frame_index (vm, f) : ~0;
+ pf->next_frame_index = ~0;
+
+ p->n_suspends += 1;
+ p->suspended_process_frame_index = pf - nm->suspended_process_frames;
+
+ if (p->flags & VLIB_PROCESS_IS_SUSPENDED_WAITING_FOR_CLOCK)
+ timing_wheel_insert (&nm->timing_wheel, p->resume_cpu_time,
+ vlib_timing_wheel_data_set_suspended_process (node->runtime_index));
+ }
+ else
+ p->flags &= ~VLIB_PROCESS_IS_RUNNING;
+
+ t = clib_cpu_time_now ();
+
+ vlib_elog_main_loop_event (vm, node_runtime->node_index, t, is_suspend, /* is_after */ 1);
+
+ vlib_process_update_stats (vm, p,
+ /* n_calls */ ! is_suspend,
+ /* n_vectors */ n_vectors,
+ /* n_clocks */ t - last_time_stamp);
+
+ return t;
+}
+
+void vlib_start_process (vlib_main_t * vm, uword process_index)
+{
+ vlib_node_main_t * nm = &vm->node_main;
+ vlib_process_t * p = vec_elt (nm->processes, process_index);
+ dispatch_process (vm, p, /* frame */ 0, /* cpu_time_now */ 0);
+}
+
+static u64
+dispatch_suspended_process (vlib_main_t * vm,
+ uword process_index,
+ u64 last_time_stamp)
+{
+ vlib_node_main_t * nm = &vm->node_main;
+ vlib_node_runtime_t * node_runtime;
+ vlib_node_t * node;
+ vlib_frame_t * f;
+ vlib_process_t * p;
+ vlib_pending_frame_t * pf;
+ u64 t, n_vectors, is_suspend;
+
+ t = last_time_stamp;
+
+ p = vec_elt (nm->processes, process_index);
+ if (PREDICT_FALSE (! (p->flags & VLIB_PROCESS_IS_RUNNING)))
+ return last_time_stamp;
+
+ ASSERT (p->flags & (VLIB_PROCESS_IS_SUSPENDED_WAITING_FOR_CLOCK
+ | VLIB_PROCESS_IS_SUSPENDED_WAITING_FOR_EVENT));
+
+ pf = pool_elt_at_index (nm->suspended_process_frames, p->suspended_process_frame_index);
+
+ node_runtime = &p->node_runtime;
+ node = vlib_get_node (vm, node_runtime->node_index);
+ f = pf->frame_index != ~0 ? vlib_get_frame (vm, pf->frame_index) : 0;
+
+ vlib_elog_main_loop_event (vm, node_runtime->node_index, t, f ? f->n_vectors : 0, /* is_after */ 0);
+
+ /* Save away current process for suspend. */
+ nm->current_process_index = node->runtime_index;
+
+ n_vectors = vlib_process_resume (p);
+ t = clib_cpu_time_now ();
+
+ nm->current_process_index = ~0;
+
+ is_suspend = n_vectors == VLIB_PROCESS_RETURN_LONGJMP_SUSPEND;
+ if (is_suspend)
+ {
+ /* Suspend it again. */
+ n_vectors = 0;
+ p->n_suspends += 1;
+ if (p->flags & VLIB_PROCESS_IS_SUSPENDED_WAITING_FOR_CLOCK)
+ timing_wheel_insert (&nm->timing_wheel, p->resume_cpu_time,
+ vlib_timing_wheel_data_set_suspended_process (node->runtime_index));
+ }
+ else
+ {
+ p->flags &= ~VLIB_PROCESS_IS_RUNNING;
+ p->suspended_process_frame_index = ~0;
+ pool_put (nm->suspended_process_frames, pf);
+ }
+
+ t = clib_cpu_time_now ();
+ vlib_elog_main_loop_event (vm, node_runtime->node_index, t, ! is_suspend, /* is_after */ 1);
+
+ vlib_process_update_stats (vm, p,
+ /* n_calls */ ! is_suspend,
+ /* n_vectors */ n_vectors,
+ /* n_clocks */ t - last_time_stamp);
+
+ return t;
+}
+
+static void vlib_main_loop (vlib_main_t * vm)
+{
+ vlib_node_main_t * nm = &vm->node_main;
+ uword i;
+ u64 cpu_time_now;
+
+  /* Initialize pending frame vector. */
+ vec_resize (nm->pending_frames, 32);
+ _vec_len (nm->pending_frames) = 0;
+
+ /* Mark time of main loop start. */
+ cpu_time_now = vm->clib_time.last_cpu_time;
+ vm->cpu_time_main_loop_start = cpu_time_now;
+
+ /* Arrange for first level of timing wheel to cover times we care
+ most about. */
+ nm->timing_wheel.min_sched_time = 10e-6;
+ nm->timing_wheel.max_sched_time = 10e-3;
+ timing_wheel_init (&nm->timing_wheel,
+ cpu_time_now,
+ vm->clib_time.clocks_per_second);
+
+  /* Pre-allocate vectors for expired timing-wheel events and pending
+     interrupts. */
+ vec_alloc (nm->data_from_advancing_timing_wheel, 32);
+ vec_alloc (nm->pending_interrupt_node_runtime_indices, 32);
+
+ if (! nm->polling_threshold_vector_length)
+ nm->polling_threshold_vector_length = 10;
+ if (! nm->interrupt_threshold_vector_length)
+ nm->interrupt_threshold_vector_length = 5;
+
+ nm->current_process_index = ~0;
+
+ /* Start all processes. */
+ {
+ uword i;
+ for (i = 0; i < vec_len (nm->processes); i++)
+ cpu_time_now = dispatch_process (vm, nm->processes[i], /* frame */ 0, cpu_time_now);
+ }
+
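+  /* Each trip around the loop below: dispatch pre-input nodes, then
+     input nodes (polling), then any pending input-node interrupts,
+     then events expired from the timing wheel, and finally drain the
+     pending-frame vector so internal nodes run input -> output. */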
+ while (1)
+ {
+ vlib_node_runtime_t * n;
+
+ /* Process pre-input nodes. */
+ vec_foreach (n, nm->nodes_by_type[VLIB_NODE_TYPE_PRE_INPUT])
+ cpu_time_now = dispatch_node (vm, n,
+ VLIB_NODE_TYPE_PRE_INPUT,
+ VLIB_NODE_STATE_POLLING,
+ /* frame */ 0,
+ cpu_time_now);
+
+ /* Next process input nodes. */
+ vec_foreach (n, nm->nodes_by_type[VLIB_NODE_TYPE_INPUT])
+ cpu_time_now = dispatch_node (vm, n,
+ VLIB_NODE_TYPE_INPUT,
+ VLIB_NODE_STATE_POLLING,
+ /* frame */ 0,
+ cpu_time_now);
+
+ if (PREDICT_FALSE(vm->queue_signal_pending))
+ if (vm->queue_signal_callback)
+ vm->queue_signal_callback (vm);
+
+ /* Next handle interrupts. */
+ {
+ uword l = _vec_len (nm->pending_interrupt_node_runtime_indices);
+ uword i;
+ if (l > 0)
+ {
+ _vec_len (nm->pending_interrupt_node_runtime_indices) = 0;
+ for (i = 0; i < l; i++)
+ {
+ n = vec_elt_at_index (nm->nodes_by_type[VLIB_NODE_TYPE_INPUT],
+ nm->pending_interrupt_node_runtime_indices[i]);
+ cpu_time_now = dispatch_node (vm, n,
+ VLIB_NODE_TYPE_INPUT,
+ VLIB_NODE_STATE_INTERRUPT,
+ /* frame */ 0,
+ cpu_time_now);
+ }
+ }
+ }
+
+ /* Check if process nodes have expired from timing wheel. */
+ nm->data_from_advancing_timing_wheel
+ = timing_wheel_advance (&nm->timing_wheel, cpu_time_now,
+ nm->data_from_advancing_timing_wheel,
+ &nm->cpu_time_next_process_ready);
+
+ ASSERT (nm->data_from_advancing_timing_wheel != 0);
+ if (PREDICT_FALSE (_vec_len (nm->data_from_advancing_timing_wheel) > 0))
+ {
+ uword i;
+
+ processes_timing_wheel_data:
+ for (i = 0; i < _vec_len (nm->data_from_advancing_timing_wheel); i++)
+ {
+ u32 d = nm->data_from_advancing_timing_wheel[i];
+ u32 di = vlib_timing_wheel_data_get_index (d);
+
+ if (vlib_timing_wheel_data_is_timed_event (d))
+ {
+ vlib_signal_timed_event_data_t * te = pool_elt_at_index (nm->signal_timed_event_data_pool, di);
+ vlib_node_t * n = vlib_get_node (vm, te->process_node_index);
+ vlib_process_t * p = vec_elt (nm->processes, n->runtime_index);
+ void * data;
+ data = vlib_process_signal_event_helper (nm, n, p, te->event_type_index, te->n_data_elts, te->n_data_elt_bytes);
+ if (te->n_data_bytes < sizeof (te->inline_event_data))
+ memcpy (data, te->inline_event_data, te->n_data_bytes);
+ else
+ {
+ memcpy (data, te->event_data_as_vector, te->n_data_bytes);
+ vec_free (te->event_data_as_vector);
+ }
+ pool_put (nm->signal_timed_event_data_pool, te);
+ }
+ else
+ {
+ cpu_time_now = clib_cpu_time_now();
+ cpu_time_now = dispatch_suspended_process (vm, di, cpu_time_now);
+ }
+ }
+
+ /* Reset vector. */
+ _vec_len (nm->data_from_advancing_timing_wheel) = 0;
+ }
+
+ /* Input nodes may have added work to the pending vector.
+ Process pending vector until there is nothing left.
+ All pending vectors will be processed from input -> output. */
+ for (i = 0; i < _vec_len (nm->pending_frames); i++)
+ cpu_time_now = dispatch_pending_node (vm, nm->pending_frames + i,
+ cpu_time_now);
+ /* Reset pending vector for next iteration. */
+ _vec_len (nm->pending_frames) = 0;
+
+ /* Pending internal nodes may resume processes. */
+ if (_vec_len (nm->data_from_advancing_timing_wheel) > 0)
+ goto processes_timing_wheel_data;
+
+ vlib_increment_main_loop_counter (vm);
+
+ /* Record time stamp in case there are no enabled nodes and above
+ calls do not update time stamp. */
+ cpu_time_now = clib_cpu_time_now ();
+ }
+}
+
+vlib_main_t vlib_global_main;
+
+static clib_error_t *
+vlib_main_configure (vlib_main_t * vm, unformat_input_t * input)
+{
+ int turn_on_mem_trace = 0;
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "memory-trace"))
+ turn_on_mem_trace = 1;
+
+ else if (unformat (input, "elog-events %d",
+ &vm->elog_main.event_ring_size))
+ ;
+ else
+ return unformat_parse_error (input);
+ }
+
+ unformat_free (input);
+
+ /* Enable memory trace as early as possible. */
+ if (turn_on_mem_trace)
+ clib_mem_trace (1);
+
+ return 0;
+}
+
+VLIB_EARLY_CONFIG_FUNCTION (vlib_main_configure, "vlib");
+
+/* Main function. */
+int vlib_main (vlib_main_t * vm, unformat_input_t * input)
+{
+ clib_error_t * error;
+
+ clib_time_init (&vm->clib_time);
+
+ /* Turn on event log. */
+ if (! vm->elog_main.event_ring_size)
+ vm->elog_main.event_ring_size = 128 << 10;
+ elog_init (&vm->elog_main, vm->elog_main.event_ring_size);
+ elog_enable_disable (&vm->elog_main, 1);
+
+ /* Default name. */
+ if (! vm->name)
+ vm->name = "VLIB";
+
+ vec_validate (vm->buffer_main, 0);
+
+ if ((error = vlib_thread_init (vm)))
+ {
+ clib_error_report (error);
+ goto done;
+ }
+
+ /* Register static nodes so that init functions may use them. */
+ vlib_register_all_static_nodes (vm);
+
+ /* Set seed for random number generator.
+ Allow user to specify seed to make random sequence deterministic. */
+ if (! unformat (input, "seed %wd", &vm->random_seed))
+ vm->random_seed = clib_cpu_time_now ();
+ clib_random_buffer_init (&vm->random_buffer, vm->random_seed);
+
+ /* See unix/main.c; most likely already set up */
+ if (vm->init_functions_called == 0)
+ vm->init_functions_called = hash_create (0, /* value bytes */ 0);
+ if ((error = vlib_call_all_init_functions (vm)))
+ goto done;
+
+ /* Initialize node graph. */
+ if ((error = vlib_node_main_init (vm)))
+ {
+      /* Arrange for graph hookup errors to be non-fatal when debugging. */
+ if (CLIB_DEBUG > 0)
+ clib_error_report (error);
+ else
+ goto done;
+ }
+
+ /* Create default buffer free list. */
+ vlib_buffer_get_or_create_free_list (vm, VLIB_BUFFER_DEFAULT_FREE_LIST_BYTES,
+ "default");
+
+ switch (clib_setjmp (&vm->main_loop_exit, VLIB_MAIN_LOOP_EXIT_NONE))
+ {
+ case VLIB_MAIN_LOOP_EXIT_NONE:
+ vm->main_loop_exit_set = 1;
+ break;
+
+ case VLIB_MAIN_LOOP_EXIT_CLI:
+ goto done;
+
+ default:
+ error = vm->main_loop_error;
+ goto done;
+ }
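+  /* Anything which longjmps to vm->main_loop_exit (e.g.
+     vlib_panic_with_error() in main.h) unwinds to the switch above;
+     the initial, direct return of clib_setjmp falls through and enters
+     the main loop below. */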
+
+ if ((error = vlib_call_all_config_functions (vm, input, 0 /* is_early */)))
+ goto done;
+
+ /* Call all main loop enter functions. */
+ {
+ clib_error_t * sub_error;
+ sub_error = vlib_call_all_main_loop_enter_functions (vm);
+ if (sub_error)
+ clib_error_report (sub_error);
+ }
+
+ vlib_main_loop (vm);
+
+ done:
+ /* Call all exit functions. */
+ {
+ clib_error_t * sub_error;
+ sub_error = vlib_call_all_main_loop_exit_functions (vm);
+ if (sub_error)
+ clib_error_report (sub_error);
+ }
+
+ if (error)
+ clib_error_report (error);
+
+ return 0;
+}
diff --git a/vlib/vlib/main.h b/vlib/vlib/main.h
new file mode 100644
index 00000000000..5a8d745661b
--- /dev/null
+++ b/vlib/vlib/main.h
@@ -0,0 +1,315 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * main.h: VLIB main data structure
+ *
+ * Copyright (c) 2008 Eliot Dresselhaus
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef included_vlib_main_h
+#define included_vlib_main_h
+
+#include <vppinfra/elog.h>
+#include <vppinfra/format.h>
+#include <vppinfra/longjmp.h>
+#include <vppinfra/pool.h>
+#include <vppinfra/random_buffer.h>
+#include <vppinfra/time.h>
+
+#include <pthread.h>
+
+
+/* By default turn off node/error event logging.
+ Override with -DVLIB_ELOG_MAIN_LOOP */
+#ifndef VLIB_ELOG_MAIN_LOOP
+#define VLIB_ELOG_MAIN_LOOP 0
+#endif
+
+typedef struct vlib_main_t {
+ /* Instruction level timing state. */
+ clib_time_t clib_time;
+
+ /* Time stamp of last node dispatch. */
+ u64 cpu_time_last_node_dispatch;
+
+ /* Time stamp when main loop was entered (time 0). */
+ u64 cpu_time_main_loop_start;
+
+ /* Incremented once for each main loop. */
+ u32 main_loop_count;
+
+ /* Count of vectors processed this main loop. */
+ u32 main_loop_vectors_processed;
+ u32 main_loop_nodes_processed;
+
+ /* Circular buffer of input node vector counts.
+ Indexed by low bits of
+     (main_loop_count >> VLIB_LOG2_MAIN_LOOPS_PER_STATS_UPDATE). */
+ u32 vector_counts_per_main_loop[2];
+ u32 node_counts_per_main_loop[2];
+
+ /* Every so often we switch to the next counter. */
+#define VLIB_LOG2_MAIN_LOOPS_PER_STATS_UPDATE 7
+
+ /* Jump target to exit main loop with given code. */
+ u32 main_loop_exit_set;
+ clib_longjmp_t main_loop_exit;
+#define VLIB_MAIN_LOOP_EXIT_NONE 0
+#define VLIB_MAIN_LOOP_EXIT_PANIC 1
+ /* Exit via CLI. */
+#define VLIB_MAIN_LOOP_EXIT_CLI 2
+
+ /* Error marker to use when exiting main loop. */
+ clib_error_t * main_loop_error;
+
+ /* Name for e.g. syslog. */
+ char * name;
+
+ /* Start and size of CLIB heap. */
+ void * heap_base;
+ uword heap_size;
+
+ vlib_buffer_main_t * buffer_main;
+
+ vlib_physmem_main_t physmem_main;
+
+  /* Allocate/free buffer memory for DMA transfers, descriptor rings, etc.
+     Buffer memory is guaranteed to be cache-aligned. */
+ void * (* os_physmem_alloc_aligned) (vlib_physmem_main_t * pm,
+ uword n_bytes,
+ uword alignment);
+ void (* os_physmem_free) (void * x);
+
+ /* Node graph main structure. */
+ vlib_node_main_t node_main;
+
+ /* Command line interface. */
+ vlib_cli_main_t cli_main;
+
+ /* Packet trace buffer. */
+ vlib_trace_main_t trace_main;
+
+ /* Error handling. */
+ vlib_error_main_t error_main;
+
+  /* Punt packets to the underlying operating system when the fast
+     switching code does not know what to do. */
+ void (* os_punt_frame) (struct vlib_main_t * vm,
+ struct vlib_node_runtime_t * node,
+ vlib_frame_t * frame);
+
+ /* Multicast distribution. Set to zero for MC disabled. */
+ mc_main_t * mc_main;
+
+ /* Stream index to use for distribution when MC is enabled. */
+ u32 mc_stream_index;
+
+ vlib_one_time_waiting_process_t * procs_waiting_for_mc_stream_join;
+
+ /* Event logger. */
+ elog_main_t elog_main;
+
+ /* Node call and return event types. */
+ elog_event_type_t * node_call_elog_event_types;
+ elog_event_type_t * node_return_elog_event_types;
+
+ elog_event_type_t * error_elog_event_types;
+
+ /* Seed for random number generator. */
+ uword random_seed;
+
+ /* Buffer of random data for various uses. */
+ clib_random_buffer_t random_buffer;
+
+ /* Hash table to record which init functions have been called. */
+ uword * init_functions_called;
+
+  /* CPU index, for comparison with a node runtime's cpu_index. */
+ u32 cpu_index;
+
+ void **mbuf_alloc_list;
+
+  /* Lists of init functions to call, set up by constructors. */
+ _vlib_init_function_list_elt_t *init_function_registrations;
+ _vlib_init_function_list_elt_t *main_loop_enter_function_registrations;
+ _vlib_init_function_list_elt_t *main_loop_exit_function_registrations;
+ _vlib_init_function_list_elt_t *api_init_function_registrations;
+ vlib_config_function_runtime_t *config_function_registrations;
+ mc_serialize_msg_t *mc_msg_registrations; /* mc_main is a pointer... */
+
+ /* control-plane API queue signal pending */
+ volatile u32 queue_signal_pending;
+ void (*queue_signal_callback)(struct vlib_main_t *);
+} vlib_main_t;
+
+/* Global main structure. */
+vlib_main_t vlib_global_main;
+
+always_inline f64
+vlib_time_now (vlib_main_t * vm)
+{ return clib_time_now (&vm->clib_time); }
+
+always_inline f64
+vlib_time_now_ticks (vlib_main_t * vm, u64 n)
+{ return clib_time_now_internal (&vm->clib_time, n); }
+
+/* Busy wait for specified time. */
+always_inline void
+vlib_time_wait (vlib_main_t * vm, f64 wait)
+{
+ f64 t = vlib_time_now (vm);
+ f64 limit = t + wait;
+ while (t < limit)
+ t = vlib_time_now (vm);
+}
+
+/* Time a piece of code. */
+#define vlib_time_code(vm,body) \
+do { \
+ f64 _t[2]; \
+ _t[0] = vlib_time_now (vm); \
+ do { body; } while (0); \
+ _t[1] = vlib_time_now (vm); \
+ clib_warning ("%.7e", _t[1] - _t[0]); \
+} while (0)
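+/* Illustrative use (expensive_call is a hypothetical function):
+     vlib_time_code (vm, expensive_call (vm));
+   prints the elapsed wall-clock seconds via clib_warning. */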
+
+#define vlib_wait_with_timeout(vm,suspend_time,timeout_time,test) \
+({ \
+ uword __vlib_wait_with_timeout = 0; \
+ f64 __vlib_wait_time = 0; \
+ while (! (__vlib_wait_with_timeout = (test)) \
+ && __vlib_wait_time < (timeout_time)) \
+ { \
+ vlib_process_suspend (vm, suspend_time); \
+ __vlib_wait_time += suspend_time; \
+ } \
+ __vlib_wait_with_timeout; \
+})
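+/* Illustrative use, from process context only (it suspends): wait up
+   to 5 seconds, checking every 10 ms, for a hypothetical *done flag:
+
+     uword ok = vlib_wait_with_timeout (vm, 10e-3, 5.0, *done != 0);
+
+   The result is non-zero iff the test became true before the timeout. */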
+
+always_inline void
+vlib_panic_with_error (vlib_main_t * vm, clib_error_t * error)
+{
+ vm->main_loop_error = error;
+ clib_longjmp (&vm->main_loop_exit, VLIB_MAIN_LOOP_EXIT_PANIC);
+}
+
+#define vlib_panic_with_msg(vm,args...) \
+ vlib_panic_with_error (vm, clib_error_return (0, args))
+
+always_inline void
+vlib_panic (vlib_main_t * vm)
+{ vlib_panic_with_error (vm, 0); }
+
+always_inline u32
+vlib_vector_input_stats_index (vlib_main_t * vm, word delta)
+{
+ u32 i;
+ i = vm->main_loop_count >> VLIB_LOG2_MAIN_LOOPS_PER_STATS_UPDATE;
+ ASSERT (is_pow2 (ARRAY_LEN (vm->vector_counts_per_main_loop)));
+ return (i + delta) & (ARRAY_LEN (vm->vector_counts_per_main_loop) - 1);
+}
+
+/* Estimate input rate based on previous
+ 2^VLIB_LOG2_MAIN_LOOPS_PER_STATS_UPDATE
+ samples. */
+always_inline u32
+vlib_last_vectors_per_main_loop (vlib_main_t * vm)
+{
+ u32 i = vlib_vector_input_stats_index (vm, -1);
+ u32 n = vm->vector_counts_per_main_loop[i];
+ return n >> VLIB_LOG2_MAIN_LOOPS_PER_STATS_UPDATE;
+}
+
+/* Average vector count per main loop iteration. */
+always_inline f64
+vlib_last_vectors_per_main_loop_as_f64 (vlib_main_t * vm)
+{
+ u32 i = vlib_vector_input_stats_index (vm, -1);
+ u32 v = vm->vector_counts_per_main_loop[i];
+ return (f64) v / (f64) (1 << VLIB_LOG2_MAIN_LOOPS_PER_STATS_UPDATE);
+}
+
+/* Average vector count per node call per main loop iteration. */
+always_inline f64
+vlib_last_vector_length_per_node (vlib_main_t * vm)
+{
+ u32 i = vlib_vector_input_stats_index (vm, -1);
+ u32 v = vm->vector_counts_per_main_loop[i];
+ u32 n = vm->node_counts_per_main_loop[i];
+ return n == 0 ? 0 : (f64) v / (f64) n;
+}
+
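+/* Debug counter: number of times the stats buckets have wrapped
+   (see vlib_increment_main_loop_counter below). */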
+u32 wraps;
+
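+/* Counts accumulate into bucket i for 2^7 = 128 main loops; on wrap
+   the index advances to the other bucket, which restarts from zero, so
+   vlib_last_vectors_per_main_loop() and friends always read a fully
+   accumulated previous bucket. */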
+always_inline void
+vlib_increment_main_loop_counter (vlib_main_t * vm)
+{
+ u32 i, c, n, v, is_wrap;
+
+ c = vm->main_loop_count++;
+
+ is_wrap = (c & pow2_mask (VLIB_LOG2_MAIN_LOOPS_PER_STATS_UPDATE)) == 0;
+
+ if (is_wrap)
+ wraps++;
+
+ i = vlib_vector_input_stats_index (vm, /* delta */ is_wrap);
+
+ v = is_wrap ? 0 : vm->vector_counts_per_main_loop[i];
+ n = is_wrap ? 0 : vm->node_counts_per_main_loop[i];
+
+ v += vm->main_loop_vectors_processed;
+ n += vm->main_loop_nodes_processed;
+ vm->main_loop_vectors_processed = 0;
+ vm->main_loop_nodes_processed = 0;
+ vm->vector_counts_per_main_loop[i] = v;
+ vm->node_counts_per_main_loop[i] = n;
+}
+
+always_inline void vlib_set_queue_signal_callback
+(vlib_main_t *vm, void (*fp)(vlib_main_t *))
+{
+ vm->queue_signal_callback = fp;
+}
+
+/* Main routine. */
+int vlib_main (vlib_main_t * vm, unformat_input_t * input);
+
+/* Thread stacks, for os_get_cpu_number */
+u8 **vlib_thread_stacks;
+
+/* Number of thread stacks that the application needs */
+u32 vlib_app_num_thread_stacks_needed (void) __attribute__ ((weak));
+
+#endif /* included_vlib_main_h */
diff --git a/vlib/vlib/mc.c b/vlib/vlib/mc.c
new file mode 100644
index 00000000000..460145ef0e6
--- /dev/null
+++ b/vlib/vlib/mc.c
@@ -0,0 +1,2354 @@
+/*
+ * mc.c: vlib reliable sequenced multicast distributed applications
+ *
+ * Copyright (c) 2010 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vlib/vlib.h>
+
+/*
+ * 1 to enable msg id training wheels, which are useful for tracking
+ * down catchup and/or partitioned network problems
+ */
+#define MSG_ID_DEBUG 0
+
+static format_function_t format_mc_stream_state;
+
+static u32 elog_id_for_peer_id (mc_main_t * m, u64 peer_id)
+{
+ uword * p, r;
+ mhash_t * h = &m->elog_id_by_peer_id;
+
+ if (! m->elog_id_by_peer_id.hash)
+ mhash_init (h, sizeof (uword), sizeof (mc_peer_id_t));
+
+ p = mhash_get (h, &peer_id);
+ if (p)
+ return p[0];
+ r = elog_string (m->elog_main, "%U",
+ m->transport.format_peer_id, peer_id);
+ mhash_set (h, &peer_id, r, /* old_value */ 0);
+ return r;
+}
+
+static u32 elog_id_for_msg_name (mc_main_t * m, char *msg_name)
+{
+ uword * p, r;
+ uword * h = m->elog_id_by_msg_name;
+ u8 *name_copy;
+
+ if (! h)
+ h = m->elog_id_by_msg_name
+ = hash_create_string (0, sizeof (uword));
+
+ p = hash_get_mem (h, msg_name);
+ if (p)
+ return p[0];
+ r = elog_string (m->elog_main, "%s", msg_name);
+
+ name_copy = format (0, "%s%c", msg_name, 0);
+
+ hash_set_mem (h, name_copy, r);
+ m->elog_id_by_msg_name = h;
+
+ return r;
+}
+
+static void elog_tx_msg (mc_main_t * m, u32 stream_id, u32 local_sequence, u32 retry_count)
+{
+ if (MC_EVENT_LOGGING > 0)
+ {
+ ELOG_TYPE_DECLARE (e) = {
+ .format = "tx-msg: stream %d local seq %d attempt %d",
+ .format_args = "i4i4i4",
+ };
+ struct { u32 stream_id, local_sequence, retry_count; } * ed;
+ ed = ELOG_DATA (m->elog_main, e);
+ ed->stream_id = stream_id;
+ ed->local_sequence = local_sequence;
+ ed->retry_count = retry_count;
+ }
+}
+
+/*
+ * mc_seq_cmp
+ * Correctly compare two unsigned sequence numbers, even across a wrap.
+ * This works so long as x and y are within 2**(n-1) of each other,
+ * where n is the width of x and y in bits (32 here).
+ *
+ * Magic decoder ring:
+ * mc_seq_cmp == 0 => x and y are equal
+ * mc_seq_cmp < 0 => x is "in the past" with respect to y
+ * mc_seq_cmp > 0 => x is "in the future" with respect to y
+ */
+always_inline i32 mc_seq_cmp (u32 x, u32 y)
+{ return (i32) x - (i32) y;}
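+/* Example: mc_seq_cmp (0, 0xffffffff) == 1, i.e. sequence 0 is one
+   step "in the future" of 0xffffffff even though the counter wrapped. */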
+
+void * mc_get_vlib_buffer (vlib_main_t * vm, u32 n_bytes, u32 * bi_return)
+{
+ u32 n_alloc, bi;
+ vlib_buffer_t * b;
+
+ n_alloc = vlib_buffer_alloc (vm, &bi, 1);
+ ASSERT (n_alloc == 1);
+
+ b = vlib_get_buffer (vm, bi);
+ b->current_length = n_bytes;
+ *bi_return = bi;
+ return (void *) b->data;
+}
+
+static void
+delete_peer_with_index (mc_main_t * mcm, mc_stream_t * s,
+ uword index,
+ int notify_application)
+{
+ mc_stream_peer_t * p = pool_elt_at_index (s->peers, index);
+ ASSERT (p != 0);
+ if (s->config.peer_died && notify_application)
+ s->config.peer_died (mcm, s, p->id);
+
+ s->all_peer_bitmap = clib_bitmap_andnoti (s->all_peer_bitmap, p - s->peers);
+
+ if (MC_EVENT_LOGGING > 0)
+ {
+ ELOG_TYPE_DECLARE (e) = {
+ .format = "delete peer %s from all_peer_bitmap",
+ .format_args = "T4",
+ };
+ struct { u32 peer; } * ed = 0;
+
+ ed = ELOG_DATA (mcm->elog_main, e);
+ ed->peer = elog_id_for_peer_id (mcm, p->id.as_u64);
+ }
+ /* Do not delete the pool / hash table entries, or we lose sequence number state */
+}
+
+static mc_stream_peer_t *
+get_or_create_peer_with_id (mc_main_t * mcm,
+ mc_stream_t * s, mc_peer_id_t id,
+ int * created)
+{
+ uword * q = mhash_get (&s->peer_index_by_id, &id);
+ mc_stream_peer_t * p;
+
+ if (q)
+ {
+ p = pool_elt_at_index (s->peers, q[0]);
+ goto done;
+ }
+
+ pool_get (s->peers, p);
+ memset (p, 0, sizeof (p[0]));
+ p->id = id;
+ p->last_sequence_received = ~0;
+ mhash_set (&s->peer_index_by_id, &id, p - s->peers, /* old_value */ 0);
+ if (created)
+ *created = 1;
+
+ done:
+ if (MC_EVENT_LOGGING > 0)
+ {
+ ELOG_TYPE_DECLARE (e) = {
+ .format = "get_or_create %s peer %s stream %d seq %d",
+ .format_args = "t4T4i4i4",
+ .n_enum_strings = 2,
+ .enum_strings = { "old", "new", },
+ };
+ struct { u32 is_new, peer, stream_index, rx_sequence; } * ed = 0;
+
+ ed = ELOG_DATA (mcm->elog_main, e);
+ ed->is_new = q ? 0 : 1;
+ ed->peer = elog_id_for_peer_id (mcm, p->id.as_u64);
+ ed->stream_index = s->index;
+ ed->rx_sequence = p->last_sequence_received;
+ }
+ /* $$$$ Enable or reenable this peer */
+ s->all_peer_bitmap = clib_bitmap_ori (s->all_peer_bitmap, p - s->peers);
+ return p;
+}
+
+static void maybe_send_window_open_event (vlib_main_t * vm, mc_stream_t * stream)
+{
+ vlib_one_time_waiting_process_t * p;
+
+ if (pool_elts (stream->retry_pool) >= stream->config.window_size)
+ return;
+
+ vec_foreach (p, stream->procs_waiting_for_open_window)
+ vlib_signal_one_time_waiting_process (vm, p);
+
+ if (stream->procs_waiting_for_open_window)
+ _vec_len (stream->procs_waiting_for_open_window) = 0;
+}
+
+static void mc_retry_free (mc_main_t * mcm, mc_stream_t *s, mc_retry_t * r)
+{
+ mc_retry_t record, *retp;
+
+ if (r->unacked_by_peer_bitmap)
+ _vec_len (r->unacked_by_peer_bitmap) = 0;
+
+ if (clib_fifo_elts (s->retired_fifo) >= 2 * s->config.window_size)
+ {
+ clib_fifo_sub1 (s->retired_fifo, record);
+ vlib_buffer_free_one (mcm->vlib_main, record.buffer_index);
+ }
+
+ clib_fifo_add2 (s->retired_fifo, retp);
+
+ retp->buffer_index = r->buffer_index;
+ retp->local_sequence = r->local_sequence;
+
+ r->buffer_index = ~0; /* poison buffer index in this retry */
+}
+
+static void mc_resend_retired (mc_main_t *mcm, mc_stream_t *s, u32 local_sequence)
+{
+ mc_retry_t *retry;
+
+ if (MC_EVENT_LOGGING > 0)
+ {
+ ELOG_TYPE_DECLARE (e) = {
+ .format = "resend-retired: search for local seq %d",
+ .format_args = "i4",
+ };
+ struct { u32 local_sequence; } * ed;
+ ed = ELOG_DATA (mcm->elog_main, e);
+ ed->local_sequence = local_sequence;
+ }
+
+ clib_fifo_foreach
+ (retry, s->retired_fifo,
+ ({
+ if (retry->local_sequence == local_sequence)
+ {
+ elog_tx_msg (mcm, s->index, retry->local_sequence, -13);
+
+ mcm->transport.tx_buffer
+ (mcm->transport.opaque,
+ MC_TRANSPORT_USER_REQUEST_TO_RELAY,
+ retry->buffer_index);
+ return;
+ }
+ }));
+
+ if (MC_EVENT_LOGGING > 0)
+ {
+ ELOG_TYPE_DECLARE (e) = {
+ .format = "resend-retired: FAILED search for local seq %d",
+ .format_args = "i4",
+ };
+ struct { u32 local_sequence; } * ed;
+ ed = ELOG_DATA (mcm->elog_main, e);
+ ed->local_sequence = local_sequence;
+ }
+}
+
+static uword *
+delete_retry_fifo_elt (mc_main_t * mcm,
+ mc_stream_t * stream,
+ mc_retry_t * r,
+ uword * dead_peer_bitmap)
+{
+ mc_stream_peer_t * p;
+
+ pool_foreach (p, stream->peers, ({
+ uword pi = p - stream->peers;
+ uword is_alive = 0 == clib_bitmap_get (r->unacked_by_peer_bitmap, pi);
+
+ if (! is_alive)
+ dead_peer_bitmap = clib_bitmap_ori (dead_peer_bitmap, pi);
+
+ if (MC_EVENT_LOGGING > 0)
+ {
+ ELOG_TYPE_DECLARE (e) = {
+ .format = "delete_retry_fifo_elt: peer %s is %s",
+ .format_args = "T4t4",
+ .n_enum_strings = 2,
+ .enum_strings = { "alive", "dead", },
+ };
+ struct { u32 peer, is_alive; } * ed;
+ ed = ELOG_DATA (mcm->elog_main, e);
+ ed->peer = elog_id_for_peer_id (mcm, p->id.as_u64);
+ ed->is_alive = is_alive;
+ }
+ }));
+
+ hash_unset (stream->retry_index_by_local_sequence, r->local_sequence);
+ mc_retry_free (mcm, stream, r);
+
+ return dead_peer_bitmap;
+}
+
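+/* Outstanding retries form a doubly linked list threaded through
+   s->retry_pool by element index, with ~0 terminating both ends:
+   s->retry_head_index is the oldest unacked message and
+   s->retry_tail_index the newest (see mc_stream_send). */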
+always_inline mc_retry_t *
+prev_retry (mc_stream_t * s, mc_retry_t * r)
+{
+ return (r->prev_index != ~0
+ ? pool_elt_at_index (s->retry_pool, r->prev_index)
+ : 0);
+}
+
+always_inline mc_retry_t *
+next_retry (mc_stream_t * s, mc_retry_t * r)
+{
+ return (r->next_index != ~0
+ ? pool_elt_at_index (s->retry_pool, r->next_index)
+ : 0);
+}
+
+always_inline void
+remove_retry_from_pool (mc_stream_t * s, mc_retry_t * r)
+{
+ mc_retry_t * p = prev_retry (s, r);
+ mc_retry_t * n = next_retry (s, r);
+
+ if (p)
+ p->next_index = r->next_index;
+ else
+ s->retry_head_index = r->next_index;
+ if (n)
+ n->prev_index = r->prev_index;
+ else
+ s->retry_tail_index = r->prev_index;
+
+ pool_put_index (s->retry_pool, r - s->retry_pool);
+}
+
+static void check_retry (mc_main_t * mcm, mc_stream_t * s)
+{
+ mc_retry_t * r;
+ vlib_main_t * vm = mcm->vlib_main;
+ f64 now = vlib_time_now(vm);
+ uword * dead_peer_bitmap = 0;
+ u32 ri, ri_next;
+
+ for (ri = s->retry_head_index; ri != ~0; ri = ri_next)
+ {
+ r = pool_elt_at_index (s->retry_pool, ri);
+ ri_next = r->next_index;
+
+ if (now < r->sent_at + s->config.retry_interval)
+ continue;
+
+ r->n_retries += 1;
+ if (r->n_retries > s->config.retry_limit)
+ {
+ dead_peer_bitmap =
+ delete_retry_fifo_elt (mcm, s, r, dead_peer_bitmap);
+ remove_retry_from_pool (s, r);
+ }
+ else
+ {
+ if (MC_EVENT_LOGGING > 0)
+ {
+ mc_stream_peer_t * p;
+ ELOG_TYPE_DECLARE (t) = {
+ .format = "resend local seq %d attempt %d",
+ .format_args = "i4i4",
+ };
+
+ pool_foreach (p, s->peers, ({
+ if (clib_bitmap_get (r->unacked_by_peer_bitmap, p - s->peers))
+ {
+ ELOG_TYPE_DECLARE (ev) = {
+ .format = "resend: needed by peer %s local seq %d",
+ .format_args = "T4i4",
+ };
+ struct { u32 peer, rx_sequence; } * ed;
+ ed = ELOG_DATA (mcm->elog_main, ev);
+ ed->peer = elog_id_for_peer_id (mcm, p->id.as_u64);
+ ed->rx_sequence = r->local_sequence;
+ }
+ }));
+
+ struct { u32 sequence; u32 trail; } * ed;
+ ed = ELOG_DATA (mcm->elog_main, t);
+ ed->sequence = r->local_sequence;
+ ed->trail = r->n_retries;
+ }
+
+ r->sent_at = vlib_time_now (vm);
+ s->stats.n_retries += 1;
+
+ elog_tx_msg (mcm, s->index, r->local_sequence, r->n_retries);
+
+ mcm->transport.tx_buffer
+ (mcm->transport.opaque,
+ MC_TRANSPORT_USER_REQUEST_TO_RELAY,
+ r->buffer_index);
+ }
+ }
+
+ maybe_send_window_open_event (mcm->vlib_main, s);
+
+ /* Delete any dead peers we've found. */
+ if (! clib_bitmap_is_zero (dead_peer_bitmap))
+ {
+ uword i;
+
+ clib_bitmap_foreach (i, dead_peer_bitmap, ({
+ delete_peer_with_index (mcm, s, i, /* notify_application */ 1);
+
+ /* Delete any references to just deleted peer in retry pool. */
+ pool_foreach (r, s->retry_pool, ({
+ r->unacked_by_peer_bitmap =
+ clib_bitmap_andnoti (r->unacked_by_peer_bitmap, i);
+ }));
+ }));
+ clib_bitmap_free (dead_peer_bitmap);
+ }
+}
+
+always_inline mc_main_t *
+mc_node_get_main (vlib_node_runtime_t * node)
+{
+ mc_main_t ** p = (void *) node->runtime_data;
+ return p[0];
+}
+
+static uword
+mc_retry_process (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * f)
+{
+ mc_main_t * mcm = mc_node_get_main (node);
+ mc_stream_t * s;
+
+ while (1)
+ {
+ vlib_process_suspend (vm, 1.0);
+ vec_foreach (s, mcm->stream_vector)
+ {
+ if (s->state != MC_STREAM_STATE_invalid)
+ check_retry (mcm, s);
+ }
+ }
+ return 0; /* not likely */
+}
+
+static void send_join_or_leave_request (mc_main_t * mcm, u32 stream_index, u32 is_join)
+{
+ vlib_main_t * vm = mcm->vlib_main;
+ mc_msg_join_or_leave_request_t * mp;
+ u32 bi;
+
+ mp = mc_get_vlib_buffer (vm, sizeof (mp[0]), &bi);
+ memset(mp, 0, sizeof (*mp));
+ mp->type = MC_MSG_TYPE_join_or_leave_request;
+ mp->peer_id = mcm->transport.our_ack_peer_id;
+ mp->stream_index = stream_index;
+ mp->is_join = is_join;
+
+ mc_byte_swap_msg_join_or_leave_request (mp);
+
+ /*
+   * These msgs are unnumbered and unordered, so send them on the
+   * from-relay channel.
+ */
+ mcm->transport.tx_buffer (mcm->transport.opaque, MC_TRANSPORT_JOIN, bi);
+}
+
+static uword
+mc_join_ager_process (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * f)
+{
+ mc_main_t * mcm = mc_node_get_main (node);
+
+ while (1)
+ {
+ if (mcm->joins_in_progress)
+ {
+ mc_stream_t * s;
+ vlib_one_time_waiting_process_t * p;
+ f64 now = vlib_time_now (vm);
+
+ vec_foreach (s, mcm->stream_vector)
+ {
+ if (s->state != MC_STREAM_STATE_join_in_progress)
+ continue;
+
+ if (now > s->join_timeout)
+ {
+ s->state = MC_STREAM_STATE_ready;
+
+ if (MC_EVENT_LOGGING > 0)
+ {
+ ELOG_TYPE_DECLARE (e) = {
+ .format = "stream %d join timeout",
+ };
+ ELOG (mcm->elog_main, e, s->index);
+ }
+ /* Make sure that this app instance exists as a stream peer,
+ or we may answer a catchup request with a NULL
+ all_peer_bitmap... */
+ (void) get_or_create_peer_with_id
+ (mcm, s, mcm->transport.our_ack_peer_id, /* created */ 0);
+
+ vec_foreach (p, s->procs_waiting_for_join_done)
+ vlib_signal_one_time_waiting_process (vm, p);
+ if (s->procs_waiting_for_join_done)
+ _vec_len (s->procs_waiting_for_join_done) = 0;
+
+ mcm->joins_in_progress--;
+ ASSERT (mcm->joins_in_progress >= 0);
+ }
+ else
+ {
+		  /* Resend the join request, which may have been lost. */
+ send_join_or_leave_request (mcm, s->index,
+ 1 /* is_join */);
+
+ /* We're *not* alone, retry for as long as it takes */
+ if (mcm->relay_state == MC_RELAY_STATE_SLAVE)
+ s->join_timeout = vlib_time_now (vm) + 2.0;
+
+ if (MC_EVENT_LOGGING > 0)
+ {
+ ELOG_TYPE_DECLARE (e) = {
+ .format = "stream %d resend join request",
+ };
+ ELOG (mcm->elog_main, e, s->index);
+ }
+ }
+ }
+ }
+
+ vlib_process_suspend (vm, .5);
+ }
+
+ return 0; /* not likely */
+}
+
+static void serialize_mc_register_stream_name (serialize_main_t * m, va_list * va)
+{
+ char * name = va_arg (*va, char *);
+ serialize_cstring (m, name);
+}
+
+static void elog_stream_name (char * buf, int n_buf_bytes, char * v)
+{
+ memcpy (buf, v, clib_min (n_buf_bytes - 1, vec_len (v)));
+ buf[n_buf_bytes - 1] = 0;
+}
+
+static void unserialize_mc_register_stream_name (serialize_main_t * m, va_list * va)
+{
+ mc_main_t * mcm = va_arg (*va, mc_main_t *);
+ char * name;
+ mc_stream_t * s;
+ uword * p;
+
+ unserialize_cstring (m, &name);
+
+ if ((p = hash_get_mem (mcm->stream_index_by_name, name)))
+ {
+ if (MC_EVENT_LOGGING > 0)
+ {
+ ELOG_TYPE_DECLARE (e) = {
+ .format = "stream index %d already named %s",
+ .format_args = "i4s16",
+ };
+ struct { u32 stream_index; char name[16]; } * ed;
+ ed = ELOG_DATA (mcm->elog_main, e);
+ ed->stream_index = p[0];
+ elog_stream_name (ed->name, sizeof (ed->name), name);
+ }
+
+ vec_free (name);
+ return;
+ }
+
+ vec_add2 (mcm->stream_vector, s, 1);
+ mc_stream_init (s);
+ s->state = MC_STREAM_STATE_name_known;
+ s->index = s - mcm->stream_vector;
+ s->config.name = name;
+
+ if (MC_EVENT_LOGGING > 0)
+ {
+ ELOG_TYPE_DECLARE (e) = {
+ .format = "stream index %d named %s",
+ .format_args = "i4s16",
+ };
+ struct { u32 stream_index; char name[16]; } * ed;
+ ed = ELOG_DATA (mcm->elog_main, e);
+ ed->stream_index = s->index;
+ elog_stream_name (ed->name, sizeof (ed->name), name);
+ }
+
+ hash_set_mem (mcm->stream_index_by_name, name, s->index);
+
+ p = hash_get (mcm->procs_waiting_for_stream_name_by_name, name);
+ if (p)
+ {
+ vlib_one_time_waiting_process_t * wp, ** w;
+ w = pool_elt_at_index (mcm->procs_waiting_for_stream_name_pool, p[0]);
+ vec_foreach (wp, w[0])
+ vlib_signal_one_time_waiting_process (mcm->vlib_main, wp);
+ pool_put (mcm->procs_waiting_for_stream_name_pool, w);
+ hash_unset_mem (mcm->procs_waiting_for_stream_name_by_name, name);
+ }
+}
+
+MC_SERIALIZE_MSG (mc_register_stream_name_msg, static) = {
+ .name = "mc_register_stream_name",
+ .serialize = serialize_mc_register_stream_name,
+ .unserialize = unserialize_mc_register_stream_name,
+};
+
+void
+mc_rx_buffer_unserialize (mc_main_t * mcm,
+ mc_stream_t * stream,
+ mc_peer_id_t peer_id,
+ u32 buffer_index)
+{ mc_unserialize (mcm, stream, buffer_index); }
+
+static u8 *
+mc_internal_catchup_snapshot (mc_main_t * mcm,
+ u8 * data_vector,
+ u32 last_global_sequence_processed)
+{
+ serialize_main_t m;
+
+ /* Append serialized data to data vector. */
+ serialize_open_vector (&m, data_vector);
+ m.stream.current_buffer_index = vec_len (data_vector);
+
+ serialize (&m, serialize_mc_main, mcm);
+ return serialize_close_vector (&m);
+}
+
+static void
+mc_internal_catchup (mc_main_t * mcm,
+ u8 * data,
+ u32 n_data_bytes)
+{
+ serialize_main_t s;
+
+ unserialize_open_data (&s, data, n_data_bytes);
+
+ unserialize (&s, unserialize_mc_main, mcm);
+}
+
+/* Overridden by the application layer; not actually used here. */
+void mc_stream_join_process_hold (void) __attribute__ ((weak));
+void mc_stream_join_process_hold (void) { }
+
+static u32
+mc_stream_join_helper (mc_main_t * mcm,
+ mc_stream_config_t * config,
+ u32 is_internal)
+{
+ mc_stream_t * s;
+ vlib_main_t * vm = mcm->vlib_main;
+
+ s = 0;
+ if (! is_internal)
+ {
+ uword * p;
+
+ /* Already have a stream with given name? */
+ if ((s = mc_stream_by_name (mcm, config->name)))
+ {
+ /* Already joined and ready? */
+ if (s->state == MC_STREAM_STATE_ready)
+ return s->index;
+ }
+
+ /* First join MC internal stream. */
+ if (! mcm->stream_vector
+ || (mcm->stream_vector[MC_STREAM_INDEX_INTERNAL].state
+ == MC_STREAM_STATE_invalid))
+ {
+ static mc_stream_config_t c = {
+ .name = "mc-internal",
+ .rx_buffer = mc_rx_buffer_unserialize,
+ .catchup = mc_internal_catchup,
+ .catchup_snapshot = mc_internal_catchup_snapshot,
+ };
+
+ c.save_snapshot = config->save_snapshot;
+
+ mc_stream_join_helper (mcm, &c, /* is_internal */ 1);
+ }
+
+ /* If stream is still unknown register this name and wait for
+ sequenced message to name stream. This way all peers agree
+ on stream name to index mappings. */
+ s = mc_stream_by_name (mcm, config->name);
+ if (! s)
+ {
+ vlib_one_time_waiting_process_t * wp, ** w;
+ u8 * name_copy = format (0, "%s", config->name);
+
+ mc_serialize_stream (mcm,
+ MC_STREAM_INDEX_INTERNAL,
+ &mc_register_stream_name_msg,
+ config->name);
+
+ /* Wait for this stream to be named. */
+ p = hash_get_mem (mcm->procs_waiting_for_stream_name_by_name, name_copy);
+ if (p)
+ w = pool_elt_at_index (mcm->procs_waiting_for_stream_name_pool, p[0]);
+ else
+ {
+ pool_get (mcm->procs_waiting_for_stream_name_pool, w);
+ if (! mcm->procs_waiting_for_stream_name_by_name)
+ mcm->procs_waiting_for_stream_name_by_name
+ = hash_create_string (/* elts */ 0, /* value size */ sizeof (uword));
+ hash_set_mem (mcm->procs_waiting_for_stream_name_by_name,
+ name_copy,
+ w - mcm->procs_waiting_for_stream_name_pool);
+ w[0] = 0;
+ }
+
+ vec_add2 (w[0], wp, 1);
+ vlib_current_process_wait_for_one_time_event (vm, wp);
+ vec_free (name_copy);
+ }
+
+ /* Name should be known now. */
+ s = mc_stream_by_name (mcm, config->name);
+ ASSERT (s != 0);
+ ASSERT (s->state == MC_STREAM_STATE_name_known);
+ }
+
+ if (! s)
+ {
+ vec_add2 (mcm->stream_vector, s, 1);
+ mc_stream_init (s);
+ s->index = s - mcm->stream_vector;
+ }
+
+ {
+ /* Save name since we could have already used it as hash key. */
+ char * name_save = s->config.name;
+
+ s->config = config[0];
+
+ if (name_save)
+ s->config.name = name_save;
+ }
+
+ if (s->config.window_size == 0)
+ s->config.window_size = 8;
+
+ if (s->config.retry_interval == 0.0)
+ s->config.retry_interval = 1.0;
+
+ /* Sanity. */
+ ASSERT (s->config.retry_interval < 30);
+
+ if (s->config.retry_limit == 0)
+ s->config.retry_limit = 7;
+
+ s->state = MC_STREAM_STATE_join_in_progress;
+ if (! s->peer_index_by_id.hash)
+ mhash_init (&s->peer_index_by_id, sizeof (uword), sizeof (mc_peer_id_t));
+
+ /* If we don't hear from someone in 5 seconds, we're alone */
+ s->join_timeout = vlib_time_now (vm) + 5.0;
+ mcm->joins_in_progress++;
+
+ if (MC_EVENT_LOGGING > 0)
+ {
+ ELOG_TYPE_DECLARE (e) = {
+ .format = "stream index %d join request %s",
+ .format_args = "i4s16",
+ };
+ struct { u32 stream_index; char name[16]; } * ed;
+ ed = ELOG_DATA (mcm->elog_main, e);
+ ed->stream_index = s->index;
+ elog_stream_name (ed->name, sizeof (ed->name), s->config.name);
+ }
+
+ send_join_or_leave_request (mcm, s->index, 1 /* join */);
+
+ vlib_current_process_wait_for_one_time_event_vector
+ (vm, &s->procs_waiting_for_join_done);
+
+ if (MC_EVENT_LOGGING)
+ {
+ ELOG_TYPE (e, "join complete stream %d");
+ ELOG (mcm->elog_main, e, s->index);
+ }
+
+ return s->index;
+}
+
+u32 mc_stream_join (mc_main_t * mcm, mc_stream_config_t * config)
+{ return mc_stream_join_helper (mcm, config, /* is_internal */ 0); }
+
+void mc_stream_leave (mc_main_t * mcm, u32 stream_index)
+{
+ mc_stream_t * s = mc_stream_by_index (mcm, stream_index);
+
+ if (! s)
+ return;
+
+ if (MC_EVENT_LOGGING)
+ {
+ ELOG_TYPE_DECLARE (t) = {
+ .format = "leave-stream: %d",
+ .format_args = "i4",
+ };
+ struct { u32 index; } * ed;
+ ed = ELOG_DATA (mcm->elog_main, t);
+ ed->index = stream_index;
+ }
+
+ send_join_or_leave_request (mcm, stream_index, 0 /* is_join */);
+ mc_stream_free (s);
+ s->state = MC_STREAM_STATE_name_known;
+}
+
+void mc_msg_join_or_leave_request_handler (mc_main_t * mcm,
+ mc_msg_join_or_leave_request_t * req,
+ u32 buffer_index)
+{
+ mc_stream_t * s;
+ mc_msg_join_reply_t * rep;
+ u32 bi;
+
+ mc_byte_swap_msg_join_or_leave_request (req);
+
+ s = mc_stream_by_index (mcm, req->stream_index);
+ if (! s || s->state != MC_STREAM_STATE_ready)
+ return;
+
+ /* If the peer is joining, create it */
+ if (req->is_join)
+ {
+ mc_stream_t * this_s;
+
+ /* We're not in a position to catch up a peer until all
+ stream joins are complete. */
+ if (0)
+ {
+	  /* XXX This is hard to test, so it is disabled for now. */
+ vec_foreach (this_s, mcm->stream_vector)
+ {
+ if (this_s->state != MC_STREAM_STATE_ready
+ && this_s->state != MC_STREAM_STATE_name_known)
+ return;
+ }
+ }
+ else
+ if (mcm->joins_in_progress > 0)
+ return;
+
+ (void) get_or_create_peer_with_id (mcm,
+ s,
+ req->peer_id,
+ /* created */ 0);
+
+ rep = mc_get_vlib_buffer (mcm->vlib_main, sizeof (rep[0]), &bi);
+ memset (rep, 0, sizeof (rep[0]));
+ rep->type = MC_MSG_TYPE_join_reply;
+ rep->stream_index = req->stream_index;
+
+ mc_byte_swap_msg_join_reply (rep);
+ /* These two are already in network byte order... */
+ rep->peer_id = mcm->transport.our_ack_peer_id;
+ rep->catchup_peer_id = mcm->transport.our_catchup_peer_id;
+
+ mcm->transport.tx_buffer (mcm->transport.opaque, MC_TRANSPORT_JOIN, bi);
+ }
+ else
+ {
+ if (s->config.peer_died)
+ s->config.peer_died (mcm, s, req->peer_id);
+ }
+}
+
+void mc_msg_join_reply_handler (mc_main_t * mcm,
+ mc_msg_join_reply_t * mp,
+ u32 buffer_index)
+{
+ mc_stream_t * s;
+
+ mc_byte_swap_msg_join_reply (mp);
+
+ s = mc_stream_by_index (mcm, mp->stream_index);
+
+ if (! s || s->state != MC_STREAM_STATE_join_in_progress)
+ return;
+
+ /* Switch to catchup state; next join reply
+ for this stream will be ignored. */
+ s->state = MC_STREAM_STATE_catchup;
+
+ mcm->joins_in_progress--;
+ mcm->transport.catchup_request_fun (mcm->transport.opaque,
+ mp->stream_index,
+ mp->catchup_peer_id);
+}
+
+void mc_wait_for_stream_ready (mc_main_t * m, char * stream_name)
+{
+ mc_stream_t * s;
+
+ while (1)
+ {
+ s = mc_stream_by_name (m, stream_name);
+ if (s)
+ break;
+ vlib_process_suspend (m->vlib_main, .1);
+ }
+
+ /* It's OK to send a message in catchup and ready states. */
+ if (s->state == MC_STREAM_STATE_catchup
+ || s->state == MC_STREAM_STATE_ready)
+ return;
+
+ /* Otherwise we are waiting for a join to finish. */
+ vlib_current_process_wait_for_one_time_event_vector
+ (m->vlib_main, &s->procs_waiting_for_join_done);
+}
+
+u32 mc_stream_send (mc_main_t * mcm, u32 stream_index, u32 buffer_index)
+{
+ mc_stream_t * s = mc_stream_by_index (mcm, stream_index);
+ vlib_main_t * vm = mcm->vlib_main;
+ mc_retry_t * r;
+ mc_msg_user_request_t * mp;
+ vlib_buffer_t * b = vlib_get_buffer (vm, buffer_index);
+ u32 ri;
+
+ if (! s)
+ return 0;
+
+ if (s->state != MC_STREAM_STATE_ready)
+ vlib_current_process_wait_for_one_time_event_vector
+ (vm, &s->procs_waiting_for_join_done);
+
+ while (pool_elts (s->retry_pool) >= s->config.window_size)
+ {
+ vlib_current_process_wait_for_one_time_event_vector
+ (vm, &s->procs_waiting_for_open_window);
+ }
+
+ pool_get (s->retry_pool, r);
+ ri = r - s->retry_pool;
+
+ r->prev_index = s->retry_tail_index;
+ r->next_index = ~0;
+ s->retry_tail_index = ri;
+
+ if (r->prev_index == ~0)
+ s->retry_head_index = ri;
+ else
+ {
+ mc_retry_t * p = pool_elt_at_index (s->retry_pool, r->prev_index);
+ p->next_index = ri;
+ }
+
+ vlib_buffer_advance (b, -sizeof (mp[0]));
+ mp = vlib_buffer_get_current (b);
+
+ mp->peer_id = mcm->transport.our_ack_peer_id;
+  /* mp->global_sequence is set by the relay agent; 0xdeadbeef is just
+     a recognizable placeholder. */
+ mp->global_sequence = 0xdeadbeef;
+ mp->stream_index = s->index;
+ mp->local_sequence = s->our_local_sequence++;
+ mp->n_data_bytes = vlib_buffer_index_length_in_chain (vm, buffer_index) - sizeof (mp[0]);
+
+ r->buffer_index = buffer_index;
+ r->local_sequence = mp->local_sequence;
+ r->sent_at = vlib_time_now(vm);
+ r->n_retries = 0;
+
+ /* Retry will be freed when all currently known peers have acked. */
+ vec_validate (r->unacked_by_peer_bitmap, vec_len (s->all_peer_bitmap) - 1);
+ vec_copy (r->unacked_by_peer_bitmap, s->all_peer_bitmap);
+
+ hash_set (s->retry_index_by_local_sequence, r->local_sequence, r - s->retry_pool);
+
+ elog_tx_msg (mcm, s->index, mp->local_sequence, r->n_retries);
+
+ mc_byte_swap_msg_user_request (mp);
+
+ mcm->transport.tx_buffer (mcm->transport.opaque, MC_TRANSPORT_USER_REQUEST_TO_RELAY, buffer_index);
+
+ s->user_requests_sent++;
+
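+  /* A return value of 0 means the window is now full: the next
+     mc_stream_send() on this stream will suspend in the wait loop
+     above until enough acks arrive. */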
+ /* return amount of window remaining */
+ return s->config.window_size - pool_elts (s->retry_pool);
+}
+
+void mc_msg_user_request_handler (mc_main_t * mcm, mc_msg_user_request_t * mp, u32 buffer_index)
+{
+ vlib_main_t * vm = mcm->vlib_main;
+ mc_stream_t * s;
+ mc_stream_peer_t * peer;
+ i32 seq_cmp_result;
+ static int once=0;
+
+ mc_byte_swap_msg_user_request (mp);
+
+ s = mc_stream_by_index (mcm, mp->stream_index);
+
+ /* Not signed up for this stream? Turf-o-matic */
+ if (! s || s->state != MC_STREAM_STATE_ready)
+ {
+ vlib_buffer_free_one (vm, buffer_index);
+ return;
+ }
+
+ /* Find peer, including ourselves. */
+ peer = get_or_create_peer_with_id (mcm,
+ s, mp->peer_id,
+ /* created */ 0);
+
+ seq_cmp_result = mc_seq_cmp (mp->local_sequence,
+ peer->last_sequence_received + 1);
+
+ if (MC_EVENT_LOGGING > 0)
+ {
+ ELOG_TYPE_DECLARE (e) = {
+ .format = "rx-msg: peer %s stream %d rx seq %d seq_cmp %d",
+ .format_args = "T4i4i4i4",
+ };
+ struct { u32 peer, stream_index, rx_sequence; i32 seq_cmp_result; } * ed;
+ ed = ELOG_DATA (mcm->elog_main, e);
+ ed->peer = elog_id_for_peer_id (mcm, peer->id.as_u64);
+ ed->stream_index = mp->stream_index;
+ ed->rx_sequence = mp->local_sequence;
+ ed->seq_cmp_result = seq_cmp_result;
+ }
+
+ if (0 && mp->stream_index == 1 && once == 0)
+ {
+ once = 1;
+ ELOG_TYPE (e, "FAKE lost msg on stream 1");
+ ELOG (mcm->elog_main,e,0);
+ return;
+ }
+
+ peer->last_sequence_received += seq_cmp_result == 0;
+ s->user_requests_received++;
+
+ if (seq_cmp_result > 0)
+ peer->stats.n_msgs_from_future += 1;
+
+ /* Send ack even if msg from future */
+ if (1)
+ {
+ mc_msg_user_ack_t * rp;
+ u32 bi;
+
+ rp = mc_get_vlib_buffer (vm, sizeof (rp[0]), &bi);
+ rp->peer_id = mcm->transport.our_ack_peer_id;
+ rp->stream_index = s->index;
+ rp->local_sequence = mp->local_sequence;
+ rp->seq_cmp_result = seq_cmp_result;
+
+ if (MC_EVENT_LOGGING > 0)
+ {
+ ELOG_TYPE_DECLARE (e) = {
+ .format = "tx-ack: stream %d local seq %d",
+ .format_args = "i4i4",
+ };
+ struct { u32 stream_index; u32 local_sequence; } * ed;
+ ed = ELOG_DATA (mcm->elog_main, e);
+ ed->stream_index = rp->stream_index;
+ ed->local_sequence = rp->local_sequence;
+ }
+
+ mc_byte_swap_msg_user_ack (rp);
+
+ mcm->transport.tx_ack (mcm->transport.opaque, mp->peer_id, bi);
+ /* Msg from past? If so, free the buffer... */
+ if (seq_cmp_result < 0)
+ {
+ vlib_buffer_free_one (vm, buffer_index);
+ peer->stats.n_msgs_from_past += 1;
+ }
+ }
+
+ if (seq_cmp_result == 0)
+ {
+ vlib_buffer_t * b = vlib_get_buffer (vm, buffer_index);
+ switch (s->state)
+ {
+ case MC_STREAM_STATE_ready:
+ vlib_buffer_advance (b, sizeof (mp[0]));
+ s->config.rx_buffer(mcm, s, mp->peer_id, buffer_index);
+
+ /* Stream vector can change address via rx callback for mc-internal
+ stream. */
+ s = mc_stream_by_index (mcm, mp->stream_index);
+ ASSERT (s != 0);
+ s->last_global_sequence_processed = mp->global_sequence;
+ break;
+
+ case MC_STREAM_STATE_catchup:
+ clib_fifo_add1 (s->catchup_fifo, buffer_index);
+ break;
+
+ default:
+ clib_warning ("stream in unknown state %U",
+ format_mc_stream_state, s->state);
+ break;
+ }
+ }
+}
+
+void mc_msg_user_ack_handler (mc_main_t * mcm, mc_msg_user_ack_t * mp, u32 buffer_index)
+{
+ vlib_main_t * vm = mcm->vlib_main;
+ uword *p;
+ mc_stream_t * s;
+ mc_stream_peer_t * peer;
+ mc_retry_t * r;
+ int peer_created = 0;
+
+ mc_byte_swap_msg_user_ack (mp);
+
+ s = mc_stream_by_index (mcm, mp->stream_index);
+
+ if (MC_EVENT_LOGGING > 0)
+ {
+ ELOG_TYPE_DECLARE (t) = {
+ .format = "rx-ack: local seq %d peer %s seq_cmp_result %d",
+ .format_args = "i4T4i4",
+ };
+ struct { u32 local_sequence; u32 peer; i32 seq_cmp_result;} * ed;
+ ed = ELOG_DATA (mcm->elog_main, t);
+ ed->local_sequence = mp->local_sequence;
+ ed->peer = elog_id_for_peer_id (mcm, mp->peer_id.as_u64);
+ ed->seq_cmp_result = mp->seq_cmp_result;
+ }
+
+ /* Unknown stream? */
+ if (! s)
+ return;
+
+ /* Find the peer which just ack'ed. */
+ peer = get_or_create_peer_with_id (mcm, s, mp->peer_id,
+ /* created */ &peer_created);
+
+ /*
+ * Peer reports message from the future. If it's not in the retry
+ * fifo, look for a retired message.
+ */
+ if (mp->seq_cmp_result > 0)
+ {
+ p = hash_get (s->retry_index_by_local_sequence, mp->local_sequence -
+ mp->seq_cmp_result);
+ if (p == 0)
+ mc_resend_retired (mcm, s, mp->local_sequence - mp->seq_cmp_result);
+
+ /* Normal retry should fix it... */
+ return;
+ }
+
+ /*
+ * Pointer to the indicated retry fifo entry.
+ * Worth hashing because we could use a window size of 100 or 1000.
+ */
+ p = hash_get (s->retry_index_by_local_sequence, mp->local_sequence);
+
+ /*
+   * Is this a duplicate ACK, received after we've retired the
+   * fifo entry?  This can happen when learning about new peers.
+ */
+ if (p == 0)
+ {
+ if (MC_EVENT_LOGGING > 0)
+ {
+ ELOG_TYPE_DECLARE (t) =
+ {
+ .format = "ack: for seq %d from peer %s no fifo elt",
+ .format_args = "i4T4",
+ };
+ struct { u32 seq; u32 peer; } * ed;
+ ed = ELOG_DATA (mcm->elog_main, t);
+ ed->seq = mp->local_sequence;
+ ed->peer = elog_id_for_peer_id (mcm, mp->peer_id.as_u64);
+ }
+
+ return;
+ }
+
+ r = pool_elt_at_index (s->retry_pool, p[0]);
+
+ /* Make sure that this new peer ACKs our msgs from now on */
+ if (peer_created)
+ {
+ mc_retry_t *later_retry = next_retry (s, r);
+
+ while (later_retry)
+ {
+ later_retry->unacked_by_peer_bitmap =
+ clib_bitmap_ori (later_retry->unacked_by_peer_bitmap,
+ peer - s->peers);
+ later_retry = next_retry (s, later_retry);
+ }
+ }
+
+ ASSERT (mp->local_sequence == r->local_sequence);
+
+ /* If we weren't expecting to hear from this peer */
+ if (!peer_created &&
+ ! clib_bitmap_get (r->unacked_by_peer_bitmap, peer - s->peers))
+ {
+ if (MC_EVENT_LOGGING > 0)
+ {
+ ELOG_TYPE_DECLARE (t) =
+ {
+ .format = "dup-ack: for seq %d from peer %s",
+ .format_args = "i4T4",
+ };
+ struct { u32 seq; u32 peer; } * ed;
+ ed = ELOG_DATA (mcm->elog_main, t);
+ ed->seq = r->local_sequence;
+ ed->peer = elog_id_for_peer_id (mcm, peer->id.as_u64);
+ }
+ if (! clib_bitmap_is_zero (r->unacked_by_peer_bitmap))
+ return;
+ }
+
+ if (MC_EVENT_LOGGING > 0)
+ {
+ ELOG_TYPE_DECLARE (t) =
+ {
+ .format = "ack: for seq %d from peer %s",
+ .format_args = "i4T4",
+ };
+ struct { u32 seq; u32 peer; } * ed;
+ ed = ELOG_DATA (mcm->elog_main, t);
+ ed->seq = mp->local_sequence;
+ ed->peer = elog_id_for_peer_id (mcm, peer->id.as_u64);
+ }
+
+ r->unacked_by_peer_bitmap =
+ clib_bitmap_andnoti (r->unacked_by_peer_bitmap, peer - s->peers);
+
+ /* Not all clients have ack'ed */
+ if (! clib_bitmap_is_zero (r->unacked_by_peer_bitmap))
+ {
+ return;
+ }
+ if (MC_EVENT_LOGGING > 0)
+ {
+ ELOG_TYPE_DECLARE (t) =
+ {
+ .format = "ack: retire fifo elt loc seq %d after %d acks",
+ .format_args = "i4i4",
+ };
+ struct { u32 seq; u32 npeers; } * ed;
+ ed = ELOG_DATA (mcm->elog_main, t);
+ ed->seq = r->local_sequence;
+ ed->npeers = pool_elts (s->peers);
+ }
+
+ hash_unset (s->retry_index_by_local_sequence, mp->local_sequence);
+ mc_retry_free (mcm, s, r);
+ remove_retry_from_pool (s, r);
+ maybe_send_window_open_event (vm, s);
+}
+
+#define EVENT_MC_SEND_CATCHUP_DATA 0
+
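+/*
+ * Process node which sends catchup snapshots outside of the request
+ * handler that builds them. Each event is an index into
+ * mcm->catchup_process_args; the transport's catchup_send_fun
+ * consumes (and eventually frees) the snapshot vector.
+ */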
+static uword
+mc_catchup_process (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * f)
+{
+ mc_main_t * mcm = mc_node_get_main (node);
+ uword *event_data = 0;
+ mc_catchup_process_arg_t * args;
+ int i;
+
+ while (1)
+ {
+ if (event_data)
+ _vec_len(event_data) = 0;
+ vlib_process_wait_for_event_with_type (vm, &event_data, EVENT_MC_SEND_CATCHUP_DATA);
+
+ for (i = 0; i < vec_len(event_data); i++)
+ {
+ args = pool_elt_at_index (mcm->catchup_process_args,
+ event_data[i]);
+
+ mcm->transport.catchup_send_fun (mcm->transport.opaque,
+ args->catchup_opaque,
+ args->catchup_snapshot);
+
+ /* Send function will free snapshot data vector. */
+ pool_put (mcm->catchup_process_args, args);
+ }
+ }
+
+ return 0; /* not likely */
+}
+
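+/*
+ * Serialized stream layout, as written below: peer count as a u32,
+ * then for each pool peer its 8-byte id followed by
+ * last_sequence_received, then the all-peer bitmap.
+ */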
+static void serialize_mc_stream (serialize_main_t * m, va_list * va)
+{
+ mc_stream_t * s = va_arg (*va, mc_stream_t *);
+ mc_stream_peer_t * p;
+
+ serialize_integer (m, pool_elts (s->peers), sizeof (u32));
+ pool_foreach (p, s->peers, ({
+ u8 * x = serialize_get (m, sizeof (p->id));
+ memcpy (x, p->id.as_u8, sizeof (p->id));
+ serialize_integer (m, p->last_sequence_received,
+ sizeof (p->last_sequence_received));
+ }));
+ serialize_bitmap (m, s->all_peer_bitmap);
+}
+
+void unserialize_mc_stream (serialize_main_t * m, va_list * va)
+{
+ mc_stream_t * s = va_arg (*va, mc_stream_t *);
+ u32 i, n_peers;
+ mc_stream_peer_t * p;
+
+ unserialize_integer (m, &n_peers, sizeof (u32));
+ mhash_init (&s->peer_index_by_id, sizeof (uword), sizeof (mc_peer_id_t));
+ for (i = 0; i < n_peers; i++)
+ {
+ u8 * x;
+ pool_get (s->peers, p);
+ x = unserialize_get (m, sizeof (p->id));
+ memcpy (p->id.as_u8, x, sizeof (p->id));
+ unserialize_integer (m, &p->last_sequence_received, sizeof (p->last_sequence_received));
+ mhash_set (&s->peer_index_by_id, &p->id, p - s->peers, /* old_value */ 0);
+ }
+ s->all_peer_bitmap = unserialize_bitmap (m);
+
+  /* This is really bad: a serialized stream should always carry a
+     peer bitmap. */
+  if (!s->all_peer_bitmap)
+    clib_warning ("BUG: stream %s all_peer_bitmap NULL", s->config.name);
+}
+
+void mc_msg_catchup_request_handler (mc_main_t * mcm, mc_msg_catchup_request_t * req, u32 catchup_opaque)
+{
+ vlib_main_t * vm = mcm->vlib_main;
+ mc_stream_t * s;
+ mc_catchup_process_arg_t * args;
+
+ mc_byte_swap_msg_catchup_request (req);
+
+ s = mc_stream_by_index (mcm, req->stream_index);
+ if (! s || s->state != MC_STREAM_STATE_ready)
+ return;
+
+ if (MC_EVENT_LOGGING > 0)
+ {
+ ELOG_TYPE_DECLARE (t) =
+ {
+ .format = "catchup-request: from %s stream %d",
+ .format_args = "T4i4",
+ };
+ struct { u32 peer, stream; } * ed;
+ ed = ELOG_DATA (mcm->elog_main, t);
+ ed->peer = elog_id_for_peer_id (mcm, req->peer_id.as_u64);
+ ed->stream = req->stream_index;
+ }
+
+  /*
+   * The application has to snapshot its data structures right
+   * here, right now. If we process any messages after
+   * noting the last global sequence we've processed, the client
+   * won't be able to accurately reconstruct our data structures.
+   *
+   * Once the data structures are e.g. vec_dup()'ed, we
+   * send the resulting messages from a separate process, to
+   * make sure that we don't cause a bunch of message retransmissions.
+   */
+ pool_get (mcm->catchup_process_args, args);
+
+ args->stream_index = s - mcm->stream_vector;
+ args->catchup_opaque = catchup_opaque;
+ args->catchup_snapshot = 0;
+
+ /* Construct catchup reply and snapshot state for stream to send as
+ catchup reply payload. */
+ {
+ mc_msg_catchup_reply_t * rep;
+ serialize_main_t m;
+
+ vec_resize (args->catchup_snapshot, sizeof (rep[0]));
+
+ rep = (void *) args->catchup_snapshot;
+
+ rep->peer_id = req->peer_id;
+ rep->stream_index = req->stream_index;
+ rep->last_global_sequence_included = s->last_global_sequence_processed;
+
+ /* Setup for serialize to append to catchup snapshot. */
+ serialize_open_vector (&m, args->catchup_snapshot);
+ m.stream.current_buffer_index = vec_len (m.stream.buffer);
+
+ serialize (&m, serialize_mc_stream, s);
+
+ args->catchup_snapshot = serialize_close_vector (&m);
+
+ /* Actually copy internal state */
+ args->catchup_snapshot = s->config.catchup_snapshot
+ (mcm,
+ args->catchup_snapshot,
+ rep->last_global_sequence_included);
+
+ rep = (void *) args->catchup_snapshot;
+ rep->n_data_bytes = vec_len (args->catchup_snapshot) - sizeof (rep[0]);
+
+ mc_byte_swap_msg_catchup_reply (rep);
+ }
+
+ /* now go send it... */
+ vlib_process_signal_event (vm, mcm->catchup_process,
+ EVENT_MC_SEND_CATCHUP_DATA,
+ args - mcm->catchup_process_args);
+}
+
+#define EVENT_MC_UNSERIALIZE_BUFFER 0
+#define EVENT_MC_UNSERIALIZE_CATCHUP 1
+
+void mc_msg_catchup_reply_handler (mc_main_t * mcm, mc_msg_catchup_reply_t * mp, u32 catchup_opaque)
+{
+ vlib_process_signal_event (mcm->vlib_main,
+ mcm->unserialize_process,
+ EVENT_MC_UNSERIALIZE_CATCHUP,
+ pointer_to_uword (mp));
+}
+
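+/*
+ * Apply a catchup reply: restore peer state from the serialized
+ * stream, hand the remaining snapshot bytes to the user catchup
+ * callback, then drain buffers queued while catching up, replaying
+ * only those newer than the snapshot's last global sequence.
+ */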
+static void perform_catchup (mc_main_t * mcm, mc_msg_catchup_reply_t * mp)
+{
+ mc_stream_t * s;
+ i32 seq_cmp_result;
+
+ mc_byte_swap_msg_catchup_reply (mp);
+
+ s = mc_stream_by_index (mcm, mp->stream_index);
+
+ /* Never heard of this stream or already caught up. */
+ if (! s || s->state == MC_STREAM_STATE_ready)
+ return;
+
+ {
+ serialize_main_t m;
+ mc_stream_peer_t * p;
+ u32 n_stream_bytes;
+
+ /* For offline sim replay: save the entire catchup snapshot... */
+ if (s->config.save_snapshot)
+ s->config.save_snapshot (mcm, /* is_catchup */ 1, mp->data, mp->n_data_bytes);
+
+ unserialize_open_data (&m, mp->data, mp->n_data_bytes);
+ unserialize (&m, unserialize_mc_stream, s);
+
+ /* Make sure we start numbering our messages as expected */
+ pool_foreach (p, s->peers, ({
+ if (p->id.as_u64 == mcm->transport.our_ack_peer_id.as_u64)
+ s->our_local_sequence = p->last_sequence_received + 1;
+ }));
+
+ n_stream_bytes = m.stream.current_buffer_index;
+
+ /* No need to unserialize close; nothing to free. */
+
+ /* After serialized stream is user's catchup data. */
+ s->config.catchup (mcm, mp->data + n_stream_bytes,
+ mp->n_data_bytes - n_stream_bytes);
+ }
+
+ /* Vector could have been moved by catchup.
+ This can only happen for mc-internal stream. */
+ s = mc_stream_by_index (mcm, mp->stream_index);
+
+ s->last_global_sequence_processed = mp->last_global_sequence_included;
+
+ while (clib_fifo_elts (s->catchup_fifo))
+ {
+ mc_msg_user_request_t * gp;
+ u32 bi;
+ vlib_buffer_t * b;
+
+ clib_fifo_sub1(s->catchup_fifo, bi);
+
+ b = vlib_get_buffer (mcm->vlib_main, bi);
+ gp = vlib_buffer_get_current (b);
+
+ /* Make sure we're replaying "new" news */
+ seq_cmp_result = mc_seq_cmp (gp->global_sequence,
+ mp->last_global_sequence_included);
+
+ if (seq_cmp_result > 0)
+ {
+ vlib_buffer_advance (b, sizeof (gp[0]));
+ s->config.rx_buffer (mcm, s, gp->peer_id, bi);
+ s->last_global_sequence_processed = gp->global_sequence;
+
+ if (MC_EVENT_LOGGING)
+ {
+ ELOG_TYPE_DECLARE (t) = {
+ .format = "catchup replay local sequence 0x%x",
+ .format_args = "i4",
+ };
+ struct { u32 local_sequence; } * ed;
+ ed = ELOG_DATA (mcm->elog_main, t);
+ ed->local_sequence = gp->local_sequence;
+ }
+ }
+ else
+ {
+ if (MC_EVENT_LOGGING)
+ {
+ ELOG_TYPE_DECLARE (t) = {
+ .format = "catchup discard local sequence 0x%x",
+ .format_args = "i4",
+ };
+ struct { u32 local_sequence; } * ed;
+ ed = ELOG_DATA (mcm->elog_main, t);
+ ed->local_sequence = gp->local_sequence;
+ }
+
+ vlib_buffer_free_one (mcm->vlib_main, bi);
+ }
+ }
+
+ s->state = MC_STREAM_STATE_ready;
+
+  /* Now that we are caught up, wake up any processes waiting for the
+     join to finish. */
+ {
+ vlib_one_time_waiting_process_t * wp;
+ vec_foreach (wp, s->procs_waiting_for_join_done)
+ vlib_signal_one_time_waiting_process (mcm->vlib_main, wp);
+ if (s->procs_waiting_for_join_done)
+ _vec_len (s->procs_waiting_for_join_done) = 0;
+ }
+}
+
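+/*
+ * Mastership contention loop: send a master-assert once a second and
+ * wait. After a few one-second timeouts with no slave event, nobody
+ * with a better claim has objected, so we take mastership; a
+ * MC_RELAY_STATE_SLAVE event demotes us immediately.
+ */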
+static void this_node_maybe_master (mc_main_t * mcm)
+{
+ vlib_main_t * vm = mcm->vlib_main;
+ mc_msg_master_assert_t * mp;
+ uword event_type;
+ int timeouts = 0;
+ int is_master = mcm->relay_state == MC_RELAY_STATE_MASTER;
+ clib_error_t * error;
+ f64 now, time_last_master_assert = -1;
+ u32 bi;
+
+ while (1)
+ {
+ if (! mcm->we_can_be_relay_master)
+ {
+ mcm->relay_state = MC_RELAY_STATE_SLAVE;
+ if (MC_EVENT_LOGGING)
+ {
+ ELOG_TYPE (e, "become slave (config)");
+ ELOG (mcm->elog_main, e, 0);
+ }
+ return;
+ }
+
+ now = vlib_time_now (vm);
+ if (now >= time_last_master_assert + 1)
+ {
+ time_last_master_assert = now;
+ mp = mc_get_vlib_buffer (mcm->vlib_main, sizeof (mp[0]), &bi);
+
+ mp->peer_id = mcm->transport.our_ack_peer_id;
+ mp->global_sequence = mcm->relay_global_sequence;
+
+	  /*
+	   * These messages clog the event log; set MC_EVENT_LOGGING higher
+	   * if you want them.
+	   */
+ if (MC_EVENT_LOGGING > 1)
+ {
+ ELOG_TYPE_DECLARE (e) = {
+ .format = "tx-massert: peer %s global seq %u",
+ .format_args = "T4i4",
+ };
+ struct { u32 peer, global_sequence; } * ed;
+ ed = ELOG_DATA (mcm->elog_main, e);
+ ed->peer = elog_id_for_peer_id (mcm, mp->peer_id.as_u64);
+ ed->global_sequence = mp->global_sequence;
+ }
+
+ mc_byte_swap_msg_master_assert (mp);
+
+ error = mcm->transport.tx_buffer (mcm->transport.opaque, MC_TRANSPORT_MASTERSHIP, bi);
+ if (error)
+ clib_error_report (error);
+ }
+
+ vlib_process_wait_for_event_or_clock (vm, 1.0);
+ event_type = vlib_process_get_events (vm, /* no event data */ 0);
+
+ switch (event_type)
+ {
+ case ~0:
+ if (! is_master && timeouts++ > 2)
+ {
+ mcm->relay_state = MC_RELAY_STATE_MASTER;
+ mcm->relay_master_peer_id = mcm->transport.our_ack_peer_id.as_u64;
+ if (MC_EVENT_LOGGING)
+ {
+ ELOG_TYPE (e, "become master (was maybe_master)");
+ ELOG (mcm->elog_main, e, 0);
+ }
+ return;
+ }
+ break;
+
+	case MC_RELAY_STATE_SLAVE:
+	  /* Log before updating the state: checking after the
+	     assignment below would make this event unreachable. */
+	  if (MC_EVENT_LOGGING && mcm->relay_state != MC_RELAY_STATE_SLAVE)
+	    {
+	      ELOG_TYPE (e, "become slave (was maybe_master)");
+	      ELOG (mcm->elog_main, e, 0);
+	    }
+	  mcm->relay_state = MC_RELAY_STATE_SLAVE;
+	  return;
+ }
+ }
+}
+
+static void this_node_slave (mc_main_t * mcm)
+{
+ vlib_main_t * vm = mcm->vlib_main;
+ uword event_type;
+ int timeouts = 0;
+
+ if (MC_EVENT_LOGGING)
+ {
+ ELOG_TYPE (e, "become slave");
+ ELOG (mcm->elog_main, e, 0);
+ }
+
+ while (1)
+ {
+ vlib_process_wait_for_event_or_clock (vm, 1.0);
+ event_type = vlib_process_get_events (vm, /* no event data */ 0);
+
+ switch (event_type)
+ {
+ case ~0:
+ if (timeouts++ > 2)
+ {
+ mcm->relay_state = MC_RELAY_STATE_NEGOTIATE;
+ mcm->relay_master_peer_id = ~0ULL;
+ if (MC_EVENT_LOGGING)
+ {
+		      ELOG_TYPE (e, "timeouts; negotiate mastership");
+ ELOG (mcm->elog_main, e, 0);
+ }
+ return;
+ }
+ break;
+
+ case MC_RELAY_STATE_SLAVE:
+ mcm->relay_state = MC_RELAY_STATE_SLAVE;
+ timeouts = 0;
+ break;
+ }
+ }
+}
+
+static uword
+mc_mastership_process (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * f)
+{
+ mc_main_t * mcm = mc_node_get_main (node);
+
+ while (1)
+ {
+ switch (mcm->relay_state)
+ {
+ case MC_RELAY_STATE_NEGOTIATE:
+ case MC_RELAY_STATE_MASTER:
+ this_node_maybe_master(mcm);
+ break;
+
+ case MC_RELAY_STATE_SLAVE:
+ this_node_slave (mcm);
+ break;
+ }
+ }
+ return 0; /* not likely */
+}
+
+void mc_enable_disable_mastership (mc_main_t * mcm, int we_can_be_master)
+{
+ if (we_can_be_master != mcm->we_can_be_relay_master)
+ {
+ mcm->we_can_be_relay_master = we_can_be_master;
+ vlib_process_signal_event (mcm->vlib_main,
+ mcm->mastership_process,
+ MC_RELAY_STATE_NEGOTIATE, 0);
+ }
+}
+
+void mc_msg_master_assert_handler (mc_main_t * mcm, mc_msg_master_assert_t * mp, u32 buffer_index)
+{
+ mc_peer_id_t his_peer_id, our_peer_id;
+ i32 seq_cmp_result;
+ u8 signal_slave = 0;
+ u8 update_global_sequence = 0;
+
+ mc_byte_swap_msg_master_assert (mp);
+
+ his_peer_id = mp->peer_id;
+ our_peer_id = mcm->transport.our_ack_peer_id;
+
+ /* compare the incoming global sequence with ours */
+ seq_cmp_result = mc_seq_cmp (mp->global_sequence,
+ mcm->relay_global_sequence);
+
+  /* If the sender has a lower peer id and the sender's sequence is >=
+     our global sequence, we become a slave. Otherwise we keep
+     contending for mastership. */
+ if (mc_peer_id_compare (his_peer_id, our_peer_id) < 0 && seq_cmp_result >= 0)
+ {
+ vlib_process_signal_event (mcm->vlib_main,
+ mcm->mastership_process,
+ MC_RELAY_STATE_SLAVE, 0);
+ signal_slave = 1;
+ }
+
+ /* Update our global sequence. */
+ if (seq_cmp_result > 0)
+ {
+ mcm->relay_global_sequence = mp->global_sequence;
+ update_global_sequence = 1;
+ }
+
+ {
+ uword * q = mhash_get (&mcm->mastership_peer_index_by_id, &his_peer_id);
+ mc_mastership_peer_t * p;
+
+ if (q)
+ p = vec_elt_at_index (mcm->mastership_peers, q[0]);
+ else
+ {
+ vec_add2 (mcm->mastership_peers, p, 1);
+ p->peer_id = his_peer_id;
+ mhash_set (&mcm->mastership_peer_index_by_id, &p->peer_id, p - mcm->mastership_peers,
+ /* old_value */ 0);
+ }
+ p->time_last_master_assert_received = vlib_time_now (mcm->vlib_main);
+ }
+
+  /*
+   * These messages clog the event log; set MC_EVENT_LOGGING higher
+   * if you want them.
+   */
+ if (MC_EVENT_LOGGING > 1)
+ {
+ ELOG_TYPE_DECLARE (e) = {
+ .format = "rx-massert: peer %s global seq %u upd %d slave %d",
+ .format_args = "T4i4i1i1",
+ };
+ struct {
+ u32 peer;
+ u32 global_sequence;
+ u8 update_sequence;
+ u8 slave;
+ } * ed;
+ ed = ELOG_DATA (mcm->elog_main, e);
+ ed->peer = elog_id_for_peer_id (mcm, his_peer_id.as_u64);
+ ed->global_sequence = mp->global_sequence;
+ ed->update_sequence = update_global_sequence;
+ ed->slave = signal_slave;
+ }
+}
+
+static void
+mc_serialize_init (mc_main_t * mcm)
+{
+ mc_serialize_msg_t * m;
+ vlib_main_t * vm = vlib_get_main();
+
+ mcm->global_msg_index_by_name
+ = hash_create_string (/* elts */ 0, sizeof (uword));
+
+ m = vm->mc_msg_registrations;
+
+ while (m)
+ {
+ m->global_index = vec_len (mcm->global_msgs);
+ hash_set_mem (mcm->global_msg_index_by_name,
+ m->name,
+ m->global_index);
+ vec_add1 (mcm->global_msgs, m);
+ m = m->next_registration;
+ }
+}
+
+clib_error_t *
+mc_serialize_va (mc_main_t * mc,
+ u32 stream_index,
+ u32 multiple_messages_per_vlib_buffer,
+ mc_serialize_msg_t * msg,
+ va_list * va)
+{
+ mc_stream_t * s;
+ clib_error_t * error;
+ serialize_main_t * m = &mc->serialize_mains[VLIB_TX];
+ vlib_serialize_buffer_main_t * sbm = &mc->serialize_buffer_mains[VLIB_TX];
+ u32 bi, n_before, n_after, n_total, n_this_msg;
+ u32 si, gi;
+
+ if (! sbm->vlib_main)
+ {
+ sbm->tx.max_n_data_bytes_per_chain = 4096;
+ sbm->tx.free_list_index = VLIB_BUFFER_DEFAULT_FREE_LIST_INDEX;
+ }
+
+ if (sbm->first_buffer == 0)
+ serialize_open_vlib_buffer (m, mc->vlib_main, sbm);
+
+ n_before = serialize_vlib_buffer_n_bytes (m);
+
+ s = mc_stream_by_index (mc, stream_index);
+ gi = msg->global_index;
+ ASSERT (msg == vec_elt (mc->global_msgs, gi));
+
+ si = ~0;
+ if (gi < vec_len (s->stream_msg_index_by_global_index))
+ si = s->stream_msg_index_by_global_index[gi];
+
+ serialize_likely_small_unsigned_integer (m, si);
+
+  /* The first time a message is sent, use its name to identify it. */
+ if (si == ~0 || MSG_ID_DEBUG)
+ serialize_cstring (m, msg->name);
+
+ if (MSG_ID_DEBUG && MC_EVENT_LOGGING > 0)
+ {
+ ELOG_TYPE_DECLARE (e) = {
+ .format = "serialize-msg: %s index %d",
+ .format_args = "T4i4",
+ };
+ struct { u32 c[2]; } * ed;
+ ed = ELOG_DATA (mc->elog_main, e);
+ ed->c[0] = elog_id_for_msg_name (mc, msg->name);
+ ed->c[1] = si;
+ }
+
+ error = va_serialize (m, va);
+
+ n_after = serialize_vlib_buffer_n_bytes (m);
+ n_this_msg = n_after - n_before;
+ n_total = n_after + sizeof (mc_msg_user_request_t);
+
+  /* For the max message size, ignore the first message, in which the
+     string name is sent. */
+ if (si != ~0)
+ msg->max_n_bytes_serialized = clib_max (msg->max_n_bytes_serialized, n_this_msg);
+
+ if (! multiple_messages_per_vlib_buffer
+ || si == ~0
+ || n_total + msg->max_n_bytes_serialized > mc->transport.max_packet_size)
+ {
+ bi = serialize_close_vlib_buffer (m);
+ sbm->first_buffer = 0;
+ if (! error)
+ mc_stream_send (mc, stream_index, bi);
+ else if (bi != ~0)
+ vlib_buffer_free_one (mc->vlib_main, bi);
+ }
+
+ return error;
+}
+
+clib_error_t *
+mc_serialize_internal (mc_main_t * mc,
+ u32 stream_index,
+ u32 multiple_messages_per_vlib_buffer,
+ mc_serialize_msg_t * msg,
+ ...)
+{
+ vlib_main_t * vm = mc->vlib_main;
+ va_list va;
+ clib_error_t * error;
+
+ if (stream_index == ~0)
+ {
+ if (vm->mc_main && vm->mc_stream_index == ~0)
+ vlib_current_process_wait_for_one_time_event_vector
+ (vm, &vm->procs_waiting_for_mc_stream_join);
+ stream_index = vm->mc_stream_index;
+ }
+
+ va_start (va, msg);
+ error = mc_serialize_va (mc, stream_index,
+ multiple_messages_per_vlib_buffer,
+ msg, &va);
+ va_end (va);
+ return error;
+}
+
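+/*
+ * Decode one message from the serialize stream: a small-integer
+ * stream-local index, followed by the message name when the index is
+ * ~0 (first use of this message on the stream) or whenever
+ * MSG_ID_DEBUG is set. Unknown names are skipped; new names are
+ * bound to a fresh stream-local index. Returns nonzero if the
+ * message was recognized.
+ */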
+uword mc_unserialize_message (mc_main_t * mcm,
+ mc_stream_t * s,
+ serialize_main_t * m)
+{
+ mc_serialize_stream_msg_t * sm;
+ u32 gi, si;
+
+ si = unserialize_likely_small_unsigned_integer (m);
+
+ if (! (si == ~0 || MSG_ID_DEBUG))
+ {
+ sm = vec_elt_at_index (s->stream_msgs, si);
+ gi = sm->global_index;
+ }
+ else
+ {
+ char * name;
+
+ unserialize_cstring (m, &name);
+
+ if (MSG_ID_DEBUG && MC_EVENT_LOGGING > 0)
+ {
+ ELOG_TYPE_DECLARE (e) = {
+ .format = "unserialize-msg: %s rx index %d",
+ .format_args = "T4i4",
+ };
+ struct { u32 c[2]; } * ed;
+ ed = ELOG_DATA (mcm->elog_main, e);
+ ed->c[0] = elog_id_for_msg_name (mcm, name);
+ ed->c[1] = si;
+ }
+
+ {
+ uword * p = hash_get_mem (mcm->global_msg_index_by_name, name);
+ gi = p ? p[0] : ~0;
+ }
+
+ /* Unknown message? */
+ if (gi == ~0)
+ {
+ vec_free (name);
+ goto done;
+ }
+
+ vec_validate_init_empty (s->stream_msg_index_by_global_index, gi, ~0);
+ si = s->stream_msg_index_by_global_index[gi];
+
+ /* Stream local index unknown? Create it. */
+ if (si == ~0)
+ {
+ vec_add2 (s->stream_msgs, sm, 1);
+
+ si = sm - s->stream_msgs;
+ sm->global_index = gi;
+ s->stream_msg_index_by_global_index[gi] = si;
+
+ if (MC_EVENT_LOGGING > 0)
+ {
+ ELOG_TYPE_DECLARE (e) = {
+ .format = "msg-bind: stream %d %s to index %d",
+ .format_args = "i4T4i4",
+ };
+ struct { u32 c[3]; } * ed;
+ ed = ELOG_DATA (mcm->elog_main, e);
+ ed->c[0] = s->index;
+ ed->c[1] = elog_id_for_msg_name (mcm, name);
+ ed->c[2] = si;
+ }
+ }
+ else
+ {
+ sm = vec_elt_at_index (s->stream_msgs, si);
+ if (gi != sm->global_index && MC_EVENT_LOGGING > 0)
+ {
+ ELOG_TYPE_DECLARE (e) = {
+ .format = "msg-id-ERROR: %s index %d expected %d",
+ .format_args = "T4i4i4",
+ };
+ struct { u32 c[3]; } * ed;
+ ed = ELOG_DATA (mcm->elog_main, e);
+ ed->c[0] = elog_id_for_msg_name (mcm, name);
+ ed->c[1] = si;
+ ed->c[2] = ~0;
+ if (sm->global_index < vec_len (s->stream_msg_index_by_global_index))
+ ed->c[2] = s->stream_msg_index_by_global_index[sm->global_index];
+ }
+ }
+
+ vec_free (name);
+ }
+
+ if (gi != ~0)
+ {
+ mc_serialize_msg_t * msg;
+ msg = vec_elt (mcm->global_msgs, gi);
+ unserialize (m, msg->unserialize, mcm);
+ }
+
+ done:
+ return gi != ~0;
+}
+
+void
+mc_unserialize_internal (mc_main_t * mcm, u32 stream_and_buffer_index)
+{
+ vlib_main_t * vm = mcm->vlib_main;
+ serialize_main_t * m = &mcm->serialize_mains[VLIB_RX];
+ vlib_serialize_buffer_main_t * sbm = &mcm->serialize_buffer_mains[VLIB_RX];
+ mc_stream_and_buffer_t * sb;
+ mc_stream_t * stream;
+ u32 buffer_index;
+
+ sb = pool_elt_at_index (mcm->mc_unserialize_stream_and_buffers, stream_and_buffer_index);
+ buffer_index = sb->buffer_index;
+ stream = vec_elt_at_index (mcm->stream_vector, sb->stream_index);
+ pool_put (mcm->mc_unserialize_stream_and_buffers, sb);
+
+ if (stream->config.save_snapshot)
+ {
+ u32 n_bytes = vlib_buffer_index_length_in_chain (vm, buffer_index);
+ static u8 * contents;
+ vec_reset_length (contents);
+ vec_validate (contents, n_bytes - 1);
+ vlib_buffer_contents (vm, buffer_index, contents);
+ stream->config.save_snapshot (mcm, /* is_catchup */ 0, contents, n_bytes);
+ }
+
+ ASSERT (vlib_in_process_context (vm));
+
+ unserialize_open_vlib_buffer (m, vm, sbm);
+
+ clib_fifo_add1 (sbm->rx.buffer_fifo, buffer_index);
+
+ while (unserialize_vlib_buffer_n_bytes (m) > 0)
+ mc_unserialize_message (mcm, stream, m);
+
+ /* Frees buffer. */
+ unserialize_close_vlib_buffer (m);
+}
+
+void
+mc_unserialize (mc_main_t * mcm, mc_stream_t * s, u32 buffer_index)
+{
+ vlib_main_t * vm = mcm->vlib_main;
+ mc_stream_and_buffer_t * sb;
+ pool_get (mcm->mc_unserialize_stream_and_buffers, sb);
+ sb->stream_index = s->index;
+ sb->buffer_index = buffer_index;
+ vlib_process_signal_event (vm, mcm->unserialize_process,
+ EVENT_MC_UNSERIALIZE_BUFFER, sb - mcm->mc_unserialize_stream_and_buffers);
+}
+
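+/*
+ * Process node which drains unserialize work in process context:
+ * buffer events decode user messages from a vlib buffer chain;
+ * catchup events replay a catchup reply snapshot and free it.
+ */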
+static uword
+mc_unserialize_process (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * f)
+{
+ mc_main_t * mcm = mc_node_get_main (node);
+ uword event_type, * event_data = 0;
+ int i;
+
+ while (1)
+ {
+ if (event_data)
+ _vec_len(event_data) = 0;
+
+ vlib_process_wait_for_event (vm);
+ event_type = vlib_process_get_events (vm, &event_data);
+ switch (event_type)
+ {
+ case EVENT_MC_UNSERIALIZE_BUFFER:
+ for (i = 0; i < vec_len (event_data); i++)
+ mc_unserialize_internal (mcm, event_data[i]);
+ break;
+
+ case EVENT_MC_UNSERIALIZE_CATCHUP:
+ for (i = 0; i < vec_len (event_data); i++)
+ {
+ u8 * mp = uword_to_pointer (event_data[i], u8 *);
+ perform_catchup (mcm, (void *) mp);
+ vec_free (mp);
+ }
+ break;
+
+ default:
+ break;
+ }
+ }
+
+ return 0; /* not likely */
+}
+
+void serialize_mc_main (serialize_main_t * m, va_list * va)
+{
+ mc_main_t * mcm = va_arg (*va, mc_main_t *);
+ mc_stream_t * s;
+ mc_serialize_stream_msg_t * sm;
+ mc_serialize_msg_t * msg;
+
+ serialize_integer (m, vec_len (mcm->stream_vector), sizeof (u32));
+ vec_foreach (s, mcm->stream_vector)
+ {
+ /* Stream name. */
+ serialize_cstring (m, s->config.name);
+
+ /* Serialize global names for all sent messages. */
+ serialize_integer (m, vec_len (s->stream_msgs), sizeof (u32));
+ vec_foreach (sm, s->stream_msgs)
+ {
+ msg = vec_elt (mcm->global_msgs, sm->global_index);
+ serialize_cstring (m, msg->name);
+ }
+ }
+}
+
+void unserialize_mc_main (serialize_main_t * m, va_list * va)
+{
+ mc_main_t * mcm = va_arg (*va, mc_main_t *);
+ u32 i, n_streams, n_stream_msgs;
+ char * name;
+ mc_stream_t * s;
+ mc_serialize_stream_msg_t * sm;
+
+ unserialize_integer (m, &n_streams, sizeof (u32));
+ for (i = 0; i < n_streams; i++)
+ {
+ unserialize_cstring (m, &name);
+ if (i != MC_STREAM_INDEX_INTERNAL
+ && ! mc_stream_by_name (mcm, name))
+ {
+ vec_validate (mcm->stream_vector, i);
+ s = vec_elt_at_index (mcm->stream_vector, i);
+ mc_stream_init (s);
+ s->index = s - mcm->stream_vector;
+ s->config.name = name;
+ s->state = MC_STREAM_STATE_name_known;
+ hash_set_mem (mcm->stream_index_by_name, s->config.name, s->index);
+ }
+ else
+ vec_free (name);
+
+ s = vec_elt_at_index (mcm->stream_vector, i);
+
+ vec_free (s->stream_msgs);
+ vec_free (s->stream_msg_index_by_global_index);
+
+ unserialize_integer (m, &n_stream_msgs, sizeof (u32));
+ vec_resize (s->stream_msgs, n_stream_msgs);
+ vec_foreach (sm, s->stream_msgs)
+ {
+ uword * p;
+ u32 si, gi;
+
+ unserialize_cstring (m, &name);
+ p = hash_get (mcm->global_msg_index_by_name, name);
+ gi = p ? p[0] : ~0;
+ si = sm - s->stream_msgs;
+
+ if (MC_EVENT_LOGGING > 0)
+ {
+ ELOG_TYPE_DECLARE (e) = {
+ .format = "catchup-bind: %s to %d global index %d stream %d",
+ .format_args = "T4i4i4i4",
+ };
+ struct { u32 c[4]; } * ed;
+ ed = ELOG_DATA (mcm->elog_main, e);
+ ed->c[0] = elog_id_for_msg_name (mcm, name);
+ ed->c[1] = si;
+ ed->c[2] = gi;
+ ed->c[3] = s->index;
+ }
+
+ vec_free (name);
+
+ sm->global_index = gi;
+ if (gi != ~0)
+ {
+ vec_validate_init_empty (s->stream_msg_index_by_global_index,
+ gi, ~0);
+ s->stream_msg_index_by_global_index[gi] = si;
+ }
+ }
+ }
+}
+
+void mc_main_init (mc_main_t * mcm, char * tag)
+{
+ vlib_main_t * vm = vlib_get_main();
+
+ mcm->vlib_main = vm;
+ mcm->elog_main = &vm->elog_main;
+
+ mcm->relay_master_peer_id = ~0ULL;
+ mcm->relay_state = MC_RELAY_STATE_NEGOTIATE;
+
+ mcm->stream_index_by_name
+ = hash_create_string (/* elts */ 0, /* value size */ sizeof (uword));
+
+ {
+ vlib_node_registration_t r;
+
+ memset (&r, 0, sizeof (r));
+
+ r.type = VLIB_NODE_TYPE_PROCESS;
+
+ /* Point runtime data to main instance. */
+ r.runtime_data = &mcm;
+ r.runtime_data_bytes = sizeof (&mcm);
+
+ r.name = (char *) format (0, "mc-mastership-%s", tag);
+ r.function = mc_mastership_process;
+ mcm->mastership_process = vlib_register_node (vm, &r);
+
+ r.name = (char *) format (0, "mc-join-ager-%s", tag);
+ r.function = mc_join_ager_process;
+ mcm->join_ager_process = vlib_register_node (vm, &r);
+
+ r.name = (char *) format (0, "mc-retry-%s", tag);
+ r.function = mc_retry_process;
+ mcm->retry_process = vlib_register_node (vm, &r);
+
+ r.name = (char *) format (0, "mc-catchup-%s", tag);
+ r.function = mc_catchup_process;
+ mcm->catchup_process = vlib_register_node (vm, &r);
+
+ r.name = (char *) format (0, "mc-unserialize-%s", tag);
+ r.function = mc_unserialize_process;
+ mcm->unserialize_process = vlib_register_node (vm, &r);
+ }
+
+ if (MC_EVENT_LOGGING > 0)
+ mhash_init (&mcm->elog_id_by_peer_id, sizeof (uword), sizeof (mc_peer_id_t));
+
+ mhash_init (&mcm->mastership_peer_index_by_id, sizeof (uword), sizeof (mc_peer_id_t));
+ mc_serialize_init (mcm);
+}
+
+static u8 * format_mc_relay_state (u8 * s, va_list * args)
+{
+ mc_relay_state_t state = va_arg (*args, mc_relay_state_t);
+ char * t = 0;
+ switch (state)
+ {
+ case MC_RELAY_STATE_NEGOTIATE:
+ t = "negotiate";
+ break;
+ case MC_RELAY_STATE_MASTER:
+ t = "master";
+ break;
+ case MC_RELAY_STATE_SLAVE:
+ t = "slave";
+ break;
+ default:
+ return format (s, "unknown 0x%x", state);
+ }
+
+ return format (s, "%s", t);
+}
+
+static u8 * format_mc_stream_state (u8 * s, va_list * args)
+{
+ mc_stream_state_t state = va_arg (*args, mc_stream_state_t);
+ char * t = 0;
+ switch (state)
+ {
+#define _(f) case MC_STREAM_STATE_##f: t = #f; break;
+ foreach_mc_stream_state
+#undef _
+ default:
+ return format (s, "unknown 0x%x", state);
+ }
+
+ return format (s, "%s", t);
+}
+
+u8 * format_mc_main (u8 * s, va_list * args)
+{
+ mc_main_t * mcm = va_arg (*args, mc_main_t *);
+ mc_stream_t * t;
+ mc_stream_peer_t * p, * ps;
+ uword indent = format_get_indent (s);
+
+ s = format (s, "MC state %U, %d streams joined, global sequence 0x%x",
+ format_mc_relay_state, mcm->relay_state,
+ vec_len (mcm->stream_vector),
+ mcm->relay_global_sequence);
+
+ {
+ mc_mastership_peer_t * mp;
+ f64 now = vlib_time_now (mcm->vlib_main);
+ s = format (s, "\n%UMost recent mastership peers:",
+ format_white_space, indent + 2);
+ vec_foreach (mp, mcm->mastership_peers)
+ {
+ s = format (s, "\n%U%-30U%.4e",
+ format_white_space, indent + 4,
+ mcm->transport.format_peer_id, mp->peer_id,
+ now - mp->time_last_master_assert_received);
+ }
+ }
+
+ vec_foreach (t, mcm->stream_vector)
+ {
+ s = format (s, "\n%Ustream `%s' index %d",
+ format_white_space, indent + 2,
+ t->config.name, t->index);
+
+ s = format (s, "\n%Ustate %U",
+ format_white_space, indent + 4,
+ format_mc_stream_state, t->state);
+
+ s = format (s, "\n%Uretries: interval %.0f sec, limit %d, pool elts %d, %Ld sent",
+ format_white_space, indent + 4, t->config.retry_interval,
+ t->config.retry_limit,
+ pool_elts (t->retry_pool),
+ t->stats.n_retries - t->stats_last_clear.n_retries);
+
+ s = format (s, "\n%U%Ld/%Ld user requests sent/received",
+ format_white_space, indent + 4,
+ t->user_requests_sent, t->user_requests_received);
+
+ s = format (s, "\n%U%d peers, local/global sequence 0x%x/0x%x",
+ format_white_space, indent + 4,
+ pool_elts (t->peers),
+ t->our_local_sequence,
+ t->last_global_sequence_processed);
+
+ ps = 0;
+ pool_foreach (p, t->peers,
+ ({
+ if (clib_bitmap_get (t->all_peer_bitmap, p - t->peers))
+ vec_add1 (ps, p[0]);
+ }));
+ vec_sort (ps, p1, p2, mc_peer_id_compare (p1->id, p2->id));
+    s = format (s, "\n%U%=30s%10s%16s%16s",
+		format_white_space, indent + 6,
+		"Peer", "Last seq", "Past", "Future");
+
+ vec_foreach (p, ps)
+ {
+ s = format (s, "\n%U%-30U0x%08x%16Ld%16Ld%s",
+ format_white_space, indent + 6,
+ mcm->transport.format_peer_id, p->id.as_u64,
+ p->last_sequence_received,
+ p->stats.n_msgs_from_past - p->stats_last_clear.n_msgs_from_past,
+ p->stats.n_msgs_from_future - p->stats_last_clear.n_msgs_from_future,
+ (mcm->transport.our_ack_peer_id.as_u64 == p->id.as_u64
+ ? " (self)" : ""));
+ }
+ vec_free (ps);
+ }
+
+ return s;
+}
diff --git a/vlib/vlib/mc.h b/vlib/vlib/mc.h
new file mode 100644
index 00000000000..55dce2822c6
--- /dev/null
+++ b/vlib/vlib/mc.h
@@ -0,0 +1,674 @@
+/*
+ * mc.h: vlib reliable sequenced multicast distributed applications
+ *
+ * Copyright (c) 2010 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef included_vlib_mc_h
+#define included_vlib_mc_h
+
+#include <vppinfra/elog.h>
+#include <vppinfra/fifo.h>
+#include <vppinfra/mhash.h>
+#include <vlib/node.h>
+
+#ifndef MC_EVENT_LOGGING
+#define MC_EVENT_LOGGING 1
+#endif
+
+always_inline uword
+mc_need_byte_swap (void)
+{ return CLIB_ARCH_IS_LITTLE_ENDIAN; }
+
+/*
+ * Used to uniquely identify hosts.
+ * For IP4 this would be ip4_address plus tcp/udp port.
+ */
+typedef union {
+ u8 as_u8[8];
+ u64 as_u64;
+} mc_peer_id_t;
+
+always_inline mc_peer_id_t
+mc_byte_swap_peer_id (mc_peer_id_t i)
+{
+ /* Peer id is already in network byte order. */
+ return i;
+}
+
+always_inline int
+mc_peer_id_compare (mc_peer_id_t a, mc_peer_id_t b)
+{
+ return memcmp (a.as_u8, b.as_u8, sizeof (a.as_u8));
+}
+
+/* Assert mastership. Lowest peer_id among all peers wins mastership.
+   Only sent/received over the mastership channel (MC_TRANSPORT_MASTERSHIP),
+   so we don't need a message opcode. */
+typedef CLIB_PACKED (struct {
+ /* Peer id asserting mastership. */
+ mc_peer_id_t peer_id;
+
+ /* Global sequence number asserted. */
+ u32 global_sequence;
+}) mc_msg_master_assert_t;
+
+always_inline void
+mc_byte_swap_msg_master_assert (mc_msg_master_assert_t * r)
+{
+ if (mc_need_byte_swap ())
+ {
+ r->peer_id = mc_byte_swap_peer_id (r->peer_id);
+ r->global_sequence = clib_byte_swap_u32 (r->global_sequence);
+ }
+}
+
+#define foreach_mc_msg_type \
+ _ (master_assert) \
+ _ (join_or_leave_request) \
+ _ (join_reply) \
+ _ (user_request) \
+ _ (user_ack) \
+ _ (catchup_request) \
+ _ (catchup_reply)
+
+typedef enum {
+#define _(f) MC_MSG_TYPE_##f,
+ foreach_mc_msg_type
+#undef _
+} mc_relay_msg_type_t;
+
+/* Request to join a given stream. Multicast over MC_TRANSPORT_JOIN. */
+typedef CLIB_PACKED (struct {
+ mc_peer_id_t peer_id;
+
+ mc_relay_msg_type_t type : 32; /* MC_MSG_TYPE_join_or_leave_request */
+
+ /* Stream to join or leave. */
+ u32 stream_index;
+
+ /* join = 1, leave = 0 */
+ u8 is_join;
+}) mc_msg_join_or_leave_request_t;
+
+always_inline void
+mc_byte_swap_msg_join_or_leave_request (mc_msg_join_or_leave_request_t * r)
+{
+ if (mc_need_byte_swap ())
+ {
+ r->peer_id = mc_byte_swap_peer_id (r->peer_id);
+ r->type = clib_byte_swap_u32 (r->type);
+ r->stream_index = clib_byte_swap_u32 (r->stream_index);
+ }
+}
+
+/* Join reply. Multicast over MC_TRANSPORT_JOIN. */
+typedef CLIB_PACKED (struct {
+ mc_peer_id_t peer_id;
+
+ mc_relay_msg_type_t type : 32; /* MC_MSG_TYPE_join_reply */
+
+ u32 stream_index;
+
+ /* Peer ID to contact to catchup with this stream. */
+ mc_peer_id_t catchup_peer_id;
+}) mc_msg_join_reply_t;
+
+always_inline void
+mc_byte_swap_msg_join_reply (mc_msg_join_reply_t * r)
+{
+ if (mc_need_byte_swap ())
+ {
+ r->peer_id = mc_byte_swap_peer_id (r->peer_id);
+ r->type = clib_byte_swap_u32 (r->type);
+ r->stream_index = clib_byte_swap_u32 (r->stream_index);
+ r->catchup_peer_id = mc_byte_swap_peer_id (r->catchup_peer_id);
+ }
+}
+
+/* Generic (application) request. Multicast over MC_TRANSPORT_USER_REQUEST_TO_RELAY and then
+ relayed by relay master after filling in global sequence number. */
+typedef CLIB_PACKED (struct {
+ mc_peer_id_t peer_id;
+
+ u32 stream_index;
+
+ /* Global sequence number as filled in by relay master. */
+ u32 global_sequence;
+
+ /* Local sequence number as filled in by peer sending message. */
+ u32 local_sequence;
+
+ /* Size of request data. */
+ u32 n_data_bytes;
+
+ /* Opaque request data. */
+ u8 data[0];
+}) mc_msg_user_request_t;
+
+always_inline void
+mc_byte_swap_msg_user_request (mc_msg_user_request_t * r)
+{
+ if (mc_need_byte_swap ())
+ {
+ r->peer_id = mc_byte_swap_peer_id (r->peer_id);
+ r->stream_index = clib_byte_swap_u32 (r->stream_index);
+ r->global_sequence = clib_byte_swap_u32 (r->global_sequence);
+ r->local_sequence = clib_byte_swap_u32 (r->local_sequence);
+ r->n_data_bytes = clib_byte_swap_u32 (r->n_data_bytes);
+ }
+}
+
+/* Sent unicast over ACK channel. */
+typedef CLIB_PACKED (struct {
+ mc_peer_id_t peer_id;
+ u32 global_sequence;
+ u32 stream_index;
+ u32 local_sequence;
+ i32 seq_cmp_result;
+}) mc_msg_user_ack_t;
+
+always_inline void
+mc_byte_swap_msg_user_ack (mc_msg_user_ack_t * r)
+{
+ if (mc_need_byte_swap ())
+ {
+ r->peer_id = mc_byte_swap_peer_id (r->peer_id);
+ r->stream_index = clib_byte_swap_u32 (r->stream_index);
+ r->global_sequence = clib_byte_swap_u32 (r->global_sequence);
+ r->local_sequence = clib_byte_swap_u32 (r->local_sequence);
+ r->seq_cmp_result = clib_byte_swap_i32 (r->seq_cmp_result);
+ }
+}
+
+/* Sent/received unicast over catchup channel (e.g. using TCP). */
+typedef CLIB_PACKED (struct {
+ mc_peer_id_t peer_id;
+ u32 stream_index;
+}) mc_msg_catchup_request_t;
+
+always_inline void
+mc_byte_swap_msg_catchup_request (mc_msg_catchup_request_t * r)
+{
+ if (mc_need_byte_swap ())
+ {
+ r->peer_id = mc_byte_swap_peer_id (r->peer_id);
+ r->stream_index = clib_byte_swap_u32 (r->stream_index);
+ }
+}
+
+/* Sent/received unicast over catchup channel. */
+typedef CLIB_PACKED (struct {
+ mc_peer_id_t peer_id;
+
+ u32 stream_index;
+
+ /* Last global sequence number included in catchup data. */
+ u32 last_global_sequence_included;
+
+ /* Size of catchup data. */
+ u32 n_data_bytes;
+
+ /* Catchup data. */
+ u8 data[0];
+}) mc_msg_catchup_reply_t;
+
+always_inline void
+mc_byte_swap_msg_catchup_reply (mc_msg_catchup_reply_t * r)
+{
+ if (mc_need_byte_swap ())
+ {
+ r->peer_id = mc_byte_swap_peer_id (r->peer_id);
+ r->stream_index = clib_byte_swap_u32 (r->stream_index);
+ r->last_global_sequence_included = clib_byte_swap_u32 (r->last_global_sequence_included);
+ r->n_data_bytes = clib_byte_swap_u32 (r->n_data_bytes);
+ }
+}
+
+typedef struct _mc_serialize_msg {
+ /* Name for this type. */
+ char * name;
+
+ /* Functions to serialize/unserialize data. */
+ serialize_function_t * serialize;
+ serialize_function_t * unserialize;
+
+ /* Maximum message size in bytes when serialized.
+ If zero then this will be set to the largest sent message. */
+ u32 max_n_bytes_serialized;
+
+ /* Opaque to use for first argument to serialize/unserialize function. */
+ u32 opaque;
+
+ /* Index in global message vector. */
+ u32 global_index;
+
+ /* Registration list */
+ struct _mc_serialize_msg * next_registration;
+} mc_serialize_msg_t;
+
+typedef struct {
+ /* Index into global message vector. */
+ u32 global_index;
+} mc_serialize_stream_msg_t;
+
+#define MC_SERIALIZE_MSG(x,...) \
+ __VA_ARGS__ mc_serialize_msg_t x; \
+static void __mc_serialize_msg_registration_##x (void) \
+ __attribute__((__constructor__)) ; \
+static void __mc_serialize_msg_registration_##x (void) \
+{ \
+ vlib_main_t * vm = vlib_get_main(); \
+ x.next_registration = vm->mc_msg_registrations; \
+ vm->mc_msg_registrations = &x; \
+} \
+__VA_ARGS__ mc_serialize_msg_t x
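+
+/* A minimal registration sketch; the message name and the
+ * serialize/unserialize functions are invented for illustration:
+ *
+ *   MC_SERIALIZE_MSG (my_msg, static) = {
+ *     .name = "my_msg",
+ *     .serialize = serialize_my_msg,
+ *     .unserialize = unserialize_my_msg,
+ *   };
+ */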
+
+typedef enum {
+ MC_TRANSPORT_MASTERSHIP,
+ MC_TRANSPORT_JOIN,
+ MC_TRANSPORT_USER_REQUEST_TO_RELAY,
+ MC_TRANSPORT_USER_REQUEST_FROM_RELAY,
+ MC_N_TRANSPORT_TYPE,
+} mc_transport_type_t;
+
+typedef struct {
+ clib_error_t * (* tx_buffer) (void * opaque, mc_transport_type_t type, u32 buffer_index);
+
+ clib_error_t * (* tx_ack) (void * opaque, mc_peer_id_t peer_id, u32 buffer_index);
+
+ /* Returns catchup opaque. */
+ uword (* catchup_request_fun) (void * opaque, u32 stream_index, mc_peer_id_t catchup_peer_id);
+
+ void (* catchup_send_fun) (void * opaque, uword catchup_opaque, u8 * data_vector);
+
+ /* Opaque passed to callbacks. */
+ void * opaque;
+
+ mc_peer_id_t our_ack_peer_id;
+ mc_peer_id_t our_catchup_peer_id;
+
+ /* Max packet size (MTU) for this transport.
+ For IP this is interface MTU less IP + UDP header size. */
+ u32 max_packet_size;
+
+ format_function_t * format_peer_id;
+} mc_transport_t;
+
+typedef struct {
+ /* Count of messages received from this peer from the past/future
+ (with seq_cmp != 0). */
+ u64 n_msgs_from_past;
+ u64 n_msgs_from_future;
+} mc_stream_peer_stats_t;
+
+typedef struct {
+ /* ID of this peer. */
+ mc_peer_id_t id;
+
+ /* The last sequence we received from this peer. */
+ u32 last_sequence_received;
+
+ mc_stream_peer_stats_t stats, stats_last_clear;
+} mc_stream_peer_t;
+
+typedef struct {
+ u32 buffer_index;
+
+ /* Cached copy of local sequence number from buffer. */
+ u32 local_sequence;
+
+ /* Number of times this buffer has been sent (retried). */
+ u32 n_retries;
+
+ /* Previous/next retries in doubly-linked list. */
+ u32 prev_index, next_index;
+
+ /* Bitmap of all peers which have acked this msg */
+ uword * unacked_by_peer_bitmap;
+
+ /* Message send or resend time */
+ f64 sent_at;
+} mc_retry_t;
+
+typedef struct {
+ /* Number of retries sent for this stream. */
+ u64 n_retries;
+} mc_stream_stats_t;
+
+struct mc_main_t;
+struct mc_stream_t;
+
+typedef struct {
+ /* Stream name. */
+ char * name;
+
+ /* Number of outstanding messages. */
+ u32 window_size;
+
+ /* Retry interval, in seconds */
+ f64 retry_interval;
+
+ /* Retry limit */
+ u32 retry_limit;
+
+ /* User rx buffer callback */
+ void (* rx_buffer) (struct mc_main_t * mc_main,
+ struct mc_stream_t * stream,
+ mc_peer_id_t peer_id,
+ u32 buffer_index);
+
+ /* User callback to create a snapshot */
+ u8 * (* catchup_snapshot) (struct mc_main_t *mc_main,
+ u8 * snapshot_vector,
+ u32 last_global_sequence_included);
+
+ /* User callback to replay a snapshot */
+ void (* catchup) (struct mc_main_t *mc_main,
+ u8 * snapshot_data,
+ u32 n_snapshot_data_bytes);
+
+ /* Callback to save a snapshot for offline replay */
+ void (* save_snapshot) (struct mc_main_t *mc_main,
+ u32 is_catchup,
+ u8 * snapshot_data,
+ u32 n_snapshot_data_bytes);
+
+ /* Called when a peer dies */
+ void (* peer_died) (struct mc_main_t * mc_main,
+ struct mc_stream_t * stream,
+ mc_peer_id_t peer_id);
+} mc_stream_config_t;
+
+#define foreach_mc_stream_state \
+ _ (invalid) \
+ _ (name_known) \
+ _ (join_in_progress) \
+ _ (catchup) \
+ _ (ready)
+
+typedef enum {
+#define _(f) MC_STREAM_STATE_##f,
+ foreach_mc_stream_state
+#undef _
+} mc_stream_state_t;
+
+typedef struct mc_stream_t {
+ mc_stream_config_t config;
+
+ mc_stream_state_t state;
+
+ /* Index in stream pool. */
+ u32 index;
+
+ /* Stream index 0 is always for MC internal use. */
+#define MC_STREAM_INDEX_INTERNAL 0
+
+ mc_retry_t * retry_pool;
+
+ /* Head and tail index of retry pool. */
+ u32 retry_head_index, retry_tail_index;
+
+  /*
+   * Country club for recently retired messages.
+   * If the set of peers is expanding and a new peer
+   * misses a message, we can easily retire the FIFO
+   * element before we even know about the new peer.
+   */
+ mc_retry_t * retired_fifo;
+
+ /* Hash mapping local sequence to retry pool index. */
+ uword * retry_index_by_local_sequence;
+
+  /* Catch-up FIFO of VLIB buffer indices;
+     start recording when catching up. */
+ u32 * catchup_fifo;
+
+ mc_stream_stats_t stats, stats_last_clear;
+
+ /* Peer pool. */
+ mc_stream_peer_t * peers;
+
+ /* Bitmap with ones for all peers in peer pool. */
+ uword * all_peer_bitmap;
+
+  /* Map of 64 bit peer id to index in peer pool. */
+ mhash_t peer_index_by_id;
+
+ /* Timeout, in case we're alone in the world */
+ f64 join_timeout;
+
+ vlib_one_time_waiting_process_t * procs_waiting_for_join_done;
+
+ vlib_one_time_waiting_process_t * procs_waiting_for_open_window;
+
+ /* Next sequence number to use */
+ u32 our_local_sequence;
+
+ /*
+ * Last global sequence we processed.
+ * When supplying catchup data, we need to tell
+ * the client precisely where to start replaying
+ */
+ u32 last_global_sequence_processed;
+
+ /* Vector of unique messages we've sent on this stream. */
+ mc_serialize_stream_msg_t * stream_msgs;
+
+  /* Vector mapping global message index to per-stream message index. */
+ u32 * stream_msg_index_by_global_index;
+
+ /* Hashed by message name. */
+ uword * stream_msg_index_by_name;
+
+ u64 user_requests_sent;
+ u64 user_requests_received;
+} mc_stream_t;
+
+always_inline void
+mc_stream_free (mc_stream_t * s)
+{
+ pool_free (s->retry_pool);
+ hash_free (s->retry_index_by_local_sequence);
+ clib_fifo_free (s->catchup_fifo);
+ pool_free (s->peers);
+ mhash_free (&s->peer_index_by_id);
+ vec_free (s->procs_waiting_for_join_done);
+ vec_free (s->procs_waiting_for_open_window);
+}
+
+always_inline void
+mc_stream_init (mc_stream_t * s)
+{
+ memset (s, 0, sizeof (s[0]));
+ s->retry_head_index = s->retry_tail_index = ~0;
+}
+
+typedef struct {
+ u32 stream_index;
+ u32 catchup_opaque;
+ u8 *catchup_snapshot;
+} mc_catchup_process_arg_t;
+
+typedef enum {
+ MC_RELAY_STATE_NEGOTIATE,
+ MC_RELAY_STATE_MASTER,
+ MC_RELAY_STATE_SLAVE,
+} mc_relay_state_t;
+
+typedef struct {
+ mc_peer_id_t peer_id;
+
+ f64 time_last_master_assert_received;
+} mc_mastership_peer_t;
+
+typedef struct {
+ u32 stream_index;
+ u32 buffer_index;
+} mc_stream_and_buffer_t;
+
+typedef struct mc_main_t {
+ mc_relay_state_t relay_state;
+
+ /* Mastership */
+ u32 we_can_be_relay_master;
+
+ u64 relay_master_peer_id;
+
+ mc_mastership_peer_t * mastership_peers;
+
+  /* Map of 64 bit peer id to index in mastership peer vector. */
+ mhash_t mastership_peer_index_by_id;
+
+ /* The transport we're using. */
+ mc_transport_t transport;
+
+ /* Last-used global sequence number. */
+ u32 relay_global_sequence;
+
+ /* Vector of streams. */
+ mc_stream_t * stream_vector;
+
+ /* Hash table mapping stream name to pool index. */
+ uword * stream_index_by_name;
+
+ uword * procs_waiting_for_stream_name_by_name;
+
+ vlib_one_time_waiting_process_t ** procs_waiting_for_stream_name_pool;
+
+ int joins_in_progress;
+
+ mc_catchup_process_arg_t * catchup_process_args;
+
+ /* Node indices for mastership, join ager,
+ retry and catchup processes. */
+ u32 mastership_process;
+ u32 join_ager_process;
+ u32 retry_process;
+ u32 catchup_process;
+ u32 unserialize_process;
+
+ /* Global vector of messages. */
+ mc_serialize_msg_t ** global_msgs;
+
+ /* Hash table mapping message name to index. */
+ uword * global_msg_index_by_name;
+
+ /* Shared serialize/unserialize main. */
+ serialize_main_t serialize_mains[VLIB_N_RX_TX];
+
+ vlib_serialize_buffer_main_t serialize_buffer_mains[VLIB_N_RX_TX];
+
+ /* Convenience variables */
+ struct vlib_main_t * vlib_main;
+ elog_main_t * elog_main;
+
+ /* Maps 64 bit peer id to elog string table offset for this formatted peer id. */
+ mhash_t elog_id_by_peer_id;
+
+ uword *elog_id_by_msg_name;
+
+ /* For mc_unserialize. */
+ mc_stream_and_buffer_t * mc_unserialize_stream_and_buffers;
+} mc_main_t;
+
+always_inline mc_stream_t *
+mc_stream_by_name (mc_main_t * m, char * name)
+{
+ uword * p = hash_get (m->stream_index_by_name, name);
+ return p ? vec_elt_at_index (m->stream_vector, p[0]) : 0;
+}
+
+always_inline mc_stream_t *
+mc_stream_by_index (mc_main_t * m, u32 i)
+{
+ return i < vec_len (m->stream_vector) ? m->stream_vector + i : 0;
+}
+
+always_inline void
+mc_clear_stream_stats (mc_main_t * m)
+{
+ mc_stream_t * s;
+ mc_stream_peer_t * p;
+ vec_foreach (s, m->stream_vector)
+ {
+ s->stats_last_clear = s->stats;
+ pool_foreach (p, s->peers, ({
+ p->stats_last_clear = p->stats;
+ }));
+ }
+}
+
+/* Declare all message handlers. */
+#define _(f) void mc_msg_##f##_handler (mc_main_t * mcm, mc_msg_##f##_t * msg, u32 buffer_index);
+foreach_mc_msg_type
+#undef _
+
+u32 mc_stream_join (mc_main_t * mcm, mc_stream_config_t *);
+
+void mc_stream_leave (mc_main_t * mcm, u32 stream_index);
+
+void mc_wait_for_stream_ready (mc_main_t * m, char * stream_name);
+
+u32 mc_stream_send (mc_main_t * mcm, u32 stream_index, u32 buffer_index);
+
+void mc_main_init (mc_main_t * mcm, char * tag);
+
+void mc_enable_disable_mastership (mc_main_t * mcm, int we_can_be_master);
+
+void * mc_get_vlib_buffer (struct vlib_main_t * vm, u32 n_bytes, u32 * bi_return);
+
+format_function_t format_mc_main;
+
+clib_error_t *
+mc_serialize_internal (mc_main_t * mc,
+ u32 stream_index,
+ u32 multiple_messages_per_vlib_buffer,
+ mc_serialize_msg_t * msg,
+ ...);
+
+clib_error_t *
+mc_serialize_va (mc_main_t * mc,
+ u32 stream_index,
+ u32 multiple_messages_per_vlib_buffer,
+ mc_serialize_msg_t * msg,
+ va_list * va);
+
+#define mc_serialize_stream(mc,si,msg,args...) \
+ mc_serialize_internal((mc),(si),(0),(msg),(msg)->serialize,args)
+
+#define mc_serialize(mc,msg,args...) \
+ mc_serialize_internal((mc),(~0),(0),(msg),(msg)->serialize,args)
+
+#define mc_serialize2(mc,add,msg,args...) \
+ mc_serialize_internal((mc),(~0),(add),(msg),(msg)->serialize,args)
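+
+/* Hypothetical call sketch (my_msg and my_data are invented):
+ *
+ *   clib_error_t * error = mc_serialize (mc, &my_msg, &my_data);
+ *
+ * This expands to mc_serialize_internal with stream_index ~0, i.e.
+ * the calling process's stream, passing my_msg.serialize as the
+ * function consumed by va_serialize. */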
+
+void mc_unserialize (mc_main_t * mcm, mc_stream_t * s, u32 buffer_index);
+uword mc_unserialize_message (mc_main_t * mcm, mc_stream_t * s,
+ serialize_main_t * m);
+
+serialize_function_t serialize_mc_main, unserialize_mc_main;
+
+always_inline uword
+mc_max_message_size_in_bytes (mc_main_t * mcm)
+{ return mcm->transport.max_packet_size - sizeof (mc_msg_user_request_t); }
+
+always_inline word
+mc_serialize_n_bytes_left (mc_main_t * mcm, serialize_main_t * m)
+{ return mc_max_message_size_in_bytes (mcm) - serialize_vlib_buffer_n_bytes (m); }
+
+void unserialize_mc_stream (serialize_main_t * m, va_list * va);
+void mc_stream_join_process_hold (void);
+
+#endif /* included_vlib_mc_h */
diff --git a/vlib/vlib/node.c b/vlib/vlib/node.c
new file mode 100644
index 00000000000..4fb117e4f3e
--- /dev/null
+++ b/vlib/vlib/node.c
@@ -0,0 +1,566 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * node.c: VLIB processing nodes
+ *
+ * Copyright (c) 2008 Eliot Dresselhaus
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <vlib/vlib.h>
+#include <vlib/threads.h>
+
+/* Query node given name. */
+vlib_node_t * vlib_get_node_by_name (vlib_main_t * vm, u8 * name)
+{
+ vlib_node_main_t * nm = &vm->node_main;
+ uword * p;
+ u8 * key = name;
+ if (! clib_mem_is_heap_object (key))
+ key = format (0, "%s", key);
+ p = hash_get (nm->node_by_name, key);
+ if (key != name)
+ vec_free (key);
+ return p ? vec_elt (nm->nodes, p[0]) : 0;
+}
+
+static void node_set_elog_name (vlib_main_t * vm, uword node_index)
+{
+ vlib_node_t * n = vlib_get_node (vm, node_index);
+ elog_event_type_t * t;
+
+ t = vec_elt_at_index (vm->node_call_elog_event_types, node_index);
+ vec_free (t->format);
+ t->format = (char *) format (0, "%v (%%d)", n->name);
+
+ t = vec_elt_at_index (vm->node_return_elog_event_types, node_index);
+ vec_free (t->format);
+ t->format = (char *) format (0, "%v () = %%d", n->name);
+
+ n->name_elog_string = elog_string (&vm->elog_main, "%v", n->name);
+}
+
+void vlib_node_rename (vlib_main_t * vm, u32 node_index, char * fmt, ...)
+{
+ va_list va;
+ vlib_node_main_t * nm = &vm->node_main;
+ vlib_node_t * n = vlib_get_node (vm, node_index);
+
+ va_start (va, fmt);
+ hash_unset (nm->node_by_name, n->name);
+ vec_free (n->name);
+ n->name = va_format (0, fmt, &va);
+ va_end (va);
+ hash_set (nm->node_by_name, n->name, n->index);
+
+ node_set_elog_name (vm, node_index);
+}
+
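+/*
+ * Grow a node's next-frame vector when it gains next nodes. Runs on
+ * the main thread under the worker barrier, since next_frame_index
+ * values in every other runtime, and in any pending or suspended
+ * frames past the insertion point, must be relocated atomically.
+ */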
+static void
+vlib_node_runtime_update (vlib_main_t * vm,
+ u32 node_index,
+ u32 next_index)
+{
+ vlib_node_main_t * nm = &vm->node_main;
+ vlib_node_runtime_t * r, * s;
+ vlib_node_t * node, * next_node;
+ vlib_next_frame_t * nf;
+ vlib_pending_frame_t * pf;
+ i32 i, j, n_insert;
+
+ ASSERT(os_get_cpu_number() == 0);
+
+ vlib_worker_thread_barrier_sync(vm);
+
+ node = vec_elt (nm->nodes, node_index);
+ r = vlib_node_get_runtime (vm, node_index);
+
+ n_insert = vec_len (node->next_nodes) - r->n_next_nodes;
+ if (n_insert > 0)
+ {
+ i = r->next_frame_index + r->n_next_nodes;
+ vec_insert (nm->next_frames, n_insert, i);
+
+ /* Initialize newly inserted next frames. */
+ for (j = 0; j < n_insert; j++)
+ vlib_next_frame_init (nm->next_frames + i + j);
+
+ /* Relocate other next frames at higher indices. */
+ for (j = 0; j < vec_len (nm->nodes); j++)
+ {
+ s = vlib_node_get_runtime (vm, j);
+ if (j != node_index
+ && s->next_frame_index >= i)
+ s->next_frame_index += n_insert;
+ }
+
+ /* Pending frames may need to be relocated also. */
+ vec_foreach (pf, nm->pending_frames)
+ {
+ if (pf->next_frame_index != VLIB_PENDING_FRAME_NO_NEXT_FRAME
+ && pf->next_frame_index >= i)
+ pf->next_frame_index += n_insert;
+ }
+ pool_foreach (pf, nm->suspended_process_frames, ({
+ if (pf->next_frame_index != ~0 && pf->next_frame_index >= i)
+ pf->next_frame_index += n_insert;
+ }));
+
+ r->n_next_nodes = vec_len (node->next_nodes);
+ }
+
+ /* Set frame's node runtime index. */
+ next_node = vlib_get_node (vm, node->next_nodes[next_index]);
+ nf = nm->next_frames + r->next_frame_index + next_index;
+ nf->node_runtime_index = next_node->runtime_index;
+
+ vlib_worker_thread_node_runtime_update();
+
+ vlib_worker_thread_barrier_release(vm);
+}
+
+/* Add next node to given node in given slot. */
+uword
+vlib_node_add_next_with_slot (vlib_main_t * vm,
+ uword node_index,
+ uword next_node_index,
+ uword slot)
+{
+ vlib_node_main_t * nm = &vm->node_main;
+ vlib_node_t * node, * next;
+ uword * p;
+
+ node = vec_elt (nm->nodes, node_index);
+ next = vec_elt (nm->nodes, next_node_index);
+
+ /* Fill in static next nodes if runtime has yet to be initialized. */
+ if (slot == ~0 && ! (nm->flags & VLIB_NODE_MAIN_RUNTIME_STARTED))
+ {
+ uword i;
+ for (i = 0; i < vec_len (node->next_node_names); i++)
+ {
+ char * a = node->next_node_names[i];
+ if (a)
+ vlib_node_add_named_next_with_slot (vm, node->index, a, i);
+ }
+ }
+
+ if ((p = hash_get (node->next_slot_by_node, next_node_index)))
+ {
+ /* Next already exists: slot must match. */
+ if (slot != ~0)
+ ASSERT (slot == p[0]);
+ return p[0];
+ }
+
+ if (slot == ~0)
+ slot = vec_len (node->next_nodes);
+
+ vec_validate_init_empty (node->next_nodes, slot, ~0);
+ vec_validate (node->n_vectors_by_next_node, slot);
+
+ node->next_nodes[slot] = next_node_index;
+ hash_set (node->next_slot_by_node, next_node_index, slot);
+
+ vlib_node_runtime_update (vm, node_index, slot);
+
+ next->prev_node_bitmap = clib_bitmap_ori (next->prev_node_bitmap,
+ node_index);
+
+ /* Siblings all get same node structure. */
+ {
+ uword sib_node_index, sib_slot;
+ vlib_node_t * sib_node;
+ clib_bitmap_foreach (sib_node_index, node->sibling_bitmap, ({
+ sib_node = vec_elt (nm->nodes, sib_node_index);
+ if (sib_node != node)
+ {
+ sib_slot = vlib_node_add_next_with_slot (vm, sib_node_index, next_node_index, slot);
+ ASSERT (sib_slot == slot);
+ }
+ }));
+ }
+
+ return slot;
+}
+
+/* Add named next node to given node in given slot. */
+uword
+vlib_node_add_named_next_with_slot (vlib_main_t * vm,
+ uword node,
+ char * name,
+ uword slot)
+{
+ vlib_node_main_t * nm;
+ vlib_node_t * n, * n_next;
+
+ nm = &vm->node_main;
+ n = vlib_get_node (vm, node);
+
+ n_next = vlib_get_node_by_name (vm, (u8 *) name);
+ if (! n_next)
+ {
+ if (nm->flags & VLIB_NODE_MAIN_RUNTIME_STARTED)
+ return ~0;
+
+ if (slot == ~0)
+ slot = clib_max (vec_len (n->next_node_names),
+ vec_len (n->next_nodes));
+ vec_validate (n->next_node_names, slot);
+ n->next_node_names[slot] = name;
+ return slot;
+ }
+
+ return vlib_node_add_next_with_slot (vm, node, n_next->index, slot);
+}
+
+static void node_elog_init (vlib_main_t * vm, uword ni)
+{
+ elog_event_type_t t;
+
+ memset (&t, 0, sizeof (t));
+
+ /* 2 event types for this node: one when node function is called.
+ One when it returns. */
+ vec_validate (vm->node_call_elog_event_types, ni);
+ vm->node_call_elog_event_types[ni] = t;
+
+ vec_validate (vm->node_return_elog_event_types, ni);
+ vm->node_return_elog_event_types[ni] = t;
+
+ node_set_elog_name (vm, ni);
+}
+
+#ifdef CLIB_UNIX
+#define STACK_ALIGN 4096
+#else
+#define STACK_ALIGN CLIB_CACHE_LINE_BYTES
+#endif
+
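+/* Common bookkeeping for static and dynamic node registration:
+   allocate the vlib_node_t, bind its unique name, copy the
+   registration fields, then build the runtime: a process with its
+   own stack for VLIB_NODE_TYPE_PROCESS nodes, an entry in the
+   per-type runtime vector otherwise. */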
+static void register_node (vlib_main_t * vm,
+ vlib_node_registration_t * r)
+{
+ vlib_node_main_t * nm = &vm->node_main;
+ vlib_node_t * n;
+ int i;
+
+ if (CLIB_DEBUG > 0)
+ {
+ /* Default (0) type should match INTERNAL. */
+ vlib_node_t zero = {0};
+ ASSERT (VLIB_NODE_TYPE_INTERNAL == zero.type);
+ }
+
+ ASSERT (r->function != 0);
+
+ n = clib_mem_alloc_no_fail (sizeof (n[0]));
+ memset (n, 0, sizeof (n[0]));
+ n->index = vec_len (nm->nodes);
+
+ vec_add1 (nm->nodes, n);
+
+ /* Name is always a vector so it can be formatted with %v. */
+ if (clib_mem_is_heap_object (vec_header (r->name, 0)))
+ n->name = vec_dup ((u8 *) r->name);
+ else
+ n->name = format (0, "%s", r->name);
+
+ if (! nm->node_by_name)
+ nm->node_by_name = hash_create_vec (/* size */ 32,
+ sizeof (n->name[0]),
+ sizeof (uword));
+
+ /* Node names must be unique. */
+ {
+ vlib_node_t * o = vlib_get_node_by_name (vm, n->name);
+ if (o)
+ clib_error ("more than one node named `%v'", n->name);
+ }
+
+ hash_set (nm->node_by_name, n->name, n->index);
+
+ r->index = n->index; /* save index in registration */
+ n->function = r->function;
+
+ /* Node index of next sibling will be filled in by vlib_node_main_init. */
+ n->sibling_of = r->sibling_of;
+
+ if (r->type == VLIB_NODE_TYPE_INTERNAL)
+ ASSERT (r->vector_size > 0);
+
+#define _(f) n->f = r->f
+
+ _ (type);
+ _ (flags);
+ _ (state);
+ _ (scalar_size);
+ _ (vector_size);
+ _ (format_buffer);
+ _ (unformat_buffer);
+ _ (format_trace);
+ _ (validate_frame);
+
+ /* Register error counters. */
+ vlib_register_errors (vm, n->index, r->n_errors, r->error_strings);
+ node_elog_init (vm, n->index);
+
+ _ (runtime_data_bytes);
+ if (r->runtime_data_bytes > 0)
+ {
+ vec_resize (n->runtime_data, r->runtime_data_bytes);
+ if (r->runtime_data)
+ memcpy (n->runtime_data, r->runtime_data, r->runtime_data_bytes);
+ }
+
+ vec_resize (n->next_node_names, r->n_next_nodes);
+ for (i = 0; i < r->n_next_nodes; i++)
+ n->next_node_names[i] = r->next_nodes[i];
+
+ vec_validate_init_empty (n->next_nodes, r->n_next_nodes - 1, ~0);
+ vec_validate (n->n_vectors_by_next_node, r->n_next_nodes - 1);
+
+ n->owner_node_index = n->owner_next_index = ~0;
+
+ /* Initialize node runtime. */
+ {
+ vlib_node_runtime_t * rt;
+ u32 i;
+
+ if (n->type == VLIB_NODE_TYPE_PROCESS)
+ {
+ vlib_process_t * p;
+ uword log2_n_stack_bytes;
+
+ log2_n_stack_bytes = clib_max (r->process_log2_n_stack_bytes, 15);
+
+ p = clib_mem_alloc_aligned_no_fail
+ (sizeof (p[0]) + (1 << log2_n_stack_bytes),
+ STACK_ALIGN);
+
+ memset (p, 0, sizeof (p[0]));
+ p->log2_n_stack_bytes = log2_n_stack_bytes;
+
+ /* Process node's runtime index is really index into process
+ pointer vector. */
+ n->runtime_index = vec_len (nm->processes);
+
+ vec_add1 (nm->processes, p);
+
+ /* Paint first stack word with magic number so we can at least
+ detect process stack overruns. */
+ p->stack[0] = VLIB_PROCESS_STACK_MAGIC;
+
+ /* Node runtime is stored inside of process. */
+ rt = &p->node_runtime;
+
+#ifdef CLIB_UNIX
+ /*
+ * Disallow writes to the bottom page of the stack, to
+ * catch stack overflows.
+ */
+ if (mprotect (p->stack, 4096, PROT_READ) < 0)
+ clib_unix_warning ("process stack");
+#endif
+
+ }
+ else
+ {
+ vec_add2_aligned (nm->nodes_by_type[n->type], rt, 1,
+ /* align */ CLIB_CACHE_LINE_BYTES);
+ n->runtime_index = rt - nm->nodes_by_type[n->type];
+ }
+
+ if (n->type == VLIB_NODE_TYPE_INPUT)
+ nm->input_node_counts_by_state[n->state] += 1;
+
+ rt->function = n->function;
+ rt->flags = n->flags;
+ rt->state = n->state;
+ rt->node_index = n->index;
+
+ rt->n_next_nodes = r->n_next_nodes;
+ rt->next_frame_index = vec_len (nm->next_frames);
+
+ vec_resize (nm->next_frames, rt->n_next_nodes);
+ for (i = 0; i < rt->n_next_nodes; i++)
+ vlib_next_frame_init (nm->next_frames + rt->next_frame_index + i);
+
+ vec_resize (rt->errors, r->n_errors);
+ for (i = 0; i < vec_len (rt->errors); i++)
+ rt->errors[i] = vlib_error_set (n->index, i);
+
+ ASSERT (vec_len (n->runtime_data) <= sizeof (rt->runtime_data));
+ if (vec_len (n->runtime_data) > 0)
+ memcpy (rt->runtime_data, n->runtime_data, vec_len (n->runtime_data));
+
+ vec_free (n->runtime_data);
+ }
+}
+
+/* Register new packet processing node. */
+u32 vlib_register_node (vlib_main_t * vm, vlib_node_registration_t * r)
+{
+ register_node (vm, r);
+ return r->index;
+}
+
+void vlib_register_all_static_nodes (vlib_main_t * vm)
+{
+ vlib_node_registration_t * r;
+
+ r = vm->node_main.node_registrations;
+  while (r)
+    {
+      register_node (vm, r);
+      r = r->next_registration;
+    }
+}
+
+clib_error_t *
+vlib_node_main_init (vlib_main_t * vm)
+{
+ vlib_node_main_t * nm = &vm->node_main;
+ clib_error_t * error = 0;
+ vlib_node_t * n;
+ uword ni;
+
+ nm->flags |= VLIB_NODE_MAIN_RUNTIME_STARTED;
+
+ /* Resolve next names into next indices. */
+ for (ni = 0; ni < vec_len (nm->nodes); ni++)
+ {
+ uword i;
+
+ n = vec_elt (nm->nodes, ni);
+
+ for (i = 0; i < vec_len (n->next_node_names); i++)
+ {
+ char * a = n->next_node_names[i];
+
+ if (! a)
+ continue;
+
+ if (~0 == vlib_node_add_named_next_with_slot (vm, n->index, a, i))
+ {
+ error = clib_error_create
+ ("node `%v' refers to unknown node `%s'", n->name, a);
+ goto done;
+ }
+ }
+
+ vec_free (n->next_node_names);
+ }
+
+ /* Set previous node pointers. */
+ for (ni = 0; ni < vec_len (nm->nodes); ni++)
+ {
+ vlib_node_t * n_next;
+ uword i;
+
+ n = vec_elt (nm->nodes, ni);
+
+ for (i = 0; i < vec_len (n->next_nodes); i++)
+ {
+ if (n->next_nodes[i] >= vec_len (nm->nodes))
+ continue;
+
+ n_next = vec_elt (nm->nodes, n->next_nodes[i]);
+ n_next->prev_node_bitmap =
+ clib_bitmap_ori (n_next->prev_node_bitmap, n->index);
+ }
+ }
+
+ {
+ vlib_next_frame_t * nf;
+ vlib_node_runtime_t * r;
+ vlib_node_t * next;
+ uword i;
+
+ vec_foreach (r, nm->nodes_by_type[VLIB_NODE_TYPE_INTERNAL])
+ {
+ if (r->n_next_nodes == 0)
+ continue;
+
+ n = vlib_get_node (vm, r->node_index);
+ nf = vec_elt_at_index (nm->next_frames, r->next_frame_index);
+
+ for (i = 0; i < vec_len (n->next_nodes); i++)
+ {
+ next = vlib_get_node (vm, n->next_nodes[i]);
+
+ /* Validate node runtime indices are correctly initialized. */
+ ASSERT (nf[i].node_runtime_index == next->runtime_index);
+
+ nf[i].flags = 0;
+ if (next->flags & VLIB_NODE_FLAG_FRAME_NO_FREE_AFTER_DISPATCH)
+ nf[i].flags |= VLIB_FRAME_NO_FREE_AFTER_DISPATCH;
+ }
+ }
+ }
+
+ /* Generate node sibling relationships. */
+ {
+ vlib_node_t * n, * sib;
+ uword si;
+
+ for (ni = 0; ni < vec_len (nm->nodes); ni++)
+ {
+ n = vec_elt (nm->nodes, ni);
+
+ if (! n->sibling_of)
+ continue;
+
+ sib = vlib_get_node_by_name (vm, (u8 *) n->sibling_of);
+ if (! sib)
+ clib_error ("sibling `%s' not found for node `%v'", n->sibling_of, n->name);
+
+ clib_bitmap_foreach (si, sib->sibling_bitmap, ({
+ vlib_node_t * m = vec_elt (nm->nodes, si);
+
+ /* Connect all of sibling's siblings to us. */
+ m->sibling_bitmap = clib_bitmap_ori (m->sibling_bitmap, n->index);
+
+ /* Connect us to all of sibling's siblings. */
+ n->sibling_bitmap = clib_bitmap_ori (n->sibling_bitmap, si);
+ }));
+
+ /* Connect sibling to us. */
+ sib->sibling_bitmap = clib_bitmap_ori (sib->sibling_bitmap, n->index);
+
+ /* Connect us to sibling. */
+ n->sibling_bitmap = clib_bitmap_ori (n->sibling_bitmap, sib->index);
+ }
+ }
+
+ done:
+ return error;
+}
diff --git a/vlib/vlib/node.h b/vlib/vlib/node.h
new file mode 100644
index 00000000000..806a9dae1b5
--- /dev/null
+++ b/vlib/vlib/node.h
@@ -0,0 +1,621 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * node.h: VLIB processing nodes
+ *
+ * Copyright (c) 2008 Eliot Dresselhaus
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef included_vlib_node_h
+#define included_vlib_node_h
+
+#include <vppinfra/longjmp.h>
+#include <vppinfra/timing_wheel.h>
+#include <vlib/trace.h> /* for vlib_trace_filter_t */
+
+/* Forward declaration. */
+struct vlib_node_runtime_t;
+struct vlib_frame_t;
+
+/* Internal nodes (including output nodes) move data from node to
+ node (or out of the graph for output nodes). */
+typedef uword (vlib_node_function_t) (struct vlib_main_t * vm,
+ struct vlib_node_runtime_t * node,
+ struct vlib_frame_t * frame);
+
+typedef enum {
+ /* An internal node on the call graph (could be output). */
+ VLIB_NODE_TYPE_INTERNAL,
+
+ /* Nodes which input data into the processing graph.
+ Input nodes are called for each iteration of main loop. */
+ VLIB_NODE_TYPE_INPUT,
+
+ /* Nodes to be called before all input nodes.
+ Used, for example, to clean out driver TX rings before
+ processing input. */
+ VLIB_NODE_TYPE_PRE_INPUT,
+
+ /* "Process" nodes which can be suspended and later resumed. */
+ VLIB_NODE_TYPE_PROCESS,
+
+ VLIB_N_NODE_TYPE,
+} vlib_node_type_t;
+
+typedef struct _vlib_node_registration {
+ /* Vector processing function for this node. */
+ vlib_node_function_t * function;
+
+ /* Node name. */
+ char * name;
+
+ /* Name of sibling (if applicable). */
+ char * sibling_of;
+
+ /* Node index filled in by registration. */
+ u32 index;
+
+ /* Type of this node. */
+ vlib_node_type_t type;
+
+ /* Error strings indexed by error code for this node. */
+ char ** error_strings;
+
+ /* Buffer format/unformat for this node. */
+ format_function_t * format_buffer;
+ unformat_function_t * unformat_buffer;
+
+ /* Trace format/unformat for this node. */
+ format_function_t * format_trace;
+ unformat_function_t * unformat_trace;
+
+ /* Function to validate incoming frames. */
+ u8 * (* validate_frame) (struct vlib_main_t * vm,
+ struct vlib_node_runtime_t *,
+ struct vlib_frame_t * f);
+
+ /* Per-node runtime data. */
+ void * runtime_data;
+
+ /* Process stack size. */
+ u16 process_log2_n_stack_bytes;
+
+ /* Number of bytes of per-node run time data. */
+ u8 runtime_data_bytes;
+
+ /* State for input nodes. */
+ u8 state;
+
+ /* Node flags. */
+ u16 flags;
+
+ /* Size of scalar and vector arguments in bytes. */
+ u16 scalar_size, vector_size;
+
+ /* Number of error codes used by this node. */
+ u16 n_errors;
+
+ /* Number of next node names that follow. */
+ u16 n_next_nodes;
+
+  /* Linked list built by registration constructors, don't ask... */
+ struct _vlib_node_registration * next_registration;
+
+ /* Names of next nodes which this node feeds into. */
+ char * next_nodes[];
+
+} vlib_node_registration_t;
+
+#define VLIB_REGISTER_NODE(x,...) \
+ __VA_ARGS__ vlib_node_registration_t x; \
+static void __vlib_add_node_registration_##x (void) \
+ __attribute__((__constructor__)) ; \
+static void __vlib_add_node_registration_##x (void) \
+{ \
+ vlib_main_t * vm = vlib_get_main(); \
+ x.next_registration = vm->node_main.node_registrations; \
+ vm->node_main.node_registrations = &x; \
+} \
+__VA_ARGS__ vlib_node_registration_t x
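+
+/* Usage sketch for the macro above.  The node function and all names
+   here are hypothetical, not part of this commit; the registration's
+   constructor links it onto vm->node_main.node_registrations so that
+   vlib_register_all_static_nodes() can find it at startup. */
+#if 0
+static uword
+my_node_fn (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * f)
+{ return f->n_vectors; }
+
+VLIB_REGISTER_NODE (my_node, static) = {
+  .function = my_node_fn,
+  .name = "my-node",
+  .vector_size = sizeof (u32),
+  .n_next_nodes = 1,
+  .next_nodes = { [0] = "error-drop" },
+};
+#endif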
+
+always_inline vlib_node_registration_t *
+vlib_node_next_registered (vlib_node_registration_t * c)
+{
+ c = clib_elf_section_data_next (c, c->n_next_nodes * sizeof (c->next_nodes[0]));
+ return c;
+}
+
+typedef struct {
+ /* Total calls, clock ticks and vector elements processed for this node. */
+ u64 calls, vectors, clocks, suspends;
+ u64 max_clock;
+ u64 max_clock_n;
+} vlib_node_stats_t;
+
+#define foreach_vlib_node_state \
+ /* Input node is called each iteration of main loop. \
+ This is the default (zero). */ \
+ _ (POLLING) \
+ /* Input node is called when device signals an interrupt. */ \
+ _ (INTERRUPT) \
+ /* Input node is never called. */ \
+ _ (DISABLED)
+
+typedef enum {
+#define _(f) VLIB_NODE_STATE_##f,
+ foreach_vlib_node_state
+#undef _
+ VLIB_N_NODE_STATE,
+} vlib_node_state_t;
+
+typedef struct vlib_node_t {
+ /* Vector processing function for this node. */
+ vlib_node_function_t * function;
+
+ /* Node name. */
+ u8 * name;
+
+ /* Node name index in elog string table. */
+ u32 name_elog_string;
+
+ /* Total statistics for this node. */
+ vlib_node_stats_t stats_total;
+
+ /* Saved values as of last clear (or zero if never cleared).
+ Current values are always stats_total - stats_last_clear. */
+ vlib_node_stats_t stats_last_clear;
+
+ /* Type of this node. */
+ vlib_node_type_t type;
+
+ /* Node index. */
+ u32 index;
+
+ /* Index of corresponding node runtime. */
+ u32 runtime_index;
+
+ /* Runtime data for this node. */
+ void * runtime_data;
+
+ /* Node flags. */
+ u16 flags;
+
+ /* Processing function keeps frame. Tells node dispatching code not
+ to free frame after dispatch is done. */
+#define VLIB_NODE_FLAG_FRAME_NO_FREE_AFTER_DISPATCH (1 << 0)
+
+ /* Node counts as output/drop/punt node for stats purposes. */
+#define VLIB_NODE_FLAG_IS_OUTPUT (1 << 1)
+#define VLIB_NODE_FLAG_IS_DROP (1 << 2)
+#define VLIB_NODE_FLAG_IS_PUNT (1 << 3)
+#define VLIB_NODE_FLAG_IS_HANDOFF (1 << 4)
+
+ /* Set if current node runtime has traced vectors. */
+#define VLIB_NODE_FLAG_TRACE (1 << 5)
+
+#define VLIB_NODE_FLAG_SWITCH_FROM_INTERRUPT_TO_POLLING_MODE (1 << 6)
+#define VLIB_NODE_FLAG_SWITCH_FROM_POLLING_TO_INTERRUPT_MODE (1 << 7)
+
+ /* State for input nodes. */
+ u8 state;
+
+ /* Number of bytes of run time data. */
+ u8 runtime_data_bytes;
+
+ /* Number of error codes used by this node. */
+ u16 n_errors;
+
+ /* Size of scalar and vector arguments in bytes. */
+ u16 scalar_size, vector_size;
+
+ /* Handle/index in error heap for this node. */
+ u32 error_heap_handle;
+ u32 error_heap_index;
+
+ /* Error strings indexed by error code for this node. */
+ char ** error_strings;
+
+ /* Vector of next node names.
+ Only used before next_nodes array is initialized. */
+ char ** next_node_names;
+
+ /* Next node indices for this node. */
+ u32 * next_nodes;
+
+ /* Name of node that we are sibling of. */
+ char * sibling_of;
+
+ /* Bitmap of all of this node's siblings. */
+ uword * sibling_bitmap;
+
+ /* Total number of vectors sent to each next node. */
+ u64 * n_vectors_by_next_node;
+
+ /* Hash table mapping next node index into slot in
+ next_nodes vector. Quickly determines whether this node
+ is connected to given next node and, if so, with which slot. */
+ uword * next_slot_by_node;
+
+ /* Bitmap of node indices which feed this node. */
+ uword * prev_node_bitmap;
+
+  /* Node/next-index which owns enqueue rights to this node. */
+ u32 owner_node_index, owner_next_index;
+
+ /* Buffer format/unformat for this node. */
+ format_function_t * format_buffer;
+ unformat_function_t * unformat_buffer;
+
+ /* Trace buffer format/unformat for this node. */
+ format_function_t * format_trace;
+
+ /* Function to validate incoming frames. */
+ u8 * (* validate_frame) (struct vlib_main_t * vm,
+ struct vlib_node_runtime_t *,
+ struct vlib_frame_t * f);
+} vlib_node_t;
+
+#define VLIB_INVALID_NODE_INDEX ((u32) ~0)
+
+/* Max number of vector elements to process at once per node. */
+#define VLIB_FRAME_SIZE 256
+
+/* Calling frame (think stack frame) for a node. */
+typedef struct vlib_frame_t {
+ /* Frame flags. */
+ u16 flags;
+
+ /* Number of scalar bytes in arguments. */
+ u8 scalar_size;
+
+ /* Number of bytes per vector argument. */
+ u8 vector_size;
+
+ /* Number of vector elements currently in frame. */
+ u16 n_vectors;
+
+ /* Owner cpuid / heap id */
+ u16 cpu_index;
+
+ /* Scalar and vector arguments to next node. */
+ u8 arguments[0];
+} vlib_frame_t;
+
+typedef struct {
+ /* Frame index. */
+ u32 frame_index;
+
+ /* Node runtime for this next. */
+ u32 node_runtime_index;
+
+ /* Next frame flags. */
+ u32 flags;
+
+ /* Reflects node frame-used flag for this next. */
+#define VLIB_FRAME_NO_FREE_AFTER_DISPATCH \
+ VLIB_NODE_FLAG_FRAME_NO_FREE_AFTER_DISPATCH
+
+ /* This next frame owns enqueue to node
+ corresponding to node_runtime_index. */
+#define VLIB_FRAME_OWNER (1 << 15)
+
+ /* Set when frame has been allocated for this next. */
+#define VLIB_FRAME_IS_ALLOCATED VLIB_NODE_FLAG_IS_OUTPUT
+
+ /* Set when frame has been added to pending vector. */
+#define VLIB_FRAME_PENDING VLIB_NODE_FLAG_IS_DROP
+
+ /* Set when frame is to be freed after dispatch. */
+#define VLIB_FRAME_FREE_AFTER_DISPATCH VLIB_NODE_FLAG_IS_PUNT
+
+ /* Set when frame has traced packets. */
+#define VLIB_FRAME_TRACE VLIB_NODE_FLAG_TRACE
+
+  /* Number of vectors enqueued to this next since last overflow. */
+ u32 vectors_since_last_overflow;
+} vlib_next_frame_t;
+
+always_inline void
+vlib_next_frame_init (vlib_next_frame_t * nf)
+{
+ memset (nf, 0, sizeof (nf[0]));
+ nf->frame_index = ~0;
+ nf->node_runtime_index = ~0;
+}
+
+/* A frame pending dispatch by main loop. */
+typedef struct {
+ /* Node and runtime for this frame. */
+ u32 node_runtime_index;
+
+ /* Frame index (in the heap). */
+ u32 frame_index;
+
+ /* Start of next frames for this node. */
+ u32 next_frame_index;
+
+ /* Special value for next_frame_index when there is no next frame. */
+#define VLIB_PENDING_FRAME_NO_NEXT_FRAME ((u32) ~0)
+} vlib_pending_frame_t;
+
+typedef struct vlib_node_runtime_t {
+ /* Node function to call. */
+ vlib_node_function_t * function;
+
+ /* Vector of errors for this node. */
+ vlib_error_t * errors;
+
+ /* Number of clock cycles. */
+ u32 clocks_since_last_overflow;
+
+ /* Maximum clock cycle for an invocation. */
+ u32 max_clock;
+
+ /* Number of vectors in the recorded max_clock. */
+ u32 max_clock_n;
+
+ /* Number of calls. */
+ u32 calls_since_last_overflow;
+
+ /* Number of vector elements processed by this node. */
+ u32 vectors_since_last_overflow;
+
+ /* Start of next frames for this node. */
+ u32 next_frame_index;
+
+ /* Node index. */
+ u32 node_index;
+
+  /* For input nodes: decremented on each main loop iteration until it
+     reaches zero, at which point the node function is called.  Allows
+     some input nodes to be called more frequently than others. */
+ u32 input_main_loops_per_call;
+
+ /* Saved main loop counter of last dispatch of this node. */
+ u32 main_loop_count_last_dispatch;
+
+ u32 main_loop_vector_stats[2];
+
+ /* Copy of main node flags. */
+ u16 flags;
+
+ /* Input node state. */
+ u16 state;
+
+ u16 n_next_nodes;
+
+  /* Next frame index that vector arguments were enqueued to the last
+     time this node ran.  Set to zero before the node's first run. */
+ u16 cached_next_index;
+
+ /* CPU this node runs on */
+ u16 cpu_index;
+
+ /* Function dependent node-runtime. */
+ uword runtime_data[(128
+ - 1 * sizeof (vlib_node_function_t *)
+ - 1 * sizeof (vlib_error_t *)
+ - 11 * sizeof (u32)
+ - 5 * sizeof (u16)) / sizeof (uword)];
+} vlib_node_runtime_t;
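+
+/* The runtime_data[] arithmetic above pads vlib_node_runtime_t to a
+   fixed 128-byte footprint (two 64-byte cache lines on a 64-bit
+   target): the preceding members are exactly 1 function pointer,
+   1 error pointer, 11 u32s (main_loop_vector_stats[2] counts as two)
+   and 5 u16s. */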
+
+typedef struct {
+ /* Number of allocated frames for this scalar/vector size. */
+ u32 n_alloc_frames;
+
+ /* Vector of free frame indices for this scalar/vector size. */
+ u32 * free_frame_indices;
+} vlib_frame_size_t;
+
+typedef struct {
+  /* User's opaque value for this event type. */
+ uword opaque;
+} vlib_process_event_type_t;
+
+typedef struct {
+ /* Node runtime for this process. */
+ vlib_node_runtime_t node_runtime;
+
+ /* Where to longjmp when process is done. */
+ clib_longjmp_t return_longjmp;
+
+#define VLIB_PROCESS_RETURN_LONGJMP_RETURN ((uword) ~0 - 0)
+#define VLIB_PROCESS_RETURN_LONGJMP_SUSPEND ((uword) ~0 - 1)
+
+ /* Where to longjmp to resume node after suspend. */
+ clib_longjmp_t resume_longjmp;
+#define VLIB_PROCESS_RESUME_LONGJMP_SUSPEND 0
+#define VLIB_PROCESS_RESUME_LONGJMP_RESUME 1
+
+ u16 flags;
+#define VLIB_PROCESS_IS_SUSPENDED_WAITING_FOR_CLOCK (1 << 0)
+#define VLIB_PROCESS_IS_SUSPENDED_WAITING_FOR_EVENT (1 << 1)
+ /* Set to indicate that this process has been added to resume vector. */
+#define VLIB_PROCESS_RESUME_PENDING (1 << 2)
+
+ /* Process function is currently running. */
+#define VLIB_PROCESS_IS_RUNNING (1 << 3)
+
+ /* Size of process stack. */
+ u16 log2_n_stack_bytes;
+
+ u32 suspended_process_frame_index;
+
+ /* Number of times this process was suspended. */
+ u32 n_suspends;
+
+ /* Vectors of pending event data indexed by event type index. */
+ void ** pending_event_data_by_type_index;
+
+ /* Bitmap of event type-indices with non-empty vectors. */
+ uword * non_empty_event_type_bitmap;
+
+ /* Bitmap of event type-indices which are one time events. */
+ uword * one_time_event_type_bitmap;
+
+ /* Type is opaque pointer -- typically a pointer to an event handler
+ function. Hash table to map opaque to a type index. */
+ uword * event_type_index_by_type_opaque;
+
+ /* Pool of currently valid event types. */
+ vlib_process_event_type_t * event_type_pool;
+
+  /* When suspending, holds the CPU cycle count at which the process
+     is to be resumed. */
+ u64 resume_cpu_time;
+
+#ifdef CLIB_UNIX
+ /* Pad to a multiple of the page size so we can mprotect process stacks */
+ CLIB_PAD_FROM_TO (0x140, 0x1000);
+#endif
+ /* Process stack. Starts here and extends 2^log2_n_stack_bytes
+ bytes. */
+
+#define VLIB_PROCESS_STACK_MAGIC (0xdead7ead)
+ u32 stack[0];
+} vlib_process_t __attribute__ ((aligned (CLIB_CACHE_LINE_BYTES)));
+
+typedef struct {
+ u32 node_index;
+
+ u32 one_time_event;
+} vlib_one_time_waiting_process_t;
+
+typedef struct {
+ u16 n_data_elts;
+
+ u16 n_data_elt_bytes;
+
+ /* n_data_elts * n_data_elt_bytes */
+ u32 n_data_bytes;
+
+ /* Process node & event type to be used to signal event. */
+ u32 process_node_index;
+
+ u32 event_type_index;
+
+ union {
+ u8 inline_event_data[64 - 3 * sizeof (u32) - 2 * sizeof (u16)];
+
+ /* Vector of event data used only when data does not fit inline. */
+ u8 * event_data_as_vector;
+ };
+} vlib_signal_timed_event_data_t;
+
+always_inline uword
+vlib_timing_wheel_data_is_timed_event (u32 d)
+{ return d & 1; }
+
+always_inline u32
+vlib_timing_wheel_data_set_suspended_process (u32 i)
+{ return 0 + 2*i; }
+
+always_inline u32
+vlib_timing_wheel_data_set_timed_event (u32 i)
+{ return 1 + 2*i; }
+
+always_inline uword
+vlib_timing_wheel_data_get_index (u32 d)
+{ return d / 2; }
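+
+/* The four helpers above multiplex two kinds of timing wheel entries
+   into one u32: suspended process i encodes as 2*i (low bit clear),
+   timed event i as 2*i + 1 (low bit set), and
+   vlib_timing_wheel_data_get_index() recovers i in either case. */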
+
+typedef struct {
+ /* Public nodes. */
+ vlib_node_t ** nodes;
+
+ /* Node index hashed by node name. */
+ uword * node_by_name;
+
+ u32 flags;
+#define VLIB_NODE_MAIN_RUNTIME_STARTED (1 << 0)
+
+  /* Nodes segregated by type for cache locality.
+     Does not apply to nodes of type VLIB_NODE_TYPE_PROCESS, whose
+     runtimes live inside the process objects in the vector below. */
+ vlib_node_runtime_t * nodes_by_type[VLIB_N_NODE_TYPE];
+
+ /* Node runtime indices for input nodes with pending interrupts. */
+ u32 * pending_interrupt_node_runtime_indices;
+
+ /* Input nodes are switched from/to interrupt to/from polling mode
+ when average vector length goes above/below polling/interrupt
+ thresholds. */
+ u32 polling_threshold_vector_length;
+ u32 interrupt_threshold_vector_length;
+
+ /* Vector of next frames. */
+ vlib_next_frame_t * next_frames;
+
+ /* Vector of internal node's frames waiting to be called. */
+ vlib_pending_frame_t * pending_frames;
+
+ /* Timing wheel for scheduling time-based node dispatch. */
+ timing_wheel_t timing_wheel;
+
+ vlib_signal_timed_event_data_t * signal_timed_event_data_pool;
+
+ /* Opaque data vector added via timing_wheel_advance. */
+ u32 * data_from_advancing_timing_wheel;
+
+ /* CPU time of next process to be ready on timing wheel. */
+ u64 cpu_time_next_process_ready;
+
+ /* Vector of process nodes.
+ One for each node of type VLIB_NODE_TYPE_PROCESS. */
+ vlib_process_t ** processes;
+
+  /* Index of currently running process, or ~0 if none is running. */
+ u32 current_process_index;
+
+ /* Pool of pending process frames. */
+ vlib_pending_frame_t * suspended_process_frames;
+
+ /* Vector of event data vectors pending recycle. */
+ void ** recycled_event_data_vectors;
+
+ /* Current counts of nodes in each state. */
+ u32 input_node_counts_by_state[VLIB_N_NODE_STATE];
+
+ /* Hash of (scalar_size,vector_size) to frame_sizes index. */
+ uword * frame_size_hash;
+
+ /* Per-size frame allocation information. */
+ vlib_frame_size_t * frame_sizes;
+
+ /* Time of last node runtime stats clear. */
+ f64 time_last_runtime_stats_clear;
+
+ /* Node registrations added by constructors */
+ vlib_node_registration_t * node_registrations;
+} vlib_node_main_t;
+
+#endif /* included_vlib_node_h */
diff --git a/vlib/vlib/node_cli.c b/vlib/vlib/node_cli.c
new file mode 100644
index 00000000000..58c3776a67b
--- /dev/null
+++ b/vlib/vlib/node_cli.c
@@ -0,0 +1,441 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * node_cli.c: node CLI
+ *
+ * Copyright (c) 2008 Eliot Dresselhaus
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <vlib/vlib.h>
+#include <vlib/threads.h>
+
+static clib_error_t *
+show_node_graph (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ vlib_node_main_t * nm = &vm->node_main;
+ vlib_node_t * n;
+ u32 node_index;
+
+ vlib_cli_output (vm, "%U\n", format_vlib_node_graph, nm, 0);
+
+ if (unformat (input, "%U", unformat_vlib_node, vm, &node_index))
+ {
+ n = vlib_get_node (vm, node_index);
+ vlib_cli_output (vm, "%U\n", format_vlib_node_graph, nm, n);
+ }
+ else
+ {
+ vlib_node_t ** nodes = vec_dup (nm->nodes);
+ uword i;
+
+ vec_sort (nodes, n1, n2,
+ vec_cmp (n1[0]->name, n2[0]->name));
+
+ for (i = 0; i < vec_len (nodes); i++)
+ vlib_cli_output (vm, "%U\n\n", format_vlib_node_graph, nm, nodes[i]);
+
+ vec_free (nodes);
+ }
+
+ return 0;
+}
+
+VLIB_CLI_COMMAND (show_node_graph_command, static) = {
+ .path = "show vlib graph",
+ .short_help = "Show packet processing node graph",
+ .function = show_node_graph,
+};
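+
+/* From the CLI, "show vlib graph" prints next/previous links for every
+   node in name order, while "show vlib graph <node-name>" restricts
+   output to the single node parsed by unformat_vlib_node. */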
+
+static u8 * format_vlib_node_stats (u8 * s, va_list * va)
+{
+ vlib_main_t * vm = va_arg (*va, vlib_main_t *);
+ vlib_node_t * n = va_arg (*va, vlib_node_t *);
+ int max = va_arg (*va, int);
+ f64 v;
+ char * state;
+ u8 * ns;
+ u8 * misc_info = 0;
+ u64 c, p, l, d;
+ f64 x;
+ f64 maxc, maxcn;
+ u32 maxn;
+ uword indent;
+
+ if (! n)
+ {
+ if (max)
+ return format (s,
+ "%=30s%=17s%=16s%=16s%=16s%=16s",
+ "Name", "Max Node Clocks", "Vectors at Max", "Max Clocks", "Avg Clocks", "Avg Vectors/Call");
+ else
+ return format (s,
+ "%=30s%=12s%=16s%=16s%=16s%=16s%=16s",
+ "Name", "State", "Calls", "Vectors", "Suspends", "Clocks", "Vectors/Call");
+ }
+
+ indent = format_get_indent (s);
+
+ l = n->stats_total.clocks - n->stats_last_clear.clocks;
+ c = n->stats_total.calls - n->stats_last_clear.calls;
+ p = n->stats_total.vectors - n->stats_last_clear.vectors;
+ d = n->stats_total.suspends - n->stats_last_clear.suspends;
+ maxc = (f64)n->stats_total.max_clock;
+ maxn = n->stats_total.max_clock_n;
+ if (n->stats_total.max_clock_n)
+ maxcn = (f64)n->stats_total.max_clock / (f64)maxn;
+ else
+ maxcn = 0.0;
+
+ /* Clocks per packet, per call or per suspend. */
+ x = 0;
+ if (p > 0)
+ x = (f64) l / (f64) p;
+ else if (c > 0)
+ x = (f64) l / (f64) c;
+ else if (d > 0)
+ x = (f64) l / (f64) d;
+
+ if (c > 0)
+ v = (double)p / (double)c;
+ else
+ v = 0;
+
+ state = "active";
+ if (n->type == VLIB_NODE_TYPE_PROCESS)
+ {
+ vlib_process_t * p = vlib_get_process_from_node (vm, n);
+
+ /* Show processes with events pending. This helps spot bugs where events are not
+ being handled. */
+ if (! clib_bitmap_is_zero (p->non_empty_event_type_bitmap))
+ misc_info = format (misc_info, "events pending, ");
+
+ switch (p->flags & (VLIB_PROCESS_IS_SUSPENDED_WAITING_FOR_CLOCK
+ | VLIB_PROCESS_IS_SUSPENDED_WAITING_FOR_EVENT))
+ {
+ default:
+ if (! (p->flags & VLIB_PROCESS_IS_RUNNING))
+ state = "done";
+ break;
+
+ case VLIB_PROCESS_IS_SUSPENDED_WAITING_FOR_CLOCK:
+ state = "time wait";
+ break;
+
+ case VLIB_PROCESS_IS_SUSPENDED_WAITING_FOR_EVENT:
+ state = "event wait";
+ break;
+
+ case (VLIB_PROCESS_IS_SUSPENDED_WAITING_FOR_EVENT
+ | VLIB_PROCESS_IS_SUSPENDED_WAITING_FOR_CLOCK):
+ state = "any wait";
+ break;
+ }
+ }
+ else if (n->type != VLIB_NODE_TYPE_INTERNAL)
+ {
+ state = "polling";
+ if (n->state == VLIB_NODE_STATE_DISABLED)
+ state = "disabled";
+ else if (n->state == VLIB_NODE_STATE_INTERRUPT)
+ state = "interrupt wait";
+ }
+
+ ns = n->name;
+
+ if (max)
+ s = format (s, "%-30v%=17.2e%=16d%=16.2e%=16.2e%=16.2e",
+ ns, maxc, maxn, maxcn, x, v);
+ else
+ s = format (s, "%-30v%=12s%16Ld%16Ld%16Ld%16.2e%16.2f", ns, state,
+ c, p, d, x, v);
+
+ if (ns != n->name)
+ vec_free (ns);
+
+ if (misc_info)
+ {
+ s = format (s, "\n%U%v", format_white_space, indent + 4, misc_info);
+ vec_free (misc_info);
+ }
+
+ return s;
+}
+
+static clib_error_t *
+show_node_runtime (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ vlib_node_main_t * nm = &vm->node_main;
+ vlib_node_t * n;
+ f64 time_now;
+ u32 node_index;
+ vlib_node_t *** node_dups = 0;
+ f64 * vectors_per_main_loop = 0;
+ f64 * last_vector_length_per_node = 0;
+
+ time_now = vlib_time_now (vm);
+
+ if (unformat (input, "%U", unformat_vlib_node, vm, &node_index))
+ {
+ n = vlib_get_node (vm, node_index);
+ vlib_node_sync_stats (vm, n);
+ vlib_cli_output (vm, "%U\n", format_vlib_node_stats, vm, 0, 0);
+ vlib_cli_output (vm, "%U\n", format_vlib_node_stats, vm, n, 0);
+ }
+ else
+ {
+ vlib_node_t ** nodes;
+ uword i, j;
+ f64 dt;
+ u64 n_input, n_output, n_drop, n_punt;
+ u64 n_internal_vectors, n_internal_calls;
+ u64 n_clocks, l, v, c, d;
+ int brief = 1;
+ int max = 0;
+ vlib_main_t ** stat_vms = 0, *stat_vm;
+
+ /* Suppress nodes with zero calls since last clear */
+ if (unformat (input, "brief") || unformat (input, "b"))
+ brief = 1;
+ if (unformat (input, "verbose") || unformat(input, "v"))
+ brief = 0;
+ if (unformat (input, "max") || unformat(input, "m"))
+ max = 1;
+
+ if (vec_len(vlib_mains) == 0)
+ vec_add1 (stat_vms, vm);
+ else
+ {
+ for (i = 0; i < vec_len (vlib_mains); i++)
+ {
+ stat_vm = vlib_mains[i];
+ if (stat_vm)
+ vec_add1 (stat_vms, stat_vm);
+ }
+ }
+
+ /*
+ * Barrier sync across stats scraping.
+ * Otherwise, the counts will be grossly inaccurate.
+ */
+ vlib_worker_thread_barrier_sync(vm);
+
+ for (j = 0; j < vec_len (stat_vms); j++)
+ {
+ stat_vm = stat_vms[j];
+ nm = &stat_vm->node_main;
+
+ for (i = 0; i < vec_len (nm->nodes); i++)
+ {
+ n = nm->nodes[i];
+ vlib_node_sync_stats (stat_vm, n);
+ }
+
+ nodes = vec_dup (nm->nodes);
+
+ vec_add1(node_dups, nodes);
+ vec_add1 (vectors_per_main_loop,
+ vlib_last_vectors_per_main_loop_as_f64 (stat_vm));
+ vec_add1 (last_vector_length_per_node,
+ vlib_last_vector_length_per_node (stat_vm));
+ }
+ vlib_worker_thread_barrier_release(vm);
+
+
+ for (j = 0; j < vec_len (stat_vms); j++)
+ {
+ stat_vm = stat_vms[j];
+ nodes = node_dups[j];
+
+ vec_sort (nodes, n1, n2,
+ vec_cmp (n1[0]->name, n2[0]->name));
+
+ n_input = n_output = n_drop = n_punt = n_clocks = 0;
+ n_internal_vectors = n_internal_calls = 0;
+ for (i = 0; i < vec_len (nodes); i++)
+ {
+ n = nodes[i];
+
+ l = n->stats_total.clocks - n->stats_last_clear.clocks;
+ n_clocks += l;
+
+ v = n->stats_total.vectors - n->stats_last_clear.vectors;
+ c = n->stats_total.calls - n->stats_last_clear.calls;
+
+ switch (n->type)
+ {
+ default:
+ continue;
+
+ case VLIB_NODE_TYPE_INTERNAL:
+ n_output += (n->flags & VLIB_NODE_FLAG_IS_OUTPUT) ? v : 0;
+ n_drop += (n->flags & VLIB_NODE_FLAG_IS_DROP) ? v : 0;
+ n_punt += (n->flags & VLIB_NODE_FLAG_IS_PUNT) ? v : 0;
+ if (! (n->flags & VLIB_NODE_FLAG_IS_OUTPUT))
+ {
+ n_internal_vectors += v;
+ n_internal_calls += c;
+ }
+ if (n->flags & VLIB_NODE_FLAG_IS_HANDOFF)
+ n_input += v;
+ break;
+
+ case VLIB_NODE_TYPE_INPUT:
+ n_input += v;
+ break;
+ }
+ }
+
+ if (vec_len (vlib_mains))
+ {
+ vlib_worker_thread_t *w = vlib_worker_threads + j;
+ if (j > 0)
+ vlib_cli_output (vm, "---------------");
+
+	  if (w->dpdk_lcore_id > -1)
+ vlib_cli_output (vm, "Thread %d %v (lcore %u)", j, w->name,
+ w->dpdk_lcore_id);
+ else
+ vlib_cli_output (vm, "Thread %d %v", j,
+ w->name);
+ }
+
+ dt = time_now - nm->time_last_runtime_stats_clear;
+ vlib_cli_output
+ (vm,
+ "Time %.1f, average vectors/node %.2f, last %d main loops %.2f per node %.2f"
+ "\n vector rates in %.4e, out %.4e, drop %.4e, punt %.4e",
+ dt,
+ (n_internal_calls > 0
+ ? (f64) n_internal_vectors / (f64) n_internal_calls
+ : 0),
+ 1 << VLIB_LOG2_MAIN_LOOPS_PER_STATS_UPDATE,
+ vectors_per_main_loop [j],
+ last_vector_length_per_node [j],
+ (f64) n_input / dt,
+ (f64) n_output / dt,
+ (f64) n_drop / dt,
+ (f64) n_punt / dt);
+
+ vlib_cli_output (vm, "%U", format_vlib_node_stats, stat_vm, 0, max);
+ for (i = 0; i < vec_len (nodes); i++)
+ {
+ c = nodes[i]->stats_total.calls - nodes[i]->stats_last_clear.calls;
+ d = nodes[i]->stats_total.suspends - nodes[i]->stats_last_clear.suspends;
+ if (c || d || ! brief)
+ {
+ vlib_cli_output (vm, "%U", format_vlib_node_stats, stat_vm,
+ nodes[i], max);
+ }
+ }
+ vec_free (nodes);
+ }
+ vec_free (stat_vms);
+ vec_free (node_dups);
+ vec_free (vectors_per_main_loop);
+ vec_free (last_vector_length_per_node);
+ }
+
+ return 0;
+}
+
+VLIB_CLI_COMMAND (show_node_runtime_command, static) = {
+ .path = "show runtime",
+ .short_help = "Show packet processing runtime",
+ .function = show_node_runtime,
+ .is_mp_safe = 1,
+};
+
+static clib_error_t *
+clear_node_runtime (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ vlib_node_main_t * nm;
+ vlib_node_t * n;
+ int i, j;
+ vlib_main_t ** stat_vms = 0, *stat_vm;
+ vlib_node_runtime_t * r;
+
+ if (vec_len(vlib_mains) == 0)
+ vec_add1 (stat_vms, vm);
+ else
+ {
+ for (i = 0; i < vec_len (vlib_mains); i++)
+ {
+ stat_vm = vlib_mains[i];
+ if (stat_vm)
+ vec_add1 (stat_vms, stat_vm);
+ }
+ }
+
+ vlib_worker_thread_barrier_sync(vm);
+
+ for (j = 0; j < vec_len (stat_vms); j++)
+ {
+ stat_vm = stat_vms[j];
+ nm = &stat_vm->node_main;
+
+ for (i = 0; i < vec_len (nm->nodes); i++)
+ {
+ n = nm->nodes[i];
+ vlib_node_sync_stats (stat_vm, n);
+ n->stats_last_clear = n->stats_total;
+
+ r = vlib_node_get_runtime (stat_vm, n->index);
+ r->max_clock = 0;
+ }
+ /* Note: input/output rates computed using vlib_global_main */
+ nm->time_last_runtime_stats_clear = vlib_time_now (vm);
+ }
+
+ vlib_worker_thread_barrier_release(vm);
+
+ vec_free (stat_vms);
+
+ return 0;
+}
+
+VLIB_CLI_COMMAND (clear_node_runtime_command, static) = {
+ .path = "clear runtime",
+ .short_help = "Clear packet processing runtime statistics",
+ .function = clear_node_runtime,
+};
+
+/* Dummy function to get us linked in. */
+void vlib_node_cli_reference (void) {}
diff --git a/vlib/vlib/node_format.c b/vlib/vlib/node_format.c
new file mode 100644
index 00000000000..d1d415e1376
--- /dev/null
+++ b/vlib/vlib/node_format.c
@@ -0,0 +1,169 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * node_format.c: node formatting
+ *
+ * Copyright (c) 2008 Eliot Dresselhaus
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <vlib/vlib.h>
+
+u8 * format_vlib_node_graph (u8 * s, va_list * va)
+{
+ vlib_node_main_t * nm = va_arg (*va, vlib_node_main_t *);
+ vlib_node_t * n = va_arg (*va, vlib_node_t *);
+ int i, j;
+ uword indent;
+ typedef struct {
+ u32 next_node;
+ u32 next_slot;
+ u32 prev_node;
+ } tmp_t;
+ tmp_t * tmps = 0;
+ tmp_t empty = { .next_node = ~0, .prev_node = ~0 };
+
+ if (! n)
+ return format (s,
+ "%=26s%=26s%=26s",
+ "Name", "Next", "Previous");
+
+ s = format (s, "%-26v", n->name);
+
+ indent = format_get_indent (s);
+
+ for (i = j = 0; i < vec_len (n->next_nodes); i++)
+ {
+ if (n->next_nodes[i] == VLIB_INVALID_NODE_INDEX)
+ continue;
+ vec_validate_init_empty (tmps, j, empty);
+ tmps[j].next_node = n->next_nodes[i];
+ tmps[j].next_slot = i;
+ j++;
+ }
+
+ j = 0;
+ clib_bitmap_foreach (i, n->prev_node_bitmap, ({
+ vec_validate_init_empty (tmps, j, empty);
+ tmps[j].prev_node = i;
+ j++;
+ }));
+
+ for (i = 0; i < vec_len (tmps); i++)
+ {
+ if (i > 0)
+ s = format (s, "\n%U", format_white_space, indent);
+
+ if (tmps[i].next_node != ~0)
+ {
+ vlib_node_t * x;
+ u8 * t = 0;
+
+ x = vec_elt (nm->nodes, tmps[i].next_node);
+ t = format (t, "%v [%d]", x->name, tmps[i].next_slot);
+ s = format (s, "%=26v", t);
+ vec_free (t);
+ }
+ else
+ s = format (s, "%26s", "");
+
+ if (tmps[i].prev_node != ~0)
+ {
+ vlib_node_t * x;
+ x = vec_elt (nm->nodes, tmps[i].prev_node);
+ s = format (s, "%=26v", x->name);
+ }
+ }
+
+ vec_free (tmps);
+
+ return s;
+}
+
+u8 * format_vlib_node_and_next (u8 * s, va_list * va)
+{
+ vlib_main_t * vm = va_arg (*va, vlib_main_t *);
+ vlib_node_t * n = va_arg (*va, vlib_node_t *);
+ u32 next_index = va_arg (*va, u32);
+ vlib_node_t * n_next;
+ u32 * ni;
+
+ ni = vec_elt_at_index (n->next_nodes, next_index);
+ n_next = vlib_get_node (vm, ni[0]);
+ return format (s, "%v -> %v", n->name, n_next->name);
+}
+
+u8 * format_vlib_node_name (u8 * s, va_list * va)
+{
+ vlib_main_t * vm = va_arg (*va, vlib_main_t *);
+ u32 node_index = va_arg (*va, u32);
+ vlib_node_t * n = vlib_get_node (vm, node_index);
+
+ return format (s, "%v", n->name);
+}
+
+u8 * format_vlib_next_node_name (u8 * s, va_list * va)
+{
+ vlib_main_t * vm = va_arg (*va, vlib_main_t *);
+ u32 node_index = va_arg (*va, u32);
+ u32 next_index = va_arg (*va, u32);
+ vlib_node_t * next = vlib_get_next_node (vm, node_index, next_index);
+ return format (s, "%v", next->name);
+}
+
+/* Parse node name -> node index. */
+uword unformat_vlib_node (unformat_input_t * input, va_list * args)
+{
+ vlib_main_t * vm = va_arg (*args, vlib_main_t *);
+ u32 * result = va_arg (*args, u32 *);
+
+ return unformat_user (input, unformat_hash_vec_string,
+ vm->node_main.node_by_name, result);
+}
+
+u8 * format_vlib_time (u8 * s, va_list * va)
+{
+ CLIB_UNUSED (vlib_main_t * vm) = va_arg (*va, vlib_main_t *);
+ f64 time = va_arg (*va, f64);
+ return format (s, "%12.4f", time);
+}
+
+u8 * format_vlib_cpu_time (u8 * s, va_list * va)
+{
+ vlib_main_t * vm = va_arg (*va, vlib_main_t *);
+ u64 cpu_time = va_arg (*va, u64);
+ f64 dt;
+
+ dt = (cpu_time - vm->clib_time.init_cpu_time) * vm->clib_time.seconds_per_clock;
+ return format (s, "%U", format_vlib_time, vm, dt);
+}
diff --git a/vlib/vlib/node_funcs.h b/vlib/vlib/node_funcs.h
new file mode 100644
index 00000000000..80dc3c602a1
--- /dev/null
+++ b/vlib/vlib/node_funcs.h
@@ -0,0 +1,979 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * node_funcs.h: processing nodes global functions/inlines
+ *
+ * Copyright (c) 2008 Eliot Dresselhaus
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef included_vlib_node_funcs_h
+#define included_vlib_node_funcs_h
+
+#include <vppinfra/fifo.h>
+
+always_inline vlib_node_t *
+vlib_get_node (vlib_main_t * vm, u32 i)
+{ return vec_elt (vm->node_main.nodes, i); }
+
+always_inline vlib_node_t *
+vlib_get_next_node (vlib_main_t * vm, u32 node_index, u32 next_index)
+{
+ vlib_node_main_t * nm = &vm->node_main;
+ vlib_node_t * n;
+
+ n = vec_elt (nm->nodes, node_index);
+ ASSERT (next_index < vec_len (n->next_nodes));
+ return vlib_get_node (vm, n->next_nodes[next_index]);
+}
+
+always_inline vlib_node_runtime_t *
+vlib_node_get_runtime (vlib_main_t * vm, u32 node_index)
+{
+ vlib_node_main_t * nm = &vm->node_main;
+ vlib_node_t * n = vec_elt (nm->nodes, node_index);
+ vlib_process_t * p;
+ if (n->type != VLIB_NODE_TYPE_PROCESS)
+ return vec_elt_at_index (nm->nodes_by_type[n->type], n->runtime_index);
+ else
+ {
+ p = vec_elt (nm->processes, n->runtime_index);
+ return &p->node_runtime;
+ }
+}
+
+always_inline void *
+vlib_node_get_runtime_data (vlib_main_t * vm, u32 node_index)
+{
+ vlib_node_runtime_t * r = vlib_node_get_runtime (vm, node_index);
+ return r->runtime_data;
+}
+
+always_inline void
+vlib_node_set_runtime_data (vlib_main_t * vm, u32 node_index,
+ void * runtime_data,
+ u32 n_runtime_data_bytes)
+{
+ vlib_node_t * n = vlib_get_node (vm, node_index);
+ vlib_node_runtime_t * r = vlib_node_get_runtime (vm, node_index);
+
+ n->runtime_data_bytes = n_runtime_data_bytes;
+ vec_free (n->runtime_data);
+ vec_add (n->runtime_data, runtime_data, n_runtime_data_bytes);
+
+ ASSERT (vec_len (n->runtime_data) <= sizeof (r->runtime_data));
+ if (vec_len (n->runtime_data) > 0)
+ memcpy (r->runtime_data, n->runtime_data, vec_len (n->runtime_data));
+}
+
+always_inline void
+vlib_node_set_state (vlib_main_t * vm, u32 node_index, vlib_node_state_t new_state)
+{
+ vlib_node_main_t * nm = &vm->node_main;
+ vlib_node_t * n;
+ vlib_node_runtime_t * r;
+
+ n = vec_elt (nm->nodes, node_index);
+ if (n->type == VLIB_NODE_TYPE_PROCESS)
+ {
+ vlib_process_t * p = vec_elt (nm->processes, n->runtime_index);
+ r = &p->node_runtime;
+
+ /* When disabling make sure flags are cleared. */
+ p->flags &= ~(VLIB_PROCESS_RESUME_PENDING
+ | VLIB_PROCESS_IS_SUSPENDED_WAITING_FOR_CLOCK
+ | VLIB_PROCESS_IS_SUSPENDED_WAITING_FOR_EVENT);
+ }
+ else
+ r = vec_elt_at_index (nm->nodes_by_type[n->type], n->runtime_index);
+
+ ASSERT (new_state < VLIB_N_NODE_STATE);
+
+ if (n->type == VLIB_NODE_TYPE_INPUT)
+ {
+ ASSERT (nm->input_node_counts_by_state[n->state] > 0);
+ nm->input_node_counts_by_state[n->state] -= 1;
+ nm->input_node_counts_by_state[new_state] += 1;
+ }
+
+ n->state = new_state;
+ r->state = new_state;
+}
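+
+/* Usage sketch (the node name is hypothetical): force an input node
+   into polling mode.
+
+     vlib_node_t * n = vlib_get_node_by_name (vm, (u8 *) "my-input");
+     vlib_node_set_state (vm, n->index, VLIB_NODE_STATE_POLLING);
+*/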
+
+always_inline void
+vlib_node_set_interrupt_pending (vlib_main_t * vm, u32 node_index)
+{
+ vlib_node_main_t * nm = &vm->node_main;
+ vlib_node_t * n = vec_elt (nm->nodes, node_index);
+ ASSERT (n->type == VLIB_NODE_TYPE_INPUT);
+ vec_add1 (nm->pending_interrupt_node_runtime_indices, n->runtime_index);
+}
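+
+/* A driver whose input node is in VLIB_NODE_STATE_INTERRUPT mode
+   typically calls vlib_node_set_interrupt_pending() from its interrupt
+   handling path; the main loop then dispatches that input node once
+   without it having to poll. */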
+
+always_inline vlib_process_t *
+vlib_get_process_from_node (vlib_main_t * vm, vlib_node_t * node)
+{
+ vlib_node_main_t * nm = &vm->node_main;
+ ASSERT (node->type == VLIB_NODE_TYPE_PROCESS);
+ return vec_elt (nm->processes, node->runtime_index);
+}
+
+/* Fetches frame with given handle. */
+always_inline vlib_frame_t *
+vlib_get_frame_no_check (vlib_main_t * vm, uword frame_index)
+{
+ vlib_frame_t * f;
+ u32 cpu_index = frame_index & VLIB_CPU_MASK;
+ u32 offset = frame_index & VLIB_OFFSET_MASK;
+ vm = vlib_mains ? vlib_mains[cpu_index] : vm;
+ f = vm->heap_base + offset;
+ return f;
+}
+
+always_inline u32
+vlib_frame_index_no_check (vlib_main_t * vm, vlib_frame_t * f)
+{
+ u32 i;
+
+ ASSERT (((uword)f & VLIB_CPU_MASK)==0);
+
+ vm = vlib_mains ? vlib_mains[f->cpu_index] : vm;
+
+ i = ((u8 *) f - (u8 *) vm->heap_base);
+ return i | f->cpu_index;
+}
+
+always_inline vlib_frame_t *
+vlib_get_frame (vlib_main_t * vm, uword frame_index)
+{
+ vlib_frame_t * f = vlib_get_frame_no_check (vm, frame_index);
+ ASSERT (f->flags & VLIB_FRAME_IS_ALLOCATED);
+ return f;
+}
+
+always_inline u32
+vlib_frame_index (vlib_main_t * vm, vlib_frame_t * f)
+{
+ uword i = vlib_frame_index_no_check (vm, f);
+ ASSERT (vlib_get_frame (vm, i) == f);
+ return i;
+}
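+
+/* A frame index is a single u32 handle: the low bits (VLIB_CPU_MASK)
+   name the owning thread and the remaining bits are a byte offset
+   from that thread's heap base, so frames on any worker heap can be
+   referenced uniformly. */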
+
+/* Byte alignment for vector arguments. */
+#define VLIB_FRAME_VECTOR_ALIGN (1 << 4)
+
+always_inline u32
+vlib_frame_vector_byte_offset (u32 scalar_size)
+{
+ return round_pow2 (sizeof (vlib_frame_t) + scalar_size,
+ VLIB_FRAME_VECTOR_ALIGN);
+}
+
+always_inline void *
+vlib_frame_vector_args (vlib_frame_t * f)
+{
+ return (void *) f + vlib_frame_vector_byte_offset (f->scalar_size);
+}
+
+/* Scalar data lies before aligned vector data. */
+always_inline void *
+vlib_frame_args (vlib_frame_t * f)
+{ return vlib_frame_vector_args (f) - f->scalar_size; }
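+
+/* Frame layout implied by the helpers above, sketched for an example
+   scalar_size of 4: the vector data starts at the first 16-byte
+   (VLIB_FRAME_VECTOR_ALIGN) boundary past the header plus scalars,
+   with the scalar bytes packed immediately before it:
+
+     [vlib_frame_t][pad][scalar bytes][vector elt 0][vector elt 1]...
+
+   so vlib_frame_args() is always vlib_frame_vector_args() minus
+   scalar_size. */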
+
+always_inline vlib_next_frame_t *
+vlib_node_runtime_get_next_frame (vlib_main_t * vm,
+ vlib_node_runtime_t * n,
+ u32 next_index)
+{
+ vlib_node_main_t * nm = &vm->node_main;
+ vlib_next_frame_t * nf;
+
+ ASSERT (next_index < n->n_next_nodes);
+ nf = vec_elt_at_index (nm->next_frames,
+ n->next_frame_index + next_index);
+
+ if (CLIB_DEBUG > 0)
+ {
+ vlib_node_t * node, * next;
+ node = vec_elt (nm->nodes, n->node_index);
+ next = vec_elt (nm->nodes, node->next_nodes[next_index]);
+ ASSERT (nf->node_runtime_index == next->runtime_index);
+ }
+
+ return nf;
+}
+
+always_inline vlib_next_frame_t *
+vlib_node_get_next_frame (vlib_main_t * vm,
+ u32 node_index,
+ u32 next_index)
+{
+ vlib_node_main_t * nm = &vm->node_main;
+ vlib_node_t * n;
+ vlib_node_runtime_t * r;
+
+ n = vec_elt (nm->nodes, node_index);
+ r = vec_elt_at_index (nm->nodes_by_type[n->type], n->runtime_index);
+ return vlib_node_runtime_get_next_frame (vm, r, next_index);
+}
+
+vlib_frame_t *
+vlib_get_next_frame_internal (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ u32 next_index,
+ u32 alloc_new_frame);
+
+#define vlib_get_next_frame_macro(vm,node,next_index,vectors,n_vectors_left,alloc_new_frame) \
+do { \
+ vlib_frame_t * _f \
+ = vlib_get_next_frame_internal ((vm), (node), (next_index), \
+ (alloc_new_frame)); \
+ u32 _n = _f->n_vectors; \
+ (vectors) = vlib_frame_vector_args (_f) + _n * sizeof ((vectors)[0]); \
+ (n_vectors_left) = VLIB_FRAME_SIZE - _n; \
+} while (0)
+
+#define vlib_get_next_frame(vm,node,next_index,vectors,n_vectors_left) \
+ vlib_get_next_frame_macro (vm, node, next_index, \
+ vectors, n_vectors_left, \
+ /* alloc new frame */ 0)
+
+#define vlib_get_new_next_frame(vm,node,next_index,vectors,n_vectors_left) \
+ vlib_get_next_frame_macro (vm, node, next_index, \
+ vectors, n_vectors_left, \
+ /* alloc new frame */ 1)
+
+void
+vlib_put_next_frame (vlib_main_t * vm,
+ vlib_node_runtime_t * r,
+ u32 next_index,
+ u32 n_packets_left);
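+
+/* Canonical enqueue pattern built from the get/put pair above (sketch;
+   next_index, n_buffers and buffer_index are assumed to be in scope in
+   the enclosing node function): */
+#if 0
+  u32 * to_next, n_left_to_next;
+
+  vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+  while (n_buffers > 0 && n_left_to_next > 0)
+    {
+      to_next[0] = buffer_index;	/* enqueue one buffer index */
+      to_next += 1;
+      n_left_to_next -= 1;
+      n_buffers -= 1;
+    }
+  vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+#endif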
+
+/* Combination get plus put. Returns vector argument just added. */
+#define vlib_set_next_frame(vm,node,next_index,v) \
+({ \
+ uword _n_left; \
+ vlib_get_next_frame ((vm), (node), (next_index), (v), _n_left); \
+ ASSERT (_n_left > 0); \
+ vlib_put_next_frame ((vm), (node), (next_index), _n_left - 1); \
+ (v); \
+})
+
+always_inline void
+vlib_set_next_frame_buffer (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ u32 next_index,
+ u32 buffer_index)
+{
+ u32 * p;
+ p = vlib_set_next_frame (vm, node, next_index, p);
+ p[0] = buffer_index;
+}
+
+vlib_frame_t * vlib_get_frame_to_node (vlib_main_t * vm, u32 to_node_index);
+void vlib_put_frame_to_node (vlib_main_t * vm, u32 to_node_index, vlib_frame_t * f);
+
+always_inline vlib_process_t *
+vlib_get_current_process (vlib_main_t * vm)
+{
+ vlib_node_main_t * nm = &vm->node_main;
+ return vec_elt (nm->processes, nm->current_process_index);
+}
+
+always_inline uword
+vlib_in_process_context (vlib_main_t * vm)
+{ return vm->node_main.current_process_index != ~0; }
+
+always_inline uword
+vlib_current_process (vlib_main_t * vm)
+{ return vlib_get_current_process (vm)->node_runtime.node_index; }
+
+/* Anything less than 1e-6 is considered zero. */
+always_inline uword
+vlib_process_suspend_time_is_zero (f64 dt)
+{ return dt < 1e-6; }
+
+always_inline uword
+vlib_process_suspend (vlib_main_t * vm, f64 dt)
+{
+ uword r;
+ vlib_node_main_t * nm = &vm->node_main;
+ vlib_process_t * p = vec_elt (nm->processes, nm->current_process_index);
+ u64 dt_cpu = dt * vm->clib_time.clocks_per_second;
+
+ if (vlib_process_suspend_time_is_zero (dt))
+ return VLIB_PROCESS_RESUME_LONGJMP_RESUME;
+
+ p->flags |= VLIB_PROCESS_IS_SUSPENDED_WAITING_FOR_CLOCK;
+ r = clib_setjmp (&p->resume_longjmp, VLIB_PROCESS_RESUME_LONGJMP_SUSPEND);
+ if (r == VLIB_PROCESS_RESUME_LONGJMP_SUSPEND)
+ {
+ p->resume_cpu_time = clib_cpu_time_now () + dt_cpu;
+ clib_longjmp (&p->return_longjmp, VLIB_PROCESS_RETURN_LONGJMP_SUSPEND);
+ }
+
+ return r;
+}
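+
+/* Sketch of a process node body using the suspend above for periodic
+   work once per second (the body is hypothetical): */
+#if 0
+  while (1)
+    {
+      vlib_process_suspend (vm, 1.0 /* seconds */);
+      /* ... periodic work here ... */
+    }
+#endif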
+
+always_inline void
+vlib_process_free_event_type (vlib_process_t * p, uword t, uword is_one_time_event)
+{
+ ASSERT (! pool_is_free_index (p->event_type_pool, t));
+ pool_put_index (p->event_type_pool, t);
+ if (is_one_time_event)
+ p->one_time_event_type_bitmap =
+ clib_bitmap_andnoti (p->one_time_event_type_bitmap, t);
+}
+
+always_inline void
+vlib_process_maybe_free_event_type (vlib_process_t * p, uword t)
+{
+ ASSERT (! pool_is_free_index (p->event_type_pool, t));
+ if (clib_bitmap_get (p->one_time_event_type_bitmap, t))
+ vlib_process_free_event_type (p, t, /* is_one_time_event */ 1);
+}
+
+always_inline void *
+vlib_process_get_event_data (vlib_main_t * vm, uword * return_event_type_opaque)
+{
+ vlib_node_main_t * nm = &vm->node_main;
+ vlib_process_t * p;
+ vlib_process_event_type_t * et;
+ uword t, l;
+ void * event_data_vector;
+
+ p = vec_elt (nm->processes, nm->current_process_index);
+
+ /* Find first type with events ready.
+ Return invalid type when there's nothing there. */
+ t = clib_bitmap_first_set (p->non_empty_event_type_bitmap);
+ if (t == ~0)
+ return 0;
+
+ p->non_empty_event_type_bitmap = clib_bitmap_andnoti (p->non_empty_event_type_bitmap, t);
+
+ l = _vec_len (p->pending_event_data_by_type_index[t]);
+ ASSERT (l > 0);
+ event_data_vector = p->pending_event_data_by_type_index[t];
+ p->pending_event_data_by_type_index[t] = 0;
+
+ et = pool_elt_at_index (p->event_type_pool, t);
+
+ /* Return user's opaque value and possibly index. */
+ *return_event_type_opaque = et->opaque;
+
+ vlib_process_maybe_free_event_type (p, t);
+
+ return event_data_vector;
+}
+
+/* Return event data vector for later reuse. We reuse event data to avoid
+ repeatedly allocating event vectors in cases where we care about speed. */
+always_inline void
+vlib_process_put_event_data (vlib_main_t * vm, void * event_data)
+{
+ vlib_node_main_t * nm = &vm->node_main;
+ vec_add1 (nm->recycled_event_data_vectors, event_data);
+}
+
+/* Return type & add any events to data vector. */
+always_inline uword
+vlib_process_get_events (vlib_main_t * vm, uword ** data_vector)
+{
+ vlib_node_main_t * nm = &vm->node_main;
+ vlib_process_t * p;
+ vlib_process_event_type_t * et;
+ uword r, t, l;
+
+ p = vec_elt (nm->processes, nm->current_process_index);
+
+ /* Find first type with events ready.
+ Return invalid type when there's nothing there. */
+ t = clib_bitmap_first_set (p->non_empty_event_type_bitmap);
+ if (t == ~0)
+ return t;
+
+ p->non_empty_event_type_bitmap = clib_bitmap_andnoti (p->non_empty_event_type_bitmap, t);
+
+ l = _vec_len (p->pending_event_data_by_type_index[t]);
+ if (data_vector)
+ vec_add (*data_vector, p->pending_event_data_by_type_index[t], l);
+ _vec_len (p->pending_event_data_by_type_index[t]) = 0;
+
+ et = pool_elt_at_index (p->event_type_pool, t);
+
+ /* Return user's opaque value. */
+ r = et->opaque;
+
+ vlib_process_maybe_free_event_type (p, t);
+
+ return r;
+}
+
+always_inline uword
+vlib_process_get_events_helper (vlib_process_t * p, uword t, uword ** data_vector)
+{
+ uword l;
+
+ p->non_empty_event_type_bitmap = clib_bitmap_andnoti (p->non_empty_event_type_bitmap, t);
+
+ l = _vec_len (p->pending_event_data_by_type_index[t]);
+ if (data_vector)
+ vec_add (*data_vector, p->pending_event_data_by_type_index[t], l);
+ _vec_len (p->pending_event_data_by_type_index[t]) = 0;
+
+ vlib_process_maybe_free_event_type (p, t);
+
+ return l;
+}
+
+/* As above, but query for the specified event type.  Returns the
+   number of events found. */
+always_inline uword
+vlib_process_get_events_with_type (vlib_main_t * vm, uword ** data_vector,
+ uword with_type_opaque)
+{
+ vlib_node_main_t * nm = &vm->node_main;
+ vlib_process_t * p;
+ uword t, * h;
+
+ p = vec_elt (nm->processes, nm->current_process_index);
+ h = hash_get (p->event_type_index_by_type_opaque, with_type_opaque);
+ if (! h)
+    /* This can happen when an event has not yet been
+       signaled with the given opaque type. */
+ return 0;
+
+ t = h[0];
+ if (! clib_bitmap_get (p->non_empty_event_type_bitmap, t))
+ return 0;
+
+ return vlib_process_get_events_helper (p, t, data_vector);
+}
+
+always_inline uword *
+vlib_process_wait_for_event (vlib_main_t * vm)
+{
+ vlib_node_main_t * nm = &vm->node_main;
+ vlib_process_t * p;
+ uword r;
+
+ p = vec_elt (nm->processes, nm->current_process_index);
+ if (clib_bitmap_is_zero (p->non_empty_event_type_bitmap))
+ {
+ p->flags |= VLIB_PROCESS_IS_SUSPENDED_WAITING_FOR_EVENT;
+ r = clib_setjmp (&p->resume_longjmp, VLIB_PROCESS_RESUME_LONGJMP_SUSPEND);
+ if (r == VLIB_PROCESS_RESUME_LONGJMP_SUSPEND)
+ clib_longjmp (&p->return_longjmp, VLIB_PROCESS_RETURN_LONGJMP_SUSPEND);
+ }
+
+ return p->non_empty_event_type_bitmap;
+}
+
+always_inline uword
+vlib_process_wait_for_one_time_event (vlib_main_t * vm,
+ uword ** data_vector,
+ uword with_type_index)
+{
+ vlib_node_main_t * nm = &vm->node_main;
+ vlib_process_t * p;
+ uword r;
+
+ p = vec_elt (nm->processes, nm->current_process_index);
+ ASSERT (! pool_is_free_index (p->event_type_pool, with_type_index));
+ while (! clib_bitmap_get (p->non_empty_event_type_bitmap, with_type_index))
+ {
+ p->flags |= VLIB_PROCESS_IS_SUSPENDED_WAITING_FOR_EVENT;
+ r = clib_setjmp (&p->resume_longjmp, VLIB_PROCESS_RESUME_LONGJMP_SUSPEND);
+ if (r == VLIB_PROCESS_RESUME_LONGJMP_SUSPEND)
+ clib_longjmp (&p->return_longjmp, VLIB_PROCESS_RETURN_LONGJMP_SUSPEND);
+ }
+
+ return vlib_process_get_events_helper (p, with_type_index, data_vector);
+}
+
+always_inline uword
+vlib_process_wait_for_event_with_type (vlib_main_t * vm,
+ uword ** data_vector,
+ uword with_type_opaque)
+{
+ vlib_node_main_t * nm = &vm->node_main;
+ vlib_process_t * p;
+ uword r, * h;
+
+ p = vec_elt (nm->processes, nm->current_process_index);
+ h = hash_get (p->event_type_index_by_type_opaque, with_type_opaque);
+ while (! h || ! clib_bitmap_get (p->non_empty_event_type_bitmap, h[0]))
+ {
+ p->flags |= VLIB_PROCESS_IS_SUSPENDED_WAITING_FOR_EVENT;
+ r = clib_setjmp (&p->resume_longjmp, VLIB_PROCESS_RESUME_LONGJMP_SUSPEND);
+ if (r == VLIB_PROCESS_RESUME_LONGJMP_SUSPEND)
+ clib_longjmp (&p->return_longjmp, VLIB_PROCESS_RETURN_LONGJMP_SUSPEND);
+
+ /* See if unknown event type has been signaled now. */
+ if (! h)
+ h = hash_get (p->event_type_index_by_type_opaque, with_type_opaque);
+ }
+
+ return vlib_process_get_events_helper (p, h[0], data_vector);
+}
+
+always_inline f64
+vlib_process_wait_for_event_or_clock (vlib_main_t * vm, f64 dt)
+{
+ vlib_node_main_t * nm = &vm->node_main;
+ vlib_process_t * p;
+ f64 wakeup_time;
+ uword r;
+
+ p = vec_elt (nm->processes, nm->current_process_index);
+
+ if (vlib_process_suspend_time_is_zero (dt)
+ || ! clib_bitmap_is_zero (p->non_empty_event_type_bitmap))
+ return dt;
+
+ wakeup_time = vlib_time_now (vm) + dt;
+
+ /* Suspend waiting for both clock and event to occur. */
+ p->flags |= (VLIB_PROCESS_IS_SUSPENDED_WAITING_FOR_EVENT
+ | VLIB_PROCESS_IS_SUSPENDED_WAITING_FOR_CLOCK);
+
+ r = clib_setjmp (&p->resume_longjmp, VLIB_PROCESS_RESUME_LONGJMP_SUSPEND);
+ if (r == VLIB_PROCESS_RESUME_LONGJMP_SUSPEND)
+ {
+ p->resume_cpu_time = (clib_cpu_time_now ()
+ + (dt * vm->clib_time.clocks_per_second));
+ clib_longjmp (&p->return_longjmp, VLIB_PROCESS_RETURN_LONGJMP_SUSPEND);
+ }
+
+  /* Return the amount of time still left to sleep.
+     If <= 0 we were woken up by the clock (and not by an event). */
+ return wakeup_time - vlib_time_now (vm);
+}
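+
+/* Illustrative sketch (not part of the original source): sleeping for up
+   to one second and telling a clock wakeup apart from an event wakeup. */
+#if 0
+  f64 time_left = vlib_process_wait_for_event_or_clock (vm, 1.0 /* sec */);
+  if (time_left <= 0)
+    ; /* the timer expired; no event arrived */
+  else
+    ; /* an event arrived with time_left seconds still on the clock */
+#endif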
+
+always_inline vlib_process_event_type_t *
+vlib_process_new_event_type (vlib_process_t * p, uword with_type_opaque)
+{
+ vlib_process_event_type_t * et;
+ pool_get (p->event_type_pool, et);
+ et->opaque = with_type_opaque;
+ return et;
+}
+
+always_inline uword
+vlib_process_create_one_time_event (vlib_main_t * vm, uword node_index, uword with_type_opaque)
+{
+ vlib_node_main_t * nm = &vm->node_main;
+ vlib_node_t * n = vlib_get_node (vm, node_index);
+ vlib_process_t * p = vec_elt (nm->processes, n->runtime_index);
+ vlib_process_event_type_t * et;
+ uword t;
+
+ et = vlib_process_new_event_type (p, with_type_opaque);
+ t = et - p->event_type_pool;
+ p->one_time_event_type_bitmap = clib_bitmap_ori (p->one_time_event_type_bitmap, t);
+ return t;
+}
+
+always_inline void
+vlib_process_delete_one_time_event (vlib_main_t * vm, uword node_index, uword t)
+{
+ vlib_node_main_t * nm = &vm->node_main;
+ vlib_node_t * n = vlib_get_node (vm, node_index);
+ vlib_process_t * p = vec_elt (nm->processes, n->runtime_index);
+
+ ASSERT (clib_bitmap_get (p->one_time_event_type_bitmap, t));
+ vlib_process_free_event_type (p, t, /* is_one_time_event */ 1);
+}
+
+always_inline void *
+vlib_process_signal_event_helper (vlib_node_main_t * nm,
+ vlib_node_t * n,
+ vlib_process_t * p,
+ uword t,
+ uword n_data_elts,
+ uword n_data_elt_bytes)
+{
+ uword p_flags, add_to_pending, delete_from_wheel;
+ void * data_to_be_written_by_caller;
+
+ ASSERT (! pool_is_free_index (p->event_type_pool, t));
+
+ vec_validate (p->pending_event_data_by_type_index, t);
+
+ /* Resize data vector and return caller's data to be written. */
+ {
+ void * data_vec = p->pending_event_data_by_type_index[t];
+ uword l;
+
+ if (! data_vec && vec_len (nm->recycled_event_data_vectors))
+ {
+ data_vec = vec_pop (nm->recycled_event_data_vectors);
+ _vec_len (data_vec) = 0;
+ }
+
+ l = vec_len (data_vec);
+
+ data_vec = _vec_resize (data_vec,
+ /* length_increment */ n_data_elts,
+ /* total size after increment */ (l + n_data_elts) * n_data_elt_bytes,
+ /* header_bytes */ 0, /* data_align */ 0);
+
+ p->pending_event_data_by_type_index[t] = data_vec;
+ data_to_be_written_by_caller = data_vec + l * n_data_elt_bytes;
+ }
+
+ p->non_empty_event_type_bitmap = clib_bitmap_ori (p->non_empty_event_type_bitmap, t);
+
+ p_flags = p->flags;
+
+  /* Only schedule a resume once: skip if an earlier event already did. */
+ add_to_pending = (p_flags & VLIB_PROCESS_RESUME_PENDING) == 0;
+
+ /* Process will resume when suspend time elapses? */
+ delete_from_wheel = 0;
+ if (p_flags & VLIB_PROCESS_IS_SUSPENDED_WAITING_FOR_CLOCK)
+ {
+ /* Waiting for both event and clock? */
+ if (p_flags & VLIB_PROCESS_IS_SUSPENDED_WAITING_FOR_EVENT)
+ delete_from_wheel = 1;
+ else
+	/* Waiting only for clock. Event will be queued and may be
+	   handled when the timer expires. */
+ add_to_pending = 0;
+ }
+
+ /* Never add current process to pending vector since current process is
+ already running. */
+ add_to_pending &= nm->current_process_index != n->runtime_index;
+
+ if (add_to_pending)
+ {
+ u32 x = vlib_timing_wheel_data_set_suspended_process (n->runtime_index);
+ p->flags = p_flags | VLIB_PROCESS_RESUME_PENDING;
+ vec_add1 (nm->data_from_advancing_timing_wheel, x);
+ if (delete_from_wheel)
+ timing_wheel_delete (&nm->timing_wheel, x);
+ }
+
+ return data_to_be_written_by_caller;
+}
+
+always_inline void *
+vlib_process_signal_event_data (vlib_main_t * vm,
+ uword node_index,
+ uword type_opaque,
+ uword n_data_elts,
+ uword n_data_elt_bytes)
+{
+ vlib_node_main_t * nm = &vm->node_main;
+ vlib_node_t * n = vlib_get_node (vm, node_index);
+ vlib_process_t * p = vec_elt (nm->processes, n->runtime_index);
+ uword * h, t;
+
+ h = hash_get (p->event_type_index_by_type_opaque, type_opaque);
+ if (! h)
+ {
+ vlib_process_event_type_t * et = vlib_process_new_event_type (p, type_opaque);
+ t = et - p->event_type_pool;
+ hash_set (p->event_type_index_by_type_opaque, type_opaque, t);
+ }
+ else
+ t = h[0];
+
+ return vlib_process_signal_event_helper (nm, n, p, t, n_data_elts, n_data_elt_bytes);
+}
+
+always_inline void *
+vlib_process_signal_event_at_time (vlib_main_t * vm,
+ f64 dt,
+ uword node_index,
+ uword type_opaque,
+ uword n_data_elts,
+ uword n_data_elt_bytes)
+{
+ vlib_node_main_t * nm = &vm->node_main;
+ vlib_node_t * n = vlib_get_node (vm, node_index);
+ vlib_process_t * p = vec_elt (nm->processes, n->runtime_index);
+ uword * h, t;
+
+ h = hash_get (p->event_type_index_by_type_opaque, type_opaque);
+ if (! h)
+ {
+ vlib_process_event_type_t * et = vlib_process_new_event_type (p, type_opaque);
+ t = et - p->event_type_pool;
+ hash_set (p->event_type_index_by_type_opaque, type_opaque, t);
+ }
+ else
+ t = h[0];
+
+ if (vlib_process_suspend_time_is_zero (dt))
+ return vlib_process_signal_event_helper (nm, n, p, t, n_data_elts, n_data_elt_bytes);
+ else
+ {
+ vlib_signal_timed_event_data_t * te;
+ u64 dt_cpu = dt * vm->clib_time.clocks_per_second;
+
+ pool_get_aligned (nm->signal_timed_event_data_pool, te, sizeof (te[0]));
+
+ te->n_data_elts = n_data_elts;
+ te->n_data_elt_bytes = n_data_elt_bytes;
+ te->n_data_bytes = n_data_elts * n_data_elt_bytes;
+
+ /* Assert that structure fields are big enough. */
+ ASSERT (te->n_data_elts == n_data_elts);
+ ASSERT (te->n_data_elt_bytes == n_data_elt_bytes);
+ ASSERT (te->n_data_bytes == n_data_elts * n_data_elt_bytes);
+
+ te->process_node_index = n->runtime_index;
+ te->event_type_index = t;
+
+ timing_wheel_insert (&nm->timing_wheel, clib_cpu_time_now () + dt_cpu,
+ vlib_timing_wheel_data_set_timed_event (te - nm->signal_timed_event_data_pool));
+
+ /* Inline data big enough to hold event? */
+ if (te->n_data_bytes < sizeof (te->inline_event_data))
+ return te->inline_event_data;
+ else
+ {
+ te->event_data_as_vector = 0;
+ vec_resize (te->event_data_as_vector, te->n_data_bytes);
+ return te->event_data_as_vector;
+ }
+ }
+}
+
+always_inline void *
+vlib_process_signal_one_time_event_data (vlib_main_t * vm,
+ uword node_index,
+ uword type_index,
+ uword n_data_elts,
+ uword n_data_elt_bytes)
+{
+ vlib_node_main_t * nm = &vm->node_main;
+ vlib_node_t * n = vlib_get_node (vm, node_index);
+ vlib_process_t * p = vec_elt (nm->processes, n->runtime_index);
+ return vlib_process_signal_event_helper (nm, n, p, type_index, n_data_elts, n_data_elt_bytes);
+}
+
+always_inline void
+vlib_process_signal_event (vlib_main_t * vm,
+ uword node_index,
+ uword type_opaque,
+ uword data)
+{
+ uword * d = vlib_process_signal_event_data (vm, node_index, type_opaque,
+ 1 /* elts */, sizeof (uword));
+ d[0] = data;
+}
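+
+/* Illustrative sketch (not part of the original source): kicking the
+   process sketched earlier from another context.  my_process_node_index
+   is a hypothetical index obtained from vlib_register_node. */
+#if 0
+  vlib_process_signal_event (vm, my_process_node_index, MY_EVENT_KICK,
+			     0 /* data word */);
+#endif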
+
+always_inline void
+vlib_process_signal_event_pointer (vlib_main_t * vm,
+ uword node_index,
+ uword type_opaque,
+ void * data)
+{
+ void ** d = vlib_process_signal_event_data (vm, node_index, type_opaque,
+ 1 /* elts */, sizeof (data));
+ d[0] = data;
+}
+
+always_inline void
+vlib_process_signal_one_time_event (vlib_main_t * vm,
+ uword node_index,
+ uword type_index,
+ uword data)
+{
+ uword * d = vlib_process_signal_one_time_event_data (vm, node_index, type_index,
+ 1 /* elts */, sizeof (uword));
+ d[0] = data;
+}
+
+always_inline void
+vlib_signal_one_time_waiting_process (vlib_main_t * vm, vlib_one_time_waiting_process_t * p)
+{
+ vlib_process_signal_one_time_event (vm, p->node_index, p->one_time_event, /* data */ ~0);
+ memset (p, ~0, sizeof (p[0]));
+}
+
+always_inline void
+vlib_signal_one_time_waiting_process_vector (vlib_main_t * vm,
+ vlib_one_time_waiting_process_t ** wps)
+{
+ vlib_one_time_waiting_process_t * wp;
+ vec_foreach (wp, *wps)
+ vlib_signal_one_time_waiting_process (vm, wp);
+ vec_free (*wps);
+}
+
+always_inline void
+vlib_current_process_wait_for_one_time_event (vlib_main_t * vm, vlib_one_time_waiting_process_t * p)
+{
+ p->node_index = vlib_current_process (vm);
+ p->one_time_event =
+ vlib_process_create_one_time_event (vm, p->node_index, /* type opaque */ ~0);
+ vlib_process_wait_for_one_time_event (vm,
+ /* don't care about data */ 0,
+ p->one_time_event);
+}
+
+always_inline void
+vlib_current_process_wait_for_one_time_event_vector (vlib_main_t * vm,
+ vlib_one_time_waiting_process_t ** wps)
+{
+ vlib_one_time_waiting_process_t * wp;
+ vec_add2 (*wps, wp, 1);
+ vlib_current_process_wait_for_one_time_event (vm, wp);
+}
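+
+/* Illustrative sketch (not part of the original source): a rendezvous
+   built from the helpers above; 'waiters' is a hypothetical vector.
+   Any number of processes park themselves, then a single call from
+   another context releases them all. */
+#if 0
+static vlib_one_time_waiting_process_t * waiters;
+
+  /* In each waiting process: */
+  vlib_current_process_wait_for_one_time_event_vector (vm, &waiters);
+
+  /* In the releasing context: */
+  vlib_signal_one_time_waiting_process_vector (vm, &waiters);
+#endif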
+
+always_inline u32
+vlib_node_runtime_update_main_loop_vector_stats (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ uword n_vectors)
+{
+ u32 i, d, vi0, vi1;
+ u32 i0, i1;
+
+ ASSERT (is_pow2 (ARRAY_LEN (node->main_loop_vector_stats)));
+ i = ((vm->main_loop_count >> VLIB_LOG2_MAIN_LOOPS_PER_STATS_UPDATE)
+ & (ARRAY_LEN (node->main_loop_vector_stats) - 1));
+ i0 = i ^ 0;
+ i1 = i ^ 1;
+ d = ((vm->main_loop_count >> VLIB_LOG2_MAIN_LOOPS_PER_STATS_UPDATE)
+ - (node->main_loop_count_last_dispatch >> VLIB_LOG2_MAIN_LOOPS_PER_STATS_UPDATE));
+ vi0 = node->main_loop_vector_stats[i0];
+ vi1 = node->main_loop_vector_stats[i1];
+ vi0 = d == 0 ? vi0 : 0;
+ vi1 = d <= 1 ? vi1 : 0;
+ vi0 += n_vectors;
+ node->main_loop_vector_stats[i0] = vi0;
+ node->main_loop_vector_stats[i1] = vi1;
+ node->main_loop_count_last_dispatch = vm->main_loop_count;
+  /* Return the previous interval's counter. */
+ return node->main_loop_vector_stats[i1];
+}
+
+always_inline f64
+vlib_node_vectors_per_main_loop_as_float (vlib_main_t * vm, u32 node_index)
+{
+ vlib_node_runtime_t * rt = vlib_node_get_runtime (vm, node_index);
+ u32 v;
+
+ v = vlib_node_runtime_update_main_loop_vector_stats (vm, rt, /* n_vectors */ 0);
+ return (f64) v / (1 << VLIB_LOG2_MAIN_LOOPS_PER_STATS_UPDATE);
+}
+
+always_inline u32
+vlib_node_vectors_per_main_loop_as_integer (vlib_main_t * vm, u32 node_index)
+{
+ vlib_node_runtime_t * rt = vlib_node_get_runtime (vm, node_index);
+ u32 v;
+
+ v = vlib_node_runtime_update_main_loop_vector_stats (vm, rt, /* n_vectors */ 0);
+ return v >> VLIB_LOG2_MAIN_LOOPS_PER_STATS_UPDATE;
+}
+
+void
+vlib_frame_free (vlib_main_t * vm,
+ vlib_node_runtime_t * r,
+ vlib_frame_t * f);
+
+/* Add next node to given node in given slot. */
+uword
+vlib_node_add_next_with_slot (vlib_main_t * vm,
+ uword node,
+ uword next_node,
+ uword slot);
+
+/* As above but adds to end of node's next vector. */
+always_inline uword
+vlib_node_add_next (vlib_main_t * vm, uword node, uword next_node)
+{ return vlib_node_add_next_with_slot (vm, node, next_node, ~0); }
+
+/* Add named next node to given node in given slot. */
+uword
+vlib_node_add_named_next_with_slot (vlib_main_t * vm,
+ uword node,
+ char * next_name,
+ uword slot);
+
+/* As above but adds to end of node's next vector. */
+always_inline uword
+vlib_node_add_named_next (vlib_main_t * vm,
+ uword node,
+ char * name)
+{ return vlib_node_add_named_next_with_slot (vm, node, name, ~0); }
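+
+/* Illustrative sketch (not part of the original source): wiring a next
+   arc by name at init time.  my_node_index is hypothetical; "error-drop"
+   is assumed to name an existing node. */
+#if 0
+  u32 drop_next_index =
+    vlib_node_add_named_next (vm, my_node_index, "error-drop");
+#endif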
+
+/* Query node given name. */
+vlib_node_t * vlib_get_node_by_name (vlib_main_t * vm, u8 * name);
+
+/* Rename a node. */
+void vlib_node_rename (vlib_main_t * vm, u32 node_index, char * fmt, ...);
+
+/* Register new packet processing node. Nodes can be registered
+ dynamically via this call or statically via the VLIB_REGISTER_NODE
+ macro. */
+u32 vlib_register_node (vlib_main_t * vm, vlib_node_registration_t * r);
+
+/* Register all static nodes registered via VLIB_REGISTER_NODE. */
+void vlib_register_all_static_nodes (vlib_main_t * vm);
+
+/* Start a process. */
+void vlib_start_process (vlib_main_t * vm, uword process_index);
+
+/* Sync up runtime and main node stats. */
+void
+vlib_node_sync_stats (vlib_main_t * vm, vlib_node_t * n);
+
+/* Node graph initialization function. */
+clib_error_t * vlib_node_main_init (vlib_main_t * vm);
+
+format_function_t format_vlib_node_graph;
+format_function_t format_vlib_node_name;
+format_function_t format_vlib_next_node_name;
+format_function_t format_vlib_node_and_next;
+format_function_t format_vlib_cpu_time;
+format_function_t format_vlib_time;
+/* Parse node name -> node index. */
+unformat_function_t unformat_vlib_node;
+
+always_inline void
+vlib_node_increment_counter (vlib_main_t *vm, u32 node_index,
+ u32 counter_index, u64 increment)
+{
+ vlib_node_t * n = vlib_get_node (vm, node_index);
+ vlib_error_main_t * em = &vm->error_main;
+ u32 node_counter_base_index = n->error_heap_index;
+ em->counters[node_counter_base_index + counter_index] += increment;
+}
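+
+/* Illustrative sketch (not part of the original source): bumping a node
+   error counter from a dispatch function.  MY_ERROR_BAD_THING is a
+   hypothetical index into the node's error heap. */
+#if 0
+  vlib_node_increment_counter (vm, my_node_index, MY_ERROR_BAD_THING,
+			       1 /* increment */);
+#endif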
+
+#endif /* included_vlib_node_funcs_h */
diff --git a/vlib/vlib/parse.c b/vlib/vlib/parse.c
new file mode 100644
index 00000000000..844be8aafe3
--- /dev/null
+++ b/vlib/vlib/parse.c
@@ -0,0 +1,980 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include <vlib/parse.h>
+
+#define PARSE_DEBUG 0
+
+u16 word_type_index, number_type_index, eof_type_index, rule_eof_type_index,
+ plus_type_index, minus_type_index, star_type_index, slash_type_index,
+ lpar_type_index, rpar_type_index;
+
+u8 * format_vlib_parse_value (u8 * s, va_list * args)
+{
+ vlib_parse_main_t *pm = va_arg (*args, vlib_parse_main_t *);
+ vlib_parse_type_t *type;
+ vlib_parse_value_t *v;
+ u16 type_index;
+
+ s = format (s, "%d items:\n", vec_len (pm->parse_value));
+ vec_foreach (v, pm->parse_value)
+ {
+ type_index = v->type;
+ type = pool_elt_at_index (pm->parse_types, type_index);
+ if (type->format_value)
+ s = format (s, "[%d]: %U\n", v - pm->parse_value,
+ type->format_value, v);
+ else
+ s = format (s, "[%d]: (nofun)\n", v - pm->parse_value);
+ }
+ return s;
+}
+
+static u8 * format_vlib_parse_match (u8 * s, va_list * args)
+{
+ vlib_parse_match_t m = va_arg (*args, vlib_parse_match_t);
+ char * t = 0;
+ switch (m)
+ {
+#define _(a) case VLIB_PARSE_##a: t = #a; break;
+ foreach_parse_match_type
+#undef _
+ default: t = 0; break;
+ }
+
+ if (t)
+ return format (s, "%s", t);
+ else
+ return format (s, "unknown 0x%x", m);
+}
+
+static u8 * format_vlib_parse_item (u8 * s, va_list * args)
+{
+ vlib_parse_main_t *pm = va_arg (*args, vlib_parse_main_t *);
+ vlib_parse_item_t *item = va_arg (*args, vlib_parse_item_t *);
+ vlib_parse_type_t *type = pool_elt_at_index (pm->parse_types, item->type);
+
+ if (item->type == word_type_index)
+ s = format (s, "%s", item->value.as_pointer);
+ else
+ s = format (s, "<%s>", type->name);
+ return s;
+}
+
+static u8 * format_vlib_parse_graph (u8 * s, va_list * args)
+{
+ vlib_parse_main_t *pm = va_arg (*args, vlib_parse_main_t *);
+ vlib_parse_graph_t *node = va_arg (*args, vlib_parse_graph_t *);
+ vlib_parse_item_t *item;
+ vlib_parse_type_t *type;
+
+ /* $$$ hash table */
+ pool_foreach (type, pm->parse_types,
+ ({
+ if (type->rule_index == node - pm->parse_graph)
+ s = format (s, "\n<%s>\n", type->name);
+ }));
+
+ if (pm->root_index == (node - pm->parse_graph))
+ s = format (s, "\n<root>\n");
+
+ item = pool_elt_at_index (pm->parse_items, node->item);
+
+ s = format (s, "[%d] %U ", node - pm->parse_graph,
+ format_vlib_parse_item, pm, item);
+
+ if (node->peer == (u32)~0)
+ s = format (s, "peer nil ");
+ else
+ s = format (s, "peer %4u ", node->peer);
+
+ if (node->deeper == (u32)~0)
+ s = format (s, "deeper nil ");
+ else
+ s = format (s, "deeper %4u ", node->deeper);
+
+ return s;
+}
+
+void dump_parse_graph (void)
+{
+ vlib_parse_main_t *pm = &vlib_parse_main;
+ vlib_parse_graph_t *node;
+
+ pool_foreach (node, pm->parse_graph, ({
+ fformat(stdout, "%U\n", format_vlib_parse_graph, pm, node);
+ }));
+}
+
+always_inline void
+parse_cleanup_value (vlib_parse_main_t *pm, vlib_parse_value_t *pv)
+{
+ vlib_parse_type_t *type = pool_elt_at_index (pm->parse_types, pv->type);
+ if (type->value_cleanup_function)
+ type->value_cleanup_function (pv);
+}
+
+static void parse_reset (vlib_parse_main_t *pm, u8 *input)
+{
+ vlib_lex_token_t *t;
+ vlib_parse_value_t *pv;
+
+ vlib_lex_reset (pm->lex_main, input);
+
+ vec_foreach (t, pm->tokens)
+ vlib_lex_cleanup_token (t);
+
+ vec_foreach (pv, pm->parse_value)
+ parse_cleanup_value (pm, pv);
+
+ _vec_len (pm->parse_value) = 0;
+ _vec_len (pm->tokens) = 0;
+ pm->current_token_index = 0;
+}
+
+static void parse_help (vlib_parse_main_t *pm, u32 index)
+{
+ vlib_parse_graph_t *node;
+ vlib_parse_item_t *item;
+ vlib_parse_type_t *type;
+ vlib_main_t *vm = pm->vlib_main;
+ u8 *help_input;
+ int i;
+
+ help_input = vec_dup (pm->lex_main->input_vector);
+
+ for (i = vec_len(help_input)-1; i >= 0; i--)
+ if (help_input[i] == '?')
+ {
+ help_input[i] = 0;
+ _vec_len(help_input) = i;
+ break;
+ }
+
+  /* Trim trailing whitespace */
+  for (i = vec_len(help_input)-1; i >= 0; i--)
+    {
+      if (help_input[i] != ' ' && help_input[i] != '\t')
+        break;
+      help_input[i] = 0;
+    }
+ _vec_len(help_input) = i+1;
+
+ while (index != (u32)~0)
+ {
+ node = pool_elt_at_index (pm->parse_graph, index);
+ item = pool_elt_at_index (pm->parse_items, node->item);
+ type = pool_elt_at_index (pm->parse_types, item->type);
+
+ if (item->type == eof_type_index && vec_len (pm->match_items) == 0)
+ /* do nothing */;
+ else if (item->type == word_type_index)
+ vlib_cli_output (vm, "%s %s\n", help_input, item->value.as_pointer);
+ else
+ vlib_cli_output (vm, "%s <%s>\n", help_input, type->name);
+ index = node->peer;
+ }
+ vec_free (help_input);
+}
+
+static vlib_parse_match_t
+parse_eval_internal (vlib_parse_main_t *pm, u32 index)
+{
+ vlib_parse_graph_t *node;
+ vlib_parse_item_t *item;
+ vlib_parse_type_t *type;
+ vlib_parse_value_t value, *pv;
+ vlib_parse_match_t rv;
+ u32 *partial_matches = 0;
+ vlib_lex_token_t *t;
+ u32 save_token_index=(u32)~0, save_match_items=0;
+ int had_value = 0;
+
+ if (pm->current_token_index >= vec_len(pm->tokens))
+ return VLIB_PARSE_MATCH_FAIL;
+
+ /* current token */
+ t = vec_elt_at_index (pm->tokens, pm->current_token_index);
+
+ /* Help ? */
+ if (PREDICT_FALSE(t->token == VLIB_LEX_qmark))
+ {
+ parse_help (pm, index);
+ _vec_len (pm->match_items) = 0;
+ return VLIB_PARSE_MATCH_DONE;
+ }
+
+ /* Across all peers at this level of the parse graph */
+ while (index != (u32)~0)
+ {
+ node = pool_elt_at_index (pm->parse_graph, index);
+ item = pool_elt_at_index (pm->parse_items, node->item);
+ type = pool_elt_at_index (pm->parse_types, item->type);
+
+      /*
+       * Save the token index. We may have to back up several
+       * trie plies. Type-specific match functions can consume
+       * multiple tokens, and they may not restore the position
+       * when they fail.
+       */
+ save_token_index = pm->current_token_index;
+ save_match_items = vec_len (pm->match_items);
+ vec_add1 (pm->match_items, node->item);
+
+ if (PARSE_DEBUG > 1)
+ clib_warning ("Try to match token %U against node %d",
+ format_vlib_lex_token, pm->lex_main, t, index);
+
+ /* Call the type-specific match function */
+ rv = type->match_function (pm, type, t, &value);
+
+ if (PARSE_DEBUG > 1)
+ clib_warning ("returned %U", format_vlib_parse_match, rv);
+
+ switch (rv)
+ {
+ case VLIB_PARSE_MATCH_VALUE:
+ /*
+ * Matched, and returned a value to append to the
+ * set of args passed to the action function
+ */
+ value.type = item->type;
+ vec_add1 (pm->parse_value, value);
+ had_value = 1;
+ /* fallthrough */
+
+ case VLIB_PARSE_MATCH_FULL:
+ unambiguous_partial_match:
+ /* Consume the matched token */
+ pm->current_token_index++;
+
+ /* continue matching along this path */
+ rv = parse_eval_internal (pm, node->deeper);
+
+ /* this is not the right path */
+ if (rv == VLIB_PARSE_MATCH_FAIL)
+ {
+ if (had_value)
+ {
+ /* Delete the value */
+ value = pm->parse_value [vec_len (pm->parse_value)-1];
+ parse_cleanup_value (pm, &value);
+ _vec_len (pm->parse_value) -= 1;
+ }
+ /* Continue with the next sibling */
+ pm->current_token_index = save_token_index;
+ _vec_len (pm->match_items) = save_match_items;
+ index = node->peer;
+ break;
+ }
+ return rv;
+
+ case VLIB_PARSE_MATCH_PARTIAL:
+ /* Partial (substring) match, remember it but keep going */
+ vec_add1 (partial_matches, node - pm->parse_graph);
+ index = node->peer;
+ break;
+
+ case VLIB_PARSE_MATCH_FAIL:
+ /* Continue with the next sibling */
+ index = node->peer;
+ _vec_len (pm->match_items) = save_match_items;
+ break;
+
+ case VLIB_PARSE_MATCH_DONE:
+ /* Parse complete, invoke the action function */
+ if (PARSE_DEBUG > 0)
+ clib_warning ("parse_value: %U", format_vlib_parse_value, pm);
+
+ {
+ vlib_parse_eval_function_t * f = item->value.as_pointer;
+ if (f)
+ rv = f (pm, item, pm->parse_value);
+ }
+
+ vec_foreach (pv, pm->parse_value)
+ parse_cleanup_value (pm, pv);
+ _vec_len (pm->parse_value) = 0;
+ _vec_len (pm->match_items) = 0;
+ return rv;
+
+ case VLIB_PARSE_MATCH_AMBIGUOUS:
+ case VLIB_PARSE_MATCH_EVAL_FAIL:
+ case VLIB_PARSE_MATCH_RULE:
+ _vec_len (pm->match_items) = save_match_items;
+ return rv;
+ }
+ }
+
+ /*
+ * Out of siblings. If we have exactly one partial match
+ * we win
+ */
+ if (vec_len (partial_matches) == 1)
+ {
+ index = partial_matches[0];
+ node = pool_elt_at_index (pm->parse_graph, index);
+ vec_free (partial_matches);
+ goto unambiguous_partial_match;
+ }
+
+ /* Ordinary loser */
+ rv = VLIB_PARSE_MATCH_FAIL;
+
+ /* Ambiguous loser */
+ if (vec_len (partial_matches) > 1)
+ {
+ vec_free (partial_matches);
+ rv = VLIB_PARSE_MATCH_AMBIGUOUS;
+ }
+
+ _vec_len (pm->match_items) = save_match_items;
+ return rv;
+}
+
+vlib_parse_match_t rule_match (vlib_parse_main_t *pm, vlib_parse_type_t *type,
+ vlib_lex_token_t *t,
+ vlib_parse_value_t *valuep)
+{
+ vlib_parse_match_t rv;
+ static int recursion_level;
+
+ if (PARSE_DEBUG > 1)
+ clib_warning ("[%d]: try to match type %s graph index %d",
+ recursion_level,
+ type->name,
+ type->rule_index);
+ recursion_level++;
+ rv = parse_eval_internal (pm, type->rule_index);
+ recursion_level--;
+
+  /* Break the recursive unwind here... */
+ if (rv == VLIB_PARSE_MATCH_RULE)
+ {
+ if (PARSE_DEBUG > 1)
+ clib_warning ("[%d]: type %s matched", recursion_level, type->name);
+
+ return VLIB_PARSE_MATCH_FULL;
+ }
+ else
+ {
+ if (PARSE_DEBUG > 1)
+ clib_warning ("[%d]: type %s returns %U", recursion_level, type->name,
+ format_vlib_parse_match, rv);
+ }
+ return rv;
+}
+
+static int parse_eval (vlib_parse_main_t *pm, u8 *input)
+{
+ vlib_lex_token_t * t;
+
+ parse_reset (pm, input);
+
+ /* Tokenize the entire input vector */
+ do {
+ vec_add2 (pm->tokens, t, 1);
+ vlib_lex_get_token (pm->lex_main, t);
+ } while (t->token != VLIB_LEX_eof);
+
+ /* Feed it to the parser */
+ return parse_eval_internal (pm, pm->root_index);
+}
+
+/* Temporary vlib stub */
+vlib_parse_match_t vlib_parse_eval (u8 *input)
+{
+ return parse_eval (&vlib_parse_main, input);
+}
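+
+/* Illustrative sketch (not part of the original source): feeding an
+   arbitrary command line to the parser and checking the outcome. */
+#if 0
+  vlib_parse_match_t m = vlib_parse_eval ((u8 *) "help ?");
+  if (m != VLIB_PARSE_MATCH_DONE)
+    clib_warning ("parse returned %U", format_vlib_parse_match, m);
+#endif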
+
+u16 parse_type_find_or_create (vlib_parse_main_t *pm, vlib_parse_type_t *t)
+{
+ uword *p;
+ vlib_parse_type_t *n;
+ u8 *name_copy;
+
+ p = hash_get_mem (pm->parse_type_by_name_hash, t->name);
+ if (p)
+ return p[0];
+
+ pool_get (pm->parse_types, n);
+ *n = *t;
+ n->rule_index = (u32) ~0;
+
+ name_copy = format (0, "%s%c", n->name, 0);
+
+ hash_set_mem (pm->parse_type_by_name_hash, name_copy, n - pm->parse_types);
+ return n - pm->parse_types;
+}
+
+u16 parse_type_find_by_name (vlib_parse_main_t *pm, char *name)
+{
+ uword *p;
+
+ p = hash_get_mem (pm->parse_type_by_name_hash, name);
+ if (p)
+ return p[0];
+
+ return (u16) ~0;
+}
+
+u32 parse_item_find_or_create (vlib_parse_main_t *pm, vlib_parse_item_t *item)
+{
+ uword *p;
+ vlib_parse_item_t *i;
+
+ /* Exact match the entire item */
+ p = mhash_get (&pm->parse_item_hash, item);
+ if (p)
+ return p[0];
+
+ pool_get (pm->parse_items, i);
+ *i = *item;
+
+ mhash_set (&pm->parse_item_hash, i, i - pm->parse_items, 0);
+ return i - pm->parse_items;
+}
+
+static void parse_type_and_graph_init (vlib_parse_main_t *pm)
+{
+ u32 eof_index;
+ vlib_parse_type_t type;
+ vlib_parse_item_t item;
+
+ memset (&type, 0, sizeof (type));
+
+#define foreach_token_type \
+ _ (eof) \
+ _ (rule_eof) \
+ _ (word) \
+ _ (number) \
+ _ (plus) \
+ _ (minus) \
+ _ (star) \
+ _ (slash) \
+ _ (lpar) \
+ _ (rpar)
+
+#define _(a) a##_type_index = parse_type_find_by_name (pm, #a);
+ foreach_token_type
+#undef _
+
+ memset (&item, 0, sizeof (item));
+ item.type = eof_type_index;
+
+ eof_index = parse_item_find_or_create (pm, &item);
+ pm->root_index = (u32)~0;
+
+#if 0
+ pool_get (pm->parse_graph, g);
+ memset (g, 0xff, sizeof (*g));
+ g->item = eof_index;
+ pm->root_index = 0;
+#endif
+}
+
+
+static void tokenize (vlib_parse_main_t *pm, parse_registration_t *pr)
+{
+ vlib_lex_token_t *t;
+ pm->register_input = format (pm->register_input,
+ "%s%c", pr->initializer, 0);
+
+ parse_reset (pm, pm->register_input);
+
+ do {
+ vec_add2 (pm->tokens, t, 1);
+ vlib_lex_get_token (pm->lex_main, t);
+ } while (t->token != VLIB_LEX_eof);
+ _vec_len (pm->register_input) = 0;
+}
+
+static int is_typed_rule (vlib_parse_main_t *pm)
+{
+ vlib_lex_token_t *t = vec_elt_at_index (pm->tokens, 0);
+
+ /* <mytype> = blah blah blah */
+ if (vec_len(pm->tokens) >= 4
+ && t[0].token == VLIB_LEX_lt
+ && t[1].token == VLIB_LEX_word
+ && t[2].token == VLIB_LEX_gt
+ && t[3].token == VLIB_LEX_equals)
+ return 1;
+ return 0;
+}
+
+static int token_matches_graph_node (vlib_parse_main_t *pm,
+ vlib_lex_token_t *t,
+ vlib_parse_graph_t *node,
+ vlib_parse_item_t *item,
+ vlib_parse_type_t *type,
+ u32 *token_increment)
+{
+ /* EOFs don't match */
+ if (t->token == VLIB_LEX_eof)
+ return 0;
+
+ /* New chain element is a word */
+ if (t->token == VLIB_LEX_word)
+ {
+ /* but the item in hand is not a word */
+ if (item->type != word_type_index)
+ return 0;
+
+ /* Or it's not this particular word */
+ if (strcmp (t->value.as_pointer, item->value.as_pointer))
+ return 0;
+ *token_increment = 1;
+ return 1;
+ }
+ /* New chain element is a type-name: < TYPE-NAME > */
+ if (t->token == VLIB_LEX_lt)
+ {
+ u16 token_type_index;
+
+ /* < TYPE > */
+ if (t[1].token != VLIB_LEX_word ||
+ t[2].token != VLIB_LEX_gt)
+ {
+	  clib_warning ("broken type name in '%s'", pm->register_input);
+ return 0;
+ }
+
+ token_type_index = parse_type_find_by_name (pm, t[1].value.as_pointer);
+ if (token_type_index == (u16)~0)
+ {
+	  clib_warning ("unknown type '%s'", t[1].value.as_pointer);
+ return 0;
+ }
+
+      /* It's a known type, but it does not match. */
+ if (item->type != token_type_index)
+ return 0;
+
+ *token_increment = 3;
+ return 1;
+ }
+ clib_warning ("BUG: t->token = %d", t->token);
+ return 0;
+}
+
+u32 generate_subgraph_from_tokens (vlib_parse_main_t *pm,
+ vlib_lex_token_t *t,
+ u32 *new_subgraph_depth,
+ parse_registration_t *pr,
+ int not_a_rule)
+{
+ vlib_parse_graph_t *g, *last_g;
+ vlib_parse_item_t new_item;
+ u32 rv = (u32)~0, new_item_index, last_index = (u32)~0;
+ u16 token_type_index;
+ u32 depth = 0;
+
+ while (t < pm->tokens + vec_len (pm->tokens))
+ {
+ memset (&new_item, 0, sizeof (new_item));
+
+ if (t->token == VLIB_LEX_word)
+ {
+ new_item.type = word_type_index;
+ new_item.value.as_pointer = vec_dup ((u8 *) t->value.as_pointer);
+ new_item_index = parse_item_find_or_create (pm, &new_item);
+ t++;
+ }
+ else if (t->token == VLIB_LEX_lt)
+ {
+ if (t[1].token != VLIB_LEX_word ||
+ t[2].token != VLIB_LEX_gt)
+ {
+ clib_warning ("broken type name in '%s'", pm->register_input);
+ goto screwed;
+ }
+ token_type_index = parse_type_find_by_name (pm,
+ t[1].value.as_pointer);
+ if (token_type_index == (u16)~0)
+ {
+ clib_warning ("unknown type 2 '%s'", t[1].value.as_pointer);
+ goto screwed;
+ }
+
+ new_item.type = token_type_index;
+ new_item.value.as_pointer = 0;
+ new_item_index = parse_item_find_or_create (pm, &new_item);
+ t += 3; /* skip < <type-name> and > */
+ }
+ else if (t->token == VLIB_LEX_eof)
+ {
+ screwed:
+ new_item.type = not_a_rule ? eof_type_index : rule_eof_type_index;
+ new_item.value.as_pointer = pr->eof_match;
+ new_item_index = parse_item_find_or_create (pm, &new_item);
+ t++;
+ }
+ else
+ {
+ clib_warning ("unexpected token %U index %d in '%s'",
+ format_vlib_lex_token, pm->lex_main, t,
+ t - pm->tokens, pm->register_input);
+ goto screwed;
+ }
+
+ pool_get (pm->parse_graph, g);
+ memset (g, 0xff, sizeof (*g));
+ g->item = new_item_index;
+ depth++;
+
+ if (rv == (u32)~0)
+ {
+ rv = g - pm->parse_graph;
+ last_index = rv;
+ }
+ else
+ {
+ last_g = pool_elt_at_index (pm->parse_graph, last_index);
+ last_index = last_g->deeper = g - pm->parse_graph;
+ }
+ }
+ *new_subgraph_depth = depth;
+ return rv;
+}
+
+static u32 measure_depth (vlib_parse_main_t *pm, u32 index)
+{
+ vlib_parse_graph_t *node;
+ vlib_parse_item_t *item;
+ u32 max=0;
+ u32 depth;
+
+ if (index == (u32)~0)
+ return 0;
+
+ node = pool_elt_at_index (pm->parse_graph, index);
+ item = pool_elt_at_index (pm->parse_items, node->item);
+
+ if (item->type == eof_type_index)
+ return 1;
+
+ while (index != (u32)~0)
+ {
+ node = pool_elt_at_index (pm->parse_graph, index);
+ depth = measure_depth (pm, node->deeper);
+ if (max < depth)
+ max = depth;
+ index = node->peer;
+ }
+
+ return max + 1;
+}
+
+static void add_subgraph_to_graph (vlib_parse_main_t *pm,
+ u32 last_matching_index,
+ u32 graph_root_index,
+ u32 new_subgraph_index,
+ u32 new_subgraph_depth)
+{
+ vlib_parse_graph_t *parent_node;
+ int new_subgraph_longest = 1;
+ u32 current_peer_index;
+ u32 current_depth;
+ vlib_parse_graph_t *current_peer = 0;
+ vlib_parse_graph_t *new_subgraph_node =
+ pool_elt_at_index (pm->parse_graph, new_subgraph_index);
+
+ /*
+ * Case 1: top-level peer. Splice into the top-level
+ * peer chain according to rule depth
+ */
+ if (last_matching_index == (u32)~0)
+ {
+ u32 index = graph_root_index;
+ while (1) {
+ current_peer = pool_elt_at_index (pm->parse_graph, index);
+ current_depth = measure_depth (pm, index);
+ if (current_depth < new_subgraph_depth
+ || current_peer->peer == (u32)~0)
+ break;
+ index = current_peer->peer;
+ }
+ new_subgraph_node->peer = current_peer->peer;
+ current_peer->peer = new_subgraph_index;
+ return;
+ }
+
+ parent_node = pool_elt_at_index (pm->parse_graph, last_matching_index);
+ current_peer_index = parent_node->deeper;
+
+ while (current_peer_index != (u32)~0)
+ {
+ current_peer = pool_elt_at_index (pm->parse_graph, current_peer_index);
+ current_depth = measure_depth (pm, current_peer_index);
+ if (current_depth < new_subgraph_depth)
+ break;
+ new_subgraph_longest = 0;
+ current_peer_index = current_peer->peer;
+ }
+
+ ASSERT (current_peer);
+
+ if (new_subgraph_longest)
+ {
+ new_subgraph_node->peer = parent_node->deeper;
+ parent_node->deeper = new_subgraph_index;
+ }
+ else
+ {
+ new_subgraph_node->peer = current_peer->peer;
+ current_peer->peer = new_subgraph_index;
+ }
+}
+
+static clib_error_t *
+parse_register_one (vlib_parse_main_t *pm, parse_registration_t *pr)
+{
+ u32 graph_root_index;
+ u16 subgraph_type_index = (u16)~0;
+ vlib_parse_type_t *subgraph_type = 0;
+ vlib_lex_token_t *t;
+ vlib_parse_graph_t *node;
+ u32 node_index, last_index, token_increment, new_subgraph_index;
+ u32 new_subgraph_depth, last_matching_index;
+ vlib_parse_item_t *item;
+ vlib_parse_type_t *type;
+
+ int use_main_graph = 1;
+
+ tokenize (pm, pr);
+
+ /* A typed rule? */
+ if (is_typed_rule (pm))
+ {
+ /* Get the type and its current subgraph root, if any */
+ t = vec_elt_at_index (pm->tokens, 1);
+ subgraph_type_index = parse_type_find_by_name (pm, t->value.as_pointer);
+ if (subgraph_type_index == (u16)~0)
+ return clib_error_return (0, "undeclared type '%s'",
+ t->value.as_pointer);
+ subgraph_type = pool_elt_at_index (pm->parse_types, subgraph_type_index);
+ graph_root_index = subgraph_type->rule_index;
+      /* Skip "<mytype> =" */
+ t += 3;
+ use_main_graph = 0;
+ }
+ else
+ {
+ /* top-level graph */
+ graph_root_index = pm->root_index;
+ t = vec_elt_at_index (pm->tokens, 0);
+ }
+
+ last_matching_index = (u32)~0;
+ last_index = node_index = graph_root_index;
+
+ /* Find the first token which isn't already being parsed */
+ while (t < pm->tokens + vec_len (pm->tokens) && node_index != (u32) ~0)
+ {
+ node = pool_elt_at_index (pm->parse_graph, node_index);
+ item = pool_elt_at_index (pm->parse_items, node->item);
+ type = pool_elt_at_index (pm->parse_types, item->type);
+ last_index = node_index;
+
+ if (token_matches_graph_node (pm, t, node, item, type, &token_increment))
+ {
+ t += token_increment;
+ last_matching_index = node_index;
+ node_index = node->deeper;
+ }
+ else
+ node_index = node->peer;
+ }
+
+ new_subgraph_index =
+ generate_subgraph_from_tokens (pm, t, &new_subgraph_depth, pr,
+ use_main_graph);
+
+ /* trivial cases: first graph node or first type rule */
+ if (graph_root_index == (u32)~0)
+ {
+ if (use_main_graph)
+ pm->root_index = new_subgraph_index;
+ else
+ subgraph_type->rule_index = new_subgraph_index;
+ return 0;
+ }
+
+ add_subgraph_to_graph (pm, last_matching_index, graph_root_index,
+ new_subgraph_index,
+ new_subgraph_depth);
+ return 0;
+}
+
+static clib_error_t *
+parse_register (vlib_main_t * vm,
+ parse_registration_t * lo,
+ parse_registration_t * hi,
+ vlib_parse_main_t *pm)
+{
+ parse_registration_t * pr;
+
+ for (pr = lo; pr < hi; pr = vlib_elf_section_data_next (pr, 0))
+ vec_add1 (pm->parse_registrations, pr);
+
+ return 0;
+}
+
+static clib_error_t *
+parse_register_one_type (vlib_parse_main_t *pm, vlib_parse_type_t *rp)
+{
+ (void) parse_type_find_or_create (pm, (vlib_parse_type_t *)rp);
+ return 0;
+}
+
+static clib_error_t *
+parse_type_register (vlib_main_t * vm,
+ vlib_parse_type_t * lo,
+ vlib_parse_type_t * hi,
+ vlib_parse_main_t *pm)
+{
+ clib_error_t * error = 0;
+ vlib_parse_type_t * ptr;
+
+ for (ptr = lo; ptr < hi; ptr = vlib_elf_section_data_next (ptr, 0)) {
+ error = parse_register_one_type (pm, ptr);
+ if (error)
+ goto done;
+ }
+
+ done:
+ return error;
+}
+
+clib_error_t *vlib_stdlex_init (vlib_main_t *vm) __attribute__((weak));
+clib_error_t *vlib_stdlex_init (vlib_main_t *vm)
+{
+ (void) vlib_lex_add_table ("ignore_everything");
+ return 0;
+}
+
+static int compute_rule_length (parse_registration_t *r)
+{
+ int length, i;
+ vlib_parse_main_t *pm = &vlib_parse_main;
+
+ if (r->rule_length)
+ return r->rule_length;
+
+ length = 0;
+
+ tokenize (pm, r);
+ length = vec_len (pm->tokens);
+
+ /* Account for "<foo> = " in "<foo> = bar" etc. */
+ if (is_typed_rule (pm))
+ length -= 2;
+
+ for (i = 0; i < vec_len (pm->tokens); i++)
+ {
+ switch (pm->tokens[i].token)
+ {
+ case VLIB_LEX_lt:
+ case VLIB_LEX_gt:
+ length -= 1;
+
+ default:
+ break;
+ }
+ }
+
+ ASSERT (length > 0);
+ r->rule_length = length;
+ return length;
+}
+
+static int rule_length_compare (parse_registration_t *r1,
+ parse_registration_t *r2)
+{
+ compute_rule_length (r1);
+ compute_rule_length (r2);
+ /* Descending sort */
+ return r2->rule_length - r1->rule_length;
+}
+
+
+static clib_error_t * parse_init (vlib_main_t *vm)
+{
+ vlib_parse_main_t *pm = &vlib_parse_main;
+ vlib_lex_main_t *lm = &vlib_lex_main;
+ vlib_elf_section_bounds_t * b, * bounds;
+ clib_error_t * error = 0;
+ parse_registration_t *rule;
+ int i;
+
+ if ((error = vlib_call_init_function (vm, lex_onetime_init)))
+ return error;
+
+ if ((error = vlib_stdlex_init(vm)))
+ return error;
+
+ if ((error = vlib_call_init_function (vm, parse_builtin_init)))
+ return error;
+
+ pm->vlib_main = vm;
+ pm->lex_main = lm;
+
+ mhash_init (&pm->parse_item_hash, sizeof (u32), sizeof (vlib_parse_item_t));
+ pm->parse_type_by_name_hash = hash_create_string (0, sizeof (u32));
+
+ vec_validate (pm->parse_value, 16);
+ vec_validate (pm->tokens, 16);
+ vec_validate (pm->register_input, 32);
+ vec_validate (pm->match_items, 16);
+
+ _vec_len (pm->parse_value) = 0;
+ _vec_len (pm->tokens) = 0;
+ _vec_len (pm->register_input) = 0;
+ _vec_len (pm->match_items) = 0;
+
+ bounds = vlib_get_elf_section_bounds (vm, "parse_type_registrations");
+ vec_foreach (b, bounds)
+ {
+ error = parse_type_register (vm, b->lo, b->hi, pm);
+ if (error)
+ break;
+ }
+ vec_free (bounds);
+
+ parse_type_and_graph_init (pm);
+
+ bounds = vlib_get_elf_section_bounds (vm, "parse_registrations");
+ vec_foreach (b, bounds)
+ {
+ error = parse_register (vm, b->lo, b->hi, pm);
+ if (error)
+ break;
+ }
+ vec_free (bounds);
+
+ vec_sort (pm->parse_registrations, r1, r2,
+ rule_length_compare (r1[0], r2[0]));
+
+ for (i = 0; i < vec_len (pm->parse_registrations); i++)
+ {
+ rule = pm->parse_registrations[i];
+ parse_register_one (pm, rule);
+ }
+
+ return error;
+}
+
+VLIB_INIT_FUNCTION (parse_init);
diff --git a/vlib/vlib/parse.h b/vlib/vlib/parse.h
new file mode 100644
index 00000000000..5b9acebf774
--- /dev/null
+++ b/vlib/vlib/parse.h
@@ -0,0 +1,171 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef included_vlib_parse_h
+#define included_vlib_parse_h
+
+#include <vlib/vlib.h>
+#include <vlib/lex.h>
+#include <vppinfra/mhash.h>
+
+typedef struct {
+ /* Word aligned value. */
+ union {
+ u8 as_u8[32 - 1 * sizeof (u16)];
+ void * as_pointer;
+ uword as_uword;
+ word as_word;
+ u64 as_u64;
+ } value;
+
+ /* 16 bit type at end so that 30 bytes of value are aligned. */
+ u16 type;
+} __attribute__ ((packed)) vlib_parse_value_t;
+
+/* Instance of a type. */
+typedef struct {
+ u32 type;
+
+ u32 origin;
+
+ u32 help_index;
+
+ union {
+ void * as_pointer;
+ uword as_uword;
+ } value;
+} vlib_parse_item_t;
+
+typedef struct {
+ /* Index of item for this node. */
+ u32 item;
+
+ /* Graph index of peer (sibling) node (linked list of peers). */
+ u32 peer;
+
+ /* Graph index of deeper (child) node (linked list of children). */
+ u32 deeper;
+} vlib_parse_graph_t;
+
+#define foreach_parse_match_type \
+ _(MATCH_DONE) \
+ _(MATCH_RULE) \
+ _(MATCH_FAIL) \
+ _(MATCH_FULL) \
+ _(MATCH_VALUE) \
+ _(MATCH_PARTIAL) \
+ _(MATCH_AMBIGUOUS) \
+ _(MATCH_EVAL_FAIL)
+
+typedef enum {
+#define _(a) VLIB_PARSE_##a,
+ foreach_parse_match_type
+#undef _
+} vlib_parse_match_t;
+
+struct vlib_parse_type;
+struct vlib_parse_main;
+
+typedef vlib_parse_match_t (vlib_parse_match_function_t)
+ (struct vlib_parse_main *,
+ struct vlib_parse_type *,
+ vlib_lex_token_t *,
+ vlib_parse_value_t *);
+typedef void (vlib_parse_value_cleanup_function_t) (vlib_parse_value_t *);
+
+typedef struct vlib_parse_type {
+ /* Type name. */
+ char * name;
+
+ vlib_parse_match_function_t * match_function;
+
+ vlib_parse_value_cleanup_function_t * value_cleanup_function;
+
+ format_function_t * format_value;
+
+ u32 rule_index;
+} vlib_parse_type_t;
+
+typedef struct {
+ char *initializer;
+ void * eof_match;
+ int rule_length;
+} parse_registration_t;
+
+typedef struct vlib_parse_main {
+ /* (type, origin, help, value) tuples */
+ vlib_parse_item_t *parse_items;
+ mhash_t parse_item_hash;
+
+ /* (item, peer, deeper) tuples */
+ vlib_parse_graph_t *parse_graph;
+ u32 root_index;
+
+ u8 *register_input;
+
+ /* parser types */
+ vlib_parse_type_t * parse_types;
+ uword *parse_type_by_name_hash;
+
+ /* Vector of MATCH_VALUEs */
+ vlib_parse_value_t * parse_value;
+ u32 * match_items;
+
+ /* Parse registrations */
+ parse_registration_t **parse_registrations;
+
+ /* Token vector */
+ vlib_lex_token_t *tokens;
+ u32 current_token_index;
+
+ vlib_lex_main_t *lex_main;
+ vlib_main_t *vlib_main;
+} vlib_parse_main_t;
+
+vlib_parse_main_t vlib_parse_main;
+
+typedef vlib_parse_match_t (vlib_parse_eval_function_t)
+ (vlib_parse_main_t *,
+ vlib_parse_item_t *,
+ vlib_parse_value_t *);
+
+vlib_parse_match_t vlib_parse_eval (u8 * input);
+
+format_function_t format_vlib_parse_value;
+
+/* FIXME need these to be global? */
+vlib_parse_match_function_t rule_match, eof_match, word_match, number_match;
+
+#define _PARSE_REGISTRATION_DATA(x) \
+VLIB_ELF_SECTION_DATA(x##_registration,parse_registration_t,parse_registrations)
+
+#define PARSE_INIT(x, s, e) \
+static _PARSE_REGISTRATION_DATA(x) = { \
+ .initializer = s, \
+ .eof_match = e, \
+};
+
+#define _PARSE_TYPE_REGISTRATION_DATA(x) \
+VLIB_ELF_SECTION_DATA(x##_type_registration,vlib_parse_type_t, \
+parse_type_registrations)
+
+#define PARSE_TYPE_INIT(n, m, c, f) \
+static _PARSE_TYPE_REGISTRATION_DATA(n) = { \
+ .name = #n, \
+ .match_function = m, \
+ .value_cleanup_function = c, \
+ .format_value = f, \
+};
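+
+/* Illustrative sketch (not part of the original source): declaring a
+   grammar rule whose action runs when the whole input matches.  The
+   rule string and my_eval are hypothetical; per parse.c, the eof-match
+   slot of PARSE_INIT carries the action function. */
+#if 0
+static vlib_parse_match_t
+my_eval (vlib_parse_main_t * pm, vlib_parse_item_t * item,
+	 vlib_parse_value_t * value)
+{
+  /* value[0..] holds the accumulated MATCH_VALUEs, e.g. a <number>. */
+  return VLIB_PARSE_MATCH_DONE;
+}
+
+PARSE_INIT (my_rule, "show widget <number>", my_eval);
+#endif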
+
+#endif /* included_vlib_parse_h */
diff --git a/vlib/vlib/parse_builtin.c b/vlib/vlib/parse_builtin.c
new file mode 100644
index 00000000000..df830db4e21
--- /dev/null
+++ b/vlib/vlib/parse_builtin.c
@@ -0,0 +1,132 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include <vlib/parse.h>
+
+always_inline void *
+parse_last_match_value (vlib_parse_main_t * pm)
+{
+ vlib_parse_item_t * i;
+ i = pool_elt_at_index (pm->parse_items,
+ vec_elt (pm->match_items, vec_len (pm->match_items) - 1));
+ return i->value.as_pointer;
+}
+
+vlib_parse_match_t eof_match (vlib_parse_main_t *pm, vlib_parse_type_t *type,
+ vlib_lex_token_t *t, vlib_parse_value_t *valuep)
+{ return t->token == VLIB_LEX_eof ? VLIB_PARSE_MATCH_DONE : VLIB_PARSE_MATCH_FAIL; }
+
+PARSE_TYPE_INIT (eof, eof_match, 0 /* cleanup value */, 0 /* format value */);
+
+vlib_parse_match_t rule_eof_match (vlib_parse_main_t *pm, vlib_parse_type_t *type,
+ vlib_lex_token_t *t, vlib_parse_value_t *valuep)
+{
+ vlib_parse_match_function_t * fp = parse_last_match_value (pm);
+ pm->current_token_index--;
+ return fp ? fp (pm, type, t, valuep) : VLIB_PARSE_MATCH_RULE;
+}
+
+PARSE_TYPE_INIT (rule_eof, rule_eof_match, 0, 0);
+
+vlib_parse_match_t word_match (vlib_parse_main_t *pm, vlib_parse_type_t *type,
+ vlib_lex_token_t *t, vlib_parse_value_t *valuep)
+{
+ u8 * tv, * iv;
+ int i;
+
+ if (t->token != VLIB_LEX_word)
+ return VLIB_PARSE_MATCH_FAIL;
+
+ tv = t->value.as_pointer;
+ iv = parse_last_match_value (pm);
+
+ for (i = 0; tv[i]; i++)
+ {
+ if (tv[i] != iv[i])
+ return VLIB_PARSE_MATCH_FAIL;
+ }
+
+ return iv[i] == 0 ? VLIB_PARSE_MATCH_FULL : VLIB_PARSE_MATCH_PARTIAL;
+}
+
+PARSE_TYPE_INIT (word, word_match, 0 /* clnup value */, 0 /* format value */);
+
+vlib_parse_match_t number_match (vlib_parse_main_t *pm, vlib_parse_type_t *type,
+ vlib_lex_token_t *t, vlib_parse_value_t *valuep)
+{
+ if (t->token == VLIB_LEX_number)
+ {
+ valuep->value.as_uword = t->value.as_uword;
+ return VLIB_PARSE_MATCH_VALUE;
+ }
+ return VLIB_PARSE_MATCH_FAIL;
+}
+
+static u8 * format_value_number (u8 * s, va_list * args)
+{
+ vlib_parse_value_t * v = va_arg (*args, vlib_parse_value_t *);
+ uword a = v->value.as_uword;
+
+ if (BITS(uword) == 64)
+ s = format (s, "%lld(0x%llx)", a, a);
+ else
+ s = format (s, "%ld(0x%lx)", a, a);
+ return s;
+}
+
+PARSE_TYPE_INIT (number, number_match, 0 /* cln value */,
+ format_value_number /* fmt value */);
+
+
+#define foreach_vanilla_lex_match_function \
+ _(plus) \
+ _(minus) \
+ _(star) \
+ _(slash) \
+ _(lpar) \
+ _(rpar)
+
+#define LEX_MATCH_DEBUG 0
+
+#define _(name) \
+vlib_parse_match_t name##_match (vlib_parse_main_t *pm, \
+ vlib_parse_type_t *type, \
+ vlib_lex_token_t *t, \
+ vlib_parse_value_t *valuep) \
+{ \
+ if (LEX_MATCH_DEBUG > 0) \
+ clib_warning ("against %U returns %s", \
+ format_vlib_lex_token, pm->lex_main, t, \
+ (t->token == VLIB_LEX_##name) \
+ ? "VLIB_PARSE_MATCH_FULL" : \
+ "VLIB_PARSE_MATCH_FAIL"); \
+ if (t->token == VLIB_LEX_##name) \
+ return VLIB_PARSE_MATCH_FULL; \
+ return VLIB_PARSE_MATCH_FAIL; \
+} \
+ \
+PARSE_TYPE_INIT (name, name##_match, 0 /* cln value */, \
+ 0 /* fmt val */);
+
+foreach_vanilla_lex_match_function
+#undef _
+
+/* So we're linked in. */
+static clib_error_t *
+parse_builtin_init (vlib_main_t * vm)
+{
+ return 0;
+}
+
+VLIB_INIT_FUNCTION (parse_builtin_init);
diff --git a/vlib/vlib/physmem.h b/vlib/vlib/physmem.h
new file mode 100644
index 00000000000..6e70291c1d9
--- /dev/null
+++ b/vlib/vlib/physmem.h
@@ -0,0 +1,93 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * physmem.h: virtual <-> physical memory mapping for VLIB buffers
+ *
+ * Copyright (c) 2008 Eliot Dresselhaus
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef included_vlib_physmem_h
+#define included_vlib_physmem_h
+
+typedef struct {
+ uword start, end, size;
+} vlib_physmem_region_t;
+
+typedef struct {
+ vlib_physmem_region_t virtual;
+
+ uword log2_n_bytes_per_page;
+
+ /* 1 << log2_n_bytes_per_page - 1. */
+ uword page_mask;
+
+ u64 * page_table;
+} vlib_physmem_main_t;
+
+always_inline u64
+vlib_physmem_offset_to_physical (vlib_physmem_main_t * pm, uword o)
+{
+ uword page_index = o >> pm->log2_n_bytes_per_page;
+ ASSERT (o < pm->virtual.size);
+ ASSERT (pm->page_table[page_index] != 0);
+ return (vec_elt (pm->page_table, page_index) + (o & pm->page_mask));
+}
+
+always_inline int
+vlib_physmem_is_virtual (vlib_physmem_main_t * pm, uword p)
+{ return p >= pm->virtual.start && p < pm->virtual.end; }
+
+always_inline uword
+vlib_physmem_offset_of (vlib_physmem_main_t * pm, void * p)
+{
+ uword a = pointer_to_uword (p);
+ uword o;
+
+ ASSERT (vlib_physmem_is_virtual (pm, a));
+ o = a - pm->virtual.start;
+
+ /* Offset must fit in 32 bits. */
+  ASSERT ((u32) o == a - pm->virtual.start);
+
+ return o;
+}
+
+always_inline void *
+vlib_physmem_at_offset (vlib_physmem_main_t * pm, uword offset)
+{
+ ASSERT (offset < pm->virtual.size);
+ return uword_to_pointer (pm->virtual.start + offset, void *);
+}
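+
+/* Illustrative sketch (not part of the original source): a full
+   virtual-to-physical translation composed from the helpers above. */
+#if 0
+always_inline u64
+my_virt_to_phys (vlib_physmem_main_t * pm, void * va)
+{
+  uword offset = vlib_physmem_offset_of (pm, va);
+  return vlib_physmem_offset_to_physical (pm, offset);
+}
+#endif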
+
+#endif /* included_vlib_physmem_h */
diff --git a/vlib/vlib/threads.c b/vlib/vlib/threads.c
new file mode 100644
index 00000000000..4621f843dd5
--- /dev/null
+++ b/vlib/vlib/threads.c
@@ -0,0 +1,1166 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include <signal.h>
+#include <math.h>
+#include <vppinfra/format.h>
+#include <vlib/vlib.h>
+
+#include <vlib/threads.h>
+#include <vlib/unix/physmem.h>
+
+#include <vlib/unix/cj.h>
+
+#if DPDK==1
+#include <rte_config.h>
+#include <rte_common.h>
+#include <rte_eal.h>
+#include <rte_launch.h>
+#include <rte_lcore.h>
+#endif
+DECLARE_CJ_GLOBAL_LOG;
+
+#define FRAME_QUEUE_NELTS 32
+
+
+#if DPDK==1
+/*
+ * Weak definitions of DPDK symbols used in this file.
+ * Needed for linking test programs without DPDK libs.
+ */
+unsigned __thread __attribute__((weak)) RTE_PER_LCORE(_lcore_id);
+struct lcore_config __attribute__((weak)) lcore_config[];
+unsigned __attribute__((weak)) rte_socket_id();
+int __attribute__((weak)) rte_eal_remote_launch();
+#endif
+u32 vl(void *p)
+{
+ return vec_len (p);
+}
+
+void debug_hex_bytes (u8 *s, u32 n)
+{
+ fformat (stderr, "%U\n", format_hex_bytes, s, n);
+}
+
+vlib_thread_main_t vlib_thread_main;
+
+uword
+os_get_cpu_number (void)
+{
+ void * sp;
+ uword n;
+ u32 len;
+
+ len = vec_len (vlib_thread_stacks);
+ if (len == 0)
+ return 0;
+
+ /* Get any old stack address. */
+ sp = &sp;
+
+ n = ((uword)sp - (uword)vlib_thread_stacks[0])
+ >> VLIB_LOG2_THREAD_STACK_SIZE;
+
+ /* "processes" have their own stacks, and they always run in thread 0 */
+ n = n >= len ? 0 : n;
+
+ return n;
+}
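+
+/* Illustrative note (not part of the original source): stacks are carved
+   from one contiguous region, so with e.g. 2MB stacks
+   (VLIB_LOG2_THREAD_STACK_SIZE == 21, an assumed value) a stack address
+   5MB past vlib_thread_stacks[0] yields thread index 2. */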
+
+void
+vlib_set_thread_name (char *name)
+{
+ int pthread_setname_np (pthread_t __target_thread, const char *__name);
+ pthread_t thread = pthread_self();
+
+ if (thread)
+ pthread_setname_np(thread, name);
+}
+
+static int sort_registrations_by_no_clone (void *a0, void * a1)
+{
+ vlib_thread_registration_t ** tr0 = a0;
+ vlib_thread_registration_t ** tr1 = a1;
+
+ return ((i32)((*tr0)->no_data_structure_clone)
+ - ((i32)((*tr1)->no_data_structure_clone)));
+}
+
+static uword *
+vlib_sysfs_list_to_bitmap(char * filename)
+{
+ FILE *fp;
+ uword *r = 0;
+
+ fp = fopen (filename, "r");
+
+ if (fp != NULL)
+ {
+ u8 * buffer = 0;
+ vec_validate (buffer, 256-1);
+ if (fgets ((char *)buffer, 256, fp))
+ {
+ unformat_input_t in;
+ unformat_init_string (&in, (char *) buffer, strlen ((char *) buffer));
+ unformat(&in, "%U", unformat_bitmap_list, &r);
+ unformat_free (&in);
+ }
+ vec_free(buffer);
+ fclose(fp);
+ }
+ return r;
+}
+
+
+/* Called early in the init sequence */
+
+clib_error_t *
+vlib_thread_init (vlib_main_t * vm)
+{
+ vlib_thread_main_t * tm = &vlib_thread_main;
+ vlib_worker_thread_t * w;
+ vlib_thread_registration_t * tr;
+ u32 n_vlib_mains = 1;
+ u32 first_index = 1;
+ u32 i;
+ uword * avail_cpu;
+
+ /* get bitmaps of active cpu cores and sockets */
+ tm->cpu_core_bitmap =
+ vlib_sysfs_list_to_bitmap("/sys/devices/system/cpu/online");
+ tm->cpu_socket_bitmap =
+ vlib_sysfs_list_to_bitmap("/sys/devices/system/node/online");
+
+ avail_cpu = clib_bitmap_dup(tm->cpu_core_bitmap);
+
+ /* skip cores */
+ for (i=0; i < tm->skip_cores; i++)
+ {
+ uword c = clib_bitmap_first_set(avail_cpu);
+ if (c == ~0)
+ return clib_error_return (0, "no available cpus to skip");
+
+ avail_cpu = clib_bitmap_set(avail_cpu, c, 0);
+ }
+
+ /* grab cpu for main thread */
+ if (!tm->main_lcore)
+ {
+ tm->main_lcore = clib_bitmap_first_set(avail_cpu);
+ if (tm->main_lcore == ~0)
+ return clib_error_return (0, "no available cpus to be used for the"
+ " main thread");
+ }
+ else
+ {
+ if (clib_bitmap_get(avail_cpu, tm->main_lcore) == 0)
+ return clib_error_return (0, "cpu %u is not available to be used"
+ " for the main thread", tm->main_lcore);
+ }
+ avail_cpu = clib_bitmap_set(avail_cpu, tm->main_lcore, 0);
+
+  /* if sysfs provided no data, assume only socket 0 exists */
+ if (!tm->cpu_socket_bitmap)
+ tm->cpu_socket_bitmap = clib_bitmap_set(0, 0, 1);
+
+ /* as many threads as stacks... */
+ vec_validate_aligned (vlib_worker_threads, vec_len(vlib_thread_stacks)-1,
+ CLIB_CACHE_LINE_BYTES);
+
+ /* Preallocate thread 0 */
+ _vec_len(vlib_worker_threads) = 1;
+ w = vlib_worker_threads;
+ w->thread_mheap = clib_mem_get_heap();
+ w->thread_stack = vlib_thread_stacks[0];
+ w->dpdk_lcore_id = -1;
+ w->lwp = syscall(SYS_gettid);
+ tm->n_vlib_mains = 1;
+
+ /* assign threads to cores and set n_vlib_mains */
+ tr = tm->next;
+
+ while (tr)
+ {
+ vec_add1 (tm->registrations, tr);
+ tr = tr->next;
+ }
+
+ vec_sort_with_function
+ (tm->registrations, sort_registrations_by_no_clone);
+
+ for (i = 0; i < vec_len (tm->registrations); i++)
+ {
+ int j;
+ tr = tm->registrations[i];
+ tr->first_index = first_index;
+ first_index += tr->count;
+ n_vlib_mains += (tr->no_data_structure_clone == 0) ? tr->count : 0;
+
+ /* construct coremask */
+ if (tr->use_pthreads || !tr->count)
+ continue;
+
+ if (tr->coremask)
+ {
+ uword c;
+ clib_bitmap_foreach (c, tr->coremask, ({
+ if (clib_bitmap_get(avail_cpu, c) == 0)
+ return clib_error_return (0, "cpu %u is not available to be used"
+ " for the '%s' thread",c, tr->name);
+
+ avail_cpu = clib_bitmap_set(avail_cpu, c, 0);
+ }));
+
+ }
+ else
+ {
+ for (j=0; j < tr->count; j++)
+ {
+ uword c = clib_bitmap_first_set(avail_cpu);
+ if (c == ~0)
+ return clib_error_return (0, "no available cpus to be used for"
+ " the '%s' thread", tr->name);
+
+ avail_cpu = clib_bitmap_set(avail_cpu, c, 0);
+ tr->coremask = clib_bitmap_set(tr->coremask, c, 1);
+ }
+ }
+ }
+
+ clib_bitmap_free(avail_cpu);
+
+ tm->n_vlib_mains = n_vlib_mains;
+
+ vec_validate_aligned (vlib_worker_threads, first_index-1,
+ CLIB_CACHE_LINE_BYTES);
+
+
+ tm->efd.enabled = VLIB_EFD_DISABLED;
+ tm->efd.queue_hi_thresh = ((VLIB_EFD_DEF_WORKER_HI_THRESH_PCT *
+ FRAME_QUEUE_NELTS)/100);
+ return 0;
+}
+
+vlib_worker_thread_t *
+vlib_alloc_thread (vlib_main_t * vm)
+{
+ vlib_worker_thread_t * w;
+
+ if (vec_len(vlib_worker_threads) >= vec_len (vlib_thread_stacks))
+ {
+ clib_warning ("out of worker threads... Quitting...");
+ exit(1);
+ }
+ vec_add2 (vlib_worker_threads, w, 1);
+ w->thread_stack = vlib_thread_stacks[w - vlib_worker_threads];
+ return w;
+}
+
+vlib_frame_queue_t * vlib_frame_queue_alloc (int nelts)
+{
+ vlib_frame_queue_t * fq;
+
+ fq = clib_mem_alloc_aligned(sizeof (*fq), CLIB_CACHE_LINE_BYTES);
+ memset (fq, 0, sizeof (*fq));
+ fq->nelts = nelts;
+ fq->vector_threshold = 128; // packets
+ vec_validate_aligned (fq->elts, nelts-1, CLIB_CACHE_LINE_BYTES);
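+  /* nelts must be a power of 2 (enforced below): producers and consumers
+     index the ring with "counter & (nelts - 1)", and head / tail are
+     free-running u64 counters. */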
+
+ if (1)
+ {
+ if (((uword)&fq->tail) & (CLIB_CACHE_LINE_BYTES - 1))
+ fformat(stderr, "WARNING: fq->tail unaligned\n");
+ if (((uword)&fq->head) & (CLIB_CACHE_LINE_BYTES - 1))
+ fformat(stderr, "WARNING: fq->head unaligned\n");
+ if (((uword)fq->elts) & (CLIB_CACHE_LINE_BYTES - 1))
+ fformat(stderr, "WARNING: fq->elts unaligned\n");
+
+ if (sizeof (fq->elts[0]) % CLIB_CACHE_LINE_BYTES)
+ fformat(stderr, "WARNING: fq->elts[0] size %d\n",
+ sizeof (fq->elts[0]));
+ if (nelts & (nelts -1))
+ {
+ fformat (stderr, "FATAL: nelts MUST be a power of 2\n");
+ abort();
+ }
+ }
+
+ return (fq);
+}
+
+void vl_msg_api_handler_no_free (void *) __attribute__ ((weak));
+void vl_msg_api_handler_no_free (void *v) { }
+
+/* Turned off, saved as reference material... */
+#if 0
+static inline int vlib_frame_queue_dequeue_internal (int thread_id,
+ vlib_main_t *vm,
+ vlib_node_main_t *nm)
+{
+ vlib_frame_queue_t *fq = vlib_frame_queues[thread_id];
+ vlib_frame_queue_elt_t *elt;
+ vlib_frame_t *f;
+ vlib_pending_frame_t *p;
+ vlib_node_runtime_t *r;
+ u32 node_runtime_index;
+ int msg_type;
+ u64 before;
+ int processed = 0;
+
+ ASSERT(vm == vlib_mains[thread_id]);
+
+ while (1)
+ {
+ if (fq->head == fq->tail)
+ return processed;
+
+ elt = fq->elts + ((fq->head+1) & (fq->nelts-1));
+
+ if (!elt->valid)
+ return processed;
+
+ before = clib_cpu_time_now();
+
+ f = elt->frame;
+ node_runtime_index = elt->node_runtime_index;
+ msg_type = elt->msg_type;
+
+ switch (msg_type)
+ {
+ case VLIB_FRAME_QUEUE_ELT_FREE_BUFFERS:
+ vlib_buffer_free (vm, vlib_frame_vector_args (f), f->n_vectors);
+ /* note fallthrough... */
+ case VLIB_FRAME_QUEUE_ELT_FREE_FRAME:
+ r = vec_elt_at_index (nm->nodes_by_type[VLIB_NODE_TYPE_INTERNAL],
+ node_runtime_index);
+ vlib_frame_free (vm, r, f);
+ break;
+ case VLIB_FRAME_QUEUE_ELT_DISPATCH_FRAME:
+ vec_add2 (vm->node_main.pending_frames, p, 1);
+ f->flags |= (VLIB_FRAME_PENDING | VLIB_FRAME_FREE_AFTER_DISPATCH);
+ p->node_runtime_index = elt->node_runtime_index;
+ p->frame_index = vlib_frame_index (vm, f);
+ p->next_frame_index = VLIB_PENDING_FRAME_NO_NEXT_FRAME;
+ fq->dequeue_vectors += (u64) f->n_vectors;
+ break;
+ case VLIB_FRAME_QUEUE_ELT_API_MSG:
+ vl_msg_api_handler_no_free (f);
+ break;
+ default:
+ clib_warning ("bogus frame queue message, type %d", msg_type);
+ break;
+ }
+ elt->valid = 0;
+ fq->dequeues++;
+ fq->dequeue_ticks += clib_cpu_time_now() - before;
+ CLIB_MEMORY_BARRIER();
+ fq->head++;
+ processed++;
+ }
+ ASSERT(0);
+ return processed;
+}
+
+int vlib_frame_queue_dequeue (int thread_id,
+ vlib_main_t *vm,
+ vlib_node_main_t *nm)
+{
+ return vlib_frame_queue_dequeue_internal (thread_id, vm, nm);
+}
+
+int vlib_frame_queue_enqueue (vlib_main_t *vm, u32 node_runtime_index,
+ u32 frame_queue_index, vlib_frame_t *frame,
+ vlib_frame_queue_msg_type_t type)
+{
+ vlib_frame_queue_t *fq = vlib_frame_queues[frame_queue_index];
+ vlib_frame_queue_elt_t *elt;
+ u32 save_count;
+ u64 new_tail;
+ u64 before = clib_cpu_time_now();
+
+ ASSERT (fq);
+
+ new_tail = __sync_add_and_fetch (&fq->tail, 1);
+
+ /* Wait until a ring slot is available */
+ while (new_tail >= fq->head + fq->nelts)
+ {
+ f64 b4 = vlib_time_now_ticks (vm, before);
+ vlib_worker_thread_barrier_check (vm, b4);
+ /* Bad idea. Dequeue -> enqueue -> dequeue -> trouble */
+ // vlib_frame_queue_dequeue (vm->cpu_index, vm, nm);
+ }
+
+ elt = fq->elts + (new_tail & (fq->nelts-1));
+
+ /* this would be very bad... */
+ while (elt->valid)
+ {
+ }
+
+ /* Once we enqueue the frame, frame->n_vectors is owned elsewhere... */
+ save_count = frame->n_vectors;
+
+ elt->frame = frame;
+ elt->node_runtime_index = node_runtime_index;
+ elt->msg_type = type;
+ CLIB_MEMORY_BARRIER();
+ elt->valid = 1;
+
+ return save_count;
+}
+#endif /* 0 */
+
+/* To be called by vlib worker threads upon startup */
+void vlib_worker_thread_init (vlib_worker_thread_t * w)
+{
+ vlib_thread_main_t *tm = vlib_get_thread_main();
+
+  /* worker threads want no signals. */
+ {
+ sigset_t s;
+ sigfillset (&s);
+ pthread_sigmask (SIG_SETMASK, &s, 0);
+ }
+
+ clib_mem_set_heap (w->thread_mheap);
+
+ if (vec_len(tm->thread_prefix) && w->registration->short_name)
+ {
+ w->name = format(0, "%v_%s_%d%c", tm->thread_prefix,
+ w->registration->short_name,
+ w->instance_id,
+ '\0');
+ vlib_set_thread_name((char *)w->name);
+ }
+
+ if (!w->registration->use_pthreads)
+ {
+
+ /* Initial barrier sync, for both worker and i/o threads */
+ clib_smp_atomic_add (vlib_worker_threads->workers_at_barrier, 1);
+
+ while (*vlib_worker_threads->wait_at_barrier)
+ ;
+
+ clib_smp_atomic_add (vlib_worker_threads->workers_at_barrier, -1);
+ }
+}
+
+void *vlib_worker_thread_bootstrap_fn (void *arg)
+{
+ void *rv;
+ vlib_worker_thread_t *w = arg;
+
+ w->lwp = syscall(SYS_gettid);
+ w->dpdk_lcore_id = -1;
+#if DPDK==1
+ if (w->registration && !w->registration->use_pthreads &&
+ rte_socket_id) /* do we really have dpdk linked */
+ {
+ unsigned lcore = rte_lcore_id();
+ lcore = lcore < RTE_MAX_LCORE ? lcore : -1;
+ w->dpdk_lcore_id = lcore;
+ }
+#endif
+
+ rv = (void *) clib_calljmp
+ ((uword (*)(uword)) w->thread_function,
+ (uword) arg, w->thread_stack + VLIB_THREAD_STACK_SIZE);
+ /* NOTREACHED, we hope */
+ return rv;
+}
+
+static int
+vlib_launch_thread (void *fp, vlib_worker_thread_t *w, unsigned lcore_id)
+{
+ pthread_t dummy;
+ void *(*fp_arg)(void *) = fp;
+
+#if DPDK==1
+  if (!w->registration->use_pthreads)
+    {
+      if (rte_eal_remote_launch) /* do we have dpdk linked */
+        return rte_eal_remote_launch (fp, (void *)w, lcore_id);
+      else
+        return -1;
+    }
+  else
+#endif
+    return pthread_create (&dummy, NULL /* attr */, fp_arg, (void *)w);
+}
+
+static clib_error_t * start_workers (vlib_main_t * vm)
+{
+ int i, j;
+ vlib_worker_thread_t *w;
+ vlib_main_t *vm_clone;
+ void *oldheap;
+ vlib_frame_queue_t *fq;
+ vlib_thread_main_t * tm = &vlib_thread_main;
+ vlib_thread_registration_t * tr;
+ vlib_node_runtime_t * rt;
+ u32 n_vlib_mains = tm->n_vlib_mains;
+ u32 worker_thread_index;
+
+ vec_reset_length (vlib_worker_threads);
+
+ /* Set up the main thread */
+ vec_add2_aligned (vlib_worker_threads, w, 1, CLIB_CACHE_LINE_BYTES);
+ w->elog_track.name = "thread 0";
+ elog_track_register (&vm->elog_main, &w->elog_track);
+
+ if (vec_len(tm->thread_prefix))
+ {
+ w->name = format(0, "%v_main%c", tm->thread_prefix, '\0');
+ vlib_set_thread_name((char *)w->name);
+ }
+
+#if DPDK==1
+ w->dpdk_lcore_id = -1;
+ if (rte_socket_id) /* do we really have dpdk linked */
+ {
+ unsigned lcore = rte_lcore_id();
+      w->dpdk_lcore_id = lcore < RTE_MAX_LCORE ? lcore : -1;
+ }
+#endif
+
+ if (n_vlib_mains > 1)
+ {
+ u8 * heap = clib_mem_get_per_cpu_heap();
+ mheap_t * h = mheap_header (heap);
+
+ /* make the main heap thread-safe */
+ h->flags |= MHEAP_FLAG_THREAD_SAFE;
+
+ /* Make the event-log MP-safe */
+ vm->elog_main.lock =
+ clib_mem_alloc_aligned (CLIB_CACHE_LINE_BYTES,
+ CLIB_CACHE_LINE_BYTES);
+
+ vm->elog_main.lock[0] = 0;
+
+ vec_validate (vlib_mains, tm->n_vlib_mains - 1);
+ _vec_len (vlib_mains) = 0;
+ vec_add1 (vlib_mains, vm);
+
+ vec_validate (vlib_frame_queues, tm->n_vlib_mains - 1);
+ _vec_len (vlib_frame_queues) = 0;
+ fq = vlib_frame_queue_alloc (FRAME_QUEUE_NELTS);
+ vec_add1 (vlib_frame_queues, fq);
+
+ vlib_worker_threads->wait_at_barrier =
+ clib_mem_alloc_aligned (sizeof (u32), CLIB_CACHE_LINE_BYTES);
+ vlib_worker_threads->workers_at_barrier =
+ clib_mem_alloc_aligned (sizeof (u32), CLIB_CACHE_LINE_BYTES);
+
+ /* Ask for an initial barrier sync */
+ *vlib_worker_threads->workers_at_barrier = 0;
+ *vlib_worker_threads->wait_at_barrier = 1;
+
+ worker_thread_index = 1;
+
+ for (i = 0; i < vec_len(tm->registrations); i++)
+ {
+ vlib_node_main_t *nm, *nm_clone;
+ vlib_buffer_main_t *bm_clone;
+ vlib_buffer_free_list_t *fl_clone, *fl_orig;
+ vlib_buffer_free_list_t *orig_freelist_pool;
+ int k;
+
+ tr = tm->registrations[i];
+
+ if (tr->count == 0)
+ continue;
+
+ for (k = 0; k < tr->count; k++)
+ {
+ vec_add2 (vlib_worker_threads, w, 1);
+ /*
+ * Share the main heap which is now thread-safe.
+ *
+ * To allocate separate heaps, code:
+ * mheap_alloc (0 / * use VM * /, tr->mheap_size);
+ */
+ w->thread_mheap = heap;
+ w->thread_stack = vlib_thread_stacks[w - vlib_worker_threads];
+ w->thread_function = tr->function;
+ w->thread_function_arg = w;
+ w->instance_id = k;
+ w->registration = tr;
+
+ w->elog_track.name = (char *) format (0, "thread %d", i+1);
+ vec_add1 (w->elog_track.name, 0);
+ elog_track_register (&vm->elog_main, &w->elog_track);
+
+ if (tr->no_data_structure_clone)
+ continue;
+
+ /* Allocate "to-worker-N" frame queue */
+ fq = vlib_frame_queue_alloc (FRAME_QUEUE_NELTS);
+ vec_validate (vlib_frame_queues, worker_thread_index);
+ vlib_frame_queues[worker_thread_index] = fq;
+
+ /* Fork vlib_global_main et al. Look for bugs here */
+ oldheap = clib_mem_set_heap (w->thread_mheap);
+
+ vm_clone = clib_mem_alloc (sizeof (*vm_clone));
+ memcpy (vm_clone, vlib_mains[0], sizeof (*vm_clone));
+
+ vm_clone->cpu_index = worker_thread_index;
+ vm_clone->heap_base = w->thread_mheap;
+ vm_clone->mbuf_alloc_list = 0;
+ memset (&vm_clone->random_buffer, 0, sizeof (vm_clone->random_buffer));
+
+ nm = &vlib_mains[0]->node_main;
+ nm_clone = &vm_clone->node_main;
+ /* fork next frames array, preserving node runtime indices */
+ nm_clone->next_frames = vec_dup (nm->next_frames);
+ for (j = 0; j < vec_len (nm_clone->next_frames); j++)
+ {
+ vlib_next_frame_t *nf = &nm_clone->next_frames[j];
+ u32 save_node_runtime_index;
+
+ save_node_runtime_index = nf->node_runtime_index;
+ vlib_next_frame_init (nf);
+ nf->node_runtime_index = save_node_runtime_index;
+ }
+
+ /* fork the frame dispatch queue */
+ nm_clone->pending_frames = 0;
+ vec_validate (nm_clone->pending_frames, 10); /* $$$$$?????? */
+ _vec_len (nm_clone->pending_frames) = 0;
+
+ /* fork nodes */
+ nm_clone->nodes = 0;
+ for (j = 0; j < vec_len (nm->nodes); j++)
+ {
+ vlib_node_t *n;
+ n = clib_mem_alloc_no_fail (sizeof(*n));
+ memcpy (n, nm->nodes[j], sizeof (*n));
+ /* none of the copied nodes have enqueue rights given out */
+ n->owner_node_index = VLIB_INVALID_NODE_INDEX;
+ memset (&n->stats_total, 0, sizeof (n->stats_total));
+ memset (&n->stats_last_clear, 0, sizeof (n->stats_last_clear));
+ vec_add1 (nm_clone->nodes, n);
+ }
+ nm_clone->nodes_by_type[VLIB_NODE_TYPE_INTERNAL] =
+ vec_dup (nm->nodes_by_type[VLIB_NODE_TYPE_INTERNAL]);
+
+ nm_clone->nodes_by_type[VLIB_NODE_TYPE_INPUT] =
+ vec_dup (nm->nodes_by_type[VLIB_NODE_TYPE_INPUT]);
+ vec_foreach(rt, nm_clone->nodes_by_type[VLIB_NODE_TYPE_INPUT])
+ rt->cpu_index = vm_clone->cpu_index;
+
+ nm_clone->processes = vec_dup (nm->processes);
+
+ /* zap the (per worker) frame freelists, etc */
+ nm_clone->frame_sizes = 0;
+ nm_clone->frame_size_hash = 0;
+
+ /* Packet trace buffers are guaranteed to be empty, nothing to do here */
+
+ clib_mem_set_heap (oldheap);
+ vec_add1 (vlib_mains, vm_clone);
+
+ unix_physmem_init (vm_clone, 0 /* physmem not required */);
+
+ /* Fork the vlib_buffer_main_t free lists, etc. */
+ bm_clone = vec_dup (vm_clone->buffer_main);
+ vm_clone->buffer_main = bm_clone;
+
+ orig_freelist_pool = bm_clone->buffer_free_list_pool;
+ bm_clone->buffer_free_list_pool = 0;
+
+ pool_foreach (fl_orig, orig_freelist_pool,
+ ({
+ pool_get_aligned (bm_clone->buffer_free_list_pool,
+ fl_clone, CLIB_CACHE_LINE_BYTES);
+ ASSERT (fl_orig - orig_freelist_pool
+ == fl_clone - bm_clone->buffer_free_list_pool);
+
+ fl_clone[0] = fl_orig[0];
+ fl_clone->aligned_buffers = 0;
+ fl_clone->unaligned_buffers = 0;
+ fl_clone->n_alloc = 0;
+ }));
+
+ worker_thread_index++;
+ }
+ }
+ }
+ else
+ {
+ /* only have non-data-structure copy threads to create... */
+ for (i = 0; i < vec_len(tm->registrations); i++)
+ {
+ tr = tm->registrations[i];
+
+ for (j = 0; j < tr->count; j++)
+ {
+ vec_add2 (vlib_worker_threads, w, 1);
+ w->thread_mheap = mheap_alloc (0 /* use VM */, tr->mheap_size);
+ w->thread_stack = vlib_thread_stacks[w - vlib_worker_threads];
+ w->thread_function = tr->function;
+ w->thread_function_arg = w;
+ w->instance_id = j;
+ w->elog_track.name = (char *) format (0, "thread %d", i+1);
+ w->registration = tr;
+ vec_add1 (w->elog_track.name, 0);
+ elog_track_register (&vm->elog_main, &w->elog_track);
+ }
+ }
+ }
+
+ worker_thread_index = 1;
+
+ for (i = 0; i < vec_len (tm->registrations); i++)
+ {
+ int j;
+
+ tr = tm->registrations[i];
+
+ if (tr->use_pthreads || tm->use_pthreads)
+ {
+ for (j = 0; j < tr->count; j++)
+ {
+ w = vlib_worker_threads + worker_thread_index++;
+ if (vlib_launch_thread (vlib_worker_thread_bootstrap_fn, w, 0) < 0)
+ clib_warning ("Couldn't start '%s' pthread ", tr->name);
+ }
+ }
+ else
+ {
+ uword c;
+ clib_bitmap_foreach (c, tr->coremask, ({
+ w = vlib_worker_threads + worker_thread_index++;
+ if (vlib_launch_thread (vlib_worker_thread_bootstrap_fn, w, c) < 0)
+ clib_warning ("Couldn't start DPDK lcore %d", c);
+
+ }));
+ }
+ }
+ vlib_worker_thread_barrier_sync(vm);
+ vlib_worker_thread_barrier_release(vm);
+ return 0;
+}
+
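+/*
+ * Register start_workers to run on the main thread just before the main
+ * loop starts, i.e. once all init functions have run (per the
+ * VLIB_MAIN_LOOP_ENTER_FUNCTION hook; see init.h).
+ */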
+VLIB_MAIN_LOOP_ENTER_FUNCTION (start_workers);
+
+void vlib_worker_thread_node_runtime_update(void)
+{
+ int i, j;
+ vlib_worker_thread_t *w;
+ vlib_main_t *vm;
+ vlib_node_main_t *nm, *nm_clone;
+ vlib_node_t ** old_nodes_clone;
+ vlib_main_t *vm_clone;
+ vlib_node_runtime_t * rt, * old_rt;
+ void *oldheap;
+ never_inline void
+ vlib_node_runtime_sync_stats (vlib_main_t * vm,
+ vlib_node_runtime_t * r,
+ uword n_calls,
+ uword n_vectors,
+ uword n_clocks);
+
+ ASSERT (os_get_cpu_number() == 0);
+
+ if (vec_len (vlib_mains) == 0)
+ return;
+
+ vm = vlib_mains[0];
+ nm = &vm->node_main;
+
+ ASSERT (os_get_cpu_number() == 0);
+ ASSERT (*vlib_worker_threads->wait_at_barrier == 1);
+
+ /*
+ * Scrape all runtime stats, so we don't lose node runtime(s) with
+ * pending counts, or throw away worker / io thread counts.
+ */
+ for (j = 0; j < vec_len (nm->nodes); j++)
+ {
+ vlib_node_t * n;
+ n = nm->nodes[j];
+ vlib_node_sync_stats (vm, n);
+ }
+
+ for (i = 1; i < vec_len (vlib_mains); i++)
+ {
+ vlib_node_t * n;
+
+ vm_clone = vlib_mains[i];
+ nm_clone = &vm_clone->node_main;
+
+ for (j = 0; j < vec_len (nm_clone->nodes); j++)
+ {
+ n = nm_clone->nodes[j];
+
+ rt = vlib_node_get_runtime (vm_clone, n->index);
+ vlib_node_runtime_sync_stats (vm_clone, rt, 0, 0, 0);
+ }
+ }
+
+ for (i = 1; i < vec_len (vlib_mains); i++)
+ {
+ vlib_node_runtime_t * rt;
+ w = vlib_worker_threads + i;
+ oldheap = clib_mem_set_heap (w->thread_mheap);
+
+ vm_clone = vlib_mains[i];
+
+ /* Re-clone error heap */
+ memcpy (&vm_clone->error_main, &vm->error_main, sizeof (vm->error_main));
+
+ nm_clone = &vm_clone->node_main;
+ vec_free (nm_clone->next_frames);
+ nm_clone->next_frames = vec_dup (nm->next_frames);
+
+ for (j = 0; j < vec_len (nm_clone->next_frames); j++)
+ {
+ vlib_next_frame_t *nf = &nm_clone->next_frames[j];
+ u32 save_node_runtime_index;
+
+ save_node_runtime_index = nf->node_runtime_index;
+ vlib_next_frame_init (nf);
+ nf->node_runtime_index = save_node_runtime_index;
+ }
+
+ old_nodes_clone = nm_clone->nodes;
+ nm_clone->nodes = 0;
+
+ /* re-fork nodes */
+ for (j = 0; j < vec_len (nm->nodes); j++) {
+ vlib_node_t *old_n_clone;
+ vlib_node_t *new_n, *new_n_clone;
+
+ new_n = nm->nodes[j];
+ old_n_clone = old_nodes_clone[j];
+
+ new_n_clone = clib_mem_alloc_no_fail (sizeof(*new_n_clone));
+ memcpy (new_n_clone, new_n, sizeof (*new_n));
+ /* none of the copied nodes have enqueue rights given out */
+ new_n_clone->owner_node_index = VLIB_INVALID_NODE_INDEX;
+
+ if (j >= vec_len (old_nodes_clone))
+ {
+ /* new node, set to zero */
+ memset (&new_n_clone->stats_total, 0,
+ sizeof (new_n_clone->stats_total));
+ memset (&new_n_clone->stats_last_clear, 0,
+ sizeof (new_n_clone->stats_last_clear));
+ }
+ else
+ {
+ /* Copy stats if the old data is valid */
+ memcpy (&new_n_clone->stats_total,
+ &old_n_clone->stats_total,
+ sizeof (new_n_clone->stats_total));
+ memcpy (&new_n_clone->stats_last_clear,
+ &old_n_clone->stats_last_clear,
+ sizeof (new_n_clone->stats_last_clear));
+
+ /* keep previous node state */
+ new_n_clone->state = old_n_clone->state;
+ }
+ vec_add1 (nm_clone->nodes, new_n_clone);
+ }
+ /* Free the old node clone */
+ for (j = 0; j < vec_len(old_nodes_clone); j++)
+ clib_mem_free (old_nodes_clone[j]);
+ vec_free (old_nodes_clone);
+
+ vec_free (nm_clone->nodes_by_type[VLIB_NODE_TYPE_INTERNAL]);
+
+ nm_clone->nodes_by_type[VLIB_NODE_TYPE_INTERNAL] =
+ vec_dup (nm->nodes_by_type[VLIB_NODE_TYPE_INTERNAL]);
+
+ /* clone input node runtime */
+ old_rt = nm_clone->nodes_by_type[VLIB_NODE_TYPE_INPUT];
+
+ nm_clone->nodes_by_type[VLIB_NODE_TYPE_INPUT] =
+ vec_dup (nm->nodes_by_type[VLIB_NODE_TYPE_INPUT]);
+
+ vec_foreach(rt, nm_clone->nodes_by_type[VLIB_NODE_TYPE_INPUT])
+ {
+ rt->cpu_index = vm_clone->cpu_index;
+ }
+
+ for (j=0; j < vec_len(old_rt); j++)
+ {
+ rt = vlib_node_get_runtime (vm_clone, old_rt[j].node_index);
+ rt->state = old_rt[j].state;
+ }
+
+ vec_free(old_rt);
+
+ nm_clone->processes = vec_dup (nm->processes);
+
+ clib_mem_set_heap (oldheap);
+
+ // vnet_main_fork_fixup (i);
+ }
+}
+
+static clib_error_t *
+cpu_config (vlib_main_t * vm, unformat_input_t * input)
+{
+ vlib_thread_registration_t *tr;
+ uword * p;
+ vlib_thread_main_t * tm = &vlib_thread_main;
+ u8 * name;
+ u64 coremask;
+ uword * bitmap;
+ u32 count;
+
+ tm->thread_registrations_by_name = hash_create_string (0, sizeof (uword));
+ tm->n_thread_stacks = 1; /* account for main thread */
+
+ tr = tm->next;
+
+ while (tr)
+ {
+ hash_set_mem (tm->thread_registrations_by_name, tr->name, (uword)tr);
+ tr = tr->next;
+ }
+
+ while (unformat_check_input(input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "main-thread-io"))
+ tm->main_thread_is_io_node = 1;
+ else if (unformat (input, "use-pthreads"))
+ tm->use_pthreads = 1;
+ else if (unformat (input, "thread-prefix %v", &tm->thread_prefix))
+ ;
+ else if (unformat (input, "main-core %u", &tm->main_lcore))
+ ;
+ else if (unformat (input, "skip-cores %u", &tm->skip_cores))
+ ;
+ else if (unformat (input, "coremask-%s %llx", &name, &coremask))
+ {
+ p = hash_get_mem (tm->thread_registrations_by_name, name);
+ if (p == 0)
+ return clib_error_return (0, "no such thread type '%s'", name);
+
+ tr = (vlib_thread_registration_t *)p[0];
+
+ if (tr->use_pthreads)
+ return clib_error_return (0, "coremask cannot be set for '%s' threads",
+ name);
+
+ tr->coremask = clib_bitmap_set_multiple
+ (tr->coremask, 0, coremask, BITS(coremask));
+ tr->count = clib_bitmap_count_set_bits (tr->coremask);
+ }
+ else if (unformat (input, "corelist-%s %U", &name, unformat_bitmap_list,
+ &bitmap))
+ {
+ p = hash_get_mem (tm->thread_registrations_by_name, name);
+ if (p == 0)
+ return clib_error_return (0, "no such thread type '%s'", name);
+
+ tr = (vlib_thread_registration_t *)p[0];
+
+ if (tr->use_pthreads)
+ return clib_error_return (0, "corelist cannot be set for '%s' threads",
+ name);
+
+ tr->coremask = bitmap;
+ tr->count = clib_bitmap_count_set_bits (tr->coremask);
+ }
+ else if (unformat (input, "%s %u", &name, &count))
+ {
+ p = hash_get_mem (tm->thread_registrations_by_name, name);
+ if (p == 0)
+ return clib_error_return (0, "no such thread type '%s'", name);
+
+ tr = (vlib_thread_registration_t *)p[0];
+ if (tr->fixed_count)
+ return clib_error_return
+ (0, "number of %s threads not configurable", tr->name);
+ tr->count = count;
+ }
+ else
+ break;
+ }
+
+ tr = tm->next;
+
+ if (!tm->thread_prefix)
+ tm->thread_prefix = format(0, "vpp");
+
+ while (tr)
+ {
+ tm->n_thread_stacks += tr->count;
+ tm->n_pthreads += tr->count * tr->use_pthreads;
+ tm->n_eal_threads += tr->count * (tr->use_pthreads == 0);
+ tr = tr->next;
+ }
+
+ return 0;
+}
+
+VLIB_EARLY_CONFIG_FUNCTION (cpu_config, "cpu");
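+
+/*
+ * Illustrative "cpu" stanza for the parser above. The thread-type name
+ * "workers" is hypothetical; it must match the name of some
+ * VLIB_REGISTER_THREAD registration:
+ *
+ *   cpu {
+ *     main-core 1
+ *     corelist-workers 2-3
+ *     thread-prefix vpp
+ *   }
+ */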
+
+#if !defined (__x86_64__)
+void __sync_fetch_and_add_8 (void)
+{
+ fformat(stderr, "%s called\n", __FUNCTION__);
+ abort();
+}
+void __sync_add_and_fetch_8 (void)
+{
+ fformat(stderr, "%s called\n", __FUNCTION__);
+ abort();
+}
+#endif
+
+void vnet_main_fixup (vlib_fork_fixup_t which) __attribute__ ((weak));
+void vnet_main_fixup (vlib_fork_fixup_t which) { }
+
+void vlib_worker_thread_fork_fixup (vlib_fork_fixup_t which)
+{
+ vlib_main_t * vm = vlib_get_main();
+
+ if (vlib_mains == 0)
+ return;
+
+ ASSERT(os_get_cpu_number() == 0);
+ vlib_worker_thread_barrier_sync(vm);
+
+ switch (which)
+ {
+ case VLIB_WORKER_THREAD_FORK_FIXUP_NEW_SW_IF_INDEX:
+ vnet_main_fixup (VLIB_WORKER_THREAD_FORK_FIXUP_NEW_SW_IF_INDEX);
+ break;
+
+ default:
+ ASSERT(0);
+ }
+ vlib_worker_thread_barrier_release(vm);
+}
+
+void vlib_worker_thread_barrier_sync(vlib_main_t *vm)
+{
+ f64 deadline;
+ u32 count;
+
+ if (!vlib_mains)
+ return;
+
+ count = vec_len (vlib_mains) - 1;
+
+ /* Tolerate recursive calls */
+ if (++vlib_worker_threads[0].recursion_level > 1)
+ return;
+
+ ASSERT (os_get_cpu_number() == 0);
+
+ deadline = vlib_time_now (vm) + BARRIER_SYNC_TIMEOUT;
+
+ *vlib_worker_threads->wait_at_barrier = 1;
+ while (*vlib_worker_threads->workers_at_barrier != count)
+ {
+ if (vlib_time_now(vm) > deadline)
+ {
+ fformat(stderr, "%s: worker thread deadlock\n", __FUNCTION__);
+ os_panic();
+ }
+ }
+}
+
+void vlib_worker_thread_barrier_release(vlib_main_t * vm)
+{
+ f64 deadline;
+
+ if (!vlib_mains)
+ return;
+
+ if (--vlib_worker_threads[0].recursion_level > 0)
+ return;
+
+ deadline = vlib_time_now (vm) + BARRIER_SYNC_TIMEOUT;
+
+ *vlib_worker_threads->wait_at_barrier = 0;
+
+ while (*vlib_worker_threads->workers_at_barrier > 0)
+ {
+ if (vlib_time_now(vm) > deadline)
+ {
+ fformat(stderr, "%s: worker thread deadlock\n", __FUNCTION__);
+ os_panic();
+ }
+ }
+}
+
+static clib_error_t *
+show_threads_fn (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ vlib_worker_thread_t * w;
+ int i;
+
+ vlib_cli_output (vm, "%-7s%-20s%-12s%-8s%-7s%-7s%-7s%-10s",
+ "ID", "Name", "Type", "LWP",
+ "lcore", "Core", "Socket", "State");
+
+ for (i = 0; i < vec_len(vlib_worker_threads); i++)
+ {
+ w = vlib_worker_threads + i;
+ u8 * line = NULL;
+
+ line = format(line, "%-7d%-20s%-12s%-8d",
+ i,
+ w->name ? w->name : (u8 *) "",
+ w->registration ? w->registration->name : "",
+ w->lwp);
+
+ int lcore = w->dpdk_lcore_id;
+ if (lcore > -1)
+ {
+ line = format(line, "%-7u%-7u%-7u",
+ lcore,
+ lcore_config[lcore].core_id,
+ lcore_config[lcore].socket_id);
+
+ switch(lcore_config[lcore].state)
+ {
+ case WAIT:
+ line = format(line, "wait");
+ break;
+ case RUNNING:
+ line = format(line, "running");
+ break;
+ case FINISHED:
+ line = format(line, "finished");
+ break;
+ default:
+ line = format(line, "unknown");
+ }
+ }
+
+ vlib_cli_output(vm, "%v", line);
+ vec_free(line);
+ }
+
+ return 0;
+}
+
+
+VLIB_CLI_COMMAND (show_threads_command, static) = {
+ .path = "show threads",
+ .short_help = "Show threads",
+ .function = show_threads_fn,
+};
diff --git a/vlib/vlib/threads.h b/vlib/vlib/threads.h
new file mode 100644
index 00000000000..9ce42a1367d
--- /dev/null
+++ b/vlib/vlib/threads.h
@@ -0,0 +1,333 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef included_vlib_threads_h
+#define included_vlib_threads_h
+
+#include <vlib/main.h>
+
+vlib_main_t **vlib_mains;
+
+static inline uword
+vlib_get_cpu_number_inline (void)
+{
+ void * sp;
+ uword n;
+  u32 len;
+
+  /* Get any old stack address. */
+  sp = &sp;
+
+  /* thread stacks are contiguous, 1 << 20 bytes each, so the offset
+     from stack 0 encodes the thread index */
+  n = ((uword)sp - (uword)vlib_thread_stacks[0]) >> 20;
+
+  /* "processes" have their own stacks, and they always run in thread 0 */
+  len = vec_len (vlib_thread_stacks);
+  n = n >= len ? 0 : n;
+
+ return n;
+}
+
+void
+vlib_set_thread_name (char *name);
+
+/* arg is actually a vlib_worker_thread_t * */
+typedef void (vlib_thread_function_t) (void * arg);
+
+typedef struct vlib_thread_registration_ {
+ /* constructor generated list of thread registrations */
+ struct vlib_thread_registration_ * next;
+
+ /* config parameters */
+ char * name;
+ char * short_name;
+ vlib_thread_function_t * function;
+ uword mheap_size;
+ int fixed_count;
+ u32 count;
+ int no_data_structure_clone;
+ /* All threads of this type run on pthreads */
+ int use_pthreads;
+ u32 first_index;
+ uword * coremask;
+} vlib_thread_registration_t;
+
+#define VLIB_MAX_CPUS 32
+
+/*
+ * Objects passed around by "index" are cache-line aligned.
+ * We can stick the owner CPU into the low 6 bits.
+ */
+#if VLIB_MAX_CPUS > 64
+#error VLIB_MAX_CPUS must be <= 64
+#endif
+
+#define VLIB_CPU_MASK (VLIB_MAX_CPUS - 1) /* 0x3f, max */
+#define VLIB_OFFSET_MASK (~VLIB_CPU_MASK)
+
+#define VLIB_LOG2_THREAD_STACK_SIZE (20)
+#define VLIB_THREAD_STACK_SIZE (1<<VLIB_LOG2_THREAD_STACK_SIZE)
+
+typedef enum {
+ VLIB_FRAME_QUEUE_ELT_DISPATCH_FRAME,
+} vlib_frame_queue_msg_type_t;
+
+typedef struct {
+ volatile u32 valid;
+ u32 msg_type;
+ u32 n_vectors;
+ u32 last_n_vectors;
+
+ /* 256 * 4 = 1024 bytes, even mult of cache line size */
+ u32 buffer_index[VLIB_FRAME_SIZE];
+
+ /* Pad to a cache line boundary */
+ u8 pad[CLIB_CACHE_LINE_BYTES - 4 * sizeof(u32)];
+} vlib_frame_queue_elt_t;
+
+typedef struct {
+ /* First cache line */
+ volatile u32 *wait_at_barrier;
+ volatile u32 *workers_at_barrier;
+ u8 pad0[CLIB_CACHE_LINE_BYTES - (2 * sizeof (u32 *))];
+
+ /* Second Cache Line */
+ void *thread_mheap;
+ u8 * thread_stack;
+ void (*thread_function)(void *);
+ void * thread_function_arg;
+ i64 recursion_level;
+ elog_track_t elog_track;
+ u32 instance_id;
+ vlib_thread_registration_t *registration;
+ u8 *name;
+
+ long lwp;
+ int dpdk_lcore_id;
+} vlib_worker_thread_t;
+
+vlib_worker_thread_t *vlib_worker_threads;
+
+typedef struct {
+ /* enqueue side */
+ volatile u64 tail;
+ u64 enqueues;
+ u64 enqueue_ticks;
+ u64 enqueue_vectors;
+ u32 enqueue_full_events;
+ u32 enqueue_efd_discards;
+ u8 pad2[CLIB_CACHE_LINE_BYTES
+ - (2 * sizeof(u32))
+ - (4 * sizeof(u64))];
+
+ /* dequeue side */
+ volatile u64 head;
+ u64 dequeues;
+ u64 dequeue_ticks;
+ u64 dequeue_vectors;
+ u64 trace;
+ u64 vector_threshold;
+ u8 pad4[CLIB_CACHE_LINE_BYTES
+ - (6 * sizeof(u64))];
+
+ /* dequeue hint to enqueue side */
+ volatile u64 head_hint;
+ u8 pad5 [CLIB_CACHE_LINE_BYTES - sizeof(u64)];
+
+ /* read-only, constant, shared */
+ vlib_frame_queue_elt_t *elts;
+ u32 nelts;
+} vlib_frame_queue_t;
+
+vlib_frame_queue_t **vlib_frame_queues;
+
+/* Called early, in thread 0's context */
+clib_error_t * vlib_thread_init (vlib_main_t * vm);
+
+vlib_worker_thread_t * vlib_alloc_thread (vlib_main_t * vm);
+
+int vlib_frame_queue_enqueue (vlib_main_t *vm, u32 node_runtime_index,
+ u32 frame_queue_index, vlib_frame_t *frame,
+ vlib_frame_queue_msg_type_t type);
+
+int vlib_frame_queue_dequeue (int thread_id,
+ vlib_main_t *vm,
+ vlib_node_main_t *nm);
+
+u64 dispatch_node (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_node_type_t type,
+ vlib_node_state_t dispatch_state,
+ vlib_frame_t * frame,
+ u64 last_time_stamp);
+
+u64 dispatch_pending_node (vlib_main_t * vm,
+ vlib_pending_frame_t * p,
+ u64 last_time_stamp);
+
+void vlib_worker_thread_node_runtime_update(void);
+
+void vlib_create_worker_threads (vlib_main_t *vm, int n,
+ void (*thread_function)(void *));
+
+void vlib_worker_thread_init (vlib_worker_thread_t * w);
+
+/* Check for a barrier sync request every 30ms */
+#define BARRIER_SYNC_DELAY (0.030000)
+
+#if CLIB_DEBUG > 0
+/* long barrier timeout, for gdb... */
+#define BARRIER_SYNC_TIMEOUT (600.1)
+#else
+#define BARRIER_SYNC_TIMEOUT (1.0)
+#endif
+
+void vlib_worker_thread_barrier_sync(vlib_main_t *vm);
+void vlib_worker_thread_barrier_release(vlib_main_t *vm);
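+
+/*
+ * Typical main-thread usage (cf. vlib_worker_thread_fork_fixup in
+ * threads.c): park the workers, mutate shared state, release:
+ *
+ *   vlib_worker_thread_barrier_sync (vm);
+ *   ... update data structures shared with the workers ...
+ *   vlib_worker_thread_barrier_release (vm);
+ */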
+
+always_inline void vlib_smp_unsafe_warning (void)
+{
+ if (CLIB_DEBUG > 0)
+ {
+ if (os_get_cpu_number())
+ fformat(stderr, "%s: SMP unsafe warning...\n", __FUNCTION__);
+ }
+}
+
+typedef enum {
+ VLIB_WORKER_THREAD_FORK_FIXUP_ILLEGAL = 0,
+ VLIB_WORKER_THREAD_FORK_FIXUP_NEW_SW_IF_INDEX,
+} vlib_fork_fixup_t;
+
+void vlib_worker_thread_fork_fixup (vlib_fork_fixup_t which);
+
+static inline void vlib_worker_thread_barrier_check (void)
+{
+ if (PREDICT_FALSE(*vlib_worker_threads->wait_at_barrier))
+ {
+ clib_smp_atomic_add (vlib_worker_threads->workers_at_barrier, 1);
+ while (*vlib_worker_threads->wait_at_barrier)
+ ;
+ clib_smp_atomic_add (vlib_worker_threads->workers_at_barrier, -1);
+ }
+}
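+
+/*
+ * Workers are expected to poll this from their dispatch loops (roughly
+ * every BARRIER_SYNC_DELAY seconds) so that a main-thread barrier
+ * request is honored promptly.
+ */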
+
+#define foreach_vlib_main(body) \
+do { \
+ vlib_main_t ** __vlib_mains = 0, *this_vlib_main; \
+ int ii; \
+ \
+ if (vec_len (vlib_mains) == 0) \
+ vec_add1 (__vlib_mains, &vlib_global_main); \
+ else \
+ { \
+ for (ii = 0; ii < vec_len (vlib_mains); ii++) \
+ { \
+ this_vlib_main = vlib_mains[ii]; \
+ if (this_vlib_main) \
+ vec_add1 (__vlib_mains, this_vlib_main); \
+ } \
+ } \
+ \
+ for (ii = 0; ii < vec_len (__vlib_mains); ii++) \
+ { \
+ this_vlib_main = __vlib_mains[ii]; \
+ /* body uses this_vlib_main... */ \
+ (body); \
+ } \
+ vec_free (__vlib_mains); \
+} while (0);
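+
+/*
+ * Example (mirrors the usage in trace.c); note the double parentheses
+ * around the body:
+ *
+ *   foreach_vlib_main (({
+ *     tm = &this_vlib_main->trace_main;
+ *   }));
+ */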
+
+
+/* Early-Fast-Discard (EFD) */
+#define VLIB_EFD_DISABLED 0
+#define VLIB_EFD_DISCARD_ENABLED (1 << 0)
+#define VLIB_EFD_MONITOR_ENABLED (1 << 1)
+
+#define VLIB_EFD_DEF_WORKER_HI_THRESH_PCT 90
+
+/* EFD worker thread settings */
+typedef struct vlib_efd_t {
+ u16 enabled;
+ u16 queue_hi_thresh;
+ u8 ip_prec_bitmap;
+ u8 mpls_exp_bitmap;
+ u8 vlan_cos_bitmap;
+ u8 pad;
+} vlib_efd_t;
+
+typedef struct {
+ /* Link list of registrations, built by constructors */
+ vlib_thread_registration_t * next;
+
+ /* Vector of registrations, w/ non-data-structure clones at the top */
+ vlib_thread_registration_t ** registrations;
+
+ uword * thread_registrations_by_name;
+
+ vlib_worker_thread_t * worker_threads;
+
+ /* thread / cpu / io thread parameters */
+ u32 main_thread_is_io_node;
+
+ /*
+ * Launch all threads as pthreads,
+ * not eal_rte_launch (strict affinity) threads
+ */
+ int use_pthreads;
+
+ /* Number of vlib_main / vnet_main clones */
+ u32 n_vlib_mains;
+
+ /* Number of thread stacks to create */
+ u32 n_thread_stacks;
+
+ /* Number of pthreads */
+ u32 n_pthreads;
+
+ /* Number of DPDK eal threads */
+ u32 n_eal_threads;
+
+ /* Number of cores to skip, must match the core mask */
+ u32 skip_cores;
+
+ /* Thread prefix name */
+ u8 *thread_prefix;
+
+ /* main thread lcore */
+ u8 main_lcore;
+
+ /* Bitmap of available CPU cores */
+ uword * cpu_core_bitmap;
+
+ /* Bitmap of available CPU sockets (NUMA nodes) */
+ uword * cpu_socket_bitmap;
+
+ vlib_efd_t efd;
+
+} vlib_thread_main_t;
+
+vlib_thread_main_t vlib_thread_main;
+
+#define VLIB_REGISTER_THREAD(x,...) \
+ __VA_ARGS__ vlib_thread_registration_t x; \
+static void __vlib_add_thread_registration_##x (void) \
+ __attribute__((__constructor__)) ; \
+static void __vlib_add_thread_registration_##x (void) \
+{ \
+ vlib_thread_main_t * tm = &vlib_thread_main; \
+ x.next = tm->next; \
+ tm->next = &x; \
+} \
+__VA_ARGS__ vlib_thread_registration_t x
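+
+/*
+ * Example registration (illustrative; the name, short name and thread
+ * function are hypothetical):
+ *
+ * VLIB_REGISTER_THREAD (worker_thread_reg, static) = {
+ *   .name = "workers",
+ *   .short_name = "wk",
+ *   .function = worker_thread_fn,
+ * };
+ */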
+
+#endif /* included_vlib_threads_h */
diff --git a/vlib/vlib/trace.c b/vlib/vlib/trace.c
new file mode 100644
index 00000000000..6272d853145
--- /dev/null
+++ b/vlib/vlib/trace.c
@@ -0,0 +1,294 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * trace.c: VLIB trace buffer.
+ *
+ * Copyright (c) 2008 Eliot Dresselhaus
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <vlib/vlib.h>
+#include <vlib/threads.h>
+
+/* Helper function for nodes which only trace buffer data. */
+void
+vlib_trace_frame_buffers_only (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ u32 * buffers,
+ uword n_buffers,
+ uword next_buffer_stride,
+ uword n_buffer_data_bytes_in_trace)
+{
+ u32 n_left, * from;
+
+ n_left = n_buffers;
+ from = buffers;
+
+ while (n_left >= 4)
+ {
+ u32 bi0, bi1;
+ vlib_buffer_t * b0, * b1;
+ u8 * t0, * t1;
+
+ /* Prefetch next iteration. */
+ vlib_prefetch_buffer_with_index (vm, from[2], LOAD);
+ vlib_prefetch_buffer_with_index (vm, from[3], LOAD);
+
+ bi0 = from[0];
+ bi1 = from[1];
+
+ b0 = vlib_get_buffer (vm, bi0);
+ b1 = vlib_get_buffer (vm, bi1);
+
+ if (b0->flags & VLIB_BUFFER_IS_TRACED)
+ {
+ t0 = vlib_add_trace (vm, node, b0, n_buffer_data_bytes_in_trace);
+ memcpy (t0, b0->data + b0->current_data,
+ n_buffer_data_bytes_in_trace);
+ }
+ if (b1->flags & VLIB_BUFFER_IS_TRACED)
+ {
+ t1 = vlib_add_trace (vm, node, b1, n_buffer_data_bytes_in_trace);
+ memcpy (t1, b1->data + b1->current_data,
+ n_buffer_data_bytes_in_trace);
+ }
+ from += 2;
+ n_left -= 2;
+ }
+
+ while (n_left >= 1)
+ {
+ u32 bi0;
+ vlib_buffer_t * b0;
+ u8 * t0;
+
+ bi0 = from[0];
+
+ b0 = vlib_get_buffer (vm, bi0);
+
+ if (b0->flags & VLIB_BUFFER_IS_TRACED)
+ {
+ t0 = vlib_add_trace (vm, node, b0, n_buffer_data_bytes_in_trace);
+ memcpy (t0, b0->data + b0->current_data,
+ n_buffer_data_bytes_in_trace);
+ }
+ from += 1;
+ n_left -= 1;
+ }
+}
+
+/* Free up all trace buffer memory. */
+always_inline void
+clear_trace_buffer (vlib_trace_main_t * tm)
+{
+ int i;
+
+ foreach_vlib_main (
+ ({
+ void *mainheap;
+
+ tm = &this_vlib_main->trace_main;
+ mainheap = clib_mem_set_heap (this_vlib_main->heap_base);
+
+ for (i = 0; i < vec_len (tm->trace_buffer_pool); i++)
+ if (! pool_is_free_index (tm->trace_buffer_pool, i))
+ vec_free (tm->trace_buffer_pool[i]);
+ pool_free (tm->trace_buffer_pool);
+ clib_mem_set_heap (mainheap);
+ }));
+}
+
+static u8 * format_vlib_trace (u8 * s, va_list * va)
+{
+ vlib_main_t * vm = va_arg (*va, vlib_main_t *);
+ vlib_trace_header_t * h = va_arg (*va, vlib_trace_header_t *);
+ vlib_trace_header_t * e = vec_end (h);
+ vlib_node_t * node, * prev_node;
+ clib_time_t * ct = &vm->clib_time;
+ f64 t;
+
+ prev_node = 0;
+ while (h < e)
+ {
+ node = vlib_get_node (vm, h->node_index);
+
+ if (node != prev_node)
+ {
+ t = (h->time - vm->cpu_time_main_loop_start) * ct->seconds_per_clock;
+ s = format (s, "\n%U: %v",
+ format_time_interval, "h:m:s:u", t,
+ node->name);
+ }
+ prev_node = node;
+
+ if (node->format_trace)
+ s = format (s, "\n %U",
+ node->format_trace, vm, node, h->data);
+ else
+ s = format (s, "\n %U",
+ node->format_buffer, h->data);
+
+ h = vlib_trace_header_next (h);
+ }
+
+ return s;
+}
+
+/* Root of all trace cli commands. */
+VLIB_CLI_COMMAND (trace_cli_command,static) = {
+ .path = "trace",
+ .short_help = "Packet tracer commands",
+};
+
+static clib_error_t *
+cli_show_trace_buffer (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ vlib_trace_main_t * tm = &vm->trace_main;
+ vlib_trace_header_t ** h, ** traces;
+ u32 i, index = 0;
+ char * fmt;
+
+ /* Get active traces from pool. */
+
+ foreach_vlib_main (
+ ({
+ void *mainheap;
+
+ fmt = "------------------- Start of thread %d %v -------------------";
+ vlib_cli_output (vm, fmt, index, vlib_worker_threads[index].name);
+
+ tm = &this_vlib_main->trace_main;
+
+ mainheap = clib_mem_set_heap (this_vlib_main->heap_base);
+ traces = 0;
+ pool_foreach (h, tm->trace_buffer_pool,
+ ({
+ vec_add1 (traces, h[0]);
+ }));
+
+ if (vec_len (traces) == 0)
+ {
+ clib_mem_set_heap (mainheap);
+ vlib_cli_output (vm, "No packets in trace buffer");
+ goto done;
+ }
+
+ /* Sort them by increasing time. */
+ vec_sort (traces, t0, t1, ({
+ i64 dt = t0[0]->time - t1[0]->time;
+ dt < 0 ? -1 : (dt > 0 ? +1 : 0);
+ }));
+
+ for (i = 0; i < vec_len (traces); i++)
+ {
+ clib_mem_set_heap (mainheap);
+
+ vlib_cli_output (vm, "Packet %d\n%U\n\n", i + 1,
+ format_vlib_trace, vm, traces[i]);
+
+ mainheap = clib_mem_set_heap (this_vlib_main->heap_base);
+ }
+
+ done:
+ vec_free (traces);
+ clib_mem_set_heap (mainheap);
+
+ index++;
+ }));
+
+ return 0;
+}
+
+VLIB_CLI_COMMAND (show_trace_cli,static) = {
+ .path = "show trace",
+ .short_help = "Show trace buffer",
+ .function = cli_show_trace_buffer,
+};
+
+static clib_error_t *
+cli_add_trace_buffer (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ vlib_trace_main_t * tm;
+ vlib_trace_node_t * tn;
+ u32 node_index, add;
+
+ if (unformat (input, "%U %d", unformat_vlib_node, vm, &node_index, &add))
+ ;
+ else
+ return clib_error_create ("expected NODE COUNT, got `%U'",
+ format_unformat_error, input);
+
+ foreach_vlib_main (
+ ({
+ void *oldheap;
+ tm = &this_vlib_main->trace_main;
+
+ oldheap = clib_mem_set_heap (this_vlib_main->heap_base);
+
+ vec_validate (tm->nodes, node_index);
+ tn = tm->nodes + node_index;
+ tn->limit += add;
+ clib_mem_set_heap (oldheap);
+ }));
+
+ return 0;
+}
+
+VLIB_CLI_COMMAND (add_trace_cli,static) = {
+ .path = "trace add",
+ .short_help = "Trace given number of packets",
+ .function = cli_add_trace_buffer,
+};
+
+static clib_error_t *
+cli_clear_trace_buffer (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ vlib_trace_main_t * tm = &vm->trace_main;
+ clear_trace_buffer (tm);
+ return 0;
+}
+
+VLIB_CLI_COMMAND (clear_trace_cli,static) = {
+ .path = "clear trace",
+ .short_help = "Clear trace buffer and free memory",
+ .function = cli_clear_trace_buffer,
+};
+
+/* Dummy function to get us linked in. */
+void vlib_trace_cli_reference (void) {}
diff --git a/vlib/vlib/trace.h b/vlib/vlib/trace.h
new file mode 100644
index 00000000000..228a22abb95
--- /dev/null
+++ b/vlib/vlib/trace.h
@@ -0,0 +1,75 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * trace.h: VLIB trace buffer.
+ *
+ * Copyright (c) 2008 Eliot Dresselhaus
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef included_vlib_trace_h
+#define included_vlib_trace_h
+
+#include <vppinfra/pool.h>
+
+typedef struct {
+ /* CPU time stamp trace was made. */
+ u64 time;
+
+ /* Node which generated this trace. */
+ u32 node_index;
+
+ /* Number of data words in this trace. */
+ u32 n_data;
+
+ /* Trace data follows. */
+ u8 data[0];
+} vlib_trace_header_t;
+
+typedef struct {
+ /* Current number of traces in buffer. */
+ u32 count;
+
+ /* Max. number of traces to be added to buffer. */
+ u32 limit;
+} vlib_trace_node_t;
+
+typedef struct {
+ /* Pool of trace buffers. */
+ vlib_trace_header_t ** trace_buffer_pool;
+
+ /* Per node trace counts. */
+ vlib_trace_node_t * nodes;
+} vlib_trace_main_t;
+
+#endif /* included_vlib_trace_h */
diff --git a/vlib/vlib/trace_funcs.h b/vlib/vlib/trace_funcs.h
new file mode 100644
index 00000000000..3dc7471e152
--- /dev/null
+++ b/vlib/vlib/trace_funcs.h
@@ -0,0 +1,167 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * trace_funcs.h: VLIB trace buffer.
+ *
+ * Copyright (c) 2008 Eliot Dresselhaus
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef included_vlib_trace_funcs_h
+#define included_vlib_trace_funcs_h
+
+always_inline void
+vlib_validate_trace (vlib_trace_main_t * tm, vlib_buffer_t * b)
+{
+ /*
+ * this assert seems right, but goes off constantly.
+ * disabling it appears to make the pain go away
+ */
+ ASSERT (1 || b->flags & VLIB_BUFFER_IS_TRACED);
+ ASSERT (! pool_is_free_index (tm->trace_buffer_pool, b->trace_index));
+}
+
+always_inline void *
+vlib_add_trace (vlib_main_t * vm,
+ vlib_node_runtime_t * r,
+ vlib_buffer_t * b,
+ u32 n_data_bytes)
+{
+ vlib_trace_main_t * tm = &vm->trace_main;
+ vlib_trace_header_t * h;
+ u32 n_data_words;
+
+ vlib_validate_trace (tm, b);
+
+ n_data_bytes = round_pow2 (n_data_bytes, sizeof (h[0]));
+ n_data_words = n_data_bytes / sizeof (h[0]);
+ vec_add2_aligned (tm->trace_buffer_pool[b->trace_index], h,
+ 1 + n_data_words,
+ sizeof (h[0]));
+
+ h->time = vm->cpu_time_last_node_dispatch;
+ h->n_data = n_data_words;
+ h->node_index = r->node_index;
+
+ return h->data;
+}
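+
+/*
+ * Typical node usage, cf. vlib_trace_frame_buffers_only in trace.c
+ * (my_trace_t and next0 are hypothetical):
+ *
+ *   if (b0->flags & VLIB_BUFFER_IS_TRACED)
+ *     {
+ *       my_trace_t *t = vlib_add_trace (vm, node, b0, sizeof (t[0]));
+ *       t->next_index = next0;
+ *     }
+ */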
+
+always_inline vlib_trace_header_t *
+vlib_trace_header_next (vlib_trace_header_t * h)
+{ return h + 1 + h->n_data; }
+
+always_inline void
+vlib_free_trace (vlib_main_t * vm, vlib_buffer_t * b)
+{
+ vlib_trace_main_t * tm = &vm->trace_main;
+ vlib_validate_trace (tm, b);
+ _vec_len (tm->trace_buffer_pool[b->trace_index]) = 0;
+ pool_put_index (tm->trace_buffer_pool, b->trace_index);
+}
+
+always_inline void
+vlib_trace_next_frame (vlib_main_t * vm,
+ vlib_node_runtime_t * r,
+ u32 next_index)
+{
+ vlib_next_frame_t * nf;
+ nf = vlib_node_runtime_get_next_frame (vm, r, next_index);
+ nf->flags |= VLIB_FRAME_TRACE;
+}
+
+/* Mark buffer as traced and allocate trace buffer. */
+always_inline void
+vlib_trace_buffer (vlib_main_t * vm,
+ vlib_node_runtime_t * r,
+ u32 next_index,
+ vlib_buffer_t * b,
+ int follow_chain)
+{
+ vlib_trace_main_t * tm = &vm->trace_main;
+ vlib_trace_header_t ** h;
+
+ vlib_trace_next_frame (vm, r, next_index);
+
+ pool_get (tm->trace_buffer_pool, h);
+
+ do {
+ b->flags |= VLIB_BUFFER_IS_TRACED;
+ b->trace_index = h - tm->trace_buffer_pool;
+ } while (follow_chain && (b = vlib_get_next_buffer (vm, b)));
+}
+
+always_inline void
+vlib_buffer_copy_trace_flag (vlib_main_t * vm, vlib_buffer_t * b, u32 bi_target)
+{
+ vlib_buffer_t * b_target = vlib_get_buffer (vm, bi_target);
+ b_target->flags |= b->flags & VLIB_BUFFER_IS_TRACED;
+ b_target->trace_index = b->trace_index;
+}
+
+always_inline u32
+vlib_get_trace_count (vlib_main_t * vm, vlib_node_runtime_t * rt)
+{
+ vlib_trace_main_t * tm = &vm->trace_main;
+ vlib_trace_node_t * tn;
+ int n;
+
+ if (rt->node_index >= vec_len (tm->nodes))
+ return 0;
+ tn = tm->nodes + rt->node_index;
+ n = tn->limit - tn->count;
+ ASSERT (n >= 0);
+
+ return n;
+}
+
+always_inline void
+vlib_set_trace_count (vlib_main_t * vm, vlib_node_runtime_t * rt,
+ u32 count)
+{
+ vlib_trace_main_t * tm = &vm->trace_main;
+ vlib_trace_node_t * tn = vec_elt_at_index (tm->nodes, rt->node_index);
+
+ ASSERT (count <= tn->limit);
+ tn->count = tn->limit - count;
+}
+
+/* Helper function for nodes which only trace buffer data. */
+void
+vlib_trace_frame_buffers_only (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ u32 * buffers,
+ uword n_buffers,
+ uword next_buffer_stride,
+ uword n_buffer_data_bytes_in_trace);
+
+#endif /* included_vlib_trace_funcs_h */
diff --git a/vlib/vlib/unix/cj.c b/vlib/vlib/unix/cj.c
new file mode 100644
index 00000000000..665a13fa4f5
--- /dev/null
+++ b/vlib/vlib/unix/cj.c
@@ -0,0 +1,218 @@
+/*
+ *------------------------------------------------------------------
+ * cj.c
+ *
+ * Copyright (c) 2013 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *------------------------------------------------------------------
+ */
+
+#include <stdio.h>
+#include <vlib/vlib.h>
+
+#include <vlib/unix/cj.h>
+
+cj_main_t cj_main;
+
+void
+cj_log (u32 type, void * data0, void * data1)
+{
+ u64 new_tail;
+ cj_main_t * cjm = &cj_main;
+ cj_record_t * r;
+
+ if (cjm->enable == 0)
+ return;
+
+ new_tail = __sync_add_and_fetch (&cjm->tail, 1);
+
+ r = (cj_record_t *) &(cjm->records[new_tail & (cjm->num_records - 1)]);
+ r->time = vlib_time_now (cjm->vlib_main);
+ r->cpu = os_get_cpu_number();
+ r->type = type;
+ r->data[0] = (u64) data0;
+ r->data[1] = (u64) data1;
+}
+
+void cj_stop(void)
+{
+ cj_main_t * cjm = &cj_main;
+
+ cjm->enable = 0;
+}
+
+
+clib_error_t * cj_init (vlib_main_t * vm)
+{
+ cj_main_t * cjm = &cj_main;
+
+ cjm->vlib_main = vm;
+ return 0;
+}
+VLIB_INIT_FUNCTION (cj_init);
+
+static clib_error_t *
+cj_config (vlib_main_t * vm, unformat_input_t * input)
+{
+ cj_main_t * cjm = &cj_main;
+ int matched = 0;
+ int enable = 0;
+
+ while (unformat_check_input(input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "records %d", &cjm->num_records))
+ matched = 1;
+ else if (unformat (input, "on"))
+ enable = 1;
+ else
+ return clib_error_return (0, "cj_config: unknown input '%U'",
+ format_unformat_error, input);
+ }
+
+ if (matched == 0)
+ return 0;
+
+ cjm->num_records = max_pow2 (cjm->num_records);
+ vec_validate (cjm->records, cjm->num_records-1);
+ memset (cjm->records, 0xff, cjm->num_records * sizeof (cj_record_t));
+ cjm->tail = ~0;
+ cjm->enable = enable;
+
+ return 0;
+}
+
+VLIB_CONFIG_FUNCTION (cj_config, "cj");
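+
+/*
+ * Illustrative config stanza for the parser above; "records" is rounded
+ * up to the next power of 2:
+ *
+ *   cj { records 65536 on }
+ */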
+
+void cj_enable_disable (int is_enable)
+{
+ cj_main_t * cjm = &cj_main;
+
+ if (cjm->num_records)
+ cjm->enable = is_enable;
+ else
+ vlib_cli_output (cjm->vlib_main, "CJ not configured...");
+}
+
+static inline void cj_dump_one_record (cj_record_t * r)
+{
+ fprintf (stderr, "[%d]: %10.6f T%02d %llx %llx\n",
+ r->cpu, r->time, r->type, (long long unsigned int) r->data[0],
+ (long long unsigned int) r->data[1]);
+}
+
+static void cj_dump_internal (u8 filter0_enable, u64 filter0,
+ u8 filter1_enable, u64 filter1)
+{
+ cj_main_t * cjm = &cj_main;
+ cj_record_t * r;
+ u32 i, index;
+
+ if (cjm->num_records == 0)
+ {
+ fprintf (stderr, "CJ not configured...\n");
+ return;
+ }
+
+ if (cjm->tail == (u64)~0)
+ {
+ fprintf (stderr, "No data collected...\n");
+ return;
+ }
+
+ /* Has the trace wrapped? */
+ index = (cjm->tail+1) & (cjm->num_records - 1);
+ r = &(cjm->records[index]);
+
+ if (r->cpu != (u32)~0)
+ {
+ /* Yes, dump from tail + 1 to the end */
+ for (i = index; i < cjm->num_records; i++)
+ {
+ if (filter0_enable && (r->data[0] != filter0))
+ goto skip;
+ if (filter1_enable && (r->data[1] != filter1))
+ goto skip;
+ cj_dump_one_record (r);
+ skip:
+ r++;
+ }
+ }
+  /* dump from the beginning through the final tail; tail is a
+     free-running counter, so mask it into the ring */
+  r = cjm->records;
+  for (i = 0; i <= (cjm->tail & (cjm->num_records - 1)); i++)
+ {
+ if (filter0_enable && (r->data[0] != filter0))
+ goto skip2;
+ if (filter1_enable && (r->data[1] != filter1))
+ goto skip2;
+ cj_dump_one_record (r);
+ skip2:
+ r++;
+ }
+}
+
+void cj_dump (void)
+{
+ cj_dump_internal (0, 0, 0, 0);
+}
+
+void cj_dump_filter_data0 (u64 filter0)
+{
+ cj_dump_internal (1/* enable f0 */, filter0, 0, 0);
+}
+
+void cj_dump_filter_data1 (u64 filter1)
+{
+ cj_dump_internal (0, 0, 1 /* enable f1 */, filter1);
+}
+
+void cj_dump_filter_data12 (u64 filter0, u64 filter1)
+{
+ cj_dump_internal (1, filter0, 1, filter1);
+}
+
+static clib_error_t *
+cj_command_fn (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ int is_enable = -1;
+ int is_dump = -1;
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) {
+ if (unformat (input, "enable") || unformat (input, "on"))
+ is_enable = 1;
+ else if (unformat (input, "disable") || unformat (input, "off"))
+ is_enable = 0;
+ else if (unformat (input, "dump"))
+ is_dump = 1;
+ else
+ return clib_error_return (0, "unknown input `%U'",
+ format_unformat_error, input);
+ }
+
+ if (is_enable >= 0)
+ cj_enable_disable (is_enable);
+
+ if (is_dump > 0)
+ cj_dump ();
+
+ return 0;
+}
+
+VLIB_CLI_COMMAND (cj_command,static) = {
+ .path = "cj",
+  .short_help = "cj <enable | disable | dump>",
+ .function = cj_command_fn,
+};
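+
+/*
+ * CLI usage accepted by cj_command_fn:
+ *   cj on     (or "cj enable")
+ *   cj off    (or "cj disable")
+ *   cj dump
+ */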
+
diff --git a/vlib/vlib/unix/cj.h b/vlib/vlib/unix/cj.h
new file mode 100644
index 00000000000..3c37f2bf22f
--- /dev/null
+++ b/vlib/vlib/unix/cj.h
@@ -0,0 +1,68 @@
+/*
+ *------------------------------------------------------------------
+ * cj.h
+ *
+ * Copyright (c) 2013 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *------------------------------------------------------------------
+ */
+
+#ifndef __included_cj_h__
+#define __included_cj_h__
+
+typedef struct {
+ f64 time;
+ u32 cpu;
+ u32 type;
+ u64 data[2];
+} cj_record_t;
+
+typedef struct {
+ volatile u64 tail;
+ cj_record_t * records;
+ u32 num_records;
+ volatile u32 enable;
+
+ vlib_main_t * vlib_main;
+} cj_main_t;
+
+void cj_log (u32 type, void * data0, void * data1);
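+
+/*
+ * Example call (illustrative; the type code and both data words are
+ * caller-defined, here a hypothetical buffer index and interface index):
+ *
+ *   cj_log (1, (void *)(uword) bi0, (void *)(uword) sw_if_index);
+ */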
+
+/*
+ * Supply in application main, so we can log from any library...
+ * Declare a weak reference in the library, off you go.
+ */
+
+#define DECLARE_CJ_GLOBAL_LOG \
+void cj_global_log (unsigned type, void * data0, void * data1) \
+ __attribute__ ((weak)); \
+ \
+unsigned __cj_type; \
+void * __cj_data0; \
+void * __cj_data1; \
+ \
+void \
+cj_global_log (unsigned type, void * data0, void * data1) \
+{ \
+ __cj_type = type; \
+ __cj_data0 = data0; \
+ __cj_data1 = data1; \
+}
+
+#define CJ_GLOBAL_LOG_PROTOTYPE \
+void cj_global_log (unsigned type, void * data0, void * data1) \
+    __attribute__ ((weak));
+
+void cj_stop(void);
+
+#endif /* __included_cj_h__ */
diff --git a/vlib/vlib/unix/cli.c b/vlib/vlib/unix/cli.c
new file mode 100644
index 00000000000..3cb13fc8550
--- /dev/null
+++ b/vlib/vlib/unix/cli.c
@@ -0,0 +1,900 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * cli.c: Unix stdin/socket CLI.
+ *
+ * Copyright (c) 2008 Eliot Dresselhaus
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <vlib/vlib.h>
+#include <vlib/unix/unix.h>
+
+#include <fcntl.h>
+#include <sys/stat.h>
+#include <termios.h>
+#include <unistd.h>
+#include <arpa/telnet.h>
+
+typedef struct {
+ u32 unix_file_index;
+
+ /* Vector of output pending write to file descriptor. */
+ u8 * output_vector;
+
+ /* Vector of input saved by Unix input node to be processed by
+ CLI process. */
+ u8 * input_vector;
+
+ u8 has_history;
+ u8 ** command_history;
+ u8 * current_command;
+ i32 excursion;
+ u32 history_limit;
+ u8 * search_key;
+ int search_mode;
+
+ u32 process_node_index;
+} unix_cli_file_t;
+
+always_inline void
+unix_cli_file_free (unix_cli_file_t * f)
+{
+ vec_free (f->output_vector);
+ vec_free (f->input_vector);
+}
+
+typedef struct {
+ /* Prompt string for CLI. */
+ u8 * cli_prompt;
+
+ unix_cli_file_t * cli_file_pool;
+
+ u32 * unused_cli_process_node_indices;
+
+ /* File pool index of current input. */
+ u32 current_input_file_index;
+} unix_cli_main_t;
+
+static unix_cli_main_t unix_cli_main;
+
+static void
+unix_cli_add_pending_output (unix_file_t * uf,
+ unix_cli_file_t * cf,
+ u8 * buffer,
+ uword buffer_bytes)
+{
+ unix_main_t * um = &unix_main;
+
+ vec_add (cf->output_vector, buffer, buffer_bytes);
+ if (vec_len (cf->output_vector) > 0)
+ {
+ int skip_update = 0 != (uf->flags & UNIX_FILE_DATA_AVAILABLE_TO_WRITE);
+ uf->flags |= UNIX_FILE_DATA_AVAILABLE_TO_WRITE;
+ if (! skip_update)
+ um->file_update (uf, UNIX_FILE_UPDATE_MODIFY);
+ }
+}
+
+static void
+unix_cli_del_pending_output (unix_file_t * uf,
+ unix_cli_file_t * cf,
+ uword n_bytes)
+{
+ unix_main_t * um = &unix_main;
+
+ vec_delete (cf->output_vector, n_bytes, 0);
+ if (vec_len (cf->output_vector) <= 0)
+ {
+ int skip_update = 0 == (uf->flags & UNIX_FILE_DATA_AVAILABLE_TO_WRITE);
+ uf->flags &= ~UNIX_FILE_DATA_AVAILABLE_TO_WRITE;
+ if (! skip_update)
+ um->file_update (uf, UNIX_FILE_UPDATE_MODIFY);
+ }
+}
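+
+/*
+ * Together these two helpers implement simple buffered CLI output:
+ * bytes accumulate in cf->output_vector, and the
+ * UNIX_FILE_DATA_AVAILABLE_TO_WRITE flag tells the file poller (see
+ * unix/input.c) to watch the descriptor for writability;
+ * unix_cli_write_ready() below drains the vector. A caller sketch:
+ *
+ *   unix_cli_add_pending_output (uf, cf, (u8 *) "hello\r\n", 7);
+ */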
+
+/* VLIB cli output function. */
+static void unix_vlib_cli_output (uword cli_file_index,
+ u8 * buffer,
+ uword buffer_bytes)
+{
+ unix_main_t * um = &unix_main;
+ unix_cli_main_t * cm = &unix_cli_main;
+ unix_cli_file_t * cf;
+ unix_file_t * uf;
+ int n;
+
+ cf = pool_elt_at_index (cm->cli_file_pool, cli_file_index);
+ uf = pool_elt_at_index (um->file_pool, cf->unix_file_index);
+ n = 0;
+ if (vec_len (cf->output_vector) == 0)
+ n = write (uf->file_descriptor, buffer, buffer_bytes);
+ if (n < 0 && errno != EAGAIN)
+ clib_unix_warning ("write");
+
+ else if ((word) n < (word) buffer_bytes)
+ {
+ if (n < 0) n = 0;
+ unix_cli_add_pending_output (uf, cf, buffer + n, buffer_bytes - n);
+ }
+}
+
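+/*
+ * Line editor for interactive sessions. Recognized control keys, as
+ * read from the wire one byte at a time:
+ *
+ *   ?          - print the command history
+ *   ^R / ^S    - reverse / forward incremental history search
+ *   ^U         - kill the current line
+ *   ^P / ^N    - previous / next history entry
+ *   ^H / DEL   - erase one character
+ *   CR / LF    - submit the current command
+ *   IAC (0xff) - telnet option sequence, echoed but not interpreted
+ *
+ * Returns 0 when a complete command is ready in cf->input_vector,
+ * 1 when more input is needed.
+ */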
+static int unix_cli_line_edit (unix_main_t * um, unix_cli_file_t * cf)
+{
+ unix_file_t * uf = pool_elt_at_index (um->file_pool, cf->unix_file_index);
+ u8 * prev;
+ int i, j, delta;
+
+ for (i = 0; i < vec_len (cf->input_vector); i++)
+ {
+ switch (cf->input_vector[i])
+ {
+ case 0:
+ continue;
+
+ case '?':
+	    /* Erase the current command (if any), plus the '?' */
+ for (j = 0; j < (vec_len (cf->current_command)+1); j++)
+ unix_cli_add_pending_output (uf, cf, (u8 *) "\b \b", 3);
+
+ unix_cli_add_pending_output (uf, cf, (u8 *) "\r\nHistory:\r\n", 12);
+
+ for (j = 0; j < vec_len (cf->command_history); j++)
+ {
+ unix_cli_add_pending_output (uf, cf, cf->command_history[j],
+ vec_len(cf->command_history[j]));
+ unix_cli_add_pending_output (uf, cf, (u8 *) "\r\n", 2);
+ }
+ goto crlf;
+
+ /* ^R - reverse search */
+ case 'R' - '@':
+ case 'S' - '@':
+ if (cf->search_mode == 0)
+ {
+ /* Erase the current command (if any) plus ^R */
+ for (j = 0; j < (vec_len (cf->current_command)+2); j++)
+ unix_cli_add_pending_output (uf, cf, (u8 *) "\b \b", 3);
+
+ vec_reset_length (cf->search_key);
+ vec_reset_length (cf->current_command);
+ if (cf->input_vector[i] == 'R' - '@')
+ cf->search_mode = -1;
+ else
+ cf->search_mode = 1;
+ }
+ else
+ {
+ if (cf->input_vector[i] == 'R' - '@')
+ cf->search_mode = -1;
+ else
+ cf->search_mode = 1;
+
+ cf->excursion += cf->search_mode;
+ unix_cli_add_pending_output (uf, cf, (u8 *) "\b \b", 3);
+ goto search_again;
+ }
+ break;
+
+ /* ^U - line-kill */
+ case 'U'-'@':
+ /* Erase the command, plus ^U */
+ for (j = 0; j < (vec_len (cf->current_command)+2); j++)
+ unix_cli_add_pending_output (uf, cf, (u8 *) "\b \b", 3);
+ vec_reset_length (cf->current_command);
+ cf->search_mode = 0;
+ continue;
+
+ /* ^P - previous, ^N - next */
+ case 'P' - '@':
+ case 'N' - '@':
+ cf->search_mode = 0;
+ /* Erase the command, plus ^P */
+ for (j = 0; j < (vec_len (cf->current_command)+2); j++)
+ unix_cli_add_pending_output (uf, cf, (u8 *) "\b \b", 3);
+ vec_reset_length (cf->current_command);
+ if (vec_len (cf->command_history))
+ {
+ if (cf->input_vector[i] == 'P' - '@')
+ delta = -1;
+ else
+ delta = 1;
+
+ cf->excursion += delta;
+
+ if (cf->excursion > (i32) vec_len (cf->command_history) -1)
+ cf->excursion = 0;
+ else if (cf->excursion < 0)
+ cf->excursion = vec_len (cf->command_history) -1;
+
+ prev = cf->command_history [cf->excursion];
+ vec_validate (cf->current_command, vec_len(prev)-1);
+
+ memcpy (cf->current_command, prev, vec_len(prev));
+ _vec_len (cf->current_command) = vec_len(prev);
+ unix_cli_add_pending_output (uf, cf, cf->current_command,
+ vec_len (cf->current_command));
+ break;
+ }
+ break;
+
+ case 0x7f:
+ case 'H' - '@':
+ for (j = 0; j < 2; j++)
+ unix_cli_add_pending_output (uf, cf, (u8 *) "\b \b", 3);
+ if (vec_len (cf->current_command))
+ {
+ unix_cli_add_pending_output (uf, cf, (u8 *) "\b \b", 3);
+ _vec_len (cf->current_command)--;
+ }
+	  cf->search_mode = 0;
+	  cf->excursion = 0;
+ vec_reset_length (cf->search_key);
+ break;
+
+ case '\r':
+ case '\n':
+ crlf:
+ vec_add1 (cf->current_command, '\r');
+ vec_add1 (cf->current_command, '\n');
+ unix_cli_add_pending_output (uf, cf, (u8 *) "\b\b \b\b\r\n", 8);
+
+ vec_validate (cf->input_vector, vec_len(cf->current_command)-1);
+ memcpy (cf->input_vector, cf->current_command,
+ vec_len(cf->current_command));
+ _vec_len(cf->input_vector) = _vec_len (cf->current_command);
+
+ if (vec_len(cf->command_history) >= cf->history_limit)
+ {
+ vec_free (cf->command_history[0]);
+ vec_delete (cf->command_history, 1, 0);
+ }
+ /* Don't add blank lines to the cmd history */
+ if (vec_len (cf->current_command) > 2)
+ {
+ _vec_len (cf->current_command) -= 2;
+ vec_add1 (cf->command_history, cf->current_command);
+ cf->current_command = 0;
+ }
+ else
+ vec_reset_length (cf->current_command);
+ cf->excursion = 0;
+ cf->search_mode = 0;
+ vec_reset_length (cf->search_key);
+ return 0;
+
+	  /* telnet IAC option sequence: echo the 6 bytes but don't process them. */
+ case 0xff:
+ unix_cli_add_pending_output (uf, cf, cf->input_vector + i,
+ 6);
+ i += 6;
+ continue;
+
+ default:
+ if (cf->search_mode)
+ {
+ int j, k, limit, offset;
+ u8 * item;
+
+ vec_add1 (cf->search_key, cf->input_vector[i]);
+
+ search_again:
+ for (j = 0; j < vec_len(cf->command_history); j++)
+ {
+ if (cf->excursion > (i32) vec_len (cf->command_history) -1)
+ cf->excursion = 0;
+ else if (cf->excursion < 0)
+ cf->excursion = vec_len (cf->command_history) -1;
+
+ item = cf->command_history[cf->excursion];
+
+ limit = (vec_len(cf->search_key) > vec_len (item)) ?
+ vec_len(item) : vec_len (cf->search_key);
+
+ for (offset = 0; offset <= vec_len(item) - limit; offset++)
+ {
+ for (k = 0; k < limit; k++)
+ {
+ if (item[k+offset] != cf->search_key[k])
+ goto next_offset;
+ }
+ goto found_at_offset;
+
+ next_offset:
+ ;
+ }
+ goto next;
+
+ found_at_offset:
+ for (j = 0; j < vec_len (cf->current_command)+1; j++)
+ unix_cli_add_pending_output (uf, cf, (u8 *) "\b \b", 3);
+
+ vec_validate (cf->current_command, vec_len(item)-1);
+
+ memcpy (cf->current_command, item, vec_len(item));
+ _vec_len (cf->current_command) = vec_len(item);
+ unix_cli_add_pending_output (uf, cf, cf->current_command,
+ vec_len (cf->current_command));
+ goto found;
+
+ next:
+ cf->excursion += cf->search_mode;
+ }
+
+ unix_cli_add_pending_output (uf, cf, (u8 *)"\r\nno match..", 12);
+ vec_reset_length (cf->search_key);
+ vec_reset_length (cf->current_command);
+ cf->search_mode = 0;
+ goto crlf;
+ }
+ else
+ vec_add1 (cf->current_command, cf->input_vector[i]);
+
+ found:
+
+ break;
+ }
+ }
+ vec_reset_length(cf->input_vector);
+ return 1;
+}
+
+static void unix_cli_process_input (unix_cli_main_t * cm, uword cli_file_index)
+{
+ unix_main_t * um = &unix_main;
+ unix_file_t * uf;
+ unix_cli_file_t * cf = pool_elt_at_index (cm->cli_file_pool, cli_file_index);
+ unformat_input_t input;
+ int vlib_parse_eval (u8 *);
+
+  /* Try the vlib parser first; disabled for now. Someday... */
+ if (0 && vlib_parse_eval (cf->input_vector) == 0)
+ goto done;
+
+ /* Line edit, echo, etc. */
+ if (cf->has_history && unix_cli_line_edit (um, cf))
+ return;
+
+ if (um->log_fd)
+ {
+ static u8 * lv;
+ vec_reset_length (lv);
+ lv = format (lv, "%U[%d]: %v",
+ format_timeval,
+ 0 /* current bat-time */,
+ 0 /* current bat-format */,
+ cli_file_index,
+ cf->input_vector);
+ {
+ int rv __attribute__((unused)) =
+ write (um->log_fd, lv, vec_len(lv));
+ }
+ }
+
+ unformat_init_vector (&input, cf->input_vector);
+
+ /* Remove leading white space from input. */
+ (void) unformat (&input, "");
+
+ cm->current_input_file_index = cli_file_index;
+
+ if (unformat_check_input (&input) != UNFORMAT_END_OF_INPUT)
+ vlib_cli_input (um->vlib_main, &input, unix_vlib_cli_output, cli_file_index);
+
+ /* Re-fetch pointer since pool may have moved. */
+ cf = pool_elt_at_index (cm->cli_file_pool, cli_file_index);
+
+ /* Zero buffer since otherwise unformat_free will call vec_free on it. */
+ input.buffer = 0;
+
+ unformat_free (&input);
+
+ /* Re-use input vector. */
+done:
+ _vec_len (cf->input_vector) = 0;
+
+ /* Prompt. */
+ uf = pool_elt_at_index (um->file_pool, cf->unix_file_index);
+ unix_cli_add_pending_output (uf, cf,
+ cm->cli_prompt,
+ vec_len (cm->cli_prompt));
+}
+
+static void unix_cli_kill (unix_cli_main_t * cm, uword cli_file_index)
+{
+ unix_main_t * um = &unix_main;
+ unix_cli_file_t * cf;
+ unix_file_t * uf;
+ int i;
+
+ cf = pool_elt_at_index (cm->cli_file_pool, cli_file_index);
+ uf = pool_elt_at_index (um->file_pool, cf->unix_file_index);
+
+ /* Quit/EOF on stdin means quit program. */
+ if (uf->file_descriptor == 0)
+ clib_longjmp (&um->vlib_main->main_loop_exit, VLIB_MAIN_LOOP_EXIT_CLI);
+
+ vec_free (cf->current_command);
+ vec_free (cf->search_key);
+
+ for (i = 0; i < vec_len (cf->command_history); i++)
+ vec_free (cf->command_history[i]);
+
+ vec_free (cf->command_history);
+
+ unix_file_del (um, uf);
+
+ unix_cli_file_free (cf);
+ pool_put (cm->cli_file_pool, cf);
+}
+
+typedef enum {
+ UNIX_CLI_PROCESS_EVENT_READ_READY,
+ UNIX_CLI_PROCESS_EVENT_QUIT,
+} unix_cli_process_event_type_t;
+
+static uword
+unix_cli_process (vlib_main_t * vm,
+ vlib_node_runtime_t * rt,
+ vlib_frame_t * f)
+{
+ unix_cli_main_t * cm = &unix_cli_main;
+ uword i, * data = 0;
+
+ while (1)
+ {
+ unix_cli_process_event_type_t event_type;
+ vlib_process_wait_for_event (vm);
+ event_type = vlib_process_get_events (vm, &data);
+
+ switch (event_type)
+ {
+ case UNIX_CLI_PROCESS_EVENT_READ_READY:
+ for (i = 0; i < vec_len (data); i++)
+ unix_cli_process_input (cm, data[i]);
+ break;
+
+ case UNIX_CLI_PROCESS_EVENT_QUIT:
+ /* Kill this process. */
+ for (i = 0; i < vec_len (data); i++)
+ unix_cli_kill (cm, data[i]);
+ goto done;
+ }
+
+ if (data)
+ _vec_len (data) = 0;
+ }
+
+ done:
+ vec_free (data);
+
+ vlib_node_set_state (vm, rt->node_index, VLIB_NODE_STATE_DISABLED);
+
+ /* Add node index so we can re-use this process later. */
+ vec_add1 (cm->unused_cli_process_node_indices, rt->node_index);
+
+ return 0;
+}
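+
+/*
+ * Other code wakes this process via vlib_process_signal_event(); the
+ * read handler below does, in effect (sketch):
+ *
+ *   vlib_process_signal_event (vm, cf->process_node_index,
+ *                              UNIX_CLI_PROCESS_EVENT_READ_READY,
+ *                              cli_file_index);
+ */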
+
+static clib_error_t * unix_cli_write_ready (unix_file_t * uf)
+{
+ unix_cli_main_t * cm = &unix_cli_main;
+ unix_cli_file_t * cf;
+ int n;
+
+ cf = pool_elt_at_index (cm->cli_file_pool, uf->private_data);
+
+ /* Flush output vector. */
+ n = write (uf->file_descriptor,
+ cf->output_vector, vec_len (cf->output_vector));
+
+ if (n < 0 && errno != EAGAIN)
+ return clib_error_return_unix (0, "write");
+
+ else if (n > 0)
+ unix_cli_del_pending_output (uf, cf, n);
+
+ return /* no error */ 0;
+}
+
+static clib_error_t * unix_cli_read_ready (unix_file_t * uf)
+{
+ unix_main_t * um = &unix_main;
+ unix_cli_main_t * cm = &unix_cli_main;
+ unix_cli_file_t * cf;
+ uword l;
+ int n, n_read, n_try;
+
+ cf = pool_elt_at_index (cm->cli_file_pool, uf->private_data);
+
+ n = n_try = 4096;
+ while (n == n_try) {
+ l = vec_len (cf->input_vector);
+ vec_resize (cf->input_vector, l + n_try);
+
+ n = read (uf->file_descriptor, cf->input_vector + l, n_try);
+
+ /* Error? */
+ if (n < 0 && errno != EAGAIN)
+ return clib_error_return_unix (0, "read");
+
+ n_read = n < 0 ? 0 : n;
+ _vec_len (cf->input_vector) = l + n_read;
+ }
+
+ if (! (n < 0))
+ vlib_process_signal_event (um->vlib_main,
+ cf->process_node_index,
+ (n_read == 0
+ ? UNIX_CLI_PROCESS_EVENT_QUIT
+ : UNIX_CLI_PROCESS_EVENT_READ_READY),
+ /* event data */ uf->private_data);
+
+ return /* no error */ 0;
+}
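+
+/*
+ * Read strategy above: read in 4096-byte chunks, growing
+ * cf->input_vector, until a short read; n == n_try means the kernel
+ * may still have data buffered. A zero-byte read (EOF, e.g. a closed
+ * telnet connection) becomes UNIX_CLI_PROCESS_EVENT_QUIT.
+ */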
+
+static u32 unix_cli_file_add (unix_cli_main_t * cm, char * name, int fd)
+{
+ unix_main_t * um = &unix_main;
+ unix_cli_file_t * cf;
+ unix_file_t * uf, template = {0};
+ vlib_main_t * vm = um->vlib_main;
+ vlib_node_t * n;
+
+ name = (char *) format (0, "unix-cli-%s", name);
+
+ if (vec_len (cm->unused_cli_process_node_indices) > 0)
+ {
+ uword l = vec_len (cm->unused_cli_process_node_indices);
+
+ /* Find node and give it new name. */
+ n = vlib_get_node (vm, cm->unused_cli_process_node_indices[l - 1]);
+ vec_free (n->name);
+ n->name = (u8 *) name;
+
+ vlib_node_set_state (vm, n->index, VLIB_NODE_STATE_POLLING);
+
+ _vec_len (cm->unused_cli_process_node_indices) = l - 1;
+ }
+ else
+ {
+ static vlib_node_registration_t r = {
+ .function = unix_cli_process,
+ .type = VLIB_NODE_TYPE_PROCESS,
+ .process_log2_n_stack_bytes = 14,
+ };
+
+ r.name = name;
+ vlib_register_node (vm, &r);
+ vec_free (name);
+
+ n = vlib_get_node (vm, r.index);
+ }
+
+ pool_get (cm->cli_file_pool, cf);
+ memset (cf, 0, sizeof (*cf));
+
+ template.read_function = unix_cli_read_ready;
+ template.write_function = unix_cli_write_ready;
+ template.file_descriptor = fd;
+ template.private_data = cf - cm->cli_file_pool;
+
+ cf->process_node_index = n->index;
+ cf->unix_file_index = unix_file_add (um, &template);
+ cf->output_vector = 0;
+ cf->input_vector = 0;
+
+ uf = pool_elt_at_index (um->file_pool, cf->unix_file_index);
+
+ /* Prompt. */
+ unix_cli_add_pending_output (uf, cf,
+ cm->cli_prompt, vec_len (cm->cli_prompt));
+
+ vlib_start_process (vm, n->runtime_index);
+ return cf - cm->cli_file_pool;
+}
+
+static clib_error_t * unix_cli_listen_read_ready (unix_file_t * uf)
+{
+ unix_main_t * um = &unix_main;
+ unix_cli_main_t * cm = &unix_cli_main;
+ clib_socket_t * s = &um->cli_listen_socket;
+ clib_socket_t client;
+ char * client_name;
+ clib_error_t * error;
+ unix_cli_file_t * cf;
+ u32 cf_index;
+
+ error = clib_socket_accept (s, &client);
+ if (error)
+ return error;
+
+ client_name = (char *) format (0, "%U%c", format_sockaddr, &client.peer, 0);
+
+ cf_index = unix_cli_file_add (cm, client_name, client.fd);
+ cf = pool_elt_at_index (cm->cli_file_pool, cf_index);
+
+ /* No longer need CLIB version of socket. */
+ clib_socket_free (&client);
+
+ vec_free (client_name);
+
+  /* If we're supposed to run the telnet session in character mode (the default) */
+ if (um->cli_line_mode == 0)
+ {
+ u8 charmode_option[6];
+
+ cf->has_history = 1;
+ cf->history_limit = um->cli_history_limit ? um->cli_history_limit : 50;
+
+ /*
+ * Set telnet client character mode, echo on, suppress "go-ahead"
+ * Empirically, this sequence works. YMMV.
+ */
+
+ /* Tell the client no linemode, echo */
+ charmode_option[0] = IAC;
+ charmode_option[1] = DONT;
+ charmode_option[2] = TELOPT_LINEMODE;
+ charmode_option[3] = IAC;
+ charmode_option[4] = DO;
+ charmode_option[5] = TELOPT_SGA;
+
+ uf = pool_elt_at_index (um->file_pool, cf->unix_file_index);
+
+ unix_cli_add_pending_output (uf, cf, charmode_option,
+ ARRAY_LEN(charmode_option));
+ }
+
+ return error;
+}
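+
+/*
+ * On the wire the option sequence above is six bytes:
+ *   0xff 0xfe 0x22   IAC DONT LINEMODE
+ *   0xff 0xfd 0x03   IAC DO   SGA
+ */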
+
+static clib_error_t *
+unix_cli_config (vlib_main_t * vm, unformat_input_t * input)
+{
+ unix_main_t * um = &unix_main;
+ unix_cli_main_t * cm = &unix_cli_main;
+ int flags, standard_input_fd;
+ clib_error_t * error;
+
+ /* We depend on unix flags being set. */
+ if ((error = vlib_call_config_function (vm, unix_config)))
+ return error;
+
+ if (um->flags & UNIX_FLAG_INTERACTIVE)
+ {
+ standard_input_fd = 0;
+
+ /* Set stdin to be non-blocking. */
+ if ((flags = fcntl (standard_input_fd, F_GETFL, 0)) < 0)
+ flags = 0;
+ fcntl (standard_input_fd, F_SETFL, flags | O_NONBLOCK);
+
+ unix_cli_file_add (cm, "stdin", standard_input_fd);
+ }
+
+ {
+ /* CLI listen. */
+ clib_socket_t * s = &um->cli_listen_socket;
+ unix_file_t template = {0};
+
+ s->flags = SOCKET_IS_SERVER; /* listen, don't connect */
+
+ error = clib_socket_init (s);
+ if (error)
+ return error;
+
+ template.read_function = unix_cli_listen_read_ready;
+ template.file_descriptor = s->fd;
+
+ unix_file_add (um, &template);
+ }
+
+ /* Set CLI prompt. */
+ if (! cm->cli_prompt)
+ cm->cli_prompt = format (0, "VLIB: ");
+
+ return 0;
+}
+
+VLIB_CONFIG_FUNCTION (unix_cli_config, "unix-cli");
+
+void vlib_unix_cli_set_prompt (char * prompt)
+{
+ char * fmt = (prompt[strlen(prompt)-1] == ' ') ? "%s" : "%s ";
+ unix_cli_main_t * cm = &unix_cli_main;
+ if (cm->cli_prompt)
+ vec_free (cm->cli_prompt);
+ cm->cli_prompt = format (0, fmt, prompt);
+}
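+
+/*
+ * Usage sketch: an application can rebrand the prompt at startup; a
+ * trailing space is appended if the caller omits one:
+ *
+ *   vlib_unix_cli_set_prompt ("my-app#");   => prompt "my-app# "
+ */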
+
+static clib_error_t *
+unix_cli_quit (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ unix_cli_main_t * cm = &unix_cli_main;
+
+ vlib_process_signal_event (vm,
+ vlib_current_process (vm),
+ UNIX_CLI_PROCESS_EVENT_QUIT,
+ cm->current_input_file_index);
+ return 0;
+}
+
+VLIB_CLI_COMMAND (unix_cli_quit_command, static) = {
+ .path = "quit",
+ .short_help = "Exit CLI",
+ .function = unix_cli_quit,
+};
+
+static clib_error_t *
+unix_cli_exec (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ char * file_name;
+ int fd;
+ unformat_input_t sub_input;
+ clib_error_t * error;
+
+ file_name = 0;
+ fd = -1;
+ error = 0;
+
+ if (! unformat (input, "%s", &file_name))
+ {
+ error = clib_error_return (0, "expecting file name, got `%U'",
+ format_unformat_error, input);
+ goto done;
+ }
+
+ fd = open (file_name, O_RDONLY);
+ if (fd < 0)
+ {
+ error = clib_error_return_unix (0, "failed to open `%s'", file_name);
+ goto done;
+ }
+
+  /* Make sure it's a regular file. */
+ {
+ struct stat s;
+
+ if (fstat (fd, &s) < 0)
+ {
+ error = clib_error_return_unix (0, "failed to stat `%s'", file_name);
+ goto done;
+ }
+
+ if (! (S_ISREG (s.st_mode) || S_ISLNK (s.st_mode)))
+ {
+ error = clib_error_return (0, "not a regular file `%s'", file_name);
+ goto done;
+ }
+ }
+
+ unformat_init_unix_file (&sub_input, fd);
+
+ vlib_cli_input (vm, &sub_input, 0, 0);
+ unformat_free (&sub_input);
+
+ done:
+  if (fd >= 0)
+ close (fd);
+ vec_free (file_name);
+
+ return error;
+}
+
+VLIB_CLI_COMMAND (cli_exec, static) = {
+ .path = "exec",
+ .short_help = "Execute commands from file",
+ .function = unix_cli_exec,
+};
+
+static clib_error_t *
+unix_show_errors (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ unix_main_t * um = &unix_main;
+ clib_error_t * error = 0;
+ int i, n_errors_to_show;
+ unix_error_history_t * unix_errors = 0;
+
+ n_errors_to_show = 1 << 30;
+
+ if (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (! unformat (input, "%d", &n_errors_to_show))
+ {
+ error = clib_error_return (0, "expecting integer number of errors to show, got `%U'",
+ format_unformat_error, input);
+ goto done;
+ }
+ }
+
+ n_errors_to_show = clib_min (ARRAY_LEN (um->error_history), n_errors_to_show);
+
+ i = um->error_history_index > 0 ? um->error_history_index - 1 : ARRAY_LEN (um->error_history) - 1;
+
+ while (n_errors_to_show > 0)
+ {
+ unix_error_history_t * eh = um->error_history + i;
+
+ if (! eh->error)
+ break;
+
+ vec_add1 (unix_errors, eh[0]);
+ n_errors_to_show -= 1;
+ if (i == 0)
+ i = ARRAY_LEN (um->error_history) - 1;
+ else
+ i--;
+ }
+
+ if (vec_len (unix_errors) == 0)
+ vlib_cli_output (vm, "no Unix errors so far");
+ else
+ {
+ vlib_cli_output (vm, "%Ld total errors seen", um->n_total_errors);
+ for (i = vec_len (unix_errors) - 1; i >= 0; i--)
+ {
+ unix_error_history_t * eh = vec_elt_at_index (unix_errors, i);
+ vlib_cli_output (vm, "%U: %U",
+ format_time_interval, "h:m:s:u", eh->time,
+ format_clib_error, eh->error);
+ }
+ vlib_cli_output (vm, "%U: time now",
+ format_time_interval, "h:m:s:u", vlib_time_now (vm));
+ }
+
+ done:
+ vec_free (unix_errors);
+ return error;
+}
+
+VLIB_CLI_COMMAND (cli_unix_show_errors, static) = {
+ .path = "show unix-errors",
+ .short_help = "Show Unix system call error history",
+ .function = unix_show_errors,
+};
+
+static clib_error_t *
+unix_cli_init (vlib_main_t * vm)
+{
+ return 0;
+}
+
+VLIB_INIT_FUNCTION (unix_cli_init);
diff --git a/vlib/vlib/unix/input.c b/vlib/vlib/unix/input.c
new file mode 100644
index 00000000000..ea10e4fc354
--- /dev/null
+++ b/vlib/vlib/unix/input.c
@@ -0,0 +1,244 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * input.c: Unix file input
+ *
+ * Copyright (c) 2008 Eliot Dresselhaus
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <vlib/vlib.h>
+#include <vlib/unix/unix.h>
+#include <signal.h>
+
+/* FIXME autoconf */
+#define HAVE_LINUX_EPOLL
+
+#ifdef HAVE_LINUX_EPOLL
+
+#include <sys/epoll.h>
+
+typedef struct {
+ int epoll_fd;
+ struct epoll_event * epoll_events;
+
+ /* Statistics. */
+ u64 epoll_files_ready;
+ u64 epoll_waits;
+} linux_epoll_main_t;
+
+static linux_epoll_main_t linux_epoll_main;
+
+static void
+linux_epoll_file_update (unix_file_t * f,
+ unix_file_update_type_t update_type)
+{
+ unix_main_t * um = &unix_main;
+ linux_epoll_main_t * em = &linux_epoll_main;
+ struct epoll_event e;
+
+ memset (&e, 0, sizeof (e));
+
+ e.events = EPOLLIN;
+ if (f->flags & UNIX_FILE_DATA_AVAILABLE_TO_WRITE)
+ e.events |= EPOLLOUT;
+ e.data.u32 = f - um->file_pool;
+
+ if (epoll_ctl (em->epoll_fd,
+ (update_type == UNIX_FILE_UPDATE_ADD
+ ? EPOLL_CTL_ADD
+ : (update_type == UNIX_FILE_UPDATE_MODIFY
+ ? EPOLL_CTL_MOD
+ : EPOLL_CTL_DEL)),
+ f->file_descriptor,
+ &e) < 0)
+ clib_warning ("epoll_ctl");
+}
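+
+/*
+ * unix_file_update_type_t maps 1:1 onto epoll_ctl() operations:
+ *   UNIX_FILE_UPDATE_ADD    -> EPOLL_CTL_ADD
+ *   UNIX_FILE_UPDATE_MODIFY -> EPOLL_CTL_MOD
+ *   otherwise (delete)      -> EPOLL_CTL_DEL
+ * EPOLLIN is always requested; EPOLLOUT only while the file has
+ * pending output (UNIX_FILE_DATA_AVAILABLE_TO_WRITE).
+ */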
+
+static uword
+linux_epoll_input (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * frame)
+{
+ unix_main_t * um = &unix_main;
+ linux_epoll_main_t * em = &linux_epoll_main;
+ struct epoll_event * e;
+ int n_fds_ready;
+
+ {
+ vlib_node_main_t * nm = &vm->node_main;
+ u64 t = nm->cpu_time_next_process_ready;
+ f64 timeout;
+ int timeout_ms, max_timeout_ms = 10;
+ f64 vector_rate = vlib_last_vectors_per_main_loop (vm);
+
+ if (t == ~0ULL)
+ {
+ timeout = 10e-3;
+ timeout_ms = max_timeout_ms;
+ }
+ else
+ {
+ timeout =
+ (((i64) t - (i64) clib_cpu_time_now ())
+ * vm->clib_time.seconds_per_clock)
+ /* subtract off some slop time */ - 50e-6;
+ timeout_ms = timeout * 1e3;
+
+ /* Must be between 1 and 10 ms. */
+ timeout_ms = clib_max (1, timeout_ms);
+ timeout_ms = clib_min (max_timeout_ms, timeout_ms);
+ }
+
+ /* If we still have input nodes polling (e.g. vnet packet generator)
+ don't sleep. */
+ if (nm->input_node_counts_by_state[VLIB_NODE_STATE_POLLING] > 0)
+ timeout_ms = 0;
+
+ if (vector_rate > 1)
+ {
+	/* When busy, don't wait; only epoll for input every 1024 times
+	   through the main loop. */
+ timeout_ms = 0;
+ node->input_main_loops_per_call = 1024;
+ }
+ else
+ /* We're not busy; go to sleep for a while. */
+ node->input_main_loops_per_call = 0;
+
+ /* Allow any signal to wakeup our sleep. */
+ {
+ static sigset_t unblock_all_signals;
+ n_fds_ready = epoll_pwait (em->epoll_fd,
+ em->epoll_events,
+ vec_len (em->epoll_events),
+ timeout_ms,
+ &unblock_all_signals);
+
+      /* Fall back to epoll_wait: epoll_pwait is missing on very old kernels. */
+ if (n_fds_ready < 0 && errno == ENOSYS)
+ {
+ n_fds_ready = epoll_wait (em->epoll_fd,
+ em->epoll_events,
+ vec_len (em->epoll_events),
+ timeout_ms);
+ }
+ }
+ }
+
+ if (n_fds_ready < 0)
+ {
+ if (unix_error_is_fatal (errno))
+ vlib_panic_with_error (vm, clib_error_return_unix (0, "epoll_wait"));
+
+ /* non fatal error (e.g. EINTR). */
+ return 0;
+ }
+
+ em->epoll_waits += 1;
+ em->epoll_files_ready += n_fds_ready;
+
+ for (e = em->epoll_events; e < em->epoll_events + n_fds_ready; e++)
+ {
+ u32 i = e->data.u32;
+ unix_file_t * f = pool_elt_at_index (um->file_pool, i);
+ clib_error_t * errors[4];
+ int n_errors = 0;
+
+ if (PREDICT_TRUE (! (e->events & EPOLLERR)))
+ {
+ if (e->events & EPOLLIN)
+ {
+ errors[n_errors] = f->read_function (f);
+ n_errors += errors[n_errors] != 0;
+ }
+ if (e->events & EPOLLOUT)
+ {
+ errors[n_errors] = f->write_function (f);
+ n_errors += errors[n_errors] != 0;
+ }
+ }
+ else
+ {
+ if (f->error_function)
+ {
+ errors[n_errors] = f->error_function (f);
+ n_errors += errors[n_errors] != 0;
+ }
+ }
+
+ ASSERT (n_errors < ARRAY_LEN (errors));
+ for (i = 0; i < n_errors; i++)
+ {
+ unix_save_error (um, errors[i]);
+ }
+ }
+
+ return 0;
+}
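+
+/*
+ * Worked timeout example: if the next process is due in 2.6 ms,
+ * timeout = 2.6e-3 - 50e-6 = 2.55e-3 s, so timeout_ms = 2, already
+ * within the [1, 10] ms clamp. With polling input nodes active, or
+ * vector_rate > 1, the wait collapses to 0 ms so packet processing is
+ * never delayed by the epoll sleep.
+ */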
+
+VLIB_REGISTER_NODE (linux_epoll_input_node,static) = {
+ .function = linux_epoll_input,
+ .type = VLIB_NODE_TYPE_PRE_INPUT,
+ .name = "unix-epoll-input",
+};
+
+clib_error_t *
+linux_epoll_input_init (vlib_main_t * vm)
+{
+ linux_epoll_main_t * em = &linux_epoll_main;
+ unix_main_t * um = &unix_main;
+
+ /* Allocate some events. */
+ vec_resize (em->epoll_events, VLIB_FRAME_SIZE);
+
+ em->epoll_fd = epoll_create (vec_len (em->epoll_events));
+ if (em->epoll_fd < 0)
+ return clib_error_return_unix (0, "epoll_create");
+
+ um->file_update = linux_epoll_file_update;
+
+ return 0;
+}
+
+VLIB_INIT_FUNCTION (linux_epoll_input_init);
+
+#endif /* HAVE_LINUX_EPOLL */
+
+static clib_error_t *
+unix_input_init (vlib_main_t * vm)
+{
+ return vlib_call_init_function (vm, linux_epoll_input_init);
+}
+
+VLIB_INIT_FUNCTION (unix_input_init);
diff --git a/vlib/vlib/unix/main.c b/vlib/vlib/unix/main.c
new file mode 100644
index 00000000000..b85f3e73326
--- /dev/null
+++ b/vlib/vlib/unix/main.c
@@ -0,0 +1,471 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * main.c: Unix main routine
+ *
+ * Copyright (c) 2008 Eliot Dresselhaus
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+#include <vlib/vlib.h>
+#include <vlib/unix/unix.h>
+#include <vlib/unix/plugin.h>
+
+#include <signal.h>
+#include <sys/ucontext.h>
+#include <syslog.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+
+unix_main_t unix_main;
+
+static clib_error_t *
+unix_main_init (vlib_main_t * vm)
+{
+ unix_main_t * um = &unix_main;
+ um->vlib_main = vm;
+ return vlib_call_init_function (vm, unix_input_init);
+}
+
+VLIB_INIT_FUNCTION (unix_main_init);
+
+static void unix_signal_handler (int signum, siginfo_t * si, ucontext_t * uc)
+{
+ uword fatal;
+ u8 * msg = 0;
+
+ msg = format (msg, "received signal %U, PC %U",
+ format_signal, signum,
+ format_ucontext_pc, uc);
+
+ if (signum == SIGSEGV)
+ msg = format (msg, ", faulting address %p", si->si_addr);
+
+ switch (signum)
+ {
+ /* these (caught) signals cause the application to exit */
+ case SIGTERM:
+ if (unix_main.vlib_main->main_loop_exit_set)
+ {
+ syslog (LOG_ERR | LOG_DAEMON, "received SIGTERM, exiting...");
+
+ clib_longjmp (&unix_main.vlib_main->main_loop_exit,
+ VLIB_MAIN_LOOP_EXIT_CLI);
+ }
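+      /* fall through */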
+ case SIGQUIT:
+ case SIGINT:
+ case SIGILL:
+ case SIGBUS:
+ case SIGSEGV:
+ case SIGHUP:
+ case SIGFPE:
+ fatal = 1;
+ break;
+
+ /* by default, print a message and continue */
+ default:
+ fatal = 0;
+ break;
+ }
+
+ /* Null terminate. */
+ vec_add1 (msg, 0);
+
+ if (fatal)
+ {
+ syslog (LOG_ERR | LOG_DAEMON, "%s", msg);
+ os_exit (1);
+ }
+ else
+ clib_warning ("%s", msg);
+
+ vec_free (msg);
+}
+
+static clib_error_t *
+setup_signal_handlers (unix_main_t * um)
+{
+ uword i;
+ struct sigaction sa;
+
+ for (i = 1; i < 32; i++)
+ {
+ memset (&sa, 0, sizeof (sa));
+ sa.sa_sigaction = (void *) unix_signal_handler;
+ sa.sa_flags = SA_SIGINFO;
+
+ switch (i)
+ {
+ /* these signals take the default action */
+ case SIGABRT:
+ case SIGKILL:
+ case SIGSTOP:
+ case SIGUSR1:
+ case SIGUSR2:
+ continue;
+
+ /* ignore SIGPIPE, SIGCHLD */
+ case SIGPIPE:
+ case SIGCHLD:
+ sa.sa_sigaction = (void *) SIG_IGN;
+ break;
+
+ /* catch and handle all other signals */
+ default:
+ break;
+ }
+
+ if (sigaction (i, &sa, 0) < 0)
+ return clib_error_return_unix (0, "sigaction %U", format_signal, i);
+ }
+
+ return 0;
+}
+
+static void unix_error_handler (void * arg, u8 * msg, int msg_len)
+{
+ unix_main_t * um = arg;
+
+ /* Echo to stderr when interactive. */
+ if (um->flags & UNIX_FLAG_INTERACTIVE)
+ {
+ CLIB_UNUSED (int r) = write (2, msg, msg_len);
+ }
+ else
+ {
+ char save = msg[msg_len - 1];
+
+ /* Null Terminate. */
+ msg[msg_len-1] = 0;
+
+ syslog (LOG_ERR | LOG_DAEMON, "%s", msg);
+
+ msg[msg_len-1] = save;
+ }
+}
+
+void vlib_unix_error_report (vlib_main_t * vm, clib_error_t * error)
+{
+ unix_main_t * um = &unix_main;
+
+ if (um->flags & UNIX_FLAG_INTERACTIVE || error == 0)
+ return;
+
+ {
+ char save;
+ u8 * msg;
+ u32 msg_len;
+
+ msg = error->what;
+ msg_len = vec_len(msg);
+
+ /* Null Terminate. */
+ save = msg[msg_len-1];
+ msg[msg_len-1] = 0;
+
+ syslog (LOG_ERR | LOG_DAEMON, "%s", msg);
+
+ msg[msg_len-1] = save;
+ }
+}
+
+static uword
+startup_config_process (vlib_main_t * vm,
+ vlib_node_runtime_t * rt,
+ vlib_frame_t * f)
+{
+ unix_main_t * um = &unix_main;
+ u8 * buf = 0;
+ uword l, n = 1;
+
+ vlib_process_suspend (vm, 2.0);
+
+ while (um->unix_config_complete == 0)
+ vlib_process_suspend (vm, 0.1);
+
+ if (um->startup_config_filename) {
+ unformat_input_t sub_input;
+ int fd;
+ struct stat s;
+ char *fn = (char *)um->startup_config_filename;
+
+ fd = open (fn, O_RDONLY);
+ if (fd < 0) {
+ clib_warning ("failed to open `%s'", fn);
+ return 0;
+ }
+
+ if (fstat (fd, &s) < 0) {
+ clib_warning ("failed to stat `%s'", fn);
+ bail:
+ close(fd);
+ return 0;
+ }
+
+ if (! (S_ISREG (s.st_mode) || S_ISLNK (s.st_mode))) {
+ clib_warning ("not a regular file: `%s'", fn);
+ goto bail;
+ }
+
+ while (n > 0)
+ {
+ l = vec_len (buf);
+ vec_resize (buf, 4096);
+ n = read (fd, buf + l, 4096);
+ if (n > 0)
+ {
+ _vec_len (buf) = l + n;
+ if (n < 4096)
+ break;
+ }
+ else
+ break;
+ }
+ if (um->log_fd && vec_len (buf))
+ {
+ u8 * lv = 0;
+ lv = format (lv, "%U: ***** Startup Config *****\n%v",
+ format_timeval,
+ 0 /* current bat-time */,
+ 0 /* current bat-format */,
+ buf);
+ {
+ int rv __attribute__((unused)) =
+ write (um->log_fd, lv, vec_len(lv));
+ }
+ vec_reset_length (lv);
+ lv = format (lv, "%U: ***** End Startup Config *****\n",
+ format_timeval,
+ 0 /* current bat-time */,
+ 0 /* current bat-format */);
+ {
+ int rv __attribute__((unused)) =
+ write (um->log_fd, lv, vec_len(lv));
+ }
+ vec_free (lv);
+ }
+
+ if (vec_len(buf))
+ {
+ unformat_init_vector (&sub_input, buf);
+ vlib_cli_input (vm, &sub_input, 0, 0);
+ /* frees buf for us */
+ unformat_free (&sub_input);
+ }
+ close(fd);
+ }
+ return 0;
+}
+
+VLIB_REGISTER_NODE (startup_config_node,static) = {
+ .function = startup_config_process,
+ .type = VLIB_NODE_TYPE_PROCESS,
+ .name = "startup-config-process",
+};
+
+static clib_error_t *
+unix_config (vlib_main_t * vm, unformat_input_t * input)
+{
+ unix_main_t * um = &unix_main;
+ clib_error_t * error = 0;
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ char * cli_prompt;
+ if (unformat (input, "interactive"))
+ um->flags |= UNIX_FLAG_INTERACTIVE;
+ else if (unformat (input, "nodaemon"))
+ um->flags |= UNIX_FLAG_NODAEMON;
+ else if (unformat (input, "cli-prompt %s", &cli_prompt))
+ vlib_unix_cli_set_prompt (cli_prompt);
+ else if (unformat (input, "cli-listen %s", &um->cli_listen_socket.config))
+ ;
+ else if (unformat (input, "cli-line-mode"))
+ um->cli_line_mode = 1;
+ else if (unformat (input, "cli-history-limit %d", &um->cli_history_limit))
+ ;
+ else if (unformat (input, "full-coredump"))
+ {
+ int fd;
+
+ fd = open ("/proc/self/coredump_filter", O_WRONLY);
+ if (fd > 0)
+ {
+ if (write (fd, "0x6f\n", 5) != 5)
+ clib_unix_warning ("coredump filter write failed!");
+ close(fd);
+ }
+ else
+ clib_unix_warning ("couldn't open /proc/self/coredump_filter");
+ }
+ else if (unformat (input, "startup-config %s",
+ &um->startup_config_filename))
+ ;
+ else if (unformat (input, "exec %s",
+ &um->startup_config_filename))
+ ;
+ else if (unformat (input, "log %s", &um->log_filename))
+ {
+ um->log_fd = open ((char *) um->log_filename,
+ O_CREAT | O_WRONLY | O_APPEND, 0644);
+ if (um->log_fd < 0)
+ {
+ clib_warning ("couldn't open log '%s'\n", um->log_filename);
+ um->log_fd = 0;
+ }
+ else
+ {
+ u8 * lv = 0;
+ lv = format (0, "%U: ***** Start: PID %d *****\n",
+ format_timeval,
+ 0 /* current bat-time */,
+ 0 /* current bat-format */,
+ getpid());
+ {
+ int rv __attribute__((unused)) =
+ write (um->log_fd, lv, vec_len(lv));
+ }
+ vec_free (lv);
+ }
+ }
+ else
+ return clib_error_return (0, "unknown input `%U'",
+ format_unformat_error, input);
+ }
+
+ if (! (um->flags & UNIX_FLAG_INTERACTIVE))
+ {
+ error = setup_signal_handlers (um);
+ if (error)
+ return error;
+
+ openlog (vm->name, LOG_CONS | LOG_PERROR | LOG_PID, LOG_DAEMON);
+ clib_error_register_handler (unix_error_handler, um);
+
+ if (! (um->flags & UNIX_FLAG_NODAEMON)
+ && daemon (/* chdir to / */ 0,
+ /* stdin/stdout/stderr -> /dev/null */ 0) < 0)
+	return clib_error_return (0, "daemon() failed");
+ }
+ um->unix_config_complete = 1;
+
+ return 0;
+}
+
+/* unix { ... } configuration. */
+VLIB_CONFIG_FUNCTION (unix_config, "unix");
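+
+/*
+ * Example stanza (illustrative values; every keyword below is parsed
+ * by unix_config() above):
+ *
+ *   unix {
+ *     nodaemon
+ *     cli-prompt vlib#
+ *     cli-listen localhost:5002
+ *     cli-history-limit 100
+ *     full-coredump
+ *     startup-config /etc/vlib/startup.txt
+ *     log /tmp/vlib.log
+ *   }
+ */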
+
+static clib_error_t *
+unix_exit (vlib_main_t * vm)
+{
+ /* Close syslog connection. */
+ closelog ();
+ return 0;
+}
+
+VLIB_MAIN_LOOP_EXIT_FUNCTION (unix_exit);
+
+u8 **vlib_thread_stacks;
+
+static char **argv_global;
+
+static uword thread0 (uword arg)
+{
+ vlib_main_t * vm = (vlib_main_t *)arg;
+ unformat_input_t input;
+ int i;
+
+ unformat_init_command_line (&input, argv_global);
+ i = vlib_main (vm, &input);
+ unformat_free (&input);
+
+ return i;
+}
+
+int vlib_unix_main (int argc, char * argv[])
+{
+ vlib_main_t * vm = &vlib_global_main; /* one and only time for this! */
+
+ clib_smp_main_t * sm = &clib_smp_main;
+ vlib_thread_main_t * tm = &vlib_thread_main;
+ unformat_input_t input;
+ u8 * thread_stacks;
+ clib_error_t * e;
+ int i;
+
+ argv_global = argv;
+ vm->name = argv[0];
+ vm->heap_base = clib_mem_get_heap ();
+ ASSERT(vm->heap_base);
+
+ i = vlib_plugin_early_init (vm);
+ if (i)
+ return i;
+
+ unformat_init_command_line (&input, argv_global);
+ vm->init_functions_called = hash_create (0, /* value bytes */ 0);
+ e = vlib_call_all_config_functions (vm, &input, 1 /* early */);
+ if (e != 0)
+ {
+ clib_error_report(e);
+ return 1;
+ }
+ unformat_free (&input);
+
+  /* Allocate n_thread_stacks stacks of VLIB_THREAD_STACK_SIZE bytes each,
+     aligned to a (VLIB_MAX_CPUS * stack size) boundary. */
+ thread_stacks = clib_mem_alloc_aligned
+ (tm->n_thread_stacks * VLIB_THREAD_STACK_SIZE,
+ (VLIB_MAX_CPUS << VLIB_LOG2_THREAD_STACK_SIZE));
+
+ sm->vm_base = thread_stacks;
+ sm->log2_n_per_cpu_vm_bytes = VLIB_LOG2_THREAD_STACK_SIZE;
+
+ vec_validate (vlib_thread_stacks, tm->n_thread_stacks - 1);
+ for (i = 0; i < vec_len (vlib_thread_stacks); i++)
+ {
+ vlib_thread_stacks[i] = thread_stacks;
+
+ /*
+ * Disallow writes to the bottom page of the stack, to
+ * catch stack overflows.
+ */
+ if (mprotect (thread_stacks, 4096, PROT_READ) < 0)
+ clib_unix_warning ("thread stack");
+
+ thread_stacks += VLIB_THREAD_STACK_SIZE;
+ }
+
+ i = clib_calljmp (thread0, (uword) vm,
+ (void *)(vlib_thread_stacks[0] + VLIB_THREAD_STACK_SIZE));
+ return i;
+}
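+
+/*
+ * Stack layout sketch: with 1 MB stacks (as the comment above
+ * assumes) and N threads, the region is N MB, aligned so it begins on
+ * a (VLIB_MAX_CPUS << VLIB_LOG2_THREAD_STACK_SIZE)-byte boundary;
+ * vlib_thread_stacks[i] = vm_base + i * VLIB_THREAD_STACK_SIZE, and
+ * each stack's bottom page is mprotect()ed read-only to catch
+ * overflows.
+ */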
diff --git a/vlib/vlib/unix/mc_socket.c b/vlib/vlib/unix/mc_socket.c
new file mode 100644
index 00000000000..1169203f855
--- /dev/null
+++ b/vlib/vlib/unix/mc_socket.c
@@ -0,0 +1,972 @@
+/*
+ * mc_socket.c: socket based multicast for vlib mc
+ *
+ * Copyright (c) 2010 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vlib/vlib.h>
+#include <vlib/unix/mc_socket.h>
+
+#include <sys/ioctl.h> /* for FIONBIO */
+#include <netinet/tcp.h> /* for TCP_NODELAY */
+#include <net/if.h> /* for struct ifreq */
+
+static u8 * format_socket_peer_id (u8 * s, va_list * args)
+{
+ u64 peer_id_as_u64 = va_arg (*args, u64);
+ mc_peer_id_t peer_id;
+ peer_id.as_u64 = peer_id_as_u64;
+ u32 a = mc_socket_peer_id_get_address (peer_id);
+ u32 p = mc_socket_peer_id_get_port (peer_id);
+
+ s = format (s, "%U:%04x", format_network_address, AF_INET, &a,
+ ntohs (p));
+
+ return s;
+}
+
+typedef void (mc_msg_handler_t) (mc_main_t * mcm, void * msg, u32 buffer_index);
+
+always_inline void msg_handler (mc_main_t * mcm,
+ u32 buffer_index,
+ u32 handler_frees_buffer,
+ void * _h)
+{
+ vlib_main_t * vm = mcm->vlib_main;
+ mc_msg_handler_t * h = _h;
+ vlib_buffer_t * b = vlib_get_buffer (vm, buffer_index);
+ void * the_msg = vlib_buffer_get_current (b);
+
+ h (mcm, the_msg, buffer_index);
+ if (! handler_frees_buffer)
+ vlib_buffer_free_one (vm, buffer_index);
+}
+
+static uword
+append_buffer_index_to_iovec (vlib_main_t * vm,
+ u32 buffer_index,
+ struct iovec ** iovs_return)
+{
+ struct iovec * i;
+ vlib_buffer_t * b;
+ u32 bi = buffer_index;
+ u32 l = 0;
+
+ while (1)
+ {
+ b = vlib_get_buffer (vm, bi);
+ vec_add2 (*iovs_return, i, 1);
+ i->iov_base = vlib_buffer_get_current (b);
+ i->iov_len = b->current_length;
+ l += i->iov_len;
+ if (! (b->flags & VLIB_BUFFER_NEXT_PRESENT))
+ break;
+ bi = b->next_buffer;
+ }
+
+ return l;
+}
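+
+/*
+ * Gather sketch: a two-buffer chain b0 -> b1 becomes two iovec
+ * entries, letting sendmsg() transmit the chain with no copy:
+ *
+ *   iov[0] = { vlib_buffer_get_current (b0), b0->current_length }
+ *   iov[1] = { vlib_buffer_get_current (b1), b1->current_length }
+ */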
+
+static clib_error_t *
+sendmsg_helper (mc_socket_main_t * msm,
+ int socket,
+ struct sockaddr_in * tx_addr,
+ u32 buffer_index)
+{
+ vlib_main_t * vm = msm->mc_main.vlib_main;
+ struct msghdr h;
+ word n_bytes, n_bytes_tx, n_retries;
+
+ memset (&h, 0, sizeof (h));
+ h.msg_name = tx_addr;
+ h.msg_namelen = sizeof (tx_addr[0]);
+
+ if (msm->iovecs)
+ _vec_len (msm->iovecs) = 0;
+
+ n_bytes = append_buffer_index_to_iovec (vm, buffer_index, &msm->iovecs);
+ ASSERT (n_bytes <= msm->mc_main.transport.max_packet_size);
+ if (n_bytes > msm->mc_main.transport.max_packet_size)
+    clib_error ("sending packet larger than interface MTU %d bytes", n_bytes);
+
+ h.msg_iov = msm->iovecs;
+ h.msg_iovlen = vec_len (msm->iovecs);
+
+ n_retries = 0;
+ while ((n_bytes_tx = sendmsg (socket, &h, /* flags */ 0)) != n_bytes
+ && errno == EAGAIN)
+ n_retries++;
+ if (n_bytes_tx != n_bytes)
+ {
+ clib_unix_warning ("sendmsg");
+ return 0;
+ }
+ if (n_retries)
+ {
+ ELOG_TYPE_DECLARE (e) = {
+ .format = "sendmsg-helper: %d retries",
+ .format_args = "i4",
+ };
+ struct { u32 retries; } * ed = 0;
+
+ ed = ELOG_DATA (&vm->elog_main, e);
+ ed->retries = n_retries;
+ }
+ return 0;
+}
+
+static clib_error_t *
+tx_buffer (void * transport, mc_transport_type_t type, u32 buffer_index)
+{
+ mc_socket_main_t *msm = (mc_socket_main_t *)transport;
+ vlib_main_t * vm = msm->mc_main.vlib_main;
+ mc_multicast_socket_t * ms = &msm->multicast_sockets[type];
+ clib_error_t * error;
+ error = sendmsg_helper (msm, ms->socket, &ms->tx_addr, buffer_index);
+ if (type != MC_TRANSPORT_USER_REQUEST_TO_RELAY)
+ vlib_buffer_free_one (vm, buffer_index);
+ return error;
+}
+
+static clib_error_t *
+tx_ack (void *transport, mc_peer_id_t dest_peer_id, u32 buffer_index)
+{
+ struct sockaddr_in tx_addr;
+ mc_socket_main_t *msm = (mc_socket_main_t *)transport;
+ vlib_main_t * vm = msm->mc_main.vlib_main;
+ clib_error_t * error;
+
+ memset (&tx_addr, 0, sizeof (tx_addr));
+ tx_addr.sin_family = AF_INET;
+ tx_addr.sin_addr.s_addr = mc_socket_peer_id_get_address (dest_peer_id);
+ tx_addr.sin_port = mc_socket_peer_id_get_port (dest_peer_id);
+
+ error = sendmsg_helper (msm, msm->ack_socket, &tx_addr, buffer_index);
+ vlib_buffer_free_one (vm, buffer_index);
+ return error;
+}
+
+static clib_error_t *
+recvmsg_helper (mc_socket_main_t * msm,
+ int socket,
+ struct sockaddr_in * rx_addr,
+ u32 * buffer_index,
+ u32 drop_message)
+{
+ vlib_main_t * vm = msm->mc_main.vlib_main;
+ vlib_buffer_t * b;
+ uword n_left, n_alloc, n_mtu, i, i_rx;
+ const uword buffer_size = VLIB_BUFFER_DEFAULT_FREE_LIST_BYTES;
+ word n_bytes_left;
+
+  /* Make sure we have at least an MTU's worth of buffers. */
+ n_mtu = msm->rx_mtu_n_buffers;
+ n_left = vec_len (msm->rx_buffers);
+ if (n_left < n_mtu)
+ {
+ uword max_alloc = 8 * n_mtu;
+ vec_validate (msm->rx_buffers, max_alloc - 1);
+ n_alloc = vlib_buffer_alloc (vm, msm->rx_buffers + n_left, max_alloc - n_left);
+ _vec_len (msm->rx_buffers) = n_left + n_alloc;
+ }
+
+ ASSERT (vec_len (msm->rx_buffers) >= n_mtu);
+ vec_validate (msm->iovecs, n_mtu - 1);
+
+  /* Allocate RX buffers from the end of rx_buffers.
+     Turn them into iovecs to pass to recvmsg. */
+ i_rx = vec_len (msm->rx_buffers) - 1;
+ for (i = 0; i < n_mtu; i++)
+ {
+ b = vlib_get_buffer (vm, msm->rx_buffers[i_rx - i]);
+ msm->iovecs[i].iov_base = b->data;
+ msm->iovecs[i].iov_len = buffer_size;
+ }
+ _vec_len (msm->iovecs) = n_mtu;
+
+ {
+ struct msghdr h;
+
+ memset (&h, 0, sizeof (h));
+ if (rx_addr)
+ {
+ h.msg_name = rx_addr;
+ h.msg_namelen = sizeof (rx_addr[0]);
+ }
+ h.msg_iov = msm->iovecs;
+ h.msg_iovlen = vec_len (msm->iovecs);
+
+ n_bytes_left = recvmsg (socket, &h, 0);
+ if (n_bytes_left < 0)
+ return clib_error_return_unix (0, "recvmsg");
+ }
+
+ if (drop_message)
+ {
+ *buffer_index = ~0;
+ return 0;
+ }
+
+ *buffer_index = msm->rx_buffers[i_rx];
+ while (1)
+ {
+ b = vlib_get_buffer (vm, msm->rx_buffers[i_rx]);
+
+ b->flags = 0;
+ b->current_data = 0;
+ b->current_length = n_bytes_left < buffer_size ? n_bytes_left : buffer_size;
+
+ n_bytes_left -= buffer_size;
+
+ if (n_bytes_left <= 0)
+ break;
+
+ i_rx--;
+ b->flags |= VLIB_BUFFER_NEXT_PRESENT;
+ b->next_buffer = msm->rx_buffers[i_rx];
+ }
+
+ _vec_len (msm->rx_buffers) = i_rx;
+
+ return 0 /* no error */;
+}
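+
+/*
+ * Scatter sketch: an MTU's worth of iovecs is pre-posted from the
+ * tail of msm->rx_buffers; if recvmsg() returns k bytes, the first
+ * ceil(k / buffer_size) buffers are chained via
+ * VLIB_BUFFER_NEXT_PRESENT and the remainder stay in rx_buffers for
+ * the next call.
+ */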
+
+static clib_error_t * mastership_socket_read_ready (unix_file_t * uf)
+{
+ mc_socket_main_t *msm = (mc_socket_main_t *)uf->private_data;
+ mc_main_t * mcm = &msm->mc_main;
+ mc_multicast_socket_t * ms = &msm->multicast_sockets[MC_TRANSPORT_MASTERSHIP];
+ clib_error_t * error;
+ u32 bi;
+
+ error = recvmsg_helper (msm, ms->socket, /* rx_addr */ 0, &bi, /* drop_message */ 0);
+ if (! error)
+ msg_handler (mcm, bi,
+ /* handler_frees_buffer */ 0,
+ mc_msg_master_assert_handler);
+
+ return error;
+}
+
+static clib_error_t * to_relay_socket_read_ready (unix_file_t * uf)
+{
+ mc_socket_main_t *msm = (mc_socket_main_t *)uf->private_data;
+ mc_main_t *mcm = &msm->mc_main;
+ vlib_main_t * vm = msm->mc_main.vlib_main;
+ mc_multicast_socket_t * ms_to_relay = &msm->multicast_sockets[MC_TRANSPORT_USER_REQUEST_TO_RELAY];
+ mc_multicast_socket_t * ms_from_relay = &msm->multicast_sockets[MC_TRANSPORT_USER_REQUEST_FROM_RELAY];
+ clib_error_t * error;
+ u32 bi;
+ u32 is_master = mcm->relay_state == MC_RELAY_STATE_MASTER;
+
+  /* Not the ordering master? Drop the message. */
+ error = recvmsg_helper (msm, ms_to_relay->socket, /* rx_addr */ 0, &bi,
+ /* drop_message */ ! is_master);
+
+ /* If we are the master, number and rebroadcast the msg. */
+ if (! error && is_master)
+ {
+ vlib_buffer_t * b = vlib_get_buffer (vm, bi);
+ mc_msg_user_request_t * mp = vlib_buffer_get_current (b);
+ mp->global_sequence = clib_host_to_net_u32 (mcm->relay_global_sequence);
+ mcm->relay_global_sequence++;
+ error = sendmsg_helper (msm, ms_from_relay->socket, &ms_from_relay->tx_addr, bi);
+ vlib_buffer_free_one (vm, bi);
+ }
+
+ return error;
+}
+
+static clib_error_t * from_relay_socket_read_ready (unix_file_t * uf)
+{
+ mc_socket_main_t *msm = (mc_socket_main_t *)uf->private_data;
+ mc_main_t * mcm = &msm->mc_main;
+ mc_multicast_socket_t * ms = &msm->multicast_sockets[MC_TRANSPORT_USER_REQUEST_FROM_RELAY];
+ clib_error_t * error;
+ u32 bi;
+
+ error = recvmsg_helper (msm, ms->socket, /* rx_addr */ 0, &bi, /* drop_message */ 0);
+ if (! error)
+ {
+ msg_handler (mcm, bi, /* handler_frees_buffer */ 1,
+ mc_msg_user_request_handler);
+ }
+ return error;
+}
+
+static clib_error_t * join_socket_read_ready (unix_file_t * uf)
+{
+ mc_socket_main_t *msm = (mc_socket_main_t *)uf->private_data;
+ mc_main_t * mcm = &msm->mc_main;
+ vlib_main_t * vm = mcm->vlib_main;
+ mc_multicast_socket_t * ms = &msm->multicast_sockets[MC_TRANSPORT_JOIN];
+ clib_error_t * error;
+ u32 bi;
+
+ error = recvmsg_helper (msm, ms->socket, /* rx_addr */ 0, &bi, /* drop_message */ 0);
+ if (! error)
+ {
+ vlib_buffer_t * b = vlib_get_buffer (vm, bi);
+ mc_msg_join_or_leave_request_t * mp = vlib_buffer_get_current (b);
+
+ switch (clib_host_to_net_u32 (mp->type))
+ {
+ case MC_MSG_TYPE_join_or_leave_request:
+ msg_handler (mcm, bi, /* handler_frees_buffer */ 0,
+ mc_msg_join_or_leave_request_handler);
+ break;
+
+ case MC_MSG_TYPE_join_reply:
+ msg_handler (mcm, bi, /* handler_frees_buffer */ 0,
+ mc_msg_join_reply_handler);
+ break;
+
+ default:
+ ASSERT (0);
+ break;
+ }
+ }
+ return error;
+}
+
+static clib_error_t * ack_socket_read_ready (unix_file_t * uf)
+{
+ mc_socket_main_t *msm = (mc_socket_main_t *)uf->private_data;
+ mc_main_t * mcm = &msm->mc_main;
+ clib_error_t * error;
+ u32 bi;
+
+ error = recvmsg_helper (msm, msm->ack_socket, /* rx_addr */ 0, &bi, /* drop_message */ 0);
+ if (! error)
+ msg_handler (mcm, bi, /* handler_frees_buffer */ 0,
+ mc_msg_user_ack_handler);
+ return error;
+}
+
+static void catchup_cleanup (mc_socket_main_t *msm,
+ mc_socket_catchup_t *c,
+ unix_main_t *um, unix_file_t *uf)
+{
+ hash_unset (msm->catchup_index_by_file_descriptor, uf->file_descriptor);
+ unix_file_del (um, uf);
+ vec_free (c->input_vector);
+ vec_free (c->output_vector);
+ pool_put (msm->catchups, c);
+}
+
+static mc_socket_catchup_t *
+find_catchup_from_file_descriptor (mc_socket_main_t * msm, int file_descriptor)
+{
+ uword * p = hash_get (msm->catchup_index_by_file_descriptor, file_descriptor);
+ return p ? pool_elt_at_index (msm->catchups, p[0]) : 0;
+}
+
+static clib_error_t * catchup_socket_read_ready (unix_file_t * uf, int is_server)
+{
+ unix_main_t * um = &unix_main;
+ mc_socket_main_t *msm = (mc_socket_main_t *)uf->private_data;
+ mc_main_t *mcm = &msm->mc_main;
+ mc_socket_catchup_t * c = find_catchup_from_file_descriptor (msm, uf->file_descriptor);
+ word l, n, is_eof;
+
+ l = vec_len (c->input_vector);
+ vec_resize (c->input_vector, 4096);
+ n = read (uf->file_descriptor, c->input_vector + l, vec_len (c->input_vector) - l);
+ is_eof = n == 0;
+
+ if (n < 0)
+ {
+ if (errno == EAGAIN)
+ n = 0;
+ else
+ {
+ catchup_cleanup (msm, c, um, uf);
+ return clib_error_return_unix (0, "read");
+ }
+ }
+
+ _vec_len (c->input_vector) = l + n;
+
+ if (is_eof && vec_len (c->input_vector) > 0)
+ {
+ if (is_server)
+ {
+ mc_msg_catchup_request_handler (mcm, (void *) c->input_vector, c - msm->catchups);
+ _vec_len (c->input_vector) = 0;
+ }
+ else
+ {
+ mc_msg_catchup_reply_handler (mcm, (void *) c->input_vector, c - msm->catchups);
+ c->input_vector = 0; /* reply handler is responsible for freeing vector */
+ catchup_cleanup (msm, c, um, uf);
+ }
+ }
+
+ return 0 /* no error */;
+}
+
+static clib_error_t * catchup_server_read_ready (unix_file_t * uf)
+{ return catchup_socket_read_ready (uf, /* is_server */ 1); }
+
+static clib_error_t * catchup_client_read_ready (unix_file_t * uf)
+{
+ if (MC_EVENT_LOGGING)
+ {
+ mc_socket_main_t *msm = (mc_socket_main_t *)uf->private_data;
+ vlib_main_t * vm = msm->mc_main.vlib_main;
+
+ ELOG_TYPE (e, "catchup_client_read_ready");
+ ELOG (&vm->elog_main, e, 0);
+ }
+ return catchup_socket_read_ready (uf, /* is_server */ 0);
+}
+
+static clib_error_t *
+catchup_socket_write_ready (unix_file_t * uf, int is_server)
+{
+ unix_main_t * um = &unix_main;
+ mc_socket_main_t *msm = (mc_socket_main_t *)uf->private_data;
+ mc_socket_catchup_t *c = find_catchup_from_file_descriptor (msm, uf->file_descriptor);
+ clib_error_t * error = 0;
+ int n;
+
+ if (c->connect_in_progress)
+ {
+ u32 len, value;
+
+ c->connect_in_progress = 0;
+ len = sizeof (value);
+ if (getsockopt (c->socket, SOL_SOCKET,
+ SO_ERROR, &value, &len) < 0)
+ {
+ error = clib_error_return_unix (0, "getsockopt SO_ERROR");
+ goto error_quit;
+ }
+ if (value != 0)
+ {
+ error = clib_error_return_code (0, value, CLIB_ERROR_ERRNO_VALID, "connect fails");
+ goto error_quit;
+ }
+ }
+
+ while (1)
+ {
+ u32 n_this_write;
+
+ n_this_write =
+ clib_min (vec_len (c->output_vector) - c->output_vector_n_written,
+ msm->rx_mtu_n_bytes - 64 /* ip + tcp + option allowance */);
+
+ if (n_this_write <= 0)
+ break;
+
+ do {
+ n = write (uf->file_descriptor,
+ c->output_vector + c->output_vector_n_written,
+ n_this_write);
+ } while (n < 0 && errno == EAGAIN);
+
+ if (n < 0)
+ {
+ error = clib_error_return_unix (0, "write");
+ goto error_quit;
+ }
+ c->output_vector_n_written += n;
+ }
+
+ if (c->output_vector_n_written >= vec_len (c->output_vector))
+ {
+ if (! is_server)
+ {
+ uf->flags &= ~UNIX_FILE_DATA_AVAILABLE_TO_WRITE;
+ unix_main.file_update (uf, UNIX_FILE_UPDATE_MODIFY);
+ /* Send EOF to other side. */
+ shutdown (uf->file_descriptor, SHUT_WR);
+ return error;
+ }
+ else
+ {
+ error_quit:
+ catchup_cleanup (msm, c, um, uf);
+ }
+ }
+ return error;
+}
+
+static clib_error_t *
+catchup_server_write_ready (unix_file_t * uf)
+{ return catchup_socket_write_ready (uf, /* is_server */ 1); }
+
+static clib_error_t *
+catchup_client_write_ready (unix_file_t * uf)
+{ return catchup_socket_write_ready (uf, /* is_server */ 0); }
+
+static clib_error_t *catchup_socket_error_ready (unix_file_t *uf)
+{
+ unix_main_t *um = &unix_main;
+ mc_socket_main_t *msm = (mc_socket_main_t *)uf->private_data;
+ mc_socket_catchup_t *c = find_catchup_from_file_descriptor (msm, uf->file_descriptor);
+ catchup_cleanup (msm, c, um, uf);
+ return clib_error_return (0, "error");
+}
+
+static clib_error_t *catchup_listen_read_ready (unix_file_t * uf)
+{
+ mc_socket_main_t *msm = (mc_socket_main_t *)uf->private_data;
+ struct sockaddr_in client_addr;
+ int client_len;
+ mc_socket_catchup_t *c;
+ unix_file_t template = {0};
+
+ pool_get (msm->catchups, c);
+ memset(c, 0, sizeof (c[0]));
+
+ client_len = sizeof(client_addr);
+
+  /* Note: on Linux, accept() does not pass the listening socket's
+     non-blocking attribute to the new socket. */
+ c->socket = accept (uf->file_descriptor,
+ (struct sockaddr *)&client_addr,
+ (socklen_t *)&client_len);
+
+ if (c->socket < 0)
+ {
+ pool_put (msm->catchups, c);
+ return clib_error_return_unix (0, "accept");
+ }
+
+ if (MC_EVENT_LOGGING)
+ {
+ mc_main_t * mcm = &msm->mc_main;
+ vlib_main_t * vm = mcm->vlib_main;
+
+ ELOG_TYPE_DECLARE (e) = {
+ .format = "catchup accepted from 0x%lx",
+ .format_args = "i4",
+ };
+ struct { u32 addr; } * ed = 0;
+
+ ed = ELOG_DATA (&vm->elog_main, e);
+ ed->addr = ntohl(client_addr.sin_addr.s_addr);
+ }
+
+ /* Disable the Nagle algorithm, ship catchup pkts immediately */
+ {
+ int one = 1;
+ if ((setsockopt(c->socket, IPPROTO_TCP,
+ TCP_NODELAY, (void *)&one, sizeof(one))) < 0) {
+ clib_unix_warning("catchup socket: set TCP_NODELAY");
+ }
+ }
+
+ template.read_function = catchup_server_read_ready;
+ template.write_function = catchup_server_write_ready;
+ template.error_function = catchup_socket_error_ready;
+ template.file_descriptor = c->socket;
+ template.private_data = pointer_to_uword (msm);
+ c->unix_file_index = unix_file_add (&unix_main, &template);
+ hash_set (msm->catchup_index_by_file_descriptor, c->socket, c - msm->catchups);
+
+ return 0;
+}
+
+/* Bind the socket to an unused port and return that port (or -1). */
+static word find_and_bind_to_free_port (word sock, word port)
+{
+ for (; port < 1 << 16; port++)
+ {
+ struct sockaddr_in a;
+
+ memset (&a, 0, sizeof(a)); /* Warnings be gone */
+
+ a.sin_family = PF_INET;
+ a.sin_addr.s_addr = INADDR_ANY;
+ a.sin_port = htons (port);
+
+ if (bind (sock, (struct sockaddr *) &a, sizeof (a)) >= 0)
+ break;
+ }
+
+ return port < 1 << 16 ? port : -1;
+}
+
+static clib_error_t *
+setup_multicast_socket (mc_socket_main_t * msm,
+ mc_multicast_socket_t * ms,
+ char * type,
+ uword udp_port)
+{
+ int one = 1;
+ struct ip_mreq mcast_req;
+
+ if (! msm->multicast_ttl)
+ msm->multicast_ttl = 1;
+
+  /* multicast TX socket for this transport type */
+ if ((ms->socket = socket (PF_INET, SOCK_DGRAM, IPPROTO_UDP)) < 0)
+ return clib_error_return_unix(0, "%s socket", type);
+
+ {
+ u8 ttl = msm->multicast_ttl;
+
+ if ((setsockopt(ms->socket, IPPROTO_IP,
+ IP_MULTICAST_TTL, (void *)&ttl, sizeof(ttl))) < 0)
+ return clib_error_return_unix(0, "%s set multicast ttl", type);
+ }
+
+ if (setsockopt(ms->socket, SOL_SOCKET, SO_REUSEADDR, &one, sizeof(one)) < 0)
+ return clib_error_return_unix (0, "%s setsockopt SO_REUSEADDR", type);
+
+ memset (&ms->tx_addr, 0, sizeof (ms->tx_addr));
+ ms->tx_addr.sin_family = AF_INET;
+ ms->tx_addr.sin_addr.s_addr = htonl (msm->multicast_tx_ip4_address_host_byte_order);
+ ms->tx_addr.sin_port = htons (udp_port);
+
+ if (bind(ms->socket, (struct sockaddr *)&ms->tx_addr,
+ sizeof (ms->tx_addr)) < 0)
+ return clib_error_return_unix(0, "%s bind", type);
+
+ memset (&mcast_req, 0, sizeof (mcast_req));
+ mcast_req.imr_multiaddr.s_addr = htonl (msm->multicast_tx_ip4_address_host_byte_order);
+ mcast_req.imr_interface.s_addr = msm->if_ip4_address_net_byte_order;
+
+ if ((setsockopt(ms->socket, IPPROTO_IP,
+ IP_ADD_MEMBERSHIP, (void *)&mcast_req,
+ sizeof (mcast_req))) < 0)
+ return clib_error_return_unix(0, "%s IP_ADD_MEMBERSHIP setsockopt", type);
+
+ if (ioctl (ms->socket, FIONBIO, &one) < 0)
+ return clib_error_return_unix (0, "%s set FIONBIO", type);
+
+ /* FIXME remove this when we support tx_ready. */
+ {
+ u32 len = 1 << 20;
+ socklen_t sl = sizeof (len);
+ if (setsockopt(ms->socket, SOL_SOCKET, SO_SNDBUF, &len, sl) < 0)
+ clib_unix_error ("setsockopt");
+ }
+
+ return 0;
+}
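+
+/*
+ * Setup recipe implemented above: create a UDP socket, set
+ * IP_MULTICAST_TTL and SO_REUSEADDR, bind to the group address/port,
+ * IP_ADD_MEMBERSHIP on the configured interface, then switch to
+ * non-blocking mode via FIONBIO.
+ */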
+
+static clib_error_t *
+socket_setup (mc_socket_main_t *msm)
+{
+ int one = 1;
+ clib_error_t * error;
+ u32 port;
+
+ if (! msm->base_multicast_udp_port_host_byte_order)
+ msm->base_multicast_udp_port_host_byte_order =
+ 0xffff - ((MC_N_TRANSPORT_TYPE + 2 /* ack socket, catchup socket */)
+ - 1);
+
+ port = msm->base_multicast_udp_port_host_byte_order;
+
+  error = setup_multicast_socket (msm,
+ &msm->multicast_sockets[MC_TRANSPORT_MASTERSHIP],
+ "mastership",
+ port++);
+ if (error)
+ return error;
+
+  error = setup_multicast_socket (msm,
+ &msm->multicast_sockets[MC_TRANSPORT_JOIN],
+ "join",
+ port++);
+ if (error)
+ return error;
+
+  error = setup_multicast_socket (msm,
+ &msm->multicast_sockets[MC_TRANSPORT_USER_REQUEST_TO_RELAY],
+ "to relay",
+ port++);
+ if (error)
+ return error;
+
+  error = setup_multicast_socket (msm,
+ &msm->multicast_sockets[MC_TRANSPORT_USER_REQUEST_FROM_RELAY],
+ "from relay",
+ port++);
+ if (error)
+ return error;
+
+ /* ACK rx socket */
+ msm->ack_socket = socket (PF_INET, SOCK_DGRAM, IPPROTO_UDP);
+ if (msm->ack_socket < 0)
+ return clib_error_return_unix(0, "ack socket");
+
+ msm->ack_udp_port = find_and_bind_to_free_port (msm->ack_socket, port++);
+
+ if (ioctl (msm->ack_socket, FIONBIO, &one) < 0)
+ return clib_error_return_unix (0, "ack socket FIONBIO");
+
+ msm->catchup_server_socket = socket(AF_INET, SOCK_STREAM, 0);
+ if (msm->catchup_server_socket < 0)
+ return clib_error_return_unix (0, "catchup server socket");
+
+ msm->catchup_tcp_port = find_and_bind_to_free_port (msm->catchup_server_socket, port++);
+
+ if (ioctl (msm->catchup_server_socket, FIONBIO, &one) < 0)
+ return clib_error_return_unix (0, "catchup server socket FIONBIO");
+
+ if (listen(msm->catchup_server_socket, 5) < 0)
+ return clib_error_return_unix (0, "catchup server socket listen");
+
+ /* epoll setup for multicast mastership socket */
+ {
+ unix_file_t template = {0};
+
+ template.read_function = mastership_socket_read_ready;
+ template.file_descriptor = msm->multicast_sockets[MC_TRANSPORT_MASTERSHIP].socket;
+ template.private_data = (uword) msm;
+ unix_file_add (&unix_main, &template);
+
+ /* epoll setup for multicast to_relay socket */
+ template.read_function = to_relay_socket_read_ready;
+ template.file_descriptor = msm->multicast_sockets[MC_TRANSPORT_USER_REQUEST_TO_RELAY].socket;
+ template.private_data = (uword) msm;
+ unix_file_add (&unix_main, &template);
+
+ /* epoll setup for multicast from_relay socket */
+ template.read_function = from_relay_socket_read_ready;
+ template.file_descriptor = msm->multicast_sockets[MC_TRANSPORT_USER_REQUEST_FROM_RELAY].socket;
+ template.private_data = (uword) msm;
+ unix_file_add (&unix_main, &template);
+
+    /* epoll setup for multicast join socket */
+    template.read_function = join_socket_read_ready;
+ template.file_descriptor = msm->multicast_sockets[MC_TRANSPORT_JOIN].socket;
+ template.private_data = (uword) msm;
+ unix_file_add (&unix_main, &template);
+
+ /* epoll setup for ack rx socket */
+ template.read_function = ack_socket_read_ready;
+ template.file_descriptor = msm->ack_socket;
+ template.private_data = (uword) msm;
+ unix_file_add (&unix_main, &template);
+
+ /* epoll setup for TCP catchup server */
+ template.read_function = catchup_listen_read_ready;
+ template.file_descriptor = msm->catchup_server_socket;
+ template.private_data = (uword) msm;
+ unix_file_add (&unix_main, &template);
+ }
+
+ return 0;
+}
+
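+/* Queue catchup bytes for transmission. Either hand over an entire
+   vector (set_output_vector) or grow the current one by n_bytes; then
+   mark the descriptor write-ready so the unix file poller drains it. */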
+static void *
+catchup_add_pending_output (mc_socket_catchup_t * c, uword n_bytes, u8 * set_output_vector)
+{
+ unix_file_t * uf = pool_elt_at_index (unix_main.file_pool,
+ c->unix_file_index);
+  u8 * result = 0;
+
+ if (set_output_vector)
+ c->output_vector = set_output_vector;
+ else
+ vec_add2 (c->output_vector, result, n_bytes);
+ if (vec_len (c->output_vector) > 0)
+ {
+ int skip_update = 0 != (uf->flags & UNIX_FILE_DATA_AVAILABLE_TO_WRITE);
+ uf->flags |= UNIX_FILE_DATA_AVAILABLE_TO_WRITE;
+ if (! skip_update)
+ unix_main.file_update (uf, UNIX_FILE_UPDATE_MODIFY);
+ }
+ return result;
+}
+
+static uword catchup_request_fun (void *transport_main,
+ u32 stream_index,
+ mc_peer_id_t catchup_peer_id)
+{
+ mc_socket_main_t *msm = (mc_socket_main_t *)transport_main;
+ mc_main_t * mcm = &msm->mc_main;
+ vlib_main_t * vm = mcm->vlib_main;
+ mc_socket_catchup_t *c;
+ struct sockaddr_in addr;
+ unix_main_t *um = &unix_main;
+ int one = 1;
+
+ pool_get (msm->catchups, c);
+ memset (c, 0, sizeof (*c));
+
+ c->socket = socket(AF_INET, SOCK_STREAM, 0);
+ if (c->socket < 0)
+ {
+ clib_unix_warning ("socket");
+ return 0;
+ }
+
+ if (ioctl (c->socket, FIONBIO, &one) < 0)
+ {
+ clib_unix_warning ("FIONBIO");
+ return 0;
+ }
+
+ memset(&addr, 0, sizeof(addr));
+ addr.sin_family = AF_INET;
+ addr.sin_addr.s_addr = mc_socket_peer_id_get_address (catchup_peer_id);
+ addr.sin_port = mc_socket_peer_id_get_port (catchup_peer_id);
+
+ c->connect_in_progress = 1;
+
+ if (MC_EVENT_LOGGING)
+ {
+ ELOG_TYPE_DECLARE (e) = {
+ .format = "connecting to peer 0x%Lx",
+ .format_args = "i8",
+ };
+ struct { u64 peer; } * ed;
+ ed = ELOG_DATA (&vm->elog_main, e);
+ ed->peer = catchup_peer_id.as_u64;
+ }
+
+  if (connect (c->socket, (const void *) &addr, sizeof (addr)) < 0
+      && errno != EINPROGRESS)
+ {
+ clib_unix_warning ("connect to %U fails",
+ format_socket_peer_id, catchup_peer_id);
+ return 0;
+ }
+
+ {
+ unix_file_t template = {0};
+
+ template.read_function = catchup_client_read_ready;
+ template.write_function = catchup_client_write_ready;
+ template.error_function = catchup_socket_error_ready;
+ template.file_descriptor = c->socket;
+ template.private_data = (uword) msm;
+ c->unix_file_index = unix_file_add (um, &template);
+
+ hash_set (msm->catchup_index_by_file_descriptor, c->socket, c - msm->catchups);
+ }
+
+ {
+ mc_msg_catchup_request_t * mp;
+ mp = catchup_add_pending_output (c, sizeof (mp[0]), /* set_output_vector */ 0);
+ mp->peer_id = msm->mc_main.transport.our_catchup_peer_id;
+ mp->stream_index = stream_index;
+ mc_byte_swap_msg_catchup_request (mp);
+ }
+
+ return c - msm->catchups;
+}
+
+static void catchup_send_fun (void *transport_main, uword opaque, u8 * data)
+{
+ mc_socket_main_t *msm = (mc_socket_main_t *)transport_main;
+ mc_socket_catchup_t *c = pool_elt_at_index (msm->catchups, opaque);
+ catchup_add_pending_output (c, 0, data);
+}
+
+static int
+find_interface_ip4_address (char * if_name, u32 * ip4_address, u32 * mtu)
+{
+ int fd;
+ struct ifreq ifr;
+ struct sockaddr_in * sa;
+
+ /* Dig up our IP address */
+  fd = socket (PF_INET, SOCK_DGRAM, 0);
+ if (fd < 0) {
+ clib_unix_error ("socket");
+ return -1;
+ }
+
+ ifr.ifr_addr.sa_family = AF_INET;
+ strncpy (ifr.ifr_name, if_name, sizeof(ifr.ifr_name)-1);
+  if (ioctl (fd, SIOCGIFADDR, &ifr) < 0) {
+    clib_unix_error ("ioctl(SIOCGIFADDR)");
+    close (fd);
+    return -1;
+  }
+
+ sa = (void *) &ifr.ifr_addr;
+ memcpy (ip4_address, &sa->sin_addr.s_addr, sizeof (ip4_address[0]));
+
+  if (ioctl (fd, SIOCGIFMTU, &ifr) < 0)
+    {
+      close (fd);
+      return -1;
+    }
+ if (mtu)
+ *mtu = ifr.ifr_mtu - (/* IP4 header */ 20 + /* UDP header */ 8);
+
+ close (fd);
+
+ return 0;
+}
+
+clib_error_t *
+mc_socket_main_init (mc_socket_main_t * msm, char **intfc_probe_list,
+ int n_intfcs_to_probe)
+{
+ clib_error_t * error;
+ mc_main_t * mcm;
+ u32 mtu;
+
+ mcm = &msm->mc_main;
+
+ /* 239.255.0.7 */
+ if (! msm->multicast_tx_ip4_address_host_byte_order)
+ msm->multicast_tx_ip4_address_host_byte_order = 0xefff0007;
+
+ {
+ u32 i, a, win;
+
+ win = 0;
+ if (msm->multicast_interface_name)
+ {
+ win = ! find_interface_ip4_address (msm->multicast_interface_name, &a, &mtu);
+ }
+ else
+ {
+ for (i = 0; i < n_intfcs_to_probe; i++)
+ if (! find_interface_ip4_address (intfc_probe_list[i], &a, &mtu))
+ {
+ win = 1;
+ msm->multicast_interface_name = intfc_probe_list[i];
+ break;
+ }
+ }
+
+ if (! win)
+ return clib_error_return (0, "can't find interface ip4 address");
+
+ msm->if_ip4_address_net_byte_order = a;
+ }
+
+ msm->rx_mtu_n_bytes = mtu;
+ msm->rx_mtu_n_buffers = msm->rx_mtu_n_bytes / VLIB_BUFFER_DEFAULT_FREE_LIST_BYTES;
+ msm->rx_mtu_n_buffers += (msm->rx_mtu_n_bytes % VLIB_BUFFER_DEFAULT_FREE_LIST_BYTES) != 0;
+
+ error = socket_setup (msm);
+ if (error)
+ return error;
+
+ mcm->transport.our_ack_peer_id =
+ mc_socket_set_peer_id (msm->if_ip4_address_net_byte_order, msm->ack_udp_port);
+
+ mcm->transport.our_catchup_peer_id =
+ mc_socket_set_peer_id (msm->if_ip4_address_net_byte_order, msm->catchup_tcp_port);
+
+ mcm->transport.tx_buffer = tx_buffer;
+ mcm->transport.tx_ack = tx_ack;
+ mcm->transport.catchup_request_fun = catchup_request_fun;
+ mcm->transport.catchup_send_fun = catchup_send_fun;
+ mcm->transport.format_peer_id = format_socket_peer_id;
+ mcm->transport.opaque = msm;
+ mcm->transport.max_packet_size = mtu;
+
+ mc_main_init (mcm, "socket");
+
+ return error;
+}
diff --git a/vlib/vlib/unix/mc_socket.h b/vlib/vlib/unix/mc_socket.h
new file mode 100644
index 00000000000..7dd6b5e27b1
--- /dev/null
+++ b/vlib/vlib/unix/mc_socket.h
@@ -0,0 +1,126 @@
+/*
+ * mc_socket.h: socket based multicast for vlib mc
+ *
+ * Copyright (c) 2010 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __included_mc_socket_h__
+#define __included_mc_socket_h__
+
+#include <vlib/unix/unix.h>
+#include <netinet/in.h>
+
+typedef struct {
+ int socket;
+ struct sockaddr_in tx_addr;
+} mc_multicast_socket_t;
+
+/* TCP catchup socket */
+typedef struct {
+ int socket;
+ u32 unix_file_index;
+
+ u8 * input_vector;
+ u8 * output_vector;
+ u32 output_vector_n_written;
+
+ u32 connect_in_progress;
+} mc_socket_catchup_t;
+
+typedef struct mc_socket_main_t {
+ mc_main_t mc_main;
+
+  /* Multicast mastership/join/to-relay/from-relay sockets. */
+ mc_multicast_socket_t multicast_sockets[MC_N_TRANSPORT_TYPE];
+
+  /* Unicast UDP ack socket. */
+ int ack_socket;
+
+ /* TCP catchup server socket */
+ int catchup_server_socket;
+
+ /* Pool of stream-private catchup sockets */
+ mc_socket_catchup_t *catchups;
+
+ uword * catchup_index_by_file_descriptor;
+
+  /* Receive MTU in bytes and in VLIB buffers. */
+  u32 rx_mtu_n_bytes;
+  u32 rx_mtu_n_buffers;
+
+ /* Vector of RX VLIB buffers. */
+ u32 * rx_buffers;
+ /* Vector of scatter/gather descriptors for sending/receiving VLIB buffers
+ via kernel. */
+ struct iovec * iovecs;
+
+ /* IP address of interface to use for multicast. */
+ u32 if_ip4_address_net_byte_order;
+
+ u32 ack_udp_port;
+ u32 catchup_tcp_port;
+
+ /* Interface on which to listen for multicasts. */
+ char * multicast_interface_name;
+
+ /* Multicast address to use (e.g. 0xefff0000).
+ Host byte order. */
+ u32 multicast_tx_ip4_address_host_byte_order;
+
+ /* TTL to use for multicasts. */
+ u32 multicast_ttl;
+
+ /* Multicast ports for mastership, joins, etc. will be chosen
+ starting at the given port in host byte order.
+ A total of MC_N_TRANSPORT_TYPE ports will be used. */
+ u32 base_multicast_udp_port_host_byte_order;
+} mc_socket_main_t;
+
+always_inline u32
+mc_socket_peer_id_get_address (mc_peer_id_t i)
+{
+ u32 a = ((i.as_u8[0] << 24)
+ | (i.as_u8[1] << 16)
+ | (i.as_u8[2] << 8)
+ | (i.as_u8[3] << 0));
+ return clib_host_to_net_u32 (a);
+}
+
+always_inline u32
+mc_socket_peer_id_get_port (mc_peer_id_t i)
+{ return clib_host_to_net_u16 ((i.as_u8[4] << 8) | i.as_u8[5]); }
+
+static_always_inline mc_peer_id_t
+mc_socket_set_peer_id (u32 address_net_byte_order, u32 port_host_byte_order)
+{
+ mc_peer_id_t i;
+ u32 a = ntohl (address_net_byte_order);
+ u32 p = port_host_byte_order;
+ i.as_u8[0] = (a >> 24) & 0xff;
+ i.as_u8[1] = (a >> 16) & 0xff;
+ i.as_u8[2] = (a >> 8) & 0xff;
+ i.as_u8[3] = (a >> 0) & 0xff;
+ i.as_u8[4] = (p >> 8) & 0xff;
+ i.as_u8[5] = (p >> 0) & 0xff;
+ i.as_u8[6] = 0;
+ i.as_u8[7] = 0;
+ return i;
+}
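+
+/* Illustrative round-trip, a sketch rather than code this commit uses:
+   both getters return network byte order values which can be stored
+   directly into a struct sockaddr_in, as catchup_request_fun does.
+
+     mc_peer_id_t id = mc_socket_set_peer_id (addr_net, port_host);
+     struct sockaddr_in a;
+     a.sin_addr.s_addr = mc_socket_peer_id_get_address (id);
+     a.sin_port = mc_socket_peer_id_get_port (id);
+
+   addr_net and port_host are hypothetical names for an IP4 address in
+   network byte order and a UDP port in host byte order. */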
+
+clib_error_t *
+mc_socket_main_init (mc_socket_main_t * msm, char **intfc_probe_list,
+ int n_intfcs_to_probe);
+#endif /* __included_mc_socket_h__ */
+
diff --git a/vlib/vlib/unix/pci.c b/vlib/vlib/unix/pci.c
new file mode 100644
index 00000000000..02c37f72707
--- /dev/null
+++ b/vlib/vlib/unix/pci.c
@@ -0,0 +1,577 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * pci.c: Linux user space PCI bus management.
+ *
+ * Copyright (c) 2008 Eliot Dresselhaus
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <vlib/vlib.h>
+#include <vlib/pci/pci.h>
+#include <vlib/unix/unix.h>
+#include <vlib/unix/pci.h>
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <dirent.h>
+
+linux_pci_main_t linux_pci_main;
+
+static clib_error_t *
+foreach_directory_file (char * dir_name,
+ clib_error_t * (* f) (void * arg, u8 * path_name, u8 * file_name),
+ void * arg,
+ int scan_dirs)
+{
+ DIR * d;
+ struct dirent * e;
+ clib_error_t * error = 0;
+ u8 * s, * t;
+
+ d = opendir (dir_name);
+ if (! d)
+ {
+ /* System has no PCI bus. */
+ if (errno == ENOENT)
+ return 0;
+ return clib_error_return_unix (0, "open `%s'", dir_name);
+ }
+
+ s = t = 0;
+ while (1)
+ {
+ e = readdir (d);
+ if (! e)
+ break;
+ if (scan_dirs)
+ {
+ if (e->d_type == DT_DIR
+ && (! strcmp (e->d_name, ".")
+ || ! strcmp (e->d_name, "..")))
+ continue;
+ }
+ else
+ {
+ if (e->d_type == DT_DIR)
+ continue;
+ }
+
+ s = format (s, "%s/%s", dir_name, e->d_name);
+ t = format (t, "%s", e->d_name);
+ error = f (arg, s, t);
+ _vec_len (s) = 0;
+ _vec_len (t) = 0;
+
+ if (error)
+ break;
+ }
+
+  vec_free (s);
+  vec_free (t);
+ closedir (d);
+
+ return error;
+}
+
+static clib_error_t *
+write_sys_fs (char * file_name, char * fmt, ...)
+{
+ u8 * s;
+ int fd;
+
+ fd = open (file_name, O_WRONLY);
+ if (fd < 0)
+ return clib_error_return_unix (0, "open `%s'", file_name);
+
+ va_list va;
+ va_start (va, fmt);
+ s = va_format (0, fmt, &va);
+ va_end (va);
+
+  if (write (fd, s, vec_len (s)) < 0)
+    {
+      clib_error_t * error = clib_error_return_unix (0, "write `%s'", file_name);
+      vec_free (s);
+      close (fd);
+      return error;
+    }
+
+ vec_free (s);
+ close (fd);
+ return 0;
+}
+
+static clib_error_t *
+scan_uio_dir (void * arg, u8 * path_name, u8 * file_name)
+{
+ linux_pci_device_t * l = arg;
+ unformat_input_t input;
+
+ unformat_init_string (&input, (char *) file_name, vec_len (file_name));
+
+ if (! unformat (&input, "uio%d", &l->uio_minor))
+ abort ();
+
+ unformat_free (&input);
+ return 0;
+}
+
+static clib_error_t * linux_pci_uio_read_ready (unix_file_t * uf)
+{
+ linux_pci_main_t * pm = &linux_pci_main;
+ vlib_main_t * vm = pm->vlib_main;
+ linux_pci_device_t * l;
+ u32 li = uf->private_data;
+
+ l = pool_elt_at_index (pm->pci_devices, li);
+ vlib_node_set_interrupt_pending (vm, l->device_input_node_index);
+
+ /* Let node know which device is interrupting. */
+ {
+ vlib_node_runtime_t * rt = vlib_node_get_runtime (vm, l->device_input_node_index);
+ rt->runtime_data[0] |= 1 << l->device_index;
+ }
+
+ return /* no error */ 0;
+}
+
+static clib_error_t *linux_pci_uio_error_ready (unix_file_t *uf)
+{
+ u32 error_index = (u32) uf->private_data;
+
+ return clib_error_return (0, "pci device %d: error", error_index);
+}
+
+static uword pci_resource_size (uword os_handle, uword resource)
+{
+ linux_pci_main_t * pm = &linux_pci_main;
+ linux_pci_device_t * p;
+ u8 * file_name;
+ struct stat b;
+ uword result = 0;
+
+ p = pool_elt_at_index (pm->pci_devices, os_handle);
+
+ file_name = format (0, "%v/resource%d%c", p->dev_dir_name, resource, 0);
+ if (stat ((char *) file_name, &b) >= 0)
+ result = b.st_size;
+ vec_free (file_name);
+ return result;
+}
+
+void os_add_pci_disable_interrupts_reg (uword os_handle, u32 resource,
+ u32 reg_offset, u32 reg_value)
+{
+ linux_pci_main_t * pm = &linux_pci_main;
+ linux_pci_device_t * l;
+ char * file_name;
+ clib_error_t * error;
+
+ l = pool_elt_at_index (pm->pci_devices, os_handle);
+ ASSERT (resource == 0);
+ ASSERT (reg_offset < pci_resource_size (os_handle, resource));
+  file_name = (char *) format (0, "%v/disable_interrupt_regs%c", l->dev_dir_name, 0);
+ error = write_sys_fs (file_name, "%x %x", reg_offset, reg_value);
+ if (error)
+ clib_error_report (error);
+ vec_free (file_name);
+}
+
+static void add_device (pci_device_t * dev, linux_pci_device_t * pdev)
+{
+ linux_pci_main_t * pm = &linux_pci_main;
+ linux_pci_device_t * l;
+ pci_config_header_t * c;
+ u32 x[4];
+ clib_error_t * error;
+
+ c = &dev->config0.header;
+
+ pool_get (pm->pci_devices, l);
+ l[0] = pdev[0];
+
+ l->dev_dir_name = vec_dup (l->dev_dir_name);
+
+ /* Parse bus, dev, function from directory name. */
+ {
+ unformat_input_t input;
+
+ unformat_init_string (&input, (char *) l->dev_dir_name,
+ vec_len (l->dev_dir_name));
+
+ if (! unformat (&input, "/sys/bus/pci/devices/%x:%x:%x.%x",
+ &x[0], &x[1], &x[2], &x[3]))
+ abort ();
+
+ unformat_free (&input);
+
+ l->bus_address.bus = x[1];
+ l->bus_address.slot_function = (x[2] << 3) | x[3];
+ dev->bus_address = l->bus_address;
+ }
+
+ dev->os_handle = l - pm->pci_devices;
+
+ error = write_sys_fs ("/sys/bus/pci/drivers/uio_pci_dma/new_id",
+ "%x %x", c->vendor_id, c->device_id);
+ if (error)
+ clib_error_report (error);
+ error = write_sys_fs ("/sys/bus/pci/drivers/uio_pci_dma/bind",
+ "%04x:%02x:%02x.%x", x[0], x[1], x[2], x[3]);
+ /* Errors happen when re-binding so just ignore them. */
+ if (error)
+ clib_error_free (error);
+
+ {
+    u8 * uio_dir = format (0, "%v/uio%c", l->dev_dir_name, 0);
+ foreach_directory_file ((char *) uio_dir, scan_uio_dir, l, /* scan_dirs */ 1);
+ vec_free (uio_dir);
+ }
+
+ {
+ char * uio_name = (char *) format (0, "/dev/uio%d%c", l->uio_minor, 0);
+ l->uio_fd = open (uio_name, O_RDWR);
+ if (l->uio_fd < 0)
+ clib_unix_error ("open `%s'", uio_name);
+ vec_free (uio_name);
+ }
+
+ {
+ unix_file_t template = {0};
+ unix_main_t * um = &unix_main;
+
+ template.read_function = linux_pci_uio_read_ready;
+ template.file_descriptor = l->uio_fd;
+ template.error_function = linux_pci_uio_error_ready;
+ template.private_data = l - pm->pci_devices;
+
+ /* To be filled in by driver. */
+ l->device_input_node_index = ~0;
+ l->device_index = 0;
+
+ l->unix_file_index = unix_file_add (um, &template);
+ }
+}
+
+static void linux_pci_device_free (linux_pci_device_t * l)
+{
+ int i;
+ for (i = 0; i < vec_len (l->resource_fds); i++)
+ if (l->resource_fds[i] > 0)
+ close (l->resource_fds[i]);
+ if (l->config_fd > 0)
+ close (l->config_fd);
+ if (l->uio_fd > 0)
+ close (l->uio_fd);
+ vec_free (l->resource_fds);
+ vec_free (l->dev_dir_name);
+}
+
+/* Configuration space read/write. */
+clib_error_t *
+os_read_write_pci_config (uword os_handle,
+ vlib_read_or_write_t read_or_write,
+ uword address,
+ void * data,
+ u32 n_bytes)
+{
+ linux_pci_main_t * pm = &linux_pci_main;
+ linux_pci_device_t * p;
+ int n;
+
+ p = pool_elt_at_index (pm->pci_devices, os_handle);
+
+ if (address != lseek (p->config_fd, address, SEEK_SET))
+ return clib_error_return_unix (0, "seek offset %d", address);
+
+ if (read_or_write == VLIB_READ)
+ n = read (p->config_fd, data, n_bytes);
+ else
+ n = write (p->config_fd, data, n_bytes);
+
+ if (n != n_bytes)
+ return clib_error_return_unix (0, "%s",
+ read_or_write == VLIB_READ
+ ? "read" : "write");
+
+ return 0;
+}
+
+static clib_error_t *
+os_map_pci_resource_internal (uword os_handle,
+ u32 resource,
+ u8 *addr,
+ void ** result)
+{
+ linux_pci_main_t * pm = &linux_pci_main;
+ linux_pci_device_t * p;
+ struct stat stat_buf;
+ u8 * file_name;
+ int fd;
+ clib_error_t * error;
+ int flags = MAP_SHARED;
+
+ error = 0;
+ p = pool_elt_at_index (pm->pci_devices, os_handle);
+
+ file_name = format (0, "%v/resource%d%c", p->dev_dir_name, resource, 0);
+ fd = open ((char *) file_name, O_RDWR);
+ if (fd < 0)
+ {
+ error = clib_error_return_unix (0, "open `%s'", file_name);
+ goto done;
+ }
+
+ if (fstat (fd, &stat_buf) < 0)
+ {
+ error = clib_error_return_unix (0, "fstat `%s'", file_name);
+ goto done;
+ }
+
+ vec_validate (p->resource_fds, resource);
+ p->resource_fds[resource] = fd;
+ if (addr != 0)
+ flags |= MAP_FIXED;
+
+ *result = mmap (addr,
+ /* size */ stat_buf.st_size,
+ PROT_READ | PROT_WRITE,
+ flags,
+ /* file */ fd,
+ /* offset */ 0);
+ if (*result == (void *) -1)
+ {
+ error = clib_error_return_unix (0, "mmap `%s'", file_name);
+ goto done;
+ }
+
+ done:
+ if (error)
+ {
+ if (fd > 0)
+ close (fd);
+ }
+ vec_free (file_name);
+ return error;
+}
+
+clib_error_t *
+os_map_pci_resource (uword os_handle,
+ u32 resource,
+ void ** result)
+{
+ return (os_map_pci_resource_internal (os_handle, resource, 0 /* addr */,
+ result));
+}
+
+clib_error_t *
+os_map_pci_resource_fixed (uword os_handle,
+ u32 resource,
+ u8 *addr,
+ void ** result)
+{
+ return (os_map_pci_resource_internal (os_handle, resource, addr, result));
+}
+
+void os_free_pci_device (uword os_handle)
+{
+ linux_pci_main_t * pm = &linux_pci_main;
+ linux_pci_device_t * l;
+
+ l = pool_elt_at_index (pm->pci_devices, os_handle);
+ linux_pci_device_free (l);
+ pool_put (pm->pci_devices, l);
+}
+
+u8 * format_os_pci_handle (u8 * s, va_list * va)
+{
+ linux_pci_main_t * pm = &linux_pci_main;
+ uword os_pci_handle = va_arg (*va, uword);
+ linux_pci_device_t * l;
+
+ l = pool_elt_at_index (pm->pci_devices, os_pci_handle);
+ return format (s, "%x/%x/%x", l->bus_address.bus,
+ (l->bus_address.slot_function >> 3),
+ (l->bus_address.slot_function & 0x7));
+}
+
+static inline pci_device_registration_t *
+pci_device_next_registered (pci_device_registration_t * r)
+{
+ uword i;
+
+ /* Null vendor id marks end of initialized list. */
+ for (i = 0; r->supported_devices[i].vendor_id != 0; i++)
+ ;
+
+ return clib_elf_section_data_next (r, i * sizeof (r->supported_devices[0]));
+}
+
+static inline u8 kernel_driver_installed (pci_device_registration_t *r)
+{
+ u8 * link_name;
+ struct stat b;
+
+  link_name = format (0, "/sys/bus/pci/drivers/%s%c", r->kernel_driver, 0);
+  if (stat ((char *)link_name, &b) >= 0)
+    /* Count probes; the caller warns only on the first one. */
+    r->kernel_driver_running++;
+  else
+    r->kernel_driver_running = 0;
+
+ vec_free (link_name);
+ return r->kernel_driver_running;
+}
+
+static clib_error_t *
+init_device_from_registered (vlib_main_t * vm,
+ pci_device_t * dev,
+ linux_pci_device_t * pdev)
+{
+ unix_main_t * um = vlib_unix_get_main();
+ pci_device_registration_t * r;
+ pci_device_id_t * i;
+ pci_config_header_t * c;
+
+ c = &dev->config0.header;
+
+ r = um->pci_device_registrations;
+
+ while (r)
+ {
+ for (i = r->supported_devices; i->vendor_id != 0; i++)
+ if (i->vendor_id == c->vendor_id && i->device_id == c->device_id)
+ {
+ if (r->kernel_driver && kernel_driver_installed(r))
+ {
+ if (r->kernel_driver_running == 1)
+ {
+ clib_warning("PCI device type [%04x:%04x] is busy!\n"
+ "\tUninstall the associated linux kernel "
+ "driver: sudo rmmod %s",
+ c->vendor_id, c->device_id, r->kernel_driver);
+ }
+ continue;
+ }
+ add_device (dev, pdev);
+ return r->init_function (vm, dev);
+ }
+ r = r->next_registration;
+ }
+ /* No driver, close the PCI config-space FD */
+ close (pdev->config_fd);
+ return 0;
+}
+
+static clib_error_t *
+init_device (vlib_main_t * vm,
+ pci_device_t * dev,
+ linux_pci_device_t * pdev)
+{
+ return init_device_from_registered (vm, dev, pdev);
+}
+
+static clib_error_t *
+scan_device (void * arg, u8 * dev_dir_name, u8 * ignored)
+{
+ vlib_main_t * vm = arg;
+ int fd;
+ u8 * f;
+ clib_error_t * error = 0;
+ pci_device_t dev = {0};
+ linux_pci_device_t pdev = {0};
+
+ f = format (0, "%v/config%c", dev_dir_name, 0);
+ fd = open ((char *) f, O_RDWR);
+
+ /* Try read-only access if write fails. */
+ if (fd < 0)
+ fd = open ((char *) f, O_RDONLY);
+
+ if (fd < 0)
+ {
+ error = clib_error_return_unix (0, "open `%s'", f);
+ goto done;
+ }
+
+  /* You can only read more than 64 bytes of config space as root, so we try
+     to read the full space but fall back to just the first 64 bytes. */
+ if (read (fd, &dev.config_data, sizeof (dev.config_data)) != sizeof (dev.config_data)
+ && read (fd, &dev.config0, sizeof (dev.config0)) != sizeof (dev.config0))
+ {
+ error = clib_error_return_unix (0, "read `%s'", f);
+ goto done;
+ }
+
+ {
+ static pci_config_header_t all_ones;
+ if (all_ones.vendor_id == 0)
+ memset (&all_ones, ~0, sizeof (all_ones));
+
+ if (! memcmp (&dev.config0.header, &all_ones, sizeof (all_ones)))
+ {
+ error = clib_error_return (0, "invalid PCI config for `%s'", f);
+ goto done;
+ }
+ }
+
+ if (dev.config0.header.header_type == 0)
+ pci_config_type0_little_to_host (&dev.config0);
+ else
+ pci_config_type1_little_to_host (&dev.config1);
+
+ pdev.config_fd = fd;
+ pdev.dev_dir_name = dev_dir_name;
+
+ error = init_device (vm, &dev, &pdev);
+
+ done:
+ vec_free (f);
+ return error;
+}
+
+clib_error_t * pci_bus_init (vlib_main_t * vm)
+{
+ linux_pci_main_t * pm = &linux_pci_main;
+ clib_error_t * error;
+
+ pm->vlib_main = vm;
+
+ if ((error = vlib_call_init_function (vm, unix_input_init)))
+ return error;
+
+ error = foreach_directory_file ("/sys/bus/pci/devices", scan_device, vm, /* scan_dirs */ 0);
+
+  /* Complain and continue. We might not be root, etc. */
+ if (error)
+ clib_error_report (error);
+
+ return error;
+}
+
+VLIB_INIT_FUNCTION (pci_bus_init);
diff --git a/vlib/vlib/unix/pci.h b/vlib/vlib/unix/pci.h
new file mode 100644
index 00000000000..b384250eb47
--- /dev/null
+++ b/vlib/vlib/unix/pci.h
@@ -0,0 +1,94 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * unix/pci.h: Linux specific pci state
+ *
+ * Copyright (c) 2008 Eliot Dresselhaus
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef included_unix_pci_h
+#define included_unix_pci_h
+
+#include <vlib/pci/pci.h>
+
+typedef struct {
+ /* /sys/bus/pci/devices/... directory name for this device. */
+ u8 * dev_dir_name;
+
+ /* Resource file descriptors. */
+ int * resource_fds;
+
+ /* File descriptor for config space read/write. */
+ int config_fd;
+
+  /* PCI bus address for this device, parsed from the /sys/bus/pci/devices name. */
+ pci_bus_address_t bus_address;
+
+ /* File descriptor for /dev/uio%d */
+ int uio_fd;
+
+ /* Minor device for uio device. */
+ u32 uio_minor;
+
+ /* Index given by unix_file_add. */
+ u32 unix_file_index;
+
+ /* Input node to handle interrupts for this device. */
+ u32 device_input_node_index;
+
+ /* Node runtime will be a bitmap of device indices with pending interrupts. */
+ u32 device_index;
+} linux_pci_device_t;
+
+/* Pool of PCI devices. */
+typedef struct {
+ vlib_main_t * vlib_main;
+ linux_pci_device_t * pci_devices;
+} linux_pci_main_t;
+
+extern linux_pci_main_t linux_pci_main;
+
+always_inline linux_pci_device_t *
+pci_dev_for_linux (pci_device_t * dev)
+{
+ linux_pci_main_t * pm = &linux_pci_main;
+ return pool_elt_at_index (pm->pci_devices, dev->os_handle);
+}
+
+/* Call to allocate/initialize the pci subsystem.
+ This is not an init function so that users can explicitly enable
+ pci only when it's needed. */
+clib_error_t * pci_bus_init (vlib_main_t * vm);
+
+#endif /* included_unix_pci_h */
diff --git a/vlib/vlib/unix/physmem.c b/vlib/vlib/unix/physmem.c
new file mode 100644
index 00000000000..83b40be6449
--- /dev/null
+++ b/vlib/vlib/unix/physmem.c
@@ -0,0 +1,472 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * physmem.c: Unix physical memory
+ *
+ * Copyright (c) 2008 Eliot Dresselhaus
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <vlib/unix/physmem.h>
+
+static physmem_main_t physmem_main;
+
+static void *
+unix_physmem_alloc_aligned (vlib_physmem_main_t * vpm, uword n_bytes, uword alignment)
+{
+ physmem_main_t * pm = &physmem_main;
+ uword lo_offset, hi_offset;
+ uword * to_free = 0;
+
+#if DPDK > 0
+ clib_warning ("unsafe alloc!");
+#endif
+
+ /* IO memory is always at least cache aligned. */
+ alignment = clib_max (alignment, CLIB_CACHE_LINE_BYTES);
+
+ while (1)
+ {
+ mheap_get_aligned (pm->heap, n_bytes,
+ /* align */ alignment,
+ /* align offset */ 0,
+ &lo_offset);
+
+ /* Allocation failed? */
+ if (lo_offset == ~0)
+ break;
+
+ /* Make sure allocation does not span DMA physical chunk boundary. */
+ hi_offset = lo_offset + n_bytes - 1;
+
+ if ((lo_offset >> vpm->log2_n_bytes_per_page) ==
+ (hi_offset >> vpm->log2_n_bytes_per_page))
+ break;
+
+ /* Allocation would span chunk boundary, queue it to be freed as soon as
+ we find suitable chunk. */
+ vec_add1 (to_free, lo_offset);
+ }
+
+ if (to_free != 0)
+ {
+ uword i;
+ for (i = 0; i < vec_len (to_free); i++)
+ mheap_put (pm->heap, to_free[i]);
+ vec_free (to_free);
+ }
+
+ return lo_offset != ~0 ? pm->heap + lo_offset : 0;
+}
+
+static void unix_physmem_free (void * x)
+{
+ physmem_main_t * pm = &physmem_main;
+
+ /* Return object to region's heap. */
+ mheap_put (pm->heap, x - pm->heap);
+}
+
+static void htlb_shutdown(void)
+{
+ physmem_main_t * pm = &physmem_main;
+
+ if (! pm->shmid)
+ return;
+ shmctl (pm->shmid, IPC_RMID, 0);
+ pm->shmid = 0;
+}
+
+/* Try to use huge TLB pages if possible. */
+static int htlb_init (vlib_main_t * vm)
+{
+ vlib_physmem_main_t * vpm = &vm->physmem_main;
+ physmem_main_t * pm = &physmem_main;
+ u64 hugepagesize, pagesize;
+ u64 pfn, seek_loc;
+ u64 cur, physaddr, ptbits;
+ int fd, i;
+
+ pm->shmid = shmget (11 /* key, my amp goes to 11 */, pm->mem_size,
+ IPC_CREAT | SHM_HUGETLB | SHM_R | SHM_W);
+ if (pm->shmid < 0)
+ {
+ clib_unix_warning ("shmget");
+ return 0;
+ }
+
+ pm->mem = shmat (pm->shmid, NULL, 0 /* flags */);
+  if (pm->mem == (void *) -1)   /* shmat returns (void *) -1, not 0, on failure */
+ {
+ shmctl (pm->shmid, IPC_RMID, 0);
+ return 0;
+ }
+
+ memset (pm->mem, 0, pm->mem_size);
+
+ /* $$$ get page size info from /proc/meminfo */
+ hugepagesize = 2<<20;
+ pagesize = 4<<10;
+ vpm->log2_n_bytes_per_page = min_log2 (hugepagesize);
+ vec_resize (vpm->page_table, pm->mem_size / hugepagesize);
+
+ vpm->page_mask = pow2_mask (vpm->log2_n_bytes_per_page);
+ vpm->virtual.start = pointer_to_uword (pm->mem);
+ vpm->virtual.size = pm->mem_size;
+ vpm->virtual.end = vpm->virtual.start + vpm->virtual.size;
+
+ fd = open("/proc/self/pagemap", O_RDONLY);
+
+ if (fd < 0)
+ {
+ (void) shmdt (pm->mem);
+ return 0;
+ }
+
+ pm->heap = mheap_alloc_with_flags
+ (pm->mem, pm->mem_size,
+ /* Don't want mheap mmap/munmap with IO memory. */
+ MHEAP_FLAG_DISABLE_VM);
+
+ cur = (u64) pm->mem;
+ i = 0;
+
+ while (cur < (u64) pm->mem + pm->mem_size)
+ {
+ pfn = (u64) cur / pagesize;
+ seek_loc = pfn * sizeof (u64);
+ if (lseek (fd, seek_loc, SEEK_SET) != seek_loc)
+ {
+ clib_unix_warning ("lseek to 0x%llx", seek_loc);
+ shmctl (pm->shmid, IPC_RMID, 0);
+ close(fd);
+ return 0;
+ }
+ if (read (fd, &ptbits, sizeof (ptbits)) != (sizeof(ptbits)))
+ {
+ clib_unix_warning ("read ptbits");
+ shmctl (pm->shmid, IPC_RMID, 0);
+ close(fd);
+ return 0;
+ }
+
+ /* bits 0-54 are the physical page number */
+ physaddr = (ptbits & 0x7fffffffffffffULL) * pagesize;
+ if (CLIB_DEBUG > 1)
+ fformat(stderr, "pm: virtual 0x%llx physical 0x%llx\n",
+ cur, physaddr);
+ vpm->page_table[i++] = physaddr;
+
+ cur += hugepagesize;
+ }
+ close(fd);
+ atexit (htlb_shutdown);
+ return 1;
+}
+
+int vlib_app_physmem_init (vlib_main_t * vm,
+ physmem_main_t * pm, int) __attribute__ ((weak));
+int vlib_app_physmem_init (vlib_main_t * vm, physmem_main_t * pm, int x)
+{
+ return 0;
+}
+
+clib_error_t * unix_physmem_init (vlib_main_t * vm, int physical_memory_required)
+{
+ vlib_physmem_main_t * vpm = &vm->physmem_main;
+ physmem_main_t * pm = &physmem_main;
+ clib_error_t * error = 0;
+ char * dev_uio_dma_file = "/dev/uio-dma";
+ int using_fake_memory = 0;
+
+ /* Avoid multiple calls. */
+ if (vm->os_physmem_alloc_aligned)
+ return error;
+
+ vm->os_physmem_alloc_aligned = unix_physmem_alloc_aligned;
+ vm->os_physmem_free = unix_physmem_free;
+ pm->mem = MAP_FAILED;
+
+ if (pm->mem_size == 0)
+ pm->mem_size = 16 << 20;
+
+ /* OK, Mr. App, you tell us */
+ if (vlib_app_physmem_init (vm, pm, physical_memory_required))
+ return 0;
+
+ if (physical_memory_required)
+ {
+ if (!pm->no_hugepages && htlb_init(vm))
+ {
+ fformat(stderr, "%s: use huge pages\n", __FUNCTION__);
+ return 0;
+ }
+ pm->uio_dma_fd = open (dev_uio_dma_file, O_RDWR);
+ }
+ else
+ pm->uio_dma_fd = -1;
+
+ if (pm->uio_dma_fd < 0)
+ {
+ if (physical_memory_required)
+ {
+ error = clib_error_return_unix (0, "open `%s'", dev_uio_dma_file);
+ goto done;
+ }
+
+ using_fake_memory = 1;
+ pm->mem = mmap (0, pm->mem_size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+ if (pm->mem == MAP_FAILED)
+ {
+ error = clib_error_return_unix (0, "mmap");
+ goto done;
+ }
+
+ pm->heap = mheap_alloc (pm->mem, pm->mem_size);
+
+ /* Identity map with a single page. */
+ vpm->log2_n_bytes_per_page = min_log2 (pm->mem_size);
+ vec_add1 (vpm->page_table, pointer_to_uword (pm->mem));
+ }
+ else
+ error = clib_error_return (0, "uio_dma deprecated");
+
+ if (using_fake_memory)
+ fformat(stderr, "%s: use fake dma pages\n", __FUNCTION__);
+ else
+ fformat(stderr, "%s: use uio dma pages\n", __FUNCTION__);
+
+ done:
+ if (error)
+ {
+ if (pm->mem != MAP_FAILED)
+ munmap (pm->mem, pm->mem_size);
+ if (pm->uio_dma_fd >= 0)
+ {
+ close (pm->uio_dma_fd);
+ pm->uio_dma_fd = -1;
+ }
+ }
+ return error;
+}
+
+static clib_error_t *
+show_physmem (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+#if DPDK > 0
+ vlib_cli_output (vm, "Not supported with DPDK drivers.");
+#else
+ physmem_main_t * pm = &physmem_main;
+
+ if (pm->heap)
+ vlib_cli_output (vm, "%U", format_mheap, pm->heap, /* verbose */ 0);
+ else
+ vlib_cli_output (vm, "No physmem allocated.");
+#endif
+ return 0;
+}
+
+VLIB_CLI_COMMAND (show_physmem_command, static) = {
+ .path = "show physmem",
+ .short_help = "Show physical memory allocation",
+ .function = show_physmem,
+};
+
+static clib_error_t *
+show_affinity (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ cpu_set_t set;
+ cpu_set_t *setp = &set;
+ int i, rv;
+ u8 *s = 0;
+ int first_set_bit_in_run = -1;
+ int last_set_bit_in_run = -1;
+ int output_done = 0;
+
+ rv = sched_getaffinity (0 /* pid, 0 = this proc */,
+ sizeof (*setp), setp);
+ if (rv < 0)
+ {
+ vlib_cli_output (vm, "Couldn't get affinity mask: %s\n",
+ strerror(errno));
+ return 0;
+ }
+
+ for (i = 0; i < 64; i++)
+ {
+ if (CPU_ISSET(i, setp))
+ {
+ if (first_set_bit_in_run == -1)
+ {
+ first_set_bit_in_run = i;
+ last_set_bit_in_run = i;
+ if (output_done)
+ s = format (s, ",");
+ s = format (s, "%d-", i);
+ output_done = 1;
+ }
+ else
+ {
+ if (i == (last_set_bit_in_run+1))
+ last_set_bit_in_run = i;
+ }
+ }
+ else
+ {
+ if (first_set_bit_in_run != -1)
+ {
+ if (first_set_bit_in_run == (i-1))
+ {
+                  /* Strip the "%d-" just emitted: 1 or 2 digits plus '-'. */
+                  _vec_len (s) -= 2 + (first_set_bit_in_run >= 10);
+ }
+ s = format (s, "%d", last_set_bit_in_run);
+ first_set_bit_in_run = -1;
+ last_set_bit_in_run = -1;
+ }
+ }
+ }
+
+ if (first_set_bit_in_run != -1)
+ s = format (s, "%d", first_set_bit_in_run);
+
+  vlib_cli_output (vm, "Process runs on: %v", s);
+  vec_free (s);
+  return 0;
+}
+
+VLIB_CLI_COMMAND (show_affinity_command, static) = {
+ .path = "show affinity",
+ .short_help = "Show process cpu affinity",
+ .function = show_affinity,
+};
+
+static clib_error_t *
+set_affinity (vlib_main_t * vm,
+ unformat_input_t * input,
+ vlib_cli_command_t * cmd)
+{
+ cpu_set_t set;
+ cpu_set_t *setp = &set;
+ int i, rv;
+ int another_round;
+ u32 first, last;
+
+ memset (setp, 0, sizeof (*setp));
+
+ do {
+ another_round = 0;
+ if (unformat (input, "%d-%d,", &first, &last))
+ {
+          if (first >= 64 || last >= 64)
+ {
+ barf1:
+ vlib_cli_output (vm, "range %d-%d invalid", first, last);
+ return 0;
+ }
+
+ for (i = first; i <= last; i++)
+ CPU_SET(i, setp);
+ another_round = 1;
+ }
+ else if (unformat (input, "%d-%d", &first, &last))
+ {
+          if (first >= 64 || last >= 64)
+ goto barf1;
+
+ for (i = first; i <= last; i++)
+ CPU_SET(i, setp);
+ }
+ else if (unformat (input, "%d,", &first))
+ {
+          if (first >= 64)
+ {
+ barf2:
+ vlib_cli_output (vm, "cpu %d invalid", first);
+ return 0;
+ }
+ CPU_SET(first, setp);
+ another_round = 1;
+ }
+ else if (unformat (input, "%d", &first))
+ {
+          if (first >= 64)
+ goto barf2;
+
+ CPU_SET(first, setp);
+ }
+ } while (another_round);
+
+ rv = sched_setaffinity (0 /* pid, 0 = this proc */,
+ sizeof (*setp), setp);
+
+ if (rv < 0)
+ {
+      vlib_cli_output (vm, "Couldn't set affinity mask: %s\n",
+ strerror(errno));
+ return 0;
+ }
+ return show_affinity (vm, input, cmd);
+}
+
+VLIB_CLI_COMMAND (set_affinity_command, static) = {
+ .path = "set affinity",
+ .short_help = "Set process cpu affinity",
+ .function = set_affinity,
+};
+
+static clib_error_t *
+vlib_physmem_configure (vlib_main_t * vm, unformat_input_t * input)
+{
+ physmem_main_t * pm = &physmem_main;
+ u32 size_in_mb;
+
+ while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+ {
+ if (unformat (input, "no-huge") || unformat (input, "no-huge-pages"))
+ pm->no_hugepages = 1;
+
+ else if (unformat(input, "size-in-mb %d", &size_in_mb) ||
+ unformat(input, "size %d", &size_in_mb))
+ pm->mem_size = size_in_mb << 20;
+ else
+ return unformat_parse_error (input);
+ }
+
+ unformat_free (input);
+ return 0;
+}
+
+VLIB_EARLY_CONFIG_FUNCTION (vlib_physmem_configure, "physmem");
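+
+/* Illustrative startup-config stanza (an assumed usage example, not part
+   of this commit) consumed by vlib_physmem_configure above:
+
+     physmem {
+       no-huge
+       size-in-mb 32
+     }
+
+   "no-huge" sets pm->no_hugepages and "size-in-mb 32" sets pm->mem_size
+   to 32 << 20 bytes; with no stanza the default size is 16 megabytes. */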
diff --git a/vlib/vlib/unix/physmem.h b/vlib/vlib/unix/physmem.h
new file mode 100644
index 00000000000..a963be746d8
--- /dev/null
+++ b/vlib/vlib/unix/physmem.h
@@ -0,0 +1,59 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef __included_physmem_h__
+#define __included_physmem_h__
+
+/* Manage I/O physical memory. */
+#define _GNU_SOURCE
+#include <sched.h>
+#include <vppinfra/cache.h>
+#include <vppinfra/error.h>
+#include <vppinfra/mheap.h>
+#include <vppinfra/os.h>
+
+#include <vlib/vlib.h>
+#include <vlib/unix/unix.h>
+
+#include <sys/fcntl.h> /* for open */
+#include <sys/file.h> /* for flock */
+#include <sys/ioctl.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <sys/ipc.h>
+#include <sys/shm.h>
+
+typedef struct {
+ /* File descriptor for /dev/uio-dma. */
+ int uio_dma_fd;
+
+  /* Base of the virtual memory region (from shmat or mmap). */
+ void * mem;
+
+ /* Size in bytes. */
+ uword mem_size;
+
+ /* Heap allocated out of virtual memory. */
+ void * heap;
+
+ /* huge TLB segment id */
+ int shmid;
+
+  /* Set to skip huge TLB (hugepage) setup. */
+ int no_hugepages;
+
+} physmem_main_t;
+
+#endif /* __included_physmem_h__ */
diff --git a/vlib/vlib/unix/plugin.c b/vlib/vlib/unix/plugin.c
new file mode 100644
index 00000000000..3411ef340af
--- /dev/null
+++ b/vlib/vlib/unix/plugin.c
@@ -0,0 +1,210 @@
+/*
+ * plugin.c: plugin handling
+ *
+ * Copyright (c) 2011 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vlib/unix/plugin.h>
+#include <dlfcn.h>
+#include <dirent.h>
+
+plugin_main_t vlib_plugin_main;
+
+void vlib_set_get_handoff_structure_cb (void *cb)
+{
+ plugin_main_t * pm = &vlib_plugin_main;
+ pm->handoff_structure_get_cb = cb;
+}
+
+static void * vnet_get_handoff_structure (void)
+{
+ void * (*fp)(void);
+
+ fp = vlib_plugin_main.handoff_structure_get_cb;
+ if (fp == 0)
+ return 0;
+ else
+ return (*fp)();
+}
+
+static int
+load_one_plugin (plugin_main_t *pm, plugin_info_t *pi, int from_early_init)
+{
+ void *handle, *register_handle;
+ clib_error_t * (*fp)(vlib_main_t *, void *, int);
+ clib_error_t * error;
+ void *handoff_structure;
+
+ handle = dlopen ((char *)pi->name, RTLD_LAZY);
+
+ /*
+ * Note: this can happen if the plugin has an undefined symbol reference,
+ * so print a warning. Otherwise, the poor slob won't know what happened.
+ * Ask me how I know that...
+ */
+ if (handle == 0)
+ {
+ clib_warning ("%s", dlerror());
+ return -1;
+ }
+
+ pi->handle = handle;
+
+ register_handle = dlsym (pi->handle, "vlib_plugin_register");
+ if (register_handle == 0)
+ {
+ dlclose (handle);
+ return 0;
+ }
+
+ fp = register_handle;
+
+ handoff_structure = vnet_get_handoff_structure();
+
+ if (handoff_structure == 0)
+ error = clib_error_return (0, "handoff structure callback returned 0");
+ else
+ error = (*fp)(pm->vlib_main, handoff_structure, from_early_init);
+
+ if (error)
+ {
+ clib_error_report (error);
+ dlclose (handle);
+ return 1;
+ }
+
+ clib_warning ("Loaded plugin: %s", pi->name);
+
+ return 0;
+}
+
+static u8 **split_plugin_path (plugin_main_t *pm)
+{
+ int i;
+ u8 **rv = 0;
+ u8 *path = pm->plugin_path;
+ u8 *this = 0;
+
+ for (i = 0; i < vec_len (pm->plugin_path); i++)
+ {
+ if (path[i] != ':')
+ {
+ vec_add1(this, path[i]);
+ continue;
+ }
+ vec_add1(this, 0);
+ vec_add1 (rv, this);
+ this = 0;
+ }
+ if (this)
+ {
+ vec_add1 (this, 0);
+ vec_add1 (rv, this);
+ }
+ return rv;
+}
+
+int vlib_load_new_plugins (plugin_main_t *pm, int from_early_init)
+{
+ DIR *dp;
+ struct dirent *entry;
+ struct stat statb;
+ uword *p;
+ plugin_info_t *pi;
+ u8 **plugin_path;
+ int i;
+
+ plugin_path = split_plugin_path (pm);
+
+ for (i = 0; i < vec_len (plugin_path); i++)
+ {
+ dp = opendir ((char *)plugin_path[i]);
+
+ if (dp == 0)
+ continue;
+
+ while ((entry = readdir (dp)))
+ {
+ u8 *plugin_name;
+
+ if (pm->plugin_name_filter)
+ {
+ int j;
+ for (j = 0; j < vec_len (pm->plugin_name_filter); j++)
+ if (entry->d_name[j] != pm->plugin_name_filter[j])
+ goto next;
+ }
+
+ plugin_name = format (0, "%s/%s%c", plugin_path[i],
+ entry->d_name, 0);
+
+ /* unreadable */
+ if (stat ((char *)plugin_name, &statb) < 0)
+ {
+ ignore:
+ vec_free (plugin_name);
+ continue;
+ }
+
+ /* a dir or other things which aren't plugins */
+ if (!S_ISREG(statb.st_mode))
+ goto ignore;
+
+ p = hash_get_mem (pm->plugin_by_name_hash, plugin_name);
+ if (p == 0)
+ {
+ vec_add2 (pm->plugin_info, pi, 1);
+ pi->name = plugin_name;
+ pi->file_info = statb;
+
+              if (load_one_plugin (pm, pi, from_early_init))
+                {
+                  vec_free (plugin_name);
+                  _vec_len (pm->plugin_info) = vec_len (pm->plugin_info) - 1;
+                  /* Scrub the abandoned slot; keep pi->name/handle on success. */
+                  memset (pi, 0, sizeof (*pi));
+                  continue;
+                }
+              hash_set_mem (pm->plugin_by_name_hash, plugin_name,
+                            pi - pm->plugin_info);
+ }
+ next:
+ ;
+ }
+ closedir (dp);
+ vec_free (plugin_path[i]);
+ }
+ vec_free (plugin_path);
+ return 0;
+}
+
+char *vlib_plugin_path __attribute__((weak));
+char *vlib_plugin_path = "";
+char *vlib_plugin_name_filter __attribute__((weak));
+char *vlib_plugin_name_filter = 0;
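+
+/* Illustrative application-side overrides, a sketch only: strong
+   definitions like these in the application replace the weak ones above
+   and enable the plugin scheme. The path shown is hypothetical.
+
+     char *vlib_plugin_path = "/usr/lib/example_plugins";
+     char *vlib_plugin_name_filter = 0;
+
+   A null filter loads every regular file found in the path. */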
+
+int vlib_plugin_early_init (vlib_main_t *vm)
+{
+ plugin_main_t *pm = &vlib_plugin_main;
+
+ pm->plugin_path = format (0, "%s%c", vlib_plugin_path, 0);
+
+ clib_warning ("plugin path %s", pm->plugin_path);
+
+ if (vlib_plugin_name_filter)
+ pm->plugin_name_filter = format (0, "%s%c", vlib_plugin_name_filter, 0);
+
+ pm->plugin_by_name_hash = hash_create_string (0, sizeof (uword));
+ pm->vlib_main = vm;
+
+ return vlib_load_new_plugins (pm, 1 /* from_early_init */);
+}
diff --git a/vlib/vlib/unix/plugin.h b/vlib/vlib/unix/plugin.h
new file mode 100644
index 00000000000..e7d75099ed9
--- /dev/null
+++ b/vlib/vlib/unix/plugin.h
@@ -0,0 +1,88 @@
+/*
+ * plugin.h: plugin handling
+ *
+ * Copyright (c) 2011 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __included_plugin_h__
+#define __included_plugin_h__
+
+#include <vlib/vlib.h>
+#include <vlib/unix/unix.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <unistd.h>
+
+/*
+ * vlib plugin scheme
+ *
+ * Almost anything which can be made to work in a vlib unix
+ * application will also work in a vlib plugin.
+ *
+ * The elf-section magic which registers static objects
+ * works so long as plugins are preset when the vlib unix process
+ * starts. But wait: there's more...
+ *
+ * If an application calls vlib_load_new_plugins() -- possibly after
+ * changing vlib_plugin_main.plugin_path or
+ * vlib_plugin_main.plugin_name_filter -- new plugins will be
+ * loaded. That, in turn, allows considerable
+ * flexibility in terms of adding feature code or fixing bugs without
+ * requiring the data-plane process to restart.
+ *
+ * When the plugin mechanism loads a plugin, it uses dlsym to locate
+ * and call the plugin's function vlib_plugin_register() if it exists.
+ * A plugin which expects to be loaded after the vlib application
+ * starts uses this callback to modify the application. If vlib_plugin_register
+ * returns non-zero, the plugin mechanism dlclose()'s the plugin.
+ *
+ * Applications control the plugin search path and name filter by
+ * declaring the variables vlib_plugin_path and vlib_plugin_name_filter.
+ * libvlib_unix.la supplies weak definitions for these symbols which
+ * effectively disable the scheme. In order for the elf-section magic to
+ * work, static plugins must be loaded at the earliest possible moment.
+ *
+ * An application can change these parameters at any time and call
+ * vlib_load_new_plugins().
+ */
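+
+/* Illustrative plugin skeleton, a sketch and not part of this commit.
+   A shared object containing something like the following, placed in
+   vlib_plugin_main.plugin_path, is picked up by vlib_load_new_plugins():
+
+     clib_error_t *
+     vlib_plugin_register (vlib_main_t * vm, void * handoff,
+                           int from_early_init)
+     {
+       return 0;
+     }
+
+   The handoff pointer is whatever the application's
+   handoff_structure_get_cb returned; its layout is a contract between
+   application and plugin. Returning an error makes the loader
+   dlclose() the plugin. */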
+
+
+
+typedef struct {
+ u8 *name;
+ struct stat file_info;
+ void *handle;
+} plugin_info_t;
+
+typedef struct {
+ /* loaded plugin info */
+ plugin_info_t *plugin_info;
+ uword *plugin_by_name_hash;
+
+ /* path and name filter */
+ u8 *plugin_path;
+ u8 *plugin_name_filter;
+
+ /* handoff structure get callback */
+ void *handoff_structure_get_cb;
+
+ /* usual */
+ vlib_main_t *vlib_main;
+} plugin_main_t;
+
+extern plugin_main_t vlib_plugin_main;
+
+int vlib_plugin_early_init (vlib_main_t *vm);
+int vlib_load_new_plugins (plugin_main_t *pm, int from_early_init);
+
+#endif /* __included_plugin_h__ */
diff --git a/vlib/vlib/unix/unix.h b/vlib/vlib/unix/unix.h
new file mode 100644
index 00000000000..0802a93baa3
--- /dev/null
+++ b/vlib/vlib/unix/unix.h
@@ -0,0 +1,177 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * unix.h: Unix specific main state
+ *
+ * Copyright (c) 2008 Eliot Dresselhaus
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef included_unix_unix_h
+#define included_unix_unix_h
+
+#include <vppinfra/socket.h>
+
+struct unix_file;
+typedef clib_error_t * (unix_file_function_t) (struct unix_file * f);
+
+typedef struct unix_file {
+ /* Unix file descriptor from open/socket. */
+ u32 file_descriptor;
+
+ u32 flags;
+#define UNIX_FILE_DATA_AVAILABLE_TO_WRITE (1 << 0)
+
+ /* Data available for function's use. */
+ uword private_data;
+
+ /* Functions to be called when read/write data becomes ready. */
+ unix_file_function_t * read_function, * write_function, * error_function;
+} unix_file_t;
+
+typedef struct {
+ f64 time;
+ clib_error_t * error;
+} unix_error_history_t;
+
+typedef enum {
+ UNIX_FILE_UPDATE_ADD,
+ UNIX_FILE_UPDATE_MODIFY,
+ UNIX_FILE_UPDATE_DELETE,
+} unix_file_update_type_t;
+
+typedef struct {
+ /* Back pointer to main structure. */
+ vlib_main_t * vlib_main;
+
+ u32 flags;
+ /* Run interactively or as daemon (background process). */
+#define UNIX_FLAG_INTERACTIVE (1 << 0)
+#define UNIX_FLAG_NODAEMON (1 << 1)
+
+ /* Pool of files to poll for input/output. */
+ unix_file_t * file_pool;
+
+ /* CLI listen socket. */
+ clib_socket_t cli_listen_socket;
+
+ void (* file_update) (unix_file_t * file, unix_file_update_type_t update_type);
+
+ /* Circular buffer of last unix errors. */
+ unix_error_history_t error_history[128];
+ u32 error_history_index;
+ u64 n_total_errors;
+
+ /* startup-config filename */
+ u8 *startup_config_filename;
+
+ /* unix config complete */
+ volatile int unix_config_complete;
+
+ /* CLI log file. GIGO. */
+ u8 *log_filename;
+ int log_fd;
+ /* Don't put telnet connections into character mode */
+ int cli_line_mode;
+ u32 cli_history_limit;
+
+} unix_main_t;
+
+/* Global main structure. */
+extern unix_main_t unix_main;
+
+always_inline uword
+unix_file_add (unix_main_t * um, unix_file_t * template)
+{
+ unix_file_t * f;
+ pool_get (um->file_pool, f);
+ f[0] = template[0];
+ um->file_update (f, UNIX_FILE_UPDATE_ADD);
+ return f - um->file_pool;
+}
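+
+/* Typical registration, sketched with hypothetical names (my_read_ready,
+   my_state): fill in a stack-allocated template, register it, and keep
+   the returned pool index for a later unix_file_del():
+
+     unix_file_t template = {0};
+     template.read_function = my_read_ready;
+     template.file_descriptor = fd;
+     template.private_data = pointer_to_uword (my_state);
+     u32 fi = unix_file_add (&unix_main, &template);
+*/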
+
+always_inline void
+unix_file_del (unix_main_t * um, unix_file_t * f)
+{
+ um->file_update (f, UNIX_FILE_UPDATE_DELETE);
+ close (f->file_descriptor);
+ f->file_descriptor = ~0;
+ pool_put (um->file_pool, f);
+}
+
+always_inline uword
+unix_file_set_data_available_to_write (u32 unix_file_index, uword is_available)
+{
+ unix_file_t * uf = pool_elt_at_index (unix_main.file_pool, unix_file_index);
+ uword was_available = (uf->flags & UNIX_FILE_DATA_AVAILABLE_TO_WRITE);
+ if ((was_available != 0) != (is_available != 0))
+ {
+ uf->flags ^= UNIX_FILE_DATA_AVAILABLE_TO_WRITE;
+ unix_main.file_update (uf, UNIX_FILE_UPDATE_MODIFY);
+ }
+ return was_available != 0;
+}
+
+always_inline void
+unix_save_error (unix_main_t * um, clib_error_t * error)
+{
+ unix_error_history_t * eh = um->error_history + um->error_history_index;
+ clib_error_free_vector (eh->error);
+ eh->error = error;
+ eh->time = vlib_time_now (um->vlib_main);
+ um->n_total_errors += 1;
+ if (++um->error_history_index >= ARRAY_LEN (um->error_history))
+ um->error_history_index = 0;
+}
+
+/* Main function for Unix VLIB. */
+int vlib_unix_main (int argc, char * argv[]);
+
+/* Call to allocate/initialize the physical DMA memory subsystem.
+   This is not an init function so that users can explicitly enable/disable
+   physmem when it's not needed. */
+clib_error_t * unix_physmem_init (vlib_main_t * vm,
+ int fail_if_physical_memory_not_present);
+
+/* Set prompt for CLI. */
+void vlib_unix_cli_set_prompt (char * prompt);
+
+static inline unix_main_t * vlib_unix_get_main (void)
+{
+ return &unix_main;
+}
+
+/* thread stack array; vec_len = max number of threads */
+extern u8 **vlib_thread_stacks;
+
+#endif /* included_unix_unix_h */
diff --git a/vlib/vlib/vlib.h b/vlib/vlib/vlib.h
new file mode 100644
index 00000000000..74101f8d297
--- /dev/null
+++ b/vlib/vlib/vlib.h
@@ -0,0 +1,78 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * vlib.h: top-level include file
+ *
+ * Copyright (c) 2008 Eliot Dresselhaus
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef included_vlib_h
+#define included_vlib_h
+
+#include <vppinfra/clib.h>
+#include <vppinfra/elf_clib.h>
+
+/* Generic definitions. */
+#include <vlib/defs.h>
+
+/* Forward declarations of structs to avoid circular dependencies. */
+struct vlib_main_t;
+
+/* All includes in alphabetical order. */
+#include <vlib/buffer.h>
+#include <vlib/cli.h>
+#include <vlib/counter.h>
+#include <vlib/error.h>
+#include <vlib/init.h>
+#include <vlib/mc.h>
+#include <vlib/node.h>
+#include <vlib/physmem.h>
+#include <vlib/trace.h>
+
+/* Main include depends on other vlib/ includes so we put it last. */
+#include <vlib/main.h>
+
+/* Inline/extern function declarations. */
+#include <vlib/threads.h>
+#include <vlib/buffer_funcs.h>
+#include <vlib/cli_funcs.h>
+#include <vlib/error_funcs.h>
+#include <vlib/format_funcs.h>
+#include <vlib/node_funcs.h>
+#include <vlib/trace_funcs.h>
+#include <vlib/global_funcs.h>
+
+#include <vlib/buffer_node.h>
+
+#endif /* included_vlib_h */