diff options
author | Ed Warnicke <eaw@cisco.com> | 2015-12-08 15:45:58 -0700 |
---|---|---|
committer | Ed Warnicke <eaw@cisco.com> | 2015-12-08 15:47:27 -0700 |
commit | cb9cadad578297ffd78fa8a33670bdf1ab669e7e (patch) | |
tree | 6ac2be912482cc7849a26f0ab845561c3d7f4e26 /vlib/vlib | |
parent | fb0815d4ae4bb0fe27bd9313f34b45c8593b907e (diff) |
Initial commit of vpp code. (tag: v1.0.0)
Change-Id: Ib246f1fbfce93274020ee93ce461e3d8bd8b9f17
Signed-off-by: Ed Warnicke <eaw@cisco.com>
Diffstat (limited to 'vlib/vlib')
54 files changed, 22906 insertions, 0 deletions
diff --git a/vlib/vlib/buffer.c b/vlib/vlib/buffer.c new file mode 100644 index 00000000000..4463f7fdb4f --- /dev/null +++ b/vlib/vlib/buffer.c @@ -0,0 +1,1435 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/* + * buffer.c: allocate/free network buffers. + * + * Copyright (c) 2008 Eliot Dresselhaus + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ */ + +#include <vlib/vlib.h> + +uword vlib_buffer_length_in_chain_slow_path (vlib_main_t * vm, vlib_buffer_t * b_first) +{ + vlib_buffer_t * b = b_first; + uword l_first = b_first->current_length; + uword l = 0; + while (b->flags & VLIB_BUFFER_NEXT_PRESENT) + { + b = vlib_get_buffer (vm, b->next_buffer); + l += b->current_length; + } + b_first->total_length_not_including_first_buffer = l; + b_first->flags |= VLIB_BUFFER_TOTAL_LENGTH_VALID; + return l + l_first; +} + +u8 * format_vlib_buffer (u8 * s, va_list * args) +{ + vlib_buffer_t * b = va_arg (*args, vlib_buffer_t *); + + s = format (s, "current data %d, length %d, free-list %d", + b->current_data, b->current_length, + b->free_list_index); + + if (b->flags & VLIB_BUFFER_IS_TRACED) + s = format (s, ", trace 0x%x", b->trace_index); + + if (b->flags & VLIB_BUFFER_NEXT_PRESENT) + s = format (s, ", next-buffer 0x%x", b->next_buffer); + + return s; +} + +u8 * format_vlib_buffer_and_data (u8 * s, va_list * args) +{ + vlib_buffer_t * b = va_arg (*args, vlib_buffer_t *); + + s = format (s, "%U, %U", + format_vlib_buffer, b, + format_hex_bytes, vlib_buffer_get_current (b), 64); + + return s; +} + +static u8 * format_vlib_buffer_known_state (u8 * s, va_list * args) +{ + vlib_buffer_known_state_t state = va_arg (*args, vlib_buffer_known_state_t); + char * t; + + switch (state) + { + case VLIB_BUFFER_UNKNOWN: + t = "unknown"; + break; + + case VLIB_BUFFER_KNOWN_ALLOCATED: + t = "known-allocated"; + break; + + case VLIB_BUFFER_KNOWN_FREE: + t = "known-free"; + break; + + default: + t = "invalid"; + break; + } + + return format (s, "%s", t); +} + +u8 * format_vlib_buffer_contents (u8 * s, va_list * va) +{ + vlib_main_t * vm = va_arg (*va, vlib_main_t *); + vlib_buffer_t * b = va_arg (*va, vlib_buffer_t *); + + while (1) + { + vec_add (s, vlib_buffer_get_current (b), + b->current_length); + if (! 
(b->flags & VLIB_BUFFER_NEXT_PRESENT)) + break; + b = vlib_get_buffer (vm, b->next_buffer); + } + + return s; +} + +static u8 * +vlib_validate_buffer_helper (vlib_main_t * vm, + u32 bi, + uword follow_buffer_next, + uword ** unique_hash) + +{ + vlib_buffer_t * b = vlib_get_buffer (vm, bi); + vlib_buffer_main_t * bm = vm->buffer_main; + vlib_buffer_free_list_t * fl; + + if (pool_is_free_index (bm->buffer_free_list_pool, + b->free_list_index)) + return format (0, "unknown free list 0x%x", b->free_list_index); + + fl = pool_elt_at_index (bm->buffer_free_list_pool, + b->free_list_index); + + if ((signed) b->current_data < (signed) - VLIB_BUFFER_PRE_DATA_SIZE) + return format (0, "current data %d before pre-data", b->current_data); +#if DPDK == 0 + if (b->current_data + b->current_length > fl->n_data_bytes) + return format (0, "%d-%d beyond end of buffer %d", + b->current_data, b->current_length, + fl->n_data_bytes); +#endif + + if (follow_buffer_next + && (b->flags & VLIB_BUFFER_NEXT_PRESENT)) + { + vlib_buffer_known_state_t k; + u8 * msg, * result; + + k = vlib_buffer_is_known (vm, b->next_buffer); + if (k != VLIB_BUFFER_KNOWN_ALLOCATED) + return format (0, "next 0x%x: %U", + b->next_buffer, + format_vlib_buffer_known_state, k); + + if (unique_hash) + { + if (hash_get (*unique_hash, b->next_buffer)) + return format (0, "duplicate buffer 0x%x", b->next_buffer); + + hash_set1 (*unique_hash, b->next_buffer); + } + + msg = vlib_validate_buffer (vm, b->next_buffer, follow_buffer_next); + if (msg) + { + result = format (0, "next 0x%x: %v", b->next_buffer, msg); + vec_free (msg); + return result; + } + } + + return 0; +} + +u8 * +vlib_validate_buffer (vlib_main_t * vm, u32 bi, uword follow_buffer_next) +{ return vlib_validate_buffer_helper (vm, bi, follow_buffer_next, /* unique_hash */ 0); } + +u8 * +vlib_validate_buffers (vlib_main_t * vm, + u32 * buffers, + uword next_buffer_stride, + uword n_buffers, + vlib_buffer_known_state_t known_state, + uword follow_buffer_next) +{ 
+ uword i, * hash; + u32 bi, * b = buffers; + vlib_buffer_known_state_t k; + u8 * msg = 0, * result = 0; + + hash = hash_create (0, 0); + for (i = 0; i < n_buffers; i++) + { + bi = b[0]; + b += next_buffer_stride; + + /* Buffer is not unique. */ + if (hash_get (hash, bi)) + { + msg = format (0, "not unique"); + goto done; + } + + k = vlib_buffer_is_known (vm, bi); + if (k != known_state) + { + msg = format (0, "is %U; expected %U", + format_vlib_buffer_known_state, k, + format_vlib_buffer_known_state, known_state); + goto done; + } + + msg = vlib_validate_buffer_helper (vm, bi, follow_buffer_next, &hash); + if (msg) + goto done; + + hash_set1 (hash, bi); + } + + done: + if (msg) + { + result = format (0, "0x%x: %v", bi, msg); + vec_free (msg); + } + hash_free (hash); + return result; +} + +vlib_main_t **vlib_mains; + +/* When dubugging validate that given buffers are either known allocated + or known free. */ +static void +vlib_buffer_validate_alloc_free (vlib_main_t * vm, + u32 * buffers, + uword n_buffers, + vlib_buffer_known_state_t expected_state) +{ + u32 * b; + uword i, bi, is_free; + + if (CLIB_DEBUG == 0) + return; + + ASSERT(os_get_cpu_number() == 0); + + /* smp disaster check */ + if (vlib_mains) + ASSERT(vm == vlib_mains[0]); + + is_free = expected_state == VLIB_BUFFER_KNOWN_ALLOCATED; + b = buffers; + for (i = 0; i < n_buffers; i++) + { + vlib_buffer_known_state_t known; + + bi = b[0]; + b += 1; + known = vlib_buffer_is_known (vm, bi); + if (known != expected_state) + { + ASSERT (0); + vlib_panic_with_msg + (vm, "%s %U buffer 0x%x", + is_free ? "freeing" : "allocating", + format_vlib_buffer_known_state, known, + bi); + } + + vlib_buffer_set_known_state + (vm, bi, + is_free ? VLIB_BUFFER_KNOWN_FREE : VLIB_BUFFER_KNOWN_ALLOCATED); + } +} + +/* Aligned copy routine. */ +void +vlib_aligned_memcpy (void * _dst, void * _src, int n_bytes) +{ + vlib_copy_unit_t * dst = _dst; + vlib_copy_unit_t * src = _src; + + /* Arguments must be naturally aligned. 
*/ + ASSERT (pointer_to_uword (dst) % sizeof (dst[0]) == 0); + ASSERT (pointer_to_uword (src) % sizeof (src[0]) == 0); + ASSERT (n_bytes % sizeof (dst[0]) == 0); + + if (4 * sizeof (dst[0]) == CLIB_CACHE_LINE_BYTES) + { + CLIB_PREFETCH (dst + 0, 4 * sizeof (dst[0]), WRITE); + CLIB_PREFETCH (src + 0, 4 * sizeof (src[0]), READ); + + while (n_bytes >= 4 * sizeof (dst[0])) + { + dst += 4; + src += 4; + n_bytes -= 4 * sizeof (dst[0]); + CLIB_PREFETCH (dst, 4 * sizeof (dst[0]), WRITE); + CLIB_PREFETCH (src, 4 * sizeof (src[0]), READ); + dst[-4] = src[-4]; + dst[-3] = src[-3]; + dst[-2] = src[-2]; + dst[-1] = src[-1]; + } + } + else if (8 * sizeof (dst[0]) == CLIB_CACHE_LINE_BYTES) + { + CLIB_PREFETCH (dst + 0, 8 * sizeof (dst[0]), WRITE); + CLIB_PREFETCH (src + 0, 8 * sizeof (src[0]), READ); + + while (n_bytes >= 8 * sizeof (dst[0])) + { + dst += 8; + src += 8; + n_bytes -= 8 * sizeof (dst[0]); + CLIB_PREFETCH (dst, 8 * sizeof (dst[0]), WRITE); + CLIB_PREFETCH (src, 8 * sizeof (src[0]), READ); + dst[-8] = src[-8]; + dst[-7] = src[-7]; + dst[-6] = src[-6]; + dst[-5] = src[-5]; + dst[-4] = src[-4]; + dst[-3] = src[-3]; + dst[-2] = src[-2]; + dst[-1] = src[-1]; + } + } + else + /* Cache line size unknown: fall back to slow version. */; + + while (n_bytes > 0) + { + *dst++ = *src++; + n_bytes -= 1 * sizeof (dst[0]); + } +} + +#define BUFFERS_PER_COPY (sizeof (vlib_copy_unit_t) / sizeof (u32)) + +/* Make sure we have at least given number of unaligned buffers. */ +static void +fill_unaligned (vlib_main_t * vm, + vlib_buffer_free_list_t * free_list, + uword n_unaligned_buffers) +{ + word la = vec_len (free_list->aligned_buffers); + word lu = vec_len (free_list->unaligned_buffers); + + /* Aligned come in aligned copy-sized chunks. */ + ASSERT (la % BUFFERS_PER_COPY == 0); + + ASSERT (la >= n_unaligned_buffers); + + while (lu < n_unaligned_buffers) + { + /* Copy 4 buffers from end of aligned vector to unaligned vector. 
*/ + vec_add (free_list->unaligned_buffers, + free_list->aligned_buffers + la - BUFFERS_PER_COPY, + BUFFERS_PER_COPY); + la -= BUFFERS_PER_COPY; + lu += BUFFERS_PER_COPY; + } + _vec_len (free_list->aligned_buffers) = la; +} + +/* After free aligned buffers may not contain even sized chunks. */ +static void +trim_aligned (vlib_buffer_free_list_t * f) +{ + uword l, n_trim; + + /* Add unaligned to aligned before trim. */ + l = vec_len (f->unaligned_buffers); + if (l > 0) + { + vec_add_aligned (f->aligned_buffers, f->unaligned_buffers, l, + /* align */ sizeof (vlib_copy_unit_t)); + + _vec_len (f->unaligned_buffers) = 0; + } + + /* Remove unaligned buffers from end of aligned vector and save for next trim. */ + l = vec_len (f->aligned_buffers); + n_trim = l % BUFFERS_PER_COPY; + if (n_trim) + { + /* Trim aligned -> unaligned. */ + vec_add (f->unaligned_buffers, f->aligned_buffers + l - n_trim, n_trim); + + /* Remove from aligned. */ + _vec_len (f->aligned_buffers) = l - n_trim; + } +} + +static void +merge_free_lists (vlib_buffer_free_list_t * dst, + vlib_buffer_free_list_t * src) +{ + uword l; + u32 * d; + + trim_aligned (src); + trim_aligned (dst); + + l = vec_len (src->aligned_buffers); + if (l > 0) + { + vec_add2_aligned (dst->aligned_buffers, d, l, + /* align */ sizeof (vlib_copy_unit_t)); + vlib_aligned_memcpy (d, src->aligned_buffers, l * sizeof (d[0])); + vec_free (src->aligned_buffers); + } + + l = vec_len (src->unaligned_buffers); + if (l > 0) + { + vec_add (dst->unaligned_buffers, src->unaligned_buffers, l); + vec_free (src->unaligned_buffers); + } +} + +always_inline u32 +vlib_buffer_get_free_list_with_size (vlib_main_t * vm, u32 size) +{ + vlib_buffer_main_t * bm = vm->buffer_main; + + size = vlib_buffer_round_size (size); + uword * p = hash_get (bm->free_list_by_size, size); + return p ? p[0] : ~0; +} + +/* Add buffer free list. 
*/ +static u32 +vlib_buffer_create_free_list_helper (vlib_main_t * vm, + u32 n_data_bytes, + u32 is_public, + u32 is_default, + u8 * name) +{ + vlib_buffer_main_t * bm = vm->buffer_main; + vlib_buffer_free_list_t * f; + + if (! is_default && pool_elts (bm->buffer_free_list_pool) == 0) + { + u32 default_free_free_list_index; + + default_free_free_list_index = + vlib_buffer_create_free_list_helper (vm, + /* default buffer size */ VLIB_BUFFER_DEFAULT_FREE_LIST_BYTES, + /* is_public */ 1, + /* is_default */ 1, + (u8 *) "default"); + ASSERT (default_free_free_list_index == VLIB_BUFFER_DEFAULT_FREE_LIST_INDEX); + + if (n_data_bytes == VLIB_BUFFER_DEFAULT_FREE_LIST_BYTES && is_public) + return default_free_free_list_index; + } + + pool_get_aligned (bm->buffer_free_list_pool, f, CLIB_CACHE_LINE_BYTES); + + memset (f, 0, sizeof (f[0])); + f->index = f - bm->buffer_free_list_pool; + f->n_data_bytes = vlib_buffer_round_size (n_data_bytes); + f->min_n_buffers_each_physmem_alloc = 256; + f->name = clib_mem_is_heap_object (name) ? name : format (0, "%s", name); + + /* Setup free buffer template. */ + f->buffer_init_template.free_list_index = f->index; + + if (is_public) + { + uword * p = hash_get (bm->free_list_by_size, f->n_data_bytes); + if (! p) + hash_set (bm->free_list_by_size, f->n_data_bytes, f->index); + } + + return f->index; +} + +u32 vlib_buffer_create_free_list (vlib_main_t * vm, u32 n_data_bytes, + char * fmt, ...) +{ + va_list va; + u8 * name; + + va_start (va, fmt); + name = va_format (0, fmt, &va); + va_end (va); + + return vlib_buffer_create_free_list_helper (vm, n_data_bytes, + /* is_public */ 0, + /* is_default */ 0, + name); +} + +u32 vlib_buffer_get_or_create_free_list (vlib_main_t * vm, u32 n_data_bytes, + char * fmt, ...) 
+{ + u32 i = vlib_buffer_get_free_list_with_size (vm, n_data_bytes); + + if (i == ~0) + { + va_list va; + u8 * name; + + va_start (va, fmt); + name = va_format (0, fmt, &va); + va_end (va); + + i = vlib_buffer_create_free_list_helper (vm, n_data_bytes, + /* is_public */ 1, + /* is_default */ 0, + name); + } + + return i; +} + +static void +del_free_list (vlib_main_t * vm, vlib_buffer_free_list_t * f) +{ + u32 i; + + for (i = 0; i < vec_len (f->buffer_memory_allocated); i++) + vm->os_physmem_free (f->buffer_memory_allocated[i]); + vec_free (f->name); + vec_free (f->buffer_memory_allocated); + vec_free (f->unaligned_buffers); + vec_free (f->aligned_buffers); +} + +/* Add buffer free list. */ +void vlib_buffer_delete_free_list (vlib_main_t * vm, u32 free_list_index) +{ + vlib_buffer_main_t * bm = vm->buffer_main; + vlib_buffer_free_list_t * f; + u32 merge_index; + + f = vlib_buffer_get_free_list (vm, free_list_index); + + ASSERT (vec_len (f->unaligned_buffers) + vec_len (f->aligned_buffers) == f->n_alloc); + merge_index = vlib_buffer_get_free_list_with_size (vm, f->n_data_bytes); + if (merge_index != ~0 && merge_index != free_list_index) + { + merge_free_lists (pool_elt_at_index (bm->buffer_free_list_pool, + merge_index), f); + } + + del_free_list (vm, f); + + /* Poison it. */ + memset (f, 0xab, sizeof (f[0])); + + pool_put (bm->buffer_free_list_pool, f); +} + +/* Make sure free list has at least given number of free buffers. */ +static uword +fill_free_list (vlib_main_t * vm, + vlib_buffer_free_list_t * fl, + uword min_free_buffers) +{ + vlib_buffer_t * buffers, * b; + int n, n_bytes, i; + u32 * bi; + u32 n_remaining, n_alloc, n_this_chunk; + + trim_aligned (fl); + + /* Already have enough free buffers on free list? */ + n = min_free_buffers - vec_len (fl->aligned_buffers); + if (n <= 0) + return min_free_buffers; + + /* Always allocate round number of buffers. 
*/ + n = round_pow2 (n, BUFFERS_PER_COPY); + + /* Always allocate new buffers in reasonably large sized chunks. */ + n = clib_max (n, fl->min_n_buffers_each_physmem_alloc); + + n_remaining = n; + n_alloc = 0; + while (n_remaining > 0) + { + n_this_chunk = clib_min (n_remaining, 16); + + n_bytes = n_this_chunk * (sizeof (b[0]) + fl->n_data_bytes); + + /* drb: removed power-of-2 ASSERT */ + buffers = vm->os_physmem_alloc_aligned (&vm->physmem_main, + n_bytes, sizeof (vlib_buffer_t)); + if (! buffers) + return n_alloc; + + /* Record chunk as being allocated so we can free it later. */ + vec_add1 (fl->buffer_memory_allocated, buffers); + + fl->n_alloc += n_this_chunk; + n_alloc += n_this_chunk; + n_remaining -= n_this_chunk; + + b = buffers; + vec_add2_aligned (fl->aligned_buffers, bi, n_this_chunk, + sizeof (vlib_copy_unit_t)); + for (i = 0; i < n_this_chunk; i++) + { + bi[i] = vlib_get_buffer_index (vm, b); + + if (CLIB_DEBUG > 0) + vlib_buffer_set_known_state (vm, bi[i], VLIB_BUFFER_KNOWN_FREE); + b = vlib_buffer_next_contiguous (b, fl->n_data_bytes); + } + + memset (buffers, 0, n_bytes); + + /* Initialize all new buffers. 
*/ + b = buffers; + for (i = 0; i < n_this_chunk; i++) + { + vlib_buffer_init_for_free_list (b, fl); + b = vlib_buffer_next_contiguous (b, fl->n_data_bytes); + } + + if (fl->buffer_init_function) + fl->buffer_init_function (vm, fl, bi, n_this_chunk); + } + return n_alloc; +} + +always_inline uword +copy_alignment (u32 * x) +{ return (pointer_to_uword (x) / sizeof (x[0])) % BUFFERS_PER_COPY; } + +static u32 +alloc_from_free_list (vlib_main_t * vm, + vlib_buffer_free_list_t * free_list, + u32 * alloc_buffers, + u32 n_alloc_buffers) +{ + u32 * dst, * u_src; + uword u_len, n_left; + uword n_unaligned_start, n_unaligned_end, n_filled; + + ASSERT(os_get_cpu_number() == 0); + + n_left = n_alloc_buffers; + dst = alloc_buffers; + n_unaligned_start = ((BUFFERS_PER_COPY - copy_alignment (dst)) + & (BUFFERS_PER_COPY - 1)); + + n_filled = fill_free_list (vm, free_list, n_alloc_buffers); + if (n_filled == 0) + return 0; + + n_left = n_filled < n_left ? n_filled : n_left; + n_alloc_buffers = n_left; + + if (n_unaligned_start >= n_left) + { + n_unaligned_start = n_left; + n_unaligned_end = 0; + } + else + n_unaligned_end = copy_alignment (dst + n_alloc_buffers); + + fill_unaligned (vm, free_list, n_unaligned_start + n_unaligned_end); + + u_len = vec_len (free_list->unaligned_buffers); + u_src = free_list->unaligned_buffers + u_len - 1; + + if (n_unaligned_start) + { + uword n_copy = n_unaligned_start; + if (n_copy > n_left) + n_copy = n_left; + n_left -= n_copy; + + while (n_copy > 0) + { + *dst++ = *u_src--; + n_copy--; + u_len--; + } + + /* Now dst should be aligned. */ + if (n_left > 0) + ASSERT (pointer_to_uword (dst) % sizeof (vlib_copy_unit_t) == 0); + } + + /* Aligned copy. */ + { + vlib_copy_unit_t * d, * s; + uword n_copy; + + if (vec_len(free_list->aligned_buffers) < ((n_left/BUFFERS_PER_COPY)*BUFFERS_PER_COPY)) + abort(); + + n_copy = n_left / BUFFERS_PER_COPY; + n_left = n_left % BUFFERS_PER_COPY; + + /* Remove buffers from aligned free list. 
*/ + _vec_len (free_list->aligned_buffers) -= n_copy * BUFFERS_PER_COPY; + + s = (vlib_copy_unit_t *) vec_end (free_list->aligned_buffers); + d = (vlib_copy_unit_t *) dst; + + /* Fast path loop. */ + while (n_copy >= 4) + { + d[0] = s[0]; + d[1] = s[1]; + d[2] = s[2]; + d[3] = s[3]; + n_copy -= 4; + s += 4; + d += 4; + } + + while (n_copy >= 1) + { + d[0] = s[0]; + n_copy -= 1; + s += 1; + d += 1; + } + + dst = (void *) d; + } + + /* Unaligned copy. */ + ASSERT (n_unaligned_end == n_left); + while (n_left > 0) + { + *dst++ = *u_src--; + n_left--; + u_len--; + } + + if (! free_list->unaligned_buffers) + ASSERT (u_len == 0); + else + _vec_len (free_list->unaligned_buffers) = u_len; + + /* Verify that buffers are known free. */ + vlib_buffer_validate_alloc_free (vm, alloc_buffers, + n_alloc_buffers, + VLIB_BUFFER_KNOWN_FREE); + + return n_alloc_buffers; +} + +/* Allocate a given number of buffers into given array. + Returns number actually allocated which will be either zero or + number requested. 
*/ +u32 vlib_buffer_alloc (vlib_main_t * vm, u32 * buffers, u32 n_buffers) +{ + vlib_buffer_main_t * bm = vm->buffer_main; + ASSERT(os_get_cpu_number() == 0); + + return alloc_from_free_list + (vm, + pool_elt_at_index (bm->buffer_free_list_pool, + VLIB_BUFFER_DEFAULT_FREE_LIST_INDEX), + buffers, n_buffers); +} + +u32 vlib_buffer_alloc_from_free_list (vlib_main_t * vm, + u32 * buffers, + u32 n_buffers, + u32 free_list_index) +{ + vlib_buffer_main_t * bm = vm->buffer_main; + vlib_buffer_free_list_t * f; + f = pool_elt_at_index (bm->buffer_free_list_pool, free_list_index); + return alloc_from_free_list (vm, f, buffers, n_buffers); +} + +always_inline void +add_buffer_to_free_list (vlib_main_t * vm, + vlib_buffer_free_list_t * f, + u32 buffer_index, u8 do_init) +{ + vlib_buffer_t * b; + b = vlib_get_buffer (vm, buffer_index); + if (PREDICT_TRUE(do_init)) + vlib_buffer_init_for_free_list (b, f); + vec_add1_aligned (f->aligned_buffers, buffer_index, sizeof (vlib_copy_unit_t)); +} + +always_inline vlib_buffer_free_list_t * +buffer_get_free_list (vlib_main_t * vm, vlib_buffer_t * b, u32 * index) +{ + vlib_buffer_main_t * bm = vm->buffer_main; + u32 i; + + *index = i = b->free_list_index; + return pool_elt_at_index (bm->buffer_free_list_pool, i); +} + +void *vlib_set_buffer_free_callback (vlib_main_t *vm, void *fp) +{ + vlib_buffer_main_t * bm = vm->buffer_main; + void * rv = bm->buffer_free_callback; + + bm->buffer_free_callback = fp; + return rv; +} + +void vnet_buffer_free_dpdk_mb (vlib_buffer_t * b) __attribute__ ((weak)); +void vnet_buffer_free_dpdk_mb (vlib_buffer_t * b) { } + +static_always_inline void +vlib_buffer_free_inline (vlib_main_t * vm, + u32 * buffers, + u32 n_buffers, + u32 follow_buffer_next) +{ + vlib_buffer_main_t * bm = vm->buffer_main; + vlib_buffer_free_list_t * fl; + static u32 * next_to_free[2]; /* smp bad */ + u32 i_next_to_free, * b, * n, * f, fi; + uword n_left; + int i; + static vlib_buffer_free_list_t ** announce_list; + 
vlib_buffer_free_list_t * fl0 = 0, * fl1 = 0; + u32 bi0=(u32)~0, bi1=(u32)~0, fi0, fi1 = (u32)~0; + u8 free0, free1=0, free_next0, free_next1; + u32 (*cb)(vlib_main_t * vm, u32 * buffers, u32 n_buffers, + u32 follow_buffer_next); + + ASSERT(os_get_cpu_number() == 0); + + cb = bm->buffer_free_callback; + + if (PREDICT_FALSE (cb != 0)) + n_buffers = (*cb)(vm, buffers, n_buffers, follow_buffer_next); + + if (! n_buffers) + return; + + /* Use first buffer to get default free list. */ + { + u32 bi0 = buffers[0]; + vlib_buffer_t * b0; + + b0 = vlib_get_buffer (vm, bi0); + fl = buffer_get_free_list (vm, b0, &fi); + if (fl->buffers_added_to_freelist_function) + vec_add1 (announce_list, fl); + } + + vec_validate (next_to_free[0], n_buffers - 1); + vec_validate (next_to_free[1], n_buffers - 1); + + i_next_to_free = 0; + n_left = n_buffers; + b = buffers; + + again: + /* Verify that buffers are known allocated. */ + vlib_buffer_validate_alloc_free (vm, b, + n_left, + VLIB_BUFFER_KNOWN_ALLOCATED); + + vec_add2_aligned (fl->aligned_buffers, f, n_left, + /* align */ sizeof (vlib_copy_unit_t)); + + n = next_to_free[i_next_to_free]; + while (n_left >= 4) + { + vlib_buffer_t * b0, * b1, * binit0, * binit1, dummy_buffers[2]; + + bi0 = b[0]; + bi1 = b[1]; + + f[0] = bi0; + f[1] = bi1; + f += 2; + b += 2; + n_left -= 2; + + /* Prefetch buffers for next iteration. */ + vlib_prefetch_buffer_with_index (vm, b[0], WRITE); + vlib_prefetch_buffer_with_index (vm, b[1], WRITE); + + b0 = vlib_get_buffer (vm, bi0); + b1 = vlib_get_buffer (vm, bi1); + + free0 = b0->clone_count == 0; + free1 = b1->clone_count == 0; + + /* Must be before init which will over-write buffer flags. 
*/ + if (follow_buffer_next) + { + n[0] = b0->next_buffer; + free_next0 = free0 && (b0->flags & VLIB_BUFFER_NEXT_PRESENT) != 0; + n += free_next0; + + n[0] = b1->next_buffer; + free_next1 = free1 && (b1->flags & VLIB_BUFFER_NEXT_PRESENT) != 0; + n += free_next1; + } + else + free_next0 = free_next1 = 0; + + /* Must be before init which will over-write buffer free list. */ + fi0 = b0->free_list_index; + fi1 = b1->free_list_index; + + if (PREDICT_FALSE (fi0 != fi || fi1 != fi)) + goto slow_path_x2; + + binit0 = free0 ? b0 : &dummy_buffers[0]; + binit1 = free1 ? b1 : &dummy_buffers[1]; + + vlib_buffer_init_two_for_free_list (binit0, binit1, fl); + continue; + + slow_path_x2: + /* Backup speculation. */ + f -= 2; + n -= free_next0 + free_next1; + + _vec_len (fl->aligned_buffers) = f - fl->aligned_buffers; + + fl0 = pool_elt_at_index (bm->buffer_free_list_pool, fi0); + fl1 = pool_elt_at_index (bm->buffer_free_list_pool, fi1); + + add_buffer_to_free_list (vm, fl0, bi0, free0); + if (PREDICT_FALSE(fl0->buffers_added_to_freelist_function != 0)) + { + int i; + for (i = 0; i < vec_len (announce_list); i++) + if (fl0 == announce_list[i]) + goto no_fl0; + vec_add1(announce_list, fl0); + } + no_fl0: + if (PREDICT_FALSE(fl1->buffers_added_to_freelist_function != 0)) + { + int i; + for (i = 0; i < vec_len (announce_list); i++) + if (fl1 == announce_list[i]) + goto no_fl1; + vec_add1(announce_list, fl1); + } + + no_fl1: + add_buffer_to_free_list (vm, fl1, bi1, free1); + + /* Possibly change current free list. 
*/ + if (fi0 != fi && fi1 != fi) + { + fi = fi1; + fl = pool_elt_at_index (bm->buffer_free_list_pool, fi); + } + + vec_add2_aligned (fl->aligned_buffers, f, n_left, + /* align */ sizeof (vlib_copy_unit_t)); + } + + while (n_left >= 1) + { + vlib_buffer_t * b0, * binit0, dummy_buffers[1]; + + bi0 = b[0]; + f[0] = bi0; + f += 1; + b += 1; + n_left -= 1; + + b0 = vlib_get_buffer (vm, bi0); + + free0 = b0->clone_count == 0; + + /* Must be before init which will over-write buffer flags. */ + if (follow_buffer_next) + { + n[0] = b0->next_buffer; + free_next0 = free0 && (b0->flags & VLIB_BUFFER_NEXT_PRESENT) != 0; + n += free_next0; + } + else + free_next0 = 0; + + /* Must be before init which will over-write buffer free list. */ + fi0 = b0->free_list_index; + + if (PREDICT_FALSE (fi0 != fi)) + goto slow_path_x1; + + binit0 = free0 ? b0 : &dummy_buffers[0]; + + vlib_buffer_init_for_free_list (binit0, fl); + continue; + + slow_path_x1: + /* Backup speculation. */ + f -= 1; + n -= free_next0; + + _vec_len (fl->aligned_buffers) = f - fl->aligned_buffers; + + fl0 = pool_elt_at_index (bm->buffer_free_list_pool, fi0); + + add_buffer_to_free_list (vm, fl0, bi0, free0); + if (PREDICT_FALSE(fl0->buffers_added_to_freelist_function != 0)) + { + int i; + for (i = 0; i < vec_len (announce_list); i++) + if (fl0 == announce_list[i]) + goto no_fl00; + vec_add1(announce_list, fl0); + } + + no_fl00: + fi = fi0; + fl = pool_elt_at_index (bm->buffer_free_list_pool, fi); + + vec_add2_aligned (fl->aligned_buffers, f, n_left, + /* align */ sizeof (vlib_copy_unit_t)); + } + + if (follow_buffer_next && ((n_left = n - next_to_free[i_next_to_free]) > 0)) + { + b = next_to_free[i_next_to_free]; + i_next_to_free ^= 1; + goto again; + } + + _vec_len (fl->aligned_buffers) = f - fl->aligned_buffers; + + if (vec_len(announce_list)) + { + vlib_buffer_free_list_t * fl; + for (i = 0; i < vec_len (announce_list); i++) + { + fl = announce_list[i]; + fl->buffers_added_to_freelist_function (vm, fl); + } + 
_vec_len(announce_list) = 0; + } +} + +void vlib_buffer_free (vlib_main_t * vm, + u32 * buffers, + u32 n_buffers) +{ + vlib_buffer_free_inline (vm, buffers, n_buffers, /* follow_buffer_next */ 1); +} + +void vlib_buffer_free_no_next (vlib_main_t * vm, + u32 * buffers, + u32 n_buffers) +{ + vlib_buffer_free_inline (vm, buffers, n_buffers, /* follow_buffer_next */ 0); +} + +/* Copy template packet data into buffers as they are allocated. */ +static void +vlib_packet_template_buffer_init (vlib_main_t * vm, + vlib_buffer_free_list_t * fl, + u32 * buffers, + u32 n_buffers) +{ + vlib_packet_template_t * t = uword_to_pointer (fl->buffer_init_function_opaque, + vlib_packet_template_t *); + uword i; + + for (i = 0; i < n_buffers; i++) + { + vlib_buffer_t * b = vlib_get_buffer (vm, buffers[i]); + ASSERT (b->current_length == vec_len (t->packet_data)); + memcpy (vlib_buffer_get_current (b), t->packet_data, b->current_length); + } +} + +void vlib_packet_template_init (vlib_main_t * vm, + vlib_packet_template_t * t, + void * packet_data, + uword n_packet_data_bytes, + uword min_n_buffers_each_physmem_alloc, + char * fmt, + ...) 
+{ + vlib_buffer_free_list_t * fl; + va_list va; + u8 * name; + + va_start (va, fmt); + name = va_format (0, fmt, &va); + va_end (va); + + memset (t, 0, sizeof (t[0])); + + vec_add (t->packet_data, packet_data, n_packet_data_bytes); + t->min_n_buffers_each_physmem_alloc = min_n_buffers_each_physmem_alloc; + + t->free_list_index = vlib_buffer_create_free_list_helper + (vm, n_packet_data_bytes, + /* is_public */ 1, + /* is_default */ 0, + name); + + ASSERT (t->free_list_index != 0); + fl = vlib_buffer_get_free_list (vm, t->free_list_index); + fl->min_n_buffers_each_physmem_alloc = t->min_n_buffers_each_physmem_alloc; + + fl->buffer_init_function = vlib_packet_template_buffer_init; + fl->buffer_init_function_opaque = pointer_to_uword (t); + + fl->buffer_init_template.current_data = 0; + fl->buffer_init_template.current_length = n_packet_data_bytes; + fl->buffer_init_template.flags = 0; +} + +void * +vlib_packet_template_get_packet (vlib_main_t * vm, + vlib_packet_template_t * t, + u32 * bi_result) +{ + u32 bi; + vlib_buffer_t * b; + + if (vlib_buffer_alloc (vm, &bi, 1) != 1) + return 0; + + *bi_result = bi; + + b = vlib_get_buffer (vm, bi); + memcpy (vlib_buffer_get_current (b), + t->packet_data, vec_len(t->packet_data)); + b->current_length = vec_len(t->packet_data); + + return b->data; +} + +void vlib_packet_template_get_packet_helper (vlib_main_t * vm, vlib_packet_template_t * t) +{ + word n = t->min_n_buffers_each_physmem_alloc; + word l = vec_len (t->packet_data); + word n_alloc; + + ASSERT (l > 0); + ASSERT (vec_len (t->free_buffers) == 0); + + vec_validate (t->free_buffers, n - 1); + n_alloc = vlib_buffer_alloc_from_free_list (vm, t->free_buffers, + n, t->free_list_index); + _vec_len (t->free_buffers) = n_alloc; +} + +/* Append given data to end of buffer, possibly allocating new buffers. 
*/ +u32 vlib_buffer_add_data (vlib_main_t * vm, + u32 free_list_index, + u32 buffer_index, + void * data, u32 n_data_bytes) +{ + u32 n_buffer_bytes, n_left, n_left_this_buffer, bi; + vlib_buffer_t * b; + void * d; + + bi = buffer_index; + if (bi == 0 + && 1 != vlib_buffer_alloc_from_free_list (vm, &bi, 1, free_list_index)) + goto out_of_buffers; + + d = data; + n_left = n_data_bytes; + n_buffer_bytes = vlib_buffer_free_list_buffer_size (vm, free_list_index); + + b = vlib_get_buffer (vm, bi); + b->flags &= ~VLIB_BUFFER_TOTAL_LENGTH_VALID; + + /* Get to the end of the chain before we try to append data...*/ + while (b->flags & VLIB_BUFFER_NEXT_PRESENT) + b = vlib_get_buffer (vm, b->next_buffer); + + while (1) + { + u32 n; + + ASSERT (n_buffer_bytes >= b->current_length); + n_left_this_buffer = n_buffer_bytes - (b->current_data + b->current_length); + n = clib_min (n_left_this_buffer, n_left); + memcpy (vlib_buffer_get_current (b) + b->current_length, d, n); + b->current_length += n; + n_left -= n; + if (n_left == 0) + break; + + d += n; + if (1 != vlib_buffer_alloc_from_free_list (vm, &b->next_buffer, 1, free_list_index)) + goto out_of_buffers; + + b->flags |= VLIB_BUFFER_NEXT_PRESENT; + + b = vlib_get_buffer (vm, b->next_buffer); + } + + return bi; + + out_of_buffers: + clib_error ("out of buffers"); + return bi; +} + +static void vlib_serialize_tx (serialize_main_header_t * m, serialize_stream_t * s) +{ + vlib_main_t * vm; + vlib_serialize_buffer_main_t * sm; + uword n, n_bytes_to_write; + vlib_buffer_t * last; + + n_bytes_to_write = s->current_buffer_index; + sm = uword_to_pointer (s->data_function_opaque, vlib_serialize_buffer_main_t *); + vm = sm->vlib_main; + + ASSERT (sm->tx.max_n_data_bytes_per_chain > 0); + if (serialize_stream_is_end_of_stream (s) + || sm->tx.n_total_data_bytes + n_bytes_to_write > sm->tx.max_n_data_bytes_per_chain) + { + vlib_process_t * p = vlib_get_current_process (vm); + + last = vlib_get_buffer (vm, sm->last_buffer); + 
last->current_length = n_bytes_to_write; + + vlib_set_next_frame_buffer (vm, &p->node_runtime, sm->tx.next_index, sm->first_buffer); + + sm->first_buffer = sm->last_buffer = ~0; + sm->tx.n_total_data_bytes = 0; + } + + else if (n_bytes_to_write == 0 && s->n_buffer_bytes == 0) + { + ASSERT (sm->first_buffer == ~0); + ASSERT (sm->last_buffer == ~0); + n = vlib_buffer_alloc_from_free_list (vm, &sm->first_buffer, 1, sm->tx.free_list_index); + if (n != 1) + serialize_error (m, clib_error_create ("vlib_buffer_alloc_from_free_list fails")); + sm->last_buffer = sm->first_buffer; + s->n_buffer_bytes = vlib_buffer_free_list_buffer_size (vm, sm->tx.free_list_index); + } + + if (n_bytes_to_write > 0) + { + vlib_buffer_t * prev = vlib_get_buffer (vm, sm->last_buffer); + n = vlib_buffer_alloc_from_free_list (vm, &sm->last_buffer, 1, sm->tx.free_list_index); + if (n != 1) + serialize_error (m, clib_error_create ("vlib_buffer_alloc_from_free_list fails")); + sm->tx.n_total_data_bytes += n_bytes_to_write; + prev->current_length = n_bytes_to_write; + prev->next_buffer = sm->last_buffer; + prev->flags |= VLIB_BUFFER_NEXT_PRESENT; + } + + if (sm->last_buffer != ~0) + { + last = vlib_get_buffer (vm, sm->last_buffer); + s->buffer = vlib_buffer_get_current (last); + s->current_buffer_index = 0; + ASSERT (last->current_data == s->current_buffer_index); + } +} + +static void vlib_serialize_rx (serialize_main_header_t * m, serialize_stream_t * s) +{ + vlib_main_t * vm; + vlib_serialize_buffer_main_t * sm; + vlib_buffer_t * last; + + sm = uword_to_pointer (s->data_function_opaque, vlib_serialize_buffer_main_t *); + vm = sm->vlib_main; + + if (serialize_stream_is_end_of_stream (s)) + return; + + if (sm->last_buffer != ~0) + { + last = vlib_get_buffer (vm, sm->last_buffer); + + if (last->flags & VLIB_BUFFER_NEXT_PRESENT) + sm->last_buffer = last->next_buffer; + else + { + vlib_buffer_free (vm, &sm->first_buffer, /* count */ 1); + sm->first_buffer = sm->last_buffer = ~0; + } + } + + if 
(sm->last_buffer == ~0) + { + while (clib_fifo_elts (sm->rx.buffer_fifo) == 0) + { + sm->rx.ready_one_time_event = vlib_process_create_one_time_event (vm, vlib_current_process (vm), ~0); + vlib_process_wait_for_one_time_event (vm, /* no event data */ 0, sm->rx.ready_one_time_event); + } + + clib_fifo_sub1 (sm->rx.buffer_fifo, sm->first_buffer); + sm->last_buffer = sm->first_buffer; + } + + ASSERT (sm->last_buffer != ~0); + + last = vlib_get_buffer (vm, sm->last_buffer); + s->current_buffer_index = 0; + s->buffer = vlib_buffer_get_current (last); + s->n_buffer_bytes = last->current_length; +} + +static void +serialize_open_vlib_helper (serialize_main_t * m, + vlib_main_t * vm, + vlib_serialize_buffer_main_t * sm, + uword is_read) +{ + /* Initialize serialize main but save overflow buffer for re-use between calls. */ + { + u8 * save = m->stream.overflow_buffer; + memset (m, 0, sizeof (m[0])); + m->stream.overflow_buffer = save; + if (save) + _vec_len (save) = 0; + } + + sm->first_buffer = sm->last_buffer = ~0; + if (is_read) + clib_fifo_reset (sm->rx.buffer_fifo); + else + sm->tx.n_total_data_bytes = 0; + sm->vlib_main = vm; + m->header.data_function = is_read ? 
vlib_serialize_rx : vlib_serialize_tx; + m->stream.data_function_opaque = pointer_to_uword (sm); +} + +void serialize_open_vlib_buffer (serialize_main_t * m, vlib_main_t * vm, vlib_serialize_buffer_main_t * sm) +{ serialize_open_vlib_helper (m, vm, sm, /* is_read */ 0); } + +void unserialize_open_vlib_buffer (serialize_main_t * m, vlib_main_t * vm, vlib_serialize_buffer_main_t * sm) +{ serialize_open_vlib_helper (m, vm, sm, /* is_read */ 1); } + +u32 serialize_close_vlib_buffer (serialize_main_t * m) +{ + vlib_serialize_buffer_main_t * sm + = uword_to_pointer (m->stream.data_function_opaque, vlib_serialize_buffer_main_t *); + vlib_buffer_t * last; + serialize_stream_t * s = &m->stream; + + last = vlib_get_buffer (sm->vlib_main, sm->last_buffer); + last->current_length = s->current_buffer_index; + + if (vec_len (s->overflow_buffer) > 0) + { + sm->last_buffer + = vlib_buffer_add_data (sm->vlib_main, sm->tx.free_list_index, + sm->last_buffer == ~0 ? 0 : sm->last_buffer, + s->overflow_buffer, + vec_len (s->overflow_buffer)); + _vec_len (s->overflow_buffer) = 0; + } + + return sm->first_buffer; +} + +void unserialize_close_vlib_buffer (serialize_main_t * m) +{ + vlib_serialize_buffer_main_t * sm + = uword_to_pointer (m->stream.data_function_opaque, vlib_serialize_buffer_main_t *); + if (sm->first_buffer != ~0) + vlib_buffer_free_one (sm->vlib_main, sm->first_buffer); + clib_fifo_reset (sm->rx.buffer_fifo); + if (m->stream.overflow_buffer) + _vec_len (m->stream.overflow_buffer) = 0; +} + +static u8 * format_vlib_buffer_free_list (u8 * s, va_list * va) +{ + vlib_buffer_free_list_t * f = va_arg (*va, vlib_buffer_free_list_t *); + uword bytes_alloc, bytes_free, n_free, size; + + if (! 
f) + return format (s, "%=30s%=12s%=12s%=12s%=12s%=12s%=12s", + "Name", "Index", "Size", "Alloc", "Free", "#Alloc", "#Free"); + + size = sizeof (vlib_buffer_t) + f->n_data_bytes; + n_free = vec_len (f->aligned_buffers) + vec_len (f->unaligned_buffers); + bytes_alloc = size * f->n_alloc; + bytes_free = size * n_free; + + s = format (s, "%30s%12d%12d%=12U%=12U%=12d%=12d", + f->name, f->index, f->n_data_bytes, + format_memory_size, bytes_alloc, + format_memory_size, bytes_free, + f->n_alloc, n_free); + + return s; +} + +static clib_error_t * +show_buffers (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + vlib_buffer_main_t * bm = vm->buffer_main; + vlib_buffer_free_list_t * f; + + vlib_cli_output (vm, "%U", format_vlib_buffer_free_list, 0); + pool_foreach (f, bm->buffer_free_list_pool, ({ + vlib_cli_output (vm, "%U", format_vlib_buffer_free_list, f); + })); + + return 0; +} + +VLIB_CLI_COMMAND (show_buffers_command, static) = { + .path = "show buffers", + .short_help = "Show packet buffer allocation", + .function = show_buffers, +}; + diff --git a/vlib/vlib/buffer.h b/vlib/vlib/buffer.h new file mode 100644 index 00000000000..6322481b696 --- /dev/null +++ b/vlib/vlib/buffer.h @@ -0,0 +1,376 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +/* + * buffer.h: VLIB buffers + * + * Copyright (c) 2008 Eliot Dresselhaus + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#ifndef included_vlib_buffer_h +#define included_vlib_buffer_h + +#include <vppinfra/types.h> +#include <vppinfra/cache.h> +#include <vppinfra/serialize.h> +#include <vppinfra/vector.h> +#include <vlib/error.h> /* for vlib_error_t */ +#include <vlib/config.h> /* for __PRE_DATA_SIZE */ + +#ifdef CLIB_HAVE_VEC128 +typedef u8x16 vlib_copy_unit_t; +#else +typedef uword vlib_copy_unit_t; +#endif + +/** \file + vlib buffer structure definition and a few select + access methods. This structure and the buffer allocation + mechanism should perhaps live in vnet, but it would take a lot + of typing to make it so. +*/ + +/* VLIB buffer representation. */ +typedef struct { + /* Offset within data[] that we are currently processing. + If negative current header points into predata area. 
*/ + i16 current_data; /**< signed offset in data[], pre_data[] + that we are currently processing. + If negative current header points into predata area. + */ + u16 current_length; /**< Nbytes between current data and + the end of this buffer. + */ + u32 flags; /**< buffer flags: + <br> VLIB_BUFFER_IS_TRACED: trace this buffer. + <br> VLIB_BUFFER_NEXT_PRESENT: this is a multi-chunk buffer. + <br> VLIB_BUFFER_TOTAL_LENGTH_VALID: as it says + <br> VLIB_BUFFER_REPL_FAIL: packet replication failure + <br> VLIB_BUFFER_FLAG_USER(n): user-defined bit N + */ +#define VLIB_BUFFER_IS_TRACED (1 << 0) +#define VLIB_BUFFER_LOG2_NEXT_PRESENT (1) +#define VLIB_BUFFER_NEXT_PRESENT (1 << VLIB_BUFFER_LOG2_NEXT_PRESENT) +#define VLIB_BUFFER_IS_RECYCLED (1 << 2) +#define VLIB_BUFFER_TOTAL_LENGTH_VALID (1 << 3) +#define VLIB_BUFFER_HGSHM_USER_INDEX_VALID (1 << 4) +#define VLIB_BUFFER_REPL_FAIL (1 << 5) + + /* User defined buffer flags. */ +#define LOG2_VLIB_BUFFER_FLAG_USER(n) (32 - (n)) +#define VLIB_BUFFER_FLAG_USER(n) (1 << LOG2_VLIB_BUFFER_FLAG_USER(n)) + + u32 free_list_index; /**< Buffer free list that this buffer was + allocated from and will be freed to. + */ + + u32 total_length_not_including_first_buffer; + /**< Only valid for first buffer in chain. Current length plus + total length given here give total number of bytes in buffer chain. + */ + + + u32 next_buffer; /**< Next buffer for this linked-list of buffers. + Only valid if VLIB_BUFFER_NEXT_PRESENT flag is set. + */ + + u32 trace_index; /**< Specifies index into trace buffer + if VLIB_PACKET_IS_TRACED flag is set. + */ + + + u32 clone_count; /**< Specifies whether this buffer should be + reinitialized when freed. It will be reinitialized + if the value is 0. This field can be used + as a counter or for other state during packet + replication. The buffer free function does not + modify this value. + */ + + vlib_error_t error; /**< Error code for buffers to be enqueued + to error handler. 
+ */ + + u32 opaque[8]; /**< Opaque data used by sub-graphs for their own purposes. + See .../vnet/vnet/buffer.h + */ + /***** end of first cache line */ + + u32 opaque2[16]; /**< More opaque data, in its own cache line */ + + /***** end of second cache line */ + u8 pre_data [__PRE_DATA_SIZE]; /**< Space for inserting data + before buffer start. + Packet rewrite string will be + rewritten backwards and may extend + back before buffer->data[0]. + Must come directly before packet data. + */ + +#define VLIB_BUFFER_PRE_DATA_SIZE (ARRAY_LEN (((vlib_buffer_t *)0)->pre_data)) + u8 data[0]; /**< Packet data. Hardware DMA here */ +} vlib_buffer_t; /* Must be a multiple of 64B. */ + +/** \brief Prefetch buffer metadata. + The first 64 bytes of buffer contains most header information + + @param b - (vlib_buffer_t *) pointer to the buffer + @param type - LOAD, STORE. In most cases, STORE is the right answer +*/ + +#define vlib_prefetch_buffer_header(b,type) CLIB_PREFETCH (b, 64, type) + +always_inline vlib_buffer_t * +vlib_buffer_next_contiguous (vlib_buffer_t * b, u32 buffer_bytes) +{ return (void *) (b + 1) + buffer_bytes; } + +always_inline void +vlib_buffer_struct_is_sane (vlib_buffer_t * b) +{ + ASSERT (sizeof (b[0]) % 64 == 0); + + /* Rewrite data must be before and contiguous with packet data. */ + ASSERT (b->pre_data + VLIB_BUFFER_PRE_DATA_SIZE == b->data); +} + +/** \brief Get pointer to current data to process + + @param b - (vlib_buffer_t *) pointer to the buffer + @return - (void *) (b->data + b->current_data) +*/ + +always_inline void * +vlib_buffer_get_current (vlib_buffer_t * b) +{ + /* Check bounds. */ + ASSERT ((signed) b->current_data >= (signed) -VLIB_BUFFER_PRE_DATA_SIZE); + return b->data + b->current_data; +} + +/** \brief Advance current data pointer by the supplied (signed!) 
amount + + @param b - (vlib_buffer_t *) pointer to the buffer + @param l - (word) signed increment +*/ +always_inline void +vlib_buffer_advance (vlib_buffer_t * b, word l) +{ + ASSERT (b->current_length >= l); + b->current_data += l; + b->current_length -= l; +} + +/** \brief Reset current header & length to state they were in when + packet was received. + + @param b - (vlib_buffer_t *) pointer to the buffer +*/ + +always_inline void +vlib_buffer_reset (vlib_buffer_t * b) +{ + b->current_length += clib_max (b->current_data, 0); + b->current_data = 0; +} + +/** \brief Get pointer to buffer's opaque data array + + @param b - (vlib_buffer_t *) pointer to the buffer + @return - (void *) b->opaque +*/ +always_inline void * +vlib_get_buffer_opaque (vlib_buffer_t * b) +{ return (void *) b->opaque; } + +/** \brief Get pointer to buffer's opaque2 data array + + @param b - (vlib_buffer_t *) pointer to the buffer + @return - (void *) b->opaque2 +*/ +always_inline void * +vlib_get_buffer_opaque2 (vlib_buffer_t * b) +{ return (void *) b->opaque2; } + +/* Forward declaration. */ +struct vlib_main_t; + +typedef struct vlib_buffer_free_list_t { + /* Template buffer used to initialize first 16 bytes of buffers + allocated on this free list. */ + vlib_buffer_t buffer_init_template; + + /* Our index into vlib_main_t's buffer_free_list_pool. */ + u32 index; + + /* Number of data bytes for buffers in this free list. */ + u32 n_data_bytes; + + /* Number of buffers to allocate when we need to allocate new buffers + from physmem heap. */ + u32 min_n_buffers_each_physmem_alloc; + + /* Total number of buffers allocated from this free list. */ + u32 n_alloc; + + /* Vector of free buffers. Each element is a byte offset into I/O heap. + Aligned vectors always has naturally aligned vlib_copy_unit_t sized chunks + of buffer indices. Unaligned vector has any left over. This is meant to + speed up copy routines. 
*/ + u32 * aligned_buffers, * unaligned_buffers; + + /* Memory chunks allocated for this free list + recorded here so they can be freed when free list + is deleted. */ + void ** buffer_memory_allocated; + + /* Free list name. */ + u8 * name; + + /* Callback functions to initialize newly allocated buffers. + If null buffers are zeroed. */ + void (* buffer_init_function) (struct vlib_main_t * vm, + struct vlib_buffer_free_list_t * fl, + u32 * buffers, u32 n_buffers); + + /* Callback function to announce that buffers have been + added to the freelist */ + void (* buffers_added_to_freelist_function) + (struct vlib_main_t * vm, + struct vlib_buffer_free_list_t * fl); + + uword buffer_init_function_opaque; +} __attribute__ ((aligned (16))) vlib_buffer_free_list_t; + +typedef struct { + /* Buffer free callback, for subversive activities */ + u32 (*buffer_free_callback) (struct vlib_main_t *vm, + u32 * buffers, + u32 n_buffers, + u32 follow_buffer_next); + /* Pool of buffer free lists. + Multiple free lists exist for packet generator which uses + separate free lists for each packet stream --- so as to avoid + initializing static data for each packet generated. */ + vlib_buffer_free_list_t * buffer_free_list_pool; +#define VLIB_BUFFER_DEFAULT_FREE_LIST_INDEX (0) + +#if DPDK == 1 +/* must be same as dpdk buffer size */ +#define VLIB_BUFFER_DEFAULT_FREE_LIST_BYTES (2048) +#else +#define VLIB_BUFFER_DEFAULT_FREE_LIST_BYTES (512) +#endif + + /* Hash table mapping buffer size (rounded to next unit of + sizeof (vlib_buffer_t)) to free list index. */ + uword * free_list_by_size; + + /* Hash table mapping buffer index into number + 0 => allocated but free, 1 => allocated and not-free. + If buffer index is not in hash table then this buffer + has never been allocated. 
*/ + uword * buffer_known_hash; + + /* List of free-lists needing Blue Light Special announcements */ + vlib_buffer_free_list_t **announce_list; + + /* Vector of rte_mempools per socket */ +#if DPDK == 1 + struct rte_mempool ** pktmbuf_pools; +#endif +} vlib_buffer_main_t; + +typedef struct { + struct vlib_main_t * vlib_main; + + u32 first_buffer, last_buffer; + + union { + struct { + /* Total accumulated bytes in chain starting with first_buffer. */ + u32 n_total_data_bytes; + + /* Max number of bytes to accumulate in chain starting with first_buffer. + As this limit is reached buffers are enqueued to next node. */ + u32 max_n_data_bytes_per_chain; + + /* Next node to enqueue buffers to relative to current process node. */ + u32 next_index; + + /* Free list to use to allocate new buffers. */ + u32 free_list_index; + } tx; + + struct { + /* CLIB fifo of buffer indices waiting to be unserialized. */ + u32 * buffer_fifo; + + /* Event type used to signal that RX buffers have been added to fifo. 
*/ + uword ready_one_time_event; + } rx; + }; +} vlib_serialize_buffer_main_t; + +void serialize_open_vlib_buffer (serialize_main_t * m, struct vlib_main_t * vm, vlib_serialize_buffer_main_t * sm); +void unserialize_open_vlib_buffer (serialize_main_t * m, struct vlib_main_t * vm, vlib_serialize_buffer_main_t * sm); + +u32 serialize_close_vlib_buffer (serialize_main_t * m); +void unserialize_close_vlib_buffer (serialize_main_t * m); +void *vlib_set_buffer_free_callback (struct vlib_main_t *vm, void *fp); + +always_inline u32 +serialize_vlib_buffer_n_bytes (serialize_main_t * m) +{ + serialize_stream_t * s = &m->stream; + vlib_serialize_buffer_main_t * sm + = uword_to_pointer (m->stream.data_function_opaque, vlib_serialize_buffer_main_t *); + return sm->tx.n_total_data_bytes + s->current_buffer_index + vec_len (s->overflow_buffer); +} + +/* + */ + +/** \brief Compile time buffer trajectory tracing option + Turn this on if you run into "bad monkey" contexts, + and you want to know exactly which nodes they've visited... + See vlib/main.c... +*/ +#define VLIB_BUFFER_TRACE_TRAJECTORY 0 + +#if VLIB_BUFFER_TRACE_TRAJECTORY > 0 +#define VLIB_BUFFER_TRACE_TRAJECTORY_INIT(b) (b)->pre_data[0]=0 +#else +#define VLIB_BUFFER_TRACE_TRAJECTORY_INIT(b) +#endif /* VLIB_BUFFER_TRACE_TRAJECTORY */ + +#endif /* included_vlib_buffer_h */ diff --git a/vlib/vlib/buffer_funcs.h b/vlib/vlib/buffer_funcs.h new file mode 100644 index 00000000000..452cdcb26a7 --- /dev/null +++ b/vlib/vlib/buffer_funcs.h @@ -0,0 +1,602 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/* + * buffer_funcs.h: VLIB buffer related functions/inlines + * + * Copyright (c) 2008 Eliot Dresselhaus + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#ifndef included_vlib_buffer_funcs_h +#define included_vlib_buffer_funcs_h + +#include <vppinfra/hash.h> + +/** \file + vlib buffer access methods. 
+*/ + + +/** \brief Translate buffer index into buffer pointer + + @param vm - (vlib_main_t *) vlib main data structure pointer + @param buffer_index - (u32) buffer index + @return - (vlib_buffer_t *) buffer pointer +*/ +always_inline vlib_buffer_t * +vlib_get_buffer (vlib_main_t * vm, u32 buffer_index) +{ + return vlib_physmem_at_offset (&vm->physmem_main, ((uword)buffer_index) + << CLIB_LOG2_CACHE_LINE_BYTES); +} + +/** \brief Translate buffer pointer into buffer index + + @param vm - (vlib_main_t *) vlib main data structure pointer + @param b - (void *) buffer pointer + @return - (u32) buffer index +*/ +always_inline u32 +vlib_get_buffer_index (vlib_main_t * vm, void * p) +{ + uword offset = vlib_physmem_offset_of (&vm->physmem_main, p); + ASSERT((offset % (1<<CLIB_LOG2_CACHE_LINE_BYTES)) == 0); + return offset >> CLIB_LOG2_CACHE_LINE_BYTES; +} + +/** \brief Get next buffer in buffer linklist, or zero for end of list. + + @param vm - (vlib_main_t *) vlib main data structure pointer + @param b - (void *) buffer pointer + @return - (vlib_buffer_t *) next buffer, or NULL +*/ +always_inline vlib_buffer_t * +vlib_get_next_buffer (vlib_main_t * vm, vlib_buffer_t * b) +{ + return (b->flags & VLIB_BUFFER_NEXT_PRESENT + ? 
vlib_get_buffer (vm, b->next_buffer) + : 0); +} + +uword vlib_buffer_length_in_chain_slow_path (vlib_main_t * vm, vlib_buffer_t * b_first); + +/** \brief Get length in bytes of the buffer chain + + @param vm - (vlib_main_t *) vlib main data structure pointer + @param b - (void *) buffer pointer + @return - (uword) length of buffer chain +*/ +always_inline uword +vlib_buffer_length_in_chain (vlib_main_t * vm, vlib_buffer_t * b) +{ + uword l = b->current_length + b->total_length_not_including_first_buffer; + if (PREDICT_FALSE ((b->flags & (VLIB_BUFFER_NEXT_PRESENT + | VLIB_BUFFER_TOTAL_LENGTH_VALID)) + == VLIB_BUFFER_NEXT_PRESENT)) + return vlib_buffer_length_in_chain_slow_path (vm, b); + return l; +} + +/** \brief Get length in bytes of the buffer index buffer chain + + @param vm - (vlib_main_t *) vlib main data structure pointer + @param bi - (u32) buffer index + @return - (uword) length of buffer chain +*/ +always_inline uword +vlib_buffer_index_length_in_chain (vlib_main_t * vm, u32 bi) +{ + vlib_buffer_t * b = vlib_get_buffer (vm, bi); + return vlib_buffer_length_in_chain (vm, b); +} + +/** \brief Copy buffer contents to memory + + @param vm - (vlib_main_t *) vlib main data structure pointer + @param bi - (u32) buffer index + @param contents - (u8 *) memory, <strong>must be large enough</strong> + @return - (uword) length of buffer chain +*/ +always_inline uword +vlib_buffer_contents (vlib_main_t * vm, u32 buffer_index, u8 * contents) +{ + uword content_len = 0; + uword l; + vlib_buffer_t * b; + + while (1) + { + b = vlib_get_buffer (vm, buffer_index); + l = b->current_length; + memcpy (contents + content_len, b->data + b->current_data, l); + content_len += l; + if (! (b->flags & VLIB_BUFFER_NEXT_PRESENT)) + break; + buffer_index = b->next_buffer; + } + + return content_len; +} + +/* Return physical address of buffer->data start. 
*/ +always_inline u64 +vlib_get_buffer_data_physical_address (vlib_main_t * vm, u32 buffer_index) +{ + return vlib_physmem_offset_to_physical (&vm->physmem_main, + (((uword)buffer_index) << + CLIB_LOG2_CACHE_LINE_BYTES) + + STRUCT_OFFSET_OF (vlib_buffer_t, data)); +} + +/** \brief Prefetch buffer metadata by buffer index + The first 64 bytes of buffer contains most header information + + @param vm - (vlib_main_t *) vlib main data structure pointer + @param bi - (u32) buffer index + @param type - LOAD, STORE. In most cases, STORE is the right answer +*/ +/* Prefetch buffer header given index. */ +#define vlib_prefetch_buffer_with_index(vm,bi,type) \ + do { \ + vlib_buffer_t * _b = vlib_get_buffer (vm, bi); \ + vlib_prefetch_buffer_header (_b, type); \ + } while (0) + +#if 0 +/* Iterate over known allocated vlib bufs. You probably do not want + * to do this! + @param vm the vlib_main_t + @param bi found allocated buffer index + @param body operation to perform on buffer index + function executes body for each allocated buffer index + */ +#define vlib_buffer_foreach_allocated(vm,bi,body) \ +do { \ + vlib_main_t * _vmain = (vm); \ + vlib_buffer_main_t * _bmain = &_vmain->buffer_main; \ + hash_pair_t * _vbpair; \ + hash_foreach_pair(_vbpair, _bmain->buffer_known_hash, ({ \ + if (VLIB_BUFFER_KNOWN_ALLOCATED == _vbpair->value[0]) { \ + (bi) = _vbpair->key; \ + body; \ + } \ + })); \ +} while (0) +#endif + +#if DPDK == 0 + +typedef enum { + /* Index is unknown. */ + VLIB_BUFFER_UNKNOWN, + + /* Index is known and free/allocated. */ + VLIB_BUFFER_KNOWN_FREE, + VLIB_BUFFER_KNOWN_ALLOCATED, +} vlib_buffer_known_state_t; + +always_inline vlib_buffer_known_state_t +vlib_buffer_is_known (vlib_main_t * vm, u32 buffer_index) +{ + vlib_buffer_main_t * bm = vm->buffer_main; + ASSERT(os_get_cpu_number() == 0); + + uword * p = hash_get (bm->buffer_known_hash, buffer_index); + return p ? 
p[0] : VLIB_BUFFER_UNKNOWN; +} + +always_inline void +vlib_buffer_set_known_state (vlib_main_t * vm, + u32 buffer_index, + vlib_buffer_known_state_t state) +{ + vlib_buffer_main_t * bm = vm->buffer_main; + ASSERT(os_get_cpu_number() == 0); + hash_set (bm->buffer_known_hash, buffer_index, state); +} + +/* Validates sanity of a single buffer. + Returns format'ed vector with error message if any. */ +u8 * vlib_validate_buffer (vlib_main_t * vm, u32 buffer_index, uword follow_chain); + +/* Validate an array of buffers. As above. */ +u8 * vlib_validate_buffers (vlib_main_t * vm, + u32 * buffers, + uword next_buffer_stride, + uword n_buffers, + vlib_buffer_known_state_t known_state, + uword follow_chain); + +#endif /* DPDK == 0 */ + +clib_error_t * +vlib_buffer_pool_create(vlib_main_t * vm, unsigned num_mbufs, + unsigned mbuf_size, unsigned socket_id); + +/** \brief Allocate buffers into supplied array + + @param vm - (vlib_main_t *) vlib main data structure pointer + @param buffers - (u32 * ) buffer index array + @param n_buffers - (u32) number of buffers requested + @return - (u32) number of buffers actually allocated, may be + less than the number requested or zero +*/ +u32 vlib_buffer_alloc (vlib_main_t * vm, u32 * buffers, u32 n_buffers); + +always_inline u32 +vlib_buffer_round_size (u32 size) +{ return round_pow2 (size, sizeof (vlib_buffer_t)); } + +/** \brief Allocate buffers from specific freelist into supplied array + + @param vm - (vlib_main_t *) vlib main data structure pointer + @param buffers - (u32 * ) buffer index array + @param n_buffers - (u32) number of buffers requested + @return - (u32) number of buffers actually allocated, may be + less than the number requested or zero +*/ +u32 vlib_buffer_alloc_from_free_list (vlib_main_t * vm, + u32 * buffers, + u32 n_buffers, + u32 free_list_index); + +/** \brief Free buffers + Frees the entire buffer chain for each buffer + + @param vm - (vlib_main_t *) vlib main data structure pointer + @param buffers - (u32 * 
) buffer index array + @param n_buffers - (u32) number of buffers to free + +*/ +void vlib_buffer_free (vlib_main_t * vm, + /* pointer to first buffer */ + u32 * buffers, + /* number of buffers to free */ + u32 n_buffers); + +/** \brief Free buffers, does not free the buffer chain for each buffer + + @param vm - (vlib_main_t *) vlib main data structure pointer + @param buffers - (u32 * ) buffer index array + @param n_buffers - (u32) number of buffers to free + +*/ +void vlib_buffer_free_no_next (vlib_main_t * vm, + /* pointer to first buffer */ + u32 * buffers, + /* number of buffers to free */ + u32 n_buffers); + +/** \brief Free one buffer + Shorthand to free a single buffer chain. + + @param vm - (vlib_main_t *) vlib main data structure pointer + @param buffer_index - (u32) buffer index to free +*/ +always_inline void +vlib_buffer_free_one (vlib_main_t * vm, u32 buffer_index) +{ + vlib_buffer_free (vm, &buffer_index, /* n_buffers */ 1); +} + +/* Add/delete buffer free lists. */ +u32 vlib_buffer_create_free_list (vlib_main_t * vm, u32 n_data_bytes, char * fmt, ...); +void vlib_buffer_delete_free_list (vlib_main_t * vm, u32 free_list_index); + +/* Find already existing public free list with given size or create one. */ +u32 vlib_buffer_get_or_create_free_list (vlib_main_t * vm, u32 n_data_bytes, char * fmt, ...); + +always_inline vlib_buffer_free_list_t * +vlib_buffer_get_free_list (vlib_main_t * vm, u32 free_list_index) +{ + vlib_buffer_main_t * bm = vm->buffer_main; + vlib_buffer_free_list_t * f; + + f = pool_elt_at_index (bm->buffer_free_list_pool, free_list_index); + + /* Sanity: indices must match. 
*/ + ASSERT (f->index == free_list_index); + + return f; +} + +always_inline u32 +vlib_buffer_free_list_buffer_size (vlib_main_t * vm, u32 free_list_index) +{ + vlib_buffer_free_list_t * f = vlib_buffer_get_free_list (vm, free_list_index); + return f->n_data_bytes; +} + +void +vlib_aligned_memcpy (void * _dst, void * _src, int n_bytes); + +/* Reasonably fast buffer copy routine. */ +always_inline void +vlib_copy_buffers (u32 * dst, u32 * src, u32 n) +{ + while (n >= 4) + { + dst[0] = src[0]; + dst[1] = src[1]; + dst[2] = src[2]; + dst[3] = src[3]; + dst += 4; + src += 4; + n -= 4; + } + while (n > 0) + { + dst[0] = src[0]; + dst += 1; + src += 1; + n -= 1; + } +} + +always_inline void * +vlib_physmem_alloc_aligned (vlib_main_t * vm, clib_error_t ** error, + uword n_bytes, uword alignment) +{ + void * r = vm->os_physmem_alloc_aligned (&vm->physmem_main, n_bytes, alignment); + if (! r) + *error = clib_error_return (0, "failed to allocate %wd bytes of I/O memory", n_bytes); + else + *error = 0; + return r; +} + +/* By default allocate I/O memory with cache line alignment. */ +always_inline void * +vlib_physmem_alloc (vlib_main_t * vm, clib_error_t ** error, uword n_bytes) +{ return vlib_physmem_alloc_aligned (vm, error, n_bytes, CLIB_CACHE_LINE_BYTES); } + +always_inline void +vlib_physmem_free (vlib_main_t * vm, void * mem) +{ return vm->os_physmem_free (mem); } + +always_inline u64 +vlib_physmem_virtual_to_physical (vlib_main_t * vm, void * mem) +{ + vlib_physmem_main_t * pm = &vm->physmem_main; + uword o = pointer_to_uword (mem) - pm->virtual.start; + return vlib_physmem_offset_to_physical (pm, o); +} + +/* Append given data to end of buffer, possibly allocating new buffers. */ +u32 vlib_buffer_add_data (vlib_main_t * vm, + u32 free_list_index, + u32 buffer_index, + void * data, u32 n_data_bytes); + +format_function_t format_vlib_buffer, format_vlib_buffer_and_data, format_vlib_buffer_contents; + +typedef struct { + /* Vector of packet data. 
*/ + u8 * packet_data; + +#if DPDK == 0 + /* Number of buffers to allocate in each call to physmem + allocator. */ + u32 min_n_buffers_each_physmem_alloc; + + /* Buffer free list for this template. */ + u32 free_list_index; + + u32 * free_buffers; +#endif +} vlib_packet_template_t; + +void vlib_packet_template_get_packet_helper (vlib_main_t * vm, + vlib_packet_template_t * t); + +void vlib_packet_template_init (vlib_main_t * vm, + vlib_packet_template_t * t, + void * packet_data, + uword n_packet_data_bytes, + uword min_n_buffers_each_physmem_alloc, + char * fmt, ...); + +void * +vlib_packet_template_get_packet (vlib_main_t * vm, + vlib_packet_template_t * t, + u32 * bi_result); + +always_inline void +vlib_packet_template_free (vlib_main_t * vm, vlib_packet_template_t * t) +{ + vec_free (t->packet_data); +} + +always_inline u32 +unserialize_vlib_buffer_n_bytes (serialize_main_t * m) +{ + serialize_stream_t * s = &m->stream; + vlib_serialize_buffer_main_t * sm + = uword_to_pointer (m->stream.data_function_opaque, vlib_serialize_buffer_main_t *); + vlib_main_t * vm = sm->vlib_main; + u32 n, * f; + + n = s->n_buffer_bytes - s->current_buffer_index; + if (sm->last_buffer != ~0) + { + vlib_buffer_t * b = vlib_get_buffer (vm, sm->last_buffer); + while (b->flags & VLIB_BUFFER_NEXT_PRESENT) + { + b = vlib_get_buffer (vm, b->next_buffer); + n += b->current_length; + } + } + + clib_fifo_foreach (f, sm->rx.buffer_fifo, ({ + n += vlib_buffer_index_length_in_chain (vm, f[0]); + })); + + return n; +} + +typedef union { + vlib_buffer_t b; + vlib_copy_unit_t i[sizeof (vlib_buffer_t) / sizeof (vlib_copy_unit_t)]; +} vlib_buffer_union_t; + +/* Set a buffer quickly into "uninitialized" state. We want this to + be extremely cheap and arrange for all fields that need to be + initialized to be in the first 128 bits of the buffer. 
*/ +always_inline void +vlib_buffer_init_for_free_list (vlib_buffer_t * _dst, + vlib_buffer_free_list_t * fl) +{ + vlib_buffer_union_t * dst = (vlib_buffer_union_t *) _dst; + vlib_buffer_union_t * src = (vlib_buffer_union_t *) &fl->buffer_init_template; + + /* Make sure buffer template is sane. */ + ASSERT (fl->index == fl->buffer_init_template.free_list_index); + + /* Copy template from src->current_data thru src->free_list_index */ + dst->i[0] = src->i[0]; + if (1 * sizeof (dst->i[0]) < 16) + dst->i[1] = src->i[1]; + if (2 * sizeof (dst->i[0]) < 16) + dst->i[2] = src->i[2]; + + /* Make sure it really worked. */ +#define _(f) ASSERT (dst->b.f == src->b.f) + _ (current_data); + _ (current_length); + _ (flags); + _ (free_list_index); +#undef _ + ASSERT (dst->b.total_length_not_including_first_buffer == 0); +} + +always_inline void +vlib_buffer_init_two_for_free_list (vlib_buffer_t * _dst0, + vlib_buffer_t * _dst1, + vlib_buffer_free_list_t * fl) +{ + vlib_buffer_union_t * dst0 = (vlib_buffer_union_t *) _dst0; + vlib_buffer_union_t * dst1 = (vlib_buffer_union_t *) _dst1; + vlib_buffer_union_t * src = (vlib_buffer_union_t *) &fl->buffer_init_template; + + /* Make sure buffer template is sane. */ + ASSERT (fl->index == fl->buffer_init_template.free_list_index); + + /* Copy template from src->current_data thru src->free_list_index */ + dst0->i[0] = dst1->i[0] = src->i[0]; + if (1 * sizeof (dst0->i[0]) < 16) + dst0->i[1] = dst1->i[1] = src->i[1]; + if (2 * sizeof (dst0->i[0]) < 16) + dst0->i[2] = dst1->i[2] = src->i[2]; + + /* Make sure it really worked. 
*/ +#define _(f) ASSERT (dst0->b.f == src->b.f && dst1->b.f == src->b.f) + _ (current_data); + _ (current_length); + _ (flags); + _ (free_list_index); +#undef _ + ASSERT (dst0->b.total_length_not_including_first_buffer == 0); + ASSERT (dst1->b.total_length_not_including_first_buffer == 0); +} + +#if CLIB_DEBUG > 0 +u32 * vlib_buffer_state_validation_lock; +uword * vlib_buffer_state_validation_hash; +void * vlib_buffer_state_heap; +#endif + +static inline void +vlib_validate_buffer_in_use (vlib_buffer_t * b, u32 expected) +{ +#if CLIB_DEBUG > 0 + uword * p; + void * oldheap; + + oldheap = clib_mem_set_heap (vlib_buffer_state_heap); + + while (__sync_lock_test_and_set (vlib_buffer_state_validation_lock, 1)) + ; + + p = hash_get (vlib_buffer_state_validation_hash, b); + + /* If we don't know about b, declare it to be in the expected state */ + if (!p) + { + hash_set (vlib_buffer_state_validation_hash, b, expected); + goto out; + } + + if (p[0] != expected) + { + void cj_stop(void); + u32 bi; + vlib_main_t * vm = &vlib_global_main; + + cj_stop(); + + bi = vlib_get_buffer_index (vm, b); + + clib_mem_set_heap (oldheap); + clib_warning ("%.6f buffer %llx (%d): %s, not %s", + vlib_time_now(vm), bi, + p[0] ? "busy" : "free", + expected ? 
"busy" : "free"); + os_panic(); + } + out: + CLIB_MEMORY_BARRIER(); + *vlib_buffer_state_validation_lock = 0; + clib_mem_set_heap (oldheap); +#endif +} + +static inline void +vlib_validate_buffer_set_in_use (vlib_buffer_t * b, u32 expected) +{ +#if CLIB_DEBUG > 0 + void * oldheap; + + oldheap = clib_mem_set_heap (vlib_buffer_state_heap); + + while (__sync_lock_test_and_set (vlib_buffer_state_validation_lock, 1)) + ; + + hash_set (vlib_buffer_state_validation_hash, b, expected); + + CLIB_MEMORY_BARRIER(); + *vlib_buffer_state_validation_lock = 0; + clib_mem_set_heap (oldheap); +#endif +} + +#endif /* included_vlib_buffer_funcs_h */ diff --git a/vlib/vlib/buffer_node.h b/vlib/vlib/buffer_node.h new file mode 100644 index 00000000000..0fa5c8093ca --- /dev/null +++ b/vlib/vlib/buffer_node.h @@ -0,0 +1,197 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +/* + * buffer_node.h: VLIB buffer handling node helper macros/inlines + * + * Copyright (c) 2008 Eliot Dresselhaus + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ */ + +#ifndef included_vlib_buffer_node_h +#define included_vlib_buffer_node_h + +#define vlib_validate_buffer_enqueue_x2(vm,node,next_index,to_next,n_left_to_next,bi0,bi1,next0,next1) \ +do { \ + int enqueue_code = (next0 != next_index) + 2*(next1 != next_index); \ + \ + if (PREDICT_FALSE (enqueue_code != 0)) \ + { \ + switch (enqueue_code) \ + { \ + case 1: \ + /* A B A */ \ + to_next[-2] = bi1; \ + to_next -= 1; \ + n_left_to_next += 1; \ + vlib_set_next_frame_buffer (vm, node, next0, bi0); \ + break; \ + \ + case 2: \ + /* A A B */ \ + to_next -= 1; \ + n_left_to_next += 1; \ + vlib_set_next_frame_buffer (vm, node, next1, bi1); \ + break; \ + \ + case 3: \ + /* A B B or A B C */ \ + to_next -= 2; \ + n_left_to_next += 2; \ + vlib_set_next_frame_buffer (vm, node, next0, bi0); \ + vlib_set_next_frame_buffer (vm, node, next1, bi1); \ + if (next0 == next1) \ + { \ + vlib_put_next_frame (vm, node, next_index, \ + n_left_to_next); \ + next_index = next1; \ + vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); \ + } \ + } \ + } \ +} while (0) + +#define vlib_validate_buffer_enqueue_x1(vm,node,next_index,to_next,n_left_to_next,bi0,next0) \ +do { \ + if (PREDICT_FALSE (next0 != next_index)) \ + { \ + vlib_put_next_frame (vm, node, next_index, n_left_to_next + 1); \ + next_index = next0; \ + vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); \ + \ + to_next[0] = bi0; \ + to_next += 1; \ + n_left_to_next -= 1; \ + } \ +} while (0) + +always_inline uword +generic_buffer_node_inline (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame, + uword sizeof_trace, + void * opaque1, + uword opaque2, + void (* two_buffers) (vlib_main_t * vm, + void * opaque1, + uword opaque2, + vlib_buffer_t * b0, vlib_buffer_t * b1, + u32 * next0, u32 * next1), + void (* one_buffer) (vlib_main_t * vm, + void * opaque1, + uword opaque2, + vlib_buffer_t * b0, + u32 * next0)) +{ + u32 n_left_from, * from, * to_next; + u32 next_index; + + from = 
vlib_frame_vector_args (frame); + n_left_from = frame->n_vectors; + next_index = node->cached_next_index; + + if (node->flags & VLIB_NODE_FLAG_TRACE) + vlib_trace_frame_buffers_only (vm, node, from, frame->n_vectors, + /* stride */ 1, sizeof_trace); + + while (n_left_from > 0) + { + u32 n_left_to_next; + + vlib_get_next_frame (vm, node, next_index, + to_next, n_left_to_next); + + while (n_left_from >= 4 && n_left_to_next >= 2) + { + vlib_buffer_t * p0, * p1; + u32 pi0, next0; + u32 pi1, next1; + + /* Prefetch next iteration. */ + { + vlib_buffer_t * p2, * p3; + + p2 = vlib_get_buffer (vm, from[2]); + p3 = vlib_get_buffer (vm, from[3]); + + vlib_prefetch_buffer_header (p2, LOAD); + vlib_prefetch_buffer_header (p3, LOAD); + + CLIB_PREFETCH (p2->data, 64, LOAD); + CLIB_PREFETCH (p3->data, 64, LOAD); + } + + pi0 = to_next[0] = from[0]; + pi1 = to_next[1] = from[1]; + from += 2; + to_next += 2; + n_left_from -= 2; + n_left_to_next -= 2; + + p0 = vlib_get_buffer (vm, pi0); + p1 = vlib_get_buffer (vm, pi1); + + two_buffers (vm, opaque1, opaque2, p0, p1, &next0, &next1); + + vlib_validate_buffer_enqueue_x2 (vm, node, next_index, + to_next, n_left_to_next, + pi0, pi1, next0, next1); + } + + while (n_left_from > 0 && n_left_to_next > 0) + { + vlib_buffer_t * p0; + u32 pi0, next0; + + pi0 = from[0]; + to_next[0] = pi0; + from += 1; + to_next += 1; + n_left_from -= 1; + n_left_to_next -= 1; + + p0 = vlib_get_buffer (vm, pi0); + + one_buffer (vm, opaque1, opaque2, p0, &next0); + + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, + to_next, n_left_to_next, + pi0, next0); + } + + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + + return frame->n_vectors; +} + +#endif /* included_vlib_buffer_node_h */ diff --git a/vlib/vlib/cli.c b/vlib/vlib/cli.c new file mode 100644 index 00000000000..e5163f260e1 --- /dev/null +++ b/vlib/vlib/cli.c @@ -0,0 +1,1015 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. 
+ * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/* + * cli.c: command line interface + * + * Copyright (c) 2008 Eliot Dresselhaus + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#include <vlib/vlib.h> + +/* Root of all show commands. */ +VLIB_CLI_COMMAND (vlib_cli_show_command, static) = { + .path = "show", + .short_help = "Show commands", +}; + +/* Root of all clear commands. 
*/ +VLIB_CLI_COMMAND (vlib_cli_clear_command, static) = { + .path = "clear", + .short_help = "Clear commands", +}; + +/* Root of all set commands. */ +VLIB_CLI_COMMAND (vlib_cli_set_command, static) = { + .path = "set", + .short_help = "Set commands", +}; + +/* Root of all test commands. */ +VLIB_CLI_COMMAND (vlib_cli_test_command, static) = { + .path = "test", + .short_help = "Test commands", +}; + +/* Returns bitmap of commands which match key. */ +static uword * +vlib_cli_sub_command_match (vlib_cli_command_t * c, unformat_input_t * input) +{ + int i, n; + uword * match = 0; + vlib_cli_parse_position_t * p; + + unformat_skip_white_space (input); + + for (i = 0; ; i++) + { + uword k; + + k = unformat_get_input (input); + switch (k) + { + case 'a' ... 'z': + case 'A' ... 'Z': + case '0' ... '9': + case '-': case '_': + break; + + case ' ': case '\t': case '\r': case '\n': + case UNFORMAT_END_OF_INPUT: + /* White space or end of input removes any non-white + matches that were before possible. */ + if (i < vec_len (c->sub_command_positions) + && clib_bitmap_count_set_bits (match) > 1) + { + p = vec_elt_at_index (c->sub_command_positions, i); + for (n = 0; n < vec_len (p->bitmaps); n++) + match = clib_bitmap_andnot (match, p->bitmaps[n]); + } + goto done; + + default: + unformat_put_input (input); + goto done; + } + + if (i >= vec_len (c->sub_command_positions)) + { + no_match: + clib_bitmap_free (match); + return 0; + } + + p = vec_elt_at_index (c->sub_command_positions, i); + if (vec_len (p->bitmaps) == 0) + goto no_match; + + n = k - p->min_char; + if (n < 0 || n >= vec_len (p->bitmaps)) + goto no_match; + + if (i == 0) + match = clib_bitmap_dup (p->bitmaps[n]); + else + match = clib_bitmap_and (match, p->bitmaps[n]); + + if (clib_bitmap_is_zero (match)) + goto no_match; + } + + done: + return match; +} + +/* Looks for string based sub-input formatted { SUB-INPUT }. 
*/ +static uword unformat_vlib_cli_sub_input (unformat_input_t * i, va_list * args) +{ + unformat_input_t * sub_input = va_arg (*args, unformat_input_t *); + u8 * s; + uword c; + + while (1) + { + c = unformat_get_input (i); + switch (c) + { + case ' ': case '\t': + case '\n': case '\r': + case '\f': + break; + + case '{': + default: + /* Put back paren. */ + if (c != UNFORMAT_END_OF_INPUT) + unformat_put_input (i); + + if (c == '{' && unformat (i, "%v", &s)) + { + unformat_init_vector (sub_input, s); + return 1; + } + return 0; + } + } + return 0; +} + +static vlib_cli_command_t * +get_sub_command (vlib_cli_main_t * cm, vlib_cli_command_t * parent, u32 si) +{ + vlib_cli_sub_command_t * s = vec_elt_at_index (parent->sub_commands, si); + return vec_elt_at_index (cm->commands, s->index); +} + +static uword unformat_vlib_cli_sub_command (unformat_input_t * i, va_list * args) +{ + vlib_main_t * vm = va_arg (*args, vlib_main_t *); + vlib_cli_command_t * c = va_arg (*args, vlib_cli_command_t *); + vlib_cli_command_t ** result = va_arg (*args, vlib_cli_command_t **); + vlib_cli_main_t * cm = &vm->cli_main; + uword * match_bitmap, is_unique, index; + + { + vlib_cli_sub_rule_t * sr; + vlib_cli_parse_rule_t * r; + vec_foreach (sr, c->sub_rules) + { + void ** d; + r = vec_elt_at_index (cm->parse_rules, sr->rule_index); + vec_add2 (cm->parse_rule_data, d, 1); + vec_reset_length (d[0]); + if (r->data_size) + d[0] = _vec_resize (d[0], + /* length increment */ 1, + r->data_size, + /* header_bytes */ 0, + /* data align */ sizeof (uword)); + if (unformat_user (i, r->unformat_function, vm, d[0])) + { + *result = vec_elt_at_index (cm->commands, sr->command_index); + return 1; + } + } + } + + match_bitmap = vlib_cli_sub_command_match (c, i); + is_unique = clib_bitmap_count_set_bits (match_bitmap) == 1; + index = ~0; + if (is_unique) + { + index = clib_bitmap_first_set (match_bitmap); + *result = get_sub_command (cm, c, index); + } + clib_bitmap_free (match_bitmap); + + return 
is_unique; +} + +static u8 * format_vlib_cli_command_help (u8 * s, va_list * args) +{ + vlib_cli_command_t * c = va_arg (*args, vlib_cli_command_t *); + int is_long = va_arg (*args, int); + if (is_long && c->long_help) + s = format (s, "%s", c->long_help); + else if (c->short_help) + s = format (s, "%s", c->short_help); + else + s = format (s, "%v commands", c->path); + return s; +} + +static u8 * format_vlib_cli_parse_rule_name (u8 * s, va_list * args) +{ + vlib_cli_parse_rule_t * r = va_arg (*args, vlib_cli_parse_rule_t *); + return format (s, "<%U>", format_c_identifier, r->name); +} + +static u8 * format_vlib_cli_path (u8 * s, va_list * args) +{ + u8 * path = va_arg (*args, u8 *); + int i, in_rule; + in_rule = 0; + for (i = 0; i < vec_len (path); i++) + { + switch (path[i]) + { + case '%': + in_rule = 1; + vec_add1 (s, '<'); /* start of <RULE> */ + break; + + case '_': + /* _ -> space in rules. */ + vec_add1 (s, in_rule ? ' ' : '_'); + break; + + case ' ': + if (in_rule) + { + vec_add1 (s, '>'); /* end of <RULE> */ + in_rule = 0; + } + vec_add1 (s, ' '); + break; + + default: + vec_add1 (s, path[i]); + break; + } + } + + if (in_rule) + vec_add1 (s, '>'); /* terminate <RULE> */ + + return s; +} + +static vlib_cli_command_t * +all_subs (vlib_cli_main_t * cm, + vlib_cli_command_t * subs, + u32 command_index) +{ + vlib_cli_command_t * c = vec_elt_at_index (cm->commands, command_index); + vlib_cli_sub_command_t * sc; + vlib_cli_sub_rule_t * sr; + + if (c->function) + vec_add1 (subs, c[0]); + + vec_foreach (sr, c->sub_rules) + subs = all_subs (cm, subs, sr->command_index); + vec_foreach (sc, c->sub_commands) + subs = all_subs (cm, subs, sc->index); + + return subs; +} + +static clib_error_t * +vlib_cli_dispatch_sub_commands (vlib_main_t * vm, + vlib_cli_main_t * cm, + unformat_input_t * input, + uword parent_command_index) +{ + vlib_cli_command_t * parent, * c; + clib_error_t * error = 0; + unformat_input_t sub_input; + u8 * string; + uword is_main_dispatch = cm == 
&vm->cli_main;
+
+  parent = vec_elt_at_index (cm->commands, parent_command_index);
+  if (is_main_dispatch && unformat (input, "help"))
+    {
+      uword help_at_end_of_line, i;
+
+      help_at_end_of_line = unformat_check_input (input) == UNFORMAT_END_OF_INPUT;
+      while (1)
+	{
+	  c = parent;
+	  if (unformat_user (input, unformat_vlib_cli_sub_command, vm, c, &parent))
+	    ;
+
+	  /* FIX: was `! unformat_check_input (input) == UNFORMAT_END_OF_INPUT`.
+	     `!` binds tighter than `==`, so that compared 0/1 against
+	     UNFORMAT_END_OF_INPUT and was effectively always false; trailing
+	     garbage after "help ..." was silently swallowed instead of being
+	     reported via `goto unknown`. */
+	  else if (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+	    goto unknown;
+
+	  else
+	    break;
+	}
+
+      /* help SUB-COMMAND => long format help.
+         "help" at end of line: show all commands. */
+      if (! help_at_end_of_line)
+	vlib_cli_output (vm, "%U", format_vlib_cli_command_help, c, /* is_long */ 1);
+
+      else if (vec_len (c->sub_commands) + vec_len (c->sub_rules) == 0)
+	vlib_cli_output (vm, "%v: no sub-commands", c->path);
+
+      else
+	{
+	  vlib_cli_sub_command_t * sc;
+	  vlib_cli_sub_rule_t * sr, * subs;
+
+	  subs = vec_dup (c->sub_rules);
+
+	  /* Add in rules if any. */
+	  vec_foreach (sc, c->sub_commands)
+	    {
+	      vec_add2 (subs, sr, 1);
+	      sr->name = sc->name;
+	      sr->command_index = sc->index;
+	      sr->rule_index = ~0;
+	    }
+
+	  vec_sort (subs, c1, c2, vec_cmp (c1->name, c2->name));
+
+	  for (i = 0; i < vec_len (subs); i++)
+	    {
+	      vlib_cli_command_t * d;
+	      vlib_cli_parse_rule_t * r;
+
+	      d = vec_elt_at_index (cm->commands, subs[i].command_index);
+	      r = subs[i].rule_index != ~0 ?
vec_elt_at_index (cm->parse_rules, subs[i].rule_index) : 0; + + if (r) + vlib_cli_output + (vm, " %-30U %U", + format_vlib_cli_parse_rule_name, r, + format_vlib_cli_command_help, d, /* is_long */ 0); + else + vlib_cli_output + (vm, " %-30v %U", + subs[i].name, + format_vlib_cli_command_help, d, /* is_long */ 0); + } + + vec_free (subs); + } + } + + else if (is_main_dispatch && (unformat (input, "choices") || unformat (input, "?"))) + { + vlib_cli_command_t * sub, * subs; + + subs = all_subs (cm, 0, parent_command_index); + vec_sort (subs, c1, c2, vec_cmp (c1->path, c2->path)); + vec_foreach (sub, subs) + vlib_cli_output (vm, " %-40U %U", + format_vlib_cli_path, sub->path, + format_vlib_cli_command_help, sub, /* is_long */ 0); + vec_free (subs); + } + + else if (unformat (input, "comment %v", &string)) + { + vec_free (string); + } + + else if (unformat (input, "uncomment %U", + unformat_vlib_cli_sub_input, &sub_input)) + { + error = vlib_cli_dispatch_sub_commands (vm, cm, &sub_input, parent_command_index); + unformat_free (&sub_input); + } + + else if (unformat_user (input, unformat_vlib_cli_sub_command, vm, parent, &c)) + { + unformat_input_t * si; + uword has_sub_commands = vec_len (c->sub_commands) + vec_len (c->sub_rules) > 0; + + si = input; + if (unformat_user (input, unformat_vlib_cli_sub_input, &sub_input)) + si = &sub_input; + + if (has_sub_commands) + error = vlib_cli_dispatch_sub_commands (vm, cm, si, c - cm->commands); + + if (has_sub_commands && ! error) + /* Found valid sub-command. */; + + else if (c->function) + { + clib_error_t * c_error; + + /* Skip white space for benefit of called function. 
*/ + unformat_skip_white_space (si); + + if (unformat (si, "?")) + { + vlib_cli_output (vm, " %-40U %U", + format_vlib_cli_path, c->path, + format_vlib_cli_command_help, c, /* is_long */ 0); + } + else + { + if (!c->is_mp_safe) + vlib_worker_thread_barrier_sync(vm); + + c_error = c->function (vm, si, c); + + if (!c->is_mp_safe) + vlib_worker_thread_barrier_release(vm); + + if (c_error) + { + error = clib_error_return (0, "%v: %v", c->path, c_error->what); + clib_error_free (c_error); + /* Free sub input. */ + if (si != input) + unformat_free (si); + + return error; + } + } + + /* Free any previous error. */ + clib_error_free (error); + } + + else if (! error) + error = clib_error_return (0, "%v: no sub-commands", c->path); + + /* Free sub input. */ + if (si != input) + unformat_free (si); + } + + else + goto unknown; + + return error; + + unknown: + if (parent->path) + return clib_error_return (0, "%v: unknown input `%U'", parent->path, format_unformat_error, input); + else + return clib_error_return (0, "unknown input `%U'", format_unformat_error, input); +} + + +void vlib_unix_error_report (vlib_main_t *, clib_error_t *) + __attribute__ ((weak)); + +void vlib_unix_error_report (vlib_main_t * vm, clib_error_t * error) { } + +/* Process CLI input. */ +void vlib_cli_input (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_output_function_t * function, + uword function_arg) +{ + vlib_cli_main_t * cm = &vm->cli_main; + clib_error_t * error; + vlib_cli_output_function_t * save_function; + uword save_function_arg; + + save_function = cm->output_function; + save_function_arg = cm->output_function_arg; + + cm->output_function = function; + cm->output_function_arg = function_arg; + + do { + vec_reset_length (cm->parse_rule_data); + error = vlib_cli_dispatch_sub_commands (vm, &vm->cli_main, input, /* parent */ 0); + } while (! error && ! 
unformat (input, "%U", unformat_eof)); + + if (error) + { + vlib_cli_output (vm, "%v", error->what); + vlib_unix_error_report (vm, error); + clib_error_free (error); + } + + cm->output_function = save_function; + cm->output_function_arg = save_function_arg; +} + +/* Output to current CLI connection. */ +void vlib_cli_output (vlib_main_t * vm, char * fmt, ...) +{ + vlib_cli_main_t * cm = &vm->cli_main; + va_list va; + u8 * s; + + va_start (va, fmt); + s = va_format (0, fmt, &va); + va_end (va); + + /* Terminate with \n if not present. */ + if (vec_len (s) > 0 && s[vec_len (s)-1] != '\n') + vec_add1 (s, '\n'); + + if (! cm->output_function) + fformat (stdout, "%v", s); + else + cm->output_function (cm->output_function_arg, s, vec_len (s)); + + vec_free (s); +} + +static clib_error_t * +show_memory_usage (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + int verbose = 0; + clib_error_t * error; + u32 index = 0; + + while (unformat_check_input(input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (input, "verbose")) + verbose = 1; + else { + error = clib_error_return (0, "unknown input `%U'", + format_unformat_error, input); + return error; + } + } + + foreach_vlib_main ( + ({ + vlib_cli_output (vm, "Thread %d %v\n", index, vlib_worker_threads[index].name); + vlib_cli_output (vm, "%U\n", format_mheap, clib_per_cpu_mheaps[index], verbose); + index++; + })); + return 0; +} + +VLIB_CLI_COMMAND (show_memory_usage_command, static) = { + .path = "show memory", + .short_help = "Show current memory usage", + .function = show_memory_usage, +}; + +static clib_error_t * +enable_disable_memory_trace (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + clib_error_t * error = 0; + int enable; + + if (! 
unformat_user (input, unformat_vlib_enable_disable, &enable)) + { + error = clib_error_return (0, "expecting enable/on or disable/off"); + goto done; + } + + clib_mem_trace (enable); + + done: + return error; +} + +VLIB_CLI_COMMAND (enable_disable_memory_trace_command, static) = { + .path = "memory-trace", + .short_help = "Enable/disable memory allocation trace", + .function = enable_disable_memory_trace, +}; + + +static clib_error_t * +test_heap_validate (vlib_main_t * vm, unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + clib_error_t * error = 0; + void * heap; + mheap_t *mheap; + + if (unformat(input, "on")) { + foreach_vlib_main({ + heap = clib_per_cpu_mheaps[this_vlib_main->cpu_index]; + mheap = mheap_header(heap); + mheap->flags |= MHEAP_FLAG_VALIDATE; + // Turn off small object cache because it delays detection of errors + mheap->flags &= ~MHEAP_FLAG_SMALL_OBJECT_CACHE; + }); + + } else if (unformat(input, "off")) { + foreach_vlib_main({ + heap = clib_per_cpu_mheaps[this_vlib_main->cpu_index]; + mheap = mheap_header(heap); + mheap->flags &= ~MHEAP_FLAG_VALIDATE; + mheap->flags |= MHEAP_FLAG_SMALL_OBJECT_CACHE; + }); + + } else if (unformat(input, "now")) { + foreach_vlib_main({ + heap = clib_per_cpu_mheaps[this_vlib_main->cpu_index]; + mheap = mheap_header(heap); + mheap_validate(heap); + }); + vlib_cli_output(vm, "heap validation complete"); + + } else { + return clib_error_return(0, "unknown input `%U'", + format_unformat_error, input); + } + + return error; +} + +VLIB_CLI_COMMAND (cmd_test_heap_validate,static) = { + .path = "test heap-validate", + .short_help = "<on/off/now> validate heap on future allocs/frees or right now", + .function = test_heap_validate, +}; + + +static uword vlib_cli_normalize_path (char * input, char ** result) +{ + char * i = input; + char * s = 0; + uword l = 0; + uword index_of_last_space = ~0; + + while (*i != 0) + { + u8 c = *i++; + /* Multiple white space -> single space. 
*/ + switch (c) + { + case ' ': + case '\t': + case '\n': + case '\r': + if (l > 0 && s[l-1] != ' ') + { + vec_add1 (s, ' '); + l++; + } + break; + + default: + if (l > 0 && s[l-1] == ' ') + index_of_last_space = vec_len (s); + vec_add1 (s, c); + l++; + break; + } + } + + /* Remove any extra space at end. */ + if (l > 0 && s[l-1] == ' ') + _vec_len (s) -= 1; + + *result = s; + return index_of_last_space; +} + +always_inline uword +parent_path_len (char * path) +{ + word i; + for (i = vec_len (path) - 1; i >= 0; i--) + { + if (path[i] == ' ') + return i; + } + return ~0; +} + +static void add_sub_command (vlib_cli_main_t * cm, + uword parent_index, + uword child_index) +{ + vlib_cli_command_t * p, * c; + vlib_cli_sub_command_t * sub_c; + u8 * sub_name; + word i, l; + + p = vec_elt_at_index (cm->commands, parent_index); + c = vec_elt_at_index (cm->commands, child_index); + + l = parent_path_len (c->path); + if (l == ~0) + sub_name = vec_dup ((u8 *) c->path); + else + { + ASSERT (l + 1 < vec_len (c->path)); + sub_name = 0; + vec_add (sub_name, c->path + l + 1, vec_len (c->path) - (l + 1)); + } + + if (sub_name[0] == '%') + { + uword * q; + vlib_cli_sub_rule_t * sr; + + /* Remove %. */ + vec_delete (sub_name, 1, 0); + + if (! p->sub_rule_index_by_name) + p->sub_rule_index_by_name + = hash_create_vec (/* initial length */ 32, + sizeof (sub_name[0]), + sizeof (uword)); + q = hash_get_mem (p->sub_rule_index_by_name, sub_name); + if (q) + { + sr = vec_elt_at_index (p->sub_rules, q[0]); + ASSERT (sr->command_index == child_index); + return; + } + + q = hash_get_mem (cm->parse_rule_index_by_name, sub_name); + if (! q) + clib_error ("reference to unknown rule `%%%v' in path `%v'", + sub_name, c->path); + + hash_set_mem (p->sub_rule_index_by_name, sub_name, vec_len (p->sub_rules)); + vec_add2 (p->sub_rules, sr, 1); + sr->name = sub_name; + sr->rule_index = q[0]; + sr->command_index = child_index; + return; + } + + if (! 
p->sub_command_index_by_name) + p->sub_command_index_by_name + = hash_create_vec (/* initial length */ 32, + sizeof (c->path[0]), + sizeof (uword)); + + /* Check if sub-command has already been created. */ + if (hash_get_mem (p->sub_command_index_by_name, sub_name)) + { + vec_free (sub_name); + return; + } + + vec_add2 (p->sub_commands, sub_c, 1); + sub_c->index = child_index; + sub_c->name = sub_name; + hash_set_mem (p->sub_command_index_by_name, sub_c->name, sub_c - p->sub_commands); + + vec_validate (p->sub_command_positions, vec_len (sub_c->name) - 1); + for (i = 0; i < vec_len (sub_c->name); i++) + { + int n; + vlib_cli_parse_position_t * pos; + + pos = vec_elt_at_index (p->sub_command_positions, i); + + if (! pos->bitmaps) + pos->min_char = sub_c->name[i]; + + n = sub_c->name[i] - pos->min_char; + if (n < 0) + { + pos->min_char = sub_c->name[i]; + vec_insert (pos->bitmaps, -n, 0); + n = 0; + } + + vec_validate (pos->bitmaps, n); + pos->bitmaps[n] = clib_bitmap_ori (pos->bitmaps[n], sub_c - p->sub_commands); + } +} + +static void +vlib_cli_make_parent (vlib_cli_main_t * cm, uword ci) +{ + uword p_len, pi, * p; + char * p_path; + vlib_cli_command_t * c, * parent; + + /* Root command (index 0) should have already been added. */ + ASSERT (vec_len (cm->commands) > 0); + + c = vec_elt_at_index (cm->commands, ci); + p_len = parent_path_len (c->path); + + /* No space? Parent is root command. */ + if (p_len == ~0) + { + add_sub_command (cm, 0, ci); + return; + } + + p_path = 0; + vec_add (p_path, c->path, p_len); + + p = hash_get_mem (cm->command_index_by_path, p_path); + + /* Parent exists? */ + if (! p) + { + /* Parent does not exist; create it. */ + vec_add2 (cm->commands, parent, 1); + parent->path = p_path; + hash_set_mem (cm->command_index_by_path, parent->path, parent - cm->commands); + pi = parent - cm->commands; + } + else + { + pi = p[0]; + vec_free (p_path); + } + + add_sub_command (cm, pi, ci); + + /* Create parent's parent. */ + if (! 
p) + vlib_cli_make_parent (cm, pi); +} + +always_inline uword +vlib_cli_command_is_empty (vlib_cli_command_t * c) +{ + return (c->long_help == 0 + && c->short_help == 0 + && c->function == 0); +} + +clib_error_t * vlib_cli_register (vlib_main_t * vm, vlib_cli_command_t * c) +{ + vlib_cli_main_t * cm = &vm->cli_main; + clib_error_t * error = 0; + uword ci, * p; + char * normalized_path; + + if ((error = vlib_call_init_function (vm, vlib_cli_init))) + return error; + + (void) vlib_cli_normalize_path (c->path, &normalized_path); + + if (! cm->command_index_by_path) + cm->command_index_by_path = hash_create_vec (/* initial length */ 32, + sizeof (c->path[0]), + sizeof (uword)); + + /* See if command already exists with given path. */ + p = hash_get_mem (cm->command_index_by_path, normalized_path); + if (p) + { + vlib_cli_command_t * d; + + ci = p[0]; + d = vec_elt_at_index (cm->commands, ci); + + /* If existing command was created via vlib_cli_make_parent + replaced it with callers data. */ + if (vlib_cli_command_is_empty (d)) + { + vlib_cli_command_t save = d[0]; + + ASSERT (! vlib_cli_command_is_empty (c)); + + /* Copy callers fields. */ + d[0] = c[0]; + + /* Save internal fields. */ + d->path = save.path; + d->sub_commands = save.sub_commands; + d->sub_command_index_by_name = save.sub_command_index_by_name; + d->sub_command_positions = save.sub_command_positions; + d->sub_rules = save.sub_rules; + } + else + error = clib_error_return (0, "duplicate command name with path %v", normalized_path); + + vec_free (normalized_path); + if (error) + return error; + } + else + { + /* Command does not exist: create it. */ + + /* Add root command (index 0). */ + if (vec_len (cm->commands) == 0) + { + /* Create command with index 0; path is empty string. 
*/ + vec_resize (cm->commands, 1); + } + + ci = vec_len (cm->commands); + hash_set_mem (cm->command_index_by_path, normalized_path, ci); + vec_add1 (cm->commands, c[0]); + + c = vec_elt_at_index (cm->commands, ci); + c->path = normalized_path; + + /* Don't inherit from registration. */ + c->sub_commands = 0; + c->sub_command_index_by_name = 0; + c->sub_command_positions = 0; + } + + vlib_cli_make_parent (cm, ci); + return 0; +} + +clib_error_t * +vlib_cli_register_parse_rule (vlib_main_t * vm, vlib_cli_parse_rule_t * r_reg) +{ + vlib_cli_main_t * cm = &vm->cli_main; + vlib_cli_parse_rule_t * r; + clib_error_t * error = 0; + u8 * r_name; + uword * p; + + if (! cm->parse_rule_index_by_name) + cm->parse_rule_index_by_name = hash_create_vec (/* initial length */ 32, + sizeof (r->name[0]), + sizeof (uword)); + + /* Make vector copy of name. */ + r_name = format (0, "%s", r_reg->name); + + if ((p = hash_get_mem (cm->parse_rule_index_by_name, r_name))) + { + vec_free (r_name); + return clib_error_return (0, "duplicate parse rule name `%s'", r_reg->name); + } + + vec_add2 (cm->parse_rules, r, 1); + r[0] = r_reg[0]; + r->name = (char *) r_name; + hash_set_mem (cm->parse_rule_index_by_name, r->name, r - cm->parse_rules); + + return error; +} + +#if 0 +/* $$$ turn back on again someday, maybe */ +static clib_error_t * +vlib_cli_register_parse_rules (vlib_main_t * vm, + vlib_cli_parse_rule_t * lo, + vlib_cli_parse_rule_t * hi) + + __attribute__((unused)) +{ + clib_error_t * error = 0; + vlib_cli_parse_rule_t * r; + + for (r = lo; r < hi; r = clib_elf_section_data_next (r, 0)) + { + if (! 
r->name || strlen (r->name) == 0) + { + error = clib_error_return (0, "parse rule with no name"); + goto done; + } + + error = vlib_cli_register_parse_rule (vm, r); + if (error) + goto done; + } + + done: + return error; +} +#endif + +static clib_error_t * vlib_cli_init (vlib_main_t * vm) +{ + vlib_cli_main_t * cm = &vm->cli_main; + clib_error_t * error = 0; + vlib_cli_command_t * cmd; + + cmd = cm->cli_command_registrations; + + while (cmd) + { + error = vlib_cli_register (vm, cmd); + if (error) + return error; + cmd = cmd->next_cli_command; + } + return error; +} + +VLIB_INIT_FUNCTION (vlib_cli_init); diff --git a/vlib/vlib/cli.h b/vlib/vlib/cli.h new file mode 100644 index 00000000000..8c802475176 --- /dev/null +++ b/vlib/vlib/cli.h @@ -0,0 +1,187 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +/* + * cli.h: command line interface + * + * Copyright (c) 2008 Eliot Dresselhaus + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#ifndef included_vlib_cli_h +#define included_vlib_cli_h + +#include <vppinfra/format.h> + +struct vlib_cli_command_t; + +typedef struct { + u32 min_char; + + /* Indexed by name[position] - min_char. */ + uword ** bitmaps; +} vlib_cli_parse_position_t; + +typedef struct { + u8 * name; + + u32 index; +} vlib_cli_sub_command_t; + +typedef struct { + u8 * name; + + u32 rule_index; + + u32 command_index; +} vlib_cli_sub_rule_t; + +typedef struct { + char * name; + char * short_help; + char * long_help; + + /* Number of bytes in parsed data. Zero for vector. */ + uword data_size; + + unformat_function_t * unformat_function; + + /* Opaque for unformat function. */ + uword unformat_function_arg[2]; +} vlib_cli_parse_rule_t; + +/* CLI command callback function. 
*/ +typedef clib_error_t * (vlib_cli_command_function_t) + (struct vlib_main_t * vm, + unformat_input_t * input, + struct vlib_cli_command_t * cmd); + +typedef struct vlib_cli_command_t { + /* Command path (e.g. "show something"). + Spaces delimit elements of path. */ + char * path; + + /* Short/long help strings. */ + char * short_help; + char * long_help; + + /* Callback function. */ + vlib_cli_command_function_t * function; + + /* Opaque. */ + uword function_arg; + + /* Known MP-safe? */ + uword is_mp_safe; + + /* Sub commands for this command. */ + vlib_cli_sub_command_t * sub_commands; + + /* Hash table mapping name (e.g. last path element) to sub command index. */ + uword * sub_command_index_by_name; + + /* bitmap[p][c][i] says whether sub-command i has character + c in position p. */ + vlib_cli_parse_position_t * sub_command_positions; + + /* Hash table mapping name (e.g. last path element) to sub rule index. */ + uword * sub_rule_index_by_name; + + /* Vector of possible parse rules for this path. */ + vlib_cli_sub_rule_t * sub_rules; + + /* List of CLI commands, built by constructors */ + struct vlib_cli_command_t * next_cli_command; + +} vlib_cli_command_t; + +typedef void (vlib_cli_output_function_t) (uword arg, + u8 * buffer, + uword buffer_bytes); +typedef struct { + /* Current output function. */ + vlib_cli_output_function_t * output_function; + + /* Opaque data for output function. */ + uword output_function_arg; + + /* Vector of all known commands. */ + vlib_cli_command_t * commands; + + /* Hash table mapping normalized path to index into all_commands. */ + uword * command_index_by_path; + + /* Vector of all known parse rules. */ + vlib_cli_parse_rule_t * parse_rules; + + /* Hash table mapping parse rule name to index into parse_rule vector. */ + uword * parse_rule_index_by_name; + + /* Data parsed for rules. 
*/ + void ** parse_rule_data; + + /* registration list added by constructors */ + vlib_cli_command_t *cli_command_registrations; +} vlib_cli_main_t; + +#define VLIB_CLI_COMMAND(x,...) \ + __VA_ARGS__ vlib_cli_command_t x; \ +static void __vlib_cli_command_registration_##x (void) \ + __attribute__((__constructor__)) ; \ +static void __vlib_cli_command_registration_##x (void) \ +{ \ + vlib_main_t * vm = vlib_get_main(); \ + vlib_cli_main_t *cm = &vm->cli_main; \ + x.next_cli_command = cm->cli_command_registrations; \ + cm->cli_command_registrations = &x; \ +} \ +__VA_ARGS__ vlib_cli_command_t x + + +#define VLIB_CLI_PARSE_RULE(x) \ + vlib_cli_parse_rule_t x + +/* Output to current CLI connection. */ +void vlib_cli_output (struct vlib_main_t * vm, char * fmt, ...); + +/* Process CLI input. */ +void vlib_cli_input (struct vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_output_function_t * function, + uword function_arg); + +clib_error_t * vlib_cli_register (struct vlib_main_t * vm, + vlib_cli_command_t * c); +clib_error_t * vlib_cli_register_parse_rule (struct vlib_main_t * vm, + vlib_cli_parse_rule_t * c); + +#endif /* included_vlib_cli_h */ diff --git a/vlib/vlib/cli_funcs.h b/vlib/vlib/cli_funcs.h new file mode 100644 index 00000000000..a43ed20a2c2 --- /dev/null +++ b/vlib/vlib/cli_funcs.h @@ -0,0 +1,50 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +/* + * cli_funcs.h: VLIB CLI related functions/inlines + * + * Copyright (c) 2008 Eliot Dresselhaus + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#ifndef included_vlib_cli_funcs_h +#define included_vlib_cli_funcs_h + +always_inline void * +vlib_cli_get_parse_rule_result (vlib_main_t * vm, uword index) +{ + vlib_cli_main_t * cm = &vm->cli_main; + return vec_elt (cm->parse_rule_data, index); +} + +#endif /* included_vlib_cli_funcs_h */ diff --git a/vlib/vlib/counter.c b/vlib/vlib/counter.c new file mode 100644 index 00000000000..1b94884e319 --- /dev/null +++ b/vlib/vlib/counter.c @@ -0,0 +1,113 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/* + * counter.c: simple and packet/byte counters + * + * Copyright (c) 2008 Eliot Dresselhaus + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
 */

#include <vlib/vlib.h>

/* Fold every per-CPU 16-bit mini counter into the shared 64-bit maxi
   counter, then snapshot the folded totals into value_at_last_clear so
   that subsequent reads (which subtract value_at_last_clear) report
   zero until new traffic arrives. */
void vlib_clear_simple_counters (vlib_simple_counter_main_t * cm)
{
  uword i, j;
  u16 * my_minis;

  /* Drain each CPU's mini counters into the shared maxi vector. */
  for (i = 0; i < VLIB_COUNTER_MAX_CPUS; i++)
    {
      my_minis = cm->minis[i];

      for (j = 0; j < vec_len (my_minis); j++)
	{
	  cm->maxi[j] += my_minis[j];
	  my_minis[j] = 0;
	}
    }

  /* Record the folded totals as the new "zero" baseline. */
  j = vec_len (cm->maxi);
  if (j > 0)
    vec_validate (cm->value_at_last_clear, j - 1);
  for (i = 0; i < j; i++)
    cm->value_at_last_clear[i] = cm->maxi[i];
}

/* Combined (packets + bytes) analogue of vlib_clear_simple_counters:
   fold every CPU's mini counters into maxi, then snapshot maxi into
   value_at_last_clear. */
void vlib_clear_combined_counters (vlib_combined_counter_main_t * cm)
{
  uword i, j;
  vlib_mini_counter_t * my_minis;

  for (i = 0; i < VLIB_COUNTER_MAX_CPUS; i++)
    {
      my_minis = cm->minis[i];

      for (j = 0; j < vec_len (my_minis); j++)
	{
	  cm->maxi[j].packets += my_minis[j].packets;
	  cm->maxi[j].bytes += my_minis[j].bytes;
	  my_minis[j].packets = 0;
	  my_minis[j].bytes = 0;
	}
    }

  j = vec_len (cm->maxi);
  if (j > 0)
    vec_validate (cm->value_at_last_clear, j - 1);

  for (i = 0; i < j; i++)
    {
      vlib_counter_t * c = vec_elt_at_index (cm->value_at_last_clear, i);

      c[0] = cm->maxi[i];
    }
}

/* Serialization hooks referenced from counter.h; not yet implemented. */
void serialize_vlib_simple_counter_main (serialize_main_t * m, va_list * va)
{
  clib_warning ("unimplemented");
}

void unserialize_vlib_simple_counter_main (serialize_main_t * m, va_list * va)
{
  clib_warning ("unimplemented");
}

void serialize_vlib_combined_counter_main (serialize_main_t * m, va_list * va)
{
  clib_warning ("unimplemented");
}

void unserialize_vlib_combined_counter_main (serialize_main_t * m, va_list * va)
{
  clib_warning ("unimplemented");
}
diff --git a/vlib/vlib/counter.h b/vlib/vlib/counter.h
new file mode 100644
index 00000000000..804757173bb
--- /dev/null
+++ b/vlib/vlib/counter.h
@@ -0,0 +1,336 @@
/*
 * Copyright (c) 2015 Cisco and/or its affiliates.
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/* + * counter.h: simple and packet/byte counters + * + * Copyright (c) 2008 Eliot Dresselhaus + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#ifndef included_vlib_counter_h +#define included_vlib_counter_h + +/* + * Annoyingly enough, counters are created long before + * the CPU configuration is available, so we have to + * preallocate the mini-counter per-cpu vectors + */ +#define VLIB_COUNTER_MAX_CPUS 32 + +typedef struct { + /* Compact counters that (rarely) can overflow. 
 */
  u16 ** minis;

  /* Counters to hold overflow. */
  u64 * maxi;

  /* Counter values as of last clear. */
  u64 * value_at_last_clear;

  /* Values as of last serialize. */
  u64 * value_at_last_serialize;

  /* Last counter index serialized incrementally. */
  u32 last_incremental_serialize_index;

  /* Counter name. */
  char * name;
} vlib_simple_counter_main_t;

/* Bump counter `index' by `increment' on CPU `cpu_index'.
   The per-CPU mini counter is only 16 bits wide: the 32-bit sum is
   stored into it and, if the store truncated (mini[0] != new), the
   full sum is folded into the shared 64-bit maxi counter with an
   atomic add and the mini counter is reset. */
always_inline void
vlib_increment_simple_counter (vlib_simple_counter_main_t * cm,
			       u32 cpu_index,
			       u32 index,
			       u32 increment)
{
  u16 * my_minis;
  u16 * mini;
  u32 old, new;

  my_minis = cm->minis[cpu_index];
  mini = vec_elt_at_index (my_minis, index);
  old = mini[0];
  new = old + increment;
  mini[0] = new;

  /* Truncation on the 16-bit store signals overflow: fold into maxi. */
  if (PREDICT_FALSE (mini[0] != new))
    {
      __sync_fetch_and_add (&cm->maxi[index], new);
      my_minis[index] = 0;
    }
}

/* Total value of counter `index': sum of every CPU's mini counter
   plus the overflow (maxi) counter, minus the value recorded at the
   last clear.
   NOTE(review): reads other CPUs' mini counters without any
   synchronization, so the result is presumably only approximate while
   counters are being incremented — confirm callers only use this for
   stats display. */
always_inline u64
vlib_get_simple_counter (vlib_simple_counter_main_t * cm, u32 index)
{
  u16 *my_minis, *mini;
  u64 v;
  int i;

  ASSERT (index < vec_len (cm->maxi));

  v = 0;

  for (i = 0; i < VLIB_COUNTER_MAX_CPUS; i++)
    {
      my_minis = cm->minis[i];
      mini = vec_elt_at_index (my_minis, index);
      v += mini[0];
    }

  v += cm->maxi[index];

  if (index < vec_len (cm->value_at_last_clear))
    {
      ASSERT (v >= cm->value_at_last_clear[index]);
      v -= cm->value_at_last_clear[index];
    }

  return v;
}

/* Reset counter `index' to zero on every CPU, including the overflow
   counter and the last-clear baseline. */
always_inline void
vlib_zero_simple_counter (vlib_simple_counter_main_t * cm, u32 index)
{
  u16 * my_minis;
  int i;

  ASSERT (index < vec_len (cm->maxi));

  for (i = 0; i < VLIB_COUNTER_MAX_CPUS; i++)
    {
      my_minis = cm->minis[i];
      my_minis[index] = 0;
    }

  cm->maxi[index] = 0;

  if (index < vec_len (cm->value_at_last_clear))
    cm->value_at_last_clear[index] = 0;
}

/* Combined counters hold both packets and byte differences. */
/* Maxi-packet/byte counter.
*/ +typedef struct { + u64 packets, bytes; +} vlib_counter_t; + +always_inline void +vlib_counter_add (vlib_counter_t * a, vlib_counter_t * b) +{ + a->packets += b->packets; + a->bytes += b->bytes; +} + +always_inline void +vlib_counter_sub (vlib_counter_t * a, vlib_counter_t * b) +{ + ASSERT (a->packets >= b->packets); + ASSERT (a->bytes >= b->bytes); + a->packets -= b->packets; + a->bytes -= b->bytes; +} + +always_inline void +vlib_counter_zero (vlib_counter_t * a) +{ a->packets = a->bytes = 0; } + +/* Micro-counter: 16 bits of packets and 16 bits of byte difference. */ +typedef struct { + /* Packet count. */ + u16 packets; + + /* The average packet size hack doesn't work in a multi-core config */ + i16 bytes; +} vlib_mini_counter_t; + +typedef struct { + /* Compact counters that (rarely) can overflow. */ + vlib_mini_counter_t ** minis; + + /* Counters to hold overflow. */ + vlib_counter_t * maxi; + + /* Debug counters for testing. */ + vlib_counter_t * debug; + + /* Counter values as of last clear. */ + vlib_counter_t * value_at_last_clear; + + /* Counter values as of last serialize. */ + vlib_counter_t * value_at_last_serialize; + + /* Last counter index serialized incrementally. */ + u32 last_incremental_serialize_index; + + /* Average packet sizes used in mini-counter byte differences. */ + u32 ave_packet_size; + + /* Current summed packets and bytes for average computation. */ + u32 ave_packets, ave_bytes; + + /* Counter name. 
*/ + char * name; + +} vlib_combined_counter_main_t; + +void vlib_clear_simple_counters (vlib_simple_counter_main_t * cm); +void vlib_clear_combined_counters (vlib_combined_counter_main_t * cm); + +always_inline void +vlib_increment_combined_counter (vlib_combined_counter_main_t * cm, + u32 cpu_index, + u32 index, + u32 packet_increment, + u32 byte_increment) +{ + vlib_mini_counter_t * my_minis, * mini; + u32 old_packets, new_packets; + i32 old_bytes, new_bytes; + + /* Use this CPU's mini counter array */ + my_minis = cm->minis[cpu_index]; + + mini = vec_elt_at_index (my_minis, index); + old_packets = mini->packets; + old_bytes = mini->bytes; + + new_packets = old_packets + packet_increment; + new_bytes = old_bytes + byte_increment; + + mini->packets = new_packets; + mini->bytes = new_bytes; + + /* Bytes always overflow before packets.. */ + if (PREDICT_FALSE (mini->bytes != new_bytes)) + { + vlib_counter_t * maxi = vec_elt_at_index (cm->maxi, index); + + __sync_fetch_and_add (&maxi->packets, new_packets); + __sync_fetch_and_add (&maxi->bytes, new_bytes); + + mini->packets = 0; + mini->bytes = 0; + } +} + +/* This is never done in the speed path */ +static inline void +vlib_get_combined_counter (vlib_combined_counter_main_t * cm, + u32 index, + vlib_counter_t * result) +{ + vlib_mini_counter_t * my_minis, * mini; + vlib_counter_t * maxi; + int i; + + result->packets = 0; + result->bytes = 0; + + for (i = 0; i < VLIB_COUNTER_MAX_CPUS; i++) + { + my_minis = cm->minis[i]; + + mini = vec_elt_at_index (my_minis, index); + result->packets += mini->packets; + result->bytes += mini->bytes; + } + + maxi = vec_elt_at_index (cm->maxi, index); + result->packets += maxi->packets; + result->bytes += maxi->bytes; + + if (index < vec_len (cm->value_at_last_clear)) + vlib_counter_sub (result, &cm->value_at_last_clear[index]); +} + +always_inline void +vlib_zero_combined_counter (vlib_combined_counter_main_t * cm, + u32 index) +{ + vlib_mini_counter_t * mini, * my_minis; + int i; + 
+ for (i = 0; i < VLIB_COUNTER_MAX_CPUS; i++) + { + my_minis = cm->minis[i]; + + mini = vec_elt_at_index (my_minis, index); + mini->packets = 0; + mini->bytes = 0; + } + + vlib_counter_zero (&cm->maxi[index]); + if (index < vec_len (cm->value_at_last_clear)) + vlib_counter_zero (&cm->value_at_last_clear[index]); +} + +/* Initialize/allocate given counter index. + Works for both simple and combined counters. */ +#define vlib_validate_counter_DEPRECATED(cm,index) \ + do { \ + int i; \ + \ + vec_validate ((cm)->minis, VLIB_COUNTER_MAX_CPUS-1); \ + for (i = 0; i < VLIB_COUNTER_MAX_CPUS; i++) \ + vec_validate ((cm)->minis[i], (index)); \ + vec_validate ((cm)->maxi, (index)); \ + } while (0) + +static inline void +vlib_validate_simple_counter (vlib_simple_counter_main_t *cm, u32 index) +{ + int i; + vec_validate (cm->minis, VLIB_COUNTER_MAX_CPUS-1); + for (i = 0; i < VLIB_COUNTER_MAX_CPUS; i++) + vec_validate_aligned (cm->minis[i], index, CLIB_CACHE_LINE_BYTES); + vec_validate_aligned (cm->maxi, index, CLIB_CACHE_LINE_BYTES); +} + +static inline void +vlib_validate_combined_counter (vlib_combined_counter_main_t *cm, u32 index) +{ + int i; + vec_validate (cm->minis, VLIB_COUNTER_MAX_CPUS-1); + for (i = 0; i < VLIB_COUNTER_MAX_CPUS; i++) + vec_validate_aligned (cm->minis[i], index, CLIB_CACHE_LINE_BYTES); + vec_validate_aligned (cm->maxi, index, CLIB_CACHE_LINE_BYTES); +} + +/* Number of simple/combined counters allocated. */ +#define vlib_counter_len(cm) vec_len((cm)->maxi) + +serialize_function_t serialize_vlib_simple_counter_main, unserialize_vlib_simple_counter_main; +serialize_function_t serialize_vlib_combined_counter_main, unserialize_vlib_combined_counter_main; + +#endif /* included_vlib_counter_h */ diff --git a/vlib/vlib/defs.h b/vlib/vlib/defs.h new file mode 100644 index 00000000000..ff9046861f3 --- /dev/null +++ b/vlib/vlib/defs.h @@ -0,0 +1,70 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. 
+ * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/* + * defs.h: VLIB generic C definitions + * + * Copyright (c) 2008 Eliot Dresselhaus + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#ifndef included_vlib_defs_h +#define included_vlib_defs_h + +/* Receive or transmit. */ +typedef enum { + VLIB_RX, + VLIB_TX, + VLIB_N_RX_TX = 2, /* Used to size arrays. 
*/ +} vlib_rx_or_tx_t; + +#define vlib_foreach_rx_tx(v) for (v = 0; v < VLIB_N_RX_TX; v++) + +/* Read/write. */ +typedef enum { + VLIB_READ, + VLIB_WRITE, +} vlib_read_or_write_t; + +/* Up/down. */ +typedef enum { + VLIB_DOWN = 0, + VLIB_UP = 1, +} vlib_up_or_down_t; + +/* Enable/disable. */ +typedef enum { + VLIB_DISABLE = 0, + VLIB_ENABLE = 1, +} vlib_enable_or_disable_t; + +#endif /* included_vlib_defs_h */ diff --git a/vlib/vlib/dpdk_buffer.c b/vlib/vlib/dpdk_buffer.c new file mode 100644 index 00000000000..dbbd5806fd2 --- /dev/null +++ b/vlib/vlib/dpdk_buffer.c @@ -0,0 +1,1206 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/* + * buffer.c: allocate/free network buffers. + * + * Copyright (c) 2008 Eliot Dresselhaus + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#include <rte_config.h> + +#include <rte_common.h> +#include <rte_log.h> +#include <rte_memory.h> +#include <rte_memcpy.h> +#include <rte_memzone.h> +#include <rte_tailq.h> +#include <rte_eal.h> +#include <rte_per_lcore.h> +#include <rte_launch.h> +#include <rte_atomic.h> +#include <rte_cycles.h> +#include <rte_prefetch.h> +#include <rte_lcore.h> +#include <rte_per_lcore.h> +#include <rte_branch_prediction.h> +#include <rte_interrupts.h> +#include <rte_pci.h> +#include <rte_random.h> +#include <rte_debug.h> +#include <rte_ether.h> +#include <rte_ethdev.h> +#include <rte_ring.h> +#include <rte_mempool.h> +#include <rte_mbuf.h> + +#include <vlib/vlib.h> + +phys_addr_t __attribute__ ((weak)) rte_mem_virt2phy(); +int __attribute__ ((weak)) rte_eal_has_hugepages(); +unsigned __attribute__ ((weak)) rte_socket_id(); +struct rte_mempool * __attribute__ ((weak)) rte_mempool_create(); +void __attribute__ ((weak)) rte_pktmbuf_init(); +void __attribute__ ((weak)) rte_pktmbuf_pool_init(); + +uword vlib_buffer_length_in_chain_slow_path (vlib_main_t * vm, vlib_buffer_t * b_first) +{ + vlib_buffer_t * b = b_first; + uword l_first = b_first->current_length; + uword l = 0; + while (b->flags & VLIB_BUFFER_NEXT_PRESENT) + { + b = vlib_get_buffer (vm, b->next_buffer); + l += b->current_length; + } + b_first->total_length_not_including_first_buffer = l; + b_first->flags |= VLIB_BUFFER_TOTAL_LENGTH_VALID; + return l + l_first; +} + +u8 * format_vlib_buffer (u8 * s, va_list * args) +{ + 
 vlib_buffer_t * b = va_arg (*args, vlib_buffer_t *);
  uword indent = format_get_indent (s);

  /* First buffer of the chain: basic header fields. */
  s = format (s, "current data %d, length %d, free-list %d",
	      b->current_data, b->current_length,
	      b->free_list_index);

  if (b->flags & VLIB_BUFFER_TOTAL_LENGTH_VALID)
    s = format (s, ", totlen-nifb %d",
		b->total_length_not_including_first_buffer);

  if (b->flags & VLIB_BUFFER_IS_TRACED)
    s = format (s, ", trace 0x%x", b->trace_index);

  /* Walk the buffer chain, one output line per continuation segment. */
  while (b->flags & VLIB_BUFFER_NEXT_PRESENT)
    {
      vlib_main_t * vm = vlib_get_main();
      u32 next_buffer = b->next_buffer;
      b = vlib_get_buffer(vm, next_buffer);

      s = format (s, "\n%Unext-buffer 0x%x, segment length %d",
		  format_white_space, indent, next_buffer, b->current_length);
    }


  return s;
}

/* Format function: buffer header (via format_vlib_buffer) followed by
   the first 64 bytes of its current data as hex. */
u8 * format_vlib_buffer_and_data (u8 * s, va_list * args)
{
  vlib_buffer_t * b = va_arg (*args, vlib_buffer_t *);

  s = format (s, "%U, %U",
	      format_vlib_buffer, b,
	      format_hex_bytes, vlib_buffer_get_current (b), 64);

  return s;
}

/* Format function: concatenate the payload of every buffer in the
   chain into the output vector. */
u8 * format_vlib_buffer_contents (u8 * s, va_list * va)
{
  vlib_main_t * vm = va_arg (*va, vlib_main_t *);
  vlib_buffer_t * b = va_arg (*va, vlib_buffer_t *);

  while (1)
    {
      vec_add (s, vlib_buffer_get_current (b),
	       b->current_length);
      if (! (b->flags & VLIB_BUFFER_NEXT_PRESENT))
	break;
      b = vlib_get_buffer (vm, b->next_buffer);
    }

  return s;
}

/* Per-thread vlib_main_t pointers; definition lives here. */
vlib_main_t **vlib_mains;

/* Aligned copy routine. */
void
vlib_aligned_memcpy (void * _dst, void * _src, int n_bytes)
{
  vlib_copy_unit_t * dst = _dst;
  vlib_copy_unit_t * src = _src;

  /* Arguments must be naturally aligned.
*/ + ASSERT (pointer_to_uword (dst) % sizeof (dst[0]) == 0); + ASSERT (pointer_to_uword (src) % sizeof (src[0]) == 0); + ASSERT (n_bytes % sizeof (dst[0]) == 0); + + if (4 * sizeof (dst[0]) == CLIB_CACHE_LINE_BYTES) + { + CLIB_PREFETCH (dst + 0, 4 * sizeof (dst[0]), WRITE); + CLIB_PREFETCH (src + 0, 4 * sizeof (src[0]), READ); + + while (n_bytes >= 4 * sizeof (dst[0])) + { + dst += 4; + src += 4; + n_bytes -= 4 * sizeof (dst[0]); + CLIB_PREFETCH (dst, 4 * sizeof (dst[0]), WRITE); + CLIB_PREFETCH (src, 4 * sizeof (src[0]), READ); + dst[-4] = src[-4]; + dst[-3] = src[-3]; + dst[-2] = src[-2]; + dst[-1] = src[-1]; + } + } + else if (8 * sizeof (dst[0]) == CLIB_CACHE_LINE_BYTES) + { + CLIB_PREFETCH (dst + 0, 8 * sizeof (dst[0]), WRITE); + CLIB_PREFETCH (src + 0, 8 * sizeof (src[0]), READ); + + while (n_bytes >= 8 * sizeof (dst[0])) + { + dst += 8; + src += 8; + n_bytes -= 8 * sizeof (dst[0]); + CLIB_PREFETCH (dst, 8 * sizeof (dst[0]), WRITE); + CLIB_PREFETCH (src, 8 * sizeof (src[0]), READ); + dst[-8] = src[-8]; + dst[-7] = src[-7]; + dst[-6] = src[-6]; + dst[-5] = src[-5]; + dst[-4] = src[-4]; + dst[-3] = src[-3]; + dst[-2] = src[-2]; + dst[-1] = src[-1]; + } + } + else + /* Cache line size unknown: fall back to slow version. */; + + while (n_bytes > 0) + { + *dst++ = *src++; + n_bytes -= 1 * sizeof (dst[0]); + } +} + +#define BUFFERS_PER_COPY (sizeof (vlib_copy_unit_t) / sizeof (u32)) + +/* Make sure we have at least given number of unaligned buffers. */ +static void +fill_unaligned (vlib_main_t * vm, + vlib_buffer_free_list_t * free_list, + uword n_unaligned_buffers) +{ + word la = vec_len (free_list->aligned_buffers); + word lu = vec_len (free_list->unaligned_buffers); + + /* Aligned come in aligned copy-sized chunks. */ + ASSERT (la % BUFFERS_PER_COPY == 0); + + ASSERT (la >= n_unaligned_buffers); + + while (lu < n_unaligned_buffers) + { + /* Copy 4 buffers from end of aligned vector to unaligned vector. 
 */
      vec_add (free_list->unaligned_buffers,
	       free_list->aligned_buffers + la - BUFFERS_PER_COPY,
	       BUFFERS_PER_COPY);
      la -= BUFFERS_PER_COPY;
      lu += BUFFERS_PER_COPY;
    }
  _vec_len (free_list->aligned_buffers) = la;
}

/* After free aligned buffers may not contain even sized chunks. */
/* Restore the invariant that aligned_buffers holds a whole number of
   BUFFERS_PER_COPY-sized chunks: first fold unaligned_buffers back
   into aligned_buffers, then move the trailing remainder
   (len % BUFFERS_PER_COPY) back out to unaligned_buffers. */
static void
trim_aligned (vlib_buffer_free_list_t * f)
{
  uword l, n_trim;

  /* Add unaligned to aligned before trim. */
  l = vec_len (f->unaligned_buffers);
  if (l > 0)
    {
      vec_add_aligned (f->aligned_buffers, f->unaligned_buffers, l,
		       /* align */ sizeof (vlib_copy_unit_t));

      _vec_len (f->unaligned_buffers) = 0;
    }

  /* Remove unaligned buffers from end of aligned vector and save for next trim. */
  l = vec_len (f->aligned_buffers);
  n_trim = l % BUFFERS_PER_COPY;
  if (n_trim)
    {
      /* Trim aligned -> unaligned. */
      vec_add (f->unaligned_buffers, f->aligned_buffers + l - n_trim, n_trim);

      /* Remove from aligned. */
      _vec_len (f->aligned_buffers) = l - n_trim;
    }
}

/* Move every buffer index from src's aligned and unaligned vectors
   onto dst, then free src's vectors.  Both lists are trimmed first so
   the aligned portion stays in BUFFERS_PER_COPY-sized chunks and the
   bulk copy can use vlib_aligned_memcpy. */
static void
merge_free_lists (vlib_buffer_free_list_t * dst,
		  vlib_buffer_free_list_t * src)
{
  uword l;
  u32 * d;

  trim_aligned (src);
  trim_aligned (dst);

  l = vec_len (src->aligned_buffers);
  if (l > 0)
    {
      vec_add2_aligned (dst->aligned_buffers, d, l,
			/* align */ sizeof (vlib_copy_unit_t));
      vlib_aligned_memcpy (d, src->aligned_buffers, l * sizeof (d[0]));
      vec_free (src->aligned_buffers);
    }

  l = vec_len (src->unaligned_buffers);
  if (l > 0)
    {
      vec_add (dst->unaligned_buffers, src->unaligned_buffers, l);
      vec_free (src->unaligned_buffers);
    }
}

/* Look up the public free list whose rounded buffer size equals
   `size'; returns its index, or ~0 if no such list exists. */
always_inline u32
vlib_buffer_get_free_list_with_size (vlib_main_t * vm, u32 size)
{
  vlib_buffer_main_t * bm = vm->buffer_main;

  size = vlib_buffer_round_size (size);
  uword * p = hash_get (bm->free_list_by_size, size);
  return p ? p[0] : ~0;
}

/* Add buffer free list.
 */
/* Create a buffer free list of (rounded) data size n_data_bytes and return
   its index. If the pool is empty and this is not the default-list creation
   itself, the default list is created first so it always occupies
   VLIB_BUFFER_DEFAULT_FREE_LIST_INDEX. A public request matching the default
   size is satisfied by the default list. */
static u32
vlib_buffer_create_free_list_helper (vlib_main_t * vm,
                                     u32 n_data_bytes,
                                     u32 is_public,
                                     u32 is_default,
                                     u8 * name)
{
  vlib_buffer_main_t * bm = vm->buffer_main;
  vlib_buffer_free_list_t * f;

  if (! is_default && pool_elts (bm->buffer_free_list_pool) == 0)
    {
      u32 default_free_free_list_index;

      default_free_free_list_index =
        vlib_buffer_create_free_list_helper (vm,
                                             /* default buffer size */ VLIB_BUFFER_DEFAULT_FREE_LIST_BYTES,
                                             /* is_public */ 1,
                                             /* is_default */ 1,
                                             (u8 *) "default");
      ASSERT (default_free_free_list_index == VLIB_BUFFER_DEFAULT_FREE_LIST_INDEX);

      if (n_data_bytes == VLIB_BUFFER_DEFAULT_FREE_LIST_BYTES && is_public)
        return default_free_free_list_index;
    }

  pool_get_aligned (bm->buffer_free_list_pool, f, CLIB_CACHE_LINE_BYTES);

  memset (f, 0, sizeof (f[0]));
  f->index = f - bm->buffer_free_list_pool;
  f->n_data_bytes = vlib_buffer_round_size (n_data_bytes);
  f->min_n_buffers_each_physmem_alloc = 16;
  /* Take ownership of heap-allocated names; copy static strings. */
  f->name = clib_mem_is_heap_object (name) ? name : format (0, "%s", name);

  /* Setup free buffer template. */
  f->buffer_init_template.free_list_index = f->index;

  if (is_public)
    {
      /* First public list of a given size becomes the canonical one. */
      uword * p = hash_get (bm->free_list_by_size, f->n_data_bytes);
      if (! p)
        hash_set (bm->free_list_by_size, f->n_data_bytes, f->index);
    }

  return f->index;
}

/* Create a private (non-shared) free list named by the format string. */
u32 vlib_buffer_create_free_list (vlib_main_t * vm, u32 n_data_bytes,
                                  char * fmt, ...)
{
  va_list va;
  u8 * name;

  va_start (va, fmt);
  name = va_format (0, fmt, &va);
  va_end (va);

  return vlib_buffer_create_free_list_helper (vm, n_data_bytes,
                                              /* is_public */ 0,
                                              /* is_default */ 0,
                                              name);
}

/* Return the public free list for n_data_bytes, creating it on first use. */
u32 vlib_buffer_get_or_create_free_list (vlib_main_t * vm, u32 n_data_bytes,
                                         char * fmt, ...)
{
  u32 i = vlib_buffer_get_free_list_with_size (vm, n_data_bytes);

  if (i == ~0)
    {
      va_list va;
      u8 * name;

      va_start (va, fmt);
      name = va_format (0, fmt, &va);
      va_end (va);

      i = vlib_buffer_create_free_list_helper (vm, n_data_bytes,
                                               /* is_public */ 1,
                                               /* is_default */ 0,
                                               name);
    }

  return i;
}

/* Release every buffer on the free list back to its DPDK mempool and free
   the list's own vectors. The rte_mbuf header sits immediately before the
   vlib_buffer_t, hence the ((struct rte_mbuf *)b)-1 recovery. */
static void
del_free_list (vlib_main_t * vm, vlib_buffer_free_list_t * f)
{
  u32 i;
  struct rte_mbuf *mb;
  vlib_buffer_t *b;

  for (i = 0; i < vec_len (f->unaligned_buffers); i++) {
    b = vlib_get_buffer (vm, f->unaligned_buffers[i]);
    mb = ((struct rte_mbuf *)b)-1;
    rte_pktmbuf_free (mb);
  }
  for (i = 0; i < vec_len (f->aligned_buffers); i++) {
    b = vlib_get_buffer (vm, f->aligned_buffers[i]);
    mb = ((struct rte_mbuf *)b)-1;
    rte_pktmbuf_free (mb);
  }
  vec_free (f->name);
  vec_free (f->unaligned_buffers);
  vec_free (f->aligned_buffers);
}

/* Delete a buffer free list. If another public list of the same size
   exists, hand our buffers to it first; otherwise they are returned to
   the mempool by del_free_list. */
void vlib_buffer_delete_free_list (vlib_main_t * vm, u32 free_list_index)
{
  vlib_buffer_main_t * bm = vm->buffer_main;
  vlib_buffer_free_list_t * f;
  u32 merge_index;

  f = vlib_buffer_get_free_list (vm, free_list_index);

  merge_index = vlib_buffer_get_free_list_with_size (vm, f->n_data_bytes);
  if (merge_index != ~0 && merge_index != free_list_index)
    {
      merge_free_lists (pool_elt_at_index (bm->buffer_free_list_pool,
                                           merge_index), f);
    }

  del_free_list (vm, f);

  /* Poison it. */
  memset (f, 0xab, sizeof (f[0]));

  pool_put (bm->buffer_free_list_pool, f);
}

/* Make sure free list has at least given number of free buffers. */
static uword
fill_free_list (vlib_main_t * vm,
                vlib_buffer_free_list_t * fl,
                uword min_free_buffers)
{
  vlib_buffer_t * b;
  int n, i;
  u32 bi;
  u32 n_remaining, n_alloc;
  unsigned socket_id = rte_socket_id ? rte_socket_id() : 0;
  struct rte_mempool *rmp = vm->buffer_main->pktmbuf_pools[socket_id];
  struct rte_mbuf *mb;

  /* Too early?
*/ + if (PREDICT_FALSE(rmp == 0)) + return 0; + + trim_aligned (fl); + + /* Already have enough free buffers on free list? */ + n = min_free_buffers - vec_len (fl->aligned_buffers); + if (n <= 0) + return min_free_buffers; + + /* Always allocate round number of buffers. */ + n = round_pow2 (n, BUFFERS_PER_COPY); + + /* Always allocate new buffers in reasonably large sized chunks. */ + n = clib_max (n, fl->min_n_buffers_each_physmem_alloc); + + vec_validate (vm->mbuf_alloc_list, n-1); + + if (rte_mempool_get_bulk (rmp, vm->mbuf_alloc_list, n) < 0) + return 0; + + _vec_len (vm->mbuf_alloc_list) = n; + + for (i = 0; i < n; i++) + { + mb = vm->mbuf_alloc_list[i]; + + ASSERT(rte_mbuf_refcnt_read(mb) == 0); + rte_mbuf_refcnt_set(mb, 1); + mb->next = NULL; + mb->data_off = RTE_PKTMBUF_HEADROOM; + mb->nb_segs = 1; + + b = (vlib_buffer_t *)(mb+1); + bi = vlib_get_buffer_index (vm, b); + + vec_add1_aligned (fl->aligned_buffers, bi, sizeof (vlib_copy_unit_t)); + n_alloc++; + n_remaining--; + + vlib_buffer_init_for_free_list (b, fl); + + if (fl->buffer_init_function) + fl->buffer_init_function (vm, fl, &bi, 1); + } + + fl->n_alloc += n; + + return n; +} + +always_inline uword +copy_alignment (u32 * x) +{ return (pointer_to_uword (x) / sizeof (x[0])) % BUFFERS_PER_COPY; } + +static u32 +alloc_from_free_list (vlib_main_t * vm, + vlib_buffer_free_list_t * free_list, + u32 * alloc_buffers, + u32 n_alloc_buffers) +{ + u32 * dst, * u_src; + uword u_len, n_left; + uword n_unaligned_start, n_unaligned_end, n_filled; + + n_left = n_alloc_buffers; + dst = alloc_buffers; + n_unaligned_start = ((BUFFERS_PER_COPY - copy_alignment (dst)) + & (BUFFERS_PER_COPY - 1)); + + n_filled = fill_free_list (vm, free_list, n_alloc_buffers); + if (n_filled == 0) + return 0; + + n_left = n_filled < n_left ? 
n_filled : n_left; + n_alloc_buffers = n_left; + + if (n_unaligned_start >= n_left) + { + n_unaligned_start = n_left; + n_unaligned_end = 0; + } + else + n_unaligned_end = copy_alignment (dst + n_alloc_buffers); + + fill_unaligned (vm, free_list, n_unaligned_start + n_unaligned_end); + + u_len = vec_len (free_list->unaligned_buffers); + u_src = free_list->unaligned_buffers + u_len - 1; + + if (n_unaligned_start) + { + uword n_copy = n_unaligned_start; + if (n_copy > n_left) + n_copy = n_left; + n_left -= n_copy; + + while (n_copy > 0) + { + *dst++ = *u_src--; + n_copy--; + u_len--; + } + + /* Now dst should be aligned. */ + if (n_left > 0) + ASSERT (pointer_to_uword (dst) % sizeof (vlib_copy_unit_t) == 0); + } + + /* Aligned copy. */ + { + vlib_copy_unit_t * d, * s; + uword n_copy; + + if (vec_len(free_list->aligned_buffers) < ((n_left/BUFFERS_PER_COPY)*BUFFERS_PER_COPY)) + abort(); + + n_copy = n_left / BUFFERS_PER_COPY; + n_left = n_left % BUFFERS_PER_COPY; + + /* Remove buffers from aligned free list. */ + _vec_len (free_list->aligned_buffers) -= n_copy * BUFFERS_PER_COPY; + + s = (vlib_copy_unit_t *) vec_end (free_list->aligned_buffers); + d = (vlib_copy_unit_t *) dst; + + /* Fast path loop. */ + while (n_copy >= 4) + { + d[0] = s[0]; + d[1] = s[1]; + d[2] = s[2]; + d[3] = s[3]; + n_copy -= 4; + s += 4; + d += 4; + } + + while (n_copy >= 1) + { + d[0] = s[0]; + n_copy -= 1; + s += 1; + d += 1; + } + + dst = (void *) d; + } + + /* Unaligned copy. */ + ASSERT (n_unaligned_end == n_left); + while (n_left > 0) + { + *dst++ = *u_src--; + n_left--; + u_len--; + } + + if (! free_list->unaligned_buffers) + ASSERT (u_len == 0); + else + _vec_len (free_list->unaligned_buffers) = u_len; + + return n_alloc_buffers; +} + +/* Allocate a given number of buffers into given array. + Returns number actually allocated which will be either zero or + number requested. 
 */
/* Allocate n_buffers indices from the default free list into buffers. */
u32 vlib_buffer_alloc (vlib_main_t * vm, u32 * buffers, u32 n_buffers)
{
  vlib_buffer_main_t * bm = vm->buffer_main;

  return alloc_from_free_list
    (vm,
     pool_elt_at_index (bm->buffer_free_list_pool,
                        VLIB_BUFFER_DEFAULT_FREE_LIST_INDEX),
     buffers, n_buffers);
}

/* Allocate n_buffers indices from a specific free list into buffers. */
u32 vlib_buffer_alloc_from_free_list (vlib_main_t * vm,
                                      u32 * buffers,
                                      u32 n_buffers,
                                      u32 free_list_index)
{
  vlib_buffer_main_t * bm = vm->buffer_main;
  vlib_buffer_free_list_t * f;
  f = pool_elt_at_index (bm->buffer_free_list_pool, free_list_index);
  return alloc_from_free_list (vm, f, buffers, n_buffers);
}

/* Push one buffer index onto free list f, optionally re-initializing the
   buffer from the list's template first. */
always_inline void
add_buffer_to_free_list (vlib_main_t * vm,
                         vlib_buffer_free_list_t * f,
                         u32 buffer_index, u8 do_init)
{
  vlib_buffer_t * b;
  b = vlib_get_buffer (vm, buffer_index);
  if (PREDICT_TRUE(do_init))
    vlib_buffer_init_for_free_list (b, f);
  vec_add1_aligned (f->aligned_buffers, buffer_index, sizeof (vlib_copy_unit_t));
}

/* Resolve the free list a buffer belongs to; also returns its index via
   *index. */
always_inline vlib_buffer_free_list_t *
buffer_get_free_list (vlib_main_t * vm, vlib_buffer_t * b, u32 * index)
{
  vlib_buffer_main_t * bm = vm->buffer_main;
  u32 i;

  *index = i = b->free_list_index;
  return pool_elt_at_index (bm->buffer_free_list_pool, i);
}

/* Install a buffer-free intercept callback; returns the previous one so
   callers can chain or restore it. */
void *vlib_set_buffer_free_callback (vlib_main_t *vm, void *fp)
{
  vlib_buffer_main_t * bm = vm->buffer_main;
  void * rv = bm->buffer_free_callback;

  bm->buffer_free_callback = fp;
  return rv;
}

/* Free n_buffers buffer indices. If an intercept callback is installed it
   may filter/shorten the list first. Buffers whose free list has a
   buffers_added_to_freelist_function (multicast recycle) go back onto that
   free list and the list is announced once at the end; all others are
   returned straight to the DPDK mempool.
   NOTE(review): follow_buffer_next is only forwarded to the callback here;
   this DPDK path relies on rte_pktmbuf_free to walk chained segments —
   confirm against the non-DPDK variant. */
static_always_inline void
vlib_buffer_free_inline (vlib_main_t * vm,
                         u32 * buffers,
                         u32 n_buffers,
                         u32 follow_buffer_next)
{
  vlib_buffer_main_t * bm = vm->buffer_main;
  vlib_buffer_free_list_t * fl;
  u32 fi;
  int i;
  u32 (*cb)(vlib_main_t * vm, u32 * buffers, u32 n_buffers,
            u32 follow_buffer_next);

  cb = bm->buffer_free_callback;

  if (PREDICT_FALSE (cb != 0))
    n_buffers = (*cb)(vm, buffers, n_buffers, follow_buffer_next);

  if (! n_buffers)
    return;

  for (i = 0; i < n_buffers; i++)
    {
      vlib_buffer_t * b;
      struct rte_mbuf * mb;

      b = vlib_get_buffer (vm, buffers[i]);

      fl = buffer_get_free_list (vm, b, &fi);

      /* The only current use of this callback: multicast recycle */
      if (PREDICT_FALSE (fl->buffers_added_to_freelist_function != 0))
        {
          int j;

          /* Only re-init buffers that are not clones. */
          add_buffer_to_free_list (vm, fl, buffers[i], b->clone_count == 0);

          /* Announce each free list at most once per call. */
          for (j = 0; j < vec_len (bm->announce_list); j++)
            {
              if (fl == bm->announce_list[j])
                goto already_announced;
            }
          vec_add1 (bm->announce_list, fl);
        already_announced:
          ;
        }
      else
        {
          /* rte_mbuf header precedes the vlib_buffer_t. */
          mb = ((struct rte_mbuf *)b)-1;
          rte_pktmbuf_free (mb);
        }
    }
  if (vec_len(bm->announce_list))
    {
      vlib_buffer_free_list_t * fl;
      for (i = 0; i < vec_len (bm->announce_list); i++)
        {
          fl = bm->announce_list[i];
          fl->buffers_added_to_freelist_function (vm, fl);
        }
      _vec_len(bm->announce_list) = 0;
    }
}

/* Free buffers, following next-buffer chains. */
void vlib_buffer_free (vlib_main_t * vm,
                       u32 * buffers,
                       u32 n_buffers)
{
  vlib_buffer_free_inline (vm, buffers, n_buffers, /* follow_buffer_next */ 1);
}

/* Free buffers without following next-buffer chains. */
void vlib_buffer_free_no_next (vlib_main_t * vm,
                               u32 * buffers,
                               u32 n_buffers)
{
  vlib_buffer_free_inline (vm, buffers, n_buffers, /* follow_buffer_next */ 0);
}

/* Copy template packet data into buffers as they are allocated.
*/ +__attribute__((unused)) static void +vlib_packet_template_buffer_init (vlib_main_t * vm, + vlib_buffer_free_list_t * fl, + u32 * buffers, + u32 n_buffers) +{ + vlib_packet_template_t * t = uword_to_pointer (fl->buffer_init_function_opaque, + vlib_packet_template_t *); + uword i; + + for (i = 0; i < n_buffers; i++) + { + vlib_buffer_t * b = vlib_get_buffer (vm, buffers[i]); + ASSERT (b->current_length == vec_len (t->packet_data)); + memcpy (vlib_buffer_get_current (b), t->packet_data, b->current_length); + } +} + +void vlib_packet_template_init (vlib_main_t * vm, + vlib_packet_template_t * t, + void * packet_data, + uword n_packet_data_bytes, + uword min_n_buffers_each_physmem_alloc, + char * fmt, + ...) +{ + va_list va; + __attribute__((unused)) u8 * name; + + va_start (va, fmt); + name = va_format (0, fmt, &va); + va_end (va); + + vlib_worker_thread_barrier_sync(vm); + memset (t, 0, sizeof (t[0])); + + vec_add (t->packet_data, packet_data, n_packet_data_bytes); + + vlib_worker_thread_barrier_release(vm); +} + +void * +vlib_packet_template_get_packet (vlib_main_t * vm, + vlib_packet_template_t * t, + u32 * bi_result) +{ + u32 bi; + vlib_buffer_t * b; + + if (vlib_buffer_alloc (vm, &bi, 1) != 1) + return 0; + + *bi_result = bi; + + b = vlib_get_buffer (vm, bi); + memcpy (vlib_buffer_get_current (b), + t->packet_data, vec_len(t->packet_data)); + b->current_length = vec_len(t->packet_data); + + /* Fix up mbuf header length fields */ + struct rte_mbuf * mb; + mb = ((struct rte_mbuf *)b) - 1; + mb->data_len = b->current_length; + mb->pkt_len = b->current_length; + + return b->data; +} + +/* Append given data to end of buffer, possibly allocating new buffers. 
 */
/* Append n_data_bytes of data to the buffer chain headed by buffer_index,
   allocating additional buffers from free_list_index as needed. Passing
   buffer_index == 0 means "start a new chain" (0 doubles as the no-buffer
   sentinel here). Returns the head buffer index; calls clib_error on
   allocation failure. */
u32 vlib_buffer_add_data (vlib_main_t * vm,
                          u32 free_list_index,
                          u32 buffer_index,
                          void * data, u32 n_data_bytes)
{
  u32 n_buffer_bytes, n_left, n_left_this_buffer, bi;
  vlib_buffer_t * b;
  void * d;

  bi = buffer_index;
  if (bi == 0
      && 1 != vlib_buffer_alloc_from_free_list (vm, &bi, 1, free_list_index))
    goto out_of_buffers;

  d = data;
  n_left = n_data_bytes;
  n_buffer_bytes = vlib_buffer_free_list_buffer_size (vm, free_list_index);

  b = vlib_get_buffer (vm, bi);
  /* Total length will change; invalidate any cached value. */
  b->flags &= ~VLIB_BUFFER_TOTAL_LENGTH_VALID;

  /* Get to the end of the chain before we try to append data...*/
  while (b->flags & VLIB_BUFFER_NEXT_PRESENT)
    b = vlib_get_buffer (vm, b->next_buffer);

  while (1)
    {
      u32 n;

      ASSERT (n_buffer_bytes >= b->current_length);
      /* Space left = buffer capacity minus data offset and bytes already
         present. */
      n_left_this_buffer = n_buffer_bytes - (b->current_data + b->current_length);
      n = clib_min (n_left_this_buffer, n_left);
      memcpy (vlib_buffer_get_current (b) + b->current_length, d, n);
      b->current_length += n;
      n_left -= n;
      if (n_left == 0)
        break;

      d += n;
      if (1 != vlib_buffer_alloc_from_free_list (vm, &b->next_buffer, 1, free_list_index))
        goto out_of_buffers;

      b->flags |= VLIB_BUFFER_NEXT_PRESENT;

      b = vlib_get_buffer (vm, b->next_buffer);
    }

  return bi;

 out_of_buffers:
  clib_error ("out of buffers");
  return bi;
}

/* Create (or reuse) the per-NUMA-socket DPDK pktmbuf pool and fold its
   virtual address range into the physmem range used for buffer indexing.
   Falls back to another socket's pool (with a warning) if creation fails. */
clib_error_t *
vlib_buffer_pool_create(vlib_main_t * vm, unsigned num_mbufs,
                        unsigned mbuf_size, unsigned socket_id)
{
  vlib_buffer_main_t * bm = vm->buffer_main;
  vlib_physmem_main_t * vpm = &vm->physmem_main;
  struct rte_mempool * rmp;
  uword new_start, new_size;
  int i;

  /* Weak-symbol check: DPDK not linked in at all. */
  if (!rte_mempool_create)
    return clib_error_return (0, "not linked with DPDK");

  vec_validate_aligned(bm->pktmbuf_pools, socket_id, CLIB_CACHE_LINE_BYTES);

  /* pool already exists, nothing to do */
  if (bm->pktmbuf_pools[socket_id])
    return 0;

  u8 * pool_name = format(0, "mbuf_pool_socket%u%c",socket_id, 0);
  /* 512 = per-lcore cache size. */
  rmp = rte_mempool_create((char *) pool_name,
                           num_mbufs, mbuf_size, 512,
                           sizeof(struct rte_pktmbuf_pool_private),
                           rte_pktmbuf_pool_init, NULL,
                           rte_pktmbuf_init, NULL,
                           socket_id, 0);
  vec_free(pool_name);

  if (rmp)
    {
      new_start = pointer_to_uword(rmp);
      new_size = rmp->elt_va_end - new_start;

      if (vpm->virtual.size > 0)
        {
          ASSERT(new_start != vpm->virtual.start);
          /* Grow the tracked virtual range to cover the new pool. */
          if (new_start < vpm->virtual.start)
            {
              new_size = vpm->virtual.size + vpm->virtual.start - new_start;
            }
          else
            {
              new_size += new_start - vpm->virtual.start;
              new_start = vpm->virtual.start;
            }

          /* check if fits into buffer index range */
          if (new_size > ( (uword) 1 << (32 + CLIB_LOG2_CACHE_LINE_BYTES)))
            rmp = 0;
        }
    }

  if (rmp)
    {
      bm->pktmbuf_pools[socket_id] = rmp;
      vpm->virtual.start = new_start;
      vpm->virtual.size = new_size;
      vpm->virtual.end = new_start + new_size;
      return 0;
    }

  /* no usable pool for this socket, try to use pool from another one */
  for (i = 0; i < vec_len(bm->pktmbuf_pools); i++)
    {
      if(bm->pktmbuf_pools[i])
        {
          clib_warning("WARNING: Failed to allocate mempool for CPU socket %u. 
"
                       "Threads running on socket %u will use socket %u mempool.",
                       socket_id, socket_id, i);
          bm->pktmbuf_pools[socket_id] = bm->pktmbuf_pools[i];
          return 0;
        }
    }

  return clib_error_return (0, "failed to allocate mempool on socket %u",
                            socket_id);
}


/* Serialize-stream data callback (tx direction): flush the bytes written
   into the current buffer, chain on a fresh buffer for further writes, and
   ship the completed chain to the configured next node when the stream ends
   or the chain reaches its size limit. */
static void vlib_serialize_tx (serialize_main_header_t * m, serialize_stream_t * s)
{
  vlib_main_t * vm;
  vlib_serialize_buffer_main_t * sm;
  uword n, n_bytes_to_write;
  vlib_buffer_t * last;

  n_bytes_to_write = s->current_buffer_index;
  sm = uword_to_pointer (s->data_function_opaque, vlib_serialize_buffer_main_t *);
  vm = sm->vlib_main;

  ASSERT (sm->tx.max_n_data_bytes_per_chain > 0);
  if (serialize_stream_is_end_of_stream (s)
      || sm->tx.n_total_data_bytes + n_bytes_to_write > sm->tx.max_n_data_bytes_per_chain)
    {
      /* Chain complete: finalize the last buffer and enqueue the chain. */
      vlib_process_t * p = vlib_get_current_process (vm);

      last = vlib_get_buffer (vm, sm->last_buffer);
      last->current_length = n_bytes_to_write;

      vlib_set_next_frame_buffer (vm, &p->node_runtime, sm->tx.next_index, sm->first_buffer);

      sm->first_buffer = sm->last_buffer = ~0;
      sm->tx.n_total_data_bytes = 0;
    }

  else if (n_bytes_to_write == 0 && s->n_buffer_bytes == 0)
    {
      /* First call on a fresh stream: allocate the head buffer. */
      ASSERT (sm->first_buffer == ~0);
      ASSERT (sm->last_buffer == ~0);
      n = vlib_buffer_alloc_from_free_list (vm, &sm->first_buffer, 1, sm->tx.free_list_index);
      if (n != 1)
        serialize_error (m, clib_error_create ("vlib_buffer_alloc_from_free_list fails"));
      sm->last_buffer = sm->first_buffer;
      s->n_buffer_bytes = vlib_buffer_free_list_buffer_size (vm, sm->tx.free_list_index);
    }

  if (n_bytes_to_write > 0)
    {
      /* Current buffer is full: close it and chain a new one. */
      vlib_buffer_t * prev = vlib_get_buffer (vm, sm->last_buffer);
      n = vlib_buffer_alloc_from_free_list (vm, &sm->last_buffer, 1, sm->tx.free_list_index);
      if (n != 1)
        serialize_error (m, clib_error_create ("vlib_buffer_alloc_from_free_list fails"));
      sm->tx.n_total_data_bytes += n_bytes_to_write;
      prev->current_length = n_bytes_to_write;
      prev->next_buffer = sm->last_buffer;
      prev->flags |= VLIB_BUFFER_NEXT_PRESENT;
    }

  if (sm->last_buffer != ~0)
    {
      /* Point the stream at the (new) last buffer's data area. */
      last = vlib_get_buffer (vm, sm->last_buffer);
      s->buffer = vlib_buffer_get_current (last);
      s->current_buffer_index = 0;
      ASSERT (last->current_data == s->current_buffer_index);
    }
}

/* Serialize-stream data callback (rx direction): advance to the next
   buffer of the current chain, freeing a finished chain; when no chain is
   in progress, block the process until one arrives on the rx fifo. */
static void vlib_serialize_rx (serialize_main_header_t * m, serialize_stream_t * s)
{
  vlib_main_t * vm;
  vlib_serialize_buffer_main_t * sm;
  vlib_buffer_t * last;

  sm = uword_to_pointer (s->data_function_opaque, vlib_serialize_buffer_main_t *);
  vm = sm->vlib_main;

  if (serialize_stream_is_end_of_stream (s))
    return;

  if (sm->last_buffer != ~0)
    {
      last = vlib_get_buffer (vm, sm->last_buffer);

      if (last->flags & VLIB_BUFFER_NEXT_PRESENT)
        sm->last_buffer = last->next_buffer;
      else
        {
          /* End of chain: free it and look for the next one. */
          vlib_buffer_free (vm, &sm->first_buffer, /* count */ 1);
          sm->first_buffer = sm->last_buffer = ~0;
        }
    }

  if (sm->last_buffer == ~0)
    {
      /* Suspend this process until a buffer chain is queued. */
      while (clib_fifo_elts (sm->rx.buffer_fifo) == 0)
        {
          sm->rx.ready_one_time_event = vlib_process_create_one_time_event (vm, vlib_current_process (vm), ~0);
          vlib_process_wait_for_one_time_event (vm, /* no event data */ 0, sm->rx.ready_one_time_event);
        }

      clib_fifo_sub1 (sm->rx.buffer_fifo, sm->first_buffer);
      sm->last_buffer = sm->first_buffer;
    }

  ASSERT (sm->last_buffer != ~0);

  last = vlib_get_buffer (vm, sm->last_buffer);
  s->current_buffer_index = 0;
  s->buffer = vlib_buffer_get_current (last);
  s->n_buffer_bytes = last->current_length;
}

/* Common setup for (un)serialize over vlib buffers. */
static void
serialize_open_vlib_helper (serialize_main_t * m,
                            vlib_main_t * vm,
                            vlib_serialize_buffer_main_t * sm,
                            uword is_read)
{
  /* Initialize serialize main but save overflow buffer for re-use between calls.
*/ + { + u8 * save = m->stream.overflow_buffer; + memset (m, 0, sizeof (m[0])); + m->stream.overflow_buffer = save; + if (save) + _vec_len (save) = 0; + } + + sm->first_buffer = sm->last_buffer = ~0; + if (is_read) + clib_fifo_reset (sm->rx.buffer_fifo); + else + sm->tx.n_total_data_bytes = 0; + sm->vlib_main = vm; + m->header.data_function = is_read ? vlib_serialize_rx : vlib_serialize_tx; + m->stream.data_function_opaque = pointer_to_uword (sm); +} + +void serialize_open_vlib_buffer (serialize_main_t * m, vlib_main_t * vm, vlib_serialize_buffer_main_t * sm) +{ serialize_open_vlib_helper (m, vm, sm, /* is_read */ 0); } + +void unserialize_open_vlib_buffer (serialize_main_t * m, vlib_main_t * vm, vlib_serialize_buffer_main_t * sm) +{ serialize_open_vlib_helper (m, vm, sm, /* is_read */ 1); } + +u32 serialize_close_vlib_buffer (serialize_main_t * m) +{ + vlib_serialize_buffer_main_t * sm + = uword_to_pointer (m->stream.data_function_opaque, vlib_serialize_buffer_main_t *); + vlib_buffer_t * last; + serialize_stream_t * s = &m->stream; + + last = vlib_get_buffer (sm->vlib_main, sm->last_buffer); + last->current_length = s->current_buffer_index; + + if (vec_len (s->overflow_buffer) > 0) + { + sm->last_buffer + = vlib_buffer_add_data (sm->vlib_main, sm->tx.free_list_index, + sm->last_buffer == ~0 ? 
0 : sm->last_buffer, + s->overflow_buffer, + vec_len (s->overflow_buffer)); + _vec_len (s->overflow_buffer) = 0; + } + + return sm->first_buffer; +} + +void unserialize_close_vlib_buffer (serialize_main_t * m) +{ + vlib_serialize_buffer_main_t * sm + = uword_to_pointer (m->stream.data_function_opaque, vlib_serialize_buffer_main_t *); + if (sm->first_buffer != ~0) + vlib_buffer_free_one (sm->vlib_main, sm->first_buffer); + clib_fifo_reset (sm->rx.buffer_fifo); + if (m->stream.overflow_buffer) + _vec_len (m->stream.overflow_buffer) = 0; +} + +static u8 * format_vlib_buffer_free_list (u8 * s, va_list * va) +{ + vlib_buffer_free_list_t * f = va_arg (*va, vlib_buffer_free_list_t *); + u32 threadnum= va_arg (*va, u32); + uword bytes_alloc, bytes_free, n_free, size; + + if (! f) + return format (s, "%=7s%=30s%=12s%=12s%=12s%=12s%=12s%=12s", + "Thread", "Name", "Index", "Size", "Alloc", "Free", "#Alloc", "#Free"); + + size = sizeof (vlib_buffer_t) + f->n_data_bytes; + n_free = vec_len (f->aligned_buffers) + vec_len (f->unaligned_buffers); + bytes_alloc = size * f->n_alloc; + bytes_free = size * n_free; + + s = format (s, "%7d%30s%12d%12d%=12U%=12U%=12d%=12d", + threadnum, + f->name, f->index, f->n_data_bytes, + format_memory_size, bytes_alloc, + format_memory_size, bytes_free, + f->n_alloc, n_free); + + return s; +} + +static clib_error_t * +show_buffers (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + vlib_buffer_main_t * bm; + vlib_buffer_free_list_t * f; + vlib_main_t *curr_vm; + u32 vm_index = 0; + + vlib_cli_output (vm, "%U", format_vlib_buffer_free_list, 0, 0); + + do { + curr_vm = vec_len(vlib_mains) ? 
vlib_mains[vm_index] : vm; + bm = curr_vm->buffer_main; + + pool_foreach (f, bm->buffer_free_list_pool, ({ + vlib_cli_output (vm, "%U", format_vlib_buffer_free_list, f, vm_index); + })); + + vm_index++; + } while (vm_index < vec_len(vlib_mains)); + + return 0; +} + +VLIB_CLI_COMMAND (show_buffers_command, static) = { + .path = "show buffers", + .short_help = "Show packet buffer allocation", + .function = show_buffers, +}; + +#if CLIB_DEBUG > 0 + +u32 * vlib_buffer_state_validation_lock; +uword * vlib_buffer_state_validation_hash; +void * vlib_buffer_state_heap; + +static clib_error_t * +buffer_state_validation_init (vlib_main_t * vm) +{ + void * oldheap; + + vlib_buffer_state_heap = mheap_alloc (0, 10<<20); + + oldheap = clib_mem_set_heap (vlib_buffer_state_heap); + + vlib_buffer_state_validation_hash = hash_create (0, sizeof(uword)); + vec_validate_aligned (vlib_buffer_state_validation_lock, 0, + CLIB_CACHE_LINE_BYTES); + clib_mem_set_heap (oldheap); + return 0; +} + +VLIB_INIT_FUNCTION (buffer_state_validation_init); +#endif diff --git a/vlib/vlib/error.c b/vlib/vlib/error.c new file mode 100644 index 00000000000..59b89cefc3a --- /dev/null +++ b/vlib/vlib/error.c @@ -0,0 +1,271 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +/* + * error.c: VLIB error handler + * + * Copyright (c) 2008 Eliot Dresselhaus + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ */ + +#include <vlib/vlib.h> +#include <vppinfra/heap.h> + +uword +vlib_error_drop_buffers (vlib_main_t * vm, + vlib_node_runtime_t * node, + u32 * buffers, + u32 next_buffer_stride, + u32 n_buffers, + u32 next_index, + u32 drop_error_node, + u32 drop_error_code) +{ + u32 n_left_this_frame, n_buffers_left, * args, n_args_left; + vlib_error_t drop_error; + + drop_error = vlib_error_set (drop_error_node, drop_error_code); + + n_buffers_left = n_buffers; + while (n_buffers_left > 0) + { + vlib_get_next_frame (vm, node, next_index, args, n_args_left); + + n_left_this_frame = clib_min (n_buffers_left, n_args_left); + n_buffers_left -= n_left_this_frame; + n_args_left -= n_left_this_frame; + + while (n_left_this_frame >= 4) + { + u32 bi0, bi1, bi2, bi3; + vlib_buffer_t * b0, * b1, * b2, * b3; + + args[0] = bi0 = buffers[0]; + args[1] = bi1 = buffers[1]; + args[2] = bi2 = buffers[2]; + args[3] = bi3 = buffers[3]; + + b0 = vlib_get_buffer (vm, bi0); + b1 = vlib_get_buffer (vm, bi1); + b2 = vlib_get_buffer (vm, bi2); + b3 = vlib_get_buffer (vm, bi3); + + b0->error = drop_error; + b1->error = drop_error; + b2->error = drop_error; + b3->error = drop_error; + + buffers += 4; + args += 4; + n_left_this_frame -= 4; + } + + while (n_left_this_frame >= 1) + { + u32 bi0; + vlib_buffer_t * b0; + + args[0] = bi0 = buffers[0]; + + b0 = vlib_get_buffer (vm, bi0); + b0->error = drop_error; + + buffers += 1; + args += 1; + n_left_this_frame -= 1; + } + + vlib_put_next_frame (vm, node, next_index, n_args_left); + } + + return n_buffers; +} + +/* Convenience node to drop a vector of buffers with a "misc error". */ +static uword +misc_drop_buffers (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame) +{ + return vlib_error_drop_buffers (vm, node, + vlib_frame_args (frame), + /* buffer stride */ 1, + frame->n_vectors, + /* next */ 0, + node->node_index, + /* error */ 0); +} + +static char * misc_drop_buffers_error_strings[] = { + [0] = "misc. 
errors",
};

VLIB_REGISTER_NODE (misc_drop_buffers_node,static) = {
  .function = misc_drop_buffers,
  .name = "misc-drop-buffers",
  .vector_size = sizeof (u32),
  .n_errors = 1,
  .n_next_nodes = 1,
  .next_nodes = {
    "error-drop",
  },
  .error_strings = misc_drop_buffers_error_strings,
};

/* Reserves given number of error codes for given node.
   Copies the node's error strings into the shared error-strings heap and
   sizes the counter and elog-event-type vectors to match; safe to call
   again for a node (re-registration frees the old heap slot and preserves
   counter values cleared-state semantics below). */
void vlib_register_errors (vlib_main_t * vm,
                           u32 node_index,
                           u32 n_errors,
                           char * error_strings[])
{
  vlib_error_main_t * em = &vm->error_main;
  vlib_node_t * n = vlib_get_node (vm, node_index);
  uword l;

  /* Free up any previous error strings. */
  if (n->n_errors > 0)
    heap_dealloc (em->error_strings_heap, n->error_heap_handle);

  n->n_errors = n_errors;
  n->error_strings = error_strings;

  if (n_errors == 0)
    return;

  n->error_heap_index =
    heap_alloc (em->error_strings_heap, n_errors,
                n->error_heap_handle);

  l = vec_len (em->error_strings_heap);

  memcpy (vec_elt_at_index (em->error_strings_heap, n->error_heap_index),
          error_strings,
          n_errors * sizeof (error_strings[0]));

  /* Allocate a counter/elog type for each error. */
  vec_validate (em->counters, l - 1);
  vec_validate (vm->error_elog_event_types, l - 1);

  /* Zero counters for re-registrations of errors. */
  if (n->error_heap_index + n_errors <= vec_len (em->counters_last_clear))
    /* Re-registration: make the visible count (counters - last_clear)
       start from zero by copying the clear baseline. */
    memcpy (em->counters + n->error_heap_index,
            em->counters_last_clear + n->error_heap_index,
            n_errors * sizeof (em->counters[0]));
  else
    memset (em->counters + n->error_heap_index,
            0,
            n_errors * sizeof (em->counters[0]));

  {
    /* Build an elog event type "<node> <error>: %d" per error code. */
    elog_event_type_t t;
    uword i;

    memset (&t, 0, sizeof (t));
    for (i = 0; i < n_errors; i++)
      {
        t.format = (char *) format (0, "%v %s: %%d",
                                    n->name,
                                    error_strings[i]);
        vm->error_elog_event_types[n->error_heap_index + i] = t;
      }
  }
}

/* CLI: print non-zero error counters (relative to the last clear) for
   every node. */
static clib_error_t *
show_errors (vlib_main_t * vm,
             unformat_input_t * input,
             vlib_cli_command_t * cmd)
{
  vlib_error_main_t * em = &vm->error_main;
  vlib_node_t * n;
  u32 code, i, ni;
  u64 c;

  vlib_cli_output (vm, "%=16s%=40s%=20s", "Count", "Node", "Reason");

  for (ni = 0; ni < vec_len (vm->node_main.nodes); ni++)
    {
      n = vlib_get_node (vm, ni);
      for (code = 0; code < n->n_errors; code++)
        {
          i = n->error_heap_index + code;
          c = em->counters[i];
          if (i < vec_len (em->counters_last_clear))
            c -= em->counters_last_clear[i];

          if (c == 0)
            continue;

          vlib_cli_output (vm, "%16Ld%=40v%s", c, n->name, em->error_strings_heap[i]);
        }
    }

  return 0;
}

VLIB_CLI_COMMAND (cli_show_errors, static) = {
  .path = "show errors",
  .short_help = "Show error counts",
  .function = show_errors,
};

VLIB_CLI_COMMAND (cli_show_node_counters, static) = {
  .path = "show node counters",
  .short_help = "Show node counters",
  .function = show_errors,
};

/* CLI: snapshot current counters as the new "zero" baseline (counters are
   never reset in place; show subtracts this snapshot). */
static clib_error_t *
clear_error_counters (vlib_main_t * vm,
                      unformat_input_t * input,
                      vlib_cli_command_t * cmd)
{
  vlib_error_main_t * em = &vm->error_main;
  u32 i;

  vec_validate (em->counters_last_clear, vec_len (em->counters) - 1);
  for (i = 0; i < vec_len (em->counters); i++)
    em->counters_last_clear[i] = em->counters[i];
  return 0;
}

VLIB_CLI_COMMAND (cli_clear_error_counters, static) = {
  .path = "clear 
errors",
  .short_help = "Clear error counters",
  .function = clear_error_counters,
};

VLIB_CLI_COMMAND (cli_clear_node_counters, static) = {
  .path = "clear node counters",
  .short_help = "Clear node counters",
  .function = clear_error_counters,
};
diff --git a/vlib/vlib/error.h b/vlib/vlib/error.h
new file mode 100644
index 00000000000..4bf0b926718
--- /dev/null
+++ b/vlib/vlib/error.h
@@ -0,0 +1,89 @@
/*
 * Copyright (c) 2015 Cisco and/or its affiliates.
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
/*
 * error.h: drop/punt error packets
 *
 * Copyright (c) 2008 Eliot Dresselhaus
 *
 * Permission is hereby granted, free of charge, to any person obtaining
 * a copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sublicense, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice shall be
 * included in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT.  IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 */

#ifndef included_vlib_error_h
#define included_vlib_error_h

/* Combined 20 bit node index & 12 bit error code as a 32 bit number
   (node in bits 31..12, code in bits 11..0 — see the shifts/masks and
   ASSERTs below). */
typedef u32 vlib_error_t;

always_inline u32
vlib_error_get_node (vlib_error_t e)
{ return e >> 12; }

always_inline u32
vlib_error_get_code (vlib_error_t e)
{ return e & 0xfff; }

always_inline vlib_error_t
vlib_error_set (u32 node_index, u32 code)
{
  ASSERT (node_index < (1 << 20));
  ASSERT (code < (1 << 12));
  return (node_index << 12) | code;
}

/* Set the code field of an error whose code is currently zero. */
always_inline vlib_error_t
vlib_error_set_code (vlib_error_t e, u32 code)
{
  ASSERT (vlib_error_get_code (e) == 0);
  ASSERT (code < (1 << 12));
  e |= code;
  return e;
}

typedef struct {
  /* Error counters. */
  u64 * counters;

  /* Counter values as of last counter clear. */
  u64 * counters_last_clear;

  /* Error name strings in heap.  Heap index
     indexes counter vector. */
  char ** error_strings_heap;
} vlib_error_main_t;

/* Per node error registration. */
void vlib_register_errors (struct vlib_main_t * vm,
                           u32 node_index,
                           u32 n_errors,
                           char * error_strings[]);

#endif /* included_vlib_error_h */
diff --git a/vlib/vlib/error_funcs.h b/vlib/vlib/error_funcs.h
new file mode 100644
index 00000000000..acdd5d2d898
--- /dev/null
+++ b/vlib/vlib/error_funcs.h
@@ -0,0 +1,80 @@
/*
 * Copyright (c) 2015 Cisco and/or its affiliates.
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/* + * error_funcs.h: VLIB error handling + * + * Copyright (c) 2008 Eliot Dresselhaus + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ */ + +#ifndef included_vlib_error_funcs_h +#define included_vlib_error_funcs_h + +#include <vlib/node_funcs.h> + +always_inline void +vlib_error_elog_count (vlib_main_t * vm, uword counter, uword increment) +{ + elog_main_t * em = &vm->elog_main; + if (VLIB_ELOG_MAIN_LOOP > 0 && increment > 0) + elog (em, vec_elt_at_index (vm->error_elog_event_types, counter), increment); +} + +always_inline void +vlib_error_count (vlib_main_t * vm, uword node_index, + uword counter, uword increment) +{ + vlib_node_t * n = vlib_get_node (vm, node_index); + vlib_error_main_t * em = &vm->error_main; + + ASSERT (counter < n->n_errors); + counter += n->error_heap_index; + + ASSERT (counter < vec_len (em->counters)); + em->counters[counter] += increment; + + vlib_error_elog_count (vm, counter, increment); +} + +/* Drop all buffers in frame with given error code. */ +uword +vlib_error_drop_buffers (vlib_main_t * vm, + vlib_node_runtime_t * node, + u32 * buffers, + u32 next_buffer_stride, + u32 n_buffers, + u32 error_next_index, + u32 error_node, + u32 error_code); + +#endif /* included_vlib_error_funcs_h */ diff --git a/vlib/vlib/format.c b/vlib/vlib/format.c new file mode 100644 index 00000000000..3c77d8dbd18 --- /dev/null +++ b/vlib/vlib/format.c @@ -0,0 +1,171 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +/* + * format.c: generic network formatting/unformating + * + * Copyright (c) 2008 Eliot Dresselhaus + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#include <vlib/vlib.h> + +u8 * format_vlib_rx_tx (u8 * s, va_list * args) +{ + vlib_rx_or_tx_t r = va_arg (*args, vlib_rx_or_tx_t); + char * t; + + switch (r) + { + case VLIB_RX: t = "rx"; break; + case VLIB_TX: t = "tx"; break; + default: t = "INVALID"; break; + } + + vec_add (s, t, strlen (t)); + return s; +} + +u8 * format_vlib_read_write (u8 * s, va_list * args) +{ + vlib_rx_or_tx_t r = va_arg (*args, vlib_rx_or_tx_t); + char * t; + + switch (r) + { + case VLIB_READ: t = "read"; break; + case VLIB_WRITE: t = "write"; break; + default: t = "INVALID"; break; + } + + vec_add (s, t, strlen (t)); + return s; +} + +/* Formats buffer data as printable ascii or as hex. 
*/ +u8 * format_vlib_buffer_data (u8 * s, va_list * args) +{ + u8 * data = va_arg (*args, u8 *); + u32 n_data_bytes = va_arg (*args, u32); + u32 i, is_printable; + + is_printable = 1; + for (i = 0; i < n_data_bytes && is_printable; i++) + { + u8 c = data[i]; + if (c < 0x20) + is_printable = 0; + else if (c >= 0x7f) + is_printable = 0; + } + + if (is_printable) + vec_add (s, data, n_data_bytes); + else + s = format (s, "%U", format_hex_bytes, data, n_data_bytes); + + return s; +} + +/* Enable/on => 1; disable/off => 0. */ +uword unformat_vlib_enable_disable (unformat_input_t * input, va_list * args) +{ + int * result = va_arg (*args, int *); + int enable; + + if (unformat (input, "enable") || unformat (input, "on")) + enable = 1; + else if (unformat (input, "disable") || unformat (input, "off")) + enable = 0; + else + return 0; + + *result = enable; + return 1; +} + +/* rx/tx => VLIB_RX/VLIB_TX. */ +uword unformat_vlib_rx_tx (unformat_input_t * input, va_list * args) +{ + int * result = va_arg (*args, int *); + if (unformat (input, "rx")) + *result = VLIB_RX; + else if (unformat (input, "tx")) + *result = VLIB_TX; + else + return 0; + return 1; +} + +/* Parse an int either %d or 0x%x. */ +uword unformat_vlib_number (unformat_input_t * input, va_list * args) +{ + int * result = va_arg (*args, int *); + + return (unformat (input, "0x%x", result) + || unformat (input, "%d", result)); +} + +/* Parse a-zA-Z0-9_ token and hash to value. */ +uword unformat_vlib_number_by_name (unformat_input_t * input, va_list * args) +{ + uword * hash = va_arg (*args, uword *); + int * result = va_arg (*args, int *); + uword * p; + u8 * token; + int i; + + if (! unformat_user (input, unformat_token, "a-zA-Z0-9_", &token)) + return 0; + + /* Null terminate. */ + if (vec_len (token) > 0 && + token[vec_len (token) - 1] != 0) + vec_add1 (token, 0); + + /* Check for exact match. */ + p = hash_get_mem (hash, token); + if (p) + goto done; + + /* Convert to upper case & try match. 
*/ + for (i = 0; i < vec_len (token); i++) + if (token[i] >= 'a' && token[i] <= 'z') + token[i] = 'A' + token[i] - 'a'; + p = hash_get_mem (hash, token); + + done: + vec_free (token); + if (p) + *result = p[0]; + return p != 0; +} diff --git a/vlib/vlib/format_funcs.h b/vlib/vlib/format_funcs.h new file mode 100644 index 00000000000..02d8a555d78 --- /dev/null +++ b/vlib/vlib/format_funcs.h @@ -0,0 +1,67 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/* + * format_funcs.h: VLIB formatting/unformating + * + * Copyright (c) 2008 Eliot Dresselhaus + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#ifndef included_vlib_format_h +#define included_vlib_format_h + +/* Format vlib_rx_or_tx_t/vlib_read_or_write_t enum as string. */ +u8 * format_vlib_rx_tx (u8 * s, va_list * args); +u8 * format_vlib_read_write (u8 * s, va_list * args); + +/* Formats buffer data as printable ascii or as hex. */ +u8 * format_vlib_buffer_data (u8 * s, va_list * args); + +/* Enable/on => 1; disable/off => 0. */ +uword unformat_vlib_enable_disable (unformat_input_t * input, va_list * args); + +/* rx/tx => VLIB_RX/VLIB_TX. */ +uword unformat_vlib_rx_tx (unformat_input_t * input, va_list * args); + +/* Parse a-zA-Z0-9_ token and hash to value. */ +uword unformat_vlib_number_by_name (unformat_input_t * input, va_list * args); + +/* Parse an int either %d or 0x%x. */ +uword unformat_vlib_number (unformat_input_t * input, va_list * args); + +/* Flag to format_vlib_*_header functions to tell them not to recurse + into the next layer's header. For example, tells format_vlib_ethernet_header + not to format ip header. */ +#define FORMAT_VLIB_HEADER_NO_RECURSION (~0) + +#endif /* included_vlib_format_h */ diff --git a/vlib/vlib/global_funcs.h b/vlib/vlib/global_funcs.h new file mode 100644 index 00000000000..406ce7d71b6 --- /dev/null +++ b/vlib/vlib/global_funcs.h @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/* + * global_funcs.h: global data structure access functions + */ + +#ifndef included_vlib_global_funcs_h_ +#define included_vlib_global_funcs_h_ + +always_inline vlib_main_t * +vlib_get_main (void) +{ + vlib_main_t * vm; + vm = vlib_mains ? vlib_mains[os_get_cpu_number()] : &vlib_global_main; + ASSERT(vm); + return vm; +} + +always_inline vlib_thread_main_t * +vlib_get_thread_main() +{ + return &vlib_thread_main; +} + +#endif /* included_vlib_global_funcs_h_ */ diff --git a/vlib/vlib/init.c b/vlib/vlib/init.c new file mode 100644 index 00000000000..3991c800147 --- /dev/null +++ b/vlib/vlib/init.c @@ -0,0 +1,158 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/* + * init.c: mechanism for functions to be called at init/exit. 
+ * + * Copyright (c) 2008 Eliot Dresselhaus + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#include <vlib/vlib.h> + +clib_error_t * +vlib_call_init_exit_functions (vlib_main_t * vm, + _vlib_init_function_list_elt_t *head, + int call_once) +{ + clib_error_t * error = 0; + _vlib_init_function_list_elt_t * i; + + i = head; + while (i) + { + if (call_once && !hash_get (vm->init_functions_called, i->f)) + { + if (call_once) + hash_set1 (vm->init_functions_called, i->f); + error = i->f (vm); + if (error) + return error; + } + i = i->next_init_function; + } + return error; +} + +clib_error_t * vlib_call_all_init_functions (vlib_main_t * vm) +{ + /* Call dummy functions to make sure purely static modules are + linked in. 
*/ +#define _(f) vlib_##f##_reference (); + foreach_vlib_module_reference; +#undef _ + + return vlib_call_init_exit_functions + (vm, vm->init_function_registrations, 1 /* call_once */); +} + +clib_error_t * vlib_call_all_main_loop_enter_functions (vlib_main_t * vm) +{ + return vlib_call_init_exit_functions + (vm, vm->main_loop_enter_function_registrations, 1 /* call_once */); +} + +clib_error_t * vlib_call_all_main_loop_exit_functions (vlib_main_t * vm) +{ + return vlib_call_init_exit_functions + (vm, vm->main_loop_exit_function_registrations, 1 /* call_once */); +} + +clib_error_t * vlib_call_all_config_functions (vlib_main_t * vm, + unformat_input_t * input, + int is_early) +{ + clib_error_t * error = 0; + vlib_config_function_runtime_t * c, ** all; + uword * hash = 0, * p; + uword i; + + hash = hash_create_string (0, sizeof (uword)); + all = 0; + + c = vm->config_function_registrations; + + while (c) + { + hash_set_mem (hash, c->name, vec_len (all)); + vec_add1 (all, c); + unformat_init (&c->input, 0, 0); + c = c->next_registration; + } + + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) + { + u8 * s, * v; + + if (! unformat (input, "%s %v", &s, &v) + || ! (p = hash_get_mem (hash, s))) + { + error = clib_error_create ("unknown input `%s %v'", s, v); + goto done; + } + + c = all[p[0]]; + if (vec_len (c->input.buffer) > 0) + vec_add1 (c->input.buffer, ' '); + vec_add (c->input.buffer, v, vec_len (v)); + vec_free (v); + vec_free (s); + } + + for (i = 0; i < vec_len (all); i++) + { + c = all[i]; + + /* Is this an early config? Are we doing early configs? */ + if (is_early ^ c->is_early) + continue; + + /* Already called? 
*/ + if (hash_get (vm->init_functions_called, c->function)) + continue; + hash_set1 (vm->init_functions_called, c->function); + + error = c->function (vm, &c->input); + if (error) + goto done; + } + + done: + for (i = 0; i < vec_len (all); i++) + { + c = all[i]; + unformat_free (&c->input); + } + vec_free (all); + hash_free (hash); + return error; +} diff --git a/vlib/vlib/init.h b/vlib/vlib/init.h new file mode 100644 index 00000000000..9d940d0745f --- /dev/null +++ b/vlib/vlib/init.h @@ -0,0 +1,222 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/* + * init.h: mechanism for functions to be called at init/exit. + * + * Copyright (c) 2008 Eliot Dresselhaus + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#ifndef included_vlib_init_h +#define included_vlib_init_h + +#include <vppinfra/error.h> +#include <vppinfra/format.h> +#include <vppinfra/hash.h> + +/* Init/exit functions: called at start/end of main routine. Init + functions are typically used to register and setup packet + processing nodes. */ + +typedef clib_error_t * (vlib_init_function_t) (struct vlib_main_t * vm); + +typedef struct _vlib_init_function_list_elt { + struct _vlib_init_function_list_elt * next_init_function; + vlib_init_function_t * f; +} _vlib_init_function_list_elt_t; + +/* Configuration functions: called with configuration input just before + main polling loop starts. */ +typedef clib_error_t * (vlib_config_function_t) (struct vlib_main_t * vm, + unformat_input_t * input); + +typedef struct vlib_config_function_runtime_t { + /* Function to call. Set to null once function has already been called. */ + vlib_config_function_t * function; + + /* Input for function. */ + unformat_input_t input; + + /* next config function registration */ + struct vlib_config_function_runtime_t * next_registration; + + /* To be invoked as soon as the clib heap is available */ + u8 is_early; + + /* Name used to distinguish input on command line. 
*/ + char name[32]; +} vlib_config_function_runtime_t; + +#define _VLIB_INIT_FUNCTION_SYMBOL(x, type) \ + _vlib_##type##_function_##x + +#define VLIB_INIT_FUNCTION_SYMBOL(x) \ + _VLIB_INIT_FUNCTION_SYMBOL(x, init) +#define VLIB_MAIN_LOOP_ENTER_FUNCTION_SYMBOL(x) \ + _VLIB_INIT_FUNCTION_SYMBOL(x, main_loop_enter) +#define VLIB_MAIN_LOOP_EXIT_FUNCTION_SYMBOL(x) \ + _VLIB_INIT_FUNCTION_SYMBOL(x, main_loop_exit) +#define VLIB_CONFIG_FUNCTION_SYMBOL(x) \ + _VLIB_INIT_FUNCTION_SYMBOL(x, config) + +/* Declaration is global (e.g. not static) so that init functions can + be called from other modules to resolve init function depend. */ + +#define VLIB_DECLARE_INIT_FUNCTION(x, tag) \ +vlib_init_function_t * _VLIB_INIT_FUNCTION_SYMBOL (x, tag) = x; \ +static void __vlib_add_##tag##_function_##x (void) \ + __attribute__((__constructor__)) ; \ +static void __vlib_add_##tag##_function_##x (void) \ +{ \ + vlib_main_t * vm = vlib_get_main(); \ + static _vlib_init_function_list_elt_t _vlib_init_function; \ + _vlib_init_function.next_init_function \ + = vm->tag##_function_registrations; \ + vm->tag##_function_registrations = &_vlib_init_function; \ + _vlib_init_function.f = &x; \ +} + +#define VLIB_INIT_FUNCTION(x) VLIB_DECLARE_INIT_FUNCTION(x,init) + +#define VLIB_MAIN_LOOP_ENTER_FUNCTION(x) \ + VLIB_DECLARE_INIT_FUNCTION(x,main_loop_enter) +#define VLIB_MAIN_LOOP_EXIT_FUNCTION(x) \ +VLIB_DECLARE_INIT_FUNCTION(x,main_loop_exit) + +#define VLIB_CONFIG_FUNCTION(x,n,...) 
\ + __VA_ARGS__ vlib_config_function_runtime_t \ + VLIB_CONFIG_FUNCTION_SYMBOL(x); \ +static void __vlib_add_config_function_##x (void) \ + __attribute__((__constructor__)) ; \ +static void __vlib_add_config_function_##x (void) \ +{ \ + vlib_main_t * vm = vlib_get_main(); \ + VLIB_CONFIG_FUNCTION_SYMBOL(x).next_registration \ + = vm->config_function_registrations; \ + vm->config_function_registrations \ + = &VLIB_CONFIG_FUNCTION_SYMBOL(x); \ +} \ + vlib_config_function_runtime_t \ + VLIB_CONFIG_FUNCTION_SYMBOL (x) \ + = { \ + .name = n, \ + .function = x, \ + .is_early = 0, \ + } + +#define VLIB_EARLY_CONFIG_FUNCTION(x,n,...) \ + __VA_ARGS__ vlib_config_function_runtime_t \ + VLIB_CONFIG_FUNCTION_SYMBOL(x); \ +static void __vlib_add_config_function_##x (void) \ + __attribute__((__constructor__)) ; \ +static void __vlib_add_config_function_##x (void) \ +{ \ + vlib_main_t * vm = vlib_get_main(); \ + VLIB_CONFIG_FUNCTION_SYMBOL(x).next_registration \ + = vm->config_function_registrations; \ + vm->config_function_registrations \ + = &VLIB_CONFIG_FUNCTION_SYMBOL(x); \ +} \ + vlib_config_function_runtime_t \ + VLIB_CONFIG_FUNCTION_SYMBOL (x) \ + = { \ + .name = n, \ + .function = x, \ + .is_early = 1, \ + } + +/* Call given init function: used for init function dependencies. */ +#define vlib_call_init_function(vm, x) \ + ({ \ + extern vlib_init_function_t * VLIB_INIT_FUNCTION_SYMBOL (x); \ + vlib_init_function_t * _f = VLIB_INIT_FUNCTION_SYMBOL (x); \ + clib_error_t * _error = 0; \ + if (! hash_get (vm->init_functions_called, _f)) \ + { \ + hash_set1 (vm->init_functions_called, _f); \ + _error = _f (vm); \ + } \ + _error; \ + }) + +#define vlib_call_post_graph_init_function(vm, x) \ + ({ \ + extern vlib_init_function_t * VLIB_POST_GRAPH_INIT_FUNCTION_SYMBOL (x); \ + vlib_init_function_t * _f = VLIB_POST_GRAPH_INIT_FUNCTION_SYMBOL (x); \ + clib_error_t * _error = 0; \ + if (! 
hash_get (vm->init_functions_called, _f)) \ + { \ + hash_set1 (vm->init_functions_called, _f); \ + _error = _f (vm); \ + } \ + _error; \ + }) + +#define vlib_call_config_function(vm, x) \ + ({ \ + vlib_config_function_runtime_t * _r; \ + clib_error_t * _error = 0; \ + extern vlib_config_function_runtime_t \ + VLIB_CONFIG_FUNCTION_SYMBOL (x); \ + \ + _r = &VLIB_CONFIG_FUNCTION_SYMBOL (x); \ + if (! hash_get (vm->init_functions_called, _r->function)) \ + { \ + hash_set1 (vm->init_functions_called, _r->function); \ + _error = _r->function (vm, &_r->input); \ + } \ + _error; \ + }) + +/* External functions. */ +clib_error_t * vlib_call_all_init_functions (struct vlib_main_t * vm); +clib_error_t * vlib_call_all_config_functions (struct vlib_main_t * vm, + unformat_input_t * input, + int is_early); +clib_error_t * vlib_call_all_main_loop_enter_functions (struct vlib_main_t * vm); +clib_error_t * vlib_call_all_main_loop_exit_functions (struct vlib_main_t * vm); +clib_error_t * +vlib_call_init_exit_functions (struct vlib_main_t * vm, + _vlib_init_function_list_elt_t *head, + int call_once); + +#define foreach_vlib_module_reference \ + _ (node_cli) \ + _ (trace_cli) + +/* Dummy function to get node_cli.c linked in. */ +#define _(x) void vlib_##x##_reference (void); +foreach_vlib_module_reference +#undef _ + +#endif /* included_vlib_init_h */ diff --git a/vlib/vlib/lex.c b/vlib/vlib/lex.c new file mode 100644 index 00000000000..de650900c11 --- /dev/null +++ b/vlib/vlib/lex.c @@ -0,0 +1,251 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include <vlib/vlib.h> +#include <vlib/lex.h> + +vlib_lex_main_t vlib_lex_main; + +#define LEX_DEBUG 0 + +u8 * format_vlib_lex_token (u8 * s, va_list * args) +{ + vlib_lex_main_t *lm = va_arg (*args, vlib_lex_main_t *); + vlib_lex_token_t *t = va_arg (*args, vlib_lex_token_t *); + + if (t->token == VLIB_LEX_word) + s = format (s, "%s", t->value.as_pointer); + else + s = format (s, "%s", lm->lex_token_names[t->token]); + return s; +} + +void vlib_lex_get_token (vlib_lex_main_t * lm, vlib_lex_token_t * rv) +{ + u8 c; + vlib_lex_table_t *t; + vlib_lex_table_entry_t *e; + uword tv; + + if (PREDICT_FALSE (lm->pushback_sp >= 0)) + { + rv[0] = lm->pushback_vector [lm->pushback_sp--]; + return; + } + + rv->value.as_uword = ~0; + + while (1) + { + if (PREDICT_FALSE(lm->current_index >= vec_len (lm->input_vector))) + { + rv->token = VLIB_LEX_eof; + return; + } + + t = vec_elt_at_index (lm->lex_tables, lm->current_table_index); + c = (lm->input_vector [lm->current_index++]) & 0x7f; + e = &t->entries [c]; + lm->current_table_index = e->next_table_index; + + switch (e->action) + { + case VLIB_LEX_IGNORE: + continue; + + case VLIB_LEX_START_NUMBER: + lm->current_token_value = 0; + /* fallthru */ + + case VLIB_LEX_ADD_TO_NUMBER: + lm->current_number_base = e->token; + lm->current_token_value *= lm->current_number_base; + tv = c - '0'; + if (tv >= lm->current_number_base) + { + tv = 10 + c - 'A'; + if (tv >= lm->current_number_base) + tv = 10 + c - 'a'; + } + lm->current_token_value += tv; + continue; + + case VLIB_LEX_ADD_TO_TOKEN: + 
vec_add1(lm->token_buffer, c); + continue; + + case VLIB_LEX_KEYWORD_CHECK: { + uword * p; + + vec_add1 (lm->token_buffer, 0); + + /* It's either a keyword or just a word. */ + p = hash_get_mem (lm->lex_keywords, lm->token_buffer); + if (p) + { + rv->token = p[0]; + if (LEX_DEBUG > 0) + clib_warning ("keyword '%s' token %s", + lm->token_buffer, + lm->lex_token_names[rv->token]); + } + else + { + /* it's a WORD */ + rv->token = VLIB_LEX_word; + rv->value.as_pointer = vec_dup (lm->token_buffer); + if (LEX_DEBUG > 0) + clib_warning ("%s, value '%s'", + lm->lex_token_names[VLIB_LEX_word], + rv->value.as_pointer); + } + _vec_len (lm->token_buffer) = 0; + + /* Rescan the character which terminated the keyword/word. */ + lm->current_index--; + return; + } + + case VLIB_LEX_RETURN_AND_RESCAN: + ASSERT(lm->current_index); + lm->current_index--; + /* note flow-through */ + + case VLIB_LEX_RETURN: + rv->token = e->token; + rv->value.as_uword = lm->current_token_value; + lm->current_token_value = ~0; + if (LEX_DEBUG > 0) + { + clib_warning ("table %s char '%c'(0x%02x) next table %s return %s", + t->name, c, c, lm->lex_tables[e->next_table_index].name, + lm->lex_token_names[e->token]); + if (rv->token == VLIB_LEX_number) + clib_warning (" numeric value 0x%x (%d)", rv->value, + rv->value); + } + return; + } + } +} + +u16 vlib_lex_add_token (vlib_lex_main_t *lm, char *token_name) +{ + uword *p; + u16 rv; + + p = hash_get_mem (lm->lex_tokens_by_name, token_name); + + if (p) + return p[0]; + + rv = vec_len (lm->lex_token_names); + hash_set_mem (lm->lex_tokens_by_name, token_name, rv); + vec_add1 (lm->lex_token_names, token_name); + + return rv; +} + +static u16 add_keyword (vlib_lex_main_t *lm, char *keyword, char *token_name) +{ + uword *p; + u16 token; + + p = hash_get_mem (lm->lex_keywords, keyword); + + ASSERT (p == 0); + + token = vlib_lex_add_token (lm, token_name); + + hash_set_mem (lm->lex_keywords, keyword, token); + return token; +} + +u16 vlib_lex_find_or_add_keyword 
(vlib_lex_main_t *lm, char *keyword, char *token_name) +{ + uword * p = hash_get_mem (lm->lex_keywords, keyword); + return p ? p[0] : add_keyword (lm, keyword, token_name); +} + +void vlib_lex_set_action_range (u32 table_index, u8 lo, u8 hi, u16 action, + u16 token, u32 next_table_index) +{ + int i; + vlib_lex_main_t *lm = &vlib_lex_main; + vlib_lex_table_t *t = pool_elt_at_index (lm->lex_tables, table_index); + + for (i = lo; i <= hi; i++) + { + ASSERT (i < ARRAY_LEN (t->entries)); + t->entries[i].action = action; + t->entries[i].token = token; + t->entries[i].next_table_index = next_table_index; + } +} + +u16 vlib_lex_add_table (char *name) +{ + vlib_lex_main_t *lm = &vlib_lex_main; + vlib_lex_table_t *t; + uword *p; + + p = hash_get_mem (lm->lex_tables_by_name, name); + + ASSERT(p == 0); + + pool_get_aligned (lm->lex_tables, t, CLIB_CACHE_LINE_BYTES); + + t->name = name; + + hash_set_mem (lm->lex_tables_by_name, name, t - lm->lex_tables); + + vlib_lex_set_action_range (t - lm->lex_tables, 1, 0x7F, VLIB_LEX_IGNORE, ~0, + t - lm->lex_tables); + + vlib_lex_set_action_range (t - lm->lex_tables, 0, 0, VLIB_LEX_RETURN, VLIB_LEX_eof, + t - lm->lex_tables); + + return t - lm->lex_tables; +} + +void vlib_lex_reset (vlib_lex_main_t *lm, u8 *input_vector) +{ + if (lm->pushback_vector) + _vec_len (lm->pushback_vector) = 0; + lm->pushback_sp = -1; + + lm->input_vector = input_vector; + lm->current_index = 0; +} + +static clib_error_t * lex_onetime_init (vlib_main_t * vm) +{ + vlib_lex_main_t *lm = &vlib_lex_main; + + lm->lex_tables_by_name = hash_create_string (0, sizeof (uword)); + lm->lex_tokens_by_name = hash_create_string (0, sizeof (uword)); + lm->lex_keywords = hash_create_string (0, sizeof (uword)); + lm->pushback_sp = -1; + +#define _(f) { u16 tmp = vlib_lex_add_token (lm, #f); ASSERT (tmp == VLIB_LEX_##f); } + foreach_vlib_lex_global_token; +#undef _ + + vec_validate (lm->token_buffer, 127); + _vec_len (lm->token_buffer) = 0; + + return 0; +} + +VLIB_INIT_FUNCTION 
(lex_onetime_init);
diff --git a/vlib/vlib/lex.h b/vlib/vlib/lex.h
new file mode 100644
index 00000000000..d5ea509915c
--- /dev/null
+++ b/vlib/vlib/lex.h
@@ -0,0 +1,130 @@
/*
 * Copyright (c) 2015 Cisco and/or its affiliates.
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#ifndef included_vlib_lex_h
#define included_vlib_lex_h

#include <vppinfra/hash.h>
#include <vppinfra/bitmap.h>
#include <vppinfra/error.h>
#include <vppinfra/pool.h>

/* Token kinds shared by every lexer built on this module. */
#define foreach_vlib_lex_global_token		\
  _ (invalid)					\
  _ (eof)					\
  _ (word)					\
  _ (number)					\
  _ (lt)					\
  _ (gt)					\
  _ (dot)					\
  _ (slash)					\
  _ (qmark)					\
  _ (equals)					\
  _ (plus)					\
  _ (minus)					\
  _ (star)					\
  _ (lpar)					\
  _ (rpar)

typedef enum {
#define _(f) VLIB_LEX_##f,
  foreach_vlib_lex_global_token
#undef _
} vlib_lex_global_token_t;

/* Per-character action selected by the current state table. */
typedef enum {
  VLIB_LEX_IGNORE,
  VLIB_LEX_ADD_TO_TOKEN,
  VLIB_LEX_RETURN,
  VLIB_LEX_RETURN_AND_RESCAN,
  VLIB_LEX_KEYWORD_CHECK,
  VLIB_LEX_START_NUMBER,
  VLIB_LEX_ADD_TO_NUMBER,
} vlib_lex_action_t;

/* One table slot: what to do for a given input character. */
typedef struct {
  u16 action;            /* vlib_lex_action_t */
  u16 next_table_index;  /* state table to switch to */
  u16 token;             /* token to emit, for RETURN-style actions */
} vlib_lex_table_entry_t;

typedef struct {
  char *name;
  /* One entry per 7-bit ASCII input character. */
  vlib_lex_table_entry_t entries [128];
} vlib_lex_table_t;

typedef struct {
  u32 token;

  union {
    uword as_uword;
    void * as_pointer;   /* for VLIB_LEX_word: heap vec, see cleanup below */
    char * as_string;
  } value;
} vlib_lex_token_t;

typedef struct {
  /* Pool of state tables; index doubles as table id. */
  vlib_lex_table_t * lex_tables;
  uword * lex_tables_by_name;

  /* Vector of token strings. */
  char ** lex_token_names;

  /* Hash mapping c string name to token index. */
  uword * lex_tokens_by_name;

  /* Hash mapping c string keyword name to token index. */
  uword * lex_keywords;

  /* Stack of pushed-back (un-got) tokens. */
  vlib_lex_token_t * pushback_vector;

  /* Top-of-stack index for pushback_vector; -1 when empty. */
  i32 pushback_sp;

  u32 current_table_index;

  uword current_token_value;

  uword current_number_base;

  /* Input string we are lex-ing. */
  u8 *input_vector;

  /* Current index into input vector. */
  u32 current_index;

  /* Re-used vector for forming token strings and hashing them. */
  u8 * token_buffer;
} vlib_lex_main_t;

/* NOTE(review): tentative definition in a header — every including TU
   defines vlib_lex_main and correctness relies on common-symbol merging
   (-fcommon). Consider `extern` here plus one definition in lex.c. */
vlib_lex_main_t vlib_lex_main;

/* Release a WORD token's heap-allocated string payload; no-op for
   every other token kind. */
always_inline void
vlib_lex_cleanup_token (vlib_lex_token_t * t)
{
  if (t->token == VLIB_LEX_word)
    {
      u8 * tv = t->value.as_pointer;
      vec_free (tv);
    }
}

u16 vlib_lex_add_table (char *name);
void vlib_lex_get_token (vlib_lex_main_t *lm, vlib_lex_token_t * result);
u16 vlib_lex_add_token (vlib_lex_main_t *lm, char *token_name);
void vlib_lex_set_action_range (u32 table_index, u8 lo, u8 hi, u16 action,
				u16 token, u32 next_table_index);
void vlib_lex_reset (vlib_lex_main_t *lm, u8 *input_vector);
format_function_t format_vlib_lex_token;

#endif /* included_vlib_lex_h */
diff --git a/vlib/vlib/main.c b/vlib/vlib/main.c
new file mode 100644
index 00000000000..64bd3c02b60
--- /dev/null
+++ b/vlib/vlib/main.c
@@ -0,0 +1,1559 @@
/*
 * Copyright (c) 2015 Cisco and/or its affiliates.
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
+ */ +/* + * main.c: main vector processing loop + * + * Copyright (c) 2008 Eliot Dresselhaus + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#include <math.h> +#include <vppinfra/format.h> +#include <vlib/vlib.h> +#include <vlib/threads.h> + +#include <vlib/unix/cj.h> + +CJ_GLOBAL_LOG_PROTOTYPE; + + +//#define VLIB_ELOG_MAIN_LOOP 1 + +/* Actually allocate a few extra slots of vector data to support + speculative vector enqueues which overflow vector data in next frame. */ +#define VLIB_FRAME_SIZE_ALLOC (VLIB_FRAME_SIZE + 4) + +always_inline u32 +vlib_frame_bytes (u32 n_scalar_bytes, u32 n_vector_bytes) +{ + u32 n_bytes; + + /* Make room for vlib_frame_t plus scalar arguments. */ + n_bytes = vlib_frame_vector_byte_offset (n_scalar_bytes); + + /* Make room for vector arguments. + Allocate a few extra slots of vector data to support + speculative vector enqueues which overflow vector data in next frame. 
*/ +#define VLIB_FRAME_SIZE_EXTRA 4 + n_bytes += (VLIB_FRAME_SIZE + VLIB_FRAME_SIZE_EXTRA) * n_vector_bytes; + + /* Magic number is first 32bit number after vector data. + Used to make sure that vector data is never overrun. */ +#define VLIB_FRAME_MAGIC (0xabadc0ed) + n_bytes += sizeof (u32); + + /* Pad to cache line. */ + n_bytes = round_pow2 (n_bytes, CLIB_CACHE_LINE_BYTES); + + return n_bytes; +} + +always_inline u32 * +vlib_frame_find_magic (vlib_frame_t * f, vlib_node_t * node) +{ + void * p = f; + + p += vlib_frame_vector_byte_offset (node->scalar_size); + + p += (VLIB_FRAME_SIZE + VLIB_FRAME_SIZE_EXTRA) * node->vector_size; + + return p; +} + +static vlib_frame_size_t * +get_frame_size_info (vlib_node_main_t * nm, + u32 n_scalar_bytes, u32 n_vector_bytes) +{ + uword key = (n_scalar_bytes << 16) | n_vector_bytes; + uword * p, i; + + p = hash_get (nm->frame_size_hash, key); + if (p) + i = p[0]; + else + { + i = vec_len (nm->frame_sizes); + vec_validate (nm->frame_sizes, i); + hash_set (nm->frame_size_hash, key, i); + } + + return vec_elt_at_index (nm->frame_sizes, i); +} + +static u32 +vlib_frame_alloc_to_node (vlib_main_t * vm, u32 to_node_index, u32 frame_flags) +{ + vlib_node_main_t * nm = &vm->node_main; + vlib_frame_size_t * fs; + vlib_node_t * to_node; + vlib_frame_t * f; + u32 fi, l, n, scalar_size, vector_size; + + to_node = vlib_get_node (vm, to_node_index); + + scalar_size = to_node->scalar_size; + vector_size = to_node->vector_size; + + fs = get_frame_size_info (nm, scalar_size, vector_size); + n = vlib_frame_bytes (scalar_size, vector_size); + if ((l = vec_len (fs->free_frame_indices)) > 0) + { + /* Allocate from end of free list. */ + fi = fs->free_frame_indices[l - 1]; + f = vlib_get_frame_no_check (vm, fi); + _vec_len (fs->free_frame_indices) = l - 1; + } + else + { + f = clib_mem_alloc_aligned_no_fail (n, CLIB_CACHE_LINE_BYTES); + f->cpu_index = vm->cpu_index; + fi = vlib_frame_index_no_check (vm, f); + } + + /* Poison frame when debugging. 
*/ + if (CLIB_DEBUG > 0) + { + u32 save_cpu_index = f->cpu_index; + + memset (f, 0xfe, n); + + f->cpu_index = save_cpu_index; + } + + /* Insert magic number. */ + { + u32 * magic; + + magic = vlib_frame_find_magic (f, to_node); + *magic = VLIB_FRAME_MAGIC; + } + + f->flags = VLIB_FRAME_IS_ALLOCATED | frame_flags; + f->n_vectors = 0; + f->scalar_size = scalar_size; + f->vector_size = vector_size; + + fs->n_alloc_frames += 1; + + return fi; +} + +/* Allocate a frame for from FROM_NODE to TO_NODE via TO_NEXT_INDEX. + Returns frame index. */ +static u32 +vlib_frame_alloc (vlib_main_t * vm, vlib_node_runtime_t * from_node_runtime, u32 to_next_index) +{ + vlib_node_t * from_node; + + from_node = vlib_get_node (vm, from_node_runtime->node_index); + ASSERT (to_next_index < vec_len (from_node->next_nodes)); + + return vlib_frame_alloc_to_node (vm, + from_node->next_nodes[to_next_index], + /* frame_flags */ 0); +} + +vlib_frame_t * +vlib_get_frame_to_node (vlib_main_t * vm, u32 to_node_index) +{ + u32 fi = vlib_frame_alloc_to_node (vm, to_node_index, + /* frame_flags */ VLIB_FRAME_FREE_AFTER_DISPATCH); + return vlib_get_frame (vm, fi); +} + +void vlib_put_frame_to_node (vlib_main_t * vm, u32 to_node_index, vlib_frame_t * f) +{ + vlib_pending_frame_t * p; + vlib_node_t * to_node; + + if (f->n_vectors == 0) + return; + + to_node = vlib_get_node (vm, to_node_index); + + vec_add2 (vm->node_main.pending_frames, p, 1); + + f->flags |= VLIB_FRAME_PENDING; + p->frame_index = vlib_frame_index (vm, f); + p->node_runtime_index = to_node->runtime_index; + p->next_frame_index = VLIB_PENDING_FRAME_NO_NEXT_FRAME; +} + +/* Free given frame. 
 */
/* Return F to its size-class free list (frames are never handed back
   to the heap; they are recycled by vlib_frame_alloc_to_node). */
void
vlib_frame_free (vlib_main_t * vm,
		 vlib_node_runtime_t * r,
		 vlib_frame_t * f)
{
  vlib_node_main_t * nm = &vm->node_main;
  vlib_node_t * node;
  vlib_frame_size_t * fs;
  u32 frame_index;

  ASSERT (f->flags & VLIB_FRAME_IS_ALLOCATED);

  node = vlib_get_node (vm, r->node_index);
  fs = get_frame_size_info (nm, node->scalar_size, node->vector_size);

  frame_index = vlib_frame_index (vm, f);

  ASSERT (f->flags & VLIB_FRAME_IS_ALLOCATED);

  /* No next frames may point to freed frame. */
  if (CLIB_DEBUG > 0)
    {
      vlib_next_frame_t * nf;
      vec_foreach (nf, vm->node_main.next_frames)
	ASSERT (nf->frame_index != frame_index);
    }

  f->flags &= ~VLIB_FRAME_IS_ALLOCATED;

  vec_add1 (fs->free_frame_indices, frame_index);
  ASSERT (fs->n_alloc_frames > 0);
  fs->n_alloc_frames -= 1;
}

/* CLI: per-size-class frame allocation statistics
   ("show vlib frame-allocation"). */
static clib_error_t *
show_frame_stats (vlib_main_t * vm,
		  unformat_input_t * input,
		  vlib_cli_command_t * cmd)
{
  vlib_node_main_t * nm = &vm->node_main;
  vlib_frame_size_t * fs;

  vlib_cli_output (vm, "%=6s%=12s%=12s", "Size", "# Alloc", "# Free");
  vec_foreach (fs, nm->frame_sizes)
    {
      u32 n_alloc = fs->n_alloc_frames;
      u32 n_free = vec_len (fs->free_frame_indices);

      /* "Size" column is actually the size-class index. */
      if (n_alloc + n_free > 0)
	vlib_cli_output (vm, "%=6d%=12d%=12d",
			 fs - nm->frame_sizes, n_alloc, n_free);
    }

  return 0;
}

VLIB_CLI_COMMAND (show_frame_stats_cli, static) = {
  .path = "show vlib frame-allocation",
  .short_help = "Show node dispatch frame statistics",
  .function = show_frame_stats,
};

/* Change ownership of enqueue rights to given next node. */
static void
vlib_next_frame_change_ownership (vlib_main_t * vm,
				  vlib_node_runtime_t * node_runtime,
				  u32 next_index)
{
  vlib_node_main_t * nm = &vm->node_main;
  vlib_next_frame_t * next_frame;
  vlib_node_t * node, * next_node;

  node = vec_elt (nm->nodes, node_runtime->node_index);

  /* Only internal & input nodes are allowed to call other nodes. */
  ASSERT (node->type == VLIB_NODE_TYPE_INTERNAL
	  || node->type == VLIB_NODE_TYPE_INPUT
	  || node->type == VLIB_NODE_TYPE_PROCESS);

  ASSERT (vec_len (node->next_nodes) == node_runtime->n_next_nodes);

  next_frame = vlib_node_runtime_get_next_frame (vm, node_runtime, next_index);
  next_node = vec_elt (nm->nodes, node->next_nodes[next_index]);

  if (next_node->owner_node_index != VLIB_INVALID_NODE_INDEX)
    {
      /* Get frame from previous owner. */
      vlib_next_frame_t * owner_next_frame;
      vlib_next_frame_t tmp;

      owner_next_frame =
	vlib_node_get_next_frame (vm,
				  next_node->owner_node_index,
				  next_node->owner_next_index);

      /* Swap target next frame with owner's. */
      tmp = owner_next_frame[0];
      owner_next_frame[0] = next_frame[0];
      next_frame[0] = tmp;

      /*
       * If next_frame is already pending, we have to track down
       * all pending frames and fix their next_frame_index fields.
       */
      if (next_frame->flags & VLIB_FRAME_PENDING)
	{
	  vlib_pending_frame_t * p;
	  if (next_frame->frame_index != ~0)
	    {
	      vec_foreach (p, nm->pending_frames)
		{
		  if (p->frame_index == next_frame->frame_index)
		    {
		      p->next_frame_index =
			next_frame - vm->node_main.next_frames;
		    }
		}
	    }
	}
    }
  else
    {
      /* No previous owner. Take ownership. */
      next_frame->flags |= VLIB_FRAME_OWNER;
    }

  /* Record new owner. */
  next_node->owner_node_index = node->index;
  next_node->owner_next_index = next_index;

  /* Now we should be owner. */
  ASSERT (next_frame->flags & VLIB_FRAME_OWNER);
}

/* Make sure that magic number is still there.
   Otherwise, it is likely that caller has overrun frame arguments.
*/ +always_inline void +validate_frame_magic (vlib_main_t * vm, + vlib_frame_t * f, + vlib_node_t * n, + uword next_index) +{ + vlib_node_t * next_node = vlib_get_node (vm, n->next_nodes[next_index]); + u32 * magic = vlib_frame_find_magic (f, next_node); + ASSERT (VLIB_FRAME_MAGIC == magic[0]); +} + +vlib_frame_t * +vlib_get_next_frame_internal (vlib_main_t * vm, + vlib_node_runtime_t * node, + u32 next_index, + u32 allocate_new_next_frame) +{ + vlib_frame_t * f; + vlib_next_frame_t * nf; + u32 n_used; + + nf = vlib_node_runtime_get_next_frame (vm, node, next_index); + + /* Make sure this next frame owns right to enqueue to destination frame. */ + if (PREDICT_FALSE (! (nf->flags & VLIB_FRAME_OWNER))) + vlib_next_frame_change_ownership (vm, node, next_index); + + /* ??? Don't need valid flag: can use frame_index == ~0 */ + if (PREDICT_FALSE (! (nf->flags & VLIB_FRAME_IS_ALLOCATED))) + { + nf->frame_index = vlib_frame_alloc (vm, node, next_index); + nf->flags |= VLIB_FRAME_IS_ALLOCATED; + } + + f = vlib_get_frame (vm, nf->frame_index); + + /* Has frame been removed from pending vector (e.g. finished dispatching)? + If so we can reuse frame. */ + if ((nf->flags & VLIB_FRAME_PENDING) && ! (f->flags & VLIB_FRAME_PENDING)) + { + nf->flags &= ~VLIB_FRAME_PENDING; + f->n_vectors = 0; + } + + /* Allocate new frame if current one is already full. */ + n_used = f->n_vectors; + if (n_used >= VLIB_FRAME_SIZE || (allocate_new_next_frame && n_used > 0)) + { + /* Old frame may need to be freed after dispatch, since we'll have + two redundant frames from node -> next node. */ + if (! (nf->flags & VLIB_FRAME_NO_FREE_AFTER_DISPATCH)) + { + vlib_frame_t * f_old = vlib_get_frame (vm, nf->frame_index); + f_old->flags |= VLIB_FRAME_FREE_AFTER_DISPATCH; + } + + /* Allocate new frame to replace full one. */ + nf->frame_index = vlib_frame_alloc (vm, node, next_index); + f = vlib_get_frame (vm, nf->frame_index); + n_used = f->n_vectors; + } + + /* Should have free vectors in frame now. 
*/ + ASSERT (n_used < VLIB_FRAME_SIZE); + + if (CLIB_DEBUG > 0) + { + validate_frame_magic (vm, f, + vlib_get_node (vm, node->node_index), + next_index); + } + + return f; +} + +static void +vlib_put_next_frame_validate (vlib_main_t * vm, + vlib_node_runtime_t * rt, + u32 next_index, + u32 n_vectors_left) +{ + vlib_node_main_t * nm = &vm->node_main; + vlib_next_frame_t * nf; + vlib_frame_t * f; + vlib_node_runtime_t * next_rt; + vlib_node_t * next_node; + u32 n_before, n_after; + + nf = vlib_node_runtime_get_next_frame (vm, rt, next_index); + f = vlib_get_frame (vm, nf->frame_index); + + ASSERT (n_vectors_left <= VLIB_FRAME_SIZE); + n_after = VLIB_FRAME_SIZE - n_vectors_left; + n_before = f->n_vectors; + + ASSERT (n_after >= n_before); + + next_rt = vec_elt_at_index (nm->nodes_by_type[VLIB_NODE_TYPE_INTERNAL], + nf->node_runtime_index); + next_node = vlib_get_node (vm, next_rt->node_index); + if (n_after > 0 && next_node->validate_frame) + { + u8 * msg = next_node->validate_frame (vm, rt, f); + if (msg) + { + clib_warning ("%v", msg); + ASSERT (0); + } + vec_free (msg); + } +} + +void +vlib_put_next_frame (vlib_main_t * vm, + vlib_node_runtime_t * r, + u32 next_index, + u32 n_vectors_left) +{ + vlib_node_main_t * nm = &vm->node_main; + vlib_next_frame_t * nf; + vlib_frame_t * f; + u32 n_vectors_in_frame; + + if (DPDK == 0 && CLIB_DEBUG > 0) + vlib_put_next_frame_validate (vm, r, next_index, n_vectors_left); + + nf = vlib_node_runtime_get_next_frame (vm, r, next_index); + f = vlib_get_frame (vm, nf->frame_index); + + /* Make sure that magic number is still there. Otherwise, caller + has overrun frame meta data. */ + if (CLIB_DEBUG > 0) + { + vlib_node_t * node = vlib_get_node (vm, r->node_index); + validate_frame_magic (vm, f, node, next_index); + } + + /* Convert # of vectors left -> number of vectors there. 
*/ + ASSERT (n_vectors_left <= VLIB_FRAME_SIZE); + n_vectors_in_frame = VLIB_FRAME_SIZE - n_vectors_left; + + f->n_vectors = n_vectors_in_frame; + + /* If vectors were added to frame, add to pending vector. */ + if (PREDICT_TRUE (n_vectors_in_frame > 0)) + { + vlib_pending_frame_t * p; + u32 v0, v1; + + r->cached_next_index = next_index; + + if (!(f->flags & VLIB_FRAME_PENDING)) + { + __attribute__((unused)) vlib_node_t *node; + vlib_node_t *next_node; + vlib_node_runtime_t *next_runtime; + + node = vlib_get_node (vm, r->node_index); + next_node = vlib_get_next_node (vm, r->node_index, next_index); + next_runtime = vlib_node_get_runtime (vm, next_node->index); + + vec_add2 (nm->pending_frames, p, 1); + + p->frame_index = nf->frame_index; + p->node_runtime_index = nf->node_runtime_index; + p->next_frame_index = nf - nm->next_frames; + nf->flags |= VLIB_FRAME_PENDING; + f->flags |= VLIB_FRAME_PENDING; + + /* + * If we're going to dispatch this frame on another thread, + * force allocation of a new frame. Otherwise, we create + * a dangling frame reference. Each thread has its own copy of + * the next_frames vector. + */ + if (0 && r->cpu_index != next_runtime->cpu_index) + { + nf->frame_index = ~0; + nf->flags &= ~(VLIB_FRAME_PENDING | VLIB_FRAME_IS_ALLOCATED); + } + } + + /* Copy trace flag from next_frame and from runtime. */ + nf->flags |= (nf->flags & VLIB_NODE_FLAG_TRACE) | (r->flags & VLIB_NODE_FLAG_TRACE); + + v0 = nf->vectors_since_last_overflow; + v1 = v0 + n_vectors_in_frame; + nf->vectors_since_last_overflow = v1; + if (PREDICT_FALSE (v1 < v0)) + { + vlib_node_t * node = vlib_get_node (vm, r->node_index); + vec_elt (node->n_vectors_by_next_node, next_index) += v0; + } + } +} + +/* Sync up runtime (32 bit counters) and main node stats (64 bit counters). 
*/ +never_inline void +vlib_node_runtime_sync_stats (vlib_main_t * vm, + vlib_node_runtime_t * r, + uword n_calls, + uword n_vectors, + uword n_clocks) +{ + vlib_node_t * n = vlib_get_node (vm, r->node_index); + + n->stats_total.calls += n_calls + r->calls_since_last_overflow; + n->stats_total.vectors += n_vectors + r->vectors_since_last_overflow; + n->stats_total.clocks += n_clocks + r->clocks_since_last_overflow; + n->stats_total.max_clock = r->max_clock; + n->stats_total.max_clock_n = r->max_clock_n; + + r->calls_since_last_overflow = 0; + r->vectors_since_last_overflow = 0; + r->clocks_since_last_overflow = 0; +} + +always_inline void +vlib_process_sync_stats (vlib_main_t * vm, + vlib_process_t * p, + uword n_calls, + uword n_vectors, + uword n_clocks) +{ + vlib_node_runtime_t * rt = &p->node_runtime; + vlib_node_t * n = vlib_get_node (vm, rt->node_index); + vlib_node_runtime_sync_stats (vm, rt, n_calls, n_vectors, n_clocks); + n->stats_total.suspends += p->n_suspends; + p->n_suspends = 0; +} + +void vlib_node_sync_stats (vlib_main_t * vm, vlib_node_t * n) +{ + vlib_node_runtime_t * rt; + + if (n->type == VLIB_NODE_TYPE_PROCESS) + { + /* Nothing to do for PROCESS nodes except in main thread */ + if (vm != &vlib_global_main) return; + + vlib_process_t * p = vlib_get_process_from_node (vm, n); + n->stats_total.suspends += p->n_suspends; + p->n_suspends = 0; + rt = &p->node_runtime; + } + else + rt = vec_elt_at_index (vm->node_main.nodes_by_type[n->type], n->runtime_index); + + vlib_node_runtime_sync_stats (vm, rt, 0, 0, 0); + + /* Sync up runtime next frame vector counters with main node structure. 
*/ + { + vlib_next_frame_t * nf; + uword i; + for (i = 0; i < rt->n_next_nodes; i++) + { + nf = vlib_node_runtime_get_next_frame (vm, rt, i); + vec_elt (n->n_vectors_by_next_node, i) += nf->vectors_since_last_overflow; + nf->vectors_since_last_overflow = 0; + } + } +} + +always_inline u32 +vlib_node_runtime_update_stats (vlib_main_t * vm, + vlib_node_runtime_t * node, + uword n_calls, + uword n_vectors, + uword n_clocks) +{ + u32 ca0, ca1, v0, v1, cl0, cl1, r; + + cl0 = cl1 = node->clocks_since_last_overflow; + ca0 = ca1 = node->calls_since_last_overflow; + v0 = v1 = node->vectors_since_last_overflow; + + ca1 = ca0 + n_calls; + v1 = v0 + n_vectors; + cl1 = cl0 + n_clocks; + + node->calls_since_last_overflow = ca1; + node->clocks_since_last_overflow = cl1; + node->vectors_since_last_overflow = v1; + node->max_clock_n = node->max_clock > n_clocks ? + node->max_clock_n : n_vectors; + node->max_clock = node->max_clock > n_clocks ? + node->max_clock : n_clocks; + + r = vlib_node_runtime_update_main_loop_vector_stats (vm, node, n_vectors); + + if (PREDICT_FALSE (ca1 < ca0 || v1 < v0 || cl1 < cl0)) + { + node->calls_since_last_overflow = ca0; + node->clocks_since_last_overflow = cl0; + node->vectors_since_last_overflow = v0; + vlib_node_runtime_sync_stats (vm, node, n_calls, n_vectors, n_clocks); + } + + return r; +} + +always_inline void +vlib_process_update_stats (vlib_main_t * vm, + vlib_process_t * p, + uword n_calls, + uword n_vectors, + uword n_clocks) +{ + vlib_node_runtime_update_stats (vm, &p->node_runtime, + n_calls, n_vectors, n_clocks); +} + +static clib_error_t * +vlib_cli_elog_clear (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + elog_reset_buffer (&vm->elog_main); + return 0; +} + +VLIB_CLI_COMMAND (elog_clear_cli, static) = { + .path = "clear event-logger", + .short_help = "Clear current event log", + .function = vlib_cli_elog_clear, +}; + +#ifdef CLIB_UNIX +static clib_error_t * +elog_save_buffer (vlib_main_t * vm, + 
unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + elog_main_t * em = &vm->elog_main; + char * file, * chroot_file; + clib_error_t * error = 0; + + if (! unformat (input, "%s", &file)) + { + vlib_cli_output (vm, "expected file name, got `%U'", + format_unformat_error, input); + return 0; + } + + /* It's fairly hard to get "../oopsie" through unformat; just in case */ + if (strstr(file, "..") || index(file, '/')) + { + vlib_cli_output (vm, "illegal characters in filename '%s'", file); + return 0; + } + + chroot_file = (char *) format (0, "/tmp/%s%c", file, 0); + + vec_free(file); + + vlib_cli_output (vm, "Saving %wd of %wd events to %s", + elog_n_events_in_buffer (em), + elog_buffer_capacity (em), + chroot_file); + + vlib_worker_thread_barrier_sync (vm); + error = elog_write_file (em, chroot_file); + vlib_worker_thread_barrier_release(vm); + vec_free (chroot_file); + return error; +} + +VLIB_CLI_COMMAND (elog_save_cli, static) = { + .path = "save event-logger", + .short_help = "save event-logger <filename> (saves log in /tmp/<filename>)", + .function = elog_save_buffer, +}; + +#endif /* CLIB_UNIX */ + +static void elog_show_buffer_internal (vlib_main_t * vm, u32 n_events_to_show) +{ + elog_main_t * em = &vm->elog_main; + elog_event_t * e, * es; + f64 dt; + + /* Show events in VLIB time since log clock starts after VLIB clock. 
*/ + dt = (em->init_time.cpu - vm->clib_time.init_cpu_time) + * vm->clib_time.seconds_per_clock; + + es = elog_peek_events (em); + vlib_cli_output (vm, "%d events in buffer", vec_len (es)); + vec_foreach (e, es) + { + vlib_cli_output (vm, "%18.9f: %U", + e->time + dt, + format_elog_event, em, e); + n_events_to_show--; + if (n_events_to_show == 0) + break; + } + vec_free (es); + +} + +static clib_error_t * +elog_show_buffer (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + u32 n_events_to_show; + clib_error_t * error = 0; + + n_events_to_show = 250; + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (input, "%d", &n_events_to_show)) + ; + else if (unformat (input, "all")) + n_events_to_show = ~0; + else + return unformat_parse_error (input); + } + elog_show_buffer_internal (vm, n_events_to_show); + return error; +} + +VLIB_CLI_COMMAND (elog_show_cli, static) = { + .path = "show event-logger", + .short_help = "Show event logger info", + .function = elog_show_buffer, +}; + +void vlib_gdb_show_event_log (void) +{ + elog_show_buffer_internal (vlib_get_main(), (u32)~0); +} + +always_inline void +vlib_elog_main_loop_event (vlib_main_t * vm, + u32 node_index, + u64 time, + u32 n_vectors, + u32 is_return) +{ + elog_main_t * em = &vm->elog_main; + + if (VLIB_ELOG_MAIN_LOOP) + elog (em, + /* event type */ + vec_elt_at_index (is_return + ? 
vm->node_return_elog_event_types + : vm->node_call_elog_event_types, + node_index), + /* data to log */ n_vectors); +} + +void vlib_dump_context_trace (vlib_main_t *vm, u32 bi) +{ + vlib_node_main_t * vnm = &vm->node_main; + vlib_buffer_t * b; + u8 i, n; + + if (VLIB_BUFFER_TRACE_TRAJECTORY) + { + b = vlib_get_buffer (vm, bi); + n = b->pre_data[0]; + + fformat(stderr, "Context trace for bi %d b 0x%llx, visited %d\n", + bi, b, n); + + if (n == 0 || n > 20) + { + fformat(stderr, "n is unreasonable\n"); + return; + } + + + for (i = 0; i < n; i++) + { + u32 node_index; + + node_index = b->pre_data[i+1]; + + if (node_index > vec_len (vnm->nodes)) + { + fformat(stderr, "Skip bogus node index %d\n", node_index); + continue; + } + + fformat(stderr, "%v (%d)\n", vnm->nodes[node_index]->name, + node_index); + } + } + else + { + fformat(stderr, + "in vlib/buffers.h, #define VLIB_BUFFER_TRACE_TRAJECTORY 1\n"); + } +} + + +/* static_always_inline */ u64 +dispatch_node (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_node_type_t type, + vlib_node_state_t dispatch_state, + vlib_frame_t * frame, + u64 last_time_stamp) +{ + uword n, v; + u64 t; + vlib_node_main_t * nm = &vm->node_main; + vlib_next_frame_t * nf; + + if (CLIB_DEBUG > 0) + { + vlib_node_t * n = vlib_get_node (vm, node->node_index); + ASSERT (n->type == type); + } + + /* Only non-internal nodes may be disabled. */ + if (type != VLIB_NODE_TYPE_INTERNAL && node->state != dispatch_state) + { + ASSERT (type != VLIB_NODE_TYPE_INTERNAL); + return last_time_stamp; + } + + if ((type == VLIB_NODE_TYPE_PRE_INPUT || type == VLIB_NODE_TYPE_INPUT) + && dispatch_state != VLIB_NODE_STATE_INTERRUPT) + { + u32 c = node->input_main_loops_per_call; + /* Only call node when count reaches zero. */ + if (c) + { + node->input_main_loops_per_call = c - 1; + return last_time_stamp; + } + } + + /* Speculatively prefetch next frames. 
*/ + if (node->n_next_nodes > 0) + { + nf = vec_elt_at_index (nm->next_frames, node->next_frame_index); + CLIB_PREFETCH (nf, 4 * sizeof (nf[0]), WRITE); + } + + vm->cpu_time_last_node_dispatch = last_time_stamp; + + if (1 /* || vm->cpu_index == node->cpu_index */) + { + vlib_main_t *stat_vm; + + stat_vm = /* vlib_mains ? vlib_mains[0] : */ vm; + + vlib_elog_main_loop_event (vm, node->node_index, + last_time_stamp, + frame ? frame->n_vectors : 0, + /* is_after */ 0); + + /* + * Turn this on if you run into + * "bad monkey" contexts, and you want to know exactly + * which nodes they've visited... See ixge.c... + */ + if (VLIB_BUFFER_TRACE_TRAJECTORY && frame) + { + int i; + int log_index; + u32 * from; + from = vlib_frame_vector_args (frame); + for (i = 0; i < frame->n_vectors; i++) + { + vlib_buffer_t *b = vlib_get_buffer (vm, from[i]); + ASSERT (b->pre_data[0] < 32); + log_index = b->pre_data[0]++ + 1; + b->pre_data[log_index] = node->node_index; + } + n = node->function (vm, node, frame); + } + else + n = node->function (vm, node, frame); + + t = clib_cpu_time_now (); + + vlib_elog_main_loop_event (vm, node->node_index, t, n, /* is_after */ 1); + + vm->main_loop_vectors_processed += n; + vm->main_loop_nodes_processed += n > 0; + + v = vlib_node_runtime_update_stats (stat_vm, node, + /* n_calls */ 1, + /* n_vectors */ n, + /* n_clocks */ t - last_time_stamp); + + /* When in interrupt mode and vector rate crosses threshold switch to + polling mode. 
*/ + if ((DPDK == 0 && dispatch_state == VLIB_NODE_STATE_INTERRUPT) + || (DPDK == 0 && dispatch_state == VLIB_NODE_STATE_POLLING + && (node->flags + & VLIB_NODE_FLAG_SWITCH_FROM_INTERRUPT_TO_POLLING_MODE))) + { + ELOG_TYPE_DECLARE (e) = { + .function = (char *) __FUNCTION__, + .format = "%s vector length %d, switching to %s", + .format_args = "T4i4t4", + .n_enum_strings = 2, + .enum_strings = { + "interrupt", "polling", + }, + }; + struct { u32 node_name, vector_length, is_polling; } * ed; + + if (dispatch_state == VLIB_NODE_STATE_INTERRUPT + && v >= nm->polling_threshold_vector_length) + { + vlib_node_t * n = vlib_get_node (vm, node->node_index); + n->state = VLIB_NODE_STATE_POLLING; + node->state = VLIB_NODE_STATE_POLLING; + ASSERT (! (node->flags & VLIB_NODE_FLAG_SWITCH_FROM_INTERRUPT_TO_POLLING_MODE)); + node->flags &= ~VLIB_NODE_FLAG_SWITCH_FROM_POLLING_TO_INTERRUPT_MODE; + node->flags |= VLIB_NODE_FLAG_SWITCH_FROM_INTERRUPT_TO_POLLING_MODE; + nm->input_node_counts_by_state[VLIB_NODE_STATE_INTERRUPT] -= 1; + nm->input_node_counts_by_state[VLIB_NODE_STATE_POLLING] += 1; + + ed = ELOG_DATA (&vm->elog_main, e); + ed->node_name = n->name_elog_string; + ed->vector_length = v; + ed->is_polling = 1; + } + else if (dispatch_state == VLIB_NODE_STATE_POLLING + && v <= nm->interrupt_threshold_vector_length) + { + vlib_node_t * n = vlib_get_node (vm, node->node_index); + if (node->flags & VLIB_NODE_FLAG_SWITCH_FROM_POLLING_TO_INTERRUPT_MODE) + { + /* Switch to interrupt mode after dispatch in polling one more time. + This allows driver to re-enable interrupts. 
*/ + n->state = VLIB_NODE_STATE_INTERRUPT; + node->state = VLIB_NODE_STATE_INTERRUPT; + node->flags &= ~VLIB_NODE_FLAG_SWITCH_FROM_INTERRUPT_TO_POLLING_MODE; + nm->input_node_counts_by_state[VLIB_NODE_STATE_POLLING] -= 1; + nm->input_node_counts_by_state[VLIB_NODE_STATE_INTERRUPT] += 1; + + } + else + { + node->flags |= VLIB_NODE_FLAG_SWITCH_FROM_POLLING_TO_INTERRUPT_MODE; + ed = ELOG_DATA (&vm->elog_main, e); + ed->node_name = n->name_elog_string; + ed->vector_length = v; + ed->is_polling = 0; + } + } + } + } + + return t; +} + +/* static */ u64 +dispatch_pending_node (vlib_main_t * vm, + vlib_pending_frame_t * p, + u64 last_time_stamp) +{ + vlib_node_main_t * nm = &vm->node_main; + vlib_frame_t * f; + vlib_next_frame_t * nf, nf_dummy; + vlib_node_runtime_t * n; + u32 restore_frame_index; + + n = vec_elt_at_index (nm->nodes_by_type[VLIB_NODE_TYPE_INTERNAL], + p->node_runtime_index); + + f = vlib_get_frame (vm, p->frame_index); + if (p->next_frame_index == VLIB_PENDING_FRAME_NO_NEXT_FRAME) + { + /* No next frame: so use dummy on stack. */ + nf = &nf_dummy; + nf->flags = f->flags & VLIB_NODE_FLAG_TRACE; + nf->frame_index = ~p->frame_index; + } + else + nf = vec_elt_at_index (nm->next_frames, p->next_frame_index); + + ASSERT (f->flags & VLIB_FRAME_IS_ALLOCATED); + + /* Force allocation of new frame while current frame is being + dispatched. */ + restore_frame_index = ~0; + if (nf->frame_index == p->frame_index) + { + nf->frame_index = ~0; + nf->flags &= ~VLIB_FRAME_IS_ALLOCATED; + if (! (n->flags & VLIB_NODE_FLAG_FRAME_NO_FREE_AFTER_DISPATCH)) + restore_frame_index = p->frame_index; + } + + /* Frame must be pending. */ + ASSERT (f->flags & VLIB_FRAME_PENDING); + ASSERT (f->n_vectors > 0); + + /* Copy trace flag from next frame to node. + Trace flag indicates that at least one vector in the dispatched + frame is traced. */ + n->flags &= ~VLIB_NODE_FLAG_TRACE; + n->flags |= (nf->flags & VLIB_FRAME_TRACE) ? 
VLIB_NODE_FLAG_TRACE : 0; + nf->flags &= ~VLIB_FRAME_TRACE; + + last_time_stamp = dispatch_node (vm, n, + VLIB_NODE_TYPE_INTERNAL, + VLIB_NODE_STATE_POLLING, + f, last_time_stamp); + + f->flags &= ~VLIB_FRAME_PENDING; + + /* Frame is ready to be used again, so restore it. */ + if (restore_frame_index != ~0) + { + /* p->next_frame_index can change during node dispatch if node + function decides to change graph hook up. */ + nf = vec_elt_at_index (nm->next_frames, p->next_frame_index); + nf->frame_index = restore_frame_index; + nf->flags |= VLIB_FRAME_IS_ALLOCATED; + } + + if (f->flags & VLIB_FRAME_FREE_AFTER_DISPATCH) + { + ASSERT (! (n->flags & VLIB_NODE_FLAG_FRAME_NO_FREE_AFTER_DISPATCH)); + vlib_frame_free (vm, n, f); + } + + return last_time_stamp; +} + +always_inline uword +vlib_process_stack_is_valid (vlib_process_t * p) +{ return p->stack[0] == VLIB_PROCESS_STACK_MAGIC; } + +typedef struct { + vlib_main_t * vm; + vlib_process_t * process; + vlib_frame_t * frame; +} vlib_process_bootstrap_args_t; + +/* Called in process stack. */ +static uword vlib_process_bootstrap (uword _a) +{ + vlib_process_bootstrap_args_t * a; + vlib_main_t * vm; + vlib_node_runtime_t * node; + vlib_frame_t * f; + vlib_process_t * p; + uword n; + + a = uword_to_pointer (_a, vlib_process_bootstrap_args_t *); + + vm = a->vm; + p = a->process; + f = a->frame; + node = &p->node_runtime; + + n = node->function (vm, node, f); + + ASSERT (vlib_process_stack_is_valid (p)); + + clib_longjmp (&p->return_longjmp, n); + + return n; +} + +/* Called in main stack. 
 */
/* Run a process node from the top of its (fresh) stack.
   Returns either the node function's return value (delivered via
   longjmp out of vlib_process_bootstrap) or
   VLIB_PROCESS_RETURN_LONGJMP_SUSPEND if the process suspended. */
static_always_inline uword
vlib_process_startup (vlib_main_t * vm,
		      vlib_process_t * p,
		      vlib_frame_t * f)
{
  vlib_process_bootstrap_args_t a;
  uword r;

  a.vm = vm;
  a.process = p;
  a.frame = f;

  /* Set the landing point for the process's eventual return/suspend
     longjmp, then call the bootstrap trampoline on the process's own
     stack (stack grows down from stack + 2^log2_n_stack_bytes). */
  r = clib_setjmp (&p->return_longjmp, VLIB_PROCESS_RETURN_LONGJMP_RETURN);
  if (r == VLIB_PROCESS_RETURN_LONGJMP_RETURN)
    r = clib_calljmp (vlib_process_bootstrap, pointer_to_uword (&a),
		      (void *) p->stack + (1 << p->log2_n_stack_bytes));

  return r;
}

/* Resume a previously suspended process: clear the suspend-reason
   flags and longjmp back into the process's saved context.  Returns
   the value the process eventually delivers to return_longjmp
   (its final result, or SUSPEND again). */
static_always_inline uword
vlib_process_resume (vlib_process_t * p)
{
  uword r;
  p->flags &= ~(VLIB_PROCESS_IS_SUSPENDED_WAITING_FOR_CLOCK
		| VLIB_PROCESS_IS_SUSPENDED_WAITING_FOR_EVENT
		| VLIB_PROCESS_RESUME_PENDING);
  r = clib_setjmp (&p->return_longjmp, VLIB_PROCESS_RETURN_LONGJMP_RETURN);
  if (r == VLIB_PROCESS_RETURN_LONGJMP_RETURN)
    clib_longjmp (&p->resume_longjmp, VLIB_PROCESS_RESUME_LONGJMP_RESUME);
  return r;
}

/* Dispatch a process node for the first time (cold start).  If the
   process suspends, park its frame in nm->suspended_process_frames
   and, for clock waits, schedule resumption on the timing wheel.
   Returns the post-dispatch CPU time stamp. */
static u64
dispatch_process (vlib_main_t * vm,
		  vlib_process_t * p,
		  vlib_frame_t * f,
		  u64 last_time_stamp)
{
  vlib_node_main_t * nm = &vm->node_main;
  vlib_node_runtime_t * node_runtime = &p->node_runtime;
  vlib_node_t * node = vlib_get_node (vm, node_runtime->node_index);
  u64 t;
  uword n_vectors, is_suspend;

  /* Skip disabled processes and processes already parked on a wait. */
  if (node->state != VLIB_NODE_STATE_POLLING
      || (p->flags & (VLIB_PROCESS_IS_SUSPENDED_WAITING_FOR_CLOCK
		      | VLIB_PROCESS_IS_SUSPENDED_WAITING_FOR_EVENT)))
    return last_time_stamp;

  p->flags |= VLIB_PROCESS_IS_RUNNING;

  t = last_time_stamp;
  vlib_elog_main_loop_event (vm, node_runtime->node_index, t,
			     f ? f->n_vectors : 0, /* is_after */ 0);

  /* Save away current process for suspend. */
  nm->current_process_index = node->runtime_index;

  n_vectors = vlib_process_startup (vm, p, f);

  nm->current_process_index = ~0;

  ASSERT (n_vectors != VLIB_PROCESS_RETURN_LONGJMP_RETURN);
  is_suspend = n_vectors == VLIB_PROCESS_RETURN_LONGJMP_SUSPEND;
  if (is_suspend)
    {
      vlib_pending_frame_t * pf;

      /* Remember the in-flight frame so the resume path can hand it
	 back to the process. */
      n_vectors = 0;
      pool_get (nm->suspended_process_frames, pf);
      pf->node_runtime_index = node->runtime_index;
      pf->frame_index = f ? vlib_frame_index (vm, f) : ~0;
      pf->next_frame_index = ~0;

      p->n_suspends += 1;
      p->suspended_process_frame_index = pf - nm->suspended_process_frames;

      if (p->flags & VLIB_PROCESS_IS_SUSPENDED_WAITING_FOR_CLOCK)
	timing_wheel_insert (&nm->timing_wheel, p->resume_cpu_time,
			     vlib_timing_wheel_data_set_suspended_process (node->runtime_index));
    }
  else
    p->flags &= ~VLIB_PROCESS_IS_RUNNING;

  t = clib_cpu_time_now ();

  /* NOTE(review): is_suspend is passed in the n_vectors slot of the
     "after" event here, while dispatch_suspended_process passes
     ! is_suspend — one of the two looks inverted; confirm intended
     elog payload. */
  vlib_elog_main_loop_event (vm, node_runtime->node_index, t, is_suspend, /* is_after */ 1);

  vlib_process_update_stats (vm, p,
			     /* n_calls */ ! is_suspend,
			     /* n_vectors */ n_vectors,
			     /* n_clocks */ t - last_time_stamp);

  return t;
}

/* Public entry: kick off the process at process_index immediately
   (used e.g. when a process is created after the main loop started). */
void vlib_start_process (vlib_main_t * vm, uword process_index)
{
  vlib_node_main_t * nm = &vm->node_main;
  vlib_process_t * p = vec_elt (nm->processes, process_index);
  dispatch_process (vm, p, /* frame */ 0, /* cpu_time_now */ 0);
}

/* Resume a process previously parked by dispatch_process.  If it
   suspends again on a clock wait, re-insert it into the timing wheel;
   if it finishes, release its suspended-frame bookkeeping.
   Returns the post-dispatch CPU time stamp. */
static u64
dispatch_suspended_process (vlib_main_t * vm,
			    uword process_index,
			    u64 last_time_stamp)
{
  vlib_node_main_t * nm = &vm->node_main;
  vlib_node_runtime_t * node_runtime;
  vlib_node_t * node;
  vlib_frame_t * f;
  vlib_process_t * p;
  vlib_pending_frame_t * pf;
  u64 t, n_vectors, is_suspend;

  t = last_time_stamp;

  p = vec_elt (nm->processes, process_index);
  if (PREDICT_FALSE (! (p->flags & VLIB_PROCESS_IS_RUNNING)))
    return last_time_stamp;

  ASSERT (p->flags & (VLIB_PROCESS_IS_SUSPENDED_WAITING_FOR_CLOCK
		      | VLIB_PROCESS_IS_SUSPENDED_WAITING_FOR_EVENT));

  pf = pool_elt_at_index (nm->suspended_process_frames, p->suspended_process_frame_index);

  node_runtime = &p->node_runtime;
  node = vlib_get_node (vm, node_runtime->node_index);
  f = pf->frame_index != ~0 ? vlib_get_frame (vm, pf->frame_index) : 0;

  vlib_elog_main_loop_event (vm, node_runtime->node_index, t, f ? f->n_vectors : 0, /* is_after */ 0);

  /* Save away current process for suspend. */
  nm->current_process_index = node->runtime_index;

  n_vectors = vlib_process_resume (p);
  /* NOTE(review): t is overwritten again below after the suspend
     bookkeeping; this first read appears redundant. */
  t = clib_cpu_time_now ();

  nm->current_process_index = ~0;

  is_suspend = n_vectors == VLIB_PROCESS_RETURN_LONGJMP_SUSPEND;
  if (is_suspend)
    {
      /* Suspend it again. */
      n_vectors = 0;
      p->n_suspends += 1;
      if (p->flags & VLIB_PROCESS_IS_SUSPENDED_WAITING_FOR_CLOCK)
	timing_wheel_insert (&nm->timing_wheel, p->resume_cpu_time,
			     vlib_timing_wheel_data_set_suspended_process (node->runtime_index));
    }
  else
    {
      /* Process ran to completion: release the parked frame record. */
      p->flags &= ~VLIB_PROCESS_IS_RUNNING;
      p->suspended_process_frame_index = ~0;
      pool_put (nm->suspended_process_frames, pf);
    }

  t = clib_cpu_time_now ();
  /* NOTE(review): ! is_suspend here vs is_suspend in dispatch_process
     — see note there; confirm which polarity the elog consumer expects. */
  vlib_elog_main_loop_event (vm, node_runtime->node_index, t, ! is_suspend, /* is_after */ 1);

  vlib_process_update_stats (vm, p,
			     /* n_calls */ ! is_suspend,
			     /* n_vectors */ n_vectors,
			     /* n_clocks */ t - last_time_stamp);

  return t;
}

/* The vlib scheduler.  Never returns (exit is via longjmp to
   vm->main_loop_exit).  Per iteration: pre-input nodes, input nodes
   (polling), pending control-plane queue signal, pending interrupts,
   timing-wheel expiry (timed events + suspended processes), then the
   pending-frame work list until drained. */
static void vlib_main_loop (vlib_main_t * vm)
{
  vlib_node_main_t * nm = &vm->node_main;
  uword i;
  u64 cpu_time_now;

  /* Initialize pending node vector. */
  vec_resize (nm->pending_frames, 32);
  _vec_len (nm->pending_frames) = 0;

  /* Mark time of main loop start. */
  cpu_time_now = vm->clib_time.last_cpu_time;
  vm->cpu_time_main_loop_start = cpu_time_now;

  /* Arrange for first level of timing wheel to cover times we care
     most about. */
  nm->timing_wheel.min_sched_time = 10e-6;
  nm->timing_wheel.max_sched_time = 10e-3;
  timing_wheel_init (&nm->timing_wheel,
		     cpu_time_now,
		     vm->clib_time.clocks_per_second);

  /* Pre-allocate expired nodes. */
  vec_alloc (nm->data_from_advancing_timing_wheel, 32);
  vec_alloc (nm->pending_interrupt_node_runtime_indices, 32);

  /* Default hysteresis thresholds for interrupt <-> polling mode
     switching of input nodes (vector lengths). */
  if (! nm->polling_threshold_vector_length)
    nm->polling_threshold_vector_length = 10;
  if (! nm->interrupt_threshold_vector_length)
    nm->interrupt_threshold_vector_length = 5;

  nm->current_process_index = ~0;

  /* Start all processes. */
  {
    /* NOTE(review): this inner i shadows the function-scope i. */
    uword i;
    for (i = 0; i < vec_len (nm->processes); i++)
      cpu_time_now = dispatch_process (vm, nm->processes[i], /* frame */ 0, cpu_time_now);
  }

  while (1)
    {
      vlib_node_runtime_t * n;

      /* Process pre-input nodes. */
      vec_foreach (n, nm->nodes_by_type[VLIB_NODE_TYPE_PRE_INPUT])
	cpu_time_now = dispatch_node (vm, n,
				      VLIB_NODE_TYPE_PRE_INPUT,
				      VLIB_NODE_STATE_POLLING,
				      /* frame */ 0,
				      cpu_time_now);

      /* Next process input nodes. */
      vec_foreach (n, nm->nodes_by_type[VLIB_NODE_TYPE_INPUT])
	cpu_time_now = dispatch_node (vm, n,
				      VLIB_NODE_TYPE_INPUT,
				      VLIB_NODE_STATE_POLLING,
				      /* frame */ 0,
				      cpu_time_now);

      /* Drain the control-plane API queue if its signal is pending. */
      if (PREDICT_FALSE(vm->queue_signal_pending))
	if (vm->queue_signal_callback)
	  vm->queue_signal_callback (vm);

      /* Next handle interrupts. */
      {
	uword l = _vec_len (nm->pending_interrupt_node_runtime_indices);
	uword i;
	if (l > 0)
	  {
	    /* Reset the vector first: dispatched nodes may post new
	       interrupts for the next iteration. */
	    _vec_len (nm->pending_interrupt_node_runtime_indices) = 0;
	    for (i = 0; i < l; i++)
	      {
		n = vec_elt_at_index (nm->nodes_by_type[VLIB_NODE_TYPE_INPUT],
				      nm->pending_interrupt_node_runtime_indices[i]);
		cpu_time_now = dispatch_node (vm, n,
					      VLIB_NODE_TYPE_INPUT,
					      VLIB_NODE_STATE_INTERRUPT,
					      /* frame */ 0,
					      cpu_time_now);
	      }
	  }
      }

      /* Check if process nodes have expired from timing wheel. */
      nm->data_from_advancing_timing_wheel
	= timing_wheel_advance (&nm->timing_wheel, cpu_time_now,
				nm->data_from_advancing_timing_wheel,
				&nm->cpu_time_next_process_ready);

      ASSERT (nm->data_from_advancing_timing_wheel != 0);
      if (PREDICT_FALSE (_vec_len (nm->data_from_advancing_timing_wheel) > 0))
	{
	  uword i;

	  /* Label re-entered from below when internal-node dispatch
	     added more timing-wheel work. */
	processes_timing_wheel_data:
	  for (i = 0; i < _vec_len (nm->data_from_advancing_timing_wheel); i++)
	    {
	      u32 d = nm->data_from_advancing_timing_wheel[i];
	      u32 di = vlib_timing_wheel_data_get_index (d);

	      if (vlib_timing_wheel_data_is_timed_event (d))
		{
		  /* Expired timed event: deliver its payload to the
		     target process, then recycle the event record. */
		  vlib_signal_timed_event_data_t * te = pool_elt_at_index (nm->signal_timed_event_data_pool, di);
		  vlib_node_t * n = vlib_get_node (vm, te->process_node_index);
		  vlib_process_t * p = vec_elt (nm->processes, n->runtime_index);
		  void * data;
		  data = vlib_process_signal_event_helper (nm, n, p, te->event_type_index, te->n_data_elts, te->n_data_elt_bytes);
		  if (te->n_data_bytes < sizeof (te->inline_event_data))
		    memcpy (data, te->inline_event_data, te->n_data_bytes);
		  else
		    {
		      memcpy (data, te->event_data_as_vector, te->n_data_bytes);
		      vec_free (te->event_data_as_vector);
		    }
		  pool_put (nm->signal_timed_event_data_pool, te);
		}
	      else
		{
		  /* Expired clock wait: resume the suspended process. */
		  cpu_time_now = clib_cpu_time_now();
		  cpu_time_now = dispatch_suspended_process (vm, di, cpu_time_now);
		}
	    }

	  /* Reset vector. */
	  _vec_len (nm->data_from_advancing_timing_wheel) = 0;
	}

      /* Input nodes may have added work to the pending vector.
	 Process pending vector until there is nothing left.
	 All pending vectors will be processed from input -> output. */
      for (i = 0; i < _vec_len (nm->pending_frames); i++)
	cpu_time_now = dispatch_pending_node (vm, nm->pending_frames + i,
					      cpu_time_now);
      /* Reset pending vector for next iteration. */
      _vec_len (nm->pending_frames) = 0;

      /* Pending internal nodes may resume processes. */
      if (_vec_len (nm->data_from_advancing_timing_wheel) > 0)
	goto processes_timing_wheel_data;

      vlib_increment_main_loop_counter (vm);

      /* Record time stamp in case there are no enabled nodes and above
	 calls do not update time stamp. */
      cpu_time_now = clib_cpu_time_now ();
    }
}

vlib_main_t vlib_global_main;

/* Early "vlib" config section handler: "memory-trace" and
   "elog-events <n>" knobs.  Returns 0 on success or a parse error. */
static clib_error_t *
vlib_main_configure (vlib_main_t * vm, unformat_input_t * input)
{
  int turn_on_mem_trace = 0;

  while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
    {
      if (unformat (input, "memory-trace"))
	turn_on_mem_trace = 1;

      else if (unformat (input, "elog-events %d",
			 &vm->elog_main.event_ring_size))
	;
      else
	return unformat_parse_error (input);
    }

  unformat_free (input);

  /* Enable memory trace as early as possible. */
  if (turn_on_mem_trace)
    clib_mem_trace (1);

  return 0;
}

VLIB_EARLY_CONFIG_FUNCTION (vlib_main_configure, "vlib");

/* Main function.
   One-time vlib bring-up: event log, threads, static node
   registration, init functions, node graph, default buffer free list,
   config functions — then enter the (non-returning) main loop.
   The main loop exits only via longjmp to vm->main_loop_exit.
   Always returns 0; errors are reported, not propagated. */
int vlib_main (vlib_main_t * vm, unformat_input_t * input)
{
  clib_error_t * error;

  clib_time_init (&vm->clib_time);

  /* Turn on event log. */
  if (! vm->elog_main.event_ring_size)
    vm->elog_main.event_ring_size = 128 << 10;
  elog_init (&vm->elog_main, vm->elog_main.event_ring_size);
  elog_enable_disable (&vm->elog_main, 1);

  /* Default name. */
  if (! vm->name)
    vm->name = "VLIB";

  vec_validate (vm->buffer_main, 0);

  if ((error = vlib_thread_init (vm)))
    {
      clib_error_report (error);
      goto done;
    }

  /* Register static nodes so that init functions may use them. */
  vlib_register_all_static_nodes (vm);

  /* Set seed for random number generator.
     Allow user to specify seed to make random sequence deterministic. */
  if (! unformat (input, "seed %wd", &vm->random_seed))
    vm->random_seed = clib_cpu_time_now ();
  clib_random_buffer_init (&vm->random_buffer, vm->random_seed);

  /* See unix/main.c; most likely already set up */
  if (vm->init_functions_called == 0)
    vm->init_functions_called = hash_create (0, /* value bytes */ 0);
  if ((error = vlib_call_all_init_functions (vm)))
    goto done;

  /* Initialize node graph. */
  if ((error = vlib_node_main_init (vm)))
    {
      /* Arrange for graph hook up error to not be fatal when debugging. */
      if (CLIB_DEBUG > 0)
	clib_error_report (error);
      else
	goto done;
    }

  /* Create default buffer free list. */
  vlib_buffer_get_or_create_free_list (vm, VLIB_BUFFER_DEFAULT_FREE_LIST_BYTES,
				       "default");

  /* Arm the main-loop exit longjmp target.  A later longjmp lands
     back here with the exit code as the switch value. */
  switch (clib_setjmp (&vm->main_loop_exit, VLIB_MAIN_LOOP_EXIT_NONE))
    {
    case VLIB_MAIN_LOOP_EXIT_NONE:
      vm->main_loop_exit_set = 1;
      break;

    case VLIB_MAIN_LOOP_EXIT_CLI:
      goto done;

    default:
      error = vm->main_loop_error;
      goto done;
    }

  if ((error = vlib_call_all_config_functions (vm, input, 0 /* is_early */)))
    goto done;

  /* Call all main loop enter functions. */
  {
    clib_error_t * sub_error;
    sub_error = vlib_call_all_main_loop_enter_functions (vm);
    if (sub_error)
      clib_error_report (sub_error);
  }

  vlib_main_loop (vm);

 done:
  /* Call all exit functions. */
  {
    clib_error_t * sub_error;
    sub_error = vlib_call_all_main_loop_exit_functions (vm);
    if (sub_error)
      clib_error_report (sub_error);
  }

  if (error)
    clib_error_report (error);

  return 0;
}
diff --git a/vlib/vlib/main.h b/vlib/vlib/main.h
new file mode 100644
index 00000000000..5a8d745661b
--- /dev/null
+++ b/vlib/vlib/main.h
@@ -0,0 +1,315 @@
/*
 * Copyright (c) 2015 Cisco and/or its affiliates.
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
/*
 * main.h: VLIB main data structure
 *
 * Copyright (c) 2008 Eliot Dresselhaus
 *
 * Permission is hereby granted, free of charge, to any person obtaining
 * a copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sublicense, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice shall be
 * included in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 */

#ifndef included_vlib_main_h
#define included_vlib_main_h

#include <vppinfra/elog.h>
#include <vppinfra/format.h>
#include <vppinfra/longjmp.h>
#include <vppinfra/pool.h>
#include <vppinfra/random_buffer.h>
#include <vppinfra/time.h>

#include <pthread.h>


/* By default turn off node/error event logging.
   Override with -DVLIB_ELOG_MAIN_LOOP */
#ifndef VLIB_ELOG_MAIN_LOOP
#define VLIB_ELOG_MAIN_LOOP 0
#endif

/* Top-level per-instance vlib state: time base, main-loop counters,
   exit longjmp, buffer/physmem hooks, node graph, CLI, tracing,
   error and event-log state, and init/config registrations. */
typedef struct vlib_main_t {
  /* Instruction level timing state. */
  clib_time_t clib_time;

  /* Time stamp of last node dispatch. */
  u64 cpu_time_last_node_dispatch;

  /* Time stamp when main loop was entered (time 0). */
  u64 cpu_time_main_loop_start;

  /* Incremented once for each main loop. */
  u32 main_loop_count;

  /* Count of vectors processed this main loop. */
  u32 main_loop_vectors_processed;
  u32 main_loop_nodes_processed;

  /* Circular buffer of input node vector counts.
     Indexed by low bits of
     (main_loop_count >> VLIB_LOG2_INPUT_VECTORS_PER_MAIN_LOOP). */
  u32 vector_counts_per_main_loop[2];
  u32 node_counts_per_main_loop[2];

  /* Every so often we switch to the next counter. */
#define VLIB_LOG2_MAIN_LOOPS_PER_STATS_UPDATE 7

  /* Jump target to exit main loop with given code. */
  u32 main_loop_exit_set;
  clib_longjmp_t main_loop_exit;
#define VLIB_MAIN_LOOP_EXIT_NONE 0
#define VLIB_MAIN_LOOP_EXIT_PANIC 1
  /* Exit via CLI. */
#define VLIB_MAIN_LOOP_EXIT_CLI 2

  /* Error marker to use when exiting main loop. */
  clib_error_t * main_loop_error;

  /* Name for e.g. syslog. */
  char * name;

  /* Start and size of CLIB heap. */
  void * heap_base;
  uword heap_size;

  vlib_buffer_main_t * buffer_main;

  vlib_physmem_main_t physmem_main;

  /* Allocate/free buffer memory for DMA transfers, descriptor rings, etc.
     buffer memory is guaranteed to be cache-aligned. */
  void * (* os_physmem_alloc_aligned) (vlib_physmem_main_t * pm,
				       uword n_bytes,
				       uword alignment);
  void (* os_physmem_free) (void * x);

  /* Node graph main structure. */
  vlib_node_main_t node_main;

  /* Command line interface. */
  vlib_cli_main_t cli_main;

  /* Packet trace buffer. */
  vlib_trace_main_t trace_main;

  /* Error handling. */
  vlib_error_main_t error_main;

  /* Punt packets to underlying operating system for when fast switching
     code does not know what to do. */
  void (* os_punt_frame) (struct vlib_main_t * vm,
			  struct vlib_node_runtime_t * node,
			  vlib_frame_t * frame);

  /* Multicast distribution.  Set to zero for MC disabled. */
  mc_main_t * mc_main;

  /* Stream index to use for distribution when MC is enabled. */
  u32 mc_stream_index;

  vlib_one_time_waiting_process_t * procs_waiting_for_mc_stream_join;

  /* Event logger. */
  elog_main_t elog_main;

  /* Node call and return event types. */
  elog_event_type_t * node_call_elog_event_types;
  elog_event_type_t * node_return_elog_event_types;

  elog_event_type_t * error_elog_event_types;

  /* Seed for random number generator. */
  uword random_seed;

  /* Buffer of random data for various uses. */
  clib_random_buffer_t random_buffer;

  /* Hash table to record which init functions have been called. */
  uword * init_functions_called;

  /* to compare with node runtime */
  u32 cpu_index;

  void **mbuf_alloc_list;

  /* List of init functions to call, setup by constructors */
  _vlib_init_function_list_elt_t *init_function_registrations;
  _vlib_init_function_list_elt_t *main_loop_enter_function_registrations;
  _vlib_init_function_list_elt_t *main_loop_exit_function_registrations;
  _vlib_init_function_list_elt_t *api_init_function_registrations;
  vlib_config_function_runtime_t *config_function_registrations;
  mc_serialize_msg_t *mc_msg_registrations; /* mc_main is a pointer... */

  /* control-plane API queue signal pending */
  volatile u32 queue_signal_pending;
  void (*queue_signal_callback)(struct vlib_main_t *);
} vlib_main_t;

/* Global main structure.
   NOTE(review): defined (not extern-declared) in a header — relies on
   C tentative-definition/common-symbol linkage; consider extern here
   plus one definition in a .c file. */
vlib_main_t vlib_global_main;

/* Current wall time in seconds (f64) from the calibrated CPU clock. */
always_inline f64
vlib_time_now (vlib_main_t * vm)
{ return clib_time_now (&vm->clib_time); }

/* Convert a raw CPU tick count n to seconds on vm's time base. */
always_inline f64
vlib_time_now_ticks (vlib_main_t * vm, u64 n)
{ return clib_time_now_internal (&vm->clib_time, n); }

/* Busy wait for specified time. */
always_inline void
vlib_time_wait (vlib_main_t * vm, f64 wait)
{
  f64 t = vlib_time_now (vm);
  f64 limit = t + wait;
  while (t < limit)
    t = vlib_time_now (vm);
}

/* Time a piece of code. */
#define vlib_time_code(vm,body)			\
do {						\
    f64 _t[2];					\
    _t[0] = vlib_time_now (vm);			\
    do { body; } while (0);			\
    _t[1] = vlib_time_now (vm);			\
    clib_warning ("%.7e", _t[1] - _t[0]);	\
} while (0)

/* Repeatedly suspend the current process for suspend_time until test
   is true or timeout_time elapses; evaluates to the final test result
   (GCC statement expression). */
#define vlib_wait_with_timeout(vm,suspend_time,timeout_time,test)	\
({									\
    uword __vlib_wait_with_timeout = 0;					\
    f64 __vlib_wait_time = 0;						\
    while (! (__vlib_wait_with_timeout = (test))			\
	   && __vlib_wait_time < (timeout_time))			\
      {									\
	vlib_process_suspend (vm, suspend_time);			\
	__vlib_wait_time += suspend_time;				\
      }									\
    __vlib_wait_with_timeout;						\
})

/* Abort the main loop via longjmp, recording error for the handler. */
always_inline void
vlib_panic_with_error (vlib_main_t * vm, clib_error_t * error)
{
  vm->main_loop_error = error;
  clib_longjmp (&vm->main_loop_exit, VLIB_MAIN_LOOP_EXIT_PANIC);
}

#define vlib_panic_with_msg(vm,args...) \
  vlib_panic_with_error (vm, clib_error_return (0, args))

always_inline void
vlib_panic (vlib_main_t * vm)
{ vlib_panic_with_error (vm, 0); }

/* Index into the circular stats counters, delta slots from the
   current one (delta may be negative, e.g. -1 for the last full slot). */
always_inline u32
vlib_vector_input_stats_index (vlib_main_t * vm, word delta)
{
  u32 i;
  i = vm->main_loop_count >> VLIB_LOG2_MAIN_LOOPS_PER_STATS_UPDATE;
  ASSERT (is_pow2 (ARRAY_LEN (vm->vector_counts_per_main_loop)));
  return (i + delta) & (ARRAY_LEN (vm->vector_counts_per_main_loop) - 1);
}

/* Estimate input rate based on previous
   2^VLIB_LOG2_MAIN_LOOPS_PER_STATS_UPDATE
   samples. */
always_inline u32
vlib_last_vectors_per_main_loop (vlib_main_t * vm)
{
  u32 i = vlib_vector_input_stats_index (vm, -1);
  u32 n = vm->vector_counts_per_main_loop[i];
  return n >> VLIB_LOG2_MAIN_LOOPS_PER_STATS_UPDATE;
}

/* Total ave vector count per iteration of main loop. */
always_inline f64
vlib_last_vectors_per_main_loop_as_f64 (vlib_main_t * vm)
{
  u32 i = vlib_vector_input_stats_index (vm, -1);
  u32 v = vm->vector_counts_per_main_loop[i];
  return (f64) v / (f64) (1 << VLIB_LOG2_MAIN_LOOPS_PER_STATS_UPDATE);
}

/* Total ave vectors/node count per iteration of main loop. */
always_inline f64
vlib_last_vector_length_per_node (vlib_main_t * vm)
{
  u32 i = vlib_vector_input_stats_index (vm, -1);
  u32 v = vm->vector_counts_per_main_loop[i];
  u32 n = vm->node_counts_per_main_loop[i];
  return n == 0 ? 0 : (f64) v / (f64) n;
}

/* NOTE(review): non-static, non-extern global defined in a header —
   every includer gets a tentative definition (common symbol); should
   be extern here with one definition in a .c file. */
u32 wraps;

/* Roll per-loop vector/node counts into the circular stats slots;
   a "wrap" starts a fresh slot every
   2^VLIB_LOG2_MAIN_LOOPS_PER_STATS_UPDATE iterations. */
always_inline void
vlib_increment_main_loop_counter (vlib_main_t * vm)
{
  u32 i, c, n, v, is_wrap;

  c = vm->main_loop_count++;

  is_wrap = (c & pow2_mask (VLIB_LOG2_MAIN_LOOPS_PER_STATS_UPDATE)) == 0;

  if (is_wrap)
    wraps++;

  i = vlib_vector_input_stats_index (vm, /* delta */ is_wrap);

  v = is_wrap ? 0 : vm->vector_counts_per_main_loop[i];
  n = is_wrap ? 0 : vm->node_counts_per_main_loop[i];

  v += vm->main_loop_vectors_processed;
  n += vm->main_loop_nodes_processed;
  vm->main_loop_vectors_processed = 0;
  vm->main_loop_nodes_processed = 0;
  vm->vector_counts_per_main_loop[i] = v;
  vm->node_counts_per_main_loop[i] = n;
}

/* Register the callback the main loop invokes when
   queue_signal_pending is set (control-plane API queue drain). */
always_inline void vlib_set_queue_signal_callback
(vlib_main_t *vm, void (*fp)(vlib_main_t *))
{
  vm->queue_signal_callback = fp;
}

/* Main routine.
*/ +int vlib_main (vlib_main_t * vm, unformat_input_t * input); + +/* Thread stacks, for os_get_cpu_number */ +u8 **vlib_thread_stacks; + +/* Number of thread stacks that the application needs */ +u32 vlib_app_num_thread_stacks_needed (void) __attribute__ ((weak)); + +#endif /* included_vlib_main_h */ diff --git a/vlib/vlib/mc.c b/vlib/vlib/mc.c new file mode 100644 index 00000000000..460145ef0e6 --- /dev/null +++ b/vlib/vlib/mc.c @@ -0,0 +1,2354 @@ +/* + * mc.c: vlib reliable sequenced multicast distributed applications + * + * Copyright (c) 2010 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <vlib/vlib.h> + +/* + * 1 to enable msg id training wheels, which are useful for tracking + * down catchup and/or partitioned network problems + */ +#define MSG_ID_DEBUG 0 + +static format_function_t format_mc_stream_state; + +static u32 elog_id_for_peer_id (mc_main_t * m, u64 peer_id) +{ + uword * p, r; + mhash_t * h = &m->elog_id_by_peer_id; + + if (! m->elog_id_by_peer_id.hash) + mhash_init (h, sizeof (uword), sizeof (mc_peer_id_t)); + + p = mhash_get (h, &peer_id); + if (p) + return p[0]; + r = elog_string (m->elog_main, "%U", + m->transport.format_peer_id, peer_id); + mhash_set (h, &peer_id, r, /* old_value */ 0); + return r; +} + +static u32 elog_id_for_msg_name (mc_main_t * m, char *msg_name) +{ + uword * p, r; + uword * h = m->elog_id_by_msg_name; + u8 *name_copy; + + if (! 
h) + h = m->elog_id_by_msg_name + = hash_create_string (0, sizeof (uword)); + + p = hash_get_mem (h, msg_name); + if (p) + return p[0]; + r = elog_string (m->elog_main, "%s", msg_name); + + name_copy = format (0, "%s%c", msg_name, 0); + + hash_set_mem (h, name_copy, r); + m->elog_id_by_msg_name = h; + + return r; +} + +static void elog_tx_msg (mc_main_t * m, u32 stream_id, u32 local_sequence, u32 retry_count) +{ + if (MC_EVENT_LOGGING > 0) + { + ELOG_TYPE_DECLARE (e) = { + .format = "tx-msg: stream %d local seq %d attempt %d", + .format_args = "i4i4i4", + }; + struct { u32 stream_id, local_sequence, retry_count; } * ed; + ed = ELOG_DATA (m->elog_main, e); + ed->stream_id = stream_id; + ed->local_sequence = local_sequence; + ed->retry_count = retry_count; + } +} + +/* + * seq_cmp + * correctly compare two unsigned sequence numbers. + * This function works so long as x and y are within 2**(n-1) of each + * other, where n = bits(x, y). + * + * Magic decoder ring: + * seq_cmp == 0 => x and y are equal + * seq_cmp < 0 => x is "in the past" with respect to y + * seq_cmp > 0 => x is "in the future" with respect to y + */ +always_inline i32 mc_seq_cmp (u32 x, u32 y) +{ return (i32) x - (i32) y;} + +void * mc_get_vlib_buffer (vlib_main_t * vm, u32 n_bytes, u32 * bi_return) +{ + u32 n_alloc, bi; + vlib_buffer_t * b; + + n_alloc = vlib_buffer_alloc (vm, &bi, 1); + ASSERT (n_alloc == 1); + + b = vlib_get_buffer (vm, bi); + b->current_length = n_bytes; + *bi_return = bi; + return (void *) b->data; +} + +static void +delete_peer_with_index (mc_main_t * mcm, mc_stream_t * s, + uword index, + int notify_application) +{ + mc_stream_peer_t * p = pool_elt_at_index (s->peers, index); + ASSERT (p != 0); + if (s->config.peer_died && notify_application) + s->config.peer_died (mcm, s, p->id); + + s->all_peer_bitmap = clib_bitmap_andnoti (s->all_peer_bitmap, p - s->peers); + + if (MC_EVENT_LOGGING > 0) + { + ELOG_TYPE_DECLARE (e) = { + .format = "delete peer %s from all_peer_bitmap", + 
.format_args = "T4", + }; + struct { u32 peer; } * ed = 0; + + ed = ELOG_DATA (mcm->elog_main, e); + ed->peer = elog_id_for_peer_id (mcm, p->id.as_u64); + } + /* Do not delete the pool / hash table entries, or we lose sequence number state */ +} + +static mc_stream_peer_t * +get_or_create_peer_with_id (mc_main_t * mcm, + mc_stream_t * s, mc_peer_id_t id, + int * created) +{ + uword * q = mhash_get (&s->peer_index_by_id, &id); + mc_stream_peer_t * p; + + if (q) + { + p = pool_elt_at_index (s->peers, q[0]); + goto done; + } + + pool_get (s->peers, p); + memset (p, 0, sizeof (p[0])); + p->id = id; + p->last_sequence_received = ~0; + mhash_set (&s->peer_index_by_id, &id, p - s->peers, /* old_value */ 0); + if (created) + *created = 1; + + done: + if (MC_EVENT_LOGGING > 0) + { + ELOG_TYPE_DECLARE (e) = { + .format = "get_or_create %s peer %s stream %d seq %d", + .format_args = "t4T4i4i4", + .n_enum_strings = 2, + .enum_strings = { "old", "new", }, + }; + struct { u32 is_new, peer, stream_index, rx_sequence; } * ed = 0; + + ed = ELOG_DATA (mcm->elog_main, e); + ed->is_new = q ? 
0 : 1; + ed->peer = elog_id_for_peer_id (mcm, p->id.as_u64); + ed->stream_index = s->index; + ed->rx_sequence = p->last_sequence_received; + } + /* $$$$ Enable or reenable this peer */ + s->all_peer_bitmap = clib_bitmap_ori (s->all_peer_bitmap, p - s->peers); + return p; +} + +static void maybe_send_window_open_event (vlib_main_t * vm, mc_stream_t * stream) +{ + vlib_one_time_waiting_process_t * p; + + if (pool_elts (stream->retry_pool) >= stream->config.window_size) + return; + + vec_foreach (p, stream->procs_waiting_for_open_window) + vlib_signal_one_time_waiting_process (vm, p); + + if (stream->procs_waiting_for_open_window) + _vec_len (stream->procs_waiting_for_open_window) = 0; +} + +static void mc_retry_free (mc_main_t * mcm, mc_stream_t *s, mc_retry_t * r) +{ + mc_retry_t record, *retp; + + if (r->unacked_by_peer_bitmap) + _vec_len (r->unacked_by_peer_bitmap) = 0; + + if (clib_fifo_elts (s->retired_fifo) >= 2 * s->config.window_size) + { + clib_fifo_sub1 (s->retired_fifo, record); + vlib_buffer_free_one (mcm->vlib_main, record.buffer_index); + } + + clib_fifo_add2 (s->retired_fifo, retp); + + retp->buffer_index = r->buffer_index; + retp->local_sequence = r->local_sequence; + + r->buffer_index = ~0; /* poison buffer index in this retry */ +} + +static void mc_resend_retired (mc_main_t *mcm, mc_stream_t *s, u32 local_sequence) +{ + mc_retry_t *retry; + + if (MC_EVENT_LOGGING > 0) + { + ELOG_TYPE_DECLARE (e) = { + .format = "resend-retired: search for local seq %d", + .format_args = "i4", + }; + struct { u32 local_sequence; } * ed; + ed = ELOG_DATA (mcm->elog_main, e); + ed->local_sequence = local_sequence; + } + + clib_fifo_foreach + (retry, s->retired_fifo, + ({ + if (retry->local_sequence == local_sequence) + { + elog_tx_msg (mcm, s->index, retry->local_sequence, -13); + + mcm->transport.tx_buffer + (mcm->transport.opaque, + MC_TRANSPORT_USER_REQUEST_TO_RELAY, + retry->buffer_index); + return; + } + })); + + if (MC_EVENT_LOGGING > 0) + { + ELOG_TYPE_DECLARE 
(e) = { + .format = "resend-retired: FAILED search for local seq %d", + .format_args = "i4", + }; + struct { u32 local_sequence; } * ed; + ed = ELOG_DATA (mcm->elog_main, e); + ed->local_sequence = local_sequence; + } +} + +static uword * +delete_retry_fifo_elt (mc_main_t * mcm, + mc_stream_t * stream, + mc_retry_t * r, + uword * dead_peer_bitmap) +{ + mc_stream_peer_t * p; + + pool_foreach (p, stream->peers, ({ + uword pi = p - stream->peers; + uword is_alive = 0 == clib_bitmap_get (r->unacked_by_peer_bitmap, pi); + + if (! is_alive) + dead_peer_bitmap = clib_bitmap_ori (dead_peer_bitmap, pi); + + if (MC_EVENT_LOGGING > 0) + { + ELOG_TYPE_DECLARE (e) = { + .format = "delete_retry_fifo_elt: peer %s is %s", + .format_args = "T4t4", + .n_enum_strings = 2, + .enum_strings = { "alive", "dead", }, + }; + struct { u32 peer, is_alive; } * ed; + ed = ELOG_DATA (mcm->elog_main, e); + ed->peer = elog_id_for_peer_id (mcm, p->id.as_u64); + ed->is_alive = is_alive; + } + })); + + hash_unset (stream->retry_index_by_local_sequence, r->local_sequence); + mc_retry_free (mcm, stream, r); + + return dead_peer_bitmap; +} + +always_inline mc_retry_t * +prev_retry (mc_stream_t * s, mc_retry_t * r) +{ + return (r->prev_index != ~0 + ? pool_elt_at_index (s->retry_pool, r->prev_index) + : 0); +} + +always_inline mc_retry_t * +next_retry (mc_stream_t * s, mc_retry_t * r) +{ + return (r->next_index != ~0 + ? 
pool_elt_at_index (s->retry_pool, r->next_index) + : 0); +} + +always_inline void +remove_retry_from_pool (mc_stream_t * s, mc_retry_t * r) +{ + mc_retry_t * p = prev_retry (s, r); + mc_retry_t * n = next_retry (s, r); + + if (p) + p->next_index = r->next_index; + else + s->retry_head_index = r->next_index; + if (n) + n->prev_index = r->prev_index; + else + s->retry_tail_index = r->prev_index; + + pool_put_index (s->retry_pool, r - s->retry_pool); +} + +static void check_retry (mc_main_t * mcm, mc_stream_t * s) +{ + mc_retry_t * r; + vlib_main_t * vm = mcm->vlib_main; + f64 now = vlib_time_now(vm); + uword * dead_peer_bitmap = 0; + u32 ri, ri_next; + + for (ri = s->retry_head_index; ri != ~0; ri = ri_next) + { + r = pool_elt_at_index (s->retry_pool, ri); + ri_next = r->next_index; + + if (now < r->sent_at + s->config.retry_interval) + continue; + + r->n_retries += 1; + if (r->n_retries > s->config.retry_limit) + { + dead_peer_bitmap = + delete_retry_fifo_elt (mcm, s, r, dead_peer_bitmap); + remove_retry_from_pool (s, r); + } + else + { + if (MC_EVENT_LOGGING > 0) + { + mc_stream_peer_t * p; + ELOG_TYPE_DECLARE (t) = { + .format = "resend local seq %d attempt %d", + .format_args = "i4i4", + }; + + pool_foreach (p, s->peers, ({ + if (clib_bitmap_get (r->unacked_by_peer_bitmap, p - s->peers)) + { + ELOG_TYPE_DECLARE (ev) = { + .format = "resend: needed by peer %s local seq %d", + .format_args = "T4i4", + }; + struct { u32 peer, rx_sequence; } * ed; + ed = ELOG_DATA (mcm->elog_main, ev); + ed->peer = elog_id_for_peer_id (mcm, p->id.as_u64); + ed->rx_sequence = r->local_sequence; + } + })); + + struct { u32 sequence; u32 trail; } * ed; + ed = ELOG_DATA (mcm->elog_main, t); + ed->sequence = r->local_sequence; + ed->trail = r->n_retries; + } + + r->sent_at = vlib_time_now (vm); + s->stats.n_retries += 1; + + elog_tx_msg (mcm, s->index, r->local_sequence, r->n_retries); + + mcm->transport.tx_buffer + (mcm->transport.opaque, + MC_TRANSPORT_USER_REQUEST_TO_RELAY, + 
r->buffer_index); + } + } + + maybe_send_window_open_event (mcm->vlib_main, s); + + /* Delete any dead peers we've found. */ + if (! clib_bitmap_is_zero (dead_peer_bitmap)) + { + uword i; + + clib_bitmap_foreach (i, dead_peer_bitmap, ({ + delete_peer_with_index (mcm, s, i, /* notify_application */ 1); + + /* Delete any references to just deleted peer in retry pool. */ + pool_foreach (r, s->retry_pool, ({ + r->unacked_by_peer_bitmap = + clib_bitmap_andnoti (r->unacked_by_peer_bitmap, i); + })); + })); + clib_bitmap_free (dead_peer_bitmap); + } +} + +always_inline mc_main_t * +mc_node_get_main (vlib_node_runtime_t * node) +{ + mc_main_t ** p = (void *) node->runtime_data; + return p[0]; +} + +static uword +mc_retry_process (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * f) +{ + mc_main_t * mcm = mc_node_get_main (node); + mc_stream_t * s; + + while (1) + { + vlib_process_suspend (vm, 1.0); + vec_foreach (s, mcm->stream_vector) + { + if (s->state != MC_STREAM_STATE_invalid) + check_retry (mcm, s); + } + } + return 0; /* not likely */ +} + +static void send_join_or_leave_request (mc_main_t * mcm, u32 stream_index, u32 is_join) +{ + vlib_main_t * vm = mcm->vlib_main; + mc_msg_join_or_leave_request_t * mp; + u32 bi; + + mp = mc_get_vlib_buffer (vm, sizeof (mp[0]), &bi); + memset(mp, 0, sizeof (*mp)); + mp->type = MC_MSG_TYPE_join_or_leave_request; + mp->peer_id = mcm->transport.our_ack_peer_id; + mp->stream_index = stream_index; + mp->is_join = is_join; + + mc_byte_swap_msg_join_or_leave_request (mp); + + /* + * These msgs are unnumbered, unordered so send on the from-relay + * channel. 
+ */ + mcm->transport.tx_buffer (mcm->transport.opaque, MC_TRANSPORT_JOIN, bi); +} + +static uword +mc_join_ager_process (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * f) +{ + mc_main_t * mcm = mc_node_get_main (node); + + while (1) + { + if (mcm->joins_in_progress) + { + mc_stream_t * s; + vlib_one_time_waiting_process_t * p; + f64 now = vlib_time_now (vm); + + vec_foreach (s, mcm->stream_vector) + { + if (s->state != MC_STREAM_STATE_join_in_progress) + continue; + + if (now > s->join_timeout) + { + s->state = MC_STREAM_STATE_ready; + + if (MC_EVENT_LOGGING > 0) + { + ELOG_TYPE_DECLARE (e) = { + .format = "stream %d join timeout", + }; + ELOG (mcm->elog_main, e, s->index); + } + /* Make sure that this app instance exists as a stream peer, + or we may answer a catchup request with a NULL + all_peer_bitmap... */ + (void) get_or_create_peer_with_id + (mcm, s, mcm->transport.our_ack_peer_id, /* created */ 0); + + vec_foreach (p, s->procs_waiting_for_join_done) + vlib_signal_one_time_waiting_process (vm, p); + if (s->procs_waiting_for_join_done) + _vec_len (s->procs_waiting_for_join_done) = 0; + + mcm->joins_in_progress--; + ASSERT (mcm->joins_in_progress >= 0); + } + else + { + /* Resent join request which may have been lost. 
*/ + send_join_or_leave_request (mcm, s->index, + 1 /* is_join */); + + /* We're *not* alone, retry for as long as it takes */ + if (mcm->relay_state == MC_RELAY_STATE_SLAVE) + s->join_timeout = vlib_time_now (vm) + 2.0; + + + if (MC_EVENT_LOGGING > 0) + { + ELOG_TYPE_DECLARE (e) = { + .format = "stream %d resend join request", + }; + ELOG (mcm->elog_main, e, s->index); + } + } + } + } + + vlib_process_suspend (vm, .5); + } + + return 0; /* not likely */ +} + +static void serialize_mc_register_stream_name (serialize_main_t * m, va_list * va) +{ + char * name = va_arg (*va, char *); + serialize_cstring (m, name); +} + +static void elog_stream_name (char * buf, int n_buf_bytes, char * v) +{ + memcpy (buf, v, clib_min (n_buf_bytes - 1, vec_len (v))); + buf[n_buf_bytes - 1] = 0; +} + +static void unserialize_mc_register_stream_name (serialize_main_t * m, va_list * va) +{ + mc_main_t * mcm = va_arg (*va, mc_main_t *); + char * name; + mc_stream_t * s; + uword * p; + + unserialize_cstring (m, &name); + + if ((p = hash_get_mem (mcm->stream_index_by_name, name))) + { + if (MC_EVENT_LOGGING > 0) + { + ELOG_TYPE_DECLARE (e) = { + .format = "stream index %d already named %s", + .format_args = "i4s16", + }; + struct { u32 stream_index; char name[16]; } * ed; + ed = ELOG_DATA (mcm->elog_main, e); + ed->stream_index = p[0]; + elog_stream_name (ed->name, sizeof (ed->name), name); + } + + vec_free (name); + return; + } + + vec_add2 (mcm->stream_vector, s, 1); + mc_stream_init (s); + s->state = MC_STREAM_STATE_name_known; + s->index = s - mcm->stream_vector; + s->config.name = name; + + if (MC_EVENT_LOGGING > 0) + { + ELOG_TYPE_DECLARE (e) = { + .format = "stream index %d named %s", + .format_args = "i4s16", + }; + struct { u32 stream_index; char name[16]; } * ed; + ed = ELOG_DATA (mcm->elog_main, e); + ed->stream_index = s->index; + elog_stream_name (ed->name, sizeof (ed->name), name); + } + + hash_set_mem (mcm->stream_index_by_name, name, s->index); + + p = hash_get 
(mcm->procs_waiting_for_stream_name_by_name, name); + if (p) + { + vlib_one_time_waiting_process_t * wp, ** w; + w = pool_elt_at_index (mcm->procs_waiting_for_stream_name_pool, p[0]); + vec_foreach (wp, w[0]) + vlib_signal_one_time_waiting_process (mcm->vlib_main, wp); + pool_put (mcm->procs_waiting_for_stream_name_pool, w); + hash_unset_mem (mcm->procs_waiting_for_stream_name_by_name, name); + } +} + +MC_SERIALIZE_MSG (mc_register_stream_name_msg, static) = { + .name = "mc_register_stream_name", + .serialize = serialize_mc_register_stream_name, + .unserialize = unserialize_mc_register_stream_name, +}; + +void +mc_rx_buffer_unserialize (mc_main_t * mcm, + mc_stream_t * stream, + mc_peer_id_t peer_id, + u32 buffer_index) +{ return mc_unserialize (mcm, stream, buffer_index); } + +static u8 * +mc_internal_catchup_snapshot (mc_main_t * mcm, + u8 * data_vector, + u32 last_global_sequence_processed) +{ + serialize_main_t m; + + /* Append serialized data to data vector. */ + serialize_open_vector (&m, data_vector); + m.stream.current_buffer_index = vec_len (data_vector); + + serialize (&m, serialize_mc_main, mcm); + return serialize_close_vector (&m); +} + +static void +mc_internal_catchup (mc_main_t * mcm, + u8 * data, + u32 n_data_bytes) +{ + serialize_main_t s; + + unserialize_open_data (&s, data, n_data_bytes); + + unserialize (&s, unserialize_mc_main, mcm); +} + +/* Overridden from the application layer, not actually used here */ +void mc_stream_join_process_hold (void) __attribute__ ((weak)); +void mc_stream_join_process_hold (void) { } + +static u32 +mc_stream_join_helper (mc_main_t * mcm, + mc_stream_config_t * config, + u32 is_internal) +{ + mc_stream_t * s; + vlib_main_t * vm = mcm->vlib_main; + + s = 0; + if (! is_internal) + { + uword * p; + + /* Already have a stream with given name? */ + if ((s = mc_stream_by_name (mcm, config->name))) + { + /* Already joined and ready? 
*/ + if (s->state == MC_STREAM_STATE_ready) + return s->index; + } + + /* First join MC internal stream. */ + if (! mcm->stream_vector + || (mcm->stream_vector[MC_STREAM_INDEX_INTERNAL].state + == MC_STREAM_STATE_invalid)) + { + static mc_stream_config_t c = { + .name = "mc-internal", + .rx_buffer = mc_rx_buffer_unserialize, + .catchup = mc_internal_catchup, + .catchup_snapshot = mc_internal_catchup_snapshot, + }; + + c.save_snapshot = config->save_snapshot; + + mc_stream_join_helper (mcm, &c, /* is_internal */ 1); + } + + /* If stream is still unknown register this name and wait for + sequenced message to name stream. This way all peers agree + on stream name to index mappings. */ + s = mc_stream_by_name (mcm, config->name); + if (! s) + { + vlib_one_time_waiting_process_t * wp, ** w; + u8 * name_copy = format (0, "%s", config->name); + + mc_serialize_stream (mcm, + MC_STREAM_INDEX_INTERNAL, + &mc_register_stream_name_msg, + config->name); + + /* Wait for this stream to be named. */ + p = hash_get_mem (mcm->procs_waiting_for_stream_name_by_name, name_copy); + if (p) + w = pool_elt_at_index (mcm->procs_waiting_for_stream_name_pool, p[0]); + else + { + pool_get (mcm->procs_waiting_for_stream_name_pool, w); + if (! mcm->procs_waiting_for_stream_name_by_name) + mcm->procs_waiting_for_stream_name_by_name + = hash_create_string (/* elts */ 0, /* value size */ sizeof (uword)); + hash_set_mem (mcm->procs_waiting_for_stream_name_by_name, + name_copy, + w - mcm->procs_waiting_for_stream_name_pool); + w[0] = 0; + } + + vec_add2 (w[0], wp, 1); + vlib_current_process_wait_for_one_time_event (vm, wp); + vec_free (name_copy); + } + + /* Name should be known now. */ + s = mc_stream_by_name (mcm, config->name); + ASSERT (s != 0); + ASSERT (s->state == MC_STREAM_STATE_name_known); + } + + if (! s) + { + vec_add2 (mcm->stream_vector, s, 1); + mc_stream_init (s); + s->index = s - mcm->stream_vector; + } + + { + /* Save name since we could have already used it as hash key. 
*/ + char * name_save = s->config.name; + + s->config = config[0]; + + if (name_save) + s->config.name = name_save; + } + + if (s->config.window_size == 0) + s->config.window_size = 8; + + if (s->config.retry_interval == 0.0) + s->config.retry_interval = 1.0; + + /* Sanity. */ + ASSERT (s->config.retry_interval < 30); + + if (s->config.retry_limit == 0) + s->config.retry_limit = 7; + + s->state = MC_STREAM_STATE_join_in_progress; + if (! s->peer_index_by_id.hash) + mhash_init (&s->peer_index_by_id, sizeof (uword), sizeof (mc_peer_id_t)); + + /* If we don't hear from someone in 5 seconds, we're alone */ + s->join_timeout = vlib_time_now (vm) + 5.0; + mcm->joins_in_progress++; + + if (MC_EVENT_LOGGING > 0) + { + ELOG_TYPE_DECLARE (e) = { + .format = "stream index %d join request %s", + .format_args = "i4s16", + }; + struct { u32 stream_index; char name[16]; } * ed; + ed = ELOG_DATA (mcm->elog_main, e); + ed->stream_index = s->index; + elog_stream_name (ed->name, sizeof (ed->name), s->config.name); + } + + send_join_or_leave_request (mcm, s->index, 1 /* join */); + + vlib_current_process_wait_for_one_time_event_vector + (vm, &s->procs_waiting_for_join_done); + + if (MC_EVENT_LOGGING) + { + ELOG_TYPE (e, "join complete stream %d"); + ELOG (mcm->elog_main, e, s->index); + } + + return s->index; +} + +u32 mc_stream_join (mc_main_t * mcm, mc_stream_config_t * config) +{ return mc_stream_join_helper (mcm, config, /* is_internal */ 0); } + +void mc_stream_leave (mc_main_t * mcm, u32 stream_index) +{ + mc_stream_t * s = mc_stream_by_index (mcm, stream_index); + + if (! 
s) + return; + + if (MC_EVENT_LOGGING) + { + ELOG_TYPE_DECLARE (t) = { + .format = "leave-stream: %d", + .format_args = "i4", + }; + struct { u32 index; } * ed; + ed = ELOG_DATA (mcm->elog_main, t); + ed->index = stream_index; + } + + send_join_or_leave_request (mcm, stream_index, 0 /* is_join */); + mc_stream_free (s); + s->state = MC_STREAM_STATE_name_known; +} + +void mc_msg_join_or_leave_request_handler (mc_main_t * mcm, + mc_msg_join_or_leave_request_t * req, + u32 buffer_index) +{ + mc_stream_t * s; + mc_msg_join_reply_t * rep; + u32 bi; + + mc_byte_swap_msg_join_or_leave_request (req); + + s = mc_stream_by_index (mcm, req->stream_index); + if (! s || s->state != MC_STREAM_STATE_ready) + return; + + /* If the peer is joining, create it */ + if (req->is_join) + { + mc_stream_t * this_s; + + /* We're not in a position to catch up a peer until all + stream joins are complete. */ + if (0) + { + /* XXX This is hard to test so we've. */ + vec_foreach (this_s, mcm->stream_vector) + { + if (this_s->state != MC_STREAM_STATE_ready + && this_s->state != MC_STREAM_STATE_name_known) + return; + } + } + else + if (mcm->joins_in_progress > 0) + return; + + (void) get_or_create_peer_with_id (mcm, + s, + req->peer_id, + /* created */ 0); + + rep = mc_get_vlib_buffer (mcm->vlib_main, sizeof (rep[0]), &bi); + memset (rep, 0, sizeof (rep[0])); + rep->type = MC_MSG_TYPE_join_reply; + rep->stream_index = req->stream_index; + + mc_byte_swap_msg_join_reply (rep); + /* These two are already in network byte order... 
*/ + rep->peer_id = mcm->transport.our_ack_peer_id; + rep->catchup_peer_id = mcm->transport.our_catchup_peer_id; + + mcm->transport.tx_buffer (mcm->transport.opaque, MC_TRANSPORT_JOIN, bi); + } + else + { + if (s->config.peer_died) + s->config.peer_died (mcm, s, req->peer_id); + } +} + +void mc_msg_join_reply_handler (mc_main_t * mcm, + mc_msg_join_reply_t * mp, + u32 buffer_index) +{ + mc_stream_t * s; + + mc_byte_swap_msg_join_reply (mp); + + s = mc_stream_by_index (mcm, mp->stream_index); + + if (! s || s->state != MC_STREAM_STATE_join_in_progress) + return; + + /* Switch to catchup state; next join reply + for this stream will be ignored. */ + s->state = MC_STREAM_STATE_catchup; + + mcm->joins_in_progress--; + mcm->transport.catchup_request_fun (mcm->transport.opaque, + mp->stream_index, + mp->catchup_peer_id); +} + +void mc_wait_for_stream_ready (mc_main_t * m, char * stream_name) +{ + mc_stream_t * s; + + while (1) + { + s = mc_stream_by_name (m, stream_name); + if (s) + break; + vlib_process_suspend (m->vlib_main, .1); + } + + /* It's OK to send a message in catchup and ready states. */ + if (s->state == MC_STREAM_STATE_catchup + || s->state == MC_STREAM_STATE_ready) + return; + + /* Otherwise we are waiting for a join to finish. */ + vlib_current_process_wait_for_one_time_event_vector + (m->vlib_main, &s->procs_waiting_for_join_done); +} + +u32 mc_stream_send (mc_main_t * mcm, u32 stream_index, u32 buffer_index) +{ + mc_stream_t * s = mc_stream_by_index (mcm, stream_index); + vlib_main_t * vm = mcm->vlib_main; + mc_retry_t * r; + mc_msg_user_request_t * mp; + vlib_buffer_t * b = vlib_get_buffer (vm, buffer_index); + u32 ri; + + if (! 
s) + return 0; + + if (s->state != MC_STREAM_STATE_ready) + vlib_current_process_wait_for_one_time_event_vector + (vm, &s->procs_waiting_for_join_done); + + while (pool_elts (s->retry_pool) >= s->config.window_size) + { + vlib_current_process_wait_for_one_time_event_vector + (vm, &s->procs_waiting_for_open_window); + } + + pool_get (s->retry_pool, r); + ri = r - s->retry_pool; + + r->prev_index = s->retry_tail_index; + r->next_index = ~0; + s->retry_tail_index = ri; + + if (r->prev_index == ~0) + s->retry_head_index = ri; + else + { + mc_retry_t * p = pool_elt_at_index (s->retry_pool, r->prev_index); + p->next_index = ri; + } + + vlib_buffer_advance (b, -sizeof (mp[0])); + mp = vlib_buffer_get_current (b); + + mp->peer_id = mcm->transport.our_ack_peer_id; + /* mp->transport.global_sequence set by relay agent. */ + mp->global_sequence = 0xdeadbeef; + mp->stream_index = s->index; + mp->local_sequence = s->our_local_sequence++; + mp->n_data_bytes = vlib_buffer_index_length_in_chain (vm, buffer_index) - sizeof (mp[0]); + + r->buffer_index = buffer_index; + r->local_sequence = mp->local_sequence; + r->sent_at = vlib_time_now(vm); + r->n_retries = 0; + + /* Retry will be freed when all currently known peers have acked. 
*/ + vec_validate (r->unacked_by_peer_bitmap, vec_len (s->all_peer_bitmap) - 1); + vec_copy (r->unacked_by_peer_bitmap, s->all_peer_bitmap); + + hash_set (s->retry_index_by_local_sequence, r->local_sequence, r - s->retry_pool); + + elog_tx_msg (mcm, s->index, mp->local_sequence, r->n_retries); + + mc_byte_swap_msg_user_request (mp); + + mcm->transport.tx_buffer (mcm->transport.opaque, MC_TRANSPORT_USER_REQUEST_TO_RELAY, buffer_index); + + s->user_requests_sent++; + + /* return amount of window remaining */ + return s->config.window_size - pool_elts (s->retry_pool); +} + +void mc_msg_user_request_handler (mc_main_t * mcm, mc_msg_user_request_t * mp, u32 buffer_index) +{ + vlib_main_t * vm = mcm->vlib_main; + mc_stream_t * s; + mc_stream_peer_t * peer; + i32 seq_cmp_result; + static int once=0; + + mc_byte_swap_msg_user_request (mp); + + s = mc_stream_by_index (mcm, mp->stream_index); + + /* Not signed up for this stream? Turf-o-matic */ + if (! s || s->state != MC_STREAM_STATE_ready) + { + vlib_buffer_free_one (vm, buffer_index); + return; + } + + /* Find peer, including ourselves. 
*/ + peer = get_or_create_peer_with_id (mcm, + s, mp->peer_id, + /* created */ 0); + + seq_cmp_result = mc_seq_cmp (mp->local_sequence, + peer->last_sequence_received + 1); + + if (MC_EVENT_LOGGING > 0) + { + ELOG_TYPE_DECLARE (e) = { + .format = "rx-msg: peer %s stream %d rx seq %d seq_cmp %d", + .format_args = "T4i4i4i4", + }; + struct { u32 peer, stream_index, rx_sequence; i32 seq_cmp_result; } * ed; + ed = ELOG_DATA (mcm->elog_main, e); + ed->peer = elog_id_for_peer_id (mcm, peer->id.as_u64); + ed->stream_index = mp->stream_index; + ed->rx_sequence = mp->local_sequence; + ed->seq_cmp_result = seq_cmp_result; + } + + if (0 && mp->stream_index == 1 && once == 0) + { + once = 1; + ELOG_TYPE (e, "FAKE lost msg on stream 1"); + ELOG (mcm->elog_main,e,0); + return; + } + + peer->last_sequence_received += seq_cmp_result == 0; + s->user_requests_received++; + + if (seq_cmp_result > 0) + peer->stats.n_msgs_from_future += 1; + + /* Send ack even if msg from future */ + if (1) + { + mc_msg_user_ack_t * rp; + u32 bi; + + rp = mc_get_vlib_buffer (vm, sizeof (rp[0]), &bi); + rp->peer_id = mcm->transport.our_ack_peer_id; + rp->stream_index = s->index; + rp->local_sequence = mp->local_sequence; + rp->seq_cmp_result = seq_cmp_result; + + if (MC_EVENT_LOGGING > 0) + { + ELOG_TYPE_DECLARE (e) = { + .format = "tx-ack: stream %d local seq %d", + .format_args = "i4i4", + }; + struct { u32 stream_index; u32 local_sequence; } * ed; + ed = ELOG_DATA (mcm->elog_main, e); + ed->stream_index = rp->stream_index; + ed->local_sequence = rp->local_sequence; + } + + mc_byte_swap_msg_user_ack (rp); + + mcm->transport.tx_ack (mcm->transport.opaque, mp->peer_id, bi); + /* Msg from past? If so, free the buffer... 
*/ + if (seq_cmp_result < 0) + { + vlib_buffer_free_one (vm, buffer_index); + peer->stats.n_msgs_from_past += 1; + } + } + + if (seq_cmp_result == 0) + { + vlib_buffer_t * b = vlib_get_buffer (vm, buffer_index); + switch (s->state) + { + case MC_STREAM_STATE_ready: + vlib_buffer_advance (b, sizeof (mp[0])); + s->config.rx_buffer(mcm, s, mp->peer_id, buffer_index); + + /* Stream vector can change address via rx callback for mc-internal + stream. */ + s = mc_stream_by_index (mcm, mp->stream_index); + ASSERT (s != 0); + s->last_global_sequence_processed = mp->global_sequence; + break; + + case MC_STREAM_STATE_catchup: + clib_fifo_add1 (s->catchup_fifo, buffer_index); + break; + + default: + clib_warning ("stream in unknown state %U", + format_mc_stream_state, s->state); + break; + } + } +} + +void mc_msg_user_ack_handler (mc_main_t * mcm, mc_msg_user_ack_t * mp, u32 buffer_index) +{ + vlib_main_t * vm = mcm->vlib_main; + uword *p; + mc_stream_t * s; + mc_stream_peer_t * peer; + mc_retry_t * r; + int peer_created = 0; + + mc_byte_swap_msg_user_ack (mp); + + s = mc_stream_by_index (mcm, mp->stream_index); + + if (MC_EVENT_LOGGING > 0) + { + ELOG_TYPE_DECLARE (t) = { + .format = "rx-ack: local seq %d peer %s seq_cmp_result %d", + .format_args = "i4T4i4", + }; + struct { u32 local_sequence; u32 peer; i32 seq_cmp_result;} * ed; + ed = ELOG_DATA (mcm->elog_main, t); + ed->local_sequence = mp->local_sequence; + ed->peer = elog_id_for_peer_id (mcm, mp->peer_id.as_u64); + ed->seq_cmp_result = mp->seq_cmp_result; + } + + /* Unknown stream? */ + if (! s) + return; + + /* Find the peer which just ack'ed. */ + peer = get_or_create_peer_with_id (mcm, s, mp->peer_id, + /* created */ &peer_created); + + /* + * Peer reports message from the future. If it's not in the retry + * fifo, look for a retired message. 
+ */ + if (mp->seq_cmp_result > 0) + { + p = hash_get (s->retry_index_by_local_sequence, mp->local_sequence - + mp->seq_cmp_result); + if (p == 0) + mc_resend_retired (mcm, s, mp->local_sequence - mp->seq_cmp_result); + + /* Normal retry should fix it... */ + return; + } + + /* + * Pointer to the indicated retry fifo entry. + * Worth hashing because we could use a window size of 100 or 1000. + */ + p = hash_get (s->retry_index_by_local_sequence, mp->local_sequence); + + /* + * Is this a duplicate ACK, received after we've retired the + * fifo entry. This can happen when learning about new + * peers. + */ + if (p == 0) + { + if (MC_EVENT_LOGGING > 0) + { + ELOG_TYPE_DECLARE (t) = + { + .format = "ack: for seq %d from peer %s no fifo elt", + .format_args = "i4T4", + }; + struct { u32 seq; u32 peer; } * ed; + ed = ELOG_DATA (mcm->elog_main, t); + ed->seq = mp->local_sequence; + ed->peer = elog_id_for_peer_id (mcm, mp->peer_id.as_u64); + } + + return; + } + + r = pool_elt_at_index (s->retry_pool, p[0]); + + /* Make sure that this new peer ACKs our msgs from now on */ + if (peer_created) + { + mc_retry_t *later_retry = next_retry (s, r); + + while (later_retry) + { + later_retry->unacked_by_peer_bitmap = + clib_bitmap_ori (later_retry->unacked_by_peer_bitmap, + peer - s->peers); + later_retry = next_retry (s, later_retry); + } + } + + ASSERT (mp->local_sequence == r->local_sequence); + + /* If we weren't expecting to hear from this peer */ + if (!peer_created && + ! clib_bitmap_get (r->unacked_by_peer_bitmap, peer - s->peers)) + { + if (MC_EVENT_LOGGING > 0) + { + ELOG_TYPE_DECLARE (t) = + { + .format = "dup-ack: for seq %d from peer %s", + .format_args = "i4T4", + }; + struct { u32 seq; u32 peer; } * ed; + ed = ELOG_DATA (mcm->elog_main, t); + ed->seq = r->local_sequence; + ed->peer = elog_id_for_peer_id (mcm, peer->id.as_u64); + } + if (! 
clib_bitmap_is_zero (r->unacked_by_peer_bitmap)) + return; + } + + if (MC_EVENT_LOGGING > 0) + { + ELOG_TYPE_DECLARE (t) = + { + .format = "ack: for seq %d from peer %s", + .format_args = "i4T4", + }; + struct { u32 seq; u32 peer; } * ed; + ed = ELOG_DATA (mcm->elog_main, t); + ed->seq = mp->local_sequence; + ed->peer = elog_id_for_peer_id (mcm, peer->id.as_u64); + } + + r->unacked_by_peer_bitmap = + clib_bitmap_andnoti (r->unacked_by_peer_bitmap, peer - s->peers); + + /* Not all clients have ack'ed */ + if (! clib_bitmap_is_zero (r->unacked_by_peer_bitmap)) + { + return; + } + if (MC_EVENT_LOGGING > 0) + { + ELOG_TYPE_DECLARE (t) = + { + .format = "ack: retire fifo elt loc seq %d after %d acks", + .format_args = "i4i4", + }; + struct { u32 seq; u32 npeers; } * ed; + ed = ELOG_DATA (mcm->elog_main, t); + ed->seq = r->local_sequence; + ed->npeers = pool_elts (s->peers); + } + + hash_unset (s->retry_index_by_local_sequence, mp->local_sequence); + mc_retry_free (mcm, s, r); + remove_retry_from_pool (s, r); + maybe_send_window_open_event (vm, s); +} + +#define EVENT_MC_SEND_CATCHUP_DATA 0 + +static uword +mc_catchup_process (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * f) +{ + mc_main_t * mcm = mc_node_get_main (node); + uword *event_data = 0; + mc_catchup_process_arg_t * args; + int i; + + while (1) + { + if (event_data) + _vec_len(event_data) = 0; + vlib_process_wait_for_event_with_type (vm, &event_data, EVENT_MC_SEND_CATCHUP_DATA); + + for (i = 0; i < vec_len(event_data); i++) + { + args = pool_elt_at_index (mcm->catchup_process_args, + event_data[i]); + + mcm->transport.catchup_send_fun (mcm->transport.opaque, + args->catchup_opaque, + args->catchup_snapshot); + + /* Send function will free snapshot data vector. 
*/ + pool_put (mcm->catchup_process_args, args); + } + } + + return 0; /* not likely */ +} + +static void serialize_mc_stream (serialize_main_t * m, va_list * va) +{ + mc_stream_t * s = va_arg (*va, mc_stream_t *); + mc_stream_peer_t * p; + + serialize_integer (m, pool_elts (s->peers), sizeof (u32)); + pool_foreach (p, s->peers, ({ + u8 * x = serialize_get (m, sizeof (p->id)); + memcpy (x, p->id.as_u8, sizeof (p->id)); + serialize_integer (m, p->last_sequence_received, + sizeof (p->last_sequence_received)); + })); + serialize_bitmap (m, s->all_peer_bitmap); +} + +void unserialize_mc_stream (serialize_main_t * m, va_list * va) +{ + mc_stream_t * s = va_arg (*va, mc_stream_t *); + u32 i, n_peers; + mc_stream_peer_t * p; + + unserialize_integer (m, &n_peers, sizeof (u32)); + mhash_init (&s->peer_index_by_id, sizeof (uword), sizeof (mc_peer_id_t)); + for (i = 0; i < n_peers; i++) + { + u8 * x; + pool_get (s->peers, p); + x = unserialize_get (m, sizeof (p->id)); + memcpy (p->id.as_u8, x, sizeof (p->id)); + unserialize_integer (m, &p->last_sequence_received, sizeof (p->last_sequence_received)); + mhash_set (&s->peer_index_by_id, &p->id, p - s->peers, /* old_value */ 0); + } + s->all_peer_bitmap = unserialize_bitmap (m); + + /* This is really bad. */ + if (!s->all_peer_bitmap) + clib_warning ("BUG: stream %s all_peer_bitmap NULL", s->config.name); +} + +void mc_msg_catchup_request_handler (mc_main_t * mcm, mc_msg_catchup_request_t * req, u32 catchup_opaque) +{ + vlib_main_t * vm = mcm->vlib_main; + mc_stream_t * s; + mc_catchup_process_arg_t * args; + + mc_byte_swap_msg_catchup_request (req); + + s = mc_stream_by_index (mcm, req->stream_index); + if (! 
s || s->state != MC_STREAM_STATE_ready) + return; + + if (MC_EVENT_LOGGING > 0) + { + ELOG_TYPE_DECLARE (t) = + { + .format = "catchup-request: from %s stream %d", + .format_args = "T4i4", + }; + struct { u32 peer, stream; } * ed; + ed = ELOG_DATA (mcm->elog_main, t); + ed->peer = elog_id_for_peer_id (mcm, req->peer_id.as_u64); + ed->stream = req->stream_index; + } + + /* + * The application has to snapshoot its data structures right + * here, right now. If we process any messages after + * noting the last global sequence we've processed, the client + * won't be able to accurately reconstruct our data structures. + * + * Once the data structures are e.g. vec_dup()'ed, we + * send the resulting messages from a separate process, to + * make sure that we don't cause a bunch of message retransmissions + */ + pool_get (mcm->catchup_process_args, args); + + args->stream_index = s - mcm->stream_vector; + args->catchup_opaque = catchup_opaque; + args->catchup_snapshot = 0; + + /* Construct catchup reply and snapshot state for stream to send as + catchup reply payload. */ + { + mc_msg_catchup_reply_t * rep; + serialize_main_t m; + + vec_resize (args->catchup_snapshot, sizeof (rep[0])); + + rep = (void *) args->catchup_snapshot; + + rep->peer_id = req->peer_id; + rep->stream_index = req->stream_index; + rep->last_global_sequence_included = s->last_global_sequence_processed; + + /* Setup for serialize to append to catchup snapshot. 
*/ + serialize_open_vector (&m, args->catchup_snapshot); + m.stream.current_buffer_index = vec_len (m.stream.buffer); + + serialize (&m, serialize_mc_stream, s); + + args->catchup_snapshot = serialize_close_vector (&m); + + /* Actually copy internal state */ + args->catchup_snapshot = s->config.catchup_snapshot + (mcm, + args->catchup_snapshot, + rep->last_global_sequence_included); + + rep = (void *) args->catchup_snapshot; + rep->n_data_bytes = vec_len (args->catchup_snapshot) - sizeof (rep[0]); + + mc_byte_swap_msg_catchup_reply (rep); + } + + /* now go send it... */ + vlib_process_signal_event (vm, mcm->catchup_process, + EVENT_MC_SEND_CATCHUP_DATA, + args - mcm->catchup_process_args); +} + +#define EVENT_MC_UNSERIALIZE_BUFFER 0 +#define EVENT_MC_UNSERIALIZE_CATCHUP 1 + +void mc_msg_catchup_reply_handler (mc_main_t * mcm, mc_msg_catchup_reply_t * mp, u32 catchup_opaque) +{ + vlib_process_signal_event (mcm->vlib_main, + mcm->unserialize_process, + EVENT_MC_UNSERIALIZE_CATCHUP, + pointer_to_uword (mp)); +} + +static void perform_catchup (mc_main_t * mcm, mc_msg_catchup_reply_t * mp) +{ + mc_stream_t * s; + i32 seq_cmp_result; + + mc_byte_swap_msg_catchup_reply (mp); + + s = mc_stream_by_index (mcm, mp->stream_index); + + /* Never heard of this stream or already caught up. */ + if (! s || s->state == MC_STREAM_STATE_ready) + return; + + { + serialize_main_t m; + mc_stream_peer_t * p; + u32 n_stream_bytes; + + /* For offline sim replay: save the entire catchup snapshot... 
*/ + if (s->config.save_snapshot) + s->config.save_snapshot (mcm, /* is_catchup */ 1, mp->data, mp->n_data_bytes); + + unserialize_open_data (&m, mp->data, mp->n_data_bytes); + unserialize (&m, unserialize_mc_stream, s); + + /* Make sure we start numbering our messages as expected */ + pool_foreach (p, s->peers, ({ + if (p->id.as_u64 == mcm->transport.our_ack_peer_id.as_u64) + s->our_local_sequence = p->last_sequence_received + 1; + })); + + n_stream_bytes = m.stream.current_buffer_index; + + /* No need to unserialize close; nothing to free. */ + + /* After serialized stream is user's catchup data. */ + s->config.catchup (mcm, mp->data + n_stream_bytes, + mp->n_data_bytes - n_stream_bytes); + } + + /* Vector could have been moved by catchup. + This can only happen for mc-internal stream. */ + s = mc_stream_by_index (mcm, mp->stream_index); + + s->last_global_sequence_processed = mp->last_global_sequence_included; + + while (clib_fifo_elts (s->catchup_fifo)) + { + mc_msg_user_request_t * gp; + u32 bi; + vlib_buffer_t * b; + + clib_fifo_sub1(s->catchup_fifo, bi); + + b = vlib_get_buffer (mcm->vlib_main, bi); + gp = vlib_buffer_get_current (b); + + /* Make sure we're replaying "new" news */ + seq_cmp_result = mc_seq_cmp (gp->global_sequence, + mp->last_global_sequence_included); + + if (seq_cmp_result > 0) + { + vlib_buffer_advance (b, sizeof (gp[0])); + s->config.rx_buffer (mcm, s, gp->peer_id, bi); + s->last_global_sequence_processed = gp->global_sequence; + + if (MC_EVENT_LOGGING) + { + ELOG_TYPE_DECLARE (t) = { + .format = "catchup replay local sequence 0x%x", + .format_args = "i4", + }; + struct { u32 local_sequence; } * ed; + ed = ELOG_DATA (mcm->elog_main, t); + ed->local_sequence = gp->local_sequence; + } + } + else + { + if (MC_EVENT_LOGGING) + { + ELOG_TYPE_DECLARE (t) = { + .format = "catchup discard local sequence 0x%x", + .format_args = "i4", + }; + struct { u32 local_sequence; } * ed; + ed = ELOG_DATA (mcm->elog_main, t); + ed->local_sequence = 
gp->local_sequence; + } + + vlib_buffer_free_one (mcm->vlib_main, bi); + } + } + + s->state = MC_STREAM_STATE_ready; + + /* Now that we are caught up wake up joining process. */ + { + vlib_one_time_waiting_process_t * wp; + vec_foreach (wp, s->procs_waiting_for_join_done) + vlib_signal_one_time_waiting_process (mcm->vlib_main, wp); + if (s->procs_waiting_for_join_done) + _vec_len (s->procs_waiting_for_join_done) = 0; + } +} + +static void this_node_maybe_master (mc_main_t * mcm) +{ + vlib_main_t * vm = mcm->vlib_main; + mc_msg_master_assert_t * mp; + uword event_type; + int timeouts = 0; + int is_master = mcm->relay_state == MC_RELAY_STATE_MASTER; + clib_error_t * error; + f64 now, time_last_master_assert = -1; + u32 bi; + + while (1) + { + if (! mcm->we_can_be_relay_master) + { + mcm->relay_state = MC_RELAY_STATE_SLAVE; + if (MC_EVENT_LOGGING) + { + ELOG_TYPE (e, "become slave (config)"); + ELOG (mcm->elog_main, e, 0); + } + return; + } + + now = vlib_time_now (vm); + if (now >= time_last_master_assert + 1) + { + time_last_master_assert = now; + mp = mc_get_vlib_buffer (mcm->vlib_main, sizeof (mp[0]), &bi); + + mp->peer_id = mcm->transport.our_ack_peer_id; + mp->global_sequence = mcm->relay_global_sequence; + + /* + * these messages clog the event log, set MC_EVENT_LOGGING higher + * if you want them + */ + if (MC_EVENT_LOGGING > 1) + { + ELOG_TYPE_DECLARE (e) = { + .format = "tx-massert: peer %s global seq %u", + .format_args = "T4i4", + }; + struct { u32 peer, global_sequence; } * ed; + ed = ELOG_DATA (mcm->elog_main, e); + ed->peer = elog_id_for_peer_id (mcm, mp->peer_id.as_u64); + ed->global_sequence = mp->global_sequence; + } + + mc_byte_swap_msg_master_assert (mp); + + error = mcm->transport.tx_buffer (mcm->transport.opaque, MC_TRANSPORT_MASTERSHIP, bi); + if (error) + clib_error_report (error); + } + + vlib_process_wait_for_event_or_clock (vm, 1.0); + event_type = vlib_process_get_events (vm, /* no event data */ 0); + + switch (event_type) + { + case ~0: + 
if (! is_master && timeouts++ > 2) + { + mcm->relay_state = MC_RELAY_STATE_MASTER; + mcm->relay_master_peer_id = mcm->transport.our_ack_peer_id.as_u64; + if (MC_EVENT_LOGGING) + { + ELOG_TYPE (e, "become master (was maybe_master)"); + ELOG (mcm->elog_main, e, 0); + } + return; + } + break; + + case MC_RELAY_STATE_SLAVE: + mcm->relay_state = MC_RELAY_STATE_SLAVE; + if (MC_EVENT_LOGGING && mcm->relay_state != MC_RELAY_STATE_SLAVE) + { + ELOG_TYPE (e, "become slave (was maybe_master)"); + ELOG (mcm->elog_main, e, 0); + } + return; + } + } +} + +static void this_node_slave (mc_main_t * mcm) +{ + vlib_main_t * vm = mcm->vlib_main; + uword event_type; + int timeouts = 0; + + if (MC_EVENT_LOGGING) + { + ELOG_TYPE (e, "become slave"); + ELOG (mcm->elog_main, e, 0); + } + + while (1) + { + vlib_process_wait_for_event_or_clock (vm, 1.0); + event_type = vlib_process_get_events (vm, /* no event data */ 0); + + switch (event_type) + { + case ~0: + if (timeouts++ > 2) + { + mcm->relay_state = MC_RELAY_STATE_NEGOTIATE; + mcm->relay_master_peer_id = ~0ULL; + if (MC_EVENT_LOGGING) + { + ELOG_TYPE (e, "timeouts; negoitate mastership"); + ELOG (mcm->elog_main, e, 0); + } + return; + } + break; + + case MC_RELAY_STATE_SLAVE: + mcm->relay_state = MC_RELAY_STATE_SLAVE; + timeouts = 0; + break; + } + } +} + +static uword +mc_mastership_process (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * f) +{ + mc_main_t * mcm = mc_node_get_main (node); + + while (1) + { + switch (mcm->relay_state) + { + case MC_RELAY_STATE_NEGOTIATE: + case MC_RELAY_STATE_MASTER: + this_node_maybe_master(mcm); + break; + + case MC_RELAY_STATE_SLAVE: + this_node_slave (mcm); + break; + } + } + return 0; /* not likely */ +} + +void mc_enable_disable_mastership (mc_main_t * mcm, int we_can_be_master) +{ + if (we_can_be_master != mcm->we_can_be_relay_master) + { + mcm->we_can_be_relay_master = we_can_be_master; + vlib_process_signal_event (mcm->vlib_main, + mcm->mastership_process, + 
MC_RELAY_STATE_NEGOTIATE, 0); + } +} + +void mc_msg_master_assert_handler (mc_main_t * mcm, mc_msg_master_assert_t * mp, u32 buffer_index) +{ + mc_peer_id_t his_peer_id, our_peer_id; + i32 seq_cmp_result; + u8 signal_slave = 0; + u8 update_global_sequence = 0; + + mc_byte_swap_msg_master_assert (mp); + + his_peer_id = mp->peer_id; + our_peer_id = mcm->transport.our_ack_peer_id; + + /* compare the incoming global sequence with ours */ + seq_cmp_result = mc_seq_cmp (mp->global_sequence, + mcm->relay_global_sequence); + + /* If the sender has a lower peer id and the sender's sequence >= + our global sequence, we become a slave. Otherwise we are master. */ + if (mc_peer_id_compare (his_peer_id, our_peer_id) < 0 && seq_cmp_result >= 0) + { + vlib_process_signal_event (mcm->vlib_main, + mcm->mastership_process, + MC_RELAY_STATE_SLAVE, 0); + signal_slave = 1; + } + + /* Update our global sequence. */ + if (seq_cmp_result > 0) + { + mcm->relay_global_sequence = mp->global_sequence; + update_global_sequence = 1; + } + + { + uword * q = mhash_get (&mcm->mastership_peer_index_by_id, &his_peer_id); + mc_mastership_peer_t * p; + + if (q) + p = vec_elt_at_index (mcm->mastership_peers, q[0]); + else + { + vec_add2 (mcm->mastership_peers, p, 1); + p->peer_id = his_peer_id; + mhash_set (&mcm->mastership_peer_index_by_id, &p->peer_id, p - mcm->mastership_peers, + /* old_value */ 0); + } + p->time_last_master_assert_received = vlib_time_now (mcm->vlib_main); + } + + /* + * these messages clog the event log, set MC_EVENT_LOGGING higher + * if you want them. 
+ */ + if (MC_EVENT_LOGGING > 1) + { + ELOG_TYPE_DECLARE (e) = { + .format = "rx-massert: peer %s global seq %u upd %d slave %d", + .format_args = "T4i4i1i1", + }; + struct { + u32 peer; + u32 global_sequence; + u8 update_sequence; + u8 slave; + } * ed; + ed = ELOG_DATA (mcm->elog_main, e); + ed->peer = elog_id_for_peer_id (mcm, his_peer_id.as_u64); + ed->global_sequence = mp->global_sequence; + ed->update_sequence = update_global_sequence; + ed->slave = signal_slave; + } +} + +static void +mc_serialize_init (mc_main_t * mcm) +{ + mc_serialize_msg_t * m; + vlib_main_t * vm = vlib_get_main(); + + mcm->global_msg_index_by_name + = hash_create_string (/* elts */ 0, sizeof (uword)); + + m = vm->mc_msg_registrations; + + while (m) + { + m->global_index = vec_len (mcm->global_msgs); + hash_set_mem (mcm->global_msg_index_by_name, + m->name, + m->global_index); + vec_add1 (mcm->global_msgs, m); + m = m->next_registration; + } +} + +clib_error_t * +mc_serialize_va (mc_main_t * mc, + u32 stream_index, + u32 multiple_messages_per_vlib_buffer, + mc_serialize_msg_t * msg, + va_list * va) +{ + mc_stream_t * s; + clib_error_t * error; + serialize_main_t * m = &mc->serialize_mains[VLIB_TX]; + vlib_serialize_buffer_main_t * sbm = &mc->serialize_buffer_mains[VLIB_TX]; + u32 bi, n_before, n_after, n_total, n_this_msg; + u32 si, gi; + + if (! sbm->vlib_main) + { + sbm->tx.max_n_data_bytes_per_chain = 4096; + sbm->tx.free_list_index = VLIB_BUFFER_DEFAULT_FREE_LIST_INDEX; + } + + if (sbm->first_buffer == 0) + serialize_open_vlib_buffer (m, mc->vlib_main, sbm); + + n_before = serialize_vlib_buffer_n_bytes (m); + + s = mc_stream_by_index (mc, stream_index); + gi = msg->global_index; + ASSERT (msg == vec_elt (mc->global_msgs, gi)); + + si = ~0; + if (gi < vec_len (s->stream_msg_index_by_global_index)) + si = s->stream_msg_index_by_global_index[gi]; + + serialize_likely_small_unsigned_integer (m, si); + + /* For first time message is sent, use name to identify message. 
*/ + if (si == ~0 || MSG_ID_DEBUG) + serialize_cstring (m, msg->name); + + if (MSG_ID_DEBUG && MC_EVENT_LOGGING > 0) + { + ELOG_TYPE_DECLARE (e) = { + .format = "serialize-msg: %s index %d", + .format_args = "T4i4", + }; + struct { u32 c[2]; } * ed; + ed = ELOG_DATA (mc->elog_main, e); + ed->c[0] = elog_id_for_msg_name (mc, msg->name); + ed->c[1] = si; + } + + error = va_serialize (m, va); + + n_after = serialize_vlib_buffer_n_bytes (m); + n_this_msg = n_after - n_before; + n_total = n_after + sizeof (mc_msg_user_request_t); + + /* For max message size ignore first message where string name is sent. */ + if (si != ~0) + msg->max_n_bytes_serialized = clib_max (msg->max_n_bytes_serialized, n_this_msg); + + if (! multiple_messages_per_vlib_buffer + || si == ~0 + || n_total + msg->max_n_bytes_serialized > mc->transport.max_packet_size) + { + bi = serialize_close_vlib_buffer (m); + sbm->first_buffer = 0; + if (! error) + mc_stream_send (mc, stream_index, bi); + else if (bi != ~0) + vlib_buffer_free_one (mc->vlib_main, bi); + } + + return error; +} + +clib_error_t * +mc_serialize_internal (mc_main_t * mc, + u32 stream_index, + u32 multiple_messages_per_vlib_buffer, + mc_serialize_msg_t * msg, + ...) +{ + vlib_main_t * vm = mc->vlib_main; + va_list va; + clib_error_t * error; + + if (stream_index == ~0) + { + if (vm->mc_main && vm->mc_stream_index == ~0) + vlib_current_process_wait_for_one_time_event_vector + (vm, &vm->procs_waiting_for_mc_stream_join); + stream_index = vm->mc_stream_index; + } + + va_start (va, msg); + error = mc_serialize_va (mc, stream_index, + multiple_messages_per_vlib_buffer, + msg, &va); + va_end (va); + return error; +} + +uword mc_unserialize_message (mc_main_t * mcm, + mc_stream_t * s, + serialize_main_t * m) +{ + mc_serialize_stream_msg_t * sm; + u32 gi, si; + + si = unserialize_likely_small_unsigned_integer (m); + + if (! 
(si == ~0 || MSG_ID_DEBUG)) + { + sm = vec_elt_at_index (s->stream_msgs, si); + gi = sm->global_index; + } + else + { + char * name; + + unserialize_cstring (m, &name); + + if (MSG_ID_DEBUG && MC_EVENT_LOGGING > 0) + { + ELOG_TYPE_DECLARE (e) = { + .format = "unserialize-msg: %s rx index %d", + .format_args = "T4i4", + }; + struct { u32 c[2]; } * ed; + ed = ELOG_DATA (mcm->elog_main, e); + ed->c[0] = elog_id_for_msg_name (mcm, name); + ed->c[1] = si; + } + + { + uword * p = hash_get_mem (mcm->global_msg_index_by_name, name); + gi = p ? p[0] : ~0; + } + + /* Unknown message? */ + if (gi == ~0) + { + vec_free (name); + goto done; + } + + vec_validate_init_empty (s->stream_msg_index_by_global_index, gi, ~0); + si = s->stream_msg_index_by_global_index[gi]; + + /* Stream local index unknown? Create it. */ + if (si == ~0) + { + vec_add2 (s->stream_msgs, sm, 1); + + si = sm - s->stream_msgs; + sm->global_index = gi; + s->stream_msg_index_by_global_index[gi] = si; + + if (MC_EVENT_LOGGING > 0) + { + ELOG_TYPE_DECLARE (e) = { + .format = "msg-bind: stream %d %s to index %d", + .format_args = "i4T4i4", + }; + struct { u32 c[3]; } * ed; + ed = ELOG_DATA (mcm->elog_main, e); + ed->c[0] = s->index; + ed->c[1] = elog_id_for_msg_name (mcm, name); + ed->c[2] = si; + } + } + else + { + sm = vec_elt_at_index (s->stream_msgs, si); + if (gi != sm->global_index && MC_EVENT_LOGGING > 0) + { + ELOG_TYPE_DECLARE (e) = { + .format = "msg-id-ERROR: %s index %d expected %d", + .format_args = "T4i4i4", + }; + struct { u32 c[3]; } * ed; + ed = ELOG_DATA (mcm->elog_main, e); + ed->c[0] = elog_id_for_msg_name (mcm, name); + ed->c[1] = si; + ed->c[2] = ~0; + if (sm->global_index < vec_len (s->stream_msg_index_by_global_index)) + ed->c[2] = s->stream_msg_index_by_global_index[sm->global_index]; + } + } + + vec_free (name); + } + + if (gi != ~0) + { + mc_serialize_msg_t * msg; + msg = vec_elt (mcm->global_msgs, gi); + unserialize (m, msg->unserialize, mcm); + } + + done: + return gi != ~0; +} + 
+void +mc_unserialize_internal (mc_main_t * mcm, u32 stream_and_buffer_index) +{ + vlib_main_t * vm = mcm->vlib_main; + serialize_main_t * m = &mcm->serialize_mains[VLIB_RX]; + vlib_serialize_buffer_main_t * sbm = &mcm->serialize_buffer_mains[VLIB_RX]; + mc_stream_and_buffer_t * sb; + mc_stream_t * stream; + u32 buffer_index; + + sb = pool_elt_at_index (mcm->mc_unserialize_stream_and_buffers, stream_and_buffer_index); + buffer_index = sb->buffer_index; + stream = vec_elt_at_index (mcm->stream_vector, sb->stream_index); + pool_put (mcm->mc_unserialize_stream_and_buffers, sb); + + if (stream->config.save_snapshot) + { + u32 n_bytes = vlib_buffer_index_length_in_chain (vm, buffer_index); + static u8 * contents; + vec_reset_length (contents); + vec_validate (contents, n_bytes - 1); + vlib_buffer_contents (vm, buffer_index, contents); + stream->config.save_snapshot (mcm, /* is_catchup */ 0, contents, n_bytes); + } + + ASSERT (vlib_in_process_context (vm)); + + unserialize_open_vlib_buffer (m, vm, sbm); + + clib_fifo_add1 (sbm->rx.buffer_fifo, buffer_index); + + while (unserialize_vlib_buffer_n_bytes (m) > 0) + mc_unserialize_message (mcm, stream, m); + + /* Frees buffer. 
*/ + unserialize_close_vlib_buffer (m); +} + +void +mc_unserialize (mc_main_t * mcm, mc_stream_t * s, u32 buffer_index) +{ + vlib_main_t * vm = mcm->vlib_main; + mc_stream_and_buffer_t * sb; + pool_get (mcm->mc_unserialize_stream_and_buffers, sb); + sb->stream_index = s->index; + sb->buffer_index = buffer_index; + vlib_process_signal_event (vm, mcm->unserialize_process, + EVENT_MC_UNSERIALIZE_BUFFER, sb - mcm->mc_unserialize_stream_and_buffers); +} + +static uword +mc_unserialize_process (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * f) +{ + mc_main_t * mcm = mc_node_get_main (node); + uword event_type, * event_data = 0; + int i; + + while (1) + { + if (event_data) + _vec_len(event_data) = 0; + + vlib_process_wait_for_event (vm); + event_type = vlib_process_get_events (vm, &event_data); + switch (event_type) + { + case EVENT_MC_UNSERIALIZE_BUFFER: + for (i = 0; i < vec_len (event_data); i++) + mc_unserialize_internal (mcm, event_data[i]); + break; + + case EVENT_MC_UNSERIALIZE_CATCHUP: + for (i = 0; i < vec_len (event_data); i++) + { + u8 * mp = uword_to_pointer (event_data[i], u8 *); + perform_catchup (mcm, (void *) mp); + vec_free (mp); + } + break; + + default: + break; + } + } + + return 0; /* not likely */ +} + +void serialize_mc_main (serialize_main_t * m, va_list * va) +{ + mc_main_t * mcm = va_arg (*va, mc_main_t *); + mc_stream_t * s; + mc_serialize_stream_msg_t * sm; + mc_serialize_msg_t * msg; + + serialize_integer (m, vec_len (mcm->stream_vector), sizeof (u32)); + vec_foreach (s, mcm->stream_vector) + { + /* Stream name. */ + serialize_cstring (m, s->config.name); + + /* Serialize global names for all sent messages. 
*/ + serialize_integer (m, vec_len (s->stream_msgs), sizeof (u32)); + vec_foreach (sm, s->stream_msgs) + { + msg = vec_elt (mcm->global_msgs, sm->global_index); + serialize_cstring (m, msg->name); + } + } +} + +void unserialize_mc_main (serialize_main_t * m, va_list * va) +{ + mc_main_t * mcm = va_arg (*va, mc_main_t *); + u32 i, n_streams, n_stream_msgs; + char * name; + mc_stream_t * s; + mc_serialize_stream_msg_t * sm; + + unserialize_integer (m, &n_streams, sizeof (u32)); + for (i = 0; i < n_streams; i++) + { + unserialize_cstring (m, &name); + if (i != MC_STREAM_INDEX_INTERNAL + && ! mc_stream_by_name (mcm, name)) + { + vec_validate (mcm->stream_vector, i); + s = vec_elt_at_index (mcm->stream_vector, i); + mc_stream_init (s); + s->index = s - mcm->stream_vector; + s->config.name = name; + s->state = MC_STREAM_STATE_name_known; + hash_set_mem (mcm->stream_index_by_name, s->config.name, s->index); + } + else + vec_free (name); + + s = vec_elt_at_index (mcm->stream_vector, i); + + vec_free (s->stream_msgs); + vec_free (s->stream_msg_index_by_global_index); + + unserialize_integer (m, &n_stream_msgs, sizeof (u32)); + vec_resize (s->stream_msgs, n_stream_msgs); + vec_foreach (sm, s->stream_msgs) + { + uword * p; + u32 si, gi; + + unserialize_cstring (m, &name); + p = hash_get (mcm->global_msg_index_by_name, name); + gi = p ? 
p[0] : ~0; + si = sm - s->stream_msgs; + + if (MC_EVENT_LOGGING > 0) + { + ELOG_TYPE_DECLARE (e) = { + .format = "catchup-bind: %s to %d global index %d stream %d", + .format_args = "T4i4i4i4", + }; + struct { u32 c[4]; } * ed; + ed = ELOG_DATA (mcm->elog_main, e); + ed->c[0] = elog_id_for_msg_name (mcm, name); + ed->c[1] = si; + ed->c[2] = gi; + ed->c[3] = s->index; + } + + vec_free (name); + + sm->global_index = gi; + if (gi != ~0) + { + vec_validate_init_empty (s->stream_msg_index_by_global_index, + gi, ~0); + s->stream_msg_index_by_global_index[gi] = si; + } + } + } +} + +void mc_main_init (mc_main_t * mcm, char * tag) +{ + vlib_main_t * vm = vlib_get_main(); + + mcm->vlib_main = vm; + mcm->elog_main = &vm->elog_main; + + mcm->relay_master_peer_id = ~0ULL; + mcm->relay_state = MC_RELAY_STATE_NEGOTIATE; + + mcm->stream_index_by_name + = hash_create_string (/* elts */ 0, /* value size */ sizeof (uword)); + + { + vlib_node_registration_t r; + + memset (&r, 0, sizeof (r)); + + r.type = VLIB_NODE_TYPE_PROCESS; + + /* Point runtime data to main instance. 
*/ + r.runtime_data = &mcm; + r.runtime_data_bytes = sizeof (&mcm); + + r.name = (char *) format (0, "mc-mastership-%s", tag); + r.function = mc_mastership_process; + mcm->mastership_process = vlib_register_node (vm, &r); + + r.name = (char *) format (0, "mc-join-ager-%s", tag); + r.function = mc_join_ager_process; + mcm->join_ager_process = vlib_register_node (vm, &r); + + r.name = (char *) format (0, "mc-retry-%s", tag); + r.function = mc_retry_process; + mcm->retry_process = vlib_register_node (vm, &r); + + r.name = (char *) format (0, "mc-catchup-%s", tag); + r.function = mc_catchup_process; + mcm->catchup_process = vlib_register_node (vm, &r); + + r.name = (char *) format (0, "mc-unserialize-%s", tag); + r.function = mc_unserialize_process; + mcm->unserialize_process = vlib_register_node (vm, &r); + } + + if (MC_EVENT_LOGGING > 0) + mhash_init (&mcm->elog_id_by_peer_id, sizeof (uword), sizeof (mc_peer_id_t)); + + mhash_init (&mcm->mastership_peer_index_by_id, sizeof (uword), sizeof (mc_peer_id_t)); + mc_serialize_init (mcm); +} + +static u8 * format_mc_relay_state (u8 * s, va_list * args) +{ + mc_relay_state_t state = va_arg (*args, mc_relay_state_t); + char * t = 0; + switch (state) + { + case MC_RELAY_STATE_NEGOTIATE: + t = "negotiate"; + break; + case MC_RELAY_STATE_MASTER: + t = "master"; + break; + case MC_RELAY_STATE_SLAVE: + t = "slave"; + break; + default: + return format (s, "unknown 0x%x", state); + } + + return format (s, "%s", t); +} + +static u8 * format_mc_stream_state (u8 * s, va_list * args) +{ + mc_stream_state_t state = va_arg (*args, mc_stream_state_t); + char * t = 0; + switch (state) + { +#define _(f) case MC_STREAM_STATE_##f: t = #f; break; + foreach_mc_stream_state +#undef _ + default: + return format (s, "unknown 0x%x", state); + } + + return format (s, "%s", t); +} + +u8 * format_mc_main (u8 * s, va_list * args) +{ + mc_main_t * mcm = va_arg (*args, mc_main_t *); + mc_stream_t * t; + mc_stream_peer_t * p, * ps; + uword indent = 
format_get_indent (s); + + s = format (s, "MC state %U, %d streams joined, global sequence 0x%x", + format_mc_relay_state, mcm->relay_state, + vec_len (mcm->stream_vector), + mcm->relay_global_sequence); + + { + mc_mastership_peer_t * mp; + f64 now = vlib_time_now (mcm->vlib_main); + s = format (s, "\n%UMost recent mastership peers:", + format_white_space, indent + 2); + vec_foreach (mp, mcm->mastership_peers) + { + s = format (s, "\n%U%-30U%.4e", + format_white_space, indent + 4, + mcm->transport.format_peer_id, mp->peer_id, + now - mp->time_last_master_assert_received); + } + } + + vec_foreach (t, mcm->stream_vector) + { + s = format (s, "\n%Ustream `%s' index %d", + format_white_space, indent + 2, + t->config.name, t->index); + + s = format (s, "\n%Ustate %U", + format_white_space, indent + 4, + format_mc_stream_state, t->state); + + s = format (s, "\n%Uretries: interval %.0f sec, limit %d, pool elts %d, %Ld sent", + format_white_space, indent + 4, t->config.retry_interval, + t->config.retry_limit, + pool_elts (t->retry_pool), + t->stats.n_retries - t->stats_last_clear.n_retries); + + s = format (s, "\n%U%Ld/%Ld user requests sent/received", + format_white_space, indent + 4, + t->user_requests_sent, t->user_requests_received); + + s = format (s, "\n%U%d peers, local/global sequence 0x%x/0x%x", + format_white_space, indent + 4, + pool_elts (t->peers), + t->our_local_sequence, + t->last_global_sequence_processed); + + ps = 0; + pool_foreach (p, t->peers, + ({ + if (clib_bitmap_get (t->all_peer_bitmap, p - t->peers)) + vec_add1 (ps, p[0]); + })); + vec_sort (ps, p1, p2, mc_peer_id_compare (p1->id, p2->id)); + s = format (s, "\n%U%=30s%10s%16s%16s", + format_white_space, indent + 6, + "Peer", "Last seq", "Retries", "Future"); + + vec_foreach (p, ps) + { + s = format (s, "\n%U%-30U0x%08x%16Ld%16Ld%s", + format_white_space, indent + 6, + mcm->transport.format_peer_id, p->id.as_u64, + p->last_sequence_received, + p->stats.n_msgs_from_past - 
p->stats_last_clear.n_msgs_from_past, + p->stats.n_msgs_from_future - p->stats_last_clear.n_msgs_from_future, + (mcm->transport.our_ack_peer_id.as_u64 == p->id.as_u64 + ? " (self)" : "")); + } + vec_free (ps); + } + + return s; +} diff --git a/vlib/vlib/mc.h b/vlib/vlib/mc.h new file mode 100644 index 00000000000..55dce2822c6 --- /dev/null +++ b/vlib/vlib/mc.h @@ -0,0 +1,674 @@ +/* + * mc.h: vlib reliable sequenced multicast distributed applications + * + * Copyright (c) 2010 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef included_vlib_mc_h +#define included_vlib_mc_h + +#include <vppinfra/elog.h> +#include <vppinfra/fifo.h> +#include <vppinfra/mhash.h> +#include <vlib/node.h> + +#ifndef MC_EVENT_LOGGING +#define MC_EVENT_LOGGING 1 +#endif + +always_inline uword +mc_need_byte_swap (void) +{ return CLIB_ARCH_IS_LITTLE_ENDIAN; } + +/* + * Used to uniquely identify hosts. + * For IP4 this would be ip4_address plus tcp/udp port. + */ +typedef union { + u8 as_u8[8]; + u64 as_u64; +} mc_peer_id_t; + +always_inline mc_peer_id_t +mc_byte_swap_peer_id (mc_peer_id_t i) +{ + /* Peer id is already in network byte order. */ + return i; +} + +always_inline int +mc_peer_id_compare (mc_peer_id_t a, mc_peer_id_t b) +{ + return memcmp (a.as_u8, b.as_u8, sizeof (a.as_u8)); +} + +/* Assert mastership. Lowest peer_id amount all peers wins mastership. + Only sent/received over mastership channel (MC_TRANSPORT_MASTERSHIP). 
+ So, we don't need a message opcode. */ +typedef CLIB_PACKED (struct { + /* Peer id asserting mastership. */ + mc_peer_id_t peer_id; + + /* Global sequence number asserted. */ + u32 global_sequence; +}) mc_msg_master_assert_t; + +always_inline void +mc_byte_swap_msg_master_assert (mc_msg_master_assert_t * r) +{ + if (mc_need_byte_swap ()) + { + r->peer_id = mc_byte_swap_peer_id (r->peer_id); + r->global_sequence = clib_byte_swap_u32 (r->global_sequence); + } +} + +#define foreach_mc_msg_type \ + _ (master_assert) \ + _ (join_or_leave_request) \ + _ (join_reply) \ + _ (user_request) \ + _ (user_ack) \ + _ (catchup_request) \ + _ (catchup_reply) + +typedef enum { +#define _(f) MC_MSG_TYPE_##f, + foreach_mc_msg_type +#undef _ +} mc_relay_msg_type_t; + +/* Request to join a given stream. Multicast over MC_TRANSPORT_JOIN. */ +typedef CLIB_PACKED (struct { + mc_peer_id_t peer_id; + + mc_relay_msg_type_t type : 32; /* MC_MSG_TYPE_join_or_leave_request */ + + /* Stream to join or leave. */ + u32 stream_index; + + /* join = 1, leave = 0 */ + u8 is_join; +}) mc_msg_join_or_leave_request_t; + +always_inline void +mc_byte_swap_msg_join_or_leave_request (mc_msg_join_or_leave_request_t * r) +{ + if (mc_need_byte_swap ()) + { + r->peer_id = mc_byte_swap_peer_id (r->peer_id); + r->type = clib_byte_swap_u32 (r->type); + r->stream_index = clib_byte_swap_u32 (r->stream_index); + } +} + +/* Join reply. Multicast over MC_TRANSPORT_JOIN. */ +typedef CLIB_PACKED (struct { + mc_peer_id_t peer_id; + + mc_relay_msg_type_t type : 32; /* MC_MSG_TYPE_join_reply */ + + u32 stream_index; + + /* Peer ID to contact to catchup with this stream. 
*/ + mc_peer_id_t catchup_peer_id; +}) mc_msg_join_reply_t; + +always_inline void +mc_byte_swap_msg_join_reply (mc_msg_join_reply_t * r) +{ + if (mc_need_byte_swap ()) + { + r->peer_id = mc_byte_swap_peer_id (r->peer_id); + r->type = clib_byte_swap_u32 (r->type); + r->stream_index = clib_byte_swap_u32 (r->stream_index); + r->catchup_peer_id = mc_byte_swap_peer_id (r->catchup_peer_id); + } +} + +/* Generic (application) request. Multicast over MC_TRANSPORT_USER_REQUEST_TO_RELAY and then + relayed by relay master after filling in global sequence number. */ +typedef CLIB_PACKED (struct { + mc_peer_id_t peer_id; + + u32 stream_index; + + /* Global sequence number as filled in by relay master. */ + u32 global_sequence; + + /* Local sequence number as filled in by peer sending message. */ + u32 local_sequence; + + /* Size of request data. */ + u32 n_data_bytes; + + /* Opaque request data. */ + u8 data[0]; +}) mc_msg_user_request_t; + +always_inline void +mc_byte_swap_msg_user_request (mc_msg_user_request_t * r) +{ + if (mc_need_byte_swap ()) + { + r->peer_id = mc_byte_swap_peer_id (r->peer_id); + r->stream_index = clib_byte_swap_u32 (r->stream_index); + r->global_sequence = clib_byte_swap_u32 (r->global_sequence); + r->local_sequence = clib_byte_swap_u32 (r->local_sequence); + r->n_data_bytes = clib_byte_swap_u32 (r->n_data_bytes); + } +} + +/* Sent unicast over ACK channel. 
*/ +typedef CLIB_PACKED (struct { + mc_peer_id_t peer_id; + u32 global_sequence; + u32 stream_index; + u32 local_sequence; + i32 seq_cmp_result; +}) mc_msg_user_ack_t; + +always_inline void +mc_byte_swap_msg_user_ack (mc_msg_user_ack_t * r) +{ + if (mc_need_byte_swap ()) + { + r->peer_id = mc_byte_swap_peer_id (r->peer_id); + r->stream_index = clib_byte_swap_u32 (r->stream_index); + r->global_sequence = clib_byte_swap_u32 (r->global_sequence); + r->local_sequence = clib_byte_swap_u32 (r->local_sequence); + r->seq_cmp_result = clib_byte_swap_i32 (r->seq_cmp_result); + } +} + +/* Sent/received unicast over catchup channel (e.g. using TCP). */ +typedef CLIB_PACKED (struct { + mc_peer_id_t peer_id; + u32 stream_index; +}) mc_msg_catchup_request_t; + +always_inline void +mc_byte_swap_msg_catchup_request (mc_msg_catchup_request_t * r) +{ + if (mc_need_byte_swap ()) + { + r->peer_id = mc_byte_swap_peer_id (r->peer_id); + r->stream_index = clib_byte_swap_u32 (r->stream_index); + } +} + +/* Sent/received unicast over catchup channel. */ +typedef CLIB_PACKED (struct { + mc_peer_id_t peer_id; + + u32 stream_index; + + /* Last global sequence number included in catchup data. */ + u32 last_global_sequence_included; + + /* Size of catchup data. */ + u32 n_data_bytes; + + /* Catchup data. */ + u8 data[0]; +}) mc_msg_catchup_reply_t; + +always_inline void +mc_byte_swap_msg_catchup_reply (mc_msg_catchup_reply_t * r) +{ + if (mc_need_byte_swap ()) + { + r->peer_id = mc_byte_swap_peer_id (r->peer_id); + r->stream_index = clib_byte_swap_u32 (r->stream_index); + r->last_global_sequence_included = clib_byte_swap_u32 (r->last_global_sequence_included); + r->n_data_bytes = clib_byte_swap_u32 (r->n_data_bytes); + } +} + +typedef struct _mc_serialize_msg { + /* Name for this type. */ + char * name; + + /* Functions to serialize/unserialize data. */ + serialize_function_t * serialize; + serialize_function_t * unserialize; + + /* Maximum message size in bytes when serialized. 
+ If zero then this will be set to the largest sent message. */ + u32 max_n_bytes_serialized; + + /* Opaque to use for first argument to serialize/unserialize function. */ + u32 opaque; + + /* Index in global message vector. */ + u32 global_index; + + /* Registration list */ + struct _mc_serialize_msg * next_registration; +} mc_serialize_msg_t; + +typedef struct { + /* Index into global message vector. */ + u32 global_index; +} mc_serialize_stream_msg_t; + +#define MC_SERIALIZE_MSG(x,...) \ + __VA_ARGS__ mc_serialize_msg_t x; \ +static void __mc_serialize_msg_registration_##x (void) \ + __attribute__((__constructor__)) ; \ +static void __mc_serialize_msg_registration_##x (void) \ +{ \ + vlib_main_t * vm = vlib_get_main(); \ + x.next_registration = vm->mc_msg_registrations; \ + vm->mc_msg_registrations = &x; \ +} \ +__VA_ARGS__ mc_serialize_msg_t x + +typedef enum { + MC_TRANSPORT_MASTERSHIP, + MC_TRANSPORT_JOIN, + MC_TRANSPORT_USER_REQUEST_TO_RELAY, + MC_TRANSPORT_USER_REQUEST_FROM_RELAY, + MC_N_TRANSPORT_TYPE, +} mc_transport_type_t; + +typedef struct { + clib_error_t * (* tx_buffer) (void * opaque, mc_transport_type_t type, u32 buffer_index); + + clib_error_t * (* tx_ack) (void * opaque, mc_peer_id_t peer_id, u32 buffer_index); + + /* Returns catchup opaque. */ + uword (* catchup_request_fun) (void * opaque, u32 stream_index, mc_peer_id_t catchup_peer_id); + + void (* catchup_send_fun) (void * opaque, uword catchup_opaque, u8 * data_vector); + + /* Opaque passed to callbacks. */ + void * opaque; + + mc_peer_id_t our_ack_peer_id; + mc_peer_id_t our_catchup_peer_id; + + /* Max packet size (MTU) for this transport. + For IP this is interface MTU less IP + UDP header size. */ + u32 max_packet_size; + + format_function_t * format_peer_id; +} mc_transport_t; + +typedef struct { + /* Count of messages received from this peer from the past/future + (with seq_cmp != 0). 
*/ + u64 n_msgs_from_past; + u64 n_msgs_from_future; +} mc_stream_peer_stats_t; + +typedef struct { + /* ID of this peer. */ + mc_peer_id_t id; + + /* The last sequence we received from this peer. */ + u32 last_sequence_received; + + mc_stream_peer_stats_t stats, stats_last_clear; +} mc_stream_peer_t; + +typedef struct { + u32 buffer_index; + + /* Cached copy of local sequence number from buffer. */ + u32 local_sequence; + + /* Number of times this buffer has been sent (retried). */ + u32 n_retries; + + /* Previous/next retries in doubly-linked list. */ + u32 prev_index, next_index; + + /* Bitmap of all peers which have acked this msg */ + uword * unacked_by_peer_bitmap; + + /* Message send or resend time */ + f64 sent_at; +} mc_retry_t; + +typedef struct { + /* Number of retries sent for this stream. */ + u64 n_retries; +} mc_stream_stats_t; + +struct mc_main_t; +struct mc_stream_t; + +typedef struct { + /* Stream name. */ + char * name; + + /* Number of outstanding messages. */ + u32 window_size; + + /* Retry interval, in seconds */ + f64 retry_interval; + + /* Retry limit */ + u32 retry_limit; + + /* User rx buffer callback */ + void (* rx_buffer) (struct mc_main_t * mc_main, + struct mc_stream_t * stream, + mc_peer_id_t peer_id, + u32 buffer_index); + + /* User callback to create a snapshot */ + u8 * (* catchup_snapshot) (struct mc_main_t *mc_main, + u8 * snapshot_vector, + u32 last_global_sequence_included); + + /* User callback to replay a snapshot */ + void (* catchup) (struct mc_main_t *mc_main, + u8 * snapshot_data, + u32 n_snapshot_data_bytes); + + /* Callback to save a snapshot for offline replay */ + void (* save_snapshot) (struct mc_main_t *mc_main, + u32 is_catchup, + u8 * snapshot_data, + u32 n_snapshot_data_bytes); + + /* Called when a peer dies */ + void (* peer_died) (struct mc_main_t * mc_main, + struct mc_stream_t * stream, + mc_peer_id_t peer_id); +} mc_stream_config_t; + +#define foreach_mc_stream_state \ + _ (invalid) \ + _ (name_known) \ + _ 
(join_in_progress) \ + _ (catchup) \ + _ (ready) + +typedef enum { +#define _(f) MC_STREAM_STATE_##f, + foreach_mc_stream_state +#undef _ +} mc_stream_state_t; + +typedef struct mc_stream_t { + mc_stream_config_t config; + + mc_stream_state_t state; + + /* Index in stream pool. */ + u32 index; + + /* Stream index 0 is always for MC internal use. */ +#define MC_STREAM_INDEX_INTERNAL 0 + + mc_retry_t * retry_pool; + + /* Head and tail index of retry pool. */ + u32 retry_head_index, retry_tail_index; + + /* + * Country club for recently retired messages + * If the set of peers is expanding and a new peer + * misses a message, we can easily retire the FIFO + * element before we even know about the new peer + */ + mc_retry_t * retired_fifo; + + /* Hash mapping local sequence to retry pool index. */ + uword * retry_index_by_local_sequence; + + /* catch-up fifo of VLIB buffer indices. + start recording when catching up. */ + u32 * catchup_fifo; + + mc_stream_stats_t stats, stats_last_clear; + + /* Peer pool. */ + mc_stream_peer_t * peers; + + /* Bitmap with ones for all peers in peer pool. */ + uword * all_peer_bitmap; + + /* Map of 64 bit id to index in stream pool. */ + mhash_t peer_index_by_id; + + /* Timeout, in case we're alone in the world */ + f64 join_timeout; + + vlib_one_time_waiting_process_t * procs_waiting_for_join_done; + + vlib_one_time_waiting_process_t * procs_waiting_for_open_window; + + /* Next sequence number to use */ + u32 our_local_sequence; + + /* + * Last global sequence we processed. + * When supplying catchup data, we need to tell + * the client precisely where to start replaying + */ + u32 last_global_sequence_processed; + + /* Vector of unique messages we've sent on this stream. */ + mc_serialize_stream_msg_t * stream_msgs; + + /* Vector global message index into per stream message index. */ + u32 * stream_msg_index_by_global_index; + + /* Hashed by message name. 
*/ + uword * stream_msg_index_by_name; + + u64 user_requests_sent; + u64 user_requests_received; +} mc_stream_t; + +always_inline void +mc_stream_free (mc_stream_t * s) +{ + pool_free (s->retry_pool); + hash_free (s->retry_index_by_local_sequence); + clib_fifo_free (s->catchup_fifo); + pool_free (s->peers); + mhash_free (&s->peer_index_by_id); + vec_free (s->procs_waiting_for_join_done); + vec_free (s->procs_waiting_for_open_window); +} + +always_inline void +mc_stream_init (mc_stream_t * s) +{ + memset (s, 0, sizeof (s[0])); + s->retry_head_index = s->retry_tail_index = ~0; +} + +typedef struct { + u32 stream_index; + u32 catchup_opaque; + u8 *catchup_snapshot; +} mc_catchup_process_arg_t; + +typedef enum { + MC_RELAY_STATE_NEGOTIATE, + MC_RELAY_STATE_MASTER, + MC_RELAY_STATE_SLAVE, +} mc_relay_state_t; + +typedef struct { + mc_peer_id_t peer_id; + + f64 time_last_master_assert_received; +} mc_mastership_peer_t; + +typedef struct { + u32 stream_index; + u32 buffer_index; +} mc_stream_and_buffer_t; + +typedef struct mc_main_t { + mc_relay_state_t relay_state; + + /* Mastership */ + u32 we_can_be_relay_master; + + u64 relay_master_peer_id; + + mc_mastership_peer_t * mastership_peers; + + /* Map of 64 bit id to index in stream pool. */ + mhash_t mastership_peer_index_by_id; + + /* The transport we're using. */ + mc_transport_t transport; + + /* Last-used global sequence number. */ + u32 relay_global_sequence; + + /* Vector of streams. */ + mc_stream_t * stream_vector; + + /* Hash table mapping stream name to pool index. */ + uword * stream_index_by_name; + + uword * procs_waiting_for_stream_name_by_name; + + vlib_one_time_waiting_process_t ** procs_waiting_for_stream_name_pool; + + int joins_in_progress; + + mc_catchup_process_arg_t * catchup_process_args; + + /* Node indices for mastership, join ager, + retry and catchup processes. 
*/ + u32 mastership_process; + u32 join_ager_process; + u32 retry_process; + u32 catchup_process; + u32 unserialize_process; + + /* Global vector of messages. */ + mc_serialize_msg_t ** global_msgs; + + /* Hash table mapping message name to index. */ + uword * global_msg_index_by_name; + + /* Shared serialize/unserialize main. */ + serialize_main_t serialize_mains[VLIB_N_RX_TX]; + + vlib_serialize_buffer_main_t serialize_buffer_mains[VLIB_N_RX_TX]; + + /* Convenience variables */ + struct vlib_main_t * vlib_main; + elog_main_t * elog_main; + + /* Maps 64 bit peer id to elog string table offset for this formatted peer id. */ + mhash_t elog_id_by_peer_id; + + uword *elog_id_by_msg_name; + + /* For mc_unserialize. */ + mc_stream_and_buffer_t * mc_unserialize_stream_and_buffers; +} mc_main_t; + +always_inline mc_stream_t * +mc_stream_by_name (mc_main_t * m, char * name) +{ + uword * p = hash_get (m->stream_index_by_name, name); + return p ? vec_elt_at_index (m->stream_vector, p[0]) : 0; +} + +always_inline mc_stream_t * +mc_stream_by_index (mc_main_t * m, u32 i) +{ + return i < vec_len (m->stream_vector) ? m->stream_vector + i : 0; +} + +always_inline void +mc_clear_stream_stats (mc_main_t * m) +{ + mc_stream_t * s; + mc_stream_peer_t * p; + vec_foreach (s, m->stream_vector) + { + s->stats_last_clear = s->stats; + pool_foreach (p, s->peers, ({ + p->stats_last_clear = p->stats; + })); + } +} + +/* Declare all message handlers. 
*/ +#define _(f) void mc_msg_##f##_handler (mc_main_t * mcm, mc_msg_##f##_t * msg, u32 buffer_index); +foreach_mc_msg_type +#undef _ + +u32 mc_stream_join (mc_main_t * mcm, mc_stream_config_t *); + +void mc_stream_leave (mc_main_t * mcm, u32 stream_index); + +void mc_wait_for_stream_ready (mc_main_t * m, char * stream_name); + +u32 mc_stream_send (mc_main_t * mcm, u32 stream_index, u32 buffer_index); + +void mc_main_init (mc_main_t * mcm, char * tag); + +void mc_enable_disable_mastership (mc_main_t * mcm, int we_can_be_master); + +void * mc_get_vlib_buffer (struct vlib_main_t * vm, u32 n_bytes, u32 * bi_return); + +format_function_t format_mc_main; + +clib_error_t * +mc_serialize_internal (mc_main_t * mc, + u32 stream_index, + u32 multiple_messages_per_vlib_buffer, + mc_serialize_msg_t * msg, + ...); + +clib_error_t * +mc_serialize_va (mc_main_t * mc, + u32 stream_index, + u32 multiple_messages_per_vlib_buffer, + mc_serialize_msg_t * msg, + va_list * va); + +#define mc_serialize_stream(mc,si,msg,args...) \ + mc_serialize_internal((mc),(si),(0),(msg),(msg)->serialize,args) + +#define mc_serialize(mc,msg,args...) \ + mc_serialize_internal((mc),(~0),(0),(msg),(msg)->serialize,args) + +#define mc_serialize2(mc,add,msg,args...) 
\ + mc_serialize_internal((mc),(~0),(add),(msg),(msg)->serialize,args) + +void mc_unserialize (mc_main_t * mcm, mc_stream_t * s, u32 buffer_index); +uword mc_unserialize_message (mc_main_t * mcm, mc_stream_t * s, + serialize_main_t * m); + +serialize_function_t serialize_mc_main, unserialize_mc_main; + +always_inline uword +mc_max_message_size_in_bytes (mc_main_t * mcm) +{ return mcm->transport.max_packet_size - sizeof (mc_msg_user_request_t); } + +always_inline word +mc_serialize_n_bytes_left (mc_main_t * mcm, serialize_main_t * m) +{ return mc_max_message_size_in_bytes (mcm) - serialize_vlib_buffer_n_bytes (m); } + +void unserialize_mc_stream (serialize_main_t * m, va_list * va); +void mc_stream_join_process_hold (void); + +#endif /* included_vlib_mc_h */ diff --git a/vlib/vlib/node.c b/vlib/vlib/node.c new file mode 100644 index 00000000000..4fb117e4f3e --- /dev/null +++ b/vlib/vlib/node.c @@ -0,0 +1,566 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +/* + * node.c: VLIB processing nodes + * + * Copyright (c) 2008 Eliot Dresselhaus + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#include <vlib/vlib.h> +#include <vlib/threads.h> + +/* Query node given name. */ +vlib_node_t * vlib_get_node_by_name (vlib_main_t * vm, u8 * name) +{ + vlib_node_main_t * nm = &vm->node_main; + uword * p; + u8 * key = name; + if (! clib_mem_is_heap_object (key)) + key = format (0, "%s", key); + p = hash_get (nm->node_by_name, key); + if (key != name) + vec_free (key); + return p ? 
vec_elt (nm->nodes, p[0]) : 0; +} + +static void node_set_elog_name (vlib_main_t * vm, uword node_index) +{ + vlib_node_t * n = vlib_get_node (vm, node_index); + elog_event_type_t * t; + + t = vec_elt_at_index (vm->node_call_elog_event_types, node_index); + vec_free (t->format); + t->format = (char *) format (0, "%v (%%d)", n->name); + + t = vec_elt_at_index (vm->node_return_elog_event_types, node_index); + vec_free (t->format); + t->format = (char *) format (0, "%v () = %%d", n->name); + + n->name_elog_string = elog_string (&vm->elog_main, "%v", n->name); +} + +void vlib_node_rename (vlib_main_t * vm, u32 node_index, char * fmt, ...) +{ + va_list va; + vlib_node_main_t * nm = &vm->node_main; + vlib_node_t * n = vlib_get_node (vm, node_index); + + va_start (va, fmt); + hash_unset (nm->node_by_name, n->name); + vec_free (n->name); + n->name = va_format (0, fmt, &va); + va_end (va); + hash_set (nm->node_by_name, n->name, n->index); + + node_set_elog_name (vm, node_index); +} + +static void +vlib_node_runtime_update (vlib_main_t * vm, + u32 node_index, + u32 next_index) +{ + vlib_node_main_t * nm = &vm->node_main; + vlib_node_runtime_t * r, * s; + vlib_node_t * node, * next_node; + vlib_next_frame_t * nf; + vlib_pending_frame_t * pf; + i32 i, j, n_insert; + + ASSERT(os_get_cpu_number() == 0); + + vlib_worker_thread_barrier_sync(vm); + + node = vec_elt (nm->nodes, node_index); + r = vlib_node_get_runtime (vm, node_index); + + n_insert = vec_len (node->next_nodes) - r->n_next_nodes; + if (n_insert > 0) + { + i = r->next_frame_index + r->n_next_nodes; + vec_insert (nm->next_frames, n_insert, i); + + /* Initialize newly inserted next frames. */ + for (j = 0; j < n_insert; j++) + vlib_next_frame_init (nm->next_frames + i + j); + + /* Relocate other next frames at higher indices. 
*/ + for (j = 0; j < vec_len (nm->nodes); j++) + { + s = vlib_node_get_runtime (vm, j); + if (j != node_index + && s->next_frame_index >= i) + s->next_frame_index += n_insert; + } + + /* Pending frames may need to be relocated also. */ + vec_foreach (pf, nm->pending_frames) + { + if (pf->next_frame_index != VLIB_PENDING_FRAME_NO_NEXT_FRAME + && pf->next_frame_index >= i) + pf->next_frame_index += n_insert; + } + pool_foreach (pf, nm->suspended_process_frames, ({ + if (pf->next_frame_index != ~0 && pf->next_frame_index >= i) + pf->next_frame_index += n_insert; + })); + + r->n_next_nodes = vec_len (node->next_nodes); + } + + /* Set frame's node runtime index. */ + next_node = vlib_get_node (vm, node->next_nodes[next_index]); + nf = nm->next_frames + r->next_frame_index + next_index; + nf->node_runtime_index = next_node->runtime_index; + + vlib_worker_thread_node_runtime_update(); + + vlib_worker_thread_barrier_release(vm); +} + +/* Add next node to given node in given slot. */ +uword +vlib_node_add_next_with_slot (vlib_main_t * vm, + uword node_index, + uword next_node_index, + uword slot) +{ + vlib_node_main_t * nm = &vm->node_main; + vlib_node_t * node, * next; + uword * p; + + node = vec_elt (nm->nodes, node_index); + next = vec_elt (nm->nodes, next_node_index); + + /* Fill in static next nodes if runtime has yet to be initialized. */ + if (slot == ~0 && ! (nm->flags & VLIB_NODE_MAIN_RUNTIME_STARTED)) + { + uword i; + for (i = 0; i < vec_len (node->next_node_names); i++) + { + char * a = node->next_node_names[i]; + if (a) + vlib_node_add_named_next_with_slot (vm, node->index, a, i); + } + } + + if ((p = hash_get (node->next_slot_by_node, next_node_index))) + { + /* Next already exists: slot must match. 
*/ + if (slot != ~0) + ASSERT (slot == p[0]); + return p[0]; + } + + if (slot == ~0) + slot = vec_len (node->next_nodes); + + vec_validate_init_empty (node->next_nodes, slot, ~0); + vec_validate (node->n_vectors_by_next_node, slot); + + node->next_nodes[slot] = next_node_index; + hash_set (node->next_slot_by_node, next_node_index, slot); + + vlib_node_runtime_update (vm, node_index, slot); + + next->prev_node_bitmap = clib_bitmap_ori (next->prev_node_bitmap, + node_index); + + /* Siblings all get same node structure. */ + { + uword sib_node_index, sib_slot; + vlib_node_t * sib_node; + clib_bitmap_foreach (sib_node_index, node->sibling_bitmap, ({ + sib_node = vec_elt (nm->nodes, sib_node_index); + if (sib_node != node) + { + sib_slot = vlib_node_add_next_with_slot (vm, sib_node_index, next_node_index, slot); + ASSERT (sib_slot == slot); + } + })); + } + + return slot; +} + +/* Add named next node to given node in given slot. */ +uword +vlib_node_add_named_next_with_slot (vlib_main_t * vm, + uword node, + char * name, + uword slot) +{ + vlib_node_main_t * nm; + vlib_node_t * n, * n_next; + + nm = &vm->node_main; + n = vlib_get_node (vm, node); + + n_next = vlib_get_node_by_name (vm, (u8 *) name); + if (! n_next) + { + if (nm->flags & VLIB_NODE_MAIN_RUNTIME_STARTED) + return ~0; + + if (slot == ~0) + slot = clib_max (vec_len (n->next_node_names), + vec_len (n->next_nodes)); + vec_validate (n->next_node_names, slot); + n->next_node_names[slot] = name; + return slot; + } + + return vlib_node_add_next_with_slot (vm, node, n_next->index, slot); +} + +static void node_elog_init (vlib_main_t * vm, uword ni) +{ + elog_event_type_t t; + + memset (&t, 0, sizeof (t)); + + /* 2 event types for this node: one when node function is called. + One when it returns. 
*/ + vec_validate (vm->node_call_elog_event_types, ni); + vm->node_call_elog_event_types[ni] = t; + + vec_validate (vm->node_return_elog_event_types, ni); + vm->node_return_elog_event_types[ni] = t; + + node_set_elog_name (vm, ni); +} + +#ifdef CLIB_UNIX +#define STACK_ALIGN 4096 +#else +#define STACK_ALIGN CLIB_CACHE_LINE_BYTES +#endif + +static void register_node (vlib_main_t * vm, + vlib_node_registration_t * r) +{ + vlib_node_main_t * nm = &vm->node_main; + vlib_node_t * n; + int i; + + if (CLIB_DEBUG > 0) + { + /* Default (0) type should match INTERNAL. */ + vlib_node_t zero = {0}; + ASSERT (VLIB_NODE_TYPE_INTERNAL == zero.type); + } + + ASSERT (r->function != 0); + + n = clib_mem_alloc_no_fail (sizeof (n[0])); + memset (n, 0, sizeof (n[0])); + n->index = vec_len (nm->nodes); + + vec_add1 (nm->nodes, n); + + /* Name is always a vector so it can be formatted with %v. */ + if (clib_mem_is_heap_object (vec_header (r->name, 0))) + n->name = vec_dup ((u8 *) r->name); + else + n->name = format (0, "%s", r->name); + + if (! nm->node_by_name) + nm->node_by_name = hash_create_vec (/* size */ 32, + sizeof (n->name[0]), + sizeof (uword)); + + /* Node names must be unique. */ + { + vlib_node_t * o = vlib_get_node_by_name (vm, n->name); + if (o) + clib_error ("more than one node named `%v'", n->name); + } + + hash_set (nm->node_by_name, n->name, n->index); + + r->index = n->index; /* save index in registration */ + n->function = r->function; + + /* Node index of next sibling will be filled in by vlib_node_main_init. */ + n->sibling_of = r->sibling_of; + + if (r->type == VLIB_NODE_TYPE_INTERNAL) + ASSERT (r->vector_size > 0); + +#define _(f) n->f = r->f + + _ (type); + _ (flags); + _ (state); + _ (scalar_size); + _ (vector_size); + _ (format_buffer); + _ (unformat_buffer); + _ (format_trace); + _ (validate_frame); + + /* Register error counters. 
*/ + vlib_register_errors (vm, n->index, r->n_errors, r->error_strings); + node_elog_init (vm, n->index); + + _ (runtime_data_bytes); + if (r->runtime_data_bytes > 0) + { + vec_resize (n->runtime_data, r->runtime_data_bytes); + if (r->runtime_data) + memcpy (n->runtime_data, r->runtime_data, r->runtime_data_bytes); + } + + vec_resize (n->next_node_names, r->n_next_nodes); + for (i = 0; i < r->n_next_nodes; i++) + n->next_node_names[i] = r->next_nodes[i]; + + vec_validate_init_empty (n->next_nodes, r->n_next_nodes - 1, ~0); + vec_validate (n->n_vectors_by_next_node, r->n_next_nodes - 1); + + n->owner_node_index = n->owner_next_index = ~0; + + /* Initialize node runtime. */ + { + vlib_node_runtime_t * rt; + u32 i; + + if (n->type == VLIB_NODE_TYPE_PROCESS) + { + vlib_process_t * p; + uword log2_n_stack_bytes; + + log2_n_stack_bytes = clib_max (r->process_log2_n_stack_bytes, 15); + + p = clib_mem_alloc_aligned_no_fail + (sizeof (p[0]) + (1 << log2_n_stack_bytes), + STACK_ALIGN); + + memset (p, 0, sizeof (p[0])); + p->log2_n_stack_bytes = log2_n_stack_bytes; + + /* Process node's runtime index is really index into process + pointer vector. */ + n->runtime_index = vec_len (nm->processes); + + vec_add1 (nm->processes, p); + + /* Paint first stack word with magic number so we can at least + detect process stack overruns. */ + p->stack[0] = VLIB_PROCESS_STACK_MAGIC; + + /* Node runtime is stored inside of process. */ + rt = &p->node_runtime; + +#ifdef CLIB_UNIX + /* + * Disallow writes to the bottom page of the stack, to + * catch stack overflows. 
+ */ + if (mprotect (p->stack, 4096, PROT_READ) < 0) + clib_unix_warning ("process stack"); +#endif + + } + else + { + vec_add2_aligned (nm->nodes_by_type[n->type], rt, 1, + /* align */ CLIB_CACHE_LINE_BYTES); + n->runtime_index = rt - nm->nodes_by_type[n->type]; + } + + if (n->type == VLIB_NODE_TYPE_INPUT) + nm->input_node_counts_by_state[n->state] += 1; + + rt->function = n->function; + rt->flags = n->flags; + rt->state = n->state; + rt->node_index = n->index; + + rt->n_next_nodes = r->n_next_nodes; + rt->next_frame_index = vec_len (nm->next_frames); + + vec_resize (nm->next_frames, rt->n_next_nodes); + for (i = 0; i < rt->n_next_nodes; i++) + vlib_next_frame_init (nm->next_frames + rt->next_frame_index + i); + + vec_resize (rt->errors, r->n_errors); + for (i = 0; i < vec_len (rt->errors); i++) + rt->errors[i] = vlib_error_set (n->index, i); + + ASSERT (vec_len (n->runtime_data) <= sizeof (rt->runtime_data)); + if (vec_len (n->runtime_data) > 0) + memcpy (rt->runtime_data, n->runtime_data, vec_len (n->runtime_data)); + + vec_free (n->runtime_data); + } +} + +/* Register new packet processing node. */ +u32 vlib_register_node (vlib_main_t * vm, vlib_node_registration_t * r) +{ + register_node (vm, r); + return r->index; +} + +void vlib_register_all_static_nodes (vlib_main_t * vm) +{ + vlib_node_registration_t * r; + + r = vm->node_main.node_registrations; + while (r) { + register_node (vm, r); + r = r->next_registration; + } +} + +clib_error_t * +vlib_node_main_init (vlib_main_t * vm) +{ + vlib_node_main_t * nm = &vm->node_main; + clib_error_t * error = 0; + vlib_node_t * n; + uword ni; + + nm->flags |= VLIB_NODE_MAIN_RUNTIME_STARTED; + + /* Resolve next names into next indices. */ + for (ni = 0; ni < vec_len (nm->nodes); ni++) + { + uword i; + + n = vec_elt (nm->nodes, ni); + + for (i = 0; i < vec_len (n->next_node_names); i++) + { + char * a = n->next_node_names[i]; + + if (! 
a) + continue; + + if (~0 == vlib_node_add_named_next_with_slot (vm, n->index, a, i)) + { + error = clib_error_create + ("node `%v' refers to unknown node `%s'", n->name, a); + goto done; + } + } + + vec_free (n->next_node_names); + } + + /* Set previous node pointers. */ + for (ni = 0; ni < vec_len (nm->nodes); ni++) + { + vlib_node_t * n_next; + uword i; + + n = vec_elt (nm->nodes, ni); + + for (i = 0; i < vec_len (n->next_nodes); i++) + { + if (n->next_nodes[i] >= vec_len (nm->nodes)) + continue; + + n_next = vec_elt (nm->nodes, n->next_nodes[i]); + n_next->prev_node_bitmap = + clib_bitmap_ori (n_next->prev_node_bitmap, n->index); + } + } + + { + vlib_next_frame_t * nf; + vlib_node_runtime_t * r; + vlib_node_t * next; + uword i; + + vec_foreach (r, nm->nodes_by_type[VLIB_NODE_TYPE_INTERNAL]) + { + if (r->n_next_nodes == 0) + continue; + + n = vlib_get_node (vm, r->node_index); + nf = vec_elt_at_index (nm->next_frames, r->next_frame_index); + + for (i = 0; i < vec_len (n->next_nodes); i++) + { + next = vlib_get_node (vm, n->next_nodes[i]); + + /* Validate node runtime indices are correctly initialized. */ + ASSERT (nf[i].node_runtime_index == next->runtime_index); + + nf[i].flags = 0; + if (next->flags & VLIB_NODE_FLAG_FRAME_NO_FREE_AFTER_DISPATCH) + nf[i].flags |= VLIB_FRAME_NO_FREE_AFTER_DISPATCH; + } + } + } + + /* Generate node sibling relationships. */ + { + vlib_node_t * n, * sib; + uword si; + + for (ni = 0; ni < vec_len (nm->nodes); ni++) + { + n = vec_elt (nm->nodes, ni); + + if (! n->sibling_of) + continue; + + sib = vlib_get_node_by_name (vm, (u8 *) n->sibling_of); + if (! sib) + clib_error ("sibling `%s' not found for node `%v'", n->sibling_of, n->name); + + clib_bitmap_foreach (si, sib->sibling_bitmap, ({ + vlib_node_t * m = vec_elt (nm->nodes, si); + + /* Connect all of sibling's siblings to us. */ + m->sibling_bitmap = clib_bitmap_ori (m->sibling_bitmap, n->index); + + /* Connect us to all of sibling's siblings. 
*/ + n->sibling_bitmap = clib_bitmap_ori (n->sibling_bitmap, si); + })); + + /* Connect sibling to us. */ + sib->sibling_bitmap = clib_bitmap_ori (sib->sibling_bitmap, n->index); + + /* Connect us to sibling. */ + n->sibling_bitmap = clib_bitmap_ori (n->sibling_bitmap, sib->index); + } + } + + done: + return error; +} diff --git a/vlib/vlib/node.h b/vlib/vlib/node.h new file mode 100644 index 00000000000..806a9dae1b5 --- /dev/null +++ b/vlib/vlib/node.h @@ -0,0 +1,621 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/* + * node.h: VLIB processing nodes + * + * Copyright (c) 2008 Eliot Dresselhaus + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#ifndef included_vlib_node_h +#define included_vlib_node_h + +#include <vppinfra/longjmp.h> +#include <vppinfra/timing_wheel.h> +#include <vlib/trace.h> /* for vlib_trace_filter_t */ + +/* Forward declaration. */ +struct vlib_node_runtime_t; +struct vlib_frame_t; + +/* Internal nodes (including output nodes) move data from node to + node (or out of the graph for output nodes). */ +typedef uword (vlib_node_function_t) (struct vlib_main_t * vm, + struct vlib_node_runtime_t * node, + struct vlib_frame_t * frame); + +typedef enum { + /* An internal node on the call graph (could be output). */ + VLIB_NODE_TYPE_INTERNAL, + + /* Nodes which input data into the processing graph. + Input nodes are called for each iteration of main loop. */ + VLIB_NODE_TYPE_INPUT, + + /* Nodes to be called before all input nodes. + Used, for example, to clean out driver TX rings before + processing input. */ + VLIB_NODE_TYPE_PRE_INPUT, + + /* "Process" nodes which can be suspended and later resumed. */ + VLIB_NODE_TYPE_PROCESS, + + VLIB_N_NODE_TYPE, +} vlib_node_type_t; + +typedef struct _vlib_node_registration { + /* Vector processing function for this node. */ + vlib_node_function_t * function; + + /* Node name. */ + char * name; + + /* Name of sibling (if applicable). */ + char * sibling_of; + + /* Node index filled in by registration. */ + u32 index; + + /* Type of this node. */ + vlib_node_type_t type; + + /* Error strings indexed by error code for this node. 
*/ + char ** error_strings; + + /* Buffer format/unformat for this node. */ + format_function_t * format_buffer; + unformat_function_t * unformat_buffer; + + /* Trace format/unformat for this node. */ + format_function_t * format_trace; + unformat_function_t * unformat_trace; + + /* Function to validate incoming frames. */ + u8 * (* validate_frame) (struct vlib_main_t * vm, + struct vlib_node_runtime_t *, + struct vlib_frame_t * f); + + /* Per-node runtime data. */ + void * runtime_data; + + /* Process stack size. */ + u16 process_log2_n_stack_bytes; + + /* Number of bytes of per-node run time data. */ + u8 runtime_data_bytes; + + /* State for input nodes. */ + u8 state; + + /* Node flags. */ + u16 flags; + + /* Size of scalar and vector arguments in bytes. */ + u16 scalar_size, vector_size; + + /* Number of error codes used by this node. */ + u16 n_errors; + + /* Number of next node names that follow. */ + u16 n_next_nodes; + + /* Constructor link-list, don't ask... */ + struct _vlib_node_registration * next_registration; + + /* Names of next nodes which this node feeds into. */ + char * next_nodes[]; + +} vlib_node_registration_t; + +#define VLIB_REGISTER_NODE(x,...) \ + __VA_ARGS__ vlib_node_registration_t x; \ +static void __vlib_add_node_registration_##x (void) \ + __attribute__((__constructor__)) ; \ +static void __vlib_add_node_registration_##x (void) \ +{ \ + vlib_main_t * vm = vlib_get_main(); \ + x.next_registration = vm->node_main.node_registrations; \ + vm->node_main.node_registrations = &x; \ +} \ +__VA_ARGS__ vlib_node_registration_t x + +always_inline vlib_node_registration_t * +vlib_node_next_registered (vlib_node_registration_t * c) +{ + c = clib_elf_section_data_next (c, c->n_next_nodes * sizeof (c->next_nodes[0])); + return c; +} + +typedef struct { + /* Total calls, clock ticks and vector elements processed for this node. 
*/ + u64 calls, vectors, clocks, suspends; + u64 max_clock; + u64 max_clock_n; +} vlib_node_stats_t; + +#define foreach_vlib_node_state \ + /* Input node is called each iteration of main loop. \ + This is the default (zero). */ \ + _ (POLLING) \ + /* Input node is called when device signals an interrupt. */ \ + _ (INTERRUPT) \ + /* Input node is never called. */ \ + _ (DISABLED) + +typedef enum { +#define _(f) VLIB_NODE_STATE_##f, + foreach_vlib_node_state +#undef _ + VLIB_N_NODE_STATE, +} vlib_node_state_t; + +typedef struct vlib_node_t { + /* Vector processing function for this node. */ + vlib_node_function_t * function; + + /* Node name. */ + u8 * name; + + /* Node name index in elog string table. */ + u32 name_elog_string; + + /* Total statistics for this node. */ + vlib_node_stats_t stats_total; + + /* Saved values as of last clear (or zero if never cleared). + Current values are always stats_total - stats_last_clear. */ + vlib_node_stats_t stats_last_clear; + + /* Type of this node. */ + vlib_node_type_t type; + + /* Node index. */ + u32 index; + + /* Index of corresponding node runtime. */ + u32 runtime_index; + + /* Runtime data for this node. */ + void * runtime_data; + + /* Node flags. */ + u16 flags; + + /* Processing function keeps frame. Tells node dispatching code not + to free frame after dispatch is done. */ +#define VLIB_NODE_FLAG_FRAME_NO_FREE_AFTER_DISPATCH (1 << 0) + + /* Node counts as output/drop/punt node for stats purposes. */ +#define VLIB_NODE_FLAG_IS_OUTPUT (1 << 1) +#define VLIB_NODE_FLAG_IS_DROP (1 << 2) +#define VLIB_NODE_FLAG_IS_PUNT (1 << 3) +#define VLIB_NODE_FLAG_IS_HANDOFF (1 << 4) + + /* Set if current node runtime has traced vectors. */ +#define VLIB_NODE_FLAG_TRACE (1 << 5) + +#define VLIB_NODE_FLAG_SWITCH_FROM_INTERRUPT_TO_POLLING_MODE (1 << 6) +#define VLIB_NODE_FLAG_SWITCH_FROM_POLLING_TO_INTERRUPT_MODE (1 << 7) + + /* State for input nodes. */ + u8 state; + + /* Number of bytes of run time data. 
*/ + u8 runtime_data_bytes; + + /* Number of error codes used by this node. */ + u16 n_errors; + + /* Size of scalar and vector arguments in bytes. */ + u16 scalar_size, vector_size; + + /* Handle/index in error heap for this node. */ + u32 error_heap_handle; + u32 error_heap_index; + + /* Error strings indexed by error code for this node. */ + char ** error_strings; + + /* Vector of next node names. + Only used before next_nodes array is initialized. */ + char ** next_node_names; + + /* Next node indices for this node. */ + u32 * next_nodes; + + /* Name of node that we are sibling of. */ + char * sibling_of; + + /* Bitmap of all of this node's siblings. */ + uword * sibling_bitmap; + + /* Total number of vectors sent to each next node. */ + u64 * n_vectors_by_next_node; + + /* Hash table mapping next node index into slot in + next_nodes vector. Quickly determines whether this node + is connected to given next node and, if so, with which slot. */ + uword * next_slot_by_node; + + /* Bitmap of node indices which feed this node. */ + uword * prev_node_bitmap; + + /* Node/next-index which own enqueue rights with to this node. */ + u32 owner_node_index, owner_next_index; + + /* Buffer format/unformat for this node. */ + format_function_t * format_buffer; + unformat_function_t * unformat_buffer; + + /* Trace buffer format/unformat for this node. */ + format_function_t * format_trace; + + /* Function to validate incoming frames. */ + u8 * (* validate_frame) (struct vlib_main_t * vm, + struct vlib_node_runtime_t *, + struct vlib_frame_t * f); +} vlib_node_t; + +#define VLIB_INVALID_NODE_INDEX ((u32) ~0) + +/* Max number of vector elements to process at once per node. */ +#define VLIB_FRAME_SIZE 256 + +/* Calling frame (think stack frame) for a node. */ +typedef struct vlib_frame_t { + /* Frame flags. */ + u16 flags; + + /* Number of scalar bytes in arguments. */ + u8 scalar_size; + + /* Number of bytes per vector argument. 
*/ + u8 vector_size; + + /* Number of vector elements currently in frame. */ + u16 n_vectors; + + /* Owner cpuid / heap id */ + u16 cpu_index; + + /* Scalar and vector arguments to next node. */ + u8 arguments[0]; +} vlib_frame_t; + +typedef struct { + /* Frame index. */ + u32 frame_index; + + /* Node runtime for this next. */ + u32 node_runtime_index; + + /* Next frame flags. */ + u32 flags; + + /* Reflects node frame-used flag for this next. */ +#define VLIB_FRAME_NO_FREE_AFTER_DISPATCH \ + VLIB_NODE_FLAG_FRAME_NO_FREE_AFTER_DISPATCH + + /* This next frame owns enqueue to node + corresponding to node_runtime_index. */ +#define VLIB_FRAME_OWNER (1 << 15) + + /* Set when frame has been allocated for this next. */ +#define VLIB_FRAME_IS_ALLOCATED VLIB_NODE_FLAG_IS_OUTPUT + + /* Set when frame has been added to pending vector. */ +#define VLIB_FRAME_PENDING VLIB_NODE_FLAG_IS_DROP + + /* Set when frame is to be freed after dispatch. */ +#define VLIB_FRAME_FREE_AFTER_DISPATCH VLIB_NODE_FLAG_IS_PUNT + + /* Set when frame has traced packets. */ +#define VLIB_FRAME_TRACE VLIB_NODE_FLAG_TRACE + + /* Number of vectors enqueue to this next since last overflow. */ + u32 vectors_since_last_overflow; +} vlib_next_frame_t; + +always_inline void +vlib_next_frame_init (vlib_next_frame_t * nf) +{ + memset (nf, 0, sizeof (nf[0])); + nf->frame_index = ~0; + nf->node_runtime_index = ~0; +} + +/* A frame pending dispatch by main loop. */ +typedef struct { + /* Node and runtime for this frame. */ + u32 node_runtime_index; + + /* Frame index (in the heap). */ + u32 frame_index; + + /* Start of next frames for this node. */ + u32 next_frame_index; + + /* Special value for next_frame_index when there is no next frame. */ +#define VLIB_PENDING_FRAME_NO_NEXT_FRAME ((u32) ~0) +} vlib_pending_frame_t; + +typedef struct vlib_node_runtime_t { + /* Node function to call. */ + vlib_node_function_t * function; + + /* Vector of errors for this node. 
 */
  vlib_error_t * errors;

  /* Number of clock cycles. */
  u32 clocks_since_last_overflow;

  /* Maximum clock cycle for an invocation. */
  u32 max_clock;

  /* Number of vectors in the recorded max_clock. */
  u32 max_clock_n;

  /* Number of calls. */
  u32 calls_since_last_overflow;

  /* Number of vector elements processed by this node. */
  u32 vectors_since_last_overflow;

  /* Start of next frames for this node. */
  u32 next_frame_index;

  /* Node index. */
  u32 node_index;

  /* For input nodes: decremented on each main loop iteration until it reaches zero
     and function is called.  Allows some input nodes to be called
     more than others. */
  u32 input_main_loops_per_call;

  /* Saved main loop counter of last dispatch of this node. */
  u32 main_loop_count_last_dispatch;

  /* NOTE(review): semantics not evident from this header alone --
     presumably rolling per-main-loop vector statistics; confirm
     against the dispatcher before documenting further. */
  u32 main_loop_vector_stats[2];

  /* Copy of main node flags. */
  u16 flags;

  /* Input node state. */
  u16 state;

  u16 n_next_nodes;

  /* Next frame index that vector arguments were last enqueued to
     last time this node ran.  Set to zero before first run
     of this node. */
  u16 cached_next_index;

  /* CPU this node runs on */
  u16 cpu_index;

  /* Function dependent node-runtime.  The sizeof arithmetic sizes the
     array so the whole structure occupies 128 bytes; the subtracted
     field counts must be kept in sync with the members above. */
  uword runtime_data[(128
		      - 1 * sizeof (vlib_node_function_t *)
		      - 1 * sizeof (vlib_error_t *)
		      - 11 * sizeof (u32)
		      - 5 * sizeof (u16)) / sizeof (uword)];
} vlib_node_runtime_t;

/* Per (scalar size, vector size) frame allocation bookkeeping. */
typedef struct {
  /* Number of allocated frames for this scalar/vector size. */
  u32 n_alloc_frames;

  /* Vector of free frame indices for this scalar/vector size. */
  u32 * free_frame_indices;
} vlib_frame_size_t;

typedef struct {
  /* Users opaque value for event type. */
  uword opaque;
} vlib_process_event_type_t;

/* State for a cooperative process node (VLIB_NODE_TYPE_PROCESS). */
typedef struct {
  /* Node runtime for this process. */
  vlib_node_runtime_t node_runtime;

  /* Where to longjmp when process is done. */
  clib_longjmp_t return_longjmp;

#define VLIB_PROCESS_RETURN_LONGJMP_RETURN ((uword) ~0 - 0)
#define VLIB_PROCESS_RETURN_LONGJMP_SUSPEND ((uword) ~0 - 1)

  /* Where to longjmp to resume node after suspend. */
  clib_longjmp_t resume_longjmp;
#define VLIB_PROCESS_RESUME_LONGJMP_SUSPEND 0
#define VLIB_PROCESS_RESUME_LONGJMP_RESUME 1

  u16 flags;
#define VLIB_PROCESS_IS_SUSPENDED_WAITING_FOR_CLOCK (1 << 0)
#define VLIB_PROCESS_IS_SUSPENDED_WAITING_FOR_EVENT (1 << 1)
  /* Set to indicate that this process has been added to resume vector. */
#define VLIB_PROCESS_RESUME_PENDING (1 << 2)

  /* Process function is currently running. */
#define VLIB_PROCESS_IS_RUNNING (1 << 3)

  /* Size of process stack. */
  u16 log2_n_stack_bytes;

  u32 suspended_process_frame_index;

  /* Number of times this process was suspended. */
  u32 n_suspends;

  /* Vectors of pending event data indexed by event type index. */
  void ** pending_event_data_by_type_index;

  /* Bitmap of event type-indices with non-empty vectors. */
  uword * non_empty_event_type_bitmap;

  /* Bitmap of event type-indices which are one time events. */
  uword * one_time_event_type_bitmap;

  /* Type is opaque pointer -- typically a pointer to an event handler
     function.  Hash table to map opaque to a type index. */
  uword * event_type_index_by_type_opaque;

  /* Pool of currently valid event types. */
  vlib_process_event_type_t * event_type_pool;

  /* When suspending saves cpu cycle counter when process is to be resumed. */
  u64 resume_cpu_time;

#ifdef CLIB_UNIX
  /* Pad to a multiple of the page size so we can mprotect process stacks */
  CLIB_PAD_FROM_TO (0x140, 0x1000);
#endif

  /* Process stack.  Starts here and extends 2^log2_n_stack_bytes
     bytes. */

#define VLIB_PROCESS_STACK_MAGIC (0xdead7ead)
  u32 stack[0];
} vlib_process_t __attribute__ ((aligned (CLIB_CACHE_LINE_BYTES)));

/* Identifies a process waiting on a one-time event. */
typedef struct {
  u32 node_index;

  u32 one_time_event;
} vlib_one_time_waiting_process_t;

/* Event to be signaled at a future time via the timing wheel. */
typedef struct {
  u16 n_data_elts;

  u16 n_data_elt_bytes;

  /* n_data_elts * n_data_elt_bytes */
  u32 n_data_bytes;

  /* Process node & event type to be used to signal event. */
  u32 process_node_index;

  u32 event_type_index;

  union {
    u8 inline_event_data[64 - 3 * sizeof (u32) - 2 * sizeof (u16)];

    /* Vector of event data used only when data does not fit inline. */
    u8 * event_data_as_vector;
  };
} vlib_signal_timed_event_data_t;

/* Timing wheel entries multiplex suspended processes and timed events:
   the low bit tags the kind, the remaining bits carry the index. */
always_inline uword
vlib_timing_wheel_data_is_timed_event (u32 d)
{ return d & 1; }

always_inline u32
vlib_timing_wheel_data_set_suspended_process (u32 i)
{ return 0 + 2*i; }

always_inline u32
vlib_timing_wheel_data_set_timed_event (u32 i)
{ return 1 + 2*i; }

always_inline uword
vlib_timing_wheel_data_get_index (u32 d)
{ return d / 2; }

/* Top-level node graph state, embedded in vlib_main_t. */
typedef struct {
  /* Public nodes. */
  vlib_node_t ** nodes;

  /* Node index hashed by node name. */
  uword * node_by_name;

  u32 flags;
#define VLIB_NODE_MAIN_RUNTIME_STARTED (1 << 0)

  /* Nodes segregated by type for cache locality.
     Does not apply to nodes of type VLIB_NODE_TYPE_INTERNAL. */
  vlib_node_runtime_t * nodes_by_type[VLIB_N_NODE_TYPE];

  /* Node runtime indices for input nodes with pending interrupts. */
  u32 * pending_interrupt_node_runtime_indices;

  /* Input nodes are switched from/to interrupt to/from polling mode
     when average vector length goes above/below polling/interrupt
     thresholds. */
  u32 polling_threshold_vector_length;
  u32 interrupt_threshold_vector_length;

  /* Vector of next frames. */
  vlib_next_frame_t * next_frames;

  /* Vector of internal node's frames waiting to be called. */
  vlib_pending_frame_t * pending_frames;

  /* Timing wheel for scheduling time-based node dispatch. */
  timing_wheel_t timing_wheel;

  vlib_signal_timed_event_data_t * signal_timed_event_data_pool;

  /* Opaque data vector added via timing_wheel_advance. */
  u32 * data_from_advancing_timing_wheel;

  /* CPU time of next process to be ready on timing wheel. */
  u64 cpu_time_next_process_ready;

  /* Vector of process nodes.
     One for each node of type VLIB_NODE_TYPE_PROCESS. */
  vlib_process_t ** processes;

  /* Current running process or ~0 if no process running. */
  u32 current_process_index;

  /* Pool of pending process frames. */
  vlib_pending_frame_t * suspended_process_frames;

  /* Vector of event data vectors pending recycle. */
  void ** recycled_event_data_vectors;

  /* Current counts of nodes in each state. */
  u32 input_node_counts_by_state[VLIB_N_NODE_STATE];

  /* Hash of (scalar_size,vector_size) to frame_sizes index. */
  uword * frame_size_hash;

  /* Per-size frame allocation information. */
  vlib_frame_size_t * frame_sizes;

  /* Time of last node runtime stats clear. */
  f64 time_last_runtime_stats_clear;

  /* Node registrations added by constructors */
  vlib_node_registration_t * node_registrations;
} vlib_node_main_t;

#endif /* included_vlib_node_h */
diff --git a/vlib/vlib/node_cli.c b/vlib/vlib/node_cli.c new file mode 100644 index 00000000000..58c3776a67b --- /dev/null +++ b/vlib/vlib/node_cli.c @@ -0,0 +1,441 @@
/*
 * Copyright (c) 2015 Cisco and/or its affiliates.
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/* + * node_cli.c: node CLI + * + * Copyright (c) 2008 Eliot Dresselhaus + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ */ + +#include <vlib/vlib.h> +#include <vlib/threads.h> + +static clib_error_t * +show_node_graph (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + vlib_node_main_t * nm = &vm->node_main; + vlib_node_t * n; + u32 node_index; + + vlib_cli_output (vm, "%U\n", format_vlib_node_graph, nm, 0); + + if (unformat (input, "%U", unformat_vlib_node, vm, &node_index)) + { + n = vlib_get_node (vm, node_index); + vlib_cli_output (vm, "%U\n", format_vlib_node_graph, nm, n); + } + else + { + vlib_node_t ** nodes = vec_dup (nm->nodes); + uword i; + + vec_sort (nodes, n1, n2, + vec_cmp (n1[0]->name, n2[0]->name)); + + for (i = 0; i < vec_len (nodes); i++) + vlib_cli_output (vm, "%U\n\n", format_vlib_node_graph, nm, nodes[i]); + + vec_free (nodes); + } + + return 0; +} + +VLIB_CLI_COMMAND (show_node_graph_command, static) = { + .path = "show vlib graph", + .short_help = "Show packet processing node graph", + .function = show_node_graph, +}; + +static u8 * format_vlib_node_stats (u8 * s, va_list * va) +{ + vlib_main_t * vm = va_arg (*va, vlib_main_t *); + vlib_node_t * n = va_arg (*va, vlib_node_t *); + int max = va_arg (*va, int); + f64 v; + char * state; + u8 * ns; + u8 * misc_info = 0; + u64 c, p, l, d; + f64 x; + f64 maxc, maxcn; + u32 maxn; + uword indent; + + if (! 
n) + { + if (max) + return format (s, + "%=30s%=17s%=16s%=16s%=16s%=16s", + "Name", "Max Node Clocks", "Vectors at Max", "Max Clocks", "Avg Clocks", "Avg Vectors/Call"); + else + return format (s, + "%=30s%=12s%=16s%=16s%=16s%=16s%=16s", + "Name", "State", "Calls", "Vectors", "Suspends", "Clocks", "Vectors/Call"); + } + + indent = format_get_indent (s); + + l = n->stats_total.clocks - n->stats_last_clear.clocks; + c = n->stats_total.calls - n->stats_last_clear.calls; + p = n->stats_total.vectors - n->stats_last_clear.vectors; + d = n->stats_total.suspends - n->stats_last_clear.suspends; + maxc = (f64)n->stats_total.max_clock; + maxn = n->stats_total.max_clock_n; + if (n->stats_total.max_clock_n) + maxcn = (f64)n->stats_total.max_clock / (f64)maxn; + else + maxcn = 0.0; + + /* Clocks per packet, per call or per suspend. */ + x = 0; + if (p > 0) + x = (f64) l / (f64) p; + else if (c > 0) + x = (f64) l / (f64) c; + else if (d > 0) + x = (f64) l / (f64) d; + + if (c > 0) + v = (double)p / (double)c; + else + v = 0; + + state = "active"; + if (n->type == VLIB_NODE_TYPE_PROCESS) + { + vlib_process_t * p = vlib_get_process_from_node (vm, n); + + /* Show processes with events pending. This helps spot bugs where events are not + being handled. */ + if (! clib_bitmap_is_zero (p->non_empty_event_type_bitmap)) + misc_info = format (misc_info, "events pending, "); + + switch (p->flags & (VLIB_PROCESS_IS_SUSPENDED_WAITING_FOR_CLOCK + | VLIB_PROCESS_IS_SUSPENDED_WAITING_FOR_EVENT)) + { + default: + if (! 
(p->flags & VLIB_PROCESS_IS_RUNNING)) + state = "done"; + break; + + case VLIB_PROCESS_IS_SUSPENDED_WAITING_FOR_CLOCK: + state = "time wait"; + break; + + case VLIB_PROCESS_IS_SUSPENDED_WAITING_FOR_EVENT: + state = "event wait"; + break; + + case (VLIB_PROCESS_IS_SUSPENDED_WAITING_FOR_EVENT + | VLIB_PROCESS_IS_SUSPENDED_WAITING_FOR_CLOCK): + state = "any wait"; + break; + } + } + else if (n->type != VLIB_NODE_TYPE_INTERNAL) + { + state = "polling"; + if (n->state == VLIB_NODE_STATE_DISABLED) + state = "disabled"; + else if (n->state == VLIB_NODE_STATE_INTERRUPT) + state = "interrupt wait"; + } + + ns = n->name; + + if (max) + s = format (s, "%-30v%=17.2e%=16d%=16.2e%=16.2e%=16.2e", + ns, maxc, maxn, maxcn, x, v); + else + s = format (s, "%-30v%=12s%16Ld%16Ld%16Ld%16.2e%16.2f", ns, state, + c, p, d, x, v); + + if (ns != n->name) + vec_free (ns); + + if (misc_info) + { + s = format (s, "\n%U%v", format_white_space, indent + 4, misc_info); + vec_free (misc_info); + } + + return s; +} + +static clib_error_t * +show_node_runtime (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + vlib_node_main_t * nm = &vm->node_main; + vlib_node_t * n; + f64 time_now; + u32 node_index; + vlib_node_t *** node_dups = 0; + f64 * vectors_per_main_loop = 0; + f64 * last_vector_length_per_node = 0; + + time_now = vlib_time_now (vm); + + if (unformat (input, "%U", unformat_vlib_node, vm, &node_index)) + { + n = vlib_get_node (vm, node_index); + vlib_node_sync_stats (vm, n); + vlib_cli_output (vm, "%U\n", format_vlib_node_stats, vm, 0, 0); + vlib_cli_output (vm, "%U\n", format_vlib_node_stats, vm, n, 0); + } + else + { + vlib_node_t ** nodes; + uword i, j; + f64 dt; + u64 n_input, n_output, n_drop, n_punt; + u64 n_internal_vectors, n_internal_calls; + u64 n_clocks, l, v, c, d; + int brief = 1; + int max = 0; + vlib_main_t ** stat_vms = 0, *stat_vm; + + /* Suppress nodes with zero calls since last clear */ + if (unformat (input, "brief") || unformat (input, "b")) + 
brief = 1; + if (unformat (input, "verbose") || unformat(input, "v")) + brief = 0; + if (unformat (input, "max") || unformat(input, "m")) + max = 1; + + if (vec_len(vlib_mains) == 0) + vec_add1 (stat_vms, vm); + else + { + for (i = 0; i < vec_len (vlib_mains); i++) + { + stat_vm = vlib_mains[i]; + if (stat_vm) + vec_add1 (stat_vms, stat_vm); + } + } + + /* + * Barrier sync across stats scraping. + * Otherwise, the counts will be grossly inaccurate. + */ + vlib_worker_thread_barrier_sync(vm); + + for (j = 0; j < vec_len (stat_vms); j++) + { + stat_vm = stat_vms[j]; + nm = &stat_vm->node_main; + + for (i = 0; i < vec_len (nm->nodes); i++) + { + n = nm->nodes[i]; + vlib_node_sync_stats (stat_vm, n); + } + + nodes = vec_dup (nm->nodes); + + vec_add1(node_dups, nodes); + vec_add1 (vectors_per_main_loop, + vlib_last_vectors_per_main_loop_as_f64 (stat_vm)); + vec_add1 (last_vector_length_per_node, + vlib_last_vector_length_per_node (stat_vm)); + } + vlib_worker_thread_barrier_release(vm); + + + for (j = 0; j < vec_len (stat_vms); j++) + { + stat_vm = stat_vms[j]; + nodes = node_dups[j]; + + vec_sort (nodes, n1, n2, + vec_cmp (n1[0]->name, n2[0]->name)); + + n_input = n_output = n_drop = n_punt = n_clocks = 0; + n_internal_vectors = n_internal_calls = 0; + for (i = 0; i < vec_len (nodes); i++) + { + n = nodes[i]; + + l = n->stats_total.clocks - n->stats_last_clear.clocks; + n_clocks += l; + + v = n->stats_total.vectors - n->stats_last_clear.vectors; + c = n->stats_total.calls - n->stats_last_clear.calls; + + switch (n->type) + { + default: + continue; + + case VLIB_NODE_TYPE_INTERNAL: + n_output += (n->flags & VLIB_NODE_FLAG_IS_OUTPUT) ? v : 0; + n_drop += (n->flags & VLIB_NODE_FLAG_IS_DROP) ? v : 0; + n_punt += (n->flags & VLIB_NODE_FLAG_IS_PUNT) ? v : 0; + if (! 
(n->flags & VLIB_NODE_FLAG_IS_OUTPUT)) + { + n_internal_vectors += v; + n_internal_calls += c; + } + if (n->flags & VLIB_NODE_FLAG_IS_HANDOFF) + n_input += v; + break; + + case VLIB_NODE_TYPE_INPUT: + n_input += v; + break; + } + } + + if (vec_len (vlib_mains)) + { + vlib_worker_thread_t *w = vlib_worker_threads + j; + if (j > 0) + vlib_cli_output (vm, "---------------"); + + if ( w->dpdk_lcore_id > -1) + vlib_cli_output (vm, "Thread %d %v (lcore %u)", j, w->name, + w->dpdk_lcore_id); + else + vlib_cli_output (vm, "Thread %d %v", j, + w->name); + } + + dt = time_now - nm->time_last_runtime_stats_clear; + vlib_cli_output + (vm, + "Time %.1f, average vectors/node %.2f, last %d main loops %.2f per node %.2f" + "\n vector rates in %.4e, out %.4e, drop %.4e, punt %.4e", + dt, + (n_internal_calls > 0 + ? (f64) n_internal_vectors / (f64) n_internal_calls + : 0), + 1 << VLIB_LOG2_MAIN_LOOPS_PER_STATS_UPDATE, + vectors_per_main_loop [j], + last_vector_length_per_node [j], + (f64) n_input / dt, + (f64) n_output / dt, + (f64) n_drop / dt, + (f64) n_punt / dt); + + vlib_cli_output (vm, "%U", format_vlib_node_stats, stat_vm, 0, max); + for (i = 0; i < vec_len (nodes); i++) + { + c = nodes[i]->stats_total.calls - nodes[i]->stats_last_clear.calls; + d = nodes[i]->stats_total.suspends - nodes[i]->stats_last_clear.suspends; + if (c || d || ! 
brief) + { + vlib_cli_output (vm, "%U", format_vlib_node_stats, stat_vm, + nodes[i], max); + } + } + vec_free (nodes); + } + vec_free (stat_vms); + vec_free (node_dups); + vec_free (vectors_per_main_loop); + vec_free (last_vector_length_per_node); + } + + return 0; +} + +VLIB_CLI_COMMAND (show_node_runtime_command, static) = { + .path = "show runtime", + .short_help = "Show packet processing runtime", + .function = show_node_runtime, + .is_mp_safe = 1, +}; + +static clib_error_t * +clear_node_runtime (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + vlib_node_main_t * nm; + vlib_node_t * n; + int i, j; + vlib_main_t ** stat_vms = 0, *stat_vm; + vlib_node_runtime_t * r; + + if (vec_len(vlib_mains) == 0) + vec_add1 (stat_vms, vm); + else + { + for (i = 0; i < vec_len (vlib_mains); i++) + { + stat_vm = vlib_mains[i]; + if (stat_vm) + vec_add1 (stat_vms, stat_vm); + } + } + + vlib_worker_thread_barrier_sync(vm); + + for (j = 0; j < vec_len (stat_vms); j++) + { + stat_vm = stat_vms[j]; + nm = &stat_vm->node_main; + + for (i = 0; i < vec_len (nm->nodes); i++) + { + n = nm->nodes[i]; + vlib_node_sync_stats (stat_vm, n); + n->stats_last_clear = n->stats_total; + + r = vlib_node_get_runtime (stat_vm, n->index); + r->max_clock = 0; + } + /* Note: input/output rates computed using vlib_global_main */ + nm->time_last_runtime_stats_clear = vlib_time_now (vm); + } + + vlib_worker_thread_barrier_release(vm); + + vec_free (stat_vms); + + return 0; +} + +VLIB_CLI_COMMAND (clear_node_runtime_command, static) = { + .path = "clear runtime", + .short_help = "Clear packet processing runtime statistics", + .function = clear_node_runtime, +}; + +/* Dummy function to get us linked in. */ +void vlib_node_cli_reference (void) {} diff --git a/vlib/vlib/node_format.c b/vlib/vlib/node_format.c new file mode 100644 index 00000000000..d1d415e1376 --- /dev/null +++ b/vlib/vlib/node_format.c @@ -0,0 +1,169 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. 
+ * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/* + * node_format.c: node formatting + * + * Copyright (c) 2008 Eliot Dresselhaus + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
 */

#include <vlib/vlib.h>

/* Format one row of the node-graph table: the node's name, its next
   nodes (with next-slot indices) and its previous nodes.  Called with
   n == 0 to format the header row. */
u8 * format_vlib_node_graph (u8 * s, va_list * va)
{
  vlib_node_main_t * nm = va_arg (*va, vlib_node_main_t *);
  vlib_node_t * n = va_arg (*va, vlib_node_t *);
  int i, j;
  uword indent;
  /* One row per next/previous pairing; ~0 marks an empty cell. */
  typedef struct {
    u32 next_node;
    u32 next_slot;
    u32 prev_node;
  } tmp_t;
  tmp_t * tmps = 0;
  tmp_t empty = { .next_node = ~0, .prev_node = ~0 };

  if (! n)
    return format (s,
		   "%=26s%=26s%=26s",
		   "Name", "Next", "Previous");

  s = format (s, "%-26v", n->name);

  indent = format_get_indent (s);

  /* Collect valid next nodes into the first column. */
  for (i = j = 0; i < vec_len (n->next_nodes); i++)
    {
      if (n->next_nodes[i] == VLIB_INVALID_NODE_INDEX)
	continue;
      vec_validate_init_empty (tmps, j, empty);
      tmps[j].next_node = n->next_nodes[i];
      tmps[j].next_slot = i;
      j++;
    }

  /* Fill the previous-node column, reusing rows from the top. */
  j = 0;
  clib_bitmap_foreach (i, n->prev_node_bitmap, ({
    vec_validate_init_empty (tmps, j, empty);
    tmps[j].prev_node = i;
    j++;
  }));

  for (i = 0; i < vec_len (tmps); i++)
    {
      if (i > 0)
	s = format (s, "\n%U", format_white_space, indent);

      if (tmps[i].next_node != ~0)
	{
	  vlib_node_t * x;
	  u8 * t = 0;

	  x = vec_elt (nm->nodes, tmps[i].next_node);
	  t = format (t, "%v [%d]", x->name, tmps[i].next_slot);
	  s = format (s, "%=26v", t);
	  vec_free (t);
	}
      else
	s = format (s, "%26s", "");

      if (tmps[i].prev_node != ~0)
	{
	  vlib_node_t * x;
	  x = vec_elt (nm->nodes, tmps[i].prev_node);
	  s = format (s, "%=26v", x->name);
	}
    }

  vec_free (tmps);

  return s;
}

/* Format "node -> next-node" for a (node, next_index) pair. */
u8 * format_vlib_node_and_next (u8 * s, va_list * va)
{
  vlib_main_t * vm = va_arg (*va, vlib_main_t *);
  vlib_node_t * n = va_arg (*va, vlib_node_t *);
  u32 next_index = va_arg (*va, u32);
  vlib_node_t * n_next;
  u32 * ni;

  ni = vec_elt_at_index (n->next_nodes, next_index);
  n_next = vlib_get_node (vm, ni[0]);
  return format (s, "%v -> %v", n->name, n_next->name);
}

/* Format a node's name given its index. */
u8 * format_vlib_node_name (u8 * s, va_list * va)
{
  vlib_main_t * vm = va_arg (*va, vlib_main_t *);
  u32 node_index = va_arg (*va, u32);
  vlib_node_t * n = vlib_get_node (vm, node_index);

  return format (s, "%v", n->name);
}

/* Format the name of next_index'th next node of node_index. */
u8 * format_vlib_next_node_name (u8 * s, va_list * va)
{
  vlib_main_t * vm = va_arg (*va, vlib_main_t *);
  u32 node_index = va_arg (*va, u32);
  u32 next_index = va_arg (*va, u32);
  vlib_node_t * next = vlib_get_next_node (vm, node_index, next_index);
  return format (s, "%v", next->name);
}

/* Parse node name -> node index. */
uword unformat_vlib_node (unformat_input_t * input, va_list * args)
{
  vlib_main_t * vm = va_arg (*args, vlib_main_t *);
  u32 * result = va_arg (*args, u32 *);

  return unformat_user (input, unformat_hash_vec_string,
			vm->node_main.node_by_name, result);
}

/* Format a time (seconds) with fixed width; vm argument is unused. */
u8 * format_vlib_time (u8 * s, va_list * va)
{
  CLIB_UNUSED (vlib_main_t * vm) = va_arg (*va, vlib_main_t *);
  f64 time = va_arg (*va, f64);
  return format (s, "%12.4f", time);
}

/* Format a cpu cycle count as seconds since clib time was initialized. */
u8 * format_vlib_cpu_time (u8 * s, va_list * va)
{
  vlib_main_t * vm = va_arg (*va, vlib_main_t *);
  u64 cpu_time = va_arg (*va, u64);
  f64 dt;

  dt = (cpu_time - vm->clib_time.init_cpu_time) * vm->clib_time.seconds_per_clock;
  return format (s, "%U", format_vlib_time, vm, dt);
}
diff --git a/vlib/vlib/node_funcs.h b/vlib/vlib/node_funcs.h new file mode 100644 index 00000000000..80dc3c602a1 --- /dev/null +++ b/vlib/vlib/node_funcs.h @@ -0,0 +1,979 @@
/*
 * Copyright (c) 2015 Cisco and/or its affiliates.
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
+ */ +/* + * node_funcs.h: processing nodes global functions/inlines + * + * Copyright (c) 2008 Eliot Dresselhaus + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
 */

#ifndef included_vlib_node_funcs_h
#define included_vlib_node_funcs_h

#include <vppinfra/fifo.h>

/* Return node object for given index. */
always_inline vlib_node_t *
vlib_get_node (vlib_main_t * vm, u32 i)
{ return vec_elt (vm->node_main.nodes, i); }

/* Return node's next_index'th next node. */
always_inline vlib_node_t *
vlib_get_next_node (vlib_main_t * vm, u32 node_index, u32 next_index)
{
  vlib_node_main_t * nm = &vm->node_main;
  vlib_node_t * n;

  n = vec_elt (nm->nodes, node_index);
  ASSERT (next_index < vec_len (n->next_nodes));
  return vlib_get_node (vm, n->next_nodes[next_index]);
}

/* Return a node's runtime; process nodes keep their runtime embedded
   in the vlib_process_t rather than in nodes_by_type. */
always_inline vlib_node_runtime_t *
vlib_node_get_runtime (vlib_main_t * vm, u32 node_index)
{
  vlib_node_main_t * nm = &vm->node_main;
  vlib_node_t * n = vec_elt (nm->nodes, node_index);
  vlib_process_t * p;
  if (n->type != VLIB_NODE_TYPE_PROCESS)
    return vec_elt_at_index (nm->nodes_by_type[n->type], n->runtime_index);
  else
    {
      p = vec_elt (nm->processes, n->runtime_index);
      return &p->node_runtime;
    }
}

/* Return pointer to a node's function-dependent runtime data. */
always_inline void *
vlib_node_get_runtime_data (vlib_main_t * vm, u32 node_index)
{
  vlib_node_runtime_t * r = vlib_node_get_runtime (vm, node_index);
  return r->runtime_data;
}

/* Replace a node's runtime data, copying it both into the node and
   into the fixed-size runtime_data array (must fit -- see ASSERT). */
always_inline void
vlib_node_set_runtime_data (vlib_main_t * vm, u32 node_index,
			    void * runtime_data,
			    u32 n_runtime_data_bytes)
{
  vlib_node_t * n = vlib_get_node (vm, node_index);
  vlib_node_runtime_t * r = vlib_node_get_runtime (vm, node_index);

  n->runtime_data_bytes = n_runtime_data_bytes;
  vec_free (n->runtime_data);
  vec_add (n->runtime_data, runtime_data, n_runtime_data_bytes);

  ASSERT (vec_len (n->runtime_data) <= sizeof (r->runtime_data));
  if (vec_len (n->runtime_data) > 0)
    memcpy (r->runtime_data, n->runtime_data, vec_len (n->runtime_data));
}

/* Change a node's dispatch state, keeping per-state input node counts
   and the runtime's copy of the state in sync. */
always_inline void
vlib_node_set_state (vlib_main_t * vm, u32 node_index, vlib_node_state_t new_state)
{
  vlib_node_main_t * nm = &vm->node_main;
  vlib_node_t * n;
  vlib_node_runtime_t * r;

  n = vec_elt (nm->nodes, node_index);
  if (n->type == VLIB_NODE_TYPE_PROCESS)
    {
      vlib_process_t * p = vec_elt (nm->processes, n->runtime_index);
      r = &p->node_runtime;

      /* When disabling make sure flags are cleared. */
      p->flags &= ~(VLIB_PROCESS_RESUME_PENDING
		    | VLIB_PROCESS_IS_SUSPENDED_WAITING_FOR_CLOCK
		    | VLIB_PROCESS_IS_SUSPENDED_WAITING_FOR_EVENT);
    }
  else
    r = vec_elt_at_index (nm->nodes_by_type[n->type], n->runtime_index);

  ASSERT (new_state < VLIB_N_NODE_STATE);

  if (n->type == VLIB_NODE_TYPE_INPUT)
    {
      ASSERT (nm->input_node_counts_by_state[n->state] > 0);
      nm->input_node_counts_by_state[n->state] -= 1;
      nm->input_node_counts_by_state[new_state] += 1;
    }

  n->state = new_state;
  r->state = new_state;
}

/* Queue an input node for interrupt-mode dispatch. */
always_inline void
vlib_node_set_interrupt_pending (vlib_main_t * vm, u32 node_index)
{
  vlib_node_main_t * nm = &vm->node_main;
  vlib_node_t * n = vec_elt (nm->nodes, node_index);
  ASSERT (n->type == VLIB_NODE_TYPE_INPUT);
  vec_add1 (nm->pending_interrupt_node_runtime_indices, n->runtime_index);
}

/* Return the vlib_process_t behind a process-type node. */
always_inline vlib_process_t *
vlib_get_process_from_node (vlib_main_t * vm, vlib_node_t * node)
{
  vlib_node_main_t * nm = &vm->node_main;
  ASSERT (node->type == VLIB_NODE_TYPE_PROCESS);
  return vec_elt (nm->processes, node->runtime_index);
}

/* Fetches frame with given handle.  The handle packs the owning cpu
   index (VLIB_CPU_MASK bits) with the byte offset into that cpu's
   frame heap (VLIB_OFFSET_MASK bits). */
always_inline vlib_frame_t *
vlib_get_frame_no_check (vlib_main_t * vm, uword frame_index)
{
  vlib_frame_t * f;
  u32 cpu_index = frame_index & VLIB_CPU_MASK;
  u32 offset = frame_index & VLIB_OFFSET_MASK;
  vm = vlib_mains ? vlib_mains[cpu_index] : vm;
  f = vm->heap_base + offset;
  return f;
}

/* Inverse of vlib_get_frame_no_check: frame pointer -> handle.  Frame
   alignment guarantees the cpu-index bits of the offset are zero. */
always_inline u32
vlib_frame_index_no_check (vlib_main_t * vm, vlib_frame_t * f)
{
  u32 i;

  ASSERT (((uword) f & VLIB_CPU_MASK) == 0);

  vm = vlib_mains ? vlib_mains[f->cpu_index] : vm;

  i = ((u8 *) f - (u8 *) vm->heap_base);
  return i | f->cpu_index;
}

/* As vlib_get_frame_no_check but asserts the frame is allocated. */
always_inline vlib_frame_t *
vlib_get_frame (vlib_main_t * vm, uword frame_index)
{
  vlib_frame_t * f = vlib_get_frame_no_check (vm, frame_index);
  ASSERT (f->flags & VLIB_FRAME_IS_ALLOCATED);
  return f;
}

/* As vlib_frame_index_no_check but asserts round-trip consistency. */
always_inline u32
vlib_frame_index (vlib_main_t * vm, vlib_frame_t * f)
{
  uword i = vlib_frame_index_no_check (vm, f);
  ASSERT (vlib_get_frame (vm, i) == f);
  return i;
}

/* Byte alignment for vector arguments. */
#define VLIB_FRAME_VECTOR_ALIGN (1 << 4)

/* Offset of the vector arguments: frame header plus scalar arguments,
   rounded up to VLIB_FRAME_VECTOR_ALIGN. */
always_inline u32
vlib_frame_vector_byte_offset (u32 scalar_size)
{
  return round_pow2 (sizeof (vlib_frame_t) + scalar_size,
		     VLIB_FRAME_VECTOR_ALIGN);
}

/* Pointer to a frame's vector arguments. */
always_inline void *
vlib_frame_vector_args (vlib_frame_t * f)
{
  return (void *) f + vlib_frame_vector_byte_offset (f->scalar_size);
}

/* Scalar data lies before aligned vector data. */
always_inline void *
vlib_frame_args (vlib_frame_t * f)
{ return vlib_frame_vector_args (f) - f->scalar_size; }

/* Next-frame state for a runtime's next_index; debug builds verify the
   runtime index matches the graph. */
always_inline vlib_next_frame_t *
vlib_node_runtime_get_next_frame (vlib_main_t * vm,
				  vlib_node_runtime_t * n,
				  u32 next_index)
{
  vlib_node_main_t * nm = &vm->node_main;
  vlib_next_frame_t * nf;

  ASSERT (next_index < n->n_next_nodes);
  nf = vec_elt_at_index (nm->next_frames,
			 n->next_frame_index + next_index);

  if (CLIB_DEBUG > 0)
    {
      vlib_node_t * node, * next;
      node = vec_elt (nm->nodes, n->node_index);
      next = vec_elt (nm->nodes, node->next_nodes[next_index]);
      ASSERT (nf->node_runtime_index == next->runtime_index);
    }

  return nf;
}

/* As above, looked up by node index instead of runtime pointer. */
always_inline vlib_next_frame_t *
vlib_node_get_next_frame (vlib_main_t * vm,
			  u32 node_index,
			  u32 next_index)
{
  vlib_node_main_t * nm = &vm->node_main;
  vlib_node_t * n;
  vlib_node_runtime_t * r;

  n = vec_elt (nm->nodes, node_index);
  r = vec_elt_at_index (nm->nodes_by_type[n->type], n->runtime_index);
  return vlib_node_runtime_get_next_frame (vm, r, next_index);
}

vlib_frame_t *
vlib_get_next_frame_internal (vlib_main_t * vm,
			      vlib_node_runtime_t * node,
			      u32 next_index,
			      u32 alloc_new_frame);

/* Common body of vlib_get_next_frame/vlib_get_new_next_frame: yields a
   pointer to the first free vector slot plus the free slot count. */
#define vlib_get_next_frame_macro(vm,node,next_index,vectors,n_vectors_left,alloc_new_frame) \
do {									\
  vlib_frame_t * _f							\
    = vlib_get_next_frame_internal ((vm), (node), (next_index),		\
				    (alloc_new_frame));			\
  u32 _n = _f->n_vectors;						\
  (vectors) = vlib_frame_vector_args (_f) + _n * sizeof ((vectors)[0]); \
  (n_vectors_left) = VLIB_FRAME_SIZE - _n;				\
} while (0)

#define vlib_get_next_frame(vm,node,next_index,vectors,n_vectors_left)	\
  vlib_get_next_frame_macro (vm, node, next_index,			\
			     vectors, n_vectors_left,			\
			     /* alloc new frame */ 0)

#define vlib_get_new_next_frame(vm,node,next_index,vectors,n_vectors_left) \
  vlib_get_next_frame_macro (vm, node, next_index,			\
			     vectors, n_vectors_left,			\
			     /* alloc new frame */ 1)

void
vlib_put_next_frame (vlib_main_t * vm,
		     vlib_node_runtime_t * r,
		     u32 next_index,
		     u32 n_packets_left);

/* Combination get plus put.  Returns vector argument just added. */
#define vlib_set_next_frame(vm,node,next_index,v)			\
({									\
  uword _n_left;							\
  vlib_get_next_frame ((vm), (node), (next_index), (v), _n_left);	\
  ASSERT (_n_left > 0);							\
  vlib_put_next_frame ((vm), (node), (next_index), _n_left - 1);	\
  (v);									\
})

/* Enqueue a single buffer index to the given next. */
always_inline void
vlib_set_next_frame_buffer (vlib_main_t * vm,
			    vlib_node_runtime_t * node,
			    u32 next_index,
			    u32 buffer_index)
{
  u32 * p;
  p = vlib_set_next_frame (vm, node, next_index, p);
  p[0] = buffer_index;
}

vlib_frame_t * vlib_get_frame_to_node (vlib_main_t * vm, u32 to_node_index);
void vlib_put_frame_to_node (vlib_main_t * vm, u32 to_node_index, vlib_frame_t * f);

/* Return the currently running process (caller must be in process
   context -- see vlib_in_process_context). */
always_inline vlib_process_t *
vlib_get_current_process (vlib_main_t * vm)
{
  vlib_node_main_t * nm = &vm->node_main;
  return vec_elt (nm->processes, nm->current_process_index);
}

always_inline uword
vlib_in_process_context (vlib_main_t * vm)
{ return vm->node_main.current_process_index != ~0; }

always_inline uword
vlib_current_process (vlib_main_t * vm)
{ return vlib_get_current_process (vm)->node_runtime.node_index; }

/* Anything less than 1e-6 is considered zero. */
always_inline uword
vlib_process_suspend_time_is_zero (f64 dt)
{ return dt < 1e-6; }

/* Suspend the current process for dt seconds.  Implemented with
   setjmp/longjmp: the SUSPEND return of clib_setjmp longjmps back to
   the dispatcher; the dispatcher longjmps here again when the timer
   fires, making clib_setjmp return RESUME. */
always_inline uword
vlib_process_suspend (vlib_main_t * vm, f64 dt)
{
  uword r;
  vlib_node_main_t * nm = &vm->node_main;
  vlib_process_t * p = vec_elt (nm->processes, nm->current_process_index);
  u64 dt_cpu = dt * vm->clib_time.clocks_per_second;

  if (vlib_process_suspend_time_is_zero (dt))
    return VLIB_PROCESS_RESUME_LONGJMP_RESUME;

  p->flags |= VLIB_PROCESS_IS_SUSPENDED_WAITING_FOR_CLOCK;
  r = clib_setjmp (&p->resume_longjmp, VLIB_PROCESS_RESUME_LONGJMP_SUSPEND);
  if (r == VLIB_PROCESS_RESUME_LONGJMP_SUSPEND)
    {
      p->resume_cpu_time = clib_cpu_time_now () + dt_cpu;
      clib_longjmp (&p->return_longjmp, VLIB_PROCESS_RETURN_LONGJMP_SUSPEND);
    }

  return r;
}

/* Return an event type index to the pool; one-time events also clear
   their bit in the one-time bitmap. */
always_inline void
vlib_process_free_event_type (vlib_process_t * p, uword t, uword is_one_time_event)
{
  ASSERT (! pool_is_free_index (p->event_type_pool, t));
  pool_put_index (p->event_type_pool, t);
  if (is_one_time_event)
    p->one_time_event_type_bitmap =
      clib_bitmap_andnoti (p->one_time_event_type_bitmap, t);
}

/* Free the event type iff it was registered as a one-time event. */
always_inline void
vlib_process_maybe_free_event_type (vlib_process_t * p, uword t)
{
  ASSERT (! pool_is_free_index (p->event_type_pool, t));
  if (clib_bitmap_get (p->one_time_event_type_bitmap, t))
    vlib_process_free_event_type (p, t, /* is_one_time_event */ 1);
}

/* Dequeue the pending event-data vector for the first ready event
   type; ownership of the vector passes to the caller.  Returns 0 when
   nothing is pending. */
always_inline void *
vlib_process_get_event_data (vlib_main_t * vm, uword * return_event_type_opaque)
{
  vlib_node_main_t * nm = &vm->node_main;
  vlib_process_t * p;
  vlib_process_event_type_t * et;
  uword t, l;
  void * event_data_vector;

  p = vec_elt (nm->processes, nm->current_process_index);

  /* Find first type with events ready.
     Return invalid type when there's nothing there. */
  t = clib_bitmap_first_set (p->non_empty_event_type_bitmap);
  if (t == ~0)
    return 0;

  p->non_empty_event_type_bitmap = clib_bitmap_andnoti (p->non_empty_event_type_bitmap, t);

  l = _vec_len (p->pending_event_data_by_type_index[t]);
  ASSERT (l > 0);
  event_data_vector = p->pending_event_data_by_type_index[t];
  p->pending_event_data_by_type_index[t] = 0;

  et = pool_elt_at_index (p->event_type_pool, t);

  /* Return user's opaque value and possibly index. */
  *return_event_type_opaque = et->opaque;

  vlib_process_maybe_free_event_type (p, t);

  return event_data_vector;
}

/* Return event data vector for later reuse.  We reuse event data to avoid
   repeatedly allocating event vectors in cases where we care about speed. */
always_inline void
vlib_process_put_event_data (vlib_main_t * vm, void * event_data)
{
  vlib_node_main_t * nm = &vm->node_main;
  vec_add1 (nm->recycled_event_data_vectors, event_data);
}

/* Return type & add any events to data vector. */
always_inline uword
vlib_process_get_events (vlib_main_t * vm, uword ** data_vector)
{
  vlib_node_main_t * nm = &vm->node_main;
  vlib_process_t * p;
  vlib_process_event_type_t * et;
  uword r, t, l;

  p = vec_elt (nm->processes, nm->current_process_index);

  /* Find first type with events ready.
     Return invalid type when there's nothing there. */
  t = clib_bitmap_first_set (p->non_empty_event_type_bitmap);
  if (t == ~0)
    return t;

  p->non_empty_event_type_bitmap = clib_bitmap_andnoti (p->non_empty_event_type_bitmap, t);

  l = _vec_len (p->pending_event_data_by_type_index[t]);
  if (data_vector)
    vec_add (*data_vector, p->pending_event_data_by_type_index[t], l);
  _vec_len (p->pending_event_data_by_type_index[t]) = 0;

  et = pool_elt_at_index (p->event_type_pool, t);

  /* Return user's opaque value. */
  r = et->opaque;

  vlib_process_maybe_free_event_type (p, t);

  return r;
}

/* Drain pending events of type t into data_vector (the pending vector
   itself is retained for reuse); returns the number of events. */
always_inline uword
vlib_process_get_events_helper (vlib_process_t * p, uword t, uword ** data_vector)
{
  uword l;

  p->non_empty_event_type_bitmap = clib_bitmap_andnoti (p->non_empty_event_type_bitmap, t);

  l = _vec_len (p->pending_event_data_by_type_index[t]);
  if (data_vector)
    vec_add (*data_vector, p->pending_event_data_by_type_index[t], l);
  _vec_len (p->pending_event_data_by_type_index[t]) = 0;

  vlib_process_maybe_free_event_type (p, t);

  return l;
}

/* As above but query as specified type of event.  Returns number of
   events found. */
always_inline uword
vlib_process_get_events_with_type (vlib_main_t * vm, uword ** data_vector,
				   uword with_type_opaque)
{
  vlib_node_main_t * nm = &vm->node_main;
  vlib_process_t * p;
  uword t, * h;

  p = vec_elt (nm->processes, nm->current_process_index);
  h = hash_get (p->event_type_index_by_type_opaque, with_type_opaque);
  if (! h)
    /* This can happen when an event has not yet been
       signaled with given opaque type. */
    return 0;

  t = h[0];
  if (! clib_bitmap_get (p->non_empty_event_type_bitmap, t))
    return 0;

  return vlib_process_get_events_helper (p, t, data_vector);
}

/* Suspend until some event is pending; returns the bitmap of ready
   event type indices. */
always_inline uword *
vlib_process_wait_for_event (vlib_main_t * vm)
{
  vlib_node_main_t * nm = &vm->node_main;
  vlib_process_t * p;
  uword r;

  p = vec_elt (nm->processes, nm->current_process_index);
  if (clib_bitmap_is_zero (p->non_empty_event_type_bitmap))
    {
      p->flags |= VLIB_PROCESS_IS_SUSPENDED_WAITING_FOR_EVENT;
      r = clib_setjmp (&p->resume_longjmp, VLIB_PROCESS_RESUME_LONGJMP_SUSPEND);
      if (r == VLIB_PROCESS_RESUME_LONGJMP_SUSPEND)
	clib_longjmp (&p->return_longjmp, VLIB_PROCESS_RETURN_LONGJMP_SUSPEND);
    }

  return p->non_empty_event_type_bitmap;
}

/* Suspend until the given one-time event type index is signaled, then
   drain its events into data_vector. */
always_inline uword
vlib_process_wait_for_one_time_event (vlib_main_t * vm,
				      uword ** data_vector,
				      uword with_type_index)
{
  vlib_node_main_t * nm = &vm->node_main;
  vlib_process_t * p;
  uword r;

  p = vec_elt (nm->processes, nm->current_process_index);
  ASSERT (! pool_is_free_index (p->event_type_pool, with_type_index));
  while (! clib_bitmap_get (p->non_empty_event_type_bitmap, with_type_index))
    {
      p->flags |= VLIB_PROCESS_IS_SUSPENDED_WAITING_FOR_EVENT;
      r = clib_setjmp (&p->resume_longjmp, VLIB_PROCESS_RESUME_LONGJMP_SUSPEND);
      if (r == VLIB_PROCESS_RESUME_LONGJMP_SUSPEND)
	clib_longjmp (&p->return_longjmp, VLIB_PROCESS_RETURN_LONGJMP_SUSPEND);
    }

  return vlib_process_get_events_helper (p, with_type_index, data_vector);
}

/* Suspend until an event with the given opaque type is signaled.
   NOTE(review): definition continues beyond this chunk of the file. */
always_inline uword
vlib_process_wait_for_event_with_type (vlib_main_t * vm,
				       uword ** data_vector,
				       uword with_type_opaque)
{
  vlib_node_main_t * nm = &vm->node_main;
  vlib_process_t * p;
  uword r, * h;

  p = vec_elt (nm->processes, nm->current_process_index);
  h = hash_get (p->event_type_index_by_type_opaque, with_type_opaque);
  while (! h || !
clib_bitmap_get (p->non_empty_event_type_bitmap, h[0])) + { + p->flags |= VLIB_PROCESS_IS_SUSPENDED_WAITING_FOR_EVENT; + r = clib_setjmp (&p->resume_longjmp, VLIB_PROCESS_RESUME_LONGJMP_SUSPEND); + if (r == VLIB_PROCESS_RESUME_LONGJMP_SUSPEND) + clib_longjmp (&p->return_longjmp, VLIB_PROCESS_RETURN_LONGJMP_SUSPEND); + + /* See if unknown event type has been signaled now. */ + if (! h) + h = hash_get (p->event_type_index_by_type_opaque, with_type_opaque); + } + + return vlib_process_get_events_helper (p, h[0], data_vector); +} + +always_inline f64 +vlib_process_wait_for_event_or_clock (vlib_main_t * vm, f64 dt) +{ + vlib_node_main_t * nm = &vm->node_main; + vlib_process_t * p; + f64 wakeup_time; + uword r; + + p = vec_elt (nm->processes, nm->current_process_index); + + if (vlib_process_suspend_time_is_zero (dt) + || ! clib_bitmap_is_zero (p->non_empty_event_type_bitmap)) + return dt; + + wakeup_time = vlib_time_now (vm) + dt; + + /* Suspend waiting for both clock and event to occur. */ + p->flags |= (VLIB_PROCESS_IS_SUSPENDED_WAITING_FOR_EVENT + | VLIB_PROCESS_IS_SUSPENDED_WAITING_FOR_CLOCK); + + r = clib_setjmp (&p->resume_longjmp, VLIB_PROCESS_RESUME_LONGJMP_SUSPEND); + if (r == VLIB_PROCESS_RESUME_LONGJMP_SUSPEND) + { + p->resume_cpu_time = (clib_cpu_time_now () + + (dt * vm->clib_time.clocks_per_second)); + clib_longjmp (&p->return_longjmp, VLIB_PROCESS_RETURN_LONGJMP_SUSPEND); + } + + /* Return amount of time still left to sleep. + If <= 0 then we've been waken up by the clock (and not an event). 
*/ + return wakeup_time - vlib_time_now (vm); +} + +always_inline vlib_process_event_type_t * +vlib_process_new_event_type (vlib_process_t * p, uword with_type_opaque) +{ + vlib_process_event_type_t * et; + pool_get (p->event_type_pool, et); + et->opaque = with_type_opaque; + return et; +} + +always_inline uword +vlib_process_create_one_time_event (vlib_main_t * vm, uword node_index, uword with_type_opaque) +{ + vlib_node_main_t * nm = &vm->node_main; + vlib_node_t * n = vlib_get_node (vm, node_index); + vlib_process_t * p = vec_elt (nm->processes, n->runtime_index); + vlib_process_event_type_t * et; + uword t; + + et = vlib_process_new_event_type (p, with_type_opaque); + t = et - p->event_type_pool; + p->one_time_event_type_bitmap = clib_bitmap_ori (p->one_time_event_type_bitmap, t); + return t; +} + +always_inline void +vlib_process_delete_one_time_event (vlib_main_t * vm, uword node_index, uword t) +{ + vlib_node_main_t * nm = &vm->node_main; + vlib_node_t * n = vlib_get_node (vm, node_index); + vlib_process_t * p = vec_elt (nm->processes, n->runtime_index); + + ASSERT (clib_bitmap_get (p->one_time_event_type_bitmap, t)); + vlib_process_free_event_type (p, t, /* is_one_time_event */ 1); +} + +always_inline void * +vlib_process_signal_event_helper (vlib_node_main_t * nm, + vlib_node_t * n, + vlib_process_t * p, + uword t, + uword n_data_elts, + uword n_data_elt_bytes) +{ + uword p_flags, add_to_pending, delete_from_wheel; + void * data_to_be_written_by_caller; + + ASSERT (! pool_is_free_index (p->event_type_pool, t)); + + vec_validate (p->pending_event_data_by_type_index, t); + + /* Resize data vector and return caller's data to be written. */ + { + void * data_vec = p->pending_event_data_by_type_index[t]; + uword l; + + if (! 
data_vec && vec_len (nm->recycled_event_data_vectors)) + { + data_vec = vec_pop (nm->recycled_event_data_vectors); + _vec_len (data_vec) = 0; + } + + l = vec_len (data_vec); + + data_vec = _vec_resize (data_vec, + /* length_increment */ n_data_elts, + /* total size after increment */ (l + n_data_elts) * n_data_elt_bytes, + /* header_bytes */ 0, /* data_align */ 0); + + p->pending_event_data_by_type_index[t] = data_vec; + data_to_be_written_by_caller = data_vec + l * n_data_elt_bytes; + } + + p->non_empty_event_type_bitmap = clib_bitmap_ori (p->non_empty_event_type_bitmap, t); + + p_flags = p->flags; + + /* Event was already signalled? */ + add_to_pending = (p_flags & VLIB_PROCESS_RESUME_PENDING) == 0; + + /* Process will resume when suspend time elapses? */ + delete_from_wheel = 0; + if (p_flags & VLIB_PROCESS_IS_SUSPENDED_WAITING_FOR_CLOCK) + { + /* Waiting for both event and clock? */ + if (p_flags & VLIB_PROCESS_IS_SUSPENDED_WAITING_FOR_EVENT) + delete_from_wheel = 1; + else + /* Waiting only for clock. Event will be queue and may be + handled when timer expires. */ + add_to_pending = 0; + } + + /* Never add current process to pending vector since current process is + already running. 
*/ + add_to_pending &= nm->current_process_index != n->runtime_index; + + if (add_to_pending) + { + u32 x = vlib_timing_wheel_data_set_suspended_process (n->runtime_index); + p->flags = p_flags | VLIB_PROCESS_RESUME_PENDING; + vec_add1 (nm->data_from_advancing_timing_wheel, x); + if (delete_from_wheel) + timing_wheel_delete (&nm->timing_wheel, x); + } + + return data_to_be_written_by_caller; +} + +always_inline void * +vlib_process_signal_event_data (vlib_main_t * vm, + uword node_index, + uword type_opaque, + uword n_data_elts, + uword n_data_elt_bytes) +{ + vlib_node_main_t * nm = &vm->node_main; + vlib_node_t * n = vlib_get_node (vm, node_index); + vlib_process_t * p = vec_elt (nm->processes, n->runtime_index); + uword * h, t; + + h = hash_get (p->event_type_index_by_type_opaque, type_opaque); + if (! h) + { + vlib_process_event_type_t * et = vlib_process_new_event_type (p, type_opaque); + t = et - p->event_type_pool; + hash_set (p->event_type_index_by_type_opaque, type_opaque, t); + } + else + t = h[0]; + + return vlib_process_signal_event_helper (nm, n, p, t, n_data_elts, n_data_elt_bytes); +} + +always_inline void * +vlib_process_signal_event_at_time (vlib_main_t * vm, + f64 dt, + uword node_index, + uword type_opaque, + uword n_data_elts, + uword n_data_elt_bytes) +{ + vlib_node_main_t * nm = &vm->node_main; + vlib_node_t * n = vlib_get_node (vm, node_index); + vlib_process_t * p = vec_elt (nm->processes, n->runtime_index); + uword * h, t; + + h = hash_get (p->event_type_index_by_type_opaque, type_opaque); + if (! 
h) + { + vlib_process_event_type_t * et = vlib_process_new_event_type (p, type_opaque); + t = et - p->event_type_pool; + hash_set (p->event_type_index_by_type_opaque, type_opaque, t); + } + else + t = h[0]; + + if (vlib_process_suspend_time_is_zero (dt)) + return vlib_process_signal_event_helper (nm, n, p, t, n_data_elts, n_data_elt_bytes); + else + { + vlib_signal_timed_event_data_t * te; + u64 dt_cpu = dt * vm->clib_time.clocks_per_second; + + pool_get_aligned (nm->signal_timed_event_data_pool, te, sizeof (te[0])); + + te->n_data_elts = n_data_elts; + te->n_data_elt_bytes = n_data_elt_bytes; + te->n_data_bytes = n_data_elts * n_data_elt_bytes; + + /* Assert that structure fields are big enough. */ + ASSERT (te->n_data_elts == n_data_elts); + ASSERT (te->n_data_elt_bytes == n_data_elt_bytes); + ASSERT (te->n_data_bytes == n_data_elts * n_data_elt_bytes); + + te->process_node_index = n->runtime_index; + te->event_type_index = t; + + timing_wheel_insert (&nm->timing_wheel, clib_cpu_time_now () + dt_cpu, + vlib_timing_wheel_data_set_timed_event (te - nm->signal_timed_event_data_pool)); + + /* Inline data big enough to hold event? 
*/ + if (te->n_data_bytes < sizeof (te->inline_event_data)) + return te->inline_event_data; + else + { + te->event_data_as_vector = 0; + vec_resize (te->event_data_as_vector, te->n_data_bytes); + return te->event_data_as_vector; + } + } +} + +always_inline void * +vlib_process_signal_one_time_event_data (vlib_main_t * vm, + uword node_index, + uword type_index, + uword n_data_elts, + uword n_data_elt_bytes) +{ + vlib_node_main_t * nm = &vm->node_main; + vlib_node_t * n = vlib_get_node (vm, node_index); + vlib_process_t * p = vec_elt (nm->processes, n->runtime_index); + return vlib_process_signal_event_helper (nm, n, p, type_index, n_data_elts, n_data_elt_bytes); +} + +always_inline void +vlib_process_signal_event (vlib_main_t * vm, + uword node_index, + uword type_opaque, + uword data) +{ + uword * d = vlib_process_signal_event_data (vm, node_index, type_opaque, + 1 /* elts */, sizeof (uword)); + d[0] = data; +} + +always_inline void +vlib_process_signal_event_pointer (vlib_main_t * vm, + uword node_index, + uword type_opaque, + void * data) +{ + void ** d = vlib_process_signal_event_data (vm, node_index, type_opaque, + 1 /* elts */, sizeof (data)); + d[0] = data; +} + +always_inline void +vlib_process_signal_one_time_event (vlib_main_t * vm, + uword node_index, + uword type_index, + uword data) +{ + uword * d = vlib_process_signal_one_time_event_data (vm, node_index, type_index, + 1 /* elts */, sizeof (uword)); + d[0] = data; +} + +always_inline void +vlib_signal_one_time_waiting_process (vlib_main_t * vm, vlib_one_time_waiting_process_t * p) +{ + vlib_process_signal_one_time_event (vm, p->node_index, p->one_time_event, /* data */ ~0); + memset (p, ~0, sizeof (p[0])); +} + +always_inline void +vlib_signal_one_time_waiting_process_vector (vlib_main_t * vm, + vlib_one_time_waiting_process_t ** wps) +{ + vlib_one_time_waiting_process_t * wp; + vec_foreach (wp, *wps) + vlib_signal_one_time_waiting_process (vm, wp); + vec_free (*wps); +} + +always_inline void 
+vlib_current_process_wait_for_one_time_event (vlib_main_t * vm, vlib_one_time_waiting_process_t * p) +{ + p->node_index = vlib_current_process (vm); + p->one_time_event = + vlib_process_create_one_time_event (vm, p->node_index, /* type opaque */ ~0); + vlib_process_wait_for_one_time_event (vm, + /* don't care about data */ 0, + p->one_time_event); +} + +always_inline void +vlib_current_process_wait_for_one_time_event_vector (vlib_main_t * vm, + vlib_one_time_waiting_process_t ** wps) +{ + vlib_one_time_waiting_process_t * wp; + vec_add2 (*wps, wp, 1); + vlib_current_process_wait_for_one_time_event (vm, wp); +} + +always_inline u32 +vlib_node_runtime_update_main_loop_vector_stats (vlib_main_t * vm, + vlib_node_runtime_t * node, + uword n_vectors) +{ + u32 i, d, vi0, vi1; + u32 i0, i1; + + ASSERT (is_pow2 (ARRAY_LEN (node->main_loop_vector_stats))); + i = ((vm->main_loop_count >> VLIB_LOG2_MAIN_LOOPS_PER_STATS_UPDATE) + & (ARRAY_LEN (node->main_loop_vector_stats) - 1)); + i0 = i ^ 0; + i1 = i ^ 1; + d = ((vm->main_loop_count >> VLIB_LOG2_MAIN_LOOPS_PER_STATS_UPDATE) + - (node->main_loop_count_last_dispatch >> VLIB_LOG2_MAIN_LOOPS_PER_STATS_UPDATE)); + vi0 = node->main_loop_vector_stats[i0]; + vi1 = node->main_loop_vector_stats[i1]; + vi0 = d == 0 ? vi0 : 0; + vi1 = d <= 1 ? vi1 : 0; + vi0 += n_vectors; + node->main_loop_vector_stats[i0] = vi0; + node->main_loop_vector_stats[i1] = vi1; + node->main_loop_count_last_dispatch = vm->main_loop_count; + /* Return previous counter. 
*/ + return node->main_loop_vector_stats[i1]; +} + +always_inline f64 +vlib_node_vectors_per_main_loop_as_float (vlib_main_t * vm, u32 node_index) +{ + vlib_node_runtime_t * rt = vlib_node_get_runtime (vm, node_index); + u32 v; + + v = vlib_node_runtime_update_main_loop_vector_stats (vm, rt, /* n_vectors */ 0); + return (f64) v / (1 << VLIB_LOG2_MAIN_LOOPS_PER_STATS_UPDATE); +} + +always_inline u32 +vlib_node_vectors_per_main_loop_as_integer (vlib_main_t * vm, u32 node_index) +{ + vlib_node_runtime_t * rt = vlib_node_get_runtime (vm, node_index); + u32 v; + + v = vlib_node_runtime_update_main_loop_vector_stats (vm, rt, /* n_vectors */ 0); + return v >> VLIB_LOG2_MAIN_LOOPS_PER_STATS_UPDATE; +} + +void +vlib_frame_free (vlib_main_t * vm, + vlib_node_runtime_t * r, + vlib_frame_t * f); + +/* Add next node to given node in given slot. */ +uword +vlib_node_add_next_with_slot (vlib_main_t * vm, + uword node, + uword next_node, + uword slot); + +/* As above but adds to end of node's next vector. */ +always_inline uword +vlib_node_add_next (vlib_main_t * vm, uword node, uword next_node) +{ return vlib_node_add_next_with_slot (vm, node, next_node, ~0); } + +/* Add next node to given node in given slot. */ +uword +vlib_node_add_named_next_with_slot (vlib_main_t * vm, + uword node, + char * next_name, + uword slot); + +/* As above but adds to end of node's next vector. */ +always_inline uword +vlib_node_add_named_next (vlib_main_t * vm, + uword node, + char * name) +{ return vlib_node_add_named_next_with_slot (vm, node, name, ~0); } + +/* Query node given name. */ +vlib_node_t * vlib_get_node_by_name (vlib_main_t * vm, u8 * name); + +/* Rename a node. */ +void vlib_node_rename (vlib_main_t * vm, u32 node_index, char * fmt, ...); + +/* Register new packet processing node. Nodes can be registered + dynamically via this call or statically via the VLIB_REGISTER_NODE + macro. 
*/ +u32 vlib_register_node (vlib_main_t * vm, vlib_node_registration_t * r); + +/* Register all static nodes registered via VLIB_REGISTER_NODE. */ +void vlib_register_all_static_nodes (vlib_main_t * vm); + +/* Start a process. */ +void vlib_start_process (vlib_main_t * vm, uword process_index); + +/* Sync up runtime and main node stats. */ +void +vlib_node_sync_stats (vlib_main_t * vm, vlib_node_t * n); + +/* Node graph initialization function. */ +clib_error_t * vlib_node_main_init (vlib_main_t * vm); + +format_function_t format_vlib_node_graph; +format_function_t format_vlib_node_name; +format_function_t format_vlib_next_node_name; +format_function_t format_vlib_node_and_next; +format_function_t format_vlib_cpu_time; +format_function_t format_vlib_time; +/* Parse node name -> node index. */ +unformat_function_t unformat_vlib_node; + +always_inline void +vlib_node_increment_counter (vlib_main_t *vm, u32 node_index, + u32 counter_index, u64 increment) +{ + vlib_node_t * n = vlib_get_node (vm, node_index); + vlib_error_main_t * em = &vm->error_main; + u32 node_counter_base_index = n->error_heap_index; + em->counters[node_counter_base_index + counter_index] += increment; +} + +#endif /* included_vlib_node_funcs_h */ diff --git a/vlib/vlib/parse.c b/vlib/vlib/parse.c new file mode 100644 index 00000000000..844be8aafe3 --- /dev/null +++ b/vlib/vlib/parse.c @@ -0,0 +1,980 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include <vlib/parse.h> + +#define PARSE_DEBUG 0 + +u16 word_type_index, number_type_index, eof_type_index, rule_eof_type_index, + plus_type_index, minus_type_index, star_type_index, slash_type_index, + lpar_type_index, rpar_type_index; + +u8 * format_vlib_parse_value (u8 * s, va_list * args) +{ + vlib_parse_main_t *pm = va_arg (*args, vlib_parse_main_t *); + vlib_parse_type_t *type; + vlib_parse_value_t *v; + u16 type_index; + + s = format (s, "%d items:\n", vec_len (pm->parse_value)); + vec_foreach (v, pm->parse_value) + { + type_index = v->type; + type = pool_elt_at_index (pm->parse_types, type_index); + if (type->format_value) + s = format (s, "[%d]: %U\n", v - pm->parse_value, + type->format_value, v); + else + s = format (s, "[%d]: (nofun)\n", v - pm->parse_value); + } + return s; +} + +static u8 * format_vlib_parse_match (u8 * s, va_list * args) +{ + vlib_parse_match_t m = va_arg (*args, vlib_parse_match_t); + char * t = 0; + switch (m) + { +#define _(a) case VLIB_PARSE_##a: t = #a; break; + foreach_parse_match_type +#undef _ + default: t = 0; break; + } + + if (t) + return format (s, "%s", t); + else + return format (s, "unknown 0x%x", m); +} + +static u8 * format_vlib_parse_item (u8 * s, va_list * args) +{ + vlib_parse_main_t *pm = va_arg (*args, vlib_parse_main_t *); + vlib_parse_item_t *item = va_arg (*args, vlib_parse_item_t *); + vlib_parse_type_t *type = pool_elt_at_index (pm->parse_types, item->type); + + if (item->type == word_type_index) + s = format (s, "%s", item->value.as_pointer); + else + s = format (s, "<%s>", type->name); + return s; +} + +static u8 * format_vlib_parse_graph (u8 * s, va_list * args) +{ + vlib_parse_main_t *pm = va_arg (*args, vlib_parse_main_t *); + vlib_parse_graph_t *node = va_arg (*args, vlib_parse_graph_t *); + vlib_parse_item_t *item; + vlib_parse_type_t *type; + + /* $$$ hash table */ + pool_foreach (type, 
pm->parse_types, + ({ + if (type->rule_index == node - pm->parse_graph) + s = format (s, "\n<%s>\n", type->name); + })); + + if (pm->root_index == (node - pm->parse_graph)) + s = format (s, "\n<root>\n"); + + item = pool_elt_at_index (pm->parse_items, node->item); + + s = format (s, "[%d] %U ", node - pm->parse_graph, + format_vlib_parse_item, pm, item); + + if (node->peer == (u32)~0) + s = format (s, "peer nil "); + else + s = format (s, "peer %4u ", node->peer); + + if (node->deeper == (u32)~0) + s = format (s, "deeper nil "); + else + s = format (s, "deeper %4u ", node->deeper); + + return s; +} + +void dump_parse_graph (void) +{ + vlib_parse_main_t *pm = &vlib_parse_main; + vlib_parse_graph_t *node; + + pool_foreach (node, pm->parse_graph, ({ + fformat(stdout, "%U\n", format_vlib_parse_graph, pm, node); + })); +} + +always_inline void +parse_cleanup_value (vlib_parse_main_t *pm, vlib_parse_value_t *pv) +{ + vlib_parse_type_t *type = pool_elt_at_index (pm->parse_types, pv->type); + if (type->value_cleanup_function) + type->value_cleanup_function (pv); +} + +static void parse_reset (vlib_parse_main_t *pm, u8 *input) +{ + vlib_lex_token_t *t; + vlib_parse_value_t *pv; + + vlib_lex_reset (pm->lex_main, input); + + vec_foreach (t, pm->tokens) + vlib_lex_cleanup_token (t); + + vec_foreach (pv, pm->parse_value) + parse_cleanup_value (pm, pv); + + _vec_len (pm->parse_value) = 0; + _vec_len (pm->tokens) = 0; + pm->current_token_index = 0; +} + +static void parse_help (vlib_parse_main_t *pm, u32 index) +{ + vlib_parse_graph_t *node; + vlib_parse_item_t *item; + vlib_parse_type_t *type; + vlib_main_t *vm = pm->vlib_main; + u8 *help_input; + int i; + + help_input = vec_dup (pm->lex_main->input_vector); + + for (i = vec_len(help_input)-1; i >= 0; i--) + if (help_input[i] == '?') + { + help_input[i] = 0; + _vec_len(help_input) = i; + break; + } + + for (i = vec_len(help_input)-1; i >= 0; i--) + { + if (help_input[i] != ' ' && help_input[i] != '\t') + break; + help_input[i] = 
0; + break; + } + _vec_len(help_input) = i+1; + + while (index != (u32)~0) + { + node = pool_elt_at_index (pm->parse_graph, index); + item = pool_elt_at_index (pm->parse_items, node->item); + type = pool_elt_at_index (pm->parse_types, item->type); + + if (item->type == eof_type_index && vec_len (pm->match_items) == 0) + /* do nothing */; + else if (item->type == word_type_index) + vlib_cli_output (vm, "%s %s\n", help_input, item->value.as_pointer); + else + vlib_cli_output (vm, "%s <%s>\n", help_input, type->name); + index = node->peer; + } + vec_free (help_input); +} + +static vlib_parse_match_t +parse_eval_internal (vlib_parse_main_t *pm, u32 index) +{ + vlib_parse_graph_t *node; + vlib_parse_item_t *item; + vlib_parse_type_t *type; + vlib_parse_value_t value, *pv; + vlib_parse_match_t rv; + u32 *partial_matches = 0; + vlib_lex_token_t *t; + u32 save_token_index=(u32)~0, save_match_items=0; + int had_value = 0; + + if (pm->current_token_index >= vec_len(pm->tokens)) + return VLIB_PARSE_MATCH_FAIL; + + /* current token */ + t = vec_elt_at_index (pm->tokens, pm->current_token_index); + + /* Help ? */ + if (PREDICT_FALSE(t->token == VLIB_LEX_qmark)) + { + parse_help (pm, index); + _vec_len (pm->match_items) = 0; + return VLIB_PARSE_MATCH_DONE; + } + + /* Across all peers at this level of the parse graph */ + while (index != (u32)~0) + { + node = pool_elt_at_index (pm->parse_graph, index); + item = pool_elt_at_index (pm->parse_items, node->item); + type = pool_elt_at_index (pm->parse_types, item->type); + + /* + * Save the token index. We may have to back up several + * trie plies. 
Type-specific match functions can consume + * multiple tokens, and they may not be optimally careful + */ + save_token_index = pm->current_token_index; + save_match_items = vec_len (pm->match_items); + vec_add1 (pm->match_items, node->item); + + if (PARSE_DEBUG > 1) + clib_warning ("Try to match token %U against node %d", + format_vlib_lex_token, pm->lex_main, t, index); + + /* Call the type-specific match function */ + rv = type->match_function (pm, type, t, &value); + + if (PARSE_DEBUG > 1) + clib_warning ("returned %U", format_vlib_parse_match, rv); + + switch (rv) + { + case VLIB_PARSE_MATCH_VALUE: + /* + * Matched, and returned a value to append to the + * set of args passed to the action function + */ + value.type = item->type; + vec_add1 (pm->parse_value, value); + had_value = 1; + /* fallthrough */ + + case VLIB_PARSE_MATCH_FULL: + unambiguous_partial_match: + /* Consume the matched token */ + pm->current_token_index++; + + /* continue matching along this path */ + rv = parse_eval_internal (pm, node->deeper); + + /* this is not the right path */ + if (rv == VLIB_PARSE_MATCH_FAIL) + { + if (had_value) + { + /* Delete the value */ + value = pm->parse_value [vec_len (pm->parse_value)-1]; + parse_cleanup_value (pm, &value); + _vec_len (pm->parse_value) -= 1; + } + /* Continue with the next sibling */ + pm->current_token_index = save_token_index; + _vec_len (pm->match_items) = save_match_items; + index = node->peer; + break; + } + return rv; + + case VLIB_PARSE_MATCH_PARTIAL: + /* Partial (substring) match, remember it but keep going */ + vec_add1 (partial_matches, node - pm->parse_graph); + index = node->peer; + break; + + case VLIB_PARSE_MATCH_FAIL: + /* Continue with the next sibling */ + index = node->peer; + _vec_len (pm->match_items) = save_match_items; + break; + + case VLIB_PARSE_MATCH_DONE: + /* Parse complete, invoke the action function */ + if (PARSE_DEBUG > 0) + clib_warning ("parse_value: %U", format_vlib_parse_value, pm); + + { + 
vlib_parse_eval_function_t * f = item->value.as_pointer; + if (f) + rv = f (pm, item, pm->parse_value); + } + + vec_foreach (pv, pm->parse_value) + parse_cleanup_value (pm, pv); + _vec_len (pm->parse_value) = 0; + _vec_len (pm->match_items) = 0; + return rv; + + case VLIB_PARSE_MATCH_AMBIGUOUS: + case VLIB_PARSE_MATCH_EVAL_FAIL: + case VLIB_PARSE_MATCH_RULE: + _vec_len (pm->match_items) = save_match_items; + return rv; + } + } + + /* + * Out of siblings. If we have exactly one partial match + * we win + */ + if (vec_len (partial_matches) == 1) + { + index = partial_matches[0]; + node = pool_elt_at_index (pm->parse_graph, index); + vec_free (partial_matches); + goto unambiguous_partial_match; + } + + /* Ordinary loser */ + rv = VLIB_PARSE_MATCH_FAIL; + + /* Ambiguous loser */ + if (vec_len (partial_matches) > 1) + { + vec_free (partial_matches); + rv = VLIB_PARSE_MATCH_AMBIGUOUS; + } + + _vec_len (pm->match_items) = save_match_items; + return rv; +} + +vlib_parse_match_t rule_match (vlib_parse_main_t *pm, vlib_parse_type_t *type, + vlib_lex_token_t *t, + vlib_parse_value_t *valuep) +{ + vlib_parse_match_t rv; + static int recursion_level; + + if (PARSE_DEBUG > 1) + clib_warning ("[%d]: try to match type %s graph index %d", + recursion_level, + type->name, + type->rule_index); + recursion_level++; + rv = parse_eval_internal (pm, type->rule_index); + recursion_level--; + + /* Break the recusive unwind here... 
*/ + if (rv == VLIB_PARSE_MATCH_RULE) + { + if (PARSE_DEBUG > 1) + clib_warning ("[%d]: type %s matched", recursion_level, type->name); + + return VLIB_PARSE_MATCH_FULL; + } + else + { + if (PARSE_DEBUG > 1) + clib_warning ("[%d]: type %s returns %U", recursion_level, type->name, + format_vlib_parse_match, rv); + } + return rv; +} + +static int parse_eval (vlib_parse_main_t *pm, u8 *input) +{ + vlib_lex_token_t * t; + + parse_reset (pm, input); + + /* Tokenize the entire input vector */ + do { + vec_add2 (pm->tokens, t, 1); + vlib_lex_get_token (pm->lex_main, t); + } while (t->token != VLIB_LEX_eof); + + /* Feed it to the parser */ + return parse_eval_internal (pm, pm->root_index); +} + +/* Temporary vlib stub */ +vlib_parse_match_t vlib_parse_eval (u8 *input) +{ + return parse_eval (&vlib_parse_main, input); +} + +u16 parse_type_find_or_create (vlib_parse_main_t *pm, vlib_parse_type_t *t) +{ + uword *p; + vlib_parse_type_t *n; + u8 *name_copy; + + p = hash_get_mem (pm->parse_type_by_name_hash, t->name); + if (p) + return p[0]; + + pool_get (pm->parse_types, n); + *n = *t; + n->rule_index = (u32) ~0; + + name_copy = format (0, "%s%c", n->name, 0); + + hash_set_mem (pm->parse_type_by_name_hash, name_copy, n - pm->parse_types); + return n - pm->parse_types; +} + +u16 parse_type_find_by_name (vlib_parse_main_t *pm, char *name) +{ + uword *p; + + p = hash_get_mem (pm->parse_type_by_name_hash, name); + if (p) + return p[0]; + + return (u16) ~0; +} + +u32 parse_item_find_or_create (vlib_parse_main_t *pm, vlib_parse_item_t *item) + +{ + uword *p; + vlib_parse_item_t *i; + + /* Exact match the entire item */ + p = mhash_get (&pm->parse_item_hash, item); + if (p) + return p[0]; + + pool_get (pm->parse_items, i); + *i = *item; + + mhash_set (&pm->parse_item_hash, i, i - pm->parse_items, 0); + return i - pm->parse_items; +} + +static void parse_type_and_graph_init (vlib_parse_main_t *pm) +{ + u32 eof_index; + vlib_parse_type_t type; + vlib_parse_item_t item; + + memset 
(&type, 0, sizeof (type)); + +#define foreach_token_type \ + _ (eof) \ + _ (rule_eof) \ + _ (word) \ + _ (number) \ + _ (plus) \ + _ (minus) \ + _ (star) \ + _ (slash) \ + _ (lpar) \ + _ (rpar) + +#define _(a) a##_type_index = parse_type_find_by_name (pm, #a); + foreach_token_type +#undef _ + + memset (&item, 0, sizeof (item)); + item.type = eof_type_index; + + eof_index = parse_item_find_or_create (pm, &item); + pm->root_index = (u32)~0; + +#if 0 + pool_get (pm->parse_graph, g); + memset (g, 0xff, sizeof (*g)); + g->item = eof_index; + pm->root_index = 0; +#endif +} + + + +static void tokenize (vlib_parse_main_t *pm, parse_registration_t *pr) +{ + vlib_lex_token_t *t; + pm->register_input = format (pm->register_input, + "%s%c", pr->initializer, 0); + + parse_reset (pm, pm->register_input); + + do { + vec_add2 (pm->tokens, t, 1); + vlib_lex_get_token (pm->lex_main, t); + } while (t->token != VLIB_LEX_eof); + _vec_len (pm->register_input) = 0; +} + +static int is_typed_rule (vlib_parse_main_t *pm) +{ + vlib_lex_token_t *t = vec_elt_at_index (pm->tokens, 0); + + /* <mytype> = blah blah blah */ + if (vec_len(pm->tokens) >= 4 + && t[0].token == VLIB_LEX_lt + && t[1].token == VLIB_LEX_word + && t[2].token == VLIB_LEX_gt + && t[3].token == VLIB_LEX_equals) + return 1; + return 0; +} + +static int token_matches_graph_node (vlib_parse_main_t *pm, + vlib_lex_token_t *t, + vlib_parse_graph_t *node, + vlib_parse_item_t *item, + vlib_parse_type_t *type, + u32 *token_increment) +{ + /* EOFs don't match */ + if (t->token == VLIB_LEX_eof) + return 0; + + /* New chain element is a word */ + if (t->token == VLIB_LEX_word) + { + /* but the item in hand is not a word */ + if (item->type != word_type_index) + return 0; + + /* Or it's not this particular word */ + if (strcmp (t->value.as_pointer, item->value.as_pointer)) + return 0; + *token_increment = 1; + return 1; + } + /* New chain element is a type-name: < TYPE-NAME > */ + if (t->token == VLIB_LEX_lt) + { + u16 token_type_index; 
+ + /* < TYPE > */ + if (t[1].token != VLIB_LEX_word || + t[2].token != VLIB_LEX_gt) + { + clib_warning (0, "broken type name in '%s'", pm->register_input); + return 0; + } + + token_type_index = parse_type_find_by_name (pm, t[1].value.as_pointer); + if (token_type_index == (u16)~0) + { + clib_warning (0, "unknown type '%s'", t[1].value.as_pointer); + return 0; + } + + /* Its a known type but does not match. */ + if (item->type != token_type_index) + return 0; + + *token_increment = 3; + return 1; + } + clib_warning ("BUG: t->token = %d", t->token); + return 0; +} + +u32 generate_subgraph_from_tokens (vlib_parse_main_t *pm, + vlib_lex_token_t *t, + u32 *new_subgraph_depth, + parse_registration_t *pr, + int not_a_rule) +{ + vlib_parse_graph_t *g, *last_g; + vlib_parse_item_t new_item; + u32 rv = (u32)~0, new_item_index, last_index = (u32)~0; + u16 token_type_index; + u32 depth = 0; + + while (t < pm->tokens + vec_len (pm->tokens)) + { + memset (&new_item, 0, sizeof (new_item)); + + if (t->token == VLIB_LEX_word) + { + new_item.type = word_type_index; + new_item.value.as_pointer = vec_dup ((u8 *) t->value.as_pointer); + new_item_index = parse_item_find_or_create (pm, &new_item); + t++; + } + else if (t->token == VLIB_LEX_lt) + { + if (t[1].token != VLIB_LEX_word || + t[2].token != VLIB_LEX_gt) + { + clib_warning ("broken type name in '%s'", pm->register_input); + goto screwed; + } + token_type_index = parse_type_find_by_name (pm, + t[1].value.as_pointer); + if (token_type_index == (u16)~0) + { + clib_warning ("unknown type 2 '%s'", t[1].value.as_pointer); + goto screwed; + } + + new_item.type = token_type_index; + new_item.value.as_pointer = 0; + new_item_index = parse_item_find_or_create (pm, &new_item); + t += 3; /* skip < <type-name> and > */ + } + else if (t->token == VLIB_LEX_eof) + { + screwed: + new_item.type = not_a_rule ? 
eof_type_index : rule_eof_type_index; + new_item.value.as_pointer = pr->eof_match; + new_item_index = parse_item_find_or_create (pm, &new_item); + t++; + } + else + { + clib_warning ("unexpected token %U index %d in '%s'", + format_vlib_lex_token, pm->lex_main, t, + t - pm->tokens, pm->register_input); + goto screwed; + } + + pool_get (pm->parse_graph, g); + memset (g, 0xff, sizeof (*g)); + g->item = new_item_index; + depth++; + + if (rv == (u32)~0) + { + rv = g - pm->parse_graph; + last_index = rv; + } + else + { + last_g = pool_elt_at_index (pm->parse_graph, last_index); + last_index = last_g->deeper = g - pm->parse_graph; + } + } + *new_subgraph_depth = depth; + return rv; +} + +static u32 measure_depth (vlib_parse_main_t *pm, u32 index) +{ + vlib_parse_graph_t *node; + vlib_parse_item_t *item; + u32 max=0; + u32 depth; + + if (index == (u32)~0) + return 0; + + node = pool_elt_at_index (pm->parse_graph, index); + item = pool_elt_at_index (pm->parse_items, node->item); + + if (item->type == eof_type_index) + return 1; + + while (index != (u32)~0) + { + node = pool_elt_at_index (pm->parse_graph, index); + depth = measure_depth (pm, node->deeper); + if (max < depth) + max = depth; + index = node->peer; + } + + return max + 1; +} + +static void add_subgraph_to_graph (vlib_parse_main_t *pm, + u32 last_matching_index, + u32 graph_root_index, + u32 new_subgraph_index, + u32 new_subgraph_depth) +{ + vlib_parse_graph_t *parent_node; + int new_subgraph_longest = 1; + u32 current_peer_index; + u32 current_depth; + vlib_parse_graph_t *current_peer = 0; + vlib_parse_graph_t *new_subgraph_node = + pool_elt_at_index (pm->parse_graph, new_subgraph_index); + + /* + * Case 1: top-level peer. 
Splice into the top-level + * peer chain according to rule depth + */ + if (last_matching_index == (u32)~0) + { + u32 index = graph_root_index; + while (1) { + current_peer = pool_elt_at_index (pm->parse_graph, index); + current_depth = measure_depth (pm, index); + if (current_depth < new_subgraph_depth + || current_peer->peer == (u32)~0) + break; + index = current_peer->peer; + } + new_subgraph_node->peer = current_peer->peer; + current_peer->peer = new_subgraph_index; + return; + } + + parent_node = pool_elt_at_index (pm->parse_graph, last_matching_index); + current_peer_index = parent_node->deeper; + + while (current_peer_index != (u32)~0) + { + current_peer = pool_elt_at_index (pm->parse_graph, current_peer_index); + current_depth = measure_depth (pm, current_peer_index); + if (current_depth < new_subgraph_depth) + break; + new_subgraph_longest = 0; + current_peer_index = current_peer->peer; + } + + ASSERT (current_peer); + + if (new_subgraph_longest) + { + new_subgraph_node->peer = parent_node->deeper; + parent_node->deeper = new_subgraph_index; + } + else + { + new_subgraph_node->peer = current_peer->peer; + current_peer->peer = new_subgraph_index; + } +} + +static clib_error_t * +parse_register_one (vlib_parse_main_t *pm, parse_registration_t *pr) +{ + u32 graph_root_index; + u16 subgraph_type_index = (u16)~0; + vlib_parse_type_t *subgraph_type = 0; + vlib_lex_token_t *t; + vlib_parse_graph_t *node; + u32 node_index, last_index, token_increment, new_subgraph_index; + u32 new_subgraph_depth, last_matching_index; + vlib_parse_item_t *item; + vlib_parse_type_t *type; + + int use_main_graph = 1; + + tokenize (pm, pr); + + /* A typed rule? 
*/ + if (is_typed_rule (pm)) + { + /* Get the type and its current subgraph root, if any */ + t = vec_elt_at_index (pm->tokens, 1); + subgraph_type_index = parse_type_find_by_name (pm, t->value.as_pointer); + if (subgraph_type_index == (u16)~0) + return clib_error_return (0, "undeclared type '%s'", + t->value.as_pointer); + subgraph_type = pool_elt_at_index (pm->parse_types, subgraph_type_index); + graph_root_index = subgraph_type->rule_index; + /* Skip "mytype> = */ + t += 3; + use_main_graph = 0; + } + else + { + /* top-level graph */ + graph_root_index = pm->root_index; + t = vec_elt_at_index (pm->tokens, 0); + } + + last_matching_index = (u32)~0; + last_index = node_index = graph_root_index; + + /* Find the first token which isn't already being parsed */ + while (t < pm->tokens + vec_len (pm->tokens) && node_index != (u32) ~0) + { + node = pool_elt_at_index (pm->parse_graph, node_index); + item = pool_elt_at_index (pm->parse_items, node->item); + type = pool_elt_at_index (pm->parse_types, item->type); + last_index = node_index; + + if (token_matches_graph_node (pm, t, node, item, type, &token_increment)) + { + t += token_increment; + last_matching_index = node_index; + node_index = node->deeper; + } + else + node_index = node->peer; + } + + new_subgraph_index = + generate_subgraph_from_tokens (pm, t, &new_subgraph_depth, pr, + use_main_graph); + + /* trivial cases: first graph node or first type rule */ + if (graph_root_index == (u32)~0) + { + if (use_main_graph) + pm->root_index = new_subgraph_index; + else + subgraph_type->rule_index = new_subgraph_index; + return 0; + } + + add_subgraph_to_graph (pm, last_matching_index, graph_root_index, + new_subgraph_index, + new_subgraph_depth); + return 0; +} + +static clib_error_t * +parse_register (vlib_main_t * vm, + parse_registration_t * lo, + parse_registration_t * hi, + vlib_parse_main_t *pm) +{ + parse_registration_t * pr; + + for (pr = lo; pr < hi; pr = vlib_elf_section_data_next (pr, 0)) + vec_add1 
(pm->parse_registrations, pr); + + return 0; +} + +static clib_error_t * +parse_register_one_type (vlib_parse_main_t *pm, vlib_parse_type_t *rp) +{ + (void) parse_type_find_or_create (pm, (vlib_parse_type_t *)rp); + return 0; +} + +static clib_error_t * +parse_type_register (vlib_main_t * vm, + vlib_parse_type_t * lo, + vlib_parse_type_t * hi, + vlib_parse_main_t *pm) +{ + clib_error_t * error = 0; + vlib_parse_type_t * ptr; + + for (ptr = lo; ptr < hi; ptr = vlib_elf_section_data_next (ptr, 0)) { + error = parse_register_one_type (pm, ptr); + if (error) + goto done; + } + + done: + return error; +} + +clib_error_t *vlib_stdlex_init (vlib_main_t *vm) __attribute__((weak)); +clib_error_t *vlib_stdlex_init (vlib_main_t *vm) +{ + (void) vlib_lex_add_table ("ignore_everything"); + return 0; +} + +static int compute_rule_length (parse_registration_t *r) +{ + int length, i; + vlib_parse_main_t *pm = &vlib_parse_main; + + if (r->rule_length) + return r->rule_length; + + length = 0; + + tokenize (pm, r); + length = vec_len (pm->tokens); + + /* Account for "<foo> = " in "<foo> = bar" etc. 
*/ + if (is_typed_rule (pm)) + length -= 2; + + for (i = 0; i < vec_len (pm->tokens); i++) + { + switch (pm->tokens[i].token) + { + case VLIB_LEX_lt: + case VLIB_LEX_gt: + length -= 1; + + default: + break; + } + } + + ASSERT (length > 0); + r->rule_length = length; + return length; +} + +static int rule_length_compare (parse_registration_t *r1, + parse_registration_t *r2) +{ + compute_rule_length (r1); + compute_rule_length (r2); + /* Descending sort */ + return r2->rule_length - r1->rule_length; +} + + +static clib_error_t * parse_init (vlib_main_t *vm) +{ + vlib_parse_main_t *pm = &vlib_parse_main; + vlib_lex_main_t *lm = &vlib_lex_main; + vlib_elf_section_bounds_t * b, * bounds; + clib_error_t * error = 0; + parse_registration_t *rule; + int i; + + if ((error = vlib_call_init_function (vm, lex_onetime_init))) + return error; + + if ((error = vlib_stdlex_init(vm))) + return error; + + if ((error = vlib_call_init_function (vm, parse_builtin_init))) + return error; + + pm->vlib_main = vm; + pm->lex_main = lm; + + mhash_init (&pm->parse_item_hash, sizeof (u32), sizeof (vlib_parse_item_t)); + pm->parse_type_by_name_hash = hash_create_string (0, sizeof (u32)); + + vec_validate (pm->parse_value, 16); + vec_validate (pm->tokens, 16); + vec_validate (pm->register_input, 32); + vec_validate (pm->match_items, 16); + + _vec_len (pm->parse_value) = 0; + _vec_len (pm->tokens) = 0; + _vec_len (pm->register_input) = 0; + _vec_len (pm->match_items) = 0; + + bounds = vlib_get_elf_section_bounds (vm, "parse_type_registrations"); + vec_foreach (b, bounds) + { + error = parse_type_register (vm, b->lo, b->hi, pm); + if (error) + break; + } + vec_free (bounds); + + parse_type_and_graph_init (pm); + + bounds = vlib_get_elf_section_bounds (vm, "parse_registrations"); + vec_foreach (b, bounds) + { + error = parse_register (vm, b->lo, b->hi, pm); + if (error) + break; + } + vec_free (bounds); + + vec_sort (pm->parse_registrations, r1, r2, + rule_length_compare (r1[0], r2[0])); + + for (i 
= 0; i < vec_len (pm->parse_registrations); i++) + { + rule = pm->parse_registrations[i]; + parse_register_one (pm, rule); + } + + return error; +} + +VLIB_INIT_FUNCTION (parse_init); diff --git a/vlib/vlib/parse.h b/vlib/vlib/parse.h new file mode 100644 index 00000000000..5b9acebf774 --- /dev/null +++ b/vlib/vlib/parse.h @@ -0,0 +1,171 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef included_vlib_parse_h +#define included_vlib_parse_h + +#include <vlib/vlib.h> +#include <vlib/lex.h> +#include <vppinfra/mhash.h> + +typedef struct { + /* Word aligned value. */ + union { + u8 as_u8[32 - 1 * sizeof (u16)]; + void * as_pointer; + uword as_uword; + word as_word; + u64 as_u64; + } value; + + /* 16 bit type at end so that 30 bytes of value are aligned. */ + u16 type; +} __attribute ((packed)) vlib_parse_value_t; + +/* Instance of a type. */ +typedef struct { + u32 type; + + u32 origin; + + u32 help_index; + + union { + void * as_pointer; + uword as_uword; + } value; +} vlib_parse_item_t; + +typedef struct { + /* Index of item for this node. */ + u32 item; + + /* Graph index of peer (sibling) node (linked list of peers). */ + u32 peer; + + /* Graph index of deeper (child) node (linked list of children). 
*/ + u32 deeper; +} vlib_parse_graph_t; + +#define foreach_parse_match_type \ + _(MATCH_DONE) \ + _(MATCH_RULE) \ + _(MATCH_FAIL) \ + _(MATCH_FULL) \ + _(MATCH_VALUE) \ + _(MATCH_PARTIAL) \ + _(MATCH_AMBIGUOUS) \ + _(MATCH_EVAL_FAIL) + +typedef enum { +#define _(a) VLIB_PARSE_##a, + foreach_parse_match_type +#undef _ +} vlib_parse_match_t; + +struct vlib_parse_type; +struct vlib_parse_main; + +typedef vlib_parse_match_t (vlib_parse_match_function_t) + (struct vlib_parse_main *, + struct vlib_parse_type *, + vlib_lex_token_t *, + vlib_parse_value_t *); +typedef void (vlib_parse_value_cleanup_function_t) (vlib_parse_value_t *); + +typedef struct vlib_parse_type { + /* Type name. */ + char * name; + + vlib_parse_match_function_t * match_function; + + vlib_parse_value_cleanup_function_t * value_cleanup_function; + + format_function_t * format_value; + + u32 rule_index; +} vlib_parse_type_t; + +typedef struct { + char *initializer; + void * eof_match; + int rule_length; +} parse_registration_t; + +typedef struct vlib_parse_main { + /* (type, origin, help, value) tuples */ + vlib_parse_item_t *parse_items; + mhash_t parse_item_hash; + + /* (item, peer, deeper) tuples */ + vlib_parse_graph_t *parse_graph; + u32 root_index; + + u8 *register_input; + + /* parser types */ + vlib_parse_type_t * parse_types; + uword *parse_type_by_name_hash; + + /* Vector of MATCH_VALUEs */ + vlib_parse_value_t * parse_value; + u32 * match_items; + + /* Parse registrations */ + parse_registration_t **parse_registrations; + + /* Token vector */ + vlib_lex_token_t *tokens; + u32 current_token_index; + + vlib_lex_main_t *lex_main; + vlib_main_t *vlib_main; +} vlib_parse_main_t; + +vlib_parse_main_t vlib_parse_main; + +typedef vlib_parse_match_t (vlib_parse_eval_function_t) + (vlib_parse_main_t *, + vlib_parse_item_t *, + vlib_parse_value_t *); + +vlib_parse_match_t vlib_parse_eval (u8 * input); + +format_function_t format_vlib_parse_value; + +/* FIXME need these to be global? 
*/ +vlib_parse_match_function_t rule_match, eof_match, word_match, number_match; + +#define _PARSE_REGISTRATION_DATA(x) \ +VLIB_ELF_SECTION_DATA(x##_registration,parse_registration_t,parse_registrations) + +#define PARSE_INIT(x, s, e) \ +static _PARSE_REGISTRATION_DATA(x) = { \ + .initializer = s, \ + .eof_match = e, \ +}; + +#define _PARSE_TYPE_REGISTRATION_DATA(x) \ +VLIB_ELF_SECTION_DATA(x##_type_registration,vlib_parse_type_t, \ +parse_type_registrations) + +#define PARSE_TYPE_INIT(n, m, c, f) \ +static _PARSE_TYPE_REGISTRATION_DATA(n) = { \ + .name = #n, \ + .match_function = m, \ + .value_cleanup_function = c, \ + .format_value = f, \ +}; + +#endif /* included_vlib_parse_h */ diff --git a/vlib/vlib/parse_builtin.c b/vlib/vlib/parse_builtin.c new file mode 100644 index 00000000000..df830db4e21 --- /dev/null +++ b/vlib/vlib/parse_builtin.c @@ -0,0 +1,132 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include <vlib/parse.h> + +always_inline void * +parse_last_match_value (vlib_parse_main_t * pm) +{ + vlib_parse_item_t * i; + i = pool_elt_at_index (pm->parse_items, + vec_elt (pm->match_items, vec_len (pm->match_items) - 1)); + return i->value.as_pointer; +} + +vlib_parse_match_t eof_match (vlib_parse_main_t *pm, vlib_parse_type_t *type, + vlib_lex_token_t *t, vlib_parse_value_t *valuep) +{ return t->token == VLIB_LEX_eof ? 
VLIB_PARSE_MATCH_DONE : VLIB_PARSE_MATCH_FAIL; } + +PARSE_TYPE_INIT (eof, eof_match, 0 /* cleanup value */, 0 /* format value */); + +vlib_parse_match_t rule_eof_match (vlib_parse_main_t *pm, vlib_parse_type_t *type, + vlib_lex_token_t *t, vlib_parse_value_t *valuep) +{ + vlib_parse_match_function_t * fp = parse_last_match_value (pm); + pm->current_token_index--; + return fp ? fp (pm, type, t, valuep) : VLIB_PARSE_MATCH_RULE; +} + +PARSE_TYPE_INIT (rule_eof, rule_eof_match, 0, 0); + +vlib_parse_match_t word_match (vlib_parse_main_t *pm, vlib_parse_type_t *type, + vlib_lex_token_t *t, vlib_parse_value_t *valuep) +{ + u8 * tv, * iv; + int i; + + if (t->token != VLIB_LEX_word) + return VLIB_PARSE_MATCH_FAIL; + + tv = t->value.as_pointer; + iv = parse_last_match_value (pm); + + for (i = 0; tv[i]; i++) + { + if (tv[i] != iv[i]) + return VLIB_PARSE_MATCH_FAIL; + } + + return iv[i] == 0 ? VLIB_PARSE_MATCH_FULL : VLIB_PARSE_MATCH_PARTIAL; +} + +PARSE_TYPE_INIT (word, word_match, 0 /* clnup value */, 0 /* format value */); + +vlib_parse_match_t number_match (vlib_parse_main_t *pm, vlib_parse_type_t *type, + vlib_lex_token_t *t, vlib_parse_value_t *valuep) +{ + if (t->token == VLIB_LEX_number) + { + valuep->value.as_uword = t->value.as_uword; + return VLIB_PARSE_MATCH_VALUE; + } + return VLIB_PARSE_MATCH_FAIL; +} + +static u8 * format_value_number (u8 * s, va_list * args) +{ + vlib_parse_value_t * v = va_arg (*args, vlib_parse_value_t *); + uword a = v->value.as_uword; + + if (BITS(uword) == 64) + s = format (s, "%lld(0x%llx)", a, a); + else + s = format (s, "%ld(0x%lx)", a, a); + return s; +} + +PARSE_TYPE_INIT (number, number_match, 0 /* cln value */, + format_value_number /* fmt value */); + + +#define foreach_vanilla_lex_match_function \ + _(plus) \ + _(minus) \ + _(star) \ + _(slash) \ + _(lpar) \ + _(rpar) + +#define LEX_MATCH_DEBUG 0 + +#define _(name) \ +vlib_parse_match_t name##_match (vlib_parse_main_t *pm, \ + vlib_parse_type_t *type, \ + vlib_lex_token_t *t, \ + 
vlib_parse_value_t *valuep) \ +{ \ + if (LEX_MATCH_DEBUG > 0) \ + clib_warning ("against %U returns %s", \ + format_vlib_lex_token, pm->lex_main, t, \ + (t->token == VLIB_LEX_##name) \ + ? "VLIB_PARSE_MATCH_FULL" : \ + "VLIB_PARSE_MATCH_FAIL"); \ + if (t->token == VLIB_LEX_##name) \ + return VLIB_PARSE_MATCH_FULL; \ + return VLIB_PARSE_MATCH_FAIL; \ +} \ + \ +PARSE_TYPE_INIT (name, name##_match, 0 /* cln value */, \ + 0 /* fmt val */); + +foreach_vanilla_lex_match_function +#undef _ + +/* So we're linked in. */ +static clib_error_t * +parse_builtin_init (vlib_main_t * vm) +{ + return 0; +} + +VLIB_INIT_FUNCTION (parse_builtin_init); diff --git a/vlib/vlib/physmem.h b/vlib/vlib/physmem.h new file mode 100644 index 00000000000..6e70291c1d9 --- /dev/null +++ b/vlib/vlib/physmem.h @@ -0,0 +1,93 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +/* + * physmem.h: virtual <-> physical memory mapping for VLIB buffers + * + * Copyright (c) 2008 Eliot Dresselhaus + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#ifndef included_vlib_physmem_h +#define included_vlib_physmem_h + +typedef struct { + uword start, end, size; +} vlib_physmem_region_t; + +typedef struct { + vlib_physmem_region_t virtual; + + uword log2_n_bytes_per_page; + + /* 1 << log2_n_bytes_per_page - 1. 
*/ + uword page_mask; + + u64 * page_table; +} vlib_physmem_main_t; + +always_inline u64 +vlib_physmem_offset_to_physical (vlib_physmem_main_t * pm, uword o) +{ + uword page_index = o >> pm->log2_n_bytes_per_page; + ASSERT (o < pm->virtual.size); + ASSERT (pm->page_table[page_index] != 0); + return (vec_elt (pm->page_table, page_index) + (o & pm->page_mask)); +} + +always_inline int +vlib_physmem_is_virtual (vlib_physmem_main_t * pm, uword p) +{ return p >= pm->virtual.start && p < pm->virtual.end; } + +always_inline uword +vlib_physmem_offset_of (vlib_physmem_main_t * pm, void * p) +{ + uword a = pointer_to_uword (p); + uword o; + + ASSERT (vlib_physmem_is_virtual (pm, a)); + o = a - pm->virtual.start; + + /* Offset must fit in 32 bits. */ + ASSERT ((uword) o == a - pm->virtual.start); + + return o; +} + +always_inline void * +vlib_physmem_at_offset (vlib_physmem_main_t * pm, uword offset) +{ + ASSERT (offset < pm->virtual.size); + return uword_to_pointer (pm->virtual.start + offset, void *); +} + +#endif /* included_vlib_physmem_h */ diff --git a/vlib/vlib/threads.c b/vlib/vlib/threads.c new file mode 100644 index 00000000000..4621f843dd5 --- /dev/null +++ b/vlib/vlib/threads.c @@ -0,0 +1,1166 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#include <signal.h> +#include <math.h> +#include <vppinfra/format.h> +#include <vlib/vlib.h> + +#include <vlib/threads.h> +#include <vlib/unix/physmem.h> + +#include <vlib/unix/cj.h> + +#if DPDK==1 +#include <rte_config.h> +#include <rte_common.h> +#include <rte_eal.h> +#include <rte_launch.h> +#include <rte_lcore.h> +#endif +DECLARE_CJ_GLOBAL_LOG; + +#define FRAME_QUEUE_NELTS 32 + + +#if DPDK==1 +/* + * Weak definitions of DPDK symbols used in this file. + * Needed for linking test programs without DPDK libs. + */ +unsigned __thread __attribute__((weak)) RTE_PER_LCORE(_lcore_id); +struct lcore_config __attribute__((weak)) lcore_config[]; +unsigned __attribute__((weak)) rte_socket_id(); +int __attribute__((weak)) rte_eal_remote_launch(); +#endif +u32 vl(void *p) +{ + return vec_len (p); +} + +void debug_hex_bytes (u8 *s, u32 n) +{ + fformat (stderr, "%U\n", format_hex_bytes, s, n); +} + +vlib_thread_main_t vlib_thread_main; + +uword +os_get_cpu_number (void) +{ + void * sp; + uword n; + u32 len; + + len = vec_len (vlib_thread_stacks); + if (len == 0) + return 0; + + /* Get any old stack address. */ + sp = &sp; + + n = ((uword)sp - (uword)vlib_thread_stacks[0]) + >> VLIB_LOG2_THREAD_STACK_SIZE; + + /* "processes" have their own stacks, and they always run in thread 0 */ + n = n >= len ? 
0 : n; + + return n; +} + +void +vlib_set_thread_name (char *name) +{ + int pthread_setname_np (pthread_t __target_thread, const char *__name); + pthread_t thread = pthread_self(); + + if (thread) + pthread_setname_np(thread, name); +} + +static int sort_registrations_by_no_clone (void *a0, void * a1) +{ + vlib_thread_registration_t ** tr0 = a0; + vlib_thread_registration_t ** tr1 = a1; + + return ((i32)((*tr0)->no_data_structure_clone) + - ((i32)((*tr1)->no_data_structure_clone))); +} + +static uword * +vlib_sysfs_list_to_bitmap(char * filename) +{ + FILE *fp; + uword *r = 0; + + fp = fopen (filename, "r"); + + if (fp != NULL) + { + u8 * buffer = 0; + vec_validate (buffer, 256-1); + if (fgets ((char *)buffer, 256, fp)) + { + unformat_input_t in; + unformat_init_string (&in, (char *) buffer, strlen ((char *) buffer)); + unformat(&in, "%U", unformat_bitmap_list, &r); + unformat_free (&in); + } + vec_free(buffer); + fclose(fp); + } + return r; +} + + +/* Called early in the init sequence */ + +clib_error_t * +vlib_thread_init (vlib_main_t * vm) +{ + vlib_thread_main_t * tm = &vlib_thread_main; + vlib_worker_thread_t * w; + vlib_thread_registration_t * tr; + u32 n_vlib_mains = 1; + u32 first_index = 1; + u32 i; + uword * avail_cpu; + + /* get bitmaps of active cpu cores and sockets */ + tm->cpu_core_bitmap = + vlib_sysfs_list_to_bitmap("/sys/devices/system/cpu/online"); + tm->cpu_socket_bitmap = + vlib_sysfs_list_to_bitmap("/sys/devices/system/node/online"); + + avail_cpu = clib_bitmap_dup(tm->cpu_core_bitmap); + + /* skip cores */ + for (i=0; i < tm->skip_cores; i++) + { + uword c = clib_bitmap_first_set(avail_cpu); + if (c == ~0) + return clib_error_return (0, "no available cpus to skip"); + + avail_cpu = clib_bitmap_set(avail_cpu, c, 0); + } + + /* grab cpu for main thread */ + if (!tm->main_lcore) + { + tm->main_lcore = clib_bitmap_first_set(avail_cpu); + if (tm->main_lcore == ~0) + return clib_error_return (0, "no available cpus to be used for the" + " main 
thread"); + } + else + { + if (clib_bitmap_get(avail_cpu, tm->main_lcore) == 0) + return clib_error_return (0, "cpu %u is not available to be used" + " for the main thread", tm->main_lcore); + } + avail_cpu = clib_bitmap_set(avail_cpu, tm->main_lcore, 0); + + /* assume that there is socket 0 only if there is no data from sysfs */ + if (!tm->cpu_socket_bitmap) + tm->cpu_socket_bitmap = clib_bitmap_set(0, 0, 1); + + /* as many threads as stacks... */ + vec_validate_aligned (vlib_worker_threads, vec_len(vlib_thread_stacks)-1, + CLIB_CACHE_LINE_BYTES); + + /* Preallocate thread 0 */ + _vec_len(vlib_worker_threads) = 1; + w = vlib_worker_threads; + w->thread_mheap = clib_mem_get_heap(); + w->thread_stack = vlib_thread_stacks[0]; + w->dpdk_lcore_id = -1; + w->lwp = syscall(SYS_gettid); + tm->n_vlib_mains = 1; + + /* assign threads to cores and set n_vlib_mains */ + tr = tm->next; + + while (tr) + { + vec_add1 (tm->registrations, tr); + tr = tr->next; + } + + vec_sort_with_function + (tm->registrations, sort_registrations_by_no_clone); + + for (i = 0; i < vec_len (tm->registrations); i++) + { + int j; + tr = tm->registrations[i]; + tr->first_index = first_index; + first_index += tr->count; + n_vlib_mains += (tr->no_data_structure_clone == 0) ? 
tr->count : 0; + + /* construct coremask */ + if (tr->use_pthreads || !tr->count) + continue; + + if (tr->coremask) + { + uword c; + clib_bitmap_foreach (c, tr->coremask, ({ + if (clib_bitmap_get(avail_cpu, c) == 0) + return clib_error_return (0, "cpu %u is not available to be used" + " for the '%s' thread",c, tr->name); + + avail_cpu = clib_bitmap_set(avail_cpu, c, 0); + })); + + } + else + { + for (j=0; j < tr->count; j++) + { + uword c = clib_bitmap_first_set(avail_cpu); + if (c == ~0) + return clib_error_return (0, "no available cpus to be used for" + " the '%s' thread", tr->name); + + avail_cpu = clib_bitmap_set(avail_cpu, c, 0); + tr->coremask = clib_bitmap_set(tr->coremask, c, 1); + } + } + } + + clib_bitmap_free(avail_cpu); + + tm->n_vlib_mains = n_vlib_mains; + + vec_validate_aligned (vlib_worker_threads, first_index-1, + CLIB_CACHE_LINE_BYTES); + + + tm->efd.enabled = VLIB_EFD_DISABLED; + tm->efd.queue_hi_thresh = ((VLIB_EFD_DEF_WORKER_HI_THRESH_PCT * + FRAME_QUEUE_NELTS)/100); + return 0; +} + +vlib_worker_thread_t * +vlib_alloc_thread (vlib_main_t * vm) +{ + vlib_worker_thread_t * w; + + if (vec_len(vlib_worker_threads) >= vec_len (vlib_thread_stacks)) + { + clib_warning ("out of worker threads... 
Quitting..."); + exit(1); + } + vec_add2 (vlib_worker_threads, w, 1); + w->thread_stack = vlib_thread_stacks[w - vlib_worker_threads]; + return w; +} + +vlib_frame_queue_t * vlib_frame_queue_alloc (int nelts) +{ + vlib_frame_queue_t * fq; + + fq = clib_mem_alloc_aligned(sizeof (*fq), CLIB_CACHE_LINE_BYTES); + memset (fq, 0, sizeof (*fq)); + fq->nelts = nelts; + fq->vector_threshold = 128; // packets + vec_validate_aligned (fq->elts, nelts-1, CLIB_CACHE_LINE_BYTES); + + if (1) + { + if (((uword)&fq->tail) & (CLIB_CACHE_LINE_BYTES - 1)) + fformat(stderr, "WARNING: fq->tail unaligned\n"); + if (((uword)&fq->head) & (CLIB_CACHE_LINE_BYTES - 1)) + fformat(stderr, "WARNING: fq->head unaligned\n"); + if (((uword)fq->elts) & (CLIB_CACHE_LINE_BYTES - 1)) + fformat(stderr, "WARNING: fq->elts unaligned\n"); + + if (sizeof (fq->elts[0]) % CLIB_CACHE_LINE_BYTES) + fformat(stderr, "WARNING: fq->elts[0] size %d\n", + sizeof (fq->elts[0])); + if (nelts & (nelts -1)) + { + fformat (stderr, "FATAL: nelts MUST be a power of 2\n"); + abort(); + } + } + + return (fq); +} + +void vl_msg_api_handler_no_free (void *) __attribute__ ((weak)); +void vl_msg_api_handler_no_free (void *v) { } + +/* Turned off, save as reference material... 
*/ +#if 0 +static inline int vlib_frame_queue_dequeue_internal (int thread_id, + vlib_main_t *vm, + vlib_node_main_t *nm) +{ + vlib_frame_queue_t *fq = vlib_frame_queues[thread_id]; + vlib_frame_queue_elt_t *elt; + vlib_frame_t *f; + vlib_pending_frame_t *p; + vlib_node_runtime_t *r; + u32 node_runtime_index; + int msg_type; + u64 before; + int processed = 0; + + ASSERT(vm == vlib_mains[thread_id]); + + while (1) + { + if (fq->head == fq->tail) + return processed; + + elt = fq->elts + ((fq->head+1) & (fq->nelts-1)); + + if (!elt->valid) + return processed; + + before = clib_cpu_time_now(); + + f = elt->frame; + node_runtime_index = elt->node_runtime_index; + msg_type = elt->msg_type; + + switch (msg_type) + { + case VLIB_FRAME_QUEUE_ELT_FREE_BUFFERS: + vlib_buffer_free (vm, vlib_frame_vector_args (f), f->n_vectors); + /* note fallthrough... */ + case VLIB_FRAME_QUEUE_ELT_FREE_FRAME: + r = vec_elt_at_index (nm->nodes_by_type[VLIB_NODE_TYPE_INTERNAL], + node_runtime_index); + vlib_frame_free (vm, r, f); + break; + case VLIB_FRAME_QUEUE_ELT_DISPATCH_FRAME: + vec_add2 (vm->node_main.pending_frames, p, 1); + f->flags |= (VLIB_FRAME_PENDING | VLIB_FRAME_FREE_AFTER_DISPATCH); + p->node_runtime_index = elt->node_runtime_index; + p->frame_index = vlib_frame_index (vm, f); + p->next_frame_index = VLIB_PENDING_FRAME_NO_NEXT_FRAME; + fq->dequeue_vectors += (u64) f->n_vectors; + break; + case VLIB_FRAME_QUEUE_ELT_API_MSG: + vl_msg_api_handler_no_free (f); + break; + default: + clib_warning ("bogus frame queue message, type %d", msg_type); + break; + } + elt->valid = 0; + fq->dequeues++; + fq->dequeue_ticks += clib_cpu_time_now() - before; + CLIB_MEMORY_BARRIER(); + fq->head++; + processed++; + } + ASSERT(0); + return processed; +} + +int vlib_frame_queue_dequeue (int thread_id, + vlib_main_t *vm, + vlib_node_main_t *nm) +{ + return vlib_frame_queue_dequeue_internal (thread_id, vm, nm); +} + +int vlib_frame_queue_enqueue (vlib_main_t *vm, u32 node_runtime_index, + u32 
frame_queue_index, vlib_frame_t *frame, + vlib_frame_queue_msg_type_t type) +{ + vlib_frame_queue_t *fq = vlib_frame_queues[frame_queue_index]; + vlib_frame_queue_elt_t *elt; + u32 save_count; + u64 new_tail; + u64 before = clib_cpu_time_now(); + + ASSERT (fq); + + new_tail = __sync_add_and_fetch (&fq->tail, 1); + + /* Wait until a ring slot is available */ + while (new_tail >= fq->head + fq->nelts) + { + f64 b4 = vlib_time_now_ticks (vm, before); + vlib_worker_thread_barrier_check (vm, b4); + /* Bad idea. Dequeue -> enqueue -> dequeue -> trouble */ + // vlib_frame_queue_dequeue (vm->cpu_index, vm, nm); + } + + elt = fq->elts + (new_tail & (fq->nelts-1)); + + /* this would be very bad... */ + while (elt->valid) + { + } + + /* Once we enqueue the frame, frame->n_vectors is owned elsewhere... */ + save_count = frame->n_vectors; + + elt->frame = frame; + elt->node_runtime_index = node_runtime_index; + elt->msg_type = type; + CLIB_MEMORY_BARRIER(); + elt->valid = 1; + + return save_count; +} +#endif /* 0 */ + +/* To be called by vlib worker threads upon startup */ +void vlib_worker_thread_init (vlib_worker_thread_t * w) +{ + vlib_thread_main_t *tm = vlib_get_thread_main(); + + /* worker threads wants no signals. 
*/ + { + sigset_t s; + sigfillset (&s); + pthread_sigmask (SIG_SETMASK, &s, 0); + } + + clib_mem_set_heap (w->thread_mheap); + + if (vec_len(tm->thread_prefix) && w->registration->short_name) + { + w->name = format(0, "%v_%s_%d%c", tm->thread_prefix, + w->registration->short_name, + w->instance_id, + '\0'); + vlib_set_thread_name((char *)w->name); + } + + if (!w->registration->use_pthreads) + { + + /* Initial barrier sync, for both worker and i/o threads */ + clib_smp_atomic_add (vlib_worker_threads->workers_at_barrier, 1); + + while (*vlib_worker_threads->wait_at_barrier) + ; + + clib_smp_atomic_add (vlib_worker_threads->workers_at_barrier, -1); + } +} + +void *vlib_worker_thread_bootstrap_fn (void *arg) +{ + void *rv; + vlib_worker_thread_t *w = arg; + + w->lwp = syscall(SYS_gettid); + w->dpdk_lcore_id = -1; +#if DPDK==1 + if (w->registration && !w->registration->use_pthreads && + rte_socket_id) /* do we really have dpdk linked */ + { + unsigned lcore = rte_lcore_id(); + lcore = lcore < RTE_MAX_LCORE ? 
lcore : -1; + w->dpdk_lcore_id = lcore; + } +#endif + + rv = (void *) clib_calljmp + ((uword (*)(uword)) w->thread_function, + (uword) arg, w->thread_stack + VLIB_THREAD_STACK_SIZE); + /* NOTREACHED, we hope */ + return rv; +} + +static int +vlib_launch_thread (void *fp, vlib_worker_thread_t *w, unsigned lcore_id) +{ + pthread_t dummy; + void *(*fp_arg)(void *) = fp; + +#if DPDK==1 + if (!w->registration->use_pthreads) + if (rte_eal_remote_launch) /* do we have dpdk linked */ + return rte_eal_remote_launch (fp, (void *)w, lcore_id); + else + return -1; + else +#endif + return pthread_create (&dummy, NULL /* attr */, fp_arg, (void *)w); +} + +static clib_error_t * start_workers (vlib_main_t * vm) +{ + int i, j; + vlib_worker_thread_t *w; + vlib_main_t *vm_clone; + void *oldheap; + vlib_frame_queue_t *fq; + vlib_thread_main_t * tm = &vlib_thread_main; + vlib_thread_registration_t * tr; + vlib_node_runtime_t * rt; + u32 n_vlib_mains = tm->n_vlib_mains; + u32 worker_thread_index; + + vec_reset_length (vlib_worker_threads); + + /* Set up the main thread */ + vec_add2_aligned (vlib_worker_threads, w, 1, CLIB_CACHE_LINE_BYTES); + w->elog_track.name = "thread 0"; + elog_track_register (&vm->elog_main, &w->elog_track); + + if (vec_len(tm->thread_prefix)) + { + w->name = format(0, "%v_main%c", tm->thread_prefix, '\0'); + vlib_set_thread_name((char *)w->name); + } + +#if DPDK==1 + w->dpdk_lcore_id = -1; + if (rte_socket_id) /* do we really have dpdk linked */ + { + unsigned lcore = rte_lcore_id(); + w->dpdk_lcore_id = lcore < RTE_MAX_LCORE ? 
lcore : -1;; + } +#endif + + if (n_vlib_mains > 1) + { + u8 * heap = clib_mem_get_per_cpu_heap(); + mheap_t * h = mheap_header (heap); + + /* make the main heap thread-safe */ + h->flags |= MHEAP_FLAG_THREAD_SAFE; + + /* Make the event-log MP-safe */ + vm->elog_main.lock = + clib_mem_alloc_aligned (CLIB_CACHE_LINE_BYTES, + CLIB_CACHE_LINE_BYTES); + + vm->elog_main.lock[0] = 0; + + vec_validate (vlib_mains, tm->n_vlib_mains - 1); + _vec_len (vlib_mains) = 0; + vec_add1 (vlib_mains, vm); + + vec_validate (vlib_frame_queues, tm->n_vlib_mains - 1); + _vec_len (vlib_frame_queues) = 0; + fq = vlib_frame_queue_alloc (FRAME_QUEUE_NELTS); + vec_add1 (vlib_frame_queues, fq); + + vlib_worker_threads->wait_at_barrier = + clib_mem_alloc_aligned (sizeof (u32), CLIB_CACHE_LINE_BYTES); + vlib_worker_threads->workers_at_barrier = + clib_mem_alloc_aligned (sizeof (u32), CLIB_CACHE_LINE_BYTES); + + /* Ask for an initial barrier sync */ + *vlib_worker_threads->workers_at_barrier = 0; + *vlib_worker_threads->wait_at_barrier = 1; + + worker_thread_index = 1; + + for (i = 0; i < vec_len(tm->registrations); i++) + { + vlib_node_main_t *nm, *nm_clone; + vlib_buffer_main_t *bm_clone; + vlib_buffer_free_list_t *fl_clone, *fl_orig; + vlib_buffer_free_list_t *orig_freelist_pool; + int k; + + tr = tm->registrations[i]; + + if (tr->count == 0) + continue; + + for (k = 0; k < tr->count; k++) + { + vec_add2 (vlib_worker_threads, w, 1); + /* + * Share the main heap which is now thread-safe. 
+ * + * To allocate separate heaps, code: + * mheap_alloc (0 / * use VM * /, tr->mheap_size); + */ + w->thread_mheap = heap; + w->thread_stack = vlib_thread_stacks[w - vlib_worker_threads]; + w->thread_function = tr->function; + w->thread_function_arg = w; + w->instance_id = k; + w->registration = tr; + + w->elog_track.name = (char *) format (0, "thread %d", i+1); + vec_add1 (w->elog_track.name, 0); + elog_track_register (&vm->elog_main, &w->elog_track); + + if (tr->no_data_structure_clone) + continue; + + /* Allocate "to-worker-N" frame queue */ + fq = vlib_frame_queue_alloc (FRAME_QUEUE_NELTS); + vec_validate (vlib_frame_queues, worker_thread_index); + vlib_frame_queues[worker_thread_index] = fq; + + /* Fork vlib_global_main et al. Look for bugs here */ + oldheap = clib_mem_set_heap (w->thread_mheap); + + vm_clone = clib_mem_alloc (sizeof (*vm_clone)); + memcpy (vm_clone, vlib_mains[0], sizeof (*vm_clone)); + + vm_clone->cpu_index = worker_thread_index; + vm_clone->heap_base = w->thread_mheap; + vm_clone->mbuf_alloc_list = 0; + memset (&vm_clone->random_buffer, 0, sizeof (vm_clone->random_buffer)); + + nm = &vlib_mains[0]->node_main; + nm_clone = &vm_clone->node_main; + /* fork next frames array, preserving node runtime indices */ + nm_clone->next_frames = vec_dup (nm->next_frames); + for (j = 0; j < vec_len (nm_clone->next_frames); j++) + { + vlib_next_frame_t *nf = &nm_clone->next_frames[j]; + u32 save_node_runtime_index; + + save_node_runtime_index = nf->node_runtime_index; + vlib_next_frame_init (nf); + nf->node_runtime_index = save_node_runtime_index; + } + + /* fork the frame dispatch queue */ + nm_clone->pending_frames = 0; + vec_validate (nm_clone->pending_frames, 10); /* $$$$$?????? 
*/ + _vec_len (nm_clone->pending_frames) = 0; + + /* fork nodes */ + nm_clone->nodes = 0; + for (j = 0; j < vec_len (nm->nodes); j++) + { + vlib_node_t *n; + n = clib_mem_alloc_no_fail (sizeof(*n)); + memcpy (n, nm->nodes[j], sizeof (*n)); + /* none of the copied nodes have enqueue rights given out */ + n->owner_node_index = VLIB_INVALID_NODE_INDEX; + memset (&n->stats_total, 0, sizeof (n->stats_total)); + memset (&n->stats_last_clear, 0, sizeof (n->stats_last_clear)); + vec_add1 (nm_clone->nodes, n); + } + nm_clone->nodes_by_type[VLIB_NODE_TYPE_INTERNAL] = + vec_dup (nm->nodes_by_type[VLIB_NODE_TYPE_INTERNAL]); + + nm_clone->nodes_by_type[VLIB_NODE_TYPE_INPUT] = + vec_dup (nm->nodes_by_type[VLIB_NODE_TYPE_INPUT]); + vec_foreach(rt, nm_clone->nodes_by_type[VLIB_NODE_TYPE_INPUT]) + rt->cpu_index = vm_clone->cpu_index; + + nm_clone->processes = vec_dup (nm->processes); + + /* zap the (per worker) frame freelists, etc */ + nm_clone->frame_sizes = 0; + nm_clone->frame_size_hash = 0; + + /* Packet trace buffers are guaranteed to be empty, nothing to do here */ + + clib_mem_set_heap (oldheap); + vec_add1 (vlib_mains, vm_clone); + + unix_physmem_init (vm_clone, 0 /* physmem not required */); + + /* Fork the vlib_buffer_main_t free lists, etc. */ + bm_clone = vec_dup (vm_clone->buffer_main); + vm_clone->buffer_main = bm_clone; + + orig_freelist_pool = bm_clone->buffer_free_list_pool; + bm_clone->buffer_free_list_pool = 0; + + pool_foreach (fl_orig, orig_freelist_pool, + ({ + pool_get_aligned (bm_clone->buffer_free_list_pool, + fl_clone, CLIB_CACHE_LINE_BYTES); + ASSERT (fl_orig - orig_freelist_pool + == fl_clone - bm_clone->buffer_free_list_pool); + + fl_clone[0] = fl_orig[0]; + fl_clone->aligned_buffers = 0; + fl_clone->unaligned_buffers = 0; + fl_clone->n_alloc = 0; + })); + + worker_thread_index++; + } + } + } + else + { + /* only have non-data-structure copy threads to create... 
*/ + for (i = 0; i < vec_len(tm->registrations); i++) + { + tr = tm->registrations[i]; + + for (j = 0; j < tr->count; j++) + { + vec_add2 (vlib_worker_threads, w, 1); + w->thread_mheap = mheap_alloc (0 /* use VM */, tr->mheap_size); + w->thread_stack = vlib_thread_stacks[w - vlib_worker_threads]; + w->thread_function = tr->function; + w->thread_function_arg = w; + w->instance_id = j; + w->elog_track.name = (char *) format (0, "thread %d", i+1); + w->registration = tr; + vec_add1 (w->elog_track.name, 0); + elog_track_register (&vm->elog_main, &w->elog_track); + } + } + } + + worker_thread_index = 1; + + for (i = 0; i < vec_len (tm->registrations); i++) + { + int j; + + tr = tm->registrations[i]; + + if (tr->use_pthreads || tm->use_pthreads) + { + for (j = 0; j < tr->count; j++) + { + w = vlib_worker_threads + worker_thread_index++; + if (vlib_launch_thread (vlib_worker_thread_bootstrap_fn, w, 0) < 0) + clib_warning ("Couldn't start '%s' pthread ", tr->name); + } + } + else + { + uword c; + clib_bitmap_foreach (c, tr->coremask, ({ + w = vlib_worker_threads + worker_thread_index++; + if (vlib_launch_thread (vlib_worker_thread_bootstrap_fn, w, c) < 0) + clib_warning ("Couldn't start DPDK lcore %d", c); + + })); + } + } + vlib_worker_thread_barrier_sync(vm); + vlib_worker_thread_barrier_release(vm); + return 0; +} + +VLIB_MAIN_LOOP_ENTER_FUNCTION (start_workers); + +void vlib_worker_thread_node_runtime_update(void) +{ + int i, j; + vlib_worker_thread_t *w; + vlib_main_t *vm; + vlib_node_main_t *nm, *nm_clone; + vlib_node_t ** old_nodes_clone; + vlib_main_t *vm_clone; + vlib_node_runtime_t * rt, * old_rt; + void *oldheap; + never_inline void + vlib_node_runtime_sync_stats (vlib_main_t * vm, + vlib_node_runtime_t * r, + uword n_calls, + uword n_vectors, + uword n_clocks); + + ASSERT (os_get_cpu_number() == 0); + + if (vec_len (vlib_mains) == 0) + return; + + vm = vlib_mains[0]; + nm = &vm->node_main; + + ASSERT (os_get_cpu_number() == 0); + ASSERT 
(*vlib_worker_threads->wait_at_barrier == 1); + + /* + * Scrape all runtime stats, so we don't lose node runtime(s) with + * pending counts, or throw away worker / io thread counts. + */ + for (j = 0; j < vec_len (nm->nodes); j++) + { + vlib_node_t * n; + n = nm->nodes[j]; + vlib_node_sync_stats (vm, n); + } + + for (i = 1; i < vec_len (vlib_mains); i++) + { + vlib_node_t * n; + + vm_clone = vlib_mains[i]; + nm_clone = &vm_clone->node_main; + + for (j = 0; j < vec_len (nm_clone->nodes); j++) + { + n = nm_clone->nodes[j]; + + rt = vlib_node_get_runtime (vm_clone, n->index); + vlib_node_runtime_sync_stats (vm_clone, rt, 0, 0, 0); + } + } + + for (i = 1; i < vec_len (vlib_mains); i++) + { + vlib_node_runtime_t * rt; + w = vlib_worker_threads + i; + oldheap = clib_mem_set_heap (w->thread_mheap); + + vm_clone = vlib_mains[i]; + + /* Re-clone error heap */ + memcpy (&vm_clone->error_main, &vm->error_main, sizeof (vm->error_main)); + + nm_clone = &vm_clone->node_main; + vec_free (nm_clone->next_frames); + nm_clone->next_frames = vec_dup (nm->next_frames); + + for (j = 0; j < vec_len (nm_clone->next_frames); j++) + { + vlib_next_frame_t *nf = &nm_clone->next_frames[j]; + u32 save_node_runtime_index; + + save_node_runtime_index = nf->node_runtime_index; + vlib_next_frame_init (nf); + nf->node_runtime_index = save_node_runtime_index; + } + + old_nodes_clone = nm_clone->nodes; + nm_clone->nodes = 0; + + /* re-fork nodes */ + for (j = 0; j < vec_len (nm->nodes); j++) { + vlib_node_t *old_n_clone; + vlib_node_t *new_n, *new_n_clone; + + new_n = nm->nodes[j]; + old_n_clone = old_nodes_clone[j]; + + new_n_clone = clib_mem_alloc_no_fail (sizeof(*new_n_clone)); + memcpy (new_n_clone, new_n, sizeof (*new_n)); + /* none of the copied nodes have enqueue rights given out */ + new_n_clone->owner_node_index = VLIB_INVALID_NODE_INDEX; + + if (j >= vec_len (old_nodes_clone)) + { + /* new node, set to zero */ + memset (&new_n_clone->stats_total, 0, + sizeof (new_n_clone->stats_total)); + 
memset (&new_n_clone->stats_last_clear, 0, + sizeof (new_n_clone->stats_last_clear)); + } + else + { + /* Copy stats if the old data is valid */ + memcpy (&new_n_clone->stats_total, + &old_n_clone->stats_total, + sizeof (new_n_clone->stats_total)); + memcpy (&new_n_clone->stats_last_clear, + &old_n_clone->stats_last_clear, + sizeof (new_n_clone->stats_last_clear)); + + /* keep previous node state */ + new_n_clone->state = old_n_clone->state; + } + vec_add1 (nm_clone->nodes, new_n_clone); + } + /* Free the old node clone */ + for (j = 0; j < vec_len(old_nodes_clone); j++) + clib_mem_free (old_nodes_clone[j]); + vec_free (old_nodes_clone); + + vec_free (nm_clone->nodes_by_type[VLIB_NODE_TYPE_INTERNAL]); + + nm_clone->nodes_by_type[VLIB_NODE_TYPE_INTERNAL] = + vec_dup (nm->nodes_by_type[VLIB_NODE_TYPE_INTERNAL]); + + /* clone input node runtime */ + old_rt = nm_clone->nodes_by_type[VLIB_NODE_TYPE_INPUT]; + + nm_clone->nodes_by_type[VLIB_NODE_TYPE_INPUT] = + vec_dup (nm->nodes_by_type[VLIB_NODE_TYPE_INPUT]); + + vec_foreach(rt, nm_clone->nodes_by_type[VLIB_NODE_TYPE_INPUT]) + { + rt->cpu_index = vm_clone->cpu_index; + } + + for (j=0; j < vec_len(old_rt); j++) + { + rt = vlib_node_get_runtime (vm_clone, old_rt[j].node_index); + rt->state = old_rt[j].state; + } + + vec_free(old_rt); + + nm_clone->processes = vec_dup (nm->processes); + + clib_mem_set_heap (oldheap); + + // vnet_main_fork_fixup (i); + } +} + +static clib_error_t * +cpu_config (vlib_main_t * vm, unformat_input_t * input) +{ + vlib_thread_registration_t *tr; + uword * p; + vlib_thread_main_t * tm = &vlib_thread_main; + u8 * name; + u64 coremask; + uword * bitmap; + u32 count; + + tm->thread_registrations_by_name = hash_create_string (0, sizeof (uword)); + tm->n_thread_stacks = 1; /* account for main thread */ + + tr = tm->next; + + while (tr) + { + hash_set_mem (tm->thread_registrations_by_name, tr->name, (uword)tr); + tr = tr->next; + } + + while (unformat_check_input(input) != UNFORMAT_END_OF_INPUT) + { + 
if (unformat (input, "main-thread-io")) + tm->main_thread_is_io_node = 1; + else if (unformat (input, "use-pthreads")) + tm->use_pthreads = 1; + else if (unformat (input, "thread-prefix %v", &tm->thread_prefix)) + ; + else if (unformat (input, "main-core %u", &tm->main_lcore)) + ; + else if (unformat (input, "skip-cores %u", &tm->skip_cores)) + ; + else if (unformat (input, "coremask-%s %llx", &name, &coremask)) + { + p = hash_get_mem (tm->thread_registrations_by_name, name); + if (p == 0) + return clib_error_return (0, "no such thread type '%s'", name); + + tr = (vlib_thread_registration_t *)p[0]; + + if (tr->use_pthreads) + return clib_error_return (0, "coremask cannot be set for '%s' threads", + name); + + tr->coremask = clib_bitmap_set_multiple + (tr->coremask, 0, coremask, BITS(coremask)); + tr->count = clib_bitmap_count_set_bits (tr->coremask); + } + else if (unformat (input, "corelist-%s %U", &name, unformat_bitmap_list, + &bitmap)) + { + p = hash_get_mem (tm->thread_registrations_by_name, name); + if (p == 0) + return clib_error_return (0, "no such thread type '%s'", name); + + tr = (vlib_thread_registration_t *)p[0]; + + if (tr->use_pthreads) + return clib_error_return (0, "corelist cannot be set for '%s' threads", + name); + + tr->coremask = bitmap; + tr->count = clib_bitmap_count_set_bits (tr->coremask); + } + else if (unformat (input, "%s %u", &name, &count)) + { + p = hash_get_mem (tm->thread_registrations_by_name, name); + if (p == 0) + return clib_error_return (0, "no such thread type '%s'", name); + + tr = (vlib_thread_registration_t *)p[0]; + if (tr->fixed_count) + return clib_error_return + (0, "number of %s threads not configurable", tr->name); + tr->count = count; + } + else + break; + } + + tr = tm->next; + + if (!tm->thread_prefix) + tm->thread_prefix = format(0, "vpp"); + + while (tr) + { + tm->n_thread_stacks += tr->count; + tm->n_pthreads += tr->count * tr->use_pthreads; + tm->n_eal_threads += tr->count * (tr->use_pthreads == 0); + tr = 
tr->next;
    }

  return 0;
}

/* Hook cpu_config up as the parser for the "cpu" startup-config section. */
VLIB_EARLY_CONFIG_FUNCTION (cpu_config, "cpu");

#if !defined (__x86_64__)
/*
 * Trap stubs: on non-x86_64 targets gcc may emit out-of-line calls for
 * 8-byte __sync atomics; these paths must never actually be reached.
 */
void __sync_fetch_and_add_8 (void)
{
  fformat(stderr, "%s called\n", __FUNCTION__);
  abort();
}
void __sync_add_and_fetch_8 (void)
{
  fformat(stderr, "%s called\n", __FUNCTION__);
  abort();
}
#endif

/* Weak no-op default; a linked-in vnet provides the real fixup. */
void vnet_main_fixup (vlib_fork_fixup_t which) __attribute__ ((weak));
void vnet_main_fixup (vlib_fork_fixup_t which) { }

/*
 * Run a fork-fixup under a worker barrier: stop all workers, let the
 * (weak) vnet hook patch the cloned data structures, then release.
 * Must be called from thread 0.
 */
void vlib_worker_thread_fork_fixup (vlib_fork_fixup_t which)
{
  vlib_main_t * vm = vlib_get_main();

  /* Nothing to fix up in single-threaded operation */
  if (vlib_mains == 0)
    return;

  ASSERT(os_get_cpu_number() == 0);
  vlib_worker_thread_barrier_sync(vm);

  switch (which)
    {
    case VLIB_WORKER_THREAD_FORK_FIXUP_NEW_SW_IF_INDEX:
      vnet_main_fixup (VLIB_WORKER_THREAD_FORK_FIXUP_NEW_SW_IF_INDEX);
      break;

    default:
      ASSERT(0);
    }
  vlib_worker_thread_barrier_release(vm);
}

/*
 * Thread-0 only: raise wait_at_barrier and spin until every worker has
 * checked in (workers spin in vlib_worker_thread_barrier_check, see
 * threads.h).  Panics if workers fail to arrive within
 * BARRIER_SYNC_TIMEOUT seconds.
 */
void vlib_worker_thread_barrier_sync(vlib_main_t *vm)
{
  f64 deadline;
  u32 count;

  if (!vlib_mains)
    return;

  /* Workers to wait for: everyone except the main thread */
  count = vec_len (vlib_mains) - 1;

  /* Tolerate recursive calls */
  if (++vlib_worker_threads[0].recursion_level > 1)
    return;

  ASSERT (os_get_cpu_number() == 0);

  deadline = vlib_time_now (vm) + BARRIER_SYNC_TIMEOUT;

  *vlib_worker_threads->wait_at_barrier = 1;
  while (*vlib_worker_threads->workers_at_barrier != count)
    {
      /* Busy-wait; tripping the deadline means a worker wedged or died */
      if (vlib_time_now(vm) > deadline)
        {
          fformat(stderr, "%s: worker thread deadlock\n", __FUNCTION__);
          os_panic();
        }
    }
}

/*
 * Thread-0 only: drop wait_at_barrier and spin until every worker has
 * left the barrier.  Balances vlib_worker_thread_barrier_sync,
 * including the recursion-level accounting.
 */
void vlib_worker_thread_barrier_release(vlib_main_t * vm)
{
  f64 deadline;

  if (!vlib_mains)
    return;

  /* Only the outermost sync/release pair does real work */
  if (--vlib_worker_threads[0].recursion_level > 0)
    return;

  deadline = vlib_time_now (vm) + BARRIER_SYNC_TIMEOUT;

  *vlib_worker_threads->wait_at_barrier = 0;

  while (*vlib_worker_threads->workers_at_barrier > 0)
    {
      if (vlib_time_now(vm) > deadline)
        {
          fformat(stderr, "%s: worker thread deadlock\n", __FUNCTION__);
          os_panic();
        }
    }
}

/* CLI: list all vlib threads with their lcore / LWP / state info. */
static clib_error_t *
show_threads_fn
(vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + vlib_worker_thread_t * w; + int i; + + vlib_cli_output (vm, "%-7s%-20s%-12s%-8s%-7s%-7s%-7s%-10s", + "ID", "Name", "Type", "LWP", + "lcore", "Core", "Socket", "State"); + + for (i = 0; i < vec_len(vlib_worker_threads); i++) + { + w = vlib_worker_threads + i; + u8 * line = NULL; + + line = format(line, "%-7d%-20s%-12s%-8d", + i, + w->name ? w->name : (u8 *) "", + w->registration ? w->registration->name : "", + w->lwp); + + int lcore = w->dpdk_lcore_id; + if (lcore > -1) + { + line = format(line, "%-7u%-7u%-7u", + lcore, + lcore_config[lcore].core_id, + lcore_config[lcore].socket_id); + + switch(lcore_config[lcore].state) + { + case WAIT: + line = format(line, "wait"); + break; + case RUNNING: + line = format(line, "running"); + break; + case FINISHED: + line = format(line, "finished"); + break; + default: + line = format(line, "unknown"); + } + } + + vlib_cli_output(vm, "%v", line); + vec_free(line); + } + + return 0; +} + + +VLIB_CLI_COMMAND (show_threads_command, static) = { + .path = "show threads", + .short_help = "Show threads", + .function = show_threads_fn, +}; diff --git a/vlib/vlib/threads.h b/vlib/vlib/threads.h new file mode 100644 index 00000000000..9ce42a1367d --- /dev/null +++ b/vlib/vlib/threads.h @@ -0,0 +1,333 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#ifndef included_vlib_threads_h +#define included_vlib_threads_h + +#include <vlib/main.h> + +vlib_main_t **vlib_mains; + +static inline uword +vlib_get_cpu_number_inline (void) +{ + void * sp; + uword n; + u32 len; + + /* Get any old stack address. */ + sp = &sp; + + n = ((uword)sp - (uword)vlib_thread_stacks[0]) >> 20; + + /* "processes" have their own stacks, and they always run in thread 0 */ + n = n >= len ? 0 : n; + + return n; +} + +void +vlib_set_thread_name (char *name); + +/* arg is actually a vlib__thread_t * */ +typedef void (vlib_thread_function_t) (void * arg); + +typedef struct vlib_thread_registration_ { + /* constructor generated list of thread registrations */ + struct vlib_thread_registration_ * next; + + /* config parameters */ + char * name; + char * short_name; + vlib_thread_function_t * function; + uword mheap_size; + int fixed_count; + u32 count; + int no_data_structure_clone; + /* All threads of this type run on pthreads */ + int use_pthreads; + u32 first_index; + uword * coremask; +} vlib_thread_registration_t; + +#define VLIB_MAX_CPUS 32 + +/* + * Objects passed around by "index" are cache-line aligned. + * We can stick the owner CPU into the low 6 bits. 
 */
#if VLIB_MAX_CPUS > 64
#error VLIB_MAX_CPUS must be <= 64
#endif

#define VLIB_CPU_MASK (VLIB_MAX_CPUS - 1) /* 0x3f, max */
#define VLIB_OFFSET_MASK (~VLIB_CPU_MASK)

/* Per-thread stack size: 1 MB */
#define VLIB_LOG2_THREAD_STACK_SIZE (20)
#define VLIB_THREAD_STACK_SIZE (1<<VLIB_LOG2_THREAD_STACK_SIZE)

typedef enum {
  VLIB_FRAME_QUEUE_ELT_DISPATCH_FRAME,
} vlib_frame_queue_msg_type_t;

/* One inter-thread frame-queue ring slot. */
typedef struct {
  /* Written last (after a memory barrier) by the enqueuer; the
     dequeuer spins on it.  Cleared when the slot is consumed. */
  volatile u32 valid;
  u32 msg_type;
  u32 n_vectors;
  u32 last_n_vectors;

  /* 256 * 4 = 1024 bytes, even mult of cache line size */
  u32 buffer_index[VLIB_FRAME_SIZE];

  /* Pad to a cache line boundary */
  u8 pad[CLIB_CACHE_LINE_BYTES - 4 * sizeof(u32)];
} vlib_frame_queue_elt_t;

/* Per-thread bookkeeping; cache-line padded to avoid false sharing. */
typedef struct {
  /* First cache line */
  volatile u32 *wait_at_barrier;      /* set by thread 0 to request a sync */
  volatile u32 *workers_at_barrier;   /* count of workers checked in */
  u8 pad0[CLIB_CACHE_LINE_BYTES - (2 * sizeof (u32 *))];

  /* Second Cache Line */
  void *thread_mheap;                 /* heap this thread allocates from */
  u8 * thread_stack;
  void (*thread_function)(void *);
  void * thread_function_arg;
  i64 recursion_level;                /* barrier sync/release nesting depth */
  elog_track_t elog_track;
  u32 instance_id;                    /* index within its registration */
  vlib_thread_registration_t *registration;
  u8 *name;

  long lwp;                           /* kernel thread id (gettid) */
  int dpdk_lcore_id;                  /* -1 when not a dpdk lcore */
} vlib_worker_thread_t;

vlib_worker_thread_t *vlib_worker_threads;

/* SPSC frame queue; enqueue and dequeue state live on separate cache
   lines so producer and consumer do not false-share. */
typedef struct {
  /* enqueue side */
  volatile u64 tail;
  u64 enqueues;
  u64 enqueue_ticks;
  u64 enqueue_vectors;
  u32 enqueue_full_events;
  u32 enqueue_efd_discards;
  u8 pad2[CLIB_CACHE_LINE_BYTES
          - (2 * sizeof(u32))
          - (4 * sizeof(u64))];

  /* dequeue side */
  volatile u64 head;
  u64 dequeues;
  u64 dequeue_ticks;
  u64 dequeue_vectors;
  u64 trace;
  u64 vector_threshold;
  u8 pad4[CLIB_CACHE_LINE_BYTES
          - (6 * sizeof(u64))];

  /* dequeue hint to enqueue side */
  volatile u64 head_hint;
  u8 pad5 [CLIB_CACHE_LINE_BYTES - sizeof(u64)];

  /* read-only, constant, shared */
  vlib_frame_queue_elt_t *elts;
  u32 nelts;                          /* ring size; power of two */
} vlib_frame_queue_t;

vlib_frame_queue_t **vlib_frame_queues;

/* Called early, in thread 0's context */
clib_error_t
* vlib_thread_init (vlib_main_t * vm);

vlib_worker_thread_t * vlib_alloc_thread (vlib_main_t * vm);

/* Hand a frame to another thread's input frame queue. */
int vlib_frame_queue_enqueue (vlib_main_t *vm, u32 node_runtime_index,
                              u32 frame_queue_index, vlib_frame_t *frame,
                              vlib_frame_queue_msg_type_t type);

int vlib_frame_queue_dequeue (int thread_id,
                              vlib_main_t *vm,
                              vlib_node_main_t *nm);

u64 dispatch_node (vlib_main_t * vm,
                   vlib_node_runtime_t * node,
                   vlib_node_type_t type,
                   vlib_node_state_t dispatch_state,
                   vlib_frame_t * frame,
                   u64 last_time_stamp);

u64 dispatch_pending_node (vlib_main_t * vm,
                           vlib_pending_frame_t * p,
                           u64 last_time_stamp);

void vlib_worker_thread_node_runtime_update(void);

void vlib_create_worker_threads (vlib_main_t *vm, int n,
                                 void (*thread_function)(void *));

void vlib_worker_thread_init (vlib_worker_thread_t * w);

/* Check for a barrier sync request every 30ms */
#define BARRIER_SYNC_DELAY (0.030000)

#if CLIB_DEBUG > 0
/* long barrier timeout, for gdb... */
#define BARRIER_SYNC_TIMEOUT (600.1)
#else
#define BARRIER_SYNC_TIMEOUT (1.0)
#endif

void vlib_worker_thread_barrier_sync(vlib_main_t *vm);
void vlib_worker_thread_barrier_release(vlib_main_t *vm);

/* Debug-build nag when an SMP-unsafe routine runs on a worker thread. */
always_inline void vlib_smp_unsafe_warning (void)
{
  if (CLIB_DEBUG > 0)
    {
      if (os_get_cpu_number())
        fformat(stderr, "%s: SMP unsafe warning...\n", __FUNCTION__);
    }
}

typedef enum {
  VLIB_WORKER_THREAD_FORK_FIXUP_ILLEGAL = 0,
  VLIB_WORKER_THREAD_FORK_FIXUP_NEW_SW_IF_INDEX,
} vlib_fork_fixup_t;

void vlib_worker_thread_fork_fixup (vlib_fork_fixup_t which);

/*
 * Worker side of the barrier protocol: if thread 0 has raised
 * wait_at_barrier, check in, busy-wait until it drops, then check out.
 * Counterpart of vlib_worker_thread_barrier_sync/release in threads.c.
 */
static inline void vlib_worker_thread_barrier_check (void)
{
  if (PREDICT_FALSE(*vlib_worker_threads->wait_at_barrier))
    {
      clib_smp_atomic_add (vlib_worker_threads->workers_at_barrier, 1);
      while (*vlib_worker_threads->wait_at_barrier)
        ;
      clib_smp_atomic_add (vlib_worker_threads->workers_at_barrier, -1);
    }
}

/* Run 'body' once per active vlib_main_t, bound to this_vlib_main;
   falls back to vlib_global_main when no per-thread mains exist. */
#define foreach_vlib_main(body)                 \
do {                                            \
  vlib_main_t ** __vlib_mains = 0,
*this_vlib_main; \ + int ii; \ + \ + if (vec_len (vlib_mains) == 0) \ + vec_add1 (__vlib_mains, &vlib_global_main); \ + else \ + { \ + for (ii = 0; ii < vec_len (vlib_mains); ii++) \ + { \ + this_vlib_main = vlib_mains[ii]; \ + if (this_vlib_main) \ + vec_add1 (__vlib_mains, this_vlib_main); \ + } \ + } \ + \ + for (ii = 0; ii < vec_len (__vlib_mains); ii++) \ + { \ + this_vlib_main = __vlib_mains[ii]; \ + /* body uses this_vlib_main... */ \ + (body); \ + } \ + vec_free (__vlib_mains); \ +} while (0); + + +/* Early-Fast-Discard (EFD) */ +#define VLIB_EFD_DISABLED 0 +#define VLIB_EFD_DISCARD_ENABLED (1 << 0) +#define VLIB_EFD_MONITOR_ENABLED (1 << 1) + +#define VLIB_EFD_DEF_WORKER_HI_THRESH_PCT 90 + +/* EFD worker thread settings */ +typedef struct vlib_efd_t { + u16 enabled; + u16 queue_hi_thresh; + u8 ip_prec_bitmap; + u8 mpls_exp_bitmap; + u8 vlan_cos_bitmap; + u8 pad; +} vlib_efd_t; + +typedef struct { + /* Link list of registrations, built by constructors */ + vlib_thread_registration_t * next; + + /* Vector of registrations, w/ non-data-structure clones at the top */ + vlib_thread_registration_t ** registrations; + + uword * thread_registrations_by_name; + + vlib_worker_thread_t * worker_threads; + + /* thread / cpu / io thread parameters */ + u32 main_thread_is_io_node; + + /* + * Launch all threads as pthreads, + * not eal_rte_launch (strict affinity) threads + */ + int use_pthreads; + + /* Number of vlib_main / vnet_main clones */ + u32 n_vlib_mains; + + /* Number of thread stacks to create */ + u32 n_thread_stacks; + + /* Number of pthreads */ + u32 n_pthreads; + + /* Number of DPDK eal threads */ + u32 n_eal_threads; + + /* Number of cores to skip, must match the core mask */ + u32 skip_cores; + + /* Thread prefix name */ + u8 *thread_prefix; + + /* main thread lcore */ + u8 main_lcore; + + /* Bitmap of available CPU cores */ + uword * cpu_core_bitmap; + + /* Bitmap of available CPU sockets (NUMA nodes) */ + uword * cpu_socket_bitmap; + + vlib_efd_t efd; 
+ +} vlib_thread_main_t; + +vlib_thread_main_t vlib_thread_main; + +#define VLIB_REGISTER_THREAD(x,...) \ + __VA_ARGS__ vlib_thread_registration_t x; \ +static void __vlib_add_thread_registration_##x (void) \ + __attribute__((__constructor__)) ; \ +static void __vlib_add_thread_registration_##x (void) \ +{ \ + vlib_thread_main_t * tm = &vlib_thread_main; \ + x.next = tm->next; \ + tm->next = &x; \ +} \ +__VA_ARGS__ vlib_thread_registration_t x + +#endif /* included_vlib_threads_h */ diff --git a/vlib/vlib/trace.c b/vlib/vlib/trace.c new file mode 100644 index 00000000000..6272d853145 --- /dev/null +++ b/vlib/vlib/trace.c @@ -0,0 +1,294 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/* + * trace.c: VLIB trace buffer. + * + * Copyright (c) 2008 Eliot Dresselhaus + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. 
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 */

#include <vlib/vlib.h>
#include <vlib/threads.h>

/*
 * Helper function for nodes which only trace buffer data: for each
 * buffer in the frame that has VLIB_BUFFER_IS_TRACED set, copy its
 * first n_buffer_data_bytes_in_trace bytes of current data into a new
 * trace record for this node.
 *
 * NOTE(review): next_buffer_stride is accepted but never used --
 * buffers are always walked with stride 1; confirm with callers.
 */
void
vlib_trace_frame_buffers_only (vlib_main_t * vm,
                               vlib_node_runtime_t * node,
                               u32 * buffers,
                               uword n_buffers,
                               uword next_buffer_stride,
                               uword n_buffer_data_bytes_in_trace)
{
  u32 n_left, * from;

  n_left = n_buffers;
  from = buffers;

  /*
   * Process two buffers per iteration, but only while at least four
   * remain so the from[2]/from[3] prefetches stay inside the frame.
   */
  while (n_left >= 4)
    {
      u32 bi0, bi1;
      vlib_buffer_t * b0, * b1;
      u8 * t0, * t1;

      /* Prefetch next iteration. */
      vlib_prefetch_buffer_with_index (vm, from[2], LOAD);
      vlib_prefetch_buffer_with_index (vm, from[3], LOAD);

      bi0 = from[0];
      bi1 = from[1];

      b0 = vlib_get_buffer (vm, bi0);
      b1 = vlib_get_buffer (vm, bi1);

      if (b0->flags & VLIB_BUFFER_IS_TRACED)
        {
          t0 = vlib_add_trace (vm, node, b0, n_buffer_data_bytes_in_trace);
          memcpy (t0, b0->data + b0->current_data,
                  n_buffer_data_bytes_in_trace);
        }
      if (b1->flags & VLIB_BUFFER_IS_TRACED)
        {
          t1 = vlib_add_trace (vm, node, b1, n_buffer_data_bytes_in_trace);
          memcpy (t1, b1->data + b1->current_data,
                  n_buffer_data_bytes_in_trace);
        }
      from += 2;
      n_left -= 2;
    }

  /* Singles: the 1..3 buffers left over from the loop above. */
  while (n_left >= 1)
    {
      u32 bi0;
      vlib_buffer_t * b0;
      u8 * t0;

      bi0 = from[0];

      b0 = vlib_get_buffer (vm, bi0);

      if (b0->flags & VLIB_BUFFER_IS_TRACED)
        {
          t0 = vlib_add_trace (vm, node, b0, n_buffer_data_bytes_in_trace);
          memcpy (t0, b0->data + b0->current_data,
                  n_buffer_data_bytes_in_trace);
        }
      from += 1;
      n_left -= 1;
    }
}

/* Free
up all trace buffer memory. */
always_inline void
clear_trace_buffer (vlib_trace_main_t * tm)
{
  int i;

  /* Note: the tm argument is immediately shadowed per thread below;
     each thread's trace pool is freed on that thread's own heap. */
  foreach_vlib_main (
  ({
    void *mainheap;

    tm = &this_vlib_main->trace_main;
    mainheap = clib_mem_set_heap (this_vlib_main->heap_base);

    for (i = 0; i < vec_len (tm->trace_buffer_pool); i++)
      if (! pool_is_free_index (tm->trace_buffer_pool, i))
        vec_free (tm->trace_buffer_pool[i]);
    pool_free (tm->trace_buffer_pool);
    clib_mem_set_heap (mainheap);
  }));
}

/*
 * Render one packet's trace records: a timestamped node-name header
 * each time the node changes, then each record via the node's
 * format_trace (or format_buffer) callback.
 */
static u8 * format_vlib_trace (u8 * s, va_list * va)
{
  vlib_main_t * vm = va_arg (*va, vlib_main_t *);
  vlib_trace_header_t * h = va_arg (*va, vlib_trace_header_t *);
  vlib_trace_header_t * e = vec_end (h);
  vlib_node_t * node, * prev_node;
  clib_time_t * ct = &vm->clib_time;
  f64 t;

  prev_node = 0;
  while (h < e)
    {
      node = vlib_get_node (vm, h->node_index);

      if (node != prev_node)
        {
          /* Timestamp relative to main-loop start, in wall-clock units */
          t = (h->time - vm->cpu_time_main_loop_start) * ct->seconds_per_clock;
          s = format (s, "\n%U: %v",
                      format_time_interval, "h:m:s:u", t,
                      node->name);
        }
      prev_node = node;

      if (node->format_trace)
        s = format (s, "\n  %U",
                    node->format_trace, vm, node, h->data);
      else
        s = format (s, "\n  %U",
                    node->format_buffer, h->data);

      h = vlib_trace_header_next (h);
    }

  return s;
}

/* Root of all trace cli commands. */
VLIB_CLI_COMMAND (trace_cli_command,static) = {
  .path = "trace",
  .short_help = "Packet tracer commands",
};

/* CLI "show trace": dump every thread's trace buffer, time-sorted. */
static clib_error_t *
cli_show_trace_buffer (vlib_main_t * vm,
                       unformat_input_t * input,
                       vlib_cli_command_t * cmd)
{
  vlib_trace_main_t * tm = &vm->trace_main;
  vlib_trace_header_t ** h, ** traces;
  u32 i, index = 0;
  char * fmt;

  /* Get active traces from pool.
*/ + + foreach_vlib_main ( + ({ + void *mainheap; + + fmt = "------------------- Start of thread %d %v -------------------"; + vlib_cli_output (vm, fmt, index, vlib_worker_threads[index].name); + + tm = &this_vlib_main->trace_main; + + mainheap = clib_mem_set_heap (this_vlib_main->heap_base); + traces = 0; + pool_foreach (h, tm->trace_buffer_pool, + ({ + vec_add1 (traces, h[0]); + })); + + if (vec_len (traces) == 0) + { + clib_mem_set_heap (mainheap); + vlib_cli_output (vm, "No packets in trace buffer"); + goto done; + } + + /* Sort them by increasing time. */ + vec_sort (traces, t0, t1, ({ + i64 dt = t0[0]->time - t1[0]->time; + dt < 0 ? -1 : (dt > 0 ? +1 : 0); + })); + + for (i = 0; i < vec_len (traces); i++) + { + clib_mem_set_heap (mainheap); + + vlib_cli_output (vm, "Packet %d\n%U\n\n", i + 1, + format_vlib_trace, vm, traces[i]); + + mainheap = clib_mem_set_heap (this_vlib_main->heap_base); + } + + done: + vec_free (traces); + clib_mem_set_heap (mainheap); + + index++; + })); + + return 0; +} + +VLIB_CLI_COMMAND (show_trace_cli,static) = { + .path = "show trace", + .short_help = "Show trace buffer", + .function = cli_show_trace_buffer, +}; + +static clib_error_t * +cli_add_trace_buffer (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + vlib_trace_main_t * tm; + vlib_trace_node_t * tn; + u32 node_index, add; + + if (unformat (input, "%U %d", unformat_vlib_node, vm, &node_index, &add)) + ; + else + return clib_error_create ("expected NODE COUNT, got `%U'", + format_unformat_error, input); + + foreach_vlib_main ( + ({ + void *oldheap; + tm = &this_vlib_main->trace_main; + + oldheap = clib_mem_set_heap (this_vlib_main->heap_base); + + vec_validate (tm->nodes, node_index); + tn = tm->nodes + node_index; + tn->limit += add; + clib_mem_set_heap (oldheap); + })); + + return 0; +} + +VLIB_CLI_COMMAND (add_trace_cli,static) = { + .path = "trace add", + .short_help = "Trace given number of packets", + .function = cli_add_trace_buffer, +}; + 
+static clib_error_t * +cli_clear_trace_buffer (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + vlib_trace_main_t * tm = &vm->trace_main; + clear_trace_buffer (tm); + return 0; +} + +VLIB_CLI_COMMAND (clear_trace_cli,static) = { + .path = "clear trace", + .short_help = "Clear trace buffer and free memory", + .function = cli_clear_trace_buffer, +}; + +/* Dummy function to get us linked in. */ +void vlib_trace_cli_reference (void) {} diff --git a/vlib/vlib/trace.h b/vlib/vlib/trace.h new file mode 100644 index 00000000000..228a22abb95 --- /dev/null +++ b/vlib/vlib/trace.h @@ -0,0 +1,75 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/* + * trace.h: VLIB trace buffer. + * + * Copyright (c) 2008 Eliot Dresselhaus + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#ifndef included_vlib_trace_h +#define included_vlib_trace_h + +#include <vppinfra/pool.h> + +typedef struct { + /* CPU time stamp trace was made. */ + u64 time; + + /* Node which generated this trace. */ + u32 node_index; + + /* Number of data words in this trace. */ + u32 n_data; + + /* Trace data follows. */ + u8 data[0]; +} vlib_trace_header_t; + +typedef struct { + /* Current number of traces in buffer. */ + u32 count; + + /* Max. number of traces to be added to buffer. */ + u32 limit; +} vlib_trace_node_t; + +typedef struct { + /* Pool of trace buffers. */ + vlib_trace_header_t ** trace_buffer_pool; + + /* Per node trace counts. */ + vlib_trace_node_t * nodes; +} vlib_trace_main_t; + +#endif /* included_vlib_trace_h */ diff --git a/vlib/vlib/trace_funcs.h b/vlib/vlib/trace_funcs.h new file mode 100644 index 00000000000..3dc7471e152 --- /dev/null +++ b/vlib/vlib/trace_funcs.h @@ -0,0 +1,167 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ +/* + * trace_funcs.h: VLIB trace buffer. + * + * Copyright (c) 2008 Eliot Dresselhaus + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#ifndef included_vlib_trace_funcs_h +#define included_vlib_trace_funcs_h + +always_inline void +vlib_validate_trace (vlib_trace_main_t * tm, vlib_buffer_t * b) +{ + /* + * this assert seems right, but goes off constantly. + * disabling it appears to make the pain go away + */ + ASSERT (1 || b->flags & VLIB_BUFFER_IS_TRACED); + ASSERT (! 
pool_is_free_index (tm->trace_buffer_pool, b->trace_index)); +} + +always_inline void * +vlib_add_trace (vlib_main_t * vm, + vlib_node_runtime_t * r, + vlib_buffer_t * b, + u32 n_data_bytes) +{ + vlib_trace_main_t * tm = &vm->trace_main; + vlib_trace_header_t * h; + u32 n_data_words; + + vlib_validate_trace (tm, b); + + n_data_bytes = round_pow2 (n_data_bytes, sizeof (h[0])); + n_data_words = n_data_bytes / sizeof (h[0]); + vec_add2_aligned (tm->trace_buffer_pool[b->trace_index], h, + 1 + n_data_words, + sizeof (h[0])); + + h->time = vm->cpu_time_last_node_dispatch; + h->n_data = n_data_words; + h->node_index = r->node_index; + + return h->data; +} + +always_inline vlib_trace_header_t * +vlib_trace_header_next (vlib_trace_header_t * h) +{ return h + 1 + h->n_data; } + +always_inline void +vlib_free_trace (vlib_main_t * vm, vlib_buffer_t * b) +{ + vlib_trace_main_t * tm = &vm->trace_main; + vlib_validate_trace (tm, b); + _vec_len (tm->trace_buffer_pool[b->trace_index]) = 0; + pool_put_index (tm->trace_buffer_pool, b->trace_index); +} + +always_inline void +vlib_trace_next_frame (vlib_main_t * vm, + vlib_node_runtime_t * r, + u32 next_index) +{ + vlib_next_frame_t * nf; + nf = vlib_node_runtime_get_next_frame (vm, r, next_index); + nf->flags |= VLIB_FRAME_TRACE; +} + +/* Mark buffer as traced and allocate trace buffer. 
*/ +always_inline void +vlib_trace_buffer (vlib_main_t * vm, + vlib_node_runtime_t * r, + u32 next_index, + vlib_buffer_t * b, + int follow_chain) +{ + vlib_trace_main_t * tm = &vm->trace_main; + vlib_trace_header_t ** h; + + vlib_trace_next_frame (vm, r, next_index); + + pool_get (tm->trace_buffer_pool, h); + + do { + b->flags |= VLIB_BUFFER_IS_TRACED; + b->trace_index = h - tm->trace_buffer_pool; + } while (follow_chain && (b = vlib_get_next_buffer (vm, b))); +} + +always_inline void +vlib_buffer_copy_trace_flag (vlib_main_t * vm, vlib_buffer_t * b, u32 bi_target) +{ + vlib_buffer_t * b_target = vlib_get_buffer (vm, bi_target); + b_target->flags |= b->flags & VLIB_BUFFER_IS_TRACED; + b_target->trace_index = b->trace_index; +} + +always_inline u32 +vlib_get_trace_count (vlib_main_t * vm, vlib_node_runtime_t * rt) +{ + vlib_trace_main_t * tm = &vm->trace_main; + vlib_trace_node_t * tn; + int n; + + if (rt->node_index >= vec_len (tm->nodes)) + return 0; + tn = tm->nodes + rt->node_index; + n = tn->limit - tn->count; + ASSERT (n >= 0); + + return n; +} + +always_inline void +vlib_set_trace_count (vlib_main_t * vm, vlib_node_runtime_t * rt, + u32 count) +{ + vlib_trace_main_t * tm = &vm->trace_main; + vlib_trace_node_t * tn = vec_elt_at_index (tm->nodes, rt->node_index); + + ASSERT (count <= tn->limit); + tn->count = tn->limit - count; +} + +/* Helper function for nodes which only trace buffer data. */ +void +vlib_trace_frame_buffers_only (vlib_main_t * vm, + vlib_node_runtime_t * node, + u32 * buffers, + uword n_buffers, + uword next_buffer_stride, + uword n_buffer_data_bytes_in_trace); + +#endif /* included_vlib_trace_funcs_h */ diff --git a/vlib/vlib/unix/cj.c b/vlib/vlib/unix/cj.c new file mode 100644 index 00000000000..665a13fa4f5 --- /dev/null +++ b/vlib/vlib/unix/cj.c @@ -0,0 +1,218 @@ +/* + *------------------------------------------------------------------ + * cj.c + * + * Copyright (c) 2013 Cisco and/or its affiliates. 
+ * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + *------------------------------------------------------------------ + */ + +#include <stdio.h> +#include <vlib/vlib.h> + +#include <vlib/unix/cj.h> + +cj_main_t cj_main; + +void +cj_log (u32 type, void * data0, void * data1) +{ + u64 new_tail; + cj_main_t * cjm = &cj_main; + cj_record_t * r; + + if (cjm->enable == 0) + return; + + new_tail = __sync_add_and_fetch (&cjm->tail, 1); + + r = (cj_record_t *) &(cjm->records[new_tail & (cjm->num_records - 1)]); + r->time = vlib_time_now (cjm->vlib_main); + r->cpu = os_get_cpu_number(); + r->type = type; + r->data[0] = (u64) data0; + r->data[1] = (u64) data1; +} + +void cj_stop(void) +{ + cj_main_t * cjm = &cj_main; + + cjm->enable = 0; +} + + +clib_error_t * cj_init (vlib_main_t * vm) +{ + cj_main_t * cjm = &cj_main; + + cjm->vlib_main = vm; + return 0; +} +VLIB_INIT_FUNCTION (cj_init); + +static clib_error_t * +cj_config (vlib_main_t * vm, unformat_input_t * input) +{ + cj_main_t * cjm = &cj_main; + int matched = 0; + int enable = 0; + + while (unformat_check_input(input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (input, "records %d", &cjm->num_records)) + matched = 1; + else if (unformat (input, "on")) + enable = 1; + else + return clib_error_return (0, "cj_config: unknown input '%U'", + format_unformat_error, input); + } + + if (matched == 0) + return 0; + + cjm->num_records = max_pow2 (cjm->num_records); + vec_validate (cjm->records, cjm->num_records-1); + 
memset (cjm->records, 0xff, cjm->num_records * sizeof (cj_record_t)); + cjm->tail = ~0; + cjm->enable = enable; + + return 0; +} + +VLIB_CONFIG_FUNCTION (cj_config, "cj"); + +void cj_enable_disable (int is_enable) +{ + cj_main_t * cjm = &cj_main; + + if (cjm->num_records) + cjm->enable = is_enable; + else + vlib_cli_output (cjm->vlib_main, "CJ not configured..."); +} + +static inline void cj_dump_one_record (cj_record_t * r) +{ + fprintf (stderr, "[%d]: %10.6f T%02d %llx %llx\n", + r->cpu, r->time, r->type, (long long unsigned int) r->data[0], + (long long unsigned int) r->data[1]); +} + +static void cj_dump_internal (u8 filter0_enable, u64 filter0, + u8 filter1_enable, u64 filter1) +{ + cj_main_t * cjm = &cj_main; + cj_record_t * r; + u32 i, index; + + if (cjm->num_records == 0) + { + fprintf (stderr, "CJ not configured...\n"); + return; + } + + if (cjm->tail == (u64)~0) + { + fprintf (stderr, "No data collected...\n"); + return; + } + + /* Has the trace wrapped? */ + index = (cjm->tail+1) & (cjm->num_records - 1); + r = &(cjm->records[index]); + + if (r->cpu != (u32)~0) + { + /* Yes, dump from tail + 1 to the end */ + for (i = index; i < cjm->num_records; i++) + { + if (filter0_enable && (r->data[0] != filter0)) + goto skip; + if (filter1_enable && (r->data[1] != filter1)) + goto skip; + cj_dump_one_record (r); + skip: + r++; + } + } + /* dump from the beginning through the final tail */ + r = cjm->records; + for (i = 0; i <= cjm->tail; i++) + { + if (filter0_enable && (r->data[0] != filter0)) + goto skip2; + if (filter1_enable && (r->data[1] != filter1)) + goto skip2; + cj_dump_one_record (r); + skip2: + r++; + } +} + +void cj_dump (void) +{ + cj_dump_internal (0, 0, 0, 0); +} + +void cj_dump_filter_data0 (u64 filter0) +{ + cj_dump_internal (1/* enable f0 */, filter0, 0, 0); +} + +void cj_dump_filter_data1 (u64 filter1) +{ + cj_dump_internal (0, 0, 1 /* enable f1 */, filter1); +} + +void cj_dump_filter_data12 (u64 filter0, u64 filter1) +{ + cj_dump_internal (1, 
filter0, 1, filter1); +} + +static clib_error_t * +cj_command_fn (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + int is_enable = -1; + int is_dump = -1; + + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) { + if (unformat (input, "enable") || unformat (input, "on")) + is_enable = 1; + else if (unformat (input, "disable") || unformat (input, "off")) + is_enable = 0; + else if (unformat (input, "dump")) + is_dump = 1; + else + return clib_error_return (0, "unknown input `%U'", + format_unformat_error, input); + } + + if (is_enable >= 0) + cj_enable_disable (is_enable); + + if (is_dump > 0) + cj_dump (); + + return 0; +} + +VLIB_CLI_COMMAND (cj_command,static) = { + .path = "cj", + .short_help = "cj", + .function = cj_command_fn, +}; + diff --git a/vlib/vlib/unix/cj.h b/vlib/vlib/unix/cj.h new file mode 100644 index 00000000000..3c37f2bf22f --- /dev/null +++ b/vlib/vlib/unix/cj.h @@ -0,0 +1,68 @@ +/* + *------------------------------------------------------------------ + * cj.h + * + * Copyright (c) 2013 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ *------------------------------------------------------------------ + */ + +#ifndef __included_cj_h__ +#define __included_cj_h__ + +typedef struct { + f64 time; + u32 cpu; + u32 type; + u64 data[2]; +} cj_record_t; + +typedef struct { + volatile u64 tail; + cj_record_t * records; + u32 num_records; + volatile u32 enable; + + vlib_main_t * vlib_main; +} cj_main_t; + +void cj_log (u32 type, void * data0, void * data1); + +/* + * Supply in application main, so we can log from any library... + * Declare a weak reference in the library, off you go. + */ + +#define DECLARE_CJ_GLOBAL_LOG \ +void cj_global_log (unsigned type, void * data0, void * data1) \ + __attribute__ ((weak)); \ + \ +unsigned __cj_type; \ +void * __cj_data0; \ +void * __cj_data1; \ + \ +void \ +cj_global_log (unsigned type, void * data0, void * data1) \ +{ \ + __cj_type = type; \ + __cj_data0 = data0; \ + __cj_data1 = data1; \ +} + +#define CJ_GLOBAL_LOG_PROTOTYPE +void cj_global_log (unsigned type, void * data0, void * data1) \ + __attribute__ ((weak)); \ + +void cj_stop(void); + +#endif /* __included_cj_h__ */ diff --git a/vlib/vlib/unix/cli.c b/vlib/vlib/unix/cli.c new file mode 100644 index 00000000000..3cb13fc8550 --- /dev/null +++ b/vlib/vlib/unix/cli.c @@ -0,0 +1,900 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/* + * cli.c: Unix stdin/socket CLI. 
+ * + * Copyright (c) 2008 Eliot Dresselhaus + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#include <vlib/vlib.h> +#include <vlib/unix/unix.h> + +#include <fcntl.h> +#include <sys/stat.h> +#include <termios.h> +#include <unistd.h> +#include <arpa/telnet.h> + +typedef struct { + u32 unix_file_index; + + /* Vector of output pending write to file descriptor. */ + u8 * output_vector; + + /* Vector of input saved by Unix input node to be processed by + CLI process. */ + u8 * input_vector; + + u8 has_history; + u8 ** command_history; + u8 * current_command; + i32 excursion; + u32 history_limit; + u8 * search_key; + int search_mode; + + u32 process_node_index; +} unix_cli_file_t; + +always_inline void +unix_cli_file_free (unix_cli_file_t * f) +{ + vec_free (f->output_vector); + vec_free (f->input_vector); +} + +typedef struct { + /* Prompt string for CLI. 
*/ + u8 * cli_prompt; + + unix_cli_file_t * cli_file_pool; + + u32 * unused_cli_process_node_indices; + + /* File pool index of current input. */ + u32 current_input_file_index; +} unix_cli_main_t; + +static unix_cli_main_t unix_cli_main; + +static void +unix_cli_add_pending_output (unix_file_t * uf, + unix_cli_file_t * cf, + u8 * buffer, + uword buffer_bytes) +{ + unix_main_t * um = &unix_main; + + vec_add (cf->output_vector, buffer, buffer_bytes); + if (vec_len (cf->output_vector) > 0) + { + int skip_update = 0 != (uf->flags & UNIX_FILE_DATA_AVAILABLE_TO_WRITE); + uf->flags |= UNIX_FILE_DATA_AVAILABLE_TO_WRITE; + if (! skip_update) + um->file_update (uf, UNIX_FILE_UPDATE_MODIFY); + } +} + +static void +unix_cli_del_pending_output (unix_file_t * uf, + unix_cli_file_t * cf, + uword n_bytes) +{ + unix_main_t * um = &unix_main; + + vec_delete (cf->output_vector, n_bytes, 0); + if (vec_len (cf->output_vector) <= 0) + { + int skip_update = 0 == (uf->flags & UNIX_FILE_DATA_AVAILABLE_TO_WRITE); + uf->flags &= ~UNIX_FILE_DATA_AVAILABLE_TO_WRITE; + if (! skip_update) + um->file_update (uf, UNIX_FILE_UPDATE_MODIFY); + } +} + +/* VLIB cli output function. 
*/ +static void unix_vlib_cli_output (uword cli_file_index, + u8 * buffer, + uword buffer_bytes) +{ + unix_main_t * um = &unix_main; + unix_cli_main_t * cm = &unix_cli_main; + unix_cli_file_t * cf; + unix_file_t * uf; + int n; + + cf = pool_elt_at_index (cm->cli_file_pool, cli_file_index); + uf = pool_elt_at_index (um->file_pool, cf->unix_file_index); + n = 0; + if (vec_len (cf->output_vector) == 0) + n = write (uf->file_descriptor, buffer, buffer_bytes); + if (n < 0 && errno != EAGAIN) + clib_unix_warning ("write"); + + else if ((word) n < (word) buffer_bytes) + { + if (n < 0) n = 0; + unix_cli_add_pending_output (uf, cf, buffer + n, buffer_bytes - n); + } +} + +static int unix_cli_line_edit (unix_main_t * um, unix_cli_file_t * cf) +{ + unix_file_t * uf = pool_elt_at_index (um->file_pool, cf->unix_file_index); + u8 * prev; + int i, j, delta; + + for (i = 0; i < vec_len (cf->input_vector); i++) + { + switch (cf->input_vector[i]) + { + case 0: + continue; + + case '?': + /* Erase the current command (if any) plus ?*/ + for (j = 0; j < (vec_len (cf->current_command)+1); j++) + unix_cli_add_pending_output (uf, cf, (u8 *) "\b \b", 3); + + unix_cli_add_pending_output (uf, cf, (u8 *) "\r\nHistory:\r\n", 12); + + for (j = 0; j < vec_len (cf->command_history); j++) + { + unix_cli_add_pending_output (uf, cf, cf->command_history[j], + vec_len(cf->command_history[j])); + unix_cli_add_pending_output (uf, cf, (u8 *) "\r\n", 2); + } + goto crlf; + + /* ^R - reverse search */ + case 'R' - '@': + case 'S' - '@': + if (cf->search_mode == 0) + { + /* Erase the current command (if any) plus ^R */ + for (j = 0; j < (vec_len (cf->current_command)+2); j++) + unix_cli_add_pending_output (uf, cf, (u8 *) "\b \b", 3); + + vec_reset_length (cf->search_key); + vec_reset_length (cf->current_command); + if (cf->input_vector[i] == 'R' - '@') + cf->search_mode = -1; + else + cf->search_mode = 1; + } + else + { + if (cf->input_vector[i] == 'R' - '@') + cf->search_mode = -1; + else + 
cf->search_mode = 1; + + cf->excursion += cf->search_mode; + unix_cli_add_pending_output (uf, cf, (u8 *) "\b \b", 3); + goto search_again; + } + break; + + /* ^U - line-kill */ + case 'U'-'@': + /* Erase the command, plus ^U */ + for (j = 0; j < (vec_len (cf->current_command)+2); j++) + unix_cli_add_pending_output (uf, cf, (u8 *) "\b \b", 3); + vec_reset_length (cf->current_command); + cf->search_mode = 0; + continue; + + /* ^P - previous, ^N - next */ + case 'P' - '@': + case 'N' - '@': + cf->search_mode = 0; + /* Erase the command, plus ^P */ + for (j = 0; j < (vec_len (cf->current_command)+2); j++) + unix_cli_add_pending_output (uf, cf, (u8 *) "\b \b", 3); + vec_reset_length (cf->current_command); + if (vec_len (cf->command_history)) + { + if (cf->input_vector[i] == 'P' - '@') + delta = -1; + else + delta = 1; + + cf->excursion += delta; + + if (cf->excursion > (i32) vec_len (cf->command_history) -1) + cf->excursion = 0; + else if (cf->excursion < 0) + cf->excursion = vec_len (cf->command_history) -1; + + prev = cf->command_history [cf->excursion]; + vec_validate (cf->current_command, vec_len(prev)-1); + + memcpy (cf->current_command, prev, vec_len(prev)); + _vec_len (cf->current_command) = vec_len(prev); + unix_cli_add_pending_output (uf, cf, cf->current_command, + vec_len (cf->current_command)); + break; + } + break; + + case 0x7f: + case 'H' - '@': + for (j = 0; j < 2; j++) + unix_cli_add_pending_output (uf, cf, (u8 *) "\b \b", 3); + if (vec_len (cf->current_command)) + { + unix_cli_add_pending_output (uf, cf, (u8 *) "\b \b", 3); + _vec_len (cf->current_command)--; + } + cf->search_mode = 0; + cf->excursion = 0; + cf->search_mode = 0; + vec_reset_length (cf->search_key); + break; + + case '\r': + case '\n': + crlf: + vec_add1 (cf->current_command, '\r'); + vec_add1 (cf->current_command, '\n'); + unix_cli_add_pending_output (uf, cf, (u8 *) "\b\b \b\b\r\n", 8); + + vec_validate (cf->input_vector, vec_len(cf->current_command)-1); + memcpy (cf->input_vector, 
cf->current_command, + vec_len(cf->current_command)); + _vec_len(cf->input_vector) = _vec_len (cf->current_command); + + if (vec_len(cf->command_history) >= cf->history_limit) + { + vec_free (cf->command_history[0]); + vec_delete (cf->command_history, 1, 0); + } + /* Don't add blank lines to the cmd history */ + if (vec_len (cf->current_command) > 2) + { + _vec_len (cf->current_command) -= 2; + vec_add1 (cf->command_history, cf->current_command); + cf->current_command = 0; + } + else + vec_reset_length (cf->current_command); + cf->excursion = 0; + cf->search_mode = 0; + vec_reset_length (cf->search_key); + return 0; + + /* telnet "mode character" blort, echo but don't process. */ + case 0xff: + unix_cli_add_pending_output (uf, cf, cf->input_vector + i, + 6); + i += 6; + continue; + + default: + if (cf->search_mode) + { + int j, k, limit, offset; + u8 * item; + + vec_add1 (cf->search_key, cf->input_vector[i]); + + search_again: + for (j = 0; j < vec_len(cf->command_history); j++) + { + if (cf->excursion > (i32) vec_len (cf->command_history) -1) + cf->excursion = 0; + else if (cf->excursion < 0) + cf->excursion = vec_len (cf->command_history) -1; + + item = cf->command_history[cf->excursion]; + + limit = (vec_len(cf->search_key) > vec_len (item)) ? 
+ vec_len(item) : vec_len (cf->search_key); + + for (offset = 0; offset <= vec_len(item) - limit; offset++) + { + for (k = 0; k < limit; k++) + { + if (item[k+offset] != cf->search_key[k]) + goto next_offset; + } + goto found_at_offset; + + next_offset: + ; + } + goto next; + + found_at_offset: + for (j = 0; j < vec_len (cf->current_command)+1; j++) + unix_cli_add_pending_output (uf, cf, (u8 *) "\b \b", 3); + + vec_validate (cf->current_command, vec_len(item)-1); + + memcpy (cf->current_command, item, vec_len(item)); + _vec_len (cf->current_command) = vec_len(item); + unix_cli_add_pending_output (uf, cf, cf->current_command, + vec_len (cf->current_command)); + goto found; + + next: + cf->excursion += cf->search_mode; + } + + unix_cli_add_pending_output (uf, cf, (u8 *)"\r\nno match..", 12); + vec_reset_length (cf->search_key); + vec_reset_length (cf->current_command); + cf->search_mode = 0; + goto crlf; + } + else + vec_add1 (cf->current_command, cf->input_vector[i]); + + found: + + break; + } + } + vec_reset_length(cf->input_vector); + return 1; +} + +static void unix_cli_process_input (unix_cli_main_t * cm, uword cli_file_index) +{ + unix_main_t * um = &unix_main; + unix_file_t * uf; + unix_cli_file_t * cf = pool_elt_at_index (cm->cli_file_pool, cli_file_index); + unformat_input_t input; + int vlib_parse_eval (u8 *); + + /* Try vlibplex first. Someday... */ + if (0 && vlib_parse_eval (cf->input_vector) == 0) + goto done; + + /* Line edit, echo, etc. */ + if (cf->has_history && unix_cli_line_edit (um, cf)) + return; + + if (um->log_fd) + { + static u8 * lv; + vec_reset_length (lv); + lv = format (lv, "%U[%d]: %v", + format_timeval, + 0 /* current bat-time */, + 0 /* current bat-format */, + cli_file_index, + cf->input_vector); + { + int rv __attribute__((unused)) = + write (um->log_fd, lv, vec_len(lv)); + } + } + + unformat_init_vector (&input, cf->input_vector); + + /* Remove leading white space from input. 
*/ + (void) unformat (&input, ""); + + cm->current_input_file_index = cli_file_index; + + if (unformat_check_input (&input) != UNFORMAT_END_OF_INPUT) + vlib_cli_input (um->vlib_main, &input, unix_vlib_cli_output, cli_file_index); + + /* Re-fetch pointer since pool may have moved. */ + cf = pool_elt_at_index (cm->cli_file_pool, cli_file_index); + + /* Zero buffer since otherwise unformat_free will call vec_free on it. */ + input.buffer = 0; + + unformat_free (&input); + + /* Re-use input vector. */ +done: + _vec_len (cf->input_vector) = 0; + + /* Prompt. */ + uf = pool_elt_at_index (um->file_pool, cf->unix_file_index); + unix_cli_add_pending_output (uf, cf, + cm->cli_prompt, + vec_len (cm->cli_prompt)); +} + +static void unix_cli_kill (unix_cli_main_t * cm, uword cli_file_index) +{ + unix_main_t * um = &unix_main; + unix_cli_file_t * cf; + unix_file_t * uf; + int i; + + cf = pool_elt_at_index (cm->cli_file_pool, cli_file_index); + uf = pool_elt_at_index (um->file_pool, cf->unix_file_index); + + /* Quit/EOF on stdin means quit program. 
*/ + if (uf->file_descriptor == 0) + clib_longjmp (&um->vlib_main->main_loop_exit, VLIB_MAIN_LOOP_EXIT_CLI); + + vec_free (cf->current_command); + vec_free (cf->search_key); + + for (i = 0; i < vec_len (cf->command_history); i++) + vec_free (cf->command_history[i]); + + vec_free (cf->command_history); + + unix_file_del (um, uf); + + unix_cli_file_free (cf); + pool_put (cm->cli_file_pool, cf); +} + +typedef enum { + UNIX_CLI_PROCESS_EVENT_READ_READY, + UNIX_CLI_PROCESS_EVENT_QUIT, +} unix_cli_process_event_type_t; + +static uword +unix_cli_process (vlib_main_t * vm, + vlib_node_runtime_t * rt, + vlib_frame_t * f) +{ + unix_cli_main_t * cm = &unix_cli_main; + uword i, * data = 0; + + while (1) + { + unix_cli_process_event_type_t event_type; + vlib_process_wait_for_event (vm); + event_type = vlib_process_get_events (vm, &data); + + switch (event_type) + { + case UNIX_CLI_PROCESS_EVENT_READ_READY: + for (i = 0; i < vec_len (data); i++) + unix_cli_process_input (cm, data[i]); + break; + + case UNIX_CLI_PROCESS_EVENT_QUIT: + /* Kill this process. */ + for (i = 0; i < vec_len (data); i++) + unix_cli_kill (cm, data[i]); + goto done; + } + + if (data) + _vec_len (data) = 0; + } + + done: + vec_free (data); + + vlib_node_set_state (vm, rt->node_index, VLIB_NODE_STATE_DISABLED); + + /* Add node index so we can re-use this process later. */ + vec_add1 (cm->unused_cli_process_node_indices, rt->node_index); + + return 0; +} + +static clib_error_t * unix_cli_write_ready (unix_file_t * uf) +{ + unix_cli_main_t * cm = &unix_cli_main; + unix_cli_file_t * cf; + int n; + + cf = pool_elt_at_index (cm->cli_file_pool, uf->private_data); + + /* Flush output vector. 
*/ + n = write (uf->file_descriptor, + cf->output_vector, vec_len (cf->output_vector)); + + if (n < 0 && errno != EAGAIN) + return clib_error_return_unix (0, "write"); + + else if (n > 0) + unix_cli_del_pending_output (uf, cf, n); + + return /* no error */ 0; +} + +static clib_error_t * unix_cli_read_ready (unix_file_t * uf) +{ + unix_main_t * um = &unix_main; + unix_cli_main_t * cm = &unix_cli_main; + unix_cli_file_t * cf; + uword l; + int n, n_read, n_try; + + cf = pool_elt_at_index (cm->cli_file_pool, uf->private_data); + + n = n_try = 4096; + while (n == n_try) { + l = vec_len (cf->input_vector); + vec_resize (cf->input_vector, l + n_try); + + n = read (uf->file_descriptor, cf->input_vector + l, n_try); + + /* Error? */ + if (n < 0 && errno != EAGAIN) + return clib_error_return_unix (0, "read"); + + n_read = n < 0 ? 0 : n; + _vec_len (cf->input_vector) = l + n_read; + } + + if (! (n < 0)) + vlib_process_signal_event (um->vlib_main, + cf->process_node_index, + (n_read == 0 + ? UNIX_CLI_PROCESS_EVENT_QUIT + : UNIX_CLI_PROCESS_EVENT_READ_READY), + /* event data */ uf->private_data); + + return /* no error */ 0; +} + +static u32 unix_cli_file_add (unix_cli_main_t * cm, char * name, int fd) +{ + unix_main_t * um = &unix_main; + unix_cli_file_t * cf; + unix_file_t * uf, template = {0}; + vlib_main_t * vm = um->vlib_main; + vlib_node_t * n; + + name = (char *) format (0, "unix-cli-%s", name); + + if (vec_len (cm->unused_cli_process_node_indices) > 0) + { + uword l = vec_len (cm->unused_cli_process_node_indices); + + /* Find node and give it new name. 
*/ + n = vlib_get_node (vm, cm->unused_cli_process_node_indices[l - 1]); + vec_free (n->name); + n->name = (u8 *) name; + + vlib_node_set_state (vm, n->index, VLIB_NODE_STATE_POLLING); + + _vec_len (cm->unused_cli_process_node_indices) = l - 1; + } + else + { + static vlib_node_registration_t r = { + .function = unix_cli_process, + .type = VLIB_NODE_TYPE_PROCESS, + .process_log2_n_stack_bytes = 14, + }; + + r.name = name; + vlib_register_node (vm, &r); + vec_free (name); + + n = vlib_get_node (vm, r.index); + } + + pool_get (cm->cli_file_pool, cf); + memset (cf, 0, sizeof (*cf)); + + template.read_function = unix_cli_read_ready; + template.write_function = unix_cli_write_ready; + template.file_descriptor = fd; + template.private_data = cf - cm->cli_file_pool; + + cf->process_node_index = n->index; + cf->unix_file_index = unix_file_add (um, &template); + cf->output_vector = 0; + cf->input_vector = 0; + + uf = pool_elt_at_index (um->file_pool, cf->unix_file_index); + + /* Prompt. */ + unix_cli_add_pending_output (uf, cf, + cm->cli_prompt, vec_len (cm->cli_prompt)); + + vlib_start_process (vm, n->runtime_index); + return cf - cm->cli_file_pool; +} + +static clib_error_t * unix_cli_listen_read_ready (unix_file_t * uf) +{ + unix_main_t * um = &unix_main; + unix_cli_main_t * cm = &unix_cli_main; + clib_socket_t * s = &um->cli_listen_socket; + clib_socket_t client; + char * client_name; + clib_error_t * error; + unix_cli_file_t * cf; + u32 cf_index; + + error = clib_socket_accept (s, &client); + if (error) + return error; + + client_name = (char *) format (0, "%U%c", format_sockaddr, &client.peer, 0); + + cf_index = unix_cli_file_add (cm, client_name, client.fd); + cf = pool_elt_at_index (cm->cli_file_pool, cf_index); + + /* No longer need CLIB version of socket. 
*/ + clib_socket_free (&client); + + vec_free (client_name); + + /* if we're supposed to run telnet session in character mode (default) */ + if (um->cli_line_mode == 0) + { + u8 charmode_option[6]; + + cf->has_history = 1; + cf->history_limit = um->cli_history_limit ? um->cli_history_limit : 50; + + /* + * Set telnet client character mode, echo on, suppress "go-ahead" + * Empirically, this sequence works. YMMV. + */ + + /* Tell the client no linemode, echo */ + charmode_option[0] = IAC; + charmode_option[1] = DONT; + charmode_option[2] = TELOPT_LINEMODE; + charmode_option[3] = IAC; + charmode_option[4] = DO; + charmode_option[5] = TELOPT_SGA; + + uf = pool_elt_at_index (um->file_pool, cf->unix_file_index); + + unix_cli_add_pending_output (uf, cf, charmode_option, + ARRAY_LEN(charmode_option)); + } + + return error; +} + +static clib_error_t * +unix_cli_config (vlib_main_t * vm, unformat_input_t * input) +{ + unix_main_t * um = &unix_main; + unix_cli_main_t * cm = &unix_cli_main; + int flags, standard_input_fd; + clib_error_t * error; + + /* We depend on unix flags being set. */ + if ((error = vlib_call_config_function (vm, unix_config))) + return error; + + if (um->flags & UNIX_FLAG_INTERACTIVE) + { + standard_input_fd = 0; + + /* Set stdin to be non-blocking. */ + if ((flags = fcntl (standard_input_fd, F_GETFL, 0)) < 0) + flags = 0; + fcntl (standard_input_fd, F_SETFL, flags | O_NONBLOCK); + + unix_cli_file_add (cm, "stdin", standard_input_fd); + } + + { + /* CLI listen. */ + clib_socket_t * s = &um->cli_listen_socket; + unix_file_t template = {0}; + + s->flags = SOCKET_IS_SERVER; /* listen, don't connect */ + + error = clib_socket_init (s); + if (error) + return error; + + template.read_function = unix_cli_listen_read_ready; + template.file_descriptor = s->fd; + + unix_file_add (um, &template); + } + + /* Set CLI prompt. */ + if (! 
cm->cli_prompt) + cm->cli_prompt = format (0, "VLIB: "); + + return 0; +} + +VLIB_CONFIG_FUNCTION (unix_cli_config, "unix-cli"); + +void vlib_unix_cli_set_prompt (char * prompt) +{ + char * fmt = (prompt[strlen(prompt)-1] == ' ') ? "%s" : "%s "; + unix_cli_main_t * cm = &unix_cli_main; + if (cm->cli_prompt) + vec_free (cm->cli_prompt); + cm->cli_prompt = format (0, fmt, prompt); +} + +static clib_error_t * +unix_cli_quit (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + unix_cli_main_t * cm = &unix_cli_main; + + vlib_process_signal_event (vm, + vlib_current_process (vm), + UNIX_CLI_PROCESS_EVENT_QUIT, + cm->current_input_file_index); + return 0; +} + +VLIB_CLI_COMMAND (unix_cli_quit_command, static) = { + .path = "quit", + .short_help = "Exit CLI", + .function = unix_cli_quit, +}; + +static clib_error_t * +unix_cli_exec (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + char * file_name; + int fd; + unformat_input_t sub_input; + clib_error_t * error; + + file_name = 0; + fd = -1; + error = 0; + + if (! unformat (input, "%s", &file_name)) + { + error = clib_error_return (0, "expecting file name, got `%U'", + format_unformat_error, input); + goto done; + } + + fd = open (file_name, O_RDONLY); + if (fd < 0) + { + error = clib_error_return_unix (0, "failed to open `%s'", file_name); + goto done; + } + + /* Make sure its a regular file. */ + { + struct stat s; + + if (fstat (fd, &s) < 0) + { + error = clib_error_return_unix (0, "failed to stat `%s'", file_name); + goto done; + } + + if (! 
(S_ISREG (s.st_mode) || S_ISLNK (s.st_mode))) + { + error = clib_error_return (0, "not a regular file `%s'", file_name); + goto done; + } + } + + unformat_init_unix_file (&sub_input, fd); + + vlib_cli_input (vm, &sub_input, 0, 0); + unformat_free (&sub_input); + + done: + if (fd > 0) + close (fd); + vec_free (file_name); + + return error; +} + +VLIB_CLI_COMMAND (cli_exec, static) = { + .path = "exec", + .short_help = "Execute commands from file", + .function = unix_cli_exec, +}; + +static clib_error_t * +unix_show_errors (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + unix_main_t * um = &unix_main; + clib_error_t * error = 0; + int i, n_errors_to_show; + unix_error_history_t * unix_errors = 0; + + n_errors_to_show = 1 << 30; + + if (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) + { + if (! unformat (input, "%d", &n_errors_to_show)) + { + error = clib_error_return (0, "expecting integer number of errors to show, got `%U'", + format_unformat_error, input); + goto done; + } + } + + n_errors_to_show = clib_min (ARRAY_LEN (um->error_history), n_errors_to_show); + + i = um->error_history_index > 0 ? um->error_history_index - 1 : ARRAY_LEN (um->error_history) - 1; + + while (n_errors_to_show > 0) + { + unix_error_history_t * eh = um->error_history + i; + + if (! 
eh->error) + break; + + vec_add1 (unix_errors, eh[0]); + n_errors_to_show -= 1; + if (i == 0) + i = ARRAY_LEN (um->error_history) - 1; + else + i--; + } + + if (vec_len (unix_errors) == 0) + vlib_cli_output (vm, "no Unix errors so far"); + else + { + vlib_cli_output (vm, "%Ld total errors seen", um->n_total_errors); + for (i = vec_len (unix_errors) - 1; i >= 0; i--) + { + unix_error_history_t * eh = vec_elt_at_index (unix_errors, i); + vlib_cli_output (vm, "%U: %U", + format_time_interval, "h:m:s:u", eh->time, + format_clib_error, eh->error); + } + vlib_cli_output (vm, "%U: time now", + format_time_interval, "h:m:s:u", vlib_time_now (vm)); + } + + done: + vec_free (unix_errors); + return error; +} + +VLIB_CLI_COMMAND (cli_unix_show_errors, static) = { + .path = "show unix-errors", + .short_help = "Show Unix system call error history", + .function = unix_show_errors, +}; + +static clib_error_t * +unix_cli_init (vlib_main_t * vm) +{ + return 0; +} + +VLIB_INIT_FUNCTION (unix_cli_init); diff --git a/vlib/vlib/unix/input.c b/vlib/vlib/unix/input.c new file mode 100644 index 00000000000..ea10e4fc354 --- /dev/null +++ b/vlib/vlib/unix/input.c @@ -0,0 +1,244 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +/* + * input.c: Unix file input + * + * Copyright (c) 2008 Eliot Dresselhaus + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#include <vlib/vlib.h> +#include <vlib/unix/unix.h> +#include <signal.h> + +/* FIXME autoconf */ +#define HAVE_LINUX_EPOLL + +#ifdef HAVE_LINUX_EPOLL + +#include <sys/epoll.h> + +typedef struct { + int epoll_fd; + struct epoll_event * epoll_events; + + /* Statistics. 
*/ + u64 epoll_files_ready; + u64 epoll_waits; +} linux_epoll_main_t; + +static linux_epoll_main_t linux_epoll_main; + +static void +linux_epoll_file_update (unix_file_t * f, + unix_file_update_type_t update_type) +{ + unix_main_t * um = &unix_main; + linux_epoll_main_t * em = &linux_epoll_main; + struct epoll_event e; + + memset (&e, 0, sizeof (e)); + + e.events = EPOLLIN; + if (f->flags & UNIX_FILE_DATA_AVAILABLE_TO_WRITE) + e.events |= EPOLLOUT; + e.data.u32 = f - um->file_pool; + + if (epoll_ctl (em->epoll_fd, + (update_type == UNIX_FILE_UPDATE_ADD + ? EPOLL_CTL_ADD + : (update_type == UNIX_FILE_UPDATE_MODIFY + ? EPOLL_CTL_MOD + : EPOLL_CTL_DEL)), + f->file_descriptor, + &e) < 0) + clib_warning ("epoll_ctl"); +} + +static uword +linux_epoll_input (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * frame) +{ + unix_main_t * um = &unix_main; + linux_epoll_main_t * em = &linux_epoll_main; + struct epoll_event * e; + int n_fds_ready; + + { + vlib_node_main_t * nm = &vm->node_main; + u64 t = nm->cpu_time_next_process_ready; + f64 timeout; + int timeout_ms, max_timeout_ms = 10; + f64 vector_rate = vlib_last_vectors_per_main_loop (vm); + + if (t == ~0ULL) + { + timeout = 10e-3; + timeout_ms = max_timeout_ms; + } + else + { + timeout = + (((i64) t - (i64) clib_cpu_time_now ()) + * vm->clib_time.seconds_per_clock) + /* subtract off some slop time */ - 50e-6; + timeout_ms = timeout * 1e3; + + /* Must be between 1 and 10 ms. */ + timeout_ms = clib_max (1, timeout_ms); + timeout_ms = clib_min (max_timeout_ms, timeout_ms); + } + + /* If we still have input nodes polling (e.g. vnet packet generator) + don't sleep. */ + if (nm->input_node_counts_by_state[VLIB_NODE_STATE_POLLING] > 0) + timeout_ms = 0; + + if (vector_rate > 1) + { + /* When busy don't wait & only epoll for input every 8 times + through main loop. */ + timeout_ms = 0; + node->input_main_loops_per_call = 1024; + } + else + /* We're not busy; go to sleep for a while. 
*/ + node->input_main_loops_per_call = 0; + + /* Allow any signal to wakeup our sleep. */ + { + static sigset_t unblock_all_signals; + n_fds_ready = epoll_pwait (em->epoll_fd, + em->epoll_events, + vec_len (em->epoll_events), + timeout_ms, + &unblock_all_signals); + + /* This kludge is necessary to run over absurdly old kernels */ + if (n_fds_ready < 0 && errno == ENOSYS) + { + n_fds_ready = epoll_wait (em->epoll_fd, + em->epoll_events, + vec_len (em->epoll_events), + timeout_ms); + } + } + } + + if (n_fds_ready < 0) + { + if (unix_error_is_fatal (errno)) + vlib_panic_with_error (vm, clib_error_return_unix (0, "epoll_wait")); + + /* non fatal error (e.g. EINTR). */ + return 0; + } + + em->epoll_waits += 1; + em->epoll_files_ready += n_fds_ready; + + for (e = em->epoll_events; e < em->epoll_events + n_fds_ready; e++) + { + u32 i = e->data.u32; + unix_file_t * f = pool_elt_at_index (um->file_pool, i); + clib_error_t * errors[4]; + int n_errors = 0; + + if (PREDICT_TRUE (! (e->events & EPOLLERR))) + { + if (e->events & EPOLLIN) + { + errors[n_errors] = f->read_function (f); + n_errors += errors[n_errors] != 0; + } + if (e->events & EPOLLOUT) + { + errors[n_errors] = f->write_function (f); + n_errors += errors[n_errors] != 0; + } + } + else + { + if (f->error_function) + { + errors[n_errors] = f->error_function (f); + n_errors += errors[n_errors] != 0; + } + } + + ASSERT (n_errors < ARRAY_LEN (errors)); + for (i = 0; i < n_errors; i++) + { + unix_save_error (um, errors[i]); + } + } + + return 0; +} + +VLIB_REGISTER_NODE (linux_epoll_input_node,static) = { + .function = linux_epoll_input, + .type = VLIB_NODE_TYPE_PRE_INPUT, + .name = "unix-epoll-input", +}; + +clib_error_t * +linux_epoll_input_init (vlib_main_t * vm) +{ + linux_epoll_main_t * em = &linux_epoll_main; + unix_main_t * um = &unix_main; + + /* Allocate some events. 
*/ + vec_resize (em->epoll_events, VLIB_FRAME_SIZE); + + em->epoll_fd = epoll_create (vec_len (em->epoll_events)); + if (em->epoll_fd < 0) + return clib_error_return_unix (0, "epoll_create"); + + um->file_update = linux_epoll_file_update; + + return 0; +} + +VLIB_INIT_FUNCTION (linux_epoll_input_init); + +#endif /* HAVE_LINUX_EPOLL */ + +static clib_error_t * +unix_input_init (vlib_main_t * vm) +{ + return vlib_call_init_function (vm, linux_epoll_input_init); +} + +VLIB_INIT_FUNCTION (unix_input_init); diff --git a/vlib/vlib/unix/main.c b/vlib/vlib/unix/main.c new file mode 100644 index 00000000000..b85f3e73326 --- /dev/null +++ b/vlib/vlib/unix/main.c @@ -0,0 +1,471 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/* + * main.c: Unix main routine + * + * Copyright (c) 2008 Eliot Dresselhaus + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ +#include <vlib/vlib.h> +#include <vlib/unix/unix.h> +#include <vlib/unix/plugin.h> + +#include <signal.h> +#include <sys/ucontext.h> +#include <syslog.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> + +unix_main_t unix_main; + +static clib_error_t * +unix_main_init (vlib_main_t * vm) +{ + unix_main_t * um = &unix_main; + um->vlib_main = vm; + return vlib_call_init_function (vm, unix_input_init); +} + +VLIB_INIT_FUNCTION (unix_main_init); + +static void unix_signal_handler (int signum, siginfo_t * si, ucontext_t * uc) +{ + uword fatal; + u8 * msg = 0; + + msg = format (msg, "received signal %U, PC %U", + format_signal, signum, + format_ucontext_pc, uc); + + if (signum == SIGSEGV) + msg = format (msg, ", faulting address %p", si->si_addr); + + switch (signum) + { + /* these (caught) signals cause the application to exit */ + case SIGTERM: + if (unix_main.vlib_main->main_loop_exit_set) + { + syslog (LOG_ERR | LOG_DAEMON, "received SIGTERM, exiting..."); + + clib_longjmp (&unix_main.vlib_main->main_loop_exit, + VLIB_MAIN_LOOP_EXIT_CLI); + } + case SIGQUIT: + case SIGINT: + case SIGILL: + case SIGBUS: + case SIGSEGV: + case SIGHUP: + case SIGFPE: + fatal = 1; + break; + + /* by default, print a message and continue */ + default: + fatal = 0; + break; + } + + /* Null terminate. 
*/ + vec_add1 (msg, 0); + + if (fatal) + { + syslog (LOG_ERR | LOG_DAEMON, "%s", msg); + os_exit (1); + } + else + clib_warning ("%s", msg); + + vec_free (msg); +} + +static clib_error_t * +setup_signal_handlers (unix_main_t * um) +{ + uword i; + struct sigaction sa; + + for (i = 1; i < 32; i++) + { + memset (&sa, 0, sizeof (sa)); + sa.sa_sigaction = (void *) unix_signal_handler; + sa.sa_flags = SA_SIGINFO; + + switch (i) + { + /* these signals take the default action */ + case SIGABRT: + case SIGKILL: + case SIGSTOP: + case SIGUSR1: + case SIGUSR2: + continue; + + /* ignore SIGPIPE, SIGCHLD */ + case SIGPIPE: + case SIGCHLD: + sa.sa_sigaction = (void *) SIG_IGN; + break; + + /* catch and handle all other signals */ + default: + break; + } + + if (sigaction (i, &sa, 0) < 0) + return clib_error_return_unix (0, "sigaction %U", format_signal, i); + } + + return 0; +} + +static void unix_error_handler (void * arg, u8 * msg, int msg_len) +{ + unix_main_t * um = arg; + + /* Echo to stderr when interactive. */ + if (um->flags & UNIX_FLAG_INTERACTIVE) + { + CLIB_UNUSED (int r) = write (2, msg, msg_len); + } + else + { + char save = msg[msg_len - 1]; + + /* Null Terminate. */ + msg[msg_len-1] = 0; + + syslog (LOG_ERR | LOG_DAEMON, "%s", msg); + + msg[msg_len-1] = save; + } +} + +void vlib_unix_error_report (vlib_main_t * vm, clib_error_t * error) +{ + unix_main_t * um = &unix_main; + + if (um->flags & UNIX_FLAG_INTERACTIVE || error == 0) + return; + + { + char save; + u8 * msg; + u32 msg_len; + + msg = error->what; + msg_len = vec_len(msg); + + /* Null Terminate. 
*/ + save = msg[msg_len-1]; + msg[msg_len-1] = 0; + + syslog (LOG_ERR | LOG_DAEMON, "%s", msg); + + msg[msg_len-1] = save; + } +} + +static uword +startup_config_process (vlib_main_t * vm, + vlib_node_runtime_t * rt, + vlib_frame_t * f) +{ + unix_main_t * um = &unix_main; + u8 * buf = 0; + uword l, n = 1; + + vlib_process_suspend (vm, 2.0); + + while (um->unix_config_complete == 0) + vlib_process_suspend (vm, 0.1); + + if (um->startup_config_filename) { + unformat_input_t sub_input; + int fd; + struct stat s; + char *fn = (char *)um->startup_config_filename; + + fd = open (fn, O_RDONLY); + if (fd < 0) { + clib_warning ("failed to open `%s'", fn); + return 0; + } + + if (fstat (fd, &s) < 0) { + clib_warning ("failed to stat `%s'", fn); + bail: + close(fd); + return 0; + } + + if (! (S_ISREG (s.st_mode) || S_ISLNK (s.st_mode))) { + clib_warning ("not a regular file: `%s'", fn); + goto bail; + } + + while (n > 0) + { + l = vec_len (buf); + vec_resize (buf, 4096); + n = read (fd, buf + l, 4096); + if (n > 0) + { + _vec_len (buf) = l + n; + if (n < 4096) + break; + } + else + break; + } + if (um->log_fd && vec_len (buf)) + { + u8 * lv = 0; + lv = format (lv, "%U: ***** Startup Config *****\n%v", + format_timeval, + 0 /* current bat-time */, + 0 /* current bat-format */, + buf); + { + int rv __attribute__((unused)) = + write (um->log_fd, lv, vec_len(lv)); + } + vec_reset_length (lv); + lv = format (lv, "%U: ***** End Startup Config *****\n", + format_timeval, + 0 /* current bat-time */, + 0 /* current bat-format */); + { + int rv __attribute__((unused)) = + write (um->log_fd, lv, vec_len(lv)); + } + vec_free (lv); + } + + if (vec_len(buf)) + { + unformat_init_vector (&sub_input, buf); + vlib_cli_input (vm, &sub_input, 0, 0); + /* frees buf for us */ + unformat_free (&sub_input); + } + close(fd); + } + return 0; +} + +VLIB_REGISTER_NODE (startup_config_node,static) = { + .function = startup_config_process, + .type = VLIB_NODE_TYPE_PROCESS, + .name = 
"startup-config-process", +}; + +static clib_error_t * +unix_config (vlib_main_t * vm, unformat_input_t * input) +{ + unix_main_t * um = &unix_main; + clib_error_t * error = 0; + + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) + { + char * cli_prompt; + if (unformat (input, "interactive")) + um->flags |= UNIX_FLAG_INTERACTIVE; + else if (unformat (input, "nodaemon")) + um->flags |= UNIX_FLAG_NODAEMON; + else if (unformat (input, "cli-prompt %s", &cli_prompt)) + vlib_unix_cli_set_prompt (cli_prompt); + else if (unformat (input, "cli-listen %s", &um->cli_listen_socket.config)) + ; + else if (unformat (input, "cli-line-mode")) + um->cli_line_mode = 1; + else if (unformat (input, "cli-history-limit %d", &um->cli_history_limit)) + ; + else if (unformat (input, "full-coredump")) + { + int fd; + + fd = open ("/proc/self/coredump_filter", O_WRONLY); + if (fd > 0) + { + if (write (fd, "0x6f\n", 5) != 5) + clib_unix_warning ("coredump filter write failed!"); + close(fd); + } + else + clib_unix_warning ("couldn't open /proc/self/coredump_filter"); + } + else if (unformat (input, "startup-config %s", + &um->startup_config_filename)) + ; + else if (unformat (input, "exec %s", + &um->startup_config_filename)) + ; + else if (unformat (input, "log %s", &um->log_filename)) + { + um->log_fd = open ((char *) um->log_filename, + O_CREAT | O_WRONLY | O_APPEND, 0644); + if (um->log_fd < 0) + { + clib_warning ("couldn't open log '%s'\n", um->log_filename); + um->log_fd = 0; + } + else + { + u8 * lv = 0; + lv = format (0, "%U: ***** Start: PID %d *****\n", + format_timeval, + 0 /* current bat-time */, + 0 /* current bat-format */, + getpid()); + { + int rv __attribute__((unused)) = + write (um->log_fd, lv, vec_len(lv)); + } + vec_free (lv); + } + } + else + return clib_error_return (0, "unknown input `%U'", + format_unformat_error, input); + } + + if (! 
(um->flags & UNIX_FLAG_INTERACTIVE)) + { + error = setup_signal_handlers (um); + if (error) + return error; + + openlog (vm->name, LOG_CONS | LOG_PERROR | LOG_PID, LOG_DAEMON); + clib_error_register_handler (unix_error_handler, um); + + if (! (um->flags & UNIX_FLAG_NODAEMON) + && daemon (/* chdir to / */ 0, + /* stdin/stdout/stderr -> /dev/null */ 0) < 0) + clib_error_return (0, "daemon () fails"); + } + um->unix_config_complete = 1; + + return 0; +} + +/* unix { ... } configuration. */ +VLIB_CONFIG_FUNCTION (unix_config, "unix"); + +static clib_error_t * +unix_exit (vlib_main_t * vm) +{ + /* Close syslog connection. */ + closelog (); + return 0; +} + +VLIB_MAIN_LOOP_EXIT_FUNCTION (unix_exit); + +u8 **vlib_thread_stacks; + +static char **argv_global; + +static uword thread0 (uword arg) +{ + vlib_main_t * vm = (vlib_main_t *)arg; + unformat_input_t input; + int i; + + unformat_init_command_line (&input, argv_global); + i = vlib_main (vm, &input); + unformat_free (&input); + + return i; + } + +int vlib_unix_main (int argc, char * argv[]) +{ + vlib_main_t * vm = &vlib_global_main; /* one and only time for this! */ + + clib_smp_main_t * sm = &clib_smp_main; + vlib_thread_main_t * tm = &vlib_thread_main; + unformat_input_t input; + u8 * thread_stacks; + clib_error_t * e; + int i; + + argv_global = argv; + vm->name = argv[0]; + vm->heap_base = clib_mem_get_heap (); + ASSERT(vm->heap_base); + + i = vlib_plugin_early_init (vm); + if (i) + return i; + + unformat_init_command_line (&input, argv_global); + vm->init_functions_called = hash_create (0, /* value bytes */ 0); + e = vlib_call_all_config_functions (vm, &input, 1 /* early */); + if (e != 0) + { + clib_error_report(e); + return 1; + } + unformat_free (&input); + + /* allocate N x 1mb stacks, aligned e.g. 
to a 16mb boundary */ + thread_stacks = clib_mem_alloc_aligned + (tm->n_thread_stacks * VLIB_THREAD_STACK_SIZE, + (VLIB_MAX_CPUS << VLIB_LOG2_THREAD_STACK_SIZE)); + + sm->vm_base = thread_stacks; + sm->log2_n_per_cpu_vm_bytes = VLIB_LOG2_THREAD_STACK_SIZE; + + vec_validate (vlib_thread_stacks, tm->n_thread_stacks - 1); + for (i = 0; i < vec_len (vlib_thread_stacks); i++) + { + vlib_thread_stacks[i] = thread_stacks; + + /* + * Disallow writes to the bottom page of the stack, to + * catch stack overflows. + */ + if (mprotect (thread_stacks, 4096, PROT_READ) < 0) + clib_unix_warning ("thread stack"); + + thread_stacks += VLIB_THREAD_STACK_SIZE; + } + + i = clib_calljmp (thread0, (uword) vm, + (void *)(vlib_thread_stacks[0] + VLIB_THREAD_STACK_SIZE)); + return i; +} diff --git a/vlib/vlib/unix/mc_socket.c b/vlib/vlib/unix/mc_socket.c new file mode 100644 index 00000000000..1169203f855 --- /dev/null +++ b/vlib/vlib/unix/mc_socket.c @@ -0,0 +1,972 @@ +/* + * mc_socket.c: socket based multicast for vlib mc + * + * Copyright (c) 2010 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include <vlib/vlib.h> +#include <vlib/unix/mc_socket.h> + +#include <sys/ioctl.h> /* for FIONBIO */ +#include <netinet/tcp.h> /* for TCP_NODELAY */ +#include <net/if.h> /* for struct ifreq */ + +static u8 * format_socket_peer_id (u8 * s, va_list * args) +{ + u64 peer_id_as_u64 = va_arg (*args, u64); + mc_peer_id_t peer_id; + peer_id.as_u64 = peer_id_as_u64; + u32 a = mc_socket_peer_id_get_address (peer_id); + u32 p = mc_socket_peer_id_get_port (peer_id); + + s = format (s, "%U:%04x", format_network_address, AF_INET, &a, + ntohs (p)); + + return s; +} + +typedef void (mc_msg_handler_t) (mc_main_t * mcm, void * msg, u32 buffer_index); + +always_inline void msg_handler (mc_main_t * mcm, + u32 buffer_index, + u32 handler_frees_buffer, + void * _h) +{ + vlib_main_t * vm = mcm->vlib_main; + mc_msg_handler_t * h = _h; + vlib_buffer_t * b = vlib_get_buffer (vm, buffer_index); + void * the_msg = vlib_buffer_get_current (b); + + h (mcm, the_msg, buffer_index); + if (! handler_frees_buffer) + vlib_buffer_free_one (vm, buffer_index); +} + +static uword +append_buffer_index_to_iovec (vlib_main_t * vm, + u32 buffer_index, + struct iovec ** iovs_return) +{ + struct iovec * i; + vlib_buffer_t * b; + u32 bi = buffer_index; + u32 l = 0; + + while (1) + { + b = vlib_get_buffer (vm, bi); + vec_add2 (*iovs_return, i, 1); + i->iov_base = vlib_buffer_get_current (b); + i->iov_len = b->current_length; + l += i->iov_len; + if (! 
(b->flags & VLIB_BUFFER_NEXT_PRESENT)) + break; + bi = b->next_buffer; + } + + return l; +} + +static clib_error_t * +sendmsg_helper (mc_socket_main_t * msm, + int socket, + struct sockaddr_in * tx_addr, + u32 buffer_index) +{ + vlib_main_t * vm = msm->mc_main.vlib_main; + struct msghdr h; + word n_bytes, n_bytes_tx, n_retries; + + memset (&h, 0, sizeof (h)); + h.msg_name = tx_addr; + h.msg_namelen = sizeof (tx_addr[0]); + + if (msm->iovecs) + _vec_len (msm->iovecs) = 0; + + n_bytes = append_buffer_index_to_iovec (vm, buffer_index, &msm->iovecs); + ASSERT (n_bytes <= msm->mc_main.transport.max_packet_size); + if (n_bytes > msm->mc_main.transport.max_packet_size) + clib_error ("sending packet larger than interace MTU %d bytes", n_bytes); + + h.msg_iov = msm->iovecs; + h.msg_iovlen = vec_len (msm->iovecs); + + n_retries = 0; + while ((n_bytes_tx = sendmsg (socket, &h, /* flags */ 0)) != n_bytes + && errno == EAGAIN) + n_retries++; + if (n_bytes_tx != n_bytes) + { + clib_unix_warning ("sendmsg"); + return 0; + } + if (n_retries) + { + ELOG_TYPE_DECLARE (e) = { + .format = "sendmsg-helper: %d retries", + .format_args = "i4", + }; + struct { u32 retries; } * ed = 0; + + ed = ELOG_DATA (&vm->elog_main, e); + ed->retries = n_retries; + } + return 0; +} + +static clib_error_t * +tx_buffer (void * transport, mc_transport_type_t type, u32 buffer_index) +{ + mc_socket_main_t *msm = (mc_socket_main_t *)transport; + vlib_main_t * vm = msm->mc_main.vlib_main; + mc_multicast_socket_t * ms = &msm->multicast_sockets[type]; + clib_error_t * error; + error = sendmsg_helper (msm, ms->socket, &ms->tx_addr, buffer_index); + if (type != MC_TRANSPORT_USER_REQUEST_TO_RELAY) + vlib_buffer_free_one (vm, buffer_index); + return error; +} + +static clib_error_t * +tx_ack (void *transport, mc_peer_id_t dest_peer_id, u32 buffer_index) +{ + struct sockaddr_in tx_addr; + mc_socket_main_t *msm = (mc_socket_main_t *)transport; + vlib_main_t * vm = msm->mc_main.vlib_main; + clib_error_t * error; + + 
memset (&tx_addr, 0, sizeof (tx_addr)); + tx_addr.sin_family = AF_INET; + tx_addr.sin_addr.s_addr = mc_socket_peer_id_get_address (dest_peer_id); + tx_addr.sin_port = mc_socket_peer_id_get_port (dest_peer_id); + + error = sendmsg_helper (msm, msm->ack_socket, &tx_addr, buffer_index); + vlib_buffer_free_one (vm, buffer_index); + return error; +} + +static clib_error_t * +recvmsg_helper (mc_socket_main_t * msm, + int socket, + struct sockaddr_in * rx_addr, + u32 * buffer_index, + u32 drop_message) +{ + vlib_main_t * vm = msm->mc_main.vlib_main; + vlib_buffer_t * b; + uword n_left, n_alloc, n_mtu, i, i_rx; + const uword buffer_size = VLIB_BUFFER_DEFAULT_FREE_LIST_BYTES; + word n_bytes_left; + + /* Make sure we have at least a MTU worth of buffers. */ + n_mtu = msm->rx_mtu_n_buffers; + n_left = vec_len (msm->rx_buffers); + if (n_left < n_mtu) + { + uword max_alloc = 8 * n_mtu; + vec_validate (msm->rx_buffers, max_alloc - 1); + n_alloc = vlib_buffer_alloc (vm, msm->rx_buffers + n_left, max_alloc - n_left); + _vec_len (msm->rx_buffers) = n_left + n_alloc; + } + + ASSERT (vec_len (msm->rx_buffers) >= n_mtu); + vec_validate (msm->iovecs, n_mtu - 1); + + /* Allocate RX buffers from end of rx_buffers. + Turn them into iovecs to pass to readv. 
*/ + i_rx = vec_len (msm->rx_buffers) - 1; + for (i = 0; i < n_mtu; i++) + { + b = vlib_get_buffer (vm, msm->rx_buffers[i_rx - i]); + msm->iovecs[i].iov_base = b->data; + msm->iovecs[i].iov_len = buffer_size; + } + _vec_len (msm->iovecs) = n_mtu; + + { + struct msghdr h; + + memset (&h, 0, sizeof (h)); + if (rx_addr) + { + h.msg_name = rx_addr; + h.msg_namelen = sizeof (rx_addr[0]); + } + h.msg_iov = msm->iovecs; + h.msg_iovlen = vec_len (msm->iovecs); + + n_bytes_left = recvmsg (socket, &h, 0); + if (n_bytes_left < 0) + return clib_error_return_unix (0, "recvmsg"); + } + + if (drop_message) + { + *buffer_index = ~0; + return 0; + } + + *buffer_index = msm->rx_buffers[i_rx]; + while (1) + { + b = vlib_get_buffer (vm, msm->rx_buffers[i_rx]); + + b->flags = 0; + b->current_data = 0; + b->current_length = n_bytes_left < buffer_size ? n_bytes_left : buffer_size; + + n_bytes_left -= buffer_size; + + if (n_bytes_left <= 0) + break; + + i_rx--; + b->flags |= VLIB_BUFFER_NEXT_PRESENT; + b->next_buffer = msm->rx_buffers[i_rx]; + } + + _vec_len (msm->rx_buffers) = i_rx; + + return 0 /* no error */; +} + +static clib_error_t * mastership_socket_read_ready (unix_file_t * uf) +{ + mc_socket_main_t *msm = (mc_socket_main_t *)uf->private_data; + mc_main_t * mcm = &msm->mc_main; + mc_multicast_socket_t * ms = &msm->multicast_sockets[MC_TRANSPORT_MASTERSHIP]; + clib_error_t * error; + u32 bi; + + error = recvmsg_helper (msm, ms->socket, /* rx_addr */ 0, &bi, /* drop_message */ 0); + if (! 
error) + msg_handler (mcm, bi, + /* handler_frees_buffer */ 0, + mc_msg_master_assert_handler); + + return error; +} + +static clib_error_t * to_relay_socket_read_ready (unix_file_t * uf) +{ + mc_socket_main_t *msm = (mc_socket_main_t *)uf->private_data; + mc_main_t *mcm = &msm->mc_main; + vlib_main_t * vm = msm->mc_main.vlib_main; + mc_multicast_socket_t * ms_to_relay = &msm->multicast_sockets[MC_TRANSPORT_USER_REQUEST_TO_RELAY]; + mc_multicast_socket_t * ms_from_relay = &msm->multicast_sockets[MC_TRANSPORT_USER_REQUEST_FROM_RELAY]; + clib_error_t * error; + u32 bi; + u32 is_master = mcm->relay_state == MC_RELAY_STATE_MASTER; + + /* Not the ordering master? Turf the msg */ + error = recvmsg_helper (msm, ms_to_relay->socket, /* rx_addr */ 0, &bi, + /* drop_message */ ! is_master); + + /* If we are the master, number and rebroadcast the msg. */ + if (! error && is_master) + { + vlib_buffer_t * b = vlib_get_buffer (vm, bi); + mc_msg_user_request_t * mp = vlib_buffer_get_current (b); + mp->global_sequence = clib_host_to_net_u32 (mcm->relay_global_sequence); + mcm->relay_global_sequence++; + error = sendmsg_helper (msm, ms_from_relay->socket, &ms_from_relay->tx_addr, bi); + vlib_buffer_free_one (vm, bi); + } + + return error; +} + +static clib_error_t * from_relay_socket_read_ready (unix_file_t * uf) +{ + mc_socket_main_t *msm = (mc_socket_main_t *)uf->private_data; + mc_main_t * mcm = &msm->mc_main; + mc_multicast_socket_t * ms = &msm->multicast_sockets[MC_TRANSPORT_USER_REQUEST_FROM_RELAY]; + clib_error_t * error; + u32 bi; + + error = recvmsg_helper (msm, ms->socket, /* rx_addr */ 0, &bi, /* drop_message */ 0); + if (! 
 error)
    {
      /* handler_frees_buffer = 1: the user-request handler owns the buffer. */
      msg_handler (mcm, bi, /* handler_frees_buffer */ 1,
                   mc_msg_user_request_handler);
    }
  return error;
}

/* epoll read callback for the join/leave multicast socket: peek at the
   message type word and dispatch to the request or reply handler. */
static clib_error_t * join_socket_read_ready (unix_file_t * uf)
{
  mc_socket_main_t *msm = (mc_socket_main_t *)uf->private_data;
  mc_main_t * mcm = &msm->mc_main;
  vlib_main_t * vm = mcm->vlib_main;
  mc_multicast_socket_t * ms = &msm->multicast_sockets[MC_TRANSPORT_JOIN];
  clib_error_t * error;
  u32 bi;

  error = recvmsg_helper (msm, ms->socket, /* rx_addr */ 0, &bi, /* drop_message */ 0);
  if (! error)
    {
      vlib_buffer_t * b = vlib_get_buffer (vm, bi);
      mc_msg_join_or_leave_request_t * mp = vlib_buffer_get_current (b);

      /* NOTE(review): host_to_net is used here to un-swap a network-order
         field; correct only because the byte swap is an involution —
         net_to_host would read clearer. Confirm intent. */
      switch (clib_host_to_net_u32 (mp->type))
        {
        case MC_MSG_TYPE_join_or_leave_request:
          msg_handler (mcm, bi, /* handler_frees_buffer */ 0,
                       mc_msg_join_or_leave_request_handler);
          break;

        case MC_MSG_TYPE_join_reply:
          msg_handler (mcm, bi, /* handler_frees_buffer */ 0,
                       mc_msg_join_reply_handler);
          break;

        default:
          ASSERT (0);
          break;
        }
    }
  return error;
}

/* epoll read callback for the unicast ACK socket. */
static clib_error_t * ack_socket_read_ready (unix_file_t * uf)
{
  mc_socket_main_t *msm = (mc_socket_main_t *)uf->private_data;
  mc_main_t * mcm = &msm->mc_main;
  clib_error_t * error;
  u32 bi;

  error = recvmsg_helper (msm, msm->ack_socket, /* rx_addr */ 0, &bi, /* drop_message */ 0);
  if (! error)
    msg_handler (mcm, bi, /* handler_frees_buffer */ 0,
                 mc_msg_user_ack_handler);
  return error;
}

/* Tear down one catchup connection: drop the fd->catchup mapping, remove the
   epoll registration (which closes the fd), free buffers, return pool slot. */
static void catchup_cleanup (mc_socket_main_t *msm,
                             mc_socket_catchup_t *c,
                             unix_main_t *um, unix_file_t *uf)
{
  hash_unset (msm->catchup_index_by_file_descriptor, uf->file_descriptor);
  unix_file_del (um, uf);
  vec_free (c->input_vector);
  vec_free (c->output_vector);
  pool_put (msm->catchups, c);
}

/* Map a kernel file descriptor back to its catchup pool element, or 0 if
   the descriptor is unknown. */
static mc_socket_catchup_t *
find_catchup_from_file_descriptor (mc_socket_main_t * msm, int file_descriptor)
{
  uword * p = hash_get (msm->catchup_index_by_file_descriptor, file_descriptor);
  return p ?
 pool_elt_at_index (msm->catchups, p[0]) : 0;
}

/* Read side of a TCP catchup connection (both client and server roles).
   Data is accumulated into c->input_vector; the complete message is only
   processed once the peer shuts down its write side (EOF). */
static clib_error_t * catchup_socket_read_ready (unix_file_t * uf, int is_server)
{
  unix_main_t * um = &unix_main;
  mc_socket_main_t *msm = (mc_socket_main_t *)uf->private_data;
  mc_main_t *mcm = &msm->mc_main;
  mc_socket_catchup_t * c = find_catchup_from_file_descriptor (msm, uf->file_descriptor);
  word l, n, is_eof;

  l = vec_len (c->input_vector);
  vec_resize (c->input_vector, 4096);
  n = read (uf->file_descriptor, c->input_vector + l, vec_len (c->input_vector) - l);
  is_eof = n == 0;

  if (n < 0)
    {
      if (errno == EAGAIN)
        n = 0;              /* spurious wakeup; keep the connection */
      else
        {
          catchup_cleanup (msm, c, um, uf);
          return clib_error_return_unix (0, "read");
        }
    }

  /* Trim the vector back to the bytes actually received. */
  _vec_len (c->input_vector) = l + n;

  if (is_eof && vec_len (c->input_vector) > 0)
    {
      if (is_server)
        {
          mc_msg_catchup_request_handler (mcm, (void *) c->input_vector, c - msm->catchups);
          _vec_len (c->input_vector) = 0;
        }
      else
        {
          mc_msg_catchup_reply_handler (mcm, (void *) c->input_vector, c - msm->catchups);
          c->input_vector = 0; /* reply handler is responsible for freeing vector */
          catchup_cleanup (msm, c, um, uf);
        }
    }

  return 0 /* no error */;
}

static clib_error_t * catchup_server_read_ready (unix_file_t * uf)
{ return catchup_socket_read_ready (uf, /* is_server */ 1); }

static clib_error_t * catchup_client_read_ready (unix_file_t * uf)
{
  if (MC_EVENT_LOGGING)
    {
      mc_socket_main_t *msm = (mc_socket_main_t *)uf->private_data;
      vlib_main_t * vm = msm->mc_main.vlib_main;

      ELOG_TYPE (e, "catchup_client_read_ready");
      ELOG (&vm->elog_main, e, 0);
    }
  return catchup_socket_read_ready (uf, /* is_server */ 0);
}

/* Write side of a TCP catchup connection.  Finishes a non-blocking connect
   if one is pending, then drains c->output_vector in MTU-sized chunks. */
static clib_error_t *
catchup_socket_write_ready (unix_file_t * uf, int is_server)
{
  unix_main_t * um = &unix_main;
  mc_socket_main_t *msm = (mc_socket_main_t *)uf->private_data;
  mc_socket_catchup_t *c = find_catchup_from_file_descriptor (msm, uf->file_descriptor);
  clib_error_t * error = 0;
  int n;

  if (c->connect_in_progress)
    {
      u32 len, value;

      /* Writability after a non-blocking connect() means the connect
         finished; SO_ERROR tells us whether it succeeded. */
      c->connect_in_progress = 0;
      len = sizeof (value);
      if (getsockopt (c->socket, SOL_SOCKET,
                      SO_ERROR, &value, &len) < 0)
        {
          error = clib_error_return_unix (0, "getsockopt SO_ERROR");
          goto error_quit;
        }
      if (value != 0)
        {
          error = clib_error_return_code (0, value, CLIB_ERROR_ERRNO_VALID, "connect fails");
          goto error_quit;
        }
    }

  while (1)
    {
      u32 n_this_write;

      /* Cap each write below the MTU, leaving headroom for headers. */
      n_this_write =
        clib_min (vec_len (c->output_vector) - c->output_vector_n_written,
                  msm->rx_mtu_n_bytes - 64 /* ip + tcp + option allowance */);

      if (n_this_write <= 0)
        break;

      do {
        n = write (uf->file_descriptor,
                   c->output_vector + c->output_vector_n_written,
                   n_this_write);
      } while (n < 0 && errno == EAGAIN);

      if (n < 0)
        {
          error = clib_error_return_unix (0, "write");
          goto error_quit;
        }
      c->output_vector_n_written += n;
    }

  if (c->output_vector_n_written >= vec_len (c->output_vector))
    {
      if (! is_server)
        {
          /* Client: everything sent; stop polling for writability. */
          uf->flags &= ~UNIX_FILE_DATA_AVAILABLE_TO_WRITE;
          unix_main.file_update (uf, UNIX_FILE_UPDATE_MODIFY);
          /* Send EOF to other side. */
          shutdown (uf->file_descriptor, SHUT_WR);
          return error;
        }
      else
        {
        error_quit:
          /* Server done (or either side failed): tear the connection down. */
          catchup_cleanup (msm, c, um, uf);
        }
    }
  return error;
}

static clib_error_t *
catchup_server_write_ready (unix_file_t * uf)
{ return catchup_socket_write_ready (uf, /* is_server */ 1); }

static clib_error_t *
catchup_client_write_ready (unix_file_t * uf)
{ return catchup_socket_write_ready (uf, /* is_server */ 0); }

/* epoll error callback: any socket error kills the catchup connection. */
static clib_error_t *catchup_socket_error_ready (unix_file_t *uf)
{
  unix_main_t *um = &unix_main;
  mc_socket_main_t *msm = (mc_socket_main_t *)uf->private_data;
  mc_socket_catchup_t *c = find_catchup_from_file_descriptor (msm, uf->file_descriptor);
  catchup_cleanup (msm, c, um, uf);
  return clib_error_return (0, "error");
}

/* Accept callback for the TCP catchup listen socket: allocate a catchup
   pool element and register the new connection with epoll. */
static clib_error_t *catchup_listen_read_ready (unix_file_t * uf)
{
  mc_socket_main_t *msm = (mc_socket_main_t *)uf->private_data;
  struct sockaddr_in client_addr;
  int client_len;
  mc_socket_catchup_t *c;
  unix_file_t template = {0};

  pool_get (msm->catchups, c);
  memset(c, 0, sizeof (c[0]));

  client_len = sizeof(client_addr);

  /* Acquires the non-blocking attrib from the server socket.
 */
  c->socket = accept (uf->file_descriptor,
                      (struct sockaddr *)&client_addr,
                      (socklen_t *)&client_len);

  if (c->socket < 0)
    {
      /* Give back the just-allocated pool slot on failure. */
      pool_put (msm->catchups, c);
      return clib_error_return_unix (0, "accept");
    }

  if (MC_EVENT_LOGGING)
    {
      mc_main_t * mcm = &msm->mc_main;
      vlib_main_t * vm = mcm->vlib_main;

      ELOG_TYPE_DECLARE (e) = {
        .format = "catchup accepted from 0x%lx",
        .format_args = "i4",
      };
      struct { u32 addr; } * ed = 0;

      ed = ELOG_DATA (&vm->elog_main, e);
      ed->addr = ntohl(client_addr.sin_addr.s_addr);
    }

  /* Disable the Nagle algorithm, ship catchup pkts immediately */
  {
    int one = 1;
    if ((setsockopt(c->socket, IPPROTO_TCP,
                    TCP_NODELAY, (void *)&one, sizeof(one))) < 0) {
      clib_unix_warning("catchup socket: set TCP_NODELAY");
    }
  }

  template.read_function = catchup_server_read_ready;
  template.write_function = catchup_server_write_ready;
  template.error_function = catchup_socket_error_ready;
  template.file_descriptor = c->socket;
  template.private_data = pointer_to_uword (msm);
  c->unix_file_index = unix_file_add (&unix_main, &template);
  hash_set (msm->catchup_index_by_file_descriptor, c->socket, c - msm->catchups);

  return 0;
}

/* Return and bind to an unused port. */
/* Probes ports upward from 'port'; returns the bound port or -1 if every
   port up to 65535 is taken. */
static word find_and_bind_to_free_port (word sock, word port)
{
  for (; port < 1 << 16; port++)
    {
      struct sockaddr_in a;

      memset (&a, 0, sizeof(a)); /* Warnings be gone */

      a.sin_family = PF_INET;
      a.sin_addr.s_addr = INADDR_ANY;
      a.sin_port = htons (port);

      if (bind (sock, (struct sockaddr *) &a, sizeof (a)) >= 0)
        break;
    }

  return port < 1 << 16 ? port : -1;
}

/* Create, bind and configure one multicast UDP socket (TTL, SO_REUSEADDR,
   group membership, non-blocking).  NOTE(review): "mutlicast" is a typo in
   the function name; left as-is since all callers use this spelling. */
static clib_error_t *
setup_mutlicast_socket (mc_socket_main_t * msm,
                        mc_multicast_socket_t * ms,
                        char * type,
                        uword udp_port)
{
  int one = 1;
  struct ip_mreq mcast_req;

  if (!
 msm->base_multicast_udp_port_host_byte_order)
    /* No port configured: pick a default range just below 0xffff, wide
       enough for MC_N_TRANSPORT_TYPE multicast ports + ack + catchup. */
    msm->base_multicast_udp_port_host_byte_order =
      0xffff - ((MC_N_TRANSPORT_TYPE + 2 /* ack socket, catchup socket */)
                - 1);

  port = msm->base_multicast_udp_port_host_byte_order;

  error = setup_mutlicast_socket (msm,
                                  &msm->multicast_sockets[MC_TRANSPORT_MASTERSHIP],
                                  "mastership",
                                  port++);
  if (error)
    return error;

  error = setup_mutlicast_socket (msm,
                                  &msm->multicast_sockets[MC_TRANSPORT_JOIN],
                                  "join",
                                  port++);
  if (error)
    return error;

  error = setup_mutlicast_socket (msm,
                                  &msm->multicast_sockets[MC_TRANSPORT_USER_REQUEST_TO_RELAY],
                                  "to relay",
                                  port++);
  if (error)
    return error;

  error = setup_mutlicast_socket (msm,
                                  &msm->multicast_sockets[MC_TRANSPORT_USER_REQUEST_FROM_RELAY],
                                  "from relay",
                                  port++);
  if (error)
    return error;

  /* ACK rx socket */
  msm->ack_socket = socket (PF_INET, SOCK_DGRAM, IPPROTO_UDP);
  if (msm->ack_socket < 0)
    return clib_error_return_unix(0, "ack socket");

  /* NOTE(review): find_and_bind_to_free_port can return -1 (no free port);
     that result is not checked here or below for the catchup socket. */
  msm->ack_udp_port = find_and_bind_to_free_port (msm->ack_socket, port++);

  if (ioctl (msm->ack_socket, FIONBIO, &one) < 0)
    return clib_error_return_unix (0, "ack socket FIONBIO");

  msm->catchup_server_socket = socket(AF_INET, SOCK_STREAM, 0);
  if (msm->catchup_server_socket < 0)
    return clib_error_return_unix (0, "catchup server socket");

  msm->catchup_tcp_port = find_and_bind_to_free_port (msm->catchup_server_socket, port++);

  if (ioctl (msm->catchup_server_socket, FIONBIO, &one) < 0)
    return clib_error_return_unix (0, "catchup server socket FIONBIO");

  if (listen(msm->catchup_server_socket, 5) < 0)
    return clib_error_return_unix (0, "catchup server socket listen");

  /* epoll setup for multicast mastership socket */
  {
    unix_file_t template = {0};

    template.read_function = mastership_socket_read_ready;
    template.file_descriptor = msm->multicast_sockets[MC_TRANSPORT_MASTERSHIP].socket;
    template.private_data = (uword) msm;
    unix_file_add (&unix_main, &template);

    /* epoll setup for multicast to_relay socket */
    template.read_function = to_relay_socket_read_ready;
    template.file_descriptor = msm->multicast_sockets[MC_TRANSPORT_USER_REQUEST_TO_RELAY].socket;
    template.private_data = (uword) msm;
    unix_file_add (&unix_main, &template);

    /* epoll setup for multicast from_relay socket */
    template.read_function = from_relay_socket_read_ready;
    template.file_descriptor = msm->multicast_sockets[MC_TRANSPORT_USER_REQUEST_FROM_RELAY].socket;
    template.private_data = (uword) msm;
    unix_file_add (&unix_main, &template);

    template.read_function = join_socket_read_ready;
    template.file_descriptor = msm->multicast_sockets[MC_TRANSPORT_JOIN].socket;
    template.private_data = (uword) msm;
    unix_file_add (&unix_main, &template);

    /* epoll setup for ack rx socket */
    template.read_function = ack_socket_read_ready;
    template.file_descriptor = msm->ack_socket;
    template.private_data = (uword) msm;
    unix_file_add (&unix_main, &template);

    /* epoll setup for TCP catchup server */
    template.read_function = catchup_listen_read_ready;
    template.file_descriptor = msm->catchup_server_socket;
    template.private_data = (uword) msm;
    unix_file_add (&unix_main, &template);
  }

  return 0;
}

/* Queue n_bytes of pending catchup output (or adopt set_output_vector
   wholesale) and arm the epoll write notification if needed.  Returns a
   pointer into the output vector for the caller to fill, or 0 when
   set_output_vector was supplied. */
static void *
catchup_add_pending_output (mc_socket_catchup_t * c, uword n_bytes, u8 * set_output_vector)
{
  unix_file_t * uf = pool_elt_at_index (unix_main.file_pool,
                                        c->unix_file_index);
  u8 * result=0;

  if (set_output_vector)
    c->output_vector = set_output_vector;
  else
    vec_add2 (c->output_vector, result, n_bytes);
  if (vec_len (c->output_vector) > 0)
    {
      /* Only poke epoll when the writable flag actually changes. */
      int skip_update = 0 != (uf->flags & UNIX_FILE_DATA_AVAILABLE_TO_WRITE);
      uf->flags |= UNIX_FILE_DATA_AVAILABLE_TO_WRITE;
      if (!
 skip_update)
        unix_main.file_update (uf, UNIX_FILE_UPDATE_MODIFY);
    }
  return result;
}

/* Transport hook: open a non-blocking TCP connection to the catchup peer
   and queue a catchup request.  Returns the catchup pool index.
   NOTE(review): error paths return 0, which is also a valid pool index;
   they also leak the freshly allocated pool element and (after socket()
   succeeds) the socket fd — confirm whether callers can distinguish. */
static uword catchup_request_fun (void *transport_main,
                                  u32 stream_index,
                                  mc_peer_id_t catchup_peer_id)
{
  mc_socket_main_t *msm = (mc_socket_main_t *)transport_main;
  mc_main_t * mcm = &msm->mc_main;
  vlib_main_t * vm = mcm->vlib_main;
  mc_socket_catchup_t *c;
  struct sockaddr_in addr;
  unix_main_t *um = &unix_main;
  int one = 1;

  pool_get (msm->catchups, c);
  memset (c, 0, sizeof (*c));

  c->socket = socket(AF_INET, SOCK_STREAM, 0);
  if (c->socket < 0)
    {
      clib_unix_warning ("socket");
      return 0;
    }

  if (ioctl (c->socket, FIONBIO, &one) < 0)
    {
      clib_unix_warning ("FIONBIO");
      return 0;
    }

  memset(&addr, 0, sizeof(addr));
  addr.sin_family = AF_INET;
  addr.sin_addr.s_addr = mc_socket_peer_id_get_address (catchup_peer_id);
  addr.sin_port = mc_socket_peer_id_get_port (catchup_peer_id);

  c->connect_in_progress = 1;

  if (MC_EVENT_LOGGING)
    {
      ELOG_TYPE_DECLARE (e) = {
        .format = "connecting to peer 0x%Lx",
        .format_args = "i8",
      };
      struct { u64 peer; } * ed;
      ed = ELOG_DATA (&vm->elog_main, e);
      ed->peer = catchup_peer_id.as_u64;
    }

  /* Non-blocking connect: EINPROGRESS is the expected "in flight" result;
     completion is detected in catchup_socket_write_ready via SO_ERROR. */
  if (connect(c->socket, (const void *)&addr,sizeof(addr))
      < 0 && errno != EINPROGRESS)
    {
      clib_unix_warning ("connect to %U fails",
                         format_socket_peer_id, catchup_peer_id);
      return 0;
    }

  {
    unix_file_t template = {0};

    template.read_function = catchup_client_read_ready;
    template.write_function = catchup_client_write_ready;
    template.error_function = catchup_socket_error_ready;
    template.file_descriptor = c->socket;
    template.private_data = (uword) msm;
    c->unix_file_index = unix_file_add (um, &template);

    hash_set (msm->catchup_index_by_file_descriptor, c->socket, c - msm->catchups);
  }

  {
    mc_msg_catchup_request_t * mp;
    mp = catchup_add_pending_output (c, sizeof (mp[0]), /* set_output_vector */ 0);
    mp->peer_id = msm->mc_main.transport.our_catchup_peer_id;
    mp->stream_index = stream_index;
    mc_byte_swap_msg_catchup_request (mp);
  }

  return c - msm->catchups;
}

/* Transport hook: hand a complete catchup reply vector to the connection
   identified by 'opaque' (a catchup pool index). */
static void catchup_send_fun (void *transport_main, uword opaque, u8 * data)
{
  mc_socket_main_t *msm = (mc_socket_main_t *)transport_main;
  mc_socket_catchup_t *c = pool_elt_at_index (msm->catchups, opaque);
  catchup_add_pending_output (c, 0, data);
}

/* Look up an interface's IPv4 address and usable UDP payload MTU via ioctl.
   Returns 0 on success, -1 on failure. */
static int
find_interface_ip4_address (char * if_name, u32 * ip4_address, u32 * mtu)
{
  int fd;
  struct ifreq ifr;
  struct sockaddr_in * sa;

  /* Dig up our IP address */
  /* NOTE(review): the second argument of socket() is the socket type;
     passing AF_INET here is suspicious — confirm SOCK_DGRAM was intended. */
  fd = socket (PF_INET, AF_INET, 0);
  if (fd < 0) {
    clib_unix_error ("socket");
    return -1;
  }

  ifr.ifr_addr.sa_family = AF_INET;
  strncpy (ifr.ifr_name, if_name, sizeof(ifr.ifr_name)-1);
  /* NOTE(review): fd is leaked on this and the SIOCGIFMTU error return. */
  if (ioctl (fd, SIOCGIFADDR, &ifr) < 0) {
    clib_unix_error ("ioctl(SIOCFIGADDR)");
    return -1;
  }

  sa = (void *) &ifr.ifr_addr;
  memcpy (ip4_address, &sa->sin_addr.s_addr, sizeof (ip4_address[0]));

  if (ioctl (fd, SIOCGIFMTU, &ifr) < 0)
    return -1;
  if (mtu)
    *mtu = ifr.ifr_mtu - (/* IP4 header */ 20 + /* UDP header */ 8);

  close (fd);

  return 0;
}

/* Initialize the socket-based MC transport: find a usable interface/MTU,
   create all sockets, and register the transport hooks with mc_main. */
clib_error_t *
mc_socket_main_init (mc_socket_main_t * msm, char **intfc_probe_list,
                     int n_intfcs_to_probe)
{
  clib_error_t * error;
  mc_main_t * mcm;
  u32 mtu;

  mcm = &msm->mc_main;

  /* 239.255.0.7 */
  if (! msm->multicast_tx_ip4_address_host_byte_order)
    msm->multicast_tx_ip4_address_host_byte_order = 0xefff0007;

  {
    u32 i, a, win;

    win = 0;
    if (msm->multicast_interface_name)
      {
        win = ! find_interface_ip4_address (msm->multicast_interface_name, &a, &mtu);
      }
    else
      {
        /* No interface configured: take the first probe-list entry that
           has an IPv4 address. */
        for (i = 0; i < n_intfcs_to_probe; i++)
          if (! find_interface_ip4_address (intfc_probe_list[i], &a, &mtu))
            {
              win = 1;
              msm->multicast_interface_name = intfc_probe_list[i];
              break;
            }
      }

    if (! win)
      return clib_error_return (0, "can't find interface ip4 address");

    msm->if_ip4_address_net_byte_order = a;
  }

  msm->rx_mtu_n_bytes = mtu;
  /* Round up: number of VLIB buffers needed to hold one MTU. */
  msm->rx_mtu_n_buffers = msm->rx_mtu_n_bytes / VLIB_BUFFER_DEFAULT_FREE_LIST_BYTES;
  msm->rx_mtu_n_buffers += (msm->rx_mtu_n_bytes % VLIB_BUFFER_DEFAULT_FREE_LIST_BYTES) != 0;

  error = socket_setup (msm);
  if (error)
    return error;

  mcm->transport.our_ack_peer_id =
    mc_socket_set_peer_id (msm->if_ip4_address_net_byte_order, msm->ack_udp_port);

  mcm->transport.our_catchup_peer_id =
    mc_socket_set_peer_id (msm->if_ip4_address_net_byte_order, msm->catchup_tcp_port);

  mcm->transport.tx_buffer = tx_buffer;
  mcm->transport.tx_ack = tx_ack;
  mcm->transport.catchup_request_fun = catchup_request_fun;
  mcm->transport.catchup_send_fun = catchup_send_fun;
  mcm->transport.format_peer_id = format_socket_peer_id;
  mcm->transport.opaque = msm;
  mcm->transport.max_packet_size = mtu;

  mc_main_init (mcm, "socket");

  return error;
}
diff --git a/vlib/vlib/unix/mc_socket.h b/vlib/vlib/unix/mc_socket.h
new file mode 100644
index 00000000000..7dd6b5e27b1
--- /dev/null
+++ b/vlib/vlib/unix/mc_socket.h
@@ -0,0 +1,126 @@
/*
 * mc_socket.h: socket based multicast for vlib mc
 *
 * Copyright (c) 2010 Cisco and/or its affiliates.
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef __included_mc_socket_h__
#define __included_mc_socket_h__

#include <vlib/unix/unix.h>
#include <netinet/in.h>

/* One multicast UDP socket plus the group address/port it transmits to. */
typedef struct {
  int socket;
  struct sockaddr_in tx_addr;
} mc_multicast_socket_t;

/* TCP catchup socket */
typedef struct {
  int socket;
  u32 unix_file_index;       /* index into unix_main.file_pool */

  u8 * input_vector;         /* bytes read so far (processed at EOF) */
  u8 * output_vector;        /* bytes queued for transmission */
  u32 output_vector_n_written;

  u32 connect_in_progress;   /* non-blocking connect() not yet confirmed */
} mc_socket_catchup_t;

typedef struct mc_socket_main_t {
  mc_main_t mc_main;

  /* Multicast mastership/to-relay/from-relay sockets. */
  mc_multicast_socket_t multicast_sockets[MC_N_TRANSPORT_TYPE];

  /* Unicast UDP ack sockets */
  int ack_socket;

  /* TCP catchup server socket */
  int catchup_server_socket;

  /* Pool of stream-private catchup sockets */
  mc_socket_catchup_t *catchups;

  uword * catchup_index_by_file_descriptor;

  u32 rx_mtu_n_bytes;

  /* Receive MTU in bytes and VLIB buffers. */
  u32 rx_mtu_n_buffers;

  /* Vector of RX VLIB buffers. */
  u32 * rx_buffers;
  /* Vector of scatter/gather descriptors for sending/receiving VLIB buffers
     via kernel. */
  struct iovec * iovecs;

  /* IP address of interface to use for multicast. */
  u32 if_ip4_address_net_byte_order;

  u32 ack_udp_port;
  u32 catchup_tcp_port;

  /* Interface on which to listen for multicasts. */
  char * multicast_interface_name;

  /* Multicast address to use (e.g. 0xefff0000).
     Host byte order. */
  u32 multicast_tx_ip4_address_host_byte_order;

  /* TTL to use for multicasts. */
  u32 multicast_ttl;

  /* Multicast ports for mastership, joins, etc. will be chosen
     starting at the given port in host byte order.
     A total of MC_N_TRANSPORT_TYPE ports will be used. */
  u32 base_multicast_udp_port_host_byte_order;
} mc_socket_main_t;

/* Extract the IPv4 address packed big-endian in peer-id bytes 0-3;
   the returned u32 is in network byte order. */
always_inline u32
mc_socket_peer_id_get_address (mc_peer_id_t i)
{
  u32 a = ((i.as_u8[0] << 24)
           | (i.as_u8[1] << 16)
           | (i.as_u8[2] << 8)
           | (i.as_u8[3] << 0));
  return clib_host_to_net_u32 (a);
}

/* Extract the UDP/TCP port packed big-endian in peer-id bytes 4-5;
   returned in network byte order (suitable for sockaddr_in.sin_port). */
always_inline u32
mc_socket_peer_id_get_port (mc_peer_id_t i)
{ return clib_host_to_net_u16 ((i.as_u8[4] << 8) | i.as_u8[5]); }

/* Pack a (network-order address, host-order port) pair into a peer id,
   big-endian so peer ids compare consistently across hosts. */
static_always_inline mc_peer_id_t
mc_socket_set_peer_id (u32 address_net_byte_order, u32 port_host_byte_order)
{
  mc_peer_id_t i;
  u32 a = ntohl (address_net_byte_order);
  u32 p = port_host_byte_order;
  i.as_u8[0] = (a >> 24) & 0xff;
  i.as_u8[1] = (a >> 16) & 0xff;
  i.as_u8[2] = (a >> 8) & 0xff;
  i.as_u8[3] = (a >> 0) & 0xff;
  i.as_u8[4] = (p >> 8) & 0xff;
  i.as_u8[5] = (p >> 0) & 0xff;
  i.as_u8[6] = 0;
  i.as_u8[7] = 0;
  return i;
}

clib_error_t *
mc_socket_main_init (mc_socket_main_t * msm, char **intfc_probe_list,
                     int n_intfcs_to_probe);
#endif /* __included_mc_socket_h__ */

diff --git a/vlib/vlib/unix/pci.c b/vlib/vlib/unix/pci.c
new file mode 100644
index 00000000000..02c37f72707
--- /dev/null
+++ b/vlib/vlib/unix/pci.c
@@ -0,0 +1,577 @@
/*
 * Copyright (c) 2015 Cisco and/or its affiliates.
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
/*
 * pci.c: Linux user space PCI bus management.
+ * + * Copyright (c) 2008 Eliot Dresselhaus + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#include <vlib/vlib.h> +#include <vlib/pci/pci.h> +#include <vlib/unix/unix.h> +#include <vlib/unix/pci.h> + +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <dirent.h> + +linux_pci_main_t linux_pci_main; + +static clib_error_t * +foreach_directory_file (char * dir_name, + clib_error_t * (* f) (void * arg, u8 * path_name, u8 * file_name), + void * arg, + int scan_dirs) +{ + DIR * d; + struct dirent * e; + clib_error_t * error = 0; + u8 * s, * t; + + d = opendir (dir_name); + if (! d) + { + /* System has no PCI bus. */ + if (errno == ENOENT) + return 0; + return clib_error_return_unix (0, "open `%s'", dir_name); + } + + s = t = 0; + while (1) + { + e = readdir (d); + if (! e) + break; + if (scan_dirs) + { + if (e->d_type == DT_DIR + && (! strcmp (e->d_name, ".") + || ! 
strcmp (e->d_name, ".."))) + continue; + } + else + { + if (e->d_type == DT_DIR) + continue; + } + + s = format (s, "%s/%s", dir_name, e->d_name); + t = format (t, "%s", e->d_name); + error = f (arg, s, t); + _vec_len (s) = 0; + _vec_len (t) = 0; + + if (error) + break; + } + + vec_free (s); + closedir (d); + + return error; +} + +static clib_error_t * +write_sys_fs (char * file_name, char * fmt, ...) +{ + u8 * s; + int fd; + + fd = open (file_name, O_WRONLY); + if (fd < 0) + return clib_error_return_unix (0, "open `%s'", file_name); + + va_list va; + va_start (va, fmt); + s = va_format (0, fmt, &va); + va_end (va); + + if (write (fd, s, vec_len (s)) < 0) + return clib_error_return_unix (0, "write `%s'", file_name); + + vec_free (s); + close (fd); + return 0; +} + +static clib_error_t * +scan_uio_dir (void * arg, u8 * path_name, u8 * file_name) +{ + linux_pci_device_t * l = arg; + unformat_input_t input; + + unformat_init_string (&input, (char *) file_name, vec_len (file_name)); + + if (! unformat (&input, "uio%d", &l->uio_minor)) + abort (); + + unformat_free (&input); + return 0; +} + +static clib_error_t * linux_pci_uio_read_ready (unix_file_t * uf) +{ + linux_pci_main_t * pm = &linux_pci_main; + vlib_main_t * vm = pm->vlib_main; + linux_pci_device_t * l; + u32 li = uf->private_data; + + l = pool_elt_at_index (pm->pci_devices, li); + vlib_node_set_interrupt_pending (vm, l->device_input_node_index); + + /* Let node know which device is interrupting. 
 */
  {
    vlib_node_runtime_t * rt = vlib_node_get_runtime (vm, l->device_input_node_index);
    rt->runtime_data[0] |= 1 << l->device_index;
  }

  return /* no error */ 0;
}

/* epoll error callback for a UIO fd: surface the error to the caller. */
static clib_error_t *linux_pci_uio_error_ready (unix_file_t *uf)
{
  u32 error_index = (u32) uf->private_data;

  return clib_error_return (0, "pci device %d: error", error_index);
}

/* Size of a PCI BAR, read from the sysfs resource file's stat size;
   0 when the resource does not exist. */
static uword pci_resource_size (uword os_handle, uword resource)
{
  linux_pci_main_t * pm = &linux_pci_main;
  linux_pci_device_t * p;
  u8 * file_name;
  struct stat b;
  uword result = 0;

  p = pool_elt_at_index (pm->pci_devices, os_handle);

  file_name = format (0, "%v/resource%d%c", p->dev_dir_name, resource, 0);
  if (stat ((char *) file_name, &b) >= 0)
    result = b.st_size;
  vec_free (file_name);
  return result;
}

/* Tell the uio_pci_dma kernel driver to write reg_value to reg_offset in
   BAR 0 when disabling interrupts for this device. */
void os_add_pci_disable_interrupts_reg (uword os_handle, u32 resource,
                                        u32 reg_offset, u32 reg_value)
{
  linux_pci_main_t * pm = &linux_pci_main;
  linux_pci_device_t * l;
  char * file_name;
  clib_error_t * error;

  l = pool_elt_at_index (pm->pci_devices, os_handle);
  ASSERT (resource == 0);
  ASSERT (reg_offset < pci_resource_size (os_handle, resource));
  file_name = (char *) format (0, "%s/disable_interrupt_regs%c", l->dev_dir_name, 0);
  error = write_sys_fs (file_name, "%x %x", reg_offset, reg_value);
  if (error)
    clib_error_report (error);
  vec_free (file_name);
}

/* Register a scanned PCI device: copy the descriptor into the device pool,
   bind it to the uio_pci_dma driver, open its /dev/uioN fd and hook that fd
   into epoll for interrupt delivery. */
static void add_device (pci_device_t * dev, linux_pci_device_t * pdev)
{
  linux_pci_main_t * pm = &linux_pci_main;
  linux_pci_device_t * l;
  pci_config_header_t * c;
  u32 x[4];
  clib_error_t * error;

  c = &dev->config0.header;

  pool_get (pm->pci_devices, l);
  l[0] = pdev[0];

  /* Pool element must own its own copy of the directory name. */
  l->dev_dir_name = vec_dup (l->dev_dir_name);

  /* Parse bus, dev, function from directory name. */
  {
    unformat_input_t input;

    unformat_init_string (&input, (char *) l->dev_dir_name,
                          vec_len (l->dev_dir_name));

    if (! unformat (&input, "/sys/bus/pci/devices/%x:%x:%x.%x",
                    &x[0], &x[1], &x[2], &x[3]))
      abort ();

    unformat_free (&input);

    l->bus_address.bus = x[1];
    l->bus_address.slot_function = (x[2] << 3) | x[3];
    dev->bus_address = l->bus_address;
  }

  dev->os_handle = l - pm->pci_devices;

  error = write_sys_fs ("/sys/bus/pci/drivers/uio_pci_dma/new_id",
                        "%x %x", c->vendor_id, c->device_id);
  if (error)
    clib_error_report (error);
  error = write_sys_fs ("/sys/bus/pci/drivers/uio_pci_dma/bind",
                        "%04x:%02x:%02x.%x", x[0], x[1], x[2], x[3]);
  /* Errors happen when re-binding so just ignore them. */
  if (error)
    clib_error_free (error);

  {
    u8 * uio_dir = format (0, "%s/uio", l->dev_dir_name);
    foreach_directory_file ((char *) uio_dir, scan_uio_dir, l, /* scan_dirs */ 1);
    vec_free (uio_dir);
  }

  {
    char * uio_name = (char *) format (0, "/dev/uio%d%c", l->uio_minor, 0);
    l->uio_fd = open (uio_name, O_RDWR);
    if (l->uio_fd < 0)
      clib_unix_error ("open `%s'", uio_name);
    vec_free (uio_name);
  }

  {
    unix_file_t template = {0};
    unix_main_t * um = &unix_main;

    template.read_function = linux_pci_uio_read_ready;
    template.file_descriptor = l->uio_fd;
    template.error_function = linux_pci_uio_error_ready;
    template.private_data = l - pm->pci_devices;

    /* To be filled in by driver. */
    l->device_input_node_index = ~0;
    l->device_index = 0;

    l->unix_file_index = unix_file_add (um, &template);
  }
}

/* Release all fds and vectors owned by one linux_pci_device_t. */
static void linux_pci_device_free (linux_pci_device_t * l)
{
  int i;
  for (i = 0; i < vec_len (l->resource_fds); i++)
    if (l->resource_fds[i] > 0)
      close (l->resource_fds[i]);
  if (l->config_fd > 0)
    close (l->config_fd);
  if (l->uio_fd > 0)
    close (l->uio_fd);
  vec_free (l->resource_fds);
  vec_free (l->dev_dir_name);
}

/* Configuration space read/write.
 */
/* Read or write n_bytes of PCI config space at 'address' through the sysfs
   config fd opened at scan time. */
clib_error_t *
os_read_write_pci_config (uword os_handle,
                          vlib_read_or_write_t read_or_write,
                          uword address,
                          void * data,
                          u32 n_bytes)
{
  linux_pci_main_t * pm = &linux_pci_main;
  linux_pci_device_t * p;
  int n;

  p = pool_elt_at_index (pm->pci_devices, os_handle);

  if (address != lseek (p->config_fd, address, SEEK_SET))
    return clib_error_return_unix (0, "seek offset %d", address);

  if (read_or_write == VLIB_READ)
    n = read (p->config_fd, data, n_bytes);
  else
    n = write (p->config_fd, data, n_bytes);

  /* Short read/write is treated as failure. */
  if (n != n_bytes)
    return clib_error_return_unix (0, "%s",
                                   read_or_write == VLIB_READ
                                   ? "read" : "write");

  return 0;
}

/* mmap a PCI BAR (sysfs resourceN file) into this process.  When addr is
   non-null, MAP_FIXED is used to pin the mapping there.  The fd stays open
   for the life of the mapping and is recorded in p->resource_fds.
   NOTE(review): if mmap fails, the fd was already stored in resource_fds
   and is then closed here, leaving a stale descriptor in the vector —
   confirm against linux_pci_device_free's close loop. */
static clib_error_t *
os_map_pci_resource_internal (uword os_handle,
                              u32 resource,
                              u8 *addr,
                              void ** result)
{
  linux_pci_main_t * pm = &linux_pci_main;
  linux_pci_device_t * p;
  struct stat stat_buf;
  u8 * file_name;
  int fd;
  clib_error_t * error;
  int flags = MAP_SHARED;

  error = 0;
  p = pool_elt_at_index (pm->pci_devices, os_handle);

  file_name = format (0, "%v/resource%d%c", p->dev_dir_name, resource, 0);
  fd = open ((char *) file_name, O_RDWR);
  if (fd < 0)
    {
      error = clib_error_return_unix (0, "open `%s'", file_name);
      goto done;
    }

  if (fstat (fd, &stat_buf) < 0)
    {
      error = clib_error_return_unix (0, "fstat `%s'", file_name);
      goto done;
    }

  vec_validate (p->resource_fds, resource);
  p->resource_fds[resource] = fd;
  if (addr != 0)
    flags |= MAP_FIXED;

  *result = mmap (addr,
                  /* size */ stat_buf.st_size,
                  PROT_READ | PROT_WRITE,
                  flags,
                  /* file */ fd,
                  /* offset */ 0);
  if (*result == (void *) -1)
    {
      error = clib_error_return_unix (0, "mmap `%s'", file_name);
      goto done;
    }

 done:
  if (error)
    {
      if (fd > 0)
        close (fd);
    }
  vec_free (file_name);
  return error;
}

/* Map a BAR at a kernel-chosen address. */
clib_error_t *
os_map_pci_resource (uword os_handle,
                     u32 resource,
                     void ** result)
{
  return (os_map_pci_resource_internal (os_handle, resource, 0 /* addr */,
                                        result));
}

/* Map a BAR at a caller-chosen fixed address. */
clib_error_t *
os_map_pci_resource_fixed (uword os_handle,
                           u32 resource,
                           u8 *addr,
                           void ** result)
{
  return (os_map_pci_resource_internal (os_handle, resource, addr, result));
}

/* Close all fds for a device and return its pool slot. */
void os_free_pci_device (uword os_handle)
{
  linux_pci_main_t * pm = &linux_pci_main;
  linux_pci_device_t * l;

  l = pool_elt_at_index (pm->pci_devices, os_handle);
  linux_pci_device_free (l);
  pool_put (pm->pci_devices, l);
}

/* Format "bus/slot/function" for a device handle. */
u8 * format_os_pci_handle (u8 * s, va_list * va)
{
  linux_pci_main_t * pm = &linux_pci_main;
  uword os_pci_handle = va_arg (*va, uword);
  linux_pci_device_t * l;

  l = pool_elt_at_index (pm->pci_devices, os_pci_handle);
  return format (s, "%x/%x/%x", l->bus_address.bus,
                 (l->bus_address.slot_function >> 3),
                 (l->bus_address.slot_function & 0x7));
}

/* Advance to the next registration in the ELF-section registration list;
   each entry's supported_devices array is terminated by vendor_id == 0. */
static inline pci_device_registration_t *
pci_device_next_registered (pci_device_registration_t * r)
{
  uword i;

  /* Null vendor id marks end of initialized list. */
  for (i = 0; r->supported_devices[i].vendor_id != 0; i++)
    ;

  return clib_elf_section_data_next (r, i * sizeof (r->supported_devices[0]));
}

/* Check (via /sys/bus/pci/drivers/<name>) whether the registration's kernel
   driver is loaded; also counts repeat sightings in kernel_driver_running. */
static inline u8 kernel_driver_installed (pci_device_registration_t *r)
{
  u8 * link_name;
  struct stat b;

  link_name = format (0, "/sys/bus/pci/drivers/%s", r->kernel_driver);
  if (stat ((char *)link_name, &b) >= 0)
    r->kernel_driver_running++;
  else
    r->kernel_driver_running=0;

  vec_free (link_name);
  return r->kernel_driver_running;
}

/* Match a scanned device against registered drivers; on the first
   vendor/device match whose kernel driver is not in the way, wire the
   device up and run the driver's init function. */
static clib_error_t *
init_device_from_registered (vlib_main_t * vm,
                             pci_device_t * dev,
                             linux_pci_device_t * pdev)
{
  unix_main_t * um = vlib_unix_get_main();
  pci_device_registration_t * r;
  pci_device_id_t * i;
  pci_config_header_t * c;

  c = &dev->config0.header;

  r = um->pci_device_registrations;

  while (r)
    {
      for (i = r->supported_devices; i->vendor_id != 0; i++)
        if (i->vendor_id == c->vendor_id && i->device_id == c->device_id)
          {
            if (r->kernel_driver &&
kernel_driver_installed(r)) + { + if (r->kernel_driver_running == 1) + { + clib_warning("PCI device type [%04x:%04x] is busy!\n" + "\tUninstall the associated linux kernel " + "driver: sudo rmmod %s", + c->vendor_id, c->device_id, r->kernel_driver); + } + continue; + } + add_device (dev, pdev); + return r->init_function (vm, dev); + } + r = r->next_registration; + } + /* No driver, close the PCI config-space FD */ + close (pdev->config_fd); + return 0; +} + +static clib_error_t * +init_device (vlib_main_t * vm, + pci_device_t * dev, + linux_pci_device_t * pdev) +{ + return init_device_from_registered (vm, dev, pdev); +} + +static clib_error_t * +scan_device (void * arg, u8 * dev_dir_name, u8 * ignored) +{ + vlib_main_t * vm = arg; + int fd; + u8 * f; + clib_error_t * error = 0; + pci_device_t dev = {0}; + linux_pci_device_t pdev = {0}; + + f = format (0, "%v/config%c", dev_dir_name, 0); + fd = open ((char *) f, O_RDWR); + + /* Try read-only access if write fails. */ + if (fd < 0) + fd = open ((char *) f, O_RDONLY); + + if (fd < 0) + { + error = clib_error_return_unix (0, "open `%s'", f); + goto done; + } + + /* You can only read more that 64 bytes of config space as root; so we try to + read the full space but fall back to just the first 64 bytes. */ + if (read (fd, &dev.config_data, sizeof (dev.config_data)) != sizeof (dev.config_data) + && read (fd, &dev.config0, sizeof (dev.config0)) != sizeof (dev.config0)) + { + error = clib_error_return_unix (0, "read `%s'", f); + goto done; + } + + { + static pci_config_header_t all_ones; + if (all_ones.vendor_id == 0) + memset (&all_ones, ~0, sizeof (all_ones)); + + if (! 
memcmp (&dev.config0.header, &all_ones, sizeof (all_ones))) + { + error = clib_error_return (0, "invalid PCI config for `%s'", f); + goto done; + } + } + + if (dev.config0.header.header_type == 0) + pci_config_type0_little_to_host (&dev.config0); + else + pci_config_type1_little_to_host (&dev.config1); + + pdev.config_fd = fd; + pdev.dev_dir_name = dev_dir_name; + + error = init_device (vm, &dev, &pdev); + + done: + vec_free (f); + return error; +} + +clib_error_t * pci_bus_init (vlib_main_t * vm) +{ + linux_pci_main_t * pm = &linux_pci_main; + clib_error_t * error; + + pm->vlib_main = vm; + + if ((error = vlib_call_init_function (vm, unix_input_init))) + return error; + + error = foreach_directory_file ("/sys/bus/pci/devices", scan_device, vm, /* scan_dirs */ 0); + + /* Complain and continue. might not be root, etc. */ + if (error) + clib_error_report (error); + + return error; +} + +VLIB_INIT_FUNCTION (pci_bus_init); diff --git a/vlib/vlib/unix/pci.h b/vlib/vlib/unix/pci.h new file mode 100644 index 00000000000..b384250eb47 --- /dev/null +++ b/vlib/vlib/unix/pci.h @@ -0,0 +1,94 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +/* + * unix/pci.h: Linux specific pci state + * + * Copyright (c) 2008 Eliot Dresselhaus + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#ifndef included_unix_pci_h +#define included_unix_pci_h + +#include <vlib/pci/pci.h> + +typedef struct { + /* /sys/bus/pci/devices/... directory name for this device. */ + u8 * dev_dir_name; + + /* Resource file descriptors. */ + int * resource_fds; + + /* File descriptor for config space read/write. */ + int config_fd; + + /* PCI bus address for this devices parsed from /sys/bus/pci/devices name. */ + pci_bus_address_t bus_address; + + /* File descriptor for /dev/uio%d */ + int uio_fd; + + /* Minor device for uio device. */ + u32 uio_minor; + + /* Index given by unix_file_add. */ + u32 unix_file_index; + + /* Input node to handle interrupts for this device. */ + u32 device_input_node_index; + + /* Node runtime will be a bitmap of device indices with pending interrupts. 
*/ + u32 device_index; +} linux_pci_device_t; + +/* Pool of PCI devices. */ +typedef struct { + vlib_main_t * vlib_main; + linux_pci_device_t * pci_devices; +} linux_pci_main_t; + +extern linux_pci_main_t linux_pci_main; + +always_inline linux_pci_device_t * +pci_dev_for_linux (pci_device_t * dev) +{ + linux_pci_main_t * pm = &linux_pci_main; + return pool_elt_at_index (pm->pci_devices, dev->os_handle); +} + +/* Call to allocate/initialize the pci subsystem. + This is not an init function so that users can explicitly enable + pci only when it's needed. */ +clib_error_t * pci_bus_init (vlib_main_t * vm); + +#endif /* included_unix_pci_h */ diff --git a/vlib/vlib/unix/physmem.c b/vlib/vlib/unix/physmem.c new file mode 100644 index 00000000000..83b40be6449 --- /dev/null +++ b/vlib/vlib/unix/physmem.c @@ -0,0 +1,472 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +/* + * physmem.c: Unix physical memory + * + * Copyright (c) 2008 Eliot Dresselhaus + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#include <vlib/unix/physmem.h> + +static physmem_main_t physmem_main; + +static void * +unix_physmem_alloc_aligned (vlib_physmem_main_t * vpm, uword n_bytes, uword alignment) +{ + physmem_main_t * pm = &physmem_main; + uword lo_offset, hi_offset; + uword * to_free = 0; + +#if DPDK > 0 + clib_warning ("unsafe alloc!"); +#endif + + /* IO memory is always at least cache aligned. */ + alignment = clib_max (alignment, CLIB_CACHE_LINE_BYTES); + + while (1) + { + mheap_get_aligned (pm->heap, n_bytes, + /* align */ alignment, + /* align offset */ 0, + &lo_offset); + + /* Allocation failed? */ + if (lo_offset == ~0) + break; + + /* Make sure allocation does not span DMA physical chunk boundary. 
*/ + hi_offset = lo_offset + n_bytes - 1; + + if ((lo_offset >> vpm->log2_n_bytes_per_page) == + (hi_offset >> vpm->log2_n_bytes_per_page)) + break; + + /* Allocation would span chunk boundary, queue it to be freed as soon as + we find suitable chunk. */ + vec_add1 (to_free, lo_offset); + } + + if (to_free != 0) + { + uword i; + for (i = 0; i < vec_len (to_free); i++) + mheap_put (pm->heap, to_free[i]); + vec_free (to_free); + } + + return lo_offset != ~0 ? pm->heap + lo_offset : 0; +} + +static void unix_physmem_free (void * x) +{ + physmem_main_t * pm = &physmem_main; + + /* Return object to region's heap. */ + mheap_put (pm->heap, x - pm->heap); +} + +static void htlb_shutdown(void) +{ + physmem_main_t * pm = &physmem_main; + + if (! pm->shmid) + return; + shmctl (pm->shmid, IPC_RMID, 0); + pm->shmid = 0; +} + +/* try to use huge TLB pgs if possible */ +static int htlb_init (vlib_main_t * vm) +{ + vlib_physmem_main_t * vpm = &vm->physmem_main; + physmem_main_t * pm = &physmem_main; + u64 hugepagesize, pagesize; + u64 pfn, seek_loc; + u64 cur, physaddr, ptbits; + int fd, i; + + pm->shmid = shmget (11 /* key, my amp goes to 11 */, pm->mem_size, + IPC_CREAT | SHM_HUGETLB | SHM_R | SHM_W); + if (pm->shmid < 0) + { + clib_unix_warning ("shmget"); + return 0; + } + + pm->mem = shmat (pm->shmid, NULL, 0 /* flags */); + if (pm->mem == 0) + { + shmctl (pm->shmid, IPC_RMID, 0); + return 0; + } + + memset (pm->mem, 0, pm->mem_size); + + /* $$$ get page size info from /proc/meminfo */ + hugepagesize = 2<<20; + pagesize = 4<<10; + vpm->log2_n_bytes_per_page = min_log2 (hugepagesize); + vec_resize (vpm->page_table, pm->mem_size / hugepagesize); + + vpm->page_mask = pow2_mask (vpm->log2_n_bytes_per_page); + vpm->virtual.start = pointer_to_uword (pm->mem); + vpm->virtual.size = pm->mem_size; + vpm->virtual.end = vpm->virtual.start + vpm->virtual.size; + + fd = open("/proc/self/pagemap", O_RDONLY); + + if (fd < 0) + { + (void) shmdt (pm->mem); + return 0; + } + + pm->heap = 
mheap_alloc_with_flags + (pm->mem, pm->mem_size, + /* Don't want mheap mmap/munmap with IO memory. */ + MHEAP_FLAG_DISABLE_VM); + + cur = (u64) pm->mem; + i = 0; + + while (cur < (u64) pm->mem + pm->mem_size) + { + pfn = (u64) cur / pagesize; + seek_loc = pfn * sizeof (u64); + if (lseek (fd, seek_loc, SEEK_SET) != seek_loc) + { + clib_unix_warning ("lseek to 0x%llx", seek_loc); + shmctl (pm->shmid, IPC_RMID, 0); + close(fd); + return 0; + } + if (read (fd, &ptbits, sizeof (ptbits)) != (sizeof(ptbits))) + { + clib_unix_warning ("read ptbits"); + shmctl (pm->shmid, IPC_RMID, 0); + close(fd); + return 0; + } + + /* bits 0-54 are the physical page number */ + physaddr = (ptbits & 0x7fffffffffffffULL) * pagesize; + if (CLIB_DEBUG > 1) + fformat(stderr, "pm: virtual 0x%llx physical 0x%llx\n", + cur, physaddr); + vpm->page_table[i++] = physaddr; + + cur += hugepagesize; + } + close(fd); + atexit (htlb_shutdown); + return 1; +} + +int vlib_app_physmem_init (vlib_main_t * vm, + physmem_main_t * pm, int) __attribute__ ((weak)); +int vlib_app_physmem_init (vlib_main_t * vm, physmem_main_t * pm, int x) +{ + return 0; +} + +clib_error_t * unix_physmem_init (vlib_main_t * vm, int physical_memory_required) +{ + vlib_physmem_main_t * vpm = &vm->physmem_main; + physmem_main_t * pm = &physmem_main; + clib_error_t * error = 0; + char * dev_uio_dma_file = "/dev/uio-dma"; + int using_fake_memory = 0; + + /* Avoid multiple calls. */ + if (vm->os_physmem_alloc_aligned) + return error; + + vm->os_physmem_alloc_aligned = unix_physmem_alloc_aligned; + vm->os_physmem_free = unix_physmem_free; + pm->mem = MAP_FAILED; + + if (pm->mem_size == 0) + pm->mem_size = 16 << 20; + + /* OK, Mr. 
App, you tell us */ + if (vlib_app_physmem_init (vm, pm, physical_memory_required)) + return 0; + + if (physical_memory_required) + { + if (!pm->no_hugepages && htlb_init(vm)) + { + fformat(stderr, "%s: use huge pages\n", __FUNCTION__); + return 0; + } + pm->uio_dma_fd = open (dev_uio_dma_file, O_RDWR); + } + else + pm->uio_dma_fd = -1; + + if (pm->uio_dma_fd < 0) + { + if (physical_memory_required) + { + error = clib_error_return_unix (0, "open `%s'", dev_uio_dma_file); + goto done; + } + + using_fake_memory = 1; + pm->mem = mmap (0, pm->mem_size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + if (pm->mem == MAP_FAILED) + { + error = clib_error_return_unix (0, "mmap"); + goto done; + } + + pm->heap = mheap_alloc (pm->mem, pm->mem_size); + + /* Identity map with a single page. */ + vpm->log2_n_bytes_per_page = min_log2 (pm->mem_size); + vec_add1 (vpm->page_table, pointer_to_uword (pm->mem)); + } + else + error = clib_error_return (0, "uio_dma deprecated"); + + if (using_fake_memory) + fformat(stderr, "%s: use fake dma pages\n", __FUNCTION__); + else + fformat(stderr, "%s: use uio dma pages\n", __FUNCTION__); + + done: + if (error) + { + if (pm->mem != MAP_FAILED) + munmap (pm->mem, pm->mem_size); + if (pm->uio_dma_fd >= 0) + { + close (pm->uio_dma_fd); + pm->uio_dma_fd = -1; + } + } + return error; +} + +static clib_error_t * +show_physmem (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ +#if DPDK > 0 + vlib_cli_output (vm, "Not supported with DPDK drivers."); +#else + physmem_main_t * pm = &physmem_main; + + if (pm->heap) + vlib_cli_output (vm, "%U", format_mheap, pm->heap, /* verbose */ 0); + else + vlib_cli_output (vm, "No physmem allocated."); +#endif + return 0; +} + +VLIB_CLI_COMMAND (show_physmem_command, static) = { + .path = "show physmem", + .short_help = "Show physical memory allocation", + .function = show_physmem, +}; + +static clib_error_t * +show_affinity (vlib_main_t * vm, + unformat_input_t * input, + 
vlib_cli_command_t * cmd) +{ + cpu_set_t set; + cpu_set_t *setp = &set; + int i, rv; + u8 *s = 0; + int first_set_bit_in_run = -1; + int last_set_bit_in_run = -1; + int output_done = 0; + + rv = sched_getaffinity (0 /* pid, 0 = this proc */, + sizeof (*setp), setp); + if (rv < 0) + { + vlib_cli_output (vm, "Couldn't get affinity mask: %s\n", + strerror(errno)); + return 0; + } + + for (i = 0; i < 64; i++) + { + if (CPU_ISSET(i, setp)) + { + if (first_set_bit_in_run == -1) + { + first_set_bit_in_run = i; + last_set_bit_in_run = i; + if (output_done) + s = format (s, ","); + s = format (s, "%d-", i); + output_done = 1; + } + else + { + if (i == (last_set_bit_in_run+1)) + last_set_bit_in_run = i; + } + } + else + { + if (first_set_bit_in_run != -1) + { + if (first_set_bit_in_run == (i-1)) + { + _vec_len (s) -= 2 + ((first_set_bit_in_run/10)); + } + s = format (s, "%d", last_set_bit_in_run); + first_set_bit_in_run = -1; + last_set_bit_in_run = -1; + } + } + } + + if (first_set_bit_in_run != -1) + s = format (s, "%d", first_set_bit_in_run); + + vlib_cli_output (vm, "Process runs on: %v", s); + return 0; +} + +VLIB_CLI_COMMAND (show_affinity_command, static) = { + .path = "show affinity", + .short_help = "Show process cpu affinity", + .function = show_affinity, +}; + +static clib_error_t * +set_affinity (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + cpu_set_t set; + cpu_set_t *setp = &set; + int i, rv; + int another_round; + u32 first, last; + + memset (setp, 0, sizeof (*setp)); + + do { + another_round = 0; + if (unformat (input, "%d-%d,", &first, &last)) + { + if (first > 64 || last > 64) + { + barf1: + vlib_cli_output (vm, "range %d-%d invalid", first, last); + return 0; + } + + for (i = first; i <= last; i++) + CPU_SET(i, setp); + another_round = 1; + } + else if (unformat (input, "%d-%d", &first, &last)) + { + if (first > 64 || last > 64) + goto barf1; + + for (i = first; i <= last; i++) + CPU_SET(i, setp); + } + else if (unformat 
(input, "%d,", &first)) + { + if (first > 64) + { + barf2: + vlib_cli_output (vm, "cpu %d invalid", first); + return 0; + } + CPU_SET(first, setp); + another_round = 1; + } + else if (unformat (input, "%d", &first)) + { + if (first > 64) + goto barf2; + + CPU_SET(first, setp); + } + } while (another_round); + + rv = sched_setaffinity (0 /* pid, 0 = this proc */, + sizeof (*setp), setp); + + if (rv < 0) + { + vlib_cli_output (vm, "Couldn't get affinity mask: %s\n", + strerror(errno)); + return 0; + } + return show_affinity (vm, input, cmd); +} + +VLIB_CLI_COMMAND (set_affinity_command, static) = { + .path = "set affinity", + .short_help = "Set process cpu affinity", + .function = set_affinity, +}; + +static clib_error_t * +vlib_physmem_configure (vlib_main_t * vm, unformat_input_t * input) +{ + physmem_main_t * pm = &physmem_main; + u32 size_in_mb; + + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (input, "no-huge") || unformat (input, "no-huge-pages")) + pm->no_hugepages = 1; + + else if (unformat(input, "size-in-mb %d", &size_in_mb) || + unformat(input, "size %d", &size_in_mb)) + pm->mem_size = size_in_mb << 20; + else + return unformat_parse_error (input); + } + + unformat_free (input); + return 0; +} + +VLIB_EARLY_CONFIG_FUNCTION (vlib_physmem_configure, "physmem"); diff --git a/vlib/vlib/unix/physmem.h b/vlib/vlib/unix/physmem.h new file mode 100644 index 00000000000..a963be746d8 --- /dev/null +++ b/vlib/vlib/unix/physmem.h @@ -0,0 +1,59 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef __included_physmem_h__ +#define __included_physmem_h__ + +/* Manage I/O physical memory. */ +#define _GNU_SOURCE +#include <sched.h> +#include <vppinfra/cache.h> +#include <vppinfra/error.h> +#include <vppinfra/mheap.h> +#include <vppinfra/os.h> + +#include <vlib/vlib.h> +#include <vlib/unix/unix.h> + +#include <sys/fcntl.h> /* for open */ +#include <sys/file.h> /* for flock */ +#include <sys/ioctl.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <sys/ipc.h> +#include <sys/shm.h> + +typedef struct { + /* File descriptor for /dev/uio-dma. */ + int uio_dma_fd; + + /* Virtual memory via mmaped. */ + void * mem; + + /* Size in bytes. */ + uword mem_size; + + /* Heap allocated out of virtual memory. */ + void * heap; + + /* huge TLB segment id */ + int shmid; + + /* should we try to use htlb ? */ + int no_hugepages; + +} physmem_main_t; + +#endif /* __included_physmem_h__ */ diff --git a/vlib/vlib/unix/plugin.c b/vlib/vlib/unix/plugin.c new file mode 100644 index 00000000000..3411ef340af --- /dev/null +++ b/vlib/vlib/unix/plugin.c @@ -0,0 +1,210 @@ +/* + * plugin.c: plugin handling + * + * Copyright (c) 2011 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <vlib/unix/plugin.h> +#include <dlfcn.h> +#include <dirent.h> + +plugin_main_t vlib_plugin_main; + +void vlib_set_get_handoff_structure_cb (void *cb) +{ + plugin_main_t * pm = &vlib_plugin_main; + pm->handoff_structure_get_cb = cb; +} + +static void * vnet_get_handoff_structure (void) +{ + void * (*fp)(void); + + fp = vlib_plugin_main.handoff_structure_get_cb; + if (fp == 0) + return 0; + else + return (*fp)(); +} + +static int +load_one_plugin (plugin_main_t *pm, plugin_info_t *pi, int from_early_init) +{ + void *handle, *register_handle; + clib_error_t * (*fp)(vlib_main_t *, void *, int); + clib_error_t * error; + void *handoff_structure; + + handle = dlopen ((char *)pi->name, RTLD_LAZY); + + /* + * Note: this can happen if the plugin has an undefined symbol reference, + * so print a warning. Otherwise, the poor slob won't know what happened. + * Ask me how I know that... 
+ */ + if (handle == 0) + { + clib_warning ("%s", dlerror()); + return -1; + } + + pi->handle = handle; + + register_handle = dlsym (pi->handle, "vlib_plugin_register"); + if (register_handle == 0) + { + dlclose (handle); + return 0; + } + + fp = register_handle; + + handoff_structure = vnet_get_handoff_structure(); + + if (handoff_structure == 0) + error = clib_error_return (0, "handoff structure callback returned 0"); + else + error = (*fp)(pm->vlib_main, handoff_structure, from_early_init); + + if (error) + { + clib_error_report (error); + dlclose (handle); + return 1; + } + + clib_warning ("Loaded plugin: %s", pi->name); + + return 0; +} + +static u8 **split_plugin_path (plugin_main_t *pm) +{ + int i; + u8 **rv = 0; + u8 *path = pm->plugin_path; + u8 *this = 0; + + for (i = 0; i < vec_len (pm->plugin_path); i++) + { + if (path[i] != ':') + { + vec_add1(this, path[i]); + continue; + } + vec_add1(this, 0); + vec_add1 (rv, this); + this = 0; + } + if (this) + { + vec_add1 (this, 0); + vec_add1 (rv, this); + } + return rv; +} + +int vlib_load_new_plugins (plugin_main_t *pm, int from_early_init) +{ + DIR *dp; + struct dirent *entry; + struct stat statb; + uword *p; + plugin_info_t *pi; + u8 **plugin_path; + int i; + + plugin_path = split_plugin_path (pm); + + for (i = 0; i < vec_len (plugin_path); i++) + { + dp = opendir ((char *)plugin_path[i]); + + if (dp == 0) + continue; + + while ((entry = readdir (dp))) + { + u8 *plugin_name; + + if (pm->plugin_name_filter) + { + int j; + for (j = 0; j < vec_len (pm->plugin_name_filter); j++) + if (entry->d_name[j] != pm->plugin_name_filter[j]) + goto next; + } + + plugin_name = format (0, "%s/%s%c", plugin_path[i], + entry->d_name, 0); + + /* unreadable */ + if (stat ((char *)plugin_name, &statb) < 0) + { + ignore: + vec_free (plugin_name); + continue; + } + + /* a dir or other things which aren't plugins */ + if (!S_ISREG(statb.st_mode)) + goto ignore; + + p = hash_get_mem (pm->plugin_by_name_hash, plugin_name); + if (p == 
0) + { + vec_add2 (pm->plugin_info, pi, 1); + pi->name = plugin_name; + pi->file_info = statb; + + if (load_one_plugin (pm, pi, from_early_init)) + { + vec_free (plugin_name); + _vec_len (pm->plugin_info) = vec_len (pm->plugin_info) - 1; + continue; + } + memset (pi, 0, sizeof (*pi)); + hash_set_mem (pm->plugin_by_name_hash, plugin_name, + pi - pm->plugin_info); + } + next: + ; + } + closedir (dp); + vec_free (plugin_path[i]); + } + vec_free (plugin_path); + return 0; +} +char *vlib_plugin_path __attribute__((weak)); +char *vlib_plugin_path = ""; +char *vlib_plugin_name_filter __attribute__((weak)); +char *vlib_plugin_name_filter = 0; + +int vlib_plugin_early_init (vlib_main_t *vm) +{ + plugin_main_t *pm = &vlib_plugin_main; + + pm->plugin_path = format (0, "%s%c", vlib_plugin_path, 0); + + clib_warning ("plugin path %s", pm->plugin_path); + + if (vlib_plugin_name_filter) + pm->plugin_name_filter = format (0, "%s%c", vlib_plugin_name_filter, 0); + + pm->plugin_by_name_hash = hash_create_string (0, sizeof (uword)); + pm->vlib_main = vm; + + return vlib_load_new_plugins (pm, 1 /* from_early_init */); +} diff --git a/vlib/vlib/unix/plugin.h b/vlib/vlib/unix/plugin.h new file mode 100644 index 00000000000..e7d75099ed9 --- /dev/null +++ b/vlib/vlib/unix/plugin.h @@ -0,0 +1,88 @@ +/* + * plugin.h: plugin handling + * + * Copyright (c) 2011 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __included_plugin_h__ +#define __included_plugin_h__ + +#include <vlib/vlib.h> +#include <vlib/unix/unix.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <unistd.h> + +/* + * vlib plugin scheme + * + * Almost anything which can be made to work in a vlib unix + * application will also work in a vlib plugin. + * + * The elf-section magic which registers static objects + * works so long as plugins are preset when the vlib unix process + * starts. But wait: there's more... + * + * If an application calls vlib_load_new_plugins() -- possibly after + * changing vlib_plugin_main.plugin_path / vlib_plugin_main.plugin_name_filter, + * -- new plugins will be loaded. That, in turn, allows considerable + * flexibility in terms of adding feature code or fixing bugs without + * requiring the data-plane process to restart. + * + * When the plugin mechanism loads a plugin, it uses dlsym to locate + * and call the plugin's function vlib_plugin_register() if it exists. + * A plugin which expects to be loaded after the vlib application + * starts uses this callback to modify the application. If vlib_plugin_register + * returns non-zero, the plugin mechanism dlclose()'s the plugin. + * + * Applications control the plugin search path and name filter by + * declaring the variables vlib_plugin_path and vlib_plugin_name_filter. + * libvlib_unix.la supplies weak references for these symbols which + * effectively disable the scheme. In order for the elf-section magic to + * work, static plugins must be loaded at the earliest possible moment. + * + * An application can change these parameters at any time and call + * vlib_load_new_plugins(). 
+ */ + + + +typedef struct { + u8 *name; + struct stat file_info; + void *handle; +} plugin_info_t; + +typedef struct { + /* loaded plugin info */ + plugin_info_t *plugin_info; + uword *plugin_by_name_hash; + + /* path and name filter */ + u8 *plugin_path; + u8 *plugin_name_filter; + + /* handoff structure get callback */ + void *handoff_structure_get_cb; + + /* usual */ + vlib_main_t *vlib_main; +} plugin_main_t; + +plugin_main_t vlib_plugin_main; + +int vlib_plugin_early_init (vlib_main_t *vm); +int vlib_load_new_plugins (plugin_main_t *pm, int from_early_init); + +#endif /* __included_plugin_h__ */ diff --git a/vlib/vlib/unix/unix.h b/vlib/vlib/unix/unix.h new file mode 100644 index 00000000000..0802a93baa3 --- /dev/null +++ b/vlib/vlib/unix/unix.h @@ -0,0 +1,177 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +/* + * unix.h: Unix specific main state + * + * Copyright (c) 2008 Eliot Dresselhaus + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#ifndef included_unix_unix_h +#define included_unix_unix_h + +#include <vppinfra/socket.h> + +struct unix_file; +typedef clib_error_t * (unix_file_function_t) (struct unix_file * f); + +typedef struct unix_file { + /* Unix file descriptor from open/socket. */ + u32 file_descriptor; + + u32 flags; +#define UNIX_FILE_DATA_AVAILABLE_TO_WRITE (1 << 0) + + /* Data available for function's use. */ + uword private_data; + + /* Functions to be called when read/write data becomes ready. 
*/ + unix_file_function_t * read_function, * write_function, * error_function; +} unix_file_t; + +typedef struct { + f64 time; + clib_error_t * error; +} unix_error_history_t; + +typedef enum { + UNIX_FILE_UPDATE_ADD, + UNIX_FILE_UPDATE_MODIFY, + UNIX_FILE_UPDATE_DELETE, +} unix_file_update_type_t; + +typedef struct { + /* Back pointer to main structure. */ + vlib_main_t * vlib_main; + + u32 flags; + /* Run interactively or as daemon (background process). */ +#define UNIX_FLAG_INTERACTIVE (1 << 0) +#define UNIX_FLAG_NODAEMON (1 << 1) + + /* Pool of files to poll for input/output. */ + unix_file_t * file_pool; + + /* CLI listen socket. */ + clib_socket_t cli_listen_socket; + + void (* file_update) (unix_file_t * file, unix_file_update_type_t update_type); + + /* Circular buffer of last unix errors. */ + unix_error_history_t error_history[128]; + u32 error_history_index; + u64 n_total_errors; + + /* startup-config filename */ + u8 *startup_config_filename; + + /* unix config complete */ + volatile int unix_config_complete; + + /* CLI log file. GIGO. */ + u8 *log_filename; + int log_fd; + /* Don't put telnet connections into character mode */ + int cli_line_mode; + u32 cli_history_limit; + +} unix_main_t; + +/* Global main structure. 
*/ +extern unix_main_t unix_main; + +always_inline uword +unix_file_add (unix_main_t * um, unix_file_t * template) +{ + unix_file_t * f; + pool_get (um->file_pool, f); + f[0] = template[0]; + um->file_update (f, UNIX_FILE_UPDATE_ADD); + return f - um->file_pool; +} + +always_inline void +unix_file_del (unix_main_t * um, unix_file_t * f) +{ + um->file_update (f, UNIX_FILE_UPDATE_DELETE); + close (f->file_descriptor); + f->file_descriptor = ~0; + pool_put (um->file_pool, f); +} + +always_inline uword +unix_file_set_data_available_to_write (u32 unix_file_index, uword is_available) +{ + unix_file_t * uf = pool_elt_at_index (unix_main.file_pool, unix_file_index); + uword was_available = (uf->flags & UNIX_FILE_DATA_AVAILABLE_TO_WRITE); + if ((was_available != 0) != (is_available != 0)) + { + uf->flags ^= UNIX_FILE_DATA_AVAILABLE_TO_WRITE; + unix_main.file_update (uf, UNIX_FILE_UPDATE_MODIFY); + } + return was_available != 0; +} + +always_inline void +unix_save_error (unix_main_t * um, clib_error_t * error) +{ + unix_error_history_t * eh = um->error_history + um->error_history_index; + clib_error_free_vector (eh->error); + eh->error = error; + eh->time = vlib_time_now (um->vlib_main); + um->n_total_errors += 1; + if (++um->error_history_index >= ARRAY_LEN (um->error_history)) + um->error_history_index = 0; +} + +/* Main function for Unix VLIB. */ +int vlib_unix_main (int argc, char * argv[]); + +/* Call to allocate/initialize physical DMA memory subsystem. + This is not an init function so that users can explicitly enable/disable + physmem when its not needed. */ +clib_error_t * unix_physmem_init (vlib_main_t * vm, + int fail_if_physical_memory_not_present); + +/* Set prompt for CLI. 
*/ +void vlib_unix_cli_set_prompt (char * prompt); + +static inline unix_main_t * vlib_unix_get_main (void) +{ + return &unix_main; +} + +/* thread stack array; vec_len = max number of threads */ +u8 **vlib_thread_stacks; + +#endif /* included_unix_unix_h */ diff --git a/vlib/vlib/vlib.h b/vlib/vlib/vlib.h new file mode 100644 index 00000000000..74101f8d297 --- /dev/null +++ b/vlib/vlib/vlib.h @@ -0,0 +1,78 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/* + * vlib.h: top-level include file + * + * Copyright (c) 2008 Eliot Dresselhaus + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#ifndef included_vlib_h +#define included_vlib_h + +#include <vppinfra/clib.h> +#include <vppinfra/elf_clib.h> + +/* Generic definitions. */ +#include <vlib/defs.h> + +/* Forward declarations of structs to avoid circular dependencies. */ +struct vlib_main_t; + +/* All includes in alphabetical order. */ +#include <vlib/buffer.h> +#include <vlib/cli.h> +#include <vlib/counter.h> +#include <vlib/error.h> +#include <vlib/init.h> +#include <vlib/mc.h> +#include <vlib/node.h> +#include <vlib/physmem.h> +#include <vlib/trace.h> + +/* Main include depends on other vlib/ includes so we put it last. */ +#include <vlib/main.h> + +/* Inline/extern function declarations. */ +#include <vlib/threads.h> +#include <vlib/buffer_funcs.h> +#include <vlib/cli_funcs.h> +#include <vlib/error_funcs.h> +#include <vlib/format_funcs.h> +#include <vlib/node_funcs.h> +#include <vlib/trace_funcs.h> +#include <vlib/global_funcs.h> + +#include <vlib/buffer_node.h> + +#endif /* included_vlib_h */ |