diff options
Diffstat (limited to 'src/vlib')
66 files changed, 5328 insertions, 2158 deletions
diff --git a/src/vlib/CMakeLists.txt b/src/vlib/CMakeLists.txt index c8127fbe337..3c354b764dd 100644 --- a/src/vlib/CMakeLists.txt +++ b/src/vlib/CMakeLists.txt @@ -22,6 +22,12 @@ else() set(BUFFER_ALLOC_FAULT_INJECTOR 0 CACHE STRING "fault injector off") endif() +if(VPP_PLATFORM_BUFFER_ALIGN) + set(VLIB_BUFFER_ALIGN ${VPP_PLATFORM_BUFFER_ALIGN}) +else() + set(VLIB_BUFFER_ALIGN ${VPP_CACHE_LINE_SIZE}) +endif() + set(PRE_DATA_SIZE 128 CACHE STRING "Buffer headroom size.") if (CMAKE_BUILD_TYPE_UC STREQUAL "DEBUG") @@ -41,29 +47,30 @@ configure_file( ) install( FILES ${CMAKE_CURRENT_BINARY_DIR}/config.h - DESTINATION include/vlib + DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/vlib COMPONENT vpp-dev ) ############################################################################## -# Find lib and include files +# vlib shared library ############################################################################## -message(STATUS "Looking for libuuid") -vpp_find_path(UUID_INCLUDE_DIR NAMES uuid/uuid.h) -vpp_find_library(UUID_LIB NAMES uuid) -if(UUID_INCLUDE_DIR AND UUID_LIB) - include_directories(${UUID_INCLUDE_DIR}) - set(VMBUS_SOURCE linux/vmbus.c) - set(VMBUS_LIBS uuid) - message(STATUS "Found uuid in ${UUID_INCLUDE_DIR}") -else() - message(WARNING "-- libuuid not found - vmbus support disabled") +if("${CMAKE_SYSTEM_NAME}" STREQUAL "Linux") +set(PLATFORM_SOURCES + linux/pci.c + linux/vfio.c + linux/vmbus.c +) + +set(PLATFORM_HEADERS + linux/vfio.h +) +elseif("${CMAKE_SYSTEM_NAME}" STREQUAL "FreeBSD") +set(PLATFORM_SOURCES + freebsd/pci.c +) endif() -############################################################################## -# vlib shared library -############################################################################## add_vpp_library(vlib SOURCES buffer.c @@ -75,8 +82,6 @@ add_vpp_library(vlib format.c handoff_trace.c init.c - linux/pci.c - linux/vfio.c log.c main.c node.c @@ -88,8 +93,15 @@ add_vpp_library(vlib physmem.c punt.c punt_node.c + stats/cli.c + 
stats/collector.c + stats/format.c + stats/init.c + stats/provider_mem.c + stats/stats.c threads.c threads_cli.c + time.c trace.c unix/cli.c unix/input.c @@ -97,7 +109,9 @@ add_vpp_library(vlib unix/plugin.c unix/util.c vmbus/vmbus.c - ${VMBUS_SOURCE} + dma/dma.c + dma/cli.c + ${PLATFORM_SOURCES} MULTIARCH_SOURCES buffer_funcs.c @@ -113,12 +127,12 @@ add_vpp_library(vlib counter.h counter_types.h defs.h + dma/dma.h error_funcs.h error.h format_funcs.h global_funcs.h init.h - linux/vfio.h log.h main.h node_funcs.h @@ -129,7 +143,10 @@ add_vpp_library(vlib physmem_funcs.h physmem.h punt.h + stats/shared.h + stats/stats.h threads.h + time.h trace_funcs.h trace.h unix/mc_socket.h @@ -137,11 +154,12 @@ add_vpp_library(vlib unix/unix.h vlib.h vmbus/vmbus.h + ${PLATFORM_HEADERS} API_FILES pci/pci_types.api - LINK_LIBRARIES vppinfra svm ${VMBUS_LIBS} ${CMAKE_DL_LIBS} + LINK_LIBRARIES vppinfra svm ${CMAKE_DL_LIBS} ${EPOLL_LIB} DEPENDS api_headers ) diff --git a/src/vlib/buffer.c b/src/vlib/buffer.c index 71f84d377af..674f15d5dc6 100644 --- a/src/vlib/buffer.c +++ b/src/vlib/buffer.c @@ -43,10 +43,11 @@ * Allocate/free network buffers. 
*/ -#include <vppinfra/linux/sysfs.h> +#include <vppinfra/bitmap.h> +#include <vppinfra/unix.h> #include <vlib/vlib.h> #include <vlib/unix/unix.h> -#include <vpp/stats/stat_segment.h> +#include <vlib/stats/stats.h> #define VLIB_BUFFER_DEFAULT_BUFFERS_PER_NUMA 16384 #define VLIB_BUFFER_DEFAULT_BUFFERS_PER_NUMA_UNPRIV 8192 @@ -58,22 +59,8 @@ STATIC_ASSERT_FITS_IN (vlib_buffer_t, ref_count, 16); STATIC_ASSERT_FITS_IN (vlib_buffer_t, buffer_pool_index, 16); #endif -/* Make sure that buffer template size is not accidentally changed */ -STATIC_ASSERT_OFFSET_OF (vlib_buffer_t, template_end, 64); - u16 __vlib_buffer_external_hdr_size = 0; -static void -buffer_gauges_update_cached_fn (stat_segment_directory_entry_t * e, - u32 index); - -static void -buffer_gauges_update_available_fn (stat_segment_directory_entry_t * e, - u32 index); - -static void -buffer_gauges_update_used_fn (stat_segment_directory_entry_t * e, u32 index); - uword vlib_buffer_length_in_chain_slow_path (vlib_main_t * vm, vlib_buffer_t * b_first) @@ -107,7 +94,7 @@ format_vlib_buffer_no_chain (u8 * s, va_list * args) "ref-count %u", b->current_data, b->current_length, b->buffer_pool_index, b->ref_count); - if (b->flags & VLIB_BUFFER_TOTAL_LENGTH_VALID) + if (b->flags & VLIB_BUFFER_NEXT_PRESENT) s = format (s, ", totlen-nifb %d", b->total_length_not_including_first_buffer); @@ -485,26 +472,28 @@ static uword vlib_buffer_alloc_size (uword ext_hdr_size, uword data_size) { uword alloc_size = ext_hdr_size + sizeof (vlib_buffer_t) + data_size; - alloc_size = CLIB_CACHE_LINE_ROUND (alloc_size); + alloc_size = round_pow2 (alloc_size, VLIB_BUFFER_ALIGN); - /* in case when we have even number of cachelines, we add one more for + /* in case when we have even number of 'cachelines', we add one more for * better cache occupancy */ - alloc_size |= CLIB_CACHE_LINE_BYTES; + alloc_size |= VLIB_BUFFER_ALIGN; return alloc_size; } u8 -vlib_buffer_pool_create (vlib_main_t * vm, char *name, u32 data_size, - u32 
physmem_map_index) +vlib_buffer_pool_create (vlib_main_t *vm, u32 data_size, u32 physmem_map_index, + char *fmt, ...) { vlib_buffer_main_t *bm = vm->buffer_main; vlib_buffer_pool_t *bp; vlib_physmem_map_t *m = vlib_physmem_get_map (vm, physmem_map_index); uword start = pointer_to_uword (m->base); uword size = (uword) m->n_pages << m->log2_page_size; - uword i, j; - u32 alloc_size, n_alloc_per_page; + uword page_mask = ~pow2_mask (m->log2_page_size); + u8 *p; + u32 alloc_size; + va_list va; if (vec_len (bm->buffer_pools) >= 255) return ~0; @@ -542,48 +531,57 @@ vlib_buffer_pool_create (vlib_main_t * vm, char *name, u32 data_size, bp->buffer_template.buffer_pool_index = bp->index; bp->buffer_template.ref_count = 1; bp->physmem_map_index = physmem_map_index; - bp->name = format (0, "%s%c", name, 0); bp->data_size = data_size; bp->numa_node = m->numa_node; + bp->log2_page_size = m->log2_page_size; + + va_start (va, fmt); + bp->name = va_format (0, fmt, &va); + va_end (va); vec_validate_aligned (bp->threads, vlib_get_n_threads () - 1, CLIB_CACHE_LINE_BYTES); alloc_size = vlib_buffer_alloc_size (bm->ext_hdr_size, data_size); - n_alloc_per_page = (1ULL << m->log2_page_size) / alloc_size; + bp->alloc_size = alloc_size; /* preallocate buffer indices memory */ - bp->n_buffers = m->n_pages * n_alloc_per_page; - bp->buffers = clib_mem_alloc_aligned (bp->n_buffers * sizeof (u32), - CLIB_CACHE_LINE_BYTES); + bp->buffers = clib_mem_alloc_aligned ( + round_pow2 ((size / alloc_size) * sizeof (u32), CLIB_CACHE_LINE_BYTES), + CLIB_CACHE_LINE_BYTES); clib_spinlock_init (&bp->lock); - for (j = 0; j < m->n_pages; j++) - for (i = 0; i < n_alloc_per_page; i++) - { - u8 *p; - u32 bi; - - p = m->base + (j << m->log2_page_size) + i * alloc_size; - p += bm->ext_hdr_size; - - /* - * Waste 1 buffer (maximum) so that 0 is never a valid buffer index. - * Allows various places to ASSERT (bi != 0). Much easier - * than debugging downstream crashes in successor nodes. 
- */ - if (p == m->base) - continue; + p = m->base; - vlib_buffer_copy_template ((vlib_buffer_t *) p, &bp->buffer_template); + /* start with naturally aligned address */ + p += alloc_size - (uword) p % alloc_size; - bi = vlib_get_buffer_index (vm, (vlib_buffer_t *) p); + /* + * Waste 1 buffer (maximum) so that 0 is never a valid buffer index. + * Allows various places to ASSERT (bi != 0). Much easier + * than debugging downstream crashes in successor nodes. + */ + if (p == m->base) + p += alloc_size; - bp->buffers[bp->n_avail++] = bi; + for (; p < (u8 *) m->base + size - alloc_size; p += alloc_size) + { + vlib_buffer_t *b; + u32 bi; + + /* skip if buffer spans across page boundary */ + if (((uword) p & page_mask) != ((uword) (p + alloc_size) & page_mask)) + continue; + + b = (vlib_buffer_t *) (p + bm->ext_hdr_size); + b->template = bp->buffer_template; + bi = vlib_get_buffer_index (vm, b); + bp->buffers[bp->n_avail++] = bi; + vlib_get_buffer (vm, bi); + } - vlib_get_buffer (vm, bi); - } + bp->n_buffers = bp->n_avail; return bp->index; } @@ -601,14 +599,13 @@ format_vlib_buffer_pool (u8 * s, va_list * va) "Pool Name", "Index", "NUMA", "Size", "Data Size", "Total", "Avail", "Cached", "Used"); - /* *INDENT-OFF* */ vec_foreach (bpt, bp->threads) cached += bpt->n_cached; - /* *INDENT-ON* */ - s = format (s, "%-20s%=6d%=6d%=6u%=11u%=6u%=8u%=8u%=8u", - bp->name, bp->index, bp->numa_node, bp->data_size + - sizeof (vlib_buffer_t) + vm->buffer_main->ext_hdr_size, + s = format (s, "%-20v%=6d%=6d%=6u%=11u%=6u%=8u%=8u%=8u", bp->name, bp->index, + bp->numa_node, + bp->data_size + sizeof (vlib_buffer_t) + + vm->buffer_main->ext_hdr_size, bp->data_size, bp->n_buffers, bp->n_avail, cached, bp->n_buffers - bp->n_avail - cached); @@ -638,34 +635,26 @@ show_buffers (vlib_main_t *vm, unformat_input_t *input, return 0; } -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (show_buffers_command, static) = { .path = "show buffers", .short_help = "Show packet buffer allocation", .function = show_buffers, 
}; -/* *INDENT-ON* */ clib_error_t * -vlib_buffer_worker_init (vlib_main_t * vm) +vlib_buffer_num_workers_change (vlib_main_t *vm) { vlib_buffer_main_t *bm = vm->buffer_main; vlib_buffer_pool_t *bp; - /* *INDENT-OFF* */ vec_foreach (bp, bm->buffer_pools) - { - clib_spinlock_lock (&bp->lock); - vec_validate_aligned (bp->threads, vlib_get_n_threads () - 1, - CLIB_CACHE_LINE_BYTES); - clib_spinlock_unlock (&bp->lock); - } - /* *INDENT-ON* */ + vec_validate_aligned (bp->threads, vlib_get_n_threads () - 1, + CLIB_CACHE_LINE_BYTES); return 0; } -VLIB_WORKER_INIT_FUNCTION (vlib_buffer_worker_init); +VLIB_NUM_WORKERS_CHANGE_FN (vlib_buffer_num_workers_change); static clib_error_t * vlib_buffer_main_init_numa_alloc (struct vlib_main_t *vm, u32 numa_node, @@ -711,7 +700,6 @@ vlib_buffer_main_init_numa_node (struct vlib_main_t *vm, u32 numa_node, vlib_buffer_main_t *bm = vm->buffer_main; u32 physmem_map_index; clib_error_t *error; - u8 *name = 0; if (bm->log2_page_size == CLIB_MEM_PAGE_SZ_UNKNOWN) { @@ -742,14 +730,12 @@ vlib_buffer_main_init_numa_node (struct vlib_main_t *vm, u32 numa_node, return error; buffer_pool_create: - name = format (name, "default-numa-%d%c", numa_node, 0); - *index = vlib_buffer_pool_create (vm, (char *) name, - vlib_buffer_get_default_data_size (vm), - physmem_map_index); + *index = + vlib_buffer_pool_create (vm, vlib_buffer_get_default_data_size (vm), + physmem_map_index, "default-numa-%d", numa_node); if (*index == (u8) ~ 0) error = clib_error_return (0, "maximum number of buffer pools reached"); - vec_free (name); return error; @@ -776,10 +762,8 @@ buffer_get_cached (vlib_buffer_pool_t * bp) clib_spinlock_lock (&bp->lock); - /* *INDENT-OFF* */ vec_foreach (bpt, bp->threads) cached += bpt->n_cached; - /* *INDENT-ON* */ clib_spinlock_unlock (&bp->lock); @@ -798,37 +782,39 @@ buffer_get_by_index (vlib_buffer_main_t * bm, u32 index) } static void -buffer_gauges_update_used_fn (stat_segment_directory_entry_t * e, u32 index) 
+buffer_gauges_collect_used_fn (vlib_stats_collector_data_t *d) { vlib_main_t *vm = vlib_get_main (); - vlib_buffer_pool_t *bp = buffer_get_by_index (vm->buffer_main, index); + vlib_buffer_pool_t *bp = + buffer_get_by_index (vm->buffer_main, d->private_data); if (!bp) return; - e->value = bp->n_buffers - bp->n_avail - buffer_get_cached (bp); + d->entry->value = bp->n_buffers - bp->n_avail - buffer_get_cached (bp); } static void -buffer_gauges_update_available_fn (stat_segment_directory_entry_t * e, - u32 index) +buffer_gauges_collect_available_fn (vlib_stats_collector_data_t *d) { vlib_main_t *vm = vlib_get_main (); - vlib_buffer_pool_t *bp = buffer_get_by_index (vm->buffer_main, index); + vlib_buffer_pool_t *bp = + buffer_get_by_index (vm->buffer_main, d->private_data); if (!bp) return; - e->value = bp->n_avail; + d->entry->value = bp->n_avail; } static void -buffer_gauges_update_cached_fn (stat_segment_directory_entry_t * e, u32 index) +buffer_gauges_collect_cached_fn (vlib_stats_collector_data_t *d) { vlib_main_t *vm = vlib_get_main (); - vlib_buffer_pool_t *bp = buffer_get_by_index (vm->buffer_main, index); + vlib_buffer_pool_t *bp = + buffer_get_by_index (vm->buffer_main, d->private_data); if (!bp) return; - e->value = buffer_get_cached (bp); + d->entry->value = buffer_get_cached (bp); } clib_error_t * @@ -849,13 +835,8 @@ vlib_buffer_main_init (struct vlib_main_t * vm) clib_spinlock_init (&bm->buffer_known_hash_lockp); - if ((err = clib_sysfs_read ("/sys/devices/system/node/online", "%U", - unformat_bitmap_list, &bmp))) - clib_error_free (err); - - if ((err = clib_sysfs_read ("/sys/devices/system/node/has_memory", "%U", - unformat_bitmap_list, &bmp_has_memory))) - clib_error_free (err); + bmp = os_get_online_cpu_node_bitmap (); + bmp_has_memory = os_get_cpu_with_memory_bitmap (); if (bmp && bmp_has_memory) bmp = clib_bitmap_and (bmp, bmp_has_memory); @@ -868,7 +849,6 @@ vlib_buffer_main_init (struct vlib_main_t * vm) clib_panic ("system have more than %u NUMA 
nodes", VLIB_BUFFER_MAX_NUMA_NODES); - /* *INDENT-OFF* */ clib_bitmap_foreach (numa_node, bmp) { u8 *index = bm->default_buffer_pool_index_for_numa + numa_node; @@ -883,7 +863,6 @@ vlib_buffer_main_init (struct vlib_main_t * vm) if (first_valid_buffer_pool_index == 0xff) first_valid_buffer_pool_index = index[0]; } - /* *INDENT-ON* */ if (first_valid_buffer_pool_index == (u8) ~ 0) { @@ -891,34 +870,32 @@ vlib_buffer_main_init (struct vlib_main_t * vm) goto done; } - /* *INDENT-OFF* */ clib_bitmap_foreach (numa_node, bmp) { if (bm->default_buffer_pool_index_for_numa[numa_node] == (u8) ~0) bm->default_buffer_pool_index_for_numa[numa_node] = first_valid_buffer_pool_index; } - /* *INDENT-ON* */ vec_foreach (bp, bm->buffer_pools) { + vlib_stats_collector_reg_t reg = { .private_data = bp - bm->buffer_pools }; if (bp->n_buffers == 0) continue; - vec_reset_length (name); - name = format (name, "/buffer-pools/%s/cached%c", bp->name, 0); - stat_segment_register_gauge (name, buffer_gauges_update_cached_fn, - bp - bm->buffer_pools); + reg.entry_index = + vlib_stats_add_gauge ("/buffer-pools/%v/cached", bp->name); + reg.collect_fn = buffer_gauges_collect_cached_fn; + vlib_stats_register_collector_fn (&reg); - vec_reset_length (name); - name = format (name, "/buffer-pools/%s/used%c", bp->name, 0); - stat_segment_register_gauge (name, buffer_gauges_update_used_fn, - bp - bm->buffer_pools); + reg.entry_index = vlib_stats_add_gauge ("/buffer-pools/%v/used", bp->name); + reg.collect_fn = buffer_gauges_collect_used_fn; + vlib_stats_register_collector_fn (&reg); - vec_reset_length (name); - name = format (name, "/buffer-pools/%s/available%c", bp->name, 0); - stat_segment_register_gauge (name, buffer_gauges_update_available_fn, - bp - bm->buffer_pools); + reg.entry_index = + vlib_stats_add_gauge ("/buffer-pools/%v/available", bp->name); + reg.collect_fn = buffer_gauges_collect_available_fn; + vlib_stats_register_collector_fn (&reg); } done: diff --git a/src/vlib/buffer.h b/src/vlib/buffer.h 
index b548adf4be8..7d45689ed19 100644 --- a/src/vlib/buffer.h +++ b/src/vlib/buffer.h @@ -47,8 +47,7 @@ #include <vppinfra/lock.h> #include <vlib/error.h> /* for vlib_error_t */ -#include <vlib/config.h> /* for __PRE_DATA_SIZE */ -#define VLIB_BUFFER_PRE_DATA_SIZE __PRE_DATA_SIZE +#include <vlib/config.h> /* for VLIB_BUFFER_PRE_DATA_SIZE */ #define VLIB_BUFFER_DEFAULT_DATA_SIZE (2048) @@ -107,62 +106,78 @@ enum #define VLIB_BUFFER_TRACE_TRAJECTORY 0 #endif /* VLIB_BUFFER_TRACE_TRAJECTORY */ +#define vlib_buffer_template_fields \ + /** signed offset in data[], pre_data[] that we are currently \ + * processing. If negative current header points into predata area. */ \ + i16 current_data; \ + \ + /** Nbytes between current data and the end of this buffer. */ \ + u16 current_length; \ + /** buffer flags: \ + <br> VLIB_BUFFER_FREE_LIST_INDEX_MASK: bits used to store free list \ + index, <br> VLIB_BUFFER_IS_TRACED: trace this buffer. <br> \ + VLIB_BUFFER_NEXT_PRESENT: this is a multi-chunk buffer. <br> \ + VLIB_BUFFER_TOTAL_LENGTH_VALID: as it says <br> \ + VLIB_BUFFER_EXT_HDR_VALID: buffer contains valid external buffer manager \ + header, set to avoid adding it to a flow report <br> \ + VLIB_BUFFER_FLAG_USER(n): user-defined bit N \ + */ \ + u32 flags; \ + \ + /** Generic flow identifier */ \ + u32 flow_id; \ + \ + /** Reference count for this buffer. */ \ + volatile u8 ref_count; \ + \ + /** index of buffer pool this buffer belongs. */ \ + u8 buffer_pool_index; \ + \ + /** Error code for buffers to be enqueued to error handler. */ \ + vlib_error_t error; \ + \ + /** Next buffer for this linked-list of buffers. Only valid if \ + * VLIB_BUFFER_NEXT_PRESENT flag is set. 
*/ \ + u32 next_buffer; \ + \ + /** The following fields can be in a union because once a packet enters \ + * the punt path, it is no longer on a feature arc */ \ + union \ + { \ + /** Used by feature subgraph arcs to visit enabled feature nodes */ \ + u32 current_config_index; \ + /* the reason the packet once punted */ \ + u32 punt_reason; \ + }; \ + \ + /** Opaque data used by sub-graphs for their own purposes. */ \ + u32 opaque[10]; + +typedef struct +{ + CLIB_ALIGN_MARK (align_mark, 64); + vlib_buffer_template_fields +} vlib_buffer_template_t; + +STATIC_ASSERT_SIZEOF (vlib_buffer_template_t, 64); + /** VLIB buffer representation. */ typedef union { + CLIB_CACHE_LINE_ALIGN_MARK (cacheline0); struct { - CLIB_CACHE_LINE_ALIGN_MARK (cacheline0); - - /** signed offset in data[], pre_data[] that we are currently - * processing. If negative current header points into predata area. */ - i16 current_data; - - /** Nbytes between current data and the end of this buffer. */ - u16 current_length; - - /** buffer flags: - <br> VLIB_BUFFER_FREE_LIST_INDEX_MASK: bits used to store free list index, - <br> VLIB_BUFFER_IS_TRACED: trace this buffer. - <br> VLIB_BUFFER_NEXT_PRESENT: this is a multi-chunk buffer. - <br> VLIB_BUFFER_TOTAL_LENGTH_VALID: as it says - <br> VLIB_BUFFER_EXT_HDR_VALID: buffer contains valid external buffer manager header, - set to avoid adding it to a flow report - <br> VLIB_BUFFER_FLAG_USER(n): user-defined bit N - */ - u32 flags; - - /** Generic flow identifier */ - u32 flow_id; - - /** Reference count for this buffer. */ - volatile u8 ref_count; - - /** index of buffer pool this buffer belongs. */ - u8 buffer_pool_index; - - /** Error code for buffers to be enqueued to error handler. */ - vlib_error_t error; - - /** Next buffer for this linked-list of buffers. Only valid if - * VLIB_BUFFER_NEXT_PRESENT flag is set. 
*/ - u32 next_buffer; - - /** The following fields can be in a union because once a packet enters - * the punt path, it is no longer on a feature arc */ union { - /** Used by feature subgraph arcs to visit enabled feature nodes */ - u32 current_config_index; - /* the reason the packet once punted */ - u32 punt_reason; + struct + { + vlib_buffer_template_fields + }; + vlib_buffer_template_t template; }; - /** Opaque data used by sub-graphs for their own purposes. */ - u32 opaque[10]; - - /** part of buffer metadata which is initialized on alloc ends here. */ - STRUCT_MARK (template_end); + /* Data above is initialized or zeroed on alloc, data bellow is not + * and it is app responsibility to ensure data is valid */ /** start of 2nd half (2nd cacheline on systems where cacheline size is 64) */ CLIB_ALIGN_MARK (second_half, 64); @@ -220,6 +235,7 @@ STATIC_ASSERT (VLIB_BUFFER_PRE_DATA_SIZE % CLIB_CACHE_LINE_BYTES == 0, "VLIB_BUFFER_PRE_DATA_SIZE must be divisible by cache line size"); #define VLIB_BUFFER_HDR_SIZE (sizeof(vlib_buffer_t) - VLIB_BUFFER_PRE_DATA_SIZE) +#define VLIB_BUFFER_INVALID_INDEX 0xffffffff /** \brief Prefetch buffer metadata. 
The first 64 bytes of buffer contains most header information @@ -452,11 +468,12 @@ typedef struct CLIB_CACHE_LINE_ALIGN_MARK (cacheline0); uword start; uword size; - uword log2_page_size; + u8 log2_page_size; u8 index; - u32 numa_node; + u8 numa_node; u32 physmem_map_index; u32 data_size; + u32 alloc_size; u32 n_buffers; u32 n_avail; u32 *buffers; @@ -467,7 +484,7 @@ typedef struct vlib_buffer_pool_thread_t *threads; /* buffer metadata template */ - vlib_buffer_t buffer_template; + vlib_buffer_template_t buffer_template; } vlib_buffer_pool_t; #define VLIB_BUFFER_MAX_NUMA_NODES 32 diff --git a/src/vlib/buffer_funcs.c b/src/vlib/buffer_funcs.c index a661370a141..d910b25afac 100644 --- a/src/vlib/buffer_funcs.c +++ b/src/vlib/buffer_funcs.c @@ -8,32 +8,41 @@ #include <vppinfra/vector/compress.h> static_always_inline u32 -enqueue_one (vlib_main_t *vm, vlib_node_runtime_t *node, u64 *used_elt_bmp, - u16 next_index, u32 *buffers, u16 *nexts, u32 n_buffers, - u32 n_left, u32 *tmp) +enqueue_one (vlib_main_t *vm, vlib_node_runtime_t *node, + vlib_frame_bitmap_t used_elt_bmp, u16 next_index, u32 *buffers, + u16 *nexts, u32 n_buffers, u32 n_left, u32 *tmp, u8 maybe_aux, + u32 *aux_data, u32 *tmp_aux) { - u64 match_bmp[VLIB_FRAME_SIZE / 64]; + vlib_frame_bitmap_t match_bmp; vlib_frame_t *f; u32 n_extracted, n_free; - u32 *to; + u32 *to, *to_aux = 0; f = vlib_get_next_frame_internal (vm, node, next_index, 0); + maybe_aux = maybe_aux && f->aux_offset; + n_free = VLIB_FRAME_SIZE - f->n_vectors; /* if frame contains enough space for worst case scenario, we can avoid * use of tmp */ if (n_free >= n_left) - to = (u32 *) vlib_frame_vector_args (f) + f->n_vectors; + { + to = (u32 *) vlib_frame_vector_args (f) + f->n_vectors; + if (maybe_aux) + to_aux = (u32 *) vlib_frame_aux_args (f) + f->n_vectors; + } else - to = tmp; - + { + to = tmp; + if (maybe_aux) + to_aux = tmp_aux; + } clib_mask_compare_u16 (next_index, nexts, match_bmp, n_buffers); - n_extracted = clib_compress_u32 (to, 
buffers, match_bmp, n_buffers); - - for (int i = 0; i < ARRAY_LEN (match_bmp); i++) - used_elt_bmp[i] |= match_bmp[i]; + if (maybe_aux) + clib_compress_u32 (to_aux, aux_data, match_bmp, n_buffers); + vlib_frame_bitmap_or (used_elt_bmp, match_bmp); if (to != tmp) { @@ -45,6 +54,11 @@ enqueue_one (vlib_main_t *vm, vlib_node_runtime_t *node, u64 *used_elt_bmp, /* enough space in the existing frame */ to = (u32 *) vlib_frame_vector_args (f) + f->n_vectors; vlib_buffer_copy_indices (to, tmp, n_extracted); + if (maybe_aux) + { + to_aux = (u32 *) vlib_frame_aux_args (f) + f->n_vectors; + vlib_buffer_copy_indices (to_aux, tmp_aux, n_extracted); + } vlib_put_next_frame (vm, node, next_index, n_free - n_extracted); } else @@ -52,6 +66,11 @@ enqueue_one (vlib_main_t *vm, vlib_node_runtime_t *node, u64 *used_elt_bmp, /* full frame */ to = (u32 *) vlib_frame_vector_args (f) + f->n_vectors; vlib_buffer_copy_indices (to, tmp, n_free); + if (maybe_aux) + { + to_aux = (u32 *) vlib_frame_aux_args (f) + f->n_vectors; + vlib_buffer_copy_indices (to_aux, tmp_aux, n_free); + } vlib_put_next_frame (vm, node, next_index, 0); /* second frame */ @@ -59,6 +78,11 @@ enqueue_one (vlib_main_t *vm, vlib_node_runtime_t *node, u64 *used_elt_bmp, f = vlib_get_next_frame_internal (vm, node, next_index, 1); to = vlib_frame_vector_args (f); vlib_buffer_copy_indices (to, tmp + n_free, n_2nd_frame); + if (maybe_aux) + { + to_aux = vlib_frame_aux_args (f); + vlib_buffer_copy_indices (to_aux, tmp_aux + n_free, n_2nd_frame); + } vlib_put_next_frame (vm, node, next_index, VLIB_FRAME_SIZE - n_2nd_frame); } @@ -66,24 +90,27 @@ enqueue_one (vlib_main_t *vm, vlib_node_runtime_t *node, u64 *used_elt_bmp, return n_left - n_extracted; } -void __clib_section (".vlib_buffer_enqueue_to_next_fn") -CLIB_MULTIARCH_FN (vlib_buffer_enqueue_to_next_fn) -(vlib_main_t *vm, vlib_node_runtime_t *node, u32 *buffers, u16 *nexts, - uword count) +static_always_inline void +vlib_buffer_enqueue_to_next_fn_inline (vlib_main_t *vm, + 
vlib_node_runtime_t *node, u32 *buffers, + u32 *aux_data, u16 *nexts, uword count, + u8 maybe_aux) { u32 tmp[VLIB_FRAME_SIZE]; + u32 tmp_aux[VLIB_FRAME_SIZE]; u32 n_left; u16 next_index; while (count >= VLIB_FRAME_SIZE) { - u64 used_elt_bmp[VLIB_FRAME_SIZE / 64] = {}; + vlib_frame_bitmap_t used_elt_bmp = {}; n_left = VLIB_FRAME_SIZE; u32 off = 0; next_index = nexts[0]; n_left = enqueue_one (vm, node, used_elt_bmp, next_index, buffers, nexts, - VLIB_FRAME_SIZE, n_left, tmp); + VLIB_FRAME_SIZE, n_left, tmp, maybe_aux, aux_data, + tmp_aux); while (n_left) { @@ -96,23 +123,26 @@ CLIB_MULTIARCH_FN (vlib_buffer_enqueue_to_next_fn) next_index = nexts[off * 64 + count_trailing_zeros (~used_elt_bmp[off])]; n_left = enqueue_one (vm, node, used_elt_bmp, next_index, buffers, - nexts, VLIB_FRAME_SIZE, n_left, tmp); + nexts, VLIB_FRAME_SIZE, n_left, tmp, maybe_aux, + aux_data, tmp_aux); } buffers += VLIB_FRAME_SIZE; + if (maybe_aux) + aux_data += VLIB_FRAME_SIZE; nexts += VLIB_FRAME_SIZE; count -= VLIB_FRAME_SIZE; } if (count) { - u64 used_elt_bmp[VLIB_FRAME_SIZE / 64] = {}; + vlib_frame_bitmap_t used_elt_bmp = {}; next_index = nexts[0]; n_left = count; u32 off = 0; n_left = enqueue_one (vm, node, used_elt_bmp, next_index, buffers, nexts, - count, n_left, tmp); + count, n_left, tmp, maybe_aux, aux_data, tmp_aux); while (n_left) { @@ -124,26 +154,55 @@ CLIB_MULTIARCH_FN (vlib_buffer_enqueue_to_next_fn) next_index = nexts[off * 64 + count_trailing_zeros (~used_elt_bmp[off])]; - n_left = enqueue_one (vm, node, used_elt_bmp, next_index, buffers, - nexts, count, n_left, tmp); + n_left = + enqueue_one (vm, node, used_elt_bmp, next_index, buffers, nexts, + count, n_left, tmp, maybe_aux, aux_data, tmp_aux); } } } +void __clib_section (".vlib_buffer_enqueue_to_next_fn") +CLIB_MULTIARCH_FN (vlib_buffer_enqueue_to_next_fn) +(vlib_main_t *vm, vlib_node_runtime_t *node, u32 *buffers, u16 *nexts, + uword count) +{ + vlib_buffer_enqueue_to_next_fn_inline (vm, node, buffers, NULL, nexts, count, 
+ 0 /* maybe_aux */); +} + CLIB_MARCH_FN_REGISTRATION (vlib_buffer_enqueue_to_next_fn); -void __clib_section (".vlib_buffer_enqueue_to_single_next_fn") -CLIB_MULTIARCH_FN (vlib_buffer_enqueue_to_single_next_fn) -(vlib_main_t *vm, vlib_node_runtime_t *node, u32 *buffers, u16 next_index, - u32 count) +void __clib_section (".vlib_buffer_enqueue_to_next_with_aux_fn") +CLIB_MULTIARCH_FN (vlib_buffer_enqueue_to_next_with_aux_fn) +(vlib_main_t *vm, vlib_node_runtime_t *node, u32 *buffers, u32 *aux_data, + u16 *nexts, uword count) { - u32 *to_next, n_left_to_next, n_enq; + vlib_buffer_enqueue_to_next_fn_inline (vm, node, buffers, aux_data, nexts, + count, 1 /* maybe_aux */); +} - vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); +CLIB_MARCH_FN_REGISTRATION (vlib_buffer_enqueue_to_next_with_aux_fn); + +static_always_inline void +vlib_buffer_enqueue_to_single_next_fn_inline (vlib_main_t *vm, + vlib_node_runtime_t *node, + u32 *buffers, u32 *aux_data, + u16 next_index, u32 count, + u8 with_aux) +{ + u32 *to_next, *to_next_aux, n_left_to_next, n_enq; + + if (with_aux) + vlib_get_next_frame_with_aux (vm, node, next_index, to_next, to_next_aux, + n_left_to_next); + else + vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); if (PREDICT_TRUE (n_left_to_next >= count)) { vlib_buffer_copy_indices (to_next, buffers, count); + if (with_aux) + vlib_buffer_copy_indices (to_next_aux, aux_data, count); n_left_to_next -= count; vlib_put_next_frame (vm, node, next_index, n_left_to_next); return; @@ -152,22 +211,49 @@ CLIB_MULTIARCH_FN (vlib_buffer_enqueue_to_single_next_fn) n_enq = n_left_to_next; next: vlib_buffer_copy_indices (to_next, buffers, n_enq); + if (with_aux) + vlib_buffer_copy_indices (to_next_aux, aux_data, n_enq); n_left_to_next -= n_enq; if (PREDICT_FALSE (count > n_enq)) { count -= n_enq; buffers += n_enq; + if (with_aux) + aux_data += n_enq; vlib_put_next_frame (vm, node, next_index, n_left_to_next); - vlib_get_next_frame (vm, node, 
next_index, to_next, n_left_to_next); + if (with_aux) + vlib_get_next_frame_with_aux (vm, node, next_index, to_next, + to_next_aux, n_left_to_next); + else + vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); n_enq = clib_min (n_left_to_next, count); goto next; } vlib_put_next_frame (vm, node, next_index, n_left_to_next); } + +void __clib_section (".vlib_buffer_enqueue_to_single_next_fn") +CLIB_MULTIARCH_FN (vlib_buffer_enqueue_to_single_next_fn) +(vlib_main_t *vm, vlib_node_runtime_t *node, u32 *buffers, u16 next_index, + u32 count) +{ + vlib_buffer_enqueue_to_single_next_fn_inline ( + vm, node, buffers, NULL, next_index, count, 0 /* with_aux */); +} CLIB_MARCH_FN_REGISTRATION (vlib_buffer_enqueue_to_single_next_fn); +void __clib_section (".vlib_buffer_enqueue_to_single_next_with_aux_fn") +CLIB_MULTIARCH_FN (vlib_buffer_enqueue_to_single_next_with_aux_fn) +(vlib_main_t *vm, vlib_node_runtime_t *node, u32 *buffers, u32 *aux_data, + u16 next_index, u32 count) +{ + vlib_buffer_enqueue_to_single_next_fn_inline ( + vm, node, buffers, aux_data, next_index, count, 1 /* with_aux */); +} +CLIB_MARCH_FN_REGISTRATION (vlib_buffer_enqueue_to_single_next_with_aux_fn); + static inline vlib_frame_queue_elt_t * vlib_get_frame_queue_elt (vlib_frame_queue_main_t *fqm, u32 index, int dont_wait) @@ -175,7 +261,7 @@ vlib_get_frame_queue_elt (vlib_frame_queue_main_t *fqm, u32 index, vlib_frame_queue_t *fq; u64 nelts, tail, new_tail; - fq = fqm->vlib_frame_queues[index]; + fq = vec_elt (fqm->vlib_frame_queues, index); ASSERT (fq); nelts = fq->nelts; @@ -205,11 +291,11 @@ vlib_buffer_enqueue_to_thread_inline (vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_queue_main_t *fqm, u32 *buffer_indices, u16 *thread_indices, - u32 n_packets, int drop_on_congestion) + u32 n_packets, int drop_on_congestion, + int with_aux, u32 *aux_data) { u32 drop_list[VLIB_FRAME_SIZE], n_drop = 0; - u64 used_elts[VLIB_FRAME_SIZE / 64] = {}; - u64 mask[VLIB_FRAME_SIZE / 64]; + 
vlib_frame_bitmap_t mask, used_elts = {}; vlib_frame_queue_elt_t *hf = 0; u16 thread_index; u32 n_comp, off = 0, n_left = n_packets; @@ -222,6 +308,9 @@ more: n_comp = clib_compress_u32 (hf ? hf->buffer_index : drop_list + n_drop, buffer_indices, mask, n_packets); + if (with_aux) + clib_compress_u32 (hf ? hf->aux_data : drop_list + n_drop, aux_data, mask, + n_packets); if (hf) { @@ -238,8 +327,7 @@ more: if (n_left) { - for (int i = 0; i < ARRAY_LEN (used_elts); i++) - used_elts[i] |= mask[i]; + vlib_frame_bitmap_or (used_elts, mask); while (PREDICT_FALSE (used_elts[off] == ~0)) { @@ -274,7 +362,7 @@ CLIB_MULTIARCH_FN (vlib_buffer_enqueue_to_thread_fn) { n_enq += vlib_buffer_enqueue_to_thread_inline ( vm, node, fqm, buffer_indices, thread_indices, VLIB_FRAME_SIZE, - drop_on_congestion); + drop_on_congestion, 0 /* with_aux */, NULL); buffer_indices += VLIB_FRAME_SIZE; thread_indices += VLIB_FRAME_SIZE; n_packets -= VLIB_FRAME_SIZE; @@ -283,24 +371,58 @@ CLIB_MULTIARCH_FN (vlib_buffer_enqueue_to_thread_fn) if (n_packets == 0) return n_enq; - n_enq += vlib_buffer_enqueue_to_thread_inline (vm, node, fqm, buffer_indices, - thread_indices, n_packets, - drop_on_congestion); + n_enq += vlib_buffer_enqueue_to_thread_inline ( + vm, node, fqm, buffer_indices, thread_indices, n_packets, + drop_on_congestion, 0 /* with_aux */, NULL); + + return n_enq; +} + +u32 __clib_section (".vlib_buffer_enqueue_to_thread_with_aux_fn") +CLIB_MULTIARCH_FN (vlib_buffer_enqueue_to_thread_with_aux_fn) +(vlib_main_t *vm, vlib_node_runtime_t *node, u32 frame_queue_index, + u32 *buffer_indices, u32 *aux, u16 *thread_indices, u32 n_packets, + int drop_on_congestion) +{ + vlib_thread_main_t *tm = vlib_get_thread_main (); + vlib_frame_queue_main_t *fqm; + u32 n_enq = 0; + + fqm = vec_elt_at_index (tm->frame_queue_mains, frame_queue_index); + + while (n_packets >= VLIB_FRAME_SIZE) + { + n_enq += vlib_buffer_enqueue_to_thread_inline ( + vm, node, fqm, buffer_indices, thread_indices, VLIB_FRAME_SIZE, + 
drop_on_congestion, 1 /* with_aux */, aux); + buffer_indices += VLIB_FRAME_SIZE; + thread_indices += VLIB_FRAME_SIZE; + n_packets -= VLIB_FRAME_SIZE; + } + + if (n_packets == 0) + return n_enq; + + n_enq += vlib_buffer_enqueue_to_thread_inline ( + vm, node, fqm, buffer_indices, thread_indices, n_packets, + drop_on_congestion, 1 /* with_aux */, aux); return n_enq; } CLIB_MARCH_FN_REGISTRATION (vlib_buffer_enqueue_to_thread_fn); +CLIB_MARCH_FN_REGISTRATION (vlib_buffer_enqueue_to_thread_with_aux_fn); -u32 __clib_section (".vlib_frame_queue_dequeue_fn") -CLIB_MULTIARCH_FN (vlib_frame_queue_dequeue_fn) -(vlib_main_t *vm, vlib_frame_queue_main_t *fqm) +static_always_inline u32 +vlib_frame_queue_dequeue_inline (vlib_main_t *vm, vlib_frame_queue_main_t *fqm, + u8 with_aux) { u32 thread_id = vm->thread_index; vlib_frame_queue_t *fq = fqm->vlib_frame_queues[thread_id]; u32 mask = fq->nelts - 1; vlib_frame_queue_elt_t *elt; - u32 n_free, n_copy, *from, *to = 0, processed = 0, vectors = 0; + u32 n_free, n_copy, *from, *from_aux, *to = 0, *to_aux = 0, processed = 0, + vectors = 0; vlib_frame_t *f = 0; ASSERT (fq); @@ -357,13 +479,16 @@ CLIB_MULTIARCH_FN (vlib_frame_queue_dequeue_fn) break; from = elt->buffer_index + elt->offset; - + if (with_aux) + from_aux = elt->aux_data + elt->offset; ASSERT (elt->offset + elt->n_vectors <= VLIB_FRAME_SIZE); if (f == 0) { f = vlib_get_frame_to_node (vm, fqm->node_index); to = vlib_frame_vector_args (f); + if (with_aux) + to_aux = vlib_frame_aux_args (f); n_free = VLIB_FRAME_SIZE; } @@ -374,6 +499,12 @@ CLIB_MULTIARCH_FN (vlib_frame_queue_dequeue_fn) vlib_buffer_copy_indices (to, from, n_copy); to += n_copy; + if (with_aux) + { + vlib_buffer_copy_indices (to_aux, from_aux, n_copy); + to_aux += n_copy; + } + n_free -= n_copy; vectors += n_copy; @@ -413,8 +544,24 @@ CLIB_MULTIARCH_FN (vlib_frame_queue_dequeue_fn) return processed; } +u32 __clib_section (".vlib_frame_queue_dequeue_fn") +CLIB_MULTIARCH_FN (vlib_frame_queue_dequeue_fn) 
+(vlib_main_t *vm, vlib_frame_queue_main_t *fqm) +{ + return vlib_frame_queue_dequeue_inline (vm, fqm, 0 /* with_aux */); +} + CLIB_MARCH_FN_REGISTRATION (vlib_frame_queue_dequeue_fn); +u32 __clib_section (".vlib_frame_queue_dequeue_with_aux_fn") +CLIB_MULTIARCH_FN (vlib_frame_queue_dequeue_with_aux_fn) +(vlib_main_t *vm, vlib_frame_queue_main_t *fqm) +{ + return vlib_frame_queue_dequeue_inline (vm, fqm, 1 /* with_aux */); +} + +CLIB_MARCH_FN_REGISTRATION (vlib_frame_queue_dequeue_with_aux_fn); + #ifndef CLIB_MARCH_VARIANT vlib_buffer_func_main_t vlib_buffer_func_main; @@ -424,12 +571,16 @@ vlib_buffer_funcs_init (vlib_main_t *vm) vlib_buffer_func_main_t *bfm = &vlib_buffer_func_main; bfm->buffer_enqueue_to_next_fn = CLIB_MARCH_FN_POINTER (vlib_buffer_enqueue_to_next_fn); + bfm->buffer_enqueue_to_next_with_aux_fn = + CLIB_MARCH_FN_POINTER (vlib_buffer_enqueue_to_next_with_aux_fn); bfm->buffer_enqueue_to_single_next_fn = CLIB_MARCH_FN_POINTER (vlib_buffer_enqueue_to_single_next_fn); + bfm->buffer_enqueue_to_single_next_with_aux_fn = + CLIB_MARCH_FN_POINTER (vlib_buffer_enqueue_to_single_next_with_aux_fn); bfm->buffer_enqueue_to_thread_fn = CLIB_MARCH_FN_POINTER (vlib_buffer_enqueue_to_thread_fn); - bfm->frame_queue_dequeue_fn = - CLIB_MARCH_FN_POINTER (vlib_frame_queue_dequeue_fn); + bfm->buffer_enqueue_to_thread_with_aux_fn = + CLIB_MARCH_FN_POINTER (vlib_buffer_enqueue_to_thread_with_aux_fn); return 0; } diff --git a/src/vlib/buffer_funcs.h b/src/vlib/buffer_funcs.h index 77964fde821..010289ce2be 100644 --- a/src/vlib/buffer_funcs.h +++ b/src/vlib/buffer_funcs.h @@ -42,6 +42,7 @@ #include <vppinfra/hash.h> #include <vppinfra/fifo.h> +#include <vppinfra/vector/index_to_ptr.h> #include <vlib/buffer.h> #include <vlib/physmem_funcs.h> #include <vlib/main.h> @@ -55,24 +56,38 @@ typedef void (vlib_buffer_enqueue_to_next_fn_t) (vlib_main_t *vm, vlib_node_runtime_t *node, u32 *buffers, u16 *nexts, uword count); +typedef void (vlib_buffer_enqueue_to_next_with_aux_fn_t) ( + 
vlib_main_t *vm, vlib_node_runtime_t *node, u32 *buffers, u32 *aux_data, + u16 *nexts, uword count); typedef void (vlib_buffer_enqueue_to_single_next_fn_t) ( vlib_main_t *vm, vlib_node_runtime_t *node, u32 *ers, u16 next_index, u32 count); +typedef void (vlib_buffer_enqueue_to_single_next_with_aux_fn_t) ( + vlib_main_t *vm, vlib_node_runtime_t *node, u32 *ers, u32 *aux_data, + u16 next_index, u32 count); + typedef u32 (vlib_buffer_enqueue_to_thread_fn_t) ( vlib_main_t *vm, vlib_node_runtime_t *node, u32 frame_queue_index, u32 *buffer_indices, u16 *thread_indices, u32 n_packets, int drop_on_congestion); -typedef u32 (vlib_frame_queue_dequeue_fn_t) (vlib_main_t *vm, - vlib_frame_queue_main_t *fqm); +typedef u32 (vlib_buffer_enqueue_to_thread_with_aux_fn_t) ( + vlib_main_t *vm, vlib_node_runtime_t *node, u32 frame_queue_index, + u32 *buffer_indices, u32 *aux, u16 *thread_indices, u32 n_packets, + int drop_on_congestion); typedef struct { vlib_buffer_enqueue_to_next_fn_t *buffer_enqueue_to_next_fn; + vlib_buffer_enqueue_to_next_with_aux_fn_t + *buffer_enqueue_to_next_with_aux_fn; vlib_buffer_enqueue_to_single_next_fn_t *buffer_enqueue_to_single_next_fn; + vlib_buffer_enqueue_to_single_next_with_aux_fn_t + *buffer_enqueue_to_single_next_with_aux_fn; vlib_buffer_enqueue_to_thread_fn_t *buffer_enqueue_to_thread_fn; - vlib_frame_queue_dequeue_fn_t *frame_queue_dequeue_fn; + vlib_buffer_enqueue_to_thread_with_aux_fn_t + *buffer_enqueue_to_thread_with_aux_fn; } vlib_buffer_func_main_t; extern vlib_buffer_func_main_t vlib_buffer_func_main; @@ -166,7 +181,6 @@ vlib_buffer_copy_indices_to_ring (u32 * ring, u32 * src, u32 start, } } -STATIC_ASSERT_OFFSET_OF (vlib_buffer_t, template_end, 64); static_always_inline void vlib_buffer_copy_template (vlib_buffer_t * b, vlib_buffer_t * bt) { @@ -201,102 +215,38 @@ vlib_buffer_pool_get_default_for_numa (vlib_main_t * vm, u32 numa_node) @param offset - (i32) offset applied to each pointer */ static_always_inline void 
-vlib_get_buffers_with_offset (vlib_main_t * vm, u32 * bi, void **b, int count, +vlib_get_buffers_with_offset (vlib_main_t *vm, u32 *bi, void **b, u32 count, i32 offset) { uword buffer_mem_start = vm->buffer_main->buffer_mem_start; -#ifdef CLIB_HAVE_VEC512 - u64x8 of8 = u64x8_splat (buffer_mem_start + offset); - u64x4 off = u64x8_extract_lo (of8); - /* if count is not const, compiler will not unroll while loop - se we maintain two-in-parallel variant */ - while (count >= 32) - { - u64x8 b0 = u64x8_from_u32x8 (u32x8_load_unaligned (bi)); - u64x8 b1 = u64x8_from_u32x8 (u32x8_load_unaligned (bi + 8)); - u64x8 b2 = u64x8_from_u32x8 (u32x8_load_unaligned (bi + 16)); - u64x8 b3 = u64x8_from_u32x8 (u32x8_load_unaligned (bi + 24)); - /* shift and add to get vlib_buffer_t pointer */ - u64x8_store_unaligned ((b0 << CLIB_LOG2_CACHE_LINE_BYTES) + of8, b); - u64x8_store_unaligned ((b1 << CLIB_LOG2_CACHE_LINE_BYTES) + of8, b + 8); - u64x8_store_unaligned ((b2 << CLIB_LOG2_CACHE_LINE_BYTES) + of8, b + 16); - u64x8_store_unaligned ((b3 << CLIB_LOG2_CACHE_LINE_BYTES) + of8, b + 24); - b += 32; - bi += 32; - count -= 32; - } - while (count >= 8) - { - u64x8 b0 = u64x8_from_u32x8 (u32x8_load_unaligned (bi)); - /* shift and add to get vlib_buffer_t pointer */ - u64x8_store_unaligned ((b0 << CLIB_LOG2_CACHE_LINE_BYTES) + of8, b); - b += 8; - bi += 8; - count -= 8; - } -#elif defined CLIB_HAVE_VEC256 - u64x4 off = u64x4_splat (buffer_mem_start + offset); - /* if count is not const, compiler will not unroll while loop - se we maintain two-in-parallel variant */ - while (count >= 32) - { - u64x4 b0 = u64x4_from_u32x4 (u32x4_load_unaligned (bi)); - u64x4 b1 = u64x4_from_u32x4 (u32x4_load_unaligned (bi + 4)); - u64x4 b2 = u64x4_from_u32x4 (u32x4_load_unaligned (bi + 8)); - u64x4 b3 = u64x4_from_u32x4 (u32x4_load_unaligned (bi + 12)); - u64x4 b4 = u64x4_from_u32x4 (u32x4_load_unaligned (bi + 16)); - u64x4 b5 = u64x4_from_u32x4 (u32x4_load_unaligned (bi + 20)); - u64x4 b6 = u64x4_from_u32x4 
(u32x4_load_unaligned (bi + 24)); - u64x4 b7 = u64x4_from_u32x4 (u32x4_load_unaligned (bi + 28)); - /* shift and add to get vlib_buffer_t pointer */ - u64x4_store_unaligned ((b0 << CLIB_LOG2_CACHE_LINE_BYTES) + off, b); - u64x4_store_unaligned ((b1 << CLIB_LOG2_CACHE_LINE_BYTES) + off, b + 4); - u64x4_store_unaligned ((b2 << CLIB_LOG2_CACHE_LINE_BYTES) + off, b + 8); - u64x4_store_unaligned ((b3 << CLIB_LOG2_CACHE_LINE_BYTES) + off, b + 12); - u64x4_store_unaligned ((b4 << CLIB_LOG2_CACHE_LINE_BYTES) + off, b + 16); - u64x4_store_unaligned ((b5 << CLIB_LOG2_CACHE_LINE_BYTES) + off, b + 20); - u64x4_store_unaligned ((b6 << CLIB_LOG2_CACHE_LINE_BYTES) + off, b + 24); - u64x4_store_unaligned ((b7 << CLIB_LOG2_CACHE_LINE_BYTES) + off, b + 28); - b += 32; - bi += 32; - count -= 32; - } -#endif - while (count >= 4) - { -#ifdef CLIB_HAVE_VEC256 - u64x4 b0 = u64x4_from_u32x4 (u32x4_load_unaligned (bi)); - /* shift and add to get vlib_buffer_t pointer */ - u64x4_store_unaligned ((b0 << CLIB_LOG2_CACHE_LINE_BYTES) + off, b); -#elif defined (CLIB_HAVE_VEC128) - u64x2 off = u64x2_splat (buffer_mem_start + offset); - u32x4 bi4 = u32x4_load_unaligned (bi); - u64x2 b0 = u64x2_from_u32x4 ((u32x4) bi4); -#if defined (__aarch64__) - u64x2 b1 = u64x2_from_u32x4_high ((u32x4) bi4); -#else - bi4 = u32x4_shuffle (bi4, 2, 3, 0, 1); - u64x2 b1 = u64x2_from_u32x4 ((u32x4) bi4); -#endif - u64x2_store_unaligned ((b0 << CLIB_LOG2_CACHE_LINE_BYTES) + off, b); - u64x2_store_unaligned ((b1 << CLIB_LOG2_CACHE_LINE_BYTES) + off, b + 2); -#else - b[0] = vlib_buffer_ptr_from_index (buffer_mem_start, bi[0], offset); - b[1] = vlib_buffer_ptr_from_index (buffer_mem_start, bi[1], offset); - b[2] = vlib_buffer_ptr_from_index (buffer_mem_start, bi[2], offset); - b[3] = vlib_buffer_ptr_from_index (buffer_mem_start, bi[3], offset); -#endif - b += 4; - bi += 4; - count -= 4; - } - while (count) + void *base = (void *) (buffer_mem_start + offset); + int objsize = __builtin_object_size (b, 0); + const int sh = 
CLIB_LOG2_CACHE_LINE_BYTES; + + if (COMPILE_TIME_CONST (count) == 0 && objsize >= 64 * sizeof (b[0]) && + (objsize & ((8 * sizeof (b[0])) - 1)) == 0) { - b[0] = vlib_buffer_ptr_from_index (buffer_mem_start, bi[0], offset); - b += 1; - bi += 1; - count -= 1; + u32 n = round_pow2 (count, 8); + ASSERT (objsize >= count); + CLIB_ASSUME (objsize >= count); + while (n >= 64) + { + clib_index_to_ptr_u32 (bi, base, sh, b, 64); + b += 64; + bi += 64; + n -= 64; + } + + while (n) + { + clib_index_to_ptr_u32 (bi, base, sh, b, 8); + b += 8; + bi += 8; + n -= 8; + } } + else + clib_index_to_ptr_u32 (bi, base, sh, b, count); } /** \brief Translate array of buffer indices into buffer pointers @@ -308,7 +258,7 @@ vlib_get_buffers_with_offset (vlib_main_t * vm, u32 * bi, void **b, int count, */ static_always_inline void -vlib_get_buffers (vlib_main_t * vm, u32 * bi, vlib_buffer_t ** b, int count) +vlib_get_buffers (vlib_main_t *vm, u32 *bi, vlib_buffer_t **b, u32 count) { vlib_get_buffers_with_offset (vm, bi, (void **) b, count, 0); } @@ -803,6 +753,23 @@ vlib_buffer_pool_put (vlib_main_t * vm, u8 buffer_pool_index, clib_spinlock_unlock (&bp->lock); } +/** \brief return unused buffers back to pool + This function can be used to return buffers back to pool without going + through vlib_buffer_free. Buffer metadata must not be modified in any + way before buffers are returned. 
+ + @param vm - (vlib_main_t *) vlib main data structure pointer + @param buffers - (u32 * ) buffer index array + @param n_buffers - (u32) number of buffers to free + @param buffer_pool_index - (u8) buffer pool index +*/ +always_inline void +vlib_buffer_unalloc_to_pool (vlib_main_t *vm, u32 *buffers, u32 n_buffers, + u8 buffer_pool_index) +{ + vlib_buffer_pool_put (vm, buffer_pool_index, buffers, n_buffers); +} + static_always_inline void vlib_buffer_free_inline (vlib_main_t * vm, u32 * buffers, u32 n_buffers, int maybe_next) @@ -810,8 +777,8 @@ vlib_buffer_free_inline (vlib_main_t * vm, u32 * buffers, u32 n_buffers, const int queue_size = 128; vlib_buffer_pool_t *bp = 0; u8 buffer_pool_index = ~0; - u32 n_queue = 0, queue[queue_size + 4]; - vlib_buffer_t bt = { }; + u32 n_queue = 0, queue[queue_size + 8]; + vlib_buffer_template_t bt = {}; #if defined(CLIB_HAVE_VEC128) vlib_buffer_t bpi_mask = {.buffer_pool_index = ~0 }; vlib_buffer_t bpi_vec = {}; @@ -827,7 +794,7 @@ vlib_buffer_free_inline (vlib_main_t * vm, u32 * buffers, u32 n_buffers, vlib_buffer_t *b = vlib_get_buffer (vm, buffers[0]); buffer_pool_index = b->buffer_pool_index; bp = vlib_get_buffer_pool (vm, buffer_pool_index); - vlib_buffer_copy_template (&bt, &bp->buffer_template); + bt = bp->buffer_template; #if defined(CLIB_HAVE_VEC128) bpi_vec.buffer_pool_index = buffer_pool_index; #endif @@ -837,9 +804,16 @@ vlib_buffer_free_inline (vlib_main_t * vm, u32 * buffers, u32 n_buffers, vlib_buffer_t *b[8]; u32 bi, sum = 0, flags, next; +#if defined(CLIB_HAVE_VEC512) + if (n_buffers < 8) +#else if (n_buffers < 4) +#endif goto one_by_one; +#if defined(CLIB_HAVE_VEC512) + vlib_get_buffers (vm, buffers, b, 8); +#else vlib_get_buffers (vm, buffers, b, 4); if (n_buffers >= 12) @@ -850,8 +824,33 @@ vlib_buffer_free_inline (vlib_main_t * vm, u32 * buffers, u32 n_buffers, vlib_prefetch_buffer_header (b[6], LOAD); vlib_prefetch_buffer_header (b[7], LOAD); } +#endif -#if defined(CLIB_HAVE_VEC128) +#if 
defined(CLIB_HAVE_VEC512) + u8x16 p0, p1, p2, p3, p4, p5, p6, p7, r; + p0 = u8x16_load_unaligned (b[0]); + p1 = u8x16_load_unaligned (b[1]); + p2 = u8x16_load_unaligned (b[2]); + p3 = u8x16_load_unaligned (b[3]); + p4 = u8x16_load_unaligned (b[4]); + p5 = u8x16_load_unaligned (b[5]); + p6 = u8x16_load_unaligned (b[6]); + p7 = u8x16_load_unaligned (b[7]); + + r = p0 ^ bpi_vec.as_u8x16[0]; + r |= p1 ^ bpi_vec.as_u8x16[0]; + r |= p2 ^ bpi_vec.as_u8x16[0]; + r |= p3 ^ bpi_vec.as_u8x16[0]; + r |= p4 ^ bpi_vec.as_u8x16[0]; + r |= p5 ^ bpi_vec.as_u8x16[0]; + r |= p6 ^ bpi_vec.as_u8x16[0]; + r |= p7 ^ bpi_vec.as_u8x16[0]; + r &= bpi_mask.as_u8x16[0]; + r |= + (p0 | p1 | p2 | p3 | p4 | p5 | p6 | p7) & flags_refs_mask.as_u8x16[0]; + + sum = !u8x16_is_all_zero (r); +#elif defined(CLIB_HAVE_VEC128) u8x16 p0, p1, p2, p3, r; p0 = u8x16_load_unaligned (b[0]); p1 = u8x16_load_unaligned (b[1]); @@ -885,11 +884,41 @@ vlib_buffer_free_inline (vlib_main_t * vm, u32 * buffers, u32 n_buffers, if (sum) goto one_by_one; +#if defined(CLIB_HAVE_VEC512) + vlib_buffer_copy_indices (queue + n_queue, buffers, 8); + b[0]->template = bt; + b[1]->template = bt; + b[2]->template = bt; + b[3]->template = bt; + b[4]->template = bt; + b[5]->template = bt; + b[6]->template = bt; + b[7]->template = bt; + n_queue += 8; + + vlib_buffer_validate (vm, b[0]); + vlib_buffer_validate (vm, b[1]); + vlib_buffer_validate (vm, b[2]); + vlib_buffer_validate (vm, b[3]); + vlib_buffer_validate (vm, b[4]); + vlib_buffer_validate (vm, b[5]); + vlib_buffer_validate (vm, b[6]); + vlib_buffer_validate (vm, b[7]); + + VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b[0]); + VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b[1]); + VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b[2]); + VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b[3]); + VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b[4]); + VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b[5]); + VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b[6]); + VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b[7]); +#else vlib_buffer_copy_indices (queue + n_queue, buffers, 4); 
- vlib_buffer_copy_template (b[0], &bt); - vlib_buffer_copy_template (b[1], &bt); - vlib_buffer_copy_template (b[2], &bt); - vlib_buffer_copy_template (b[3], &bt); + b[0]->template = bt; + b[1]->template = bt; + b[2]->template = bt; + b[3]->template = bt; n_queue += 4; vlib_buffer_validate (vm, b[0]); @@ -901,14 +930,20 @@ vlib_buffer_free_inline (vlib_main_t * vm, u32 * buffers, u32 n_buffers, VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b[1]); VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b[2]); VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b[3]); +#endif if (n_queue >= queue_size) { vlib_buffer_pool_put (vm, buffer_pool_index, queue, n_queue); n_queue = 0; } +#if defined(CLIB_HAVE_VEC512) + buffers += 8; + n_buffers -= 8; +#else buffers += 4; n_buffers -= 4; +#endif continue; one_by_one: @@ -933,7 +968,7 @@ vlib_buffer_free_inline (vlib_main_t * vm, u32 * buffers, u32 n_buffers, bpi_vec.buffer_pool_index = buffer_pool_index; #endif bp = vlib_get_buffer_pool (vm, buffer_pool_index); - vlib_buffer_copy_template (&bt, &bp->buffer_template); + bt = bp->buffer_template; } vlib_buffer_validate (vm, b[0]); @@ -942,7 +977,7 @@ vlib_buffer_free_inline (vlib_main_t * vm, u32 * buffers, u32 n_buffers, if (clib_atomic_sub_fetch (&b[0]->ref_count, 1) == 0) { - vlib_buffer_copy_template (b[0], &bt); + b[0]->template = bt; queue[n_queue++] = bi; } diff --git a/src/vlib/buffer_node.h b/src/vlib/buffer_node.h index 10ebd253c1b..c0268b21562 100644 --- a/src/vlib/buffer_node.h +++ b/src/vlib/buffer_node.h @@ -236,6 +236,53 @@ do { \ } \ } while (0) +/** \brief Finish enqueueing one buffer forward in the graph, along with its + aux_data if possible. Standard single loop boilerplate element. This is a + MACRO, with MULTIPLE SIDE EFFECTS. In the ideal case, <code>next_index == + next0</code>, which means that the speculative enqueue at the top of the + single loop has correctly dealt with the packet in hand. In that case, the + macro does nothing at all. 
This function MAY return to_next_aux = NULL if + next_index does not support aux data + + @param vm vlib_main_t pointer, varies by thread + @param node current node vlib_node_runtime_t pointer + @param next_index speculated next index used for both packets + @param to_next speculated vector pointer used for both packets + @param to_next_aux speculated aux_data pointer used for both packets + @param n_left_to_next number of slots left in speculated vector + @param bi0 first buffer index + @param aux0 first aux_data + @param next0 actual next index to be used for the first packet + + @return @c next_index -- speculative next index to be used for future packets + @return @c to_next -- speculative frame to be used for future packets + @return @c n_left_to_next -- number of slots left in speculative frame +*/ +#define vlib_validate_buffer_enqueue_with_aux_x1( \ + vm, node, next_index, to_next, to_next_aux, n_left_to_next, bi0, aux0, \ + next0) \ + do \ + { \ + ASSERT (bi0 != 0); \ + if (PREDICT_FALSE (next0 != next_index)) \ + { \ + vlib_put_next_frame (vm, node, next_index, n_left_to_next + 1); \ + next_index = next0; \ + vlib_get_next_frame_with_aux_safe (vm, node, next_index, to_next, \ + to_next_aux, n_left_to_next); \ + \ + to_next[0] = bi0; \ + to_next += 1; \ + if (to_next_aux) \ + { \ + to_next_aux[0] = aux0; \ + to_next_aux += 1; \ + } \ + n_left_to_next -= 1; \ + } \ + } \ + while (0) + always_inline uword generic_buffer_node_inline (vlib_main_t * vm, vlib_node_runtime_t * node, @@ -356,6 +403,16 @@ vlib_buffer_enqueue_to_next (vlib_main_t * vm, vlib_node_runtime_t * node, } static_always_inline void +vlib_buffer_enqueue_to_next_with_aux (vlib_main_t *vm, + vlib_node_runtime_t *node, u32 *buffers, + u32 *aux_data, u16 *nexts, uword count) +{ + vlib_buffer_enqueue_to_next_with_aux_fn_t *fn; + fn = vlib_buffer_func_main.buffer_enqueue_to_next_with_aux_fn; + (fn) (vm, node, buffers, aux_data, nexts, count); +} + +static_always_inline void 
vlib_buffer_enqueue_to_next_vec (vlib_main_t *vm, vlib_node_runtime_t *node, u32 **buffers, u16 **nexts, uword count) { @@ -379,6 +436,17 @@ vlib_buffer_enqueue_to_single_next (vlib_main_t * vm, (fn) (vm, node, buffers, next_index, count); } +static_always_inline void +vlib_buffer_enqueue_to_single_next_with_aux (vlib_main_t *vm, + vlib_node_runtime_t *node, + u32 *buffers, u32 *aux_data, + u16 next_index, u32 count) +{ + vlib_buffer_enqueue_to_single_next_with_aux_fn_t *fn; + fn = vlib_buffer_func_main.buffer_enqueue_to_single_next_with_aux_fn; + (fn) (vm, node, buffers, aux_data, next_index, count); +} + static_always_inline u32 vlib_buffer_enqueue_to_thread (vlib_main_t *vm, vlib_node_runtime_t *node, u32 frame_queue_index, u32 *buffer_indices, @@ -391,6 +459,20 @@ vlib_buffer_enqueue_to_thread (vlib_main_t *vm, vlib_node_runtime_t *node, n_packets, drop_on_congestion); } +static_always_inline u32 +vlib_buffer_enqueue_to_thread_with_aux (vlib_main_t *vm, + vlib_node_runtime_t *node, + u32 frame_queue_index, + u32 *buffer_indices, u32 *aux, + u16 *thread_indices, u32 n_packets, + int drop_on_congestion) +{ + vlib_buffer_enqueue_to_thread_with_aux_fn_t *fn; + fn = vlib_buffer_func_main.buffer_enqueue_to_thread_with_aux_fn; + return (fn) (vm, node, frame_queue_index, buffer_indices, aux, + thread_indices, n_packets, drop_on_congestion); +} + #endif /* included_vlib_buffer_node_h */ /* diff --git a/src/vlib/cli.c b/src/vlib/cli.c index a00ae6245f9..98d57c6ccb0 100644 --- a/src/vlib/cli.c +++ b/src/vlib/cli.c @@ -38,6 +38,7 @@ */ #include <vlib/vlib.h> +#include <vlib/stats/stats.h> #include <vlib/unix/unix.h> #include <vppinfra/callback.h> #include <vppinfra/cpu.h> @@ -54,36 +55,28 @@ int vl_api_get_elog_trace_api_messages (void); static void *current_traced_heap; /* Root of all show commands. */ -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (vlib_cli_show_command, static) = { .path = "show", .short_help = "Show commands", }; -/* *INDENT-ON* */ /* Root of all clear commands. 
*/ -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (vlib_cli_clear_command, static) = { .path = "clear", .short_help = "Clear commands", }; -/* *INDENT-ON* */ /* Root of all set commands. */ -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (vlib_cli_set_command, static) = { .path = "set", .short_help = "Set commands", }; -/* *INDENT-ON* */ /* Root of all test commands. */ -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (vlib_cli_test_command, static) = { .path = "test", .short_help = "Test commands", }; -/* *INDENT-ON* */ /* Returns bitmap of commands which match key. */ static uword * @@ -158,6 +151,64 @@ done: return match; } +uword +unformat_vlib_cli_line (unformat_input_t *i, va_list *va) +{ + unformat_input_t *result = va_arg (*va, unformat_input_t *); + u8 *line = 0; + uword c; + int skip; + +next_line: + skip = 0; + + /* skip leading whitespace if any */ + unformat_skip_white_space (i); + + if (unformat_is_eof (i)) + return 0; + + while ((c = unformat_get_input (i)) != UNFORMAT_END_OF_INPUT) + { + if (c == '\\') + { + c = unformat_get_input (i); + + if (c == '\n') + { + if (!skip) + vec_add1 (line, '\n'); + skip = 0; + continue; + } + + if (!skip) + vec_add1 (line, '\\'); + + if (c == UNFORMAT_END_OF_INPUT) + break; + + if (!skip) + vec_add1 (line, c); + continue; + } + + if (c == '#') + skip = 1; + else if (c == '\n') + break; + + if (!skip) + vec_add1 (line, c); + } + + if (line == 0) + goto next_line; + + unformat_init_vector (result, line); + return 1; +} + /* Looks for string based sub-input formatted { SUB-INPUT }. 
*/ uword unformat_vlib_cli_sub_input (unformat_input_t * i, va_list * args) @@ -304,7 +355,6 @@ vlib_cli_get_possible_completions (u8 * str) /* if we have a space at the end of input, and a unique match, * autocomplete the next level of subcommands */ help_next_level = (vec_len (str) == 0) || isspace (str[vec_len (str) - 1]); - /* *INDENT-OFF* */ clib_bitmap_foreach (index, match_bitmap) { if (help_next_level && is_unique) { c = get_sub_command (vcm, c, index); @@ -316,7 +366,6 @@ vlib_cli_get_possible_completions (u8 * str) sc = &c->sub_commands[index]; vec_add1(result, (u8*) sc->name); } - /* *INDENT-ON* */ done: clib_bitmap_free (match_bitmap); @@ -566,13 +615,11 @@ vlib_cli_dispatch_sub_commands (vlib_main_t * vm, { if (PREDICT_FALSE (vm->elog_trace_cli_commands)) { - /* *INDENT-OFF* */ ELOG_TYPE_DECLARE (e) = { .format = "cli-cmd: %s", .format_args = "T4", }; - /* *INDENT-ON* */ struct { u32 c; @@ -598,13 +645,11 @@ vlib_cli_dispatch_sub_commands (vlib_main_t * vm, if (PREDICT_FALSE (vm->elog_trace_cli_commands)) { - /* *INDENT-OFF* */ ELOG_TYPE_DECLARE (e) = { .format = "cli-cmd: %s %s", .format_args = "T4T4", }; - /* *INDENT-ON* */ struct { u32 c, err; @@ -616,7 +661,7 @@ vlib_cli_dispatch_sub_commands (vlib_main_t * vm, vec_add1 (c_error->what, 0); ed->err = elog_string (vlib_get_elog_main (), (char *) c_error->what); - _vec_len (c_error->what) -= 1; + vec_dec_len (c_error->what, 1); } else ed->err = elog_string (vlib_get_elog_main (), "OK"); @@ -753,13 +798,6 @@ vl_msg_pop_heap (void *oldheap) { } -void *vlib_stats_push_heap (void *) __attribute__ ((weak)); -void * -vlib_stats_push_heap (void *notused) -{ - return 0; -} - static clib_error_t * show_memory_usage (vlib_main_t * vm, unformat_input_t * input, vlib_cli_command_t * cmd) @@ -820,14 +858,14 @@ show_memory_usage (vlib_main_t * vm, } if (stats_segment) { - void *oldheap = vlib_stats_push_heap (0); + void *oldheap = vlib_stats_set_heap (); was_enabled = clib_mem_trace_enable_disable (0); u8 *s_in_svm 
= format (0, "%U\n", format_clib_mem_heap, 0, 1); if (oldheap) clib_mem_set_heap (oldheap); u8 *s = vec_dup (s_in_svm); - oldheap = vlib_stats_push_heap (0); + oldheap = vlib_stats_set_heap (); vec_free (s_in_svm); if (oldheap) { @@ -925,14 +963,12 @@ show_memory_usage (vlib_main_t * vm, return 0; } -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (show_memory_usage_command, static) = { .path = "show memory", .short_help = "show memory [api-segment][stats-segment][verbose]\n" - " [numa-heaps][map]", + " [numa-heaps][map][main-heap]", .function = show_memory_usage, }; -/* *INDENT-ON* */ static clib_error_t * show_cpu (vlib_main_t * vm, unformat_input_t * input, @@ -959,13 +995,11 @@ show_cpu (vlib_main_t * vm, unformat_input_t * input, * Base Frequency: 3.20 GHz * @cliexend ?*/ -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (show_cpu_command, static) = { .path = "show cpu", .short_help = "Show cpu information", .function = show_cpu, }; -/* *INDENT-ON* */ static clib_error_t * enable_disable_memory_trace (vlib_main_t * vm, @@ -1038,7 +1072,7 @@ enable_disable_memory_trace (vlib_main_t * vm, /* Stats segment */ if (stats_segment) { - oldheap = vlib_stats_push_heap (0); + oldheap = vlib_stats_set_heap (); current_traced_heap = clib_mem_get_heap (); clib_mem_trace (stats_segment); /* We don't want to call vlib_stats_pop_heap... 
*/ @@ -1073,14 +1107,12 @@ enable_disable_memory_trace (vlib_main_t * vm, return 0; } -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (enable_disable_memory_trace_command, static) = { .path = "memory-trace", .short_help = "memory-trace on|off [api-segment][stats-segment][main-heap]\n" " [numa-heap <numa-id>]\n", .function = enable_disable_memory_trace, }; -/* *INDENT-ON* */ static clib_error_t * restart_cmd_fn (vlib_main_t * vm, unformat_input_t * input, @@ -1094,27 +1126,23 @@ restart_cmd_fn (vlib_main_t * vm, unformat_input_t * input, extern char **environ; /* Close all known open files */ - /* *INDENT-OFF* */ pool_foreach (f, fm->file_pool) { if (f->file_descriptor > 2) close(f->file_descriptor); } - /* *INDENT-ON* */ /* Exec ourself */ - execve (vgm->name, (char **) vm->argv, environ); + execve (vgm->name, (char **) vgm->argv, environ); return 0; } -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (restart_cmd,static) = { .path = "restart", .short_help = "restart process", .function = restart_cmd_fn, }; -/* *INDENT-ON* */ #ifdef TEST_CODE /* @@ -1140,13 +1168,11 @@ sleep_ten_seconds (vlib_main_t * vm, return 0; } -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (ping_command, static) = { .path = "test sleep", .function = sleep_ten_seconds, .short_help = "Sleep for 10 seconds", }; -/* *INDENT-ON* */ #endif /* ifdef TEST_CODE */ static uword @@ -1185,7 +1211,7 @@ vlib_cli_normalize_path (char *input, char **result) /* Remove any extra space at end. */ if (l > 0 && s[l - 1] == ' ') - _vec_len (s) -= 1; + vec_dec_len (s, 1); *result = s; return index_of_last_space; @@ -1597,7 +1623,6 @@ print_status: * @cliend * @cliexcmd{event-logger trace [api][cli][barrier][disable]} ?*/ -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (event_logger_trace_command, static) = { .path = "event-logger trace", @@ -1605,7 +1630,6 @@ VLIB_CLI_COMMAND (event_logger_trace_command, static) = "[circuit-node <name> e.g. 
ethernet-input][disable]", .function = event_logger_trace_command_fn, }; -/* *INDENT-ON* */ static clib_error_t * suspend_command_fn (vlib_main_t * vm, @@ -1615,7 +1639,6 @@ suspend_command_fn (vlib_main_t * vm, return 0; } -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (suspend_command, static) = { .path = "suspend", @@ -1623,7 +1646,6 @@ VLIB_CLI_COMMAND (suspend_command, static) = .function = suspend_command_fn, .is_mp_safe = 1, }; -/* *INDENT-ON* */ static int @@ -1814,7 +1836,6 @@ show_cli_command_fn (vlib_main_t * vm, * @cliexend ?*/ -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (show_cli_command, static) = { .path = "show cli", @@ -1822,7 +1843,6 @@ VLIB_CLI_COMMAND (show_cli_command, static) = .function = show_cli_command_fn, .is_mp_safe = 1, }; -/* *INDENT-ON* */ static clib_error_t * vlib_cli_init (vlib_main_t * vm) diff --git a/src/vlib/cli.h b/src/vlib/cli.h index 83b4eab1851..a2f9c24d2ec 100644 --- a/src/vlib/cli.h +++ b/src/vlib/cli.h @@ -204,6 +204,7 @@ clib_error_t *vlib_cli_register (struct vlib_main_t *vm, clib_error_t *vlib_cli_register_parse_rule (struct vlib_main_t *vm, vlib_cli_parse_rule_t * c); +unformat_function_t unformat_vlib_cli_line; uword unformat_vlib_cli_sub_input (unformat_input_t * i, va_list * args); /* Return an vector of strings consisting of possible auto-completions diff --git a/src/vlib/config.h.in b/src/vlib/config.h.in index 19ec10cfcca..b233b327d31 100644 --- a/src/vlib/config.h.in +++ b/src/vlib/config.h.in @@ -16,7 +16,8 @@ #ifndef included_vlib_config_h #define included_vlib_config_h -#define __PRE_DATA_SIZE @PRE_DATA_SIZE@ +#define VLIB_BUFFER_PRE_DATA_SIZE @PRE_DATA_SIZE@ +#define VLIB_BUFFER_ALIGN @VLIB_BUFFER_ALIGN@ #define VLIB_BUFFER_ALLOC_FAULT_INJECTOR @BUFFER_ALLOC_FAULT_INJECTOR@ #define VLIB_PROCESS_LOG2_STACK_SIZE @VLIB_PROCESS_LOG2_STACK_SIZE@ diff --git a/src/vlib/counter.c b/src/vlib/counter.c index 186b48d869e..9f14d02909f 100644 --- a/src/vlib/counter.c +++ b/src/vlib/counter.c @@ -38,7 +38,7 @@ */ #include <vlib/vlib.h> 
-#include <vlib/stat_weak_inlines.h> +#include <vlib/stats/stats.h> void vlib_clear_simple_counters (vlib_simple_counter_main_t * cm) @@ -79,66 +79,62 @@ void vlib_validate_simple_counter (vlib_simple_counter_main_t * cm, u32 index) { vlib_thread_main_t *tm = vlib_get_thread_main (); - int i, resized = 0; - void *oldheap = vlib_stats_push_heap (cm->counters); + char *name = cm->stat_segment_name ? cm->stat_segment_name : cm->name; - vec_validate (cm->counters, tm->n_vlib_mains - 1); - for (i = 0; i < tm->n_vlib_mains; i++) - if (index >= vec_len (cm->counters[i])) - { - if (vec_resize_will_expand (cm->counters[i], - index - vec_len (cm->counters[i]) + - 1 /* length_increment */)) - resized++; + if (name == 0) + { + if (cm->counters == 0) + cm->stats_entry_index = ~0; + vec_validate (cm->counters, tm->n_vlib_mains - 1); + for (int i = 0; i < tm->n_vlib_mains; i++) vec_validate_aligned (cm->counters[i], index, CLIB_CACHE_LINE_BYTES); - } + return; + } - /* Avoid the epoch increase when there was no counter vector resize. 
*/ - if (resized) - vlib_stats_pop_heap (cm, oldheap, index, - 2 /* STAT_DIR_TYPE_COUNTER_VECTOR_SIMPLE */); - else - clib_mem_set_heap (oldheap); + if (cm->counters == 0) + cm->stats_entry_index = vlib_stats_add_counter_vector ("%s", name); + + vlib_stats_validate (cm->stats_entry_index, tm->n_vlib_mains - 1, index); + cm->counters = vlib_stats_get_entry_data_pointer (cm->stats_entry_index); } void vlib_free_simple_counter (vlib_simple_counter_main_t * cm) { - int i; - - vlib_stats_delete_cm (cm); - - void *oldheap = vlib_stats_push_heap (cm->counters); - for (i = 0; i < vec_len (cm->counters); i++) - vec_free (cm->counters[i]); - vec_free (cm->counters); - clib_mem_set_heap (oldheap); + if (cm->stats_entry_index == ~0) + { + for (int i = 0; i < vec_len (cm->counters); i++) + vec_free (cm->counters[i]); + vec_free (cm->counters); + } + else + { + vlib_stats_remove_entry (cm->stats_entry_index); + cm->counters = NULL; + } } void vlib_validate_combined_counter (vlib_combined_counter_main_t * cm, u32 index) { vlib_thread_main_t *tm = vlib_get_thread_main (); - int i, resized = 0; - void *oldheap = vlib_stats_push_heap (cm->counters); + char *name = cm->stat_segment_name ? cm->stat_segment_name : cm->name; - vec_validate (cm->counters, tm->n_vlib_mains - 1); - for (i = 0; i < tm->n_vlib_mains; i++) - if (index >= vec_len (cm->counters[i])) - { - if (vec_resize_will_expand (cm->counters[i], - index - vec_len (cm->counters[i]) + - 1 /* length_increment */)) - resized++; + if (name == 0) + { + if (cm->counters == 0) + cm->stats_entry_index = ~0; + vec_validate (cm->counters, tm->n_vlib_mains - 1); + for (int i = 0; i < tm->n_vlib_mains; i++) vec_validate_aligned (cm->counters[i], index, CLIB_CACHE_LINE_BYTES); - } + return; + } - /* Avoid the epoch increase when there was no counter vector resize. 
*/ - if (resized) - vlib_stats_pop_heap (cm, oldheap, index, - 3 /*STAT_DIR_TYPE_COUNTER_VECTOR_COMBINED */); - else - clib_mem_set_heap (oldheap); + if (cm->counters == 0) + cm->stats_entry_index = vlib_stats_add_counter_pair_vector ("%s", name); + + vlib_stats_validate (cm->stats_entry_index, tm->n_vlib_mains - 1, index); + cm->counters = vlib_stats_get_entry_data_pointer (cm->stats_entry_index); } int @@ -147,7 +143,7 @@ int { vlib_thread_main_t *tm = vlib_get_thread_main (); int i; - void *oldheap = vlib_stats_push_heap (cm->counters); + void *oldheap = vlib_stats_set_heap (); /* Possibly once in recorded history */ if (PREDICT_FALSE (vec_len (cm->counters) == 0)) @@ -176,15 +172,17 @@ int void vlib_free_combined_counter (vlib_combined_counter_main_t * cm) { - int i; - - vlib_stats_delete_cm (cm); - - void *oldheap = vlib_stats_push_heap (cm->counters); - for (i = 0; i < vec_len (cm->counters); i++) - vec_free (cm->counters[i]); - vec_free (cm->counters); - clib_mem_set_heap (oldheap); + if (cm->stats_entry_index == ~0) + { + for (int i = 0; i < vec_len (cm->counters); i++) + vec_free (cm->counters[i]); + vec_free (cm->counters); + } + else + { + vlib_stats_remove_entry (cm->stats_entry_index); + cm->counters = NULL; + } } u32 diff --git a/src/vlib/counter.h b/src/vlib/counter.h index 9f5654292b9..f9da576a5f2 100644 --- a/src/vlib/counter.h +++ b/src/vlib/counter.h @@ -59,11 +59,26 @@ typedef struct counter_t **counters; /**< Per-thread u64 non-atomic counters */ char *name; /**< The counter collection's name. 
*/ char *stat_segment_name; /**< Name in stat segment directory */ + u32 stats_entry_index; } vlib_simple_counter_main_t; /** The number of counters (not the number of per-thread counters) */ u32 vlib_simple_counter_n_counters (const vlib_simple_counter_main_t * cm); +/** Pre-fetch a per-thread simple counter for the given object index */ +always_inline void +vlib_prefetch_simple_counter (const vlib_simple_counter_main_t *cm, + u32 thread_index, u32 index) +{ + counter_t *my_counters; + + /* + * This CPU's index is assumed to already be in cache + */ + my_counters = cm->counters[thread_index]; + clib_prefetch_store (my_counters + index); +} + /** Increment a simple counter @param cm - (vlib_simple_counter_main_t *) simple counter main pointer @param thread_index - (u32) the current cpu index @@ -205,6 +220,7 @@ typedef struct vlib_counter_t **counters; /**< Per-thread u64 non-atomic counter pairs */ char *name; /**< The counter collection's name. */ char *stat_segment_name; /**< Name in stat segment directory */ + u32 stats_entry_index; } vlib_combined_counter_main_t; /** The number of counters (not the number of per-thread counters) */ diff --git a/src/vlib/dma/cli.c b/src/vlib/dma/cli.c new file mode 100644 index 00000000000..1db59c81a2f --- /dev/null +++ b/src/vlib/dma/cli.c @@ -0,0 +1,160 @@ +/* SPDX-License-Identifier: Apache-2.0 + * Copyright (c) 2022 Cisco Systems, Inc. 
+ */ + +#include <vlib/vlib.h> +#include <vlib/physmem_funcs.h> +#include <vlib/dma/dma.h> + +static clib_error_t * +show_dma_backends_command_fn (vlib_main_t *vm, unformat_input_t *input, + vlib_cli_command_t *cmd) +{ + vlib_dma_main_t *dm = &vlib_dma_main; + + if (vec_len (dm->backends)) + { + vlib_dma_backend_t *b; + vec_foreach (b, dm->backends) + vlib_cli_output (vm, "%s", b->name); + } + else + vlib_cli_output (vm, "No active DMA backends"); + + return 0; +} + +VLIB_CLI_COMMAND (avf_create_command, static) = { + .path = "show dma backends", + .short_help = "show dma backends", + .function = show_dma_backends_command_fn, +}; + +static void +test_dma_cb_fn (vlib_main_t *vm, vlib_dma_batch_t *b) +{ + fformat (stderr, "%s: cb %p cookie %lx\n", __func__, b, + vlib_dma_batch_get_cookie (vm, b)); +} + +static clib_error_t * +fill_random_data (void *buffer, uword size) +{ + uword seed = random_default_seed (); + + uword remain = size; + const uword p = clib_mem_get_page_size (); + uword offset = 0; + + clib_random_buffer_t rb; + clib_random_buffer_init (&rb, seed); + + while (remain > 0) + { + uword fill_size = clib_min (p, remain); + + clib_random_buffer_fill (&rb, fill_size); + void *rbuf = clib_random_buffer_get_data (&rb, fill_size); + clib_memcpy_fast (buffer + offset, rbuf, fill_size); + clib_random_buffer_free (&rb); + + offset += fill_size; + remain -= fill_size; + } + + return 0; +} + +static clib_error_t * +test_dma_command_fn (vlib_main_t *vm, unformat_input_t *input, + vlib_cli_command_t *cmd) +{ + clib_error_t *err = 0; + vlib_dma_batch_t *b; + int config_index = -1; + u32 rsz, n_alloc, v; + u8 *from = 0, *to = 0; + vlib_dma_config_t cfg = { .max_transfers = 256, + .max_transfer_size = 4096, + .callback_fn = test_dma_cb_fn }; + + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (input, "transfers %u", &v)) + cfg.max_transfers = v; + else if (unformat (input, "size %u", &v)) + cfg.max_transfer_size = v; + else + return 
clib_error_return (0, "unknown input `%U'", + format_unformat_error, input); + } + + if ((config_index = vlib_dma_config_add (vm, &cfg)) < 0) + { + err = clib_error_return (0, "Unable to allocate dma config"); + return err; + } + + rsz = round_pow2 (cfg.max_transfer_size, CLIB_CACHE_LINE_BYTES); + n_alloc = rsz * cfg.max_transfers * 2; + + if ((from = vlib_physmem_alloc_aligned_on_numa ( + vm, n_alloc, CLIB_CACHE_LINE_BYTES, vm->numa_node)) == 0) + { + err = clib_error_return (0, "Unable to allocate %u bytes of physmem", + n_alloc); + return err; + } + to = from + n_alloc / 2; + + u32 port_allocator_seed; + + fill_random_data (from, (uword) cfg.max_transfers * rsz); + + b = vlib_dma_batch_new (vm, config_index); + vlib_dma_batch_set_cookie (vm, b, 0x12345678); + + port_allocator_seed = clib_cpu_time_now (); + int transfers = random_u32 (&port_allocator_seed) % cfg.max_transfers; + if (!transfers) + transfers = 1; + for (int i = 0; i < transfers; i++) + vlib_dma_batch_add (vm, b, to + i * rsz, from + i * rsz, + cfg.max_transfer_size); + + vlib_dma_batch_submit (vm, b); + return err; +} + +static clib_error_t * +test_show_dma_fn (vlib_main_t *vm, unformat_input_t *input, + vlib_cli_command_t *cmd) +{ + clib_error_t *err = 0; + int config_index = 0; + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (input, "config %u", &config_index)) + ; + else + return clib_error_return (0, "unknown input `%U'", + format_unformat_error, input); + } + + for (u32 i = 0; i < vlib_get_n_threads (); i++) + vlib_cli_output (vm, "Config %d %U", config_index, vlib_dma_config_info, + config_index, vlib_get_main_by_index (i)); + return err; +} + +VLIB_CLI_COMMAND (test_dma_command, static) = { + .path = "test dma", + .short_help = "test dma [transfers <x> size <x>]", + .function = test_dma_command_fn, +}; + +VLIB_CLI_COMMAND (show_dma_command, static) = { + .path = "show dma", + .short_help = "show dma [config <x>]", + .function = test_show_dma_fn, +}; diff 
--git a/src/vlib/dma/dma.c b/src/vlib/dma/dma.c new file mode 100644 index 00000000000..cc8ed94ea8f --- /dev/null +++ b/src/vlib/dma/dma.c @@ -0,0 +1,82 @@ +/* SPDX-License-Identifier: Apache-2.0 + * Copyright (c) 2022 Cisco Systems, Inc. + */ + +#include <vlib/vlib.h> +#include <vlib/log.h> +#include <vlib/dma/dma.h> + +VLIB_REGISTER_LOG_CLASS (dma_log) = { + .class_name = "dma", +}; + +vlib_dma_main_t vlib_dma_main = {}; + +clib_error_t * +vlib_dma_register_backend (vlib_main_t *vm, vlib_dma_backend_t *b) +{ + vlib_dma_main_t *dm = &vlib_dma_main; + vec_add1 (dm->backends, *b); + dma_log_info ("backend '%s' registered", b->name); + return 0; +} + +int +vlib_dma_config_add (vlib_main_t *vm, vlib_dma_config_t *c) +{ + vlib_dma_main_t *dm = &vlib_dma_main; + vlib_dma_backend_t *b; + vlib_dma_config_data_t *cd; + + pool_get_zero (dm->configs, cd); + cd->config_index = cd - dm->configs; + + clib_memcpy (&cd->cfg, c, sizeof (vlib_dma_config_t)); + + vec_foreach (b, dm->backends) + { + dma_log_info ("calling '%s' config_add_fn", b->name); + if (b->config_add_fn (vm, cd)) + { + dma_log_info ("config %u added into backend %s", cd - dm->configs, + b->name); + cd->backend_index = b - dm->backends; + return cd - dm->configs; + } + } + + pool_put (dm->configs, cd); + return -1; +} + +void +vlib_dma_config_del (vlib_main_t *vm, u32 config_index) +{ + vlib_dma_main_t *dm = &vlib_dma_main; + vlib_dma_config_data_t *cd = pool_elt_at_index (dm->configs, config_index); + vlib_dma_backend_t *b = vec_elt_at_index (dm->backends, cd->backend_index); + + if (b->config_del_fn) + b->config_del_fn (vm, cd); + + pool_put (dm->configs, cd); + dma_log_info ("config %u deleted from backend %s", config_index, b->name); +} + +u8 * +vlib_dma_config_info (u8 *s, va_list *args) +{ + vlib_dma_main_t *dm = &vlib_dma_main; + int config_index = va_arg (*args, int); + u32 len = pool_elts (dm->configs); + if (config_index >= len) + return format (s, "%s", "not found"); + vlib_dma_config_data_t *cd = 
pool_elt_at_index (dm->configs, config_index); + + vlib_dma_backend_t *b = vec_elt_at_index (dm->backends, cd->backend_index); + + if (b->info_fn) + return b->info_fn (s, args); + + return 0; +} diff --git a/src/vlib/dma/dma.h b/src/vlib/dma/dma.h new file mode 100644 index 00000000000..62d04110aa6 --- /dev/null +++ b/src/vlib/dma/dma.h @@ -0,0 +1,132 @@ +/* SPDX-License-Identifier: Apache-2.0 + * Copyright (c) 2022 Cisco Systems, Inc. + */ + +#ifndef included_vlib_dma_h +#define included_vlib_dma_h +#include <vlib/vlib.h> + +#define dma_log_debug(f, ...) \ + vlib_log (VLIB_LOG_LEVEL_DEBUG, dma_log.class, "%s: " f, __func__, \ + ##__VA_ARGS__) + +#define dma_log_info(f, ...) \ + vlib_log (VLIB_LOG_LEVEL_INFO, dma_log.class, "%s: " f, __func__, \ + ##__VA_ARGS__) + +struct vlib_dma_batch; +struct vlib_dma_config_data; + +typedef int (vlib_dma_config_add_fn) (vlib_main_t *vm, + struct vlib_dma_config_data *cfg); +typedef void (vlib_dma_config_del_fn) (vlib_main_t *vm, + struct vlib_dma_config_data *cfg); +typedef struct vlib_dma_batch *(vlib_dma_batch_new_fn) ( + vlib_main_t *vm, struct vlib_dma_config_data *); +typedef int (vlib_dma_batch_submit_fn) (vlib_main_t *vm, + struct vlib_dma_batch *b); +typedef void (vlib_dma_batch_callback_fn) (vlib_main_t *vm, + struct vlib_dma_batch *b); +typedef struct +{ + union + { + struct + { + u32 barrier_before_last : 1; + u32 sw_fallback : 1; + }; + u32 features; + }; + u16 max_batches; + u16 max_transfers; + u32 max_transfer_size; + vlib_dma_batch_callback_fn *callback_fn; +} vlib_dma_config_t; + +typedef struct vlib_dma_batch +{ + vlib_dma_batch_submit_fn *submit_fn; + vlib_dma_batch_callback_fn *callback_fn; + uword cookie; + u16 src_ptr_off; + u16 dst_ptr_off; + u16 size_off; + u16 stride; + u16 n_enq; +} vlib_dma_batch_t; + +typedef struct +{ + char *name; + vlib_dma_config_add_fn *config_add_fn; + vlib_dma_config_del_fn *config_del_fn; + format_function_t *info_fn; +} vlib_dma_backend_t; + +typedef struct 
vlib_dma_config_data +{ + vlib_dma_config_t cfg; + vlib_dma_batch_new_fn *batch_new_fn; + uword private_data; + u32 backend_index; + u32 config_index; +} vlib_dma_config_data_t; + +typedef struct +{ + vlib_dma_backend_t *backends; + vlib_dma_config_data_t *configs; +} vlib_dma_main_t; + +extern vlib_dma_main_t vlib_dma_main; + +clib_error_t *vlib_dma_register_backend (vlib_main_t *vm, + vlib_dma_backend_t *b); + +int vlib_dma_config_add (vlib_main_t *vm, vlib_dma_config_t *b); +void vlib_dma_config_del (vlib_main_t *vm, u32 config_index); +u8 *vlib_dma_config_info (u8 *s, va_list *args); + +static_always_inline vlib_dma_batch_t * +vlib_dma_batch_new (vlib_main_t *vm, u32 config_index) +{ + vlib_dma_main_t *dm = &vlib_dma_main; + vlib_dma_config_data_t *cd = pool_elt_at_index (dm->configs, config_index); + + return cd->batch_new_fn (vm, cd); +} + +static_always_inline void +vlib_dma_batch_set_cookie (vlib_main_t *vm, vlib_dma_batch_t *batch, + uword cookie) +{ + batch->cookie = cookie; +} + +static_always_inline uword +vlib_dma_batch_get_cookie (vlib_main_t *vm, vlib_dma_batch_t *batch) +{ + return batch->cookie; +} + +static_always_inline void +vlib_dma_batch_add (vlib_main_t *vm, vlib_dma_batch_t *batch, void *dst, + void *src, u32 size) +{ + u8 *p = (u8 *) batch + batch->n_enq * batch->stride; + + *((void **) (p + batch->dst_ptr_off)) = dst; + *((void **) (p + batch->src_ptr_off)) = src; + *((u32 *) (p + batch->size_off)) = size; + + batch->n_enq++; +} + +static_always_inline void +vlib_dma_batch_submit (vlib_main_t *vm, vlib_dma_batch_t *batch) +{ + batch->submit_fn (vm, batch); +} + +#endif diff --git a/src/vlib/dma/dma.rst b/src/vlib/dma/dma.rst new file mode 100644 index 00000000000..4048d49b218 --- /dev/null +++ b/src/vlib/dma/dma.rst @@ -0,0 +1,70 @@ +.. _dma_plugin: + +.. toctree:: + +DMA plugin +========== + +Overview +-------- +This plugin utilize platform DMA accelerators like CBDMA/DSA for streaming +data movement. 
Modern DMA accelerators have high memory bandwidth and benefit +cross-NUMA traffic. Accelerators like DSA are capable of IO page +fault recovery, which saves IOMMU setup for memory that is not pinned. + +Terminology & Usage +------------------- + +A ``backend`` is an abstraction of the resources inherited from a DMA device; +it supports the operations needed for DMA offloading, such as configuration, DMA +requests and result queries. + +A ``config`` is an abstraction of an application's DMA capability. An application can +request a config instance through the DMA node. The DMA node will check the +requirements of the application and bind a suitable backend to it. + +Enable DSA work queue: +---------------------- + +.. code-block:: console + # configure 1 group with one engine + accel-config config-engine dsa0/engine0.0 --group-id=0 + + # configure 1 queue, placed in its own group, so it + # is backed by a single engine + accel-config config-wq dsa0/wq0.0 --group-id=0 --type=user \ + --priority=10 --max-batch-size=1024 --mode=dedicated -b 1 -a 0 --name=vpp1 + +DMA transfer: +------------- + +In this sample, the application requests from the DMA node a DMA capability that can hold +a batch of at most 256 transfers, each transfer holding at most 4K bytes. +If the config_index value is not negative, the resource has +been allocated and the DMA engine is ready to serve. + +..
code-block:: console + void dma_completion_cb (vlib_main_t *vm, vlib_dma_batch_t *b); + + vlib_dma_config_args_t args; + args->max_transfers = 256; + args->max_transfer_size = 4096; + args->cpu_fallback = 1; + args->barrier_before_last = 1; + args->cb = dma_completion_cb; + u32 config_index = vlib_dma_config (vm, &args); + if (config_index < 0) + return; + + u8 *dst[n_transfers]; + u8 *src[n_transfers]; + u32 i = 0, size = 4096; + + vlib_dma_batch_t *b; + b = vlib_dma_batch_new (vm, config_index); + while (wrk_t->config_index >= 0 && n_transfers) { + vlib_dma_batch_add (vm, b, dst[i], src[i], size); + n_transfers --; + i ++; + } + vlib_dma_batch_submit (vm, config_index); diff --git a/src/vlib/drop.c b/src/vlib/drop.c index 3971123839d..3fda1d9b3b6 100644 --- a/src/vlib/drop.c +++ b/src/vlib/drop.c @@ -16,6 +16,7 @@ */ #include <vlib/vlib.h> +#include <vppinfra/vector/count_equal.h> typedef enum { @@ -73,7 +74,8 @@ counter_index (vlib_main_t * vm, vlib_error_t e) n = vlib_get_node (vm, ni); ci = vlib_error_get_code (&vm->node_main, e); - ASSERT (ci < n->n_errors); + if (ci >= n->n_errors) + return CLIB_U32_MAX; ci += n->error_heap_index; @@ -91,9 +93,12 @@ format_error_trace (u8 * s, va_list * va) u32 i; error_node = vlib_get_node (vm, vlib_error_get_node (&vm->node_main, e[0])); - i = counter_index (vm, vlib_error_get_code (&vm->node_main, e[0])) + - error_node->error_heap_index; - s = format (s, "%v: %s", error_node->name, em->counters_heap[i].name); + i = counter_index (vm, vlib_error_get_code (&vm->node_main, e[0])); + if (i != CLIB_U32_MAX) + { + i += error_node->error_heap_index; + s = format (s, "%v: %s", error_node->name, em->counters_heap[i].desc); + } return s; } @@ -221,7 +226,8 @@ process_drop_punt (vlib_main_t * vm, n_left -= count; c_index = counter_index (vm, error[0]); - em->counters[c_index] += count; + if (c_index != CLIB_U32_MAX) + em->counters[c_index] += count; vlib_error_elog_count (vm, c_index, count); } @@ -232,7 +238,7 @@ process_drop_punt 
(vlib_main_t * vm, /* If there is no punt function, free the frame as well. */ if (disposition == ERROR_DISPOSITION_PUNT && !vm->os_punt_frame) - vlib_frame_free (vm, node, frame); + vlib_frame_free (vm, frame); } else vm->os_punt_frame (vm, node, frame); @@ -254,7 +260,6 @@ VLIB_NODE_FN (error_punt_node) (vlib_main_t * vm, return process_drop_punt (vm, node, frame, ERROR_DISPOSITION_PUNT); } -/* *INDENT-OFF* */ VLIB_REGISTER_NODE (error_drop_node) = { .name = "drop", .flags = VLIB_NODE_FLAG_IS_DROP, @@ -262,9 +267,7 @@ VLIB_REGISTER_NODE (error_drop_node) = { .format_trace = format_error_trace, .validate_frame = validate_error_frame, }; -/* *INDENT-ON* */ -/* *INDENT-OFF* */ VLIB_REGISTER_NODE (error_punt_node) = { .name = "punt", .flags = (VLIB_NODE_FLAG_FRAME_NO_FREE_AFTER_DISPATCH @@ -273,7 +276,6 @@ VLIB_REGISTER_NODE (error_punt_node) = { .format_trace = format_error_trace, .validate_frame = validate_error_frame, }; -/* *INDENT-ON* */ /* * fd.io coding-style-patch-verification: ON diff --git a/src/vlib/error.c b/src/vlib/error.c index 97cb0b52192..3008af307bf 100644 --- a/src/vlib/error.c +++ b/src/vlib/error.c @@ -39,7 +39,7 @@ #include <vlib/vlib.h> #include <vppinfra/heap.h> -#include <vlib/stat_weak_inlines.h> +#include <vlib/stats/stats.h> uword vlib_error_drop_buffers (vlib_main_t * vm, @@ -112,6 +112,34 @@ vlib_error_drop_buffers (vlib_main_t * vm, return n_buffers; } +static u8 * +format_stats_counter_name (u8 *s, va_list *va) +{ + u8 *id = va_arg (*va, u8 *); + + for (u32 i = 0; id[i] != 0; i++) + vec_add1 (s, id[i] == ' ' ? 
' ' : id[i]); + + return s; +} + +void +vlib_unregister_errors (vlib_main_t *vm, u32 node_index) +{ + vlib_error_main_t *em = &vm->error_main; + vlib_node_t *n = vlib_get_node (vm, node_index); + vlib_error_desc_t *cd; + + if (n->n_errors > 0) + { + cd = vec_elt_at_index (em->counters_heap, n->error_heap_index); + for (u32 i = 0; i < n->n_errors; i++) + vlib_stats_remove_entry (cd[i].stats_entry_index); + heap_dealloc (em->counters_heap, n->error_heap_handle); + n->n_errors = 0; + } +} + /* Reserves given number of error codes for given node. */ void vlib_register_errors (vlib_main_t *vm, u32 node_index, u32 n_errors, @@ -119,94 +147,88 @@ vlib_register_errors (vlib_main_t *vm, u32 node_index, u32 n_errors, { vlib_error_main_t *em = &vm->error_main; vlib_node_main_t *nm = &vm->node_main; - vlib_node_t *n = vlib_get_node (vm, node_index); + vlib_error_desc_t *cd; + u32 n_threads = vlib_get_n_threads (); + elog_event_type_t t = {}; uword l; - void *oldheap; + u64 **sc; ASSERT (vlib_get_thread_index () == 0); + vlib_stats_segment_lock (); + /* Free up any previous error strings. 
*/ - if (n->n_errors > 0) - heap_dealloc (em->counters_heap, n->error_heap_handle); + vlib_unregister_errors (vm, node_index); n->n_errors = n_errors; n->error_counters = counters; if (n_errors == 0) - return; + goto done; + + n->error_heap_index = + heap_alloc (em->counters_heap, n_errors, n->error_heap_handle); + l = vec_len (em->counters_heap); + cd = vec_elt_at_index (em->counters_heap, n->error_heap_index); /* Legacy node */ if (!counters) { - counters = clib_mem_alloc (sizeof (counters[0]) * n_errors); - int i; - for (i = 0; i < n_errors; i++) + for (int i = 0; i < n_errors; i++) { - counters[i].name = error_strings[i]; - counters[i].desc = error_strings[i]; - counters[i].severity = VL_COUNTER_SEVERITY_ERROR; + cd[i].name = error_strings[i]; + cd[i].desc = error_strings[i]; + cd[i].severity = VL_COUNTER_SEVERITY_ERROR; } } - - n->error_heap_index = - heap_alloc (em->counters_heap, n_errors, n->error_heap_handle); - l = vec_len (em->counters_heap); - clib_memcpy (vec_elt_at_index (em->counters_heap, n->error_heap_index), - counters, n_errors * sizeof (counters[0])); + else + clib_memcpy (cd, counters, n_errors * sizeof (counters[0])); vec_validate (vm->error_elog_event_types, l - 1); - /* Switch to the stats segment ... */ - oldheap = vlib_stats_push_heap (0); + if (em->stats_err_entry_index == 0) + em->stats_err_entry_index = vlib_stats_add_counter_vector ("/node/errors"); - /* Allocate a counter/elog type for each error. */ - vec_validate (em->counters, l - 1); + ASSERT (em->stats_err_entry_index != 0 && em->stats_err_entry_index != ~0); - /* Zero counters for re-registrations of errors. 
*/ - if (n->error_heap_index + n_errors <= vec_len (em->counters_last_clear)) - clib_memcpy (em->counters + n->error_heap_index, - em->counters_last_clear + n->error_heap_index, - n_errors * sizeof (em->counters[0])); - else - clib_memset (em->counters + n->error_heap_index, - 0, n_errors * sizeof (em->counters[0])); + vlib_stats_validate (em->stats_err_entry_index, n_threads - 1, l - 1); + sc = vlib_stats_get_entry_data_pointer (em->stats_err_entry_index); + + for (int i = 0; i < n_threads; i++) + { + vlib_main_t *tvm = vlib_get_main_by_index (i); + vlib_error_main_t *tem = &tvm->error_main; + tem->counters = sc[i]; + + /* Zero counters for re-registrations of errors. */ + if (n->error_heap_index + n_errors <= vec_len (tem->counters_last_clear)) + clib_memcpy (tem->counters + n->error_heap_index, + tem->counters_last_clear + n->error_heap_index, + n_errors * sizeof (tem->counters[0])); + else + clib_memset (tem->counters + n->error_heap_index, 0, + n_errors * sizeof (tem->counters[0])); + } /* Register counter indices in the stat segment directory */ - { - int i; - u8 *error_name = 0; - - for (i = 0; i < n_errors; i++) - { - vec_reset_length (error_name); - error_name = - format (error_name, "/err/%v/%s%c", n->name, counters[i].name, 0); - vlib_stats_register_error_index (oldheap, error_name, em->counters, - n->error_heap_index + i); - } - - vec_free (error_name); - } - - /* (re)register the em->counters base address, switch back to main heap */ - vlib_stats_pop_heap2 (em->counters, vm->thread_index, oldheap, 1); - - { - elog_event_type_t t; - uword i; - - clib_memset (&t, 0, sizeof (t)); - if (n_errors > 0) - vec_validate (nm->node_by_error, n->error_heap_index + n_errors - 1); - for (i = 0; i < n_errors; i++) - { - t.format = (char *) format (0, "%v %s: %%d", - n->name, counters[i].name); - vm->error_elog_event_types[n->error_heap_index + i] = t; - nm->node_by_error[n->error_heap_index + i] = n->index; - } - } + for (int i = 0; i < n_errors; i++) + 
cd[i].stats_entry_index = vlib_stats_add_symlink ( + em->stats_err_entry_index, n->error_heap_index + i, "/err/%v/%U", + n->name, format_stats_counter_name, cd[i].name); + + vec_validate (nm->node_by_error, n->error_heap_index + n_errors - 1); + + for (u32 i = 0; i < n_errors; i++) + { + t.format = (char *) format (0, "%v %s: %%d", n->name, cd[i].name); + vec_free (vm->error_elog_event_types[n->error_heap_index + i].format); + vm->error_elog_event_types[n->error_heap_index + i] = t; + nm->node_by_error[n->error_heap_index + i] = n->index; + } + +done: + vlib_stats_segment_unlock (); } uword @@ -306,11 +328,11 @@ show_errors (vlib_main_t * vm, if (verbose) vlib_cli_output (vm, "%10lu%=35v%=35s%=10s%=6d", c, n->name, - em->counters_heap[i].name, + em->counters_heap[i].desc, sev2str (em->counters_heap[i].severity), i); else vlib_cli_output (vm, "%10lu%=35v%=35s%=10s", c, n->name, - em->counters_heap[i].name, + em->counters_heap[i].desc, sev2str (em->counters_heap[i].severity)); } } @@ -330,7 +352,7 @@ show_errors (vlib_main_t * vm, { if (verbose) vlib_cli_output (vm, "%10lu%=40v%=20s%=10d", sums[i], n->name, - em->counters_heap[i].name, i); + em->counters_heap[i].desc, i); } } } @@ -340,21 +362,17 @@ show_errors (vlib_main_t * vm, return 0; } -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (vlib_cli_show_errors) = { .path = "show errors", .short_help = "Show error counts", .function = show_errors, }; -/* *INDENT-ON* */ -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (cli_show_node_counters, static) = { .path = "show node counters", .short_help = "Show node counters", .function = show_errors, }; -/* *INDENT-ON* */ static clib_error_t * clear_error_counters (vlib_main_t * vm, @@ -373,21 +391,17 @@ clear_error_counters (vlib_main_t * vm, return 0; } -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (cli_clear_error_counters, static) = { .path = "clear errors", .short_help = "Clear error counters", .function = clear_error_counters, }; -/* *INDENT-ON* */ -/* *INDENT-OFF* */ VLIB_CLI_COMMAND 
(cli_clear_node_counters, static) = { .path = "clear node counters", .short_help = "Clear node counters", .function = clear_error_counters, }; -/* *INDENT-ON* */ /* * fd.io coding-style-patch-verification: ON diff --git a/src/vlib/error.h b/src/vlib/error.h index b921067ee84..b5cc264b60d 100644 --- a/src/vlib/error.h +++ b/src/vlib/error.h @@ -56,6 +56,7 @@ typedef struct char *name; char *desc; enum vl_counter_severity_e severity; + u32 stats_entry_index; } vlib_error_desc_t; typedef struct @@ -69,12 +70,16 @@ typedef struct /* Counter structures in heap. Heap index indexes counter vector. */ vlib_error_desc_t *counters_heap; + + /* stats segment entry index */ + u32 stats_err_entry_index; } vlib_error_main_t; /* Per node error registration. */ void vlib_register_errors (struct vlib_main_t *vm, u32 node_index, u32 n_errors, char *error_strings[], vlib_error_desc_t counters[]); +void vlib_unregister_errors (struct vlib_main_t *vm, u32 node_index); unformat_function_t unformat_vlib_error; diff --git a/src/vlib/freebsd/pci.c b/src/vlib/freebsd/pci.c new file mode 100644 index 00000000000..a4e9eb2dda6 --- /dev/null +++ b/src/vlib/freebsd/pci.c @@ -0,0 +1,380 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * Copyright (c) 2024 Tom Jones <thj@freebsd.org> + * + * This software was developed by Tom Jones <thj@freebsd.org> under sponsorship + * from the FreeBSD Foundation. 
+ * + */ + +#include <vlib/vlib.h> +#include <vlib/pci/pci.h> +#include <vlib/unix/unix.h> + +#include <sys/types.h> +#include <sys/stat.h> +#include <sys/ioctl.h> +#include <sys/eventfd.h> + +#include <sys/pciio.h> + +#include <fcntl.h> +#include <dirent.h> +#include <net/if.h> + +extern vlib_pci_main_t freebsd_pci_main; + +uword +vlib_pci_get_private_data (vlib_main_t *vm, vlib_pci_dev_handle_t h) +{ + return 0; +} + +void +vlib_pci_set_private_data (vlib_main_t *vm, vlib_pci_dev_handle_t h, + uword private_data) +{ +} + +vlib_pci_addr_t * +vlib_pci_get_addr (vlib_main_t *vm, vlib_pci_dev_handle_t h) +{ + return NULL; +} + +u32 +vlib_pci_get_numa_node (vlib_main_t *vm, vlib_pci_dev_handle_t h) +{ + return 0; +} + +u32 +vlib_pci_get_num_msix_interrupts (vlib_main_t *vm, vlib_pci_dev_handle_t h) +{ + return 0; +} + +/* Call to allocate/initialize the pci subsystem. + This is not an init function so that users can explicitly enable + pci only when it's needed. */ +clib_error_t *pci_bus_init (vlib_main_t *vm); + +vlib_pci_device_info_t * +vlib_pci_get_device_info (vlib_main_t *vm, vlib_pci_addr_t *addr, + clib_error_t **error) +{ + /* Populate a vlib_pci_device_info_t from the given address */ + clib_error_t *err = NULL; + vlib_pci_device_info_t *di = NULL; + + int fd = -1; + struct pci_conf_io pci; + struct pci_conf match; + struct pci_match_conf pattern; + bzero (&match, sizeof (match)); + bzero (&pattern, sizeof (pattern)); + + pattern.pc_sel.pc_domain = addr->domain; + pattern.pc_sel.pc_bus = addr->bus; + pattern.pc_sel.pc_dev = addr->slot; + pattern.pc_sel.pc_func = addr->function; + pattern.flags = PCI_GETCONF_MATCH_DOMAIN | PCI_GETCONF_MATCH_BUS | + PCI_GETCONF_MATCH_DEV | PCI_GETCONF_MATCH_FUNC; + + pci.pat_buf_len = sizeof (pattern); + pci.num_patterns = 1; + pci.patterns = &pattern; + pci.match_buf_len = sizeof (match); + pci.num_matches = 1; + pci.matches = &match; + pci.offset = 0; + pci.generation = 0; + pci.status = 0; + + fd = open ("/dev/pci", 0); + 
if (fd == -1) + { + err = clib_error_return_unix (0, "open '/dev/pci'"); + goto error; + } + + if (ioctl (fd, PCIOCGETCONF, &pci) == -1) + { + err = clib_error_return_unix (0, "reading PCIOCGETCONF"); + goto error; + } + + di = clib_mem_alloc (sizeof (vlib_pci_device_info_t)); + clib_memset (di, 0, sizeof (vlib_pci_device_info_t)); + + di->addr.as_u32 = addr->as_u32; + di->numa_node = 0; /* TODO: Place holder until we have NUMA on FreeBSD */ + + di->device_class = match.pc_class; + di->vendor_id = match.pc_vendor; + di->device_id = match.pc_device; + di->revision = match.pc_revid; + + di->product_name = NULL; + di->vpd_r = 0; + di->vpd_w = 0; + di->driver_name = format (0, "%s", &match.pd_name); + di->iommu_group = -1; + + goto done; + +error: + vlib_pci_free_device_info (di); + di = NULL; +done: + if (error) + *error = err; + close (fd); + return di; +} + +clib_error_t *__attribute__ ((weak)) +vlib_pci_get_device_root_bus (vlib_pci_addr_t *addr, vlib_pci_addr_t *root_bus) +{ + return NULL; +} + +clib_error_t * +vlib_pci_bind_to_uio (vlib_main_t *vm, vlib_pci_addr_t *addr, + char *uio_drv_name, int force) +{ + clib_error_t *error = 0; + + if (error) + { + return error; + } + + if (strncmp ("auto", uio_drv_name, 5) == 0) + { + /* TODO: We should confirm that nic_uio is loaded and return an error. 
*/ + uio_drv_name = "nic_uio"; + } + return error; +} + +clib_error_t * +vlib_pci_register_intx_handler (vlib_main_t *vm, vlib_pci_dev_handle_t h, + pci_intx_handler_function_t *intx_handler) +{ + return NULL; +} + +clib_error_t * +vlib_pci_unregister_intx_handler (vlib_main_t *vm, vlib_pci_dev_handle_t h) +{ + return NULL; +} + +clib_error_t * +vlib_pci_register_msix_handler (vlib_main_t *vm, vlib_pci_dev_handle_t h, + u32 start, u32 count, + pci_msix_handler_function_t *msix_handler) +{ + return NULL; +} + +clib_error_t * +vlib_pci_unregister_msix_handler (vlib_main_t *vm, vlib_pci_dev_handle_t h, + u32 start, u32 count) +{ + return NULL; +} + +clib_error_t * +vlib_pci_enable_msix_irq (vlib_main_t *vm, vlib_pci_dev_handle_t h, u16 start, + u16 count) +{ + return NULL; +} + +uword +vlib_pci_get_msix_file_index (vlib_main_t *vm, vlib_pci_dev_handle_t h, + u16 index) +{ + return 0; +} + +clib_error_t * +vlib_pci_disable_msix_irq (vlib_main_t *vm, vlib_pci_dev_handle_t h, u16 start, + u16 count) +{ + return NULL; +} + +/* Configuration space read/write. 
*/ +clib_error_t * +vlib_pci_read_write_config (vlib_main_t *vm, vlib_pci_dev_handle_t h, + vlib_read_or_write_t read_or_write, uword address, + void *data, u32 n_bytes) +{ + return NULL; +} + +clib_error_t * +vlib_pci_map_region (vlib_main_t *vm, vlib_pci_dev_handle_t h, u32 resource, + void **result) +{ + return NULL; +} + +clib_error_t * +vlib_pci_map_region_fixed (vlib_main_t *vm, vlib_pci_dev_handle_t h, + u32 resource, u8 *addr, void **result) +{ + return NULL; +} + +clib_error_t * +vlib_pci_io_region (vlib_main_t *vm, vlib_pci_dev_handle_t h, u32 resource) +{ + return NULL; +} + +clib_error_t * +vlib_pci_read_write_io (vlib_main_t *vm, vlib_pci_dev_handle_t h, + vlib_read_or_write_t read_or_write, uword offset, + void *data, u32 length) +{ + return NULL; +} + +clib_error_t * +vlib_pci_map_dma (vlib_main_t *vm, vlib_pci_dev_handle_t h, void *ptr) +{ + return NULL; +} + +int +vlib_pci_supports_virtual_addr_dma (vlib_main_t *vm, vlib_pci_dev_handle_t h) +{ + return 0; +} + +clib_error_t * +vlib_pci_device_open (vlib_main_t *vm, vlib_pci_addr_t *addr, + pci_device_id_t ids[], vlib_pci_dev_handle_t *handle) +{ + return NULL; +} + +void +vlib_pci_device_close (vlib_main_t *vm, vlib_pci_dev_handle_t h) +{ +} + +void +init_device_from_registered (vlib_main_t *vm, vlib_pci_device_info_t *di) +{ +} + +static int +pci_addr_cmp (void *v1, void *v2) +{ + vlib_pci_addr_t *a1 = v1; + vlib_pci_addr_t *a2 = v2; + + if (a1->domain > a2->domain) + return 1; + if (a1->domain < a2->domain) + return -1; + if (a1->bus > a2->bus) + return 1; + if (a1->bus < a2->bus) + return -1; + if (a1->slot > a2->slot) + return 1; + if (a1->slot < a2->slot) + return -1; + if (a1->function > a2->function) + return 1; + if (a1->function < a2->function) + return -1; + return 0; +} + +vlib_pci_addr_t * +vlib_pci_get_all_dev_addrs () +{ + vlib_pci_addr_t *addrs = 0; + + int fd = -1; + struct pci_conf_io pci; + struct pci_conf matches[32]; + bzero (matches, sizeof (matches)); + + pci.pat_buf_len = 0; 
+ pci.num_patterns = 0; + pci.patterns = NULL; + pci.match_buf_len = sizeof (matches); + pci.num_matches = 32; + pci.matches = (struct pci_conf *) &matches; + pci.offset = 0; + pci.generation = 0; + pci.status = 0; + + fd = open ("/dev/pci", 0); + if (fd == -1) + { + clib_error_return_unix (0, "opening /dev/pci"); + return (NULL); + } + + if (ioctl (fd, PCIOCGETCONF, &pci) == -1) + { + clib_error_return_unix (0, "reading pci config"); + close (fd); + return (NULL); + } + + for (int i = 0; i < pci.num_matches; i++) + { + struct pci_conf *m = &pci.matches[i]; + vlib_pci_addr_t addr; + + addr.domain = m->pc_sel.pc_domain; + addr.bus = m->pc_sel.pc_bus; + addr.slot = m->pc_sel.pc_dev; + addr.function = m->pc_sel.pc_func; + + vec_add1 (addrs, addr); + } + + vec_sort_with_function (addrs, pci_addr_cmp); + close (fd); + + return addrs; +} + +clib_error_t * +freebsd_pci_init (vlib_main_t *vm) +{ + vlib_pci_main_t *pm = &pci_main; + vlib_pci_addr_t *addr = 0, *addrs; + + pm->vlib_main = vm; + + ASSERT (sizeof (vlib_pci_addr_t) == sizeof (u32)); + + addrs = vlib_pci_get_all_dev_addrs (); + vec_foreach (addr, addrs) + { + vlib_pci_device_info_t *d; + if ((d = vlib_pci_get_device_info (vm, addr, 0))) + { + init_device_from_registered (vm, d); + vlib_pci_free_device_info (d); + } + } + + return 0; +} + +VLIB_INIT_FUNCTION (freebsd_pci_init) = { + .runs_after = VLIB_INITS ("unix_input_init"), +}; diff --git a/src/vlib/handoff_trace.c b/src/vlib/handoff_trace.c index 964c095b9f6..1370ba2c95a 100644 --- a/src/vlib/handoff_trace.c +++ b/src/vlib/handoff_trace.c @@ -75,7 +75,6 @@ typedef enum HANDOFF_TRACE_N_NEXT, } tplaceholder_next_t; -/* *INDENT-OFF* */ VLIB_REGISTER_NODE (handoff_trace_node, static) = { .name = "handoff_trace", @@ -94,7 +93,6 @@ VLIB_REGISTER_NODE (handoff_trace_node, static) = .n_errors = ARRAY_LEN(handoff_trace_error_strings), .error_strings = handoff_trace_error_strings, }; -/* *INDENT-ON* */ int vlib_add_handoff_trace (vlib_main_t * vm, vlib_buffer_t * b) @@ 
-117,7 +115,6 @@ vlib_add_handoff_trace (vlib_main_t * vm, vlib_buffer_t * b) } -/* *INDENT-ON* */ /* * fd.io coding-style-patch-verification: ON diff --git a/src/vlib/init.c b/src/vlib/init.c index 4a56ab20b4a..55f260b74ed 100644 --- a/src/vlib/init.c +++ b/src/vlib/init.c @@ -270,12 +270,10 @@ again: } /* Finally, clean up all the fine data we allocated */ - /* *INDENT-OFF* */ hash_foreach_pair (hp, index_by_name, ({ vec_add1 (keys_to_delete, (u8 *)hp->key); })); - /* *INDENT-ON* */ hash_free (index_by_name); for (i = 0; i < vec_len (keys_to_delete); i++) vec_free (keys_to_delete[i]); @@ -657,12 +655,10 @@ show_init_function_command_fn (vlib_main_t * vm, } } } - /* *INDENT-OFF* */ hash_foreach_pair (hp, index_by_name, ({ vec_add1 (keys_to_delete, (u8 *)hp->key); })); - /* *INDENT-ON* */ hash_free (index_by_name); for (i = 0; i < vec_len (keys_to_delete); i++) vec_free (keys_to_delete[i]); @@ -679,13 +675,11 @@ show_init_function_command_fn (vlib_main_t * vm, * @cliexstart{show init-function [init | enter | exit] [verbose [nn]]} * @cliexend ?*/ -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (show_init_function, static) = { .path = "show init-function", .short_help = "show init-function [init | enter | exit][verbose [nn]]", .function = show_init_function_command_fn, }; -/* *INDENT-ON* */ /* diff --git a/src/vlib/init.h b/src/vlib/init.h index e6235652ad1..364989eafe0 100644 --- a/src/vlib/init.h +++ b/src/vlib/init.h @@ -171,6 +171,8 @@ static __clib_unused void * __clib_unused_##tag##_##x = x #define VLIB_INIT_FUNCTION(x) VLIB_DECLARE_INIT_FUNCTION(x,init) #define VLIB_WORKER_INIT_FUNCTION(x) VLIB_DECLARE_INIT_FUNCTION(x,worker_init) +#define VLIB_NUM_WORKERS_CHANGE_FN(x) \ + VLIB_DECLARE_INIT_FUNCTION (x, num_workers_change) #define VLIB_MAIN_LOOP_ENTER_FUNCTION(x) \ VLIB_DECLARE_INIT_FUNCTION(x,main_loop_enter) diff --git a/src/vlib/lex.c b/src/vlib/lex.c index 1cc8f1678d2..7facba5fe25 100644 --- a/src/vlib/lex.c +++ b/src/vlib/lex.c @@ -113,7 +113,7 @@ vlib_lex_get_token 
(vlib_lex_main_t * lm, vlib_lex_token_t * rv) lm->lex_token_names[VLIB_LEX_word], rv->value.as_pointer); } - _vec_len (lm->token_buffer) = 0; + vec_set_len (lm->token_buffer, 0); /* Rescan the character which terminated the keyword/word. */ lm->current_index--; @@ -233,7 +233,7 @@ void vlib_lex_reset (vlib_lex_main_t * lm, u8 * input_vector) { if (lm->pushback_vector) - _vec_len (lm->pushback_vector) = 0; + vec_set_len (lm->pushback_vector, 0); lm->pushback_sp = -1; lm->input_vector = input_vector; @@ -255,7 +255,7 @@ lex_onetime_init (vlib_main_t * vm) #undef _ vec_validate (lm->token_buffer, 127); - _vec_len (lm->token_buffer) = 0; + vec_set_len (lm->token_buffer, 0); return 0; } diff --git a/src/vlib/linux/pci.c b/src/vlib/linux/pci.c index 7c18505bbfc..29ca3d97523 100644 --- a/src/vlib/linux/pci.c +++ b/src/vlib/linux/pci.c @@ -38,6 +38,8 @@ */ #include <vppinfra/linux/sysfs.h> +#include <vppinfra/bitmap.h> +#include <vppinfra/unix.h> #include <vlib/vlib.h> #include <vlib/pci/pci.h> @@ -53,19 +55,26 @@ #include <linux/ethtool.h> #include <linux/sockios.h> #include <linux/vfio.h> +#include <limits.h> #include <sys/eventfd.h> +#define SYSFS_DEVICES_PCI "/sys/devices/pci" static const char *sysfs_pci_dev_path = "/sys/bus/pci/devices"; static const char *sysfs_pci_drv_path = "/sys/bus/pci/drivers"; static char *sysfs_mod_vfio_noiommu = "/sys/module/vfio/parameters/enable_unsafe_noiommu_mode"; -#define pci_log_debug(vm, dev, f, ...) \ - vlib_log(VLIB_LOG_LEVEL_DEBUG, pci_main.log_default, "%U: " f, \ - format_vlib_pci_addr, vlib_pci_get_addr(vm, dev->handle), ## __VA_ARGS__) -#define pci_log_err(vm, dev, f, ...) \ - vlib_log(VLIB_LOG_LEVEL_ERR, pci_main.log_default, "%U: " f, \ - format_vlib_pci_addr, vlib_pci_get_addr(vm, dev->handle), ## __VA_ARGS__) +VLIB_REGISTER_LOG_CLASS (pci_log, static) = { + .class_name = "pci", + .subclass_name = "linux", +}; + +#define log_debug(p, f, ...) 
\ + vlib_log (VLIB_LOG_LEVEL_DEBUG, pci_log.class, "%U: " f, \ + format_vlib_pci_log, p->handle, ##__VA_ARGS__) +#define log_err(p, f, ...) \ + vlib_log (VLIB_LOG_LEVEL_ERR, pci_log.class, "%U: " f, format_vlib_pci_log, \ + p->handle, ##__VA_ARGS__) typedef struct { @@ -232,32 +241,14 @@ vlib_pci_get_device_info (vlib_main_t * vm, vlib_pci_addr_t * addr, /* You can only read more that 64 bytes of config space as root; so we try to read the full space but fall back to just the first 64 bytes. */ - if (read (fd, &di->config_data, sizeof (di->config_data)) < - sizeof (di->config0)) + if (read (fd, &di->config, sizeof (di->config)) < + sizeof (vlib_pci_config_hdr_t)) { err = clib_error_return_unix (0, "read `%s'", f); close (fd); goto error; } - { - static pci_config_header_t all_ones; - if (all_ones.vendor_id == 0) - clib_memset (&all_ones, ~0, sizeof (all_ones)); - - if (!memcmp (&di->config0.header, &all_ones, sizeof (all_ones))) - { - err = clib_error_return (0, "invalid PCI config for `%s'", f); - close (fd); - goto error; - } - } - - if (di->config0.header.header_type == 0) - pci_config_type0_little_to_host (&di->config0); - else - pci_config_type1_little_to_host (&di->config1); - di->numa_node = -1; vec_reset_length (f); f = format (f, "%v/numa_node%c", dev_dir_name, 0); @@ -269,11 +260,7 @@ vlib_pci_get_device_info (vlib_main_t * vm, vlib_pci_addr_t * addr, } if (di->numa_node == -1) { - /* if '/sys/bus/pci/devices/<device id>/numa_node' returns -1 and - it is a SMP system, set numa_node to 0. 
*/ - if ((err = clib_sysfs_read ("/sys/devices/system/node/online", "%U", - unformat_bitmap_list, &bmp))) - clib_error_free (err); + bmp = os_get_online_cpu_node_bitmap (); if (clib_bitmap_count_set_bits (bmp) == 1) di->numa_node = 0; } @@ -300,15 +287,19 @@ vlib_pci_get_device_info (vlib_main_t * vm, vlib_pci_addr_t * addr, di->device_id = tmp; vec_reset_length (f); - f = format (f, "%v/driver%c", dev_dir_name, 0); - di->driver_name = clib_sysfs_link_to_name ((char *) f); + f = format (f, "%v/revision%c", dev_dir_name, 0); + err = clib_sysfs_read ((char *) f, "0x%x", &tmp); + if (err) + goto error; + di->revision = tmp; + + di->driver_name = + clib_file_get_resolved_basename ("%v/driver", dev_dir_name); if (!di->driver_name) di->driver_name = format (0, "<NONE>%c", 0); di->iommu_group = -1; - vec_reset_length (f); - f = format (f, "%v/iommu_group%c", dev_dir_name, 0); - tmpstr = clib_sysfs_link_to_name ((char *) f); + tmpstr = clib_file_get_resolved_basename ("%v/iommu_group", dev_dir_name); if (tmpstr) { di->iommu_group = atoi ((char *) tmpstr); @@ -347,7 +338,7 @@ vlib_pci_get_device_info (vlib_main_t * vm, vlib_pci_addr_t * addr, break; len = (tag[2] << 8) | tag[1]; - vec_validate (data, len); + vec_validate (data, len - 1); if (read (fd, data, len) != len) { @@ -383,6 +374,64 @@ done: return di; } +clib_error_t *__attribute__ ((weak)) +vlib_pci_get_device_root_bus (vlib_pci_addr_t *addr, vlib_pci_addr_t *root_bus) +{ + u8 *rel_path = 0, *abs_path = 0, *link_path = 0; + unformat_input_t input; + int fd = open (sysfs_pci_dev_path, O_RDONLY); + ssize_t size = 0; + u32 domain = 0, bus; + clib_error_t *err = NULL; + + if (fd < 0) + return clib_error_return_unix (0, "failed to open %s", sysfs_pci_dev_path); + + vec_alloc (rel_path, PATH_MAX); + vec_alloc (abs_path, PATH_MAX); + + link_path = + format (0, "%s/%U", sysfs_pci_dev_path, format_vlib_pci_addr, addr); + size = readlinkat (fd, (char *) link_path, (char *) rel_path, PATH_MAX); + if (size < 0) + { + err = 
clib_error_return_unix (0, "failed to read %s", rel_path); + goto done; + } + + rel_path[size] = '\0'; + vec_free (link_path); + + link_path = format (0, "%s/%s", sysfs_pci_dev_path, rel_path); + if (!realpath ((char *) link_path, (char *) abs_path)) + { + err = clib_error_return_unix (0, "failed to resolve %s", link_path); + goto done; + } + + unformat_init_string (&input, (char *) abs_path, + clib_strnlen ((char *) abs_path, PATH_MAX)); + + if (!unformat (&input, SYSFS_DEVICES_PCI "%x:%x/%s", &domain, &bus, + link_path)) + { + err = clib_error_return (0, "unknown input '%U'", format_unformat_error, + input); + goto done; + } + + root_bus->domain = domain; + root_bus->bus = bus; + +done: + vec_free (abs_path); + vec_free (link_path); + vec_free (rel_path); + close (fd); + + return err; +} + static int directory_exists (char *path) { @@ -394,8 +443,8 @@ directory_exists (char *path) } clib_error_t * -vlib_pci_bind_to_uio (vlib_main_t * vm, vlib_pci_addr_t * addr, - char *uio_drv_name) +vlib_pci_bind_to_uio (vlib_main_t *vm, vlib_pci_addr_t *addr, + char *uio_drv_name, int force) { clib_error_t *error = 0; u8 *s = 0, *driver_name = 0; @@ -427,7 +476,7 @@ vlib_pci_bind_to_uio (vlib_main_t * vm, vlib_pci_addr_t * addr, "is bound to IOMMU group and " "vfio-pci driver is not loaded", format_vlib_pci_addr, addr); - goto done; + goto err0; } else uio_drv_name = "vfio-pci"; @@ -448,92 +497,94 @@ vlib_pci_bind_to_uio (vlib_main_t * vm, vlib_pci_addr_t * addr, error = clib_error_return (0, "Skipping PCI device %U: missing " "kernel VFIO or UIO driver", format_vlib_pci_addr, addr); - goto done; + goto err0; } clib_error_free (error); } } - s = format (s, "%v/driver%c", dev_dir_name, 0); - driver_name = clib_sysfs_link_to_name ((char *) s); - vec_reset_length (s); + driver_name = clib_file_get_resolved_basename ("%v/driver", dev_dir_name); if (driver_name && ((strcmp ("vfio-pci", (char *) driver_name) == 0) || (strcmp ("uio_pci_generic", (char *) driver_name) == 0) || (strcmp 
("igb_uio", (char *) driver_name) == 0))) - goto done; - - /* walk trough all linux interfaces and if interface belonging to - this device is founf check if interface is admin up */ - dir = opendir ("/sys/class/net"); - s = format (s, "%U%c", format_vlib_pci_addr, addr, 0); + goto err0; - if (!dir) + if (!force) { - error = clib_error_return (0, "Skipping PCI device %U: failed to " - "read /sys/class/net", - format_vlib_pci_addr, addr); - goto done; - } + /* walk trough all linux interfaces and if interface belonging to + this device is found check if interface is admin up */ + dir = opendir ("/sys/class/net"); + s = format (s, "%U%c", format_vlib_pci_addr, addr, 0); - fd = socket (PF_INET, SOCK_DGRAM, 0); - if (fd < 0) - { - error = clib_error_return_unix (0, "socket"); - goto done; - } + if (!dir) + { + error = clib_error_return (0, + "Skipping PCI device %U: failed to " + "read /sys/class/net", + format_vlib_pci_addr, addr); + goto err0; + } - while ((e = readdir (dir))) - { - struct ifreq ifr; - struct ethtool_drvinfo drvinfo; + fd = socket (PF_INET, SOCK_DGRAM, 0); + if (fd < 0) + { + error = clib_error_return_unix (0, "socket"); + goto err1; + } - if (e->d_name[0] == '.') /* skip . and .. */ - continue; + while ((e = readdir (dir))) + { + struct ifreq ifr; + struct ethtool_drvinfo drvinfo; - clib_memset (&ifr, 0, sizeof ifr); - clib_memset (&drvinfo, 0, sizeof drvinfo); - ifr.ifr_data = (char *) &drvinfo; - clib_strncpy (ifr.ifr_name, e->d_name, sizeof (ifr.ifr_name) - 1); + if (e->d_name[0] == '.') /* skip . and .. 
*/ + continue; - drvinfo.cmd = ETHTOOL_GDRVINFO; - if (ioctl (fd, SIOCETHTOOL, &ifr) < 0) - { - /* Some interfaces (eg "lo") don't support this ioctl */ - if ((errno != ENOTSUP) && (errno != ENODEV)) - clib_unix_warning ("ioctl fetch intf %s bus info error", - e->d_name); - continue; - } + clib_memset (&ifr, 0, sizeof ifr); + clib_memset (&drvinfo, 0, sizeof drvinfo); + ifr.ifr_data = (char *) &drvinfo; + clib_strncpy (ifr.ifr_name, e->d_name, sizeof (ifr.ifr_name) - 1); - if (strcmp ((char *) s, drvinfo.bus_info)) - continue; + drvinfo.cmd = ETHTOOL_GDRVINFO; + if (ioctl (fd, SIOCETHTOOL, &ifr) < 0) + { + /* Some interfaces (eg "lo") don't support this ioctl */ + if ((errno != ENOTSUP) && (errno != ENODEV)) + clib_unix_warning ("ioctl fetch intf %s bus info error", + e->d_name); + continue; + } - clib_memset (&ifr, 0, sizeof (ifr)); - clib_strncpy (ifr.ifr_name, e->d_name, sizeof (ifr.ifr_name) - 1); + if (strcmp ((char *) s, drvinfo.bus_info)) + continue; - if (ioctl (fd, SIOCGIFFLAGS, &ifr) < 0) - { - error = clib_error_return_unix (0, "ioctl fetch intf %s flags", - e->d_name); - close (fd); - goto done; - } + clib_memset (&ifr, 0, sizeof (ifr)); + clib_strncpy (ifr.ifr_name, e->d_name, sizeof (ifr.ifr_name) - 1); - if (ifr.ifr_flags & IFF_UP) - { - vlib_log (VLIB_LOG_LEVEL_WARNING, pci_main.log_default, - "Skipping PCI device %U as host " - "interface %s is up", format_vlib_pci_addr, addr, - e->d_name); - close (fd); - goto done; + if (ioctl (fd, SIOCGIFFLAGS, &ifr) < 0) + { + error = clib_error_return_unix (0, "ioctl fetch intf %s flags", + e->d_name); + close (fd); + goto err1; + } + + if (ifr.ifr_flags & IFF_UP) + { + vlib_log (VLIB_LOG_LEVEL_WARNING, pci_main.log_default, + "Skipping PCI device %U as host " + "interface %s is up", + format_vlib_pci_addr, addr, e->d_name); + close (fd); + goto err1; + } } - } - close (fd); - vec_reset_length (s); + close (fd); + vec_reset_length (s); + } s = format (s, "%v/driver/unbind%c", dev_dir_name, 0); clib_sysfs_write 
((char *) s, "%U", format_vlib_pci_addr, addr); @@ -565,8 +616,9 @@ vlib_pci_bind_to_uio (vlib_main_t * vm, vlib_pci_addr_t * addr, vec_reset_length (s); } -done: +err1: closedir (dir); +err0: vec_free (s); vec_free (dev_dir_name); vec_free (driver_name); @@ -595,34 +647,14 @@ vfio_set_irqs (vlib_main_t * vm, linux_pci_device_t * p, u32 index, u32 start, { int data_len = efds ? count * sizeof (int) : 0; u8 buf[sizeof (struct vfio_irq_set) + data_len]; - struct vfio_irq_info ii = { 0 }; struct vfio_irq_set *irq_set = (struct vfio_irq_set *) buf; - - ii.argsz = sizeof (struct vfio_irq_info); - ii.index = index; - - if (ioctl (p->fd, VFIO_DEVICE_GET_IRQ_INFO, &ii) < 0) - return clib_error_return_unix (0, "ioctl(VFIO_DEVICE_GET_IRQ_INFO) " - "'%U'", format_vlib_pci_addr, &p->addr); - - pci_log_debug (vm, p, "%s index:%u count:%u flags: %s%s%s%s(0x%x)", - __func__, ii.index, ii.count, - ii.flags & VFIO_IRQ_INFO_EVENTFD ? "eventfd " : "", - ii.flags & VFIO_IRQ_INFO_MASKABLE ? "maskable " : "", - ii.flags & VFIO_IRQ_INFO_AUTOMASKED ? "automasked " : "", - ii.flags & VFIO_IRQ_INFO_NORESIZE ? 
"noresize " : "", - ii.flags); - - if (ii.count < start + count) - return clib_error_return_unix (0, "vfio_set_irq: unexistng interrupt on " - "'%U'", format_vlib_pci_addr, &p->addr); - - if (efds) { + int *data = (int *) irq_set->data; flags |= VFIO_IRQ_SET_DATA_EVENTFD; - clib_memcpy_fast (&irq_set->data, efds, data_len); + for (u32 i = 0; i < count; i++) + data[i] = efds[i]; } else flags |= VFIO_IRQ_SET_DATA_NONE; @@ -637,11 +669,11 @@ vfio_set_irqs (vlib_main_t * vm, linux_pci_device_t * p, u32 index, u32 start, irq_set->flags = flags; if (ioctl (p->fd, VFIO_DEVICE_SET_IRQS, irq_set) < 0) - return clib_error_return_unix (0, "%U:ioctl(VFIO_DEVICE_SET_IRQS) " - "[index = %u, start = %u, count = %u, " - "flags = 0x%x]", + return clib_error_return_unix (0, "%U:ioctl(VFIO_DEVICE_SET_IRQS)\n%U", format_vlib_pci_addr, &p->addr, - index, start, count, flags); + format_vfio_irq_set, irq_set); + + log_debug (p, "%s:\n%U", __func__, format_vfio_irq_set, irq_set); return 0; } @@ -800,13 +832,12 @@ vlib_pci_register_intx_handler (vlib_main_t * vm, vlib_pci_dev_handle_t h, if (ioctl (p->fd, VFIO_DEVICE_GET_IRQ_INFO, &ii) < 0) return clib_error_return_unix (0, "ioctl(VFIO_DEVICE_GET_IRQ_INFO) '" "%U'", format_vlib_pci_addr, &p->addr); - pci_log_debug (vm, p, "%s index:%u count:%u flags: %s%s%s%s(0x%x)", - __func__, ii.index, ii.count, - ii.flags & VFIO_IRQ_INFO_EVENTFD ? "eventfd " : "", - ii.flags & VFIO_IRQ_INFO_MASKABLE ? "maskable " : "", - ii.flags & VFIO_IRQ_INFO_AUTOMASKED ? "automasked " : "", - ii.flags & VFIO_IRQ_INFO_NORESIZE ? "noresize " : "", - ii.flags); + log_debug ( + p, "%s index:%u count:%u flags: %s%s%s%s(0x%x)", __func__, ii.index, + ii.count, ii.flags & VFIO_IRQ_INFO_EVENTFD ? "eventfd " : "", + ii.flags & VFIO_IRQ_INFO_MASKABLE ? "maskable " : "", + ii.flags & VFIO_IRQ_INFO_AUTOMASKED ? "automasked " : "", + ii.flags & VFIO_IRQ_INFO_NORESIZE ? 
"noresize " : "", ii.flags); if (ii.count != 1) return clib_error_return (0, "INTx interrupt does not exist on device" "'%U'", format_vlib_pci_addr, &p->addr); @@ -835,6 +866,27 @@ vlib_pci_register_intx_handler (vlib_main_t * vm, vlib_pci_dev_handle_t h, } clib_error_t * +vlib_pci_unregister_intx_handler (vlib_main_t *vm, vlib_pci_dev_handle_t h) +{ + linux_pci_device_t *p = linux_pci_get_device (h); + linux_pci_irq_t *irq = &p->intx_irq; + + if (irq->intx_handler == 0) + return 0; + + clib_file_del_by_index (&file_main, irq->clib_file_index); + if (p->type == LINUX_PCI_DEVICE_TYPE_VFIO) + { + close (irq->fd); + irq->fd = -1; + } + + irq->intx_handler = 0; + + return 0; +} + +clib_error_t * vlib_pci_register_msix_handler (vlib_main_t * vm, vlib_pci_dev_handle_t h, u32 start, u32 count, pci_msix_handler_function_t * msix_handler) @@ -847,10 +899,8 @@ vlib_pci_register_msix_handler (vlib_main_t * vm, vlib_pci_dev_handle_t h, return clib_error_return (0, "vfio driver is needed for MSI-X interrupt " "support"); - /* *INDENT-OFF* */ vec_validate_init_empty (p->msix_irqs, start + count - 1, (linux_pci_irq_t) { .fd = -1}); - /* *INDENT-ON* */ for (i = start; i < start + count; i++) { @@ -892,6 +942,33 @@ error: } clib_error_t * +vlib_pci_unregister_msix_handler (vlib_main_t *vm, vlib_pci_dev_handle_t h, + u32 start, u32 count) +{ + clib_error_t *err = 0; + linux_pci_device_t *p = linux_pci_get_device (h); + u32 i; + + if (p->type != LINUX_PCI_DEVICE_TYPE_VFIO) + return clib_error_return (0, "vfio driver is needed for MSI-X interrupt " + "support"); + + for (i = start; i < start + count; i++) + { + linux_pci_irq_t *irq = vec_elt_at_index (p->msix_irqs, i); + + if (irq->fd != -1) + { + clib_file_del_by_index (&file_main, irq->clib_file_index); + close (irq->fd); + irq->fd = -1; + } + } + + return err; +} + +clib_error_t * vlib_pci_enable_msix_irq (vlib_main_t * vm, vlib_pci_dev_handle_t h, u16 start, u16 count) { @@ -903,9 +980,9 @@ vlib_pci_enable_msix_irq (vlib_main_t * 
vm, vlib_pci_dev_handle_t h, return clib_error_return (0, "vfio driver is needed for MSI-X interrupt " "support"); - for (i = start; i < start + count; i++) + for (i = 0; i < count; i++) { - linux_pci_irq_t *irq = vec_elt_at_index (p->msix_irqs, i); + linux_pci_irq_t *irq = vec_elt_at_index (p->msix_irqs, start + i); fds[i] = irq->fd; } @@ -978,7 +1055,7 @@ add_device_vfio (vlib_main_t * vm, linux_pci_device_t * p, goto error; } - pci_log_debug (vm, p, "%s %U", __func__, format_vfio_region_info, ®); + log_debug (p, "%s %U", __func__, format_vfio_region_info, ®); p->config_offset = reg.offset; p->config_fd = p->fd; @@ -1001,7 +1078,6 @@ add_device_vfio (vlib_main_t * vm, linux_pci_device_t * p, if (p->supports_va_dma) { vlib_buffer_pool_t *bp; - /* *INDENT-OFF* */ vec_foreach (bp, vm->buffer_main->buffer_pools) { u32 i; @@ -1010,7 +1086,6 @@ add_device_vfio (vlib_main_t * vm, linux_pci_device_t * p, for (i = 0; i < pm->n_pages; i++) vfio_map_physmem_page (vm, pm->base + (i << pm->log2_page_size)); } - /* *INDENT-ON* */ } if (r && r->init_function) @@ -1113,7 +1188,7 @@ vlib_pci_region (vlib_main_t * vm, vlib_pci_dev_handle_t h, u32 bar, int *fd, _fd = p->fd; _size = r->size; _offset = r->offset; - pci_log_debug (vm, p, "%s %U", __func__, format_vfio_region_info, r); + log_debug (p, "%s %U", __func__, format_vfio_region_info, r); clib_mem_free (r); } else @@ -1133,29 +1208,25 @@ vlib_pci_map_region_int (vlib_main_t * vm, vlib_pci_dev_handle_t h, linux_pci_device_t *p = linux_pci_get_device (h); int fd = -1; clib_error_t *error; - int flags = MAP_SHARED; u64 size = 0, offset = 0; - u16 command; + vlib_pci_config_reg_command_t command; - pci_log_debug (vm, p, "map region %u to va %p", bar, addr); + log_debug (p, "map region %u to va %p", bar, addr); - if ((error = vlib_pci_read_config_u16 (vm, h, 4, &command))) + if ((error = vlib_pci_read_config_u16 (vm, h, 4, &command.as_u16))) return error; - if (!(command & PCI_COMMAND_MEMORY)) + if (!(command.mem_space)) { - 
pci_log_debug (vm, p, "setting memory enable bit"); - command |= PCI_COMMAND_MEMORY; - if ((error = vlib_pci_write_config_u16 (vm, h, 4, &command))) + log_debug (p, "setting memory enable bit"); + command.mem_space = 1; + if ((error = vlib_pci_write_config_u16 (vm, h, 4, &command.as_u16))) return error; } if ((error = vlib_pci_region (vm, h, bar, &fd, &size, &offset))) return error; - if (p->type == LINUX_PCI_DEVICE_TYPE_UIO && addr != 0) - flags |= MAP_FIXED; - *result = clib_mem_vm_map_shared (addr, size, fd, offset, "PCIe %U region %u", format_vlib_pci_addr, vlib_pci_get_addr (vm, h), bar); @@ -1167,10 +1238,8 @@ vlib_pci_map_region_int (vlib_main_t * vm, vlib_pci_dev_handle_t h, return error; } - /* *INDENT-OFF* */ vec_validate_init_empty (p->regions, bar, (linux_pci_region_t) { .fd = -1}); - /* *INDENT-ON* */ if (p->type == LINUX_PCI_DEVICE_TYPE_UIO) p->regions[bar].fd = fd; p->regions[bar].addr = *result; @@ -1261,12 +1330,19 @@ vlib_pci_device_open (vlib_main_t * vm, vlib_pci_addr_t * addr, if (err) return err; - for (i = ids; i->vendor_id != 0; i++) - if (i->vendor_id == di->vendor_id && i->device_id == di->device_id) - break; - if (i->vendor_id == 0) - return clib_error_return (0, "Wrong vendor or device id"); + if (ids) + { + for (i = ids; i->vendor_id != 0; i++) + if (i->vendor_id == di->vendor_id && i->device_id == di->device_id) + break; + + if (i->vendor_id == 0) + { + vlib_pci_free_device_info (di); + return clib_error_return (0, "Wrong vendor or device id"); + } + } pool_get (lpm->linux_pci_devices, p); p->handle = p - lpm->linux_pci_devices; @@ -1279,9 +1355,8 @@ vlib_pci_device_open (vlib_main_t * vm, vlib_pci_addr_t * addr, */ p->io_fd = -1; - pci_log_debug (vm, p, "open vid:0x%04x did:0x%04x driver:%s iommu_group:%d", - di->vendor_id, di->device_id, di->driver_name, - di->iommu_group); + log_debug (p, "open vid:0x%04x did:0x%04x driver:%s iommu_group:%d", + di->vendor_id, di->device_id, di->driver_name, di->iommu_group); if (clib_strncmp 
("vfio-pci", (char *) di->driver_name, 8) == 0) err = add_device_vfio (vm, p, di, 0); @@ -1299,7 +1374,7 @@ error: vlib_pci_free_device_info (di); if (err) { - pci_log_err (vm, p, "%U", format_clib_error, err); + log_err (p, "%U", format_clib_error, err); clib_memset (p, 0, sizeof (linux_pci_device_t)); pool_put (lpm->linux_pci_devices, p); } @@ -1345,7 +1420,6 @@ vlib_pci_device_close (vlib_main_t * vm, vlib_pci_dev_handle_t h) err = vfio_set_irqs (vm, p, VFIO_PCI_MSIX_IRQ_INDEX, 0, 0, VFIO_IRQ_SET_ACTION_TRIGGER, 0); clib_error_free (err); - /* *INDENT-OFF* */ vec_foreach (irq, p->msix_irqs) { if (irq->fd == -1) @@ -1353,12 +1427,10 @@ vlib_pci_device_close (vlib_main_t * vm, vlib_pci_dev_handle_t h) clib_file_del_by_index (&file_main, irq->clib_file_index); close (irq->fd); } - /* *INDENT-ON* */ vec_free (p->msix_irqs); } } - /* *INDENT-OFF* */ vec_foreach (res, p->regions) { if (res->size == 0) @@ -1367,7 +1439,6 @@ vlib_pci_device_close (vlib_main_t * vm, vlib_pci_dev_handle_t h) if (res->fd != -1) close (res->fd); } - /* *INDENT-ON* */ vec_free (p->regions); close (p->fd); @@ -1490,28 +1561,27 @@ linux_pci_init (vlib_main_t * vm) ASSERT (sizeof (vlib_pci_addr_t) == sizeof (u32)); - addrs = vlib_pci_get_all_dev_addrs (); - /* *INDENT-OFF* */ - vec_foreach (addr, addrs) + if (pm->pci_device_registrations) { - vlib_pci_device_info_t *d; - if ((d = vlib_pci_get_device_info (vm, addr, 0))) + addrs = vlib_pci_get_all_dev_addrs (); + vec_foreach (addr, addrs) { - init_device_from_registered (vm, d); - vlib_pci_free_device_info (d); + vlib_pci_device_info_t *d; + if ((d = vlib_pci_get_device_info (vm, addr, 0))) + { + init_device_from_registered (vm, d); + vlib_pci_free_device_info (d); + } } } - /* *INDENT-ON* */ return 0; } -/* *INDENT-OFF* */ VLIB_INIT_FUNCTION (linux_pci_init) = { .runs_after = VLIB_INITS("unix_input_init"), }; -/* *INDENT-ON* */ /* * fd.io coding-style-patch-verification: ON diff --git a/src/vlib/linux/vfio.c b/src/vlib/linux/vfio.c index 
dc68c52db02..1462cc6c7ca 100644 --- a/src/vlib/linux/vfio.c +++ b/src/vlib/linux/vfio.c @@ -185,10 +185,8 @@ linux_vfio_group_get_device_fd (vlib_pci_addr_t * addr, int *fdp, int fd; *is_noiommu = 0; - s = - format (s, "/sys/bus/pci/devices/%U/iommu_group%c", format_vlib_pci_addr, - addr, 0); - tmpstr = clib_sysfs_link_to_name ((char *) s); + tmpstr = clib_file_get_resolved_basename ( + "/sys/bus/pci/devices/%U/iommu_group", format_vlib_pci_addr, addr); if (tmpstr) { iommu_group = atoi ((char *) tmpstr); @@ -303,10 +301,44 @@ format_vfio_region_info (u8 * s, va_list * args) return s; } -/* - * fd.io coding-style-patch-verification: ON - * - * Local Variables: - * eval: (c-set-style "gnu") - * End: - */ +u8 * +format_vfio_irq_set (u8 *s, va_list *args) +{ + struct vfio_irq_set *is = va_arg (*args, struct vfio_irq_set *); + u32 indent = format_get_indent (s); + + s = format (s, "index:%u start:%u count:%u flags: 0x%x", is->index, + is->start, is->count, is->flags); + + s = format (s, " (data:"); + if (is->flags & VFIO_IRQ_SET_DATA_NONE) + s = format (s, " none"); + if (is->flags & VFIO_IRQ_SET_DATA_BOOL) + s = format (s, " bool"); + if (is->flags & VFIO_IRQ_SET_DATA_EVENTFD) + s = format (s, " eventfd"); + + s = format (s, ", action:"); + if (is->flags & VFIO_IRQ_SET_ACTION_MASK) + s = format (s, " mask"); + if (is->flags & VFIO_IRQ_SET_ACTION_UNMASK) + s = format (s, " unmask"); + if (is->flags & VFIO_IRQ_SET_ACTION_TRIGGER) + s = format (s, " trigger"); + vec_add1 (s, ')'); + + if (is->flags & VFIO_IRQ_SET_DATA_EVENTFD) + { + s = format (s, "\n%U eventfd data:", format_white_space, indent); + for (u32 i = 0; i < is->count; i++) + s = format (s, " %d", ((int *) (is->data))[i]); + } + if (is->flags & VFIO_IRQ_SET_DATA_BOOL) + { + s = format (s, "\n%U bool data:", format_white_space, indent); + for (u32 i = 0; i < is->count; i++) + s = format (s, " %u", is->data); + } + + return s; +} diff --git a/src/vlib/linux/vfio.h b/src/vlib/linux/vfio.h index 
fe4f0f75346..c2bb2e9b667 100644 --- a/src/vlib/linux/vfio.h +++ b/src/vlib/linux/vfio.h @@ -50,7 +50,7 @@ clib_error_t *linux_vfio_group_get_device_fd (vlib_pci_addr_t * addr, int *fd, int *is_noiommu); format_function_t format_vfio_region_info; - +format_function_t format_vfio_irq_set; #endif /* included_vlib_linux_vfio_h */ diff --git a/src/vlib/linux/vmbus.c b/src/vlib/linux/vmbus.c index d50b539910b..9dc9d554ebd 100644 --- a/src/vlib/linux/vmbus.c +++ b/src/vlib/linux/vmbus.c @@ -31,8 +31,6 @@ #include <linux/ethtool.h> #include <linux/sockios.h> -#include <uuid/uuid.h> - static const char sysfs_vmbus_dev_path[] = "/sys/bus/vmbus/devices"; static const char sysfs_vmbus_drv_path[] = "/sys/bus/vmbus/drivers"; static const char sysfs_class_net_path[] = "/sys/class/net"; @@ -123,16 +121,39 @@ unformat_vlib_vmbus_addr (unformat_input_t *input, va_list *args) { vlib_vmbus_addr_t *addr = va_arg (*args, vlib_vmbus_addr_t *); uword ret = 0; - u8 *s; + u8 *s = 0; - if (!unformat (input, "%s", &s)) + if (!unformat (input, "%U", unformat_token, "a-zA-Z0-9-", &s)) return 0; - if (uuid_parse ((char *) s, addr->guid) == 0) - ret = 1; + if (vec_len (s) != 36) + goto fail; - vec_free (s); + if (s[8] != '-' || s[13] != '-' || s[18] != '-' || s[23] != '-') + goto fail; + + clib_memmove (s + 8, s + 9, 4); + clib_memmove (s + 12, s + 14, 4); + clib_memmove (s + 16, s + 19, 4); + clib_memmove (s + 20, s + 24, 12); + + for (int i = 0; i < 32; i++) + if (s[i] >= '0' && s[i] <= '9') + s[i] -= '0'; + else if (s[i] >= 'A' && s[i] <= 'F') + s[i] -= 'A' - 10; + else if (s[i] >= 'a' && s[i] <= 'f') + s[i] -= 'a' - 10; + else + goto fail; + + for (int i = 0; i < 16; i++) + addr->guid[i] = s[2 * i] * 16 + s[2 * i + 1]; + + ret = 1; +fail: + vec_free (s); return ret; } @@ -141,10 +162,24 @@ u8 * format_vlib_vmbus_addr (u8 *s, va_list *va) { vlib_vmbus_addr_t *addr = va_arg (*va, vlib_vmbus_addr_t *); - char tmp[40]; - - uuid_unparse (addr->guid, tmp); - return format (s, "%s", tmp); + u8 
*bytes = addr->guid; + + for (int i = 0; i < 4; i++) + s = format (s, "%02x", bytes++[0]); + vec_add1 (s, '-'); + for (int i = 0; i < 2; i++) + s = format (s, "%02x", bytes++[0]); + vec_add1 (s, '-'); + for (int i = 0; i < 2; i++) + s = format (s, "%02x", bytes++[0]); + vec_add1 (s, '-'); + for (int i = 0; i < 2; i++) + s = format (s, "%02x", bytes++[0]); + vec_add1 (s, '-'); + for (int i = 0; i < 6; i++) + s = format (s, "%02x", bytes++[0]); + + return s; } /* workaround for mlx bug, bring lower device up before unbind */ @@ -218,16 +253,14 @@ vlib_vmbus_bind_to_uio (vlib_vmbus_addr_t * addr) static int uio_new_id_needed = 1; struct dirent *e; struct ifreq ifr; - u8 *s, *driver_name; + u8 *s = 0, *driver_name; DIR *dir; int fd; dev_dir_name = format (0, "%s/%U", sysfs_vmbus_dev_path, format_vlib_vmbus_addr, addr); - s = format (0, "%v/driver%c", dev_dir_name, 0); - driver_name = clib_sysfs_link_to_name ((char *) s); - vec_reset_length (s); + driver_name = clib_file_get_resolved_basename ("%v/driver", dev_dir_name); /* skip if not using the Linux kernel netvsc driver */ if (!driver_name || strcmp ("hv_netvsc", (char *) driver_name) != 0) @@ -284,9 +317,9 @@ vlib_vmbus_bind_to_uio (vlib_vmbus_addr_t * addr) if (ifr.ifr_flags & IFF_UP) { - error = clib_error_return (0, - "Skipping VMBUS device %U as host interface %s is up", - format_vlib_vmbus_addr, addr, e->d_name); + error = clib_error_return ( + 0, "Skipping VMBUS device %U as host interface %s is up", + format_vlib_vmbus_addr, addr, ifname); close (fd); goto done; } @@ -383,7 +416,13 @@ vmbus_addr_cmp (void *v1, void *v2) vlib_vmbus_addr_t *a1 = v1; vlib_vmbus_addr_t *a2 = v2; - return uuid_compare (a1->guid, a2->guid); + for (int i = 0; i < ARRAY_LEN (a1->guid); i++) + if (a1->guid[i] > a2->guid[i]) + return 1; + else if (a1->guid[i] < a2->guid[i]) + return -1; + + return 0; } vlib_vmbus_addr_t * @@ -416,12 +455,10 @@ linux_vmbus_init (vlib_main_t * vm) return 0; } -/* *INDENT-OFF* */ VLIB_INIT_FUNCTION 
(linux_vmbus_init) = { .runs_before = VLIB_INITS("unix_input_init"), }; -/* *INDENT-ON* */ /* * fd.io coding-style-patch-verification: ON diff --git a/src/vlib/log.c b/src/vlib/log.c index fc67a1f8903..60fb9fb5178 100644 --- a/src/vlib/log.c +++ b/src/vlib/log.c @@ -25,15 +25,13 @@ vlib_log_main_t log_main = { .default_syslog_log_level = VLIB_LOG_LEVEL_WARNING, .unthrottle_time = 3, .size = 512, - .add_to_elog = 1, + .add_to_elog = 0, .default_rate_limit = 50, }; -/* *INDENT-OFF* */ VLIB_REGISTER_LOG_CLASS (log_log, static) = { .class_name = "log", }; -/* *INDENT-ON* */ static const int colors[] = { [VLIB_LOG_LEVEL_EMERG] = 1, /* red */ @@ -70,27 +68,12 @@ last_log_entry () i += lm->size; return i; } - -static vlib_log_class_data_t * -get_class_data (vlib_log_class_t ci) -{ - vlib_log_main_t *lm = &log_main; - return vec_elt_at_index (lm->classes, (ci >> 16)); -} - -static vlib_log_subclass_data_t * -get_subclass_data (vlib_log_class_t ci) -{ - vlib_log_class_data_t *c = get_class_data (ci); - return vec_elt_at_index (c->subclasses, (ci & 0xffff)); -} - u8 * format_vlib_log_class (u8 * s, va_list * args) { vlib_log_class_t ci = va_arg (*args, vlib_log_class_t); - vlib_log_class_data_t *c = get_class_data (ci); - vlib_log_subclass_data_t *sc = get_subclass_data (ci); + vlib_log_class_data_t *c = vlib_log_get_class_data (ci); + vlib_log_subclass_data_t *sc = vlib_log_get_subclass_data (ci); if (sc->name) return format (s, "%v/%v", c->name, sc->name); @@ -105,7 +88,6 @@ format_indent (u8 * s, va_list * args) u32 indent = va_arg (*args, u32); u8 *c; - /* *INDENT-OFF* */ vec_foreach (c, v) { vec_add (s, c, 1); @@ -113,7 +95,6 @@ format_indent (u8 * s, va_list * args) for (u32 i = 0; i < indent; i++) vec_add1 (s, (u8) ' '); } - /* *INDENT-ON* */ return s; } @@ -133,7 +114,7 @@ vlib_log (vlib_log_level_t level, vlib_log_class_t class, char *fmt, ...) 
vlib_main_t *vm = vlib_get_main (); vlib_log_main_t *lm = &log_main; vlib_log_entry_t *e; - vlib_log_subclass_data_t *sc = get_subclass_data (class); + vlib_log_subclass_data_t *sc = vlib_log_get_subclass_data (class); va_list va; f64 t = vlib_time_now (vm); f64 delta = t - sc->last_event_timestamp; @@ -226,13 +207,13 @@ vlib_log (vlib_log_level_t level, vlib_log_class_t class, char *fmt, ...) if (lm->add_to_elog) { - /* *INDENT-OFF* */ - ELOG_TYPE_DECLARE(ee) = + ELOG_TYPE_DECLARE(ee) = { .format = "log-%s: %s", .format_args = "t4T4", - .n_enum_strings = 9, + .n_enum_strings = VLIB_LOG_N_LEVELS, .enum_strings = { + "unknown", "emerg", "alert", "crit", @@ -244,15 +225,15 @@ vlib_log (vlib_log_level_t level, vlib_log_class_t class, char *fmt, ...) "disabled", }, }; - struct { - u32 log_level; - u32 string_index; - } *ed; - /* *INDENT-ON* */ + struct + { + u32 log_level; + u32 string_index; + } * ed; ed = ELOG_DATA (&vlib_global_main.elog_main, ee); ed->log_level = level; ed->string_index = - elog_string (&vlib_global_main.elog_main, "%v", e->string); + elog_string (&vlib_global_main.elog_main, "%v%c", e->string, 0); } lm->next = (lm->next + 1) % lm->size; @@ -366,8 +347,8 @@ format_vlib_log_level (u8 * s, va_list * args) return format (s, "%s", t); } -static clib_error_t * -vlib_log_init (vlib_main_t * vm) +clib_error_t * +vlib_log_init (vlib_main_t *vm) { vlib_log_main_t *lm = &log_main; vlib_log_class_registration_t *r = lm->registrations; @@ -381,9 +362,10 @@ vlib_log_init (vlib_main_t * vm) { r->class = vlib_log_register_class (r->class_name, r->subclass_name); if (r->default_level) - get_subclass_data (r->class)->level = r->default_level; + vlib_log_get_subclass_data (r->class)->level = r->default_level; if (r->default_syslog_level) - get_subclass_data (r->class)->syslog_level = r->default_syslog_level; + vlib_log_get_subclass_data (r->class)->syslog_level = + r->default_syslog_level; r = r->next; } @@ -396,9 +378,6 @@ vlib_log_init (vlib_main_t * vm) return 0; 
} -VLIB_INIT_FUNCTION (vlib_log_init); - - static clib_error_t * show_log (vlib_main_t * vm, unformat_input_t * input, vlib_cli_command_t * cmd) @@ -416,23 +395,20 @@ show_log (vlib_main_t * vm, while (count--) { e = vec_elt_at_index (lm->entries, i); - vlib_cli_output (vm, "%U %-10U %-14U %v", - format_time_float, 0, e->timestamp + time_offset, - format_vlib_log_level, e->level, - format_vlib_log_class, e->class, e->string); + vlib_cli_output (vm, "%U %-10U %-14U %v", format_time_float, NULL, + e->timestamp + time_offset, format_vlib_log_level, + e->level, format_vlib_log_class, e->class, e->string); i = (i + 1) % lm->size; } return error; } -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (cli_show_log, static) = { .path = "show logging", .short_help = "show logging", .function = show_log, }; -/* *INDENT-ON* */ static clib_error_t * show_log_config (vlib_main_t * vm, @@ -474,13 +450,11 @@ show_log_config (vlib_main_t * vm, return error; } -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (cli_show_log_config, static) = { .path = "show logging configuration", .short_help = "show logging configuration", .function = show_log_config, }; -/* *INDENT-ON* */ static clib_error_t * clear_log (vlib_main_t * vm, @@ -505,13 +479,11 @@ clear_log (vlib_main_t * vm, return error; } -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (cli_clear_log, static) = { .path = "clear logging", .short_help = "clear logging", .function = clear_log, }; -/* *INDENT-ON* */ static uword unformat_vlib_log_level (unformat_input_t * input, va_list * args) @@ -639,14 +611,12 @@ set_log_class (vlib_main_t * vm, return rv; } -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (cli_set_log, static) = { .path = "set logging class", .short_help = "set logging class <class> [rate-limit <int>] " "[level <level>] [syslog-level <level>]", .function = set_log_class, }; -/* *INDENT-ON* */ static clib_error_t * set_log_unth_time (vlib_main_t * vm, @@ -673,13 +643,11 @@ set_log_unth_time (vlib_main_t * vm, return rv; } -/* *INDENT-OFF* */ VLIB_CLI_COMMAND 
(cli_set_log_params, static) = { .path = "set logging unthrottle-time", .short_help = "set logging unthrottle-time <int>", .function = set_log_unth_time, }; -/* *INDENT-ON* */ static clib_error_t * set_log_size (vlib_main_t * vm, @@ -709,13 +677,11 @@ set_log_size (vlib_main_t * vm, return rv; } -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (cli_set_log_size, static) = { .path = "set logging size", .short_help = "set logging size <int>", .function = set_log_size, }; -/* *INDENT-ON* */ static uword unformat_vlib_log_subclass (unformat_input_t * input, va_list * args) @@ -788,13 +754,11 @@ test_log_class_subclass (vlib_main_t * vm, return 0; } -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (cli_test_log, static) = { .path = "test log", .short_help = "test log <level> <class> <subclass> <message>", .function = test_log_class_subclass, }; -/* *INDENT-ON* */ static clib_error_t * log_config_class (vlib_main_t * vm, char *name, unformat_input_t * input) diff --git a/src/vlib/log.h b/src/vlib/log.h index c3ebb8150ee..45e2b59946c 100644 --- a/src/vlib/log.h +++ b/src/vlib/log.h @@ -117,6 +117,7 @@ typedef struct extern vlib_log_main_t log_main; +clib_error_t *vlib_log_init (struct vlib_main_t *vm); vlib_log_class_t vlib_log_register_class (char *vlass, char *subclass); vlib_log_class_t vlib_log_register_class_rate_limit (char *class, char *subclass, @@ -148,6 +149,34 @@ __vlib_add_log_registration_##x (void) \ } \ __VA_ARGS__ vlib_log_class_registration_t x +static_always_inline vlib_log_class_data_t * +vlib_log_get_class_data (vlib_log_class_t ci) +{ + vlib_log_main_t *lm = &log_main; + return vec_elt_at_index (lm->classes, (ci >> 16)); +} + +static_always_inline vlib_log_subclass_data_t * +vlib_log_get_subclass_data (vlib_log_class_t ci) +{ + vlib_log_class_data_t *c = vlib_log_get_class_data (ci); + return vec_elt_at_index (c->subclasses, (ci & 0xffff)); +} + +static_always_inline int +vlib_log_is_enabled (vlib_log_level_t level, vlib_log_class_t class) +{ + vlib_log_subclass_data_t *sc = 
vlib_log_get_subclass_data (class); + + if (level <= sc->level && sc->level != VLIB_LOG_LEVEL_DISABLED) + return 1; + + if (level <= sc->syslog_level && sc->syslog_level != VLIB_LOG_LEVEL_DISABLED) + return 1; + + return 0; +} + #endif /* included_vlib_log_h */ /* diff --git a/src/vlib/main.c b/src/vlib/main.c index 2f219955e70..04b58762646 100644 --- a/src/vlib/main.c +++ b/src/vlib/main.c @@ -41,75 +41,17 @@ #include <vppinfra/format.h> #include <vlib/vlib.h> #include <vlib/threads.h> +#include <vlib/stats/stats.h> #include <vppinfra/tw_timer_1t_3w_1024sl_ov.h> #include <vlib/unix/unix.h> -/* Actually allocate a few extra slots of vector data to support - speculative vector enqueues which overflow vector data in next frame. */ -#define VLIB_FRAME_SIZE_ALLOC (VLIB_FRAME_SIZE + 4) - -always_inline u32 -vlib_frame_bytes (u32 n_scalar_bytes, u32 n_vector_bytes) -{ - u32 n_bytes; - - /* Make room for vlib_frame_t plus scalar arguments. */ - n_bytes = vlib_frame_vector_byte_offset (n_scalar_bytes); - - /* Make room for vector arguments. - Allocate a few extra slots of vector data to support - speculative vector enqueues which overflow vector data in next frame. */ -#define VLIB_FRAME_SIZE_EXTRA 4 - n_bytes += (VLIB_FRAME_SIZE + VLIB_FRAME_SIZE_EXTRA) * n_vector_bytes; - - /* Magic number is first 32bit number after vector data. - Used to make sure that vector data is never overrun. */ #define VLIB_FRAME_MAGIC (0xabadc0ed) - n_bytes += sizeof (u32); - - /* Pad to cache line. 
*/ - n_bytes = round_pow2 (n_bytes, CLIB_CACHE_LINE_BYTES); - - return n_bytes; -} always_inline u32 * vlib_frame_find_magic (vlib_frame_t * f, vlib_node_t * node) { - void *p = f; - - p += vlib_frame_vector_byte_offset (node->scalar_size); - - p += (VLIB_FRAME_SIZE + VLIB_FRAME_SIZE_EXTRA) * node->vector_size; - - return p; -} - -static inline vlib_frame_size_t * -get_frame_size_info (vlib_node_main_t * nm, - u32 n_scalar_bytes, u32 n_vector_bytes) -{ -#ifdef VLIB_SUPPORTS_ARBITRARY_SCALAR_SIZES - uword key = (n_scalar_bytes << 16) | n_vector_bytes; - uword *p, i; - - p = hash_get (nm->frame_size_hash, key); - if (p) - i = p[0]; - else - { - i = vec_len (nm->frame_sizes); - vec_validate (nm->frame_sizes, i); - hash_set (nm->frame_size_hash, key, i); - } - - return vec_elt_at_index (nm->frame_sizes, i); -#else - ASSERT (vlib_frame_bytes (n_scalar_bytes, n_vector_bytes) - == (vlib_frame_bytes (0, 4))); - return vec_elt_at_index (nm->frame_sizes, 0); -#endif + return (void *) f + node->magic_offset; } static vlib_frame_t * @@ -120,31 +62,35 @@ vlib_frame_alloc_to_node (vlib_main_t * vm, u32 to_node_index, vlib_frame_size_t *fs; vlib_node_t *to_node; vlib_frame_t *f; - u32 l, n, scalar_size, vector_size; + u32 l, n; ASSERT (vm == vlib_get_main ()); to_node = vlib_get_node (vm, to_node_index); - scalar_size = to_node->scalar_size; - vector_size = to_node->vector_size; + vec_validate (nm->frame_sizes, to_node->frame_size_index); + fs = vec_elt_at_index (nm->frame_sizes, to_node->frame_size_index); - fs = get_frame_size_info (nm, scalar_size, vector_size); - n = vlib_frame_bytes (scalar_size, vector_size); + if (fs->frame_size == 0) + fs->frame_size = to_node->frame_size; + else + ASSERT (fs->frame_size == to_node->frame_size); + + n = fs->frame_size; if ((l = vec_len (fs->free_frames)) > 0) { /* Allocate from end of free list. 
*/ f = fs->free_frames[l - 1]; - _vec_len (fs->free_frames) = l - 1; + vec_set_len (fs->free_frames, l - 1); } else { - f = clib_mem_alloc_aligned_no_fail (n, VLIB_FRAME_ALIGN); + f = clib_mem_alloc_aligned_no_fail (n, CLIB_CACHE_LINE_BYTES); } /* Poison frame when debugging. */ if (CLIB_DEBUG > 0) - clib_memset (f, 0xfe, n); + clib_memset_u8 (f, 0xfe, n); /* Insert magic number. */ { @@ -156,9 +102,11 @@ vlib_frame_alloc_to_node (vlib_main_t * vm, u32 to_node_index, f->frame_flags = VLIB_FRAME_IS_ALLOCATED | frame_flags; f->n_vectors = 0; - f->scalar_size = scalar_size; - f->vector_size = vector_size; + f->scalar_offset = to_node->scalar_offset; + f->vector_offset = to_node->vector_offset; + f->aux_offset = to_node->aux_offset; f->flags = 0; + f->frame_size_index = to_node->frame_size_index; fs->n_alloc_frames += 1; @@ -239,17 +187,15 @@ vlib_put_frame_to_node (vlib_main_t * vm, u32 to_node_index, vlib_frame_t * f) /* Free given frame. */ void -vlib_frame_free (vlib_main_t * vm, vlib_node_runtime_t * r, vlib_frame_t * f) +vlib_frame_free (vlib_main_t *vm, vlib_frame_t *f) { vlib_node_main_t *nm = &vm->node_main; - vlib_node_t *node; vlib_frame_size_t *fs; ASSERT (vm == vlib_get_main ()); ASSERT (f->frame_flags & VLIB_FRAME_IS_ALLOCATED); - node = vlib_get_node (vm, r->node_index); - fs = get_frame_size_info (nm, node->scalar_size, node->vector_size); + fs = vec_elt_at_index (nm->frame_sizes, f->frame_size_index); ASSERT (f->frame_flags & VLIB_FRAME_IS_ALLOCATED); @@ -261,6 +207,7 @@ vlib_frame_free (vlib_main_t * vm, vlib_node_runtime_t * r, vlib_frame_t * f) } f->frame_flags &= ~(VLIB_FRAME_IS_ALLOCATED | VLIB_FRAME_NO_APPEND); + f->flags = 0; vec_add1 (fs->free_frames, f); ASSERT (fs->n_alloc_frames > 0); @@ -271,30 +218,33 @@ static clib_error_t * show_frame_stats (vlib_main_t * vm, unformat_input_t * input, vlib_cli_command_t * cmd) { - vlib_node_main_t *nm = &vm->node_main; vlib_frame_size_t *fs; - vlib_cli_output (vm, "%=6s%=12s%=12s", "Size", "# Alloc", "# 
Free"); - vec_foreach (fs, nm->frame_sizes) - { - u32 n_alloc = fs->n_alloc_frames; - u32 n_free = vec_len (fs->free_frames); + vlib_cli_output (vm, "%=8s%=6s%=12s%=12s", "Thread", "Size", "# Alloc", + "# Free"); + foreach_vlib_main () + { + vlib_node_main_t *nm = &this_vlib_main->node_main; + vec_foreach (fs, nm->frame_sizes) + { + u32 n_alloc = fs->n_alloc_frames; + u32 n_free = vec_len (fs->free_frames); - if (n_alloc + n_free > 0) - vlib_cli_output (vm, "%=6d%=12d%=12d", - fs - nm->frame_sizes, n_alloc, n_free); - } + if (n_alloc + n_free > 0) + vlib_cli_output (vm, "%=8d%=6d%=12d%=12d", + this_vlib_main->thread_index, fs->frame_size, + n_alloc, n_free); + } + } return 0; } -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (show_frame_stats_cli, static) = { .path = "show vlib frame-allocation", .short_help = "Show node dispatch frame statistics", .function = show_frame_stats, }; -/* *INDENT-ON* */ /* Change ownership of enqueue rights to given next node. */ static void @@ -525,12 +475,8 @@ vlib_put_next_frame (vlib_main_t * vm, if (!(f->frame_flags & VLIB_FRAME_PENDING)) { __attribute__ ((unused)) vlib_node_t *node; - vlib_node_t *next_node; - vlib_node_runtime_t *next_runtime; node = vlib_get_node (vm, r->node_index); - next_node = vlib_get_next_node (vm, r->node_index, next_index); - next_runtime = vlib_node_get_runtime (vm, next_node->index); vec_add2 (nm->pending_frames, p, 1); @@ -539,18 +485,6 @@ vlib_put_next_frame (vlib_main_t * vm, p->next_frame_index = nf - nm->next_frames; nf->flags |= VLIB_FRAME_PENDING; f->frame_flags |= VLIB_FRAME_PENDING; - - /* - * If we're going to dispatch this frame on another thread, - * force allocation of a new frame. Otherwise, we create - * a dangling frame reference. Each thread has its own copy of - * the next_frames vector. - */ - if (0 && r->thread_index != next_runtime->thread_index) - { - nf->frame = NULL; - nf->flags &= ~(VLIB_FRAME_PENDING | VLIB_FRAME_IS_ALLOCATED); - } } /* Copy trace flag from next_frame and from runtime. 
*/ @@ -698,13 +632,11 @@ vlib_cli_elog_clear (vlib_main_t * vm, return 0; } -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (elog_clear_cli, static) = { .path = "event-logger clear", .short_help = "Clear the event log", .function = vlib_cli_elog_clear, }; -/* *INDENT-ON* */ #ifdef CLIB_UNIX static clib_error_t * @@ -753,13 +685,11 @@ vlib_post_mortem_dump (void) (vgm->post_mortem_callbacks[i]) (); } -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (elog_save_cli, static) = { .path = "event-logger save", .short_help = "event-logger save <filename> (saves log in /tmp/<filename>)", .function = elog_save_buffer, }; -/* *INDENT-ON* */ static clib_error_t * elog_stop (vlib_main_t * vm, @@ -773,13 +703,11 @@ elog_stop (vlib_main_t * vm, return 0; } -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (elog_stop_cli, static) = { .path = "event-logger stop", .short_help = "Stop the event-logger", .function = elog_stop, }; -/* *INDENT-ON* */ static clib_error_t * elog_restart (vlib_main_t * vm, @@ -793,13 +721,11 @@ elog_restart (vlib_main_t * vm, return 0; } -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (elog_restart_cli, static) = { .path = "event-logger restart", .short_help = "Restart the event-logger", .function = elog_restart, }; -/* *INDENT-ON* */ static clib_error_t * elog_resize_command_fn (vlib_main_t * vm, @@ -823,13 +749,11 @@ elog_resize_command_fn (vlib_main_t * vm, return 0; } -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (elog_resize_cli, static) = { .path = "event-logger resize", .short_help = "event-logger resize <nnn>", .function = elog_resize_command_fn, }; -/* *INDENT-ON* */ #endif /* CLIB_UNIX */ @@ -882,13 +806,11 @@ elog_show_buffer (vlib_main_t * vm, return error; } -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (elog_show_cli, static) = { .path = "show event-logger", .short_help = "Show event logger info", .function = elog_show_buffer, }; -/* *INDENT-ON* */ void vlib_gdb_show_event_log (void) @@ -1045,7 +967,6 @@ dispatch_node (vlib_main_t * vm, polling mode and vice versa. 
*/ if (PREDICT_FALSE (node->flags & VLIB_NODE_FLAG_ADAPTIVE_MODE)) { - /* *INDENT-OFF* */ ELOG_TYPE_DECLARE (e) = { .function = (char *) __FUNCTION__, @@ -1056,7 +977,6 @@ dispatch_node (vlib_main_t * vm, "interrupt", "polling", }, }; - /* *INDENT-ON* */ struct { u32 node_name, vector_length, is_polling; @@ -1227,13 +1147,14 @@ dispatch_pending_node (vlib_main_t * vm, uword pending_frame_index, /* no new frame has been assigned to this node, use the saved one */ nf->frame = restore_frame; f->n_vectors = 0; + f->flags = 0; } else { /* The node has gained a frame, implying packets from the current frame were re-queued to this same node. we don't need the saved one anymore */ - vlib_frame_free (vm, n, f); + vlib_frame_free (vm, f); } } else @@ -1241,7 +1162,7 @@ dispatch_pending_node (vlib_main_t * vm, uword pending_frame_index, if (f->frame_flags & VLIB_FRAME_FREE_AFTER_DISPATCH) { ASSERT (!(n->flags & VLIB_NODE_FLAG_FRAME_NO_FREE_AFTER_DISPATCH)); - vlib_frame_free (vm, n, f); + vlib_frame_free (vm, f); } } @@ -1417,7 +1338,8 @@ vlib_start_process (vlib_main_t * vm, uword process_index) { vlib_node_main_t *nm = &vm->node_main; vlib_process_t *p = vec_elt (nm->processes, process_index); - dispatch_process (vm, p, /* frame */ 0, /* cpu_time_now */ 0); + u64 cpu_time_now = clib_cpu_time_now (); + dispatch_process (vm, p, /* frame */ 0, cpu_time_now); } static u64 @@ -1501,12 +1423,6 @@ dispatch_suspended_process (vlib_main_t * vm, return t; } -void vl_api_send_pending_rpc_requests (vlib_main_t *) __attribute__ ((weak)); -void -vl_api_send_pending_rpc_requests (vlib_main_t * vm) -{ -} - static_always_inline void vlib_main_or_worker_loop (vlib_main_t * vm, int is_main) { @@ -1522,7 +1438,7 @@ vlib_main_or_worker_loop (vlib_main_t * vm, int is_main) if (is_main) { vec_resize (nm->pending_frames, 32); - _vec_len (nm->pending_frames) = 0; + vec_set_len (nm->pending_frames, 0); } /* Mark time of main loop start. 
*/ @@ -1534,9 +1450,6 @@ vlib_main_or_worker_loop (vlib_main_t * vm, int is_main) else cpu_time_now = clib_cpu_time_now (); - /* Pre-allocate interupt runtime indices and lock. */ - vec_alloc_aligned (nm->pending_interrupts, 1, CLIB_CACHE_LINE_BYTES); - /* Pre-allocate expired nodes. */ if (!nm->polling_threshold_vector_length) nm->polling_threshold_vector_length = 10; @@ -1572,7 +1485,7 @@ vlib_main_or_worker_loop (vlib_main_t * vm, int is_main) if (PREDICT_FALSE (_vec_len (vm->pending_rpc_requests) > 0)) { if (!is_main) - vl_api_send_pending_rpc_requests (vm); + vlib_worker_flush_pending_rpc_requests (vm); } if (!is_main) @@ -1581,8 +1494,7 @@ vlib_main_or_worker_loop (vlib_main_t * vm, int is_main) if (PREDICT_FALSE (vm->check_frame_queues + frame_queue_check_counter)) { u32 processed = 0; - vlib_frame_queue_dequeue_fn_t *fn = - vlib_buffer_func_main.frame_queue_dequeue_fn; + vlib_frame_queue_dequeue_fn_t *fn; if (vm->check_frame_queues) { @@ -1591,7 +1503,10 @@ vlib_main_or_worker_loop (vlib_main_t * vm, int is_main) } vec_foreach (fqm, tm->frame_queue_mains) - processed += (fn) (vm, fqm); + { + fn = fqm->frame_queue_dequeue_fn; + processed += (fn) (vm, fqm); + } /* No handoff queue work found? */ if (processed) @@ -1613,6 +1528,22 @@ vlib_main_or_worker_loop (vlib_main_t * vm, int is_main) /* frame */ 0, cpu_time_now); + if (clib_interrupt_is_any_pending (nm->pre_input_node_interrupts)) + { + int int_num = -1; + + while ((int_num = clib_interrupt_get_next_and_clear ( + nm->pre_input_node_interrupts, int_num)) != -1) + { + vlib_node_runtime_t *n; + n = vec_elt_at_index ( + nm->nodes_by_type[VLIB_NODE_TYPE_PRE_INPUT], int_num); + cpu_time_now = dispatch_node (vm, n, VLIB_NODE_TYPE_PRE_INPUT, + VLIB_NODE_STATE_INTERRUPT, + /* frame */ 0, cpu_time_now); + } + } + /* Next process input nodes. 
*/ vec_foreach (n, nm->nodes_by_type[VLIB_NODE_TYPE_INPUT]) cpu_time_now = dispatch_node (vm, n, @@ -1624,16 +1555,14 @@ vlib_main_or_worker_loop (vlib_main_t * vm, int is_main) if (PREDICT_TRUE (is_main && vm->queue_signal_pending == 0)) vm->queue_signal_callback (vm); - if (__atomic_load_n (nm->pending_interrupts, __ATOMIC_ACQUIRE)) + if (clib_interrupt_is_any_pending (nm->input_node_interrupts)) { int int_num = -1; - *nm->pending_interrupts = 0; - while ((int_num = - clib_interrupt_get_next (nm->interrupts, int_num)) != -1) + while ((int_num = clib_interrupt_get_next_and_clear ( + nm->input_node_interrupts, int_num)) != -1) { vlib_node_runtime_t *n; - clib_interrupt_clear (nm->interrupts, int_num); n = vec_elt_at_index (nm->nodes_by_type[VLIB_NODE_TYPE_INPUT], int_num); cpu_time_now = dispatch_node (vm, n, VLIB_NODE_TYPE_INPUT, @@ -1648,11 +1577,10 @@ vlib_main_or_worker_loop (vlib_main_t * vm, int is_main) for (i = 0; i < _vec_len (nm->pending_frames); i++) cpu_time_now = dispatch_pending_node (vm, i, cpu_time_now); /* Reset pending vector for next iteration. 
*/ - _vec_len (nm->pending_frames) = 0; + vec_set_len (nm->pending_frames, 0); if (is_main) { - /* *INDENT-OFF* */ ELOG_TYPE_DECLARE (es) = { .format = "process tw start", @@ -1663,7 +1591,6 @@ vlib_main_or_worker_loop (vlib_main_t * vm, int is_main) .format = "process tw end: %d", .format_args = "i4", }; - /* *INDENT-ON* */ struct { @@ -1676,10 +1603,8 @@ vlib_main_or_worker_loop (vlib_main_t * vm, int is_main) if (PREDICT_FALSE (vm->elog_trace_graph_dispatch)) ed = ELOG_DATA (&vlib_global_main.elog_main, es); - nm->data_from_advancing_timing_wheel = - TW (tw_timer_expire_timers_vec) - ((TWT (tw_timer_wheel) *) nm->timing_wheel, vlib_time_now (vm), - nm->data_from_advancing_timing_wheel); + TW (tw_timer_expire_timers) + ((TWT (tw_timer_wheel) *) nm->timing_wheel, vlib_time_now (vm)); ASSERT (nm->data_from_advancing_timing_wheel != 0); @@ -1710,6 +1635,7 @@ vlib_main_or_worker_loop (vlib_main_t * vm, int is_main) vlib_get_node (vm, te->process_node_index); vlib_process_t *p = vec_elt (nm->processes, n->runtime_index); + p->stop_timer_handle = ~0; void *data; data = vlib_process_signal_event_helper (nm, n, p, @@ -1734,7 +1660,7 @@ vlib_main_or_worker_loop (vlib_main_t * vm, int is_main) dispatch_suspended_process (vm, di, cpu_time_now); } } - _vec_len (nm->data_from_advancing_timing_wheel) = 0; + vec_set_len (nm->data_from_advancing_timing_wheel, 0); } } vlib_increment_main_loop_counter (vm); @@ -1875,7 +1801,6 @@ placeholder_queue_signal_callback (vlib_main_t * vm) } #define foreach_weak_reference_stub \ -_(vlib_map_stat_segment_init) \ _(vpe_api_init) \ _(vlibmemory_init) \ _(map_api_segment_init) @@ -1909,6 +1834,23 @@ vl_api_get_elog_trace_api_messages (void) return 0; } +static void +process_expired_timer_cb (u32 *expired_timer_handles) +{ + vlib_main_t *vm = vlib_get_main (); + vlib_node_main_t *nm = &vm->node_main; + u32 *handle; + + vec_foreach (handle, expired_timer_handles) + { + u32 pi = vlib_timing_wheel_data_get_index (*handle); + vlib_process_t *p = 
vec_elt (nm->processes, pi); + + p->stop_timer_handle = ~0; + } + vec_append (nm->data_from_advancing_timing_wheel, expired_timer_handles); +} + /* Main function. */ int vlib_main (vlib_main_t * volatile vm, unformat_input_t * input) @@ -1936,7 +1878,13 @@ vlib_main (vlib_main_t * volatile vm, unformat_input_t * input) goto done; } - if ((error = vlib_map_stat_segment_init (vm))) + if ((error = vlib_log_init (vm))) + { + clib_error_report (error); + goto done; + } + + if ((error = vlib_stats_init (vm))) { clib_error_report (error); goto done; @@ -2005,18 +1953,18 @@ vlib_main (vlib_main_t * volatile vm, unformat_input_t * input) CLIB_CACHE_LINE_BYTES); vec_validate (nm->data_from_advancing_timing_wheel, 10); - _vec_len (nm->data_from_advancing_timing_wheel) = 0; + vec_set_len (nm->data_from_advancing_timing_wheel, 0); /* Create the process timing wheel */ - TW (tw_timer_wheel_init) ((TWT (tw_timer_wheel) *) nm->timing_wheel, - 0 /* no callback */ , - 10e-6 /* timer period 10us */ , - ~0 /* max expirations per call */ ); + TW (tw_timer_wheel_init) + ((TWT (tw_timer_wheel) *) nm->timing_wheel, + process_expired_timer_cb /* callback */, 10e-6 /* timer period 10us */, + ~0 /* max expirations per call */); vec_validate (vm->pending_rpc_requests, 0); - _vec_len (vm->pending_rpc_requests) = 0; + vec_set_len (vm->pending_rpc_requests, 0); vec_validate (vm->processing_rpc_requests, 0); - _vec_len (vm->processing_rpc_requests) = 0; + vec_set_len (vm->processing_rpc_requests, 0); /* Default params for the buffer allocator fault injector, if configured */ if (VLIB_BUFFER_ALLOC_FAULT_INJECTOR > 0) @@ -2066,7 +2014,9 @@ vlib_main (vlib_main_t * volatile vm, unformat_input_t * input) vlib_main_loop (vm); done: + /* Stop worker threads, barrier will not be released */ vlib_worker_thread_barrier_sync (vm); + /* Call all exit functions. 
*/ { clib_error_t *sub_error; @@ -2074,7 +2024,6 @@ done: if (sub_error) clib_error_report (sub_error); } - vlib_worker_thread_barrier_release (vm); if (error) clib_error_report (error); diff --git a/src/vlib/main.h b/src/vlib/main.h index a16f603f467..94b8c4fa954 100644 --- a/src/vlib/main.h +++ b/src/vlib/main.h @@ -40,6 +40,7 @@ #ifndef included_vlib_main_h #define included_vlib_main_h +#include <vppinfra/clib.h> #include <vppinfra/callback_data.h> #include <vppinfra/elog.h> #include <vppinfra/format.h> @@ -154,15 +155,6 @@ typedef struct vlib_main_t /* Error marker to use when exiting main loop. */ clib_error_t *main_loop_error; - /* Start of the heap. */ - void *heap_base; - - /* Truncated version, to create frame indices */ - void *heap_aligned_base; - - /* Size of the heap */ - uword heap_size; - /* buffer main structure. */ vlib_buffer_main_t *buffer_main; @@ -220,7 +212,6 @@ typedef struct vlib_main_t volatile u32 queue_signal_pending; volatile u32 api_queue_nonempty; void (*queue_signal_callback) (struct vlib_main_t *); - u8 **argv; /* Top of (worker) dispatch loop callback */ void (**volatile worker_thread_main_loop_callbacks) @@ -283,6 +274,12 @@ typedef struct vlib_global_main_t /* Name for e.g. syslog. 
*/ char *name; + /* full path to main executable */ + char *exec_path; + + /* command line arguments */ + u8 **argv; + /* post-mortem callbacks */ void (**post_mortem_callbacks) (void); @@ -310,6 +307,7 @@ typedef struct vlib_global_main_t _vlib_init_function_list_elt_t *main_loop_enter_function_registrations; _vlib_init_function_list_elt_t *main_loop_exit_function_registrations; _vlib_init_function_list_elt_t *worker_init_function_registrations; + _vlib_init_function_list_elt_t *num_workers_change_function_registrations; _vlib_init_function_list_elt_t *api_init_function_registrations; vlib_config_function_runtime_t *config_function_registrations; @@ -379,7 +377,13 @@ always_inline void vlib_panic_with_error (vlib_main_t * vm, clib_error_t * error) { vm->main_loop_error = error; - clib_longjmp (&vm->main_loop_exit, VLIB_MAIN_LOOP_EXIT_PANIC); + if (vm->main_loop_exit_set) + clib_longjmp (&vm->main_loop_exit, VLIB_MAIN_LOOP_EXIT_PANIC); + else + { + clib_warning ("panic: %U", format_clib_error, error); + abort (); + } } #define vlib_panic_with_msg(vm,args...) \ @@ -469,7 +473,7 @@ vlib_main_init () vgm->init_functions_called = hash_create (0, /* value bytes */ 0); vm = clib_mem_alloc_aligned (sizeof (*vm), CLIB_CACHE_LINE_BYTES); - vec_add1 (vgm->vlib_mains, vm); + vec_add1_ha (vgm->vlib_mains, vm, 0, CLIB_CACHE_LINE_BYTES); } /* Main routine. 
*/ diff --git a/src/vlib/node.c b/src/vlib/node.c index f4329e7c503..8f6c852188b 100644 --- a/src/vlib/node.c +++ b/src/vlib/node.c @@ -130,12 +130,10 @@ vlib_node_runtime_update (vlib_main_t * vm, u32 node_index, u32 next_index) && pf->next_frame_index >= i) pf->next_frame_index += n_insert; } - /* *INDENT-OFF* */ pool_foreach (pf, nm->suspended_process_frames) { if (pf->next_frame_index != ~0 && pf->next_frame_index >= i) pf->next_frame_index += n_insert; } - /* *INDENT-ON* */ r->n_next_nodes = vec_len (node->next_nodes); } @@ -223,7 +221,6 @@ vlib_node_add_next_with_slot (vlib_main_t * vm, { uword sib_node_index, sib_slot; vlib_node_t *sib_node; - /* *INDENT-OFF* */ clib_bitmap_foreach (sib_node_index, node->sibling_bitmap) { sib_node = vec_elt (nm->nodes, sib_node_index); if (sib_node != node) @@ -232,7 +229,6 @@ vlib_node_add_next_with_slot (vlib_main_t * vm, ASSERT (sib_slot == slot); } } - /* *INDENT-ON* */ } vlib_worker_thread_barrier_release (vm); @@ -329,12 +325,54 @@ vlib_node_get_preferred_node_fn_variant (vlib_main_t *vm, } static void -register_node (vlib_main_t * vm, vlib_node_registration_t * r) +vlib_node_add_to_sibling_bitmap (vlib_main_t *vm, vlib_node_t *n, + vlib_node_t *sib) { vlib_node_main_t *nm = &vm->node_main; - vlib_node_t *n; + u32 si; + + clib_bitmap_foreach (si, sib->sibling_bitmap) + { + vlib_node_t *m = vec_elt (nm->nodes, si); + + /* Connect all of sibling's siblings to us. */ + m->sibling_bitmap = clib_bitmap_ori (m->sibling_bitmap, n->index); + + /* Connect us to all of sibling's siblings. */ + n->sibling_bitmap = clib_bitmap_ori (n->sibling_bitmap, si); + } + + /* Connect sibling to us. */ + sib->sibling_bitmap = clib_bitmap_ori (sib->sibling_bitmap, n->index); + + /* Connect us to sibling. */ + n->sibling_bitmap = clib_bitmap_ori (n->sibling_bitmap, sib->index); +} + +u32 +vlib_register_node (vlib_main_t *vm, vlib_node_registration_t *r, char *fmt, + ...) 
+{ + vlib_node_main_t *nm = &vm->node_main; + vlib_node_t *n, *sib = 0; + va_list va; + u32 size; int i; + if (r->sibling_of) + { + if (r->n_next_nodes > 0) + clib_error ("sibling node should not have any next nodes `%v'", + r->name); + if (nm->flags & VLIB_NODE_MAIN_RUNTIME_STARTED) + { + sib = vlib_get_node_by_name (vm, (u8 *) r->sibling_of); + + if (sib == 0) + clib_error ("unknown sibling node '%s'", r->sibling_of); + } + } + if (CLIB_DEBUG > 0) { /* Default (0) type should match INTERNAL. */ @@ -362,11 +400,9 @@ register_node (vlib_main_t * vm, vlib_node_registration_t * r) vec_add1 (nm->nodes, n); - /* Name is always a vector so it can be formatted with %v. */ - if (clib_mem_is_heap_object (vec_header (r->name, 0))) - n->name = vec_dup ((u8 *) r->name); - else - n->name = format (0, "%s", r->name); + va_start (va, fmt); + n->name = va_format (0, fmt, &va); + va_end (va); if (!nm->node_by_name) nm->node_by_name = hash_create_vec ( /* size */ 32, @@ -387,11 +423,6 @@ register_node (vlib_main_t * vm, vlib_node_registration_t * r) r->index = n->index; /* save index in registration */ n->function = r->function; - /* Node index of next sibling will be filled in by vlib_node_main_init. 
*/ - n->sibling_of = r->sibling_of; - if (r->sibling_of && r->n_next_nodes > 0) - clib_error ("sibling node should not have any next nodes `%v'", n->name); - if (r->type == VLIB_NODE_TYPE_INTERNAL) ASSERT (r->vector_size > 0); @@ -400,13 +431,66 @@ register_node (vlib_main_t * vm, vlib_node_registration_t * r) _(type); _(flags); _(state); - _(scalar_size); - _(vector_size); _(format_buffer); _(unformat_buffer); _(format_trace); _(validate_frame); + size = round_pow2 (sizeof (vlib_frame_t), VLIB_FRAME_DATA_ALIGN); + + /* scalar data size */ + if (r->scalar_size) + { + n->scalar_offset = size; + size += round_pow2 (r->scalar_size, VLIB_FRAME_DATA_ALIGN); + } + else + n->scalar_offset = 0; + + /* Vecor data size */ + n->vector_offset = size; + size += r->vector_size * VLIB_FRAME_SIZE; + + /* Allocate a few extra slots of vector data to support + speculative vector enqueues which overflow vector data in next frame. */ + size += r->vector_size * VLIB_FRAME_SIZE_EXTRA; + + /* space for VLIB_FRAME_MAGIC */ + n->magic_offset = size; + size += sizeof (u32); + + /* round size to VLIB_FRAME_DATA_ALIGN */ + size = round_pow2 (size, VLIB_FRAME_DATA_ALIGN); + + if (r->aux_size) + { + n->aux_offset = size; + size += r->aux_size * VLIB_FRAME_SIZE; + } + else + n->aux_offset = 0; + + /* final size */ + n->frame_size = size = round_pow2 (size, CLIB_CACHE_LINE_BYTES); + ASSERT (size <= __UINT16_MAX__); + + vlib_frame_size_t *fs = 0; + + n->frame_size_index = (u16) ~0; + vec_foreach (fs, nm->frame_sizes) + if (fs->frame_size == size) + { + n->frame_size_index = fs - nm->frame_sizes; + break; + } + + if (n->frame_size_index == (u16) ~0) + { + vec_add2 (nm->frame_sizes, fs, 1); + fs->frame_size = size; + n->frame_size_index = fs - nm->frame_sizes; + } + /* Register error counters. 
*/ vlib_register_errors (vm, n->index, r->n_errors, r->error_strings, r->error_counters); @@ -476,7 +560,10 @@ register_node (vlib_main_t * vm, vlib_node_registration_t * r) vec_add2_aligned (nm->nodes_by_type[n->type], rt, 1, /* align */ CLIB_CACHE_LINE_BYTES); if (n->type == VLIB_NODE_TYPE_INPUT) - clib_interrupt_resize (&nm->interrupts, + clib_interrupt_resize (&nm->input_node_interrupts, + vec_len (nm->nodes_by_type[n->type])); + else if (n->type == VLIB_NODE_TYPE_PRE_INPUT) + clib_interrupt_resize (&nm->pre_input_node_interrupts, vec_len (nm->nodes_by_type[n->type])); n->runtime_index = rt - nm->nodes_by_type[n->type]; } @@ -512,13 +599,24 @@ register_node (vlib_main_t * vm, vlib_node_registration_t * r) vec_free (n->runtime_data); } #undef _ -} -/* Register new packet processing node. */ -u32 -vlib_register_node (vlib_main_t * vm, vlib_node_registration_t * r) -{ - register_node (vm, r); + if (sib) + { + u32 slot, i; + + vec_foreach_index (i, sib->next_nodes) + { + slot = + vlib_node_add_next_with_slot (vm, n->index, sib->next_nodes[i], i); + ASSERT (slot == i); + } + + vlib_node_add_to_sibling_bitmap (vm, n, sib); + + r->n_next_nodes = vec_len (n->next_nodes); + } + n->sibling_of = r->sibling_of; + return r->index; } @@ -530,7 +628,7 @@ null_node_fn (vlib_main_t * vm, vlib_node_increment_counter (vm, node->node_index, 0, n_vectors); vlib_buffer_free (vm, vlib_frame_vector_args (frame), n_vectors); - vlib_frame_free (vm, node, frame); + vlib_frame_free (vm, frame); return n_vectors; } @@ -582,19 +680,18 @@ vlib_register_all_static_nodes (vlib_main_t * vm) static vlib_node_registration_t null_node_reg = { .function = null_node_fn, .vector_size = sizeof (u32), - .name = "null-node", .n_errors = 1, .error_strings = null_node_error_strings, }; /* make sure that node index 0 is not used by real node */ - register_node (vm, &null_node_reg); + vlib_register_node (vm, &null_node_reg, "null-node"); r = vgm->node_registrations; while (r) { - register_node (vm, r); + 
vlib_register_node (vm, r, "%s", r->name); r = r->next_registration; } } @@ -669,16 +766,11 @@ vlib_node_main_init (vlib_main_t * vm) vlib_node_t *n; uword ni; - nm->frame_sizes = vec_new (vlib_frame_size_t, 1); -#ifdef VLIB_SUPPORTS_ARBITRARY_SCALAR_SIZES - nm->frame_size_hash = hash_create (0, sizeof (uword)); -#endif nm->flags |= VLIB_NODE_MAIN_RUNTIME_STARTED; /* Generate sibling relationships */ { vlib_node_t *n, *sib; - uword si; for (ni = 0; ni < vec_len (nm->nodes); ni++) { @@ -695,23 +787,7 @@ vlib_node_main_init (vlib_main_t * vm) goto done; } - /* *INDENT-OFF* */ - clib_bitmap_foreach (si, sib->sibling_bitmap) { - vlib_node_t * m = vec_elt (nm->nodes, si); - - /* Connect all of sibling's siblings to us. */ - m->sibling_bitmap = clib_bitmap_ori (m->sibling_bitmap, n->index); - - /* Connect us to all of sibling's siblings. */ - n->sibling_bitmap = clib_bitmap_ori (n->sibling_bitmap, si); - } - /* *INDENT-ON* */ - - /* Connect sibling to us. */ - sib->sibling_bitmap = clib_bitmap_ori (sib->sibling_bitmap, n->index); - - /* Connect us to sibling. */ - n->sibling_bitmap = clib_bitmap_ori (n->sibling_bitmap, sib->index); + vlib_node_add_to_sibling_bitmap (vm, n, sib); } } @@ -800,14 +876,13 @@ vlib_process_create (vlib_main_t * vm, char *name, memset (&r, 0, sizeof (r)); - r.name = (char *) format (0, "%s", name, 0); r.function = f; r.process_log2_n_stack_bytes = log2_n_stack_bytes; r.type = VLIB_NODE_TYPE_PROCESS; vlib_worker_thread_barrier_sync (vm); - vlib_register_node (vm, &r); + vlib_register_node (vm, &r, "%s", name); vec_free (r.name); vlib_worker_thread_node_runtime_update (); diff --git a/src/vlib/node.h b/src/vlib/node.h index 75a0adba8d1..68813c2c3e1 100644 --- a/src/vlib/node.h +++ b/src/vlib/node.h @@ -149,7 +149,8 @@ typedef struct _vlib_node_registration u8 protocol_hint; /* Size of scalar and vector arguments in bytes. */ - u16 scalar_size, vector_size; + u8 vector_size, aux_size; + u16 scalar_size; /* Number of error codes used by this node. 
*/ u16 n_errors; @@ -200,7 +201,8 @@ static __clib_unused vlib_node_registration_t __clib_unused_##x #endif #define VLIB_NODE_FN(node) \ - uword CLIB_MARCH_SFX (node##_fn) (); \ + uword CLIB_MARCH_SFX (node##_fn) (vlib_main_t *, vlib_node_runtime_t *, \ + vlib_frame_t *); \ static vlib_node_fn_registration_t CLIB_MARCH_SFX ( \ node##_fn_registration) = { \ .function = &CLIB_MARCH_SFX (node##_fn), \ @@ -273,7 +275,7 @@ typedef struct vlib_node_t u32 runtime_index; /* Runtime data for this node. */ - void *runtime_data; + u8 *runtime_data; /* Node flags. */ u16 flags; @@ -309,7 +311,8 @@ typedef struct vlib_node_t u16 n_errors; /* Size of scalar and vector arguments in bytes. */ - u16 scalar_size, vector_size; + u16 frame_size, scalar_offset, vector_offset, magic_offset, aux_offset; + u16 frame_size_index; /* Handle/index in error heap for this node. */ u32 error_heap_handle; @@ -367,7 +370,10 @@ typedef struct vlib_node_t /* Max number of vector elements to process at once per node. */ #define VLIB_FRAME_SIZE 256 -#define VLIB_FRAME_ALIGN CLIB_CACHE_LINE_BYTES +/* Number of extra elements allocated at the end of vector. */ +#define VLIB_FRAME_SIZE_EXTRA 4 +/* Frame data alignment */ +#define VLIB_FRAME_DATA_ALIGN 16 /* Calling frame (think stack frame) for a node. */ typedef struct vlib_frame_t @@ -378,15 +384,15 @@ typedef struct vlib_frame_t /* User flags. Used for sending hints to the next node. */ u16 flags; - /* Number of scalar bytes in arguments. */ - u8 scalar_size; - - /* Number of bytes per vector argument. */ - u8 vector_size; + /* Scalar, vector and aux offsets in this frame. */ + u16 scalar_offset, vector_offset, aux_offset; /* Number of vector elements currently in frame. */ u16 n_vectors; + /* Index of frame size corresponding to allocated node. */ + u16 frame_size_index; + /* Scalar and vector arguments to next node. */ u8 arguments[0]; } vlib_frame_t; @@ -501,7 +507,7 @@ typedef struct vlib_node_runtime_t zero before first run of this node.
*/ - u16 thread_index; /**< thread this node runs on */ + CLIB_ALIGN_MARK (runtime_data_pad, 8); u8 runtime_data[0]; /**< Function dependent node-runtime data. This data is @@ -521,10 +527,15 @@ typedef struct /* Number of allocated frames for this scalar/vector size. */ u32 n_alloc_frames; + /* Frame size */ + u16 frame_size; + /* Vector of free frames for this scalar/vector size. */ vlib_frame_t **free_frames; } vlib_frame_size_t; +STATIC_ASSERT_SIZEOF (vlib_frame_size_t, 16); + typedef struct { /* Users opaque value for event type. */ @@ -566,7 +577,7 @@ typedef struct u32 n_suspends; /* Vectors of pending event data indexed by event type index. */ - void **pending_event_data_by_type_index; + u8 **pending_event_data_by_type_index; /* Bitmap of event type-indices with non-empty vectors. */ uword *non_empty_event_type_bitmap; @@ -679,8 +690,8 @@ typedef struct vlib_node_runtime_t *nodes_by_type[VLIB_N_NODE_TYPE]; /* Node runtime indices for input nodes with pending interrupts. */ - void *interrupts; - volatile u32 *pending_interrupts; + void *input_node_interrupts; + void *pre_input_node_interrupts; /* Input nodes are switched from/to interrupt to/from polling mode when average vector length goes above/below polling/interrupt @@ -721,9 +732,6 @@ typedef struct /* Current counts of nodes in each state. */ u32 input_node_counts_by_state[VLIB_N_NODE_STATE]; - /* Hash of (scalar_size,vector_size) to frame_sizes index. */ - uword *frame_size_hash; - /* Per-size frame allocation information. 
*/ vlib_frame_size_t *frame_sizes; diff --git a/src/vlib/node_cli.c b/src/vlib/node_cli.c index 8cf57948cc4..d0bdf5b9097 100644 --- a/src/vlib/node_cli.c +++ b/src/vlib/node_cli.c @@ -42,6 +42,7 @@ #include <fcntl.h> #include <vlib/vlib.h> #include <vlib/threads.h> +#include <vlib/stats/stats.h> #include <math.h> static int @@ -84,13 +85,11 @@ show_node_graph (vlib_main_t * vm, return 0; } -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (show_node_graph_command, static) = { .path = "show vlib graph", .short_help = "Show packet processing node graph", .function = show_node_graph, }; -/* *INDENT-ON* */ static clib_error_t * show_node_graphviz (vlib_main_t * vm, @@ -310,7 +309,7 @@ show_node_graphviz (vlib_main_t * vm, /*? * Dump dot files data to draw a graph of all the nodes. * If the argument 'filter' is provided, only the active nodes (since the last - * "clear run" comand) are selected and they are scaled and colored according + * "clear run" command) are selected and they are scaled and colored according * to their utilization. You can choose to filter nodes that are called, * nodes that receive vectors or both (default). * The 'file' option allows to save data in a temp file. 
@@ -323,14 +322,12 @@ show_node_graphviz (vlib_main_t * vm, * @cliend * @cliexcmd{show vlib graphviz [filter][calls][vectors][file <filename>]} ?*/ -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (show_node_graphviz_command, static) = { .path = "show vlib graphviz", .short_help = "Dump packet processing node graph as a graphviz dotfile", .function = show_node_graphviz, .is_mp_safe = 1, }; -/* *INDENT-ON* */ static u8 * format_vlib_node_state (u8 * s, va_list * va) @@ -465,13 +462,6 @@ format_vlib_node_stats (u8 * s, va_list * va) return s; } -f64 vlib_get_stat_segment_update_rate (void) __attribute__ ((weak)); -f64 -vlib_get_stat_segment_update_rate (void) -{ - return 1e70; -} - static clib_error_t * show_node_runtime (vlib_main_t * vm, unformat_input_t * input, vlib_cli_command_t * cmd) @@ -498,7 +488,6 @@ show_node_runtime (vlib_main_t * vm, uword i, j; f64 dt; u64 n_input, n_output, n_drop, n_punt; - u64 n_internal_vectors, n_internal_calls; u64 n_clocks, l, v, c, d; int brief = 1; int summary = 0; @@ -557,7 +546,6 @@ show_node_runtime (vlib_main_t * vm, vec_sort_with_function (nodes, node_cmp); n_input = n_output = n_drop = n_punt = n_clocks = 0; - n_internal_vectors = n_internal_calls = 0; for (i = 0; i < vec_len (nodes); i++) { n = nodes[i]; @@ -566,7 +554,6 @@ show_node_runtime (vlib_main_t * vm, n_clocks += l; v = n->stats_total.vectors - n->stats_last_clear.vectors; - c = n->stats_total.calls - n->stats_last_clear.calls; switch (n->type) { @@ -577,11 +564,6 @@ show_node_runtime (vlib_main_t * vm, n_output += (n->flags & VLIB_NODE_FLAG_IS_OUTPUT) ? v : 0; n_drop += (n->flags & VLIB_NODE_FLAG_IS_DROP) ? v : 0; n_punt += (n->flags & VLIB_NODE_FLAG_IS_PUNT) ? 
v : 0; - if (!(n->flags & VLIB_NODE_FLAG_IS_OUTPUT)) - { - n_internal_vectors += v; - n_internal_calls += c; - } if (n->flags & VLIB_NODE_FLAG_IS_HANDOFF) n_input += v; break; @@ -606,16 +588,14 @@ show_node_runtime (vlib_main_t * vm, } dt = time_now - nm->time_last_runtime_stats_clear; - vlib_cli_output - (vm, - "Time %.1f, %f sec internal node vector rate %.2f loops/sec %.2f\n" - " vector rates in %.4e, out %.4e, drop %.4e, punt %.4e", - dt, - vlib_get_stat_segment_update_rate (), - internal_node_vector_rates[j], - stat_vm->loops_per_second, - (f64) n_input / dt, - (f64) n_output / dt, (f64) n_drop / dt, (f64) n_punt / dt); + vlib_cli_output ( + vm, + "Time %.1f, %f sec internal node vector rate %.2f loops/sec %.2f\n" + " vector rates in %.4e, out %.4e, drop %.4e, punt %.4e", + dt, vlib_stats_get_segment_update_rate (), + internal_node_vector_rates[j], stat_vm->loops_per_second, + (f64) n_input / dt, (f64) n_output / dt, (f64) n_drop / dt, + (f64) n_punt / dt); if (summary == 0) { @@ -646,14 +626,12 @@ show_node_runtime (vlib_main_t * vm, return 0; } -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (show_node_runtime_command, static) = { .path = "show runtime", .short_help = "Show packet processing runtime", .function = show_node_runtime, .is_mp_safe = 1, }; -/* *INDENT-ON* */ static clib_error_t * clear_node_runtime (vlib_main_t * vm, @@ -692,6 +670,8 @@ clear_node_runtime (vlib_main_t * vm, nm->time_last_runtime_stats_clear = vlib_time_now (vm); } + vlib_stats_set_timestamp (STAT_COUNTER_LAST_STATS_CLEAR, + vm->node_main.time_last_runtime_stats_clear); vlib_worker_thread_barrier_release (vm); vec_free (stat_vms); @@ -699,13 +679,11 @@ clear_node_runtime (vlib_main_t * vm, return 0; } -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (clear_node_runtime_command, static) = { .path = "clear runtime", .short_help = "Clear packet processing runtime statistics", .function = clear_node_runtime, }; -/* *INDENT-ON* */ static clib_error_t * show_node (vlib_main_t * vm, unformat_input_t * input, 
@@ -825,7 +803,6 @@ show_node (vlib_main_t * vm, unformat_input_t * input, if (n->type == VLIB_NODE_TYPE_INTERNAL) { int j = 0; - /* *INDENT-OFF* */ clib_bitmap_foreach (i, n->prev_node_bitmap) { vlib_node_t *pn = vlib_get_node (vm, i); if (j++ % 3 == 0) @@ -834,7 +811,6 @@ show_node (vlib_main_t * vm, unformat_input_t * input, s = format (s, "%-35v", s2); vec_reset_length (s2); } - /* *INDENT-ON* */ if (vec_len (s) == 0) s = format (s, "\n none"); @@ -869,7 +845,6 @@ done: return 0; } -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (show_node_command, static) = { .path = "show node", .short_help = "show node [index] <node-name | node-index>", @@ -922,13 +897,11 @@ done: return err; } -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (set_node_fn_command, static) = { .path = "set node function", .short_help = "set node function <node-name> <variant-name>", .function = set_node_fn, }; -/* *INDENT-ON* */ /* Dummy function to get us linked in. */ void diff --git a/src/vlib/node_format.c b/src/vlib/node_format.c index 54cea9ff804..9e0d1a7de6f 100644 --- a/src/vlib/node_format.c +++ b/src/vlib/node_format.c @@ -73,13 +73,11 @@ format_vlib_node_graph (u8 * s, va_list * va) } j = 0; - /* *INDENT-OFF* */ clib_bitmap_foreach (i, n->prev_node_bitmap) { vec_validate_init_empty (tmps, j, empty); tmps[j].prev_node = i; j++; } - /* *INDENT-ON* */ for (i = 0; i < vec_len (tmps); i++) { diff --git a/src/vlib/node_funcs.h b/src/vlib/node_funcs.h index b1d5c7bcacb..1beac33cf9b 100644 --- a/src/vlib/node_funcs.h +++ b/src/vlib/node_funcs.h @@ -45,6 +45,7 @@ #ifndef included_vlib_node_funcs_h #define included_vlib_node_funcs_h +#include <vppinfra/clib.h> #include <vppinfra/fifo.h> #include <vppinfra/tw_timer_1t_3w_1024sl_ov.h> #include <vppinfra/interrupt.h> @@ -58,7 +59,8 @@ vlib_process_start_switch_stack (vlib_main_t * vm, vlib_process_t * p) { #ifdef CLIB_SANITIZE_ADDR void *stack = p ? (void *) p->stack : vlib_thread_stacks[vm->thread_index]; - u32 stack_bytes = p ? 
p->log2_n_stack_bytes : VLIB_THREAD_STACK_SIZE; + u32 stack_bytes = + p ? (1ULL << p->log2_n_stack_bytes) : VLIB_THREAD_STACK_SIZE; __sanitizer_start_switch_fiber (&vm->asan_stack_save, stack, stack_bytes); #endif } @@ -250,15 +252,22 @@ vlib_node_set_interrupt_pending (vlib_main_t *vm, u32 node_index) { vlib_node_main_t *nm = &vm->node_main; vlib_node_t *n = vec_elt (nm->nodes, node_index); + void *interrupts = 0; - ASSERT (n->type == VLIB_NODE_TYPE_INPUT); + if (n->type == VLIB_NODE_TYPE_INPUT) + interrupts = nm->input_node_interrupts; + else if (n->type == VLIB_NODE_TYPE_PRE_INPUT) + interrupts = nm->pre_input_node_interrupts; + else + { + ASSERT (0); + return; + } if (vm != vlib_get_main ()) - clib_interrupt_set_atomic (nm->interrupts, n->runtime_index); + clib_interrupt_set_atomic (interrupts, n->runtime_index); else - clib_interrupt_set (nm->interrupts, n->runtime_index); - - __atomic_store_n (nm->pending_interrupts, 1, __ATOMIC_RELEASE); + clib_interrupt_set (interrupts, n->runtime_index); } always_inline vlib_process_t * @@ -283,16 +292,6 @@ vlib_frame_no_append (vlib_frame_t * f) f->frame_flags |= VLIB_FRAME_NO_APPEND; } -/* Byte alignment for vector arguments. */ -#define VLIB_FRAME_VECTOR_ALIGN (1 << 4) - -always_inline u32 -vlib_frame_vector_byte_offset (u32 scalar_size) -{ - return round_pow2 (sizeof (vlib_frame_t) + scalar_size, - VLIB_FRAME_VECTOR_ALIGN); -} - /** \brief Get pointer to frame vector data. @param f vlib_frame_t pointer @return pointer to first vector element in frame @@ -300,7 +299,19 @@ vlib_frame_vector_byte_offset (u32 scalar_size) always_inline void * vlib_frame_vector_args (vlib_frame_t * f) { - return (void *) f + vlib_frame_vector_byte_offset (f->scalar_size); + ASSERT (f->vector_offset); + return (void *) f + f->vector_offset; +} + +/** \brief Get pointer to frame vector aux data.
+ @param f vlib_frame_t pointer + @return pointer to first vector aux data element in frame +*/ +always_inline void * +vlib_frame_aux_args (vlib_frame_t *f) +{ + ASSERT (f->aux_offset); + return (void *) f + f->aux_offset; } /** \brief Get pointer to frame scalar data. @@ -314,7 +325,8 @@ vlib_frame_vector_args (vlib_frame_t * f) always_inline void * vlib_frame_scalar_args (vlib_frame_t * f) { - return vlib_frame_vector_args (f) - f->scalar_size; + ASSERT (f->scalar_offset); + return (void *) f + f->scalar_offset; } always_inline vlib_next_frame_t * @@ -369,16 +381,34 @@ vlib_frame_t *vlib_get_next_frame_internal (vlib_main_t * vm, u32 next_index, u32 alloc_new_frame); -#define vlib_get_next_frame_macro(vm,node,next_index,vectors,n_vectors_left,alloc_new_frame) \ -do { \ - vlib_frame_t * _f \ - = vlib_get_next_frame_internal ((vm), (node), (next_index), \ - (alloc_new_frame)); \ - u32 _n = _f->n_vectors; \ - (vectors) = vlib_frame_vector_args (_f) + _n * sizeof ((vectors)[0]); \ - (n_vectors_left) = VLIB_FRAME_SIZE - _n; \ -} while (0) - +#define vlib_get_next_frame_macro(vm, node, next_index, vectors, \ + n_vectors_left, alloc_new_frame) \ + do \ + { \ + vlib_frame_t *_f = vlib_get_next_frame_internal ( \ + (vm), (node), (next_index), (alloc_new_frame)); \ + u32 _n = _f->n_vectors; \ + (vectors) = vlib_frame_vector_args (_f) + _n * sizeof ((vectors)[0]); \ + (n_vectors_left) = VLIB_FRAME_SIZE - _n; \ + } \ + while (0) + +#define vlib_get_next_frame_macro_with_aux(vm, node, next_index, vectors, \ + n_vectors_left, alloc_new_frame, \ + aux_data, maybe_no_aux) \ + do \ + { \ + vlib_frame_t *_f = vlib_get_next_frame_internal ( \ + (vm), (node), (next_index), (alloc_new_frame)); \ + u32 _n = _f->n_vectors; \ + (vectors) = vlib_frame_vector_args (_f) + _n * sizeof ((vectors)[0]); \ + if ((maybe_no_aux) && (_f)->aux_offset == 0) \ + (aux_data) = NULL; \ + else \ + (aux_data) = vlib_frame_aux_args (_f) + _n * sizeof ((aux_data)[0]); \ + (n_vectors_left) = VLIB_FRAME_SIZE 
- _n; \ + } \ + while (0) /** \brief Get pointer to next frame vector data by (@c vlib_node_runtime_t, @c next_index). @@ -392,16 +422,69 @@ do { \ @return @c vectors -- pointer to next available vector slot @return @c n_vectors_left -- number of vector slots available */ -#define vlib_get_next_frame(vm,node,next_index,vectors,n_vectors_left) \ - vlib_get_next_frame_macro (vm, node, next_index, \ - vectors, n_vectors_left, \ +#define vlib_get_next_frame(vm, node, next_index, vectors, n_vectors_left) \ + vlib_get_next_frame_macro (vm, node, next_index, vectors, n_vectors_left, \ /* alloc new frame */ 0) -#define vlib_get_new_next_frame(vm,node,next_index,vectors,n_vectors_left) \ - vlib_get_next_frame_macro (vm, node, next_index, \ - vectors, n_vectors_left, \ +#define vlib_get_new_next_frame(vm, node, next_index, vectors, \ + n_vectors_left) \ + vlib_get_next_frame_macro (vm, node, next_index, vectors, n_vectors_left, \ /* alloc new frame */ 1) +/** \brief Get pointer to next frame vector data and next frame aux data by + (@c vlib_node_runtime_t, @c next_index). + Standard single/dual loop boilerplate element. + @attention This is a MACRO, with SIDE EFFECTS. 
+ @attention This MACRO is unsafe in case the next node does not support + aux_data + + @param vm vlib_main_t pointer, varies by thread + @param node current node vlib_node_runtime_t pointer + @param next_index requested graph arc index + + @return @c vectors -- pointer to next available vector slot + @return @c aux_data -- pointer to next available aux data slot + @return @c n_vectors_left -- number of vector slots available +*/ +#define vlib_get_next_frame_with_aux(vm, node, next_index, vectors, aux_data, \ + n_vectors_left) \ + vlib_get_next_frame_macro_with_aux ( \ + vm, node, next_index, vectors, n_vectors_left, /* alloc new frame */ 0, \ + aux_data, /* maybe_no_aux */ 0) + +#define vlib_get_new_next_frame_with_aux(vm, node, next_index, vectors, \ + aux_data, n_vectors_left) \ + vlib_get_next_frame_macro_with_aux ( \ + vm, node, next_index, vectors, n_vectors_left, /* alloc new frame */ 1, \ + aux_data, /* maybe_no_aux */ 0) + +/** \brief Get pointer to next frame vector data and next frame aux data by + (@c vlib_node_runtime_t, @c next_index). + Standard single/dual loop boilerplate element. + @attention This is a MACRO, with SIDE EFFECTS. + @attention This MACRO is safe in case the next node does not support aux_data. + In that case aux_data is set to NULL. 
+ + @param vm vlib_main_t pointer, varies by thread + @param node current node vlib_node_runtime_t pointer + @param next_index requested graph arc index + + @return @c vectors -- pointer to next available vector slot + @return @c aux_data -- pointer to next available aux data slot + @return @c n_vectors_left -- number of vector slots available +*/ +#define vlib_get_next_frame_with_aux_safe(vm, node, next_index, vectors, \ + aux_data, n_vectors_left) \ + vlib_get_next_frame_macro_with_aux ( \ + vm, node, next_index, vectors, n_vectors_left, /* alloc new frame */ 0, \ + aux_data, /* maybe_no_aux */ 1) + +#define vlib_get_new_next_frame_with_aux_safe(vm, node, next_index, vectors, \ + aux_data, n_vectors_left) \ + vlib_get_next_frame_macro_with_aux ( \ + vm, node, next_index, vectors, n_vectors_left, /* alloc new frame */ 1, \ + aux_data, /* maybe_no_aux */ 1) + /** \brief Release pointer to next frame vector data. Standard single/dual loop boilerplate element. @param vm vlib_main_t pointer, varies by thread @@ -424,6 +507,16 @@ vlib_put_next_frame (vlib_main_t * vm, (v); \ }) +#define vlib_set_next_frame_with_aux_safe(vm, node, next_index, v, aux) \ + ({ \ + uword _n_left; \ + vlib_get_next_frame_with_aux_safe ((vm), (node), (next_index), (v), \ + (aux), _n_left); \ + ASSERT (_n_left > 0); \ + vlib_put_next_frame ((vm), (node), (next_index), _n_left - 1); \ + (v); \ + }) + always_inline void vlib_set_next_frame_buffer (vlib_main_t * vm, vlib_node_runtime_t * node, @@ -434,6 +527,20 @@ vlib_set_next_frame_buffer (vlib_main_t * vm, p[0] = buffer_index; } +always_inline void +vlib_set_next_frame_buffer_with_aux_safe (vlib_main_t *vm, + vlib_node_runtime_t *node, + u32 next_index, u32 buffer_index, + u32 aux) +{ + u32 *p; + u32 *a; + p = vlib_set_next_frame_with_aux_safe (vm, node, next_index, p, a); + p[0] = buffer_index; + if (a) + a[0] = aux; +} + vlib_frame_t *vlib_get_frame_to_node (vlib_main_t * vm, u32 to_node_index); void vlib_put_frame_to_node (vlib_main_t * vm, 
u32 to_node_index, vlib_frame_t * f); @@ -601,7 +708,7 @@ vlib_process_get_events (vlib_main_t * vm, uword ** data_vector) l = _vec_len (p->pending_event_data_by_type_index[t]); if (data_vector) vec_add (*data_vector, p->pending_event_data_by_type_index[t], l); - _vec_len (p->pending_event_data_by_type_index[t]) = 0; + vec_set_len (p->pending_event_data_by_type_index[t], 0); et = pool_elt_at_index (p->event_type_pool, t); @@ -625,7 +732,7 @@ vlib_process_get_events_helper (vlib_process_t * p, uword t, l = _vec_len (p->pending_event_data_by_type_index[t]); if (data_vector) vec_add (*data_vector, p->pending_event_data_by_type_index[t], l); - _vec_len (p->pending_event_data_by_type_index[t]) = 0; + vec_set_len (p->pending_event_data_by_type_index[t], 0); vlib_process_maybe_free_event_type (p, t); @@ -832,7 +939,8 @@ vlib_process_signal_event_helper (vlib_node_main_t * nm, uword n_data_elts, uword n_data_elt_bytes) { uword p_flags, add_to_pending, delete_from_wheel; - void *data_to_be_written_by_caller; + u8 *data_to_be_written_by_caller; + vec_attr_t va = { .elt_sz = n_data_elt_bytes }; ASSERT (n->type == VLIB_NODE_TYPE_PROCESS); @@ -842,7 +950,7 @@ vlib_process_signal_event_helper (vlib_node_main_t * nm, /* Resize data vector and return caller's data to be written. 
*/ { - void *data_vec = p->pending_event_data_by_type_index[t]; + u8 *data_vec = p->pending_event_data_by_type_index[t]; uword l; if (!data_vec && vec_len (nm->recycled_event_data_vectors)) @@ -853,11 +961,7 @@ vlib_process_signal_event_helper (vlib_node_main_t * nm, l = vec_len (data_vec); - data_vec = _vec_resize (data_vec, - /* length_increment */ n_data_elts, - /* total size after increment */ - (l + n_data_elts) * n_data_elt_bytes, - /* header_bytes */ 0, /* data_align */ 0); + data_vec = _vec_realloc_internal (data_vec, l + n_data_elts, &va); p->pending_event_data_by_type_index[t] = data_vec; data_to_be_written_by_caller = data_vec + l * n_data_elt_bytes; @@ -902,8 +1006,11 @@ vlib_process_signal_event_helper (vlib_node_main_t * nm, p->flags = p_flags | VLIB_PROCESS_RESUME_PENDING; vec_add1 (nm->data_from_advancing_timing_wheel, x); if (delete_from_wheel) - TW (tw_timer_stop) ((TWT (tw_timer_wheel) *) nm->timing_wheel, - p->stop_timer_handle); + { + TW (tw_timer_stop) + ((TWT (tw_timer_wheel) *) nm->timing_wheel, p->stop_timer_handle); + p->stop_timer_handle = ~0; + } } return data_to_be_written_by_caller; @@ -1161,8 +1268,7 @@ vlib_node_vectors_per_main_loop_as_integer (vlib_main_t * vm, u32 node_index) return v >> VLIB_LOG2_MAIN_LOOPS_PER_STATS_UPDATE; } -void -vlib_frame_free (vlib_main_t * vm, vlib_node_runtime_t * r, vlib_frame_t * f); +void vlib_frame_free (vlib_main_t *vm, vlib_frame_t *f); /* Return the edge index if present, ~0 otherwise */ uword vlib_node_get_next (vlib_main_t * vm, uword node, uword next_node); @@ -1208,7 +1314,8 @@ void vlib_node_rename (vlib_main_t * vm, u32 node_index, char *fmt, ...); /* Register new packet processing node. Nodes can be registered dynamically via this call or statically via the VLIB_REGISTER_NODE macro. 
*/ -u32 vlib_register_node (vlib_main_t * vm, vlib_node_registration_t * r); +u32 vlib_register_node (vlib_main_t *vm, vlib_node_registration_t *r, + char *fmt, ...); /* Register all node function variants */ void vlib_register_all_node_march_variants (vlib_main_t *vm); @@ -1276,6 +1383,121 @@ vlib_node_function_t * vlib_node_get_preferred_node_fn_variant (vlib_main_t *vm, vlib_node_fn_registration_t *regs); +/* + * vlib_frame_bitmap functions + */ + +#define VLIB_FRAME_BITMAP_N_UWORDS \ + (((VLIB_FRAME_SIZE + uword_bits - 1) & ~(uword_bits - 1)) / uword_bits) + +typedef uword vlib_frame_bitmap_t[VLIB_FRAME_BITMAP_N_UWORDS]; + +static_always_inline void +vlib_frame_bitmap_init (uword *bmp, u32 n_first_bits_set) +{ + u32 n_left = VLIB_FRAME_BITMAP_N_UWORDS; + while (n_first_bits_set >= (sizeof (uword) * 8) && n_left) + { + bmp++[0] = ~0; + n_first_bits_set -= sizeof (uword) * 8; + n_left--; + } + + if (n_first_bits_set && n_left) + { + bmp++[0] = pow2_mask (n_first_bits_set); + n_left--; + } + + while (n_left--) + bmp++[0] = 0; +} + +static_always_inline void +vlib_frame_bitmap_set_bit_at_index (uword *bmp, uword bit_index) +{ + uword_bitmap_set_bits_at_index (bmp, bit_index, 1); +} + +static_always_inline void +_vlib_frame_bitmap_clear_bit_at_index (uword *bmp, uword bit_index) +{ + uword_bitmap_clear_bits_at_index (bmp, bit_index, 1); +} + +static_always_inline void +vlib_frame_bitmap_set_bits_at_index (uword *bmp, uword bit_index, uword n_bits) +{ + uword_bitmap_set_bits_at_index (bmp, bit_index, n_bits); +} + +static_always_inline void +vlib_frame_bitmap_clear_bits_at_index (uword *bmp, uword bit_index, + uword n_bits) +{ + uword_bitmap_clear_bits_at_index (bmp, bit_index, n_bits); +} + +static_always_inline void +vlib_frame_bitmap_clear (uword *bmp) +{ + u32 n_left = VLIB_FRAME_BITMAP_N_UWORDS; + while (n_left--) + bmp++[0] = 0; +} + +static_always_inline void +vlib_frame_bitmap_xor (uword *bmp, uword *bmp2) +{ + u32 n_left = VLIB_FRAME_BITMAP_N_UWORDS; + while 
(n_left--) + bmp++[0] ^= bmp2++[0]; +} + +static_always_inline void +vlib_frame_bitmap_or (uword *bmp, uword *bmp2) +{ + u32 n_left = VLIB_FRAME_BITMAP_N_UWORDS; + while (n_left--) + bmp++[0] |= bmp2++[0]; +} + +static_always_inline void +vlib_frame_bitmap_and (uword *bmp, uword *bmp2) +{ + u32 n_left = VLIB_FRAME_BITMAP_N_UWORDS; + while (n_left--) + bmp++[0] &= bmp2++[0]; +} + +static_always_inline uword +vlib_frame_bitmap_count_set_bits (uword *bmp) +{ + return uword_bitmap_count_set_bits (bmp, VLIB_FRAME_BITMAP_N_UWORDS); +} + +static_always_inline uword +vlib_frame_bitmap_is_bit_set (uword *bmp, uword bit_index) +{ + return uword_bitmap_is_bit_set (bmp, bit_index); +} + +static_always_inline uword +vlib_frame_bitmap_find_first_set (uword *bmp) +{ + uword rv = uword_bitmap_find_first_set (bmp); + ASSERT (rv < VLIB_FRAME_BITMAP_N_UWORDS * uword_bits); + return rv; +} + +#define foreach_vlib_frame_bitmap_set_bit_index(i, v) \ + for (uword _off = 0; _off < ARRAY_LEN (v); _off++) \ + for (uword _tmp = \ + (v[_off]) + 0 * (uword) (i = _off * uword_bits + \ + get_lowest_set_bit_index (v[_off])); \ + _tmp; i = _off * uword_bits + get_lowest_set_bit_index ( \ + _tmp = clear_lowest_set_bit (_tmp))) + #endif /* included_vlib_node_funcs_h */ /* diff --git a/src/vlib/pci/pci.c b/src/vlib/pci/pci.c index 1c1f4b636f5..7284e6675fd 100644 --- a/src/vlib/pci/pci.c +++ b/src/vlib/pci/pci.c @@ -47,26 +47,166 @@ #include <dirent.h> #include <sys/ioctl.h> #include <net/if.h> +#ifdef __linux__ #include <linux/ethtool.h> #include <linux/sockios.h> +#endif /* __linux__ */ vlib_pci_main_t pci_main; -vlib_pci_device_info_t * __attribute__ ((weak)) -vlib_pci_get_device_info (vlib_main_t * vm, vlib_pci_addr_t * addr, - clib_error_t ** error) +VLIB_REGISTER_LOG_CLASS (pci_log, static) = { + .class_name = "pci", +}; + +#define log_debug(h, f, ...) 
\ + vlib_log (VLIB_LOG_LEVEL_DEBUG, pci_log.class, "%U: " f, \ + format_vlib_pci_log, h, ##__VA_ARGS__) + +u8 * +format_vlib_pci_log (u8 *s, va_list *va) +{ + vlib_pci_dev_handle_t h = va_arg (*va, vlib_pci_dev_handle_t); + return format (s, "%U", format_vlib_pci_addr, + vlib_pci_get_addr (vlib_get_main (), h)); +} + +vlib_pci_device_info_t *__attribute__ ((weak)) +vlib_pci_get_device_info (vlib_main_t *vm, vlib_pci_addr_t *addr, + clib_error_t **error) { if (error) *error = clib_error_return (0, "unsupported"); return 0; } +clib_error_t *__attribute__ ((weak)) +vlib_pci_get_device_root_bus (vlib_pci_addr_t *addr, vlib_pci_addr_t *root_bus) +{ + return 0; +} + vlib_pci_addr_t * __attribute__ ((weak)) vlib_pci_get_all_dev_addrs () { return 0; } static clib_error_t * +_vlib_pci_config_set_control_bit (vlib_main_t *vm, vlib_pci_dev_handle_t h, + u16 bit, int new_val, int *already_set) +{ + u16 control, old; + clib_error_t *err; + + err = vlib_pci_read_write_config ( + vm, h, VLIB_READ, STRUCT_OFFSET_OF (vlib_pci_config_t, command), &old, + STRUCT_SIZE_OF (vlib_pci_config_t, command)); + + if (err) + return err; + + control = new_val ? old | bit : old & ~bit; + *already_set = old == control; + if (*already_set) + return 0; + + return vlib_pci_read_write_config ( + vm, h, VLIB_WRITE, STRUCT_OFFSET_OF (vlib_pci_config_t, command), &control, + STRUCT_SIZE_OF (vlib_pci_config_t, command)); +} + +clib_error_t * +vlib_pci_intr_enable (vlib_main_t *vm, vlib_pci_dev_handle_t h) +{ + const vlib_pci_config_reg_command_t cmd = { .intx_disable = 1 }; + clib_error_t *err; + int already_set = 0; + + err = _vlib_pci_config_set_control_bit (vm, h, cmd.as_u16, 0, &already_set); + log_debug (h, "interrupt%senabled", already_set ?
" already " : " "); + return err; +} + +clib_error_t * +vlib_pci_intr_disable (vlib_main_t *vm, vlib_pci_dev_handle_t h) +{ + const vlib_pci_config_reg_command_t cmd = { .intx_disable = 1 }; + clib_error_t *err; + int already_set = 0; + + err = _vlib_pci_config_set_control_bit (vm, h, cmd.as_u16, 1, &already_set); + log_debug (h, "interrupt%sdisabled", already_set ? " already " : " "); + return err; +} + +clib_error_t * +vlib_pci_bus_master_enable (vlib_main_t *vm, vlib_pci_dev_handle_t h) +{ + const vlib_pci_config_reg_command_t cmd = { .bus_master = 1 }; + clib_error_t *err; + int already_set = 0; + + err = _vlib_pci_config_set_control_bit (vm, h, cmd.as_u16, 1, &already_set); + log_debug (h, "bus-master%senabled", already_set ? " already " : " "); + return err; +} + +clib_error_t * +vlib_pci_bus_master_disable (vlib_main_t *vm, vlib_pci_dev_handle_t h) +{ + const vlib_pci_config_reg_command_t cmd = { .bus_master = 1 }; + clib_error_t *err; + int already_set = 0; + + err = _vlib_pci_config_set_control_bit (vm, h, cmd.as_u16, 0, &already_set); + log_debug (h, "bus-master%sdisabled", already_set ?
" already " : " "); + return err; +} + +clib_error_t * +vlib_pci_function_level_reset (vlib_main_t *vm, vlib_pci_dev_handle_t h) +{ + vlib_pci_config_t cfg; + pci_capability_pcie_t *cap; + pci_capability_pcie_dev_control_t dev_control; + clib_error_t *err; + u8 offset; + + log_debug (h, "function level reset"); + + err = vlib_pci_read_write_config (vm, h, VLIB_READ, 0, &cfg, sizeof (cfg)); + if (err) + return err; + + offset = cfg.cap_ptr; + do + { + cap = (pci_capability_pcie_t *) (cfg.data + offset); + + if (cap->capability_id == PCI_CAP_ID_PCIE) + break; + + offset = cap->next_offset; + } + while (offset); + + if (cap->capability_id != PCI_CAP_ID_PCIE) + return clib_error_return (0, "PCIe capability config not found"); + + if (cap->dev_caps.flr_capable == 0) + return clib_error_return (0, "PCIe function level reset not supported"); + + dev_control = cap->dev_control; + dev_control.function_level_reset = 1; + + if ((err = vlib_pci_write_config_u16 ( + vm, h, offset + STRUCT_OFFSET_OF (pci_capability_pcie_t, dev_control), + &dev_control.as_u16))) + return err; + + return 0; +} + +static clib_error_t * show_pci_fn (vlib_main_t * vm, unformat_input_t * input, vlib_cli_command_t * cmd) { @@ -143,27 +283,56 @@ format_vlib_pci_addr (u8 * s, va_list * va) } u8 * -format_vlib_pci_link_speed (u8 * s, va_list * va) +format_vlib_pci_link_port (u8 *s, va_list *va) +{ + vlib_pci_config_t *c = va_arg (*va, vlib_pci_config_t *); + pci_capability_pcie_t *r = pci_config_find_capability (c, PCI_CAP_ID_PCIE); + + if (!r) + return format (s, "unknown"); + + return format (s, "P%d", r->link_caps.port_number); +} + +static u8 * +_vlib_pci_link_speed (u8 *s, u8 speed, u8 width) +{ + static char *speeds[] = { + [1] = "2.5", [2] = "5.0", [3] = "8.0", [4] = "16.0", [5] = "32.0" + }; + + if (speed >= ARRAY_LEN (speeds) || speeds[speed] == 0) + s = format (s, "unknown speed"); + else + s = format (s, "%s GT/s", speeds[speed]); + + return format (s, " x%u", width); +} + +u8 *
+format_vlib_pci_link_speed (u8 *s, va_list *va) +{ + vlib_pci_config_t *c = va_arg (*va, vlib_pci_config_t *); + pci_capability_pcie_t *r = pci_config_find_capability (c, PCI_CAP_ID_PCIE); + + if (!r) + return format (s, "unknown"); + + return _vlib_pci_link_speed (s, r->link_status.link_speed, + r->link_status.negotiated_link_width); +} + +u8 * +format_vlib_pci_link_speed_cap (u8 *s, va_list *va) { - vlib_pci_device_info_t *d = va_arg (*va, vlib_pci_device_info_t *); - pcie_config_regs_t *r = - pci_config_find_capability (&d->config0, PCI_CAP_ID_PCIE); - int width; + vlib_pci_config_t *c = va_arg (*va, vlib_pci_config_t *); + pci_capability_pcie_t *r = pci_config_find_capability (c, PCI_CAP_ID_PCIE); if (!r) return format (s, "unknown"); - width = (r->link_status >> 4) & 0x3f; - - if ((r->link_status & 0xf) == 1) - return format (s, "2.5 GT/s x%u", width); - if ((r->link_status & 0xf) == 2) - return format (s, "5.0 GT/s x%u", width); - if ((r->link_status & 0xf) == 3) - return format (s, "8.0 GT/s x%u", width); - if ((r->link_status & 0xf) == 4) - return format (s, "16.0 GT/s x%u", width); - return format (s, "unknown"); + return _vlib_pci_link_speed (s, r->link_caps.max_link_speed, + r->link_caps.max_link_width); } u8 * @@ -238,29 +407,8 @@ format_vlib_pci_vpd (u8 * s, va_list * args) return s; } - -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (show_pci_command, static) = { .path = "show pci", .short_help = "show pci [all]", .function = show_pci_fn, }; -/* *INDENT-ON* */ - -clib_error_t * -pci_bus_init (vlib_main_t * vm) -{ - vlib_pci_main_t *pm = &pci_main; - pm->log_default = vlib_log_register_class ("pci", 0); - return 0; -} - -VLIB_INIT_FUNCTION (pci_bus_init); - -/* - * fd.io coding-style-patch-verification: ON - * - * Local Variables: - * eval: (c-set-style "gnu") - * End: - */ diff --git a/src/vlib/pci/pci.h b/src/vlib/pci/pci.h index 5aae597f825..becfa80f37a 100644 --- a/src/vlib/pci/pci.h +++ b/src/vlib/pci/pci.h @@ -43,7 +43,6 @@ #include <vlib/vlib.h> #include 
<vlib/pci/pci_config.h> -/* *INDENT-OFF* */ typedef CLIB_PACKED (union { struct @@ -55,7 +54,6 @@ typedef CLIB_PACKED (union }; u32 as_u32; }) vlib_pci_addr_t; -/* *INDENT-ON* */ typedef struct vlib_pci_device_info { @@ -72,6 +70,7 @@ typedef struct vlib_pci_device_info u16 device_class; u16 vendor_id; u16 device_id; + u8 revision; /* Vital Product Data */ u8 *product_name; @@ -82,12 +81,7 @@ typedef struct vlib_pci_device_info u8 *driver_name; /* First 64 bytes of configuration space. */ - union - { - pci_config_type0_regs_t config0; - pci_config_type1_regs_t config1; - u8 config_data[256]; - }; + vlib_pci_config_t config; /* IOMMU Group */ int iommu_group; @@ -96,9 +90,11 @@ typedef struct vlib_pci_device_info typedef u32 vlib_pci_dev_handle_t; -vlib_pci_device_info_t *vlib_pci_get_device_info (vlib_main_t * vm, - vlib_pci_addr_t * addr, - clib_error_t ** error); +vlib_pci_device_info_t *vlib_pci_get_device_info (vlib_main_t *vm, + vlib_pci_addr_t *addr, + clib_error_t **error); +clib_error_t *vlib_pci_get_device_root_bus (vlib_pci_addr_t *addr, + vlib_pci_addr_t *root_bus); vlib_pci_addr_t *vlib_pci_get_all_dev_addrs (); vlib_pci_addr_t *vlib_pci_get_addr (vlib_main_t * vm, vlib_pci_dev_handle_t h); @@ -127,6 +123,12 @@ typedef struct u16 vendor_id, device_id; } pci_device_id_t; +#define PCI_DEVICE_IDS(...) \ + (pci_device_id_t[]) \ + { \ + __VA_ARGS__, {} \ + } + typedef void (pci_intx_handler_function_t) (vlib_main_t * vm, vlib_pci_dev_handle_t handle); typedef void (pci_msix_handler_function_t) (vlib_main_t * vm, @@ -180,8 +182,8 @@ static void __vlib_rm_pci_device_registration_##x (void) \ } \ __VA_ARGS__ pci_device_registration_t x -clib_error_t *vlib_pci_bind_to_uio (vlib_main_t * vm, vlib_pci_addr_t * addr, - char *uio_driver_name); +clib_error_t *vlib_pci_bind_to_uio (vlib_main_t *vm, vlib_pci_addr_t *addr, + char *uio_driver_name, int force); /* Configuration space read/write. 
*/ clib_error_t *vlib_pci_read_write_config (vlib_main_t * vm, @@ -196,15 +198,19 @@ clib_error_t *vlib_pci_read_write_io (vlib_main_t * vm, vlib_read_or_write_t read_or_write, uword address, void *data, u32 n_bytes); - -#define _(t, x) \ -static inline clib_error_t * \ -vlib_pci_read_##x##_##t (vlib_main_t *vm, vlib_pci_dev_handle_t h, \ - uword address, t * data) \ -{ \ - return vlib_pci_read_write_##x (vm, h, VLIB_READ,address, data, \ - sizeof (data[0])); \ -} +#define _(t, x) \ + static inline clib_error_t *vlib_pci_read_##x##_##t ( \ + vlib_main_t *vm, vlib_pci_dev_handle_t h, uword address, t *data) \ + { \ + return vlib_pci_read_write_##x (vm, h, VLIB_READ, address, data, \ + sizeof (data[0])); \ + } \ + static inline clib_error_t *vlib_pci_write_##x##_##t ( \ + vlib_main_t *vm, vlib_pci_dev_handle_t h, uword address, t *data) \ + { \ + return vlib_pci_read_write_##x (vm, h, VLIB_WRITE, address, data, \ + sizeof (data[0])); \ + } _(u32, config); _(u16, config); @@ -216,77 +222,6 @@ _(u8, io); #undef _ -#define _(t, x) \ -static inline clib_error_t * \ -vlib_pci_write_##x##_##t (vlib_main_t *vm, vlib_pci_dev_handle_t h, \ - uword address, t * data) \ -{ \ - return vlib_pci_read_write_##x (vm, h, VLIB_WRITE, \ - address, data, sizeof (data[0])); \ -} - -_(u32, config); -_(u16, config); -_(u8, config); - -_(u32, io); -_(u16, io); -_(u8, io); - -#undef _ - -static inline clib_error_t * -vlib_pci_intr_enable (vlib_main_t * vm, vlib_pci_dev_handle_t h) -{ - u16 command; - clib_error_t *err; - - err = vlib_pci_read_config_u16 (vm, h, 4, &command); - - if (err) - return err; - - command &= ~PCI_COMMAND_INTX_DISABLE; - - return vlib_pci_write_config_u16 (vm, h, 4, &command); -} - -static inline clib_error_t * -vlib_pci_intr_disable (vlib_main_t * vm, vlib_pci_dev_handle_t h) -{ - u16 command; - clib_error_t *err; - - err = vlib_pci_read_config_u16 (vm, h, 4, &command); - - if (err) - return err; - - command |= PCI_COMMAND_INTX_DISABLE; - - return 
vlib_pci_write_config_u16 (vm, h, 4, &command); -} - -static inline clib_error_t * -vlib_pci_bus_master_enable (vlib_main_t * vm, vlib_pci_dev_handle_t h) -{ - clib_error_t *err; - u16 command; - - /* Set bus master enable (BME) */ - err = vlib_pci_read_config_u16 (vm, h, 4, &command); - - if (err) - return err; - - if (command & PCI_COMMAND_BUS_MASTER) - return 0; - - command |= PCI_COMMAND_BUS_MASTER; - - return vlib_pci_write_config_u16 (vm, h, 4, &command); -} - clib_error_t *vlib_pci_device_open (vlib_main_t * vm, vlib_pci_addr_t * addr, pci_device_id_t ids[], vlib_pci_dev_handle_t * handle); @@ -303,11 +238,16 @@ clib_error_t *vlib_pci_register_intx_handler (vlib_main_t * vm, vlib_pci_dev_handle_t h, pci_intx_handler_function_t * intx_handler); +clib_error_t *vlib_pci_unregister_intx_handler (vlib_main_t *vm, + vlib_pci_dev_handle_t h); clib_error_t *vlib_pci_register_msix_handler (vlib_main_t * vm, vlib_pci_dev_handle_t h, u32 start, u32 count, pci_msix_handler_function_t * msix_handler); +clib_error_t *vlib_pci_unregister_msix_handler (vlib_main_t *vm, + vlib_pci_dev_handle_t h, + u32 start, u32 count); clib_error_t *vlib_pci_enable_msix_irq (vlib_main_t * vm, vlib_pci_dev_handle_t h, u16 start, u16 count); @@ -321,11 +261,22 @@ uword vlib_pci_get_msix_file_index (vlib_main_t * vm, vlib_pci_dev_handle_t h, int vlib_pci_supports_virtual_addr_dma (vlib_main_t * vm, vlib_pci_dev_handle_t h); +clib_error_t *vlib_pci_intr_enable (vlib_main_t *, vlib_pci_dev_handle_t); +clib_error_t *vlib_pci_intr_disable (vlib_main_t *, vlib_pci_dev_handle_t); +clib_error_t *vlib_pci_bus_master_enable (vlib_main_t *, + vlib_pci_dev_handle_t); +clib_error_t *vlib_pci_bus_master_disable (vlib_main_t *, + vlib_pci_dev_handle_t); +clib_error_t *vlib_pci_function_level_reset (vlib_main_t *, + vlib_pci_dev_handle_t); unformat_function_t unformat_vlib_pci_addr; format_function_t format_vlib_pci_addr; format_function_t format_vlib_pci_link_speed; +format_function_t 
format_vlib_pci_link_speed_cap; +format_function_t format_vlib_pci_link_port; format_function_t format_vlib_pci_vpd; +format_function_t format_vlib_pci_log; #endif /* included_vlib_pci_h */ diff --git a/src/vlib/pci/pci_config.h b/src/vlib/pci/pci_config.h index b4c38eb53e4..21b40c0f499 100644 --- a/src/vlib/pci/pci_config.h +++ b/src/vlib/pci/pci_config.h @@ -168,210 +168,114 @@ pci_device_class_base (pci_device_class_t c) #define VIRTIO_PCI_LEGACY_DEVICEID_NET 0x1000 #define VIRTIO_PCI_MODERN_DEVICEID_NET 0x1041 -/* - * Under PCI, each device has 256 bytes of configuration address space, - * of which the first 64 bytes are standardized as follows: - */ -typedef struct +typedef union { - u16 vendor_id; - u16 device_id; + struct + { + u16 io_space : 1; + u16 mem_space : 1; + u16 bus_master : 1; + u16 special_cycles : 1; + u16 mem_write_invalidate : 1; + u16 vga_palette_snoop : 1; + u16 parity_err_resp : 1; + u16 _reserved_7 : 1; + u16 serr_enable : 1; + u16 fast_b2b_enable : 1; + u16 intx_disable : 1; + u16 _reserved_11 : 5; + }; + u16 as_u16; +} vlib_pci_config_reg_command_t; + +typedef union +{ + struct + { + u16 _reserved_0 : 3; + u16 intx_status : 1; + u16 capabilities_list : 1; + u16 capaable_66mhz : 1; + u16 _reserved_6 : 1; + u16 fast_b2b_capable : 1; + u16 master_data_parity_error : 1; + u16 devsel_timing : 2; + u16 sig_target_abort : 1; + u16 rec_target_abort : 1; + u16 rec_master_abort : 1; + u16 sig_system_err : 1; + u16 detected_parity_err : 1; + }; + u16 as_u16; +} vlib_pci_config_reg_status_t; - u16 command; -#define PCI_COMMAND_IO (1 << 0) /* Enable response in I/O space */ -#define PCI_COMMAND_MEMORY (1 << 1) /* Enable response in Memory space */ -#define PCI_COMMAND_BUS_MASTER (1 << 2) /* Enable bus mastering */ -#define PCI_COMMAND_SPECIAL (1 << 3) /* Enable response to special cycles */ -#define PCI_COMMAND_WRITE_INVALIDATE (1 << 4) /* Use memory write and invalidate */ -#define PCI_COMMAND_VGA_PALETTE_SNOOP (1 << 5) -#define PCI_COMMAND_PARITY 
(1 << 6) -#define PCI_COMMAND_WAIT (1 << 7) /* Enable address/data stepping */ -#define PCI_COMMAND_SERR (1 << 8) /* Enable SERR */ -#define PCI_COMMAND_BACK_TO_BACK_WRITE (1 << 9) -#define PCI_COMMAND_INTX_DISABLE (1 << 10) /* INTx Emulation Disable */ - - u16 status; -#define PCI_STATUS_INTX_PENDING (1 << 3) -#define PCI_STATUS_CAPABILITY_LIST (1 << 4) -#define PCI_STATUS_66MHZ (1 << 5) /* Support 66 Mhz PCI 2.1 bus */ -#define PCI_STATUS_UDF (1 << 6) /* Support User Definable Features (obsolete) */ -#define PCI_STATUS_BACK_TO_BACK_WRITE (1 << 7) /* Accept fast-back to back */ -#define PCI_STATUS_PARITY_ERROR (1 << 8) /* Detected parity error */ -#define PCI_STATUS_DEVSEL_GET(x) ((x >> 9) & 3) /* DEVSEL timing */ -#define PCI_STATUS_DEVSEL_FAST (0 << 9) -#define PCI_STATUS_DEVSEL_MEDIUM (1 << 9) -#define PCI_STATUS_DEVSEL_SLOW (2 << 9) -#define PCI_STATUS_SIG_TARGET_ABORT (1 << 11) /* Set on target abort */ -#define PCI_STATUS_REC_TARGET_ABORT (1 << 12) /* Master ack of " */ -#define PCI_STATUS_REC_MASTER_ABORT (1 << 13) /* Set on master abort */ -#define PCI_STATUS_SIG_SYSTEM_ERROR (1 << 14) /* Set when we drive SERR */ -#define PCI_STATUS_DETECTED_PARITY_ERROR (1 << 15) - - u8 revision_id; - u8 programming_interface_class; /* Reg. Level Programming Interface */ - - pci_device_class_t device_class:16; - - u8 cache_size; - u8 latency_timer; - - u8 header_type; -#define PCI_HEADER_TYPE_NORMAL 0 -#define PCI_HEADER_TYPE_BRIDGE 1 -#define PCI_HEADER_TYPE_CARDBUS 2 - - u8 bist; -#define PCI_BIST_CODE_MASK 0x0f /* Return result */ -#define PCI_BIST_START 0x40 /* 1 to start BIST, 2 secs or less */ -#define PCI_BIST_CAPABLE 0x80 /* 1 if BIST capable */ -} pci_config_header_t; - -/* Byte swap config header. 
*/ -always_inline void -pci_config_header_little_to_host (pci_config_header_t * r) +typedef enum { - if (!CLIB_ARCH_IS_BIG_ENDIAN) - return; -#define _(f,t) r->f = clib_byte_swap_##t (r->f) - _(vendor_id, u16); - _(device_id, u16); - _(command, u16); - _(status, u16); - _(device_class, u16); -#undef _ -} + PCI_HEADER_TYPE_NORMAL = 0, + PCI_HEADER_TYPE_BRIDGE = 1, + PCI_HEADER_TYPE_CARDBUS = 2 +} __clib_packed pci_config_header_type_t; + +#define foreach_pci_config_reg \ + _ (u16, vendor_id) \ + _ (u16, device_id) \ + _ (vlib_pci_config_reg_command_t, command) \ + _ (vlib_pci_config_reg_status_t, status) \ + _ (u8, revision_id) \ + _ (u8, prog_if) \ + _ (u8, subclass) \ + _ (u8, class) \ + _ (u8, cache_line_size) \ + _ (u8, latency_timer) \ + _ (pci_config_header_type_t, header_type) \ + _ (u8, bist) \ + _ (u32, bar, [6]) \ + _ (u32, cardbus_cis_ptr) \ + _ (u16, sub_vendor_id) \ + _ (u16, sub_device_id) \ + _ (u32, exp_rom_base_addr) \ + _ (u8, cap_ptr) \ + _ (u8, _reserved_0x35, [3]) \ + _ (u32, _reserved_0x38) \ + _ (u8, intr_line) \ + _ (u8, intr_pin) \ + _ (u8, min_grant) \ + _ (u8, max_latency) -/* Header type 0 (normal devices) */ typedef struct { - pci_config_header_t header; - - /* - * Base addresses specify locations in memory or I/O space. - * Decoded size can be determined by writing a value of - * 0xffffffff to the register, and reading it back. Only - * 1 bits are decoded. - */ - u32 base_address[6]; - - u16 cardbus_cis; - - u16 subsystem_vendor_id; - u16 subsystem_id; - - u32 rom_address; -#define PCI_ROM_ADDRESS 0x30 /* Bits 31..11 are address, 10..1 reserved */ -#define PCI_ROM_ADDRESS_ENABLE 0x01 -#define PCI_ROM_ADDRESS_MASK (~0x7ffUL) - - u8 first_capability_offset; - CLIB_PAD_FROM_TO (0x35, 0x3c); - - u8 interrupt_line; - u8 interrupt_pin; - u8 min_grant; - u8 max_latency; +#define _(a, b, ...) 
a b __VA_ARGS__; + foreach_pci_config_reg +#undef _ +} vlib_pci_config_hdr_t; - u8 capability_data[0]; -} pci_config_type0_regs_t; +STATIC_ASSERT_SIZEOF (vlib_pci_config_hdr_t, 64); -always_inline void -pci_config_type0_little_to_host (pci_config_type0_regs_t * r) +typedef union { - int i; - if (!CLIB_ARCH_IS_BIG_ENDIAN) - return; - pci_config_header_little_to_host (&r->header); -#define _(f,t) r->f = clib_byte_swap_##t (r->f) - for (i = 0; i < ARRAY_LEN (r->base_address); i++) - _(base_address[i], u32); - _(cardbus_cis, u16); - _(subsystem_vendor_id, u16); - _(subsystem_id, u16); - _(rom_address, u32); + struct + { +#define _(a, b, ...) a b __VA_ARGS__; + foreach_pci_config_reg #undef _ -} - -/* Header type 1 (PCI-to-PCI bridges) */ -typedef struct -{ - pci_config_header_t header; - - u32 base_address[2]; - - /* Primary/secondary bus number. */ - u8 primary_bus; - u8 secondary_bus; - - /* Highest bus number behind the bridge */ - u8 subordinate_bus; - - u8 secondary_bus_latency_timer; - - /* I/O range behind bridge. */ - u8 io_base, io_limit; + }; + u8 data[256]; +} vlib_pci_config_t; - /* Secondary status register, only bit 14 used */ - u16 secondary_status; +STATIC_ASSERT_SIZEOF (vlib_pci_config_t, 256); - /* Memory range behind bridge in units of 64k bytes. */ - u16 memory_base, memory_limit; -#define PCI_MEMORY_RANGE_TYPE_MASK 0x0fUL -#define PCI_MEMORY_RANGE_MASK (~0x0fUL) - - u16 prefetchable_memory_base, prefetchable_memory_limit; -#define PCI_PREF_RANGE_TYPE_MASK 0x0fUL -#define PCI_PREF_RANGE_TYPE_32 0x00 -#define PCI_PREF_RANGE_TYPE_64 0x01 -#define PCI_PREF_RANGE_MASK (~0x0fUL) - - u32 prefetchable_memory_base_upper_32bits; - u32 prefetchable_memory_limit_upper_32bits; - u16 io_base_upper_16bits; - u16 io_limit_upper_16bits; - - /* Same as for type 0. 
*/ - u8 capability_list_offset; - CLIB_PAD_FROM_TO (0x35, 0x37); - - u32 rom_address; - CLIB_PAD_FROM_TO (0x3c, 0x3e); - - u16 bridge_control; -#define PCI_BRIDGE_CTL_PARITY 0x01 /* Enable parity detection on secondary interface */ -#define PCI_BRIDGE_CTL_SERR 0x02 /* The same for SERR forwarding */ -#define PCI_BRIDGE_CTL_NO_ISA 0x04 /* Disable bridging of ISA ports */ -#define PCI_BRIDGE_CTL_VGA 0x08 /* Forward VGA addresses */ -#define PCI_BRIDGE_CTL_MASTER_ABORT 0x20 /* Report master aborts */ -#define PCI_BRIDGE_CTL_BUS_RESET 0x40 /* Secondary bus reset */ -#define PCI_BRIDGE_CTL_FAST_BACK 0x80 /* Fast Back2Back enabled on secondary interface */ - - u8 capability_data[0]; -} pci_config_type1_regs_t; - -always_inline void -pci_config_type1_little_to_host (pci_config_type1_regs_t * r) +typedef union { - int i; - if (!CLIB_ARCH_IS_BIG_ENDIAN) - return; - pci_config_header_little_to_host (&r->header); -#define _(f,t) r->f = clib_byte_swap_##t (r->f) - for (i = 0; i < ARRAY_LEN (r->base_address); i++) - _(base_address[i], u32); - _(secondary_status, u16); - _(memory_base, u16); - _(memory_limit, u16); - _(prefetchable_memory_base, u16); - _(prefetchable_memory_limit, u16); - _(prefetchable_memory_base_upper_32bits, u32); - _(prefetchable_memory_limit_upper_32bits, u32); - _(io_base_upper_16bits, u16); - _(io_limit_upper_16bits, u16); - _(rom_address, u32); - _(bridge_control, u16); + struct + { +#define _(a, b, ...) a b __VA_ARGS__; + foreach_pci_config_reg #undef _ -} + }; + u8 data[4096]; +} vlib_pci_config_ext_t; + +STATIC_ASSERT_SIZEOF (vlib_pci_config_ext_t, 4096); /* Capabilities. 
*/ typedef enum pci_capability_type @@ -418,16 +322,16 @@ typedef struct } __clib_packed pci_capability_regs_t; always_inline void * -pci_config_find_capability (pci_config_type0_regs_t * t, int cap_type) +pci_config_find_capability (vlib_pci_config_t *t, int cap_type) { pci_capability_regs_t *c; u32 next_offset; u32 ttl = 48; - if (!(t->header.status & PCI_STATUS_CAPABILITY_LIST)) + if (!(t->status.capabilities_list)) return 0; - next_offset = t->first_capability_offset; + next_offset = t->cap_ptr; while (ttl-- && next_offset >= 0x40) { c = (void *) t + (next_offset & ~3); @@ -592,77 +496,6 @@ pcie_code_to_size (int code) return size; } -/* PCI Express capability registers */ -typedef struct -{ - pci_capability_regs_t header; - u16 pcie_capabilities; -#define PCIE_CAP_VERSION(x) (((x) >> 0) & 0xf) -#define PCIE_CAP_DEVICE_TYPE(x) (((x) >> 4) & 0xf) -#define PCIE_DEVICE_TYPE_ENDPOINT 0 -#define PCIE_DEVICE_TYPE_LEGACY_ENDPOINT 1 -#define PCIE_DEVICE_TYPE_ROOT_PORT 4 - /* Upstream/downstream port of PCI Express switch. */ -#define PCIE_DEVICE_TYPE_SWITCH_UPSTREAM 5 -#define PCIE_DEVICE_TYPE_SWITCH_DOWNSTREAM 6 -#define PCIE_DEVICE_TYPE_PCIE_TO_PCI_BRIDGE 7 -#define PCIE_DEVICE_TYPE_PCI_TO_PCIE_BRIDGE 8 - /* Root complex integrated endpoint. 
*/ -#define PCIE_DEVICE_TYPE_ROOT_COMPLEX_ENDPOINT 9 -#define PCIE_DEVICE_TYPE_ROOT_COMPLEX_EVENT_COLLECTOR 10 -#define PCIE_CAP_SLOW_IMPLEMENTED (1 << 8) -#define PCIE_CAP_MSI_IRQ(x) (((x) >> 9) & 0x1f) - u32 dev_capabilities; -#define PCIE_DEVCAP_MAX_PAYLOAD(x) (128 << (((x) >> 0) & 0x7)) -#define PCIE_DEVCAP_PHANTOM_BITS(x) (((x) >> 3) & 0x3) -#define PCIE_DEVCAP_EXTENTED_TAG (1 << 5) -#define PCIE_DEVCAP_L0S 0x1c0 /* L0s Acceptable Latency */ -#define PCIE_DEVCAP_L1 0xe00 /* L1 Acceptable Latency */ -#define PCIE_DEVCAP_ATN_BUT 0x1000 /* Attention Button Present */ -#define PCIE_DEVCAP_ATN_IND 0x2000 /* Attention Indicator Present */ -#define PCIE_DEVCAP_PWR_IND 0x4000 /* Power Indicator Present */ -#define PCIE_DEVCAP_PWR_VAL 0x3fc0000 /* Slot Power Limit Value */ -#define PCIE_DEVCAP_PWR_SCL 0xc000000 /* Slot Power Limit Scale */ - u16 dev_control; -#define PCIE_CTRL_CERE 0x0001 /* Correctable Error Reporting En. */ -#define PCIE_CTRL_NFERE 0x0002 /* Non-Fatal Error Reporting Enable */ -#define PCIE_CTRL_FERE 0x0004 /* Fatal Error Reporting Enable */ -#define PCIE_CTRL_URRE 0x0008 /* Unsupported Request Reporting En. 
*/ -#define PCIE_CTRL_RELAX_EN 0x0010 /* Enable relaxed ordering */ -#define PCIE_CTRL_MAX_PAYLOAD(n) (((n) & 7) << 5) -#define PCIE_CTRL_EXT_TAG 0x0100 /* Extended Tag Field Enable */ -#define PCIE_CTRL_PHANTOM 0x0200 /* Phantom Functions Enable */ -#define PCIE_CTRL_AUX_PME 0x0400 /* Auxiliary Power PM Enable */ -#define PCIE_CTRL_NOSNOOP_EN 0x0800 /* Enable No Snoop */ -#define PCIE_CTRL_MAX_READ_REQUEST(n) (((n) & 7) << 12) - u16 dev_status; -#define PCIE_DEVSTA_AUXPD 0x10 /* AUX Power Detected */ -#define PCIE_DEVSTA_TRPND 0x20 /* Transactions Pending */ - u32 link_capabilities; - u16 link_control; - u16 link_status; - u32 slot_capabilities; - u16 slot_control; - u16 slot_status; - u16 root_control; -#define PCIE_RTCTL_SECEE 0x01 /* System Error on Correctable Error */ -#define PCIE_RTCTL_SENFEE 0x02 /* System Error on Non-Fatal Error */ -#define PCIE_RTCTL_SEFEE 0x04 /* System Error on Fatal Error */ -#define PCIE_RTCTL_PMEIE 0x08 /* PME Interrupt Enable */ -#define PCIE_RTCTL_CRSSVE 0x10 /* CRS Software Visibility Enable */ - u16 root_capabilities; - u32 root_status; - u32 dev_capabilities2; - u16 dev_control2; - u16 dev_status2; - u32 link_capabilities2; - u16 link_control2; - u16 link_status2; - u32 slot_capabilities2; - u16 slot_control2; - u16 slot_status2; -} __clib_packed pcie_config_regs_t; - /* PCI express extended capabilities. 
*/ typedef enum pcie_capability_type { @@ -735,12 +568,178 @@ typedef struct #define PCI_PWR_CAP 12 /* Capability */ #define PCI_PWR_CAP_BUDGET(x) ((x) & 1) /* Included in system budget */ +#define pci_capability_pcie_dev_caps_t_fields \ + _ (3, max_payload_sz) \ + _ (2, phantom_fn_present) \ + _ (1, ext_tags_supported) \ + _ (3, acceptable_l0s_latency) \ + _ (3, acceptable_l1_latency) \ + _ (1, attention_button_present) \ + _ (1, attention_indicator_present) \ + _ (1, power_indicator_present) \ + _ (1, role_based_error_reporting_supported) \ + _ (2, _reserved_16) \ + _ (8, slot_ppower_limit_val) \ + _ (2, slot_power_limit_scale) \ + _ (1, flr_capable) \ + _ (3, _reserved_29) + +#define pci_capability_pcie_dev_control_t_fields \ + _ (1, enable_correctable_error_reporting) \ + _ (1, enable_non_fatal_error_reporting) \ + _ (1, enable_fatal_error_reporting) \ + _ (1, enable_unsupported_request_reporting) \ + _ (1, enable_relaxed_ordering) \ + _ (3, maximum_payload_size) \ + _ (1, extended_tag_field_enable) \ + _ (1, phantom_fn_denable) \ + _ (1, aux_power_pm_enable) \ + _ (1, enable_no_snoop) \ + _ (3, max_read_request_size) \ + _ (1, function_level_reset) + +#define pci_capability_pcie_dev_status_t_fields \ + _ (1, correctable_err_detected) \ + _ (1, non_fatal_err_detected) \ + _ (1, fatal_err_detected) \ + _ (1, unsupported_request_detected) \ + _ (1, aux_power_detected) \ + _ (1, transaction_pending) \ + _ (10, _reserved_6) + +#define pci_capability_pcie_link_caps_t_fields \ + _ (4, max_link_speed) \ + _ (5, max_link_width) \ + _ (2, aspm_support) \ + _ (3, l0s_exit_latency) \ + _ (3, l1_exit_latency) \ + _ (1, clock_power_mgmt_status) \ + _ (1, surprise_down_error_reporting_capable_status) \ + _ (1, data_link_layer_link_active_reporting_capable_status) \ + _ (1, link_bandwidth_notification_capability_status) \ + _ (1, aspm_optionality_compliance) \ + _ (1, _reserved_23) \ + _ (8, port_number) + +#define pci_capability_pcie_link_control_t_fields \ + _ (2, 
aspm_control) \ + _ (1, _reserved_2) \ + _ (1, read_completion_boundary) \ + _ (1, link_disable) \ + _ (1, retrain_clock) \ + _ (1, common_clock_config) \ + _ (1, extended_synch) \ + _ (1, enable_clock_pwr_mgmt) \ + _ (1, hw_autonomous_width_disable) \ + _ (1, link_bw_mgmt_intr_enable) \ + _ (1, link_autonomous_bw_intr_enable) \ + _ (4, _reserved_12) + +#define pci_capability_pcie_link_status_t_fields \ + _ (4, link_speed) \ + _ (6, negotiated_link_width) \ + _ (1, _reserved_10) \ + _ (1, link_training) \ + _ (1, slot_clock_config) \ + _ (1, data_link_layer_link_active) \ + _ (1, link_bw_mgmt_status) \ + _ (1, _reserved_15) + +#define pci_capability_pcie_dev_caps2_t_fields \ + _ (4, compl_timeout_ranges_supported) \ + _ (1, compl_timeout_disable_supported) \ + _ (1, ari_forwarding_supported) \ + _ (1, atomic_op_routing_supported) \ + _ (1, bit32_atomic_op_completer_supported) \ + _ (1, bit64_atomic_op_completer_supported) \ + _ (1, bit128_cas_completer_supported) \ + _ (1, no_ro_enabled_pr_pr_passing) \ + _ (1, ltr_mechanism_supported) \ + _ (1, tph_completer_supported) \ + _ (18, _reserved_14) + +#define pci_capability_pcie_dev_control2_t_fields \ + _ (4, completion_timeout_value) \ + _ (1, completion_timeout_disable) \ + _ (1, ari_forwarding_enable) \ + _ (1, atomic_op_requester_enable) \ + _ (1, atomic_op_egress_blocking) \ + _ (1, ido_request_enable) \ + _ (1, ido_completion_enable) \ + _ (1, ltr_mechanism_enable) \ + _ (5, _reserved_11) + +#define pci_capability_pcie_link_control2_t_fields \ + _ (4, target_link_speed) \ + _ (1, enter_compliance) \ + _ (1, hw_autonomous_speed_disable) \ + _ (1, selectable_de_emphasis) \ + _ (3, transmit_margin) \ + _ (1, enter_modified_compliance) \ + _ (1, compliance_sos) \ + _ (4, compliance_de_emphasis) + +#define pci_capability_pcie_link_status2_t_fields \ + _ (1, current_de_emphasis_level) \ + _ (15, _reserved_1) + +#define __(t, n) \ + typedef union \ + { \ + struct \ + { \ + n##_fields; \ + }; \ + t as_##t; \ + } n; \ + 
STATIC_ASSERT_SIZEOF (n, sizeof (t)) + +#define _(b, n) u32 n : b; +__ (u32, pci_capability_pcie_dev_caps_t); +__ (u32, pci_capability_pcie_link_caps_t); +__ (u32, pci_capability_pcie_dev_caps2_t); +#undef _ +#define _(b, n) u16 n : b; +__ (u16, pci_capability_pcie_dev_control_t); +__ (u16, pci_capability_pcie_dev_status_t); +__ (u16, pci_capability_pcie_link_control_t); +__ (u16, pci_capability_pcie_link_status_t); +__ (u16, pci_capability_pcie_dev_control2_t); +__ (u16, pci_capability_pcie_link_control2_t); +__ (u16, pci_capability_pcie_link_status2_t); +#undef _ +#undef __ + +typedef struct +{ + u8 capability_id; + u8 next_offset; + u16 version_id : 3; + u16 _reserved_0_19 : 13; + pci_capability_pcie_dev_caps_t dev_caps; + pci_capability_pcie_dev_control_t dev_control; + pci_capability_pcie_dev_status_t dev_status; + pci_capability_pcie_link_caps_t link_caps; + pci_capability_pcie_link_control_t link_control; + pci_capability_pcie_link_status_t link_status; + u32 _reserved_0x14; + u16 _reserved_0x18; + u16 _reserved_0x1a; + u32 _reserved_0x1c; + u16 _reserved_0x20; + u16 _reserved_0x22; + pci_capability_pcie_dev_caps2_t dev_caps2; + pci_capability_pcie_dev_control2_t dev_control2; + u16 _reserved_0x2a; + u32 _reserved_0x2c; + pci_capability_pcie_link_control2_t link_control2; + pci_capability_pcie_link_status2_t link_status2; + u32 _reserved_0x34; + u16 _reserved_0x38; + u16 _reserved_0x3a; +} pci_capability_pcie_t; + +STATIC_ASSERT_SIZEOF (pci_capability_pcie_t, 60); + #endif /* included_vlib_pci_config_h */ -/* - * fd.io coding-style-patch-verification: ON - * - * Local Variables: - * eval: (c-set-style "gnu") - * End: - */ diff --git a/src/vlib/physmem.c b/src/vlib/physmem.c index a36444fdc9f..84c61d2a44f 100644 --- a/src/vlib/physmem.c +++ b/src/vlib/physmem.c @@ -17,11 +17,11 @@ #include <sys/types.h> #include <sys/mount.h> #include <sys/mman.h> -#include <sys/fcntl.h> +#include <fcntl.h> #include <sys/stat.h> #include <unistd.h> -#include 
<vppinfra/linux/sysfs.h> +#include <vppinfra/clib.h> #include <vlib/vlib.h> #include <vlib/physmem.h> #include <vlib/unix/unix.h> @@ -103,8 +103,10 @@ vlib_physmem_init (vlib_main_t * vm) vpm->flags |= VLIB_PHYSMEM_MAIN_F_HAVE_PAGEMAP; vec_free (pt); +#ifdef __linux__ if ((error = linux_vfio_init (vm))) return error; +#endif /* __linux__ */ p = clib_mem_alloc_aligned (sizeof (clib_pmalloc_main_t), CLIB_CACHE_LINE_BYTES); @@ -160,13 +162,11 @@ show_physmem (vlib_main_t * vm, return 0; } -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (show_physmem_command, static) = { .path = "show physmem", .short_help = "show physmem [verbose | detail | map]", .function = show_physmem, }; -/* *INDENT-ON* */ static clib_error_t * vlib_physmem_config (vlib_main_t * vm, unformat_input_t * input) diff --git a/src/vlib/punt.c b/src/vlib/punt.c index 4a5e42db203..b59e5d251be 100644 --- a/src/vlib/punt.c +++ b/src/vlib/punt.c @@ -254,12 +254,10 @@ punt_reg_mk_dp (vlib_punt_reason_t reason) old = punt_dp_db[reason]; - /* *INDENT-OFF* */ hash_foreach (key, pri, punt_reg_db, ({ vec_add1(pris, pri); })); - /* *INDENT-ON* */ /* * A check for an empty vector is done in the DP, so the a zero @@ -594,26 +592,22 @@ punt_client_show (vlib_main_t * vm, { u8 *name; - /* *INDENT-OFF* */ hash_foreach(name, pci, punt_client_db, ({ vlib_cli_output (vm, "%U", format_punt_client, pci, PUNT_FORMAT_FLAG_NONE); })); - /* *INDENT-ON* */ } return (NULL); } -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (punt_client_show_command, static) = { .path = "show punt client", .short_help = "show client[s] registered with the punt infra", .function = punt_client_show, }; -/* *INDENT-ON* */ static clib_error_t * punt_reason_show (vlib_main_t * vm, @@ -629,14 +623,12 @@ punt_reason_show (vlib_main_t * vm, return (NULL); } -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (punt_reason_show_command, static) = { .path = "show punt reasons", .short_help = "show all punt reasons", .function = punt_reason_show, }; -/* *INDENT-ON* */ static clib_error_t * 
punt_db_show (vlib_main_t * vm, @@ -645,12 +637,10 @@ punt_db_show (vlib_main_t * vm, u32 pri, ii, jj; u64 key; - /* *INDENT-OFF* */ hash_foreach (key, pri, punt_reg_db, ({ vlib_cli_output (vm, " %U", format_punt_reg, pri); })); - /* *INDENT-ON* */ vlib_cli_output (vm, "\nDerived data-plane data-base:"); vlib_cli_output (vm, @@ -672,14 +662,12 @@ punt_db_show (vlib_main_t * vm, return (NULL); } -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (punt_db_show_command, static) = { .path = "show punt db", .short_help = "show the punt DB", .function = punt_db_show, }; -/* *INDENT-ON* */ static clib_error_t * punt_stats_show (vlib_main_t * vm, @@ -699,14 +687,12 @@ punt_stats_show (vlib_main_t * vm, return (NULL); } -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (punt_stats_show_command, static) = { .path = "show punt stats", .short_help = "show the punt stats", .function = punt_stats_show, }; -/* *INDENT-ON* */ static clib_error_t * punt_init (vlib_main_t * vm) diff --git a/src/vlib/punt_node.c b/src/vlib/punt_node.c index de721046057..4b81a61715a 100644 --- a/src/vlib/punt_node.c +++ b/src/vlib/punt_node.c @@ -280,7 +280,6 @@ VLIB_NODE_FN (punt_dispatch_node) (vlib_main_t * vm, return frame->n_vectors; } -/* *INDENT-OFF* */ VLIB_REGISTER_NODE (punt_dispatch_node) = { .name = "punt-dispatch", .vector_size = sizeof (u32), @@ -293,7 +292,6 @@ VLIB_REGISTER_NODE (punt_dispatch_node) = { }, }; -/* *INDENT-ON* */ #ifndef CLIB_MARCH_VARIANT clib_error_t * diff --git a/src/vlib/stat_weak_inlines.h b/src/vlib/stat_weak_inlines.h deleted file mode 100644 index a68566d0fdd..00000000000 --- a/src/vlib/stat_weak_inlines.h +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2019 Cisco and/or its affiliates. - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * NOTE: Only include this file from external components that require - * a loose coupling to the stats component. - */ - -#ifndef included_stat_weak_inlines_h -#define included_stat_weak_inlines_h -void *vlib_stats_push_heap (void *) __attribute__ ((weak)); -void * -vlib_stats_push_heap (void *unused) -{ - return 0; -}; - -void vlib_stats_pop_heap (void *, void *, u32, int) __attribute__ ((weak)); -void -vlib_stats_pop_heap (void *notused, void *notused2, u32 i, int type) -{ -}; -void vlib_stats_register_error_index (void *, u8 *, u64 *, u64) - __attribute__ ((weak)); -void -vlib_stats_register_error_index (void * notused, u8 * notused2, u64 * notused3, u64 notused4) -{ -}; - -void vlib_stats_pop_heap2 (void *, u32, void *, int) __attribute__ ((weak)); -void -vlib_stats_pop_heap2 (void *notused, u32 notused2, void *notused3, - int notused4) -{ -}; - -void vlib_stat_segment_lock (void) __attribute__ ((weak)); -void -vlib_stat_segment_lock (void) -{ -} - -void vlib_stat_segment_unlock (void) __attribute__ ((weak)); -void -vlib_stat_segment_unlock (void) -{ -} -void vlib_stats_delete_cm (void *) __attribute__ ((weak)); -void -vlib_stats_delete_cm (void *notused) -{ -} - -void vlib_stats_register_mem_heap (void *) __attribute__ ((weak)); -void -vlib_stats_register_mem_heap (void *notused) -{ -} - -#endif diff --git a/src/vlib/stats/cli.c b/src/vlib/stats/cli.c new file mode 100644 index 00000000000..94a852ac751 --- /dev/null +++ b/src/vlib/stats/cli.c @@ -0,0 +1,121 @@ +/* SPDX-License-Identifier: Apache-2.0 + * Copyright(c) 2022 Cisco 
Systems, Inc. + */ + +#include <vlib/vlib.h> +#include <vlib/stats/stats.h> + +static int +name_sort_cmp (void *a1, void *a2) +{ + vlib_stats_entry_t *n1 = a1; + vlib_stats_entry_t *n2 = a2; + + return strcmp ((char *) n1->name, (char *) n2->name); +} + +static u8 * +format_stat_dir_entry (u8 *s, va_list *args) +{ + vlib_stats_entry_t *ep = va_arg (*args, vlib_stats_entry_t *); + char *type_name; + char *format_string; + + format_string = "%-74s %-10s %10lld"; + + switch (ep->type) + { + case STAT_DIR_TYPE_SCALAR_INDEX: + type_name = "ScalarPtr"; + break; + + case STAT_DIR_TYPE_COUNTER_VECTOR_SIMPLE: + case STAT_DIR_TYPE_COUNTER_VECTOR_COMBINED: + type_name = "CMainPtr"; + break; + + case STAT_DIR_TYPE_NAME_VECTOR: + type_name = "NameVector"; + break; + + case STAT_DIR_TYPE_EMPTY: + type_name = "empty"; + break; + + case STAT_DIR_TYPE_SYMLINK: + type_name = "Symlink"; + break; + + default: + type_name = "illegal!"; + break; + } + + return format (s, format_string, ep->name, type_name, 0); +} +static clib_error_t * +show_stat_segment_command_fn (vlib_main_t *vm, unformat_input_t *input, + vlib_cli_command_t *cmd) +{ + vlib_stats_segment_t *sm = vlib_stats_get_segment (); + vlib_stats_entry_t *show_data; + int i; + + int verbose = 0; + + if (unformat (input, "verbose")) + verbose = 1; + + /* Lock even as reader, as this command doesn't handle epoch changes */ + vlib_stats_segment_lock (); + show_data = vec_dup (sm->directory_vector); + vlib_stats_segment_unlock (); + + vec_sort_with_function (show_data, name_sort_cmp); + + vlib_cli_output (vm, "%-74s %10s %10s", "Name", "Type", "Value"); + + for (i = 0; i < vec_len (show_data); i++) + { + vlib_stats_entry_t *ep = vec_elt_at_index (show_data, i); + + if (ep->type == STAT_DIR_TYPE_EMPTY) + continue; + + vlib_cli_output (vm, "%-100U", format_stat_dir_entry, + vec_elt_at_index (show_data, i)); + } + + if (verbose) + { + ASSERT (sm->heap); + vlib_cli_output (vm, "%U", format_clib_mem_heap, sm->heap, + 0 /* verbose */); + 
} + + return 0; +} + +static clib_error_t * +show_stat_segment_hash_command_fn (vlib_main_t *vm, unformat_input_t *input, + vlib_cli_command_t *cmd) +{ + vlib_stats_segment_t *sm = vlib_stats_get_segment (); + char *name; + u32 i; + hash_foreach_mem (name, i, sm->directory_vector_by_name, + ({ vlib_cli_output (vm, "%d: %s\n", i, name); })); + return 0; +} + +VLIB_CLI_COMMAND (show_stat_segment_hash_command, static) = { + .path = "show statistics hash", + .short_help = "show statistics hash", + .function = show_stat_segment_hash_command_fn, +}; + +VLIB_CLI_COMMAND (show_stat_segment_command, static) = { + .path = "show statistics segment", + .short_help = "show statistics segment [verbose]", + .function = show_stat_segment_command_fn, +}; diff --git a/src/vlib/stats/collector.c b/src/vlib/stats/collector.c new file mode 100644 index 00000000000..b23f3df5713 --- /dev/null +++ b/src/vlib/stats/collector.c @@ -0,0 +1,186 @@ +/* SPDX-License-Identifier: Apache-2.0 + * Copyright(c) 2022 Cisco Systems, Inc. 
+ */ + +#include <vlib/vlib.h> +#include <vlib/unix/unix.h> +#include <vlib/stats/stats.h> + +enum +{ + NODE_CLOCKS, + NODE_VECTORS, + NODE_CALLS, + NODE_SUSPENDS, + N_NODE_COUNTERS +}; + +struct +{ + u32 entry_index; + char *name; +} node_counters[] = { + [NODE_CLOCKS] = { .name = "clocks" }, + [NODE_VECTORS] = { .name = "vectors" }, + [NODE_CALLS] = { .name = "calls" }, + [NODE_SUSPENDS] = { .name = "suspends" }, +}; + +static struct +{ + u8 *name; + u32 symlinks[N_NODE_COUNTERS]; +} *node_data = 0; + +static vlib_stats_string_vector_t node_names = 0; + +static inline void +update_node_counters (vlib_stats_segment_t *sm) +{ + clib_bitmap_t *bmp = 0; + vlib_main_t **stat_vms = 0; + vlib_node_t ***node_dups = 0; + u32 n_nodes; + int i, j; + + vlib_node_get_nodes (0 /* vm, for barrier sync */, + (u32) ~0 /* all threads */, 1 /* include stats */, + 0 /* barrier sync */, &node_dups, &stat_vms); + + n_nodes = vec_len (node_dups[0]); + + vec_validate (node_data, n_nodes - 1); + + for (i = 0; i < n_nodes; i++) + if (vec_is_equal (node_data[i].name, node_dups[0][i]->name) == 0) + bmp = clib_bitmap_set (bmp, i, 1); + + if (bmp) + { + u32 last_thread = vlib_get_n_threads (); + vlib_stats_segment_lock (); + clib_bitmap_foreach (i, bmp) + { + if (node_data[i].name) + { + vec_free (node_data[i].name); + for (j = 0; j < ARRAY_LEN (node_data->symlinks); j++) + vlib_stats_remove_entry (node_data[i].symlinks[j]); + } + } + /* We can't merge the loops because a node index corresponding to a given + * node name can change between 2 updates. Otherwise, we could add + * already existing symlinks or delete valid ones. 
+ */ + clib_bitmap_foreach (i, bmp) + { + vlib_node_t *n = node_dups[0][i]; + node_data[i].name = vec_dup (n->name); + vlib_stats_set_string_vector (&node_names, n->index, "%v", n->name); + + for (int j = 0; j < ARRAY_LEN (node_counters); j++) + { + vlib_stats_validate (node_counters[j].entry_index, last_thread, + n_nodes - 1); + node_data[i].symlinks[j] = vlib_stats_add_symlink ( + node_counters[j].entry_index, n->index, "/nodes/%U/%s", + format_vlib_stats_symlink, n->name, node_counters[j].name); + ASSERT (node_data[i].symlinks[j] != CLIB_U32_MAX); + } + } + vlib_stats_segment_unlock (); + vec_free (bmp); + } + + for (j = 0; j < vec_len (node_dups); j++) + { + vlib_node_t **nodes = node_dups[j]; + + for (i = 0; i < vec_len (nodes); i++) + { + counter_t **counters; + counter_t *c; + vlib_node_t *n = nodes[i]; + + counters = vlib_stats_get_entry_data_pointer ( + node_counters[NODE_CLOCKS].entry_index); + c = counters[j]; + c[n->index] = n->stats_total.clocks - n->stats_last_clear.clocks; + + counters = vlib_stats_get_entry_data_pointer ( + node_counters[NODE_VECTORS].entry_index); + c = counters[j]; + c[n->index] = n->stats_total.vectors - n->stats_last_clear.vectors; + + counters = vlib_stats_get_entry_data_pointer ( + node_counters[NODE_CALLS].entry_index); + c = counters[j]; + c[n->index] = n->stats_total.calls - n->stats_last_clear.calls; + + counters = vlib_stats_get_entry_data_pointer ( + node_counters[NODE_SUSPENDS].entry_index); + c = counters[j]; + c[n->index] = n->stats_total.suspends - n->stats_last_clear.suspends; + } + vec_free (node_dups[j]); + } + vec_free (node_dups); + vec_free (stat_vms); +} + +static void +do_stat_segment_updates (vlib_main_t *vm, vlib_stats_segment_t *sm) +{ + if (sm->node_counters_enabled) + update_node_counters (sm); + + vlib_stats_collector_t *c; + pool_foreach (c, sm->collectors) + { + vlib_stats_collector_data_t data = { + .entry_index = c->entry_index, + .vector_index = c->vector_index, + .private_data = c->private_data, + 
.entry = sm->directory_vector + c->entry_index, + }; + c->fn (&data); + } + + /* Heartbeat, so clients detect we're still here */ + sm->directory_vector[STAT_COUNTER_HEARTBEAT].value++; +} + +static uword +stat_segment_collector_process (vlib_main_t *vm, vlib_node_runtime_t *rt, + vlib_frame_t *f) +{ + vlib_stats_segment_t *sm = vlib_stats_get_segment (); + + if (sm->node_counters_enabled) + { + node_names = vlib_stats_add_string_vector ("/sys/node/names"); + ASSERT (node_names); + + for (int x = 0; x < ARRAY_LEN (node_counters); x++) + { + node_counters[x].entry_index = vlib_stats_add_counter_vector ( + "/sys/node/%s", node_counters[x].name); + ASSERT (node_counters[x].entry_index != CLIB_U32_MAX); + } + } + + sm->directory_vector[STAT_COUNTER_BOOTTIME].value = unix_time_now (); + + while (1) + { + do_stat_segment_updates (vm, sm); + vlib_process_suspend (vm, sm->update_interval); + } + return 0; /* or not */ +} + +VLIB_REGISTER_NODE (stat_segment_collector, static) = { + .function = stat_segment_collector_process, + .name = "statseg-collector-process", + .type = VLIB_NODE_TYPE_PROCESS, +}; diff --git a/src/vlib/stats/format.c b/src/vlib/stats/format.c new file mode 100644 index 00000000000..54c11aff743 --- /dev/null +++ b/src/vlib/stats/format.c @@ -0,0 +1,21 @@ +/* SPDX-License-Identifier: Apache-2.0 + * Copyright(c) 2022 Cisco Systems, Inc. + */ + +#include <vlib/vlib.h> +#include <vlib/unix/unix.h> +#include <vlib/stats/stats.h> + +u8 * +format_vlib_stats_symlink (u8 *s, va_list *args) +{ + u8 *input = va_arg (*args, u8 *); + + for (int i = 0; i < vec_len (input); i++) + if (input[i] == '/') + vec_add1 (s, '_'); + else + vec_add1 (s, input[i]); + + return s; +} diff --git a/src/vlib/stats/init.c b/src/vlib/stats/init.c new file mode 100644 index 00000000000..8b382daf333 --- /dev/null +++ b/src/vlib/stats/init.c @@ -0,0 +1,258 @@ +/* SPDX-License-Identifier: Apache-2.0 + * Copyright(c) 2022 Cisco Systems, Inc. 
+ */ + +#include <vlib/vlib.h> +#include <vlib/unix/unix.h> +#include <vlib/stats/stats.h> + +#define STAT_SEGMENT_SOCKET_FILENAME "stats.sock" + +static u32 vlib_loops_stats_counter_index; + +static void +vector_rate_collector_fn (vlib_stats_collector_data_t *d) +{ + vlib_main_t *this_vlib_main; + counter_t **counters, **loops_counters; + counter_t *cb, *loops_cb; + f64 vector_rate = 0.0; + u32 i, n_threads = vlib_get_n_threads (); + + vlib_stats_validate (d->entry_index, 0, n_threads - 1); + counters = d->entry->data; + cb = counters[0]; + + vlib_stats_validate (vlib_loops_stats_counter_index, 0, n_threads - 1); + loops_counters = + vlib_stats_get_entry_data_pointer (vlib_loops_stats_counter_index); + loops_cb = loops_counters[0]; + + for (i = 0; i < n_threads; i++) + { + f64 this_vector_rate; + this_vlib_main = vlib_get_main_by_index (i); + + this_vector_rate = vlib_internal_node_vector_rate (this_vlib_main); + vlib_clear_internal_node_vector_rate (this_vlib_main); + cb[i] = this_vector_rate; + vector_rate += this_vector_rate; + + loops_cb[i] = this_vlib_main->loops_per_second; + } + + /* And set the system average rate */ + vector_rate /= (f64) (i > 1 ? 
i - 1 : 1); + vlib_stats_set_gauge (d->private_data, vector_rate); +} + +clib_error_t * +vlib_stats_init (vlib_main_t *vm) +{ + vlib_stats_segment_t *sm = vlib_stats_get_segment (); + vlib_stats_shared_header_t *shared_header; + vlib_stats_collector_reg_t reg = {}; + + uword memory_size, sys_page_sz; + int mfd; + char *mem_name = "stat segment"; + void *heap, *memaddr; + + memory_size = sm->memory_size; + if (memory_size == 0) + memory_size = STAT_SEGMENT_DEFAULT_SIZE; + + if (sm->log2_page_sz == CLIB_MEM_PAGE_SZ_UNKNOWN) + sm->log2_page_sz = CLIB_MEM_PAGE_SZ_DEFAULT; + + mfd = clib_mem_vm_create_fd (sm->log2_page_sz, mem_name); + + if (mfd == -1) + return clib_error_return (0, "stat segment memory fd failure: %U", + format_clib_error, clib_mem_get_last_error ()); + /* Set size */ + if ((ftruncate (mfd, memory_size)) == -1) + { + close (mfd); + return clib_error_return (0, "stat segment ftruncate failure"); + } + + memaddr = clib_mem_vm_map_shared (0, memory_size, mfd, 0, mem_name); + + if (memaddr == CLIB_MEM_VM_MAP_FAILED) + return clib_error_return (0, "stat segment mmap failure"); + + sys_page_sz = clib_mem_get_page_size (); + + heap = + clib_mem_create_heap (((u8 *) memaddr) + sys_page_sz, + memory_size - sys_page_sz, 1 /* locked */, mem_name); + sm->heap = heap; + sm->memfd = mfd; + + sm->directory_vector_by_name = hash_create_string (0, sizeof (uword)); + sm->shared_header = shared_header = memaddr; + + shared_header->version = STAT_SEGMENT_VERSION; + shared_header->base = memaddr; + + sm->stat_segment_lockp = clib_mem_alloc (sizeof (clib_spinlock_t)); + sm->locking_thread_index = ~0; + sm->n_locks = 0; + clib_spinlock_init (sm->stat_segment_lockp); + + /* Set up the name to counter-vector hash table */ + sm->directory_vector = + vec_new_heap (typeof (sm->directory_vector[0]), STAT_COUNTERS, heap); + sm->dir_vector_first_free_elt = CLIB_U32_MAX; + + shared_header->epoch = 1; + + /* Scalar stats and node counters */ +#define _(E, t, n, p) \ + strcpy 
(sm->directory_vector[STAT_COUNTER_##E].name, p "/" #n); \ + sm->directory_vector[STAT_COUNTER_##E].type = STAT_DIR_TYPE_##t; + foreach_stat_segment_counter_name +#undef _ + /* Save the vector in the shared segment, for clients */ + shared_header->directory_vector = sm->directory_vector; + + vlib_stats_register_mem_heap (heap); + + reg.collect_fn = vector_rate_collector_fn; + reg.private_data = vlib_stats_add_gauge ("/sys/vector_rate"); + reg.entry_index = + vlib_stats_add_counter_vector ("/sys/vector_rate_per_worker"); + vlib_loops_stats_counter_index = + vlib_stats_add_counter_vector ("/sys/loops_per_worker"); + vlib_stats_register_collector_fn (®); + vlib_stats_validate (reg.entry_index, 0, vlib_get_n_threads ()); + vlib_stats_validate (vlib_loops_stats_counter_index, 0, + vlib_get_n_threads ()); + + return 0; +} + +static clib_error_t * +statseg_config (vlib_main_t *vm, unformat_input_t *input) +{ + vlib_stats_segment_t *sm = vlib_stats_get_segment (); + sm->update_interval = 10.0; + + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (input, "socket-name %s", &sm->socket_name)) + ; + /* DEPRECATE: default (does nothing) */ + else if (unformat (input, "default")) + ; + else if (unformat (input, "size %U", unformat_memory_size, + &sm->memory_size)) + ; + else if (unformat (input, "page-size %U", unformat_log2_page_size, + &sm->log2_page_sz)) + ; + else if (unformat (input, "per-node-counters on")) + sm->node_counters_enabled = 1; + else if (unformat (input, "per-node-counters off")) + sm->node_counters_enabled = 0; + else if (unformat (input, "update-interval %f", &sm->update_interval)) + ; + else + return clib_error_return (0, "unknown input `%U'", + format_unformat_error, input); + } + + /* + * NULL-terminate socket name string + * clib_socket_init()->socket_config() use C str* + */ + if (vec_len (sm->socket_name)) + vec_terminate_c_string (sm->socket_name); + + return 0; +} + +VLIB_EARLY_CONFIG_FUNCTION (statseg_config, 
"statseg"); + +/* + * Accept connection on the socket and exchange the fd for the shared + * memory segment. + */ +static clib_error_t * +stats_socket_accept_ready (clib_file_t *uf) +{ + vlib_stats_segment_t *sm = vlib_stats_get_segment (); + clib_error_t *err; + clib_socket_t client = { 0 }; + + err = clib_socket_accept (sm->socket, &client); + if (err) + { + clib_error_report (err); + return err; + } + + /* Send the fd across and close */ + err = clib_socket_sendmsg (&client, 0, 0, &sm->memfd, 1); + if (err) + clib_error_report (err); + clib_socket_close (&client); + + return 0; +} + +static clib_error_t * +stats_segment_socket_init (void) +{ + vlib_stats_segment_t *sm = vlib_stats_get_segment (); + clib_error_t *error; + clib_socket_t *s = clib_mem_alloc (sizeof (clib_socket_t)); + + memset (s, 0, sizeof (clib_socket_t)); + s->config = (char *) sm->socket_name; + s->flags = CLIB_SOCKET_F_IS_SERVER | CLIB_SOCKET_F_SEQPACKET | + CLIB_SOCKET_F_ALLOW_GROUP_WRITE | CLIB_SOCKET_F_PASSCRED; + + if ((error = clib_socket_init (s))) + return error; + + clib_file_t template = { 0 }; + template.read_function = stats_socket_accept_ready; + template.file_descriptor = s->fd; + template.description = format (0, "stats segment listener %s", s->config); + clib_file_add (&file_main, &template); + + sm->socket = s; + + return 0; +} + +static clib_error_t * +stats_segment_socket_exit (vlib_main_t *vm) +{ + /* + * cleanup the listener socket on exit. + */ + vlib_stats_segment_t *sm = vlib_stats_get_segment (); + unlink ((char *) sm->socket_name); + return 0; +} + +VLIB_MAIN_LOOP_EXIT_FUNCTION (stats_segment_socket_exit); + +static clib_error_t * +statseg_init (vlib_main_t *vm) +{ + vlib_stats_segment_t *sm = vlib_stats_get_segment (); + + /* set default socket file name when statseg config stanza is empty. 
*/ + if (!vec_len (sm->socket_name)) + sm->socket_name = format (0, "%s/%s%c", vlib_unix_get_runtime_dir (), + STAT_SEGMENT_SOCKET_FILENAME, 0); + return stats_segment_socket_init (); +} + +VLIB_INIT_FUNCTION (statseg_init) = { + .runs_after = VLIB_INITS ("unix_input_init", "linux_epoll_input_init"), +}; diff --git a/src/vlib/stats/provider_mem.c b/src/vlib/stats/provider_mem.c new file mode 100644 index 00000000000..f3a3f5d3ed4 --- /dev/null +++ b/src/vlib/stats/provider_mem.c @@ -0,0 +1,68 @@ +/* SPDX-License-Identifier: Apache-2.0 + * Copyright(c) 2022 Cisco Systems, Inc. + */ + +#include <vlib/vlib.h> +#include <vlib/unix/unix.h> +#include <vlib/stats/stats.h> + +static clib_mem_heap_t **memory_heaps_vec; + +enum +{ + STAT_MEM_TOTAL = 0, + STAT_MEM_USED, + STAT_MEM_FREE, + STAT_MEM_USED_MMAP, + STAT_MEM_TOTAL_ALLOC, + STAT_MEM_FREE_CHUNKS, + STAT_MEM_RELEASABLE, +} stat_mem_usage_e; + +/* + * Called from the stats periodic process to update memory counters. + */ +static void +stat_provider_mem_usage_update_fn (vlib_stats_collector_data_t *d) +{ + clib_mem_usage_t usage; + clib_mem_heap_t *heap; + counter_t **counters = d->entry->data; + counter_t *cb; + + heap = vec_elt (memory_heaps_vec, d->private_data); + clib_mem_get_heap_usage (heap, &usage); + cb = counters[0]; + cb[STAT_MEM_TOTAL] = usage.bytes_total; + cb[STAT_MEM_USED] = usage.bytes_used; + cb[STAT_MEM_FREE] = usage.bytes_free; + cb[STAT_MEM_USED_MMAP] = usage.bytes_used_mmap; + cb[STAT_MEM_TOTAL_ALLOC] = usage.bytes_max; + cb[STAT_MEM_FREE_CHUNKS] = usage.bytes_free_reclaimed; + cb[STAT_MEM_RELEASABLE] = usage.bytes_overhead; +} + +/* + * Provide memory heap counters. + * Two dimensional array of heap index and per-heap gauges. 
+ */ +void +vlib_stats_register_mem_heap (clib_mem_heap_t *heap) +{ + vlib_stats_collector_reg_t r = {}; + u32 idx; + + vec_add1 (memory_heaps_vec, heap); + + r.entry_index = idx = vlib_stats_add_counter_vector ("/mem/%s", heap->name); + vlib_stats_validate (idx, 0, STAT_MEM_RELEASABLE); + + /* Create symlink */ + vlib_stats_add_symlink (idx, STAT_MEM_USED, "/mem/%s/used", heap->name); + vlib_stats_add_symlink (idx, STAT_MEM_TOTAL, "/mem/%s/total", heap->name); + vlib_stats_add_symlink (idx, STAT_MEM_FREE, "/mem/%s/free", heap->name); + + r.private_data = vec_len (memory_heaps_vec) - 1; + r.collect_fn = stat_provider_mem_usage_update_fn; + vlib_stats_register_collector_fn (&r); +} diff --git a/src/vlib/stats/shared.h b/src/vlib/stats/shared.h new file mode 100644 index 00000000000..8e44ce3dc86 --- /dev/null +++ b/src/vlib/stats/shared.h @@ -0,0 +1,50 @@ +/* SPDX-License-Identifier: Apache-2.0 + * Copyright(c) 2022 Cisco Systems, Inc. + */ + +#ifndef included_stat_segment_shared_h +#define included_stat_segment_shared_h + +typedef enum +{ + STAT_DIR_TYPE_ILLEGAL = 0, + STAT_DIR_TYPE_SCALAR_INDEX, + STAT_DIR_TYPE_COUNTER_VECTOR_SIMPLE, + STAT_DIR_TYPE_COUNTER_VECTOR_COMBINED, + STAT_DIR_TYPE_NAME_VECTOR, + STAT_DIR_TYPE_EMPTY, + STAT_DIR_TYPE_SYMLINK, +} stat_directory_type_t; + +typedef struct +{ + stat_directory_type_t type; + union + { + struct + { + uint32_t index1; + uint32_t index2; + }; + uint64_t index; + uint64_t value; + void *data; + uint8_t **string_vector; + }; +#define VLIB_STATS_MAX_NAME_SZ 128 + char name[VLIB_STATS_MAX_NAME_SZ]; +} vlib_stats_entry_t; + +/* + * Shared header first in the shared memory segment. 
+ */ +typedef struct +{ + uint64_t version; + void *base; + volatile uint64_t epoch; + volatile uint64_t in_progress; + volatile vlib_stats_entry_t *directory_vector; +} vlib_stats_shared_header_t; + +#endif /* included_stat_segment_shared_h */ diff --git a/src/vlib/stats/stats.c b/src/vlib/stats/stats.c new file mode 100644 index 00000000000..b7743ec70f2 --- /dev/null +++ b/src/vlib/stats/stats.c @@ -0,0 +1,574 @@ +/* SPDX-License-Identifier: Apache-2.0 + * Copyright(c) 2022 Cisco Systems, Inc. + */ + +#include <vlib/vlib.h> +#include <vlib/stats/stats.h> + +vlib_stats_main_t vlib_stats_main; + +void +vlib_stats_segment_lock (void) +{ + vlib_main_t *vm = vlib_get_main (); + vlib_stats_segment_t *sm = vlib_stats_get_segment (); + + /* already locked by us */ + if (sm->shared_header->in_progress && + vm->thread_index == sm->locking_thread_index) + goto done; + + ASSERT (sm->locking_thread_index == ~0); + ASSERT (sm->shared_header->in_progress == 0); + ASSERT (sm->n_locks == 0); + + clib_spinlock_lock (sm->stat_segment_lockp); + + sm->shared_header->in_progress = 1; + sm->locking_thread_index = vm->thread_index; +done: + sm->n_locks++; +} + +void +vlib_stats_segment_unlock (void) +{ + vlib_main_t *vm = vlib_get_main (); + vlib_stats_segment_t *sm = vlib_stats_get_segment (); + + ASSERT (sm->shared_header->in_progress == 1); + ASSERT (sm->locking_thread_index == vm->thread_index); + ASSERT (sm->n_locks > 0); + + sm->n_locks--; + + if (sm->n_locks > 0) + return; + + sm->shared_header->epoch++; + __atomic_store_n (&sm->shared_header->in_progress, 0, __ATOMIC_RELEASE); + sm->locking_thread_index = ~0; + clib_spinlock_unlock (sm->stat_segment_lockp); +} + +/* + * Change heap to the stats shared memory segment + */ +void * +vlib_stats_set_heap () +{ + vlib_stats_segment_t *sm = vlib_stats_get_segment (); + + ASSERT (sm && sm->shared_header); + return clib_mem_set_heap (sm->heap); +} + +u32 +vlib_stats_find_entry_index (char *fmt, ...) 
+{ + u8 *name; + va_list va; + + va_start (va, fmt); + name = va_format (0, fmt, &va); + va_end (va); + vec_add1 (name, 0); + + vlib_stats_segment_t *sm = vlib_stats_get_segment (); + hash_pair_t *hp = hash_get_pair (sm->directory_vector_by_name, name); + vec_free (name); + return hp ? hp->value[0] : STAT_SEGMENT_INDEX_INVALID; +} + +static void +hash_set_str_key_alloc (uword **h, const char *key, uword v) +{ + int size = strlen (key) + 1; + void *copy = clib_mem_alloc (size); + clib_memcpy_fast (copy, key, size); + hash_set_mem (*h, copy, v); +} + +static void +hash_unset_str_key_free (uword **h, const char *key) +{ + hash_pair_t *hp = hash_get_pair_mem (*h, key); + if (hp) + { + void *_k = uword_to_pointer (hp->key, void *); + hash_unset_mem (*h, _k); + clib_mem_free (_k); + } +} + +u32 +vlib_stats_create_counter (vlib_stats_entry_t *e) +{ + vlib_stats_segment_t *sm = vlib_stats_get_segment (); + u32 index; + + if (sm->dir_vector_first_free_elt != CLIB_U32_MAX) + { + index = sm->dir_vector_first_free_elt; + sm->dir_vector_first_free_elt = sm->directory_vector[index].index; + } + else + { + index = vec_len (sm->directory_vector); + vec_validate (sm->directory_vector, index); + } + + sm->directory_vector[index] = *e; + + hash_set_str_key_alloc (&sm->directory_vector_by_name, e->name, index); + + return index; +} + +void +vlib_stats_remove_entry (u32 entry_index) +{ + vlib_stats_segment_t *sm = vlib_stats_get_segment (); + vlib_stats_entry_t *e = vlib_stats_get_entry (sm, entry_index); + counter_t **c; + vlib_counter_t **vc; + void *oldheap; + u32 i; + + if (entry_index >= vec_len (sm->directory_vector)) + return; + + vlib_stats_segment_lock (); + + switch (e->type) + { + case STAT_DIR_TYPE_NAME_VECTOR: + for (i = 0; i < vec_len (e->string_vector); i++) + vec_free (e->string_vector[i]); + vec_free (e->string_vector); + break; + + case STAT_DIR_TYPE_COUNTER_VECTOR_SIMPLE: + c = e->data; + e->data = 0; + oldheap = clib_mem_set_heap (sm->heap); + for (i = 0; i < 
vec_len (c); i++) + vec_free (c[i]); + vec_free (c); + clib_mem_set_heap (oldheap); + break; + + case STAT_DIR_TYPE_COUNTER_VECTOR_COMBINED: + vc = e->data; + e->data = 0; + oldheap = clib_mem_set_heap (sm->heap); + for (i = 0; i < vec_len (vc); i++) + vec_free (vc[i]); + vec_free (vc); + clib_mem_set_heap (oldheap); + break; + + case STAT_DIR_TYPE_SCALAR_INDEX: + case STAT_DIR_TYPE_SYMLINK: + break; + default: + ASSERT (0); + } + + vlib_stats_segment_unlock (); + + hash_unset_str_key_free (&sm->directory_vector_by_name, e->name); + + memset (e, 0, sizeof (*e)); + e->type = STAT_DIR_TYPE_EMPTY; + + e->value = sm->dir_vector_first_free_elt; + sm->dir_vector_first_free_elt = entry_index; +} + +static void +vlib_stats_set_entry_name (vlib_stats_entry_t *e, char *s) +{ + u32 i, len = VLIB_STATS_MAX_NAME_SZ - 1; + + for (i = 0; i < len; i++) + { + e->name[i] = s[i]; + if (s[i] == 0) + return; + } + ASSERT (i < VLIB_STATS_MAX_NAME_SZ - 1); + s[i] = 0; +} + +static u32 +vlib_stats_new_entry_internal (stat_directory_type_t t, u8 *name) +{ + vlib_stats_segment_t *sm = vlib_stats_get_segment (); + vlib_stats_shared_header_t *shared_header = sm->shared_header; + vlib_stats_entry_t e = { .type = t }; + + ASSERT (shared_header); + + u32 vector_index = vlib_stats_find_entry_index ("%v", name); + if (vector_index != STAT_SEGMENT_INDEX_INVALID) /* Already registered */ + { + vector_index = ~0; + goto done; + } + + vec_add1 (name, 0); + vlib_stats_set_entry_name (&e, (char *) name); + + vlib_stats_segment_lock (); + vector_index = vlib_stats_create_counter (&e); + + shared_header->directory_vector = sm->directory_vector; + + vlib_stats_segment_unlock (); + +done: + vec_free (name); + return vector_index; +} + +u32 +vlib_stats_add_gauge (char *fmt, ...) 
+{ + va_list va; + u8 *name; + + va_start (va, fmt); + name = va_format (0, fmt, &va); + va_end (va); + return vlib_stats_new_entry_internal (STAT_DIR_TYPE_SCALAR_INDEX, name); +} + +void +vlib_stats_set_gauge (u32 index, u64 value) +{ + vlib_stats_segment_t *sm = vlib_stats_get_segment (); + + ASSERT (index < vec_len (sm->directory_vector)); + sm->directory_vector[index].value = value; +} + +u32 +vlib_stats_add_timestamp (char *fmt, ...) +{ + va_list va; + u8 *name; + + va_start (va, fmt); + name = va_format (0, fmt, &va); + va_end (va); + return vlib_stats_new_entry_internal (STAT_DIR_TYPE_SCALAR_INDEX, name); +} + +void +vlib_stats_set_timestamp (u32 entry_index, f64 value) +{ + vlib_stats_segment_t *sm = vlib_stats_get_segment (); + + ASSERT (entry_index < vec_len (sm->directory_vector)); + sm->directory_vector[entry_index].value = value; +} + +vlib_stats_string_vector_t +vlib_stats_add_string_vector (char *fmt, ...) +{ + vlib_stats_segment_t *sm = vlib_stats_get_segment (); + va_list va; + vlib_stats_header_t *sh; + vlib_stats_string_vector_t sv; + u32 index; + u8 *name; + + va_start (va, fmt); + name = va_format (0, fmt, &va); + va_end (va); + + index = vlib_stats_new_entry_internal (STAT_DIR_TYPE_NAME_VECTOR, name); + if (index == CLIB_U32_MAX) + return 0; + + sv = vec_new_generic (vlib_stats_string_vector_t, 0, + sizeof (vlib_stats_header_t), 0, sm->heap); + sh = vec_header (sv); + sh->entry_index = index; + sm->directory_vector[index].string_vector = sv; + return sv; +} + +void +vlib_stats_set_string_vector (vlib_stats_string_vector_t *svp, + u32 vector_index, char *fmt, ...) 
+{ + vlib_stats_segment_t *sm = vlib_stats_get_segment (); + vlib_stats_header_t *sh = vec_header (*svp); + vlib_stats_entry_t *e = vlib_stats_get_entry (sm, sh->entry_index); + va_list va; + u8 *s; + + if (fmt[0] == 0) + { + if (vec_len (e->string_vector) <= vector_index) + return; + + if (e->string_vector[vector_index] == 0) + return; + + vlib_stats_segment_lock (); + vec_free (e->string_vector[vector_index]); + vlib_stats_segment_unlock (); + return; + } + + vlib_stats_segment_lock (); + + ASSERT (e->string_vector); + + vec_validate (e->string_vector, vector_index); + svp[0] = e->string_vector; + + s = e->string_vector[vector_index]; + + if (s == 0) + s = vec_new_heap (u8 *, 0, sm->heap); + + vec_reset_length (s); + + va_start (va, fmt); + s = va_format (s, fmt, &va); + va_end (va); + vec_add1 (s, 0); + + e->string_vector[vector_index] = s; + + vlib_stats_segment_unlock (); +} + +void +vlib_stats_free_string_vector (vlib_stats_string_vector_t *sv) +{ + vlib_stats_header_t *sh = vec_header (*sv); + vlib_stats_remove_entry (sh->entry_index); +} + +u32 +vlib_stats_add_counter_vector (char *fmt, ...) +{ + va_list va; + u8 *name; + + va_start (va, fmt); + name = va_format (0, fmt, &va); + va_end (va); + return vlib_stats_new_entry_internal (STAT_DIR_TYPE_COUNTER_VECTOR_SIMPLE, + name); +} + +u32 +vlib_stats_add_counter_pair_vector (char *fmt, ...) 
+{ + va_list va; + u8 *name; + + va_start (va, fmt); + name = va_format (0, fmt, &va); + va_end (va); + return vlib_stats_new_entry_internal (STAT_DIR_TYPE_COUNTER_VECTOR_COMBINED, + name); +} + +static int +vlib_stats_validate_will_expand_internal (u32 entry_index, va_list *va) +{ + vlib_stats_segment_t *sm = vlib_stats_get_segment (); + vlib_stats_entry_t *e = vlib_stats_get_entry (sm, entry_index); + void *oldheap; + int rv = 1; + + oldheap = clib_mem_set_heap (sm->heap); + if (e->type == STAT_DIR_TYPE_COUNTER_VECTOR_SIMPLE) + { + u32 idx0 = va_arg (*va, u32); + u32 idx1 = va_arg (*va, u32); + u64 **data = e->data; + + if (idx0 >= vec_len (data)) + goto done; + + for (u32 i = 0; i <= idx0; i++) + if (idx1 >= vec_max_len (data[i])) + goto done; + } + else if (e->type == STAT_DIR_TYPE_COUNTER_VECTOR_COMBINED) + { + u32 idx0 = va_arg (*va, u32); + u32 idx1 = va_arg (*va, u32); + vlib_counter_t **data = e->data; + + va_end (*va); + + if (idx0 >= vec_len (data)) + goto done; + + for (u32 i = 0; i <= idx0; i++) + if (idx1 >= vec_max_len (data[i])) + goto done; + } + else + ASSERT (0); + + rv = 0; +done: + clib_mem_set_heap (oldheap); + return rv; +} + +int +vlib_stats_validate_will_expand (u32 entry_index, ...) +{ + va_list va; + int rv; + + va_start (va, entry_index); + rv = vlib_stats_validate_will_expand_internal (entry_index, &va); + va_end (va); + return rv; +} + +void +vlib_stats_validate (u32 entry_index, ...) 
+{ + vlib_stats_segment_t *sm = vlib_stats_get_segment (); + vlib_stats_entry_t *e = vlib_stats_get_entry (sm, entry_index); + void *oldheap; + va_list va; + int will_expand; + + va_start (va, entry_index); + will_expand = vlib_stats_validate_will_expand_internal (entry_index, &va); + va_end (va); + + if (will_expand) + vlib_stats_segment_lock (); + + oldheap = clib_mem_set_heap (sm->heap); + + va_start (va, entry_index); + + if (e->type == STAT_DIR_TYPE_COUNTER_VECTOR_SIMPLE) + { + u32 idx0 = va_arg (va, u32); + u32 idx1 = va_arg (va, u32); + u64 **data = e->data; + + vec_validate_aligned (data, idx0, CLIB_CACHE_LINE_BYTES); + + for (u32 i = 0; i <= idx0; i++) + vec_validate_aligned (data[i], idx1, CLIB_CACHE_LINE_BYTES); + e->data = data; + } + else if (e->type == STAT_DIR_TYPE_COUNTER_VECTOR_COMBINED) + { + u32 idx0 = va_arg (va, u32); + u32 idx1 = va_arg (va, u32); + vlib_counter_t **data = e->data; + + vec_validate_aligned (data, idx0, CLIB_CACHE_LINE_BYTES); + + for (u32 i = 0; i <= idx0; i++) + vec_validate_aligned (data[i], idx1, CLIB_CACHE_LINE_BYTES); + e->data = data; + } + else + ASSERT (0); + + va_end (va); + + clib_mem_set_heap (oldheap); + + if (will_expand) + vlib_stats_segment_unlock (); +} + +u32 +vlib_stats_add_symlink (u32 entry_index, u32 vector_index, char *fmt, ...) 
+{ + vlib_stats_segment_t *sm = vlib_stats_get_segment (); + vlib_stats_shared_header_t *shared_header = sm->shared_header; + vlib_stats_entry_t e; + va_list va; + u8 *name; + + ASSERT (shared_header); + ASSERT (entry_index < vec_len (sm->directory_vector)); + + va_start (va, fmt); + name = va_format (0, fmt, &va); + va_end (va); + + if (vlib_stats_find_entry_index ("%v", name) == STAT_SEGMENT_INDEX_INVALID) + { + vec_add1 (name, 0); + vlib_stats_set_entry_name (&e, (char *) name); + e.type = STAT_DIR_TYPE_SYMLINK; + e.index1 = entry_index; + e.index2 = vector_index; + vector_index = vlib_stats_create_counter (&e); + + /* Warn clients to refresh any pointers they might be holding */ + shared_header->directory_vector = sm->directory_vector; + } + else + vector_index = ~0; + + vec_free (name); + return vector_index; +} + +void +vlib_stats_rename_symlink (u64 entry_index, char *fmt, ...) +{ + vlib_stats_segment_t *sm = vlib_stats_get_segment (); + vlib_stats_entry_t *e = vlib_stats_get_entry (sm, entry_index); + va_list va; + u8 *new_name; + + hash_unset_str_key_free (&sm->directory_vector_by_name, e->name); + + va_start (va, fmt); + new_name = va_format (0, fmt, &va); + va_end (va); + + vec_add1 (new_name, 0); + vlib_stats_set_entry_name (e, (char *) new_name); + hash_set_str_key_alloc (&sm->directory_vector_by_name, e->name, entry_index); + vec_free (new_name); +} + +f64 +vlib_stats_get_segment_update_rate (void) +{ + vlib_stats_segment_t *sm = vlib_stats_get_segment (); + return sm->update_interval; +} + +void +vlib_stats_register_collector_fn (vlib_stats_collector_reg_t *reg) +{ + vlib_stats_segment_t *sm = vlib_stats_get_segment (); + vlib_stats_collector_t *c; + + ASSERT (reg->entry_index != ~0); + + pool_get_zero (sm->collectors, c); + c->fn = reg->collect_fn; + c->entry_index = reg->entry_index; + c->vector_index = reg->vector_index; + c->private_data = reg->private_data; + + return; +} diff --git a/src/vlib/stats/stats.h b/src/vlib/stats/stats.h new file mode 
100644 index 00000000000..ab1e2828c5a --- /dev/null +++ b/src/vlib/stats/stats.h @@ -0,0 +1,164 @@ +/* SPDX-License-Identifier: Apache-2.0 + * Copyright(c) 2022 Cisco Systems, Inc. + */ + +#ifndef included_stats_stats_h +#define included_stats_stats_h + +#include <vppinfra/socket.h> +#include <vppinfra/lock.h> +#include <vlib/stats/shared.h> + +/* Default stat segment 32m */ +#define STAT_SEGMENT_DEFAULT_SIZE (32 << 20) + +/* Shared segment memory layout version */ +#define STAT_SEGMENT_VERSION 2 + +#define STAT_SEGMENT_INDEX_INVALID UINT32_MAX + +typedef enum +{ + STAT_COUNTER_HEARTBEAT = 0, + STAT_COUNTER_LAST_STATS_CLEAR, + STAT_COUNTER_BOOTTIME, + STAT_COUNTERS +} stat_segment_counter_t; + +#define foreach_stat_segment_counter_name \ + _ (LAST_STATS_CLEAR, SCALAR_INDEX, last_stats_clear, "/sys") \ + _ (HEARTBEAT, SCALAR_INDEX, heartbeat, "/sys") \ + _ (BOOTTIME, SCALAR_INDEX, boottime, "/sys") + +typedef struct +{ + u32 entry_index; + u32 vector_index; + u64 private_data; + vlib_stats_entry_t *entry; +} vlib_stats_collector_data_t; + +typedef void (*vlib_stats_collector_fn_t) (vlib_stats_collector_data_t *); + +typedef struct +{ + vlib_stats_collector_fn_t collect_fn; + u32 entry_index; + u32 vector_index; + u64 private_data; +} vlib_stats_collector_reg_t; + +typedef struct +{ + vlib_stats_collector_fn_t fn; + u32 entry_index; + u32 vector_index; + u64 private_data; +} vlib_stats_collector_t; + +typedef struct +{ + /* internal, does not point to shared memory */ + vlib_stats_collector_t *collectors; + + /* statistics segment */ + uword *directory_vector_by_name; + vlib_stats_entry_t *directory_vector; + u32 dir_vector_first_free_elt; + + /* Update interval */ + f64 update_interval; + + clib_spinlock_t *stat_segment_lockp; + u32 locking_thread_index; + u32 n_locks; + clib_socket_t *socket; + u8 *socket_name; + ssize_t memory_size; + clib_mem_page_sz_t log2_page_sz; + u8 node_counters_enabled; + void *heap; + vlib_stats_shared_header_t + *shared_header; /* 
pointer to shared memory segment */ + int memfd; + +} vlib_stats_segment_t; + +typedef struct +{ + u32 entry_index; +} vlib_stats_header_t; + +typedef struct +{ + vlib_stats_segment_t segment; +} vlib_stats_main_t; + +extern vlib_stats_main_t vlib_stats_main; + +static_always_inline vlib_stats_segment_t * +vlib_stats_get_segment () +{ + return &vlib_stats_main.segment; +} + +static_always_inline vlib_stats_entry_t * +vlib_stats_get_entry (vlib_stats_segment_t *sm, u32 entry_index) +{ + vlib_stats_entry_t *e; + ASSERT (entry_index < vec_len (sm->directory_vector)); + e = sm->directory_vector + entry_index; + ASSERT (e->type != STAT_DIR_TYPE_EMPTY && e->type != STAT_DIR_TYPE_ILLEGAL); + return e; +} + +static_always_inline void * +vlib_stats_get_entry_data_pointer (u32 entry_index) +{ + vlib_stats_segment_t *sm = vlib_stats_get_segment (); + vlib_stats_entry_t *e = vlib_stats_get_entry (sm, entry_index); + return e->data; +} + +clib_error_t *vlib_stats_init (vlib_main_t *vm); +void *vlib_stats_set_heap (); +void vlib_stats_segment_lock (void); +void vlib_stats_segment_unlock (void); +void vlib_stats_register_mem_heap (clib_mem_heap_t *); +f64 vlib_stats_get_segment_update_rate (void); + +/* gauge */ +u32 vlib_stats_add_gauge (char *fmt, ...); +void vlib_stats_set_gauge (u32 entry_index, u64 value); + +/* timestamp */ +u32 vlib_stats_add_timestamp (char *fmt, ...); +void vlib_stats_set_timestamp (u32 entry_index, f64 value); + +/* counter vector */ +u32 vlib_stats_add_counter_vector (char *fmt, ...); + +/* counter pair vector */ +u32 vlib_stats_add_counter_pair_vector (char *fmt, ...); + +/* string vector */ +typedef u8 **vlib_stats_string_vector_t; +vlib_stats_string_vector_t vlib_stats_add_string_vector (char *fmt, ...); +void vlib_stats_set_string_vector (vlib_stats_string_vector_t *sv, u32 index, + char *fmt, ...); +void vlib_stats_free_string_vector (vlib_stats_string_vector_t *sv); + +/* symlink */ +u32 vlib_stats_add_symlink (u32 entry_index, u32 vector_index, 
char *fmt, ...); +void vlib_stats_rename_symlink (u64 entry_index, char *fmt, ...); + +/* common to all types */ +void vlib_stats_validate (u32 entry_index, ...); +int vlib_stats_validate_will_expand (u32 entry_index, ...); +void vlib_stats_remove_entry (u32 entry_index); +u32 vlib_stats_find_entry_index (char *fmt, ...); +void vlib_stats_register_collector_fn (vlib_stats_collector_reg_t *r); + +format_function_t format_vlib_stats_symlink; + +#endif diff --git a/src/vlib/threads.c b/src/vlib/threads.c index ad677dc32b9..87b71adc2bc 100644 --- a/src/vlib/threads.c +++ b/src/vlib/threads.c @@ -16,15 +16,19 @@ #include <signal.h> #include <math.h> +#ifdef __FreeBSD__ +#include <pthread_np.h> +#endif /* __FreeBSD__ */ #include <vppinfra/format.h> #include <vppinfra/time_range.h> #include <vppinfra/interrupt.h> -#include <vppinfra/linux/sysfs.h> +#include <vppinfra/bitmap.h> +#include <vppinfra/unix.h> #include <vlib/vlib.h> #include <vlib/threads.h> -#include <vlib/stat_weak_inlines.h> +#include <vlib/stats/stats.h> u32 vl (void *p) @@ -179,12 +183,15 @@ vlib_thread_init (vlib_main_t * vm) u32 first_index = 1; u32 i; uword *avail_cpu; + u32 stats_num_worker_threads_dir_index; + + stats_num_worker_threads_dir_index = + vlib_stats_add_gauge ("/sys/num_worker_threads"); + ASSERT (stats_num_worker_threads_dir_index != ~0); /* get bitmaps of active cpu cores and sockets */ - tm->cpu_core_bitmap = - clib_sysfs_list_to_bitmap ("/sys/devices/system/cpu/online"); - tm->cpu_socket_bitmap = - clib_sysfs_list_to_bitmap ("/sys/devices/system/node/online"); + tm->cpu_core_bitmap = os_get_online_cpu_core_bitmap (); + tm->cpu_socket_bitmap = os_get_online_cpu_node_bitmap (); avail_cpu = clib_bitmap_dup (tm->cpu_core_bitmap); @@ -199,50 +206,44 @@ vlib_thread_init (vlib_main_t * vm) } /* grab cpu for main thread */ - if (tm->main_lcore == ~0) - { - /* if main-lcore is not set, we try to use lcore 1 */ - if (clib_bitmap_get (avail_cpu, 1)) - tm->main_lcore = 1; - else - tm->main_lcore = 
clib_bitmap_first_set (avail_cpu); - if (tm->main_lcore == (u8) ~ 0) - return clib_error_return (0, "no available cpus to be used for the" - " main thread"); - } - else + if (tm->main_lcore != ~0) { if (clib_bitmap_get (avail_cpu, tm->main_lcore) == 0) return clib_error_return (0, "cpu %u is not available to be used" " for the main thread", tm->main_lcore); + avail_cpu = clib_bitmap_set (avail_cpu, tm->main_lcore, 0); } - avail_cpu = clib_bitmap_set (avail_cpu, tm->main_lcore, 0); /* assume that there is socket 0 only if there is no data from sysfs */ if (!tm->cpu_socket_bitmap) tm->cpu_socket_bitmap = clib_bitmap_set (0, 0, 1); /* pin main thread to main_lcore */ - if (tm->cb.vlib_thread_set_lcore_cb) - { - tm->cb.vlib_thread_set_lcore_cb (0, tm->main_lcore); - } - else + if (tm->main_lcore != ~0) { cpu_set_t cpuset; CPU_ZERO (&cpuset); CPU_SET (tm->main_lcore, &cpuset); - pthread_setaffinity_np (pthread_self (), sizeof (cpu_set_t), &cpuset); + if (pthread_setaffinity_np (pthread_self (), sizeof (cpu_set_t), + &cpuset)) + { + return clib_error_return (0, "could not pin main thread to cpu %u", + tm->main_lcore); + } } /* Set up thread 0 */ vec_validate_aligned (vlib_worker_threads, 0, CLIB_CACHE_LINE_BYTES); - _vec_len (vlib_worker_threads) = 1; + vec_set_len (vlib_worker_threads, 1); w = vlib_worker_threads; w->thread_mheap = clib_mem_get_heap (); w->thread_stack = vlib_thread_stacks[0]; w->cpu_id = tm->main_lcore; +#ifdef __FreeBSD__ + w->lwp = pthread_getthreadid_np (); +#else w->lwp = syscall (SYS_gettid); +#endif /* __FreeBSD__ */ w->thread_id = pthread_self (); tm->n_vlib_mains = 1; @@ -285,7 +286,6 @@ vlib_thread_init (vlib_main_t * vm) if (tr->coremask) { uword c; - /* *INDENT-OFF* */ clib_bitmap_foreach (c, tr->coremask) { if (clib_bitmap_get(avail_cpu, c) == 0) return clib_error_return (0, "cpu %u is not available to be used" @@ -293,7 +293,6 @@ vlib_thread_init (vlib_main_t * vm) avail_cpu = clib_bitmap_set(avail_cpu, c, 0); } - /* *INDENT-ON* */ } else 
{ @@ -314,7 +313,8 @@ vlib_thread_init (vlib_main_t * vm) if (c == ~0) return clib_error_return (0, "no available cpus to be used for" - " the '%s' thread", tr->name); + " the '%s' thread #%u", + tr->name, tr->count); avail_cpu = clib_bitmap_set (avail_cpu, 0, avail_c0); avail_cpu = clib_bitmap_set (avail_cpu, c, 0); @@ -326,6 +326,7 @@ vlib_thread_init (vlib_main_t * vm) clib_bitmap_free (avail_cpu); tm->n_vlib_mains = n_vlib_mains; + vlib_stats_set_gauge (stats_num_worker_threads_dir_index, n_vlib_mains - 1); /* * Allocate the remaining worker threads, and thread stack vector slots @@ -405,54 +406,52 @@ vlib_worker_thread_init (vlib_worker_thread_t * w) void * vlib_worker_thread_bootstrap_fn (void *arg) { - void *rv; vlib_worker_thread_t *w = arg; - vlib_main_t *vm = 0; +#ifdef __FreeBSD__ + w->lwp = pthread_getthreadid_np (); +#else w->lwp = syscall (SYS_gettid); +#endif /* __FreeBSD__ */ w->thread_id = pthread_self (); __os_thread_index = w - vlib_worker_threads; - vm = vlib_global_main.vlib_mains[__os_thread_index]; + if (CLIB_DEBUG > 0) + { + void *frame_addr = __builtin_frame_address (0); + if (frame_addr < (void *) w->thread_stack || + frame_addr > (void *) w->thread_stack + VLIB_THREAD_STACK_SIZE) + { + /* heap is not set yet */ + fprintf (stderr, "thread stack is not set properly\n"); + exit (1); + } + } + + w->thread_function (arg); - vlib_process_start_switch_stack (vm, 0); - rv = (void *) clib_calljmp - ((uword (*)(uword)) w->thread_function, - (uword) arg, w->thread_stack + VLIB_THREAD_STACK_SIZE); - /* NOTREACHED, we hope */ - return rv; + return 0; } void vlib_get_thread_core_numa (vlib_worker_thread_t * w, unsigned cpu_id) { - const char *sys_cpu_path = "/sys/devices/system/cpu/cpu"; - const char *sys_node_path = "/sys/devices/system/node/node"; clib_bitmap_t *nbmp = 0, *cbmp = 0; - u32 node; - u8 *p = 0; - int core_id = -1, numa_id = -1; + int node, core_id = -1, numa_id = -1; - p = format (p, "%s%u/topology/core_id%c", sys_cpu_path, cpu_id, 0); - 
clib_sysfs_read ((char *) p, "%d", &core_id); - vec_reset_length (p); + core_id = os_get_cpu_phys_core_id (cpu_id); + nbmp = os_get_online_cpu_node_bitmap (); - /* *INDENT-OFF* */ - clib_sysfs_read ("/sys/devices/system/node/online", "%U", - unformat_bitmap_list, &nbmp); clib_bitmap_foreach (node, nbmp) { - p = format (p, "%s%u/cpulist%c", sys_node_path, node, 0); - clib_sysfs_read ((char *) p, "%U", unformat_bitmap_list, &cbmp); - if (clib_bitmap_get (cbmp, cpu_id)) - numa_id = node; - vec_reset_length (cbmp); - vec_reset_length (p); + cbmp = os_get_cpu_on_node_bitmap (node); + if (clib_bitmap_get (cbmp, cpu_id)) + numa_id = node; + vec_reset_length (cbmp); } - /* *INDENT-ON* */ + vec_free (nbmp); vec_free (cbmp); - vec_free (p); w->core_id = core_id; w->numa_id = numa_id; @@ -463,6 +462,9 @@ vlib_launch_thread_int (void *fp, vlib_worker_thread_t * w, unsigned cpu_id) { clib_mem_main_t *mm = &clib_mem_main; vlib_thread_main_t *tm = &vlib_thread_main; + pthread_t worker; + pthread_attr_t attr; + cpu_set_t cpuset; void *(*fp_arg) (void *) = fp; void *numa_heap; @@ -489,29 +491,33 @@ vlib_launch_thread_int (void *fp, vlib_worker_thread_t * w, unsigned cpu_id) } } - if (tm->cb.vlib_launch_thread_cb && !w->registration->use_pthreads) - return tm->cb.vlib_launch_thread_cb (fp, (void *) w, cpu_id); - else - { - pthread_t worker; - cpu_set_t cpuset; CPU_ZERO (&cpuset); CPU_SET (cpu_id, &cpuset); - if (pthread_create (&worker, NULL /* attr */ , fp_arg, (void *) w)) + if (pthread_attr_init (&attr)) + return clib_error_return_unix (0, "pthread_attr_init"); + + if (pthread_attr_setstack (&attr, w->thread_stack, + VLIB_THREAD_STACK_SIZE)) + return clib_error_return_unix (0, "pthread_attr_setstack"); + + if (pthread_create (&worker, &attr, fp_arg, (void *) w)) return clib_error_return_unix (0, "pthread_create"); if (pthread_setaffinity_np (worker, sizeof (cpu_set_t), &cpuset)) return clib_error_return_unix (0, "pthread_setaffinity_np"); + if (pthread_attr_destroy (&attr)) + 
return clib_error_return_unix (0, "pthread_attr_destroy"); + return 0; - } } static clib_error_t * start_workers (vlib_main_t * vm) { vlib_global_main_t *vgm = vlib_get_global_main (); + vlib_main_t *fvm = vlib_get_first_main (); int i, j; vlib_worker_thread_t *w; vlib_main_t *vm_clone; @@ -521,6 +527,7 @@ start_workers (vlib_main_t * vm) vlib_node_runtime_t *rt; u32 n_vlib_mains = tm->n_vlib_mains; u32 worker_thread_index; + u32 stats_err_entry_index = fvm->error_main.stats_err_entry_index; clib_mem_heap_t *main_heap = clib_mem_get_per_cpu_heap (); vlib_stats_register_mem_heap (main_heap); @@ -546,7 +553,7 @@ start_workers (vlib_main_t * vm) vec_validate_aligned (vgm->vlib_mains, n_vlib_mains - 1, CLIB_CACHE_LINE_BYTES); - _vec_len (vgm->vlib_mains) = 0; + vec_set_len (vgm->vlib_mains, 0); vec_add1_aligned (vgm->vlib_mains, vm, CLIB_CACHE_LINE_BYTES); if (n_vlib_mains > 1) @@ -590,6 +597,7 @@ start_workers (vlib_main_t * vm) for (k = 0; k < tr->count; k++) { vlib_node_t *n; + u64 **c; vec_add2 (vlib_worker_threads, w, 1); /* Currently unused, may not really work */ @@ -625,12 +633,9 @@ start_workers (vlib_main_t * vm) sizeof (*vm_clone)); vm_clone->thread_index = worker_thread_index; - vm_clone->heap_base = w->thread_mheap; - vm_clone->heap_aligned_base = (void *) - (((uword) w->thread_mheap) & ~(VLIB_FRAME_ALIGN - 1)); vm_clone->pending_rpc_requests = 0; vec_validate (vm_clone->pending_rpc_requests, 0); - _vec_len (vm_clone->pending_rpc_requests) = 0; + vec_set_len (vm_clone->pending_rpc_requests, 0); clib_memset (&vm_clone->random_buffer, 0, sizeof (vm_clone->random_buffer)); clib_spinlock_init @@ -660,7 +665,7 @@ start_workers (vlib_main_t * vm) /* fork the frame dispatch queue */ nm_clone->pending_frames = 0; vec_validate (nm_clone->pending_frames, 10); - _vec_len (nm_clone->pending_frames) = 0; + vec_set_len (nm_clone->pending_frames, 0); /* fork nodes */ nm_clone->nodes = 0; @@ -686,7 +691,6 @@ start_workers (vlib_main_t * vm) 
nm_clone->nodes_by_type[VLIB_NODE_TYPE_INTERNAL]) { vlib_node_t *n = vlib_get_node (vm, rt->node_index); - rt->thread_index = vm_clone->thread_index; /* copy initial runtime_data from node */ if (n->runtime_data && n->runtime_data_bytes > 0) clib_memcpy (rt->runtime_data, n->runtime_data, @@ -698,12 +702,14 @@ start_workers (vlib_main_t * vm) vec_dup_aligned (nm->nodes_by_type[VLIB_NODE_TYPE_INPUT], CLIB_CACHE_LINE_BYTES); clib_interrupt_init ( - &nm_clone->interrupts, + &nm_clone->input_node_interrupts, vec_len (nm_clone->nodes_by_type[VLIB_NODE_TYPE_INPUT])); + clib_interrupt_init ( + &nm_clone->pre_input_node_interrupts, + vec_len (nm_clone->nodes_by_type[VLIB_NODE_TYPE_PRE_INPUT])); vec_foreach (rt, nm_clone->nodes_by_type[VLIB_NODE_TYPE_INPUT]) { vlib_node_t *n = vlib_get_node (vm, rt->node_index); - rt->thread_index = vm_clone->thread_index; /* copy initial runtime_data from node */ if (n->runtime_data && n->runtime_data_bytes > 0) clib_memcpy (rt->runtime_data, n->runtime_data, @@ -718,7 +724,6 @@ start_workers (vlib_main_t * vm) nm_clone->nodes_by_type[VLIB_NODE_TYPE_PRE_INPUT]) { vlib_node_t *n = vlib_get_node (vm, rt->node_index); - rt->thread_index = vm_clone->thread_index; /* copy initial runtime_data from node */ if (n->runtime_data && n->runtime_data_bytes > 0) clib_memcpy (rt->runtime_data, n->runtime_data, @@ -730,10 +735,7 @@ start_workers (vlib_main_t * vm) CLIB_CACHE_LINE_BYTES); /* Create per-thread frame freelist */ - nm_clone->frame_sizes = vec_new (vlib_frame_size_t, 1); -#ifdef VLIB_SUPPORTS_ARBITRARY_SCALAR_SIZES - nm_clone->frame_size_hash = hash_create (0, sizeof (uword)); -#endif + nm_clone->frame_sizes = 0; nm_clone->node_by_error = nm->node_by_error; /* Packet trace buffers are guaranteed to be empty, nothing to do here */ @@ -743,12 +745,10 @@ start_workers (vlib_main_t * vm) CLIB_CACHE_LINE_BYTES); /* Switch to the stats segment ... 
*/ - void *oldheap = vlib_stats_push_heap (0); - vm_clone->error_main.counters = - vec_dup_aligned (vlib_get_first_main ()->error_main.counters, - CLIB_CACHE_LINE_BYTES); - vlib_stats_pop_heap2 (vm_clone->error_main.counters, - worker_thread_index, oldheap, 1); + vlib_stats_validate (stats_err_entry_index, worker_thread_index, + vec_len (fvm->error_main.counters) - 1); + c = vlib_stats_get_entry_data_pointer (stats_err_entry_index); + vm_clone->error_main.counters = c[worker_thread_index]; vm_clone->error_main.counters_last_clear = vec_dup_aligned ( vlib_get_first_main ()->error_main.counters_last_clear, @@ -804,28 +804,37 @@ start_workers (vlib_main_t * vm) { for (j = 0; j < tr->count; j++) { + w = vlib_worker_threads + worker_thread_index++; err = vlib_launch_thread_int (vlib_worker_thread_bootstrap_fn, w, 0); if (err) - clib_error_report (err); + clib_unix_error ("%U, thread %s init on cpu %d failed", + format_clib_error, err, tr->name, 0); } } else { uword c; - /* *INDENT-OFF* */ clib_bitmap_foreach (c, tr->coremask) { w = vlib_worker_threads + worker_thread_index++; err = vlib_launch_thread_int (vlib_worker_thread_bootstrap_fn, w, c); if (err) - clib_error_report (err); - } - /* *INDENT-ON* */ + clib_unix_error ("%U, thread %s init on cpu %d failed", + format_clib_error, err, tr->name, c); + } } } vlib_worker_thread_barrier_sync (vm); + { + clib_error_t *err; + err = vlib_call_init_exit_functions ( + vm, &vgm->num_workers_change_function_registrations, 1 /* call_once */, + 1 /* is_global */); + if (err) + clib_error_report (err); + } vlib_worker_thread_barrier_release (vm); return 0; } @@ -887,6 +896,7 @@ vlib_worker_thread_node_refork (void) vlib_node_main_t *nm, *nm_clone; vlib_node_t **old_nodes_clone; vlib_node_runtime_t *rt, *old_rt; + u64 **c; vlib_node_t *new_n_clone; @@ -898,24 +908,29 @@ vlib_worker_thread_node_refork (void) nm_clone = &vm_clone->node_main; /* Re-clone error heap */ - u64 *old_counters = vm_clone->error_main.counters; u64 
*old_counters_all_clear = vm_clone->error_main.counters_last_clear; clib_memcpy_fast (&vm_clone->error_main, &vm->error_main, sizeof (vm->error_main)); j = vec_len (vm->error_main.counters) - 1; - /* Switch to the stats segment ... */ - void *oldheap = vlib_stats_push_heap (0); - vec_validate_aligned (old_counters, j, CLIB_CACHE_LINE_BYTES); - vm_clone->error_main.counters = old_counters; - vlib_stats_pop_heap2 (vm_clone->error_main.counters, vm_clone->thread_index, - oldheap, 0); + c = vlib_stats_get_entry_data_pointer (vm->error_main.stats_err_entry_index); + vm_clone->error_main.counters = c[vm_clone->thread_index]; vec_validate_aligned (old_counters_all_clear, j, CLIB_CACHE_LINE_BYTES); vm_clone->error_main.counters_last_clear = old_counters_all_clear; - nm_clone = &vm_clone->node_main; + for (j = 0; j < vec_len (nm_clone->next_frames); j++) + { + vlib_next_frame_t *nf = &nm_clone->next_frames[j]; + if ((nf->flags & VLIB_FRAME_IS_ALLOCATED) && nf->frame != NULL) + { + vlib_frame_t *f = nf->frame; + nf->frame = NULL; + vlib_frame_free (vm_clone, f); + } + } + vec_free (nm_clone->next_frames); nm_clone->next_frames = vec_dup_aligned (nm->next_frames, CLIB_CACHE_LINE_BYTES); @@ -990,7 +1005,6 @@ vlib_worker_thread_node_refork (void) vec_foreach (rt, nm_clone->nodes_by_type[VLIB_NODE_TYPE_INTERNAL]) { vlib_node_t *n = vlib_get_node (vm, rt->node_index); - rt->thread_index = vm_clone->thread_index; /* copy runtime_data, will be overwritten later for existing rt */ if (n->runtime_data && n->runtime_data_bytes > 0) clib_memcpy_fast (rt->runtime_data, n->runtime_data, @@ -1015,13 +1029,15 @@ vlib_worker_thread_node_refork (void) vec_dup_aligned (nm->nodes_by_type[VLIB_NODE_TYPE_INPUT], CLIB_CACHE_LINE_BYTES); clib_interrupt_resize ( - &nm_clone->interrupts, + &nm_clone->input_node_interrupts, vec_len (nm_clone->nodes_by_type[VLIB_NODE_TYPE_INPUT])); + clib_interrupt_resize ( + &nm_clone->pre_input_node_interrupts, + vec_len 
(nm_clone->nodes_by_type[VLIB_NODE_TYPE_PRE_INPUT])); vec_foreach (rt, nm_clone->nodes_by_type[VLIB_NODE_TYPE_INPUT]) { vlib_node_t *n = vlib_get_node (vm, rt->node_index); - rt->thread_index = vm_clone->thread_index; /* copy runtime_data, will be overwritten later for existing rt */ if (n->runtime_data && n->runtime_data_bytes > 0) clib_memcpy_fast (rt->runtime_data, n->runtime_data, @@ -1049,7 +1065,6 @@ vlib_worker_thread_node_refork (void) vec_foreach (rt, nm_clone->nodes_by_type[VLIB_NODE_TYPE_PRE_INPUT]) { vlib_node_t *n = vlib_get_node (vm, rt->node_index); - rt->thread_index = vm_clone->thread_index; /* copy runtime_data, will be overwritten later for existing rt */ if (n->runtime_data && n->runtime_data_bytes > 0) clib_memcpy_fast (rt->runtime_data, n->runtime_data, @@ -1068,6 +1083,7 @@ vlib_worker_thread_node_refork (void) vec_free (old_rt); + vec_free (nm_clone->processes); nm_clone->processes = vec_dup_aligned (nm->processes, CLIB_CACHE_LINE_BYTES); nm_clone->node_by_error = nm->node_by_error; @@ -1406,7 +1422,7 @@ vlib_worker_thread_barrier_release (vlib_main_t * vm) * rebuilding the stat segment node clones from the * stat thread... 
*/ - vlib_stat_segment_lock (); + vlib_stats_segment_lock (); /* Do stats elements on main thread */ worker_thread_node_runtime_update_internal (); @@ -1457,7 +1473,7 @@ vlib_worker_thread_barrier_release (vlib_main_t * vm) os_panic (); } } - vlib_stat_segment_unlock (); + vlib_stats_segment_unlock (); } t_closed_total = now - vm->barrier_epoch; @@ -1481,6 +1497,57 @@ vlib_worker_thread_barrier_release (vlib_main_t * vm) vm->clib_time.last_cpu_time, 1 /* leave */ ); } +static void +vlib_worker_sync_rpc (void *args) +{ + ASSERT (vlib_thread_is_main_w_barrier ()); + vlib_worker_threads->wait_before_barrier = 0; +} + +void +vlib_workers_sync (void) +{ + if (PREDICT_FALSE (!vlib_num_workers ())) + return; + + if (!(*vlib_worker_threads->wait_at_barrier) && + !clib_atomic_swap_rel_n (&vlib_worker_threads->wait_before_barrier, 1)) + { + u32 thread_index = vlib_get_thread_index (); + vlib_rpc_call_main_thread (vlib_worker_sync_rpc, (u8 *) &thread_index, + sizeof (thread_index)); + vlib_worker_flush_pending_rpc_requests (vlib_get_main ()); + } + + /* Wait until main thread asks for barrier */ + while (!(*vlib_worker_threads->wait_at_barrier)) + ; + + /* Stop before barrier and make sure all threads are either + * at worker barrier or the barrier before it */ + clib_atomic_fetch_add (&vlib_worker_threads->workers_before_barrier, 1); + while (vlib_num_workers () > (*vlib_worker_threads->workers_at_barrier + + vlib_worker_threads->workers_before_barrier)) + ; +} + +void +vlib_workers_continue (void) +{ + if (PREDICT_FALSE (!vlib_num_workers ())) + return; + + clib_atomic_fetch_add (&vlib_worker_threads->done_work_before_barrier, 1); + + /* Wait until all workers are done with work before barrier */ + while (vlib_worker_threads->done_work_before_barrier < + vlib_worker_threads->workers_before_barrier) + ; + + clib_atomic_fetch_add (&vlib_worker_threads->done_work_before_barrier, -1); + clib_atomic_fetch_add (&vlib_worker_threads->workers_before_barrier, -1); +} + /** * Wait 
until each of the workers has been once around the track */ @@ -1518,16 +1585,26 @@ vlib_worker_wait_one_loop (void) } void +vlib_worker_flush_pending_rpc_requests (vlib_main_t *vm) +{ + vlib_main_t *vm_global = vlib_get_first_main (); + + ASSERT (vm != vm_global); + + clib_spinlock_lock_if_init (&vm_global->pending_rpc_lock); + vec_append (vm_global->pending_rpc_requests, vm->pending_rpc_requests); + vec_reset_length (vm->pending_rpc_requests); + clib_spinlock_unlock_if_init (&vm_global->pending_rpc_lock); +} + +void vlib_worker_thread_fn (void *arg) { vlib_global_main_t *vgm = vlib_get_global_main (); vlib_worker_thread_t *w = (vlib_worker_thread_t *) arg; - vlib_thread_main_t *tm = vlib_get_thread_main (); vlib_main_t *vm = vlib_get_main (); clib_error_t *e; - vlib_process_finish_switch_stack (vm); - ASSERT (vm->thread_index == vlib_get_thread_index ()); vlib_worker_thread_init (w); @@ -1542,27 +1619,27 @@ vlib_worker_thread_fn (void *arg) if (e) clib_error_report (e); - /* Wait until the dpdk init sequence is complete */ - while (tm->extern_thread_mgmt && tm->worker_thread_release == 0) - vlib_worker_thread_barrier_check (); - vlib_worker_loop (vm); } -/* *INDENT-OFF* */ VLIB_REGISTER_THREAD (worker_thread_reg, static) = { .name = "workers", .short_name = "wk", .function = vlib_worker_thread_fn, }; -/* *INDENT-ON* */ +extern clib_march_fn_registration + *vlib_frame_queue_dequeue_with_aux_fn_march_fn_registrations; +extern clib_march_fn_registration + *vlib_frame_queue_dequeue_fn_march_fn_registrations; u32 vlib_frame_queue_main_init (u32 node_index, u32 frame_queue_nelts) { vlib_thread_main_t *tm = vlib_get_thread_main (); + vlib_main_t *vm = vlib_get_main (); vlib_frame_queue_main_t *fqm; vlib_frame_queue_t *fq; + vlib_node_t *node; int i; u32 num_threads; @@ -1574,11 +1651,24 @@ vlib_frame_queue_main_init (u32 node_index, u32 frame_queue_nelts) vec_add2 (tm->frame_queue_mains, fqm, 1); + node = vlib_get_node (vm, fqm->node_index); + ASSERT (node); + if 
(node->aux_offset) + { + fqm->frame_queue_dequeue_fn = + CLIB_MARCH_FN_VOID_POINTER (vlib_frame_queue_dequeue_with_aux_fn); + } + else + { + fqm->frame_queue_dequeue_fn = + CLIB_MARCH_FN_VOID_POINTER (vlib_frame_queue_dequeue_fn); + } + fqm->node_index = node_index; fqm->frame_queue_nelts = frame_queue_nelts; vec_validate (fqm->vlib_frame_queues, tm->n_vlib_mains - 1); - _vec_len (fqm->vlib_frame_queues) = 0; + vec_set_len (fqm->vlib_frame_queues, 0); for (i = 0; i < tm->n_vlib_mains; i++) { fq = vlib_frame_queue_alloc (frame_queue_nelts); @@ -1588,19 +1678,6 @@ vlib_frame_queue_main_init (u32 node_index, u32 frame_queue_nelts) return (fqm - tm->frame_queue_mains); } -int -vlib_thread_cb_register (struct vlib_main_t *vm, vlib_thread_callbacks_t * cb) -{ - vlib_thread_main_t *tm = vlib_get_thread_main (); - - if (tm->extern_thread_mgmt) - return -1; - - tm->cb.vlib_launch_thread_cb = cb->vlib_launch_thread_cb; - tm->extern_thread_mgmt = 1; - return 0; -} - void vlib_process_signal_event_mt_helper (vlib_process_signal_event_mt_args_t * args) @@ -1627,17 +1704,21 @@ vlib_rpc_call_main_thread (void *callback, u8 * args, u32 arg_size) clib_error_t * threads_init (vlib_main_t * vm) { + const vlib_thread_main_t *tm = vlib_get_thread_main (); + + if (tm->main_lcore == ~0 && tm->n_vlib_mains > 1) + return clib_error_return (0, "Configuration error, a main core must " + "be specified when using worker threads"); + return 0; } VLIB_INIT_FUNCTION (threads_init); - static clib_error_t * show_clock_command_fn (vlib_main_t * vm, unformat_input_t * input, vlib_cli_command_t * cmd) { - int i; int verbose = 0; clib_timebase_t _tb, *tb = &_tb; @@ -1650,36 +1731,29 @@ show_clock_command_fn (vlib_main_t * vm, verbose, format_clib_timebase_time, clib_timebase_now (tb)); - if (vlib_get_n_threads () == 1) - return 0; - vlib_cli_output (vm, "Time last barrier release %.9f", vm->time_last_barrier_release); - for (i = 1; i < vlib_get_n_threads (); i++) + foreach_vlib_main () { - vlib_main_t 
*ovm = vlib_get_main_by_index (i); - if (ovm == 0) - continue; - - vlib_cli_output (vm, "%d: %U", i, format_clib_time, &ovm->clib_time, - verbose); - - vlib_cli_output ( - vm, "Thread %d offset %.9f error %.9f", i, ovm->time_offset, - vm->time_last_barrier_release - ovm->time_last_barrier_release); + vlib_cli_output (vm, "%d: %U", this_vlib_main->thread_index, + format_clib_time, &this_vlib_main->clib_time, verbose); + + vlib_cli_output (vm, "Thread %d offset %.9f error %.9f", + this_vlib_main->thread_index, + this_vlib_main->time_offset, + vm->time_last_barrier_release - + this_vlib_main->time_last_barrier_release); } return 0; } -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (f_command, static) = { .path = "show clock", .short_help = "show clock", .function = show_clock_command_fn, }; -/* *INDENT-ON* */ vlib_thread_main_t * vlib_get_thread_main_not_inline (void) diff --git a/src/vlib/threads.h b/src/vlib/threads.h index 91727bacc23..ac0c1d5d266 100644 --- a/src/vlib/threads.h +++ b/src/vlib/threads.h @@ -17,7 +17,11 @@ #include <vlib/main.h> #include <vppinfra/callback.h> +#ifdef __linux__ #include <linux/sched.h> +#elif __FreeBSD__ +#include <sys/sched.h> +#endif /* __linux__ */ void vlib_set_thread_name (char *name); @@ -45,22 +49,6 @@ typedef struct vlib_thread_registration_ uword *coremask; } vlib_thread_registration_t; -/* - * Frames have their cpu / vlib_main_t index in the low-order N bits - * Make VLIB_MAX_CPUS a power-of-two, please... 
- */ - -#ifndef VLIB_MAX_CPUS -#define VLIB_MAX_CPUS 256 -#endif - -#if VLIB_MAX_CPUS > CLIB_MAX_MHEAPS -#error Please increase number of per-cpu mheaps -#endif - -#define VLIB_CPU_MASK (VLIB_MAX_CPUS - 1) /* 0x3f, max */ -#define VLIB_OFFSET_MASK (~VLIB_CPU_MASK) - #define VLIB_LOG2_THREAD_STACK_SIZE (21) #define VLIB_THREAD_STACK_SIZE (1<<VLIB_LOG2_THREAD_STACK_SIZE) @@ -75,6 +63,7 @@ typedef struct CLIB_CACHE_LINE_ALIGN_MARK (cacheline1); u32 buffer_index[VLIB_FRAME_SIZE]; + u32 aux_data[VLIB_FRAME_SIZE]; } vlib_frame_queue_elt_t; @@ -101,6 +90,9 @@ typedef struct const char *barrier_caller; const char *barrier_context; volatile u32 *node_reforks_required; + volatile u32 wait_before_barrier; + volatile u32 workers_before_barrier; + volatile u32 done_work_before_barrier; long lwp; int cpu_id; @@ -130,7 +122,10 @@ typedef struct } vlib_frame_queue_t; -typedef struct +struct vlib_frame_queue_main_t_; +typedef u32 (vlib_frame_queue_dequeue_fn_t) ( + vlib_main_t *vm, struct vlib_frame_queue_main_t_ *fqm); +typedef struct vlib_frame_queue_main_t_ { u32 node_index; u32 frame_queue_nelts; @@ -140,6 +135,7 @@ typedef struct /* for frame queue tracing */ frame_queue_trace_t *frame_queue_traces; frame_queue_nelt_counter_t *frame_queue_histogram; + vlib_frame_queue_dequeue_fn_t *frame_queue_dequeue_fn; } vlib_frame_queue_main_t; typedef struct @@ -182,6 +178,10 @@ void vlib_worker_thread_node_refork (void); * Wait until each of the workers has been once around the track */ void vlib_worker_wait_one_loop (void); +/** + * Flush worker's pending rpc requests to main thread's rpc queue + */ +void vlib_worker_flush_pending_rpc_requests (vlib_main_t *vm); static_always_inline uword vlib_get_thread_index (void) @@ -218,12 +218,20 @@ __foreach_vlib_main_helper (vlib_main_t *ii, vlib_main_t **p) __foreach_vlib_main_helper (ii, &this_vlib_main); ii++) \ if (this_vlib_main) -#define foreach_sched_policy \ - _(SCHED_OTHER, OTHER, "other") \ - _(SCHED_BATCH, BATCH, "batch") \ - 
_(SCHED_IDLE, IDLE, "idle") \ - _(SCHED_FIFO, FIFO, "fifo") \ - _(SCHED_RR, RR, "rr") +#define foreach_sched_policy_posix \ + _ (SCHED_OTHER, OTHER, "other") \ + _ (SCHED_FIFO, FIFO, "fifo") \ + _ (SCHED_RR, RR, "rr") +#define foreach_sched_policy_linux \ + _ (SCHED_BATCH, BATCH, "batch") \ + _ (SCHED_IDLE, IDLE, "idle") + +#ifdef __linux__ +#define foreach_sched_policy \ + foreach_sched_policy_posix foreach_sched_policy_linux +#else +#define foreach_sched_policy foreach_sched_policy_posix +#endif /* __linux__ */ typedef enum { @@ -235,13 +243,6 @@ typedef enum typedef struct { - clib_error_t *(*vlib_launch_thread_cb) (void *fp, vlib_worker_thread_t * w, - unsigned cpu_id); - clib_error_t *(*vlib_thread_set_lcore_cb) (u32 thread, u16 cpu); -} vlib_thread_callbacks_t; - -typedef struct -{ /* Link list of registrations, built by constructors */ vlib_thread_registration_t *next; @@ -252,10 +253,6 @@ typedef struct vlib_worker_thread_t *worker_threads; - /* - * Launch all threads as pthreads, - * not eal_rte_launch (strict affinity) threads - */ int use_pthreads; /* Number of vlib_main / vnet_main clones */ @@ -297,10 +294,6 @@ typedef struct /* scheduling policy priority */ u32 sched_priority; - /* callbacks */ - vlib_thread_callbacks_t cb; - int extern_thread_mgmt; - /* NUMA-bound heap size */ uword numa_heap_size; @@ -370,12 +363,10 @@ vlib_worker_thread_barrier_check (void) if (PREDICT_FALSE (vlib_worker_threads->barrier_elog_enabled)) { vlib_worker_thread_t *w = vlib_worker_threads + thread_index; - /* *INDENT-OFF* */ ELOG_TYPE_DECLARE (e) = { .format = "barrier-wait-thread-%d", .format_args = "i4", }; - /* *INDENT-ON* */ struct { @@ -419,12 +410,10 @@ vlib_worker_thread_barrier_check (void) { t = vlib_time_now (vm) - t; vlib_worker_thread_t *w = vlib_worker_threads + thread_index; - /* *INDENT-OFF* */ ELOG_TYPE_DECLARE (e) = { .format = "barrier-refork-thread-%d", .format_args = "i4", }; - /* *INDENT-ON* */ struct { @@ -446,12 +435,10 @@ 
vlib_worker_thread_barrier_check (void) { t = vlib_time_now (vm) - t; vlib_worker_thread_t *w = vlib_worker_threads + thread_index; - /* *INDENT-OFF* */ ELOG_TYPE_DECLARE (e) = { .format = "barrier-released-thread-%d: %dus", .format_args = "i4i4", }; - /* *INDENT-ON* */ struct { @@ -490,8 +477,6 @@ vlib_thread_is_main_w_barrier (void) } u8 *vlib_thread_stack_init (uword thread_index); -int vlib_thread_cb_register (struct vlib_main_t *vm, - vlib_thread_callbacks_t * cb); extern void *rpc_call_main_thread_cb_fn; void @@ -501,6 +486,17 @@ void vlib_rpc_call_main_thread (void *function, u8 * args, u32 size); void vlib_get_thread_core_numa (vlib_worker_thread_t * w, unsigned cpu_id); vlib_thread_main_t *vlib_get_thread_main_not_inline (void); +/** + * Force workers sync from within worker + * + * Must be paired with @ref vlib_workers_continue + */ +void vlib_workers_sync (void); +/** + * Release barrier after workers sync + */ +void vlib_workers_continue (void); + #endif /* included_vlib_threads_h */ /* diff --git a/src/vlib/threads_cli.c b/src/vlib/threads_cli.c index d14e9c50e27..2872a025d66 100644 --- a/src/vlib/threads_cli.c +++ b/src/vlib/threads_cli.c @@ -43,6 +43,7 @@ static clib_error_t * show_threads_fn (vlib_main_t * vm, unformat_input_t * input, vlib_cli_command_t * cmd) { + const vlib_thread_main_t *tm = vlib_get_thread_main (); vlib_worker_thread_t *w; int i; @@ -64,7 +65,7 @@ show_threads_fn (vlib_main_t * vm, line = format (line, "%-25U", format_sched_policy_and_priority, w->lwp); int cpu_id = w->cpu_id; - if (cpu_id > -1) + if (cpu_id > -1 && tm->main_lcore != ~0) { int core_id = w->core_id; int numa_id = w->numa_id; @@ -84,13 +85,11 @@ show_threads_fn (vlib_main_t * vm, } -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (show_threads_command, static) = { .path = "show threads", .short_help = "Show threads", .function = show_threads_fn, }; -/* *INDENT-ON* */ /* * Trigger threads to grab frame queue trace data @@ -180,14 +179,12 @@ done: return error; } -/* 
*INDENT-OFF* */ VLIB_CLI_COMMAND (cmd_trace_frame_queue,static) = { .path = "trace frame-queue", .short_help = "trace frame-queue (on|off)", .function = trace_frame_queue, .is_mp_safe = 1, }; -/* *INDENT-ON* */ /* @@ -362,21 +359,17 @@ show_frame_queue_histogram (vlib_main_t * vm, unformat_input_t * input, return 0; } -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (cmd_show_frame_queue_trace,static) = { .path = "show frame-queue", .short_help = "show frame-queue trace", .function = show_frame_queue_trace, }; -/* *INDENT-ON* */ -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (cmd_show_frame_queue_histogram,static) = { .path = "show frame-queue histogram", .short_help = "show frame-queue histogram", .function = show_frame_queue_histogram, }; -/* *INDENT-ON* */ /* @@ -445,13 +438,11 @@ done: return error; } -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (cmd_test_frame_queue_nelts,static) = { .path = "test frame-queue nelts", .short_help = "test frame-queue nelts (4,8,16,32)", .function = test_frame_queue_nelts, }; -/* *INDENT-ON* */ /* @@ -524,13 +515,11 @@ done: return error; } -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (cmd_test_frame_queue_threshold,static) = { .path = "test frame-queue threshold", .short_help = "test frame-queue threshold N (0=no limit)", .function = test_frame_queue_threshold, }; -/* *INDENT-ON* */ /* * fd.io coding-style-patch-verification: ON diff --git a/src/vlib/time.c b/src/vlib/time.c new file mode 100644 index 00000000000..cfe45a0643c --- /dev/null +++ b/src/vlib/time.c @@ -0,0 +1,84 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * Copyright(c) 2021 Cisco Systems, Inc. + */ + +/* Virtual time allows to adjust VPP clock by arbitrary amount of time. + * It is done such that the order of timer expirations is maintained, + * and if a timer expiration callback reschedule another timer, this + * timer will also properly expire in the right order. IOW, the order + * of events is preserved. 
+ * + * When moving time forward, each VPP thread (main and workers) runs an + * instance of the input node 'virtual-time-input' below. This node is + * responsible of advancing its own VPP thread clock to the next timer + * expiration. IOW each thread will move its clock independently one + * timer at a time. This also means that while moving time forward, each + * thread might not have the exact same view of what 'now' means. Once + * the main thread has finished moving its time forward, the worker thread + * barrier will ensure the timer between main and workers is synchronized. + * + * Using an input node in poll-mode has several advantages, including + * preventing 'unix-epoll-input' to sleep (as it will not sleep if at + * least one polling node is active). */ + +#include <vlib/vlib.h> +#include <vlib/time.h> + +static f64 vlib_time_virtual_stop; + +static uword +vlib_time_virtual_input (vlib_main_t *vm, vlib_node_runtime_t *node, + vlib_frame_t *frame) +{ + const f64 next = vlib_time_get_next_timer (vm); + /* each thread will advance its own time. In case a thread is much faster + * than another, we must make sure it does not run away... 
*/ + if (vlib_time_now (vm) + next > vlib_time_virtual_stop) + vlib_node_set_state (vm, node->node_index, VLIB_NODE_STATE_DISABLED); + else + vlib_time_adjust (vm, next); + return 0; +} + +VLIB_REGISTER_NODE (vlib_time_virtual_input_node) = { + .function = vlib_time_virtual_input, + .type = VLIB_NODE_TYPE_INPUT, + .name = "virtual-time-input", + .state = VLIB_NODE_STATE_DISABLED, +}; + +static clib_error_t * +vlib_time_virtual_adjust_command_fn (vlib_main_t *vm, unformat_input_t *input, + vlib_cli_command_t *cmd) +{ + f64 val; + + if (!unformat (input, "%f", &val)) + return clib_error_create ("unknown input `%U'", format_unformat_error, + input); + + vlib_time_virtual_stop = vlib_time_now (vm) + val; + + foreach_vlib_main () + vlib_node_set_state (this_vlib_main, vlib_time_virtual_input_node.index, + VLIB_NODE_STATE_POLLING); + + vlib_worker_thread_barrier_release (vm); + while ((val = vlib_process_wait_for_event_or_clock (vm, val)) >= 0.001) + ; + /* this barrier sync will resynchronize all the clocks, so even if the main + * thread was faster than some workers, this will make sure the workers will + * disable their virtual-time-input node on their next iteration (as stop + * time is reached). If a worker is too slow, there is a slight chance + * several of its timers expire at the same time at this point. Time will + * tell... */ + vlib_worker_thread_barrier_sync (vm); + return 0; +} + +VLIB_CLI_COMMAND (vlib_time_virtual_command) = { + .path = "set clock adjust", + .short_help = "set clock adjust <nn>", + .function = vlib_time_virtual_adjust_command_fn, +}; diff --git a/src/vlib/time.h b/src/vlib/time.h new file mode 100644 index 00000000000..61873bb2ef3 --- /dev/null +++ b/src/vlib/time.h @@ -0,0 +1,26 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * Copyright(c) 2021 Cisco Systems, Inc. 
+ */ + +#ifndef included_vlib_time_h +#define included_vlib_time_h + +#include <vlib/vlib.h> +#include <vppinfra/tw_timer_1t_3w_1024sl_ov.h> + +static inline f64 +vlib_time_get_next_timer (vlib_main_t *vm) +{ + vlib_node_main_t *nm = &vm->node_main; + TWT (tw_timer_wheel) *wheel = nm->timing_wheel; + return TW (tw_timer_first_expires_in_ticks) (wheel) * wheel->timer_interval; +} + +static inline void +vlib_time_adjust (vlib_main_t *vm, f64 offset) +{ + vm->time_offset += offset; +} + +#endif /* included_vlib_time_h */ diff --git a/src/vlib/trace.c b/src/vlib/trace.c index 4bbd9505b71..fa085387e4b 100644 --- a/src/vlib/trace.c +++ b/src/vlib/trace.c @@ -173,12 +173,10 @@ format_vlib_trace (u8 * s, va_list * va) } /* Root of all trace cli commands. */ -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (trace_cli_command,static) = { .path = "trace", .short_help = "Packet tracer commands", }; -/* *INDENT-ON* */ int trace_time_cmp (void *a1, void *a2) @@ -256,7 +254,6 @@ trace_apply_filter (vlib_main_t * vm) * of any N traces. 
*/ n_accepted = 0; - /* *INDENT-OFF* */ pool_foreach (h, tm->trace_buffer_pool) { accept = filter_accept(tm, h[0]); @@ -266,13 +263,12 @@ trace_apply_filter (vlib_main_t * vm) else n_accepted++; } - /* *INDENT-ON* */ /* remove all traces that we don't want to keep */ for (index = 0; index < vec_len (traces_to_remove); index++) { trace_index = traces_to_remove[index] - tm->trace_buffer_pool; - _vec_len (tm->trace_buffer_pool[trace_index]) = 0; + vec_set_len (tm->trace_buffer_pool[trace_index], 0); pool_put_index (tm->trace_buffer_pool, trace_index); } @@ -357,13 +353,11 @@ cli_show_trace_buffer (vlib_main_t * vm, return 0; } -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (show_trace_cli,static) = { .path = "show trace", .short_help = "Show trace buffer [max COUNT]", .function = cli_show_trace_buffer, }; -/* *INDENT-ON* */ int vlib_enable_disable_pkt_trace_filter (int enable) __attribute__ ((weak)); @@ -463,13 +457,6 @@ cli_add_trace_buffer (vlib_main_t * vm, goto done; } - u32 filter_table = classify_get_trace_chain (); - if (filter && filter_table == ~0) - { - error = clib_error_create ("No packet trace filter configured..."); - goto done; - } - trace_update_capture_options (add, node_index, filter, verbose); done: @@ -478,13 +465,11 @@ done: return error; } -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (add_trace_cli,static) = { .path = "trace add", .short_help = "trace add <input-graph-node> <add'l-pkts-for-node-> [filter] [verbose]", .function = cli_add_trace_buffer, }; -/* *INDENT-ON* */ /* * Configure a filter for packet traces. 
@@ -582,13 +567,11 @@ cli_filter_trace (vlib_main_t * vm, return 0; } -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (filter_trace_cli,static) = { .path = "trace filter", .short_help = "trace filter none | [include|exclude] NODE COUNT", .function = cli_filter_trace, }; -/* *INDENT-ON* */ static clib_error_t * cli_clear_trace_buffer (vlib_main_t * vm, @@ -598,13 +581,11 @@ cli_clear_trace_buffer (vlib_main_t * vm, return 0; } -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (clear_trace_cli,static) = { .path = "clear trace", .short_help = "Clear trace buffer and free memory", .function = cli_clear_trace_buffer, }; -/* *INDENT-ON* */ /* Placeholder function to get us linked in. */ void @@ -612,18 +593,6 @@ vlib_trace_cli_reference (void) { } -int -vnet_is_packet_traced (vlib_buffer_t * b, - u32 classify_table_index, int func) -__attribute__ ((weak)); - -int -vnet_is_packet_traced (vlib_buffer_t * b, u32 classify_table_index, int func) -{ - clib_warning ("BUG: STUB called"); - return 1; -} - void * vlib_add_trace (vlib_main_t * vm, vlib_node_runtime_t * r, vlib_buffer_t * b, u32 n_data_bytes) @@ -631,8 +600,148 @@ vlib_add_trace (vlib_main_t * vm, return vlib_add_trace_inline (vm, r, b, n_data_bytes); } +vlib_is_packet_traced_fn_t * +vlib_is_packet_traced_function_from_name (const char *name) +{ + vlib_trace_filter_function_registration_t *reg = + vlib_trace_filter_main.trace_filter_registration; + while (reg) + { + if (clib_strcmp (reg->name, name) == 0) + break; + reg = reg->next; + } + if (!reg) + return 0; + return reg->function; +} + +vlib_is_packet_traced_fn_t * +vlib_is_packet_traced_default_function () +{ + vlib_trace_filter_function_registration_t *reg = + vlib_trace_filter_main.trace_filter_registration; + vlib_trace_filter_function_registration_t *tmp_reg = reg; + while (reg) + { + if (reg->priority > tmp_reg->priority) + tmp_reg = reg; + reg = reg->next; + } + return tmp_reg->function; +} + +static clib_error_t * +vlib_trace_filter_function_init (vlib_main_t *vm) +{ + 
vlib_is_packet_traced_fn_t *default_fn = + vlib_is_packet_traced_default_function (); + foreach_vlib_main () + { + vlib_trace_main_t *tm = &this_vlib_main->trace_main; + tm->current_trace_filter_function = default_fn; + } + return 0; +} + +vlib_trace_filter_main_t vlib_trace_filter_main; + +VLIB_INIT_FUNCTION (vlib_trace_filter_function_init); + +static clib_error_t * +show_trace_filter_function (vlib_main_t *vm, unformat_input_t *input, + vlib_cli_command_t *cmd) +{ + vlib_trace_filter_main_t *tfm = &vlib_trace_filter_main; + vlib_trace_main_t *tm = &vm->trace_main; + vlib_is_packet_traced_fn_t *current_trace_filter_fn = + tm->current_trace_filter_function; + vlib_trace_filter_function_registration_t *reg = + tfm->trace_filter_registration; + + while (reg) + { + vlib_cli_output (vm, "%sname:%s description: %s priority: %u", + reg->function == current_trace_filter_fn ? "(*) " : "", + reg->name, reg->description, reg->priority); + reg = reg->next; + } + return 0; +} + +VLIB_CLI_COMMAND (show_trace_filter_function_cli, static) = { + .path = "show trace filter function", + .short_help = "show trace filter function", + .function = show_trace_filter_function, +}; + +uword +unformat_vlib_trace_filter_function (unformat_input_t *input, va_list *args) +{ + vlib_is_packet_traced_fn_t **res = + va_arg (*args, vlib_is_packet_traced_fn_t **); + vlib_trace_filter_main_t *tfm = &vlib_trace_filter_main; + + vlib_trace_filter_function_registration_t *reg = + tfm->trace_filter_registration; + while (reg) + { + if (unformat (input, reg->name)) + { + *res = reg->function; + return 1; + } + reg = reg->next; + } + return 0; +} + +void +vlib_set_trace_filter_function (vlib_is_packet_traced_fn_t *x) +{ + foreach_vlib_main () + { + this_vlib_main->trace_main.current_trace_filter_function = x; + } +} + +static clib_error_t * +set_trace_filter_function (vlib_main_t *vm, unformat_input_t *input, + vlib_cli_command_t *cmd) +{ + unformat_input_t _line_input, *line_input = &_line_input; + 
vlib_is_packet_traced_fn_t *res = 0; + clib_error_t *error = 0; + + if (!unformat_user (input, unformat_line_input, line_input)) + return 0; + + while (unformat_check_input (line_input) != (uword) UNFORMAT_END_OF_INPUT) + { + if (unformat (line_input, "%U", unformat_vlib_trace_filter_function, + &res)) + ; + else + { + error = clib_error_create ( + "expected valid trace filter function, got `%U'", + format_unformat_error, line_input); + goto done; + } + } + vlib_set_trace_filter_function (res); + +done: + unformat_free (line_input); + return error; +} +VLIB_CLI_COMMAND (set_trace_filter_function_cli, static) = { + .path = "set trace filter function", + .short_help = "set trace filter function <func_name>", + .function = set_trace_filter_function, +}; /* * fd.io coding-style-patch-verification: ON * diff --git a/src/vlib/trace.h b/src/vlib/trace.h index d045271f853..196c691ece6 100644 --- a/src/vlib/trace.h +++ b/src/vlib/trace.h @@ -80,6 +80,17 @@ typedef void *(vlib_add_trace_callback_t) (struct vlib_main_t *, struct vlib_buffer_t * b, u32 n_data_bytes); +typedef int (vlib_is_packet_traced_fn_t) (vlib_buffer_t *b, + u32 classify_table_index, int func); +typedef struct vlib_trace_filter_function_registration +{ + const char *name; + const char *description; + int priority; + vlib_is_packet_traced_fn_t *function; + struct vlib_trace_filter_function_registration *next; +} vlib_trace_filter_function_registration_t; + typedef struct { /* Pool of trace buffers. */ @@ -109,10 +120,33 @@ typedef struct /* a callback to enable customized addition of a new trace */ vlib_add_trace_callback_t *add_trace_callback; + vlib_is_packet_traced_fn_t *current_trace_filter_function; + } vlib_trace_main_t; format_function_t format_vlib_trace; - +typedef struct +{ + vlib_trace_filter_function_registration_t *trace_filter_registration; +} vlib_trace_filter_main_t; + +extern vlib_trace_filter_main_t vlib_trace_filter_main; +#define VLIB_REGISTER_TRACE_FILTER_FUNCTION(x, ...) 
\ + __VA_ARGS__ vlib_trace_filter_function_registration_t \ + __vlib_trace_filter_function_##x; \ + static void __clib_constructor \ + __vlib_trace_filter_function_registration_##x (void) \ + { \ + vlib_trace_filter_main_t *tfm = &vlib_trace_filter_main; \ + __vlib_trace_filter_function_##x.next = tfm->trace_filter_registration; \ + tfm->trace_filter_registration = &__vlib_trace_filter_function_##x; \ + } \ + __VA_ARGS__ vlib_trace_filter_function_registration_t \ + __vlib_trace_filter_function_##x + +vlib_is_packet_traced_fn_t * +vlib_is_packet_traced_function_from_name (const char *name); +vlib_is_packet_traced_fn_t *vlib_is_packet_traced_default_function (); void trace_apply_filter (struct vlib_main_t *vm); int trace_time_cmp (void *a1, void *a2); void vlib_trace_stop_and_clear (void); @@ -121,6 +155,9 @@ void trace_update_capture_options (u32 add, u32 node_index, u32 filter, u8 verbose); void trace_filter_set (u32 node_index, u32 flag, u32 count); void clear_trace_buffer (void); +void vlib_set_trace_filter_function (vlib_is_packet_traced_fn_t *x); +uword unformat_vlib_trace_filter_function (unformat_input_t *input, + va_list *args); #endif /* included_vlib_trace_h */ diff --git a/src/vlib/trace_funcs.h b/src/vlib/trace_funcs.h index 9313d41eb7d..9b45346b467 100644 --- a/src/vlib/trace_funcs.h +++ b/src/vlib/trace_funcs.h @@ -125,7 +125,7 @@ vlib_free_trace (vlib_main_t * vm, vlib_buffer_t * b) vlib_trace_main_t *tm = &vm->trace_main; u32 trace_index = vlib_buffer_get_trace_index (b); vlib_validate_trace (tm, b); - _vec_len (tm->trace_buffer_pool[trace_index]) = 0; + vec_set_len (tm->trace_buffer_pool[trace_index], 0); pool_put_index (tm->trace_buffer_pool, trace_index); } @@ -138,10 +138,7 @@ vlib_trace_next_frame (vlib_main_t * vm, nf->flags |= VLIB_FRAME_TRACE; } -void trace_apply_filter (vlib_main_t * vm); -int vnet_is_packet_traced (vlib_buffer_t * b, - u32 classify_table_index, int func); - +void trace_apply_filter (vlib_main_t *vm); /* * Mark buffer as 
traced and allocate trace buffer. @@ -164,7 +161,7 @@ vlib_trace_buffer (vlib_main_t * vm, if (PREDICT_FALSE (vlib_global_main.trace_filter.trace_filter_enable)) { /* See if we're supposed to trace this packet... */ - if (vnet_is_packet_traced ( + if (tm->current_trace_filter_function ( b, vlib_global_main.trace_filter.classify_table_index, 0 /* full classify */) != 1) return 0; diff --git a/src/vlib/unix/cli.c b/src/vlib/unix/cli.c index c7f0bccf286..90cf61d811d 100644 --- a/src/vlib/unix/cli.c +++ b/src/vlib/unix/cli.c @@ -62,6 +62,7 @@ #include <netinet/tcp.h> #include <math.h> #include <vppinfra/macros.h> +#include <vppinfra/format_table.h> /** ANSI escape code. */ #define ESC "\x1b" @@ -244,6 +245,9 @@ typedef struct /** Macro tables for this session */ clib_macro_main_t macro_main; + + /** Session name */ + u8 *name; } unix_cli_file_t; /** Resets the pager buffer and other data. @@ -275,6 +279,7 @@ unix_cli_file_free (unix_cli_file_t * f) { vec_free (f->output_vector); vec_free (f->input_vector); + vec_free (f->name); unix_cli_pager_reset (f); } @@ -1312,6 +1317,10 @@ unix_cli_new_session_process (vlib_main_t * vm, vlib_node_runtime_t * rt, /* Add an identifier to the new session list */ unix_cli_new_session_t ns; + /* Check the connection didn't close already */ + if (pool_is_free_index (cm->cli_file_pool, event_data[0])) + break; + ns.cf_index = event_data[0]; ns.deadline = vlib_time_now (vm) + 1.0; @@ -1606,7 +1615,7 @@ unix_cli_line_process_one (unix_cli_main_t * cm, /* Delete the desired text from the command */ memmove (cf->current_command, cf->current_command + j, delta); - _vec_len (cf->current_command) = delta; + vec_set_len (cf->current_command, delta); /* Print the new contents */ unix_vlib_cli_output_cooked (cf, uf, cf->current_command, delta); @@ -1631,7 +1640,7 @@ unix_cli_line_process_one (unix_cli_main_t * cm, unix_vlib_cli_output_cursor_left (cf, uf); /* Truncate the line at the cursor */ - _vec_len (cf->current_command) = cf->cursor; + 
vec_set_len (cf->current_command, cf->cursor); cf->search_mode = 0; break; @@ -1673,7 +1682,7 @@ unix_cli_line_process_one (unix_cli_main_t * cm, unix_vlib_cli_output_cooked (cf, uf, (u8 *) " ", 1); for (; (cf->current_command + cf->cursor) > save; cf->cursor--) unix_vlib_cli_output_cursor_left (cf, uf); - _vec_len (cf->current_command) -= delta; + vec_dec_len (cf->current_command, delta); } } cf->search_mode = 0; @@ -1730,13 +1739,13 @@ unix_cli_line_process_one (unix_cli_main_t * cm, if (cf->excursion == vec_len (cf->command_history)) { /* down-arrowed to last entry - want a blank line */ - _vec_len (cf->current_command) = 0; + vec_set_len (cf->current_command, 0); } else if (cf->excursion < 0) { /* up-arrowed over the start to the end, want a blank line */ cf->excursion = vec_len (cf->command_history); - _vec_len (cf->current_command) = 0; + vec_set_len (cf->current_command, 0); } else { @@ -1749,7 +1758,7 @@ unix_cli_line_process_one (unix_cli_main_t * cm, vec_validate (cf->current_command, vec_len (prev) - 1); clib_memcpy (cf->current_command, prev, vec_len (prev)); - _vec_len (cf->current_command) = vec_len (prev); + vec_set_len (cf->current_command, vec_len (prev)); unix_vlib_cli_output_cooked (cf, uf, cf->current_command, vec_len (cf->current_command)); } @@ -1836,7 +1845,7 @@ unix_cli_line_process_one (unix_cli_main_t * cm, cf->cursor++; unix_vlib_cli_output_cursor_left (cf, uf); cf->cursor--; - _vec_len (cf->current_command)--; + vec_dec_len (cf->current_command, 1); } else if (cf->cursor > 0) { @@ -1844,7 +1853,7 @@ unix_cli_line_process_one (unix_cli_main_t * cm, j = vec_len (cf->current_command) - cf->cursor; memmove (cf->current_command + cf->cursor - 1, cf->current_command + cf->cursor, j); - _vec_len (cf->current_command)--; + vec_dec_len (cf->current_command, 1); /* redraw the rest of the line */ unix_vlib_cli_output_cursor_left (cf, uf); @@ -1880,7 +1889,7 @@ unix_cli_line_process_one (unix_cli_main_t * cm, j = vec_len (cf->current_command) - 
cf->cursor - 1; memmove (cf->current_command + cf->cursor, cf->current_command + cf->cursor + 1, j); - _vec_len (cf->current_command)--; + vec_dec_len (cf->current_command, 1); /* redraw the rest of the line */ unix_vlib_cli_output_cooked (cf, uf, cf->current_command + cf->cursor, @@ -1952,7 +1961,7 @@ unix_cli_line_process_one (unix_cli_main_t * cm, vec_resize (save, vec_len (cf->current_command) - cf->cursor); clib_memcpy (save, cf->current_command + cf->cursor, vec_len (cf->current_command) - cf->cursor); - _vec_len (cf->current_command) = cf->cursor; + vec_set_len (cf->current_command, cf->cursor); } else { @@ -1974,7 +1983,7 @@ unix_cli_line_process_one (unix_cli_main_t * cm, cf->cursor--; j--; } - _vec_len (cf->current_command) = j; + vec_set_len (cf->current_command, j); /* replace it with the newly expanded command */ vec_append (cf->current_command, completed); @@ -2381,7 +2390,7 @@ unix_cli_line_process_one (unix_cli_main_t * cm, vec_validate (cf->current_command, vec_len (item) - 1); clib_memcpy (cf->current_command, item, vec_len (item)); - _vec_len (cf->current_command) = vec_len (item); + vec_set_len (cf->current_command, vec_len (item)); unix_vlib_cli_output_cooked (cf, uf, cf->current_command, vec_len (cf->current_command)); @@ -2572,9 +2581,8 @@ more: { static u8 *lv; vec_reset_length (lv); - lv = format (lv, "%U[%d]: %v", - format_timeval, 0 /* current bat-time */ , - 0 /* current bat-format */ , + lv = format (lv, "%U[%d]: %v", format_timeval, + NULL /* current bat-format */, 0 /* current bat-time */, cli_file_index, cf->current_command); if ((vec_len (cf->current_command) > 0) && (cf->current_command[vec_len (cf->current_command) - 1] != '\n')) @@ -2595,7 +2603,7 @@ more: 0 /* level */ , 8 /* max_level */ ); /* Macro processor NULL terminates the return */ - _vec_len (expanded) -= 1; + vec_dec_len (expanded, 1); vec_reset_length (cf->current_command); vec_append (cf->current_command, expanded); vec_free (expanded); @@ -2693,7 +2701,7 @@ 
unix_cli_kill (unix_cli_main_t * cm, uword cli_file_index) if (ns->cf_index == cli_file_index) { - vec_del1 (cm->new_sessions, i); + ns->cf_index = ~0; break; } } @@ -2750,7 +2758,7 @@ unix_cli_process (vlib_main_t * vm, } if (data) - _vec_len (data) = 0; + vec_set_len (data, 0); } done: @@ -2832,7 +2840,7 @@ unix_cli_read_ready (clib_file_t * uf) return clib_error_return_unix (0, "read"); n_read = n < 0 ? 0 : n; - _vec_len (cf->input_vector) = l + n_read; + vec_set_len (cf->input_vector, l + n_read); } if (!(n < 0)) @@ -2874,47 +2882,16 @@ unix_cli_file_add (unix_cli_main_t * cm, char *name, int fd) { unix_main_t *um = &unix_main; clib_file_main_t *fm = &file_main; - vlib_node_main_t *nm = &vlib_get_main ()->node_main; unix_cli_file_t *cf; clib_file_t template = { 0 }; vlib_main_t *vm = um->vlib_main; vlib_node_t *n = 0; - u8 *file_desc = 0; - - file_desc = format (0, "%s", name); - - name = (char *) format (0, "unix-cli-%s", name); if (vec_len (cm->unused_cli_process_node_indices) > 0) { - uword l = vec_len (cm->unused_cli_process_node_indices); - int i; - vlib_main_t *this_vlib_main; - u8 *old_name = 0; - - /* - * Nodes are bulk-copied, so node name pointers are shared. - * Find the cli node in all graph replicas, and give all of them - * the same new name. - * Then, throw away the old shared name-vector. 
- */ - for (i = 0; i < vlib_get_n_threads (); i++) - { - this_vlib_main = vlib_get_main_by_index (i); - if (this_vlib_main == 0) - continue; - n = vlib_get_node (this_vlib_main, - cm->unused_cli_process_node_indices[l - 1]); - old_name = n->name; - n->name = (u8 *) name; - } - ASSERT (old_name); - hash_unset (nm->node_by_name, old_name); - hash_set (nm->node_by_name, name, n->index); - vec_free (old_name); + n = vlib_get_node (vm, vec_pop (cm->unused_cli_process_node_indices)); vlib_node_set_state (vm, n->index, VLIB_NODE_STATE_POLLING); - _vec_len (cm->unused_cli_process_node_indices) = l - 1; } else { @@ -2923,21 +2900,18 @@ unix_cli_file_add (unix_cli_main_t * cm, char *name, int fd) .type = VLIB_NODE_TYPE_PROCESS, .process_log2_n_stack_bytes = 18, }; - - r.name = name; + static u32 count = 0; vlib_worker_thread_barrier_sync (vm); - vlib_register_node (vm, &r); - vec_free (name); + vlib_register_node (vm, &r, "unix-cli-process-%u", count++); n = vlib_get_node (vm, r.index); vlib_worker_thread_node_runtime_update (); vlib_worker_thread_barrier_release (vm); } - pool_get (cm->cli_file_pool, cf); - clib_memset (cf, 0, sizeof (*cf)); + pool_get_zero (cm->cli_file_pool, cf); clib_macro_init (&cf->macro_main); template.read_function = unix_cli_read_ready; @@ -2945,14 +2919,15 @@ unix_cli_file_add (unix_cli_main_t * cm, char *name, int fd) template.error_function = unix_cli_error_detected; template.file_descriptor = fd; template.private_data = cf - cm->cli_file_pool; - template.description = file_desc; + template.description = format (0, "%s", name); + cf->name = format (0, "unix-cli-%s", name); cf->process_node_index = n->index; cf->clib_file_index = clib_file_add (fm, &template); cf->output_vector = 0; cf->input_vector = 0; vec_validate (cf->current_command, 0); - _vec_len (cf->current_command) = 0; + vec_set_len (cf->current_command, 0); vlib_start_process (vm, n->runtime_index); @@ -3342,21 +3317,17 @@ unix_cli_quit (vlib_main_t * vm, * If VPP is running in @em 
interactive mode and this is the console session * (that is, the session on @c stdin) then this will also terminate VPP. ?*/ -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (unix_cli_quit_command, static) = { .path = "quit", .short_help = "Exit CLI", .function = unix_cli_quit, }; -/* *INDENT-ON* */ -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (unix_cli_q_command, static) = { .path = "q", .short_help = "Exit CLI", .function = unix_cli_quit, }; -/* *INDENT-ON* */ /** CLI command to execute a VPP command script. */ static clib_error_t * @@ -3364,9 +3335,10 @@ unix_cli_exec (vlib_main_t * vm, unformat_input_t * input, vlib_cli_command_t * cmd) { char *file_name; - int fd; - unformat_input_t sub_input; + int fd, rv = 0; + unformat_input_t sub_input, in; clib_error_t *error; + clib_macro_main_t *mm = 0; unix_cli_main_t *cm = &unix_cli_main; unix_cli_file_t *cf; u8 *file_data = 0; @@ -3403,8 +3375,14 @@ unix_cli_exec (vlib_main_t * vm, goto done; } + if (s.st_size < 1) + { + error = clib_error_return (0, "empty file `%s'", file_name); + goto done; + } + /* Read the file */ - vec_validate (file_data, s.st_size); + vec_validate (file_data, s.st_size - 1); if (read (fd, file_data, s.st_size) != s.st_size) { @@ -3414,42 +3392,43 @@ unix_cli_exec (vlib_main_t * vm, goto done; } - /* The macro expander expects a c string... */ - vec_add1 (file_data, 0); - unformat_init_vector (&sub_input, file_data); - /* Run the file contents through the macro processor */ - if (vec_len (sub_input.buffer) > 1) + /* Initial config process? Use the global macro table. */ + if (pool_is_free_index (cm->cli_file_pool, cm->current_input_file_index)) + mm = &cm->macro_main; + else { - u8 *expanded; - clib_macro_main_t *mm = 0; + /* Otherwise, use the per-cli-process macro table */ + cf = pool_elt_at_index (cm->cli_file_pool, cm->current_input_file_index); + mm = &cf->macro_main; + } - /* Initial config process? Use the global macro table. 
*/ - if (pool_is_free_index - (cm->cli_file_pool, cm->current_input_file_index)) - mm = &cm->macro_main; - else + while (rv == 0 && unformat_user (&sub_input, unformat_vlib_cli_line, &in)) + { + /* Run the file contents through the macro processor */ + if (vec_len (in.buffer) > 1) { - /* Otherwise, use the per-cli-process macro table */ - cf = pool_elt_at_index (cm->cli_file_pool, - cm->current_input_file_index); - mm = &cf->macro_main; + u8 *expanded; + + /* The macro expander expects a c string... */ + vec_add1 (in.buffer, 0); + + expanded = + (u8 *) clib_macro_eval (mm, (i8 *) in.buffer, 1 /* complain */, + 0 /* level */, 8 /* max_level */); + /* Macro processor NULL terminates the return */ + vec_dec_len (expanded, 1); + vec_reset_length (in.buffer); + vec_append (in.buffer, expanded); + vec_free (expanded); } - expanded = (u8 *) clib_macro_eval (mm, - (i8 *) sub_input.buffer, - 1 /* complain */ , - 0 /* level */ , - 8 /* max_level */ ); - /* Macro processor NULL terminates the return */ - _vec_len (expanded) -= 1; - vec_reset_length (sub_input.buffer); - vec_append (sub_input.buffer, expanded); - vec_free (expanded); + if ((rv = vlib_cli_input (vm, &in, 0, 0)) != 0) + error = clib_error_return (0, "CLI line error: %U", + format_unformat_error, &in); + unformat_free (&in); } - - vlib_cli_input (vm, &sub_input, 0, 0); unformat_free (&sub_input); done: @@ -3462,7 +3441,7 @@ done: /*? * Executes a sequence of CLI commands which are read from a file. If - * a command is unrecognised or otherwise invalid then the usual CLI + * a command is unrecognized or otherwise invalid then the usual CLI * feedback will be generated, however execution of subsequent commands * from the file will continue. 
* @@ -3483,14 +3462,12 @@ done: * Example of how to execute a set of CLI commands from a file: * @cliexcmd{exec /usr/share/vpp/scripts/gigup.txt} ?*/ -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (cli_exec, static) = { .path = "exec", .short_help = "exec <filename>", .function = unix_cli_exec, .is_mp_safe = 1, }; -/* *INDENT-ON* */ /** CLI command to show various unix error statistics. */ static clib_error_t * @@ -3559,13 +3536,11 @@ done: return error; } -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (cli_unix_show_errors, static) = { .path = "show unix errors", .short_help = "Show Unix system call error history", .function = unix_show_errors, }; -/* *INDENT-ON* */ /** CLI command to show various unix error statistics. */ static clib_error_t * @@ -3581,7 +3556,6 @@ unix_show_files (vlib_main_t * vm, vlib_cli_output (vm, "%3s %6s %12s %12s %12s %-32s %s", "FD", "Thread", "Read", "Write", "Error", "File Name", "Description"); - /* *INDENT-OFF* */ pool_foreach (f, fm->file_pool) { int rv; @@ -3596,19 +3570,16 @@ unix_show_files (vlib_main_t * vm, path, f->description); vec_reset_length (s); } - /* *INDENT-ON* */ vec_free (s); return error; } -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (cli_unix_show_files, static) = { .path = "show unix files", .short_help = "Show Unix files in use", .function = unix_show_files, }; -/* *INDENT-ON* */ /** CLI command to show session command history. */ static clib_error_t * @@ -3639,13 +3610,11 @@ unix_cli_show_history (vlib_main_t * vm, /*? * Displays the command history for the current session, if any. ?*/ -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (cli_unix_cli_show_history, static) = { .path = "history", .short_help = "Show current session command history", .function = unix_cli_show_history, }; -/* *INDENT-ON* */ /** CLI command to show terminal status. 
*/ static clib_error_t * @@ -3662,7 +3631,8 @@ unix_cli_show_terminal (vlib_main_t * vm, n = vlib_get_node (vm, cf->process_node_index); - vlib_cli_output (vm, "Terminal name: %v\n", n->name); + vlib_cli_output (vm, "Terminal name: %v\n", cf->name); + vlib_cli_output (vm, "Terminal node: %v\n", n->name); vlib_cli_output (vm, "Terminal mode: %s\n", cf->line_mode ? "line-by-line" : "char-by-char"); vlib_cli_output (vm, "Terminal width: %d\n", cf->width); @@ -3711,13 +3681,11 @@ unix_cli_show_terminal (vlib_main_t * vm, * CRLF mode: LF * @cliexend ?*/ -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (cli_unix_cli_show_terminal, static) = { .path = "show terminal", .short_help = "Show current session terminal settings", .function = unix_cli_show_terminal, }; -/* *INDENT-ON* */ /** CLI command to display a list of CLI sessions. */ static clib_error_t * @@ -3727,31 +3695,34 @@ unix_cli_show_cli_sessions (vlib_main_t * vm, { unix_cli_main_t *cm = &unix_cli_main; clib_file_main_t *fm = &file_main; + table_t table = {}, *t = &table; unix_cli_file_t *cf; clib_file_t *uf; - vlib_node_t *n; - vlib_cli_output (vm, "%-5s %-5s %-20s %s", "PNI", "FD", "Name", "Flags"); + table_add_header_col (t, 4, "PNI ", "FD ", "Name", "Flags"); #define fl(x, y) ( (x) ? 
toupper((y)) : tolower((y)) ) - /* *INDENT-OFF* */ - pool_foreach (cf, cm->cli_file_pool) { - uf = pool_elt_at_index (fm->file_pool, cf->clib_file_index); - n = vlib_get_node (vm, cf->process_node_index); - vlib_cli_output (vm, - "%-5d %-5d %-20v %c%c%c%c%c\n", - cf->process_node_index, - uf->file_descriptor, - n->name, - fl (cf->is_interactive, 'i'), - fl (cf->is_socket, 's'), - fl (cf->line_mode, 'l'), - fl (cf->has_epipe, 'p'), - fl (cf->ansi_capable, 'a')); - } - /* *INDENT-ON* */ + int i = 0; + pool_foreach (cf, cm->cli_file_pool) + { + int j = 0; + + uf = pool_elt_at_index (fm->file_pool, cf->clib_file_index); + table_format_cell (t, i, j++, "%u", cf->process_node_index); + table_format_cell (t, i, j++, "%u", uf->file_descriptor); + table_format_cell (t, i, j++, "%v", cf->name); + table_format_cell (t, i++, j++, "%c%c%c%c%c", + fl (cf->is_interactive, 'i'), fl (cf->is_socket, 's'), + fl (cf->line_mode, 'l'), fl (cf->has_epipe, 'p'), + fl (cf->ansi_capable, 'a')); + } #undef fl + t->default_body.align = TTAA_LEFT; + t->default_header_col.align = TTAA_LEFT; + vlib_cli_output (vm, "%U", format_table, t); + table_free (t); + return 0; } @@ -3791,13 +3762,11 @@ unix_cli_show_cli_sessions (vlib_main_t * vm, * - @em P EPIPE detected on connection; it will close soon. * - @em A ANSI-capable terminal. ?*/ -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (cli_unix_cli_show_cli_sessions, static) = { .path = "show cli-sessions", .short_help = "Show current CLI sessions", .function = unix_cli_show_cli_sessions, }; -/* *INDENT-ON* */ /** CLI command to set terminal pager settings. */ static clib_error_t * @@ -3848,13 +3817,11 @@ done: * Additionally allows the pager buffer size to be set; though note that * this value is set globally and not per session. 
?*/ -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (cli_unix_cli_set_terminal_pager, static) = { .path = "set terminal pager", .short_help = "set terminal pager [on|off] [limit <lines>]", .function = unix_cli_set_terminal_pager, }; -/* *INDENT-ON* */ /** CLI command to set terminal history settings. */ static clib_error_t * @@ -3919,13 +3886,11 @@ done: * This command also allows the maximum size of the history buffer for * this session to be altered. ?*/ -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (cli_unix_cli_set_terminal_history, static) = { .path = "set terminal history", .short_help = "set terminal history [on|off] [limit <lines>]", .function = unix_cli_set_terminal_history, }; -/* *INDENT-ON* */ /** CLI command to set terminal ANSI settings. */ static clib_error_t * @@ -3958,13 +3923,11 @@ unix_cli_set_terminal_ansi (vlib_main_t * vm, * ANSI control sequences are used in a small number of places to provide, * for example, color text output and to control the cursor in the pager. ?*/ -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (cli_unix_cli_set_terminal_ansi, static) = { .path = "set terminal ansi", .short_help = "set terminal ansi [on|off]", .function = unix_cli_set_terminal_ansi, }; -/* *INDENT-ON* */ #define MAX_CLI_WAIT 86400 @@ -3998,13 +3961,11 @@ unix_wait_cmd (vlib_main_t * vm, unformat_free (line_input); return 0; } -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (cli_unix_wait_cmd, static) = { .path = "wait", .short_help = "wait <sec>", .function = unix_wait_cmd, }; -/* *INDENT-ON* */ static clib_error_t * echo_cmd (vlib_main_t * vm, @@ -4025,13 +3986,11 @@ echo_cmd (vlib_main_t * vm, return 0; } -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (cli_unix_echo_cmd, static) = { .path = "echo", .short_help = "echo <rest-of-line>", .function = echo_cmd, }; -/* *INDENT-ON* */ static clib_error_t * define_cmd_fn (vlib_main_t * vm, @@ -4063,14 +4022,12 @@ define_cmd_fn (vlib_main_t * vm, return 0; } -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (define_cmd, static) = { .path = "define", .short_help = "define 
<variable-name> <value>", .function = define_cmd_fn, }; -/* *INDENT-ON* */ static clib_error_t * undefine_cmd_fn (vlib_main_t * vm, @@ -4089,13 +4046,11 @@ undefine_cmd_fn (vlib_main_t * vm, return 0; } -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (undefine_cmd, static) = { .path = "undefine", .short_help = "undefine <variable-name>", .function = undefine_cmd_fn, }; -/* *INDENT-ON* */ static clib_error_t * show_macro_cmd_fn (vlib_main_t * vm, @@ -4113,13 +4068,11 @@ show_macro_cmd_fn (vlib_main_t * vm, return 0; } -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (show_macro, static) = { .path = "show macro", .short_help = "show macro [noevaluate]", .function = show_macro_cmd_fn, }; -/* *INDENT-ON* */ static clib_error_t * unix_cli_init (vlib_main_t * vm) diff --git a/src/vlib/unix/input.c b/src/vlib/unix/input.c index 9c7c54f6b1b..e96cd902466 100644 --- a/src/vlib/unix/input.c +++ b/src/vlib/unix/input.c @@ -250,7 +250,10 @@ linux_epoll_input_inline (vlib_main_t * vm, vlib_node_runtime_t * node, while (nanosleep (&ts, &tsrem) < 0) ts = tsrem; if (*vlib_worker_threads->wait_at_barrier || - *nm->pending_interrupts) + clib_interrupt_is_any_pending ( + nm->input_node_interrupts) || + clib_interrupt_is_any_pending ( + nm->pre_input_node_interrupts)) goto done; } } @@ -367,13 +370,11 @@ linux_epoll_input (vlib_main_t * vm, return linux_epoll_input_inline (vm, node, frame, thread_index); } -/* *INDENT-OFF* */ VLIB_REGISTER_NODE (linux_epoll_input_node,static) = { .function = linux_epoll_input, .type = VLIB_NODE_TYPE_PRE_INPUT, .name = "unix-epoll-input", }; -/* *INDENT-ON* */ clib_error_t * linux_epoll_input_init (vlib_main_t * vm) @@ -416,12 +417,10 @@ unix_input_init (vlib_main_t * vm) return 0; } -/* *INDENT-OFF* */ VLIB_INIT_FUNCTION (unix_input_init) = { .runs_before = VLIB_INITS ("linux_epoll_input_init"), }; -/* *INDENT-ON* */ /* * fd.io coding-style-patch-verification: ON diff --git a/src/vlib/unix/main.c b/src/vlib/unix/main.c index 4ef96652470..ee28ca8f1aa 100644 --- 
a/src/vlib/unix/main.c +++ b/src/vlib/unix/main.c @@ -39,7 +39,9 @@ #include <vlib/vlib.h> #include <vlib/unix/unix.h> #include <vlib/unix/plugin.h> +#include <vppinfra/unix.h> +#include <limits.h> #include <signal.h> #include <sys/ucontext.h> #include <syslog.h> @@ -70,12 +72,10 @@ unix_main_init (vlib_main_t * vm) return 0; } -/* *INDENT-OFF* */ VLIB_INIT_FUNCTION (unix_main_init) = { .runs_before = VLIB_INITS ("unix_input_init"), }; -/* *INDENT-ON* */ static int unsetup_signal_handlers (int sig) @@ -144,17 +144,6 @@ unix_signal_handler (int signum, siginfo_t * si, ucontext_t * uc) break; } -#ifdef CLIB_GCOV - /* - * Test framework sends SIGTERM, so we need to flush the - * code coverage stats here. - */ - { - void __gcov_flush (void); - __gcov_flush (); - } -#endif - /* Null terminate. */ vec_add1 (syslog_msg, 0); @@ -210,6 +199,7 @@ setup_signal_handlers (unix_main_t * um) { /* these signals take the default action */ case SIGKILL: + case SIGCONT: case SIGSTOP: case SIGUSR1: case SIGUSR2: @@ -246,14 +236,7 @@ unix_error_handler (void *arg, u8 * msg, int msg_len) } else { - char save = msg[msg_len - 1]; - - /* Null Terminate. */ - msg[msg_len - 1] = 0; - - syslog (LOG_ERR | LOG_DAEMON, "%s", msg); - - msg[msg_len - 1] = save; + syslog (LOG_ERR | LOG_DAEMON, "%.*s", msg_len, msg); } } @@ -266,20 +249,10 @@ vlib_unix_error_report (vlib_main_t * vm, clib_error_t * error) return; { - char save; - u8 *msg; - u32 msg_len; - - msg = error->what; - msg_len = vec_len (msg); - - /* Null Terminate. */ - save = msg[msg_len - 1]; - msg[msg_len - 1] = 0; - - syslog (LOG_ERR | LOG_DAEMON, "%s", msg); - - msg[msg_len - 1] = save; + u8 *msg = error->what; + u32 len = vec_len (msg); + int msg_len = (len > INT_MAX) ? 
INT_MAX : len; + syslog (LOG_ERR | LOG_DAEMON, "%.*s", msg_len, msg); } } @@ -288,98 +261,34 @@ startup_config_process (vlib_main_t * vm, vlib_node_runtime_t * rt, vlib_frame_t * f) { unix_main_t *um = &unix_main; - u8 *buf = 0; - uword l, n = 1; + unformat_input_t in; vlib_process_suspend (vm, 2.0); while (um->unix_config_complete == 0) vlib_process_suspend (vm, 0.1); - if (um->startup_config_filename) + if (!um->startup_config_filename) { - unformat_input_t sub_input; - int fd; - struct stat s; - char *fn = (char *) um->startup_config_filename; - - fd = open (fn, O_RDONLY); - if (fd < 0) - { - clib_warning ("failed to open `%s'", fn); - return 0; - } + return 0; + } - if (fstat (fd, &s) < 0) - { - clib_warning ("failed to stat `%s'", fn); - bail: - close (fd); - return 0; - } + unformat_init_vector (&in, + format (0, "exec %s", um->startup_config_filename)); - if (!(S_ISREG (s.st_mode) || S_ISLNK (s.st_mode))) - { - clib_warning ("not a regular file: `%s'", fn); - goto bail; - } + vlib_cli_input (vm, &in, 0, 0); - while (n > 0) - { - l = vec_len (buf); - vec_resize (buf, 4096); - n = read (fd, buf + l, 4096); - if (n > 0) - { - _vec_len (buf) = l + n; - if (n < 4096) - break; - } - else - break; - } - if (um->log_fd && vec_len (buf)) - { - u8 *lv = 0; - lv = format (lv, "%U: ***** Startup Config *****\n%v", - format_timeval, 0 /* current bat-time */ , - 0 /* current bat-format */ , - buf); - { - int rv __attribute__ ((unused)) = - write (um->log_fd, lv, vec_len (lv)); - } - vec_reset_length (lv); - lv = format (lv, "%U: ***** End Startup Config *****\n", - format_timeval, 0 /* current bat-time */ , - 0 /* current bat-format */ ); - { - int rv __attribute__ ((unused)) = - write (um->log_fd, lv, vec_len (lv)); - } - vec_free (lv); - } + unformat_free (&in); - if (vec_len (buf)) - { - unformat_init_vector (&sub_input, buf); - vlib_cli_input (vm, &sub_input, 0, 0); - /* frees buf for us */ - unformat_free (&sub_input); - } - close (fd); - } return 0; } -/* 
*INDENT-OFF* */ VLIB_REGISTER_NODE (startup_config_node,static) = { .function = startup_config_process, .type = VLIB_NODE_TYPE_PROCESS, .name = "startup-config-process", .process_log2_n_stack_bytes = 18, }; -/* *INDENT-ON* */ static clib_error_t * unix_config (vlib_main_t * vm, unformat_input_t * input) @@ -480,9 +389,8 @@ unix_config (vlib_main_t * vm, unformat_input_t * input) { u8 *lv = 0; lv = format (0, "%U: ***** Start: PID %d *****\n", - format_timeval, 0 /* current bat-time */ , - 0 /* current bat-format */ , - getpid ()); + format_timeval, NULL /* current bat-format */, + 0 /* current bat-time */, getpid ()); { int rv __attribute__ ((unused)) = write (um->log_fd, lv, vec_len (lv)); @@ -518,6 +426,9 @@ unix_config (vlib_main_t * vm, unformat_input_t * input) if (error) return error; + if (chdir ((char *) um->runtime_dir) < 0) + return clib_error_return_unix (0, "chdir('%s')", um->runtime_dir); + error = setup_signal_handlers (um); if (error) return error; @@ -662,12 +573,13 @@ static uword thread0 (uword arg) { vlib_main_t *vm = (vlib_main_t *) arg; + vlib_global_main_t *vgm = vlib_get_global_main (); unformat_input_t input; int i; vlib_process_finish_switch_stack (vm); - unformat_init_command_line (&input, (char **) vm->argv); + unformat_init_command_line (&input, (char **) vgm->argv); i = vlib_main (vm, &input); unformat_free (&input); @@ -690,6 +602,10 @@ vlib_thread_stack_init (uword thread_index) return stack; } +#ifndef PATH_MAX +#define PATH_MAX 4096 +#endif + int vlib_unix_main (int argc, char *argv[]) { @@ -701,12 +617,24 @@ vlib_unix_main (int argc, char *argv[]) vec_validate_aligned (vgm->vlib_mains, 0, CLIB_CACHE_LINE_BYTES); - vm->argv = (u8 **) argv; - vgm->name = argv[0]; - vm->heap_base = clib_mem_get_heap (); - vm->heap_aligned_base = (void *) - (((uword) vm->heap_base) & ~(VLIB_FRAME_ALIGN - 1)); - ASSERT (vm->heap_base); + vgm->exec_path = (char *) os_get_exec_path (); + + if (vgm->exec_path) + { + for (i = vec_len (vgm->exec_path) - 1; i 
> 0; i--) + if (vgm->exec_path[i - 1] == '/') + break; + + vgm->name = 0; + + vec_add (vgm->name, vgm->exec_path + i, vec_len (vgm->exec_path) - i); + vec_add1 (vgm->exec_path, 0); + vec_add1 (vgm->name, 0); + } + else + vgm->exec_path = vgm->name = argv[0]; + + vgm->argv = (u8 **) argv; clib_time_init (&vm->clib_time); @@ -715,7 +643,7 @@ vlib_unix_main (int argc, char *argv[]) elog_init (vlib_get_elog_main (), vgm->configured_elog_ring_size); elog_enable_disable (vlib_get_elog_main (), 1); - unformat_init_command_line (&input, (char **) vm->argv); + unformat_init_command_line (&input, (char **) vgm->argv); if ((e = vlib_plugin_config (vm, &input))) { clib_error_report (e); @@ -727,7 +655,7 @@ vlib_unix_main (int argc, char *argv[]) if (i) return i; - unformat_init_command_line (&input, (char **) vm->argv); + unformat_init_command_line (&input, (char **) vgm->argv); if (vgm->init_functions_called == 0) vgm->init_functions_called = hash_create (0, /* value bytes */ 0); e = vlib_call_all_config_functions (vm, &input, 1 /* early */ ); @@ -739,7 +667,7 @@ vlib_unix_main (int argc, char *argv[]) unformat_free (&input); /* always load symbols, for signal handler and mheap memory get/put backtrace */ - clib_elf_main_init (vgm->name); + clib_elf_main_init (vgm->exec_path); vec_validate (vlib_thread_stacks, 0); vlib_thread_stack_init (0); diff --git a/src/vlib/unix/mc_socket.c b/src/vlib/unix/mc_socket.c index 9800b1e744c..1f3b4e9a8f1 100644 --- a/src/vlib/unix/mc_socket.c +++ b/src/vlib/unix/mc_socket.c @@ -90,7 +90,7 @@ sendmsg_helper (mc_socket_main_t * msm, h.msg_namelen = sizeof (tx_addr[0]); if (msm->iovecs) - _vec_len (msm->iovecs) = 0; + vec_set_len (msm->iovecs, 0); n_bytes = append_buffer_index_to_iovec (vm, buffer_index, &msm->iovecs); ASSERT (n_bytes <= msm->mc_main.transport.max_packet_size); @@ -177,7 +177,7 @@ recvmsg_helper (mc_socket_main_t * msm, vec_validate (msm->rx_buffers, max_alloc - 1); n_alloc = vlib_buffer_alloc (vm, msm->rx_buffers + n_left, 
max_alloc - n_left); - _vec_len (msm->rx_buffers) = n_left + n_alloc; + vec_set_len (msm->rx_buffers, n_left + n_alloc); } ASSERT (vec_len (msm->rx_buffers) >= n_mtu); @@ -192,7 +192,7 @@ recvmsg_helper (mc_socket_main_t * msm, msm->iovecs[i].iov_base = b->data; msm->iovecs[i].iov_len = buffer_size; } - _vec_len (msm->iovecs) = n_mtu; + vec_set_len (msm->iovecs, n_mtu); { struct msghdr h; @@ -237,7 +237,7 @@ recvmsg_helper (mc_socket_main_t * msm, b->next_buffer = msm->rx_buffers[i_rx]; } - _vec_len (msm->rx_buffers) = i_rx; + vec_set_len (msm->rx_buffers, i_rx); return 0 /* no error */ ; } @@ -418,7 +418,7 @@ catchup_socket_read_ready (clib_file_t * uf, int is_server) } } - _vec_len (c->input_vector) = l + n; + vec_set_len (c->input_vector, l + n); if (is_eof && vec_len (c->input_vector) > 0) { @@ -426,7 +426,7 @@ catchup_socket_read_ready (clib_file_t * uf, int is_server) { mc_msg_catchup_request_handler (mcm, (void *) c->input_vector, c - msm->catchups); - _vec_len (c->input_vector) = 0; + vec_set_len (c->input_vector, 0); } else { diff --git a/src/vlib/unix/plugin.c b/src/vlib/unix/plugin.c index 4a7ff2753f2..5cac9abc8fe 100644 --- a/src/vlib/unix/plugin.c +++ b/src/vlib/unix/plugin.c @@ -35,7 +35,7 @@ char *vlib_plugin_app_version __attribute__ ((weak)); char *vlib_plugin_app_version = ""; void * -vlib_get_plugin_symbol (char *plugin_name, char *symbol_name) +vlib_get_plugin_symbol (const char *plugin_name, const char *symbol_name) { plugin_main_t *pm = &vlib_plugin_main; uword *p; @@ -194,6 +194,8 @@ load_one_plugin (plugin_main_t * pm, plugin_info_t * pi, int from_early_init) reread_reg = 0; goto process_reg; } + else + clib_error_free (error); error = elf_get_section_by_name (&em, ".vlib_plugin_registration", §ion); @@ -304,8 +306,12 @@ process_reg: } vec_free (version_required); +#if defined(RTLD_DEEPBIND) handle = dlopen ((char *) pi->filename, RTLD_LAZY | (reg->deep_bind ? 
RTLD_DEEPBIND : 0)); +#else + handle = dlopen ((char *) pi->filename, RTLD_LAZY); +#endif if (handle == 0) { @@ -594,7 +600,12 @@ vlib_plugin_early_init (vlib_main_t * vm) 0x7FFFFFFF /* aka no rate limit */ ); if (pm->plugin_path == 0) - pm->plugin_path = format (0, "%s%c", vlib_plugin_path, 0); + pm->plugin_path = format (0, "%s", vlib_plugin_path); + + if (pm->plugin_path_add) + pm->plugin_path = format (pm->plugin_path, ":%s", pm->plugin_path_add); + + pm->plugin_path = format (pm->plugin_path, "%c", 0); PLUGIN_LOG_DBG ("plugin path %s", pm->plugin_path); @@ -633,7 +644,6 @@ vlib_plugins_show_cmd_fn (vlib_main_t * vm, s = format (s, " Plugin path is: %s\n\n", pm->plugin_path); s = format (s, " %-41s%-33s%s\n", "Plugin", "Version", "Description"); - /* *INDENT-OFF* */ hash_foreach_mem (key, value, pm->plugin_by_name_hash, { if (key != 0) @@ -645,21 +655,18 @@ vlib_plugins_show_cmd_fn (vlib_main_t * vm, index++; } }); - /* *INDENT-ON* */ vlib_cli_output (vm, "%v", s); vec_free (s); return 0; } -/* *INDENT-OFF* */ VLIB_CLI_COMMAND (plugins_show_cmd, static) = { .path = "show plugins", .short_help = "show loaded plugins", .function = vlib_plugins_show_cmd_fn, }; -/* *INDENT-ON* */ static clib_error_t * config_one_plugin (vlib_main_t * vm, char *name, unformat_input_t * input) @@ -756,6 +763,8 @@ done: u8 *s = 0; if (unformat (input, "path %s", &s)) pm->plugin_path = s; + else if (unformat (input, "add-path %s", &s)) + pm->plugin_path_add = s; else if (unformat (input, "name-filter %s", &s)) pm->plugin_name_filter = s; else if (unformat (input, "vat-path %s", &s)) diff --git a/src/vlib/unix/plugin.h b/src/vlib/unix/plugin.h index e3555fe3ba9..a7d9b9449a5 100644 --- a/src/vlib/unix/plugin.h +++ b/src/vlib/unix/plugin.h @@ -58,14 +58,15 @@ typedef struct { + CLIB_CACHE_LINE_ALIGN_MARK (cacheline0); u8 default_disabled : 1; u8 deep_bind : 1; - const char version[32]; - const char version_required[32]; + const char version[64]; + const char version_required[64]; const 
char overrides[256]; const char *early_init; const char *description; -} __clib_packed vlib_plugin_registration_t; +} vlib_plugin_registration_t; /* * Plugins may also use this registration format, which is @@ -123,6 +124,7 @@ typedef struct /* paths and name filters */ u8 *plugin_path; + u8 *plugin_path_add; u8 *plugin_name_filter; u8 *vat_plugin_path; u8 *vat_plugin_name_filter; @@ -144,12 +146,12 @@ extern plugin_main_t vlib_plugin_main; clib_error_t *vlib_plugin_config (vlib_main_t * vm, unformat_input_t * input); int vlib_plugin_early_init (vlib_main_t * vm); int vlib_load_new_plugins (plugin_main_t * pm, int from_early_init); -void *vlib_get_plugin_symbol (char *plugin_name, char *symbol_name); +void *vlib_get_plugin_symbol (const char *plugin_name, + const char *symbol_name); u8 *vlib_get_vat_plugin_path (void); #define VLIB_PLUGIN_REGISTER() \ vlib_plugin_registration_t vlib_plugin_registration \ - CLIB_NOSANITIZE_PLUGIN_REG_SECTION \ __clib_export __clib_section(".vlib_plugin_registration") /* Call a plugin init function: used for init function dependencies. */ diff --git a/src/vlib/unix/util.c b/src/vlib/unix/util.c index 03aef364357..04cd6f593ac 100644 --- a/src/vlib/unix/util.c +++ b/src/vlib/unix/util.c @@ -86,8 +86,8 @@ foreach_directory_file (char *dir_name, s = format (s, "%s/%s", dir_name, e->d_name); t = format (t, "%s", e->d_name); error = f (arg, s, t); - _vec_len (s) = 0; - _vec_len (t) = 0; + vec_set_len (s, 0); + vec_set_len (t, 0); if (error) break; @@ -116,7 +116,7 @@ vlib_unix_recursive_mkdir (char *path) error = clib_error_return_unix (0, "mkdir '%s'", c); goto done; } - _vec_len (c)--; + vec_dec_len (c, 1); } vec_add1 (c, path[i]); i++; |