/* * Copyright (c) 2015 Cisco and/or its affiliates. * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at: * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #define _GNU_SOURCE #include #include #include #include #include #include DECLARE_CJ_GLOBAL_LOG; #define FRAME_QUEUE_NELTS 64 u32 vl (void *p) { return vec_len (p); } vlib_worker_thread_t *vlib_worker_threads; vlib_thread_main_t vlib_thread_main; /* * Barrier tracing can be enabled on a normal build to collect information * on barrier use, including timings and call stacks. Deliberately not * keyed off CLIB_DEBUG, because that can add significant overhead which * impacts observed timings. 
*/ #ifdef BARRIER_TRACING /* * Output of barrier tracing can be to syslog or elog as suits */ #ifdef BARRIER_TRACING_ELOG static u32 elog_id_for_msg_name (const char *msg_name) { uword *p, r; static uword *h; u8 *name_copy; if (!h) h = hash_create_string (0, sizeof (uword)); p = hash_get_mem (h, msg_name); if (p) return p[0]; r = elog_string (&vlib_global_main.elog_main, "%s", msg_name); name_copy = format (0, "%s%c", msg_name, 0); hash_set_mem (h, name_copy, r); return r; } /* * elog Barrier trace functions, which are nulled out if BARRIER_TRACING isn't * defined */ static inline void barrier_trace_sync (f64 t_entry, f64 t_open, f64 t_closed) { /* *INDENT-OFF* */ ELOG_TYPE_DECLARE (e) = { .format = "barrier <%d#%s(O:%dus:%dus)(%dus)", .format_args = "i4T4i4i4i4", }; /* *INDENT-ON* */ struct { u32 count, caller, t_entry, t_open, t_closed; } *ed = 0; ed = ELOG_DATA (&vlib_global_main.elog_main, e); ed->count = (int) vlib_worker_threads[0].barrier_sync_count; ed->caller = elog_id_for_msg_name (vlib_worker_threads[0].barrier_caller); ed->t_entry = (int) (1000000.0 * t_entry); ed->t_open = (int) (1000000.0 * t_open); ed->t_closed = (int) (1000000.0 * t_closed); } static inline void barrier_trace_sync_rec (f64 t_entry) { /* *INDENT-OFF* */ ELOG_TYPE_DECLARE (e) = { .format = "barrier <%d(%dus)%s", .format_args = "i4i4T4", }; /* *INDENT-ON* */ struct { u32 depth, t_entry, caller; } *ed = 0; ed = ELOG_DATA (&vlib_global_main.elog_main, e); ed->depth = (int) vlib_worker_threads[0].recursion_level - 1; ed->t_entry = (int) (1000000.0 * t_entry); ed->caller = elog_id_for_msg_name (vlib_worker_threads[0].barrier_caller); } static inline void barrier_trace_release_rec (f64 t_entry) { /* *INDENT-OFF* */ ELOG_TYPE_DECLARE (e) = { .format = "barrier (%dus)%d>", .format_args = "i4i4", }; /* *INDENT-ON* */ struct { u32 t_entry, depth; } *ed = 0; ed = ELOG_DATA (&vlib_global_main.elog_main, e); ed->t_entry = (int) (1000000.0 * t_entry); ed->depth = (int) 
vlib_worker_threads[0].recursion_level; } static inline void barrier_trace_release (f64 t_entry, f64 t_closed_total, f64 t_update_main) { /* *INDENT-OFF* */ ELOG_TYPE_DECLARE (e) = { .format = "barrier (%dus){%d}(C:%dus)#%d>", .format_args = "i4i4i4i4", }; /* *INDENT-ON* */ struct { u32 t_entry, t_update_main, t_closed_total, count; } *ed = 0; ed = ELOG_DATA (&vlib_global_main.elog_main, e); ed->t_entry = (int) (1000000.0 * t_entry); ed->t_update_main = (int) (1000000.0 * t_update_main); ed->t_closed_total = (int) (1000000.0 * t_closed_total); ed->count = (int) vlib_worker_threads[0].barrier_sync_count; /* Reset context for next trace */ vlib_worker_threads[0].barrier_context = NULL; } #else char barrier_trace[65536]; char *btp = barrier_trace; /* * syslog Barrier trace functions, which are nulled out if BARRIER_TRACING * isn't defined */ static inline void barrier_trace_sync (f64 t_entry, f64 t_open, f64 t_closed) { btp += sprintf (btp, "<%u#%s", (unsigned int) vlib_worker_threads[0].barrier_sync_count, vlib_worker_threads[0].barrier_caller); if (vlib_worker_threads[0].barrier_context) { btp += sprintf (btp, "[%s]", vlib_worker_threads[0].barrier_context); } btp += sprintf (btp, "(O:%dus:%dus)(%dus):", (int) (1000000.0 * t_entry), (int) (1000000.0 * t_open), (int) (1000000.0 * t_closed)); } static inline void barrier_trace_sync_rec (f64 t_entry) { btp += sprintf (btp, "<%u(%dus)%s:", (int) vlib_worker_threads[0].recursion_level - 1, (int) (1000000.0 * t_entry), vlib_worker_threads[0].barrier_caller); } static inline void barrier_trace_release_rec (f64 t_entry) { btp += sprintf (btp, ":(%dus)%u>", (int) (1000000.0 * t_entry), (int) vlib_worker_threads[0].recursion_level); } static inline void barrier_trace_release (f64 t_entry, f64 t_closed_total, f64 t_update_main) { btp += sprintf (btp, ":(%dus)", (int) (1000000.0 * t_entry)); if (t_update_main > 0) { btp += sprintf (btp, "{%dus}", (int) (1000000.0 * t_update_main)); } btp += sprintf (btp, "(C:%dus)#%u>", (int) 
(1000000.0 * t_closed_total), (int) vlib_worker_threads[0].barrier_sync_count); /* Dump buffer to syslog, and reset for next trace */ fformat (stderr, "BTRC %s\n", barrier_trace); btp = barrier_trace; vlib_worker_threads[0].barrier_context = NULL; } #endif #else /* Null functions for default case where barrier tracing isn't used */ static inline void barrier_trace_sync (f64 t_entry, f64 t_open, f64 t_closed) { } static inline void barrier_trace_sync_rec (f64 t_entry) { } static inline void barrier_trace_release_rec (f64 t_entry) { } static inline void barrier_trace_release (f64 t_entry, f64 t_closed_total, f64 t_update_main) { } #endif uword os_get_nthreads (void) { u32 len; len = vec_len (vlib_thread_stacks); if (len == 0) return 1; else return len; } void vlib_set_thread_name (char *name) { int pthread_setname_np (pthread_t __target_thread, const char *__name); int rv; pthread_t thread = pthread_self (); if (thread) { rv = pthread_setname_np (thread, name); if (rv) clib_warning ("pthread_setname_np returned %d", rv); } } static int sort_registrations_by_no_clone (void *a0, void *a1) { vlib_thread_registration_t **tr0 = a0; vlib_thread_registration_t **tr1 = a1; return ((i32) ((*tr0)->no_data_structure_clone) - ((i32) ((*tr1)->no_data_structure_clone))); } static uword * clib_sysfs_list_to_bitmap (char *filename) { FILE *fp; uword *r = 0; fp = fopen (filename, "r"); if (fp != NULL) { u8 *buffer = 0; vec_validate (buffer, 256 - 1); if (fgets ((char *) buffer, 256, fp)) { unformat_input_t in; unformat_init_string (&in, (char *) buffer, strlen ((char *) buffer)); if (unformat (&in, "%U", unformat_bitmap_list, &r) != 1) clib_warning ("unformat_bitmap_list failed"); unformat_free (&in); } vec_free (buffer); fclose (fp); } return r; } /* Called early in the init sequence */ clib_error_t * vlib_thread_init (vlib_main_t * vm) { vlib_thread_main_t *tm = &vlib_thread_main; vlib_worker_thread_t *w; vlib_thread_registration_t *tr; u32 n_vlib_mains = 1; u32 first_index = 1; 
u32 i; uword *avail_cpu; /* get bitmaps of active cpu cores and sockets */ tm->cpu_core_bitmap = clib_sysfs_list_to_bitmap ("/sys/devices/system/cpu/online"); tm->cpu_socket_bitmap = clib_sysfs_list_to_bitmap ("/sys/devices/system/node/online"); avail_cpu = clib_bitmap_dup (tm->cpu_core_bitmap); /* skip cores */ for (i = 0; i < tm->skip_cores; i++) { uword c = clib_bitmap_first_set (avail_cpu); if (c == ~0) return clib_error_return (0, "no available cpus to skip"); avail_cpu = clib_bitmap_set (avail_cpu, c, 0); } /* grab cpu for main thread */ if (tm->main_lcore == ~0) { /* if main-lcore is not set, we try to use lcore 1 */ if (clib_bitmap_get (avail_cpu, 1)) tm->main_lcore = 1; else tm->main_lcore = clib_bitmap_first_set (avail_cpu); if (tm->main_lcore == (u8) ~ 0) return clib_error_return (0, "no available cpus to be used for the" " main thread"); } else { if (clib_bitmap_get (avail_cpu, tm->main_lcore) == 0) return clib_error_return (0, "cpu %u is not available to be used" " for the main thread", tm->main_lcore); } avail_cpu = clib_bitmap_set (avail_cpu, tm->main_lcore, 0); /* assume that there is socket 0 only if there is no data from sysfs */ if (!tm->cpu_socket_bitmap) tm->cpu_socket_bitmap = clib_bitmap_set (0, 0, 1); /* pin main thread to main_lcore */ if (tm->cb.vlib_thread_set_lcore_cb) { tm->cb.vlib_thread_set_lcore_cb (0, tm->main_lcore); } else { cpu_set_t cpuset; CPU_ZERO (&cpuset); CPU_SET (tm->main_lcore, &cpuset); pthread_setaffinity_np (pthread_self (), sizeof (cpu_set_t), &cpuset); } /* as many threads as stacks... 
*/ vec_validate_aligned (vlib_worker_threads, vec_len (vlib_thread_stacks) - 1, CLIB_CACHE_LINE_BYTES); /* Preallocate thread 0 */ _vec_len (vlib_worker_threads) = 1; w = vlib_worker_threads; w->thread_mheap = clib_mem_get_heap (); w->thread_stack = vlib_thread_stacks[0]; w->lcore_id = tm->main_lcore; w->lwp = syscall (SYS_gettid); w->thread_id = pthread_self (); tm->n_vlib_mains = 1; if (tm->sched_policy != ~0) { struct sched_param sched_param; if (!sched_getparam (w->lwp, &sched_param)) { if (tm->sched_priority != ~0) sched_param.sched_priority = tm->sched_priority; sched_setscheduler (w->lwp, tm->sched_policy, &sched_param); } } /* assign threads to cores and set n_vlib_mains */ tr = tm->next; while (tr) { vec_add1 (tm->registrations, tr); tr = tr->next; } vec_sort_with_function (tm->registrations, sort_registrations_by_no_clone); for (i = 0; i < vec_len (tm->registrations); i++) { int j; tr = tm->registrations[i]; tr->first_index = first_index; first_index += tr->count; n_vlib_mains += (tr->no_data_structure_clone == 0) ? 
tr->count : 0; /* construct coremask */ if (tr->use_pthreads || !tr->count) continue; if (tr->coremask) { uword c; /* *INDENT-OFF* */ clib_bitmap_foreach (c, tr->coremask, ({ if (clib_bitmap_get(avail_cpu, c) == 0) return clib_error_return (0, "cpu %u is not available to be used" " for the '%s' thread",c, tr->name); avail_cpu = clib_bitmap_set(avail_cpu, c, 0); })); /* *INDENT-ON* */ } else { for (j = 0; j < tr->count; j++) { uword c = clib_bitmap_first_set (avail_cpu); if (c == ~0) return clib_error_return (0, "no available cpus to be used for" " the '%s' thread", tr->name); avail_cpu = clib_bitmap_set (avail_cpu, c, 0); tr->coremask = clib_bitmap_set (tr->coremask, c, 1); } } } clib_bitmap_free (avail_cpu); tm->n_vlib_mains = n_vlib_mains; vec_validate_aligned (vlib_worker_threads, first_index - 1, CLIB_CACHE_LINE_BYTES); return 0; } vlib_frame_queue_t * vlib_frame_queue_alloc (int nelts) { vlib_frame_queue_t *fq; fq = clib_mem_alloc_aligned (sizeof (*fq), CLIB_CACHE_LINE_BYTES); memset (fq, 0, sizeof (*fq)); fq->nelts = nelts; fq->vector_threshold = 128; // packets vec_validate_aligned (fq->elts, nelts - 1, CLIB_CACHE_LINE_BYTES); if (1) { if (((uword) & fq->tail) & (CLIB_CACHE_LINE_BYTES - 1)) fformat (stderr, "WARNING: fq->tail unaligned\n"); if (((uword) & fq->head) & (CLIB_CACHE_LINE_BYTES - 1)) fformat (stderr, "WARNING: fq->head unaligned\n"); if (((uword) fq->elts) & (CLIB_CACHE_LINE_BYTES - 1)) fformat (stderr, "WARNING: fq->elts unaligned\n"); if (sizeof (fq->elts[0]) % CLIB_CACHE_LINE_BYTES) fformat (stderr, "WARNING: fq->elts[0] size %d\n", sizeof (fq->elts[0])); if (nelts & (nelts - 1)) { fformat (stderr, "FATAL: nelts MUST be a power of 2\n"); abort (); } } return (fq); } void vl_msg_api_handler_no_free (void *) __attribute__ ((weak)); void vl_msg_api_handler_no_free (void *v) { } /* Turned off, save as reference material... 
*/ #if 0 static inline int vlib_frame_queue_dequeue_internal (int thread_id, vlib_main_t * vm, vlib_node_main_t * nm) { vlib_frame_queue_t *fq = vlib_frame_queues[thread_id]; vlib_frame_queue_elt_t *elt; vlib_frame_t *f; vlib_pending_frame_t *p; vlib_node_runtime_t *r; u32 node_runtime_index; int msg_type; u64 before; int processed = 0; ASSERT (vm == vlib_mains[thread_id]); while (1) { if (fq->head == fq->tail) return processed; elt = fq->elts + ((fq->head + 1) & (fq->nelts - 1)); if (!elt->valid)
/*
 * Copyright (c) 2015 Cisco and/or its affiliates.
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#ifndef included_nsh_h
#define included_nsh_h

#include <vnet/vnet.h>
#include <nsh/nsh_packet.h>
#include <vnet/ip/ip4_packet.h>

/* Key for an option map lookup: option class and option type, followed by
 * an explicit filler byte. */
typedef struct
{
  u16 class;
  u8 type;
  u8 pad;
} nsh_option_map_by_key_t;

/* Option map entry: holds the numeric id of one option. */
typedef struct
{
  u32 option_id;
} nsh_option_map_t;

/* Upper bound on metadata length (presumably in bytes; confirm with users) */
#define MAX_METADATA_LEN 62

/**
 * One configured NSH header.
 *
 * Note: rewrite and rewrite_size are what allow NSH headers of varying
 * length to be supported.
 */
typedef struct
{
  /* Required for pool_get_aligned */
  CLIB_CACHE_LINE_ALIGN_MARK (cacheline0);

  nsh_base_header_t nsh_base;
  union
  {
    nsh_md1_data_t md1_data;
    nsh_md2_data_t md2_data;
  } md;

  /* Length in bytes of the configured md2 metadata */
  u8 tlvs_len;
  /* Configured md2 metadata, network order */
  u8 *tlvs_data;

  /** Rewrite string, network order:
   *  the base header together with the metadata */
  u8 *rewrite;
  /* Length of the rewrite string in bytes */
  u8 rewrite_size;
} nsh_entry_t;

/* Argument bundle for adding or deleting an NSH entry: the is_add flag
 * selects the operation, nsh_entry carries the entry itself. */
typedef struct
{
  u8 is_add;
  nsh_entry_t nsh_entry;
} nsh_add_del_entry_args_t;

/* One NSH mapping: what to do with traffic carrying a given NSP/NSI. */
typedef struct
{
  /* Required for pool_get_aligned */
  CLIB_CACHE_LINE_ALIGN_MARK (cacheline0);

  /** Key for nsh_header_t entry: 24bit NSP 8bit NSI */
  u32 nsp_nsi;

  /** Key of the nsh_header_t entry to map to: 24bit NSP 8bit NSI.
   *  May be ~0 when the next action is to decap to the NSH next protocol.
   *  Heuristic only (rules about NSI decrement are out of scope):
   *    nsp_nsi == mapped_nsp_nsi  -> use-case is like SFC SFF
   *    nsp_nsi != mapped_nsp_nsi  -> use-case is like SFC SF
   */
  u32 mapped_nsp_nsi;

  /* NSH Header action: swap, push and pop */
  u32 nsh_action;

  /** vnet intfc hw_if_index */
  u32 nsh_hw_if;
  /* vnet intfc sw_if_index */
  u32 nsh_sw_if;

  /* encap if index */
  u32 sw_if_index;
  u32 rx_sw_if_index;
  u32 next_node;
  u32 adj_index;
} nsh_map_t;

/* Argument bundle for adding or deleting an NSH mapping: the is_add flag
 * selects the operation, map carries the mapping itself. */
typedef struct
{
  u8 is_add;
  nsh_map_t map;
} nsh_add_del_map_args_t;

/* Key for a proxy session lookup: the transport kind plus its interface. */
typedef struct
{
  /* 1:vxlan; */
  u32 transport_type;
  /* transport's sw_if_index */
  u32 transport_index;
} nsh_proxy_session_by_key_t;

/* Proxy session: the NSP/NSI associated with a transport key. */
typedef struct
{
  /* 24bit NSP 8bit NSI */
  u32 nsp_nsi;
} nsh_proxy_session_t;

/* Size of every per-option table in nsh_main_t */
#define MAX_MD2_OPTIONS 256

/* All NSH state: entries, mappings, proxy sessions and MD-Type 2 handlers. */
typedef struct
{
  /* API message ID base */
  u16 msg_id_base;

  /* vector of nsh_header entry instances */
  nsh_entry_t *nsh_entries;

  /* hash lookup nsh header by key: {u32: nsp_nsi} */
  uword *nsh_entry_by_key;

  /* vector of nsh_mappings */
  nsh_map_t *nsh_mappings;

  /* hash lookup nsh mapping by key: {u32: nsp_nsi} */
  uword *nsh_mapping_by_key;
  /* same, keyed by the mapped key; for use in NSHSFC */
  uword *nsh_mapping_by_mapped_key;

  /* vector of nsh_proxy */
  nsh_proxy_session_t *nsh_proxy_sessions;

  /* hash lookup nsh_proxy by key */
  uword *nsh_proxy_session_by_key;

  /** Free vlib hw_if_indices */
  u32 *free_nsh_tunnel_hw_if_indices;
  /** Mapping from sw_if_index to tunnel index */
  u32 *tunnel_index_by_sw_if_index;

  /* vector of nsh_option_map */
  nsh_option_map_t *nsh_option_mappings;
  /* hash lookup nsh_option_map by key */
  uword *nsh_option_map_by_key;

  /*
   * Arrays of function pointers for MD-Type 2 handling, one slot per option:
   *   add_options()  - API or CLI configuration, while constructing the
   *                    rewrite buffer
   *   options()      - encap node, i.e. when performing PUSH nsh header
   *   swap_options() - swap node, i.e. when performing SWAP nsh header
   *   pop_options()  - decap node, i.e. when performing POP nsh header
   */

  /* sum of header and metadata */
  u8 options_size[MAX_MD2_OPTIONS];
  int (*add_options[MAX_MD2_OPTIONS]) (u8 *opt, u8 *opt_size);
  int (*options[MAX_MD2_OPTIONS]) (vlib_buffer_t *b, nsh_tlv_header_t *opt);
  int (*swap_options[MAX_MD2_OPTIONS]) (vlib_buffer_t *b,
                                        nsh_tlv_header_t *old_opt,
                                        nsh_tlv_header_t *new_opt);
  int (*pop_options[MAX_MD2_OPTIONS]) (vlib_buffer_t *b,
                                       nsh_tlv_header_t *opt);
  u8 *(*trace[MAX_MD2_OPTIONS]) (u8 *s, nsh_tlv_header_t *opt);
  uword decap_v4_next_override;

  /* Feature arc indices */
  u8 input_feature_arc_index;
  u8 output_feature_arc_index;

  /* convenience */
  vlib_main_t *vlib_main;
  vnet_main_t *vnet_main;
} nsh_main_t;

/*
 * The single nsh_main_t instance.
 * NOTE(review): this is an object definition (not an extern declaration) in
 * a header, so every translation unit that includes the header defines
 * nsh_main. That links only where common symbols are merged (-fcommon);
 * toolchains defaulting to -fno-common (e.g. GCC >= 10) report duplicate
 * definitions. Consider `extern nsh_main_t nsh_main;` here with exactly one
 * definition in a .c file; confirm where that definition should live first.
 */
nsh_main_t nsh_main;

/* Graph node registrations defined outside this header. */
extern vlib_node_registration_t nsh_aware_vnf_proxy_node;
extern vlib_node_registration_t nsh_eth_output_node;

/* Per-packet trace record: an opaque buffer of 256 bytes. */
typedef struct
{
  u8 trace_data[256];
} nsh_input_trace_t;

/*
 * Formatting routines: each takes the vector s to append to and a va_list
 * of arguments, and returns a u8 vector. The expected va_list contents are
 * defined by the implementations, which are not visible in this header.
 */
u8 * format_nsh_input_map_trace (u8 * s, va_list * args);
u8 * format_nsh_header_with_length (u8 * s, va_list * args);

/*
 * X-macro listing the NSH base header fields, one `_(field)` per field.
 * Define `_` before expanding it. Used in nsh.c and nsh_test.c.
 */
#define foreach_copy_nsh_base_hdr_field \
  _(ver_o_c)                            \
  _(length)                             \
  _(md_type)                            \
  _(next_protocol)                      \
  _(nsp_nsi)

/*
 * Statistics (not really errors).
 *
 * X-macro: one `_(symbol, description)` per counter. The final entry carries
 * no line continuation, so the definition ends with the list itself instead
 * of depending on a blank line after it.
 */
#define foreach_nsh_node_error                          \
  _(MAPPED, "NSH header found and mapped")              \
  _(NO_MAPPING, "no mapping for nsh key")               \
  _(NO_ENTRY, "no entry for nsh key")                   \
  _(NO_PROXY, "no proxy for transport key")             \
  _(INVALID_NEXT_PROTOCOL, "invalid next protocol")     \
  _(INVALID_OPTIONS, "invalid md2 options")             \
  _(INVALID_TTL, "ttl equals zero")

/* One NSH_NODE_ERROR_<symbol> per entry above, in order, starting at 0;
 * NSH_NODE_N_ERROR is the number of entries. */
typedef enum
{
#define _(sym,str) NSH_NODE_ERROR_##sym,
  foreach_nsh_node_error
#undef _
  NSH_NODE_N_ERROR,
} nsh_input_error_t;

/*
 * X-macro: one `_(symbol, "graph node name")` per next node.
 *
 * The definition stops at the last live entry. The disabled entries are kept
 * in an ordinary comment below rather than being spliced into the macro body
 * through a trailing line continuation.
 */
#define foreach_nsh_node_next                   \
  _(DROP, "error-drop")                         \
  _(ENCAP_GRE4, "gre4-input" )                  \
  _(ENCAP_GRE6, "gre6-input" )                  \
  _(ENCAP_VXLANGPE, "vxlan-gpe-encap" )         \
  _(ENCAP_VXLAN4, "vxlan4-encap" )              \
  _(ENCAP_VXLAN6, "vxlan6-encap" )              \
  _(DECAP_ETH_INPUT, "ethernet-input" )         \
  _(ENCAP_LISP_GPE, "interface-output" )        \
  _(ENCAP_ETHERNET, "nsh-eth-output")
/*
 * Disabled entries, kept for reference:
 *   _(DECAP_IP4_INPUT,  "ip4-input")
 *   _(DECAP_IP6_INPUT,  "ip6-input" )
 */

/* One NSH_NODE_NEXT_<symbol> per entry above, in order, starting at 0;
 * NSH_NODE_N_NEXT is the number of entries. */
typedef enum
{
#define _(s,n) NSH_NODE_NEXT_##s,
  foreach_nsh_node_next
#undef _
  NSH_NODE_N_NEXT,
} nsh_node_next_t;

/* NSH header actions, numbered from 0 in declaration order. */
typedef enum
{
  NSH_ACTION_SWAP = 0,
  NSH_ACTION_PUSH = 1,
  NSH_ACTION_POP = 2,
} nsh_action_type;

/* NSH entity kinds, numbered from 0 in declaration order. */
typedef enum
{
  NSH_INPUT_TYPE = 0,
  NSH_PROXY_TYPE = 1,
  NSH_CLASSIFIER_TYPE = 2,
  NSH_AWARE_VNF_PROXY_TYPE = 3,
} nsh_entity_type;

/* Interface flag value meaning "admin down" (no flag bits set) */
#define VNET_SW_INTERFACE_FLAG_ADMIN_DOWN               0

/* md2 class and type definition */
#define NSH_MD2_IOAM_CLASS                              0x9
#define NSH_MD2_IOAM_OPTION_TYPE_TRACE                  0x3B
#define NSH_MD2_IOAM_OPTION_TYPE_PROOF_OF_TRANSIT       0x3C

#define NSH_MD2_IOAM_TRACE_DUMMY_LEN                    0x8

/* Size limits for an NSH header and for a single option */
#define MAX_NSH_HEADER_LEN                              256
#define MAX_NSH_OPTION_LEN                              128

/**
 * Register the handlers for one MD-Type 2 option.
 *
 * @param class         option class
 * @param type          option type
 * @param option_size   size of the option
 * @param add_options   handler taking the option buffer and its size
 * @param options       handler taking a buffer and the option header
 * @param swap_options  handler taking a buffer plus old and new option headers
 * @param pop_options   handler taking a buffer and the option header
 * @param trace         formatter appending the option to the vector s
 * @return an int status; its values are defined by the implementation,
 *         which is not visible in this header
 */
int nsh_md2_register_option (u16 class,
                             u8 type,
                             u8 option_size,
                             int (*add_options) (u8 *opt, u8 *opt_size),
                             int (*options) (vlib_buffer_t *b,
                                             nsh_tlv_header_t *opt),
                             int (*swap_options) (vlib_buffer_t *b,
                                                  nsh_tlv_header_t *old_opt,
                                                  nsh_tlv_header_t *new_opt),
                             int (*pop_options) (vlib_buffer_t *b,
                                                 nsh_tlv_header_t *opt),
                             u8 *(*trace) (u8 *s, nsh_tlv_header_t *opt));

/* Reduced structure that carries only an output feature arc index. */
typedef struct _nsh_main_dummy
{
  u8 output_feature_arc_index;
} nsh_main_dummy_t;

#endif /* included_nsh_h */
/* Update (all) node runtimes before releasing the barrier, if needed */ if (vm->need_vlib_worker_thread_node_runtime_update) { /* * Lock stat segment here, so we's safe when * rebuilding the stat segment node clones from the * stat thread... */ vlib_stat_segment_lock (); /* Do stats elements on main thread */ worker_thread_node_runtime_update_internal (); vm->need_vlib_worker_thread_node_runtime_update = 0; /* Do per thread rebuilds in parallel */ refork_needed = 1; clib_smp_atomic_add (vlib_worker_threads->node_reforks_required, (vec_len (vlib_mains) - 1)); now = vlib_time_now (vm); t_update_main = now - vm->barrier_epoch; } deadline = now + BARRIER_SYNC_TIMEOUT; *vlib_worker_threads->wait_at_barrier = 0; while (*vlib_worker_threads->workers_at_barrier > 0) { if ((now = vlib_time_now (vm)) > deadline) { fformat (stderr, "%s: worker thread deadlock\n", __FUNCTION__); os_panic (); } } /* Wait for reforks before continuing */ if (refork_needed) { now = vlib_time_now (vm); deadline = now + BARRIER_SYNC_TIMEOUT; while (*vlib_worker_threads->node_reforks_required > 0) { if ((now = vlib_time_now (vm)) > deadline) { fformat (stderr, "%s: worker thread refork deadlock\n", __FUNCTION__); os_panic (); } } vlib_stat_segment_unlock (); } t_closed_total = now - vm->barrier_epoch; minimum_open = t_closed_total * BARRIER_MINIMUM_OPEN_FACTOR; if (minimum_open > BARRIER_MINIMUM_OPEN_LIMIT) { minimum_open = BARRIER_MINIMUM_OPEN_LIMIT; } vm->barrier_no_close_before = now + minimum_open; /* Record barrier epoch (used to enforce minimum open time) */ vm->barrier_epoch = now; barrier_trace_release (t_entry, t_closed_total, t_update_main); } /* * Check the frame queue to see if any frames are available. * If so, pull the packets off the frames and put them to * the handoff node. 
*/ int vlib_frame_queue_dequeue (vlib_main_t * vm, vlib_frame_queue_main_t * fqm) { u32 thread_id = vm->thread_index; vlib_frame_queue_t *fq = fqm->vlib_frame_queues[thread_id]; vlib_frame_queue_elt_t *elt; u32 *from, *to; vlib_frame_t *f; int msg_type; int processed = 0; u32 n_left_to_node; u32 vectors = 0; ASSERT (fq); ASSERT (vm == vlib_mains[thread_id]); if (PREDICT_FALSE (fqm->node_index == ~0)) return 0; /* * Gather trace data for frame queues */ if (PREDICT_FALSE (fq->trace)) { frame_queue_trace_t *fqt; frame_queue_nelt_counter_t *fqh; u32 elix; fqt = &fqm->frame_queue_traces[thread_id]; fqt->nelts = fq->nelts; fqt->head = fq->head; fqt->head_hint = fq->head_hint; fqt->tail = fq->tail; fqt->threshold = fq->vector_threshold; fqt->n_in_use = fqt->tail - fqt->head; if (fqt->n_in_use >= fqt->nelts) { // if beyond max then use max fqt->n_in_use = fqt->nelts - 1; } /* Record the number of elements in use in the histogram */ fqh = &fqm->frame_queue_histogram[thread_id]; fqh->count[fqt->n_in_use]++; /* Record a snapshot of the elements in use */ for (elix = 0; elix < fqt->nelts; elix++) { elt = fq->elts + ((fq->head + 1 + elix) & (fq->nelts - 1)); if (1 || elt->valid) { fqt->n_vectors[elix] = elt->n_vectors; } } fqt->written = 1; } while (1) { if (fq->head == fq->tail) { fq->head_hint = fq->head; return processed; } elt = fq->elts + ((fq->head + 1) & (fq->nelts - 1)); if (!elt->valid) { fq->head_hint = fq->head; return processed; } from = elt->buffer_index; msg_type = elt->msg_type; ASSERT (msg_type == VLIB_FRAME_QUEUE_ELT_DISPATCH_FRAME); ASSERT (elt->n_vectors <= VLIB_FRAME_SIZE); f = vlib_get_frame_to_node (vm, fqm->node_index); to = vlib_frame_vector_args (f); n_left_to_node = elt->n_vectors; while (n_left_to_node >= 4) { to[0] = from[0]; to[1] = from[1]; to[2] = from[2]; to[3] = from[3]; to += 4; from += 4; n_left_to_node -= 4; } while (n_left_to_node > 0) { to[0] = from[0]; to++; from++; n_left_to_node--; } vectors += elt->n_vectors; f->n_vectors = 
elt->n_vectors; vlib_put_frame_to_node (vm, fqm->node_index, f); elt->valid = 0; elt->n_vectors = 0; elt->msg_type = 0xfefefefe; CLIB_MEMORY_BARRIER (); fq->head++; processed++; /* * Limit the number of packets pushed into the graph */ if (vectors >= fq->vector_threshold) { fq->head_hint = fq->head; return processed; } } ASSERT (0); return processed; } void vlib_worker_thread_fn (void *arg) { vlib_worker_thread_t *w = (vlib_worker_thread_t *) arg; vlib_thread_main_t *tm = vlib_get_thread_main (); vlib_main_t *vm = vlib_get_main (); clib_error_t *e; ASSERT (vm->thread_index == vlib_get_thread_index ()); vlib_worker_thread_init (w); clib_time_init (&vm->clib_time); clib_mem_set_heap (w->thread_mheap); /* Wait until the dpdk init sequence is complete */ while (tm->extern_thread_mgmt && tm->worker_thread_release == 0) vlib_worker_thread_barrier_check (); e = vlib_call_init_exit_functions (vm, vm->worker_init_function_registrations, 1 /* call_once */ ); if (e) clib_error_report (e); vlib_worker_loop (vm); } /* *INDENT-OFF* */ VLIB_REGISTER_THREAD (worker_thread_reg, static) = { .name = "workers", .short_name = "wk", .function = vlib_worker_thread_fn, }; /* *INDENT-ON* */ u32 vlib_frame_queue_main_init (u32 node_index, u32 frame_queue_nelts) { vlib_thread_main_t *tm = vlib_get_thread_main (); vlib_frame_queue_main_t *fqm; vlib_frame_queue_t *fq; int i; if (frame_queue_nelts == 0) frame_queue_nelts = FRAME_QUEUE_NELTS; ASSERT (frame_queue_nelts >= 8); vec_add2 (tm->frame_queue_mains, fqm, 1); fqm->node_index = node_index; fqm->frame_queue_nelts = frame_queue_nelts; fqm->queue_hi_thresh = frame_queue_nelts - 2; vec_validate (fqm->vlib_frame_queues, tm->n_vlib_mains - 1); vec_validate (fqm->per_thread_data, tm->n_vlib_mains - 1); _vec_len (fqm->vlib_frame_queues) = 0; for (i = 0; i < tm->n_vlib_mains; i++) { vlib_frame_queue_per_thread_data_t *ptd; fq = vlib_frame_queue_alloc (frame_queue_nelts); vec_add1 (fqm->vlib_frame_queues, fq); ptd = vec_elt_at_index 
(fqm->per_thread_data, i); vec_validate (ptd->handoff_queue_elt_by_thread_index, tm->n_vlib_mains - 1); vec_validate_init_empty (ptd->congested_handoff_queue_by_thread_index, tm->n_vlib_mains - 1, (vlib_frame_queue_t *) (~0)); } return (fqm - tm->frame_queue_mains); } int vlib_thread_cb_register (struct vlib_main_t *vm, vlib_thread_callbacks_t * cb) { vlib_thread_main_t *tm = vlib_get_thread_main (); if (tm->extern_thread_mgmt) return -1; tm->cb.vlib_launch_thread_cb = cb->vlib_launch_thread_cb; tm->extern_thread_mgmt = 1; return 0; } void vlib_process_signal_event_mt_helper (vlib_process_signal_event_mt_args_t * args) { ASSERT (vlib_get_thread_index () == 0); vlib_process_signal_event (vlib_get_main (), args->node_index, args->type_opaque, args->data); } void *rpc_call_main_thread_cb_fn; void vlib_rpc_call_main_thread (void *callback, u8 * args, u32 arg_size) { if (rpc_call_main_thread_cb_fn) { void (*fp) (void *, u8 *, u32) = rpc_call_main_thread_cb_fn; (*fp) (callback, args, arg_size); } else clib_warning ("BUG: rpc_call_main_thread_cb_fn NULL!"); } clib_error_t * threads_init (vlib_main_t * vm) { return 0; } VLIB_INIT_FUNCTION (threads_init); /* * fd.io coding-style-patch-verification: ON * * Local Variables: * eval: (c-set-style "gnu") * End: */