/* * Copyright (c) 2015 Cisco and/or its affiliates. * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at: * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #define _GNU_SOURCE #include #include #include #include #include #include #include #include #include u32 vl (void *p) { return vec_len (p); } vlib_worker_thread_t *vlib_worker_threads; vlib_thread_main_t vlib_thread_main; /* * Barrier tracing can be enabled on a normal build to collect information * on barrier use, including timings and call stacks. Deliberately not * keyed off CLIB_DEBUG, because that can add significant overhead which * imapacts observed timings. */ static inline void barrier_trace_sync (f64 t_entry, f64 t_open, f64 t_closed) { if (!vlib_worker_threads->barrier_elog_enabled) return; ELOG_TYPE_DECLARE (e) = { .format = "bar-trace-%s-#%d", .format_args = "T4i4", }; struct { u32 caller, count, t_entry, t_open, t_closed; } *ed = 0; ed = ELOG_DATA (&vlib_global_main.elog_main, e); ed->count = (int) vlib_worker_threads[0].barrier_sync_count; ed->caller = elog_string (&vlib_global_main.elog_main, (char *) vlib_worker_threads[0].barrier_caller); ed->t_entry = (int) (1000000.0 * t_entry); ed->t_open = (int) (1000000.0 * t_open); ed->t_closed = (int) (1000000.0 * t_closed); } static inline void barrier_trace_sync_rec (f64 t_entry) { if (!vlib_worker_threads->barrier_elog_enabled) return; ELOG_TYPE_DECLARE (e) = { .format = "bar-syncrec-%s-#%d", .format_args = "T4i4", }; struct { u32 caller, depth; } *ed = 0; ed = ELOG_DATA (&vlib_global_main.elog_main, e); ed->depth = (int) vlib_worker_threads[0].recursion_level - 1; ed->caller = elog_string (&vlib_global_main.elog_main, (char *) vlib_worker_threads[0].barrier_caller); } static inline void barrier_trace_release_rec (f64 t_entry) { if (!vlib_worker_threads->barrier_elog_enabled) return; ELOG_TYPE_DECLARE (e) = { .format = "bar-relrrec-#%d", .format_args = "i4", }; struct { u32 depth; } *ed = 0; ed = ELOG_DATA (&vlib_global_main.elog_main, e); ed->depth = (int) vlib_worker_threads[0].recursion_level; } static inline void barrier_trace_release (f64 t_entry, f64 t_closed_total, f64 t_update_main) { if (!vlib_worker_threads->barrier_elog_enabled) return; ELOG_TYPE_DECLARE (e) = { .format = "bar-rel-#%d-e%d-u%d-t%d", .format_args = "i4i4i4i4", }; struct { u32 count, t_entry, t_update_main, t_closed_total; } *ed = 0; ed = ELOG_DATA (&vlib_global_main.elog_main, e); ed->t_entry = (int) (1000000.0 * t_entry); ed->t_update_main = (int) (1000000.0 * t_update_main); ed->t_closed_total = (int) (1000000.0 * t_closed_total); ed->count = (int) vlib_worker_threads[0].barrier_sync_count; /* Reset context for next trace */ vlib_worker_threads[0].barrier_context = NULL; } uword os_get_nthreads (void) { return vec_len (vlib_thread_stacks); } void vlib_set_thread_name (char *name) { int pthread_setname_np (pthread_t __target_thread, const char *__name); int rv; pthread_t thread = pthread_self (); if (thread) { rv = pthread_setname_np (thread, name); if (rv) clib_warning ("pthread_setname_np returned %d", rv); } } static int sort_registrations_by_no_clone (void *a0, 
void *a1) { vlib_thread_registration_t **tr0 = a0; vlib_thread_registration_t **tr1 = a1; return ((i32) ((*tr0)->no_data_structure_clone) - ((i32) ((*tr1)->no_data_structure_clone))); } /* Called early in the init sequence */ clib_error_t * vlib_thread_init (vlib_main_t * vm) { vlib_thread_main_t *tm = &vlib_thread_main; vlib_worker_thread_t *w; vlib_thread_registration_t *tr; u32 n_vlib_mains = 1; u32 first_index = 1; u32 i; uword *avail_cpu; u32 stats_num_worker_threads_dir_index; stats_num_worker_threads_dir_index = vlib_stats_add_gauge ("/sys/num_worker_threads"); ASSERT (stats_num_worker_threads_dir_index != ~0); /* get bitmaps of active cpu cores and sockets */ tm->cpu_core_bitmap = clib_sysfs_list_to_bitmap ("/sys/devices/system/cpu/online"); tm->cpu_socket_bitmap = clib_sysfs_list_to_bitmap ("/sys/devices/system/node/online"); avail_cpu = clib_bitmap_dup (tm->cpu_core_bitmap); /* skip cores */ for (i = 0; i < tm->skip_cores; i++) { uword c = clib_bitmap_first_set (avail_cpu); if (c == ~0) return clib_error_return (0, "no available cpus to skip"); avail_cpu = clib_bitmap_set (avail_cpu, c, 0); } /* grab cpu for main thread */ if (tm->main_lcore != ~0) { if (clib_bitmap_get (avail_cpu, tm->main_lcore) == 0) return clib_error_return (0, "cpu %u is not available to be used" " for the main thread", tm->main_lcore); avail_cpu = clib_bitmap_set (avail_cpu, tm->main_lcore, 0); } /* assume that there is socket 0 only if there is no data from sysfs */ if (!tm->cpu_socket_bitmap) tm->cpu_socket_bitmap = clib_bitmap_set (0, 0, 1); /* pin main thread to main_lcore */ if (tm->main_lcore != ~0) { cpu_set_t cpuset; CPU_ZERO (&cpuset); CPU_SET (tm->main_lcore, &cpuset); pthread_setaffinity_np (pthread_self (), sizeof (cpu_set_t), &cpuset); } /* Set up thread 0 */ vec_validate_aligned (vlib_worker_threads, 0, CLIB_CACHE_LINE_BYTES); vec_set_len (vlib_worker_threads, 1); w = vlib_worker_threads; w->thread_mheap = clib_mem_get_heap (); w->thread_stack = vlib_thread_stacks[0]; w->cpu_id = tm->main_lcore; w->lwp = syscall (SYS_gettid); w->thread_id = pthread_self (); tm->n_vlib_mains = 1; vlib_get_thread_core_numa (w, w->cpu_id); if (tm->sched_policy != ~0) { struct sched_param sched_param; if (!sched_getparam (w->lwp, &sched_param)) { if (tm->sched_priority != ~0) sched_param.sched_priority = tm->sched_priority; sched_setscheduler (w->lwp, tm->sched_policy, &sched_param); } } /* assign threads to cores and set n_vlib_mains */ tr = tm->next; while (tr) { vec_add1 (tm->registrations, tr); tr = tr->next; } vec_sort_with_function (tm->registrations, sort_registrations_by_no_clone); for (i = 0; i < vec_len (tm->registrations); i++) { int j; tr = tm->registrations[i]; tr->first_index = first_index; first_index += tr->count; n_vlib_mains += (tr->no_data_structure_clone == 0) ? 
tr->count : 0; /* construct coremask */ if (tr->use_pthreads || !tr->count) continue; if (tr->coremask) { uword c; /* *INDENT-OFF* */ clib_bitmap_foreach (c, tr->coremask) { if (clib_bitmap_get(avail_cpu, c) == 0) return clib_error_return (0, "cpu %u is not available to be used" " for the '%s' thread",c, tr->name); avail_cpu = clib_bitmap_set(avail_cpu, c, 0); } /* *INDENT-ON* */ } else { for (j = 0; j < tr->count; j++) { /* Do not use CPU 0 by default - leave it to the host and IRQs */ uword avail_c0 = clib_bitmap_get (avail_cpu, 0); avail_cpu = clib_bitmap_set (avail_cpu, 0, 0); uword c = clib_bitmap_first_set (avail_cpu); /* Use CPU 0 as a last resort */ if (c == ~0 && avail_c0) { c = 0; avail_c0 = 0; } if (c == ~0) return clib_error_return (0, "no available cpus to be used for" " the '%s' thread", tr->name); avail_cpu = clib_bitmap_set (avail_cpu, 0, avail_c0); avail_cpu = clib_bitmap_set (avail_cpu, c, 0); tr->coremask = clib_bitmap_set (tr->coremask, c, 1); } } } clib_bitmap_free (avail_cpu); tm->n_vlib_mains = n_vlib_mains; vlib_stats_set_gauge (stats_num_worker_threads_dir_index, n_vlib_mains - 1); /* * Allocate the remaining worker threads, and thread stack vector slots * from now on, calls to os_get_nthreads() will return the correct * answer. */ vec_validate_aligned (vlib_worker_threads, first_index - 1, CLIB_CACHE_LINE_BYTES); vec_validate (vlib_thread_stacks, vec_len (vlib_worker_threads) - 1); return 0; } vlib_frame_queue_t * vlib_frame_queue_alloc (int nelts) { vlib_frame_queue_t *fq; fq = clib_mem_alloc_aligned (sizeof (*fq), CLIB_CACHE_LINE_BYTES); clib_memset (fq, 0, sizeof (*fq)); fq->nelts = nelts; fq->vector_threshold = 2 * VLIB_FRAME_SIZE; vec_validate_aligned (fq->elts, nelts - 1, CLIB_CACHE_LINE_BYTES); if (nelts & (nelts - 1)) { fformat (stderr, "FATAL: nelts MUST be a power of 2\n"); abort (); } return (fq); } void vl_msg_api_handler_no_free (void *) __attribute__ ((weak)); void vl_msg_api_handler_no_free (void *v) { } /* To be called by vlib worker threads upon startup */ void vlib_worker_thread_init (vlib_worker_thread_t * w) { vlib_thread_main_t *tm = vlib_get_thread_main (); /* * Note: disabling signals in worker threads as follows * prevents the api post-mortem dump scheme from working * { * sigset_t s; * sigfillset (&s); * pthread_sigmask (SIG_SETMASK, &s, 0); * } */ clib_mem_set_heap (w->thread_mheap); if (vec_len (tm->thread_prefix) && w->registration->short_name) { w->name = format (0, "%v_%s_%d%c", tm->thread_prefix, w->registration->short_name, w->instance_id, '\0'); vlib_set_thread_name ((char *) w->name); } if (!w->registration->use_pthreads) { /* Initial barrier sync, for both worker and i/o threads */ clib_atomic_fetch_add (vlib_worker_threads->workers_at_barrier, 1); while (*vlib_worker_threads->wait_at_barrier) ; clib_atomic_fetch_add (vlib_worker_threads->workers_at_barrier, -1); } } void * vlib_worker_thread_bootstrap_fn (void *arg) { vlib_worker_thread_t *w = arg; w->lwp = syscall (SYS_gettid); w->thread_id = pthread_self (); __os_thread_index = w - vlib_worker_threads; if (CLIB_DEBUG > 0) { void *frame_addr = __builtin_frame_address (0); if (frame_addr < (void *) w->thread_stack || frame_addr > (void *) w->thread_stack + VLIB_THREAD_STACK_SIZE) { /* heap is not set yet */ fprintf (stderr, "thread stack is not set properly\n"); exit (1); } } w->thread_function (arg); return 0; } void vlib_get_thread_core_numa (vlib_worker_thread_t * w, unsigned cpu_id) { const char *sys_cpu_path = "/sys/devices/system/cpu/cpu"; const char *sys_node_path = 
"/sys/devices/system/node/node"; clib_bitmap_t *nbmp = 0, *cbmp = 0; u32 node; u8 *p = 0; int core_id = -1, numa_id = -1; p = format (p, "%s%u/topology/core_id%c", sys_cpu_path, cpu_id, 0); clib_sysfs_read ((char *) p, "%d", &core_id); vec_reset_length (p); /* *INDENT-OFF* */ clib_sysfs_read ("/sys/devices/system/node/online", "%U", unformat_bitmap_list, &nbmp); clib_bitmap_foreach (node, nbmp) { p = format (p, "%s%u/cpulist%c", sys_node_path, node, 0); clib_sysfs_read ((char *) p, "%U", unformat_bitmap_list, &cbmp); if (clib_bitmap_get (cbmp, cpu_id)) numa_id = node; vec_reset_length (cbmp); vec_reset_length (p); } /* *INDENT-ON* */ vec_free (nbmp); vec_free (cbmp); vec_free (p); w->core_id = core_id; w->numa_id = numa_id; } static clib_error_t * vlib_launch_thread_int (void *fp, vlib_worker_thread_t * w, unsigned cpu_id) { clib_mem_main_t *mm = &clib_mem_main; vlib_thread_main_t *tm = &vlib_thread_main; pthread_t worker; pthread_attr_t attr; cpu_set_t cpuset; void *(*fp_arg) (void *) = fp; void *numa_heap; w->cpu_id = cpu_id; vlib_get_thread_core_numa (w, cpu_id); /* Set up NUMA-bound heap if indicated */ if (mm->per_numa_mheaps[w->numa_id] == 0) { /* If the user requested a NUMA heap, create it... */ if (tm->numa_heap_size) { clib_mem_set_numa_affinity (w->numa_id, 1 /* force */ ); numa_heap = clib_mem_create_heap (0 /* DIY */ , tm->numa_heap_size, 1 /* is_locked */ , "numa %u heap", w->numa_id); clib_mem_set_default_numa_affinity (); mm->per_numa_mheaps[w->numa_id] = numa_heap; } else { /* Or, use the main heap */ mm->per_numa_mheaps[w->numa_id] = w->thread_mheap; } } CPU_ZERO (&cpuset); CPU_SET (cpu_id, &cpuset); if (pthread_attr_init (&attr)) return clib_error_return_unix (0, "pthread_attr_init"); if (pthread_attr_setstack (&attr, w->thread_stack, VLIB_THREAD_STACK_SIZE)) return clib_error_return_unix (0, "pthread_attr_setstack"); if (pthread_create (&worker, &attr, fp_arg, (void *) w)) return clib_error_return_unix (0, "pthread_create"); if (pthread_setaffinity_np (worker, sizeof (cpu_set_t), &cpuset)) return clib_error_return_unix (0, "pthread_setaffinity_np"); if (pthread_attr_destroy (&attr)) return clib_error_return_unix (0, "pthread_attr_destroy"); return 0; } static clib_error_t * start_workers (vlib_main_t * vm) { vlib_global_main_t *vgm = vlib_get_global_main (); vlib_main_t *fvm = vlib_get_first_main (); int i, j; vlib_worker_thread_t *w; vlib_main_t *vm_clone; void *oldheap; vlib_thread_main_t *tm = &vlib_thread_main; vlib_thread_registration_t *tr; vlib_node_runtime_t *rt; u32 n_vlib_mains = tm->n_vlib_mains; u32 worker_thread_index; u32 stats_err_entry_index = fvm->error_main.stats_err_entry_index; clib_mem_heap_t *main_heap = clib_mem_get_per_cpu_heap (); vlib_stats_register_mem_heap (main_heap); vec_reset_length (vlib_worker_threads); /* Set up the main thread */ vec_add2_aligned (vlib_worker_threads, w, 1, CLIB_CACHE_LINE_BYTES); w->elog_track.name = "main thread"; elog_track_register (vlib_get_elog_main (), &w->elog_track); if (vec_len (tm->thread_prefix)) { w->name = format (0, "%v_main%c", tm->thread_prefix, '\0'); vlib_set_thread_name ((char *) w->name); } vgm->elog_main.lock = clib_mem_alloc_aligned (CLIB_CACHE_LINE_BYTES, CLIB_CACHE_LINE_BYTES); vgm->elog_main.lock[0] = 0; clib_callback_data_init (&vm->vlib_node_runtime_perf_callbacks, &vm->worker_thread_main_loop_callback_lock); vec_validate_aligned (vgm->vlib_mains, n_vlib_mains - 1, CLIB_CACHE_LINE_BYTES); vec_set_len (vgm->vlib_mains, 0); vec_add1_aligned (vgm->vlib_mains, vm, CLIB_CACHE_LINE_BYTES); if 
(n_vlib_mains > 1) {
/*
 * Copyright (c) 2011-2016 Cisco and/or its affiliates.
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
/**
 * @file
 * @brief LLDP packet generation implementation
 */
#include <lldp/lldp_node.h>
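
/*
 * Each helper below appends a single 802.1AB TLV at the write pointer
 * handed in via t0p and then advances that pointer past the bytes it
 * wrote, so the LLDP frame construction code can call the helpers back
 * to back while filling in an outgoing LLDPDU.
 */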

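/*
 * Build one Management Address TLV at *t0p and advance the pointer past
 * it.  The TLV value is laid out as follows (offsets into t->v):
 *
 *   [0]                   management address string length (addr_len + 1)
 *   [1]                   address subtype
 *   [2 .. addr_len+1]     management address
 *   [addr_len+2]          interface numbering subtype (2 = ifIndex)
 *   [addr_len+3 .. +6]    interface number, most significant byte first
 *   [addr_len+7]          OID string length
 *   [addr_len+8 ..]       OID, if any
 *
 * hence the total value length of addr_len + oid_len + 8 set at the end.
 */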
static void
lldp_build_mgmt_addr_tlv (u8 ** t0p, u8 subtype, u8 addr_len, u8 * addr,
			  u32 if_index, u8 oid_len, u8 * oid)
{
  lldp_tlv_t *t = (lldp_tlv_t *) * t0p;

  lldp_tlv_set_code (t, LLDP_TLV_NAME (mgmt_addr));
  t->v[0] = addr_len + 1;	/* address string length */
  t->v[1] = subtype;		/* address subtype */
  clib_memcpy_fast (&(t->v[2]), addr, addr_len);	/* address */
  t->v[addr_len + 2] = 2;	/* interface numbering subtype: ifIndex */
  t->v[addr_len + 3] = (if_index >> 24) & 0xFF;	/* interface number */
  t->v[addr_len + 4] = (if_index >> 16) & 0xFF;
  t->v[addr_len + 5] = (if_index >> 8) & 0xFF;
  t->v[addr_len + 6] = (if_index >> 0) & 0xFF;
  t->v[addr_len + 7] = oid_len;	/* OID string length */

  if (oid_len > 0)
    clib_memcpy_fast ((u8 *) & (t->v[addr_len + 8]), oid, oid_len);

  lldp_tlv_set_length (t, addr_len + oid_len + 8);
  *t0p += STRUCT_SIZE_OF (lldp_tlv_t, head) + addr_len + oid_len + 8;
}

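/*
 * Append a Chassis ID TLV that identifies this system by the interface's
 * 6-byte MAC address (chassis ID subtype "MAC address") and advance *t0p.
 */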
static void
lldp_add_chassis_id (const vnet_hw_interface_t * hw, u8 ** t0p)
{
  lldp_chassis_id_tlv_t *t = (lldp_chassis_id_tlv_t *) * t0p;

  lldp_tlv_set_code ((lldp_tlv_t *) t, LLDP_TLV_NAME (chassis_id));
  t->subtype = LLDP_CHASS_ID_SUBTYPE_NAME (mac_addr);

  const size_t addr_len = 6;
  clib_memcpy_fast (&t->id, hw->hw_address, addr_len);
  const size_t len =
    STRUCT_SIZE_OF (lldp_chassis_id_tlv_t, subtype) + addr_len;
  lldp_tlv_set_length ((lldp_tlv_t *) t, len);
  *t0p += STRUCT_SIZE_OF (lldp_tlv_t, head) + len;
}

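/*
 * Append a Port ID TLV that identifies the port by its interface name
 * (port ID subtype "interface name") and advance *t0p.
 */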
static void
lldp_add_port_id (const vnet_hw_interface_t * hw, u8 ** t0p)
{
  lldp_port_id_tlv_t *t = (lldp_port_id_tlv_t *) * t0p;

  lldp_tlv_set_code ((lldp_tlv_t *) t, LLDP_TLV_NAME (port_id));
  t->subtype = LLDP_PORT_ID_SUBTYPE_NAME (intf_name);

  const size_t name_len = vec_len (hw->name);
  clib_memcpy_fast (&t->id, hw->name, name_len);
  const size_t len = STRUCT_SIZE_OF (lldp_port_id_tlv_t, subtype) + name_len;
  lldp_tlv_set_length ((lldp_tlv_t *) t, len);
  *t0p += STRUCT_SIZE_OF (lldp_tlv_t, head) + len;
}

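/*
 * Append a Time To Live TLV.  On shutdown a TTL of 0 is advertised so
 * peers expire this neighbor immediately; otherwise the TTL is
 * msg_tx_interval * msg_tx_hold + 1 seconds, clamped to the 16-bit
 * maximum of 65535 and stored in network byte order.
 */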
static void
lldp_add_ttl (const lldp_main_t * lm, u8 ** t0p, int shutdown)
{
  lldp_ttl_tlv_t *t = (lldp_ttl_tlv_t *) * t0p;
  lldp_tlv_set_code ((lldp_tlv_t *) t, LLDP_TLV_NAME (ttl));
  if (shutdown)
    {
      t->ttl = 0;
    }
  else
    {
      if ((size_t) lm->msg_tx_interval * lm->msg_tx_hold + 1 > (1 << 16) - 1)
	{
	  t->ttl = htons ((1 << 16) - 1);
	}
      else
	{
	  t->ttl = htons (lm->msg_tx_hold * lm->msg_tx_interval + 1);
	}
    }
  const size_t len = STRUCT_SIZE_OF (lldp_ttl_tlv_t, ttl);
  lldp_tlv_set_length ((lldp_tlv_t *) t, len);
  *t0p += STRUCT_SIZE_OF (lldp_tlv_t, head) + len;
}

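/*
 * Append an optional Port Description TLV carrying the interface's
 * configured port description; nothing is emitted when none is set.
 */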
static void
lldp_add_port_desc (const lldp_main_t * lm, lldp_intf_t * n, u8 ** t0p)
{
  const size_t len = vec_len (n->port_desc);
  if (len)
    {
      lldp_tlv_t *t = (lldp_tlv_t *) * t0p;
      lldp_tlv_set_code (t, LLDP_TLV_NAME (port_desc));
      lldp_tlv_set_length (t, len);
      clib_memcpy_fast (t->v, n->port_desc, len);
      *t0p += STRUCT_SIZE_OF (lldp_tlv_t, head) + len;
    }
}

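/*
 * Append an optional System Name TLV carrying the configured system
 * name; nothing is emitted when none is set.
 */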
static void
lldp_add_sys_name (const lldp_main_t * lm, u8 ** t0p)
{
  const size_t len = vec_len (lm->sys_name);
  if (len)
    {
      lldp_tlv_t *t = (lldp_tlv_t *) * t0p;
      lldp_tlv_set_code (t, LLDP_TLV_NAME (sys_name));
      lldp_tlv_set_length (t, len);
      clib_memcpy_fast (t->v, lm->sys_name, len);
      *t0p += STRUCT_SIZE_OF (lldp_tlv_t, head) + len;
    }
}

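/*
 * Append management address information for an interface.  When neither
 * an IPv4 nor an IPv6 management address is configured, a single TLV
 * carrying the interface's MAC address is built instead, as described in
 * the comment inside the function.
 */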
static void
lldp_add_mgmt_addr (const lldp_intf_t * n, const vnet_hw_interface_t * hw,
		    u8 ** t0p)
{
  const size_t len_ip4 = vec_len (n->mgmt_ip4);
  const size_t len_ip6 = vec_len (n->mgmt_ip6);

  if (!(len_ip4 | len_ip6))
    {
      /*
         If no management address is configured, the interface port's MAC
         address is sent in one TLV.
       */

      lldp_build_mgmt_addr_tlv (t0p, 1,	/* address subtype: IPv4 */
				6,	/* address string length */
				hw->hw_address,	/* address */
				hw->hw_if_index,	/* if index */
				vec_len (n->mgmt_oid),