summaryrefslogtreecommitdiffstats
path: root/src/plugins
diff options
context:
space:
mode:
authorDamjan Marion <damarion@cisco.com>2017-11-24 14:34:18 +0100
committerDave Barach <openvpp@barachs.net>2017-11-24 15:02:42 +0000
commit952181e0bcbc838c646bd4c710033dd3bc20cec6 (patch)
tree4f21702cf89ff559e5bf12b0d9f0132caaa0ff41 /src/plugins
parentfd920609819a5b10d3d7c8d34fe4fa4214c7da22 (diff)
dpdk: enable tx checksum offloads as default, add disable knob
New startup.conf knob: dpdk { ... no-tx-checksum-offload ... } Change-Id: I337fd57616dd77687300861b411b420a3cb75149 Signed-off-by: Damjan Marion <damarion@cisco.com>
Diffstat (limited to 'src/plugins')
-rw-r--r--src/plugins/dpdk/device/dpdk.h1
-rwxr-xr-xsrc/plugins/dpdk/device/init.c12
2 files changed, 11 insertions, 2 deletions
diff --git a/src/plugins/dpdk/device/dpdk.h b/src/plugins/dpdk/device/dpdk.h
index efcc5a766bf..c3c98f882e7 100644
--- a/src/plugins/dpdk/device/dpdk.h
+++ b/src/plugins/dpdk/device/dpdk.h
@@ -321,6 +321,7 @@ typedef struct
u8 *uio_driver_name;
u8 no_multi_seg;
u8 enable_tcp_udp_checksum;
+ u8 no_tx_checksum_offload;
/* Required config parameters */
u8 coremask_set_manually;
diff --git a/src/plugins/dpdk/device/init.c b/src/plugins/dpdk/device/init.c
index 8d08da1308f..60f8ce67583 100755
--- a/src/plugins/dpdk/device/init.c
+++ b/src/plugins/dpdk/device/init.c
@@ -331,6 +331,10 @@ dpdk_lib_init (dpdk_main_t * dm)
clib_memcpy (&xd->tx_conf, &dev_info.default_txconf,
sizeof (struct rte_eth_txconf));
+
+ if (dm->conf->no_tx_checksum_offload == 0)
+ xd->tx_conf.txq_flags &= ~ETH_TXQ_FLAGS_NOXSUMS;
+
if (dm->conf->no_multi_seg)
{
xd->tx_conf.txq_flags |= ETH_TXQ_FLAGS_NOMULTSEGS;
@@ -636,8 +640,9 @@ dpdk_lib_init (dpdk_main_t * dm)
hi = vnet_get_hw_interface (dm->vnet_main, xd->hw_if_index);
- if (xd->flags & DPDK_DEVICE_FLAG_TX_OFFLOAD)
- hi->flags |= VNET_HW_INTERFACE_FLAG_SUPPORTS_TX_L4_CKSUM_OFFLOAD;
+ if (dm->conf->no_tx_checksum_offload == 0)
+ if (xd->flags & DPDK_DEVICE_FLAG_TX_OFFLOAD)
+ hi->flags |= VNET_HW_INTERFACE_FLAG_SUPPORTS_TX_L4_CKSUM_OFFLOAD;
dpdk_device_setup (xd);
@@ -915,6 +920,9 @@ dpdk_config (vlib_main_t * vm, unformat_input_t * input)
else if (unformat (input, "enable-tcp-udp-checksum"))
conf->enable_tcp_udp_checksum = 1;
+ else if (unformat (input, "no-tx-checksum-offload"))
+ conf->no_tx_checksum_offload = 1;
+
else if (unformat (input, "decimal-interface-names"))
conf->interface_name_format_decimal = 1;
'#n246'>246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433
/*
 * perfmon_periodic.c - perfmon plug-in periodic event-collection process
 *
 * Copyright (c) <current-year> <your-organization>
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include <vlib/vlib.h>
#include <vppinfra/error.h>
#include <perfmon/perfmon.h>
#include <asm/unistd.h>
#include <sys/ioctl.h>

/*
 * Thin wrapper that issues the perf_event_open system call through
 * syscall(). The syscall result is stored in an int before being
 * returned as a long.
 */
static long
perf_event_open (struct perf_event_attr *hw_event, pid_t pid, int cpu,
		 int group_fd, unsigned long flags)
{
  int rv = syscall (__NR_perf_event_open, hw_event, pid, cpu, group_fd,
		    flags);

  return rv;
}

/*
 * Counter-read callback installed by enable_current_event() as
 * vm->vlib_node_runtime_perf_counter_cb.
 *
 * A nonzero vm->perf_counter_id is passed to clib_rdpmc(). Otherwise the
 * value is fetched with read() on vm->perf_counter_fd; if that does not
 * return a full u64, a warning is logged, the callback is unhooked and
 * 0 is returned.
 */
static u64
read_current_perf_counter (vlib_main_t * vm)
{
  u64 value;

  /* Nonzero id: read the counter with the rdpmc helper */
  if (vm->perf_counter_id != 0)
    return clib_rdpmc (vm->perf_counter_id);

  /* Id 0: fall back to reading the event fd */
  if (read (vm->perf_counter_fd, &value, sizeof (value)) == sizeof (value))
    return value;

  clib_unix_warning ("counter read failed, disable collection...");
  vm->vlib_node_runtime_perf_counter_cb = 0;
  return 0ULL;
}

/*
 * Zero the perfmon tick/vector counters of every node on every thread.
 *
 * The whole update is bracketed by the worker-thread barrier. For each
 * non-null vlib_mains entry, all nodes are first passed through
 * vlib_node_sync_stats(), then their perf_counter_* fields in both
 * stats_total and stats_last_clear are set to 0.
 */
static void
clear_counters (perfmon_main_t * pm)
{
  vlib_main_t *vm = pm->vlib_main;
  int thread_index;

  vlib_worker_thread_barrier_sync (vm);

  for (thread_index = 0; thread_index < vec_len (vlib_mains); thread_index++)
    {
      vlib_main_t *this_vm = vlib_mains[thread_index];
      vlib_node_main_t *node_main;
      int node_index;

      if (this_vm == 0)
	continue;

      node_main = &this_vm->node_main;

      /* First pass: sync the stats of every node */
      for (node_index = 0; node_index < vec_len (node_main->nodes);
	   node_index++)
	vlib_node_sync_stats (this_vm, node_main->nodes[node_index]);

      /* Second pass: zero the perfmon counters of every node */
      for (node_index = 0; node_index < vec_len (node_main->nodes);
	   node_index++)
	{
	  vlib_node_t *node = node_main->nodes[node_index];

	  node->stats_total.perf_counter_ticks = 0;
	  node->stats_total.perf_counter_vectors = 0;
	  node->stats_last_clear.perf_counter_ticks = 0;
	  node->stats_last_clear.perf_counter_vectors = 0;
	}
    }

  vlib_worker_thread_barrier_release (vm);
}

/*
 * Open and enable the perf event selected by pm->current_event, and hook
 * the counter-read callback into the current thread's vlib_main_t.
 *
 * Non-software events exclude kernel and hypervisor activity and get
 * their event page mapped read-only. When that page advertises
 * cap_user_rdpmc, the rdpmc index is taken from it; in every other case
 * the index is recorded as 0, which read_current_perf_counter() treats
 * as "read the fd instead".
 *
 * If the event cannot be opened or mapped, a warning is logged and the
 * function returns without installing the callback.
 */
static void
enable_current_event (perfmon_main_t * pm)
{
  vlib_main_t *vm = vlib_get_main ();
  u32 thread_index = vm->thread_index;
  struct perf_event_mmap_page *page = 0;
  struct perf_event_attr attr;
  perfmon_event_config_t *ev;
  int is_software;
  int event_fd;

  ev = vec_elt_at_index (pm->events_to_collect, pm->current_event);

  memset (&attr, 0, sizeof (attr));
  attr.type = ev->pe_type;
  attr.size = sizeof (attr);
  attr.config = ev->pe_config;
  attr.disabled = 1;
  attr.pinned = 1;

  is_software = (attr.type == PERF_TYPE_SOFTWARE);

  /*
   * Note: excluding the kernel makes the
   * (software) context-switch counter read 0...
   */
  if (!is_software)
    {
      /* Exclude kernel and hypervisor */
      attr.exclude_kernel = 1;
      attr.exclude_hv = 1;
    }

  event_fd = perf_event_open (&attr, 0, -1, -1, 0);
  if (event_fd == -1)
    {
      clib_unix_warning ("event open: type %d config %d", ev->pe_type,
			 ev->pe_config);
      return;
    }

  /* Only non-software events get an event page mapping */
  if (!is_software)
    {
      page = mmap (0, pm->page_size, PROT_READ, MAP_SHARED, event_fd, 0);
      if (page == MAP_FAILED)
	{
	  clib_unix_warning ("mmap");
	  close (event_fd);
	  return;
	}
    }

  /* ioctl failures are reported but do not abort the setup */
  if (ioctl (event_fd, PERF_EVENT_IOC_RESET, 0) < 0)
    clib_unix_warning ("reset ioctl");

  if (ioctl (event_fd, PERF_EVENT_IOC_ENABLE, 0) < 0)
    clib_unix_warning ("enable ioctl");

  /*
   * Counters which can be read via the "rdpmc" instruction use the
   * index from the event page; software events - and anything else -
   * are read by system calls (index 0).
   */
  if (!is_software && page->cap_user_rdpmc != 0)
    pm->rdpmc_indices[thread_index] = page->index - 1;
  else
    pm->rdpmc_indices[thread_index] = 0;

  pm->perf_event_pages[thread_index] = (void *) page;
  pm->pm_fds[thread_index] = event_fd;

  /* Enable the main loop counter snapshot mechanism */
  vm->perf_counter_id = pm->rdpmc_indices[thread_index];
  vm->perf_counter_fd = event_fd;
  vm->vlib_node_runtime_perf_counter_cb = read_current_perf_counter;
}

/*
 * Stop collection on the current thread and release its perf event.
 *
 * Does nothing when no fd is recorded for this thread (pm_fds slot is 0).
 * Otherwise: unhook the counter-read callback, disable the event, unmap
 * the event page if one was mapped, close the fd, and clear both the
 * page and fd slots so no stale handle is left behind.
 */
static void
disable_event (perfmon_main_t * pm)
{
  vlib_main_t *vm = vlib_get_main ();
  u32 my_thread_index = vm->thread_index;

  if (pm->pm_fds[my_thread_index] == 0)
    return;

  /* Stop main loop collection */
  vm->vlib_node_runtime_perf_counter_cb = 0;

  if (ioctl (pm->pm_fds[my_thread_index], PERF_EVENT_IOC_DISABLE, 0) < 0)
    clib_unix_warning ("disable ioctl");

  if (pm->perf_event_pages[my_thread_index])
    {
      if (munmap (pm->perf_event_pages[my_thread_index], pm->page_size) < 0)
	clib_unix_warning ("munmap");
      /* Do not keep a pointer to a page we just tried to unmap */
      pm->perf_event_pages[my_thread_index] = 0;
    }

  (void) close (pm->pm_fds[my_thread_index]);
  pm->pm_fds[my_thread_index] = 0;
}

/*
 * One-shot callback assigned to a worker's
 * worker_thread_main_loop_callback by start_event() / handle_timeout().
 * Enables the current event, then clears the callback slot on vm.
 */
static void
worker_thread_start_event (vlib_main_t * vm)
{
  enable_current_event (&perfmon_main);

  /* Run once only */
  vm->worker_thread_main_loop_callback = 0;
}

/*
 * One-shot callback assigned to a worker's
 * worker_thread_main_loop_callback by handle_timeout().
 * Disables the current event, then clears the callback slot on vm.
 */
static void
worker_thread_stop_event (vlib_main_t * vm)
{
  disable_event (&perfmon_main);

  /* Run once only */
  vm->worker_thread_main_loop_callback = 0;
}

/*
 * Begin a collection run at the first configured event.
 *
 * With no events configured the state goes to PERFMON_STATE_OFF and
 * nothing else happens. Otherwise the state becomes
 * PERFMON_STATE_RUNNING, all counters are cleared, the event is enabled
 * on this thread, and every non-null vlib_mains entry from index 1 up
 * is handed worker_thread_start_event as its main-loop callback.
 *
 * The now and event_data arguments are not used.
 */
static void
start_event (perfmon_main_t * pm, f64 now, uword event_data)
{
  int worker;

  pm->current_event = 0;

  if (vec_len (pm->events_to_collect) == 0)
    {
      pm->state = PERFMON_STATE_OFF;
      return;
    }

  pm->state = PERFMON_STATE_RUNNING;
  clear_counters (pm);

  /* This thread first... */
  enable_current_event (pm);

  /* ...then ask each worker to do the same */
  for (worker = 1; worker < vec_len (vlib_mains); worker++)
    {
      if (vlib_mains[worker] != 0)
	vlib_mains[worker]->worker_thread_main_loop_callback = (void *)
	  worker_thread_start_event;
    }
}

/*
 * Snapshot the per-node perfmon counters of every thread, zero the live
 * counters, and append the snapshot to the capture records.
 *
 * Between barrier sync and release, each node of each non-null
 * vlib_mains entry is synced, copied, and has its perf_counter_* fields
 * zeroed. After the barrier is released, every copied node with nonzero
 * perf_counter_ticks is looked up (or created) in
 * pm->capture_by_thread_and_node_name under the key "t<thread>-<node>",
 * and the current event's name, counter delta and vector delta are
 * appended to that capture. All node copies are freed before returning.
 */
void
scrape_and_clear_counters (perfmon_main_t * pm)
{
  int i, j;
  vlib_main_t *vm = pm->vlib_main;
  vlib_main_t *stat_vm;
  vlib_node_main_t *nm;
  vlib_node_t ***node_dups = 0;
  vlib_node_t **nodes;
  vlib_node_t *n;
  perfmon_capture_t *c;
  perfmon_event_config_t *current_event;
  uword *p;
  u8 *counter_name;
  u64 counter_value;
  u64 vectors_this_counter;

  /* snapshot the nodes, including pm counters */
  vlib_worker_thread_barrier_sync (vm);

  for (j = 0; j < vec_len (vlib_mains); j++)
    {
      stat_vm = vlib_mains[j];
      if (stat_vm == 0)
	continue;

      nm = &stat_vm->node_main;

      for (i = 0; i < vec_len (nm->nodes); i++)
	{
	  n = nm->nodes[i];
	  vlib_node_sync_stats (stat_vm, n);
	}

      nodes = 0;
      vec_validate (nodes, vec_len (nm->nodes) - 1);

      /*
       * Store the snapshot at this thread's own index, so the
       * node_dups[j] lookup below stays aligned with vlib_mains[j]
       * even when an earlier vlib_mains slot is empty.
       */
      vec_validate (node_dups, j);
      node_dups[j] = nodes;

      /* Snapshot and clear the per-node perfmon counters */
      for (i = 0; i < vec_len (nm->nodes); i++)
	{
	  n = nm->nodes[i];
	  nodes[i] = clib_mem_alloc (sizeof (*n));
	  clib_memcpy (nodes[i], n, sizeof (*n));
	  n->stats_total.perf_counter_ticks = 0;
	  n->stats_total.perf_counter_vectors = 0;
	  n->stats_last_clear.perf_counter_ticks = 0;
	  n->stats_last_clear.perf_counter_vectors = 0;
	}
    }

  vlib_worker_thread_barrier_release (vm);

  current_event = pm->events_to_collect + pm->current_event;

  for (j = 0; j < vec_len (vlib_mains); j++)
    {
      stat_vm = vlib_mains[j];
      if (stat_vm == 0)
	continue;

      nodes = node_dups[j];

      for (i = 0; i < vec_len (nodes); i++)
	{
	  u8 *capture_name;

	  n = nodes[i];

	  /* Nothing was counted for this node: drop the copy */
	  if (n->stats_total.perf_counter_ticks == 0)
	    {
	      clib_mem_free (n);
	      continue;
	    }

	  /* NUL-terminated key: "t<thread index>-<node name>" */
	  capture_name = format (0, "t%d-%v%c", j, n->name, 0);

	  p = hash_get_mem (pm->capture_by_thread_and_node_name,
			    capture_name);

	  if (p == 0)
	    {
	      /* New capture: it takes ownership of capture_name */
	      pool_get (pm->capture_pool, c);
	      memset (c, 0, sizeof (*c));
	      c->thread_and_node_name = capture_name;
	      hash_set_mem (pm->capture_by_thread_and_node_name,
			    capture_name, c - pm->capture_pool);
	    }
	  else
	    {
	      c = pool_elt_at_index (pm->capture_pool, p[0]);
	      /* Existing capture already has its key; free this copy */
	      vec_free (capture_name);
	    }

	  /* Snapshot counters, etc. into the capture */
	  counter_name = (u8 *) current_event->name;
	  counter_value = n->stats_total.perf_counter_ticks -
	    n->stats_last_clear.perf_counter_ticks;
	  vectors_this_counter = n->stats_total.perf_counter_vectors -
	    n->stats_last_clear.perf_counter_vectors;

	  vec_add1 (c->counter_names, counter_name);
	  vec_add1 (c->counter_values, counter_value);
	  vec_add1 (c->vectors_this_counter, vectors_this_counter);
	  clib_mem_free (n);
	}
      vec_free (nodes);
    }
  vec_free (node_dups);
}

/*
 * Collection interval expired: stop the current event everywhere,
 * harvest the counters, and move on to the next configured event.
 *
 * When the last event has been collected, current_event wraps to 0 and
 * the state goes to PERFMON_STATE_OFF. Otherwise the next event is
 * enabled on this thread and each non-null vlib_mains entry from index 1
 * up is handed worker_thread_start_event.
 *
 * The now argument is not used.
 */
static void
handle_timeout (perfmon_main_t * pm, f64 now)
{
  int worker;

  disable_event (pm);

  /* Ask each worker to stop as well */
  for (worker = 1; worker < vec_len (vlib_mains); worker++)
    {
      if (vlib_mains[worker] != 0)
	vlib_mains[worker]->worker_thread_main_loop_callback = (void *)
	  worker_thread_stop_event;
    }

  /*
   * The loop index only moves past 1 when there are worker slots.
   * Short delay to make sure workers have stopped collection.
   */
  if (worker > 1)
    vlib_process_suspend (pm->vlib_main, 1e-3);

  scrape_and_clear_counters (pm);

  pm->current_event++;
  if (pm->current_event < vec_len (pm->events_to_collect))
    {
      /* More events to go: start the next one here... */
      enable_current_event (pm);

      /* ...and on the workers */
      for (worker = 1; worker < vec_len (vlib_mains); worker++)
	{
	  if (vlib_mains[worker] != 0)
	    vlib_mains[worker]->worker_thread_main_loop_callback = (void *)
	      worker_thread_start_event;
	}
      return;
    }

  /* All events collected */
  pm->current_event = 0;
  pm->state = PERFMON_STATE_OFF;
}

/*
 * Process-node function driving perfmon collection.
 *
 * Loops forever: while the state is PERFMON_STATE_RUNNING it waits for
 * an event or for pm->timeout_interval to elapse, otherwise it waits
 * for an event only. PERFMON_START events call start_event() once per
 * event datum, the ~0 event type calls handle_timeout(), and any
 * other type is logged as unexpected.
 */
static uword
perfmon_periodic_process (vlib_main_t * vm,
			  vlib_node_runtime_t * rt, vlib_frame_t * f)
{
  perfmon_main_t *pm = &perfmon_main;
  uword *events = 0;

  for (;;)
    {
      uword type;
      f64 now;
      int k;

      if (pm->state != PERFMON_STATE_RUNNING)
	vlib_process_wait_for_event (vm);
      else
	vlib_process_wait_for_event_or_clock (vm, pm->timeout_interval);

      now = vlib_time_now (vm);

      type = vlib_process_get_events (vm, (uword **) & events);

      if (type == PERFMON_START)
	{
	  for (k = 0; k < vec_len (events); k++)
	    start_event (pm, now, events[k]);
	}
      else if (type == (uword) ~0)
	{
	  /* Handle timeout */
	  handle_timeout (pm, now);
	}
      else
	{
	  clib_warning ("Unexpected event %d", type);
	}

      /* Keep the vector, drop its contents, ready for the next wakeup */
      vec_reset_length (events);
    }

  /* Not reached: the loop above never exits */
  return 0;
}

/* *INDENT-OFF* */
/*
 * Register perfmon_periodic_process() as a process-type node
 * named "perfmon-periodic-process".
 */
VLIB_REGISTER_NODE (perfmon_periodic_node) =
{
  .function = perfmon_periodic_process,
  .type = VLIB_NODE_TYPE_PROCESS,
  .name = "perfmon-periodic-process",
};
/* *INDENT-ON* */

/*
 * fd.io coding-style-patch-verification: ON
 *
 * Local Variables:
 * eval: (c-set-style "gnu")
 * End:
 */