Diffstat (limited to 'vnet/vnet/devices/dpdk')
-rw-r--r--  vnet/vnet/devices/dpdk/cli.c                              1296
-rw-r--r--  vnet/vnet/devices/dpdk/device.c                            840
-rw-r--r--  vnet/vnet/devices/dpdk/dpdk.h                              534
-rw-r--r--  vnet/vnet/devices/dpdk/dpdk_priv.h                         132
-rw-r--r--  vnet/vnet/devices/dpdk/format.c                            763
-rw-r--r--  vnet/vnet/devices/dpdk/hqos.c                              775
-rwxr-xr-x  vnet/vnet/devices/dpdk/init.c                             1803
-rw-r--r--  vnet/vnet/devices/dpdk/ipsec/cli.c                         141
-rw-r--r--  vnet/vnet/devices/dpdk/ipsec/crypto_node.c                 210
-rw-r--r--  vnet/vnet/devices/dpdk/ipsec/dir.dox                        18
-rw-r--r--  vnet/vnet/devices/dpdk/ipsec/dpdk_crypto_ipsec_doc.md       73
-rw-r--r--  vnet/vnet/devices/dpdk/ipsec/esp.h                         295
-rw-r--r--  vnet/vnet/devices/dpdk/ipsec/esp_decrypt.c                 583
-rw-r--r--  vnet/vnet/devices/dpdk/ipsec/esp_encrypt.c                 598
-rw-r--r--  vnet/vnet/devices/dpdk/ipsec/ipsec.c                       313
-rw-r--r--  vnet/vnet/devices/dpdk/ipsec/ipsec.h                       227
-rw-r--r--  vnet/vnet/devices/dpdk/node.c                              687
-rw-r--r--  vnet/vnet/devices/dpdk/qos_doc.md                          404
18 files changed, 0 insertions, 9692 deletions
diff --git a/vnet/vnet/devices/dpdk/cli.c b/vnet/vnet/devices/dpdk/cli.c
deleted file mode 100644
index 538a00fd975..00000000000
--- a/vnet/vnet/devices/dpdk/cli.c
+++ /dev/null
@@ -1,1296 +0,0 @@
-/*
- * Copyright (c) 2015 Cisco and/or its affiliates.
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at:
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#include <vnet/vnet.h>
-#include <vppinfra/vec.h>
-#include <vppinfra/error.h>
-#include <vppinfra/format.h>
-#include <vppinfra/xxhash.h>
-
-#include <vnet/ethernet/ethernet.h>
-#include <vnet/devices/dpdk/dpdk.h>
-#include <vnet/classify/vnet_classify.h>
-#include <vnet/mpls/packet.h>
-
-#include "dpdk_priv.h"
-
-static clib_error_t *
-pcap_trace_command_fn (vlib_main_t * vm,
- unformat_input_t * input, vlib_cli_command_t * cmd)
-{
- dpdk_main_t *dm = &dpdk_main;
- u8 *filename;
- u32 max;
- int matched = 0;
- clib_error_t *error = 0;
-
- while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
- {
- if (unformat (input, "on"))
- {
- if (dm->tx_pcap_enable == 0)
- {
- if (dm->pcap_filename == 0)
- dm->pcap_filename = format (0, "/tmp/vpe.pcap%c", 0);
-
- memset (&dm->pcap_main, 0, sizeof (dm->pcap_main));
- dm->pcap_main.file_name = (char *) dm->pcap_filename;
- dm->pcap_main.n_packets_to_capture = 100;
- if (dm->pcap_pkts_to_capture)
- dm->pcap_main.n_packets_to_capture = dm->pcap_pkts_to_capture;
-
- dm->pcap_main.packet_type = PCAP_PACKET_TYPE_ethernet;
- dm->tx_pcap_enable = 1;
- matched = 1;
- vlib_cli_output (vm, "pcap tx capture on...");
- }
- else
- {
- vlib_cli_output (vm, "pcap tx capture already on...");
- }
- matched = 1;
- }
- else if (unformat (input, "off"))
- {
- if (dm->tx_pcap_enable)
- {
- vlib_cli_output (vm, "captured %d pkts...",
- dm->pcap_main.n_packets_captured + 1);
- if (dm->pcap_main.n_packets_captured)
- {
- dm->pcap_main.n_packets_to_capture =
- dm->pcap_main.n_packets_captured;
- error = pcap_write (&dm->pcap_main);
- if (error)
- clib_error_report (error);
- else
- vlib_cli_output (vm, "saved to %s...", dm->pcap_filename);
- }
- }
- else
- {
- vlib_cli_output (vm, "pcap tx capture already off...");
- }
-
- dm->tx_pcap_enable = 0;
- matched = 1;
- }
- else if (unformat (input, "max %d", &max))
- {
- dm->pcap_pkts_to_capture = max;
- matched = 1;
- }
-
- else if (unformat (input, "intfc %U",
- unformat_vnet_sw_interface, dm->vnet_main,
- &dm->pcap_sw_if_index))
- matched = 1;
- else if (unformat (input, "intfc any"))
- {
- dm->pcap_sw_if_index = 0;
- matched = 1;
- }
- else if (unformat (input, "file %s", &filename))
- {
- u8 *chroot_filename;
- /* Brain-police user path input */
- if (strstr ((char *) filename, "..")
- || index ((char *) filename, '/'))
- {
- vlib_cli_output (vm, "illegal characters in filename '%s'",
- filename);
- continue;
- }
-
- chroot_filename = format (0, "/tmp/%s%c", filename, 0);
- vec_free (filename);
-
- if (dm->pcap_filename)
- vec_free (dm->pcap_filename);
- vec_add1 (filename, 0);
- dm->pcap_filename = chroot_filename;
- matched = 1;
- }
- else if (unformat (input, "status"))
- {
- if (dm->tx_pcap_enable == 0)
- {
- vlib_cli_output (vm, "pcap tx capture is off...");
- continue;
- }
-
- vlib_cli_output (vm, "pcap tx capture: %d of %d pkts...",
- dm->pcap_main.n_packets_captured,
- dm->pcap_main.n_packets_to_capture);
- matched = 1;
- }
-
- else
- break;
- }
-
- if (matched == 0)
- return clib_error_return (0, "unknown input `%U'",
- format_unformat_error, input);
-
- return 0;
-}
-
-/* *INDENT-OFF* */
-VLIB_CLI_COMMAND (pcap_trace_command, static) = {
- .path = "pcap tx trace",
- .short_help =
- "pcap tx trace on off max <nn> intfc <intfc> file <name> status",
- .function = pcap_trace_command_fn,
-};
-/* *INDENT-ON* */
-
-
-static clib_error_t *
-show_dpdk_buffer (vlib_main_t * vm, unformat_input_t * input,
- vlib_cli_command_t * cmd)
-{
- struct rte_mempool *rmp;
- int i;
-
- for (i = 0; i < vec_len (vm->buffer_main->pktmbuf_pools); i++)
- {
- rmp = vm->buffer_main->pktmbuf_pools[i];
- if (rmp)
- {
- unsigned count = rte_mempool_avail_count (rmp);
- unsigned free_count = rte_mempool_in_use_count (rmp);
-
- vlib_cli_output (vm,
- "name=\"%s\" available = %7d allocated = %7d total = %7d\n",
- rmp->name, (u32) count, (u32) free_count,
- (u32) (count + free_count));
- }
- else
- {
- vlib_cli_output (vm, "rte_mempool is NULL (!)\n");
- }
- }
- return 0;
-}
-
-/* *INDENT-OFF* */
-VLIB_CLI_COMMAND (cmd_show_dpdk_bufferr,static) = {
- .path = "show dpdk buffer",
- .short_help = "show dpdk buffer state",
- .function = show_dpdk_buffer,
- .is_mp_safe = 1,
-};
-/* *INDENT-ON* */
-
-static clib_error_t *
-test_dpdk_buffer (vlib_main_t * vm, unformat_input_t * input,
- vlib_cli_command_t * cmd)
-{
- static u32 *allocated_buffers;
- u32 n_alloc = 0;
- u32 n_free = 0;
- u32 first, actual_alloc;
-
- while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
- {
- if (unformat (input, "allocate %d", &n_alloc))
- ;
- else if (unformat (input, "free %d", &n_free))
- ;
- else
- break;
- }
-
- if (n_free)
- {
- if (vec_len (allocated_buffers) < n_free)
- return clib_error_return (0, "Can't free %d, only %d allocated",
- n_free, vec_len (allocated_buffers));
-
- first = vec_len (allocated_buffers) - n_free;
- vlib_buffer_free (vm, allocated_buffers + first, n_free);
- _vec_len (allocated_buffers) = first;
- }
- if (n_alloc)
- {
- first = vec_len (allocated_buffers);
- vec_validate (allocated_buffers,
- vec_len (allocated_buffers) + n_alloc - 1);
-
- actual_alloc = vlib_buffer_alloc (vm, allocated_buffers + first,
- n_alloc);
- _vec_len (allocated_buffers) = first + actual_alloc;
-
- if (actual_alloc < n_alloc)
- vlib_cli_output (vm, "WARNING: only allocated %d buffers",
- actual_alloc);
- }
-
- vlib_cli_output (vm, "Currently %d buffers allocated",
- vec_len (allocated_buffers));
-
- if (allocated_buffers && vec_len (allocated_buffers) == 0)
- vec_free (allocated_buffers);
-
- return 0;
-}
-
-/* *INDENT-OFF* */
-VLIB_CLI_COMMAND (cmd_test_dpdk_buffer,static) = {
- .path = "test dpdk buffer",
- .short_help = "test dpdk buffer [allocate <nn>][free <nn>]",
- .function = test_dpdk_buffer,
- .is_mp_safe = 1,
-};
-/* *INDENT-ON* */
-
-static clib_error_t *
-set_dpdk_if_desc (vlib_main_t * vm, unformat_input_t * input,
- vlib_cli_command_t * cmd)
-{
- unformat_input_t _line_input, *line_input = &_line_input;
- dpdk_main_t *dm = &dpdk_main;
- vnet_hw_interface_t *hw;
- dpdk_device_t *xd;
- u32 hw_if_index = (u32) ~ 0;
- u32 nb_rx_desc = (u32) ~ 0;
- u32 nb_tx_desc = (u32) ~ 0;
- clib_error_t *rv;
-
- if (!unformat_user (input, unformat_line_input, line_input))
- return 0;
-
- while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
- {
- if (unformat
- (line_input, "%U", unformat_vnet_hw_interface, dm->vnet_main,
- &hw_if_index))
- ;
- else if (unformat (line_input, "tx %d", &nb_tx_desc))
- ;
- else if (unformat (line_input, "rx %d", &nb_rx_desc))
- ;
- else
- return clib_error_return (0, "parse error: '%U'",
- format_unformat_error, line_input);
- }
-
- unformat_free (line_input);
-
- if (hw_if_index == (u32) ~ 0)
- return clib_error_return (0, "please specify valid interface name");
-
- hw = vnet_get_hw_interface (dm->vnet_main, hw_if_index);
- xd = vec_elt_at_index (dm->devices, hw->dev_instance);
-
- if ((xd->flags & DPDK_DEVICE_FLAG_PMD) == 0)
- return clib_error_return (0, "number of descriptors can be set only for "
- "physical devices");
-
- if ((nb_rx_desc == (u32) ~ 0 || nb_rx_desc == xd->nb_rx_desc) &&
- (nb_tx_desc == (u32) ~ 0 || nb_tx_desc == xd->nb_tx_desc))
- return clib_error_return (0, "nothing changed");
-
- if (nb_rx_desc != (u32) ~ 0)
- xd->nb_rx_desc = nb_rx_desc;
-
- if (nb_tx_desc != (u32) ~ 0)
- xd->nb_tx_desc = nb_tx_desc;
-
- rv = dpdk_port_setup (dm, xd);
-
- return rv;
-}
-
-/* *INDENT-OFF* */
-VLIB_CLI_COMMAND (cmd_set_dpdk_if_desc,static) = {
- .path = "set dpdk interface descriptors",
- .short_help = "set dpdk interface descriptors <if-name> [rx <n>] [tx <n>]",
- .function = set_dpdk_if_desc,
-};
-/* *INDENT-ON* */
-
-static clib_error_t *
-show_dpdk_if_placement (vlib_main_t * vm, unformat_input_t * input,
- vlib_cli_command_t * cmd)
-{
- vlib_thread_main_t *tm = vlib_get_thread_main ();
- dpdk_main_t *dm = &dpdk_main;
- dpdk_device_and_queue_t *dq;
- int cpu;
-
- if (tm->n_vlib_mains == 1)
- vlib_cli_output (vm, "All interfaces are handled by main thread");
-
- for (cpu = 0; cpu < vec_len (dm->devices_by_cpu); cpu++)
- {
- if (vec_len (dm->devices_by_cpu[cpu]))
- vlib_cli_output (vm, "Thread %u (%s at lcore %u):", cpu,
- vlib_worker_threads[cpu].name,
- vlib_worker_threads[cpu].lcore_id);
-
- /* *INDENT-OFF* */
- vec_foreach(dq, dm->devices_by_cpu[cpu])
- {
- u32 hw_if_index = dm->devices[dq->device].vlib_hw_if_index;
- vnet_hw_interface_t * hi = vnet_get_hw_interface(dm->vnet_main, hw_if_index);
- vlib_cli_output(vm, " %v queue %u", hi->name, dq->queue_id);
- }
- /* *INDENT-ON* */
- }
- return 0;
-}
-
-/* *INDENT-OFF* */
-VLIB_CLI_COMMAND (cmd_show_dpdk_if_placement,static) = {
- .path = "show dpdk interface placement",
- .short_help = "show dpdk interface placement",
- .function = show_dpdk_if_placement,
-};
-/* *INDENT-ON* */
-
-static int
-dpdk_device_queue_sort (void *a1, void *a2)
-{
- dpdk_device_and_queue_t *dq1 = a1;
- dpdk_device_and_queue_t *dq2 = a2;
-
- if (dq1->device > dq2->device)
- return 1;
- else if (dq1->device < dq2->device)
- return -1;
- else if (dq1->queue_id > dq2->queue_id)
- return 1;
- else if (dq1->queue_id < dq2->queue_id)
- return -1;
- else
- return 0;
-}
-
-static clib_error_t *
-set_dpdk_if_placement (vlib_main_t * vm, unformat_input_t * input,
- vlib_cli_command_t * cmd)
-{
- unformat_input_t _line_input, *line_input = &_line_input;
- dpdk_main_t *dm = &dpdk_main;
- dpdk_device_and_queue_t *dq;
- vnet_hw_interface_t *hw;
- dpdk_device_t *xd;
- u32 hw_if_index = (u32) ~ 0;
- u32 queue = (u32) 0;
- u32 cpu = (u32) ~ 0;
- int i;
-
- if (!unformat_user (input, unformat_line_input, line_input))
- return 0;
-
- while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
- {
- if (unformat
- (line_input, "%U", unformat_vnet_hw_interface, dm->vnet_main,
- &hw_if_index))
- ;
- else if (unformat (line_input, "queue %d", &queue))
- ;
- else if (unformat (line_input, "thread %d", &cpu))
- ;
- else
- return clib_error_return (0, "parse error: '%U'",
- format_unformat_error, line_input);
- }
-
- unformat_free (line_input);
-
- if (hw_if_index == (u32) ~ 0)
- return clib_error_return (0, "please specify valid interface name");
-
- if (cpu < dm->input_cpu_first_index ||
- cpu >= (dm->input_cpu_first_index + dm->input_cpu_count))
- return clib_error_return (0, "please specify valid thread id");
-
- hw = vnet_get_hw_interface (dm->vnet_main, hw_if_index);
- xd = vec_elt_at_index (dm->devices, hw->dev_instance);
-
- for (i = 0; i < vec_len (dm->devices_by_cpu); i++)
- {
- /* *INDENT-OFF* */
- vec_foreach(dq, dm->devices_by_cpu[i])
- {
- if (hw_if_index == dm->devices[dq->device].vlib_hw_if_index &&
- queue == dq->queue_id)
- {
- if (cpu == i) /* nothing to do */
- return 0;
-
- vec_del1(dm->devices_by_cpu[i], dq - dm->devices_by_cpu[i]);
- vec_add2(dm->devices_by_cpu[cpu], dq, 1);
- dq->queue_id = queue;
- dq->device = xd->device_index;
- xd->cpu_socket_id_by_queue[queue] =
- rte_lcore_to_socket_id(vlib_worker_threads[cpu].lcore_id);
-
- vec_sort_with_function(dm->devices_by_cpu[i],
- dpdk_device_queue_sort);
-
- vec_sort_with_function(dm->devices_by_cpu[cpu],
- dpdk_device_queue_sort);
-
- if (vec_len(dm->devices_by_cpu[i]) == 0)
- vlib_node_set_state (vlib_mains[i], dpdk_input_node.index,
- VLIB_NODE_STATE_DISABLED);
-
- if (vec_len(dm->devices_by_cpu[cpu]) == 1)
- vlib_node_set_state (vlib_mains[cpu], dpdk_input_node.index,
- VLIB_NODE_STATE_POLLING);
-
- return 0;
- }
- }
- /* *INDENT-ON* */
- }
-
- return clib_error_return (0, "not found");
-}
-
-/* *INDENT-OFF* */
-VLIB_CLI_COMMAND (cmd_set_dpdk_if_placement,static) = {
- .path = "set dpdk interface placement",
- .short_help = "set dpdk interface placement <if-name> [queue <n>] thread <n>",
- .function = set_dpdk_if_placement,
-};
-/* *INDENT-ON* */
-
-static clib_error_t *
-show_dpdk_if_hqos_placement (vlib_main_t * vm, unformat_input_t * input,
- vlib_cli_command_t * cmd)
-{
- vlib_thread_main_t *tm = vlib_get_thread_main ();
- dpdk_main_t *dm = &dpdk_main;
- dpdk_device_and_queue_t *dq;
- int cpu;
-
- if (tm->n_vlib_mains == 1)
- vlib_cli_output (vm, "All interfaces are handled by main thread");
-
- for (cpu = 0; cpu < vec_len (dm->devices_by_hqos_cpu); cpu++)
- {
- if (vec_len (dm->devices_by_hqos_cpu[cpu]))
- vlib_cli_output (vm, "Thread %u (%s at lcore %u):", cpu,
- vlib_worker_threads[cpu].name,
- vlib_worker_threads[cpu].lcore_id);
-
- vec_foreach (dq, dm->devices_by_hqos_cpu[cpu])
- {
- u32 hw_if_index = dm->devices[dq->device].vlib_hw_if_index;
- vnet_hw_interface_t *hi =
- vnet_get_hw_interface (dm->vnet_main, hw_if_index);
- vlib_cli_output (vm, " %v queue %u", hi->name, dq->queue_id);
- }
- }
- return 0;
-}
-
-/* *INDENT-OFF* */
-VLIB_CLI_COMMAND (cmd_show_dpdk_if_hqos_placement, static) = {
- .path = "show dpdk interface hqos placement",
- .short_help = "show dpdk interface hqos placement",
- .function = show_dpdk_if_hqos_placement,
-};
-/* *INDENT-ON* */
-
-static clib_error_t *
-set_dpdk_if_hqos_placement (vlib_main_t * vm, unformat_input_t * input,
- vlib_cli_command_t * cmd)
-{
- unformat_input_t _line_input, *line_input = &_line_input;
- dpdk_main_t *dm = &dpdk_main;
- dpdk_device_and_queue_t *dq;
- vnet_hw_interface_t *hw;
- dpdk_device_t *xd;
- u32 hw_if_index = (u32) ~ 0;
- u32 cpu = (u32) ~ 0;
- int i;
-
- if (!unformat_user (input, unformat_line_input, line_input))
- return 0;
-
- while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
- {
- if (unformat
- (line_input, "%U", unformat_vnet_hw_interface, dm->vnet_main,
- &hw_if_index))
- ;
- else if (unformat (line_input, "thread %d", &cpu))
- ;
- else
- return clib_error_return (0, "parse error: '%U'",
- format_unformat_error, line_input);
- }
-
- unformat_free (line_input);
-
- if (hw_if_index == (u32) ~ 0)
- return clib_error_return (0, "please specify valid interface name");
-
- if (cpu < dm->hqos_cpu_first_index ||
- cpu >= (dm->hqos_cpu_first_index + dm->hqos_cpu_count))
- return clib_error_return (0, "please specify valid thread id");
-
- hw = vnet_get_hw_interface (dm->vnet_main, hw_if_index);
- xd = vec_elt_at_index (dm->devices, hw->dev_instance);
-
- for (i = 0; i < vec_len (dm->devices_by_hqos_cpu); i++)
- {
- vec_foreach (dq, dm->devices_by_hqos_cpu[i])
- {
- if (hw_if_index == dm->devices[dq->device].vlib_hw_if_index)
- {
- if (cpu == i) /* nothing to do */
- return 0;
-
- vec_del1 (dm->devices_by_hqos_cpu[i],
- dq - dm->devices_by_hqos_cpu[i]);
- vec_add2 (dm->devices_by_hqos_cpu[cpu], dq, 1);
- dq->queue_id = 0;
- dq->device = xd->device_index;
-
- vec_sort_with_function (dm->devices_by_hqos_cpu[i],
- dpdk_device_queue_sort);
-
- vec_sort_with_function (dm->devices_by_hqos_cpu[cpu],
- dpdk_device_queue_sort);
-
- return 0;
- }
- }
- }
-
- return clib_error_return (0, "not found");
-}
-
-/* *INDENT-OFF* */
-VLIB_CLI_COMMAND (cmd_set_dpdk_if_hqos_placement, static) = {
- .path = "set dpdk interface hqos placement",
- .short_help = "set dpdk interface hqos placement <if-name> thread <n>",
- .function = set_dpdk_if_hqos_placement,
-};
-/* *INDENT-ON* */
-
-static clib_error_t *
-set_dpdk_if_hqos_pipe (vlib_main_t * vm, unformat_input_t * input,
- vlib_cli_command_t * cmd)
-{
- unformat_input_t _line_input, *line_input = &_line_input;
- dpdk_main_t *dm = &dpdk_main;
- vnet_hw_interface_t *hw;
- dpdk_device_t *xd;
- u32 hw_if_index = (u32) ~ 0;
- u32 subport_id = (u32) ~ 0;
- u32 pipe_id = (u32) ~ 0;
- u32 profile_id = (u32) ~ 0;
- int rv;
-
- if (!unformat_user (input, unformat_line_input, line_input))
- return 0;
-
- while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
- {
- if (unformat
- (line_input, "%U", unformat_vnet_hw_interface, dm->vnet_main,
- &hw_if_index))
- ;
- else if (unformat (line_input, "subport %d", &subport_id))
- ;
- else if (unformat (line_input, "pipe %d", &pipe_id))
- ;
- else if (unformat (line_input, "profile %d", &profile_id))
- ;
- else
- return clib_error_return (0, "parse error: '%U'",
- format_unformat_error, line_input);
- }
-
- unformat_free (line_input);
-
- if (hw_if_index == (u32) ~ 0)
- return clib_error_return (0, "please specify valid interface name");
-
- hw = vnet_get_hw_interface (dm->vnet_main, hw_if_index);
- xd = vec_elt_at_index (dm->devices, hw->dev_instance);
-
- rv =
- rte_sched_pipe_config (xd->hqos_ht->hqos, subport_id, pipe_id,
- profile_id);
- if (rv)
- return clib_error_return (0, "pipe configuration failed");
-
- return 0;
-}
-
-/* *INDENT-OFF* */
-VLIB_CLI_COMMAND (cmd_set_dpdk_if_hqos_pipe, static) =
-{
- .path = "set dpdk interface hqos pipe",
- .short_help = "set dpdk interface hqos pipe <if-name> subport <n> pipe <n> "
- "profile <n>",
- .function = set_dpdk_if_hqos_pipe,
-};
-/* *INDENT-ON* */
-
-static clib_error_t *
-set_dpdk_if_hqos_subport (vlib_main_t * vm, unformat_input_t * input,
- vlib_cli_command_t * cmd)
-{
- unformat_input_t _line_input, *line_input = &_line_input;
- dpdk_main_t *dm = &dpdk_main;
- vnet_hw_interface_t *hw;
- dpdk_device_t *xd;
- u32 hw_if_index = (u32) ~ 0;
- u32 subport_id = (u32) ~ 0;
- struct rte_sched_subport_params p = {
- .tb_rate = 1250000000, /* 10GbE */
- .tb_size = 1000000,
- .tc_rate = {1250000000, 1250000000, 1250000000, 1250000000},
- .tc_period = 10,
- };
- int rv;
-
- if (!unformat_user (input, unformat_line_input, line_input))
- return 0;
-
- while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
- {
- if (unformat
- (line_input, "%U", unformat_vnet_hw_interface, dm->vnet_main,
- &hw_if_index))
- ;
- else if (unformat (line_input, "subport %d", &subport_id))
- ;
- else if (unformat (line_input, "rate %d", &p.tb_rate))
- {
- p.tc_rate[0] = p.tb_rate;
- p.tc_rate[1] = p.tb_rate;
- p.tc_rate[2] = p.tb_rate;
- p.tc_rate[3] = p.tb_rate;
- }
- else if (unformat (line_input, "bktsize %d", &p.tb_size))
- ;
- else if (unformat (line_input, "tc0 %d", &p.tc_rate[0]))
- ;
- else if (unformat (line_input, "tc1 %d", &p.tc_rate[1]))
- ;
- else if (unformat (line_input, "tc2 %d", &p.tc_rate[2]))
- ;
- else if (unformat (line_input, "tc3 %d", &p.tc_rate[3]))
- ;
- else if (unformat (line_input, "period %d", &p.tc_period))
- ;
- else
- return clib_error_return (0, "parse error: '%U'",
- format_unformat_error, line_input);
- }
-
- unformat_free (line_input);
-
- if (hw_if_index == (u32) ~ 0)
- return clib_error_return (0, "please specify valid interface name");
-
- hw = vnet_get_hw_interface (dm->vnet_main, hw_if_index);
- xd = vec_elt_at_index (dm->devices, hw->dev_instance);
-
- rv = rte_sched_subport_config (xd->hqos_ht->hqos, subport_id, &p);
- if (rv)
- return clib_error_return (0, "subport configuration failed");
-
- return 0;
-}
-
-/* *INDENT-OFF* */
-VLIB_CLI_COMMAND (cmd_set_dpdk_if_hqos_subport, static) = {
- .path = "set dpdk interface hqos subport",
- .short_help = "set dpdk interface hqos subport <if-name> subport <n> "
- "[rate <n>] [bktsize <n>] [tc0 <n>] [tc1 <n>] [tc2 <n>] [tc3 <n>] "
- "[period <n>]",
- .function = set_dpdk_if_hqos_subport,
-};
-/* *INDENT-ON* */
-
-static clib_error_t *
-set_dpdk_if_hqos_tctbl (vlib_main_t * vm, unformat_input_t * input,
- vlib_cli_command_t * cmd)
-{
- unformat_input_t _line_input, *line_input = &_line_input;
- vlib_thread_main_t *tm = vlib_get_thread_main ();
- dpdk_main_t *dm = &dpdk_main;
- vnet_hw_interface_t *hw;
- dpdk_device_t *xd;
- u32 hw_if_index = (u32) ~ 0;
- u32 tc = (u32) ~ 0;
- u32 queue = (u32) ~ 0;
- u32 entry = (u32) ~ 0;
- u32 val, i;
-
- if (!unformat_user (input, unformat_line_input, line_input))
- return 0;
-
- while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
- {
- if (unformat
- (line_input, "%U", unformat_vnet_hw_interface, dm->vnet_main,
- &hw_if_index))
- ;
- else if (unformat (line_input, "entry %d", &entry))
- ;
- else if (unformat (line_input, "tc %d", &tc))
- ;
- else if (unformat (line_input, "queue %d", &queue))
- ;
- else
- return clib_error_return (0, "parse error: '%U'",
- format_unformat_error, line_input);
- }
-
- unformat_free (line_input);
-
- if (hw_if_index == (u32) ~ 0)
- return clib_error_return (0, "please specify valid interface name");
- if (entry >= 64)
- return clib_error_return (0, "invalid entry");
- if (tc >= RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE)
- return clib_error_return (0, "invalid traffic class");
- if (queue >= RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS)
- return clib_error_return (0, "invalid queue");
-
- hw = vnet_get_hw_interface (dm->vnet_main, hw_if_index);
- xd = vec_elt_at_index (dm->devices, hw->dev_instance);
-
- /* Detect the set of worker threads */
- uword *p = hash_get_mem (tm->thread_registrations_by_name, "workers");
- /* Should never happen, shut up Coverity warning */
- if (p == 0)
- return clib_error_return (0, "no worker registrations?");
-
- vlib_thread_registration_t *tr = (vlib_thread_registration_t *) p[0];
- int worker_thread_first = tr->first_index;
- int worker_thread_count = tr->count;
-
- val = tc * RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS + queue;
- for (i = 0; i < worker_thread_count; i++)
- xd->hqos_wt[worker_thread_first + i].hqos_tc_table[entry] = val;
-
- return 0;
-}
-
-/* *INDENT-OFF* */
-VLIB_CLI_COMMAND (cmd_set_dpdk_if_hqos_tctbl, static) = {
- .path = "set dpdk interface hqos tctbl",
- .short_help = "set dpdk interface hqos tctbl <if-name> entry <n> tc <n> queue <n>",
- .function = set_dpdk_if_hqos_tctbl,
-};
-/* *INDENT-ON* */
-
-static clib_error_t *
-set_dpdk_if_hqos_pktfield (vlib_main_t * vm, unformat_input_t * input,
- vlib_cli_command_t * cmd)
-{
- unformat_input_t _line_input, *line_input = &_line_input;
- vlib_thread_main_t *tm = vlib_get_thread_main ();
- dpdk_main_t *dm = &dpdk_main;
-
- /* Device specific data */
- struct rte_eth_dev_info dev_info;
- dpdk_device_config_t *devconf = 0;
- vnet_hw_interface_t *hw;
- dpdk_device_t *xd;
- u32 hw_if_index = (u32) ~ 0;
-
- /* Detect the set of worker threads */
- uword *p = hash_get_mem (tm->thread_registrations_by_name, "workers");
- /* Should never happen, shut up Coverity warning */
- if (p == 0)
- return clib_error_return (0, "no worker registrations?");
-
- vlib_thread_registration_t *tr = (vlib_thread_registration_t *) p[0];
- int worker_thread_first = tr->first_index;
- int worker_thread_count = tr->count;
-
- /* Packet field configuration */
- u64 mask = (u64) ~ 0;
- u32 id = (u32) ~ 0;
- u32 offset = (u32) ~ 0;
-
- /* HQoS params */
- u32 n_subports_per_port, n_pipes_per_subport, tctbl_size;
-
- u32 i;
-
- /* Parse input arguments */
- if (!unformat_user (input, unformat_line_input, line_input))
- return 0;
-
- while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
- {
- if (unformat
- (line_input, "%U", unformat_vnet_hw_interface, dm->vnet_main,
- &hw_if_index))
- ;
- else if (unformat (line_input, "id %d", &id))
- ;
- else if (unformat (line_input, "offset %d", &offset))
- ;
- else if (unformat (line_input, "mask %llx", &mask))
- ;
- else
- return clib_error_return (0, "parse error: '%U'",
- format_unformat_error, line_input);
- }
-
- unformat_free (line_input);
-
- /* Get interface */
- if (hw_if_index == (u32) ~ 0)
- return clib_error_return (0, "please specify valid interface name");
-
- hw = vnet_get_hw_interface (dm->vnet_main, hw_if_index);
- xd = vec_elt_at_index (dm->devices, hw->dev_instance);
-
- rte_eth_dev_info_get (xd->device_index, &dev_info);
- if (dev_info.pci_dev)
- { /* bonded interface has no pci info */
- vlib_pci_addr_t pci_addr;
-
- pci_addr.domain = dev_info.pci_dev->addr.domain;
- pci_addr.bus = dev_info.pci_dev->addr.bus;
- pci_addr.slot = dev_info.pci_dev->addr.devid;
- pci_addr.function = dev_info.pci_dev->addr.function;
-
- p =
- hash_get (dm->conf->device_config_index_by_pci_addr, pci_addr.as_u32);
- }
-
- if (p)
- devconf = pool_elt_at_index (dm->conf->dev_confs, p[0]);
- else
- devconf = &dm->conf->default_devconf;
-
- if (devconf->hqos_enabled == 0)
- {
- vlib_cli_output (vm, "HQoS disabled for this interface");
- return 0;
- }
-
- n_subports_per_port = devconf->hqos.port.n_subports_per_port;
- n_pipes_per_subport = devconf->hqos.port.n_pipes_per_subport;
- tctbl_size = RTE_DIM (devconf->hqos.tc_table);
-
- /* Validate packet field configuration: id, offset and mask */
- if (id >= 3)
- return clib_error_return (0, "invalid packet field id");
-
- switch (id)
- {
- case 0:
- if (dpdk_hqos_validate_mask (mask, n_subports_per_port) != 0)
- return clib_error_return (0, "invalid subport ID mask "
- "(n_subports_per_port = %u)",
- n_subports_per_port);
- break;
- case 1:
- if (dpdk_hqos_validate_mask (mask, n_pipes_per_subport) != 0)
- return clib_error_return (0, "invalid pipe ID mask "
- "(n_pipes_per_subport = %u)",
- n_pipes_per_subport);
- break;
- case 2:
- default:
- if (dpdk_hqos_validate_mask (mask, tctbl_size) != 0)
- return clib_error_return (0, "invalid TC table index mask "
- "(TC table size = %u)", tctbl_size);
- }
-
- /* Propagate packet field configuration to all workers */
- for (i = 0; i < worker_thread_count; i++)
- switch (id)
- {
- case 0:
- xd->hqos_wt[worker_thread_first + i].hqos_field0_slabpos = offset;
- xd->hqos_wt[worker_thread_first + i].hqos_field0_slabmask = mask;
- xd->hqos_wt[worker_thread_first + i].hqos_field0_slabshr =
- __builtin_ctzll (mask);
- break;
- case 1:
- xd->hqos_wt[worker_thread_first + i].hqos_field1_slabpos = offset;
- xd->hqos_wt[worker_thread_first + i].hqos_field1_slabmask = mask;
- xd->hqos_wt[worker_thread_first + i].hqos_field1_slabshr =
- __builtin_ctzll (mask);
- break;
- case 2:
- default:
- xd->hqos_wt[worker_thread_first + i].hqos_field2_slabpos = offset;
- xd->hqos_wt[worker_thread_first + i].hqos_field2_slabmask = mask;
- xd->hqos_wt[worker_thread_first + i].hqos_field2_slabshr =
- __builtin_ctzll (mask);
- }
-
- return 0;
-}
-
-/* *INDENT-OFF* */
-VLIB_CLI_COMMAND (cmd_set_dpdk_if_hqos_pktfield, static) = {
- .path = "set dpdk interface hqos pktfield",
- .short_help = "set dpdk interface hqos pktfield <if-name> id <n> offset <n> "
- "mask <n>",
- .function = set_dpdk_if_hqos_pktfield,
-};
-/* *INDENT-ON* */
-
-static clib_error_t *
-show_dpdk_if_hqos (vlib_main_t * vm, unformat_input_t * input,
- vlib_cli_command_t * cmd)
-{
- unformat_input_t _line_input, *line_input = &_line_input;
- vlib_thread_main_t *tm = vlib_get_thread_main ();
- dpdk_main_t *dm = &dpdk_main;
- vnet_hw_interface_t *hw;
- dpdk_device_t *xd;
- dpdk_device_config_hqos_t *cfg;
- dpdk_device_hqos_per_hqos_thread_t *ht;
- dpdk_device_hqos_per_worker_thread_t *wk;
- u32 *tctbl;
- u32 hw_if_index = (u32) ~ 0;
- u32 profile_id, i;
- struct rte_eth_dev_info dev_info;
- dpdk_device_config_t *devconf = 0;
- vlib_thread_registration_t *tr;
- uword *p = 0;
-
- if (!unformat_user (input, unformat_line_input, line_input))
- return 0;
-
- while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
- {
- if (unformat
- (line_input, "%U", unformat_vnet_hw_interface, dm->vnet_main,
- &hw_if_index))
- ;
- else
- return clib_error_return (0, "parse error: '%U'",
- format_unformat_error, line_input);
- }
-
- unformat_free (line_input);
-
- if (hw_if_index == (u32) ~ 0)
- return clib_error_return (0, "please specify interface name!!");
-
- hw = vnet_get_hw_interface (dm->vnet_main, hw_if_index);
- xd = vec_elt_at_index (dm->devices, hw->dev_instance);
-
- rte_eth_dev_info_get (xd->device_index, &dev_info);
- if (dev_info.pci_dev)
- { /* bonded interface has no pci info */
- vlib_pci_addr_t pci_addr;
-
- pci_addr.domain = dev_info.pci_dev->addr.domain;
- pci_addr.bus = dev_info.pci_dev->addr.bus;
- pci_addr.slot = dev_info.pci_dev->addr.devid;
- pci_addr.function = dev_info.pci_dev->addr.function;
-
- p =
- hash_get (dm->conf->device_config_index_by_pci_addr, pci_addr.as_u32);
- }
-
- if (p)
- devconf = pool_elt_at_index (dm->conf->dev_confs, p[0]);
- else
- devconf = &dm->conf->default_devconf;
-
- if (devconf->hqos_enabled == 0)
- {
- vlib_cli_output (vm, "HQoS disabled for this interface");
- return 0;
- }
-
- /* Detect the set of worker threads */
- p = hash_get_mem (tm->thread_registrations_by_name, "workers");
-
- /* Should never happen, shut up Coverity warning */
- if (p == 0)
- return clib_error_return (0, "no worker registrations?");
-
- tr = (vlib_thread_registration_t *) p[0];
-
- cfg = &devconf->hqos;
- ht = xd->hqos_ht;
- wk = &xd->hqos_wt[tr->first_index];
- tctbl = wk->hqos_tc_table;
-
- vlib_cli_output (vm, " Thread:");
- vlib_cli_output (vm, " Input SWQ size = %u packets", cfg->swq_size);
- vlib_cli_output (vm, " Enqueue burst size = %u packets",
- ht->hqos_burst_enq);
- vlib_cli_output (vm, " Dequeue burst size = %u packets",
- ht->hqos_burst_deq);
-
- vlib_cli_output (vm,
- " Packet field 0: slab position = %4u, slab bitmask = 0x%016llx",
- wk->hqos_field0_slabpos, wk->hqos_field0_slabmask);
- vlib_cli_output (vm,
- " Packet field 1: slab position = %4u, slab bitmask = 0x%016llx",
- wk->hqos_field1_slabpos, wk->hqos_field1_slabmask);
- vlib_cli_output (vm,
- " Packet field 2: slab position = %4u, slab bitmask = 0x%016llx",
- wk->hqos_field2_slabpos, wk->hqos_field2_slabmask);
- vlib_cli_output (vm, " Packet field 2 translation table:");
- vlib_cli_output (vm, " [ 0 .. 15]: "
- "%2u %2u %2u %2u %2u %2u %2u %2u %2u %2u %2u %2u %2u %2u %2u %2u",
- tctbl[0], tctbl[1], tctbl[2], tctbl[3],
- tctbl[4], tctbl[5], tctbl[6], tctbl[7],
- tctbl[8], tctbl[9], tctbl[10], tctbl[11],
- tctbl[12], tctbl[13], tctbl[14], tctbl[15]);
- vlib_cli_output (vm, " [16 .. 31]: "
- "%2u %2u %2u %2u %2u %2u %2u %2u %2u %2u %2u %2u %2u %2u %2u %2u",
- tctbl[16], tctbl[17], tctbl[18], tctbl[19],
- tctbl[20], tctbl[21], tctbl[22], tctbl[23],
- tctbl[24], tctbl[25], tctbl[26], tctbl[27],
- tctbl[28], tctbl[29], tctbl[30], tctbl[31]);
- vlib_cli_output (vm, " [32 .. 47]: "
- "%2u %2u %2u %2u %2u %2u %2u %2u %2u %2u %2u %2u %2u %2u %2u %2u",
- tctbl[32], tctbl[33], tctbl[34], tctbl[35],
- tctbl[36], tctbl[37], tctbl[38], tctbl[39],
- tctbl[40], tctbl[41], tctbl[42], tctbl[43],
- tctbl[44], tctbl[45], tctbl[46], tctbl[47]);
- vlib_cli_output (vm, " [48 .. 63]: "
- "%2u %2u %2u %2u %2u %2u %2u %2u %2u %2u %2u %2u %2u %2u %2u %2u",
- tctbl[48], tctbl[49], tctbl[50], tctbl[51],
- tctbl[52], tctbl[53], tctbl[54], tctbl[55],
- tctbl[56], tctbl[57], tctbl[58], tctbl[59],
- tctbl[60], tctbl[61], tctbl[62], tctbl[63]);
-
- vlib_cli_output (vm, " Port:");
- vlib_cli_output (vm, " Rate = %u bytes/second", cfg->port.rate);
- vlib_cli_output (vm, " MTU = %u bytes", cfg->port.mtu);
- vlib_cli_output (vm, " Frame overhead = %u bytes",
- cfg->port.frame_overhead);
- vlib_cli_output (vm, " Number of subports = %u",
- cfg->port.n_subports_per_port);
- vlib_cli_output (vm, " Number of pipes per subport = %u",
- cfg->port.n_pipes_per_subport);
- vlib_cli_output (vm,
- " Packet queue size: TC0 = %u, TC1 = %u, TC2 = %u, TC3 = %u packets",
- cfg->port.qsize[0], cfg->port.qsize[1], cfg->port.qsize[2],
- cfg->port.qsize[3]);
- vlib_cli_output (vm, " Number of pipe profiles = %u",
- cfg->port.n_pipe_profiles);
-
- for (profile_id = 0; profile_id < vec_len (cfg->pipe); profile_id++)
- {
- vlib_cli_output (vm, " Pipe profile %u:", profile_id);
- vlib_cli_output (vm, " Rate = %u bytes/second",
- cfg->pipe[profile_id].tb_rate);
- vlib_cli_output (vm, " Token bucket size = %u bytes",
- cfg->pipe[profile_id].tb_size);
- vlib_cli_output (vm,
- " Traffic class rate: TC0 = %u, TC1 = %u, TC2 = %u, TC3 = %u bytes/second",
- cfg->pipe[profile_id].tc_rate[0],
- cfg->pipe[profile_id].tc_rate[1],
- cfg->pipe[profile_id].tc_rate[2],
- cfg->pipe[profile_id].tc_rate[3]);
- vlib_cli_output (vm, " TC period = %u milliseconds",
- cfg->pipe[profile_id].tc_period);
-#ifdef RTE_SCHED_SUBPORT_TC_OV
- vlib_cli_output (vm, " TC3 oversubscription_weight = %u",
- cfg->pipe[profile_id].tc_ov_weight);
-#endif
-
- for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++)
- {
- vlib_cli_output (vm,
- " TC%u WRR weights: Q0 = %u, Q1 = %u, Q2 = %u, Q3 = %u",
- i, cfg->pipe[profile_id].wrr_weights[i * 4],
- cfg->pipe[profile_id].wrr_weights[i * 4 + 1],
- cfg->pipe[profile_id].wrr_weights[i * 4 + 2],
- cfg->pipe[profile_id].wrr_weights[i * 4 + 3]);
- }
- }
-
-#ifdef RTE_SCHED_RED
- vlib_cli_output (vm, " Weighted Random Early Detection (WRED):");
- for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++)
- {
- vlib_cli_output (vm, " TC%u min: G = %u, Y = %u, R = %u", i,
- cfg->port.red_params[i][e_RTE_METER_GREEN].min_th,
- cfg->port.red_params[i][e_RTE_METER_YELLOW].min_th,
- cfg->port.red_params[i][e_RTE_METER_RED].min_th);
-
- vlib_cli_output (vm, " TC%u max: G = %u, Y = %u, R = %u", i,
- cfg->port.red_params[i][e_RTE_METER_GREEN].max_th,
- cfg->port.red_params[i][e_RTE_METER_YELLOW].max_th,
- cfg->port.red_params[i][e_RTE_METER_RED].max_th);
-
- vlib_cli_output (vm,
- " TC%u inverted probability: G = %u, Y = %u, R = %u",
- i, cfg->port.red_params[i][e_RTE_METER_GREEN].maxp_inv,
- cfg->port.red_params[i][e_RTE_METER_YELLOW].maxp_inv,
- cfg->port.red_params[i][e_RTE_METER_RED].maxp_inv);
-
- vlib_cli_output (vm, " TC%u weight: G = %u, Y = %u, R = %u", i,
- cfg->port.red_params[i][e_RTE_METER_GREEN].wq_log2,
- cfg->port.red_params[i][e_RTE_METER_YELLOW].wq_log2,
- cfg->port.red_params[i][e_RTE_METER_RED].wq_log2);
- }
-#endif
-
- return 0;
-}
-
-/* *INDENT-OFF* */
-VLIB_CLI_COMMAND (cmd_show_dpdk_if_hqos, static) = {
- .path = "show dpdk interface hqos",
- .short_help = "show dpdk interface hqos <if-name>",
- .function = show_dpdk_if_hqos,
-};
-
-/* *INDENT-ON* */
-
-static clib_error_t *
-show_dpdk_hqos_queue_stats (vlib_main_t * vm, unformat_input_t * input,
- vlib_cli_command_t * cmd)
-{
- unformat_input_t _line_input, *line_input = &_line_input;
- dpdk_main_t *dm = &dpdk_main;
- u32 hw_if_index = (u32) ~ 0;
- u32 subport = (u32) ~ 0;
- u32 pipe = (u32) ~ 0;
- u32 tc = (u32) ~ 0;
- u32 tc_q = (u32) ~ 0;
- vnet_hw_interface_t *hw;
- dpdk_device_t *xd;
- uword *p = 0;
- struct rte_eth_dev_info dev_info;
- dpdk_device_config_t *devconf = 0;
- u32 qindex;
- struct rte_sched_queue_stats stats;
- u16 qlen;
-
- if (!unformat_user (input, unformat_line_input, line_input))
- return 0;
-
- while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
- {
- if (unformat
- (line_input, "%U", unformat_vnet_hw_interface, dm->vnet_main,
- &hw_if_index))
- ;
-
- else if (unformat (line_input, "subport %d", &subport))
- ;
-
- else if (unformat (line_input, "pipe %d", &pipe))
- ;
-
- else if (unformat (line_input, "tc %d", &tc))
- ;
-
- else if (unformat (line_input, "tc_q %d", &tc_q))
- ;
-
- else
- return clib_error_return (0, "parse error: '%U'",
- format_unformat_error, line_input);
- }
-
- unformat_free (line_input);
-
- if (hw_if_index == (u32) ~ 0)
- return clib_error_return (0, "please specify interface name!!");
-
- hw = vnet_get_hw_interface (dm->vnet_main, hw_if_index);
- xd = vec_elt_at_index (dm->devices, hw->dev_instance);
-
- rte_eth_dev_info_get (xd->device_index, &dev_info);
- if (dev_info.pci_dev)
- { /* bonded interface has no pci info */
- vlib_pci_addr_t pci_addr;
-
- pci_addr.domain = dev_info.pci_dev->addr.domain;
- pci_addr.bus = dev_info.pci_dev->addr.bus;
- pci_addr.slot = dev_info.pci_dev->addr.devid;
- pci_addr.function = dev_info.pci_dev->addr.function;
-
- p =
- hash_get (dm->conf->device_config_index_by_pci_addr, pci_addr.as_u32);
- }
-
- if (p)
- devconf = pool_elt_at_index (dm->conf->dev_confs, p[0]);
- else
- devconf = &dm->conf->default_devconf;
-
- if (devconf->hqos_enabled == 0)
- {
- vlib_cli_output (vm, "HQoS disabled for this interface");
- return 0;
- }
-
- /*
- * Figure out which queue to query. cf rte_sched_port_qindex. (Not sure why
- * that method isn't made public by DPDK - how _should_ we get the queue ID?)
- */
- qindex = subport * devconf->hqos.port.n_pipes_per_subport + pipe;
- qindex = qindex * RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE + tc;
- qindex = qindex * RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS + tc_q;
-
- if (rte_sched_queue_read_stats (xd->hqos_ht->hqos, qindex, &stats, &qlen) !=
- 0)
- return clib_error_return (0, "failed to read stats");
-
- vlib_cli_output (vm, "%=24s%=16s", "Stats Parameter", "Value");
- vlib_cli_output (vm, "%=24s%=16d", "Packets", stats.n_pkts);
- vlib_cli_output (vm, "%=24s%=16d", "Packets dropped", stats.n_pkts_dropped);
-#ifdef RTE_SCHED_RED
- vlib_cli_output (vm, "%=24s%=16d", "Packets dropped (RED)",
- stats.n_pkts_red_dropped);
-#endif
- vlib_cli_output (vm, "%=24s%=16d", "Bytes", stats.n_bytes);
- vlib_cli_output (vm, "%=24s%=16d", "Bytes dropped", stats.n_bytes_dropped);
-
-
- return 0;
-}
-
-/* *INDENT-OFF* */
-VLIB_CLI_COMMAND (cmd_show_dpdk_hqos_queue_stats, static) = {
- .path = "show dpdk hqos queue",
- .short_help = "show dpdk hqos queue <if-name> subport <subport> pipe <pipe> tc <tc> tc_q <tc_q>",
- .function = show_dpdk_hqos_queue_stats,
-};
-/* *INDENT-ON* */
-
-clib_error_t *
-dpdk_cli_init (vlib_main_t * vm)
-{
- return 0;
-}
-
-VLIB_INIT_FUNCTION (dpdk_cli_init);
-
-/*
- * fd.io coding-style-patch-verification: ON
- *
- * Local Variables:
- * eval: (c-set-style "gnu")
- * End:
- */
diff --git a/vnet/vnet/devices/dpdk/device.c b/vnet/vnet/devices/dpdk/device.c
deleted file mode 100644
index b22fbf2e69e..00000000000
--- a/vnet/vnet/devices/dpdk/device.c
+++ /dev/null
@@ -1,840 +0,0 @@
-/*
- * Copyright (c) 2015 Cisco and/or its affiliates.
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at:
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#include <vnet/vnet.h>
-#include <vppinfra/vec.h>
-#include <vppinfra/format.h>
-#include <vlib/unix/cj.h>
-#include <assert.h>
-
-#include <vnet/ethernet/ethernet.h>
-#include <vnet/devices/dpdk/dpdk.h>
-
-#include "dpdk_priv.h"
-#include <vppinfra/error.h>
-
-#define foreach_dpdk_tx_func_error \
- _(BAD_RETVAL, "DPDK tx function returned an error") \
- _(RING_FULL, "Tx packet drops (ring full)") \
- _(PKT_DROP, "Tx packet drops (dpdk tx failure)") \
- _(REPL_FAIL, "Tx packet drops (replication failure)")
-
-typedef enum
-{
-#define _(f,s) DPDK_TX_FUNC_ERROR_##f,
- foreach_dpdk_tx_func_error
-#undef _
- DPDK_TX_FUNC_N_ERROR,
-} dpdk_tx_func_error_t;
-
-static char *dpdk_tx_func_error_strings[] = {
-#define _(n,s) s,
- foreach_dpdk_tx_func_error
-#undef _
-};
-
-clib_error_t *
-dpdk_set_mac_address (vnet_hw_interface_t * hi, char *address)
-{
- int error;
- dpdk_main_t *dm = &dpdk_main;
- dpdk_device_t *xd = vec_elt_at_index (dm->devices, hi->dev_instance);
-
- error = rte_eth_dev_default_mac_addr_set (xd->device_index,
- (struct ether_addr *) address);
-
- if (error)
- {
- return clib_error_return (0, "mac address set failed: %d", error);
- }
- else
- {
- return NULL;
- }
-}
-
-clib_error_t *
-dpdk_set_mc_filter (vnet_hw_interface_t * hi,
- struct ether_addr mc_addr_vec[], int naddr)
-{
- int error;
- dpdk_main_t *dm = &dpdk_main;
- dpdk_device_t *xd = vec_elt_at_index (dm->devices, hi->dev_instance);
-
- error = rte_eth_dev_set_mc_addr_list (xd->device_index, mc_addr_vec, naddr);
-
- if (error)
- {
- return clib_error_return (0, "mc addr list failed: %d", error);
- }
- else
- {
- return NULL;
- }
-}
-
-struct rte_mbuf *
-dpdk_replicate_packet_mb (vlib_buffer_t * b)
-{
- vlib_main_t *vm = vlib_get_main ();
- vlib_buffer_main_t *bm = vm->buffer_main;
- struct rte_mbuf **mbufs = 0, *s, *d;
- u8 nb_segs;
- unsigned socket_id = rte_socket_id ();
- int i;
-
- ASSERT (bm->pktmbuf_pools[socket_id]);
- s = rte_mbuf_from_vlib_buffer (b);
- nb_segs = s->nb_segs;
- vec_validate (mbufs, nb_segs - 1);
-
- if (rte_pktmbuf_alloc_bulk (bm->pktmbuf_pools[socket_id], mbufs, nb_segs))
- {
- vec_free (mbufs);
- return 0;
- }
-
- d = mbufs[0];
- d->nb_segs = s->nb_segs;
- d->data_len = s->data_len;
- d->pkt_len = s->pkt_len;
- d->data_off = s->data_off;
- clib_memcpy (d->buf_addr, s->buf_addr, RTE_PKTMBUF_HEADROOM + s->data_len);
-
- for (i = 1; i < nb_segs; i++)
- {
- d->next = mbufs[i];
- d = mbufs[i];
- s = s->next;
- d->data_len = s->data_len;
- clib_memcpy (d->buf_addr, s->buf_addr,
- RTE_PKTMBUF_HEADROOM + s->data_len);
- }
-
- d = mbufs[0];
- vec_free (mbufs);
- return d;
-}
-
-static void
-dpdk_tx_trace_buffer (dpdk_main_t * dm,
- vlib_node_runtime_t * node,
- dpdk_device_t * xd,
- u16 queue_id, u32 buffer_index, vlib_buffer_t * buffer)
-{
- vlib_main_t *vm = vlib_get_main ();
- dpdk_tx_dma_trace_t *t0;
- struct rte_mbuf *mb;
-
- mb = rte_mbuf_from_vlib_buffer (buffer);
-
- t0 = vlib_add_trace (vm, node, buffer, sizeof (t0[0]));
- t0->queue_index = queue_id;
- t0->device_index = xd->device_index;
- t0->buffer_index = buffer_index;
- clib_memcpy (&t0->mb, mb, sizeof (t0->mb));
- clib_memcpy (&t0->buffer, buffer,
- sizeof (buffer[0]) - sizeof (buffer->pre_data));
- clib_memcpy (t0->buffer.pre_data, buffer->data + buffer->current_data,
- sizeof (t0->buffer.pre_data));
-}
-
-static_always_inline void
-dpdk_validate_rte_mbuf (vlib_main_t * vm, vlib_buffer_t * b,
- int maybe_multiseg)
-{
- struct rte_mbuf *mb, *first_mb, *last_mb;
-
- /* buffer is coming from non-dpdk source so we need to init
- rte_mbuf header */
- if (PREDICT_FALSE ((b->flags & VNET_BUFFER_RTE_MBUF_VALID) == 0))
- {
- vlib_buffer_t *b2 = b;
- last_mb = mb = rte_mbuf_from_vlib_buffer (b2);
- rte_pktmbuf_reset (mb);
- while (maybe_multiseg && (b2->flags & VLIB_BUFFER_NEXT_PRESENT))
- {
- b2 = vlib_get_buffer (vm, b2->next_buffer);
- mb = rte_mbuf_from_vlib_buffer (b2);
- last_mb->next = mb;
- last_mb = mb;
- rte_pktmbuf_reset (mb);
- }
- }
-
- first_mb = mb = rte_mbuf_from_vlib_buffer (b);
- first_mb->nb_segs = 1;
- mb->data_len = b->current_length;
- mb->pkt_len = maybe_multiseg ? vlib_buffer_length_in_chain (vm, b) :
- b->current_length;
- mb->data_off = VLIB_BUFFER_PRE_DATA_SIZE + b->current_data;
-
- while (maybe_multiseg && (b->flags & VLIB_BUFFER_NEXT_PRESENT))
- {
- b = vlib_get_buffer (vm, b->next_buffer);
- mb = rte_mbuf_from_vlib_buffer (b);
- mb->data_len = b->current_length;
- mb->pkt_len = b->current_length;
- mb->data_off = VLIB_BUFFER_PRE_DATA_SIZE + b->current_data;
- first_mb->nb_segs++;
- }
-}
-
-/*
- * This function calls the dpdk's tx_burst function to transmit the packets
- * on the tx_vector. It manages a lock per-device if the device does not
- * support multiple queues. It returns the number of packets untransmitted
- * on the tx_vector. If all packets are transmitted (the normal case), the
- * function returns 0.
- *
- * The function assumes there is at least one packet on the tx_vector.
- */
-static_always_inline
- u32 tx_burst_vector_internal (vlib_main_t * vm,
- dpdk_device_t * xd,
- struct rte_mbuf **tx_vector)
-{
- dpdk_main_t *dm = &dpdk_main;
- u32 n_packets;
- u32 tx_head;
- u32 tx_tail;
- u32 n_retry;
- int rv;
- int queue_id;
- tx_ring_hdr_t *ring;
-
- ring = vec_header (tx_vector, sizeof (*ring));
-
- n_packets = ring->tx_head - ring->tx_tail;
-
- tx_head = ring->tx_head % xd->nb_tx_desc;
-
- /*
- * Ensure rte_eth_tx_burst is not called with 0 packets, which can lead to
- * unpredictable results.
- */
- ASSERT (n_packets > 0);
-
- /*
- * Check for tx_vector overflow. If this fails it is a system configuration
- * error. The ring should be sized big enough to handle the largest un-flowed
- * off burst from a traffic manager. A larger size also helps performance
- * a bit because it decreases the probability of having to issue two tx_burst
- * calls due to a ring wrap.
- */
- ASSERT (n_packets < xd->nb_tx_desc);
- ASSERT (ring->tx_tail == 0);
-
- n_retry = 16;
- queue_id = vm->cpu_index;
-
- do
- {
- /* start the burst at the tail */
- tx_tail = ring->tx_tail % xd->nb_tx_desc;
-
- /*
- * This device only supports one TX queue,
- * and we're running multi-threaded...
- */
- if (PREDICT_FALSE (xd->lockp != 0))
- {
- queue_id = queue_id % xd->tx_q_used;
- while (__sync_lock_test_and_set (xd->lockp[queue_id], 1))
- /* zzzz */
- queue_id = (queue_id + 1) % xd->tx_q_used;
- }
-
- if (PREDICT_FALSE (xd->flags & DPDK_DEVICE_FLAG_HQOS)) /* HQoS ON */
- {
- /* no wrap, transmit in one burst */
- dpdk_device_hqos_per_worker_thread_t *hqos =
- &xd->hqos_wt[vm->cpu_index];
-
- ASSERT (hqos->swq != NULL);
-
- dpdk_hqos_metadata_set (hqos,
- &tx_vector[tx_tail], tx_head - tx_tail);
- rv = rte_ring_sp_enqueue_burst (hqos->swq,
- (void **) &tx_vector[tx_tail],
- (uint16_t) (tx_head - tx_tail));
- }
- else if (PREDICT_TRUE (xd->flags & DPDK_DEVICE_FLAG_PMD))
- {
- /* no wrap, transmit in one burst */
- rv = rte_eth_tx_burst (xd->device_index,
- (uint16_t) queue_id,
- &tx_vector[tx_tail],
- (uint16_t) (tx_head - tx_tail));
- }
- else
- {
- ASSERT (0);
- rv = 0;
- }
-
- if (PREDICT_FALSE (xd->lockp != 0))
- *xd->lockp[queue_id] = 0;
-
- if (PREDICT_FALSE (rv < 0))
- {
- // emit non-fatal message, bump counter
- vnet_main_t *vnm = dm->vnet_main;
- vnet_interface_main_t *im = &vnm->interface_main;
- u32 node_index;
-
- node_index = vec_elt_at_index (im->hw_interfaces,
- xd->vlib_hw_if_index)->tx_node_index;
-
- vlib_error_count (vm, node_index, DPDK_TX_FUNC_ERROR_BAD_RETVAL, 1);
- clib_warning ("rte_eth_tx_burst[%d]: error %d", xd->device_index,
- rv);
- return n_packets; // untransmitted packets
- }
- ring->tx_tail += (u16) rv;
- n_packets -= (uint16_t) rv;
- }
- while (rv && n_packets && (n_retry > 0));
-
- return n_packets;
-}
-
-static_always_inline void
-dpdk_prefetch_buffer_by_index (vlib_main_t * vm, u32 bi)
-{
- vlib_buffer_t *b;
- struct rte_mbuf *mb;
- b = vlib_get_buffer (vm, bi);
- mb = rte_mbuf_from_vlib_buffer (b);
- CLIB_PREFETCH (mb, CLIB_CACHE_LINE_BYTES, LOAD);
- CLIB_PREFETCH (b, CLIB_CACHE_LINE_BYTES, LOAD);
-}
-
-static_always_inline void
-dpdk_buffer_recycle (vlib_main_t * vm, vlib_node_runtime_t * node,
- vlib_buffer_t * b, u32 bi, struct rte_mbuf **mbp)
-{
- dpdk_main_t *dm = &dpdk_main;
- u32 my_cpu = vm->cpu_index;
- struct rte_mbuf *mb_new;
-
- if (PREDICT_FALSE (b->flags & VLIB_BUFFER_RECYCLE) == 0)
- return;
-
- mb_new = dpdk_replicate_packet_mb (b);
- if (PREDICT_FALSE (mb_new == 0))
- {
- vlib_error_count (vm, node->node_index,
- DPDK_TX_FUNC_ERROR_REPL_FAIL, 1);
- b->flags |= VLIB_BUFFER_REPL_FAIL;
- }
- else
- *mbp = mb_new;
-
- vec_add1 (dm->recycle[my_cpu], bi);
-}
-
-/*
- * Transmits the packets on the frame to the interface associated with the
- * node. It first copies packets on the frame to a tx_vector containing the
- * rte_mbuf pointers. It then passes this vector to tx_burst_vector_internal
- * which calls the dpdk tx_burst function.
- */
-static uword
-dpdk_interface_tx (vlib_main_t * vm,
- vlib_node_runtime_t * node, vlib_frame_t * f)
-{
- dpdk_main_t *dm = &dpdk_main;
- vnet_interface_output_runtime_t *rd = (void *) node->runtime_data;
- dpdk_device_t *xd = vec_elt_at_index (dm->devices, rd->dev_instance);
- u32 n_packets = f->n_vectors;
- u32 n_left;
- u32 *from;
- struct rte_mbuf **tx_vector;
- u16 i;
- u16 nb_tx_desc = xd->nb_tx_desc;
- int queue_id;
- u32 my_cpu;
- u32 tx_pkts = 0;
- tx_ring_hdr_t *ring;
- u32 n_on_ring;
-
- my_cpu = vm->cpu_index;
-
- queue_id = my_cpu;
-
- tx_vector = xd->tx_vectors[queue_id];
- ring = vec_header (tx_vector, sizeof (*ring));
-
- n_on_ring = ring->tx_head - ring->tx_tail;
- from = vlib_frame_vector_args (f);
-
- ASSERT (n_packets <= VLIB_FRAME_SIZE);
-
- if (PREDICT_FALSE (n_on_ring + n_packets > nb_tx_desc))
- {
- /*
- * Overflowing the ring should never happen.
- * If it does then drop the whole frame.
- */
- vlib_error_count (vm, node->node_index, DPDK_TX_FUNC_ERROR_RING_FULL,
- n_packets);
-
- while (n_packets--)
- {
- u32 bi0 = from[n_packets];
- vlib_buffer_t *b0 = vlib_get_buffer (vm, bi0);
- struct rte_mbuf *mb0 = rte_mbuf_from_vlib_buffer (b0);
- rte_pktmbuf_free (mb0);
- }
- return n_on_ring;
- }
-
- if (PREDICT_FALSE (dm->tx_pcap_enable))
- {
- n_left = n_packets;
- while (n_left > 0)
- {
- u32 bi0 = from[0];
- vlib_buffer_t *b0 = vlib_get_buffer (vm, bi0);
- if (dm->pcap_sw_if_index == 0 ||
- dm->pcap_sw_if_index == vnet_buffer (b0)->sw_if_index[VLIB_TX])
- pcap_add_buffer (&dm->pcap_main, vm, bi0, 512);
- from++;
- n_left--;
- }
- }
-
- from = vlib_frame_vector_args (f);
- n_left = n_packets;
- i = ring->tx_head % nb_tx_desc;
-
- while (n_left >= 8)
- {
- u32 bi0, bi1, bi2, bi3;
- struct rte_mbuf *mb0, *mb1, *mb2, *mb3;
- vlib_buffer_t *b0, *b1, *b2, *b3;
- u32 or_flags;
-
- dpdk_prefetch_buffer_by_index (vm, from[4]);
- dpdk_prefetch_buffer_by_index (vm, from[5]);
- dpdk_prefetch_buffer_by_index (vm, from[6]);
- dpdk_prefetch_buffer_by_index (vm, from[7]);
-
- bi0 = from[0];
- bi1 = from[1];
- bi2 = from[2];
- bi3 = from[3];
- from += 4;
-
- b0 = vlib_get_buffer (vm, bi0);
- b1 = vlib_get_buffer (vm, bi1);
- b2 = vlib_get_buffer (vm, bi2);
- b3 = vlib_get_buffer (vm, bi3);
-
- or_flags = b0->flags | b1->flags | b2->flags | b3->flags;
-
- if (or_flags & VLIB_BUFFER_NEXT_PRESENT)
- {
- dpdk_validate_rte_mbuf (vm, b0, 1);
- dpdk_validate_rte_mbuf (vm, b1, 1);
- dpdk_validate_rte_mbuf (vm, b2, 1);
- dpdk_validate_rte_mbuf (vm, b3, 1);
- }
- else
- {
- dpdk_validate_rte_mbuf (vm, b0, 0);
- dpdk_validate_rte_mbuf (vm, b1, 0);
- dpdk_validate_rte_mbuf (vm, b2, 0);
- dpdk_validate_rte_mbuf (vm, b3, 0);
- }
-
- mb0 = rte_mbuf_from_vlib_buffer (b0);
- mb1 = rte_mbuf_from_vlib_buffer (b1);
- mb2 = rte_mbuf_from_vlib_buffer (b2);
- mb3 = rte_mbuf_from_vlib_buffer (b3);
-
- if (PREDICT_FALSE (or_flags & VLIB_BUFFER_RECYCLE))
- {
- dpdk_buffer_recycle (vm, node, b0, bi0, &mb0);
- dpdk_buffer_recycle (vm, node, b1, bi1, &mb1);
- dpdk_buffer_recycle (vm, node, b2, bi2, &mb2);
- dpdk_buffer_recycle (vm, node, b3, bi3, &mb3);
-
- /* dont enqueue packets if replication failed as they must
- be sent back to recycle */
- if (PREDICT_TRUE ((b0->flags & VLIB_BUFFER_REPL_FAIL) == 0))
- tx_vector[i++ % nb_tx_desc] = mb0;
- if (PREDICT_TRUE ((b1->flags & VLIB_BUFFER_REPL_FAIL) == 0))
- tx_vector[i++ % nb_tx_desc] = mb1;
- if (PREDICT_TRUE ((b2->flags & VLIB_BUFFER_REPL_FAIL) == 0))
- tx_vector[i++ % nb_tx_desc] = mb2;
- if (PREDICT_TRUE ((b3->flags & VLIB_BUFFER_REPL_FAIL) == 0))
- tx_vector[i++ % nb_tx_desc] = mb3;
- }
- else
- {
- if (PREDICT_FALSE (i + 3 >= nb_tx_desc))
- {
- tx_vector[i++ % nb_tx_desc] = mb0;
- tx_vector[i++ % nb_tx_desc] = mb1;
- tx_vector[i++ % nb_tx_desc] = mb2;
- tx_vector[i++ % nb_tx_desc] = mb3;
- i %= nb_tx_desc;
- }
- else
- {
- tx_vector[i++] = mb0;
- tx_vector[i++] = mb1;
- tx_vector[i++] = mb2;
- tx_vector[i++] = mb3;
- }
- }
-
-
- if (PREDICT_FALSE (node->flags & VLIB_NODE_FLAG_TRACE))
- {
- if (b0->flags & VLIB_BUFFER_IS_TRACED)
- dpdk_tx_trace_buffer (dm, node, xd, queue_id, bi0, b0);
- if (b1->flags & VLIB_BUFFER_IS_TRACED)
- dpdk_tx_trace_buffer (dm, node, xd, queue_id, bi1, b1);
- if (b2->flags & VLIB_BUFFER_IS_TRACED)
- dpdk_tx_trace_buffer (dm, node, xd, queue_id, bi2, b2);
- if (b3->flags & VLIB_BUFFER_IS_TRACED)
- dpdk_tx_trace_buffer (dm, node, xd, queue_id, bi3, b3);
- }
-
- n_left -= 4;
- }
- while (n_left > 0)
- {
- u32 bi0;
- struct rte_mbuf *mb0;
- vlib_buffer_t *b0;
-
- bi0 = from[0];
- from++;
-
- b0 = vlib_get_buffer (vm, bi0);
-
- dpdk_validate_rte_mbuf (vm, b0, 1);
-
- mb0 = rte_mbuf_from_vlib_buffer (b0);
- dpdk_buffer_recycle (vm, node, b0, bi0, &mb0);
-
- if (PREDICT_FALSE (node->flags & VLIB_NODE_FLAG_TRACE))
- if (b0->flags & VLIB_BUFFER_IS_TRACED)
- dpdk_tx_trace_buffer (dm, node, xd, queue_id, bi0, b0);
-
- if (PREDICT_TRUE ((b0->flags & VLIB_BUFFER_REPL_FAIL) == 0))
- {
- tx_vector[i % nb_tx_desc] = mb0;
- i++;
- }
- n_left--;
- }
-
- /* account for additional packets in the ring */
- ring->tx_head += n_packets;
- n_on_ring = ring->tx_head - ring->tx_tail;
-
- /* transmit as many packets as possible */
- n_packets = tx_burst_vector_internal (vm, xd, tx_vector);
-
- /*
- * tx_pkts is the number of packets successfully transmitted
- * This is the number originally on ring minus the number remaining on ring
- */
- tx_pkts = n_on_ring - n_packets;
-
- {
- /* If there is no callback then drop any non-transmitted packets */
- if (PREDICT_FALSE (n_packets))
- {
- vlib_simple_counter_main_t *cm;
- vnet_main_t *vnm = vnet_get_main ();
-
- cm = vec_elt_at_index (vnm->interface_main.sw_if_counters,
- VNET_INTERFACE_COUNTER_TX_ERROR);
-
- vlib_increment_simple_counter (cm, my_cpu, xd->vlib_sw_if_index,
- n_packets);
-
- vlib_error_count (vm, node->node_index, DPDK_TX_FUNC_ERROR_PKT_DROP,
- n_packets);
-
- while (n_packets--)
- rte_pktmbuf_free (tx_vector[ring->tx_tail + n_packets]);
- }
-
- /* Reset head/tail to avoid unnecessary wrap */
- ring->tx_head = 0;
- ring->tx_tail = 0;
- }
-
- /* Recycle replicated buffers */
- if (PREDICT_FALSE (vec_len (dm->recycle[my_cpu])))
- {
- vlib_buffer_free (vm, dm->recycle[my_cpu],
- vec_len (dm->recycle[my_cpu]));
- _vec_len (dm->recycle[my_cpu]) = 0;
- }
-
- ASSERT (ring->tx_head >= ring->tx_tail);
-
- return tx_pkts;
-}
-
-static void
-dpdk_clear_hw_interface_counters (u32 instance)
-{
- dpdk_main_t *dm = &dpdk_main;
- dpdk_device_t *xd = vec_elt_at_index (dm->devices, instance);
-
- /*
- * Set the "last_cleared_stats" to the current stats, so that
- * things appear to clear from a display perspective.
- */
- dpdk_update_counters (xd, vlib_time_now (dm->vlib_main));
-
- clib_memcpy (&xd->last_cleared_stats, &xd->stats, sizeof (xd->stats));
- clib_memcpy (xd->last_cleared_xstats, xd->xstats,
- vec_len (xd->last_cleared_xstats) *
- sizeof (xd->last_cleared_xstats[0]));
-
-}
-
-static clib_error_t *
-dpdk_interface_admin_up_down (vnet_main_t * vnm, u32 hw_if_index, u32 flags)
-{
- vnet_hw_interface_t *hif = vnet_get_hw_interface (vnm, hw_if_index);
- uword is_up = (flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP) != 0;
- dpdk_main_t *dm = &dpdk_main;
- dpdk_device_t *xd = vec_elt_at_index (dm->devices, hif->dev_instance);
- int rv = 0;
-
- if (is_up)
- {
- f64 now = vlib_time_now (dm->vlib_main);
-
- if ((xd->flags & DPDK_DEVICE_FLAG_ADMIN_UP) == 0)
- rv = rte_eth_dev_start (xd->device_index);
-
- if (xd->flags & DPDK_DEVICE_FLAG_PROMISC)
- rte_eth_promiscuous_enable (xd->device_index);
- else
- rte_eth_promiscuous_disable (xd->device_index);
-
- rte_eth_allmulticast_enable (xd->device_index);
- xd->flags |= DPDK_DEVICE_FLAG_ADMIN_UP;
- dpdk_update_counters (xd, now);
- dpdk_update_link_state (xd, now);
- }
- else
- {
- xd->flags &= ~DPDK_DEVICE_FLAG_ADMIN_UP;
-
- rte_eth_allmulticast_disable (xd->device_index);
- vnet_hw_interface_set_flags (vnm, xd->vlib_hw_if_index, 0);
- rte_eth_dev_stop (xd->device_index);
-
- /* For bonded interface, stop slave links */
- if (xd->pmd == VNET_DPDK_PMD_BOND)
- {
- u8 slink[16];
- int nlink = rte_eth_bond_slaves_get (xd->device_index, slink, 16);
- while (nlink >= 1)
- {
- u8 dpdk_port = slink[--nlink];
- rte_eth_dev_stop (dpdk_port);
- }
- }
- }
-
- if (rv < 0)
- clib_warning ("rte_eth_dev_%s error: %d", is_up ? "start" : "stop", rv);
-
- return /* no error */ 0;
-}
-
-/*
- * Dynamically redirect all pkts from a specific interface
- * to the specified node
- */
-static void
-dpdk_set_interface_next_node (vnet_main_t * vnm, u32 hw_if_index,
- u32 node_index)
-{
- dpdk_main_t *xm = &dpdk_main;
- vnet_hw_interface_t *hw = vnet_get_hw_interface (vnm, hw_if_index);
- dpdk_device_t *xd = vec_elt_at_index (xm->devices, hw->dev_instance);
-
- /* Shut off redirection */
- if (node_index == ~0)
- {
- xd->per_interface_next_index = node_index;
- return;
- }
-
- xd->per_interface_next_index =
- vlib_node_add_next (xm->vlib_main, dpdk_input_node.index, node_index);
-}
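This hook is wired into the device class below as rx_redirect_to_node. As a minimal usage sketch (the wrapper function and node index are hypothetical), redirection is switched on by passing a graph node index and switched off again by passing ~0:

static void
example_redirect_rx (vnet_main_t * vnm, u32 hw_if_index, u32 my_node_index)
{
  /* steer every packet received on hw_if_index into my_node_index */
  dpdk_set_interface_next_node (vnm, hw_if_index, my_node_index);

  /* ...and later restore the normal rx graph arc */
  dpdk_set_interface_next_node (vnm, hw_if_index, ~0);
}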
-
-
-static clib_error_t *
-dpdk_subif_add_del_function (vnet_main_t * vnm,
- u32 hw_if_index,
- struct vnet_sw_interface_t *st, int is_add)
-{
- dpdk_main_t *xm = &dpdk_main;
- vnet_hw_interface_t *hw = vnet_get_hw_interface (vnm, hw_if_index);
- dpdk_device_t *xd = vec_elt_at_index (xm->devices, hw->dev_instance);
- vnet_sw_interface_t *t = (vnet_sw_interface_t *) st;
- int r, vlan_offload;
- u32 prev_subifs = xd->num_subifs;
- clib_error_t *err = 0;
-
- if (is_add)
- xd->num_subifs++;
- else if (xd->num_subifs)
- xd->num_subifs--;
-
- if ((xd->flags & DPDK_DEVICE_FLAG_PMD) == 0)
- goto done;
-
-  /* currently we program VLANs only for IXGBE VF and I40E VF */
- if ((xd->pmd != VNET_DPDK_PMD_IXGBEVF) && (xd->pmd != VNET_DPDK_PMD_I40EVF))
- goto done;
-
- if (t->sub.eth.flags.no_tags == 1)
- goto done;
-
- if ((t->sub.eth.flags.one_tag != 1) || (t->sub.eth.flags.exact_match != 1))
- {
- xd->num_subifs = prev_subifs;
- err = clib_error_return (0, "unsupported VLAN setup");
- goto done;
- }
-
- vlan_offload = rte_eth_dev_get_vlan_offload (xd->device_index);
- vlan_offload |= ETH_VLAN_FILTER_OFFLOAD;
-
- if ((r = rte_eth_dev_set_vlan_offload (xd->device_index, vlan_offload)))
- {
- xd->num_subifs = prev_subifs;
- err = clib_error_return (0, "rte_eth_dev_set_vlan_offload[%d]: err %d",
- xd->device_index, r);
- goto done;
- }
-
-
- if ((r =
- rte_eth_dev_vlan_filter (xd->device_index, t->sub.eth.outer_vlan_id,
- is_add)))
- {
- xd->num_subifs = prev_subifs;
- err = clib_error_return (0, "rte_eth_dev_vlan_filter[%d]: err %d",
- xd->device_index, r);
- goto done;
- }
-
-done:
- if (xd->num_subifs)
- xd->flags |= DPDK_DEVICE_FLAG_HAVE_SUBIF;
- else
- xd->flags &= ~DPDK_DEVICE_FLAG_HAVE_SUBIF;
-
- return err;
-}
-
-/* *INDENT-OFF* */
-VNET_DEVICE_CLASS (dpdk_device_class) = {
- .name = "dpdk",
- .tx_function = dpdk_interface_tx,
- .tx_function_n_errors = DPDK_TX_FUNC_N_ERROR,
- .tx_function_error_strings = dpdk_tx_func_error_strings,
- .format_device_name = format_dpdk_device_name,
- .format_device = format_dpdk_device,
- .format_tx_trace = format_dpdk_tx_dma_trace,
- .clear_counters = dpdk_clear_hw_interface_counters,
- .admin_up_down_function = dpdk_interface_admin_up_down,
- .subif_add_del_function = dpdk_subif_add_del_function,
- .rx_redirect_to_node = dpdk_set_interface_next_node,
- .mac_addr_change_function = dpdk_set_mac_address,
-};
-
-VLIB_DEVICE_TX_FUNCTION_MULTIARCH (dpdk_device_class, dpdk_interface_tx)
-/* *INDENT-ON* */
-
-#define UP_DOWN_FLAG_EVENT 1
-
-uword
-admin_up_down_process (vlib_main_t * vm,
- vlib_node_runtime_t * rt, vlib_frame_t * f)
-{
- clib_error_t *error = 0;
- uword event_type;
- uword *event_data = 0;
- u32 sw_if_index;
- u32 flags;
-
- while (1)
- {
- vlib_process_wait_for_event (vm);
-
- event_type = vlib_process_get_events (vm, &event_data);
-
- dpdk_main.admin_up_down_in_progress = 1;
-
- switch (event_type)
- {
- case UP_DOWN_FLAG_EVENT:
- {
- if (vec_len (event_data) == 2)
- {
- sw_if_index = event_data[0];
- flags = event_data[1];
- error =
- vnet_sw_interface_set_flags (vnet_get_main (), sw_if_index,
- flags);
- clib_error_report (error);
- }
- }
- break;
- }
-
- vec_reset_length (event_data);
-
- dpdk_main.admin_up_down_in_progress = 0;
-
- }
- return 0; /* or not */
-}
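For context, a hedged sketch of how a caller could hand an admin up/down request to this process (the helper name is hypothetical, and it assumes the node registration below is visible): two data words are signalled with the same event type, so the loop above sees event_data[0] = sw_if_index and event_data[1] = flags.

static void
example_post_admin_up_down (vlib_main_t * vm, u32 sw_if_index, u32 flags)
{
  vlib_process_signal_event (vm, admin_up_down_process_node.index,
                             UP_DOWN_FLAG_EVENT, sw_if_index);
  vlib_process_signal_event (vm, admin_up_down_process_node.index,
                             UP_DOWN_FLAG_EVENT, flags);
}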
-
-/* *INDENT-OFF* */
-VLIB_REGISTER_NODE (admin_up_down_process_node,static) = {
- .function = admin_up_down_process,
- .type = VLIB_NODE_TYPE_PROCESS,
- .name = "admin-up-down-process",
-    .process_log2_n_stack_bytes = 17,  // 128KB
-};
-/* *INDENT-ON* */
-
-/*
- * fd.io coding-style-patch-verification: ON
- *
- * Local Variables:
- * eval: (c-set-style "gnu")
- * End:
- */
diff --git a/vnet/vnet/devices/dpdk/dpdk.h b/vnet/vnet/devices/dpdk/dpdk.h
deleted file mode 100644
index d8f378d2b54..00000000000
--- a/vnet/vnet/devices/dpdk/dpdk.h
+++ /dev/null
@@ -1,534 +0,0 @@
-/*
- * Copyright (c) 2015 Cisco and/or its affiliates.
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at:
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#ifndef __included_dpdk_h__
-#define __included_dpdk_h__
-
-/* $$$$ We should rename always_inline -> clib_always_inline */
-#undef always_inline
-
-#include <rte_config.h>
-
-#include <rte_common.h>
-#include <rte_dev.h>
-#include <rte_log.h>
-#include <rte_memory.h>
-#include <rte_memzone.h>
-#include <rte_tailq.h>
-#include <rte_eal.h>
-#include <rte_per_lcore.h>
-#include <rte_launch.h>
-#include <rte_atomic.h>
-#include <rte_cycles.h>
-#include <rte_prefetch.h>
-#include <rte_lcore.h>
-#include <rte_per_lcore.h>
-#include <rte_branch_prediction.h>
-#include <rte_interrupts.h>
-#include <rte_pci.h>
-#include <rte_random.h>
-#include <rte_debug.h>
-#include <rte_ether.h>
-#include <rte_ethdev.h>
-#include <rte_ring.h>
-#include <rte_mempool.h>
-#include <rte_mbuf.h>
-#include <rte_virtio_net.h>
-#include <rte_version.h>
-#include <rte_eth_bond.h>
-#include <rte_sched.h>
-
-#include <vnet/unix/pcap.h>
-#include <vnet/devices/devices.h>
-
-#if CLIB_DEBUG > 0
-#define always_inline static inline
-#else
-#define always_inline static inline __attribute__ ((__always_inline__))
-#endif
-
-#include <vlib/pci/pci.h>
-
-#define NB_MBUF (16<<10)
-
-extern vnet_device_class_t dpdk_device_class;
-extern vlib_node_registration_t dpdk_input_node;
-extern vlib_node_registration_t handoff_dispatch_node;
-
-#if RTE_VERSION >= RTE_VERSION_NUM(16, 11, 0, 0)
-#define foreach_dpdk_pmd \
- _ ("net_thunderx", THUNDERX) \
- _ ("net_e1000_em", E1000EM) \
- _ ("net_e1000_igb", IGB) \
- _ ("net_e1000_igb_vf", IGBVF) \
- _ ("net_ixgbe", IXGBE) \
- _ ("net_ixgbe_vf", IXGBEVF) \
- _ ("net_i40e", I40E) \
- _ ("net_i40e_vf", I40EVF) \
- _ ("net_virtio", VIRTIO) \
- _ ("net_enic", ENIC) \
- _ ("net_vmxnet3", VMXNET3) \
- _ ("net_af_packet", AF_PACKET) \
- _ ("rte_bond_pmd", BOND) \
- _ ("net_fm10k", FM10K) \
- _ ("net_cxgbe", CXGBE) \
- _ ("net_mlx5", MLX5) \
- _ ("net_dpaa2", DPAA2)
-#else
-#define foreach_dpdk_pmd \
- _ ("rte_nicvf_pmd", THUNDERX) \
- _ ("rte_em_pmd", E1000EM) \
- _ ("rte_igb_pmd", IGB) \
- _ ("rte_igbvf_pmd", IGBVF) \
- _ ("rte_ixgbe_pmd", IXGBE) \
- _ ("rte_ixgbevf_pmd", IXGBEVF) \
- _ ("rte_i40e_pmd", I40E) \
- _ ("rte_i40evf_pmd", I40EVF) \
- _ ("rte_virtio_pmd", VIRTIO) \
- _ ("rte_enic_pmd", ENIC) \
- _ ("rte_vmxnet3_pmd", VMXNET3) \
- _ ("AF_PACKET PMD", AF_PACKET) \
- _ ("rte_bond_pmd", BOND) \
- _ ("rte_pmd_fm10k", FM10K) \
- _ ("rte_cxgbe_pmd", CXGBE) \
- _ ("rte_dpaa2_dpni", DPAA2)
-#endif
-
-typedef enum
-{
- VNET_DPDK_PMD_NONE,
-#define _(s,f) VNET_DPDK_PMD_##f,
- foreach_dpdk_pmd
-#undef _
- VNET_DPDK_PMD_UNKNOWN, /* must be last */
-} dpdk_pmd_t;
-
-typedef enum
-{
- VNET_DPDK_PORT_TYPE_ETH_1G,
- VNET_DPDK_PORT_TYPE_ETH_10G,
- VNET_DPDK_PORT_TYPE_ETH_40G,
- VNET_DPDK_PORT_TYPE_ETH_100G,
- VNET_DPDK_PORT_TYPE_ETH_BOND,
- VNET_DPDK_PORT_TYPE_ETH_SWITCH,
- VNET_DPDK_PORT_TYPE_AF_PACKET,
- VNET_DPDK_PORT_TYPE_UNKNOWN,
-} dpdk_port_type_t;
-
-/*
- * The header for the tx_vector in dpdk_device_t.
- * Head and tail are indexes into the tx_vector and are of type
- * u64 so they never overflow.
- */
-typedef struct
-{
- u64 tx_head;
- u64 tx_tail;
-} tx_ring_hdr_t;
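An illustrative sketch (not part of the original header) of how these free-running counters are used: the ring slot is the counter reduced modulo the ring size, and the fill level is a plain difference, which is why 64-bit counters that never wrap in practice are sufficient.

static inline u32
tx_ring_slot (tx_ring_hdr_t * ring, u16 nb_tx_desc)
{
  return (u32) (ring->tx_head % nb_tx_desc);	/* slot for the next enqueue */
}

static inline u64
tx_ring_fill_level (tx_ring_hdr_t * ring)
{
  return ring->tx_head - ring->tx_tail;	/* packets still waiting to be sent */
}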
-
-typedef struct
-{
- struct rte_ring *swq;
-
- u64 hqos_field0_slabmask;
- u32 hqos_field0_slabpos;
- u32 hqos_field0_slabshr;
- u64 hqos_field1_slabmask;
- u32 hqos_field1_slabpos;
- u32 hqos_field1_slabshr;
- u64 hqos_field2_slabmask;
- u32 hqos_field2_slabpos;
- u32 hqos_field2_slabshr;
- u32 hqos_tc_table[64];
-} dpdk_device_hqos_per_worker_thread_t;
-
-typedef struct
-{
- struct rte_ring **swq;
- struct rte_mbuf **pkts_enq;
- struct rte_mbuf **pkts_deq;
- struct rte_sched_port *hqos;
- u32 hqos_burst_enq;
- u32 hqos_burst_deq;
- u32 pkts_enq_len;
- u32 swq_pos;
- u32 flush_count;
-} dpdk_device_hqos_per_hqos_thread_t;
-
-typedef struct
-{
- CLIB_CACHE_LINE_ALIGN_MARK (cacheline0);
- volatile u32 **lockp;
-
- /* Instance ID */
- u32 device_index;
-
- u32 vlib_hw_if_index;
- u32 vlib_sw_if_index;
-
- /* next node index if we decide to steal the rx graph arc */
- u32 per_interface_next_index;
-
- /* dpdk rte_mbuf rx and tx vectors, VLIB_FRAME_SIZE */
- struct rte_mbuf ***tx_vectors; /* one per worker thread */
- struct rte_mbuf ***rx_vectors;
-
- /* vector of traced contexts, per device */
- u32 **d_trace_buffers;
-
- dpdk_pmd_t pmd:8;
- i8 cpu_socket;
-
- u16 flags;
-#define DPDK_DEVICE_FLAG_ADMIN_UP (1 << 0)
-#define DPDK_DEVICE_FLAG_PROMISC (1 << 1)
-#define DPDK_DEVICE_FLAG_PMD (1 << 2)
-#define DPDK_DEVICE_FLAG_PMD_SUPPORTS_PTYPE (1 << 3)
-#define DPDK_DEVICE_FLAG_MAYBE_MULTISEG (1 << 4)
-#define DPDK_DEVICE_FLAG_HAVE_SUBIF (1 << 5)
-#define DPDK_DEVICE_FLAG_HQOS (1 << 6)
-
- u16 nb_tx_desc;
- CLIB_CACHE_LINE_ALIGN_MARK (cacheline1);
-
- u8 *interface_name_suffix;
-
- /* number of sub-interfaces */
- u16 num_subifs;
-
- /* PMD related */
- u16 tx_q_used;
- u16 rx_q_used;
- u16 nb_rx_desc;
- u16 *cpu_socket_id_by_queue;
- struct rte_eth_conf port_conf;
- struct rte_eth_txconf tx_conf;
-
- /* HQoS related */
- dpdk_device_hqos_per_worker_thread_t *hqos_wt;
- dpdk_device_hqos_per_hqos_thread_t *hqos_ht;
-
- /* af_packet */
- u8 af_packet_port_id;
-
- struct rte_eth_link link;
- f64 time_last_link_update;
-
- struct rte_eth_stats stats;
- struct rte_eth_stats last_stats;
- struct rte_eth_stats last_cleared_stats;
- struct rte_eth_xstat *xstats;
- struct rte_eth_xstat *last_cleared_xstats;
- f64 time_last_stats_update;
- dpdk_port_type_t port_type;
-} dpdk_device_t;
-
-#define DPDK_STATS_POLL_INTERVAL (10.0)
-#define DPDK_MIN_STATS_POLL_INTERVAL (0.001) /* 1msec */
-
-#define DPDK_LINK_POLL_INTERVAL (3.0)
-#define DPDK_MIN_LINK_POLL_INTERVAL (0.001) /* 1msec */
-
-typedef struct
-{
- CLIB_CACHE_LINE_ALIGN_MARK (cacheline0);
-
- /* total input packet counter */
- u64 aggregate_rx_packets;
-} dpdk_worker_t;
-
-typedef struct
-{
- CLIB_CACHE_LINE_ALIGN_MARK (cacheline0);
-
- /* total input packet counter */
- u64 aggregate_rx_packets;
-} dpdk_hqos_thread_t;
-
-typedef struct
-{
- u32 device;
- u16 queue_id;
-} dpdk_device_and_queue_t;
-
-#ifndef DPDK_HQOS_DBG_BYPASS
-#define DPDK_HQOS_DBG_BYPASS 0
-#endif
-
-#ifndef HQOS_FLUSH_COUNT_THRESHOLD
-#define HQOS_FLUSH_COUNT_THRESHOLD 100000
-#endif
-
-typedef struct dpdk_device_config_hqos_t
-{
- u32 hqos_thread;
- u32 hqos_thread_valid;
-
- u32 swq_size;
- u32 burst_enq;
- u32 burst_deq;
-
- u32 pktfield0_slabpos;
- u32 pktfield1_slabpos;
- u32 pktfield2_slabpos;
- u64 pktfield0_slabmask;
- u64 pktfield1_slabmask;
- u64 pktfield2_slabmask;
- u32 tc_table[64];
-
- struct rte_sched_port_params port;
- struct rte_sched_subport_params *subport;
- struct rte_sched_pipe_params *pipe;
- uint32_t *pipe_map;
-} dpdk_device_config_hqos_t;
-
-int dpdk_hqos_validate_mask (u64 mask, u32 n);
-void dpdk_device_config_hqos_pipe_profile_default (dpdk_device_config_hqos_t *
- hqos, u32 pipe_profile_id);
-void dpdk_device_config_hqos_default (dpdk_device_config_hqos_t * hqos);
-clib_error_t *dpdk_port_setup_hqos (dpdk_device_t * xd,
- dpdk_device_config_hqos_t * hqos);
-void dpdk_hqos_metadata_set (dpdk_device_hqos_per_worker_thread_t * hqos,
- struct rte_mbuf **pkts, u32 n_pkts);
-
-#define foreach_dpdk_device_config_item \
- _ (num_rx_queues) \
- _ (num_tx_queues) \
- _ (num_rx_desc) \
- _ (num_tx_desc) \
- _ (rss_fn)
-
-typedef struct
-{
- vlib_pci_addr_t pci_addr;
- u8 is_blacklisted;
- u8 vlan_strip_offload;
-#define DPDK_DEVICE_VLAN_STRIP_DEFAULT 0
-#define DPDK_DEVICE_VLAN_STRIP_OFF 1
-#define DPDK_DEVICE_VLAN_STRIP_ON 2
-
-#define _(x) uword x;
- foreach_dpdk_device_config_item
-#undef _
- clib_bitmap_t * workers;
- u32 hqos_enabled;
- dpdk_device_config_hqos_t hqos;
-} dpdk_device_config_t;
-
-typedef struct
-{
-
- /* Config stuff */
- u8 **eal_init_args;
- u8 *eal_init_args_str;
- u8 *uio_driver_name;
- u8 no_multi_seg;
- u8 enable_tcp_udp_checksum;
-
- /* Required config parameters */
- u8 coremask_set_manually;
- u8 nchannels_set_manually;
- u32 coremask;
- u32 nchannels;
- u32 num_mbufs;
- u8 num_kni; /* while kni_init allows u32, port_id in callback fn is only u8 */
-
- /*
- * format interface names ala xxxEthernet%d/%d/%d instead of
- * xxxEthernet%x/%x/%x.
- */
- u8 interface_name_format_decimal;
-
- /* per-device config */
- dpdk_device_config_t default_devconf;
- dpdk_device_config_t *dev_confs;
- uword *device_config_index_by_pci_addr;
-
-} dpdk_config_main_t;
-
-dpdk_config_main_t dpdk_config_main;
-
-typedef struct
-{
-
- /* Devices */
- dpdk_device_t *devices;
- dpdk_device_and_queue_t **devices_by_cpu;
- dpdk_device_and_queue_t **devices_by_hqos_cpu;
-
- /* per-thread recycle lists */
- u32 **recycle;
-
- /* buffer flags template, configurable to enable/disable tcp / udp cksum */
- u32 buffer_flags_template;
-
- /* vlib buffer free list, must be same size as an rte_mbuf */
- u32 vlib_buffer_free_list_index;
-
- /* dpdk worker "threads" */
- dpdk_worker_t *workers;
-
- /* dpdk HQoS "threads" */
- dpdk_hqos_thread_t *hqos_threads;
-
- /* Ethernet input node index */
- u32 ethernet_input_node_index;
-
- /* pcap tracing [only works if (CLIB_DEBUG > 0)] */
- int tx_pcap_enable;
- pcap_main_t pcap_main;
- u8 *pcap_filename;
- u32 pcap_sw_if_index;
- u32 pcap_pkts_to_capture;
-
- /* hashes */
- uword *dpdk_device_by_kni_port_id;
- uword *vu_sw_if_index_by_listener_fd;
- uword *vu_sw_if_index_by_sock_fd;
- u32 *vu_inactive_interfaces_device_index;
-
- /*
- * flag indicating that a posted admin up/down
- * (via post_sw_interface_set_flags) is in progress
- */
- u8 admin_up_down_in_progress;
-
- u8 use_rss;
-
- /* which cpus are running dpdk-input */
- int input_cpu_first_index;
- int input_cpu_count;
-
- /* which cpus are running I/O TX */
- int hqos_cpu_first_index;
- int hqos_cpu_count;
-
- /* control interval of dpdk link state and stat polling */
- f64 link_state_poll_interval;
- f64 stat_poll_interval;
-
- /* Sleep for this many MS after each device poll */
- u32 poll_sleep;
-
- /* convenience */
- vlib_main_t *vlib_main;
- vnet_main_t *vnet_main;
- dpdk_config_main_t *conf;
-} dpdk_main_t;
-
-dpdk_main_t dpdk_main;
-
-typedef struct
-{
- u32 buffer_index;
- u16 device_index;
- u8 queue_index;
- struct rte_mbuf mb;
- /* Copy of VLIB buffer; packet data stored in pre_data. */
- vlib_buffer_t buffer;
-} dpdk_tx_dma_trace_t;
-
-typedef struct
-{
- u32 buffer_index;
- u16 device_index;
- u16 queue_index;
- struct rte_mbuf mb;
- vlib_buffer_t buffer; /* Copy of VLIB buffer; pkt data stored in pre_data. */
- u8 data[256]; /* First 256 data bytes, used for hexdump */
-} dpdk_rx_dma_trace_t;
-
-void vnet_buffer_needs_dpdk_mb (vlib_buffer_t * b);
-
-clib_error_t *dpdk_set_mac_address (vnet_hw_interface_t * hi, char *address);
-
-clib_error_t *dpdk_set_mc_filter (vnet_hw_interface_t * hi,
- struct ether_addr mc_addr_vec[], int naddr);
-
-void dpdk_thread_input (dpdk_main_t * dm, dpdk_device_t * xd);
-
-clib_error_t *dpdk_port_setup (dpdk_main_t * dm, dpdk_device_t * xd);
-
-u32 dpdk_interface_tx_vector (vlib_main_t * vm, u32 dev_instance);
-
-struct rte_mbuf *dpdk_replicate_packet_mb (vlib_buffer_t * b);
-struct rte_mbuf *dpdk_zerocopy_replicate_packet_mb (vlib_buffer_t * b);
-
-#define foreach_dpdk_error \
- _(NONE, "no error") \
- _(RX_PACKET_ERROR, "Rx packet errors") \
- _(RX_BAD_FCS, "Rx bad fcs") \
- _(IP_CHECKSUM_ERROR, "Rx ip checksum errors") \
- _(RX_ALLOC_FAIL, "rx buf alloc from free list failed") \
- _(RX_ALLOC_NO_PHYSMEM, "rx buf alloc failed no physmem") \
- _(RX_ALLOC_DROP_PKTS, "rx packets dropped due to alloc error")
-
-typedef enum
-{
-#define _(f,s) DPDK_ERROR_##f,
- foreach_dpdk_error
-#undef _
- DPDK_N_ERROR,
-} dpdk_error_t;
-
-int dpdk_set_stat_poll_interval (f64 interval);
-int dpdk_set_link_state_poll_interval (f64 interval);
-void dpdk_update_link_state (dpdk_device_t * xd, f64 now);
-void dpdk_device_lock_init (dpdk_device_t * xd);
-void dpdk_device_lock_free (dpdk_device_t * xd);
-
-static inline u64
-vnet_get_aggregate_rx_packets (void)
-{
- dpdk_main_t *dm = &dpdk_main;
- u64 sum = 0;
- dpdk_worker_t *dw;
-
- vec_foreach (dw, dm->workers) sum += dw->aggregate_rx_packets;
-
- return sum;
-}
-
-void dpdk_rx_trace (dpdk_main_t * dm,
- vlib_node_runtime_t * node,
- dpdk_device_t * xd,
- u16 queue_id, u32 * buffers, uword n_buffers);
-
-#define EFD_OPERATION_LESS_THAN 0
-#define EFD_OPERATION_GREATER_OR_EQUAL 1
-
-format_function_t format_dpdk_device_name;
-format_function_t format_dpdk_device;
-format_function_t format_dpdk_tx_dma_trace;
-format_function_t format_dpdk_rx_dma_trace;
-format_function_t format_dpdk_rte_mbuf;
-format_function_t format_dpdk_rx_rte_mbuf;
-unformat_function_t unformat_socket_mem;
-clib_error_t *unformat_rss_fn (unformat_input_t * input, uword * rss_fn);
-clib_error_t *unformat_hqos (unformat_input_t * input,
- dpdk_device_config_hqos_t * hqos);
-
-uword
-admin_up_down_process (vlib_main_t * vm,
- vlib_node_runtime_t * rt, vlib_frame_t * f);
-
-#endif /* __included_dpdk_h__ */
-
-/*
- * fd.io coding-style-patch-verification: ON
- *
- * Local Variables:
- * eval: (c-set-style "gnu")
- * End:
- */
diff --git a/vnet/vnet/devices/dpdk/dpdk_priv.h b/vnet/vnet/devices/dpdk/dpdk_priv.h
deleted file mode 100644
index 0c81dbc3beb..00000000000
--- a/vnet/vnet/devices/dpdk/dpdk_priv.h
+++ /dev/null
@@ -1,132 +0,0 @@
-/*
- * Copyright (c) 2015 Cisco and/or its affiliates.
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at:
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#define DPDK_NB_RX_DESC_DEFAULT 1024
-#define DPDK_NB_TX_DESC_DEFAULT 1024
-#define DPDK_NB_RX_DESC_VIRTIO 256
-#define DPDK_NB_TX_DESC_VIRTIO 256
-
-#define I40E_DEV_ID_SFP_XL710 0x1572
-#define I40E_DEV_ID_QSFP_A 0x1583
-#define I40E_DEV_ID_QSFP_B 0x1584
-#define I40E_DEV_ID_QSFP_C 0x1585
-#define I40E_DEV_ID_10G_BASE_T 0x1586
-#define I40E_DEV_ID_VF 0x154C
-
-/* These args appear by themselves */
-#define foreach_eal_double_hyphen_predicate_arg \
-_(no-shconf) \
-_(no-hpet) \
-_(no-huge) \
-_(vmware-tsc-map)
-
-#define foreach_eal_single_hyphen_mandatory_arg \
-_(coremask, c) \
-_(nchannels, n) \
-
-#define foreach_eal_single_hyphen_arg \
-_(blacklist, b) \
-_(mem-alloc-request, m) \
-_(force-ranks, r)
-
-/* These args are preceded by "--" and followed by a single string */
-#define foreach_eal_double_hyphen_arg \
-_(huge-dir) \
-_(proc-type) \
-_(file-prefix) \
-_(vdev)
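These x-macro lists are expanded by the startup code into DPDK EAL argv entries. A simplified sketch of the technique follows; the function and vector names are hypothetical, and the real expansion in init.c also consults the startup-config input:

static void
example_collect_predicate_args (u8 *** eal_args)
{
#define _(a) vec_add1 (*eal_args, (u8 *) "--" #a);
  foreach_eal_double_hyphen_predicate_arg
#undef _
  /* the list expands to four vec_add1 calls appending "--no-shconf",
   * "--no-hpet", "--no-huge" and "--vmware-tsc-map" to the vector */
}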
-
-static inline void
-dpdk_get_xstats (dpdk_device_t * xd)
-{
- int len;
- if ((len = rte_eth_xstats_get (xd->device_index, NULL, 0)) > 0)
- {
- vec_validate (xd->xstats, len - 1);
- vec_validate (xd->last_cleared_xstats, len - 1);
-
- len =
- rte_eth_xstats_get (xd->device_index, xd->xstats,
- vec_len (xd->xstats));
-
- ASSERT (vec_len (xd->xstats) == len);
- ASSERT (vec_len (xd->last_cleared_xstats) == len);
-
- _vec_len (xd->xstats) = len;
- _vec_len (xd->last_cleared_xstats) = len;
-
- }
-}
-
-
-static inline void
-dpdk_update_counters (dpdk_device_t * xd, f64 now)
-{
- vlib_simple_counter_main_t *cm;
- vnet_main_t *vnm = vnet_get_main ();
- u32 my_cpu = os_get_cpu_number ();
- u64 rxerrors, last_rxerrors;
-
- /* only update counters for PMD interfaces */
- if ((xd->flags & DPDK_DEVICE_FLAG_PMD) == 0)
- return;
-
- xd->time_last_stats_update = now ? now : xd->time_last_stats_update;
- clib_memcpy (&xd->last_stats, &xd->stats, sizeof (xd->last_stats));
- rte_eth_stats_get (xd->device_index, &xd->stats);
-
- /* maybe bump interface rx no buffer counter */
- if (PREDICT_FALSE (xd->stats.rx_nombuf != xd->last_stats.rx_nombuf))
- {
- cm = vec_elt_at_index (vnm->interface_main.sw_if_counters,
- VNET_INTERFACE_COUNTER_RX_NO_BUF);
-
- vlib_increment_simple_counter (cm, my_cpu, xd->vlib_sw_if_index,
- xd->stats.rx_nombuf -
- xd->last_stats.rx_nombuf);
- }
-
- /* missed pkt counter */
- if (PREDICT_FALSE (xd->stats.imissed != xd->last_stats.imissed))
- {
- cm = vec_elt_at_index (vnm->interface_main.sw_if_counters,
- VNET_INTERFACE_COUNTER_RX_MISS);
-
- vlib_increment_simple_counter (cm, my_cpu, xd->vlib_sw_if_index,
- xd->stats.imissed -
- xd->last_stats.imissed);
- }
- rxerrors = xd->stats.ierrors;
- last_rxerrors = xd->last_stats.ierrors;
-
- if (PREDICT_FALSE (rxerrors != last_rxerrors))
- {
- cm = vec_elt_at_index (vnm->interface_main.sw_if_counters,
- VNET_INTERFACE_COUNTER_RX_ERROR);
-
- vlib_increment_simple_counter (cm, my_cpu, xd->vlib_sw_if_index,
- rxerrors - last_rxerrors);
- }
-
- dpdk_get_xstats (xd);
-}
-
-/*
- * fd.io coding-style-patch-verification: ON
- *
- * Local Variables:
- * eval: (c-set-style "gnu")
- * End:
- */
diff --git a/vnet/vnet/devices/dpdk/format.c b/vnet/vnet/devices/dpdk/format.c
deleted file mode 100644
index ff7c7a5a41c..00000000000
--- a/vnet/vnet/devices/dpdk/format.c
+++ /dev/null
@@ -1,763 +0,0 @@
-/*
- * Copyright (c) 2015 Cisco and/or its affiliates.
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at:
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#include <vnet/vnet.h>
-#include <vppinfra/vec.h>
-#include <vppinfra/format.h>
-#include <vlib/unix/cj.h>
-#include <assert.h>
-
-#include <vnet/ethernet/ethernet.h>
-#include <vnet/devices/dpdk/dpdk.h>
-
-#include "dpdk_priv.h"
-#include <vppinfra/error.h>
-
-#define foreach_dpdk_counter \
- _ (tx_frames_ok, opackets) \
- _ (tx_bytes_ok, obytes) \
- _ (tx_errors, oerrors) \
- _ (rx_frames_ok, ipackets) \
- _ (rx_bytes_ok, ibytes) \
- _ (rx_errors, ierrors) \
- _ (rx_missed, imissed) \
- _ (rx_no_bufs, rx_nombuf)
-
-#define foreach_dpdk_q_counter \
- _ (rx_frames_ok, q_ipackets) \
- _ (tx_frames_ok, q_opackets) \
- _ (rx_bytes_ok, q_ibytes) \
- _ (tx_bytes_ok, q_obytes) \
- _ (rx_errors, q_errors)
-
-#define foreach_dpdk_rss_hf \
- _(ETH_RSS_FRAG_IPV4, "ipv4-frag") \
- _(ETH_RSS_NONFRAG_IPV4_TCP, "ipv4-tcp") \
- _(ETH_RSS_NONFRAG_IPV4_UDP, "ipv4-udp") \
- _(ETH_RSS_NONFRAG_IPV4_SCTP, "ipv4-sctp") \
- _(ETH_RSS_NONFRAG_IPV4_OTHER, "ipv4-other") \
- _(ETH_RSS_IPV4, "ipv4") \
- _(ETH_RSS_IPV6_TCP_EX, "ipv6-tcp-ex") \
- _(ETH_RSS_IPV6_UDP_EX, "ipv6-udp-ex") \
- _(ETH_RSS_FRAG_IPV6, "ipv6-frag") \
- _(ETH_RSS_NONFRAG_IPV6_TCP, "ipv6-tcp") \
- _(ETH_RSS_NONFRAG_IPV6_UDP, "ipv6-udp") \
- _(ETH_RSS_NONFRAG_IPV6_SCTP, "ipv6-sctp") \
- _(ETH_RSS_NONFRAG_IPV6_OTHER, "ipv6-other") \
- _(ETH_RSS_L2_PAYLOAD, "l2-payload") \
- _(ETH_RSS_IPV6_EX, "ipv6-ex") \
- _(ETH_RSS_IPV6, "ipv6")
-
-
-#define foreach_dpdk_rx_offload_caps \
- _(DEV_RX_OFFLOAD_VLAN_STRIP, "vlan-strip") \
- _(DEV_RX_OFFLOAD_IPV4_CKSUM, "ipv4-cksum") \
- _(DEV_RX_OFFLOAD_UDP_CKSUM , "udp-cksum") \
- _(DEV_RX_OFFLOAD_TCP_CKSUM , "tcp-cksum") \
-  _(DEV_RX_OFFLOAD_TCP_LRO , "tcp-lro") \
- _(DEV_RX_OFFLOAD_QINQ_STRIP, "qinq-strip")
-
-#define foreach_dpdk_tx_offload_caps \
- _(DEV_TX_OFFLOAD_VLAN_INSERT, "vlan-insert") \
- _(DEV_TX_OFFLOAD_IPV4_CKSUM, "ipv4-cksum") \
- _(DEV_TX_OFFLOAD_UDP_CKSUM , "udp-cksum") \
- _(DEV_TX_OFFLOAD_TCP_CKSUM , "tcp-cksum") \
- _(DEV_TX_OFFLOAD_SCTP_CKSUM , "sctp-cksum") \
- _(DEV_TX_OFFLOAD_TCP_TSO , "tcp-tso") \
- _(DEV_TX_OFFLOAD_UDP_TSO , "udp-tso") \
- _(DEV_TX_OFFLOAD_OUTER_IPV4_CKSUM, "outer-ipv4-cksum") \
- _(DEV_TX_OFFLOAD_QINQ_INSERT, "qinq-insert")
-
-#if RTE_VERSION < RTE_VERSION_NUM(16, 11, 0, 0)
-/* New ol_flags bits added in DPDK-16.11 */
-#define PKT_RX_IP_CKSUM_GOOD (1ULL << 7)
-#define PKT_RX_L4_CKSUM_GOOD (1ULL << 8)
-#endif
-
-#define foreach_dpdk_pkt_rx_offload_flag \
-  _ (PKT_RX_VLAN_PKT, "RX packet is an 802.1q VLAN packet") \
- _ (PKT_RX_RSS_HASH, "RX packet with RSS hash result") \
- _ (PKT_RX_FDIR, "RX packet with FDIR infos") \
- _ (PKT_RX_L4_CKSUM_BAD, "L4 cksum of RX pkt. is not OK") \
- _ (PKT_RX_IP_CKSUM_BAD, "IP cksum of RX pkt. is not OK") \
- _ (PKT_RX_VLAN_STRIPPED, "RX packet VLAN tag stripped") \
- _ (PKT_RX_IP_CKSUM_GOOD, "IP cksum of RX pkt. is valid") \
- _ (PKT_RX_L4_CKSUM_GOOD, "L4 cksum of RX pkt. is valid") \
- _ (PKT_RX_IEEE1588_PTP, "RX IEEE1588 L2 Ethernet PT Packet") \
- _ (PKT_RX_IEEE1588_TMST, "RX IEEE1588 L2/L4 timestamped packet") \
- _ (PKT_RX_QINQ_STRIPPED, "RX packet QinQ tags stripped")
-
-#if RTE_VERSION < RTE_VERSION_NUM(16, 11, 0, 0)
-/* PTYPE added in DPDK-16.11 */
-#define RTE_PTYPE_L2_ETHER_VLAN 0x00000006
-#define RTE_PTYPE_L2_ETHER_QINQ 0x00000007
-#endif
-
-#define foreach_dpdk_pkt_type \
- _ (L2, ETHER, "Ethernet packet") \
- _ (L2, ETHER_TIMESYNC, "Ethernet packet for time sync") \
- _ (L2, ETHER_ARP, "ARP packet") \
- _ (L2, ETHER_LLDP, "LLDP (Link Layer Discovery Protocol) packet") \
- _ (L2, ETHER_NSH, "NSH (Network Service Header) packet") \
- _ (L2, ETHER_VLAN, "VLAN packet") \
- _ (L2, ETHER_QINQ, "QinQ packet") \
- _ (L3, IPV4, "IPv4 packet without extension headers") \
- _ (L3, IPV4_EXT, "IPv4 packet with extension headers") \
- _ (L3, IPV4_EXT_UNKNOWN, "IPv4 packet with or without extension headers") \
- _ (L3, IPV6, "IPv6 packet without extension headers") \
- _ (L3, IPV6_EXT, "IPv6 packet with extension headers") \
- _ (L3, IPV6_EXT_UNKNOWN, "IPv6 packet with or without extension headers") \
- _ (L4, TCP, "TCP packet") \
- _ (L4, UDP, "UDP packet") \
- _ (L4, FRAG, "Fragmented IP packet") \
- _ (L4, SCTP, "SCTP (Stream Control Transmission Protocol) packet") \
- _ (L4, ICMP, "ICMP packet") \
- _ (L4, NONFRAG, "Non-fragmented IP packet") \
- _ (TUNNEL, GRE, "GRE tunneling packet") \
- _ (TUNNEL, VXLAN, "VXLAN tunneling packet") \
- _ (TUNNEL, NVGRE, "NVGRE Tunneling packet") \
- _ (TUNNEL, GENEVE, "GENEVE Tunneling packet") \
- _ (TUNNEL, GRENAT, "Teredo, VXLAN or GRE Tunneling packet") \
- _ (INNER_L2, ETHER, "Inner Ethernet packet") \
- _ (INNER_L2, ETHER_VLAN, "Inner Ethernet packet with VLAN") \
- _ (INNER_L3, IPV4, "Inner IPv4 packet without extension headers") \
- _ (INNER_L3, IPV4_EXT, "Inner IPv4 packet with extension headers") \
- _ (INNER_L3, IPV4_EXT_UNKNOWN, "Inner IPv4 packet with or without extension headers") \
- _ (INNER_L3, IPV6, "Inner IPv6 packet without extension headers") \
- _ (INNER_L3, IPV6_EXT, "Inner IPv6 packet with extension headers") \
- _ (INNER_L3, IPV6_EXT_UNKNOWN, "Inner IPv6 packet with or without extension headers") \
- _ (INNER_L4, TCP, "Inner TCP packet") \
- _ (INNER_L4, UDP, "Inner UDP packet") \
-  _ (INNER_L4, FRAG, "Inner fragmented IP packet") \
- _ (INNER_L4, SCTP, "Inner SCTP (Stream Control Transmission Protocol) packet") \
- _ (INNER_L4, ICMP, "Inner ICMP packet") \
- _ (INNER_L4, NONFRAG, "Inner non-fragmented IP packet")
-
-#define foreach_dpdk_pkt_tx_offload_flag \
-  _ (PKT_TX_VLAN_PKT, "TX packet is an 802.1q VLAN packet") \
- _ (PKT_TX_IP_CKSUM, "IP cksum of TX pkt. computed by NIC") \
- _ (PKT_TX_TCP_CKSUM, "TCP cksum of TX pkt. computed by NIC") \
- _ (PKT_TX_SCTP_CKSUM, "SCTP cksum of TX pkt. computed by NIC") \
- _ (PKT_TX_IEEE1588_TMST, "TX IEEE1588 packet to timestamp")
-
-#define foreach_dpdk_pkt_offload_flag \
- foreach_dpdk_pkt_rx_offload_flag \
- foreach_dpdk_pkt_tx_offload_flag
-
-u8 *
-format_dpdk_device_name (u8 * s, va_list * args)
-{
- dpdk_main_t *dm = &dpdk_main;
- char *devname_format;
- char *device_name;
- u32 i = va_arg (*args, u32);
- struct rte_eth_dev_info dev_info;
- u8 *ret;
-
- if (dm->conf->interface_name_format_decimal)
- devname_format = "%s%d/%d/%d";
- else
- devname_format = "%s%x/%x/%x";
-
- switch (dm->devices[i].port_type)
- {
- case VNET_DPDK_PORT_TYPE_ETH_1G:
- device_name = "GigabitEthernet";
- break;
-
- case VNET_DPDK_PORT_TYPE_ETH_10G:
- device_name = "TenGigabitEthernet";
- break;
-
- case VNET_DPDK_PORT_TYPE_ETH_40G:
- device_name = "FortyGigabitEthernet";
- break;
-
- case VNET_DPDK_PORT_TYPE_ETH_100G:
- device_name = "HundredGigabitEthernet";
- break;
-
- case VNET_DPDK_PORT_TYPE_ETH_BOND:
- return format (s, "BondEthernet%d", dm->devices[i].device_index);
-
- case VNET_DPDK_PORT_TYPE_ETH_SWITCH:
- device_name = "EthernetSwitch";
- break;
-
- case VNET_DPDK_PORT_TYPE_AF_PACKET:
- rte_eth_dev_info_get (i, &dev_info);
- return format (s, "af_packet%d", dm->devices[i].af_packet_port_id);
-
- default:
- case VNET_DPDK_PORT_TYPE_UNKNOWN:
- device_name = "UnknownEthernet";
- break;
- }
-
- rte_eth_dev_info_get (i, &dev_info);
-
- if (dev_info.pci_dev)
- ret = format (s, devname_format, device_name, dev_info.pci_dev->addr.bus,
- dev_info.pci_dev->addr.devid,
- dev_info.pci_dev->addr.function);
- else
- ret = format (s, "%s%d", device_name, dm->devices[i].device_index);
-
- if (dm->devices[i].interface_name_suffix)
- return format (ret, "/%s", dm->devices[i].interface_name_suffix);
- return ret;
-}
-
-static u8 *
-format_dpdk_device_type (u8 * s, va_list * args)
-{
- dpdk_main_t *dm = &dpdk_main;
- char *dev_type;
- u32 i = va_arg (*args, u32);
-
- switch (dm->devices[i].pmd)
- {
- case VNET_DPDK_PMD_E1000EM:
- dev_type = "Intel 82540EM (e1000)";
- break;
-
- case VNET_DPDK_PMD_IGB:
- dev_type = "Intel e1000";
- break;
-
- case VNET_DPDK_PMD_I40E:
- dev_type = "Intel X710/XL710 Family";
- break;
-
- case VNET_DPDK_PMD_I40EVF:
- dev_type = "Intel X710/XL710 Family VF";
- break;
-
- case VNET_DPDK_PMD_FM10K:
- dev_type = "Intel FM10000 Family Ethernet Switch";
- break;
-
- case VNET_DPDK_PMD_IGBVF:
- dev_type = "Intel e1000 VF";
- break;
-
- case VNET_DPDK_PMD_VIRTIO:
- dev_type = "Red Hat Virtio";
- break;
-
- case VNET_DPDK_PMD_IXGBEVF:
- dev_type = "Intel 82599 VF";
- break;
-
- case VNET_DPDK_PMD_IXGBE:
- dev_type = "Intel 82599";
- break;
-
- case VNET_DPDK_PMD_ENIC:
- dev_type = "Cisco VIC";
- break;
-
- case VNET_DPDK_PMD_CXGBE:
- dev_type = "Chelsio T4/T5";
- break;
-
- case VNET_DPDK_PMD_MLX5:
- dev_type = "Mellanox ConnectX-4 Family";
- break;
-
- case VNET_DPDK_PMD_VMXNET3:
- dev_type = "VMware VMXNET3";
- break;
-
- case VNET_DPDK_PMD_AF_PACKET:
- dev_type = "af_packet";
- break;
-
- case VNET_DPDK_PMD_BOND:
- dev_type = "Ethernet Bonding";
- break;
-
- case VNET_DPDK_PMD_DPAA2:
- dev_type = "NXP DPAA2 Mac";
- break;
-
- default:
- case VNET_DPDK_PMD_UNKNOWN:
- dev_type = "### UNKNOWN ###";
- break;
- }
-
- return format (s, dev_type);
-}
-
-static u8 *
-format_dpdk_link_status (u8 * s, va_list * args)
-{
- dpdk_device_t *xd = va_arg (*args, dpdk_device_t *);
- struct rte_eth_link *l = &xd->link;
- vnet_main_t *vnm = vnet_get_main ();
- vnet_hw_interface_t *hi = vnet_get_hw_interface (vnm, xd->vlib_hw_if_index);
-
- s = format (s, "%s ", l->link_status ? "up" : "down");
- if (l->link_status)
- {
- u32 promisc = rte_eth_promiscuous_get (xd->device_index);
-
- s = format (s, "%s duplex ", (l->link_duplex == ETH_LINK_FULL_DUPLEX) ?
- "full" : "half");
- s = format (s, "speed %u mtu %d %s\n", l->link_speed,
- hi->max_packet_bytes, promisc ? " promisc" : "");
- }
- else
- s = format (s, "\n");
-
- return s;
-}
-
-#define _line_len 72
-#define _(v, str) \
-if (bitmap & v) { \
- if (format_get_indent (s) > next_split ) { \
- next_split += _line_len; \
- s = format(s,"\n%U", format_white_space, indent); \
- } \
- s = format(s, "%s ", str); \
-}
-
-static u8 *
-format_dpdk_rss_hf_name (u8 * s, va_list * args)
-{
- u64 bitmap = va_arg (*args, u64);
- int next_split = _line_len;
- int indent = format_get_indent (s);
-
- if (!bitmap)
- return format (s, "none");
-
- foreach_dpdk_rss_hf return s;
-}
-
-static u8 *
-format_dpdk_rx_offload_caps (u8 * s, va_list * args)
-{
- u32 bitmap = va_arg (*args, u32);
- int next_split = _line_len;
- int indent = format_get_indent (s);
-
- if (!bitmap)
- return format (s, "none");
-
- foreach_dpdk_rx_offload_caps return s;
-}
-
-static u8 *
-format_dpdk_tx_offload_caps (u8 * s, va_list * args)
-{
- u32 bitmap = va_arg (*args, u32);
- int next_split = _line_len;
- int indent = format_get_indent (s);
- if (!bitmap)
- return format (s, "none");
-
- foreach_dpdk_tx_offload_caps return s;
-}
-
-#undef _line_len
-#undef _
-
-u8 *
-format_dpdk_device (u8 * s, va_list * args)
-{
- u32 dev_instance = va_arg (*args, u32);
- int verbose = va_arg (*args, int);
- dpdk_main_t *dm = &dpdk_main;
- dpdk_device_t *xd = vec_elt_at_index (dm->devices, dev_instance);
- uword indent = format_get_indent (s);
- f64 now = vlib_time_now (dm->vlib_main);
- struct rte_eth_dev_info di;
-
- dpdk_update_counters (xd, now);
- dpdk_update_link_state (xd, now);
-
- s = format (s, "%U\n%Ucarrier %U",
- format_dpdk_device_type, xd->device_index,
- format_white_space, indent + 2, format_dpdk_link_status, xd);
-
- rte_eth_dev_info_get (xd->device_index, &di);
-
- if (verbose > 1 && xd->flags & DPDK_DEVICE_FLAG_PMD)
- {
- struct rte_pci_device *pci;
- struct rte_eth_rss_conf rss_conf;
- int vlan_off;
- int retval;
-
- rss_conf.rss_key = 0;
- retval = rte_eth_dev_rss_hash_conf_get (xd->device_index, &rss_conf);
- if (retval < 0)
- clib_warning ("rte_eth_dev_rss_hash_conf_get returned %d", retval);
- pci = di.pci_dev;
-
- if (pci)
- s =
- format (s,
- "%Upci id: device %04x:%04x subsystem %04x:%04x\n"
- "%Upci address: %04x:%02x:%02x.%02x\n",
- format_white_space, indent + 2, pci->id.vendor_id,
- pci->id.device_id, pci->id.subsystem_vendor_id,
- pci->id.subsystem_device_id, format_white_space, indent + 2,
- pci->addr.domain, pci->addr.bus, pci->addr.devid,
- pci->addr.function);
- s =
- format (s, "%Umax rx packet len: %d\n", format_white_space,
- indent + 2, di.max_rx_pktlen);
- s =
- format (s, "%Umax num of queues: rx %d tx %d\n", format_white_space,
- indent + 2, di.max_rx_queues, di.max_tx_queues);
- s =
- format (s, "%Upromiscuous: unicast %s all-multicast %s\n",
- format_white_space, indent + 2,
- rte_eth_promiscuous_get (xd->device_index) ? "on" : "off",
- rte_eth_promiscuous_get (xd->device_index) ? "on" : "off");
- vlan_off = rte_eth_dev_get_vlan_offload (xd->device_index);
- s = format (s, "%Uvlan offload: strip %s filter %s qinq %s\n",
- format_white_space, indent + 2,
- vlan_off & ETH_VLAN_STRIP_OFFLOAD ? "on" : "off",
- vlan_off & ETH_VLAN_FILTER_OFFLOAD ? "on" : "off",
- vlan_off & ETH_VLAN_EXTEND_OFFLOAD ? "on" : "off");
- s = format (s, "%Urx offload caps: %U\n",
- format_white_space, indent + 2,
- format_dpdk_rx_offload_caps, di.rx_offload_capa);
- s = format (s, "%Utx offload caps: %U\n",
- format_white_space, indent + 2,
- format_dpdk_tx_offload_caps, di.tx_offload_capa);
- s = format (s, "%Urss active: %U\n"
- "%Urss supported: %U\n",
- format_white_space, indent + 2,
- format_dpdk_rss_hf_name, rss_conf.rss_hf,
- format_white_space, indent + 2,
- format_dpdk_rss_hf_name, di.flow_type_rss_offloads);
- }
-
- s = format (s, "%Urx queues %d, rx desc %d, tx queues %d, tx desc %d\n",
- format_white_space, indent + 2,
- xd->rx_q_used, xd->nb_rx_desc, xd->tx_q_used, xd->nb_tx_desc);
-
- if (xd->cpu_socket > -1)
- s = format (s, "%Ucpu socket %d\n",
- format_white_space, indent + 2, xd->cpu_socket);
-
- /* $$$ MIB counters */
- {
-#define _(N, V) \
- if ((xd->stats.V - xd->last_cleared_stats.V) != 0) { \
- s = format (s, "\n%U%-40U%16Ld", \
- format_white_space, indent + 2, \
- format_c_identifier, #N, \
- xd->stats.V - xd->last_cleared_stats.V); \
- } \
-
- foreach_dpdk_counter
-#undef _
- }
-
- u8 *xs = 0;
- u32 i = 0;
- struct rte_eth_xstat *xstat, *last_xstat;
- struct rte_eth_xstat_name *xstat_names = 0;
- int len = rte_eth_xstats_get_names (xd->device_index, NULL, 0);
- vec_validate (xstat_names, len - 1);
- rte_eth_xstats_get_names (xd->device_index, xstat_names, len);
-
- ASSERT (vec_len (xd->xstats) == vec_len (xd->last_cleared_xstats));
-
- /* *INDENT-OFF* */
- vec_foreach_index(i, xd->xstats)
- {
- u64 delta = 0;
- xstat = vec_elt_at_index(xd->xstats, i);
- last_xstat = vec_elt_at_index(xd->last_cleared_xstats, i);
-
- delta = xstat->value - last_xstat->value;
- if (verbose == 2 || (verbose && delta))
- {
- /* format_c_identifier doesn't like c strings inside vector */
- u8 * name = format(0,"%s", xstat_names[i].name);
- xs = format(xs, "\n%U%-38U%16Ld",
- format_white_space, indent + 4,
- format_c_identifier, name, delta);
- vec_free(name);
- }
- }
- /* *INDENT-ON* */
-
- vec_free (xstat_names);
-
- if (xs)
- {
- s = format (s, "\n%Uextended stats:%v",
- format_white_space, indent + 2, xs);
- vec_free (xs);
- }
-
- return s;
-}
-
-u8 *
-format_dpdk_tx_dma_trace (u8 * s, va_list * va)
-{
- CLIB_UNUSED (vlib_main_t * vm) = va_arg (*va, vlib_main_t *);
- CLIB_UNUSED (vlib_node_t * node) = va_arg (*va, vlib_node_t *);
- CLIB_UNUSED (vnet_main_t * vnm) = vnet_get_main ();
- dpdk_tx_dma_trace_t *t = va_arg (*va, dpdk_tx_dma_trace_t *);
- dpdk_main_t *dm = &dpdk_main;
- dpdk_device_t *xd = vec_elt_at_index (dm->devices, t->device_index);
- uword indent = format_get_indent (s);
- vnet_sw_interface_t *sw = vnet_get_sw_interface (vnm, xd->vlib_sw_if_index);
-
- s = format (s, "%U tx queue %d",
- format_vnet_sw_interface_name, vnm, sw, t->queue_index);
-
- s = format (s, "\n%Ubuffer 0x%x: %U",
- format_white_space, indent,
- t->buffer_index, format_vlib_buffer, &t->buffer);
-
- s = format (s, "\n%U%U", format_white_space, indent,
- format_ethernet_header_with_length, t->buffer.pre_data,
- sizeof (t->buffer.pre_data));
-
- return s;
-}
-
-u8 *
-format_dpdk_rx_dma_trace (u8 * s, va_list * va)
-{
- CLIB_UNUSED (vlib_main_t * vm) = va_arg (*va, vlib_main_t *);
- CLIB_UNUSED (vlib_node_t * node) = va_arg (*va, vlib_node_t *);
- CLIB_UNUSED (vnet_main_t * vnm) = vnet_get_main ();
- dpdk_rx_dma_trace_t *t = va_arg (*va, dpdk_rx_dma_trace_t *);
- dpdk_main_t *dm = &dpdk_main;
- dpdk_device_t *xd = vec_elt_at_index (dm->devices, t->device_index);
- format_function_t *f;
- uword indent = format_get_indent (s);
- vnet_sw_interface_t *sw = vnet_get_sw_interface (vnm, xd->vlib_sw_if_index);
-
- s = format (s, "%U rx queue %d",
- format_vnet_sw_interface_name, vnm, sw, t->queue_index);
-
- s = format (s, "\n%Ubuffer 0x%x: %U",
- format_white_space, indent,
- t->buffer_index, format_vlib_buffer, &t->buffer);
-
- s = format (s, "\n%U%U",
- format_white_space, indent,
- format_dpdk_rte_mbuf, &t->mb, &t->data);
-
- if (vm->trace_main.verbose)
- {
- s = format (s, "\n%UPacket Dump%s", format_white_space, indent + 2,
- t->mb.data_len > sizeof (t->data) ? " (truncated)" : "");
- s = format (s, "\n%U%U", format_white_space, indent + 4,
- format_hexdump, &t->data,
- t->mb.data_len >
- sizeof (t->data) ? sizeof (t->data) : t->mb.data_len);
- }
- f = node->format_buffer;
- if (!f)
- f = format_hex_bytes;
- s = format (s, "\n%U%U", format_white_space, indent,
- f, t->buffer.pre_data, sizeof (t->buffer.pre_data));
-
- return s;
-}
-
-
-static inline u8 *
-format_dpdk_pkt_types (u8 * s, va_list * va)
-{
- u32 *pkt_types = va_arg (*va, u32 *);
- uword indent __attribute__ ((unused)) = format_get_indent (s) + 2;
-
- if (!*pkt_types)
- return s;
-
- s = format (s, "Packet Types");
-
-#define _(L, F, S) \
- if ((*pkt_types & RTE_PTYPE_##L##_MASK) == RTE_PTYPE_##L##_##F) \
- { \
- s = format (s, "\n%U%s (0x%04x) %s", format_white_space, indent, \
- "RTE_PTYPE_" #L "_" #F, RTE_PTYPE_##L##_##F, S); \
- }
-
- foreach_dpdk_pkt_type
-#undef _
- return s;
-}
-
-static inline u8 *
-format_dpdk_pkt_offload_flags (u8 * s, va_list * va)
-{
- u64 *ol_flags = va_arg (*va, u64 *);
- uword indent = format_get_indent (s) + 2;
-
- if (!*ol_flags)
- return s;
-
- s = format (s, "Packet Offload Flags");
-
-#define _(F, S) \
- if (*ol_flags & F) \
- { \
- s = format (s, "\n%U%s (0x%04x) %s", \
- format_white_space, indent, #F, F, S); \
- }
-
- foreach_dpdk_pkt_offload_flag
-#undef _
- return s;
-}
-
-u8 *
-format_dpdk_rte_mbuf_vlan (u8 * s, va_list * va)
-{
- ethernet_vlan_header_tv_t *vlan_hdr =
- va_arg (*va, ethernet_vlan_header_tv_t *);
-
- if (clib_net_to_host_u16 (vlan_hdr->type) == ETHERNET_TYPE_DOT1AD)
- {
- s = format (s, "%U 802.1q vlan ",
- format_ethernet_vlan_tci,
- clib_net_to_host_u16 (vlan_hdr->priority_cfi_and_id));
- vlan_hdr++;
- }
-
- s = format (s, "%U",
- format_ethernet_vlan_tci,
- clib_net_to_host_u16 (vlan_hdr->priority_cfi_and_id));
-
- return s;
-}
-
-u8 *
-format_dpdk_rte_mbuf (u8 * s, va_list * va)
-{
- struct rte_mbuf *mb = va_arg (*va, struct rte_mbuf *);
- ethernet_header_t *eth_hdr = va_arg (*va, ethernet_header_t *);
- uword indent = format_get_indent (s) + 2;
-
- s = format (s, "PKT MBUF: port %d, nb_segs %d, pkt_len %d"
- "\n%Ubuf_len %d, data_len %d, ol_flags 0x%x, data_off %d, phys_addr 0x%x"
- "\n%Upacket_type 0x%x",
- mb->port, mb->nb_segs, mb->pkt_len,
- format_white_space, indent,
- mb->buf_len, mb->data_len, mb->ol_flags, mb->data_off,
- mb->buf_physaddr, format_white_space, indent, mb->packet_type);
-
- if (mb->ol_flags)
- s = format (s, "\n%U%U", format_white_space, indent,
- format_dpdk_pkt_offload_flags, &mb->ol_flags);
-
- if ((mb->ol_flags & PKT_RX_VLAN_PKT) &&
- ((mb->ol_flags & (PKT_RX_VLAN_STRIPPED | PKT_RX_QINQ_STRIPPED)) == 0))
- {
- ethernet_vlan_header_tv_t *vlan_hdr =
- ((ethernet_vlan_header_tv_t *) & (eth_hdr->type));
- s = format (s, " %U", format_dpdk_rte_mbuf_vlan, vlan_hdr);
- }
-
- if (mb->packet_type)
- s = format (s, "\n%U%U", format_white_space, indent,
- format_dpdk_pkt_types, &mb->packet_type);
-
- return s;
-}
-
-uword
-unformat_socket_mem (unformat_input_t * input, va_list * va)
-{
- uword **r = va_arg (*va, uword **);
- int i = 0;
- u32 mem;
-
- while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
- {
- if (unformat (input, ","))
- hash_set (*r, i, 1024);
- else if (unformat (input, "%u,", &mem))
- hash_set (*r, i, mem);
- else if (unformat (input, "%u", &mem))
- hash_set (*r, i, mem);
- else
- {
- unformat_put_input (input);
- goto done;
- }
- i++;
- }
-
-done:
- return 1;
-}
-
-clib_error_t *
-unformat_rss_fn (unformat_input_t * input, uword * rss_fn)
-{
- while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
- {
- if (0)
- ;
-#undef _
-#define _(f, s) \
- else if (unformat (input, s)) \
- *rss_fn |= f;
-
- foreach_dpdk_rss_hf
-#undef _
- else
- {
- return clib_error_return (0, "unknown input `%U'",
- format_unformat_error, input);
- }
- }
- return 0;
-}
-
-clib_error_t *
-unformat_hqos (unformat_input_t * input, dpdk_device_config_hqos_t * hqos)
-{
- clib_error_t *error = 0;
-
- while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
- {
- if (unformat (input, "hqos-thread %u", &hqos->hqos_thread))
- hqos->hqos_thread_valid = 1;
- else
- {
- error = clib_error_return (0, "unknown input `%U'",
- format_unformat_error, input);
- break;
- }
- }
-
- return error;
-}
-
-/*
- * fd.io coding-style-patch-verification: ON
- *
- * Local Variables:
- * eval: (c-set-style "gnu")
- * End:
- */
diff --git a/vnet/vnet/devices/dpdk/hqos.c b/vnet/vnet/devices/dpdk/hqos.c
deleted file mode 100644
index d68bc48f80b..00000000000
--- a/vnet/vnet/devices/dpdk/hqos.c
+++ /dev/null
@@ -1,775 +0,0 @@
-/*
- * Copyright(c) 2016 Intel Corporation. All rights reserved.
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at:
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#include <stdio.h>
-#include <stdlib.h>
-#include <unistd.h>
-#include <sys/stat.h>
-#include <sys/mount.h>
-#include <string.h>
-#include <fcntl.h>
-
-#include <vppinfra/vec.h>
-#include <vppinfra/error.h>
-#include <vppinfra/format.h>
-#include <vppinfra/bitmap.h>
-
-#include <vnet/vnet.h>
-#include <vnet/ethernet/ethernet.h>
-#include <vnet/devices/dpdk/dpdk.h>
-
-#include <vlib/unix/physmem.h>
-#include <vlib/pci/pci.h>
-#include <vlibmemory/api.h>
-#include <vlibmemory/vl_memory_msg_enum.h> /* enumerate all vlib messages */
-
-#define vl_typedefs /* define message structures */
-#include <vlibmemory/vl_memory_api_h.h>
-#undef vl_typedefs
-
-/* instantiate all the print functions we know about */
-#define vl_print(handle, ...) vlib_cli_output (handle, __VA_ARGS__)
-#define vl_printfun
-#include <vlibmemory/vl_memory_api_h.h>
-#undef vl_printfun
-
-#include "dpdk_priv.h"
-
-dpdk_main_t dpdk_main;
-
-/***
- *
- * HQoS default configuration values
- *
- ***/
-
-static dpdk_device_config_hqos_t hqos_params_default = {
- .hqos_thread_valid = 0,
-
- .swq_size = 4096,
- .burst_enq = 256,
- .burst_deq = 220,
-
- /*
- * Packet field to identify the subport.
- *
- * Default value: Since only one subport is defined by default (see below:
- * n_subports_per_port = 1), the subport ID is hardcoded to 0.
- */
- .pktfield0_slabpos = 0,
- .pktfield0_slabmask = 0,
-
- /*
- * Packet field to identify the pipe.
- *
- * Default value: Assuming Ethernet/IPv4/UDP packets, UDP payload bits 12 .. 23
- */
- .pktfield1_slabpos = 40,
- .pktfield1_slabmask = 0x0000000FFF000000LLU,
-
- /* Packet field used as index into TC translation table to identify the traffic
- * class and queue.
- *
- * Default value: Assuming Ethernet/IPv4 packets, IPv4 DSCP field
- */
- .pktfield2_slabpos = 8,
- .pktfield2_slabmask = 0x00000000000000FCLLU,
- .tc_table = {
- 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
- 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
- 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
- 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
- },
-
- /* port */
- .port = {
- .name = NULL, /* Set at init */
- .socket = 0, /* Set at init */
- .rate = 1250000000, /* Assuming 10GbE port */
- .mtu = 14 + 1500, /* Assuming Ethernet/IPv4 pkt (Ethernet FCS not included) */
- .frame_overhead = RTE_SCHED_FRAME_OVERHEAD_DEFAULT,
- .n_subports_per_port = 1,
- .n_pipes_per_subport = 4096,
- .qsize = {64, 64, 64, 64},
- .pipe_profiles = NULL, /* Set at config */
- .n_pipe_profiles = 1,
-
-#ifdef RTE_SCHED_RED
- .red_params = {
- /* Traffic Class 0 Colors Green / Yellow / Red */
- [0][0] = {.min_th = 48,.max_th = 64,.maxp_inv =
- 10,.wq_log2 = 9},
- [0][1] = {.min_th = 40,.max_th = 64,.maxp_inv =
- 10,.wq_log2 = 9},
- [0][2] = {.min_th = 32,.max_th = 64,.maxp_inv =
- 10,.wq_log2 = 9},
-
- /* Traffic Class 1 - Colors Green / Yellow / Red */
- [1][0] = {.min_th = 48,.max_th = 64,.maxp_inv =
- 10,.wq_log2 = 9},
- [1][1] = {.min_th = 40,.max_th = 64,.maxp_inv =
- 10,.wq_log2 = 9},
- [1][2] = {.min_th = 32,.max_th = 64,.maxp_inv =
- 10,.wq_log2 = 9},
-
- /* Traffic Class 2 - Colors Green / Yellow / Red */
- [2][0] = {.min_th = 48,.max_th = 64,.maxp_inv =
- 10,.wq_log2 = 9},
- [2][1] = {.min_th = 40,.max_th = 64,.maxp_inv =
- 10,.wq_log2 = 9},
- [2][2] = {.min_th = 32,.max_th = 64,.maxp_inv =
- 10,.wq_log2 = 9},
-
- /* Traffic Class 3 - Colors Green / Yellow / Red */
- [3][0] = {.min_th = 48,.max_th = 64,.maxp_inv =
- 10,.wq_log2 = 9},
- [3][1] = {.min_th = 40,.max_th = 64,.maxp_inv =
- 10,.wq_log2 = 9},
- [3][2] = {.min_th = 32,.max_th = 64,.maxp_inv =
- 10,.wq_log2 = 9}
- },
-#endif /* RTE_SCHED_RED */
- },
-};
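For orientation, a hedged sketch of how one of the slab definitions above is applied per packet (simplified from dpdk_hqos_metadata_set and ignoring the byte-order handling the real code performs): an 8-byte window is loaded at slabpos, masked, and right-shifted by the trailing-zero count of the mask, which is exactly what the *_slabshr fields in the per-worker structure cache.

static inline u32
example_hqos_pktfield (u8 * pkt, u32 slabpos, u64 slabmask, u32 slabshr)
{
  u64 slab = *(u64 *) (pkt + slabpos);		/* 8-byte window into the packet */
  return (u32) ((slab & slabmask) >> slabshr);	/* isolate and right-align the field */
}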
-
-static struct rte_sched_subport_params hqos_subport_params_default = {
- .tb_rate = 1250000000, /* 10GbE line rate (measured in bytes/second) */
- .tb_size = 1000000,
- .tc_rate = {1250000000, 1250000000, 1250000000, 1250000000},
- .tc_period = 10,
-};
-
-static struct rte_sched_pipe_params hqos_pipe_params_default = {
- .tb_rate = 305175, /* 10GbE line rate divided by 4K pipes */
- .tb_size = 1000000,
- .tc_rate = {305175, 305175, 305175, 305175},
- .tc_period = 40,
-#ifdef RTE_SCHED_SUBPORT_TC_OV
- .tc_ov_weight = 1,
-#endif
- .wrr_weights = {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1},
-};
-
-/***
- *
- * HQoS configuration
- *
- ***/
-
-int
-dpdk_hqos_validate_mask (u64 mask, u32 n)
-{
- int count = __builtin_popcountll (mask);
- int pos_lead = sizeof (u64) * 8 - __builtin_clzll (mask);
- int pos_trail = __builtin_ctzll (mask);
- int count_expected = __builtin_popcount (n - 1);
-
- /* Handle the exceptions */
- if (n == 0)
- return -1; /* Error */
-
- if ((mask == 0) && (n == 1))
- return 0; /* OK */
-
- if (((mask == 0) && (n != 1)) || ((mask != 0) && (n == 1)))
- return -2; /* Error */
-
- /* Check that mask is contiguous */
- if ((pos_lead - pos_trail) != count)
- return -3; /* Error */
-
- /* Check that mask contains the expected number of bits set */
- if (count != count_expected)
- return -4; /* Error */
-
- return 0; /* OK */
-}
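A short usage sketch against the defaults above (hypothetical helper, using the library ASSERT): the default pipe mask has 12 contiguous bits, matching the 4096 pipes per subport, so it validates cleanly, while a mask with a hole in it is rejected by the contiguity check.

static void
example_validate_hqos_masks (void)
{
  /* default pipe mask: 12 contiguous bits for 4096 pipes -> OK */
  ASSERT (dpdk_hqos_validate_mask (0x0000000FFF000000LLU, 4096) == 0);
  /* same span but with a gap -> rejected as non-contiguous */
  ASSERT (dpdk_hqos_validate_mask (0x0000000F0F000000LLU, 4096) == -3);
}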
-
-void
-dpdk_device_config_hqos_pipe_profile_default (dpdk_device_config_hqos_t *
- hqos, u32 pipe_profile_id)
-{
- memcpy (&hqos->pipe[pipe_profile_id], &hqos_pipe_params_default,
- sizeof (hqos_pipe_params_default));
-}
-
-void
-dpdk_device_config_hqos_default (dpdk_device_config_hqos_t * hqos)
-{
- struct rte_sched_subport_params *subport_params;
- struct rte_sched_pipe_params *pipe_params;
- u32 *pipe_map;
- u32 i;
-
- memcpy (hqos, &hqos_params_default, sizeof (hqos_params_default));
-
- /* pipe */
- vec_add2 (hqos->pipe, pipe_params, hqos->port.n_pipe_profiles);
-
- for (i = 0; i < vec_len (hqos->pipe); i++)
- memcpy (&pipe_params[i],
- &hqos_pipe_params_default, sizeof (hqos_pipe_params_default));
-
- hqos->port.pipe_profiles = hqos->pipe;
-
- /* subport */
- vec_add2 (hqos->subport, subport_params, hqos->port.n_subports_per_port);
-
- for (i = 0; i < vec_len (hqos->subport); i++)
- memcpy (&subport_params[i],
- &hqos_subport_params_default,
- sizeof (hqos_subport_params_default));
-
- /* pipe profile */
- vec_add2 (hqos->pipe_map,
- pipe_map,
- hqos->port.n_subports_per_port * hqos->port.n_pipes_per_subport);
-
- for (i = 0; i < vec_len (hqos->pipe_map); i++)
- pipe_map[i] = 0;
-}
-
-/***
- *
- * HQoS init
- *
- ***/
-
-clib_error_t *
-dpdk_port_setup_hqos (dpdk_device_t * xd, dpdk_device_config_hqos_t * hqos)
-{
- vlib_thread_main_t *tm = vlib_get_thread_main ();
- char name[32];
- u32 subport_id, i;
- int rv;
-
- /* Detect the set of worker threads */
- int worker_thread_first = 0;
- int worker_thread_count = 0;
-
- uword *p = hash_get_mem (tm->thread_registrations_by_name, "workers");
- vlib_thread_registration_t *tr =
- p ? (vlib_thread_registration_t *) p[0] : 0;
-
- if (tr && tr->count > 0)
- {
- worker_thread_first = tr->first_index;
- worker_thread_count = tr->count;
- }
-
- /* Allocate the per-thread device data array */
- vec_validate_aligned (xd->hqos_wt, tm->n_vlib_mains - 1,
- CLIB_CACHE_LINE_BYTES);
- memset (xd->hqos_wt, 0, tm->n_vlib_mains * sizeof (xd->hqos_wt[0]));
-
- vec_validate_aligned (xd->hqos_ht, 0, CLIB_CACHE_LINE_BYTES);
- memset (xd->hqos_ht, 0, sizeof (xd->hqos_ht[0]));
-
- /* Allocate space for one SWQ per worker thread in the I/O TX thread data structure */
- vec_validate (xd->hqos_ht->swq, worker_thread_count);
-
- /* SWQ */
- for (i = 0; i < worker_thread_count + 1; i++)
- {
- u32 swq_flags = RING_F_SP_ENQ | RING_F_SC_DEQ;
-
- snprintf (name, sizeof (name), "SWQ-worker%u-to-device%u", i,
- xd->device_index);
- xd->hqos_ht->swq[i] =
- rte_ring_create (name, hqos->swq_size, xd->cpu_socket, swq_flags);
- if (xd->hqos_ht->swq[i] == NULL)
- return clib_error_return (0,
- "SWQ-worker%u-to-device%u: rte_ring_create err",
- i, xd->device_index);
- }
-
- /*
- * HQoS
- */
-
- /* HQoS port */
- snprintf (name, sizeof (name), "HQoS%u", xd->device_index);
- hqos->port.name = strdup (name);
- if (hqos->port.name == NULL)
- return clib_error_return (0, "HQoS%u: strdup err", xd->device_index);
-
- hqos->port.socket = rte_eth_dev_socket_id (xd->device_index);
- if (hqos->port.socket == SOCKET_ID_ANY)
- hqos->port.socket = 0;
-
- xd->hqos_ht->hqos = rte_sched_port_config (&hqos->port);
- if (xd->hqos_ht->hqos == NULL)
- return clib_error_return (0, "HQoS%u: rte_sched_port_config err",
- xd->device_index);
-
- /* HQoS subport */
- for (subport_id = 0; subport_id < hqos->port.n_subports_per_port;
- subport_id++)
- {
- u32 pipe_id;
-
- rv =
- rte_sched_subport_config (xd->hqos_ht->hqos, subport_id,
- &hqos->subport[subport_id]);
- if (rv)
- return clib_error_return (0,
- "HQoS%u subport %u: rte_sched_subport_config err (%d)",
- xd->device_index, subport_id, rv);
-
- /* HQoS pipe */
- for (pipe_id = 0; pipe_id < hqos->port.n_pipes_per_subport; pipe_id++)
- {
- u32 pos = subport_id * hqos->port.n_pipes_per_subport + pipe_id;
- u32 profile_id = hqos->pipe_map[pos];
-
- rv =
- rte_sched_pipe_config (xd->hqos_ht->hqos, subport_id, pipe_id,
- profile_id);
- if (rv)
- return clib_error_return (0,
- "HQoS%u subport %u pipe %u: rte_sched_pipe_config err (%d)",
- xd->device_index, subport_id, pipe_id,
- rv);
- }
- }
-
- /* Set up per-thread device data for the I/O TX thread */
- xd->hqos_ht->hqos_burst_enq = hqos->burst_enq;
- xd->hqos_ht->hqos_burst_deq = hqos->burst_deq;
- vec_validate (xd->hqos_ht->pkts_enq, 2 * hqos->burst_enq - 1);
- vec_validate (xd->hqos_ht->pkts_deq, hqos->burst_deq - 1);
- xd->hqos_ht->pkts_enq_len = 0;
- xd->hqos_ht->swq_pos = 0;
- xd->hqos_ht->flush_count = 0;
-
- /* Set up per-thread device data for each worker thread */
- for (i = 0; i < worker_thread_count + 1; i++)
- {
- u32 tid;
- if (i)
- tid = worker_thread_first + (i - 1);
- else
- tid = i;
-
- xd->hqos_wt[tid].swq = xd->hqos_ht->swq[i];
- xd->hqos_wt[tid].hqos_field0_slabpos = hqos->pktfield0_slabpos;
- xd->hqos_wt[tid].hqos_field0_slabmask = hqos->pktfield0_slabmask;
- xd->hqos_wt[tid].hqos_field0_slabshr =
- __builtin_ctzll (hqos->pktfield0_slabmask);
- xd->hqos_wt[tid].hqos_field1_slabpos = hqos->pktfield1_slabpos;
- xd->hqos_wt[tid].hqos_field1_slabmask = hqos->pktfield1_slabmask;
- xd->hqos_wt[tid].hqos_field1_slabshr =
- __builtin_ctzll (hqos->pktfield1_slabmask);
- xd->hqos_wt[tid].hqos_field2_slabpos = hqos->pktfield2_slabpos;
- xd->hqos_wt[tid].hqos_field2_slabmask = hqos->pktfield2_slabmask;
- xd->hqos_wt[tid].hqos_field2_slabshr =
- __builtin_ctzll (hqos->pktfield2_slabmask);
- memcpy (xd->hqos_wt[tid].hqos_tc_table, hqos->tc_table,
- sizeof (hqos->tc_table));
- }
-
- return 0;
-}
-
-/***
- *
- * HQoS run-time
- *
- ***/
-/*
- * dpdk_hqos_thread - Contains the main loop of an HQoS thread.
- *
- * w
- * Information for the current thread
- */
-static_always_inline void
-dpdk_hqos_thread_internal_hqos_dbg_bypass (vlib_main_t * vm)
-{
- dpdk_main_t *dm = &dpdk_main;
- u32 cpu_index = vm->cpu_index;
- u32 dev_pos;
-
- dev_pos = 0;
- while (1)
- {
- vlib_worker_thread_barrier_check ();
-
- u32 n_devs = vec_len (dm->devices_by_hqos_cpu[cpu_index]);
- if (dev_pos >= n_devs)
- dev_pos = 0;
-
- dpdk_device_and_queue_t *dq =
- vec_elt_at_index (dm->devices_by_hqos_cpu[cpu_index], dev_pos);
- dpdk_device_t *xd = vec_elt_at_index (dm->devices, dq->device);
-
- dpdk_device_hqos_per_hqos_thread_t *hqos = xd->hqos_ht;
- u32 device_index = xd->device_index;
- u16 queue_id = dq->queue_id;
-
- struct rte_mbuf **pkts_enq = hqos->pkts_enq;
- u32 pkts_enq_len = hqos->pkts_enq_len;
- u32 swq_pos = hqos->swq_pos;
- u32 n_swq = vec_len (hqos->swq), i;
- u32 flush_count = hqos->flush_count;
-
- for (i = 0; i < n_swq; i++)
- {
- /* Get current SWQ for this device */
- struct rte_ring *swq = hqos->swq[swq_pos];
-
- /* Read SWQ burst to packet buffer of this device */
- pkts_enq_len += rte_ring_sc_dequeue_burst (swq,
- (void **)
- &pkts_enq[pkts_enq_len],
- hqos->hqos_burst_enq);
-
- /* Get next SWQ for this device */
- swq_pos++;
- if (swq_pos >= n_swq)
- swq_pos = 0;
- hqos->swq_pos = swq_pos;
-
- /* HWQ TX enqueue when burst available */
- if (pkts_enq_len >= hqos->hqos_burst_enq)
- {
- u32 n_pkts = rte_eth_tx_burst (device_index,
- (uint16_t) queue_id,
- pkts_enq,
- (uint16_t) pkts_enq_len);
-
- for (; n_pkts < pkts_enq_len; n_pkts++)
- rte_pktmbuf_free (pkts_enq[n_pkts]);
-
- pkts_enq_len = 0;
- flush_count = 0;
- break;
- }
- }
- if (pkts_enq_len)
- {
- flush_count++;
- if (PREDICT_FALSE (flush_count == HQOS_FLUSH_COUNT_THRESHOLD))
- {
-	      /* Debug bypass: send the stale partial burst straight to the
-	       * NIC TX queue; the scheduler is skipped in this code path. */
-	      u32 n_pkts = rte_eth_tx_burst (device_index,
-					     (uint16_t) queue_id,
-					     pkts_enq,
-					     (uint16_t) pkts_enq_len);
-
-	      for (; n_pkts < pkts_enq_len; n_pkts++)
-		rte_pktmbuf_free (pkts_enq[n_pkts]);
-
- pkts_enq_len = 0;
- flush_count = 0;
- }
- }
- hqos->pkts_enq_len = pkts_enq_len;
- hqos->flush_count = flush_count;
-
- /* Advance to next device */
- dev_pos++;
- }
-}
-
-static_always_inline void
-dpdk_hqos_thread_internal (vlib_main_t * vm)
-{
- dpdk_main_t *dm = &dpdk_main;
- u32 cpu_index = vm->cpu_index;
- u32 dev_pos;
-
- dev_pos = 0;
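-  /* Round-robin over the devices assigned to this HQoS thread,
-   * servicing one device per pass through the loop below. */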
- while (1)
- {
- vlib_worker_thread_barrier_check ();
-
- u32 n_devs = vec_len (dm->devices_by_hqos_cpu[cpu_index]);
- if (PREDICT_FALSE (n_devs == 0))
- {
- dev_pos = 0;
- continue;
- }
- if (dev_pos >= n_devs)
- dev_pos = 0;
-
- dpdk_device_and_queue_t *dq =
- vec_elt_at_index (dm->devices_by_hqos_cpu[cpu_index], dev_pos);
- dpdk_device_t *xd = vec_elt_at_index (dm->devices, dq->device);
-
- dpdk_device_hqos_per_hqos_thread_t *hqos = xd->hqos_ht;
- u32 device_index = xd->device_index;
- u16 queue_id = dq->queue_id;
-
- struct rte_mbuf **pkts_enq = hqos->pkts_enq;
- struct rte_mbuf **pkts_deq = hqos->pkts_deq;
- u32 pkts_enq_len = hqos->pkts_enq_len;
- u32 swq_pos = hqos->swq_pos;
- u32 n_swq = vec_len (hqos->swq), i;
- u32 flush_count = hqos->flush_count;
-
- /*
- * SWQ dequeue and HQoS enqueue for current device
- */
- for (i = 0; i < n_swq; i++)
- {
- /* Get current SWQ for this device */
- struct rte_ring *swq = hqos->swq[swq_pos];
-
- /* Read SWQ burst to packet buffer of this device */
- pkts_enq_len += rte_ring_sc_dequeue_burst (swq,
- (void **)
- &pkts_enq[pkts_enq_len],
- hqos->hqos_burst_enq);
-
- /* Get next SWQ for this device */
- swq_pos++;
- if (swq_pos >= n_swq)
- swq_pos = 0;
- hqos->swq_pos = swq_pos;
-
- /* HQoS enqueue when burst available */
- if (pkts_enq_len >= hqos->hqos_burst_enq)
- {
- rte_sched_port_enqueue (hqos->hqos, pkts_enq, pkts_enq_len);
-
- pkts_enq_len = 0;
- flush_count = 0;
- break;
- }
- }
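-      /* A partial burst is not held forever: once it has survived
-       * HQOS_FLUSH_COUNT_THRESHOLD passes it is pushed into the
-       * scheduler even though it is smaller than hqos_burst_enq. */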
- if (pkts_enq_len)
- {
- flush_count++;
- if (PREDICT_FALSE (flush_count == HQOS_FLUSH_COUNT_THRESHOLD))
- {
- rte_sched_port_enqueue (hqos->hqos, pkts_enq, pkts_enq_len);
-
- pkts_enq_len = 0;
- flush_count = 0;
- }
- }
- hqos->pkts_enq_len = pkts_enq_len;
- hqos->flush_count = flush_count;
-
- /*
- * HQoS dequeue and HWQ TX enqueue for current device
- */
- {
- u32 pkts_deq_len, n_pkts;
-
- pkts_deq_len = rte_sched_port_dequeue (hqos->hqos,
- pkts_deq,
- hqos->hqos_burst_deq);
-
- for (n_pkts = 0; n_pkts < pkts_deq_len;)
- n_pkts += rte_eth_tx_burst (device_index,
- (uint16_t) queue_id,
- &pkts_deq[n_pkts],
- (uint16_t) (pkts_deq_len - n_pkts));
- }
-
- /* Advance to next device */
- dev_pos++;
- }
-}
-
-void
-dpdk_hqos_thread (vlib_worker_thread_t * w)
-{
- vlib_main_t *vm;
- vlib_thread_main_t *tm = vlib_get_thread_main ();
- dpdk_main_t *dm = &dpdk_main;
-
- vm = vlib_get_main ();
-
- ASSERT (vm->cpu_index == os_get_cpu_number ());
-
- clib_time_init (&vm->clib_time);
- clib_mem_set_heap (w->thread_mheap);
-
- /* Wait until the dpdk init sequence is complete */
- while (tm->worker_thread_release == 0)
- vlib_worker_thread_barrier_check ();
-
- if (vec_len (dm->devices_by_hqos_cpu[vm->cpu_index]) == 0)
- return
- clib_error
- ("current I/O TX thread does not have any devices assigned to it");
-
- if (DPDK_HQOS_DBG_BYPASS)
- dpdk_hqos_thread_internal_hqos_dbg_bypass (vm);
- else
- dpdk_hqos_thread_internal (vm);
-}
-
-void
-dpdk_hqos_thread_fn (void *arg)
-{
- vlib_worker_thread_t *w = (vlib_worker_thread_t *) arg;
- vlib_worker_thread_init (w);
- dpdk_hqos_thread (w);
-}
-
-/* *INDENT-OFF* */
-VLIB_REGISTER_THREAD (hqos_thread_reg, static) =
-{
- .name = "hqos-threads",
- .short_name = "hqos-threads",
- .function = dpdk_hqos_thread_fn,
-};
-/* *INDENT-ON* */
-
-/*
- * HQoS run-time code to be called by the worker threads
- */
-#define BITFIELD(byte_array, slab_pos, slab_mask, slab_shr) \
-({ \
- u64 slab = *((u64 *) &byte_array[slab_pos]); \
- u64 val = (rte_be_to_cpu_64(slab) & slab_mask) >> slab_shr; \
- val; \
-})
-
-#define RTE_SCHED_PORT_HIERARCHY(subport, pipe, traffic_class, queue, color) \
- ((((u64) (queue)) & 0x3) | \
- ((((u64) (traffic_class)) & 0x3) << 2) | \
- ((((u64) (color)) & 0x3) << 4) | \
- ((((u64) (subport)) & 0xFFFF) << 16) | \
- ((((u64) (pipe)) & 0xFFFFFFFF) << 32))
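-
-/*
- * BITFIELD() loads a 64-bit big-endian slab at byte offset slab_pos,
- * masks it and shifts it down to extract a classification field.
- * RTE_SCHED_PORT_HIERARCHY() packs the scheduler hierarchy into a
- * 64-bit word: queue in bits 0-1, traffic class in bits 2-3, color in
- * bits 4-5, subport in bits 16-31 and pipe in bits 32-63.
- */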
-
-void
-dpdk_hqos_metadata_set (dpdk_device_hqos_per_worker_thread_t * hqos,
- struct rte_mbuf **pkts, u32 n_pkts)
-{
- u32 i;
-
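-  /* Classify four packets per iteration; the scalar loop further down
-   * handles the remainder when n_pkts is not a multiple of 4. */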
- for (i = 0; i < (n_pkts & (~0x3)); i += 4)
- {
- struct rte_mbuf *pkt0 = pkts[i];
- struct rte_mbuf *pkt1 = pkts[i + 1];
- struct rte_mbuf *pkt2 = pkts[i + 2];
- struct rte_mbuf *pkt3 = pkts[i + 3];
-
- u8 *pkt0_data = rte_pktmbuf_mtod (pkt0, u8 *);
- u8 *pkt1_data = rte_pktmbuf_mtod (pkt1, u8 *);
- u8 *pkt2_data = rte_pktmbuf_mtod (pkt2, u8 *);
- u8 *pkt3_data = rte_pktmbuf_mtod (pkt3, u8 *);
-
- u64 pkt0_subport = BITFIELD (pkt0_data, hqos->hqos_field0_slabpos,
- hqos->hqos_field0_slabmask,
- hqos->hqos_field0_slabshr);
- u64 pkt0_pipe = BITFIELD (pkt0_data, hqos->hqos_field1_slabpos,
- hqos->hqos_field1_slabmask,
- hqos->hqos_field1_slabshr);
- u64 pkt0_dscp = BITFIELD (pkt0_data, hqos->hqos_field2_slabpos,
- hqos->hqos_field2_slabmask,
- hqos->hqos_field2_slabshr);
- u32 pkt0_tc = hqos->hqos_tc_table[pkt0_dscp & 0x3F] >> 2;
- u32 pkt0_tc_q = hqos->hqos_tc_table[pkt0_dscp & 0x3F] & 0x3;
-
- u64 pkt1_subport = BITFIELD (pkt1_data, hqos->hqos_field0_slabpos,
- hqos->hqos_field0_slabmask,
- hqos->hqos_field0_slabshr);
- u64 pkt1_pipe = BITFIELD (pkt1_data, hqos->hqos_field1_slabpos,
- hqos->hqos_field1_slabmask,
- hqos->hqos_field1_slabshr);
- u64 pkt1_dscp = BITFIELD (pkt1_data, hqos->hqos_field2_slabpos,
- hqos->hqos_field2_slabmask,
- hqos->hqos_field2_slabshr);
- u32 pkt1_tc = hqos->hqos_tc_table[pkt1_dscp & 0x3F] >> 2;
- u32 pkt1_tc_q = hqos->hqos_tc_table[pkt1_dscp & 0x3F] & 0x3;
-
- u64 pkt2_subport = BITFIELD (pkt2_data, hqos->hqos_field0_slabpos,
- hqos->hqos_field0_slabmask,
- hqos->hqos_field0_slabshr);
- u64 pkt2_pipe = BITFIELD (pkt2_data, hqos->hqos_field1_slabpos,
- hqos->hqos_field1_slabmask,
- hqos->hqos_field1_slabshr);
- u64 pkt2_dscp = BITFIELD (pkt2_data, hqos->hqos_field2_slabpos,
- hqos->hqos_field2_slabmask,
- hqos->hqos_field2_slabshr);
- u32 pkt2_tc = hqos->hqos_tc_table[pkt2_dscp & 0x3F] >> 2;
- u32 pkt2_tc_q = hqos->hqos_tc_table[pkt2_dscp & 0x3F] & 0x3;
-
- u64 pkt3_subport = BITFIELD (pkt3_data, hqos->hqos_field0_slabpos,
- hqos->hqos_field0_slabmask,
- hqos->hqos_field0_slabshr);
- u64 pkt3_pipe = BITFIELD (pkt3_data, hqos->hqos_field1_slabpos,
- hqos->hqos_field1_slabmask,
- hqos->hqos_field1_slabshr);
- u64 pkt3_dscp = BITFIELD (pkt3_data, hqos->hqos_field2_slabpos,
- hqos->hqos_field2_slabmask,
- hqos->hqos_field2_slabshr);
- u32 pkt3_tc = hqos->hqos_tc_table[pkt3_dscp & 0x3F] >> 2;
- u32 pkt3_tc_q = hqos->hqos_tc_table[pkt3_dscp & 0x3F] & 0x3;
-
- u64 pkt0_sched = RTE_SCHED_PORT_HIERARCHY (pkt0_subport,
- pkt0_pipe,
- pkt0_tc,
- pkt0_tc_q,
- 0);
- u64 pkt1_sched = RTE_SCHED_PORT_HIERARCHY (pkt1_subport,
- pkt1_pipe,
- pkt1_tc,
- pkt1_tc_q,
- 0);
- u64 pkt2_sched = RTE_SCHED_PORT_HIERARCHY (pkt2_subport,
- pkt2_pipe,
- pkt2_tc,
- pkt2_tc_q,
- 0);
- u64 pkt3_sched = RTE_SCHED_PORT_HIERARCHY (pkt3_subport,
- pkt3_pipe,
- pkt3_tc,
- pkt3_tc_q,
- 0);
-
- pkt0->hash.sched.lo = pkt0_sched & 0xFFFFFFFF;
- pkt0->hash.sched.hi = pkt0_sched >> 32;
- pkt1->hash.sched.lo = pkt1_sched & 0xFFFFFFFF;
- pkt1->hash.sched.hi = pkt1_sched >> 32;
- pkt2->hash.sched.lo = pkt2_sched & 0xFFFFFFFF;
- pkt2->hash.sched.hi = pkt2_sched >> 32;
- pkt3->hash.sched.lo = pkt3_sched & 0xFFFFFFFF;
- pkt3->hash.sched.hi = pkt3_sched >> 32;
- }
-
- for (; i < n_pkts; i++)
- {
- struct rte_mbuf *pkt = pkts[i];
-
- u8 *pkt_data = rte_pktmbuf_mtod (pkt, u8 *);
-
- u64 pkt_subport = BITFIELD (pkt_data, hqos->hqos_field0_slabpos,
- hqos->hqos_field0_slabmask,
- hqos->hqos_field0_slabshr);
- u64 pkt_pipe = BITFIELD (pkt_data, hqos->hqos_field1_slabpos,
- hqos->hqos_field1_slabmask,
- hqos->hqos_field1_slabshr);
- u64 pkt_dscp = BITFIELD (pkt_data, hqos->hqos_field2_slabpos,
- hqos->hqos_field2_slabmask,
- hqos->hqos_field2_slabshr);
- u32 pkt_tc = hqos->hqos_tc_table[pkt_dscp & 0x3F] >> 2;
- u32 pkt_tc_q = hqos->hqos_tc_table[pkt_dscp & 0x3F] & 0x3;
-
- u64 pkt_sched = RTE_SCHED_PORT_HIERARCHY (pkt_subport,
- pkt_pipe,
- pkt_tc,
- pkt_tc_q,
- 0);
-
- pkt->hash.sched.lo = pkt_sched & 0xFFFFFFFF;
- pkt->hash.sched.hi = pkt_sched >> 32;
- }
-}
-
-/*
- * fd.io coding-style-patch-verification: ON
- *
- * Local Variables:
- * eval: (c-set-style "gnu")
- * End:
- */
diff --git a/vnet/vnet/devices/dpdk/init.c b/vnet/vnet/devices/dpdk/init.c
deleted file mode 100755
index 693ca985130..00000000000
--- a/vnet/vnet/devices/dpdk/init.c
+++ /dev/null
@@ -1,1803 +0,0 @@
-/*
- * Copyright (c) 2015 Cisco and/or its affiliates.
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at:
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#include <vnet/vnet.h>
-#include <vppinfra/vec.h>
-#include <vppinfra/error.h>
-#include <vppinfra/format.h>
-#include <vppinfra/bitmap.h>
-
-#include <vnet/ethernet/ethernet.h>
-#include <vnet/devices/dpdk/dpdk.h>
-#include <vlib/unix/physmem.h>
-#include <vlib/pci/pci.h>
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <unistd.h>
-#include <sys/stat.h>
-#include <sys/mount.h>
-#include <string.h>
-#include <fcntl.h>
-
-#include "dpdk_priv.h"
-
-dpdk_main_t dpdk_main;
-
-/* force linker to link functions used by vlib and declared weak */
-void *vlib_weakly_linked_functions[] = {
- &rte_pktmbuf_init,
- &rte_pktmbuf_pool_init,
-};
-
-#define LINK_STATE_ELOGS 0
-
-#define DEFAULT_HUGE_DIR "/run/vpp/hugepages"
-#define VPP_RUN_DIR "/run/vpp"
-
-/* Port configuration, mildly modified Intel app values */
-
-static struct rte_eth_conf port_conf_template = {
- .rxmode = {
- .split_hdr_size = 0,
- .header_split = 0, /**< Header Split disabled */
- .hw_ip_checksum = 0, /**< IP checksum offload disabled */
- .hw_vlan_filter = 0, /**< VLAN filtering disabled */
-    .hw_strip_crc = 0,		/**< CRC stripping by hardware disabled */
- },
- .txmode = {
- .mq_mode = ETH_MQ_TX_NONE,
- },
-};
-
-clib_error_t *
-dpdk_port_setup (dpdk_main_t * dm, dpdk_device_t * xd)
-{
- vlib_main_t *vm = vlib_get_main ();
- vlib_buffer_main_t *bm = vm->buffer_main;
- int rv;
- int j;
-
- ASSERT (os_get_cpu_number () == 0);
-
- if (xd->flags & DPDK_DEVICE_FLAG_ADMIN_UP)
- {
- vnet_hw_interface_set_flags (dm->vnet_main, xd->vlib_hw_if_index, 0);
- rte_eth_dev_stop (xd->device_index);
- }
-
- rv = rte_eth_dev_configure (xd->device_index, xd->rx_q_used,
- xd->tx_q_used, &xd->port_conf);
-
- if (rv < 0)
- return clib_error_return (0, "rte_eth_dev_configure[%d]: err %d",
- xd->device_index, rv);
-
- /* Set up one TX-queue per worker thread */
- for (j = 0; j < xd->tx_q_used; j++)
- {
- rv = rte_eth_tx_queue_setup (xd->device_index, j, xd->nb_tx_desc,
- xd->cpu_socket, &xd->tx_conf);
-
- /* retry with any other CPU socket */
- if (rv < 0)
- rv = rte_eth_tx_queue_setup (xd->device_index, j, xd->nb_tx_desc,
- SOCKET_ID_ANY, &xd->tx_conf);
- if (rv < 0)
- break;
- }
-
- if (rv < 0)
- return clib_error_return (0, "rte_eth_tx_queue_setup[%d]: err %d",
- xd->device_index, rv);
-
- for (j = 0; j < xd->rx_q_used; j++)
- {
-
- rv = rte_eth_rx_queue_setup (xd->device_index, j, xd->nb_rx_desc,
- xd->cpu_socket, 0,
- bm->
- pktmbuf_pools[xd->cpu_socket_id_by_queue
- [j]]);
-
- /* retry with any other CPU socket */
- if (rv < 0)
- rv = rte_eth_rx_queue_setup (xd->device_index, j, xd->nb_rx_desc,
- SOCKET_ID_ANY, 0,
- bm->
- pktmbuf_pools[xd->cpu_socket_id_by_queue
- [j]]);
- if (rv < 0)
- return clib_error_return (0, "rte_eth_rx_queue_setup[%d]: err %d",
- xd->device_index, rv);
- }
-
- if (xd->flags & DPDK_DEVICE_FLAG_ADMIN_UP)
- {
- int rv;
- rv = rte_eth_dev_start (xd->device_index);
- if (rv < 0)
- clib_warning ("rte_eth_dev_start %d returned %d",
- xd->device_index, rv);
- }
- return 0;
-}
-
-static u32
-dpdk_flag_change (vnet_main_t * vnm, vnet_hw_interface_t * hi, u32 flags)
-{
- dpdk_main_t *dm = &dpdk_main;
- dpdk_device_t *xd = vec_elt_at_index (dm->devices, hi->dev_instance);
- u32 old = 0;
-
- if (ETHERNET_INTERFACE_FLAG_CONFIG_PROMISC (flags))
- {
- old = (xd->flags & DPDK_DEVICE_FLAG_PROMISC) != 0;
-
- if (flags & ETHERNET_INTERFACE_FLAG_ACCEPT_ALL)
- xd->flags |= DPDK_DEVICE_FLAG_PROMISC;
- else
- xd->flags &= ~DPDK_DEVICE_FLAG_PROMISC;
-
- if (xd->flags & DPDK_DEVICE_FLAG_ADMIN_UP)
- {
- if (xd->flags & DPDK_DEVICE_FLAG_PROMISC)
- rte_eth_promiscuous_enable (xd->device_index);
- else
- rte_eth_promiscuous_disable (xd->device_index);
- }
- }
- else if (ETHERNET_INTERFACE_FLAG_CONFIG_MTU (flags))
- {
- /*
- * DAW-FIXME: The Cisco VIC firmware does not provide an api for a
- * driver to dynamically change the mtu. If/when the
- * VIC firmware gets fixed, then this should be removed.
- */
- if (xd->pmd == VNET_DPDK_PMD_ENIC)
- {
- struct rte_eth_dev_info dev_info;
-
- /*
- * Restore mtu to what has been set by CIMC in the firmware cfg.
- */
- rte_eth_dev_info_get (xd->device_index, &dev_info);
- hi->max_packet_bytes = dev_info.max_rx_pktlen;
-
- vlib_cli_output (vlib_get_main (),
- "Cisco VIC mtu can only be changed "
- "using CIMC then rebooting the server!");
- }
- else
- {
- int rv;
-
- xd->port_conf.rxmode.max_rx_pkt_len = hi->max_packet_bytes;
-
- if (xd->flags & DPDK_DEVICE_FLAG_ADMIN_UP)
- rte_eth_dev_stop (xd->device_index);
-
- rv = rte_eth_dev_configure
- (xd->device_index, xd->rx_q_used, xd->tx_q_used, &xd->port_conf);
-
- if (rv < 0)
- vlib_cli_output (vlib_get_main (),
- "rte_eth_dev_configure[%d]: err %d",
- xd->device_index, rv);
-
- rte_eth_dev_set_mtu (xd->device_index, hi->max_packet_bytes);
-
- if (xd->flags & DPDK_DEVICE_FLAG_ADMIN_UP)
- {
- int rv = rte_eth_dev_start (xd->device_index);
- if (rv < 0)
- clib_warning ("rte_eth_dev_start %d returned %d",
- xd->device_index, rv);
- }
- }
- }
- return old;
-}
-
-void
-dpdk_device_lock_init (dpdk_device_t * xd)
-{
- int q;
- vec_validate (xd->lockp, xd->tx_q_used - 1);
- for (q = 0; q < xd->tx_q_used; q++)
- {
- xd->lockp[q] = clib_mem_alloc_aligned (CLIB_CACHE_LINE_BYTES,
- CLIB_CACHE_LINE_BYTES);
- memset ((void *) xd->lockp[q], 0, CLIB_CACHE_LINE_BYTES);
- }
-}
-
-void
-dpdk_device_lock_free (dpdk_device_t * xd)
-{
- int q;
-
- for (q = 0; q < vec_len (xd->lockp); q++)
- clib_mem_free ((void *) xd->lockp[q]);
- vec_free (xd->lockp);
- xd->lockp = 0;
-}
-
-static clib_error_t *
-dpdk_lib_init (dpdk_main_t * dm)
-{
- u32 nports;
- u32 nb_desc = 0;
- int i;
- clib_error_t *error;
- vlib_main_t *vm = vlib_get_main ();
- vlib_thread_main_t *tm = vlib_get_thread_main ();
- vnet_sw_interface_t *sw;
- vnet_hw_interface_t *hi;
- dpdk_device_t *xd;
- vlib_pci_addr_t last_pci_addr;
- u32 last_pci_addr_port = 0;
- vlib_thread_registration_t *tr, *tr_hqos;
- uword *p, *p_hqos;
-
- u32 next_cpu = 0, next_hqos_cpu = 0;
- u8 af_packet_port_id = 0;
- last_pci_addr.as_u32 = ~0;
-
- dm->input_cpu_first_index = 0;
- dm->input_cpu_count = 1;
-
- /* find out which cpus will be used for input */
- p = hash_get_mem (tm->thread_registrations_by_name, "workers");
- tr = p ? (vlib_thread_registration_t *) p[0] : 0;
-
- if (tr && tr->count > 0)
- {
- dm->input_cpu_first_index = tr->first_index;
- dm->input_cpu_count = tr->count;
- }
-
- vec_validate_aligned (dm->devices_by_cpu, tm->n_vlib_mains - 1,
- CLIB_CACHE_LINE_BYTES);
-
- vec_validate_aligned (dm->workers, tm->n_vlib_mains - 1,
- CLIB_CACHE_LINE_BYTES);
-
- dm->hqos_cpu_first_index = 0;
- dm->hqos_cpu_count = 0;
-
- /* find out which cpus will be used for I/O TX */
- p_hqos = hash_get_mem (tm->thread_registrations_by_name, "hqos-threads");
- tr_hqos = p_hqos ? (vlib_thread_registration_t *) p_hqos[0] : 0;
-
- if (tr_hqos && tr_hqos->count > 0)
- {
- dm->hqos_cpu_first_index = tr_hqos->first_index;
- dm->hqos_cpu_count = tr_hqos->count;
- }
-
- vec_validate_aligned (dm->devices_by_hqos_cpu, tm->n_vlib_mains - 1,
- CLIB_CACHE_LINE_BYTES);
-
- vec_validate_aligned (dm->hqos_threads, tm->n_vlib_mains - 1,
- CLIB_CACHE_LINE_BYTES);
-
- nports = rte_eth_dev_count ();
- if (nports < 1)
- {
- clib_warning ("DPDK drivers found no ports...");
- }
-
- if (CLIB_DEBUG > 0)
- clib_warning ("DPDK drivers found %d ports...", nports);
-
- /*
-   * All buffers are allocated from the same rte_mempool.
- * Thus they all have the same number of data bytes.
- */
- dm->vlib_buffer_free_list_index =
- vlib_buffer_get_or_create_free_list (vm,
- VLIB_BUFFER_DEFAULT_FREE_LIST_BYTES,
- "dpdk rx");
-
- if (dm->conf->enable_tcp_udp_checksum)
- dm->buffer_flags_template &= ~(IP_BUFFER_L4_CHECKSUM_CORRECT
- | IP_BUFFER_L4_CHECKSUM_COMPUTED);
-
- for (i = 0; i < nports; i++)
- {
- u8 addr[6];
- u8 vlan_strip = 0;
- int j;
- struct rte_eth_dev_info dev_info;
- clib_error_t *rv;
- struct rte_eth_link l;
- dpdk_device_config_t *devconf = 0;
- vlib_pci_addr_t pci_addr;
- uword *p = 0;
-
- rte_eth_dev_info_get (i, &dev_info);
- if (dev_info.pci_dev) /* bonded interface has no pci info */
- {
- pci_addr.domain = dev_info.pci_dev->addr.domain;
- pci_addr.bus = dev_info.pci_dev->addr.bus;
- pci_addr.slot = dev_info.pci_dev->addr.devid;
- pci_addr.function = dev_info.pci_dev->addr.function;
- p =
- hash_get (dm->conf->device_config_index_by_pci_addr,
- pci_addr.as_u32);
- }
-
- if (p)
- devconf = pool_elt_at_index (dm->conf->dev_confs, p[0]);
- else
- devconf = &dm->conf->default_devconf;
-
- /* Create vnet interface */
- vec_add2_aligned (dm->devices, xd, 1, CLIB_CACHE_LINE_BYTES);
- xd->nb_rx_desc = DPDK_NB_RX_DESC_DEFAULT;
- xd->nb_tx_desc = DPDK_NB_TX_DESC_DEFAULT;
- xd->cpu_socket = (i8) rte_eth_dev_socket_id (i);
-
- /* Handle interface naming for devices with multiple ports sharing same PCI ID */
- if (dev_info.pci_dev)
- {
- struct rte_eth_dev_info di = { 0 };
- rte_eth_dev_info_get (i + 1, &di);
- if (di.pci_dev && pci_addr.as_u32 != last_pci_addr.as_u32 &&
- memcmp (&dev_info.pci_dev->addr, &di.pci_dev->addr,
- sizeof (struct rte_pci_addr)) == 0)
- {
- xd->interface_name_suffix = format (0, "0");
- last_pci_addr.as_u32 = pci_addr.as_u32;
- last_pci_addr_port = i;
- }
- else if (pci_addr.as_u32 == last_pci_addr.as_u32)
- {
- xd->interface_name_suffix =
- format (0, "%u", i - last_pci_addr_port);
- }
- else
- {
- last_pci_addr.as_u32 = ~0;
- }
- }
- else
- last_pci_addr.as_u32 = ~0;
-
- clib_memcpy (&xd->tx_conf, &dev_info.default_txconf,
- sizeof (struct rte_eth_txconf));
- if (dm->conf->no_multi_seg)
- {
- xd->tx_conf.txq_flags |= ETH_TXQ_FLAGS_NOMULTSEGS;
- port_conf_template.rxmode.jumbo_frame = 0;
- }
- else
- {
- xd->tx_conf.txq_flags &= ~ETH_TXQ_FLAGS_NOMULTSEGS;
- port_conf_template.rxmode.jumbo_frame = 1;
- xd->flags |= DPDK_DEVICE_FLAG_MAYBE_MULTISEG;
- }
-
- clib_memcpy (&xd->port_conf, &port_conf_template,
- sizeof (struct rte_eth_conf));
-
- xd->tx_q_used = clib_min (dev_info.max_tx_queues, tm->n_vlib_mains);
-
- if (devconf->num_tx_queues > 0
- && devconf->num_tx_queues < xd->tx_q_used)
- xd->tx_q_used = clib_min (xd->tx_q_used, devconf->num_tx_queues);
-
- if (devconf->num_rx_queues > 1 && dm->use_rss == 0)
- {
- dm->use_rss = 1;
- }
-
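-      /* With more than one RX queue, enable RSS; hash on IP/UDP/TCP by
-       * default or on the functions given in the per-device "rss" config. */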
- if (devconf->num_rx_queues > 1
- && dev_info.max_rx_queues >= devconf->num_rx_queues)
- {
- xd->rx_q_used = devconf->num_rx_queues;
- xd->port_conf.rxmode.mq_mode = ETH_MQ_RX_RSS;
- if (devconf->rss_fn == 0)
- xd->port_conf.rx_adv_conf.rss_conf.rss_hf =
- ETH_RSS_IP | ETH_RSS_UDP | ETH_RSS_TCP;
- else
- xd->port_conf.rx_adv_conf.rss_conf.rss_hf = devconf->rss_fn;
- }
- else
- xd->rx_q_used = 1;
-
- xd->flags |= DPDK_DEVICE_FLAG_PMD;
-
- /* workaround for drivers not setting driver_name */
- if ((!dev_info.driver_name) && (dev_info.pci_dev))
-#if RTE_VERSION < RTE_VERSION_NUM(16, 11, 0, 0)
- dev_info.driver_name = dev_info.pci_dev->driver->name;
-#else
- dev_info.driver_name = dev_info.pci_dev->driver->driver.name;
-#endif
- ASSERT (dev_info.driver_name);
-
- if (!xd->pmd)
- {
-
-
-#define _(s,f) else if (dev_info.driver_name && \
- !strcmp(dev_info.driver_name, s)) \
- xd->pmd = VNET_DPDK_PMD_##f;
- if (0)
- ;
- foreach_dpdk_pmd
-#undef _
- else
- xd->pmd = VNET_DPDK_PMD_UNKNOWN;
-
- xd->port_type = VNET_DPDK_PORT_TYPE_UNKNOWN;
- xd->nb_rx_desc = DPDK_NB_RX_DESC_DEFAULT;
- xd->nb_tx_desc = DPDK_NB_TX_DESC_DEFAULT;
-
- switch (xd->pmd)
- {
- /* 1G adapters */
- case VNET_DPDK_PMD_E1000EM:
- case VNET_DPDK_PMD_IGB:
- case VNET_DPDK_PMD_IGBVF:
- xd->port_type = VNET_DPDK_PORT_TYPE_ETH_1G;
- break;
-
- /* 10G adapters */
- case VNET_DPDK_PMD_IXGBE:
- case VNET_DPDK_PMD_IXGBEVF:
- case VNET_DPDK_PMD_THUNDERX:
- xd->port_type = VNET_DPDK_PORT_TYPE_ETH_10G;
- break;
- case VNET_DPDK_PMD_DPAA2:
- xd->port_type = VNET_DPDK_PORT_TYPE_ETH_10G;
- break;
-
- /* Cisco VIC */
- case VNET_DPDK_PMD_ENIC:
- rte_eth_link_get_nowait (i, &l);
- xd->flags |= DPDK_DEVICE_FLAG_PMD_SUPPORTS_PTYPE;
- if (l.link_speed == 40000)
- xd->port_type = VNET_DPDK_PORT_TYPE_ETH_40G;
- else
- xd->port_type = VNET_DPDK_PORT_TYPE_ETH_10G;
- break;
-
- /* Intel Fortville */
- case VNET_DPDK_PMD_I40E:
- case VNET_DPDK_PMD_I40EVF:
- xd->flags |= DPDK_DEVICE_FLAG_PMD_SUPPORTS_PTYPE;
- xd->port_type = VNET_DPDK_PORT_TYPE_ETH_40G;
-
- switch (dev_info.pci_dev->id.device_id)
- {
- case I40E_DEV_ID_10G_BASE_T:
- case I40E_DEV_ID_SFP_XL710:
- xd->port_type = VNET_DPDK_PORT_TYPE_ETH_10G;
- break;
- case I40E_DEV_ID_QSFP_A:
- case I40E_DEV_ID_QSFP_B:
- case I40E_DEV_ID_QSFP_C:
- xd->port_type = VNET_DPDK_PORT_TYPE_ETH_40G;
- break;
- case I40E_DEV_ID_VF:
- rte_eth_link_get_nowait (i, &l);
- xd->port_type = l.link_speed == 10000 ?
- VNET_DPDK_PORT_TYPE_ETH_10G : VNET_DPDK_PORT_TYPE_ETH_40G;
- break;
- default:
- xd->port_type = VNET_DPDK_PORT_TYPE_UNKNOWN;
- }
- break;
-
- case VNET_DPDK_PMD_CXGBE:
- switch (dev_info.pci_dev->id.device_id)
- {
- case 0x540d: /* T580-CR */
- case 0x5410: /* T580-LP-cr */
- xd->port_type = VNET_DPDK_PORT_TYPE_ETH_40G;
- break;
- case 0x5403: /* T540-CR */
- xd->port_type = VNET_DPDK_PORT_TYPE_ETH_10G;
- break;
- default:
- xd->port_type = VNET_DPDK_PORT_TYPE_UNKNOWN;
- }
- break;
-
- case VNET_DPDK_PMD_MLX5:
- {
- char *pn_100g[] = { "MCX415A-CCAT", "MCX416A-CCAT", 0 };
- char *pn_40g[] = { "MCX413A-BCAT", "MCX414A-BCAT",
- "MCX415A-BCAT", "MCX416A-BCAT", "MCX4131A-BCAT", 0
- };
- char *pn_10g[] = { "MCX4111A-XCAT", "MCX4121A-XCAT", 0 };
-
- vlib_pci_device_t *pd = vlib_get_pci_device (&pci_addr);
- u8 *pn = 0;
- char **c;
- int found = 0;
- pn = format (0, "%U%c",
- format_vlib_pci_vpd, pd->vpd_r, "PN", 0);
-
- if (!pn)
- break;
-
- c = pn_100g;
- while (!found && c[0])
- {
- if (strncmp ((char *) pn, c[0], strlen (c[0])) == 0)
- {
- xd->port_type = VNET_DPDK_PORT_TYPE_ETH_100G;
- break;
- }
- c++;
- }
-
- c = pn_40g;
- while (!found && c[0])
- {
- if (strncmp ((char *) pn, c[0], strlen (c[0])) == 0)
- {
- xd->port_type = VNET_DPDK_PORT_TYPE_ETH_40G;
- break;
- }
- c++;
- }
-
- c = pn_10g;
- while (!found && c[0])
- {
- if (strncmp ((char *) pn, c[0], strlen (c[0])) == 0)
- {
- xd->port_type = VNET_DPDK_PORT_TYPE_ETH_10G;
- break;
- }
- c++;
- }
-
- vec_free (pn);
- }
-
- break;
- /* Intel Red Rock Canyon */
- case VNET_DPDK_PMD_FM10K:
- xd->port_type = VNET_DPDK_PORT_TYPE_ETH_SWITCH;
- break;
-
- /* virtio */
- case VNET_DPDK_PMD_VIRTIO:
- xd->port_type = VNET_DPDK_PORT_TYPE_ETH_1G;
- xd->nb_rx_desc = DPDK_NB_RX_DESC_VIRTIO;
- xd->nb_tx_desc = DPDK_NB_TX_DESC_VIRTIO;
- break;
-
- /* vmxnet3 */
- case VNET_DPDK_PMD_VMXNET3:
- xd->port_type = VNET_DPDK_PORT_TYPE_ETH_1G;
- xd->tx_conf.txq_flags |= ETH_TXQ_FLAGS_NOMULTSEGS;
- break;
-
- case VNET_DPDK_PMD_AF_PACKET:
- xd->port_type = VNET_DPDK_PORT_TYPE_AF_PACKET;
- xd->af_packet_port_id = af_packet_port_id++;
- break;
-
- case VNET_DPDK_PMD_BOND:
- xd->flags |= DPDK_DEVICE_FLAG_PMD_SUPPORTS_PTYPE;
- xd->port_type = VNET_DPDK_PORT_TYPE_ETH_BOND;
- break;
-
- default:
- xd->port_type = VNET_DPDK_PORT_TYPE_UNKNOWN;
- }
-
- if (devconf->num_rx_desc)
- xd->nb_rx_desc = devconf->num_rx_desc;
-
- if (devconf->num_tx_desc)
- xd->nb_tx_desc = devconf->num_tx_desc;
- }
-
- /*
- * Ensure default mtu is not > the mtu read from the hardware.
- * Otherwise rte_eth_dev_configure() will fail and the port will
- * not be available.
- */
- if (ETHERNET_MAX_PACKET_BYTES > dev_info.max_rx_pktlen)
- {
- /*
-	   * This device does not support the platform's max frame
-	   * size. Use its advertised MRU instead.
- */
- xd->port_conf.rxmode.max_rx_pkt_len = dev_info.max_rx_pktlen;
- }
- else
- {
- xd->port_conf.rxmode.max_rx_pkt_len = ETHERNET_MAX_PACKET_BYTES;
-
- /*
- * Some platforms do not account for Ethernet FCS (4 bytes) in
-	   * MTU calculations. To interoperate with them, increase the MRU, but only
- * if the device's settings can support it.
- */
- if ((dev_info.max_rx_pktlen >= (ETHERNET_MAX_PACKET_BYTES + 4)) &&
- xd->port_conf.rxmode.hw_strip_crc)
- {
- /*
- * Allow additional 4 bytes (for Ethernet FCS). These bytes are
- * stripped by h/w and so will not consume any buffer memory.
- */
- xd->port_conf.rxmode.max_rx_pkt_len += 4;
- }
- }
-
- if (xd->pmd == VNET_DPDK_PMD_AF_PACKET)
- {
- f64 now = vlib_time_now (vm);
- u32 rnd;
- rnd = (u32) (now * 1e6);
- rnd = random_u32 (&rnd);
- clib_memcpy (addr + 2, &rnd, sizeof (rnd));
- addr[0] = 2;
- addr[1] = 0xfe;
- }
- else
- rte_eth_macaddr_get (i, (struct ether_addr *) addr);
-
- if (xd->tx_q_used < tm->n_vlib_mains)
- dpdk_device_lock_init (xd);
-
- xd->device_index = xd - dm->devices;
- ASSERT (i == xd->device_index);
- xd->per_interface_next_index = ~0;
-
- /* assign interface to input thread */
- dpdk_device_and_queue_t *dq;
- int q;
-
- if (devconf->workers)
- {
- int i;
- q = 0;
- /* *INDENT-OFF* */
- clib_bitmap_foreach (i, devconf->workers, ({
- int cpu = dm->input_cpu_first_index + i;
- unsigned lcore = vlib_worker_threads[cpu].lcore_id;
- vec_validate(xd->cpu_socket_id_by_queue, q);
- xd->cpu_socket_id_by_queue[q] = rte_lcore_to_socket_id(lcore);
- vec_add2(dm->devices_by_cpu[cpu], dq, 1);
- dq->device = xd->device_index;
- dq->queue_id = q++;
- }));
- /* *INDENT-ON* */
- }
- else
- for (q = 0; q < xd->rx_q_used; q++)
- {
- int cpu = dm->input_cpu_first_index + next_cpu;
- unsigned lcore = vlib_worker_threads[cpu].lcore_id;
-
- /*
-	     * NUMA node of the worker thread handling this queue;
-	     * needed so buffers are taken from the right mempool
- */
- vec_validate (xd->cpu_socket_id_by_queue, q);
- xd->cpu_socket_id_by_queue[q] = rte_lcore_to_socket_id (lcore);
-
- /*
- * construct vector of (device,queue) pairs for each worker thread
- */
- vec_add2 (dm->devices_by_cpu[cpu], dq, 1);
- dq->device = xd->device_index;
- dq->queue_id = q;
-
- next_cpu++;
- if (next_cpu == dm->input_cpu_count)
- next_cpu = 0;
- }
-
-
- if (devconf->hqos_enabled)
- {
- xd->flags |= DPDK_DEVICE_FLAG_HQOS;
-
- if (devconf->hqos.hqos_thread_valid)
- {
- int cpu = dm->hqos_cpu_first_index + devconf->hqos.hqos_thread;
-
- if (devconf->hqos.hqos_thread >= dm->hqos_cpu_count)
- return clib_error_return (0, "invalid HQoS thread index");
-
- vec_add2 (dm->devices_by_hqos_cpu[cpu], dq, 1);
- dq->device = xd->device_index;
- dq->queue_id = 0;
- }
- else
- {
- int cpu = dm->hqos_cpu_first_index + next_hqos_cpu;
-
- if (dm->hqos_cpu_count == 0)
- return clib_error_return (0, "no HQoS threads available");
-
- vec_add2 (dm->devices_by_hqos_cpu[cpu], dq, 1);
- dq->device = xd->device_index;
- dq->queue_id = 0;
-
- next_hqos_cpu++;
- if (next_hqos_cpu == dm->hqos_cpu_count)
- next_hqos_cpu = 0;
-
- devconf->hqos.hqos_thread_valid = 1;
- devconf->hqos.hqos_thread = cpu;
- }
- }
-
- vec_validate_aligned (xd->tx_vectors, tm->n_vlib_mains,
- CLIB_CACHE_LINE_BYTES);
- for (j = 0; j < tm->n_vlib_mains; j++)
- {
- vec_validate_ha (xd->tx_vectors[j], xd->nb_tx_desc,
- sizeof (tx_ring_hdr_t), CLIB_CACHE_LINE_BYTES);
- vec_reset_length (xd->tx_vectors[j]);
- }
-
- vec_validate_aligned (xd->rx_vectors, xd->rx_q_used,
- CLIB_CACHE_LINE_BYTES);
- for (j = 0; j < xd->rx_q_used; j++)
- {
- vec_validate_aligned (xd->rx_vectors[j], VLIB_FRAME_SIZE - 1,
- CLIB_CACHE_LINE_BYTES);
- vec_reset_length (xd->rx_vectors[j]);
- }
-
- vec_validate_aligned (xd->d_trace_buffers, tm->n_vlib_mains,
- CLIB_CACHE_LINE_BYTES);
-
- rv = dpdk_port_setup (dm, xd);
-
- if (rv)
- return rv;
-
- if (devconf->hqos_enabled)
- {
- rv = dpdk_port_setup_hqos (xd, &devconf->hqos);
- if (rv)
- return rv;
- }
-
- /* count the number of descriptors used for this device */
- nb_desc += xd->nb_rx_desc + xd->nb_tx_desc * xd->tx_q_used;
-
- error = ethernet_register_interface
- (dm->vnet_main, dpdk_device_class.index, xd->device_index,
- /* ethernet address */ addr,
- &xd->vlib_hw_if_index, dpdk_flag_change);
- if (error)
- return error;
-
- sw = vnet_get_hw_sw_interface (dm->vnet_main, xd->vlib_hw_if_index);
- xd->vlib_sw_if_index = sw->sw_if_index;
- hi = vnet_get_hw_interface (dm->vnet_main, xd->vlib_hw_if_index);
-
- /*
- * DAW-FIXME: The Cisco VIC firmware does not provide an api for a
- * driver to dynamically change the mtu. If/when the
- * VIC firmware gets fixed, then this should be removed.
- */
- if (xd->pmd == VNET_DPDK_PMD_ENIC)
- {
- /*
- * Initialize mtu to what has been set by CIMC in the firmware cfg.
- */
- hi->max_packet_bytes = dev_info.max_rx_pktlen;
- if (devconf->vlan_strip_offload != DPDK_DEVICE_VLAN_STRIP_OFF)
- vlan_strip = 1; /* remove vlan tag from VIC port by default */
- else
- clib_warning ("VLAN strip disabled for interface\n");
- }
- else if (devconf->vlan_strip_offload == DPDK_DEVICE_VLAN_STRIP_ON)
- vlan_strip = 1;
-
- if (vlan_strip)
- {
- int vlan_off;
- vlan_off = rte_eth_dev_get_vlan_offload (xd->device_index);
- vlan_off |= ETH_VLAN_STRIP_OFFLOAD;
- xd->port_conf.rxmode.hw_vlan_strip = vlan_off;
- if (rte_eth_dev_set_vlan_offload (xd->device_index, vlan_off) == 0)
- clib_warning ("VLAN strip enabled for interface\n");
- else
- clib_warning ("VLAN strip cannot be supported by interface\n");
- }
-
- hi->max_l3_packet_bytes[VLIB_RX] = hi->max_l3_packet_bytes[VLIB_TX] =
- xd->port_conf.rxmode.max_rx_pkt_len - sizeof (ethernet_header_t);
-
- rte_eth_dev_set_mtu (xd->device_index, hi->max_packet_bytes);
- }
-
- if (nb_desc > dm->conf->num_mbufs)
- clib_warning ("%d mbufs allocated but total rx/tx ring size is %d\n",
- dm->conf->num_mbufs, nb_desc);
-
- return 0;
-}
-
-static void
-dpdk_bind_devices_to_uio (dpdk_config_main_t * conf)
-{
- vlib_pci_main_t *pm = &pci_main;
- clib_error_t *error;
- vlib_pci_device_t *d;
- u8 *pci_addr = 0;
- int num_whitelisted = vec_len (conf->dev_confs);
-
- /* *INDENT-OFF* */
- pool_foreach (d, pm->pci_devs, ({
- dpdk_device_config_t * devconf = 0;
- vec_reset_length (pci_addr);
- pci_addr = format (pci_addr, "%U%c", format_vlib_pci_addr, &d->bus_address, 0);
-
- if (d->device_class != PCI_CLASS_NETWORK_ETHERNET)
- continue;
-
- if (num_whitelisted)
- {
- uword * p = hash_get (conf->device_config_index_by_pci_addr, d->bus_address.as_u32);
-
- if (!p)
- continue;
-
- devconf = pool_elt_at_index (conf->dev_confs, p[0]);
- }
-
- /* virtio */
- if (d->vendor_id == 0x1af4 && d->device_id == 0x1000)
- ;
- /* vmxnet3 */
- else if (d->vendor_id == 0x15ad && d->device_id == 0x07b0)
- ;
- /* all Intel devices */
- else if (d->vendor_id == 0x8086)
- ;
- /* Cisco VIC */
- else if (d->vendor_id == 0x1137 && d->device_id == 0x0043)
- ;
- /* Chelsio T4/T5 */
- else if (d->vendor_id == 0x1425 && (d->device_id & 0xe000) == 0x4000)
- ;
- else
- {
- clib_warning ("Unsupported Ethernet PCI device 0x%04x:0x%04x found "
- "at PCI address %s\n", (u16) d->vendor_id, (u16) d->device_id,
- pci_addr);
- continue;
- }
-
- error = vlib_pci_bind_to_uio (d, (char *) conf->uio_driver_name);
-
- if (error)
- {
- if (devconf == 0)
- {
- pool_get (conf->dev_confs, devconf);
- hash_set (conf->device_config_index_by_pci_addr, d->bus_address.as_u32,
- devconf - conf->dev_confs);
- devconf->pci_addr.as_u32 = d->bus_address.as_u32;
- }
- devconf->is_blacklisted = 1;
- clib_error_report (error);
- }
- }));
- /* *INDENT-ON* */
- vec_free (pci_addr);
-}
-
-static clib_error_t *
-dpdk_device_config (dpdk_config_main_t * conf, vlib_pci_addr_t pci_addr,
- unformat_input_t * input, u8 is_default)
-{
- clib_error_t *error = 0;
- uword *p;
- dpdk_device_config_t *devconf;
- unformat_input_t sub_input;
-
- if (is_default)
- {
- devconf = &conf->default_devconf;
- }
- else
- {
- p = hash_get (conf->device_config_index_by_pci_addr, pci_addr.as_u32);
-
- if (!p)
- {
- pool_get (conf->dev_confs, devconf);
- hash_set (conf->device_config_index_by_pci_addr, pci_addr.as_u32,
- devconf - conf->dev_confs);
- }
- else
- return clib_error_return (0,
- "duplicate configuration for PCI address %U",
- format_vlib_pci_addr, &pci_addr);
- }
-
- devconf->pci_addr.as_u32 = pci_addr.as_u32;
- devconf->hqos_enabled = 0;
- dpdk_device_config_hqos_default (&devconf->hqos);
-
- if (!input)
- return 0;
-
- unformat_skip_white_space (input);
- while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
- {
- if (unformat (input, "num-rx-queues %u", &devconf->num_rx_queues))
- ;
- else if (unformat (input, "num-tx-queues %u", &devconf->num_tx_queues))
- ;
- else if (unformat (input, "num-rx-desc %u", &devconf->num_rx_desc))
- ;
- else if (unformat (input, "num-tx-desc %u", &devconf->num_tx_desc))
- ;
- else if (unformat (input, "workers %U", unformat_bitmap_list,
- &devconf->workers))
- ;
- else
- if (unformat
- (input, "rss %U", unformat_vlib_cli_sub_input, &sub_input))
- {
- error = unformat_rss_fn (&sub_input, &devconf->rss_fn);
- if (error)
- break;
- }
- else if (unformat (input, "vlan-strip-offload off"))
- devconf->vlan_strip_offload = DPDK_DEVICE_VLAN_STRIP_OFF;
- else if (unformat (input, "vlan-strip-offload on"))
- devconf->vlan_strip_offload = DPDK_DEVICE_VLAN_STRIP_ON;
- else
- if (unformat
- (input, "hqos %U", unformat_vlib_cli_sub_input, &sub_input))
- {
- devconf->hqos_enabled = 1;
- error = unformat_hqos (&sub_input, &devconf->hqos);
- if (error)
- break;
- }
- else if (unformat (input, "hqos"))
- {
- devconf->hqos_enabled = 1;
- }
- else
- {
- error = clib_error_return (0, "unknown input `%U'",
- format_unformat_error, input);
- break;
- }
- }
-
- if (error)
- return error;
-
- if (devconf->workers && devconf->num_rx_queues == 0)
- devconf->num_rx_queues = clib_bitmap_count_set_bits (devconf->workers);
- else if (devconf->workers &&
- clib_bitmap_count_set_bits (devconf->workers) !=
- devconf->num_rx_queues)
- error =
- clib_error_return (0,
-			 "%U: number of worker threads must be "
- "equal to number of rx queues", format_vlib_pci_addr,
- &pci_addr);
-
- return error;
-}
-
-static clib_error_t *
-dpdk_config (vlib_main_t * vm, unformat_input_t * input)
-{
- clib_error_t *error = 0;
- dpdk_main_t *dm = &dpdk_main;
- dpdk_config_main_t *conf = &dpdk_config_main;
- vlib_thread_main_t *tm = vlib_get_thread_main ();
- dpdk_device_config_t *devconf;
- vlib_pci_addr_t pci_addr;
- unformat_input_t sub_input;
- u8 *s, *tmp = 0;
- u8 *rte_cmd = 0, *ethname = 0;
- u32 log_level;
- int ret, i;
- int num_whitelisted = 0;
- u8 no_pci = 0;
- u8 no_huge = 0;
- u8 huge_dir = 0;
- u8 file_prefix = 0;
- u8 *socket_mem = 0;
-
- conf->device_config_index_by_pci_addr = hash_create (0, sizeof (uword));
-
- while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
- {
- /* Prime the pump */
- if (unformat (input, "no-hugetlb"))
- {
- vec_add1 (conf->eal_init_args, (u8 *) "no-huge");
- no_huge = 1;
- }
-
- else if (unformat (input, "enable-tcp-udp-checksum"))
- conf->enable_tcp_udp_checksum = 1;
-
- else if (unformat (input, "decimal-interface-names"))
- conf->interface_name_format_decimal = 1;
-
- else if (unformat (input, "no-multi-seg"))
- conf->no_multi_seg = 1;
-
- else if (unformat (input, "dev default %U", unformat_vlib_cli_sub_input,
- &sub_input))
- {
- error =
- dpdk_device_config (conf, (vlib_pci_addr_t) (u32) ~ 1, &sub_input,
- 1);
-
- if (error)
- return error;
- }
- else
- if (unformat
- (input, "dev %U %U", unformat_vlib_pci_addr, &pci_addr,
- unformat_vlib_cli_sub_input, &sub_input))
- {
- error = dpdk_device_config (conf, pci_addr, &sub_input, 0);
-
- if (error)
- return error;
-
- num_whitelisted++;
- }
- else if (unformat (input, "dev %U", unformat_vlib_pci_addr, &pci_addr))
- {
- error = dpdk_device_config (conf, pci_addr, 0, 0);
-
- if (error)
- return error;
-
- num_whitelisted++;
- }
- else if (unformat (input, "num-mbufs %d", &conf->num_mbufs))
- ;
- else if (unformat (input, "kni %d", &conf->num_kni))
- ;
- else if (unformat (input, "uio-driver %s", &conf->uio_driver_name))
- ;
- else if (unformat (input, "socket-mem %s", &socket_mem))
- ;
- else if (unformat (input, "no-pci"))
- {
- no_pci = 1;
- tmp = format (0, "--no-pci%c", 0);
- vec_add1 (conf->eal_init_args, tmp);
- }
- else if (unformat (input, "poll-sleep %d", &dm->poll_sleep))
- ;
-
-#define _(a) \
- else if (unformat(input, #a)) \
- { \
- tmp = format (0, "--%s%c", #a, 0); \
- vec_add1 (conf->eal_init_args, tmp); \
- }
- foreach_eal_double_hyphen_predicate_arg
-#undef _
-#define _(a) \
- else if (unformat(input, #a " %s", &s)) \
- { \
- if (!strncmp(#a, "huge-dir", 8)) \
- huge_dir = 1; \
- else if (!strncmp(#a, "file-prefix", 11)) \
- file_prefix = 1; \
- tmp = format (0, "--%s%c", #a, 0); \
- vec_add1 (conf->eal_init_args, tmp); \
- vec_add1 (s, 0); \
- vec_add1 (conf->eal_init_args, s); \
- }
- foreach_eal_double_hyphen_arg
-#undef _
-#define _(a,b) \
- else if (unformat(input, #a " %s", &s)) \
- { \
- tmp = format (0, "-%s%c", #b, 0); \
- vec_add1 (conf->eal_init_args, tmp); \
- vec_add1 (s, 0); \
- vec_add1 (conf->eal_init_args, s); \
- }
- foreach_eal_single_hyphen_arg
-#undef _
-#define _(a,b) \
- else if (unformat(input, #a " %s", &s)) \
- { \
- tmp = format (0, "-%s%c", #b, 0); \
- vec_add1 (conf->eal_init_args, tmp); \
- vec_add1 (s, 0); \
- vec_add1 (conf->eal_init_args, s); \
- conf->a##_set_manually = 1; \
- }
- foreach_eal_single_hyphen_mandatory_arg
-#undef _
- else if (unformat (input, "default"))
- ;
-
- else if (unformat_skip_white_space (input))
- ;
- else
- {
- error = clib_error_return (0, "unknown input `%U'",
- format_unformat_error, input);
- goto done;
- }
- }
-
- if (!conf->uio_driver_name)
- conf->uio_driver_name = format (0, "igb_uio%c", 0);
-
-  /*
-   * No explicit hugepage configuration was given: compute per-socket
-   * memory, use 1G huge pages if enough are free (2M pages otherwise)
-   * and mount a private hugetlbfs instance under DEFAULT_HUGE_DIR.
-   */
- if (!no_huge && !huge_dir)
- {
- u32 x, *mem_by_socket = 0;
- uword c = 0;
- u8 use_1g = 1;
- u8 use_2m = 1;
- u8 less_than_1g = 1;
- int rv;
-
- umount (DEFAULT_HUGE_DIR);
-
- /* Process "socket-mem" parameter value */
- if (vec_len (socket_mem))
- {
- unformat_input_t in;
- unformat_init_vector (&in, socket_mem);
- while (unformat_check_input (&in) != UNFORMAT_END_OF_INPUT)
- {
- if (unformat (&in, "%u,", &x))
- ;
- else if (unformat (&in, "%u", &x))
- ;
- else if (unformat (&in, ","))
- x = 0;
- else
- break;
-
- vec_add1 (mem_by_socket, x);
-
- if (x > 1023)
- less_than_1g = 0;
- }
- /* Note: unformat_free vec_frees(in.buffer), aka socket_mem... */
- unformat_free (&in);
- socket_mem = 0;
- }
- else
- {
- /* *INDENT-OFF* */
- clib_bitmap_foreach (c, tm->cpu_socket_bitmap, (
- {
- vec_validate(mem_by_socket, c);
- mem_by_socket[c] = 256; /* default per-socket mem */
- }
- ));
- /* *INDENT-ON* */
- }
-
-      /* check whether enough 1GB or 2MB huge pages are available for each socket */
- /* *INDENT-OFF* */
- clib_bitmap_foreach (c, tm->cpu_socket_bitmap, (
- {
- int pages_avail, page_size, mem;
-
- vec_validate(mem_by_socket, c);
- mem = mem_by_socket[c];
-
- page_size = 1024;
- pages_avail = vlib_sysfs_get_free_hugepages(c, page_size * 1024);
-
- if (pages_avail < 0 || page_size * pages_avail < mem)
- use_1g = 0;
-
- page_size = 2;
- pages_avail = vlib_sysfs_get_free_hugepages(c, page_size * 1024);
-
- if (pages_avail < 0 || page_size * pages_avail < mem)
- use_2m = 0;
- }));
- /* *INDENT-ON* */
-
- if (mem_by_socket == 0)
- {
- error = clib_error_return (0, "mem_by_socket NULL");
- goto done;
- }
- _vec_len (mem_by_socket) = c + 1;
-
- /* regenerate socket_mem string */
- vec_foreach_index (x, mem_by_socket)
- socket_mem = format (socket_mem, "%s%u",
- socket_mem ? "," : "", mem_by_socket[x]);
- socket_mem = format (socket_mem, "%c", 0);
-
- vec_free (mem_by_socket);
-
- rv = mkdir (VPP_RUN_DIR, 0755);
- if (rv && errno != EEXIST)
- {
- error = clib_error_return (0, "mkdir '%s' failed errno %d",
- VPP_RUN_DIR, errno);
- goto done;
- }
-
- rv = mkdir (DEFAULT_HUGE_DIR, 0755);
- if (rv && errno != EEXIST)
- {
- error = clib_error_return (0, "mkdir '%s' failed errno %d",
- DEFAULT_HUGE_DIR, errno);
- goto done;
- }
-
- if (use_1g && !(less_than_1g && use_2m))
- {
- rv =
- mount ("none", DEFAULT_HUGE_DIR, "hugetlbfs", 0, "pagesize=1G");
- }
- else if (use_2m)
- {
- rv = mount ("none", DEFAULT_HUGE_DIR, "hugetlbfs", 0, NULL);
- }
- else
- {
- return clib_error_return (0, "not enough free huge pages");
- }
-
- if (rv)
- {
- error = clib_error_return (0, "mount failed %d", errno);
- goto done;
- }
-
- tmp = format (0, "--huge-dir%c", 0);
- vec_add1 (conf->eal_init_args, tmp);
- tmp = format (0, "%s%c", DEFAULT_HUGE_DIR, 0);
- vec_add1 (conf->eal_init_args, tmp);
- if (!file_prefix)
- {
- tmp = format (0, "--file-prefix%c", 0);
- vec_add1 (conf->eal_init_args, tmp);
- tmp = format (0, "vpp%c", 0);
- vec_add1 (conf->eal_init_args, tmp);
- }
- }
-
- vec_free (rte_cmd);
- vec_free (ethname);
-
- if (error)
- return error;
-
- /* I'll bet that -c and -n must be the first and second args... */
- if (!conf->coremask_set_manually)
- {
- vlib_thread_registration_t *tr;
- uword *coremask = 0;
- int i;
-
- /* main thread core */
- coremask = clib_bitmap_set (coremask, tm->main_lcore, 1);
-
- for (i = 0; i < vec_len (tm->registrations); i++)
- {
- tr = tm->registrations[i];
- coremask = clib_bitmap_or (coremask, tr->coremask);
- }
-
- vec_insert (conf->eal_init_args, 2, 1);
- conf->eal_init_args[1] = (u8 *) "-c";
- tmp = format (0, "%U%c", format_bitmap_hex, coremask, 0);
- conf->eal_init_args[2] = tmp;
- clib_bitmap_free (coremask);
- }
-
- if (!conf->nchannels_set_manually)
- {
- vec_insert (conf->eal_init_args, 2, 3);
- conf->eal_init_args[3] = (u8 *) "-n";
- tmp = format (0, "%d", conf->nchannels);
- conf->eal_init_args[4] = tmp;
- }
-
- if (no_pci == 0 && geteuid () == 0)
- dpdk_bind_devices_to_uio (conf);
-
-#define _(x) \
- if (devconf->x == 0 && conf->default_devconf.x > 0) \
- devconf->x = conf->default_devconf.x ;
-
- /* *INDENT-OFF* */
- pool_foreach (devconf, conf->dev_confs, ({
-
- /* default per-device config items */
- foreach_dpdk_device_config_item
-
- /* add DPDK EAL whitelist/blacklist entry */
- if (num_whitelisted > 0 && devconf->is_blacklisted == 0)
- {
- tmp = format (0, "-w%c", 0);
- vec_add1 (conf->eal_init_args, tmp);
- tmp = format (0, "%U%c", format_vlib_pci_addr, &devconf->pci_addr, 0);
- vec_add1 (conf->eal_init_args, tmp);
- }
- else if (num_whitelisted == 0 && devconf->is_blacklisted != 0)
- {
- tmp = format (0, "-b%c", 0);
- vec_add1 (conf->eal_init_args, tmp);
- tmp = format (0, "%U%c", format_vlib_pci_addr, &devconf->pci_addr, 0);
- vec_add1 (conf->eal_init_args, tmp);
- }
- }));
- /* *INDENT-ON* */
-
-#undef _
-
- /* set master-lcore */
- tmp = format (0, "--master-lcore%c", 0);
- vec_add1 (conf->eal_init_args, tmp);
- tmp = format (0, "%u%c", tm->main_lcore, 0);
- vec_add1 (conf->eal_init_args, tmp);
-
- /* set socket-mem */
- tmp = format (0, "--socket-mem%c", 0);
- vec_add1 (conf->eal_init_args, tmp);
- tmp = format (0, "%s%c", socket_mem, 0);
- vec_add1 (conf->eal_init_args, tmp);
-
- /* NULL terminate the "argv" vector, in case of stupidity */
- vec_add1 (conf->eal_init_args, 0);
- _vec_len (conf->eal_init_args) -= 1;
-
- /* Set up DPDK eal and packet mbuf pool early. */
-
- log_level = (CLIB_DEBUG > 0) ? RTE_LOG_DEBUG : RTE_LOG_NOTICE;
-
- rte_set_log_level (log_level);
-
- vm = vlib_get_main ();
-
-  /* make a copy of the args, as rte_eal_init tends to mess with the arg array */
- for (i = 1; i < vec_len (conf->eal_init_args); i++)
- conf->eal_init_args_str = format (conf->eal_init_args_str, "%s ",
- conf->eal_init_args[i]);
-
- ret =
- rte_eal_init (vec_len (conf->eal_init_args),
- (char **) conf->eal_init_args);
-
- /* lazy umount hugepages */
- umount2 (DEFAULT_HUGE_DIR, MNT_DETACH);
-
- if (ret < 0)
- return clib_error_return (0, "rte_eal_init returned %d", ret);
-
- /* Dump the physical memory layout prior to creating the mbuf_pool */
- fprintf (stdout, "DPDK physical memory layout:\n");
- rte_dump_physmem_layout (stdout);
-
- /* main thread 1st */
- error = vlib_buffer_pool_create (vm, conf->num_mbufs, rte_socket_id ());
- if (error)
- return error;
-
- for (i = 0; i < RTE_MAX_LCORE; i++)
- {
- error = vlib_buffer_pool_create (vm, conf->num_mbufs,
- rte_lcore_to_socket_id (i));
- if (error)
- return error;
- }
-
-done:
- return error;
-}
-
-VLIB_CONFIG_FUNCTION (dpdk_config, "dpdk");
-
-void
-dpdk_update_link_state (dpdk_device_t * xd, f64 now)
-{
- vnet_main_t *vnm = vnet_get_main ();
- struct rte_eth_link prev_link = xd->link;
- u32 hw_flags = 0;
- u8 hw_flags_chg = 0;
-
- /* only update link state for PMD interfaces */
- if ((xd->flags & DPDK_DEVICE_FLAG_PMD) == 0)
- return;
-
- xd->time_last_link_update = now ? now : xd->time_last_link_update;
- memset (&xd->link, 0, sizeof (xd->link));
- rte_eth_link_get_nowait (xd->device_index, &xd->link);
-
- if (LINK_STATE_ELOGS)
- {
- vlib_main_t *vm = vlib_get_main ();
- ELOG_TYPE_DECLARE (e) =
- {
- .format =
- "update-link-state: sw_if_index %d, admin_up %d,"
- "old link_state %d new link_state %d",.format_args = "i4i1i1i1",};
-
- struct
- {
- u32 sw_if_index;
- u8 admin_up;
- u8 old_link_state;
- u8 new_link_state;
- } *ed;
- ed = ELOG_DATA (&vm->elog_main, e);
- ed->sw_if_index = xd->vlib_sw_if_index;
- ed->admin_up = (xd->flags & DPDK_DEVICE_FLAG_ADMIN_UP) != 0;
- ed->old_link_state = (u8)
- vnet_hw_interface_is_link_up (vnm, xd->vlib_hw_if_index);
- ed->new_link_state = (u8) xd->link.link_status;
- }
-
- if ((xd->flags & DPDK_DEVICE_FLAG_ADMIN_UP) &&
- ((xd->link.link_status != 0) ^
- vnet_hw_interface_is_link_up (vnm, xd->vlib_hw_if_index)))
- {
- hw_flags_chg = 1;
- hw_flags |= (xd->link.link_status ? VNET_HW_INTERFACE_FLAG_LINK_UP : 0);
- }
-
- if (hw_flags_chg || (xd->link.link_duplex != prev_link.link_duplex))
- {
- hw_flags_chg = 1;
- switch (xd->link.link_duplex)
- {
- case ETH_LINK_HALF_DUPLEX:
- hw_flags |= VNET_HW_INTERFACE_FLAG_HALF_DUPLEX;
- break;
- case ETH_LINK_FULL_DUPLEX:
- hw_flags |= VNET_HW_INTERFACE_FLAG_FULL_DUPLEX;
- break;
- default:
- break;
- }
- }
- if (hw_flags_chg || (xd->link.link_speed != prev_link.link_speed))
- {
- hw_flags_chg = 1;
- switch (xd->link.link_speed)
- {
- case ETH_SPEED_NUM_10M:
- hw_flags |= VNET_HW_INTERFACE_FLAG_SPEED_10M;
- break;
- case ETH_SPEED_NUM_100M:
- hw_flags |= VNET_HW_INTERFACE_FLAG_SPEED_100M;
- break;
- case ETH_SPEED_NUM_1G:
- hw_flags |= VNET_HW_INTERFACE_FLAG_SPEED_1G;
- break;
- case ETH_SPEED_NUM_10G:
- hw_flags |= VNET_HW_INTERFACE_FLAG_SPEED_10G;
- break;
- case ETH_SPEED_NUM_40G:
- hw_flags |= VNET_HW_INTERFACE_FLAG_SPEED_40G;
- break;
- case 0:
- break;
- default:
- clib_warning ("unknown link speed %d", xd->link.link_speed);
- break;
- }
- }
- if (hw_flags_chg)
- {
- if (LINK_STATE_ELOGS)
- {
- vlib_main_t *vm = vlib_get_main ();
-
- ELOG_TYPE_DECLARE (e) =
- {
- .format =
- "update-link-state: sw_if_index %d, new flags %d",.format_args
- = "i4i4",};
-
- struct
- {
- u32 sw_if_index;
- u32 flags;
- } *ed;
- ed = ELOG_DATA (&vm->elog_main, e);
- ed->sw_if_index = xd->vlib_sw_if_index;
- ed->flags = hw_flags;
- }
- vnet_hw_interface_set_flags (vnm, xd->vlib_hw_if_index, hw_flags);
- }
-}
-
-static uword
-dpdk_process (vlib_main_t * vm, vlib_node_runtime_t * rt, vlib_frame_t * f)
-{
- clib_error_t *error;
- vnet_main_t *vnm = vnet_get_main ();
- dpdk_main_t *dm = &dpdk_main;
- ethernet_main_t *em = &ethernet_main;
- dpdk_device_t *xd;
- vlib_thread_main_t *tm = vlib_get_thread_main ();
- int i;
-
- error = dpdk_lib_init (dm);
-
- /*
- * Turn on the input node if we found some devices to drive
- * and we're not running worker threads or i/o threads
- */
-
- if (error == 0 && vec_len (dm->devices) > 0)
- {
- if (tm->n_vlib_mains == 1)
- vlib_node_set_state (vm, dpdk_input_node.index,
- VLIB_NODE_STATE_POLLING);
- else
- for (i = 0; i < tm->n_vlib_mains; i++)
- if (vec_len (dm->devices_by_cpu[i]) > 0)
- vlib_node_set_state (vlib_mains[i], dpdk_input_node.index,
- VLIB_NODE_STATE_POLLING);
- }
-
- if (error)
- clib_error_report (error);
-
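-  /* Release the worker (and HQoS) threads, which spin on this flag
-   * until DPDK initialization is complete. */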
- tm->worker_thread_release = 1;
-
- f64 now = vlib_time_now (vm);
- vec_foreach (xd, dm->devices)
- {
- dpdk_update_link_state (xd, now);
- }
-
- {
- /*
- * Extra set up for bond interfaces:
-     * 1. Set up MACs for bond interfaces and their slave links; this was
-     *    done in dpdk_port_setup() but needs to be done again here to take effect.
- * 2. Set up info for bond interface related CLI support.
- */
- int nports = rte_eth_dev_count ();
- if (nports > 0)
- {
- for (i = 0; i < nports; i++)
- {
- struct rte_eth_dev_info dev_info;
- rte_eth_dev_info_get (i, &dev_info);
- if (!dev_info.driver_name)
-#if RTE_VERSION < RTE_VERSION_NUM(16, 11, 0, 0)
- dev_info.driver_name = dev_info.pci_dev->driver->name;
-#else
- dev_info.driver_name = dev_info.pci_dev->driver->driver.name;
-#endif
- ASSERT (dev_info.driver_name);
- if (strncmp (dev_info.driver_name, "rte_bond_pmd", 12) == 0)
- {
- u8 addr[6];
- u8 slink[16];
- int nlink = rte_eth_bond_slaves_get (i, slink, 16);
- if (nlink > 0)
- {
- vnet_hw_interface_t *bhi;
- ethernet_interface_t *bei;
- int rv;
-
- /* Get MAC of 1st slave link */
- rte_eth_macaddr_get (slink[0],
- (struct ether_addr *) addr);
-		      /* Set MAC of bonded interface to that of 1st slave link */
- rv =
- rte_eth_bond_mac_address_set (i,
- (struct ether_addr *)
- addr);
- if (rv < 0)
- clib_warning ("Failed to set MAC address");
-
- /* Populate MAC of bonded interface in VPP hw tables */
- bhi =
- vnet_get_hw_interface (vnm,
- dm->devices[i].vlib_hw_if_index);
- bei =
- pool_elt_at_index (em->interfaces, bhi->hw_instance);
- clib_memcpy (bhi->hw_address, addr, 6);
- clib_memcpy (bei->address, addr, 6);
- /* Init l3 packet size allowed on bonded interface */
- bhi->max_packet_bytes = ETHERNET_MAX_PACKET_BYTES;
- bhi->max_l3_packet_bytes[VLIB_RX] =
- bhi->max_l3_packet_bytes[VLIB_TX] =
- ETHERNET_MAX_PACKET_BYTES - sizeof (ethernet_header_t);
- while (nlink >= 1)
- { /* for all slave links */
- int slave = slink[--nlink];
- dpdk_device_t *sdev = &dm->devices[slave];
- vnet_hw_interface_t *shi;
- vnet_sw_interface_t *ssi;
- /* Add MAC to all slave links except the first one */
- if (nlink)
- rte_eth_dev_mac_addr_add (slave,
- (struct ether_addr *)
- addr, 0);
- /* Set slaves bitmap for bonded interface */
- bhi->bond_info =
- clib_bitmap_set (bhi->bond_info,
- sdev->vlib_hw_if_index, 1);
- /* Set slave link flags on slave interface */
- shi =
- vnet_get_hw_interface (vnm, sdev->vlib_hw_if_index);
- ssi =
- vnet_get_sw_interface (vnm, sdev->vlib_sw_if_index);
- shi->bond_info = VNET_HW_INTERFACE_BOND_INFO_SLAVE;
- ssi->flags |= VNET_SW_INTERFACE_FLAG_BOND_SLAVE;
-
- /* Set l3 packet size allowed as the lowest of slave */
- if (bhi->max_l3_packet_bytes[VLIB_RX] >
- shi->max_l3_packet_bytes[VLIB_RX])
- bhi->max_l3_packet_bytes[VLIB_RX] =
- bhi->max_l3_packet_bytes[VLIB_TX] =
- shi->max_l3_packet_bytes[VLIB_RX];
-
- /* Set max packet size allowed as the lowest of slave */
- if (bhi->max_packet_bytes > shi->max_packet_bytes)
- bhi->max_packet_bytes = shi->max_packet_bytes;
- }
- }
- }
- }
- }
- }
-
- while (1)
- {
- /*
- * check each time through the loop in case intervals are changed
- */
- f64 min_wait = dm->link_state_poll_interval < dm->stat_poll_interval ?
- dm->link_state_poll_interval : dm->stat_poll_interval;
-
- vlib_process_wait_for_event_or_clock (vm, min_wait);
-
- if (dm->admin_up_down_in_progress)
- /* skip the poll if an admin up down is in progress (on any interface) */
- continue;
-
- vec_foreach (xd, dm->devices)
- {
- f64 now = vlib_time_now (vm);
- if ((now - xd->time_last_stats_update) >= dm->stat_poll_interval)
- dpdk_update_counters (xd, now);
- if ((now - xd->time_last_link_update) >= dm->link_state_poll_interval)
- dpdk_update_link_state (xd, now);
-
- }
- }
-
- return 0;
-}
-
-/* *INDENT-OFF* */
-VLIB_REGISTER_NODE (dpdk_process_node,static) = {
- .function = dpdk_process,
- .type = VLIB_NODE_TYPE_PROCESS,
- .name = "dpdk-process",
- .process_log2_n_stack_bytes = 17,
-};
-/* *INDENT-ON* */
-
-int
-dpdk_set_stat_poll_interval (f64 interval)
-{
- if (interval < DPDK_MIN_STATS_POLL_INTERVAL)
- return (VNET_API_ERROR_INVALID_VALUE);
-
- dpdk_main.stat_poll_interval = interval;
-
- return 0;
-}
-
-int
-dpdk_set_link_state_poll_interval (f64 interval)
-{
- if (interval < DPDK_MIN_LINK_POLL_INTERVAL)
- return (VNET_API_ERROR_INVALID_VALUE);
-
- dpdk_main.link_state_poll_interval = interval;
-
- return 0;
-}
-
-clib_error_t *
-dpdk_init (vlib_main_t * vm)
-{
- dpdk_main_t *dm = &dpdk_main;
- vlib_node_t *ei;
- clib_error_t *error = 0;
- vlib_thread_main_t *tm = vlib_get_thread_main ();
-
- /* verify that structs are cacheline aligned */
- STATIC_ASSERT (offsetof (dpdk_device_t, cacheline0) == 0,
- "Cache line marker must be 1st element in dpdk_device_t");
- STATIC_ASSERT (offsetof (dpdk_device_t, cacheline1) ==
- CLIB_CACHE_LINE_BYTES,
- "Data in cache line 0 is bigger than cache line size");
- STATIC_ASSERT (offsetof (dpdk_worker_t, cacheline0) == 0,
- "Cache line marker must be 1st element in dpdk_worker_t");
- STATIC_ASSERT (offsetof (frame_queue_trace_t, cacheline0) == 0,
- "Cache line marker must be 1st element in frame_queue_trace_t");
-
- dm->vlib_main = vm;
- dm->vnet_main = vnet_get_main ();
- dm->conf = &dpdk_config_main;
-
- ei = vlib_get_node_by_name (vm, (u8 *) "ethernet-input");
- if (ei == 0)
- return clib_error_return (0, "ethernet-input node AWOL");
-
- dm->ethernet_input_node_index = ei->index;
-
- dm->conf->nchannels = 4;
- dm->conf->num_mbufs = dm->conf->num_mbufs ? dm->conf->num_mbufs : NB_MBUF;
- vec_add1 (dm->conf->eal_init_args, (u8 *) "vnet");
-
- dm->dpdk_device_by_kni_port_id = hash_create (0, sizeof (uword));
- dm->vu_sw_if_index_by_listener_fd = hash_create (0, sizeof (uword));
- dm->vu_sw_if_index_by_sock_fd = hash_create (0, sizeof (uword));
-
- /* $$$ use n_thread_stacks since it's known-good at this point */
- vec_validate (dm->recycle, tm->n_thread_stacks - 1);
-
- /* Default vlib_buffer_t flags, DISABLES tcp/udp checksumming... */
- dm->buffer_flags_template =
- (VLIB_BUFFER_TOTAL_LENGTH_VALID | VNET_BUFFER_RTE_MBUF_VALID
- | IP_BUFFER_L4_CHECKSUM_COMPUTED | IP_BUFFER_L4_CHECKSUM_CORRECT);
-
- dm->stat_poll_interval = DPDK_STATS_POLL_INTERVAL;
- dm->link_state_poll_interval = DPDK_LINK_POLL_INTERVAL;
-
- /* init CLI */
- if ((error = vlib_call_init_function (vm, dpdk_cli_init)))
- return error;
-
- return error;
-}
-
-VLIB_INIT_FUNCTION (dpdk_init);
-
-
-/*
- * fd.io coding-style-patch-verification: ON
- *
- * Local Variables:
- * eval: (c-set-style "gnu")
- * End:
- */
diff --git a/vnet/vnet/devices/dpdk/ipsec/cli.c b/vnet/vnet/devices/dpdk/ipsec/cli.c
deleted file mode 100644
index 3b634e036da..00000000000
--- a/vnet/vnet/devices/dpdk/ipsec/cli.c
+++ /dev/null
@@ -1,141 +0,0 @@
-/*
- * Copyright (c) 2016 Intel and/or its affiliates.
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at:
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <vnet/vnet.h>
-#include <vnet/devices/dpdk/ipsec/ipsec.h>
-
-static void
-dpdk_ipsec_show_mapping (vlib_main_t * vm, u16 detail_display)
-{
- dpdk_crypto_main_t *dcm = &dpdk_crypto_main;
- vlib_thread_main_t *tm = vlib_get_thread_main ();
- u32 i, skip_master;
-
- if (detail_display)
- vlib_cli_output (vm, "worker\t%10s\t%15s\tdir\tdev\tqp\n",
- "cipher", "auth");
- else
- vlib_cli_output (vm, "worker\tcrypto device id(type)\n");
-
- skip_master = vlib_num_workers () > 0;
-
- for (i = 0; i < tm->n_vlib_mains; i++)
- {
- uword key, data;
- u32 cpu_index = vlib_mains[i]->cpu_index;
- crypto_worker_main_t *cwm = &dcm->workers_main[cpu_index];
- u8 *s = 0;
-
- if (skip_master)
- {
- skip_master = 0;
- continue;
- }
-
- if (!detail_display)
- {
- i32 last_cdev = -1;
- crypto_qp_data_t *qpd;
-
- s = format (s, "%u\t", cpu_index);
-
- /* *INDENT-OFF* */
- vec_foreach (qpd, cwm->qp_data)
- {
- u32 dev_id = qpd->dev_id;
-
- if ((u16) last_cdev != dev_id)
- {
- struct rte_cryptodev_info cdev_info;
-
- rte_cryptodev_info_get (dev_id, &cdev_info);
-
- s = format(s, "%u(%s)\t", dev_id, cdev_info.feature_flags &
- RTE_CRYPTODEV_FF_HW_ACCELERATED ? "HW" : "SW");
- }
- last_cdev = dev_id;
- }
- /* *INDENT-ON* */
- vlib_cli_output (vm, "%s", s);
- }
- else
- {
- char cipher_str[15], auth_str[15];
- struct rte_cryptodev_capabilities cap;
- crypto_worker_qp_key_t *p_key = (crypto_worker_qp_key_t *) & key;
- /* *INDENT-OFF* */
- hash_foreach (key, data, cwm->algo_qp_map,
- ({
- cap.op = RTE_CRYPTO_OP_TYPE_SYMMETRIC;
- cap.sym.xform_type = RTE_CRYPTO_SYM_XFORM_CIPHER;
- cap.sym.cipher.algo = p_key->cipher_algo;
- check_algo_is_supported (&cap, cipher_str);
- cap.op = RTE_CRYPTO_OP_TYPE_SYMMETRIC;
- cap.sym.xform_type = RTE_CRYPTO_SYM_XFORM_AUTH;
- cap.sym.auth.algo = p_key->auth_algo;
- check_algo_is_supported (&cap, auth_str);
- vlib_cli_output (vm, "%u\t%10s\t%15s\t%3s\t%u\t%u\n",
- vlib_mains[i]->cpu_index, cipher_str, auth_str,
- p_key->is_outbound ? "out" : "in",
- cwm->qp_data[data].dev_id,
- cwm->qp_data[data].qp_id);
- }));
- /* *INDENT-ON* */
- }
- }
-}
-
-static clib_error_t *
-lcore_cryptodev_map_fn (vlib_main_t * vm, unformat_input_t * input,
- vlib_cli_command_t * cmd)
-{
- unformat_input_t _line_input, *line_input = &_line_input;
- u16 detail = 0;
-
- if (!unformat_user (input, unformat_line_input, line_input))
- return 0;
-
- while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
- {
- if (unformat (line_input, "verbose"))
- detail = 1;
- else
- return clib_error_return (0, "parse error: '%U'",
- format_unformat_error, line_input);
- }
-
- unformat_free (line_input);
-
- dpdk_ipsec_show_mapping (vm, detail);
-
- return 0;
-}
-
-/* *INDENT-OFF* */
-VLIB_CLI_COMMAND (lcore_cryptodev_map, static) = {
- .path = "show crypto device mapping",
- .short_help =
- "show cryptodev device mapping <verbose>",
- .function = lcore_cryptodev_map_fn,
-};
-/* *INDENT-ON* */
-
-/*
- * fd.io coding-style-patch-verification: ON
- *
- * Local Variables:
- * eval: (c-set-style "gnu")
- * End:
- */
diff --git a/vnet/vnet/devices/dpdk/ipsec/crypto_node.c b/vnet/vnet/devices/dpdk/ipsec/crypto_node.c
deleted file mode 100644
index 7b32704ec05..00000000000
--- a/vnet/vnet/devices/dpdk/ipsec/crypto_node.c
+++ /dev/null
@@ -1,210 +0,0 @@
-/*
- *------------------------------------------------------------------
- * crypto_node.c - DPDK Cryptodev input node
- *
- * Copyright (c) 2016 Intel and/or its affiliates.
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at:
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- *------------------------------------------------------------------
- */
-
-#include <vlib/vlib.h>
-#include <vnet/ip/ip.h>
-#include <vnet/ethernet/ethernet.h>
-#include <vnet/ipsec/ipsec.h>
-
-#include <vnet/devices/dpdk/ipsec/ipsec.h>
-
-#define foreach_dpdk_crypto_input_next \
- _(DROP, "error-drop") \
- _(ENCRYPT_POST, "dpdk-esp-encrypt-post") \
- _(DECRYPT_POST, "dpdk-esp-decrypt-post")
-
-typedef enum
-{
-#define _(f,s) DPDK_CRYPTO_INPUT_NEXT_##f,
- foreach_dpdk_crypto_input_next
-#undef _
- DPDK_CRYPTO_INPUT_N_NEXT,
-} dpdk_crypto_input_next_t;
-
-#define foreach_dpdk_crypto_input_error \
- _(DQ_COPS, "Crypto ops dequeued") \
- _(COP_FAILED, "Crypto op failed")
-
-typedef enum
-{
-#define _(f,s) DPDK_CRYPTO_INPUT_ERROR_##f,
- foreach_dpdk_crypto_input_error
-#undef _
- DPDK_CRYPTO_INPUT_N_ERROR,
-} dpdk_crypto_input_error_t;
-
-static char *dpdk_crypto_input_error_strings[] = {
-#define _(n, s) s,
- foreach_dpdk_crypto_input_error
-#undef _
-};
-
-vlib_node_registration_t dpdk_crypto_input_node;
-
-typedef struct
-{
- u32 cdev;
- u32 qp;
- u32 status;
- u32 sa_idx;
- u32 next_index;
-} dpdk_crypto_input_trace_t;
-
-static u8 *
-format_dpdk_crypto_input_trace (u8 * s, va_list * args)
-{
- CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
- CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
- dpdk_crypto_input_trace_t *t = va_arg (*args, dpdk_crypto_input_trace_t *);
-
- s = format (s, "dpdk_crypto: cryptodev-id %u queue-pair %u next-index %d",
- t->cdev, t->qp, t->next_index);
-
- s = format (s, " status %u sa-idx %u\n", t->status, t->sa_idx);
-
- return s;
-}
-
-static_always_inline u32
-dpdk_crypto_dequeue (vlib_main_t * vm, vlib_node_runtime_t * node,
- crypto_qp_data_t * qpd)
-{
- u32 n_deq, *to_next = 0, next_index, n_cops, def_next_index;
- struct rte_crypto_op **cops = qpd->cops;
-
- if (qpd->inflights == 0)
- return 0;
-
- if (qpd->is_outbound)
- def_next_index = DPDK_CRYPTO_INPUT_NEXT_ENCRYPT_POST;
- else
- def_next_index = DPDK_CRYPTO_INPUT_NEXT_DECRYPT_POST;
-
- n_cops = rte_cryptodev_dequeue_burst (qpd->dev_id, qpd->qp_id,
- cops, VLIB_FRAME_SIZE);
- n_deq = n_cops;
- next_index = def_next_index;
-
- qpd->inflights -= n_cops;
- ASSERT (qpd->inflights >= 0);
-
- while (n_cops > 0)
- {
- u32 n_left_to_next;
-
- vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
-
- while (n_cops > 0 && n_left_to_next > 0)
- {
- u32 bi0, next0;
- vlib_buffer_t *b0 = 0;
- struct rte_crypto_op *cop;
- struct rte_crypto_sym_op *sym_cop;
-
- cop = cops[0];
- cops += 1;
- n_cops -= 1;
- n_left_to_next -= 1;
-
- next0 = def_next_index;
-
- if (PREDICT_FALSE (cop->status != RTE_CRYPTO_OP_STATUS_SUCCESS))
- {
- next0 = DPDK_CRYPTO_INPUT_NEXT_DROP;
- vlib_node_increment_counter (vm, dpdk_crypto_input_node.index,
- DPDK_CRYPTO_INPUT_ERROR_COP_FAILED,
- 1);
- }
- cop->status = RTE_CRYPTO_OP_STATUS_NOT_PROCESSED;
-
- sym_cop = (struct rte_crypto_sym_op *) (cop + 1);
- b0 = vlib_buffer_from_rte_mbuf (sym_cop->m_src);
- bi0 = vlib_get_buffer_index (vm, b0);
-
- to_next[0] = bi0;
- to_next += 1;
-
- if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
- {
- vlib_trace_next_frame (vm, node, next0);
- dpdk_crypto_input_trace_t *tr =
- vlib_add_trace (vm, node, b0, sizeof (*tr));
- tr->cdev = qpd->dev_id;
- tr->qp = qpd->qp_id;
- tr->status = cop->status;
- tr->next_index = next0;
- tr->sa_idx = vnet_buffer (b0)->ipsec.sad_index;
- }
-
- vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
- n_left_to_next, bi0, next0);
- }
- vlib_put_next_frame (vm, node, next_index, n_left_to_next);
- }
-
- crypto_free_cop (qpd, qpd->cops, n_deq);
-
- vlib_node_increment_counter (vm, dpdk_crypto_input_node.index,
- DPDK_CRYPTO_INPUT_ERROR_DQ_COPS, n_deq);
- return n_deq;
-}
-
-static uword
-dpdk_crypto_input_fn (vlib_main_t * vm, vlib_node_runtime_t * node,
- vlib_frame_t * frame)
-{
- u32 cpu_index = os_get_cpu_number ();
- dpdk_crypto_main_t *dcm = &dpdk_crypto_main;
- crypto_worker_main_t *cwm = &dcm->workers_main[cpu_index];
- crypto_qp_data_t *qpd;
- u32 n_deq = 0;
-
- /* *INDENT-OFF* */
- vec_foreach (qpd, cwm->qp_data)
- n_deq += dpdk_crypto_dequeue(vm, node, qpd);
- /* *INDENT-ON* */
-
- return n_deq;
-}
-
-VLIB_REGISTER_NODE (dpdk_crypto_input_node) =
-{
- .function = dpdk_crypto_input_fn,.name = "dpdk-crypto-input",.format_trace =
- format_dpdk_crypto_input_trace,.type = VLIB_NODE_TYPE_INPUT,.state =
- VLIB_NODE_STATE_DISABLED,.n_errors =
- DPDK_CRYPTO_INPUT_N_ERROR,.error_strings =
- dpdk_crypto_input_error_strings,.n_next_nodes =
- DPDK_CRYPTO_INPUT_N_NEXT,.next_nodes =
- {
-#define _(s,n) [DPDK_CRYPTO_INPUT_NEXT_##s] = n,
- foreach_dpdk_crypto_input_next
-#undef _
- }
-,};
-
-#if DPDK_CRYPTO==1
-VLIB_NODE_FUNCTION_MULTIARCH (dpdk_crypto_input_node, dpdk_crypto_input_fn)
-#endif
-/*
- * fd.io coding-style-patch-verification: ON
- *
- * Local Variables:
- * eval: (c-set-style "gnu")
- * End:
- */
diff --git a/vnet/vnet/devices/dpdk/ipsec/dir.dox b/vnet/vnet/devices/dpdk/ipsec/dir.dox
deleted file mode 100644
index ffebfc4d62e..00000000000
--- a/vnet/vnet/devices/dpdk/ipsec/dir.dox
+++ /dev/null
@@ -1,18 +0,0 @@
-/*
- * Copyright (c) 2016 Intel and/or its affiliates.
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at:
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-/**
- @dir vnet/vnet/devices/dpdk/ipsec
- @brief IPSec ESP encrypt/decrypt using DPDK Cryptodev API
-*/
diff --git a/vnet/vnet/devices/dpdk/ipsec/dpdk_crypto_ipsec_doc.md b/vnet/vnet/devices/dpdk/ipsec/dpdk_crypto_ipsec_doc.md
deleted file mode 100644
index 8089696f4a0..00000000000
--- a/vnet/vnet/devices/dpdk/ipsec/dpdk_crypto_ipsec_doc.md
+++ /dev/null
@@ -1,73 +0,0 @@
-# VPP IPSec implementation using DPDK Cryptodev API {#dpdk_crypto_ipsec_doc}
-
-This document describes the implementation and usage of VPP IPsec with the DPDK Cryptodev API.
-
-
-## VPP IPsec with DPDK Cryptodev
-
-DPDK Cryptodev is an asynchronous crypto API that supports both Hardware and Software implementations (for more details refer to [DPDK Cryptography Device Library documentation](http://dpdk.org/doc/guides/prog_guide/cryptodev_lib.html)).
-
-When DPDK Cryptodev support is enabled, the node graph is modified by adding and replacing some of the nodes.
-
-The following nodes are replaced:
-* esp-encrypt -> dpdk-esp-encrypt
-* esp-decrypt -> dpdk-esp-decrypt
-
-The following nodes are added:
-* dpdk-crypto-input : polling input node that dequeues completed crypto operations from the crypto devices.
-* dpdk-esp-encrypt-post : internal node.
-* dpdk-esp-decrypt-post : internal node.
-
-
-### How to enable VPP IPSec with DPDK Cryptodev support
-
-To enable DPDK Cryptodev support (disabled by default), the following build option is required:
-
- vpp_uses_dpdk_cryptodev=yes
-
-There are a couple of ways to achieve this:
-* uncomment/add it in the platforms config (i.e. build-data/platforms/vpp.mk)
-* set the option when building vpp (e.g. make vpp_uses_dpdk_cryptodev=yes build-release)
-
-
-### Crypto Resources allocation
-
-VPP allocates crypto resources on a best-effort basis:
-* hardware crypto resources are allocated first, then software.
-* if there are not enough crypto resources for all workers, any packet reaching the ESP encrypt/decrypt nodes is dropped and the following warning is displayed:
-
- 0: dpdk_ipsec_init: not enough cryptodevs for ipsec
-
-
-### Configuration example
-
-No special IPsec configuration is required.
-
-Once DPDK Cryptodev is enabled, the user only needs to provide the cryptodevs in startup.conf.
-
-Example startup.conf:
-
-```
-dpdk {
- socket-mem 1024,1024
- num-mbufs 131072
- dev 0000:81:00.0
- dev 0000:81:00.1
- dev 0000:85:01.0
- dev 0000:85:01.1
- vdev cryptodev_aesni_mb_pmd,socket_id=1
- vdev cryptodev_aesni_mb_pmd,socket_id=1
-}
-```
-
-In the above configuration:
-* 0000:85:01.0 and 0000:85:01.1 are crypto device BDFs; they require the same driver binding as DPDK Ethernet devices but do not support any extra configuration options.
-* Two AESNI-MB Software Cryptodev PMDs are created in NUMA node 1.
-
-For further details refer to the [DPDK Crypto Device Driver documentation](http://dpdk.org/doc/guides/cryptodevs/index.html).
-
-### Operational data
-
-The following CLI command displays the Cryptodev/Worker mapping:
-
- show crypto device mapping [verbose]
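A minimal sketch of the workflow the removed document describes, assuming a standard VPP build tree and the vppctl CLI wrapper (both assumptions of this illustration, not taken from the deleted sources):

```
# build with DPDK Cryptodev support enabled
make vpp_uses_dpdk_cryptodev=yes build-release

# after VPP starts with the cryptodevs listed in startup.conf,
# inspect the per-worker cryptodev/queue-pair mapping
vppctl show crypto device mapping verbose
```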
diff --git a/vnet/vnet/devices/dpdk/ipsec/esp.h b/vnet/vnet/devices/dpdk/ipsec/esp.h
deleted file mode 100644
index 7ef90c49816..00000000000
--- a/vnet/vnet/devices/dpdk/ipsec/esp.h
+++ /dev/null
@@ -1,295 +0,0 @@
-/*
- * Copyright (c) 2016 Intel and/or its affiliates.
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at:
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#ifndef __DPDK_ESP_H__
-#define __DPDK_ESP_H__
-
-#include <vnet/devices/dpdk/ipsec/ipsec.h>
-#include <vnet/ipsec/ipsec.h>
-#include <vnet/ipsec/esp.h>
-
-typedef struct
-{
- enum rte_crypto_cipher_algorithm algo;
- u8 key_len;
- u8 iv_len;
-} dpdk_esp_crypto_alg_t;
-
-typedef struct
-{
- enum rte_crypto_auth_algorithm algo;
- u8 trunc_size;
-} dpdk_esp_integ_alg_t;
-
-typedef struct
-{
- dpdk_esp_crypto_alg_t *esp_crypto_algs;
- dpdk_esp_integ_alg_t *esp_integ_algs;
-} dpdk_esp_main_t;
-
-dpdk_esp_main_t dpdk_esp_main;
-
-static_always_inline void
-dpdk_esp_init ()
-{
- dpdk_esp_main_t *em = &dpdk_esp_main;
- dpdk_esp_integ_alg_t *i;
- dpdk_esp_crypto_alg_t *c;
-
- vec_validate (em->esp_crypto_algs, IPSEC_CRYPTO_N_ALG - 1);
-
- c = &em->esp_crypto_algs[IPSEC_CRYPTO_ALG_AES_CBC_128];
- c->algo = RTE_CRYPTO_CIPHER_AES_CBC;
- c->key_len = 16;
- c->iv_len = 16;
-
- c = &em->esp_crypto_algs[IPSEC_CRYPTO_ALG_AES_CBC_192];
- c->algo = RTE_CRYPTO_CIPHER_AES_CBC;
- c->key_len = 24;
- c->iv_len = 16;
-
- c = &em->esp_crypto_algs[IPSEC_CRYPTO_ALG_AES_CBC_256];
- c->algo = RTE_CRYPTO_CIPHER_AES_CBC;
- c->key_len = 32;
- c->iv_len = 16;
-
- c = &em->esp_crypto_algs[IPSEC_CRYPTO_ALG_AES_GCM_128];
- c->algo = RTE_CRYPTO_CIPHER_AES_GCM;
- c->key_len = 16;
- c->iv_len = 8;
-
- vec_validate (em->esp_integ_algs, IPSEC_INTEG_N_ALG - 1);
-
- i = &em->esp_integ_algs[IPSEC_INTEG_ALG_SHA1_96];
- i->algo = RTE_CRYPTO_AUTH_SHA1_HMAC;
- i->trunc_size = 12;
-
- i = &em->esp_integ_algs[IPSEC_INTEG_ALG_SHA_256_96];
- i->algo = RTE_CRYPTO_AUTH_SHA256_HMAC;
- i->trunc_size = 12;
-
- i = &em->esp_integ_algs[IPSEC_INTEG_ALG_SHA_256_128];
- i->algo = RTE_CRYPTO_AUTH_SHA256_HMAC;
- i->trunc_size = 16;
-
- i = &em->esp_integ_algs[IPSEC_INTEG_ALG_SHA_384_192];
- i->algo = RTE_CRYPTO_AUTH_SHA384_HMAC;
- i->trunc_size = 24;
-
- i = &em->esp_integ_algs[IPSEC_INTEG_ALG_SHA_512_256];
- i->algo = RTE_CRYPTO_AUTH_SHA512_HMAC;
- i->trunc_size = 32;
-
- i = &em->esp_integ_algs[IPSEC_INTEG_ALG_AES_GCM_128];
- i->algo = RTE_CRYPTO_AUTH_AES_GCM;
- i->trunc_size = 16;
-}
-
-static_always_inline int
-add_del_sa_sess (u32 sa_index, u8 is_add)
-{
- dpdk_crypto_main_t *dcm = &dpdk_crypto_main;
- crypto_worker_main_t *cwm;
- u8 skip_master = vlib_num_workers () > 0;
-
- /* *INDENT-OFF* */
- vec_foreach (cwm, dcm->workers_main)
- {
- crypto_sa_session_t *sa_sess;
- u8 is_outbound;
-
- if (skip_master)
- {
- skip_master = 0;
- continue;
- }
-
- for (is_outbound = 0; is_outbound < 2; is_outbound++)
- {
- if (is_add)
- {
- pool_get (cwm->sa_sess_d[is_outbound], sa_sess);
- }
- else
- {
- u8 dev_id;
-
- sa_sess = pool_elt_at_index (cwm->sa_sess_d[is_outbound], sa_index);
- dev_id = cwm->qp_data[sa_sess->qp_index].dev_id;
-
- if (!sa_sess->sess)
- continue;
-
- if (rte_cryptodev_sym_session_free(dev_id, sa_sess->sess))
- {
- clib_warning("failed to free session");
- return -1;
- }
- memset(sa_sess, 0, sizeof(sa_sess[0]));
- }
- }
- }
- /* *INDENT-OFF* */
-
- return 0;
-}
-
-static_always_inline int
-translate_crypto_algo(ipsec_crypto_alg_t crypto_algo,
- struct rte_crypto_sym_xform *cipher_xform)
-{
- switch (crypto_algo)
- {
- case IPSEC_CRYPTO_ALG_NONE:
- cipher_xform->cipher.algo = RTE_CRYPTO_CIPHER_NULL;
- break;
- case IPSEC_CRYPTO_ALG_AES_CBC_128:
- case IPSEC_CRYPTO_ALG_AES_CBC_192:
- case IPSEC_CRYPTO_ALG_AES_CBC_256:
- cipher_xform->cipher.algo = RTE_CRYPTO_CIPHER_AES_CBC;
- break;
- case IPSEC_CRYPTO_ALG_AES_GCM_128:
- cipher_xform->cipher.algo = RTE_CRYPTO_CIPHER_AES_GCM;
- break;
- default:
- return -1;
- }
-
- cipher_xform->type = RTE_CRYPTO_SYM_XFORM_CIPHER;
-
- return 0;
-}
-
-static_always_inline int
-translate_integ_algo(ipsec_integ_alg_t integ_alg,
- struct rte_crypto_sym_xform *auth_xform, int use_esn)
-{
- switch (integ_alg) {
- case IPSEC_INTEG_ALG_NONE:
- auth_xform->auth.algo = RTE_CRYPTO_AUTH_NULL;
- auth_xform->auth.digest_length = 0;
- break;
- case IPSEC_INTEG_ALG_SHA1_96:
- auth_xform->auth.algo = RTE_CRYPTO_AUTH_SHA1_HMAC;
- auth_xform->auth.digest_length = 12;
- break;
- case IPSEC_INTEG_ALG_SHA_256_96:
- auth_xform->auth.algo = RTE_CRYPTO_AUTH_SHA256_HMAC;
- auth_xform->auth.digest_length = 12;
- break;
- case IPSEC_INTEG_ALG_SHA_256_128:
- auth_xform->auth.algo = RTE_CRYPTO_AUTH_SHA256_HMAC;
- auth_xform->auth.digest_length = 16;
- break;
- case IPSEC_INTEG_ALG_SHA_384_192:
- auth_xform->auth.algo = RTE_CRYPTO_AUTH_SHA384_HMAC;
- auth_xform->auth.digest_length = 24;
- break;
- case IPSEC_INTEG_ALG_SHA_512_256:
- auth_xform->auth.algo = RTE_CRYPTO_AUTH_SHA512_HMAC;
- auth_xform->auth.digest_length = 32;
- break;
- case IPSEC_INTEG_ALG_AES_GCM_128:
- auth_xform->auth.algo = RTE_CRYPTO_AUTH_AES_GCM;
- auth_xform->auth.digest_length = 16;
- auth_xform->auth.add_auth_data_length = use_esn? 12 : 8;
- break;
- default:
- return -1;
- }
-
- auth_xform->type = RTE_CRYPTO_SYM_XFORM_AUTH;
-
- return 0;
-}
-
-static_always_inline int
-create_sym_sess(ipsec_sa_t *sa, crypto_sa_session_t *sa_sess, u8 is_outbound)
-{
- u32 cpu_index = os_get_cpu_number();
- dpdk_crypto_main_t * dcm = &dpdk_crypto_main;
- crypto_worker_main_t *cwm = &dcm->workers_main[cpu_index];
- struct rte_crypto_sym_xform cipher_xform = {0};
- struct rte_crypto_sym_xform auth_xform = {0};
- struct rte_crypto_sym_xform *xfs;
- uword key = 0, *data;
- crypto_worker_qp_key_t *p_key = (crypto_worker_qp_key_t *)&key;
-
- if (sa->crypto_alg == IPSEC_CRYPTO_ALG_AES_GCM_128)
- {
- sa->crypto_key_len -= 4;
- clib_memcpy(&sa->salt, &sa->crypto_key[sa->crypto_key_len], 4);
- }
- else
- {
- sa->salt = (u32) rand();
- }
-
- cipher_xform.type = RTE_CRYPTO_SYM_XFORM_CIPHER;
- cipher_xform.cipher.key.data = sa->crypto_key;
- cipher_xform.cipher.key.length = sa->crypto_key_len;
-
- auth_xform.type = RTE_CRYPTO_SYM_XFORM_AUTH;
- auth_xform.auth.key.data = sa->integ_key;
- auth_xform.auth.key.length = sa->integ_key_len;
-
- if (translate_crypto_algo(sa->crypto_alg, &cipher_xform) < 0)
- return -1;
- p_key->cipher_algo = cipher_xform.cipher.algo;
-
- if (translate_integ_algo(sa->integ_alg, &auth_xform, sa->use_esn) < 0)
- return -1;
- p_key->auth_algo = auth_xform.auth.algo;
-
- if (is_outbound)
- {
- cipher_xform.cipher.op = RTE_CRYPTO_CIPHER_OP_ENCRYPT;
- auth_xform.auth.op = RTE_CRYPTO_AUTH_OP_GENERATE;
- cipher_xform.next = &auth_xform;
- xfs = &cipher_xform;
- }
- else
- {
- cipher_xform.cipher.op = RTE_CRYPTO_CIPHER_OP_DECRYPT;
- auth_xform.auth.op = RTE_CRYPTO_AUTH_OP_VERIFY;
- auth_xform.next = &cipher_xform;
- xfs = &auth_xform;
- }
-
- p_key->is_outbound = is_outbound;
-
- data = hash_get(cwm->algo_qp_map, key);
- if (!data)
- return -1;
-
- sa_sess->sess =
- rte_cryptodev_sym_session_create(cwm->qp_data[*data].dev_id, xfs);
-
- if (!sa_sess->sess)
- return -1;
-
- sa_sess->qp_index = (u8)*data;
-
- return 0;
-}
-
-#endif /* __DPDK_ESP_H__ */
-
-/*
- * fd.io coding-style-patch-verification: ON
- *
- * Local Variables:
- * eval: (c-set-style "gnu")
- * End:
- */
diff --git a/vnet/vnet/devices/dpdk/ipsec/esp_decrypt.c b/vnet/vnet/devices/dpdk/ipsec/esp_decrypt.c
deleted file mode 100644
index 89ab9f9bc43..00000000000
--- a/vnet/vnet/devices/dpdk/ipsec/esp_decrypt.c
+++ /dev/null
@@ -1,583 +0,0 @@
-/*
- * esp_decrypt.c : IPSec ESP Decrypt node using DPDK Cryptodev
- *
- * Copyright (c) 2016 Intel and/or its affiliates.
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at:
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <vnet/vnet.h>
-#include <vnet/api_errno.h>
-#include <vnet/ip/ip.h>
-
-#include <vnet/ipsec/ipsec.h>
-#include <vnet/devices/dpdk/ipsec/ipsec.h>
-#include <vnet/devices/dpdk/ipsec/esp.h>
-
-#define foreach_esp_decrypt_next \
-_(DROP, "error-drop") \
-_(IP4_INPUT, "ip4-input") \
-_(IP6_INPUT, "ip6-input")
-
-#define _(v, s) ESP_DECRYPT_NEXT_##v,
-typedef enum {
- foreach_esp_decrypt_next
-#undef _
- ESP_DECRYPT_N_NEXT,
-} esp_decrypt_next_t;
-
-#define foreach_esp_decrypt_error \
- _(RX_PKTS, "ESP pkts received") \
- _(DECRYPTION_FAILED, "ESP decryption failed") \
- _(REPLAY, "SA replayed packet") \
- _(NOT_IP, "Not IP packet (dropped)") \
- _(ENQ_FAIL, "Enqueue failed (buffer full)") \
- _(NO_CRYPTODEV, "Cryptodev not configured") \
- _(BAD_LEN, "Invalid ciphertext length") \
- _(UNSUPPORTED, "Cipher/Auth not supported")
-
-
-typedef enum {
-#define _(sym,str) ESP_DECRYPT_ERROR_##sym,
- foreach_esp_decrypt_error
-#undef _
- ESP_DECRYPT_N_ERROR,
-} esp_decrypt_error_t;
-
-static char * esp_decrypt_error_strings[] = {
-#define _(sym,string) string,
- foreach_esp_decrypt_error
-#undef _
-};
-
-vlib_node_registration_t dpdk_esp_decrypt_node;
-
-typedef struct {
- ipsec_crypto_alg_t crypto_alg;
- ipsec_integ_alg_t integ_alg;
-} esp_decrypt_trace_t;
-
-/* packet trace format function */
-static u8 * format_esp_decrypt_trace (u8 * s, va_list * args)
-{
- CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
- CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
- esp_decrypt_trace_t * t = va_arg (*args, esp_decrypt_trace_t *);
-
- s = format (s, "esp: crypto %U integrity %U",
- format_ipsec_crypto_alg, t->crypto_alg,
- format_ipsec_integ_alg, t->integ_alg);
- return s;
-}
-
-static uword
-dpdk_esp_decrypt_node_fn (vlib_main_t * vm,
- vlib_node_runtime_t * node,
- vlib_frame_t * from_frame)
-{
- u32 n_left_from, *from, *to_next, next_index;
- ipsec_main_t *im = &ipsec_main;
- u32 cpu_index = os_get_cpu_number();
- dpdk_crypto_main_t * dcm = &dpdk_crypto_main;
- dpdk_esp_main_t * em = &dpdk_esp_main;
- u32 i;
-
- from = vlib_frame_vector_args (from_frame);
- n_left_from = from_frame->n_vectors;
-
- if (PREDICT_FALSE(!dcm->workers_main))
- {
- vlib_node_increment_counter (vm, dpdk_esp_decrypt_node.index,
- ESP_DECRYPT_ERROR_NO_CRYPTODEV, n_left_from);
- vlib_buffer_free(vm, from, n_left_from);
- return n_left_from;
- }
-
- crypto_worker_main_t *cwm = vec_elt_at_index(dcm->workers_main, cpu_index);
- u32 n_qps = vec_len(cwm->qp_data);
- struct rte_crypto_op ** cops_to_enq[n_qps];
- u32 n_cop_qp[n_qps], * bi_to_enq[n_qps];
-
- for (i = 0; i < n_qps; i++)
- {
- bi_to_enq[i] = cwm->qp_data[i].bi;
- cops_to_enq[i] = cwm->qp_data[i].cops;
- }
-
- memset(n_cop_qp, 0, n_qps * sizeof(u32));
-
- crypto_alloc_cops();
-
- next_index = ESP_DECRYPT_NEXT_DROP;
-
- while (n_left_from > 0)
- {
- u32 n_left_to_next;
-
- vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
-
- while (n_left_from > 0 && n_left_to_next > 0)
- {
- u32 bi0, sa_index0 = ~0, seq, icv_size, iv_size;
- vlib_buffer_t * b0;
- esp_header_t * esp0;
- ipsec_sa_t * sa0;
- struct rte_mbuf * mb0 = 0;
- const int BLOCK_SIZE = 16;
- crypto_sa_session_t * sa_sess;
- void * sess;
- u16 qp_index;
- struct rte_crypto_op * cop = 0;
-
- bi0 = from[0];
- from += 1;
- n_left_from -= 1;
-
- b0 = vlib_get_buffer (vm, bi0);
- esp0 = vlib_buffer_get_current (b0);
-
- sa_index0 = vnet_buffer(b0)->ipsec.sad_index;
- sa0 = pool_elt_at_index (im->sad, sa_index0);
-
- seq = clib_host_to_net_u32(esp0->seq);
-
- /* anti-replay check */
- if (sa0->use_anti_replay)
- {
- int rv = 0;
-
- if (PREDICT_TRUE(sa0->use_esn))
- rv = esp_replay_check_esn(sa0, seq);
- else
- rv = esp_replay_check(sa0, seq);
-
- if (PREDICT_FALSE(rv))
- {
- clib_warning ("anti-replay SPI %u seq %u", sa0->spi, seq);
- vlib_node_increment_counter (vm, dpdk_esp_decrypt_node.index,
- ESP_DECRYPT_ERROR_REPLAY, 1);
- to_next[0] = bi0;
- to_next += 1;
- n_left_to_next -= 1;
- goto trace;
- }
- }
-
- if (PREDICT_FALSE(sa0->integ_alg == IPSEC_INTEG_ALG_NONE) ||
- PREDICT_FALSE(sa0->crypto_alg == IPSEC_CRYPTO_ALG_NONE))
- {
- clib_warning ("SPI %u : only cipher + auth supported", sa0->spi);
- vlib_node_increment_counter (vm, dpdk_esp_decrypt_node.index,
- ESP_DECRYPT_ERROR_UNSUPPORTED, 1);
- to_next[0] = bi0;
- to_next += 1;
- n_left_to_next -= 1;
- goto trace;
- }
-
- sa_sess = pool_elt_at_index(cwm->sa_sess_d[0], sa_index0);
-
- if (PREDICT_FALSE(!sa_sess->sess))
- {
- int ret = create_sym_sess(sa0, sa_sess, 0);
- ASSERT(ret == 0);
- }
-
- sess = sa_sess->sess;
- qp_index = sa_sess->qp_index;
-
- ASSERT (vec_len (vec_elt (cwm->qp_data, qp_index).free_cops) > 0);
- cop = vec_pop (vec_elt (cwm->qp_data, qp_index).free_cops);
- ASSERT (cop->status == RTE_CRYPTO_OP_STATUS_NOT_PROCESSED);
-
- cops_to_enq[qp_index][0] = cop;
- cops_to_enq[qp_index] += 1;
- n_cop_qp[qp_index] += 1;
- bi_to_enq[qp_index][0] = bi0;
- bi_to_enq[qp_index] += 1;
-
- rte_crypto_op_attach_sym_session(cop, sess);
-
- icv_size = em->esp_integ_algs[sa0->integ_alg].trunc_size;
- iv_size = em->esp_crypto_algs[sa0->crypto_alg].iv_len;
-
- /* Convert vlib buffer to mbuf */
- mb0 = rte_mbuf_from_vlib_buffer(b0);
- mb0->data_len = b0->current_length;
- mb0->pkt_len = b0->current_length;
- mb0->data_off = RTE_PKTMBUF_HEADROOM + b0->current_data;
-
- /* Outer IP header has already been stripped */
- u16 payload_len = rte_pktmbuf_pkt_len(mb0) - sizeof (esp_header_t) -
- iv_size - icv_size;
-
- if ((payload_len & (BLOCK_SIZE - 1)) || (payload_len <= 0))
- {
- clib_warning ("payload %u not multiple of %d\n",
- payload_len, BLOCK_SIZE);
- vlib_node_increment_counter (vm, dpdk_esp_decrypt_node.index,
- ESP_DECRYPT_ERROR_BAD_LEN, 1);
- vec_add (vec_elt (cwm->qp_data, qp_index).free_cops, &cop, 1);
- bi_to_enq[qp_index] -= 1;
- cops_to_enq[qp_index] -= 1;
- n_cop_qp[qp_index] -= 1;
- to_next[0] = bi0;
- to_next += 1;
- n_left_to_next -= 1;
- goto trace;
- }
-
- struct rte_crypto_sym_op *sym_cop = (struct rte_crypto_sym_op *)(cop + 1);
-
- sym_cop->m_src = mb0;
- sym_cop->cipher.data.offset = sizeof (esp_header_t) + iv_size;
- sym_cop->cipher.data.length = payload_len;
-
- u8 *iv = rte_pktmbuf_mtod_offset(mb0, void*, sizeof (esp_header_t));
- dpdk_cop_priv_t * priv = (dpdk_cop_priv_t *)(sym_cop + 1);
-
- if (sa0->crypto_alg == IPSEC_CRYPTO_ALG_AES_GCM_128)
- {
- dpdk_gcm_cnt_blk *icb = &priv->cb;
- icb->salt = sa0->salt;
- clib_memcpy(icb->iv, iv, 8);
- icb->cnt = clib_host_to_net_u32(1);
- sym_cop->cipher.iv.data = (u8 *)icb;
- sym_cop->cipher.iv.phys_addr = cop->phys_addr +
- (uintptr_t)icb - (uintptr_t)cop;
- sym_cop->cipher.iv.length = 16;
-
- u8 *aad = priv->aad;
- clib_memcpy(aad, iv - sizeof(esp_header_t), 8);
- sym_cop->auth.aad.data = aad;
- sym_cop->auth.aad.phys_addr = cop->phys_addr +
- (uintptr_t)aad - (uintptr_t)cop;
- if (sa0->use_esn)
- {
- *((u32*)&aad[8]) = sa0->seq_hi;
- sym_cop->auth.aad.length = 12;
- }
- else
- {
- sym_cop->auth.aad.length = 8;
- }
-
- sym_cop->auth.digest.data = rte_pktmbuf_mtod_offset(mb0, void*,
- rte_pktmbuf_pkt_len(mb0) - icv_size);
- sym_cop->auth.digest.phys_addr = rte_pktmbuf_mtophys_offset(mb0,
- rte_pktmbuf_pkt_len(mb0) - icv_size);
- sym_cop->auth.digest.length = icv_size;
-
- }
- else
- {
- sym_cop->cipher.iv.data = rte_pktmbuf_mtod_offset(mb0, void*,
- sizeof (esp_header_t));
- sym_cop->cipher.iv.phys_addr = rte_pktmbuf_mtophys_offset(mb0,
- sizeof (esp_header_t));
- sym_cop->cipher.iv.length = iv_size;
-
- if (sa0->use_esn)
- {
- dpdk_cop_priv_t* priv = (dpdk_cop_priv_t*) (sym_cop + 1);
- u8* payload_end = rte_pktmbuf_mtod_offset(
- mb0, u8*, sizeof(esp_header_t) + iv_size + payload_len);
-
- clib_memcpy (priv->icv, payload_end, icv_size);
- *((u32*) payload_end) = sa0->seq_hi;
- sym_cop->auth.data.offset = 0;
- sym_cop->auth.data.length = sizeof(esp_header_t) + iv_size
- + payload_len + sizeof(sa0->seq_hi);
- sym_cop->auth.digest.data = priv->icv;
- sym_cop->auth.digest.phys_addr = cop->phys_addr
- + (uintptr_t) priv->icv - (uintptr_t) cop;
- sym_cop->auth.digest.length = icv_size;
- }
- else
- {
- sym_cop->auth.data.offset = 0;
- sym_cop->auth.data.length = sizeof(esp_header_t) +
- iv_size + payload_len;
-
- sym_cop->auth.digest.data = rte_pktmbuf_mtod_offset(mb0, void*,
- rte_pktmbuf_pkt_len(mb0) - icv_size);
- sym_cop->auth.digest.phys_addr = rte_pktmbuf_mtophys_offset(mb0,
- rte_pktmbuf_pkt_len(mb0) - icv_size);
- sym_cop->auth.digest.length = icv_size;
- }
- }
-
-trace:
- if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED))
- {
- esp_decrypt_trace_t *tr = vlib_add_trace (vm, node, b0, sizeof (*tr));
- tr->crypto_alg = sa0->crypto_alg;
- tr->integ_alg = sa0->integ_alg;
- }
- }
- vlib_put_next_frame (vm, node, next_index, n_left_to_next);
- }
- vlib_node_increment_counter (vm, dpdk_esp_decrypt_node.index,
- ESP_DECRYPT_ERROR_RX_PKTS,
- from_frame->n_vectors);
- crypto_qp_data_t *qpd;
- /* *INDENT-OFF* */
- vec_foreach_index (i, cwm->qp_data)
- {
- u32 enq;
-
- qpd = vec_elt_at_index(cwm->qp_data, i);
- enq = rte_cryptodev_enqueue_burst(qpd->dev_id, qpd->qp_id,
- qpd->cops, n_cop_qp[i]);
- qpd->inflights += enq;
-
- if (PREDICT_FALSE(enq < n_cop_qp[i]))
- {
- crypto_free_cop (qpd, &qpd->cops[enq], n_cop_qp[i] - enq);
- vlib_buffer_free (vm, &qpd->bi[enq], n_cop_qp[i] - enq);
-
- vlib_node_increment_counter (vm, dpdk_esp_decrypt_node.index,
- ESP_DECRYPT_ERROR_ENQ_FAIL,
- n_cop_qp[i] - enq);
- }
- }
- /* *INDENT-ON* */
-
- return from_frame->n_vectors;
-}
-
-VLIB_REGISTER_NODE (dpdk_esp_decrypt_node) = {
- .function = dpdk_esp_decrypt_node_fn,
- .name = "dpdk-esp-decrypt",
- .vector_size = sizeof (u32),
- .format_trace = format_esp_decrypt_trace,
- .type = VLIB_NODE_TYPE_INTERNAL,
-
- .n_errors = ARRAY_LEN(esp_decrypt_error_strings),
- .error_strings = esp_decrypt_error_strings,
-
- .n_next_nodes = ESP_DECRYPT_N_NEXT,
- .next_nodes = {
-#define _(s,n) [ESP_DECRYPT_NEXT_##s] = n,
- foreach_esp_decrypt_next
-#undef _
- },
-};
-
-VLIB_NODE_FUNCTION_MULTIARCH (dpdk_esp_decrypt_node, dpdk_esp_decrypt_node_fn)
-
-/*
- * Decrypt Post Node
- */
-
-#define foreach_esp_decrypt_post_error \
- _(PKTS, "ESP post pkts")
-
-typedef enum {
-#define _(sym,str) ESP_DECRYPT_POST_ERROR_##sym,
- foreach_esp_decrypt_post_error
-#undef _
- ESP_DECRYPT_POST_N_ERROR,
-} esp_decrypt_post_error_t;
-
-static char * esp_decrypt_post_error_strings[] = {
-#define _(sym,string) string,
- foreach_esp_decrypt_post_error
-#undef _
-};
-
-vlib_node_registration_t dpdk_esp_decrypt_post_node;
-
-static u8 * format_esp_decrypt_post_trace (u8 * s, va_list * args)
-{
- return s;
-}
-
-static uword
-dpdk_esp_decrypt_post_node_fn (vlib_main_t * vm,
- vlib_node_runtime_t * node,
- vlib_frame_t * from_frame)
-{
- u32 n_left_from, *from, *to_next = 0, next_index;
- ipsec_sa_t * sa0;
- u32 sa_index0 = ~0;
- ipsec_main_t *im = &ipsec_main;
- dpdk_esp_main_t *em = &dpdk_esp_main;
-
- from = vlib_frame_vector_args (from_frame);
- n_left_from = from_frame->n_vectors;
-
- next_index = node->cached_next_index;
-
- while (n_left_from > 0)
- {
- u32 n_left_to_next;
-
- vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
-
- while (n_left_from > 0 && n_left_to_next > 0)
- {
- esp_footer_t * f0;
- u32 bi0, next0, icv_size, iv_size;
- vlib_buffer_t * b0 = 0;
- ip4_header_t *ih4 = 0, *oh4 = 0;
- ip6_header_t *ih6 = 0, *oh6 = 0;
- u8 tunnel_mode = 1;
- u8 transport_ip6 = 0;
-
- next0 = ESP_DECRYPT_NEXT_DROP;
-
- bi0 = from[0];
- from += 1;
- n_left_from -= 1;
- n_left_to_next -= 1;
-
- b0 = vlib_get_buffer (vm, bi0);
-
- sa_index0 = vnet_buffer(b0)->ipsec.sad_index;
- sa0 = pool_elt_at_index (im->sad, sa_index0);
-
- to_next[0] = bi0;
- to_next += 1;
-
- icv_size = em->esp_integ_algs[sa0->integ_alg].trunc_size;
- iv_size = em->esp_crypto_algs[sa0->crypto_alg].iv_len;
-
- if (sa0->use_anti_replay)
- {
- esp_header_t * esp0 = vlib_buffer_get_current (b0);
- u32 seq;
- seq = clib_host_to_net_u32(esp0->seq);
- if (PREDICT_TRUE(sa0->use_esn))
- esp_replay_advance_esn(sa0, seq);
- else
- esp_replay_advance(sa0, seq);
- }
-
- ih4 = (ip4_header_t *) (b0->data + sizeof(ethernet_header_t));
- vlib_buffer_advance (b0, sizeof (esp_header_t) + iv_size);
-
- b0->current_length -= (icv_size + 2);
- b0->flags = VLIB_BUFFER_TOTAL_LENGTH_VALID;
- f0 = (esp_footer_t *) ((u8 *) vlib_buffer_get_current (b0) +
- b0->current_length);
- b0->current_length -= f0->pad_length;
-
- /* transport mode */
- if (PREDICT_FALSE(!sa0->is_tunnel && !sa0->is_tunnel_ip6))
- {
- tunnel_mode = 0;
-
- if (PREDICT_TRUE((ih4->ip_version_and_header_length & 0xF0) != 0x40))
- {
- if (PREDICT_TRUE((ih4->ip_version_and_header_length & 0xF0) == 0x60))
- transport_ip6 = 1;
- else
- {
- clib_warning("next header: 0x%x", f0->next_header);
- vlib_node_increment_counter (vm, dpdk_esp_decrypt_node.index,
- ESP_DECRYPT_ERROR_NOT_IP, 1);
- goto trace;
- }
- }
- }
-
- if (PREDICT_TRUE (tunnel_mode))
- {
- if (PREDICT_TRUE(f0->next_header == IP_PROTOCOL_IP_IN_IP))
- next0 = ESP_DECRYPT_NEXT_IP4_INPUT;
- else if (f0->next_header == IP_PROTOCOL_IPV6)
- next0 = ESP_DECRYPT_NEXT_IP6_INPUT;
- else
- {
- clib_warning("next header: 0x%x", f0->next_header);
- vlib_node_increment_counter (vm, dpdk_esp_decrypt_node.index,
- ESP_DECRYPT_ERROR_DECRYPTION_FAILED,
- 1);
- goto trace;
- }
- }
- /* transport mode */
- else
- {
- if (PREDICT_FALSE(transport_ip6))
- {
- ih6 = (ip6_header_t *) (b0->data + sizeof(ethernet_header_t));
- vlib_buffer_advance (b0, -sizeof(ip6_header_t));
- oh6 = vlib_buffer_get_current (b0);
- memmove(oh6, ih6, sizeof(ip6_header_t));
-
- next0 = ESP_DECRYPT_NEXT_IP6_INPUT;
- oh6->protocol = f0->next_header;
- oh6->payload_length =
- clib_host_to_net_u16 (
- vlib_buffer_length_in_chain(vm, b0) -
- sizeof (ip6_header_t));
- }
- else
- {
- vlib_buffer_advance (b0, -sizeof(ip4_header_t));
- oh4 = vlib_buffer_get_current (b0);
- memmove(oh4, ih4, sizeof(ip4_header_t));
-
- next0 = ESP_DECRYPT_NEXT_IP4_INPUT;
- oh4->ip_version_and_header_length = 0x45;
- oh4->fragment_id = 0;
- oh4->flags_and_fragment_offset = 0;
- oh4->protocol = f0->next_header;
- oh4->length = clib_host_to_net_u16 (
- vlib_buffer_length_in_chain (vm, b0));
- oh4->checksum = ip4_header_checksum (oh4);
- }
- }
-
- vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32)~0;
-
-trace:
- if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED))
- {
- esp_decrypt_trace_t *tr = vlib_add_trace (vm, node, b0, sizeof (*tr));
- tr->crypto_alg = sa0->crypto_alg;
- tr->integ_alg = sa0->integ_alg;
- }
-
- vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
- to_next, n_left_to_next, bi0, next0);
- }
- vlib_put_next_frame (vm, node, next_index, n_left_to_next);
- }
- vlib_node_increment_counter (vm, dpdk_esp_decrypt_post_node.index,
- ESP_DECRYPT_POST_ERROR_PKTS,
- from_frame->n_vectors);
-
- return from_frame->n_vectors;
-}
-
-VLIB_REGISTER_NODE (dpdk_esp_decrypt_post_node) = {
- .function = dpdk_esp_decrypt_post_node_fn,
- .name = "dpdk-esp-decrypt-post",
- .vector_size = sizeof (u32),
- .format_trace = format_esp_decrypt_post_trace,
- .type = VLIB_NODE_TYPE_INTERNAL,
-
- .n_errors = ARRAY_LEN(esp_decrypt_post_error_strings),
- .error_strings = esp_decrypt_post_error_strings,
-
- .n_next_nodes = ESP_DECRYPT_N_NEXT,
- .next_nodes = {
-#define _(s,n) [ESP_DECRYPT_NEXT_##s] = n,
- foreach_esp_decrypt_next
-#undef _
- },
-};
-
-VLIB_NODE_FUNCTION_MULTIARCH (dpdk_esp_decrypt_post_node, dpdk_esp_decrypt_post_node_fn)
diff --git a/vnet/vnet/devices/dpdk/ipsec/esp_encrypt.c b/vnet/vnet/devices/dpdk/ipsec/esp_encrypt.c
deleted file mode 100644
index 10bb4616eef..00000000000
--- a/vnet/vnet/devices/dpdk/ipsec/esp_encrypt.c
+++ /dev/null
@@ -1,598 +0,0 @@
-/*
- * esp_encrypt.c : IPSec ESP encrypt node using DPDK Cryptodev
- *
- * Copyright (c) 2016 Intel and/or its affiliates.
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at:
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <vnet/vnet.h>
-#include <vnet/api_errno.h>
-#include <vnet/ip/ip.h>
-
-#include <vnet/ipsec/ipsec.h>
-#include <vnet/devices/dpdk/ipsec/ipsec.h>
-#include <vnet/devices/dpdk/ipsec/esp.h>
-
-#define foreach_esp_encrypt_next \
-_(DROP, "error-drop") \
-_(IP4_LOOKUP, "ip4-lookup") \
-_(IP6_LOOKUP, "ip6-lookup") \
-_(INTERFACE_OUTPUT, "interface-output")
-
-#define _(v, s) ESP_ENCRYPT_NEXT_##v,
-typedef enum
-{
- foreach_esp_encrypt_next
-#undef _
- ESP_ENCRYPT_N_NEXT,
-} esp_encrypt_next_t;
-
-#define foreach_esp_encrypt_error \
- _(RX_PKTS, "ESP pkts received") \
- _(SEQ_CYCLED, "sequence number cycled") \
- _(ENQ_FAIL, "Enqueue failed (buffer full)") \
- _(NO_CRYPTODEV, "Cryptodev not configured") \
- _(UNSUPPORTED, "Cipher/Auth not supported")
-
-
-typedef enum
-{
-#define _(sym,str) ESP_ENCRYPT_ERROR_##sym,
- foreach_esp_encrypt_error
-#undef _
- ESP_ENCRYPT_N_ERROR,
-} esp_encrypt_error_t;
-
-static char *esp_encrypt_error_strings[] = {
-#define _(sym,string) string,
- foreach_esp_encrypt_error
-#undef _
-};
-
-vlib_node_registration_t dpdk_esp_encrypt_node;
-
-typedef struct
-{
- u32 spi;
- u32 seq;
- ipsec_crypto_alg_t crypto_alg;
- ipsec_integ_alg_t integ_alg;
-} esp_encrypt_trace_t;
-
-/* packet trace format function */
-static u8 *
-format_esp_encrypt_trace (u8 * s, va_list * args)
-{
- CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
- CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
- esp_encrypt_trace_t *t = va_arg (*args, esp_encrypt_trace_t *);
-
- s = format (s, "esp: spi %u seq %u crypto %U integrity %U",
- t->spi, t->seq,
- format_ipsec_crypto_alg, t->crypto_alg,
- format_ipsec_integ_alg, t->integ_alg);
- return s;
-}
-
-static uword
-dpdk_esp_encrypt_node_fn (vlib_main_t * vm,
- vlib_node_runtime_t * node,
- vlib_frame_t * from_frame)
-{
- u32 n_left_from, *from, *to_next, next_index;
- ipsec_main_t *im = &ipsec_main;
- u32 cpu_index = os_get_cpu_number ();
- dpdk_crypto_main_t *dcm = &dpdk_crypto_main;
- dpdk_esp_main_t *em = &dpdk_esp_main;
- u32 i;
-
- from = vlib_frame_vector_args (from_frame);
- n_left_from = from_frame->n_vectors;
-
- if (PREDICT_FALSE (!dcm->workers_main))
- {
- /* Likely there are not enough cryptodevs, so drop frame */
- vlib_node_increment_counter (vm, dpdk_esp_encrypt_node.index,
- ESP_ENCRYPT_ERROR_NO_CRYPTODEV,
- n_left_from);
- vlib_buffer_free (vm, from, n_left_from);
- return n_left_from;
- }
-
- crypto_worker_main_t *cwm = vec_elt_at_index (dcm->workers_main, cpu_index);
- u32 n_qps = vec_len (cwm->qp_data);
- struct rte_crypto_op **cops_to_enq[n_qps];
- u32 n_cop_qp[n_qps], *bi_to_enq[n_qps];
-
- for (i = 0; i < n_qps; i++)
- {
- bi_to_enq[i] = cwm->qp_data[i].bi;
- cops_to_enq[i] = cwm->qp_data[i].cops;
- }
-
- memset (n_cop_qp, 0, n_qps * sizeof (u32));
-
- crypto_alloc_cops ();
-
- next_index = ESP_ENCRYPT_NEXT_DROP;
-
- while (n_left_from > 0)
- {
- u32 n_left_to_next;
-
- vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
-
- while (n_left_from > 0 && n_left_to_next > 0)
- {
- u32 bi0, next0;
- vlib_buffer_t *b0 = 0;
- u32 sa_index0;
- ipsec_sa_t *sa0;
- ip4_and_esp_header_t *ih0, *oh0 = 0;
- ip6_and_esp_header_t *ih6_0, *oh6_0 = 0;
- struct rte_mbuf *mb0 = 0;
- esp_footer_t *f0;
- u8 is_ipv6;
- u8 ip_hdr_size;
- u8 next_hdr_type;
- u8 transport_mode = 0;
- const int BLOCK_SIZE = 16;
- u32 iv_size;
- u16 orig_sz;
- crypto_sa_session_t *sa_sess;
- void *sess;
- struct rte_crypto_op *cop = 0;
- u16 qp_index;
-
- bi0 = from[0];
- from += 1;
- n_left_from -= 1;
-
- b0 = vlib_get_buffer (vm, bi0);
- sa_index0 = vnet_buffer (b0)->ipsec.sad_index;
- sa0 = pool_elt_at_index (im->sad, sa_index0);
-
- if (PREDICT_FALSE (esp_seq_advance (sa0)))
- {
- clib_warning ("sequence number counter has cycled SPI %u",
- sa0->spi);
- vlib_node_increment_counter (vm, dpdk_esp_encrypt_node.index,
- ESP_ENCRYPT_ERROR_SEQ_CYCLED, 1);
- //TODO: rekey SA
- to_next[0] = bi0;
- to_next += 1;
- n_left_to_next -= 1;
- goto trace;
- }
-
- sa_sess = pool_elt_at_index (cwm->sa_sess_d[1], sa_index0);
- if (PREDICT_FALSE (!sa_sess->sess))
- {
- int ret = create_sym_sess (sa0, sa_sess, 1);
- ASSERT (ret == 0);
- }
-
- qp_index = sa_sess->qp_index;
- sess = sa_sess->sess;
-
- ASSERT (vec_len (vec_elt (cwm->qp_data, qp_index).free_cops) > 0);
- cop = vec_pop (vec_elt (cwm->qp_data, qp_index).free_cops);
- ASSERT (cop->status == RTE_CRYPTO_OP_STATUS_NOT_PROCESSED);
-
- cops_to_enq[qp_index][0] = cop;
- cops_to_enq[qp_index] += 1;
- n_cop_qp[qp_index] += 1;
- bi_to_enq[qp_index][0] = bi0;
- bi_to_enq[qp_index] += 1;
-
- ssize_t adv;
- iv_size = em->esp_crypto_algs[sa0->crypto_alg].iv_len;
- ih0 = vlib_buffer_get_current (b0);
- orig_sz = b0->current_length;
- is_ipv6 = (ih0->ip4.ip_version_and_header_length & 0xF0) == 0x60;
- /* is ipv6 */
- if (PREDICT_TRUE (sa0->is_tunnel))
- {
- if (PREDICT_TRUE (!is_ipv6))
- adv = -sizeof (ip4_and_esp_header_t);
- else
- adv = -sizeof (ip6_and_esp_header_t);
- }
- else
- {
- adv = -sizeof (esp_header_t);
- if (PREDICT_TRUE (!is_ipv6))
- orig_sz -= sizeof (ip4_header_t);
- else
- orig_sz -= sizeof (ip6_header_t);
- }
-
- /*transport mode save the eth header before it is overwritten */
- if (PREDICT_FALSE (!sa0->is_tunnel))
- {
- ethernet_header_t *ieh0 = (ethernet_header_t *)
- ((u8 *) vlib_buffer_get_current (b0) -
- sizeof (ethernet_header_t));
- ethernet_header_t *oeh0 =
- (ethernet_header_t *) ((u8 *) ieh0 + (adv - iv_size));
- clib_memcpy (oeh0, ieh0, sizeof (ethernet_header_t));
- }
-
- vlib_buffer_advance (b0, adv - iv_size);
-
- /* XXX IP6/ip4 and IP4/IP6 not supported, only IP4/IP4 and IP6/IP6 */
-
- /* is ipv6 */
- if (PREDICT_FALSE (is_ipv6))
- {
- ih6_0 = (ip6_and_esp_header_t *) ih0;
- ip_hdr_size = sizeof (ip6_header_t);
- oh6_0 = vlib_buffer_get_current (b0);
-
- if (PREDICT_TRUE (sa0->is_tunnel))
- {
- next_hdr_type = IP_PROTOCOL_IPV6;
- oh6_0->ip6.ip_version_traffic_class_and_flow_label =
- ih6_0->ip6.ip_version_traffic_class_and_flow_label;
- }
- else
- {
- next_hdr_type = ih6_0->ip6.protocol;
- memmove (oh6_0, ih6_0, sizeof (ip6_header_t));
- }
-
- oh6_0->ip6.protocol = IP_PROTOCOL_IPSEC_ESP;
- oh6_0->ip6.hop_limit = 254;
- oh6_0->esp.spi = clib_net_to_host_u32 (sa0->spi);
- oh6_0->esp.seq = clib_net_to_host_u32 (sa0->seq);
- }
- else
- {
- ip_hdr_size = sizeof (ip4_header_t);
- oh0 = vlib_buffer_get_current (b0);
-
- if (PREDICT_TRUE (sa0->is_tunnel))
- {
- next_hdr_type = IP_PROTOCOL_IP_IN_IP;
- oh0->ip4.tos = ih0->ip4.tos;
- }
- else
- {
- next_hdr_type = ih0->ip4.protocol;
- memmove (oh0, ih0, sizeof (ip4_header_t));
- }
-
- oh0->ip4.ip_version_and_header_length = 0x45;
- oh0->ip4.fragment_id = 0;
- oh0->ip4.flags_and_fragment_offset = 0;
- oh0->ip4.ttl = 254;
- oh0->ip4.protocol = IP_PROTOCOL_IPSEC_ESP;
- oh0->esp.spi = clib_net_to_host_u32 (sa0->spi);
- oh0->esp.seq = clib_net_to_host_u32 (sa0->seq);
- }
-
- if (PREDICT_TRUE (sa0->is_tunnel && !sa0->is_tunnel_ip6))
- {
- oh0->ip4.src_address.as_u32 = sa0->tunnel_src_addr.ip4.as_u32;
- oh0->ip4.dst_address.as_u32 = sa0->tunnel_dst_addr.ip4.as_u32;
-
- /* in tunnel mode send it back to FIB */
- next0 = ESP_ENCRYPT_NEXT_IP4_LOOKUP;
- vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
- }
- else if (sa0->is_tunnel && sa0->is_tunnel_ip6)
- {
- oh6_0->ip6.src_address.as_u64[0] =
- sa0->tunnel_src_addr.ip6.as_u64[0];
- oh6_0->ip6.src_address.as_u64[1] =
- sa0->tunnel_src_addr.ip6.as_u64[1];
- oh6_0->ip6.dst_address.as_u64[0] =
- sa0->tunnel_dst_addr.ip6.as_u64[0];
- oh6_0->ip6.dst_address.as_u64[1] =
- sa0->tunnel_dst_addr.ip6.as_u64[1];
-
- /* in tunnel mode send it back to FIB */
- next0 = ESP_ENCRYPT_NEXT_IP6_LOOKUP;
- vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
- }
- else
- {
- next0 = ESP_ENCRYPT_NEXT_INTERFACE_OUTPUT;
- transport_mode = 1;
- }
-
- ASSERT (sa0->crypto_alg < IPSEC_CRYPTO_N_ALG);
- ASSERT (sa0->crypto_alg != IPSEC_CRYPTO_ALG_NONE);
-
- int blocks = 1 + (orig_sz + 1) / BLOCK_SIZE;
-
- /* pad packet in input buffer */
- u8 pad_bytes = BLOCK_SIZE * blocks - 2 - orig_sz;
- u8 i;
- u8 *padding = vlib_buffer_get_current (b0) + b0->current_length;
-
- for (i = 0; i < pad_bytes; ++i)
- padding[i] = i + 1;
-
- f0 = vlib_buffer_get_current (b0) + b0->current_length + pad_bytes;
- f0->pad_length = pad_bytes;
- f0->next_header = next_hdr_type;
- b0->current_length += pad_bytes + 2 +
- em->esp_integ_algs[sa0->integ_alg].trunc_size;
-
- vnet_buffer (b0)->sw_if_index[VLIB_RX] =
- vnet_buffer (b0)->sw_if_index[VLIB_RX];
- b0->flags |= VLIB_BUFFER_TOTAL_LENGTH_VALID;
-
- struct rte_crypto_sym_op *sym_cop;
- sym_cop = (struct rte_crypto_sym_op *) (cop + 1);
-
- dpdk_cop_priv_t *priv = (dpdk_cop_priv_t *) (sym_cop + 1);
-
- vnet_buffer (b0)->unused[0] = next0;
-
- mb0 = rte_mbuf_from_vlib_buffer (b0);
- mb0->data_len = b0->current_length;
- mb0->pkt_len = b0->current_length;
- mb0->data_off = RTE_PKTMBUF_HEADROOM + b0->current_data;
-
- rte_crypto_op_attach_sym_session (cop, sess);
-
- sym_cop->m_src = mb0;
-
- dpdk_gcm_cnt_blk *icb = &priv->cb;
- icb->salt = sa0->salt;
- icb->iv[0] = sa0->seq;
- icb->iv[1] = sa0->seq_hi;
-
- if (sa0->crypto_alg == IPSEC_CRYPTO_ALG_AES_GCM_128)
- {
- icb->cnt = clib_host_to_net_u32 (1);
- clib_memcpy (vlib_buffer_get_current (b0) + ip_hdr_size +
- sizeof (esp_header_t), icb->iv, 8);
- sym_cop->cipher.data.offset =
- ip_hdr_size + sizeof (esp_header_t) + iv_size;
- sym_cop->cipher.data.length = BLOCK_SIZE * blocks;
- sym_cop->cipher.iv.length = 16;
- }
- else
- {
- sym_cop->cipher.data.offset =
- ip_hdr_size + sizeof (esp_header_t);
- sym_cop->cipher.data.length = BLOCK_SIZE * blocks + iv_size;
- sym_cop->cipher.iv.length = iv_size;
- }
-
- sym_cop->cipher.iv.data = (u8 *) icb;
- sym_cop->cipher.iv.phys_addr = cop->phys_addr + (uintptr_t) icb
- - (uintptr_t) cop;
-
-
- ASSERT (sa0->integ_alg < IPSEC_INTEG_N_ALG);
- ASSERT (sa0->integ_alg != IPSEC_INTEG_ALG_NONE);
-
- if (PREDICT_FALSE (sa0->integ_alg == IPSEC_INTEG_ALG_AES_GCM_128))
- {
- u8 *aad = priv->aad;
- clib_memcpy (aad, vlib_buffer_get_current (b0) + ip_hdr_size,
- 8);
- sym_cop->auth.aad.data = aad;
- sym_cop->auth.aad.phys_addr = cop->phys_addr +
- (uintptr_t) aad - (uintptr_t) cop;
-
- if (PREDICT_FALSE (sa0->use_esn))
- {
- *((u32 *) & aad[8]) = sa0->seq_hi;
- sym_cop->auth.aad.length = 12;
- }
- else
- {
- sym_cop->auth.aad.length = 8;
- }
- }
- else
- {
- sym_cop->auth.data.offset = ip_hdr_size;
- sym_cop->auth.data.length = b0->current_length - ip_hdr_size
- - em->esp_integ_algs[sa0->integ_alg].trunc_size;
-
- if (PREDICT_FALSE (sa0->use_esn))
- {
- u8 *payload_end =
- vlib_buffer_get_current (b0) + b0->current_length;
- *((u32 *) payload_end) = sa0->seq_hi;
- sym_cop->auth.data.length += sizeof (sa0->seq_hi);
- }
- }
- sym_cop->auth.digest.data = vlib_buffer_get_current (b0) +
- b0->current_length -
- em->esp_integ_algs[sa0->integ_alg].trunc_size;
- sym_cop->auth.digest.phys_addr = rte_pktmbuf_mtophys_offset (mb0,
- b0->current_length
- -
- em->esp_integ_algs
- [sa0->integ_alg].trunc_size);
- sym_cop->auth.digest.length =
- em->esp_integ_algs[sa0->integ_alg].trunc_size;
-
-
- if (PREDICT_FALSE (is_ipv6))
- {
- oh6_0->ip6.payload_length =
- clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b0) -
- sizeof (ip6_header_t));
- }
- else
- {
- oh0->ip4.length =
- clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, b0));
- oh0->ip4.checksum = ip4_header_checksum (&oh0->ip4);
- }
-
- if (transport_mode)
- vlib_buffer_advance (b0, -sizeof (ethernet_header_t));
-
- trace:
- if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
- {
- esp_encrypt_trace_t *tr =
- vlib_add_trace (vm, node, b0, sizeof (*tr));
- tr->spi = sa0->spi;
- tr->seq = sa0->seq - 1;
- tr->crypto_alg = sa0->crypto_alg;
- tr->integ_alg = sa0->integ_alg;
- }
- }
- vlib_put_next_frame (vm, node, next_index, n_left_to_next);
- }
- vlib_node_increment_counter (vm, dpdk_esp_encrypt_node.index,
- ESP_ENCRYPT_ERROR_RX_PKTS,
- from_frame->n_vectors);
- crypto_qp_data_t *qpd;
- /* *INDENT-OFF* */
- vec_foreach_index (i, cwm->qp_data)
- {
- u32 enq;
-
- qpd = vec_elt_at_index(cwm->qp_data, i);
- enq = rte_cryptodev_enqueue_burst(qpd->dev_id, qpd->qp_id,
- qpd->cops, n_cop_qp[i]);
- qpd->inflights += enq;
-
- if (PREDICT_FALSE(enq < n_cop_qp[i]))
- {
- crypto_free_cop (qpd, &qpd->cops[enq], n_cop_qp[i] - enq);
- vlib_buffer_free (vm, &qpd->bi[enq], n_cop_qp[i] - enq);
-
- vlib_node_increment_counter (vm, dpdk_esp_encrypt_node.index,
- ESP_ENCRYPT_ERROR_ENQ_FAIL,
- n_cop_qp[i] - enq);
- }
- }
- /* *INDENT-ON* */
-
- return from_frame->n_vectors;
-}
-
-VLIB_REGISTER_NODE (dpdk_esp_encrypt_node) =
-{
- .function = dpdk_esp_encrypt_node_fn,.name = "dpdk-esp-encrypt",.flags =
- VLIB_NODE_FLAG_IS_OUTPUT,.vector_size = sizeof (u32),.format_trace =
- format_esp_encrypt_trace,.n_errors =
- ARRAY_LEN (esp_encrypt_error_strings),.error_strings =
- esp_encrypt_error_strings,.n_next_nodes = 1,.next_nodes =
- {
- [ESP_ENCRYPT_NEXT_DROP] = "error-drop",}
-};
-
-VLIB_NODE_FUNCTION_MULTIARCH (dpdk_esp_encrypt_node, dpdk_esp_encrypt_node_fn)
-/*
- * ESP Encrypt Post Node
- */
-#define foreach_esp_encrypt_post_error \
- _(PKTS, "ESP post pkts")
- typedef enum
- {
-#define _(sym,str) ESP_ENCRYPT_POST_ERROR_##sym,
- foreach_esp_encrypt_post_error
-#undef _
- ESP_ENCRYPT_POST_N_ERROR,
- } esp_encrypt_post_error_t;
-
- static char *esp_encrypt_post_error_strings[] = {
-#define _(sym,string) string,
- foreach_esp_encrypt_post_error
-#undef _
- };
-
-vlib_node_registration_t dpdk_esp_encrypt_post_node;
-
-static u8 *
-format_esp_encrypt_post_trace (u8 * s, va_list * args)
-{
- return s;
-}
-
-static uword
-dpdk_esp_encrypt_post_node_fn (vlib_main_t * vm,
- vlib_node_runtime_t * node,
- vlib_frame_t * from_frame)
-{
- u32 n_left_from, *from, *to_next = 0, next_index;
-
- from = vlib_frame_vector_args (from_frame);
- n_left_from = from_frame->n_vectors;
-
- next_index = node->cached_next_index;
-
- while (n_left_from > 0)
- {
- u32 n_left_to_next;
-
- vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
-
- while (n_left_from > 0 && n_left_to_next > 0)
- {
- u32 bi0, next0;
- vlib_buffer_t *b0 = 0;
-
- bi0 = from[0];
- from += 1;
- n_left_from -= 1;
- n_left_to_next -= 1;
-
- b0 = vlib_get_buffer (vm, bi0);
-
- to_next[0] = bi0;
- to_next += 1;
-
- next0 = vnet_buffer (b0)->unused[0];
-
- vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
- to_next, n_left_to_next, bi0,
- next0);
- }
- vlib_put_next_frame (vm, node, next_index, n_left_to_next);
- }
-
- vlib_node_increment_counter (vm, dpdk_esp_encrypt_post_node.index,
- ESP_ENCRYPT_POST_ERROR_PKTS,
- from_frame->n_vectors);
-
- return from_frame->n_vectors;
-}
-
-VLIB_REGISTER_NODE (dpdk_esp_encrypt_post_node) =
-{
- .function = dpdk_esp_encrypt_post_node_fn,.name =
- "dpdk-esp-encrypt-post",.vector_size = sizeof (u32),.format_trace =
- format_esp_encrypt_post_trace,.type = VLIB_NODE_TYPE_INTERNAL,.n_errors =
- ARRAY_LEN (esp_encrypt_post_error_strings),.error_strings =
- esp_encrypt_post_error_strings,.n_next_nodes =
- ESP_ENCRYPT_N_NEXT,.next_nodes =
- {
-#define _(s,n) [ESP_ENCRYPT_NEXT_##s] = n,
- foreach_esp_encrypt_next
-#undef _
- }
-};
-
-VLIB_NODE_FUNCTION_MULTIARCH (dpdk_esp_encrypt_post_node,
- dpdk_esp_encrypt_post_node_fn)
-/*
- * fd.io coding-style-patch-verification: ON
- *
- * Local Variables:
- * eval: (c-set-style "gnu")
- * End:
- */
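The encrypt node above sizes the ESP trailer with `blocks = 1 + (orig_sz + 1) / BLOCK_SIZE` and `pad_bytes = BLOCK_SIZE * blocks - 2 - orig_sz`, so the payload, the padding and the 2-byte footer always fill whole 16-byte cipher blocks. A small standalone sketch of that arithmetic (an editor-added illustration, not part of the deleted file):

```c
#include <stdio.h>

/* Reproduces the ESP padding arithmetic used by dpdk_esp_encrypt_node_fn:
 * payload + padding + 2-byte footer must be a multiple of the block size. */
int
main (void)
{
  const int BLOCK_SIZE = 16;
  int orig_sz;

  for (orig_sz = 14; orig_sz <= 17; orig_sz++)
    {
      int blocks = 1 + (orig_sz + 1) / BLOCK_SIZE;
      int pad_bytes = BLOCK_SIZE * blocks - 2 - orig_sz;

      printf ("payload %d -> %d pad bytes, %d-byte ciphertext\n",
              orig_sz, pad_bytes, BLOCK_SIZE * blocks);
    }
  return 0;
}
```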
diff --git a/vnet/vnet/devices/dpdk/ipsec/ipsec.c b/vnet/vnet/devices/dpdk/ipsec/ipsec.c
deleted file mode 100644
index de253f02636..00000000000
--- a/vnet/vnet/devices/dpdk/ipsec/ipsec.c
+++ /dev/null
@@ -1,313 +0,0 @@
-/*
- * Copyright (c) 2016 Intel and/or its affiliates.
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at:
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#include <vnet/vnet.h>
-#include <vnet/ip/ip.h>
-#include <vnet/api_errno.h>
-#include <vnet/devices/dpdk/dpdk.h>
-#include <vnet/devices/dpdk/ipsec/ipsec.h>
-#include <vnet/devices/dpdk/ipsec/esp.h>
-#include <vnet/ipsec/ipsec.h>
-
-#define DPDK_CRYPTO_NB_OBJS 2048
-#define DPDK_CRYPTO_CACHE_SIZE 512
-#define DPDK_CRYPTO_PRIV_SIZE 128
-#define DPDK_CRYPTO_N_QUEUE_DESC 512
-#define DPDK_CRYPTO_NB_COPS (1024 * 4)
-
-/*
- * return:
- * -1: update failed
- * 0: already exist
- * 1: mapped
- */
-static int
-update_qp_data (crypto_worker_main_t * cwm,
- u8 cdev_id, u16 qp_id, u8 is_outbound, u16 * idx)
-{
- crypto_qp_data_t *qpd;
-
- /* *INDENT-OFF* */
- vec_foreach_index (*idx, cwm->qp_data)
- {
- qpd = vec_elt_at_index(cwm->qp_data, *idx);
-
- if (qpd->dev_id == cdev_id && qpd->qp_id == qp_id &&
- qpd->is_outbound == is_outbound)
- return 0;
- }
- /* *INDENT-ON* */
-
- vec_add2 (cwm->qp_data, qpd, 1);
-
- qpd->dev_id = cdev_id;
- qpd->qp_id = qp_id;
- qpd->is_outbound = is_outbound;
-
- return 1;
-}
-
-/*
- * return:
- * -1: error
- * 0: already exist
- * 1: mapped
- */
-static int
-add_mapping (crypto_worker_main_t * cwm,
- u8 cdev_id, u16 qp, u8 is_outbound,
- const struct rte_cryptodev_capabilities *cipher_cap,
- const struct rte_cryptodev_capabilities *auth_cap)
-{
- int mapped;
- u16 qp_index;
- uword key = 0, data, *ret;
- crypto_worker_qp_key_t *p_key = (crypto_worker_qp_key_t *) & key;
-
- p_key->cipher_algo = (u8) cipher_cap->sym.cipher.algo;
- p_key->auth_algo = (u8) auth_cap->sym.auth.algo;
- p_key->is_outbound = is_outbound;
-
- ret = hash_get (cwm->algo_qp_map, key);
- if (ret)
- return 0;
-
- mapped = update_qp_data (cwm, cdev_id, qp, is_outbound, &qp_index);
- if (mapped < 0)
- return -1;
-
- data = (uword) qp_index;
-
- ret = hash_set (cwm->algo_qp_map, key, data);
- if (!ret)
- rte_panic ("Failed to insert hash table\n");
-
- return mapped;
-}
-
-/*
- * return:
- * 0: already exist
- * 1: mapped
- */
-static int
-add_cdev_mapping (crypto_worker_main_t * cwm,
- struct rte_cryptodev_info *dev_info, u8 cdev_id,
- u16 qp, u8 is_outbound)
-{
- const struct rte_cryptodev_capabilities *i, *j;
- u32 mapped = 0;
-
- for (i = dev_info->capabilities; i->op != RTE_CRYPTO_OP_TYPE_UNDEFINED; i++)
- {
- if (i->sym.xform_type != RTE_CRYPTO_SYM_XFORM_CIPHER)
- continue;
-
- if (check_algo_is_supported (i, NULL) != 0)
- continue;
-
- for (j = dev_info->capabilities; j->op != RTE_CRYPTO_OP_TYPE_UNDEFINED;
- j++)
- {
- int status = 0;
-
- if (j->sym.xform_type != RTE_CRYPTO_SYM_XFORM_AUTH)
- continue;
-
- if (check_algo_is_supported (j, NULL) != 0)
- continue;
-
- status = add_mapping (cwm, cdev_id, qp, is_outbound, i, j);
- if (status == 1)
- mapped += 1;
- if (status < 0)
- return status;
- }
- }
-
- return mapped;
-}
-
-static int
-check_cryptodev_queues ()
-{
- u32 n_qs = 0;
- u8 cdev_id;
- u32 n_req_qs = 2;
-
- if (vlib_num_workers () > 0)
- n_req_qs = vlib_num_workers () * 2;
-
- for (cdev_id = 0; cdev_id < rte_cryptodev_count (); cdev_id++)
- {
- struct rte_cryptodev_info cdev_info;
-
- rte_cryptodev_info_get (cdev_id, &cdev_info);
-
- if (!
- (cdev_info.feature_flags & RTE_CRYPTODEV_FF_SYM_OPERATION_CHAINING))
- continue;
-
- n_qs += cdev_info.max_nb_queue_pairs;
- }
-
- if (n_qs >= n_req_qs)
- return 0;
- else
- return -1;
-}
-
-static clib_error_t *
-dpdk_ipsec_init (vlib_main_t * vm)
-{
- dpdk_crypto_main_t *dcm = &dpdk_crypto_main;
- vlib_thread_main_t *tm = vlib_get_thread_main ();
- struct rte_cryptodev_config dev_conf;
- struct rte_cryptodev_qp_conf qp_conf;
- struct rte_cryptodev_info cdev_info;
- struct rte_mempool *rmp;
- i32 dev_id, ret;
- u32 i, skip_master;
-
- if (check_cryptodev_queues () < 0)
- return clib_error_return (0, "not enough cryptodevs for ipsec");
-
- vec_alloc (dcm->workers_main, tm->n_vlib_mains);
- _vec_len (dcm->workers_main) = tm->n_vlib_mains;
-
- fprintf (stdout, "DPDK Cryptodevs info:\n");
- fprintf (stdout, "dev_id\tn_qp\tnb_obj\tcache_size\n");
- /* HW cryptodevs have higher dev_id, use HW first */
- for (dev_id = rte_cryptodev_count () - 1; dev_id >= 0; dev_id--)
- {
- u16 max_nb_qp, qp = 0;
- skip_master = vlib_num_workers () > 0;
-
- rte_cryptodev_info_get (dev_id, &cdev_info);
-
- if (!
- (cdev_info.feature_flags & RTE_CRYPTODEV_FF_SYM_OPERATION_CHAINING))
- continue;
-
- max_nb_qp = cdev_info.max_nb_queue_pairs;
-
- for (i = 0; i < tm->n_vlib_mains; i++)
- {
- u8 is_outbound;
- crypto_worker_main_t *cwm;
- uword *map;
-
- if (skip_master)
- {
- skip_master = 0;
- continue;
- }
-
- cwm = vec_elt_at_index (dcm->workers_main, i);
- map = cwm->algo_qp_map;
-
- if (!map)
- {
- map = hash_create (0, sizeof (crypto_worker_qp_key_t));
- if (!map)
- return clib_error_return (0, "unable to create hash table "
- "for worker %u",
- vlib_mains[i]->cpu_index);
- cwm->algo_qp_map = map;
- }
-
- for (is_outbound = 0; is_outbound < 2 && qp < max_nb_qp;
- is_outbound++)
- {
- int mapped = add_cdev_mapping (cwm, &cdev_info,
- dev_id, qp, is_outbound);
- if (mapped > 0)
- qp++;
-
- if (mapped < 0)
- return clib_error_return (0,
- "too many queues for one worker");
- }
- }
-
- if (qp == 0)
- continue;
-
- dev_conf.socket_id = rte_cryptodev_socket_id (dev_id);
- dev_conf.nb_queue_pairs = cdev_info.max_nb_queue_pairs;
- dev_conf.session_mp.nb_objs = DPDK_CRYPTO_NB_OBJS;
- dev_conf.session_mp.cache_size = DPDK_CRYPTO_CACHE_SIZE;
-
- ret = rte_cryptodev_configure (dev_id, &dev_conf);
- if (ret < 0)
- return clib_error_return (0, "cryptodev %u config error", dev_id);
-
- qp_conf.nb_descriptors = DPDK_CRYPTO_N_QUEUE_DESC;
- for (qp = 0; qp < dev_conf.nb_queue_pairs; qp++)
- {
- ret = rte_cryptodev_queue_pair_setup (dev_id, qp, &qp_conf,
- dev_conf.socket_id);
- if (ret < 0)
- return clib_error_return (0, "cryptodev %u qp %u setup error",
- dev_id, qp);
- }
- fprintf (stdout, "%u\t%u\t%u\t%u\n", dev_id, dev_conf.nb_queue_pairs,
- DPDK_CRYPTO_NB_OBJS, DPDK_CRYPTO_CACHE_SIZE);
- }
-
- u32 socket_id = rte_socket_id ();
-
- vec_validate_aligned (dcm->cop_pools, socket_id, CLIB_CACHE_LINE_BYTES);
-
- /* pool already exists, nothing to do */
- if (dcm->cop_pools[socket_id])
- return 0;
-
- u8 *pool_name = format (0, "crypto_op_pool_socket%u%c", socket_id, 0);
-
- rmp = rte_crypto_op_pool_create ((char *) pool_name,
- RTE_CRYPTO_OP_TYPE_SYMMETRIC,
- DPDK_CRYPTO_NB_COPS *
- (1 + vlib_num_workers ()),
- DPDK_CRYPTO_CACHE_SIZE,
- DPDK_CRYPTO_PRIV_SIZE, socket_id);
- vec_free (pool_name);
-
- if (!rmp)
- return clib_error_return (0, "failed to allocate mempool on socket %u",
- socket_id);
- dcm->cop_pools[socket_id] = rmp;
-
- dpdk_esp_init ();
-
- if (vec_len (vlib_mains) == 0)
- vlib_node_set_state (&vlib_global_main, dpdk_crypto_input_node.index,
- VLIB_NODE_STATE_POLLING);
- else
- for (i = 1; i < tm->n_vlib_mains; i++)
- vlib_node_set_state (vlib_mains[i], dpdk_crypto_input_node.index,
- VLIB_NODE_STATE_POLLING);
-
- return 0;
-}
-
-VLIB_MAIN_LOOP_ENTER_FUNCTION (dpdk_ipsec_init);
-
-/*
- * fd.io coding-style-patch-verification: ON
- *
- * Local Variables:
- * eval: (c-set-style "gnu")
- * End:
- */
diff --git a/vnet/vnet/devices/dpdk/ipsec/ipsec.h b/vnet/vnet/devices/dpdk/ipsec/ipsec.h
deleted file mode 100644
index e6c7498c0d3..00000000000
--- a/vnet/vnet/devices/dpdk/ipsec/ipsec.h
+++ /dev/null
@@ -1,227 +0,0 @@
-/*
- * Copyright (c) 2016 Intel and/or its affiliates.
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at:
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#ifndef __DPDK_IPSEC_H__
-#define __DPDK_IPSEC_H__
-
-#include <vnet/vnet.h>
-
-#undef always_inline
-#include <rte_crypto.h>
-#include <rte_cryptodev.h>
-
-#if CLIB_DEBUG > 0
-#define always_inline static inline
-#else
-#define always_inline static inline __attribute__ ((__always_inline__))
-#endif
-
-
-#define MAX_QP_PER_LCORE 16
-
-typedef struct
-{
- u32 salt;
- u32 iv[2];
- u32 cnt;
-} dpdk_gcm_cnt_blk;
-
-typedef struct
-{
- dpdk_gcm_cnt_blk cb;
- union
- {
- u8 aad[12];
- u8 icv[64];
- };
-} dpdk_cop_priv_t;
-
-typedef struct
-{
- u8 cipher_algo;
- u8 auth_algo;
- u8 is_outbound;
-} crypto_worker_qp_key_t;
-
-typedef struct
-{
- u16 dev_id;
- u16 qp_id;
- u16 is_outbound;
- i16 inflights;
- u32 bi[VLIB_FRAME_SIZE];
- struct rte_crypto_op *cops[VLIB_FRAME_SIZE];
- struct rte_crypto_op **free_cops;
-} crypto_qp_data_t;
-
-typedef struct
-{
- u8 qp_index;
- void *sess;
-} crypto_sa_session_t;
-
-typedef struct
-{
- crypto_sa_session_t *sa_sess_d[2];
- crypto_qp_data_t *qp_data;
- uword *algo_qp_map;
-} crypto_worker_main_t;
-
-typedef struct
-{
- struct rte_mempool **cop_pools;
- crypto_worker_main_t *workers_main;
-} dpdk_crypto_main_t;
-
-dpdk_crypto_main_t dpdk_crypto_main;
-
-extern vlib_node_registration_t dpdk_crypto_input_node;
-
-#define CRYPTO_N_FREE_COPS (VLIB_FRAME_SIZE * 3)
-
-static_always_inline void
-crypto_alloc_cops ()
-{
- dpdk_crypto_main_t *dcm = &dpdk_crypto_main;
- u32 cpu_index = os_get_cpu_number ();
- crypto_worker_main_t *cwm = &dcm->workers_main[cpu_index];
- unsigned socket_id = rte_socket_id ();
- crypto_qp_data_t *qpd;
-
- /* *INDENT-OFF* */
- vec_foreach (qpd, cwm->qp_data)
- {
- u32 l = vec_len (qpd->free_cops);
-
- if (PREDICT_FALSE (l < VLIB_FRAME_SIZE))
- {
- u32 n_alloc;
-
- if (PREDICT_FALSE (!qpd->free_cops))
- vec_alloc (qpd->free_cops, CRYPTO_N_FREE_COPS);
-
- n_alloc = rte_crypto_op_bulk_alloc (dcm->cop_pools[socket_id],
- RTE_CRYPTO_OP_TYPE_SYMMETRIC,
- &qpd->free_cops[l],
- CRYPTO_N_FREE_COPS - l - 1);
-
- _vec_len (qpd->free_cops) = l + n_alloc;
- }
- }
- /* *INDENT-ON* */
-}
-
-static_always_inline void
-crypto_free_cop (crypto_qp_data_t * qpd, struct rte_crypto_op **cops, u32 n)
-{
- u32 l = vec_len (qpd->free_cops);
-
- if (l + n >= CRYPTO_N_FREE_COPS)
- {
- l -= VLIB_FRAME_SIZE;
- rte_mempool_put_bulk (cops[0]->mempool,
- (void **) &qpd->free_cops[l], VLIB_FRAME_SIZE);
- }
- clib_memcpy (&qpd->free_cops[l], cops, sizeof (*cops) * n);
-
- _vec_len (qpd->free_cops) = l + n;
-}
-
-static_always_inline int
-check_algo_is_supported (const struct rte_cryptodev_capabilities *cap,
- char *name)
-{
- struct
- {
- uint8_t cipher_algo;
- enum rte_crypto_sym_xform_type type;
- union
- {
- enum rte_crypto_auth_algorithm auth;
- enum rte_crypto_cipher_algorithm cipher;
- };
- char *name;
- } supported_algo[] =
- {
- {
- .type = RTE_CRYPTO_SYM_XFORM_CIPHER,.cipher =
- RTE_CRYPTO_CIPHER_NULL,.name = "NULL"},
- {
- .type = RTE_CRYPTO_SYM_XFORM_CIPHER,.cipher =
- RTE_CRYPTO_CIPHER_AES_CBC,.name = "AES_CBC"},
- {
- .type = RTE_CRYPTO_SYM_XFORM_CIPHER,.cipher =
- RTE_CRYPTO_CIPHER_AES_CTR,.name = "AES_CTR"},
- {
- .type = RTE_CRYPTO_SYM_XFORM_CIPHER,.cipher =
- RTE_CRYPTO_CIPHER_3DES_CBC,.name = "3DES-CBC"},
- {
-      .type = RTE_CRYPTO_SYM_XFORM_CIPHER,.cipher =
- RTE_CRYPTO_CIPHER_AES_GCM,.name = "AES-GCM"},
- {
- .type = RTE_CRYPTO_SYM_XFORM_AUTH,.auth =
- RTE_CRYPTO_AUTH_SHA1_HMAC,.name = "HMAC-SHA1"},
- {
- .type = RTE_CRYPTO_SYM_XFORM_AUTH,.auth =
- RTE_CRYPTO_AUTH_SHA256_HMAC,.name = "HMAC-SHA256"},
- {
- .type = RTE_CRYPTO_SYM_XFORM_AUTH,.auth =
- RTE_CRYPTO_AUTH_SHA384_HMAC,.name = "HMAC-SHA384"},
- {
- .type = RTE_CRYPTO_SYM_XFORM_AUTH,.auth =
- RTE_CRYPTO_AUTH_SHA512_HMAC,.name = "HMAC-SHA512"},
- {
- .type = RTE_CRYPTO_SYM_XFORM_AUTH,.auth =
- RTE_CRYPTO_AUTH_AES_XCBC_MAC,.name = "AES-XCBC-MAC"},
- {
- .type = RTE_CRYPTO_SYM_XFORM_AUTH,.auth =
- RTE_CRYPTO_AUTH_AES_GCM,.name = "AES-GCM"},
- {
- /* tail */
- .type = RTE_CRYPTO_SYM_XFORM_NOT_SPECIFIED},};
- uint32_t i = 0;
-
- if (cap->op != RTE_CRYPTO_OP_TYPE_SYMMETRIC)
- return -1;
-
- while (supported_algo[i].type != RTE_CRYPTO_SYM_XFORM_NOT_SPECIFIED)
- {
- if (cap->sym.xform_type == supported_algo[i].type)
- {
- if ((cap->sym.xform_type == RTE_CRYPTO_SYM_XFORM_CIPHER &&
- cap->sym.cipher.algo == supported_algo[i].cipher) ||
- (cap->sym.xform_type == RTE_CRYPTO_SYM_XFORM_AUTH &&
- cap->sym.auth.algo == supported_algo[i].auth))
- {
- if (name)
- strcpy (name, supported_algo[i].name);
- return 0;
- }
- }
-
- i++;
- }
-
- return -1;
-}
-
-#endif /* __DPDK_IPSEC_H__ */
-
-/*
- * fd.io coding-style-patch-verification: ON
- *
- * Local Variables:
- * eval: (c-set-style "gnu")
- * End:
- */
diff --git a/vnet/vnet/devices/dpdk/node.c b/vnet/vnet/devices/dpdk/node.c
deleted file mode 100644
index e541cdbcbd2..00000000000
--- a/vnet/vnet/devices/dpdk/node.c
+++ /dev/null
@@ -1,687 +0,0 @@
-/*
- * Copyright (c) 2015 Cisco and/or its affiliates.
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at:
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#include <vnet/vnet.h>
-#include <vppinfra/vec.h>
-#include <vppinfra/error.h>
-#include <vppinfra/format.h>
-#include <vppinfra/xxhash.h>
-
-#include <vnet/ethernet/ethernet.h>
-#include <vnet/devices/dpdk/dpdk.h>
-#include <vnet/classify/vnet_classify.h>
-#include <vnet/mpls/packet.h>
-#include <vnet/handoff.h>
-#include <vnet/devices/devices.h>
-#include <vnet/feature/feature.h>
-
-#include "dpdk_priv.h"
-
-static char *dpdk_error_strings[] = {
-#define _(n,s) s,
- foreach_dpdk_error
-#undef _
-};
-
-always_inline int
-vlib_buffer_is_ip4 (vlib_buffer_t * b)
-{
- ethernet_header_t *h = (ethernet_header_t *) b->data;
- return (h->type == clib_host_to_net_u16 (ETHERNET_TYPE_IP4));
-}
-
-always_inline int
-vlib_buffer_is_ip6 (vlib_buffer_t * b)
-{
- ethernet_header_t *h = (ethernet_header_t *) b->data;
- return (h->type == clib_host_to_net_u16 (ETHERNET_TYPE_IP6));
-}
-
-always_inline int
-vlib_buffer_is_mpls (vlib_buffer_t * b)
-{
- ethernet_header_t *h = (ethernet_header_t *) b->data;
- return (h->type == clib_host_to_net_u16 (ETHERNET_TYPE_MPLS_UNICAST));
-}
-
-#if RTE_VERSION < RTE_VERSION_NUM(16, 11, 0, 0)
-/* New ol_flags bits added in DPDK-16.11 */
-#define PKT_RX_IP_CKSUM_GOOD (1ULL << 7)
-#endif
-
-always_inline u32
-dpdk_rx_next_from_etype (struct rte_mbuf * mb, vlib_buffer_t * b0)
-{
- if (PREDICT_TRUE (vlib_buffer_is_ip4 (b0)))
- if (PREDICT_TRUE ((mb->ol_flags & PKT_RX_IP_CKSUM_GOOD) != 0))
- return VNET_DEVICE_INPUT_NEXT_IP4_NCS_INPUT;
- else
- return VNET_DEVICE_INPUT_NEXT_IP4_INPUT;
- else if (PREDICT_TRUE (vlib_buffer_is_ip6 (b0)))
- return VNET_DEVICE_INPUT_NEXT_IP6_INPUT;
- else if (PREDICT_TRUE (vlib_buffer_is_mpls (b0)))
- return VNET_DEVICE_INPUT_NEXT_MPLS_INPUT;
- else
- return VNET_DEVICE_INPUT_NEXT_ETHERNET_INPUT;
-}
-
-always_inline int
-dpdk_mbuf_is_vlan (struct rte_mbuf *mb)
-{
-#if RTE_VERSION >= RTE_VERSION_NUM(16, 11, 0, 0)
- return (mb->packet_type & RTE_PTYPE_L2_ETHER_VLAN) ==
- RTE_PTYPE_L2_ETHER_VLAN;
-#else
- return
- (mb->ol_flags &
- (PKT_RX_VLAN_PKT | PKT_RX_VLAN_STRIPPED | PKT_RX_QINQ_STRIPPED)) ==
- PKT_RX_VLAN_PKT;
-#endif
-}
-
-always_inline int
-dpdk_mbuf_is_ip4 (struct rte_mbuf *mb)
-{
- return RTE_ETH_IS_IPV4_HDR (mb->packet_type) != 0;
-}
-
-always_inline int
-dpdk_mbuf_is_ip6 (struct rte_mbuf *mb)
-{
- return RTE_ETH_IS_IPV6_HDR (mb->packet_type) != 0;
-}
-
-always_inline u32
-dpdk_rx_next_from_mb (struct rte_mbuf * mb, vlib_buffer_t * b0)
-{
- if (PREDICT_FALSE (dpdk_mbuf_is_vlan (mb)))
- return VNET_DEVICE_INPUT_NEXT_ETHERNET_INPUT;
- else if (PREDICT_TRUE (dpdk_mbuf_is_ip4 (mb)))
- return VNET_DEVICE_INPUT_NEXT_IP4_NCS_INPUT;
- else if (PREDICT_TRUE (dpdk_mbuf_is_ip6 (mb)))
- return VNET_DEVICE_INPUT_NEXT_IP6_INPUT;
- else if (PREDICT_TRUE (vlib_buffer_is_mpls (b0)))
- return VNET_DEVICE_INPUT_NEXT_MPLS_INPUT;
- else
- return dpdk_rx_next_from_etype (mb, b0);
-}
-
-always_inline void
-dpdk_rx_error_from_mb (struct rte_mbuf *mb, u32 * next, u8 * error)
-{
- if (mb->ol_flags & PKT_RX_IP_CKSUM_BAD)
- {
- *error = DPDK_ERROR_IP_CHECKSUM_ERROR;
- *next = VNET_DEVICE_INPUT_NEXT_DROP;
- }
- else
- *error = DPDK_ERROR_NONE;
-}
-
-void
-dpdk_rx_trace (dpdk_main_t * dm,
- vlib_node_runtime_t * node,
- dpdk_device_t * xd,
- u16 queue_id, u32 * buffers, uword n_buffers)
-{
- vlib_main_t *vm = vlib_get_main ();
- u32 *b, n_left;
- u32 next0;
-
- n_left = n_buffers;
- b = buffers;
-
- while (n_left >= 1)
- {
- u32 bi0;
- vlib_buffer_t *b0;
- dpdk_rx_dma_trace_t *t0;
- struct rte_mbuf *mb;
- u8 error0;
-
- bi0 = b[0];
- n_left -= 1;
-
- b0 = vlib_get_buffer (vm, bi0);
- mb = rte_mbuf_from_vlib_buffer (b0);
-
- if (PREDICT_FALSE (xd->per_interface_next_index != ~0))
- next0 = xd->per_interface_next_index;
- else if (PREDICT_TRUE
- ((xd->flags & DPDK_DEVICE_FLAG_PMD_SUPPORTS_PTYPE) != 0))
- next0 = dpdk_rx_next_from_mb (mb, b0);
- else
- next0 = dpdk_rx_next_from_etype (mb, b0);
-
- dpdk_rx_error_from_mb (mb, &next0, &error0);
-
- vlib_trace_buffer (vm, node, next0, b0, /* follow_chain */ 0);
- t0 = vlib_add_trace (vm, node, b0, sizeof (t0[0]));
- t0->queue_index = queue_id;
- t0->device_index = xd->device_index;
- t0->buffer_index = bi0;
-
- clib_memcpy (&t0->mb, mb, sizeof (t0->mb));
- clib_memcpy (&t0->buffer, b0, sizeof (b0[0]) - sizeof (b0->pre_data));
- clib_memcpy (t0->buffer.pre_data, b0->data,
- sizeof (t0->buffer.pre_data));
- clib_memcpy (&t0->data, mb->buf_addr + mb->data_off, sizeof (t0->data));
-
- b += 1;
- }
-}
-
-static inline u32
-dpdk_rx_burst (dpdk_main_t * dm, dpdk_device_t * xd, u16 queue_id)
-{
- u32 n_buffers;
- u32 n_left;
- u32 n_this_chunk;
-
- n_left = VLIB_FRAME_SIZE;
- n_buffers = 0;
-
- if (PREDICT_TRUE (xd->flags & DPDK_DEVICE_FLAG_PMD))
- {
- while (n_left)
- {
- n_this_chunk = rte_eth_rx_burst (xd->device_index, queue_id,
- xd->rx_vectors[queue_id] +
- n_buffers, n_left);
- n_buffers += n_this_chunk;
- n_left -= n_this_chunk;
-
- /* Empirically, DPDK r1.8 produces vectors w/ 32 or fewer elts */
- if (n_this_chunk < 32)
- break;
- }
- }
- else
- {
- ASSERT (0);
- }
-
- return n_buffers;
-}
-
-
-static_always_inline void
-dpdk_process_subseq_segs (vlib_main_t * vm, vlib_buffer_t * b,
- struct rte_mbuf *mb, vlib_buffer_free_list_t * fl)
-{
- u8 nb_seg = 1;
- struct rte_mbuf *mb_seg = 0;
- vlib_buffer_t *b_seg, *b_chain = 0;
- mb_seg = mb->next;
- b_chain = b;
-
- while ((mb->nb_segs > 1) && (nb_seg < mb->nb_segs))
- {
- ASSERT (mb_seg != 0);
-
- b_seg = vlib_buffer_from_rte_mbuf (mb_seg);
- vlib_buffer_init_for_free_list (b_seg, fl);
-
- ASSERT ((b_seg->flags & VLIB_BUFFER_NEXT_PRESENT) == 0);
- ASSERT (b_seg->current_data == 0);
-
- /*
- * The driver (e.g. virtio) may not put the packet data at the start
- * of the segment, so don't assume b_seg->current_data == 0 is correct.
- */
- b_seg->current_data =
- (mb_seg->buf_addr + mb_seg->data_off) - (void *) b_seg->data;
-
- b_seg->current_length = mb_seg->data_len;
- b->total_length_not_including_first_buffer += mb_seg->data_len;
-
- b_chain->flags |= VLIB_BUFFER_NEXT_PRESENT;
- b_chain->next_buffer = vlib_get_buffer_index (vm, b_seg);
-
- b_chain = b_seg;
- mb_seg = mb_seg->next;
- nb_seg++;
- }
-}
-
-static_always_inline void
-dpdk_prefetch_buffer (struct rte_mbuf *mb)
-{
- vlib_buffer_t *b = vlib_buffer_from_rte_mbuf (mb);
- CLIB_PREFETCH (mb, CLIB_CACHE_LINE_BYTES, LOAD);
- CLIB_PREFETCH (b, CLIB_CACHE_LINE_BYTES, STORE);
-}
-
-/*
- * This function is used when there are no worker threads.
- * The main thread performs IO and forwards the packets.
- */
-static_always_inline u32
-dpdk_device_input (dpdk_main_t * dm, dpdk_device_t * xd,
- vlib_node_runtime_t * node, u32 cpu_index, u16 queue_id)
-{
- u32 n_buffers;
- u32 next_index = VNET_DEVICE_INPUT_NEXT_ETHERNET_INPUT;
- u32 n_left_to_next, *to_next;
- u32 mb_index;
- vlib_main_t *vm = vlib_get_main ();
- uword n_rx_bytes = 0;
- u32 n_trace, trace_cnt __attribute__ ((unused));
- vlib_buffer_free_list_t *fl;
- u32 buffer_flags_template;
-
- if ((xd->flags & DPDK_DEVICE_FLAG_ADMIN_UP) == 0)
- return 0;
-
- n_buffers = dpdk_rx_burst (dm, xd, queue_id);
-
- if (n_buffers == 0)
- {
- return 0;
- }
-
- buffer_flags_template = dm->buffer_flags_template;
-
- vec_reset_length (xd->d_trace_buffers[cpu_index]);
- trace_cnt = n_trace = vlib_get_trace_count (vm, node);
-
- if (n_trace > 0)
- {
- u32 n = clib_min (n_trace, n_buffers);
- mb_index = 0;
-
- while (n--)
- {
- struct rte_mbuf *mb = xd->rx_vectors[queue_id][mb_index++];
- vlib_buffer_t *b = vlib_buffer_from_rte_mbuf (mb);
- vec_add1 (xd->d_trace_buffers[cpu_index],
- vlib_get_buffer_index (vm, b));
- }
- }
-
- fl = vlib_buffer_get_free_list (vm, VLIB_BUFFER_DEFAULT_FREE_LIST_INDEX);
-
- mb_index = 0;
-
- while (n_buffers > 0)
- {
- vlib_buffer_t *b0, *b1, *b2, *b3;
- u32 bi0, next0, l3_offset0;
- u32 bi1, next1, l3_offset1;
- u32 bi2, next2, l3_offset2;
- u32 bi3, next3, l3_offset3;
- u8 error0, error1, error2, error3;
- u64 or_ol_flags;
-
- vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
-
- while (n_buffers > 8 && n_left_to_next > 4)
- {
- struct rte_mbuf *mb0 = xd->rx_vectors[queue_id][mb_index];
- struct rte_mbuf *mb1 = xd->rx_vectors[queue_id][mb_index + 1];
- struct rte_mbuf *mb2 = xd->rx_vectors[queue_id][mb_index + 2];
- struct rte_mbuf *mb3 = xd->rx_vectors[queue_id][mb_index + 3];
-
- dpdk_prefetch_buffer (xd->rx_vectors[queue_id][mb_index + 4]);
- dpdk_prefetch_buffer (xd->rx_vectors[queue_id][mb_index + 5]);
- dpdk_prefetch_buffer (xd->rx_vectors[queue_id][mb_index + 6]);
- dpdk_prefetch_buffer (xd->rx_vectors[queue_id][mb_index + 7]);
-
- if (xd->flags & DPDK_DEVICE_FLAG_MAYBE_MULTISEG)
- {
- if (PREDICT_FALSE (mb0->nb_segs > 1))
- dpdk_prefetch_buffer (mb0->next);
- if (PREDICT_FALSE (mb1->nb_segs > 1))
- dpdk_prefetch_buffer (mb1->next);
- if (PREDICT_FALSE (mb2->nb_segs > 1))
- dpdk_prefetch_buffer (mb2->next);
- if (PREDICT_FALSE (mb3->nb_segs > 1))
- dpdk_prefetch_buffer (mb3->next);
- }
-
- ASSERT (mb0);
- ASSERT (mb1);
- ASSERT (mb2);
- ASSERT (mb3);
-
- or_ol_flags = (mb0->ol_flags | mb1->ol_flags |
- mb2->ol_flags | mb3->ol_flags);
- b0 = vlib_buffer_from_rte_mbuf (mb0);
- b1 = vlib_buffer_from_rte_mbuf (mb1);
- b2 = vlib_buffer_from_rte_mbuf (mb2);
- b3 = vlib_buffer_from_rte_mbuf (mb3);
-
- vlib_buffer_init_for_free_list (b0, fl);
- vlib_buffer_init_for_free_list (b1, fl);
- vlib_buffer_init_for_free_list (b2, fl);
- vlib_buffer_init_for_free_list (b3, fl);
-
- bi0 = vlib_get_buffer_index (vm, b0);
- bi1 = vlib_get_buffer_index (vm, b1);
- bi2 = vlib_get_buffer_index (vm, b2);
- bi3 = vlib_get_buffer_index (vm, b3);
-
- to_next[0] = bi0;
- to_next[1] = bi1;
- to_next[2] = bi2;
- to_next[3] = bi3;
- to_next += 4;
- n_left_to_next -= 4;
-
- if (PREDICT_FALSE (xd->per_interface_next_index != ~0))
- {
- next0 = next1 = next2 = next3 = xd->per_interface_next_index;
- }
- else if (PREDICT_TRUE
- ((xd->flags & DPDK_DEVICE_FLAG_PMD_SUPPORTS_PTYPE) != 0))
- {
- next0 = dpdk_rx_next_from_mb (mb0, b0);
- next1 = dpdk_rx_next_from_mb (mb1, b1);
- next2 = dpdk_rx_next_from_mb (mb2, b2);
- next3 = dpdk_rx_next_from_mb (mb3, b3);
- }
- else
- {
- next0 = dpdk_rx_next_from_etype (mb0, b0);
- next1 = dpdk_rx_next_from_etype (mb1, b1);
- next2 = dpdk_rx_next_from_etype (mb2, b2);
- next3 = dpdk_rx_next_from_etype (mb3, b3);
- }
-
- if (PREDICT_FALSE (or_ol_flags & PKT_RX_IP_CKSUM_BAD))
- {
- dpdk_rx_error_from_mb (mb0, &next0, &error0);
- dpdk_rx_error_from_mb (mb1, &next1, &error1);
- dpdk_rx_error_from_mb (mb2, &next2, &error2);
- dpdk_rx_error_from_mb (mb3, &next3, &error3);
- b0->error = node->errors[error0];
- b1->error = node->errors[error1];
- b2->error = node->errors[error2];
- b3->error = node->errors[error3];
- }
- else
- {
- b0->error = b1->error = node->errors[DPDK_ERROR_NONE];
- b2->error = b3->error = node->errors[DPDK_ERROR_NONE];
- }
-
- l3_offset0 = device_input_next_node_advance[next0];
- l3_offset1 = device_input_next_node_advance[next1];
- l3_offset2 = device_input_next_node_advance[next2];
- l3_offset3 = device_input_next_node_advance[next3];
-
- b0->current_data = l3_offset0 + mb0->data_off;
- b1->current_data = l3_offset1 + mb1->data_off;
- b2->current_data = l3_offset2 + mb2->data_off;
- b3->current_data = l3_offset3 + mb3->data_off;
-
- b0->current_data -= RTE_PKTMBUF_HEADROOM;
- b1->current_data -= RTE_PKTMBUF_HEADROOM;
- b2->current_data -= RTE_PKTMBUF_HEADROOM;
- b3->current_data -= RTE_PKTMBUF_HEADROOM;
-
- b0->current_length = mb0->data_len - l3_offset0;
- b1->current_length = mb1->data_len - l3_offset1;
- b2->current_length = mb2->data_len - l3_offset2;
- b3->current_length = mb3->data_len - l3_offset3;
-
- b0->flags = buffer_flags_template;
- b1->flags = buffer_flags_template;
- b2->flags = buffer_flags_template;
- b3->flags = buffer_flags_template;
-
- vnet_buffer (b0)->sw_if_index[VLIB_RX] = xd->vlib_sw_if_index;
- vnet_buffer (b1)->sw_if_index[VLIB_RX] = xd->vlib_sw_if_index;
- vnet_buffer (b2)->sw_if_index[VLIB_RX] = xd->vlib_sw_if_index;
- vnet_buffer (b3)->sw_if_index[VLIB_RX] = xd->vlib_sw_if_index;
-
- vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
- vnet_buffer (b1)->sw_if_index[VLIB_TX] = (u32) ~ 0;
- vnet_buffer (b2)->sw_if_index[VLIB_TX] = (u32) ~ 0;
- vnet_buffer (b3)->sw_if_index[VLIB_TX] = (u32) ~ 0;
-
- n_rx_bytes += mb0->pkt_len;
- n_rx_bytes += mb1->pkt_len;
- n_rx_bytes += mb2->pkt_len;
- n_rx_bytes += mb3->pkt_len;
-
- /* Process subsequent segments of multi-segment packets */
- if (xd->flags & DPDK_DEVICE_FLAG_MAYBE_MULTISEG)
- {
- dpdk_process_subseq_segs (vm, b0, mb0, fl);
- dpdk_process_subseq_segs (vm, b1, mb1, fl);
- dpdk_process_subseq_segs (vm, b2, mb2, fl);
- dpdk_process_subseq_segs (vm, b3, mb3, fl);
- }
-
- /*
- * Turn this on if you run into
- * "bad monkey" contexts, and you want to know exactly
- * which nodes they've visited... See main.c...
- */
- VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b0);
- VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b1);
- VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b2);
- VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b3);
-
- /* Do we have any driver RX features configured on the interface? */
- vnet_feature_start_device_input_x4 (xd->vlib_sw_if_index,
- &next0, &next1, &next2, &next3,
- b0, b1, b2, b3,
- l3_offset0, l3_offset1,
- l3_offset2, l3_offset3);
-
- vlib_validate_buffer_enqueue_x4 (vm, node, next_index,
- to_next, n_left_to_next,
- bi0, bi1, bi2, bi3,
- next0, next1, next2, next3);
- n_buffers -= 4;
- mb_index += 4;
- }
- while (n_buffers > 0 && n_left_to_next > 0)
- {
- struct rte_mbuf *mb0 = xd->rx_vectors[queue_id][mb_index];
-
- ASSERT (mb0);
-
- b0 = vlib_buffer_from_rte_mbuf (mb0);
-
- /* Prefetch one next segment if it exists. */
- if (PREDICT_FALSE (mb0->nb_segs > 1))
- dpdk_prefetch_buffer (mb0->next);
-
- vlib_buffer_init_for_free_list (b0, fl);
-
- bi0 = vlib_get_buffer_index (vm, b0);
-
- to_next[0] = bi0;
- to_next++;
- n_left_to_next--;
-
- if (PREDICT_FALSE (xd->per_interface_next_index != ~0))
- next0 = xd->per_interface_next_index;
- else if (PREDICT_TRUE
- ((xd->flags & DPDK_DEVICE_FLAG_PMD_SUPPORTS_PTYPE) != 0))
- next0 = dpdk_rx_next_from_mb (mb0, b0);
- else
- next0 = dpdk_rx_next_from_etype (mb0, b0);
-
- dpdk_rx_error_from_mb (mb0, &next0, &error0);
- b0->error = node->errors[error0];
-
- l3_offset0 = device_input_next_node_advance[next0];
-
- b0->current_data = l3_offset0;
- b0->current_data += mb0->data_off - RTE_PKTMBUF_HEADROOM;
- b0->current_length = mb0->data_len - l3_offset0;
-
- b0->flags = buffer_flags_template;
-
- vnet_buffer (b0)->sw_if_index[VLIB_RX] = xd->vlib_sw_if_index;
- vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
- n_rx_bytes += mb0->pkt_len;
-
- /* Process subsequent segments of multi-segment packets */
- dpdk_process_subseq_segs (vm, b0, mb0, fl);
-
- /*
- * Turn this on if you run into
- * "bad monkey" contexts, and you want to know exactly
- * which nodes they've visited... See main.c...
- */
- VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b0);
-
- /* Do we have any driver RX features configured on the interface? */
- vnet_feature_start_device_input_x1 (xd->vlib_sw_if_index, &next0,
- b0, l3_offset0);
-
- vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
- to_next, n_left_to_next,
- bi0, next0);
- n_buffers--;
- mb_index++;
- }
- vlib_put_next_frame (vm, node, next_index, n_left_to_next);
- }
-
- if (PREDICT_FALSE (vec_len (xd->d_trace_buffers[cpu_index]) > 0))
- {
- dpdk_rx_trace (dm, node, xd, queue_id, xd->d_trace_buffers[cpu_index],
- vec_len (xd->d_trace_buffers[cpu_index]));
- vlib_set_trace_count (vm, node, n_trace -
- vec_len (xd->d_trace_buffers[cpu_index]));
- }
-
- vlib_increment_combined_counter
- (vnet_get_main ()->interface_main.combined_sw_if_counters
- + VNET_INTERFACE_COUNTER_RX,
- cpu_index, xd->vlib_sw_if_index, mb_index, n_rx_bytes);
-
- dpdk_worker_t *dw = vec_elt_at_index (dm->workers, cpu_index);
- dw->aggregate_rx_packets += mb_index;
-
- return mb_index;
-}
-
-static inline void
-poll_rate_limit (dpdk_main_t * dm)
-{
- /* Limit the poll rate by sleeping for N msec between polls */
- if (PREDICT_FALSE (dm->poll_sleep != 0))
- {
- struct timespec ts, tsrem;
-
- ts.tv_sec = 0;
-      ts.tv_nsec = 1000 * 1000 * dm->poll_sleep;  /* poll_sleep msec -> nsec */
-
- while (nanosleep (&ts, &tsrem) < 0)
- {
- ts = tsrem;
- }
- }
-}
-
-/** \brief Main DPDK input node
- @node dpdk-input
-
- This is the main DPDK input node: across each assigned interface,
- call rte_eth_rx_burst(...) or similar to obtain a vector of
- packets to process. Handle early packet discard. Derive @c
- vlib_buffer_t metadata from <code>struct rte_mbuf</code> metadata,
- Depending on the resulting metadata: adjust <code>b->current_data,
- b->current_length </code> and dispatch directly to
- ip4-input-no-checksum, or ip6-input. Trace the packet if required.
-
- @param vm vlib_main_t corresponding to the current thread
- @param node vlib_node_runtime_t
- @param f vlib_frame_t input-node, not used.
-
- @par Graph mechanics: buffer metadata, next index usage
-
- @em Uses:
- - <code>struct rte_mbuf mb->ol_flags</code>
- - PKT_RX_IP_CKSUM_BAD
- - <code> RTE_ETH_IS_xxx_HDR(mb->packet_type) </code>
- - packet classification result
-
- @em Sets:
- - <code>b->error</code> if the packet is to be dropped immediately
- - <code>b->current_data, b->current_length</code>
- - adjusted as needed to skip the L2 header in direct-dispatch cases
- - <code>vnet_buffer(b)->sw_if_index[VLIB_RX]</code>
- - rx interface sw_if_index
- - <code>vnet_buffer(b)->sw_if_index[VLIB_TX] = ~0</code>
- - required by ipX-lookup
- - <code>b->flags</code>
- - to indicate multi-segment pkts (VLIB_BUFFER_NEXT_PRESENT), etc.
-
- <em>Next Nodes:</em>
- - Static arcs to: error-drop, ethernet-input,
- ip4-input-no-checksum, ip6-input, mpls-input
- - per-interface redirection, controlled by
- <code>xd->per_interface_next_index</code>
-*/
-
-static uword
-dpdk_input (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * f)
-{
- dpdk_main_t *dm = &dpdk_main;
- dpdk_device_t *xd;
- uword n_rx_packets = 0;
- dpdk_device_and_queue_t *dq;
- u32 cpu_index = os_get_cpu_number ();
-
- /*
- * Poll all devices on this cpu for input/interrupts.
- */
- /* *INDENT-OFF* */
- vec_foreach (dq, dm->devices_by_cpu[cpu_index])
- {
- xd = vec_elt_at_index(dm->devices, dq->device);
- n_rx_packets += dpdk_device_input (dm, xd, node, cpu_index, dq->queue_id);
- }
- /* *INDENT-ON* */
-
- poll_rate_limit (dm);
-
- return n_rx_packets;
-}
-
-/* *INDENT-OFF* */
-VLIB_REGISTER_NODE (dpdk_input_node) = {
- .function = dpdk_input,
- .type = VLIB_NODE_TYPE_INPUT,
- .name = "dpdk-input",
- .sibling_of = "device-input",
-
- /* Will be enabled if/when hardware is detected. */
- .state = VLIB_NODE_STATE_DISABLED,
-
- .format_buffer = format_ethernet_header_with_length,
- .format_trace = format_dpdk_rx_dma_trace,
-
- .n_errors = DPDK_N_ERROR,
- .error_strings = dpdk_error_strings,
-};
-
-VLIB_NODE_FUNCTION_MULTIARCH (dpdk_input_node, dpdk_input);
-/* *INDENT-ON* */
-
-/*
- * fd.io coding-style-patch-verification: ON
- *
- * Local Variables:
- * eval: (c-set-style "gnu")
- * End:
- */
diff --git a/vnet/vnet/devices/dpdk/qos_doc.md b/vnet/vnet/devices/dpdk/qos_doc.md
deleted file mode 100644
index 9bd0659d616..00000000000
--- a/vnet/vnet/devices/dpdk/qos_doc.md
+++ /dev/null
@@ -1,404 +0,0 @@
-# QoS Hierarchical Scheduler {#qos_doc}
-
-The Quality-of-Service (QoS) scheduler performs egress-traffic management by
-prioritizing the transmission of packets of different service types and
-subscribers based on their Service Level Agreements (SLAs). The QoS scheduler
-can be enabled on one or more NIC output interfaces as required.
-
-
-## Overview
-
-The QoS scheduler supports a number of scheduling and shaping levels which
-form a hierarchical tree. The first level in the hierarchy is the port (i.e.
-the physical interface), which constitutes the root node of the tree. The
-next level is the subport, which represents a group of users/subscribers. An
-individual user/subscriber is represented by a pipe at the level below. Each
-user can carry different traffic types with specific loss-rate, jitter, and
-latency requirements; these are represented at the next level as distinct
-traffic classes. The last level contains the queues that are grouped together
-to hold the packets of a specific traffic class.
-
-The QoS scheduler implementation requires flow classification, enqueue and
-dequeue operations. Flow classification is a mandatory stage for HQoS in
-which incoming packets are classified by mapping packet field information to
-a 5-tuple (HQoS subport, pipe, traffic class, queue within traffic class, and
-color) that is stored in the mbuf sched field. The enqueue operation uses
-this information to determine the queue in which to store the packet; at this
-stage, if the specific queue is full, QoS drops the packet. The dequeue
-operation consists of scheduling the packet based on its length and the
-available credits, and handing the scheduled packet over to the output
-interface.
-
-For more information on the QoS scheduler, please refer to the DPDK
-Programmer's Guide:
-http://dpdk.org/doc/guides/prog_guide/qos_framework.html
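-
-As a minimal illustration (not part of VPP itself) of how these stages map
-onto the underlying DPDK rte_sched API, the sketch below classifies a burst
-of packets, enqueues them into an HQoS port and dequeues the scheduled
-packets. The function names and the hard-coded 5-tuple values are
-placeholders for illustration only; the actual VPP integration is described
-in the sections below.
-
-```
-#include <rte_mbuf.h>
-#include <rte_meter.h>
-#include <rte_sched.h>
-
-/* Classification: write a (subport, pipe, traffic class, queue, color)
- * 5-tuple into the mbuf sched field. A real classifier derives these
- * values from packet fields; zeros are used here as placeholders. */
-static void
-classify_pkt (struct rte_mbuf *pkt)
-{
-  rte_sched_port_pkt_write (pkt, /* subport */ 0, /* pipe */ 0,
-                            /* traffic class */ 0, /* queue */ 0,
-                            e_RTE_METER_GREEN);
-}
-
-/* Enqueue a burst into the HQoS port (packets whose queue is full are
- * dropped inside rte_sched), then dequeue up to 'burst' scheduled packets
- * ready for transmission on the output interface. */
-static int
-hqos_sched_burst (struct rte_sched_port *port, struct rte_mbuf **pkts,
-                  uint32_t n_pkts, struct rte_mbuf **tx_pkts, uint32_t burst)
-{
-  uint32_t i;
-
-  for (i = 0; i < n_pkts; i++)
-    classify_pkt (pkts[i]);
-
-  rte_sched_port_enqueue (port, pkts, n_pkts);
-  return rte_sched_port_dequeue (port, tx_pkts, burst);
-}
-```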
-
-
-### QoS Scheduler Parameters
-
-The following illustrates the default HQoS configuration for each 10GbE
-output port:
-
-Single subport (subport 0):
- - Subport rate set to 100% of port rate
- - Each of the 4 traffic classes has rate set to 100% of port rate
-
-4K pipes per subport 0 (pipes 0 .. 4095) with identical configuration:
- - Pipe rate set to 1/4K of port rate
- - Each of the 4 traffic classes has rate set to 100% of pipe rate
- - Within each traffic class, the byte-level WRR weights for the 4 queues are set to 1:1:1:1
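-
-For example, with the default port rate of 1250000000 bytes/second and 4096
-pipes per subport, each pipe token bucket rate works out to
-1250000000 / 4096, approximately 305175 bytes/second, which is the tb_rate
-value used in the default pipe profile shown below.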
-
-
-#### Port configuration
-
-```
-port {
- rate 1250000000 /* Assuming 10GbE port */
- frame_overhead 24 /* Overhead fields per Ethernet frame:
- * 7B (Preamble) +
- * 1B (Start of Frame Delimiter (SFD)) +
- * 4B (Frame Check Sequence (FCS)) +
- * 12B (Inter Frame Gap (IFG))
- */
- mtu 1522 /* Assuming Ethernet/IPv4 pkt (FCS not included) */
- n_subports_per_port 1 /* Number of subports per output interface */
- n_pipes_per_subport 4096 /* Number of pipes (users/subscribers) */
- queue_sizes 64 64 64 64 /* Packet queue size for each traffic class.
- * All queues within the same pipe traffic class
- * have the same size. Queues from different
- * pipes serving the same traffic class have
- * the same size. */
-}
-```
-
-
-#### Subport configuration
-
-```
-subport 0 {
- tb_rate 1250000000 /* Subport level token bucket rate (bytes per second) */
- tb_size 1000000 /* Subport level token bucket size (bytes) */
- tc0_rate 1250000000 /* Subport level token bucket rate for traffic class 0 (bytes per second) */
- tc1_rate 1250000000 /* Subport level token bucket rate for traffic class 1 (bytes per second) */
- tc2_rate 1250000000 /* Subport level token bucket rate for traffic class 2 (bytes per second) */
- tc3_rate 1250000000 /* Subport level token bucket rate for traffic class 3 (bytes per second) */
- tc_period 10 /* Time interval for refilling the token bucket associated with traffic class (Milliseconds) */
- pipe 0 4095 profile 0 /* pipes (users/subscribers) configured with pipe profile 0 */
-}
-```
-
-
-#### Pipe configuration
-
-```
-pipe_profile 0 {
- tb_rate 305175 /* Pipe level token bucket rate (bytes per second) */
- tb_size 1000000 /* Pipe level token bucket size (bytes) */
- tc0_rate 305175 /* Pipe level token bucket rate for traffic class 0 (bytes per second) */
- tc1_rate 305175 /* Pipe level token bucket rate for traffic class 1 (bytes per second) */
- tc2_rate 305175 /* Pipe level token bucket rate for traffic class 2 (bytes per second) */
- tc3_rate 305175 /* Pipe level token bucket rate for traffic class 3 (bytes per second) */
- tc_period 40 /* Time interval for refilling the token bucket associated with traffic class at pipe level (Milliseconds) */
- tc3_oversubscription_weight 1 /* Weight traffic class 3 oversubscription */
- tc0_wrr_weights 1 1 1 1 /* Pipe queues WRR weights for traffic class 0 */
- tc1_wrr_weights 1 1 1 1 /* Pipe queues WRR weights for traffic class 1 */
- tc2_wrr_weights 1 1 1 1 /* Pipe queues WRR weights for traffic class 2 */
- tc3_wrr_weights 1 1 1 1 /* Pipe queues WRR weights for traffic class 3 */
-}
-```
-
-
-#### Random Early Detection (RED) parameters per traffic class and color (Green / Yellow / Red)
-
-```
-red {
- tc0_wred_min 48 40 32 /* Minimum threshold for traffic class 0 queue (min_th) in number of packets */
- tc0_wred_max 64 64 64 /* Maximum threshold for traffic class 0 queue (max_th) in number of packets */
- tc0_wred_inv_prob 10 10 10 /* Inverse of packet marking probability for traffic class 0 queue (maxp = 1 / maxp_inv) */
- tc0_wred_weight 9 9 9 /* Traffic Class 0 queue weight */
- tc1_wred_min 48 40 32 /* Minimum threshold for traffic class 1 queue (min_th) in number of packets */
- tc1_wred_max 64 64 64 /* Maximum threshold for traffic class 1 queue (max_th) in number of packets */
- tc1_wred_inv_prob 10 10 10 /* Inverse of packet marking probability for traffic class 1 queue (maxp = 1 / maxp_inv) */
- tc1_wred_weight 9 9 9 /* Traffic Class 1 queue weight */
- tc2_wred_min 48 40 32 /* Minimum threshold for traffic class 2 queue (min_th) in number of packets */
- tc2_wred_max 64 64 64 /* Maximum threshold for traffic class 2 queue (max_th) in number of packets */
- tc2_wred_inv_prob 10 10 10 /* Inverse of packet marking probability for traffic class 2 queue (maxp = 1 / maxp_inv) */
- tc2_wred_weight 9 9 9 /* Traffic Class 2 queue weight */
- tc3_wred_min 48 40 32 /* Minimum threshold for traffic class 3 queue (min_th) in number of packets */
- tc3_wred_max 64 64 64 /* Maximum threshold for traffic class 3 queue (max_th) in number of packets */
- tc3_wred_inv_prob 10 10 10 /* Inverse of packet marking probability for traffic class 3 queue (maxp = 1 / maxp_inv) */
- tc3_wred_weight 9 9 9 /* Traffic Class 3 queue weight */
-}
-```
-
-
-### DPDK QoS Scheduler Integration in VPP
-
-The Hierarchical Quality-of-Service (HQoS) scheduler object can be seen as
-part of the logical NIC output interface. To enable HQoS on a specific output
-interface, the vpp startup.conf file has to be configured accordingly. The
-output interface that requires HQoS should have the "hqos" parameter
-specified in the dpdk section. An optional parameter "hqos-thread" can be
-used to associate the output interface with a specific HQoS thread. In the
-cpu section of the config file, "corelist-hqos-threads" assigns logical cpu
-cores to run the HQoS threads. An HQoS thread can run multiple HQoS objects,
-each associated with a different output interface. Instead of writing packets
-to the NIC TX queue directly, worker threads write them to software queues.
-The HQoS threads read the software queues and enqueue the packets to HQoS
-objects, and also dequeue packets from the HQoS objects and write them to the
-NIC output interfaces. Since the worker threads need to be able to send
-packets to any output interface, each HQoS object associated with a NIC
-output interface has as many software queues as there are worker threads.
-
-The following sample startup configuration file uses 4 worker threads feeding
-2 HQoS threads, each of which handles the QoS scheduler for one output
-interface.
-
-```
-dpdk {
- socket-mem 16384,16384
-
- dev 0000:02:00.0 {
- num-rx-queues 2
- hqos
- }
- dev 0000:06:00.0 {
- num-rx-queues 2
- hqos
- }
-
- num-mbufs 1000000
-}
-
-cpu {
- main-core 0
- corelist-workers 1, 2, 3, 4
- corelist-hqos-threads 5, 6
-}
-```
-
-
-### QoS scheduler CLI Commands
-
-Each QoS scheduler instance is initialised with the default parameters
-required to configure the HQoS port, subport, pipe and queues. Some of the
-parameters can be reconfigured at run time through CLI commands.
-
-
-#### Configuration
-
-The following commands can be used to configure QoS scheduler parameters.
-
-The command below can be used to set the subport level parameters such as
-token bucket rate (bytes per second), token bucket size (bytes), traffic
-class rates (bytes per second) and token update period (milliseconds).
-
-```
-set dpdk interface hqos subport <if-name> subport <n> [rate <n>]
- [bktsize <n>] [tc0 <n>] [tc1 <n>] [tc2 <n>] [tc3 <n>] [period <n>]
-```
-
-The following command can be used to set the pipe profile.
-
-```
-set dpdk interface hqos pipe <if-name> subport <n> pipe <n> profile <n>
-```
-
-To assign a QoS scheduler instance to a specific thread, the following
-command can be used.
-
-```
-set dpdk interface hqos placement <if-name> thread <n>
-```
-
-The command below is used to set the packet fields required for classifying
-the incoming packet. As a result of the classification process, the packet
-field information is mapped to a 5-tuple (subport, pipe, traffic class, queue
-within traffic class, color) and stored in the packet mbuf.
-
-```
-set dpdk interface hqos pktfield <if-name> id <n> offset <n> mask <n>
-```
-
-The DSCP table entries used for identifying the traffic class and queue can be set using the command below:
-
-```
-set dpdk interface hqos tctbl <if-name> entry <n> tc <n> queue <n>
-```
-
-
-#### Show Command
-
-The QoS scheduler configuration can be displayed using the command below.
-
-```
- vpp# show dpdk interface hqos TenGigabitEthernet2/0/0
- Thread:
- Input SWQ size = 4096 packets
- Enqueue burst size = 256 packets
- Dequeue burst size = 220 packets
- Packet field 0: slab position = 0, slab bitmask = 0x0000000000000000
- Packet field 1: slab position = 40, slab bitmask = 0x0000000fff000000
- Packet field 2: slab position = 8, slab bitmask = 0x00000000000000fc
- Packet field 2 translation table:
- [ 0 .. 15]: 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
- [16 .. 31]: 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
- [32 .. 47]: 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
- [48 .. 63]: 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
- Port:
- Rate = 1250000000 bytes/second
- MTU = 1514 bytes
- Frame overhead = 24 bytes
- Number of subports = 1
- Number of pipes per subport = 4096
- Packet queue size: TC0 = 64, TC1 = 64, TC2 = 64, TC3 = 64 packets
- Number of pipe profiles = 1
- Pipe profile 0:
- Rate = 305175 bytes/second
- Token bucket size = 1000000 bytes
- Traffic class rate: TC0 = 305175, TC1 = 305175, TC2 = 305175, TC3 = 305175 bytes/second
- TC period = 40 milliseconds
- TC0 WRR weights: Q0 = 1, Q1 = 1, Q2 = 1, Q3 = 1
- TC1 WRR weights: Q0 = 1, Q1 = 1, Q2 = 1, Q3 = 1
- TC2 WRR weights: Q0 = 1, Q1 = 1, Q2 = 1, Q3 = 1
- TC3 WRR weights: Q0 = 1, Q1 = 1, Q2 = 1, Q3 = 1
-```
-
-The QoS scheduler placement over the logical cpu cores can be displayed using
-the command below.
-
-```
- vpp# show dpdk interface hqos placement
- Thread 5 (vpp_hqos-threads_0 at lcore 5):
- TenGigabitEthernet2/0/0 queue 0
- Thread 6 (vpp_hqos-threads_1 at lcore 6):
- TenGigabitEthernet4/0/1 queue 0
-```
-
-
-### QoS Scheduler Binary APIs
-
-This section explains the available binary APIs for configuring QoS scheduler
-parameters at run time.
-
-The following API can be used to set the pipe profile of a pipe that belongs
-to a given subport:
-
-```
-sw_interface_set_dpdk_hqos_pipe rx <intfc> | sw_if_index <id>
- subport <subport-id> pipe <pipe-id> profile <profile-id>
-```
-
-The data structures used to set the pipe profile parameters are as follows:
-
-```
- /** \\brief DPDK interface HQoS pipe profile set request
- @param client_index - opaque cookie to identify the sender
- @param context - sender context, to match reply w/ request
- @param sw_if_index - the interface
- @param subport - subport ID
- @param pipe - pipe ID within its subport
- @param profile - pipe profile ID
- */
- define sw_interface_set_dpdk_hqos_pipe {
- u32 client_index;
- u32 context;
- u32 sw_if_index;
- u32 subport;
- u32 pipe;
- u32 profile;
- };
-
- /** \\brief DPDK interface HQoS pipe profile set reply
- @param context - sender context, to match reply w/ request
- @param retval - request return code
- */
- define sw_interface_set_dpdk_hqos_pipe_reply {
- u32 context;
- i32 retval;
- };
-```
-
-The following API can be used to set the subport level parameters, for
-example token bucket rate (bytes per second), token bucket size (bytes),
-traffic class rate (bytes per second) and token update period.
-
-```
-sw_interface_set_dpdk_hqos_subport rx <intfc> | sw_if_index <id>
- subport <subport-id> [rate <n>] [bktsize <n>]
- [tc0 <n>] [tc1 <n>] [tc2 <n>] [tc3 <n>] [period <n>]
-```
-
-The data structures used to set the subport level parameters are as follows:
-
-```
- /** \\brief DPDK interface HQoS subport parameters set request
- @param client_index - opaque cookie to identify the sender
- @param context - sender context, to match reply w/ request
- @param sw_if_index - the interface
- @param subport - subport ID
- @param tb_rate - subport token bucket rate (measured in bytes/second)
- @param tb_size - subport token bucket size (measured in credits)
- @param tc_rate - subport traffic class 0 .. 3 rates (measured in bytes/second)
- @param tc_period - enforcement period for rates (measured in milliseconds)
- */
- define sw_interface_set_dpdk_hqos_subport {
- u32 client_index;
- u32 context;
- u32 sw_if_index;
- u32 subport;
- u32 tb_rate;
- u32 tb_size;
- u32 tc_rate[4];
- u32 tc_period;
- };
-
- /** \\brief DPDK interface HQoS subport parameters set reply
- @param context - sender context, to match reply w/ request
- @param retval - request return code
- */
- define sw_interface_set_dpdk_hqos_subport_reply {
- u32 context;
- i32 retval;
- };
-```
-
-The following API can be used to set a DSCP table entry. The DSCP table has
-64 entries that map the packet DSCP field onto a traffic class and HQoS input
-queue.
-
-```
-sw_interface_set_dpdk_hqos_tctbl rx <intfc> | sw_if_index <id>
- entry <n> tc <n> queue <n>
-```
-
-The data structures used for setting DSCP table entries are given below.
-
-```
- /** \\brief DPDK interface HQoS tctbl entry set request
- @param client_index - opaque cookie to identify the sender
- @param context - sender context, to match reply w/ request
- @param sw_if_index - the interface
- @param entry - entry index ID
- @param tc - traffic class (0 .. 3)
- @param queue - traffic class queue (0 .. 3)
- */
- define sw_interface_set_dpdk_hqos_tctbl {
- u32 client_index;
- u32 context;
- u32 sw_if_index;
- u32 entry;
- u32 tc;
- u32 queue;
- };
-
- /** \\brief DPDK interface HQoS tctbl entry set reply
- @param context - sender context, to match reply w/ request
- @param retval - request return code
- */
- define sw_interface_set_dpdk_hqos_tctbl_reply {
- u32 context;
- i32 retval;
- };
-```