From c3a814be9dc769be942ff8029c7b6eccd4b3af05 Mon Sep 17 00:00:00 2001 From: Damjan Marion Date: Tue, 28 Feb 2017 19:22:22 +0100 Subject: dpdk: be a plugin Change-Id: I238258cdeb77035adc5e88903d824593d0a1da90 Signed-off-by: Damjan Marion --- src/plugins/dpdk.am | 50 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 50 insertions(+) create mode 100644 src/plugins/dpdk.am (limited to 'src/plugins/dpdk.am') diff --git a/src/plugins/dpdk.am b/src/plugins/dpdk.am new file mode 100644 index 00000000..212bbb73 --- /dev/null +++ b/src/plugins/dpdk.am @@ -0,0 +1,50 @@ +# Copyright (c) 2016 Cisco Systems, Inc. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at: +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +vppapitestplugins_LTLIBRARIES += dpdk_test_plugin.la +vppplugins_LTLIBRARIES += dpdk_plugin.la + +dpdk_plugin_la_LDFLAGS = $(AM_LDFLAGS) -Wl,--whole-archive,-l:libdpdk.a,--no-whole-archive,-lm,-ldl + +dpdk_plugin_la_SOURCES = \ + dpdk/init.c \ + dpdk/main.c \ + dpdk/buffer.c \ + dpdk/thread.c \ + dpdk/device/cli.c \ + dpdk/device/dpdk_priv.h \ + dpdk/device/device.c \ + dpdk/device/format.c \ + dpdk/device/node.c \ + dpdk/hqos/hqos.c \ + dpdk/ipsec/esp_encrypt.c \ + dpdk/ipsec/esp_decrypt.c \ + dpdk/ipsec/crypto_node.c \ + dpdk/ipsec/cli.c \ + dpdk/ipsec/ipsec.c \ + dpdk/api/dpdk_plugin.api.h + +API_FILES += dpdk/api/dpdk.api + +nobase_include_HEADERS += \ + dpdk/device/dpdk.h \ + dpdk/api/dpdk_all_api_h.h + +nobase_include_HEADERS += \ + dpdk/ipsec/ipsec.h \ + dpdk/ipsec/esp.h + +dpdk_test_plugin_la_SOURCES = \ + dpdk/api/dpdk_test.c dpdk/api/dpdk_plugin.api.h + +# vi:syntax=automake -- cgit 1.2.3-korg From 53865c0e55c2fa0347df4d8338dca5f709c31f16 Mon Sep 17 00:00:00 2001 From: Damjan Marion Date: Mon, 6 Mar 2017 12:06:29 +0100 Subject: dpdk: init.c should be under device/ Change-Id: I80831cee062a38a0f5ab1f1e56c2dc6dcd512b9d Signed-off-by: Damjan Marion --- src/plugins/dpdk.am | 2 +- src/plugins/dpdk/device/init.c | 2074 ++++++++++++++++++++++++++++++++++++++++ src/plugins/dpdk/init.c | 2074 ---------------------------------------- 3 files changed, 2075 insertions(+), 2075 deletions(-) create mode 100755 src/plugins/dpdk/device/init.c delete mode 100755 src/plugins/dpdk/init.c (limited to 'src/plugins/dpdk.am') diff --git a/src/plugins/dpdk.am b/src/plugins/dpdk.am index 212bbb73..01383de6 100644 --- a/src/plugins/dpdk.am +++ b/src/plugins/dpdk.am @@ -17,7 +17,6 @@ vppplugins_LTLIBRARIES += dpdk_plugin.la dpdk_plugin_la_LDFLAGS = $(AM_LDFLAGS) -Wl,--whole-archive,-l:libdpdk.a,--no-whole-archive,-lm,-ldl dpdk_plugin_la_SOURCES = \ - dpdk/init.c \ dpdk/main.c \ dpdk/buffer.c \ dpdk/thread.c \ @@ -25,6 +24,7 @@ dpdk_plugin_la_SOURCES = \ dpdk/device/dpdk_priv.h \ dpdk/device/device.c \ dpdk/device/format.c \ + dpdk/device/init.c \ dpdk/device/node.c \ dpdk/hqos/hqos.c \ dpdk/ipsec/esp_encrypt.c \ diff --git a/src/plugins/dpdk/device/init.c b/src/plugins/dpdk/device/init.c new file mode 100755 index 00000000..e009ef3e --- /dev/null +++ b/src/plugins/dpdk/device/init.c @@ -0,0 +1,2074 @@ +/* + * Copyright (c) 2015 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +#include + +dpdk_main_t dpdk_main; + +#include +#include + +/* define message IDs */ +#include + +#define vl_typedefs /* define message structures */ +#include +#undef vl_typedefs + +#define vl_endianfun /* define message structures */ +#include +#undef vl_endianfun + +#define vl_print(handle, ...) vlib_cli_output (handle, __VA_ARGS__) + +/* Get the API version number. */ +#define vl_api_version(n,v) static u32 api_version=(v); +#include +#undef vl_api_version + +/* Macro to finish up custom dump fns */ +#define FINISH \ + vec_add1 (s, 0); \ + vl_print (handle, (char *)s); \ + vec_free (s); \ + return handle; + +#include + +static void + vl_api_sw_interface_set_dpdk_hqos_pipe_t_handler + (vl_api_sw_interface_set_dpdk_hqos_pipe_t * mp) +{ + vl_api_sw_interface_set_dpdk_hqos_pipe_reply_t *rmp; + int rv = 0; + + dpdk_main_t *dm = &dpdk_main; + dpdk_device_t *xd; + + u32 sw_if_index = ntohl (mp->sw_if_index); + u32 subport = ntohl (mp->subport); + u32 pipe = ntohl (mp->pipe); + u32 profile = ntohl (mp->profile); + vnet_hw_interface_t *hw; + + VALIDATE_SW_IF_INDEX (mp); + + /* hw_if & dpdk device */ + hw = vnet_get_sup_hw_interface (dm->vnet_main, sw_if_index); + + xd = vec_elt_at_index (dm->devices, hw->dev_instance); + + rv = rte_sched_pipe_config (xd->hqos_ht->hqos, subport, pipe, profile); + + BAD_SW_IF_INDEX_LABEL; + + REPLY_MACRO (VL_API_SW_INTERFACE_SET_DPDK_HQOS_PIPE_REPLY); +} + +static void *vl_api_sw_interface_set_dpdk_hqos_pipe_t_print + (vl_api_sw_interface_set_dpdk_hqos_pipe_t * mp, void *handle) +{ + u8 *s; + + s = format (0, "SCRIPT: sw_interface_set_dpdk_hqos_pipe "); + + s = format (s, "sw_if_index %u ", ntohl (mp->sw_if_index)); + + s = format (s, "subport %u pipe %u profile %u ", + ntohl (mp->subport), ntohl (mp->pipe), ntohl (mp->profile)); + + FINISH; +} + +static void + vl_api_sw_interface_set_dpdk_hqos_subport_t_handler + (vl_api_sw_interface_set_dpdk_hqos_subport_t * mp) +{ + vl_api_sw_interface_set_dpdk_hqos_subport_reply_t *rmp; + int rv = 0; + + dpdk_main_t *dm = &dpdk_main; + dpdk_device_t *xd; + struct rte_sched_subport_params p; + + u32 sw_if_index = ntohl (mp->sw_if_index); + u32 subport = ntohl (mp->subport); + p.tb_rate = ntohl (mp->tb_rate); + p.tb_size = ntohl (mp->tb_size); + p.tc_rate[0] = ntohl (mp->tc_rate[0]); + p.tc_rate[1] = ntohl (mp->tc_rate[1]); + p.tc_rate[2] = ntohl (mp->tc_rate[2]); + p.tc_rate[3] = ntohl (mp->tc_rate[3]); + p.tc_period = ntohl (mp->tc_period); + + vnet_hw_interface_t *hw; + + VALIDATE_SW_IF_INDEX (mp); + + /* hw_if & dpdk device */ + hw = vnet_get_sup_hw_interface (dm->vnet_main, sw_if_index); + + xd = vec_elt_at_index (dm->devices, hw->dev_instance); + + rv = rte_sched_subport_config (xd->hqos_ht->hqos, subport, &p); + + BAD_SW_IF_INDEX_LABEL; + + REPLY_MACRO (VL_API_SW_INTERFACE_SET_DPDK_HQOS_SUBPORT_REPLY); +} + +static void *vl_api_sw_interface_set_dpdk_hqos_subport_t_print + (vl_api_sw_interface_set_dpdk_hqos_subport_t * mp, void *handle) +{ + u8 *s; + + s = format (0, "SCRIPT: sw_interface_set_dpdk_hqos_subport "); + + s = format (s, "sw_if_index %u ", ntohl (mp->sw_if_index)); + + s = + format (s, + "subport %u rate %u bkt_size %u tc0 %u tc1 %u tc2 %u tc3 %u period %u", + ntohl (mp->subport), ntohl (mp->tb_rate), ntohl (mp->tb_size), + ntohl (mp->tc_rate[0]), ntohl (mp->tc_rate[1]), + ntohl (mp->tc_rate[2]), ntohl (mp->tc_rate[3]), + ntohl (mp->tc_period)); + + FINISH; +} + +static void + vl_api_sw_interface_set_dpdk_hqos_tctbl_t_handler + (vl_api_sw_interface_set_dpdk_hqos_tctbl_t * mp) +{ + vl_api_sw_interface_set_dpdk_hqos_tctbl_reply_t *rmp; + int rv = 0; + + dpdk_main_t *dm = &dpdk_main; + vlib_thread_main_t *tm = vlib_get_thread_main (); + dpdk_device_t *xd; + + u32 sw_if_index = ntohl (mp->sw_if_index); + u32 entry = ntohl (mp->entry); + u32 tc = ntohl (mp->tc); + u32 queue = ntohl (mp->queue); + u32 val, i; + + vnet_hw_interface_t *hw; + + VALIDATE_SW_IF_INDEX (mp); + + /* hw_if & dpdk device */ + hw = vnet_get_sup_hw_interface (dm->vnet_main, sw_if_index); + + xd = vec_elt_at_index (dm->devices, hw->dev_instance); + + if (tc >= RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE) + { + clib_warning ("invalid traffic class !!"); + rv = VNET_API_ERROR_INVALID_VALUE; + goto done; + } + if (queue >= RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS) + { + clib_warning ("invalid queue !!"); + rv = VNET_API_ERROR_INVALID_VALUE; + goto done; + } + + /* Detect the set of worker threads */ + uword *p = hash_get_mem (tm->thread_registrations_by_name, "workers"); + + if (p == 0) + { + clib_warning ("worker thread registration AWOL !!"); + rv = VNET_API_ERROR_INVALID_VALUE_2; + goto done; + } + + vlib_thread_registration_t *tr = (vlib_thread_registration_t *) p[0]; + int worker_thread_first = tr->first_index; + int worker_thread_count = tr->count; + + val = tc * RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS + queue; + for (i = 0; i < worker_thread_count; i++) + xd->hqos_wt[worker_thread_first + i].hqos_tc_table[entry] = val; + + BAD_SW_IF_INDEX_LABEL; +done: + + REPLY_MACRO (VL_API_SW_INTERFACE_SET_DPDK_HQOS_TCTBL_REPLY); +} + +static void *vl_api_sw_interface_set_dpdk_hqos_tctbl_t_print + (vl_api_sw_interface_set_dpdk_hqos_tctbl_t * mp, void *handle) +{ + u8 *s; + + s = format (0, "SCRIPT: sw_interface_set_dpdk_hqos_tctbl "); + + s = format (s, "sw_if_index %u ", ntohl (mp->sw_if_index)); + + s = format (s, "entry %u tc %u queue %u", + ntohl (mp->entry), ntohl (mp->tc), ntohl (mp->queue)); + + FINISH; +} + +#define foreach_dpdk_plugin_api_msg \ +_(SW_INTERFACE_SET_DPDK_HQOS_PIPE, sw_interface_set_dpdk_hqos_pipe) \ +_(SW_INTERFACE_SET_DPDK_HQOS_SUBPORT, sw_interface_set_dpdk_hqos_subport) \ +_(SW_INTERFACE_SET_DPDK_HQOS_TCTBL, sw_interface_set_dpdk_hqos_tctbl) + +/* Set up the API message handling tables */ +static clib_error_t * +dpdk_plugin_api_hookup (vlib_main_t * vm) +{ + dpdk_main_t *dm __attribute__ ((unused)) = &dpdk_main; +#define _(N,n) \ + vl_msg_api_set_handlers((VL_API_##N + dm->msg_id_base), \ + #n, \ + vl_api_##n##_t_handler, \ + vl_noop_handler, \ + vl_api_##n##_t_endian, \ + vl_api_##n##_t_print, \ + sizeof(vl_api_##n##_t), 1); + foreach_dpdk_plugin_api_msg; +#undef _ + return 0; +} + +#define vl_msg_name_crc_list +#include +#undef vl_msg_name_crc_list + +static void +setup_message_id_table (dpdk_main_t * dm, api_main_t * am) +{ +#define _(id,n,crc) \ + vl_msg_api_add_msg_name_crc (am, #n "_" #crc, id + dm->msg_id_base); + foreach_vl_msg_name_crc_dpdk; +#undef _ +} + +// TODO +/* +static void plugin_custom_dump_configure (dpdk_main_t * dm) +{ +#define _(n,f) dm->api_main->msg_print_handlers \ + [VL_API_##n + dm->msg_id_base] \ + = (void *) vl_api_##f##_t_print; + foreach_dpdk_plugin_api_msg; +#undef _ +} +*/ +/* force linker to link functions used by vlib and declared weak */ +void *vlib_weakly_linked_functions[] = { + &rte_pktmbuf_init, + &rte_pktmbuf_pool_init, +}; + +#define LINK_STATE_ELOGS 0 + +#define DEFAULT_HUGE_DIR "/run/vpp/hugepages" +#define VPP_RUN_DIR "/run/vpp" + +/* Port configuration, mildly modified Intel app values */ + +static struct rte_eth_conf port_conf_template = { + .rxmode = { + .split_hdr_size = 0, + .header_split = 0, /**< Header Split disabled */ + .hw_ip_checksum = 0, /**< IP checksum offload disabled */ + .hw_vlan_filter = 0, /**< VLAN filtering disabled */ + .hw_strip_crc = 0, /**< CRC stripped by hardware */ + }, + .txmode = { + .mq_mode = ETH_MQ_TX_NONE, + }, +}; + +clib_error_t * +dpdk_port_setup (dpdk_main_t * dm, dpdk_device_t * xd) +{ + int rv; + int j; + + ASSERT (os_get_cpu_number () == 0); + + if (xd->flags & DPDK_DEVICE_FLAG_ADMIN_UP) + { + vnet_hw_interface_set_flags (dm->vnet_main, xd->vlib_hw_if_index, 0); + rte_eth_dev_stop (xd->device_index); + } + + rv = rte_eth_dev_configure (xd->device_index, xd->rx_q_used, + xd->tx_q_used, &xd->port_conf); + + if (rv < 0) + return clib_error_return (0, "rte_eth_dev_configure[%d]: err %d", + xd->device_index, rv); + + /* Set up one TX-queue per worker thread */ + for (j = 0; j < xd->tx_q_used; j++) + { + rv = rte_eth_tx_queue_setup (xd->device_index, j, xd->nb_tx_desc, + xd->cpu_socket, &xd->tx_conf); + + /* retry with any other CPU socket */ + if (rv < 0) + rv = rte_eth_tx_queue_setup (xd->device_index, j, xd->nb_tx_desc, + SOCKET_ID_ANY, &xd->tx_conf); + if (rv < 0) + break; + } + + if (rv < 0) + return clib_error_return (0, "rte_eth_tx_queue_setup[%d]: err %d", + xd->device_index, rv); + + for (j = 0; j < xd->rx_q_used; j++) + { + + rv = rte_eth_rx_queue_setup (xd->device_index, j, xd->nb_rx_desc, + xd->cpu_socket, 0, + dm-> + pktmbuf_pools[xd->cpu_socket_id_by_queue + [j]]); + + /* retry with any other CPU socket */ + if (rv < 0) + rv = rte_eth_rx_queue_setup (xd->device_index, j, xd->nb_rx_desc, + SOCKET_ID_ANY, 0, + dm-> + pktmbuf_pools[xd->cpu_socket_id_by_queue + [j]]); + if (rv < 0) + return clib_error_return (0, "rte_eth_rx_queue_setup[%d]: err %d", + xd->device_index, rv); + } + + if (xd->flags & DPDK_DEVICE_FLAG_ADMIN_UP) + { + int rv; + rv = rte_eth_dev_start (xd->device_index); + if (!rv && xd->default_mac_address) + rv = rte_eth_dev_default_mac_addr_set (xd->device_index, + (struct ether_addr *) + xd->default_mac_address); + if (rv < 0) + clib_warning ("rte_eth_dev_start %d returned %d", + xd->device_index, rv); + } + return 0; +} + +static u32 +dpdk_flag_change (vnet_main_t * vnm, vnet_hw_interface_t * hi, u32 flags) +{ + dpdk_main_t *dm = &dpdk_main; + dpdk_device_t *xd = vec_elt_at_index (dm->devices, hi->dev_instance); + u32 old = 0; + + if (ETHERNET_INTERFACE_FLAG_CONFIG_PROMISC (flags)) + { + old = (xd->flags & DPDK_DEVICE_FLAG_PROMISC) != 0; + + if (flags & ETHERNET_INTERFACE_FLAG_ACCEPT_ALL) + xd->flags |= DPDK_DEVICE_FLAG_PROMISC; + else + xd->flags &= ~DPDK_DEVICE_FLAG_PROMISC; + + if (xd->flags & DPDK_DEVICE_FLAG_ADMIN_UP) + { + if (xd->flags & DPDK_DEVICE_FLAG_PROMISC) + rte_eth_promiscuous_enable (xd->device_index); + else + rte_eth_promiscuous_disable (xd->device_index); + } + } + else if (ETHERNET_INTERFACE_FLAG_CONFIG_MTU (flags)) + { + /* + * DAW-FIXME: The Cisco VIC firmware does not provide an api for a + * driver to dynamically change the mtu. If/when the + * VIC firmware gets fixed, then this should be removed. + */ + if (xd->pmd == VNET_DPDK_PMD_ENIC) + { + struct rte_eth_dev_info dev_info; + + /* + * Restore mtu to what has been set by CIMC in the firmware cfg. + */ + rte_eth_dev_info_get (xd->device_index, &dev_info); + hi->max_packet_bytes = dev_info.max_rx_pktlen; + + vlib_cli_output (vlib_get_main (), + "Cisco VIC mtu can only be changed " + "using CIMC then rebooting the server!"); + } + else + { + int rv; + + xd->port_conf.rxmode.max_rx_pkt_len = hi->max_packet_bytes; + + if (xd->flags & DPDK_DEVICE_FLAG_ADMIN_UP) + rte_eth_dev_stop (xd->device_index); + + rv = rte_eth_dev_configure + (xd->device_index, xd->rx_q_used, xd->tx_q_used, &xd->port_conf); + + if (rv < 0) + vlib_cli_output (vlib_get_main (), + "rte_eth_dev_configure[%d]: err %d", + xd->device_index, rv); + + rte_eth_dev_set_mtu (xd->device_index, hi->max_packet_bytes); + + if (xd->flags & DPDK_DEVICE_FLAG_ADMIN_UP) + { + int rv = rte_eth_dev_start (xd->device_index); + if (!rv && xd->default_mac_address) + rv = rte_eth_dev_default_mac_addr_set (xd->device_index, + (struct ether_addr *) + xd->default_mac_address); + if (rv < 0) + clib_warning ("rte_eth_dev_start %d returned %d", + xd->device_index, rv); + } + } + } + return old; +} + +void +dpdk_device_lock_init (dpdk_device_t * xd) +{ + int q; + vec_validate (xd->lockp, xd->tx_q_used - 1); + for (q = 0; q < xd->tx_q_used; q++) + { + xd->lockp[q] = clib_mem_alloc_aligned (CLIB_CACHE_LINE_BYTES, + CLIB_CACHE_LINE_BYTES); + memset ((void *) xd->lockp[q], 0, CLIB_CACHE_LINE_BYTES); + } +} + +void +dpdk_device_lock_free (dpdk_device_t * xd) +{ + int q; + + for (q = 0; q < vec_len (xd->lockp); q++) + clib_mem_free ((void *) xd->lockp[q]); + vec_free (xd->lockp); + xd->lockp = 0; +} + +static clib_error_t * +dpdk_lib_init (dpdk_main_t * dm) +{ + u32 nports; + u32 nb_desc = 0; + int i; + clib_error_t *error; + vlib_main_t *vm = vlib_get_main (); + vlib_thread_main_t *tm = vlib_get_thread_main (); + vnet_sw_interface_t *sw; + vnet_hw_interface_t *hi; + dpdk_device_t *xd; + vlib_pci_addr_t last_pci_addr; + u32 last_pci_addr_port = 0; + vlib_thread_registration_t *tr, *tr_hqos; + uword *p, *p_hqos; + + u32 next_cpu = 0, next_hqos_cpu = 0; + u8 af_packet_port_id = 0; + last_pci_addr.as_u32 = ~0; + + dm->input_cpu_first_index = 0; + dm->input_cpu_count = 1; + + /* find out which cpus will be used for input */ + p = hash_get_mem (tm->thread_registrations_by_name, "workers"); + tr = p ? (vlib_thread_registration_t *) p[0] : 0; + + if (tr && tr->count > 0) + { + dm->input_cpu_first_index = tr->first_index; + dm->input_cpu_count = tr->count; + } + + vec_validate_aligned (dm->devices_by_cpu, tm->n_vlib_mains - 1, + CLIB_CACHE_LINE_BYTES); + + dm->hqos_cpu_first_index = 0; + dm->hqos_cpu_count = 0; + + /* find out which cpus will be used for I/O TX */ + p_hqos = hash_get_mem (tm->thread_registrations_by_name, "hqos-threads"); + tr_hqos = p_hqos ? (vlib_thread_registration_t *) p_hqos[0] : 0; + + if (tr_hqos && tr_hqos->count > 0) + { + dm->hqos_cpu_first_index = tr_hqos->first_index; + dm->hqos_cpu_count = tr_hqos->count; + } + + vec_validate_aligned (dm->devices_by_hqos_cpu, tm->n_vlib_mains - 1, + CLIB_CACHE_LINE_BYTES); + + nports = rte_eth_dev_count (); + if (nports < 1) + { + clib_warning ("DPDK drivers found no ports..."); + } + + if (CLIB_DEBUG > 0) + clib_warning ("DPDK drivers found %d ports...", nports); + + /* + * All buffers are all allocated from the same rte_mempool. + * Thus they all have the same number of data bytes. + */ + dm->vlib_buffer_free_list_index = + vlib_buffer_get_or_create_free_list (vm, + VLIB_BUFFER_DEFAULT_FREE_LIST_BYTES, + "dpdk rx"); + + if (dm->conf->enable_tcp_udp_checksum) + dm->buffer_flags_template &= ~(IP_BUFFER_L4_CHECKSUM_CORRECT + | IP_BUFFER_L4_CHECKSUM_COMPUTED); + + for (i = 0; i < nports; i++) + { + u8 addr[6]; + u8 vlan_strip = 0; + int j; + struct rte_eth_dev_info dev_info; + clib_error_t *rv; + struct rte_eth_link l; + dpdk_device_config_t *devconf = 0; + vlib_pci_addr_t pci_addr; + uword *p = 0; + + rte_eth_dev_info_get (i, &dev_info); + if (dev_info.pci_dev) /* bonded interface has no pci info */ + { + pci_addr.domain = dev_info.pci_dev->addr.domain; + pci_addr.bus = dev_info.pci_dev->addr.bus; + pci_addr.slot = dev_info.pci_dev->addr.devid; + pci_addr.function = dev_info.pci_dev->addr.function; + p = + hash_get (dm->conf->device_config_index_by_pci_addr, + pci_addr.as_u32); + } + + if (p) + devconf = pool_elt_at_index (dm->conf->dev_confs, p[0]); + else + devconf = &dm->conf->default_devconf; + + /* Create vnet interface */ + vec_add2_aligned (dm->devices, xd, 1, CLIB_CACHE_LINE_BYTES); + xd->nb_rx_desc = DPDK_NB_RX_DESC_DEFAULT; + xd->nb_tx_desc = DPDK_NB_TX_DESC_DEFAULT; + xd->cpu_socket = (i8) rte_eth_dev_socket_id (i); + + /* Handle interface naming for devices with multiple ports sharing same PCI ID */ + if (dev_info.pci_dev) + { + struct rte_eth_dev_info di = { 0 }; + rte_eth_dev_info_get (i + 1, &di); + if (di.pci_dev && pci_addr.as_u32 != last_pci_addr.as_u32 && + memcmp (&dev_info.pci_dev->addr, &di.pci_dev->addr, + sizeof (struct rte_pci_addr)) == 0) + { + xd->interface_name_suffix = format (0, "0"); + last_pci_addr.as_u32 = pci_addr.as_u32; + last_pci_addr_port = i; + } + else if (pci_addr.as_u32 == last_pci_addr.as_u32) + { + xd->interface_name_suffix = + format (0, "%u", i - last_pci_addr_port); + } + else + { + last_pci_addr.as_u32 = ~0; + } + } + else + last_pci_addr.as_u32 = ~0; + + clib_memcpy (&xd->tx_conf, &dev_info.default_txconf, + sizeof (struct rte_eth_txconf)); + if (dm->conf->no_multi_seg) + { + xd->tx_conf.txq_flags |= ETH_TXQ_FLAGS_NOMULTSEGS; + port_conf_template.rxmode.jumbo_frame = 0; + } + else + { + xd->tx_conf.txq_flags &= ~ETH_TXQ_FLAGS_NOMULTSEGS; + port_conf_template.rxmode.jumbo_frame = 1; + xd->flags |= DPDK_DEVICE_FLAG_MAYBE_MULTISEG; + } + + clib_memcpy (&xd->port_conf, &port_conf_template, + sizeof (struct rte_eth_conf)); + + xd->tx_q_used = clib_min (dev_info.max_tx_queues, tm->n_vlib_mains); + + if (devconf->num_tx_queues > 0 + && devconf->num_tx_queues < xd->tx_q_used) + xd->tx_q_used = clib_min (xd->tx_q_used, devconf->num_tx_queues); + + if (devconf->num_rx_queues > 1 && dm->use_rss == 0) + { + dm->use_rss = 1; + } + + if (devconf->num_rx_queues > 1 + && dev_info.max_rx_queues >= devconf->num_rx_queues) + { + xd->rx_q_used = devconf->num_rx_queues; + xd->port_conf.rxmode.mq_mode = ETH_MQ_RX_RSS; + if (devconf->rss_fn == 0) + xd->port_conf.rx_adv_conf.rss_conf.rss_hf = + ETH_RSS_IP | ETH_RSS_UDP | ETH_RSS_TCP; + else + xd->port_conf.rx_adv_conf.rss_conf.rss_hf = devconf->rss_fn; + } + else + xd->rx_q_used = 1; + + xd->flags |= DPDK_DEVICE_FLAG_PMD; + + /* workaround for drivers not setting driver_name */ + if ((!dev_info.driver_name) && (dev_info.pci_dev)) + dev_info.driver_name = dev_info.pci_dev->driver->driver.name; + + ASSERT (dev_info.driver_name); + + if (!xd->pmd) + { + + +#define _(s,f) else if (dev_info.driver_name && \ + !strcmp(dev_info.driver_name, s)) \ + xd->pmd = VNET_DPDK_PMD_##f; + if (0) + ; + foreach_dpdk_pmd +#undef _ + else + xd->pmd = VNET_DPDK_PMD_UNKNOWN; + + xd->port_type = VNET_DPDK_PORT_TYPE_UNKNOWN; + xd->nb_rx_desc = DPDK_NB_RX_DESC_DEFAULT; + xd->nb_tx_desc = DPDK_NB_TX_DESC_DEFAULT; + + switch (xd->pmd) + { + /* 1G adapters */ + case VNET_DPDK_PMD_E1000EM: + case VNET_DPDK_PMD_IGB: + case VNET_DPDK_PMD_IGBVF: + xd->port_type = VNET_DPDK_PORT_TYPE_ETH_1G; + break; + + /* 10G adapters */ + case VNET_DPDK_PMD_IXGBE: + case VNET_DPDK_PMD_IXGBEVF: + case VNET_DPDK_PMD_THUNDERX: + xd->port_type = VNET_DPDK_PORT_TYPE_ETH_10G; + break; + case VNET_DPDK_PMD_DPAA2: + xd->port_type = VNET_DPDK_PORT_TYPE_ETH_10G; + break; + + /* Cisco VIC */ + case VNET_DPDK_PMD_ENIC: + rte_eth_link_get_nowait (i, &l); + xd->flags |= DPDK_DEVICE_FLAG_PMD_SUPPORTS_PTYPE; + if (l.link_speed == 40000) + xd->port_type = VNET_DPDK_PORT_TYPE_ETH_40G; + else + xd->port_type = VNET_DPDK_PORT_TYPE_ETH_10G; + break; + + /* Intel Fortville */ + case VNET_DPDK_PMD_I40E: + case VNET_DPDK_PMD_I40EVF: + xd->flags |= DPDK_DEVICE_FLAG_PMD_SUPPORTS_PTYPE; + xd->port_type = VNET_DPDK_PORT_TYPE_ETH_40G; + + switch (dev_info.pci_dev->id.device_id) + { + case I40E_DEV_ID_10G_BASE_T: + case I40E_DEV_ID_SFP_XL710: + xd->port_type = VNET_DPDK_PORT_TYPE_ETH_10G; + break; + case I40E_DEV_ID_QSFP_A: + case I40E_DEV_ID_QSFP_B: + case I40E_DEV_ID_QSFP_C: + xd->port_type = VNET_DPDK_PORT_TYPE_ETH_40G; + break; + case I40E_DEV_ID_VF: + rte_eth_link_get_nowait (i, &l); + xd->port_type = l.link_speed == 10000 ? + VNET_DPDK_PORT_TYPE_ETH_10G : VNET_DPDK_PORT_TYPE_ETH_40G; + break; + default: + xd->port_type = VNET_DPDK_PORT_TYPE_UNKNOWN; + } + break; + + case VNET_DPDK_PMD_CXGBE: + switch (dev_info.pci_dev->id.device_id) + { + case 0x540d: /* T580-CR */ + case 0x5410: /* T580-LP-cr */ + xd->port_type = VNET_DPDK_PORT_TYPE_ETH_40G; + break; + case 0x5403: /* T540-CR */ + xd->port_type = VNET_DPDK_PORT_TYPE_ETH_10G; + break; + default: + xd->port_type = VNET_DPDK_PORT_TYPE_UNKNOWN; + } + break; + + case VNET_DPDK_PMD_MLX5: + { + char *pn_100g[] = { "MCX415A-CCAT", "MCX416A-CCAT", 0 }; + char *pn_40g[] = { "MCX413A-BCAT", "MCX414A-BCAT", + "MCX415A-BCAT", "MCX416A-BCAT", "MCX4131A-BCAT", 0 + }; + char *pn_10g[] = { "MCX4111A-XCAT", "MCX4121A-XCAT", 0 }; + + vlib_pci_device_t *pd = vlib_get_pci_device (&pci_addr); + u8 *pn = 0; + char **c; + int found = 0; + pn = format (0, "%U%c", + format_vlib_pci_vpd, pd->vpd_r, "PN", 0); + + if (!pn) + break; + + c = pn_100g; + while (!found && c[0]) + { + if (strncmp ((char *) pn, c[0], strlen (c[0])) == 0) + { + xd->port_type = VNET_DPDK_PORT_TYPE_ETH_100G; + break; + } + c++; + } + + c = pn_40g; + while (!found && c[0]) + { + if (strncmp ((char *) pn, c[0], strlen (c[0])) == 0) + { + xd->port_type = VNET_DPDK_PORT_TYPE_ETH_40G; + break; + } + c++; + } + + c = pn_10g; + while (!found && c[0]) + { + if (strncmp ((char *) pn, c[0], strlen (c[0])) == 0) + { + xd->port_type = VNET_DPDK_PORT_TYPE_ETH_10G; + break; + } + c++; + } + + vec_free (pn); + } + + break; + /* Intel Red Rock Canyon */ + case VNET_DPDK_PMD_FM10K: + xd->port_type = VNET_DPDK_PORT_TYPE_ETH_SWITCH; + break; + + /* virtio */ + case VNET_DPDK_PMD_VIRTIO: + xd->port_type = VNET_DPDK_PORT_TYPE_ETH_1G; + xd->nb_rx_desc = DPDK_NB_RX_DESC_VIRTIO; + xd->nb_tx_desc = DPDK_NB_TX_DESC_VIRTIO; + break; + + /* vmxnet3 */ + case VNET_DPDK_PMD_VMXNET3: + xd->port_type = VNET_DPDK_PORT_TYPE_ETH_1G; + xd->tx_conf.txq_flags |= ETH_TXQ_FLAGS_NOMULTSEGS; + break; + + case VNET_DPDK_PMD_AF_PACKET: + xd->port_type = VNET_DPDK_PORT_TYPE_AF_PACKET; + xd->af_packet_port_id = af_packet_port_id++; + break; + + case VNET_DPDK_PMD_BOND: + xd->flags |= DPDK_DEVICE_FLAG_PMD_SUPPORTS_PTYPE; + xd->port_type = VNET_DPDK_PORT_TYPE_ETH_BOND; + break; + + default: + xd->port_type = VNET_DPDK_PORT_TYPE_UNKNOWN; + } + + if (devconf->num_rx_desc) + xd->nb_rx_desc = devconf->num_rx_desc; + + if (devconf->num_tx_desc) + xd->nb_tx_desc = devconf->num_tx_desc; + } + + /* + * Ensure default mtu is not > the mtu read from the hardware. + * Otherwise rte_eth_dev_configure() will fail and the port will + * not be available. + */ + if (ETHERNET_MAX_PACKET_BYTES > dev_info.max_rx_pktlen) + { + /* + * This device does not support the platforms's max frame + * size. Use it's advertised mru instead. + */ + xd->port_conf.rxmode.max_rx_pkt_len = dev_info.max_rx_pktlen; + } + else + { + xd->port_conf.rxmode.max_rx_pkt_len = ETHERNET_MAX_PACKET_BYTES; + + /* + * Some platforms do not account for Ethernet FCS (4 bytes) in + * MTU calculations. To interop with them increase mru but only + * if the device's settings can support it. + */ + if ((dev_info.max_rx_pktlen >= (ETHERNET_MAX_PACKET_BYTES + 4)) && + xd->port_conf.rxmode.hw_strip_crc) + { + /* + * Allow additional 4 bytes (for Ethernet FCS). These bytes are + * stripped by h/w and so will not consume any buffer memory. + */ + xd->port_conf.rxmode.max_rx_pkt_len += 4; + } + } + + if (xd->pmd == VNET_DPDK_PMD_AF_PACKET) + { + f64 now = vlib_time_now (vm); + u32 rnd; + rnd = (u32) (now * 1e6); + rnd = random_u32 (&rnd); + clib_memcpy (addr + 2, &rnd, sizeof (rnd)); + addr[0] = 2; + addr[1] = 0xfe; + } + else + rte_eth_macaddr_get (i, (struct ether_addr *) addr); + + if (xd->tx_q_used < tm->n_vlib_mains) + dpdk_device_lock_init (xd); + + xd->device_index = xd - dm->devices; + ASSERT (i == xd->device_index); + xd->per_interface_next_index = ~0; + + /* assign interface to input thread */ + dpdk_device_and_queue_t *dq; + int q; + + if (devconf->workers) + { + int i; + q = 0; + /* *INDENT-OFF* */ + clib_bitmap_foreach (i, devconf->workers, ({ + int cpu = dm->input_cpu_first_index + i; + unsigned lcore = vlib_worker_threads[cpu].lcore_id; + vec_validate(xd->cpu_socket_id_by_queue, q); + xd->cpu_socket_id_by_queue[q] = rte_lcore_to_socket_id(lcore); + vec_add2(dm->devices_by_cpu[cpu], dq, 1); + dq->device = xd->device_index; + dq->queue_id = q++; + })); + /* *INDENT-ON* */ + } + else + for (q = 0; q < xd->rx_q_used; q++) + { + int cpu = dm->input_cpu_first_index + next_cpu; + unsigned lcore = vlib_worker_threads[cpu].lcore_id; + + /* + * numa node for worker thread handling this queue + * needed for taking buffers from the right mempool + */ + vec_validate (xd->cpu_socket_id_by_queue, q); + xd->cpu_socket_id_by_queue[q] = rte_lcore_to_socket_id (lcore); + + /* + * construct vector of (device,queue) pairs for each worker thread + */ + vec_add2 (dm->devices_by_cpu[cpu], dq, 1); + dq->device = xd->device_index; + dq->queue_id = q; + + next_cpu++; + if (next_cpu == dm->input_cpu_count) + next_cpu = 0; + } + + + if (devconf->hqos_enabled) + { + xd->flags |= DPDK_DEVICE_FLAG_HQOS; + + if (devconf->hqos.hqos_thread_valid) + { + int cpu = dm->hqos_cpu_first_index + devconf->hqos.hqos_thread; + + if (devconf->hqos.hqos_thread >= dm->hqos_cpu_count) + return clib_error_return (0, "invalid HQoS thread index"); + + vec_add2 (dm->devices_by_hqos_cpu[cpu], dq, 1); + dq->device = xd->device_index; + dq->queue_id = 0; + } + else + { + int cpu = dm->hqos_cpu_first_index + next_hqos_cpu; + + if (dm->hqos_cpu_count == 0) + return clib_error_return (0, "no HQoS threads available"); + + vec_add2 (dm->devices_by_hqos_cpu[cpu], dq, 1); + dq->device = xd->device_index; + dq->queue_id = 0; + + next_hqos_cpu++; + if (next_hqos_cpu == dm->hqos_cpu_count) + next_hqos_cpu = 0; + + devconf->hqos.hqos_thread_valid = 1; + devconf->hqos.hqos_thread = cpu; + } + } + + vec_validate_aligned (xd->tx_vectors, tm->n_vlib_mains, + CLIB_CACHE_LINE_BYTES); + for (j = 0; j < tm->n_vlib_mains; j++) + { + vec_validate_ha (xd->tx_vectors[j], xd->nb_tx_desc, + sizeof (tx_ring_hdr_t), CLIB_CACHE_LINE_BYTES); + vec_reset_length (xd->tx_vectors[j]); + } + + vec_validate_aligned (xd->rx_vectors, xd->rx_q_used, + CLIB_CACHE_LINE_BYTES); + for (j = 0; j < xd->rx_q_used; j++) + { + vec_validate_aligned (xd->rx_vectors[j], VLIB_FRAME_SIZE - 1, + CLIB_CACHE_LINE_BYTES); + vec_reset_length (xd->rx_vectors[j]); + } + + vec_validate_aligned (xd->d_trace_buffers, tm->n_vlib_mains, + CLIB_CACHE_LINE_BYTES); + + rv = dpdk_port_setup (dm, xd); + + if (rv) + return rv; + + if (devconf->hqos_enabled) + { + rv = dpdk_port_setup_hqos (xd, &devconf->hqos); + if (rv) + return rv; + } + + /* count the number of descriptors used for this device */ + nb_desc += xd->nb_rx_desc + xd->nb_tx_desc * xd->tx_q_used; + + error = ethernet_register_interface + (dm->vnet_main, dpdk_device_class.index, xd->device_index, + /* ethernet address */ addr, + &xd->vlib_hw_if_index, dpdk_flag_change); + if (error) + return error; + + sw = vnet_get_hw_sw_interface (dm->vnet_main, xd->vlib_hw_if_index); + xd->vlib_sw_if_index = sw->sw_if_index; + hi = vnet_get_hw_interface (dm->vnet_main, xd->vlib_hw_if_index); + + /* + * DAW-FIXME: The Cisco VIC firmware does not provide an api for a + * driver to dynamically change the mtu. If/when the + * VIC firmware gets fixed, then this should be removed. + */ + if (xd->pmd == VNET_DPDK_PMD_ENIC) + { + /* + * Initialize mtu to what has been set by CIMC in the firmware cfg. + */ + hi->max_packet_bytes = dev_info.max_rx_pktlen; + if (devconf->vlan_strip_offload != DPDK_DEVICE_VLAN_STRIP_OFF) + vlan_strip = 1; /* remove vlan tag from VIC port by default */ + else + clib_warning ("VLAN strip disabled for interface\n"); + } + else if (devconf->vlan_strip_offload == DPDK_DEVICE_VLAN_STRIP_ON) + vlan_strip = 1; + + if (vlan_strip) + { + int vlan_off; + vlan_off = rte_eth_dev_get_vlan_offload (xd->device_index); + vlan_off |= ETH_VLAN_STRIP_OFFLOAD; + xd->port_conf.rxmode.hw_vlan_strip = vlan_off; + if (rte_eth_dev_set_vlan_offload (xd->device_index, vlan_off) == 0) + clib_warning ("VLAN strip enabled for interface\n"); + else + clib_warning ("VLAN strip cannot be supported by interface\n"); + } + + hi->max_l3_packet_bytes[VLIB_RX] = hi->max_l3_packet_bytes[VLIB_TX] = + xd->port_conf.rxmode.max_rx_pkt_len - sizeof (ethernet_header_t); + + rte_eth_dev_set_mtu (xd->device_index, hi->max_packet_bytes); + } + + if (nb_desc > dm->conf->num_mbufs) + clib_warning ("%d mbufs allocated but total rx/tx ring size is %d\n", + dm->conf->num_mbufs, nb_desc); + + return 0; +} + +static void +dpdk_bind_devices_to_uio (dpdk_config_main_t * conf) +{ + vlib_pci_main_t *pm = &pci_main; + clib_error_t *error; + vlib_pci_device_t *d; + u8 *pci_addr = 0; + int num_whitelisted = vec_len (conf->dev_confs); + + /* *INDENT-OFF* */ + pool_foreach (d, pm->pci_devs, ({ + dpdk_device_config_t * devconf = 0; + vec_reset_length (pci_addr); + pci_addr = format (pci_addr, "%U%c", format_vlib_pci_addr, &d->bus_address, 0); + + if (d->device_class != PCI_CLASS_NETWORK_ETHERNET && d->device_class != PCI_CLASS_PROCESSOR_CO) + continue; + + if (num_whitelisted) + { + uword * p = hash_get (conf->device_config_index_by_pci_addr, d->bus_address.as_u32); + + if (!p) + continue; + + devconf = pool_elt_at_index (conf->dev_confs, p[0]); + } + + /* virtio */ + if (d->vendor_id == 0x1af4 && d->device_id == 0x1000) + ; + /* vmxnet3 */ + else if (d->vendor_id == 0x15ad && d->device_id == 0x07b0) + ; + /* all Intel devices */ + else if (d->vendor_id == 0x8086) + ; + /* Cisco VIC */ + else if (d->vendor_id == 0x1137 && d->device_id == 0x0043) + ; + /* Chelsio T4/T5 */ + else if (d->vendor_id == 0x1425 && (d->device_id & 0xe000) == 0x4000) + ; + else + { + clib_warning ("Unsupported Ethernet PCI device 0x%04x:0x%04x found " + "at PCI address %s\n", (u16) d->vendor_id, (u16) d->device_id, + pci_addr); + continue; + } + + error = vlib_pci_bind_to_uio (d, (char *) conf->uio_driver_name); + + if (error) + { + if (devconf == 0) + { + pool_get (conf->dev_confs, devconf); + hash_set (conf->device_config_index_by_pci_addr, d->bus_address.as_u32, + devconf - conf->dev_confs); + devconf->pci_addr.as_u32 = d->bus_address.as_u32; + } + devconf->is_blacklisted = 1; + clib_error_report (error); + } + })); + /* *INDENT-ON* */ + vec_free (pci_addr); +} + +static clib_error_t * +dpdk_device_config (dpdk_config_main_t * conf, vlib_pci_addr_t pci_addr, + unformat_input_t * input, u8 is_default) +{ + clib_error_t *error = 0; + uword *p; + dpdk_device_config_t *devconf; + unformat_input_t sub_input; + + if (is_default) + { + devconf = &conf->default_devconf; + } + else + { + p = hash_get (conf->device_config_index_by_pci_addr, pci_addr.as_u32); + + if (!p) + { + pool_get (conf->dev_confs, devconf); + hash_set (conf->device_config_index_by_pci_addr, pci_addr.as_u32, + devconf - conf->dev_confs); + } + else + return clib_error_return (0, + "duplicate configuration for PCI address %U", + format_vlib_pci_addr, &pci_addr); + } + + devconf->pci_addr.as_u32 = pci_addr.as_u32; + devconf->hqos_enabled = 0; + dpdk_device_config_hqos_default (&devconf->hqos); + + if (!input) + return 0; + + unformat_skip_white_space (input); + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (input, "num-rx-queues %u", &devconf->num_rx_queues)) + ; + else if (unformat (input, "num-tx-queues %u", &devconf->num_tx_queues)) + ; + else if (unformat (input, "num-rx-desc %u", &devconf->num_rx_desc)) + ; + else if (unformat (input, "num-tx-desc %u", &devconf->num_tx_desc)) + ; + else if (unformat (input, "workers %U", unformat_bitmap_list, + &devconf->workers)) + ; + else + if (unformat + (input, "rss %U", unformat_vlib_cli_sub_input, &sub_input)) + { + error = unformat_rss_fn (&sub_input, &devconf->rss_fn); + if (error) + break; + } + else if (unformat (input, "vlan-strip-offload off")) + devconf->vlan_strip_offload = DPDK_DEVICE_VLAN_STRIP_OFF; + else if (unformat (input, "vlan-strip-offload on")) + devconf->vlan_strip_offload = DPDK_DEVICE_VLAN_STRIP_ON; + else + if (unformat + (input, "hqos %U", unformat_vlib_cli_sub_input, &sub_input)) + { + devconf->hqos_enabled = 1; + error = unformat_hqos (&sub_input, &devconf->hqos); + if (error) + break; + } + else if (unformat (input, "hqos")) + { + devconf->hqos_enabled = 1; + } + else + { + error = clib_error_return (0, "unknown input `%U'", + format_unformat_error, input); + break; + } + } + + if (error) + return error; + + if (devconf->workers && devconf->num_rx_queues == 0) + devconf->num_rx_queues = clib_bitmap_count_set_bits (devconf->workers); + else if (devconf->workers && + clib_bitmap_count_set_bits (devconf->workers) != + devconf->num_rx_queues) + error = + clib_error_return (0, + "%U: number of worker threadds must be " + "equal to number of rx queues", format_vlib_pci_addr, + &pci_addr); + + return error; +} + +static clib_error_t * +dpdk_config (vlib_main_t * vm, unformat_input_t * input) +{ + clib_error_t *error = 0; + dpdk_main_t *dm = &dpdk_main; + dpdk_config_main_t *conf = &dpdk_config_main; + vlib_thread_main_t *tm = vlib_get_thread_main (); + dpdk_device_config_t *devconf; + vlib_pci_addr_t pci_addr; + unformat_input_t sub_input; + u8 *s, *tmp = 0; + u8 *rte_cmd = 0, *ethname = 0; + u32 log_level; + int ret, i; + int num_whitelisted = 0; + u8 no_pci = 0; + u8 no_huge = 0; + u8 huge_dir = 0; + u8 file_prefix = 0; + u8 *socket_mem = 0; + + conf->device_config_index_by_pci_addr = hash_create (0, sizeof (uword)); + + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) + { + /* Prime the pump */ + if (unformat (input, "no-hugetlb")) + { + vec_add1 (conf->eal_init_args, (u8 *) "no-huge"); + no_huge = 1; + } + + else if (unformat (input, "enable-tcp-udp-checksum")) + conf->enable_tcp_udp_checksum = 1; + + else if (unformat (input, "decimal-interface-names")) + conf->interface_name_format_decimal = 1; + + else if (unformat (input, "no-multi-seg")) + conf->no_multi_seg = 1; + + else if (unformat (input, "enable-cryptodev")) + conf->cryptodev = 1; + + else if (unformat (input, "dev default %U", unformat_vlib_cli_sub_input, + &sub_input)) + { + error = + dpdk_device_config (conf, (vlib_pci_addr_t) (u32) ~ 1, &sub_input, + 1); + + if (error) + return error; + } + else + if (unformat + (input, "dev %U %U", unformat_vlib_pci_addr, &pci_addr, + unformat_vlib_cli_sub_input, &sub_input)) + { + error = dpdk_device_config (conf, pci_addr, &sub_input, 0); + + if (error) + return error; + + num_whitelisted++; + } + else if (unformat (input, "dev %U", unformat_vlib_pci_addr, &pci_addr)) + { + error = dpdk_device_config (conf, pci_addr, 0, 0); + + if (error) + return error; + + num_whitelisted++; + } + else if (unformat (input, "num-mbufs %d", &conf->num_mbufs)) + ; + else if (unformat (input, "kni %d", &conf->num_kni)) + ; + else if (unformat (input, "uio-driver %s", &conf->uio_driver_name)) + ; + else if (unformat (input, "socket-mem %s", &socket_mem)) + ; + else if (unformat (input, "no-pci")) + { + no_pci = 1; + tmp = format (0, "--no-pci%c", 0); + vec_add1 (conf->eal_init_args, tmp); + } + else if (unformat (input, "poll-sleep %d", &dm->poll_sleep)) + ; + +#define _(a) \ + else if (unformat(input, #a)) \ + { \ + tmp = format (0, "--%s%c", #a, 0); \ + vec_add1 (conf->eal_init_args, tmp); \ + } + foreach_eal_double_hyphen_predicate_arg +#undef _ +#define _(a) \ + else if (unformat(input, #a " %s", &s)) \ + { \ + if (!strncmp(#a, "huge-dir", 8)) \ + huge_dir = 1; \ + else if (!strncmp(#a, "file-prefix", 11)) \ + file_prefix = 1; \ + tmp = format (0, "--%s%c", #a, 0); \ + vec_add1 (conf->eal_init_args, tmp); \ + vec_add1 (s, 0); \ + if (!strncmp(#a, "vdev", 4)) \ + if (strstr((char*)s, "af_packet")) \ + clib_warning ("af_packet obsoleted. Use CLI 'create host-interface'."); \ + vec_add1 (conf->eal_init_args, s); \ + } + foreach_eal_double_hyphen_arg +#undef _ +#define _(a,b) \ + else if (unformat(input, #a " %s", &s)) \ + { \ + tmp = format (0, "-%s%c", #b, 0); \ + vec_add1 (conf->eal_init_args, tmp); \ + vec_add1 (s, 0); \ + vec_add1 (conf->eal_init_args, s); \ + } + foreach_eal_single_hyphen_arg +#undef _ +#define _(a,b) \ + else if (unformat(input, #a " %s", &s)) \ + { \ + tmp = format (0, "-%s%c", #b, 0); \ + vec_add1 (conf->eal_init_args, tmp); \ + vec_add1 (s, 0); \ + vec_add1 (conf->eal_init_args, s); \ + conf->a##_set_manually = 1; \ + } + foreach_eal_single_hyphen_mandatory_arg +#undef _ + else if (unformat (input, "default")) + ; + + else if (unformat_skip_white_space (input)) + ; + else + { + error = clib_error_return (0, "unknown input `%U'", + format_unformat_error, input); + goto done; + } + } + + if (!conf->uio_driver_name) + conf->uio_driver_name = format (0, "uio_pci_generic%c", 0); + + /* + * Use 1G huge pages if available. + */ + if (!no_huge && !huge_dir) + { + u32 x, *mem_by_socket = 0; + uword c = 0; + u8 use_1g = 1; + u8 use_2m = 1; + u8 less_than_1g = 1; + int rv; + + umount (DEFAULT_HUGE_DIR); + + /* Process "socket-mem" parameter value */ + if (vec_len (socket_mem)) + { + unformat_input_t in; + unformat_init_vector (&in, socket_mem); + while (unformat_check_input (&in) != UNFORMAT_END_OF_INPUT) + { + if (unformat (&in, "%u,", &x)) + ; + else if (unformat (&in, "%u", &x)) + ; + else if (unformat (&in, ",")) + x = 0; + else + break; + + vec_add1 (mem_by_socket, x); + + if (x > 1023) + less_than_1g = 0; + } + /* Note: unformat_free vec_frees(in.buffer), aka socket_mem... */ + unformat_free (&in); + socket_mem = 0; + } + else + { + /* *INDENT-OFF* */ + clib_bitmap_foreach (c, tm->cpu_socket_bitmap, ( + { + vec_validate(mem_by_socket, c); + mem_by_socket[c] = 256; /* default per-socket mem */ + } + )); + /* *INDENT-ON* */ + } + + /* check if available enough 1GB pages for each socket */ + /* *INDENT-OFF* */ + clib_bitmap_foreach (c, tm->cpu_socket_bitmap, ( + { + int pages_avail, page_size, mem; + + vec_validate(mem_by_socket, c); + mem = mem_by_socket[c]; + + page_size = 1024; + pages_avail = vlib_sysfs_get_free_hugepages(c, page_size * 1024); + + if (pages_avail < 0 || page_size * pages_avail < mem) + use_1g = 0; + + page_size = 2; + pages_avail = vlib_sysfs_get_free_hugepages(c, page_size * 1024); + + if (pages_avail < 0 || page_size * pages_avail < mem) + use_2m = 0; + })); + /* *INDENT-ON* */ + + if (mem_by_socket == 0) + { + error = clib_error_return (0, "mem_by_socket NULL"); + goto done; + } + _vec_len (mem_by_socket) = c + 1; + + /* regenerate socket_mem string */ + vec_foreach_index (x, mem_by_socket) + socket_mem = format (socket_mem, "%s%u", + socket_mem ? "," : "", mem_by_socket[x]); + socket_mem = format (socket_mem, "%c", 0); + + vec_free (mem_by_socket); + + rv = mkdir (VPP_RUN_DIR, 0755); + if (rv && errno != EEXIST) + { + error = clib_error_return (0, "mkdir '%s' failed errno %d", + VPP_RUN_DIR, errno); + goto done; + } + + rv = mkdir (DEFAULT_HUGE_DIR, 0755); + if (rv && errno != EEXIST) + { + error = clib_error_return (0, "mkdir '%s' failed errno %d", + DEFAULT_HUGE_DIR, errno); + goto done; + } + + if (use_1g && !(less_than_1g && use_2m)) + { + rv = + mount ("none", DEFAULT_HUGE_DIR, "hugetlbfs", 0, "pagesize=1G"); + } + else if (use_2m) + { + rv = mount ("none", DEFAULT_HUGE_DIR, "hugetlbfs", 0, NULL); + } + else + { + return clib_error_return (0, "not enough free huge pages"); + } + + if (rv) + { + error = clib_error_return (0, "mount failed %d", errno); + goto done; + } + + tmp = format (0, "--huge-dir%c", 0); + vec_add1 (conf->eal_init_args, tmp); + tmp = format (0, "%s%c", DEFAULT_HUGE_DIR, 0); + vec_add1 (conf->eal_init_args, tmp); + if (!file_prefix) + { + tmp = format (0, "--file-prefix%c", 0); + vec_add1 (conf->eal_init_args, tmp); + tmp = format (0, "vpp%c", 0); + vec_add1 (conf->eal_init_args, tmp); + } + } + + vec_free (rte_cmd); + vec_free (ethname); + + if (error) + return error; + + /* I'll bet that -c and -n must be the first and second args... */ + if (!conf->coremask_set_manually) + { + vlib_thread_registration_t *tr; + uword *coremask = 0; + int i; + + /* main thread core */ + coremask = clib_bitmap_set (coremask, tm->main_lcore, 1); + + for (i = 0; i < vec_len (tm->registrations); i++) + { + tr = tm->registrations[i]; + coremask = clib_bitmap_or (coremask, tr->coremask); + } + + vec_insert (conf->eal_init_args, 2, 1); + conf->eal_init_args[1] = (u8 *) "-c"; + tmp = format (0, "%U%c", format_bitmap_hex, coremask, 0); + conf->eal_init_args[2] = tmp; + clib_bitmap_free (coremask); + } + + if (!conf->nchannels_set_manually) + { + vec_insert (conf->eal_init_args, 2, 3); + conf->eal_init_args[3] = (u8 *) "-n"; + tmp = format (0, "%d", conf->nchannels); + conf->eal_init_args[4] = tmp; + } + + if (no_pci == 0 && geteuid () == 0) + dpdk_bind_devices_to_uio (conf); + +#define _(x) \ + if (devconf->x == 0 && conf->default_devconf.x > 0) \ + devconf->x = conf->default_devconf.x ; + + /* *INDENT-OFF* */ + pool_foreach (devconf, conf->dev_confs, ({ + + /* default per-device config items */ + foreach_dpdk_device_config_item + + /* add DPDK EAL whitelist/blacklist entry */ + if (num_whitelisted > 0 && devconf->is_blacklisted == 0) + { + tmp = format (0, "-w%c", 0); + vec_add1 (conf->eal_init_args, tmp); + tmp = format (0, "%U%c", format_vlib_pci_addr, &devconf->pci_addr, 0); + vec_add1 (conf->eal_init_args, tmp); + } + else if (num_whitelisted == 0 && devconf->is_blacklisted != 0) + { + tmp = format (0, "-b%c", 0); + vec_add1 (conf->eal_init_args, tmp); + tmp = format (0, "%U%c", format_vlib_pci_addr, &devconf->pci_addr, 0); + vec_add1 (conf->eal_init_args, tmp); + } + })); + /* *INDENT-ON* */ + +#undef _ + + /* set master-lcore */ + tmp = format (0, "--master-lcore%c", 0); + vec_add1 (conf->eal_init_args, tmp); + tmp = format (0, "%u%c", tm->main_lcore, 0); + vec_add1 (conf->eal_init_args, tmp); + + /* set socket-mem */ + tmp = format (0, "--socket-mem%c", 0); + vec_add1 (conf->eal_init_args, tmp); + tmp = format (0, "%s%c", socket_mem, 0); + vec_add1 (conf->eal_init_args, tmp); + + /* NULL terminate the "argv" vector, in case of stupidity */ + vec_add1 (conf->eal_init_args, 0); + _vec_len (conf->eal_init_args) -= 1; + + /* Set up DPDK eal and packet mbuf pool early. */ + + log_level = (CLIB_DEBUG > 0) ? RTE_LOG_DEBUG : RTE_LOG_NOTICE; + + rte_set_log_level (log_level); + + vm = vlib_get_main (); + + /* make copy of args as rte_eal_init tends to mess up with arg array */ + for (i = 1; i < vec_len (conf->eal_init_args); i++) + conf->eal_init_args_str = format (conf->eal_init_args_str, "%s ", + conf->eal_init_args[i]); + + ret = + rte_eal_init (vec_len (conf->eal_init_args), + (char **) conf->eal_init_args); + + /* lazy umount hugepages */ + umount2 (DEFAULT_HUGE_DIR, MNT_DETACH); + + if (ret < 0) + return clib_error_return (0, "rte_eal_init returned %d", ret); + + /* Dump the physical memory layout prior to creating the mbuf_pool */ + fprintf (stdout, "DPDK physical memory layout:\n"); + rte_dump_physmem_layout (stdout); + + /* main thread 1st */ + error = vlib_buffer_pool_create (vm, conf->num_mbufs, rte_socket_id ()); + if (error) + return error; + + for (i = 0; i < RTE_MAX_LCORE; i++) + { + error = vlib_buffer_pool_create (vm, conf->num_mbufs, + rte_lcore_to_socket_id (i)); + if (error) + return error; + } + +done: + return error; +} + +VLIB_CONFIG_FUNCTION (dpdk_config, "dpdk"); + +void +dpdk_update_link_state (dpdk_device_t * xd, f64 now) +{ + vnet_main_t *vnm = vnet_get_main (); + struct rte_eth_link prev_link = xd->link; + u32 hw_flags = 0; + u8 hw_flags_chg = 0; + + /* only update link state for PMD interfaces */ + if ((xd->flags & DPDK_DEVICE_FLAG_PMD) == 0) + return; + + xd->time_last_link_update = now ? now : xd->time_last_link_update; + memset (&xd->link, 0, sizeof (xd->link)); + rte_eth_link_get_nowait (xd->device_index, &xd->link); + + if (LINK_STATE_ELOGS) + { + vlib_main_t *vm = vlib_get_main (); + ELOG_TYPE_DECLARE (e) = + { + .format = + "update-link-state: sw_if_index %d, admin_up %d," + "old link_state %d new link_state %d",.format_args = "i4i1i1i1",}; + + struct + { + u32 sw_if_index; + u8 admin_up; + u8 old_link_state; + u8 new_link_state; + } *ed; + ed = ELOG_DATA (&vm->elog_main, e); + ed->sw_if_index = xd->vlib_sw_if_index; + ed->admin_up = (xd->flags & DPDK_DEVICE_FLAG_ADMIN_UP) != 0; + ed->old_link_state = (u8) + vnet_hw_interface_is_link_up (vnm, xd->vlib_hw_if_index); + ed->new_link_state = (u8) xd->link.link_status; + } + + if ((xd->flags & DPDK_DEVICE_FLAG_ADMIN_UP) && + ((xd->link.link_status != 0) ^ + vnet_hw_interface_is_link_up (vnm, xd->vlib_hw_if_index))) + { + hw_flags_chg = 1; + hw_flags |= (xd->link.link_status ? VNET_HW_INTERFACE_FLAG_LINK_UP : 0); + } + + if (hw_flags_chg || (xd->link.link_duplex != prev_link.link_duplex)) + { + hw_flags_chg = 1; + switch (xd->link.link_duplex) + { + case ETH_LINK_HALF_DUPLEX: + hw_flags |= VNET_HW_INTERFACE_FLAG_HALF_DUPLEX; + break; + case ETH_LINK_FULL_DUPLEX: + hw_flags |= VNET_HW_INTERFACE_FLAG_FULL_DUPLEX; + break; + default: + break; + } + } + if (hw_flags_chg || (xd->link.link_speed != prev_link.link_speed)) + { + hw_flags_chg = 1; + switch (xd->link.link_speed) + { + case ETH_SPEED_NUM_10M: + hw_flags |= VNET_HW_INTERFACE_FLAG_SPEED_10M; + break; + case ETH_SPEED_NUM_100M: + hw_flags |= VNET_HW_INTERFACE_FLAG_SPEED_100M; + break; + case ETH_SPEED_NUM_1G: + hw_flags |= VNET_HW_INTERFACE_FLAG_SPEED_1G; + break; + case ETH_SPEED_NUM_10G: + hw_flags |= VNET_HW_INTERFACE_FLAG_SPEED_10G; + break; + case ETH_SPEED_NUM_40G: + hw_flags |= VNET_HW_INTERFACE_FLAG_SPEED_40G; + break; + case 0: + break; + default: + clib_warning ("unknown link speed %d", xd->link.link_speed); + break; + } + } + if (hw_flags_chg) + { + if (LINK_STATE_ELOGS) + { + vlib_main_t *vm = vlib_get_main (); + + ELOG_TYPE_DECLARE (e) = + { + .format = + "update-link-state: sw_if_index %d, new flags %d",.format_args + = "i4i4",}; + + struct + { + u32 sw_if_index; + u32 flags; + } *ed; + ed = ELOG_DATA (&vm->elog_main, e); + ed->sw_if_index = xd->vlib_sw_if_index; + ed->flags = hw_flags; + } + vnet_hw_interface_set_flags (vnm, xd->vlib_hw_if_index, hw_flags); + } +} + +static uword +dpdk_process (vlib_main_t * vm, vlib_node_runtime_t * rt, vlib_frame_t * f) +{ + clib_error_t *error; + vnet_main_t *vnm = vnet_get_main (); + dpdk_main_t *dm = &dpdk_main; + ethernet_main_t *em = ðernet_main; + dpdk_device_t *xd; + vlib_thread_main_t *tm = vlib_get_thread_main (); + int i; + + error = dpdk_lib_init (dm); + + /* + * Turn on the input node if we found some devices to drive + * and we're not running worker threads or i/o threads + */ + + if (error == 0 && vec_len (dm->devices) > 0) + { + if (tm->n_vlib_mains == 1) + vlib_node_set_state (vm, dpdk_input_node.index, + VLIB_NODE_STATE_POLLING); + else + for (i = 0; i < tm->n_vlib_mains; i++) + if (vec_len (dm->devices_by_cpu[i]) > 0) + vlib_node_set_state (vlib_mains[i], dpdk_input_node.index, + VLIB_NODE_STATE_POLLING); + } + + if (error) + clib_error_report (error); + + tm->worker_thread_release = 1; + + f64 now = vlib_time_now (vm); + vec_foreach (xd, dm->devices) + { + dpdk_update_link_state (xd, now); + } + + { + /* + * Extra set up for bond interfaces: + * 1. Setup MACs for bond interfaces and their slave links which was set + * in dpdk_port_setup() but needs to be done again here to take effect. + * 2. Set up info for bond interface related CLI support. + */ + int nports = rte_eth_dev_count (); + if (nports > 0) + { + for (i = 0; i < nports; i++) + { + struct rte_eth_dev_info dev_info; + rte_eth_dev_info_get (i, &dev_info); + if (!dev_info.driver_name) + dev_info.driver_name = dev_info.pci_dev->driver->driver.name; + + ASSERT (dev_info.driver_name); + if (strncmp (dev_info.driver_name, "rte_bond_pmd", 12) == 0) + { + u8 addr[6]; + u8 slink[16]; + int nlink = rte_eth_bond_slaves_get (i, slink, 16); + if (nlink > 0) + { + vnet_hw_interface_t *bhi; + ethernet_interface_t *bei; + int rv; + + /* Get MAC of 1st slave link */ + rte_eth_macaddr_get (slink[0], + (struct ether_addr *) addr); + /* Set MAC of bounded interface to that of 1st slave link */ + rv = + rte_eth_bond_mac_address_set (i, + (struct ether_addr *) + addr); + if (rv < 0) + clib_warning ("Failed to set MAC address"); + + /* Populate MAC of bonded interface in VPP hw tables */ + bhi = + vnet_get_hw_interface (vnm, + dm->devices[i].vlib_hw_if_index); + bei = + pool_elt_at_index (em->interfaces, bhi->hw_instance); + clib_memcpy (bhi->hw_address, addr, 6); + clib_memcpy (bei->address, addr, 6); + /* Init l3 packet size allowed on bonded interface */ + bhi->max_packet_bytes = ETHERNET_MAX_PACKET_BYTES; + bhi->max_l3_packet_bytes[VLIB_RX] = + bhi->max_l3_packet_bytes[VLIB_TX] = + ETHERNET_MAX_PACKET_BYTES - sizeof (ethernet_header_t); + while (nlink >= 1) + { /* for all slave links */ + int slave = slink[--nlink]; + dpdk_device_t *sdev = &dm->devices[slave]; + vnet_hw_interface_t *shi; + vnet_sw_interface_t *ssi; + /* Add MAC to all slave links except the first one */ + if (nlink) + rte_eth_dev_mac_addr_add (slave, + (struct ether_addr *) + addr, 0); + /* Set slaves bitmap for bonded interface */ + bhi->bond_info = + clib_bitmap_set (bhi->bond_info, + sdev->vlib_hw_if_index, 1); + /* Set slave link flags on slave interface */ + shi = + vnet_get_hw_interface (vnm, sdev->vlib_hw_if_index); + ssi = + vnet_get_sw_interface (vnm, sdev->vlib_sw_if_index); + shi->bond_info = VNET_HW_INTERFACE_BOND_INFO_SLAVE; + ssi->flags |= VNET_SW_INTERFACE_FLAG_BOND_SLAVE; + + /* Set l3 packet size allowed as the lowest of slave */ + if (bhi->max_l3_packet_bytes[VLIB_RX] > + shi->max_l3_packet_bytes[VLIB_RX]) + bhi->max_l3_packet_bytes[VLIB_RX] = + bhi->max_l3_packet_bytes[VLIB_TX] = + shi->max_l3_packet_bytes[VLIB_RX]; + + /* Set max packet size allowed as the lowest of slave */ + if (bhi->max_packet_bytes > shi->max_packet_bytes) + bhi->max_packet_bytes = shi->max_packet_bytes; + } + } + } + } + } + } + + while (1) + { + /* + * check each time through the loop in case intervals are changed + */ + f64 min_wait = dm->link_state_poll_interval < dm->stat_poll_interval ? + dm->link_state_poll_interval : dm->stat_poll_interval; + + vlib_process_wait_for_event_or_clock (vm, min_wait); + + if (dm->admin_up_down_in_progress) + /* skip the poll if an admin up down is in progress (on any interface) */ + continue; + + vec_foreach (xd, dm->devices) + { + f64 now = vlib_time_now (vm); + if ((now - xd->time_last_stats_update) >= dm->stat_poll_interval) + dpdk_update_counters (xd, now); + if ((now - xd->time_last_link_update) >= dm->link_state_poll_interval) + dpdk_update_link_state (xd, now); + + } + } + + return 0; +} + +/* *INDENT-OFF* */ +VLIB_REGISTER_NODE (dpdk_process_node,static) = { + .function = dpdk_process, + .type = VLIB_NODE_TYPE_PROCESS, + .name = "dpdk-process", + .process_log2_n_stack_bytes = 17, +}; +/* *INDENT-ON* */ + +int +dpdk_set_stat_poll_interval (f64 interval) +{ + if (interval < DPDK_MIN_STATS_POLL_INTERVAL) + return (VNET_API_ERROR_INVALID_VALUE); + + dpdk_main.stat_poll_interval = interval; + + return 0; +} + +int +dpdk_set_link_state_poll_interval (f64 interval) +{ + if (interval < DPDK_MIN_LINK_POLL_INTERVAL) + return (VNET_API_ERROR_INVALID_VALUE); + + dpdk_main.link_state_poll_interval = interval; + + return 0; +} + +clib_error_t * +dpdk_init (vlib_main_t * vm) +{ + dpdk_main_t *dm = &dpdk_main; + vlib_node_t *ei; + clib_error_t *error = 0; + vlib_thread_main_t *tm = vlib_get_thread_main (); + + /* verify that structs are cacheline aligned */ + STATIC_ASSERT (offsetof (dpdk_device_t, cacheline0) == 0, + "Cache line marker must be 1st element in dpdk_device_t"); + STATIC_ASSERT (offsetof (dpdk_device_t, cacheline1) == + CLIB_CACHE_LINE_BYTES, + "Data in cache line 0 is bigger than cache line size"); + STATIC_ASSERT (offsetof (frame_queue_trace_t, cacheline0) == 0, + "Cache line marker must be 1st element in frame_queue_trace_t"); + + u8 *name; + name = format (0, "dpdk_%08x%c", api_version, 0); + + /* Ask for a correctly-sized block of API message decode slots */ + dm->msg_id_base = vl_msg_api_get_msg_ids + ((char *) name, VL_MSG_FIRST_AVAILABLE); + vec_free (name); + + dm->vlib_main = vm; + dm->vnet_main = vnet_get_main (); + dm->conf = &dpdk_config_main; + + error = dpdk_plugin_api_hookup (vm); + + /* Add our API messages to the global name_crc hash table */ + setup_message_id_table (dm, &api_main); + +// TODO +// plugin_custom_dump_configure (dm); + + ei = vlib_get_node_by_name (vm, (u8 *) "ethernet-input"); + if (ei == 0) + return clib_error_return (0, "ethernet-input node AWOL"); + + dm->ethernet_input_node_index = ei->index; + + dm->conf->nchannels = 4; + dm->conf->num_mbufs = dm->conf->num_mbufs ? dm->conf->num_mbufs : NB_MBUF; + vec_add1 (dm->conf->eal_init_args, (u8 *) "vnet"); + + dm->dpdk_device_by_kni_port_id = hash_create (0, sizeof (uword)); + dm->vu_sw_if_index_by_listener_fd = hash_create (0, sizeof (uword)); + dm->vu_sw_if_index_by_sock_fd = hash_create (0, sizeof (uword)); + + /* $$$ use n_thread_stacks since it's known-good at this point */ + vec_validate (dm->recycle, tm->n_thread_stacks - 1); + + /* Default vlib_buffer_t flags, DISABLES tcp/udp checksumming... */ + dm->buffer_flags_template = + (VLIB_BUFFER_TOTAL_LENGTH_VALID | VLIB_BUFFER_EXT_HDR_VALID + | IP_BUFFER_L4_CHECKSUM_COMPUTED | IP_BUFFER_L4_CHECKSUM_CORRECT); + + dm->stat_poll_interval = DPDK_STATS_POLL_INTERVAL; + dm->link_state_poll_interval = DPDK_LINK_POLL_INTERVAL; + + /* init CLI */ + if ((error = vlib_call_init_function (vm, dpdk_cli_init))) + return error; + + return error; +} + +VLIB_INIT_FUNCTION (dpdk_init); + + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/plugins/dpdk/init.c b/src/plugins/dpdk/init.c deleted file mode 100755 index e009ef3e..00000000 --- a/src/plugins/dpdk/init.c +++ /dev/null @@ -1,2074 +0,0 @@ -/* - * Copyright (c) 2015 Cisco and/or its affiliates. - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at: - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#include -#include -#include -#include -#include - -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include - -#include - -dpdk_main_t dpdk_main; - -#include -#include - -/* define message IDs */ -#include - -#define vl_typedefs /* define message structures */ -#include -#undef vl_typedefs - -#define vl_endianfun /* define message structures */ -#include -#undef vl_endianfun - -#define vl_print(handle, ...) vlib_cli_output (handle, __VA_ARGS__) - -/* Get the API version number. */ -#define vl_api_version(n,v) static u32 api_version=(v); -#include -#undef vl_api_version - -/* Macro to finish up custom dump fns */ -#define FINISH \ - vec_add1 (s, 0); \ - vl_print (handle, (char *)s); \ - vec_free (s); \ - return handle; - -#include - -static void - vl_api_sw_interface_set_dpdk_hqos_pipe_t_handler - (vl_api_sw_interface_set_dpdk_hqos_pipe_t * mp) -{ - vl_api_sw_interface_set_dpdk_hqos_pipe_reply_t *rmp; - int rv = 0; - - dpdk_main_t *dm = &dpdk_main; - dpdk_device_t *xd; - - u32 sw_if_index = ntohl (mp->sw_if_index); - u32 subport = ntohl (mp->subport); - u32 pipe = ntohl (mp->pipe); - u32 profile = ntohl (mp->profile); - vnet_hw_interface_t *hw; - - VALIDATE_SW_IF_INDEX (mp); - - /* hw_if & dpdk device */ - hw = vnet_get_sup_hw_interface (dm->vnet_main, sw_if_index); - - xd = vec_elt_at_index (dm->devices, hw->dev_instance); - - rv = rte_sched_pipe_config (xd->hqos_ht->hqos, subport, pipe, profile); - - BAD_SW_IF_INDEX_LABEL; - - REPLY_MACRO (VL_API_SW_INTERFACE_SET_DPDK_HQOS_PIPE_REPLY); -} - -static void *vl_api_sw_interface_set_dpdk_hqos_pipe_t_print - (vl_api_sw_interface_set_dpdk_hqos_pipe_t * mp, void *handle) -{ - u8 *s; - - s = format (0, "SCRIPT: sw_interface_set_dpdk_hqos_pipe "); - - s = format (s, "sw_if_index %u ", ntohl (mp->sw_if_index)); - - s = format (s, "subport %u pipe %u profile %u ", - ntohl (mp->subport), ntohl (mp->pipe), ntohl (mp->profile)); - - FINISH; -} - -static void - vl_api_sw_interface_set_dpdk_hqos_subport_t_handler - (vl_api_sw_interface_set_dpdk_hqos_subport_t * mp) -{ - vl_api_sw_interface_set_dpdk_hqos_subport_reply_t *rmp; - int rv = 0; - - dpdk_main_t *dm = &dpdk_main; - dpdk_device_t *xd; - struct rte_sched_subport_params p; - - u32 sw_if_index = ntohl (mp->sw_if_index); - u32 subport = ntohl (mp->subport); - p.tb_rate = ntohl (mp->tb_rate); - p.tb_size = ntohl (mp->tb_size); - p.tc_rate[0] = ntohl (mp->tc_rate[0]); - p.tc_rate[1] = ntohl (mp->tc_rate[1]); - p.tc_rate[2] = ntohl (mp->tc_rate[2]); - p.tc_rate[3] = ntohl (mp->tc_rate[3]); - p.tc_period = ntohl (mp->tc_period); - - vnet_hw_interface_t *hw; - - VALIDATE_SW_IF_INDEX (mp); - - /* hw_if & dpdk device */ - hw = vnet_get_sup_hw_interface (dm->vnet_main, sw_if_index); - - xd = vec_elt_at_index (dm->devices, hw->dev_instance); - - rv = rte_sched_subport_config (xd->hqos_ht->hqos, subport, &p); - - BAD_SW_IF_INDEX_LABEL; - - REPLY_MACRO (VL_API_SW_INTERFACE_SET_DPDK_HQOS_SUBPORT_REPLY); -} - -static void *vl_api_sw_interface_set_dpdk_hqos_subport_t_print - (vl_api_sw_interface_set_dpdk_hqos_subport_t * mp, void *handle) -{ - u8 *s; - - s = format (0, "SCRIPT: sw_interface_set_dpdk_hqos_subport "); - - s = format (s, "sw_if_index %u ", ntohl (mp->sw_if_index)); - - s = - format (s, - "subport %u rate %u bkt_size %u tc0 %u tc1 %u tc2 %u tc3 %u period %u", - ntohl (mp->subport), ntohl (mp->tb_rate), ntohl (mp->tb_size), - ntohl (mp->tc_rate[0]), ntohl (mp->tc_rate[1]), - ntohl (mp->tc_rate[2]), ntohl (mp->tc_rate[3]), - ntohl (mp->tc_period)); - - FINISH; -} - -static void - vl_api_sw_interface_set_dpdk_hqos_tctbl_t_handler - (vl_api_sw_interface_set_dpdk_hqos_tctbl_t * mp) -{ - vl_api_sw_interface_set_dpdk_hqos_tctbl_reply_t *rmp; - int rv = 0; - - dpdk_main_t *dm = &dpdk_main; - vlib_thread_main_t *tm = vlib_get_thread_main (); - dpdk_device_t *xd; - - u32 sw_if_index = ntohl (mp->sw_if_index); - u32 entry = ntohl (mp->entry); - u32 tc = ntohl (mp->tc); - u32 queue = ntohl (mp->queue); - u32 val, i; - - vnet_hw_interface_t *hw; - - VALIDATE_SW_IF_INDEX (mp); - - /* hw_if & dpdk device */ - hw = vnet_get_sup_hw_interface (dm->vnet_main, sw_if_index); - - xd = vec_elt_at_index (dm->devices, hw->dev_instance); - - if (tc >= RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE) - { - clib_warning ("invalid traffic class !!"); - rv = VNET_API_ERROR_INVALID_VALUE; - goto done; - } - if (queue >= RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS) - { - clib_warning ("invalid queue !!"); - rv = VNET_API_ERROR_INVALID_VALUE; - goto done; - } - - /* Detect the set of worker threads */ - uword *p = hash_get_mem (tm->thread_registrations_by_name, "workers"); - - if (p == 0) - { - clib_warning ("worker thread registration AWOL !!"); - rv = VNET_API_ERROR_INVALID_VALUE_2; - goto done; - } - - vlib_thread_registration_t *tr = (vlib_thread_registration_t *) p[0]; - int worker_thread_first = tr->first_index; - int worker_thread_count = tr->count; - - val = tc * RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS + queue; - for (i = 0; i < worker_thread_count; i++) - xd->hqos_wt[worker_thread_first + i].hqos_tc_table[entry] = val; - - BAD_SW_IF_INDEX_LABEL; -done: - - REPLY_MACRO (VL_API_SW_INTERFACE_SET_DPDK_HQOS_TCTBL_REPLY); -} - -static void *vl_api_sw_interface_set_dpdk_hqos_tctbl_t_print - (vl_api_sw_interface_set_dpdk_hqos_tctbl_t * mp, void *handle) -{ - u8 *s; - - s = format (0, "SCRIPT: sw_interface_set_dpdk_hqos_tctbl "); - - s = format (s, "sw_if_index %u ", ntohl (mp->sw_if_index)); - - s = format (s, "entry %u tc %u queue %u", - ntohl (mp->entry), ntohl (mp->tc), ntohl (mp->queue)); - - FINISH; -} - -#define foreach_dpdk_plugin_api_msg \ -_(SW_INTERFACE_SET_DPDK_HQOS_PIPE, sw_interface_set_dpdk_hqos_pipe) \ -_(SW_INTERFACE_SET_DPDK_HQOS_SUBPORT, sw_interface_set_dpdk_hqos_subport) \ -_(SW_INTERFACE_SET_DPDK_HQOS_TCTBL, sw_interface_set_dpdk_hqos_tctbl) - -/* Set up the API message handling tables */ -static clib_error_t * -dpdk_plugin_api_hookup (vlib_main_t * vm) -{ - dpdk_main_t *dm __attribute__ ((unused)) = &dpdk_main; -#define _(N,n) \ - vl_msg_api_set_handlers((VL_API_##N + dm->msg_id_base), \ - #n, \ - vl_api_##n##_t_handler, \ - vl_noop_handler, \ - vl_api_##n##_t_endian, \ - vl_api_##n##_t_print, \ - sizeof(vl_api_##n##_t), 1); - foreach_dpdk_plugin_api_msg; -#undef _ - return 0; -} - -#define vl_msg_name_crc_list -#include -#undef vl_msg_name_crc_list - -static void -setup_message_id_table (dpdk_main_t * dm, api_main_t * am) -{ -#define _(id,n,crc) \ - vl_msg_api_add_msg_name_crc (am, #n "_" #crc, id + dm->msg_id_base); - foreach_vl_msg_name_crc_dpdk; -#undef _ -} - -// TODO -/* -static void plugin_custom_dump_configure (dpdk_main_t * dm) -{ -#define _(n,f) dm->api_main->msg_print_handlers \ - [VL_API_##n + dm->msg_id_base] \ - = (void *) vl_api_##f##_t_print; - foreach_dpdk_plugin_api_msg; -#undef _ -} -*/ -/* force linker to link functions used by vlib and declared weak */ -void *vlib_weakly_linked_functions[] = { - &rte_pktmbuf_init, - &rte_pktmbuf_pool_init, -}; - -#define LINK_STATE_ELOGS 0 - -#define DEFAULT_HUGE_DIR "/run/vpp/hugepages" -#define VPP_RUN_DIR "/run/vpp" - -/* Port configuration, mildly modified Intel app values */ - -static struct rte_eth_conf port_conf_template = { - .rxmode = { - .split_hdr_size = 0, - .header_split = 0, /**< Header Split disabled */ - .hw_ip_checksum = 0, /**< IP checksum offload disabled */ - .hw_vlan_filter = 0, /**< VLAN filtering disabled */ - .hw_strip_crc = 0, /**< CRC stripped by hardware */ - }, - .txmode = { - .mq_mode = ETH_MQ_TX_NONE, - }, -}; - -clib_error_t * -dpdk_port_setup (dpdk_main_t * dm, dpdk_device_t * xd) -{ - int rv; - int j; - - ASSERT (os_get_cpu_number () == 0); - - if (xd->flags & DPDK_DEVICE_FLAG_ADMIN_UP) - { - vnet_hw_interface_set_flags (dm->vnet_main, xd->vlib_hw_if_index, 0); - rte_eth_dev_stop (xd->device_index); - } - - rv = rte_eth_dev_configure (xd->device_index, xd->rx_q_used, - xd->tx_q_used, &xd->port_conf); - - if (rv < 0) - return clib_error_return (0, "rte_eth_dev_configure[%d]: err %d", - xd->device_index, rv); - - /* Set up one TX-queue per worker thread */ - for (j = 0; j < xd->tx_q_used; j++) - { - rv = rte_eth_tx_queue_setup (xd->device_index, j, xd->nb_tx_desc, - xd->cpu_socket, &xd->tx_conf); - - /* retry with any other CPU socket */ - if (rv < 0) - rv = rte_eth_tx_queue_setup (xd->device_index, j, xd->nb_tx_desc, - SOCKET_ID_ANY, &xd->tx_conf); - if (rv < 0) - break; - } - - if (rv < 0) - return clib_error_return (0, "rte_eth_tx_queue_setup[%d]: err %d", - xd->device_index, rv); - - for (j = 0; j < xd->rx_q_used; j++) - { - - rv = rte_eth_rx_queue_setup (xd->device_index, j, xd->nb_rx_desc, - xd->cpu_socket, 0, - dm-> - pktmbuf_pools[xd->cpu_socket_id_by_queue - [j]]); - - /* retry with any other CPU socket */ - if (rv < 0) - rv = rte_eth_rx_queue_setup (xd->device_index, j, xd->nb_rx_desc, - SOCKET_ID_ANY, 0, - dm-> - pktmbuf_pools[xd->cpu_socket_id_by_queue - [j]]); - if (rv < 0) - return clib_error_return (0, "rte_eth_rx_queue_setup[%d]: err %d", - xd->device_index, rv); - } - - if (xd->flags & DPDK_DEVICE_FLAG_ADMIN_UP) - { - int rv; - rv = rte_eth_dev_start (xd->device_index); - if (!rv && xd->default_mac_address) - rv = rte_eth_dev_default_mac_addr_set (xd->device_index, - (struct ether_addr *) - xd->default_mac_address); - if (rv < 0) - clib_warning ("rte_eth_dev_start %d returned %d", - xd->device_index, rv); - } - return 0; -} - -static u32 -dpdk_flag_change (vnet_main_t * vnm, vnet_hw_interface_t * hi, u32 flags) -{ - dpdk_main_t *dm = &dpdk_main; - dpdk_device_t *xd = vec_elt_at_index (dm->devices, hi->dev_instance); - u32 old = 0; - - if (ETHERNET_INTERFACE_FLAG_CONFIG_PROMISC (flags)) - { - old = (xd->flags & DPDK_DEVICE_FLAG_PROMISC) != 0; - - if (flags & ETHERNET_INTERFACE_FLAG_ACCEPT_ALL) - xd->flags |= DPDK_DEVICE_FLAG_PROMISC; - else - xd->flags &= ~DPDK_DEVICE_FLAG_PROMISC; - - if (xd->flags & DPDK_DEVICE_FLAG_ADMIN_UP) - { - if (xd->flags & DPDK_DEVICE_FLAG_PROMISC) - rte_eth_promiscuous_enable (xd->device_index); - else - rte_eth_promiscuous_disable (xd->device_index); - } - } - else if (ETHERNET_INTERFACE_FLAG_CONFIG_MTU (flags)) - { - /* - * DAW-FIXME: The Cisco VIC firmware does not provide an api for a - * driver to dynamically change the mtu. If/when the - * VIC firmware gets fixed, then this should be removed. - */ - if (xd->pmd == VNET_DPDK_PMD_ENIC) - { - struct rte_eth_dev_info dev_info; - - /* - * Restore mtu to what has been set by CIMC in the firmware cfg. - */ - rte_eth_dev_info_get (xd->device_index, &dev_info); - hi->max_packet_bytes = dev_info.max_rx_pktlen; - - vlib_cli_output (vlib_get_main (), - "Cisco VIC mtu can only be changed " - "using CIMC then rebooting the server!"); - } - else - { - int rv; - - xd->port_conf.rxmode.max_rx_pkt_len = hi->max_packet_bytes; - - if (xd->flags & DPDK_DEVICE_FLAG_ADMIN_UP) - rte_eth_dev_stop (xd->device_index); - - rv = rte_eth_dev_configure - (xd->device_index, xd->rx_q_used, xd->tx_q_used, &xd->port_conf); - - if (rv < 0) - vlib_cli_output (vlib_get_main (), - "rte_eth_dev_configure[%d]: err %d", - xd->device_index, rv); - - rte_eth_dev_set_mtu (xd->device_index, hi->max_packet_bytes); - - if (xd->flags & DPDK_DEVICE_FLAG_ADMIN_UP) - { - int rv = rte_eth_dev_start (xd->device_index); - if (!rv && xd->default_mac_address) - rv = rte_eth_dev_default_mac_addr_set (xd->device_index, - (struct ether_addr *) - xd->default_mac_address); - if (rv < 0) - clib_warning ("rte_eth_dev_start %d returned %d", - xd->device_index, rv); - } - } - } - return old; -} - -void -dpdk_device_lock_init (dpdk_device_t * xd) -{ - int q; - vec_validate (xd->lockp, xd->tx_q_used - 1); - for (q = 0; q < xd->tx_q_used; q++) - { - xd->lockp[q] = clib_mem_alloc_aligned (CLIB_CACHE_LINE_BYTES, - CLIB_CACHE_LINE_BYTES); - memset ((void *) xd->lockp[q], 0, CLIB_CACHE_LINE_BYTES); - } -} - -void -dpdk_device_lock_free (dpdk_device_t * xd) -{ - int q; - - for (q = 0; q < vec_len (xd->lockp); q++) - clib_mem_free ((void *) xd->lockp[q]); - vec_free (xd->lockp); - xd->lockp = 0; -} - -static clib_error_t * -dpdk_lib_init (dpdk_main_t * dm) -{ - u32 nports; - u32 nb_desc = 0; - int i; - clib_error_t *error; - vlib_main_t *vm = vlib_get_main (); - vlib_thread_main_t *tm = vlib_get_thread_main (); - vnet_sw_interface_t *sw; - vnet_hw_interface_t *hi; - dpdk_device_t *xd; - vlib_pci_addr_t last_pci_addr; - u32 last_pci_addr_port = 0; - vlib_thread_registration_t *tr, *tr_hqos; - uword *p, *p_hqos; - - u32 next_cpu = 0, next_hqos_cpu = 0; - u8 af_packet_port_id = 0; - last_pci_addr.as_u32 = ~0; - - dm->input_cpu_first_index = 0; - dm->input_cpu_count = 1; - - /* find out which cpus will be used for input */ - p = hash_get_mem (tm->thread_registrations_by_name, "workers"); - tr = p ? (vlib_thread_registration_t *) p[0] : 0; - - if (tr && tr->count > 0) - { - dm->input_cpu_first_index = tr->first_index; - dm->input_cpu_count = tr->count; - } - - vec_validate_aligned (dm->devices_by_cpu, tm->n_vlib_mains - 1, - CLIB_CACHE_LINE_BYTES); - - dm->hqos_cpu_first_index = 0; - dm->hqos_cpu_count = 0; - - /* find out which cpus will be used for I/O TX */ - p_hqos = hash_get_mem (tm->thread_registrations_by_name, "hqos-threads"); - tr_hqos = p_hqos ? (vlib_thread_registration_t *) p_hqos[0] : 0; - - if (tr_hqos && tr_hqos->count > 0) - { - dm->hqos_cpu_first_index = tr_hqos->first_index; - dm->hqos_cpu_count = tr_hqos->count; - } - - vec_validate_aligned (dm->devices_by_hqos_cpu, tm->n_vlib_mains - 1, - CLIB_CACHE_LINE_BYTES); - - nports = rte_eth_dev_count (); - if (nports < 1) - { - clib_warning ("DPDK drivers found no ports..."); - } - - if (CLIB_DEBUG > 0) - clib_warning ("DPDK drivers found %d ports...", nports); - - /* - * All buffers are all allocated from the same rte_mempool. - * Thus they all have the same number of data bytes. - */ - dm->vlib_buffer_free_list_index = - vlib_buffer_get_or_create_free_list (vm, - VLIB_BUFFER_DEFAULT_FREE_LIST_BYTES, - "dpdk rx"); - - if (dm->conf->enable_tcp_udp_checksum) - dm->buffer_flags_template &= ~(IP_BUFFER_L4_CHECKSUM_CORRECT - | IP_BUFFER_L4_CHECKSUM_COMPUTED); - - for (i = 0; i < nports; i++) - { - u8 addr[6]; - u8 vlan_strip = 0; - int j; - struct rte_eth_dev_info dev_info; - clib_error_t *rv; - struct rte_eth_link l; - dpdk_device_config_t *devconf = 0; - vlib_pci_addr_t pci_addr; - uword *p = 0; - - rte_eth_dev_info_get (i, &dev_info); - if (dev_info.pci_dev) /* bonded interface has no pci info */ - { - pci_addr.domain = dev_info.pci_dev->addr.domain; - pci_addr.bus = dev_info.pci_dev->addr.bus; - pci_addr.slot = dev_info.pci_dev->addr.devid; - pci_addr.function = dev_info.pci_dev->addr.function; - p = - hash_get (dm->conf->device_config_index_by_pci_addr, - pci_addr.as_u32); - } - - if (p) - devconf = pool_elt_at_index (dm->conf->dev_confs, p[0]); - else - devconf = &dm->conf->default_devconf; - - /* Create vnet interface */ - vec_add2_aligned (dm->devices, xd, 1, CLIB_CACHE_LINE_BYTES); - xd->nb_rx_desc = DPDK_NB_RX_DESC_DEFAULT; - xd->nb_tx_desc = DPDK_NB_TX_DESC_DEFAULT; - xd->cpu_socket = (i8) rte_eth_dev_socket_id (i); - - /* Handle interface naming for devices with multiple ports sharing same PCI ID */ - if (dev_info.pci_dev) - { - struct rte_eth_dev_info di = { 0 }; - rte_eth_dev_info_get (i + 1, &di); - if (di.pci_dev && pci_addr.as_u32 != last_pci_addr.as_u32 && - memcmp (&dev_info.pci_dev->addr, &di.pci_dev->addr, - sizeof (struct rte_pci_addr)) == 0) - { - xd->interface_name_suffix = format (0, "0"); - last_pci_addr.as_u32 = pci_addr.as_u32; - last_pci_addr_port = i; - } - else if (pci_addr.as_u32 == last_pci_addr.as_u32) - { - xd->interface_name_suffix = - format (0, "%u", i - last_pci_addr_port); - } - else - { - last_pci_addr.as_u32 = ~0; - } - } - else - last_pci_addr.as_u32 = ~0; - - clib_memcpy (&xd->tx_conf, &dev_info.default_txconf, - sizeof (struct rte_eth_txconf)); - if (dm->conf->no_multi_seg) - { - xd->tx_conf.txq_flags |= ETH_TXQ_FLAGS_NOMULTSEGS; - port_conf_template.rxmode.jumbo_frame = 0; - } - else - { - xd->tx_conf.txq_flags &= ~ETH_TXQ_FLAGS_NOMULTSEGS; - port_conf_template.rxmode.jumbo_frame = 1; - xd->flags |= DPDK_DEVICE_FLAG_MAYBE_MULTISEG; - } - - clib_memcpy (&xd->port_conf, &port_conf_template, - sizeof (struct rte_eth_conf)); - - xd->tx_q_used = clib_min (dev_info.max_tx_queues, tm->n_vlib_mains); - - if (devconf->num_tx_queues > 0 - && devconf->num_tx_queues < xd->tx_q_used) - xd->tx_q_used = clib_min (xd->tx_q_used, devconf->num_tx_queues); - - if (devconf->num_rx_queues > 1 && dm->use_rss == 0) - { - dm->use_rss = 1; - } - - if (devconf->num_rx_queues > 1 - && dev_info.max_rx_queues >= devconf->num_rx_queues) - { - xd->rx_q_used = devconf->num_rx_queues; - xd->port_conf.rxmode.mq_mode = ETH_MQ_RX_RSS; - if (devconf->rss_fn == 0) - xd->port_conf.rx_adv_conf.rss_conf.rss_hf = - ETH_RSS_IP | ETH_RSS_UDP | ETH_RSS_TCP; - else - xd->port_conf.rx_adv_conf.rss_conf.rss_hf = devconf->rss_fn; - } - else - xd->rx_q_used = 1; - - xd->flags |= DPDK_DEVICE_FLAG_PMD; - - /* workaround for drivers not setting driver_name */ - if ((!dev_info.driver_name) && (dev_info.pci_dev)) - dev_info.driver_name = dev_info.pci_dev->driver->driver.name; - - ASSERT (dev_info.driver_name); - - if (!xd->pmd) - { - - -#define _(s,f) else if (dev_info.driver_name && \ - !strcmp(dev_info.driver_name, s)) \ - xd->pmd = VNET_DPDK_PMD_##f; - if (0) - ; - foreach_dpdk_pmd -#undef _ - else - xd->pmd = VNET_DPDK_PMD_UNKNOWN; - - xd->port_type = VNET_DPDK_PORT_TYPE_UNKNOWN; - xd->nb_rx_desc = DPDK_NB_RX_DESC_DEFAULT; - xd->nb_tx_desc = DPDK_NB_TX_DESC_DEFAULT; - - switch (xd->pmd) - { - /* 1G adapters */ - case VNET_DPDK_PMD_E1000EM: - case VNET_DPDK_PMD_IGB: - case VNET_DPDK_PMD_IGBVF: - xd->port_type = VNET_DPDK_PORT_TYPE_ETH_1G; - break; - - /* 10G adapters */ - case VNET_DPDK_PMD_IXGBE: - case VNET_DPDK_PMD_IXGBEVF: - case VNET_DPDK_PMD_THUNDERX: - xd->port_type = VNET_DPDK_PORT_TYPE_ETH_10G; - break; - case VNET_DPDK_PMD_DPAA2: - xd->port_type = VNET_DPDK_PORT_TYPE_ETH_10G; - break; - - /* Cisco VIC */ - case VNET_DPDK_PMD_ENIC: - rte_eth_link_get_nowait (i, &l); - xd->flags |= DPDK_DEVICE_FLAG_PMD_SUPPORTS_PTYPE; - if (l.link_speed == 40000) - xd->port_type = VNET_DPDK_PORT_TYPE_ETH_40G; - else - xd->port_type = VNET_DPDK_PORT_TYPE_ETH_10G; - break; - - /* Intel Fortville */ - case VNET_DPDK_PMD_I40E: - case VNET_DPDK_PMD_I40EVF: - xd->flags |= DPDK_DEVICE_FLAG_PMD_SUPPORTS_PTYPE; - xd->port_type = VNET_DPDK_PORT_TYPE_ETH_40G; - - switch (dev_info.pci_dev->id.device_id) - { - case I40E_DEV_ID_10G_BASE_T: - case I40E_DEV_ID_SFP_XL710: - xd->port_type = VNET_DPDK_PORT_TYPE_ETH_10G; - break; - case I40E_DEV_ID_QSFP_A: - case I40E_DEV_ID_QSFP_B: - case I40E_DEV_ID_QSFP_C: - xd->port_type = VNET_DPDK_PORT_TYPE_ETH_40G; - break; - case I40E_DEV_ID_VF: - rte_eth_link_get_nowait (i, &l); - xd->port_type = l.link_speed == 10000 ? - VNET_DPDK_PORT_TYPE_ETH_10G : VNET_DPDK_PORT_TYPE_ETH_40G; - break; - default: - xd->port_type = VNET_DPDK_PORT_TYPE_UNKNOWN; - } - break; - - case VNET_DPDK_PMD_CXGBE: - switch (dev_info.pci_dev->id.device_id) - { - case 0x540d: /* T580-CR */ - case 0x5410: /* T580-LP-cr */ - xd->port_type = VNET_DPDK_PORT_TYPE_ETH_40G; - break; - case 0x5403: /* T540-CR */ - xd->port_type = VNET_DPDK_PORT_TYPE_ETH_10G; - break; - default: - xd->port_type = VNET_DPDK_PORT_TYPE_UNKNOWN; - } - break; - - case VNET_DPDK_PMD_MLX5: - { - char *pn_100g[] = { "MCX415A-CCAT", "MCX416A-CCAT", 0 }; - char *pn_40g[] = { "MCX413A-BCAT", "MCX414A-BCAT", - "MCX415A-BCAT", "MCX416A-BCAT", "MCX4131A-BCAT", 0 - }; - char *pn_10g[] = { "MCX4111A-XCAT", "MCX4121A-XCAT", 0 }; - - vlib_pci_device_t *pd = vlib_get_pci_device (&pci_addr); - u8 *pn = 0; - char **c; - int found = 0; - pn = format (0, "%U%c", - format_vlib_pci_vpd, pd->vpd_r, "PN", 0); - - if (!pn) - break; - - c = pn_100g; - while (!found && c[0]) - { - if (strncmp ((char *) pn, c[0], strlen (c[0])) == 0) - { - xd->port_type = VNET_DPDK_PORT_TYPE_ETH_100G; - break; - } - c++; - } - - c = pn_40g; - while (!found && c[0]) - { - if (strncmp ((char *) pn, c[0], strlen (c[0])) == 0) - { - xd->port_type = VNET_DPDK_PORT_TYPE_ETH_40G; - break; - } - c++; - } - - c = pn_10g; - while (!found && c[0]) - { - if (strncmp ((char *) pn, c[0], strlen (c[0])) == 0) - { - xd->port_type = VNET_DPDK_PORT_TYPE_ETH_10G; - break; - } - c++; - } - - vec_free (pn); - } - - break; - /* Intel Red Rock Canyon */ - case VNET_DPDK_PMD_FM10K: - xd->port_type = VNET_DPDK_PORT_TYPE_ETH_SWITCH; - break; - - /* virtio */ - case VNET_DPDK_PMD_VIRTIO: - xd->port_type = VNET_DPDK_PORT_TYPE_ETH_1G; - xd->nb_rx_desc = DPDK_NB_RX_DESC_VIRTIO; - xd->nb_tx_desc = DPDK_NB_TX_DESC_VIRTIO; - break; - - /* vmxnet3 */ - case VNET_DPDK_PMD_VMXNET3: - xd->port_type = VNET_DPDK_PORT_TYPE_ETH_1G; - xd->tx_conf.txq_flags |= ETH_TXQ_FLAGS_NOMULTSEGS; - break; - - case VNET_DPDK_PMD_AF_PACKET: - xd->port_type = VNET_DPDK_PORT_TYPE_AF_PACKET; - xd->af_packet_port_id = af_packet_port_id++; - break; - - case VNET_DPDK_PMD_BOND: - xd->flags |= DPDK_DEVICE_FLAG_PMD_SUPPORTS_PTYPE; - xd->port_type = VNET_DPDK_PORT_TYPE_ETH_BOND; - break; - - default: - xd->port_type = VNET_DPDK_PORT_TYPE_UNKNOWN; - } - - if (devconf->num_rx_desc) - xd->nb_rx_desc = devconf->num_rx_desc; - - if (devconf->num_tx_desc) - xd->nb_tx_desc = devconf->num_tx_desc; - } - - /* - * Ensure default mtu is not > the mtu read from the hardware. - * Otherwise rte_eth_dev_configure() will fail and the port will - * not be available. - */ - if (ETHERNET_MAX_PACKET_BYTES > dev_info.max_rx_pktlen) - { - /* - * This device does not support the platforms's max frame - * size. Use it's advertised mru instead. - */ - xd->port_conf.rxmode.max_rx_pkt_len = dev_info.max_rx_pktlen; - } - else - { - xd->port_conf.rxmode.max_rx_pkt_len = ETHERNET_MAX_PACKET_BYTES; - - /* - * Some platforms do not account for Ethernet FCS (4 bytes) in - * MTU calculations. To interop with them increase mru but only - * if the device's settings can support it. - */ - if ((dev_info.max_rx_pktlen >= (ETHERNET_MAX_PACKET_BYTES + 4)) && - xd->port_conf.rxmode.hw_strip_crc) - { - /* - * Allow additional 4 bytes (for Ethernet FCS). These bytes are - * stripped by h/w and so will not consume any buffer memory. - */ - xd->port_conf.rxmode.max_rx_pkt_len += 4; - } - } - - if (xd->pmd == VNET_DPDK_PMD_AF_PACKET) - { - f64 now = vlib_time_now (vm); - u32 rnd; - rnd = (u32) (now * 1e6); - rnd = random_u32 (&rnd); - clib_memcpy (addr + 2, &rnd, sizeof (rnd)); - addr[0] = 2; - addr[1] = 0xfe; - } - else - rte_eth_macaddr_get (i, (struct ether_addr *) addr); - - if (xd->tx_q_used < tm->n_vlib_mains) - dpdk_device_lock_init (xd); - - xd->device_index = xd - dm->devices; - ASSERT (i == xd->device_index); - xd->per_interface_next_index = ~0; - - /* assign interface to input thread */ - dpdk_device_and_queue_t *dq; - int q; - - if (devconf->workers) - { - int i; - q = 0; - /* *INDENT-OFF* */ - clib_bitmap_foreach (i, devconf->workers, ({ - int cpu = dm->input_cpu_first_index + i; - unsigned lcore = vlib_worker_threads[cpu].lcore_id; - vec_validate(xd->cpu_socket_id_by_queue, q); - xd->cpu_socket_id_by_queue[q] = rte_lcore_to_socket_id(lcore); - vec_add2(dm->devices_by_cpu[cpu], dq, 1); - dq->device = xd->device_index; - dq->queue_id = q++; - })); - /* *INDENT-ON* */ - } - else - for (q = 0; q < xd->rx_q_used; q++) - { - int cpu = dm->input_cpu_first_index + next_cpu; - unsigned lcore = vlib_worker_threads[cpu].lcore_id; - - /* - * numa node for worker thread handling this queue - * needed for taking buffers from the right mempool - */ - vec_validate (xd->cpu_socket_id_by_queue, q); - xd->cpu_socket_id_by_queue[q] = rte_lcore_to_socket_id (lcore); - - /* - * construct vector of (device,queue) pairs for each worker thread - */ - vec_add2 (dm->devices_by_cpu[cpu], dq, 1); - dq->device = xd->device_index; - dq->queue_id = q; - - next_cpu++; - if (next_cpu == dm->input_cpu_count) - next_cpu = 0; - } - - - if (devconf->hqos_enabled) - { - xd->flags |= DPDK_DEVICE_FLAG_HQOS; - - if (devconf->hqos.hqos_thread_valid) - { - int cpu = dm->hqos_cpu_first_index + devconf->hqos.hqos_thread; - - if (devconf->hqos.hqos_thread >= dm->hqos_cpu_count) - return clib_error_return (0, "invalid HQoS thread index"); - - vec_add2 (dm->devices_by_hqos_cpu[cpu], dq, 1); - dq->device = xd->device_index; - dq->queue_id = 0; - } - else - { - int cpu = dm->hqos_cpu_first_index + next_hqos_cpu; - - if (dm->hqos_cpu_count == 0) - return clib_error_return (0, "no HQoS threads available"); - - vec_add2 (dm->devices_by_hqos_cpu[cpu], dq, 1); - dq->device = xd->device_index; - dq->queue_id = 0; - - next_hqos_cpu++; - if (next_hqos_cpu == dm->hqos_cpu_count) - next_hqos_cpu = 0; - - devconf->hqos.hqos_thread_valid = 1; - devconf->hqos.hqos_thread = cpu; - } - } - - vec_validate_aligned (xd->tx_vectors, tm->n_vlib_mains, - CLIB_CACHE_LINE_BYTES); - for (j = 0; j < tm->n_vlib_mains; j++) - { - vec_validate_ha (xd->tx_vectors[j], xd->nb_tx_desc, - sizeof (tx_ring_hdr_t), CLIB_CACHE_LINE_BYTES); - vec_reset_length (xd->tx_vectors[j]); - } - - vec_validate_aligned (xd->rx_vectors, xd->rx_q_used, - CLIB_CACHE_LINE_BYTES); - for (j = 0; j < xd->rx_q_used; j++) - { - vec_validate_aligned (xd->rx_vectors[j], VLIB_FRAME_SIZE - 1, - CLIB_CACHE_LINE_BYTES); - vec_reset_length (xd->rx_vectors[j]); - } - - vec_validate_aligned (xd->d_trace_buffers, tm->n_vlib_mains, - CLIB_CACHE_LINE_BYTES); - - rv = dpdk_port_setup (dm, xd); - - if (rv) - return rv; - - if (devconf->hqos_enabled) - { - rv = dpdk_port_setup_hqos (xd, &devconf->hqos); - if (rv) - return rv; - } - - /* count the number of descriptors used for this device */ - nb_desc += xd->nb_rx_desc + xd->nb_tx_desc * xd->tx_q_used; - - error = ethernet_register_interface - (dm->vnet_main, dpdk_device_class.index, xd->device_index, - /* ethernet address */ addr, - &xd->vlib_hw_if_index, dpdk_flag_change); - if (error) - return error; - - sw = vnet_get_hw_sw_interface (dm->vnet_main, xd->vlib_hw_if_index); - xd->vlib_sw_if_index = sw->sw_if_index; - hi = vnet_get_hw_interface (dm->vnet_main, xd->vlib_hw_if_index); - - /* - * DAW-FIXME: The Cisco VIC firmware does not provide an api for a - * driver to dynamically change the mtu. If/when the - * VIC firmware gets fixed, then this should be removed. - */ - if (xd->pmd == VNET_DPDK_PMD_ENIC) - { - /* - * Initialize mtu to what has been set by CIMC in the firmware cfg. - */ - hi->max_packet_bytes = dev_info.max_rx_pktlen; - if (devconf->vlan_strip_offload != DPDK_DEVICE_VLAN_STRIP_OFF) - vlan_strip = 1; /* remove vlan tag from VIC port by default */ - else - clib_warning ("VLAN strip disabled for interface\n"); - } - else if (devconf->vlan_strip_offload == DPDK_DEVICE_VLAN_STRIP_ON) - vlan_strip = 1; - - if (vlan_strip) - { - int vlan_off; - vlan_off = rte_eth_dev_get_vlan_offload (xd->device_index); - vlan_off |= ETH_VLAN_STRIP_OFFLOAD; - xd->port_conf.rxmode.hw_vlan_strip = vlan_off; - if (rte_eth_dev_set_vlan_offload (xd->device_index, vlan_off) == 0) - clib_warning ("VLAN strip enabled for interface\n"); - else - clib_warning ("VLAN strip cannot be supported by interface\n"); - } - - hi->max_l3_packet_bytes[VLIB_RX] = hi->max_l3_packet_bytes[VLIB_TX] = - xd->port_conf.rxmode.max_rx_pkt_len - sizeof (ethernet_header_t); - - rte_eth_dev_set_mtu (xd->device_index, hi->max_packet_bytes); - } - - if (nb_desc > dm->conf->num_mbufs) - clib_warning ("%d mbufs allocated but total rx/tx ring size is %d\n", - dm->conf->num_mbufs, nb_desc); - - return 0; -} - -static void -dpdk_bind_devices_to_uio (dpdk_config_main_t * conf) -{ - vlib_pci_main_t *pm = &pci_main; - clib_error_t *error; - vlib_pci_device_t *d; - u8 *pci_addr = 0; - int num_whitelisted = vec_len (conf->dev_confs); - - /* *INDENT-OFF* */ - pool_foreach (d, pm->pci_devs, ({ - dpdk_device_config_t * devconf = 0; - vec_reset_length (pci_addr); - pci_addr = format (pci_addr, "%U%c", format_vlib_pci_addr, &d->bus_address, 0); - - if (d->device_class != PCI_CLASS_NETWORK_ETHERNET && d->device_class != PCI_CLASS_PROCESSOR_CO) - continue; - - if (num_whitelisted) - { - uword * p = hash_get (conf->device_config_index_by_pci_addr, d->bus_address.as_u32); - - if (!p) - continue; - - devconf = pool_elt_at_index (conf->dev_confs, p[0]); - } - - /* virtio */ - if (d->vendor_id == 0x1af4 && d->device_id == 0x1000) - ; - /* vmxnet3 */ - else if (d->vendor_id == 0x15ad && d->device_id == 0x07b0) - ; - /* all Intel devices */ - else if (d->vendor_id == 0x8086) - ; - /* Cisco VIC */ - else if (d->vendor_id == 0x1137 && d->device_id == 0x0043) - ; - /* Chelsio T4/T5 */ - else if (d->vendor_id == 0x1425 && (d->device_id & 0xe000) == 0x4000) - ; - else - { - clib_warning ("Unsupported Ethernet PCI device 0x%04x:0x%04x found " - "at PCI address %s\n", (u16) d->vendor_id, (u16) d->device_id, - pci_addr); - continue; - } - - error = vlib_pci_bind_to_uio (d, (char *) conf->uio_driver_name); - - if (error) - { - if (devconf == 0) - { - pool_get (conf->dev_confs, devconf); - hash_set (conf->device_config_index_by_pci_addr, d->bus_address.as_u32, - devconf - conf->dev_confs); - devconf->pci_addr.as_u32 = d->bus_address.as_u32; - } - devconf->is_blacklisted = 1; - clib_error_report (error); - } - })); - /* *INDENT-ON* */ - vec_free (pci_addr); -} - -static clib_error_t * -dpdk_device_config (dpdk_config_main_t * conf, vlib_pci_addr_t pci_addr, - unformat_input_t * input, u8 is_default) -{ - clib_error_t *error = 0; - uword *p; - dpdk_device_config_t *devconf; - unformat_input_t sub_input; - - if (is_default) - { - devconf = &conf->default_devconf; - } - else - { - p = hash_get (conf->device_config_index_by_pci_addr, pci_addr.as_u32); - - if (!p) - { - pool_get (conf->dev_confs, devconf); - hash_set (conf->device_config_index_by_pci_addr, pci_addr.as_u32, - devconf - conf->dev_confs); - } - else - return clib_error_return (0, - "duplicate configuration for PCI address %U", - format_vlib_pci_addr, &pci_addr); - } - - devconf->pci_addr.as_u32 = pci_addr.as_u32; - devconf->hqos_enabled = 0; - dpdk_device_config_hqos_default (&devconf->hqos); - - if (!input) - return 0; - - unformat_skip_white_space (input); - while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) - { - if (unformat (input, "num-rx-queues %u", &devconf->num_rx_queues)) - ; - else if (unformat (input, "num-tx-queues %u", &devconf->num_tx_queues)) - ; - else if (unformat (input, "num-rx-desc %u", &devconf->num_rx_desc)) - ; - else if (unformat (input, "num-tx-desc %u", &devconf->num_tx_desc)) - ; - else if (unformat (input, "workers %U", unformat_bitmap_list, - &devconf->workers)) - ; - else - if (unformat - (input, "rss %U", unformat_vlib_cli_sub_input, &sub_input)) - { - error = unformat_rss_fn (&sub_input, &devconf->rss_fn); - if (error) - break; - } - else if (unformat (input, "vlan-strip-offload off")) - devconf->vlan_strip_offload = DPDK_DEVICE_VLAN_STRIP_OFF; - else if (unformat (input, "vlan-strip-offload on")) - devconf->vlan_strip_offload = DPDK_DEVICE_VLAN_STRIP_ON; - else - if (unformat - (input, "hqos %U", unformat_vlib_cli_sub_input, &sub_input)) - { - devconf->hqos_enabled = 1; - error = unformat_hqos (&sub_input, &devconf->hqos); - if (error) - break; - } - else if (unformat (input, "hqos")) - { - devconf->hqos_enabled = 1; - } - else - { - error = clib_error_return (0, "unknown input `%U'", - format_unformat_error, input); - break; - } - } - - if (error) - return error; - - if (devconf->workers && devconf->num_rx_queues == 0) - devconf->num_rx_queues = clib_bitmap_count_set_bits (devconf->workers); - else if (devconf->workers && - clib_bitmap_count_set_bits (devconf->workers) != - devconf->num_rx_queues) - error = - clib_error_return (0, - "%U: number of worker threadds must be " - "equal to number of rx queues", format_vlib_pci_addr, - &pci_addr); - - return error; -} - -static clib_error_t * -dpdk_config (vlib_main_t * vm, unformat_input_t * input) -{ - clib_error_t *error = 0; - dpdk_main_t *dm = &dpdk_main; - dpdk_config_main_t *conf = &dpdk_config_main; - vlib_thread_main_t *tm = vlib_get_thread_main (); - dpdk_device_config_t *devconf; - vlib_pci_addr_t pci_addr; - unformat_input_t sub_input; - u8 *s, *tmp = 0; - u8 *rte_cmd = 0, *ethname = 0; - u32 log_level; - int ret, i; - int num_whitelisted = 0; - u8 no_pci = 0; - u8 no_huge = 0; - u8 huge_dir = 0; - u8 file_prefix = 0; - u8 *socket_mem = 0; - - conf->device_config_index_by_pci_addr = hash_create (0, sizeof (uword)); - - while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) - { - /* Prime the pump */ - if (unformat (input, "no-hugetlb")) - { - vec_add1 (conf->eal_init_args, (u8 *) "no-huge"); - no_huge = 1; - } - - else if (unformat (input, "enable-tcp-udp-checksum")) - conf->enable_tcp_udp_checksum = 1; - - else if (unformat (input, "decimal-interface-names")) - conf->interface_name_format_decimal = 1; - - else if (unformat (input, "no-multi-seg")) - conf->no_multi_seg = 1; - - else if (unformat (input, "enable-cryptodev")) - conf->cryptodev = 1; - - else if (unformat (input, "dev default %U", unformat_vlib_cli_sub_input, - &sub_input)) - { - error = - dpdk_device_config (conf, (vlib_pci_addr_t) (u32) ~ 1, &sub_input, - 1); - - if (error) - return error; - } - else - if (unformat - (input, "dev %U %U", unformat_vlib_pci_addr, &pci_addr, - unformat_vlib_cli_sub_input, &sub_input)) - { - error = dpdk_device_config (conf, pci_addr, &sub_input, 0); - - if (error) - return error; - - num_whitelisted++; - } - else if (unformat (input, "dev %U", unformat_vlib_pci_addr, &pci_addr)) - { - error = dpdk_device_config (conf, pci_addr, 0, 0); - - if (error) - return error; - - num_whitelisted++; - } - else if (unformat (input, "num-mbufs %d", &conf->num_mbufs)) - ; - else if (unformat (input, "kni %d", &conf->num_kni)) - ; - else if (unformat (input, "uio-driver %s", &conf->uio_driver_name)) - ; - else if (unformat (input, "socket-mem %s", &socket_mem)) - ; - else if (unformat (input, "no-pci")) - { - no_pci = 1; - tmp = format (0, "--no-pci%c", 0); - vec_add1 (conf->eal_init_args, tmp); - } - else if (unformat (input, "poll-sleep %d", &dm->poll_sleep)) - ; - -#define _(a) \ - else if (unformat(input, #a)) \ - { \ - tmp = format (0, "--%s%c", #a, 0); \ - vec_add1 (conf->eal_init_args, tmp); \ - } - foreach_eal_double_hyphen_predicate_arg -#undef _ -#define _(a) \ - else if (unformat(input, #a " %s", &s)) \ - { \ - if (!strncmp(#a, "huge-dir", 8)) \ - huge_dir = 1; \ - else if (!strncmp(#a, "file-prefix", 11)) \ - file_prefix = 1; \ - tmp = format (0, "--%s%c", #a, 0); \ - vec_add1 (conf->eal_init_args, tmp); \ - vec_add1 (s, 0); \ - if (!strncmp(#a, "vdev", 4)) \ - if (strstr((char*)s, "af_packet")) \ - clib_warning ("af_packet obsoleted. Use CLI 'create host-interface'."); \ - vec_add1 (conf->eal_init_args, s); \ - } - foreach_eal_double_hyphen_arg -#undef _ -#define _(a,b) \ - else if (unformat(input, #a " %s", &s)) \ - { \ - tmp = format (0, "-%s%c", #b, 0); \ - vec_add1 (conf->eal_init_args, tmp); \ - vec_add1 (s, 0); \ - vec_add1 (conf->eal_init_args, s); \ - } - foreach_eal_single_hyphen_arg -#undef _ -#define _(a,b) \ - else if (unformat(input, #a " %s", &s)) \ - { \ - tmp = format (0, "-%s%c", #b, 0); \ - vec_add1 (conf->eal_init_args, tmp); \ - vec_add1 (s, 0); \ - vec_add1 (conf->eal_init_args, s); \ - conf->a##_set_manually = 1; \ - } - foreach_eal_single_hyphen_mandatory_arg -#undef _ - else if (unformat (input, "default")) - ; - - else if (unformat_skip_white_space (input)) - ; - else - { - error = clib_error_return (0, "unknown input `%U'", - format_unformat_error, input); - goto done; - } - } - - if (!conf->uio_driver_name) - conf->uio_driver_name = format (0, "uio_pci_generic%c", 0); - - /* - * Use 1G huge pages if available. - */ - if (!no_huge && !huge_dir) - { - u32 x, *mem_by_socket = 0; - uword c = 0; - u8 use_1g = 1; - u8 use_2m = 1; - u8 less_than_1g = 1; - int rv; - - umount (DEFAULT_HUGE_DIR); - - /* Process "socket-mem" parameter value */ - if (vec_len (socket_mem)) - { - unformat_input_t in; - unformat_init_vector (&in, socket_mem); - while (unformat_check_input (&in) != UNFORMAT_END_OF_INPUT) - { - if (unformat (&in, "%u,", &x)) - ; - else if (unformat (&in, "%u", &x)) - ; - else if (unformat (&in, ",")) - x = 0; - else - break; - - vec_add1 (mem_by_socket, x); - - if (x > 1023) - less_than_1g = 0; - } - /* Note: unformat_free vec_frees(in.buffer), aka socket_mem... */ - unformat_free (&in); - socket_mem = 0; - } - else - { - /* *INDENT-OFF* */ - clib_bitmap_foreach (c, tm->cpu_socket_bitmap, ( - { - vec_validate(mem_by_socket, c); - mem_by_socket[c] = 256; /* default per-socket mem */ - } - )); - /* *INDENT-ON* */ - } - - /* check if available enough 1GB pages for each socket */ - /* *INDENT-OFF* */ - clib_bitmap_foreach (c, tm->cpu_socket_bitmap, ( - { - int pages_avail, page_size, mem; - - vec_validate(mem_by_socket, c); - mem = mem_by_socket[c]; - - page_size = 1024; - pages_avail = vlib_sysfs_get_free_hugepages(c, page_size * 1024); - - if (pages_avail < 0 || page_size * pages_avail < mem) - use_1g = 0; - - page_size = 2; - pages_avail = vlib_sysfs_get_free_hugepages(c, page_size * 1024); - - if (pages_avail < 0 || page_size * pages_avail < mem) - use_2m = 0; - })); - /* *INDENT-ON* */ - - if (mem_by_socket == 0) - { - error = clib_error_return (0, "mem_by_socket NULL"); - goto done; - } - _vec_len (mem_by_socket) = c + 1; - - /* regenerate socket_mem string */ - vec_foreach_index (x, mem_by_socket) - socket_mem = format (socket_mem, "%s%u", - socket_mem ? "," : "", mem_by_socket[x]); - socket_mem = format (socket_mem, "%c", 0); - - vec_free (mem_by_socket); - - rv = mkdir (VPP_RUN_DIR, 0755); - if (rv && errno != EEXIST) - { - error = clib_error_return (0, "mkdir '%s' failed errno %d", - VPP_RUN_DIR, errno); - goto done; - } - - rv = mkdir (DEFAULT_HUGE_DIR, 0755); - if (rv && errno != EEXIST) - { - error = clib_error_return (0, "mkdir '%s' failed errno %d", - DEFAULT_HUGE_DIR, errno); - goto done; - } - - if (use_1g && !(less_than_1g && use_2m)) - { - rv = - mount ("none", DEFAULT_HUGE_DIR, "hugetlbfs", 0, "pagesize=1G"); - } - else if (use_2m) - { - rv = mount ("none", DEFAULT_HUGE_DIR, "hugetlbfs", 0, NULL); - } - else - { - return clib_error_return (0, "not enough free huge pages"); - } - - if (rv) - { - error = clib_error_return (0, "mount failed %d", errno); - goto done; - } - - tmp = format (0, "--huge-dir%c", 0); - vec_add1 (conf->eal_init_args, tmp); - tmp = format (0, "%s%c", DEFAULT_HUGE_DIR, 0); - vec_add1 (conf->eal_init_args, tmp); - if (!file_prefix) - { - tmp = format (0, "--file-prefix%c", 0); - vec_add1 (conf->eal_init_args, tmp); - tmp = format (0, "vpp%c", 0); - vec_add1 (conf->eal_init_args, tmp); - } - } - - vec_free (rte_cmd); - vec_free (ethname); - - if (error) - return error; - - /* I'll bet that -c and -n must be the first and second args... */ - if (!conf->coremask_set_manually) - { - vlib_thread_registration_t *tr; - uword *coremask = 0; - int i; - - /* main thread core */ - coremask = clib_bitmap_set (coremask, tm->main_lcore, 1); - - for (i = 0; i < vec_len (tm->registrations); i++) - { - tr = tm->registrations[i]; - coremask = clib_bitmap_or (coremask, tr->coremask); - } - - vec_insert (conf->eal_init_args, 2, 1); - conf->eal_init_args[1] = (u8 *) "-c"; - tmp = format (0, "%U%c", format_bitmap_hex, coremask, 0); - conf->eal_init_args[2] = tmp; - clib_bitmap_free (coremask); - } - - if (!conf->nchannels_set_manually) - { - vec_insert (conf->eal_init_args, 2, 3); - conf->eal_init_args[3] = (u8 *) "-n"; - tmp = format (0, "%d", conf->nchannels); - conf->eal_init_args[4] = tmp; - } - - if (no_pci == 0 && geteuid () == 0) - dpdk_bind_devices_to_uio (conf); - -#define _(x) \ - if (devconf->x == 0 && conf->default_devconf.x > 0) \ - devconf->x = conf->default_devconf.x ; - - /* *INDENT-OFF* */ - pool_foreach (devconf, conf->dev_confs, ({ - - /* default per-device config items */ - foreach_dpdk_device_config_item - - /* add DPDK EAL whitelist/blacklist entry */ - if (num_whitelisted > 0 && devconf->is_blacklisted == 0) - { - tmp = format (0, "-w%c", 0); - vec_add1 (conf->eal_init_args, tmp); - tmp = format (0, "%U%c", format_vlib_pci_addr, &devconf->pci_addr, 0); - vec_add1 (conf->eal_init_args, tmp); - } - else if (num_whitelisted == 0 && devconf->is_blacklisted != 0) - { - tmp = format (0, "-b%c", 0); - vec_add1 (conf->eal_init_args, tmp); - tmp = format (0, "%U%c", format_vlib_pci_addr, &devconf->pci_addr, 0); - vec_add1 (conf->eal_init_args, tmp); - } - })); - /* *INDENT-ON* */ - -#undef _ - - /* set master-lcore */ - tmp = format (0, "--master-lcore%c", 0); - vec_add1 (conf->eal_init_args, tmp); - tmp = format (0, "%u%c", tm->main_lcore, 0); - vec_add1 (conf->eal_init_args, tmp); - - /* set socket-mem */ - tmp = format (0, "--socket-mem%c", 0); - vec_add1 (conf->eal_init_args, tmp); - tmp = format (0, "%s%c", socket_mem, 0); - vec_add1 (conf->eal_init_args, tmp); - - /* NULL terminate the "argv" vector, in case of stupidity */ - vec_add1 (conf->eal_init_args, 0); - _vec_len (conf->eal_init_args) -= 1; - - /* Set up DPDK eal and packet mbuf pool early. */ - - log_level = (CLIB_DEBUG > 0) ? RTE_LOG_DEBUG : RTE_LOG_NOTICE; - - rte_set_log_level (log_level); - - vm = vlib_get_main (); - - /* make copy of args as rte_eal_init tends to mess up with arg array */ - for (i = 1; i < vec_len (conf->eal_init_args); i++) - conf->eal_init_args_str = format (conf->eal_init_args_str, "%s ", - conf->eal_init_args[i]); - - ret = - rte_eal_init (vec_len (conf->eal_init_args), - (char **) conf->eal_init_args); - - /* lazy umount hugepages */ - umount2 (DEFAULT_HUGE_DIR, MNT_DETACH); - - if (ret < 0) - return clib_error_return (0, "rte_eal_init returned %d", ret); - - /* Dump the physical memory layout prior to creating the mbuf_pool */ - fprintf (stdout, "DPDK physical memory layout:\n"); - rte_dump_physmem_layout (stdout); - - /* main thread 1st */ - error = vlib_buffer_pool_create (vm, conf->num_mbufs, rte_socket_id ()); - if (error) - return error; - - for (i = 0; i < RTE_MAX_LCORE; i++) - { - error = vlib_buffer_pool_create (vm, conf->num_mbufs, - rte_lcore_to_socket_id (i)); - if (error) - return error; - } - -done: - return error; -} - -VLIB_CONFIG_FUNCTION (dpdk_config, "dpdk"); - -void -dpdk_update_link_state (dpdk_device_t * xd, f64 now) -{ - vnet_main_t *vnm = vnet_get_main (); - struct rte_eth_link prev_link = xd->link; - u32 hw_flags = 0; - u8 hw_flags_chg = 0; - - /* only update link state for PMD interfaces */ - if ((xd->flags & DPDK_DEVICE_FLAG_PMD) == 0) - return; - - xd->time_last_link_update = now ? now : xd->time_last_link_update; - memset (&xd->link, 0, sizeof (xd->link)); - rte_eth_link_get_nowait (xd->device_index, &xd->link); - - if (LINK_STATE_ELOGS) - { - vlib_main_t *vm = vlib_get_main (); - ELOG_TYPE_DECLARE (e) = - { - .format = - "update-link-state: sw_if_index %d, admin_up %d," - "old link_state %d new link_state %d",.format_args = "i4i1i1i1",}; - - struct - { - u32 sw_if_index; - u8 admin_up; - u8 old_link_state; - u8 new_link_state; - } *ed; - ed = ELOG_DATA (&vm->elog_main, e); - ed->sw_if_index = xd->vlib_sw_if_index; - ed->admin_up = (xd->flags & DPDK_DEVICE_FLAG_ADMIN_UP) != 0; - ed->old_link_state = (u8) - vnet_hw_interface_is_link_up (vnm, xd->vlib_hw_if_index); - ed->new_link_state = (u8) xd->link.link_status; - } - - if ((xd->flags & DPDK_DEVICE_FLAG_ADMIN_UP) && - ((xd->link.link_status != 0) ^ - vnet_hw_interface_is_link_up (vnm, xd->vlib_hw_if_index))) - { - hw_flags_chg = 1; - hw_flags |= (xd->link.link_status ? VNET_HW_INTERFACE_FLAG_LINK_UP : 0); - } - - if (hw_flags_chg || (xd->link.link_duplex != prev_link.link_duplex)) - { - hw_flags_chg = 1; - switch (xd->link.link_duplex) - { - case ETH_LINK_HALF_DUPLEX: - hw_flags |= VNET_HW_INTERFACE_FLAG_HALF_DUPLEX; - break; - case ETH_LINK_FULL_DUPLEX: - hw_flags |= VNET_HW_INTERFACE_FLAG_FULL_DUPLEX; - break; - default: - break; - } - } - if (hw_flags_chg || (xd->link.link_speed != prev_link.link_speed)) - { - hw_flags_chg = 1; - switch (xd->link.link_speed) - { - case ETH_SPEED_NUM_10M: - hw_flags |= VNET_HW_INTERFACE_FLAG_SPEED_10M; - break; - case ETH_SPEED_NUM_100M: - hw_flags |= VNET_HW_INTERFACE_FLAG_SPEED_100M; - break; - case ETH_SPEED_NUM_1G: - hw_flags |= VNET_HW_INTERFACE_FLAG_SPEED_1G; - break; - case ETH_SPEED_NUM_10G: - hw_flags |= VNET_HW_INTERFACE_FLAG_SPEED_10G; - break; - case ETH_SPEED_NUM_40G: - hw_flags |= VNET_HW_INTERFACE_FLAG_SPEED_40G; - break; - case 0: - break; - default: - clib_warning ("unknown link speed %d", xd->link.link_speed); - break; - } - } - if (hw_flags_chg) - { - if (LINK_STATE_ELOGS) - { - vlib_main_t *vm = vlib_get_main (); - - ELOG_TYPE_DECLARE (e) = - { - .format = - "update-link-state: sw_if_index %d, new flags %d",.format_args - = "i4i4",}; - - struct - { - u32 sw_if_index; - u32 flags; - } *ed; - ed = ELOG_DATA (&vm->elog_main, e); - ed->sw_if_index = xd->vlib_sw_if_index; - ed->flags = hw_flags; - } - vnet_hw_interface_set_flags (vnm, xd->vlib_hw_if_index, hw_flags); - } -} - -static uword -dpdk_process (vlib_main_t * vm, vlib_node_runtime_t * rt, vlib_frame_t * f) -{ - clib_error_t *error; - vnet_main_t *vnm = vnet_get_main (); - dpdk_main_t *dm = &dpdk_main; - ethernet_main_t *em = ðernet_main; - dpdk_device_t *xd; - vlib_thread_main_t *tm = vlib_get_thread_main (); - int i; - - error = dpdk_lib_init (dm); - - /* - * Turn on the input node if we found some devices to drive - * and we're not running worker threads or i/o threads - */ - - if (error == 0 && vec_len (dm->devices) > 0) - { - if (tm->n_vlib_mains == 1) - vlib_node_set_state (vm, dpdk_input_node.index, - VLIB_NODE_STATE_POLLING); - else - for (i = 0; i < tm->n_vlib_mains; i++) - if (vec_len (dm->devices_by_cpu[i]) > 0) - vlib_node_set_state (vlib_mains[i], dpdk_input_node.index, - VLIB_NODE_STATE_POLLING); - } - - if (error) - clib_error_report (error); - - tm->worker_thread_release = 1; - - f64 now = vlib_time_now (vm); - vec_foreach (xd, dm->devices) - { - dpdk_update_link_state (xd, now); - } - - { - /* - * Extra set up for bond interfaces: - * 1. Setup MACs for bond interfaces and their slave links which was set - * in dpdk_port_setup() but needs to be done again here to take effect. - * 2. Set up info for bond interface related CLI support. - */ - int nports = rte_eth_dev_count (); - if (nports > 0) - { - for (i = 0; i < nports; i++) - { - struct rte_eth_dev_info dev_info; - rte_eth_dev_info_get (i, &dev_info); - if (!dev_info.driver_name) - dev_info.driver_name = dev_info.pci_dev->driver->driver.name; - - ASSERT (dev_info.driver_name); - if (strncmp (dev_info.driver_name, "rte_bond_pmd", 12) == 0) - { - u8 addr[6]; - u8 slink[16]; - int nlink = rte_eth_bond_slaves_get (i, slink, 16); - if (nlink > 0) - { - vnet_hw_interface_t *bhi; - ethernet_interface_t *bei; - int rv; - - /* Get MAC of 1st slave link */ - rte_eth_macaddr_get (slink[0], - (struct ether_addr *) addr); - /* Set MAC of bounded interface to that of 1st slave link */ - rv = - rte_eth_bond_mac_address_set (i, - (struct ether_addr *) - addr); - if (rv < 0) - clib_warning ("Failed to set MAC address"); - - /* Populate MAC of bonded interface in VPP hw tables */ - bhi = - vnet_get_hw_interface (vnm, - dm->devices[i].vlib_hw_if_index); - bei = - pool_elt_at_index (em->interfaces, bhi->hw_instance); - clib_memcpy (bhi->hw_address, addr, 6); - clib_memcpy (bei->address, addr, 6); - /* Init l3 packet size allowed on bonded interface */ - bhi->max_packet_bytes = ETHERNET_MAX_PACKET_BYTES; - bhi->max_l3_packet_bytes[VLIB_RX] = - bhi->max_l3_packet_bytes[VLIB_TX] = - ETHERNET_MAX_PACKET_BYTES - sizeof (ethernet_header_t); - while (nlink >= 1) - { /* for all slave links */ - int slave = slink[--nlink]; - dpdk_device_t *sdev = &dm->devices[slave]; - vnet_hw_interface_t *shi; - vnet_sw_interface_t *ssi; - /* Add MAC to all slave links except the first one */ - if (nlink) - rte_eth_dev_mac_addr_add (slave, - (struct ether_addr *) - addr, 0); - /* Set slaves bitmap for bonded interface */ - bhi->bond_info = - clib_bitmap_set (bhi->bond_info, - sdev->vlib_hw_if_index, 1); - /* Set slave link flags on slave interface */ - shi = - vnet_get_hw_interface (vnm, sdev->vlib_hw_if_index); - ssi = - vnet_get_sw_interface (vnm, sdev->vlib_sw_if_index); - shi->bond_info = VNET_HW_INTERFACE_BOND_INFO_SLAVE; - ssi->flags |= VNET_SW_INTERFACE_FLAG_BOND_SLAVE; - - /* Set l3 packet size allowed as the lowest of slave */ - if (bhi->max_l3_packet_bytes[VLIB_RX] > - shi->max_l3_packet_bytes[VLIB_RX]) - bhi->max_l3_packet_bytes[VLIB_RX] = - bhi->max_l3_packet_bytes[VLIB_TX] = - shi->max_l3_packet_bytes[VLIB_RX]; - - /* Set max packet size allowed as the lowest of slave */ - if (bhi->max_packet_bytes > shi->max_packet_bytes) - bhi->max_packet_bytes = shi->max_packet_bytes; - } - } - } - } - } - } - - while (1) - { - /* - * check each time through the loop in case intervals are changed - */ - f64 min_wait = dm->link_state_poll_interval < dm->stat_poll_interval ? - dm->link_state_poll_interval : dm->stat_poll_interval; - - vlib_process_wait_for_event_or_clock (vm, min_wait); - - if (dm->admin_up_down_in_progress) - /* skip the poll if an admin up down is in progress (on any interface) */ - continue; - - vec_foreach (xd, dm->devices) - { - f64 now = vlib_time_now (vm); - if ((now - xd->time_last_stats_update) >= dm->stat_poll_interval) - dpdk_update_counters (xd, now); - if ((now - xd->time_last_link_update) >= dm->link_state_poll_interval) - dpdk_update_link_state (xd, now); - - } - } - - return 0; -} - -/* *INDENT-OFF* */ -VLIB_REGISTER_NODE (dpdk_process_node,static) = { - .function = dpdk_process, - .type = VLIB_NODE_TYPE_PROCESS, - .name = "dpdk-process", - .process_log2_n_stack_bytes = 17, -}; -/* *INDENT-ON* */ - -int -dpdk_set_stat_poll_interval (f64 interval) -{ - if (interval < DPDK_MIN_STATS_POLL_INTERVAL) - return (VNET_API_ERROR_INVALID_VALUE); - - dpdk_main.stat_poll_interval = interval; - - return 0; -} - -int -dpdk_set_link_state_poll_interval (f64 interval) -{ - if (interval < DPDK_MIN_LINK_POLL_INTERVAL) - return (VNET_API_ERROR_INVALID_VALUE); - - dpdk_main.link_state_poll_interval = interval; - - return 0; -} - -clib_error_t * -dpdk_init (vlib_main_t * vm) -{ - dpdk_main_t *dm = &dpdk_main; - vlib_node_t *ei; - clib_error_t *error = 0; - vlib_thread_main_t *tm = vlib_get_thread_main (); - - /* verify that structs are cacheline aligned */ - STATIC_ASSERT (offsetof (dpdk_device_t, cacheline0) == 0, - "Cache line marker must be 1st element in dpdk_device_t"); - STATIC_ASSERT (offsetof (dpdk_device_t, cacheline1) == - CLIB_CACHE_LINE_BYTES, - "Data in cache line 0 is bigger than cache line size"); - STATIC_ASSERT (offsetof (frame_queue_trace_t, cacheline0) == 0, - "Cache line marker must be 1st element in frame_queue_trace_t"); - - u8 *name; - name = format (0, "dpdk_%08x%c", api_version, 0); - - /* Ask for a correctly-sized block of API message decode slots */ - dm->msg_id_base = vl_msg_api_get_msg_ids - ((char *) name, VL_MSG_FIRST_AVAILABLE); - vec_free (name); - - dm->vlib_main = vm; - dm->vnet_main = vnet_get_main (); - dm->conf = &dpdk_config_main; - - error = dpdk_plugin_api_hookup (vm); - - /* Add our API messages to the global name_crc hash table */ - setup_message_id_table (dm, &api_main); - -// TODO -// plugin_custom_dump_configure (dm); - - ei = vlib_get_node_by_name (vm, (u8 *) "ethernet-input"); - if (ei == 0) - return clib_error_return (0, "ethernet-input node AWOL"); - - dm->ethernet_input_node_index = ei->index; - - dm->conf->nchannels = 4; - dm->conf->num_mbufs = dm->conf->num_mbufs ? dm->conf->num_mbufs : NB_MBUF; - vec_add1 (dm->conf->eal_init_args, (u8 *) "vnet"); - - dm->dpdk_device_by_kni_port_id = hash_create (0, sizeof (uword)); - dm->vu_sw_if_index_by_listener_fd = hash_create (0, sizeof (uword)); - dm->vu_sw_if_index_by_sock_fd = hash_create (0, sizeof (uword)); - - /* $$$ use n_thread_stacks since it's known-good at this point */ - vec_validate (dm->recycle, tm->n_thread_stacks - 1); - - /* Default vlib_buffer_t flags, DISABLES tcp/udp checksumming... */ - dm->buffer_flags_template = - (VLIB_BUFFER_TOTAL_LENGTH_VALID | VLIB_BUFFER_EXT_HDR_VALID - | IP_BUFFER_L4_CHECKSUM_COMPUTED | IP_BUFFER_L4_CHECKSUM_CORRECT); - - dm->stat_poll_interval = DPDK_STATS_POLL_INTERVAL; - dm->link_state_poll_interval = DPDK_LINK_POLL_INTERVAL; - - /* init CLI */ - if ((error = vlib_call_init_function (vm, dpdk_cli_init))) - return error; - - return error; -} - -VLIB_INIT_FUNCTION (dpdk_init); - - -/* - * fd.io coding-style-patch-verification: ON - * - * Local Variables: - * eval: (c-set-style "gnu") - * End: - */ -- cgit 1.2.3-korg From 25f635852aee76255f7210c43d43668a80fdccce Mon Sep 17 00:00:00 2001 From: Damjan Marion Date: Mon, 6 Mar 2017 21:51:00 +0100 Subject: dpdk: dpdk-input optimizations and fixes - fix issue caused by assumption that ethertype starts at 0 - intoduce buffer templates to speed-up vlib_buffer_t metadata initialization - avoid check for multiseg buffers inside loop if multiseg is disabled - interleave prefetches to reduce load on L1 cache Change-Id: I3b76e6d3e1e15ed28f01625edb7fbe9f38112e03 Signed-off-by: Damjan Marion --- src/plugins/dpdk.am | 4 + src/plugins/dpdk/device/dpdk.h | 3 + src/plugins/dpdk/device/init.c | 15 ++++ src/plugins/dpdk/device/node.c | 200 +++++++++++++++++++++-------------------- 4 files changed, 126 insertions(+), 96 deletions(-) (limited to 'src/plugins/dpdk.am') diff --git a/src/plugins/dpdk.am b/src/plugins/dpdk.am index 01383de6..f1a37ae2 100644 --- a/src/plugins/dpdk.am +++ b/src/plugins/dpdk.am @@ -16,6 +16,10 @@ vppplugins_LTLIBRARIES += dpdk_plugin.la dpdk_plugin_la_LDFLAGS = $(AM_LDFLAGS) -Wl,--whole-archive,-l:libdpdk.a,--no-whole-archive,-lm,-ldl +# due to internal compiler error in GCC when compiling dpdk/device/node.c +# debug sysmbols level reduced to 1. See GCC PR #79953 for details +CFLAGS += -g1 + dpdk_plugin_la_SOURCES = \ dpdk/main.c \ dpdk/buffer.c \ diff --git a/src/plugins/dpdk/device/dpdk.h b/src/plugins/dpdk/device/dpdk.h index 092c7dca..84f86ae2 100644 --- a/src/plugins/dpdk/device/dpdk.h +++ b/src/plugins/dpdk/device/dpdk.h @@ -338,6 +338,9 @@ typedef struct /* per-thread recycle lists */ u32 **recycle; + /* per-thread buffer templates */ + vlib_buffer_t *buffer_templates; + /* buffer flags template, configurable to enable/disable tcp / udp cksum */ u32 buffer_flags_template; diff --git a/src/plugins/dpdk/device/init.c b/src/plugins/dpdk/device/init.c index 85ecde25..110d7457 100755 --- a/src/plugins/dpdk/device/init.c +++ b/src/plugins/dpdk/device/init.c @@ -572,6 +572,21 @@ dpdk_lib_init (dpdk_main_t * dm) dm->buffer_flags_template &= ~(IP_BUFFER_L4_CHECKSUM_CORRECT | IP_BUFFER_L4_CHECKSUM_COMPUTED); + /* vlib_buffer_t template */ + vec_validate_aligned (dm->buffer_templates, tm->n_vlib_mains - 1, + CLIB_CACHE_LINE_BYTES); + for (i = 0; i < tm->n_vlib_mains; i++) + { + vlib_buffer_free_list_t *fl; + vlib_buffer_t *bt = vec_elt_at_index (dm->buffer_templates, i); + fl = vlib_buffer_get_free_list (vm, + VLIB_BUFFER_DEFAULT_FREE_LIST_INDEX); + vlib_buffer_init_for_free_list (bt, fl); + bt->flags = dm->buffer_flags_template; + bt->current_data = -RTE_PKTMBUF_HEADROOM; + vnet_buffer (bt)->sw_if_index[VLIB_TX] = (u32) ~ 0; + } + for (i = 0; i < nports; i++) { u8 addr[6]; diff --git a/src/plugins/dpdk/device/node.c b/src/plugins/dpdk/device/node.c index 2120069e..e8d502ca 100644 --- a/src/plugins/dpdk/device/node.c +++ b/src/plugins/dpdk/device/node.c @@ -37,21 +37,21 @@ static char *dpdk_error_strings[] = { always_inline int vlib_buffer_is_ip4 (vlib_buffer_t * b) { - ethernet_header_t *h = (ethernet_header_t *) b->data; + ethernet_header_t *h = (ethernet_header_t *) vlib_buffer_get_current (b); return (h->type == clib_host_to_net_u16 (ETHERNET_TYPE_IP4)); } always_inline int vlib_buffer_is_ip6 (vlib_buffer_t * b) { - ethernet_header_t *h = (ethernet_header_t *) b->data; + ethernet_header_t *h = (ethernet_header_t *) vlib_buffer_get_current (b); return (h->type == clib_host_to_net_u16 (ETHERNET_TYPE_IP6)); } always_inline int vlib_buffer_is_mpls (vlib_buffer_t * b) { - ethernet_header_t *h = (ethernet_header_t *) b->data; + ethernet_header_t *h = (ethernet_header_t *) vlib_buffer_get_current (b); return (h->type == clib_host_to_net_u16 (ETHERNET_TYPE_MPLS_UNICAST)); } @@ -217,10 +217,35 @@ dpdk_prefetch_buffer (struct rte_mbuf *mb) } static_always_inline void -dpdk_prefetch_buffer_data (struct rte_mbuf *mb) +dpdk_prefetch_ethertype (struct rte_mbuf *mb) { - vlib_buffer_t *b = vlib_buffer_from_rte_mbuf (mb); - CLIB_PREFETCH (b->data, CLIB_CACHE_LINE_BYTES, STORE); + CLIB_PREFETCH (mb->buf_addr + mb->data_off + + STRUCT_OFFSET_OF (ethernet_header_t, type), + CLIB_CACHE_LINE_BYTES, LOAD); +} + + +/* + This function should fill 1st cacheline of vlib_buffer_t metadata with data + from buffer template. Instead of filling field by field, we construct + template and then use 128/256 bit vector instruction to copy data. + This code first loads whole cacheline into 4 128-bit registers (xmm) + or two 256 bit registers (ymm) and then stores data into all 4 buffers + efectively saving on register load operations. +*/ + +static_always_inline void +dpdk_buffer_init_from_template (void *d0, void *d1, void *d2, void *d3, + void *s) +{ + int i; + for (i = 0; i < 2; i++) + { + *(u8x32 *) (((u8 *) d0) + i * 32) = + *(u8x32 *) (((u8 *) d1) + i * 32) = + *(u8x32 *) (((u8 *) d2) + i * 32) = + *(u8x32 *) (((u8 *) d3) + i * 32) = *(u8x32 *) (((u8 *) s) + i * 32); + } } /* @@ -229,7 +254,8 @@ dpdk_prefetch_buffer_data (struct rte_mbuf *mb) */ static_always_inline u32 dpdk_device_input (dpdk_main_t * dm, dpdk_device_t * xd, - vlib_node_runtime_t * node, u32 cpu_index, u16 queue_id) + vlib_node_runtime_t * node, u32 cpu_index, u16 queue_id, + int maybe_multiseg) { u32 n_buffers; u32 next_index = VNET_DEVICE_INPUT_NEXT_ETHERNET_INPUT; @@ -239,7 +265,7 @@ dpdk_device_input (dpdk_main_t * dm, dpdk_device_t * xd, uword n_rx_bytes = 0; u32 n_trace, trace_cnt __attribute__ ((unused)); vlib_buffer_free_list_t *fl; - u32 buffer_flags_template; + vlib_buffer_t *bt = vec_elt_at_index (dm->buffer_templates, cpu_index); if ((xd->flags & DPDK_DEVICE_FLAG_ADMIN_UP) == 0) return 0; @@ -251,8 +277,6 @@ dpdk_device_input (dpdk_main_t * dm, dpdk_device_t * xd, return 0; } - buffer_flags_template = dm->buffer_flags_template; - vec_reset_length (xd->d_trace_buffers[cpu_index]); trace_cnt = n_trace = vlib_get_trace_count (vm, node); @@ -272,33 +296,44 @@ dpdk_device_input (dpdk_main_t * dm, dpdk_device_t * xd, fl = vlib_buffer_get_free_list (vm, VLIB_BUFFER_DEFAULT_FREE_LIST_INDEX); + /* Update buffer template */ + vnet_buffer (bt)->sw_if_index[VLIB_RX] = xd->vlib_sw_if_index; + bt->error = node->errors[DPDK_ERROR_NONE]; + mb_index = 0; while (n_buffers > 0) { vlib_buffer_t *b0, *b1, *b2, *b3; - u32 bi0, next0, l3_offset0; - u32 bi1, next1, l3_offset1; - u32 bi2, next2, l3_offset2; - u32 bi3, next3, l3_offset3; + u32 bi0, next0; + u32 bi1, next1; + u32 bi2, next2; + u32 bi3, next3; u8 error0, error1, error2, error3; u64 or_ol_flags; vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); - while (n_buffers > 8 && n_left_to_next > 4) + while (n_buffers >= 12 && n_left_to_next >= 4) { - struct rte_mbuf *mb0 = xd->rx_vectors[queue_id][mb_index]; - struct rte_mbuf *mb1 = xd->rx_vectors[queue_id][mb_index + 1]; - struct rte_mbuf *mb2 = xd->rx_vectors[queue_id][mb_index + 2]; - struct rte_mbuf *mb3 = xd->rx_vectors[queue_id][mb_index + 3]; + struct rte_mbuf *mb0, *mb1, *mb2, *mb3; - dpdk_prefetch_buffer (xd->rx_vectors[queue_id][mb_index + 4]); - dpdk_prefetch_buffer (xd->rx_vectors[queue_id][mb_index + 5]); - dpdk_prefetch_buffer (xd->rx_vectors[queue_id][mb_index + 6]); - dpdk_prefetch_buffer (xd->rx_vectors[queue_id][mb_index + 7]); + /* prefetches are interleaved with the rest of the code to reduce + pressure on L1 cache */ + dpdk_prefetch_buffer (xd->rx_vectors[queue_id][mb_index + 8]); + dpdk_prefetch_ethertype (xd->rx_vectors[queue_id][mb_index + 4]); - if (xd->flags & DPDK_DEVICE_FLAG_MAYBE_MULTISEG) + mb0 = xd->rx_vectors[queue_id][mb_index]; + mb1 = xd->rx_vectors[queue_id][mb_index + 1]; + mb2 = xd->rx_vectors[queue_id][mb_index + 2]; + mb3 = xd->rx_vectors[queue_id][mb_index + 3]; + + ASSERT (mb0); + ASSERT (mb1); + ASSERT (mb2); + ASSERT (mb3); + + if (maybe_multiseg) { if (PREDICT_FALSE (mb0->nb_segs > 1)) dpdk_prefetch_buffer (mb0->next); @@ -310,22 +345,29 @@ dpdk_device_input (dpdk_main_t * dm, dpdk_device_t * xd, dpdk_prefetch_buffer (mb3->next); } - ASSERT (mb0); - ASSERT (mb1); - ASSERT (mb2); - ASSERT (mb3); - - or_ol_flags = (mb0->ol_flags | mb1->ol_flags | - mb2->ol_flags | mb3->ol_flags); b0 = vlib_buffer_from_rte_mbuf (mb0); b1 = vlib_buffer_from_rte_mbuf (mb1); b2 = vlib_buffer_from_rte_mbuf (mb2); b3 = vlib_buffer_from_rte_mbuf (mb3); - vlib_buffer_init_for_free_list (b0, fl); - vlib_buffer_init_for_free_list (b1, fl); - vlib_buffer_init_for_free_list (b2, fl); - vlib_buffer_init_for_free_list (b3, fl); + dpdk_buffer_init_from_template (b0, b1, b2, b3, bt); + + dpdk_prefetch_buffer (xd->rx_vectors[queue_id][mb_index + 9]); + dpdk_prefetch_ethertype (xd->rx_vectors[queue_id][mb_index + 5]); + + /* current_data must be set to -RTE_PKTMBUF_HEADROOM in template */ + b0->current_data += mb0->data_off; + b1->current_data += mb1->data_off; + b2->current_data += mb2->data_off; + b3->current_data += mb3->data_off; + + b0->current_length = mb0->data_len; + b1->current_length = mb1->data_len; + b2->current_length = mb2->data_len; + b3->current_length = mb3->data_len; + + dpdk_prefetch_buffer (xd->rx_vectors[queue_id][mb_index + 10]); + dpdk_prefetch_ethertype (xd->rx_vectors[queue_id][mb_index + 7]); bi0 = vlib_get_buffer_index (vm, b0); bi1 = vlib_get_buffer_index (vm, b1); @@ -345,21 +387,17 @@ dpdk_device_input (dpdk_main_t * dm, dpdk_device_t * xd, } else { - /* prefetch packet data for faster access to the ethertype */ - dpdk_prefetch_buffer_data (xd->rx_vectors[queue_id] - [mb_index + 4]); - dpdk_prefetch_buffer_data (xd->rx_vectors[queue_id] - [mb_index + 5]); - dpdk_prefetch_buffer_data (xd->rx_vectors[queue_id] - [mb_index + 6]); - dpdk_prefetch_buffer_data (xd->rx_vectors[queue_id] - [mb_index + 7]); next0 = dpdk_rx_next_from_etype (mb0, b0); next1 = dpdk_rx_next_from_etype (mb1, b1); next2 = dpdk_rx_next_from_etype (mb2, b2); next3 = dpdk_rx_next_from_etype (mb3, b3); } + dpdk_prefetch_buffer (xd->rx_vectors[queue_id][mb_index + 11]); + dpdk_prefetch_ethertype (xd->rx_vectors[queue_id][mb_index + 6]); + + or_ol_flags = (mb0->ol_flags | mb1->ol_flags | + mb2->ol_flags | mb3->ol_flags); if (PREDICT_FALSE (or_ol_flags & PKT_RX_IP_CKSUM_BAD)) { dpdk_rx_error_from_mb (mb0, &next0, &error0); @@ -371,46 +409,11 @@ dpdk_device_input (dpdk_main_t * dm, dpdk_device_t * xd, b2->error = node->errors[error2]; b3->error = node->errors[error3]; } - else - { - b0->error = b1->error = node->errors[DPDK_ERROR_NONE]; - b2->error = b3->error = node->errors[DPDK_ERROR_NONE]; - } - l3_offset0 = device_input_next_node_advance[next0]; - l3_offset1 = device_input_next_node_advance[next1]; - l3_offset2 = device_input_next_node_advance[next2]; - l3_offset3 = device_input_next_node_advance[next3]; - - b0->current_data = l3_offset0 + mb0->data_off; - b1->current_data = l3_offset1 + mb1->data_off; - b2->current_data = l3_offset2 + mb2->data_off; - b3->current_data = l3_offset3 + mb3->data_off; - - b0->current_data -= RTE_PKTMBUF_HEADROOM; - b1->current_data -= RTE_PKTMBUF_HEADROOM; - b2->current_data -= RTE_PKTMBUF_HEADROOM; - b3->current_data -= RTE_PKTMBUF_HEADROOM; - - b0->current_length = mb0->data_len - l3_offset0; - b1->current_length = mb1->data_len - l3_offset1; - b2->current_length = mb2->data_len - l3_offset2; - b3->current_length = mb3->data_len - l3_offset3; - - b0->flags = buffer_flags_template; - b1->flags = buffer_flags_template; - b2->flags = buffer_flags_template; - b3->flags = buffer_flags_template; - - vnet_buffer (b0)->sw_if_index[VLIB_RX] = xd->vlib_sw_if_index; - vnet_buffer (b1)->sw_if_index[VLIB_RX] = xd->vlib_sw_if_index; - vnet_buffer (b2)->sw_if_index[VLIB_RX] = xd->vlib_sw_if_index; - vnet_buffer (b3)->sw_if_index[VLIB_RX] = xd->vlib_sw_if_index; - - vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0; - vnet_buffer (b1)->sw_if_index[VLIB_TX] = (u32) ~ 0; - vnet_buffer (b2)->sw_if_index[VLIB_TX] = (u32) ~ 0; - vnet_buffer (b3)->sw_if_index[VLIB_TX] = (u32) ~ 0; + vlib_buffer_advance (b0, device_input_next_node_advance[next0]); + vlib_buffer_advance (b1, device_input_next_node_advance[next1]); + vlib_buffer_advance (b2, device_input_next_node_advance[next2]); + vlib_buffer_advance (b3, device_input_next_node_advance[next3]); n_rx_bytes += mb0->pkt_len; n_rx_bytes += mb1->pkt_len; @@ -418,7 +421,7 @@ dpdk_device_input (dpdk_main_t * dm, dpdk_device_t * xd, n_rx_bytes += mb3->pkt_len; /* Process subsequent segments of multi-segment packets */ - if (xd->flags & DPDK_DEVICE_FLAG_MAYBE_MULTISEG) + if (maybe_multiseg) { dpdk_process_subseq_segs (vm, b0, mb0, fl); dpdk_process_subseq_segs (vm, b1, mb1, fl); @@ -452,6 +455,13 @@ dpdk_device_input (dpdk_main_t * dm, dpdk_device_t * xd, { struct rte_mbuf *mb0 = xd->rx_vectors[queue_id][mb_index]; + if (PREDICT_TRUE (n_buffers > 3)) + { + dpdk_prefetch_buffer (xd->rx_vectors[queue_id][mb_index + 2]); + dpdk_prefetch_ethertype (xd->rx_vectors[queue_id] + [mb_index + 1]); + } + ASSERT (mb0); b0 = vlib_buffer_from_rte_mbuf (mb0); @@ -460,7 +470,11 @@ dpdk_device_input (dpdk_main_t * dm, dpdk_device_t * xd, if (PREDICT_FALSE (mb0->nb_segs > 1)) dpdk_prefetch_buffer (mb0->next); - vlib_buffer_init_for_free_list (b0, fl); + clib_memcpy (b0, bt, CLIB_CACHE_LINE_BYTES); + + ASSERT (b0->current_data == -RTE_PKTMBUF_HEADROOM); + b0->current_data += mb0->data_off; + b0->current_length = mb0->data_len; bi0 = vlib_get_buffer_index (vm, b0); @@ -474,18 +488,9 @@ dpdk_device_input (dpdk_main_t * dm, dpdk_device_t * xd, next0 = dpdk_rx_next_from_etype (mb0, b0); dpdk_rx_error_from_mb (mb0, &next0, &error0); - b0->error = node->errors[error0]; - - l3_offset0 = device_input_next_node_advance[next0]; - b0->current_data = l3_offset0; - b0->current_data += mb0->data_off - RTE_PKTMBUF_HEADROOM; - b0->current_length = mb0->data_len - l3_offset0; + vlib_buffer_advance (b0, device_input_next_node_advance[next0]); - b0->flags = buffer_flags_template; - - vnet_buffer (b0)->sw_if_index[VLIB_RX] = xd->vlib_sw_if_index; - vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0; n_rx_bytes += mb0->pkt_len; /* Process subsequent segments of multi-segment packets */ @@ -604,7 +609,10 @@ dpdk_input (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * f) vec_foreach (dq, dm->devices_by_cpu[cpu_index]) { xd = vec_elt_at_index(dm->devices, dq->device); - n_rx_packets += dpdk_device_input (dm, xd, node, cpu_index, dq->queue_id); + if (xd->flags & DPDK_DEVICE_FLAG_MAYBE_MULTISEG) + n_rx_packets += dpdk_device_input (dm, xd, node, cpu_index, dq->queue_id, /* maybe_multiseg */ 1); + else + n_rx_packets += dpdk_device_input (dm, xd, node, cpu_index, dq->queue_id, /* maybe_multiseg */ 0); } /* *INDENT-ON* */ -- cgit 1.2.3-korg From 145bb0f2a644c3af4bd4adf47f1dcd75f6d346ff Mon Sep 17 00:00:00 2001 From: Damjan Marion Date: Wed, 8 Mar 2017 22:16:00 +0100 Subject: dpdk: remove unnecesary CFLAGS modification Change-Id: Ie9945462d529fea6f237a521138ade76bd02e0d9 Signed-off-by: Damjan Marion --- src/plugins/dpdk.am | 4 ---- 1 file changed, 4 deletions(-) (limited to 'src/plugins/dpdk.am') diff --git a/src/plugins/dpdk.am b/src/plugins/dpdk.am index f1a37ae2..01383de6 100644 --- a/src/plugins/dpdk.am +++ b/src/plugins/dpdk.am @@ -16,10 +16,6 @@ vppplugins_LTLIBRARIES += dpdk_plugin.la dpdk_plugin_la_LDFLAGS = $(AM_LDFLAGS) -Wl,--whole-archive,-l:libdpdk.a,--no-whole-archive,-lm,-ldl -# due to internal compiler error in GCC when compiling dpdk/device/node.c -# debug sysmbols level reduced to 1. See GCC PR #79953 for details -CFLAGS += -g1 - dpdk_plugin_la_SOURCES = \ dpdk/main.c \ dpdk/buffer.c \ -- cgit 1.2.3-korg From fa80f2e421e7e85f429437aad2da9971a2a60a24 Mon Sep 17 00:00:00 2001 From: Sergio Gonzalez Monroy Date: Tue, 7 Mar 2017 14:39:29 +0000 Subject: dpdk: fix plugin linking with sw crypto libraries Change-Id: I3e3bf786ab3c7672ff2cc7acd221421072e3ac8b Signed-off-by: Sergio Gonzalez Monroy --- dpdk/Makefile | 45 ++++++++++++++++++++++++++------------------- src/plugins/dpdk.am | 7 ++++++- 2 files changed, 32 insertions(+), 20 deletions(-) (limited to 'src/plugins/dpdk.am') diff --git a/dpdk/Makefile b/dpdk/Makefile index 8e187cc2..fc93f9c6 100644 --- a/dpdk/Makefile +++ b/dpdk/Makefile @@ -25,7 +25,7 @@ DPDK_MLX5_PMD ?= n B := $(DPDK_BUILD_DIR) I := $(DPDK_INSTALL_DIR) DPDK_VERSION ?= 17.02 -PKG_SUFFIX ?= vpp1 +PKG_SUFFIX ?= vpp2 DPDK_BASE_URL ?= http://fast.dpdk.org/rel DPDK_TARBALL := dpdk-$(DPDK_VERSION).tar.xz DPDK_TAR_URL := $(DPDK_BASE_URL)/$(DPDK_TARBALL) @@ -34,9 +34,9 @@ DPDK_17.02_TARBALL_MD5_CKSUM := 6b9f7387c35641f4e8dbba3e528f2376 DPDK_SOURCE := $(B)/dpdk-$(DPDK_VERSION) ifeq ($(DPDK_CRYPTO_SW_PMD),y) -AESNIMB_LIB_TARBALL := v0.44.tar.gz +AESNIMB_LIB_TARBALL := v0.44-gcm.2.tar.gz AESNIMB_LIB_TARBALL_URL := http://github.com/01org/intel-ipsec-mb/archive/$(AESNIMB_LIB_TARBALL) -AESNIMB_LIB_SOURCE := $(B)/intel-ipsec-mb-0.44 +AESNIMB_LIB_SOURCE := $(B)/intel-ipsec-mb-0.44-gcm.2 ISA_L_CRYPTO_LIB_TARBALL := isa_l_crypto.tar.gz ISA_L_CRYPTO_LIB_TARBALL_URL := http://github.com/01org/isa-l_crypto/archive/master.tar.gz ISA_L_CRYPTO_LIB_SOURCE := $(B)/isa-l_crypto-master @@ -87,8 +87,8 @@ DPDK_EXTRA_CFLAGS := -g -O0 endif ifeq ($(DPDK_CRYPTO_SW_PMD),y) -DPDK_EXTRA_CFLAGS += -I$(ISA_L_CRYPTO_LIB_SOURCE) -DPDK_EXTRA_LDFLAGS += -L$(ISA_L_CRYPTO_LIB_SOURCE)/.libs +DPDK_EXTRA_CFLAGS += -I$(I)/include +DPDK_EXTRA_LDFLAGS += -L$(I)/lib DPDK_MAKE_EXTRA_ARGS += AESNI_MULTI_BUFFER_LIB_PATH=$(AESNIMB_LIB_SOURCE) endif @@ -158,19 +158,27 @@ $(CURDIR)/$(DPDK_TARBALL): then cp $(DPDK_DOWNLOAD_DIR)/$(DPDK_TARBALL) $(CURDIR) ; \ else curl -o $(CURDIR)/$(DPDK_TARBALL) -LO $(DPDK_TAR_URL) ; \ fi -ifeq ($(DPDK_CRYPTO_SW_PMD),y) + @rm -f $(B)/.download.ok + +$(CURDIR)/$(AESNIMB_LIB_TARBALL): @if [ -e $(DPDK_DOWNLOAD_DIR)/$(AESNIMB_LIB_TARBALL) ] ; \ then cp $(DPDK_DOWNLOAD_DIR)/$(AESNIMB_LIB_TARBALL) $(CURDIR) ; \ - else curl -o $(CURDIR)/$(AESNIMB_LIB_TARBALL) -LO $(AESNIMB_LIB_TARBALL_URL) ; \ + else curl -o $@ -LO $(AESNIMB_LIB_TARBALL_URL) ; \ fi + +$(CURDIR)/$(ISA_L_CRYPTO_LIB_TARBALL): @if [ -e $(DPDK_DOWNLOAD_DIR)/$(ISA_L_CRYPTO_LIB_TARBALL) ] ; \ then cp $(DPDK_DOWNLOAD_DIR)/$(ISA_L_CRYPTO_LIB_TARBALL) $(CURDIR) ; \ - else curl -o $(CURDIR)/$(ISA_L_CRYPTO_LIB_TARBALL) -LO $(ISA_L_CRYPTO_LIB_TARBALL_URL) ; \ + else curl -o $@ -LO $(ISA_L_CRYPTO_LIB_TARBALL_URL) ; \ fi + +DPDK_DOWNLOADS = $(CURDIR)/$(DPDK_TARBALL) +ifeq ($(DPDK_CRYPTO_SW_PMD),y) +DPDK_DOWNLOADS += $(CURDIR)/$(AESNIMB_LIB_TARBALL) +DPDK_DOWNLOADS += $(CURDIR)/$(ISA_L_CRYPTO_LIB_TARBALL) endif - @rm -f $(B)/.download.ok -$(B)/.download.ok: $(CURDIR)/$(DPDK_TARBALL) +$(B)/.download.ok: $(DPDK_DOWNLOADS) @mkdir -p $(B) @openssl md5 $< | cut -f 2 -d " " - > $(B)/$(DPDK_TARBALL).md5sum @([ "$$(<$(B)/$(DPDK_TARBALL).md5sum)" = "$(DPDK_$(DPDK_VERSION)_TARBALL_MD5_CKSUM)" ] || \ @@ -209,12 +217,6 @@ endif patch: $(B)/.patch.ok $(B)/.config.ok: $(B)/.patch.ok $(B)/custom-config -ifeq ($(DPDK_CRYPTO_SW_PMD),y) - @make -C $(AESNIMB_LIB_SOURCE) - @cd $(ISA_L_CRYPTO_LIB_SOURCE) && ./autogen.sh && ./configure - @make -C $(ISA_L_CRYPTO_LIB_SOURCE) - @cp $(ISA_L_CRYPTO_LIB_SOURCE)/include $(ISA_L_CRYPTO_LIB_SOURCE)/isa-l_crypto -r -endif @make $(DPDK_MAKE_ARGS) config @touch $@ @@ -223,11 +225,16 @@ config: $(B)/.config.ok $(B)/.build.ok: $(DPDK_SOURCE_FILES) @if [ ! -e $(B)/.config.ok ] ; then echo 'Please run "make config" first' && false ; fi - @make $(DPDK_MAKE_ARGS) install ifeq ($(DPDK_CRYPTO_SW_PMD),y) - @cp $(AESNIMB_LIB_SOURCE)/libIPSec_MB.a $(I)/lib/ - @cp $(ISA_L_CRYPTO_LIB_SOURCE)/.libs/libisal_crypto.a $(I)/lib/ + # Build IPsec_MB library + mkdir -p $(I)/lib/ + make -C $(AESNIMB_LIB_SOURCE) -j NO_GCM=y + cp $(AESNIMB_LIB_SOURCE)/libIPSec_MB.a $(I)/lib/ + # Build ISA-L Crypto library + cd $(ISA_L_CRYPTO_LIB_SOURCE) && ./autogen.sh && ./configure --prefix=$(I) + make -C $(ISA_L_CRYPTO_LIB_SOURCE) -j install endif + @make $(DPDK_MAKE_ARGS) install @touch $@ .PHONY: build diff --git a/src/plugins/dpdk.am b/src/plugins/dpdk.am index 01383de6..b857429d 100644 --- a/src/plugins/dpdk.am +++ b/src/plugins/dpdk.am @@ -14,7 +14,12 @@ vppapitestplugins_LTLIBRARIES += dpdk_test_plugin.la vppplugins_LTLIBRARIES += dpdk_plugin.la -dpdk_plugin_la_LDFLAGS = $(AM_LDFLAGS) -Wl,--whole-archive,-l:libdpdk.a,--no-whole-archive,-lm,-ldl +dpdk_plugin_la_LDFLAGS = $(AM_LDFLAGS) -Wl,--whole-archive,-l:libdpdk.a,--no-whole-archive +if WITH_DPDK_CRYPTO_SW +dpdk_plugin_la_LDFLAGS += -Wl,--exclude-libs,libIPSec_MB.a,-l:libIPSec_MB.a +dpdk_plugin_la_LDFLAGS += -Wl,--exclude-libs,libisal_crypto.a,-l:libisal_crypto.a +endif +dpdk_plugin_la_LDFLAGS += -Wl,-lm,-ldl dpdk_plugin_la_SOURCES = \ dpdk/main.c \ -- cgit 1.2.3-korg From 8424af7bd5daf0d754daeb8f49c57c760b9c5baa Mon Sep 17 00:00:00 2001 From: Damjan Marion Date: Wed, 29 Mar 2017 10:50:43 +0200 Subject: dpdk: fix --with-dpdk-mlx5-pmd link issue Change-Id: I8edb927289c080929380bfbf2b760749b7984d4b Signed-off-by: Damjan Marion --- src/plugins/dpdk.am | 3 +++ 1 file changed, 3 insertions(+) (limited to 'src/plugins/dpdk.am') diff --git a/src/plugins/dpdk.am b/src/plugins/dpdk.am index b857429d..a233be7a 100644 --- a/src/plugins/dpdk.am +++ b/src/plugins/dpdk.am @@ -20,6 +20,9 @@ dpdk_plugin_la_LDFLAGS += -Wl,--exclude-libs,libIPSec_MB.a,-l:libIPSec_MB.a dpdk_plugin_la_LDFLAGS += -Wl,--exclude-libs,libisal_crypto.a,-l:libisal_crypto.a endif dpdk_plugin_la_LDFLAGS += -Wl,-lm,-ldl +if WITH_DPDK_MLX5_PMD +dpdk_plugin_la_LDFLAGS += -Wl,-libverbs +endif dpdk_plugin_la_SOURCES = \ dpdk/main.c \ -- cgit 1.2.3-korg From c903793662e16309a67161a58500f6a1a15d37f6 Mon Sep 17 00:00:00 2001 From: Damjan Marion Date: Thu, 27 Apr 2017 21:12:17 +0200 Subject: dpdk: cleanup, move APIs to separate .c file Change-Id: Id632ff1b30be808d9f270e2f77260391569fbda2 Signed-off-by: Damjan Marion --- src/plugins/dpdk.am | 1 + src/plugins/dpdk/api/dpdk_api.c | 333 ++++++++++++++++++++++++++++++++++++++++ src/plugins/dpdk/device/dpdk.h | 12 -- src/plugins/dpdk/device/init.c | 316 +------------------------------------- src/plugins/dpdk/device/node.c | 2 +- 5 files changed, 337 insertions(+), 327 deletions(-) create mode 100755 src/plugins/dpdk/api/dpdk_api.c (limited to 'src/plugins/dpdk.am') diff --git a/src/plugins/dpdk.am b/src/plugins/dpdk.am index a233be7a..bb46ae6e 100644 --- a/src/plugins/dpdk.am +++ b/src/plugins/dpdk.am @@ -28,6 +28,7 @@ dpdk_plugin_la_SOURCES = \ dpdk/main.c \ dpdk/buffer.c \ dpdk/thread.c \ + dpdk/api/dpdk_api.c \ dpdk/device/cli.c \ dpdk/device/dpdk_priv.h \ dpdk/device/device.c \ diff --git a/src/plugins/dpdk/api/dpdk_api.c b/src/plugins/dpdk/api/dpdk_api.c new file mode 100755 index 00000000..08afdd70 --- /dev/null +++ b/src/plugins/dpdk/api/dpdk_api.c @@ -0,0 +1,333 @@ +/* + * Copyright (c) 2017 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +#include + +#include +#include + +/* define message IDs */ +#include + +#define vl_typedefs /* define message structures */ +#include +#undef vl_typedefs + +#define vl_endianfun /* define message structures */ +#include +#undef vl_endianfun + +#define vl_print(handle, ...) vlib_cli_output (handle, __VA_ARGS__) + +/* Get the API version number. */ +#define vl_api_version(n,v) static u32 api_version=(v); +#include +#undef vl_api_version + +/* Macro to finish up custom dump fns */ +#define FINISH \ + vec_add1 (s, 0); \ + vl_print (handle, (char *)s); \ + vec_free (s); \ + return handle; + +#include + +static void + vl_api_sw_interface_set_dpdk_hqos_pipe_t_handler + (vl_api_sw_interface_set_dpdk_hqos_pipe_t * mp) +{ + vl_api_sw_interface_set_dpdk_hqos_pipe_reply_t *rmp; + int rv = 0; + + dpdk_main_t *dm = &dpdk_main; + dpdk_device_t *xd; + + u32 sw_if_index = ntohl (mp->sw_if_index); + u32 subport = ntohl (mp->subport); + u32 pipe = ntohl (mp->pipe); + u32 profile = ntohl (mp->profile); + vnet_hw_interface_t *hw; + + VALIDATE_SW_IF_INDEX (mp); + + /* hw_if & dpdk device */ + hw = vnet_get_sup_hw_interface (dm->vnet_main, sw_if_index); + + xd = vec_elt_at_index (dm->devices, hw->dev_instance); + + rv = rte_sched_pipe_config (xd->hqos_ht->hqos, subport, pipe, profile); + + BAD_SW_IF_INDEX_LABEL; + + REPLY_MACRO (VL_API_SW_INTERFACE_SET_DPDK_HQOS_PIPE_REPLY); +} + +static void *vl_api_sw_interface_set_dpdk_hqos_pipe_t_print + (vl_api_sw_interface_set_dpdk_hqos_pipe_t * mp, void *handle) +{ + u8 *s; + + s = format (0, "SCRIPT: sw_interface_set_dpdk_hqos_pipe "); + + s = format (s, "sw_if_index %u ", ntohl (mp->sw_if_index)); + + s = format (s, "subport %u pipe %u profile %u ", + ntohl (mp->subport), ntohl (mp->pipe), ntohl (mp->profile)); + + FINISH; +} + +static void + vl_api_sw_interface_set_dpdk_hqos_subport_t_handler + (vl_api_sw_interface_set_dpdk_hqos_subport_t * mp) +{ + vl_api_sw_interface_set_dpdk_hqos_subport_reply_t *rmp; + int rv = 0; + + dpdk_main_t *dm = &dpdk_main; + dpdk_device_t *xd; + struct rte_sched_subport_params p; + + u32 sw_if_index = ntohl (mp->sw_if_index); + u32 subport = ntohl (mp->subport); + p.tb_rate = ntohl (mp->tb_rate); + p.tb_size = ntohl (mp->tb_size); + p.tc_rate[0] = ntohl (mp->tc_rate[0]); + p.tc_rate[1] = ntohl (mp->tc_rate[1]); + p.tc_rate[2] = ntohl (mp->tc_rate[2]); + p.tc_rate[3] = ntohl (mp->tc_rate[3]); + p.tc_period = ntohl (mp->tc_period); + + vnet_hw_interface_t *hw; + + VALIDATE_SW_IF_INDEX (mp); + + /* hw_if & dpdk device */ + hw = vnet_get_sup_hw_interface (dm->vnet_main, sw_if_index); + + xd = vec_elt_at_index (dm->devices, hw->dev_instance); + + rv = rte_sched_subport_config (xd->hqos_ht->hqos, subport, &p); + + BAD_SW_IF_INDEX_LABEL; + + REPLY_MACRO (VL_API_SW_INTERFACE_SET_DPDK_HQOS_SUBPORT_REPLY); +} + +static void *vl_api_sw_interface_set_dpdk_hqos_subport_t_print + (vl_api_sw_interface_set_dpdk_hqos_subport_t * mp, void *handle) +{ + u8 *s; + + s = format (0, "SCRIPT: sw_interface_set_dpdk_hqos_subport "); + + s = format (s, "sw_if_index %u ", ntohl (mp->sw_if_index)); + + s = + format (s, + "subport %u rate %u bkt_size %u tc0 %u tc1 %u tc2 %u tc3 %u period %u", + ntohl (mp->subport), ntohl (mp->tb_rate), ntohl (mp->tb_size), + ntohl (mp->tc_rate[0]), ntohl (mp->tc_rate[1]), + ntohl (mp->tc_rate[2]), ntohl (mp->tc_rate[3]), + ntohl (mp->tc_period)); + + FINISH; +} + +static void + vl_api_sw_interface_set_dpdk_hqos_tctbl_t_handler + (vl_api_sw_interface_set_dpdk_hqos_tctbl_t * mp) +{ + vl_api_sw_interface_set_dpdk_hqos_tctbl_reply_t *rmp; + int rv = 0; + + dpdk_main_t *dm = &dpdk_main; + vlib_thread_main_t *tm = vlib_get_thread_main (); + dpdk_device_t *xd; + + u32 sw_if_index = ntohl (mp->sw_if_index); + u32 entry = ntohl (mp->entry); + u32 tc = ntohl (mp->tc); + u32 queue = ntohl (mp->queue); + u32 val, i; + + vnet_hw_interface_t *hw; + + VALIDATE_SW_IF_INDEX (mp); + + /* hw_if & dpdk device */ + hw = vnet_get_sup_hw_interface (dm->vnet_main, sw_if_index); + + xd = vec_elt_at_index (dm->devices, hw->dev_instance); + + if (tc >= RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE) + { + clib_warning ("invalid traffic class !!"); + rv = VNET_API_ERROR_INVALID_VALUE; + goto done; + } + if (queue >= RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS) + { + clib_warning ("invalid queue !!"); + rv = VNET_API_ERROR_INVALID_VALUE; + goto done; + } + + /* Detect the set of worker threads */ + uword *p = hash_get_mem (tm->thread_registrations_by_name, "workers"); + + if (p == 0) + { + clib_warning ("worker thread registration AWOL !!"); + rv = VNET_API_ERROR_INVALID_VALUE_2; + goto done; + } + + vlib_thread_registration_t *tr = (vlib_thread_registration_t *) p[0]; + int worker_thread_first = tr->first_index; + int worker_thread_count = tr->count; + + val = tc * RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS + queue; + for (i = 0; i < worker_thread_count; i++) + xd->hqos_wt[worker_thread_first + i].hqos_tc_table[entry] = val; + + BAD_SW_IF_INDEX_LABEL; +done: + + REPLY_MACRO (VL_API_SW_INTERFACE_SET_DPDK_HQOS_TCTBL_REPLY); +} + +static void *vl_api_sw_interface_set_dpdk_hqos_tctbl_t_print + (vl_api_sw_interface_set_dpdk_hqos_tctbl_t * mp, void *handle) +{ + u8 *s; + + s = format (0, "SCRIPT: sw_interface_set_dpdk_hqos_tctbl "); + + s = format (s, "sw_if_index %u ", ntohl (mp->sw_if_index)); + + s = format (s, "entry %u tc %u queue %u", + ntohl (mp->entry), ntohl (mp->tc), ntohl (mp->queue)); + + FINISH; +} + +#define foreach_dpdk_plugin_api_msg \ +_(SW_INTERFACE_SET_DPDK_HQOS_PIPE, sw_interface_set_dpdk_hqos_pipe) \ +_(SW_INTERFACE_SET_DPDK_HQOS_SUBPORT, sw_interface_set_dpdk_hqos_subport) \ +_(SW_INTERFACE_SET_DPDK_HQOS_TCTBL, sw_interface_set_dpdk_hqos_tctbl) + +/* Set up the API message handling tables */ +static clib_error_t * +dpdk_plugin_api_hookup (vlib_main_t * vm) +{ + dpdk_main_t *dm __attribute__ ((unused)) = &dpdk_main; +#define _(N,n) \ + vl_msg_api_set_handlers((VL_API_##N + dm->msg_id_base), \ + #n, \ + vl_api_##n##_t_handler, \ + vl_noop_handler, \ + vl_api_##n##_t_endian, \ + vl_api_##n##_t_print, \ + sizeof(vl_api_##n##_t), 1); + foreach_dpdk_plugin_api_msg; +#undef _ + return 0; +} + +#define vl_msg_name_crc_list +#include +#undef vl_msg_name_crc_list + +static void +setup_message_id_table (dpdk_main_t * dm, api_main_t * am) +{ +#define _(id,n,crc) \ + vl_msg_api_add_msg_name_crc (am, #n "_" #crc, id + dm->msg_id_base); + foreach_vl_msg_name_crc_dpdk; +#undef _ +} + +// TODO +/* +static void plugin_custom_dump_configure (dpdk_main_t * dm) +{ +#define _(n,f) dm->api_main->msg_print_handlers \ + [VL_API_##n + dm->msg_id_base] \ + = (void *) vl_api_##f##_t_print; + foreach_dpdk_plugin_api_msg; +#undef _ +} +*/ +/* force linker to link functions used by vlib and declared weak */ + +static clib_error_t * +dpdk_api_init (vlib_main_t * vm) +{ + dpdk_main_t *dm = &dpdk_main; + clib_error_t *error = 0; + + /* init CLI */ + if ((error = vlib_call_init_function (vm, dpdk_init))) + return error; + + u8 *name; + name = format (0, "dpdk_%08x%c", api_version, 0); + + /* Ask for a correctly-sized block of API message decode slots */ + dm->msg_id_base = vl_msg_api_get_msg_ids + ((char *) name, VL_MSG_FIRST_AVAILABLE); + vec_free (name); + + error = dpdk_plugin_api_hookup (vm); + + /* Add our API messages to the global name_crc hash table */ + setup_message_id_table (dm, &api_main); + +// TODO +// plugin_custom_dump_configure (dm); + + return error; +} + +VLIB_INIT_FUNCTION (dpdk_api_init); + + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/plugins/dpdk/device/dpdk.h b/src/plugins/dpdk/device/dpdk.h index 9afd0b35..90e93e75 100644 --- a/src/plugins/dpdk/device/dpdk.h +++ b/src/plugins/dpdk/device/dpdk.h @@ -469,19 +469,7 @@ typedef enum DPDK_N_ERROR, } dpdk_error_t; -int dpdk_set_stat_poll_interval (f64 interval); -int dpdk_set_link_state_poll_interval (f64 interval); void dpdk_update_link_state (dpdk_device_t * xd, f64 now); -void dpdk_device_lock_init (dpdk_device_t * xd); -void dpdk_device_lock_free (dpdk_device_t * xd); - -void dpdk_rx_trace (dpdk_main_t * dm, - vlib_node_runtime_t * node, - dpdk_device_t * xd, - u16 queue_id, u32 * buffers, uword n_buffers); - -#define EFD_OPERATION_LESS_THAN 0 -#define EFD_OPERATION_GREATER_OR_EQUAL 1 format_function_t format_dpdk_device_name; format_function_t format_dpdk_device; diff --git a/src/plugins/dpdk/device/init.c b/src/plugins/dpdk/device/init.c index d3763e0b..33ecde44 100755 --- a/src/plugins/dpdk/device/init.c +++ b/src/plugins/dpdk/device/init.c @@ -35,269 +35,6 @@ dpdk_main_t dpdk_main; -#include -#include - -/* define message IDs */ -#include - -#define vl_typedefs /* define message structures */ -#include -#undef vl_typedefs - -#define vl_endianfun /* define message structures */ -#include -#undef vl_endianfun - -#define vl_print(handle, ...) vlib_cli_output (handle, __VA_ARGS__) - -/* Get the API version number. */ -#define vl_api_version(n,v) static u32 api_version=(v); -#include -#undef vl_api_version - -/* Macro to finish up custom dump fns */ -#define FINISH \ - vec_add1 (s, 0); \ - vl_print (handle, (char *)s); \ - vec_free (s); \ - return handle; - -#include - -static void - vl_api_sw_interface_set_dpdk_hqos_pipe_t_handler - (vl_api_sw_interface_set_dpdk_hqos_pipe_t * mp) -{ - vl_api_sw_interface_set_dpdk_hqos_pipe_reply_t *rmp; - int rv = 0; - - dpdk_main_t *dm = &dpdk_main; - dpdk_device_t *xd; - - u32 sw_if_index = ntohl (mp->sw_if_index); - u32 subport = ntohl (mp->subport); - u32 pipe = ntohl (mp->pipe); - u32 profile = ntohl (mp->profile); - vnet_hw_interface_t *hw; - - VALIDATE_SW_IF_INDEX (mp); - - /* hw_if & dpdk device */ - hw = vnet_get_sup_hw_interface (dm->vnet_main, sw_if_index); - - xd = vec_elt_at_index (dm->devices, hw->dev_instance); - - rv = rte_sched_pipe_config (xd->hqos_ht->hqos, subport, pipe, profile); - - BAD_SW_IF_INDEX_LABEL; - - REPLY_MACRO (VL_API_SW_INTERFACE_SET_DPDK_HQOS_PIPE_REPLY); -} - -static void *vl_api_sw_interface_set_dpdk_hqos_pipe_t_print - (vl_api_sw_interface_set_dpdk_hqos_pipe_t * mp, void *handle) -{ - u8 *s; - - s = format (0, "SCRIPT: sw_interface_set_dpdk_hqos_pipe "); - - s = format (s, "sw_if_index %u ", ntohl (mp->sw_if_index)); - - s = format (s, "subport %u pipe %u profile %u ", - ntohl (mp->subport), ntohl (mp->pipe), ntohl (mp->profile)); - - FINISH; -} - -static void - vl_api_sw_interface_set_dpdk_hqos_subport_t_handler - (vl_api_sw_interface_set_dpdk_hqos_subport_t * mp) -{ - vl_api_sw_interface_set_dpdk_hqos_subport_reply_t *rmp; - int rv = 0; - - dpdk_main_t *dm = &dpdk_main; - dpdk_device_t *xd; - struct rte_sched_subport_params p; - - u32 sw_if_index = ntohl (mp->sw_if_index); - u32 subport = ntohl (mp->subport); - p.tb_rate = ntohl (mp->tb_rate); - p.tb_size = ntohl (mp->tb_size); - p.tc_rate[0] = ntohl (mp->tc_rate[0]); - p.tc_rate[1] = ntohl (mp->tc_rate[1]); - p.tc_rate[2] = ntohl (mp->tc_rate[2]); - p.tc_rate[3] = ntohl (mp->tc_rate[3]); - p.tc_period = ntohl (mp->tc_period); - - vnet_hw_interface_t *hw; - - VALIDATE_SW_IF_INDEX (mp); - - /* hw_if & dpdk device */ - hw = vnet_get_sup_hw_interface (dm->vnet_main, sw_if_index); - - xd = vec_elt_at_index (dm->devices, hw->dev_instance); - - rv = rte_sched_subport_config (xd->hqos_ht->hqos, subport, &p); - - BAD_SW_IF_INDEX_LABEL; - - REPLY_MACRO (VL_API_SW_INTERFACE_SET_DPDK_HQOS_SUBPORT_REPLY); -} - -static void *vl_api_sw_interface_set_dpdk_hqos_subport_t_print - (vl_api_sw_interface_set_dpdk_hqos_subport_t * mp, void *handle) -{ - u8 *s; - - s = format (0, "SCRIPT: sw_interface_set_dpdk_hqos_subport "); - - s = format (s, "sw_if_index %u ", ntohl (mp->sw_if_index)); - - s = - format (s, - "subport %u rate %u bkt_size %u tc0 %u tc1 %u tc2 %u tc3 %u period %u", - ntohl (mp->subport), ntohl (mp->tb_rate), ntohl (mp->tb_size), - ntohl (mp->tc_rate[0]), ntohl (mp->tc_rate[1]), - ntohl (mp->tc_rate[2]), ntohl (mp->tc_rate[3]), - ntohl (mp->tc_period)); - - FINISH; -} - -static void - vl_api_sw_interface_set_dpdk_hqos_tctbl_t_handler - (vl_api_sw_interface_set_dpdk_hqos_tctbl_t * mp) -{ - vl_api_sw_interface_set_dpdk_hqos_tctbl_reply_t *rmp; - int rv = 0; - - dpdk_main_t *dm = &dpdk_main; - vlib_thread_main_t *tm = vlib_get_thread_main (); - dpdk_device_t *xd; - - u32 sw_if_index = ntohl (mp->sw_if_index); - u32 entry = ntohl (mp->entry); - u32 tc = ntohl (mp->tc); - u32 queue = ntohl (mp->queue); - u32 val, i; - - vnet_hw_interface_t *hw; - - VALIDATE_SW_IF_INDEX (mp); - - /* hw_if & dpdk device */ - hw = vnet_get_sup_hw_interface (dm->vnet_main, sw_if_index); - - xd = vec_elt_at_index (dm->devices, hw->dev_instance); - - if (tc >= RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE) - { - clib_warning ("invalid traffic class !!"); - rv = VNET_API_ERROR_INVALID_VALUE; - goto done; - } - if (queue >= RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS) - { - clib_warning ("invalid queue !!"); - rv = VNET_API_ERROR_INVALID_VALUE; - goto done; - } - - /* Detect the set of worker threads */ - uword *p = hash_get_mem (tm->thread_registrations_by_name, "workers"); - - if (p == 0) - { - clib_warning ("worker thread registration AWOL !!"); - rv = VNET_API_ERROR_INVALID_VALUE_2; - goto done; - } - - vlib_thread_registration_t *tr = (vlib_thread_registration_t *) p[0]; - int worker_thread_first = tr->first_index; - int worker_thread_count = tr->count; - - val = tc * RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS + queue; - for (i = 0; i < worker_thread_count; i++) - xd->hqos_wt[worker_thread_first + i].hqos_tc_table[entry] = val; - - BAD_SW_IF_INDEX_LABEL; -done: - - REPLY_MACRO (VL_API_SW_INTERFACE_SET_DPDK_HQOS_TCTBL_REPLY); -} - -static void *vl_api_sw_interface_set_dpdk_hqos_tctbl_t_print - (vl_api_sw_interface_set_dpdk_hqos_tctbl_t * mp, void *handle) -{ - u8 *s; - - s = format (0, "SCRIPT: sw_interface_set_dpdk_hqos_tctbl "); - - s = format (s, "sw_if_index %u ", ntohl (mp->sw_if_index)); - - s = format (s, "entry %u tc %u queue %u", - ntohl (mp->entry), ntohl (mp->tc), ntohl (mp->queue)); - - FINISH; -} - -#define foreach_dpdk_plugin_api_msg \ -_(SW_INTERFACE_SET_DPDK_HQOS_PIPE, sw_interface_set_dpdk_hqos_pipe) \ -_(SW_INTERFACE_SET_DPDK_HQOS_SUBPORT, sw_interface_set_dpdk_hqos_subport) \ -_(SW_INTERFACE_SET_DPDK_HQOS_TCTBL, sw_interface_set_dpdk_hqos_tctbl) - -/* Set up the API message handling tables */ -static clib_error_t * -dpdk_plugin_api_hookup (vlib_main_t * vm) -{ - dpdk_main_t *dm __attribute__ ((unused)) = &dpdk_main; -#define _(N,n) \ - vl_msg_api_set_handlers((VL_API_##N + dm->msg_id_base), \ - #n, \ - vl_api_##n##_t_handler, \ - vl_noop_handler, \ - vl_api_##n##_t_endian, \ - vl_api_##n##_t_print, \ - sizeof(vl_api_##n##_t), 1); - foreach_dpdk_plugin_api_msg; -#undef _ - return 0; -} - -#define vl_msg_name_crc_list -#include -#undef vl_msg_name_crc_list - -static void -setup_message_id_table (dpdk_main_t * dm, api_main_t * am) -{ -#define _(id,n,crc) \ - vl_msg_api_add_msg_name_crc (am, #n "_" #crc, id + dm->msg_id_base); - foreach_vl_msg_name_crc_dpdk; -#undef _ -} - -// TODO -/* -static void plugin_custom_dump_configure (dpdk_main_t * dm) -{ -#define _(n,f) dm->api_main->msg_print_handlers \ - [VL_API_##n + dm->msg_id_base] \ - = (void *) vl_api_##f##_t_print; - foreach_dpdk_plugin_api_msg; -#undef _ -} -*/ -/* force linker to link functions used by vlib and declared weak */ -void *vlib_weakly_linked_functions[] = { - &rte_pktmbuf_init, - &rte_pktmbuf_pool_init, -}; - #define LINK_STATE_ELOGS 0 #define DEFAULT_HUGE_DIR "/run/vpp/hugepages" @@ -452,7 +189,7 @@ dpdk_flag_change (vnet_main_t * vnm, vnet_hw_interface_t * hi, u32 flags) return old; } -void +static void dpdk_device_lock_init (dpdk_device_t * xd) { int q; @@ -465,17 +202,6 @@ dpdk_device_lock_init (dpdk_device_t * xd) } } -void -dpdk_device_lock_free (dpdk_device_t * xd) -{ - int q; - - for (q = 0; q < vec_len (xd->lockp); q++) - clib_mem_free ((void *) xd->lockp[q]); - vec_free (xd->lockp); - xd->lockp = 0; -} - static clib_error_t * dpdk_lib_init (dpdk_main_t * dm) { @@ -1925,29 +1651,7 @@ VLIB_REGISTER_NODE (dpdk_process_node,static) = { }; /* *INDENT-ON* */ -int -dpdk_set_stat_poll_interval (f64 interval) -{ - if (interval < DPDK_MIN_STATS_POLL_INTERVAL) - return (VNET_API_ERROR_INVALID_VALUE); - - dpdk_main.stat_poll_interval = interval; - - return 0; -} - -int -dpdk_set_link_state_poll_interval (f64 interval) -{ - if (interval < DPDK_MIN_LINK_POLL_INTERVAL) - return (VNET_API_ERROR_INVALID_VALUE); - - dpdk_main.link_state_poll_interval = interval; - - return 0; -} - -clib_error_t * +static clib_error_t * dpdk_init (vlib_main_t * vm) { dpdk_main_t *dm = &dpdk_main; @@ -1964,26 +1668,10 @@ dpdk_init (vlib_main_t * vm) STATIC_ASSERT (offsetof (frame_queue_trace_t, cacheline0) == 0, "Cache line marker must be 1st element in frame_queue_trace_t"); - u8 *name; - name = format (0, "dpdk_%08x%c", api_version, 0); - - /* Ask for a correctly-sized block of API message decode slots */ - dm->msg_id_base = vl_msg_api_get_msg_ids - ((char *) name, VL_MSG_FIRST_AVAILABLE); - vec_free (name); - dm->vlib_main = vm; dm->vnet_main = vnet_get_main (); dm->conf = &dpdk_config_main; - error = dpdk_plugin_api_hookup (vm); - - /* Add our API messages to the global name_crc hash table */ - setup_message_id_table (dm, &api_main); - -// TODO -// plugin_custom_dump_configure (dm); - ei = vlib_get_node_by_name (vm, (u8 *) "ethernet-input"); if (ei == 0) return clib_error_return (0, "ethernet-input node AWOL"); diff --git a/src/plugins/dpdk/device/node.c b/src/plugins/dpdk/device/node.c index 5cc611cd..0562b48a 100644 --- a/src/plugins/dpdk/device/node.c +++ b/src/plugins/dpdk/device/node.c @@ -114,7 +114,7 @@ dpdk_rx_error_from_mb (struct rte_mbuf *mb, u32 * next, u8 * error) *error = DPDK_ERROR_NONE; } -void +static void dpdk_rx_trace (dpdk_main_t * dm, vlib_node_runtime_t * node, dpdk_device_t * xd, -- cgit 1.2.3-korg From e6c5941b982083840a48e1de06f5324893153d8f Mon Sep 17 00:00:00 2001 From: Damjan Marion Date: Fri, 28 Apr 2017 17:10:38 +0200 Subject: dpdk: refactor setup, start, stop code Change-Id: I0fec86914ec027383ff511b7092beac2363f55f7 Signed-off-by: Damjan Marion --- src/plugins/dpdk.am | 1 + src/plugins/dpdk/device/cli.c | 2 +- src/plugins/dpdk/device/common.c | 156 +++++++++++++++++++++++++++++++++++++++ src/plugins/dpdk/device/dpdk.h | 4 +- src/plugins/dpdk/device/init.c | 91 ++--------------------- 5 files changed, 167 insertions(+), 87 deletions(-) create mode 100644 src/plugins/dpdk/device/common.c (limited to 'src/plugins/dpdk.am') diff --git a/src/plugins/dpdk.am b/src/plugins/dpdk.am index bb46ae6e..0b242502 100644 --- a/src/plugins/dpdk.am +++ b/src/plugins/dpdk.am @@ -30,6 +30,7 @@ dpdk_plugin_la_SOURCES = \ dpdk/thread.c \ dpdk/api/dpdk_api.c \ dpdk/device/cli.c \ + dpdk/device/common.c \ dpdk/device/dpdk_priv.h \ dpdk/device/device.c \ dpdk/device/format.c \ diff --git a/src/plugins/dpdk/device/cli.c b/src/plugins/dpdk/device/cli.c index 17d1bfe1..c7e5090d 100644 --- a/src/plugins/dpdk/device/cli.c +++ b/src/plugins/dpdk/device/cli.c @@ -537,7 +537,7 @@ set_dpdk_if_desc (vlib_main_t * vm, unformat_input_t * input, if (nb_tx_desc != (u32) ~ 0) xd->nb_tx_desc = nb_tx_desc; - error = dpdk_port_setup (dm, xd); + error = dpdk_device_setup (xd); done: unformat_free (line_input); diff --git a/src/plugins/dpdk/device/common.c b/src/plugins/dpdk/device/common.c new file mode 100644 index 00000000..79c5888d --- /dev/null +++ b/src/plugins/dpdk/device/common.c @@ -0,0 +1,156 @@ +/* + * Copyright (c) 2017 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include +#include +#include +#include +#include + +#include +#include + +#include +#include + +clib_error_t * +dpdk_error_return (clib_error_t * error, char *str, dpdk_device_t * xd, + int rv) +{ + return clib_error_return (error, "%s[%d]: %s(%d)", str, xd->device_index, + rte_strerror (rv), rv); +} + +clib_error_t * +dpdk_device_setup (dpdk_device_t * xd) +{ + dpdk_main_t *dm = &dpdk_main; + clib_error_t *err = 0; + int rv; + int j; + + ASSERT (vlib_get_thread_index () == 0); + + if (xd->flags & DPDK_DEVICE_FLAG_ADMIN_UP) + { + vnet_hw_interface_set_flags (dm->vnet_main, xd->hw_if_index, 0); + dpdk_device_stop (xd); + } + + rv = rte_eth_dev_configure (xd->device_index, xd->rx_q_used, + xd->tx_q_used, &xd->port_conf); + + if (rv < 0) + return dpdk_error_return (err, "rte_eth_dev_configure", xd, rv); + + /* Set up one TX-queue per worker thread */ + for (j = 0; j < xd->tx_q_used; j++) + { + rv = rte_eth_tx_queue_setup (xd->device_index, j, xd->nb_tx_desc, + xd->cpu_socket, &xd->tx_conf); + + /* retry with any other CPU socket */ + if (rv < 0) + rv = rte_eth_tx_queue_setup (xd->device_index, j, xd->nb_tx_desc, + SOCKET_ID_ANY, &xd->tx_conf); + if (rv < 0) + err = dpdk_error_return (err, "rte_eth_tx_queue_setup", xd, rv); + } + + for (j = 0; j < xd->rx_q_used; j++) + { + uword tidx = vnet_get_device_input_thread_index (dm->vnet_main, + xd->hw_if_index, j); + unsigned lcore = vlib_worker_threads[tidx].lcore_id; + u16 socket_id = rte_lcore_to_socket_id (lcore); + + rv = rte_eth_rx_queue_setup (xd->device_index, j, xd->nb_rx_desc, + xd->cpu_socket, 0, + dm->pktmbuf_pools[socket_id]); + + /* retry with any other CPU socket */ + if (rv < 0) + rv = rte_eth_rx_queue_setup (xd->device_index, j, xd->nb_rx_desc, + SOCKET_ID_ANY, 0, + dm->pktmbuf_pools[socket_id]); + + if (rv < 0) + err = dpdk_error_return (err, "rte_eth_rx_queue_setup", xd, rv); + } + + if (err) + return err; + + if (xd->flags & DPDK_DEVICE_FLAG_ADMIN_UP) + err = dpdk_device_start (xd); + + return err; +} + +clib_error_t * +dpdk_device_start (dpdk_device_t * xd) +{ + int rv; + clib_error_t *err = 0; + + rv = rte_eth_dev_start (xd->device_index); + + if (rv) + return dpdk_error_return (err, "rte_eth_dev_start", xd, rv); + + if (xd->default_mac_address) + rv = + rte_eth_dev_default_mac_addr_set (xd->device_index, + (struct ether_addr *) + xd->default_mac_address); + + if (rv) + err = dpdk_error_return (err, "rte_eth_dev_default_mac_addr_set", xd, rv); + + if (xd->flags & DPDK_DEVICE_FLAG_PROMISC) + rte_eth_promiscuous_enable (xd->device_index); + else + rte_eth_promiscuous_disable (xd->device_index); + + rte_eth_allmulticast_enable (xd->device_index); + + if (xd->pmd == VNET_DPDK_PMD_BOND) + { + u8 slink[16]; + int nlink = rte_eth_bond_slaves_get (xd->device_index, slink, 16); + while (nlink >= 1) + { + u8 dpdk_port = slink[--nlink]; + rte_eth_allmulticast_enable (dpdk_port); + } + } + + return err; +} + +clib_error_t * +dpdk_device_stop (dpdk_device_t * xd) +{ + rte_eth_dev_stop (xd->device_index); + + return 0; +} + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/plugins/dpdk/device/dpdk.h b/src/plugins/dpdk/device/dpdk.h index 8dbc0915..583d2cd9 100644 --- a/src/plugins/dpdk/device/dpdk.h +++ b/src/plugins/dpdk/device/dpdk.h @@ -418,7 +418,9 @@ typedef struct u8 data[256]; /* First 256 data bytes, used for hexdump */ } dpdk_rx_dma_trace_t; -clib_error_t *dpdk_port_setup (dpdk_main_t * dm, dpdk_device_t * xd); +clib_error_t *dpdk_device_setup (dpdk_device_t * xd); +clib_error_t *dpdk_device_start (dpdk_device_t * xd); +clib_error_t *dpdk_device_stop (dpdk_device_t * xd); #define foreach_dpdk_error \ _(NONE, "no error") \ diff --git a/src/plugins/dpdk/device/init.c b/src/plugins/dpdk/device/init.c index c45edc9b..3aba031c 100755 --- a/src/plugins/dpdk/device/init.c +++ b/src/plugins/dpdk/device/init.c @@ -55,80 +55,6 @@ static struct rte_eth_conf port_conf_template = { }, }; -clib_error_t * -dpdk_port_setup (dpdk_main_t * dm, dpdk_device_t * xd) -{ - int rv; - int j; - - ASSERT (vlib_get_thread_index () == 0); - - if (xd->flags & DPDK_DEVICE_FLAG_ADMIN_UP) - { - vnet_hw_interface_set_flags (dm->vnet_main, xd->hw_if_index, 0); - rte_eth_dev_stop (xd->device_index); - } - - rv = rte_eth_dev_configure (xd->device_index, xd->rx_q_used, - xd->tx_q_used, &xd->port_conf); - - if (rv < 0) - return clib_error_return (0, "rte_eth_dev_configure[%d]: err %d", - xd->device_index, rv); - - /* Set up one TX-queue per worker thread */ - for (j = 0; j < xd->tx_q_used; j++) - { - rv = rte_eth_tx_queue_setup (xd->device_index, j, xd->nb_tx_desc, - xd->cpu_socket, &xd->tx_conf); - - /* retry with any other CPU socket */ - if (rv < 0) - rv = rte_eth_tx_queue_setup (xd->device_index, j, xd->nb_tx_desc, - SOCKET_ID_ANY, &xd->tx_conf); - if (rv < 0) - break; - } - - if (rv < 0) - return clib_error_return (0, "rte_eth_tx_queue_setup[%d]: err %d", - xd->device_index, rv); - - for (j = 0; j < xd->rx_q_used; j++) - { - uword tidx = vnet_get_device_input_thread_index (dm->vnet_main, - xd->hw_if_index, j); - unsigned lcore = vlib_worker_threads[tidx].lcore_id; - u16 socket_id = rte_lcore_to_socket_id (lcore); - - rv = rte_eth_rx_queue_setup (xd->device_index, j, xd->nb_rx_desc, - xd->cpu_socket, 0, - dm->pktmbuf_pools[socket_id]); - - /* retry with any other CPU socket */ - if (rv < 0) - rv = rte_eth_rx_queue_setup (xd->device_index, j, xd->nb_rx_desc, - SOCKET_ID_ANY, 0, - dm->pktmbuf_pools[socket_id]); - if (rv < 0) - return clib_error_return (0, "rte_eth_rx_queue_setup[%d]: err %d", - xd->device_index, rv); - } - - if (xd->flags & DPDK_DEVICE_FLAG_ADMIN_UP) - { - int rv; - rv = rte_eth_dev_start (xd->device_index); - if (!rv && xd->default_mac_address) - rv = rte_eth_dev_default_mac_addr_set (xd->device_index, - (struct ether_addr *) - xd->default_mac_address); - if (rv < 0) - clib_warning ("rte_eth_dev_start %d returned %d", - xd->device_index, rv); - } - return 0; -} static u32 dpdk_flag_change (vnet_main_t * vnm, vnet_hw_interface_t * hi, u32 flags) @@ -161,7 +87,7 @@ dpdk_flag_change (vnet_main_t * vnm, vnet_hw_interface_t * hi, u32 flags) xd->port_conf.rxmode.max_rx_pkt_len = hi->max_packet_bytes; if (xd->flags & DPDK_DEVICE_FLAG_ADMIN_UP) - rte_eth_dev_stop (xd->device_index); + dpdk_device_stop (xd); rv = rte_eth_dev_configure (xd->device_index, xd->rx_q_used, xd->tx_q_used, &xd->port_conf); @@ -175,14 +101,9 @@ dpdk_flag_change (vnet_main_t * vnm, vnet_hw_interface_t * hi, u32 flags) if (xd->flags & DPDK_DEVICE_FLAG_ADMIN_UP) { - int rv = rte_eth_dev_start (xd->device_index); - if (!rv && xd->default_mac_address) - rv = rte_eth_dev_default_mac_addr_set (xd->device_index, - (struct ether_addr *) - xd->default_mac_address); - if (rv < 0) - clib_warning ("rte_eth_dev_start %d returned %d", - xd->device_index, rv); + clib_error_t *error; + error = dpdk_device_start (xd); + clib_error_report (error); } } @@ -716,7 +637,7 @@ dpdk_lib_init (dpdk_main_t * dm) hi = vnet_get_hw_interface (dm->vnet_main, xd->hw_if_index); - rv = dpdk_port_setup (dm, xd); + rv = dpdk_device_setup (xd); if (rv) return rv; @@ -1505,7 +1426,7 @@ dpdk_process (vlib_main_t * vm, vlib_node_runtime_t * rt, vlib_frame_t * f) /* * Extra set up for bond interfaces: * 1. Setup MACs for bond interfaces and their slave links which was set - * in dpdk_port_setup() but needs to be done again here to take effect. + * in dpdk_device_setup() but needs to be done again here to take effect. * 2. Set up info for bond interface related CLI support. */ int nports = rte_eth_dev_count (); -- cgit 1.2.3-korg From f73dae3c45b8fd8c0d9e26d386c56a90cfaf67fd Mon Sep 17 00:00:00 2001 From: Marco Varlese Date: Wed, 10 May 2017 17:02:58 +0200 Subject: Unneeded linking Removed the linking which does not appear to be necessary when using in repo dpdk and which causes a build failure when using the shared-dpdk mode. Change-Id: I6bad2bd11d6db40fbd2def78f98c6beba66ff416 Signed-off-by: Marco Varlese --- src/plugins/dpdk.am | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'src/plugins/dpdk.am') diff --git a/src/plugins/dpdk.am b/src/plugins/dpdk.am index 0b242502..75bfb967 100644 --- a/src/plugins/dpdk.am +++ b/src/plugins/dpdk.am @@ -14,7 +14,11 @@ vppapitestplugins_LTLIBRARIES += dpdk_test_plugin.la vppplugins_LTLIBRARIES += dpdk_plugin.la +if ENABLE_DPDK_SHARED +dpdk_plugin_la_LDFLAGS = $(AM_LDFLAGS) -ldpdk +else dpdk_plugin_la_LDFLAGS = $(AM_LDFLAGS) -Wl,--whole-archive,-l:libdpdk.a,--no-whole-archive +endif if WITH_DPDK_CRYPTO_SW dpdk_plugin_la_LDFLAGS += -Wl,--exclude-libs,libIPSec_MB.a,-l:libIPSec_MB.a dpdk_plugin_la_LDFLAGS += -Wl,--exclude-libs,libisal_crypto.a,-l:libisal_crypto.a -- cgit 1.2.3-korg From 3b12cdc59c8ec98b580b356f20b68e3554f91e97 Mon Sep 17 00:00:00 2001 From: Sergio Gonzalez Monroy Date: Tue, 6 Jun 2017 15:29:16 +0100 Subject: dpdk: update build Current optional DPDK PMDs are: - AESNI MB PMD (SW crypto) - AESNI GCM PMD (SW crypto) - MLX4 PMD - MLX5 PMD This change will always build DPDK SW crypto PMDs and required SW crypto libraries, while MLX PMDs are still optional and the user has to build required libraries. Now the configure script detects if any of the optional DPDK PMDs were built and link against their required libraries/dependencies. Change-Id: I1560bebd71035d6486483f22da90042ec2ce40a1 Signed-off-by: Sergio Gonzalez Monroy --- dpdk/Makefile | 59 ++++++++++++++++++++++++++++------------------------- src/configure.ac | 48 ++++++++++++++++++++++++++++++++++++++++--- src/plugins/dpdk.am | 7 ++++++- 3 files changed, 82 insertions(+), 32 deletions(-) (limited to 'src/plugins/dpdk.am') diff --git a/dpdk/Makefile b/dpdk/Makefile index 06ba1270..659439c0 100644 --- a/dpdk/Makefile +++ b/dpdk/Makefile @@ -19,14 +19,13 @@ DPDK_INSTALL_DIR ?= $(CURDIR)/_install DPDK_PKTMBUF_HEADROOM ?= 128 DPDK_DOWNLOAD_DIR ?= $(HOME)/Downloads DPDK_DEBUG ?= n -DPDK_CRYPTO_SW_PMD ?= n DPDK_MLX4_PMD ?= n DPDK_MLX5_PMD ?= n B := $(DPDK_BUILD_DIR) I := $(DPDK_INSTALL_DIR) DPDK_VERSION ?= 17.05 -PKG_SUFFIX ?= vpp5 +PKG_SUFFIX ?= vpp6 DPDK_BASE_URL ?= http://fast.dpdk.org/rel DPDK_TARBALL := dpdk-$(DPDK_VERSION).tar.xz DPDK_TAR_URL := $(DPDK_BASE_URL)/$(DPDK_TARBALL) @@ -34,14 +33,15 @@ DPDK_17.02_TARBALL_MD5_CKSUM := 6b9f7387c35641f4e8dbba3e528f2376 DPDK_17.05_TARBALL_MD5_CKSUM := 0a68c31cd6a6cabeed0a4331073e4c05 DPDK_SOURCE := $(B)/dpdk-$(DPDK_VERSION) -ifeq ($(DPDK_CRYPTO_SW_PMD),y) -AESNIMB_LIB_TARBALL := v0.44-gcm.2.tar.gz +IPSEC_MB_VER := 0.45 +AESNIMB_LIB_TARBALL := v$(IPSEC_MB_VER).tar.gz AESNIMB_LIB_TARBALL_URL := http://github.com/01org/intel-ipsec-mb/archive/$(AESNIMB_LIB_TARBALL) -AESNIMB_LIB_SOURCE := $(B)/intel-ipsec-mb-0.44-gcm.2 -ISA_L_CRYPTO_LIB_TARBALL := isa_l_crypto.tar.gz -ISA_L_CRYPTO_LIB_TARBALL_URL := http://github.com/01org/isa-l_crypto/archive/master.tar.gz -ISA_L_CRYPTO_LIB_SOURCE := $(B)/isa-l_crypto-master -endif +AESNIMB_LIB_SOURCE := $(B)/intel-ipsec-mb-$(IPSEC_MB_VER) +ISA_L_CRYPTO_VER := 2.18.0 +ISA_L_CRYPTO_LIB_TARBALL := v$(ISA_L_CRYPTO_VER).tar.gz +ISA_L_CRYPTO_LIB_TARBALL_URL := http://github.com/01org/isa-l_crypto/archive/$(ISA_L_CRYPTO_LIB_TARBALL) +ISA_L_CRYPTO_LIB_SOURCE := $(B)/isa-l_crypto-$(ISA_L_CRYPTO_VER) +ISA_L_CRYPTO_INSTALL_DIR := $(ISA_L_CRYPTO_LIB_SOURCE)/install ifneq (,$(findstring clang,$(CC))) DPDK_CC=clang @@ -95,11 +95,9 @@ else DPDK_EXTRA_CFLAGS := -g -O0 endif -ifeq ($(DPDK_CRYPTO_SW_PMD),y) -DPDK_EXTRA_CFLAGS += -I$(I)/include +DPDK_EXTRA_CFLAGS += -I$(ISA_L_CRYPTO_INSTALL_DIR)/include -Wl,-z,muldefs DPDK_EXTRA_LDFLAGS += -L$(I)/lib DPDK_MAKE_EXTRA_ARGS += AESNI_MULTI_BUFFER_LIB_PATH=$(AESNIMB_LIB_SOURCE) -endif # assemble DPDK make arguments DPDK_MAKE_ARGS := -C $(DPDK_SOURCE) -j $(JOBS) \ @@ -111,8 +109,6 @@ DPDK_MAKE_ARGS := -C $(DPDK_SOURCE) -j $(JOBS) \ DESTDIR=$(I) \ $(DPDK_MAKE_EXTRA_ARGS) -DPDK_SOURCE_FILES := $(shell [ -e $(DPDK_SOURCE) ] && find $(DPDK_SOURCE) -name "*.[chS]") - define set @if grep -q CONFIG_$1 $@ ; \ then sed -i -e 's/.*\(CONFIG_$1=\).*/\1$2/' $@ ; \ @@ -143,8 +139,8 @@ $(B)/custom-config: $(B)/.patch.ok Makefile $(call set,RTE_LIBRTE_PMD_BOND,y) $(call set,RTE_LIBRTE_IP_FRAG,y) $(call set,RTE_LIBRTE_PMD_QAT,y) - $(call set,RTE_LIBRTE_PMD_AESNI_MB,$(DPDK_CRYPTO_SW_PMD)) - $(call set,RTE_LIBRTE_PMD_AESNI_GCM,$(DPDK_CRYPTO_SW_PMD)) + $(call set,RTE_LIBRTE_PMD_AESNI_MB,y) + $(call set,RTE_LIBRTE_PMD_AESNI_GCM,y) $(call set,RTE_LIBRTE_MLX4_PMD,$(DPDK_MLX4_PMD)) $(call set,RTE_LIBRTE_MLX5_PMD,$(DPDK_MLX5_PMD)) @# not needed @@ -181,10 +177,8 @@ $(CURDIR)/$(ISA_L_CRYPTO_LIB_TARBALL): fi DPDK_DOWNLOADS = $(CURDIR)/$(DPDK_TARBALL) -ifeq ($(DPDK_CRYPTO_SW_PMD),y) DPDK_DOWNLOADS += $(CURDIR)/$(AESNIMB_LIB_TARBALL) DPDK_DOWNLOADS += $(CURDIR)/$(ISA_L_CRYPTO_LIB_TARBALL) -endif $(B)/.download.ok: $(DPDK_DOWNLOADS) @mkdir -p $(B) @@ -200,12 +194,10 @@ download: $(B)/.download.ok $(B)/.extract.ok: $(B)/.download.ok @echo --- extracting $(DPDK_TARBALL) --- @tar --directory $(B) --extract --file $(CURDIR)/$(DPDK_TARBALL) -ifeq ($(DPDK_CRYPTO_SW_PMD),y) @echo --- extracting $(AESNIMB_LIB_TARBALL) --- @tar --directory $(B) --extract --file $(CURDIR)/$(AESNIMB_LIB_TARBALL) @echo --- extracting $(ISA_L_CRYPTO_LIB_TARBALL) --- @tar --directory $(B) --extract --file $(CURDIR)/$(ISA_L_CRYPTO_LIB_TARBALL) -endif @touch $@ .PHONY: extract @@ -231,18 +223,29 @@ $(B)/.config.ok: $(B)/.patch.ok $(B)/custom-config .PHONY: config config: $(B)/.config.ok -$(B)/.build.ok: $(DPDK_SOURCE_FILES) - @if [ ! -e $(B)/.config.ok ] ; then echo 'Please run "make config" first' && false ; fi -ifeq ($(DPDK_CRYPTO_SW_PMD),y) - # Build IPsec_MB library +# Order matters +BUILD_TARGETS += build-ipsec-mb build-isal-crypto build-dpdk + +.PHONY: build-ipsec-mb +build-ipsec-mb: mkdir -p $(I)/lib/ make -C $(AESNIMB_LIB_SOURCE) -j NO_GCM=y cp $(AESNIMB_LIB_SOURCE)/libIPSec_MB.a $(I)/lib/ - # Build ISA-L Crypto library - cd $(ISA_L_CRYPTO_LIB_SOURCE) && ./autogen.sh && ./configure --prefix=$(I) + +.PHONY: build-isal-crypto +build-isal-crypto: + mkdir -p $(I)/lib/ + cd $(ISA_L_CRYPTO_LIB_SOURCE) && ./autogen.sh && \ + ./configure --prefix=$(ISA_L_CRYPTO_INSTALL_DIR) CFLAGS='-fPIC -DPIC -O2' make -C $(ISA_L_CRYPTO_LIB_SOURCE) -j install -endif + cp $(ISA_L_CRYPTO_INSTALL_DIR)/lib/libisal_crypto.a $(I)/lib/ + +.PHONY: build-dpdk +build-dpdk: + @if [ ! -e $(B)/.config.ok ] ; then echo 'Please run "make config" first' && false ; fi @make $(DPDK_MAKE_ARGS) install + +$(B)/.build.ok: $(BUILD_TARGETS) @touch $@ .PHONY: build @@ -323,7 +326,7 @@ build-rpm: $(DEV_RPM) install-rpm: ifneq ($(INSTALLED_RPM_VER),$(DPDK_VERSION)-$(PKG_SUFFIX)) - @make $(DEV_RPM) + @$(MAKE) $(DEV_RPM) sudo rpm -Uih $(DEV_RPM) else @echo "==========================================================" diff --git a/src/configure.ac b/src/configure.ac index ef8c3b00..cb00d0bd 100644 --- a/src/configure.ac +++ b/src/configure.ac @@ -80,6 +80,23 @@ AC_DEFUN([PLUGIN_DISABLED], AC_DEFUN([PRINT_VAL], [ AC_MSG_RESULT(AC_HELP_STRING($1,$2)) ]) +AC_DEFUN([DPDK_IS_PMD_ENABLED], +[ + AC_MSG_CHECKING([for $1 in rte_config.h]) + AC_COMPILE_IFELSE( + [AC_LANG_PROGRAM( + [[#include ]], + [[return RTE_$1;]], + )], + [with_$2=yes] + [AC_MSG_RESULT([yes])], + [with_$2=no] + [AC_MSG_RESULT([no])] + ) + AM_CONDITIONAL(m4_toupper(WITH_$2), test "$with_$2" = "yes") + m4_append_uniq([list_of_with], [$2], [, ]) +]) + ############################################################################### # configure arguments ############################################################################### @@ -97,8 +114,6 @@ DISABLE_ARG(papi, [Disable Python API bindings]) DISABLE_ARG(japi, [Disable Java API bindings]) # --with-X -WITH_ARG(dpdk_crypto_sw,[Use DPDK cryptodev SW PMDs]) -WITH_ARG(dpdk_mlx5_pmd, [Use DPDK with mlx5 PMD]) # --without-X WITHOUT_ARG(libssl, [Disable libssl]) @@ -130,7 +145,6 @@ AC_SUBST(PRE_DATA_SIZE, [$with_pre_data]) AC_SUBST(APICLI, [-DVPP_API_TEST_BUILTIN=${n_with_apicli}]) AC_DEFINE_UNQUOTED(DPDK_SHARED_LIB, [${n_enable_dpdk_shared}]) -AC_DEFINE_UNQUOTED(DPDK_CRYPTO_SW, [${n_with_dpdk_crypto_sw}]) AC_DEFINE_UNQUOTED(WITH_LIBSSL, [${n_with_libssl}]) @@ -170,6 +184,34 @@ AM_COND_IF([ENABLE_DPDK_SHARED], [AC_MSG_ERROR([DPDK shared library not found])],) ]) +DPDK_IS_PMD_ENABLED(LIBRTE_PMD_AESNI_MB, dpdk_aesni_mb_pmd) +AM_COND_IF([WITH_DPDK_AESNI_MB_PMD], +[ + AC_CHECK_LIB([IPSec_MB], [submit_job_sse], [], + [AC_MSG_ERROR([IPSec_MB library not found])]) +]) + +DPDK_IS_PMD_ENABLED(LIBRTE_PMD_AESNI_GCM, dpdk_aesni_gcm_pmd) +AM_COND_IF([WITH_DPDK_AESNI_GCM_PMD], +[ + AC_CHECK_LIB([isal_crypto], [aesni_gcm128_init], [], + [AC_MSG_ERROR([isal_crypto library not found])]) +]) + +DPDK_IS_PMD_ENABLED(LIBRTE_MLX5_PMD, dpdk_mlx5_pmd) +AM_COND_IF([WITH_DPDK_MLX5_PMD], +[ + AC_CHECK_LIB([ibverbs], [ibv_fork_init], [], + [AC_MSG_ERROR([ibverbs library not found])]) +]) + +DPDK_IS_PMD_ENABLED(LIBRTE_MLX4_PMD, dpdk_mlx4_pmd) +AM_COND_IF([WITH_DPDK_MLX4_PMD], +[ + AC_CHECK_LIB([ibverbs], [ibv_fork_init], [], + [AC_MSG_ERROR([ibverbs library not found])]) +]) + AM_COND_IF([ENABLE_G2], [ PKG_CHECK_MODULES(g2, gtk+-2.0) diff --git a/src/plugins/dpdk.am b/src/plugins/dpdk.am index 75bfb967..3a1ffeeb 100644 --- a/src/plugins/dpdk.am +++ b/src/plugins/dpdk.am @@ -19,14 +19,19 @@ dpdk_plugin_la_LDFLAGS = $(AM_LDFLAGS) -ldpdk else dpdk_plugin_la_LDFLAGS = $(AM_LDFLAGS) -Wl,--whole-archive,-l:libdpdk.a,--no-whole-archive endif -if WITH_DPDK_CRYPTO_SW +if WITH_DPDK_AESNI_MB_PMD dpdk_plugin_la_LDFLAGS += -Wl,--exclude-libs,libIPSec_MB.a,-l:libIPSec_MB.a +endif +if WITH_DPDK_AESNI_GCM_PMD dpdk_plugin_la_LDFLAGS += -Wl,--exclude-libs,libisal_crypto.a,-l:libisal_crypto.a endif dpdk_plugin_la_LDFLAGS += -Wl,-lm,-ldl if WITH_DPDK_MLX5_PMD dpdk_plugin_la_LDFLAGS += -Wl,-libverbs endif +if WITH_DPDK_MLX4_PMD +dpdk_plugin_la_LDFLAGS += -Wl,-libverbs +endif dpdk_plugin_la_SOURCES = \ dpdk/main.c \ -- cgit 1.2.3-korg From acdc306093aaea2633cf765307d6cb7c1b80081c Mon Sep 17 00:00:00 2001 From: Sergio Gonzalez Monroy Date: Thu, 24 Aug 2017 14:09:17 +0100 Subject: dpdk: required changes for 17.08 DPDK 17.08 breaks ethdev and cryptodev APIs. Address those changes while keeping backwards compatibility for DPDK 17.02 and 17.05. Change-Id: Idd6ac264d0d047fe586c41d4c4ca74e8fc778a54 Signed-off-by: Sergio Gonzalez Monroy --- Makefile | 6 +- dpdk/Makefile | 47 +++++--- src/configure.ac | 70 ++++++++++-- src/plugins/dpdk.am | 16 ++- src/plugins/dpdk/device/common.c | 28 ++++- src/plugins/dpdk/device/dpdk.h | 7 ++ src/plugins/dpdk/ipsec/cli.c | 15 +++ src/plugins/dpdk/ipsec/esp.h | 212 ++++++++++++++++++++++++++++++----- src/plugins/dpdk/ipsec/esp_decrypt.c | 140 ++++++++++------------- src/plugins/dpdk/ipsec/esp_encrypt.c | 106 +++++++----------- src/plugins/dpdk/ipsec/ipsec.c | 130 +++++++++++++++++---- src/plugins/dpdk/ipsec/ipsec.h | 33 ++++-- 12 files changed, 567 insertions(+), 243 deletions(-) (limited to 'src/plugins/dpdk.am') diff --git a/Makefile b/Makefile index c1a7cbb5..6ac6f6e7 100644 --- a/Makefile +++ b/Makefile @@ -60,7 +60,7 @@ endif DEB_DEPENDS = curl build-essential autoconf automake bison libssl-dev ccache DEB_DEPENDS += debhelper dkms git libtool libapr1-dev dh-systemd DEB_DEPENDS += libconfuse-dev git-review exuberant-ctags cscope pkg-config -DEB_DEPENDS += lcov chrpath autoconf nasm indent +DEB_DEPENDS += lcov chrpath autoconf nasm indent libnuma-dev DEB_DEPENDS += python-all python-dev python-virtualenv python-pip libffi6 ifeq ($(OS_VERSION_ID),14.04) DEB_DEPENDS += openjdk-8-jdk-headless @@ -73,7 +73,7 @@ endif RPM_DEPENDS = redhat-lsb glibc-static java-1.8.0-openjdk-devel yum-utils RPM_DEPENDS += openssl-devel https://dl.fedoraproject.org/pub/epel/epel-release-latest-7.noarch.rpm apr-devel -RPM_DEPENDS += python-devel +RPM_DEPENDS += python-devel numactl-devel ifeq ($(OS_ID)-$(OS_VERSION_ID),fedora-25) RPM_DEPENDS += python2-virtualenv RPM_DEPENDS_GROUPS = 'C Development Tools and Libraries' @@ -99,7 +99,7 @@ endif RPM_SUSE_DEPENDS = autoconf automake bison ccache chrpath distribution-release gcc6 glibc-devel-static RPM_SUSE_DEPENDS += java-1_8_0-openjdk-devel libopenssl-devel libtool lsb-release make openssl-devel -RPM_SUSE_DEPENDS += python-devel python-pip python-rpm-macros shadow nasm +RPM_SUSE_DEPENDS += python-devel python-pip python-rpm-macros shadow nasm numactl-devel ifneq ($(wildcard $(STARTUP_DIR)/startup.conf),) STARTUP_CONF ?= $(STARTUP_DIR)/startup.conf diff --git a/dpdk/Makefile b/dpdk/Makefile index 2e4b0e96..8d5b42ef 100644 --- a/dpdk/Makefile +++ b/dpdk/Makefile @@ -35,17 +35,27 @@ DPDK_17.08_TARBALL_MD5_CKSUM := 0641f59ea8ea98afefa7cfa2699f6241 DPDK_SOURCE := $(B)/dpdk-$(DPDK_VERSION) MACHINE=$(shell uname -m) +AESNI ?= n +ISA_L_CRYPTO_LIB := n + +IPSEC_MB_VER ?= 0.46 +ISA_L_CRYPTO_VER := 2.18.0 + ifeq ($(MACHINE),$(filter $(MACHINE),x86_64)) -AESNI := y -else -AESNI := n +AESNI = y +# DPDK pre 17.08 depends on ISA-L Crypto library for GCM PMD + ifneq ($(firstword $(sort $(DPDK_VERSION), 17.08)), 17.08) + ISA_L_CRYPTO_LIB = y + IPSEC_MB_VER = 0.45 + $(info Building ISA-L Crypto $(ISA_L_CRYPTO_VER) library) + endif +$(info Building IPSec-MB $(IPSEC_MB_VER) library) endif -IPSEC_MB_VER := 0.45 AESNIMB_LIB_TARBALL := v$(IPSEC_MB_VER).tar.gz AESNIMB_LIB_TARBALL_URL := http://github.com/01org/intel-ipsec-mb/archive/$(AESNIMB_LIB_TARBALL) AESNIMB_LIB_SOURCE := $(B)/intel-ipsec-mb-$(IPSEC_MB_VER) -ISA_L_CRYPTO_VER := 2.18.0 + ISA_L_CRYPTO_LIB_TARBALL := v$(ISA_L_CRYPTO_VER).tar.gz ISA_L_CRYPTO_LIB_TARBALL_URL := http://github.com/01org/isa-l_crypto/archive/$(ISA_L_CRYPTO_LIB_TARBALL) ISA_L_CRYPTO_LIB_SOURCE := $(B)/isa-l_crypto-$(ISA_L_CRYPTO_VER) @@ -100,8 +110,10 @@ else DPDK_EXTRA_CFLAGS := -g -O0 endif +ifeq ($(ISA_L_CRYPTO_LIB),y) DPDK_EXTRA_CFLAGS += -I$(ISA_L_CRYPTO_INSTALL_DIR)/include -Wl,-z,muldefs DPDK_EXTRA_LDFLAGS += -L$(I)/lib +endif DPDK_MAKE_EXTRA_ARGS += AESNI_MULTI_BUFFER_LIB_PATH=$(AESNIMB_LIB_SOURCE) # assemble DPDK make arguments @@ -185,6 +197,8 @@ $(CURDIR)/$(ISA_L_CRYPTO_LIB_TARBALL): DPDK_DOWNLOADS = $(CURDIR)/$(DPDK_TARBALL) ifeq ($(AESNI),y) DPDK_DOWNLOADS += $(CURDIR)/$(AESNIMB_LIB_TARBALL) +endif +ifeq ($(ISA_L_CRYPTO_LIB),y) DPDK_DOWNLOADS += $(CURDIR)/$(ISA_L_CRYPTO_LIB_TARBALL) endif @@ -205,10 +219,12 @@ $(B)/.extract.ok: $(B)/.download.ok ifeq ($(AESNI),y) @echo --- extracting $(AESNIMB_LIB_TARBALL) --- @tar --directory $(B) --extract --file $(CURDIR)/$(AESNIMB_LIB_TARBALL) +endif +ifeq ($(ISA_L_CRYPTO_LIB),y) @echo --- extracting $(ISA_L_CRYPTO_LIB_TARBALL) --- @tar --directory $(B) --extract --file $(CURDIR)/$(ISA_L_CRYPTO_LIB_TARBALL) - @touch $@ endif + @touch $@ .PHONY: extract extract: $(B)/.extract.ok @@ -233,17 +249,11 @@ $(B)/.config.ok: $(B)/.patch.ok $(B)/custom-config .PHONY: config config: $(B)/.config.ok -# Order matters -ifeq ($(AESNI),y) -BUILD_TARGETS += build-ipsec-mb build-isal-crypto build-dpdk -else -BUILD_TARGETS += build-dpdk -endif - .PHONY: build-ipsec-mb build-ipsec-mb: mkdir -p $(I)/lib/ - make -C $(AESNIMB_LIB_SOURCE) -j NO_GCM=y + # Do not build GCM stuff if we are building ISA_L + make -C $(AESNIMB_LIB_SOURCE) -j NO_GCM=$(ISA_L_CRYPTO_LIB) cp $(AESNIMB_LIB_SOURCE)/libIPSec_MB.a $(I)/lib/ .PHONY: build-isal-crypto @@ -260,6 +270,15 @@ build-dpdk: @if [ ! -e $(B)/.config.ok ] ; then echo 'Please run "make config" first' && false ; fi @make $(DPDK_MAKE_ARGS) install +# Order matters +ifeq ($(AESNI),y) +BUILD_TARGETS += build-ipsec-mb +endif +ifeq ($(ISA_L_CRYPTO_LIB),y) +BUILD_TARGETS += build-isal-crypto +endif +BUILD_TARGETS += build-dpdk + $(B)/.build.ok: $(BUILD_TARGETS) @touch $@ diff --git a/src/configure.ac b/src/configure.ac index 4c2d3b47..6b6d9636 100644 --- a/src/configure.ac +++ b/src/configure.ac @@ -97,6 +97,26 @@ AC_DEFUN([DPDK_IS_PMD_ENABLED], m4_append_uniq([list_of_with], [$2], [, ]) ]) +AC_DEFUN([DETECT_DPDK_IS_1702_OR_1705], +[ + AC_MSG_CHECKING([for RTE_VERSION 17.02/17.05 in rte_version.h]) + AC_TRY_RUN( + [ + #include + int main() + { + return ((RTE_VER_YEAR != 17) || + (RTE_VER_MONTH != 2 && RTE_VER_MONTH != 5)); + } + ], + [dpdk_is_1702_or_1705=yes] + [AC_MSG_RESULT([yes])], + [dpdk_is_1702_or_1705=no] + [AC_MSG_RESULT([no])] + ) + AM_CONDITIONAL(DPDK_IS_1702_OR_1705, test "$dpdk_is_1702_or_1705" = "yes") +]) + ############################################################################### # configure arguments ############################################################################### @@ -185,34 +205,64 @@ AM_COND_IF([ENABLE_DPDK_SHARED], [AC_MSG_ERROR([DPDK shared library not found])],) ]) +with_aesni_mb_lib=no +with_isa_l_crypto_lib=no + DPDK_IS_PMD_ENABLED(LIBRTE_PMD_AESNI_MB, dpdk_aesni_mb_pmd) +DPDK_IS_PMD_ENABLED(LIBRTE_PMD_AESNI_GCM, dpdk_aesni_gcm_pmd) + +DETECT_DPDK_IS_1702_OR_1705() + AM_COND_IF([WITH_DPDK_AESNI_MB_PMD], [ - AC_CHECK_LIB([IPSec_MB], [submit_job_sse], [], + AC_CHECK_LIB([IPSec_MB], [submit_job_sse], + [with_aesni_mb_lib=yes], [AC_MSG_ERROR([IPSec_MB library not found])]) ]) -DPDK_IS_PMD_ENABLED(LIBRTE_PMD_AESNI_GCM, dpdk_aesni_gcm_pmd) AM_COND_IF([WITH_DPDK_AESNI_GCM_PMD], [ - AC_CHECK_LIB([isal_crypto], [aesni_gcm128_init], [], - [AC_MSG_ERROR([isal_crypto library not found])]) + AM_COND_IF([DPDK_IS_1702_OR_1705], + [ + AC_CHECK_LIB([isal_crypto], [aesni_gcm128_init], + [with_isa_l_crypto_lib=yes], + [AC_MSG_ERROR([isal_crypto library not found])]) + ], + [ + AC_CHECK_LIB([IPSec_MB], [submit_job_sse], + [with_aesni_mb_lib=yes], + [AC_MSG_ERROR([IPSec_MB library not found])]) + ]) ]) -DPDK_IS_PMD_ENABLED(LIBRTE_MLX5_PMD, dpdk_mlx5_pmd) -AM_COND_IF([WITH_DPDK_MLX5_PMD], +m4_append([list_of_with], [aesni_mb_lib], [, ]) +AM_CONDITIONAL(WITH_AESNI_MB_LIB, test "$with_aesni_mb_lib" = "yes") + +m4_append([list_of_with], [isa_l_crypto_lib], [, ]) +AM_CONDITIONAL(WITH_ISA_L_CRYPTO_LIB, test "$with_isa_l_crypto_lib" = "yes") + + +with_ibverbs_lib=no +DPDK_IS_PMD_ENABLED(LIBRTE_MLX4_PMD, dpdk_mlx4_pmd) +AM_COND_IF([WITH_DPDK_MLX4_PMD], [ - AC_CHECK_LIB([ibverbs], [ibv_fork_init], [], + AC_CHECK_LIB([ibverbs], [ibv_fork_init], + [with_ibverbs_lib=yes], [AC_MSG_ERROR([ibverbs library not found])]) ]) -DPDK_IS_PMD_ENABLED(LIBRTE_MLX4_PMD, dpdk_mlx4_pmd) -AM_COND_IF([WITH_DPDK_MLX4_PMD], +DPDK_IS_PMD_ENABLED(LIBRTE_MLX5_PMD, dpdk_mlx5_pmd) +AM_COND_IF([WITH_DPDK_MLX5_PMD], [ - AC_CHECK_LIB([ibverbs], [ibv_fork_init], [], + AC_CHECK_LIB([ibverbs], [ibv_fork_init], + [with_ibverbs_lib=yes], [AC_MSG_ERROR([ibverbs library not found])]) ]) +m4_append([list_of_with], [ibverbs_lib], [, ]) +AM_CONDITIONAL(WITH_IBVERBS_LIB, test "$with_ibverbs_lib" = "yes") + + AM_COND_IF([ENABLE_G2], [ PKG_CHECK_MODULES(g2, gtk+-2.0) diff --git a/src/plugins/dpdk.am b/src/plugins/dpdk.am index 3a1ffeeb..15195a21 100644 --- a/src/plugins/dpdk.am +++ b/src/plugins/dpdk.am @@ -19,20 +19,24 @@ dpdk_plugin_la_LDFLAGS = $(AM_LDFLAGS) -ldpdk else dpdk_plugin_la_LDFLAGS = $(AM_LDFLAGS) -Wl,--whole-archive,-l:libdpdk.a,--no-whole-archive endif -if WITH_DPDK_AESNI_MB_PMD +if WITH_AESNI_MB_LIB dpdk_plugin_la_LDFLAGS += -Wl,--exclude-libs,libIPSec_MB.a,-l:libIPSec_MB.a endif -if WITH_DPDK_AESNI_GCM_PMD +if WITH_ISA_L_CRYPTO_LIB dpdk_plugin_la_LDFLAGS += -Wl,--exclude-libs,libisal_crypto.a,-l:libisal_crypto.a endif -dpdk_plugin_la_LDFLAGS += -Wl,-lm,-ldl -if WITH_DPDK_MLX5_PMD +if WITH_IBVERBS_LIB dpdk_plugin_la_LDFLAGS += -Wl,-libverbs endif -if WITH_DPDK_MLX4_PMD -dpdk_plugin_la_LDFLAGS += -Wl,-libverbs +if DPDK_IS_1702_OR_1705 +dpdk_plugin_la_CFLAGS = $(AM_CFLAGS) -DDPDK_VOID_CALLBACK=1 -DDPDK_NO_AEAD=1 +else +dpdk_plugin_la_CFLAGS = $(AM_CFLAGS) -DDPDK_VOID_CALLBACK=0 -DDPDK_NO_AEAD=0 +dpdk_plugin_la_LDFLAGS += -Wl,-lnuma endif +dpdk_plugin_la_LDFLAGS += -Wl,-lm,-ldl + dpdk_plugin_la_SOURCES = \ dpdk/main.c \ dpdk/buffer.c \ diff --git a/src/plugins/dpdk/device/common.c b/src/plugins/dpdk/device/common.c index df52c58f..2707b4d8 100644 --- a/src/plugins/dpdk/device/common.c +++ b/src/plugins/dpdk/device/common.c @@ -181,9 +181,9 @@ dpdk_device_stop (dpdk_device_t * xd) } } -void -dpdk_port_state_callback (uint8_t port_id, - enum rte_eth_event_type type, void *param) +always_inline int +dpdk_port_state_callback_inline (uint8_t port_id, + enum rte_eth_event_type type, void *param) { struct rte_eth_link link; vlib_main_t *vm = vlib_get_main (); @@ -193,7 +193,7 @@ dpdk_port_state_callback (uint8_t port_id, if (type != RTE_ETH_EVENT_INTR_LSC) { clib_warning ("Unknown event %d received for port %d", type, port_id); - return; + return -1; } rte_eth_link_get_nowait (port_id, &link); @@ -238,8 +238,28 @@ dpdk_port_state_callback (uint8_t port_id, else clib_warning ("Port %d Link Down\n\n", port_id); } + + return 0; +} + +#if DPDK_VOID_CALLBACK +void +dpdk_port_state_callback (uint8_t port_id, + enum rte_eth_event_type type, void *param) +{ + dpdk_port_state_callback_inline (port_id, type, param); } +#else +int +dpdk_port_state_callback (uint8_t port_id, + enum rte_eth_event_type type, + void *param, + void *ret_param __attribute__ ((unused))) +{ + return dpdk_port_state_callback_inline (port_id, type, param); +} +#endif /* * fd.io coding-style-patch-verification: ON * diff --git a/src/plugins/dpdk/device/dpdk.h b/src/plugins/dpdk/device/dpdk.h index 29a2c760..1e34e3fb 100644 --- a/src/plugins/dpdk/device/dpdk.h +++ b/src/plugins/dpdk/device/dpdk.h @@ -418,8 +418,15 @@ typedef struct void dpdk_device_setup (dpdk_device_t * xd); void dpdk_device_start (dpdk_device_t * xd); void dpdk_device_stop (dpdk_device_t * xd); + +#if DPDK_VOID_CALLBACK void dpdk_port_state_callback (uint8_t port_id, enum rte_eth_event_type type, void *param); +#else +int dpdk_port_state_callback (uint8_t port_id, + enum rte_eth_event_type type, + void *param, void *ret_param); +#endif #define foreach_dpdk_error \ _(NONE, "no error") \ diff --git a/src/plugins/dpdk/ipsec/cli.c b/src/plugins/dpdk/ipsec/cli.c index a9314065..a9cf2502 100644 --- a/src/plugins/dpdk/ipsec/cli.c +++ b/src/plugins/dpdk/ipsec/cli.c @@ -86,13 +86,28 @@ dpdk_ipsec_show_mapping (vlib_main_t * vm, u16 detail_display) hash_foreach (key, data, cwm->algo_qp_map, ({ cap.op = RTE_CRYPTO_OP_TYPE_SYMMETRIC; +#if DPDK_NO_AEAD cap.sym.xform_type = RTE_CRYPTO_SYM_XFORM_CIPHER; cap.sym.cipher.algo = p_key->cipher_algo; +#else + if (p_key->is_aead) + { + cap.sym.xform_type = RTE_CRYPTO_SYM_XFORM_AEAD; + cap.sym.aead.algo = p_key->cipher_algo; + } + else + { + cap.sym.xform_type = RTE_CRYPTO_SYM_XFORM_CIPHER; + cap.sym.cipher.algo = p_key->cipher_algo; + } +#endif check_algo_is_supported (&cap, cipher_str); + cap.op = RTE_CRYPTO_OP_TYPE_SYMMETRIC; cap.sym.xform_type = RTE_CRYPTO_SYM_XFORM_AUTH; cap.sym.auth.algo = p_key->auth_algo; check_algo_is_supported (&cap, auth_str); + vlib_cli_output (vm, "%u\t%10s\t%15s\t%3s\t%u\t%u\n", vlib_mains[i]->thread_index, cipher_str, auth_str, p_key->is_outbound ? "out" : "in", diff --git a/src/plugins/dpdk/ipsec/esp.h b/src/plugins/dpdk/ipsec/esp.h index 56f0c756..308a66af 100644 --- a/src/plugins/dpdk/ipsec/esp.h +++ b/src/plugins/dpdk/ipsec/esp.h @@ -22,6 +22,9 @@ typedef struct { enum rte_crypto_cipher_algorithm algo; +#if ! DPDK_NO_AEAD + enum rte_crypto_aead_algorithm aead_algo; +#endif u8 key_len; u8 iv_len; } dpdk_esp_crypto_alg_t; @@ -65,7 +68,11 @@ dpdk_esp_init () c->iv_len = 16; c = &em->esp_crypto_algs[IPSEC_CRYPTO_ALG_AES_GCM_128]; +#if DPDK_NO_AEAD c->algo = RTE_CRYPTO_CIPHER_AES_GCM; +#else + c->aead_algo = RTE_CRYPTO_AEAD_AES_GCM; +#endif c->key_len = 16; c->iv_len = 8; @@ -90,42 +97,68 @@ dpdk_esp_init () i = &em->esp_integ_algs[IPSEC_INTEG_ALG_SHA_512_256]; i->algo = RTE_CRYPTO_AUTH_SHA512_HMAC; i->trunc_size = 32; - +#if DPDK_NO_AEAD i = &em->esp_integ_algs[IPSEC_INTEG_ALG_AES_GCM_128]; i->algo = RTE_CRYPTO_AUTH_AES_GCM; i->trunc_size = 16; +#endif } static_always_inline int translate_crypto_algo (ipsec_crypto_alg_t crypto_algo, - struct rte_crypto_sym_xform *cipher_xform) + struct rte_crypto_sym_xform *xform, u8 use_esn) { +#if ! DPDK_NO_AEAD + const u16 iv_off = + sizeof (struct rte_crypto_op) + sizeof (struct rte_crypto_sym_op) + + offsetof (dpdk_cop_priv_t, cb); +#endif + + xform->type = RTE_CRYPTO_SYM_XFORM_CIPHER; + switch (crypto_algo) { case IPSEC_CRYPTO_ALG_NONE: - cipher_xform->cipher.algo = RTE_CRYPTO_CIPHER_NULL; +#if ! DPDK_NO_AEAD + xform->cipher.iv.offset = iv_off; + xform->cipher.iv.length = 0; +#endif + xform->cipher.algo = RTE_CRYPTO_CIPHER_NULL; break; case IPSEC_CRYPTO_ALG_AES_CBC_128: case IPSEC_CRYPTO_ALG_AES_CBC_192: case IPSEC_CRYPTO_ALG_AES_CBC_256: - cipher_xform->cipher.algo = RTE_CRYPTO_CIPHER_AES_CBC; +#if ! DPDK_NO_AEAD + xform->cipher.iv.offset = iv_off; + xform->cipher.iv.length = 16; +#endif + xform->cipher.algo = RTE_CRYPTO_CIPHER_AES_CBC; break; case IPSEC_CRYPTO_ALG_AES_GCM_128: - cipher_xform->cipher.algo = RTE_CRYPTO_CIPHER_AES_GCM; +#if DPDK_NO_AEAD + xform->cipher.algo = RTE_CRYPTO_CIPHER_AES_GCM; +#else + xform->type = RTE_CRYPTO_SYM_XFORM_AEAD; + xform->aead.algo = RTE_CRYPTO_AEAD_AES_GCM; + xform->aead.iv.offset = iv_off; + xform->aead.iv.length = 12; /* GCM IV, not ESP IV */ + xform->aead.digest_length = 16; + xform->aead.aad_length = use_esn ? 12 : 8; +#endif break; default: return -1; } - cipher_xform->type = RTE_CRYPTO_SYM_XFORM_CIPHER; - return 0; } static_always_inline int translate_integ_algo (ipsec_integ_alg_t integ_alg, - struct rte_crypto_sym_xform *auth_xform, int use_esn) + struct rte_crypto_sym_xform *auth_xform, u8 use_esn) { + auth_xform->type = RTE_CRYPTO_SYM_XFORM_AUTH; + switch (integ_alg) { case IPSEC_INTEG_ALG_NONE: @@ -152,21 +185,21 @@ translate_integ_algo (ipsec_integ_alg_t integ_alg, auth_xform->auth.algo = RTE_CRYPTO_AUTH_SHA512_HMAC; auth_xform->auth.digest_length = 32; break; +#if DPDK_NO_AEAD case IPSEC_INTEG_ALG_AES_GCM_128: auth_xform->auth.algo = RTE_CRYPTO_AUTH_AES_GCM; auth_xform->auth.digest_length = 16; auth_xform->auth.add_auth_data_length = use_esn ? 12 : 8; break; +#endif default: return -1; } - auth_xform->type = RTE_CRYPTO_SYM_XFORM_AUTH; - return 0; } -static_always_inline int +static_always_inline i32 create_sym_sess (ipsec_sa_t * sa, crypto_sa_session_t * sa_sess, u8 is_outbound) { @@ -178,6 +211,10 @@ create_sym_sess (ipsec_sa_t * sa, crypto_sa_session_t * sa_sess, struct rte_crypto_sym_xform *xfs; uword key = 0, *data; crypto_worker_qp_key_t *p_key = (crypto_worker_qp_key_t *) & key; +#if ! DPDK_NO_AEAD + i32 socket_id = rte_socket_id (); + i32 ret; +#endif if (sa->crypto_alg == IPSEC_CRYPTO_ALG_AES_GCM_128) { @@ -190,15 +227,7 @@ create_sym_sess (ipsec_sa_t * sa, crypto_sa_session_t * sa_sess, sa->salt = random_u32 (&seed); } - cipher_xform.type = RTE_CRYPTO_SYM_XFORM_CIPHER; - cipher_xform.cipher.key.data = sa->crypto_key; - cipher_xform.cipher.key.length = sa->crypto_key_len; - - auth_xform.type = RTE_CRYPTO_SYM_XFORM_AUTH; - auth_xform.auth.key.data = sa->integ_key; - auth_xform.auth.key.length = sa->integ_key_len; - - if (translate_crypto_algo (sa->crypto_alg, &cipher_xform) < 0) + if (translate_crypto_algo (sa->crypto_alg, &cipher_xform, sa->use_esn) < 0) return -1; p_key->cipher_algo = cipher_xform.cipher.algo; @@ -206,19 +235,44 @@ create_sym_sess (ipsec_sa_t * sa, crypto_sa_session_t * sa_sess, return -1; p_key->auth_algo = auth_xform.auth.algo; - if (is_outbound) +#if ! DPDK_NO_AEAD + if (sa->crypto_alg == IPSEC_CRYPTO_ALG_AES_GCM_128) { - cipher_xform.cipher.op = RTE_CRYPTO_CIPHER_OP_ENCRYPT; - auth_xform.auth.op = RTE_CRYPTO_AUTH_OP_GENERATE; - cipher_xform.next = &auth_xform; + cipher_xform.aead.key.data = sa->crypto_key; + cipher_xform.aead.key.length = sa->crypto_key_len; + + if (is_outbound) + cipher_xform.cipher.op = RTE_CRYPTO_AEAD_OP_ENCRYPT; + else + cipher_xform.cipher.op = RTE_CRYPTO_AEAD_OP_DECRYPT; + cipher_xform.next = NULL; xfs = &cipher_xform; + p_key->is_aead = 1; } - else + else /* Cipher + Auth */ +#endif { - cipher_xform.cipher.op = RTE_CRYPTO_CIPHER_OP_DECRYPT; - auth_xform.auth.op = RTE_CRYPTO_AUTH_OP_VERIFY; - auth_xform.next = &cipher_xform; - xfs = &auth_xform; + cipher_xform.cipher.key.data = sa->crypto_key; + cipher_xform.cipher.key.length = sa->crypto_key_len; + + auth_xform.auth.key.data = sa->integ_key; + auth_xform.auth.key.length = sa->integ_key_len; + + if (is_outbound) + { + cipher_xform.cipher.op = RTE_CRYPTO_CIPHER_OP_ENCRYPT; + auth_xform.auth.op = RTE_CRYPTO_AUTH_OP_GENERATE; + cipher_xform.next = &auth_xform; + xfs = &cipher_xform; + } + else + { + cipher_xform.cipher.op = RTE_CRYPTO_CIPHER_OP_DECRYPT; + auth_xform.auth.op = RTE_CRYPTO_AUTH_OP_VERIFY; + auth_xform.next = &cipher_xform; + xfs = &auth_xform; + } + p_key->is_aead = 0; } p_key->is_outbound = is_outbound; @@ -227,17 +281,115 @@ create_sym_sess (ipsec_sa_t * sa, crypto_sa_session_t * sa_sess, if (!data) return -1; +#if DPDK_NO_AEAD sa_sess->sess = rte_cryptodev_sym_session_create (cwm->qp_data[*data].dev_id, xfs); - if (!sa_sess->sess) return -1; +#else + sa_sess->sess = + rte_cryptodev_sym_session_create (dcm->sess_h_pools[socket_id]); + if (!sa_sess->sess) + return -1; + + ret = + rte_cryptodev_sym_session_init (cwm->qp_data[*data].dev_id, sa_sess->sess, + xfs, dcm->sess_pools[socket_id]); + if (ret) + return -1; +#endif sa_sess->qp_index = (u8) * data; return 0; } +static_always_inline void +crypto_set_icb (dpdk_gcm_cnt_blk * icb, u32 salt, u32 seq, u32 seq_hi) +{ + icb->salt = salt; + icb->iv[0] = seq; + icb->iv[1] = seq_hi; +#if DPDK_NO_AEAD + icb->cnt = clib_host_to_net_u32 (1); +#endif +} + +#define __unused __attribute__((unused)) +static_always_inline void +crypto_op_setup (u8 is_aead, struct rte_mbuf *mb0, + struct rte_crypto_op *cop, void *session, + u32 cipher_off, u32 cipher_len, + u8 * icb __unused, u32 iv_size __unused, + u32 auth_off, u32 auth_len, + u8 * aad __unused, u32 aad_size __unused, + u8 * digest, u64 digest_paddr, u32 digest_size __unused) +{ + struct rte_crypto_sym_op *sym_cop; + + sym_cop = (struct rte_crypto_sym_op *) (cop + 1); + + sym_cop->m_src = mb0; + rte_crypto_op_attach_sym_session (cop, session); + + if (!digest_paddr) + digest_paddr = + rte_pktmbuf_mtophys_offset (mb0, (uintptr_t) digest - (uintptr_t) mb0); + +#if DPDK_NO_AEAD + sym_cop->cipher.data.offset = cipher_off; + sym_cop->cipher.data.length = cipher_len; + + sym_cop->cipher.iv.data = icb; + sym_cop->cipher.iv.phys_addr = + cop->phys_addr + (uintptr_t) icb - (uintptr_t) cop; + sym_cop->cipher.iv.length = iv_size; + + if (is_aead) + { + sym_cop->auth.aad.data = aad; + sym_cop->auth.aad.phys_addr = + cop->phys_addr + (uintptr_t) aad - (uintptr_t) cop; + sym_cop->auth.aad.length = aad_size; + } + else + { + sym_cop->auth.data.offset = auth_off; + sym_cop->auth.data.length = auth_len; + } + + sym_cop->auth.digest.data = digest; + sym_cop->auth.digest.phys_addr = digest_paddr; + sym_cop->auth.digest.length = digest_size; +#else /* ! DPDK_NO_AEAD */ + if (is_aead) + { + sym_cop->aead.data.offset = cipher_off; + sym_cop->aead.data.length = cipher_len; + + sym_cop->aead.aad.data = aad; + sym_cop->aead.aad.phys_addr = + cop->phys_addr + (uintptr_t) aad - (uintptr_t) cop; + + sym_cop->aead.digest.data = digest; + sym_cop->aead.digest.phys_addr = digest_paddr; + } + else + { + sym_cop->cipher.data.offset = cipher_off; + sym_cop->cipher.data.length = cipher_len; + + sym_cop->auth.data.offset = auth_off; + sym_cop->auth.data.length = auth_len; + + sym_cop->auth.digest.data = digest; + sym_cop->auth.digest.phys_addr = digest_paddr; + } +#endif /* DPDK_NO_AEAD */ +} + +#undef __unused + #endif /* __DPDK_ESP_H__ */ /* diff --git a/src/plugins/dpdk/ipsec/esp_decrypt.c b/src/plugins/dpdk/ipsec/esp_decrypt.c index 9377970a..c4f295d3 100644 --- a/src/plugins/dpdk/ipsec/esp_decrypt.c +++ b/src/plugins/dpdk/ipsec/esp_decrypt.c @@ -44,8 +44,7 @@ typedef enum { _(NOT_IP, "Not IP packet (dropped)") \ _(ENQ_FAIL, "Enqueue failed (buffer full)") \ _(NO_CRYPTODEV, "Cryptodev not configured") \ - _(BAD_LEN, "Invalid ciphertext length") \ - _(UNSUPPORTED, "Cipher/Auth not supported") + _(BAD_LEN, "Invalid ciphertext length") typedef enum { @@ -122,7 +121,7 @@ dpdk_esp_decrypt_node_fn (vlib_main_t * vm, while (n_left_from > 0 && n_left_to_next > 0) { - u32 bi0, sa_index0 = ~0, seq, icv_size, iv_size; + u32 bi0, sa_index0 = ~0, seq, trunc_size, iv_size; vlib_buffer_t * b0; esp_header_t * esp0; ipsec_sa_t * sa0; @@ -169,18 +168,6 @@ dpdk_esp_decrypt_node_fn (vlib_main_t * vm, sa0->total_data_size += b0->current_length; - if (PREDICT_FALSE(sa0->integ_alg == IPSEC_INTEG_ALG_NONE) || - PREDICT_FALSE(sa0->crypto_alg == IPSEC_CRYPTO_ALG_NONE)) - { - clib_warning ("SPI %u : only cipher + auth supported", sa0->spi); - vlib_node_increment_counter (vm, dpdk_esp_decrypt_node.index, - ESP_DECRYPT_ERROR_UNSUPPORTED, 1); - to_next[0] = bi0; - to_next += 1; - n_left_to_next -= 1; - goto trace; - } - sa_sess = pool_elt_at_index(cwm->sa_sess_d[0], sa_index0); if (PREDICT_FALSE(!sa_sess->sess)) @@ -211,7 +198,10 @@ dpdk_esp_decrypt_node_fn (vlib_main_t * vm, rte_crypto_op_attach_sym_session(cop, sess); - icv_size = em->esp_integ_algs[sa0->integ_alg].trunc_size; + if (sa0->crypto_alg == IPSEC_CRYPTO_ALG_AES_GCM_128) + trunc_size = 16; + else + trunc_size = em->esp_integ_algs[sa0->integ_alg].trunc_size; iv_size = em->esp_crypto_algs[sa0->crypto_alg].iv_len; /* Convert vlib buffer to mbuf */ @@ -222,7 +212,7 @@ dpdk_esp_decrypt_node_fn (vlib_main_t * vm, /* Outer IP header has already been stripped */ u16 payload_len = rte_pktmbuf_pkt_len(mb0) - sizeof (esp_header_t) - - iv_size - icv_size; + iv_size - trunc_size; if ((payload_len & (BLOCK_SIZE - 1)) || (payload_len <= 0)) { @@ -242,84 +232,64 @@ dpdk_esp_decrypt_node_fn (vlib_main_t * vm, struct rte_crypto_sym_op *sym_cop = (struct rte_crypto_sym_op *)(cop + 1); - sym_cop->m_src = mb0; - sym_cop->cipher.data.offset = sizeof (esp_header_t) + iv_size; - sym_cop->cipher.data.length = payload_len; + u8 is_aead = sa0->crypto_alg == IPSEC_CRYPTO_ALG_AES_GCM_128; + u32 cipher_off, cipher_len; + u32 auth_off = 0, auth_len = 0, aad_size = 0; + u8 *aad = NULL, *digest = NULL; + u64 digest_paddr = 0; u8 *iv = rte_pktmbuf_mtod_offset(mb0, void*, sizeof (esp_header_t)); - dpdk_cop_priv_t * priv = (dpdk_cop_priv_t *)(sym_cop + 1); + dpdk_cop_priv_t *priv = (dpdk_cop_priv_t *)(sym_cop + 1); + dpdk_gcm_cnt_blk *icb = &priv->cb; + + cipher_off = sizeof (esp_header_t) + iv_size; + cipher_len = payload_len; - if (sa0->crypto_alg == IPSEC_CRYPTO_ALG_AES_GCM_128) + digest = + vlib_buffer_get_current (b0) + sizeof(esp_header_t) + + iv_size + payload_len; + + if (is_aead) { - dpdk_gcm_cnt_blk *icb = &priv->cb; - icb->salt = sa0->salt; - clib_memcpy(icb->iv, iv, 8); - icb->cnt = clib_host_to_net_u32(1); - sym_cop->cipher.iv.data = (u8 *)icb; - sym_cop->cipher.iv.phys_addr = cop->phys_addr + - (uintptr_t)icb - (uintptr_t)cop; - sym_cop->cipher.iv.length = 16; - - u8 *aad = priv->aad; - clib_memcpy(aad, iv - sizeof(esp_header_t), 8); - sym_cop->auth.aad.data = aad; - sym_cop->auth.aad.phys_addr = cop->phys_addr + - (uintptr_t)aad - (uintptr_t)cop; - if (sa0->use_esn) - { - *((u32*)&aad[8]) = sa0->seq_hi; - sym_cop->auth.aad.length = 12; - } - else - { - sym_cop->auth.aad.length = 8; - } + u32 *_iv = (u32 *) iv; - sym_cop->auth.digest.data = rte_pktmbuf_mtod_offset(mb0, void*, - rte_pktmbuf_pkt_len(mb0) - icv_size); - sym_cop->auth.digest.phys_addr = rte_pktmbuf_mtophys_offset(mb0, - rte_pktmbuf_pkt_len(mb0) - icv_size); - sym_cop->auth.digest.length = icv_size; + crypto_set_icb (icb, sa0->salt, _iv[0], _iv[1]); + iv_size = 16; + aad = priv->aad; + clib_memcpy(aad, esp0, 8); + aad_size = 8; + if (sa0->use_esn) + { + *((u32*)&aad[8]) = sa0->seq_hi; + aad_size = 12; + } } else { - sym_cop->cipher.iv.data = rte_pktmbuf_mtod_offset(mb0, void*, - sizeof (esp_header_t)); - sym_cop->cipher.iv.phys_addr = rte_pktmbuf_mtophys_offset(mb0, - sizeof (esp_header_t)); - sym_cop->cipher.iv.length = iv_size; + clib_memcpy(icb, iv, 16); + + auth_off = 0; + auth_len = sizeof(esp_header_t) + iv_size + payload_len; if (sa0->use_esn) { dpdk_cop_priv_t* priv = (dpdk_cop_priv_t*) (sym_cop + 1); - u8* payload_end = rte_pktmbuf_mtod_offset( - mb0, u8*, sizeof(esp_header_t) + iv_size + payload_len); - - clib_memcpy (priv->icv, payload_end, icv_size); - *((u32*) payload_end) = sa0->seq_hi; - sym_cop->auth.data.offset = 0; - sym_cop->auth.data.length = sizeof(esp_header_t) + iv_size - + payload_len + sizeof(sa0->seq_hi); - sym_cop->auth.digest.data = priv->icv; - sym_cop->auth.digest.phys_addr = cop->phys_addr - + (uintptr_t) priv->icv - (uintptr_t) cop; - sym_cop->auth.digest.length = icv_size; - } - else - { - sym_cop->auth.data.offset = 0; - sym_cop->auth.data.length = sizeof(esp_header_t) + - iv_size + payload_len; - - sym_cop->auth.digest.data = rte_pktmbuf_mtod_offset(mb0, void*, - rte_pktmbuf_pkt_len(mb0) - icv_size); - sym_cop->auth.digest.phys_addr = rte_pktmbuf_mtophys_offset(mb0, - rte_pktmbuf_pkt_len(mb0) - icv_size); - sym_cop->auth.digest.length = icv_size; + + clib_memcpy (priv->icv, digest, trunc_size); + *((u32*) digest) = sa0->seq_hi; + auth_len += sizeof(sa0->seq_hi); + + digest = priv->icv; + digest_paddr = + cop->phys_addr + (uintptr_t) priv->icv - (uintptr_t) cop; } } + crypto_op_setup (is_aead, mb0, cop, sess, + cipher_off, cipher_len, (u8 *) icb, iv_size, + auth_off, auth_len, aad, aad_size, + digest, digest_paddr, trunc_size); trace: if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED)) { @@ -339,6 +309,9 @@ trace: { u32 enq; + if (!n_cop_qp[i]) + continue; + qpd = vec_elt_at_index(cwm->qp_data, i); enq = rte_cryptodev_enqueue_burst(qpd->dev_id, qpd->qp_id, qpd->cops, n_cop_qp[i]); @@ -433,7 +406,7 @@ dpdk_esp_decrypt_post_node_fn (vlib_main_t * vm, while (n_left_from > 0 && n_left_to_next > 0) { esp_footer_t * f0; - u32 bi0, next0, icv_size, iv_size; + u32 bi0, next0, trunc_size, iv_size; vlib_buffer_t * b0 = 0; ip4_header_t *ih4 = 0, *oh4 = 0; ip6_header_t *ih6 = 0, *oh6 = 0; @@ -455,7 +428,10 @@ dpdk_esp_decrypt_post_node_fn (vlib_main_t * vm, to_next[0] = bi0; to_next += 1; - icv_size = em->esp_integ_algs[sa0->integ_alg].trunc_size; + if (sa0->crypto_alg == IPSEC_CRYPTO_ALG_AES_GCM_128) + trunc_size = 16; + else + trunc_size = em->esp_integ_algs[sa0->integ_alg].trunc_size; iv_size = em->esp_crypto_algs[sa0->crypto_alg].iv_len; if (sa0->use_anti_replay) @@ -472,7 +448,7 @@ dpdk_esp_decrypt_post_node_fn (vlib_main_t * vm, ih4 = (ip4_header_t *) (b0->data + sizeof(ethernet_header_t)); vlib_buffer_advance (b0, sizeof (esp_header_t) + iv_size); - b0->current_length -= (icv_size + 2); + b0->current_length -= (trunc_size + 2); b0->flags |= VLIB_BUFFER_TOTAL_LENGTH_VALID; f0 = (esp_footer_t *) ((u8 *) vlib_buffer_get_current (b0) + b0->current_length); diff --git a/src/plugins/dpdk/ipsec/esp_encrypt.c b/src/plugins/dpdk/ipsec/esp_encrypt.c index ac552f6c..6de444fd 100644 --- a/src/plugins/dpdk/ipsec/esp_encrypt.c +++ b/src/plugins/dpdk/ipsec/esp_encrypt.c @@ -43,8 +43,7 @@ typedef enum _(RX_PKTS, "ESP pkts received") \ _(SEQ_CYCLED, "sequence number cycled") \ _(ENQ_FAIL, "Enqueue failed (buffer full)") \ - _(NO_CRYPTODEV, "Cryptodev not configured") \ - _(UNSUPPORTED, "Cipher/Auth not supported") + _(NO_CRYPTODEV, "Cryptodev not configured") typedef enum @@ -142,6 +141,7 @@ dpdk_esp_encrypt_node_fn (vlib_main_t * vm, const int BLOCK_SIZE = 16; u32 iv_size; u16 orig_sz; + u8 trunc_size; crypto_sa_session_t *sa_sess; void *sess; struct rte_crypto_op *cop = 0; @@ -199,6 +199,11 @@ dpdk_esp_encrypt_node_fn (vlib_main_t * vm, ssize_t adv; iv_size = em->esp_crypto_algs[sa0->crypto_alg].iv_len; + if (sa0->crypto_alg == IPSEC_CRYPTO_ALG_AES_GCM_128) + trunc_size = 16; + else + trunc_size = em->esp_integ_algs[sa0->integ_alg].trunc_size; + ih0 = vlib_buffer_get_current (b0); orig_sz = b0->current_length; is_ipv6 = (ih0->ip4.ip_version_and_header_length & 0xF0) == 0x60; @@ -314,9 +319,6 @@ dpdk_esp_encrypt_node_fn (vlib_main_t * vm, transport_mode = 1; } - ASSERT (sa0->crypto_alg < IPSEC_CRYPTO_N_ALG); - ASSERT (sa0->crypto_alg != IPSEC_CRYPTO_ALG_NONE); - int blocks = 1 + (orig_sz + 1) / BLOCK_SIZE; /* pad packet in input buffer */ @@ -330,8 +332,7 @@ dpdk_esp_encrypt_node_fn (vlib_main_t * vm, f0 = vlib_buffer_get_current (b0) + b0->current_length + pad_bytes; f0->pad_length = pad_bytes; f0->next_header = next_hdr_type; - b0->current_length += pad_bytes + 2 + - em->esp_integ_algs[sa0->integ_alg].trunc_size; + b0->current_length += pad_bytes + 2 + trunc_size; vnet_buffer (b0)->sw_if_index[VLIB_RX] = vnet_buffer (b0)->sw_if_index[VLIB_RX]; @@ -349,88 +350,64 @@ dpdk_esp_encrypt_node_fn (vlib_main_t * vm, mb0->pkt_len = b0->current_length; mb0->data_off = RTE_PKTMBUF_HEADROOM + b0->current_data; - rte_crypto_op_attach_sym_session (cop, sess); + dpdk_gcm_cnt_blk *icb = &priv->cb; - sym_cop->m_src = mb0; + crypto_set_icb (icb, sa0->salt, sa0->seq, sa0->seq_hi); - dpdk_gcm_cnt_blk *icb = &priv->cb; - icb->salt = sa0->salt; - icb->iv[0] = sa0->seq; - icb->iv[1] = sa0->seq_hi; - icb->cnt = clib_host_to_net_u32 (1); + u8 is_aead = sa0->crypto_alg == IPSEC_CRYPTO_ALG_AES_GCM_128; + u32 cipher_off, cipher_len; + u32 auth_off = 0, auth_len = 0, aad_size = 0; + u8 *aad = NULL, *digest = NULL; - if (sa0->crypto_alg == IPSEC_CRYPTO_ALG_AES_GCM_128) + if (is_aead) { u32 *esp_iv = (u32 *) (b0->data + b0->current_data + ip_hdr_size + sizeof (esp_header_t)); esp_iv[0] = sa0->seq; esp_iv[1] = sa0->seq_hi; - sym_cop->cipher.data.offset = - ip_hdr_size + sizeof (esp_header_t) + iv_size; - sym_cop->cipher.data.length = BLOCK_SIZE * blocks; - sym_cop->cipher.iv.length = 16; - } - else - { - sym_cop->cipher.data.offset = - ip_hdr_size + sizeof (esp_header_t); - sym_cop->cipher.data.length = BLOCK_SIZE * blocks + iv_size; - sym_cop->cipher.iv.length = iv_size; - } - sym_cop->cipher.iv.data = (u8 *) icb; - sym_cop->cipher.iv.phys_addr = cop->phys_addr + (uintptr_t) icb - - (uintptr_t) cop; + cipher_off = ip_hdr_size + sizeof (esp_header_t) + iv_size; + cipher_len = BLOCK_SIZE * blocks; + iv_size = 16; /* GCM IV size, not ESP IV size */ - - ASSERT (sa0->integ_alg < IPSEC_INTEG_N_ALG); - ASSERT (sa0->integ_alg != IPSEC_INTEG_ALG_NONE); - - if (PREDICT_FALSE (sa0->integ_alg == IPSEC_INTEG_ALG_AES_GCM_128)) - { - u8 *aad = priv->aad; + aad = priv->aad; clib_memcpy (aad, vlib_buffer_get_current (b0) + ip_hdr_size, 8); - sym_cop->auth.aad.data = aad; - sym_cop->auth.aad.phys_addr = cop->phys_addr + - (uintptr_t) aad - (uintptr_t) cop; - + aad_size = 8; if (PREDICT_FALSE (sa0->use_esn)) { *((u32 *) & aad[8]) = sa0->seq_hi; - sym_cop->auth.aad.length = 12; - } - else - { - sym_cop->auth.aad.length = 8; + aad_size = 12; } + + digest = + vlib_buffer_get_current (b0) + b0->current_length - + trunc_size; } else { - sym_cop->auth.data.offset = ip_hdr_size; - sym_cop->auth.data.length = b0->current_length - ip_hdr_size - - em->esp_integ_algs[sa0->integ_alg].trunc_size; + cipher_off = ip_hdr_size + sizeof (esp_header_t); + cipher_len = BLOCK_SIZE * blocks + iv_size; + + auth_off = ip_hdr_size; + auth_len = b0->current_length - ip_hdr_size - trunc_size; + + digest = + vlib_buffer_get_current (b0) + b0->current_length - + trunc_size; if (PREDICT_FALSE (sa0->use_esn)) { - u8 *payload_end = - vlib_buffer_get_current (b0) + b0->current_length; - *((u32 *) payload_end) = sa0->seq_hi; - sym_cop->auth.data.length += sizeof (sa0->seq_hi); + *((u32 *) digest) = sa0->seq_hi; + auth_len += sizeof (sa0->seq_hi); } } - sym_cop->auth.digest.data = vlib_buffer_get_current (b0) + - b0->current_length - - em->esp_integ_algs[sa0->integ_alg].trunc_size; - sym_cop->auth.digest.phys_addr = rte_pktmbuf_mtophys_offset (mb0, - b0->current_length - - - em->esp_integ_algs - [sa0->integ_alg].trunc_size); - sym_cop->auth.digest.length = - em->esp_integ_algs[sa0->integ_alg].trunc_size; + crypto_op_setup (is_aead, mb0, cop, sess, + cipher_off, cipher_len, (u8 *) icb, iv_size, + auth_off, auth_len, aad, aad_size, + digest, 0, trunc_size); if (PREDICT_FALSE (is_ipv6)) { @@ -470,6 +447,9 @@ dpdk_esp_encrypt_node_fn (vlib_main_t * vm, { u32 enq; + if (!n_cop_qp[i]) + continue; + qpd = vec_elt_at_index(cwm->qp_data, i); enq = rte_cryptodev_enqueue_burst(qpd->dev_id, qpd->qp_id, qpd->cops, n_cop_qp[i]); diff --git a/src/plugins/dpdk/ipsec/ipsec.c b/src/plugins/dpdk/ipsec/ipsec.c index 7066564d..c922940c 100644 --- a/src/plugins/dpdk/ipsec/ipsec.c +++ b/src/plugins/dpdk/ipsec/ipsec.c @@ -56,18 +56,23 @@ add_del_sa_sess (u32 sa_index, u8 is_add) else { u8 dev_id; + i32 ret; sa_sess = pool_elt_at_index (cwm->sa_sess_d[is_outbound], sa_index); dev_id = cwm->qp_data[sa_sess->qp_index].dev_id; if (!sa_sess->sess) continue; - - if (rte_cryptodev_sym_session_free(dev_id, sa_sess->sess)) - { - clib_warning("failed to free session"); - return -1; - } +#if DPDK_NO_AEAD + ret = (rte_cryptodev_sym_session_free(dev_id, sa_sess->sess) == NULL); + ASSERT (ret); +#else + ret = rte_cryptodev_sym_session_clear(dev_id, sa_sess->sess); + ASSERT (!ret); + + ret = rte_cryptodev_sym_session_free(sa_sess->sess); + ASSERT (!ret); +#endif memset(sa_sess, 0, sizeof(sa_sess[0])); } } @@ -94,7 +99,7 @@ update_qp_data (crypto_worker_main_t * cwm, } /* *INDENT-ON* */ - vec_add2 (cwm->qp_data, qpd, 1); + vec_add2_aligned (cwm->qp_data, qpd, 1, CLIB_CACHE_LINE_BYTES); qpd->dev_id = cdev_id; qpd->qp_id = qp_id; @@ -119,6 +124,9 @@ add_mapping (crypto_worker_main_t * cwm, p_key->cipher_algo = (u8) cipher_cap->sym.cipher.algo; p_key->auth_algo = (u8) auth_cap->sym.auth.algo; p_key->is_outbound = is_outbound; +#if ! DPDK_NO_AEAD + p_key->is_aead = cipher_cap->sym.xform_type == RTE_CRYPTO_SYM_XFORM_AEAD; +#endif ret = hash_get (cwm->algo_qp_map, key); if (ret) @@ -147,6 +155,20 @@ add_cdev_mapping (crypto_worker_main_t * cwm, for (i = dev_info->capabilities; i->op != RTE_CRYPTO_OP_TYPE_UNDEFINED; i++) { +#if ! DPDK_NO_AEAD + if (i->sym.xform_type == RTE_CRYPTO_SYM_XFORM_AEAD) + { + struct rte_cryptodev_capabilities none = { 0 }; + + if (check_algo_is_supported (i, NULL) != 0) + continue; + + none.sym.auth.algo = RTE_CRYPTO_AUTH_NULL; + + mapped |= add_mapping (cwm, cdev_id, qp, is_outbound, i, &none); + continue; + } +#endif if (i->sym.xform_type != RTE_CRYPTO_SYM_XFORM_CIPHER) continue; @@ -205,17 +227,23 @@ dpdk_ipsec_check_support (ipsec_sa_t * sa) { if (sa->integ_alg != IPSEC_INTEG_ALG_NONE) return clib_error_return (0, "unsupported integ-alg %U with " - "crypto-algo aes-gcm-128", + "crypto-alg aes-gcm-128", format_ipsec_integ_alg, sa->integ_alg); +#if DPDK_NO_AEAD sa->integ_alg = IPSEC_INTEG_ALG_AES_GCM_128; +#endif } - else - { - if (sa->integ_alg == IPSEC_INTEG_ALG_NONE || - sa->integ_alg == IPSEC_INTEG_ALG_AES_GCM_128) - return clib_error_return (0, "unsupported integ-alg %U", - format_ipsec_integ_alg, sa->integ_alg); - } +#if DPDK_NO_AEAD + else if (sa->crypto_alg == IPSEC_CRYPTO_ALG_NONE || + sa->integ_alg == IPSEC_INTEG_ALG_NONE || + sa->integ_alg == IPSEC_INTEG_ALG_AES_GCM_128) +#else + else if (sa->integ_alg == IPSEC_INTEG_ALG_NONE) +#endif + return clib_error_return (0, + "unsupported integ-alg %U with crypto-alg %U", + format_ipsec_integ_alg, sa->integ_alg, + format_ipsec_crypto_alg, sa->crypto_alg); return 0; } @@ -233,6 +261,10 @@ dpdk_ipsec_process (vlib_main_t * vm, vlib_node_runtime_t * rt, struct rte_mempool *rmp; i32 dev_id, ret; u32 i, skip_master; +#if ! DPDK_NO_AEAD + u32 max_sess_size = 0, sess_size; + i8 socket_id; +#endif if (check_cryptodev_queues () < 0) { @@ -297,9 +329,10 @@ dpdk_ipsec_process (vlib_main_t * vm, vlib_node_runtime_t * rt, dev_conf.socket_id = rte_cryptodev_socket_id (dev_id); dev_conf.nb_queue_pairs = cdev_info.max_nb_queue_pairs; +#if DPDK_NO_AEAD dev_conf.session_mp.nb_objs = DPDK_CRYPTO_NB_SESS_OBJS; dev_conf.session_mp.cache_size = DPDK_CRYPTO_CACHE_SIZE; - +#endif ret = rte_cryptodev_configure (dev_id, &dev_conf); if (ret < 0) { @@ -310,16 +343,26 @@ dpdk_ipsec_process (vlib_main_t * vm, vlib_node_runtime_t * rt, qp_conf.nb_descriptors = DPDK_CRYPTO_N_QUEUE_DESC; for (qp = 0; qp < dev_conf.nb_queue_pairs; qp++) { +#if DPDK_NO_AEAD ret = rte_cryptodev_queue_pair_setup (dev_id, qp, &qp_conf, dev_conf.socket_id); +#else + ret = rte_cryptodev_queue_pair_setup (dev_id, qp, &qp_conf, + dev_conf.socket_id, NULL); +#endif if (ret < 0) { clib_warning ("cryptodev %u qp %u setup error", dev_id, qp); goto error; } } - vec_validate_aligned (dcm->cop_pools, dev_conf.socket_id, - CLIB_CACHE_LINE_BYTES); + vec_validate (dcm->cop_pools, dev_conf.socket_id); + +#if ! DPDK_NO_AEAD + sess_size = rte_cryptodev_get_private_session_size (dev_id); + if (sess_size > max_sess_size) + max_sess_size = sess_size; +#endif if (!vec_elt (dcm->cop_pools, dev_conf.socket_id)) { @@ -333,14 +376,14 @@ dpdk_ipsec_process (vlib_main_t * vm, vlib_node_runtime_t * rt, DPDK_CRYPTO_CACHE_SIZE, DPDK_CRYPTO_PRIV_SIZE, dev_conf.socket_id); - vec_free (pool_name); if (!rmp) { - clib_warning ("failed to allocate mempool on socket %u", - dev_conf.socket_id); + clib_warning ("failed to allocate %s", pool_name); + vec_free (pool_name); goto error; } + vec_free (pool_name); vec_elt (dcm->cop_pools, dev_conf.socket_id) = rmp; } @@ -348,6 +391,51 @@ dpdk_ipsec_process (vlib_main_t * vm, vlib_node_runtime_t * rt, DPDK_CRYPTO_NB_SESS_OBJS, DPDK_CRYPTO_CACHE_SIZE); } +#if ! DPDK_NO_AEAD + /* *INDENT-OFF* */ + vec_foreach_index (socket_id, dcm->cop_pools) + { + u8 *pool_name; + + if (!vec_elt (dcm->cop_pools, socket_id)) + continue; + + vec_validate (dcm->sess_h_pools, socket_id); + pool_name = format (0, "crypto_sess_h_socket%u%c", + socket_id, 0); + rmp = + rte_mempool_create((i8 *)pool_name, DPDK_CRYPTO_NB_SESS_OBJS, + rte_cryptodev_get_header_session_size (), + 512, 0, NULL, NULL, NULL, NULL, + socket_id, 0); + if (!rmp) + { + clib_warning ("failed to allocate %s", pool_name); + vec_free (pool_name); + goto error; + } + vec_free (pool_name); + vec_elt (dcm->sess_h_pools, socket_id) = rmp; + + vec_validate (dcm->sess_pools, socket_id); + pool_name = format (0, "crypto_sess_socket%u%c", + socket_id, 0); + rmp = + rte_mempool_create((i8 *)pool_name, DPDK_CRYPTO_NB_SESS_OBJS, + max_sess_size, 512, 0, NULL, NULL, NULL, NULL, + socket_id, 0); + if (!rmp) + { + clib_warning ("failed to allocate %s", pool_name); + vec_free (pool_name); + goto error; + } + vec_free (pool_name); + vec_elt (dcm->sess_pools, socket_id) = rmp; + } + /* *INDENT-ON* */ +#endif + dpdk_esp_init (); /* Add new next node and set as default */ diff --git a/src/plugins/dpdk/ipsec/ipsec.h b/src/plugins/dpdk/ipsec/ipsec.h index d7940345..a94dd682 100644 --- a/src/plugins/dpdk/ipsec/ipsec.h +++ b/src/plugins/dpdk/ipsec/ipsec.h @@ -53,6 +53,7 @@ typedef struct u8 cipher_algo; u8 auth_algo; u8 is_outbound; + u8 is_aead; } crypto_worker_qp_key_t; typedef struct @@ -81,6 +82,8 @@ typedef struct typedef struct { + struct rte_mempool **sess_h_pools; + struct rte_mempool **sess_pools; struct rte_mempool **cop_pools; crypto_worker_main_t *workers_main; u8 enabled; @@ -146,12 +149,14 @@ check_algo_is_supported (const struct rte_cryptodev_capabilities *cap, { struct { - uint8_t cipher_algo; enum rte_crypto_sym_xform_type type; union { enum rte_crypto_auth_algorithm auth; enum rte_crypto_cipher_algorithm cipher; +#if ! DPDK_NO_AEAD + enum rte_crypto_aead_algorithm aead; +#endif }; char *name; } supported_algo[] = @@ -162,15 +167,18 @@ check_algo_is_supported (const struct rte_cryptodev_capabilities *cap, { .type = RTE_CRYPTO_SYM_XFORM_CIPHER,.cipher = RTE_CRYPTO_CIPHER_AES_CBC,.name = "AES_CBC"}, +#if DPDK_NO_AEAD { .type = RTE_CRYPTO_SYM_XFORM_CIPHER,.cipher = - RTE_CRYPTO_CIPHER_AES_CTR,.name = "AES_CTR"}, + RTE_CRYPTO_CIPHER_AES_GCM,.name = "AES-GCM"}, +#else { - .type = RTE_CRYPTO_SYM_XFORM_CIPHER,.cipher = - RTE_CRYPTO_CIPHER_3DES_CBC,.name = "3DES-CBC"}, + .type = RTE_CRYPTO_SYM_XFORM_AEAD,.aead = + RTE_CRYPTO_AEAD_AES_GCM,.name = "AES-GCM"}, +#endif { - .type = RTE_CRYPTO_SYM_XFORM_CIPHER,.cipher = - RTE_CRYPTO_CIPHER_AES_GCM,.name = "AES-GCM"}, + .type = RTE_CRYPTO_SYM_XFORM_AUTH,.auth = + RTE_CRYPTO_AUTH_NULL,.name = "NULL"}, { .type = RTE_CRYPTO_SYM_XFORM_AUTH,.auth = RTE_CRYPTO_AUTH_SHA1_HMAC,.name = "HMAC-SHA1"}, @@ -183,15 +191,16 @@ check_algo_is_supported (const struct rte_cryptodev_capabilities *cap, { .type = RTE_CRYPTO_SYM_XFORM_AUTH,.auth = RTE_CRYPTO_AUTH_SHA512_HMAC,.name = "HMAC-SHA512"}, - { - .type = RTE_CRYPTO_SYM_XFORM_AUTH,.auth = - RTE_CRYPTO_AUTH_AES_XCBC_MAC,.name = "AES-XCBC-MAC"}, +#if DPDK_NO_AEAD { .type = RTE_CRYPTO_SYM_XFORM_AUTH,.auth = RTE_CRYPTO_AUTH_AES_GCM,.name = "AES-GCM"}, +#endif { /* tail */ - .type = RTE_CRYPTO_SYM_XFORM_NOT_SPECIFIED},}; + .type = RTE_CRYPTO_SYM_XFORM_NOT_SPECIFIED} + }; + uint32_t i = 0; if (cap->op != RTE_CRYPTO_OP_TYPE_SYMMETRIC) @@ -203,6 +212,10 @@ check_algo_is_supported (const struct rte_cryptodev_capabilities *cap, { if ((cap->sym.xform_type == RTE_CRYPTO_SYM_XFORM_CIPHER && cap->sym.cipher.algo == supported_algo[i].cipher) || +#if ! DPDK_NO_AEAD + (cap->sym.xform_type == RTE_CRYPTO_SYM_XFORM_AEAD && + cap->sym.aead.algo == supported_algo[i].aead) || +#endif (cap->sym.xform_type == RTE_CRYPTO_SYM_XFORM_AUTH && cap->sym.auth.algo == supported_algo[i].auth)) { -- cgit 1.2.3-korg