/* * Copyright (c) 2017 Cisco and/or its affiliates. * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at: * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #include <vnet/vnet.h> #include <vppinfra/vec.h> #include <vppinfra/format.h> #include <vlib/unix/cj.h> #include <assert.h> #include <vnet/ip/ip.h> #include <vnet/ethernet/ethernet.h> #include <vnet/ethernet/arp_packet.h> #include <dpdk/buffer.h> #include <dpdk/device/dpdk.h> #include <dpdk/device/dpdk_priv.h> #include <vppinfra/error.h> void dpdk_device_error (dpdk_device_t * xd, char *str, int rv) { dpdk_log_err ("Interface %U error %d: %s", format_dpdk_device_name, xd->port_id, rv, rte_strerror (rv)); xd->errors = clib_error_return (xd->errors, "%s[port:%d, errno:%d]: %s", str, xd->port_id, rv, rte_strerror (rv)); } void dpdk_device_setup (dpdk_device_t * xd) { dpdk_main_t *dm = &dpdk_main; vlib_main_t *vm = vlib_get_main (); vnet_main_t *vnm = vnet_get_main (); vnet_sw_interface_t *sw = vnet_get_sw_interface (vnm, xd->sw_if_index); vnet_hw_interface_t *hi = vnet_get_hw_interface (vnm, xd->hw_if_index); struct rte_eth_dev_info dev_info; u64 bitmap; int rv; int j; ASSERT (vlib_get_thread_index () == 0); clib_error_free (xd->errors); sw->flags &= ~VNET_SW_INTERFACE_FLAG_ERROR; if (xd->flags & DPDK_DEVICE_FLAG_ADMIN_UP) { vnet_hw_interface_set_flags (dm->vnet_main, xd->hw_if_index, 0); dpdk_device_stop (xd); } /* Enable flow director when flows exist */ if (xd->pmd == VNET_DPDK_PMD_I40E) { if ((xd->flags & DPDK_DEVICE_FLAG_RX_FLOW_OFFLOAD) != 0) xd->port_conf.fdir_conf.mode = RTE_FDIR_MODE_PERFECT; else xd->port_conf.fdir_conf.mode = RTE_FDIR_MODE_NONE; } rte_eth_dev_info_get (xd->port_id, &dev_info); bitmap = xd->port_conf.txmode.offloads & ~dev_info.tx_offload_capa; if (bitmap) { dpdk_log_warn ("unsupported tx offloads requested on port %u: %U", xd->port_id, format_dpdk_tx_offload_caps, bitmap); xd->port_conf.txmode.offloads ^= bitmap; } bitmap = xd->port_conf.rxmode.offloads & ~dev_info.rx_offload_capa; if (bitmap) { dpdk_log_warn ("unsupported rx offloads requested on port %u: %U", xd->port_id, format_dpdk_rx_offload_caps, bitmap); xd->port_conf.rxmode.offloads ^= bitmap; } rv = rte_eth_dev_configure (xd->port_id, xd->rx_q_used, xd->tx_q_used, &xd->port_conf); if (rv < 0) { dpdk_device_error (xd, "rte_eth_dev_configure", rv); goto error; } /* Set up one TX-queue per worker thread */ for (j = 0; j < xd->tx_q_used; j++) { rv = rte_eth_tx_queue_setup (xd->port_id, j, xd->nb_tx_desc, xd->cpu_socket, &xd->tx_conf); /* retry with any other CPU socket */ if (rv < 0) rv = rte_eth_tx_queue_setup (xd->port_id, j, xd->nb_tx_desc, SOCKET_ID_ANY, &xd->tx_conf); if (rv < 0) dpdk_device_error (xd, "rte_eth_tx_queue_setup", rv); } vec_validate_aligned (xd->buffer_pool_for_queue, xd->rx_q_used - 1, CLIB_CACHE_LINE_BYTES); for (j = 0; j < xd->rx_q_used; j++) { uword tidx = vnet_get_device_input_thread_index (dm->vnet_main, xd->hw_if_index, j); unsigned lcore = vlib_worker_threads[tidx].cpu_id; u16 socket_id = rte_lcore_to_socket_id (lcore); u8 bpidx = vlib_buffer_pool_get_default_for_numa (vm, socket_id); vlib_buffer_pool_t *bp = vlib_get_buffer_pool (vm, bpidx); struct rte_mempool *mp = dpdk_mempool_by_buffer_pool_index[bpidx]; rv = rte_eth_rx_queue_setup (xd->port_id, j, xd->nb_rx_desc, xd->cpu_socket, 0, mp); /* retry with any other CPU socket */ if (rv < 0) rv = rte_eth_rx_queue_setup (xd->port_id, j, xd->nb_rx_desc, SOCKET_ID_ANY, 0, mp); xd->buffer_pool_for_queue[j] = bp->index; if (rv < 0) dpdk_device_error (xd, "rte_eth_rx_queue_setup", rv); } if (vec_len (xd->errors)) goto error; rte_eth_dev_set_mtu (xd->port_id, hi->max_packet_bytes); if (xd->flags & DPDK_DEVICE_FLAG_ADMIN_UP) dpdk_device_start (xd); if (vec_len (xd->errors)) goto error; return; error: xd->flags |= DPDK_DEVICE_FLAG_PMD_INIT_FAIL; sw->flags |= VNET_SW_INTERFACE_FLAG_ERROR; } void dpdk_device_start (dpdk_device_t * xd) { int rv; if (xd->flags & DPDK_DEVICE_FLAG_PMD_INIT_FAIL) return; rv = rte_eth_dev_start (xd->port_id); if (rv) { dpdk_device_error (xd, "rte_eth_dev_start", rv); return; } if (xd->default_mac_address) rv = rte_eth_dev_default_mac_addr_set (xd->port_id, (struct ether_addr *) xd->default_mac_address); if (rv) dpdk_device_error (xd, "rte_eth_dev_default_mac_addr_set", rv); if (xd->flags & DPDK_DEVICE_FLAG_PROMISC) rte_eth_promiscuous_enable (xd->port_id); else rte_eth_promiscuous_disable (xd->port_id); rte_eth_allmulticast_enable (xd->port_id); if (xd->pmd == VNET_DPDK_PMD_BOND) { dpdk_portid_t slink[16]; int nlink = rte_eth_bond_slaves_get (xd->port_id, slink, 16); while (nlink >= 1) { dpdk_portid_t dpdk_port = slink[--nlink]; rte_eth_allmulticast_enable (dpdk_port); } } dpdk_log_info ("Interface %U started", format_dpdk_device_name, xd->port_id); } void dpdk_device_stop (dpdk_device_t * xd) { if (xd->flags & DPDK_DEVICE_FLAG_PMD_INIT_FAIL) return; rte_eth_allmulticast_disable (xd->port_id); rte_eth_dev_stop (xd->port_id); clib_memset (&xd->link, 0, sizeof (struct rte_eth_link)); /* For bonded interface, stop slave links */ if (xd->pmd == VNET_DPDK_PMD_BOND) { dpdk_portid_t slink[16]; int nlink = rte_eth_bond_slaves_get (xd->port_id, slink, 16); while (nlink >= 1) { dpdk_portid_t dpdk_port = slink[--nlink]; rte_eth_dev_stop (dpdk_port); } } dpdk_log_info ("Interface %U stopped", format_dpdk_device_name, xd->port_id); } /* Even type for send_garp_na_process */ enum { SEND_GARP_NA = 1, } dpdk_send_garp_na_process_event_t; static vlib_node_registration_t send_garp_na_proc_node; static uword send_garp_na_process (vlib_main_t * vm, vlib_node_runtime_t * rt, vlib_frame_t * f) { uword event_type, *event_data = 0; while (1) { u32 i; uword dpdk_port; vlib_process_wait_for_event (vm); event_type = vlib_process_get_events (vm, &event_data); ASSERT (event_type == SEND_GARP_NA); for (i = 0; i < vec_len (event_data); i++) { dpdk_port = event_data[i]; if (i < 5) /* wait 0.2 sec for link to settle, max total 1 sec */ vlib_process_suspend (vm, 0.2); dpdk_device_t *xd = &dpdk_main.devices[dpdk_port]; dpdk_update_link_state (xd, vlib_time_now (vm)); send_ip4_garp (vm, xd->sw_if_index); send_ip6_na (vm, xd->sw_if_index); } vec_reset_length (event_data); } return 0; } /* *INDENT-OFF* */ VLIB_REGISTER_NODE (send_garp_na_proc_node, static) = { .function = send_garp_na_process, .type = VLIB_NODE_TYPE_PROCESS, .name = "send-garp-na-process", }; /* *INDENT-ON* */ void vl_api_force_rpc_call_main_thread (void *fp, u8 * data, u32 data_length); static void garp_na_proc_callback (uword * dpdk_port) { vlib_main_t *vm = vlib_get_main (); ASSERT (vlib_get_thread_index () == 0); vlib_process_signal_event (vm, send_garp_na_proc_node.index, SEND_GARP_NA, *dpdk_port); } always_inline int dpdk_port_state_callback_inline (dpdk_portid_t port_id, enum rte_eth_event_type type, void *param) { struct rte_eth_link link; dpdk_device_t *xd = &dpdk_main.devices[port_id]; RTE_SET_USED (param); if (type != RTE_ETH_EVENT_INTR_LSC) { dpdk_log_info ("Unknown event %d received for port %d", type, port_id); return -1; } rte_eth_link_get_nowait (port_id, &link); u8 link_up = link.link_status; if (xd->flags & DPDK_DEVICE_FLAG_BOND_SLAVE) { uword bd_port = xd->bond_port; int bd_mode = rte_eth_bond_mode_get (bd_port); dpdk_log_info ("Port %d state to %s, " "slave of port %d BondEthernet%d in mode %d", port_id, (link_up) ? "UP" : "DOWN", bd_port, xd->bond_instance_num, bd_mode); if (bd_mode == BONDING_MODE_ACTIVE_BACKUP) { vl_api_force_rpc_call_main_thread (garp_na_proc_callback, (u8 *) & bd_port, sizeof (uword)); } if (link_up) xd->flags |= DPDK_DEVICE_FLAG_BOND_SLAVE_UP; else xd->flags &= ~DPDK_DEVICE_FLAG_BOND_SLAVE_UP; } else /* Should not happen as callback not setup for "normal" links */ { if (link_up) dpdk_log_info ("Port %d Link Up - speed %u Mbps - %s", port_id, (unsigned) link.link_speed, (link.link_duplex == ETH_LINK_FULL_DUPLEX) ? "full-duplex" : "half-duplex"); else dpdk_log_info ("Port %d Link Down\n\n", port_id); } return 0; } int dpdk_port_state_callback (dpdk_portid_t port_id, enum rte_eth_event_type type, void *param, void *ret_param __attribute__ ((unused))) { return dpdk_port_state_callback_inline (port_id, type, param); } /* If this device is PCI return pointer to info, otherwise NULL */ struct rte_pci_device * dpdk_get_pci_device (const struct rte_eth_dev_info *info) { const struct rte_bus *bus; bus = rte_bus_find_by_device (info->device); if (bus && !strcmp (bus->name, "pci")) return RTE_DEV_TO_PCI (info->device); else return NULL; } /* * fd.io coding-style-patch-verification: ON * * Local Variables: * eval: (c-set-style "gnu") * End: */