aboutsummaryrefslogtreecommitdiffstats
path: root/src/plugins/dpdk/device/common.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/plugins/dpdk/device/common.c')
-rw-r--r--src/plugins/dpdk/device/common.c276
1 files changed, 215 insertions, 61 deletions
diff --git a/src/plugins/dpdk/device/common.c b/src/plugins/dpdk/device/common.c
index 89046d1a8c0..7a49c5aaef2 100644
--- a/src/plugins/dpdk/device/common.c
+++ b/src/plugins/dpdk/device/common.c
@@ -29,11 +29,26 @@
#include <dpdk/device/dpdk_priv.h>
#include <vppinfra/error.h>
+/* DPDK TX offload to vnet hw interface caps mapppings */
+static struct
+{
+ u64 offload;
+ vnet_hw_if_caps_t caps;
+} tx_off_caps_map[] = {
+ { RTE_ETH_TX_OFFLOAD_IPV4_CKSUM, VNET_HW_IF_CAP_TX_IP4_CKSUM },
+ { RTE_ETH_TX_OFFLOAD_TCP_CKSUM, VNET_HW_IF_CAP_TX_TCP_CKSUM },
+ { RTE_ETH_TX_OFFLOAD_UDP_CKSUM, VNET_HW_IF_CAP_TX_UDP_CKSUM },
+ { RTE_ETH_TX_OFFLOAD_OUTER_IPV4_CKSUM, VNET_HW_IF_CAP_TX_IP4_OUTER_CKSUM },
+ { RTE_ETH_TX_OFFLOAD_OUTER_UDP_CKSUM, VNET_HW_IF_CAP_TX_UDP_OUTER_CKSUM },
+ { RTE_ETH_TX_OFFLOAD_TCP_TSO, VNET_HW_IF_CAP_TCP_GSO },
+ { RTE_ETH_TX_OFFLOAD_VXLAN_TNL_TSO, VNET_HW_IF_CAP_VXLAN_TNL_GSO }
+};
+
void
dpdk_device_error (dpdk_device_t * xd, char *str, int rv)
{
- dpdk_log_err ("Interface %U error %d: %s",
- format_dpdk_device_name, xd->port_id, rv, rte_strerror (rv));
+ dpdk_log_err ("Interface %U error %d: %s", format_dpdk_device_name,
+ xd->device_index, rv, rte_strerror (rv));
xd->errors = clib_error_return (xd->errors, "%s[port:%d, errno:%d]: %s",
str, xd->port_id, rv, rte_strerror (rv));
}
@@ -41,14 +56,16 @@ dpdk_device_error (dpdk_device_t * xd, char *str, int rv)
void
dpdk_device_setup (dpdk_device_t * xd)
{
- dpdk_main_t *dm = &dpdk_main;
vlib_main_t *vm = vlib_get_main ();
vnet_main_t *vnm = vnet_get_main ();
- vlib_thread_main_t *tm = vlib_get_thread_main ();
vnet_sw_interface_t *sw = vnet_get_sw_interface (vnm, xd->sw_if_index);
vnet_hw_interface_t *hi = vnet_get_hw_interface (vnm, xd->hw_if_index);
+ u16 buf_sz = vlib_buffer_get_default_data_size (vm);
+ vnet_hw_if_caps_change_t caps = {};
struct rte_eth_dev_info dev_info;
- u64 bitmap;
+ struct rte_eth_conf conf = {};
+ u64 rxo, txo;
+ u32 max_frame_size;
int rv;
int j;
@@ -59,70 +76,152 @@ dpdk_device_setup (dpdk_device_t * xd)
if (xd->flags & DPDK_DEVICE_FLAG_ADMIN_UP)
{
- vnet_hw_interface_set_flags (dm->vnet_main, xd->hw_if_index, 0);
+ vnet_hw_interface_set_flags (vnm, xd->hw_if_index, 0);
dpdk_device_stop (xd);
}
- /* Enable flow director when flows exist */
- if (xd->pmd == VNET_DPDK_PMD_I40E)
+ rte_eth_dev_info_get (xd->port_id, &dev_info);
+
+ dpdk_log_debug ("[%u] configuring device %U", xd->port_id,
+ format_dpdk_rte_device, dev_info.device);
+
+ /* create rx and tx offload wishlist */
+ rxo = RTE_ETH_RX_OFFLOAD_IPV4_CKSUM;
+ txo = 0;
+
+ if (xd->conf.enable_tcp_udp_checksum)
+ rxo |= RTE_ETH_RX_OFFLOAD_UDP_CKSUM | RTE_ETH_RX_OFFLOAD_TCP_CKSUM;
+
+ if (xd->conf.disable_tx_checksum_offload == 0 &&
+ xd->conf.enable_outer_checksum_offload)
+ txo |=
+ RTE_ETH_TX_OFFLOAD_OUTER_IPV4_CKSUM | RTE_ETH_TX_OFFLOAD_OUTER_UDP_CKSUM;
+
+ if (xd->conf.disable_tx_checksum_offload == 0)
+ txo |= RTE_ETH_TX_OFFLOAD_IPV4_CKSUM | RTE_ETH_TX_OFFLOAD_TCP_CKSUM |
+ RTE_ETH_TX_OFFLOAD_UDP_CKSUM;
+
+ if (xd->conf.disable_multi_seg == 0)
{
- if ((xd->flags & DPDK_DEVICE_FLAG_RX_FLOW_OFFLOAD) != 0)
- xd->port_conf.fdir_conf.mode = RTE_FDIR_MODE_PERFECT;
- else
- xd->port_conf.fdir_conf.mode = RTE_FDIR_MODE_NONE;
+ txo |= RTE_ETH_TX_OFFLOAD_MULTI_SEGS;
+ rxo |= RTE_ETH_RX_OFFLOAD_SCATTER;
+#if RTE_VERSION < RTE_VERSION_NUM(21, 11, 0, 0)
+ rxo |= DEV_RX_OFFLOAD_JUMBO_FRAME;
+#endif
}
- rte_eth_dev_info_get (xd->port_id, &dev_info);
-
- bitmap = xd->port_conf.txmode.offloads & ~dev_info.tx_offload_capa;
- if (bitmap)
+ if (xd->conf.enable_lro)
+ rxo |= RTE_ETH_RX_OFFLOAD_TCP_LRO;
+
+ /* per-device offload config */
+ if (xd->conf.enable_tso)
+ txo |= RTE_ETH_TX_OFFLOAD_TCP_CKSUM | RTE_ETH_TX_OFFLOAD_TCP_TSO |
+ RTE_ETH_TX_OFFLOAD_VXLAN_TNL_TSO;
+
+ if (xd->conf.disable_rx_scatter)
+ rxo &= ~RTE_ETH_RX_OFFLOAD_SCATTER;
+
+ /* mask unsupported offloads */
+ rxo &= dev_info.rx_offload_capa;
+ txo &= dev_info.tx_offload_capa;
+
+ dpdk_log_debug ("[%u] Supported RX offloads: %U", xd->port_id,
+ format_dpdk_rx_offload_caps, dev_info.rx_offload_capa);
+ dpdk_log_debug ("[%u] Configured RX offloads: %U", xd->port_id,
+ format_dpdk_rx_offload_caps, rxo);
+ dpdk_log_debug ("[%u] Supported TX offloads: %U", xd->port_id,
+ format_dpdk_tx_offload_caps, dev_info.tx_offload_capa);
+ dpdk_log_debug ("[%u] Configured TX offloads: %U", xd->port_id,
+ format_dpdk_tx_offload_caps, txo);
+
+ /* finalize configuration */
+ conf.rxmode.offloads = rxo;
+ conf.txmode.offloads = txo;
+ if (rxo & RTE_ETH_RX_OFFLOAD_TCP_LRO)
+ conf.rxmode.max_lro_pkt_size = xd->conf.max_lro_pkt_size;
+
+ if (xd->conf.enable_lsc_int)
+ conf.intr_conf.lsc = 1;
+ if (xd->conf.enable_rxq_int)
+ conf.intr_conf.rxq = 1;
+
+ conf.rxmode.mq_mode = RTE_ETH_MQ_RX_NONE;
+ if (xd->conf.n_rx_queues > 1)
{
- dpdk_log_warn ("unsupported tx offloads requested on port %u: %U",
- xd->port_id, format_dpdk_tx_offload_caps, bitmap);
- xd->port_conf.txmode.offloads ^= bitmap;
+ if (xd->conf.disable_rss == 0)
+ {
+ conf.rxmode.mq_mode = RTE_ETH_MQ_RX_RSS;
+ conf.rx_adv_conf.rss_conf.rss_hf = xd->conf.rss_hf;
+ }
}
- bitmap = xd->port_conf.rxmode.offloads & ~dev_info.rx_offload_capa;
- if (bitmap)
+#if RTE_VERSION < RTE_VERSION_NUM(21, 11, 0, 0)
+ if (rxo & DEV_RX_OFFLOAD_JUMBO_FRAME)
+ {
+ conf.rxmode.max_rx_pkt_len = dev_info.max_rx_pktlen;
+ xd->max_supported_frame_size = dev_info.max_rx_pktlen;
+ }
+ else
{
- dpdk_log_warn ("unsupported rx offloads requested on port %u: %U",
- xd->port_id, format_dpdk_rx_offload_caps, bitmap);
- xd->port_conf.rxmode.offloads ^= bitmap;
+ xd->max_supported_frame_size =
+ clib_min (1500 + xd->driver_frame_overhead, buf_sz);
}
+#else
+ if (xd->conf.disable_multi_seg)
+ xd->max_supported_frame_size = clib_min (dev_info.max_rx_pktlen, buf_sz);
+ else
+ xd->max_supported_frame_size = dev_info.max_rx_pktlen;
+#endif
+
+ max_frame_size = clib_min (xd->max_supported_frame_size,
+ ethernet_main.default_mtu + hi->frame_overhead);
- rv = rte_eth_dev_configure (xd->port_id, xd->rx_q_used,
- xd->tx_q_used, &xd->port_conf);
+#if RTE_VERSION >= RTE_VERSION_NUM(21, 11, 0, 0)
+ conf.rxmode.mtu = max_frame_size - xd->driver_frame_overhead;
+#endif
- if (rv < 0)
+retry:
+ rv = rte_eth_dev_configure (xd->port_id, xd->conf.n_rx_queues,
+ xd->conf.n_tx_queues, &conf);
+ if (rv < 0 && conf.intr_conf.rxq)
{
- dpdk_device_error (xd, "rte_eth_dev_configure", rv);
- goto error;
+ conf.intr_conf.rxq = 0;
+ goto retry;
}
- vec_validate_aligned (xd->tx_queues, xd->tx_q_used - 1,
+#if RTE_VERSION < RTE_VERSION_NUM(21, 11, 0, 0)
+ rte_eth_dev_set_mtu (xd->port_id,
+ max_frame_size - xd->driver_frame_overhead);
+#endif
+
+ hi->max_frame_size = 0;
+ vnet_hw_interface_set_max_frame_size (vnm, xd->hw_if_index, max_frame_size);
+ dpdk_log_debug ("[%u] max_frame_size %u max max_frame_size %u "
+ "driver_frame_overhead %u",
+ xd->port_id, hi->max_frame_size,
+ xd->max_supported_frame_size, xd->driver_frame_overhead);
+
+ vec_validate_aligned (xd->tx_queues, xd->conf.n_tx_queues - 1,
CLIB_CACHE_LINE_BYTES);
- for (j = 0; j < xd->tx_q_used; j++)
+ for (j = 0; j < xd->conf.n_tx_queues; j++)
{
- rv =
- rte_eth_tx_queue_setup (xd->port_id, j, xd->nb_tx_desc,
- xd->cpu_socket, &xd->tx_conf);
+ rv = rte_eth_tx_queue_setup (xd->port_id, j, xd->conf.n_tx_desc,
+ xd->cpu_socket, 0);
/* retry with any other CPU socket */
if (rv < 0)
- rv =
- rte_eth_tx_queue_setup (xd->port_id, j,
- xd->nb_tx_desc, SOCKET_ID_ANY,
- &xd->tx_conf);
+ rv = rte_eth_tx_queue_setup (xd->port_id, j, xd->conf.n_tx_desc,
+ SOCKET_ID_ANY, 0);
if (rv < 0)
dpdk_device_error (xd, "rte_eth_tx_queue_setup", rv);
- if (xd->tx_q_used < tm->n_vlib_mains)
- clib_spinlock_init (&vec_elt (xd->tx_queues, j).lock);
+ clib_spinlock_init (&vec_elt (xd->tx_queues, j).lock);
}
- vec_validate_aligned (xd->rx_queues, xd->rx_q_used - 1,
+ vec_validate_aligned (xd->rx_queues, xd->conf.n_rx_queues - 1,
CLIB_CACHE_LINE_BYTES);
- for (j = 0; j < xd->rx_q_used; j++)
+
+ for (j = 0; j < xd->conf.n_rx_queues; j++)
{
dpdk_rx_queue_t *rxq = vec_elt_at_index (xd->rx_queues, j);
u8 bpidx = vlib_buffer_pool_get_default_for_numa (
@@ -130,12 +229,12 @@ dpdk_device_setup (dpdk_device_t * xd)
vlib_buffer_pool_t *bp = vlib_get_buffer_pool (vm, bpidx);
struct rte_mempool *mp = dpdk_mempool_by_buffer_pool_index[bpidx];
- rv = rte_eth_rx_queue_setup (xd->port_id, j, xd->nb_rx_desc,
+ rv = rte_eth_rx_queue_setup (xd->port_id, j, xd->conf.n_rx_desc,
xd->cpu_socket, 0, mp);
/* retry with any other CPU socket */
if (rv < 0)
- rv = rte_eth_rx_queue_setup (xd->port_id, j, xd->nb_rx_desc,
+ rv = rte_eth_rx_queue_setup (xd->port_id, j, xd->conf.n_rx_desc,
SOCKET_ID_ANY, 0, mp);
rxq->buffer_pool_index = bp->index;
@@ -147,7 +246,40 @@ dpdk_device_setup (dpdk_device_t * xd)
if (vec_len (xd->errors))
goto error;
- rte_eth_dev_set_mtu (xd->port_id, hi->max_packet_bytes);
+ xd->buffer_flags =
+ (VLIB_BUFFER_TOTAL_LENGTH_VALID | VLIB_BUFFER_EXT_HDR_VALID);
+
+ if ((rxo & (RTE_ETH_RX_OFFLOAD_TCP_CKSUM | RTE_ETH_RX_OFFLOAD_UDP_CKSUM)) ==
+ (RTE_ETH_RX_OFFLOAD_TCP_CKSUM | RTE_ETH_RX_OFFLOAD_UDP_CKSUM))
+ xd->buffer_flags |=
+ (VNET_BUFFER_F_L4_CHECKSUM_COMPUTED | VNET_BUFFER_F_L4_CHECKSUM_CORRECT);
+
+ dpdk_device_flag_set (xd, DPDK_DEVICE_FLAG_RX_IP4_CKSUM,
+ rxo & RTE_ETH_RX_OFFLOAD_IPV4_CKSUM);
+ dpdk_device_flag_set (xd, DPDK_DEVICE_FLAG_MAYBE_MULTISEG,
+ rxo & RTE_ETH_RX_OFFLOAD_SCATTER);
+ dpdk_device_flag_set (
+ xd, DPDK_DEVICE_FLAG_TX_OFFLOAD,
+ (txo & (RTE_ETH_TX_OFFLOAD_TCP_CKSUM | RTE_ETH_TX_OFFLOAD_UDP_CKSUM)) ==
+ (RTE_ETH_TX_OFFLOAD_TCP_CKSUM | RTE_ETH_TX_OFFLOAD_UDP_CKSUM));
+
+ /* unconditionally set mac filtering cap */
+ caps.val = caps.mask = VNET_HW_IF_CAP_MAC_FILTER;
+
+ ethernet_set_flags (vnm, xd->hw_if_index,
+ ETHERNET_INTERFACE_FLAG_DEFAULT_L3);
+
+ for (int i = 0; i < ARRAY_LEN (tx_off_caps_map); i++)
+ {
+ __typeof__ (tx_off_caps_map[0]) *v = tx_off_caps_map + i;
+ caps.mask |= v->caps;
+ if ((v->offload & txo) == v->offload)
+ caps.val |= v->caps;
+ }
+
+ vnet_hw_if_change_caps (vnm, xd->hw_if_index, &caps);
+ xd->enabled_rx_off = rxo;
+ xd->enabled_tx_off = txo;
if (xd->flags & DPDK_DEVICE_FLAG_ADMIN_UP)
dpdk_device_start (xd);
@@ -187,17 +319,18 @@ dpdk_setup_interrupts (dpdk_device_t *xd)
{
vnet_main_t *vnm = vnet_get_main ();
vnet_hw_interface_t *hi = vnet_get_hw_interface (vnm, xd->hw_if_index);
+ int int_mode = 0;
if (!hi)
return;
- if (!xd->port_conf.intr_conf.rxq)
+ if (!xd->conf.enable_rxq_int)
return;
/* Probe for interrupt support */
if (rte_eth_dev_rx_intr_enable (xd->port_id, 0))
{
dpdk_log_info ("probe for interrupt mode for device %U. Failed.\n",
- format_dpdk_device_name, xd->port_id);
+ format_dpdk_device_name, xd->device_index);
}
else
{
@@ -205,13 +338,13 @@ dpdk_setup_interrupts (dpdk_device_t *xd)
if (!(xd->flags & DPDK_DEVICE_FLAG_INT_UNMASKABLE))
rte_eth_dev_rx_intr_disable (xd->port_id, 0);
dpdk_log_info ("Probe for interrupt mode for device %U. Success.\n",
- format_dpdk_device_name, xd->port_id);
+ format_dpdk_device_name, xd->device_index);
}
if (xd->flags & DPDK_DEVICE_FLAG_INT_SUPPORTED)
{
- hi->caps |= VNET_HW_INTERFACE_CAP_SUPPORTS_INT_MODE;
- for (int q = 0; q < xd->rx_q_used; q++)
+ int_mode = 1;
+ for (int q = 0; q < xd->conf.n_rx_queues; q++)
{
dpdk_rx_queue_t *rxq = vec_elt_at_index (xd->rx_queues, q);
clib_file_t f = { 0 };
@@ -219,15 +352,15 @@ dpdk_setup_interrupts (dpdk_device_t *xd)
if (rxq->efd < 0)
{
xd->flags &= ~DPDK_DEVICE_FLAG_INT_SUPPORTED;
- hi->caps &= ~VNET_HW_INTERFACE_CAP_SUPPORTS_INT_MODE;
+ int_mode = 0;
break;
}
f.read_function = dpdk_rx_read_ready;
f.flags = UNIX_FILE_EVENT_EDGE_TRIGGERED;
f.file_descriptor = rxq->efd;
f.private_data = rxq->queue_index;
- f.description =
- format (0, "%U queue %u", format_dpdk_device_name, xd->port_id, q);
+ f.description = format (0, "%U queue %u", format_dpdk_device_name,
+ xd->device_index, q);
rxq->clib_file_index = clib_file_add (&file_main, &f);
vnet_hw_if_set_rx_queue_file_index (vnm, rxq->queue_index,
rxq->clib_file_index);
@@ -240,6 +373,11 @@ dpdk_setup_interrupts (dpdk_device_t *xd)
}
}
}
+
+ if (int_mode)
+ vnet_hw_if_set_caps (vnm, hi->hw_if_index, VNET_HW_IF_CAP_INT_MODE);
+ else
+ vnet_hw_if_unset_caps (vnm, hi->hw_if_index, VNET_HW_IF_CAP_INT_MODE);
vnet_hw_if_update_runtime_data (vnm, xd->hw_if_index);
}
@@ -259,6 +397,11 @@ dpdk_device_start (dpdk_device_t * xd)
return;
}
+ dpdk_log_debug ("[%u] RX burst function: %U", xd->port_id,
+ format_dpdk_burst_fn, xd, VLIB_RX);
+ dpdk_log_debug ("[%u] TX burst function: %U", xd->port_id,
+ format_dpdk_burst_fn, xd, VLIB_TX);
+
dpdk_setup_interrupts (xd);
if (xd->default_mac_address)
@@ -275,8 +418,8 @@ dpdk_device_start (dpdk_device_t * xd)
rte_eth_allmulticast_enable (xd->port_id);
- dpdk_log_info ("Interface %U started",
- format_dpdk_device_name, xd->port_id);
+ dpdk_log_info ("Interface %U started", format_dpdk_device_name,
+ xd->device_index);
}
void
@@ -289,8 +432,8 @@ dpdk_device_stop (dpdk_device_t * xd)
rte_eth_dev_stop (xd->port_id);
clib_memset (&xd->link, 0, sizeof (struct rte_eth_link));
- dpdk_log_info ("Interface %U stopped",
- format_dpdk_device_name, xd->port_id);
+ dpdk_log_info ("Interface %U stopped", format_dpdk_device_name,
+ xd->device_index);
}
void vl_api_force_rpc_call_main_thread (void *fp, u8 * data, u32 data_length);
@@ -311,10 +454,11 @@ dpdk_port_state_callback_inline (dpdk_portid_t port_id,
rte_eth_link_get_nowait (port_id, &link);
u8 link_up = link.link_status;
if (link_up)
- dpdk_log_info ("Port %d Link Up - speed %u Mbps - %s",
- port_id, (unsigned) link.link_speed,
- (link.link_duplex == ETH_LINK_FULL_DUPLEX) ?
- "full-duplex" : "half-duplex");
+ dpdk_log_info ("Port %d Link Up - speed %u Mbps - %s", port_id,
+ (unsigned) link.link_speed,
+ (link.link_duplex == RTE_ETH_LINK_FULL_DUPLEX) ?
+ "full-duplex" :
+ "half-duplex");
else
dpdk_log_info ("Port %d Link Down\n\n", port_id);
@@ -337,12 +481,17 @@ dpdk_get_pci_device (const struct rte_eth_dev_info *info)
const struct rte_bus *bus;
bus = rte_bus_find_by_device (info->device);
+#if RTE_VERSION >= RTE_VERSION_NUM(22, 11, 0, 0)
+ if (bus && !strcmp (rte_bus_name (bus), "pci"))
+#else
if (bus && !strcmp (bus->name, "pci"))
+#endif
return RTE_DEV_TO_PCI (info->device);
else
return NULL;
}
+#ifdef __linux__
/* If this device is VMBUS return pointer to info, otherwise NULL */
struct rte_vmbus_device *
dpdk_get_vmbus_device (const struct rte_eth_dev_info *info)
@@ -350,11 +499,16 @@ dpdk_get_vmbus_device (const struct rte_eth_dev_info *info)
const struct rte_bus *bus;
bus = rte_bus_find_by_device (info->device);
+#if RTE_VERSION >= RTE_VERSION_NUM(22, 11, 0, 0)
+ if (bus && !strcmp (rte_bus_name (bus), "vmbus"))
+#else
if (bus && !strcmp (bus->name, "vmbus"))
+#endif
return container_of (info->device, struct rte_vmbus_device, device);
else
return NULL;
}
+#endif /* __linux__ */
/*
* fd.io coding-style-patch-verification: ON