author     Luca Boccassi <luca.boccassi@gmail.com>    2017-11-08 14:15:11 +0000
committer  Luca Boccassi <luca.boccassi@gmail.com>    2017-11-08 14:45:54 +0000
commit     055c52583a2794da8ba1e85a48cce3832372b12f (patch)
tree       8ceb1cb78fbb46a0f341f8ee24feb3c6b5540013 /drivers/net/mlx5
parent     f239aed5e674965691846e8ce3f187dd47523689 (diff)
New upstream version 17.11-rc3
Change-Id: I6a5baa40612fe0c20f30b5fa773a6cbbac63a685
Signed-off-by: Luca Boccassi <luca.boccassi@gmail.com>
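
A central change in this import is the move of the mlx5 PMD from the
experimental Mellanox OFED verbs API (ibv_exp_*, infiniband/verbs_exp.h)
to stock rdma-core: device capabilities are now probed with
ibv_query_device_ex() and the mlx5dv direct-verbs interface. A minimal
sketch of that probe pattern is shown below; the probe_mlx5_caps()
wrapper is illustrative only and not part of the patch.

#include <infiniband/verbs.h>
#include <infiniband/mlx5dv.h>

/* Illustrative sketch: query device capabilities through stock rdma-core
 * instead of the removed ibv_exp_* interface. */
static int
probe_mlx5_caps(struct ibv_context *ctx)
{
	struct ibv_device_attr_ex attr;
	struct mlx5dv_context dv_attr = { .comp_mask = 0 };

	if (ibv_query_device_ex(ctx, NULL, &attr))
		return -1;
	if (mlx5dv_query_device(ctx, &dv_attr))
		return -1;
	/* Multi-packet write support is reported through mlx5dv flags. */
	return !!(dv_attr.flags & MLX5DV_CONTEXT_FLAGS_MPW_ALLOWED);
}
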
Diffstat (limited to 'drivers/net/mlx5')
-rw-r--r--  drivers/net/mlx5/Makefile                | 42
-rw-r--r--  drivers/net/mlx5/mlx5.c                  | 412
-rw-r--r--  drivers/net/mlx5/mlx5.h                  | 138
-rw-r--r--  drivers/net/mlx5/mlx5_defs.h             | 6
-rw-r--r--  drivers/net/mlx5/mlx5_ethdev.c           | 441
-rw-r--r--  drivers/net/mlx5/mlx5_fdir.c             | 1101
-rw-r--r--  drivers/net/mlx5/mlx5_flow.c             | 2811
-rw-r--r--  drivers/net/mlx5/mlx5_mac.c              | 410
-rw-r--r--  drivers/net/mlx5/mlx5_mr.c               | 283
-rw-r--r--  drivers/net/mlx5/mlx5_prm.h              | 48
-rw-r--r--  drivers/net/mlx5/mlx5_rss.c              | 139
-rw-r--r--  drivers/net/mlx5/mlx5_rxmode.c           | 383
-rw-r--r--  drivers/net/mlx5/mlx5_rxq.c              | 2133
-rw-r--r--  drivers/net/mlx5/mlx5_rxtx.c             | 267
-rw-r--r--  drivers/net/mlx5/mlx5_rxtx.h             | 366
-rw-r--r--  drivers/net/mlx5/mlx5_rxtx_vec.c         | 388
-rw-r--r--  drivers/net/mlx5/mlx5_rxtx_vec.h         | 130
-rw-r--r--  drivers/net/mlx5/mlx5_rxtx_vec_neon.h    | 1039
-rw-r--r--  drivers/net/mlx5/mlx5_rxtx_vec_sse.h (renamed from drivers/net/mlx5/mlx5_rxtx_vec_sse.c) | 512
-rw-r--r--  drivers/net/mlx5/mlx5_socket.c           | 294
-rw-r--r--  drivers/net/mlx5/mlx5_stats.c            | 25
-rw-r--r--  drivers/net/mlx5/mlx5_trigger.c          | 350
-rw-r--r--  drivers/net/mlx5/mlx5_txq.c              | 885
-rw-r--r--  drivers/net/mlx5/mlx5_utils.h            | 2
-rw-r--r--  drivers/net/mlx5/mlx5_vlan.c             | 84
25 files changed, 7167 insertions, 5522 deletions
diff --git a/drivers/net/mlx5/Makefile b/drivers/net/mlx5/Makefile
index 8736de5d..a3984eb9 100644
--- a/drivers/net/mlx5/Makefile
+++ b/drivers/net/mlx5/Makefile
@@ -39,8 +39,9 @@ SRCS-$(CONFIG_RTE_LIBRTE_MLX5_PMD) += mlx5.c
SRCS-$(CONFIG_RTE_LIBRTE_MLX5_PMD) += mlx5_rxq.c
SRCS-$(CONFIG_RTE_LIBRTE_MLX5_PMD) += mlx5_txq.c
SRCS-$(CONFIG_RTE_LIBRTE_MLX5_PMD) += mlx5_rxtx.c
-ifeq ($(CONFIG_RTE_ARCH_X86_64),y)
-SRCS-$(CONFIG_RTE_LIBRTE_MLX5_PMD) += mlx5_rxtx_vec_sse.c
+ifneq ($(filter y,$(CONFIG_RTE_ARCH_X86_64) \
+ $(CONFIG_RTE_ARCH_ARM64)),)
+SRCS-$(CONFIG_RTE_LIBRTE_MLX5_PMD) += mlx5_rxtx_vec.c
endif
SRCS-$(CONFIG_RTE_LIBRTE_MLX5_PMD) += mlx5_trigger.c
SRCS-$(CONFIG_RTE_LIBRTE_MLX5_PMD) += mlx5_ethdev.c
@@ -49,9 +50,9 @@ SRCS-$(CONFIG_RTE_LIBRTE_MLX5_PMD) += mlx5_rxmode.c
SRCS-$(CONFIG_RTE_LIBRTE_MLX5_PMD) += mlx5_vlan.c
SRCS-$(CONFIG_RTE_LIBRTE_MLX5_PMD) += mlx5_stats.c
SRCS-$(CONFIG_RTE_LIBRTE_MLX5_PMD) += mlx5_rss.c
-SRCS-$(CONFIG_RTE_LIBRTE_MLX5_PMD) += mlx5_fdir.c
SRCS-$(CONFIG_RTE_LIBRTE_MLX5_PMD) += mlx5_mr.c
SRCS-$(CONFIG_RTE_LIBRTE_MLX5_PMD) += mlx5_flow.c
+SRCS-$(CONFIG_RTE_LIBRTE_MLX5_PMD) += mlx5_socket.c
# Basic CFLAGS.
CFLAGS += -O3
@@ -63,7 +64,10 @@ CFLAGS += -D_DEFAULT_SOURCE
CFLAGS += -D_XOPEN_SOURCE=600
CFLAGS += $(WERROR_FLAGS)
CFLAGS += -Wno-strict-prototypes
-LDLIBS += -libverbs
+LDLIBS += -libverbs -lmlx5
+LDLIBS += -lrte_eal -lrte_mbuf -lrte_mempool -lrte_ring
+LDLIBS += -lrte_ethdev -lrte_net -lrte_kvargs
+LDLIBS += -lrte_bus_pci
# A few warnings cannot be avoided in external headers.
CFLAGS += -Wno-error=cast-qual
@@ -104,24 +108,24 @@ mlx5_autoconf.h.new: FORCE
mlx5_autoconf.h.new: $(RTE_SDK)/buildtools/auto-config-h.sh
$Q $(RM) -f -- '$@'
$Q sh -- '$<' '$@' \
- HAVE_VERBS_IBV_EXP_FLOW_SPEC_ACTION_DROP \
- infiniband/verbs_exp.h \
- enum IBV_EXP_FLOW_SPEC_ACTION_DROP \
+ HAVE_IBV_DEVICE_VXLAN_SUPPORT \
+ infiniband/verbs.h \
+ enum IBV_DEVICE_VXLAN_SUPPORT \
$(AUTOCONF_OUTPUT)
$Q sh -- '$<' '$@' \
- HAVE_VERBS_IBV_EXP_CQ_COMPRESSED_CQE \
- infiniband/verbs_exp.h \
- enum IBV_EXP_CQ_COMPRESSED_CQE \
+ HAVE_IBV_WQ_FLAG_RX_END_PADDING \
+ infiniband/verbs.h \
+ enum IBV_WQ_FLAG_RX_END_PADDING \
$(AUTOCONF_OUTPUT)
$Q sh -- '$<' '$@' \
- HAVE_VERBS_MLX5_ETH_VLAN_INLINE_HEADER_SIZE \
- infiniband/mlx5_hw.h \
- enum MLX5_ETH_VLAN_INLINE_HEADER_SIZE \
+ HAVE_IBV_MLX5_MOD_MPW \
+ infiniband/mlx5dv.h \
+ enum MLX5DV_CONTEXT_FLAGS_MPW_ALLOWED \
$(AUTOCONF_OUTPUT)
$Q sh -- '$<' '$@' \
- HAVE_VERBS_MLX5_OPCODE_TSO \
- infiniband/mlx5_hw.h \
- enum MLX5_OPCODE_TSO \
+ HAVE_IBV_MLX5_MOD_CQE_128B_COMP \
+ infiniband/mlx5dv.h \
+ enum MLX5DV_CONTEXT_FLAGS_CQE_128B_COMP \
$(AUTOCONF_OUTPUT)
$Q sh -- '$<' '$@' \
HAVE_ETHTOOL_LINK_MODE_25G \
@@ -139,9 +143,9 @@ mlx5_autoconf.h.new: $(RTE_SDK)/buildtools/auto-config-h.sh
enum ETHTOOL_LINK_MODE_100000baseKR4_Full_BIT \
$(AUTOCONF_OUTPUT)
$Q sh -- '$<' '$@' \
- HAVE_UPDATE_CQ_CI \
- infiniband/mlx5_hw.h \
- func ibv_mlx5_exp_update_cq_ci \
+ HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT \
+ infiniband/verbs.h \
+ enum IBV_FLOW_SPEC_ACTION_COUNT \
$(AUTOCONF_OUTPUT)
# Create mlx5_autoconf.h or update it in case it differs from the new one.
diff --git a/drivers/net/mlx5/mlx5.c b/drivers/net/mlx5/mlx5.c
index b7e50463..0548d17a 100644
--- a/drivers/net/mlx5/mlx5.c
+++ b/drivers/net/mlx5/mlx5.c
@@ -50,19 +50,13 @@
#pragma GCC diagnostic error "-Wpedantic"
#endif
-/* DPDK headers don't like -pedantic. */
-#ifdef PEDANTIC
-#pragma GCC diagnostic ignored "-Wpedantic"
-#endif
#include <rte_malloc.h>
#include <rte_ethdev.h>
#include <rte_ethdev_pci.h>
#include <rte_pci.h>
+#include <rte_bus_pci.h>
#include <rte_common.h>
#include <rte_kvargs.h>
-#ifdef PEDANTIC
-#pragma GCC diagnostic error "-Wpedantic"
-#endif
#include "mlx5.h"
#include "mlx5_utils.h"
@@ -103,6 +97,15 @@
/* Default PMD specific parameter value. */
#define MLX5_ARG_UNSET (-1)
+#ifndef HAVE_IBV_MLX5_MOD_MPW
+#define MLX5DV_CONTEXT_FLAGS_MPW_ALLOWED (1 << 2)
+#define MLX5DV_CONTEXT_FLAGS_ENHANCED_MPW (1 << 3)
+#endif
+
+#ifndef HAVE_IBV_MLX5_MOD_CQE_128B_COMP
+#define MLX5DV_CONTEXT_FLAGS_CQE_128B_COMP (1 << 4)
+#endif
+
struct mlx5_args {
int cqe_comp;
int txq_inline;
@@ -134,6 +137,52 @@ mlx5_getenv_int(const char *name)
}
/**
+ * Verbs callback to allocate a memory. This function should allocate the space
+ * according to the size provided residing inside a huge page.
+ * Please note that all allocation must respect the alignment from libmlx5
+ * (i.e. currently sysconf(_SC_PAGESIZE)).
+ *
+ * @param[in] size
+ * The size in bytes of the memory to allocate.
+ * @param[in] data
+ * A pointer to the callback data.
+ *
+ * @return
+ * a pointer to the allocate space.
+ */
+static void *
+mlx5_alloc_verbs_buf(size_t size, void *data)
+{
+ struct priv *priv = data;
+ void *ret;
+ size_t alignment = sysconf(_SC_PAGESIZE);
+
+ assert(data != NULL);
+ assert(!mlx5_is_secondary());
+ ret = rte_malloc_socket(__func__, size, alignment,
+ priv->dev->device->numa_node);
+ DEBUG("Extern alloc size: %lu, align: %lu: %p", size, alignment, ret);
+ return ret;
+}
+
+/**
+ * Verbs callback to free a memory.
+ *
+ * @param[in] ptr
+ * A pointer to the memory to free.
+ * @param[in] data
+ * A pointer to the callback data.
+ */
+static void
+mlx5_free_verbs_buf(void *ptr, void *data __rte_unused)
+{
+ assert(data != NULL);
+ assert(!mlx5_is_secondary());
+ DEBUG("Extern free request: %p", ptr);
+ rte_free(ptr);
+}
+
+/**
* DPDK callback to close the device.
*
* Destroy all queues and objects, free memory.
@@ -146,6 +195,7 @@ mlx5_dev_close(struct rte_eth_dev *dev)
{
struct priv *priv = mlx5_get_priv(dev);
unsigned int i;
+ int ret;
priv_lock(priv);
DEBUG("%p: closing device \"%s\"",
@@ -153,48 +203,23 @@ mlx5_dev_close(struct rte_eth_dev *dev)
((priv->ctx != NULL) ? priv->ctx->device->name : ""));
/* In case mlx5_dev_stop() has not been called. */
priv_dev_interrupt_handler_uninstall(priv, dev);
- priv_special_flow_disable_all(priv);
- priv_mac_addrs_disable(priv);
- priv_destroy_hash_rxqs(priv);
-
- /* Remove flow director elements. */
- priv_fdir_disable(priv);
- priv_fdir_delete_filters_list(priv);
-
+ priv_dev_traffic_disable(priv, dev);
/* Prevent crashes when queues are still in use. */
dev->rx_pkt_burst = removed_rx_burst;
dev->tx_pkt_burst = removed_tx_burst;
if (priv->rxqs != NULL) {
/* XXX race condition if mlx5_rx_burst() is still running. */
usleep(1000);
- for (i = 0; (i != priv->rxqs_n); ++i) {
- struct rxq *rxq = (*priv->rxqs)[i];
- struct rxq_ctrl *rxq_ctrl;
-
- if (rxq == NULL)
- continue;
- rxq_ctrl = container_of(rxq, struct rxq_ctrl, rxq);
- (*priv->rxqs)[i] = NULL;
- rxq_cleanup(rxq_ctrl);
- rte_free(rxq_ctrl);
- }
+ for (i = 0; (i != priv->rxqs_n); ++i)
+ mlx5_priv_rxq_release(priv, i);
priv->rxqs_n = 0;
priv->rxqs = NULL;
}
if (priv->txqs != NULL) {
/* XXX race condition if mlx5_tx_burst() is still running. */
usleep(1000);
- for (i = 0; (i != priv->txqs_n); ++i) {
- struct txq *txq = (*priv->txqs)[i];
- struct txq_ctrl *txq_ctrl;
-
- if (txq == NULL)
- continue;
- txq_ctrl = container_of(txq, struct txq_ctrl, txq);
- (*priv->txqs)[i] = NULL;
- txq_cleanup(txq_ctrl);
- rte_free(txq_ctrl);
- }
+ for (i = 0; (i != priv->txqs_n); ++i)
+ mlx5_priv_txq_release(priv, i);
priv->txqs_n = 0;
priv->txqs = NULL;
}
@@ -204,18 +229,40 @@ mlx5_dev_close(struct rte_eth_dev *dev)
claim_zero(ibv_close_device(priv->ctx));
} else
assert(priv->ctx == NULL);
- if (priv->rss_conf != NULL) {
- for (i = 0; (i != hash_rxq_init_n); ++i)
- rte_free((*priv->rss_conf)[i]);
- rte_free(priv->rss_conf);
- }
+ if (priv->rss_conf.rss_key != NULL)
+ rte_free(priv->rss_conf.rss_key);
if (priv->reta_idx != NULL)
rte_free(priv->reta_idx);
+ priv_socket_uninit(priv);
+ ret = mlx5_priv_hrxq_ibv_verify(priv);
+ if (ret)
+ WARN("%p: some Hash Rx queue still remain", (void *)priv);
+ ret = mlx5_priv_ind_table_ibv_verify(priv);
+ if (ret)
+ WARN("%p: some Indirection table still remain", (void *)priv);
+ ret = mlx5_priv_rxq_ibv_verify(priv);
+ if (ret)
+ WARN("%p: some Verbs Rx queue still remain", (void *)priv);
+ ret = mlx5_priv_rxq_verify(priv);
+ if (ret)
+ WARN("%p: some Rx Queues still remain", (void *)priv);
+ ret = mlx5_priv_txq_ibv_verify(priv);
+ if (ret)
+ WARN("%p: some Verbs Tx queue still remain", (void *)priv);
+ ret = mlx5_priv_txq_verify(priv);
+ if (ret)
+ WARN("%p: some Tx Queues still remain", (void *)priv);
+ ret = priv_flow_verify(priv);
+ if (ret)
+ WARN("%p: some flows still remain", (void *)priv);
+ ret = priv_mr_verify(priv);
+ if (ret)
+ WARN("%p: some Memory Region still remain", (void *)priv);
priv_unlock(priv);
memset(priv, 0, sizeof(*priv));
}
-static const struct eth_dev_ops mlx5_dev_ops = {
+const struct eth_dev_ops mlx5_dev_ops = {
.dev_configure = mlx5_dev_configure,
.dev_start = mlx5_dev_start,
.dev_stop = mlx5_dev_stop,
@@ -254,10 +301,55 @@ static const struct eth_dev_ops mlx5_dev_ops = {
.filter_ctrl = mlx5_dev_filter_ctrl,
.rx_descriptor_status = mlx5_rx_descriptor_status,
.tx_descriptor_status = mlx5_tx_descriptor_status,
-#ifdef HAVE_UPDATE_CQ_CI
.rx_queue_intr_enable = mlx5_rx_intr_enable,
.rx_queue_intr_disable = mlx5_rx_intr_disable,
-#endif
+};
+
+static const struct eth_dev_ops mlx5_dev_sec_ops = {
+ .stats_get = mlx5_stats_get,
+ .stats_reset = mlx5_stats_reset,
+ .xstats_get = mlx5_xstats_get,
+ .xstats_reset = mlx5_xstats_reset,
+ .xstats_get_names = mlx5_xstats_get_names,
+ .dev_infos_get = mlx5_dev_infos_get,
+ .rx_descriptor_status = mlx5_rx_descriptor_status,
+ .tx_descriptor_status = mlx5_tx_descriptor_status,
+};
+
+/* Available operators in flow isolated mode. */
+const struct eth_dev_ops mlx5_dev_ops_isolate = {
+ .dev_configure = mlx5_dev_configure,
+ .dev_start = mlx5_dev_start,
+ .dev_stop = mlx5_dev_stop,
+ .dev_set_link_down = mlx5_set_link_down,
+ .dev_set_link_up = mlx5_set_link_up,
+ .dev_close = mlx5_dev_close,
+ .link_update = mlx5_link_update,
+ .stats_get = mlx5_stats_get,
+ .stats_reset = mlx5_stats_reset,
+ .xstats_get = mlx5_xstats_get,
+ .xstats_reset = mlx5_xstats_reset,
+ .xstats_get_names = mlx5_xstats_get_names,
+ .dev_infos_get = mlx5_dev_infos_get,
+ .dev_supported_ptypes_get = mlx5_dev_supported_ptypes_get,
+ .vlan_filter_set = mlx5_vlan_filter_set,
+ .rx_queue_setup = mlx5_rx_queue_setup,
+ .tx_queue_setup = mlx5_tx_queue_setup,
+ .rx_queue_release = mlx5_rx_queue_release,
+ .tx_queue_release = mlx5_tx_queue_release,
+ .flow_ctrl_get = mlx5_dev_get_flow_ctrl,
+ .flow_ctrl_set = mlx5_dev_set_flow_ctrl,
+ .mac_addr_remove = mlx5_mac_addr_remove,
+ .mac_addr_add = mlx5_mac_addr_add,
+ .mac_addr_set = mlx5_mac_addr_set,
+ .mtu_set = mlx5_dev_set_mtu,
+ .vlan_strip_queue_set = mlx5_vlan_strip_queue_set,
+ .vlan_offload_set = mlx5_vlan_offload_set,
+ .filter_ctrl = mlx5_dev_filter_ctrl,
+ .rx_descriptor_status = mlx5_rx_descriptor_status,
+ .tx_descriptor_status = mlx5_tx_descriptor_status,
+ .rx_queue_intr_enable = mlx5_rx_intr_enable,
+ .rx_queue_intr_disable = mlx5_rx_intr_disable,
};
static struct {
@@ -449,12 +541,17 @@ mlx5_pci_probe(struct rte_pci_driver *pci_drv, struct rte_pci_device *pci_dev)
struct ibv_device *ibv_dev;
int err = 0;
struct ibv_context *attr_ctx = NULL;
- struct ibv_device_attr device_attr;
+ struct ibv_device_attr_ex device_attr;
unsigned int sriov;
unsigned int mps;
- unsigned int tunnel_en;
+ unsigned int cqe_comp;
+ unsigned int tunnel_en = 0;
int idx;
int i;
+ struct mlx5dv_context attrs_out;
+#ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
+ struct ibv_counter_set_description cs_desc;
+#endif
(void)pci_drv;
assert(pci_drv == &mlx5_driver);
@@ -500,34 +597,24 @@ mlx5_pci_probe(struct rte_pci_driver *pci_drv, struct rte_pci_device *pci_dev)
PCI_DEVICE_ID_MELLANOX_CONNECTX5VF) ||
(pci_dev->id.device_id ==
PCI_DEVICE_ID_MELLANOX_CONNECTX5EXVF));
- /*
- * Multi-packet send is supported by ConnectX-4 Lx PF as well
- * as all ConnectX-5 devices.
- */
switch (pci_dev->id.device_id) {
case PCI_DEVICE_ID_MELLANOX_CONNECTX4:
tunnel_en = 1;
- mps = MLX5_MPW_DISABLED;
break;
case PCI_DEVICE_ID_MELLANOX_CONNECTX4LX:
- mps = MLX5_MPW;
- break;
case PCI_DEVICE_ID_MELLANOX_CONNECTX5:
case PCI_DEVICE_ID_MELLANOX_CONNECTX5VF:
case PCI_DEVICE_ID_MELLANOX_CONNECTX5EX:
case PCI_DEVICE_ID_MELLANOX_CONNECTX5EXVF:
tunnel_en = 1;
- mps = MLX5_MPW_ENHANCED;
break;
default:
- mps = MLX5_MPW_DISABLED;
+ break;
}
INFO("PCI information matches, using device \"%s\""
- " (SR-IOV: %s, %sMPS: %s)",
+ " (SR-IOV: %s)",
list[i]->name,
- sriov ? "true" : "false",
- mps == MLX5_MPW_ENHANCED ? "Enhanced " : "",
- mps != MLX5_MPW_DISABLED ? "true" : "false");
+ sriov ? "true" : "false");
attr_ctx = ibv_open_device(list[i]);
err = errno;
break;
@@ -548,11 +635,33 @@ mlx5_pci_probe(struct rte_pci_driver *pci_drv, struct rte_pci_device *pci_dev)
ibv_dev = list[i];
DEBUG("device opened");
- if (ibv_query_device(attr_ctx, &device_attr))
+ /*
+ * Multi-packet send is supported by ConnectX-4 Lx PF as well
+ * as all ConnectX-5 devices.
+ */
+ mlx5dv_query_device(attr_ctx, &attrs_out);
+ if (attrs_out.flags & MLX5DV_CONTEXT_FLAGS_MPW_ALLOWED) {
+ if (attrs_out.flags & MLX5DV_CONTEXT_FLAGS_ENHANCED_MPW) {
+ DEBUG("Enhanced MPW is supported");
+ mps = MLX5_MPW_ENHANCED;
+ } else {
+ DEBUG("MPW is supported");
+ mps = MLX5_MPW;
+ }
+ } else {
+ DEBUG("MPW isn't supported");
+ mps = MLX5_MPW_DISABLED;
+ }
+ if (RTE_CACHE_LINE_SIZE == 128 &&
+ !(attrs_out.flags & MLX5DV_CONTEXT_FLAGS_CQE_128B_COMP))
+ cqe_comp = 0;
+ else
+ cqe_comp = 1;
+ if (ibv_query_device_ex(attr_ctx, NULL, &device_attr))
goto error;
- INFO("%u port(s) detected", device_attr.phys_port_cnt);
+ INFO("%u port(s) detected", device_attr.orig_attr.phys_port_cnt);
- for (i = 0; i < device_attr.phys_port_cnt; i++) {
+ for (i = 0; i < device_attr.orig_attr.phys_port_cnt; i++) {
uint32_t port = i + 1; /* ports are indexed from one */
uint32_t test = (1 << i);
struct ibv_context *ctx = NULL;
@@ -560,9 +669,10 @@ mlx5_pci_probe(struct rte_pci_driver *pci_drv, struct rte_pci_device *pci_dev)
struct ibv_pd *pd = NULL;
struct priv *priv = NULL;
struct rte_eth_dev *eth_dev;
- struct ibv_exp_device_attr exp_device_attr;
+ struct ibv_device_attr_ex device_attr_ex;
struct ether_addr mac;
uint16_t num_vfs = 0;
+ struct ibv_device_attr_ex device_attr;
struct mlx5_args args = {
.cqe_comp = MLX5_ARG_UNSET,
.txq_inline = MLX5_ARG_UNSET,
@@ -575,20 +685,49 @@ mlx5_pci_probe(struct rte_pci_driver *pci_drv, struct rte_pci_device *pci_dev)
.rx_vec_en = MLX5_ARG_UNSET,
};
- exp_device_attr.comp_mask =
- IBV_EXP_DEVICE_ATTR_EXP_CAP_FLAGS |
- IBV_EXP_DEVICE_ATTR_RX_HASH |
- IBV_EXP_DEVICE_ATTR_VLAN_OFFLOADS |
- IBV_EXP_DEVICE_ATTR_RX_PAD_END_ALIGN |
- IBV_EXP_DEVICE_ATTR_TSO_CAPS |
- 0;
+ mlx5_dev[idx].ports |= test;
+
+ if (mlx5_is_secondary()) {
+ /* from rte_ethdev.c */
+ char name[RTE_ETH_NAME_MAX_LEN];
+
+ snprintf(name, sizeof(name), "%s port %u",
+ ibv_get_device_name(ibv_dev), port);
+ eth_dev = rte_eth_dev_attach_secondary(name);
+ if (eth_dev == NULL) {
+ ERROR("can not attach rte ethdev");
+ err = ENOMEM;
+ goto error;
+ }
+ eth_dev->device = &pci_dev->device;
+ eth_dev->dev_ops = &mlx5_dev_sec_ops;
+ priv = eth_dev->data->dev_private;
+ /* Receive command fd from primary process */
+ err = priv_socket_connect(priv);
+ if (err < 0) {
+ err = -err;
+ goto error;
+ }
+ /* Remap UAR for Tx queues. */
+ err = priv_tx_uar_remap(priv, err);
+ if (err < 0) {
+ err = -err;
+ goto error;
+ }
+ priv_dev_select_rx_function(priv, eth_dev);
+ priv_dev_select_tx_function(priv, eth_dev);
+ continue;
+ }
DEBUG("using port %u (%08" PRIx32 ")", port, test);
ctx = ibv_open_device(ibv_dev);
- if (ctx == NULL)
+ if (ctx == NULL) {
+ err = ENODEV;
goto port_error;
+ }
+ ibv_query_device_ex(ctx, NULL, &device_attr);
/* Check port status. */
err = ibv_query_port(ctx, port, &port_attr);
if (err) {
@@ -599,6 +738,7 @@ mlx5_pci_probe(struct rte_pci_driver *pci_drv, struct rte_pci_device *pci_dev)
if (port_attr.link_layer != IBV_LINK_LAYER_ETHERNET) {
ERROR("port %d is not configured in Ethernet mode",
port);
+ err = EINVAL;
goto port_error;
}
@@ -628,12 +768,14 @@ mlx5_pci_probe(struct rte_pci_driver *pci_drv, struct rte_pci_device *pci_dev)
}
priv->ctx = ctx;
+ strncpy(priv->ibdev_path, priv->ctx->device->ibdev_path,
+ sizeof(priv->ibdev_path));
priv->device_attr = device_attr;
priv->port = port;
priv->pd = pd;
priv->mtu = ETHER_MTU;
priv->mps = mps; /* Enable MPW by default if supported. */
- priv->cqe_comp = 1; /* Enable compression by default. */
+ priv->cqe_comp = cqe_comp;
priv->tunnel_en = tunnel_en;
/* Enable vector by default if supported. */
priv->tx_vec_en = 1;
@@ -645,25 +787,33 @@ mlx5_pci_probe(struct rte_pci_driver *pci_drv, struct rte_pci_device *pci_dev)
goto port_error;
}
mlx5_args_assign(priv, &args);
- if (ibv_exp_query_device(ctx, &exp_device_attr)) {
- ERROR("ibv_exp_query_device() failed");
+ if (ibv_query_device_ex(ctx, NULL, &device_attr_ex)) {
+ ERROR("ibv_query_device_ex() failed");
goto port_error;
}
priv->hw_csum =
- ((exp_device_attr.exp_device_cap_flags &
- IBV_EXP_DEVICE_RX_CSUM_TCP_UDP_PKT) &&
- (exp_device_attr.exp_device_cap_flags &
- IBV_EXP_DEVICE_RX_CSUM_IP_PKT));
+ !!(device_attr_ex.device_cap_flags_ex &
+ IBV_DEVICE_RAW_IP_CSUM);
DEBUG("checksum offloading is %ssupported",
(priv->hw_csum ? "" : "not "));
+#ifdef HAVE_IBV_DEVICE_VXLAN_SUPPORT
priv->hw_csum_l2tun = !!(exp_device_attr.exp_device_cap_flags &
- IBV_EXP_DEVICE_VXLAN_SUPPORT);
+ IBV_DEVICE_VXLAN_SUPPORT);
+#endif
DEBUG("L2 tunnel checksum offloads are %ssupported",
(priv->hw_csum_l2tun ? "" : "not "));
- priv->ind_table_max_size = exp_device_attr.rx_hash_caps.max_rwq_indirection_table_size;
+#ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
+ priv->counter_set_supported = !!(device_attr.max_counter_sets);
+ ibv_describe_counter_set(ctx, 0, &cs_desc);
+ DEBUG("counter type = %d, num of cs = %ld, attributes = %d",
+ cs_desc.counter_type, cs_desc.num_of_cs,
+ cs_desc.attributes);
+#endif
+ priv->ind_table_max_size =
+ device_attr_ex.rss_caps.max_rwq_indirection_table_size;
/* Remove this check once DPDK supports larger/variable
* indirection tables. */
if (priv->ind_table_max_size >
@@ -671,29 +821,32 @@ mlx5_pci_probe(struct rte_pci_driver *pci_drv, struct rte_pci_device *pci_dev)
priv->ind_table_max_size = ETH_RSS_RETA_SIZE_512;
DEBUG("maximum RX indirection table size is %u",
priv->ind_table_max_size);
- priv->hw_vlan_strip = !!(exp_device_attr.wq_vlan_offloads_cap &
- IBV_EXP_RECEIVE_WQ_CVLAN_STRIP);
+ priv->hw_vlan_strip = !!(device_attr_ex.raw_packet_caps &
+ IBV_RAW_PACKET_CAP_CVLAN_STRIPPING);
DEBUG("VLAN stripping is %ssupported",
(priv->hw_vlan_strip ? "" : "not "));
- priv->hw_fcs_strip = !!(exp_device_attr.exp_device_cap_flags &
- IBV_EXP_DEVICE_SCATTER_FCS);
+ priv->hw_fcs_strip =
+ !!(device_attr_ex.orig_attr.device_cap_flags &
+ IBV_WQ_FLAGS_SCATTER_FCS);
DEBUG("FCS stripping configuration is %ssupported",
(priv->hw_fcs_strip ? "" : "not "));
- priv->hw_padding = !!exp_device_attr.rx_pad_end_addr_align;
+#ifdef HAVE_IBV_WQ_FLAG_RX_END_PADDING
+ priv->hw_padding = !!device_attr_ex.rx_pad_end_addr_align;
+#endif
DEBUG("hardware RX end alignment padding is %ssupported",
(priv->hw_padding ? "" : "not "));
priv_get_num_vfs(priv, &num_vfs);
priv->sriov = (num_vfs || sriov);
priv->tso = ((priv->tso) &&
- (exp_device_attr.tso_caps.max_tso > 0) &&
- (exp_device_attr.tso_caps.supported_qpts &
- (1 << IBV_QPT_RAW_ETH)));
+ (device_attr_ex.tso_caps.max_tso > 0) &&
+ (device_attr_ex.tso_caps.supported_qpts &
+ (1 << IBV_QPT_RAW_PACKET)));
if (priv->tso)
priv->max_tso_payload_sz =
- exp_device_attr.tso_caps.max_tso;
+ device_attr_ex.tso_caps.max_tso;
if (priv->mps && !mps) {
ERROR("multi-packet send not supported on this device"
" (" MLX5_TXQ_MPW_EN ")");
@@ -718,23 +871,15 @@ mlx5_pci_probe(struct rte_pci_driver *pci_drv, struct rte_pci_device *pci_dev)
priv->txq_inline = MLX5_WQE_SIZE_MAX -
MLX5_WQE_SIZE;
}
- /* Allocate and register default RSS hash keys. */
- priv->rss_conf = rte_calloc(__func__, hash_rxq_init_n,
- sizeof((*priv->rss_conf)[0]), 0);
- if (priv->rss_conf == NULL) {
- err = ENOMEM;
- goto port_error;
+ if (priv->cqe_comp && !cqe_comp) {
+ WARN("Rx CQE compression isn't supported");
+ priv->cqe_comp = 0;
}
- err = rss_hash_rss_conf_new_key(priv,
- rss_hash_default_key,
- rss_hash_default_key_len,
- ETH_RSS_PROTO_MASK);
- if (err)
- goto port_error;
/* Configure the first MAC address by default. */
if (priv_get_mac(priv, &mac.addr_bytes)) {
ERROR("cannot get MAC address, is mlx5_en loaded?"
" (errno: %s)", strerror(errno));
+ err = ENODEV;
goto port_error;
}
INFO("port %u MAC address is %02x:%02x:%02x:%02x:%02x:%02x",
@@ -742,14 +887,6 @@ mlx5_pci_probe(struct rte_pci_driver *pci_drv, struct rte_pci_device *pci_dev)
mac.addr_bytes[0], mac.addr_bytes[1],
mac.addr_bytes[2], mac.addr_bytes[3],
mac.addr_bytes[4], mac.addr_bytes[5]);
- /* Register MAC address. */
- claim_zero(priv_mac_addr_add(priv, 0,
- (const uint8_t (*)[ETHER_ADDR_LEN])
- mac.addr_bytes));
- /* Initialize FD filters list. */
- err = fdir_init_filters_list(priv);
- if (err)
- goto port_error;
#ifndef NDEBUG
{
char ifname[IF_NAMESIZE];
@@ -778,44 +915,26 @@ mlx5_pci_probe(struct rte_pci_driver *pci_drv, struct rte_pci_device *pci_dev)
err = ENOMEM;
goto port_error;
}
-
- /* Secondary processes have to use local storage for their
- * private data as well as a copy of eth_dev->data, but this
- * pointer must not be modified before burst functions are
- * actually called. */
- if (mlx5_is_secondary()) {
- struct mlx5_secondary_data *sd =
- &mlx5_secondary_data[eth_dev->data->port_id];
- sd->primary_priv = eth_dev->data->dev_private;
- if (sd->primary_priv == NULL) {
- ERROR("no private data for port %u",
- eth_dev->data->port_id);
- err = EINVAL;
- goto port_error;
- }
- sd->shared_dev_data = eth_dev->data;
- rte_spinlock_init(&sd->lock);
- memcpy(sd->data.name, sd->shared_dev_data->name,
- sizeof(sd->data.name));
- sd->data.dev_private = priv;
- sd->data.rx_mbuf_alloc_failed = 0;
- sd->data.mtu = ETHER_MTU;
- sd->data.port_id = sd->shared_dev_data->port_id;
- sd->data.mac_addrs = priv->mac;
- eth_dev->tx_pkt_burst = mlx5_tx_burst_secondary_setup;
- eth_dev->rx_pkt_burst = mlx5_rx_burst_secondary_setup;
- } else {
- eth_dev->data->dev_private = priv;
- eth_dev->data->mac_addrs = priv->mac;
- }
-
+ eth_dev->data->dev_private = priv;
+ eth_dev->data->mac_addrs = priv->mac;
eth_dev->device = &pci_dev->device;
rte_eth_copy_pci_info(eth_dev, pci_dev);
- eth_dev->data->dev_flags |= RTE_ETH_DEV_DETACHABLE;
eth_dev->device->driver = &mlx5_driver.driver;
priv->dev = eth_dev;
eth_dev->dev_ops = &mlx5_dev_ops;
+ /* Register MAC address. */
+ claim_zero(mlx5_mac_addr_add(eth_dev, &mac, 0, 0));
TAILQ_INIT(&priv->flows);
+ TAILQ_INIT(&priv->ctrl_flows);
+
+ /* Hint libmlx5 to use PMD allocator for data plane resources */
+ struct mlx5dv_ctx_allocators alctr = {
+ .alloc = &mlx5_alloc_verbs_buf,
+ .free = &mlx5_free_verbs_buf,
+ .data = priv,
+ };
+ mlx5dv_set_context_attr(ctx, MLX5DV_CTX_ATTR_BUF_ALLOCATORS,
+ (void *)((uintptr_t)&alctr));
/* Bring Ethernet device up. */
DEBUG("forcing Ethernet interface up");
@@ -824,10 +943,8 @@ mlx5_pci_probe(struct rte_pci_driver *pci_drv, struct rte_pci_device *pci_dev)
continue;
port_error:
- if (priv) {
- rte_free(priv->rss_conf);
+ if (priv)
rte_free(priv);
- }
if (pd)
claim_zero(ibv_dealloc_pd(pd));
if (ctx)
@@ -901,7 +1018,7 @@ static struct rte_pci_driver mlx5_driver = {
},
.id_table = mlx5_pci_id_map,
.probe = mlx5_pci_probe,
- .drv_flags = RTE_PCI_DRV_INTR_LSC,
+ .drv_flags = RTE_PCI_DRV_INTR_LSC | RTE_PCI_DRV_INTR_RMV,
};
/**
@@ -920,6 +1037,9 @@ rte_mlx5_pmd_init(void)
* using this PMD, which is not supported in forked processes.
*/
setenv("RDMAV_HUGEPAGES_SAFE", "1", 1);
+ /* Match the size of Rx completion entry to the size of a cacheline. */
+ if (RTE_CACHE_LINE_SIZE == 128)
+ setenv("MLX5_CQE_SIZE", "128", 0);
ibv_fork_init();
rte_pci_register(&mlx5_driver);
}
diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h
index 43c53841..e6a69b82 100644
--- a/drivers/net/mlx5/mlx5.h
+++ b/drivers/net/mlx5/mlx5.h
@@ -39,6 +39,7 @@
#include <limits.h>
#include <net/if.h>
#include <netinet/in.h>
+#include <sys/queue.h>
/* Verbs header. */
/* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */
@@ -50,10 +51,6 @@
#pragma GCC diagnostic error "-Wpedantic"
#endif
-/* DPDK headers don't like -pedantic. */
-#ifdef PEDANTIC
-#pragma GCC diagnostic ignored "-Wpedantic"
-#endif
#include <rte_pci.h>
#include <rte_ether.h>
#include <rte_ethdev.h>
@@ -61,20 +58,12 @@
#include <rte_interrupts.h>
#include <rte_errno.h>
#include <rte_flow.h>
-#ifdef PEDANTIC
-#pragma GCC diagnostic error "-Wpedantic"
-#endif
#include "mlx5_utils.h"
#include "mlx5_rxtx.h"
#include "mlx5_autoconf.h"
#include "mlx5_defs.h"
-#if !defined(HAVE_VERBS_IBV_EXP_CQ_COMPRESSED_CQE) || \
- !defined(HAVE_VERBS_MLX5_ETH_VLAN_INLINE_HEADER_SIZE)
-#error Mellanox OFED >= 3.3 is required, please refer to the documentation.
-#endif
-
enum {
PCI_VENDOR_ID_MELLANOX = 0x15b3,
};
@@ -98,26 +87,21 @@ struct mlx5_xstats_ctrl {
uint64_t base[MLX5_MAX_XSTATS];
};
+/* Flow list . */
+TAILQ_HEAD(mlx5_flows, rte_flow);
+
struct priv {
- struct rte_eth_dev *dev; /* Ethernet device. */
+ struct rte_eth_dev *dev; /* Ethernet device of master process. */
struct ibv_context *ctx; /* Verbs context. */
- struct ibv_device_attr device_attr; /* Device properties. */
+ struct ibv_device_attr_ex device_attr; /* Device properties. */
struct ibv_pd *pd; /* Protection Domain. */
- /*
- * MAC addresses array and configuration bit-field.
- * An extra entry that cannot be modified by the DPDK is reserved
- * for broadcast frames (destination MAC address ff:ff:ff:ff:ff:ff).
- */
- struct ether_addr mac[MLX5_MAX_MAC_ADDRESSES];
- BITFIELD_DECLARE(mac_configured, uint32_t, MLX5_MAX_MAC_ADDRESSES);
+ char ibdev_path[IBV_SYSFS_PATH_MAX]; /* IB device path for secondary */
+ struct ether_addr mac[MLX5_MAX_MAC_ADDRESSES]; /* MAC addresses. */
uint16_t vlan_filter[MLX5_MAX_VLAN_IDS]; /* VLAN filters table. */
unsigned int vlan_filter_n; /* Number of configured VLAN filters. */
/* Device properties. */
uint16_t mtu; /* Configured MTU. */
uint8_t port; /* Physical port number. */
- unsigned int started:1; /* Device started, flows enabled. */
- unsigned int promisc_req:1; /* Promiscuous mode requested. */
- unsigned int allmulti_req:1; /* All multicast mode requested. */
unsigned int hw_csum:1; /* Checksum offload is supported. */
unsigned int hw_csum_l2tun:1; /* Same for L2 tunnels. */
unsigned int hw_vlan_strip:1; /* VLAN stripping is supported. */
@@ -133,6 +117,7 @@ struct priv {
unsigned int isolated:1; /* Whether isolated mode is enabled. */
unsigned int tx_vec_en:1; /* Whether Tx vector is enabled. */
unsigned int rx_vec_en:1; /* Whether Rx vector is enabled. */
+ unsigned int counter_set_supported:1; /* Counter set is supported. */
/* Whether Tx offloads for tunneled packets are supported. */
unsigned int max_tso_payload_sz; /* Maximum TCP payload for TSO. */
unsigned int txq_inline; /* Maximum packet size for inlining. */
@@ -141,38 +126,31 @@ struct priv {
/* RX/TX queues. */
unsigned int rxqs_n; /* RX queues array size. */
unsigned int txqs_n; /* TX queues array size. */
- struct rxq *(*rxqs)[]; /* RX queues. */
- struct txq *(*txqs)[]; /* TX queues. */
- /* Indirection tables referencing all RX WQs. */
- struct ibv_exp_rwq_ind_table *(*ind_tables)[];
- unsigned int ind_tables_n; /* Number of indirection tables. */
+ struct mlx5_rxq_data *(*rxqs)[]; /* RX queues. */
+ struct mlx5_txq_data *(*txqs)[]; /* TX queues. */
unsigned int ind_table_max_size; /* Maximum indirection table size. */
- /* Hash RX QPs feeding the indirection table. */
- struct hash_rxq (*hash_rxqs)[];
- unsigned int hash_rxqs_n; /* Hash RX QPs array size. */
- /* RSS configuration array indexed by hash RX queue type. */
- struct rte_eth_rss_conf *(*rss_conf)[];
- uint64_t rss_hf; /* RSS DPDK bit field of active RSS. */
+ struct rte_eth_rss_conf rss_conf; /* RSS configuration. */
struct rte_intr_handle intr_handle; /* Interrupt handler. */
unsigned int (*reta_idx)[]; /* RETA index table. */
unsigned int reta_idx_n; /* RETA index size. */
- struct fdir_filter_list *fdir_filter_list; /* Flow director rules. */
- struct fdir_queue *fdir_drop_queue; /* Flow director drop queue. */
- struct rte_flow_drop *flow_drop_queue; /* Flow drop queue. */
- TAILQ_HEAD(mlx5_flows, rte_flow) flows; /* RTE Flow rules. */
+ struct mlx5_hrxq_drop *flow_drop_queue; /* Flow drop queue. */
+ struct mlx5_flows flows; /* RTE Flow rules. */
+ struct mlx5_flows ctrl_flows; /* Control flow rules. */
+ LIST_HEAD(mr, mlx5_mr) mr; /* Memory region. */
+ LIST_HEAD(rxq, mlx5_rxq_ctrl) rxqsctrl; /* DPDK Rx queues. */
+ LIST_HEAD(rxqibv, mlx5_rxq_ibv) rxqsibv; /* Verbs Rx queues. */
+ LIST_HEAD(hrxq, mlx5_hrxq) hrxqs; /* Verbs Hash Rx queues. */
+ LIST_HEAD(txq, mlx5_txq_ctrl) txqsctrl; /* DPDK Tx queues. */
+ LIST_HEAD(txqibv, mlx5_txq_ibv) txqsibv; /* Verbs Tx queues. */
+ /* Verbs Indirection tables. */
+ LIST_HEAD(ind_tables, mlx5_ind_table_ibv) ind_tbls;
uint32_t link_speed_capa; /* Link speed capabilities. */
struct mlx5_xstats_ctrl xstats_ctrl; /* Extended stats control. */
rte_spinlock_t lock; /* Lock for control functions. */
+ int primary_socket; /* Unix socket for primary process. */
+ struct rte_intr_handle intr_handle_socket; /* Interrupt handler. */
};
-/* Local storage for secondary process data. */
-struct mlx5_secondary_data {
- struct rte_eth_dev_data data; /* Local device data. */
- struct priv *primary_priv; /* Private structure from primary. */
- struct rte_eth_dev_data *shared_dev_data; /* Shared device data. */
- rte_spinlock_t lock; /* Port configuration lock. */
-} mlx5_secondary_data[RTE_MAX_ETHPORTS];
-
/**
* Lock private structure to protect it from concurrent access in the
* control path.
@@ -228,28 +206,19 @@ void priv_dev_interrupt_handler_uninstall(struct priv *, struct rte_eth_dev *);
void priv_dev_interrupt_handler_install(struct priv *, struct rte_eth_dev *);
int mlx5_set_link_down(struct rte_eth_dev *dev);
int mlx5_set_link_up(struct rte_eth_dev *dev);
-struct priv *mlx5_secondary_data_setup(struct priv *priv);
-void priv_select_tx_function(struct priv *);
-void priv_select_rx_function(struct priv *);
+void priv_dev_select_tx_function(struct priv *priv, struct rte_eth_dev *dev);
+void priv_dev_select_rx_function(struct priv *priv, struct rte_eth_dev *dev);
/* mlx5_mac.c */
int priv_get_mac(struct priv *, uint8_t (*)[ETHER_ADDR_LEN]);
-void hash_rxq_mac_addrs_del(struct hash_rxq *);
-void priv_mac_addrs_disable(struct priv *);
void mlx5_mac_addr_remove(struct rte_eth_dev *, uint32_t);
-int hash_rxq_mac_addrs_add(struct hash_rxq *);
-int priv_mac_addr_add(struct priv *, unsigned int,
- const uint8_t (*)[ETHER_ADDR_LEN]);
-int priv_mac_addrs_enable(struct priv *);
int mlx5_mac_addr_add(struct rte_eth_dev *, struct ether_addr *, uint32_t,
uint32_t);
void mlx5_mac_addr_set(struct rte_eth_dev *, struct ether_addr *);
/* mlx5_rss.c */
-int rss_hash_rss_conf_new_key(struct priv *, const uint8_t *, unsigned int,
- uint64_t);
int mlx5_rss_hash_update(struct rte_eth_dev *, struct rte_eth_rss_conf *);
int mlx5_rss_hash_conf_get(struct rte_eth_dev *, struct rte_eth_rss_conf *);
int priv_rss_reta_index_resize(struct priv *, unsigned int);
@@ -260,10 +229,6 @@ int mlx5_dev_rss_reta_update(struct rte_eth_dev *,
/* mlx5_rxmode.c */
-int priv_special_flow_enable(struct priv *, enum hash_rxq_flow_type);
-void priv_special_flow_disable(struct priv *, enum hash_rxq_flow_type);
-int priv_special_flow_enable_all(struct priv *);
-void priv_special_flow_disable_all(struct priv *);
void mlx5_promiscuous_enable(struct rte_eth_dev *);
void mlx5_promiscuous_disable(struct rte_eth_dev *);
void mlx5_allmulticast_enable(struct rte_eth_dev *);
@@ -272,7 +237,7 @@ void mlx5_allmulticast_disable(struct rte_eth_dev *);
/* mlx5_stats.c */
void priv_xstats_init(struct priv *);
-void mlx5_stats_get(struct rte_eth_dev *, struct rte_eth_stats *);
+int mlx5_stats_get(struct rte_eth_dev *, struct rte_eth_stats *);
void mlx5_stats_reset(struct rte_eth_dev *);
int mlx5_xstats_get(struct rte_eth_dev *,
struct rte_eth_xstat *, unsigned int);
@@ -283,26 +248,22 @@ int mlx5_xstats_get_names(struct rte_eth_dev *,
/* mlx5_vlan.c */
int mlx5_vlan_filter_set(struct rte_eth_dev *, uint16_t, int);
-void mlx5_vlan_offload_set(struct rte_eth_dev *, int);
+int mlx5_vlan_offload_set(struct rte_eth_dev *, int);
void mlx5_vlan_strip_queue_set(struct rte_eth_dev *, uint16_t, int);
/* mlx5_trigger.c */
int mlx5_dev_start(struct rte_eth_dev *);
void mlx5_dev_stop(struct rte_eth_dev *);
+int priv_dev_traffic_enable(struct priv *, struct rte_eth_dev *);
+int priv_dev_traffic_disable(struct priv *, struct rte_eth_dev *);
+int priv_dev_traffic_restart(struct priv *, struct rte_eth_dev *);
+int mlx5_traffic_restart(struct rte_eth_dev *);
-/* mlx5_fdir.c */
+/* mlx5_flow.c */
-void priv_fdir_queue_destroy(struct priv *, struct fdir_queue *);
-int fdir_init_filters_list(struct priv *);
-void priv_fdir_delete_filters_list(struct priv *);
-void priv_fdir_disable(struct priv *);
-void priv_fdir_enable(struct priv *);
int mlx5_dev_filter_ctrl(struct rte_eth_dev *, enum rte_filter_type,
enum rte_filter_op, void *);
-
-/* mlx5_flow.c */
-
int mlx5_flow_validate(struct rte_eth_dev *, const struct rte_flow_attr *,
const struct rte_flow_item [],
const struct rte_flow_action [],
@@ -314,10 +275,35 @@ struct rte_flow *mlx5_flow_create(struct rte_eth_dev *,
struct rte_flow_error *);
int mlx5_flow_destroy(struct rte_eth_dev *, struct rte_flow *,
struct rte_flow_error *);
+void priv_flow_flush(struct priv *, struct mlx5_flows *);
int mlx5_flow_flush(struct rte_eth_dev *, struct rte_flow_error *);
+int mlx5_flow_query(struct rte_eth_dev *, struct rte_flow *,
+ enum rte_flow_action_type, void *,
+ struct rte_flow_error *);
int mlx5_flow_isolate(struct rte_eth_dev *, int, struct rte_flow_error *);
-int priv_flow_start(struct priv *);
-void priv_flow_stop(struct priv *);
-int priv_flow_rxq_in_use(struct priv *, struct rxq *);
+int priv_flow_start(struct priv *, struct mlx5_flows *);
+void priv_flow_stop(struct priv *, struct mlx5_flows *);
+int priv_flow_verify(struct priv *);
+int mlx5_ctrl_flow_vlan(struct rte_eth_dev *, struct rte_flow_item_eth *,
+ struct rte_flow_item_eth *, struct rte_flow_item_vlan *,
+ struct rte_flow_item_vlan *);
+int mlx5_ctrl_flow(struct rte_eth_dev *, struct rte_flow_item_eth *,
+ struct rte_flow_item_eth *);
+int priv_flow_create_drop_queue(struct priv *);
+void priv_flow_delete_drop_queue(struct priv *);
+
+/* mlx5_socket.c */
+
+int priv_socket_init(struct priv *priv);
+int priv_socket_uninit(struct priv *priv);
+void priv_socket_handle(struct priv *priv);
+int priv_socket_connect(struct priv *priv);
+
+/* mlx5_mr.c */
+
+struct mlx5_mr *priv_mr_new(struct priv *, struct rte_mempool *);
+struct mlx5_mr *priv_mr_get(struct priv *, struct rte_mempool *);
+int priv_mr_release(struct priv *, struct mlx5_mr *);
+int priv_mr_verify(struct priv *);
#endif /* RTE_PMD_MLX5_H_ */
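
The priv_socket_*() helpers declared above back the new primary/secondary
process support: a secondary process connects to a Unix socket owned by
the primary and receives the Verbs command file descriptor it needs to
remap the Tx UAR region (see priv_socket_connect() and priv_tx_uar_remap()
in mlx5.c). mlx5_socket.c itself is not reproduced in this listing; the
sketch below shows the standard SCM_RIGHTS mechanism such fd passing
relies on, with illustrative names only.

#include <string.h>
#include <sys/socket.h>
#include <sys/uio.h>

/* Sketch: receive a file descriptor over an AF_UNIX socket via SCM_RIGHTS. */
static int
recv_cmd_fd(int sock)
{
	char c;
	struct iovec iov = { .iov_base = &c, .iov_len = 1 };
	union {
		struct cmsghdr hdr;
		char buf[CMSG_SPACE(sizeof(int))];
	} control;
	struct msghdr msg = {
		.msg_iov = &iov,
		.msg_iovlen = 1,
		.msg_control = control.buf,
		.msg_controllen = sizeof(control.buf),
	};
	struct cmsghdr *cmsg;
	int fd = -1;

	if (recvmsg(sock, &msg, 0) < 0)
		return -1;
	cmsg = CMSG_FIRSTHDR(&msg);
	if (cmsg != NULL && cmsg->cmsg_level == SOL_SOCKET &&
	    cmsg->cmsg_type == SCM_RIGHTS)
		memcpy(&fd, CMSG_DATA(cmsg), sizeof(fd));
	return fd;
}
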
diff --git a/drivers/net/mlx5/mlx5_defs.h b/drivers/net/mlx5/mlx5_defs.h
index a76bc6f6..3a7706cf 100644
--- a/drivers/net/mlx5/mlx5_defs.h
+++ b/drivers/net/mlx5/mlx5_defs.h
@@ -45,9 +45,6 @@
/* Maximum number of simultaneous VLAN filters. */
#define MLX5_MAX_VLAN_IDS 128
-/* Maximum number of special flows. */
-#define MLX5_MAX_SPECIAL_FLOWS 4
-
/*
* Request TX completion every time descriptors reach this threshold since
* the previous request. Must be a power of two for performance reasons.
@@ -100,7 +97,8 @@
/*
* Maximum size of burst for vectorized Tx. This is related to the maximum size
- * of Enhaned MPW (eMPW) WQE as vectorized Tx is supported with eMPW.
+ * of Enhanced MPW (eMPW) WQE as vectorized Tx is supported with eMPW.
+ * Careful when changing, large value can cause WQE DS to overlap.
*/
#define MLX5_VPMD_TX_MAX_BURST 32U
diff --git a/drivers/net/mlx5/mlx5_ethdev.c b/drivers/net/mlx5/mlx5_ethdev.c
index b0eb3cdf..c31ea4b6 100644
--- a/drivers/net/mlx5/mlx5_ethdev.c
+++ b/drivers/net/mlx5/mlx5_ethdev.c
@@ -31,6 +31,8 @@
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
+#define _GNU_SOURCE
+
#include <stddef.h>
#include <assert.h>
#include <unistd.h>
@@ -49,21 +51,17 @@
#include <linux/sockios.h>
#include <linux/version.h>
#include <fcntl.h>
+#include <stdalign.h>
+#include <sys/un.h>
-/* DPDK headers don't like -pedantic. */
-#ifdef PEDANTIC
-#pragma GCC diagnostic ignored "-Wpedantic"
-#endif
#include <rte_atomic.h>
#include <rte_ethdev.h>
+#include <rte_bus_pci.h>
#include <rte_mbuf.h>
#include <rte_common.h>
#include <rte_interrupts.h>
#include <rte_alarm.h>
#include <rte_malloc.h>
-#ifdef PEDANTIC
-#pragma GCC diagnostic error "-Wpedantic"
-#endif
#include "mlx5.h"
#include "mlx5_rxtx.h"
@@ -119,7 +117,6 @@ struct ethtool_link_settings {
#define ETHTOOL_LINK_MODE_100000baseCR4_Full_BIT 38
#define ETHTOOL_LINK_MODE_100000baseLR4_ER4_Full_BIT 39
#endif
-#define ETHTOOL_LINK_MODE_MASK_MAX_KERNEL_NU32 (SCHAR_MAX)
/**
* Return private structure associated with an Ethernet device.
@@ -133,12 +130,7 @@ struct ethtool_link_settings {
struct priv *
mlx5_get_priv(struct rte_eth_dev *dev)
{
- struct mlx5_secondary_data *sd;
-
- if (!mlx5_is_secondary())
- return dev->data->dev_private;
- sd = &mlx5_secondary_data[dev->data->port_id];
- return sd->data.dev_private;
+ return dev->data->dev_private;
}
/**
@@ -150,7 +142,7 @@ mlx5_get_priv(struct rte_eth_dev *dev)
inline int
mlx5_is_secondary(void)
{
- return rte_eal_process_type() != RTE_PROC_PRIMARY;
+ return rte_eal_process_type() == RTE_PROC_SECONDARY;
}
/**
@@ -174,7 +166,7 @@ priv_get_ifname(const struct priv *priv, char (*ifname)[IF_NAMESIZE])
char match[IF_NAMESIZE] = "";
{
- MKSTR(path, "%s/device/net", priv->ctx->device->ibdev_path);
+ MKSTR(path, "%s/device/net", priv->ibdev_path);
dir = opendir(path);
if (dir == NULL)
@@ -192,7 +184,7 @@ priv_get_ifname(const struct priv *priv, char (*ifname)[IF_NAMESIZE])
continue;
MKSTR(path, "%s/device/net/%s/%s",
- priv->ctx->device->ibdev_path, name,
+ priv->ibdev_path, name,
(dev_type ? "dev_id" : "dev_port"));
file = fopen(path, "rb");
@@ -280,11 +272,11 @@ priv_sysfs_read(const struct priv *priv, const char *entry,
if (priv_is_ib_cntr(entry)) {
MKSTR(path, "%s/ports/1/hw_counters/%s",
- priv->ctx->device->ibdev_path, entry);
+ priv->ibdev_path, entry);
file = fopen(path, "rb");
} else {
MKSTR(path, "%s/device/net/%s/%s",
- priv->ctx->device->ibdev_path, ifname, entry);
+ priv->ibdev_path, ifname, entry);
file = fopen(path, "rb");
}
if (file == NULL)
@@ -327,8 +319,7 @@ priv_sysfs_write(const struct priv *priv, const char *entry,
if (priv_get_ifname(priv, &ifname))
return -1;
- MKSTR(path, "%s/device/net/%s/%s", priv->ctx->device->ibdev_path,
- ifname, entry);
+ MKSTR(path, "%s/device/net/%s/%s", priv->ibdev_path, ifname, entry);
file = fopen(path, "wb");
if (file == NULL)
@@ -585,8 +576,29 @@ dev_configure(struct rte_eth_dev *dev)
unsigned int i;
unsigned int j;
unsigned int reta_idx_n;
+ const uint8_t use_app_rss_key =
+ !!dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key_len;
- priv->rss_hf = dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf;
+ if (use_app_rss_key &&
+ (dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key_len !=
+ rss_hash_default_key_len)) {
+ /* MLX5 RSS only support 40bytes key. */
+ return EINVAL;
+ }
+ priv->rss_conf.rss_key =
+ rte_realloc(priv->rss_conf.rss_key,
+ rss_hash_default_key_len, 0);
+ if (!priv->rss_conf.rss_key) {
+ ERROR("cannot allocate RSS hash key memory (%u)", rxqs_n);
+ return ENOMEM;
+ }
+ memcpy(priv->rss_conf.rss_key,
+ use_app_rss_key ?
+ dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key :
+ rss_hash_default_key,
+ rss_hash_default_key_len);
+ priv->rss_conf.rss_key_len = rss_hash_default_key_len;
+ priv->rss_conf.rss_hf = dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf;
priv->rxqs = (void *)dev->data->rx_queues;
priv->txqs = (void *)dev->data->tx_queues;
if (txqs_n != priv->txqs_n) {
@@ -672,8 +684,8 @@ mlx5_dev_infos_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *info)
* Since we need one CQ per QP, the limit is the minimum number
* between the two values.
*/
- max = ((priv->device_attr.max_cq > priv->device_attr.max_qp) ?
- priv->device_attr.max_qp : priv->device_attr.max_cq);
+ max = RTE_MIN(priv->device_attr.orig_attr.max_cq,
+ priv->device_attr.orig_attr.max_qp);
/* If max >= 65535 then max = 0, max_rx_queues is uint16_t. */
if (max >= 65535)
max = 65535;
@@ -686,7 +698,9 @@ mlx5_dev_infos_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *info)
DEV_RX_OFFLOAD_UDP_CKSUM |
DEV_RX_OFFLOAD_TCP_CKSUM) :
0) |
- (priv->hw_vlan_strip ? DEV_RX_OFFLOAD_VLAN_STRIP : 0);
+ (priv->hw_vlan_strip ? DEV_RX_OFFLOAD_VLAN_STRIP : 0) |
+ DEV_RX_OFFLOAD_TIMESTAMP;
+
if (!priv->mps)
info->tx_offload_capa = DEV_TX_OFFLOAD_VLAN_INSERT;
if (priv->hw_csum)
@@ -704,9 +718,7 @@ mlx5_dev_infos_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *info)
info->if_index = if_nametoindex(ifname);
info->reta_size = priv->reta_idx_n ?
priv->reta_idx_n : priv->ind_table_max_size;
- info->hash_key_size = ((*priv->rss_conf) ?
- (*priv->rss_conf)[0]->rss_key_len :
- 0);
+ info->hash_key_size = priv->rss_conf.rss_key_len;
info->speed_capa = priv->link_speed_capa;
priv_unlock(priv);
}
@@ -816,12 +828,7 @@ static int
mlx5_link_update_unlocked_gs(struct rte_eth_dev *dev, int wait_to_complete)
{
struct priv *priv = mlx5_get_priv(dev);
- __extension__ struct {
- struct ethtool_link_settings edata;
- uint32_t link_mode_data[3 *
- ETHTOOL_LINK_MODE_MASK_MAX_KERNEL_NU32];
- } ecmd;
-
+ struct ethtool_link_settings gcmd = { .cmd = ETHTOOL_GLINKSETTINGS };
struct ifreq ifr;
struct rte_eth_link dev_link;
uint64_t sc;
@@ -834,23 +841,29 @@ mlx5_link_update_unlocked_gs(struct rte_eth_dev *dev, int wait_to_complete)
memset(&dev_link, 0, sizeof(dev_link));
dev_link.link_status = ((ifr.ifr_flags & IFF_UP) &&
(ifr.ifr_flags & IFF_RUNNING));
- memset(&ecmd, 0, sizeof(ecmd));
- ecmd.edata.cmd = ETHTOOL_GLINKSETTINGS;
- ifr.ifr_data = (void *)&ecmd;
+ ifr.ifr_data = (void *)&gcmd;
if (priv_ifreq(priv, SIOCETHTOOL, &ifr)) {
DEBUG("ioctl(SIOCETHTOOL, ETHTOOL_GLINKSETTINGS) failed: %s",
strerror(errno));
return -1;
}
- ecmd.edata.link_mode_masks_nwords = -ecmd.edata.link_mode_masks_nwords;
+ gcmd.link_mode_masks_nwords = -gcmd.link_mode_masks_nwords;
+
+ alignas(struct ethtool_link_settings)
+ uint8_t data[offsetof(struct ethtool_link_settings, link_mode_masks) +
+ sizeof(uint32_t) * gcmd.link_mode_masks_nwords * 3];
+ struct ethtool_link_settings *ecmd = (void *)data;
+
+ *ecmd = gcmd;
+ ifr.ifr_data = (void *)ecmd;
if (priv_ifreq(priv, SIOCETHTOOL, &ifr)) {
DEBUG("ioctl(SIOCETHTOOL, ETHTOOL_GLINKSETTINGS) failed: %s",
strerror(errno));
return -1;
}
- dev_link.link_speed = ecmd.edata.speed;
- sc = ecmd.edata.link_mode_masks[0] |
- ((uint64_t)ecmd.edata.link_mode_masks[1] << 32);
+ dev_link.link_speed = ecmd->speed;
+ sc = ecmd->link_mode_masks[0] |
+ ((uint64_t)ecmd->link_mode_masks[1] << 32);
priv->link_speed_capa = 0;
if (sc & ETHTOOL_LINK_MODE_Autoneg_BIT)
priv->link_speed_capa |= ETH_LINK_SPEED_AUTONEG;
@@ -886,7 +899,7 @@ mlx5_link_update_unlocked_gs(struct rte_eth_dev *dev, int wait_to_complete)
ETHTOOL_LINK_MODE_100000baseCR4_Full_BIT |
ETHTOOL_LINK_MODE_100000baseLR4_ER4_Full_BIT))
priv->link_speed_capa |= ETH_LINK_SPEED_100G;
- dev_link.link_duplex = ((ecmd.edata.duplex == DUPLEX_HALF) ?
+ dev_link.link_duplex = ((ecmd->duplex == DUPLEX_HALF) ?
ETH_LINK_HALF_DUPLEX : ETH_LINK_FULL_DUPLEX);
dev_link.link_autoneg = !(dev->data->dev_conf.link_speeds &
ETH_LINK_SPEED_FIXED);
@@ -1124,47 +1137,77 @@ mlx5_ibv_device_to_pci_addr(const struct ibv_device *device,
}
/**
- * Link status handler.
+ * Update the link status.
*
* @param priv
* Pointer to private structure.
- * @param dev
- * Pointer to the rte_eth_dev structure.
*
* @return
- * Nonzero if the callback process can be called immediately.
+ * Zero if the callback process can be called immediately.
*/
static int
-priv_dev_link_status_handler(struct priv *priv, struct rte_eth_dev *dev)
+priv_link_status_update(struct priv *priv)
+{
+ struct rte_eth_link *link = &priv->dev->data->dev_link;
+
+ mlx5_link_update(priv->dev, 0);
+ if (((link->link_speed == 0) && link->link_status) ||
+ ((link->link_speed != 0) && !link->link_status)) {
+ /*
+ * Inconsistent status. Event likely occurred before the
+ * kernel netdevice exposes the new status.
+ */
+ if (!priv->pending_alarm) {
+ priv->pending_alarm = 1;
+ rte_eal_alarm_set(MLX5_ALARM_TIMEOUT_US,
+ mlx5_dev_link_status_handler,
+ priv->dev);
+ }
+ return 1;
+ } else if (unlikely(priv->pending_alarm)) {
+ /* Link interrupt occurred while alarm is already scheduled. */
+ priv->pending_alarm = 0;
+ rte_eal_alarm_cancel(mlx5_dev_link_status_handler, priv->dev);
+ }
+ return 0;
+}
+
+/**
+ * Device status handler.
+ *
+ * @param priv
+ * Pointer to private structure.
+ * @param events
+ * Pointer to event flags holder.
+ *
+ * @return
+ * Events bitmap of callback process which can be called immediately.
+ */
+static uint32_t
+priv_dev_status_handler(struct priv *priv)
{
struct ibv_async_event event;
- struct rte_eth_link *link = &dev->data->dev_link;
- int ret = 0;
+ uint32_t ret = 0;
/* Read all message and acknowledge them. */
for (;;) {
if (ibv_get_async_event(priv->ctx, &event))
break;
-
- if (event.event_type != IBV_EVENT_PORT_ACTIVE &&
- event.event_type != IBV_EVENT_PORT_ERR)
+ if ((event.event_type == IBV_EVENT_PORT_ACTIVE ||
+ event.event_type == IBV_EVENT_PORT_ERR) &&
+ (priv->dev->data->dev_conf.intr_conf.lsc == 1))
+ ret |= (1 << RTE_ETH_EVENT_INTR_LSC);
+ else if (event.event_type == IBV_EVENT_DEVICE_FATAL &&
+ priv->dev->data->dev_conf.intr_conf.rmv == 1)
+ ret |= (1 << RTE_ETH_EVENT_INTR_RMV);
+ else
DEBUG("event type %d on port %d not handled",
event.event_type, event.element.port_num);
ibv_ack_async_event(&event);
}
- mlx5_link_update(dev, 0);
- if (((link->link_speed == 0) && link->link_status) ||
- ((link->link_speed != 0) && !link->link_status)) {
- if (!priv->pending_alarm) {
- /* Inconsistent status, check again later. */
- priv->pending_alarm = 1;
- rte_eal_alarm_set(MLX5_ALARM_TIMEOUT_US,
- mlx5_dev_link_status_handler,
- dev);
- }
- } else {
- ret = 1;
- }
+ if (ret & (1 << RTE_ETH_EVENT_INTR_LSC))
+ if (priv_link_status_update(priv))
+ ret &= ~(1 << RTE_ETH_EVENT_INTR_LSC);
return ret;
}
@@ -1184,9 +1227,9 @@ mlx5_dev_link_status_handler(void *arg)
priv_lock(priv);
assert(priv->pending_alarm == 1);
priv->pending_alarm = 0;
- ret = priv_dev_link_status_handler(priv, dev);
+ ret = priv_link_status_update(priv);
priv_unlock(priv);
- if (ret)
+ if (!ret)
_rte_eth_dev_callback_process(dev, RTE_ETH_EVENT_INTR_LSC, NULL,
NULL);
}
@@ -1204,14 +1247,34 @@ mlx5_dev_interrupt_handler(void *cb_arg)
{
struct rte_eth_dev *dev = cb_arg;
struct priv *priv = dev->data->dev_private;
- int ret;
+ uint32_t events;
priv_lock(priv);
- ret = priv_dev_link_status_handler(priv, dev);
+ events = priv_dev_status_handler(priv);
priv_unlock(priv);
- if (ret)
+ if (events & (1 << RTE_ETH_EVENT_INTR_LSC))
_rte_eth_dev_callback_process(dev, RTE_ETH_EVENT_INTR_LSC, NULL,
NULL);
+ if (events & (1 << RTE_ETH_EVENT_INTR_RMV))
+ _rte_eth_dev_callback_process(dev, RTE_ETH_EVENT_INTR_RMV, NULL,
+ NULL);
+}
+
+/**
+ * Handle interrupts from the socket.
+ *
+ * @param cb_arg
+ * Callback argument.
+ */
+static void
+mlx5_dev_handler_socket(void *cb_arg)
+{
+ struct rte_eth_dev *dev = cb_arg;
+ struct priv *priv = dev->data->dev_private;
+
+ priv_lock(priv);
+ priv_socket_handle(priv);
+ priv_unlock(priv);
}
/**
@@ -1225,16 +1288,20 @@ mlx5_dev_interrupt_handler(void *cb_arg)
void
priv_dev_interrupt_handler_uninstall(struct priv *priv, struct rte_eth_dev *dev)
{
- if (!dev->data->dev_conf.intr_conf.lsc)
- return;
- rte_intr_callback_unregister(&priv->intr_handle,
- mlx5_dev_interrupt_handler,
- dev);
+ if (dev->data->dev_conf.intr_conf.lsc ||
+ dev->data->dev_conf.intr_conf.rmv)
+ rte_intr_callback_unregister(&priv->intr_handle,
+ mlx5_dev_interrupt_handler, dev);
+ if (priv->primary_socket)
+ rte_intr_callback_unregister(&priv->intr_handle_socket,
+ mlx5_dev_handler_socket, dev);
if (priv->pending_alarm)
rte_eal_alarm_cancel(mlx5_dev_link_status_handler, dev);
priv->pending_alarm = 0;
priv->intr_handle.fd = 0;
priv->intr_handle.type = RTE_INTR_HANDLE_UNKNOWN;
+ priv->intr_handle_socket.fd = 0;
+ priv->intr_handle_socket.type = RTE_INTR_HANDLE_UNKNOWN;
}
/**
@@ -1250,20 +1317,29 @@ priv_dev_interrupt_handler_install(struct priv *priv, struct rte_eth_dev *dev)
{
int rc, flags;
- if (!dev->data->dev_conf.intr_conf.lsc)
- return;
+ assert(!mlx5_is_secondary());
assert(priv->ctx->async_fd > 0);
flags = fcntl(priv->ctx->async_fd, F_GETFL);
rc = fcntl(priv->ctx->async_fd, F_SETFL, flags | O_NONBLOCK);
if (rc < 0) {
INFO("failed to change file descriptor async event queue");
dev->data->dev_conf.intr_conf.lsc = 0;
- } else {
+ dev->data->dev_conf.intr_conf.rmv = 0;
+ }
+ if (dev->data->dev_conf.intr_conf.lsc ||
+ dev->data->dev_conf.intr_conf.rmv) {
priv->intr_handle.fd = priv->ctx->async_fd;
priv->intr_handle.type = RTE_INTR_HANDLE_EXT;
rte_intr_callback_register(&priv->intr_handle,
- mlx5_dev_interrupt_handler,
- dev);
+ mlx5_dev_interrupt_handler, dev);
+ }
+
+ rc = priv_socket_init(priv);
+ if (!rc && priv->primary_socket) {
+ priv->intr_handle_socket.fd = priv->primary_socket;
+ priv->intr_handle_socket.type = RTE_INTR_HANDLE_EXT;
+ rte_intr_callback_register(&priv->intr_handle_socket,
+ mlx5_dev_handler_socket, dev);
}
}
@@ -1271,7 +1347,9 @@ priv_dev_interrupt_handler_install(struct priv *priv, struct rte_eth_dev *dev)
* Change the link state (UP / DOWN).
*
* @param priv
- * Pointer to Ethernet device structure.
+ * Pointer to private data structure.
+ * @param dev
+ * Pointer to rte_eth_dev structure.
* @param up
* Nonzero for link up, otherwise link down.
*
@@ -1279,17 +1357,16 @@ priv_dev_interrupt_handler_install(struct priv *priv, struct rte_eth_dev *dev)
* 0 on success, errno value on failure.
*/
static int
-priv_set_link(struct priv *priv, int up)
+priv_dev_set_link(struct priv *priv, struct rte_eth_dev *dev, int up)
{
- struct rte_eth_dev *dev = priv->dev;
int err;
if (up) {
err = priv_set_flags(priv, ~IFF_UP, IFF_UP);
if (err)
return err;
- priv_select_tx_function(priv);
- priv_select_rx_function(priv);
+ priv_dev_select_tx_function(priv, dev);
+ priv_dev_select_rx_function(priv, dev);
} else {
err = priv_set_flags(priv, ~IFF_UP, ~IFF_UP);
if (err)
@@ -1316,7 +1393,7 @@ mlx5_set_link_down(struct rte_eth_dev *dev)
int err;
priv_lock(priv);
- err = priv_set_link(priv, 0);
+ err = priv_dev_set_link(priv, dev, 0);
priv_unlock(priv);
return err;
}
@@ -1337,195 +1414,42 @@ mlx5_set_link_up(struct rte_eth_dev *dev)
int err;
priv_lock(priv);
- err = priv_set_link(priv, 1);
+ err = priv_dev_set_link(priv, dev, 1);
priv_unlock(priv);
return err;
}
/**
- * Configure secondary process queues from a private data pointer (primary
- * or secondary) and update burst callbacks. Can take place only once.
- *
- * All queues must have been previously created by the primary process to
- * avoid undefined behavior.
- *
- * @param priv
- * Private data pointer from either primary or secondary process.
- *
- * @return
- * Private data pointer from secondary process, NULL in case of error.
- */
-struct priv *
-mlx5_secondary_data_setup(struct priv *priv)
-{
- unsigned int port_id = 0;
- struct mlx5_secondary_data *sd;
- void **tx_queues;
- void **rx_queues;
- unsigned int nb_tx_queues;
- unsigned int nb_rx_queues;
- unsigned int i;
-
- /* priv must be valid at this point. */
- assert(priv != NULL);
- /* priv->dev must also be valid but may point to local memory from
- * another process, possibly with the same address and must not
- * be dereferenced yet. */
- assert(priv->dev != NULL);
- /* Determine port ID by finding out where priv comes from. */
- while (1) {
- sd = &mlx5_secondary_data[port_id];
- rte_spinlock_lock(&sd->lock);
- /* Primary process? */
- if (sd->primary_priv == priv)
- break;
- /* Secondary process? */
- if (sd->data.dev_private == priv)
- break;
- rte_spinlock_unlock(&sd->lock);
- if (++port_id == RTE_DIM(mlx5_secondary_data))
- port_id = 0;
- }
- /* Switch to secondary private structure. If private data has already
- * been updated by another thread, there is nothing else to do. */
- priv = sd->data.dev_private;
- if (priv->dev->data == &sd->data)
- goto end;
- /* Sanity checks. Secondary private structure is supposed to point
- * to local eth_dev, itself still pointing to the shared device data
- * structure allocated by the primary process. */
- assert(sd->shared_dev_data != &sd->data);
- assert(sd->data.nb_tx_queues == 0);
- assert(sd->data.tx_queues == NULL);
- assert(sd->data.nb_rx_queues == 0);
- assert(sd->data.rx_queues == NULL);
- assert(priv != sd->primary_priv);
- assert(priv->dev->data == sd->shared_dev_data);
- assert(priv->txqs_n == 0);
- assert(priv->txqs == NULL);
- assert(priv->rxqs_n == 0);
- assert(priv->rxqs == NULL);
- nb_tx_queues = sd->shared_dev_data->nb_tx_queues;
- nb_rx_queues = sd->shared_dev_data->nb_rx_queues;
- /* Allocate local storage for queues. */
- tx_queues = rte_zmalloc("secondary ethdev->tx_queues",
- sizeof(sd->data.tx_queues[0]) * nb_tx_queues,
- RTE_CACHE_LINE_SIZE);
- rx_queues = rte_zmalloc("secondary ethdev->rx_queues",
- sizeof(sd->data.rx_queues[0]) * nb_rx_queues,
- RTE_CACHE_LINE_SIZE);
- if (tx_queues == NULL || rx_queues == NULL)
- goto error;
- /* Lock to prevent control operations during setup. */
- priv_lock(priv);
- /* TX queues. */
- for (i = 0; i != nb_tx_queues; ++i) {
- struct txq *primary_txq = (*sd->primary_priv->txqs)[i];
- struct txq_ctrl *primary_txq_ctrl;
- struct txq_ctrl *txq_ctrl;
-
- if (primary_txq == NULL)
- continue;
- primary_txq_ctrl = container_of(primary_txq,
- struct txq_ctrl, txq);
- txq_ctrl = rte_calloc_socket("TXQ", 1, sizeof(*txq_ctrl) +
- (1 << primary_txq->elts_n) *
- sizeof(struct rte_mbuf *), 0,
- primary_txq_ctrl->socket);
- if (txq_ctrl != NULL) {
- if (txq_ctrl_setup(priv->dev,
- txq_ctrl,
- 1 << primary_txq->elts_n,
- primary_txq_ctrl->socket,
- NULL) == 0) {
- txq_ctrl->txq.stats.idx =
- primary_txq->stats.idx;
- tx_queues[i] = &txq_ctrl->txq;
- continue;
- }
- rte_free(txq_ctrl);
- }
- while (i) {
- txq_ctrl = tx_queues[--i];
- txq_cleanup(txq_ctrl);
- rte_free(txq_ctrl);
- }
- goto error;
- }
- /* RX queues. */
- for (i = 0; i != nb_rx_queues; ++i) {
- struct rxq_ctrl *primary_rxq =
- container_of((*sd->primary_priv->rxqs)[i],
- struct rxq_ctrl, rxq);
-
- if (primary_rxq == NULL)
- continue;
- /* Not supported yet. */
- rx_queues[i] = NULL;
- }
- /* Update everything. */
- priv->txqs = (void *)tx_queues;
- priv->txqs_n = nb_tx_queues;
- priv->rxqs = (void *)rx_queues;
- priv->rxqs_n = nb_rx_queues;
- sd->data.rx_queues = rx_queues;
- sd->data.tx_queues = tx_queues;
- sd->data.nb_rx_queues = nb_rx_queues;
- sd->data.nb_tx_queues = nb_tx_queues;
- sd->data.dev_link = sd->shared_dev_data->dev_link;
- sd->data.mtu = sd->shared_dev_data->mtu;
- memcpy(sd->data.rx_queue_state, sd->shared_dev_data->rx_queue_state,
- sizeof(sd->data.rx_queue_state));
- memcpy(sd->data.tx_queue_state, sd->shared_dev_data->tx_queue_state,
- sizeof(sd->data.tx_queue_state));
- sd->data.dev_flags = sd->shared_dev_data->dev_flags;
- /* Use local data from now on. */
- rte_mb();
- priv->dev->data = &sd->data;
- rte_mb();
- priv_select_tx_function(priv);
- priv_select_rx_function(priv);
- priv_unlock(priv);
-end:
- /* More sanity checks. */
- assert(priv->dev->data == &sd->data);
- rte_spinlock_unlock(&sd->lock);
- return priv;
-error:
- priv_unlock(priv);
- rte_free(tx_queues);
- rte_free(rx_queues);
- rte_spinlock_unlock(&sd->lock);
- return NULL;
-}
-
-/**
* Configure the TX function to use.
*
* @param priv
- * Pointer to private structure.
+ * Pointer to private data structure.
+ * @param dev
+ * Pointer to rte_eth_dev structure.
*/
void
-priv_select_tx_function(struct priv *priv)
+priv_dev_select_tx_function(struct priv *priv, struct rte_eth_dev *dev)
{
- priv->dev->tx_pkt_burst = mlx5_tx_burst;
+ assert(priv != NULL);
+ assert(dev != NULL);
+ dev->tx_pkt_burst = mlx5_tx_burst;
/* Select appropriate TX function. */
if (priv->mps == MLX5_MPW_ENHANCED) {
if (priv_check_vec_tx_support(priv) > 0) {
if (priv_check_raw_vec_tx_support(priv) > 0)
- priv->dev->tx_pkt_burst = mlx5_tx_burst_raw_vec;
+ dev->tx_pkt_burst = mlx5_tx_burst_raw_vec;
else
- priv->dev->tx_pkt_burst = mlx5_tx_burst_vec;
+ dev->tx_pkt_burst = mlx5_tx_burst_vec;
DEBUG("selected Enhanced MPW TX vectorized function");
} else {
- priv->dev->tx_pkt_burst = mlx5_tx_burst_empw;
+ dev->tx_pkt_burst = mlx5_tx_burst_empw;
DEBUG("selected Enhanced MPW TX function");
}
} else if (priv->mps && priv->txq_inline) {
- priv->dev->tx_pkt_burst = mlx5_tx_burst_mpw_inline;
+ dev->tx_pkt_burst = mlx5_tx_burst_mpw_inline;
DEBUG("selected MPW inline TX function");
} else if (priv->mps) {
- priv->dev->tx_pkt_burst = mlx5_tx_burst_mpw;
+ dev->tx_pkt_burst = mlx5_tx_burst_mpw;
DEBUG("selected MPW TX function");
}
}
@@ -1534,16 +1458,19 @@ priv_select_tx_function(struct priv *priv)
* Configure the RX function to use.
*
* @param priv
- * Pointer to private structure.
+ * Pointer to private data structure.
+ * @param dev
+ * Pointer to rte_eth_dev structure.
*/
void
-priv_select_rx_function(struct priv *priv)
+priv_dev_select_rx_function(struct priv *priv, struct rte_eth_dev *dev)
{
+ assert(priv != NULL);
+ assert(dev != NULL);
if (priv_check_vec_rx_support(priv) > 0) {
- priv_prep_vec_rx_function(priv);
- priv->dev->rx_pkt_burst = mlx5_rx_burst_vec;
+ dev->rx_pkt_burst = mlx5_rx_burst_vec;
DEBUG("selected RX vectorized function");
} else {
- priv->dev->rx_pkt_burst = mlx5_rx_burst;
+ dev->rx_pkt_burst = mlx5_rx_burst;
}
}
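The extra "dev" argument in both selectors above is what removes the need to dereference priv->dev: each process, primary or secondary, now passes the rte_eth_dev it owns locally. A minimal sketch of a hypothetical caller (the helper name is invented for illustration, only the two selector functions come from the hunk above):

/*
 * Sketch (hypothetical caller, not part of the patch): select burst
 * functions for the rte_eth_dev owned by the calling process.
 */
static void
example_select_burst_functions(struct priv *priv, struct rte_eth_dev *dev)
{
	priv_dev_select_tx_function(priv, dev);
	priv_dev_select_rx_function(priv, dev);
}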
diff --git a/drivers/net/mlx5/mlx5_fdir.c b/drivers/net/mlx5/mlx5_fdir.c
deleted file mode 100644
index 34a7e69f..00000000
--- a/drivers/net/mlx5/mlx5_fdir.c
+++ /dev/null
@@ -1,1101 +0,0 @@
-/*-
- * BSD LICENSE
- *
- * Copyright 2015 6WIND S.A.
- * Copyright 2015 Mellanox.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * * Neither the name of 6WIND S.A. nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include <stddef.h>
-#include <assert.h>
-#include <stdint.h>
-#include <string.h>
-#include <errno.h>
-
-/* Verbs header. */
-/* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */
-#ifdef PEDANTIC
-#pragma GCC diagnostic ignored "-Wpedantic"
-#endif
-#include <infiniband/verbs.h>
-#ifdef PEDANTIC
-#pragma GCC diagnostic error "-Wpedantic"
-#endif
-
-/* DPDK headers don't like -pedantic. */
-#ifdef PEDANTIC
-#pragma GCC diagnostic ignored "-Wpedantic"
-#endif
-#include <rte_ether.h>
-#include <rte_malloc.h>
-#include <rte_ethdev.h>
-#include <rte_common.h>
-#include <rte_flow.h>
-#include <rte_flow_driver.h>
-#ifdef PEDANTIC
-#pragma GCC diagnostic error "-Wpedantic"
-#endif
-
-#include "mlx5.h"
-#include "mlx5_rxtx.h"
-
-struct fdir_flow_desc {
- uint16_t dst_port;
- uint16_t src_port;
- uint32_t src_ip[4];
- uint32_t dst_ip[4];
- uint8_t mac[6];
- uint16_t vlan_tag;
- enum hash_rxq_type type;
-};
-
-struct mlx5_fdir_filter {
- LIST_ENTRY(mlx5_fdir_filter) next;
- uint16_t queue; /* Queue assigned to if FDIR match. */
- enum rte_eth_fdir_behavior behavior;
- struct fdir_flow_desc desc;
- struct ibv_exp_flow *flow;
-};
-
-LIST_HEAD(fdir_filter_list, mlx5_fdir_filter);
-
-/**
- * Convert struct rte_eth_fdir_filter to mlx5 filter descriptor.
- *
- * @param[in] fdir_filter
- * DPDK filter structure to convert.
- * @param[out] desc
- * Resulting mlx5 filter descriptor.
- * @param mode
- * Flow director mode.
- */
-static void
-fdir_filter_to_flow_desc(const struct rte_eth_fdir_filter *fdir_filter,
- struct fdir_flow_desc *desc, enum rte_fdir_mode mode)
-{
- /* Initialize descriptor. */
- memset(desc, 0, sizeof(*desc));
-
- /* Set VLAN ID. */
- desc->vlan_tag = fdir_filter->input.flow_ext.vlan_tci;
-
- /* Set MAC address. */
- if (mode == RTE_FDIR_MODE_PERFECT_MAC_VLAN) {
- rte_memcpy(desc->mac,
- fdir_filter->input.flow.mac_vlan_flow.mac_addr.
- addr_bytes,
- sizeof(desc->mac));
- desc->type = HASH_RXQ_ETH;
- return;
- }
-
- /* Set mode */
- switch (fdir_filter->input.flow_type) {
- case RTE_ETH_FLOW_NONFRAG_IPV4_UDP:
- desc->type = HASH_RXQ_UDPV4;
- break;
- case RTE_ETH_FLOW_NONFRAG_IPV4_TCP:
- desc->type = HASH_RXQ_TCPV4;
- break;
- case RTE_ETH_FLOW_NONFRAG_IPV4_OTHER:
- desc->type = HASH_RXQ_IPV4;
- break;
- case RTE_ETH_FLOW_NONFRAG_IPV6_UDP:
- desc->type = HASH_RXQ_UDPV6;
- break;
- case RTE_ETH_FLOW_NONFRAG_IPV6_TCP:
- desc->type = HASH_RXQ_TCPV6;
- break;
- case RTE_ETH_FLOW_NONFRAG_IPV6_OTHER:
- desc->type = HASH_RXQ_IPV6;
- break;
- default:
- break;
- }
-
- /* Set flow values */
- switch (fdir_filter->input.flow_type) {
- case RTE_ETH_FLOW_NONFRAG_IPV4_UDP:
- case RTE_ETH_FLOW_NONFRAG_IPV4_TCP:
- desc->src_port = fdir_filter->input.flow.udp4_flow.src_port;
- desc->dst_port = fdir_filter->input.flow.udp4_flow.dst_port;
- /* fallthrough */
- case RTE_ETH_FLOW_NONFRAG_IPV4_OTHER:
- desc->src_ip[0] = fdir_filter->input.flow.ip4_flow.src_ip;
- desc->dst_ip[0] = fdir_filter->input.flow.ip4_flow.dst_ip;
- break;
- case RTE_ETH_FLOW_NONFRAG_IPV6_UDP:
- case RTE_ETH_FLOW_NONFRAG_IPV6_TCP:
- desc->src_port = fdir_filter->input.flow.udp6_flow.src_port;
- desc->dst_port = fdir_filter->input.flow.udp6_flow.dst_port;
- /* Fall through. */
- case RTE_ETH_FLOW_NONFRAG_IPV6_OTHER:
- rte_memcpy(desc->src_ip,
- fdir_filter->input.flow.ipv6_flow.src_ip,
- sizeof(desc->src_ip));
- rte_memcpy(desc->dst_ip,
- fdir_filter->input.flow.ipv6_flow.dst_ip,
- sizeof(desc->dst_ip));
- break;
- default:
- break;
- }
-}
-
-/**
- * Check if two flow descriptors overlap according to configured mask.
- *
- * @param priv
- * Private structure that provides flow director mask.
- * @param desc1
- * First flow descriptor to compare.
- * @param desc2
- * Second flow descriptor to compare.
- *
- * @return
- * Nonzero if descriptors overlap.
- */
-static int
-priv_fdir_overlap(const struct priv *priv,
- const struct fdir_flow_desc *desc1,
- const struct fdir_flow_desc *desc2)
-{
- const struct rte_eth_fdir_masks *mask =
- &priv->dev->data->dev_conf.fdir_conf.mask;
- unsigned int i;
-
- if (desc1->type != desc2->type)
- return 0;
- /* Ignore non masked bits. */
- for (i = 0; i != RTE_DIM(desc1->mac); ++i)
- if ((desc1->mac[i] & mask->mac_addr_byte_mask) !=
- (desc2->mac[i] & mask->mac_addr_byte_mask))
- return 0;
- if (((desc1->src_port & mask->src_port_mask) !=
- (desc2->src_port & mask->src_port_mask)) ||
- ((desc1->dst_port & mask->dst_port_mask) !=
- (desc2->dst_port & mask->dst_port_mask)))
- return 0;
- switch (desc1->type) {
- case HASH_RXQ_IPV4:
- case HASH_RXQ_UDPV4:
- case HASH_RXQ_TCPV4:
- if (((desc1->src_ip[0] & mask->ipv4_mask.src_ip) !=
- (desc2->src_ip[0] & mask->ipv4_mask.src_ip)) ||
- ((desc1->dst_ip[0] & mask->ipv4_mask.dst_ip) !=
- (desc2->dst_ip[0] & mask->ipv4_mask.dst_ip)))
- return 0;
- break;
- case HASH_RXQ_IPV6:
- case HASH_RXQ_UDPV6:
- case HASH_RXQ_TCPV6:
- for (i = 0; i != RTE_DIM(desc1->src_ip); ++i)
- if (((desc1->src_ip[i] & mask->ipv6_mask.src_ip[i]) !=
- (desc2->src_ip[i] & mask->ipv6_mask.src_ip[i])) ||
- ((desc1->dst_ip[i] & mask->ipv6_mask.dst_ip[i]) !=
- (desc2->dst_ip[i] & mask->ipv6_mask.dst_ip[i])))
- return 0;
- break;
- default:
- break;
- }
- return 1;
-}
-
-/**
- * Create flow director steering rule for a specific filter.
- *
- * @param priv
- * Private structure.
- * @param mlx5_fdir_filter
- * Filter to create a steering rule for.
- * @param fdir_queue
- * Flow director queue for matching packets.
- *
- * @return
- * 0 on success, errno value on failure.
- */
-static int
-priv_fdir_flow_add(struct priv *priv,
- struct mlx5_fdir_filter *mlx5_fdir_filter,
- struct fdir_queue *fdir_queue)
-{
- struct ibv_exp_flow *flow;
- struct fdir_flow_desc *desc = &mlx5_fdir_filter->desc;
- enum rte_fdir_mode fdir_mode =
- priv->dev->data->dev_conf.fdir_conf.mode;
- struct rte_eth_fdir_masks *mask =
- &priv->dev->data->dev_conf.fdir_conf.mask;
- FLOW_ATTR_SPEC_ETH(data, priv_flow_attr(priv, NULL, 0, desc->type));
- struct ibv_exp_flow_attr *attr = &data->attr;
- uintptr_t spec_offset = (uintptr_t)&data->spec;
- struct ibv_exp_flow_spec_eth *spec_eth;
- struct ibv_exp_flow_spec_ipv4 *spec_ipv4;
- struct ibv_exp_flow_spec_ipv6 *spec_ipv6;
- struct ibv_exp_flow_spec_tcp_udp *spec_tcp_udp;
- struct mlx5_fdir_filter *iter_fdir_filter;
- unsigned int i;
-
- /* Abort if an existing flow overlaps this one to avoid packet
- * duplication, even if it targets another queue. */
- LIST_FOREACH(iter_fdir_filter, priv->fdir_filter_list, next)
- if ((iter_fdir_filter != mlx5_fdir_filter) &&
- (iter_fdir_filter->flow != NULL) &&
- (priv_fdir_overlap(priv,
- &mlx5_fdir_filter->desc,
- &iter_fdir_filter->desc)))
- return EEXIST;
-
- /*
- * No padding must be inserted by the compiler between attr and spec.
- * This layout is expected by libibverbs.
- */
- assert(((uint8_t *)attr + sizeof(*attr)) == (uint8_t *)spec_offset);
- priv_flow_attr(priv, attr, sizeof(data), desc->type);
-
- /* Set Ethernet spec */
- spec_eth = (struct ibv_exp_flow_spec_eth *)spec_offset;
-
- /* The first specification must be Ethernet. */
- assert(spec_eth->type == IBV_EXP_FLOW_SPEC_ETH);
- assert(spec_eth->size == sizeof(*spec_eth));
-
- /* VLAN ID */
- spec_eth->val.vlan_tag = desc->vlan_tag & mask->vlan_tci_mask;
- spec_eth->mask.vlan_tag = mask->vlan_tci_mask;
-
- /* Update priority */
- attr->priority = 2;
-
- if (fdir_mode == RTE_FDIR_MODE_PERFECT_MAC_VLAN) {
- /* MAC Address */
- for (i = 0; i != RTE_DIM(spec_eth->mask.dst_mac); ++i) {
- spec_eth->val.dst_mac[i] =
- desc->mac[i] & mask->mac_addr_byte_mask;
- spec_eth->mask.dst_mac[i] = mask->mac_addr_byte_mask;
- }
- goto create_flow;
- }
-
- switch (desc->type) {
- case HASH_RXQ_IPV4:
- case HASH_RXQ_UDPV4:
- case HASH_RXQ_TCPV4:
- spec_offset += spec_eth->size;
-
- /* Set IP spec */
- spec_ipv4 = (struct ibv_exp_flow_spec_ipv4 *)spec_offset;
-
- /* The second specification must be IP. */
- assert(spec_ipv4->type == IBV_EXP_FLOW_SPEC_IPV4);
- assert(spec_ipv4->size == sizeof(*spec_ipv4));
-
- spec_ipv4->val.src_ip =
- desc->src_ip[0] & mask->ipv4_mask.src_ip;
- spec_ipv4->val.dst_ip =
- desc->dst_ip[0] & mask->ipv4_mask.dst_ip;
- spec_ipv4->mask.src_ip = mask->ipv4_mask.src_ip;
- spec_ipv4->mask.dst_ip = mask->ipv4_mask.dst_ip;
-
- /* Update priority */
- attr->priority = 1;
-
- if (desc->type == HASH_RXQ_IPV4)
- goto create_flow;
-
- spec_offset += spec_ipv4->size;
- break;
- case HASH_RXQ_IPV6:
- case HASH_RXQ_UDPV6:
- case HASH_RXQ_TCPV6:
- spec_offset += spec_eth->size;
-
- /* Set IP spec */
- spec_ipv6 = (struct ibv_exp_flow_spec_ipv6 *)spec_offset;
-
- /* The second specification must be IP. */
- assert(spec_ipv6->type == IBV_EXP_FLOW_SPEC_IPV6);
- assert(spec_ipv6->size == sizeof(*spec_ipv6));
-
- for (i = 0; i != RTE_DIM(desc->src_ip); ++i) {
- ((uint32_t *)spec_ipv6->val.src_ip)[i] =
- desc->src_ip[i] & mask->ipv6_mask.src_ip[i];
- ((uint32_t *)spec_ipv6->val.dst_ip)[i] =
- desc->dst_ip[i] & mask->ipv6_mask.dst_ip[i];
- }
- rte_memcpy(spec_ipv6->mask.src_ip,
- mask->ipv6_mask.src_ip,
- sizeof(spec_ipv6->mask.src_ip));
- rte_memcpy(spec_ipv6->mask.dst_ip,
- mask->ipv6_mask.dst_ip,
- sizeof(spec_ipv6->mask.dst_ip));
-
- /* Update priority */
- attr->priority = 1;
-
- if (desc->type == HASH_RXQ_IPV6)
- goto create_flow;
-
- spec_offset += spec_ipv6->size;
- break;
- default:
- ERROR("invalid flow attribute type");
- return EINVAL;
- }
-
- /* Set TCP/UDP flow specification. */
- spec_tcp_udp = (struct ibv_exp_flow_spec_tcp_udp *)spec_offset;
-
- /* The third specification must be TCP/UDP. */
- assert(spec_tcp_udp->type == IBV_EXP_FLOW_SPEC_TCP ||
- spec_tcp_udp->type == IBV_EXP_FLOW_SPEC_UDP);
- assert(spec_tcp_udp->size == sizeof(*spec_tcp_udp));
-
- spec_tcp_udp->val.src_port = desc->src_port & mask->src_port_mask;
- spec_tcp_udp->val.dst_port = desc->dst_port & mask->dst_port_mask;
- spec_tcp_udp->mask.src_port = mask->src_port_mask;
- spec_tcp_udp->mask.dst_port = mask->dst_port_mask;
-
- /* Update priority */
- attr->priority = 0;
-
-create_flow:
-
- errno = 0;
- flow = ibv_exp_create_flow(fdir_queue->qp, attr);
- if (flow == NULL) {
- /* It's not clear whether errno is always set in this case. */
- ERROR("%p: flow director configuration failed, errno=%d: %s",
- (void *)priv, errno,
- (errno ? strerror(errno) : "Unknown error"));
- if (errno)
- return errno;
- return EINVAL;
- }
-
- DEBUG("%p: added flow director rule (%p)", (void *)priv, (void *)flow);
- mlx5_fdir_filter->flow = flow;
- return 0;
-}
-
-/**
- * Destroy a flow director queue.
- *
- * @param fdir_queue
- * Flow director queue to be destroyed.
- */
-void
-priv_fdir_queue_destroy(struct priv *priv, struct fdir_queue *fdir_queue)
-{
- struct mlx5_fdir_filter *fdir_filter;
-
- /* Disable filter flows still applying to this queue. */
- LIST_FOREACH(fdir_filter, priv->fdir_filter_list, next) {
- unsigned int idx = fdir_filter->queue;
- struct rxq_ctrl *rxq_ctrl =
- container_of((*priv->rxqs)[idx], struct rxq_ctrl, rxq);
-
- assert(idx < priv->rxqs_n);
- if (fdir_queue == rxq_ctrl->fdir_queue &&
- fdir_filter->flow != NULL) {
- claim_zero(ibv_exp_destroy_flow(fdir_filter->flow));
- fdir_filter->flow = NULL;
- }
- }
- assert(fdir_queue->qp);
- claim_zero(ibv_destroy_qp(fdir_queue->qp));
- assert(fdir_queue->ind_table);
- claim_zero(ibv_exp_destroy_rwq_ind_table(fdir_queue->ind_table));
- if (fdir_queue->wq)
- claim_zero(ibv_exp_destroy_wq(fdir_queue->wq));
- if (fdir_queue->cq)
- claim_zero(ibv_destroy_cq(fdir_queue->cq));
-#ifndef NDEBUG
- memset(fdir_queue, 0x2a, sizeof(*fdir_queue));
-#endif
- rte_free(fdir_queue);
-}
-
-/**
- * Create a flow director queue.
- *
- * @param priv
- * Private structure.
- * @param wq
- * Work queue to route matched packets to, NULL if one needs to
- * be created.
- *
- * @return
- * Related flow director queue on success, NULL otherwise.
- */
-static struct fdir_queue *
-priv_fdir_queue_create(struct priv *priv, struct ibv_exp_wq *wq,
- unsigned int socket)
-{
- struct fdir_queue *fdir_queue;
-
- fdir_queue = rte_calloc_socket(__func__, 1, sizeof(*fdir_queue),
- 0, socket);
- if (!fdir_queue) {
- ERROR("cannot allocate flow director queue");
- return NULL;
- }
- assert(priv->pd);
- assert(priv->ctx);
- if (!wq) {
- fdir_queue->cq = ibv_exp_create_cq(
- priv->ctx, 1, NULL, NULL, 0,
- &(struct ibv_exp_cq_init_attr){
- .comp_mask = 0,
- });
- if (!fdir_queue->cq) {
- ERROR("cannot create flow director CQ");
- goto error;
- }
- fdir_queue->wq = ibv_exp_create_wq(
- priv->ctx,
- &(struct ibv_exp_wq_init_attr){
- .wq_type = IBV_EXP_WQT_RQ,
- .max_recv_wr = 1,
- .max_recv_sge = 1,
- .pd = priv->pd,
- .cq = fdir_queue->cq,
- });
- if (!fdir_queue->wq) {
- ERROR("cannot create flow director WQ");
- goto error;
- }
- wq = fdir_queue->wq;
- }
- fdir_queue->ind_table = ibv_exp_create_rwq_ind_table(
- priv->ctx,
- &(struct ibv_exp_rwq_ind_table_init_attr){
- .pd = priv->pd,
- .log_ind_tbl_size = 0,
- .ind_tbl = &wq,
- .comp_mask = 0,
- });
- if (!fdir_queue->ind_table) {
- ERROR("cannot create flow director indirection table");
- goto error;
- }
- fdir_queue->qp = ibv_exp_create_qp(
- priv->ctx,
- &(struct ibv_exp_qp_init_attr){
- .qp_type = IBV_QPT_RAW_PACKET,
- .comp_mask =
- IBV_EXP_QP_INIT_ATTR_PD |
- IBV_EXP_QP_INIT_ATTR_PORT |
- IBV_EXP_QP_INIT_ATTR_RX_HASH,
- .pd = priv->pd,
- .rx_hash_conf = &(struct ibv_exp_rx_hash_conf){
- .rx_hash_function =
- IBV_EXP_RX_HASH_FUNC_TOEPLITZ,
- .rx_hash_key_len = rss_hash_default_key_len,
- .rx_hash_key = rss_hash_default_key,
- .rx_hash_fields_mask = 0,
- .rwq_ind_tbl = fdir_queue->ind_table,
- },
- .port_num = priv->port,
- });
- if (!fdir_queue->qp) {
- ERROR("cannot create flow director hash RX QP");
- goto error;
- }
- return fdir_queue;
-error:
- assert(fdir_queue);
- assert(!fdir_queue->qp);
- if (fdir_queue->ind_table)
- claim_zero(ibv_exp_destroy_rwq_ind_table
- (fdir_queue->ind_table));
- if (fdir_queue->wq)
- claim_zero(ibv_exp_destroy_wq(fdir_queue->wq));
- if (fdir_queue->cq)
- claim_zero(ibv_destroy_cq(fdir_queue->cq));
- rte_free(fdir_queue);
- return NULL;
-}
-
-/**
- * Get flow director queue for a specific RX queue, create it in case
- * it does not exist.
- *
- * @param priv
- * Private structure.
- * @param idx
- * RX queue index.
- *
- * @return
- * Related flow director queue on success, NULL otherwise.
- */
-static struct fdir_queue *
-priv_get_fdir_queue(struct priv *priv, uint16_t idx)
-{
- struct rxq_ctrl *rxq_ctrl =
- container_of((*priv->rxqs)[idx], struct rxq_ctrl, rxq);
- struct fdir_queue *fdir_queue = rxq_ctrl->fdir_queue;
-
- assert(rxq_ctrl->wq);
- if (fdir_queue == NULL) {
- fdir_queue = priv_fdir_queue_create(priv, rxq_ctrl->wq,
- rxq_ctrl->socket);
- rxq_ctrl->fdir_queue = fdir_queue;
- }
- return fdir_queue;
-}
-
-/**
- * Get flow director drop queue. Create it if it does not exist.
- *
- * @param priv
- * Private structure.
- *
- * @return
- * Flow director drop queue on success, NULL otherwise.
- */
-static struct fdir_queue *
-priv_get_fdir_drop_queue(struct priv *priv)
-{
- struct fdir_queue *fdir_queue = priv->fdir_drop_queue;
-
- if (fdir_queue == NULL) {
- unsigned int socket = SOCKET_ID_ANY;
-
- /* Select a known NUMA socket if possible. */
- if (priv->rxqs_n && (*priv->rxqs)[0])
- socket = container_of((*priv->rxqs)[0],
- struct rxq_ctrl, rxq)->socket;
- fdir_queue = priv_fdir_queue_create(priv, NULL, socket);
- priv->fdir_drop_queue = fdir_queue;
- }
- return fdir_queue;
-}
-
-/**
- * Enable flow director filter and create steering rules.
- *
- * @param priv
- * Private structure.
- * @param mlx5_fdir_filter
- * Filter to create steering rule for.
- *
- * @return
- * 0 on success, errno value on failure.
- */
-static int
-priv_fdir_filter_enable(struct priv *priv,
- struct mlx5_fdir_filter *mlx5_fdir_filter)
-{
- struct fdir_queue *fdir_queue;
-
- /* Check if flow already exists. */
- if (mlx5_fdir_filter->flow != NULL)
- return 0;
-
- /* Get fdir_queue for specific queue. */
- if (mlx5_fdir_filter->behavior == RTE_ETH_FDIR_REJECT)
- fdir_queue = priv_get_fdir_drop_queue(priv);
- else
- fdir_queue = priv_get_fdir_queue(priv,
- mlx5_fdir_filter->queue);
-
- if (fdir_queue == NULL) {
- ERROR("failed to create flow director rxq for queue %d",
- mlx5_fdir_filter->queue);
- return EINVAL;
- }
-
- /* Create flow */
- return priv_fdir_flow_add(priv, mlx5_fdir_filter, fdir_queue);
-}
-
-/**
- * Initialize flow director filters list.
- *
- * @param priv
- * Private structure.
- *
- * @return
- * 0 on success, errno value on failure.
- */
-int
-fdir_init_filters_list(struct priv *priv)
-{
- /* Filter list initialization should be done only once. */
- if (priv->fdir_filter_list)
- return 0;
-
- /* Create filters list. */
- priv->fdir_filter_list =
- rte_calloc(__func__, 1, sizeof(*priv->fdir_filter_list), 0);
-
- if (priv->fdir_filter_list == NULL) {
- int err = ENOMEM;
-
- ERROR("cannot allocate flow director filter list: %s",
- strerror(err));
- return err;
- }
-
- LIST_INIT(priv->fdir_filter_list);
-
- return 0;
-}
-
-/**
- * Flush all filters.
- *
- * @param priv
- * Private structure.
- */
-static void
-priv_fdir_filter_flush(struct priv *priv)
-{
- struct mlx5_fdir_filter *mlx5_fdir_filter;
-
- while ((mlx5_fdir_filter = LIST_FIRST(priv->fdir_filter_list))) {
- struct ibv_exp_flow *flow = mlx5_fdir_filter->flow;
-
- DEBUG("%p: flushing flow director filter %p",
- (void *)priv, (void *)mlx5_fdir_filter);
- LIST_REMOVE(mlx5_fdir_filter, next);
- if (flow != NULL)
- claim_zero(ibv_exp_destroy_flow(flow));
- rte_free(mlx5_fdir_filter);
- }
-}
-
-/**
- * Remove all flow director filters and delete list.
- *
- * @param priv
- * Private structure.
- */
-void
-priv_fdir_delete_filters_list(struct priv *priv)
-{
- priv_fdir_filter_flush(priv);
- rte_free(priv->fdir_filter_list);
- priv->fdir_filter_list = NULL;
-}
-
-/**
- * Disable flow director, remove all steering rules.
- *
- * @param priv
- * Private structure.
- */
-void
-priv_fdir_disable(struct priv *priv)
-{
- unsigned int i;
- struct mlx5_fdir_filter *mlx5_fdir_filter;
-
- /* Run on every flow director filter and destroy flow handle. */
- LIST_FOREACH(mlx5_fdir_filter, priv->fdir_filter_list, next) {
- struct ibv_exp_flow *flow;
-
- /* Only valid elements should be in the list */
- assert(mlx5_fdir_filter != NULL);
- flow = mlx5_fdir_filter->flow;
-
- /* Destroy flow handle */
- if (flow != NULL) {
- claim_zero(ibv_exp_destroy_flow(flow));
- mlx5_fdir_filter->flow = NULL;
- }
- }
-
- /* Destroy flow director context in each RX queue. */
- for (i = 0; (i != priv->rxqs_n); i++) {
- struct rxq_ctrl *rxq_ctrl;
-
- if (!(*priv->rxqs)[i])
- continue;
- rxq_ctrl = container_of((*priv->rxqs)[i], struct rxq_ctrl, rxq);
- if (!rxq_ctrl->fdir_queue)
- continue;
- priv_fdir_queue_destroy(priv, rxq_ctrl->fdir_queue);
- rxq_ctrl->fdir_queue = NULL;
- }
- if (priv->fdir_drop_queue) {
- priv_fdir_queue_destroy(priv, priv->fdir_drop_queue);
- priv->fdir_drop_queue = NULL;
- }
-}
-
-/**
- * Enable flow director, create steering rules.
- *
- * @param priv
- * Private structure.
- */
-void
-priv_fdir_enable(struct priv *priv)
-{
- struct mlx5_fdir_filter *mlx5_fdir_filter;
-
- /* Run on every fdir filter and create flow handle */
- LIST_FOREACH(mlx5_fdir_filter, priv->fdir_filter_list, next) {
- /* Only valid elements should be in the list */
- assert(mlx5_fdir_filter != NULL);
-
- priv_fdir_filter_enable(priv, mlx5_fdir_filter);
- }
-}
-
-/**
- * Find specific filter in list.
- *
- * @param priv
- * Private structure.
- * @param fdir_filter
- * Flow director filter to find.
- *
- * @return
- * Filter element if found, otherwise NULL.
- */
-static struct mlx5_fdir_filter *
-priv_find_filter_in_list(struct priv *priv,
- const struct rte_eth_fdir_filter *fdir_filter)
-{
- struct fdir_flow_desc desc;
- struct mlx5_fdir_filter *mlx5_fdir_filter;
- enum rte_fdir_mode fdir_mode = priv->dev->data->dev_conf.fdir_conf.mode;
-
- /* Get flow director filter to look for. */
- fdir_filter_to_flow_desc(fdir_filter, &desc, fdir_mode);
-
- /* Look for the requested element. */
- LIST_FOREACH(mlx5_fdir_filter, priv->fdir_filter_list, next) {
- /* Only valid elements should be in the list. */
- assert(mlx5_fdir_filter != NULL);
-
- /* Return matching filter. */
- if (!memcmp(&desc, &mlx5_fdir_filter->desc, sizeof(desc)))
- return mlx5_fdir_filter;
- }
-
- /* Filter not found */
- return NULL;
-}
-
-/**
- * Add new flow director filter and store it in list.
- *
- * @param priv
- * Private structure.
- * @param fdir_filter
- * Flow director filter to add.
- *
- * @return
- * 0 on success, errno value on failure.
- */
-static int
-priv_fdir_filter_add(struct priv *priv,
- const struct rte_eth_fdir_filter *fdir_filter)
-{
- struct mlx5_fdir_filter *mlx5_fdir_filter;
- enum rte_fdir_mode fdir_mode = priv->dev->data->dev_conf.fdir_conf.mode;
- int err = 0;
-
- /* Validate queue number. */
- if (fdir_filter->action.rx_queue >= priv->rxqs_n) {
- ERROR("invalid queue number %d", fdir_filter->action.rx_queue);
- return EINVAL;
- }
-
- /* Duplicate filters are currently unsupported. */
- mlx5_fdir_filter = priv_find_filter_in_list(priv, fdir_filter);
- if (mlx5_fdir_filter != NULL) {
- ERROR("filter already exists");
- return EINVAL;
- }
-
- /* Create new flow director filter. */
- mlx5_fdir_filter =
- rte_calloc(__func__, 1, sizeof(*mlx5_fdir_filter), 0);
- if (mlx5_fdir_filter == NULL) {
- err = ENOMEM;
- ERROR("cannot allocate flow director filter: %s",
- strerror(err));
- return err;
- }
-
- /* Set action parameters. */
- mlx5_fdir_filter->queue = fdir_filter->action.rx_queue;
- mlx5_fdir_filter->behavior = fdir_filter->action.behavior;
-
- /* Convert to mlx5 filter descriptor. */
- fdir_filter_to_flow_desc(fdir_filter,
- &mlx5_fdir_filter->desc, fdir_mode);
-
- /* Insert new filter into list. */
- LIST_INSERT_HEAD(priv->fdir_filter_list, mlx5_fdir_filter, next);
-
- DEBUG("%p: flow director filter %p added",
- (void *)priv, (void *)mlx5_fdir_filter);
-
- /* Enable filter immediately if device is started. */
- if (priv->started)
- err = priv_fdir_filter_enable(priv, mlx5_fdir_filter);
-
- return err;
-}
-
-/**
- * Update queue for specific filter.
- *
- * @param priv
- * Private structure.
- * @param fdir_filter
- * Filter to be updated.
- *
- * @return
- * 0 on success, errno value on failure.
- */
-static int
-priv_fdir_filter_update(struct priv *priv,
- const struct rte_eth_fdir_filter *fdir_filter)
-{
- struct mlx5_fdir_filter *mlx5_fdir_filter;
-
- /* Validate queue number. */
- if (fdir_filter->action.rx_queue >= priv->rxqs_n) {
- ERROR("invalid queue number %d", fdir_filter->action.rx_queue);
- return EINVAL;
- }
-
- mlx5_fdir_filter = priv_find_filter_in_list(priv, fdir_filter);
- if (mlx5_fdir_filter != NULL) {
- struct ibv_exp_flow *flow = mlx5_fdir_filter->flow;
- int err = 0;
-
- /* Update queue number. */
- mlx5_fdir_filter->queue = fdir_filter->action.rx_queue;
-
- /* Destroy flow handle. */
- if (flow != NULL) {
- claim_zero(ibv_exp_destroy_flow(flow));
- mlx5_fdir_filter->flow = NULL;
- }
- DEBUG("%p: flow director filter %p updated",
- (void *)priv, (void *)mlx5_fdir_filter);
-
- /* Enable filter if device is started. */
- if (priv->started)
- err = priv_fdir_filter_enable(priv, mlx5_fdir_filter);
-
- return err;
- }
-
- /* Filter not found, create it. */
- DEBUG("%p: filter not found for update, creating new filter",
- (void *)priv);
- return priv_fdir_filter_add(priv, fdir_filter);
-}
-
-/**
- * Delete specific filter.
- *
- * @param priv
- * Private structure.
- * @param fdir_filter
- * Filter to be deleted.
- *
- * @return
- * 0 on success, errno value on failure.
- */
-static int
-priv_fdir_filter_delete(struct priv *priv,
- const struct rte_eth_fdir_filter *fdir_filter)
-{
- struct mlx5_fdir_filter *mlx5_fdir_filter;
-
- mlx5_fdir_filter = priv_find_filter_in_list(priv, fdir_filter);
- if (mlx5_fdir_filter != NULL) {
- struct ibv_exp_flow *flow = mlx5_fdir_filter->flow;
-
- /* Remove element from list. */
- LIST_REMOVE(mlx5_fdir_filter, next);
-
- /* Destroy flow handle. */
- if (flow != NULL) {
- claim_zero(ibv_exp_destroy_flow(flow));
- mlx5_fdir_filter->flow = NULL;
- }
-
- DEBUG("%p: flow director filter %p deleted",
- (void *)priv, (void *)mlx5_fdir_filter);
-
- /* Delete filter. */
- rte_free(mlx5_fdir_filter);
-
- return 0;
- }
-
- ERROR("%p: flow director delete failed, cannot find filter",
- (void *)priv);
- return EINVAL;
-}
-
-/**
- * Get flow director information.
- *
- * @param priv
- * Private structure.
- * @param[out] fdir_info
- * Resulting flow director information.
- */
-static void
-priv_fdir_info_get(struct priv *priv, struct rte_eth_fdir_info *fdir_info)
-{
- struct rte_eth_fdir_masks *mask =
- &priv->dev->data->dev_conf.fdir_conf.mask;
-
- fdir_info->mode = priv->dev->data->dev_conf.fdir_conf.mode;
- fdir_info->guarant_spc = 0;
-
- rte_memcpy(&fdir_info->mask, mask, sizeof(fdir_info->mask));
-
- fdir_info->max_flexpayload = 0;
- fdir_info->flow_types_mask[0] = 0;
-
- fdir_info->flex_payload_unit = 0;
- fdir_info->max_flex_payload_segment_num = 0;
- fdir_info->flex_payload_limit = 0;
- memset(&fdir_info->flex_conf, 0, sizeof(fdir_info->flex_conf));
-}
-
-/**
- * Deal with flow director operations.
- *
- * @param priv
- * Pointer to private structure.
- * @param filter_op
- * Operation to perform.
- * @param arg
- * Pointer to operation-specific structure.
- *
- * @return
- * 0 on success, errno value on failure.
- */
-static int
-priv_fdir_ctrl_func(struct priv *priv, enum rte_filter_op filter_op, void *arg)
-{
- enum rte_fdir_mode fdir_mode =
- priv->dev->data->dev_conf.fdir_conf.mode;
- int ret = 0;
-
- if (filter_op == RTE_ETH_FILTER_NOP)
- return 0;
-
- if (fdir_mode != RTE_FDIR_MODE_PERFECT &&
- fdir_mode != RTE_FDIR_MODE_PERFECT_MAC_VLAN) {
- ERROR("%p: flow director mode %d not supported",
- (void *)priv, fdir_mode);
- return EINVAL;
- }
-
- switch (filter_op) {
- case RTE_ETH_FILTER_ADD:
- ret = priv_fdir_filter_add(priv, arg);
- break;
- case RTE_ETH_FILTER_UPDATE:
- ret = priv_fdir_filter_update(priv, arg);
- break;
- case RTE_ETH_FILTER_DELETE:
- ret = priv_fdir_filter_delete(priv, arg);
- break;
- case RTE_ETH_FILTER_FLUSH:
- priv_fdir_filter_flush(priv);
- break;
- case RTE_ETH_FILTER_INFO:
- priv_fdir_info_get(priv, arg);
- break;
- default:
- DEBUG("%p: unknown operation %u", (void *)priv, filter_op);
- ret = EINVAL;
- break;
- }
- return ret;
-}
-
-static const struct rte_flow_ops mlx5_flow_ops = {
- .validate = mlx5_flow_validate,
- .create = mlx5_flow_create,
- .destroy = mlx5_flow_destroy,
- .flush = mlx5_flow_flush,
- .query = NULL,
- .isolate = mlx5_flow_isolate,
-};
-
-/**
- * Manage filter operations.
- *
- * @param dev
- * Pointer to Ethernet device structure.
- * @param filter_type
- * Filter type.
- * @param filter_op
- * Operation to perform.
- * @param arg
- * Pointer to operation-specific structure.
- *
- * @return
- * 0 on success, negative errno value on failure.
- */
-int
-mlx5_dev_filter_ctrl(struct rte_eth_dev *dev,
- enum rte_filter_type filter_type,
- enum rte_filter_op filter_op,
- void *arg)
-{
- int ret = EINVAL;
- struct priv *priv = dev->data->dev_private;
-
- switch (filter_type) {
- case RTE_ETH_FILTER_GENERIC:
- if (filter_op != RTE_ETH_FILTER_GET)
- return -EINVAL;
- *(const void **)arg = &mlx5_flow_ops;
- return 0;
- case RTE_ETH_FILTER_FDIR:
- priv_lock(priv);
- ret = priv_fdir_ctrl_func(priv, filter_op, arg);
- priv_unlock(priv);
- break;
- default:
- ERROR("%p: filter type (%d) not supported",
- (void *)dev, filter_type);
- break;
- }
-
- return -ret;
-}
diff --git a/drivers/net/mlx5/mlx5_flow.c b/drivers/net/mlx5/mlx5_flow.c
index 86be9291..cd99cb07 100644
--- a/drivers/net/mlx5/mlx5_flow.c
+++ b/drivers/net/mlx5/mlx5_flow.c
@@ -52,13 +52,36 @@
#include "mlx5.h"
#include "mlx5_prm.h"
-/* Number of Work Queue necessary for the DROP queue. */
-#ifndef HAVE_VERBS_IBV_EXP_FLOW_SPEC_ACTION_DROP
-#define MLX5_DROP_WQ_N 4
-#else
-#define MLX5_DROP_WQ_N 1
+/* Define minimal priority for control plane flows. */
+#define MLX5_CTRL_FLOW_PRIORITY 4
+
+/* Internet Protocol versions. */
+#define MLX5_IPV4 4
+#define MLX5_IPV6 6
+
+#ifndef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
+struct ibv_counter_set_init_attr {
+ int dummy;
+};
+struct ibv_flow_spec_counter_action {
+ int dummy;
+};
+struct ibv_counter_set {
+ int dummy;
+};
+
+static inline int
+ibv_destroy_counter_set(struct ibv_counter_set *cs)
+{
+ (void)cs;
+ return -ENOTSUP;
+}
#endif
+/* Dev ops structure defined in mlx5.c */
+extern const struct eth_dev_ops mlx5_dev_ops;
+extern const struct eth_dev_ops mlx5_dev_ops_isolate;
+
static int
mlx5_flow_create_eth(const struct rte_flow_item *item,
const void *default_mask,
@@ -94,19 +117,144 @@ mlx5_flow_create_vxlan(const struct rte_flow_item *item,
const void *default_mask,
void *data);
-struct rte_flow {
- TAILQ_ENTRY(rte_flow) next; /**< Pointer to the next flow structure. */
- struct ibv_exp_flow_attr *ibv_attr; /**< Pointer to Verbs attributes. */
- struct ibv_exp_rwq_ind_table *ind_table; /**< Indirection table. */
+struct mlx5_flow_parse;
+
+static void
+mlx5_flow_create_copy(struct mlx5_flow_parse *parser, void *src,
+ unsigned int size);
+
+static int
+mlx5_flow_create_flag_mark(struct mlx5_flow_parse *parser, uint32_t mark_id);
+
+static int
+mlx5_flow_create_count(struct priv *priv, struct mlx5_flow_parse *parser);
+
+/* Hash RX queue types. */
+enum hash_rxq_type {
+ HASH_RXQ_TCPV4,
+ HASH_RXQ_UDPV4,
+ HASH_RXQ_IPV4,
+ HASH_RXQ_TCPV6,
+ HASH_RXQ_UDPV6,
+ HASH_RXQ_IPV6,
+ HASH_RXQ_ETH,
+};
+
+/* Initialization data for hash RX queue. */
+struct hash_rxq_init {
+ uint64_t hash_fields; /* Fields that participate in the hash. */
+ uint64_t dpdk_rss_hf; /* Matching DPDK RSS hash fields. */
+ unsigned int flow_priority; /* Flow priority to use. */
+ unsigned int ip_version; /* Internet protocol. */
+};
+
+/* Initialization data for hash RX queues. */
+const struct hash_rxq_init hash_rxq_init[] = {
+ [HASH_RXQ_TCPV4] = {
+ .hash_fields = (IBV_RX_HASH_SRC_IPV4 |
+ IBV_RX_HASH_DST_IPV4 |
+ IBV_RX_HASH_SRC_PORT_TCP |
+ IBV_RX_HASH_DST_PORT_TCP),
+ .dpdk_rss_hf = ETH_RSS_NONFRAG_IPV4_TCP,
+ .flow_priority = 0,
+ .ip_version = MLX5_IPV4,
+ },
+ [HASH_RXQ_UDPV4] = {
+ .hash_fields = (IBV_RX_HASH_SRC_IPV4 |
+ IBV_RX_HASH_DST_IPV4 |
+ IBV_RX_HASH_SRC_PORT_UDP |
+ IBV_RX_HASH_DST_PORT_UDP),
+ .dpdk_rss_hf = ETH_RSS_NONFRAG_IPV4_UDP,
+ .flow_priority = 0,
+ .ip_version = MLX5_IPV4,
+ },
+ [HASH_RXQ_IPV4] = {
+ .hash_fields = (IBV_RX_HASH_SRC_IPV4 |
+ IBV_RX_HASH_DST_IPV4),
+ .dpdk_rss_hf = (ETH_RSS_IPV4 |
+ ETH_RSS_FRAG_IPV4),
+ .flow_priority = 1,
+ .ip_version = MLX5_IPV4,
+ },
+ [HASH_RXQ_TCPV6] = {
+ .hash_fields = (IBV_RX_HASH_SRC_IPV6 |
+ IBV_RX_HASH_DST_IPV6 |
+ IBV_RX_HASH_SRC_PORT_TCP |
+ IBV_RX_HASH_DST_PORT_TCP),
+ .dpdk_rss_hf = ETH_RSS_NONFRAG_IPV6_TCP,
+ .flow_priority = 0,
+ .ip_version = MLX5_IPV6,
+ },
+ [HASH_RXQ_UDPV6] = {
+ .hash_fields = (IBV_RX_HASH_SRC_IPV6 |
+ IBV_RX_HASH_DST_IPV6 |
+ IBV_RX_HASH_SRC_PORT_UDP |
+ IBV_RX_HASH_DST_PORT_UDP),
+ .dpdk_rss_hf = ETH_RSS_NONFRAG_IPV6_UDP,
+ .flow_priority = 0,
+ .ip_version = MLX5_IPV6,
+ },
+ [HASH_RXQ_IPV6] = {
+ .hash_fields = (IBV_RX_HASH_SRC_IPV6 |
+ IBV_RX_HASH_DST_IPV6),
+ .dpdk_rss_hf = (ETH_RSS_IPV6 |
+ ETH_RSS_FRAG_IPV6),
+ .flow_priority = 1,
+ .ip_version = MLX5_IPV6,
+ },
+ [HASH_RXQ_ETH] = {
+ .hash_fields = 0,
+ .dpdk_rss_hf = 0,
+ .flow_priority = 2,
+ },
+};
+
+/* Number of entries in hash_rxq_init[]. */
+const unsigned int hash_rxq_init_n = RTE_DIM(hash_rxq_init);
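The flow_priority field above is added to the user-supplied attribute priority when the Verbs attributes are allocated later in priv_flow_convert(); broader layers such as HASH_RXQ_ETH therefore end up with a numerically higher, hence weaker, priority than exact L4 matches. A minimal sketch, assuming a hypothetical helper name:

/* Sketch (hypothetical helper): how hash_rxq_init[] feeds Verbs priorities. */
static unsigned int
example_flow_priority(const struct rte_flow_attr *attr,
		      enum hash_rxq_type layer)
{
	return attr->priority + hash_rxq_init[layer].flow_priority;
}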
+
+/** Structure for holding counter stats. */
+struct mlx5_flow_counter_stats {
+ uint64_t hits; /**< Number of packets matched by the rule. */
+ uint64_t bytes; /**< Number of bytes matched by the rule. */
+};
+
+/** Structure for Drop queue. */
+struct mlx5_hrxq_drop {
+ struct ibv_rwq_ind_table *ind_table; /**< Indirection table. */
struct ibv_qp *qp; /**< Verbs queue pair. */
- struct ibv_exp_flow *ibv_flow; /**< Verbs flow. */
- struct ibv_exp_wq *wq; /**< Verbs work queue. */
+ struct ibv_wq *wq; /**< Verbs work queue. */
struct ibv_cq *cq; /**< Verbs completion queue. */
- uint16_t rxqs_n; /**< Number of queues in this flow, 0 if drop queue. */
+};
+
+/* Flows structures. */
+struct mlx5_flow {
+ uint64_t hash_fields; /**< Fields that participate in the hash. */
+ struct ibv_flow_attr *ibv_attr; /**< Pointer to Verbs attributes. */
+ struct ibv_flow *ibv_flow; /**< Verbs flow. */
+ struct mlx5_hrxq *hrxq; /**< Hash Rx queues. */
+};
+
+/* Drop flows structures. */
+struct mlx5_flow_drop {
+ struct ibv_flow_attr *ibv_attr; /**< Pointer to Verbs attributes. */
+ struct ibv_flow *ibv_flow; /**< Verbs flow. */
+};
+
+struct rte_flow {
+ TAILQ_ENTRY(rte_flow) next; /**< Pointer to the next flow structure. */
uint32_t mark:1; /**< Set if the flow is marked. */
uint32_t drop:1; /**< Drop queue. */
- uint64_t hash_fields; /**< Fields that participate in the hash. */
- struct rxq *rxqs[]; /**< Pointer to the queues array. */
+ uint16_t queues_n; /**< Number of entries in queue[]. */
+ uint16_t (*queues)[]; /**< Queues indexes to use. */
+ struct rte_eth_rss_conf rss_conf; /**< RSS configuration */
+ uint8_t rss_key[40]; /**< copy of the RSS key. */
+ struct ibv_counter_set *cs; /**< Holds the counters for the rule. */
+	struct mlx5_flow_counter_stats counter_stats; /**< The counter stats. */
+ union {
+ struct mlx5_flow frxq[RTE_DIM(hash_rxq_init)];
+ /**< Flow with Rx queue. */
+ struct mlx5_flow_drop drxq; /**< Flow with drop Rx queue. */
+ };
};
/** Static initializer for items. */
@@ -157,6 +305,9 @@ static const enum rte_flow_action_type valid_actions[] = {
RTE_FLOW_ACTION_TYPE_QUEUE,
RTE_FLOW_ACTION_TYPE_MARK,
RTE_FLOW_ACTION_TYPE_FLAG,
+#ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
+ RTE_FLOW_ACTION_TYPE_COUNT,
+#endif
RTE_FLOW_ACTION_TYPE_END,
};
@@ -179,7 +330,7 @@ static const struct mlx5_flow_items mlx5_flow_items[] = {
.default_mask = &rte_flow_item_eth_mask,
.mask_sz = sizeof(struct rte_flow_item_eth),
.convert = mlx5_flow_create_eth,
- .dst_sz = sizeof(struct ibv_exp_flow_spec_eth),
+ .dst_sz = sizeof(struct ibv_flow_spec_eth),
},
[RTE_FLOW_ITEM_TYPE_VLAN] = {
.items = ITEMS(RTE_FLOW_ITEM_TYPE_IPV4,
@@ -208,7 +359,7 @@ static const struct mlx5_flow_items mlx5_flow_items[] = {
.default_mask = &rte_flow_item_ipv4_mask,
.mask_sz = sizeof(struct rte_flow_item_ipv4),
.convert = mlx5_flow_create_ipv4,
- .dst_sz = sizeof(struct ibv_exp_flow_spec_ipv4_ext),
+ .dst_sz = sizeof(struct ibv_flow_spec_ipv4_ext),
},
[RTE_FLOW_ITEM_TYPE_IPV6] = {
.items = ITEMS(RTE_FLOW_ITEM_TYPE_UDP,
@@ -236,7 +387,7 @@ static const struct mlx5_flow_items mlx5_flow_items[] = {
.default_mask = &rte_flow_item_ipv6_mask,
.mask_sz = sizeof(struct rte_flow_item_ipv6),
.convert = mlx5_flow_create_ipv6,
- .dst_sz = sizeof(struct ibv_exp_flow_spec_ipv6_ext),
+ .dst_sz = sizeof(struct ibv_flow_spec_ipv6),
},
[RTE_FLOW_ITEM_TYPE_UDP] = {
.items = ITEMS(RTE_FLOW_ITEM_TYPE_VXLAN),
@@ -250,7 +401,7 @@ static const struct mlx5_flow_items mlx5_flow_items[] = {
.default_mask = &rte_flow_item_udp_mask,
.mask_sz = sizeof(struct rte_flow_item_udp),
.convert = mlx5_flow_create_udp,
- .dst_sz = sizeof(struct ibv_exp_flow_spec_tcp_udp),
+ .dst_sz = sizeof(struct ibv_flow_spec_tcp_udp),
},
[RTE_FLOW_ITEM_TYPE_TCP] = {
.actions = valid_actions,
@@ -263,7 +414,7 @@ static const struct mlx5_flow_items mlx5_flow_items[] = {
.default_mask = &rte_flow_item_tcp_mask,
.mask_sz = sizeof(struct rte_flow_item_tcp),
.convert = mlx5_flow_create_tcp,
- .dst_sz = sizeof(struct ibv_exp_flow_spec_tcp_udp),
+ .dst_sz = sizeof(struct ibv_flow_spec_tcp_udp),
},
[RTE_FLOW_ITEM_TYPE_VXLAN] = {
.items = ITEMS(RTE_FLOW_ITEM_TYPE_ETH),
@@ -274,33 +425,76 @@ static const struct mlx5_flow_items mlx5_flow_items[] = {
.default_mask = &rte_flow_item_vxlan_mask,
.mask_sz = sizeof(struct rte_flow_item_vxlan),
.convert = mlx5_flow_create_vxlan,
- .dst_sz = sizeof(struct ibv_exp_flow_spec_tunnel),
+ .dst_sz = sizeof(struct ibv_flow_spec_tunnel),
},
};
/** Structure to pass to the conversion function. */
-struct mlx5_flow {
- struct ibv_exp_flow_attr *ibv_attr; /**< Verbs attribute. */
- unsigned int offset; /**< Offset in bytes in the ibv_attr buffer. */
+struct mlx5_flow_parse {
uint32_t inner; /**< Set once VXLAN is encountered. */
- uint64_t hash_fields; /**< Fields that participate in the hash. */
-};
-
-/** Structure for Drop queue. */
-struct rte_flow_drop {
- struct ibv_exp_rwq_ind_table *ind_table; /**< Indirection table. */
- struct ibv_qp *qp; /**< Verbs queue pair. */
- struct ibv_exp_wq *wqs[MLX5_DROP_WQ_N]; /**< Verbs work queue. */
- struct ibv_cq *cq; /**< Verbs completion queue. */
-};
-
-struct mlx5_flow_action {
- uint32_t queue:1; /**< Target is a receive queue. */
+ uint32_t create:1;
+ /**< Whether resources should remain after a validate. */
uint32_t drop:1; /**< Target is a drop queue. */
uint32_t mark:1; /**< Mark is present in the flow. */
+ uint32_t count:1; /**< Count is present in the flow. */
uint32_t mark_id; /**< Mark identifier. */
uint16_t queues[RTE_MAX_QUEUES_PER_PORT]; /**< Queues indexes to use. */
uint16_t queues_n; /**< Number of entries in queue[]. */
+ struct rte_eth_rss_conf rss_conf; /**< RSS configuration */
+ uint8_t rss_key[40]; /**< copy of the RSS key. */
+ enum hash_rxq_type layer; /**< Last pattern layer detected. */
+ struct ibv_counter_set *cs; /**< Holds the counter set for the rule */
+ union {
+ struct {
+ struct ibv_flow_attr *ibv_attr;
+ /**< Pointer to Verbs attributes. */
+ unsigned int offset;
+ /**< Current position or total size of the attribute. */
+ } queue[RTE_DIM(hash_rxq_init)];
+ struct {
+ struct ibv_flow_attr *ibv_attr;
+ /**< Pointer to Verbs attributes. */
+ unsigned int offset;
+ /**< Current position or total size of the attribute. */
+ } drop_q;
+ };
+};
+
+static const struct rte_flow_ops mlx5_flow_ops = {
+ .validate = mlx5_flow_validate,
+ .create = mlx5_flow_create,
+ .destroy = mlx5_flow_destroy,
+ .flush = mlx5_flow_flush,
+#ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
+ .query = mlx5_flow_query,
+#else
+ .query = NULL,
+#endif
+ .isolate = mlx5_flow_isolate,
+};
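These callbacks are reached through the generic rte_flow API once the driver returns &mlx5_flow_ops for RTE_ETH_FILTER_GENERIC, as mlx5_dev_filter_ctrl() did in the removed mlx5_fdir.c above. A minimal usage sketch with placeholder port id and queue index:

/* Sketch (placeholder values): steer all Ethernet traffic to Rx queue 0;
 * rte_flow_create() below ends up in mlx5_flow_create(). */
uint16_t port_id = 0;
struct rte_flow_attr attr = { .ingress = 1 };
struct rte_flow_item pattern[] = {
	{ .type = RTE_FLOW_ITEM_TYPE_ETH },
	{ .type = RTE_FLOW_ITEM_TYPE_END },
};
struct rte_flow_action_queue queue = { .index = 0 };
struct rte_flow_action actions[] = {
	{ .type = RTE_FLOW_ACTION_TYPE_QUEUE, .conf = &queue },
	{ .type = RTE_FLOW_ACTION_TYPE_END },
};
struct rte_flow_error error;
struct rte_flow *flow = rte_flow_create(port_id, &attr, pattern, actions,
					&error);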
+
+/* Convert FDIR request to Generic flow. */
+struct mlx5_fdir {
+ struct rte_flow_attr attr;
+ struct rte_flow_action actions[2];
+ struct rte_flow_item items[4];
+ struct rte_flow_item_eth l2;
+ struct rte_flow_item_eth l2_mask;
+ union {
+ struct rte_flow_item_ipv4 ipv4;
+ struct rte_flow_item_ipv6 ipv6;
+ } l3;
+ union {
+ struct rte_flow_item_udp udp;
+ struct rte_flow_item_tcp tcp;
+ } l4;
+ struct rte_flow_action_queue queue;
+};
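As an illustration of the layout above (an editor's sketch only: the queue index is a placeholder and the actual conversion is performed by the FDIR compatibility code, which is not shown here), an IPv4/UDP perfect filter redirected to a queue could be expressed as:

/* Sketch: items[] chains ETH / IPV4 / UDP / END and actions[] ends with a
 * single QUEUE action; l2/l3/l4 would be filled from the original
 * rte_eth_fdir_filter. */
struct mlx5_fdir desc = {
	.attr = { .ingress = 1 },
	.queue = { .index = 1 },
	.items = {
		{ .type = RTE_FLOW_ITEM_TYPE_ETH, .spec = &desc.l2,
		  .mask = &desc.l2_mask },
		{ .type = RTE_FLOW_ITEM_TYPE_IPV4, .spec = &desc.l3 },
		{ .type = RTE_FLOW_ITEM_TYPE_UDP, .spec = &desc.l4 },
		{ .type = RTE_FLOW_ITEM_TYPE_END },
	},
	.actions = {
		{ .type = RTE_FLOW_ACTION_TYPE_QUEUE, .conf = &desc.queue },
		{ .type = RTE_FLOW_ACTION_TYPE_END },
	},
};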
+
+/* Verbs specification header. */
+struct ibv_spec_header {
+ enum ibv_flow_spec_type type;
+ uint16_t size;
};
/**
@@ -367,38 +561,58 @@ mlx5_flow_item_validate(const struct rte_flow_item *item,
}
/**
- * Validate a flow supported by the NIC.
+ * Copy the RSS configuration from the user ones.
+ *
+ * @param priv
+ * Pointer to private structure.
+ * @param parser
+ * Internal parser structure.
+ * @param rss_conf
+ * User RSS configuration to save.
+ *
+ * @return
+ * 0 on success, errno value on failure.
+ */
+static int
+priv_flow_convert_rss_conf(struct priv *priv,
+ struct mlx5_flow_parse *parser,
+ const struct rte_eth_rss_conf *rss_conf)
+{
+ const struct rte_eth_rss_conf *rss =
+ rss_conf ? rss_conf : &priv->rss_conf;
+
+ if (rss->rss_key_len > 40)
+ return EINVAL;
+ parser->rss_conf.rss_key_len = rss->rss_key_len;
+ parser->rss_conf.rss_hf = rss->rss_hf;
+ memcpy(parser->rss_key, rss->rss_key, rss->rss_key_len);
+ parser->rss_conf.rss_key = parser->rss_key;
+ return 0;
+}
+
+/**
+ * Extract attribute to the parser.
*
* @param priv
* Pointer to private structure.
* @param[in] attr
* Flow rule attributes.
- * @param[in] pattern
- * Pattern specification (list terminated by the END pattern item).
- * @param[in] actions
- * Associated actions (list terminated by the END action).
* @param[out] error
* Perform verbose error reporting if not NULL.
- * @param[in, out] flow
- * Flow structure to update.
- * @param[in, out] action
- * Action structure to update.
+ * @param[in, out] parser
+ * Internal parser structure.
*
* @return
* 0 on success, a negative errno value otherwise and rte_errno is set.
*/
static int
-priv_flow_validate(struct priv *priv,
- const struct rte_flow_attr *attr,
- const struct rte_flow_item items[],
- const struct rte_flow_action actions[],
- struct rte_flow_error *error,
- struct mlx5_flow *flow,
- struct mlx5_flow_action *action)
+priv_flow_convert_attributes(struct priv *priv,
+ const struct rte_flow_attr *attr,
+ struct rte_flow_error *error,
+ struct mlx5_flow_parse *parser)
{
- const struct mlx5_flow_items *cur_item = mlx5_flow_items;
-
(void)priv;
+ (void)parser;
if (attr->group) {
rte_flow_error_set(error, ENOTSUP,
RTE_FLOW_ERROR_TYPE_ATTR_GROUP,
@@ -406,7 +620,7 @@ priv_flow_validate(struct priv *priv,
"groups are not supported");
return -rte_errno;
}
- if (attr->priority) {
+ if (attr->priority && attr->priority != MLX5_CTRL_FLOW_PRIORITY) {
rte_flow_error_set(error, ENOTSUP,
RTE_FLOW_ERROR_TYPE_ATTR_PRIORITY,
NULL,
@@ -427,56 +641,42 @@ priv_flow_validate(struct priv *priv,
"only ingress is supported");
return -rte_errno;
}
- for (; items->type != RTE_FLOW_ITEM_TYPE_END; ++items) {
- const struct mlx5_flow_items *token = NULL;
- unsigned int i;
- int err;
+ return 0;
+}
- if (items->type == RTE_FLOW_ITEM_TYPE_VOID)
- continue;
- for (i = 0;
- cur_item->items &&
- cur_item->items[i] != RTE_FLOW_ITEM_TYPE_END;
- ++i) {
- if (cur_item->items[i] == items->type) {
- token = &mlx5_flow_items[items->type];
- break;
- }
- }
- if (!token)
- goto exit_item_not_supported;
- cur_item = token;
- err = mlx5_flow_item_validate(items,
- (const uint8_t *)cur_item->mask,
- cur_item->mask_sz);
- if (err)
- goto exit_item_not_supported;
- if (flow->ibv_attr && cur_item->convert) {
- err = cur_item->convert(items,
- (cur_item->default_mask ?
- cur_item->default_mask :
- cur_item->mask),
- flow);
- if (err)
- goto exit_item_not_supported;
- } else if (items->type == RTE_FLOW_ITEM_TYPE_VXLAN) {
- if (flow->inner) {
- rte_flow_error_set(error, ENOTSUP,
- RTE_FLOW_ERROR_TYPE_ITEM,
- items,
- "cannot recognize multiple"
- " VXLAN encapsulations");
- return -rte_errno;
- }
- flow->inner = 1;
- }
- flow->offset += cur_item->dst_sz;
- }
+/**
+ * Extract actions request to the parser.
+ *
+ * @param priv
+ * Pointer to private structure.
+ * @param[in] actions
+ * Associated actions (list terminated by the END action).
+ * @param[out] error
+ * Perform verbose error reporting if not NULL.
+ * @param[in, out] parser
+ * Internal parser structure.
+ *
+ * @return
+ * 0 on success, a negative errno value otherwise and rte_errno is set.
+ */
+static int
+priv_flow_convert_actions(struct priv *priv,
+ const struct rte_flow_action actions[],
+ struct rte_flow_error *error,
+ struct mlx5_flow_parse *parser)
+{
+ /*
+	 * Add the default RSS configuration required by Verbs to create a QP,
+	 * even when the flow itself does not request RSS.
+ */
+ priv_flow_convert_rss_conf(priv, parser,
+ (const struct rte_eth_rss_conf *)
+ &priv->rss_conf);
for (; actions->type != RTE_FLOW_ACTION_TYPE_END; ++actions) {
if (actions->type == RTE_FLOW_ACTION_TYPE_VOID) {
continue;
} else if (actions->type == RTE_FLOW_ACTION_TYPE_DROP) {
- action->drop = 1;
+ parser->drop = 1;
} else if (actions->type == RTE_FLOW_ACTION_TYPE_QUEUE) {
const struct rte_flow_action_queue *queue =
(const struct rte_flow_action_queue *)
@@ -486,13 +686,13 @@ priv_flow_validate(struct priv *priv,
if (!queue || (queue->index > (priv->rxqs_n - 1)))
goto exit_action_not_supported;
- for (n = 0; n < action->queues_n; ++n) {
- if (action->queues[n] == queue->index) {
+ for (n = 0; n < parser->queues_n; ++n) {
+ if (parser->queues[n] == queue->index) {
found = 1;
break;
}
}
- if (action->queues_n > 1 && !found) {
+ if (parser->queues_n > 1 && !found) {
rte_flow_error_set(error, ENOTSUP,
RTE_FLOW_ERROR_TYPE_ACTION,
actions,
@@ -500,9 +700,8 @@ priv_flow_validate(struct priv *priv,
return -rte_errno;
}
if (!found) {
- action->queue = 1;
- action->queues_n = 1;
- action->queues[0] = queue->index;
+ parser->queues_n = 1;
+ parser->queues[0] = queue->index;
}
} else if (actions->type == RTE_FLOW_ACTION_TYPE_RSS) {
const struct rte_flow_action_rss *rss =
@@ -517,12 +716,12 @@ priv_flow_validate(struct priv *priv,
"no valid queues");
return -rte_errno;
}
- if (action->queues_n == 1) {
+ if (parser->queues_n == 1) {
uint16_t found = 0;
- assert(action->queues_n);
+ assert(parser->queues_n);
for (n = 0; n < rss->num; ++n) {
- if (action->queues[0] ==
+ if (parser->queues[0] ==
rss->queue[n]) {
found = 1;
break;
@@ -547,10 +746,17 @@ priv_flow_validate(struct priv *priv,
return -rte_errno;
}
}
- action->queue = 1;
for (n = 0; n < rss->num; ++n)
- action->queues[n] = rss->queue[n];
- action->queues_n = rss->num;
+ parser->queues[n] = rss->queue[n];
+ parser->queues_n = rss->num;
+ if (priv_flow_convert_rss_conf(priv, parser,
+ rss->rss_conf)) {
+ rte_flow_error_set(error, EINVAL,
+ RTE_FLOW_ERROR_TYPE_ACTION,
+ actions,
+ "wrong RSS configuration");
+ return -rte_errno;
+ }
} else if (actions->type == RTE_FLOW_ACTION_TYPE_MARK) {
const struct rte_flow_action_mark *mark =
(const struct rte_flow_action_mark *)
@@ -570,30 +776,25 @@ priv_flow_validate(struct priv *priv,
" and 16777199");
return -rte_errno;
}
- action->mark = 1;
- action->mark_id = mark->id;
+ parser->mark = 1;
+ parser->mark_id = mark->id;
} else if (actions->type == RTE_FLOW_ACTION_TYPE_FLAG) {
- action->mark = 1;
+ parser->mark = 1;
+ } else if (actions->type == RTE_FLOW_ACTION_TYPE_COUNT &&
+ priv->counter_set_supported) {
+ parser->count = 1;
} else {
goto exit_action_not_supported;
}
}
- if (action->mark && !flow->ibv_attr && !action->drop)
- flow->offset += sizeof(struct ibv_exp_flow_spec_action_tag);
-#ifdef HAVE_VERBS_IBV_EXP_FLOW_SPEC_ACTION_DROP
- if (!flow->ibv_attr && action->drop)
- flow->offset += sizeof(struct ibv_exp_flow_spec_action_drop);
-#endif
- if (!action->queue && !action->drop) {
+ if (parser->drop && parser->mark)
+ parser->mark = 0;
+ if (!parser->queues_n && !parser->drop) {
rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_HANDLE,
NULL, "no valid action");
return -rte_errno;
}
return 0;
-exit_item_not_supported:
- rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM,
- items, "item not supported");
- return -rte_errno;
exit_action_not_supported:
rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION,
actions, "action not supported");
@@ -601,34 +802,467 @@ exit_action_not_supported:
}
/**
- * Validate a flow supported by the NIC.
+ * Validate items.
*
- * @see rte_flow_validate()
- * @see rte_flow_ops
+ * @param priv
+ * Pointer to private structure.
+ * @param[in] items
+ * Pattern specification (list terminated by the END pattern item).
+ * @param[out] error
+ * Perform verbose error reporting if not NULL.
+ * @param[in, out] parser
+ * Internal parser structure.
+ *
+ * @return
+ * 0 on success, a negative errno value otherwise and rte_errno is set.
*/
-int
-mlx5_flow_validate(struct rte_eth_dev *dev,
- const struct rte_flow_attr *attr,
- const struct rte_flow_item items[],
- const struct rte_flow_action actions[],
- struct rte_flow_error *error)
+static int
+priv_flow_convert_items_validate(struct priv *priv,
+ const struct rte_flow_item items[],
+ struct rte_flow_error *error,
+ struct mlx5_flow_parse *parser)
{
- struct priv *priv = dev->data->dev_private;
+ const struct mlx5_flow_items *cur_item = mlx5_flow_items;
+ unsigned int i;
+
+ (void)priv;
+ /* Initialise the offsets to start after verbs attribute. */
+ if (parser->drop) {
+ parser->drop_q.offset = sizeof(struct ibv_flow_attr);
+ } else {
+ for (i = 0; i != hash_rxq_init_n; ++i)
+ parser->queue[i].offset = sizeof(struct ibv_flow_attr);
+ }
+ for (; items->type != RTE_FLOW_ITEM_TYPE_END; ++items) {
+ const struct mlx5_flow_items *token = NULL;
+ unsigned int n;
+ int err;
+
+ if (items->type == RTE_FLOW_ITEM_TYPE_VOID)
+ continue;
+ for (i = 0;
+ cur_item->items &&
+ cur_item->items[i] != RTE_FLOW_ITEM_TYPE_END;
+ ++i) {
+ if (cur_item->items[i] == items->type) {
+ token = &mlx5_flow_items[items->type];
+ break;
+ }
+ }
+ if (!token)
+ goto exit_item_not_supported;
+ cur_item = token;
+ err = mlx5_flow_item_validate(items,
+ (const uint8_t *)cur_item->mask,
+ cur_item->mask_sz);
+ if (err)
+ goto exit_item_not_supported;
+ if (items->type == RTE_FLOW_ITEM_TYPE_VXLAN) {
+ if (parser->inner) {
+ rte_flow_error_set(error, ENOTSUP,
+ RTE_FLOW_ERROR_TYPE_ITEM,
+ items,
+ "cannot recognize multiple"
+ " VXLAN encapsulations");
+ return -rte_errno;
+ }
+ parser->inner = IBV_FLOW_SPEC_INNER;
+ }
+ if (parser->drop) {
+ parser->drop_q.offset += cur_item->dst_sz;
+ } else if (parser->queues_n == 1) {
+ parser->queue[HASH_RXQ_ETH].offset += cur_item->dst_sz;
+ } else {
+ for (n = 0; n != hash_rxq_init_n; ++n)
+ parser->queue[n].offset += cur_item->dst_sz;
+ }
+ }
+ if (parser->mark) {
+ for (i = 0; i != hash_rxq_init_n; ++i)
+ parser->queue[i].offset +=
+ sizeof(struct ibv_flow_spec_action_tag);
+ }
+ if (parser->count) {
+ unsigned int size = sizeof(struct ibv_flow_spec_counter_action);
+
+ if (parser->drop) {
+ parser->drop_q.offset += size;
+ } else {
+ for (i = 0; i != hash_rxq_init_n; ++i)
+ parser->queue[i].offset += size;
+ }
+ }
+ return 0;
+exit_item_not_supported:
+ rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM,
+ items, "item not supported");
+ return -rte_errno;
+}
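To make the offset accounting above concrete (editor's arithmetic, based on the dst_sz values in mlx5_flow_items[] and the mark addition): an ETH / IPV4 / UDP pattern combined with a MARK action leaves each populated offset at

	sizeof(struct ibv_flow_attr)
	+ sizeof(struct ibv_flow_spec_eth)
	+ sizeof(struct ibv_flow_spec_ipv4_ext)
	+ sizeof(struct ibv_flow_spec_tcp_udp)
	+ sizeof(struct ibv_flow_spec_action_tag)

bytes, which is the size later requested from priv_flow_convert_allocate().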
+
+/**
+ * Allocate memory space to store verbs flow attributes.
+ *
+ * @param priv
+ * Pointer to private structure.
+ * @param[in] priority
+ * Flow priority.
+ * @param[in] size
+ * Amount of byte to allocate.
+ * @param[out] error
+ * Perform verbose error reporting if not NULL.
+ *
+ * @return
+ * A verbs flow attribute on success, NULL otherwise.
+ */
+static struct ibv_flow_attr*
+priv_flow_convert_allocate(struct priv *priv,
+ unsigned int priority,
+ unsigned int size,
+ struct rte_flow_error *error)
+{
+ struct ibv_flow_attr *ibv_attr;
+
+ (void)priv;
+ ibv_attr = rte_calloc(__func__, 1, size, 0);
+ if (!ibv_attr) {
+ rte_flow_error_set(error, ENOMEM,
+ RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
+ NULL,
+ "cannot allocate verbs spec attributes.");
+ return NULL;
+ }
+ ibv_attr->priority = priority;
+ return ibv_attr;
+}
+
+/**
+ * Finalise verbs flow attributes.
+ *
+ * @param priv
+ * Pointer to private structure.
+ * @param[in, out] parser
+ * Internal parser structure.
+ */
+static void
+priv_flow_convert_finalise(struct priv *priv, struct mlx5_flow_parse *parser)
+{
+ const unsigned int ipv4 =
+ hash_rxq_init[parser->layer].ip_version == MLX5_IPV4;
+ const enum hash_rxq_type hmin = ipv4 ? HASH_RXQ_TCPV4 : HASH_RXQ_TCPV6;
+ const enum hash_rxq_type hmax = ipv4 ? HASH_RXQ_IPV4 : HASH_RXQ_IPV6;
+ const enum hash_rxq_type ohmin = ipv4 ? HASH_RXQ_TCPV6 : HASH_RXQ_TCPV4;
+ const enum hash_rxq_type ohmax = ipv4 ? HASH_RXQ_IPV6 : HASH_RXQ_IPV4;
+ const enum hash_rxq_type ip = ipv4 ? HASH_RXQ_IPV4 : HASH_RXQ_IPV6;
+ unsigned int i;
+
+ (void)priv;
+ if (parser->layer == HASH_RXQ_ETH) {
+ goto fill;
+ } else {
+ /*
+		 * This layer becomes useless as the pattern defines more
+		 * specific layers.
+ */
+ rte_free(parser->queue[HASH_RXQ_ETH].ibv_attr);
+ parser->queue[HASH_RXQ_ETH].ibv_attr = NULL;
+ }
+ /* Remove opposite kind of layer e.g. IPv6 if the pattern is IPv4. */
+ for (i = ohmin; i != (ohmax + 1); ++i) {
+ if (!parser->queue[i].ibv_attr)
+ continue;
+ rte_free(parser->queue[i].ibv_attr);
+ parser->queue[i].ibv_attr = NULL;
+ }
+ /* Remove impossible flow according to the RSS configuration. */
+ if (hash_rxq_init[parser->layer].dpdk_rss_hf &
+ parser->rss_conf.rss_hf) {
+ /* Remove any other flow. */
+ for (i = hmin; i != (hmax + 1); ++i) {
+ if ((i == parser->layer) ||
+ (!parser->queue[i].ibv_attr))
+ continue;
+ rte_free(parser->queue[i].ibv_attr);
+ parser->queue[i].ibv_attr = NULL;
+ }
+ } else if (!parser->queue[ip].ibv_attr) {
+		/* No RSS possible with the current configuration. */
+ parser->queues_n = 1;
+ return;
+ }
+fill:
+ /*
+ * Fill missing layers in verbs specifications, or compute the correct
+ * offset to allocate the memory space for the attributes and
+ * specifications.
+ */
+ for (i = 0; i != hash_rxq_init_n - 1; ++i) {
+ union {
+ struct ibv_flow_spec_ipv4_ext ipv4;
+ struct ibv_flow_spec_ipv6 ipv6;
+ struct ibv_flow_spec_tcp_udp udp_tcp;
+ } specs;
+ void *dst;
+ uint16_t size;
+
+ if (i == parser->layer)
+ continue;
+ if (parser->layer == HASH_RXQ_ETH) {
+ if (hash_rxq_init[i].ip_version == MLX5_IPV4) {
+ size = sizeof(struct ibv_flow_spec_ipv4_ext);
+ specs.ipv4 = (struct ibv_flow_spec_ipv4_ext){
+ .type = IBV_FLOW_SPEC_IPV4_EXT,
+ .size = size,
+ };
+ } else {
+ size = sizeof(struct ibv_flow_spec_ipv6);
+ specs.ipv6 = (struct ibv_flow_spec_ipv6){
+ .type = IBV_FLOW_SPEC_IPV6,
+ .size = size,
+ };
+ }
+ if (parser->queue[i].ibv_attr) {
+ dst = (void *)((uintptr_t)
+ parser->queue[i].ibv_attr +
+ parser->queue[i].offset);
+ memcpy(dst, &specs, size);
+ ++parser->queue[i].ibv_attr->num_of_specs;
+ }
+ parser->queue[i].offset += size;
+ }
+ if ((i == HASH_RXQ_UDPV4) || (i == HASH_RXQ_TCPV4) ||
+ (i == HASH_RXQ_UDPV6) || (i == HASH_RXQ_TCPV6)) {
+ size = sizeof(struct ibv_flow_spec_tcp_udp);
+ specs.udp_tcp = (struct ibv_flow_spec_tcp_udp) {
+ .type = ((i == HASH_RXQ_UDPV4 ||
+ i == HASH_RXQ_UDPV6) ?
+ IBV_FLOW_SPEC_UDP :
+ IBV_FLOW_SPEC_TCP),
+ .size = size,
+ };
+ if (parser->queue[i].ibv_attr) {
+ dst = (void *)((uintptr_t)
+ parser->queue[i].ibv_attr +
+ parser->queue[i].offset);
+ memcpy(dst, &specs, size);
+ ++parser->queue[i].ibv_attr->num_of_specs;
+ }
+ parser->queue[i].offset += size;
+ }
+ }
+}
+
+/**
+ * Validate and convert a flow supported by the NIC.
+ *
+ * @param priv
+ * Pointer to private structure.
+ * @param[in] attr
+ * Flow rule attributes.
+ * @param[in] pattern
+ * Pattern specification (list terminated by the END pattern item).
+ * @param[in] actions
+ * Associated actions (list terminated by the END action).
+ * @param[out] error
+ * Perform verbose error reporting if not NULL.
+ * @param[in, out] parser
+ * Internal parser structure.
+ *
+ * @return
+ * 0 on success, a negative errno value otherwise and rte_errno is set.
+ */
+static int
+priv_flow_convert(struct priv *priv,
+ const struct rte_flow_attr *attr,
+ const struct rte_flow_item items[],
+ const struct rte_flow_action actions[],
+ struct rte_flow_error *error,
+ struct mlx5_flow_parse *parser)
+{
+ const struct mlx5_flow_items *cur_item = mlx5_flow_items;
+ unsigned int i;
int ret;
- struct mlx5_flow flow = { .offset = sizeof(struct ibv_exp_flow_attr) };
- struct mlx5_flow_action action = {
- .queue = 0,
- .drop = 0,
- .mark = 0,
+
+ /* First step. Validate the attributes, items and actions. */
+ *parser = (struct mlx5_flow_parse){
+ .create = parser->create,
+ .layer = HASH_RXQ_ETH,
.mark_id = MLX5_FLOW_MARK_DEFAULT,
- .queues_n = 0,
};
-
- priv_lock(priv);
- ret = priv_flow_validate(priv, attr, items, actions, error, &flow,
- &action);
- priv_unlock(priv);
+ ret = priv_flow_convert_attributes(priv, attr, error, parser);
+ if (ret)
+ return ret;
+ ret = priv_flow_convert_actions(priv, actions, error, parser);
+ if (ret)
+ return ret;
+ ret = priv_flow_convert_items_validate(priv, items, error, parser);
+ if (ret)
+ return ret;
+ priv_flow_convert_finalise(priv, parser);
+ /*
+ * Second step.
+ * Allocate the memory space to store verbs specifications.
+ */
+ if (parser->drop) {
+ parser->drop_q.ibv_attr =
+ priv_flow_convert_allocate(priv, attr->priority,
+ parser->drop_q.offset,
+ error);
+ if (!parser->drop_q.ibv_attr)
+ return ENOMEM;
+ parser->drop_q.offset = sizeof(struct ibv_flow_attr);
+ } else if (parser->queues_n == 1) {
+ unsigned int priority =
+ attr->priority +
+ hash_rxq_init[HASH_RXQ_ETH].flow_priority;
+ unsigned int offset = parser->queue[HASH_RXQ_ETH].offset;
+
+ parser->queue[HASH_RXQ_ETH].ibv_attr =
+ priv_flow_convert_allocate(priv, priority,
+ offset, error);
+ if (!parser->queue[HASH_RXQ_ETH].ibv_attr)
+ return ENOMEM;
+ parser->queue[HASH_RXQ_ETH].offset =
+ sizeof(struct ibv_flow_attr);
+ } else {
+ for (i = 0; i != hash_rxq_init_n; ++i) {
+ unsigned int priority =
+ attr->priority +
+ hash_rxq_init[i].flow_priority;
+ unsigned int offset;
+
+ if (!(parser->rss_conf.rss_hf &
+ hash_rxq_init[i].dpdk_rss_hf) &&
+ (i != HASH_RXQ_ETH))
+ continue;
+ offset = parser->queue[i].offset;
+ parser->queue[i].ibv_attr =
+ priv_flow_convert_allocate(priv, priority,
+ offset, error);
+ if (!parser->queue[i].ibv_attr)
+ goto exit_enomem;
+ parser->queue[i].offset = sizeof(struct ibv_flow_attr);
+ }
+ }
+ /* Third step. Conversion parse, fill the specifications. */
+ parser->inner = 0;
+ for (; items->type != RTE_FLOW_ITEM_TYPE_END; ++items) {
+ if (items->type == RTE_FLOW_ITEM_TYPE_VOID)
+ continue;
+ cur_item = &mlx5_flow_items[items->type];
+ ret = cur_item->convert(items,
+ (cur_item->default_mask ?
+ cur_item->default_mask :
+ cur_item->mask),
+ parser);
+ if (ret) {
+ rte_flow_error_set(error, ret,
+ RTE_FLOW_ERROR_TYPE_ITEM,
+ items, "item not supported");
+ goto exit_free;
+ }
+ }
+ if (parser->mark)
+ mlx5_flow_create_flag_mark(parser, parser->mark_id);
+ if (parser->count && parser->create) {
+ mlx5_flow_create_count(priv, parser);
+ if (!parser->cs)
+ goto exit_count_error;
+ }
+ /*
+ * Last step. Complete the missing specifications needed to reach
+ * the RSS configuration.
+ */
+ if (parser->drop) {
+ /*
+ * The drop queue priority needs to be adjusted to
+ * its most specific layer priority.
+ */
+ parser->drop_q.ibv_attr->priority =
+ attr->priority +
+ hash_rxq_init[parser->layer].flow_priority;
+ } else if (parser->queues_n > 1) {
+ priv_flow_convert_finalise(priv, parser);
+ } else {
+ /*
+ * Action queues have their priority overridden with the
+ * Ethernet priority; it needs to be adjusted to the most
+ * specific layer priority.
+ */
+ parser->queue[HASH_RXQ_ETH].ibv_attr->priority =
+ attr->priority +
+ hash_rxq_init[parser->layer].flow_priority;
+ }
+exit_free:
+ /* Only verification is expected, all resources should be released. */
+ if (!parser->create) {
+ if (parser->drop) {
+ rte_free(parser->drop_q.ibv_attr);
+ parser->drop_q.ibv_attr = NULL;
+ }
+ for (i = 0; i != hash_rxq_init_n; ++i) {
+ if (parser->queue[i].ibv_attr) {
+ rte_free(parser->queue[i].ibv_attr);
+ parser->queue[i].ibv_attr = NULL;
+ }
+ }
+ }
return ret;
+exit_enomem:
+ for (i = 0; i != hash_rxq_init_n; ++i) {
+ if (parser->queue[i].ibv_attr) {
+ rte_free(parser->queue[i].ibv_attr);
+ parser->queue[i].ibv_attr = NULL;
+ }
+ }
+ rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
+ NULL, "cannot allocate verbs spec attributes.");
+ return ENOMEM;
+exit_count_error:
+ rte_flow_error_set(error, EINVAL, RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
+ NULL, "cannot create counter.");
+ return rte_errno;
+}
+
+/**
+ * Copy the created specification into the flow.
+ *
+ * @param parser
+ * Internal parser structure.
+ * @param src
+ * Created specification to copy.
+ * @param size
+ * Size in bytes of the specification to copy.
+ */
+static void
+mlx5_flow_create_copy(struct mlx5_flow_parse *parser, void *src,
+ unsigned int size)
+{
+ unsigned int i;
+ void *dst;
+
+ if (parser->drop) {
+ dst = (void *)((uintptr_t)parser->drop_q.ibv_attr +
+ parser->drop_q.offset);
+ memcpy(dst, src, size);
+ ++parser->drop_q.ibv_attr->num_of_specs;
+ parser->drop_q.offset += size;
+ return;
+ }
+ for (i = 0; i != hash_rxq_init_n; ++i) {
+ if (!parser->queue[i].ibv_attr)
+ continue;
+ /* Specification must be the same L3 type or none. */
+ if (parser->layer == HASH_RXQ_ETH ||
+ (hash_rxq_init[parser->layer].ip_version ==
+ hash_rxq_init[i].ip_version) ||
+ (hash_rxq_init[i].ip_version == 0)) {
+ dst = (void *)((uintptr_t)parser->queue[i].ibv_attr +
+ parser->queue[i].offset);
+ memcpy(dst, src, size);
+ ++parser->queue[i].ibv_attr->num_of_specs;
+ parser->queue[i].offset += size;
+ }
+ }
}
/**
@@ -648,35 +1282,35 @@ mlx5_flow_create_eth(const struct rte_flow_item *item,
{
const struct rte_flow_item_eth *spec = item->spec;
const struct rte_flow_item_eth *mask = item->mask;
- struct mlx5_flow *flow = (struct mlx5_flow *)data;
- struct ibv_exp_flow_spec_eth *eth;
- const unsigned int eth_size = sizeof(struct ibv_exp_flow_spec_eth);
- unsigned int i;
-
- ++flow->ibv_attr->num_of_specs;
- flow->ibv_attr->priority = 2;
- flow->hash_fields = 0;
- eth = (void *)((uintptr_t)flow->ibv_attr + flow->offset);
- *eth = (struct ibv_exp_flow_spec_eth) {
- .type = flow->inner | IBV_EXP_FLOW_SPEC_ETH,
+ struct mlx5_flow_parse *parser = (struct mlx5_flow_parse *)data;
+ const unsigned int eth_size = sizeof(struct ibv_flow_spec_eth);
+ struct ibv_flow_spec_eth eth = {
+ .type = parser->inner | IBV_FLOW_SPEC_ETH,
.size = eth_size,
};
- if (!spec)
- return 0;
- if (!mask)
- mask = default_mask;
- memcpy(eth->val.dst_mac, spec->dst.addr_bytes, ETHER_ADDR_LEN);
- memcpy(eth->val.src_mac, spec->src.addr_bytes, ETHER_ADDR_LEN);
- eth->val.ether_type = spec->type;
- memcpy(eth->mask.dst_mac, mask->dst.addr_bytes, ETHER_ADDR_LEN);
- memcpy(eth->mask.src_mac, mask->src.addr_bytes, ETHER_ADDR_LEN);
- eth->mask.ether_type = mask->type;
- /* Remove unwanted bits from values. */
- for (i = 0; i < ETHER_ADDR_LEN; ++i) {
- eth->val.dst_mac[i] &= eth->mask.dst_mac[i];
- eth->val.src_mac[i] &= eth->mask.src_mac[i];
- }
- eth->val.ether_type &= eth->mask.ether_type;
+
+ /* Don't update layer for the inner pattern. */
+ if (!parser->inner)
+ parser->layer = HASH_RXQ_ETH;
+ if (spec) {
+ unsigned int i;
+
+ if (!mask)
+ mask = default_mask;
+ memcpy(&eth.val.dst_mac, spec->dst.addr_bytes, ETHER_ADDR_LEN);
+ memcpy(&eth.val.src_mac, spec->src.addr_bytes, ETHER_ADDR_LEN);
+ eth.val.ether_type = spec->type;
+ memcpy(&eth.mask.dst_mac, mask->dst.addr_bytes, ETHER_ADDR_LEN);
+ memcpy(&eth.mask.src_mac, mask->src.addr_bytes, ETHER_ADDR_LEN);
+ eth.mask.ether_type = mask->type;
+ /* Remove unwanted bits from values. */
+ for (i = 0; i < ETHER_ADDR_LEN; ++i) {
+ eth.val.dst_mac[i] &= eth.mask.dst_mac[i];
+ eth.val.src_mac[i] &= eth.mask.src_mac[i];
+ }
+ eth.val.ether_type &= eth.mask.ether_type;
+ }
+ mlx5_flow_create_copy(parser, &eth, eth_size);
return 0;
}
@@ -697,18 +1331,34 @@ mlx5_flow_create_vlan(const struct rte_flow_item *item,
{
const struct rte_flow_item_vlan *spec = item->spec;
const struct rte_flow_item_vlan *mask = item->mask;
- struct mlx5_flow *flow = (struct mlx5_flow *)data;
- struct ibv_exp_flow_spec_eth *eth;
- const unsigned int eth_size = sizeof(struct ibv_exp_flow_spec_eth);
+ struct mlx5_flow_parse *parser = (struct mlx5_flow_parse *)data;
+ struct ibv_flow_spec_eth *eth;
+ const unsigned int eth_size = sizeof(struct ibv_flow_spec_eth);
- eth = (void *)((uintptr_t)flow->ibv_attr + flow->offset - eth_size);
- if (!spec)
- return 0;
- if (!mask)
- mask = default_mask;
- eth->val.vlan_tag = spec->tci;
- eth->mask.vlan_tag = mask->tci;
- eth->val.vlan_tag &= eth->mask.vlan_tag;
+ if (spec) {
+ unsigned int i;
+ if (!mask)
+ mask = default_mask;
+
+ if (parser->drop) {
+ eth = (void *)((uintptr_t)parser->drop_q.ibv_attr +
+ parser->drop_q.offset - eth_size);
+ eth->val.vlan_tag = spec->tci;
+ eth->mask.vlan_tag = mask->tci;
+ eth->val.vlan_tag &= eth->mask.vlan_tag;
+ return 0;
+ }
+ for (i = 0; i != hash_rxq_init_n; ++i) {
+ if (!parser->queue[i].ibv_attr)
+ continue;
+
+ eth = (void *)((uintptr_t)parser->queue[i].ibv_attr +
+ parser->queue[i].offset - eth_size);
+ eth->val.vlan_tag = spec->tci;
+ eth->mask.vlan_tag = mask->tci;
+ eth->val.vlan_tag &= eth->mask.vlan_tag;
+ }
+ }
return 0;
}
@@ -729,40 +1379,38 @@ mlx5_flow_create_ipv4(const struct rte_flow_item *item,
{
const struct rte_flow_item_ipv4 *spec = item->spec;
const struct rte_flow_item_ipv4 *mask = item->mask;
- struct mlx5_flow *flow = (struct mlx5_flow *)data;
- struct ibv_exp_flow_spec_ipv4_ext *ipv4;
- unsigned int ipv4_size = sizeof(struct ibv_exp_flow_spec_ipv4_ext);
-
- ++flow->ibv_attr->num_of_specs;
- flow->ibv_attr->priority = 1;
- flow->hash_fields = (IBV_EXP_RX_HASH_SRC_IPV4 |
- IBV_EXP_RX_HASH_DST_IPV4);
- ipv4 = (void *)((uintptr_t)flow->ibv_attr + flow->offset);
- *ipv4 = (struct ibv_exp_flow_spec_ipv4_ext) {
- .type = flow->inner | IBV_EXP_FLOW_SPEC_IPV4_EXT,
+ struct mlx5_flow_parse *parser = (struct mlx5_flow_parse *)data;
+ unsigned int ipv4_size = sizeof(struct ibv_flow_spec_ipv4_ext);
+ struct ibv_flow_spec_ipv4_ext ipv4 = {
+ .type = parser->inner | IBV_FLOW_SPEC_IPV4_EXT,
.size = ipv4_size,
};
- if (!spec)
- return 0;
- if (!mask)
- mask = default_mask;
- ipv4->val = (struct ibv_exp_flow_ipv4_ext_filter){
- .src_ip = spec->hdr.src_addr,
- .dst_ip = spec->hdr.dst_addr,
- .proto = spec->hdr.next_proto_id,
- .tos = spec->hdr.type_of_service,
- };
- ipv4->mask = (struct ibv_exp_flow_ipv4_ext_filter){
- .src_ip = mask->hdr.src_addr,
- .dst_ip = mask->hdr.dst_addr,
- .proto = mask->hdr.next_proto_id,
- .tos = mask->hdr.type_of_service,
- };
- /* Remove unwanted bits from values. */
- ipv4->val.src_ip &= ipv4->mask.src_ip;
- ipv4->val.dst_ip &= ipv4->mask.dst_ip;
- ipv4->val.proto &= ipv4->mask.proto;
- ipv4->val.tos &= ipv4->mask.tos;
+
+ /* Don't update layer for the inner pattern. */
+ if (!parser->inner)
+ parser->layer = HASH_RXQ_IPV4;
+ if (spec) {
+ if (!mask)
+ mask = default_mask;
+ ipv4.val = (struct ibv_flow_ipv4_ext_filter){
+ .src_ip = spec->hdr.src_addr,
+ .dst_ip = spec->hdr.dst_addr,
+ .proto = spec->hdr.next_proto_id,
+ .tos = spec->hdr.type_of_service,
+ };
+ ipv4.mask = (struct ibv_flow_ipv4_ext_filter){
+ .src_ip = mask->hdr.src_addr,
+ .dst_ip = mask->hdr.dst_addr,
+ .proto = mask->hdr.next_proto_id,
+ .tos = mask->hdr.type_of_service,
+ };
+ /* Remove unwanted bits from values. */
+ ipv4.val.src_ip &= ipv4.mask.src_ip;
+ ipv4.val.dst_ip &= ipv4.mask.dst_ip;
+ ipv4.val.proto &= ipv4.mask.proto;
+ ipv4.val.tos &= ipv4.mask.tos;
+ }
+ mlx5_flow_create_copy(parser, &ipv4, ipv4_size);
return 0;
}
@@ -783,43 +1431,42 @@ mlx5_flow_create_ipv6(const struct rte_flow_item *item,
{
const struct rte_flow_item_ipv6 *spec = item->spec;
const struct rte_flow_item_ipv6 *mask = item->mask;
- struct mlx5_flow *flow = (struct mlx5_flow *)data;
- struct ibv_exp_flow_spec_ipv6_ext *ipv6;
- unsigned int ipv6_size = sizeof(struct ibv_exp_flow_spec_ipv6_ext);
- unsigned int i;
-
- ++flow->ibv_attr->num_of_specs;
- flow->ibv_attr->priority = 1;
- flow->hash_fields = (IBV_EXP_RX_HASH_SRC_IPV6 |
- IBV_EXP_RX_HASH_DST_IPV6);
- ipv6 = (void *)((uintptr_t)flow->ibv_attr + flow->offset);
- *ipv6 = (struct ibv_exp_flow_spec_ipv6_ext) {
- .type = flow->inner | IBV_EXP_FLOW_SPEC_IPV6_EXT,
+ struct mlx5_flow_parse *parser = (struct mlx5_flow_parse *)data;
+ unsigned int ipv6_size = sizeof(struct ibv_flow_spec_ipv6);
+ struct ibv_flow_spec_ipv6 ipv6 = {
+ .type = parser->inner | IBV_FLOW_SPEC_IPV6,
.size = ipv6_size,
};
- if (!spec)
- return 0;
- if (!mask)
- mask = default_mask;
- memcpy(ipv6->val.src_ip, spec->hdr.src_addr,
- RTE_DIM(ipv6->val.src_ip));
- memcpy(ipv6->val.dst_ip, spec->hdr.dst_addr,
- RTE_DIM(ipv6->val.dst_ip));
- memcpy(ipv6->mask.src_ip, mask->hdr.src_addr,
- RTE_DIM(ipv6->mask.src_ip));
- memcpy(ipv6->mask.dst_ip, mask->hdr.dst_addr,
- RTE_DIM(ipv6->mask.dst_ip));
- ipv6->mask.flow_label = mask->hdr.vtc_flow;
- ipv6->mask.next_hdr = mask->hdr.proto;
- ipv6->mask.hop_limit = mask->hdr.hop_limits;
- /* Remove unwanted bits from values. */
- for (i = 0; i < RTE_DIM(ipv6->val.src_ip); ++i) {
- ipv6->val.src_ip[i] &= ipv6->mask.src_ip[i];
- ipv6->val.dst_ip[i] &= ipv6->mask.dst_ip[i];
- }
- ipv6->val.flow_label &= ipv6->mask.flow_label;
- ipv6->val.next_hdr &= ipv6->mask.next_hdr;
- ipv6->val.hop_limit &= ipv6->mask.hop_limit;
+
+ /* Don't update layer for the inner pattern. */
+ if (!parser->inner)
+ parser->layer = HASH_RXQ_IPV6;
+ if (spec) {
+ unsigned int i;
+
+ if (!mask)
+ mask = default_mask;
+ memcpy(&ipv6.val.src_ip, spec->hdr.src_addr,
+ RTE_DIM(ipv6.val.src_ip));
+ memcpy(&ipv6.val.dst_ip, spec->hdr.dst_addr,
+ RTE_DIM(ipv6.val.dst_ip));
+ memcpy(&ipv6.mask.src_ip, mask->hdr.src_addr,
+ RTE_DIM(ipv6.mask.src_ip));
+ memcpy(&ipv6.mask.dst_ip, mask->hdr.dst_addr,
+ RTE_DIM(ipv6.mask.dst_ip));
+ ipv6.mask.flow_label = mask->hdr.vtc_flow;
+ ipv6.mask.next_hdr = mask->hdr.proto;
+ ipv6.mask.hop_limit = mask->hdr.hop_limits;
+ /* Remove unwanted bits from values. */
+ for (i = 0; i < RTE_DIM(ipv6.val.src_ip); ++i) {
+ ipv6.val.src_ip[i] &= ipv6.mask.src_ip[i];
+ ipv6.val.dst_ip[i] &= ipv6.mask.dst_ip[i];
+ }
+ ipv6.val.flow_label &= ipv6.mask.flow_label;
+ ipv6.val.next_hdr &= ipv6.mask.next_hdr;
+ ipv6.val.hop_limit &= ipv6.mask.hop_limit;
+ }
+ mlx5_flow_create_copy(parser, &ipv6, ipv6_size);
return 0;
}
@@ -840,30 +1487,32 @@ mlx5_flow_create_udp(const struct rte_flow_item *item,
{
const struct rte_flow_item_udp *spec = item->spec;
const struct rte_flow_item_udp *mask = item->mask;
- struct mlx5_flow *flow = (struct mlx5_flow *)data;
- struct ibv_exp_flow_spec_tcp_udp *udp;
- unsigned int udp_size = sizeof(struct ibv_exp_flow_spec_tcp_udp);
-
- ++flow->ibv_attr->num_of_specs;
- flow->ibv_attr->priority = 0;
- flow->hash_fields |= (IBV_EXP_RX_HASH_SRC_PORT_UDP |
- IBV_EXP_RX_HASH_DST_PORT_UDP);
- udp = (void *)((uintptr_t)flow->ibv_attr + flow->offset);
- *udp = (struct ibv_exp_flow_spec_tcp_udp) {
- .type = flow->inner | IBV_EXP_FLOW_SPEC_UDP,
+ struct mlx5_flow_parse *parser = (struct mlx5_flow_parse *)data;
+ unsigned int udp_size = sizeof(struct ibv_flow_spec_tcp_udp);
+ struct ibv_flow_spec_tcp_udp udp = {
+ .type = parser->inner | IBV_FLOW_SPEC_UDP,
.size = udp_size,
};
- if (!spec)
- return 0;
- if (!mask)
- mask = default_mask;
- udp->val.dst_port = spec->hdr.dst_port;
- udp->val.src_port = spec->hdr.src_port;
- udp->mask.dst_port = mask->hdr.dst_port;
- udp->mask.src_port = mask->hdr.src_port;
- /* Remove unwanted bits from values. */
- udp->val.src_port &= udp->mask.src_port;
- udp->val.dst_port &= udp->mask.dst_port;
+
+ /* Don't update layer for the inner pattern. */
+ if (!parser->inner) {
+ if (parser->layer == HASH_RXQ_IPV4)
+ parser->layer = HASH_RXQ_UDPV4;
+ else
+ parser->layer = HASH_RXQ_UDPV6;
+ }
+ if (spec) {
+ if (!mask)
+ mask = default_mask;
+ udp.val.dst_port = spec->hdr.dst_port;
+ udp.val.src_port = spec->hdr.src_port;
+ udp.mask.dst_port = mask->hdr.dst_port;
+ udp.mask.src_port = mask->hdr.src_port;
+ /* Remove unwanted bits from values. */
+ udp.val.src_port &= udp.mask.src_port;
+ udp.val.dst_port &= udp.mask.dst_port;
+ }
+ mlx5_flow_create_copy(parser, &udp, udp_size);
return 0;
}
@@ -884,30 +1533,32 @@ mlx5_flow_create_tcp(const struct rte_flow_item *item,
{
const struct rte_flow_item_tcp *spec = item->spec;
const struct rte_flow_item_tcp *mask = item->mask;
- struct mlx5_flow *flow = (struct mlx5_flow *)data;
- struct ibv_exp_flow_spec_tcp_udp *tcp;
- unsigned int tcp_size = sizeof(struct ibv_exp_flow_spec_tcp_udp);
-
- ++flow->ibv_attr->num_of_specs;
- flow->ibv_attr->priority = 0;
- flow->hash_fields |= (IBV_EXP_RX_HASH_SRC_PORT_TCP |
- IBV_EXP_RX_HASH_DST_PORT_TCP);
- tcp = (void *)((uintptr_t)flow->ibv_attr + flow->offset);
- *tcp = (struct ibv_exp_flow_spec_tcp_udp) {
- .type = flow->inner | IBV_EXP_FLOW_SPEC_TCP,
+ struct mlx5_flow_parse *parser = (struct mlx5_flow_parse *)data;
+ unsigned int tcp_size = sizeof(struct ibv_flow_spec_tcp_udp);
+ struct ibv_flow_spec_tcp_udp tcp = {
+ .type = parser->inner | IBV_FLOW_SPEC_TCP,
.size = tcp_size,
};
- if (!spec)
- return 0;
- if (!mask)
- mask = default_mask;
- tcp->val.dst_port = spec->hdr.dst_port;
- tcp->val.src_port = spec->hdr.src_port;
- tcp->mask.dst_port = mask->hdr.dst_port;
- tcp->mask.src_port = mask->hdr.src_port;
- /* Remove unwanted bits from values. */
- tcp->val.src_port &= tcp->mask.src_port;
- tcp->val.dst_port &= tcp->mask.dst_port;
+
+ /* Don't update layer for the inner pattern. */
+ if (!parser->inner) {
+ if (parser->layer == HASH_RXQ_IPV4)
+ parser->layer = HASH_RXQ_TCPV4;
+ else
+ parser->layer = HASH_RXQ_TCPV6;
+ }
+ if (spec) {
+ if (!mask)
+ mask = default_mask;
+ tcp.val.dst_port = spec->hdr.dst_port;
+ tcp.val.src_port = spec->hdr.src_port;
+ tcp.mask.dst_port = mask->hdr.dst_port;
+ tcp.mask.src_port = mask->hdr.src_port;
+ /* Remove unwanted bits from values. */
+ tcp.val.src_port &= tcp.mask.src_port;
+ tcp.val.dst_port &= tcp.mask.dst_port;
+ }
+ mlx5_flow_create_copy(parser, &tcp, tcp_size);
return 0;
}
@@ -928,57 +1579,97 @@ mlx5_flow_create_vxlan(const struct rte_flow_item *item,
{
const struct rte_flow_item_vxlan *spec = item->spec;
const struct rte_flow_item_vxlan *mask = item->mask;
- struct mlx5_flow *flow = (struct mlx5_flow *)data;
- struct ibv_exp_flow_spec_tunnel *vxlan;
- unsigned int size = sizeof(struct ibv_exp_flow_spec_tunnel);
+ struct mlx5_flow_parse *parser = (struct mlx5_flow_parse *)data;
+ unsigned int size = sizeof(struct ibv_flow_spec_tunnel);
+ struct ibv_flow_spec_tunnel vxlan = {
+ .type = parser->inner | IBV_FLOW_SPEC_VXLAN_TUNNEL,
+ .size = size,
+ };
union vni {
uint32_t vlan_id;
uint8_t vni[4];
} id;
- ++flow->ibv_attr->num_of_specs;
- flow->ibv_attr->priority = 0;
id.vni[0] = 0;
- vxlan = (void *)((uintptr_t)flow->ibv_attr + flow->offset);
- *vxlan = (struct ibv_exp_flow_spec_tunnel) {
- .type = flow->inner | IBV_EXP_FLOW_SPEC_VXLAN_TUNNEL,
- .size = size,
- };
- flow->inner = IBV_EXP_FLOW_SPEC_INNER;
- if (!spec)
- return 0;
- if (!mask)
- mask = default_mask;
- memcpy(&id.vni[1], spec->vni, 3);
- vxlan->val.tunnel_id = id.vlan_id;
- memcpy(&id.vni[1], mask->vni, 3);
- vxlan->mask.tunnel_id = id.vlan_id;
- /* Remove unwanted bits from values. */
- vxlan->val.tunnel_id &= vxlan->mask.tunnel_id;
+ parser->inner = IBV_FLOW_SPEC_INNER;
+ if (spec) {
+ if (!mask)
+ mask = default_mask;
+ memcpy(&id.vni[1], spec->vni, 3);
+ vxlan.val.tunnel_id = id.vlan_id;
+ memcpy(&id.vni[1], mask->vni, 3);
+ vxlan.mask.tunnel_id = id.vlan_id;
+ /* Remove unwanted bits from values. */
+ vxlan.val.tunnel_id &= vxlan.mask.tunnel_id;
+ }
+ /*
+ * A tunnel ID of 0 is equivalent to not adding a VXLAN layer: if only
+ * this layer is defined in the Verbs specification, it is interpreted
+ * as a wildcard and all packets will match this rule; if it follows a
+ * full stack of layers (e.g. eth / ipv4 / udp), all packets matching
+ * the preceding layers will also match this rule.
+ * To avoid such a situation, VNI 0 is currently refused.
+ */
+ if (!vxlan.val.tunnel_id)
+ return EINVAL;
+ mlx5_flow_create_copy(parser, &vxlan, size);
return 0;
}
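
The VNI handling above packs the 24-bit network-order VNI from the pattern into the 32-bit Verbs tunnel_id, leaving the first byte zero, and then refuses a resulting value of zero. A minimal standalone sketch of that packing, mirroring the converter above (the helper name is illustrative):

#include <stdint.h>
#include <string.h>

/* Pack a 3-byte VXLAN VNI into a 32-bit tunnel id the same way the
 * converter above does: byte 0 stays zero, bytes 1..3 carry the VNI. */
static uint32_t
vni_to_tunnel_id(const uint8_t vni[3])
{
	union {
		uint32_t vlan_id;
		uint8_t vni[4];
	} id = { .vni = { 0 } };

	memcpy(&id.vni[1], vni, 3);
	return id.vlan_id; /* Zero means "no VNI" and is refused above. */
}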
/**
* Convert mark/flag action to Verbs specification.
*
- * @param flow
- * Pointer to MLX5 flow structure.
+ * @param parser
+ * Internal parser structure.
* @param mark_id
* Mark identifier.
*/
static int
-mlx5_flow_create_flag_mark(struct mlx5_flow *flow, uint32_t mark_id)
+mlx5_flow_create_flag_mark(struct mlx5_flow_parse *parser, uint32_t mark_id)
{
- struct ibv_exp_flow_spec_action_tag *tag;
- unsigned int size = sizeof(struct ibv_exp_flow_spec_action_tag);
-
- tag = (void *)((uintptr_t)flow->ibv_attr + flow->offset);
- *tag = (struct ibv_exp_flow_spec_action_tag){
- .type = IBV_EXP_FLOW_SPEC_ACTION_TAG,
+ unsigned int size = sizeof(struct ibv_flow_spec_action_tag);
+ struct ibv_flow_spec_action_tag tag = {
+ .type = IBV_FLOW_SPEC_ACTION_TAG,
.size = size,
.tag_id = mlx5_flow_mark_set(mark_id),
};
- ++flow->ibv_attr->num_of_specs;
+
+ assert(parser->mark);
+ mlx5_flow_create_copy(parser, &tag, size);
+ return 0;
+}
+
+/**
+ * Convert count action to Verbs specification.
+ *
+ * @param priv
+ * Pointer to private structure.
+ * @param parser
+ * Pointer to MLX5 flow parser structure.
+ *
+ * @return
+ * 0 on success, errno value on failure.
+ */
+static int
+mlx5_flow_create_count(struct priv *priv __rte_unused,
+ struct mlx5_flow_parse *parser __rte_unused)
+{
+#ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
+ unsigned int size = sizeof(struct ibv_flow_spec_counter_action);
+ struct ibv_counter_set_init_attr init_attr = {0};
+ struct ibv_flow_spec_counter_action counter = {
+ .type = IBV_FLOW_SPEC_ACTION_COUNT,
+ .size = size,
+ .counter_set_handle = 0,
+ };
+
+ init_attr.counter_set_id = 0;
+ parser->cs = ibv_create_counter_set(priv->ctx, &init_attr);
+ if (!parser->cs)
+ return EINVAL;
+ counter.counter_set_handle = parser->cs->handle;
+ mlx5_flow_create_copy(parser, &counter, size);
+#endif
return 0;
}
@@ -987,59 +1678,127 @@ mlx5_flow_create_flag_mark(struct mlx5_flow *flow, uint32_t mark_id)
*
* @param priv
* Pointer to private structure.
+ * @param parser
+ * Internal parser structure.
* @param flow
- * MLX5 flow attributes (filled by mlx5_flow_validate()).
+ * Pointer to the rte_flow.
* @param[out] error
* Perform verbose error reporting if not NULL.
*
* @return
- * A flow if the rule could be created.
+ * 0 on success, errno value on failure.
*/
-static struct rte_flow *
+static int
priv_flow_create_action_queue_drop(struct priv *priv,
- struct mlx5_flow *flow,
+ struct mlx5_flow_parse *parser,
+ struct rte_flow *flow,
struct rte_flow_error *error)
{
- struct rte_flow *rte_flow;
-#ifdef HAVE_VERBS_IBV_EXP_FLOW_SPEC_ACTION_DROP
- struct ibv_exp_flow_spec_action_drop *drop;
- unsigned int size = sizeof(struct ibv_exp_flow_spec_action_drop);
-#endif
+ struct ibv_flow_spec_action_drop *drop;
+ unsigned int size = sizeof(struct ibv_flow_spec_action_drop);
+ int err = 0;
assert(priv->pd);
assert(priv->ctx);
- rte_flow = rte_calloc(__func__, 1, sizeof(*rte_flow), 0);
- if (!rte_flow) {
- rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
- NULL, "cannot allocate flow memory");
- return NULL;
- }
- rte_flow->drop = 1;
-#ifdef HAVE_VERBS_IBV_EXP_FLOW_SPEC_ACTION_DROP
- drop = (void *)((uintptr_t)flow->ibv_attr + flow->offset);
- *drop = (struct ibv_exp_flow_spec_action_drop){
- .type = IBV_EXP_FLOW_SPEC_ACTION_DROP,
+ flow->drop = 1;
+ drop = (void *)((uintptr_t)parser->drop_q.ibv_attr +
+ parser->drop_q.offset);
+ *drop = (struct ibv_flow_spec_action_drop){
+ .type = IBV_FLOW_SPEC_ACTION_DROP,
.size = size,
};
- ++flow->ibv_attr->num_of_specs;
- flow->offset += sizeof(struct ibv_exp_flow_spec_action_drop);
-#endif
- rte_flow->ibv_attr = flow->ibv_attr;
- if (!priv->started)
- return rte_flow;
- rte_flow->qp = priv->flow_drop_queue->qp;
- rte_flow->ibv_flow = ibv_exp_create_flow(rte_flow->qp,
- rte_flow->ibv_attr);
- if (!rte_flow->ibv_flow) {
+ ++parser->drop_q.ibv_attr->num_of_specs;
+ parser->drop_q.offset += size;
+ flow->drxq.ibv_attr = parser->drop_q.ibv_attr;
+ if (parser->count)
+ flow->cs = parser->cs;
+ if (!priv->dev->data->dev_started)
+ return 0;
+ parser->drop_q.ibv_attr = NULL;
+ flow->drxq.ibv_flow = ibv_create_flow(priv->flow_drop_queue->qp,
+ flow->drxq.ibv_attr);
+ if (!flow->drxq.ibv_flow) {
rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
NULL, "flow rule creation failure");
+ err = ENOMEM;
goto error;
}
- return rte_flow;
+ return 0;
error:
- assert(rte_flow);
- rte_free(rte_flow);
- return NULL;
+ assert(flow);
+ if (flow->drxq.ibv_flow) {
+ claim_zero(ibv_destroy_flow(flow->drxq.ibv_flow));
+ flow->drxq.ibv_flow = NULL;
+ }
+ if (flow->drxq.ibv_attr) {
+ rte_free(flow->drxq.ibv_attr);
+ flow->drxq.ibv_attr = NULL;
+ }
+ if (flow->cs) {
+ claim_zero(ibv_destroy_counter_set(flow->cs));
+ flow->cs = NULL;
+ parser->cs = NULL;
+ }
+ return err;
+}
+
+/**
+ * Create hash Rx queues when RSS is enabled.
+ *
+ * @param priv
+ * Pointer to private structure.
+ * @param parser
+ * Internal parser structure.
+ * @param flow
+ * Pointer to the rte_flow.
+ * @param[out] error
+ * Perform verbose error reporting if not NULL.
+ *
+ * @return
+ * 0 on success, an errno value otherwise and rte_errno is set.
+ */
+static int
+priv_flow_create_action_queue_rss(struct priv *priv,
+ struct mlx5_flow_parse *parser,
+ struct rte_flow *flow,
+ struct rte_flow_error *error)
+{
+ unsigned int i;
+
+ for (i = 0; i != hash_rxq_init_n; ++i) {
+ uint64_t hash_fields;
+
+ if (!parser->queue[i].ibv_attr)
+ continue;
+ flow->frxq[i].ibv_attr = parser->queue[i].ibv_attr;
+ parser->queue[i].ibv_attr = NULL;
+ hash_fields = hash_rxq_init[i].hash_fields;
+ if (!priv->dev->data->dev_started)
+ continue;
+ flow->frxq[i].hrxq =
+ mlx5_priv_hrxq_get(priv,
+ parser->rss_conf.rss_key,
+ parser->rss_conf.rss_key_len,
+ hash_fields,
+ parser->queues,
+ parser->queues_n);
+ if (flow->frxq[i].hrxq)
+ continue;
+ flow->frxq[i].hrxq =
+ mlx5_priv_hrxq_new(priv,
+ parser->rss_conf.rss_key,
+ parser->rss_conf.rss_key_len,
+ hash_fields,
+ parser->queues,
+ parser->queues_n);
+ if (!flow->frxq[i].hrxq) {
+ rte_flow_error_set(error, ENOMEM,
+ RTE_FLOW_ERROR_TYPE_HANDLE,
+ NULL, "cannot create hash rxq");
+ return ENOMEM;
+ }
+ }
+ return 0;
}
/**
@@ -1047,112 +1806,79 @@ error:
*
* @param priv
* Pointer to private structure.
+ * @param parser
+ * Internal parser structure.
* @param flow
- * MLX5 flow attributes (filled by mlx5_flow_validate()).
- * @param action
- * Target action structure.
+ * Pointer to the rte_flow.
* @param[out] error
* Perform verbose error reporting if not NULL.
*
* @return
- * A flow if the rule could be created.
+ * 0 on success, an errno value otherwise and rte_errno is set.
*/
-static struct rte_flow *
+static int
priv_flow_create_action_queue(struct priv *priv,
- struct mlx5_flow *flow,
- struct mlx5_flow_action *action,
+ struct mlx5_flow_parse *parser,
+ struct rte_flow *flow,
struct rte_flow_error *error)
{
- struct rte_flow *rte_flow;
+ int err = 0;
unsigned int i;
- unsigned int j;
- const unsigned int wqs_n = 1 << log2above(action->queues_n);
- struct ibv_exp_wq *wqs[wqs_n];
assert(priv->pd);
assert(priv->ctx);
- assert(!action->drop);
- rte_flow = rte_calloc(__func__, 1, sizeof(*rte_flow) +
- sizeof(*rte_flow->rxqs) * action->queues_n, 0);
- if (!rte_flow) {
- rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
- NULL, "cannot allocate flow memory");
- return NULL;
- }
- for (i = 0; i < action->queues_n; ++i) {
- struct rxq_ctrl *rxq;
-
- rxq = container_of((*priv->rxqs)[action->queues[i]],
- struct rxq_ctrl, rxq);
- wqs[i] = rxq->wq;
- rte_flow->rxqs[i] = &rxq->rxq;
- ++rte_flow->rxqs_n;
- rxq->rxq.mark |= action->mark;
- }
- /* finalise indirection table. */
- for (j = 0; i < wqs_n; ++i, ++j) {
- wqs[i] = wqs[j];
- if (j == action->queues_n)
- j = 0;
- }
- rte_flow->mark = action->mark;
- rte_flow->ibv_attr = flow->ibv_attr;
- rte_flow->hash_fields = flow->hash_fields;
- rte_flow->ind_table = ibv_exp_create_rwq_ind_table(
- priv->ctx,
- &(struct ibv_exp_rwq_ind_table_init_attr){
- .pd = priv->pd,
- .log_ind_tbl_size = log2above(action->queues_n),
- .ind_tbl = wqs,
- .comp_mask = 0,
- });
- if (!rte_flow->ind_table) {
- rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
- NULL, "cannot allocate indirection table");
- goto error;
- }
- rte_flow->qp = ibv_exp_create_qp(
- priv->ctx,
- &(struct ibv_exp_qp_init_attr){
- .qp_type = IBV_QPT_RAW_PACKET,
- .comp_mask =
- IBV_EXP_QP_INIT_ATTR_PD |
- IBV_EXP_QP_INIT_ATTR_PORT |
- IBV_EXP_QP_INIT_ATTR_RX_HASH,
- .pd = priv->pd,
- .rx_hash_conf = &(struct ibv_exp_rx_hash_conf){
- .rx_hash_function =
- IBV_EXP_RX_HASH_FUNC_TOEPLITZ,
- .rx_hash_key_len = rss_hash_default_key_len,
- .rx_hash_key = rss_hash_default_key,
- .rx_hash_fields_mask = rte_flow->hash_fields,
- .rwq_ind_tbl = rte_flow->ind_table,
- },
- .port_num = priv->port,
- });
- if (!rte_flow->qp) {
- rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
- NULL, "cannot allocate QP");
+ assert(!parser->drop);
+ err = priv_flow_create_action_queue_rss(priv, parser, flow, error);
+ if (err)
goto error;
+ if (parser->count)
+ flow->cs = parser->cs;
+ if (!priv->dev->data->dev_started)
+ return 0;
+ for (i = 0; i != hash_rxq_init_n; ++i) {
+ if (!flow->frxq[i].hrxq)
+ continue;
+ flow->frxq[i].ibv_flow =
+ ibv_create_flow(flow->frxq[i].hrxq->qp,
+ flow->frxq[i].ibv_attr);
+ if (!flow->frxq[i].ibv_flow) {
+ rte_flow_error_set(error, ENOMEM,
+ RTE_FLOW_ERROR_TYPE_HANDLE,
+ NULL, "flow rule creation failure");
+ err = ENOMEM;
+ goto error;
+ }
+ DEBUG("%p type %d QP %p ibv_flow %p",
+ (void *)flow, i,
+ (void *)flow->frxq[i].hrxq,
+ (void *)flow->frxq[i].ibv_flow);
}
- if (!priv->started)
- return rte_flow;
- rte_flow->ibv_flow = ibv_exp_create_flow(rte_flow->qp,
- rte_flow->ibv_attr);
- if (!rte_flow->ibv_flow) {
- rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
- NULL, "flow rule creation failure");
- goto error;
+ for (i = 0; i != parser->queues_n; ++i) {
+ struct mlx5_rxq_data *q =
+ (*priv->rxqs)[parser->queues[i]];
+
+ q->mark |= parser->mark;
}
- return rte_flow;
+ return 0;
error:
- assert(rte_flow);
- if (rte_flow->qp)
- ibv_destroy_qp(rte_flow->qp);
- if (rte_flow->ind_table)
- ibv_exp_destroy_rwq_ind_table(rte_flow->ind_table);
- rte_free(rte_flow);
- return NULL;
+ assert(flow);
+ for (i = 0; i != hash_rxq_init_n; ++i) {
+ if (flow->frxq[i].ibv_flow) {
+ struct ibv_flow *ibv_flow = flow->frxq[i].ibv_flow;
+
+ claim_zero(ibv_destroy_flow(ibv_flow));
+ }
+ if (flow->frxq[i].hrxq)
+ mlx5_priv_hrxq_release(priv, flow->frxq[i].hrxq);
+ if (flow->frxq[i].ibv_attr)
+ rte_free(flow->frxq[i].ibv_attr);
+ }
+ if (flow->cs) {
+ claim_zero(ibv_destroy_counter_set(flow->cs));
+ flow->cs = NULL;
+ parser->cs = NULL;
+ }
+ return err;
}
/**
@@ -1160,6 +1886,8 @@ error:
*
* @param priv
* Pointer to private structure.
+ * @param list
+ * Pointer to a TAILQ flow list.
* @param[in] attr
* Flow rule attributes.
* @param[in] pattern
@@ -1174,65 +1902,86 @@ error:
*/
static struct rte_flow *
priv_flow_create(struct priv *priv,
+ struct mlx5_flows *list,
const struct rte_flow_attr *attr,
const struct rte_flow_item items[],
const struct rte_flow_action actions[],
struct rte_flow_error *error)
{
- struct rte_flow *rte_flow;
- struct mlx5_flow flow = { .offset = sizeof(struct ibv_exp_flow_attr), };
- struct mlx5_flow_action action = {
- .queue = 0,
- .drop = 0,
- .mark = 0,
- .mark_id = MLX5_FLOW_MARK_DEFAULT,
- .queues_n = 0,
- };
+ struct mlx5_flow_parse parser = { .create = 1, };
+ struct rte_flow *flow = NULL;
+ unsigned int i;
int err;
- err = priv_flow_validate(priv, attr, items, actions, error, &flow,
- &action);
+ err = priv_flow_convert(priv, attr, items, actions, error, &parser);
if (err)
goto exit;
- flow.ibv_attr = rte_malloc(__func__, flow.offset, 0);
- flow.offset = sizeof(struct ibv_exp_flow_attr);
- if (!flow.ibv_attr) {
- rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
- NULL, "cannot allocate ibv_attr memory");
- goto exit;
+ flow = rte_calloc(__func__, 1,
+ sizeof(*flow) + parser.queues_n * sizeof(uint16_t),
+ 0);
+ if (!flow) {
+ rte_flow_error_set(error, ENOMEM,
+ RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
+ NULL,
+ "cannot allocate flow memory");
+ return NULL;
}
- *flow.ibv_attr = (struct ibv_exp_flow_attr){
- .type = IBV_EXP_FLOW_ATTR_NORMAL,
- .size = sizeof(struct ibv_exp_flow_attr),
- .priority = attr->priority,
- .num_of_specs = 0,
- .port = 0,
- .flags = 0,
- .reserved = 0,
- };
- flow.inner = 0;
- flow.hash_fields = 0;
- claim_zero(priv_flow_validate(priv, attr, items, actions,
- error, &flow, &action));
- if (action.mark && !action.drop) {
- mlx5_flow_create_flag_mark(&flow, action.mark_id);
- flow.offset += sizeof(struct ibv_exp_flow_spec_action_tag);
- }
- if (action.drop)
- rte_flow =
- priv_flow_create_action_queue_drop(priv, &flow, error);
- else
- rte_flow = priv_flow_create_action_queue(priv, &flow, &action,
+ /* Copy queues configuration. */
+ flow->queues = (uint16_t (*)[])(flow + 1);
+ memcpy(flow->queues, parser.queues, parser.queues_n * sizeof(uint16_t));
+ flow->queues_n = parser.queues_n;
+ /* Copy RSS configuration. */
+ flow->rss_conf = parser.rss_conf;
+ flow->rss_conf.rss_key = flow->rss_key;
+ memcpy(flow->rss_key, parser.rss_key, parser.rss_conf.rss_key_len);
+ /* Finalise the flow. */
+ if (parser.drop)
+ err = priv_flow_create_action_queue_drop(priv, &parser, flow,
error);
- if (!rte_flow)
+ else
+ err = priv_flow_create_action_queue(priv, &parser, flow, error);
+ if (err)
goto exit;
- return rte_flow;
+ TAILQ_INSERT_TAIL(list, flow, next);
+ DEBUG("Flow created %p", (void *)flow);
+ return flow;
exit:
- rte_free(flow.ibv_attr);
+ if (parser.drop) {
+ rte_free(parser.drop_q.ibv_attr);
+ } else {
+ for (i = 0; i != hash_rxq_init_n; ++i) {
+ if (parser.queue[i].ibv_attr)
+ rte_free(parser.queue[i].ibv_attr);
+ }
+ }
+ rte_free(flow);
return NULL;
}
/**
+ * Validate a flow supported by the NIC.
+ *
+ * @see rte_flow_validate()
+ * @see rte_flow_ops
+ */
+int
+mlx5_flow_validate(struct rte_eth_dev *dev,
+ const struct rte_flow_attr *attr,
+ const struct rte_flow_item items[],
+ const struct rte_flow_action actions[],
+ struct rte_flow_error *error)
+{
+ struct priv *priv = dev->data->dev_private;
+ int ret;
+ struct mlx5_flow_parse parser = { .create = 0, };
+
+ priv_lock(priv);
+ ret = priv_flow_convert(priv, attr, items, actions, error, &parser);
+ priv_unlock(priv);
+ return ret;
+}
+
+/**
* Create a flow.
*
* @see rte_flow_create()
@@ -1249,11 +1998,8 @@ mlx5_flow_create(struct rte_eth_dev *dev,
struct rte_flow *flow;
priv_lock(priv);
- flow = priv_flow_create(priv, attr, items, actions, error);
- if (flow) {
- TAILQ_INSERT_TAIL(&priv->flows, flow, next);
- DEBUG("Flow created %p", (void *)flow);
- }
+ flow = priv_flow_create(priv, &priv->flows, attr, items, actions,
+ error);
priv_unlock(priv);
return flow;
}
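
These PMD callbacks are reached through the generic rte_flow API. As a hedged application-side sketch, an eth / ipv4 rule steered to a single Rx queue could be created as follows (the port and queue numbers are illustrative, not part of this patch):

#include <stdint.h>
#include <rte_flow.h>

/* Illustrative only: steer all IPv4 traffic on a port to Rx queue 1. */
static int
create_ipv4_to_queue_rule(uint16_t port_id)
{
	struct rte_flow_attr attr = { .ingress = 1 };
	struct rte_flow_item pattern[] = {
		{ .type = RTE_FLOW_ITEM_TYPE_ETH },
		{ .type = RTE_FLOW_ITEM_TYPE_IPV4 },
		{ .type = RTE_FLOW_ITEM_TYPE_END },
	};
	struct rte_flow_action_queue queue = { .index = 1 };
	struct rte_flow_action actions[] = {
		{ .type = RTE_FLOW_ACTION_TYPE_QUEUE, .conf = &queue },
		{ .type = RTE_FLOW_ACTION_TYPE_END },
	};
	struct rte_flow_error error;
	struct rte_flow *flow;

	if (rte_flow_validate(port_id, &attr, pattern, actions, &error))
		return -1;
	flow = rte_flow_create(port_id, &attr, pattern, actions, &error);
	return flow ? 0 : -1;
}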
@@ -1263,121 +2009,95 @@ mlx5_flow_create(struct rte_eth_dev *dev,
*
* @param priv
* Pointer to private structure.
+ * @param list
+ * Pointer to a TAILQ flow list.
* @param[in] flow
* Flow to destroy.
*/
static void
priv_flow_destroy(struct priv *priv,
+ struct mlx5_flows *list,
struct rte_flow *flow)
{
- TAILQ_REMOVE(&priv->flows, flow, next);
- if (flow->ibv_flow)
- claim_zero(ibv_exp_destroy_flow(flow->ibv_flow));
- if (flow->drop)
+ unsigned int i;
+
+ if (flow->drop || !flow->mark)
goto free;
- if (flow->qp)
- claim_zero(ibv_destroy_qp(flow->qp));
- if (flow->ind_table)
- claim_zero(ibv_exp_destroy_rwq_ind_table(flow->ind_table));
- if (flow->drop && flow->wq)
- claim_zero(ibv_exp_destroy_wq(flow->wq));
- if (flow->drop && flow->cq)
- claim_zero(ibv_destroy_cq(flow->cq));
- if (flow->mark) {
+ for (i = 0; i != flow->queues_n; ++i) {
struct rte_flow *tmp;
- struct rxq *rxq;
- uint32_t mark_n = 0;
- uint32_t queue_n;
+ int mark = 0;
/*
* To remove the mark from the queue, the queue must not be
* present in any other marked flow (RSS or not).
*/
- for (queue_n = 0; queue_n < flow->rxqs_n; ++queue_n) {
- rxq = flow->rxqs[queue_n];
- for (tmp = TAILQ_FIRST(&priv->flows);
- tmp;
- tmp = TAILQ_NEXT(tmp, next)) {
- uint32_t tqueue_n;
-
- if (tmp->drop)
+ TAILQ_FOREACH(tmp, list, next) {
+ unsigned int j;
+ uint16_t *tqs = NULL;
+ uint16_t tq_n = 0;
+
+ if (!tmp->mark)
+ continue;
+ for (j = 0; j != hash_rxq_init_n; ++j) {
+ if (!tmp->frxq[j].hrxq)
continue;
- for (tqueue_n = 0;
- tqueue_n < tmp->rxqs_n;
- ++tqueue_n) {
- struct rxq *trxq;
-
- trxq = tmp->rxqs[tqueue_n];
- if (rxq == trxq)
- ++mark_n;
- }
+ tqs = tmp->frxq[j].hrxq->ind_table->queues;
+ tq_n = tmp->frxq[j].hrxq->ind_table->queues_n;
}
- rxq->mark = !!mark_n;
+ if (!tq_n)
+ continue;
+ for (j = 0; (j != tq_n) && !mark; j++)
+ if (tqs[j] == (*flow->queues)[i])
+ mark = 1;
}
+ (*priv->rxqs)[(*flow->queues)[i]]->mark = mark;
}
free:
- rte_free(flow->ibv_attr);
+ if (flow->drop) {
+ if (flow->drxq.ibv_flow)
+ claim_zero(ibv_destroy_flow(flow->drxq.ibv_flow));
+ rte_free(flow->drxq.ibv_attr);
+ } else {
+ for (i = 0; i != hash_rxq_init_n; ++i) {
+ struct mlx5_flow *frxq = &flow->frxq[i];
+
+ if (frxq->ibv_flow)
+ claim_zero(ibv_destroy_flow(frxq->ibv_flow));
+ if (frxq->hrxq)
+ mlx5_priv_hrxq_release(priv, frxq->hrxq);
+ if (frxq->ibv_attr)
+ rte_free(frxq->ibv_attr);
+ }
+ }
+ if (flow->cs) {
+ claim_zero(ibv_destroy_counter_set(flow->cs));
+ flow->cs = NULL;
+ }
+ TAILQ_REMOVE(list, flow, next);
DEBUG("Flow destroyed %p", (void *)flow);
rte_free(flow);
}
/**
- * Destroy a flow.
- *
- * @see rte_flow_destroy()
- * @see rte_flow_ops
- */
-int
-mlx5_flow_destroy(struct rte_eth_dev *dev,
- struct rte_flow *flow,
- struct rte_flow_error *error)
-{
- struct priv *priv = dev->data->dev_private;
-
- (void)error;
- priv_lock(priv);
- priv_flow_destroy(priv, flow);
- priv_unlock(priv);
- return 0;
-}
-
-/**
* Destroy all flows.
*
* @param priv
* Pointer to private structure.
+ * @param list
+ * Pointer to a TAILQ flow list.
*/
-static void
-priv_flow_flush(struct priv *priv)
+void
+priv_flow_flush(struct priv *priv, struct mlx5_flows *list)
{
- while (!TAILQ_EMPTY(&priv->flows)) {
+ while (!TAILQ_EMPTY(list)) {
struct rte_flow *flow;
- flow = TAILQ_FIRST(&priv->flows);
- priv_flow_destroy(priv, flow);
+ flow = TAILQ_FIRST(list);
+ priv_flow_destroy(priv, list, flow);
}
}
/**
- * Destroy all flows.
- *
- * @see rte_flow_flush()
- * @see rte_flow_ops
- */
-int
-mlx5_flow_flush(struct rte_eth_dev *dev,
- struct rte_flow_error *error)
-{
- struct priv *priv = dev->data->dev_private;
-
- (void)error;
- priv_lock(priv);
- priv_flow_flush(priv);
- priv_unlock(priv);
- return 0;
-}
-
-/**
* Create drop queue.
*
* @param priv
@@ -1386,11 +2106,10 @@ mlx5_flow_flush(struct rte_eth_dev *dev,
* @return
* 0 on success.
*/
-static int
+int
priv_flow_create_drop_queue(struct priv *priv)
{
- struct rte_flow_drop *fdq = NULL;
- unsigned int i;
+ struct mlx5_hrxq_drop *fdq = NULL;
assert(priv->pd);
assert(priv->ctx);
@@ -1399,57 +2118,50 @@ priv_flow_create_drop_queue(struct priv *priv)
WARN("cannot allocate memory for drop queue");
goto error;
}
- fdq->cq = ibv_exp_create_cq(priv->ctx, 1, NULL, NULL, 0,
- &(struct ibv_exp_cq_init_attr){
- .comp_mask = 0,
- });
+ fdq->cq = ibv_create_cq(priv->ctx, 1, NULL, NULL, 0);
if (!fdq->cq) {
WARN("cannot allocate CQ for drop queue");
goto error;
}
- for (i = 0; i != MLX5_DROP_WQ_N; ++i) {
- fdq->wqs[i] = ibv_exp_create_wq(priv->ctx,
- &(struct ibv_exp_wq_init_attr){
- .wq_type = IBV_EXP_WQT_RQ,
- .max_recv_wr = 1,
- .max_recv_sge = 1,
- .pd = priv->pd,
- .cq = fdq->cq,
- });
- if (!fdq->wqs[i]) {
- WARN("cannot allocate WQ for drop queue");
- goto error;
- }
- }
- fdq->ind_table = ibv_exp_create_rwq_ind_table(priv->ctx,
- &(struct ibv_exp_rwq_ind_table_init_attr){
+ fdq->wq = ibv_create_wq(priv->ctx,
+ &(struct ibv_wq_init_attr){
+ .wq_type = IBV_WQT_RQ,
+ .max_wr = 1,
+ .max_sge = 1,
.pd = priv->pd,
+ .cq = fdq->cq,
+ });
+ if (!fdq->wq) {
+ WARN("cannot allocate WQ for drop queue");
+ goto error;
+ }
+ fdq->ind_table = ibv_create_rwq_ind_table(priv->ctx,
+ &(struct ibv_rwq_ind_table_init_attr){
.log_ind_tbl_size = 0,
- .ind_tbl = fdq->wqs,
+ .ind_tbl = &fdq->wq,
.comp_mask = 0,
});
if (!fdq->ind_table) {
WARN("cannot allocate indirection table for drop queue");
goto error;
}
- fdq->qp = ibv_exp_create_qp(priv->ctx,
- &(struct ibv_exp_qp_init_attr){
+ fdq->qp = ibv_create_qp_ex(priv->ctx,
+ &(struct ibv_qp_init_attr_ex){
.qp_type = IBV_QPT_RAW_PACKET,
.comp_mask =
- IBV_EXP_QP_INIT_ATTR_PD |
- IBV_EXP_QP_INIT_ATTR_PORT |
- IBV_EXP_QP_INIT_ATTR_RX_HASH,
- .pd = priv->pd,
- .rx_hash_conf = &(struct ibv_exp_rx_hash_conf){
+ IBV_QP_INIT_ATTR_PD |
+ IBV_QP_INIT_ATTR_IND_TABLE |
+ IBV_QP_INIT_ATTR_RX_HASH,
+ .rx_hash_conf = (struct ibv_rx_hash_conf){
.rx_hash_function =
- IBV_EXP_RX_HASH_FUNC_TOEPLITZ,
+ IBV_RX_HASH_FUNC_TOEPLITZ,
.rx_hash_key_len = rss_hash_default_key_len,
.rx_hash_key = rss_hash_default_key,
.rx_hash_fields_mask = 0,
- .rwq_ind_tbl = fdq->ind_table,
},
- .port_num = priv->port,
- });
+ .rwq_ind_tbl = fdq->ind_table,
+ .pd = priv->pd
+ });
if (!fdq->qp) {
WARN("cannot allocate QP for drop queue");
goto error;
@@ -1460,11 +2172,9 @@ error:
if (fdq->qp)
claim_zero(ibv_destroy_qp(fdq->qp));
if (fdq->ind_table)
- claim_zero(ibv_exp_destroy_rwq_ind_table(fdq->ind_table));
- for (i = 0; i != MLX5_DROP_WQ_N; ++i) {
- if (fdq->wqs[i])
- claim_zero(ibv_exp_destroy_wq(fdq->wqs[i]));
- }
+ claim_zero(ibv_destroy_rwq_ind_table(fdq->ind_table));
+ if (fdq->wq)
+ claim_zero(ibv_destroy_wq(fdq->wq));
if (fdq->cq)
claim_zero(ibv_destroy_cq(fdq->cq));
if (fdq)
@@ -1479,22 +2189,19 @@ error:
* @param priv
* Pointer to private structure.
*/
-static void
+void
priv_flow_delete_drop_queue(struct priv *priv)
{
- struct rte_flow_drop *fdq = priv->flow_drop_queue;
- unsigned int i;
+ struct mlx5_hrxq_drop *fdq = priv->flow_drop_queue;
if (!fdq)
return;
if (fdq->qp)
claim_zero(ibv_destroy_qp(fdq->qp));
if (fdq->ind_table)
- claim_zero(ibv_exp_destroy_rwq_ind_table(fdq->ind_table));
- for (i = 0; i != MLX5_DROP_WQ_N; ++i) {
- if (fdq->wqs[i])
- claim_zero(ibv_exp_destroy_wq(fdq->wqs[i]));
- }
+ claim_zero(ibv_destroy_rwq_ind_table(fdq->ind_table));
+ if (fdq->wq)
+ claim_zero(ibv_destroy_wq(fdq->wq));
if (fdq->cq)
claim_zero(ibv_destroy_cq(fdq->cq));
rte_free(fdq);
@@ -1504,28 +2211,49 @@ priv_flow_delete_drop_queue(struct priv *priv)
/**
* Remove all flows.
*
- * Called by dev_stop() to remove all flows.
- *
* @param priv
* Pointer to private structure.
+ * @param list
+ * Pointer to a TAILQ flow list.
*/
void
-priv_flow_stop(struct priv *priv)
+priv_flow_stop(struct priv *priv, struct mlx5_flows *list)
{
struct rte_flow *flow;
- TAILQ_FOREACH_REVERSE(flow, &priv->flows, mlx5_flows, next) {
- claim_zero(ibv_exp_destroy_flow(flow->ibv_flow));
- flow->ibv_flow = NULL;
+ TAILQ_FOREACH_REVERSE(flow, list, mlx5_flows, next) {
+ unsigned int i;
+
+ if (flow->drop) {
+ if (!flow->drxq.ibv_flow)
+ continue;
+ claim_zero(ibv_destroy_flow(flow->drxq.ibv_flow));
+ flow->drxq.ibv_flow = NULL;
+ /* Next flow. */
+ continue;
+ }
if (flow->mark) {
- unsigned int n;
+ struct mlx5_ind_table_ibv *ind_tbl = NULL;
- for (n = 0; n < flow->rxqs_n; ++n)
- flow->rxqs[n]->mark = 0;
+ for (i = 0; i != hash_rxq_init_n; ++i) {
+ if (!flow->frxq[i].hrxq)
+ continue;
+ ind_tbl = flow->frxq[i].hrxq->ind_table;
+ }
+ assert(ind_tbl);
+ for (i = 0; i != ind_tbl->queues_n; ++i)
+ (*priv->rxqs)[ind_tbl->queues[i]]->mark = 0;
+ }
+ for (i = 0; i != hash_rxq_init_n; ++i) {
+ if (!flow->frxq[i].ibv_flow)
+ continue;
+ claim_zero(ibv_destroy_flow(flow->frxq[i].ibv_flow));
+ flow->frxq[i].ibv_flow = NULL;
+ mlx5_priv_hrxq_release(priv, flow->frxq[i].hrxq);
+ flow->frxq[i].hrxq = NULL;
}
DEBUG("Flow %p removed", (void *)flow);
}
- priv_flow_delete_drop_queue(priv);
}
/**
@@ -1533,75 +2261,321 @@ priv_flow_stop(struct priv *priv)
*
* @param priv
* Pointer to private structure.
+ * @param list
+ * Pointer to a TAILQ flow list.
*
* @return
* 0 on success, a errno value otherwise and rte_errno is set.
*/
int
-priv_flow_start(struct priv *priv)
+priv_flow_start(struct priv *priv, struct mlx5_flows *list)
{
- int ret;
struct rte_flow *flow;
- ret = priv_flow_create_drop_queue(priv);
- if (ret)
- return -1;
- TAILQ_FOREACH(flow, &priv->flows, next) {
- struct ibv_qp *qp;
+ TAILQ_FOREACH(flow, list, next) {
+ unsigned int i;
- if (flow->drop)
- qp = priv->flow_drop_queue->qp;
- else
- qp = flow->qp;
- flow->ibv_flow = ibv_exp_create_flow(qp, flow->ibv_attr);
- if (!flow->ibv_flow) {
- DEBUG("Flow %p cannot be applied", (void *)flow);
- rte_errno = EINVAL;
- return rte_errno;
+ if (flow->drop) {
+ flow->drxq.ibv_flow =
+ ibv_create_flow(priv->flow_drop_queue->qp,
+ flow->drxq.ibv_attr);
+ if (!flow->drxq.ibv_flow) {
+ DEBUG("Flow %p cannot be applied",
+ (void *)flow);
+ rte_errno = EINVAL;
+ return rte_errno;
+ }
+ DEBUG("Flow %p applied", (void *)flow);
+ /* Next flow. */
+ continue;
}
- DEBUG("Flow %p applied", (void *)flow);
- if (flow->mark) {
- unsigned int n;
-
- for (n = 0; n < flow->rxqs_n; ++n)
- flow->rxqs[n]->mark = 1;
+ for (i = 0; i != hash_rxq_init_n; ++i) {
+ if (!flow->frxq[i].ibv_attr)
+ continue;
+ flow->frxq[i].hrxq =
+ mlx5_priv_hrxq_get(priv, flow->rss_conf.rss_key,
+ flow->rss_conf.rss_key_len,
+ hash_rxq_init[i].hash_fields,
+ (*flow->queues),
+ flow->queues_n);
+ if (flow->frxq[i].hrxq)
+ goto flow_create;
+ flow->frxq[i].hrxq =
+ mlx5_priv_hrxq_new(priv, flow->rss_conf.rss_key,
+ flow->rss_conf.rss_key_len,
+ hash_rxq_init[i].hash_fields,
+ (*flow->queues),
+ flow->queues_n);
+ if (!flow->frxq[i].hrxq) {
+ DEBUG("Flow %p cannot be applied",
+ (void *)flow);
+ rte_errno = EINVAL;
+ return rte_errno;
+ }
+flow_create:
+ flow->frxq[i].ibv_flow =
+ ibv_create_flow(flow->frxq[i].hrxq->qp,
+ flow->frxq[i].ibv_attr);
+ if (!flow->frxq[i].ibv_flow) {
+ DEBUG("Flow %p cannot be applied",
+ (void *)flow);
+ rte_errno = EINVAL;
+ return rte_errno;
+ }
+ DEBUG("Flow %p applied", (void *)flow);
}
+ if (!flow->mark)
+ continue;
+ for (i = 0; i != flow->queues_n; ++i)
+ (*priv->rxqs)[(*flow->queues)[i]]->mark = 1;
}
return 0;
}
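
These two helpers are driven by the device start/stop path rather than by the rte_flow callbacks; the drop queue management that used to live here now belongs to the caller. The ordering below is only an assumed sketch of how the trigger code is expected to chain them (the real call sites are outside this file):

/* Assumed usage sketch, not the actual trigger code. */

/* On device stop: detach the Verbs flows, keep the rte_flow objects. */
priv_flow_stop(priv, &priv->flows);
priv_flow_delete_drop_queue(priv);

/* On device start: recreate the drop queue, then re-apply the flows. */
if (priv_flow_create_drop_queue(priv))
	goto error;
if (priv_flow_start(priv, &priv->flows))
	goto error;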
/**
- * Verify if the Rx queue is used in a flow.
+ * Verify that the flow list is empty.
*
* @param priv
- * Pointer to private structure.
- * @param rxq
- * Pointer to the queue to search.
+ * Pointer to private structure.
+ *
+ * @return
+ * The number of flows not released.
+ */
+int
+priv_flow_verify(struct priv *priv)
+{
+ struct rte_flow *flow;
+ int ret = 0;
+
+ TAILQ_FOREACH(flow, &priv->flows, next) {
+ DEBUG("%p: flow %p still referenced", (void *)priv,
+ (void *)flow);
+ ++ret;
+ }
+ return ret;
+}
+
+/**
+ * Enable a control flow configured from the control plane.
+ *
+ * @param dev
+ * Pointer to Ethernet device.
+ * @param eth_spec
+ * An Ethernet flow spec to apply.
+ * @param eth_mask
+ * An Ethernet flow mask to apply.
+ * @param vlan_spec
+ * A VLAN flow spec to apply.
+ * @param vlan_mask
+ * A VLAN flow mask to apply.
*
* @return
- * Nonzero if the queue is used by a flow.
+ * 0 on success.
*/
int
-priv_flow_rxq_in_use(struct priv *priv, struct rxq *rxq)
+mlx5_ctrl_flow_vlan(struct rte_eth_dev *dev,
+ struct rte_flow_item_eth *eth_spec,
+ struct rte_flow_item_eth *eth_mask,
+ struct rte_flow_item_vlan *vlan_spec,
+ struct rte_flow_item_vlan *vlan_mask)
{
+ struct priv *priv = dev->data->dev_private;
+ const struct rte_flow_attr attr = {
+ .ingress = 1,
+ .priority = MLX5_CTRL_FLOW_PRIORITY,
+ };
+ struct rte_flow_item items[] = {
+ {
+ .type = RTE_FLOW_ITEM_TYPE_ETH,
+ .spec = eth_spec,
+ .last = NULL,
+ .mask = eth_mask,
+ },
+ {
+ .type = (vlan_spec) ? RTE_FLOW_ITEM_TYPE_VLAN :
+ RTE_FLOW_ITEM_TYPE_END,
+ .spec = vlan_spec,
+ .last = NULL,
+ .mask = vlan_mask,
+ },
+ {
+ .type = RTE_FLOW_ITEM_TYPE_END,
+ },
+ };
+ struct rte_flow_action actions[] = {
+ {
+ .type = RTE_FLOW_ACTION_TYPE_RSS,
+ },
+ {
+ .type = RTE_FLOW_ACTION_TYPE_END,
+ },
+ };
struct rte_flow *flow;
+ struct rte_flow_error error;
+ unsigned int i;
+ union {
+ struct rte_flow_action_rss rss;
+ struct {
+ const struct rte_eth_rss_conf *rss_conf;
+ uint16_t num;
+ uint16_t queue[RTE_MAX_QUEUES_PER_PORT];
+ } local;
+ } action_rss;
+
+ if (!priv->reta_idx_n)
+ return EINVAL;
+ for (i = 0; i != priv->reta_idx_n; ++i)
+ action_rss.local.queue[i] = (*priv->reta_idx)[i];
+ action_rss.local.rss_conf = &priv->rss_conf;
+ action_rss.local.num = priv->reta_idx_n;
+ actions[0].conf = (const void *)&action_rss.rss;
+ flow = priv_flow_create(priv, &priv->ctrl_flows, &attr, items, actions,
+ &error);
+ if (!flow)
+ return rte_errno;
+ return 0;
+}
- for (flow = TAILQ_FIRST(&priv->flows);
- flow;
- flow = TAILQ_NEXT(flow, next)) {
- unsigned int n;
+/**
+ * Enable a control flow configured from the control plane.
+ *
+ * @param dev
+ * Pointer to Ethernet device.
+ * @param eth_spec
+ * An Ethernet flow spec to apply.
+ * @param eth_mask
+ * An Ethernet flow mask to apply.
+ *
+ * @return
+ * 0 on success.
+ */
+int
+mlx5_ctrl_flow(struct rte_eth_dev *dev,
+ struct rte_flow_item_eth *eth_spec,
+ struct rte_flow_item_eth *eth_mask)
+{
+ return mlx5_ctrl_flow_vlan(dev, eth_spec, eth_mask, NULL, NULL);
+}
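
A control flow is an ordinary flow stored in priv->ctrl_flows instead of priv->flows. As an assumed example of how the control plane could use the helper above (the real call sites live in the traffic-enable path and may differ), accepting broadcast frames would look like:

struct rte_flow_item_eth bcast = {
	.dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
};

if (mlx5_ctrl_flow(dev, &bcast, &bcast))
	ERROR("cannot enable broadcast traffic");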
- if (flow->drop)
- continue;
- for (n = 0; n < flow->rxqs_n; ++n) {
- if (flow->rxqs[n] == rxq)
- return 1;
- }
+/**
+ * Destroy a flow.
+ *
+ * @see rte_flow_destroy()
+ * @see rte_flow_ops
+ */
+int
+mlx5_flow_destroy(struct rte_eth_dev *dev,
+ struct rte_flow *flow,
+ struct rte_flow_error *error)
+{
+ struct priv *priv = dev->data->dev_private;
+
+ (void)error;
+ priv_lock(priv);
+ priv_flow_destroy(priv, &priv->flows, flow);
+ priv_unlock(priv);
+ return 0;
+}
+
+/**
+ * Destroy all flows.
+ *
+ * @see rte_flow_flush()
+ * @see rte_flow_ops
+ */
+int
+mlx5_flow_flush(struct rte_eth_dev *dev,
+ struct rte_flow_error *error)
+{
+ struct priv *priv = dev->data->dev_private;
+
+ (void)error;
+ priv_lock(priv);
+ priv_flow_flush(priv, &priv->flows);
+ priv_unlock(priv);
+ return 0;
+}
+
+#ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
+/**
+ * Query flow counter.
+ *
+ * @param cs
+ * The counter set to query.
+ * @param counter_stats
+ * Last read counter values, used to compute the difference and
+ * updated when a reset is requested.
+ * @param query_count
+ * Returned counter data.
+ * @param[out] error
+ * Perform verbose error reporting if not NULL.
+ *
+ * @return
+ * 0 on success, an errno value otherwise and rte_errno is set.
+ */
+static int
+priv_flow_query_count(struct ibv_counter_set *cs,
+ struct mlx5_flow_counter_stats *counter_stats,
+ struct rte_flow_query_count *query_count,
+ struct rte_flow_error *error)
+{
+ uint64_t counters[2];
+ struct ibv_query_counter_set_attr query_cs_attr = {
+ .cs = cs,
+ .query_flags = IBV_COUNTER_SET_FORCE_UPDATE,
+ };
+ struct ibv_counter_set_data query_out = {
+ .out = counters,
+ .outlen = 2 * sizeof(uint64_t),
+ };
+ int res = ibv_query_counter_set(&query_cs_attr, &query_out);
+
+ if (res) {
+ rte_flow_error_set(error, -res,
+ RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
+ NULL,
+ "cannot read counter");
+ return -res;
+ }
+ query_count->hits_set = 1;
+ query_count->bytes_set = 1;
+ query_count->hits = counters[0] - counter_stats->hits;
+ query_count->bytes = counters[1] - counter_stats->bytes;
+ if (query_count->reset) {
+ counter_stats->hits = counters[0];
+ counter_stats->bytes = counters[1];
}
return 0;
}
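
On the application side these counters are read back with rte_flow_query() on a flow created with a COUNT action; in this DPDK version the query still takes an action type rather than an action pointer. A hedged sketch (port_id and the printing are illustrative):

#include <inttypes.h>
#include <stdio.h>
#include <rte_flow.h>

/* Illustrative only: fetch and reset the counters attached to a flow. */
static void
dump_flow_counters(uint16_t port_id, struct rte_flow *flow)
{
	struct rte_flow_query_count count = { .reset = 1 };
	struct rte_flow_error error;

	if (rte_flow_query(port_id, flow, RTE_FLOW_ACTION_TYPE_COUNT,
			   &count, &error))
		return;
	if (count.hits_set && count.bytes_set)
		printf("hits %" PRIu64 " bytes %" PRIu64 "\n",
		       count.hits, count.bytes);
}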
/**
+ * Query a flow.
+ *
+ * @see rte_flow_query()
+ * @see rte_flow_ops
+ */
+int
+mlx5_flow_query(struct rte_eth_dev *dev,
+ struct rte_flow *flow,
+ enum rte_flow_action_type action __rte_unused,
+ void *data,
+ struct rte_flow_error *error)
+{
+ struct priv *priv = dev->data->dev_private;
+ int res = EINVAL;
+
+ priv_lock(priv);
+ if (flow->cs) {
+ res = priv_flow_query_count(flow->cs,
+ &flow->counter_stats,
+ (struct rte_flow_query_count *)data,
+ error);
+ } else {
+ rte_flow_error_set(error, res,
+ RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
+ NULL,
+ "no counter found for flow");
+ }
+ priv_unlock(priv);
+ return -res;
+}
+#endif
+
+/**
* Isolated mode.
*
* @see rte_flow_isolate()
@@ -1615,7 +2589,7 @@ mlx5_flow_isolate(struct rte_eth_dev *dev,
struct priv *priv = dev->data->dev_private;
priv_lock(priv);
- if (priv->started) {
+ if (dev->data->dev_started) {
rte_flow_error_set(error, EBUSY,
RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
NULL,
@@ -1624,6 +2598,497 @@ mlx5_flow_isolate(struct rte_eth_dev *dev,
return -rte_errno;
}
priv->isolated = !!enable;
+ if (enable)
+ priv->dev->dev_ops = &mlx5_dev_ops_isolate;
+ else
+ priv->dev->dev_ops = &mlx5_dev_ops;
priv_unlock(priv);
return 0;
}
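
Because of the dev_started check above, isolated mode has to be requested before the port is started. A short application-side sketch (port_id and error handling are illustrative):

#include <rte_ethdev.h>
#include <rte_flow.h>

/* Illustrative only: enter isolated mode, then start the port. */
static int
start_port_isolated(uint16_t port_id)
{
	struct rte_flow_error error;

	if (rte_flow_isolate(port_id, 1, &error))
		return -1;
	return rte_eth_dev_start(port_id);
}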
+
+/**
+ * Convert a flow director filter to a generic flow.
+ *
+ * @param priv
+ * Private structure.
+ * @param fdir_filter
+ * Flow director filter to add.
+ * @param attributes
+ * Generic flow parameters structure.
+ *
+ * @return
+ * 0 on success, errno value on error.
+ */
+static int
+priv_fdir_filter_convert(struct priv *priv,
+ const struct rte_eth_fdir_filter *fdir_filter,
+ struct mlx5_fdir *attributes)
+{
+ const struct rte_eth_fdir_input *input = &fdir_filter->input;
+
+ /* Validate queue number. */
+ if (fdir_filter->action.rx_queue >= priv->rxqs_n) {
+ ERROR("invalid queue number %d", fdir_filter->action.rx_queue);
+ return EINVAL;
+ }
+ attributes->attr.ingress = 1;
+ attributes->items[0] = (struct rte_flow_item) {
+ .type = RTE_FLOW_ITEM_TYPE_ETH,
+ .spec = &attributes->l2,
+ .mask = &attributes->l2_mask,
+ };
+ switch (fdir_filter->action.behavior) {
+ case RTE_ETH_FDIR_ACCEPT:
+ attributes->actions[0] = (struct rte_flow_action){
+ .type = RTE_FLOW_ACTION_TYPE_QUEUE,
+ .conf = &attributes->queue,
+ };
+ break;
+ case RTE_ETH_FDIR_REJECT:
+ attributes->actions[0] = (struct rte_flow_action){
+ .type = RTE_FLOW_ACTION_TYPE_DROP,
+ };
+ break;
+ default:
+ ERROR("invalid behavior %d", fdir_filter->action.behavior);
+ return ENOTSUP;
+ }
+ attributes->queue.index = fdir_filter->action.rx_queue;
+ switch (fdir_filter->input.flow_type) {
+ case RTE_ETH_FLOW_NONFRAG_IPV4_UDP:
+ attributes->l3.ipv4.hdr = (struct ipv4_hdr){
+ .src_addr = input->flow.udp4_flow.ip.src_ip,
+ .dst_addr = input->flow.udp4_flow.ip.dst_ip,
+ .time_to_live = input->flow.udp4_flow.ip.ttl,
+ .type_of_service = input->flow.udp4_flow.ip.tos,
+ .next_proto_id = input->flow.udp4_flow.ip.proto,
+ };
+ attributes->l4.udp.hdr = (struct udp_hdr){
+ .src_port = input->flow.udp4_flow.src_port,
+ .dst_port = input->flow.udp4_flow.dst_port,
+ };
+ attributes->items[1] = (struct rte_flow_item){
+ .type = RTE_FLOW_ITEM_TYPE_IPV4,
+ .spec = &attributes->l3,
+ };
+ attributes->items[2] = (struct rte_flow_item){
+ .type = RTE_FLOW_ITEM_TYPE_UDP,
+ .spec = &attributes->l4,
+ };
+ break;
+ case RTE_ETH_FLOW_NONFRAG_IPV4_TCP:
+ attributes->l3.ipv4.hdr = (struct ipv4_hdr){
+ .src_addr = input->flow.tcp4_flow.ip.src_ip,
+ .dst_addr = input->flow.tcp4_flow.ip.dst_ip,
+ .time_to_live = input->flow.tcp4_flow.ip.ttl,
+ .type_of_service = input->flow.tcp4_flow.ip.tos,
+ .next_proto_id = input->flow.tcp4_flow.ip.proto,
+ };
+ attributes->l4.tcp.hdr = (struct tcp_hdr){
+ .src_port = input->flow.tcp4_flow.src_port,
+ .dst_port = input->flow.tcp4_flow.dst_port,
+ };
+ attributes->items[1] = (struct rte_flow_item){
+ .type = RTE_FLOW_ITEM_TYPE_IPV4,
+ .spec = &attributes->l3,
+ };
+ attributes->items[2] = (struct rte_flow_item){
+ .type = RTE_FLOW_ITEM_TYPE_TCP,
+ .spec = &attributes->l4,
+ };
+ break;
+ case RTE_ETH_FLOW_NONFRAG_IPV4_OTHER:
+ attributes->l3.ipv4.hdr = (struct ipv4_hdr){
+ .src_addr = input->flow.ip4_flow.src_ip,
+ .dst_addr = input->flow.ip4_flow.dst_ip,
+ .time_to_live = input->flow.ip4_flow.ttl,
+ .type_of_service = input->flow.ip4_flow.tos,
+ .next_proto_id = input->flow.ip4_flow.proto,
+ };
+ attributes->items[1] = (struct rte_flow_item){
+ .type = RTE_FLOW_ITEM_TYPE_IPV4,
+ .spec = &attributes->l3,
+ };
+ break;
+ case RTE_ETH_FLOW_NONFRAG_IPV6_UDP:
+ attributes->l3.ipv6.hdr = (struct ipv6_hdr){
+ .hop_limits = input->flow.udp6_flow.ip.hop_limits,
+ .proto = input->flow.udp6_flow.ip.proto,
+ };
+ memcpy(attributes->l3.ipv6.hdr.src_addr,
+ input->flow.udp6_flow.ip.src_ip,
+ RTE_DIM(attributes->l3.ipv6.hdr.src_addr));
+ memcpy(attributes->l3.ipv6.hdr.dst_addr,
+ input->flow.udp6_flow.ip.dst_ip,
+ RTE_DIM(attributes->l3.ipv6.hdr.src_addr));
+ attributes->l4.udp.hdr = (struct udp_hdr){
+ .src_port = input->flow.udp6_flow.src_port,
+ .dst_port = input->flow.udp6_flow.dst_port,
+ };
+ attributes->items[1] = (struct rte_flow_item){
+ .type = RTE_FLOW_ITEM_TYPE_IPV6,
+ .spec = &attributes->l3,
+ };
+ attributes->items[2] = (struct rte_flow_item){
+ .type = RTE_FLOW_ITEM_TYPE_UDP,
+ .spec = &attributes->l4,
+ };
+ break;
+ case RTE_ETH_FLOW_NONFRAG_IPV6_TCP:
+ attributes->l3.ipv6.hdr = (struct ipv6_hdr){
+ .hop_limits = input->flow.tcp6_flow.ip.hop_limits,
+ .proto = input->flow.tcp6_flow.ip.proto,
+ };
+ memcpy(attributes->l3.ipv6.hdr.src_addr,
+ input->flow.tcp6_flow.ip.src_ip,
+ RTE_DIM(attributes->l3.ipv6.hdr.src_addr));
+ memcpy(attributes->l3.ipv6.hdr.dst_addr,
+ input->flow.tcp6_flow.ip.dst_ip,
+ RTE_DIM(attributes->l3.ipv6.hdr.src_addr));
+ attributes->l4.tcp.hdr = (struct tcp_hdr){
+ .src_port = input->flow.tcp6_flow.src_port,
+ .dst_port = input->flow.tcp6_flow.dst_port,
+ };
+ attributes->items[1] = (struct rte_flow_item){
+ .type = RTE_FLOW_ITEM_TYPE_IPV6,
+ .spec = &attributes->l3,
+ };
+ attributes->items[2] = (struct rte_flow_item){
+ .type = RTE_FLOW_ITEM_TYPE_TCP,
+ .spec = &attributes->l4,
+ };
+ break;
+ case RTE_ETH_FLOW_NONFRAG_IPV6_OTHER:
+ attributes->l3.ipv6.hdr = (struct ipv6_hdr){
+ .hop_limits = input->flow.ipv6_flow.hop_limits,
+ .proto = input->flow.ipv6_flow.proto,
+ };
+ memcpy(attributes->l3.ipv6.hdr.src_addr,
+ input->flow.ipv6_flow.src_ip,
+ RTE_DIM(attributes->l3.ipv6.hdr.src_addr));
+ memcpy(attributes->l3.ipv6.hdr.dst_addr,
+ input->flow.ipv6_flow.dst_ip,
+ RTE_DIM(attributes->l3.ipv6.hdr.src_addr));
+ attributes->items[1] = (struct rte_flow_item){
+ .type = RTE_FLOW_ITEM_TYPE_IPV6,
+ .spec = &attributes->l3,
+ };
+ break;
+ default:
+		ERROR("invalid flow type %d",
+ fdir_filter->input.flow_type);
+ return ENOTSUP;
+ }
+ return 0;
+}
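
For reference, the conversion above for an RTE_ETH_FLOW_NONFRAG_IPV4_UDP filter
amounts to the following generic flow rule, shown as a rough sketch only (it
assumes <rte_flow.h> plus a struct mlx5_fdir named "attributes" already filled
by priv_fdir_filter_convert(); the END entries are normally left to the
zero-initialized tail of the embedded arrays):

    struct rte_flow_attr attr = { .ingress = 1 };
    struct rte_flow_item items[] = {
        { .type = RTE_FLOW_ITEM_TYPE_ETH,
          .spec = &attributes.l2, .mask = &attributes.l2_mask },
        { .type = RTE_FLOW_ITEM_TYPE_IPV4, .spec = &attributes.l3 },
        { .type = RTE_FLOW_ITEM_TYPE_UDP, .spec = &attributes.l4 },
        { .type = RTE_FLOW_ITEM_TYPE_END },
    };
    struct rte_flow_action actions[] = {
        { .type = RTE_FLOW_ACTION_TYPE_QUEUE, .conf = &attributes.queue },
        { .type = RTE_FLOW_ACTION_TYPE_END },
    };
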
+
+/**
+ * Add new flow director filter and store it in list.
+ *
+ * @param priv
+ * Private structure.
+ * @param fdir_filter
+ * Flow director filter to add.
+ *
+ * @return
+ * 0 on success, errno value on failure.
+ */
+static int
+priv_fdir_filter_add(struct priv *priv,
+ const struct rte_eth_fdir_filter *fdir_filter)
+{
+ struct mlx5_fdir attributes = {
+ .attr.group = 0,
+ .l2_mask = {
+ .dst.addr_bytes = "\x00\x00\x00\x00\x00\x00",
+ .src.addr_bytes = "\x00\x00\x00\x00\x00\x00",
+ .type = 0,
+ },
+ };
+ struct mlx5_flow_parse parser = {
+ .layer = HASH_RXQ_ETH,
+ };
+ struct rte_flow_error error;
+ struct rte_flow *flow;
+ int ret;
+
+ ret = priv_fdir_filter_convert(priv, fdir_filter, &attributes);
+ if (ret)
+ return -ret;
+ ret = priv_flow_convert(priv, &attributes.attr, attributes.items,
+ attributes.actions, &error, &parser);
+ if (ret)
+ return -ret;
+ flow = priv_flow_create(priv,
+ &priv->flows,
+ &attributes.attr,
+ attributes.items,
+ attributes.actions,
+ &error);
+ if (flow) {
+ DEBUG("FDIR created %p", (void *)flow);
+ return 0;
+ }
+ return ENOTSUP;
+}
+
+/**
+ * Delete specific filter.
+ *
+ * @param priv
+ * Private structure.
+ * @param fdir_filter
+ * Filter to be deleted.
+ *
+ * @return
+ * 0 on success, errno value on failure.
+ */
+static int
+priv_fdir_filter_delete(struct priv *priv,
+ const struct rte_eth_fdir_filter *fdir_filter)
+{
+ struct mlx5_fdir attributes = {
+ .attr.group = 0,
+ };
+ struct mlx5_flow_parse parser = {
+ .create = 1,
+ .layer = HASH_RXQ_ETH,
+ };
+ struct rte_flow_error error;
+ struct rte_flow *flow;
+ unsigned int i;
+ int ret;
+
+ ret = priv_fdir_filter_convert(priv, fdir_filter, &attributes);
+ if (ret)
+ return -ret;
+ ret = priv_flow_convert(priv, &attributes.attr, attributes.items,
+ attributes.actions, &error, &parser);
+ if (ret)
+ goto exit;
+	/*
+	 * Special case for the drop action, which is only written into the
+	 * specifications when a flow is actually created. At this point the
+	 * drop specification is therefore missing and must be added before
+	 * comparing against existing flows.
+	 */
+ if (parser.drop) {
+ struct ibv_flow_spec_action_drop *drop;
+
+ drop = (void *)((uintptr_t)parser.drop_q.ibv_attr +
+ parser.drop_q.offset);
+ *drop = (struct ibv_flow_spec_action_drop){
+ .type = IBV_FLOW_SPEC_ACTION_DROP,
+ .size = sizeof(struct ibv_flow_spec_action_drop),
+ };
+ parser.drop_q.ibv_attr->num_of_specs++;
+ }
+ TAILQ_FOREACH(flow, &priv->flows, next) {
+ struct ibv_flow_attr *attr;
+ struct ibv_spec_header *attr_h;
+ void *spec;
+ struct ibv_flow_attr *flow_attr;
+ struct ibv_spec_header *flow_h;
+ void *flow_spec;
+ unsigned int specs_n;
+
+ if (parser.drop)
+ attr = parser.drop_q.ibv_attr;
+ else
+ attr = parser.queue[HASH_RXQ_ETH].ibv_attr;
+ if (flow->drop)
+ flow_attr = flow->drxq.ibv_attr;
+ else
+ flow_attr = flow->frxq[HASH_RXQ_ETH].ibv_attr;
+ /* Compare first the attributes. */
+ if (memcmp(attr, flow_attr, sizeof(struct ibv_flow_attr)))
+ continue;
+ if (attr->num_of_specs == 0)
+ continue;
+ spec = (void *)((uintptr_t)attr +
+ sizeof(struct ibv_flow_attr));
+ flow_spec = (void *)((uintptr_t)flow_attr +
+ sizeof(struct ibv_flow_attr));
+ specs_n = RTE_MIN(attr->num_of_specs, flow_attr->num_of_specs);
+ for (i = 0; i != specs_n; ++i) {
+ attr_h = spec;
+ flow_h = flow_spec;
+ if (memcmp(spec, flow_spec,
+ RTE_MIN(attr_h->size, flow_h->size)))
+ continue;
+ spec = (void *)((uintptr_t)attr + attr_h->size);
+ flow_spec = (void *)((uintptr_t)flow_attr +
+ flow_h->size);
+ }
+		/* At this point, the flows match. */
+ break;
+ }
+ if (flow)
+ priv_flow_destroy(priv, &priv->flows, flow);
+exit:
+ if (parser.drop) {
+ rte_free(parser.drop_q.ibv_attr);
+ } else {
+ for (i = 0; i != hash_rxq_init_n; ++i) {
+ if (parser.queue[i].ibv_attr)
+ rte_free(parser.queue[i].ibv_attr);
+ }
+ }
+ return -ret;
+}
+
+/**
+ * Update queue for specific filter.
+ *
+ * @param priv
+ * Private structure.
+ * @param fdir_filter
+ * Filter to be updated.
+ *
+ * @return
+ * 0 on success, errno value on failure.
+ */
+static int
+priv_fdir_filter_update(struct priv *priv,
+ const struct rte_eth_fdir_filter *fdir_filter)
+{
+ int ret;
+
+ ret = priv_fdir_filter_delete(priv, fdir_filter);
+ if (ret)
+ return ret;
+ ret = priv_fdir_filter_add(priv, fdir_filter);
+ return ret;
+}
+
+/**
+ * Flush all filters.
+ *
+ * @param priv
+ * Private structure.
+ */
+static void
+priv_fdir_filter_flush(struct priv *priv)
+{
+ priv_flow_flush(priv, &priv->flows);
+}
+
+/**
+ * Get flow director information.
+ *
+ * @param priv
+ * Private structure.
+ * @param[out] fdir_info
+ * Resulting flow director information.
+ */
+static void
+priv_fdir_info_get(struct priv *priv, struct rte_eth_fdir_info *fdir_info)
+{
+ struct rte_eth_fdir_masks *mask =
+ &priv->dev->data->dev_conf.fdir_conf.mask;
+
+ fdir_info->mode = priv->dev->data->dev_conf.fdir_conf.mode;
+ fdir_info->guarant_spc = 0;
+ rte_memcpy(&fdir_info->mask, mask, sizeof(fdir_info->mask));
+ fdir_info->max_flexpayload = 0;
+ fdir_info->flow_types_mask[0] = 0;
+ fdir_info->flex_payload_unit = 0;
+ fdir_info->max_flex_payload_segment_num = 0;
+ fdir_info->flex_payload_limit = 0;
+ memset(&fdir_info->flex_conf, 0, sizeof(fdir_info->flex_conf));
+}
+
+/**
+ * Deal with flow director operations.
+ *
+ * @param priv
+ * Pointer to private structure.
+ * @param filter_op
+ * Operation to perform.
+ * @param arg
+ * Pointer to operation-specific structure.
+ *
+ * @return
+ * 0 on success, errno value on failure.
+ */
+static int
+priv_fdir_ctrl_func(struct priv *priv, enum rte_filter_op filter_op, void *arg)
+{
+ enum rte_fdir_mode fdir_mode =
+ priv->dev->data->dev_conf.fdir_conf.mode;
+ int ret = 0;
+
+ if (filter_op == RTE_ETH_FILTER_NOP)
+ return 0;
+ if (fdir_mode != RTE_FDIR_MODE_PERFECT &&
+ fdir_mode != RTE_FDIR_MODE_PERFECT_MAC_VLAN) {
+ ERROR("%p: flow director mode %d not supported",
+ (void *)priv, fdir_mode);
+ return EINVAL;
+ }
+ switch (filter_op) {
+ case RTE_ETH_FILTER_ADD:
+ ret = priv_fdir_filter_add(priv, arg);
+ break;
+ case RTE_ETH_FILTER_UPDATE:
+ ret = priv_fdir_filter_update(priv, arg);
+ break;
+ case RTE_ETH_FILTER_DELETE:
+ ret = priv_fdir_filter_delete(priv, arg);
+ break;
+ case RTE_ETH_FILTER_FLUSH:
+ priv_fdir_filter_flush(priv);
+ break;
+ case RTE_ETH_FILTER_INFO:
+ priv_fdir_info_get(priv, arg);
+ break;
+ default:
+ DEBUG("%p: unknown operation %u", (void *)priv,
+ filter_op);
+ ret = EINVAL;
+ break;
+ }
+ return ret;
+}
+
+/**
+ * Manage filter operations.
+ *
+ * @param dev
+ * Pointer to Ethernet device structure.
+ * @param filter_type
+ * Filter type.
+ * @param filter_op
+ * Operation to perform.
+ * @param arg
+ * Pointer to operation-specific structure.
+ *
+ * @return
+ * 0 on success, negative errno value on failure.
+ */
+int
+mlx5_dev_filter_ctrl(struct rte_eth_dev *dev,
+ enum rte_filter_type filter_type,
+ enum rte_filter_op filter_op,
+ void *arg)
+{
+ int ret = EINVAL;
+ struct priv *priv = dev->data->dev_private;
+
+ switch (filter_type) {
+ case RTE_ETH_FILTER_GENERIC:
+ if (filter_op != RTE_ETH_FILTER_GET)
+ return -EINVAL;
+ *(const void **)arg = &mlx5_flow_ops;
+ return 0;
+ case RTE_ETH_FILTER_FDIR:
+ priv_lock(priv);
+ ret = priv_fdir_ctrl_func(priv, filter_op, arg);
+ priv_unlock(priv);
+ break;
+ default:
+ ERROR("%p: filter type (%d) not supported",
+ (void *)dev, filter_type);
+ break;
+ }
+ return -ret;
+}
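
From an application, this entry point is reached through the legacy filter
control API. A minimal sketch (port_id and the filter contents are
hypothetical placeholders):

    #include <rte_ethdev.h>

    static int
    example_fdir_add(uint16_t port_id, uint16_t queue)
    {
        const void *flow_ops = NULL; /* will point to mlx5_flow_ops */
        struct rte_eth_fdir_filter fdir = {
            .input.flow_type = RTE_ETH_FLOW_NONFRAG_IPV4_UDP,
            .action.behavior = RTE_ETH_FDIR_ACCEPT,
            .action.rx_queue = queue,
        };
        int ret;

        /* RTE_ETH_FILTER_GENERIC only supports RTE_ETH_FILTER_GET;
         * this is how the rte_flow_*() wrappers find the PMD ops. */
        ret = rte_eth_dev_filter_ctrl(port_id, RTE_ETH_FILTER_GENERIC,
                                      RTE_ETH_FILTER_GET, &flow_ops);
        if (ret)
            return ret;
        /* RTE_ETH_FILTER_FDIR requests are dispatched to
         * priv_fdir_ctrl_func() and converted to rte_flow rules. */
        return rte_eth_dev_filter_ctrl(port_id, RTE_ETH_FILTER_FDIR,
                                       RTE_ETH_FILTER_ADD, &fdir);
    }
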
diff --git a/drivers/net/mlx5/mlx5_mac.c b/drivers/net/mlx5/mlx5_mac.c
index 8489ea67..d17b991e 100644
--- a/drivers/net/mlx5/mlx5_mac.c
+++ b/drivers/net/mlx5/mlx5_mac.c
@@ -51,16 +51,9 @@
#pragma GCC diagnostic error "-Wpedantic"
#endif
-/* DPDK headers don't like -pedantic. */
-#ifdef PEDANTIC
-#pragma GCC diagnostic ignored "-Wpedantic"
-#endif
#include <rte_ether.h>
#include <rte_ethdev.h>
#include <rte_common.h>
-#ifdef PEDANTIC
-#pragma GCC diagnostic error "-Wpedantic"
-#endif
#include "mlx5.h"
#include "mlx5_utils.h"
@@ -90,112 +83,6 @@ priv_get_mac(struct priv *priv, uint8_t (*mac)[ETHER_ADDR_LEN])
}
/**
- * Delete MAC flow steering rule.
- *
- * @param hash_rxq
- * Pointer to hash RX queue structure.
- * @param mac_index
- * MAC address index.
- * @param vlan_index
- * VLAN index to use.
- */
-static void
-hash_rxq_del_mac_flow(struct hash_rxq *hash_rxq, unsigned int mac_index,
- unsigned int vlan_index)
-{
-#ifndef NDEBUG
- const uint8_t (*mac)[ETHER_ADDR_LEN] =
- (const uint8_t (*)[ETHER_ADDR_LEN])
- hash_rxq->priv->mac[mac_index].addr_bytes;
-#endif
-
- assert(mac_index < RTE_DIM(hash_rxq->mac_flow));
- assert(vlan_index < RTE_DIM(hash_rxq->mac_flow[mac_index]));
- if (hash_rxq->mac_flow[mac_index][vlan_index] == NULL)
- return;
- DEBUG("%p: removing MAC address %02x:%02x:%02x:%02x:%02x:%02x index %u"
- " VLAN index %u",
- (void *)hash_rxq,
- (*mac)[0], (*mac)[1], (*mac)[2], (*mac)[3], (*mac)[4], (*mac)[5],
- mac_index,
- vlan_index);
- claim_zero(ibv_exp_destroy_flow(hash_rxq->mac_flow
- [mac_index][vlan_index]));
- hash_rxq->mac_flow[mac_index][vlan_index] = NULL;
-}
-
-/**
- * Unregister a MAC address from a hash RX queue.
- *
- * @param hash_rxq
- * Pointer to hash RX queue structure.
- * @param mac_index
- * MAC address index.
- */
-static void
-hash_rxq_mac_addr_del(struct hash_rxq *hash_rxq, unsigned int mac_index)
-{
- unsigned int i;
-
- assert(mac_index < RTE_DIM(hash_rxq->mac_flow));
- for (i = 0; (i != RTE_DIM(hash_rxq->mac_flow[mac_index])); ++i)
- hash_rxq_del_mac_flow(hash_rxq, mac_index, i);
-}
-
-/**
- * Unregister all MAC addresses from a hash RX queue.
- *
- * @param hash_rxq
- * Pointer to hash RX queue structure.
- */
-void
-hash_rxq_mac_addrs_del(struct hash_rxq *hash_rxq)
-{
- unsigned int i;
-
- for (i = 0; (i != RTE_DIM(hash_rxq->mac_flow)); ++i)
- hash_rxq_mac_addr_del(hash_rxq, i);
-}
-
-/**
- * Unregister a MAC address.
- *
- * This is done for each hash RX queue.
- *
- * @param priv
- * Pointer to private structure.
- * @param mac_index
- * MAC address index.
- */
-static void
-priv_mac_addr_del(struct priv *priv, unsigned int mac_index)
-{
- unsigned int i;
-
- assert(mac_index < RTE_DIM(priv->mac));
- if (!BITFIELD_ISSET(priv->mac_configured, mac_index))
- return;
- for (i = 0; (i != priv->hash_rxqs_n); ++i)
- hash_rxq_mac_addr_del(&(*priv->hash_rxqs)[i], mac_index);
- BITFIELD_RESET(priv->mac_configured, mac_index);
-}
-
-/**
- * Unregister all MAC addresses from all hash RX queues.
- *
- * @param priv
- * Pointer to private structure.
- */
-void
-priv_mac_addrs_disable(struct priv *priv)
-{
- unsigned int i;
-
- for (i = 0; (i != priv->hash_rxqs_n); ++i)
- hash_rxq_mac_addrs_del(&(*priv->hash_rxqs)[i]);
-}
-
-/**
* DPDK callback to remove a MAC address.
*
* @param dev
@@ -206,258 +93,12 @@ priv_mac_addrs_disable(struct priv *priv)
void
mlx5_mac_addr_remove(struct rte_eth_dev *dev, uint32_t index)
{
- struct priv *priv = dev->data->dev_private;
-
if (mlx5_is_secondary())
return;
-
- priv_lock(priv);
- DEBUG("%p: removing MAC address from index %" PRIu32,
- (void *)dev, index);
- if (index >= RTE_DIM(priv->mac))
- goto end;
- priv_mac_addr_del(priv, index);
-end:
- priv_unlock(priv);
-}
-
-/**
- * Add MAC flow steering rule.
- *
- * @param hash_rxq
- * Pointer to hash RX queue structure.
- * @param mac_index
- * MAC address index to register.
- * @param vlan_index
- * VLAN index to use.
- *
- * @return
- * 0 on success, errno value on failure.
- */
-static int
-hash_rxq_add_mac_flow(struct hash_rxq *hash_rxq, unsigned int mac_index,
- unsigned int vlan_index)
-{
- struct ibv_exp_flow *flow;
- struct priv *priv = hash_rxq->priv;
- const uint8_t (*mac)[ETHER_ADDR_LEN] =
- (const uint8_t (*)[ETHER_ADDR_LEN])
- priv->mac[mac_index].addr_bytes;
- FLOW_ATTR_SPEC_ETH(data, priv_flow_attr(priv, NULL, 0, hash_rxq->type));
- struct ibv_exp_flow_attr *attr = &data->attr;
- struct ibv_exp_flow_spec_eth *spec = &data->spec;
- unsigned int vlan_enabled = !!priv->vlan_filter_n;
- unsigned int vlan_id = priv->vlan_filter[vlan_index];
-
- assert(mac_index < RTE_DIM(hash_rxq->mac_flow));
- assert(vlan_index < RTE_DIM(hash_rxq->mac_flow[mac_index]));
- if (hash_rxq->mac_flow[mac_index][vlan_index] != NULL)
- return 0;
- /*
- * No padding must be inserted by the compiler between attr and spec.
- * This layout is expected by libibverbs.
- */
- assert(((uint8_t *)attr + sizeof(*attr)) == (uint8_t *)spec);
- priv_flow_attr(priv, attr, sizeof(data), hash_rxq->type);
- /* The first specification must be Ethernet. */
- assert(spec->type == IBV_EXP_FLOW_SPEC_ETH);
- assert(spec->size == sizeof(*spec));
- *spec = (struct ibv_exp_flow_spec_eth){
- .type = IBV_EXP_FLOW_SPEC_ETH,
- .size = sizeof(*spec),
- .val = {
- .dst_mac = {
- (*mac)[0], (*mac)[1], (*mac)[2],
- (*mac)[3], (*mac)[4], (*mac)[5]
- },
- .vlan_tag = (vlan_enabled ? htons(vlan_id) : 0),
- },
- .mask = {
- .dst_mac = "\xff\xff\xff\xff\xff\xff",
- .vlan_tag = (vlan_enabled ? htons(0xfff) : 0),
- },
- };
- DEBUG("%p: adding MAC address %02x:%02x:%02x:%02x:%02x:%02x index %u"
- " VLAN index %u filtering %s, ID %u",
- (void *)hash_rxq,
- (*mac)[0], (*mac)[1], (*mac)[2], (*mac)[3], (*mac)[4], (*mac)[5],
- mac_index,
- vlan_index,
- (vlan_enabled ? "enabled" : "disabled"),
- vlan_id);
- /* Create related flow. */
- errno = 0;
- flow = ibv_exp_create_flow(hash_rxq->qp, attr);
- if (flow == NULL) {
- /* It's not clear whether errno is always set in this case. */
- ERROR("%p: flow configuration failed, errno=%d: %s",
- (void *)hash_rxq, errno,
- (errno ? strerror(errno) : "Unknown error"));
- if (errno)
- return errno;
- return EINVAL;
- }
- hash_rxq->mac_flow[mac_index][vlan_index] = flow;
- return 0;
-}
-
-/**
- * Register a MAC address in a hash RX queue.
- *
- * @param hash_rxq
- * Pointer to hash RX queue structure.
- * @param mac_index
- * MAC address index to register.
- *
- * @return
- * 0 on success, errno value on failure.
- */
-static int
-hash_rxq_mac_addr_add(struct hash_rxq *hash_rxq, unsigned int mac_index)
-{
- struct priv *priv = hash_rxq->priv;
- unsigned int i = 0;
- int ret;
-
- assert(mac_index < RTE_DIM(hash_rxq->mac_flow));
- assert(RTE_DIM(hash_rxq->mac_flow[mac_index]) ==
- RTE_DIM(priv->vlan_filter));
- /* Add a MAC address for each VLAN filter, or at least once. */
- do {
- ret = hash_rxq_add_mac_flow(hash_rxq, mac_index, i);
- if (ret) {
- /* Failure, rollback. */
- while (i != 0)
- hash_rxq_del_mac_flow(hash_rxq, mac_index,
- --i);
- return ret;
- }
- } while (++i < priv->vlan_filter_n);
- return 0;
-}
-
-/**
- * Register all MAC addresses in a hash RX queue.
- *
- * @param hash_rxq
- * Pointer to hash RX queue structure.
- *
- * @return
- * 0 on success, errno value on failure.
- */
-int
-hash_rxq_mac_addrs_add(struct hash_rxq *hash_rxq)
-{
- struct priv *priv = hash_rxq->priv;
- unsigned int i;
- int ret;
-
- assert(RTE_DIM(priv->mac) == RTE_DIM(hash_rxq->mac_flow));
- for (i = 0; (i != RTE_DIM(priv->mac)); ++i) {
- if (!BITFIELD_ISSET(priv->mac_configured, i))
- continue;
- ret = hash_rxq_mac_addr_add(hash_rxq, i);
- if (!ret)
- continue;
- /* Failure, rollback. */
- while (i != 0)
- hash_rxq_mac_addr_del(hash_rxq, --i);
- assert(ret > 0);
- return ret;
- }
- return 0;
-}
-
-/**
- * Register a MAC address.
- *
- * This is done for each hash RX queue.
- *
- * @param priv
- * Pointer to private structure.
- * @param mac_index
- * MAC address index to use.
- * @param mac
- * MAC address to register.
- *
- * @return
- * 0 on success, errno value on failure.
- */
-int
-priv_mac_addr_add(struct priv *priv, unsigned int mac_index,
- const uint8_t (*mac)[ETHER_ADDR_LEN])
-{
- unsigned int i;
- int ret;
-
- assert(mac_index < RTE_DIM(priv->mac));
- /* First, make sure this address isn't already configured. */
- for (i = 0; (i != RTE_DIM(priv->mac)); ++i) {
- /* Skip this index, it's going to be reconfigured. */
- if (i == mac_index)
- continue;
- if (!BITFIELD_ISSET(priv->mac_configured, i))
- continue;
- if (memcmp(priv->mac[i].addr_bytes, *mac, sizeof(*mac)))
- continue;
- /* Address already configured elsewhere, return with error. */
- return EADDRINUSE;
- }
- if (BITFIELD_ISSET(priv->mac_configured, mac_index))
- priv_mac_addr_del(priv, mac_index);
- priv->mac[mac_index] = (struct ether_addr){
- {
- (*mac)[0], (*mac)[1], (*mac)[2],
- (*mac)[3], (*mac)[4], (*mac)[5]
- }
- };
- if (!priv_allow_flow_type(priv, HASH_RXQ_FLOW_TYPE_MAC))
- goto end;
- for (i = 0; (i != priv->hash_rxqs_n); ++i) {
- ret = hash_rxq_mac_addr_add(&(*priv->hash_rxqs)[i], mac_index);
- if (!ret)
- continue;
- /* Failure, rollback. */
- while (i != 0)
- hash_rxq_mac_addr_del(&(*priv->hash_rxqs)[--i],
- mac_index);
- return ret;
- }
-end:
- BITFIELD_SET(priv->mac_configured, mac_index);
- return 0;
-}
-
-/**
- * Register all MAC addresses in all hash RX queues.
- *
- * @param priv
- * Pointer to private structure.
- *
- * @return
- * 0 on success, errno value on failure.
- */
-int
-priv_mac_addrs_enable(struct priv *priv)
-{
- unsigned int i;
- int ret;
-
- if (priv->isolated)
- return 0;
- if (!priv_allow_flow_type(priv, HASH_RXQ_FLOW_TYPE_MAC))
- return 0;
- for (i = 0; (i != priv->hash_rxqs_n); ++i) {
- ret = hash_rxq_mac_addrs_add(&(*priv->hash_rxqs)[i]);
- if (!ret)
- continue;
- /* Failure, rollback. */
- while (i != 0)
- hash_rxq_mac_addrs_del(&(*priv->hash_rxqs)[--i]);
- assert(ret > 0);
- return ret;
- }
- return 0;
+ assert(index < MLX5_MAX_MAC_ADDRESSES);
+ memset(&dev->data->mac_addrs[index], 0, sizeof(struct ether_addr));
+ if (!dev->data->promiscuous && !dev->data->all_multicast)
+ mlx5_traffic_restart(dev);
}
/**
@@ -471,31 +112,35 @@ priv_mac_addrs_enable(struct priv *priv)
* MAC address index.
* @param vmdq
* VMDq pool index to associate address with (ignored).
+ *
+ * @return
+ *   0 on success, a positive errno value otherwise.
*/
int
-mlx5_mac_addr_add(struct rte_eth_dev *dev, struct ether_addr *mac_addr,
+mlx5_mac_addr_add(struct rte_eth_dev *dev, struct ether_addr *mac,
uint32_t index, uint32_t vmdq)
{
- struct priv *priv = dev->data->dev_private;
- int re;
-
- if (mlx5_is_secondary())
- return -ENOTSUP;
+ unsigned int i;
+ int ret = 0;
(void)vmdq;
- priv_lock(priv);
- DEBUG("%p: adding MAC address at index %" PRIu32,
- (void *)dev, index);
- if (index >= RTE_DIM(priv->mac)) {
- re = EINVAL;
- goto end;
+ if (mlx5_is_secondary())
+ return 0;
+ assert(index < MLX5_MAX_MAC_ADDRESSES);
+ /* First, make sure this address isn't already configured. */
+ for (i = 0; (i != MLX5_MAX_MAC_ADDRESSES); ++i) {
+ /* Skip this index, it's going to be reconfigured. */
+ if (i == index)
+ continue;
+ if (memcmp(&dev->data->mac_addrs[i], mac, sizeof(*mac)))
+ continue;
+ /* Address already configured elsewhere, return with error. */
+ return EADDRINUSE;
}
- re = priv_mac_addr_add(priv, index,
- (const uint8_t (*)[ETHER_ADDR_LEN])
- mac_addr->addr_bytes);
-end:
- priv_unlock(priv);
- return -re;
+ dev->data->mac_addrs[index] = *mac;
+ if (!dev->data->promiscuous && !dev->data->all_multicast)
+ mlx5_traffic_restart(dev);
+ return ret;
}
/**
@@ -509,7 +154,8 @@ end:
void
mlx5_mac_addr_set(struct rte_eth_dev *dev, struct ether_addr *mac_addr)
{
+ if (mlx5_is_secondary())
+ return;
DEBUG("%p: setting primary MAC address", (void *)dev);
- mlx5_mac_addr_remove(dev, 0);
mlx5_mac_addr_add(dev, mac_addr, 0, 0);
}
diff --git a/drivers/net/mlx5/mlx5_mr.c b/drivers/net/mlx5/mlx5_mr.c
index 28733517..6b29eed5 100644
--- a/drivers/net/mlx5/mlx5_mr.c
+++ b/drivers/net/mlx5/mlx5_mr.c
@@ -41,14 +41,8 @@
#pragma GCC diagnostic error "-Wpedantic"
#endif
-/* DPDK headers don't like -pedantic. */
-#ifdef PEDANTIC
-#pragma GCC diagnostic ignored "-Wpedantic"
-#endif
#include <rte_mempool.h>
-#ifdef PEDANTIC
-#pragma GCC diagnostic error "-Wpedantic"
-#endif
+#include <rte_malloc.h>
#include "mlx5.h"
#include "mlx5_rxtx.h"
@@ -118,59 +112,13 @@ static int mlx5_check_mempool(struct rte_mempool *mp, uintptr_t *start,
}
/**
- * Register mempool as a memory region.
- *
- * @param pd
- * Pointer to protection domain.
- * @param mp
- * Pointer to memory pool.
- *
- * @return
- * Memory region pointer, NULL in case of error.
- */
-struct ibv_mr *
-mlx5_mp2mr(struct ibv_pd *pd, struct rte_mempool *mp)
-{
- const struct rte_memseg *ms = rte_eal_get_physmem_layout();
- uintptr_t start;
- uintptr_t end;
- unsigned int i;
-
- if (mlx5_check_mempool(mp, &start, &end) != 0) {
- ERROR("mempool %p: not virtually contiguous",
- (void *)mp);
- return NULL;
- }
-
- DEBUG("mempool %p area start=%p end=%p size=%zu",
- (void *)mp, (void *)start, (void *)end,
- (size_t)(end - start));
- /* Round start and end to page boundary if found in memory segments. */
- for (i = 0; (i < RTE_MAX_MEMSEG) && (ms[i].addr != NULL); ++i) {
- uintptr_t addr = (uintptr_t)ms[i].addr;
- size_t len = ms[i].len;
- unsigned int align = ms[i].hugepage_sz;
-
- if ((start > addr) && (start < addr + len))
- start = RTE_ALIGN_FLOOR(start, align);
- if ((end > addr) && (end < addr + len))
- end = RTE_ALIGN_CEIL(end, align);
- }
- DEBUG("mempool %p using start=%p end=%p size=%zu for MR",
- (void *)mp, (void *)start, (void *)end,
- (size_t)(end - start));
- return ibv_reg_mr(pd,
- (void *)start,
- end - start,
- IBV_ACCESS_LOCAL_WRITE);
-}
-
-/**
* Register a Memory Region (MR) <-> Memory Pool (MP) association in
* txq->mp2mr[]. If mp2mr[] is full, remove an entry first.
*
* This function should only be called by txq_mp2mr().
*
+ * @param priv
+ * Pointer to private structure.
* @param txq
* Pointer to TX queue structure.
* @param[in] mp
@@ -179,45 +127,75 @@ mlx5_mp2mr(struct ibv_pd *pd, struct rte_mempool *mp)
* Index of the next available entry.
*
* @return
- * mr->lkey on success, (uint32_t)-1 on failure.
+ * mr on success, NULL on failure.
*/
-uint32_t
-txq_mp2mr_reg(struct txq *txq, struct rte_mempool *mp, unsigned int idx)
+struct mlx5_mr*
+priv_txq_mp2mr_reg(struct priv *priv, struct mlx5_txq_data *txq,
+ struct rte_mempool *mp, unsigned int idx)
{
- struct txq_ctrl *txq_ctrl = container_of(txq, struct txq_ctrl, txq);
- struct ibv_mr *mr;
+ struct mlx5_txq_ctrl *txq_ctrl =
+ container_of(txq, struct mlx5_txq_ctrl, txq);
+ struct mlx5_mr *mr;
/* Add a new entry, register MR first. */
DEBUG("%p: discovered new memory pool \"%s\" (%p)",
(void *)txq_ctrl, mp->name, (void *)mp);
- mr = mlx5_mp2mr(txq_ctrl->priv->pd, mp);
+ mr = priv_mr_get(priv, mp);
+ if (mr == NULL)
+ mr = priv_mr_new(priv, mp);
if (unlikely(mr == NULL)) {
DEBUG("%p: unable to configure MR, ibv_reg_mr() failed.",
(void *)txq_ctrl);
- return (uint32_t)-1;
+ return NULL;
}
- if (unlikely(idx == RTE_DIM(txq_ctrl->txq.mp2mr))) {
+ if (unlikely(idx == RTE_DIM(txq->mp2mr))) {
/* Table is full, remove oldest entry. */
DEBUG("%p: MR <-> MP table full, dropping oldest entry.",
(void *)txq_ctrl);
--idx;
- claim_zero(ibv_dereg_mr(txq_ctrl->txq.mp2mr[0].mr));
- memmove(&txq_ctrl->txq.mp2mr[0], &txq_ctrl->txq.mp2mr[1],
- (sizeof(txq_ctrl->txq.mp2mr) -
- sizeof(txq_ctrl->txq.mp2mr[0])));
+ priv_mr_release(priv, txq->mp2mr[0]);
+ memmove(&txq->mp2mr[0], &txq->mp2mr[1],
+ (sizeof(txq->mp2mr) - sizeof(txq->mp2mr[0])));
}
/* Store the new entry. */
- txq_ctrl->txq.mp2mr[idx].start = (uintptr_t)mr->addr;
- txq_ctrl->txq.mp2mr[idx].end = (uintptr_t)mr->addr + mr->length;
- txq_ctrl->txq.mp2mr[idx].mr = mr;
- txq_ctrl->txq.mp2mr[idx].lkey = htonl(mr->lkey);
+ txq_ctrl->txq.mp2mr[idx] = mr;
DEBUG("%p: new MR lkey for MP \"%s\" (%p): 0x%08" PRIu32,
(void *)txq_ctrl, mp->name, (void *)mp,
- txq_ctrl->txq.mp2mr[idx].lkey);
- return txq_ctrl->txq.mp2mr[idx].lkey;
+ txq_ctrl->txq.mp2mr[idx]->lkey);
+ return mr;
+}
+
+/**
+ * Register a Memory Region (MR) <-> Memory Pool (MP) association in
+ * txq->mp2mr[]. If mp2mr[] is full, remove an entry first.
+ *
+ * This function should only be called by txq_mp2mr().
+ *
+ * @param txq
+ * Pointer to TX queue structure.
+ * @param[in] mp
+ * Memory Pool for which a Memory Region lkey must be returned.
+ * @param idx
+ * Index of the next available entry.
+ *
+ * @return
+ * mr on success, NULL on failure.
+ */
+struct mlx5_mr*
+mlx5_txq_mp2mr_reg(struct mlx5_txq_data *txq, struct rte_mempool *mp,
+ unsigned int idx)
+{
+ struct mlx5_txq_ctrl *txq_ctrl =
+ container_of(txq, struct mlx5_txq_ctrl, txq);
+ struct mlx5_mr *mr;
+
+ priv_lock(txq_ctrl->priv);
+ mr = priv_txq_mp2mr_reg(txq_ctrl->priv, txq, mp, idx);
+ priv_unlock(txq_ctrl->priv);
+ return mr;
}
-struct txq_mp2mr_mbuf_check_data {
+struct mlx5_mp2mr_mbuf_check_data {
int ret;
};
@@ -239,7 +217,7 @@ static void
txq_mp2mr_mbuf_check(struct rte_mempool *mp, void *arg, void *obj,
uint32_t index __rte_unused)
{
- struct txq_mp2mr_mbuf_check_data *data = arg;
+ struct mlx5_mp2mr_mbuf_check_data *data = arg;
struct rte_mbuf *buf = obj;
/*
@@ -260,35 +238,158 @@ txq_mp2mr_mbuf_check(struct rte_mempool *mp, void *arg, void *obj,
* Pointer to TX queue structure.
*/
void
-txq_mp2mr_iter(struct rte_mempool *mp, void *arg)
+mlx5_mp2mr_iter(struct rte_mempool *mp, void *arg)
{
- struct txq_ctrl *txq_ctrl = arg;
- struct txq_mp2mr_mbuf_check_data data = {
+ struct priv *priv = (struct priv *)arg;
+ struct mlx5_mp2mr_mbuf_check_data data = {
.ret = 0,
};
- uintptr_t start;
- uintptr_t end;
- unsigned int i;
+ struct mlx5_mr *mr;
/* Register mempool only if the first element looks like a mbuf. */
if (rte_mempool_obj_iter(mp, txq_mp2mr_mbuf_check, &data) == 0 ||
data.ret == -1)
return;
+ mr = priv_mr_get(priv, mp);
+ if (mr) {
+ priv_mr_release(priv, mr);
+ return;
+ }
+ priv_mr_new(priv, mp);
+}
+
+/**
+ * Register a new memory region from the mempool and store it in the memory
+ * region list.
+ *
+ * @param priv
+ * Pointer to private structure.
+ * @param mp
+ * Pointer to the memory pool to register.
+ * @return
+ *   The memory region on success, NULL on failure.
+ */
+struct mlx5_mr*
+priv_mr_new(struct priv *priv, struct rte_mempool *mp)
+{
+ const struct rte_memseg *ms = rte_eal_get_physmem_layout();
+ uintptr_t start;
+ uintptr_t end;
+ unsigned int i;
+ struct mlx5_mr *mr;
+
+ mr = rte_zmalloc_socket(__func__, sizeof(*mr), 0, mp->socket_id);
+ if (!mr) {
+		DEBUG("unable to allocate MR, rte_zmalloc_socket() failed.");
+ return NULL;
+ }
if (mlx5_check_mempool(mp, &start, &end) != 0) {
ERROR("mempool %p: not virtually contiguous",
(void *)mp);
- return;
+ return NULL;
}
- for (i = 0; (i != RTE_DIM(txq_ctrl->txq.mp2mr)); ++i) {
- struct ibv_mr *mr = txq_ctrl->txq.mp2mr[i].mr;
+ DEBUG("mempool %p area start=%p end=%p size=%zu",
+ (void *)mp, (void *)start, (void *)end,
+ (size_t)(end - start));
+ /* Round start and end to page boundary if found in memory segments. */
+ for (i = 0; (i < RTE_MAX_MEMSEG) && (ms[i].addr != NULL); ++i) {
+ uintptr_t addr = (uintptr_t)ms[i].addr;
+ size_t len = ms[i].len;
+ unsigned int align = ms[i].hugepage_sz;
- if (unlikely(mr == NULL)) {
- /* Unknown MP, add a new MR for it. */
- break;
+ if ((start > addr) && (start < addr + len))
+ start = RTE_ALIGN_FLOOR(start, align);
+ if ((end > addr) && (end < addr + len))
+ end = RTE_ALIGN_CEIL(end, align);
+ }
+ DEBUG("mempool %p using start=%p end=%p size=%zu for MR",
+ (void *)mp, (void *)start, (void *)end,
+ (size_t)(end - start));
+ mr->mr = ibv_reg_mr(priv->pd, (void *)start, end - start,
+ IBV_ACCESS_LOCAL_WRITE);
+ mr->mp = mp;
+ mr->lkey = rte_cpu_to_be_32(mr->mr->lkey);
+ mr->start = start;
+ mr->end = (uintptr_t)mr->mr->addr + mr->mr->length;
+ rte_atomic32_inc(&mr->refcnt);
+ DEBUG("%p: new Memory Region %p refcnt: %d", (void *)priv,
+ (void *)mr, rte_atomic32_read(&mr->refcnt));
+ LIST_INSERT_HEAD(&priv->mr, mr, next);
+ return mr;
+}
+
+/**
+ * Search the memory region object in the memory region list.
+ *
+ * @param priv
+ * Pointer to private structure.
+ * @param mp
+ *   Pointer to the memory pool to look up.
+ * @return
+ *   The memory region if found, NULL otherwise.
+ */
+struct mlx5_mr*
+priv_mr_get(struct priv *priv, struct rte_mempool *mp)
+{
+ struct mlx5_mr *mr;
+
+ assert(mp);
+ if (LIST_EMPTY(&priv->mr))
+ return NULL;
+ LIST_FOREACH(mr, &priv->mr, next) {
+ if (mr->mp == mp) {
+ rte_atomic32_inc(&mr->refcnt);
+ DEBUG("Memory Region %p refcnt: %d",
+ (void *)mr, rte_atomic32_read(&mr->refcnt));
+ return mr;
}
- if (start >= (uintptr_t)mr->addr &&
- end <= (uintptr_t)mr->addr + mr->length)
- return;
}
- txq_mp2mr_reg(&txq_ctrl->txq, mp, i);
+ return NULL;
+}
+
+/**
+ * Release the memory region object.
+ *
+ * @param priv
+ *   Pointer to private structure (unused).
+ * @param mr
+ *   Pointer to memory region to release.
+ *
+ * @return
+ * 0 on success, errno on failure.
+ */
+int
+priv_mr_release(struct priv *priv, struct mlx5_mr *mr)
+{
+ (void)priv;
+ assert(mr);
+ DEBUG("Memory Region %p refcnt: %d",
+ (void *)mr, rte_atomic32_read(&mr->refcnt));
+ if (rte_atomic32_dec_and_test(&mr->refcnt)) {
+ claim_zero(ibv_dereg_mr(mr->mr));
+ LIST_REMOVE(mr, next);
+ rte_free(mr);
+ return 0;
+ }
+ return EBUSY;
+}
+
+/**
+ * Verify the Memory Region list is empty.
+ *
+ * @param priv
+ * Pointer to private structure.
+ *
+ * @return
+ *   The number of Memory Regions not released.
+ */
+int
+priv_mr_verify(struct priv *priv)
+{
+ int ret = 0;
+ struct mlx5_mr *mr;
+
+ LIST_FOREACH(mr, &priv->mr, next) {
+ DEBUG("%p: mr %p still referenced", (void *)priv,
+ (void *)mr);
+ ++ret;
+ }
+ return ret;
}
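
Memory Regions are now reference counted and shared between queues through
the priv->mr list. The lookup-then-create pattern used by mlx5_mp2mr_iter()
and priv_txq_mp2mr_reg() above boils down to (sketch, assuming a valid priv
and mempool and that the caller holds the private lock):

    struct mlx5_mr *mr;

    mr = priv_mr_get(priv, mp);     /* takes a reference when found */
    if (mr == NULL)
        mr = priv_mr_new(priv, mp); /* registers the mempool and references it */
    if (mr == NULL)
        return ENOMEM;              /* hypothetical error handling */
    /* ... use mr->lkey when building WQEs for mbufs from this mempool ... */
    priv_mr_release(priv, mr);      /* drops the reference, frees on zero */
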
diff --git a/drivers/net/mlx5/mlx5_prm.h b/drivers/net/mlx5/mlx5_prm.h
index 608072f7..2de310bc 100644
--- a/drivers/net/mlx5/mlx5_prm.h
+++ b/drivers/net/mlx5/mlx5_prm.h
@@ -41,7 +41,7 @@
#ifdef PEDANTIC
#pragma GCC diagnostic ignored "-Wpedantic"
#endif
-#include <infiniband/mlx5_hw.h>
+#include <infiniband/mlx5dv.h>
#ifdef PEDANTIC
#pragma GCC diagnostic error "-Wpedantic"
#endif
@@ -89,9 +89,6 @@
/* Default max packet length to be inlined. */
#define MLX5_EMPW_MAX_INLINE_LEN (4U * MLX5_WQE_SIZE)
-#ifndef HAVE_VERBS_MLX5_OPCODE_TSO
-#define MLX5_OPCODE_TSO MLX5_OPCODE_LSO_MPW /* Compat with OFED 3.3. */
-#endif
#define MLX5_OPC_MOD_ENHANCED_MPSW 0
#define MLX5_OPCODE_ENHANCED_MPSW 0x29
@@ -154,6 +151,9 @@
/* Default mark value used when none is provided. */
#define MLX5_FLOW_MARK_DEFAULT 0xffffff
+/* Maximum number of DS in WQE. */
+#define MLX5_DSEG_MAX 63
+
/* Subset of struct mlx5_wqe_eth_seg. */
struct mlx5_wqe_eth_seg_small {
uint32_t rsvd0;
@@ -244,6 +244,46 @@ struct mlx5_cqe {
uint8_t op_own;
};
+/* Adding direct verbs to data-path. */
+
+/* CQ sequence number mask. */
+#define MLX5_CQ_SQN_MASK 0x3
+
+/* CQ sequence number index. */
+#define MLX5_CQ_SQN_OFFSET 28
+
+/* CQ doorbell index mask. */
+#define MLX5_CI_MASK 0xffffff
+
+/* CQ doorbell offset. */
+#define MLX5_CQ_ARM_DB 1
+
+/* CQ doorbell offset in the UAR page. */
+#define MLX5_CQ_DOORBELL 0x20
+
+/* CQE format value. */
+#define MLX5_COMPRESSED 0x3
+
+/* CQE format mask. */
+#define MLX5E_CQE_FORMAT_MASK 0xc
+
+/* MPW opcode. */
+#define MLX5_OPC_MOD_MPW 0x01
+
+/* Compressed Rx CQE structure. */
+struct mlx5_mini_cqe8 {
+ union {
+ uint32_t rx_hash_result;
+ uint32_t checksum;
+ struct {
+ uint16_t wqe_counter;
+ uint8_t s_wqe_opcode;
+ uint8_t reserved;
+ } s_wqe_info;
+ };
+ uint32_t byte_cnt;
+};
+
/**
* Convert a user mark to flow mark.
*
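
The new CQ doorbell constants are meant for arming a CQ from the data path.
A rough sketch of how they typically combine (based on the usual mlx5 arming
sequence rather than on code in this patch; cq_db, cq_ci, cq_arm_sn, cqn and
uar are assumed queue fields):

    uint32_t sn = cq_arm_sn & MLX5_CQ_SQN_MASK;
    uint32_t doorbell_hi = (sn << MLX5_CQ_SQN_OFFSET) |
                           (cq_ci & MLX5_CI_MASK);
    uint64_t doorbell = ((uint64_t)doorbell_hi << 32) | cqn;

    cq_db[MLX5_CQ_ARM_DB] = rte_cpu_to_be_32(doorbell_hi);
    rte_wmb();
    rte_write64(rte_cpu_to_be_64(doorbell), (uint8_t *)uar + MLX5_CQ_DOORBELL);
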
diff --git a/drivers/net/mlx5/mlx5_rss.c b/drivers/net/mlx5/mlx5_rss.c
index a2dd7d17..f3de46de 100644
--- a/drivers/net/mlx5/mlx5_rss.c
+++ b/drivers/net/mlx5/mlx5_rss.c
@@ -47,88 +47,13 @@
#pragma GCC diagnostic error "-Wpedantic"
#endif
-/* DPDK headers don't like -pedantic. */
-#ifdef PEDANTIC
-#pragma GCC diagnostic ignored "-Wpedantic"
-#endif
#include <rte_malloc.h>
#include <rte_ethdev.h>
-#ifdef PEDANTIC
-#pragma GCC diagnostic error "-Wpedantic"
-#endif
#include "mlx5.h"
#include "mlx5_rxtx.h"
/**
- * Get a RSS configuration hash key.
- *
- * @param priv
- * Pointer to private structure.
- * @param rss_hf
- * RSS hash functions configuration must be retrieved for.
- *
- * @return
- * Pointer to a RSS configuration structure or NULL if rss_hf cannot
- * be matched.
- */
-static struct rte_eth_rss_conf *
-rss_hash_get(struct priv *priv, uint64_t rss_hf)
-{
- unsigned int i;
-
- for (i = 0; (i != hash_rxq_init_n); ++i) {
- uint64_t dpdk_rss_hf = hash_rxq_init[i].dpdk_rss_hf;
-
- if (!(dpdk_rss_hf & rss_hf))
- continue;
- return (*priv->rss_conf)[i];
- }
- return NULL;
-}
-
-/**
- * Register a RSS key.
- *
- * @param priv
- * Pointer to private structure.
- * @param key
- * Hash key to register.
- * @param key_len
- * Hash key length in bytes.
- * @param rss_hf
- * RSS hash functions the provided key applies to.
- *
- * @return
- * 0 on success, errno value on failure.
- */
-int
-rss_hash_rss_conf_new_key(struct priv *priv, const uint8_t *key,
- unsigned int key_len, uint64_t rss_hf)
-{
- unsigned int i;
-
- for (i = 0; (i != hash_rxq_init_n); ++i) {
- struct rte_eth_rss_conf *rss_conf;
- uint64_t dpdk_rss_hf = hash_rxq_init[i].dpdk_rss_hf;
-
- if (!(dpdk_rss_hf & rss_hf))
- continue;
- rss_conf = rte_realloc((*priv->rss_conf)[i],
- (sizeof(*rss_conf) + key_len),
- 0);
- if (!rss_conf)
- return ENOMEM;
- rss_conf->rss_key = (void *)(rss_conf + 1);
- rss_conf->rss_key_len = key_len;
- rss_conf->rss_hf = dpdk_rss_hf;
- memcpy(rss_conf->rss_key, key, key_len);
- (*priv->rss_conf)[i] = rss_conf;
- }
- return 0;
-}
-
-/**
* DPDK callback to update the RSS hash configuration.
*
* @param dev
@@ -144,23 +69,24 @@ mlx5_rss_hash_update(struct rte_eth_dev *dev,
struct rte_eth_rss_conf *rss_conf)
{
struct priv *priv = dev->data->dev_private;
- int err = 0;
+ int ret = 0;
priv_lock(priv);
-
- assert(priv->rss_conf != NULL);
-
- /* Apply configuration. */
- if (rss_conf->rss_key)
- err = rss_hash_rss_conf_new_key(priv,
- rss_conf->rss_key,
- rss_conf->rss_key_len,
- rss_conf->rss_hf);
- /* Store protocols for which RSS is enabled. */
- priv->rss_hf = rss_conf->rss_hf;
+ if (rss_conf->rss_key && rss_conf->rss_key_len) {
+ priv->rss_conf.rss_key = rte_realloc(priv->rss_conf.rss_key,
+ rss_conf->rss_key_len, 0);
+ if (!priv->rss_conf.rss_key) {
+ ret = -ENOMEM;
+ goto out;
+ }
+ memcpy(priv->rss_conf.rss_key, rss_conf->rss_key,
+ rss_conf->rss_key_len);
+ priv->rss_conf.rss_key_len = rss_conf->rss_key_len;
+ }
+ priv->rss_conf.rss_hf = rss_conf->rss_hf;
+out:
priv_unlock(priv);
- assert(err >= 0);
- return -err;
+ return ret;
}
/**
@@ -179,26 +105,17 @@ mlx5_rss_hash_conf_get(struct rte_eth_dev *dev,
struct rte_eth_rss_conf *rss_conf)
{
struct priv *priv = dev->data->dev_private;
- struct rte_eth_rss_conf *priv_rss_conf;
- priv_lock(priv);
-
- assert(priv->rss_conf != NULL);
-
- priv_rss_conf = rss_hash_get(priv, rss_conf->rss_hf);
- if (!priv_rss_conf) {
- rss_conf->rss_hf = 0;
- priv_unlock(priv);
+ if (!rss_conf)
return -EINVAL;
- }
+ priv_lock(priv);
if (rss_conf->rss_key &&
- rss_conf->rss_key_len >= priv_rss_conf->rss_key_len)
- memcpy(rss_conf->rss_key,
- priv_rss_conf->rss_key,
- priv_rss_conf->rss_key_len);
- rss_conf->rss_key_len = priv_rss_conf->rss_key_len;
- rss_conf->rss_hf = priv_rss_conf->rss_hf;
-
+ (rss_conf->rss_key_len >= priv->rss_conf.rss_key_len)) {
+ memcpy(rss_conf->rss_key, priv->rss_conf.rss_key,
+ priv->rss_conf.rss_key_len);
+ }
+ rss_conf->rss_key_len = priv->rss_conf.rss_key_len;
+ rss_conf->rss_hf = priv->rss_conf.rss_hf;
priv_unlock(priv);
return 0;
}
@@ -357,11 +274,13 @@ mlx5_dev_rss_reta_update(struct rte_eth_dev *dev,
int ret;
struct priv *priv = dev->data->dev_private;
- mlx5_dev_stop(dev);
+ assert(!mlx5_is_secondary());
priv_lock(priv);
ret = priv_dev_rss_reta_update(priv, reta_conf, reta_size);
priv_unlock(priv);
- if (ret)
- return -ret;
- return mlx5_dev_start(dev);
+ if (dev->data->dev_started) {
+ mlx5_dev_stop(dev);
+ mlx5_dev_start(dev);
+ }
+ return -ret;
}
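
The RSS key and hash fields are now kept in a single priv->rss_conf instead
of one entry per hash RX queue type. Updating them from an application is
unchanged; a sketch with a hypothetical 40-byte key:

    #include <rte_ethdev.h>

    static int
    example_rss_update(uint16_t port_id, uint8_t *key, uint8_t key_len)
    {
        struct rte_eth_rss_conf conf = {
            .rss_key = key,         /* copied into priv->rss_conf */
            .rss_key_len = key_len, /* e.g. rss_hash_default_key_len */
            .rss_hf = ETH_RSS_IP | ETH_RSS_UDP | ETH_RSS_TCP,
        };

        return rte_eth_dev_rss_hash_update(port_id, &conf);
    }
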
diff --git a/drivers/net/mlx5/mlx5_rxmode.c b/drivers/net/mlx5/mlx5_rxmode.c
index a67e5426..0ef2cdf0 100644
--- a/drivers/net/mlx5/mlx5_rxmode.c
+++ b/drivers/net/mlx5/mlx5_rxmode.c
@@ -45,343 +45,12 @@
#pragma GCC diagnostic error "-Wpedantic"
#endif
-/* DPDK headers don't like -pedantic. */
-#ifdef PEDANTIC
-#pragma GCC diagnostic ignored "-Wpedantic"
-#endif
#include <rte_ethdev.h>
-#ifdef PEDANTIC
-#pragma GCC diagnostic error "-Wpedantic"
-#endif
#include "mlx5.h"
#include "mlx5_rxtx.h"
#include "mlx5_utils.h"
-/* Initialization data for special flows. */
-static const struct special_flow_init special_flow_init[] = {
- [HASH_RXQ_FLOW_TYPE_PROMISC] = {
- .dst_mac_val = "\x00\x00\x00\x00\x00\x00",
- .dst_mac_mask = "\x00\x00\x00\x00\x00\x00",
- .hash_types =
- 1 << HASH_RXQ_TCPV4 |
- 1 << HASH_RXQ_UDPV4 |
- 1 << HASH_RXQ_IPV4 |
- 1 << HASH_RXQ_TCPV6 |
- 1 << HASH_RXQ_UDPV6 |
- 1 << HASH_RXQ_IPV6 |
- 1 << HASH_RXQ_ETH |
- 0,
- .per_vlan = 0,
- },
- [HASH_RXQ_FLOW_TYPE_ALLMULTI] = {
- .dst_mac_val = "\x01\x00\x00\x00\x00\x00",
- .dst_mac_mask = "\x01\x00\x00\x00\x00\x00",
- .hash_types =
- 1 << HASH_RXQ_UDPV4 |
- 1 << HASH_RXQ_IPV4 |
- 1 << HASH_RXQ_UDPV6 |
- 1 << HASH_RXQ_IPV6 |
- 1 << HASH_RXQ_ETH |
- 0,
- .per_vlan = 0,
- },
- [HASH_RXQ_FLOW_TYPE_BROADCAST] = {
- .dst_mac_val = "\xff\xff\xff\xff\xff\xff",
- .dst_mac_mask = "\xff\xff\xff\xff\xff\xff",
- .hash_types =
- 1 << HASH_RXQ_UDPV4 |
- 1 << HASH_RXQ_IPV4 |
- 1 << HASH_RXQ_UDPV6 |
- 1 << HASH_RXQ_IPV6 |
- 1 << HASH_RXQ_ETH |
- 0,
- .per_vlan = 1,
- },
- [HASH_RXQ_FLOW_TYPE_IPV6MULTI] = {
- .dst_mac_val = "\x33\x33\x00\x00\x00\x00",
- .dst_mac_mask = "\xff\xff\x00\x00\x00\x00",
- .hash_types =
- 1 << HASH_RXQ_UDPV6 |
- 1 << HASH_RXQ_IPV6 |
- 1 << HASH_RXQ_ETH |
- 0,
- .per_vlan = 1,
- },
-};
-
-/**
- * Enable a special flow in a hash RX queue for a given VLAN index.
- *
- * @param hash_rxq
- * Pointer to hash RX queue structure.
- * @param flow_type
- * Special flow type.
- * @param vlan_index
- * VLAN index to use.
- *
- * @return
- * 0 on success, errno value on failure.
- */
-static int
-hash_rxq_special_flow_enable_vlan(struct hash_rxq *hash_rxq,
- enum hash_rxq_flow_type flow_type,
- unsigned int vlan_index)
-{
- struct priv *priv = hash_rxq->priv;
- struct ibv_exp_flow *flow;
- FLOW_ATTR_SPEC_ETH(data, priv_flow_attr(priv, NULL, 0, hash_rxq->type));
- struct ibv_exp_flow_attr *attr = &data->attr;
- struct ibv_exp_flow_spec_eth *spec = &data->spec;
- const uint8_t *mac;
- const uint8_t *mask;
- unsigned int vlan_enabled = (priv->vlan_filter_n &&
- special_flow_init[flow_type].per_vlan);
- unsigned int vlan_id = priv->vlan_filter[vlan_index];
-
- /* Check if flow is relevant for this hash_rxq. */
- if (!(special_flow_init[flow_type].hash_types & (1 << hash_rxq->type)))
- return 0;
- /* Check if flow already exists. */
- if (hash_rxq->special_flow[flow_type][vlan_index] != NULL)
- return 0;
-
- /*
- * No padding must be inserted by the compiler between attr and spec.
- * This layout is expected by libibverbs.
- */
- assert(((uint8_t *)attr + sizeof(*attr)) == (uint8_t *)spec);
- priv_flow_attr(priv, attr, sizeof(data), hash_rxq->type);
- /* The first specification must be Ethernet. */
- assert(spec->type == IBV_EXP_FLOW_SPEC_ETH);
- assert(spec->size == sizeof(*spec));
-
- mac = special_flow_init[flow_type].dst_mac_val;
- mask = special_flow_init[flow_type].dst_mac_mask;
- *spec = (struct ibv_exp_flow_spec_eth){
- .type = IBV_EXP_FLOW_SPEC_ETH,
- .size = sizeof(*spec),
- .val = {
- .dst_mac = {
- mac[0], mac[1], mac[2],
- mac[3], mac[4], mac[5],
- },
- .vlan_tag = (vlan_enabled ? htons(vlan_id) : 0),
- },
- .mask = {
- .dst_mac = {
- mask[0], mask[1], mask[2],
- mask[3], mask[4], mask[5],
- },
- .vlan_tag = (vlan_enabled ? htons(0xfff) : 0),
- },
- };
-
- errno = 0;
- flow = ibv_exp_create_flow(hash_rxq->qp, attr);
- if (flow == NULL) {
- /* It's not clear whether errno is always set in this case. */
- ERROR("%p: flow configuration failed, errno=%d: %s",
- (void *)hash_rxq, errno,
- (errno ? strerror(errno) : "Unknown error"));
- if (errno)
- return errno;
- return EINVAL;
- }
- hash_rxq->special_flow[flow_type][vlan_index] = flow;
- DEBUG("%p: special flow %s (index %d) VLAN %u (index %u) enabled",
- (void *)hash_rxq, hash_rxq_flow_type_str(flow_type), flow_type,
- vlan_id, vlan_index);
- return 0;
-}
-
-/**
- * Disable a special flow in a hash RX queue for a given VLAN index.
- *
- * @param hash_rxq
- * Pointer to hash RX queue structure.
- * @param flow_type
- * Special flow type.
- * @param vlan_index
- * VLAN index to use.
- */
-static void
-hash_rxq_special_flow_disable_vlan(struct hash_rxq *hash_rxq,
- enum hash_rxq_flow_type flow_type,
- unsigned int vlan_index)
-{
- struct ibv_exp_flow *flow =
- hash_rxq->special_flow[flow_type][vlan_index];
-
- if (flow == NULL)
- return;
- claim_zero(ibv_exp_destroy_flow(flow));
- hash_rxq->special_flow[flow_type][vlan_index] = NULL;
- DEBUG("%p: special flow %s (index %d) VLAN %u (index %u) disabled",
- (void *)hash_rxq, hash_rxq_flow_type_str(flow_type), flow_type,
- hash_rxq->priv->vlan_filter[vlan_index], vlan_index);
-}
-
-/**
- * Enable a special flow in a hash RX queue.
- *
- * @param hash_rxq
- * Pointer to hash RX queue structure.
- * @param flow_type
- * Special flow type.
- * @param vlan_index
- * VLAN index to use.
- *
- * @return
- * 0 on success, errno value on failure.
- */
-static int
-hash_rxq_special_flow_enable(struct hash_rxq *hash_rxq,
- enum hash_rxq_flow_type flow_type)
-{
- struct priv *priv = hash_rxq->priv;
- unsigned int i = 0;
- int ret;
-
- assert((unsigned int)flow_type < RTE_DIM(hash_rxq->special_flow));
- assert(RTE_DIM(hash_rxq->special_flow[flow_type]) ==
- RTE_DIM(priv->vlan_filter));
- /* Add a special flow for each VLAN filter when relevant. */
- do {
- ret = hash_rxq_special_flow_enable_vlan(hash_rxq, flow_type, i);
- if (ret) {
- /* Failure, rollback. */
- while (i != 0)
- hash_rxq_special_flow_disable_vlan(hash_rxq,
- flow_type,
- --i);
- return ret;
- }
- } while (special_flow_init[flow_type].per_vlan &&
- ++i < priv->vlan_filter_n);
- return 0;
-}
-
-/**
- * Disable a special flow in a hash RX queue.
- *
- * @param hash_rxq
- * Pointer to hash RX queue structure.
- * @param flow_type
- * Special flow type.
- */
-static void
-hash_rxq_special_flow_disable(struct hash_rxq *hash_rxq,
- enum hash_rxq_flow_type flow_type)
-{
- unsigned int i;
-
- assert((unsigned int)flow_type < RTE_DIM(hash_rxq->special_flow));
- for (i = 0; (i != RTE_DIM(hash_rxq->special_flow[flow_type])); ++i)
- hash_rxq_special_flow_disable_vlan(hash_rxq, flow_type, i);
-}
-
-/**
- * Enable a special flow in all hash RX queues.
- *
- * @param priv
- * Private structure.
- * @param flow_type
- * Special flow type.
- *
- * @return
- * 0 on success, errno value on failure.
- */
-int
-priv_special_flow_enable(struct priv *priv, enum hash_rxq_flow_type flow_type)
-{
- unsigned int i;
-
- if (!priv_allow_flow_type(priv, flow_type))
- return 0;
- for (i = 0; (i != priv->hash_rxqs_n); ++i) {
- struct hash_rxq *hash_rxq = &(*priv->hash_rxqs)[i];
- int ret;
-
- ret = hash_rxq_special_flow_enable(hash_rxq, flow_type);
- if (!ret)
- continue;
- /* Failure, rollback. */
- while (i != 0) {
- hash_rxq = &(*priv->hash_rxqs)[--i];
- hash_rxq_special_flow_disable(hash_rxq, flow_type);
- }
- return ret;
- }
- return 0;
-}
-
-/**
- * Disable a special flow in all hash RX queues.
- *
- * @param priv
- * Private structure.
- * @param flow_type
- * Special flow type.
- */
-void
-priv_special_flow_disable(struct priv *priv, enum hash_rxq_flow_type flow_type)
-{
- unsigned int i;
-
- for (i = 0; (i != priv->hash_rxqs_n); ++i) {
- struct hash_rxq *hash_rxq = &(*priv->hash_rxqs)[i];
-
- hash_rxq_special_flow_disable(hash_rxq, flow_type);
- }
-}
-
-/**
- * Enable all special flows in all hash RX queues.
- *
- * @param priv
- * Private structure.
- */
-int
-priv_special_flow_enable_all(struct priv *priv)
-{
- enum hash_rxq_flow_type flow_type;
-
- if (priv->isolated)
- return 0;
- for (flow_type = HASH_RXQ_FLOW_TYPE_PROMISC;
- flow_type != HASH_RXQ_FLOW_TYPE_MAC;
- ++flow_type) {
- int ret;
-
- ret = priv_special_flow_enable(priv, flow_type);
- if (!ret)
- continue;
- /* Failure, rollback. */
- while (flow_type)
- priv_special_flow_disable(priv, --flow_type);
- return ret;
- }
- return 0;
-}
-
-/**
- * Disable all special flows in all hash RX queues.
- *
- * @param priv
- * Private structure.
- */
-void
-priv_special_flow_disable_all(struct priv *priv)
-{
- enum hash_rxq_flow_type flow_type;
-
- for (flow_type = HASH_RXQ_FLOW_TYPE_PROMISC;
- flow_type != HASH_RXQ_FLOW_TYPE_MAC;
- ++flow_type)
- priv_special_flow_disable(priv, flow_type);
-}
-
/**
* DPDK callback to enable promiscuous mode.
*
@@ -391,19 +60,10 @@ priv_special_flow_disable_all(struct priv *priv)
void
mlx5_promiscuous_enable(struct rte_eth_dev *dev)
{
- struct priv *priv = dev->data->dev_private;
- int ret;
-
if (mlx5_is_secondary())
return;
-
- priv_lock(priv);
- priv->promisc_req = 1;
- ret = priv_rehash_flows(priv);
- if (ret)
- ERROR("error while enabling promiscuous mode: %s",
- strerror(ret));
- priv_unlock(priv);
+ dev->data->promiscuous = 1;
+ mlx5_traffic_restart(dev);
}
/**
@@ -415,19 +75,10 @@ mlx5_promiscuous_enable(struct rte_eth_dev *dev)
void
mlx5_promiscuous_disable(struct rte_eth_dev *dev)
{
- struct priv *priv = dev->data->dev_private;
- int ret;
-
if (mlx5_is_secondary())
return;
-
- priv_lock(priv);
- priv->promisc_req = 0;
- ret = priv_rehash_flows(priv);
- if (ret)
- ERROR("error while disabling promiscuous mode: %s",
- strerror(ret));
- priv_unlock(priv);
+ dev->data->promiscuous = 0;
+ mlx5_traffic_restart(dev);
}
/**
@@ -439,19 +90,10 @@ mlx5_promiscuous_disable(struct rte_eth_dev *dev)
void
mlx5_allmulticast_enable(struct rte_eth_dev *dev)
{
- struct priv *priv = dev->data->dev_private;
- int ret;
-
if (mlx5_is_secondary())
return;
-
- priv_lock(priv);
- priv->allmulti_req = 1;
- ret = priv_rehash_flows(priv);
- if (ret)
- ERROR("error while enabling allmulticast mode: %s",
- strerror(ret));
- priv_unlock(priv);
+ dev->data->all_multicast = 1;
+ mlx5_traffic_restart(dev);
}
/**
@@ -463,17 +105,8 @@ mlx5_allmulticast_enable(struct rte_eth_dev *dev)
void
mlx5_allmulticast_disable(struct rte_eth_dev *dev)
{
- struct priv *priv = dev->data->dev_private;
- int ret;
-
if (mlx5_is_secondary())
return;
-
- priv_lock(priv);
- priv->allmulti_req = 0;
- ret = priv_rehash_flows(priv);
- if (ret)
- ERROR("error while disabling allmulticast mode: %s",
- strerror(ret));
- priv_unlock(priv);
+ dev->data->all_multicast = 0;
+ mlx5_traffic_restart(dev);
}
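
All four Rx mode callbacks are reduced to flipping the corresponding flag in
dev->data and calling mlx5_traffic_restart(), which rebuilds the control
flows accordingly. Nothing changes on the application side (port_id is a
placeholder):

    #include <rte_ethdev.h>

    rte_eth_promiscuous_enable(port_id);   /* dev->data->promiscuous = 1 */
    rte_eth_allmulticast_enable(port_id);  /* dev->data->all_multicast = 1 */
    /* ... */
    rte_eth_allmulticast_disable(port_id);
    rte_eth_promiscuous_disable(port_id);
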
diff --git a/drivers/net/mlx5/mlx5_rxq.c b/drivers/net/mlx5/mlx5_rxq.c
index 74387a79..6b29aaee 100644
--- a/drivers/net/mlx5/mlx5_rxq.c
+++ b/drivers/net/mlx5/mlx5_rxq.c
@@ -37,6 +37,7 @@
#include <string.h>
#include <stdint.h>
#include <fcntl.h>
+#include <sys/queue.h>
/* Verbs header. */
/* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */
@@ -44,25 +45,18 @@
#pragma GCC diagnostic ignored "-Wpedantic"
#endif
#include <infiniband/verbs.h>
-#include <infiniband/arch.h>
-#include <infiniband/mlx5_hw.h>
+#include <infiniband/mlx5dv.h>
#ifdef PEDANTIC
#pragma GCC diagnostic error "-Wpedantic"
#endif
-/* DPDK headers don't like -pedantic. */
-#ifdef PEDANTIC
-#pragma GCC diagnostic ignored "-Wpedantic"
-#endif
#include <rte_mbuf.h>
#include <rte_malloc.h>
#include <rte_ethdev.h>
#include <rte_common.h>
#include <rte_interrupts.h>
#include <rte_debug.h>
-#ifdef PEDANTIC
-#pragma GCC diagnostic error "-Wpedantic"
-#endif
+#include <rte_io.h>
#include "mlx5.h"
#include "mlx5_rxtx.h"
@@ -70,122 +64,6 @@
#include "mlx5_autoconf.h"
#include "mlx5_defs.h"
-/* Initialization data for hash RX queues. */
-const struct hash_rxq_init hash_rxq_init[] = {
- [HASH_RXQ_TCPV4] = {
- .hash_fields = (IBV_EXP_RX_HASH_SRC_IPV4 |
- IBV_EXP_RX_HASH_DST_IPV4 |
- IBV_EXP_RX_HASH_SRC_PORT_TCP |
- IBV_EXP_RX_HASH_DST_PORT_TCP),
- .dpdk_rss_hf = ETH_RSS_NONFRAG_IPV4_TCP,
- .flow_priority = 0,
- .flow_spec.tcp_udp = {
- .type = IBV_EXP_FLOW_SPEC_TCP,
- .size = sizeof(hash_rxq_init[0].flow_spec.tcp_udp),
- },
- .underlayer = &hash_rxq_init[HASH_RXQ_IPV4],
- },
- [HASH_RXQ_UDPV4] = {
- .hash_fields = (IBV_EXP_RX_HASH_SRC_IPV4 |
- IBV_EXP_RX_HASH_DST_IPV4 |
- IBV_EXP_RX_HASH_SRC_PORT_UDP |
- IBV_EXP_RX_HASH_DST_PORT_UDP),
- .dpdk_rss_hf = ETH_RSS_NONFRAG_IPV4_UDP,
- .flow_priority = 0,
- .flow_spec.tcp_udp = {
- .type = IBV_EXP_FLOW_SPEC_UDP,
- .size = sizeof(hash_rxq_init[0].flow_spec.tcp_udp),
- },
- .underlayer = &hash_rxq_init[HASH_RXQ_IPV4],
- },
- [HASH_RXQ_IPV4] = {
- .hash_fields = (IBV_EXP_RX_HASH_SRC_IPV4 |
- IBV_EXP_RX_HASH_DST_IPV4),
- .dpdk_rss_hf = (ETH_RSS_IPV4 |
- ETH_RSS_FRAG_IPV4),
- .flow_priority = 1,
- .flow_spec.ipv4 = {
- .type = IBV_EXP_FLOW_SPEC_IPV4,
- .size = sizeof(hash_rxq_init[0].flow_spec.ipv4),
- },
- .underlayer = &hash_rxq_init[HASH_RXQ_ETH],
- },
- [HASH_RXQ_TCPV6] = {
- .hash_fields = (IBV_EXP_RX_HASH_SRC_IPV6 |
- IBV_EXP_RX_HASH_DST_IPV6 |
- IBV_EXP_RX_HASH_SRC_PORT_TCP |
- IBV_EXP_RX_HASH_DST_PORT_TCP),
- .dpdk_rss_hf = ETH_RSS_NONFRAG_IPV6_TCP,
- .flow_priority = 0,
- .flow_spec.tcp_udp = {
- .type = IBV_EXP_FLOW_SPEC_TCP,
- .size = sizeof(hash_rxq_init[0].flow_spec.tcp_udp),
- },
- .underlayer = &hash_rxq_init[HASH_RXQ_IPV6],
- },
- [HASH_RXQ_UDPV6] = {
- .hash_fields = (IBV_EXP_RX_HASH_SRC_IPV6 |
- IBV_EXP_RX_HASH_DST_IPV6 |
- IBV_EXP_RX_HASH_SRC_PORT_UDP |
- IBV_EXP_RX_HASH_DST_PORT_UDP),
- .dpdk_rss_hf = ETH_RSS_NONFRAG_IPV6_UDP,
- .flow_priority = 0,
- .flow_spec.tcp_udp = {
- .type = IBV_EXP_FLOW_SPEC_UDP,
- .size = sizeof(hash_rxq_init[0].flow_spec.tcp_udp),
- },
- .underlayer = &hash_rxq_init[HASH_RXQ_IPV6],
- },
- [HASH_RXQ_IPV6] = {
- .hash_fields = (IBV_EXP_RX_HASH_SRC_IPV6 |
- IBV_EXP_RX_HASH_DST_IPV6),
- .dpdk_rss_hf = (ETH_RSS_IPV6 |
- ETH_RSS_FRAG_IPV6),
- .flow_priority = 1,
- .flow_spec.ipv6 = {
- .type = IBV_EXP_FLOW_SPEC_IPV6,
- .size = sizeof(hash_rxq_init[0].flow_spec.ipv6),
- },
- .underlayer = &hash_rxq_init[HASH_RXQ_ETH],
- },
- [HASH_RXQ_ETH] = {
- .hash_fields = 0,
- .dpdk_rss_hf = 0,
- .flow_priority = 2,
- .flow_spec.eth = {
- .type = IBV_EXP_FLOW_SPEC_ETH,
- .size = sizeof(hash_rxq_init[0].flow_spec.eth),
- },
- .underlayer = NULL,
- },
-};
-
-/* Number of entries in hash_rxq_init[]. */
-const unsigned int hash_rxq_init_n = RTE_DIM(hash_rxq_init);
-
-/* Initialization data for hash RX queue indirection tables. */
-static const struct ind_table_init ind_table_init[] = {
- {
- .max_size = -1u, /* Superseded by HW limitations. */
- .hash_types =
- 1 << HASH_RXQ_TCPV4 |
- 1 << HASH_RXQ_UDPV4 |
- 1 << HASH_RXQ_IPV4 |
- 1 << HASH_RXQ_TCPV6 |
- 1 << HASH_RXQ_UDPV6 |
- 1 << HASH_RXQ_IPV6 |
- 0,
- .hash_types_n = 6,
- },
- {
- .max_size = 1,
- .hash_types = 1 << HASH_RXQ_ETH,
- .hash_types_n = 1,
- },
-};
-
-#define IND_TABLE_INIT_N RTE_DIM(ind_table_init)
-
/* Default RSS hash key also used for ConnectX-3. */
uint8_t rss_hash_default_key[] = {
0x2c, 0xc6, 0x81, 0xd1,
@@ -204,495 +82,27 @@ uint8_t rss_hash_default_key[] = {
const size_t rss_hash_default_key_len = sizeof(rss_hash_default_key);
/**
- * Populate flow steering rule for a given hash RX queue type using
- * information from hash_rxq_init[]. Nothing is written to flow_attr when
- * flow_attr_size is not large enough, but the required size is still returned.
- *
- * @param priv
- * Pointer to private structure.
- * @param[out] flow_attr
- * Pointer to flow attribute structure to fill. Note that the allocated
- * area must be larger and large enough to hold all flow specifications.
- * @param flow_attr_size
- * Entire size of flow_attr and trailing room for flow specifications.
- * @param type
- * Hash RX queue type to use for flow steering rule.
- *
- * @return
- * Total size of the flow attribute buffer. No errors are defined.
- */
-size_t
-priv_flow_attr(struct priv *priv, struct ibv_exp_flow_attr *flow_attr,
- size_t flow_attr_size, enum hash_rxq_type type)
-{
- size_t offset = sizeof(*flow_attr);
- const struct hash_rxq_init *init = &hash_rxq_init[type];
-
- assert(priv != NULL);
- assert((size_t)type < RTE_DIM(hash_rxq_init));
- do {
- offset += init->flow_spec.hdr.size;
- init = init->underlayer;
- } while (init != NULL);
- if (offset > flow_attr_size)
- return offset;
- flow_attr_size = offset;
- init = &hash_rxq_init[type];
- *flow_attr = (struct ibv_exp_flow_attr){
- .type = IBV_EXP_FLOW_ATTR_NORMAL,
- /* Priorities < 3 are reserved for flow director. */
- .priority = init->flow_priority + 3,
- .num_of_specs = 0,
- .port = priv->port,
- .flags = 0,
- };
- do {
- offset -= init->flow_spec.hdr.size;
- memcpy((void *)((uintptr_t)flow_attr + offset),
- &init->flow_spec,
- init->flow_spec.hdr.size);
- ++flow_attr->num_of_specs;
- init = init->underlayer;
- } while (init != NULL);
- return flow_attr_size;
-}
-
-/**
- * Convert hash type position in indirection table initializer to
- * hash RX queue type.
- *
- * @param table
- * Indirection table initializer.
- * @param pos
- * Hash type position.
- *
- * @return
- * Hash RX queue type.
- */
-static enum hash_rxq_type
-hash_rxq_type_from_pos(const struct ind_table_init *table, unsigned int pos)
-{
- enum hash_rxq_type type = HASH_RXQ_TCPV4;
-
- assert(pos < table->hash_types_n);
- do {
- if ((table->hash_types & (1 << type)) && (pos-- == 0))
- break;
- ++type;
- } while (1);
- return type;
-}
-
-/**
- * Filter out disabled hash RX queue types from ind_table_init[].
- *
- * @param priv
- * Pointer to private structure.
- * @param[out] table
- * Output table.
- *
- * @return
- * Number of table entries.
- */
-static unsigned int
-priv_make_ind_table_init(struct priv *priv,
- struct ind_table_init (*table)[IND_TABLE_INIT_N])
-{
- uint64_t rss_hf;
- unsigned int i;
- unsigned int j;
- unsigned int table_n = 0;
- /* Mandatory to receive frames not handled by normal hash RX queues. */
- unsigned int hash_types_sup = 1 << HASH_RXQ_ETH;
-
- rss_hf = priv->rss_hf;
- /* Process other protocols only if more than one queue. */
- if (priv->rxqs_n > 1)
- for (i = 0; (i != hash_rxq_init_n); ++i)
- if (rss_hf & hash_rxq_init[i].dpdk_rss_hf)
- hash_types_sup |= (1 << i);
-
- /* Filter out entries whose protocols are not in the set. */
- for (i = 0, j = 0; (i != IND_TABLE_INIT_N); ++i) {
- unsigned int nb;
- unsigned int h;
-
- /* j is increased only if the table has valid protocols. */
- assert(j <= i);
- (*table)[j] = ind_table_init[i];
- (*table)[j].hash_types &= hash_types_sup;
- for (h = 0, nb = 0; (h != hash_rxq_init_n); ++h)
- if (((*table)[j].hash_types >> h) & 0x1)
- ++nb;
- (*table)[i].hash_types_n = nb;
- if (nb) {
- ++table_n;
- ++j;
- }
- }
- return table_n;
-}
-
-/**
- * Initialize hash RX queues and indirection table.
- *
- * @param priv
- * Pointer to private structure.
- *
- * @return
- * 0 on success, errno value on failure.
- */
-int
-priv_create_hash_rxqs(struct priv *priv)
-{
- struct ibv_exp_wq *wqs[priv->reta_idx_n];
- struct ind_table_init ind_table_init[IND_TABLE_INIT_N];
- unsigned int ind_tables_n =
- priv_make_ind_table_init(priv, &ind_table_init);
- unsigned int hash_rxqs_n = 0;
- struct hash_rxq (*hash_rxqs)[] = NULL;
- struct ibv_exp_rwq_ind_table *(*ind_tables)[] = NULL;
- unsigned int i;
- unsigned int j;
- unsigned int k;
- int err = 0;
-
- assert(priv->ind_tables == NULL);
- assert(priv->ind_tables_n == 0);
- assert(priv->hash_rxqs == NULL);
- assert(priv->hash_rxqs_n == 0);
- assert(priv->pd != NULL);
- assert(priv->ctx != NULL);
- if (priv->isolated)
- return 0;
- if (priv->rxqs_n == 0)
- return EINVAL;
- assert(priv->rxqs != NULL);
- if (ind_tables_n == 0) {
- ERROR("all hash RX queue types have been filtered out,"
- " indirection table cannot be created");
- return EINVAL;
- }
- if (priv->rxqs_n & (priv->rxqs_n - 1)) {
- INFO("%u RX queues are configured, consider rounding this"
- " number to the next power of two for better balancing",
- priv->rxqs_n);
- DEBUG("indirection table extended to assume %u WQs",
- priv->reta_idx_n);
- }
- for (i = 0; (i != priv->reta_idx_n); ++i) {
- struct rxq_ctrl *rxq_ctrl;
-
- rxq_ctrl = container_of((*priv->rxqs)[(*priv->reta_idx)[i]],
- struct rxq_ctrl, rxq);
- wqs[i] = rxq_ctrl->wq;
- }
- /* Get number of hash RX queues to configure. */
- for (i = 0, hash_rxqs_n = 0; (i != ind_tables_n); ++i)
- hash_rxqs_n += ind_table_init[i].hash_types_n;
- DEBUG("allocating %u hash RX queues for %u WQs, %u indirection tables",
- hash_rxqs_n, priv->rxqs_n, ind_tables_n);
- /* Create indirection tables. */
- ind_tables = rte_calloc(__func__, ind_tables_n,
- sizeof((*ind_tables)[0]), 0);
- if (ind_tables == NULL) {
- err = ENOMEM;
- ERROR("cannot allocate indirection tables container: %s",
- strerror(err));
- goto error;
- }
- for (i = 0; (i != ind_tables_n); ++i) {
- struct ibv_exp_rwq_ind_table_init_attr ind_init_attr = {
- .pd = priv->pd,
- .log_ind_tbl_size = 0, /* Set below. */
- .ind_tbl = wqs,
- .comp_mask = 0,
- };
- unsigned int ind_tbl_size = ind_table_init[i].max_size;
- struct ibv_exp_rwq_ind_table *ind_table;
-
- if (priv->reta_idx_n < ind_tbl_size)
- ind_tbl_size = priv->reta_idx_n;
- ind_init_attr.log_ind_tbl_size = log2above(ind_tbl_size);
- errno = 0;
- ind_table = ibv_exp_create_rwq_ind_table(priv->ctx,
- &ind_init_attr);
- if (ind_table != NULL) {
- (*ind_tables)[i] = ind_table;
- continue;
- }
- /* Not clear whether errno is set. */
- err = (errno ? errno : EINVAL);
- ERROR("RX indirection table creation failed with error %d: %s",
- err, strerror(err));
- goto error;
- }
- /* Allocate array that holds hash RX queues and related data. */
- hash_rxqs = rte_calloc(__func__, hash_rxqs_n,
- sizeof((*hash_rxqs)[0]), 0);
- if (hash_rxqs == NULL) {
- err = ENOMEM;
- ERROR("cannot allocate hash RX queues container: %s",
- strerror(err));
- goto error;
- }
- for (i = 0, j = 0, k = 0;
- ((i != hash_rxqs_n) && (j != ind_tables_n));
- ++i) {
- struct hash_rxq *hash_rxq = &(*hash_rxqs)[i];
- enum hash_rxq_type type =
- hash_rxq_type_from_pos(&ind_table_init[j], k);
- struct rte_eth_rss_conf *priv_rss_conf =
- (*priv->rss_conf)[type];
- struct ibv_exp_rx_hash_conf hash_conf = {
- .rx_hash_function = IBV_EXP_RX_HASH_FUNC_TOEPLITZ,
- .rx_hash_key_len = (priv_rss_conf ?
- priv_rss_conf->rss_key_len :
- rss_hash_default_key_len),
- .rx_hash_key = (priv_rss_conf ?
- priv_rss_conf->rss_key :
- rss_hash_default_key),
- .rx_hash_fields_mask = hash_rxq_init[type].hash_fields,
- .rwq_ind_tbl = (*ind_tables)[j],
- };
- struct ibv_exp_qp_init_attr qp_init_attr = {
- .max_inl_recv = 0, /* Currently not supported. */
- .qp_type = IBV_QPT_RAW_PACKET,
- .comp_mask = (IBV_EXP_QP_INIT_ATTR_PD |
- IBV_EXP_QP_INIT_ATTR_RX_HASH),
- .pd = priv->pd,
- .rx_hash_conf = &hash_conf,
- .port_num = priv->port,
- };
-
- DEBUG("using indirection table %u for hash RX queue %u type %d",
- j, i, type);
- *hash_rxq = (struct hash_rxq){
- .priv = priv,
- .qp = ibv_exp_create_qp(priv->ctx, &qp_init_attr),
- .type = type,
- };
- if (hash_rxq->qp == NULL) {
- err = (errno ? errno : EINVAL);
- ERROR("Hash RX QP creation failure: %s",
- strerror(err));
- goto error;
- }
- if (++k < ind_table_init[j].hash_types_n)
- continue;
- /* Switch to the next indirection table and reset hash RX
- * queue type array index. */
- ++j;
- k = 0;
- }
- priv->ind_tables = ind_tables;
- priv->ind_tables_n = ind_tables_n;
- priv->hash_rxqs = hash_rxqs;
- priv->hash_rxqs_n = hash_rxqs_n;
- assert(err == 0);
- return 0;
-error:
- if (hash_rxqs != NULL) {
- for (i = 0; (i != hash_rxqs_n); ++i) {
- struct ibv_qp *qp = (*hash_rxqs)[i].qp;
-
- if (qp == NULL)
- continue;
- claim_zero(ibv_destroy_qp(qp));
- }
- rte_free(hash_rxqs);
- }
- if (ind_tables != NULL) {
- for (j = 0; (j != ind_tables_n); ++j) {
- struct ibv_exp_rwq_ind_table *ind_table =
- (*ind_tables)[j];
-
- if (ind_table == NULL)
- continue;
- claim_zero(ibv_exp_destroy_rwq_ind_table(ind_table));
- }
- rte_free(ind_tables);
- }
- return err;
-}
-
-/**
- * Clean up hash RX queues and indirection table.
- *
- * @param priv
- * Pointer to private structure.
- */
-void
-priv_destroy_hash_rxqs(struct priv *priv)
-{
- unsigned int i;
-
- DEBUG("destroying %u hash RX queues", priv->hash_rxqs_n);
- if (priv->hash_rxqs_n == 0) {
- assert(priv->hash_rxqs == NULL);
- assert(priv->ind_tables == NULL);
- return;
- }
- for (i = 0; (i != priv->hash_rxqs_n); ++i) {
- struct hash_rxq *hash_rxq = &(*priv->hash_rxqs)[i];
- unsigned int j, k;
-
- assert(hash_rxq->priv == priv);
- assert(hash_rxq->qp != NULL);
- /* Also check that there are no remaining flows. */
- for (j = 0; (j != RTE_DIM(hash_rxq->special_flow)); ++j)
- for (k = 0;
- (k != RTE_DIM(hash_rxq->special_flow[j]));
- ++k)
- assert(hash_rxq->special_flow[j][k] == NULL);
- for (j = 0; (j != RTE_DIM(hash_rxq->mac_flow)); ++j)
- for (k = 0; (k != RTE_DIM(hash_rxq->mac_flow[j])); ++k)
- assert(hash_rxq->mac_flow[j][k] == NULL);
- claim_zero(ibv_destroy_qp(hash_rxq->qp));
- }
- priv->hash_rxqs_n = 0;
- rte_free(priv->hash_rxqs);
- priv->hash_rxqs = NULL;
- for (i = 0; (i != priv->ind_tables_n); ++i) {
- struct ibv_exp_rwq_ind_table *ind_table =
- (*priv->ind_tables)[i];
-
- assert(ind_table != NULL);
- claim_zero(ibv_exp_destroy_rwq_ind_table(ind_table));
- }
- priv->ind_tables_n = 0;
- rte_free(priv->ind_tables);
- priv->ind_tables = NULL;
-}
-
-/**
- * Check whether a given flow type is allowed.
- *
- * @param priv
- * Pointer to private structure.
- * @param type
- * Flow type to check.
- *
- * @return
- * Nonzero if the given flow type is allowed.
- */
-int
-priv_allow_flow_type(struct priv *priv, enum hash_rxq_flow_type type)
-{
- /* Only FLOW_TYPE_PROMISC is allowed when promiscuous mode
- * has been requested. */
- if (priv->promisc_req)
- return type == HASH_RXQ_FLOW_TYPE_PROMISC;
- switch (type) {
- case HASH_RXQ_FLOW_TYPE_PROMISC:
- return !!priv->promisc_req;
- case HASH_RXQ_FLOW_TYPE_ALLMULTI:
- return !!priv->allmulti_req;
- case HASH_RXQ_FLOW_TYPE_BROADCAST:
- case HASH_RXQ_FLOW_TYPE_IPV6MULTI:
- /* If allmulti is enabled, broadcast and ipv6multi
- * are unnecessary. */
- return !priv->allmulti_req;
- case HASH_RXQ_FLOW_TYPE_MAC:
- return 1;
- default:
- /* Unsupported flow type is not allowed. */
- return 0;
- }
- return 0;
-}
-
-/**
- * Automatically enable/disable flows according to configuration.
- *
- * @param priv
- * Private structure.
- *
- * @return
- * 0 on success, errno value on failure.
- */
-int
-priv_rehash_flows(struct priv *priv)
-{
- enum hash_rxq_flow_type i;
-
- for (i = HASH_RXQ_FLOW_TYPE_PROMISC;
- i != RTE_DIM((*priv->hash_rxqs)[0].special_flow);
- ++i)
- if (!priv_allow_flow_type(priv, i)) {
- priv_special_flow_disable(priv, i);
- } else {
- int ret = priv_special_flow_enable(priv, i);
-
- if (ret)
- return ret;
- }
- if (priv_allow_flow_type(priv, HASH_RXQ_FLOW_TYPE_MAC))
- return priv_mac_addrs_enable(priv);
- priv_mac_addrs_disable(priv);
- return 0;
-}
-
-/**
- * Unlike the regular Rx function, vPMD Rx doesn't replace mbufs immediately
- * when receiving packets; it replaces them later in bulk. In rxq->elts[],
- * entries from rq_pi to rq_ci are owned by the device while the rest have
- * already been delivered to the application. To prevent rxq_alloc_elts() from
- * reusing those mbufs, this function must be called to replace the used ones.
- *
- * @param rxq
- * Pointer to RX queue structure.
- */
-static void
-rxq_trim_elts(struct rxq *rxq)
-{
- const uint16_t q_n = (1 << rxq->elts_n);
- const uint16_t q_mask = q_n - 1;
- uint16_t used = q_n - (rxq->rq_ci - rxq->rq_pi);
- uint16_t i;
-
- if (!rxq->trim_elts)
- return;
- for (i = 0; i < used; ++i)
- (*rxq->elts)[(rxq->rq_ci + i) & q_mask] = NULL;
- rxq->trim_elts = 0;
- return;
-}
-
-/**
* Allocate RX queue elements.
*
* @param rxq_ctrl
* Pointer to RX queue structure.
- * @param elts_n
- * Number of elements to allocate.
- * @param[in] pool
- * If not NULL, fetch buffers from this array instead of allocating them
- * with rte_pktmbuf_alloc().
*
* @return
* 0 on success, errno value on failure.
*/
-static int
-rxq_alloc_elts(struct rxq_ctrl *rxq_ctrl, unsigned int elts_n,
- struct rte_mbuf *(*pool)[])
+int
+rxq_alloc_elts(struct mlx5_rxq_ctrl *rxq_ctrl)
{
const unsigned int sges_n = 1 << rxq_ctrl->rxq.sges_n;
+ unsigned int elts_n = 1 << rxq_ctrl->rxq.elts_n;
unsigned int i;
int ret = 0;
/* Iterate on segments. */
for (i = 0; (i != elts_n); ++i) {
struct rte_mbuf *buf;
- volatile struct mlx5_wqe_data_seg *scat =
- &(*rxq_ctrl->rxq.wqes)[i];
-
- buf = (pool != NULL) ? (*pool)[i] : NULL;
- if (buf != NULL) {
- rte_pktmbuf_reset(buf);
- rte_pktmbuf_refcnt_update(buf, 1);
- } else
- buf = rte_pktmbuf_alloc(rxq_ctrl->rxq.mp);
+
+ buf = rte_pktmbuf_alloc(rxq_ctrl->rxq.mp);
if (buf == NULL) {
ERROR("%p: empty mbuf pool", (void *)rxq_ctrl);
ret = ENOMEM;
@@ -711,21 +121,35 @@ rxq_alloc_elts(struct rxq_ctrl *rxq_ctrl, unsigned int elts_n,
DATA_LEN(buf) = rte_pktmbuf_tailroom(buf);
PKT_LEN(buf) = DATA_LEN(buf);
NB_SEGS(buf) = 1;
- /* scat->addr must be able to store a pointer. */
- assert(sizeof(scat->addr) >= sizeof(uintptr_t));
- *scat = (struct mlx5_wqe_data_seg){
- .addr = htonll(rte_pktmbuf_mtod(buf, uintptr_t)),
- .byte_count = htonl(DATA_LEN(buf)),
- .lkey = htonl(rxq_ctrl->mr->lkey),
- };
(*rxq_ctrl->rxq.elts)[i] = buf;
}
+	/* If Rx vector support is activated, prepare the vPMD fields. */
+ if (rxq_check_vec_support(&rxq_ctrl->rxq) > 0) {
+ struct mlx5_rxq_data *rxq = &rxq_ctrl->rxq;
+ struct rte_mbuf *mbuf_init = &rxq->fake_mbuf;
+ int j;
+
+ /* Initialize default rearm_data for vPMD. */
+ mbuf_init->data_off = RTE_PKTMBUF_HEADROOM;
+ rte_mbuf_refcnt_set(mbuf_init, 1);
+ mbuf_init->nb_segs = 1;
+ mbuf_init->port = rxq->port_id;
+ /*
+ * prevent compiler reordering:
+ * rearm_data covers previous fields.
+ */
+ rte_compiler_barrier();
+ rxq->mbuf_initializer =
+ *(uint64_t *)&mbuf_init->rearm_data;
+ /* Padding with a fake mbuf for vectorized Rx. */
+ for (j = 0; j < MLX5_VPMD_DESCS_PER_LOOP; ++j)
+ (*rxq->elts)[elts_n + j] = &rxq->fake_mbuf;
+ }
DEBUG("%p: allocated and configured %u segments (max %u packets)",
(void *)rxq_ctrl, elts_n, elts_n / (1 << rxq_ctrl->rxq.sges_n));
assert(ret == 0);
return 0;
error:
- assert(pool == NULL);
elts_n = i;
for (i = 0; (i != elts_n); ++i) {
if ((*rxq_ctrl->rxq.elts)[i] != NULL)
@@ -744,19 +168,30 @@ error:
* Pointer to RX queue structure.
*/
static void
-rxq_free_elts(struct rxq_ctrl *rxq_ctrl)
+rxq_free_elts(struct mlx5_rxq_ctrl *rxq_ctrl)
{
- unsigned int i;
+ struct mlx5_rxq_data *rxq = &rxq_ctrl->rxq;
+ const uint16_t q_n = (1 << rxq->elts_n);
+ const uint16_t q_mask = q_n - 1;
+ uint16_t used = q_n - (rxq->rq_ci - rxq->rq_pi);
+ uint16_t i;
- rxq_trim_elts(&rxq_ctrl->rxq);
DEBUG("%p: freeing WRs", (void *)rxq_ctrl);
- if (rxq_ctrl->rxq.elts == NULL)
+ if (rxq->elts == NULL)
return;
-
- for (i = 0; (i != (1u << rxq_ctrl->rxq.elts_n)); ++i) {
- if ((*rxq_ctrl->rxq.elts)[i] != NULL)
- rte_pktmbuf_free_seg((*rxq_ctrl->rxq.elts)[i]);
- (*rxq_ctrl->rxq.elts)[i] = NULL;
+ /**
+	 * Some mbufs in the ring belong to the application and cannot be
+	 * freed.
+ */
+ if (rxq_check_vec_support(rxq) > 0) {
+ for (i = 0; i < used; ++i)
+ (*rxq->elts)[(rxq->rq_ci + i) & q_mask] = NULL;
+ rxq->rq_pi = rxq->rq_ci;
+ }
+ for (i = 0; (i != (1u << rxq->elts_n)); ++i) {
+ if ((*rxq->elts)[i] != NULL)
+ rte_pktmbuf_free_seg((*rxq->elts)[i]);
+ (*rxq->elts)[i] = NULL;
}
}
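The used/owned arithmetic above relies on free-running 16-bit counters: rq_ci - rq_pi stays correct across wraparound, and the ring mask keeps slot indices in range. A minimal standalone sketch of that arithmetic, with illustrative counter values rather than ones read from a real queue:

#include <stdint.h>
#include <stdio.h>

int
main(void)
{
	const uint16_t elts_n = 8;          /* log2 of the ring size */
	const uint16_t q_n = 1 << elts_n;   /* 256 entries */
	const uint16_t q_mask = q_n - 1;
	/* Free-running counters are allowed to wrap around 65535. */
	uint16_t rq_pi = 65530;
	uint16_t rq_ci = (uint16_t)(rq_pi + 40);
	/* Entries from rq_pi to rq_ci are still owned by the device. */
	uint16_t owned = rq_ci - rq_pi;     /* 40, despite the wrap */
	uint16_t used = q_n - owned;        /* already handed to the app */

	printf("owned=%u used=%u first app slot=%u\n",
	       (unsigned int)owned, (unsigned int)used,
	       (unsigned int)(rq_ci & q_mask));
	return 0;
}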
@@ -769,343 +204,15 @@ rxq_free_elts(struct rxq_ctrl *rxq_ctrl)
* Pointer to RX queue structure.
*/
void
-rxq_cleanup(struct rxq_ctrl *rxq_ctrl)
+mlx5_rxq_cleanup(struct mlx5_rxq_ctrl *rxq_ctrl)
{
DEBUG("cleaning up %p", (void *)rxq_ctrl);
- rxq_free_elts(rxq_ctrl);
- if (rxq_ctrl->fdir_queue != NULL)
- priv_fdir_queue_destroy(rxq_ctrl->priv, rxq_ctrl->fdir_queue);
- if (rxq_ctrl->wq != NULL)
- claim_zero(ibv_exp_destroy_wq(rxq_ctrl->wq));
- if (rxq_ctrl->cq != NULL)
- claim_zero(ibv_destroy_cq(rxq_ctrl->cq));
- if (rxq_ctrl->channel != NULL)
- claim_zero(ibv_destroy_comp_channel(rxq_ctrl->channel));
- if (rxq_ctrl->mr != NULL)
- claim_zero(ibv_dereg_mr(rxq_ctrl->mr));
+ if (rxq_ctrl->ibv)
+ mlx5_priv_rxq_ibv_release(rxq_ctrl->priv, rxq_ctrl->ibv);
memset(rxq_ctrl, 0, sizeof(*rxq_ctrl));
}
/**
- * Initialize RX queue.
- *
- * @param tmpl
- * Pointer to RX queue control template.
- *
- * @return
- * 0 on success, errno value on failure.
- */
-static inline int
-rxq_setup(struct rxq_ctrl *tmpl)
-{
- struct ibv_cq *ibcq = tmpl->cq;
- struct ibv_mlx5_cq_info cq_info;
- struct mlx5_rwq *rwq = container_of(tmpl->wq, struct mlx5_rwq, wq);
- struct rte_mbuf *(*elts)[1 << tmpl->rxq.elts_n] =
- rte_calloc_socket("RXQ", 1, sizeof(*elts), 0, tmpl->socket);
-
- if (ibv_mlx5_exp_get_cq_info(ibcq, &cq_info)) {
- ERROR("Unable to query CQ info. check your OFED.");
- return ENOTSUP;
- }
- if (cq_info.cqe_size != RTE_CACHE_LINE_SIZE) {
- ERROR("Wrong MLX5_CQE_SIZE environment variable value: "
- "it should be set to %u", RTE_CACHE_LINE_SIZE);
- return EINVAL;
- }
- if (elts == NULL)
- return ENOMEM;
- tmpl->rxq.rq_db = rwq->rq.db;
- tmpl->rxq.cqe_n = log2above(cq_info.cqe_cnt);
- tmpl->rxq.cq_ci = 0;
- tmpl->rxq.rq_ci = 0;
- tmpl->rxq.rq_pi = 0;
- tmpl->rxq.cq_db = cq_info.dbrec;
- tmpl->rxq.wqes =
- (volatile struct mlx5_wqe_data_seg (*)[])
- (uintptr_t)rwq->rq.buff;
- tmpl->rxq.cqes =
- (volatile struct mlx5_cqe (*)[])
- (uintptr_t)cq_info.buf;
- tmpl->rxq.elts = elts;
- return 0;
-}
-
-/**
- * Configure a RX queue.
- *
- * @param dev
- * Pointer to Ethernet device structure.
- * @param rxq_ctrl
- * Pointer to RX queue structure.
- * @param desc
- * Number of descriptors to configure in queue.
- * @param socket
- * NUMA socket on which memory must be allocated.
- * @param[in] conf
- * Thresholds parameters.
- * @param mp
- * Memory pool for buffer allocations.
- *
- * @return
- * 0 on success, errno value on failure.
- */
-static int
-rxq_ctrl_setup(struct rte_eth_dev *dev, struct rxq_ctrl *rxq_ctrl,
- uint16_t desc, unsigned int socket,
- const struct rte_eth_rxconf *conf, struct rte_mempool *mp)
-{
- struct priv *priv = dev->data->dev_private;
- struct rxq_ctrl tmpl = {
- .priv = priv,
- .socket = socket,
- .rxq = {
- .elts_n = log2above(desc),
- .mp = mp,
- .rss_hash = priv->rxqs_n > 1,
- },
- };
- struct ibv_exp_wq_attr mod;
- union {
- struct ibv_exp_cq_init_attr cq;
- struct ibv_exp_wq_init_attr wq;
- struct ibv_exp_cq_attr cq_attr;
- } attr;
- unsigned int mb_len = rte_pktmbuf_data_room_size(mp);
- unsigned int cqe_n = desc - 1;
- struct rte_mbuf *(*elts)[desc] = NULL;
- int ret = 0;
-
- (void)conf; /* Thresholds configuration (ignored). */
- /* Enable scattered packets support for this queue if necessary. */
- assert(mb_len >= RTE_PKTMBUF_HEADROOM);
- if (dev->data->dev_conf.rxmode.max_rx_pkt_len <=
- (mb_len - RTE_PKTMBUF_HEADROOM)) {
- tmpl.rxq.sges_n = 0;
- } else if (dev->data->dev_conf.rxmode.enable_scatter) {
- unsigned int size =
- RTE_PKTMBUF_HEADROOM +
- dev->data->dev_conf.rxmode.max_rx_pkt_len;
- unsigned int sges_n;
-
- /*
- * Determine the number of SGEs needed for a full packet
- * and round it to the next power of two.
- */
- sges_n = log2above((size / mb_len) + !!(size % mb_len));
- tmpl.rxq.sges_n = sges_n;
- /* Make sure rxq.sges_n did not overflow. */
- size = mb_len * (1 << tmpl.rxq.sges_n);
- size -= RTE_PKTMBUF_HEADROOM;
- if (size < dev->data->dev_conf.rxmode.max_rx_pkt_len) {
- ERROR("%p: too many SGEs (%u) needed to handle"
- " requested maximum packet size %u",
- (void *)dev,
- 1 << sges_n,
- dev->data->dev_conf.rxmode.max_rx_pkt_len);
- return EOVERFLOW;
- }
- } else {
- WARN("%p: the requested maximum Rx packet size (%u) is"
- " larger than a single mbuf (%u) and scattered"
- " mode has not been requested",
- (void *)dev,
- dev->data->dev_conf.rxmode.max_rx_pkt_len,
- mb_len - RTE_PKTMBUF_HEADROOM);
- }
- DEBUG("%p: maximum number of segments per packet: %u",
- (void *)dev, 1 << tmpl.rxq.sges_n);
- if (desc % (1 << tmpl.rxq.sges_n)) {
- ERROR("%p: number of RX queue descriptors (%u) is not a"
- " multiple of SGEs per packet (%u)",
- (void *)dev,
- desc,
- 1 << tmpl.rxq.sges_n);
- return EINVAL;
- }
- /* Toggle RX checksum offload if hardware supports it. */
- if (priv->hw_csum)
- tmpl.rxq.csum = !!dev->data->dev_conf.rxmode.hw_ip_checksum;
- if (priv->hw_csum_l2tun)
- tmpl.rxq.csum_l2tun =
- !!dev->data->dev_conf.rxmode.hw_ip_checksum;
- /* Use the entire RX mempool as the memory region. */
- tmpl.mr = mlx5_mp2mr(priv->pd, mp);
- if (tmpl.mr == NULL) {
- ret = EINVAL;
- ERROR("%p: MR creation failure: %s",
- (void *)dev, strerror(ret));
- goto error;
- }
- if (dev->data->dev_conf.intr_conf.rxq) {
- tmpl.channel = ibv_create_comp_channel(priv->ctx);
- if (tmpl.channel == NULL) {
- ret = ENOMEM;
- ERROR("%p: Rx interrupt completion channel creation"
- " failure: %s",
- (void *)dev, strerror(ret));
- goto error;
- }
- }
- attr.cq = (struct ibv_exp_cq_init_attr){
- .comp_mask = 0,
- };
- if (priv->cqe_comp) {
- attr.cq.comp_mask |= IBV_EXP_CQ_INIT_ATTR_FLAGS;
- attr.cq.flags |= IBV_EXP_CQ_COMPRESSED_CQE;
- /*
- * For vectorized Rx, it must not be doubled in order to
- * make cq_ci and rq_ci aligned.
- */
- if (rxq_check_vec_support(&tmpl.rxq) < 0)
- cqe_n = (desc * 2) - 1; /* Double the number of CQEs. */
- }
- tmpl.cq = ibv_exp_create_cq(priv->ctx, cqe_n, NULL, tmpl.channel, 0,
- &attr.cq);
- if (tmpl.cq == NULL) {
- ret = ENOMEM;
- ERROR("%p: CQ creation failure: %s",
- (void *)dev, strerror(ret));
- goto error;
- }
- DEBUG("priv->device_attr.max_qp_wr is %d",
- priv->device_attr.max_qp_wr);
- DEBUG("priv->device_attr.max_sge is %d",
- priv->device_attr.max_sge);
- /* Configure VLAN stripping. */
- tmpl.rxq.vlan_strip = (priv->hw_vlan_strip &&
- !!dev->data->dev_conf.rxmode.hw_vlan_strip);
- attr.wq = (struct ibv_exp_wq_init_attr){
- .wq_context = NULL, /* Could be useful in the future. */
- .wq_type = IBV_EXP_WQT_RQ,
- /* Max number of outstanding WRs. */
- .max_recv_wr = desc >> tmpl.rxq.sges_n,
- /* Max number of scatter/gather elements in a WR. */
- .max_recv_sge = 1 << tmpl.rxq.sges_n,
- .pd = priv->pd,
- .cq = tmpl.cq,
- .comp_mask =
- IBV_EXP_CREATE_WQ_VLAN_OFFLOADS |
- 0,
- .vlan_offloads = (tmpl.rxq.vlan_strip ?
- IBV_EXP_RECEIVE_WQ_CVLAN_STRIP :
- 0),
- };
- /* By default, FCS (CRC) is stripped by hardware. */
- if (dev->data->dev_conf.rxmode.hw_strip_crc) {
- tmpl.rxq.crc_present = 0;
- } else if (priv->hw_fcs_strip) {
- /* Ask HW/Verbs to leave CRC in place when supported. */
- attr.wq.flags |= IBV_EXP_CREATE_WQ_FLAG_SCATTER_FCS;
- attr.wq.comp_mask |= IBV_EXP_CREATE_WQ_FLAGS;
- tmpl.rxq.crc_present = 1;
- } else {
- WARN("%p: CRC stripping has been disabled but will still"
- " be performed by hardware, make sure MLNX_OFED and"
- " firmware are up to date",
- (void *)dev);
- tmpl.rxq.crc_present = 0;
- }
- DEBUG("%p: CRC stripping is %s, %u bytes will be subtracted from"
- " incoming frames to hide it",
- (void *)dev,
- tmpl.rxq.crc_present ? "disabled" : "enabled",
- tmpl.rxq.crc_present << 2);
- if (!mlx5_getenv_int("MLX5_PMD_ENABLE_PADDING"))
- ; /* Nothing else to do. */
- else if (priv->hw_padding) {
- INFO("%p: enabling packet padding on queue %p",
- (void *)dev, (void *)rxq_ctrl);
- attr.wq.flags |= IBV_EXP_CREATE_WQ_FLAG_RX_END_PADDING;
- attr.wq.comp_mask |= IBV_EXP_CREATE_WQ_FLAGS;
- } else
- WARN("%p: packet padding has been requested but is not"
- " supported, make sure MLNX_OFED and firmware are"
- " up to date",
- (void *)dev);
-
- tmpl.wq = ibv_exp_create_wq(priv->ctx, &attr.wq);
- if (tmpl.wq == NULL) {
- ret = (errno ? errno : EINVAL);
- ERROR("%p: WQ creation failure: %s",
- (void *)dev, strerror(ret));
- goto error;
- }
- /*
- * Make sure number of WRs*SGEs match expectations since a queue
- * cannot allocate more than "desc" buffers.
- */
- if (((int)attr.wq.max_recv_wr != (desc >> tmpl.rxq.sges_n)) ||
- ((int)attr.wq.max_recv_sge != (1 << tmpl.rxq.sges_n))) {
- ERROR("%p: requested %u*%u but got %u*%u WRs*SGEs",
- (void *)dev,
- (desc >> tmpl.rxq.sges_n), (1 << tmpl.rxq.sges_n),
- attr.wq.max_recv_wr, attr.wq.max_recv_sge);
- ret = EINVAL;
- goto error;
- }
- /* Save port ID. */
- tmpl.rxq.port_id = dev->data->port_id;
- DEBUG("%p: RTE port ID: %u", (void *)rxq_ctrl, tmpl.rxq.port_id);
- /* Change queue state to ready. */
- mod = (struct ibv_exp_wq_attr){
- .attr_mask = IBV_EXP_WQ_ATTR_STATE,
- .wq_state = IBV_EXP_WQS_RDY,
- };
- ret = ibv_exp_modify_wq(tmpl.wq, &mod);
- if (ret) {
- ERROR("%p: WQ state to IBV_EXP_WQS_RDY failed: %s",
- (void *)dev, strerror(ret));
- goto error;
- }
- ret = rxq_setup(&tmpl);
- if (ret) {
- ERROR("%p: cannot initialize RX queue structure: %s",
- (void *)dev, strerror(ret));
- goto error;
- }
- /* Reuse buffers from original queue if possible. */
- if (rxq_ctrl->rxq.elts_n) {
- assert(1 << rxq_ctrl->rxq.elts_n == desc);
- assert(rxq_ctrl->rxq.elts != tmpl.rxq.elts);
- rxq_trim_elts(&rxq_ctrl->rxq);
- ret = rxq_alloc_elts(&tmpl, desc, rxq_ctrl->rxq.elts);
- } else
- ret = rxq_alloc_elts(&tmpl, desc, NULL);
- if (ret) {
- ERROR("%p: RXQ allocation failed: %s",
- (void *)dev, strerror(ret));
- goto error;
- }
- /* Clean up rxq in case we're reinitializing it. */
- DEBUG("%p: cleaning-up old rxq just in case", (void *)rxq_ctrl);
- rxq_cleanup(rxq_ctrl);
- /* Move mbuf pointers to dedicated storage area in RX queue. */
- elts = (void *)(rxq_ctrl + 1);
- rte_memcpy(elts, tmpl.rxq.elts, sizeof(*elts));
-#ifndef NDEBUG
- memset(tmpl.rxq.elts, 0x55, sizeof(*elts));
-#endif
- rte_free(tmpl.rxq.elts);
- tmpl.rxq.elts = elts;
- *rxq_ctrl = tmpl;
- /* Update doorbell counter. */
- rxq_ctrl->rxq.rq_ci = desc >> rxq_ctrl->rxq.sges_n;
- rte_wmb();
- *rxq_ctrl->rxq.rq_db = htonl(rxq_ctrl->rxq.rq_ci);
- DEBUG("%p: rxq updated with %p", (void *)rxq_ctrl, (void *)&tmpl);
- assert(ret == 0);
- return 0;
-error:
- elts = tmpl.rxq.elts;
- rxq_cleanup(&tmpl);
- rte_free(elts);
- assert(ret > 0);
- return ret;
-}
-
-/**
- * DPDK callback to configure a RX queue.
*
* @param dev
* Pointer to Ethernet device structure.
@@ -1129,14 +236,14 @@ mlx5_rx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
struct rte_mempool *mp)
{
struct priv *priv = dev->data->dev_private;
- struct rxq *rxq = (*priv->rxqs)[idx];
- struct rxq_ctrl *rxq_ctrl = container_of(rxq, struct rxq_ctrl, rxq);
- const uint16_t desc_pad = MLX5_VPMD_DESCS_PER_LOOP; /* For vPMD. */
- int ret;
+ struct mlx5_rxq_data *rxq = (*priv->rxqs)[idx];
+ struct mlx5_rxq_ctrl *rxq_ctrl =
+ container_of(rxq, struct mlx5_rxq_ctrl, rxq);
+ int ret = 0;
+ (void)conf;
if (mlx5_is_secondary())
return -E_RTE_SECONDARY;
-
priv_lock(priv);
if (!rte_is_power_of_2(desc)) {
desc = 1 << log2above(desc);
@@ -1152,50 +259,24 @@ mlx5_rx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
priv_unlock(priv);
return -EOVERFLOW;
}
- if (rxq != NULL) {
- DEBUG("%p: reusing already allocated queue index %u (%p)",
- (void *)dev, idx, (void *)rxq);
- if (priv->started) {
- priv_unlock(priv);
- return -EEXIST;
- }
- (*priv->rxqs)[idx] = NULL;
- rxq_cleanup(rxq_ctrl);
- /* Resize if rxq size is changed. */
- if (rxq_ctrl->rxq.elts_n != log2above(desc)) {
- rxq_ctrl = rte_realloc(rxq_ctrl,
- sizeof(*rxq_ctrl) +
- (desc + desc_pad) *
- sizeof(struct rte_mbuf *),
- RTE_CACHE_LINE_SIZE);
- if (!rxq_ctrl) {
- ERROR("%p: unable to reallocate queue index %u",
- (void *)dev, idx);
- priv_unlock(priv);
- return -ENOMEM;
- }
- }
- } else {
- rxq_ctrl = rte_calloc_socket("RXQ", 1, sizeof(*rxq_ctrl) +
- (desc + desc_pad) *
- sizeof(struct rte_mbuf *),
- 0, socket);
- if (rxq_ctrl == NULL) {
- ERROR("%p: unable to allocate queue index %u",
- (void *)dev, idx);
- priv_unlock(priv);
- return -ENOMEM;
- }
+ if (!mlx5_priv_rxq_releasable(priv, idx)) {
+ ret = EBUSY;
+ ERROR("%p: unable to release queue index %u",
+ (void *)dev, idx);
+ goto out;
}
- ret = rxq_ctrl_setup(dev, rxq_ctrl, desc, socket, conf, mp);
- if (ret)
- rte_free(rxq_ctrl);
- else {
- rxq_ctrl->rxq.stats.idx = idx;
- DEBUG("%p: adding RX queue %p to list",
- (void *)dev, (void *)rxq_ctrl);
- (*priv->rxqs)[idx] = &rxq_ctrl->rxq;
+ mlx5_priv_rxq_release(priv, idx);
+ rxq_ctrl = mlx5_priv_rxq_new(priv, idx, desc, socket, mp);
+ if (!rxq_ctrl) {
+ ERROR("%p: unable to allocate queue index %u",
+ (void *)dev, idx);
+ ret = ENOMEM;
+ goto out;
}
+ DEBUG("%p: adding RX queue %p to list",
+ (void *)dev, (void *)rxq_ctrl);
+ (*priv->rxqs)[idx] = &rxq_ctrl->rxq;
+out:
priv_unlock(priv);
return -ret;
}
@@ -1209,76 +290,26 @@ mlx5_rx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
void
mlx5_rx_queue_release(void *dpdk_rxq)
{
- struct rxq *rxq = (struct rxq *)dpdk_rxq;
- struct rxq_ctrl *rxq_ctrl;
+ struct mlx5_rxq_data *rxq = (struct mlx5_rxq_data *)dpdk_rxq;
+ struct mlx5_rxq_ctrl *rxq_ctrl;
struct priv *priv;
- unsigned int i;
if (mlx5_is_secondary())
return;
if (rxq == NULL)
return;
- rxq_ctrl = container_of(rxq, struct rxq_ctrl, rxq);
+ rxq_ctrl = container_of(rxq, struct mlx5_rxq_ctrl, rxq);
priv = rxq_ctrl->priv;
priv_lock(priv);
- if (priv_flow_rxq_in_use(priv, rxq))
+ if (!mlx5_priv_rxq_releasable(priv, rxq_ctrl->rxq.stats.idx))
rte_panic("Rx queue %p is still used by a flow and cannot be"
" removed\n", (void *)rxq_ctrl);
- for (i = 0; (i != priv->rxqs_n); ++i)
- if ((*priv->rxqs)[i] == rxq) {
- DEBUG("%p: removing RX queue %p from list",
- (void *)priv->dev, (void *)rxq_ctrl);
- (*priv->rxqs)[i] = NULL;
- break;
- }
- rxq_cleanup(rxq_ctrl);
- rte_free(rxq_ctrl);
+ mlx5_priv_rxq_release(priv, rxq_ctrl->rxq.stats.idx);
priv_unlock(priv);
}
/**
- * DPDK callback for RX in secondary processes.
- *
- * This function configures all queues from primary process information
- * if necessary before reverting to the normal RX burst callback.
- *
- * @param dpdk_rxq
- * Generic pointer to RX queue structure.
- * @param[out] pkts
- * Array to store received packets.
- * @param pkts_n
- * Maximum number of packets in array.
- *
- * @return
- * Number of packets successfully received (<= pkts_n).
- */
-uint16_t
-mlx5_rx_burst_secondary_setup(void *dpdk_rxq, struct rte_mbuf **pkts,
- uint16_t pkts_n)
-{
- struct rxq *rxq = dpdk_rxq;
- struct rxq_ctrl *rxq_ctrl = container_of(rxq, struct rxq_ctrl, rxq);
- struct priv *priv = mlx5_secondary_data_setup(rxq_ctrl->priv);
- struct priv *primary_priv;
- unsigned int index;
-
- if (priv == NULL)
- return 0;
- primary_priv =
- mlx5_secondary_data[priv->dev->data->port_id].primary_priv;
- /* Look for queue index in both private structures. */
- for (index = 0; index != priv->rxqs_n; ++index)
- if (((*primary_priv->rxqs)[index] == rxq) ||
- ((*priv->rxqs)[index] == rxq))
- break;
- if (index == priv->rxqs_n)
- return 0;
- rxq = (*priv->rxqs)[index];
- return priv->dev->rx_pkt_burst(rxq, pkts, pkts_n);
-}
-
-/**
* Allocate queue vector and fill epoll fd list for Rx interrupts.
*
* @param priv
@@ -1296,6 +327,7 @@ priv_rx_intr_vec_enable(struct priv *priv)
unsigned int count = 0;
struct rte_intr_handle *intr_handle = priv->dev->intr_handle;
+ assert(!mlx5_is_secondary());
if (!priv->dev->data->dev_conf.intr_conf.rxq)
return 0;
priv_rx_intr_vec_disable(priv);
@@ -1307,15 +339,14 @@ priv_rx_intr_vec_enable(struct priv *priv)
}
intr_handle->type = RTE_INTR_HANDLE_EXT;
for (i = 0; i != n; ++i) {
- struct rxq *rxq = (*priv->rxqs)[i];
- struct rxq_ctrl *rxq_ctrl =
- container_of(rxq, struct rxq_ctrl, rxq);
+ /* This rxq ibv must not be released in this function. */
+ struct mlx5_rxq_ibv *rxq_ibv = mlx5_priv_rxq_ibv_get(priv, i);
int fd;
int flags;
int rc;
/* Skip queues that cannot request interrupts. */
- if (!rxq || !rxq_ctrl->channel) {
+ if (!rxq_ibv || !rxq_ibv->channel) {
/* Use invalid intr_vec[] index to disable entry. */
intr_handle->intr_vec[i] =
RTE_INTR_VEC_RXTX_OFFSET +
@@ -1329,7 +360,7 @@ priv_rx_intr_vec_enable(struct priv *priv)
priv_rx_intr_vec_disable(priv);
return -1;
}
- fd = rxq_ctrl->channel->fd;
+ fd = rxq_ibv->channel->fd;
flags = fcntl(fd, F_GETFL);
rc = fcntl(fd, F_SETFL, flags | O_NONBLOCK);
if (rc < 0) {
@@ -1359,14 +390,61 @@ void
priv_rx_intr_vec_disable(struct priv *priv)
{
struct rte_intr_handle *intr_handle = priv->dev->intr_handle;
+ unsigned int i;
+ unsigned int rxqs_n = priv->rxqs_n;
+ unsigned int n = RTE_MIN(rxqs_n, (uint32_t)RTE_MAX_RXTX_INTR_VEC_ID);
+ if (!priv->dev->data->dev_conf.intr_conf.rxq)
+ return;
+ if (!intr_handle->intr_vec)
+ goto free;
+ for (i = 0; i != n; ++i) {
+ struct mlx5_rxq_ctrl *rxq_ctrl;
+ struct mlx5_rxq_data *rxq_data;
+
+ if (intr_handle->intr_vec[i] == RTE_INTR_VEC_RXTX_OFFSET +
+ RTE_MAX_RXTX_INTR_VEC_ID)
+ continue;
+ /**
+		 * Need to access the queue directly to release the reference
+ * kept in priv_rx_intr_vec_enable().
+ */
+ rxq_data = (*priv->rxqs)[i];
+ rxq_ctrl = container_of(rxq_data, struct mlx5_rxq_ctrl, rxq);
+ mlx5_priv_rxq_ibv_release(priv, rxq_ctrl->ibv);
+ }
+free:
rte_intr_free_epoll_fd(intr_handle);
- free(intr_handle->intr_vec);
+ if (intr_handle->intr_vec)
+ free(intr_handle->intr_vec);
intr_handle->nb_efd = 0;
intr_handle->intr_vec = NULL;
}
-#ifdef HAVE_UPDATE_CQ_CI
+/**
+ * MLX5 CQ notification.
+ *
+ * @param rxq
+ * Pointer to receive queue structure.
+ * @param sq_n_rxq
+ *   Sequence number per receive queue.
+ */
+static inline void
+mlx5_arm_cq(struct mlx5_rxq_data *rxq, int sq_n_rxq)
+{
+ int sq_n = 0;
+ uint32_t doorbell_hi;
+ uint64_t doorbell;
+ void *cq_db_reg = (char *)rxq->cq_uar + MLX5_CQ_DOORBELL;
+
+ sq_n = sq_n_rxq & MLX5_CQ_SQN_MASK;
+ doorbell_hi = sq_n << MLX5_CQ_SQN_OFFSET | (rxq->cq_ci & MLX5_CI_MASK);
+ doorbell = (uint64_t)doorbell_hi << 32;
+ doorbell |= rxq->cqn;
+ rxq->cq_db[MLX5_CQ_ARM_DB] = rte_cpu_to_be_32(doorbell_hi);
+ rte_wmb();
+ rte_write64(rte_cpu_to_be_64(doorbell), cq_db_reg);
+}
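The arm doorbell written above packs the sequence number and CQ consumer index into the high 32 bits and the CQ number into the low 32 bits before the 64-bit write. A standalone sketch of that composition; the mask/offset values below are placeholders standing in for the MLX5_CQ_SQN_*/MLX5_CI_MASK definitions in mlx5_prm.h, and the index/CQ numbers are arbitrary:

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

/* Placeholder values; the real ones come from mlx5_prm.h. */
#define SQN_MASK   0x3u
#define SQN_OFFSET 28
#define CI_MASK    0xffffffu

int
main(void)
{
	uint32_t cq_ci = 0x1234;  /* CQ consumer index, arbitrary */
	uint32_t cqn = 0xabcd;    /* CQ number, arbitrary */
	uint32_t sq_n = 5 & SQN_MASK;
	uint32_t doorbell_hi = (sq_n << SQN_OFFSET) | (cq_ci & CI_MASK);
	uint64_t doorbell = ((uint64_t)doorbell_hi << 32) | cqn;

	/* The driver converts this to big endian and writes it to the UAR. */
	printf("doorbell = 0x%016" PRIx64 "\n", doorbell);
	return 0;
}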
/**
* DPDK callback for Rx queue interrupt enable.
@@ -1383,16 +461,30 @@ int
mlx5_rx_intr_enable(struct rte_eth_dev *dev, uint16_t rx_queue_id)
{
struct priv *priv = mlx5_get_priv(dev);
- struct rxq *rxq = (*priv->rxqs)[rx_queue_id];
- struct rxq_ctrl *rxq_ctrl = container_of(rxq, struct rxq_ctrl, rxq);
- int ret;
+ struct mlx5_rxq_data *rxq_data;
+ struct mlx5_rxq_ctrl *rxq_ctrl;
+ int ret = 0;
- if (!rxq || !rxq_ctrl->channel) {
+ priv_lock(priv);
+ rxq_data = (*priv->rxqs)[rx_queue_id];
+ if (!rxq_data) {
ret = EINVAL;
- } else {
- ibv_mlx5_exp_update_cq_ci(rxq_ctrl->cq, rxq->cq_ci);
- ret = ibv_req_notify_cq(rxq_ctrl->cq, 0);
+ goto exit;
+ }
+ rxq_ctrl = container_of(rxq_data, struct mlx5_rxq_ctrl, rxq);
+ if (rxq_ctrl->irq) {
+ struct mlx5_rxq_ibv *rxq_ibv;
+
+ rxq_ibv = mlx5_priv_rxq_ibv_get(priv, rx_queue_id);
+ if (!rxq_ibv) {
+ ret = EINVAL;
+ goto exit;
+ }
+ mlx5_arm_cq(rxq_data, rxq_data->cq_arm_sn);
+ mlx5_priv_rxq_ibv_release(priv, rxq_ibv);
}
+exit:
+ priv_unlock(priv);
if (ret)
WARN("unable to arm interrupt on rx queue %d", rx_queue_id);
return -ret;
@@ -1413,25 +505,920 @@ int
mlx5_rx_intr_disable(struct rte_eth_dev *dev, uint16_t rx_queue_id)
{
struct priv *priv = mlx5_get_priv(dev);
- struct rxq *rxq = (*priv->rxqs)[rx_queue_id];
- struct rxq_ctrl *rxq_ctrl = container_of(rxq, struct rxq_ctrl, rxq);
+ struct mlx5_rxq_data *rxq_data;
+ struct mlx5_rxq_ctrl *rxq_ctrl;
+ struct mlx5_rxq_ibv *rxq_ibv = NULL;
struct ibv_cq *ev_cq;
void *ev_ctx;
- int ret;
+ int ret = 0;
- if (!rxq || !rxq_ctrl->channel) {
+ priv_lock(priv);
+ rxq_data = (*priv->rxqs)[rx_queue_id];
+ if (!rxq_data) {
ret = EINVAL;
- } else {
- ret = ibv_get_cq_event(rxq_ctrl->cq->channel, &ev_cq, &ev_ctx);
- if (ret || ev_cq != rxq_ctrl->cq)
- ret = EINVAL;
+ goto exit;
+ }
+ rxq_ctrl = container_of(rxq_data, struct mlx5_rxq_ctrl, rxq);
+ if (!rxq_ctrl->irq)
+ goto exit;
+ rxq_ibv = mlx5_priv_rxq_ibv_get(priv, rx_queue_id);
+ if (!rxq_ibv) {
+ ret = EINVAL;
+ goto exit;
+ }
+ ret = ibv_get_cq_event(rxq_ibv->channel, &ev_cq, &ev_ctx);
+ if (ret || ev_cq != rxq_ibv->cq) {
+ ret = EINVAL;
+ goto exit;
}
+ rxq_data->cq_arm_sn++;
+ ibv_ack_cq_events(rxq_ibv->cq, 1);
+exit:
+ if (rxq_ibv)
+ mlx5_priv_rxq_ibv_release(priv, rxq_ibv);
+ priv_unlock(priv);
if (ret)
WARN("unable to disable interrupt on rx queue %d",
rx_queue_id);
- else
- ibv_ack_cq_events(rxq_ctrl->cq, 1);
return -ret;
}
-#endif /* HAVE_UPDATE_CQ_CI */
+/**
+ * Create the Rx queue Verbs object.
+ *
+ * @param priv
+ * Pointer to private structure.
+ * @param idx
+ *   Queue index in DPDK Rx queue array.
+ *
+ * @return
+ *   The Verbs object initialised if it can be created, NULL otherwise.
+ */
+struct mlx5_rxq_ibv*
+mlx5_priv_rxq_ibv_new(struct priv *priv, uint16_t idx)
+{
+ struct mlx5_rxq_data *rxq_data = (*priv->rxqs)[idx];
+ struct mlx5_rxq_ctrl *rxq_ctrl =
+ container_of(rxq_data, struct mlx5_rxq_ctrl, rxq);
+ struct ibv_wq_attr mod;
+ union {
+ struct {
+ struct ibv_cq_init_attr_ex ibv;
+ struct mlx5dv_cq_init_attr mlx5;
+ } cq;
+ struct ibv_wq_init_attr wq;
+ struct ibv_cq_ex cq_attr;
+ } attr;
+ unsigned int cqe_n = (1 << rxq_data->elts_n) - 1;
+ struct mlx5_rxq_ibv *tmpl;
+ struct mlx5dv_cq cq_info;
+ struct mlx5dv_rwq rwq;
+ unsigned int i;
+ int ret = 0;
+ struct mlx5dv_obj obj;
+
+ assert(rxq_data);
+ assert(!rxq_ctrl->ibv);
+ tmpl = rte_calloc_socket(__func__, 1, sizeof(*tmpl), 0,
+ rxq_ctrl->socket);
+ if (!tmpl) {
+ ERROR("%p: cannot allocate verbs resources",
+ (void *)rxq_ctrl);
+ goto error;
+ }
+ tmpl->rxq_ctrl = rxq_ctrl;
+ /* Use the entire RX mempool as the memory region. */
+ tmpl->mr = priv_mr_get(priv, rxq_data->mp);
+ if (!tmpl->mr) {
+ tmpl->mr = priv_mr_new(priv, rxq_data->mp);
+ if (!tmpl->mr) {
+ ERROR("%p: MR creation failure", (void *)rxq_ctrl);
+ goto error;
+ }
+ }
+ if (rxq_ctrl->irq) {
+ tmpl->channel = ibv_create_comp_channel(priv->ctx);
+ if (!tmpl->channel) {
+ ERROR("%p: Comp Channel creation failure",
+ (void *)rxq_ctrl);
+ goto error;
+ }
+ }
+ attr.cq.ibv = (struct ibv_cq_init_attr_ex){
+ .cqe = cqe_n,
+ .channel = tmpl->channel,
+ .comp_mask = 0,
+ };
+ attr.cq.mlx5 = (struct mlx5dv_cq_init_attr){
+ .comp_mask = 0,
+ };
+ if (priv->cqe_comp && !rxq_data->hw_timestamp) {
+ attr.cq.mlx5.comp_mask |=
+ MLX5DV_CQ_INIT_ATTR_MASK_COMPRESSED_CQE;
+ attr.cq.mlx5.cqe_comp_res_format = MLX5DV_CQE_RES_FORMAT_HASH;
+ /*
+		 * For vectorized Rx, the CQE count must not be doubled, to
+		 * keep cq_ci and rq_ci aligned.
+ */
+ if (rxq_check_vec_support(rxq_data) < 0)
+ attr.cq.ibv.cqe *= 2;
+ } else if (priv->cqe_comp && rxq_data->hw_timestamp) {
+ DEBUG("Rx CQE compression is disabled for HW timestamp");
+ }
+ tmpl->cq = ibv_cq_ex_to_cq(mlx5dv_create_cq(priv->ctx, &attr.cq.ibv,
+ &attr.cq.mlx5));
+ if (tmpl->cq == NULL) {
+ ERROR("%p: CQ creation failure", (void *)rxq_ctrl);
+ goto error;
+ }
+ DEBUG("priv->device_attr.max_qp_wr is %d",
+ priv->device_attr.orig_attr.max_qp_wr);
+ DEBUG("priv->device_attr.max_sge is %d",
+ priv->device_attr.orig_attr.max_sge);
+ attr.wq = (struct ibv_wq_init_attr){
+ .wq_context = NULL, /* Could be useful in the future. */
+ .wq_type = IBV_WQT_RQ,
+ /* Max number of outstanding WRs. */
+ .max_wr = (1 << rxq_data->elts_n) >> rxq_data->sges_n,
+ /* Max number of scatter/gather elements in a WR. */
+ .max_sge = 1 << rxq_data->sges_n,
+ .pd = priv->pd,
+ .cq = tmpl->cq,
+ .comp_mask =
+ IBV_WQ_FLAGS_CVLAN_STRIPPING |
+ 0,
+ .create_flags = (rxq_data->vlan_strip ?
+ IBV_WQ_FLAGS_CVLAN_STRIPPING :
+ 0),
+ };
+ /* By default, FCS (CRC) is stripped by hardware. */
+ if (rxq_data->crc_present) {
+ attr.wq.create_flags |= IBV_WQ_FLAGS_SCATTER_FCS;
+ attr.wq.comp_mask |= IBV_WQ_INIT_ATTR_FLAGS;
+ }
+#ifdef HAVE_IBV_WQ_FLAG_RX_END_PADDING
+ if (priv->hw_padding) {
+ attr.wq.create_flags |= IBV_WQ_FLAG_RX_END_PADDING;
+ attr.wq.comp_mask |= IBV_WQ_INIT_ATTR_FLAGS;
+ }
+#endif
+ tmpl->wq = ibv_create_wq(priv->ctx, &attr.wq);
+ if (tmpl->wq == NULL) {
+ ERROR("%p: WQ creation failure", (void *)rxq_ctrl);
+ goto error;
+ }
+ /*
+ * Make sure number of WRs*SGEs match expectations since a queue
+ * cannot allocate more than "desc" buffers.
+ */
+ if (((int)attr.wq.max_wr !=
+ ((1 << rxq_data->elts_n) >> rxq_data->sges_n)) ||
+ ((int)attr.wq.max_sge != (1 << rxq_data->sges_n))) {
+ ERROR("%p: requested %u*%u but got %u*%u WRs*SGEs",
+ (void *)rxq_ctrl,
+ ((1 << rxq_data->elts_n) >> rxq_data->sges_n),
+ (1 << rxq_data->sges_n),
+ attr.wq.max_wr, attr.wq.max_sge);
+ goto error;
+ }
+ /* Change queue state to ready. */
+ mod = (struct ibv_wq_attr){
+ .attr_mask = IBV_WQ_ATTR_STATE,
+ .wq_state = IBV_WQS_RDY,
+ };
+ ret = ibv_modify_wq(tmpl->wq, &mod);
+ if (ret) {
+ ERROR("%p: WQ state to IBV_WQS_RDY failed",
+ (void *)rxq_ctrl);
+ goto error;
+ }
+ obj.cq.in = tmpl->cq;
+ obj.cq.out = &cq_info;
+ obj.rwq.in = tmpl->wq;
+ obj.rwq.out = &rwq;
+ ret = mlx5dv_init_obj(&obj, MLX5DV_OBJ_CQ | MLX5DV_OBJ_RWQ);
+ if (ret != 0)
+ goto error;
+ if (cq_info.cqe_size != RTE_CACHE_LINE_SIZE) {
+ ERROR("Wrong MLX5_CQE_SIZE environment variable value: "
+ "it should be set to %u", RTE_CACHE_LINE_SIZE);
+ goto error;
+ }
+ /* Fill the rings. */
+ rxq_data->wqes = (volatile struct mlx5_wqe_data_seg (*)[])
+ (uintptr_t)rwq.buf;
+ for (i = 0; (i != (unsigned int)(1 << rxq_data->elts_n)); ++i) {
+ struct rte_mbuf *buf = (*rxq_data->elts)[i];
+ volatile struct mlx5_wqe_data_seg *scat = &(*rxq_data->wqes)[i];
+
+ /* scat->addr must be able to store a pointer. */
+ assert(sizeof(scat->addr) >= sizeof(uintptr_t));
+ *scat = (struct mlx5_wqe_data_seg){
+ .addr = rte_cpu_to_be_64(rte_pktmbuf_mtod(buf,
+ uintptr_t)),
+ .byte_count = rte_cpu_to_be_32(DATA_LEN(buf)),
+ .lkey = tmpl->mr->lkey,
+ };
+ }
+ rxq_data->rq_db = rwq.dbrec;
+ rxq_data->cqe_n = log2above(cq_info.cqe_cnt);
+ rxq_data->cq_ci = 0;
+ rxq_data->rq_ci = 0;
+ rxq_data->rq_pi = 0;
+ rxq_data->zip = (struct rxq_zip){
+ .ai = 0,
+ };
+ rxq_data->cq_db = cq_info.dbrec;
+ rxq_data->cqes = (volatile struct mlx5_cqe (*)[])(uintptr_t)cq_info.buf;
+ rxq_data->cq_uar = cq_info.cq_uar;
+ rxq_data->cqn = cq_info.cqn;
+ rxq_data->cq_arm_sn = 0;
+ /* Update doorbell counter. */
+ rxq_data->rq_ci = (1 << rxq_data->elts_n) >> rxq_data->sges_n;
+ rte_wmb();
+ *rxq_data->rq_db = rte_cpu_to_be_32(rxq_data->rq_ci);
+ DEBUG("%p: rxq updated with %p", (void *)rxq_ctrl, (void *)&tmpl);
+ rte_atomic32_inc(&tmpl->refcnt);
+ DEBUG("%p: Verbs Rx queue %p: refcnt %d", (void *)priv,
+ (void *)tmpl, rte_atomic32_read(&tmpl->refcnt));
+ LIST_INSERT_HEAD(&priv->rxqsibv, tmpl, next);
+ return tmpl;
+error:
+ if (tmpl->wq)
+ claim_zero(ibv_destroy_wq(tmpl->wq));
+ if (tmpl->cq)
+ claim_zero(ibv_destroy_cq(tmpl->cq));
+ if (tmpl->channel)
+ claim_zero(ibv_destroy_comp_channel(tmpl->channel));
+ if (tmpl->mr)
+ priv_mr_release(priv, tmpl->mr);
+ return NULL;
+}
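Each receive descriptor filled in the loop above carries the mbuf buffer address and length converted to big endian, plus the memory region lkey. A simplified sketch of that conversion; wqe_data_seg below is an illustrative stand-in assuming the usual mlx5 data-segment layout, not the driver's struct mlx5_wqe_data_seg:

#include <stdint.h>
#include <rte_byteorder.h>

/* Illustrative stand-in; big-endian byte_count, lkey and address. */
struct wqe_data_seg {
	uint32_t byte_count;
	uint32_t lkey;
	uint64_t addr;
};

/* Fill one receive descriptor from a buffer, as the loop above does. */
static void
fill_seg(struct wqe_data_seg *seg, uintptr_t buf_addr, uint32_t len,
	 uint32_t lkey)
{
	seg->addr = rte_cpu_to_be_64((uint64_t)buf_addr);
	seg->byte_count = rte_cpu_to_be_32(len);
	/* The lkey is used as stored by the MR registration code. */
	seg->lkey = lkey;
}

int
main(void)
{
	struct wqe_data_seg seg;
	static uint8_t buf[2048];

	fill_seg(&seg, (uintptr_t)buf, sizeof(buf), 0x1234);
	return 0;
}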
+
+/**
+ * Get an Rx queue Verbs object.
+ *
+ * @param priv
+ * Pointer to private structure.
+ * @param idx
+ *   Queue index in DPDK Rx queue array.
+ *
+ * @return
+ * The Verbs object if it exists.
+ */
+struct mlx5_rxq_ibv*
+mlx5_priv_rxq_ibv_get(struct priv *priv, uint16_t idx)
+{
+ struct mlx5_rxq_data *rxq_data = (*priv->rxqs)[idx];
+ struct mlx5_rxq_ctrl *rxq_ctrl;
+
+ if (idx >= priv->rxqs_n)
+ return NULL;
+ if (!rxq_data)
+ return NULL;
+ rxq_ctrl = container_of(rxq_data, struct mlx5_rxq_ctrl, rxq);
+ if (rxq_ctrl->ibv) {
+ priv_mr_get(priv, rxq_data->mp);
+ rte_atomic32_inc(&rxq_ctrl->ibv->refcnt);
+ DEBUG("%p: Verbs Rx queue %p: refcnt %d", (void *)priv,
+ (void *)rxq_ctrl->ibv,
+ rte_atomic32_read(&rxq_ctrl->ibv->refcnt));
+ }
+ return rxq_ctrl->ibv;
+}
+
+/**
+ * Release an Rx verbs queue object.
+ *
+ * @param priv
+ * Pointer to private structure.
+ * @param rxq_ibv
+ * Verbs Rx queue object.
+ *
+ * @return
+ * 0 on success, errno value on failure.
+ */
+int
+mlx5_priv_rxq_ibv_release(struct priv *priv, struct mlx5_rxq_ibv *rxq_ibv)
+{
+ int ret;
+
+ assert(rxq_ibv);
+ assert(rxq_ibv->wq);
+ assert(rxq_ibv->cq);
+ assert(rxq_ibv->mr);
+ ret = priv_mr_release(priv, rxq_ibv->mr);
+ if (!ret)
+ rxq_ibv->mr = NULL;
+ DEBUG("%p: Verbs Rx queue %p: refcnt %d", (void *)priv,
+ (void *)rxq_ibv, rte_atomic32_read(&rxq_ibv->refcnt));
+ if (rte_atomic32_dec_and_test(&rxq_ibv->refcnt)) {
+ rxq_free_elts(rxq_ibv->rxq_ctrl);
+ claim_zero(ibv_destroy_wq(rxq_ibv->wq));
+ claim_zero(ibv_destroy_cq(rxq_ibv->cq));
+ if (rxq_ibv->channel)
+ claim_zero(ibv_destroy_comp_channel(rxq_ibv->channel));
+ LIST_REMOVE(rxq_ibv, next);
+ rte_free(rxq_ibv);
+ return 0;
+ }
+ return EBUSY;
+}
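The Verbs Rx queue object is reference counted: mlx5_priv_rxq_ibv_get() bumps the counter and mlx5_priv_rxq_ibv_release() only destroys the resources once the last reference drops, returning EBUSY otherwise. A stand-alone sketch of that contract with a plain counter and a purely illustrative fake_obj type (the driver uses rte_atomic32 on struct mlx5_rxq_ibv):

#include <errno.h>
#include <stdio.h>
#include <stdlib.h>

/* Purely illustrative object; the driver uses struct mlx5_rxq_ibv. */
struct fake_obj {
	int refcnt;
};

static struct fake_obj *
obj_get(struct fake_obj *obj)
{
	++obj->refcnt;
	return obj;
}

/* 0 when the object was destroyed, EBUSY while references remain. */
static int
obj_release(struct fake_obj *obj)
{
	if (--obj->refcnt == 0) {
		free(obj);
		return 0;
	}
	return EBUSY;
}

int
main(void)
{
	struct fake_obj *obj = calloc(1, sizeof(*obj));

	obj_get(obj);                                     /* creator reference */
	obj_get(obj);                                     /* e.g. Rx interrupt vector */
	printf("first release: %d\n", obj_release(obj));  /* EBUSY */
	printf("last release: %d\n", obj_release(obj));   /* 0 */
	return 0;
}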
+
+/**
+ * Verify the Verbs Rx queue list is empty.
+ *
+ * @param priv
+ * Pointer to private structure.
+ *
+ * @return the number of objects not released.
+ */
+int
+mlx5_priv_rxq_ibv_verify(struct priv *priv)
+{
+ int ret = 0;
+ struct mlx5_rxq_ibv *rxq_ibv;
+
+ LIST_FOREACH(rxq_ibv, &priv->rxqsibv, next) {
+ DEBUG("%p: Verbs Rx queue %p still referenced", (void *)priv,
+ (void *)rxq_ibv);
+ ++ret;
+ }
+ return ret;
+}
+
+/**
+ * Return true if a single reference exists on the object.
+ *
+ * @param priv
+ * Pointer to private structure.
+ * @param rxq_ibv
+ * Verbs Rx queue object.
+ */
+int
+mlx5_priv_rxq_ibv_releasable(struct priv *priv, struct mlx5_rxq_ibv *rxq_ibv)
+{
+ (void)priv;
+ assert(rxq_ibv);
+ return (rte_atomic32_read(&rxq_ibv->refcnt) == 1);
+}
+
+/**
+ * Create a DPDK Rx queue.
+ *
+ * @param priv
+ * Pointer to private structure.
+ * @param idx
+ *   RX queue index.
+ * @param desc
+ *   Number of descriptors to configure in queue.
+ * @param socket
+ *   NUMA socket on which memory must be allocated.
+ * @param mp
+ *   Memory pool for buffer allocations.
+ *
+ * @return
+ * A DPDK queue object on success.
+ */
+struct mlx5_rxq_ctrl*
+mlx5_priv_rxq_new(struct priv *priv, uint16_t idx, uint16_t desc,
+ unsigned int socket, struct rte_mempool *mp)
+{
+ struct rte_eth_dev *dev = priv->dev;
+ struct mlx5_rxq_ctrl *tmpl;
+ const uint16_t desc_n =
+ desc + priv->rx_vec_en * MLX5_VPMD_DESCS_PER_LOOP;
+ unsigned int mb_len = rte_pktmbuf_data_room_size(mp);
+
+ tmpl = rte_calloc_socket("RXQ", 1,
+ sizeof(*tmpl) +
+ desc_n * sizeof(struct rte_mbuf *),
+ 0, socket);
+ if (!tmpl)
+ return NULL;
+ tmpl->socket = socket;
+ if (priv->dev->data->dev_conf.intr_conf.rxq)
+ tmpl->irq = 1;
+ /* Enable scattered packets support for this queue if necessary. */
+ assert(mb_len >= RTE_PKTMBUF_HEADROOM);
+ if (dev->data->dev_conf.rxmode.max_rx_pkt_len <=
+ (mb_len - RTE_PKTMBUF_HEADROOM)) {
+ tmpl->rxq.sges_n = 0;
+ } else if (dev->data->dev_conf.rxmode.enable_scatter) {
+ unsigned int size =
+ RTE_PKTMBUF_HEADROOM +
+ dev->data->dev_conf.rxmode.max_rx_pkt_len;
+ unsigned int sges_n;
+
+ /*
+ * Determine the number of SGEs needed for a full packet
+ * and round it to the next power of two.
+ */
+ sges_n = log2above((size / mb_len) + !!(size % mb_len));
+ tmpl->rxq.sges_n = sges_n;
+ /* Make sure rxq.sges_n did not overflow. */
+ size = mb_len * (1 << tmpl->rxq.sges_n);
+ size -= RTE_PKTMBUF_HEADROOM;
+ if (size < dev->data->dev_conf.rxmode.max_rx_pkt_len) {
+ ERROR("%p: too many SGEs (%u) needed to handle"
+ " requested maximum packet size %u",
+ (void *)dev,
+ 1 << sges_n,
+ dev->data->dev_conf.rxmode.max_rx_pkt_len);
+ goto error;
+ }
+ } else {
+ WARN("%p: the requested maximum Rx packet size (%u) is"
+ " larger than a single mbuf (%u) and scattered"
+ " mode has not been requested",
+ (void *)dev,
+ dev->data->dev_conf.rxmode.max_rx_pkt_len,
+ mb_len - RTE_PKTMBUF_HEADROOM);
+ }
+ DEBUG("%p: maximum number of segments per packet: %u",
+ (void *)dev, 1 << tmpl->rxq.sges_n);
+ if (desc % (1 << tmpl->rxq.sges_n)) {
+ ERROR("%p: number of RX queue descriptors (%u) is not a"
+ " multiple of SGEs per packet (%u)",
+ (void *)dev,
+ desc,
+ 1 << tmpl->rxq.sges_n);
+ goto error;
+ }
+ /* Toggle RX checksum offload if hardware supports it. */
+ if (priv->hw_csum)
+ tmpl->rxq.csum = !!dev->data->dev_conf.rxmode.hw_ip_checksum;
+ if (priv->hw_csum_l2tun)
+ tmpl->rxq.csum_l2tun =
+ !!dev->data->dev_conf.rxmode.hw_ip_checksum;
+ tmpl->rxq.hw_timestamp =
+ !!dev->data->dev_conf.rxmode.hw_timestamp;
+ /* Configure VLAN stripping. */
+ tmpl->rxq.vlan_strip = (priv->hw_vlan_strip &&
+ !!dev->data->dev_conf.rxmode.hw_vlan_strip);
+ /* By default, FCS (CRC) is stripped by hardware. */
+ if (dev->data->dev_conf.rxmode.hw_strip_crc) {
+ tmpl->rxq.crc_present = 0;
+ } else if (priv->hw_fcs_strip) {
+ tmpl->rxq.crc_present = 1;
+ } else {
+ WARN("%p: CRC stripping has been disabled but will still"
+ " be performed by hardware, make sure MLNX_OFED and"
+ " firmware are up to date",
+ (void *)dev);
+ tmpl->rxq.crc_present = 0;
+ }
+ DEBUG("%p: CRC stripping is %s, %u bytes will be subtracted from"
+ " incoming frames to hide it",
+ (void *)dev,
+ tmpl->rxq.crc_present ? "disabled" : "enabled",
+ tmpl->rxq.crc_present << 2);
+ /* Save port ID. */
+ tmpl->rxq.rss_hash = priv->rxqs_n > 1;
+ tmpl->rxq.port_id = dev->data->port_id;
+ tmpl->priv = priv;
+ tmpl->rxq.mp = mp;
+ tmpl->rxq.stats.idx = idx;
+ tmpl->rxq.elts_n = log2above(desc);
+ tmpl->rxq.elts =
+ (struct rte_mbuf *(*)[1 << tmpl->rxq.elts_n])(tmpl + 1);
+ rte_atomic32_inc(&tmpl->refcnt);
+ DEBUG("%p: Rx queue %p: refcnt %d", (void *)priv,
+ (void *)tmpl, rte_atomic32_read(&tmpl->refcnt));
+ LIST_INSERT_HEAD(&priv->rxqsctrl, tmpl, next);
+ return tmpl;
+error:
+ rte_free(tmpl);
+ return NULL;
+}
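When the maximum Rx packet does not fit in a single mbuf, the queue is configured with a power-of-two number of SGEs per packet and the descriptor count must be a multiple of it, as checked above. A standalone sketch of the same arithmetic; the headroom, mbuf data room and packet size are example values only:

#include <stdint.h>
#include <stdio.h>

/* Smallest n such that (1 << n) >= v, as log2above() computes in the PMD. */
static unsigned int
log2above(unsigned int v)
{
	unsigned int l;
	unsigned int r;

	for (l = 0, r = 0; (v >> 1); ++l, v >>= 1)
		r |= (v & 1);
	return l + r;
}

int
main(void)
{
	const unsigned int headroom = 128;        /* RTE_PKTMBUF_HEADROOM */
	const unsigned int mb_len = 2048;         /* mbuf data room, example */
	const unsigned int max_rx_pkt_len = 9000; /* jumbo frame, example */
	unsigned int size = headroom + max_rx_pkt_len;
	unsigned int sges_n = log2above((size / mb_len) + !!(size % mb_len));

	printf("SGEs per packet: %u\n", 1u << sges_n);
	printf("desc must be a multiple of %u\n", 1u << sges_n);
	return 0;
}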
+
+/**
+ * Get an Rx queue.
+ *
+ * @param priv
+ * Pointer to private structure.
+ * @param idx
+ *   RX queue index.
+ *
+ * @return
+ * A pointer to the queue if it exists.
+ */
+struct mlx5_rxq_ctrl*
+mlx5_priv_rxq_get(struct priv *priv, uint16_t idx)
+{
+ struct mlx5_rxq_ctrl *rxq_ctrl = NULL;
+
+ if ((*priv->rxqs)[idx]) {
+ rxq_ctrl = container_of((*priv->rxqs)[idx],
+ struct mlx5_rxq_ctrl,
+ rxq);
+
+ mlx5_priv_rxq_ibv_get(priv, idx);
+ rte_atomic32_inc(&rxq_ctrl->refcnt);
+ DEBUG("%p: Rx queue %p: refcnt %d", (void *)priv,
+ (void *)rxq_ctrl, rte_atomic32_read(&rxq_ctrl->refcnt));
+ }
+ return rxq_ctrl;
+}
+
+/**
+ * Release an Rx queue.
+ *
+ * @param priv
+ * Pointer to private structure.
+ * @param idx
+ *   RX queue index.
+ *
+ * @return
+ * 0 on success, errno value on failure.
+ */
+int
+mlx5_priv_rxq_release(struct priv *priv, uint16_t idx)
+{
+ struct mlx5_rxq_ctrl *rxq_ctrl;
+
+ if (!(*priv->rxqs)[idx])
+ return 0;
+ rxq_ctrl = container_of((*priv->rxqs)[idx], struct mlx5_rxq_ctrl, rxq);
+ assert(rxq_ctrl->priv);
+ if (rxq_ctrl->ibv) {
+ int ret;
+
+ ret = mlx5_priv_rxq_ibv_release(rxq_ctrl->priv, rxq_ctrl->ibv);
+ if (!ret)
+ rxq_ctrl->ibv = NULL;
+ }
+ DEBUG("%p: Rx queue %p: refcnt %d", (void *)priv,
+ (void *)rxq_ctrl, rte_atomic32_read(&rxq_ctrl->refcnt));
+ if (rte_atomic32_dec_and_test(&rxq_ctrl->refcnt)) {
+ LIST_REMOVE(rxq_ctrl, next);
+ rte_free(rxq_ctrl);
+ (*priv->rxqs)[idx] = NULL;
+ return 0;
+ }
+ return EBUSY;
+}
+
+/**
+ * Verify if the queue can be released.
+ *
+ * @param priv
+ * Pointer to private structure.
+ * @param idx
+ *   RX queue index.
+ *
+ * @return
+ * 1 if the queue can be released.
+ */
+int
+mlx5_priv_rxq_releasable(struct priv *priv, uint16_t idx)
+{
+ struct mlx5_rxq_ctrl *rxq_ctrl;
+
+ if (!(*priv->rxqs)[idx])
+ return -1;
+ rxq_ctrl = container_of((*priv->rxqs)[idx], struct mlx5_rxq_ctrl, rxq);
+ return (rte_atomic32_read(&rxq_ctrl->refcnt) == 1);
+}
+
+/**
+ * Verify the Rx queue list is empty.
+ *
+ * @param priv
+ * Pointer to private structure.
+ *
+ * @return the number of objects not released.
+ */
+int
+mlx5_priv_rxq_verify(struct priv *priv)
+{
+ struct mlx5_rxq_ctrl *rxq_ctrl;
+ int ret = 0;
+
+ LIST_FOREACH(rxq_ctrl, &priv->rxqsctrl, next) {
+ DEBUG("%p: Rx Queue %p still referenced", (void *)priv,
+ (void *)rxq_ctrl);
+ ++ret;
+ }
+ return ret;
+}
+
+/**
+ * Create an indirection table.
+ *
+ * @param priv
+ * Pointer to private structure.
+ * @param queues
+ *   Queues entering the indirection table.
+ * @param queues_n
+ * Number of queues in the array.
+ *
+ * @return
+ * A new indirection table.
+ */
+struct mlx5_ind_table_ibv*
+mlx5_priv_ind_table_ibv_new(struct priv *priv, uint16_t queues[],
+ uint16_t queues_n)
+{
+ struct mlx5_ind_table_ibv *ind_tbl;
+ const unsigned int wq_n = rte_is_power_of_2(queues_n) ?
+ log2above(queues_n) :
+ log2above(priv->ind_table_max_size);
+ struct ibv_wq *wq[1 << wq_n];
+ unsigned int i;
+ unsigned int j;
+
+ ind_tbl = rte_calloc(__func__, 1, sizeof(*ind_tbl) +
+ queues_n * sizeof(uint16_t), 0);
+ if (!ind_tbl)
+ return NULL;
+ for (i = 0; i != queues_n; ++i) {
+ struct mlx5_rxq_ctrl *rxq =
+ mlx5_priv_rxq_get(priv, queues[i]);
+
+ if (!rxq)
+ goto error;
+ wq[i] = rxq->ibv->wq;
+ ind_tbl->queues[i] = queues[i];
+ }
+ ind_tbl->queues_n = queues_n;
+ /* Finalise indirection table. */
+ for (j = 0; i != (unsigned int)(1 << wq_n); ++i, ++j)
+ wq[i] = wq[j];
+ ind_tbl->ind_table = ibv_create_rwq_ind_table(
+ priv->ctx,
+ &(struct ibv_rwq_ind_table_init_attr){
+ .log_ind_tbl_size = wq_n,
+ .ind_tbl = wq,
+ .comp_mask = 0,
+ });
+ if (!ind_tbl->ind_table)
+ goto error;
+ rte_atomic32_inc(&ind_tbl->refcnt);
+ LIST_INSERT_HEAD(&priv->ind_tbls, ind_tbl, next);
+ DEBUG("%p: Indirection table %p: refcnt %d", (void *)priv,
+ (void *)ind_tbl, rte_atomic32_read(&ind_tbl->refcnt));
+ return ind_tbl;
+error:
+ rte_free(ind_tbl);
+ DEBUG("%p cannot create indirection table", (void *)priv);
+ return NULL;
+}
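The indirection table always holds a power-of-two number of work queues; when fewer queues are configured, the tail slots wrap around to the first queues, which is what the finalise loop above does. A standalone sketch of that wrap-around with an illustrative 8-slot table (the driver sizes the table from the queue count or the device maximum):

#include <stdint.h>
#include <stdio.h>

int
main(void)
{
	const unsigned int wq_n = 3;            /* log2 of table size: 8 slots */
	uint16_t queues[] = { 0, 1, 2, 3, 4 };  /* 5 configured Rx queues */
	const unsigned int queues_n = 5;
	uint16_t tbl[1 << wq_n];
	unsigned int i;
	unsigned int j;

	for (i = 0; i != queues_n; ++i)
		tbl[i] = queues[i];
	/* Finalise: replicate from the start until the table is full. */
	for (j = 0; i != (unsigned int)(1 << wq_n); ++i, ++j)
		tbl[i] = tbl[j];
	for (i = 0; i != (unsigned int)(1 << wq_n); ++i)
		printf("slot %u -> queue %u\n", i, (unsigned int)tbl[i]);
	return 0;
}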
+
+/**
+ * Get an indirection table.
+ *
+ * @param priv
+ * Pointer to private structure.
+ * @param queues
+ *   Queues entering the indirection table.
+ * @param queues_n
+ * Number of queues in the array.
+ *
+ * @return
+ * An indirection table if found.
+ */
+struct mlx5_ind_table_ibv*
+mlx5_priv_ind_table_ibv_get(struct priv *priv, uint16_t queues[],
+ uint16_t queues_n)
+{
+ struct mlx5_ind_table_ibv *ind_tbl;
+
+ LIST_FOREACH(ind_tbl, &priv->ind_tbls, next) {
+ if ((ind_tbl->queues_n == queues_n) &&
+ (memcmp(ind_tbl->queues, queues,
+ ind_tbl->queues_n * sizeof(ind_tbl->queues[0]))
+ == 0))
+ break;
+ }
+ if (ind_tbl) {
+ unsigned int i;
+
+ rte_atomic32_inc(&ind_tbl->refcnt);
+ DEBUG("%p: Indirection table %p: refcnt %d", (void *)priv,
+ (void *)ind_tbl, rte_atomic32_read(&ind_tbl->refcnt));
+ for (i = 0; i != ind_tbl->queues_n; ++i)
+ mlx5_priv_rxq_get(priv, ind_tbl->queues[i]);
+ }
+ return ind_tbl;
+}
+
+/**
+ * Release an indirection table.
+ *
+ * @param priv
+ * Pointer to private structure.
+ * @param ind_table
+ * Indirection table to release.
+ *
+ * @return
+ * 0 on success, errno value on failure.
+ */
+int
+mlx5_priv_ind_table_ibv_release(struct priv *priv,
+ struct mlx5_ind_table_ibv *ind_tbl)
+{
+ unsigned int i;
+
+ DEBUG("%p: Indirection table %p: refcnt %d", (void *)priv,
+ (void *)ind_tbl, rte_atomic32_read(&ind_tbl->refcnt));
+ if (rte_atomic32_dec_and_test(&ind_tbl->refcnt))
+ claim_zero(ibv_destroy_rwq_ind_table(ind_tbl->ind_table));
+ for (i = 0; i != ind_tbl->queues_n; ++i)
+ claim_nonzero(mlx5_priv_rxq_release(priv, ind_tbl->queues[i]));
+ if (!rte_atomic32_read(&ind_tbl->refcnt)) {
+ LIST_REMOVE(ind_tbl, next);
+ rte_free(ind_tbl);
+ return 0;
+ }
+ return EBUSY;
+}
+
+/**
+ * Verify the Verbs indirection table list is empty.
+ *
+ * @param priv
+ * Pointer to private structure.
+ *
+ * @return the number of objects not released.
+ */
+int
+mlx5_priv_ind_table_ibv_verify(struct priv *priv)
+{
+ struct mlx5_ind_table_ibv *ind_tbl;
+ int ret = 0;
+
+ LIST_FOREACH(ind_tbl, &priv->ind_tbls, next) {
+ DEBUG("%p: Verbs indirection table %p still referenced",
+ (void *)priv, (void *)ind_tbl);
+ ++ret;
+ }
+ return ret;
+}
+
+/**
+ * Create an Rx Hash queue.
+ *
+ * @param priv
+ * Pointer to private structure.
+ * @param rss_key
+ * RSS key for the Rx hash queue.
+ * @param rss_key_len
+ * RSS key length.
+ * @param hash_fields
+ *   Verbs protocol hash fields to apply RSS on.
+ * @param queues
+ *   Queues entering the hash queue. In case of empty hash_fields only the
+ * first queue index will be taken for the indirection table.
+ * @param queues_n
+ * Number of queues.
+ *
+ * @return
+ *   A hash Rx queue on success.
+ */
+struct mlx5_hrxq*
+mlx5_priv_hrxq_new(struct priv *priv, uint8_t *rss_key, uint8_t rss_key_len,
+ uint64_t hash_fields, uint16_t queues[], uint16_t queues_n)
+{
+ struct mlx5_hrxq *hrxq;
+ struct mlx5_ind_table_ibv *ind_tbl;
+ struct ibv_qp *qp;
+
+ queues_n = hash_fields ? queues_n : 1;
+ ind_tbl = mlx5_priv_ind_table_ibv_get(priv, queues, queues_n);
+ if (!ind_tbl)
+ ind_tbl = mlx5_priv_ind_table_ibv_new(priv, queues, queues_n);
+ if (!ind_tbl)
+ return NULL;
+ qp = ibv_create_qp_ex(
+ priv->ctx,
+ &(struct ibv_qp_init_attr_ex){
+ .qp_type = IBV_QPT_RAW_PACKET,
+ .comp_mask =
+ IBV_QP_INIT_ATTR_PD |
+ IBV_QP_INIT_ATTR_IND_TABLE |
+ IBV_QP_INIT_ATTR_RX_HASH,
+ .rx_hash_conf = (struct ibv_rx_hash_conf){
+ .rx_hash_function = IBV_RX_HASH_FUNC_TOEPLITZ,
+ .rx_hash_key_len = rss_key_len,
+ .rx_hash_key = rss_key,
+ .rx_hash_fields_mask = hash_fields,
+ },
+ .rwq_ind_tbl = ind_tbl->ind_table,
+ .pd = priv->pd,
+ });
+ if (!qp)
+ goto error;
+ hrxq = rte_calloc(__func__, 1, sizeof(*hrxq) + rss_key_len, 0);
+ if (!hrxq)
+ goto error;
+ hrxq->ind_table = ind_tbl;
+ hrxq->qp = qp;
+ hrxq->rss_key_len = rss_key_len;
+ hrxq->hash_fields = hash_fields;
+ memcpy(hrxq->rss_key, rss_key, rss_key_len);
+ rte_atomic32_inc(&hrxq->refcnt);
+ LIST_INSERT_HEAD(&priv->hrxqs, hrxq, next);
+ DEBUG("%p: Hash Rx queue %p: refcnt %d", (void *)priv,
+ (void *)hrxq, rte_atomic32_read(&hrxq->refcnt));
+ return hrxq;
+error:
+ mlx5_priv_ind_table_ibv_release(priv, ind_tbl);
+ if (qp)
+ claim_zero(ibv_destroy_qp(qp));
+ return NULL;
+}
+
+/**
+ * Get an Rx Hash queue.
+ *
+ * @param priv
+ * Pointer to private structure.
+ * @param rss_key
+ *   RSS key for the Rx hash queue.
+ * @param rss_key_len
+ *   RSS key length.
+ * @param hash_fields
+ *   Verbs protocol hash fields to apply RSS on.
+ * @param queues
+ *   Queues entering the hash queue. In case of empty hash_fields only the
+ * first queue index will be taken for the indirection table.
+ * @param queues_n
+ * Number of queues.
+ *
+ * @return
+ *   A hash Rx queue if found.
+ */
+struct mlx5_hrxq*
+mlx5_priv_hrxq_get(struct priv *priv, uint8_t *rss_key, uint8_t rss_key_len,
+ uint64_t hash_fields, uint16_t queues[], uint16_t queues_n)
+{
+ struct mlx5_hrxq *hrxq;
+
+ queues_n = hash_fields ? queues_n : 1;
+ LIST_FOREACH(hrxq, &priv->hrxqs, next) {
+ struct mlx5_ind_table_ibv *ind_tbl;
+
+ if (hrxq->rss_key_len != rss_key_len)
+ continue;
+ if (memcmp(hrxq->rss_key, rss_key, rss_key_len))
+ continue;
+ if (hrxq->hash_fields != hash_fields)
+ continue;
+ ind_tbl = mlx5_priv_ind_table_ibv_get(priv, queues, queues_n);
+ if (!ind_tbl)
+ continue;
+ if (ind_tbl != hrxq->ind_table) {
+ mlx5_priv_ind_table_ibv_release(priv, ind_tbl);
+ continue;
+ }
+ rte_atomic32_inc(&hrxq->refcnt);
+ DEBUG("%p: Hash Rx queue %p: refcnt %d", (void *)priv,
+ (void *)hrxq, rte_atomic32_read(&hrxq->refcnt));
+ return hrxq;
+ }
+ return NULL;
+}
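A hash Rx queue is matched on the exact RSS key, key length, hash fields and indirection table, so callers typically try mlx5_priv_hrxq_get() and fall back to mlx5_priv_hrxq_new(). A simplified standalone sketch of the matching test; hrxq_key is an illustrative stand-in, not the driver's struct mlx5_hrxq:

#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Simplified stand-in for the lookup key of a hash Rx queue. */
struct hrxq_key {
	uint8_t rss_key[40];
	uint8_t rss_key_len;
	uint64_t hash_fields;
};

static int
hrxq_match(const struct hrxq_key *a, const struct hrxq_key *b)
{
	if (a->rss_key_len != b->rss_key_len)
		return 0;
	if (memcmp(a->rss_key, b->rss_key, a->rss_key_len))
		return 0;
	if (a->hash_fields != b->hash_fields)
		return 0;
	/* The driver also requires the same indirection table. */
	return 1;
}

int
main(void)
{
	struct hrxq_key a = { .rss_key = { 1, 2, 3 }, .rss_key_len = 3,
			      .hash_fields = 0xf };
	struct hrxq_key b = a;

	printf("match: %d\n", hrxq_match(&a, &b));
	b.hash_fields = 0;
	printf("match: %d\n", hrxq_match(&a, &b));
	return 0;
}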
+
+/**
+ * Release the hash Rx queue.
+ *
+ * @param priv
+ * Pointer to private structure.
+ * @param hrxq
+ * Pointer to Hash Rx queue to release.
+ *
+ * @return
+ * 0 on success, errno value on failure.
+ */
+int
+mlx5_priv_hrxq_release(struct priv *priv, struct mlx5_hrxq *hrxq)
+{
+ DEBUG("%p: Hash Rx queue %p: refcnt %d", (void *)priv,
+ (void *)hrxq, rte_atomic32_read(&hrxq->refcnt));
+ if (rte_atomic32_dec_and_test(&hrxq->refcnt)) {
+ claim_zero(ibv_destroy_qp(hrxq->qp));
+ mlx5_priv_ind_table_ibv_release(priv, hrxq->ind_table);
+ LIST_REMOVE(hrxq, next);
+ rte_free(hrxq);
+ return 0;
+ }
+ claim_nonzero(mlx5_priv_ind_table_ibv_release(priv, hrxq->ind_table));
+ return EBUSY;
+}
+
+/**
+ * Verify the hash Rx queue list is empty.
+ *
+ * @param priv
+ * Pointer to private structure.
+ *
+ * @return the number of objects not released.
+ */
+int
+mlx5_priv_hrxq_ibv_verify(struct priv *priv)
+{
+ struct mlx5_hrxq *hrxq;
+ int ret = 0;
+
+ LIST_FOREACH(hrxq, &priv->hrxqs, next) {
+ DEBUG("%p: Verbs Hash Rx queue %p still referenced",
+ (void *)priv, (void *)hrxq);
+ ++ret;
+ }
+ return ret;
+}
diff --git a/drivers/net/mlx5/mlx5_rxtx.c b/drivers/net/mlx5/mlx5_rxtx.c
index b07bcd11..9658b378 100644
--- a/drivers/net/mlx5/mlx5_rxtx.c
+++ b/drivers/net/mlx5/mlx5_rxtx.c
@@ -42,25 +42,17 @@
#pragma GCC diagnostic ignored "-Wpedantic"
#endif
#include <infiniband/verbs.h>
-#include <infiniband/mlx5_hw.h>
-#include <infiniband/arch.h>
+#include <infiniband/mlx5dv.h>
#ifdef PEDANTIC
#pragma GCC diagnostic error "-Wpedantic"
#endif
-/* DPDK headers don't like -pedantic. */
-#ifdef PEDANTIC
-#pragma GCC diagnostic ignored "-Wpedantic"
-#endif
#include <rte_mbuf.h>
#include <rte_mempool.h>
#include <rte_prefetch.h>
#include <rte_common.h>
#include <rte_branch_prediction.h>
#include <rte_ether.h>
-#ifdef PEDANTIC
-#pragma GCC diagnostic error "-Wpedantic"
-#endif
#include "mlx5.h"
#include "mlx5_utils.h"
@@ -73,11 +65,11 @@ static __rte_always_inline uint32_t
rxq_cq_to_pkt_type(volatile struct mlx5_cqe *cqe);
static __rte_always_inline int
-mlx5_rx_poll_len(struct rxq *rxq, volatile struct mlx5_cqe *cqe,
+mlx5_rx_poll_len(struct mlx5_rxq_data *rxq, volatile struct mlx5_cqe *cqe,
uint16_t cqe_cnt, uint32_t *rss_hash);
static __rte_always_inline uint32_t
-rxq_cq_to_ol_flags(struct rxq *rxq, volatile struct mlx5_cqe *cqe);
+rxq_cq_to_ol_flags(struct mlx5_rxq_data *rxq, volatile struct mlx5_cqe *cqe);
uint32_t mlx5_ptype_table[] __rte_cache_aligned = {
[0xff] = RTE_PTYPE_ALL_MASK, /* Last entry for errored packet. */
@@ -105,6 +97,8 @@ mlx5_set_ptype_table(void)
* bit[6] = tunneled
* bit[7] = outer_l3_type
*/
+ /* L2 */
+ (*p)[0x00] = RTE_PTYPE_L2_ETHER;
/* L3 */
(*p)[0x01] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
RTE_PTYPE_L4_NONFRAG;
@@ -171,29 +165,29 @@ mlx5_set_ptype_table(void)
/* Tunneled - TCP */
(*p)[0x45] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN |
- RTE_PTYPE_L4_TCP;
+ RTE_PTYPE_INNER_L4_TCP;
(*p)[0x46] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN |
- RTE_PTYPE_L4_TCP;
+ RTE_PTYPE_INNER_L4_TCP;
(*p)[0xc5] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN |
- RTE_PTYPE_L4_TCP;
+ RTE_PTYPE_INNER_L4_TCP;
(*p)[0xc6] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN |
- RTE_PTYPE_L4_TCP;
+ RTE_PTYPE_INNER_L4_TCP;
/* Tunneled - UDP */
(*p)[0x49] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN |
- RTE_PTYPE_L4_UDP;
+ RTE_PTYPE_INNER_L4_UDP;
(*p)[0x4a] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN |
- RTE_PTYPE_L4_UDP;
+ RTE_PTYPE_INNER_L4_UDP;
(*p)[0xc9] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN |
- RTE_PTYPE_L4_UDP;
+ RTE_PTYPE_INNER_L4_UDP;
(*p)[0xca] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN |
- RTE_PTYPE_L4_UDP;
+ RTE_PTYPE_INNER_L4_UDP;
}
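mlx5_set_ptype_table() above fills a 256-entry lookup table indexed by a bit-encoded value taken from the CQE, so the Rx path resolves a packet type with a single array access instead of a chain of conditionals. Below is a minimal, hedged sketch of the same table-driven translation; the flag bit and the strings are invented for illustration and do not reflect the real index layout beyond the bits documented above.

#include <stdint.h>
#include <stdio.h>

#define EX_TUNNELED (1u << 6)   /* Hypothetical "tunneled" bit. */

static const char *ex_ptype_table[256];

static void
ex_init_table(void)
{
	ex_ptype_table[0x00] = "L2 only";
	ex_ptype_table[0x01] = "IPv6, non-fragmented L4";
	ex_ptype_table[0x01 | EX_TUNNELED] = "tunneled IPv6";
	ex_ptype_table[0xff] = "errored packet";
}

int
main(void)
{
	uint8_t idx = 0x01 | EX_TUNNELED;

	ex_init_table();
	printf("0x%02x -> %s\n", idx, ex_ptype_table[idx]);
	return 0;
}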
/**
@@ -208,7 +202,7 @@ mlx5_set_ptype_table(void)
* Size of tailroom.
*/
static inline size_t
-tx_mlx5_wq_tailroom(struct txq *txq, void *addr)
+tx_mlx5_wq_tailroom(struct mlx5_txq_data *txq, void *addr)
{
size_t tailroom;
tailroom = (uintptr_t)(txq->wqes) +
@@ -266,7 +260,7 @@ mlx5_copy_to_wq(void *dst, const void *src, size_t n,
int
mlx5_tx_descriptor_status(void *tx_queue, uint16_t offset)
{
- struct txq *txq = tx_queue;
+ struct mlx5_txq_data *txq = tx_queue;
uint16_t used;
mlx5_tx_complete(txq);
@@ -290,7 +284,7 @@ mlx5_tx_descriptor_status(void *tx_queue, uint16_t offset)
int
mlx5_rx_descriptor_status(void *rx_queue, uint16_t offset)
{
- struct rxq *rxq = rx_queue;
+ struct mlx5_rxq_data *rxq = rx_queue;
struct rxq_zip *zip = &rxq->zip;
volatile struct mlx5_cqe *cqe;
const unsigned int cqe_n = (1 << rxq->cqe_n);
@@ -313,7 +307,7 @@ mlx5_rx_descriptor_status(void *rx_queue, uint16_t offset)
op_own = cqe->op_own;
if (MLX5_CQE_FORMAT(op_own) == MLX5_COMPRESSED)
- n = ntohl(cqe->byte_cnt);
+ n = rte_be_to_cpu_32(cqe->byte_cnt);
else
n = 1;
cq_ci += n;
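A recurring change in this patch is the replacement of htonl()/ntohl()/htons()/htonll() with the rte_cpu_to_be_*()/rte_be_to_cpu_*() helpers, which carry an explicit width and also exist in a 64-bit form. A minimal round-trip sketch, assuming a DPDK build environment for <rte_byteorder.h>:

#include <stdint.h>
#include <stdio.h>
#include <rte_byteorder.h>

int
main(void)
{
	uint32_t host = 0x11223344;
	uint32_t wire = rte_cpu_to_be_32(host);   /* CPU -> big endian. */

	/* Round-trips back to the original host-order value. */
	printf("%s\n", rte_be_to_cpu_32(wire) == host ? "ok" : "mismatch");
	return 0;
}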
@@ -342,7 +336,7 @@ mlx5_rx_descriptor_status(void *rx_queue, uint16_t offset)
uint16_t
mlx5_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
{
- struct txq *txq = (struct txq *)dpdk_txq;
+ struct mlx5_txq_data *txq = (struct mlx5_txq_data *)dpdk_txq;
uint16_t elts_head = txq->elts_head;
const uint16_t elts_n = 1 << txq->elts_n;
const uint16_t elts_m = elts_n - 1;
@@ -413,8 +407,10 @@ mlx5_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
#ifdef MLX5_PMD_SOFT_COUNTERS
total_length = length;
#endif
- if (length < (MLX5_WQE_DWORD_SIZE + 2))
+ if (length < (MLX5_WQE_DWORD_SIZE + 2)) {
+ txq->stats.oerrors++;
break;
+ }
/* Update element. */
(*txq->elts)[elts_head & elts_m] = buf;
/* Prefetch next buffer data. */
@@ -441,7 +437,8 @@ mlx5_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
raw = ((uint8_t *)(uintptr_t)wqe) + 2 * MLX5_WQE_DWORD_SIZE;
/* Replace the Ethernet type by the VLAN if necessary. */
if (buf->ol_flags & PKT_TX_VLAN_PKT) {
- uint32_t vlan = htonl(0x81000000 | buf->vlan_tci);
+ uint32_t vlan = rte_cpu_to_be_32(0x81000000 |
+ buf->vlan_tci);
unsigned int len = 2 * ETHER_ADDR_LEN - 2;
addr += 2;
@@ -461,6 +458,7 @@ mlx5_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
length -= pkt_inline_sz;
addr += pkt_inline_sz;
}
+ raw += MLX5_WQE_DWORD_SIZE;
if (txq->tso_en) {
tso = buf->ol_flags & PKT_TX_TCP_SEG;
if (tso) {
@@ -479,7 +477,10 @@ mlx5_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
tso_header_sz = buf->l2_len + vlan_sz +
buf->l3_len + buf->l4_len;
tso_segsz = buf->tso_segsz;
-
+ if (unlikely(tso_segsz == 0)) {
+ txq->stats.oerrors++;
+ break;
+ }
if (is_tunneled && txq->tunnel_en) {
tso_header_sz += buf->outer_l2_len +
buf->outer_l3_len;
@@ -488,12 +489,13 @@ mlx5_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
cs_flags |= MLX5_ETH_WQE_L4_CSUM;
}
if (unlikely(tso_header_sz >
- MLX5_MAX_TSO_HEADER))
+ MLX5_MAX_TSO_HEADER)) {
+ txq->stats.oerrors++;
break;
+ }
copy_b = tso_header_sz - pkt_inline_sz;
/* First seg must contain all headers. */
assert(copy_b <= length);
- raw += MLX5_WQE_DWORD_SIZE;
if (copy_b &&
((end - (uintptr_t)raw) > copy_b)) {
uint16_t n = (MLX5_WQE_DS(copy_b) -
@@ -506,19 +508,18 @@ mlx5_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
(void *)addr, copy_b);
addr += copy_b;
length -= copy_b;
+ /* Include padding for TSO header. */
+ copy_b = MLX5_WQE_DS(copy_b) *
+ MLX5_WQE_DWORD_SIZE;
pkt_inline_sz += copy_b;
- /*
- * Another DWORD will be added
- * in the inline part.
- */
- raw += MLX5_WQE_DS(copy_b) *
- MLX5_WQE_DWORD_SIZE -
- MLX5_WQE_DWORD_SIZE;
+ raw += copy_b;
} else {
/* NOP WQE. */
wqe->ctrl = (rte_v128u32_t){
- htonl(txq->wqe_ci << 8),
- htonl(txq->qp_num_8s | 1),
+ rte_cpu_to_be_32(
+ txq->wqe_ci << 8),
+ rte_cpu_to_be_32(
+ txq->qp_num_8s | 1),
0,
0,
};
@@ -531,19 +532,20 @@ mlx5_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
}
/* Inline if enough room. */
if (inline_en || tso) {
+ uint32_t inl;
uintptr_t end = (uintptr_t)
(((uintptr_t)txq->wqes) +
(1 << txq->wqe_n) * MLX5_WQE_SIZE);
unsigned int inline_room = max_inline *
RTE_CACHE_LINE_SIZE -
- (pkt_inline_sz - 2);
+ (pkt_inline_sz - 2) -
+ !!tso * sizeof(inl);
uintptr_t addr_end = (addr + inline_room) &
~(RTE_CACHE_LINE_SIZE - 1);
unsigned int copy_b = (addr_end > addr) ?
RTE_MIN((addr_end - addr), length) :
0;
- raw += MLX5_WQE_DWORD_SIZE;
if (copy_b && ((end - (uintptr_t)raw) > copy_b)) {
/*
* One Dseg remains in the current WQE. To
@@ -556,12 +558,8 @@ mlx5_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
break;
max_wqe -= n;
if (tso) {
- uint32_t inl =
- htonl(copy_b | MLX5_INLINE_SEG);
-
- pkt_inline_sz =
- MLX5_WQE_DS(tso_header_sz) *
- MLX5_WQE_DWORD_SIZE;
+ inl = rte_cpu_to_be_32(copy_b |
+ MLX5_INLINE_SEG);
rte_memcpy((void *)raw,
(void *)&inl, sizeof(inl));
raw += sizeof(inl);
@@ -610,9 +608,9 @@ mlx5_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
ds = 3;
use_dseg:
/* Add the remaining packet as a simple ds. */
- naddr = htonll(addr);
+ naddr = rte_cpu_to_be_64(addr);
*dseg = (rte_v128u32_t){
- htonl(length),
+ rte_cpu_to_be_32(length),
mlx5_tx_mb2mr(txq, buf),
naddr,
naddr >> 32,
@@ -649,9 +647,9 @@ next_seg:
total_length += length;
#endif
/* Store segment information. */
- naddr = htonll(rte_pktmbuf_mtod(buf, uintptr_t));
+ naddr = rte_cpu_to_be_64(rte_pktmbuf_mtod(buf, uintptr_t));
*dseg = (rte_v128u32_t){
- htonl(length),
+ rte_cpu_to_be_32(length),
mlx5_tx_mb2mr(txq, buf),
naddr,
naddr >> 32,
@@ -664,27 +662,33 @@ next_seg:
else
j += sg;
next_pkt:
+ if (ds > MLX5_DSEG_MAX) {
+ txq->stats.oerrors++;
+ break;
+ }
++elts_head;
++pkts;
++i;
/* Initialize known and common part of the WQE structure. */
if (tso) {
wqe->ctrl = (rte_v128u32_t){
- htonl((txq->wqe_ci << 8) | MLX5_OPCODE_TSO),
- htonl(txq->qp_num_8s | ds),
+ rte_cpu_to_be_32((txq->wqe_ci << 8) |
+ MLX5_OPCODE_TSO),
+ rte_cpu_to_be_32(txq->qp_num_8s | ds),
0,
0,
};
wqe->eseg = (rte_v128u32_t){
0,
- cs_flags | (htons(tso_segsz) << 16),
+ cs_flags | (rte_cpu_to_be_16(tso_segsz) << 16),
0,
- (ehdr << 16) | htons(tso_header_sz),
+ (ehdr << 16) | rte_cpu_to_be_16(tso_header_sz),
};
} else {
wqe->ctrl = (rte_v128u32_t){
- htonl((txq->wqe_ci << 8) | MLX5_OPCODE_SEND),
- htonl(txq->qp_num_8s | ds),
+ rte_cpu_to_be_32((txq->wqe_ci << 8) |
+ MLX5_OPCODE_SEND),
+ rte_cpu_to_be_32(txq->qp_num_8s | ds),
0,
0,
};
@@ -692,7 +696,7 @@ next_pkt:
0,
cs_flags,
0,
- (ehdr << 16) | htons(pkt_inline_sz),
+ (ehdr << 16) | rte_cpu_to_be_16(pkt_inline_sz),
};
}
next_wqe:
@@ -712,7 +716,7 @@ next_wqe:
comp = txq->elts_comp + i + j + k;
if (comp >= MLX5_TX_COMP_THRESH) {
/* Request completion on last WQE. */
- last_wqe->ctrl2 = htonl(8);
+ last_wqe->ctrl2 = rte_cpu_to_be_32(8);
/* Save elts_head in unused "immediate" field of WQE. */
last_wqe->ctrl3 = txq->elts_head;
txq->elts_comp = 0;
@@ -739,7 +743,7 @@ next_wqe:
* Packet length.
*/
static inline void
-mlx5_mpw_new(struct txq *txq, struct mlx5_mpw *mpw, uint32_t length)
+mlx5_mpw_new(struct mlx5_txq_data *txq, struct mlx5_mpw *mpw, uint32_t length)
{
uint16_t idx = txq->wqe_ci & ((1 << txq->wqe_n) - 1);
volatile struct mlx5_wqe_data_seg (*dseg)[MLX5_MPW_DSEG_MAX] =
@@ -751,13 +755,14 @@ mlx5_mpw_new(struct txq *txq, struct mlx5_mpw *mpw, uint32_t length)
mpw->len = length;
mpw->total_len = 0;
mpw->wqe = (volatile struct mlx5_wqe *)tx_mlx5_wqe(txq, idx);
- mpw->wqe->eseg.mss = htons(length);
+ mpw->wqe->eseg.mss = rte_cpu_to_be_16(length);
mpw->wqe->eseg.inline_hdr_sz = 0;
mpw->wqe->eseg.rsvd0 = 0;
mpw->wqe->eseg.rsvd1 = 0;
mpw->wqe->eseg.rsvd2 = 0;
- mpw->wqe->ctrl[0] = htonl((MLX5_OPC_MOD_MPW << 24) |
- (txq->wqe_ci << 8) | MLX5_OPCODE_TSO);
+ mpw->wqe->ctrl[0] = rte_cpu_to_be_32((MLX5_OPC_MOD_MPW << 24) |
+ (txq->wqe_ci << 8) |
+ MLX5_OPCODE_TSO);
mpw->wqe->ctrl[2] = 0;
mpw->wqe->ctrl[3] = 0;
mpw->data.dseg[0] = (volatile struct mlx5_wqe_data_seg *)
@@ -778,7 +783,7 @@ mlx5_mpw_new(struct txq *txq, struct mlx5_mpw *mpw, uint32_t length)
* Pointer to MPW session structure.
*/
static inline void
-mlx5_mpw_close(struct txq *txq, struct mlx5_mpw *mpw)
+mlx5_mpw_close(struct mlx5_txq_data *txq, struct mlx5_mpw *mpw)
{
unsigned int num = mpw->pkts_n;
@@ -786,7 +791,7 @@ mlx5_mpw_close(struct txq *txq, struct mlx5_mpw *mpw)
* Store size in multiple of 16 bytes. Control and Ethernet segments
* count as 2.
*/
- mpw->wqe->ctrl[1] = htonl(txq->qp_num_8s | (2 + num));
+ mpw->wqe->ctrl[1] = rte_cpu_to_be_32(txq->qp_num_8s | (2 + num));
mpw->state = MLX5_MPW_STATE_CLOSED;
if (num < 3)
++txq->wqe_ci;
@@ -812,7 +817,7 @@ mlx5_mpw_close(struct txq *txq, struct mlx5_mpw *mpw)
uint16_t
mlx5_tx_burst_mpw(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
{
- struct txq *txq = (struct txq *)dpdk_txq;
+ struct mlx5_txq_data *txq = (struct mlx5_txq_data *)dpdk_txq;
uint16_t elts_head = txq->elts_head;
const uint16_t elts_n = 1 << txq->elts_n;
const uint16_t elts_m = elts_n - 1;
@@ -850,8 +855,10 @@ mlx5_tx_burst_mpw(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
if (max_elts < segs_n)
break;
/* Do not bother with large packets MPW cannot handle. */
- if (segs_n > MLX5_MPW_DSEG_MAX)
+ if (segs_n > MLX5_MPW_DSEG_MAX) {
+ txq->stats.oerrors++;
break;
+ }
max_elts -= segs_n;
--pkts_n;
/* Should we enable HW CKSUM offload */
@@ -893,9 +900,9 @@ mlx5_tx_burst_mpw(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
dseg = mpw.data.dseg[mpw.pkts_n];
addr = rte_pktmbuf_mtod(buf, uintptr_t);
*dseg = (struct mlx5_wqe_data_seg){
- .byte_count = htonl(DATA_LEN(buf)),
+ .byte_count = rte_cpu_to_be_32(DATA_LEN(buf)),
.lkey = mlx5_tx_mb2mr(txq, buf),
- .addr = htonll(addr),
+ .addr = rte_cpu_to_be_64(addr),
};
#if defined(MLX5_PMD_SOFT_COUNTERS) || !defined(NDEBUG)
length += DATA_LEN(buf);
@@ -923,7 +930,7 @@ mlx5_tx_burst_mpw(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
volatile struct mlx5_wqe *wqe = mpw.wqe;
/* Request completion on last WQE. */
- wqe->ctrl[2] = htonl(8);
+ wqe->ctrl[2] = rte_cpu_to_be_32(8);
/* Save elts_head in unused "immediate" field of WQE. */
wqe->ctrl[3] = elts_head;
txq->elts_comp = 0;
@@ -953,7 +960,8 @@ mlx5_tx_burst_mpw(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
* Packet length.
*/
static inline void
-mlx5_mpw_inline_new(struct txq *txq, struct mlx5_mpw *mpw, uint32_t length)
+mlx5_mpw_inline_new(struct mlx5_txq_data *txq, struct mlx5_mpw *mpw,
+ uint32_t length)
{
uint16_t idx = txq->wqe_ci & ((1 << txq->wqe_n) - 1);
struct mlx5_wqe_inl_small *inl;
@@ -963,12 +971,12 @@ mlx5_mpw_inline_new(struct txq *txq, struct mlx5_mpw *mpw, uint32_t length)
mpw->len = length;
mpw->total_len = 0;
mpw->wqe = (volatile struct mlx5_wqe *)tx_mlx5_wqe(txq, idx);
- mpw->wqe->ctrl[0] = htonl((MLX5_OPC_MOD_MPW << 24) |
- (txq->wqe_ci << 8) |
- MLX5_OPCODE_TSO);
+ mpw->wqe->ctrl[0] = rte_cpu_to_be_32((MLX5_OPC_MOD_MPW << 24) |
+ (txq->wqe_ci << 8) |
+ MLX5_OPCODE_TSO);
mpw->wqe->ctrl[2] = 0;
mpw->wqe->ctrl[3] = 0;
- mpw->wqe->eseg.mss = htons(length);
+ mpw->wqe->eseg.mss = rte_cpu_to_be_16(length);
mpw->wqe->eseg.inline_hdr_sz = 0;
mpw->wqe->eseg.cs_flags = 0;
mpw->wqe->eseg.rsvd0 = 0;
@@ -988,7 +996,7 @@ mlx5_mpw_inline_new(struct txq *txq, struct mlx5_mpw *mpw, uint32_t length)
* Pointer to MPW session structure.
*/
static inline void
-mlx5_mpw_inline_close(struct txq *txq, struct mlx5_mpw *mpw)
+mlx5_mpw_inline_close(struct mlx5_txq_data *txq, struct mlx5_mpw *mpw)
{
unsigned int size;
struct mlx5_wqe_inl_small *inl = (struct mlx5_wqe_inl_small *)
@@ -999,9 +1007,10 @@ mlx5_mpw_inline_close(struct txq *txq, struct mlx5_mpw *mpw)
* Store size in multiple of 16 bytes. Control and Ethernet segments
* count as 2.
*/
- mpw->wqe->ctrl[1] = htonl(txq->qp_num_8s | MLX5_WQE_DS(size));
+ mpw->wqe->ctrl[1] = rte_cpu_to_be_32(txq->qp_num_8s |
+ MLX5_WQE_DS(size));
mpw->state = MLX5_MPW_STATE_CLOSED;
- inl->byte_cnt = htonl(mpw->total_len | MLX5_INLINE_SEG);
+ inl->byte_cnt = rte_cpu_to_be_32(mpw->total_len | MLX5_INLINE_SEG);
txq->wqe_ci += (size + (MLX5_WQE_SIZE - 1)) / MLX5_WQE_SIZE;
}
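The wqe_ci update above is a round-up division: an inline MPW session may end partway through a WQE basic block, so the index advances by ceil(size / MLX5_WQE_SIZE). A generic, self-contained sketch of the idiom with made-up sizes:

#include <stdio.h>

/* Round-up integer division: ceil(a / b) for b > 0. */
#define DIV_ROUND_UP(a, b) (((a) + (b) - 1) / (b))

int
main(void)
{
	unsigned int wqe_size = 64;            /* Hypothetical WQE size. */
	unsigned int sizes[] = { 1, 64, 65, 200 };

	for (unsigned int i = 0; i < 4; i++)
		printf("size %3u -> %u WQEs\n", sizes[i],
		       DIV_ROUND_UP(sizes[i], wqe_size));
	return 0;
}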
@@ -1022,7 +1031,7 @@ uint16_t
mlx5_tx_burst_mpw_inline(void *dpdk_txq, struct rte_mbuf **pkts,
uint16_t pkts_n)
{
- struct txq *txq = (struct txq *)dpdk_txq;
+ struct mlx5_txq_data *txq = (struct mlx5_txq_data *)dpdk_txq;
uint16_t elts_head = txq->elts_head;
const uint16_t elts_n = 1 << txq->elts_n;
const uint16_t elts_m = elts_n - 1;
@@ -1071,8 +1080,10 @@ mlx5_tx_burst_mpw_inline(void *dpdk_txq, struct rte_mbuf **pkts,
if (max_elts < segs_n)
break;
/* Do not bother with large packets MPW cannot handle. */
- if (segs_n > MLX5_MPW_DSEG_MAX)
+ if (segs_n > MLX5_MPW_DSEG_MAX) {
+ txq->stats.oerrors++;
break;
+ }
max_elts -= segs_n;
--pkts_n;
/*
@@ -1139,9 +1150,10 @@ mlx5_tx_burst_mpw_inline(void *dpdk_txq, struct rte_mbuf **pkts,
dseg = mpw.data.dseg[mpw.pkts_n];
addr = rte_pktmbuf_mtod(buf, uintptr_t);
*dseg = (struct mlx5_wqe_data_seg){
- .byte_count = htonl(DATA_LEN(buf)),
+ .byte_count =
+ rte_cpu_to_be_32(DATA_LEN(buf)),
.lkey = mlx5_tx_mb2mr(txq, buf),
- .addr = htonll(addr),
+ .addr = rte_cpu_to_be_64(addr),
};
#if defined(MLX5_PMD_SOFT_COUNTERS) || !defined(NDEBUG)
length += DATA_LEN(buf);
@@ -1213,7 +1225,7 @@ mlx5_tx_burst_mpw_inline(void *dpdk_txq, struct rte_mbuf **pkts,
volatile struct mlx5_wqe *wqe = mpw.wqe;
/* Request completion on last WQE. */
- wqe->ctrl[2] = htonl(8);
+ wqe->ctrl[2] = rte_cpu_to_be_32(8);
/* Save elts_head in unused "immediate" field of WQE. */
wqe->ctrl[3] = elts_head;
txq->elts_comp = 0;
@@ -1245,7 +1257,7 @@ mlx5_tx_burst_mpw_inline(void *dpdk_txq, struct rte_mbuf **pkts,
* Packet length.
*/
static inline void
-mlx5_empw_new(struct txq *txq, struct mlx5_mpw *mpw, int padding)
+mlx5_empw_new(struct mlx5_txq_data *txq, struct mlx5_mpw *mpw, int padding)
{
uint16_t idx = txq->wqe_ci & ((1 << txq->wqe_n) - 1);
@@ -1253,9 +1265,10 @@ mlx5_empw_new(struct txq *txq, struct mlx5_mpw *mpw, int padding)
mpw->pkts_n = 0;
mpw->total_len = sizeof(struct mlx5_wqe);
mpw->wqe = (volatile struct mlx5_wqe *)tx_mlx5_wqe(txq, idx);
- mpw->wqe->ctrl[0] = htonl((MLX5_OPC_MOD_ENHANCED_MPSW << 24) |
- (txq->wqe_ci << 8) |
- MLX5_OPCODE_ENHANCED_MPSW);
+ mpw->wqe->ctrl[0] =
+ rte_cpu_to_be_32((MLX5_OPC_MOD_ENHANCED_MPSW << 24) |
+ (txq->wqe_ci << 8) |
+ MLX5_OPCODE_ENHANCED_MPSW);
mpw->wqe->ctrl[2] = 0;
mpw->wqe->ctrl[3] = 0;
memset((void *)(uintptr_t)&mpw->wqe->eseg, 0, MLX5_WQE_DWORD_SIZE);
@@ -1263,9 +1276,9 @@ mlx5_empw_new(struct txq *txq, struct mlx5_mpw *mpw, int padding)
uintptr_t addr = (uintptr_t)(mpw->wqe + 1);
/* Pad the first 2 DWORDs with zero-length inline header. */
- *(volatile uint32_t *)addr = htonl(MLX5_INLINE_SEG);
+ *(volatile uint32_t *)addr = rte_cpu_to_be_32(MLX5_INLINE_SEG);
*(volatile uint32_t *)(addr + MLX5_WQE_DWORD_SIZE) =
- htonl(MLX5_INLINE_SEG);
+ rte_cpu_to_be_32(MLX5_INLINE_SEG);
mpw->total_len += 2 * MLX5_WQE_DWORD_SIZE;
/* Start from the next WQEBB. */
mpw->data.raw = (volatile void *)(tx_mlx5_wqe(txq, idx + 1));
@@ -1286,14 +1299,15 @@ mlx5_empw_new(struct txq *txq, struct mlx5_mpw *mpw, int padding)
* Number of consumed WQEs.
*/
static inline uint16_t
-mlx5_empw_close(struct txq *txq, struct mlx5_mpw *mpw)
+mlx5_empw_close(struct mlx5_txq_data *txq, struct mlx5_mpw *mpw)
{
uint16_t ret;
/* Store size in multiple of 16 bytes. Control and Ethernet segments
* count as 2.
*/
- mpw->wqe->ctrl[1] = htonl(txq->qp_num_8s | MLX5_WQE_DS(mpw->total_len));
+ mpw->wqe->ctrl[1] = rte_cpu_to_be_32(txq->qp_num_8s |
+ MLX5_WQE_DS(mpw->total_len));
mpw->state = MLX5_MPW_STATE_CLOSED;
ret = (mpw->total_len + (MLX5_WQE_SIZE - 1)) / MLX5_WQE_SIZE;
txq->wqe_ci += ret;
@@ -1316,7 +1330,7 @@ mlx5_empw_close(struct txq *txq, struct mlx5_mpw *mpw)
uint16_t
mlx5_tx_burst_empw(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
{
- struct txq *txq = (struct txq *)dpdk_txq;
+ struct mlx5_txq_data *txq = (struct mlx5_txq_data *)dpdk_txq;
uint16_t elts_head = txq->elts_head;
const uint16_t elts_n = 1 << txq->elts_n;
const uint16_t elts_m = elts_n - 1;
@@ -1360,8 +1374,10 @@ mlx5_tx_burst_empw(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
if (max_elts - j < segs_n)
break;
/* Do not bother with large packets MPW cannot handle. */
- if (segs_n > MLX5_MPW_DSEG_MAX)
+ if (segs_n > MLX5_MPW_DSEG_MAX) {
+ txq->stats.oerrors++;
break;
+ }
/* Should we enable HW CKSUM offload. */
if (buf->ol_flags &
(PKT_TX_IP_CKSUM | PKT_TX_TCP_CKSUM | PKT_TX_UDP_CKSUM))
@@ -1446,9 +1462,10 @@ mlx5_tx_burst_empw(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
dseg = mpw.data.dseg[mpw.pkts_n];
addr = rte_pktmbuf_mtod(buf, uintptr_t);
*dseg = (struct mlx5_wqe_data_seg){
- .byte_count = htonl(DATA_LEN(buf)),
+ .byte_count = rte_cpu_to_be_32(
+ DATA_LEN(buf)),
.lkey = mlx5_tx_mb2mr(txq, buf),
- .addr = htonll(addr),
+ .addr = rte_cpu_to_be_64(addr),
};
#if defined(MLX5_PMD_SOFT_COUNTERS) || !defined(NDEBUG)
length += DATA_LEN(buf);
@@ -1471,7 +1488,7 @@ mlx5_tx_burst_empw(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
assert(mpw.state == MLX5_MPW_ENHANCED_STATE_OPENED);
assert(length == DATA_LEN(buf));
- inl_hdr = htonl(length | MLX5_INLINE_SEG);
+ inl_hdr = rte_cpu_to_be_32(length | MLX5_INLINE_SEG);
addr = rte_pktmbuf_mtod(buf, uintptr_t);
mpw.data.raw = (volatile void *)
((uintptr_t)mpw.data.raw + inl_pad);
@@ -1527,9 +1544,9 @@ mlx5_tx_burst_empw(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
for (n = 0; n * RTE_CACHE_LINE_SIZE < length; n++)
rte_prefetch2((void *)(addr +
n * RTE_CACHE_LINE_SIZE));
- naddr = htonll(addr);
+ naddr = rte_cpu_to_be_64(addr);
*dseg = (rte_v128u32_t) {
- htonl(length),
+ rte_cpu_to_be_32(length),
mlx5_tx_mb2mr(txq, buf),
naddr,
naddr >> 32,
@@ -1557,7 +1574,7 @@ mlx5_tx_burst_empw(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
volatile struct mlx5_wqe *wqe = mpw.wqe;
/* Request completion on last WQE. */
- wqe->ctrl[2] = htonl(8);
+ wqe->ctrl[2] = rte_cpu_to_be_32(8);
/* Save elts_head in unused "immediate" field of WQE. */
wqe->ctrl[3] = elts_head;
txq->elts_comp = 0;
@@ -1627,7 +1644,7 @@ rxq_cq_to_pkt_type(volatile struct mlx5_cqe *cqe)
* with error.
*/
static inline int
-mlx5_rx_poll_len(struct rxq *rxq, volatile struct mlx5_cqe *cqe,
+mlx5_rx_poll_len(struct mlx5_rxq_data *rxq, volatile struct mlx5_cqe *cqe,
uint16_t cqe_cnt, uint32_t *rss_hash)
{
struct rxq_zip *zip = &rxq->zip;
@@ -1641,8 +1658,8 @@ mlx5_rx_poll_len(struct rxq *rxq, volatile struct mlx5_cqe *cqe,
(volatile struct mlx5_mini_cqe8 (*)[8])
(uintptr_t)(&(*rxq->cqes)[zip->ca & cqe_cnt].pkt_info);
- len = ntohl((*mc)[zip->ai & 7].byte_cnt);
- *rss_hash = ntohl((*mc)[zip->ai & 7].rx_hash_result);
+ len = rte_be_to_cpu_32((*mc)[zip->ai & 7].byte_cnt);
+ *rss_hash = rte_be_to_cpu_32((*mc)[zip->ai & 7].rx_hash_result);
if ((++zip->ai & 7) == 0) {
/* Invalidate consumed CQEs */
idx = zip->ca;
@@ -1690,7 +1707,7 @@ mlx5_rx_poll_len(struct rxq *rxq, volatile struct mlx5_cqe *cqe,
cqe_cnt].pkt_info);
/* Fix endianness. */
- zip->cqe_cnt = ntohl(cqe->byte_cnt);
+ zip->cqe_cnt = rte_be_to_cpu_32(cqe->byte_cnt);
/*
* Current mini array position is the one returned by
* check_cqe64().
@@ -1705,8 +1722,8 @@ mlx5_rx_poll_len(struct rxq *rxq, volatile struct mlx5_cqe *cqe,
--rxq->cq_ci;
zip->cq_ci = rxq->cq_ci + zip->cqe_cnt;
/* Get packet size to return. */
- len = ntohl((*mc)[0].byte_cnt);
- *rss_hash = ntohl((*mc)[0].rx_hash_result);
+ len = rte_be_to_cpu_32((*mc)[0].byte_cnt);
+ *rss_hash = rte_be_to_cpu_32((*mc)[0].rx_hash_result);
zip->ai = 1;
/* Prefetch all the entries to be invalidated */
idx = zip->ca;
@@ -1716,8 +1733,8 @@ mlx5_rx_poll_len(struct rxq *rxq, volatile struct mlx5_cqe *cqe,
++idx;
}
} else {
- len = ntohl(cqe->byte_cnt);
- *rss_hash = ntohl(cqe->rx_hash_res);
+ len = rte_be_to_cpu_32(cqe->byte_cnt);
+ *rss_hash = rte_be_to_cpu_32(cqe->rx_hash_res);
}
/* Error while receiving packet. */
if (unlikely(MLX5_CQE_OPCODE(op_own) == MLX5_CQE_RESP_ERR))
@@ -1738,10 +1755,10 @@ mlx5_rx_poll_len(struct rxq *rxq, volatile struct mlx5_cqe *cqe,
* Offload flags (ol_flags) for struct rte_mbuf.
*/
static inline uint32_t
-rxq_cq_to_ol_flags(struct rxq *rxq, volatile struct mlx5_cqe *cqe)
+rxq_cq_to_ol_flags(struct mlx5_rxq_data *rxq, volatile struct mlx5_cqe *cqe)
{
uint32_t ol_flags = 0;
- uint16_t flags = ntohs(cqe->hdr_type_etc);
+ uint16_t flags = rte_be_to_cpu_16(cqe->hdr_type_etc);
ol_flags =
TRANSPOSE(flags,
@@ -1777,7 +1794,7 @@ rxq_cq_to_ol_flags(struct rxq *rxq, volatile struct mlx5_cqe *cqe)
uint16_t
mlx5_rx_burst(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n)
{
- struct rxq *rxq = dpdk_rxq;
+ struct mlx5_rxq_data *rxq = dpdk_rxq;
const unsigned int wqe_cnt = (1 << rxq->elts_n) - 1;
const unsigned int cqe_cnt = (1 << rxq->cqe_n) - 1;
const unsigned int sges_n = rxq->sges_n;
@@ -1848,7 +1865,7 @@ mlx5_rx_burst(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n)
MLX5_FLOW_MARK_IS_VALID(cqe->sop_drop_qpn)) {
pkt->ol_flags |= PKT_RX_FDIR;
if (cqe->sop_drop_qpn !=
- htonl(MLX5_FLOW_MARK_DEFAULT)) {
+ rte_cpu_to_be_32(MLX5_FLOW_MARK_DEFAULT)) {
uint32_t mark = cqe->sop_drop_qpn;
pkt->ol_flags |= PKT_RX_FDIR_ID;
@@ -1860,10 +1877,16 @@ mlx5_rx_burst(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n)
pkt->ol_flags |= rxq_cq_to_ol_flags(rxq, cqe);
if (rxq->vlan_strip &&
(cqe->hdr_type_etc &
- htons(MLX5_CQE_VLAN_STRIPPED))) {
- pkt->ol_flags |= PKT_RX_VLAN_PKT |
+ rte_cpu_to_be_16(MLX5_CQE_VLAN_STRIPPED))) {
+ pkt->ol_flags |= PKT_RX_VLAN |
PKT_RX_VLAN_STRIPPED;
- pkt->vlan_tci = ntohs(cqe->vlan_info);
+ pkt->vlan_tci =
+ rte_be_to_cpu_16(cqe->vlan_info);
+ }
+ if (rxq->hw_timestamp) {
+ pkt->timestamp =
+ rte_be_to_cpu_64(cqe->timestamp);
+ pkt->ol_flags |= PKT_RX_TIMESTAMP;
}
if (rxq->crc_present)
len -= ETHER_CRC_LEN;
@@ -1879,7 +1902,7 @@ mlx5_rx_burst(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n)
* of the buffers are already known, only the buffer address
* changes.
*/
- wqe->addr = htonll(rte_pktmbuf_mtod(rep, uintptr_t));
+ wqe->addr = rte_cpu_to_be_64(rte_pktmbuf_mtod(rep, uintptr_t));
if (len > DATA_LEN(seg)) {
len -= DATA_LEN(seg);
++NB_SEGS(pkt);
@@ -1906,10 +1929,10 @@ skip:
return 0;
/* Update the consumer index. */
rxq->rq_ci = rq_ci >> sges_n;
- rte_wmb();
- *rxq->cq_db = htonl(rxq->cq_ci);
- rte_wmb();
- *rxq->rq_db = htonl(rxq->rq_ci);
+ rte_io_wmb();
+ *rxq->cq_db = rte_cpu_to_be_32(rxq->cq_ci);
+ rte_io_wmb();
+ *rxq->rq_db = rte_cpu_to_be_32(rxq->rq_ci);
#ifdef MLX5_PMD_SOFT_COUNTERS
/* Increment packets counter. */
rxq->stats.ipackets += i;
@@ -2016,7 +2039,7 @@ priv_check_vec_tx_support(struct priv *priv)
}
int __attribute__((weak))
-rxq_check_vec_support(struct rxq *rxq)
+rxq_check_vec_support(struct mlx5_rxq_data *rxq)
{
(void)rxq;
return -ENOTSUP;
@@ -2028,9 +2051,3 @@ priv_check_vec_rx_support(struct priv *priv)
(void)priv;
return -ENOTSUP;
}
-
-void __attribute__((weak))
-priv_prep_vec_rx_function(struct priv *priv)
-{
- (void)priv;
-}
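The weak stubs just above let mlx5_rxtx.c link even when the vectorized object file is not built; when mlx5_rxtx_vec.c is compiled in, its strong definitions win at link time. A standalone sketch of the idiom (GCC/Clang attribute; names are invented):

#include <stdio.h>

/* Generic fallback; a strong definition in another object file overrides it. */
int __attribute__((weak))
check_vec_support(void)
{
	return -1;          /* "Not supported" fallback, like -ENOTSUP. */
}

int
main(void)
{
	printf("vector support: %d\n", check_vec_support());
	return 0;
}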
diff --git a/drivers/net/mlx5/mlx5_rxtx.h b/drivers/net/mlx5/mlx5_rxtx.h
index 7de1d108..d34f3cc0 100644
--- a/drivers/net/mlx5/mlx5_rxtx.h
+++ b/drivers/net/mlx5/mlx5_rxtx.h
@@ -36,6 +36,7 @@
#include <stddef.h>
#include <stdint.h>
+#include <sys/queue.h>
/* Verbs header. */
/* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */
@@ -43,21 +44,16 @@
#pragma GCC diagnostic ignored "-Wpedantic"
#endif
#include <infiniband/verbs.h>
-#include <infiniband/mlx5_hw.h>
+#include <infiniband/mlx5dv.h>
#ifdef PEDANTIC
#pragma GCC diagnostic error "-Wpedantic"
#endif
-/* DPDK headers don't like -pedantic. */
-#ifdef PEDANTIC
-#pragma GCC diagnostic ignored "-Wpedantic"
-#endif
#include <rte_mbuf.h>
#include <rte_mempool.h>
#include <rte_common.h>
-#ifdef PEDANTIC
-#pragma GCC diagnostic error "-Wpedantic"
-#endif
+#include <rte_hexdump.h>
+#include <rte_atomic.h>
#include "mlx5_utils.h"
#include "mlx5.h"
@@ -81,19 +77,22 @@ struct mlx5_txq_stats {
uint64_t opackets; /**< Total of successfully sent packets. */
uint64_t obytes; /**< Total of successfully sent bytes. */
#endif
- uint64_t odropped; /**< Total of packets not sent when TX ring full. */
-};
-
-/* Flow director queue structure. */
-struct fdir_queue {
- struct ibv_qp *qp; /* Associated RX QP. */
- struct ibv_exp_rwq_ind_table *ind_table; /* Indirection table. */
- struct ibv_exp_wq *wq; /* Work queue. */
- struct ibv_cq *cq; /* Completion queue. */
+	uint64_t oerrors; /**< Total number of packets that failed to be sent. */
};
struct priv;
+/* Memory region queue object. */
+struct mlx5_mr {
+ LIST_ENTRY(mlx5_mr) next; /**< Pointer to the next element. */
+	rte_atomic32_t refcnt; /**< Reference counter. */
+	uint32_t lkey; /**< rte_cpu_to_be_32(mr->lkey). */
+ uintptr_t start; /* Start address of MR */
+ uintptr_t end; /* End address of MR */
+	struct ibv_mr *mr; /**< Memory Region. */
+	struct rte_mempool *mp; /**< Memory Pool. */
+};
+
/* Compressed CQE context. */
struct rxq_zip {
uint16_t ai; /* Array index. */
@@ -104,22 +103,22 @@ struct rxq_zip {
};
/* RX queue descriptor. */
-struct rxq {
+struct mlx5_rxq_data {
unsigned int csum:1; /* Enable checksum offloading. */
unsigned int csum_l2tun:1; /* Same for L2 tunnels. */
+ unsigned int hw_timestamp:1; /* Enable HW timestamp. */
unsigned int vlan_strip:1; /* Enable VLAN stripping. */
unsigned int crc_present:1; /* CRC must be subtracted. */
unsigned int sges_n:2; /* Log 2 of SGEs (max buffers per packet). */
unsigned int cqe_n:4; /* Log 2 of CQ elements. */
unsigned int elts_n:4; /* Log 2 of Mbufs. */
- unsigned int port_id:8;
unsigned int rss_hash:1; /* RSS hash result is enabled. */
unsigned int mark:1; /* Marked flow available on the queue. */
unsigned int pending_err:1; /* CQE error needs to be handled. */
- unsigned int trim_elts:1; /* Whether elts needs clean-up. */
- unsigned int :6; /* Remaining bits. */
+ unsigned int :14; /* Remaining bits. */
volatile uint32_t *rq_db;
volatile uint32_t *cq_db;
+ uint16_t port_id;
uint16_t rq_ci;
uint16_t rq_pi;
uint16_t cq_ci;
@@ -131,122 +130,56 @@ struct rxq {
struct mlx5_rxq_stats stats;
uint64_t mbuf_initializer; /* Default rearm_data for vectorized Rx. */
struct rte_mbuf fake_mbuf; /* elts padding for vectorized Rx. */
+ void *cq_uar; /* CQ user access region. */
+ uint32_t cqn; /* CQ number. */
+ uint8_t cq_arm_sn; /* CQ arm seq number. */
} __rte_cache_aligned;
-/* RX queue control descriptor. */
-struct rxq_ctrl {
- struct priv *priv; /* Back pointer to private data. */
+/* Verbs Rx queue elements. */
+struct mlx5_rxq_ibv {
+ LIST_ENTRY(mlx5_rxq_ibv) next; /* Pointer to the next element. */
+ rte_atomic32_t refcnt; /* Reference counter. */
+ struct mlx5_rxq_ctrl *rxq_ctrl; /* Back pointer to parent. */
struct ibv_cq *cq; /* Completion Queue. */
- struct ibv_exp_wq *wq; /* Work Queue. */
- struct fdir_queue *fdir_queue; /* Flow director queue. */
- struct ibv_mr *mr; /* Memory Region (for mp). */
+ struct ibv_wq *wq; /* Work Queue. */
struct ibv_comp_channel *channel;
- unsigned int socket; /* CPU socket ID for allocations. */
- struct rxq rxq; /* Data path structure. */
-};
-
-/* Hash RX queue types. */
-enum hash_rxq_type {
- HASH_RXQ_TCPV4,
- HASH_RXQ_UDPV4,
- HASH_RXQ_IPV4,
- HASH_RXQ_TCPV6,
- HASH_RXQ_UDPV6,
- HASH_RXQ_IPV6,
- HASH_RXQ_ETH,
-};
-
-/* Flow structure with Ethernet specification. It is packed to prevent padding
- * between attr and spec as this layout is expected by libibverbs. */
-struct flow_attr_spec_eth {
- struct ibv_exp_flow_attr attr;
- struct ibv_exp_flow_spec_eth spec;
-} __attribute__((packed));
-
-/* Define a struct flow_attr_spec_eth object as an array of at least
- * "size" bytes. Room after the first index is normally used to store
- * extra flow specifications. */
-#define FLOW_ATTR_SPEC_ETH(name, size) \
- struct flow_attr_spec_eth name \
- [((size) / sizeof(struct flow_attr_spec_eth)) + \
- !!((size) % sizeof(struct flow_attr_spec_eth))]
-
-/* Initialization data for hash RX queue. */
-struct hash_rxq_init {
- uint64_t hash_fields; /* Fields that participate in the hash. */
- uint64_t dpdk_rss_hf; /* Matching DPDK RSS hash fields. */
- unsigned int flow_priority; /* Flow priority to use. */
- union {
- struct {
- enum ibv_exp_flow_spec_type type;
- uint16_t size;
- } hdr;
- struct ibv_exp_flow_spec_tcp_udp tcp_udp;
- struct ibv_exp_flow_spec_ipv4 ipv4;
- struct ibv_exp_flow_spec_ipv6 ipv6;
- struct ibv_exp_flow_spec_eth eth;
- } flow_spec; /* Flow specification template. */
- const struct hash_rxq_init *underlayer; /* Pointer to underlayer. */
+ struct mlx5_mr *mr; /* Memory Region (for mp). */
};
-/* Initialization data for indirection table. */
-struct ind_table_init {
- unsigned int max_size; /* Maximum number of WQs. */
- /* Hash RX queues using this table. */
- unsigned int hash_types;
- unsigned int hash_types_n;
+/* RX queue control descriptor. */
+struct mlx5_rxq_ctrl {
+ LIST_ENTRY(mlx5_rxq_ctrl) next; /* Pointer to the next element. */
+ rte_atomic32_t refcnt; /* Reference counter. */
+ struct priv *priv; /* Back pointer to private data. */
+ struct mlx5_rxq_ibv *ibv; /* Verbs elements. */
+ struct mlx5_rxq_data rxq; /* Data path structure. */
+ unsigned int socket; /* CPU socket ID for allocations. */
+ unsigned int irq:1; /* Whether IRQ is enabled. */
};
-/* Initialization data for special flows. */
-struct special_flow_init {
- uint8_t dst_mac_val[6];
- uint8_t dst_mac_mask[6];
- unsigned int hash_types;
- unsigned int per_vlan:1;
+/* Indirection table. */
+struct mlx5_ind_table_ibv {
+ LIST_ENTRY(mlx5_ind_table_ibv) next; /* Pointer to the next element. */
+ rte_atomic32_t refcnt; /* Reference counter. */
+ struct ibv_rwq_ind_table *ind_table; /**< Indirection table. */
+ uint16_t queues_n; /**< Number of queues in the list. */
+ uint16_t queues[]; /**< Queue list. */
};
-enum hash_rxq_flow_type {
- HASH_RXQ_FLOW_TYPE_PROMISC,
- HASH_RXQ_FLOW_TYPE_ALLMULTI,
- HASH_RXQ_FLOW_TYPE_BROADCAST,
- HASH_RXQ_FLOW_TYPE_IPV6MULTI,
- HASH_RXQ_FLOW_TYPE_MAC,
-};
-
-#ifndef NDEBUG
-static inline const char *
-hash_rxq_flow_type_str(enum hash_rxq_flow_type flow_type)
-{
- switch (flow_type) {
- case HASH_RXQ_FLOW_TYPE_PROMISC:
- return "promiscuous";
- case HASH_RXQ_FLOW_TYPE_ALLMULTI:
- return "allmulticast";
- case HASH_RXQ_FLOW_TYPE_BROADCAST:
- return "broadcast";
- case HASH_RXQ_FLOW_TYPE_IPV6MULTI:
- return "IPv6 multicast";
- case HASH_RXQ_FLOW_TYPE_MAC:
- return "MAC";
- }
- return NULL;
-}
-#endif /* NDEBUG */
-
-struct hash_rxq {
- struct priv *priv; /* Back pointer to private data. */
- struct ibv_qp *qp; /* Hash RX QP. */
- enum hash_rxq_type type; /* Hash RX queue type. */
- /* MAC flow steering rules, one per VLAN ID. */
- struct ibv_exp_flow *mac_flow
- [MLX5_MAX_MAC_ADDRESSES][MLX5_MAX_VLAN_IDS];
- struct ibv_exp_flow *special_flow
- [MLX5_MAX_SPECIAL_FLOWS][MLX5_MAX_VLAN_IDS];
+/* Hash Rx queue. */
+struct mlx5_hrxq {
+ LIST_ENTRY(mlx5_hrxq) next; /* Pointer to the next element. */
+ rte_atomic32_t refcnt; /* Reference counter. */
+ struct mlx5_ind_table_ibv *ind_table; /* Indirection table. */
+ struct ibv_qp *qp; /* Verbs queue pair. */
+ uint64_t hash_fields; /* Verbs Hash fields. */
+ uint8_t rss_key_len; /* Hash key length in bytes. */
+ uint8_t rss_key[]; /* Hash key. */
};
/* TX queue descriptor. */
__extension__
-struct txq {
+struct mlx5_txq_data {
uint16_t elts_head; /* Current counter in (*elts)[]. */
uint16_t elts_tail; /* Counter of first element awaiting completion. */
uint16_t elts_comp; /* Counter since last completion request. */
@@ -265,6 +198,7 @@ struct txq {
uint16_t mpw_hdr_dseg:1; /* Enable DSEGs in the title WQEBB. */
uint16_t max_inline; /* Multiple of RTE_CACHE_LINE_SIZE to inline. */
uint16_t inline_max_packet_sz; /* Max packet size for inlining. */
+ uint16_t mr_cache_idx; /* Index of last hit entry. */
uint32_t qp_num_8s; /* QP number shifted by 8. */
uint32_t flags; /* Flags for Tx Queue. */
volatile struct mlx5_cqe (*cqes)[]; /* Completion queue. */
@@ -272,61 +206,92 @@ struct txq {
volatile uint32_t *qp_db; /* Work queue doorbell. */
volatile uint32_t *cq_db; /* Completion queue doorbell. */
volatile void *bf_reg; /* Blueflame register. */
- struct {
- uintptr_t start; /* Start address of MR */
- uintptr_t end; /* End address of MR */
- struct ibv_mr *mr; /* Memory Region (for mp). */
- uint32_t lkey; /* htonl(mr->lkey) */
- } mp2mr[MLX5_PMD_TX_MP_CACHE]; /* MP to MR translation table. */
- uint16_t mr_cache_idx; /* Index of last hit entry. */
+ struct mlx5_mr *mp2mr[MLX5_PMD_TX_MP_CACHE]; /* MR translation table. */
struct rte_mbuf *(*elts)[]; /* TX elements. */
struct mlx5_txq_stats stats; /* TX queue counters. */
} __rte_cache_aligned;
-/* TX queue control descriptor. */
-struct txq_ctrl {
- struct priv *priv; /* Back pointer to private data. */
+/* Verbs Tx queue elements. */
+struct mlx5_txq_ibv {
+ LIST_ENTRY(mlx5_txq_ibv) next; /* Pointer to the next element. */
+ rte_atomic32_t refcnt; /* Reference counter. */
struct ibv_cq *cq; /* Completion Queue. */
struct ibv_qp *qp; /* Queue Pair. */
+};
+
+/* TX queue control descriptor. */
+struct mlx5_txq_ctrl {
+ LIST_ENTRY(mlx5_txq_ctrl) next; /* Pointer to the next element. */
+ rte_atomic32_t refcnt; /* Reference counter. */
+ struct priv *priv; /* Back pointer to private data. */
unsigned int socket; /* CPU socket ID for allocations. */
- struct txq txq; /* Data path structure. */
+ unsigned int max_inline_data; /* Max inline data. */
+ unsigned int max_tso_header; /* Max TSO header size. */
+ struct mlx5_txq_ibv *ibv; /* Verbs queue object. */
+ struct mlx5_txq_data txq; /* Data path structure. */
+ off_t uar_mmap_offset; /* UAR mmap offset for non-primary process. */
};
/* mlx5_rxq.c */
-extern const struct hash_rxq_init hash_rxq_init[];
-extern const unsigned int hash_rxq_init_n;
-
extern uint8_t rss_hash_default_key[];
extern const size_t rss_hash_default_key_len;
-size_t priv_flow_attr(struct priv *, struct ibv_exp_flow_attr *,
- size_t, enum hash_rxq_type);
-int priv_create_hash_rxqs(struct priv *);
-void priv_destroy_hash_rxqs(struct priv *);
-int priv_allow_flow_type(struct priv *, enum hash_rxq_flow_type);
-int priv_rehash_flows(struct priv *);
-void rxq_cleanup(struct rxq_ctrl *);
+void mlx5_rxq_cleanup(struct mlx5_rxq_ctrl *);
int mlx5_rx_queue_setup(struct rte_eth_dev *, uint16_t, uint16_t, unsigned int,
const struct rte_eth_rxconf *, struct rte_mempool *);
void mlx5_rx_queue_release(void *);
-uint16_t mlx5_rx_burst_secondary_setup(void *, struct rte_mbuf **, uint16_t);
int priv_rx_intr_vec_enable(struct priv *priv);
void priv_rx_intr_vec_disable(struct priv *priv);
-#ifdef HAVE_UPDATE_CQ_CI
int mlx5_rx_intr_enable(struct rte_eth_dev *dev, uint16_t rx_queue_id);
int mlx5_rx_intr_disable(struct rte_eth_dev *dev, uint16_t rx_queue_id);
-#endif /* HAVE_UPDATE_CQ_CI */
+struct mlx5_rxq_ibv *mlx5_priv_rxq_ibv_new(struct priv *, uint16_t);
+struct mlx5_rxq_ibv *mlx5_priv_rxq_ibv_get(struct priv *, uint16_t);
+int mlx5_priv_rxq_ibv_release(struct priv *, struct mlx5_rxq_ibv *);
+int mlx5_priv_rxq_ibv_releasable(struct priv *, struct mlx5_rxq_ibv *);
+int mlx5_priv_rxq_ibv_verify(struct priv *);
+struct mlx5_rxq_ctrl *mlx5_priv_rxq_new(struct priv *, uint16_t,
+ uint16_t, unsigned int,
+ struct rte_mempool *);
+struct mlx5_rxq_ctrl *mlx5_priv_rxq_get(struct priv *, uint16_t);
+int mlx5_priv_rxq_release(struct priv *, uint16_t);
+int mlx5_priv_rxq_releasable(struct priv *, uint16_t);
+int mlx5_priv_rxq_verify(struct priv *);
+int rxq_alloc_elts(struct mlx5_rxq_ctrl *);
+struct mlx5_ind_table_ibv *mlx5_priv_ind_table_ibv_new(struct priv *,
+ uint16_t [],
+ uint16_t);
+struct mlx5_ind_table_ibv *mlx5_priv_ind_table_ibv_get(struct priv *,
+ uint16_t [],
+ uint16_t);
+int mlx5_priv_ind_table_ibv_release(struct priv *, struct mlx5_ind_table_ibv *);
+int mlx5_priv_ind_table_ibv_verify(struct priv *);
+struct mlx5_hrxq *mlx5_priv_hrxq_new(struct priv *, uint8_t *, uint8_t,
+ uint64_t, uint16_t [], uint16_t);
+struct mlx5_hrxq *mlx5_priv_hrxq_get(struct priv *, uint8_t *, uint8_t,
+ uint64_t, uint16_t [], uint16_t);
+int mlx5_priv_hrxq_release(struct priv *, struct mlx5_hrxq *);
+int mlx5_priv_hrxq_ibv_verify(struct priv *);
/* mlx5_txq.c */
-void txq_cleanup(struct txq_ctrl *);
-int txq_ctrl_setup(struct rte_eth_dev *, struct txq_ctrl *, uint16_t,
- unsigned int, const struct rte_eth_txconf *);
int mlx5_tx_queue_setup(struct rte_eth_dev *, uint16_t, uint16_t, unsigned int,
const struct rte_eth_txconf *);
void mlx5_tx_queue_release(void *);
-uint16_t mlx5_tx_burst_secondary_setup(void *, struct rte_mbuf **, uint16_t);
+int priv_tx_uar_remap(struct priv *priv, int fd);
+struct mlx5_txq_ibv *mlx5_priv_txq_ibv_new(struct priv *, uint16_t);
+struct mlx5_txq_ibv *mlx5_priv_txq_ibv_get(struct priv *, uint16_t);
+int mlx5_priv_txq_ibv_release(struct priv *, struct mlx5_txq_ibv *);
+int mlx5_priv_txq_ibv_releasable(struct priv *, struct mlx5_txq_ibv *);
+int mlx5_priv_txq_ibv_verify(struct priv *);
+struct mlx5_txq_ctrl *mlx5_priv_txq_new(struct priv *, uint16_t,
+ uint16_t, unsigned int,
+ const struct rte_eth_txconf *);
+struct mlx5_txq_ctrl *mlx5_priv_txq_get(struct priv *, uint16_t);
+int mlx5_priv_txq_release(struct priv *, uint16_t);
+int mlx5_priv_txq_releasable(struct priv *, uint16_t);
+int mlx5_priv_txq_verify(struct priv *);
+void txq_alloc_elts(struct mlx5_txq_ctrl *);
/* mlx5_rxtx.c */
@@ -346,18 +311,19 @@ int mlx5_tx_descriptor_status(void *, uint16_t);
/* Vectorized version of mlx5_rxtx.c */
int priv_check_raw_vec_tx_support(struct priv *);
int priv_check_vec_tx_support(struct priv *);
-int rxq_check_vec_support(struct rxq *);
+int rxq_check_vec_support(struct mlx5_rxq_data *);
int priv_check_vec_rx_support(struct priv *);
-void priv_prep_vec_rx_function(struct priv *);
uint16_t mlx5_tx_burst_raw_vec(void *, struct rte_mbuf **, uint16_t);
uint16_t mlx5_tx_burst_vec(void *, struct rte_mbuf **, uint16_t);
uint16_t mlx5_rx_burst_vec(void *, struct rte_mbuf **, uint16_t);
/* mlx5_mr.c */
-struct ibv_mr *mlx5_mp2mr(struct ibv_pd *, struct rte_mempool *);
-void txq_mp2mr_iter(struct rte_mempool *, void *);
-uint32_t txq_mp2mr_reg(struct txq *, struct rte_mempool *, unsigned int);
+void mlx5_mp2mr_iter(struct rte_mempool *, void *);
+struct mlx5_mr *priv_txq_mp2mr_reg(struct priv *priv, struct mlx5_txq_data *,
+ struct rte_mempool *, unsigned int);
+struct mlx5_mr *mlx5_txq_mp2mr_reg(struct mlx5_txq_data *, struct rte_mempool *,
+ unsigned int);
#ifndef NDEBUG
/**
@@ -419,16 +385,24 @@ check_cqe(volatile struct mlx5_cqe *cqe,
if ((syndrome == MLX5_CQE_SYNDROME_LOCAL_LENGTH_ERR) ||
(syndrome == MLX5_CQE_SYNDROME_REMOTE_ABORTED_ERR))
return 0;
- if (!check_cqe_seen(cqe))
+ if (!check_cqe_seen(cqe)) {
ERROR("unexpected CQE error %u (0x%02x)"
" syndrome 0x%02x",
op_code, op_code, syndrome);
+ rte_hexdump(stderr, "MLX5 Error CQE:",
+ (const void *)((uintptr_t)err_cqe),
+ sizeof(*err_cqe));
+ }
return 1;
} else if ((op_code != MLX5_CQE_RESP_SEND) &&
(op_code != MLX5_CQE_REQ)) {
- if (!check_cqe_seen(cqe))
+ if (!check_cqe_seen(cqe)) {
ERROR("unexpected CQE opcode %u (0x%02x)",
op_code, op_code);
+ rte_hexdump(stderr, "MLX5 CQE:",
+ (const void *)((uintptr_t)cqe),
+ sizeof(*cqe));
+ }
return 1;
}
#endif /* NDEBUG */
@@ -447,7 +421,7 @@ check_cqe(volatile struct mlx5_cqe *cqe,
* WQE address.
*/
static inline uintptr_t *
-tx_mlx5_wqe(struct txq *txq, uint16_t ci)
+tx_mlx5_wqe(struct mlx5_txq_data *txq, uint16_t ci)
{
ci &= ((1 << txq->wqe_n) - 1);
return (uintptr_t *)((uintptr_t)txq->wqes + ci * MLX5_WQE_SIZE);
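tx_mlx5_wqe() above wraps the WQE index with a mask, which only works because the ring size is a power of two (1 << txq->wqe_n). A small self-contained sketch of the masking idiom:

#include <stdint.h>
#include <stdio.h>

#define LOG_RING_SIZE 4u                       /* 16 entries. */
#define RING_MASK ((1u << LOG_RING_SIZE) - 1)

int
main(void)
{
	uint16_t ci;

	/* A monotonically increasing counter wraps cheaply with the mask. */
	for (ci = 14; ci < 18; ci++)
		printf("ci %2u -> slot %u\n", ci, ci & RING_MASK);
	return 0;
}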
@@ -462,7 +436,7 @@ tx_mlx5_wqe(struct txq *txq, uint16_t ci)
* Pointer to TX queue structure.
*/
static __rte_always_inline void
-mlx5_tx_complete(struct txq *txq)
+mlx5_tx_complete(struct mlx5_txq_data *txq)
{
const uint16_t elts_n = 1 << txq->elts_n;
const uint16_t elts_m = elts_n - 1;
@@ -483,13 +457,18 @@ mlx5_tx_complete(struct txq *txq)
#ifndef NDEBUG
if ((MLX5_CQE_OPCODE(cqe->op_own) == MLX5_CQE_RESP_ERR) ||
(MLX5_CQE_OPCODE(cqe->op_own) == MLX5_CQE_REQ_ERR)) {
- if (!check_cqe_seen(cqe))
+ if (!check_cqe_seen(cqe)) {
ERROR("unexpected error CQE, TX stopped");
+ rte_hexdump(stderr, "MLX5 TXQ:",
+ (const void *)((uintptr_t)txq->wqes),
+ ((1 << txq->wqe_n) *
+ MLX5_WQE_SIZE));
+ }
return;
}
#endif /* NDEBUG */
++cq_ci;
- txq->wqe_pi = ntohs(cqe->wqe_counter);
+ txq->wqe_pi = rte_be_to_cpu_16(cqe->wqe_counter);
ctrl = (volatile struct mlx5_wqe_ctrl *)
tx_mlx5_wqe(txq, txq->wqe_pi);
elts_tail = ctrl->ctrl3;
@@ -526,8 +505,8 @@ mlx5_tx_complete(struct txq *txq)
txq->cq_ci = cq_ci;
txq->elts_tail = elts_tail;
/* Update the consumer index. */
- rte_wmb();
- *txq->cq_db = htonl(cq_ci);
+ rte_compiler_barrier();
+ *txq->cq_db = rte_cpu_to_be_32(cq_ci);
}
/**
@@ -562,51 +541,80 @@ mlx5_tx_mb2mp(struct rte_mbuf *buf)
* mr->lkey on success, (uint32_t)-1 on failure.
*/
static __rte_always_inline uint32_t
-mlx5_tx_mb2mr(struct txq *txq, struct rte_mbuf *mb)
+mlx5_tx_mb2mr(struct mlx5_txq_data *txq, struct rte_mbuf *mb)
{
uint16_t i = txq->mr_cache_idx;
uintptr_t addr = rte_pktmbuf_mtod(mb, uintptr_t);
+ struct mlx5_mr *mr;
assert(i < RTE_DIM(txq->mp2mr));
- if (likely(txq->mp2mr[i].start <= addr && txq->mp2mr[i].end >= addr))
- return txq->mp2mr[i].lkey;
+ if (likely(txq->mp2mr[i]->start <= addr && txq->mp2mr[i]->end >= addr))
+ return txq->mp2mr[i]->lkey;
for (i = 0; (i != RTE_DIM(txq->mp2mr)); ++i) {
- if (unlikely(txq->mp2mr[i].mr == NULL)) {
+ if (unlikely(txq->mp2mr[i]->mr == NULL)) {
/* Unknown MP, add a new MR for it. */
break;
}
- if (txq->mp2mr[i].start <= addr &&
- txq->mp2mr[i].end >= addr) {
- assert(txq->mp2mr[i].lkey != (uint32_t)-1);
- assert(htonl(txq->mp2mr[i].mr->lkey) ==
- txq->mp2mr[i].lkey);
+ if (txq->mp2mr[i]->start <= addr &&
+ txq->mp2mr[i]->end >= addr) {
+ assert(txq->mp2mr[i]->lkey != (uint32_t)-1);
+ assert(rte_cpu_to_be_32(txq->mp2mr[i]->mr->lkey) ==
+ txq->mp2mr[i]->lkey);
txq->mr_cache_idx = i;
- return txq->mp2mr[i].lkey;
+ return txq->mp2mr[i]->lkey;
}
}
txq->mr_cache_idx = 0;
- return txq_mp2mr_reg(txq, mlx5_tx_mb2mp(mb), i);
+ mr = mlx5_txq_mp2mr_reg(txq, mlx5_tx_mb2mp(mb), i);
+ /*
+ * Request the reference to use in this queue, the original one is
+ * kept by the control plane.
+ */
+ if (mr) {
+ rte_atomic32_inc(&mr->refcnt);
+ return mr->lkey;
+ }
+ return (uint32_t)-1;
}
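mlx5_tx_mb2mr() above keeps a small per-queue array of memory regions, probing the most recently hit slot before falling back to a linear scan and registering a new MR on a miss. A hedged standalone sketch of that lookup strategy; the address ranges, keys and cache size are invented for illustration:

#include <stdint.h>
#include <stdio.h>

#define CACHE_SIZE 4

struct range { uintptr_t start, end; uint32_t key; };

static struct range cache[CACHE_SIZE] = {
	{ 0x1000, 0x1fff, 11 }, { 0x2000, 0x2fff, 22 },
	{ 0x3000, 0x3fff, 33 }, { 0x4000, 0x4fff, 44 },
};
static unsigned int last_hit;

static uint32_t
lookup(uintptr_t addr)
{
	unsigned int i = last_hit;

	/* Fast path: the previously hit entry usually matches again. */
	if (cache[i].start <= addr && addr <= cache[i].end)
		return cache[i].key;
	for (i = 0; i < CACHE_SIZE; i++)
		if (cache[i].start <= addr && addr <= cache[i].end) {
			last_hit = i;
			return cache[i].key;
		}
	return (uint32_t)-1;   /* Miss: the driver would register a new MR. */
}

int
main(void)
{
	printf("0x3100 -> %u\n", lookup(0x3100));
	printf("0x3200 -> %u (fast path)\n", lookup(0x3200));
	return 0;
}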
/**
- * Ring TX queue doorbell.
+ * Ring TX queue doorbell and flush the update if requested.
*
* @param txq
* Pointer to TX queue structure.
* @param wqe
* Pointer to the last WQE posted in the NIC.
+ * @param cond
+ * Request for write memory barrier after BlueFlame update.
*/
static __rte_always_inline void
-mlx5_tx_dbrec(struct txq *txq, volatile struct mlx5_wqe *wqe)
+mlx5_tx_dbrec_cond_wmb(struct mlx5_txq_data *txq, volatile struct mlx5_wqe *wqe,
+ int cond)
{
uint64_t *dst = (uint64_t *)((uintptr_t)txq->bf_reg);
volatile uint64_t *src = ((volatile uint64_t *)wqe);
- rte_wmb();
- *txq->qp_db = htonl(txq->wqe_ci);
+ rte_io_wmb();
+ *txq->qp_db = rte_cpu_to_be_32(txq->wqe_ci);
/* Ensure ordering between DB record and BF copy. */
rte_wmb();
*dst = *src;
+ if (cond)
+ rte_wmb();
+}
+
+/**
+ * Ring TX queue doorbell and flush the update by write memory barrier.
+ *
+ * @param txq
+ * Pointer to TX queue structure.
+ * @param wqe
+ * Pointer to the last WQE posted in the NIC.
+ */
+static __rte_always_inline void
+mlx5_tx_dbrec(struct mlx5_txq_data *txq, volatile struct mlx5_wqe *wqe)
+{
+ mlx5_tx_dbrec_cond_wmb(txq, wqe, 1);
}
#endif /* RTE_PMD_MLX5_RXTX_H_ */
diff --git a/drivers/net/mlx5/mlx5_rxtx_vec.c b/drivers/net/mlx5/mlx5_rxtx_vec.c
new file mode 100644
index 00000000..ba6c8cef
--- /dev/null
+++ b/drivers/net/mlx5/mlx5_rxtx_vec.c
@@ -0,0 +1,388 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright 2017 6WIND S.A.
+ * Copyright 2017 Mellanox.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of 6WIND S.A. nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <assert.h>
+#include <stdint.h>
+#include <string.h>
+#include <stdlib.h>
+
+/* Verbs header. */
+/* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */
+#ifdef PEDANTIC
+#pragma GCC diagnostic ignored "-Wpedantic"
+#endif
+#include <infiniband/verbs.h>
+#include <infiniband/mlx5dv.h>
+#ifdef PEDANTIC
+#pragma GCC diagnostic error "-Wpedantic"
+#endif
+
+#include <rte_mbuf.h>
+#include <rte_mempool.h>
+#include <rte_prefetch.h>
+
+#include "mlx5.h"
+#include "mlx5_utils.h"
+#include "mlx5_rxtx.h"
+#include "mlx5_rxtx_vec.h"
+#include "mlx5_autoconf.h"
+#include "mlx5_defs.h"
+#include "mlx5_prm.h"
+
+#if defined RTE_ARCH_X86_64
+#include "mlx5_rxtx_vec_sse.h"
+#elif defined RTE_ARCH_ARM64
+#include "mlx5_rxtx_vec_neon.h"
+#else
+#error "This should not be compiled if SIMD instructions are not supported."
+#endif
+
+/**
+ * Count the number of continuous single segment packets.
+ *
+ * @param pkts
+ * Pointer to array of packets.
+ * @param pkts_n
+ * Number of packets.
+ *
+ * @return
+ * Number of continuous single segment packets.
+ */
+static inline unsigned int
+txq_check_multiseg(struct rte_mbuf **pkts, uint16_t pkts_n)
+{
+ unsigned int pos;
+
+ if (!pkts_n)
+ return 0;
+ /* Count the number of continuous single segment packets. */
+ for (pos = 0; pos < pkts_n; ++pos)
+ if (NB_SEGS(pkts[pos]) > 1)
+ break;
+ return pos;
+}
+
+/**
+ * Count the number of packets having same ol_flags and calculate cs_flags.
+ *
+ * @param txq
+ * Pointer to TX queue structure.
+ * @param pkts
+ * Pointer to array of packets.
+ * @param pkts_n
+ * Number of packets.
+ * @param cs_flags
+ * Pointer of flags to be returned.
+ *
+ * @return
+ * Number of packets having same ol_flags.
+ */
+static inline unsigned int
+txq_calc_offload(struct mlx5_txq_data *txq, struct rte_mbuf **pkts,
+ uint16_t pkts_n, uint8_t *cs_flags)
+{
+ unsigned int pos;
+ const uint64_t ol_mask =
+ PKT_TX_IP_CKSUM | PKT_TX_TCP_CKSUM |
+ PKT_TX_UDP_CKSUM | PKT_TX_TUNNEL_GRE |
+ PKT_TX_TUNNEL_VXLAN | PKT_TX_OUTER_IP_CKSUM;
+
+ if (!pkts_n)
+ return 0;
+ /* Count the number of packets having same ol_flags. */
+ for (pos = 1; pos < pkts_n; ++pos)
+ if ((pkts[pos]->ol_flags ^ pkts[0]->ol_flags) & ol_mask)
+ break;
+ /* Should open another MPW session for the rest. */
+ if (pkts[0]->ol_flags &
+ (PKT_TX_IP_CKSUM | PKT_TX_TCP_CKSUM | PKT_TX_UDP_CKSUM)) {
+ const uint64_t is_tunneled =
+ pkts[0]->ol_flags &
+ (PKT_TX_TUNNEL_GRE |
+ PKT_TX_TUNNEL_VXLAN);
+
+ if (is_tunneled && txq->tunnel_en) {
+ *cs_flags = MLX5_ETH_WQE_L3_INNER_CSUM |
+ MLX5_ETH_WQE_L4_INNER_CSUM;
+ if (pkts[0]->ol_flags & PKT_TX_OUTER_IP_CKSUM)
+ *cs_flags |= MLX5_ETH_WQE_L3_CSUM;
+ } else {
+ *cs_flags = MLX5_ETH_WQE_L3_CSUM |
+ MLX5_ETH_WQE_L4_CSUM;
+ }
+ }
+ return pos;
+}
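txq_calc_offload() above batches packets whose offload flags agree under a mask by XOR-ing the flag words and testing the masked difference. A tiny standalone illustration with made-up flag values and mask:

#include <stdint.h>
#include <stdio.h>

#define MASK 0x0ffu        /* Only these bits must match. */

int
main(void)
{
	uint64_t a = 0x1a5, b = 0x2a5, c = 0x1a6;

	/* a and b differ only outside the mask, so they can share a batch. */
	printf("a~b: %s\n", ((a ^ b) & MASK) ? "split" : "same batch");
	printf("a~c: %s\n", ((a ^ c) & MASK) ? "split" : "same batch");
	return 0;
}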
+
+/**
+ * DPDK callback for vectorized TX.
+ *
+ * @param dpdk_txq
+ * Generic pointer to TX queue structure.
+ * @param[in] pkts
+ * Packets to transmit.
+ * @param pkts_n
+ * Number of packets in array.
+ *
+ * @return
+ * Number of packets successfully transmitted (<= pkts_n).
+ */
+uint16_t
+mlx5_tx_burst_raw_vec(void *dpdk_txq, struct rte_mbuf **pkts,
+ uint16_t pkts_n)
+{
+ struct mlx5_txq_data *txq = (struct mlx5_txq_data *)dpdk_txq;
+ uint16_t nb_tx = 0;
+
+ while (pkts_n > nb_tx) {
+ uint16_t n;
+ uint16_t ret;
+
+ n = RTE_MIN((uint16_t)(pkts_n - nb_tx), MLX5_VPMD_TX_MAX_BURST);
+ ret = txq_burst_v(txq, &pkts[nb_tx], n, 0);
+ nb_tx += ret;
+ if (!ret)
+ break;
+ }
+ return nb_tx;
+}
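The burst loop above slices the caller's packet array into chunks of at most MLX5_VPMD_TX_MAX_BURST and stops as soon as a chunk makes no progress. A generic sketch of that chunking pattern with a stub backend standing in for txq_burst_v():

#include <stdio.h>

#define MAX_CHUNK 4u

/* Stub backend: pretends it can handle at most 3 items per call. */
static unsigned int
process_chunk(unsigned int n)
{
	return n < 3 ? n : 3;
}

int
main(void)
{
	unsigned int total = 10, done = 0;

	while (done < total) {
		unsigned int n = total - done;
		unsigned int ret;

		if (n > MAX_CHUNK)
			n = MAX_CHUNK;
		ret = process_chunk(n);
		done += ret;
		if (!ret)          /* No progress: stop, as the driver does. */
			break;
	}
	printf("processed %u of %u\n", done, total);
	return 0;
}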
+
+/**
+ * DPDK callback for vectorized TX with multi-seg packets and offload.
+ *
+ * @param dpdk_txq
+ * Generic pointer to TX queue structure.
+ * @param[in] pkts
+ * Packets to transmit.
+ * @param pkts_n
+ * Number of packets in array.
+ *
+ * @return
+ * Number of packets successfully transmitted (<= pkts_n).
+ */
+uint16_t
+mlx5_tx_burst_vec(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
+{
+ struct mlx5_txq_data *txq = (struct mlx5_txq_data *)dpdk_txq;
+ uint16_t nb_tx = 0;
+
+ while (pkts_n > nb_tx) {
+ uint8_t cs_flags = 0;
+ uint16_t n;
+ uint16_t ret;
+
+ /* Transmit multi-seg packets in the head of pkts list. */
+ if (!(txq->flags & ETH_TXQ_FLAGS_NOMULTSEGS) &&
+ NB_SEGS(pkts[nb_tx]) > 1)
+ nb_tx += txq_scatter_v(txq,
+ &pkts[nb_tx],
+ pkts_n - nb_tx);
+ n = RTE_MIN((uint16_t)(pkts_n - nb_tx), MLX5_VPMD_TX_MAX_BURST);
+ if (!(txq->flags & ETH_TXQ_FLAGS_NOMULTSEGS))
+ n = txq_check_multiseg(&pkts[nb_tx], n);
+ if (!(txq->flags & ETH_TXQ_FLAGS_NOOFFLOADS))
+ n = txq_calc_offload(txq, &pkts[nb_tx], n, &cs_flags);
+ ret = txq_burst_v(txq, &pkts[nb_tx], n, cs_flags);
+ nb_tx += ret;
+ if (!ret)
+ break;
+ }
+ return nb_tx;
+}
+
+/**
+ * Skip error packets.
+ *
+ * @param rxq
+ * Pointer to RX queue structure.
+ * @param[out] pkts
+ * Array to store received packets.
+ * @param pkts_n
+ * Maximum number of packets in array.
+ *
+ * @return
+ * Number of packets successfully received (<= pkts_n).
+ */
+static uint16_t
+rxq_handle_pending_error(struct mlx5_rxq_data *rxq, struct rte_mbuf **pkts,
+ uint16_t pkts_n)
+{
+ uint16_t n = 0;
+ unsigned int i;
+#ifdef MLX5_PMD_SOFT_COUNTERS
+ uint32_t err_bytes = 0;
+#endif
+
+ for (i = 0; i < pkts_n; ++i) {
+ struct rte_mbuf *pkt = pkts[i];
+
+ if (pkt->packet_type == RTE_PTYPE_ALL_MASK) {
+#ifdef MLX5_PMD_SOFT_COUNTERS
+ err_bytes += PKT_LEN(pkt);
+#endif
+ rte_pktmbuf_free_seg(pkt);
+ } else {
+ pkts[n++] = pkt;
+ }
+ }
+ rxq->stats.idropped += (pkts_n - n);
+#ifdef MLX5_PMD_SOFT_COUNTERS
+ /* Correct counters of errored completions. */
+ rxq->stats.ipackets -= (pkts_n - n);
+ rxq->stats.ibytes -= err_bytes;
+#endif
+ rxq->pending_err = 0;
+ return n;
+}
+
+/**
+ * DPDK callback for vectorized RX.
+ *
+ * @param dpdk_rxq
+ * Generic pointer to RX queue structure.
+ * @param[out] pkts
+ * Array to store received packets.
+ * @param pkts_n
+ * Maximum number of packets in array.
+ *
+ * @return
+ * Number of packets successfully received (<= pkts_n).
+ */
+uint16_t
+mlx5_rx_burst_vec(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n)
+{
+ struct mlx5_rxq_data *rxq = dpdk_rxq;
+ uint16_t nb_rx;
+
+ nb_rx = rxq_burst_v(rxq, pkts, pkts_n);
+ if (unlikely(rxq->pending_err))
+ nb_rx = rxq_handle_pending_error(rxq, pkts, nb_rx);
+ return nb_rx;
+}
+
+/**
+ * Check that Tx queue flags are set for raw vectorized Tx.
+ *
+ * @param priv
+ * Pointer to private structure.
+ *
+ * @return
+ * 1 if supported, negative errno value if not.
+ */
+int __attribute__((cold))
+priv_check_raw_vec_tx_support(struct priv *priv)
+{
+ uint16_t i;
+
+	/* All the configured queues should support it. */
+ for (i = 0; i < priv->txqs_n; ++i) {
+ struct mlx5_txq_data *txq = (*priv->txqs)[i];
+
+ if (!(txq->flags & ETH_TXQ_FLAGS_NOMULTSEGS) ||
+ !(txq->flags & ETH_TXQ_FLAGS_NOOFFLOADS))
+ break;
+ }
+ if (i != priv->txqs_n)
+ return -ENOTSUP;
+ return 1;
+}
+
+/**
+ * Check whether a device can support vectorized TX.
+ *
+ * @param priv
+ * Pointer to private structure.
+ *
+ * @return
+ * 1 if supported, negative errno value if not.
+ */
+int __attribute__((cold))
+priv_check_vec_tx_support(struct priv *priv)
+{
+ if (!priv->tx_vec_en ||
+ priv->txqs_n > MLX5_VPMD_MIN_TXQS ||
+ priv->mps != MLX5_MPW_ENHANCED ||
+ priv->tso)
+ return -ENOTSUP;
+ return 1;
+}
+
+/**
+ * Check whether an RX queue can support vectorized RX.
+ *
+ * @param rxq
+ * Pointer to RX queue.
+ *
+ * @return
+ * 1 if supported, negative errno value if not.
+ */
+int __attribute__((cold))
+rxq_check_vec_support(struct mlx5_rxq_data *rxq)
+{
+ struct mlx5_rxq_ctrl *ctrl =
+ container_of(rxq, struct mlx5_rxq_ctrl, rxq);
+
+ if (!ctrl->priv->rx_vec_en || rxq->sges_n != 0)
+ return -ENOTSUP;
+ return 1;
+}
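rxq_check_vec_support() above recovers the control structure from the embedded data-path structure with container_of() (provided by rte_common.h in DPDK). A self-contained version of the same idiom with a local macro and invented fields:

#include <stddef.h>
#include <stdio.h>

#define my_container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

struct ctrl {
	int socket;
	struct { int elts_n; } data;   /* Embedded data-path part. */
};

int
main(void)
{
	struct ctrl c = { .socket = 1, .data = { .elts_n = 9 } };
	struct ctrl *back = my_container_of(&c.data, struct ctrl, data);

	printf("socket via container_of: %d\n", back->socket);
	return 0;
}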
+
+/**
+ * Check whether a device can support vectorized RX.
+ *
+ * @param priv
+ * Pointer to private structure.
+ *
+ * @return
+ * 1 if supported, negative errno value if not.
+ */
+int __attribute__((cold))
+priv_check_vec_rx_support(struct priv *priv)
+{
+ uint16_t i;
+
+ if (!priv->rx_vec_en)
+ return -ENOTSUP;
+	/* All the configured queues should support it. */
+ for (i = 0; i < priv->rxqs_n; ++i) {
+ struct mlx5_rxq_data *rxq = (*priv->rxqs)[i];
+
+ if (!rxq)
+ continue;
+ if (rxq_check_vec_support(rxq) < 0)
+ break;
+ }
+ if (i != priv->rxqs_n)
+ return -ENOTSUP;
+ return 1;
+}
diff --git a/drivers/net/mlx5/mlx5_rxtx_vec.h b/drivers/net/mlx5/mlx5_rxtx_vec.h
new file mode 100644
index 00000000..1f08ed0b
--- /dev/null
+++ b/drivers/net/mlx5/mlx5_rxtx_vec.h
@@ -0,0 +1,130 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright 2017 6WIND S.A.
+ * Copyright 2017 Mellanox.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of 6WIND S.A. nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef RTE_PMD_MLX5_RXTX_VEC_H_
+#define RTE_PMD_MLX5_RXTX_VEC_H_
+
+#include <rte_common.h>
+#include <rte_mbuf.h>
+
+#include "mlx5_autoconf.h"
+#include "mlx5_prm.h"
+
+/*
+ * Compile-time sanity checks for vectorized functions.
+ */
+
+#define S_ASSERT_RTE_MBUF(s) \
+ static_assert(s, "A field of struct rte_mbuf is changed")
+#define S_ASSERT_MLX5_CQE(s) \
+ static_assert(s, "A field of struct mlx5_cqe is changed")
+
+/* rxq_cq_decompress_v() */
+S_ASSERT_RTE_MBUF(offsetof(struct rte_mbuf, pkt_len) ==
+ offsetof(struct rte_mbuf, rx_descriptor_fields1) + 4);
+S_ASSERT_RTE_MBUF(offsetof(struct rte_mbuf, data_len) ==
+ offsetof(struct rte_mbuf, rx_descriptor_fields1) + 8);
+S_ASSERT_RTE_MBUF(offsetof(struct rte_mbuf, hash) ==
+ offsetof(struct rte_mbuf, rx_descriptor_fields1) + 12);
+
+/* rxq_cq_to_ptype_oflags_v() */
+S_ASSERT_RTE_MBUF(offsetof(struct rte_mbuf, ol_flags) ==
+ offsetof(struct rte_mbuf, rearm_data) + 8);
+S_ASSERT_RTE_MBUF(offsetof(struct rte_mbuf, rearm_data) ==
+ RTE_ALIGN(offsetof(struct rte_mbuf, rearm_data), 16));
+
+/* rxq_burst_v() */
+S_ASSERT_RTE_MBUF(offsetof(struct rte_mbuf, pkt_len) ==
+ offsetof(struct rte_mbuf, rx_descriptor_fields1) + 4);
+S_ASSERT_RTE_MBUF(offsetof(struct rte_mbuf, data_len) ==
+ offsetof(struct rte_mbuf, rx_descriptor_fields1) + 8);
+#if (RTE_CACHE_LINE_SIZE == 128)
+S_ASSERT_MLX5_CQE(offsetof(struct mlx5_cqe, pkt_info) == 64);
+#else
+S_ASSERT_MLX5_CQE(offsetof(struct mlx5_cqe, pkt_info) == 0);
+#endif
+S_ASSERT_MLX5_CQE(offsetof(struct mlx5_cqe, rx_hash_res) ==
+ offsetof(struct mlx5_cqe, pkt_info) + 12);
+S_ASSERT_MLX5_CQE(offsetof(struct mlx5_cqe, rsvd1) +
+ sizeof(((struct mlx5_cqe *)0)->rsvd1) ==
+ offsetof(struct mlx5_cqe, hdr_type_etc));
+S_ASSERT_MLX5_CQE(offsetof(struct mlx5_cqe, vlan_info) ==
+ offsetof(struct mlx5_cqe, hdr_type_etc) + 2);
+S_ASSERT_MLX5_CQE(offsetof(struct mlx5_cqe, rsvd2) +
+ sizeof(((struct mlx5_cqe *)0)->rsvd2) ==
+ offsetof(struct mlx5_cqe, byte_cnt));
+S_ASSERT_MLX5_CQE(offsetof(struct mlx5_cqe, sop_drop_qpn) ==
+ RTE_ALIGN(offsetof(struct mlx5_cqe, sop_drop_qpn), 8));
+S_ASSERT_MLX5_CQE(offsetof(struct mlx5_cqe, op_own) ==
+ offsetof(struct mlx5_cqe, sop_drop_qpn) + 7);
+
+/**
+ * Replenish buffers for RX in bulk.
+ *
+ * @param rxq
+ * Pointer to RX queue structure.
+ * @param n
+ * Number of buffers to be replenished.
+ */
+static inline void
+mlx5_rx_replenish_bulk_mbuf(struct mlx5_rxq_data *rxq, uint16_t n)
+{
+ const uint16_t q_n = 1 << rxq->elts_n;
+ const uint16_t q_mask = q_n - 1;
+ uint16_t elts_idx = rxq->rq_ci & q_mask;
+ struct rte_mbuf **elts = &(*rxq->elts)[elts_idx];
+ volatile struct mlx5_wqe_data_seg *wq = &(*rxq->wqes)[elts_idx];
+ unsigned int i;
+
+ assert(n >= MLX5_VPMD_RXQ_RPLNSH_THRESH);
+ assert(n <= (uint16_t)(q_n - (rxq->rq_ci - rxq->rq_pi)));
+ assert(MLX5_VPMD_RXQ_RPLNSH_THRESH > MLX5_VPMD_DESCS_PER_LOOP);
+ /* Not to cross queue end. */
+ n = RTE_MIN(n - MLX5_VPMD_DESCS_PER_LOOP, q_n - elts_idx);
+ if (rte_mempool_get_bulk(rxq->mp, (void *)elts, n) < 0) {
+ rxq->stats.rx_nombuf += n;
+ return;
+ }
+ for (i = 0; i < n; ++i)
+ wq[i].addr = rte_cpu_to_be_64((uintptr_t)elts[i]->buf_addr +
+ RTE_PKTMBUF_HEADROOM);
+ rxq->rq_ci += n;
+ /* Prevent overflowing into consumed mbufs. */
+ elts_idx = rxq->rq_ci & q_mask;
+ for (i = 0; i < MLX5_VPMD_DESCS_PER_LOOP; ++i)
+ (*rxq->elts)[elts_idx + i] = &rxq->fake_mbuf;
+ rte_io_wmb();
+ *rxq->rq_db = rte_cpu_to_be_32(rxq->rq_ci);
+}
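+
+/*
+ * Worked example with hypothetical values: for elts_n = 8 (q_n = 256),
+ * rq_ci = 250 and a request of n = 64, elts_idx = 250 and
+ * n = RTE_MIN(64 - MLX5_VPMD_DESCS_PER_LOOP, 256 - 250) = 6, so only six
+ * descriptors are posted this time and the ring end is not crossed; the rest
+ * is replenished by a later call after rq_ci wraps around.
+ */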
+
+#endif /* RTE_PMD_MLX5_RXTX_VEC_H_ */
diff --git a/drivers/net/mlx5/mlx5_rxtx_vec_neon.h b/drivers/net/mlx5/mlx5_rxtx_vec_neon.h
new file mode 100644
index 00000000..c721d80e
--- /dev/null
+++ b/drivers/net/mlx5/mlx5_rxtx_vec_neon.h
@@ -0,0 +1,1039 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright 2017 6WIND S.A.
+ * Copyright 2017 Mellanox.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of 6WIND S.A. nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef RTE_PMD_MLX5_RXTX_VEC_NEON_H_
+#define RTE_PMD_MLX5_RXTX_VEC_NEON_H_
+
+#include <assert.h>
+#include <stdint.h>
+#include <string.h>
+#include <stdlib.h>
+#include <arm_neon.h>
+
+#include <rte_mbuf.h>
+#include <rte_mempool.h>
+#include <rte_prefetch.h>
+
+#include "mlx5.h"
+#include "mlx5_utils.h"
+#include "mlx5_rxtx.h"
+#include "mlx5_rxtx_vec.h"
+#include "mlx5_autoconf.h"
+#include "mlx5_defs.h"
+#include "mlx5_prm.h"
+
+#pragma GCC diagnostic ignored "-Wcast-qual"
+
+/**
+ * Fill in buffer descriptors in a multi-packet send descriptor.
+ *
+ * @param txq
+ * Pointer to TX queue structure.
+ * @param dseg
+ * Pointer to buffer descriptor to be written.
+ * @param pkts
+ * Pointer to array of packets to be sent.
+ * @param n
+ * Number of packets to be filled.
+ */
+static inline void
+txq_wr_dseg_v(struct mlx5_txq_data *txq, uint8_t *dseg,
+ struct rte_mbuf **pkts, unsigned int n)
+{
+ unsigned int pos;
+ uintptr_t addr;
+ const uint8x16_t dseg_shuf_m = {
+ 3, 2, 1, 0, /* length, bswap32 */
+ 4, 5, 6, 7, /* lkey */
+ 15, 14, 13, 12, /* addr, bswap64 */
+ 11, 10, 9, 8
+ };
+#ifdef MLX5_PMD_SOFT_COUNTERS
+ uint32_t tx_byte = 0;
+#endif
+
+ for (pos = 0; pos < n; ++pos, dseg += MLX5_WQE_DWORD_SIZE) {
+ uint8x16_t desc;
+ struct rte_mbuf *pkt = pkts[pos];
+
+ addr = rte_pktmbuf_mtod(pkt, uintptr_t);
+ desc = vreinterpretq_u8_u32((uint32x4_t) {
+ DATA_LEN(pkt),
+ mlx5_tx_mb2mr(txq, pkt),
+ addr,
+ addr >> 32 });
+ desc = vqtbl1q_u8(desc, dseg_shuf_m);
+ vst1q_u8(dseg, desc);
+#ifdef MLX5_PMD_SOFT_COUNTERS
+ tx_byte += DATA_LEN(pkt);
+#endif
+ }
+#ifdef MLX5_PMD_SOFT_COUNTERS
+ txq->stats.obytes += tx_byte;
+#endif
+}
+
+/**
+ * Send multi-segment packets until a single-segment packet is encountered in
+ * the pkts list.
+ *
+ * @param txq
+ * Pointer to TX queue structure.
+ * @param pkts
+ * Pointer to array of packets to be sent.
+ * @param pkts_n
+ * Number of packets to be sent.
+ *
+ * @return
+ * Number of packets successfully transmitted (<= pkts_n).
+ */
+static uint16_t
+txq_scatter_v(struct mlx5_txq_data *txq, struct rte_mbuf **pkts,
+ uint16_t pkts_n)
+{
+ uint16_t elts_head = txq->elts_head;
+ const uint16_t elts_n = 1 << txq->elts_n;
+ const uint16_t elts_m = elts_n - 1;
+ const uint16_t wq_n = 1 << txq->wqe_n;
+ const uint16_t wq_mask = wq_n - 1;
+ const unsigned int nb_dword_per_wqebb =
+ MLX5_WQE_SIZE / MLX5_WQE_DWORD_SIZE;
+ const unsigned int nb_dword_in_hdr =
+ sizeof(struct mlx5_wqe) / MLX5_WQE_DWORD_SIZE;
+ unsigned int n;
+ volatile struct mlx5_wqe *wqe = NULL;
+
+ assert(elts_n > pkts_n);
+ mlx5_tx_complete(txq);
+ if (unlikely(!pkts_n))
+ return 0;
+ for (n = 0; n < pkts_n; ++n) {
+ struct rte_mbuf *buf = pkts[n];
+ unsigned int segs_n = buf->nb_segs;
+ unsigned int ds = nb_dword_in_hdr;
+ unsigned int len = PKT_LEN(buf);
+ uint16_t wqe_ci = txq->wqe_ci;
+ const uint8x16_t ctrl_shuf_m = {
+ 3, 2, 1, 0, /* bswap32 */
+ 7, 6, 5, 4, /* bswap32 */
+ 11, 10, 9, 8, /* bswap32 */
+ 12, 13, 14, 15
+ };
+ uint8_t cs_flags = 0;
+ uint16_t max_elts;
+ uint16_t max_wqe;
+ uint8x16_t *t_wqe;
+ uint8_t *dseg;
+ uint8x16_t ctrl;
+
+ assert(segs_n);
+ max_elts = elts_n - (elts_head - txq->elts_tail);
+ max_wqe = wq_n - (txq->wqe_ci - txq->wqe_pi);
+ /*
+ * A MPW session consumes 2 WQEs at most to
+ * include MLX5_MPW_DSEG_MAX pointers.
+ */
+ if (segs_n == 1 ||
+ max_elts < segs_n || max_wqe < 2)
+ break;
+ wqe = &((volatile struct mlx5_wqe64 *)
+ txq->wqes)[wqe_ci & wq_mask].hdr;
+ if (buf->ol_flags &
+ (PKT_TX_IP_CKSUM | PKT_TX_TCP_CKSUM | PKT_TX_UDP_CKSUM)) {
+ const uint64_t is_tunneled =
+ buf->ol_flags & (PKT_TX_TUNNEL_GRE |
+ PKT_TX_TUNNEL_VXLAN);
+
+ if (is_tunneled && txq->tunnel_en) {
+ cs_flags = MLX5_ETH_WQE_L3_INNER_CSUM |
+ MLX5_ETH_WQE_L4_INNER_CSUM;
+ if (buf->ol_flags & PKT_TX_OUTER_IP_CKSUM)
+ cs_flags |= MLX5_ETH_WQE_L3_CSUM;
+ } else {
+ cs_flags = MLX5_ETH_WQE_L3_CSUM |
+ MLX5_ETH_WQE_L4_CSUM;
+ }
+ }
+ /* Title WQEBB pointer. */
+ t_wqe = (uint8x16_t *)wqe;
+ dseg = (uint8_t *)(wqe + 1);
+ do {
+ if (!(ds++ % nb_dword_per_wqebb)) {
+ dseg = (uint8_t *)
+ &((volatile struct mlx5_wqe64 *)
+ txq->wqes)[++wqe_ci & wq_mask];
+ }
+ txq_wr_dseg_v(txq, dseg, &buf, 1);
+ dseg += MLX5_WQE_DWORD_SIZE;
+ (*txq->elts)[elts_head++ & elts_m] = buf;
+ buf = buf->next;
+ } while (--segs_n);
+ ++wqe_ci;
+ /* Fill CTRL in the header. */
+ ctrl = vreinterpretq_u8_u32((uint32x4_t) {
+ MLX5_OPC_MOD_MPW << 24 |
+ txq->wqe_ci << 8 | MLX5_OPCODE_TSO,
+ txq->qp_num_8s | ds, 0, 0});
+ ctrl = vqtbl1q_u8(ctrl, ctrl_shuf_m);
+ vst1q_u8((void *)t_wqe, ctrl);
+ /* Fill ESEG in the header. */
+ vst1q_u16((void *)(t_wqe + 1),
+ (uint16x8_t) { 0, 0, cs_flags, rte_cpu_to_be_16(len),
+ 0, 0, 0, 0 });
+ txq->wqe_ci = wqe_ci;
+ }
+ if (!n)
+ return 0;
+ txq->elts_comp += (uint16_t)(elts_head - txq->elts_head);
+ txq->elts_head = elts_head;
+ if (txq->elts_comp >= MLX5_TX_COMP_THRESH) {
+ wqe->ctrl[2] = rte_cpu_to_be_32(8);
+ wqe->ctrl[3] = txq->elts_head;
+ txq->elts_comp = 0;
+ ++txq->cq_pi;
+ }
+#ifdef MLX5_PMD_SOFT_COUNTERS
+ txq->stats.opackets += n;
+#endif
+ mlx5_tx_dbrec(txq, wqe);
+ return n;
+}
+
+/**
+ * Send a burst of packets with Enhanced MPW. If a multi-segment packet is
+ * encountered, return so it can be processed by txq_scatter_v(). All packets
+ * in the pkts list must be single-segment packets with the same offload
+ * flags, as guaranteed by txq_check_multiseg() and txq_calc_offload().
+ *
+ * @param txq
+ * Pointer to TX queue structure.
+ * @param pkts
+ * Pointer to array of packets to be sent.
+ * @param pkts_n
+ * Number of packets to be sent (<= MLX5_VPMD_TX_MAX_BURST).
+ * @param cs_flags
+ * Checksum offload flags to be written in the descriptor.
+ *
+ * @return
+ * Number of packets successfully transmitted (<= pkts_n).
+ */
+static inline uint16_t
+txq_burst_v(struct mlx5_txq_data *txq, struct rte_mbuf **pkts, uint16_t pkts_n,
+ uint8_t cs_flags)
+{
+ struct rte_mbuf **elts;
+ uint16_t elts_head = txq->elts_head;
+ const uint16_t elts_n = 1 << txq->elts_n;
+ const uint16_t elts_m = elts_n - 1;
+ const unsigned int nb_dword_per_wqebb =
+ MLX5_WQE_SIZE / MLX5_WQE_DWORD_SIZE;
+ const unsigned int nb_dword_in_hdr =
+ sizeof(struct mlx5_wqe) / MLX5_WQE_DWORD_SIZE;
+ unsigned int n = 0;
+ unsigned int pos;
+ uint16_t max_elts;
+ uint16_t max_wqe;
+ uint32_t comp_req = 0;
+ const uint16_t wq_n = 1 << txq->wqe_n;
+ const uint16_t wq_mask = wq_n - 1;
+ uint16_t wq_idx = txq->wqe_ci & wq_mask;
+ volatile struct mlx5_wqe64 *wq =
+ &((volatile struct mlx5_wqe64 *)txq->wqes)[wq_idx];
+ volatile struct mlx5_wqe *wqe = (volatile struct mlx5_wqe *)wq;
+ const uint8x16_t ctrl_shuf_m = {
+ 3, 2, 1, 0, /* bswap32 */
+ 7, 6, 5, 4, /* bswap32 */
+ 11, 10, 9, 8, /* bswap32 */
+ 12, 13, 14, 15
+ };
+ uint8x16_t *t_wqe;
+ uint8_t *dseg;
+ uint8x16_t ctrl;
+
+ /* Make sure all packets can fit into a single WQE. */
+ assert(elts_n > pkts_n);
+ mlx5_tx_complete(txq);
+ max_elts = (elts_n - (elts_head - txq->elts_tail));
+ max_wqe = (1u << txq->wqe_n) - (txq->wqe_ci - txq->wqe_pi);
+ pkts_n = RTE_MIN((unsigned int)RTE_MIN(pkts_n, max_wqe), max_elts);
+ if (unlikely(!pkts_n))
+ return 0;
+ elts = &(*txq->elts)[elts_head & elts_m];
+ /* Loop for available tailroom first. */
+ n = RTE_MIN(elts_n - (elts_head & elts_m), pkts_n);
+ for (pos = 0; pos < (n & -2); pos += 2)
+ vst1q_u64((void *)&elts[pos], vld1q_u64((void *)&pkts[pos]));
+ if (n & 1)
+ elts[pos] = pkts[pos];
+ /* Check if it crosses the end of the queue. */
+ if (unlikely(n < pkts_n)) {
+ elts = &(*txq->elts)[0];
+ for (pos = 0; pos < pkts_n - n; ++pos)
+ elts[pos] = pkts[n + pos];
+ }
+ txq->elts_head += pkts_n;
+ /* Save title WQEBB pointer. */
+ t_wqe = (uint8x16_t *)wqe;
+ dseg = (uint8_t *)(wqe + 1);
+ /* Calculate the number of entries to the end. */
+ n = RTE_MIN(
+ (wq_n - wq_idx) * nb_dword_per_wqebb - nb_dword_in_hdr,
+ pkts_n);
+ /* Fill DSEGs. */
+ txq_wr_dseg_v(txq, dseg, pkts, n);
+ /* Check if it crosses the end of the queue. */
+ if (n < pkts_n) {
+ dseg = (uint8_t *)txq->wqes;
+ txq_wr_dseg_v(txq, dseg, &pkts[n], pkts_n - n);
+ }
+ if (txq->elts_comp + pkts_n < MLX5_TX_COMP_THRESH) {
+ txq->elts_comp += pkts_n;
+ } else {
+ /* Request a completion. */
+ txq->elts_comp = 0;
+ ++txq->cq_pi;
+ comp_req = 8;
+ }
+ /* Fill CTRL in the header. */
+ ctrl = vreinterpretq_u8_u32((uint32x4_t) {
+ MLX5_OPC_MOD_ENHANCED_MPSW << 24 |
+ txq->wqe_ci << 8 | MLX5_OPCODE_ENHANCED_MPSW,
+ txq->qp_num_8s | (pkts_n + 2),
+ comp_req,
+ txq->elts_head });
+ ctrl = vqtbl1q_u8(ctrl, ctrl_shuf_m);
+ vst1q_u8((void *)t_wqe, ctrl);
+ /* Fill ESEG in the header. */
+ vst1q_u8((void *)(t_wqe + 1),
+ (uint8x16_t) { 0, 0, 0, 0,
+ cs_flags, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0 });
+#ifdef MLX5_PMD_SOFT_COUNTERS
+ txq->stats.opackets += pkts_n;
+#endif
+ txq->wqe_ci += (nb_dword_in_hdr + pkts_n + (nb_dword_per_wqebb - 1)) /
+ nb_dword_per_wqebb;
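+	/*
+	 * Example of the index advance above, assuming the WQE header spans
+	 * two dwords (nb_dword_in_hdr == 2) and a WQEBB holds four
+	 * (nb_dword_per_wqebb == 4): pkts_n == 10 uses 2 + 10 = 12 dwords,
+	 * rounded up to 3 WQEBBs, so wqe_ci advances by 3.
+	 */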
+ /* Ring QP doorbell. */
+ mlx5_tx_dbrec_cond_wmb(txq, wqe, pkts_n < MLX5_VPMD_TX_MAX_BURST);
+ return pkts_n;
+}
+
+/**
+ * Copy mbufs from the RX SW ring to the array of packets to be returned.
+ *
+ * @param rxq
+ *   Pointer to RX queue structure.
+ * @param pkts
+ *   Array to store the copied mbufs.
+ * @param n
+ *   Number of mbufs to copy.
+ */
+static inline void
+rxq_copy_mbuf_v(struct mlx5_rxq_data *rxq, struct rte_mbuf **pkts, uint16_t n)
+{
+ const uint16_t q_mask = (1 << rxq->elts_n) - 1;
+ struct rte_mbuf **elts = &(*rxq->elts)[rxq->rq_pi & q_mask];
+ unsigned int pos;
+ uint16_t p = n & -2;
+
+ for (pos = 0; pos < p; pos += 2) {
+ uint64x2_t mbp;
+
+ mbp = vld1q_u64((void *)&elts[pos]);
+ vst1q_u64((void *)&pkts[pos], mbp);
+ }
+ if (n & 1)
+ pkts[pos] = elts[pos];
+}
+
+/**
+ * Decompress a compressed completion and fill in mbufs in RX SW ring with data
+ * extracted from the title completion descriptor.
+ *
+ * @param rxq
+ * Pointer to RX queue structure.
+ * @param cq
+ * Pointer to completion array having a compressed completion at first.
+ * @param elts
+ * Pointer to SW ring to be filled. The first mbuf has to be pre-built from
+ * the title completion descriptor to be copied to the rest of mbufs.
+ */
+static inline void
+rxq_cq_decompress_v(struct mlx5_rxq_data *rxq, volatile struct mlx5_cqe *cq,
+ struct rte_mbuf **elts)
+{
+ volatile struct mlx5_mini_cqe8 *mcq = (void *)&(cq + 1)->pkt_info;
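+	/*
+	 * Mini-CQEs are packed into the CQE slots following the title CQE,
+	 * eight per slot, hence the (cq + 1) base and the (pos % 8) indexing
+	 * below.
+	 */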
+ struct rte_mbuf *t_pkt = elts[0]; /* Title packet is pre-built. */
+ unsigned int pos;
+ unsigned int i;
+ unsigned int inv = 0;
+ /* Mask to shuffle from extracted mini CQE to mbuf. */
+ const uint8x16_t mcqe_shuf_m1 = {
+ -1, -1, -1, -1, /* skip packet_type */
+ 7, 6, -1, -1, /* pkt_len, bswap16 */
+ 7, 6, /* data_len, bswap16 */
+ -1, -1, /* skip vlan_tci */
+ 3, 2, 1, 0 /* hash.rss, bswap32 */
+ };
+ const uint8x16_t mcqe_shuf_m2 = {
+ -1, -1, -1, -1, /* skip packet_type */
+ 15, 14, -1, -1, /* pkt_len, bswap16 */
+ 15, 14, /* data_len, bswap16 */
+ -1, -1, /* skip vlan_tci */
+ 11, 10, 9, 8 /* hash.rss, bswap32 */
+ };
+ /* Restore the compressed count. Must be 16 bits. */
+ const uint16_t mcqe_n = t_pkt->data_len +
+ (rxq->crc_present * ETHER_CRC_LEN);
+ const uint64x2_t rearm =
+ vld1q_u64((void *)&t_pkt->rearm_data);
+ const uint32x4_t rxdf_mask = {
+ 0xffffffff, /* packet_type */
+ 0, /* skip pkt_len */
+ 0xffff0000, /* vlan_tci, skip data_len */
+ 0, /* skip hash.rss */
+ };
+ const uint8x16_t rxdf =
+ vandq_u8(vld1q_u8((void *)&t_pkt->rx_descriptor_fields1),
+ vreinterpretq_u8_u32(rxdf_mask));
+ const uint16x8_t crc_adj = {
+ 0, 0,
+ rxq->crc_present * ETHER_CRC_LEN, 0,
+ rxq->crc_present * ETHER_CRC_LEN, 0,
+ 0, 0
+ };
+ const uint32_t flow_tag = t_pkt->hash.fdir.hi;
+#ifdef MLX5_PMD_SOFT_COUNTERS
+ uint32_t rcvd_byte = 0;
+#endif
+ /* Mask to shuffle byte_cnt to add up stats. Do bswap16 for all. */
+ const uint8x8_t len_shuf_m = {
+ 7, 6, /* 1st mCQE */
+ 15, 14, /* 2nd mCQE */
+ 23, 22, /* 3rd mCQE */
+ 31, 30 /* 4th mCQE */
+ };
+
+ /*
+ * A. load mCQEs into a 128bit register.
+ * B. store rearm data to mbuf.
+ * C. combine data from mCQEs with rx_descriptor_fields1.
+ * D. store rx_descriptor_fields1.
+ * E. store flow tag (rte_flow mark).
+ */
+ for (pos = 0; pos < mcqe_n; ) {
+ uint8_t *p = (void *)&mcq[pos % 8];
+ uint8_t *e0 = (void *)&elts[pos]->rearm_data;
+ uint8_t *e1 = (void *)&elts[pos + 1]->rearm_data;
+ uint8_t *e2 = (void *)&elts[pos + 2]->rearm_data;
+ uint8_t *e3 = (void *)&elts[pos + 3]->rearm_data;
+ uint16x4_t byte_cnt;
+#ifdef MLX5_PMD_SOFT_COUNTERS
+ uint16x4_t invalid_mask =
+ vcreate_u16(mcqe_n - pos < MLX5_VPMD_DESCS_PER_LOOP ?
+ -1UL << ((mcqe_n - pos) *
+ sizeof(uint16_t) * 8) : 0);
+#endif
+
+ if (!(pos & 0x7) && pos + 8 < mcqe_n)
+ rte_prefetch0((void *)(cq + pos + 8));
+ __asm__ volatile (
+ /* A.1 load mCQEs into a 128bit register. */
+ "ld1 {v16.16b - v17.16b}, [%[mcq]] \n\t"
+ /* B.1 store rearm data to mbuf. */
+ "st1 {%[rearm].2d}, [%[e0]] \n\t"
+ "add %[e0], %[e0], #16 \n\t"
+ "st1 {%[rearm].2d}, [%[e1]] \n\t"
+ "add %[e1], %[e1], #16 \n\t"
+ /* C.1 combine data from mCQEs with rx_descriptor_fields1. */
+ "tbl v18.16b, {v16.16b}, %[mcqe_shuf_m1].16b \n\t"
+ "tbl v19.16b, {v16.16b}, %[mcqe_shuf_m2].16b \n\t"
+ "sub v18.8h, v18.8h, %[crc_adj].8h \n\t"
+ "sub v19.8h, v19.8h, %[crc_adj].8h \n\t"
+ "orr v18.16b, v18.16b, %[rxdf].16b \n\t"
+ "orr v19.16b, v19.16b, %[rxdf].16b \n\t"
+ /* D.1 store rx_descriptor_fields1. */
+ "st1 {v18.2d}, [%[e0]] \n\t"
+ "st1 {v19.2d}, [%[e1]] \n\t"
+ /* B.1 store rearm data to mbuf. */
+ "st1 {%[rearm].2d}, [%[e2]] \n\t"
+ "add %[e2], %[e2], #16 \n\t"
+ "st1 {%[rearm].2d}, [%[e3]] \n\t"
+ "add %[e3], %[e3], #16 \n\t"
+ /* C.1 combine data from mCQEs with rx_descriptor_fields1. */
+ "tbl v18.16b, {v17.16b}, %[mcqe_shuf_m1].16b \n\t"
+ "tbl v19.16b, {v17.16b}, %[mcqe_shuf_m2].16b \n\t"
+ "sub v18.8h, v18.8h, %[crc_adj].8h \n\t"
+ "sub v19.8h, v19.8h, %[crc_adj].8h \n\t"
+ "orr v18.16b, v18.16b, %[rxdf].16b \n\t"
+ "orr v19.16b, v19.16b, %[rxdf].16b \n\t"
+ /* D.1 store rx_descriptor_fields1. */
+ "st1 {v18.2d}, [%[e2]] \n\t"
+ "st1 {v19.2d}, [%[e3]] \n\t"
+#ifdef MLX5_PMD_SOFT_COUNTERS
+ "tbl %[byte_cnt].8b, {v16.16b - v17.16b}, %[len_shuf_m].8b \n\t"
+#endif
+ :[byte_cnt]"=&w"(byte_cnt)
+ :[mcq]"r"(p),
+ [rxdf]"w"(rxdf),
+ [rearm]"w"(rearm),
+ [e3]"r"(e3), [e2]"r"(e2), [e1]"r"(e1), [e0]"r"(e0),
+ [mcqe_shuf_m1]"w"(mcqe_shuf_m1),
+ [mcqe_shuf_m2]"w"(mcqe_shuf_m2),
+ [crc_adj]"w"(crc_adj),
+ [len_shuf_m]"w"(len_shuf_m)
+ :"memory", "v16", "v17", "v18", "v19");
+#ifdef MLX5_PMD_SOFT_COUNTERS
+ byte_cnt = vbic_u16(byte_cnt, invalid_mask);
+ rcvd_byte += vget_lane_u64(vpaddl_u32(vpaddl_u16(byte_cnt)), 0);
+#endif
+ if (rxq->mark) {
+ /* E.1 store flow tag (rte_flow mark). */
+ elts[pos]->hash.fdir.hi = flow_tag;
+ elts[pos + 1]->hash.fdir.hi = flow_tag;
+ elts[pos + 2]->hash.fdir.hi = flow_tag;
+ elts[pos + 3]->hash.fdir.hi = flow_tag;
+ }
+ pos += MLX5_VPMD_DESCS_PER_LOOP;
+ /* Move to next CQE and invalidate consumed CQEs. */
+ if (!(pos & 0x7) && pos < mcqe_n) {
+ mcq = (void *)&(cq + pos)->pkt_info;
+ for (i = 0; i < 8; ++i)
+ cq[inv++].op_own = MLX5_CQE_INVALIDATE;
+ }
+ }
+ /* Invalidate the rest of CQEs. */
+ for (; inv < mcqe_n; ++inv)
+ cq[inv].op_own = MLX5_CQE_INVALIDATE;
+#ifdef MLX5_PMD_SOFT_COUNTERS
+ rxq->stats.ipackets += mcqe_n;
+ rxq->stats.ibytes += rcvd_byte;
+#endif
+ rxq->cq_ci += mcqe_n;
+}
+
+/**
+ * Calculate packet type and offload flag for mbuf and store it.
+ *
+ * @param rxq
+ * Pointer to RX queue structure.
+ * @param ptype_info
+ * Array of four 4bytes packet type info extracted from the original
+ * completion descriptor.
+ * @param flow_tag
+ * Array of four 4bytes flow ID extracted from the original completion
+ * descriptor.
+ * @param op_err
+ * Opcode vector having responder error status. Each field is 4B.
+ * @param pkts
+ * Pointer to array of packets to be filled.
+ */
+static inline void
+rxq_cq_to_ptype_oflags_v(struct mlx5_rxq_data *rxq,
+ uint32x4_t ptype_info, uint32x4_t flow_tag,
+ uint16x4_t op_err, struct rte_mbuf **pkts)
+{
+ uint16x4_t ptype;
+ uint32x4_t pinfo, cv_flags;
+ uint32x4_t ol_flags =
+ vdupq_n_u32(rxq->rss_hash * PKT_RX_RSS_HASH |
+ rxq->hw_timestamp * PKT_RX_TIMESTAMP);
+ const uint32x4_t ptype_ol_mask = { 0x106, 0x106, 0x106, 0x106 };
+ const uint8x16_t cv_flag_sel = {
+ 0,
+ (uint8_t)(PKT_RX_VLAN | PKT_RX_VLAN_STRIPPED),
+ (uint8_t)(PKT_RX_IP_CKSUM_GOOD >> 1),
+ 0,
+ (uint8_t)(PKT_RX_L4_CKSUM_GOOD >> 1),
+ 0,
+ (uint8_t)((PKT_RX_IP_CKSUM_GOOD | PKT_RX_L4_CKSUM_GOOD) >> 1),
+ 0, 0, 0, 0, 0, 0, 0, 0, 0
+ };
+ const uint32x4_t cv_mask =
+ vdupq_n_u32(PKT_RX_IP_CKSUM_GOOD | PKT_RX_L4_CKSUM_GOOD |
+ PKT_RX_VLAN | PKT_RX_VLAN_STRIPPED);
+ const uint64x1_t mbuf_init = vld1_u64(&rxq->mbuf_initializer);
+ const uint64x1_t r32_mask = vcreate_u64(0xffffffff);
+ uint64x2_t rearm0, rearm1, rearm2, rearm3;
+
+ if (rxq->mark) {
+ const uint32x4_t ft_def = vdupq_n_u32(MLX5_FLOW_MARK_DEFAULT);
+ const uint32x4_t fdir_flags = vdupq_n_u32(PKT_RX_FDIR);
+ const uint32x4_t fdir_id_flags = vdupq_n_u32(PKT_RX_FDIR_ID);
+
+ /* Check if flow tag is non-zero then set PKT_RX_FDIR. */
+ ol_flags = vorrq_u32(ol_flags, vbicq_u32(fdir_flags,
+ vceqzq_u32(flow_tag)));
+ /* Check if flow tag MLX5_FLOW_MARK_DEFAULT. */
+ ol_flags = vorrq_u32(ol_flags,
+ vbicq_u32(fdir_id_flags,
+ vceqq_u32(flow_tag, ft_def)));
+ }
+ /*
+ * ptype_info has the following:
+ * bit[1] = l3_ok
+ * bit[2] = l4_ok
+ * bit[8] = cv
+ * bit[11:10] = l3_hdr_type
+ * bit[14:12] = l4_hdr_type
+ * bit[15] = ip_frag
+ * bit[16] = tunneled
+ * bit[17] = outer_l3_type
+ */
+ ptype = vshrn_n_u32(ptype_info, 10);
+ /* Errored packets will have RTE_PTYPE_ALL_MASK. */
+ ptype = vorr_u16(ptype, op_err);
+ pkts[0]->packet_type =
+ mlx5_ptype_table[vget_lane_u8(vreinterpret_u8_u16(ptype), 6)];
+ pkts[1]->packet_type =
+ mlx5_ptype_table[vget_lane_u8(vreinterpret_u8_u16(ptype), 4)];
+ pkts[2]->packet_type =
+ mlx5_ptype_table[vget_lane_u8(vreinterpret_u8_u16(ptype), 2)];
+ pkts[3]->packet_type =
+ mlx5_ptype_table[vget_lane_u8(vreinterpret_u8_u16(ptype), 0)];
+ /* Fill flags for checksum and VLAN. */
+ pinfo = vandq_u32(ptype_info, ptype_ol_mask);
+ pinfo = vreinterpretq_u32_u8(
+ vqtbl1q_u8(cv_flag_sel, vreinterpretq_u8_u32(pinfo)));
+ /* Locate checksum flags at byte[2:1] and merge with VLAN flags. */
+ cv_flags = vshlq_n_u32(pinfo, 9);
+ cv_flags = vorrq_u32(pinfo, cv_flags);
+ /* Move back flags to start from byte[0]. */
+ cv_flags = vshrq_n_u32(cv_flags, 8);
+ /* Mask out garbage bits. */
+ cv_flags = vandq_u32(cv_flags, cv_mask);
+ /* Merge to ol_flags. */
+ ol_flags = vorrq_u32(ol_flags, cv_flags);
+ /* Merge mbuf_init and ol_flags, and store. */
+ rearm0 = vcombine_u64(mbuf_init,
+ vshr_n_u64(vget_high_u64(vreinterpretq_u64_u32(
+ ol_flags)), 32));
+ rearm1 = vcombine_u64(mbuf_init,
+ vand_u64(vget_high_u64(vreinterpretq_u64_u32(
+ ol_flags)), r32_mask));
+ rearm2 = vcombine_u64(mbuf_init,
+ vshr_n_u64(vget_low_u64(vreinterpretq_u64_u32(
+ ol_flags)), 32));
+ rearm3 = vcombine_u64(mbuf_init,
+ vand_u64(vget_low_u64(vreinterpretq_u64_u32(
+ ol_flags)), r32_mask));
+ vst1q_u64((void *)&pkts[0]->rearm_data, rearm0);
+ vst1q_u64((void *)&pkts[1]->rearm_data, rearm1);
+ vst1q_u64((void *)&pkts[2]->rearm_data, rearm2);
+ vst1q_u64((void *)&pkts[3]->rearm_data, rearm3);
+}
+
+/**
+ * Receive a burst of packets. An errored completion also consumes an mbuf, but
+ * its packet_type is set to RTE_PTYPE_ALL_MASK. Marked mbufs should be freed
+ * before returning to the application.
+ *
+ * @param rxq
+ * Pointer to RX queue structure.
+ * @param[out] pkts
+ * Array to store received packets.
+ * @param pkts_n
+ * Maximum number of packets in array.
+ *
+ * @return
+ * Number of packets received including errors (<= pkts_n).
+ */
+static inline uint16_t
+rxq_burst_v(struct mlx5_rxq_data *rxq, struct rte_mbuf **pkts, uint16_t pkts_n)
+{
+ const uint16_t q_n = 1 << rxq->cqe_n;
+ const uint16_t q_mask = q_n - 1;
+ volatile struct mlx5_cqe *cq;
+ struct rte_mbuf **elts;
+ unsigned int pos;
+ uint64_t n;
+ uint16_t repl_n;
+ uint64_t comp_idx = MLX5_VPMD_DESCS_PER_LOOP;
+ uint16_t nocmp_n = 0;
+ uint16_t rcvd_pkt = 0;
+ unsigned int cq_idx = rxq->cq_ci & q_mask;
+ unsigned int elts_idx;
+ const uint16x4_t ownership = vdup_n_u16(!(rxq->cq_ci & (q_mask + 1)));
+ const uint16x4_t owner_check = vcreate_u16(0x0001000100010001);
+ const uint16x4_t opcode_check = vcreate_u16(0x00f000f000f000f0);
+ const uint16x4_t format_check = vcreate_u16(0x000c000c000c000c);
+ const uint16x4_t resp_err_check = vcreate_u16(0x00e000e000e000e0);
+#ifdef MLX5_PMD_SOFT_COUNTERS
+ uint32_t rcvd_byte = 0;
+#endif
+ /* Mask to generate 16B length vector. */
+ const uint8x8_t len_shuf_m = {
+ 52, 53, /* 4th CQE */
+ 36, 37, /* 3rd CQE */
+ 20, 21, /* 2nd CQE */
+ 4, 5 /* 1st CQE */
+ };
+ /* Mask to extract 16B data from a 64B CQE. */
+ const uint8x16_t cqe_shuf_m = {
+ 28, 29, /* hdr_type_etc */
+ 0, /* pkt_info */
+ -1, /* null */
+ 47, 46, /* byte_cnt, bswap16 */
+ 31, 30, /* vlan_info, bswap16 */
+ 15, 14, 13, 12, /* rx_hash_res, bswap32 */
+ 57, 58, 59, /* flow_tag */
+ 63 /* op_own */
+ };
+ /* Mask to generate 16B data for mbuf. */
+ const uint8x16_t mb_shuf_m = {
+ 4, 5, -1, -1, /* pkt_len */
+ 4, 5, /* data_len */
+ 6, 7, /* vlan_tci */
+ 8, 9, 10, 11, /* hash.rss */
+ 12, 13, 14, -1 /* hash.fdir.hi */
+ };
+ /* Mask to generate 16B owner vector. */
+ const uint8x8_t owner_shuf_m = {
+ 63, -1, /* 4th CQE */
+ 47, -1, /* 3rd CQE */
+ 31, -1, /* 2nd CQE */
+ 15, -1 /* 1st CQE */
+ };
+ /* Mask to generate a vector having packet_type/ol_flags. */
+ const uint8x16_t ptype_shuf_m = {
+ 48, 49, 50, -1, /* 4th CQE */
+ 32, 33, 34, -1, /* 3rd CQE */
+ 16, 17, 18, -1, /* 2nd CQE */
+ 0, 1, 2, -1 /* 1st CQE */
+ };
+ /* Mask to generate a vector having flow tags. */
+ const uint8x16_t ftag_shuf_m = {
+ 60, 61, 62, -1, /* 4th CQE */
+ 44, 45, 46, -1, /* 3rd CQE */
+ 28, 29, 30, -1, /* 2nd CQE */
+ 12, 13, 14, -1 /* 1st CQE */
+ };
+ const uint16x8_t crc_adj = {
+ 0, 0, rxq->crc_present * ETHER_CRC_LEN, 0, 0, 0, 0, 0
+ };
+ const uint32x4_t flow_mark_adj = { 0, 0, 0, rxq->mark * (-1) };
+
+ assert(rxq->sges_n == 0);
+ assert(rxq->cqe_n == rxq->elts_n);
+ cq = &(*rxq->cqes)[cq_idx];
+ rte_prefetch_non_temporal(cq);
+ rte_prefetch_non_temporal(cq + 1);
+ rte_prefetch_non_temporal(cq + 2);
+ rte_prefetch_non_temporal(cq + 3);
+ pkts_n = RTE_MIN(pkts_n, MLX5_VPMD_RX_MAX_BURST);
+ /*
+ * Order of indexes:
+ * rq_ci >= cq_ci >= rq_pi
+ * Definition of indexes:
+ * rq_ci - cq_ci := # of buffers owned by HW (posted).
+ * cq_ci - rq_pi := # of buffers not returned to app (decompressed).
+ * N - (rq_ci - rq_pi) := # of buffers consumed (to be replenished).
+ */
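+	/*
+	 * Example with hypothetical values and q_n = 256: rq_ci = 300,
+	 * cq_ci = 296 and rq_pi = 290 give 4 buffers still posted to HW,
+	 * 6 decompressed buffers not yet returned to the application, and
+	 * 256 - (300 - 290) = 246 consumed buffers eligible for replenishment.
+	 */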
+ repl_n = q_n - (rxq->rq_ci - rxq->rq_pi);
+ if (repl_n >= MLX5_VPMD_RXQ_RPLNSH_THRESH)
+ mlx5_rx_replenish_bulk_mbuf(rxq, repl_n);
+ /* See if there're unreturned mbufs from compressed CQE. */
+ rcvd_pkt = rxq->cq_ci - rxq->rq_pi;
+ if (rcvd_pkt > 0) {
+ rcvd_pkt = RTE_MIN(rcvd_pkt, pkts_n);
+ rxq_copy_mbuf_v(rxq, pkts, rcvd_pkt);
+ rxq->rq_pi += rcvd_pkt;
+ pkts += rcvd_pkt;
+ }
+ elts_idx = rxq->rq_pi & q_mask;
+ elts = &(*rxq->elts)[elts_idx];
+ /* Not to overflow pkts array. */
+ pkts_n = RTE_ALIGN_FLOOR(pkts_n - rcvd_pkt, MLX5_VPMD_DESCS_PER_LOOP);
+ /* Not to cross queue end. */
+ pkts_n = RTE_MIN(pkts_n, q_n - elts_idx);
+ if (!pkts_n)
+ return rcvd_pkt;
+	/* At this point, there shouldn't be any remaining packets. */
+ assert(rxq->rq_pi == rxq->cq_ci);
+ /*
+ * Note that vectors have reverse order - {v3, v2, v1, v0}, because
+ * there's no instruction to count trailing zeros. __builtin_clzl() is
+ * used instead.
+ *
+	 * A. copy 4 mbuf pointers from elts ring to returning pkts.
+ * B. load 64B CQE and extract necessary fields
+ * Final 16bytes cqes[] extracted from original 64bytes CQE has the
+ * following structure:
+ * struct {
+ * uint16_t hdr_type_etc;
+ * uint8_t pkt_info;
+ * uint8_t rsvd;
+ * uint16_t byte_cnt;
+ * uint16_t vlan_info;
+	 *		uint32_t rx_hash_res;
+ * uint8_t flow_tag[3];
+ * uint8_t op_own;
+ * } c;
+ * C. fill in mbuf.
+ * D. get valid CQEs.
+ * E. find compressed CQE.
+ */
+ for (pos = 0;
+ pos < pkts_n;
+ pos += MLX5_VPMD_DESCS_PER_LOOP) {
+ uint16x4_t op_own;
+ uint16x4_t opcode, owner_mask, invalid_mask;
+ uint16x4_t comp_mask;
+ uint16x4_t mask;
+ uint16x4_t byte_cnt;
+ uint32x4_t ptype_info, flow_tag;
+ uint8_t *p0, *p1, *p2, *p3;
+ uint8_t *e0 = (void *)&elts[pos]->pkt_len;
+ uint8_t *e1 = (void *)&elts[pos + 1]->pkt_len;
+ uint8_t *e2 = (void *)&elts[pos + 2]->pkt_len;
+ uint8_t *e3 = (void *)&elts[pos + 3]->pkt_len;
+ void *elts_p = (void *)&elts[pos];
+ void *pkts_p = (void *)&pkts[pos];
+
+ /* A.0 do not cross the end of CQ. */
+ mask = vcreate_u16(pkts_n - pos < MLX5_VPMD_DESCS_PER_LOOP ?
+ -1UL >> ((pkts_n - pos) *
+ sizeof(uint16_t) * 8) : 0);
+ p0 = (void *)&cq[pos].pkt_info;
+ p1 = p0 + (pkts_n - pos > 1) * sizeof(struct mlx5_cqe);
+ p2 = p1 + (pkts_n - pos > 2) * sizeof(struct mlx5_cqe);
+ p3 = p2 + (pkts_n - pos > 3) * sizeof(struct mlx5_cqe);
+ /* Prefetch next 4 CQEs. */
+ if (pkts_n - pos >= 2 * MLX5_VPMD_DESCS_PER_LOOP) {
+ unsigned int next = pos + MLX5_VPMD_DESCS_PER_LOOP;
+ rte_prefetch_non_temporal(&cq[next]);
+ rte_prefetch_non_temporal(&cq[next + 1]);
+ rte_prefetch_non_temporal(&cq[next + 2]);
+ rte_prefetch_non_temporal(&cq[next + 3]);
+ }
+ __asm__ volatile (
+ /* B.1 (CQE 3) load a block having op_own. */
+ "ld1 {v19.16b}, [%[p3]] \n\t"
+ "sub %[p3], %[p3], #48 \n\t"
+ /* B.2 (CQE 3) load the rest blocks. */
+ "ld1 {v16.16b - v18.16b}, [%[p3]] \n\t"
+ /* B.3 (CQE 3) extract 16B fields. */
+ "tbl v23.16b, {v16.16b - v19.16b}, %[cqe_shuf_m].16b \n\t"
+ /* B.4 (CQE 3) adjust CRC length. */
+ "sub v23.8h, v23.8h, %[crc_adj].8h \n\t"
+ /* B.1 (CQE 2) load a block having op_own. */
+ "ld1 {v19.16b}, [%[p2]] \n\t"
+ "sub %[p2], %[p2], #48 \n\t"
+ /* C.1 (CQE 3) generate final structure for mbuf. */
+ "tbl v15.16b, {v23.16b}, %[mb_shuf_m].16b \n\t"
+ /* B.2 (CQE 2) load the rest blocks. */
+ "ld1 {v16.16b - v18.16b}, [%[p2]] \n\t"
+ /* B.3 (CQE 2) extract 16B fields. */
+ "tbl v22.16b, {v16.16b - v19.16b}, %[cqe_shuf_m].16b \n\t"
+ /* B.4 (CQE 2) adjust CRC length. */
+ "sub v22.8h, v22.8h, %[crc_adj].8h \n\t"
+ /* B.1 (CQE 1) load a block having op_own. */
+ "ld1 {v19.16b}, [%[p1]] \n\t"
+ "sub %[p1], %[p1], #48 \n\t"
+ /* C.1 (CQE 2) generate final structure for mbuf. */
+ "tbl v14.16b, {v22.16b}, %[mb_shuf_m].16b \n\t"
+ /* B.2 (CQE 1) load the rest blocks. */
+ "ld1 {v16.16b - v18.16b}, [%[p1]] \n\t"
+ /* B.3 (CQE 1) extract 16B fields. */
+ "tbl v21.16b, {v16.16b - v19.16b}, %[cqe_shuf_m].16b \n\t"
+ /* B.4 (CQE 1) adjust CRC length. */
+ "sub v21.8h, v21.8h, %[crc_adj].8h \n\t"
+ /* B.1 (CQE 0) load a block having op_own. */
+ "ld1 {v19.16b}, [%[p0]] \n\t"
+ "sub %[p0], %[p0], #48 \n\t"
+ /* C.1 (CQE 1) generate final structure for mbuf. */
+ "tbl v13.16b, {v21.16b}, %[mb_shuf_m].16b \n\t"
+ /* B.2 (CQE 0) load the rest blocks. */
+ "ld1 {v16.16b - v18.16b}, [%[p0]] \n\t"
+ /* B.3 (CQE 0) extract 16B fields. */
+ "tbl v20.16b, {v16.16b - v19.16b}, %[cqe_shuf_m].16b \n\t"
+ /* B.4 (CQE 0) adjust CRC length. */
+ "sub v20.8h, v20.8h, %[crc_adj].8h \n\t"
+ /* A.1 load mbuf pointers. */
+ "ld1 {v24.2d - v25.2d}, [%[elts_p]] \n\t"
+ /* D.1 extract op_own byte. */
+ "tbl %[op_own].8b, {v20.16b - v23.16b}, %[owner_shuf_m].8b \n\t"
+ /* C.2 (CQE 3) adjust flow mark. */
+ "add v15.4s, v15.4s, %[flow_mark_adj].4s \n\t"
+ /* C.3 (CQE 3) fill in mbuf - rx_descriptor_fields1. */
+ "st1 {v15.2d}, [%[e3]] \n\t"
+ /* C.2 (CQE 2) adjust flow mark. */
+ "add v14.4s, v14.4s, %[flow_mark_adj].4s \n\t"
+ /* C.3 (CQE 2) fill in mbuf - rx_descriptor_fields1. */
+ "st1 {v14.2d}, [%[e2]] \n\t"
+ /* C.1 (CQE 0) generate final structure for mbuf. */
+ "tbl v12.16b, {v20.16b}, %[mb_shuf_m].16b \n\t"
+ /* C.2 (CQE 1) adjust flow mark. */
+ "add v13.4s, v13.4s, %[flow_mark_adj].4s \n\t"
+ /* C.3 (CQE 1) fill in mbuf - rx_descriptor_fields1. */
+ "st1 {v13.2d}, [%[e1]] \n\t"
+#ifdef MLX5_PMD_SOFT_COUNTERS
+ /* Extract byte_cnt. */
+ "tbl %[byte_cnt].8b, {v20.16b - v23.16b}, %[len_shuf_m].8b \n\t"
+#endif
+ /* Extract ptype_info. */
+ "tbl %[ptype_info].16b, {v20.16b - v23.16b}, %[ptype_shuf_m].16b \n\t"
+ /* Extract flow_tag. */
+ "tbl %[flow_tag].16b, {v20.16b - v23.16b}, %[ftag_shuf_m].16b \n\t"
+ /* A.2 copy mbuf pointers. */
+ "st1 {v24.2d - v25.2d}, [%[pkts_p]] \n\t"
+ /* C.2 (CQE 0) adjust flow mark. */
+ "add v12.4s, v12.4s, %[flow_mark_adj].4s \n\t"
+		/* C.3 (CQE 0) fill in mbuf - rx_descriptor_fields1. */
+ "st1 {v12.2d}, [%[e0]] \n\t"
+ :[op_own]"=&w"(op_own),
+ [byte_cnt]"=&w"(byte_cnt),
+ [ptype_info]"=&w"(ptype_info),
+ [flow_tag]"=&w"(flow_tag)
+ :[p3]"r"(p3 + 48), [p2]"r"(p2 + 48),
+ [p1]"r"(p1 + 48), [p0]"r"(p0 + 48),
+ [e3]"r"(e3), [e2]"r"(e2), [e1]"r"(e1), [e0]"r"(e0),
+ [elts_p]"r"(elts_p),
+ [pkts_p]"r"(pkts_p),
+ [cqe_shuf_m]"w"(cqe_shuf_m),
+ [mb_shuf_m]"w"(mb_shuf_m),
+ [owner_shuf_m]"w"(owner_shuf_m),
+ [len_shuf_m]"w"(len_shuf_m),
+ [ptype_shuf_m]"w"(ptype_shuf_m),
+ [ftag_shuf_m]"w"(ftag_shuf_m),
+ [crc_adj]"w"(crc_adj),
+ [flow_mark_adj]"w"(flow_mark_adj)
+ :"memory",
+ "v12", "v13", "v14", "v15",
+ "v16", "v17", "v18", "v19",
+ "v20", "v21", "v22", "v23",
+ "v24", "v25");
+ /* D.2 flip owner bit to mark CQEs from last round. */
+ owner_mask = vand_u16(op_own, owner_check);
+ owner_mask = vceq_u16(owner_mask, ownership);
+ /* D.3 get mask for invalidated CQEs. */
+ opcode = vand_u16(op_own, opcode_check);
+ invalid_mask = vceq_u16(opcode_check, opcode);
+ /* E.1 find compressed CQE format. */
+ comp_mask = vand_u16(op_own, format_check);
+ comp_mask = vceq_u16(comp_mask, format_check);
+ /* D.4 mask out beyond boundary. */
+ invalid_mask = vorr_u16(invalid_mask, mask);
+ /* D.5 merge invalid_mask with invalid owner. */
+ invalid_mask = vorr_u16(invalid_mask, owner_mask);
+ /* E.2 mask out invalid entries. */
+ comp_mask = vbic_u16(comp_mask, invalid_mask);
+ /* E.3 get the first compressed CQE. */
+ comp_idx = __builtin_clzl(vget_lane_u64(vreinterpret_u64_u16(
+ comp_mask), 0)) /
+ (sizeof(uint16_t) * 8);
+ /* D.6 mask out entries after the compressed CQE. */
+ mask = vcreate_u16(comp_idx < MLX5_VPMD_DESCS_PER_LOOP ?
+ -1UL >> (comp_idx * sizeof(uint16_t) * 8) :
+ 0);
+ invalid_mask = vorr_u16(invalid_mask, mask);
+ /* D.7 count non-compressed valid CQEs. */
+ n = __builtin_clzl(vget_lane_u64(vreinterpret_u64_u16(
+ invalid_mask), 0)) / (sizeof(uint16_t) * 8);
+ nocmp_n += n;
+ /* D.2 get the final invalid mask. */
+ mask = vcreate_u16(n < MLX5_VPMD_DESCS_PER_LOOP ?
+ -1UL >> (n * sizeof(uint16_t) * 8) : 0);
+ invalid_mask = vorr_u16(invalid_mask, mask);
+ /* D.3 check error in opcode. */
+ opcode = vceq_u16(resp_err_check, opcode);
+ opcode = vbic_u16(opcode, invalid_mask);
+ /* D.4 mark if any error is set */
+ rxq->pending_err |=
+ !!vget_lane_u64(vreinterpret_u64_u16(opcode), 0);
+ /* C.4 fill in mbuf - rearm_data and packet_type. */
+ rxq_cq_to_ptype_oflags_v(rxq, ptype_info, flow_tag,
+ opcode, &elts[pos]);
+ if (rxq->hw_timestamp) {
+ elts[pos]->timestamp =
+ rte_be_to_cpu_64(
+ container_of(p0, struct mlx5_cqe,
+ pkt_info)->timestamp);
+ elts[pos + 1]->timestamp =
+ rte_be_to_cpu_64(
+ container_of(p1, struct mlx5_cqe,
+ pkt_info)->timestamp);
+ elts[pos + 2]->timestamp =
+ rte_be_to_cpu_64(
+ container_of(p2, struct mlx5_cqe,
+ pkt_info)->timestamp);
+ elts[pos + 3]->timestamp =
+ rte_be_to_cpu_64(
+ container_of(p3, struct mlx5_cqe,
+ pkt_info)->timestamp);
+ }
+#ifdef MLX5_PMD_SOFT_COUNTERS
+ /* Add up received bytes count. */
+ byte_cnt = vbic_u16(byte_cnt, invalid_mask);
+ rcvd_byte += vget_lane_u64(vpaddl_u32(vpaddl_u16(byte_cnt)), 0);
+#endif
+ /*
+		 * Break the loop unless more valid CQEs are expected, or if
+		 * a compressed CQE has been found.
+ */
+ if (n != MLX5_VPMD_DESCS_PER_LOOP)
+ break;
+ }
+ /* If no new CQE seen, return without updating cq_db. */
+ if (unlikely(!nocmp_n && comp_idx == MLX5_VPMD_DESCS_PER_LOOP))
+ return rcvd_pkt;
+ /* Update the consumer indexes for non-compressed CQEs. */
+ assert(nocmp_n <= pkts_n);
+ rxq->cq_ci += nocmp_n;
+ rxq->rq_pi += nocmp_n;
+ rcvd_pkt += nocmp_n;
+#ifdef MLX5_PMD_SOFT_COUNTERS
+ rxq->stats.ipackets += nocmp_n;
+ rxq->stats.ibytes += rcvd_byte;
+#endif
+ /* Decompress the last CQE if compressed. */
+ if (comp_idx < MLX5_VPMD_DESCS_PER_LOOP && comp_idx == n) {
+ assert(comp_idx == (nocmp_n % MLX5_VPMD_DESCS_PER_LOOP));
+ rxq_cq_decompress_v(rxq, &cq[nocmp_n], &elts[nocmp_n]);
+ /* Return more packets if needed. */
+ if (nocmp_n < pkts_n) {
+ uint16_t n = rxq->cq_ci - rxq->rq_pi;
+
+ n = RTE_MIN(n, pkts_n - nocmp_n);
+ rxq_copy_mbuf_v(rxq, &pkts[nocmp_n], n);
+ rxq->rq_pi += n;
+ rcvd_pkt += n;
+ }
+ }
+ rte_compiler_barrier();
+ *rxq->cq_db = rte_cpu_to_be_32(rxq->cq_ci);
+ return rcvd_pkt;
+}
+
+#endif /* RTE_PMD_MLX5_RXTX_VEC_NEON_H_ */
diff --git a/drivers/net/mlx5/mlx5_rxtx_vec_sse.c b/drivers/net/mlx5/mlx5_rxtx_vec_sse.h
index 8560f745..2b9f1601 100644
--- a/drivers/net/mlx5/mlx5_rxtx_vec_sse.c
+++ b/drivers/net/mlx5/mlx5_rxtx_vec_sse.h
@@ -31,38 +31,23 @@
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
+#ifndef RTE_PMD_MLX5_RXTX_VEC_SSE_H_
+#define RTE_PMD_MLX5_RXTX_VEC_SSE_H_
+
#include <assert.h>
#include <stdint.h>
#include <string.h>
#include <stdlib.h>
#include <smmintrin.h>
-/* Verbs header. */
-/* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */
-#ifdef PEDANTIC
-#pragma GCC diagnostic ignored "-Wpedantic"
-#endif
-#include <infiniband/verbs.h>
-#include <infiniband/mlx5_hw.h>
-#include <infiniband/arch.h>
-#ifdef PEDANTIC
-#pragma GCC diagnostic error "-Wpedantic"
-#endif
-
-/* DPDK headers don't like -pedantic. */
-#ifdef PEDANTIC
-#pragma GCC diagnostic ignored "-Wpedantic"
-#endif
#include <rte_mbuf.h>
#include <rte_mempool.h>
#include <rte_prefetch.h>
-#ifdef PEDANTIC
-#pragma GCC diagnostic error "-Wpedantic"
-#endif
#include "mlx5.h"
#include "mlx5_utils.h"
#include "mlx5_rxtx.h"
+#include "mlx5_rxtx_vec.h"
#include "mlx5_autoconf.h"
#include "mlx5_defs.h"
#include "mlx5_prm.h"
@@ -77,14 +62,14 @@
* @param txq
* Pointer to TX queue structure.
* @param dseg
- * Pointer to buffer descriptor to be writen.
+ * Pointer to buffer descriptor to be written.
* @param pkts
* Pointer to array of packets to be sent.
* @param n
* Number of packets to be filled.
*/
static inline void
-txq_wr_dseg_v(struct txq *txq, __m128i *dseg,
+txq_wr_dseg_v(struct mlx5_txq_data *txq, __m128i *dseg,
struct rte_mbuf **pkts, unsigned int n)
{
unsigned int pos;
@@ -119,85 +104,6 @@ txq_wr_dseg_v(struct txq *txq, __m128i *dseg,
}
/**
- * Count the number of continuous single segment packets. The first packet must
- * be a single segment packet.
- *
- * @param pkts
- * Pointer to array of packets.
- * @param pkts_n
- * Number of packets.
- *
- * @return
- * Number of continuous single segment packets.
- */
-static inline unsigned int
-txq_check_multiseg(struct rte_mbuf **pkts, uint16_t pkts_n)
-{
- unsigned int pos;
-
- if (!pkts_n)
- return 0;
- assert(NB_SEGS(pkts[0]) == 1);
- /* Count the number of continuous single segment packets. */
- for (pos = 1; pos < pkts_n; ++pos)
- if (NB_SEGS(pkts[pos]) > 1)
- break;
- return pos;
-}
-
-/**
- * Count the number of packets having same ol_flags and calculate cs_flags.
- *
- * @param txq
- * Pointer to TX queue structure.
- * @param pkts
- * Pointer to array of packets.
- * @param pkts_n
- * Number of packets.
- * @param cs_flags
- * Pointer of flags to be returned.
- *
- * @return
- * Number of packets having same ol_flags.
- */
-static inline unsigned int
-txq_calc_offload(struct txq *txq, struct rte_mbuf **pkts, uint16_t pkts_n,
- uint8_t *cs_flags)
-{
- unsigned int pos;
- const uint64_t ol_mask =
- PKT_TX_IP_CKSUM | PKT_TX_TCP_CKSUM |
- PKT_TX_UDP_CKSUM | PKT_TX_TUNNEL_GRE |
- PKT_TX_TUNNEL_VXLAN | PKT_TX_OUTER_IP_CKSUM;
-
- if (!pkts_n)
- return 0;
- /* Count the number of packets having same ol_flags. */
- for (pos = 1; pos < pkts_n; ++pos)
- if ((pkts[pos]->ol_flags ^ pkts[0]->ol_flags) & ol_mask)
- break;
- /* Should open another MPW session for the rest. */
- if (pkts[0]->ol_flags &
- (PKT_TX_IP_CKSUM | PKT_TX_TCP_CKSUM | PKT_TX_UDP_CKSUM)) {
- const uint64_t is_tunneled =
- pkts[0]->ol_flags &
- (PKT_TX_TUNNEL_GRE |
- PKT_TX_TUNNEL_VXLAN);
-
- if (is_tunneled && txq->tunnel_en) {
- *cs_flags = MLX5_ETH_WQE_L3_INNER_CSUM |
- MLX5_ETH_WQE_L4_INNER_CSUM;
- if (pkts[0]->ol_flags & PKT_TX_OUTER_IP_CKSUM)
- *cs_flags |= MLX5_ETH_WQE_L3_CSUM;
- } else {
- *cs_flags = MLX5_ETH_WQE_L3_CSUM |
- MLX5_ETH_WQE_L4_CSUM;
- }
- }
- return pos;
-}
-
-/**
* Send multi-segmented packets until it encounters a single segment packet in
* the pkts list.
*
@@ -212,7 +118,8 @@ txq_calc_offload(struct txq *txq, struct rte_mbuf **pkts, uint16_t pkts_n,
* Number of packets successfully transmitted (<= pkts_n).
*/
static uint16_t
-txq_scatter_v(struct txq *txq, struct rte_mbuf **pkts, uint16_t pkts_n)
+txq_scatter_v(struct mlx5_txq_data *txq, struct rte_mbuf **pkts,
+ uint16_t pkts_n)
{
uint16_t elts_head = txq->elts_head;
const uint16_t elts_n = 1 << txq->elts_n;
@@ -257,13 +164,17 @@ txq_scatter_v(struct txq *txq, struct rte_mbuf **pkts, uint16_t pkts_n)
if (segs_n == 1 ||
max_elts < segs_n || max_wqe < 2)
break;
+ if (segs_n > MLX5_MPW_DSEG_MAX) {
+ txq->stats.oerrors++;
+ break;
+ }
wqe = &((volatile struct mlx5_wqe64 *)
txq->wqes)[wqe_ci & wq_mask].hdr;
if (buf->ol_flags &
(PKT_TX_IP_CKSUM | PKT_TX_TCP_CKSUM | PKT_TX_UDP_CKSUM)) {
- const uint64_t is_tunneled = buf->ol_flags &
- (PKT_TX_TUNNEL_GRE |
- PKT_TX_TUNNEL_VXLAN);
+ const uint64_t is_tunneled =
+ buf->ol_flags & (PKT_TX_TUNNEL_GRE |
+ PKT_TX_TUNNEL_VXLAN);
if (is_tunneled && txq->tunnel_en) {
cs_flags = MLX5_ETH_WQE_L3_INNER_CSUM |
@@ -298,7 +209,7 @@ txq_scatter_v(struct txq *txq, struct rte_mbuf **pkts, uint16_t pkts_n)
/* Fill ESEG in the header. */
_mm_store_si128(t_wqe + 1,
_mm_set_epi16(0, 0, 0, 0,
- htons(len), cs_flags,
+ rte_cpu_to_be_16(len), cs_flags,
0, 0));
txq->wqe_ci = wqe_ci;
}
@@ -307,7 +218,7 @@ txq_scatter_v(struct txq *txq, struct rte_mbuf **pkts, uint16_t pkts_n)
txq->elts_comp += (uint16_t)(elts_head - txq->elts_head);
txq->elts_head = elts_head;
if (txq->elts_comp >= MLX5_TX_COMP_THRESH) {
- wqe->ctrl[2] = htonl(8);
+ wqe->ctrl[2] = rte_cpu_to_be_32(8);
wqe->ctrl[3] = txq->elts_head;
txq->elts_comp = 0;
++txq->cq_pi;
@@ -338,7 +249,7 @@ txq_scatter_v(struct txq *txq, struct rte_mbuf **pkts, uint16_t pkts_n)
* Number of packets successfully transmitted (<= pkts_n).
*/
static inline uint16_t
-txq_burst_v(struct txq *txq, struct rte_mbuf **pkts, uint16_t pkts_n,
+txq_burst_v(struct mlx5_txq_data *txq, struct rte_mbuf **pkts, uint16_t pkts_n,
uint8_t cs_flags)
{
struct rte_mbuf **elts;
@@ -374,6 +285,7 @@ txq_burst_v(struct txq *txq, struct rte_mbuf **pkts, uint16_t pkts_n,
max_elts = (elts_n - (elts_head - txq->elts_tail));
max_wqe = (1u << txq->wqe_n) - (txq->wqe_ci - txq->wqe_pi);
pkts_n = RTE_MIN((unsigned int)RTE_MIN(pkts_n, max_wqe), max_elts);
+ assert(pkts_n <= MLX5_DSEG_MAX - nb_dword_in_hdr);
if (unlikely(!pkts_n))
return 0;
elts = &(*txq->elts)[elts_head & elts_m];
@@ -432,87 +344,11 @@ txq_burst_v(struct txq *txq, struct rte_mbuf **pkts, uint16_t pkts_n,
txq->wqe_ci += (nb_dword_in_hdr + pkts_n + (nb_dword_per_wqebb - 1)) /
nb_dword_per_wqebb;
/* Ring QP doorbell. */
- mlx5_tx_dbrec(txq, wqe);
+ mlx5_tx_dbrec_cond_wmb(txq, wqe, pkts_n < MLX5_VPMD_TX_MAX_BURST);
return pkts_n;
}
/**
- * DPDK callback for vectorized TX.
- *
- * @param dpdk_txq
- * Generic pointer to TX queue structure.
- * @param[in] pkts
- * Packets to transmit.
- * @param pkts_n
- * Number of packets in array.
- *
- * @return
- * Number of packets successfully transmitted (<= pkts_n).
- */
-uint16_t
-mlx5_tx_burst_raw_vec(void *dpdk_txq, struct rte_mbuf **pkts,
- uint16_t pkts_n)
-{
- struct txq *txq = (struct txq *)dpdk_txq;
- uint16_t nb_tx = 0;
-
- while (pkts_n > nb_tx) {
- uint16_t n;
- uint16_t ret;
-
- n = RTE_MIN((uint16_t)(pkts_n - nb_tx), MLX5_VPMD_TX_MAX_BURST);
- ret = txq_burst_v(txq, &pkts[nb_tx], n, 0);
- nb_tx += ret;
- if (!ret)
- break;
- }
- return nb_tx;
-}
-
-/**
- * DPDK callback for vectorized TX with multi-seg packets and offload.
- *
- * @param dpdk_txq
- * Generic pointer to TX queue structure.
- * @param[in] pkts
- * Packets to transmit.
- * @param pkts_n
- * Number of packets in array.
- *
- * @return
- * Number of packets successfully transmitted (<= pkts_n).
- */
-uint16_t
-mlx5_tx_burst_vec(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
-{
- struct txq *txq = (struct txq *)dpdk_txq;
- uint16_t nb_tx = 0;
-
- while (pkts_n > nb_tx) {
- uint8_t cs_flags = 0;
- uint16_t n;
- uint16_t ret;
-
- /* Transmit multi-seg packets in the head of pkts list. */
- if (!(txq->flags & ETH_TXQ_FLAGS_NOMULTSEGS) &&
- NB_SEGS(pkts[nb_tx]) > 1)
- nb_tx += txq_scatter_v(txq,
- &pkts[nb_tx],
- pkts_n - nb_tx);
- n = RTE_MIN((uint16_t)(pkts_n - nb_tx), MLX5_VPMD_TX_MAX_BURST);
- if (!(txq->flags & ETH_TXQ_FLAGS_NOMULTSEGS))
- n = txq_check_multiseg(&pkts[nb_tx], n);
- if (!(txq->flags & ETH_TXQ_FLAGS_NOOFFLOADS))
- n = txq_calc_offload(txq, &pkts[nb_tx], n, &cs_flags);
- ret = txq_burst_v(txq, &pkts[nb_tx], n, cs_flags);
- nb_tx += ret;
- if (!ret)
- break;
- }
- return nb_tx;
-}
-
-/**
* Store free buffers to RX SW ring.
*
* @param rxq
@@ -523,7 +359,7 @@ mlx5_tx_burst_vec(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
* Number of packets to be stored.
*/
static inline void
-rxq_copy_mbuf_v(struct rxq *rxq, struct rte_mbuf **pkts, uint16_t n)
+rxq_copy_mbuf_v(struct mlx5_rxq_data *rxq, struct rte_mbuf **pkts, uint16_t n)
{
const uint16_t q_mask = (1 << rxq->elts_n) - 1;
struct rte_mbuf **elts = &(*rxq->elts)[rxq->rq_pi & q_mask];
@@ -541,41 +377,6 @@ rxq_copy_mbuf_v(struct rxq *rxq, struct rte_mbuf **pkts, uint16_t n)
}
/**
- * Replenish buffers for RX in bulk.
- *
- * @param rxq
- * Pointer to RX queue structure.
- * @param n
- * Number of buffers to be replenished.
- */
-static inline void
-rxq_replenish_bulk_mbuf(struct rxq *rxq, uint16_t n)
-{
- const uint16_t q_n = 1 << rxq->elts_n;
- const uint16_t q_mask = q_n - 1;
- const uint16_t elts_idx = rxq->rq_ci & q_mask;
- struct rte_mbuf **elts = &(*rxq->elts)[elts_idx];
- volatile struct mlx5_wqe_data_seg *wq = &(*rxq->wqes)[elts_idx];
- unsigned int i;
-
- assert(n >= MLX5_VPMD_RXQ_RPLNSH_THRESH);
- assert(n <= (uint16_t)(q_n - (rxq->rq_ci - rxq->rq_pi)));
- assert(MLX5_VPMD_RXQ_RPLNSH_THRESH > MLX5_VPMD_DESCS_PER_LOOP);
- /* Not to cross queue end. */
- n = RTE_MIN(n - MLX5_VPMD_DESCS_PER_LOOP, q_n - elts_idx);
- if (rte_mempool_get_bulk(rxq->mp, (void *)elts, n) < 0) {
- rxq->stats.rx_nombuf += n;
- return;
- }
- for (i = 0; i < n; ++i)
- wq[i].addr = htonll((uintptr_t)elts[i]->buf_addr +
- RTE_PKTMBUF_HEADROOM);
- rxq->rq_ci += n;
- rte_wmb();
- *rxq->rq_db = htonl(rxq->rq_ci);
-}
-
-/**
* Decompress a compressed completion and fill in mbufs in RX SW ring with data
* extracted from the title completion descriptor.
*
@@ -588,8 +389,7 @@ rxq_replenish_bulk_mbuf(struct rxq *rxq, uint16_t n)
* the title completion descriptor to be copied to the rest of mbufs.
*/
static inline void
-rxq_cq_decompress_v(struct rxq *rxq,
- volatile struct mlx5_cqe *cq,
+rxq_cq_decompress_v(struct mlx5_rxq_data *rxq, volatile struct mlx5_cqe *cq,
struct rte_mbuf **elts)
{
volatile struct mlx5_mini_cqe8 *mcq = (void *)(cq + 1);
@@ -636,13 +436,6 @@ rxq_cq_decompress_v(struct rxq *rxq,
10, 11, 2, 3);
#endif
- /* Compile time sanity check for this function. */
- RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, pkt_len) !=
- offsetof(struct rte_mbuf, rx_descriptor_fields1) + 4);
- RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, data_len) !=
- offsetof(struct rte_mbuf, rx_descriptor_fields1) + 8);
- RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, hash) !=
- offsetof(struct rte_mbuf, rx_descriptor_fields1) + 12);
/*
* A. load mCQEs into a 128bit register.
* B. store rearm data to mbuf.
@@ -747,12 +540,13 @@ rxq_cq_decompress_v(struct rxq *rxq,
* Pointer to array of packets to be filled.
*/
static inline void
-rxq_cq_to_ptype_oflags_v(struct rxq *rxq, __m128i cqes[4], __m128i op_err,
- struct rte_mbuf **pkts)
+rxq_cq_to_ptype_oflags_v(struct mlx5_rxq_data *rxq, __m128i cqes[4],
+ __m128i op_err, struct rte_mbuf **pkts)
{
__m128i pinfo0, pinfo1;
__m128i pinfo, ptype;
- __m128i ol_flags = _mm_set1_epi32(rxq->rss_hash * PKT_RX_RSS_HASH);
+ __m128i ol_flags = _mm_set1_epi32(rxq->rss_hash * PKT_RX_RSS_HASH |
+ rxq->hw_timestamp * PKT_RX_TIMESTAMP);
__m128i cv_flags;
const __m128i zero = _mm_setzero_si128();
const __m128i ptype_mask =
@@ -769,17 +563,17 @@ rxq_cq_to_ptype_oflags_v(struct rxq *rxq, __m128i cqes[4], __m128i op_err,
(uint8_t)(PKT_RX_L4_CKSUM_GOOD >> 1),
0,
(uint8_t)(PKT_RX_IP_CKSUM_GOOD >> 1),
- (uint8_t)(PKT_RX_VLAN_PKT | PKT_RX_VLAN_STRIPPED),
+ (uint8_t)(PKT_RX_VLAN | PKT_RX_VLAN_STRIPPED),
0);
const __m128i cv_mask =
_mm_set_epi32(PKT_RX_IP_CKSUM_GOOD | PKT_RX_L4_CKSUM_GOOD |
- PKT_RX_VLAN_PKT | PKT_RX_VLAN_STRIPPED,
+ PKT_RX_VLAN | PKT_RX_VLAN_STRIPPED,
PKT_RX_IP_CKSUM_GOOD | PKT_RX_L4_CKSUM_GOOD |
- PKT_RX_VLAN_PKT | PKT_RX_VLAN_STRIPPED,
+ PKT_RX_VLAN | PKT_RX_VLAN_STRIPPED,
PKT_RX_IP_CKSUM_GOOD | PKT_RX_L4_CKSUM_GOOD |
- PKT_RX_VLAN_PKT | PKT_RX_VLAN_STRIPPED,
+ PKT_RX_VLAN | PKT_RX_VLAN_STRIPPED,
PKT_RX_IP_CKSUM_GOOD | PKT_RX_L4_CKSUM_GOOD |
- PKT_RX_VLAN_PKT | PKT_RX_VLAN_STRIPPED);
+ PKT_RX_VLAN | PKT_RX_VLAN_STRIPPED);
const __m128i mbuf_init =
_mm_loadl_epi64((__m128i *)&rxq->mbuf_initializer);
__m128i rearm0, rearm1, rearm2, rearm3;
@@ -853,15 +647,11 @@ rxq_cq_to_ptype_oflags_v(struct rxq *rxq, __m128i cqes[4], __m128i op_err,
/* Merge to ol_flags. */
ol_flags = _mm_or_si128(ol_flags, cv_flags);
/* Merge mbuf_init and ol_flags. */
- RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, ol_flags) !=
- offsetof(struct rte_mbuf, rearm_data) + 8);
rearm0 = _mm_blend_epi16(mbuf_init, _mm_slli_si128(ol_flags, 8), 0x30);
rearm1 = _mm_blend_epi16(mbuf_init, _mm_slli_si128(ol_flags, 4), 0x30);
rearm2 = _mm_blend_epi16(mbuf_init, ol_flags, 0x30);
rearm3 = _mm_blend_epi16(mbuf_init, _mm_srli_si128(ol_flags, 4), 0x30);
/* Write 8B rearm_data and 8B ol_flags. */
- RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, rearm_data) !=
- RTE_ALIGN(offsetof(struct rte_mbuf, rearm_data), 16));
_mm_store_si128((__m128i *)&pkts[0]->rearm_data, rearm0);
_mm_store_si128((__m128i *)&pkts[1]->rearm_data, rearm1);
_mm_store_si128((__m128i *)&pkts[2]->rearm_data, rearm2);
@@ -869,51 +659,6 @@ rxq_cq_to_ptype_oflags_v(struct rxq *rxq, __m128i cqes[4], __m128i op_err,
}
/**
- * Skip error packets.
- *
- * @param rxq
- * Pointer to RX queue structure.
- * @param[out] pkts
- * Array to store received packets.
- * @param pkts_n
- * Maximum number of packets in array.
- *
- * @return
- * Number of packets successfully received (<= pkts_n).
- */
-static uint16_t
-rxq_handle_pending_error(struct rxq *rxq, struct rte_mbuf **pkts,
- uint16_t pkts_n)
-{
- uint16_t n = 0;
- unsigned int i;
-#ifdef MLX5_PMD_SOFT_COUNTERS
- uint32_t err_bytes = 0;
-#endif
-
- for (i = 0; i < pkts_n; ++i) {
- struct rte_mbuf *pkt = pkts[i];
-
- if (pkt->packet_type == RTE_PTYPE_ALL_MASK) {
-#ifdef MLX5_PMD_SOFT_COUNTERS
- err_bytes += PKT_LEN(pkt);
-#endif
- rte_pktmbuf_free_seg(pkt);
- } else {
- pkts[n++] = pkt;
- }
- }
- rxq->stats.idropped += (pkts_n - n);
-#ifdef MLX5_PMD_SOFT_COUNTERS
- /* Correct counters of errored completions. */
- rxq->stats.ipackets -= (pkts_n - n);
- rxq->stats.ibytes -= err_bytes;
-#endif
- rxq->pending_err = 0;
- return n;
-}
-
-/**
* Receive burst of packets. An errored completion also consumes a mbuf, but the
* packet_type is set to be RTE_PTYPE_ALL_MASK. Marked mbufs should be freed
* before returning to application.
@@ -929,7 +674,7 @@ rxq_handle_pending_error(struct rxq *rxq, struct rte_mbuf **pkts,
* Number of packets received including errors (<= pkts_n).
*/
static inline uint16_t
-rxq_burst_v(struct rxq *rxq, struct rte_mbuf **pkts, uint16_t pkts_n)
+rxq_burst_v(struct mlx5_rxq_data *rxq, struct rte_mbuf **pkts, uint16_t pkts_n)
{
const uint16_t q_n = 1 << rxq->cqe_n;
const uint16_t q_mask = q_n - 1;
@@ -984,26 +729,6 @@ rxq_burst_v(struct rxq *rxq, struct rte_mbuf **pkts, uint16_t pkts_n)
rxq->crc_present * ETHER_CRC_LEN);
const __m128i flow_mark_adj = _mm_set_epi32(rxq->mark * (-1), 0, 0, 0);
- /* Compile time sanity check for this function. */
- RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, pkt_len) !=
- offsetof(struct rte_mbuf, rx_descriptor_fields1) + 4);
- RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, data_len) !=
- offsetof(struct rte_mbuf, rx_descriptor_fields1) + 8);
- RTE_BUILD_BUG_ON(offsetof(struct mlx5_cqe, pkt_info) != 0);
- RTE_BUILD_BUG_ON(offsetof(struct mlx5_cqe, rx_hash_res) !=
- offsetof(struct mlx5_cqe, pkt_info) + 12);
- RTE_BUILD_BUG_ON(offsetof(struct mlx5_cqe, rsvd1) +
- sizeof(((struct mlx5_cqe *)0)->rsvd1) !=
- offsetof(struct mlx5_cqe, hdr_type_etc));
- RTE_BUILD_BUG_ON(offsetof(struct mlx5_cqe, vlan_info) !=
- offsetof(struct mlx5_cqe, hdr_type_etc) + 2);
- RTE_BUILD_BUG_ON(offsetof(struct mlx5_cqe, rsvd2) +
- sizeof(((struct mlx5_cqe *)0)->rsvd2) !=
- offsetof(struct mlx5_cqe, byte_cnt));
- RTE_BUILD_BUG_ON(offsetof(struct mlx5_cqe, sop_drop_qpn) !=
- RTE_ALIGN(offsetof(struct mlx5_cqe, sop_drop_qpn), 8));
- RTE_BUILD_BUG_ON(offsetof(struct mlx5_cqe, op_own) !=
- offsetof(struct mlx5_cqe, sop_drop_qpn) + 7);
assert(rxq->sges_n == 0);
assert(rxq->cqe_n == rxq->elts_n);
cq = &(*rxq->cqes)[cq_idx];
@@ -1022,7 +747,7 @@ rxq_burst_v(struct rxq *rxq, struct rte_mbuf **pkts, uint16_t pkts_n)
*/
repl_n = q_n - (rxq->rq_ci - rxq->rq_pi);
if (repl_n >= MLX5_VPMD_RXQ_RPLNSH_THRESH)
- rxq_replenish_bulk_mbuf(rxq, repl_n);
+ mlx5_rx_replenish_bulk_mbuf(rxq, repl_n);
/* See if there're unreturned mbufs from compressed CQE. */
rcvd_pkt = rxq->cq_ci - rxq->rq_pi;
if (rcvd_pkt > 0) {
@@ -1214,6 +939,16 @@ rxq_burst_v(struct rxq *rxq, struct rte_mbuf **pkts, uint16_t pkts_n)
rxq->pending_err |= !!_mm_cvtsi128_si64(opcode);
/* D.5 fill in mbuf - rearm_data and packet_type. */
rxq_cq_to_ptype_oflags_v(rxq, cqes, opcode, &pkts[pos]);
+ if (rxq->hw_timestamp) {
+ pkts[pos]->timestamp =
+ rte_be_to_cpu_64(cq[pos].timestamp);
+ pkts[pos + 1]->timestamp =
+ rte_be_to_cpu_64(cq[pos + p1].timestamp);
+ pkts[pos + 2]->timestamp =
+ rte_be_to_cpu_64(cq[pos + p2].timestamp);
+ pkts[pos + 3]->timestamp =
+ rte_be_to_cpu_64(cq[pos + p3].timestamp);
+ }
#ifdef MLX5_PMD_SOFT_COUNTERS
/* Add up received bytes count. */
byte_cnt = _mm_shuffle_epi8(op_own, len_shuf_mask);
@@ -1254,164 +989,9 @@ rxq_burst_v(struct rxq *rxq, struct rte_mbuf **pkts, uint16_t pkts_n)
rcvd_pkt += n;
}
}
- rte_wmb();
- *rxq->cq_db = htonl(rxq->cq_ci);
+ rte_compiler_barrier();
+ *rxq->cq_db = rte_cpu_to_be_32(rxq->cq_ci);
return rcvd_pkt;
}
-/**
- * DPDK callback for vectorized RX.
- *
- * @param dpdk_rxq
- * Generic pointer to RX queue structure.
- * @param[out] pkts
- * Array to store received packets.
- * @param pkts_n
- * Maximum number of packets in array.
- *
- * @return
- * Number of packets successfully received (<= pkts_n).
- */
-uint16_t
-mlx5_rx_burst_vec(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n)
-{
- struct rxq *rxq = dpdk_rxq;
- uint16_t nb_rx;
-
- nb_rx = rxq_burst_v(rxq, pkts, pkts_n);
- if (unlikely(rxq->pending_err))
- nb_rx = rxq_handle_pending_error(rxq, pkts, nb_rx);
- return nb_rx;
-}
-
-/**
- * Check Tx queue flags are set for raw vectorized Tx.
- *
- * @param priv
- * Pointer to private structure.
- *
- * @return
- * 1 if supported, negative errno value if not.
- */
-int __attribute__((cold))
-priv_check_raw_vec_tx_support(struct priv *priv)
-{
- uint16_t i;
-
- /* All the configured queues should support. */
- for (i = 0; i < priv->txqs_n; ++i) {
- struct txq *txq = (*priv->txqs)[i];
-
- if (!(txq->flags & ETH_TXQ_FLAGS_NOMULTSEGS) ||
- !(txq->flags & ETH_TXQ_FLAGS_NOOFFLOADS))
- break;
- }
- if (i != priv->txqs_n)
- return -ENOTSUP;
- return 1;
-}
-
-/**
- * Check a device can support vectorized TX.
- *
- * @param priv
- * Pointer to private structure.
- *
- * @return
- * 1 if supported, negative errno value if not.
- */
-int __attribute__((cold))
-priv_check_vec_tx_support(struct priv *priv)
-{
- if (!priv->tx_vec_en ||
- priv->txqs_n > MLX5_VPMD_MIN_TXQS ||
- priv->mps != MLX5_MPW_ENHANCED ||
- priv->tso)
- return -ENOTSUP;
- return 1;
-}
-
-/**
- * Check a RX queue can support vectorized RX.
- *
- * @param rxq
- * Pointer to RX queue.
- *
- * @return
- * 1 if supported, negative errno value if not.
- */
-int __attribute__((cold))
-rxq_check_vec_support(struct rxq *rxq)
-{
- struct rxq_ctrl *ctrl = container_of(rxq, struct rxq_ctrl, rxq);
-
- if (!ctrl->priv->rx_vec_en || rxq->sges_n != 0)
- return -ENOTSUP;
- return 1;
-}
-
-/**
- * Check a device can support vectorized RX.
- *
- * @param priv
- * Pointer to private structure.
- *
- * @return
- * 1 if supported, negative errno value if not.
- */
-int __attribute__((cold))
-priv_check_vec_rx_support(struct priv *priv)
-{
- uint16_t i;
-
- if (!priv->rx_vec_en)
- return -ENOTSUP;
- /* All the configured queues should support. */
- for (i = 0; i < priv->rxqs_n; ++i) {
- struct rxq *rxq = (*priv->rxqs)[i];
-
- if (rxq_check_vec_support(rxq) < 0)
- break;
- }
- if (i != priv->rxqs_n)
- return -ENOTSUP;
- return 1;
-}
-
-/**
- * Prepare for vectorized RX.
- *
- * @param priv
- * Pointer to private structure.
- */
-void
-priv_prep_vec_rx_function(struct priv *priv)
-{
- uint16_t i;
-
- for (i = 0; i < priv->rxqs_n; ++i) {
- struct rxq *rxq = (*priv->rxqs)[i];
- struct rte_mbuf *mbuf_init = &rxq->fake_mbuf;
- const uint16_t desc = 1 << rxq->elts_n;
- int j;
-
- assert(rxq->elts_n == rxq->cqe_n);
- /* Initialize default rearm_data for vPMD. */
- mbuf_init->data_off = RTE_PKTMBUF_HEADROOM;
- rte_mbuf_refcnt_set(mbuf_init, 1);
- mbuf_init->nb_segs = 1;
- mbuf_init->port = rxq->port_id;
- /*
- * prevent compiler reordering:
- * rearm_data covers previous fields.
- */
- rte_compiler_barrier();
- rxq->mbuf_initializer =
- *(uint64_t *)&mbuf_init->rearm_data;
- /* Padding with a fake mbuf for vectorized Rx. */
- for (j = 0; j < MLX5_VPMD_DESCS_PER_LOOP; ++j)
- (*rxq->elts)[desc + j] = &rxq->fake_mbuf;
- /* Mark that it need to be cleaned up for rxq_alloc_elts(). */
- rxq->trim_elts = 1;
- }
-}
+#endif /* RTE_PMD_MLX5_RXTX_VEC_SSE_H_ */
diff --git a/drivers/net/mlx5/mlx5_socket.c b/drivers/net/mlx5/mlx5_socket.c
new file mode 100644
index 00000000..5cd1ab80
--- /dev/null
+++ b/drivers/net/mlx5/mlx5_socket.c
@@ -0,0 +1,294 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright 2016 6WIND S.A.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of 6WIND S.A. nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+#define _GNU_SOURCE
+
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <sys/un.h>
+#include <fcntl.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <sys/stat.h>
+
+#include "mlx5.h"
+#include "mlx5_utils.h"
+
+/**
+ * Initialise the socket used to communicate with the secondary process.
+ *
+ * @param[in] priv
+ * Pointer to private structure.
+ *
+ * @return
+ * 0 on success, errno value on failure.
+ */
+int
+priv_socket_init(struct priv *priv)
+{
+ struct sockaddr_un sun = {
+ .sun_family = AF_UNIX,
+ };
+ int ret;
+ int flags;
+ struct stat file_stat;
+
+ /*
+ * Initialise the socket to communicate with the secondary
+ * process.
+ */
+ ret = socket(AF_UNIX, SOCK_STREAM, 0);
+ if (ret < 0) {
+ WARN("secondary process not supported: %s", strerror(errno));
+ return ret;
+ }
+ priv->primary_socket = ret;
+ flags = fcntl(priv->primary_socket, F_GETFL, 0);
+ if (flags == -1)
+ goto out;
+ ret = fcntl(priv->primary_socket, F_SETFL, flags | O_NONBLOCK);
+ if (ret < 0)
+ goto out;
+ snprintf(sun.sun_path, sizeof(sun.sun_path), "/var/tmp/%s_%d",
+ MLX5_DRIVER_NAME, priv->primary_socket);
+ ret = stat(sun.sun_path, &file_stat);
+ if (!ret)
+ claim_zero(remove(sun.sun_path));
+ ret = bind(priv->primary_socket, (const struct sockaddr *)&sun,
+ sizeof(sun));
+ if (ret < 0) {
+ WARN("cannot bind socket, secondary process not supported: %s",
+ strerror(errno));
+ goto close;
+ }
+ ret = listen(priv->primary_socket, 0);
+ if (ret < 0) {
+ WARN("Secondary process not supported: %s", strerror(errno));
+ goto close;
+ }
+ return ret;
+close:
+ remove(sun.sun_path);
+out:
+ claim_zero(close(priv->primary_socket));
+ priv->primary_socket = 0;
+ return -(ret);
+}
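
priv_socket_init() above follows a standard pattern: create a stream socket, make it non-blocking, remove a stale socket file, then bind and listen. A self-contained sketch of the same pattern, detached from the PMD (the path handling is simplified and error reporting is left out; the helper name is illustrative only):

#include <sys/socket.h>
#include <sys/un.h>
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

/* Sketch: non-blocking AF_UNIX listener with stale-path cleanup. */
static int
unix_listen(const char *path)
{
	struct sockaddr_un sun = { .sun_family = AF_UNIX };
	int fd = socket(AF_UNIX, SOCK_STREAM, 0);
	int flags;

	if (fd < 0)
		return -1;
	flags = fcntl(fd, F_GETFL, 0);
	if (flags == -1 || fcntl(fd, F_SETFL, flags | O_NONBLOCK) < 0)
		goto error;
	snprintf(sun.sun_path, sizeof(sun.sun_path), "%s", path);
	unlink(path); /* Drop a stale socket file, as the PMD does with remove(). */
	if (bind(fd, (const struct sockaddr *)&sun, sizeof(sun)) < 0 ||
	    listen(fd, 0) < 0)
		goto error;
	return fd;
error:
	close(fd);
	return -1;
}
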
+
+/**
+ * Uninitialise the socket used to communicate with the secondary process.
+ *
+ * @param[in] priv
+ * Pointer to private structure.
+ *
+ * @return
+ * 0 on success, errno value on failure.
+ */
+int
+priv_socket_uninit(struct priv *priv)
+{
+ MKSTR(path, "/var/tmp/%s_%d", MLX5_DRIVER_NAME, priv->primary_socket);
+ claim_zero(close(priv->primary_socket));
+ priv->primary_socket = 0;
+ claim_zero(remove(path));
+ return 0;
+}
+
+/**
+ * Handle socket interrupts.
+ *
+ * @param priv
+ * Pointer to private structure.
+ */
+void
+priv_socket_handle(struct priv *priv)
+{
+ int conn_sock;
+ int ret = 0;
+ struct cmsghdr *cmsg = NULL;
+ struct ucred *cred = NULL;
+ char buf[CMSG_SPACE(sizeof(struct ucred))] = { 0 };
+ char vbuf[1024] = { 0 };
+ struct iovec io = {
+ .iov_base = vbuf,
+ .iov_len = sizeof(*vbuf),
+ };
+ struct msghdr msg = {
+ .msg_iov = &io,
+ .msg_iovlen = 1,
+ .msg_control = buf,
+ .msg_controllen = sizeof(buf),
+ };
+ int *fd;
+
+ /* Accept the connection from the client. */
+ conn_sock = accept(priv->primary_socket, NULL, NULL);
+ if (conn_sock < 0) {
+ WARN("connection failed: %s", strerror(errno));
+ return;
+ }
+ ret = setsockopt(conn_sock, SOL_SOCKET, SO_PASSCRED, &(int){1},
+ sizeof(int));
+ if (ret < 0) {
+ WARN("cannot change socket options");
+ goto out;
+ }
+ ret = recvmsg(conn_sock, &msg, MSG_WAITALL);
+ if (ret < 0) {
+ WARN("received an empty message: %s", strerror(errno));
+ goto out;
+ }
+ /* Expect to receive credentials only. */
+ cmsg = CMSG_FIRSTHDR(&msg);
+ if (cmsg == NULL) {
+ WARN("no message");
+ goto out;
+ }
+ if ((cmsg->cmsg_type == SCM_CREDENTIALS) &&
+ (cmsg->cmsg_len >= sizeof(*cred))) {
+ cred = (struct ucred *)CMSG_DATA(cmsg);
+ assert(cred != NULL);
+ }
+ cmsg = CMSG_NXTHDR(&msg, cmsg);
+ if (cmsg != NULL) {
+ WARN("Message wrongly formatted");
+ goto out;
+ }
+ /* Make sure all the ancillary data was received and valid. */
+ if ((cred == NULL) || (cred->uid != getuid()) ||
+ (cred->gid != getgid())) {
+ WARN("wrong credentials");
+ goto out;
+ }
+ /* Set-up the ancillary data. */
+ cmsg = CMSG_FIRSTHDR(&msg);
+ assert(cmsg != NULL);
+ cmsg->cmsg_level = SOL_SOCKET;
+ cmsg->cmsg_type = SCM_RIGHTS;
+ cmsg->cmsg_len = CMSG_LEN(sizeof(priv->ctx->cmd_fd));
+ fd = (int *)CMSG_DATA(cmsg);
+ *fd = priv->ctx->cmd_fd;
+ ret = sendmsg(conn_sock, &msg, 0);
+ if (ret < 0)
+ WARN("cannot send response");
+out:
+ close(conn_sock);
+}
+
+/**
+ * Connect to the primary process.
+ *
+ * @param[in] priv
+ * Pointer to private structure.
+ *
+ * @return
+ * fd on success, negative errno value on failure.
+ */
+int
+priv_socket_connect(struct priv *priv)
+{
+ struct sockaddr_un sun = {
+ .sun_family = AF_UNIX,
+ };
+ int socket_fd;
+ int *fd = NULL;
+ int ret;
+ struct ucred *cred;
+ char buf[CMSG_SPACE(sizeof(*cred))] = { 0 };
+ char vbuf[1024] = { 0 };
+ struct iovec io = {
+ .iov_base = vbuf,
+ .iov_len = sizeof(*vbuf),
+ };
+ struct msghdr msg = {
+ .msg_control = buf,
+ .msg_controllen = sizeof(buf),
+ .msg_iov = &io,
+ .msg_iovlen = 1,
+ };
+ struct cmsghdr *cmsg;
+
+ ret = socket(AF_UNIX, SOCK_STREAM, 0);
+ if (ret < 0) {
+ WARN("cannot connect to primary");
+ return ret;
+ }
+ socket_fd = ret;
+ snprintf(sun.sun_path, sizeof(sun.sun_path), "/var/tmp/%s_%d",
+ MLX5_DRIVER_NAME, priv->primary_socket);
+ ret = connect(socket_fd, (const struct sockaddr *)&sun, sizeof(sun));
+ if (ret < 0) {
+ WARN("cannot connect to primary");
+ goto out;
+ }
+ cmsg = CMSG_FIRSTHDR(&msg);
+ if (cmsg == NULL) {
+ DEBUG("cannot get first message");
+ goto out;
+ }
+ cmsg->cmsg_level = SOL_SOCKET;
+ cmsg->cmsg_type = SCM_CREDENTIALS;
+ cmsg->cmsg_len = CMSG_LEN(sizeof(*cred));
+ cred = (struct ucred *)CMSG_DATA(cmsg);
+ if (cred == NULL) {
+ DEBUG("no credentials received");
+ goto out;
+ }
+ cred->pid = getpid();
+ cred->uid = getuid();
+ cred->gid = getgid();
+ ret = sendmsg(socket_fd, &msg, MSG_DONTWAIT);
+ if (ret < 0) {
+ WARN("cannot send credentials to primary: %s",
+ strerror(errno));
+ goto out;
+ }
+ ret = recvmsg(socket_fd, &msg, MSG_WAITALL);
+ if (ret <= 0) {
+ WARN("no message from primary: %s", strerror(errno));
+ goto out;
+ }
+ cmsg = CMSG_FIRSTHDR(&msg);
+ if (cmsg == NULL) {
+ WARN("No file descriptor received");
+ goto out;
+ }
+ fd = (int *)CMSG_DATA(cmsg);
+ if (*fd <= 0) {
+ WARN("no file descriptor received: %s", strerror(errno));
+ ret = *fd;
+ goto out;
+ }
+ ret = *fd;
+out:
+ close(socket_fd);
+ return ret;
+}
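
The connect/handle pair above exchanges credentials (SCM_CREDENTIALS) one way and the Verbs command file descriptor (SCM_RIGHTS) the other way. Below is a minimal, PMD-independent sketch of the fd-passing half, using only the standard ancillary-data macros (the helper name is invented for illustration):

#include <string.h>
#include <sys/socket.h>
#include <sys/uio.h>

/* Sketch: send one file descriptor over a connected AF_UNIX socket. */
static int
send_fd(int sock, int fd)
{
	char cbuf[CMSG_SPACE(sizeof(int))] = { 0 };
	char payload = 0;
	struct iovec io = { .iov_base = &payload, .iov_len = sizeof(payload) };
	struct msghdr msg = {
		.msg_iov = &io,
		.msg_iovlen = 1,
		.msg_control = cbuf,
		.msg_controllen = sizeof(cbuf),
	};
	struct cmsghdr *cmsg = CMSG_FIRSTHDR(&msg);

	cmsg->cmsg_level = SOL_SOCKET;
	cmsg->cmsg_type = SCM_RIGHTS;
	cmsg->cmsg_len = CMSG_LEN(sizeof(int));
	memcpy(CMSG_DATA(cmsg), &fd, sizeof(int));
	return sendmsg(sock, &msg, 0) < 0 ? -1 : 0;
}
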
diff --git a/drivers/net/mlx5/mlx5_stats.c b/drivers/net/mlx5/mlx5_stats.c
index 703f48c3..5e225d37 100644
--- a/drivers/net/mlx5/mlx5_stats.c
+++ b/drivers/net/mlx5/mlx5_stats.c
@@ -34,16 +34,9 @@
#include <linux/sockios.h>
#include <linux/ethtool.h>
-/* DPDK headers don't like -pedantic. */
-#ifdef PEDANTIC
-#pragma GCC diagnostic ignored "-Wpedantic"
-#endif
#include <rte_ethdev.h>
#include <rte_common.h>
#include <rte_malloc.h>
-#ifdef PEDANTIC
-#pragma GCC diagnostic error "-Wpedantic"
-#endif
#include "mlx5.h"
#include "mlx5_rxtx.h"
@@ -325,7 +318,7 @@ priv_xstats_reset(struct priv *priv)
* @param[out] stats
* Stats structure output buffer.
*/
-void
+int
mlx5_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
{
struct priv *priv = mlx5_get_priv(dev);
@@ -336,7 +329,7 @@ mlx5_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
priv_lock(priv);
/* Add software counters. */
for (i = 0; (i != priv->rxqs_n); ++i) {
- struct rxq *rxq = (*priv->rxqs)[i];
+ struct mlx5_rxq_data *rxq = (*priv->rxqs)[i];
if (rxq == NULL)
continue;
@@ -357,7 +350,7 @@ mlx5_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
tmp.rx_nombuf += rxq->stats.rx_nombuf;
}
for (i = 0; (i != priv->txqs_n); ++i) {
- struct txq *txq = (*priv->txqs)[i];
+ struct mlx5_txq_data *txq = (*priv->txqs)[i];
if (txq == NULL)
continue;
@@ -367,19 +360,20 @@ mlx5_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
tmp.q_opackets[idx] += txq->stats.opackets;
tmp.q_obytes[idx] += txq->stats.obytes;
#endif
- tmp.q_errors[idx] += txq->stats.odropped;
+ tmp.q_errors[idx] += txq->stats.oerrors;
}
#ifdef MLX5_PMD_SOFT_COUNTERS
tmp.opackets += txq->stats.opackets;
tmp.obytes += txq->stats.obytes;
#endif
- tmp.oerrors += txq->stats.odropped;
+ tmp.oerrors += txq->stats.oerrors;
}
#ifndef MLX5_PMD_SOFT_COUNTERS
/* FIXME: retrieve and add hardware counters. */
#endif
*stats = tmp;
priv_unlock(priv);
+ return 0;
}
/**
@@ -442,8 +436,10 @@ mlx5_xstats_get(struct rte_eth_dev *dev,
priv_lock(priv);
stats_n = priv_ethtool_get_stats_n(priv);
- if (stats_n < 0)
+ if (stats_n < 0) {
+ priv_unlock(priv);
return -1;
+ }
if (xstats_ctrl->stats_n != stats_n)
priv_xstats_init(priv);
ret = priv_xstats_get(priv, stats);
@@ -468,10 +464,11 @@ mlx5_xstats_reset(struct rte_eth_dev *dev)
priv_lock(priv);
stats_n = priv_ethtool_get_stats_n(priv);
if (stats_n < 0)
- return;
+ goto unlock;
if (xstats_ctrl->stats_n != stats_n)
priv_xstats_init(priv);
priv_xstats_reset(priv);
+unlock:
priv_unlock(priv);
}
diff --git a/drivers/net/mlx5/mlx5_trigger.c b/drivers/net/mlx5/mlx5_trigger.c
index 595a9e06..5de2d026 100644
--- a/drivers/net/mlx5/mlx5_trigger.c
+++ b/drivers/net/mlx5/mlx5_trigger.c
@@ -30,23 +30,90 @@
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
+#include <unistd.h>
-/* DPDK headers don't like -pedantic. */
-#ifdef PEDANTIC
-#pragma GCC diagnostic ignored "-Wpedantic"
-#endif
#include <rte_ether.h>
#include <rte_ethdev.h>
#include <rte_interrupts.h>
#include <rte_alarm.h>
-#ifdef PEDANTIC
-#pragma GCC diagnostic error "-Wpedantic"
-#endif
#include "mlx5.h"
#include "mlx5_rxtx.h"
#include "mlx5_utils.h"
+static void
+priv_txq_stop(struct priv *priv)
+{
+ unsigned int i;
+
+ for (i = 0; i != priv->txqs_n; ++i)
+ mlx5_priv_txq_release(priv, i);
+}
+
+static int
+priv_txq_start(struct priv *priv)
+{
+ unsigned int i;
+ int ret = 0;
+
+ /* Add memory regions to Tx queues. */
+ for (i = 0; i != priv->txqs_n; ++i) {
+ unsigned int idx = 0;
+ struct mlx5_mr *mr;
+ struct mlx5_txq_ctrl *txq_ctrl = mlx5_priv_txq_get(priv, i);
+
+ if (!txq_ctrl)
+ continue;
+ LIST_FOREACH(mr, &priv->mr, next)
+ priv_txq_mp2mr_reg(priv, &txq_ctrl->txq, mr->mp, idx++);
+ txq_alloc_elts(txq_ctrl);
+ txq_ctrl->ibv = mlx5_priv_txq_ibv_new(priv, i);
+ if (!txq_ctrl->ibv) {
+ ret = ENOMEM;
+ goto error;
+ }
+ }
+ return -ret;
+error:
+ priv_txq_stop(priv);
+ return -ret;
+}
+
+static void
+priv_rxq_stop(struct priv *priv)
+{
+ unsigned int i;
+
+ for (i = 0; i != priv->rxqs_n; ++i)
+ mlx5_priv_rxq_release(priv, i);
+}
+
+static int
+priv_rxq_start(struct priv *priv)
+{
+ unsigned int i;
+ int ret = 0;
+
+ for (i = 0; i != priv->rxqs_n; ++i) {
+ struct mlx5_rxq_ctrl *rxq_ctrl = mlx5_priv_rxq_get(priv, i);
+
+ if (!rxq_ctrl)
+ continue;
+ ret = rxq_alloc_elts(rxq_ctrl);
+ if (ret)
+ goto error;
+ rxq_ctrl->ibv = mlx5_priv_rxq_ibv_new(priv, i);
+ if (!rxq_ctrl->ibv) {
+ ret = ENOMEM;
+ goto error;
+ }
+ }
+ return -ret;
+error:
+ priv_rxq_stop(priv);
+ return -ret;
+}
+
/**
* DPDK callback to start the device.
*
@@ -62,36 +129,47 @@ int
mlx5_dev_start(struct rte_eth_dev *dev)
{
struct priv *priv = dev->data->dev_private;
+ struct mlx5_mr *mr = NULL;
int err;
if (mlx5_is_secondary())
return -E_RTE_SECONDARY;
+ dev->data->dev_started = 1;
priv_lock(priv);
- if (priv->started) {
- priv_unlock(priv);
- return 0;
+ err = priv_flow_create_drop_queue(priv);
+ if (err) {
+ ERROR("%p: Drop queue allocation failed: %s",
+ (void *)dev, strerror(err));
+ goto error;
}
- /* Update Rx/Tx callback. */
- priv_select_tx_function(priv);
- priv_select_rx_function(priv);
DEBUG("%p: allocating and configuring hash RX queues", (void *)dev);
- err = priv_create_hash_rxqs(priv);
- if (!err)
- err = priv_rehash_flows(priv);
- if (!err)
- priv->started = 1;
- else {
- ERROR("%p: an error occurred while configuring hash RX queues:"
+ rte_mempool_walk(mlx5_mp2mr_iter, priv);
+ err = priv_txq_start(priv);
+ if (err) {
+ ERROR("%p: TXQ allocation failed: %s",
+ (void *)dev, strerror(err));
+ goto error;
+ }
+ /* Update send callback. */
+ priv_dev_select_tx_function(priv, dev);
+ err = priv_rxq_start(priv);
+ if (err) {
+ ERROR("%p: RXQ allocation failed: %s",
+ (void *)dev, strerror(err));
+ goto error;
+ }
+ /* Update receive callback. */
+ priv_dev_select_rx_function(priv, dev);
+ err = priv_dev_traffic_enable(priv, dev);
+ if (err) {
+ ERROR("%p: an error occurred while configuring control flows:"
" %s",
(void *)priv, strerror(err));
goto error;
}
- if (dev->data->dev_conf.fdir_conf.mode != RTE_FDIR_MODE_NONE)
- priv_fdir_enable(priv);
- err = priv_flow_start(priv);
+ err = priv_flow_start(priv, &priv->flows);
if (err) {
- priv->started = 0;
ERROR("%p: an error occurred while configuring flows:"
" %s",
(void *)priv, strerror(err));
@@ -109,10 +187,14 @@ mlx5_dev_start(struct rte_eth_dev *dev)
return 0;
error:
/* Rollback. */
- priv_special_flow_disable_all(priv);
- priv_mac_addrs_disable(priv);
- priv_destroy_hash_rxqs(priv);
- priv_flow_stop(priv);
+ dev->data->dev_started = 0;
+ for (mr = LIST_FIRST(&priv->mr); mr; mr = LIST_FIRST(&priv->mr))
+ priv_mr_release(priv, mr);
+ priv_flow_stop(priv, &priv->flows);
+ priv_dev_traffic_disable(priv, dev);
+ priv_txq_stop(priv);
+ priv_rxq_stop(priv);
+ priv_flow_delete_drop_queue(priv);
priv_unlock(priv);
return -err;
}
@@ -129,23 +211,215 @@ void
mlx5_dev_stop(struct rte_eth_dev *dev)
{
struct priv *priv = dev->data->dev_private;
+ struct mlx5_mr *mr;
if (mlx5_is_secondary())
return;
priv_lock(priv);
- if (!priv->started) {
- priv_unlock(priv);
- return;
- }
+ dev->data->dev_started = 0;
+ /* Prevent crashes when queues are still in use. */
+ dev->rx_pkt_burst = removed_rx_burst;
+ dev->tx_pkt_burst = removed_tx_burst;
+ rte_wmb();
+ usleep(1000 * priv->rxqs_n);
DEBUG("%p: cleaning up and destroying hash RX queues", (void *)dev);
- priv_special_flow_disable_all(priv);
- priv_mac_addrs_disable(priv);
- priv_destroy_hash_rxqs(priv);
- priv_fdir_disable(priv);
- priv_flow_stop(priv);
+ priv_flow_stop(priv, &priv->flows);
+ priv_dev_traffic_disable(priv, dev);
priv_rx_intr_vec_disable(priv);
priv_dev_interrupt_handler_uninstall(priv, dev);
- priv->started = 0;
+ priv_txq_stop(priv);
+ priv_rxq_stop(priv);
+ for (mr = LIST_FIRST(&priv->mr); mr; mr = LIST_FIRST(&priv->mr))
+ priv_mr_release(priv, mr);
+ priv_flow_delete_drop_queue(priv);
+ priv_unlock(priv);
+}
+
+/**
+ * Enable traffic flows configured by the control plane.
+ *
+ * @param priv
+ * Pointer to Ethernet device private data.
+ * @param dev
+ * Pointer to Ethernet device structure.
+ *
+ * @return
+ * 0 on success.
+ */
+int
+priv_dev_traffic_enable(struct priv *priv, struct rte_eth_dev *dev)
+{
+ struct rte_flow_item_eth bcast = {
+ .dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
+ };
+ struct rte_flow_item_eth ipv6_multi_spec = {
+ .dst.addr_bytes = "\x33\x33\x00\x00\x00\x00",
+ };
+ struct rte_flow_item_eth ipv6_multi_mask = {
+ .dst.addr_bytes = "\xff\xff\x00\x00\x00\x00",
+ };
+ struct rte_flow_item_eth unicast = {
+ .src.addr_bytes = "\x00\x00\x00\x00\x00\x00",
+ };
+ struct rte_flow_item_eth unicast_mask = {
+ .dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
+ };
+ const unsigned int vlan_filter_n = priv->vlan_filter_n;
+ const struct ether_addr cmp = {
+ .addr_bytes = "\x00\x00\x00\x00\x00\x00",
+ };
+ unsigned int i;
+ unsigned int j;
+ int ret;
+
+ if (priv->isolated)
+ return 0;
+ if (dev->data->promiscuous) {
+ struct rte_flow_item_eth promisc = {
+ .dst.addr_bytes = "\x00\x00\x00\x00\x00\x00",
+ .src.addr_bytes = "\x00\x00\x00\x00\x00\x00",
+ .type = 0,
+ };
+
+ claim_zero(mlx5_ctrl_flow(dev, &promisc, &promisc));
+ return 0;
+ }
+ if (dev->data->all_multicast) {
+ struct rte_flow_item_eth multicast = {
+ .dst.addr_bytes = "\x01\x00\x00\x00\x00\x00",
+ .src.addr_bytes = "\x00\x00\x00\x00\x00\x00",
+ .type = 0,
+ };
+
+ claim_zero(mlx5_ctrl_flow(dev, &multicast, &multicast));
+ } else {
+ /* Add broadcast/multicast flows. */
+ for (i = 0; i != vlan_filter_n; ++i) {
+ uint16_t vlan = priv->vlan_filter[i];
+
+ struct rte_flow_item_vlan vlan_spec = {
+ .tci = rte_cpu_to_be_16(vlan),
+ };
+ struct rte_flow_item_vlan vlan_mask = {
+ .tci = 0xffff,
+ };
+
+ ret = mlx5_ctrl_flow_vlan(dev, &bcast, &bcast,
+ &vlan_spec, &vlan_mask);
+ if (ret)
+ goto error;
+ ret = mlx5_ctrl_flow_vlan(dev, &ipv6_multi_spec,
+ &ipv6_multi_mask,
+ &vlan_spec, &vlan_mask);
+ if (ret)
+ goto error;
+ }
+ if (!vlan_filter_n) {
+ ret = mlx5_ctrl_flow(dev, &bcast, &bcast);
+ if (ret)
+ goto error;
+ ret = mlx5_ctrl_flow(dev, &ipv6_multi_spec,
+ &ipv6_multi_mask);
+ if (ret)
+ goto error;
+ }
+ }
+ /* Add MAC address flows. */
+ for (i = 0; i != MLX5_MAX_MAC_ADDRESSES; ++i) {
+ struct ether_addr *mac = &dev->data->mac_addrs[i];
+
+ if (!memcmp(mac, &cmp, sizeof(*mac)))
+ continue;
+ memcpy(&unicast.dst.addr_bytes,
+ mac->addr_bytes,
+ ETHER_ADDR_LEN);
+ for (j = 0; j != vlan_filter_n; ++j) {
+ uint16_t vlan = priv->vlan_filter[j];
+
+ struct rte_flow_item_vlan vlan_spec = {
+ .tci = rte_cpu_to_be_16(vlan),
+ };
+ struct rte_flow_item_vlan vlan_mask = {
+ .tci = 0xffff,
+ };
+
+ ret = mlx5_ctrl_flow_vlan(dev, &unicast,
+ &unicast_mask,
+ &vlan_spec,
+ &vlan_mask);
+ if (ret)
+ goto error;
+ }
+ if (!vlan_filter_n) {
+ ret = mlx5_ctrl_flow(dev, &unicast,
+ &unicast_mask);
+ if (ret)
+ goto error;
+ }
+ }
+ return 0;
+error:
+ return rte_errno;
+}
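
mlx5_ctrl_flow() and mlx5_ctrl_flow_vlan() are PMD-internal helpers; the sketch below is not their implementation, only a hedged illustration of what one of these control rules — the broadcast match — would look like when expressed through the public rte_flow API, with queue 0 as an assumed target:

#include <rte_ether.h>
#include <rte_flow.h>

/* Sketch: an ingress rule matching broadcast frames, steered to queue 0. */
static struct rte_flow *
ctrl_flow_bcast(uint16_t port_id, struct rte_flow_error *err)
{
	struct rte_flow_attr attr = { .ingress = 1 };
	struct rte_flow_item_eth bcast = {
		.dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
	};
	struct rte_flow_item pattern[] = {
		{ .type = RTE_FLOW_ITEM_TYPE_ETH, .spec = &bcast, .mask = &bcast },
		{ .type = RTE_FLOW_ITEM_TYPE_END },
	};
	struct rte_flow_action_queue queue = { .index = 0 };
	struct rte_flow_action actions[] = {
		{ .type = RTE_FLOW_ACTION_TYPE_QUEUE, .conf = &queue },
		{ .type = RTE_FLOW_ACTION_TYPE_END },
	};

	return rte_flow_create(port_id, &attr, pattern, actions, err);
}
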
+
+
+/**
+ * Disable traffic flows configured by the control plane.
+ *
+ * @param priv
+ * Pointer to Ethernet device private data.
+ * @param dev
+ * Pointer to Ethernet device structure.
+ *
+ * @return
+ * 0 on success.
+ */
+int
+priv_dev_traffic_disable(struct priv *priv, struct rte_eth_dev *dev)
+{
+ (void)dev;
+ priv_flow_flush(priv, &priv->ctrl_flows);
+ return 0;
+}
+
+/**
+ * Restart traffic flows configured by the control plane.
+ *
+ * @param priv
+ * Pointer to Ethernet device private data.
+ * @param dev
+ * Pointer to Ethernet device structure.
+ *
+ * @return
+ * 0 on success.
+ */
+int
+priv_dev_traffic_restart(struct priv *priv, struct rte_eth_dev *dev)
+{
+ if (dev->data->dev_started) {
+ priv_dev_traffic_disable(priv, dev);
+ priv_dev_traffic_enable(priv, dev);
+ }
+ return 0;
+}
+
+/**
+ * Restart traffic flows configured by the control plane.
+ *
+ * @param dev
+ * Pointer to Ethernet device structure.
+ *
+ * @return
+ * 0 on success.
+ */
+int
+mlx5_traffic_restart(struct rte_eth_dev *dev)
+{
+ struct priv *priv = dev->data->dev_private;
+
+ priv_lock(priv);
+ priv_dev_traffic_restart(priv, dev);
priv_unlock(priv);
+ return 0;
}
diff --git a/drivers/net/mlx5/mlx5_txq.c b/drivers/net/mlx5/mlx5_txq.c
index 98aaa7ca..9c5860ff 100644
--- a/drivers/net/mlx5/mlx5_txq.c
+++ b/drivers/net/mlx5/mlx5_txq.c
@@ -36,6 +36,8 @@
#include <errno.h>
#include <string.h>
#include <stdint.h>
+#include <unistd.h>
+#include <sys/mman.h>
/* Verbs header. */
/* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */
@@ -47,17 +49,10 @@
#pragma GCC diagnostic error "-Wpedantic"
#endif
-/* DPDK headers don't like -pedantic. */
-#ifdef PEDANTIC
-#pragma GCC diagnostic ignored "-Wpedantic"
-#endif
#include <rte_mbuf.h>
#include <rte_malloc.h>
#include <rte_ethdev.h>
#include <rte_common.h>
-#ifdef PEDANTIC
-#pragma GCC diagnostic error "-Wpedantic"
-#endif
#include "mlx5_utils.h"
#include "mlx5_defs.h"
@@ -70,23 +65,15 @@
*
* @param txq_ctrl
* Pointer to TX queue structure.
- * @param elts_n
- * Number of elements to allocate.
*/
-static void
-txq_alloc_elts(struct txq_ctrl *txq_ctrl, unsigned int elts_n)
+void
+txq_alloc_elts(struct mlx5_txq_ctrl *txq_ctrl)
{
+ const unsigned int elts_n = 1 << txq_ctrl->txq.elts_n;
unsigned int i;
for (i = 0; (i != elts_n); ++i)
(*txq_ctrl->txq.elts)[i] = NULL;
- for (i = 0; (i != (1u << txq_ctrl->txq.wqe_n)); ++i) {
- volatile struct mlx5_wqe64 *wqe =
- (volatile struct mlx5_wqe64 *)
- txq_ctrl->txq.wqes + i;
-
- memset((void *)(uintptr_t)wqe, 0x0, sizeof(*wqe));
- }
DEBUG("%p: allocated and configured %u WRs", (void *)txq_ctrl, elts_n);
txq_ctrl->txq.elts_head = 0;
txq_ctrl->txq.elts_tail = 0;
@@ -100,7 +87,7 @@ txq_alloc_elts(struct txq_ctrl *txq_ctrl, unsigned int elts_n)
* Pointer to TX queue structure.
*/
static void
-txq_free_elts(struct txq_ctrl *txq_ctrl)
+txq_free_elts(struct mlx5_txq_ctrl *txq_ctrl)
{
const uint16_t elts_n = 1 << txq_ctrl->txq.elts_n;
const uint16_t elts_m = elts_n - 1;
@@ -129,155 +116,231 @@ txq_free_elts(struct txq_ctrl *txq_ctrl)
}
/**
- * Clean up a TX queue.
+ * DPDK callback to configure a TX queue.
*
- * Destroy objects, free allocated memory and reset the structure for reuse.
+ * @param dev
+ * Pointer to Ethernet device structure.
+ * @param idx
+ * TX queue index.
+ * @param desc
+ * Number of descriptors to configure in queue.
+ * @param socket
+ * NUMA socket on which memory must be allocated.
+ * @param[in] conf
+ * Thresholds parameters.
*
- * @param txq_ctrl
- * Pointer to TX queue structure.
+ * @return
+ * 0 on success, negative errno value on failure.
+ */
+int
+mlx5_tx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
+ unsigned int socket, const struct rte_eth_txconf *conf)
+{
+ struct priv *priv = dev->data->dev_private;
+ struct mlx5_txq_data *txq = (*priv->txqs)[idx];
+ struct mlx5_txq_ctrl *txq_ctrl =
+ container_of(txq, struct mlx5_txq_ctrl, txq);
+ int ret = 0;
+
+ if (mlx5_is_secondary())
+ return -E_RTE_SECONDARY;
+
+ priv_lock(priv);
+ if (desc <= MLX5_TX_COMP_THRESH) {
+ WARN("%p: number of descriptors requested for TX queue %u"
+ " must be higher than MLX5_TX_COMP_THRESH, using"
+ " %u instead of %u",
+ (void *)dev, idx, MLX5_TX_COMP_THRESH + 1, desc);
+ desc = MLX5_TX_COMP_THRESH + 1;
+ }
+ if (!rte_is_power_of_2(desc)) {
+ desc = 1 << log2above(desc);
+ WARN("%p: increased number of descriptors in TX queue %u"
+ " to the next power of two (%d)",
+ (void *)dev, idx, desc);
+ }
+ DEBUG("%p: configuring queue %u for %u descriptors",
+ (void *)dev, idx, desc);
+ if (idx >= priv->txqs_n) {
+ ERROR("%p: queue index out of range (%u >= %u)",
+ (void *)dev, idx, priv->txqs_n);
+ priv_unlock(priv);
+ return -EOVERFLOW;
+ }
+ if (!mlx5_priv_txq_releasable(priv, idx)) {
+ ret = EBUSY;
+ ERROR("%p: unable to release queue index %u",
+ (void *)dev, idx);
+ goto out;
+ }
+ mlx5_priv_txq_release(priv, idx);
+ txq_ctrl = mlx5_priv_txq_new(priv, idx, desc, socket, conf);
+ if (!txq_ctrl) {
+ ERROR("%p: unable to allocate queue index %u",
+ (void *)dev, idx);
+ ret = ENOMEM;
+ goto out;
+ }
+ DEBUG("%p: adding TX queue %p to list",
+ (void *)dev, (void *)txq_ctrl);
+ (*priv->txqs)[idx] = &txq_ctrl->txq;
+out:
+ priv_unlock(priv);
+ return -ret;
+}
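
mlx5_tx_queue_setup() above first enforces a floor of MLX5_TX_COMP_THRESH + 1 descriptors, then rounds up to the next power of two. A hedged sketch of that arithmetic using the public rte_common.h helpers instead of the PMD-internal log2above() — the value 32 for MLX5_TX_COMP_THRESH is an assumption, not taken from this patch:

#include <rte_common.h>

/* Sketch: normalize a requested Tx descriptor count. */
static unsigned int
normalize_tx_desc(unsigned int desc)
{
	const unsigned int comp_thresh = 32; /* assumed MLX5_TX_COMP_THRESH */

	if (desc <= comp_thresh)
		desc = comp_thresh + 1;
	if (!rte_is_power_of_2(desc))
		desc = rte_align32pow2(desc); /* e.g. 100 -> 128 */
	return desc;
}
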
+
+/**
+ * DPDK callback to release a TX queue.
+ *
+ * @param dpdk_txq
+ * Generic TX queue pointer.
*/
void
-txq_cleanup(struct txq_ctrl *txq_ctrl)
+mlx5_tx_queue_release(void *dpdk_txq)
{
- size_t i;
-
- DEBUG("cleaning up %p", (void *)txq_ctrl);
- txq_free_elts(txq_ctrl);
- if (txq_ctrl->qp != NULL)
- claim_zero(ibv_destroy_qp(txq_ctrl->qp));
- if (txq_ctrl->cq != NULL)
- claim_zero(ibv_destroy_cq(txq_ctrl->cq));
- for (i = 0; (i != RTE_DIM(txq_ctrl->txq.mp2mr)); ++i) {
- if (txq_ctrl->txq.mp2mr[i].mr == NULL)
+ struct mlx5_txq_data *txq = (struct mlx5_txq_data *)dpdk_txq;
+ struct mlx5_txq_ctrl *txq_ctrl;
+ struct priv *priv;
+ unsigned int i;
+
+ if (mlx5_is_secondary())
+ return;
+
+ if (txq == NULL)
+ return;
+ txq_ctrl = container_of(txq, struct mlx5_txq_ctrl, txq);
+ priv = txq_ctrl->priv;
+ priv_lock(priv);
+ for (i = 0; (i != priv->txqs_n); ++i)
+ if ((*priv->txqs)[i] == txq) {
+ DEBUG("%p: removing TX queue %p from list",
+ (void *)priv->dev, (void *)txq_ctrl);
+ mlx5_priv_txq_release(priv, i);
break;
- claim_zero(ibv_dereg_mr(txq_ctrl->txq.mp2mr[i].mr));
- }
- memset(txq_ctrl, 0, sizeof(*txq_ctrl));
+ }
+ priv_unlock(priv);
}
+
/**
- * Initialize TX queue.
+ * Locally map the UAR used in Tx queues for the BlueFlame doorbell.
*
- * @param tmpl
- * Pointer to TX queue control template.
- * @param txq_ctrl
- * Pointer to TX queue control.
+ * @param[in] priv
+ * Pointer to private structure.
+ * @param fd
+ * Verbs file descriptor to map UAR pages.
*
* @return
* 0 on success, errno value on failure.
*/
-static inline int
-txq_setup(struct txq_ctrl *tmpl, struct txq_ctrl *txq_ctrl)
+int
+priv_tx_uar_remap(struct priv *priv, int fd)
{
- struct mlx5_qp *qp = to_mqp(tmpl->qp);
- struct ibv_cq *ibcq = tmpl->cq;
- struct ibv_mlx5_cq_info cq_info;
+ unsigned int i, j;
+ uintptr_t pages[priv->txqs_n];
+ unsigned int pages_n = 0;
+ uintptr_t uar_va;
+ void *addr;
+ struct mlx5_txq_data *txq;
+ struct mlx5_txq_ctrl *txq_ctrl;
+ int already_mapped;
+ size_t page_size = sysconf(_SC_PAGESIZE);
- if (ibv_mlx5_exp_get_cq_info(ibcq, &cq_info)) {
- ERROR("Unable to query CQ info. check your OFED.");
- return ENOTSUP;
- }
- if (cq_info.cqe_size != RTE_CACHE_LINE_SIZE) {
- ERROR("Wrong MLX5_CQE_SIZE environment variable value: "
- "it should be set to %u", RTE_CACHE_LINE_SIZE);
- return EINVAL;
+ memset(pages, 0, priv->txqs_n * sizeof(uintptr_t));
+ /*
+ * As in rdma-core, UARs are mapped with the OS page size.
+ * Use a page-aligned address to avoid duplicate mmap calls.
+ * See the libmlx5 function mlx5_init_context().
+ */
+ for (i = 0; i != priv->txqs_n; ++i) {
+ txq = (*priv->txqs)[i];
+ txq_ctrl = container_of(txq, struct mlx5_txq_ctrl, txq);
+ uar_va = (uintptr_t)txq_ctrl->txq.bf_reg;
+ uar_va = RTE_ALIGN_FLOOR(uar_va, page_size);
+ already_mapped = 0;
+ for (j = 0; j != pages_n; ++j) {
+ if (pages[j] == uar_va) {
+ already_mapped = 1;
+ break;
+ }
+ }
+ if (already_mapped)
+ continue;
+ pages[pages_n++] = uar_va;
+ addr = mmap((void *)uar_va, page_size,
+ PROT_WRITE, MAP_FIXED | MAP_SHARED, fd,
+ txq_ctrl->uar_mmap_offset);
+ if (addr != (void *)uar_va) {
+ ERROR("call to mmap failed on UAR for txq %d\n", i);
+ return -1;
+ }
}
- tmpl->txq.cqe_n = log2above(cq_info.cqe_cnt);
- tmpl->txq.qp_num_8s = qp->ctrl_seg.qp_num << 8;
- tmpl->txq.wqes = qp->gen_data.sqstart;
- tmpl->txq.wqe_n = log2above(qp->sq.wqe_cnt);
- tmpl->txq.qp_db = &qp->gen_data.db[MLX5_SND_DBR];
- tmpl->txq.bf_reg = qp->gen_data.bf->reg;
- tmpl->txq.cq_db = cq_info.dbrec;
- tmpl->txq.cqes =
- (volatile struct mlx5_cqe (*)[])
- (uintptr_t)cq_info.buf;
- tmpl->txq.elts =
- (struct rte_mbuf *(*)[1 << tmpl->txq.elts_n])
- ((uintptr_t)txq_ctrl + sizeof(*txq_ctrl));
return 0;
}
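
priv_tx_uar_remap() relies on aligning every UAR address down to its page base so that each page is mapped only once, however many queues share it. A standalone sketch of that deduplication step (the function name and array-based bookkeeping are illustrative only):

#include <stdint.h>
#include <unistd.h>

/* Sketch: collapse addresses to unique page bases before mapping them. */
static unsigned int
collect_unique_pages(const uintptr_t *addrs, unsigned int n, uintptr_t *pages)
{
	const uintptr_t mask = ~((uintptr_t)sysconf(_SC_PAGESIZE) - 1);
	unsigned int count = 0;
	unsigned int i, j;

	for (i = 0; i != n; ++i) {
		uintptr_t page = addrs[i] & mask;

		for (j = 0; j != count; ++j)
			if (pages[j] == page)
				break;
		if (j == count)
			pages[count++] = page;
	}
	return count;
}
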
/**
- * Configure a TX queue.
+ * Create the Tx queue Verbs object.
*
- * @param dev
- * Pointer to Ethernet device structure.
- * @param txq_ctrl
- * Pointer to TX queue structure.
- * @param desc
- * Number of descriptors to configure in queue.
- * @param socket
- * NUMA socket on which memory must be allocated.
- * @param[in] conf
- * Thresholds parameters.
+ * @param priv
+ * Pointer to private structure.
+ * @param idx
+ * Queue index in the DPDK Tx queue array.
*
* @return
- * 0 on success, errno value on failure.
+ * The Verbs object initialised if it can be created.
*/
-int
-txq_ctrl_setup(struct rte_eth_dev *dev, struct txq_ctrl *txq_ctrl,
- uint16_t desc, unsigned int socket,
- const struct rte_eth_txconf *conf)
+struct mlx5_txq_ibv*
+mlx5_priv_txq_ibv_new(struct priv *priv, uint16_t idx)
{
- struct priv *priv = mlx5_get_priv(dev);
- struct txq_ctrl tmpl = {
- .priv = priv,
- .socket = socket,
- };
+ struct mlx5_txq_data *txq_data = (*priv->txqs)[idx];
+ struct mlx5_txq_ctrl *txq_ctrl =
+ container_of(txq_data, struct mlx5_txq_ctrl, txq);
+ struct mlx5_txq_ibv tmpl;
+ struct mlx5_txq_ibv *txq_ibv;
union {
- struct ibv_exp_qp_init_attr init;
- struct ibv_exp_cq_init_attr cq;
- struct ibv_exp_qp_attr mod;
- struct ibv_exp_cq_attr cq_attr;
+ struct ibv_qp_init_attr_ex init;
+ struct ibv_cq_init_attr_ex cq;
+ struct ibv_qp_attr mod;
+ struct ibv_cq_ex cq_attr;
} attr;
unsigned int cqe_n;
- const unsigned int max_tso_inline = ((MLX5_MAX_TSO_HEADER +
- (RTE_CACHE_LINE_SIZE - 1)) /
- RTE_CACHE_LINE_SIZE);
+ struct mlx5dv_qp qp = { .comp_mask = MLX5DV_QP_MASK_UAR_MMAP_OFFSET };
+ struct mlx5dv_cq cq_info;
+ struct mlx5dv_obj obj;
+ const int desc = 1 << txq_data->elts_n;
int ret = 0;
+ assert(txq_data);
if (mlx5_getenv_int("MLX5_ENABLE_CQE_COMPRESSION")) {
- ret = ENOTSUP;
ERROR("MLX5_ENABLE_CQE_COMPRESSION must never be set");
goto error;
}
- tmpl.txq.flags = conf->txq_flags;
- assert(desc > MLX5_TX_COMP_THRESH);
- tmpl.txq.elts_n = log2above(desc);
- if (priv->mps == MLX5_MPW_ENHANCED)
- tmpl.txq.mpw_hdr_dseg = priv->mpw_hdr_dseg;
+ memset(&tmpl, 0, sizeof(struct mlx5_txq_ibv));
/* MRs will be registered in mp2mr[] later. */
- attr.cq = (struct ibv_exp_cq_init_attr){
+ attr.cq = (struct ibv_cq_init_attr_ex){
.comp_mask = 0,
};
cqe_n = ((desc / MLX5_TX_COMP_THRESH) - 1) ?
((desc / MLX5_TX_COMP_THRESH) - 1) : 1;
if (priv->mps == MLX5_MPW_ENHANCED)
cqe_n += MLX5_TX_COMP_THRESH_INLINE_DIV;
- tmpl.cq = ibv_exp_create_cq(priv->ctx,
- cqe_n,
- NULL, NULL, 0, &attr.cq);
+ tmpl.cq = ibv_create_cq(priv->ctx, cqe_n, NULL, NULL, 0);
if (tmpl.cq == NULL) {
- ret = ENOMEM;
- ERROR("%p: CQ creation failure: %s",
- (void *)dev, strerror(ret));
+ ERROR("%p: CQ creation failure", (void *)txq_ctrl);
goto error;
}
- DEBUG("priv->device_attr.max_qp_wr is %d",
- priv->device_attr.max_qp_wr);
- DEBUG("priv->device_attr.max_sge is %d",
- priv->device_attr.max_sge);
- attr.init = (struct ibv_exp_qp_init_attr){
+ attr.init = (struct ibv_qp_init_attr_ex){
/* CQ to be associated with the send queue. */
.send_cq = tmpl.cq,
/* CQ to be associated with the receive queue. */
.recv_cq = tmpl.cq,
.cap = {
/* Max number of outstanding WRs. */
- .max_send_wr = ((priv->device_attr.max_qp_wr < desc) ?
- priv->device_attr.max_qp_wr :
- desc),
+ .max_send_wr =
+ ((priv->device_attr.orig_attr.max_qp_wr <
+ desc) ?
+ priv->device_attr.orig_attr.max_qp_wr :
+ desc),
/*
* Max number of scatter/gather elements in a WR,
* must be 1 to prevent libmlx5 from trying to affect
@@ -288,124 +351,204 @@ txq_ctrl_setup(struct rte_eth_dev *dev, struct txq_ctrl *txq_ctrl,
.max_send_sge = 1,
},
.qp_type = IBV_QPT_RAW_PACKET,
- /* Do *NOT* enable this, completions events are managed per
- * TX burst. */
+ /*
+ * Do *NOT* enable this; completion events are managed per
+ * Tx burst.
+ */
.sq_sig_all = 0,
.pd = priv->pd,
- .comp_mask = IBV_EXP_QP_INIT_ATTR_PD,
+ .comp_mask = IBV_QP_INIT_ATTR_PD,
};
- if (priv->txq_inline && (priv->txqs_n >= priv->txqs_inline)) {
- tmpl.txq.max_inline =
- ((priv->txq_inline + (RTE_CACHE_LINE_SIZE - 1)) /
- RTE_CACHE_LINE_SIZE);
- tmpl.txq.inline_en = 1;
- /* TSO and MPS can't be enabled concurrently. */
- assert(!priv->tso || !priv->mps);
- if (priv->mps == MLX5_MPW_ENHANCED) {
- tmpl.txq.inline_max_packet_sz =
- priv->inline_max_packet_sz;
- /* To minimize the size of data set, avoid requesting
- * too large WQ.
- */
- attr.init.cap.max_inline_data =
- ((RTE_MIN(priv->txq_inline,
- priv->inline_max_packet_sz) +
- (RTE_CACHE_LINE_SIZE - 1)) /
- RTE_CACHE_LINE_SIZE) * RTE_CACHE_LINE_SIZE;
- } else if (priv->tso) {
- int inline_diff = tmpl.txq.max_inline - max_tso_inline;
-
- /*
- * Adjust inline value as Verbs aggregates
- * tso_inline and txq_inline fields.
- */
- attr.init.cap.max_inline_data = inline_diff > 0 ?
- inline_diff *
- RTE_CACHE_LINE_SIZE :
- 0;
- } else {
- attr.init.cap.max_inline_data =
- tmpl.txq.max_inline * RTE_CACHE_LINE_SIZE;
- }
+ if (txq_data->inline_en)
+ attr.init.cap.max_inline_data = txq_ctrl->max_inline_data;
+ if (txq_data->tso_en) {
+ attr.init.max_tso_header = txq_ctrl->max_tso_header;
+ attr.init.comp_mask |= IBV_QP_INIT_ATTR_MAX_TSO_HEADER;
}
- if (priv->tso) {
- attr.init.max_tso_header =
- max_tso_inline * RTE_CACHE_LINE_SIZE;
- attr.init.comp_mask |= IBV_EXP_QP_INIT_ATTR_MAX_TSO_HEADER;
- tmpl.txq.max_inline = RTE_MAX(tmpl.txq.max_inline,
- max_tso_inline);
- tmpl.txq.tso_en = 1;
- }
- if (priv->tunnel_en)
- tmpl.txq.tunnel_en = 1;
- tmpl.qp = ibv_exp_create_qp(priv->ctx, &attr.init);
+ tmpl.qp = ibv_create_qp_ex(priv->ctx, &attr.init);
if (tmpl.qp == NULL) {
- ret = (errno ? errno : EINVAL);
- ERROR("%p: QP creation failure: %s",
- (void *)dev, strerror(ret));
+ ERROR("%p: QP creation failure", (void *)txq_ctrl);
goto error;
}
- DEBUG("TX queue capabilities: max_send_wr=%u, max_send_sge=%u,"
- " max_inline_data=%u",
- attr.init.cap.max_send_wr,
- attr.init.cap.max_send_sge,
- attr.init.cap.max_inline_data);
- attr.mod = (struct ibv_exp_qp_attr){
+ attr.mod = (struct ibv_qp_attr){
/* Move the QP to this state. */
.qp_state = IBV_QPS_INIT,
/* Primary port number. */
.port_num = priv->port
};
- ret = ibv_exp_modify_qp(tmpl.qp, &attr.mod,
- (IBV_EXP_QP_STATE | IBV_EXP_QP_PORT));
+ ret = ibv_modify_qp(tmpl.qp, &attr.mod, (IBV_QP_STATE | IBV_QP_PORT));
if (ret) {
- ERROR("%p: QP state to IBV_QPS_INIT failed: %s",
- (void *)dev, strerror(ret));
+ ERROR("%p: QP state to IBV_QPS_INIT failed", (void *)txq_ctrl);
goto error;
}
- ret = txq_setup(&tmpl, txq_ctrl);
- if (ret) {
- ERROR("%p: cannot initialize TX queue structure: %s",
- (void *)dev, strerror(ret));
- goto error;
- }
- txq_alloc_elts(&tmpl, desc);
- attr.mod = (struct ibv_exp_qp_attr){
+ attr.mod = (struct ibv_qp_attr){
.qp_state = IBV_QPS_RTR
};
- ret = ibv_exp_modify_qp(tmpl.qp, &attr.mod, IBV_EXP_QP_STATE);
+ ret = ibv_modify_qp(tmpl.qp, &attr.mod, IBV_QP_STATE);
if (ret) {
- ERROR("%p: QP state to IBV_QPS_RTR failed: %s",
- (void *)dev, strerror(ret));
+ ERROR("%p: QP state to IBV_QPS_RTR failed", (void *)txq_ctrl);
goto error;
}
attr.mod.qp_state = IBV_QPS_RTS;
- ret = ibv_exp_modify_qp(tmpl.qp, &attr.mod, IBV_EXP_QP_STATE);
+ ret = ibv_modify_qp(tmpl.qp, &attr.mod, IBV_QP_STATE);
if (ret) {
- ERROR("%p: QP state to IBV_QPS_RTS failed: %s",
- (void *)dev, strerror(ret));
+ ERROR("%p: QP state to IBV_QPS_RTS failed", (void *)txq_ctrl);
goto error;
}
- /* Clean up txq in case we're reinitializing it. */
- DEBUG("%p: cleaning-up old txq just in case", (void *)txq_ctrl);
- txq_cleanup(txq_ctrl);
- *txq_ctrl = tmpl;
- DEBUG("%p: txq updated with %p", (void *)txq_ctrl, (void *)&tmpl);
- /* Pre-register known mempools. */
- rte_mempool_walk(txq_mp2mr_iter, txq_ctrl);
- assert(ret == 0);
- return 0;
+ txq_ibv = rte_calloc_socket(__func__, 1, sizeof(struct mlx5_txq_ibv), 0,
+ txq_ctrl->socket);
+ if (!txq_ibv) {
+ ERROR("%p: cannot allocate memory", (void *)txq_ctrl);
+ goto error;
+ }
+ obj.cq.in = tmpl.cq;
+ obj.cq.out = &cq_info;
+ obj.qp.in = tmpl.qp;
+ obj.qp.out = &qp;
+ ret = mlx5dv_init_obj(&obj, MLX5DV_OBJ_CQ | MLX5DV_OBJ_QP);
+ if (ret != 0)
+ goto error;
+ if (cq_info.cqe_size != RTE_CACHE_LINE_SIZE) {
+ ERROR("Wrong MLX5_CQE_SIZE environment variable value: "
+ "it should be set to %u", RTE_CACHE_LINE_SIZE);
+ goto error;
+ }
+ txq_data->cqe_n = log2above(cq_info.cqe_cnt);
+ txq_data->qp_num_8s = tmpl.qp->qp_num << 8;
+ txq_data->wqes = qp.sq.buf;
+ txq_data->wqe_n = log2above(qp.sq.wqe_cnt);
+ txq_data->qp_db = &qp.dbrec[MLX5_SND_DBR];
+ txq_data->bf_reg = qp.bf.reg;
+ txq_data->cq_db = cq_info.dbrec;
+ txq_data->cqes =
+ (volatile struct mlx5_cqe (*)[])
+ (uintptr_t)cq_info.buf;
+ txq_data->cq_ci = 0;
+ txq_data->cq_pi = 0;
+ txq_data->wqe_ci = 0;
+ txq_data->wqe_pi = 0;
+ txq_ibv->qp = tmpl.qp;
+ txq_ibv->cq = tmpl.cq;
+ rte_atomic32_inc(&txq_ibv->refcnt);
+ if (qp.comp_mask & MLX5DV_QP_MASK_UAR_MMAP_OFFSET) {
+ txq_ctrl->uar_mmap_offset = qp.uar_mmap_offset;
+ } else {
+ ERROR("Failed to retrieve UAR info, invalid libmlx5.so version");
+ goto error;
+ }
+ DEBUG("%p: Verbs Tx queue %p: refcnt %d", (void *)priv,
+ (void *)txq_ibv, rte_atomic32_read(&txq_ibv->refcnt));
+ LIST_INSERT_HEAD(&priv->txqsibv, txq_ibv, next);
+ return txq_ibv;
error:
- txq_cleanup(&tmpl);
- assert(ret > 0);
+ if (tmpl.cq)
+ claim_zero(ibv_destroy_cq(tmpl.cq));
+ if (tmpl.qp)
+ claim_zero(ibv_destroy_qp(tmpl.qp));
+ return NULL;
+}
+
+/**
+ * Get a Tx queue Verbs object.
+ *
+ * @param priv
+ * Pointer to private structure.
+ * @param idx
+ * Queue index in the DPDK Tx queue array.
+ *
+ * @return
+ * The Verbs object if it exists.
+ */
+struct mlx5_txq_ibv*
+mlx5_priv_txq_ibv_get(struct priv *priv, uint16_t idx)
+{
+ struct mlx5_txq_ctrl *txq_ctrl;
+
+ if (idx >= priv->txqs_n)
+ return NULL;
+ if (!(*priv->txqs)[idx])
+ return NULL;
+ txq_ctrl = container_of((*priv->txqs)[idx], struct mlx5_txq_ctrl, txq);
+ if (txq_ctrl->ibv) {
+ rte_atomic32_inc(&txq_ctrl->ibv->refcnt);
+ DEBUG("%p: Verbs Tx queue %p: refcnt %d", (void *)priv,
+ (void *)txq_ctrl->ibv,
+ rte_atomic32_read(&txq_ctrl->ibv->refcnt));
+ }
+ return txq_ctrl->ibv;
+}
+
+/**
+ * Release a Tx Verbs queue object.
+ *
+ * @param priv
+ * Pointer to private structure.
+ * @param txq_ibv
+ * Verbs Tx queue object.
+ *
+ * @return
+ * 0 on success, errno on failure.
+ */
+int
+mlx5_priv_txq_ibv_release(struct priv *priv, struct mlx5_txq_ibv *txq_ibv)
+{
+ (void)priv;
+ assert(txq_ibv);
+ DEBUG("%p: Verbs Tx queue %p: refcnt %d", (void *)priv,
+ (void *)txq_ibv, rte_atomic32_read(&txq_ibv->refcnt));
+ if (rte_atomic32_dec_and_test(&txq_ibv->refcnt)) {
+ claim_zero(ibv_destroy_qp(txq_ibv->qp));
+ claim_zero(ibv_destroy_cq(txq_ibv->cq));
+ LIST_REMOVE(txq_ibv, next);
+ rte_free(txq_ibv);
+ return 0;
+ }
+ return EBUSY;
+}
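
The *_get()/*_release() pairs introduced throughout this patch share one pattern: a DPDK atomic reference counter, freeing the object only when the last reference goes away and returning EBUSY otherwise. A generic sketch of that pattern ("struct obj" is invented here for illustration):

#include <errno.h>
#include <rte_atomic.h>
#include <rte_malloc.h>

struct obj {
	rte_atomic32_t refcnt;
	/* ... payload ... */
};

/* Take an extra reference. */
static struct obj *
obj_get(struct obj *o)
{
	rte_atomic32_inc(&o->refcnt);
	return o;
}

/* Drop a reference; free and return 0 on the last one, EBUSY otherwise. */
static int
obj_release(struct obj *o)
{
	if (rte_atomic32_dec_and_test(&o->refcnt)) {
		rte_free(o);
		return 0;
	}
	return EBUSY;
}
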
+
+/**
+ * Return true if a single reference exists on the object.
+ *
+ * @param priv
+ * Pointer to private structure.
+ * @param txq_ibv
+ * Verbs Tx queue object.
+ */
+int
+mlx5_priv_txq_ibv_releasable(struct priv *priv, struct mlx5_txq_ibv *txq_ibv)
+{
+ (void)priv;
+ assert(txq_ibv);
+ return (rte_atomic32_read(&txq_ibv->refcnt) == 1);
+}
+
+/**
+ * Verify that the Verbs Tx queue list is empty.
+ *
+ * @param priv
+ * Pointer to private structure.
+ *
+ * @return The number of objects not released.
+ */
+int
+mlx5_priv_txq_ibv_verify(struct priv *priv)
+{
+ int ret = 0;
+ struct mlx5_txq_ibv *txq_ibv;
+
+ LIST_FOREACH(txq_ibv, &priv->txqsibv, next) {
+ DEBUG("%p: Verbs Tx queue %p still referenced", (void *)priv,
+ (void *)txq_ibv);
+ ++ret;
+ }
return ret;
}
/**
- * DPDK callback to configure a TX queue.
+ * Create a DPDK Tx queue.
*
- * @param dev
- * Pointer to Ethernet device structure.
+ * @param priv
+ * Pointer to private structure.
* @param idx
* TX queue index.
* @param desc
@@ -413,164 +556,236 @@ error:
* @param socket
* NUMA socket on which memory must be allocated.
* @param[in] conf
- * Thresholds parameters.
+ * Thresholds parameters.
*
* @return
- * 0 on success, negative errno value on failure.
+ * A DPDK queue object on success.
*/
-int
-mlx5_tx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
- unsigned int socket, const struct rte_eth_txconf *conf)
+struct mlx5_txq_ctrl*
+mlx5_priv_txq_new(struct priv *priv, uint16_t idx, uint16_t desc,
+ unsigned int socket,
+ const struct rte_eth_txconf *conf)
{
- struct priv *priv = dev->data->dev_private;
- struct txq *txq = (*priv->txqs)[idx];
- struct txq_ctrl *txq_ctrl = container_of(txq, struct txq_ctrl, txq);
- int ret;
+ const unsigned int max_tso_inline =
+ ((MLX5_MAX_TSO_HEADER + (RTE_CACHE_LINE_SIZE - 1)) /
+ RTE_CACHE_LINE_SIZE);
+ struct mlx5_txq_ctrl *tmpl;
- if (mlx5_is_secondary())
- return -E_RTE_SECONDARY;
+ tmpl = rte_calloc_socket("TXQ", 1,
+ sizeof(*tmpl) +
+ desc * sizeof(struct rte_mbuf *),
+ 0, socket);
+ if (!tmpl)
+ return NULL;
+ assert(desc > MLX5_TX_COMP_THRESH);
+ tmpl->txq.flags = conf->txq_flags;
+ tmpl->priv = priv;
+ tmpl->socket = socket;
+ tmpl->txq.elts_n = log2above(desc);
+ if (priv->mps == MLX5_MPW_ENHANCED)
+ tmpl->txq.mpw_hdr_dseg = priv->mpw_hdr_dseg;
+ /* MRs will be registered in mp2mr[] later. */
+ DEBUG("priv->device_attr.max_qp_wr is %d",
+ priv->device_attr.orig_attr.max_qp_wr);
+ DEBUG("priv->device_attr.max_sge is %d",
+ priv->device_attr.orig_attr.max_sge);
+ if (priv->txq_inline && (priv->txqs_n >= priv->txqs_inline)) {
+ unsigned int ds_cnt;
- priv_lock(priv);
- if (desc <= MLX5_TX_COMP_THRESH) {
- WARN("%p: number of descriptors requested for TX queue %u"
- " must be higher than MLX5_TX_COMP_THRESH, using"
- " %u instead of %u",
- (void *)dev, idx, MLX5_TX_COMP_THRESH + 1, desc);
- desc = MLX5_TX_COMP_THRESH + 1;
- }
- if (!rte_is_power_of_2(desc)) {
- desc = 1 << log2above(desc);
- WARN("%p: increased number of descriptors in TX queue %u"
- " to the next power of two (%d)",
- (void *)dev, idx, desc);
- }
- DEBUG("%p: configuring queue %u for %u descriptors",
- (void *)dev, idx, desc);
- if (idx >= priv->txqs_n) {
- ERROR("%p: queue index out of range (%u >= %u)",
- (void *)dev, idx, priv->txqs_n);
- priv_unlock(priv);
- return -EOVERFLOW;
- }
- if (txq != NULL) {
- DEBUG("%p: reusing already allocated queue index %u (%p)",
- (void *)dev, idx, (void *)txq);
- if (priv->started) {
- priv_unlock(priv);
- return -EEXIST;
- }
- (*priv->txqs)[idx] = NULL;
- txq_cleanup(txq_ctrl);
- /* Resize if txq size is changed. */
- if (txq_ctrl->txq.elts_n != log2above(desc)) {
- txq_ctrl = rte_realloc(txq_ctrl,
- sizeof(*txq_ctrl) +
- desc * sizeof(struct rte_mbuf *),
- RTE_CACHE_LINE_SIZE);
- if (!txq_ctrl) {
- ERROR("%p: unable to reallocate queue index %u",
- (void *)dev, idx);
- priv_unlock(priv);
- return -ENOMEM;
- }
+ tmpl->txq.max_inline =
+ ((priv->txq_inline + (RTE_CACHE_LINE_SIZE - 1)) /
+ RTE_CACHE_LINE_SIZE);
+ tmpl->txq.inline_en = 1;
+ /* TSO and MPS can't be enabled concurrently. */
+ assert(!priv->tso || !priv->mps);
+ if (priv->mps == MLX5_MPW_ENHANCED) {
+ tmpl->txq.inline_max_packet_sz =
+ priv->inline_max_packet_sz;
+			/* To minimize the size of the data set, avoid requesting
+			 * too large a WQ.
+ */
+ tmpl->max_inline_data =
+ ((RTE_MIN(priv->txq_inline,
+ priv->inline_max_packet_sz) +
+ (RTE_CACHE_LINE_SIZE - 1)) /
+ RTE_CACHE_LINE_SIZE) * RTE_CACHE_LINE_SIZE;
+ } else if (priv->tso) {
+ int inline_diff = tmpl->txq.max_inline - max_tso_inline;
+
+ /*
+ * Adjust inline value as Verbs aggregates
+ * tso_inline and txq_inline fields.
+ */
+ tmpl->max_inline_data = inline_diff > 0 ?
+ inline_diff *
+ RTE_CACHE_LINE_SIZE :
+ 0;
+ } else {
+ tmpl->max_inline_data =
+ tmpl->txq.max_inline * RTE_CACHE_LINE_SIZE;
}
- } else {
- txq_ctrl =
- rte_calloc_socket("TXQ", 1,
- sizeof(*txq_ctrl) +
- desc * sizeof(struct rte_mbuf *),
- 0, socket);
- if (txq_ctrl == NULL) {
- ERROR("%p: unable to allocate queue index %u",
- (void *)dev, idx);
- priv_unlock(priv);
- return -ENOMEM;
+ /*
+		 * Check whether the requested inline size is large enough
+		 * to overflow the WQE DS count.
+		 * The calculation accounts for:
+ * WQE CTRL (1 DS)
+ * WQE ETH (1 DS)
+ * Inline part (N DS)
+ */
+ ds_cnt = 2 + (tmpl->txq.max_inline / MLX5_WQE_DWORD_SIZE);
+ if (ds_cnt > MLX5_DSEG_MAX) {
+ unsigned int max_inline = (MLX5_DSEG_MAX - 2) *
+ MLX5_WQE_DWORD_SIZE;
+
+ max_inline = max_inline - (max_inline %
+ RTE_CACHE_LINE_SIZE);
+ WARN("txq inline is too large (%d) setting it to "
+ "the maximum possible: %d\n",
+ priv->txq_inline, max_inline);
+ tmpl->txq.max_inline = max_inline / RTE_CACHE_LINE_SIZE;
}
}
- ret = txq_ctrl_setup(dev, txq_ctrl, desc, socket, conf);
- if (ret)
- rte_free(txq_ctrl);
- else {
- txq_ctrl->txq.stats.idx = idx;
- DEBUG("%p: adding TX queue %p to list",
- (void *)dev, (void *)txq_ctrl);
- (*priv->txqs)[idx] = &txq_ctrl->txq;
+ if (priv->tso) {
+ tmpl->max_tso_header = max_tso_inline * RTE_CACHE_LINE_SIZE;
+ tmpl->txq.max_inline = RTE_MAX(tmpl->txq.max_inline,
+ max_tso_inline);
+ tmpl->txq.tso_en = 1;
}
- priv_unlock(priv);
- return -ret;
+ if (priv->tunnel_en)
+ tmpl->txq.tunnel_en = 1;
+ tmpl->txq.elts =
+ (struct rte_mbuf *(*)[1 << tmpl->txq.elts_n])(tmpl + 1);
+ tmpl->txq.stats.idx = idx;
+ rte_atomic32_inc(&tmpl->refcnt);
+ DEBUG("%p: Tx queue %p: refcnt %d", (void *)priv,
+ (void *)tmpl, rte_atomic32_read(&tmpl->refcnt));
+ LIST_INSERT_HEAD(&priv->txqsctrl, tmpl, next);
+ return tmpl;
}
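
The clamp in mlx5_priv_txq_new() above bounds the inline size so that the WQE data-segment count cannot overflow. A hedged worked example of that arithmetic, mirroring the code with assumed values for MLX5_DSEG_MAX (63), MLX5_WQE_DWORD_SIZE (16) and RTE_CACHE_LINE_SIZE (64) — none of which are taken from this patch — and a deliberately oversized inline request to trigger the clamp:

#include <stdio.h>

int
main(void)
{
	const unsigned int dseg_max = 63;    /* assumed MLX5_DSEG_MAX */
	const unsigned int dword = 16;       /* assumed MLX5_WQE_DWORD_SIZE */
	const unsigned int cl = 64;          /* assumed RTE_CACHE_LINE_SIZE */
	unsigned int txq_inline = 65536;     /* oversized request, in bytes */
	/* max_inline is kept in cache-line units, as in the PMD. */
	unsigned int max_inline = (txq_inline + cl - 1) / cl;
	/* 1 DS for WQE CTRL + 1 DS for WQE ETH + N DS for the inline part. */
	unsigned int ds_cnt = 2 + max_inline / dword;

	if (ds_cnt > dseg_max) {
		unsigned int bytes = (dseg_max - 2) * dword;

		bytes -= bytes % cl; /* round down to a cache-line multiple */
		printf("inline %u too large, clamped to %u bytes\n",
		       txq_inline, bytes);
		max_inline = bytes / cl;
	}
	printf("max_inline = %u cache line(s)\n", max_inline);
	return 0;
}
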
/**
- * DPDK callback to release a TX queue.
+ * Get a Tx queue.
*
- * @param dpdk_txq
- * Generic TX queue pointer.
+ * @param priv
+ * Pointer to private structure.
+ * @param idx
+ * TX queue index.
+ *
+ * @return
+ * A pointer to the queue if it exists.
*/
-void
-mlx5_tx_queue_release(void *dpdk_txq)
+struct mlx5_txq_ctrl*
+mlx5_priv_txq_get(struct priv *priv, uint16_t idx)
{
- struct txq *txq = (struct txq *)dpdk_txq;
- struct txq_ctrl *txq_ctrl;
- struct priv *priv;
- unsigned int i;
+ struct mlx5_txq_ctrl *ctrl = NULL;
- if (mlx5_is_secondary())
- return;
+ if ((*priv->txqs)[idx]) {
+ ctrl = container_of((*priv->txqs)[idx], struct mlx5_txq_ctrl,
+ txq);
+ unsigned int i;
- if (txq == NULL)
- return;
- txq_ctrl = container_of(txq, struct txq_ctrl, txq);
- priv = txq_ctrl->priv;
- priv_lock(priv);
- for (i = 0; (i != priv->txqs_n); ++i)
- if ((*priv->txqs)[i] == txq) {
- DEBUG("%p: removing TX queue %p from list",
- (void *)priv->dev, (void *)txq_ctrl);
- (*priv->txqs)[i] = NULL;
- break;
+ mlx5_priv_txq_ibv_get(priv, idx);
+ for (i = 0; i != MLX5_PMD_TX_MP_CACHE; ++i) {
+ struct mlx5_mr *mr = NULL;
+
+ (void)mr;
+ if (ctrl->txq.mp2mr[i]) {
+ mr = priv_mr_get(priv, ctrl->txq.mp2mr[i]->mp);
+ assert(mr);
+ }
}
- txq_cleanup(txq_ctrl);
- rte_free(txq_ctrl);
- priv_unlock(priv);
+ rte_atomic32_inc(&ctrl->refcnt);
+ DEBUG("%p: Tx queue %p: refcnt %d", (void *)priv,
+ (void *)ctrl, rte_atomic32_read(&ctrl->refcnt));
+ }
+ return ctrl;
}
/**
- * DPDK callback for TX in secondary processes.
- *
- * This function configures all queues from primary process information
- * if necessary before reverting to the normal TX burst callback.
+ * Release a Tx queue.
*
- * @param dpdk_txq
- * Generic pointer to TX queue structure.
- * @param[in] pkts
- * Packets to transmit.
- * @param pkts_n
- * Number of packets in array.
+ * @param priv
+ * Pointer to private structure.
+ * @param idx
+ * TX queue index.
*
* @return
- * Number of packets successfully transmitted (<= pkts_n).
+ * 0 on success, errno on failure.
*/
-uint16_t
-mlx5_tx_burst_secondary_setup(void *dpdk_txq, struct rte_mbuf **pkts,
- uint16_t pkts_n)
+int
+mlx5_priv_txq_release(struct priv *priv, uint16_t idx)
{
- struct txq *txq = dpdk_txq;
- struct txq_ctrl *txq_ctrl = container_of(txq, struct txq_ctrl, txq);
- struct priv *priv = mlx5_secondary_data_setup(txq_ctrl->priv);
- struct priv *primary_priv;
- unsigned int index;
+ unsigned int i;
+ struct mlx5_txq_ctrl *txq;
- if (priv == NULL)
+ if (!(*priv->txqs)[idx])
return 0;
- primary_priv =
- mlx5_secondary_data[priv->dev->data->port_id].primary_priv;
- /* Look for queue index in both private structures. */
- for (index = 0; index != priv->txqs_n; ++index)
- if (((*primary_priv->txqs)[index] == txq) ||
- ((*priv->txqs)[index] == txq))
- break;
- if (index == priv->txqs_n)
+ txq = container_of((*priv->txqs)[idx], struct mlx5_txq_ctrl, txq);
+ DEBUG("%p: Tx queue %p: refcnt %d", (void *)priv,
+ (void *)txq, rte_atomic32_read(&txq->refcnt));
+ if (txq->ibv) {
+ int ret;
+
+ ret = mlx5_priv_txq_ibv_release(priv, txq->ibv);
+ if (!ret)
+ txq->ibv = NULL;
+ }
+ for (i = 0; i != MLX5_PMD_TX_MP_CACHE; ++i) {
+ if (txq->txq.mp2mr[i]) {
+ priv_mr_release(priv, txq->txq.mp2mr[i]);
+ txq->txq.mp2mr[i] = NULL;
+ }
+ }
+ if (rte_atomic32_dec_and_test(&txq->refcnt)) {
+ txq_free_elts(txq);
+ LIST_REMOVE(txq, next);
+ rte_free(txq);
+ (*priv->txqs)[idx] = NULL;
return 0;
- txq = (*priv->txqs)[index];
- return priv->dev->tx_pkt_burst(txq, pkts, pkts_n);
+ }
+ return EBUSY;
+}
+
+/**
+ * Verify if the queue can be released.
+ *
+ * @param priv
+ * Pointer to private structure.
+ * @param idx
+ * TX queue index.
+ *
+ * @return
+ * 1 if the queue can be released.
+ */
+int
+mlx5_priv_txq_releasable(struct priv *priv, uint16_t idx)
+{
+ struct mlx5_txq_ctrl *txq;
+
+ if (!(*priv->txqs)[idx])
+ return -1;
+ txq = container_of((*priv->txqs)[idx], struct mlx5_txq_ctrl, txq);
+ return (rte_atomic32_read(&txq->refcnt) == 1);
+}
+
+/**
+ * Verify that the Tx queue list is empty.
+ *
+ * @param priv
+ * Pointer to private structure.
+ *
+ * @return The number of objects not released.
+ */
+int
+mlx5_priv_txq_verify(struct priv *priv)
+{
+ struct mlx5_txq_ctrl *txq;
+ int ret = 0;
+
+ LIST_FOREACH(txq, &priv->txqsctrl, next) {
+ DEBUG("%p: Tx Queue %p still referenced", (void *)priv,
+ (void *)txq);
+ ++ret;
+ }
+ return ret;
}
diff --git a/drivers/net/mlx5/mlx5_utils.h b/drivers/net/mlx5/mlx5_utils.h
index a824787f..218ae831 100644
--- a/drivers/net/mlx5/mlx5_utils.h
+++ b/drivers/net/mlx5/mlx5_utils.h
@@ -128,11 +128,13 @@ pmd_drv_log_basename(const char *s)
#define DEBUG(...) PMD_DRV_LOG(DEBUG, __VA_ARGS__)
#define claim_zero(...) assert((__VA_ARGS__) == 0)
+#define claim_nonzero(...) assert((__VA_ARGS__) != 0)
#else /* NDEBUG */
#define DEBUG(...) (void)0
#define claim_zero(...) (__VA_ARGS__)
+#define claim_nonzero(...) (__VA_ARGS__)
#endif /* NDEBUG */
diff --git a/drivers/net/mlx5/mlx5_vlan.c b/drivers/net/mlx5/mlx5_vlan.c
index 1b0fa40a..6fc315ef 100644
--- a/drivers/net/mlx5/mlx5_vlan.c
+++ b/drivers/net/mlx5/mlx5_vlan.c
@@ -36,22 +36,15 @@
#include <assert.h>
#include <stdint.h>
-/* DPDK headers don't like -pedantic. */
-#ifdef PEDANTIC
-#pragma GCC diagnostic ignored "-Wpedantic"
-#endif
#include <rte_ethdev.h>
#include <rte_common.h>
-#ifdef PEDANTIC
-#pragma GCC diagnostic error "-Wpedantic"
-#endif
#include "mlx5_utils.h"
#include "mlx5.h"
#include "mlx5_autoconf.h"
/**
- * Configure a VLAN filter.
+ * DPDK callback to configure a VLAN filter.
*
* @param dev
* Pointer to Ethernet device structure.
@@ -61,14 +54,16 @@
* Toggle filter.
*
* @return
- * 0 on success, errno value on failure.
+ * 0 on success, negative errno value on failure.
*/
-static int
-vlan_filter_set(struct rte_eth_dev *dev, uint16_t vlan_id, int on)
+int
+mlx5_vlan_filter_set(struct rte_eth_dev *dev, uint16_t vlan_id, int on)
{
struct priv *priv = dev->data->dev_private;
unsigned int i;
+ int ret = 0;
+ priv_lock(priv);
DEBUG("%p: %s VLAN filter ID %" PRIu16,
(void *)dev, (on ? "enable" : "disable"), vlan_id);
assert(priv->vlan_filter_n <= RTE_DIM(priv->vlan_filter));
@@ -76,13 +71,15 @@ vlan_filter_set(struct rte_eth_dev *dev, uint16_t vlan_id, int on)
if (priv->vlan_filter[i] == vlan_id)
break;
/* Check if there's room for another VLAN filter. */
- if (i == RTE_DIM(priv->vlan_filter))
- return ENOMEM;
+ if (i == RTE_DIM(priv->vlan_filter)) {
+ ret = -ENOMEM;
+ goto out;
+ }
if (i < priv->vlan_filter_n) {
assert(priv->vlan_filter_n != 0);
/* Enabling an existing VLAN filter has no effect. */
if (on)
- return 0;
+ goto out;
/* Remove VLAN filter from list. */
--priv->vlan_filter_n;
memmove(&priv->vlan_filter[i],
@@ -94,41 +91,16 @@ vlan_filter_set(struct rte_eth_dev *dev, uint16_t vlan_id, int on)
assert(i == priv->vlan_filter_n);
/* Disabling an unknown VLAN filter has no effect. */
if (!on)
- return 0;
+ goto out;
/* Add new VLAN filter. */
priv->vlan_filter[priv->vlan_filter_n] = vlan_id;
++priv->vlan_filter_n;
}
- /* Rehash flows in all hash RX queues. */
- priv_mac_addrs_disable(priv);
- priv_special_flow_disable_all(priv);
- return priv_rehash_flows(priv);
-}
-
-/**
- * DPDK callback to configure a VLAN filter.
- *
- * @param dev
- * Pointer to Ethernet device structure.
- * @param vlan_id
- * VLAN ID to filter.
- * @param on
- * Toggle filter.
- *
- * @return
- * 0 on success, negative errno value on failure.
- */
-int
-mlx5_vlan_filter_set(struct rte_eth_dev *dev, uint16_t vlan_id, int on)
-{
- struct priv *priv = dev->data->dev_private;
- int ret;
-
- priv_lock(priv);
- ret = vlan_filter_set(dev, vlan_id, on);
+ if (dev->data->dev_started)
+ priv_dev_traffic_restart(priv, dev);
+out:
priv_unlock(priv);
- assert(ret >= 0);
- return -ret;
+ return ret;
}
/**
@@ -144,22 +116,24 @@ mlx5_vlan_filter_set(struct rte_eth_dev *dev, uint16_t vlan_id, int on)
static void
priv_vlan_strip_queue_set(struct priv *priv, uint16_t idx, int on)
{
- struct rxq *rxq = (*priv->rxqs)[idx];
- struct rxq_ctrl *rxq_ctrl = container_of(rxq, struct rxq_ctrl, rxq);
- struct ibv_exp_wq_attr mod;
+ struct mlx5_rxq_data *rxq = (*priv->rxqs)[idx];
+ struct mlx5_rxq_ctrl *rxq_ctrl =
+ container_of(rxq, struct mlx5_rxq_ctrl, rxq);
+ struct ibv_wq_attr mod;
uint16_t vlan_offloads =
- (on ? IBV_EXP_RECEIVE_WQ_CVLAN_STRIP : 0) |
+ (on ? IBV_WQ_FLAGS_CVLAN_STRIPPING : 0) |
0;
int err;
DEBUG("set VLAN offloads 0x%x for port %d queue %d",
vlan_offloads, rxq->port_id, idx);
- mod = (struct ibv_exp_wq_attr){
- .attr_mask = IBV_EXP_WQ_ATTR_VLAN_OFFLOADS,
- .vlan_offloads = vlan_offloads,
+ mod = (struct ibv_wq_attr){
+ .attr_mask = IBV_WQ_ATTR_FLAGS,
+ .flags_mask = IBV_WQ_FLAGS_CVLAN_STRIPPING,
+ .flags = vlan_offloads,
};
- err = ibv_exp_modify_wq(rxq_ctrl->wq, &mod);
+ err = ibv_modify_wq(rxq_ctrl->ibv->wq, &mod);
if (err) {
ERROR("%p: failed to modified stripping mode: %s",
(void *)priv, strerror(err));
@@ -210,7 +184,7 @@ mlx5_vlan_strip_queue_set(struct rte_eth_dev *dev, uint16_t queue, int on)
* @param mask
* VLAN offload bit mask.
*/
-void
+int
mlx5_vlan_offload_set(struct rte_eth_dev *dev, int mask)
{
struct priv *priv = dev->data->dev_private;
@@ -221,7 +195,7 @@ mlx5_vlan_offload_set(struct rte_eth_dev *dev, int mask)
if (!priv->hw_vlan_strip) {
ERROR("VLAN stripping is not supported");
- return;
+ return 0;
}
/* Run on every RX queue and set/reset VLAN stripping. */
@@ -230,4 +204,6 @@ mlx5_vlan_offload_set(struct rte_eth_dev *dev, int mask)
priv_vlan_strip_queue_set(priv, i, hw_vlan_strip);
priv_unlock(priv);
}
+
+ return 0;
}