diff options
author | Luca Boccassi <luca.boccassi@gmail.com> | 2018-02-19 11:16:57 +0000 |
---|---|---|
committer | Luca Boccassi <luca.boccassi@gmail.com> | 2018-02-19 11:17:28 +0000 |
commit | ca33590b6af032bff57d9cc70455660466a654b2 (patch) | |
tree | 0b68b090bd9b4a78a3614b62400b29279d76d553 /drivers/net/mlx5 | |
parent | 169a9de21e263aa6599cdc2d87a45ae158d9f509 (diff) |
New upstream version 18.02upstream/18.02
Change-Id: I89ed24cb2a49b78fe5be6970b99dd46c1499fcc3
Signed-off-by: Luca Boccassi <luca.boccassi@gmail.com>
Diffstat (limited to 'drivers/net/mlx5')
26 files changed, 2284 insertions, 1836 deletions
diff --git a/drivers/net/mlx5/Makefile b/drivers/net/mlx5/Makefile index a3984eb9..3bc9736c 100644 --- a/drivers/net/mlx5/Makefile +++ b/drivers/net/mlx5/Makefile @@ -33,9 +33,15 @@ include $(RTE_SDK)/mk/rte.vars.mk # Library name. LIB = librte_pmd_mlx5.a +LIB_GLUE = $(LIB_GLUE_BASE).$(LIB_GLUE_VERSION) +LIB_GLUE_BASE = librte_pmd_mlx5_glue.so +LIB_GLUE_VERSION = 18.02.0 # Sources. SRCS-$(CONFIG_RTE_LIBRTE_MLX5_PMD) += mlx5.c +ifneq ($(CONFIG_RTE_LIBRTE_MLX5_DLOPEN_DEPS),y) +SRCS-$(CONFIG_RTE_LIBRTE_MLX5_PMD) += mlx5_glue.c +endif SRCS-$(CONFIG_RTE_LIBRTE_MLX5_PMD) += mlx5_rxq.c SRCS-$(CONFIG_RTE_LIBRTE_MLX5_PMD) += mlx5_txq.c SRCS-$(CONFIG_RTE_LIBRTE_MLX5_PMD) += mlx5_rxtx.c @@ -54,6 +60,10 @@ SRCS-$(CONFIG_RTE_LIBRTE_MLX5_PMD) += mlx5_mr.c SRCS-$(CONFIG_RTE_LIBRTE_MLX5_PMD) += mlx5_flow.c SRCS-$(CONFIG_RTE_LIBRTE_MLX5_PMD) += mlx5_socket.c +ifeq ($(CONFIG_RTE_LIBRTE_MLX5_DLOPEN_DEPS),y) +INSTALL-$(CONFIG_RTE_LIBRTE_MLX5_PMD)-lib += $(LIB_GLUE) +endif + # Basic CFLAGS. CFLAGS += -O3 CFLAGS += -std=c11 -Wall -Wextra @@ -64,7 +74,14 @@ CFLAGS += -D_DEFAULT_SOURCE CFLAGS += -D_XOPEN_SOURCE=600 CFLAGS += $(WERROR_FLAGS) CFLAGS += -Wno-strict-prototypes +ifeq ($(CONFIG_RTE_LIBRTE_MLX5_DLOPEN_DEPS),y) +CFLAGS += -DMLX5_GLUE='"$(LIB_GLUE)"' +CFLAGS += -DMLX5_GLUE_VERSION='"$(LIB_GLUE_VERSION)"' +CFLAGS_mlx5_glue.o += -fPIC +LDLIBS += -ldl +else LDLIBS += -libverbs -lmlx5 +endif LDLIBS += -lrte_eal -lrte_mbuf -lrte_mempool -lrte_ring LDLIBS += -lrte_ethdev -lrte_net -lrte_kvargs LDLIBS += -lrte_bus_pci @@ -157,7 +174,24 @@ mlx5_autoconf.h: mlx5_autoconf.h.new $(SRCS-$(CONFIG_RTE_LIBRTE_MLX5_PMD):.c=.o): mlx5_autoconf.h +# Generate dependency plug-in for rdma-core when the PMD must not be linked +# directly, so that applications do not inherit this dependency. + +ifeq ($(CONFIG_RTE_LIBRTE_MLX5_DLOPEN_DEPS),y) + +$(LIB): $(LIB_GLUE) + +$(LIB_GLUE): mlx5_glue.o + $Q $(LD) $(LDFLAGS) $(EXTRA_LDFLAGS) \ + -Wl,-h,$(LIB_GLUE) \ + -s -shared -o $@ $< -libverbs -lmlx5 + +mlx5_glue.o: mlx5_autoconf.h + +endif + clean_mlx5: FORCE $Q rm -f -- mlx5_autoconf.h mlx5_autoconf.h.new + $Q rm -f -- mlx5_glue.o $(LIB_GLUE_BASE)* clean: clean_mlx5 diff --git a/drivers/net/mlx5/mlx5.c b/drivers/net/mlx5/mlx5.c index 0548d17a..6c0985bd 100644 --- a/drivers/net/mlx5/mlx5.c +++ b/drivers/net/mlx5/mlx5.c @@ -1,44 +1,18 @@ -/*- - * BSD LICENSE - * - * Copyright 2015 6WIND S.A. - * Copyright 2015 Mellanox. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * * Neither the name of 6WIND S.A. nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright 2015 6WIND S.A. + * Copyright 2015 Mellanox. */ #include <stddef.h> #include <unistd.h> #include <string.h> #include <assert.h> +#include <dlfcn.h> #include <stdint.h> #include <stdlib.h> #include <errno.h> #include <net/if.h> +#include <sys/mman.h> /* Verbs header. */ /* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */ @@ -51,11 +25,13 @@ #endif #include <rte_malloc.h> -#include <rte_ethdev.h> +#include <rte_ethdev_driver.h> #include <rte_ethdev_pci.h> #include <rte_pci.h> #include <rte_bus_pci.h> #include <rte_common.h> +#include <rte_config.h> +#include <rte_eal_memconfig.h> #include <rte_kvargs.h> #include "mlx5.h" @@ -63,6 +39,7 @@ #include "mlx5_rxtx.h" #include "mlx5_autoconf.h" #include "mlx5_defs.h" +#include "mlx5_glue.h" /* Device parameter to enable RX completion queue compression. */ #define MLX5_RXQ_CQE_COMP_EN "rxq_cqe_comp_en" @@ -85,18 +62,12 @@ /* Device parameter to limit the size of inlining packet. */ #define MLX5_TXQ_MAX_INLINE_LEN "txq_max_inline_len" -/* Device parameter to enable hardware TSO offload. */ -#define MLX5_TSO "tso" - /* Device parameter to enable hardware Tx vector. */ #define MLX5_TX_VEC_EN "tx_vec_en" /* Device parameter to enable hardware Rx vector. */ #define MLX5_RX_VEC_EN "rx_vec_en" -/* Default PMD specific parameter value. */ -#define MLX5_ARG_UNSET (-1) - #ifndef HAVE_IBV_MLX5_MOD_MPW #define MLX5DV_CONTEXT_FLAGS_MPW_ALLOWED (1 << 2) #define MLX5DV_CONTEXT_FLAGS_ENHANCED_MPW (1 << 3) @@ -106,17 +77,6 @@ #define MLX5DV_CONTEXT_FLAGS_CQE_128B_COMP (1 << 4) #endif -struct mlx5_args { - int cqe_comp; - int txq_inline; - int txqs_inline; - int mps; - int mpw_hdr_dseg; - int inline_max_packet_sz; - int tso; - int tx_vec_en; - int rx_vec_en; -}; /** * Retrieve integer value from environment variable. * @@ -156,11 +116,20 @@ mlx5_alloc_verbs_buf(size_t size, void *data) struct priv *priv = data; void *ret; size_t alignment = sysconf(_SC_PAGESIZE); + unsigned int socket = SOCKET_ID_ANY; + + if (priv->verbs_alloc_ctx.type == MLX5_VERBS_ALLOC_TYPE_TX_QUEUE) { + const struct mlx5_txq_ctrl *ctrl = priv->verbs_alloc_ctx.obj; + + socket = ctrl->socket; + } else if (priv->verbs_alloc_ctx.type == + MLX5_VERBS_ALLOC_TYPE_RX_QUEUE) { + const struct mlx5_rxq_ctrl *ctrl = priv->verbs_alloc_ctx.obj; + socket = ctrl->socket; + } assert(data != NULL); - assert(!mlx5_is_secondary()); - ret = rte_malloc_socket(__func__, size, alignment, - priv->dev->device->numa_node); + ret = rte_malloc_socket(__func__, size, alignment, socket); DEBUG("Extern alloc size: %lu, align: %lu: %p", size, alignment, ret); return ret; } @@ -177,7 +146,6 @@ static void mlx5_free_verbs_buf(void *ptr, void *data __rte_unused) { assert(data != NULL); - assert(!mlx5_is_secondary()); DEBUG("Extern free request: %p", ptr); rte_free(ptr); } @@ -193,7 +161,7 @@ mlx5_free_verbs_buf(void *ptr, void *data __rte_unused) static void mlx5_dev_close(struct rte_eth_dev *dev) { - struct priv *priv = mlx5_get_priv(dev); + struct priv *priv = dev->data->dev_private; unsigned int i; int ret; @@ -225,15 +193,16 @@ mlx5_dev_close(struct rte_eth_dev *dev) } if (priv->pd != NULL) { assert(priv->ctx != NULL); - claim_zero(ibv_dealloc_pd(priv->pd)); - claim_zero(ibv_close_device(priv->ctx)); + claim_zero(mlx5_glue->dealloc_pd(priv->pd)); + claim_zero(mlx5_glue->close_device(priv->ctx)); } else assert(priv->ctx == NULL); if (priv->rss_conf.rss_key != NULL) rte_free(priv->rss_conf.rss_key); if (priv->reta_idx != NULL) rte_free(priv->reta_idx); - priv_socket_uninit(priv); + if (priv->primary_socket) + priv_socket_uninit(priv); ret = mlx5_priv_hrxq_ibv_verify(priv); if (ret) WARN("%p: some Hash Rx queue still remain", (void *)priv); @@ -303,6 +272,7 @@ const struct eth_dev_ops mlx5_dev_ops = { .tx_descriptor_status = mlx5_tx_descriptor_status, .rx_queue_intr_enable = mlx5_rx_intr_enable, .rx_queue_intr_disable = mlx5_rx_intr_disable, + .is_removed = mlx5_is_removed, }; static const struct eth_dev_ops mlx5_dev_sec_ops = { @@ -350,6 +320,7 @@ const struct eth_dev_ops mlx5_dev_ops_isolate = { .tx_descriptor_status = mlx5_tx_descriptor_status, .rx_queue_intr_enable = mlx5_rx_intr_enable, .rx_queue_intr_disable = mlx5_rx_intr_disable, + .is_removed = mlx5_is_removed, }; static struct { @@ -401,7 +372,7 @@ mlx5_dev_idx(struct rte_pci_addr *pci_addr) static int mlx5_args_check(const char *key, const char *val, void *opaque) { - struct mlx5_args *args = opaque; + struct mlx5_dev_config *config = opaque; unsigned long tmp; errno = 0; @@ -411,23 +382,21 @@ mlx5_args_check(const char *key, const char *val, void *opaque) return errno; } if (strcmp(MLX5_RXQ_CQE_COMP_EN, key) == 0) { - args->cqe_comp = !!tmp; + config->cqe_comp = !!tmp; } else if (strcmp(MLX5_TXQ_INLINE, key) == 0) { - args->txq_inline = tmp; + config->txq_inline = tmp; } else if (strcmp(MLX5_TXQS_MIN_INLINE, key) == 0) { - args->txqs_inline = tmp; + config->txqs_inline = tmp; } else if (strcmp(MLX5_TXQ_MPW_EN, key) == 0) { - args->mps = !!tmp; + config->mps = !!tmp ? config->mps : 0; } else if (strcmp(MLX5_TXQ_MPW_HDR_DSEG_EN, key) == 0) { - args->mpw_hdr_dseg = !!tmp; + config->mpw_hdr_dseg = !!tmp; } else if (strcmp(MLX5_TXQ_MAX_INLINE_LEN, key) == 0) { - args->inline_max_packet_sz = tmp; - } else if (strcmp(MLX5_TSO, key) == 0) { - args->tso = !!tmp; + config->inline_max_packet_sz = tmp; } else if (strcmp(MLX5_TX_VEC_EN, key) == 0) { - args->tx_vec_en = !!tmp; + config->tx_vec_en = !!tmp; } else if (strcmp(MLX5_RX_VEC_EN, key) == 0) { - args->rx_vec_en = !!tmp; + config->rx_vec_en = !!tmp; } else { WARN("%s: unknown parameter", key); return -EINVAL; @@ -438,8 +407,8 @@ mlx5_args_check(const char *key, const char *val, void *opaque) /** * Parse device parameters. * - * @param priv - * Pointer to private structure. + * @param config + * Pointer to device configuration structure. * @param devargs * Device arguments structure. * @@ -447,7 +416,7 @@ mlx5_args_check(const char *key, const char *val, void *opaque) * 0 on success, errno value on failure. */ static int -mlx5_args(struct mlx5_args *args, struct rte_devargs *devargs) +mlx5_args(struct mlx5_dev_config *config, struct rte_devargs *devargs) { const char **params = (const char *[]){ MLX5_RXQ_CQE_COMP_EN, @@ -456,7 +425,6 @@ mlx5_args(struct mlx5_args *args, struct rte_devargs *devargs) MLX5_TXQ_MPW_EN, MLX5_TXQ_MPW_HDR_DSEG_EN, MLX5_TXQ_MAX_INLINE_LEN, - MLX5_TSO, MLX5_TX_VEC_EN, MLX5_RX_VEC_EN, NULL, @@ -475,7 +443,7 @@ mlx5_args(struct mlx5_args *args, struct rte_devargs *devargs) for (i = 0; (params[i] != NULL); ++i) { if (rte_kvargs_count(kvlist, params[i])) { ret = rte_kvargs_process(kvlist, params[i], - mlx5_args_check, args); + mlx5_args_check, config); if (ret != 0) { rte_kvargs_free(kvlist); return ret; @@ -488,36 +456,104 @@ mlx5_args(struct mlx5_args *args, struct rte_devargs *devargs) static struct rte_pci_driver mlx5_driver; +/* + * Reserved UAR address space for TXQ UAR(hw doorbell) mapping, process + * local resource used by both primary and secondary to avoid duplicate + * reservation. + * The space has to be available on both primary and secondary process, + * TXQ UAR maps to this area using fixed mmap w/o double check. + */ +static void *uar_base; + /** - * Assign parameters from args into priv, only non default - * values are considered. + * Reserve UAR address space for primary process. * - * @param[out] priv + * @param[in] priv * Pointer to private structure. - * @param[in] args - * Pointer to args values. + * + * @return + * 0 on success, errno value on failure. */ -static void -mlx5_args_assign(struct priv *priv, struct mlx5_args *args) +static int +priv_uar_init_primary(struct priv *priv) { - if (args->cqe_comp != MLX5_ARG_UNSET) - priv->cqe_comp = args->cqe_comp; - if (args->txq_inline != MLX5_ARG_UNSET) - priv->txq_inline = args->txq_inline; - if (args->txqs_inline != MLX5_ARG_UNSET) - priv->txqs_inline = args->txqs_inline; - if (args->mps != MLX5_ARG_UNSET) - priv->mps = args->mps ? priv->mps : 0; - if (args->mpw_hdr_dseg != MLX5_ARG_UNSET) - priv->mpw_hdr_dseg = args->mpw_hdr_dseg; - if (args->inline_max_packet_sz != MLX5_ARG_UNSET) - priv->inline_max_packet_sz = args->inline_max_packet_sz; - if (args->tso != MLX5_ARG_UNSET) - priv->tso = args->tso; - if (args->tx_vec_en != MLX5_ARG_UNSET) - priv->tx_vec_en = args->tx_vec_en; - if (args->rx_vec_en != MLX5_ARG_UNSET) - priv->rx_vec_en = args->rx_vec_en; + void *addr = (void *)0; + int i; + const struct rte_mem_config *mcfg; + int ret; + + if (uar_base) { /* UAR address space mapped. */ + priv->uar_base = uar_base; + return 0; + } + /* find out lower bound of hugepage segments */ + mcfg = rte_eal_get_configuration()->mem_config; + for (i = 0; i < RTE_MAX_MEMSEG && mcfg->memseg[i].addr; i++) { + if (addr) + addr = RTE_MIN(addr, mcfg->memseg[i].addr); + else + addr = mcfg->memseg[i].addr; + } + /* keep distance to hugepages to minimize potential conflicts. */ + addr = RTE_PTR_SUB(addr, MLX5_UAR_OFFSET + MLX5_UAR_SIZE); + /* anonymous mmap, no real memory consumption. */ + addr = mmap(addr, MLX5_UAR_SIZE, + PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + if (addr == MAP_FAILED) { + ERROR("Failed to reserve UAR address space, please adjust " + "MLX5_UAR_SIZE or try --base-virtaddr"); + ret = ENOMEM; + return ret; + } + /* Accept either same addr or a new addr returned from mmap if target + * range occupied. + */ + INFO("Reserved UAR address space: %p", addr); + priv->uar_base = addr; /* for primary and secondary UAR re-mmap. */ + uar_base = addr; /* process local, don't reserve again. */ + return 0; +} + +/** + * Reserve UAR address space for secondary process, align with + * primary process. + * + * @param[in] priv + * Pointer to private structure. + * + * @return + * 0 on success, errno value on failure. + */ +static int +priv_uar_init_secondary(struct priv *priv) +{ + void *addr; + int ret; + + assert(priv->uar_base); + if (uar_base) { /* already reserved. */ + assert(uar_base == priv->uar_base); + return 0; + } + /* anonymous mmap, no real memory consumption. */ + addr = mmap(priv->uar_base, MLX5_UAR_SIZE, + PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + if (addr == MAP_FAILED) { + ERROR("UAR mmap failed: %p size: %llu", + priv->uar_base, MLX5_UAR_SIZE); + ret = ENXIO; + return ret; + } + if (priv->uar_base != addr) { + ERROR("UAR address %p size %llu occupied, please adjust " + "MLX5_UAR_OFFSET or try EAL parameter --base-virtaddr", + priv->uar_base, MLX5_UAR_SIZE); + ret = ENXIO; + return ret; + } + uar_base = addr; /* process local, don't reserve again */ + INFO("Reserved UAR address space: %p", addr); + return 0; } /** @@ -565,7 +601,7 @@ mlx5_pci_probe(struct rte_pci_driver *pci_drv, struct rte_pci_device *pci_dev) /* Save PCI address. */ mlx5_dev[idx].pci_addr = pci_dev->addr; - list = ibv_get_device_list(&i); + list = mlx5_glue->get_device_list(&i); if (list == NULL) { assert(errno); if (errno == ENOSYS) @@ -615,12 +651,12 @@ mlx5_pci_probe(struct rte_pci_driver *pci_drv, struct rte_pci_device *pci_dev) " (SR-IOV: %s)", list[i]->name, sriov ? "true" : "false"); - attr_ctx = ibv_open_device(list[i]); + attr_ctx = mlx5_glue->open_device(list[i]); err = errno; break; } if (attr_ctx == NULL) { - ibv_free_device_list(list); + mlx5_glue->free_device_list(list); switch (err) { case 0: ERROR("cannot access device, is mlx5_ib loaded?"); @@ -639,7 +675,7 @@ mlx5_pci_probe(struct rte_pci_driver *pci_drv, struct rte_pci_device *pci_dev) * Multi-packet send is supported by ConnectX-4 Lx PF as well * as all ConnectX-5 devices. */ - mlx5dv_query_device(attr_ctx, &attrs_out); + mlx5_glue->dv_query_device(attr_ctx, &attrs_out); if (attrs_out.flags & MLX5DV_CONTEXT_FLAGS_MPW_ALLOWED) { if (attrs_out.flags & MLX5DV_CONTEXT_FLAGS_ENHANCED_MPW) { DEBUG("Enhanced MPW is supported"); @@ -657,11 +693,13 @@ mlx5_pci_probe(struct rte_pci_driver *pci_drv, struct rte_pci_device *pci_dev) cqe_comp = 0; else cqe_comp = 1; - if (ibv_query_device_ex(attr_ctx, NULL, &device_attr)) + if (mlx5_glue->query_device_ex(attr_ctx, NULL, &device_attr)) goto error; INFO("%u port(s) detected", device_attr.orig_attr.phys_port_cnt); for (i = 0; i < device_attr.orig_attr.phys_port_cnt; i++) { + char name[RTE_ETH_NAME_MAX_LEN]; + int len; uint32_t port = i + 1; /* ports are indexed from one */ uint32_t test = (1 << i); struct ibv_context *ctx = NULL; @@ -673,26 +711,27 @@ mlx5_pci_probe(struct rte_pci_driver *pci_drv, struct rte_pci_device *pci_dev) struct ether_addr mac; uint16_t num_vfs = 0; struct ibv_device_attr_ex device_attr; - struct mlx5_args args = { - .cqe_comp = MLX5_ARG_UNSET, + struct mlx5_dev_config config = { + .cqe_comp = cqe_comp, + .mps = mps, + .tunnel_en = tunnel_en, + .tx_vec_en = 1, + .rx_vec_en = 1, + .mpw_hdr_dseg = 0, .txq_inline = MLX5_ARG_UNSET, .txqs_inline = MLX5_ARG_UNSET, - .mps = MLX5_ARG_UNSET, - .mpw_hdr_dseg = MLX5_ARG_UNSET, .inline_max_packet_sz = MLX5_ARG_UNSET, - .tso = MLX5_ARG_UNSET, - .tx_vec_en = MLX5_ARG_UNSET, - .rx_vec_en = MLX5_ARG_UNSET, }; - mlx5_dev[idx].ports |= test; + len = snprintf(name, sizeof(name), PCI_PRI_FMT, + pci_dev->addr.domain, pci_dev->addr.bus, + pci_dev->addr.devid, pci_dev->addr.function); + if (device_attr.orig_attr.phys_port_cnt > 1) + snprintf(name + len, sizeof(name), " port %u", i); - if (mlx5_is_secondary()) { - /* from rte_ethdev.c */ - char name[RTE_ETH_NAME_MAX_LEN]; + mlx5_dev[idx].ports |= test; - snprintf(name, sizeof(name), "%s port %u", - ibv_get_device_name(ibv_dev), port); + if (rte_eal_process_type() == RTE_PROC_SECONDARY) { eth_dev = rte_eth_dev_attach_secondary(name); if (eth_dev == NULL) { ERROR("can not attach rte ethdev"); @@ -702,6 +741,11 @@ mlx5_pci_probe(struct rte_pci_driver *pci_drv, struct rte_pci_device *pci_dev) eth_dev->device = &pci_dev->device; eth_dev->dev_ops = &mlx5_dev_sec_ops; priv = eth_dev->data->dev_private; + err = priv_uar_init_secondary(priv); + if (err < 0) { + err = -err; + goto error; + } /* Receive command fd from primary process */ err = priv_socket_connect(priv); if (err < 0) { @@ -710,26 +754,31 @@ mlx5_pci_probe(struct rte_pci_driver *pci_drv, struct rte_pci_device *pci_dev) } /* Remap UAR for Tx queues. */ err = priv_tx_uar_remap(priv, err); - if (err < 0) { - err = -err; + if (err) goto error; - } - priv_dev_select_rx_function(priv, eth_dev); - priv_dev_select_tx_function(priv, eth_dev); + /* + * Ethdev pointer is still required as input since + * the primary device is not accessible from the + * secondary process. + */ + eth_dev->rx_pkt_burst = + priv_select_rx_function(priv, eth_dev); + eth_dev->tx_pkt_burst = + priv_select_tx_function(priv, eth_dev); continue; } DEBUG("using port %u (%08" PRIx32 ")", port, test); - ctx = ibv_open_device(ibv_dev); + ctx = mlx5_glue->open_device(ibv_dev); if (ctx == NULL) { err = ENODEV; goto port_error; } - ibv_query_device_ex(ctx, NULL, &device_attr); + mlx5_glue->query_device_ex(ctx, NULL, &device_attr); /* Check port status. */ - err = ibv_query_port(ctx, port, &port_attr); + err = mlx5_glue->query_port(ctx, port, &port_attr); if (err) { ERROR("port query failed: %s", strerror(err)); goto port_error; @@ -744,11 +793,11 @@ mlx5_pci_probe(struct rte_pci_driver *pci_drv, struct rte_pci_device *pci_dev) if (port_attr.state != IBV_PORT_ACTIVE) DEBUG("port %d is not active: \"%s\" (%d)", - port, ibv_port_state_str(port_attr.state), + port, mlx5_glue->port_state_str(port_attr.state), port_attr.state); /* Allocate protection domain. */ - pd = ibv_alloc_pd(ctx); + pd = mlx5_glue->alloc_pd(ctx); if (pd == NULL) { ERROR("PD allocation failure"); err = ENOMEM; @@ -774,107 +823,86 @@ mlx5_pci_probe(struct rte_pci_driver *pci_drv, struct rte_pci_device *pci_dev) priv->port = port; priv->pd = pd; priv->mtu = ETHER_MTU; - priv->mps = mps; /* Enable MPW by default if supported. */ - priv->cqe_comp = cqe_comp; - priv->tunnel_en = tunnel_en; - /* Enable vector by default if supported. */ - priv->tx_vec_en = 1; - priv->rx_vec_en = 1; - err = mlx5_args(&args, pci_dev->device.devargs); + err = mlx5_args(&config, pci_dev->device.devargs); if (err) { ERROR("failed to process device arguments: %s", strerror(err)); goto port_error; } - mlx5_args_assign(priv, &args); - if (ibv_query_device_ex(ctx, NULL, &device_attr_ex)) { + if (mlx5_glue->query_device_ex(ctx, NULL, &device_attr_ex)) { ERROR("ibv_query_device_ex() failed"); goto port_error; } - priv->hw_csum = - !!(device_attr_ex.device_cap_flags_ex & - IBV_DEVICE_RAW_IP_CSUM); + config.hw_csum = !!(device_attr_ex.device_cap_flags_ex & + IBV_DEVICE_RAW_IP_CSUM); DEBUG("checksum offloading is %ssupported", - (priv->hw_csum ? "" : "not ")); + (config.hw_csum ? "" : "not ")); #ifdef HAVE_IBV_DEVICE_VXLAN_SUPPORT - priv->hw_csum_l2tun = !!(exp_device_attr.exp_device_cap_flags & - IBV_DEVICE_VXLAN_SUPPORT); + config.hw_csum_l2tun = + !!(exp_device_attr.exp_device_cap_flags & + IBV_DEVICE_VXLAN_SUPPORT); #endif - DEBUG("L2 tunnel checksum offloads are %ssupported", - (priv->hw_csum_l2tun ? "" : "not ")); + DEBUG("Rx L2 tunnel checksum offloads are %ssupported", + (config.hw_csum_l2tun ? "" : "not ")); #ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT - priv->counter_set_supported = !!(device_attr.max_counter_sets); - ibv_describe_counter_set(ctx, 0, &cs_desc); + config.flow_counter_en = !!(device_attr.max_counter_sets); + mlx5_glue->describe_counter_set(ctx, 0, &cs_desc); DEBUG("counter type = %d, num of cs = %ld, attributes = %d", cs_desc.counter_type, cs_desc.num_of_cs, cs_desc.attributes); #endif - priv->ind_table_max_size = + config.ind_table_max_size = device_attr_ex.rss_caps.max_rwq_indirection_table_size; /* Remove this check once DPDK supports larger/variable * indirection tables. */ - if (priv->ind_table_max_size > + if (config.ind_table_max_size > (unsigned int)ETH_RSS_RETA_SIZE_512) - priv->ind_table_max_size = ETH_RSS_RETA_SIZE_512; + config.ind_table_max_size = ETH_RSS_RETA_SIZE_512; DEBUG("maximum RX indirection table size is %u", - priv->ind_table_max_size); - priv->hw_vlan_strip = !!(device_attr_ex.raw_packet_caps & + config.ind_table_max_size); + config.hw_vlan_strip = !!(device_attr_ex.raw_packet_caps & IBV_RAW_PACKET_CAP_CVLAN_STRIPPING); DEBUG("VLAN stripping is %ssupported", - (priv->hw_vlan_strip ? "" : "not ")); + (config.hw_vlan_strip ? "" : "not ")); - priv->hw_fcs_strip = - !!(device_attr_ex.orig_attr.device_cap_flags & - IBV_WQ_FLAGS_SCATTER_FCS); + config.hw_fcs_strip = !!(device_attr_ex.raw_packet_caps & + IBV_RAW_PACKET_CAP_SCATTER_FCS); DEBUG("FCS stripping configuration is %ssupported", - (priv->hw_fcs_strip ? "" : "not ")); + (config.hw_fcs_strip ? "" : "not ")); #ifdef HAVE_IBV_WQ_FLAG_RX_END_PADDING - priv->hw_padding = !!device_attr_ex.rx_pad_end_addr_align; + config.hw_padding = !!device_attr_ex.rx_pad_end_addr_align; #endif DEBUG("hardware RX end alignment padding is %ssupported", - (priv->hw_padding ? "" : "not ")); + (config.hw_padding ? "" : "not ")); priv_get_num_vfs(priv, &num_vfs); - priv->sriov = (num_vfs || sriov); - priv->tso = ((priv->tso) && - (device_attr_ex.tso_caps.max_tso > 0) && - (device_attr_ex.tso_caps.supported_qpts & - (1 << IBV_QPT_RAW_PACKET))); - if (priv->tso) - priv->max_tso_payload_sz = - device_attr_ex.tso_caps.max_tso; - if (priv->mps && !mps) { + config.sriov = (num_vfs || sriov); + config.tso = ((device_attr_ex.tso_caps.max_tso > 0) && + (device_attr_ex.tso_caps.supported_qpts & + (1 << IBV_QPT_RAW_PACKET))); + if (config.tso) + config.tso_max_payload_sz = + device_attr_ex.tso_caps.max_tso; + if (config.mps && !mps) { ERROR("multi-packet send not supported on this device" " (" MLX5_TXQ_MPW_EN ")"); err = ENOTSUP; goto port_error; - } else if (priv->mps && priv->tso) { - WARN("multi-packet send not supported in conjunction " - "with TSO. MPS disabled"); - priv->mps = 0; } INFO("%sMPS is %s", - priv->mps == MLX5_MPW_ENHANCED ? "Enhanced " : "", - priv->mps != MLX5_MPW_DISABLED ? "enabled" : "disabled"); - /* Set default values for Enhanced MPW, a.k.a MPWv2. */ - if (priv->mps == MLX5_MPW_ENHANCED) { - if (args.txqs_inline == MLX5_ARG_UNSET) - priv->txqs_inline = MLX5_EMPW_MIN_TXQS; - if (args.inline_max_packet_sz == MLX5_ARG_UNSET) - priv->inline_max_packet_sz = - MLX5_EMPW_MAX_INLINE_LEN; - if (args.txq_inline == MLX5_ARG_UNSET) - priv->txq_inline = MLX5_WQE_SIZE_MAX - - MLX5_WQE_SIZE; - } - if (priv->cqe_comp && !cqe_comp) { + config.mps == MLX5_MPW_ENHANCED ? "Enhanced " : "", + config.mps != MLX5_MPW_DISABLED ? "enabled" : "disabled"); + if (config.cqe_comp && !cqe_comp) { WARN("Rx CQE compression isn't supported"); - priv->cqe_comp = 0; + config.cqe_comp = 0; } + err = priv_uar_init_primary(priv); + if (err) + goto port_error; /* Configure the first MAC address by default. */ if (priv_get_mac(priv, &mac.addr_bytes)) { ERROR("cannot get MAC address, is mlx5_en loaded?" @@ -902,14 +930,7 @@ mlx5_pci_probe(struct rte_pci_driver *pci_drv, struct rte_pci_device *pci_dev) priv_get_mtu(priv, &priv->mtu); DEBUG("port %u MTU is %u", priv->port, priv->mtu); - /* from rte_ethdev.c */ - { - char name[RTE_ETH_NAME_MAX_LEN]; - - snprintf(name, sizeof(name), "%s port %u", - ibv_get_device_name(ibv_dev), port); - eth_dev = rte_eth_dev_allocate(name); - } + eth_dev = rte_eth_dev_allocate(name); if (eth_dev == NULL) { ERROR("can not allocate rte ethdev"); err = ENOMEM; @@ -920,6 +941,11 @@ mlx5_pci_probe(struct rte_pci_driver *pci_drv, struct rte_pci_device *pci_dev) eth_dev->device = &pci_dev->device; rte_eth_copy_pci_info(eth_dev, pci_dev); eth_dev->device->driver = &mlx5_driver.driver; + /* + * Initialize burst functions to prevent crashes before link-up. + */ + eth_dev->rx_pkt_burst = removed_rx_burst; + eth_dev->tx_pkt_burst = removed_tx_burst; priv->dev = eth_dev; eth_dev->dev_ops = &mlx5_dev_ops; /* Register MAC address. */ @@ -933,22 +959,24 @@ mlx5_pci_probe(struct rte_pci_driver *pci_drv, struct rte_pci_device *pci_dev) .free = &mlx5_free_verbs_buf, .data = priv, }; - mlx5dv_set_context_attr(ctx, MLX5DV_CTX_ATTR_BUF_ALLOCATORS, - (void *)((uintptr_t)&alctr)); + mlx5_glue->dv_set_context_attr(ctx, + MLX5DV_CTX_ATTR_BUF_ALLOCATORS, + (void *)((uintptr_t)&alctr)); /* Bring Ethernet device up. */ DEBUG("forcing Ethernet interface up"); priv_set_flags(priv, ~IFF_UP, IFF_UP); - mlx5_link_update(priv->dev, 1); + /* Store device configuration on private structure. */ + priv->config = config; continue; port_error: if (priv) rte_free(priv); if (pd) - claim_zero(ibv_dealloc_pd(pd)); + claim_zero(mlx5_glue->dealloc_pd(pd)); if (ctx) - claim_zero(ibv_close_device(ctx)); + claim_zero(mlx5_glue->close_device(ctx)); break; } @@ -967,9 +995,9 @@ port_error: error: if (attr_ctx) - claim_zero(ibv_close_device(attr_ctx)); + claim_zero(mlx5_glue->close_device(attr_ctx)); if (list) - ibv_free_device_list(list); + mlx5_glue->free_device_list(list); assert(err >= 0); return -err; } @@ -1021,6 +1049,88 @@ static struct rte_pci_driver mlx5_driver = { .drv_flags = RTE_PCI_DRV_INTR_LSC | RTE_PCI_DRV_INTR_RMV, }; +#ifdef RTE_LIBRTE_MLX5_DLOPEN_DEPS + +/** + * Initialization routine for run-time dependency on rdma-core. + */ +static int +mlx5_glue_init(void) +{ + const char *path[] = { + /* + * A basic security check is necessary before trusting + * MLX5_GLUE_PATH, which may override RTE_EAL_PMD_PATH. + */ + (geteuid() == getuid() && getegid() == getgid() ? + getenv("MLX5_GLUE_PATH") : NULL), + RTE_EAL_PMD_PATH, + }; + unsigned int i = 0; + void *handle = NULL; + void **sym; + const char *dlmsg; + + while (!handle && i != RTE_DIM(path)) { + const char *end; + size_t len; + int ret; + + if (!path[i]) { + ++i; + continue; + } + end = strpbrk(path[i], ":;"); + if (!end) + end = path[i] + strlen(path[i]); + len = end - path[i]; + ret = 0; + do { + char name[ret + 1]; + + ret = snprintf(name, sizeof(name), "%.*s%s" MLX5_GLUE, + (int)len, path[i], + (!len || *(end - 1) == '/') ? "" : "/"); + if (ret == -1) + break; + if (sizeof(name) != (size_t)ret + 1) + continue; + DEBUG("looking for rdma-core glue as \"%s\"", name); + handle = dlopen(name, RTLD_LAZY); + break; + } while (1); + path[i] = end + 1; + if (!*end) + ++i; + } + if (!handle) { + rte_errno = EINVAL; + dlmsg = dlerror(); + if (dlmsg) + WARN("cannot load glue library: %s", dlmsg); + goto glue_error; + } + sym = dlsym(handle, "mlx5_glue"); + if (!sym || !*sym) { + rte_errno = EINVAL; + dlmsg = dlerror(); + if (dlmsg) + ERROR("cannot resolve glue symbol: %s", dlmsg); + goto glue_error; + } + mlx5_glue = *sym; + return 0; +glue_error: + if (handle) + dlclose(handle); + WARN("cannot initialize PMD due to missing run-time" + " dependency on rdma-core libraries (libibverbs," + " libmlx5)"); + return -rte_errno; +} + +#endif + /** * Driver initialization routine. */ @@ -1040,7 +1150,26 @@ rte_mlx5_pmd_init(void) /* Match the size of Rx completion entry to the size of a cacheline. */ if (RTE_CACHE_LINE_SIZE == 128) setenv("MLX5_CQE_SIZE", "128", 0); - ibv_fork_init(); +#ifdef RTE_LIBRTE_MLX5_DLOPEN_DEPS + if (mlx5_glue_init()) + return; + assert(mlx5_glue); +#endif +#ifndef NDEBUG + /* Glue structure must not contain any NULL pointers. */ + { + unsigned int i; + + for (i = 0; i != sizeof(*mlx5_glue) / sizeof(void *); ++i) + assert(((const void *const *)mlx5_glue)[i]); + } +#endif + if (strcmp(mlx5_glue->version, MLX5_GLUE_VERSION)) { + ERROR("rdma-core glue \"%s\" mismatch: \"%s\" is required", + mlx5_glue->version, MLX5_GLUE_VERSION); + return; + } + mlx5_glue->fork_init(); rte_pci_register(&mlx5_driver); } diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h index e6a69b82..965c19f2 100644 --- a/drivers/net/mlx5/mlx5.h +++ b/drivers/net/mlx5/mlx5.h @@ -1,34 +1,6 @@ -/*- - * BSD LICENSE - * - * Copyright 2015 6WIND S.A. - * Copyright 2015 Mellanox. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * * Neither the name of 6WIND S.A. nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright 2015 6WIND S.A. + * Copyright 2015 Mellanox. */ #ifndef RTE_PMD_MLX5_H_ @@ -53,7 +25,7 @@ #include <rte_pci.h> #include <rte_ether.h> -#include <rte_ethdev.h> +#include <rte_ethdev_driver.h> #include <rte_spinlock.h> #include <rte_interrupts.h> #include <rte_errno.h> @@ -90,6 +62,57 @@ struct mlx5_xstats_ctrl { /* Flow list . */ TAILQ_HEAD(mlx5_flows, rte_flow); +/* Default PMD specific parameter value. */ +#define MLX5_ARG_UNSET (-1) + +/* + * Device configuration structure. + * + * Merged configuration from: + * + * - Device capabilities, + * - User device parameters disabled features. + */ +struct mlx5_dev_config { + unsigned int hw_csum:1; /* Checksum offload is supported. */ + unsigned int hw_csum_l2tun:1; /* Same for L2 tunnels. */ + unsigned int hw_vlan_strip:1; /* VLAN stripping is supported. */ + unsigned int hw_fcs_strip:1; /* FCS stripping is supported. */ + unsigned int hw_padding:1; /* End alignment padding is supported. */ + unsigned int sriov:1; /* This is a VF or PF with VF devices. */ + unsigned int mps:2; /* Multi-packet send supported mode. */ + unsigned int tunnel_en:1; /* Whether tunnel is supported. */ + unsigned int flow_counter_en:1; /* Whether flow counter is supported. */ + unsigned int cqe_comp:1; /* CQE compression is enabled. */ + unsigned int tso:1; /* Whether TSO is supported. */ + unsigned int tx_vec_en:1; /* Tx vector is enabled. */ + unsigned int rx_vec_en:1; /* Rx vector is enabled. */ + unsigned int mpw_hdr_dseg:1; /* Enable DSEGs in the title WQEBB. */ + unsigned int tso_max_payload_sz; /* Maximum TCP payload for TSO. */ + unsigned int ind_table_max_size; /* Maximum indirection table size. */ + int txq_inline; /* Maximum packet size for inlining. */ + int txqs_inline; /* Queue number threshold for inlining. */ + int inline_max_packet_sz; /* Max packet size for inlining. */ +}; + +/** + * Type of objet being allocated. + */ +enum mlx5_verbs_alloc_type { + MLX5_VERBS_ALLOC_TYPE_NONE, + MLX5_VERBS_ALLOC_TYPE_TX_QUEUE, + MLX5_VERBS_ALLOC_TYPE_RX_QUEUE, +}; + +/** + * Verbs allocator needs a context to know in the callback which kind of + * resources it is allocating. + */ +struct mlx5_verbs_alloc_ctx { + enum mlx5_verbs_alloc_type type; /* Kind of object being allocated. */ + const void *obj; /* Pointer to the DPDK object. */ +}; + struct priv { struct rte_eth_dev *dev; /* Ethernet device of master process. */ struct ibv_context *ctx; /* Verbs context. */ @@ -102,33 +125,13 @@ struct priv { /* Device properties. */ uint16_t mtu; /* Configured MTU. */ uint8_t port; /* Physical port number. */ - unsigned int hw_csum:1; /* Checksum offload is supported. */ - unsigned int hw_csum_l2tun:1; /* Same for L2 tunnels. */ - unsigned int hw_vlan_strip:1; /* VLAN stripping is supported. */ - unsigned int hw_fcs_strip:1; /* FCS stripping is supported. */ - unsigned int hw_padding:1; /* End alignment padding is supported. */ - unsigned int sriov:1; /* This is a VF or PF with VF devices. */ - unsigned int mps:2; /* Multi-packet send mode (0: disabled). */ - unsigned int mpw_hdr_dseg:1; /* Enable DSEGs in the title WQEBB. */ - unsigned int cqe_comp:1; /* Whether CQE compression is enabled. */ unsigned int pending_alarm:1; /* An alarm is pending. */ - unsigned int tso:1; /* Whether TSO is supported. */ - unsigned int tunnel_en:1; unsigned int isolated:1; /* Whether isolated mode is enabled. */ - unsigned int tx_vec_en:1; /* Whether Tx vector is enabled. */ - unsigned int rx_vec_en:1; /* Whether Rx vector is enabled. */ - unsigned int counter_set_supported:1; /* Counter set is supported. */ - /* Whether Tx offloads for tunneled packets are supported. */ - unsigned int max_tso_payload_sz; /* Maximum TCP payload for TSO. */ - unsigned int txq_inline; /* Maximum packet size for inlining. */ - unsigned int txqs_inline; /* Queue number threshold for inlining. */ - unsigned int inline_max_packet_sz; /* Max packet size for inlining. */ /* RX/TX queues. */ unsigned int rxqs_n; /* RX queues array size. */ unsigned int txqs_n; /* TX queues array size. */ struct mlx5_rxq_data *(*rxqs)[]; /* RX queues. */ struct mlx5_txq_data *(*txqs)[]; /* TX queues. */ - unsigned int ind_table_max_size; /* Maximum indirection table size. */ struct rte_eth_rss_conf rss_conf; /* RSS configuration. */ struct rte_intr_handle intr_handle; /* Interrupt handler. */ unsigned int (*reta_idx)[]; /* RETA index table. */ @@ -148,7 +151,11 @@ struct priv { struct mlx5_xstats_ctrl xstats_ctrl; /* Extended stats control. */ rte_spinlock_t lock; /* Lock for control functions. */ int primary_socket; /* Unix socket for primary process. */ + void *uar_base; /* Reserved address space for UAR mapping */ struct rte_intr_handle intr_handle_socket; /* Interrupt handler. */ + struct mlx5_dev_config config; /* Device configuration. */ + struct mlx5_verbs_alloc_ctx verbs_alloc_ctx; + /* Context for Verbs allocator. */ }; /** @@ -165,6 +172,22 @@ priv_lock(struct priv *priv) } /** + * Try to lock private structure to protect it from concurrent access in the + * control path. + * + * @param priv + * Pointer to private structure. + * + * @return + * 1 if the lock is successfully taken; 0 otherwise. + */ +static inline int +priv_trylock(struct priv *priv) +{ + return rte_spinlock_trylock(&priv->lock); +} + +/** * Unlock private structure. * * @param priv @@ -194,6 +217,8 @@ int priv_set_flags(struct priv *, unsigned int, unsigned int); int mlx5_dev_configure(struct rte_eth_dev *); void mlx5_dev_infos_get(struct rte_eth_dev *, struct rte_eth_dev_info *); const uint32_t *mlx5_dev_supported_ptypes_get(struct rte_eth_dev *dev); +int priv_link_update(struct priv *, int); +int priv_force_link_status_change(struct priv *, int); int mlx5_link_update(struct rte_eth_dev *, int); int mlx5_dev_set_mtu(struct rte_eth_dev *, uint16_t); int mlx5_dev_get_flow_ctrl(struct rte_eth_dev *, struct rte_eth_fc_conf *); @@ -206,8 +231,9 @@ void priv_dev_interrupt_handler_uninstall(struct priv *, struct rte_eth_dev *); void priv_dev_interrupt_handler_install(struct priv *, struct rte_eth_dev *); int mlx5_set_link_down(struct rte_eth_dev *dev); int mlx5_set_link_up(struct rte_eth_dev *dev); -void priv_dev_select_tx_function(struct priv *priv, struct rte_eth_dev *dev); -void priv_dev_select_rx_function(struct priv *priv, struct rte_eth_dev *dev); +int mlx5_is_removed(struct rte_eth_dev *dev); +eth_tx_burst_t priv_select_tx_function(struct priv *, struct rte_eth_dev *); +eth_rx_burst_t priv_select_rx_function(struct priv *, struct rte_eth_dev *); /* mlx5_mac.c */ diff --git a/drivers/net/mlx5/mlx5_defs.h b/drivers/net/mlx5/mlx5_defs.h index 3a7706cf..c3334ca3 100644 --- a/drivers/net/mlx5/mlx5_defs.h +++ b/drivers/net/mlx5/mlx5_defs.h @@ -1,39 +1,13 @@ -/*- - * BSD LICENSE - * - * Copyright 2015 6WIND S.A. - * Copyright 2015 Mellanox. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * * Neither the name of 6WIND S.A. nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright 2015 6WIND S.A. + * Copyright 2015 Mellanox. */ #ifndef RTE_PMD_MLX5_DEFS_H_ #define RTE_PMD_MLX5_DEFS_H_ +#include <rte_ethdev_driver.h> + #include "mlx5_autoconf.h" /* Reported driver name. */ @@ -105,4 +79,20 @@ /* Number of packets vectorized Rx can simultaneously process in a loop. */ #define MLX5_VPMD_DESCS_PER_LOOP 4 +/* Supported RSS */ +#define MLX5_RSS_HF_MASK (~(ETH_RSS_IP | ETH_RSS_UDP | ETH_RSS_TCP)) + +/* Maximum number of attempts to query link status before giving up. */ +#define MLX5_MAX_LINK_QUERY_ATTEMPTS 5 + +/* Reserved address space for UAR mapping. */ +#define MLX5_UAR_SIZE (1ULL << 32) + +/* Offset of reserved UAR address space to hugepage memory. Offset is used here + * to minimize possibility of address next to hugepage being used by other code + * in either primary or secondary process, failing to map TX UAR would make TX + * packets invisible to HW. + */ +#define MLX5_UAR_OFFSET (1ULL << 32) + #endif /* RTE_PMD_MLX5_DEFS_H_ */ diff --git a/drivers/net/mlx5/mlx5_ethdev.c b/drivers/net/mlx5/mlx5_ethdev.c index a3cef689..66650769 100644 --- a/drivers/net/mlx5/mlx5_ethdev.c +++ b/drivers/net/mlx5/mlx5_ethdev.c @@ -1,34 +1,6 @@ -/*- - * BSD LICENSE - * - * Copyright 2015 6WIND S.A. - * Copyright 2015 Mellanox. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * * Neither the name of 6WIND S.A. nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright 2015 6WIND S.A. + * Copyright 2015 Mellanox. */ #define _GNU_SOURCE @@ -55,7 +27,7 @@ #include <sys/un.h> #include <rte_atomic.h> -#include <rte_ethdev.h> +#include <rte_ethdev_driver.h> #include <rte_bus_pci.h> #include <rte_mbuf.h> #include <rte_common.h> @@ -64,6 +36,7 @@ #include <rte_malloc.h> #include "mlx5.h" +#include "mlx5_glue.h" #include "mlx5_rxtx.h" #include "mlx5_utils.h" @@ -119,33 +92,6 @@ struct ethtool_link_settings { #endif /** - * Return private structure associated with an Ethernet device. - * - * @param dev - * Pointer to Ethernet device structure. - * - * @return - * Pointer to private structure. - */ -struct priv * -mlx5_get_priv(struct rte_eth_dev *dev) -{ - return dev->data->dev_private; -} - -/** - * Check if running as a secondary process. - * - * @return - * Nonzero if running as a secondary process. - */ -inline int -mlx5_is_secondary(void) -{ - return rte_eal_process_type() == RTE_PROC_SECONDARY; -} - -/** * Get interface name from private structure. * * @param[in] priv @@ -577,8 +523,26 @@ dev_configure(struct rte_eth_dev *dev) unsigned int j; unsigned int reta_idx_n; const uint8_t use_app_rss_key = - !!dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key_len; - + !!dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key; + uint64_t supp_tx_offloads = mlx5_priv_get_tx_port_offloads(priv); + uint64_t tx_offloads = dev->data->dev_conf.txmode.offloads; + uint64_t supp_rx_offloads = + (mlx5_priv_get_rx_port_offloads(priv) | + mlx5_priv_get_rx_queue_offloads(priv)); + uint64_t rx_offloads = dev->data->dev_conf.rxmode.offloads; + + if ((tx_offloads & supp_tx_offloads) != tx_offloads) { + ERROR("Some Tx offloads are not supported " + "requested 0x%" PRIx64 " supported 0x%" PRIx64, + tx_offloads, supp_tx_offloads); + return ENOTSUP; + } + if ((rx_offloads & supp_rx_offloads) != rx_offloads) { + ERROR("Some Rx offloads are not supported " + "requested 0x%" PRIx64 " supported 0x%" PRIx64, + rx_offloads, supp_rx_offloads); + return ENOTSUP; + } if (use_app_rss_key && (dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key_len != rss_hash_default_key_len)) { @@ -606,7 +570,7 @@ dev_configure(struct rte_eth_dev *dev) (void *)dev, priv->txqs_n, txqs_n); priv->txqs_n = txqs_n; } - if (rxqs_n > priv->ind_table_max_size) { + if (rxqs_n > priv->config.ind_table_max_size) { ERROR("cannot handle this many RX queues (%u)", rxqs_n); return EINVAL; } @@ -619,7 +583,7 @@ dev_configure(struct rte_eth_dev *dev) * maximum indirection table size for better balancing. * The result is always rounded to the next power of two. */ reta_idx_n = (1 << log2above((rxqs_n & (rxqs_n - 1)) ? - priv->ind_table_max_size : + priv->config.ind_table_max_size : rxqs_n)); if (priv_rss_reta_index_resize(priv, reta_idx_n)) return ENOMEM; @@ -649,9 +613,6 @@ mlx5_dev_configure(struct rte_eth_dev *dev) struct priv *priv = dev->data->dev_private; int ret; - if (mlx5_is_secondary()) - return -E_RTE_SECONDARY; - priv_lock(priv); ret = dev_configure(dev); assert(ret >= 0); @@ -670,7 +631,8 @@ mlx5_dev_configure(struct rte_eth_dev *dev) void mlx5_dev_infos_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *info) { - struct priv *priv = mlx5_get_priv(dev); + struct priv *priv = dev->data->dev_private; + struct mlx5_dev_config *config = &priv->config; unsigned int max; char ifname[IF_NAMESIZE]; @@ -692,34 +654,18 @@ mlx5_dev_infos_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *info) info->max_rx_queues = max; info->max_tx_queues = max; info->max_mac_addrs = RTE_DIM(priv->mac); - info->rx_offload_capa = - (priv->hw_csum ? - (DEV_RX_OFFLOAD_IPV4_CKSUM | - DEV_RX_OFFLOAD_UDP_CKSUM | - DEV_RX_OFFLOAD_TCP_CKSUM) : - 0) | - (priv->hw_vlan_strip ? DEV_RX_OFFLOAD_VLAN_STRIP : 0) | - DEV_RX_OFFLOAD_TIMESTAMP; - - if (!priv->mps) - info->tx_offload_capa = DEV_TX_OFFLOAD_VLAN_INSERT; - if (priv->hw_csum) - info->tx_offload_capa |= - (DEV_TX_OFFLOAD_IPV4_CKSUM | - DEV_TX_OFFLOAD_UDP_CKSUM | - DEV_TX_OFFLOAD_TCP_CKSUM); - if (priv->tso) - info->tx_offload_capa |= DEV_TX_OFFLOAD_TCP_TSO; - if (priv->tunnel_en) - info->tx_offload_capa |= (DEV_TX_OFFLOAD_OUTER_IPV4_CKSUM | - DEV_TX_OFFLOAD_VXLAN_TNL_TSO | - DEV_TX_OFFLOAD_GRE_TNL_TSO); + info->rx_queue_offload_capa = + mlx5_priv_get_rx_queue_offloads(priv); + info->rx_offload_capa = (mlx5_priv_get_rx_port_offloads(priv) | + info->rx_queue_offload_capa); + info->tx_offload_capa = mlx5_priv_get_tx_port_offloads(priv); if (priv_get_ifname(priv, &ifname) == 0) info->if_index = if_nametoindex(ifname); info->reta_size = priv->reta_idx_n ? - priv->reta_idx_n : priv->ind_table_max_size; + priv->reta_idx_n : config->ind_table_max_size; info->hash_key_size = priv->rss_conf.rss_key_len; info->speed_capa = priv->link_speed_capa; + info->flow_type_rss_offloads = ~MLX5_RSS_HF_MASK; priv_unlock(priv); } @@ -761,7 +707,7 @@ mlx5_dev_supported_ptypes_get(struct rte_eth_dev *dev) static int mlx5_link_update_unlocked_gset(struct rte_eth_dev *dev, int wait_to_complete) { - struct priv *priv = mlx5_get_priv(dev); + struct priv *priv = dev->data->dev_private; struct ethtool_cmd edata = { .cmd = ETHTOOL_GSET /* Deprecated since Linux v4.5. */ }; @@ -827,7 +773,7 @@ mlx5_link_update_unlocked_gset(struct rte_eth_dev *dev, int wait_to_complete) static int mlx5_link_update_unlocked_gs(struct rte_eth_dev *dev, int wait_to_complete) { - struct priv *priv = mlx5_get_priv(dev); + struct priv *priv = dev->data->dev_private; struct ethtool_link_settings gcmd = { .cmd = ETHTOOL_GLINKSETTINGS }; struct ifreq ifr; struct rte_eth_link dev_link; @@ -913,25 +859,131 @@ mlx5_link_update_unlocked_gs(struct rte_eth_dev *dev, int wait_to_complete) } /** - * DPDK callback to retrieve physical link information. + * Enable receiving and transmitting traffic. * - * @param dev - * Pointer to Ethernet device structure. + * @param priv + * Pointer to private structure. + */ +static void +priv_link_start(struct priv *priv) +{ + struct rte_eth_dev *dev = priv->dev; + int err; + + dev->tx_pkt_burst = priv_select_tx_function(priv, dev); + dev->rx_pkt_burst = priv_select_rx_function(priv, dev); + err = priv_dev_traffic_enable(priv, dev); + if (err) + ERROR("%p: error occurred while configuring control flows: %s", + (void *)priv, strerror(err)); + err = priv_flow_start(priv, &priv->flows); + if (err) + ERROR("%p: error occurred while configuring flows: %s", + (void *)priv, strerror(err)); +} + +/** + * Disable receiving and transmitting traffic. + * + * @param priv + * Pointer to private structure. + */ +static void +priv_link_stop(struct priv *priv) +{ + struct rte_eth_dev *dev = priv->dev; + + priv_flow_stop(priv, &priv->flows); + priv_dev_traffic_disable(priv, dev); + dev->rx_pkt_burst = removed_rx_burst; + dev->tx_pkt_burst = removed_tx_burst; +} + +/** + * Retrieve physical link information and update rx/tx_pkt_burst callbacks + * accordingly. + * + * @param priv + * Pointer to private structure. * @param wait_to_complete * Wait for request completion (ignored). */ int -mlx5_link_update(struct rte_eth_dev *dev, int wait_to_complete) +priv_link_update(struct priv *priv, int wait_to_complete) { + struct rte_eth_dev *dev = priv->dev; struct utsname utsname; int ver[3]; + int ret; + struct rte_eth_link dev_link = dev->data->dev_link; if (uname(&utsname) == -1 || sscanf(utsname.release, "%d.%d.%d", &ver[0], &ver[1], &ver[2]) != 3 || KERNEL_VERSION(ver[0], ver[1], ver[2]) < KERNEL_VERSION(4, 9, 0)) - return mlx5_link_update_unlocked_gset(dev, wait_to_complete); - return mlx5_link_update_unlocked_gs(dev, wait_to_complete); + ret = mlx5_link_update_unlocked_gset(dev, wait_to_complete); + else + ret = mlx5_link_update_unlocked_gs(dev, wait_to_complete); + /* If lsc interrupt is disabled, should always be ready for traffic. */ + if (!dev->data->dev_conf.intr_conf.lsc) { + priv_link_start(priv); + return ret; + } + /* Re-select burst callbacks only if link status has been changed. */ + if (!ret && dev_link.link_status != dev->data->dev_link.link_status) { + if (dev->data->dev_link.link_status == ETH_LINK_UP) + priv_link_start(priv); + else + priv_link_stop(priv); + } + return ret; +} + +/** + * Querying the link status till it changes to the desired state. + * Number of query attempts is bounded by MLX5_MAX_LINK_QUERY_ATTEMPTS. + * + * @param priv + * Pointer to private structure. + * @param status + * Link desired status. + * + * @return + * 0 on success, negative errno value on failure. + */ +int +priv_force_link_status_change(struct priv *priv, int status) +{ + int try = 0; + + while (try < MLX5_MAX_LINK_QUERY_ATTEMPTS) { + priv_link_update(priv, 0); + if (priv->dev->data->dev_link.link_status == status) + return 0; + try++; + sleep(1); + } + return -EAGAIN; +} + +/** + * DPDK callback to retrieve physical link information. + * + * @param dev + * Pointer to Ethernet device structure. + * @param wait_to_complete + * Wait for request completion (ignored). + */ +int +mlx5_link_update(struct rte_eth_dev *dev, int wait_to_complete) +{ + struct priv *priv = dev->data->dev_private; + int ret; + + priv_lock(priv); + ret = priv_link_update(priv, wait_to_complete); + priv_unlock(priv); + return ret; } /** @@ -952,9 +1004,6 @@ mlx5_dev_set_mtu(struct rte_eth_dev *dev, uint16_t mtu) uint16_t kern_mtu; int ret = 0; - if (mlx5_is_secondary()) - return -E_RTE_SECONDARY; - priv_lock(priv); ret = priv_get_mtu(priv, &kern_mtu); if (ret) @@ -1002,9 +1051,6 @@ mlx5_dev_get_flow_ctrl(struct rte_eth_dev *dev, struct rte_eth_fc_conf *fc_conf) }; int ret; - if (mlx5_is_secondary()) - return -E_RTE_SECONDARY; - ifr.ifr_data = (void *)ðpause; priv_lock(priv); if (priv_ifreq(priv, SIOCETHTOOL, &ifr)) { @@ -1053,9 +1099,6 @@ mlx5_dev_set_flow_ctrl(struct rte_eth_dev *dev, struct rte_eth_fc_conf *fc_conf) }; int ret; - if (mlx5_is_secondary()) - return -E_RTE_SECONDARY; - ifr.ifr_data = (void *)ðpause; ethpause.autoneg = fc_conf->autoneg; if (((fc_conf->mode & RTE_FC_FULL) == RTE_FC_FULL) || @@ -1150,7 +1193,7 @@ priv_link_status_update(struct priv *priv) { struct rte_eth_link *link = &priv->dev->data->dev_link; - mlx5_link_update(priv->dev, 0); + priv_link_update(priv, 0); if (((link->link_speed == 0) && link->link_status) || ((link->link_speed != 0) && !link->link_status)) { /* @@ -1191,7 +1234,7 @@ priv_dev_status_handler(struct priv *priv) /* Read all message and acknowledge them. */ for (;;) { - if (ibv_get_async_event(priv->ctx, &event)) + if (mlx5_glue->get_async_event(priv->ctx, &event)) break; if ((event.event_type == IBV_EVENT_PORT_ACTIVE || event.event_type == IBV_EVENT_PORT_ERR) && @@ -1203,7 +1246,7 @@ priv_dev_status_handler(struct priv *priv) else DEBUG("event type %d on port %d not handled", event.event_type, event.element.port_num); - ibv_ack_async_event(&event); + mlx5_glue->ack_async_event(&event); } if (ret & (1 << RTE_ETH_EVENT_INTR_LSC)) if (priv_link_status_update(priv)) @@ -1224,14 +1267,17 @@ mlx5_dev_link_status_handler(void *arg) struct priv *priv = dev->data->dev_private; int ret; - priv_lock(priv); - assert(priv->pending_alarm == 1); + while (!priv_trylock(priv)) { + /* Alarm is being canceled. */ + if (priv->pending_alarm == 0) + return; + rte_pause(); + } priv->pending_alarm = 0; ret = priv_link_status_update(priv); priv_unlock(priv); if (!ret) - _rte_eth_dev_callback_process(dev, RTE_ETH_EVENT_INTR_LSC, NULL, - NULL); + _rte_eth_dev_callback_process(dev, RTE_ETH_EVENT_INTR_LSC, NULL); } /** @@ -1253,11 +1299,9 @@ mlx5_dev_interrupt_handler(void *cb_arg) events = priv_dev_status_handler(priv); priv_unlock(priv); if (events & (1 << RTE_ETH_EVENT_INTR_LSC)) - _rte_eth_dev_callback_process(dev, RTE_ETH_EVENT_INTR_LSC, NULL, - NULL); + _rte_eth_dev_callback_process(dev, RTE_ETH_EVENT_INTR_LSC, NULL); if (events & (1 << RTE_ETH_EVENT_INTR_RMV)) - _rte_eth_dev_callback_process(dev, RTE_ETH_EVENT_INTR_RMV, NULL, - NULL); + _rte_eth_dev_callback_process(dev, RTE_ETH_EVENT_INTR_RMV, NULL); } /** @@ -1295,9 +1339,10 @@ priv_dev_interrupt_handler_uninstall(struct priv *priv, struct rte_eth_dev *dev) if (priv->primary_socket) rte_intr_callback_unregister(&priv->intr_handle_socket, mlx5_dev_handler_socket, dev); - if (priv->pending_alarm) + if (priv->pending_alarm) { + priv->pending_alarm = 0; rte_eal_alarm_cancel(mlx5_dev_link_status_handler, dev); - priv->pending_alarm = 0; + } priv->intr_handle.fd = 0; priv->intr_handle.type = RTE_INTR_HANDLE_UNKNOWN; priv->intr_handle_socket.fd = 0; @@ -1317,7 +1362,6 @@ priv_dev_interrupt_handler_install(struct priv *priv, struct rte_eth_dev *dev) { int rc, flags; - assert(!mlx5_is_secondary()); assert(priv->ctx->async_fd > 0); flags = fcntl(priv->ctx->async_fd, F_GETFL); rc = fcntl(priv->ctx->async_fd, F_SETFL, flags | O_NONBLOCK); @@ -1348,8 +1392,6 @@ priv_dev_interrupt_handler_install(struct priv *priv, struct rte_eth_dev *dev) * * @param priv * Pointer to private data structure. - * @param dev - * Pointer to rte_eth_dev structure. * @param up * Nonzero for link up, otherwise link down. * @@ -1357,24 +1399,9 @@ priv_dev_interrupt_handler_install(struct priv *priv, struct rte_eth_dev *dev) * 0 on success, errno value on failure. */ static int -priv_dev_set_link(struct priv *priv, struct rte_eth_dev *dev, int up) +priv_dev_set_link(struct priv *priv, int up) { - int err; - - if (up) { - err = priv_set_flags(priv, ~IFF_UP, IFF_UP); - if (err) - return err; - priv_dev_select_tx_function(priv, dev); - priv_dev_select_rx_function(priv, dev); - } else { - err = priv_set_flags(priv, ~IFF_UP, ~IFF_UP); - if (err) - return err; - dev->rx_pkt_burst = removed_rx_burst; - dev->tx_pkt_burst = removed_tx_burst; - } - return 0; + return priv_set_flags(priv, ~IFF_UP, up ? IFF_UP : ~IFF_UP); } /** @@ -1393,7 +1420,7 @@ mlx5_set_link_down(struct rte_eth_dev *dev) int err; priv_lock(priv); - err = priv_dev_set_link(priv, dev, 0); + err = priv_dev_set_link(priv, 0); priv_unlock(priv); return err; } @@ -1414,7 +1441,7 @@ mlx5_set_link_up(struct rte_eth_dev *dev) int err; priv_lock(priv); - err = priv_dev_set_link(priv, dev, 1); + err = priv_dev_set_link(priv, 1); priv_unlock(priv); return err; } @@ -1426,32 +1453,44 @@ mlx5_set_link_up(struct rte_eth_dev *dev) * Pointer to private data structure. * @param dev * Pointer to rte_eth_dev structure. + * + * @return + * Pointer to selected Tx burst function. */ -void -priv_dev_select_tx_function(struct priv *priv, struct rte_eth_dev *dev) +eth_tx_burst_t +priv_select_tx_function(struct priv *priv, struct rte_eth_dev *dev) { + eth_tx_burst_t tx_pkt_burst = mlx5_tx_burst; + struct mlx5_dev_config *config = &priv->config; + uint64_t tx_offloads = dev->data->dev_conf.txmode.offloads; + int tso = !!(tx_offloads & (DEV_TX_OFFLOAD_TCP_TSO | + DEV_TX_OFFLOAD_VXLAN_TNL_TSO | + DEV_TX_OFFLOAD_GRE_TNL_TSO)); + int vlan_insert = !!(tx_offloads & DEV_TX_OFFLOAD_VLAN_INSERT); + assert(priv != NULL); - assert(dev != NULL); - dev->tx_pkt_burst = mlx5_tx_burst; /* Select appropriate TX function. */ - if (priv->mps == MLX5_MPW_ENHANCED) { - if (priv_check_vec_tx_support(priv) > 0) { - if (priv_check_raw_vec_tx_support(priv) > 0) - dev->tx_pkt_burst = mlx5_tx_burst_raw_vec; + if (vlan_insert || tso) + return tx_pkt_burst; + if (config->mps == MLX5_MPW_ENHANCED) { + if (priv_check_vec_tx_support(priv, dev) > 0) { + if (priv_check_raw_vec_tx_support(priv, dev) > 0) + tx_pkt_burst = mlx5_tx_burst_raw_vec; else - dev->tx_pkt_burst = mlx5_tx_burst_vec; + tx_pkt_burst = mlx5_tx_burst_vec; DEBUG("selected Enhanced MPW TX vectorized function"); } else { - dev->tx_pkt_burst = mlx5_tx_burst_empw; + tx_pkt_burst = mlx5_tx_burst_empw; DEBUG("selected Enhanced MPW TX function"); } - } else if (priv->mps && priv->txq_inline) { - dev->tx_pkt_burst = mlx5_tx_burst_mpw_inline; + } else if (config->mps && (config->txq_inline > 0)) { + tx_pkt_burst = mlx5_tx_burst_mpw_inline; DEBUG("selected MPW inline TX function"); - } else if (priv->mps) { - dev->tx_pkt_burst = mlx5_tx_burst_mpw; + } else if (config->mps) { + tx_pkt_burst = mlx5_tx_burst_mpw; DEBUG("selected MPW TX function"); } + return tx_pkt_burst; } /** @@ -1461,16 +1500,39 @@ priv_dev_select_tx_function(struct priv *priv, struct rte_eth_dev *dev) * Pointer to private data structure. * @param dev * Pointer to rte_eth_dev structure. + * + * @return + * Pointer to selected Rx burst function. */ -void -priv_dev_select_rx_function(struct priv *priv, struct rte_eth_dev *dev) +eth_rx_burst_t +priv_select_rx_function(struct priv *priv, __rte_unused struct rte_eth_dev *dev) { + eth_rx_burst_t rx_pkt_burst = mlx5_rx_burst; + assert(priv != NULL); - assert(dev != NULL); if (priv_check_vec_rx_support(priv) > 0) { - dev->rx_pkt_burst = mlx5_rx_burst_vec; + rx_pkt_burst = mlx5_rx_burst_vec; DEBUG("selected RX vectorized function"); - } else { - dev->rx_pkt_burst = mlx5_rx_burst; } + return rx_pkt_burst; +} + +/** + * Check if mlx5 device was removed. + * + * @param dev + * Pointer to Ethernet device structure. + * + * @return + * 1 when device is removed, otherwise 0. + */ +int +mlx5_is_removed(struct rte_eth_dev *dev) +{ + struct ibv_device_attr device_attr; + struct priv *priv = dev->data->dev_private; + + if (mlx5_glue->query_device(priv->ctx, &device_attr) == EIO) + return 1; + return 0; } diff --git a/drivers/net/mlx5/mlx5_flow.c b/drivers/net/mlx5/mlx5_flow.c index f32dfdd3..26002c4b 100644 --- a/drivers/net/mlx5/mlx5_flow.c +++ b/drivers/net/mlx5/mlx5_flow.c @@ -1,34 +1,6 @@ -/*- - * BSD LICENSE - * - * Copyright 2016 6WIND S.A. - * Copyright 2016 Mellanox. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * * Neither the name of 6WIND S.A. nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright 2016 6WIND S.A. + * Copyright 2016 Mellanox. */ #include <sys/queue.h> @@ -44,13 +16,16 @@ #pragma GCC diagnostic error "-Wpedantic" #endif -#include <rte_ethdev.h> +#include <rte_ethdev_driver.h> #include <rte_flow.h> #include <rte_flow_driver.h> #include <rte_malloc.h> +#include <rte_ip.h> #include "mlx5.h" +#include "mlx5_defs.h" #include "mlx5_prm.h" +#include "mlx5_glue.h" /* Define minimal priority for control plane flows. */ #define MLX5_CTRL_FLOW_PRIORITY 4 @@ -60,22 +35,9 @@ #define MLX5_IPV6 6 #ifndef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT -struct ibv_counter_set_init_attr { - int dummy; -}; struct ibv_flow_spec_counter_action { int dummy; }; -struct ibv_counter_set { - int dummy; -}; - -static inline int -ibv_destroy_counter_set(struct ibv_counter_set *cs) -{ - (void)cs; - return -ENOTSUP; -} #endif /* Dev ops structure defined in mlx5.c */ @@ -250,11 +212,8 @@ struct rte_flow { uint8_t rss_key[40]; /**< copy of the RSS key. */ struct ibv_counter_set *cs; /**< Holds the counters for the rule. */ struct mlx5_flow_counter_stats counter_stats;/**<The counter stats. */ - union { - struct mlx5_flow frxq[RTE_DIM(hash_rxq_init)]; - /**< Flow with Rx queue. */ - struct mlx5_flow_drop drxq; /**< Flow with drop Rx queue. */ - }; + struct mlx5_flow frxq[RTE_DIM(hash_rxq_init)]; + /**< Flow with Rx queue. */ }; /** Static initializer for items. */ @@ -444,20 +403,12 @@ struct mlx5_flow_parse { uint8_t rss_key[40]; /**< copy of the RSS key. */ enum hash_rxq_type layer; /**< Last pattern layer detected. */ struct ibv_counter_set *cs; /**< Holds the counter set for the rule */ - union { - struct { - struct ibv_flow_attr *ibv_attr; - /**< Pointer to Verbs attributes. */ - unsigned int offset; - /**< Current position or total size of the attribute. */ - } queue[RTE_DIM(hash_rxq_init)]; - struct { - struct ibv_flow_attr *ibv_attr; - /**< Pointer to Verbs attributes. */ - unsigned int offset; - /**< Current position or total size of the attribute. */ - } drop_q; - }; + struct { + struct ibv_flow_attr *ibv_attr; + /**< Pointer to Verbs attributes. */ + unsigned int offset; + /**< Current position or total size of the attribute. */ + } queue[RTE_DIM(hash_rxq_init)]; }; static const struct rte_flow_ops mlx5_flow_ops = { @@ -537,7 +488,7 @@ mlx5_flow_item_validate(const struct rte_flow_item *item, } if (item->mask) { unsigned int i; - const uint8_t *spec = item->mask; + const uint8_t *spec = item->spec; for (i = 0; i < size; ++i) if ((spec[i] | mask[i]) != mask[i]) @@ -561,7 +512,8 @@ mlx5_flow_item_validate(const struct rte_flow_item *item, } /** - * Copy the RSS configuration from the user ones. + * Copy the RSS configuration from the user ones, of the rss_conf is null, + * uses the driver one. * * @param priv * Pointer to private structure. @@ -578,15 +530,25 @@ priv_flow_convert_rss_conf(struct priv *priv, struct mlx5_flow_parse *parser, const struct rte_eth_rss_conf *rss_conf) { - const struct rte_eth_rss_conf *rss = - rss_conf ? rss_conf : &priv->rss_conf; - - if (rss->rss_key_len > 40) - return EINVAL; - parser->rss_conf.rss_key_len = rss->rss_key_len; - parser->rss_conf.rss_hf = rss->rss_hf; - memcpy(parser->rss_key, rss->rss_key, rss->rss_key_len); - parser->rss_conf.rss_key = parser->rss_key; + /* + * This function is also called at the beginning of + * priv_flow_convert_actions() to initialize the parser with the + * device default RSS configuration. + */ + (void)priv; + if (rss_conf) { + if (rss_conf->rss_hf & MLX5_RSS_HF_MASK) + return EINVAL; + if (rss_conf->rss_key_len != 40) + return EINVAL; + if (rss_conf->rss_key_len && rss_conf->rss_key) { + parser->rss_conf.rss_key_len = rss_conf->rss_key_len; + memcpy(parser->rss_key, rss_conf->rss_key, + rss_conf->rss_key_len); + parser->rss_conf.rss_key = parser->rss_key; + } + parser->rss_conf.rss_hf = rss_conf->rss_hf; + } return 0; } @@ -781,7 +743,7 @@ priv_flow_convert_actions(struct priv *priv, } else if (actions->type == RTE_FLOW_ACTION_TYPE_FLAG) { parser->mark = 1; } else if (actions->type == RTE_FLOW_ACTION_TYPE_COUNT && - priv->counter_set_supported) { + priv->config.flow_counter_en) { parser->count = 1; } else { goto exit_action_not_supported; @@ -827,12 +789,8 @@ priv_flow_convert_items_validate(struct priv *priv, (void)priv; /* Initialise the offsets to start after verbs attribute. */ - if (parser->drop) { - parser->drop_q.offset = sizeof(struct ibv_flow_attr); - } else { - for (i = 0; i != hash_rxq_init_n; ++i) - parser->queue[i].offset = sizeof(struct ibv_flow_attr); - } + for (i = 0; i != hash_rxq_init_n; ++i) + parser->queue[i].offset = sizeof(struct ibv_flow_attr); for (; items->type != RTE_FLOW_ITEM_TYPE_END; ++items) { const struct mlx5_flow_items *token = NULL; unsigned int n; @@ -869,14 +827,16 @@ priv_flow_convert_items_validate(struct priv *priv, parser->inner = IBV_FLOW_SPEC_INNER; } if (parser->drop) { - parser->drop_q.offset += cur_item->dst_sz; - } else if (parser->queues_n == 1) { parser->queue[HASH_RXQ_ETH].offset += cur_item->dst_sz; } else { for (n = 0; n != hash_rxq_init_n; ++n) parser->queue[n].offset += cur_item->dst_sz; } } + if (parser->drop) { + parser->queue[HASH_RXQ_ETH].offset += + sizeof(struct ibv_flow_spec_action_drop); + } if (parser->mark) { for (i = 0; i != hash_rxq_init_n; ++i) parser->queue[i].offset += @@ -885,12 +845,8 @@ priv_flow_convert_items_validate(struct priv *priv, if (parser->count) { unsigned int size = sizeof(struct ibv_flow_spec_counter_action); - if (parser->drop) { - parser->drop_q.offset += size; - } else { - for (i = 0; i != hash_rxq_init_n; ++i) - parser->queue[i].offset += size; - } + for (i = 0; i != hash_rxq_init_n; ++i) + parser->queue[i].offset += size; } return 0; exit_item_not_supported: @@ -1103,14 +1059,6 @@ priv_flow_convert(struct priv *priv, * Allocate the memory space to store verbs specifications. */ if (parser->drop) { - parser->drop_q.ibv_attr = - priv_flow_convert_allocate(priv, attr->priority, - parser->drop_q.offset, - error); - if (!parser->drop_q.ibv_attr) - return ENOMEM; - parser->drop_q.offset = sizeof(struct ibv_flow_attr); - } else if (parser->queues_n == 1) { unsigned int priority = attr->priority + hash_rxq_init[HASH_RXQ_ETH].flow_priority; @@ -1172,22 +1120,9 @@ priv_flow_convert(struct priv *priv, * Last step. Complete missing specification to reach the RSS * configuration. */ - if (parser->drop) { - /* - * Drop queue priority needs to be adjusted to - * their most specific layer priority. - */ - parser->drop_q.ibv_attr->priority = - attr->priority + - hash_rxq_init[parser->layer].flow_priority; - } else if (parser->queues_n > 1) { + if (!parser->drop) { priv_flow_convert_finalise(priv, parser); } else { - /* - * Action queue have their priority overridden with - * Ethernet priority, this priority needs to be adjusted to - * their most specific layer priority. - */ parser->queue[HASH_RXQ_ETH].ibv_attr->priority = attr->priority + hash_rxq_init[parser->layer].flow_priority; @@ -1195,10 +1130,6 @@ priv_flow_convert(struct priv *priv, exit_free: /* Only verification is expected, all resources should be released. */ if (!parser->create) { - if (parser->drop) { - rte_free(parser->drop_q.ibv_attr); - parser->drop_q.ibv_attr = NULL; - } for (i = 0; i != hash_rxq_init_n; ++i) { if (parser->queue[i].ibv_attr) { rte_free(parser->queue[i].ibv_attr); @@ -1240,14 +1171,6 @@ mlx5_flow_create_copy(struct mlx5_flow_parse *parser, void *src, unsigned int i; void *dst; - if (parser->drop) { - dst = (void *)((uintptr_t)parser->drop_q.ibv_attr + - parser->drop_q.offset); - memcpy(dst, src, size); - ++parser->drop_q.ibv_attr->num_of_specs; - parser->drop_q.offset += size; - return; - } for (i = 0; i != hash_rxq_init_n; ++i) { if (!parser->queue[i].ibv_attr) continue; @@ -1340,14 +1263,6 @@ mlx5_flow_create_vlan(const struct rte_flow_item *item, if (!mask) mask = default_mask; - if (parser->drop) { - eth = (void *)((uintptr_t)parser->drop_q.ibv_attr + - parser->drop_q.offset - eth_size); - eth->val.vlan_tag = spec->tci; - eth->mask.vlan_tag = mask->tci; - eth->val.vlan_tag &= eth->mask.vlan_tag; - return 0; - } for (i = 0; i != hash_rxq_init_n; ++i) { if (!parser->queue[i].ibv_attr) continue; @@ -1443,6 +1358,8 @@ mlx5_flow_create_ipv6(const struct rte_flow_item *item, parser->layer = HASH_RXQ_IPV6; if (spec) { unsigned int i; + uint32_t vtc_flow_val; + uint32_t vtc_flow_mask; if (!mask) mask = default_mask; @@ -1454,7 +1371,20 @@ mlx5_flow_create_ipv6(const struct rte_flow_item *item, RTE_DIM(ipv6.mask.src_ip)); memcpy(&ipv6.mask.dst_ip, mask->hdr.dst_addr, RTE_DIM(ipv6.mask.dst_ip)); - ipv6.mask.flow_label = mask->hdr.vtc_flow; + vtc_flow_val = rte_be_to_cpu_32(spec->hdr.vtc_flow); + vtc_flow_mask = rte_be_to_cpu_32(mask->hdr.vtc_flow); + ipv6.val.flow_label = + rte_cpu_to_be_32((vtc_flow_val & IPV6_HDR_FL_MASK) >> + IPV6_HDR_FL_SHIFT); + ipv6.val.traffic_class = (vtc_flow_val & IPV6_HDR_TC_MASK) >> + IPV6_HDR_TC_SHIFT; + ipv6.val.next_hdr = spec->hdr.proto; + ipv6.val.hop_limit = spec->hdr.hop_limits; + ipv6.mask.flow_label = + rte_cpu_to_be_32((vtc_flow_mask & IPV6_HDR_FL_MASK) >> + IPV6_HDR_FL_SHIFT); + ipv6.mask.traffic_class = (vtc_flow_mask & IPV6_HDR_TC_MASK) >> + IPV6_HDR_TC_SHIFT; ipv6.mask.next_hdr = mask->hdr.proto; ipv6.mask.hop_limit = mask->hdr.hop_limits; /* Remove unwanted bits from values. */ @@ -1463,6 +1393,7 @@ mlx5_flow_create_ipv6(const struct rte_flow_item *item, ipv6.val.dst_ip[i] &= ipv6.mask.dst_ip[i]; } ipv6.val.flow_label &= ipv6.mask.flow_label; + ipv6.val.traffic_class &= ipv6.mask.traffic_class; ipv6.val.next_hdr &= ipv6.mask.next_hdr; ipv6.val.hop_limit &= ipv6.mask.hop_limit; } @@ -1664,7 +1595,7 @@ mlx5_flow_create_count(struct priv *priv __rte_unused, }; init_attr.counter_set_id = 0; - parser->cs = ibv_create_counter_set(priv->ctx, &init_attr); + parser->cs = mlx5_glue->create_counter_set(priv->ctx, &init_attr); if (!parser->cs) return EINVAL; counter.counter_set_handle = parser->cs->handle; @@ -1701,23 +1632,25 @@ priv_flow_create_action_queue_drop(struct priv *priv, assert(priv->pd); assert(priv->ctx); flow->drop = 1; - drop = (void *)((uintptr_t)parser->drop_q.ibv_attr + - parser->drop_q.offset); + drop = (void *)((uintptr_t)parser->queue[HASH_RXQ_ETH].ibv_attr + + parser->queue[HASH_RXQ_ETH].offset); *drop = (struct ibv_flow_spec_action_drop){ .type = IBV_FLOW_SPEC_ACTION_DROP, .size = size, }; - ++parser->drop_q.ibv_attr->num_of_specs; - parser->drop_q.offset += size; - flow->drxq.ibv_attr = parser->drop_q.ibv_attr; + ++parser->queue[HASH_RXQ_ETH].ibv_attr->num_of_specs; + parser->queue[HASH_RXQ_ETH].offset += size; + flow->frxq[HASH_RXQ_ETH].ibv_attr = + parser->queue[HASH_RXQ_ETH].ibv_attr; if (parser->count) flow->cs = parser->cs; if (!priv->dev->data->dev_started) return 0; - parser->drop_q.ibv_attr = NULL; - flow->drxq.ibv_flow = ibv_create_flow(priv->flow_drop_queue->qp, - flow->drxq.ibv_attr); - if (!flow->drxq.ibv_flow) { + parser->queue[HASH_RXQ_ETH].ibv_attr = NULL; + flow->frxq[HASH_RXQ_ETH].ibv_flow = + mlx5_glue->create_flow(priv->flow_drop_queue->qp, + flow->frxq[HASH_RXQ_ETH].ibv_attr); + if (!flow->frxq[HASH_RXQ_ETH].ibv_flow) { rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE, NULL, "flow rule creation failure"); err = ENOMEM; @@ -1726,16 +1659,17 @@ priv_flow_create_action_queue_drop(struct priv *priv, return 0; error: assert(flow); - if (flow->drxq.ibv_flow) { - claim_zero(ibv_destroy_flow(flow->drxq.ibv_flow)); - flow->drxq.ibv_flow = NULL; + if (flow->frxq[HASH_RXQ_ETH].ibv_flow) { + claim_zero(mlx5_glue->destroy_flow + (flow->frxq[HASH_RXQ_ETH].ibv_flow)); + flow->frxq[HASH_RXQ_ETH].ibv_flow = NULL; } - if (flow->drxq.ibv_attr) { - rte_free(flow->drxq.ibv_attr); - flow->drxq.ibv_attr = NULL; + if (flow->frxq[HASH_RXQ_ETH].ibv_attr) { + rte_free(flow->frxq[HASH_RXQ_ETH].ibv_attr); + flow->frxq[HASH_RXQ_ETH].ibv_attr = NULL; } if (flow->cs) { - claim_zero(ibv_destroy_counter_set(flow->cs)); + claim_zero(mlx5_glue->destroy_counter_set(flow->cs)); flow->cs = NULL; parser->cs = NULL; } @@ -1839,8 +1773,8 @@ priv_flow_create_action_queue(struct priv *priv, if (!flow->frxq[i].hrxq) continue; flow->frxq[i].ibv_flow = - ibv_create_flow(flow->frxq[i].hrxq->qp, - flow->frxq[i].ibv_attr); + mlx5_glue->create_flow(flow->frxq[i].hrxq->qp, + flow->frxq[i].ibv_attr); if (!flow->frxq[i].ibv_flow) { rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE, @@ -1866,7 +1800,7 @@ error: if (flow->frxq[i].ibv_flow) { struct ibv_flow *ibv_flow = flow->frxq[i].ibv_flow; - claim_zero(ibv_destroy_flow(ibv_flow)); + claim_zero(mlx5_glue->destroy_flow(ibv_flow)); } if (flow->frxq[i].hrxq) mlx5_priv_hrxq_release(priv, flow->frxq[i].hrxq); @@ -1874,7 +1808,7 @@ error: rte_free(flow->frxq[i].ibv_attr); } if (flow->cs) { - claim_zero(ibv_destroy_counter_set(flow->cs)); + claim_zero(mlx5_glue->destroy_counter_set(flow->cs)); flow->cs = NULL; parser->cs = NULL; } @@ -1947,13 +1881,10 @@ priv_flow_create(struct priv *priv, DEBUG("Flow created %p", (void *)flow); return flow; exit: - if (parser.drop) { - rte_free(parser.drop_q.ibv_attr); - } else { - for (i = 0; i != hash_rxq_init_n; ++i) { - if (parser.queue[i].ibv_attr) - rte_free(parser.queue[i].ibv_attr); - } + ERROR("flow creation error: %s", error->message); + for (i = 0; i != hash_rxq_init_n; ++i) { + if (parser.queue[i].ibv_attr) + rte_free(parser.queue[i].ibv_attr); } rte_free(flow); return NULL; @@ -2055,15 +1986,17 @@ priv_flow_destroy(struct priv *priv, } free: if (flow->drop) { - if (flow->drxq.ibv_flow) - claim_zero(ibv_destroy_flow(flow->drxq.ibv_flow)); - rte_free(flow->drxq.ibv_attr); + if (flow->frxq[HASH_RXQ_ETH].ibv_flow) + claim_zero(mlx5_glue->destroy_flow + (flow->frxq[HASH_RXQ_ETH].ibv_flow)); + rte_free(flow->frxq[HASH_RXQ_ETH].ibv_attr); } else { for (i = 0; i != hash_rxq_init_n; ++i) { struct mlx5_flow *frxq = &flow->frxq[i]; if (frxq->ibv_flow) - claim_zero(ibv_destroy_flow(frxq->ibv_flow)); + claim_zero(mlx5_glue->destroy_flow + (frxq->ibv_flow)); if (frxq->hrxq) mlx5_priv_hrxq_release(priv, frxq->hrxq); if (frxq->ibv_attr) @@ -2071,7 +2004,7 @@ free: } } if (flow->cs) { - claim_zero(ibv_destroy_counter_set(flow->cs)); + claim_zero(mlx5_glue->destroy_counter_set(flow->cs)); flow->cs = NULL; } TAILQ_REMOVE(list, flow, next); @@ -2119,35 +2052,38 @@ priv_flow_create_drop_queue(struct priv *priv) WARN("cannot allocate memory for drop queue"); goto error; } - fdq->cq = ibv_create_cq(priv->ctx, 1, NULL, NULL, 0); + fdq->cq = mlx5_glue->create_cq(priv->ctx, 1, NULL, NULL, 0); if (!fdq->cq) { WARN("cannot allocate CQ for drop queue"); goto error; } - fdq->wq = ibv_create_wq(priv->ctx, - &(struct ibv_wq_init_attr){ + fdq->wq = mlx5_glue->create_wq + (priv->ctx, + &(struct ibv_wq_init_attr){ .wq_type = IBV_WQT_RQ, .max_wr = 1, .max_sge = 1, .pd = priv->pd, .cq = fdq->cq, - }); + }); if (!fdq->wq) { WARN("cannot allocate WQ for drop queue"); goto error; } - fdq->ind_table = ibv_create_rwq_ind_table(priv->ctx, - &(struct ibv_rwq_ind_table_init_attr){ + fdq->ind_table = mlx5_glue->create_rwq_ind_table + (priv->ctx, + &(struct ibv_rwq_ind_table_init_attr){ .log_ind_tbl_size = 0, .ind_tbl = &fdq->wq, .comp_mask = 0, - }); + }); if (!fdq->ind_table) { WARN("cannot allocate indirection table for drop queue"); goto error; } - fdq->qp = ibv_create_qp_ex(priv->ctx, - &(struct ibv_qp_init_attr_ex){ + fdq->qp = mlx5_glue->create_qp_ex + (priv->ctx, + &(struct ibv_qp_init_attr_ex){ .qp_type = IBV_QPT_RAW_PACKET, .comp_mask = IBV_QP_INIT_ATTR_PD | @@ -2162,7 +2098,7 @@ priv_flow_create_drop_queue(struct priv *priv) }, .rwq_ind_tbl = fdq->ind_table, .pd = priv->pd - }); + }); if (!fdq->qp) { WARN("cannot allocate QP for drop queue"); goto error; @@ -2171,13 +2107,13 @@ priv_flow_create_drop_queue(struct priv *priv) return 0; error: if (fdq->qp) - claim_zero(ibv_destroy_qp(fdq->qp)); + claim_zero(mlx5_glue->destroy_qp(fdq->qp)); if (fdq->ind_table) - claim_zero(ibv_destroy_rwq_ind_table(fdq->ind_table)); + claim_zero(mlx5_glue->destroy_rwq_ind_table(fdq->ind_table)); if (fdq->wq) - claim_zero(ibv_destroy_wq(fdq->wq)); + claim_zero(mlx5_glue->destroy_wq(fdq->wq)); if (fdq->cq) - claim_zero(ibv_destroy_cq(fdq->cq)); + claim_zero(mlx5_glue->destroy_cq(fdq->cq)); if (fdq) rte_free(fdq); priv->flow_drop_queue = NULL; @@ -2198,13 +2134,13 @@ priv_flow_delete_drop_queue(struct priv *priv) if (!fdq) return; if (fdq->qp) - claim_zero(ibv_destroy_qp(fdq->qp)); + claim_zero(mlx5_glue->destroy_qp(fdq->qp)); if (fdq->ind_table) - claim_zero(ibv_destroy_rwq_ind_table(fdq->ind_table)); + claim_zero(mlx5_glue->destroy_rwq_ind_table(fdq->ind_table)); if (fdq->wq) - claim_zero(ibv_destroy_wq(fdq->wq)); + claim_zero(mlx5_glue->destroy_wq(fdq->wq)); if (fdq->cq) - claim_zero(ibv_destroy_cq(fdq->cq)); + claim_zero(mlx5_glue->destroy_cq(fdq->cq)); rte_free(fdq); priv->flow_drop_queue = NULL; } @@ -2224,23 +2160,34 @@ priv_flow_stop(struct priv *priv, struct mlx5_flows *list) TAILQ_FOREACH_REVERSE(flow, list, mlx5_flows, next) { unsigned int i; + struct mlx5_ind_table_ibv *ind_tbl = NULL; if (flow->drop) { - if (!flow->drxq.ibv_flow) + if (!flow->frxq[HASH_RXQ_ETH].ibv_flow) continue; - claim_zero(ibv_destroy_flow(flow->drxq.ibv_flow)); - flow->drxq.ibv_flow = NULL; + claim_zero(mlx5_glue->destroy_flow + (flow->frxq[HASH_RXQ_ETH].ibv_flow)); + flow->frxq[HASH_RXQ_ETH].ibv_flow = NULL; + DEBUG("Flow %p removed", (void *)flow); /* Next flow. */ continue; } + /* Verify the flow has not already been cleaned. */ + for (i = 0; i != hash_rxq_init_n; ++i) { + if (!flow->frxq[i].ibv_flow) + continue; + /* + * Indirection table may be necessary to remove the + * flags in the Rx queues. + * This helps to speed-up the process by avoiding + * another loop. + */ + ind_tbl = flow->frxq[i].hrxq->ind_table; + break; + } + if (i == hash_rxq_init_n) + return; if (flow->mark) { - struct mlx5_ind_table_ibv *ind_tbl = NULL; - - for (i = 0; i != hash_rxq_init_n; ++i) { - if (!flow->frxq[i].hrxq) - continue; - ind_tbl = flow->frxq[i].hrxq->ind_table; - } assert(ind_tbl); for (i = 0; i != ind_tbl->queues_n; ++i) (*priv->rxqs)[ind_tbl->queues[i]]->mark = 0; @@ -2248,7 +2195,8 @@ priv_flow_stop(struct priv *priv, struct mlx5_flows *list) for (i = 0; i != hash_rxq_init_n; ++i) { if (!flow->frxq[i].ibv_flow) continue; - claim_zero(ibv_destroy_flow(flow->frxq[i].ibv_flow)); + claim_zero(mlx5_glue->destroy_flow + (flow->frxq[i].ibv_flow)); flow->frxq[i].ibv_flow = NULL; mlx5_priv_hrxq_release(priv, flow->frxq[i].hrxq); flow->frxq[i].hrxq = NULL; @@ -2277,10 +2225,11 @@ priv_flow_start(struct priv *priv, struct mlx5_flows *list) unsigned int i; if (flow->drop) { - flow->drxq.ibv_flow = - ibv_create_flow(priv->flow_drop_queue->qp, - flow->drxq.ibv_attr); - if (!flow->drxq.ibv_flow) { + flow->frxq[HASH_RXQ_ETH].ibv_flow = + mlx5_glue->create_flow + (priv->flow_drop_queue->qp, + flow->frxq[HASH_RXQ_ETH].ibv_attr); + if (!flow->frxq[HASH_RXQ_ETH].ibv_flow) { DEBUG("Flow %p cannot be applied", (void *)flow); rte_errno = EINVAL; @@ -2315,8 +2264,8 @@ priv_flow_start(struct priv *priv, struct mlx5_flows *list) } flow_create: flow->frxq[i].ibv_flow = - ibv_create_flow(flow->frxq[i].hrxq->qp, - flow->frxq[i].ibv_attr); + mlx5_glue->create_flow(flow->frxq[i].hrxq->qp, + flow->frxq[i].ibv_attr); if (!flow->frxq[i].ibv_flow) { DEBUG("Flow %p cannot be applied", (void *)flow); @@ -2523,7 +2472,7 @@ priv_flow_query_count(struct ibv_counter_set *cs, .out = counters, .outlen = 2 * sizeof(uint64_t), }; - int res = ibv_query_counter_set(&query_cs_attr, &query_out); + int res = mlx5_glue->query_counter_set(&query_cs_attr, &query_out); if (res) { rte_flow_error_set(error, -res, @@ -2671,10 +2620,12 @@ priv_fdir_filter_convert(struct priv *priv, attributes->items[1] = (struct rte_flow_item){ .type = RTE_FLOW_ITEM_TYPE_IPV4, .spec = &attributes->l3, + .mask = &attributes->l3, }; attributes->items[2] = (struct rte_flow_item){ .type = RTE_FLOW_ITEM_TYPE_UDP, .spec = &attributes->l4, + .mask = &attributes->l4, }; break; case RTE_ETH_FLOW_NONFRAG_IPV4_TCP: @@ -2692,10 +2643,12 @@ priv_fdir_filter_convert(struct priv *priv, attributes->items[1] = (struct rte_flow_item){ .type = RTE_FLOW_ITEM_TYPE_IPV4, .spec = &attributes->l3, + .mask = &attributes->l3, }; attributes->items[2] = (struct rte_flow_item){ .type = RTE_FLOW_ITEM_TYPE_TCP, .spec = &attributes->l4, + .mask = &attributes->l4, }; break; case RTE_ETH_FLOW_NONFRAG_IPV4_OTHER: @@ -2709,6 +2662,7 @@ priv_fdir_filter_convert(struct priv *priv, attributes->items[1] = (struct rte_flow_item){ .type = RTE_FLOW_ITEM_TYPE_IPV4, .spec = &attributes->l3, + .mask = &attributes->l3, }; break; case RTE_ETH_FLOW_NONFRAG_IPV6_UDP: @@ -2729,10 +2683,12 @@ priv_fdir_filter_convert(struct priv *priv, attributes->items[1] = (struct rte_flow_item){ .type = RTE_FLOW_ITEM_TYPE_IPV6, .spec = &attributes->l3, + .mask = &attributes->l3, }; attributes->items[2] = (struct rte_flow_item){ .type = RTE_FLOW_ITEM_TYPE_UDP, .spec = &attributes->l4, + .mask = &attributes->l4, }; break; case RTE_ETH_FLOW_NONFRAG_IPV6_TCP: @@ -2753,10 +2709,12 @@ priv_fdir_filter_convert(struct priv *priv, attributes->items[1] = (struct rte_flow_item){ .type = RTE_FLOW_ITEM_TYPE_IPV6, .spec = &attributes->l3, + .mask = &attributes->l3, }; attributes->items[2] = (struct rte_flow_item){ .type = RTE_FLOW_ITEM_TYPE_TCP, .spec = &attributes->l4, + .mask = &attributes->l4, }; break; case RTE_ETH_FLOW_NONFRAG_IPV6_OTHER: @@ -2773,6 +2731,7 @@ priv_fdir_filter_convert(struct priv *priv, attributes->items[1] = (struct rte_flow_item){ .type = RTE_FLOW_ITEM_TYPE_IPV6, .spec = &attributes->l3, + .mask = &attributes->l3, }; break; default: @@ -2875,13 +2834,13 @@ priv_fdir_filter_delete(struct priv *priv, if (parser.drop) { struct ibv_flow_spec_action_drop *drop; - drop = (void *)((uintptr_t)parser.drop_q.ibv_attr + - parser.drop_q.offset); + drop = (void *)((uintptr_t)parser.queue[HASH_RXQ_ETH].ibv_attr + + parser.queue[HASH_RXQ_ETH].offset); *drop = (struct ibv_flow_spec_action_drop){ .type = IBV_FLOW_SPEC_ACTION_DROP, .size = sizeof(struct ibv_flow_spec_action_drop), }; - parser.drop_q.ibv_attr->num_of_specs++; + parser.queue[HASH_RXQ_ETH].ibv_attr->num_of_specs++; } TAILQ_FOREACH(flow, &priv->flows, next) { struct ibv_flow_attr *attr; @@ -2892,14 +2851,8 @@ priv_fdir_filter_delete(struct priv *priv, void *flow_spec; unsigned int specs_n; - if (parser.drop) - attr = parser.drop_q.ibv_attr; - else - attr = parser.queue[HASH_RXQ_ETH].ibv_attr; - if (flow->drop) - flow_attr = flow->drxq.ibv_attr; - else - flow_attr = flow->frxq[HASH_RXQ_ETH].ibv_attr; + attr = parser.queue[HASH_RXQ_ETH].ibv_attr; + flow_attr = flow->frxq[HASH_RXQ_ETH].ibv_attr; /* Compare first the attributes. */ if (memcmp(attr, flow_attr, sizeof(struct ibv_flow_attr))) continue; @@ -2929,13 +2882,9 @@ wrong_flow: if (flow) priv_flow_destroy(priv, &priv->flows, flow); exit: - if (parser.drop) { - rte_free(parser.drop_q.ibv_attr); - } else { - for (i = 0; i != hash_rxq_init_n; ++i) { - if (parser.queue[i].ibv_attr) - rte_free(parser.queue[i].ibv_attr); - } + for (i = 0; i != hash_rxq_init_n; ++i) { + if (parser.queue[i].ibv_attr) + rte_free(parser.queue[i].ibv_attr); } return -ret; } diff --git a/drivers/net/mlx5/mlx5_glue.c b/drivers/net/mlx5/mlx5_glue.c new file mode 100644 index 00000000..1c4396ad --- /dev/null +++ b/drivers/net/mlx5/mlx5_glue.c @@ -0,0 +1,353 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright 2018 6WIND S.A. + * Copyright 2018 Mellanox Technologies, Ltd. + */ + +#include <errno.h> +#include <stddef.h> +#include <stdint.h> + +/* Verbs headers do not support -pedantic. */ +#ifdef PEDANTIC +#pragma GCC diagnostic ignored "-Wpedantic" +#endif +#include <infiniband/mlx5dv.h> +#include <infiniband/verbs.h> +#ifdef PEDANTIC +#pragma GCC diagnostic error "-Wpedantic" +#endif + +#include "mlx5_autoconf.h" +#include "mlx5_glue.h" + +static int +mlx5_glue_fork_init(void) +{ + return ibv_fork_init(); +} + +static struct ibv_pd * +mlx5_glue_alloc_pd(struct ibv_context *context) +{ + return ibv_alloc_pd(context); +} + +static int +mlx5_glue_dealloc_pd(struct ibv_pd *pd) +{ + return ibv_dealloc_pd(pd); +} + +static struct ibv_device ** +mlx5_glue_get_device_list(int *num_devices) +{ + return ibv_get_device_list(num_devices); +} + +static void +mlx5_glue_free_device_list(struct ibv_device **list) +{ + ibv_free_device_list(list); +} + +static struct ibv_context * +mlx5_glue_open_device(struct ibv_device *device) +{ + return ibv_open_device(device); +} + +static int +mlx5_glue_close_device(struct ibv_context *context) +{ + return ibv_close_device(context); +} + +static int +mlx5_glue_query_device(struct ibv_context *context, + struct ibv_device_attr *device_attr) +{ + return ibv_query_device(context, device_attr); +} + +static int +mlx5_glue_query_device_ex(struct ibv_context *context, + const struct ibv_query_device_ex_input *input, + struct ibv_device_attr_ex *attr) +{ + return ibv_query_device_ex(context, input, attr); +} + +static int +mlx5_glue_query_port(struct ibv_context *context, uint8_t port_num, + struct ibv_port_attr *port_attr) +{ + return ibv_query_port(context, port_num, port_attr); +} + +static struct ibv_comp_channel * +mlx5_glue_create_comp_channel(struct ibv_context *context) +{ + return ibv_create_comp_channel(context); +} + +static int +mlx5_glue_destroy_comp_channel(struct ibv_comp_channel *channel) +{ + return ibv_destroy_comp_channel(channel); +} + +static struct ibv_cq * +mlx5_glue_create_cq(struct ibv_context *context, int cqe, void *cq_context, + struct ibv_comp_channel *channel, int comp_vector) +{ + return ibv_create_cq(context, cqe, cq_context, channel, comp_vector); +} + +static int +mlx5_glue_destroy_cq(struct ibv_cq *cq) +{ + return ibv_destroy_cq(cq); +} + +static int +mlx5_glue_get_cq_event(struct ibv_comp_channel *channel, struct ibv_cq **cq, + void **cq_context) +{ + return ibv_get_cq_event(channel, cq, cq_context); +} + +static void +mlx5_glue_ack_cq_events(struct ibv_cq *cq, unsigned int nevents) +{ + ibv_ack_cq_events(cq, nevents); +} + +static struct ibv_rwq_ind_table * +mlx5_glue_create_rwq_ind_table(struct ibv_context *context, + struct ibv_rwq_ind_table_init_attr *init_attr) +{ + return ibv_create_rwq_ind_table(context, init_attr); +} + +static int +mlx5_glue_destroy_rwq_ind_table(struct ibv_rwq_ind_table *rwq_ind_table) +{ + return ibv_destroy_rwq_ind_table(rwq_ind_table); +} + +static struct ibv_wq * +mlx5_glue_create_wq(struct ibv_context *context, + struct ibv_wq_init_attr *wq_init_attr) +{ + return ibv_create_wq(context, wq_init_attr); +} + +static int +mlx5_glue_destroy_wq(struct ibv_wq *wq) +{ + return ibv_destroy_wq(wq); +} +static int +mlx5_glue_modify_wq(struct ibv_wq *wq, struct ibv_wq_attr *wq_attr) +{ + return ibv_modify_wq(wq, wq_attr); +} + +static struct ibv_flow * +mlx5_glue_create_flow(struct ibv_qp *qp, struct ibv_flow_attr *flow) +{ + return ibv_create_flow(qp, flow); +} + +static int +mlx5_glue_destroy_flow(struct ibv_flow *flow_id) +{ + return ibv_destroy_flow(flow_id); +} + +static struct ibv_qp * +mlx5_glue_create_qp(struct ibv_pd *pd, struct ibv_qp_init_attr *qp_init_attr) +{ + return ibv_create_qp(pd, qp_init_attr); +} + +static struct ibv_qp * +mlx5_glue_create_qp_ex(struct ibv_context *context, + struct ibv_qp_init_attr_ex *qp_init_attr_ex) +{ + return ibv_create_qp_ex(context, qp_init_attr_ex); +} + +static int +mlx5_glue_destroy_qp(struct ibv_qp *qp) +{ + return ibv_destroy_qp(qp); +} + +static int +mlx5_glue_modify_qp(struct ibv_qp *qp, struct ibv_qp_attr *attr, int attr_mask) +{ + return ibv_modify_qp(qp, attr, attr_mask); +} + +static struct ibv_mr * +mlx5_glue_reg_mr(struct ibv_pd *pd, void *addr, size_t length, int access) +{ + return ibv_reg_mr(pd, addr, length, access); +} + +static int +mlx5_glue_dereg_mr(struct ibv_mr *mr) +{ + return ibv_dereg_mr(mr); +} + +static struct ibv_counter_set * +mlx5_glue_create_counter_set(struct ibv_context *context, + struct ibv_counter_set_init_attr *init_attr) +{ +#ifndef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT + (void)context; + (void)init_attr; + return NULL; +#else + return ibv_create_counter_set(context, init_attr); +#endif +} + +static int +mlx5_glue_destroy_counter_set(struct ibv_counter_set *cs) +{ +#ifndef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT + (void)cs; + return ENOTSUP; +#else + return ibv_destroy_counter_set(cs); +#endif +} + +static int +mlx5_glue_describe_counter_set(struct ibv_context *context, + uint16_t counter_set_id, + struct ibv_counter_set_description *cs_desc) +{ +#ifndef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT + (void)context; + (void)counter_set_id; + (void)cs_desc; + return ENOTSUP; +#else + return ibv_describe_counter_set(context, counter_set_id, cs_desc); +#endif +} + +static int +mlx5_glue_query_counter_set(struct ibv_query_counter_set_attr *query_attr, + struct ibv_counter_set_data *cs_data) +{ +#ifndef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT + (void)query_attr; + (void)cs_data; + return ENOTSUP; +#else + return ibv_query_counter_set(query_attr, cs_data); +#endif +} + +static void +mlx5_glue_ack_async_event(struct ibv_async_event *event) +{ + ibv_ack_async_event(event); +} + +static int +mlx5_glue_get_async_event(struct ibv_context *context, + struct ibv_async_event *event) +{ + return ibv_get_async_event(context, event); +} + +static const char * +mlx5_glue_port_state_str(enum ibv_port_state port_state) +{ + return ibv_port_state_str(port_state); +} + +static struct ibv_cq * +mlx5_glue_cq_ex_to_cq(struct ibv_cq_ex *cq) +{ + return ibv_cq_ex_to_cq(cq); +} + +static struct ibv_cq_ex * +mlx5_glue_dv_create_cq(struct ibv_context *context, + struct ibv_cq_init_attr_ex *cq_attr, + struct mlx5dv_cq_init_attr *mlx5_cq_attr) +{ + return mlx5dv_create_cq(context, cq_attr, mlx5_cq_attr); +} + +static int +mlx5_glue_dv_query_device(struct ibv_context *ctx, + struct mlx5dv_context *attrs_out) +{ + return mlx5dv_query_device(ctx, attrs_out); +} + +static int +mlx5_glue_dv_set_context_attr(struct ibv_context *ibv_ctx, + enum mlx5dv_set_ctx_attr_type type, void *attr) +{ + return mlx5dv_set_context_attr(ibv_ctx, type, attr); +} + +static int +mlx5_glue_dv_init_obj(struct mlx5dv_obj *obj, uint64_t obj_type) +{ + return mlx5dv_init_obj(obj, obj_type); +} + +const struct mlx5_glue *mlx5_glue = &(const struct mlx5_glue){ + .version = MLX5_GLUE_VERSION, + .fork_init = mlx5_glue_fork_init, + .alloc_pd = mlx5_glue_alloc_pd, + .dealloc_pd = mlx5_glue_dealloc_pd, + .get_device_list = mlx5_glue_get_device_list, + .free_device_list = mlx5_glue_free_device_list, + .open_device = mlx5_glue_open_device, + .close_device = mlx5_glue_close_device, + .query_device = mlx5_glue_query_device, + .query_device_ex = mlx5_glue_query_device_ex, + .query_port = mlx5_glue_query_port, + .create_comp_channel = mlx5_glue_create_comp_channel, + .destroy_comp_channel = mlx5_glue_destroy_comp_channel, + .create_cq = mlx5_glue_create_cq, + .destroy_cq = mlx5_glue_destroy_cq, + .get_cq_event = mlx5_glue_get_cq_event, + .ack_cq_events = mlx5_glue_ack_cq_events, + .create_rwq_ind_table = mlx5_glue_create_rwq_ind_table, + .destroy_rwq_ind_table = mlx5_glue_destroy_rwq_ind_table, + .create_wq = mlx5_glue_create_wq, + .destroy_wq = mlx5_glue_destroy_wq, + .modify_wq = mlx5_glue_modify_wq, + .create_flow = mlx5_glue_create_flow, + .destroy_flow = mlx5_glue_destroy_flow, + .create_qp = mlx5_glue_create_qp, + .create_qp_ex = mlx5_glue_create_qp_ex, + .destroy_qp = mlx5_glue_destroy_qp, + .modify_qp = mlx5_glue_modify_qp, + .reg_mr = mlx5_glue_reg_mr, + .dereg_mr = mlx5_glue_dereg_mr, + .create_counter_set = mlx5_glue_create_counter_set, + .destroy_counter_set = mlx5_glue_destroy_counter_set, + .describe_counter_set = mlx5_glue_describe_counter_set, + .query_counter_set = mlx5_glue_query_counter_set, + .ack_async_event = mlx5_glue_ack_async_event, + .get_async_event = mlx5_glue_get_async_event, + .port_state_str = mlx5_glue_port_state_str, + .cq_ex_to_cq = mlx5_glue_cq_ex_to_cq, + .dv_create_cq = mlx5_glue_dv_create_cq, + .dv_query_device = mlx5_glue_dv_query_device, + .dv_set_context_attr = mlx5_glue_dv_set_context_attr, + .dv_init_obj = mlx5_glue_dv_init_obj, +}; diff --git a/drivers/net/mlx5/mlx5_glue.h b/drivers/net/mlx5/mlx5_glue.h new file mode 100644 index 00000000..b5efee3b --- /dev/null +++ b/drivers/net/mlx5/mlx5_glue.h @@ -0,0 +1,113 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright 2018 6WIND S.A. + * Copyright 2018 Mellanox Technologies, Ltd. + */ + +#ifndef MLX5_GLUE_H_ +#define MLX5_GLUE_H_ + +#include <stddef.h> +#include <stdint.h> + +/* Verbs headers do not support -pedantic. */ +#ifdef PEDANTIC +#pragma GCC diagnostic ignored "-Wpedantic" +#endif +#include <infiniband/mlx5dv.h> +#include <infiniband/verbs.h> +#ifdef PEDANTIC +#pragma GCC diagnostic error "-Wpedantic" +#endif + +#ifndef MLX5_GLUE_VERSION +#define MLX5_GLUE_VERSION "" +#endif + +#ifndef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT +struct ibv_counter_set; +struct ibv_counter_set_data; +struct ibv_counter_set_description; +struct ibv_counter_set_init_attr; +struct ibv_query_counter_set_attr; +#endif + +/* LIB_GLUE_VERSION must be updated every time this structure is modified. */ +struct mlx5_glue { + const char *version; + int (*fork_init)(void); + struct ibv_pd *(*alloc_pd)(struct ibv_context *context); + int (*dealloc_pd)(struct ibv_pd *pd); + struct ibv_device **(*get_device_list)(int *num_devices); + void (*free_device_list)(struct ibv_device **list); + struct ibv_context *(*open_device)(struct ibv_device *device); + int (*close_device)(struct ibv_context *context); + int (*query_device)(struct ibv_context *context, + struct ibv_device_attr *device_attr); + int (*query_device_ex)(struct ibv_context *context, + const struct ibv_query_device_ex_input *input, + struct ibv_device_attr_ex *attr); + int (*query_port)(struct ibv_context *context, uint8_t port_num, + struct ibv_port_attr *port_attr); + struct ibv_comp_channel *(*create_comp_channel) + (struct ibv_context *context); + int (*destroy_comp_channel)(struct ibv_comp_channel *channel); + struct ibv_cq *(*create_cq)(struct ibv_context *context, int cqe, + void *cq_context, + struct ibv_comp_channel *channel, + int comp_vector); + int (*destroy_cq)(struct ibv_cq *cq); + int (*get_cq_event)(struct ibv_comp_channel *channel, + struct ibv_cq **cq, void **cq_context); + void (*ack_cq_events)(struct ibv_cq *cq, unsigned int nevents); + struct ibv_rwq_ind_table *(*create_rwq_ind_table) + (struct ibv_context *context, + struct ibv_rwq_ind_table_init_attr *init_attr); + int (*destroy_rwq_ind_table)(struct ibv_rwq_ind_table *rwq_ind_table); + struct ibv_wq *(*create_wq)(struct ibv_context *context, + struct ibv_wq_init_attr *wq_init_attr); + int (*destroy_wq)(struct ibv_wq *wq); + int (*modify_wq)(struct ibv_wq *wq, struct ibv_wq_attr *wq_attr); + struct ibv_flow *(*create_flow)(struct ibv_qp *qp, + struct ibv_flow_attr *flow); + int (*destroy_flow)(struct ibv_flow *flow_id); + struct ibv_qp *(*create_qp)(struct ibv_pd *pd, + struct ibv_qp_init_attr *qp_init_attr); + struct ibv_qp *(*create_qp_ex) + (struct ibv_context *context, + struct ibv_qp_init_attr_ex *qp_init_attr_ex); + int (*destroy_qp)(struct ibv_qp *qp); + int (*modify_qp)(struct ibv_qp *qp, struct ibv_qp_attr *attr, + int attr_mask); + struct ibv_mr *(*reg_mr)(struct ibv_pd *pd, void *addr, + size_t length, int access); + int (*dereg_mr)(struct ibv_mr *mr); + struct ibv_counter_set *(*create_counter_set) + (struct ibv_context *context, + struct ibv_counter_set_init_attr *init_attr); + int (*destroy_counter_set)(struct ibv_counter_set *cs); + int (*describe_counter_set) + (struct ibv_context *context, + uint16_t counter_set_id, + struct ibv_counter_set_description *cs_desc); + int (*query_counter_set)(struct ibv_query_counter_set_attr *query_attr, + struct ibv_counter_set_data *cs_data); + void (*ack_async_event)(struct ibv_async_event *event); + int (*get_async_event)(struct ibv_context *context, + struct ibv_async_event *event); + const char *(*port_state_str)(enum ibv_port_state port_state); + struct ibv_cq *(*cq_ex_to_cq)(struct ibv_cq_ex *cq); + struct ibv_cq_ex *(*dv_create_cq) + (struct ibv_context *context, + struct ibv_cq_init_attr_ex *cq_attr, + struct mlx5dv_cq_init_attr *mlx5_cq_attr); + int (*dv_query_device)(struct ibv_context *ctx_in, + struct mlx5dv_context *attrs_out); + int (*dv_set_context_attr)(struct ibv_context *ibv_ctx, + enum mlx5dv_set_ctx_attr_type type, + void *attr); + int (*dv_init_obj)(struct mlx5dv_obj *obj, uint64_t obj_type); +}; + +const struct mlx5_glue *mlx5_glue; + +#endif /* MLX5_GLUE_H_ */ diff --git a/drivers/net/mlx5/mlx5_mac.c b/drivers/net/mlx5/mlx5_mac.c index d17b991e..e8a8d459 100644 --- a/drivers/net/mlx5/mlx5_mac.c +++ b/drivers/net/mlx5/mlx5_mac.c @@ -1,34 +1,6 @@ -/*- - * BSD LICENSE - * - * Copyright 2015 6WIND S.A. - * Copyright 2015 Mellanox. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * * Neither the name of 6WIND S.A. nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright 2015 6WIND S.A. + * Copyright 2015 Mellanox. */ #include <stddef.h> @@ -52,7 +24,7 @@ #endif #include <rte_ether.h> -#include <rte_ethdev.h> +#include <rte_ethdev_driver.h> #include <rte_common.h> #include "mlx5.h" @@ -93,11 +65,9 @@ priv_get_mac(struct priv *priv, uint8_t (*mac)[ETHER_ADDR_LEN]) void mlx5_mac_addr_remove(struct rte_eth_dev *dev, uint32_t index) { - if (mlx5_is_secondary()) - return; assert(index < MLX5_MAX_MAC_ADDRESSES); memset(&dev->data->mac_addrs[index], 0, sizeof(struct ether_addr)); - if (!dev->data->promiscuous && !dev->data->all_multicast) + if (!dev->data->promiscuous) mlx5_traffic_restart(dev); } @@ -124,8 +94,6 @@ mlx5_mac_addr_add(struct rte_eth_dev *dev, struct ether_addr *mac, int ret = 0; (void)vmdq; - if (mlx5_is_secondary()) - return 0; assert(index < MLX5_MAX_MAC_ADDRESSES); /* First, make sure this address isn't already configured. */ for (i = 0; (i != MLX5_MAX_MAC_ADDRESSES); ++i) { @@ -138,7 +106,7 @@ mlx5_mac_addr_add(struct rte_eth_dev *dev, struct ether_addr *mac, return EADDRINUSE; } dev->data->mac_addrs[index] = *mac; - if (!dev->data->promiscuous && !dev->data->all_multicast) + if (!dev->data->promiscuous) mlx5_traffic_restart(dev); return ret; } @@ -154,8 +122,6 @@ mlx5_mac_addr_add(struct rte_eth_dev *dev, struct ether_addr *mac, void mlx5_mac_addr_set(struct rte_eth_dev *dev, struct ether_addr *mac_addr) { - if (mlx5_is_secondary()) - return; DEBUG("%p: setting primary MAC address", (void *)dev); mlx5_mac_addr_add(dev, mac_addr, 0, 0); } diff --git a/drivers/net/mlx5/mlx5_mr.c b/drivers/net/mlx5/mlx5_mr.c index 6b29eed5..857dfcd8 100644 --- a/drivers/net/mlx5/mlx5_mr.c +++ b/drivers/net/mlx5/mlx5_mr.c @@ -1,38 +1,8 @@ -/*- - * BSD LICENSE - * - * Copyright 2016 6WIND S.A. - * Copyright 2016 Mellanox. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * * Neither the name of 6WIND S.A. nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright 2016 6WIND S.A. + * Copyright 2016 Mellanox. */ -/* Verbs header. */ -/* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */ #ifdef PEDANTIC #pragma GCC diagnostic ignored "-Wpedantic" #endif @@ -46,6 +16,7 @@ #include "mlx5.h" #include "mlx5_rxtx.h" +#include "mlx5_glue.h" struct mlx5_check_mempool_data { int ret; @@ -141,8 +112,15 @@ priv_txq_mp2mr_reg(struct priv *priv, struct mlx5_txq_data *txq, DEBUG("%p: discovered new memory pool \"%s\" (%p)", (void *)txq_ctrl, mp->name, (void *)mp); mr = priv_mr_get(priv, mp); - if (mr == NULL) + if (mr == NULL) { + if (rte_eal_process_type() != RTE_PROC_PRIMARY) { + DEBUG("Using unregistered mempool 0x%p(%s) in secondary process," + " please create mempool before rte_eth_dev_start()", + (void *)mp, mp->name); + return NULL; + } mr = priv_mr_new(priv, mp); + } if (unlikely(mr == NULL)) { DEBUG("%p: unable to configure MR, ibv_reg_mr() failed.", (void *)txq_ctrl); @@ -291,6 +269,9 @@ priv_mr_new(struct priv *priv, struct rte_mempool *mp) DEBUG("mempool %p area start=%p end=%p size=%zu", (void *)mp, (void *)start, (void *)end, (size_t)(end - start)); + /* Save original addresses for exact MR lookup. */ + mr->start = start; + mr->end = end; /* Round start and end to page boundary if found in memory segments. */ for (i = 0; (i < RTE_MAX_MEMSEG) && (ms[i].addr != NULL); ++i) { uintptr_t addr = (uintptr_t)ms[i].addr; @@ -305,12 +286,10 @@ priv_mr_new(struct priv *priv, struct rte_mempool *mp) DEBUG("mempool %p using start=%p end=%p size=%zu for MR", (void *)mp, (void *)start, (void *)end, (size_t)(end - start)); - mr->mr = ibv_reg_mr(priv->pd, (void *)start, end - start, - IBV_ACCESS_LOCAL_WRITE); + mr->mr = mlx5_glue->reg_mr(priv->pd, (void *)start, end - start, + IBV_ACCESS_LOCAL_WRITE); mr->mp = mp; mr->lkey = rte_cpu_to_be_32(mr->mr->lkey); - mr->start = start; - mr->end = (uintptr_t)mr->mr->addr + mr->mr->length; rte_atomic32_inc(&mr->refcnt); DEBUG("%p: new Memory Region %p refcnt: %d", (void *)priv, (void *)mr, rte_atomic32_read(&mr->refcnt)); @@ -364,7 +343,7 @@ priv_mr_release(struct priv *priv, struct mlx5_mr *mr) DEBUG("Memory Region %p refcnt: %d", (void *)mr, rte_atomic32_read(&mr->refcnt)); if (rte_atomic32_dec_and_test(&mr->refcnt)) { - claim_zero(ibv_dereg_mr(mr->mr)); + claim_zero(mlx5_glue->dereg_mr(mr->mr)); LIST_REMOVE(mr, next); rte_free(mr); return 0; diff --git a/drivers/net/mlx5/mlx5_prm.h b/drivers/net/mlx5/mlx5_prm.h index 2de310bc..9eb9c15e 100644 --- a/drivers/net/mlx5/mlx5_prm.h +++ b/drivers/net/mlx5/mlx5_prm.h @@ -1,34 +1,6 @@ -/*- - * BSD LICENSE - * - * Copyright 2016 6WIND S.A. - * Copyright 2016 Mellanox. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * * Neither the name of 6WIND S.A. nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright 2016 6WIND S.A. + * Copyright 2016 Mellanox. */ #ifndef RTE_PMD_MLX5_PRM_H_ diff --git a/drivers/net/mlx5/mlx5_rss.c b/drivers/net/mlx5/mlx5_rss.c index f3de46de..d06b0bee 100644 --- a/drivers/net/mlx5/mlx5_rss.c +++ b/drivers/net/mlx5/mlx5_rss.c @@ -1,34 +1,6 @@ -/*- - * BSD LICENSE - * - * Copyright 2015 6WIND S.A. - * Copyright 2015 Mellanox. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * * Neither the name of 6WIND S.A. nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright 2015 6WIND S.A. + * Copyright 2015 Mellanox. */ #include <stddef.h> @@ -48,9 +20,10 @@ #endif #include <rte_malloc.h> -#include <rte_ethdev.h> +#include <rte_ethdev_driver.h> #include "mlx5.h" +#include "mlx5_defs.h" #include "mlx5_rxtx.h" /** @@ -72,6 +45,10 @@ mlx5_rss_hash_update(struct rte_eth_dev *dev, int ret = 0; priv_lock(priv); + if (rss_conf->rss_hf & MLX5_RSS_HF_MASK) { + ret = -EINVAL; + goto out; + } if (rss_conf->rss_key && rss_conf->rss_key_len) { priv->rss_conf.rss_key = rte_realloc(priv->rss_conf.rss_key, rss_conf->rss_key_len, 0); @@ -274,7 +251,6 @@ mlx5_dev_rss_reta_update(struct rte_eth_dev *dev, int ret; struct priv *priv = dev->data->dev_private; - assert(!mlx5_is_secondary()); priv_lock(priv); ret = priv_dev_rss_reta_update(priv, reta_conf, reta_size); priv_unlock(priv); diff --git a/drivers/net/mlx5/mlx5_rxmode.c b/drivers/net/mlx5/mlx5_rxmode.c index 0ef2cdf0..4ffc869a 100644 --- a/drivers/net/mlx5/mlx5_rxmode.c +++ b/drivers/net/mlx5/mlx5_rxmode.c @@ -1,34 +1,6 @@ -/*- - * BSD LICENSE - * - * Copyright 2015 6WIND S.A. - * Copyright 2015 Mellanox. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * * Neither the name of 6WIND S.A. nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright 2015 6WIND S.A. + * Copyright 2015 Mellanox. */ #include <stddef.h> @@ -45,7 +17,7 @@ #pragma GCC diagnostic error "-Wpedantic" #endif -#include <rte_ethdev.h> +#include <rte_ethdev_driver.h> #include "mlx5.h" #include "mlx5_rxtx.h" @@ -60,8 +32,6 @@ void mlx5_promiscuous_enable(struct rte_eth_dev *dev) { - if (mlx5_is_secondary()) - return; dev->data->promiscuous = 1; mlx5_traffic_restart(dev); } @@ -75,8 +45,6 @@ mlx5_promiscuous_enable(struct rte_eth_dev *dev) void mlx5_promiscuous_disable(struct rte_eth_dev *dev) { - if (mlx5_is_secondary()) - return; dev->data->promiscuous = 0; mlx5_traffic_restart(dev); } @@ -90,8 +58,6 @@ mlx5_promiscuous_disable(struct rte_eth_dev *dev) void mlx5_allmulticast_enable(struct rte_eth_dev *dev) { - if (mlx5_is_secondary()) - return; dev->data->all_multicast = 1; mlx5_traffic_restart(dev); } @@ -105,8 +71,6 @@ mlx5_allmulticast_enable(struct rte_eth_dev *dev) void mlx5_allmulticast_disable(struct rte_eth_dev *dev) { - if (mlx5_is_secondary()) - return; dev->data->all_multicast = 0; mlx5_traffic_restart(dev); } diff --git a/drivers/net/mlx5/mlx5_rxq.c b/drivers/net/mlx5/mlx5_rxq.c index 85399eff..ff58c492 100644 --- a/drivers/net/mlx5/mlx5_rxq.c +++ b/drivers/net/mlx5/mlx5_rxq.c @@ -1,34 +1,6 @@ -/*- - * BSD LICENSE - * - * Copyright 2015 6WIND S.A. - * Copyright 2015 Mellanox. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * * Neither the name of 6WIND S.A. nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright 2015 6WIND S.A. + * Copyright 2015 Mellanox. */ #include <stddef.h> @@ -52,7 +24,7 @@ #include <rte_mbuf.h> #include <rte_malloc.h> -#include <rte_ethdev.h> +#include <rte_ethdev_driver.h> #include <rte_common.h> #include <rte_interrupts.h> #include <rte_debug.h> @@ -63,6 +35,7 @@ #include "mlx5_utils.h" #include "mlx5_autoconf.h" #include "mlx5_defs.h" +#include "mlx5_glue.h" /* Default RSS hash key also used for ConnectX-3. */ uint8_t rss_hash_default_key[] = { @@ -213,6 +186,78 @@ mlx5_rxq_cleanup(struct mlx5_rxq_ctrl *rxq_ctrl) } /** + * Returns the per-queue supported offloads. + * + * @param priv + * Pointer to private structure. + * + * @return + * Supported Rx offloads. + */ +uint64_t +mlx5_priv_get_rx_queue_offloads(struct priv *priv) +{ + struct mlx5_dev_config *config = &priv->config; + uint64_t offloads = (DEV_RX_OFFLOAD_SCATTER | + DEV_RX_OFFLOAD_TIMESTAMP | + DEV_RX_OFFLOAD_JUMBO_FRAME); + + if (config->hw_fcs_strip) + offloads |= DEV_RX_OFFLOAD_CRC_STRIP; + if (config->hw_csum) + offloads |= (DEV_RX_OFFLOAD_IPV4_CKSUM | + DEV_RX_OFFLOAD_UDP_CKSUM | + DEV_RX_OFFLOAD_TCP_CKSUM); + if (config->hw_vlan_strip) + offloads |= DEV_RX_OFFLOAD_VLAN_STRIP; + return offloads; +} + + +/** + * Returns the per-port supported offloads. + * + * @param priv + * Pointer to private structure. + * @return + * Supported Rx offloads. + */ +uint64_t +mlx5_priv_get_rx_port_offloads(struct priv *priv __rte_unused) +{ + uint64_t offloads = DEV_RX_OFFLOAD_VLAN_FILTER; + + return offloads; +} + +/** + * Checks if the per-queue offload configuration is valid. + * + * @param priv + * Pointer to private structure. + * @param offloads + * Per-queue offloads configuration. + * + * @return + * 1 if the configuration is valid, 0 otherwise. + */ +static int +priv_is_rx_queue_offloads_allowed(struct priv *priv, uint64_t offloads) +{ + uint64_t port_offloads = priv->dev->data->dev_conf.rxmode.offloads; + uint64_t queue_supp_offloads = + mlx5_priv_get_rx_queue_offloads(priv); + uint64_t port_supp_offloads = mlx5_priv_get_rx_port_offloads(priv); + + if ((offloads & (queue_supp_offloads | port_supp_offloads)) != + offloads) + return 0; + if (((port_offloads ^ offloads) & port_supp_offloads)) + return 0; + return 1; +} + +/** * * @param dev * Pointer to Ethernet device structure. @@ -241,9 +286,6 @@ mlx5_rx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc, container_of(rxq, struct mlx5_rxq_ctrl, rxq); int ret = 0; - (void)conf; - if (mlx5_is_secondary()) - return -E_RTE_SECONDARY; priv_lock(priv); if (!rte_is_power_of_2(desc)) { desc = 1 << log2above(desc); @@ -259,6 +301,16 @@ mlx5_rx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc, priv_unlock(priv); return -EOVERFLOW; } + if (!priv_is_rx_queue_offloads_allowed(priv, conf->offloads)) { + ret = ENOTSUP; + ERROR("%p: Rx queue offloads 0x%" PRIx64 " don't match port " + "offloads 0x%" PRIx64 " or supported offloads 0x%" PRIx64, + (void *)dev, conf->offloads, + dev->data->dev_conf.rxmode.offloads, + (mlx5_priv_get_rx_port_offloads(priv) | + mlx5_priv_get_rx_queue_offloads(priv))); + goto out; + } if (!mlx5_priv_rxq_releasable(priv, idx)) { ret = EBUSY; ERROR("%p: unable to release queue index %u", @@ -266,7 +318,7 @@ mlx5_rx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc, goto out; } mlx5_priv_rxq_release(priv, idx); - rxq_ctrl = mlx5_priv_rxq_new(priv, idx, desc, socket, mp); + rxq_ctrl = mlx5_priv_rxq_new(priv, idx, desc, socket, conf, mp); if (!rxq_ctrl) { ERROR("%p: unable to allocate queue index %u", (void *)dev, idx); @@ -294,9 +346,6 @@ mlx5_rx_queue_release(void *dpdk_rxq) struct mlx5_rxq_ctrl *rxq_ctrl; struct priv *priv; - if (mlx5_is_secondary()) - return; - if (rxq == NULL) return; rxq_ctrl = container_of(rxq, struct mlx5_rxq_ctrl, rxq); @@ -327,7 +376,6 @@ priv_rx_intr_vec_enable(struct priv *priv) unsigned int count = 0; struct rte_intr_handle *intr_handle = priv->dev->intr_handle; - assert(!mlx5_is_secondary()); if (!priv->dev->data->dev_conf.intr_conf.rxq) return 0; priv_rx_intr_vec_disable(priv); @@ -442,7 +490,6 @@ mlx5_arm_cq(struct mlx5_rxq_data *rxq, int sq_n_rxq) doorbell = (uint64_t)doorbell_hi << 32; doorbell |= rxq->cqn; rxq->cq_db[MLX5_CQ_ARM_DB] = rte_cpu_to_be_32(doorbell_hi); - rte_wmb(); rte_write64(rte_cpu_to_be_64(doorbell), cq_db_reg); } @@ -460,7 +507,7 @@ mlx5_arm_cq(struct mlx5_rxq_data *rxq, int sq_n_rxq) int mlx5_rx_intr_enable(struct rte_eth_dev *dev, uint16_t rx_queue_id) { - struct priv *priv = mlx5_get_priv(dev); + struct priv *priv = dev->data->dev_private; struct mlx5_rxq_data *rxq_data; struct mlx5_rxq_ctrl *rxq_ctrl; int ret = 0; @@ -504,7 +551,7 @@ exit: int mlx5_rx_intr_disable(struct rte_eth_dev *dev, uint16_t rx_queue_id) { - struct priv *priv = mlx5_get_priv(dev); + struct priv *priv = dev->data->dev_private; struct mlx5_rxq_data *rxq_data; struct mlx5_rxq_ctrl *rxq_ctrl; struct mlx5_rxq_ibv *rxq_ibv = NULL; @@ -526,13 +573,13 @@ mlx5_rx_intr_disable(struct rte_eth_dev *dev, uint16_t rx_queue_id) ret = EINVAL; goto exit; } - ret = ibv_get_cq_event(rxq_ibv->channel, &ev_cq, &ev_ctx); + ret = mlx5_glue->get_cq_event(rxq_ibv->channel, &ev_cq, &ev_ctx); if (ret || ev_cq != rxq_ibv->cq) { ret = EINVAL; goto exit; } rxq_data->cq_arm_sn++; - ibv_ack_cq_events(rxq_ibv->cq, 1); + mlx5_glue->ack_cq_events(rxq_ibv->cq, 1); exit: if (rxq_ibv) mlx5_priv_rxq_ibv_release(priv, rxq_ibv); @@ -576,9 +623,12 @@ mlx5_priv_rxq_ibv_new(struct priv *priv, uint16_t idx) unsigned int i; int ret = 0; struct mlx5dv_obj obj; + struct mlx5_dev_config *config = &priv->config; assert(rxq_data); assert(!rxq_ctrl->ibv); + priv->verbs_alloc_ctx.type = MLX5_VERBS_ALLOC_TYPE_RX_QUEUE; + priv->verbs_alloc_ctx.obj = rxq_ctrl; tmpl = rte_calloc_socket(__func__, 1, sizeof(*tmpl), 0, rxq_ctrl->socket); if (!tmpl) { @@ -597,7 +647,7 @@ mlx5_priv_rxq_ibv_new(struct priv *priv, uint16_t idx) } } if (rxq_ctrl->irq) { - tmpl->channel = ibv_create_comp_channel(priv->ctx); + tmpl->channel = mlx5_glue->create_comp_channel(priv->ctx); if (!tmpl->channel) { ERROR("%p: Comp Channel creation failure", (void *)rxq_ctrl); @@ -612,7 +662,7 @@ mlx5_priv_rxq_ibv_new(struct priv *priv, uint16_t idx) attr.cq.mlx5 = (struct mlx5dv_cq_init_attr){ .comp_mask = 0, }; - if (priv->cqe_comp && !rxq_data->hw_timestamp) { + if (config->cqe_comp && !rxq_data->hw_timestamp) { attr.cq.mlx5.comp_mask |= MLX5DV_CQ_INIT_ATTR_MASK_COMPRESSED_CQE; attr.cq.mlx5.cqe_comp_res_format = MLX5DV_CQE_RES_FORMAT_HASH; @@ -622,11 +672,12 @@ mlx5_priv_rxq_ibv_new(struct priv *priv, uint16_t idx) */ if (rxq_check_vec_support(rxq_data) < 0) attr.cq.ibv.cqe *= 2; - } else if (priv->cqe_comp && rxq_data->hw_timestamp) { + } else if (config->cqe_comp && rxq_data->hw_timestamp) { DEBUG("Rx CQE compression is disabled for HW timestamp"); } - tmpl->cq = ibv_cq_ex_to_cq(mlx5dv_create_cq(priv->ctx, &attr.cq.ibv, - &attr.cq.mlx5)); + tmpl->cq = mlx5_glue->cq_ex_to_cq + (mlx5_glue->dv_create_cq(priv->ctx, &attr.cq.ibv, + &attr.cq.mlx5)); if (tmpl->cq == NULL) { ERROR("%p: CQ creation failure", (void *)rxq_ctrl); goto error; @@ -657,12 +708,12 @@ mlx5_priv_rxq_ibv_new(struct priv *priv, uint16_t idx) attr.wq.comp_mask |= IBV_WQ_INIT_ATTR_FLAGS; } #ifdef HAVE_IBV_WQ_FLAG_RX_END_PADDING - if (priv->hw_padding) { + if (config->hw_padding) { attr.wq.create_flags |= IBV_WQ_FLAG_RX_END_PADDING; attr.wq.comp_mask |= IBV_WQ_INIT_ATTR_FLAGS; } #endif - tmpl->wq = ibv_create_wq(priv->ctx, &attr.wq); + tmpl->wq = mlx5_glue->create_wq(priv->ctx, &attr.wq); if (tmpl->wq == NULL) { ERROR("%p: WQ creation failure", (void *)rxq_ctrl); goto error; @@ -686,7 +737,7 @@ mlx5_priv_rxq_ibv_new(struct priv *priv, uint16_t idx) .attr_mask = IBV_WQ_ATTR_STATE, .wq_state = IBV_WQS_RDY, }; - ret = ibv_modify_wq(tmpl->wq, &mod); + ret = mlx5_glue->modify_wq(tmpl->wq, &mod); if (ret) { ERROR("%p: WQ state to IBV_WQS_RDY failed", (void *)rxq_ctrl); @@ -696,7 +747,7 @@ mlx5_priv_rxq_ibv_new(struct priv *priv, uint16_t idx) obj.cq.out = &cq_info; obj.rwq.in = tmpl->wq; obj.rwq.out = &rwq; - ret = mlx5dv_init_obj(&obj, MLX5DV_OBJ_CQ | MLX5DV_OBJ_RWQ); + ret = mlx5_glue->dv_init_obj(&obj, MLX5DV_OBJ_CQ | MLX5DV_OBJ_RWQ); if (ret != 0) goto error; if (cq_info.cqe_size != RTE_CACHE_LINE_SIZE) { @@ -742,16 +793,18 @@ mlx5_priv_rxq_ibv_new(struct priv *priv, uint16_t idx) DEBUG("%p: Verbs Rx queue %p: refcnt %d", (void *)priv, (void *)tmpl, rte_atomic32_read(&tmpl->refcnt)); LIST_INSERT_HEAD(&priv->rxqsibv, tmpl, next); + priv->verbs_alloc_ctx.type = MLX5_VERBS_ALLOC_TYPE_NONE; return tmpl; error: if (tmpl->wq) - claim_zero(ibv_destroy_wq(tmpl->wq)); + claim_zero(mlx5_glue->destroy_wq(tmpl->wq)); if (tmpl->cq) - claim_zero(ibv_destroy_cq(tmpl->cq)); + claim_zero(mlx5_glue->destroy_cq(tmpl->cq)); if (tmpl->channel) - claim_zero(ibv_destroy_comp_channel(tmpl->channel)); + claim_zero(mlx5_glue->destroy_comp_channel(tmpl->channel)); if (tmpl->mr) priv_mr_release(priv, tmpl->mr); + priv->verbs_alloc_ctx.type = MLX5_VERBS_ALLOC_TYPE_NONE; return NULL; } @@ -814,10 +867,11 @@ mlx5_priv_rxq_ibv_release(struct priv *priv, struct mlx5_rxq_ibv *rxq_ibv) (void *)rxq_ibv, rte_atomic32_read(&rxq_ibv->refcnt)); if (rte_atomic32_dec_and_test(&rxq_ibv->refcnt)) { rxq_free_elts(rxq_ibv->rxq_ctrl); - claim_zero(ibv_destroy_wq(rxq_ibv->wq)); - claim_zero(ibv_destroy_cq(rxq_ibv->cq)); + claim_zero(mlx5_glue->destroy_wq(rxq_ibv->wq)); + claim_zero(mlx5_glue->destroy_cq(rxq_ibv->cq)); if (rxq_ibv->channel) - claim_zero(ibv_destroy_comp_channel(rxq_ibv->channel)); + claim_zero(mlx5_glue->destroy_comp_channel + (rxq_ibv->channel)); LIST_REMOVE(rxq_ibv, next); rte_free(rxq_ibv); return 0; @@ -880,13 +934,19 @@ mlx5_priv_rxq_ibv_releasable(struct priv *priv, struct mlx5_rxq_ibv *rxq_ibv) */ struct mlx5_rxq_ctrl* mlx5_priv_rxq_new(struct priv *priv, uint16_t idx, uint16_t desc, - unsigned int socket, struct rte_mempool *mp) + unsigned int socket, const struct rte_eth_rxconf *conf, + struct rte_mempool *mp) { struct rte_eth_dev *dev = priv->dev; struct mlx5_rxq_ctrl *tmpl; - const uint16_t desc_n = - desc + priv->rx_vec_en * MLX5_VPMD_DESCS_PER_LOOP; unsigned int mb_len = rte_pktmbuf_data_room_size(mp); + struct mlx5_dev_config *config = &priv->config; + /* + * Always allocate extra slots, even if eventually + * the vector Rx will not be used. + */ + const uint16_t desc_n = + desc + config->rx_vec_en * MLX5_VPMD_DESCS_PER_LOOP; tmpl = rte_calloc_socket("RXQ", 1, sizeof(*tmpl) + @@ -902,7 +962,7 @@ mlx5_priv_rxq_new(struct priv *priv, uint16_t idx, uint16_t desc, if (dev->data->dev_conf.rxmode.max_rx_pkt_len <= (mb_len - RTE_PKTMBUF_HEADROOM)) { tmpl->rxq.sges_n = 0; - } else if (dev->data->dev_conf.rxmode.enable_scatter) { + } else if (conf->offloads & DEV_RX_OFFLOAD_SCATTER) { unsigned int size = RTE_PKTMBUF_HEADROOM + dev->data->dev_conf.rxmode.max_rx_pkt_len; @@ -944,20 +1004,16 @@ mlx5_priv_rxq_new(struct priv *priv, uint16_t idx, uint16_t desc, goto error; } /* Toggle RX checksum offload if hardware supports it. */ - if (priv->hw_csum) - tmpl->rxq.csum = !!dev->data->dev_conf.rxmode.hw_ip_checksum; - if (priv->hw_csum_l2tun) - tmpl->rxq.csum_l2tun = - !!dev->data->dev_conf.rxmode.hw_ip_checksum; - tmpl->rxq.hw_timestamp = - !!dev->data->dev_conf.rxmode.hw_timestamp; + tmpl->rxq.csum = !!(conf->offloads & DEV_RX_OFFLOAD_CHECKSUM); + tmpl->rxq.csum_l2tun = (!!(conf->offloads & DEV_RX_OFFLOAD_CHECKSUM) && + priv->config.hw_csum_l2tun); + tmpl->rxq.hw_timestamp = !!(conf->offloads & DEV_RX_OFFLOAD_TIMESTAMP); /* Configure VLAN stripping. */ - tmpl->rxq.vlan_strip = (priv->hw_vlan_strip && - !!dev->data->dev_conf.rxmode.hw_vlan_strip); + tmpl->rxq.vlan_strip = !!(conf->offloads & DEV_RX_OFFLOAD_VLAN_STRIP); /* By default, FCS (CRC) is stripped by hardware. */ - if (dev->data->dev_conf.rxmode.hw_strip_crc) { + if (conf->offloads & DEV_RX_OFFLOAD_CRC_STRIP) { tmpl->rxq.crc_present = 0; - } else if (priv->hw_fcs_strip) { + } else if (config->hw_fcs_strip) { tmpl->rxq.crc_present = 1; } else { WARN("%p: CRC stripping has been disabled but will still" @@ -1121,7 +1177,7 @@ mlx5_priv_ind_table_ibv_new(struct priv *priv, uint16_t queues[], struct mlx5_ind_table_ibv *ind_tbl; const unsigned int wq_n = rte_is_power_of_2(queues_n) ? log2above(queues_n) : - log2above(priv->ind_table_max_size); + log2above(priv->config.ind_table_max_size); struct ibv_wq *wq[1 << wq_n]; unsigned int i; unsigned int j; @@ -1143,13 +1199,13 @@ mlx5_priv_ind_table_ibv_new(struct priv *priv, uint16_t queues[], /* Finalise indirection table. */ for (j = 0; i != (unsigned int)(1 << wq_n); ++i, ++j) wq[i] = wq[j]; - ind_tbl->ind_table = ibv_create_rwq_ind_table( - priv->ctx, - &(struct ibv_rwq_ind_table_init_attr){ + ind_tbl->ind_table = mlx5_glue->create_rwq_ind_table + (priv->ctx, + &(struct ibv_rwq_ind_table_init_attr){ .log_ind_tbl_size = wq_n, .ind_tbl = wq, .comp_mask = 0, - }); + }); if (!ind_tbl->ind_table) goto error; rte_atomic32_inc(&ind_tbl->refcnt); @@ -1221,7 +1277,8 @@ mlx5_priv_ind_table_ibv_release(struct priv *priv, DEBUG("%p: Indirection table %p: refcnt %d", (void *)priv, (void *)ind_tbl, rte_atomic32_read(&ind_tbl->refcnt)); if (rte_atomic32_dec_and_test(&ind_tbl->refcnt)) - claim_zero(ibv_destroy_rwq_ind_table(ind_tbl->ind_table)); + claim_zero(mlx5_glue->destroy_rwq_ind_table + (ind_tbl->ind_table)); for (i = 0; i != ind_tbl->queues_n; ++i) claim_nonzero(mlx5_priv_rxq_release(priv, ind_tbl->queues[i])); if (!rte_atomic32_read(&ind_tbl->refcnt)) { @@ -1288,9 +1345,9 @@ mlx5_priv_hrxq_new(struct priv *priv, uint8_t *rss_key, uint8_t rss_key_len, ind_tbl = mlx5_priv_ind_table_ibv_new(priv, queues, queues_n); if (!ind_tbl) return NULL; - qp = ibv_create_qp_ex( - priv->ctx, - &(struct ibv_qp_init_attr_ex){ + qp = mlx5_glue->create_qp_ex + (priv->ctx, + &(struct ibv_qp_init_attr_ex){ .qp_type = IBV_QPT_RAW_PACKET, .comp_mask = IBV_QP_INIT_ATTR_PD | @@ -1304,7 +1361,7 @@ mlx5_priv_hrxq_new(struct priv *priv, uint8_t *rss_key, uint8_t rss_key_len, }, .rwq_ind_tbl = ind_tbl->ind_table, .pd = priv->pd, - }); + }); if (!qp) goto error; hrxq = rte_calloc(__func__, 1, sizeof(*hrxq) + rss_key_len, 0); @@ -1323,7 +1380,7 @@ mlx5_priv_hrxq_new(struct priv *priv, uint8_t *rss_key, uint8_t rss_key_len, error: mlx5_priv_ind_table_ibv_release(priv, ind_tbl); if (qp) - claim_zero(ibv_destroy_qp(qp)); + claim_zero(mlx5_glue->destroy_qp(qp)); return NULL; } @@ -1391,7 +1448,7 @@ mlx5_priv_hrxq_release(struct priv *priv, struct mlx5_hrxq *hrxq) DEBUG("%p: Hash Rx queue %p: refcnt %d", (void *)priv, (void *)hrxq, rte_atomic32_read(&hrxq->refcnt)); if (rte_atomic32_dec_and_test(&hrxq->refcnt)) { - claim_zero(ibv_destroy_qp(hrxq->qp)); + claim_zero(mlx5_glue->destroy_qp(hrxq->qp)); mlx5_priv_ind_table_ibv_release(priv, hrxq->ind_table); LIST_REMOVE(hrxq, next); rte_free(hrxq); diff --git a/drivers/net/mlx5/mlx5_rxtx.c b/drivers/net/mlx5/mlx5_rxtx.c index 2d30c507..dc4ead93 100644 --- a/drivers/net/mlx5/mlx5_rxtx.c +++ b/drivers/net/mlx5/mlx5_rxtx.c @@ -1,34 +1,6 @@ -/*- - * BSD LICENSE - * - * Copyright 2015 6WIND S.A. - * Copyright 2015 Mellanox. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * * Neither the name of 6WIND S.A. nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright 2015 6WIND S.A. + * Copyright 2015 Mellanox. */ #include <assert.h> @@ -344,15 +316,11 @@ mlx5_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n) unsigned int j = 0; unsigned int k = 0; uint16_t max_elts; - unsigned int max_inline = txq->max_inline; - const unsigned int inline_en = !!max_inline && txq->inline_en; uint16_t max_wqe; unsigned int comp; - volatile struct mlx5_wqe_v *wqe = NULL; volatile struct mlx5_wqe_ctrl *last_wqe = NULL; unsigned int segs_n = 0; - struct rte_mbuf *buf = NULL; - uint8_t *raw; + const unsigned int max_inline = txq->max_inline; if (unlikely(!pkts_n)) return 0; @@ -361,20 +329,24 @@ mlx5_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n) /* Start processing. */ mlx5_tx_complete(txq); max_elts = (elts_n - (elts_head - txq->elts_tail)); + /* A CQE slot must always be available. */ + assert((1u << txq->cqe_n) - (txq->cq_pi - txq->cq_ci)); max_wqe = (1u << txq->wqe_n) - (txq->wqe_ci - txq->wqe_pi); if (unlikely(!max_wqe)) return 0; do { + struct rte_mbuf *buf = NULL; + uint8_t *raw; + volatile struct mlx5_wqe_v *wqe = NULL; volatile rte_v128u32_t *dseg = NULL; uint32_t length; unsigned int ds = 0; unsigned int sg = 0; /* counter of additional segs attached. */ uintptr_t addr; - uint64_t naddr; uint16_t pkt_inline_sz = MLX5_WQE_DWORD_SIZE + 2; uint16_t tso_header_sz = 0; uint16_t ehdr; - uint8_t cs_flags = 0; + uint8_t cs_flags; uint64_t tso = 0; uint16_t tso_segsz = 0; #ifdef MLX5_PMD_SOFT_COUNTERS @@ -392,7 +364,7 @@ mlx5_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n) if (max_elts < segs_n) break; max_elts -= segs_n; - --segs_n; + sg = --segs_n; if (unlikely(--max_wqe == 0)) break; wqe = (volatile struct mlx5_wqe_v *) @@ -417,23 +389,7 @@ mlx5_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n) if (pkts_n - i > 1) rte_prefetch0( rte_pktmbuf_mtod(*(pkts + 1), volatile void *)); - /* Should we enable HW CKSUM offload */ - if (buf->ol_flags & - (PKT_TX_IP_CKSUM | PKT_TX_TCP_CKSUM | PKT_TX_UDP_CKSUM)) { - const uint64_t is_tunneled = buf->ol_flags & - (PKT_TX_TUNNEL_GRE | - PKT_TX_TUNNEL_VXLAN); - - if (is_tunneled && txq->tunnel_en) { - cs_flags = MLX5_ETH_WQE_L3_INNER_CSUM | - MLX5_ETH_WQE_L4_INNER_CSUM; - if (buf->ol_flags & PKT_TX_OUTER_IP_CKSUM) - cs_flags |= MLX5_ETH_WQE_L3_CSUM; - } else { - cs_flags = MLX5_ETH_WQE_L3_CSUM | - MLX5_ETH_WQE_L4_CSUM; - } - } + cs_flags = txq_ol_cksum_to_cs(txq, buf); raw = ((uint8_t *)(uintptr_t)wqe) + 2 * MLX5_WQE_DWORD_SIZE; /* Replace the Ethernet type by the VLAN if necessary. */ if (buf->ol_flags & PKT_TX_VLAN_PKT) { @@ -459,82 +415,72 @@ mlx5_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n) addr += pkt_inline_sz; } raw += MLX5_WQE_DWORD_SIZE; - if (txq->tso_en) { - tso = buf->ol_flags & PKT_TX_TCP_SEG; - if (tso) { - uintptr_t end = (uintptr_t) - (((uintptr_t)txq->wqes) + - (1 << txq->wqe_n) * - MLX5_WQE_SIZE); - unsigned int copy_b; - uint8_t vlan_sz = (buf->ol_flags & - PKT_TX_VLAN_PKT) ? 4 : 0; - const uint64_t is_tunneled = - buf->ol_flags & - (PKT_TX_TUNNEL_GRE | - PKT_TX_TUNNEL_VXLAN); - - tso_header_sz = buf->l2_len + vlan_sz + - buf->l3_len + buf->l4_len; - tso_segsz = buf->tso_segsz; - if (unlikely(tso_segsz == 0)) { - txq->stats.oerrors++; - break; - } - if (is_tunneled && txq->tunnel_en) { - tso_header_sz += buf->outer_l2_len + - buf->outer_l3_len; - cs_flags |= MLX5_ETH_WQE_L4_INNER_CSUM; - } else { - cs_flags |= MLX5_ETH_WQE_L4_CSUM; - } - if (unlikely(tso_header_sz > - MLX5_MAX_TSO_HEADER)) { - txq->stats.oerrors++; + tso = txq->tso_en && (buf->ol_flags & PKT_TX_TCP_SEG); + if (tso) { + uintptr_t end = + (uintptr_t)(((uintptr_t)txq->wqes) + + (1 << txq->wqe_n) * MLX5_WQE_SIZE); + unsigned int copy_b; + uint8_t vlan_sz = + (buf->ol_flags & PKT_TX_VLAN_PKT) ? 4 : 0; + const uint64_t is_tunneled = + buf->ol_flags & (PKT_TX_TUNNEL_GRE | + PKT_TX_TUNNEL_VXLAN); + + tso_header_sz = buf->l2_len + vlan_sz + + buf->l3_len + buf->l4_len; + tso_segsz = buf->tso_segsz; + if (unlikely(tso_segsz == 0)) { + txq->stats.oerrors++; + break; + } + if (is_tunneled && txq->tunnel_en) { + tso_header_sz += buf->outer_l2_len + + buf->outer_l3_len; + cs_flags |= MLX5_ETH_WQE_L4_INNER_CSUM; + } else { + cs_flags |= MLX5_ETH_WQE_L4_CSUM; + } + if (unlikely(tso_header_sz > MLX5_MAX_TSO_HEADER)) { + txq->stats.oerrors++; + break; + } + copy_b = tso_header_sz - pkt_inline_sz; + /* First seg must contain all headers. */ + assert(copy_b <= length); + if (copy_b && ((end - (uintptr_t)raw) > copy_b)) { + uint16_t n = (MLX5_WQE_DS(copy_b) - 1 + 3) / 4; + + if (unlikely(max_wqe < n)) break; - } - copy_b = tso_header_sz - pkt_inline_sz; - /* First seg must contain all headers. */ - assert(copy_b <= length); - if (copy_b && - ((end - (uintptr_t)raw) > copy_b)) { - uint16_t n = (MLX5_WQE_DS(copy_b) - - 1 + 3) / 4; - - if (unlikely(max_wqe < n)) - break; - max_wqe -= n; - rte_memcpy((void *)raw, - (void *)addr, copy_b); - addr += copy_b; - length -= copy_b; - /* Include padding for TSO header. */ - copy_b = MLX5_WQE_DS(copy_b) * - MLX5_WQE_DWORD_SIZE; - pkt_inline_sz += copy_b; - raw += copy_b; - } else { - /* NOP WQE. */ - wqe->ctrl = (rte_v128u32_t){ - rte_cpu_to_be_32( - txq->wqe_ci << 8), - rte_cpu_to_be_32( - txq->qp_num_8s | 1), - 0, - 0, - }; - ds = 1; + max_wqe -= n; + rte_memcpy((void *)raw, (void *)addr, copy_b); + addr += copy_b; + length -= copy_b; + /* Include padding for TSO header. */ + copy_b = MLX5_WQE_DS(copy_b) * + MLX5_WQE_DWORD_SIZE; + pkt_inline_sz += copy_b; + raw += copy_b; + } else { + /* NOP WQE. */ + wqe->ctrl = (rte_v128u32_t){ + rte_cpu_to_be_32(txq->wqe_ci << 8), + rte_cpu_to_be_32(txq->qp_num_8s | 1), + 0, + 0, + }; + ds = 1; #ifdef MLX5_PMD_SOFT_COUNTERS - total_length = 0; + total_length = 0; #endif - k++; - goto next_wqe; - } + k++; + goto next_wqe; } } /* Inline if enough room. */ - if (inline_en || tso) { - uint32_t inl; + if (max_inline || tso) { + uint32_t inl = 0; uintptr_t end = (uintptr_t) (((uintptr_t)txq->wqes) + (1 << txq->wqe_n) * MLX5_WQE_SIZE); @@ -542,12 +488,14 @@ mlx5_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n) RTE_CACHE_LINE_SIZE - (pkt_inline_sz - 2) - !!tso * sizeof(inl); - uintptr_t addr_end = (addr + inline_room) & - ~(RTE_CACHE_LINE_SIZE - 1); - unsigned int copy_b = (addr_end > addr) ? - RTE_MIN((addr_end - addr), length) : - 0; - + uintptr_t addr_end; + unsigned int copy_b; + +pkt_inline: + addr_end = RTE_ALIGN_FLOOR(addr + inline_room, + RTE_CACHE_LINE_SIZE); + copy_b = (addr_end > addr) ? + RTE_MIN((addr_end - addr), length) : 0; if (copy_b && ((end - (uintptr_t)raw) > copy_b)) { /* * One Dseg remains in the current WQE. To @@ -559,7 +507,7 @@ mlx5_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n) if (unlikely(max_wqe < n)) break; max_wqe -= n; - if (tso) { + if (tso && !inl) { inl = rte_cpu_to_be_32(copy_b | MLX5_INLINE_SEG); rte_memcpy((void *)raw, @@ -594,11 +542,18 @@ mlx5_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n) } else if (!segs_n) { goto next_pkt; } else { - /* dseg will be advance as part of next_seg */ - dseg = (volatile rte_v128u32_t *) - ((uintptr_t)wqe + - ((ds - 1) * MLX5_WQE_DWORD_SIZE)); - goto next_seg; + raw += copy_b; + inline_room -= copy_b; + --segs_n; + buf = buf->next; + assert(buf); + addr = rte_pktmbuf_mtod(buf, uintptr_t); + length = DATA_LEN(buf); +#ifdef MLX5_PMD_SOFT_COUNTERS + total_length += length; +#endif + (*txq->elts)[++elts_head & elts_m] = buf; + goto pkt_inline; } } else { /* @@ -610,12 +565,12 @@ mlx5_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n) ds = 3; use_dseg: /* Add the remaining packet as a simple ds. */ - naddr = rte_cpu_to_be_64(addr); + addr = rte_cpu_to_be_64(addr); *dseg = (rte_v128u32_t){ rte_cpu_to_be_32(length), mlx5_tx_mb2mr(txq, buf), - naddr, - naddr >> 32, + addr, + addr >> 32, }; ++ds; if (!segs_n) @@ -649,20 +604,16 @@ next_seg: total_length += length; #endif /* Store segment information. */ - naddr = rte_cpu_to_be_64(rte_pktmbuf_mtod(buf, uintptr_t)); + addr = rte_cpu_to_be_64(rte_pktmbuf_mtod(buf, uintptr_t)); *dseg = (rte_v128u32_t){ rte_cpu_to_be_32(length), mlx5_tx_mb2mr(txq, buf), - naddr, - naddr >> 32, + addr, + addr >> 32, }; (*txq->elts)[++elts_head & elts_m] = buf; - ++sg; - /* Advance counter only if all segs are successfully posted. */ - if (sg < segs_n) + if (--segs_n) goto next_seg; - else - j += sg; next_pkt: if (ds > MLX5_DSEG_MAX) { txq->stats.oerrors++; @@ -671,6 +622,7 @@ next_pkt: ++elts_head; ++pkts; ++i; + j += sg; /* Initialize known and common part of the WQE structure. */ if (tso) { wqe->ctrl = (rte_v128u32_t){ @@ -722,6 +674,9 @@ next_wqe: /* Save elts_head in unused "immediate" field of WQE. */ last_wqe->ctrl3 = txq->elts_head; txq->elts_comp = 0; +#ifndef NDEBUG + ++txq->cq_pi; +#endif } else { txq->elts_comp = comp; } @@ -840,6 +795,8 @@ mlx5_tx_burst_mpw(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n) /* Start processing. */ mlx5_tx_complete(txq); max_elts = (elts_n - (elts_head - txq->elts_tail)); + /* A CQE slot must always be available. */ + assert((1u << txq->cqe_n) - (txq->cq_pi - txq->cq_ci)); max_wqe = (1u << txq->wqe_n) - (txq->wqe_ci - txq->wqe_pi); if (unlikely(!max_wqe)) return 0; @@ -847,7 +804,7 @@ mlx5_tx_burst_mpw(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n) struct rte_mbuf *buf = *(pkts++); uint32_t length; unsigned int segs_n = buf->nb_segs; - uint32_t cs_flags = 0; + uint32_t cs_flags; /* * Make sure there is enough room to store this packet and @@ -863,10 +820,7 @@ mlx5_tx_burst_mpw(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n) } max_elts -= segs_n; --pkts_n; - /* Should we enable HW CKSUM offload */ - if (buf->ol_flags & - (PKT_TX_IP_CKSUM | PKT_TX_TCP_CKSUM | PKT_TX_UDP_CKSUM)) - cs_flags = MLX5_ETH_WQE_L3_CSUM | MLX5_ETH_WQE_L4_CSUM; + cs_flags = txq_ol_cksum_to_cs(txq, buf); /* Retrieve packet information. */ length = PKT_LEN(buf); assert(length); @@ -936,6 +890,9 @@ mlx5_tx_burst_mpw(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n) /* Save elts_head in unused "immediate" field of WQE. */ wqe->ctrl[3] = elts_head; txq->elts_comp = 0; +#ifndef NDEBUG + ++txq->cq_pi; +#endif } else { txq->elts_comp = comp; } @@ -1067,12 +1024,14 @@ mlx5_tx_burst_mpw_inline(void *dpdk_txq, struct rte_mbuf **pkts, /* Start processing. */ mlx5_tx_complete(txq); max_elts = (elts_n - (elts_head - txq->elts_tail)); + /* A CQE slot must always be available. */ + assert((1u << txq->cqe_n) - (txq->cq_pi - txq->cq_ci)); do { struct rte_mbuf *buf = *(pkts++); uintptr_t addr; uint32_t length; unsigned int segs_n = buf->nb_segs; - uint32_t cs_flags = 0; + uint8_t cs_flags; /* * Make sure there is enough room to store this packet and @@ -1093,10 +1052,7 @@ mlx5_tx_burst_mpw_inline(void *dpdk_txq, struct rte_mbuf **pkts, * iteration. */ max_wqe = (1u << txq->wqe_n) - (txq->wqe_ci - txq->wqe_pi); - /* Should we enable HW CKSUM offload */ - if (buf->ol_flags & - (PKT_TX_IP_CKSUM | PKT_TX_TCP_CKSUM | PKT_TX_UDP_CKSUM)) - cs_flags = MLX5_ETH_WQE_L3_CSUM | MLX5_ETH_WQE_L4_CSUM; + cs_flags = txq_ol_cksum_to_cs(txq, buf); /* Retrieve packet information. */ length = PKT_LEN(buf); /* Start new session if packet differs. */ @@ -1231,6 +1187,9 @@ mlx5_tx_burst_mpw_inline(void *dpdk_txq, struct rte_mbuf **pkts, /* Save elts_head in unused "immediate" field of WQE. */ wqe->ctrl[3] = elts_head; txq->elts_comp = 0; +#ifndef NDEBUG + ++txq->cq_pi; +#endif } else { txq->elts_comp = comp; } @@ -1317,10 +1276,10 @@ mlx5_empw_close(struct mlx5_txq_data *txq, struct mlx5_mpw *mpw) } /** - * DPDK callback for TX with Enhanced MPW support. + * TX with Enhanced MPW support. * - * @param dpdk_txq - * Generic pointer to TX queue structure. + * @param txq + * Pointer to TX queue structure. * @param[in] pkts * Packets to transmit. * @param pkts_n @@ -1329,10 +1288,10 @@ mlx5_empw_close(struct mlx5_txq_data *txq, struct mlx5_mpw *mpw) * @return * Number of packets successfully transmitted (<= pkts_n). */ -uint16_t -mlx5_tx_burst_empw(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n) +static inline uint16_t +txq_burst_empw(struct mlx5_txq_data *txq, struct rte_mbuf **pkts, + uint16_t pkts_n) { - struct mlx5_txq_data *txq = (struct mlx5_txq_data *)dpdk_txq; uint16_t elts_head = txq->elts_head; const uint16_t elts_n = 1 << txq->elts_n; const uint16_t elts_m = elts_n - 1; @@ -1361,29 +1320,17 @@ mlx5_tx_burst_empw(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n) do { struct rte_mbuf *buf = *(pkts++); uintptr_t addr; - uint64_t naddr; unsigned int n; unsigned int do_inline = 0; /* Whether inline is possible. */ uint32_t length; - unsigned int segs_n = buf->nb_segs; - uint32_t cs_flags = 0; + uint8_t cs_flags; - /* - * Make sure there is enough room to store this packet and - * that one ring entry remains unused. - */ - assert(segs_n); - if (max_elts - j < segs_n) - break; - /* Do not bother with large packets MPW cannot handle. */ - if (segs_n > MLX5_MPW_DSEG_MAX) { - txq->stats.oerrors++; + /* Multi-segmented packet is handled in slow-path outside. */ + assert(NB_SEGS(buf) == 1); + /* Make sure there is enough room to store this packet. */ + if (max_elts - j == 0) break; - } - /* Should we enable HW CKSUM offload. */ - if (buf->ol_flags & - (PKT_TX_IP_CKSUM | PKT_TX_TCP_CKSUM | PKT_TX_UDP_CKSUM)) - cs_flags = MLX5_ETH_WQE_L3_CSUM | MLX5_ETH_WQE_L4_CSUM; + cs_flags = txq_ol_cksum_to_cs(txq, buf); /* Retrieve packet information. */ length = PKT_LEN(buf); /* Start new session if: @@ -1391,50 +1338,35 @@ mlx5_tx_burst_empw(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n) * - no space left even for a dseg * - next packet can be inlined with a new WQE * - cs_flag differs - * It can't be MLX5_MPW_STATE_OPENED as always have a single - * segmented packet. */ if (mpw.state == MLX5_MPW_ENHANCED_STATE_OPENED) { - if ((segs_n != 1) || - (inl_pad + sizeof(struct mlx5_wqe_data_seg) > - mpw_room) || + if ((inl_pad + sizeof(struct mlx5_wqe_data_seg) > + mpw_room) || (length <= txq->inline_max_packet_sz && inl_pad + sizeof(inl_hdr) + length > - mpw_room) || + mpw_room) || (mpw.wqe->eseg.cs_flags != cs_flags)) max_wqe -= mlx5_empw_close(txq, &mpw); } if (unlikely(mpw.state == MLX5_MPW_STATE_CLOSED)) { - if (unlikely(segs_n != 1)) { - /* Fall back to legacy MPW. - * A MPW session consumes 2 WQEs at most to - * include MLX5_MPW_DSEG_MAX pointers. - */ - if (unlikely(max_wqe < 2)) - break; - mlx5_mpw_new(txq, &mpw, length); - } else { - /* In Enhanced MPW, inline as much as the budget - * is allowed. The remaining space is to be - * filled with dsegs. If the title WQEBB isn't - * padded, it will have 2 dsegs there. - */ - mpw_room = RTE_MIN(MLX5_WQE_SIZE_MAX, - (max_inline ? max_inline : - pkts_n * MLX5_WQE_DWORD_SIZE) + - MLX5_WQE_SIZE); - if (unlikely(max_wqe * MLX5_WQE_SIZE < - mpw_room)) - break; - /* Don't pad the title WQEBB to not waste WQ. */ - mlx5_empw_new(txq, &mpw, 0); - mpw_room -= mpw.total_len; - inl_pad = 0; - do_inline = - length <= txq->inline_max_packet_sz && - sizeof(inl_hdr) + length <= mpw_room && - !txq->mpw_hdr_dseg; - } + /* In Enhanced MPW, inline as much as the budget is + * allowed. The remaining space is to be filled with + * dsegs. If the title WQEBB isn't padded, it will have + * 2 dsegs there. + */ + mpw_room = RTE_MIN(MLX5_WQE_SIZE_MAX, + (max_inline ? max_inline : + pkts_n * MLX5_WQE_DWORD_SIZE) + + MLX5_WQE_SIZE); + if (unlikely(max_wqe * MLX5_WQE_SIZE < mpw_room)) + break; + /* Don't pad the title WQEBB to not waste WQ. */ + mlx5_empw_new(txq, &mpw, 0); + mpw_room -= mpw.total_len; + inl_pad = 0; + do_inline = length <= txq->inline_max_packet_sz && + sizeof(inl_hdr) + length <= mpw_room && + !txq->mpw_hdr_dseg; mpw.wqe->eseg.cs_flags = cs_flags; } else { /* Evaluate whether the next packet can be inlined. @@ -1450,41 +1382,7 @@ mlx5_tx_burst_empw(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n) (!txq->mpw_hdr_dseg || mpw.total_len >= MLX5_WQE_SIZE); } - /* Multi-segment packets must be alone in their MPW. */ - assert((segs_n == 1) || (mpw.pkts_n == 0)); - if (unlikely(mpw.state == MLX5_MPW_STATE_OPENED)) { -#if defined(MLX5_PMD_SOFT_COUNTERS) || !defined(NDEBUG) - length = 0; -#endif - do { - volatile struct mlx5_wqe_data_seg *dseg; - - assert(buf); - (*txq->elts)[elts_head++ & elts_m] = buf; - dseg = mpw.data.dseg[mpw.pkts_n]; - addr = rte_pktmbuf_mtod(buf, uintptr_t); - *dseg = (struct mlx5_wqe_data_seg){ - .byte_count = rte_cpu_to_be_32( - DATA_LEN(buf)), - .lkey = mlx5_tx_mb2mr(txq, buf), - .addr = rte_cpu_to_be_64(addr), - }; -#if defined(MLX5_PMD_SOFT_COUNTERS) || !defined(NDEBUG) - length += DATA_LEN(buf); -#endif - buf = buf->next; - ++j; - ++mpw.pkts_n; - } while (--segs_n); - /* A multi-segmented packet takes one MPW session. - * TODO: Pack more multi-segmented packets if possible. - */ - mlx5_mpw_close(txq, &mpw); - if (mpw.pkts_n < 3) - max_wqe--; - else - max_wqe -= 2; - } else if (do_inline) { + if (do_inline) { /* Inline packet into WQE. */ unsigned int max; @@ -1546,12 +1444,12 @@ mlx5_tx_burst_empw(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n) for (n = 0; n * RTE_CACHE_LINE_SIZE < length; n++) rte_prefetch2((void *)(addr + n * RTE_CACHE_LINE_SIZE)); - naddr = rte_cpu_to_be_64(addr); + addr = rte_cpu_to_be_64(addr); *dseg = (rte_v128u32_t) { rte_cpu_to_be_32(length), mlx5_tx_mb2mr(txq, buf), - naddr, - naddr >> 32, + addr, + addr >> 32, }; mpw.data.raw = (volatile void *)(dseg + 1); mpw.total_len += (inl_pad + sizeof(*dseg)); @@ -1581,7 +1479,9 @@ mlx5_tx_burst_empw(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n) wqe->ctrl[3] = elts_head; txq->elts_comp = 0; txq->mpw_comp = txq->wqe_ci; - txq->cq_pi++; +#ifndef NDEBUG + ++txq->cq_pi; +#endif } else { txq->elts_comp += j; } @@ -1591,8 +1491,6 @@ mlx5_tx_burst_empw(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n) #endif if (mpw.state == MLX5_MPW_ENHANCED_STATE_OPENED) mlx5_empw_close(txq, &mpw); - else if (mpw.state == MLX5_MPW_STATE_OPENED) - mlx5_mpw_close(txq, &mpw); /* Ring QP doorbell. */ mlx5_tx_dbrec(txq, mpw.wqe); txq->elts_head = elts_head; @@ -1600,6 +1498,47 @@ mlx5_tx_burst_empw(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n) } /** + * DPDK callback for TX with Enhanced MPW support. + * + * @param dpdk_txq + * Generic pointer to TX queue structure. + * @param[in] pkts + * Packets to transmit. + * @param pkts_n + * Number of packets in array. + * + * @return + * Number of packets successfully transmitted (<= pkts_n). + */ +uint16_t +mlx5_tx_burst_empw(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n) +{ + struct mlx5_txq_data *txq = (struct mlx5_txq_data *)dpdk_txq; + uint16_t nb_tx = 0; + + while (pkts_n > nb_tx) { + uint16_t n; + uint16_t ret; + + n = txq_count_contig_multi_seg(&pkts[nb_tx], pkts_n - nb_tx); + if (n) { + ret = mlx5_tx_burst(dpdk_txq, &pkts[nb_tx], n); + if (!ret) + break; + nb_tx += ret; + } + n = txq_count_contig_single_seg(&pkts[nb_tx], pkts_n - nb_tx); + if (n) { + ret = txq_burst_empw(txq, &pkts[nb_tx], n); + if (!ret) + break; + nb_tx += ret; + } + } + return nb_tx; +} + +/** * Translate RX completion flags to packet type. * * @param[in] cqe @@ -1702,6 +1641,7 @@ mlx5_rx_poll_len(struct mlx5_rxq_data *rxq, volatile struct mlx5_cqe *cqe, return 0; ++rxq->cq_ci; op_own = cqe->op_own; + rte_cio_rmb(); if (MLX5_CQE_FORMAT(op_own) == MLX5_COMPRESSED) { volatile struct mlx5_mini_cqe8 (*mc)[8] = (volatile struct mlx5_mini_cqe8 (*)[8]) @@ -1931,9 +1871,9 @@ skip: return 0; /* Update the consumer index. */ rxq->rq_ci = rq_ci >> sges_n; - rte_io_wmb(); + rte_cio_wmb(); *rxq->cq_db = rte_cpu_to_be_32(rxq->cq_ci); - rte_io_wmb(); + rte_cio_wmb(); *rxq->rq_db = rte_cpu_to_be_32(rxq->rq_ci); #ifdef MLX5_PMD_SOFT_COUNTERS /* Increment packets counter. */ @@ -2027,16 +1967,18 @@ mlx5_rx_burst_vec(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n) } int __attribute__((weak)) -priv_check_raw_vec_tx_support(struct priv *priv) +priv_check_raw_vec_tx_support(struct priv *priv, struct rte_eth_dev *dev) { (void)priv; + (void)dev; return -ENOTSUP; } int __attribute__((weak)) -priv_check_vec_tx_support(struct priv *priv) +priv_check_vec_tx_support(struct priv *priv, struct rte_eth_dev *dev) { (void)priv; + (void)dev; return -ENOTSUP; } diff --git a/drivers/net/mlx5/mlx5_rxtx.h b/drivers/net/mlx5/mlx5_rxtx.h index d34f3cc0..d7e89055 100644 --- a/drivers/net/mlx5/mlx5_rxtx.h +++ b/drivers/net/mlx5/mlx5_rxtx.h @@ -1,34 +1,6 @@ -/*- - * BSD LICENSE - * - * Copyright 2015 6WIND S.A. - * Copyright 2015 Mellanox. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * * Neither the name of 6WIND S.A. nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright 2015 6WIND S.A. + * Copyright 2015 Mellanox. */ #ifndef RTE_PMD_MLX5_RXTX_H_ @@ -114,8 +86,7 @@ struct mlx5_rxq_data { unsigned int elts_n:4; /* Log 2 of Mbufs. */ unsigned int rss_hash:1; /* RSS hash result is enabled. */ unsigned int mark:1; /* Marked flow available on the queue. */ - unsigned int pending_err:1; /* CQE error needs to be handled. */ - unsigned int :14; /* Remaining bits. */ + unsigned int :15; /* Remaining bits. */ volatile uint32_t *rq_db; volatile uint32_t *cq_db; uint16_t port_id; @@ -185,13 +156,14 @@ struct mlx5_txq_data { uint16_t elts_comp; /* Counter since last completion request. */ uint16_t mpw_comp; /* WQ index since last completion request. */ uint16_t cq_ci; /* Consumer index for completion queue. */ +#ifndef NDEBUG uint16_t cq_pi; /* Producer index for completion queue. */ +#endif uint16_t wqe_ci; /* Consumer index for work queue. */ uint16_t wqe_pi; /* Producer index for work queue. */ uint16_t elts_n:4; /* (*elts)[] length (in log2). */ uint16_t cqe_n:4; /* Number of CQ elements (in log2). */ uint16_t wqe_n:4; /* Number of of WQ elements (in log2). */ - uint16_t inline_en:1; /* When set inline is enabled. */ uint16_t tso_en:1; /* When set hardware TSO is enabled. */ uint16_t tunnel_en:1; /* When set TX offload for tunneled packets are supported. */ @@ -200,12 +172,12 @@ struct mlx5_txq_data { uint16_t inline_max_packet_sz; /* Max packet size for inlining. */ uint16_t mr_cache_idx; /* Index of last hit entry. */ uint32_t qp_num_8s; /* QP number shifted by 8. */ - uint32_t flags; /* Flags for Tx Queue. */ + uint64_t offloads; /* Offloads for Tx Queue. */ volatile struct mlx5_cqe (*cqes)[]; /* Completion queue. */ volatile void *wqes; /* Work queue (use volatile to write into). */ volatile uint32_t *qp_db; /* Work queue doorbell. */ volatile uint32_t *cq_db; /* Completion queue doorbell. */ - volatile void *bf_reg; /* Blueflame register. */ + volatile void *bf_reg; /* Blueflame register remapped. */ struct mlx5_mr *mp2mr[MLX5_PMD_TX_MP_CACHE]; /* MR translation table. */ struct rte_mbuf *(*elts)[]; /* TX elements. */ struct mlx5_txq_stats stats; /* TX queue counters. */ @@ -230,6 +202,7 @@ struct mlx5_txq_ctrl { struct mlx5_txq_ibv *ibv; /* Verbs queue object. */ struct mlx5_txq_data txq; /* Data path structure. */ off_t uar_mmap_offset; /* UAR mmap offset for non-primary process. */ + volatile void *bf_reg_orig; /* Blueflame register from verbs. */ }; /* mlx5_rxq.c */ @@ -252,6 +225,7 @@ int mlx5_priv_rxq_ibv_releasable(struct priv *, struct mlx5_rxq_ibv *); int mlx5_priv_rxq_ibv_verify(struct priv *); struct mlx5_rxq_ctrl *mlx5_priv_rxq_new(struct priv *, uint16_t, uint16_t, unsigned int, + const struct rte_eth_rxconf *, struct rte_mempool *); struct mlx5_rxq_ctrl *mlx5_priv_rxq_get(struct priv *, uint16_t); int mlx5_priv_rxq_release(struct priv *, uint16_t); @@ -272,6 +246,8 @@ struct mlx5_hrxq *mlx5_priv_hrxq_get(struct priv *, uint8_t *, uint8_t, uint64_t, uint16_t [], uint16_t); int mlx5_priv_hrxq_release(struct priv *, struct mlx5_hrxq *); int mlx5_priv_hrxq_ibv_verify(struct priv *); +uint64_t mlx5_priv_get_rx_port_offloads(struct priv *); +uint64_t mlx5_priv_get_rx_queue_offloads(struct priv *); /* mlx5_txq.c */ @@ -292,6 +268,7 @@ int mlx5_priv_txq_release(struct priv *, uint16_t); int mlx5_priv_txq_releasable(struct priv *, uint16_t); int mlx5_priv_txq_verify(struct priv *); void txq_alloc_elts(struct mlx5_txq_ctrl *); +uint64_t mlx5_priv_get_tx_port_offloads(struct priv *); /* mlx5_rxtx.c */ @@ -309,8 +286,8 @@ int mlx5_rx_descriptor_status(void *, uint16_t); int mlx5_tx_descriptor_status(void *, uint16_t); /* Vectorized version of mlx5_rxtx.c */ -int priv_check_raw_vec_tx_support(struct priv *); -int priv_check_vec_tx_support(struct priv *); +int priv_check_raw_vec_tx_support(struct priv *, struct rte_eth_dev *); +int priv_check_vec_tx_support(struct priv *, struct rte_eth_dev *); int rxq_check_vec_support(struct mlx5_rxq_data *); int priv_check_vec_rx_support(struct priv *); uint16_t mlx5_tx_burst_raw_vec(void *, struct rte_mbuf **, uint16_t); @@ -548,23 +525,21 @@ mlx5_tx_mb2mr(struct mlx5_txq_data *txq, struct rte_mbuf *mb) struct mlx5_mr *mr; assert(i < RTE_DIM(txq->mp2mr)); - if (likely(txq->mp2mr[i]->start <= addr && txq->mp2mr[i]->end >= addr)) + if (likely(txq->mp2mr[i]->start <= addr && txq->mp2mr[i]->end > addr)) return txq->mp2mr[i]->lkey; for (i = 0; (i != RTE_DIM(txq->mp2mr)); ++i) { - if (unlikely(txq->mp2mr[i]->mr == NULL)) { + if (unlikely(txq->mp2mr[i] == NULL || + txq->mp2mr[i]->mr == NULL)) { /* Unknown MP, add a new MR for it. */ break; } if (txq->mp2mr[i]->start <= addr && - txq->mp2mr[i]->end >= addr) { + txq->mp2mr[i]->end > addr) { assert(txq->mp2mr[i]->lkey != (uint32_t)-1); - assert(rte_cpu_to_be_32(txq->mp2mr[i]->mr->lkey) == - txq->mp2mr[i]->lkey); txq->mr_cache_idx = i; return txq->mp2mr[i]->lkey; } } - txq->mr_cache_idx = 0; mr = mlx5_txq_mp2mr_reg(txq, mlx5_tx_mb2mp(mb), i); /* * Request the reference to use in this queue, the original one is @@ -572,7 +547,13 @@ mlx5_tx_mb2mr(struct mlx5_txq_data *txq, struct rte_mbuf *mb) */ if (mr) { rte_atomic32_inc(&mr->refcnt); + txq->mr_cache_idx = i >= RTE_DIM(txq->mp2mr) ? i - 1 : i; return mr->lkey; + } else { + struct rte_mempool *mp = mlx5_tx_mb2mp(mb); + + WARN("Failed to register mempool 0x%p(%s)", + (void *)mp, mp->name); } return (uint32_t)-1; } @@ -594,7 +575,7 @@ mlx5_tx_dbrec_cond_wmb(struct mlx5_txq_data *txq, volatile struct mlx5_wqe *wqe, uint64_t *dst = (uint64_t *)((uintptr_t)txq->bf_reg); volatile uint64_t *src = ((volatile uint64_t *)wqe); - rte_io_wmb(); + rte_cio_wmb(); *txq->qp_db = rte_cpu_to_be_32(txq->wqe_ci); /* Ensure ordering between DB record and BF copy. */ rte_wmb(); @@ -617,4 +598,89 @@ mlx5_tx_dbrec(struct mlx5_txq_data *txq, volatile struct mlx5_wqe *wqe) mlx5_tx_dbrec_cond_wmb(txq, wqe, 1); } +/** + * Convert the Checksum offloads to Verbs. + * + * @param txq_data + * Pointer to the Tx queue. + * @param buf + * Pointer to the mbuf. + * + * @return + * the converted cs_flags. + */ +static __rte_always_inline uint8_t +txq_ol_cksum_to_cs(struct mlx5_txq_data *txq_data, struct rte_mbuf *buf) +{ + uint8_t cs_flags = 0; + + /* Should we enable HW CKSUM offload */ + if (buf->ol_flags & + (PKT_TX_IP_CKSUM | PKT_TX_TCP_CKSUM | PKT_TX_UDP_CKSUM | + PKT_TX_OUTER_IP_CKSUM)) { + if (txq_data->tunnel_en && + (buf->ol_flags & + (PKT_TX_TUNNEL_GRE | PKT_TX_TUNNEL_VXLAN))) { + cs_flags = MLX5_ETH_WQE_L3_INNER_CSUM | + MLX5_ETH_WQE_L4_INNER_CSUM; + if (buf->ol_flags & PKT_TX_OUTER_IP_CKSUM) + cs_flags |= MLX5_ETH_WQE_L3_CSUM; + } else { + cs_flags = MLX5_ETH_WQE_L3_CSUM | + MLX5_ETH_WQE_L4_CSUM; + } + } + return cs_flags; +} + +/** + * Count the number of contiguous single segment packets. + * + * @param pkts + * Pointer to array of packets. + * @param pkts_n + * Number of packets. + * + * @return + * Number of contiguous single segment packets. + */ +static __rte_always_inline unsigned int +txq_count_contig_single_seg(struct rte_mbuf **pkts, uint16_t pkts_n) +{ + unsigned int pos; + + if (!pkts_n) + return 0; + /* Count the number of contiguous single segment packets. */ + for (pos = 0; pos < pkts_n; ++pos) + if (NB_SEGS(pkts[pos]) > 1) + break; + return pos; +} + +/** + * Count the number of contiguous multi-segment packets. + * + * @param pkts + * Pointer to array of packets. + * @param pkts_n + * Number of packets. + * + * @return + * Number of contiguous multi-segment packets. + */ +static __rte_always_inline unsigned int +txq_count_contig_multi_seg(struct rte_mbuf **pkts, uint16_t pkts_n) +{ + unsigned int pos; + + if (!pkts_n) + return 0; + /* Count the number of contiguous multi-segment packets. */ + for (pos = 0; pos < pkts_n; ++pos) + if (NB_SEGS(pkts[pos]) == 1) + break; + return pos; +} + #endif /* RTE_PMD_MLX5_RXTX_H_ */ diff --git a/drivers/net/mlx5/mlx5_rxtx_vec.c b/drivers/net/mlx5/mlx5_rxtx_vec.c index ba6c8cef..b66c2916 100644 --- a/drivers/net/mlx5/mlx5_rxtx_vec.c +++ b/drivers/net/mlx5/mlx5_rxtx_vec.c @@ -1,34 +1,6 @@ -/*- - * BSD LICENSE - * - * Copyright 2017 6WIND S.A. - * Copyright 2017 Mellanox. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * * Neither the name of 6WIND S.A. nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright 2017 6WIND S.A. + * Copyright 2017 Mellanox. */ #include <assert.h> @@ -68,31 +40,6 @@ #endif /** - * Count the number of continuous single segment packets. - * - * @param pkts - * Pointer to array of packets. - * @param pkts_n - * Number of packets. - * - * @return - * Number of continuous single segment packets. - */ -static inline unsigned int -txq_check_multiseg(struct rte_mbuf **pkts, uint16_t pkts_n) -{ - unsigned int pos; - - if (!pkts_n) - return 0; - /* Count the number of continuous single segment packets. */ - for (pos = 0; pos < pkts_n; ++pos) - if (NB_SEGS(pkts[pos]) > 1) - break; - return pos; -} - -/** * Count the number of packets having same ol_flags and calculate cs_flags. * * @param txq @@ -123,24 +70,7 @@ txq_calc_offload(struct mlx5_txq_data *txq, struct rte_mbuf **pkts, for (pos = 1; pos < pkts_n; ++pos) if ((pkts[pos]->ol_flags ^ pkts[0]->ol_flags) & ol_mask) break; - /* Should open another MPW session for the rest. */ - if (pkts[0]->ol_flags & - (PKT_TX_IP_CKSUM | PKT_TX_TCP_CKSUM | PKT_TX_UDP_CKSUM)) { - const uint64_t is_tunneled = - pkts[0]->ol_flags & - (PKT_TX_TUNNEL_GRE | - PKT_TX_TUNNEL_VXLAN); - - if (is_tunneled && txq->tunnel_en) { - *cs_flags = MLX5_ETH_WQE_L3_INNER_CSUM | - MLX5_ETH_WQE_L4_INNER_CSUM; - if (pkts[0]->ol_flags & PKT_TX_OUTER_IP_CKSUM) - *cs_flags |= MLX5_ETH_WQE_L3_CSUM; - } else { - *cs_flags = MLX5_ETH_WQE_L3_CSUM | - MLX5_ETH_WQE_L4_CSUM; - } - } + *cs_flags = txq_ol_cksum_to_cs(txq, pkts[0]); return pos; } @@ -202,15 +132,15 @@ mlx5_tx_burst_vec(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n) uint16_t ret; /* Transmit multi-seg packets in the head of pkts list. */ - if (!(txq->flags & ETH_TXQ_FLAGS_NOMULTSEGS) && + if ((txq->offloads & DEV_TX_OFFLOAD_MULTI_SEGS) && NB_SEGS(pkts[nb_tx]) > 1) nb_tx += txq_scatter_v(txq, &pkts[nb_tx], pkts_n - nb_tx); n = RTE_MIN((uint16_t)(pkts_n - nb_tx), MLX5_VPMD_TX_MAX_BURST); - if (!(txq->flags & ETH_TXQ_FLAGS_NOMULTSEGS)) - n = txq_check_multiseg(&pkts[nb_tx], n); - if (!(txq->flags & ETH_TXQ_FLAGS_NOOFFLOADS)) + if (txq->offloads & DEV_TX_OFFLOAD_MULTI_SEGS) + n = txq_count_contig_single_seg(&pkts[nb_tx], n); + if (txq->offloads & MLX5_VEC_TX_CKSUM_OFFLOAD_CAP) n = txq_calc_offload(txq, &pkts[nb_tx], n, &cs_flags); ret = txq_burst_v(txq, &pkts[nb_tx], n, cs_flags); nb_tx += ret; @@ -261,7 +191,6 @@ rxq_handle_pending_error(struct mlx5_rxq_data *rxq, struct rte_mbuf **pkts, rxq->stats.ipackets -= (pkts_n - n); rxq->stats.ibytes -= err_bytes; #endif - rxq->pending_err = 0; return n; } @@ -283,9 +212,10 @@ mlx5_rx_burst_vec(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n) { struct mlx5_rxq_data *rxq = dpdk_rxq; uint16_t nb_rx; + uint64_t err = 0; - nb_rx = rxq_burst_v(rxq, pkts, pkts_n); - if (unlikely(rxq->pending_err)) + nb_rx = rxq_burst_v(rxq, pkts, pkts_n, &err); + if (unlikely(err)) nb_rx = rxq_handle_pending_error(rxq, pkts, nb_rx); return nb_rx; } @@ -295,24 +225,20 @@ mlx5_rx_burst_vec(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n) * * @param priv * Pointer to private structure. + * @param dev + * Pointer to rte_eth_dev structure. * * @return * 1 if supported, negative errno value if not. */ int __attribute__((cold)) -priv_check_raw_vec_tx_support(struct priv *priv) +priv_check_raw_vec_tx_support(__rte_unused struct priv *priv, + struct rte_eth_dev *dev) { - uint16_t i; - - /* All the configured queues should support. */ - for (i = 0; i < priv->txqs_n; ++i) { - struct mlx5_txq_data *txq = (*priv->txqs)[i]; + uint64_t offloads = dev->data->dev_conf.txmode.offloads; - if (!(txq->flags & ETH_TXQ_FLAGS_NOMULTSEGS) || - !(txq->flags & ETH_TXQ_FLAGS_NOOFFLOADS)) - break; - } - if (i != priv->txqs_n) + /* Doesn't support any offload. */ + if (offloads) return -ENOTSUP; return 1; } @@ -322,17 +248,21 @@ priv_check_raw_vec_tx_support(struct priv *priv) * * @param priv * Pointer to private structure. + * @param dev + * Pointer to rte_eth_dev structure. * * @return * 1 if supported, negative errno value if not. */ int __attribute__((cold)) -priv_check_vec_tx_support(struct priv *priv) +priv_check_vec_tx_support(struct priv *priv, struct rte_eth_dev *dev) { - if (!priv->tx_vec_en || + uint64_t offloads = dev->data->dev_conf.txmode.offloads; + + if (!priv->config.tx_vec_en || priv->txqs_n > MLX5_VPMD_MIN_TXQS || - priv->mps != MLX5_MPW_ENHANCED || - priv->tso) + priv->config.mps != MLX5_MPW_ENHANCED || + offloads & ~MLX5_VEC_TX_OFFLOAD_CAP) return -ENOTSUP; return 1; } @@ -352,7 +282,7 @@ rxq_check_vec_support(struct mlx5_rxq_data *rxq) struct mlx5_rxq_ctrl *ctrl = container_of(rxq, struct mlx5_rxq_ctrl, rxq); - if (!ctrl->priv->rx_vec_en || rxq->sges_n != 0) + if (!ctrl->priv->config.rx_vec_en || rxq->sges_n != 0) return -ENOTSUP; return 1; } @@ -371,7 +301,7 @@ priv_check_vec_rx_support(struct priv *priv) { uint16_t i; - if (!priv->rx_vec_en) + if (!priv->config.rx_vec_en) return -ENOTSUP; /* All the configured queues should support. */ for (i = 0; i < priv->rxqs_n; ++i) { diff --git a/drivers/net/mlx5/mlx5_rxtx_vec.h b/drivers/net/mlx5/mlx5_rxtx_vec.h index 1f08ed0b..44856bbf 100644 --- a/drivers/net/mlx5/mlx5_rxtx_vec.h +++ b/drivers/net/mlx5/mlx5_rxtx_vec.h @@ -1,34 +1,6 @@ -/*- - * BSD LICENSE - * - * Copyright 2017 6WIND S.A. - * Copyright 2017 Mellanox. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * * Neither the name of 6WIND S.A. nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright 2017 6WIND S.A. + * Copyright 2017 Mellanox. */ #ifndef RTE_PMD_MLX5_RXTX_VEC_H_ @@ -40,6 +12,18 @@ #include "mlx5_autoconf.h" #include "mlx5_prm.h" +/* HW checksum offload capabilities of vectorized Tx. */ +#define MLX5_VEC_TX_CKSUM_OFFLOAD_CAP \ + (DEV_TX_OFFLOAD_IPV4_CKSUM | \ + DEV_TX_OFFLOAD_UDP_CKSUM | \ + DEV_TX_OFFLOAD_TCP_CKSUM | \ + DEV_TX_OFFLOAD_OUTER_IPV4_CKSUM) + +/* HW offload capabilities of vectorized Tx. */ +#define MLX5_VEC_TX_OFFLOAD_CAP \ + (MLX5_VEC_TX_CKSUM_OFFLOAD_CAP | \ + DEV_TX_OFFLOAD_MULTI_SEGS) + /* * Compile time sanity check for vectorized functions. */ @@ -123,7 +107,7 @@ mlx5_rx_replenish_bulk_mbuf(struct mlx5_rxq_data *rxq, uint16_t n) elts_idx = rxq->rq_ci & q_mask; for (i = 0; i < MLX5_VPMD_DESCS_PER_LOOP; ++i) (*rxq->elts)[elts_idx + i] = &rxq->fake_mbuf; - rte_io_wmb(); + rte_cio_wmb(); *rxq->rq_db = rte_cpu_to_be_32(rxq->rq_ci); } diff --git a/drivers/net/mlx5/mlx5_rxtx_vec_neon.h b/drivers/net/mlx5/mlx5_rxtx_vec_neon.h index c721d80e..bbe1818e 100644 --- a/drivers/net/mlx5/mlx5_rxtx_vec_neon.h +++ b/drivers/net/mlx5/mlx5_rxtx_vec_neon.h @@ -1,34 +1,6 @@ -/*- - * BSD LICENSE - * - * Copyright 2017 6WIND S.A. - * Copyright 2017 Mellanox. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * * Neither the name of 6WIND S.A. nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright 2017 6WIND S.A. + * Copyright 2017 Mellanox. */ #ifndef RTE_PMD_MLX5_RXTX_VEC_NEON_H_ @@ -135,6 +107,8 @@ txq_scatter_v(struct mlx5_txq_data *txq, struct rte_mbuf **pkts, assert(elts_n > pkts_n); mlx5_tx_complete(txq); + /* A CQE slot must always be available. */ + assert((1u << txq->cqe_n) - (txq->cq_pi - txq->cq_ci)); if (unlikely(!pkts_n)) return 0; for (n = 0; n < pkts_n; ++n) { @@ -149,7 +123,7 @@ txq_scatter_v(struct mlx5_txq_data *txq, struct rte_mbuf **pkts, 11, 10, 9, 8, /* bswap32 */ 12, 13, 14, 15 }; - uint8_t cs_flags = 0; + uint8_t cs_flags; uint16_t max_elts; uint16_t max_wqe; uint8x16_t *t_wqe; @@ -168,22 +142,7 @@ txq_scatter_v(struct mlx5_txq_data *txq, struct rte_mbuf **pkts, break; wqe = &((volatile struct mlx5_wqe64 *) txq->wqes)[wqe_ci & wq_mask].hdr; - if (buf->ol_flags & - (PKT_TX_IP_CKSUM | PKT_TX_TCP_CKSUM | PKT_TX_UDP_CKSUM)) { - const uint64_t is_tunneled = - buf->ol_flags & (PKT_TX_TUNNEL_GRE | - PKT_TX_TUNNEL_VXLAN); - - if (is_tunneled && txq->tunnel_en) { - cs_flags = MLX5_ETH_WQE_L3_INNER_CSUM | - MLX5_ETH_WQE_L4_INNER_CSUM; - if (buf->ol_flags & PKT_TX_OUTER_IP_CKSUM) - cs_flags |= MLX5_ETH_WQE_L3_CSUM; - } else { - cs_flags = MLX5_ETH_WQE_L3_CSUM | - MLX5_ETH_WQE_L4_CSUM; - } - } + cs_flags = txq_ol_cksum_to_cs(txq, buf); /* Title WQEBB pointer. */ t_wqe = (uint8x16_t *)wqe; dseg = (uint8_t *)(wqe + 1); @@ -220,7 +179,9 @@ txq_scatter_v(struct mlx5_txq_data *txq, struct rte_mbuf **pkts, wqe->ctrl[2] = rte_cpu_to_be_32(8); wqe->ctrl[3] = txq->elts_head; txq->elts_comp = 0; +#ifndef NDEBUG ++txq->cq_pi; +#endif } #ifdef MLX5_PMD_SOFT_COUNTERS txq->stats.opackets += n; @@ -233,7 +194,7 @@ txq_scatter_v(struct mlx5_txq_data *txq, struct rte_mbuf **pkts, * Send burst of packets with Enhanced MPW. If it encounters a multi-seg packet, * it returns to make it processed by txq_scatter_v(). All the packets in * the pkts list should be single segment packets having same offload flags. - * This must be checked by txq_check_multiseg() and txq_calc_offload(). + * This must be checked by txq_count_contig_single_seg() and txq_calc_offload(). * * @param txq * Pointer to TX queue structure. @@ -284,6 +245,8 @@ txq_burst_v(struct mlx5_txq_data *txq, struct rte_mbuf **pkts, uint16_t pkts_n, assert(elts_n > pkts_n); mlx5_tx_complete(txq); max_elts = (elts_n - (elts_head - txq->elts_tail)); + /* A CQE slot must always be available. */ + assert((1u << txq->cqe_n) - (txq->cq_pi - txq->cq_ci)); max_wqe = (1u << txq->wqe_n) - (txq->wqe_ci - txq->wqe_pi); pkts_n = RTE_MIN((unsigned int)RTE_MIN(pkts_n, max_wqe), max_elts); if (unlikely(!pkts_n)) @@ -321,7 +284,9 @@ txq_burst_v(struct mlx5_txq_data *txq, struct rte_mbuf **pkts, uint16_t pkts_n, } else { /* Request a completion. */ txq->elts_comp = 0; +#ifndef NDEBUG ++txq->cq_pi; +#endif comp_req = 8; } /* Fill CTRL in the header. */ @@ -590,11 +555,15 @@ rxq_cq_to_ptype_oflags_v(struct mlx5_rxq_data *rxq, if (rxq->mark) { const uint32x4_t ft_def = vdupq_n_u32(MLX5_FLOW_MARK_DEFAULT); const uint32x4_t fdir_flags = vdupq_n_u32(PKT_RX_FDIR); - const uint32x4_t fdir_id_flags = vdupq_n_u32(PKT_RX_FDIR_ID); + uint32x4_t fdir_id_flags = vdupq_n_u32(PKT_RX_FDIR_ID); + uint32x4_t invalid_mask; /* Check if flow tag is non-zero then set PKT_RX_FDIR. */ - ol_flags = vorrq_u32(ol_flags, vbicq_u32(fdir_flags, - vceqzq_u32(flow_tag))); + invalid_mask = vceqzq_u32(flow_tag); + ol_flags = vorrq_u32(ol_flags, + vbicq_u32(fdir_flags, invalid_mask)); + /* Mask out invalid entries. */ + fdir_id_flags = vbicq_u32(fdir_id_flags, invalid_mask); /* Check if flow tag MLX5_FLOW_MARK_DEFAULT. */ ol_flags = vorrq_u32(ol_flags, vbicq_u32(fdir_id_flags, @@ -665,12 +634,16 @@ rxq_cq_to_ptype_oflags_v(struct mlx5_rxq_data *rxq, * Array to store received packets. * @param pkts_n * Maximum number of packets in array. + * @param[out] err + * Pointer to a flag. Set non-zero value if pkts array has at least one error + * packet to handle. * * @return * Number of packets received including errors (<= pkts_n). */ static inline uint16_t -rxq_burst_v(struct mlx5_rxq_data *rxq, struct rte_mbuf **pkts, uint16_t pkts_n) +rxq_burst_v(struct mlx5_rxq_data *rxq, struct rte_mbuf **pkts, uint16_t pkts_n, + uint64_t *err) { const uint16_t q_n = 1 << rxq->cqe_n; const uint16_t q_mask = q_n - 1; @@ -813,6 +786,7 @@ rxq_burst_v(struct mlx5_rxq_data *rxq, struct rte_mbuf **pkts, uint16_t pkts_n) uint16x4_t mask; uint16x4_t byte_cnt; uint32x4_t ptype_info, flow_tag; + register uint64x2_t c0, c1, c2, c3; uint8_t *p0, *p1, *p2, *p3; uint8_t *e0 = (void *)&elts[pos]->pkt_len; uint8_t *e1 = (void *)&elts[pos + 1]->pkt_len; @@ -829,6 +803,16 @@ rxq_burst_v(struct mlx5_rxq_data *rxq, struct rte_mbuf **pkts, uint16_t pkts_n) p1 = p0 + (pkts_n - pos > 1) * sizeof(struct mlx5_cqe); p2 = p1 + (pkts_n - pos > 2) * sizeof(struct mlx5_cqe); p3 = p2 + (pkts_n - pos > 3) * sizeof(struct mlx5_cqe); + /* B.0 (CQE 3) load a block having op_own. */ + c3 = vld1q_u64((uint64_t *)(p3 + 48)); + /* B.0 (CQE 2) load a block having op_own. */ + c2 = vld1q_u64((uint64_t *)(p2 + 48)); + /* B.0 (CQE 1) load a block having op_own. */ + c1 = vld1q_u64((uint64_t *)(p1 + 48)); + /* B.0 (CQE 0) load a block having op_own. */ + c0 = vld1q_u64((uint64_t *)(p0 + 48)); + /* Synchronize for loading the rest of blocks. */ + rte_cio_rmb(); /* Prefetch next 4 CQEs. */ if (pkts_n - pos >= 2 * MLX5_VPMD_DESCS_PER_LOOP) { unsigned int next = pos + MLX5_VPMD_DESCS_PER_LOOP; @@ -838,50 +822,46 @@ rxq_burst_v(struct mlx5_rxq_data *rxq, struct rte_mbuf **pkts, uint16_t pkts_n) rte_prefetch_non_temporal(&cq[next + 3]); } __asm__ volatile ( - /* B.1 (CQE 3) load a block having op_own. */ - "ld1 {v19.16b}, [%[p3]] \n\t" - "sub %[p3], %[p3], #48 \n\t" - /* B.2 (CQE 3) load the rest blocks. */ + /* B.1 (CQE 3) load the rest of blocks. */ "ld1 {v16.16b - v18.16b}, [%[p3]] \n\t" + /* B.2 (CQE 3) move the block having op_own. */ + "mov v19.16b, %[c3].16b \n\t" /* B.3 (CQE 3) extract 16B fields. */ "tbl v23.16b, {v16.16b - v19.16b}, %[cqe_shuf_m].16b \n\t" + /* B.1 (CQE 2) load the rest of blocks. */ + "ld1 {v16.16b - v18.16b}, [%[p2]] \n\t" /* B.4 (CQE 3) adjust CRC length. */ "sub v23.8h, v23.8h, %[crc_adj].8h \n\t" - /* B.1 (CQE 2) load a block having op_own. */ - "ld1 {v19.16b}, [%[p2]] \n\t" - "sub %[p2], %[p2], #48 \n\t" /* C.1 (CQE 3) generate final structure for mbuf. */ "tbl v15.16b, {v23.16b}, %[mb_shuf_m].16b \n\t" - /* B.2 (CQE 2) load the rest blocks. */ - "ld1 {v16.16b - v18.16b}, [%[p2]] \n\t" + /* B.2 (CQE 2) move the block having op_own. */ + "mov v19.16b, %[c2].16b \n\t" /* B.3 (CQE 2) extract 16B fields. */ "tbl v22.16b, {v16.16b - v19.16b}, %[cqe_shuf_m].16b \n\t" + /* B.1 (CQE 1) load the rest of blocks. */ + "ld1 {v16.16b - v18.16b}, [%[p1]] \n\t" /* B.4 (CQE 2) adjust CRC length. */ "sub v22.8h, v22.8h, %[crc_adj].8h \n\t" - /* B.1 (CQE 1) load a block having op_own. */ - "ld1 {v19.16b}, [%[p1]] \n\t" - "sub %[p1], %[p1], #48 \n\t" /* C.1 (CQE 2) generate final structure for mbuf. */ "tbl v14.16b, {v22.16b}, %[mb_shuf_m].16b \n\t" - /* B.2 (CQE 1) load the rest blocks. */ - "ld1 {v16.16b - v18.16b}, [%[p1]] \n\t" + /* B.2 (CQE 1) move the block having op_own. */ + "mov v19.16b, %[c1].16b \n\t" /* B.3 (CQE 1) extract 16B fields. */ "tbl v21.16b, {v16.16b - v19.16b}, %[cqe_shuf_m].16b \n\t" + /* B.1 (CQE 0) load the rest of blocks. */ + "ld1 {v16.16b - v18.16b}, [%[p0]] \n\t" /* B.4 (CQE 1) adjust CRC length. */ "sub v21.8h, v21.8h, %[crc_adj].8h \n\t" - /* B.1 (CQE 0) load a block having op_own. */ - "ld1 {v19.16b}, [%[p0]] \n\t" - "sub %[p0], %[p0], #48 \n\t" /* C.1 (CQE 1) generate final structure for mbuf. */ "tbl v13.16b, {v21.16b}, %[mb_shuf_m].16b \n\t" - /* B.2 (CQE 0) load the rest blocks. */ - "ld1 {v16.16b - v18.16b}, [%[p0]] \n\t" + /* B.2 (CQE 0) move the block having op_own. */ + "mov v19.16b, %[c0].16b \n\t" + /* A.1 load mbuf pointers. */ + "ld1 {v24.2d - v25.2d}, [%[elts_p]] \n\t" /* B.3 (CQE 0) extract 16B fields. */ "tbl v20.16b, {v16.16b - v19.16b}, %[cqe_shuf_m].16b \n\t" /* B.4 (CQE 0) adjust CRC length. */ "sub v20.8h, v20.8h, %[crc_adj].8h \n\t" - /* A.1 load mbuf pointers. */ - "ld1 {v24.2d - v25.2d}, [%[elts_p]] \n\t" /* D.1 extract op_own byte. */ "tbl %[op_own].8b, {v20.16b - v23.16b}, %[owner_shuf_m].8b \n\t" /* C.2 (CQE 3) adjust flow mark. */ @@ -916,9 +896,9 @@ rxq_burst_v(struct mlx5_rxq_data *rxq, struct rte_mbuf **pkts, uint16_t pkts_n) [byte_cnt]"=&w"(byte_cnt), [ptype_info]"=&w"(ptype_info), [flow_tag]"=&w"(flow_tag) - :[p3]"r"(p3 + 48), [p2]"r"(p2 + 48), - [p1]"r"(p1 + 48), [p0]"r"(p0 + 48), + :[p3]"r"(p3), [p2]"r"(p2), [p1]"r"(p1), [p0]"r"(p0), [e3]"r"(e3), [e2]"r"(e2), [e1]"r"(e1), [e0]"r"(e0), + [c3]"w"(c3), [c2]"w"(c2), [c1]"w"(c1), [c0]"w"(c0), [elts_p]"r"(elts_p), [pkts_p]"r"(pkts_p), [cqe_shuf_m]"w"(cqe_shuf_m), @@ -970,8 +950,7 @@ rxq_burst_v(struct mlx5_rxq_data *rxq, struct rte_mbuf **pkts, uint16_t pkts_n) opcode = vceq_u16(resp_err_check, opcode); opcode = vbic_u16(opcode, invalid_mask); /* D.4 mark if any error is set */ - rxq->pending_err |= - !!vget_lane_u64(vreinterpret_u64_u16(opcode), 0); + *err |= vget_lane_u64(vreinterpret_u64_u16(opcode), 0); /* C.4 fill in mbuf - rearm_data and packet_type. */ rxq_cq_to_ptype_oflags_v(rxq, ptype_info, flow_tag, opcode, &elts[pos]); diff --git a/drivers/net/mlx5/mlx5_rxtx_vec_sse.h b/drivers/net/mlx5/mlx5_rxtx_vec_sse.h index 2b9f1601..c088bcb5 100644 --- a/drivers/net/mlx5/mlx5_rxtx_vec_sse.h +++ b/drivers/net/mlx5/mlx5_rxtx_vec_sse.h @@ -1,34 +1,6 @@ -/*- - * BSD LICENSE - * - * Copyright 2017 6WIND S.A. - * Copyright 2017 Mellanox. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * * Neither the name of 6WIND S.A. nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright 2017 6WIND S.A. + * Copyright 2017 Mellanox. */ #ifndef RTE_PMD_MLX5_RXTX_VEC_SSE_H_ @@ -135,6 +107,8 @@ txq_scatter_v(struct mlx5_txq_data *txq, struct rte_mbuf **pkts, assert(elts_n > pkts_n); mlx5_tx_complete(txq); + /* A CQE slot must always be available. */ + assert((1u << txq->cqe_n) - (txq->cq_pi - txq->cq_ci)); if (unlikely(!pkts_n)) return 0; for (n = 0; n < pkts_n; ++n) { @@ -148,7 +122,7 @@ txq_scatter_v(struct mlx5_txq_data *txq, struct rte_mbuf **pkts, 8, 9, 10, 11, /* bswap32 */ 4, 5, 6, 7, /* bswap32 */ 0, 1, 2, 3 /* bswap32 */); - uint8_t cs_flags = 0; + uint8_t cs_flags; uint16_t max_elts; uint16_t max_wqe; __m128i *t_wqe, *dseg; @@ -170,22 +144,7 @@ txq_scatter_v(struct mlx5_txq_data *txq, struct rte_mbuf **pkts, } wqe = &((volatile struct mlx5_wqe64 *) txq->wqes)[wqe_ci & wq_mask].hdr; - if (buf->ol_flags & - (PKT_TX_IP_CKSUM | PKT_TX_TCP_CKSUM | PKT_TX_UDP_CKSUM)) { - const uint64_t is_tunneled = - buf->ol_flags & (PKT_TX_TUNNEL_GRE | - PKT_TX_TUNNEL_VXLAN); - - if (is_tunneled && txq->tunnel_en) { - cs_flags = MLX5_ETH_WQE_L3_INNER_CSUM | - MLX5_ETH_WQE_L4_INNER_CSUM; - if (buf->ol_flags & PKT_TX_OUTER_IP_CKSUM) - cs_flags |= MLX5_ETH_WQE_L3_CSUM; - } else { - cs_flags = MLX5_ETH_WQE_L3_CSUM | - MLX5_ETH_WQE_L4_CSUM; - } - } + cs_flags = txq_ol_cksum_to_cs(txq, buf); /* Title WQEBB pointer. */ t_wqe = (__m128i *)wqe; dseg = (__m128i *)(wqe + 1); @@ -221,7 +180,9 @@ txq_scatter_v(struct mlx5_txq_data *txq, struct rte_mbuf **pkts, wqe->ctrl[2] = rte_cpu_to_be_32(8); wqe->ctrl[3] = txq->elts_head; txq->elts_comp = 0; +#ifndef NDEBUG ++txq->cq_pi; +#endif } #ifdef MLX5_PMD_SOFT_COUNTERS txq->stats.opackets += n; @@ -234,7 +195,7 @@ txq_scatter_v(struct mlx5_txq_data *txq, struct rte_mbuf **pkts, * Send burst of packets with Enhanced MPW. If it encounters a multi-seg packet, * it returns to make it processed by txq_scatter_v(). All the packets in * the pkts list should be single segment packets having same offload flags. - * This must be checked by txq_check_multiseg() and txq_calc_offload(). + * This must be checked by txq_count_contig_single_seg() and txq_calc_offload(). * * @param txq * Pointer to TX queue structure. @@ -283,6 +244,8 @@ txq_burst_v(struct mlx5_txq_data *txq, struct rte_mbuf **pkts, uint16_t pkts_n, assert(elts_n > pkts_n); mlx5_tx_complete(txq); max_elts = (elts_n - (elts_head - txq->elts_tail)); + /* A CQE slot must always be available. */ + assert((1u << txq->cqe_n) - (txq->cq_pi - txq->cq_ci)); max_wqe = (1u << txq->wqe_n) - (txq->wqe_ci - txq->wqe_pi); pkts_n = RTE_MIN((unsigned int)RTE_MIN(pkts_n, max_wqe), max_elts); assert(pkts_n <= MLX5_DSEG_MAX - nb_dword_in_hdr); @@ -322,7 +285,9 @@ txq_burst_v(struct mlx5_txq_data *txq, struct rte_mbuf **pkts, uint16_t pkts_n, } else { /* Request a completion. */ txq->elts_comp = 0; +#ifndef NDEBUG ++txq->cq_pi; +#endif comp_req = 8; } /* Fill CTRL in the header. */ @@ -591,7 +556,7 @@ rxq_cq_to_ptype_oflags_v(struct mlx5_rxq_data *rxq, __m128i cqes[4], _mm_set_epi32(0xffffff00, 0xffffff00, 0xffffff00, 0xffffff00); const __m128i fdir_flags = _mm_set1_epi32(PKT_RX_FDIR); - const __m128i fdir_id_flags = _mm_set1_epi32(PKT_RX_FDIR_ID); + __m128i fdir_id_flags = _mm_set1_epi32(PKT_RX_FDIR_ID); __m128i flow_tag, invalid_mask; flow_tag = _mm_and_si128(pinfo, pinfo_ft_mask); @@ -601,7 +566,7 @@ rxq_cq_to_ptype_oflags_v(struct mlx5_rxq_data *rxq, __m128i cqes[4], _mm_andnot_si128(invalid_mask, fdir_flags)); /* Mask out invalid entries. */ - flow_tag = _mm_andnot_si128(invalid_mask, flow_tag); + fdir_id_flags = _mm_andnot_si128(invalid_mask, fdir_id_flags); /* Check if flow tag MLX5_FLOW_MARK_DEFAULT. */ ol_flags = _mm_or_si128(ol_flags, _mm_andnot_si128( @@ -669,12 +634,16 @@ rxq_cq_to_ptype_oflags_v(struct mlx5_rxq_data *rxq, __m128i cqes[4], * Array to store received packets. * @param pkts_n * Maximum number of packets in array. + * @param[out] err + * Pointer to a flag. Set non-zero value if pkts array has at least one error + * packet to handle. * * @return * Number of packets received including errors (<= pkts_n). */ static inline uint16_t -rxq_burst_v(struct mlx5_rxq_data *rxq, struct rte_mbuf **pkts, uint16_t pkts_n) +rxq_burst_v(struct mlx5_rxq_data *rxq, struct rte_mbuf **pkts, uint16_t pkts_n, + uint64_t *err) { const uint16_t q_n = 1 << rxq->cqe_n; const uint16_t q_mask = q_n - 1; @@ -836,7 +805,7 @@ rxq_burst_v(struct mlx5_rxq_data *rxq, struct rte_mbuf **pkts, uint16_t pkts_n) /* B.2 copy mbuf pointers. */ _mm_storeu_si128((__m128i *)&pkts[pos], mbp1); _mm_storeu_si128((__m128i *)&pkts[pos + 2], mbp2); - rte_compiler_barrier(); + rte_cio_rmb(); /* C.1 load remained CQE data and extract necessary fields. */ cqe_tmp2 = _mm_load_si128((__m128i *)&cq[pos + p3]); cqe_tmp1 = _mm_load_si128((__m128i *)&cq[pos + p2]); @@ -936,7 +905,7 @@ rxq_burst_v(struct mlx5_rxq_data *rxq, struct rte_mbuf **pkts, uint16_t pkts_n) opcode = _mm_packs_epi32(opcode, zero); opcode = _mm_andnot_si128(invalid_mask, opcode); /* D.4 mark if any error is set */ - rxq->pending_err |= !!_mm_cvtsi128_si64(opcode); + *err |= _mm_cvtsi128_si64(opcode); /* D.5 fill in mbuf - rearm_data and packet_type. */ rxq_cq_to_ptype_oflags_v(rxq, cqes, opcode, &pkts[pos]); if (rxq->hw_timestamp) { diff --git a/drivers/net/mlx5/mlx5_socket.c b/drivers/net/mlx5/mlx5_socket.c index 5cd1ab80..61c1a4a5 100644 --- a/drivers/net/mlx5/mlx5_socket.c +++ b/drivers/net/mlx5/mlx5_socket.c @@ -1,34 +1,7 @@ -/*- - * BSD LICENSE - * - * Copyright 2016 6WIND S.A. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * * Neither the name of 6WIND S.A. nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright 2016 6WIND S.A. */ + #define _GNU_SOURCE #include <sys/types.h> diff --git a/drivers/net/mlx5/mlx5_stats.c b/drivers/net/mlx5/mlx5_stats.c index 5e225d37..378472a7 100644 --- a/drivers/net/mlx5/mlx5_stats.c +++ b/drivers/net/mlx5/mlx5_stats.c @@ -1,40 +1,12 @@ -/*- - * BSD LICENSE - * - * Copyright 2015 6WIND S.A. - * Copyright 2015 Mellanox. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * * Neither the name of 6WIND S.A. nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright 2015 6WIND S.A. + * Copyright 2015 Mellanox. */ #include <linux/sockios.h> #include <linux/ethtool.h> -#include <rte_ethdev.h> +#include <rte_ethdev_driver.h> #include <rte_common.h> #include <rte_malloc.h> @@ -122,6 +94,22 @@ static const struct mlx5_counter_ctrl mlx5_counters_init[] = { .dpdk_name = "rx_out_of_buffer", .ctr_name = "out_of_buffer", }, + { + .dpdk_name = "tx_packets_phy", + .ctr_name = "tx_packets_phy", + }, + { + .dpdk_name = "rx_packets_phy", + .ctr_name = "rx_packets_phy", + }, + { + .dpdk_name = "tx_bytes_phy", + .ctr_name = "tx_bytes_phy", + }, + { + .dpdk_name = "rx_bytes_phy", + .ctr_name = "rx_bytes_phy", + }, }; static const unsigned int xstats_n = RTE_DIM(mlx5_counters_init); @@ -143,11 +131,9 @@ priv_read_dev_counters(struct priv *priv, uint64_t *stats) struct mlx5_xstats_ctrl *xstats_ctrl = &priv->xstats_ctrl; unsigned int i; struct ifreq ifr; - unsigned int stats_sz = (xstats_ctrl->stats_n * sizeof(uint64_t)) + - sizeof(struct ethtool_stats); - struct ethtool_stats et_stats[(stats_sz + ( - sizeof(struct ethtool_stats) - 1)) / - sizeof(struct ethtool_stats)]; + unsigned int stats_sz = xstats_ctrl->stats_n * sizeof(uint64_t); + unsigned char et_stat_buf[sizeof(struct ethtool_stats) + stats_sz]; + struct ethtool_stats *et_stats = (struct ethtool_stats *)et_stat_buf; et_stats->cmd = ETHTOOL_GSTATS; et_stats->n_stats = xstats_ctrl->stats_n; @@ -321,7 +307,7 @@ priv_xstats_reset(struct priv *priv) int mlx5_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats) { - struct priv *priv = mlx5_get_priv(dev); + struct priv *priv = dev->data->dev_private; struct rte_eth_stats tmp = {0}; unsigned int i; unsigned int idx; @@ -427,7 +413,7 @@ int mlx5_xstats_get(struct rte_eth_dev *dev, struct rte_eth_xstat *stats, unsigned int n) { - struct priv *priv = mlx5_get_priv(dev); + struct priv *priv = dev->data->dev_private; int ret = xstats_n; if (n >= xstats_n && stats) { @@ -457,7 +443,7 @@ mlx5_xstats_get(struct rte_eth_dev *dev, void mlx5_xstats_reset(struct rte_eth_dev *dev) { - struct priv *priv = mlx5_get_priv(dev); + struct priv *priv = dev->data->dev_private; struct mlx5_xstats_ctrl *xstats_ctrl = &priv->xstats_ctrl; int stats_n; @@ -489,7 +475,7 @@ int mlx5_xstats_get_names(struct rte_eth_dev *dev, struct rte_eth_xstat_name *xstats_names, unsigned int n) { - struct priv *priv = mlx5_get_priv(dev); + struct priv *priv = dev->data->dev_private; unsigned int i; if (n >= xstats_n && xstats_names) { diff --git a/drivers/net/mlx5/mlx5_trigger.c b/drivers/net/mlx5/mlx5_trigger.c index 5de2d026..f5711a99 100644 --- a/drivers/net/mlx5/mlx5_trigger.c +++ b/drivers/net/mlx5/mlx5_trigger.c @@ -1,39 +1,12 @@ -/*- - * BSD LICENSE - * - * Copyright 2015 6WIND S.A. - * Copyright 2015 Mellanox. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * * Neither the name of 6WIND S.A. nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright 2015 6WIND S.A. + * Copyright 2015 Mellanox. */ + #include <unistd.h> #include <rte_ether.h> -#include <rte_ethdev.h> +#include <rte_ethdev_driver.h> #include <rte_interrupts.h> #include <rte_alarm.h> @@ -64,8 +37,11 @@ priv_txq_start(struct priv *priv) if (!txq_ctrl) continue; - LIST_FOREACH(mr, &priv->mr, next) + LIST_FOREACH(mr, &priv->mr, next) { priv_txq_mp2mr_reg(priv, &txq_ctrl->txq, mr->mp, idx++); + if (idx == MLX5_PMD_TX_MP_CACHE) + break; + } txq_alloc_elts(txq_ctrl); txq_ctrl->ibv = mlx5_priv_txq_ibv_new(priv, i); if (!txq_ctrl->ibv) { @@ -73,10 +49,13 @@ priv_txq_start(struct priv *priv) goto error; } } - return -ret; + ret = priv_tx_uar_remap(priv, priv->ctx->cmd_fd); + if (ret) + goto error; + return ret; error: priv_txq_stop(priv); - return -ret; + return ret; } static void @@ -132,9 +111,6 @@ mlx5_dev_start(struct rte_eth_dev *dev) struct mlx5_mr *mr = NULL; int err; - if (mlx5_is_secondary()) - return -E_RTE_SECONDARY; - dev->data->dev_started = 1; priv_lock(priv); err = priv_flow_create_drop_queue(priv); @@ -151,38 +127,29 @@ mlx5_dev_start(struct rte_eth_dev *dev) (void *)dev, strerror(err)); goto error; } - /* Update send callback. */ - priv_dev_select_tx_function(priv, dev); err = priv_rxq_start(priv); if (err) { ERROR("%p: RXQ allocation failed: %s", (void *)dev, strerror(err)); goto error; } - /* Update receive callback. */ - priv_dev_select_rx_function(priv, dev); - err = priv_dev_traffic_enable(priv, dev); - if (err) { - ERROR("%p: an error occurred while configuring control flows:" - " %s", - (void *)priv, strerror(err)); - goto error; - } - err = priv_flow_start(priv, &priv->flows); - if (err) { - ERROR("%p: an error occurred while configuring flows:" - " %s", - (void *)priv, strerror(err)); - goto error; - } err = priv_rx_intr_vec_enable(priv); if (err) { ERROR("%p: RX interrupt vector creation failed", (void *)priv); goto error; } - priv_dev_interrupt_handler_install(priv, dev); priv_xstats_init(priv); + /* Update link status and Tx/Rx callbacks for the first time. */ + memset(&dev->data->dev_link, 0, sizeof(struct rte_eth_link)); + INFO("Forcing port %u link to be up", dev->data->port_id); + err = priv_force_link_status_change(priv, ETH_LINK_UP); + if (err) { + DEBUG("Failed to set port %u link to be up", + dev->data->port_id); + goto error; + } + priv_dev_interrupt_handler_install(priv, dev); priv_unlock(priv); return 0; error: @@ -196,7 +163,7 @@ error: priv_rxq_stop(priv); priv_flow_delete_drop_queue(priv); priv_unlock(priv); - return -err; + return err; } /** @@ -213,9 +180,6 @@ mlx5_dev_stop(struct rte_eth_dev *dev) struct priv *priv = dev->data->dev_private; struct mlx5_mr *mr; - if (mlx5_is_secondary()) - return; - priv_lock(priv); dev->data->dev_started = 0; /* Prevent crashes when queues are still in use. */ diff --git a/drivers/net/mlx5/mlx5_txq.c b/drivers/net/mlx5/mlx5_txq.c index 9c5860ff..ed1c713e 100644 --- a/drivers/net/mlx5/mlx5_txq.c +++ b/drivers/net/mlx5/mlx5_txq.c @@ -1,34 +1,6 @@ -/*- - * BSD LICENSE - * - * Copyright 2015 6WIND S.A. - * Copyright 2015 Mellanox. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * * Neither the name of 6WIND S.A. nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright 2015 6WIND S.A. + * Copyright 2015 Mellanox. */ #include <stddef.h> @@ -51,7 +23,7 @@ #include <rte_mbuf.h> #include <rte_malloc.h> -#include <rte_ethdev.h> +#include <rte_ethdev_driver.h> #include <rte_common.h> #include "mlx5_utils.h" @@ -59,6 +31,7 @@ #include "mlx5.h" #include "mlx5_rxtx.h" #include "mlx5_autoconf.h" +#include "mlx5_glue.h" /** * Allocate TX queue elements. @@ -116,6 +89,63 @@ txq_free_elts(struct mlx5_txq_ctrl *txq_ctrl) } /** + * Returns the per-port supported offloads. + * + * @param priv + * Pointer to private structure. + * + * @return + * Supported Tx offloads. + */ +uint64_t +mlx5_priv_get_tx_port_offloads(struct priv *priv) +{ + uint64_t offloads = (DEV_TX_OFFLOAD_MULTI_SEGS | + DEV_TX_OFFLOAD_VLAN_INSERT); + struct mlx5_dev_config *config = &priv->config; + + if (config->hw_csum) + offloads |= (DEV_TX_OFFLOAD_IPV4_CKSUM | + DEV_TX_OFFLOAD_UDP_CKSUM | + DEV_TX_OFFLOAD_TCP_CKSUM); + if (config->tso) + offloads |= DEV_TX_OFFLOAD_TCP_TSO; + if (config->tunnel_en) { + if (config->hw_csum) + offloads |= DEV_TX_OFFLOAD_OUTER_IPV4_CKSUM; + if (config->tso) + offloads |= (DEV_TX_OFFLOAD_VXLAN_TNL_TSO | + DEV_TX_OFFLOAD_GRE_TNL_TSO); + } + return offloads; +} + +/** + * Checks if the per-queue offload configuration is valid. + * + * @param priv + * Pointer to private structure. + * @param offloads + * Per-queue offloads configuration. + * + * @return + * 1 if the configuration is valid, 0 otherwise. + */ +static int +priv_is_tx_queue_offloads_allowed(struct priv *priv, uint64_t offloads) +{ + uint64_t port_offloads = priv->dev->data->dev_conf.txmode.offloads; + uint64_t port_supp_offloads = mlx5_priv_get_tx_port_offloads(priv); + + /* There are no Tx offloads which are per queue. */ + if ((offloads & port_supp_offloads) != offloads) + return 0; + if ((port_offloads ^ offloads) & port_supp_offloads) + return 0; + return 1; +} + +/** * DPDK callback to configure a TX queue. * * @param dev @@ -142,10 +172,21 @@ mlx5_tx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc, container_of(txq, struct mlx5_txq_ctrl, txq); int ret = 0; - if (mlx5_is_secondary()) - return -E_RTE_SECONDARY; - priv_lock(priv); + /* + * Don't verify port offloads for application which + * use the old API. + */ + if (!!(conf->txq_flags & ETH_TXQ_FLAGS_IGNORE) && + !priv_is_tx_queue_offloads_allowed(priv, conf->offloads)) { + ret = ENOTSUP; + ERROR("%p: Tx queue offloads 0x%" PRIx64 " don't match port " + "offloads 0x%" PRIx64 " or supported offloads 0x%" PRIx64, + (void *)dev, conf->offloads, + dev->data->dev_conf.txmode.offloads, + mlx5_priv_get_tx_port_offloads(priv)); + goto out; + } if (desc <= MLX5_TX_COMP_THRESH) { WARN("%p: number of descriptors requested for TX queue %u" " must be higher than MLX5_TX_COMP_THRESH, using" @@ -203,9 +244,6 @@ mlx5_tx_queue_release(void *dpdk_txq) struct priv *priv; unsigned int i; - if (mlx5_is_secondary()) - return; - if (txq == NULL) return; txq_ctrl = container_of(txq, struct mlx5_txq_ctrl, txq); @@ -223,7 +261,9 @@ mlx5_tx_queue_release(void *dpdk_txq) /** - * Map locally UAR used in Tx queues for BlueFlame doorbell. + * Mmap TX UAR(HW doorbell) pages into reserved UAR address space. + * Both primary and secondary process do mmap to make UAR address + * aligned. * * @param[in] priv * Pointer to private structure. @@ -240,11 +280,14 @@ priv_tx_uar_remap(struct priv *priv, int fd) uintptr_t pages[priv->txqs_n]; unsigned int pages_n = 0; uintptr_t uar_va; + uintptr_t off; void *addr; + void *ret; struct mlx5_txq_data *txq; struct mlx5_txq_ctrl *txq_ctrl; int already_mapped; size_t page_size = sysconf(_SC_PAGESIZE); + int r; memset(pages, 0, priv->txqs_n * sizeof(uintptr_t)); /* @@ -253,10 +296,14 @@ priv_tx_uar_remap(struct priv *priv, int fd) * Ref to libmlx5 function: mlx5_init_context() */ for (i = 0; i != priv->txqs_n; ++i) { + if (!(*priv->txqs)[i]) + continue; txq = (*priv->txqs)[i]; txq_ctrl = container_of(txq, struct mlx5_txq_ctrl, txq); - uar_va = (uintptr_t)txq_ctrl->txq.bf_reg; - uar_va = RTE_ALIGN_FLOOR(uar_va, page_size); + /* UAR addr form verbs used to find dup and offset in page. */ + uar_va = (uintptr_t)txq_ctrl->bf_reg_orig; + off = uar_va & (page_size - 1); /* offset in page. */ + uar_va = RTE_ALIGN_FLOOR(uar_va, page_size); /* page addr. */ already_mapped = 0; for (j = 0; j != pages_n; ++j) { if (pages[j] == uar_va) { @@ -264,21 +311,54 @@ priv_tx_uar_remap(struct priv *priv, int fd) break; } } - if (already_mapped) - continue; - pages[pages_n++] = uar_va; - addr = mmap((void *)uar_va, page_size, - PROT_WRITE, MAP_FIXED | MAP_SHARED, fd, - txq_ctrl->uar_mmap_offset); - if (addr != (void *)uar_va) { - ERROR("call to mmap failed on UAR for txq %d\n", i); - return -1; + /* new address in reserved UAR address space. */ + addr = RTE_PTR_ADD(priv->uar_base, + uar_va & (MLX5_UAR_SIZE - 1)); + if (!already_mapped) { + pages[pages_n++] = uar_va; + /* fixed mmap to specified address in reserved + * address space. + */ + ret = mmap(addr, page_size, + PROT_WRITE, MAP_FIXED | MAP_SHARED, fd, + txq_ctrl->uar_mmap_offset); + if (ret != addr) { + /* fixed mmap have to return same address */ + ERROR("call to mmap failed on UAR for txq %d\n", + i); + r = ENXIO; + return r; + } } + if (rte_eal_process_type() == RTE_PROC_PRIMARY) /* save once */ + txq_ctrl->txq.bf_reg = RTE_PTR_ADD((void *)addr, off); + else + assert(txq_ctrl->txq.bf_reg == + RTE_PTR_ADD((void *)addr, off)); } return 0; } /** + * Check if the burst function is using eMPW. + * + * @param tx_pkt_burst + * Tx burst function pointer. + * + * @return + * 1 if the burst function is using eMPW, 0 otherwise. + */ +static int +is_empw_burst_func(eth_tx_burst_t tx_pkt_burst) +{ + if (tx_pkt_burst == mlx5_tx_burst_raw_vec || + tx_pkt_burst == mlx5_tx_burst_vec || + tx_pkt_burst == mlx5_tx_burst_empw) + return 1; + return 0; +} + +/** * Create the Tx queue Verbs object. * * @param priv @@ -308,9 +388,12 @@ mlx5_priv_txq_ibv_new(struct priv *priv, uint16_t idx) struct mlx5dv_cq cq_info; struct mlx5dv_obj obj; const int desc = 1 << txq_data->elts_n; + eth_tx_burst_t tx_pkt_burst = priv_select_tx_function(priv, priv->dev); int ret = 0; assert(txq_data); + priv->verbs_alloc_ctx.type = MLX5_VERBS_ALLOC_TYPE_TX_QUEUE; + priv->verbs_alloc_ctx.obj = txq_ctrl; if (mlx5_getenv_int("MLX5_ENABLE_CQE_COMPRESSION")) { ERROR("MLX5_ENABLE_CQE_COMPRESSION must never be set"); goto error; @@ -322,9 +405,9 @@ mlx5_priv_txq_ibv_new(struct priv *priv, uint16_t idx) }; cqe_n = ((desc / MLX5_TX_COMP_THRESH) - 1) ? ((desc / MLX5_TX_COMP_THRESH) - 1) : 1; - if (priv->mps == MLX5_MPW_ENHANCED) + if (is_empw_burst_func(tx_pkt_burst)) cqe_n += MLX5_TX_COMP_THRESH_INLINE_DIV; - tmpl.cq = ibv_create_cq(priv->ctx, cqe_n, NULL, NULL, 0); + tmpl.cq = mlx5_glue->create_cq(priv->ctx, cqe_n, NULL, NULL, 0); if (tmpl.cq == NULL) { ERROR("%p: CQ creation failure", (void *)txq_ctrl); goto error; @@ -359,13 +442,13 @@ mlx5_priv_txq_ibv_new(struct priv *priv, uint16_t idx) .pd = priv->pd, .comp_mask = IBV_QP_INIT_ATTR_PD, }; - if (txq_data->inline_en) + if (txq_data->max_inline) attr.init.cap.max_inline_data = txq_ctrl->max_inline_data; if (txq_data->tso_en) { attr.init.max_tso_header = txq_ctrl->max_tso_header; attr.init.comp_mask |= IBV_QP_INIT_ATTR_MAX_TSO_HEADER; } - tmpl.qp = ibv_create_qp_ex(priv->ctx, &attr.init); + tmpl.qp = mlx5_glue->create_qp_ex(priv->ctx, &attr.init); if (tmpl.qp == NULL) { ERROR("%p: QP creation failure", (void *)txq_ctrl); goto error; @@ -376,7 +459,8 @@ mlx5_priv_txq_ibv_new(struct priv *priv, uint16_t idx) /* Primary port number. */ .port_num = priv->port }; - ret = ibv_modify_qp(tmpl.qp, &attr.mod, (IBV_QP_STATE | IBV_QP_PORT)); + ret = mlx5_glue->modify_qp(tmpl.qp, &attr.mod, + (IBV_QP_STATE | IBV_QP_PORT)); if (ret) { ERROR("%p: QP state to IBV_QPS_INIT failed", (void *)txq_ctrl); goto error; @@ -384,13 +468,13 @@ mlx5_priv_txq_ibv_new(struct priv *priv, uint16_t idx) attr.mod = (struct ibv_qp_attr){ .qp_state = IBV_QPS_RTR }; - ret = ibv_modify_qp(tmpl.qp, &attr.mod, IBV_QP_STATE); + ret = mlx5_glue->modify_qp(tmpl.qp, &attr.mod, IBV_QP_STATE); if (ret) { ERROR("%p: QP state to IBV_QPS_RTR failed", (void *)txq_ctrl); goto error; } attr.mod.qp_state = IBV_QPS_RTS; - ret = ibv_modify_qp(tmpl.qp, &attr.mod, IBV_QP_STATE); + ret = mlx5_glue->modify_qp(tmpl.qp, &attr.mod, IBV_QP_STATE); if (ret) { ERROR("%p: QP state to IBV_QPS_RTS failed", (void *)txq_ctrl); goto error; @@ -405,7 +489,7 @@ mlx5_priv_txq_ibv_new(struct priv *priv, uint16_t idx) obj.cq.out = &cq_info; obj.qp.in = tmpl.qp; obj.qp.out = &qp; - ret = mlx5dv_init_obj(&obj, MLX5DV_OBJ_CQ | MLX5DV_OBJ_QP); + ret = mlx5_glue->dv_init_obj(&obj, MLX5DV_OBJ_CQ | MLX5DV_OBJ_QP); if (ret != 0) goto error; if (cq_info.cqe_size != RTE_CACHE_LINE_SIZE) { @@ -418,13 +502,15 @@ mlx5_priv_txq_ibv_new(struct priv *priv, uint16_t idx) txq_data->wqes = qp.sq.buf; txq_data->wqe_n = log2above(qp.sq.wqe_cnt); txq_data->qp_db = &qp.dbrec[MLX5_SND_DBR]; - txq_data->bf_reg = qp.bf.reg; + txq_ctrl->bf_reg_orig = qp.bf.reg; txq_data->cq_db = cq_info.dbrec; txq_data->cqes = (volatile struct mlx5_cqe (*)[]) (uintptr_t)cq_info.buf; txq_data->cq_ci = 0; +#ifndef NDEBUG txq_data->cq_pi = 0; +#endif txq_data->wqe_ci = 0; txq_data->wqe_pi = 0; txq_ibv->qp = tmpl.qp; @@ -439,12 +525,14 @@ mlx5_priv_txq_ibv_new(struct priv *priv, uint16_t idx) DEBUG("%p: Verbs Tx queue %p: refcnt %d", (void *)priv, (void *)txq_ibv, rte_atomic32_read(&txq_ibv->refcnt)); LIST_INSERT_HEAD(&priv->txqsibv, txq_ibv, next); + priv->verbs_alloc_ctx.type = MLX5_VERBS_ALLOC_TYPE_NONE; return txq_ibv; error: if (tmpl.cq) - claim_zero(ibv_destroy_cq(tmpl.cq)); + claim_zero(mlx5_glue->destroy_cq(tmpl.cq)); if (tmpl.qp) - claim_zero(ibv_destroy_qp(tmpl.qp)); + claim_zero(mlx5_glue->destroy_qp(tmpl.qp)); + priv->verbs_alloc_ctx.type = MLX5_VERBS_ALLOC_TYPE_NONE; return NULL; } @@ -497,8 +585,8 @@ mlx5_priv_txq_ibv_release(struct priv *priv, struct mlx5_txq_ibv *txq_ibv) DEBUG("%p: Verbs Tx queue %p: refcnt %d", (void *)priv, (void *)txq_ibv, rte_atomic32_read(&txq_ibv->refcnt)); if (rte_atomic32_dec_and_test(&txq_ibv->refcnt)) { - claim_zero(ibv_destroy_qp(txq_ibv->qp)); - claim_zero(ibv_destroy_cq(txq_ibv->cq)); + claim_zero(mlx5_glue->destroy_qp(txq_ibv->qp)); + claim_zero(mlx5_glue->destroy_cq(txq_ibv->cq)); LIST_REMOVE(txq_ibv, next); rte_free(txq_ibv); return 0; @@ -545,84 +633,73 @@ mlx5_priv_txq_ibv_verify(struct priv *priv) } /** - * Create a DPDK Tx queue. + * Set Tx queue parameters from device configuration. * - * @param priv - * Pointer to private structure. - * @param idx - * TX queue index. - * @param desc - * Number of descriptors to configure in queue. - * @param socket - * NUMA socket on which memory must be allocated. - * @param[in] conf - * Thresholds parameters. - * - * @return - * A DPDK queue object on success. + * @param txq_ctrl + * Pointer to Tx queue control structure. */ -struct mlx5_txq_ctrl* -mlx5_priv_txq_new(struct priv *priv, uint16_t idx, uint16_t desc, - unsigned int socket, - const struct rte_eth_txconf *conf) +static void +txq_set_params(struct mlx5_txq_ctrl *txq_ctrl) { + struct priv *priv = txq_ctrl->priv; + struct mlx5_dev_config *config = &priv->config; const unsigned int max_tso_inline = ((MLX5_MAX_TSO_HEADER + (RTE_CACHE_LINE_SIZE - 1)) / RTE_CACHE_LINE_SIZE); - struct mlx5_txq_ctrl *tmpl; - - tmpl = rte_calloc_socket("TXQ", 1, - sizeof(*tmpl) + - desc * sizeof(struct rte_mbuf *), - 0, socket); - if (!tmpl) - return NULL; - assert(desc > MLX5_TX_COMP_THRESH); - tmpl->txq.flags = conf->txq_flags; - tmpl->priv = priv; - tmpl->socket = socket; - tmpl->txq.elts_n = log2above(desc); - if (priv->mps == MLX5_MPW_ENHANCED) - tmpl->txq.mpw_hdr_dseg = priv->mpw_hdr_dseg; - /* MRs will be registered in mp2mr[] later. */ - DEBUG("priv->device_attr.max_qp_wr is %d", - priv->device_attr.orig_attr.max_qp_wr); - DEBUG("priv->device_attr.max_sge is %d", - priv->device_attr.orig_attr.max_sge); - if (priv->txq_inline && (priv->txqs_n >= priv->txqs_inline)) { + unsigned int txq_inline; + unsigned int txqs_inline; + unsigned int inline_max_packet_sz; + eth_tx_burst_t tx_pkt_burst = priv_select_tx_function(priv, priv->dev); + int is_empw_func = is_empw_burst_func(tx_pkt_burst); + int tso = !!(txq_ctrl->txq.offloads & DEV_TX_OFFLOAD_TCP_TSO); + + txq_inline = (config->txq_inline == MLX5_ARG_UNSET) ? + 0 : config->txq_inline; + txqs_inline = (config->txqs_inline == MLX5_ARG_UNSET) ? + 0 : config->txqs_inline; + inline_max_packet_sz = + (config->inline_max_packet_sz == MLX5_ARG_UNSET) ? + 0 : config->inline_max_packet_sz; + if (is_empw_func) { + if (config->txq_inline == MLX5_ARG_UNSET) + txq_inline = MLX5_WQE_SIZE_MAX - MLX5_WQE_SIZE; + if (config->txqs_inline == MLX5_ARG_UNSET) + txqs_inline = MLX5_EMPW_MIN_TXQS; + if (config->inline_max_packet_sz == MLX5_ARG_UNSET) + inline_max_packet_sz = MLX5_EMPW_MAX_INLINE_LEN; + txq_ctrl->txq.mpw_hdr_dseg = config->mpw_hdr_dseg; + txq_ctrl->txq.inline_max_packet_sz = inline_max_packet_sz; + } + if (txq_inline && priv->txqs_n >= txqs_inline) { unsigned int ds_cnt; - tmpl->txq.max_inline = - ((priv->txq_inline + (RTE_CACHE_LINE_SIZE - 1)) / + txq_ctrl->txq.max_inline = + ((txq_inline + (RTE_CACHE_LINE_SIZE - 1)) / RTE_CACHE_LINE_SIZE); - tmpl->txq.inline_en = 1; - /* TSO and MPS can't be enabled concurrently. */ - assert(!priv->tso || !priv->mps); - if (priv->mps == MLX5_MPW_ENHANCED) { - tmpl->txq.inline_max_packet_sz = - priv->inline_max_packet_sz; + if (is_empw_func) { /* To minimize the size of data set, avoid requesting * too large WQ. */ - tmpl->max_inline_data = - ((RTE_MIN(priv->txq_inline, - priv->inline_max_packet_sz) + + txq_ctrl->max_inline_data = + ((RTE_MIN(txq_inline, + inline_max_packet_sz) + (RTE_CACHE_LINE_SIZE - 1)) / RTE_CACHE_LINE_SIZE) * RTE_CACHE_LINE_SIZE; - } else if (priv->tso) { - int inline_diff = tmpl->txq.max_inline - max_tso_inline; + } else if (tso) { + int inline_diff = txq_ctrl->txq.max_inline - + max_tso_inline; /* * Adjust inline value as Verbs aggregates * tso_inline and txq_inline fields. */ - tmpl->max_inline_data = inline_diff > 0 ? + txq_ctrl->max_inline_data = inline_diff > 0 ? inline_diff * RTE_CACHE_LINE_SIZE : 0; } else { - tmpl->max_inline_data = - tmpl->txq.max_inline * RTE_CACHE_LINE_SIZE; + txq_ctrl->max_inline_data = + txq_ctrl->txq.max_inline * RTE_CACHE_LINE_SIZE; } /* * Check if the inline size is too large in a way which @@ -632,7 +709,7 @@ mlx5_priv_txq_new(struct priv *priv, uint16_t idx, uint16_t desc, * WQE ETH (1 DS) * Inline part (N DS) */ - ds_cnt = 2 + (tmpl->txq.max_inline / MLX5_WQE_DWORD_SIZE); + ds_cnt = 2 + (txq_ctrl->txq.max_inline / MLX5_WQE_DWORD_SIZE); if (ds_cnt > MLX5_DSEG_MAX) { unsigned int max_inline = (MLX5_DSEG_MAX - 2) * MLX5_WQE_DWORD_SIZE; @@ -641,18 +718,61 @@ mlx5_priv_txq_new(struct priv *priv, uint16_t idx, uint16_t desc, RTE_CACHE_LINE_SIZE); WARN("txq inline is too large (%d) setting it to " "the maximum possible: %d\n", - priv->txq_inline, max_inline); - tmpl->txq.max_inline = max_inline / RTE_CACHE_LINE_SIZE; + txq_inline, max_inline); + txq_ctrl->txq.max_inline = max_inline / + RTE_CACHE_LINE_SIZE; } } - if (priv->tso) { - tmpl->max_tso_header = max_tso_inline * RTE_CACHE_LINE_SIZE; - tmpl->txq.max_inline = RTE_MAX(tmpl->txq.max_inline, - max_tso_inline); - tmpl->txq.tso_en = 1; + if (tso) { + txq_ctrl->max_tso_header = max_tso_inline * RTE_CACHE_LINE_SIZE; + txq_ctrl->txq.max_inline = RTE_MAX(txq_ctrl->txq.max_inline, + max_tso_inline); + txq_ctrl->txq.tso_en = 1; } - if (priv->tunnel_en) - tmpl->txq.tunnel_en = 1; + txq_ctrl->txq.tunnel_en = config->tunnel_en; +} + +/** + * Create a DPDK Tx queue. + * + * @param priv + * Pointer to private structure. + * @param idx + * TX queue index. + * @param desc + * Number of descriptors to configure in queue. + * @param socket + * NUMA socket on which memory must be allocated. + * @param[in] conf + * Thresholds parameters. + * + * @return + * A DPDK queue object on success. + */ +struct mlx5_txq_ctrl* +mlx5_priv_txq_new(struct priv *priv, uint16_t idx, uint16_t desc, + unsigned int socket, + const struct rte_eth_txconf *conf) +{ + struct mlx5_txq_ctrl *tmpl; + + tmpl = rte_calloc_socket("TXQ", 1, + sizeof(*tmpl) + + desc * sizeof(struct rte_mbuf *), + 0, socket); + if (!tmpl) + return NULL; + assert(desc > MLX5_TX_COMP_THRESH); + tmpl->txq.offloads = conf->offloads; + tmpl->priv = priv; + tmpl->socket = socket; + tmpl->txq.elts_n = log2above(desc); + txq_set_params(tmpl); + /* MRs will be registered in mp2mr[] later. */ + DEBUG("priv->device_attr.max_qp_wr is %d", + priv->device_attr.orig_attr.max_qp_wr); + DEBUG("priv->device_attr.max_sge is %d", + priv->device_attr.orig_attr.max_sge); tmpl->txq.elts = (struct rte_mbuf *(*)[1 << tmpl->txq.elts_n])(tmpl + 1); tmpl->txq.stats.idx = idx; @@ -717,6 +837,7 @@ mlx5_priv_txq_release(struct priv *priv, uint16_t idx) { unsigned int i; struct mlx5_txq_ctrl *txq; + size_t page_size = sysconf(_SC_PAGESIZE); if (!(*priv->txqs)[idx]) return 0; @@ -736,6 +857,9 @@ mlx5_priv_txq_release(struct priv *priv, uint16_t idx) txq->txq.mp2mr[i] = NULL; } } + if (priv->uar_base) + munmap((void *)RTE_ALIGN_FLOOR((uintptr_t)txq->txq.bf_reg, + page_size), page_size); if (rte_atomic32_dec_and_test(&txq->refcnt)) { txq_free_elts(txq); LIST_REMOVE(txq, next); diff --git a/drivers/net/mlx5/mlx5_utils.h b/drivers/net/mlx5/mlx5_utils.h index 2fbd10b1..e1bfb9cd 100644 --- a/drivers/net/mlx5/mlx5_utils.h +++ b/drivers/net/mlx5/mlx5_utils.h @@ -1,34 +1,6 @@ -/*- - * BSD LICENSE - * - * Copyright 2015 6WIND S.A. - * Copyright 2015 Mellanox. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * * Neither the name of 6WIND S.A. nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright 2015 6WIND S.A. + * Copyright 2015 Mellanox. */ #ifndef RTE_PMD_MLX5_UTILS_H_ diff --git a/drivers/net/mlx5/mlx5_vlan.c b/drivers/net/mlx5/mlx5_vlan.c index 6fc315ef..75c34562 100644 --- a/drivers/net/mlx5/mlx5_vlan.c +++ b/drivers/net/mlx5/mlx5_vlan.c @@ -1,34 +1,6 @@ -/*- - * BSD LICENSE - * - * Copyright 2015 6WIND S.A. - * Copyright 2015 Mellanox. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * * Neither the name of 6WIND S.A. nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright 2015 6WIND S.A. + * Copyright 2015 Mellanox. */ #include <stddef.h> @@ -36,12 +8,23 @@ #include <assert.h> #include <stdint.h> -#include <rte_ethdev.h> +/* Verbs headers do not support -pedantic. */ +#ifdef PEDANTIC +#pragma GCC diagnostic ignored "-Wpedantic" +#endif +#include <infiniband/mlx5dv.h> +#include <infiniband/verbs.h> +#ifdef PEDANTIC +#pragma GCC diagnostic error "-Wpedantic" +#endif + +#include <rte_ethdev_driver.h> #include <rte_common.h> #include "mlx5_utils.h" #include "mlx5.h" #include "mlx5_autoconf.h" +#include "mlx5_glue.h" /** * DPDK callback to configure a VLAN filter. @@ -127,13 +110,18 @@ priv_vlan_strip_queue_set(struct priv *priv, uint16_t idx, int on) DEBUG("set VLAN offloads 0x%x for port %d queue %d", vlan_offloads, rxq->port_id, idx); + if (!rxq_ctrl->ibv) { + /* Update related bits in RX queue. */ + rxq->vlan_strip = !!on; + return; + } mod = (struct ibv_wq_attr){ .attr_mask = IBV_WQ_ATTR_FLAGS, .flags_mask = IBV_WQ_FLAGS_CVLAN_STRIPPING, .flags = vlan_offloads, }; - err = ibv_modify_wq(rxq_ctrl->ibv->wq, &mod); + err = mlx5_glue->modify_wq(rxq_ctrl->ibv->wq, &mod); if (err) { ERROR("%p: failed to modified stripping mode: %s", (void *)priv, strerror(err)); @@ -160,7 +148,7 @@ mlx5_vlan_strip_queue_set(struct rte_eth_dev *dev, uint16_t queue, int on) struct priv *priv = dev->data->dev_private; /* Validate hw support */ - if (!priv->hw_vlan_strip) { + if (!priv->config.hw_vlan_strip) { ERROR("VLAN stripping is not supported"); return; } @@ -191,9 +179,10 @@ mlx5_vlan_offload_set(struct rte_eth_dev *dev, int mask) unsigned int i; if (mask & ETH_VLAN_STRIP_MASK) { - int hw_vlan_strip = !!dev->data->dev_conf.rxmode.hw_vlan_strip; + int hw_vlan_strip = !!(dev->data->dev_conf.rxmode.offloads & + DEV_RX_OFFLOAD_VLAN_STRIP); - if (!priv->hw_vlan_strip) { + if (!priv->config.hw_vlan_strip) { ERROR("VLAN stripping is not supported"); return 0; } |