diff options
Diffstat (limited to 'drivers/net/mlx4')
-rw-r--r-- | drivers/net/mlx4/Makefile | 38 | ||||
-rw-r--r-- | drivers/net/mlx4/mlx4.c | 214 | ||||
-rw-r--r-- | drivers/net/mlx4/mlx4.h | 49 | ||||
-rw-r--r-- | drivers/net/mlx4/mlx4_ethdev.c | 73 | ||||
-rw-r--r-- | drivers/net/mlx4/mlx4_flow.c | 127 | ||||
-rw-r--r-- | drivers/net/mlx4/mlx4_flow.h | 37 | ||||
-rw-r--r-- | drivers/net/mlx4/mlx4_glue.c | 279 | ||||
-rw-r--r-- | drivers/net/mlx4/mlx4_glue.h | 89 | ||||
-rw-r--r-- | drivers/net/mlx4/mlx4_intr.c | 91 | ||||
-rw-r--r-- | drivers/net/mlx4/mlx4_mr.c | 41 | ||||
-rw-r--r-- | drivers/net/mlx4/mlx4_prm.h | 59 | ||||
-rw-r--r-- | drivers/net/mlx4/mlx4_rxq.c | 167 | ||||
-rw-r--r-- | drivers/net/mlx4/mlx4_rxtx.c | 532 | ||||
-rw-r--r-- | drivers/net/mlx4/mlx4_rxtx.h | 44 | ||||
-rw-r--r-- | drivers/net/mlx4/mlx4_txq.c | 160 | ||||
-rw-r--r-- | drivers/net/mlx4/mlx4_utils.c | 34 | ||||
-rw-r--r-- | drivers/net/mlx4/mlx4_utils.h | 40 |
17 files changed, 1217 insertions, 857 deletions
diff --git a/drivers/net/mlx4/Makefile b/drivers/net/mlx4/Makefile index f1f47c28..cc800493 100644 --- a/drivers/net/mlx4/Makefile +++ b/drivers/net/mlx4/Makefile @@ -33,11 +33,17 @@ include $(RTE_SDK)/mk/rte.vars.mk # Library name. LIB = librte_pmd_mlx4.a +LIB_GLUE = $(LIB_GLUE_BASE).$(LIB_GLUE_VERSION) +LIB_GLUE_BASE = librte_pmd_mlx4_glue.so +LIB_GLUE_VERSION = 18.02.0 # Sources. SRCS-$(CONFIG_RTE_LIBRTE_MLX4_PMD) += mlx4.c SRCS-$(CONFIG_RTE_LIBRTE_MLX4_PMD) += mlx4_ethdev.c SRCS-$(CONFIG_RTE_LIBRTE_MLX4_PMD) += mlx4_flow.c +ifneq ($(CONFIG_RTE_LIBRTE_MLX4_DLOPEN_DEPS),y) +SRCS-$(CONFIG_RTE_LIBRTE_MLX4_PMD) += mlx4_glue.c +endif SRCS-$(CONFIG_RTE_LIBRTE_MLX4_PMD) += mlx4_intr.c SRCS-$(CONFIG_RTE_LIBRTE_MLX4_PMD) += mlx4_mr.c SRCS-$(CONFIG_RTE_LIBRTE_MLX4_PMD) += mlx4_rxq.c @@ -45,6 +51,10 @@ SRCS-$(CONFIG_RTE_LIBRTE_MLX4_PMD) += mlx4_rxtx.c SRCS-$(CONFIG_RTE_LIBRTE_MLX4_PMD) += mlx4_txq.c SRCS-$(CONFIG_RTE_LIBRTE_MLX4_PMD) += mlx4_utils.c +ifeq ($(CONFIG_RTE_LIBRTE_MLX4_DLOPEN_DEPS),y) +INSTALL-$(CONFIG_RTE_LIBRTE_MLX4_PMD)-lib += $(LIB_GLUE) +endif + # Basic CFLAGS. CFLAGS += -O3 CFLAGS += -std=c11 -Wall -Wextra @@ -54,7 +64,14 @@ CFLAGS += -D_BSD_SOURCE CFLAGS += -D_DEFAULT_SOURCE CFLAGS += -D_XOPEN_SOURCE=600 CFLAGS += $(WERROR_FLAGS) +ifeq ($(CONFIG_RTE_LIBRTE_MLX4_DLOPEN_DEPS),y) +CFLAGS += -DMLX4_GLUE='"$(LIB_GLUE)"' +CFLAGS += -DMLX4_GLUE_VERSION='"$(LIB_GLUE_VERSION)"' +CFLAGS_mlx4_glue.o += -fPIC +LDLIBS += -ldl +else LDLIBS += -libverbs -lmlx4 +endif LDLIBS += -lrte_eal -lrte_mbuf -lrte_mempool -lrte_ring LDLIBS += -lrte_ethdev -lrte_net -lrte_kvargs LDLIBS += -lrte_bus_pci @@ -82,10 +99,6 @@ ifdef CONFIG_RTE_LIBRTE_MLX4_TX_MP_CACHE CFLAGS += -DMLX4_PMD_TX_MP_CACHE=$(CONFIG_RTE_LIBRTE_MLX4_TX_MP_CACHE) endif -ifeq ($(CONFIG_RTE_LIBRTE_MLX4_DEBUG_BROKEN_VERBS),y) -CFLAGS += -DMLX4_PMD_DEBUG_BROKEN_VERBS -endif - include $(RTE_SDK)/mk/rte.lib.mk # Generate and clean-up mlx4_autoconf.h. @@ -112,7 +125,24 @@ mlx4_autoconf.h: mlx4_autoconf.h.new $(SRCS-$(CONFIG_RTE_LIBRTE_MLX4_PMD):.c=.o): mlx4_autoconf.h +# Generate dependency plug-in for rdma-core when the PMD must not be linked +# directly, so that applications do not inherit this dependency. + +ifeq ($(CONFIG_RTE_LIBRTE_MLX4_DLOPEN_DEPS),y) + +$(LIB): $(LIB_GLUE) + +$(LIB_GLUE): mlx4_glue.o + $Q $(LD) $(LDFLAGS) $(EXTRA_LDFLAGS) \ + -Wl,-h,$(LIB_GLUE) \ + -s -shared -o $@ $< -libverbs -lmlx4 + +mlx4_glue.o: mlx4_autoconf.h + +endif + clean_mlx4: FORCE $Q rm -f -- mlx4_autoconf.h mlx4_autoconf.h.new + $Q rm -f -- mlx4_glue.o $(LIB_GLUE_BASE)* clean: clean_mlx4 diff --git a/drivers/net/mlx4/mlx4.c b/drivers/net/mlx4/mlx4.c index f9e4f9d7..ee93dafe 100644 --- a/drivers/net/mlx4/mlx4.c +++ b/drivers/net/mlx4/mlx4.c @@ -1,34 +1,6 @@ -/*- - * BSD LICENSE - * - * Copyright 2012 6WIND S.A. - * Copyright 2012 Mellanox - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * * Neither the name of 6WIND S.A. nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright 2012 6WIND S.A. + * Copyright 2012 Mellanox */ /** @@ -37,6 +9,7 @@ */ #include <assert.h> +#include <dlfcn.h> #include <errno.h> #include <inttypes.h> #include <stddef.h> @@ -44,6 +17,7 @@ #include <stdio.h> #include <stdlib.h> #include <string.h> +#include <unistd.h> /* Verbs headers do not support -pedantic. */ #ifdef PEDANTIC @@ -55,9 +29,10 @@ #endif #include <rte_common.h> +#include <rte_config.h> #include <rte_dev.h> #include <rte_errno.h> -#include <rte_ethdev.h> +#include <rte_ethdev_driver.h> #include <rte_ethdev_pci.h> #include <rte_ether.h> #include <rte_flow.h> @@ -67,6 +42,7 @@ #include <rte_mbuf.h> #include "mlx4.h" +#include "mlx4_glue.h" #include "mlx4_flow.h" #include "mlx4_rxtx.h" #include "mlx4_utils.h" @@ -108,7 +84,13 @@ mlx4_dev_configure(struct rte_eth_dev *dev) " flow error type %d, cause %p, message: %s", -ret, strerror(-ret), error.type, error.cause, error.message ? error.message : "(unspecified)"); + goto exit; } + ret = mlx4_intr_install(priv); + if (ret) + ERROR("%p: interrupt handler installation failed", + (void *)dev); +exit: return ret; } @@ -141,7 +123,7 @@ mlx4_dev_start(struct rte_eth_dev *dev) (void *)dev, strerror(-ret)); goto err; } - ret = mlx4_intr_install(priv); + ret = mlx4_rxq_intr_enable(priv); if (ret) { ERROR("%p: interrupt handler installation failed", (void *)dev); @@ -187,7 +169,7 @@ mlx4_dev_stop(struct rte_eth_dev *dev) dev->rx_pkt_burst = mlx4_rx_burst_removed; rte_wmb(); mlx4_flow_sync(priv, NULL); - mlx4_intr_uninstall(priv); + mlx4_rxq_intr_disable(priv); mlx4_rss_deinit(priv); } @@ -218,8 +200,8 @@ mlx4_dev_close(struct rte_eth_dev *dev) mlx4_tx_queue_release(dev->data->tx_queues[i]); if (priv->pd != NULL) { assert(priv->ctx != NULL); - claim_zero(ibv_dealloc_pd(priv->pd)); - claim_zero(ibv_close_device(priv->ctx)); + claim_zero(mlx4_glue->dealloc_pd(priv->pd)); + claim_zero(mlx4_glue->close_device(priv->ctx)); } else assert(priv->ctx == NULL); mlx4_intr_uninstall(priv); @@ -256,6 +238,7 @@ static const struct eth_dev_ops mlx4_dev_ops = { .filter_ctrl = mlx4_filter_ctrl, .rx_queue_intr_enable = mlx4_rx_intr_enable, .rx_queue_intr_disable = mlx4_rx_intr_disable, + .is_removed = mlx4_is_removed, }; /** @@ -336,7 +319,7 @@ mlx4_arg_parse(const char *key, const char *val, struct mlx4_conf *conf) return -rte_errno; } if (strcmp(MLX4_PMD_PORT_KVARG, key) == 0) { - uint32_t ports = rte_log2_u32(conf->ports.present); + uint32_t ports = rte_log2_u32(conf->ports.present + 1); if (tmp >= ports) { ERROR("port index %lu outside range [0,%" PRIu32 ")", @@ -426,6 +409,7 @@ mlx4_pci_probe(struct rte_pci_driver *pci_drv, struct rte_pci_device *pci_dev) int err = 0; struct ibv_context *attr_ctx = NULL; struct ibv_device_attr device_attr; + struct ibv_device_attr_ex device_attr_ex; struct mlx4_conf conf = { .ports.present = 0, }; @@ -434,7 +418,7 @@ mlx4_pci_probe(struct rte_pci_driver *pci_drv, struct rte_pci_device *pci_dev) (void)pci_drv; assert(pci_drv == &mlx4_driver); - list = ibv_get_device_list(&i); + list = mlx4_glue->get_device_list(&i); if (list == NULL) { rte_errno = errno; assert(rte_errno); @@ -463,12 +447,12 @@ mlx4_pci_probe(struct rte_pci_driver *pci_drv, struct rte_pci_device *pci_dev) PCI_DEVICE_ID_MELLANOX_CONNECTX3VF); INFO("PCI information matches, using device \"%s\" (VF: %s)", list[i]->name, (vf ? "true" : "false")); - attr_ctx = ibv_open_device(list[i]); + attr_ctx = mlx4_glue->open_device(list[i]); err = errno; break; } if (attr_ctx == NULL) { - ibv_free_device_list(list); + mlx4_glue->free_device_list(list); switch (err) { case 0: rte_errno = ENODEV; @@ -485,7 +469,7 @@ mlx4_pci_probe(struct rte_pci_driver *pci_drv, struct rte_pci_device *pci_dev) } ibv_dev = list[i]; DEBUG("device opened"); - if (ibv_query_device(attr_ctx, &device_attr)) { + if (mlx4_glue->query_device(attr_ctx, &device_attr)) { rte_errno = ENODEV; goto error; } @@ -499,6 +483,12 @@ mlx4_pci_probe(struct rte_pci_driver *pci_drv, struct rte_pci_device *pci_dev) /* Use all ports when none are defined */ if (!conf.ports.enabled) conf.ports.enabled = conf.ports.present; + /* Retrieve extended device attributes. */ + if (mlx4_glue->query_device_ex(attr_ctx, NULL, &device_attr_ex)) { + rte_errno = ENODEV; + goto error; + } + assert(device_attr.max_sge >= MLX4_MAX_SGE); for (i = 0; i < device_attr.phys_port_cnt; i++) { uint32_t port = i + 1; /* ports are indexed from one */ struct ibv_context *ctx = NULL; @@ -512,13 +502,13 @@ mlx4_pci_probe(struct rte_pci_driver *pci_drv, struct rte_pci_device *pci_dev) if (!(conf.ports.enabled & (1 << i))) continue; DEBUG("using port %u", port); - ctx = ibv_open_device(ibv_dev); + ctx = mlx4_glue->open_device(ibv_dev); if (ctx == NULL) { rte_errno = ENODEV; goto port_error; } /* Check port status. */ - err = ibv_query_port(ctx, port, &port_attr); + err = mlx4_glue->query_port(ctx, port, &port_attr); if (err) { rte_errno = err; ERROR("port query failed: %s", strerror(rte_errno)); @@ -532,7 +522,7 @@ mlx4_pci_probe(struct rte_pci_driver *pci_drv, struct rte_pci_device *pci_dev) } if (port_attr.state != IBV_PORT_ACTIVE) DEBUG("port %d is not active: \"%s\" (%d)", - port, ibv_port_state_str(port_attr.state), + port, mlx4_glue->port_state_str(port_attr.state), port_attr.state); /* Make asynchronous FD non-blocking to handle interrupts. */ if (mlx4_fd_set_non_blocking(ctx->async_fd) < 0) { @@ -541,7 +531,7 @@ mlx4_pci_probe(struct rte_pci_driver *pci_drv, struct rte_pci_device *pci_dev) goto port_error; } /* Allocate protection domain. */ - pd = ibv_alloc_pd(ctx); + pd = mlx4_glue->alloc_pd(ctx); if (pd == NULL) { rte_errno = ENOMEM; ERROR("PD allocation failure"); @@ -573,6 +563,21 @@ mlx4_pci_probe(struct rte_pci_driver *pci_drv, struct rte_pci_device *pci_dev) PCI_DEVICE_ID_MELLANOX_CONNECTX3PRO); DEBUG("L2 tunnel checksum offloads are %ssupported", (priv->hw_csum_l2tun ? "" : "not ")); + priv->hw_rss_sup = device_attr_ex.rss_caps.rx_hash_fields_mask; + if (!priv->hw_rss_sup) { + WARN("no RSS capabilities reported; disabling support" + " for UDP RSS and inner VXLAN RSS"); + /* Fake support for all possible RSS hash fields. */ + priv->hw_rss_sup = ~UINT64_C(0); + priv->hw_rss_sup = mlx4_conv_rss_hf(priv, -1); + /* Filter out known unsupported fields. */ + priv->hw_rss_sup &= + ~(uint64_t)(IBV_RX_HASH_SRC_PORT_UDP | + IBV_RX_HASH_DST_PORT_UDP | + IBV_RX_HASH_INNER); + } + DEBUG("supported RSS hash fields mask: %016" PRIx64, + priv->hw_rss_sup); /* Configure the first MAC address by default. */ if (mlx4_get_mac(priv, &mac.addr_bytes)) { ERROR("cannot get MAC address, is mlx4_en loaded?" @@ -605,7 +610,7 @@ mlx4_pci_probe(struct rte_pci_driver *pci_drv, struct rte_pci_device *pci_dev) char name[RTE_ETH_NAME_MAX_LEN]; snprintf(name, sizeof(name), "%s port %u", - ibv_get_device_name(ibv_dev), port); + mlx4_glue->get_device_name(ibv_dev), port); eth_dev = rte_eth_dev_allocate(name); } if (eth_dev == NULL) { @@ -648,9 +653,9 @@ mlx4_pci_probe(struct rte_pci_driver *pci_drv, struct rte_pci_device *pci_dev) port_error: rte_free(priv); if (pd) - claim_zero(ibv_dealloc_pd(pd)); + claim_zero(mlx4_glue->dealloc_pd(pd)); if (ctx) - claim_zero(ibv_close_device(ctx)); + claim_zero(mlx4_glue->close_device(ctx)); if (eth_dev) rte_eth_dev_release_port(eth_dev); break; @@ -665,9 +670,9 @@ port_error: */ error: if (attr_ctx) - claim_zero(ibv_close_device(attr_ctx)); + claim_zero(mlx4_glue->close_device(attr_ctx)); if (list) - ibv_free_device_list(list); + mlx4_glue->free_device_list(list); assert(rte_errno >= 0); return -rte_errno; } @@ -700,6 +705,88 @@ static struct rte_pci_driver mlx4_driver = { RTE_PCI_DRV_INTR_RMV, }; +#ifdef RTE_LIBRTE_MLX4_DLOPEN_DEPS + +/** + * Initialization routine for run-time dependency on rdma-core. + */ +static int +mlx4_glue_init(void) +{ + const char *path[] = { + /* + * A basic security check is necessary before trusting + * MLX4_GLUE_PATH, which may override RTE_EAL_PMD_PATH. + */ + (geteuid() == getuid() && getegid() == getgid() ? + getenv("MLX4_GLUE_PATH") : NULL), + RTE_EAL_PMD_PATH, + }; + unsigned int i = 0; + void *handle = NULL; + void **sym; + const char *dlmsg; + + while (!handle && i != RTE_DIM(path)) { + const char *end; + size_t len; + int ret; + + if (!path[i]) { + ++i; + continue; + } + end = strpbrk(path[i], ":;"); + if (!end) + end = path[i] + strlen(path[i]); + len = end - path[i]; + ret = 0; + do { + char name[ret + 1]; + + ret = snprintf(name, sizeof(name), "%.*s%s" MLX4_GLUE, + (int)len, path[i], + (!len || *(end - 1) == '/') ? "" : "/"); + if (ret == -1) + break; + if (sizeof(name) != (size_t)ret + 1) + continue; + DEBUG("looking for rdma-core glue as \"%s\"", name); + handle = dlopen(name, RTLD_LAZY); + break; + } while (1); + path[i] = end + 1; + if (!*end) + ++i; + } + if (!handle) { + rte_errno = EINVAL; + dlmsg = dlerror(); + if (dlmsg) + WARN("cannot load glue library: %s", dlmsg); + goto glue_error; + } + sym = dlsym(handle, "mlx4_glue"); + if (!sym || !*sym) { + rte_errno = EINVAL; + dlmsg = dlerror(); + if (dlmsg) + ERROR("cannot resolve glue symbol: %s", dlmsg); + goto glue_error; + } + mlx4_glue = *sym; + return 0; +glue_error: + if (handle) + dlclose(handle); + WARN("cannot initialize PMD due to missing run-time" + " dependency on rdma-core libraries (libibverbs," + " libmlx4)"); + return -rte_errno; +} + +#endif + /** * Driver initialization routine. */ @@ -708,13 +795,38 @@ static void rte_mlx4_pmd_init(void) { /* + * MLX4_DEVICE_FATAL_CLEANUP tells ibv_destroy functions we + * want to get success errno value in case of calling them + * when the device was removed. + */ + setenv("MLX4_DEVICE_FATAL_CLEANUP", "1", 1); + /* * RDMAV_HUGEPAGES_SAFE tells ibv_fork_init() we intend to use * huge pages. Calling ibv_fork_init() during init allows * applications to use fork() safely for purposes other than * using this PMD, which is not supported in forked processes. */ setenv("RDMAV_HUGEPAGES_SAFE", "1", 1); - ibv_fork_init(); +#ifdef RTE_LIBRTE_MLX4_DLOPEN_DEPS + if (mlx4_glue_init()) + return; + assert(mlx4_glue); +#endif +#ifndef NDEBUG + /* Glue structure must not contain any NULL pointers. */ + { + unsigned int i; + + for (i = 0; i != sizeof(*mlx4_glue) / sizeof(void *); ++i) + assert(((const void *const *)mlx4_glue)[i]); + } +#endif + if (strcmp(mlx4_glue->version, MLX4_GLUE_VERSION)) { + ERROR("rdma-core glue \"%s\" mismatch: \"%s\" is required", + mlx4_glue->version, MLX4_GLUE_VERSION); + return; + } + mlx4_glue->fork_init(); rte_pci_register(&mlx4_driver); } diff --git a/drivers/net/mlx4/mlx4.h b/drivers/net/mlx4/mlx4.h index 3aeef87e..19c8a223 100644 --- a/drivers/net/mlx4/mlx4.h +++ b/drivers/net/mlx4/mlx4.h @@ -1,34 +1,6 @@ -/*- - * BSD LICENSE - * - * Copyright 2012 6WIND S.A. - * Copyright 2012 Mellanox - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * * Neither the name of 6WIND S.A. nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright 2012 6WIND S.A. + * Copyright 2012 Mellanox */ #ifndef RTE_PMD_MLX4_H_ @@ -47,12 +19,17 @@ #pragma GCC diagnostic error "-Wpedantic" #endif -#include <rte_ethdev.h> +#include <rte_ethdev_driver.h> #include <rte_ether.h> #include <rte_interrupts.h> #include <rte_mempool.h> #include <rte_spinlock.h> +#ifndef IBV_RX_HASH_INNER +/** This is not necessarily defined by supported RDMA core versions. */ +#define IBV_RX_HASH_INNER (1ull << 31) +#endif /* IBV_RX_HASH_INNER */ + /** Maximum number of simultaneous MAC addresses. This value is arbitrary. */ #define MLX4_MAX_MAC_ADDRESSES 128 @@ -126,8 +103,9 @@ struct priv { uint32_t vf:1; /**< This is a VF device. */ uint32_t intr_alarm:1; /**< An interrupt alarm is scheduled. */ uint32_t isolated:1; /**< Toggle isolated mode. */ - uint32_t hw_csum:1; /* Checksum offload is supported. */ - uint32_t hw_csum_l2tun:1; /* Checksum support for L2 tunnels. */ + uint32_t hw_csum:1; /**< Checksum offload is supported. */ + uint32_t hw_csum_l2tun:1; /**< Checksum support for L2 tunnels. */ + uint64_t hw_rss_sup; /**< Supported RSS hash fields (Verbs format). */ struct rte_intr_handle intr_handle; /**< Port interrupt handle. */ struct mlx4_drop *drop; /**< Shared resources for drop flow rules. */ LIST_HEAD(, mlx4_rss) rss; /**< Shared targets for Rx flow rules. */ @@ -165,11 +143,14 @@ int mlx4_flow_ctrl_get(struct rte_eth_dev *dev, int mlx4_flow_ctrl_set(struct rte_eth_dev *dev, struct rte_eth_fc_conf *fc_conf); const uint32_t *mlx4_dev_supported_ptypes_get(struct rte_eth_dev *dev); +int mlx4_is_removed(struct rte_eth_dev *dev); /* mlx4_intr.c */ int mlx4_intr_uninstall(struct priv *priv); int mlx4_intr_install(struct priv *priv); +int mlx4_rxq_intr_enable(struct priv *priv); +void mlx4_rxq_intr_disable(struct priv *priv); int mlx4_rx_intr_disable(struct rte_eth_dev *dev, uint16_t idx); int mlx4_rx_intr_enable(struct rte_eth_dev *dev, uint16_t idx); diff --git a/drivers/net/mlx4/mlx4_ethdev.c b/drivers/net/mlx4/mlx4_ethdev.c index 2f69e7d4..3bc69273 100644 --- a/drivers/net/mlx4/mlx4_ethdev.c +++ b/drivers/net/mlx4/mlx4_ethdev.c @@ -1,34 +1,6 @@ -/*- - * BSD LICENSE - * - * Copyright 2017 6WIND S.A. - * Copyright 2017 Mellanox - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * * Neither the name of 6WIND S.A. nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright 2017 6WIND S.A. + * Copyright 2017 Mellanox */ /** @@ -63,13 +35,14 @@ #include <rte_bus_pci.h> #include <rte_errno.h> -#include <rte_ethdev.h> +#include <rte_ethdev_driver.h> #include <rte_ether.h> #include <rte_flow.h> #include <rte_pci.h> #include "mlx4.h" #include "mlx4_flow.h" +#include "mlx4_glue.h" #include "mlx4_rxtx.h" #include "mlx4_utils.h" @@ -766,18 +739,10 @@ mlx4_dev_infos_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *info) info->max_rx_queues = max; info->max_tx_queues = max; info->max_mac_addrs = RTE_DIM(priv->mac); - info->rx_offload_capa = 0; - info->tx_offload_capa = 0; - if (priv->hw_csum) { - info->tx_offload_capa |= (DEV_TX_OFFLOAD_IPV4_CKSUM | - DEV_TX_OFFLOAD_UDP_CKSUM | - DEV_TX_OFFLOAD_TCP_CKSUM); - info->rx_offload_capa |= (DEV_RX_OFFLOAD_IPV4_CKSUM | - DEV_RX_OFFLOAD_UDP_CKSUM | - DEV_RX_OFFLOAD_TCP_CKSUM); - } - if (priv->hw_csum_l2tun) - info->tx_offload_capa |= DEV_TX_OFFLOAD_OUTER_IPV4_CKSUM; + info->tx_offload_capa = mlx4_get_tx_port_offloads(priv); + info->rx_queue_offload_capa = mlx4_get_rx_queue_offloads(priv); + info->rx_offload_capa = (mlx4_get_rx_port_offloads(priv) | + info->rx_queue_offload_capa); if (mlx4_get_ifname(priv, &ifname) == 0) info->if_index = if_nametoindex(ifname); info->hash_key_size = MLX4_RSS_HASH_KEY_SIZE; @@ -1060,3 +1025,23 @@ mlx4_dev_supported_ptypes_get(struct rte_eth_dev *dev) } return NULL; } + +/** + * Check if mlx4 device was removed. + * + * @param dev + * Pointer to Ethernet device structure. + * + * @return + * 1 when device is removed, otherwise 0. + */ +int +mlx4_is_removed(struct rte_eth_dev *dev) +{ + struct ibv_device_attr device_attr; + struct priv *priv = dev->data->dev_private; + + if (mlx4_glue->query_device(priv->ctx, &device_attr) == EIO) + return 1; + return 0; +} diff --git a/drivers/net/mlx4/mlx4_flow.c b/drivers/net/mlx4/mlx4_flow.c index 8b87b298..2d55bfe0 100644 --- a/drivers/net/mlx4/mlx4_flow.c +++ b/drivers/net/mlx4/mlx4_flow.c @@ -1,34 +1,6 @@ -/*- - * BSD LICENSE - * - * Copyright 2017 6WIND S.A. - * Copyright 2017 Mellanox - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * * Neither the name of 6WIND S.A. nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright 2017 6WIND S.A. + * Copyright 2017 Mellanox */ /** @@ -57,7 +29,7 @@ #include <rte_byteorder.h> #include <rte_errno.h> #include <rte_eth_ctrl.h> -#include <rte_ethdev.h> +#include <rte_ethdev_driver.h> #include <rte_ether.h> #include <rte_flow.h> #include <rte_flow_driver.h> @@ -65,6 +37,7 @@ /* PMD headers. */ #include "mlx4.h" +#include "mlx4_glue.h" #include "mlx4_flow.h" #include "mlx4_rxtx.h" #include "mlx4_utils.h" @@ -105,6 +78,11 @@ struct mlx4_drop { /** * Convert DPDK RSS hash fields to their Verbs equivalent. * + * This function returns the supported (default) set when @p rss_hf has + * special value (uint64_t)-1. + * + * @param priv + * Pointer to private structure. * @param rss_hf * Hash fields in DPDK format (see struct rte_eth_rss_conf). * @@ -112,8 +90,8 @@ struct mlx4_drop { * A valid Verbs RSS hash fields mask for mlx4 on success, (uint64_t)-1 * otherwise and rte_errno is set. */ -static uint64_t -mlx4_conv_rss_hf(uint64_t rss_hf) +uint64_t +mlx4_conv_rss_hf(struct priv *priv, uint64_t rss_hf) { enum { IPV4, IPV6, TCP, UDP, }; const uint64_t in[] = { @@ -133,11 +111,9 @@ mlx4_conv_rss_hf(uint64_t rss_hf) [TCP] = (ETH_RSS_NONFRAG_IPV4_TCP | ETH_RSS_NONFRAG_IPV6_TCP | ETH_RSS_IPV6_TCP_EX), - /* - * UDP support is temporarily disabled due to an - * implementation issue in the kernel. - */ - [UDP] = 0, + [UDP] = (ETH_RSS_NONFRAG_IPV4_UDP | + ETH_RSS_NONFRAG_IPV6_UDP | + ETH_RSS_IPV6_UDP_EX), }; const uint64_t out[RTE_DIM(in)] = { [IPV4] = IBV_RX_HASH_SRC_IPV4 | IBV_RX_HASH_DST_IPV4, @@ -154,8 +130,15 @@ mlx4_conv_rss_hf(uint64_t rss_hf) seen |= rss_hf & in[i]; conv |= out[i]; } - if (!(rss_hf & ~seen)) - return conv; + if ((conv & priv->hw_rss_sup) == conv) { + if (rss_hf == (uint64_t)-1) { + /* Include inner RSS by default if supported. */ + conv |= priv->hw_rss_sup & IBV_RX_HASH_INNER; + return conv; + } + if (!(rss_hf & ~seen)) + return conv; + } rte_errno = ENOTSUP; return (uint64_t)-1; } @@ -759,10 +742,7 @@ fill: &(struct rte_eth_rss_conf){ .rss_key = mlx4_rss_hash_key_default, .rss_key_len = MLX4_RSS_HASH_KEY_SIZE, - .rss_hf = (ETH_RSS_IPV4 | - ETH_RSS_NONFRAG_IPV4_TCP | - ETH_RSS_IPV6 | - ETH_RSS_NONFRAG_IPV6_TCP), + .rss_hf = -1, }; /* Sanity checks. */ for (i = 0; i < rss->num; ++i) @@ -801,7 +781,8 @@ fill: goto exit_action_not_supported; } flow->rss = mlx4_rss_get - (priv, mlx4_conv_rss_hf(rss_conf->rss_hf), + (priv, + mlx4_conv_rss_hf(priv, rss_conf->rss_hf), rss_conf->rss_key, rss->num, rss->queue); if (!flow->rss) { msg = "either invalid parameters or not enough" @@ -914,24 +895,25 @@ mlx4_drop_get(struct priv *priv) .priv = priv, .refcnt = 1, }; - drop->cq = ibv_create_cq(priv->ctx, 1, NULL, NULL, 0); + drop->cq = mlx4_glue->create_cq(priv->ctx, 1, NULL, NULL, 0); if (!drop->cq) goto error; - drop->qp = ibv_create_qp(priv->pd, - &(struct ibv_qp_init_attr){ - .send_cq = drop->cq, - .recv_cq = drop->cq, - .qp_type = IBV_QPT_RAW_PACKET, - }); + drop->qp = mlx4_glue->create_qp + (priv->pd, + &(struct ibv_qp_init_attr){ + .send_cq = drop->cq, + .recv_cq = drop->cq, + .qp_type = IBV_QPT_RAW_PACKET, + }); if (!drop->qp) goto error; priv->drop = drop; return drop; error: if (drop->qp) - claim_zero(ibv_destroy_qp(drop->qp)); + claim_zero(mlx4_glue->destroy_qp(drop->qp)); if (drop->cq) - claim_zero(ibv_destroy_cq(drop->cq)); + claim_zero(mlx4_glue->destroy_cq(drop->cq)); if (drop) rte_free(drop); rte_errno = ENOMEM; @@ -951,8 +933,8 @@ mlx4_drop_put(struct mlx4_drop *drop) if (--drop->refcnt) return; drop->priv->drop = NULL; - claim_zero(ibv_destroy_qp(drop->qp)); - claim_zero(ibv_destroy_cq(drop->cq)); + claim_zero(mlx4_glue->destroy_qp(drop->qp)); + claim_zero(mlx4_glue->destroy_cq(drop->cq)); rte_free(drop); } @@ -984,7 +966,7 @@ mlx4_flow_toggle(struct priv *priv, if (!enable) { if (!flow->ibv_flow) return 0; - claim_zero(ibv_destroy_flow(flow->ibv_flow)); + claim_zero(mlx4_glue->destroy_flow(flow->ibv_flow)); flow->ibv_flow = NULL; if (flow->drop) mlx4_drop_put(priv->drop); @@ -997,7 +979,7 @@ mlx4_flow_toggle(struct priv *priv, !priv->isolated && flow->ibv_attr->priority == MLX4_FLOW_PRIORITY_LAST) { if (flow->ibv_flow) { - claim_zero(ibv_destroy_flow(flow->ibv_flow)); + claim_zero(mlx4_glue->destroy_flow(flow->ibv_flow)); flow->ibv_flow = NULL; if (flow->drop) mlx4_drop_put(priv->drop); @@ -1027,7 +1009,7 @@ mlx4_flow_toggle(struct priv *priv, if (missing ^ !flow->drop) return 0; /* Verbs flow needs updating. */ - claim_zero(ibv_destroy_flow(flow->ibv_flow)); + claim_zero(mlx4_glue->destroy_flow(flow->ibv_flow)); flow->ibv_flow = NULL; if (flow->drop) mlx4_drop_put(priv->drop); @@ -1048,6 +1030,8 @@ mlx4_flow_toggle(struct priv *priv, flow->drop = missing; } if (flow->drop) { + if (flow->ibv_flow) + return 0; mlx4_drop_get(priv); if (!priv->drop) { err = rte_errno; @@ -1059,7 +1043,7 @@ mlx4_flow_toggle(struct priv *priv, assert(qp); if (flow->ibv_flow) return 0; - flow->ibv_flow = ibv_create_flow(qp, flow->ibv_attr); + flow->ibv_flow = mlx4_glue->create_flow(qp, flow->ibv_attr); if (flow->ibv_flow) return 0; if (flow->drop) @@ -1215,16 +1199,19 @@ mlx4_flow_internal_next_vlan(struct priv *priv, uint16_t vlan) * * Various flow rules are created depending on the mode the device is in: * - * 1. Promiscuous: port MAC + catch-all (VLAN filtering is ignored). - * 2. All multicast: port MAC/VLAN + catch-all multicast. - * 3. Otherwise: port MAC/VLAN + broadcast MAC/VLAN. + * 1. Promiscuous: + * port MAC + broadcast + catch-all (VLAN filtering is ignored). + * 2. All multicast: + * port MAC/VLAN + broadcast + catch-all multicast. + * 3. Otherwise: + * port MAC/VLAN + broadcast MAC/VLAN. * * About MAC flow rules: * * - MAC flow rules are generated from @p dev->data->mac_addrs * (@p priv->mac array). * - An additional flow rule for Ethernet broadcasts is also generated. - * - All these are per-VLAN if @p dev->data->dev_conf.rxmode.hw_vlan_filter + * - All these are per-VLAN if @p DEV_RX_OFFLOAD_VLAN_FILTER * is enabled and VLAN filters are configured. * * @param priv @@ -1292,13 +1279,11 @@ mlx4_flow_internal(struct priv *priv, struct rte_flow_error *error) }; struct ether_addr *rule_mac = ð_spec.dst; rte_be16_t *rule_vlan = - priv->dev->data->dev_conf.rxmode.hw_vlan_filter && + (priv->dev->data->dev_conf.rxmode.offloads & + DEV_RX_OFFLOAD_VLAN_FILTER) && !priv->dev->data->promiscuous ? &vlan_spec.tci : NULL; - int broadcast = - !priv->dev->data->promiscuous && - !priv->dev->data->all_multicast; uint16_t vlan = 0; struct rte_flow *flow; unsigned int i; @@ -1332,7 +1317,7 @@ next_vlan: rule_vlan = NULL; } } - for (i = 0; i != RTE_DIM(priv->mac) + broadcast; ++i) { + for (i = 0; i != RTE_DIM(priv->mac) + 1; ++i) { const struct ether_addr *mac; /* Broadcasts are handled by an extra iteration. */ @@ -1396,7 +1381,7 @@ next_vlan: goto next_vlan; } /* Take care of promiscuous and all multicast flow rules. */ - if (!broadcast) { + if (priv->dev->data->promiscuous || priv->dev->data->all_multicast) { for (flow = LIST_FIRST(&priv->flows); flow && flow->internal; flow = LIST_NEXT(flow, next)) { diff --git a/drivers/net/mlx4/mlx4_flow.h b/drivers/net/mlx4/mlx4_flow.h index 651fd37b..00188a65 100644 --- a/drivers/net/mlx4/mlx4_flow.h +++ b/drivers/net/mlx4/mlx4_flow.h @@ -1,34 +1,6 @@ -/*- - * BSD LICENSE - * - * Copyright 2017 6WIND S.A. - * Copyright 2017 Mellanox - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * * Neither the name of 6WIND S.A. nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright 2017 6WIND S.A. + * Copyright 2017 Mellanox */ #ifndef RTE_PMD_MLX4_FLOW_H_ @@ -47,7 +19,7 @@ #endif #include <rte_eth_ctrl.h> -#include <rte_ethdev.h> +#include <rte_ethdev_driver.h> #include <rte_flow.h> #include <rte_flow_driver.h> #include <rte_byteorder.h> @@ -75,6 +47,7 @@ struct rte_flow { /* mlx4_flow.c */ +uint64_t mlx4_conv_rss_hf(struct priv *priv, uint64_t rss_hf); int mlx4_flow_sync(struct priv *priv, struct rte_flow_error *error); void mlx4_flow_clean(struct priv *priv); int mlx4_filter_ctrl(struct rte_eth_dev *dev, diff --git a/drivers/net/mlx4/mlx4_glue.c b/drivers/net/mlx4/mlx4_glue.c new file mode 100644 index 00000000..3b79d320 --- /dev/null +++ b/drivers/net/mlx4/mlx4_glue.c @@ -0,0 +1,279 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright 2018 6WIND S.A. + * Copyright 2018 Mellanox + */ + +#include <stddef.h> +#include <stdint.h> + +/* Verbs headers do not support -pedantic. */ +#ifdef PEDANTIC +#pragma GCC diagnostic ignored "-Wpedantic" +#endif +#include <infiniband/mlx4dv.h> +#include <infiniband/verbs.h> +#ifdef PEDANTIC +#pragma GCC diagnostic error "-Wpedantic" +#endif + +#include "mlx4_glue.h" + +static int +mlx4_glue_fork_init(void) +{ + return ibv_fork_init(); +} + +static int +mlx4_glue_get_async_event(struct ibv_context *context, + struct ibv_async_event *event) +{ + return ibv_get_async_event(context, event); +} + +static void +mlx4_glue_ack_async_event(struct ibv_async_event *event) +{ + ibv_ack_async_event(event); +} + +static struct ibv_pd * +mlx4_glue_alloc_pd(struct ibv_context *context) +{ + return ibv_alloc_pd(context); +} + +static int +mlx4_glue_dealloc_pd(struct ibv_pd *pd) +{ + return ibv_dealloc_pd(pd); +} + +static struct ibv_device ** +mlx4_glue_get_device_list(int *num_devices) +{ + return ibv_get_device_list(num_devices); +} + +static void +mlx4_glue_free_device_list(struct ibv_device **list) +{ + ibv_free_device_list(list); +} + +static struct ibv_context * +mlx4_glue_open_device(struct ibv_device *device) +{ + return ibv_open_device(device); +} + +static int +mlx4_glue_close_device(struct ibv_context *context) +{ + return ibv_close_device(context); +} + +static const char * +mlx4_glue_get_device_name(struct ibv_device *device) +{ + return ibv_get_device_name(device); +} + +static int +mlx4_glue_query_device(struct ibv_context *context, + struct ibv_device_attr *device_attr) +{ + return ibv_query_device(context, device_attr); +} + +static int +mlx4_glue_query_device_ex(struct ibv_context *context, + const struct ibv_query_device_ex_input *input, + struct ibv_device_attr_ex *attr) +{ + return ibv_query_device_ex(context, input, attr); +} + +static int +mlx4_glue_query_port(struct ibv_context *context, uint8_t port_num, + struct ibv_port_attr *port_attr) +{ + return ibv_query_port(context, port_num, port_attr); +} + +static const char * +mlx4_glue_port_state_str(enum ibv_port_state port_state) +{ + return ibv_port_state_str(port_state); +} + +static struct ibv_comp_channel * +mlx4_glue_create_comp_channel(struct ibv_context *context) +{ + return ibv_create_comp_channel(context); +} + +static int +mlx4_glue_destroy_comp_channel(struct ibv_comp_channel *channel) +{ + return ibv_destroy_comp_channel(channel); +} + +static struct ibv_cq * +mlx4_glue_create_cq(struct ibv_context *context, int cqe, void *cq_context, + struct ibv_comp_channel *channel, int comp_vector) +{ + return ibv_create_cq(context, cqe, cq_context, channel, comp_vector); +} + +static int +mlx4_glue_destroy_cq(struct ibv_cq *cq) +{ + return ibv_destroy_cq(cq); +} + +static int +mlx4_glue_get_cq_event(struct ibv_comp_channel *channel, struct ibv_cq **cq, + void **cq_context) +{ + return ibv_get_cq_event(channel, cq, cq_context); +} + +static void +mlx4_glue_ack_cq_events(struct ibv_cq *cq, unsigned int nevents) +{ + ibv_ack_cq_events(cq, nevents); +} + +static struct ibv_flow * +mlx4_glue_create_flow(struct ibv_qp *qp, struct ibv_flow_attr *flow) +{ + return ibv_create_flow(qp, flow); +} + +static int +mlx4_glue_destroy_flow(struct ibv_flow *flow_id) +{ + return ibv_destroy_flow(flow_id); +} + +static struct ibv_qp * +mlx4_glue_create_qp(struct ibv_pd *pd, struct ibv_qp_init_attr *qp_init_attr) +{ + return ibv_create_qp(pd, qp_init_attr); +} + +static struct ibv_qp * +mlx4_glue_create_qp_ex(struct ibv_context *context, + struct ibv_qp_init_attr_ex *qp_init_attr_ex) +{ + return ibv_create_qp_ex(context, qp_init_attr_ex); +} + +static int +mlx4_glue_destroy_qp(struct ibv_qp *qp) +{ + return ibv_destroy_qp(qp); +} + +static int +mlx4_glue_modify_qp(struct ibv_qp *qp, struct ibv_qp_attr *attr, int attr_mask) +{ + return ibv_modify_qp(qp, attr, attr_mask); +} + +static struct ibv_mr * +mlx4_glue_reg_mr(struct ibv_pd *pd, void *addr, size_t length, int access) +{ + return ibv_reg_mr(pd, addr, length, access); +} + +static int +mlx4_glue_dereg_mr(struct ibv_mr *mr) +{ + return ibv_dereg_mr(mr); +} + +static struct ibv_rwq_ind_table * +mlx4_glue_create_rwq_ind_table(struct ibv_context *context, + struct ibv_rwq_ind_table_init_attr *init_attr) +{ + return ibv_create_rwq_ind_table(context, init_attr); +} + +static int +mlx4_glue_destroy_rwq_ind_table(struct ibv_rwq_ind_table *rwq_ind_table) +{ + return ibv_destroy_rwq_ind_table(rwq_ind_table); +} + +static struct ibv_wq * +mlx4_glue_create_wq(struct ibv_context *context, + struct ibv_wq_init_attr *wq_init_attr) +{ + return ibv_create_wq(context, wq_init_attr); +} + +static int +mlx4_glue_destroy_wq(struct ibv_wq *wq) +{ + return ibv_destroy_wq(wq); +} +static int +mlx4_glue_modify_wq(struct ibv_wq *wq, struct ibv_wq_attr *wq_attr) +{ + return ibv_modify_wq(wq, wq_attr); +} + +static int +mlx4_glue_dv_init_obj(struct mlx4dv_obj *obj, uint64_t obj_type) +{ + return mlx4dv_init_obj(obj, obj_type); +} + +static int +mlx4_glue_dv_set_context_attr(struct ibv_context *context, + enum mlx4dv_set_ctx_attr_type attr_type, + void *attr) +{ + return mlx4dv_set_context_attr(context, attr_type, attr); +} + +const struct mlx4_glue *mlx4_glue = &(const struct mlx4_glue){ + .version = MLX4_GLUE_VERSION, + .fork_init = mlx4_glue_fork_init, + .get_async_event = mlx4_glue_get_async_event, + .ack_async_event = mlx4_glue_ack_async_event, + .alloc_pd = mlx4_glue_alloc_pd, + .dealloc_pd = mlx4_glue_dealloc_pd, + .get_device_list = mlx4_glue_get_device_list, + .free_device_list = mlx4_glue_free_device_list, + .open_device = mlx4_glue_open_device, + .close_device = mlx4_glue_close_device, + .get_device_name = mlx4_glue_get_device_name, + .query_device = mlx4_glue_query_device, + .query_device_ex = mlx4_glue_query_device_ex, + .query_port = mlx4_glue_query_port, + .port_state_str = mlx4_glue_port_state_str, + .create_comp_channel = mlx4_glue_create_comp_channel, + .destroy_comp_channel = mlx4_glue_destroy_comp_channel, + .create_cq = mlx4_glue_create_cq, + .destroy_cq = mlx4_glue_destroy_cq, + .get_cq_event = mlx4_glue_get_cq_event, + .ack_cq_events = mlx4_glue_ack_cq_events, + .create_flow = mlx4_glue_create_flow, + .destroy_flow = mlx4_glue_destroy_flow, + .create_qp = mlx4_glue_create_qp, + .create_qp_ex = mlx4_glue_create_qp_ex, + .destroy_qp = mlx4_glue_destroy_qp, + .modify_qp = mlx4_glue_modify_qp, + .reg_mr = mlx4_glue_reg_mr, + .dereg_mr = mlx4_glue_dereg_mr, + .create_rwq_ind_table = mlx4_glue_create_rwq_ind_table, + .destroy_rwq_ind_table = mlx4_glue_destroy_rwq_ind_table, + .create_wq = mlx4_glue_create_wq, + .destroy_wq = mlx4_glue_destroy_wq, + .modify_wq = mlx4_glue_modify_wq, + .dv_init_obj = mlx4_glue_dv_init_obj, + .dv_set_context_attr = mlx4_glue_dv_set_context_attr, +}; diff --git a/drivers/net/mlx4/mlx4_glue.h b/drivers/net/mlx4/mlx4_glue.h new file mode 100644 index 00000000..368f906b --- /dev/null +++ b/drivers/net/mlx4/mlx4_glue.h @@ -0,0 +1,89 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright 2018 6WIND S.A. + * Copyright 2018 Mellanox + */ + +#ifndef MLX4_GLUE_H_ +#define MLX4_GLUE_H_ + +#include <stddef.h> +#include <stdint.h> + +/* Verbs headers do not support -pedantic. */ +#ifdef PEDANTIC +#pragma GCC diagnostic ignored "-Wpedantic" +#endif +#include <infiniband/mlx4dv.h> +#include <infiniband/verbs.h> +#ifdef PEDANTIC +#pragma GCC diagnostic error "-Wpedantic" +#endif + +#ifndef MLX4_GLUE_VERSION +#define MLX4_GLUE_VERSION "" +#endif + +/* LIB_GLUE_VERSION must be updated every time this structure is modified. */ +struct mlx4_glue { + const char *version; + int (*fork_init)(void); + int (*get_async_event)(struct ibv_context *context, + struct ibv_async_event *event); + void (*ack_async_event)(struct ibv_async_event *event); + struct ibv_pd *(*alloc_pd)(struct ibv_context *context); + int (*dealloc_pd)(struct ibv_pd *pd); + struct ibv_device **(*get_device_list)(int *num_devices); + void (*free_device_list)(struct ibv_device **list); + struct ibv_context *(*open_device)(struct ibv_device *device); + int (*close_device)(struct ibv_context *context); + const char *(*get_device_name)(struct ibv_device *device); + int (*query_device)(struct ibv_context *context, + struct ibv_device_attr *device_attr); + int (*query_device_ex)(struct ibv_context *context, + const struct ibv_query_device_ex_input *input, + struct ibv_device_attr_ex *attr); + int (*query_port)(struct ibv_context *context, uint8_t port_num, + struct ibv_port_attr *port_attr); + const char *(*port_state_str)(enum ibv_port_state port_state); + struct ibv_comp_channel *(*create_comp_channel) + (struct ibv_context *context); + int (*destroy_comp_channel)(struct ibv_comp_channel *channel); + struct ibv_cq *(*create_cq)(struct ibv_context *context, int cqe, + void *cq_context, + struct ibv_comp_channel *channel, + int comp_vector); + int (*destroy_cq)(struct ibv_cq *cq); + int (*get_cq_event)(struct ibv_comp_channel *channel, + struct ibv_cq **cq, void **cq_context); + void (*ack_cq_events)(struct ibv_cq *cq, unsigned int nevents); + struct ibv_flow *(*create_flow)(struct ibv_qp *qp, + struct ibv_flow_attr *flow); + int (*destroy_flow)(struct ibv_flow *flow_id); + struct ibv_qp *(*create_qp)(struct ibv_pd *pd, + struct ibv_qp_init_attr *qp_init_attr); + struct ibv_qp *(*create_qp_ex) + (struct ibv_context *context, + struct ibv_qp_init_attr_ex *qp_init_attr_ex); + int (*destroy_qp)(struct ibv_qp *qp); + int (*modify_qp)(struct ibv_qp *qp, struct ibv_qp_attr *attr, + int attr_mask); + struct ibv_mr *(*reg_mr)(struct ibv_pd *pd, void *addr, + size_t length, int access); + int (*dereg_mr)(struct ibv_mr *mr); + struct ibv_rwq_ind_table *(*create_rwq_ind_table) + (struct ibv_context *context, + struct ibv_rwq_ind_table_init_attr *init_attr); + int (*destroy_rwq_ind_table)(struct ibv_rwq_ind_table *rwq_ind_table); + struct ibv_wq *(*create_wq)(struct ibv_context *context, + struct ibv_wq_init_attr *wq_init_attr); + int (*destroy_wq)(struct ibv_wq *wq); + int (*modify_wq)(struct ibv_wq *wq, struct ibv_wq_attr *wq_attr); + int (*dv_init_obj)(struct mlx4dv_obj *obj, uint64_t obj_type); + int (*dv_set_context_attr)(struct ibv_context *context, + enum mlx4dv_set_ctx_attr_type attr_type, + void *attr); +}; + +const struct mlx4_glue *mlx4_glue; + +#endif /* MLX4_GLUE_H_ */ diff --git a/drivers/net/mlx4/mlx4_intr.c b/drivers/net/mlx4/mlx4_intr.c index 50d19769..2141992e 100644 --- a/drivers/net/mlx4/mlx4_intr.c +++ b/drivers/net/mlx4/mlx4_intr.c @@ -1,34 +1,6 @@ -/*- - * BSD LICENSE - * - * Copyright 2017 6WIND S.A. - * Copyright 2017 Mellanox - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * * Neither the name of 6WIND S.A. nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright 2017 6WIND S.A. + * Copyright 2017 Mellanox */ /** @@ -52,11 +24,12 @@ #include <rte_alarm.h> #include <rte_errno.h> -#include <rte_ethdev.h> +#include <rte_ethdev_driver.h> #include <rte_io.h> #include <rte_interrupts.h> #include "mlx4.h" +#include "mlx4_glue.h" #include "mlx4_rxtx.h" #include "mlx4_utils.h" @@ -154,7 +127,7 @@ mlx4_link_status_alarm(struct priv *priv) if (intr_conf->lsc && !mlx4_link_status_check(priv)) _rte_eth_dev_callback_process(priv->dev, RTE_ETH_EVENT_INTR_LSC, - NULL, NULL); + NULL); } /** @@ -216,7 +189,7 @@ mlx4_interrupt_handler(struct priv *priv) unsigned int i; /* Read all message and acknowledge them. */ - while (!ibv_get_async_event(priv->ctx, &event)) { + while (!mlx4_glue->get_async_event(priv->ctx, &event)) { switch (event.event_type) { case IBV_EVENT_PORT_ACTIVE: case IBV_EVENT_PORT_ERR: @@ -231,12 +204,12 @@ mlx4_interrupt_handler(struct priv *priv) DEBUG("event type %d on physical port %d not handled", event.event_type, event.element.port_num); } - ibv_ack_async_event(&event); + mlx4_glue->ack_async_event(&event); } for (i = 0; i != RTE_DIM(caught); ++i) if (caught[i]) _rte_eth_dev_callback_process(priv->dev, type[i], - NULL, NULL); + NULL); } /** @@ -291,7 +264,7 @@ mlx4_intr_uninstall(struct priv *priv) } rte_eal_alarm_cancel((void (*)(void *))mlx4_link_status_alarm, priv); priv->intr_alarm = 0; - mlx4_rx_intr_vec_disable(priv); + mlx4_rxq_intr_disable(priv); rte_errno = err; return 0; } @@ -313,8 +286,6 @@ mlx4_intr_install(struct priv *priv) int rc; mlx4_intr_uninstall(priv); - if (intr_conf->rxq && mlx4_rx_intr_vec_enable(priv) < 0) - goto error; if (intr_conf->lsc | intr_conf->rmv) { priv->intr_handle.fd = priv->ctx->async_fd; rc = rte_intr_callback_register(&priv->intr_handle, @@ -354,7 +325,8 @@ mlx4_rx_intr_disable(struct rte_eth_dev *dev, uint16_t idx) if (!rxq || !rxq->channel) { ret = EINVAL; } else { - ret = ibv_get_cq_event(rxq->cq->channel, &ev_cq, &ev_ctx); + ret = mlx4_glue->get_cq_event(rxq->cq->channel, &ev_cq, + &ev_ctx); if (ret || ev_cq != rxq->cq) ret = EINVAL; } @@ -364,7 +336,7 @@ mlx4_rx_intr_disable(struct rte_eth_dev *dev, uint16_t idx) idx); } else { rxq->mcq.arm_sn++; - ibv_ack_cq_events(rxq->cq, 1); + mlx4_glue->ack_cq_events(rxq->cq, 1); } return -ret; } @@ -395,3 +367,40 @@ mlx4_rx_intr_enable(struct rte_eth_dev *dev, uint16_t idx) } return -ret; } + +/** + * Enable datapath interrupts. + * + * @param priv + * Pointer to private structure. + * + * @return + * 0 on success, negative errno value otherwise and rte_errno is set. + */ +int +mlx4_rxq_intr_enable(struct priv *priv) +{ + const struct rte_intr_conf *const intr_conf = + &priv->dev->data->dev_conf.intr_conf; + + if (intr_conf->rxq && mlx4_rx_intr_vec_enable(priv) < 0) + goto error; + return 0; +error: + return -rte_errno; +} + +/** + * Disable datapath interrupts, keeping other interrupts intact. + * + * @param priv + * Pointer to private structure. + */ +void +mlx4_rxq_intr_disable(struct priv *priv) +{ + int err = rte_errno; /* Make sure rte_errno remains unchanged. */ + + mlx4_rx_intr_vec_disable(priv); + rte_errno = err; +} diff --git a/drivers/net/mlx4/mlx4_mr.c b/drivers/net/mlx4/mlx4_mr.c index 2a3e2695..9a1e4de3 100644 --- a/drivers/net/mlx4/mlx4_mr.c +++ b/drivers/net/mlx4/mlx4_mr.c @@ -1,34 +1,6 @@ -/*- - * BSD LICENSE - * - * Copyright 2017 6WIND S.A. - * Copyright 2017 Mellanox - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * * Neither the name of 6WIND S.A. nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright 2017 6WIND S.A. + * Copyright 2017 Mellanox */ /** @@ -60,6 +32,7 @@ #include <rte_mempool.h> #include <rte_spinlock.h> +#include "mlx4_glue.h" #include "mlx4_rxtx.h" #include "mlx4_utils.h" @@ -200,8 +173,8 @@ mlx4_mr_get(struct priv *priv, struct rte_mempool *mp) .end = end, .refcnt = 1, .priv = priv, - .mr = ibv_reg_mr(priv->pd, (void *)start, end - start, - IBV_ACCESS_LOCAL_WRITE), + .mr = mlx4_glue->reg_mr(priv->pd, (void *)start, end - start, + IBV_ACCESS_LOCAL_WRITE), .mp = mp, }; if (mr->mr) { @@ -240,7 +213,7 @@ mlx4_mr_put(struct mlx4_mr *mr) if (--mr->refcnt) goto release; LIST_REMOVE(mr, next); - claim_zero(ibv_dereg_mr(mr->mr)); + claim_zero(mlx4_glue->dereg_mr(mr->mr)); rte_free(mr); release: rte_spinlock_unlock(&priv->mr_lock); diff --git a/drivers/net/mlx4/mlx4_prm.h b/drivers/net/mlx4/mlx4_prm.h index fcc7c129..153dda52 100644 --- a/drivers/net/mlx4/mlx4_prm.h +++ b/drivers/net/mlx4/mlx4_prm.h @@ -1,34 +1,6 @@ -/*- - * BSD LICENSE - * - * Copyright 2017 6WIND S.A. - * Copyright 2017 Mellanox - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * * Neither the name of 6WIND S.A. nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright 2017 6WIND S.A. + * Copyright 2017 Mellanox */ #ifndef MLX4_PRM_H_ @@ -53,23 +25,22 @@ #define MLX4_TXBB_SIZE (1 << MLX4_TXBB_SHIFT) /* Typical TSO descriptor with 16 gather entries is 352 bytes. */ -#define MLX4_MAX_WQE_SIZE 512 -#define MLX4_MAX_WQE_TXBBS (MLX4_MAX_WQE_SIZE / MLX4_TXBB_SIZE) +#define MLX4_MAX_SGE 32 +#define MLX4_MAX_WQE_SIZE \ + (MLX4_MAX_SGE * sizeof(struct mlx4_wqe_data_seg) + \ + sizeof(struct mlx4_wqe_ctrl_seg)) +#define MLX4_SEG_SHIFT 4 /* Send queue stamping/invalidating information. */ #define MLX4_SQ_STAMP_STRIDE 64 #define MLX4_SQ_STAMP_DWORDS (MLX4_SQ_STAMP_STRIDE / 4) -#define MLX4_SQ_STAMP_SHIFT 31 +#define MLX4_SQ_OWNER_BIT 31 #define MLX4_SQ_STAMP_VAL 0x7fffffff /* Work queue element (WQE) flags. */ -#define MLX4_BIT_WQE_OWN 0x80000000 #define MLX4_WQE_CTRL_IIP_HDR_CSUM (1 << 28) #define MLX4_WQE_CTRL_IL4_HDR_CSUM (1 << 27) -#define MLX4_SIZE_TO_TXBBS(size) \ - (RTE_ALIGN((size), (MLX4_TXBB_SIZE)) >> (MLX4_TXBB_SHIFT)) - /* CQE checksum flags. */ enum { MLX4_CQE_L2_TUNNEL_IPV4 = (int)(1u << 25), @@ -98,17 +69,15 @@ enum { struct mlx4_sq { volatile uint8_t *buf; /**< SQ buffer. */ volatile uint8_t *eob; /**< End of SQ buffer */ - uint32_t head; /**< SQ head counter in units of TXBBS. */ - uint32_t tail; /**< SQ tail counter in units of TXBBS. */ - uint32_t txbb_cnt; /**< Num of WQEBB in the Q (should be ^2). */ - uint32_t txbb_cnt_mask; /**< txbbs_cnt mask (txbb_cnt is ^2). */ - uint32_t headroom_txbbs; /**< Num of txbbs that should be kept free. */ + uint32_t size; /**< SQ size includes headroom. */ + uint32_t remain_size; /**< Remaining WQE room in SQ (bytes). */ + uint32_t owner_opcode; + /**< Default owner opcode with HW valid owner bit. */ + uint32_t stamp; /**< Stamp value with an invalid HW owner bit. */ volatile uint32_t *db; /**< Pointer to the doorbell. */ uint32_t doorbell_qpn; /**< qp number to write to the doorbell. */ }; -#define mlx4_get_send_wqe(sq, n) ((sq)->buf + ((n) * (MLX4_TXBB_SIZE))) - /* Completion queue events, numbers and masks. */ #define MLX4_CQ_DB_GEQ_N_MASK 0x3 #define MLX4_CQ_DOORBELL 0x20 diff --git a/drivers/net/mlx4/mlx4_rxq.c b/drivers/net/mlx4/mlx4_rxq.c index 53313c56..7a036ed8 100644 --- a/drivers/net/mlx4/mlx4_rxq.c +++ b/drivers/net/mlx4/mlx4_rxq.c @@ -1,34 +1,6 @@ -/*- - * BSD LICENSE - * - * Copyright 2017 6WIND S.A. - * Copyright 2017 Mellanox - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * * Neither the name of 6WIND S.A. nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright 2017 6WIND S.A. + * Copyright 2017 Mellanox */ /** @@ -55,13 +27,14 @@ #include <rte_byteorder.h> #include <rte_common.h> #include <rte_errno.h> -#include <rte_ethdev.h> +#include <rte_ethdev_driver.h> #include <rte_flow.h> #include <rte_malloc.h> #include <rte_mbuf.h> #include <rte_mempool.h> #include "mlx4.h" +#include "mlx4_glue.h" #include "mlx4_flow.h" #include "mlx4_rxtx.h" #include "mlx4_utils.h" @@ -231,7 +204,7 @@ mlx4_rss_attach(struct mlx4_rss *rss) } ind_tbl[i] = rxq->wq; } - rss->ind = ibv_create_rwq_ind_table + rss->ind = mlx4_glue->create_rwq_ind_table (priv->ctx, &(struct ibv_rwq_ind_table_init_attr){ .log_ind_tbl_size = rte_log2_u32(RTE_DIM(ind_tbl)), @@ -243,7 +216,7 @@ mlx4_rss_attach(struct mlx4_rss *rss) msg = "RSS indirection table creation failure"; goto error; } - rss->qp = ibv_create_qp_ex + rss->qp = mlx4_glue->create_qp_ex (priv->ctx, &(struct ibv_qp_init_attr_ex){ .comp_mask = (IBV_QP_INIT_ATTR_PD | @@ -264,7 +237,7 @@ mlx4_rss_attach(struct mlx4_rss *rss) msg = "RSS hash QP creation failure"; goto error; } - ret = ibv_modify_qp + ret = mlx4_glue->modify_qp (rss->qp, &(struct ibv_qp_attr){ .qp_state = IBV_QPS_INIT, @@ -275,7 +248,7 @@ mlx4_rss_attach(struct mlx4_rss *rss) msg = "failed to switch RSS hash QP to INIT state"; goto error; } - ret = ibv_modify_qp + ret = mlx4_glue->modify_qp (rss->qp, &(struct ibv_qp_attr){ .qp_state = IBV_QPS_RTR, @@ -288,11 +261,11 @@ mlx4_rss_attach(struct mlx4_rss *rss) return 0; error: if (rss->qp) { - claim_zero(ibv_destroy_qp(rss->qp)); + claim_zero(mlx4_glue->destroy_qp(rss->qp)); rss->qp = NULL; } if (rss->ind) { - claim_zero(ibv_destroy_rwq_ind_table(rss->ind)); + claim_zero(mlx4_glue->destroy_rwq_ind_table(rss->ind)); rss->ind = NULL; } while (i--) @@ -325,9 +298,9 @@ mlx4_rss_detach(struct mlx4_rss *rss) assert(rss->ind); if (--rss->usecnt) return; - claim_zero(ibv_destroy_qp(rss->qp)); + claim_zero(mlx4_glue->destroy_qp(rss->qp)); rss->qp = NULL; - claim_zero(ibv_destroy_rwq_ind_table(rss->ind)); + claim_zero(mlx4_glue->destroy_rwq_ind_table(rss->ind)); rss->ind = NULL; for (i = 0; i != rss->queues; ++i) mlx4_rxq_detach(priv->dev->data->rx_queues[rss->queue_id[i]]); @@ -364,9 +337,10 @@ mlx4_rss_init(struct priv *priv) int ret; /* Prepare range for RSS contexts before creating the first WQ. */ - ret = mlx4dv_set_context_attr(priv->ctx, - MLX4DV_SET_CTX_ATTR_LOG_WQS_RANGE_SZ, - &log2_range); + ret = mlx4_glue->dv_set_context_attr + (priv->ctx, + MLX4DV_SET_CTX_ATTR_LOG_WQS_RANGE_SZ, + &log2_range); if (ret) { ERROR("cannot set up range size for RSS context to %u" " (for %u Rx queues), error: %s", @@ -402,13 +376,13 @@ mlx4_rss_init(struct priv *priv) * sequentially and are guaranteed to never be reused in the * same context by the underlying implementation. */ - cq = ibv_create_cq(priv->ctx, 1, NULL, NULL, 0); + cq = mlx4_glue->create_cq(priv->ctx, 1, NULL, NULL, 0); if (!cq) { ret = ENOMEM; msg = "placeholder CQ creation failure"; goto error; } - wq = ibv_create_wq + wq = mlx4_glue->create_wq (priv->ctx, &(struct ibv_wq_init_attr){ .wq_type = IBV_WQT_RQ, @@ -419,11 +393,11 @@ mlx4_rss_init(struct priv *priv) }); if (wq) { wq_num = wq->wq_num; - claim_zero(ibv_destroy_wq(wq)); + claim_zero(mlx4_glue->destroy_wq(wq)); } else { wq_num = 0; /* Shut up GCC 4.8 warnings. */ } - claim_zero(ibv_destroy_cq(cq)); + claim_zero(mlx4_glue->destroy_cq(cq)); if (!wq) { ret = ENOMEM; msg = "placeholder WQ creation failure"; @@ -522,13 +496,14 @@ mlx4_rxq_attach(struct rxq *rxq) int ret; assert(rte_is_power_of_2(elts_n)); - cq = ibv_create_cq(priv->ctx, elts_n / sges_n, NULL, rxq->channel, 0); + cq = mlx4_glue->create_cq(priv->ctx, elts_n / sges_n, NULL, + rxq->channel, 0); if (!cq) { ret = ENOMEM; msg = "CQ creation failure"; goto error; } - wq = ibv_create_wq + wq = mlx4_glue->create_wq (priv->ctx, &(struct ibv_wq_init_attr){ .wq_type = IBV_WQT_RQ, @@ -542,7 +517,7 @@ mlx4_rxq_attach(struct rxq *rxq) msg = "WQ creation failure"; goto error; } - ret = ibv_modify_wq + ret = mlx4_glue->modify_wq (wq, &(struct ibv_wq_attr){ .attr_mask = IBV_WQ_ATTR_STATE, @@ -557,7 +532,7 @@ mlx4_rxq_attach(struct rxq *rxq) mlxdv.cq.out = &dv_cq; mlxdv.rwq.in = wq; mlxdv.rwq.out = &dv_rwq; - ret = mlx4dv_init_obj(&mlxdv, MLX4DV_OBJ_RWQ | MLX4DV_OBJ_CQ); + ret = mlx4_glue->dv_init_obj(&mlxdv, MLX4DV_OBJ_RWQ | MLX4DV_OBJ_CQ); if (ret) { msg = "failed to obtain device information from WQ/CQ objects"; goto error; @@ -619,9 +594,9 @@ mlx4_rxq_attach(struct rxq *rxq) return 0; error: if (wq) - claim_zero(ibv_destroy_wq(wq)); + claim_zero(mlx4_glue->destroy_wq(wq)); if (cq) - claim_zero(ibv_destroy_cq(cq)); + claim_zero(mlx4_glue->destroy_cq(cq)); rte_errno = ret; ERROR("error while attaching Rx queue %p: %s: %s", (void *)rxq, msg, strerror(ret)); @@ -649,9 +624,9 @@ mlx4_rxq_detach(struct rxq *rxq) memset(&rxq->mcq, 0, sizeof(rxq->mcq)); rxq->rq_db = NULL; rxq->wqes = NULL; - claim_zero(ibv_destroy_wq(rxq->wq)); + claim_zero(mlx4_glue->destroy_wq(rxq->wq)); rxq->wq = NULL; - claim_zero(ibv_destroy_cq(rxq->cq)); + claim_zero(mlx4_glue->destroy_cq(rxq->cq)); rxq->cq = NULL; DEBUG("%p: freeing Rx queue elements", (void *)rxq); for (i = 0; (i != RTE_DIM(*elts)); ++i) { @@ -663,6 +638,64 @@ mlx4_rxq_detach(struct rxq *rxq) } /** + * Returns the per-queue supported offloads. + * + * @param priv + * Pointer to private structure. + * + * @return + * Supported Tx offloads. + */ +uint64_t +mlx4_get_rx_queue_offloads(struct priv *priv) +{ + uint64_t offloads = DEV_RX_OFFLOAD_SCATTER | + DEV_RX_OFFLOAD_CRC_STRIP; + + if (priv->hw_csum) + offloads |= DEV_RX_OFFLOAD_CHECKSUM; + return offloads; +} + +/** + * Returns the per-port supported offloads. + * + * @param priv + * Pointer to private structure. + * + * @return + * Supported Rx offloads. + */ +uint64_t +mlx4_get_rx_port_offloads(struct priv *priv) +{ + uint64_t offloads = DEV_RX_OFFLOAD_VLAN_FILTER; + + (void)priv; + return offloads; +} + +/** + * Checks if the per-queue offload configuration is valid. + * + * @param priv + * Pointer to private structure. + * @param requested + * Per-queue offloads configuration. + * + * @return + * Nonzero when configuration is valid. + */ +static int +mlx4_check_rx_queue_offloads(struct priv *priv, uint64_t requested) +{ + uint64_t mandatory = priv->dev->data->dev_conf.rxmode.offloads; + uint64_t supported = mlx4_get_rx_port_offloads(priv); + + return !((mandatory ^ requested) & supported); +} + +/** * DPDK callback to configure a Rx queue. * * @param dev @@ -707,6 +740,16 @@ mlx4_rx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc, (void)conf; /* Thresholds configuration (ignored). */ DEBUG("%p: configuring queue %u for %u descriptors", (void *)dev, idx, desc); + if (!mlx4_check_rx_queue_offloads(priv, conf->offloads)) { + rte_errno = ENOTSUP; + ERROR("%p: Rx queue offloads 0x%" PRIx64 " don't match port " + "offloads 0x%" PRIx64 " or supported offloads 0x%" PRIx64, + (void *)dev, conf->offloads, + dev->data->dev_conf.rxmode.offloads, + (mlx4_get_rx_port_offloads(priv) | + mlx4_get_rx_queue_offloads(priv))); + return -rte_errno; + } if (idx >= dev->data->nb_rx_queues) { rte_errno = EOVERFLOW; ERROR("%p: queue index out of range (%u >= %u)", @@ -746,10 +789,10 @@ mlx4_rx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc, .elts_n = rte_log2_u32(desc), .elts = elts, /* Toggle Rx checksum offload if hardware supports it. */ - .csum = (priv->hw_csum && - dev->data->dev_conf.rxmode.hw_ip_checksum), - .csum_l2tun = (priv->hw_csum_l2tun && - dev->data->dev_conf.rxmode.hw_ip_checksum), + .csum = priv->hw_csum && + (conf->offloads & DEV_RX_OFFLOAD_CHECKSUM), + .csum_l2tun = priv->hw_csum_l2tun && + (conf->offloads & DEV_RX_OFFLOAD_CHECKSUM), .l2tun_offload = priv->hw_csum_l2tun, .stats = { .idx = idx, @@ -761,7 +804,7 @@ mlx4_rx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc, if (dev->data->dev_conf.rxmode.max_rx_pkt_len <= (mb_len - RTE_PKTMBUF_HEADROOM)) { ; - } else if (dev->data->dev_conf.rxmode.enable_scatter) { + } else if (conf->offloads & DEV_RX_OFFLOAD_SCATTER) { uint32_t size = RTE_PKTMBUF_HEADROOM + dev->data->dev_conf.rxmode.max_rx_pkt_len; @@ -812,7 +855,7 @@ mlx4_rx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc, goto error; } if (dev->data->dev_conf.intr_conf.rxq) { - rxq->channel = ibv_create_comp_channel(priv->ctx); + rxq->channel = mlx4_glue->create_comp_channel(priv->ctx); if (rxq->channel == NULL) { rte_errno = ENOMEM; ERROR("%p: Rx interrupt completion channel creation" @@ -867,7 +910,7 @@ mlx4_rx_queue_release(void *dpdk_rxq) assert(!rxq->wqes); assert(!rxq->rq_db); if (rxq->channel) - claim_zero(ibv_destroy_comp_channel(rxq->channel)); + claim_zero(mlx4_glue->destroy_comp_channel(rxq->channel)); if (rxq->mr) mlx4_mr_put(rxq->mr); rte_free(rxq); diff --git a/drivers/net/mlx4/mlx4_rxtx.c b/drivers/net/mlx4/mlx4_rxtx.c index 2bfa8b1b..8ca8b77c 100644 --- a/drivers/net/mlx4/mlx4_rxtx.c +++ b/drivers/net/mlx4/mlx4_rxtx.c @@ -1,34 +1,6 @@ -/*- - * BSD LICENSE - * - * Copyright 2017 6WIND S.A. - * Copyright 2017 Mellanox - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * * Neither the name of 6WIND S.A. nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright 2017 6WIND S.A. + * Copyright 2017 Mellanox */ /** @@ -61,9 +33,6 @@ #include "mlx4_rxtx.h" #include "mlx4_utils.h" -#define WQE_ONE_DATA_SEG_SIZE \ - (sizeof(struct mlx4_wqe_ctrl_seg) + sizeof(struct mlx4_wqe_data_seg)) - /** * Pointer-value pair structure used in tx_post_send for saving the first * DWORD (32 byte) of a TXBB. @@ -88,7 +57,8 @@ uint32_t mlx4_ptype_table[0x100] __rte_cache_aligned = { * giving a total of up to 256 entries. */ [0x00] = RTE_PTYPE_L2_ETHER, - [0x01] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN, + [0x01] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | + RTE_PTYPE_L4_NONFRAG, [0x02] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_FRAG, [0x03] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | @@ -261,59 +231,48 @@ uint32_t mlx4_ptype_table[0x100] __rte_cache_aligned = { }; /** - * Stamp a WQE so it won't be reused by the HW. + * Stamp TXBB burst so it won't be reused by the HW. * * Routine is used when freeing WQE used by the chip or when failing * building an WQ entry has failed leaving partial information on the queue. * * @param sq * Pointer to the SQ structure. - * @param index - * Index of the freed WQE. - * @param num_txbbs - * Number of blocks to stamp. - * If < 0 the routine will use the size written in the WQ entry. - * @param owner - * The value of the WQE owner bit to use in the stamp. + * @param start + * Pointer to the first TXBB to stamp. + * @param end + * Pointer to the followed end TXBB to stamp. * * @return - * The number of Tx basic blocs (TXBB) the WQE contained. + * Stamping burst size in byte units. */ -static int -mlx4_txq_stamp_freed_wqe(struct mlx4_sq *sq, uint16_t index, uint8_t owner) +static uint32_t +mlx4_txq_stamp_freed_wqe(struct mlx4_sq *sq, volatile uint32_t *start, + volatile uint32_t *end) { - uint32_t stamp = rte_cpu_to_be_32(MLX4_SQ_STAMP_VAL | - (!!owner << MLX4_SQ_STAMP_SHIFT)); - volatile uint8_t *wqe = mlx4_get_send_wqe(sq, - (index & sq->txbb_cnt_mask)); - volatile uint32_t *ptr = (volatile uint32_t *)wqe; - int i; - int txbbs_size; - int num_txbbs; + uint32_t stamp = sq->stamp; + int32_t size = (intptr_t)end - (intptr_t)start; - /* Extract the size from the control segment of the WQE. */ - num_txbbs = MLX4_SIZE_TO_TXBBS((((volatile struct mlx4_wqe_ctrl_seg *) - wqe)->fence_size & 0x3f) << 4); - txbbs_size = num_txbbs * MLX4_TXBB_SIZE; - /* Optimize the common case when there is no wrap-around. */ - if (wqe + txbbs_size <= sq->eob) { - /* Stamp the freed descriptor. */ - for (i = 0; i < txbbs_size; i += MLX4_SQ_STAMP_STRIDE) { - *ptr = stamp; - ptr += MLX4_SQ_STAMP_DWORDS; - } - } else { - /* Stamp the freed descriptor. */ - for (i = 0; i < txbbs_size; i += MLX4_SQ_STAMP_STRIDE) { - *ptr = stamp; - ptr += MLX4_SQ_STAMP_DWORDS; - if ((volatile uint8_t *)ptr >= sq->eob) { - ptr = (volatile uint32_t *)sq->buf; - stamp ^= RTE_BE32(0x80000000); - } - } + assert(start != end); + /* Hold SQ ring wrap around. */ + if (size < 0) { + size = (int32_t)sq->size + size; + do { + *start = stamp; + start += MLX4_SQ_STAMP_DWORDS; + } while (start != (volatile uint32_t *)sq->eob); + start = (volatile uint32_t *)sq->buf; + /* Flip invalid stamping ownership. */ + stamp ^= RTE_BE32(0x1 << MLX4_SQ_OWNER_BIT); + sq->stamp = stamp; + if (start == end) + return size; } - return num_txbbs; + do { + *start = stamp; + start += MLX4_SQ_STAMP_DWORDS; + } while (start != end); + return (uint32_t)size; } /** @@ -326,23 +285,21 @@ mlx4_txq_stamp_freed_wqe(struct mlx4_sq *sq, uint16_t index, uint8_t owner) * * @param txq * Pointer to Tx queue structure. - * - * @return - * 0 on success, -1 on failure. + * @param elts_m + * Tx elements number mask. + * @param sq + * Pointer to the SQ structure. */ -static int -mlx4_txq_complete(struct txq *txq, const unsigned int elts_n, - struct mlx4_sq *sq) +static void +mlx4_txq_complete(struct txq *txq, const unsigned int elts_m, + struct mlx4_sq *sq) { - unsigned int elts_comp = txq->elts_comp; unsigned int elts_tail = txq->elts_tail; - unsigned int sq_tail = sq->tail; struct mlx4_cq *cq = &txq->mcq; volatile struct mlx4_cqe *cqe; + uint32_t completed; uint32_t cons_index = cq->cons_index; - uint16_t new_index; - uint16_t nr_txbbs = 0; - int pkts = 0; + volatile uint32_t *first_txbb; /* * Traverse over all CQ entries reported and handle each WQ entry @@ -353,11 +310,11 @@ mlx4_txq_complete(struct txq *txq, const unsigned int elts_n, if (unlikely(!!(cqe->owner_sr_opcode & MLX4_CQE_OWNER_MASK) ^ !!(cons_index & cq->cqe_cnt))) break; +#ifndef NDEBUG /* * Make sure we read the CQE after we read the ownership bit. */ rte_io_rmb(); -#ifndef NDEBUG if (unlikely((cqe->owner_sr_opcode & MLX4_CQE_OPCODE_MASK) == MLX4_CQE_OPCODE_ERROR)) { volatile struct mlx4_err_cqe *cqe_err = @@ -366,41 +323,24 @@ mlx4_txq_complete(struct txq *txq, const unsigned int elts_n, " syndrome: 0x%x\n", (void *)txq, cqe_err->vendor_err, cqe_err->syndrome); + break; } #endif /* NDEBUG */ - /* Get WQE index reported in the CQE. */ - new_index = - rte_be_to_cpu_16(cqe->wqe_index) & sq->txbb_cnt_mask; - do { - /* Free next descriptor. */ - sq_tail += nr_txbbs; - nr_txbbs = - mlx4_txq_stamp_freed_wqe(sq, - sq_tail & sq->txbb_cnt_mask, - !!(sq_tail & sq->txbb_cnt)); - pkts++; - } while ((sq_tail & sq->txbb_cnt_mask) != new_index); cons_index++; } while (1); - if (unlikely(pkts == 0)) - return 0; - /* Update CQ. */ + completed = (cons_index - cq->cons_index) * txq->elts_comp_cd_init; + if (unlikely(!completed)) + return; + /* First stamping address is the end of the last one. */ + first_txbb = (&(*txq->elts)[elts_tail & elts_m])->eocb; + elts_tail += completed; + /* The new tail element holds the end address. */ + sq->remain_size += mlx4_txq_stamp_freed_wqe(sq, first_txbb, + (&(*txq->elts)[elts_tail & elts_m])->eocb); + /* Update CQ consumer index. */ cq->cons_index = cons_index; - *cq->set_ci_db = rte_cpu_to_be_32(cq->cons_index & MLX4_CQ_DB_CI_MASK); - sq->tail = sq_tail + nr_txbbs; - /* Update the list of packets posted for transmission. */ - elts_comp -= pkts; - assert(elts_comp <= txq->elts_comp); - /* - * Assume completion status is successful as nothing can be done about - * it anyway. - */ - elts_tail += pkts; - if (elts_tail >= elts_n) - elts_tail -= elts_n; + *cq->set_ci_db = rte_cpu_to_be_32(cons_index & MLX4_CQ_DB_CI_MASK); txq->elts_tail = elts_tail; - txq->elts_comp = elts_comp; - return 0; } /** @@ -421,110 +361,166 @@ mlx4_txq_mb2mp(struct rte_mbuf *buf) return buf->pool; } -static int +/** + * Write Tx data segment to the SQ. + * + * @param dseg + * Pointer to data segment in SQ. + * @param lkey + * Memory region lkey. + * @param addr + * Data address. + * @param byte_count + * Big endian bytes count of the data to send. + */ +static inline void +mlx4_fill_tx_data_seg(volatile struct mlx4_wqe_data_seg *dseg, + uint32_t lkey, uintptr_t addr, rte_be32_t byte_count) +{ + dseg->addr = rte_cpu_to_be_64(addr); + dseg->lkey = rte_cpu_to_be_32(lkey); +#if RTE_CACHE_LINE_SIZE < 64 + /* + * Need a barrier here before writing the byte_count + * fields to make sure that all the data is visible + * before the byte_count field is set. + * Otherwise, if the segment begins a new cacheline, + * the HCA prefetcher could grab the 64-byte chunk and + * get a valid (!= 0xffffffff) byte count but stale + * data, and end up sending the wrong data. + */ + rte_io_wmb(); +#endif /* RTE_CACHE_LINE_SIZE */ + dseg->byte_count = byte_count; +} + +/** + * Write data segments of multi-segment packet. + * + * @param buf + * Pointer to the first packet mbuf. + * @param txq + * Pointer to Tx queue structure. + * @param ctrl + * Pointer to the WQE control segment. + * + * @return + * Pointer to the next WQE control segment on success, NULL otherwise. + */ +static volatile struct mlx4_wqe_ctrl_seg * mlx4_tx_burst_segs(struct rte_mbuf *buf, struct txq *txq, - volatile struct mlx4_wqe_ctrl_seg **pctrl) + volatile struct mlx4_wqe_ctrl_seg *ctrl) { - int wqe_real_size; - int nr_txbbs; struct pv *pv = (struct pv *)txq->bounce_buf; struct mlx4_sq *sq = &txq->msq; - uint32_t head_idx = sq->head & sq->txbb_cnt_mask; - volatile struct mlx4_wqe_ctrl_seg *ctrl; - volatile struct mlx4_wqe_data_seg *dseg; - struct rte_mbuf *sbuf; + struct rte_mbuf *sbuf = buf; uint32_t lkey; - uintptr_t addr; - uint32_t byte_count; int pv_counter = 0; + int nb_segs = buf->nb_segs; + uint32_t wqe_size; + volatile struct mlx4_wqe_data_seg *dseg = + (volatile struct mlx4_wqe_data_seg *)(ctrl + 1); - /* Calculate the needed work queue entry size for this packet. */ - wqe_real_size = sizeof(volatile struct mlx4_wqe_ctrl_seg) + - buf->nb_segs * sizeof(volatile struct mlx4_wqe_data_seg); - nr_txbbs = MLX4_SIZE_TO_TXBBS(wqe_real_size); + ctrl->fence_size = 1 + nb_segs; + wqe_size = RTE_ALIGN((uint32_t)(ctrl->fence_size << MLX4_SEG_SHIFT), + MLX4_TXBB_SIZE); + /* Validate WQE size and WQE space in the send queue. */ + if (sq->remain_size < wqe_size || + wqe_size > MLX4_MAX_WQE_SIZE) + return NULL; /* - * Check that there is room for this WQE in the send queue and that - * the WQE size is legal. + * Fill the data segments with buffer information. + * First WQE TXBB head segment is always control segment, + * so jump to tail TXBB data segments code for the first + * WQE data segments filling. */ - if (((sq->head - sq->tail) + nr_txbbs + - sq->headroom_txbbs) >= sq->txbb_cnt || - nr_txbbs > MLX4_MAX_WQE_TXBBS) { - return -1; + goto txbb_tail_segs; +txbb_head_seg: + /* Memory region key (big endian) for this memory pool. */ + lkey = mlx4_txq_mp2mr(txq, mlx4_txq_mb2mp(sbuf)); + if (unlikely(lkey == (uint32_t)-1)) { + DEBUG("%p: unable to get MP <-> MR association", + (void *)txq); + return NULL; } - /* Get the control and data entries of the WQE. */ - ctrl = (volatile struct mlx4_wqe_ctrl_seg *) - mlx4_get_send_wqe(sq, head_idx); - dseg = (volatile struct mlx4_wqe_data_seg *) - ((uintptr_t)ctrl + sizeof(struct mlx4_wqe_ctrl_seg)); - *pctrl = ctrl; - /* Fill the data segments with buffer information. */ - for (sbuf = buf; sbuf != NULL; sbuf = sbuf->next, dseg++) { - addr = rte_pktmbuf_mtod(sbuf, uintptr_t); - rte_prefetch0((volatile void *)addr); - /* Handle WQE wraparound. */ - if (dseg >= (volatile struct mlx4_wqe_data_seg *)sq->eob) - dseg = (volatile struct mlx4_wqe_data_seg *)sq->buf; - dseg->addr = rte_cpu_to_be_64(addr); - /* Memory region key (big endian) for this memory pool. */ + /* Handle WQE wraparound. */ + if (dseg >= + (volatile struct mlx4_wqe_data_seg *)sq->eob) + dseg = (volatile struct mlx4_wqe_data_seg *) + sq->buf; + dseg->addr = rte_cpu_to_be_64(rte_pktmbuf_mtod(sbuf, uintptr_t)); + dseg->lkey = rte_cpu_to_be_32(lkey); + /* + * This data segment starts at the beginning of a new + * TXBB, so we need to postpone its byte_count writing + * for later. + */ + pv[pv_counter].dseg = dseg; + /* + * Zero length segment is treated as inline segment + * with zero data. + */ + pv[pv_counter++].val = rte_cpu_to_be_32(sbuf->data_len ? + sbuf->data_len : 0x80000000); + sbuf = sbuf->next; + dseg++; + nb_segs--; +txbb_tail_segs: + /* Jump to default if there are more than two segments remaining. */ + switch (nb_segs) { + default: lkey = mlx4_txq_mp2mr(txq, mlx4_txq_mb2mp(sbuf)); - dseg->lkey = rte_cpu_to_be_32(lkey); -#ifndef NDEBUG - /* Calculate the needed work queue entry size for this packet */ - if (unlikely(dseg->lkey == rte_cpu_to_be_32((uint32_t)-1))) { - /* MR does not exist. */ + if (unlikely(lkey == (uint32_t)-1)) { DEBUG("%p: unable to get MP <-> MR association", - (void *)txq); - /* - * Restamp entry in case of failure. - * Make sure that size is written correctly - * Note that we give ownership to the SW, not the HW. - */ - wqe_real_size = sizeof(struct mlx4_wqe_ctrl_seg) + - buf->nb_segs * sizeof(struct mlx4_wqe_data_seg); - ctrl->fence_size = (wqe_real_size >> 4) & 0x3f; - mlx4_txq_stamp_freed_wqe(sq, head_idx, - (sq->head & sq->txbb_cnt) ? 0 : 1); - return -1; + (void *)txq); + return NULL; } -#endif /* NDEBUG */ - if (likely(sbuf->data_len)) { - byte_count = rte_cpu_to_be_32(sbuf->data_len); - } else { - /* - * Zero length segment is treated as inline segment - * with zero data. - */ - byte_count = RTE_BE32(0x80000000); + mlx4_fill_tx_data_seg(dseg, lkey, + rte_pktmbuf_mtod(sbuf, uintptr_t), + rte_cpu_to_be_32(sbuf->data_len ? + sbuf->data_len : + 0x80000000)); + sbuf = sbuf->next; + dseg++; + nb_segs--; + /* fallthrough */ + case 2: + lkey = mlx4_txq_mp2mr(txq, mlx4_txq_mb2mp(sbuf)); + if (unlikely(lkey == (uint32_t)-1)) { + DEBUG("%p: unable to get MP <-> MR association", + (void *)txq); + return NULL; } - /* - * If the data segment is not at the beginning of a - * Tx basic block (TXBB) then write the byte count, - * else postpone the writing to just before updating the - * control segment. - */ - if ((uintptr_t)dseg & (uintptr_t)(MLX4_TXBB_SIZE - 1)) { -#if RTE_CACHE_LINE_SIZE < 64 - /* - * Need a barrier here before writing the byte_count - * fields to make sure that all the data is visible - * before the byte_count field is set. - * Otherwise, if the segment begins a new cacheline, - * the HCA prefetcher could grab the 64-byte chunk and - * get a valid (!= 0xffffffff) byte count but stale - * data, and end up sending the wrong data. - */ - rte_io_wmb(); -#endif /* RTE_CACHE_LINE_SIZE */ - dseg->byte_count = byte_count; - } else { - /* - * This data segment starts at the beginning of a new - * TXBB, so we need to postpone its byte_count writing - * for later. - */ - pv[pv_counter].dseg = dseg; - pv[pv_counter++].val = byte_count; + mlx4_fill_tx_data_seg(dseg, lkey, + rte_pktmbuf_mtod(sbuf, uintptr_t), + rte_cpu_to_be_32(sbuf->data_len ? + sbuf->data_len : + 0x80000000)); + sbuf = sbuf->next; + dseg++; + nb_segs--; + /* fallthrough */ + case 1: + lkey = mlx4_txq_mp2mr(txq, mlx4_txq_mb2mp(sbuf)); + if (unlikely(lkey == (uint32_t)-1)) { + DEBUG("%p: unable to get MP <-> MR association", + (void *)txq); + return NULL; } + mlx4_fill_tx_data_seg(dseg, lkey, + rte_pktmbuf_mtod(sbuf, uintptr_t), + rte_cpu_to_be_32(sbuf->data_len ? + sbuf->data_len : + 0x80000000)); + nb_segs--; + if (nb_segs) { + sbuf = sbuf->next; + dseg++; + goto txbb_head_seg; + } + /* fallthrough */ + case 0: + break; } /* Write the first DWORD of each TXBB save earlier. */ if (pv_counter) { @@ -533,9 +529,10 @@ mlx4_tx_burst_segs(struct rte_mbuf *buf, struct txq *txq, for (--pv_counter; pv_counter >= 0; pv_counter--) pv[pv_counter].dseg->byte_count = pv[pv_counter].val; } - /* Fill the control parameters for this packet. */ - ctrl->fence_size = (wqe_real_size >> 4) & 0x3f; - return nr_txbbs; + sq->remain_size -= wqe_size; + /* Align next WQE address to the next TXBB. */ + return (volatile struct mlx4_wqe_ctrl_seg *) + ((volatile uint8_t *)ctrl + wqe_size); } /** @@ -557,40 +554,39 @@ mlx4_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n) struct txq *txq = (struct txq *)dpdk_txq; unsigned int elts_head = txq->elts_head; const unsigned int elts_n = txq->elts_n; + const unsigned int elts_m = elts_n - 1; unsigned int bytes_sent = 0; unsigned int i; - unsigned int max; + unsigned int max = elts_head - txq->elts_tail; struct mlx4_sq *sq = &txq->msq; - int nr_txbbs; + volatile struct mlx4_wqe_ctrl_seg *ctrl; + struct txq_elt *elt; assert(txq->elts_comp_cd != 0); - if (likely(txq->elts_comp != 0)) - mlx4_txq_complete(txq, elts_n, sq); - max = (elts_n - (elts_head - txq->elts_tail)); - if (max > elts_n) - max -= elts_n; + if (likely(max >= txq->elts_comp_cd_init)) + mlx4_txq_complete(txq, elts_m, sq); + max = elts_n - max; assert(max >= 1); assert(max <= elts_n); /* Always leave one free entry in the ring. */ --max; if (max > pkts_n) max = pkts_n; + elt = &(*txq->elts)[elts_head & elts_m]; + /* First Tx burst element saves the next WQE control segment. */ + ctrl = elt->wqe; for (i = 0; (i != max); ++i) { struct rte_mbuf *buf = pkts[i]; - unsigned int elts_head_next = - (((elts_head + 1) == elts_n) ? 0 : elts_head + 1); - struct txq_elt *elt_next = &(*txq->elts)[elts_head_next]; - struct txq_elt *elt = &(*txq->elts)[elts_head]; - uint32_t owner_opcode = MLX4_OPCODE_SEND; - volatile struct mlx4_wqe_ctrl_seg *ctrl; - volatile struct mlx4_wqe_data_seg *dseg; + struct txq_elt *elt_next = &(*txq->elts)[++elts_head & elts_m]; + uint32_t owner_opcode = sq->owner_opcode; + volatile struct mlx4_wqe_data_seg *dseg = + (volatile struct mlx4_wqe_data_seg *)(ctrl + 1); + volatile struct mlx4_wqe_ctrl_seg *ctrl_next; union { uint32_t flags; uint16_t flags16[2]; } srcrb; - uint32_t head_idx = sq->head & sq->txbb_cnt_mask; uint32_t lkey; - uintptr_t addr; /* Clean up old buffer. */ if (likely(elt->buf != NULL)) { @@ -598,7 +594,7 @@ mlx4_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n) #ifndef NDEBUG /* Poisoning. */ - memset(elt, 0x66, sizeof(*elt)); + memset(&elt->buf, 0x66, sizeof(struct rte_mbuf *)); #endif /* Faster than rte_pktmbuf_free(). */ do { @@ -610,70 +606,48 @@ mlx4_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n) } RTE_MBUF_PREFETCH_TO_FREE(elt_next->buf); if (buf->nb_segs == 1) { - /* - * Check that there is room for this WQE in the send - * queue and that the WQE size is legal - */ - if (((sq->head - sq->tail) + 1 + sq->headroom_txbbs) >= - sq->txbb_cnt || 1 > MLX4_MAX_WQE_TXBBS) { + /* Validate WQE space in the send queue. */ + if (sq->remain_size < MLX4_TXBB_SIZE) { elt->buf = NULL; break; } - /* Get the control and data entries of the WQE. */ - ctrl = (volatile struct mlx4_wqe_ctrl_seg *) - mlx4_get_send_wqe(sq, head_idx); - dseg = (volatile struct mlx4_wqe_data_seg *) - ((uintptr_t)ctrl + - sizeof(struct mlx4_wqe_ctrl_seg)); - addr = rte_pktmbuf_mtod(buf, uintptr_t); - rte_prefetch0((volatile void *)addr); - /* Handle WQE wraparound. */ - if (dseg >= - (volatile struct mlx4_wqe_data_seg *)sq->eob) - dseg = (volatile struct mlx4_wqe_data_seg *) - sq->buf; - dseg->addr = rte_cpu_to_be_64(addr); - /* Memory region key (big endian). */ lkey = mlx4_txq_mp2mr(txq, mlx4_txq_mb2mp(buf)); - dseg->lkey = rte_cpu_to_be_32(lkey); -#ifndef NDEBUG - if (unlikely(dseg->lkey == - rte_cpu_to_be_32((uint32_t)-1))) { + if (unlikely(lkey == (uint32_t)-1)) { /* MR does not exist. */ DEBUG("%p: unable to get MP <-> MR association", (void *)txq); - /* - * Restamp entry in case of failure. - * Make sure that size is written correctly - * Note that we give ownership to the SW, - * not the HW. - */ - ctrl->fence_size = - (WQE_ONE_DATA_SEG_SIZE >> 4) & 0x3f; - mlx4_txq_stamp_freed_wqe(sq, head_idx, - (sq->head & sq->txbb_cnt) ? 0 : 1); elt->buf = NULL; break; } -#endif /* NDEBUG */ - /* Never be TXBB aligned, no need compiler barrier. */ - dseg->byte_count = rte_cpu_to_be_32(buf->data_len); - /* Fill the control parameters for this packet. */ - ctrl->fence_size = (WQE_ONE_DATA_SEG_SIZE >> 4) & 0x3f; - nr_txbbs = 1; + mlx4_fill_tx_data_seg(dseg++, lkey, + rte_pktmbuf_mtod(buf, uintptr_t), + rte_cpu_to_be_32(buf->data_len)); + /* Set WQE size in 16-byte units. */ + ctrl->fence_size = 0x2; + sq->remain_size -= MLX4_TXBB_SIZE; + /* Align next WQE address to the next TXBB. */ + ctrl_next = ctrl + 0x4; } else { - nr_txbbs = mlx4_tx_burst_segs(buf, txq, &ctrl); - if (nr_txbbs < 0) { + ctrl_next = mlx4_tx_burst_segs(buf, txq, ctrl); + if (!ctrl_next) { elt->buf = NULL; break; } } + /* Hold SQ ring wrap around. */ + if ((volatile uint8_t *)ctrl_next >= sq->eob) { + ctrl_next = (volatile struct mlx4_wqe_ctrl_seg *) + ((volatile uint8_t *)ctrl_next - sq->size); + /* Flip HW valid ownership. */ + sq->owner_opcode ^= 0x1 << MLX4_SQ_OWNER_BIT; + } /* * For raw Ethernet, the SOLICIT flag is used to indicate * that no ICRC should be calculated. */ - txq->elts_comp_cd -= nr_txbbs; - if (unlikely(txq->elts_comp_cd <= 0)) { + if (--txq->elts_comp_cd == 0) { + /* Save the completion burst end address. */ + elt_next->eocb = (volatile uint32_t *)ctrl_next; txq->elts_comp_cd = txq->elts_comp_cd_init; srcrb.flags = RTE_BE32(MLX4_WQE_CTRL_SOLICIT | MLX4_WQE_CTRL_CQ_UPDATE); @@ -719,17 +693,17 @@ mlx4_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n) * executing as soon as we do). */ rte_io_wmb(); - ctrl->owner_opcode = rte_cpu_to_be_32(owner_opcode | - ((sq->head & sq->txbb_cnt) ? - MLX4_BIT_WQE_OWN : 0)); - sq->head += nr_txbbs; + ctrl->owner_opcode = rte_cpu_to_be_32(owner_opcode); elt->buf = buf; bytes_sent += buf->pkt_len; - elts_head = elts_head_next; + ctrl = ctrl_next; + elt = elt_next; } /* Take a shortcut if nothing must be sent. */ if (unlikely(i == 0)) return 0; + /* Save WQE address of the next Tx burst element. */ + elt->wqe = ctrl; /* Increment send statistics counters. */ txq->stats.opackets += i; txq->stats.obytes += bytes_sent; @@ -737,8 +711,7 @@ mlx4_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n) rte_wmb(); /* Ring QP doorbell. */ rte_write32(txq->msq.doorbell_qpn, txq->msq.db); - txq->elts_head = elts_head; - txq->elts_comp += i; + txq->elts_head += i; return i; } @@ -964,7 +937,8 @@ mlx4_rx_burst(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n) /* Update packet information. */ pkt->packet_type = rxq_cq_to_pkt_type(cqe, rxq->l2tun_offload); - pkt->ol_flags = 0; + pkt->ol_flags = PKT_RX_RSS_HASH; + pkt->hash.rss = cqe->immed_rss_invalid; pkt->pkt_len = len; if (rxq->csum | rxq->csum_l2tun) { uint32_t flags = diff --git a/drivers/net/mlx4/mlx4_rxtx.h b/drivers/net/mlx4/mlx4_rxtx.h index 463df2b0..c12bd39a 100644 --- a/drivers/net/mlx4/mlx4_rxtx.h +++ b/drivers/net/mlx4/mlx4_rxtx.h @@ -1,34 +1,6 @@ -/*- - * BSD LICENSE - * - * Copyright 2017 6WIND S.A. - * Copyright 2017 Mellanox - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * * Neither the name of 6WIND S.A. nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright 2017 6WIND S.A. + * Copyright 2017 Mellanox */ #ifndef MLX4_RXTX_H_ @@ -47,7 +19,7 @@ #pragma GCC diagnostic error "-Wpedantic" #endif -#include <rte_ethdev.h> +#include <rte_ethdev_driver.h> #include <rte_mbuf.h> #include <rte_mempool.h> @@ -105,6 +77,10 @@ struct mlx4_rss { /** Tx element. */ struct txq_elt { struct rte_mbuf *buf; /**< Buffer. */ + union { + volatile struct mlx4_wqe_ctrl_seg *wqe; /**< SQ WQE. */ + volatile uint32_t *eocb; /**< End of completion burst. */ + }; }; /** Rx queue counters. */ @@ -121,7 +97,6 @@ struct txq { struct mlx4_cq mcq; /**< Info for directly manipulating the CQ. */ unsigned int elts_head; /**< Current index in (*elts)[]. */ unsigned int elts_tail; /**< First element awaiting completion. */ - unsigned int elts_comp; /**< Number of packets awaiting completion. */ int elts_comp_cd; /**< Countdown for next completion. */ unsigned int elts_comp_cd_init; /**< Initial value for countdown. */ unsigned int elts_n; /**< (*elts)[] length. */ @@ -158,6 +133,8 @@ int mlx4_rss_attach(struct mlx4_rss *rss); void mlx4_rss_detach(struct mlx4_rss *rss); int mlx4_rxq_attach(struct rxq *rxq); void mlx4_rxq_detach(struct rxq *rxq); +uint64_t mlx4_get_rx_port_offloads(struct priv *priv); +uint64_t mlx4_get_rx_queue_offloads(struct priv *priv); int mlx4_rx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc, unsigned int socket, const struct rte_eth_rxconf *conf, @@ -177,6 +154,7 @@ uint16_t mlx4_rx_burst_removed(void *dpdk_rxq, struct rte_mbuf **pkts, /* mlx4_txq.c */ +uint64_t mlx4_get_tx_port_offloads(struct priv *priv); int mlx4_tx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc, unsigned int socket, const struct rte_eth_txconf *conf); diff --git a/drivers/net/mlx4/mlx4_txq.c b/drivers/net/mlx4/mlx4_txq.c index 7882a4d0..071b2d5d 100644 --- a/drivers/net/mlx4/mlx4_txq.c +++ b/drivers/net/mlx4/mlx4_txq.c @@ -1,34 +1,6 @@ -/*- - * BSD LICENSE - * - * Copyright 2017 6WIND S.A. - * Copyright 2017 Mellanox - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * * Neither the name of 6WIND S.A. nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright 2017 6WIND S.A. + * Copyright 2017 Mellanox */ /** @@ -41,6 +13,7 @@ #include <stddef.h> #include <stdint.h> #include <string.h> +#include <inttypes.h> /* Verbs headers do not support -pedantic. */ #ifdef PEDANTIC @@ -53,13 +26,13 @@ #include <rte_common.h> #include <rte_errno.h> -#include <rte_ethdev.h> +#include <rte_ethdev_driver.h> #include <rte_malloc.h> #include <rte_mbuf.h> #include <rte_mempool.h> #include "mlx4.h" -#include "mlx4_autoconf.h" +#include "mlx4_glue.h" #include "mlx4_prm.h" #include "mlx4_rxtx.h" #include "mlx4_utils.h" @@ -76,16 +49,16 @@ mlx4_txq_free_elts(struct txq *txq) unsigned int elts_head = txq->elts_head; unsigned int elts_tail = txq->elts_tail; struct txq_elt (*elts)[txq->elts_n] = txq->elts; + unsigned int elts_m = txq->elts_n - 1; DEBUG("%p: freeing WRs", (void *)txq); while (elts_tail != elts_head) { - struct txq_elt *elt = &(*elts)[elts_tail]; + struct txq_elt *elt = &(*elts)[elts_tail++ & elts_m]; assert(elt->buf != NULL); rte_pktmbuf_free(elt->buf); elt->buf = NULL; - if (++elts_tail == RTE_DIM(*elts)) - elts_tail = 0; + elt->wqe = NULL; } txq->elts_tail = txq->elts_head; } @@ -163,20 +136,19 @@ mlx4_txq_fill_dv_obj_info(struct txq *txq, struct mlx4dv_obj *mlxdv) struct mlx4_cq *cq = &txq->mcq; struct mlx4dv_qp *dqp = mlxdv->qp.out; struct mlx4dv_cq *dcq = mlxdv->cq.out; - uint32_t sq_size = (uint32_t)dqp->rq.offset - (uint32_t)dqp->sq.offset; - sq->buf = (uint8_t *)dqp->buf.buf + dqp->sq.offset; /* Total length, including headroom and spare WQEs. */ - sq->eob = sq->buf + sq_size; - sq->head = 0; - sq->tail = 0; - sq->txbb_cnt = - (dqp->sq.wqe_cnt << dqp->sq.wqe_shift) >> MLX4_TXBB_SHIFT; - sq->txbb_cnt_mask = sq->txbb_cnt - 1; + sq->size = (uint32_t)dqp->rq.offset - (uint32_t)dqp->sq.offset; + sq->buf = (uint8_t *)dqp->buf.buf + dqp->sq.offset; + sq->eob = sq->buf + sq->size; + uint32_t headroom_size = 2048 + (1 << dqp->sq.wqe_shift); + /* Continuous headroom size bytes must always stay freed. */ + sq->remain_size = sq->size - headroom_size; + sq->owner_opcode = MLX4_OPCODE_SEND | (0 << MLX4_SQ_OWNER_BIT); + sq->stamp = rte_cpu_to_be_32(MLX4_SQ_STAMP_VAL | + (0 << MLX4_SQ_OWNER_BIT)); sq->db = dqp->sdb; sq->doorbell_qpn = dqp->doorbell_qpn; - sq->headroom_txbbs = - (2048 + (1 << dqp->sq.wqe_shift)) >> MLX4_TXBB_SHIFT; cq->buf = dcq->buf.buf; cq->cqe_cnt = dcq->cqe_cnt; cq->set_ci_db = dcq->set_ci_db; @@ -184,6 +156,50 @@ mlx4_txq_fill_dv_obj_info(struct txq *txq, struct mlx4dv_obj *mlxdv) } /** + * Returns the per-port supported offloads. + * + * @param priv + * Pointer to private structure. + * + * @return + * Supported Tx offloads. + */ +uint64_t +mlx4_get_tx_port_offloads(struct priv *priv) +{ + uint64_t offloads = DEV_TX_OFFLOAD_MULTI_SEGS; + + if (priv->hw_csum) { + offloads |= (DEV_TX_OFFLOAD_IPV4_CKSUM | + DEV_TX_OFFLOAD_UDP_CKSUM | + DEV_TX_OFFLOAD_TCP_CKSUM); + } + if (priv->hw_csum_l2tun) + offloads |= DEV_TX_OFFLOAD_OUTER_IPV4_CKSUM; + return offloads; +} + +/** + * Checks if the per-queue offload configuration is valid. + * + * @param priv + * Pointer to private structure. + * @param requested + * Per-queue offloads configuration. + * + * @return + * Nonzero when configuration is valid. + */ +static int +mlx4_check_tx_queue_offloads(struct priv *priv, uint64_t requested) +{ + uint64_t mandatory = priv->dev->data->dev_conf.txmode.offloads; + uint64_t supported = mlx4_get_tx_port_offloads(priv); + + return !((mandatory ^ requested) & supported); +} + +/** * DPDK callback to configure a Tx queue. * * @param dev @@ -208,7 +224,7 @@ mlx4_tx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc, struct mlx4dv_obj mlxdv; struct mlx4dv_qp dv_qp; struct mlx4dv_cq dv_cq; - struct txq_elt (*elts)[desc]; + struct txq_elt (*elts)[rte_align32pow2(desc)]; struct ibv_qp_init_attr qp_init_attr; struct txq *txq; uint8_t *bounce_buf; @@ -231,9 +247,22 @@ mlx4_tx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc, }; int ret; - (void)conf; /* Thresholds configuration (ignored). */ DEBUG("%p: configuring queue %u for %u descriptors", (void *)dev, idx, desc); + /* + * Don't verify port offloads for application which + * use the old API. + */ + if ((conf->txq_flags & ETH_TXQ_FLAGS_IGNORE) && + !mlx4_check_tx_queue_offloads(priv, conf->offloads)) { + rte_errno = ENOTSUP; + ERROR("%p: Tx queue offloads 0x%" PRIx64 " don't match port " + "offloads 0x%" PRIx64 " or supported offloads 0x%" PRIx64, + (void *)dev, conf->offloads, + dev->data->dev_conf.txmode.offloads, + mlx4_get_tx_port_offloads(priv)); + return -rte_errno; + } if (idx >= dev->data->nb_tx_queues) { rte_errno = EOVERFLOW; ERROR("%p: queue index out of range (%u >= %u)", @@ -252,6 +281,12 @@ mlx4_tx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc, ERROR("%p: invalid number of Tx descriptors", (void *)dev); return -rte_errno; } + if (desc != RTE_DIM(*elts)) { + desc = RTE_DIM(*elts); + WARN("%p: increased number of descriptors in Tx queue %u" + " to the next power of two (%u)", + (void *)dev, idx, desc); + } /* Allocate and initialize Tx queue. */ mlx4_zmallocv_socket("TXQ", vec, RTE_DIM(vec), socket); if (!txq) { @@ -269,7 +304,6 @@ mlx4_tx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc, .elts = elts, .elts_head = 0, .elts_tail = 0, - .elts_comp = 0, /* * Request send completion every MLX4_PMD_TX_PER_COMP_REQ * packets or at least 4 times per ring. @@ -278,13 +312,18 @@ mlx4_tx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc, RTE_MIN(MLX4_PMD_TX_PER_COMP_REQ, desc / 4), .elts_comp_cd_init = RTE_MIN(MLX4_PMD_TX_PER_COMP_REQ, desc / 4), - .csum = priv->hw_csum, - .csum_l2tun = priv->hw_csum_l2tun, + .csum = priv->hw_csum && + (conf->offloads & (DEV_TX_OFFLOAD_IPV4_CKSUM | + DEV_TX_OFFLOAD_UDP_CKSUM | + DEV_TX_OFFLOAD_TCP_CKSUM)), + .csum_l2tun = priv->hw_csum_l2tun && + (conf->offloads & + DEV_TX_OFFLOAD_OUTER_IPV4_CKSUM), /* Enable Tx loopback for VF devices. */ .lb = !!priv->vf, .bounce_buf = bounce_buf, }; - txq->cq = ibv_create_cq(priv->ctx, desc, NULL, NULL, 0); + txq->cq = mlx4_glue->create_cq(priv->ctx, desc, NULL, NULL, 0); if (!txq->cq) { rte_errno = ENOMEM; ERROR("%p: CQ creation failure: %s", @@ -304,7 +343,7 @@ mlx4_tx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc, /* No completion events must occur by default. */ .sq_sig_all = 0, }; - txq->qp = ibv_create_qp(priv->pd, &qp_init_attr); + txq->qp = mlx4_glue->create_qp(priv->pd, &qp_init_attr); if (!txq->qp) { rte_errno = errno ? errno : EINVAL; ERROR("%p: QP creation failure: %s", @@ -312,7 +351,7 @@ mlx4_tx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc, goto error; } txq->max_inline = qp_init_attr.cap.max_inline_data; - ret = ibv_modify_qp + ret = mlx4_glue->modify_qp (txq->qp, &(struct ibv_qp_attr){ .qp_state = IBV_QPS_INIT, @@ -325,7 +364,7 @@ mlx4_tx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc, (void *)dev, strerror(rte_errno)); goto error; } - ret = ibv_modify_qp + ret = mlx4_glue->modify_qp (txq->qp, &(struct ibv_qp_attr){ .qp_state = IBV_QPS_RTR, @@ -337,7 +376,7 @@ mlx4_tx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc, (void *)dev, strerror(rte_errno)); goto error; } - ret = ibv_modify_qp + ret = mlx4_glue->modify_qp (txq->qp, &(struct ibv_qp_attr){ .qp_state = IBV_QPS_RTS, @@ -354,7 +393,7 @@ mlx4_tx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc, mlxdv.cq.out = &dv_cq; mlxdv.qp.in = txq->qp; mlxdv.qp.out = &dv_qp; - ret = mlx4dv_init_obj(&mlxdv, MLX4DV_OBJ_QP | MLX4DV_OBJ_CQ); + ret = mlx4_glue->dv_init_obj(&mlxdv, MLX4DV_OBJ_QP | MLX4DV_OBJ_CQ); if (ret) { rte_errno = EINVAL; ERROR("%p: failed to obtain information needed for" @@ -362,6 +401,9 @@ mlx4_tx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc, goto error; } mlx4_txq_fill_dv_obj_info(txq, &mlxdv); + /* Save first wqe pointer in the first element. */ + (&(*txq->elts)[0])->wqe = + (volatile struct mlx4_wqe_ctrl_seg *)txq->msq.buf; /* Pre-register known mempools. */ rte_mempool_walk(mlx4_txq_mp2mr_iter, txq); DEBUG("%p: adding Tx queue %p to list", (void *)dev, (void *)txq); @@ -401,9 +443,9 @@ mlx4_tx_queue_release(void *dpdk_txq) } mlx4_txq_free_elts(txq); if (txq->qp) - claim_zero(ibv_destroy_qp(txq->qp)); + claim_zero(mlx4_glue->destroy_qp(txq->qp)); if (txq->cq) - claim_zero(ibv_destroy_cq(txq->cq)); + claim_zero(mlx4_glue->destroy_cq(txq->cq)); for (i = 0; i != RTE_DIM(txq->mp2mr); ++i) { if (!txq->mp2mr[i].mp) break; diff --git a/drivers/net/mlx4/mlx4_utils.c b/drivers/net/mlx4/mlx4_utils.c index f18c7145..d10812ec 100644 --- a/drivers/net/mlx4/mlx4_utils.c +++ b/drivers/net/mlx4/mlx4_utils.c @@ -1,34 +1,6 @@ -/*- - * BSD LICENSE - * - * Copyright 2017 6WIND S.A. - * Copyright 2017 Mellanox - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * * Neither the name of 6WIND S.A. nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright 2017 6WIND S.A. + * Copyright 2017 Mellanox */ /** diff --git a/drivers/net/mlx4/mlx4_utils.h b/drivers/net/mlx4/mlx4_utils.h index dc529c9c..9fdbacad 100644 --- a/drivers/net/mlx4/mlx4_utils.h +++ b/drivers/net/mlx4/mlx4_utils.h @@ -1,34 +1,6 @@ -/*- - * BSD LICENSE - * - * Copyright 2017 6WIND S.A. - * Copyright 2017 Mellanox - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * * Neither the name of 6WIND S.A. nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright 2017 6WIND S.A. + * Copyright 2017 Mellanox */ #ifndef MLX4_UTILS_H_ @@ -70,13 +42,7 @@ pmd_drv_log_basename(const char *s) __func__, \ RTE_FMT_TAIL(__VA_ARGS__,))) #define DEBUG(...) PMD_DRV_LOG(DEBUG, __VA_ARGS__) -#ifndef MLX4_PMD_DEBUG_BROKEN_VERBS #define claim_zero(...) assert((__VA_ARGS__) == 0) -#else /* MLX4_PMD_DEBUG_BROKEN_VERBS */ -#define claim_zero(...) \ - (void)(((__VA_ARGS__) == 0) || \ - DEBUG("Assertion `(" # __VA_ARGS__ ") == 0' failed (IGNORED).")) -#endif /* MLX4_PMD_DEBUG_BROKEN_VERBS */ #else /* NDEBUG */ |