Diffstat (limited to 'drivers/net/mlx5')
-rw-r--r--  drivers/net/mlx5/mlx5.c                | 525
-rw-r--r--  drivers/net/mlx5/mlx5.h                | 267
-rw-r--r--  drivers/net/mlx5/mlx5_defs.h           |  14
-rw-r--r--  drivers/net/mlx5/mlx5_ethdev.c         | 1018
-rw-r--r--  drivers/net/mlx5/mlx5_flow.c           | 1147
-rw-r--r--  drivers/net/mlx5/mlx5_mac.c            |  49
-rw-r--r--  drivers/net/mlx5/mlx5_mr.c             | 197
-rw-r--r--  drivers/net/mlx5/mlx5_rss.c            | 176
-rw-r--r--  drivers/net/mlx5/mlx5_rxmode.c         |  28
-rw-r--r--  drivers/net/mlx5/mlx5_rxq.c            | 712
-rw-r--r--  drivers/net/mlx5/mlx5_rxtx.c           | 116
-rw-r--r--  drivers/net/mlx5/mlx5_rxtx.h           | 179
-rw-r--r--  drivers/net/mlx5/mlx5_rxtx_vec.c       |  26
-rw-r--r--  drivers/net/mlx5/mlx5_rxtx_vec_neon.h  |  65
-rw-r--r--  drivers/net/mlx5/mlx5_rxtx_vec_sse.h   |   2
-rw-r--r--  drivers/net/mlx5/mlx5_socket.c         | 172
-rw-r--r--  drivers/net/mlx5/mlx5_stats.c          | 248
-rw-r--r--  drivers/net/mlx5/mlx5_trigger.c        | 258
-rw-r--r--  drivers/net/mlx5/mlx5_txq.c            | 379
-rw-r--r--  drivers/net/mlx5/mlx5_utils.h          |  29
-rw-r--r--  drivers/net/mlx5/mlx5_vlan.c           | 106
21 files changed, 2893 insertions, 2820 deletions
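
The dominant pattern in this changeset: control-path functions lose the per-device priv lock and the priv parameter, take the rte_eth_dev pointer instead, return 0 or a negative errno value with rte_errno set, and log through the dynamically registered mlx5_logtype with the port id as prefix. A minimal sketch of that convention follows; the function, macro, and log-type names here are illustrative, not part of the driver:

    #include <errno.h>
    #include <stdint.h>
    #include <string.h>
    #include <rte_common.h>
    #include <rte_errno.h>
    #include <rte_log.h>

    /* Hypothetical log type, registered at startup the same way the diff
     * registers mlx5_logtype with rte_log_register(). */
    static int example_logtype;

    #define EXAMPLE_LOG(level, fmt, ...) \
            rte_log(RTE_LOG_ ## level, example_logtype, fmt "\n", ##__VA_ARGS__)

    RTE_INIT(example_init_log)
    {
            example_logtype = rte_log_register("pmd.net.example");
            if (example_logtype >= 0)
                    rte_log_set_level(example_logtype, RTE_LOG_NOTICE);
    }

    /* Illustrative helper following the convention adopted by the diff:
     * 0 on success, a negative errno value otherwise and rte_errno set. */
    static int
    example_ctrl_op(uint16_t port_id, int sys_ret)
    {
            if (sys_ret == -1) {
                    rte_errno = errno;      /* preserve the system error */
                    EXAMPLE_LOG(WARNING, "port %u operation failed: %s",
                                port_id, strerror(rte_errno));
                    return -rte_errno;      /* negative errno to the caller */
            }
            return 0;
    }

Because every callee already sets rte_errno, callers in the diff can simply forward the negative return, which is what removes the error-code juggling (err = -err, assert(err >= 0), and so on) from the probe path below.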
diff --git a/drivers/net/mlx5/mlx5.c b/drivers/net/mlx5/mlx5.c index 45e0e8db..10ce3359 100644 --- a/drivers/net/mlx5/mlx5.c +++ b/drivers/net/mlx5/mlx5.c @@ -39,6 +39,7 @@ #include <stdlib.h> #include <errno.h> #include <net/if.h> +#include <sys/mman.h> /* Verbs header. */ /* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */ @@ -56,6 +57,7 @@ #include <rte_pci.h> #include <rte_bus_pci.h> #include <rte_common.h> +#include <rte_eal_memconfig.h> #include <rte_kvargs.h> #include "mlx5.h" @@ -117,6 +119,10 @@ struct mlx5_args { int tx_vec_en; int rx_vec_en; }; + +/** Driver-specific log messages type. */ +int mlx5_logtype; + /** * Retrieve integer value from environment variable. * @@ -148,7 +154,7 @@ mlx5_getenv_int(const char *name) * A pointer to the callback data. * * @return - * a pointer to the allocate space. + * Allocated buffer, NULL otherwise and rte_errno is set. */ static void * mlx5_alloc_verbs_buf(size_t size, void *data) @@ -156,11 +162,22 @@ mlx5_alloc_verbs_buf(size_t size, void *data) struct priv *priv = data; void *ret; size_t alignment = sysconf(_SC_PAGESIZE); + unsigned int socket = SOCKET_ID_ANY; + + if (priv->verbs_alloc_ctx.type == MLX5_VERBS_ALLOC_TYPE_TX_QUEUE) { + const struct mlx5_txq_ctrl *ctrl = priv->verbs_alloc_ctx.obj; + + socket = ctrl->socket; + } else if (priv->verbs_alloc_ctx.type == + MLX5_VERBS_ALLOC_TYPE_RX_QUEUE) { + const struct mlx5_rxq_ctrl *ctrl = priv->verbs_alloc_ctx.obj; + socket = ctrl->socket; + } assert(data != NULL); - ret = rte_malloc_socket(__func__, size, alignment, - priv->dev->device->numa_node); - DEBUG("Extern alloc size: %lu, align: %lu: %p", size, alignment, ret); + ret = rte_malloc_socket(__func__, size, alignment, socket); + if (!ret && size) + rte_errno = ENOMEM; return ret; } @@ -176,7 +193,6 @@ static void mlx5_free_verbs_buf(void *ptr, void *data __rte_unused) { assert(data != NULL); - DEBUG("Extern free request: %p", ptr); rte_free(ptr); } @@ -191,17 +207,16 @@ mlx5_free_verbs_buf(void *ptr, void *data __rte_unused) static void mlx5_dev_close(struct rte_eth_dev *dev) { - struct priv *priv = mlx5_get_priv(dev); + struct priv *priv = dev->data->dev_private; unsigned int i; int ret; - priv_lock(priv); - DEBUG("%p: closing device \"%s\"", - (void *)dev, - ((priv->ctx != NULL) ? priv->ctx->device->name : "")); + DRV_LOG(DEBUG, "port %u closing device \"%s\"", + dev->data->port_id, + ((priv->ctx != NULL) ? priv->ctx->device->name : "")); /* In case mlx5_dev_stop() has not been called. */ - priv_dev_interrupt_handler_uninstall(priv, dev); - priv_dev_traffic_disable(priv, dev); + mlx5_dev_interrupt_handler_uninstall(dev); + mlx5_traffic_disable(dev); /* Prevent crashes when queues are still in use. */ dev->rx_pkt_burst = removed_rx_burst; dev->tx_pkt_burst = removed_tx_burst; @@ -209,7 +224,7 @@ mlx5_dev_close(struct rte_eth_dev *dev) /* XXX race condition if mlx5_rx_burst() is still running. */ usleep(1000); for (i = 0; (i != priv->rxqs_n); ++i) - mlx5_priv_rxq_release(priv, i); + mlx5_rxq_release(dev, i); priv->rxqs_n = 0; priv->rxqs = NULL; } @@ -217,7 +232,7 @@ mlx5_dev_close(struct rte_eth_dev *dev) /* XXX race condition if mlx5_tx_burst() is still running. 
*/ usleep(1000); for (i = 0; (i != priv->txqs_n); ++i) - mlx5_priv_txq_release(priv, i); + mlx5_txq_release(dev, i); priv->txqs_n = 0; priv->txqs = NULL; } @@ -231,32 +246,40 @@ mlx5_dev_close(struct rte_eth_dev *dev) rte_free(priv->rss_conf.rss_key); if (priv->reta_idx != NULL) rte_free(priv->reta_idx); - priv_socket_uninit(priv); - ret = mlx5_priv_hrxq_ibv_verify(priv); + if (priv->primary_socket) + mlx5_socket_uninit(dev); + ret = mlx5_hrxq_ibv_verify(dev); if (ret) - WARN("%p: some Hash Rx queue still remain", (void *)priv); - ret = mlx5_priv_ind_table_ibv_verify(priv); + DRV_LOG(WARNING, "port %u some hash Rx queue still remain", + dev->data->port_id); + ret = mlx5_ind_table_ibv_verify(dev); if (ret) - WARN("%p: some Indirection table still remain", (void *)priv); - ret = mlx5_priv_rxq_ibv_verify(priv); + DRV_LOG(WARNING, "port %u some indirection table still remain", + dev->data->port_id); + ret = mlx5_rxq_ibv_verify(dev); if (ret) - WARN("%p: some Verbs Rx queue still remain", (void *)priv); - ret = mlx5_priv_rxq_verify(priv); + DRV_LOG(WARNING, "port %u some Verbs Rx queue still remain", + dev->data->port_id); + ret = mlx5_rxq_verify(dev); if (ret) - WARN("%p: some Rx Queues still remain", (void *)priv); - ret = mlx5_priv_txq_ibv_verify(priv); + DRV_LOG(WARNING, "port %u some Rx queues still remain", + dev->data->port_id); + ret = mlx5_txq_ibv_verify(dev); if (ret) - WARN("%p: some Verbs Tx queue still remain", (void *)priv); - ret = mlx5_priv_txq_verify(priv); + DRV_LOG(WARNING, "port %u some Verbs Tx queue still remain", + dev->data->port_id); + ret = mlx5_txq_verify(dev); if (ret) - WARN("%p: some Tx Queues still remain", (void *)priv); - ret = priv_flow_verify(priv); + DRV_LOG(WARNING, "port %u some Tx queues still remain", + dev->data->port_id); + ret = mlx5_flow_verify(dev); if (ret) - WARN("%p: some flows still remain", (void *)priv); - ret = priv_mr_verify(priv); + DRV_LOG(WARNING, "port %u some flows still remain", + dev->data->port_id); + ret = mlx5_mr_verify(dev); if (ret) - WARN("%p: some Memory Region still remain", (void *)priv); - priv_unlock(priv); + DRV_LOG(WARNING, "port %u some memory region still remain", + dev->data->port_id); memset(priv, 0, sizeof(*priv)); } @@ -394,7 +417,7 @@ mlx5_dev_idx(struct rte_pci_addr *pci_addr) * User data. * * @return - * 0 on success, negative errno value on failure. + * 0 on success, a negative errno value otherwise and rte_errno is set. */ static int mlx5_args_check(const char *key, const char *val, void *opaque) @@ -405,8 +428,9 @@ mlx5_args_check(const char *key, const char *val, void *opaque) errno = 0; tmp = strtoul(val, NULL, 0); if (errno) { - WARN("%s: \"%s\" is not a valid integer", key, val); - return errno; + rte_errno = errno; + DRV_LOG(WARNING, "%s: \"%s\" is not a valid integer", key, val); + return -rte_errno; } if (strcmp(MLX5_RXQ_CQE_COMP_EN, key) == 0) { args->cqe_comp = !!tmp; @@ -427,8 +451,9 @@ mlx5_args_check(const char *key, const char *val, void *opaque) } else if (strcmp(MLX5_RX_VEC_EN, key) == 0) { args->rx_vec_en = !!tmp; } else { - WARN("%s: unknown parameter", key); - return -EINVAL; + DRV_LOG(WARNING, "%s: unknown parameter", key); + rte_errno = EINVAL; + return -rte_errno; } return 0; } @@ -442,7 +467,7 @@ mlx5_args_check(const char *key, const char *val, void *opaque) * Device arguments structure. * * @return - * 0 on success, errno value on failure. + * 0 on success, a negative errno value otherwise and rte_errno is set. 
*/ static int mlx5_args(struct mlx5_args *args, struct rte_devargs *devargs) @@ -474,9 +499,10 @@ mlx5_args(struct mlx5_args *args, struct rte_devargs *devargs) if (rte_kvargs_count(kvlist, params[i])) { ret = rte_kvargs_process(kvlist, params[i], mlx5_args_check, args); - if (ret != 0) { + if (ret) { + rte_errno = EINVAL; rte_kvargs_free(kvlist); - return ret; + return -rte_errno; } } } @@ -486,6 +512,112 @@ mlx5_args(struct mlx5_args *args, struct rte_devargs *devargs) static struct rte_pci_driver mlx5_driver; +/* + * Reserved UAR address space for TXQ UAR(hw doorbell) mapping, process + * local resource used by both primary and secondary to avoid duplicate + * reservation. + * The space has to be available on both primary and secondary process, + * TXQ UAR maps to this area using fixed mmap w/o double check. + */ +static void *uar_base; + +/** + * Reserve UAR address space for primary process. + * + * @param[in] dev + * Pointer to Ethernet device. + * + * @return + * 0 on success, a negative errno value otherwise and rte_errno is set. + */ +static int +mlx5_uar_init_primary(struct rte_eth_dev *dev) +{ + struct priv *priv = dev->data->dev_private; + void *addr = (void *)0; + int i; + const struct rte_mem_config *mcfg; + + if (uar_base) { /* UAR address space mapped. */ + priv->uar_base = uar_base; + return 0; + } + /* find out lower bound of hugepage segments */ + mcfg = rte_eal_get_configuration()->mem_config; + for (i = 0; i < RTE_MAX_MEMSEG && mcfg->memseg[i].addr; i++) { + if (addr) + addr = RTE_MIN(addr, mcfg->memseg[i].addr); + else + addr = mcfg->memseg[i].addr; + } + /* keep distance to hugepages to minimize potential conflicts. */ + addr = RTE_PTR_SUB(addr, MLX5_UAR_OFFSET + MLX5_UAR_SIZE); + /* anonymous mmap, no real memory consumption. */ + addr = mmap(addr, MLX5_UAR_SIZE, + PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + if (addr == MAP_FAILED) { + DRV_LOG(ERR, + "port %u failed to reserve UAR address space, please" + " adjust MLX5_UAR_SIZE or try --base-virtaddr", + dev->data->port_id); + rte_errno = ENOMEM; + return -rte_errno; + } + /* Accept either same addr or a new addr returned from mmap if target + * range occupied. + */ + DRV_LOG(INFO, "port %u reserved UAR address space: %p", + dev->data->port_id, addr); + priv->uar_base = addr; /* for primary and secondary UAR re-mmap. */ + uar_base = addr; /* process local, don't reserve again. */ + return 0; +} + +/** + * Reserve UAR address space for secondary process, align with + * primary process. + * + * @param[in] dev + * Pointer to Ethernet device. + * + * @return + * 0 on success, a negative errno value otherwise and rte_errno is set. + */ +static int +mlx5_uar_init_secondary(struct rte_eth_dev *dev) +{ + struct priv *priv = dev->data->dev_private; + void *addr; + + assert(priv->uar_base); + if (uar_base) { /* already reserved. */ + assert(uar_base == priv->uar_base); + return 0; + } + /* anonymous mmap, no real memory consumption. 
*/ + addr = mmap(priv->uar_base, MLX5_UAR_SIZE, + PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + if (addr == MAP_FAILED) { + DRV_LOG(ERR, "port %u UAR mmap failed: %p size: %llu", + dev->data->port_id, priv->uar_base, MLX5_UAR_SIZE); + rte_errno = ENXIO; + return -rte_errno; + } + if (priv->uar_base != addr) { + DRV_LOG(ERR, + "port %u UAR address %p size %llu occupied, please" + " adjust MLX5_UAR_OFFSET or try EAL parameter" + " --base-virtaddr", + dev->data->port_id, priv->uar_base, MLX5_UAR_SIZE); + rte_errno = ENXIO; + return -rte_errno; + } + uar_base = addr; /* process local, don't reserve again */ + DRV_LOG(INFO, "port %u reserved UAR address space: %p", + dev->data->port_id, addr); + return 0; +} + /** * Assign parameters from args into priv, only non default * values are considered. @@ -530,17 +662,17 @@ mlx5_args_assign(struct priv *priv, struct mlx5_args *args) * PCI device information. * * @return - * 0 on success, negative errno value on failure. + * 0 on success, a negative errno value otherwise and rte_errno is set. */ static int -mlx5_pci_probe(struct rte_pci_driver *pci_drv, struct rte_pci_device *pci_dev) +mlx5_pci_probe(struct rte_pci_driver *pci_drv __rte_unused, + struct rte_pci_device *pci_dev) { - struct ibv_device **list; + struct ibv_device **list = NULL; struct ibv_device *ibv_dev; int err = 0; struct ibv_context *attr_ctx = NULL; struct ibv_device_attr_ex device_attr; - unsigned int sriov; unsigned int mps; unsigned int cqe_comp; unsigned int tunnel_en = 0; @@ -551,24 +683,25 @@ mlx5_pci_probe(struct rte_pci_driver *pci_drv, struct rte_pci_device *pci_dev) struct ibv_counter_set_description cs_desc; #endif - (void)pci_drv; assert(pci_drv == &mlx5_driver); /* Get mlx5_dev[] index. */ idx = mlx5_dev_idx(&pci_dev->addr); if (idx == -1) { - ERROR("this driver cannot support any more adapters"); - return -ENOMEM; + DRV_LOG(ERR, "this driver cannot support any more adapters"); + err = ENOMEM; + goto error; } - DEBUG("using driver device index %d", idx); - + DRV_LOG(DEBUG, "using driver device index %d", idx); /* Save PCI address. */ mlx5_dev[idx].pci_addr = pci_dev->addr; list = ibv_get_device_list(&i); if (list == NULL) { assert(errno); + err = errno; if (errno == ENOSYS) - ERROR("cannot list devices, is ib_uverbs loaded?"); - return -errno; + DRV_LOG(ERR, + "cannot list devices, is ib_uverbs loaded?"); + goto error; } assert(i >= 0); /* @@ -579,7 +712,7 @@ mlx5_pci_probe(struct rte_pci_driver *pci_drv, struct rte_pci_device *pci_dev) struct rte_pci_addr pci_addr; --i; - DEBUG("checking device \"%s\"", list[i]->name); + DRV_LOG(DEBUG, "checking device \"%s\"", list[i]->name); if (mlx5_ibv_device_to_pci_addr(list[i], &pci_addr)) continue; if ((pci_dev->addr.domain != pci_addr.domain) || @@ -587,14 +720,6 @@ mlx5_pci_probe(struct rte_pci_driver *pci_drv, struct rte_pci_device *pci_dev) (pci_dev->addr.devid != pci_addr.devid) || (pci_dev->addr.function != pci_addr.function)) continue; - sriov = ((pci_dev->id.device_id == - PCI_DEVICE_ID_MELLANOX_CONNECTX4VF) || - (pci_dev->id.device_id == - PCI_DEVICE_ID_MELLANOX_CONNECTX4LXVF) || - (pci_dev->id.device_id == - PCI_DEVICE_ID_MELLANOX_CONNECTX5VF) || - (pci_dev->id.device_id == - PCI_DEVICE_ID_MELLANOX_CONNECTX5EXVF)); switch (pci_dev->id.device_id) { case PCI_DEVICE_ID_MELLANOX_CONNECTX4: tunnel_en = 1; @@ -609,30 +734,29 @@ mlx5_pci_probe(struct rte_pci_driver *pci_drv, struct rte_pci_device *pci_dev) default: break; } - INFO("PCI information matches, using device \"%s\"" - " (SR-IOV: %s)", - list[i]->name, - sriov ? 
"true" : "false"); + DRV_LOG(INFO, "PCI information matches, using device \"%s\"", + list[i]->name); attr_ctx = ibv_open_device(list[i]); - err = errno; + rte_errno = errno; + err = rte_errno; break; } if (attr_ctx == NULL) { - ibv_free_device_list(list); switch (err) { case 0: - ERROR("cannot access device, is mlx5_ib loaded?"); - return -ENODEV; + DRV_LOG(ERR, + "cannot access device, is mlx5_ib loaded?"); + err = ENODEV; + break; case EINVAL: - ERROR("cannot use device, are drivers up to date?"); - return -EINVAL; + DRV_LOG(ERR, + "cannot use device, are drivers up to date?"); + break; } - assert(err > 0); - return -err; + goto error; } ibv_dev = list[i]; - - DEBUG("device opened"); + DRV_LOG(DEBUG, "device opened"); /* * Multi-packet send is supported by ConnectX-4 Lx PF as well * as all ConnectX-5 devices. @@ -640,14 +764,14 @@ mlx5_pci_probe(struct rte_pci_driver *pci_drv, struct rte_pci_device *pci_dev) mlx5dv_query_device(attr_ctx, &attrs_out); if (attrs_out.flags & MLX5DV_CONTEXT_FLAGS_MPW_ALLOWED) { if (attrs_out.flags & MLX5DV_CONTEXT_FLAGS_ENHANCED_MPW) { - DEBUG("Enhanced MPW is supported"); + DRV_LOG(DEBUG, "enhanced MPW is supported"); mps = MLX5_MPW_ENHANCED; } else { - DEBUG("MPW is supported"); + DRV_LOG(DEBUG, "MPW is supported"); mps = MLX5_MPW; } } else { - DEBUG("MPW isn't supported"); + DRV_LOG(DEBUG, "MPW isn't supported"); mps = MLX5_MPW_DISABLED; } if (RTE_CACHE_LINE_SIZE == 128 && @@ -655,10 +779,13 @@ mlx5_pci_probe(struct rte_pci_driver *pci_drv, struct rte_pci_device *pci_dev) cqe_comp = 0; else cqe_comp = 1; - if (ibv_query_device_ex(attr_ctx, NULL, &device_attr)) + err = ibv_query_device_ex(attr_ctx, NULL, &device_attr); + if (err) { + DEBUG("ibv_query_device_ex() failed"); goto error; - INFO("%u port(s) detected", device_attr.orig_attr.phys_port_cnt); - + } + DRV_LOG(INFO, "%u port(s) detected", + device_attr.orig_attr.phys_port_cnt); for (i = 0; i < device_attr.orig_attr.phys_port_cnt; i++) { char name[RTE_ETH_NAME_MAX_LEN]; uint32_t port = i + 1; /* ports are indexed from one */ @@ -667,11 +794,9 @@ mlx5_pci_probe(struct rte_pci_driver *pci_drv, struct rte_pci_device *pci_dev) struct ibv_port_attr port_attr; struct ibv_pd *pd = NULL; struct priv *priv = NULL; - struct rte_eth_dev *eth_dev; + struct rte_eth_dev *eth_dev = NULL; struct ibv_device_attr_ex device_attr_ex; struct ether_addr mac; - uint16_t num_vfs = 0; - struct ibv_device_attr_ex device_attr; struct mlx5_args args = { .cqe_comp = MLX5_ARG_UNSET, .txq_inline = MLX5_ARG_UNSET, @@ -687,84 +812,85 @@ mlx5_pci_probe(struct rte_pci_driver *pci_drv, struct rte_pci_device *pci_dev) snprintf(name, sizeof(name), PCI_PRI_FMT, pci_dev->addr.domain, pci_dev->addr.bus, pci_dev->addr.devid, pci_dev->addr.function); - mlx5_dev[idx].ports |= test; - if (rte_eal_process_type() == RTE_PROC_SECONDARY) { eth_dev = rte_eth_dev_attach_secondary(name); if (eth_dev == NULL) { - ERROR("can not attach rte ethdev"); - err = ENOMEM; + DRV_LOG(ERR, "can not attach rte ethdev"); + rte_errno = ENOMEM; + err = rte_errno; goto error; } eth_dev->device = &pci_dev->device; eth_dev->dev_ops = &mlx5_dev_sec_ops; - priv = eth_dev->data->dev_private; + err = mlx5_uar_init_secondary(eth_dev); + if (err) { + err = rte_errno; + goto error; + } /* Receive command fd from primary process */ - err = priv_socket_connect(priv); + err = mlx5_socket_connect(eth_dev); if (err < 0) { - err = -err; + err = rte_errno; goto error; } /* Remap UAR for Tx queues. 
*/ - err = priv_tx_uar_remap(priv, err); - if (err < 0) { - err = -err; + err = mlx5_tx_uar_remap(eth_dev, err); + if (err) { + err = rte_errno; goto error; } - priv_dev_select_rx_function(priv, eth_dev); - priv_dev_select_tx_function(priv, eth_dev); + /* + * Ethdev pointer is still required as input since + * the primary device is not accessible from the + * secondary process. + */ + eth_dev->rx_pkt_burst = + mlx5_select_rx_function(eth_dev); + eth_dev->tx_pkt_burst = + mlx5_select_tx_function(eth_dev); continue; } - - DEBUG("using port %u (%08" PRIx32 ")", port, test); - + DRV_LOG(DEBUG, "using port %u (%08" PRIx32 ")", port, test); ctx = ibv_open_device(ibv_dev); if (ctx == NULL) { err = ENODEV; goto port_error; } - - ibv_query_device_ex(ctx, NULL, &device_attr); /* Check port status. */ err = ibv_query_port(ctx, port, &port_attr); if (err) { - ERROR("port query failed: %s", strerror(err)); + DRV_LOG(ERR, "port query failed: %s", strerror(err)); goto port_error; } - if (port_attr.link_layer != IBV_LINK_LAYER_ETHERNET) { - ERROR("port %d is not configured in Ethernet mode", - port); + DRV_LOG(ERR, + "port %d is not configured in Ethernet mode", + port); err = EINVAL; goto port_error; } - if (port_attr.state != IBV_PORT_ACTIVE) - DEBUG("port %d is not active: \"%s\" (%d)", - port, ibv_port_state_str(port_attr.state), - port_attr.state); - + DRV_LOG(DEBUG, "port %d is not active: \"%s\" (%d)", + port, ibv_port_state_str(port_attr.state), + port_attr.state); /* Allocate protection domain. */ pd = ibv_alloc_pd(ctx); if (pd == NULL) { - ERROR("PD allocation failure"); + DRV_LOG(ERR, "PD allocation failure"); err = ENOMEM; goto port_error; } - mlx5_dev[idx].ports |= test; - /* from rte_ethdev.c */ priv = rte_zmalloc("ethdev private structure", sizeof(*priv), RTE_CACHE_LINE_SIZE); if (priv == NULL) { - ERROR("priv allocation failure"); + DRV_LOG(ERR, "priv allocation failure"); err = ENOMEM; goto port_error; } - priv->ctx = ctx; strncpy(priv->ibdev_path, priv->ctx->device->ibdev_path, sizeof(priv->ibdev_path)); @@ -780,35 +906,37 @@ mlx5_pci_probe(struct rte_pci_driver *pci_drv, struct rte_pci_device *pci_dev) priv->rx_vec_en = 1; err = mlx5_args(&args, pci_dev->device.devargs); if (err) { - ERROR("failed to process device arguments: %s", - strerror(err)); + DRV_LOG(ERR, "failed to process device arguments: %s", + strerror(err)); + err = rte_errno; goto port_error; } mlx5_args_assign(priv, &args); - if (ibv_query_device_ex(ctx, NULL, &device_attr_ex)) { - ERROR("ibv_query_device_ex() failed"); + err = ibv_query_device_ex(ctx, NULL, &device_attr_ex); + if (err) { + DRV_LOG(ERR, "ibv_query_device_ex() failed"); goto port_error; } - priv->hw_csum = !!(device_attr_ex.device_cap_flags_ex & IBV_DEVICE_RAW_IP_CSUM); - DEBUG("checksum offloading is %ssupported", - (priv->hw_csum ? "" : "not ")); + DRV_LOG(DEBUG, "checksum offloading is %ssupported", + (priv->hw_csum ? "" : "not ")); #ifdef HAVE_IBV_DEVICE_VXLAN_SUPPORT priv->hw_csum_l2tun = !!(exp_device_attr.exp_device_cap_flags & IBV_DEVICE_VXLAN_SUPPORT); #endif - DEBUG("Rx L2 tunnel checksum offloads are %ssupported", - (priv->hw_csum_l2tun ? "" : "not ")); + DRV_LOG(DEBUG, "Rx L2 tunnel checksum offloads are %ssupported", + (priv->hw_csum_l2tun ? 
"" : "not ")); #ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT priv->counter_set_supported = !!(device_attr.max_counter_sets); ibv_describe_counter_set(ctx, 0, &cs_desc); - DEBUG("counter type = %d, num of cs = %ld, attributes = %d", - cs_desc.counter_type, cs_desc.num_of_cs, - cs_desc.attributes); + DRV_LOG(DEBUG, + "counter type = %d, num of cs = %ld, attributes = %d", + cs_desc.counter_type, cs_desc.num_of_cs, + cs_desc.attributes); #endif priv->ind_table_max_size = device_attr_ex.rss_caps.max_rwq_indirection_table_size; @@ -817,27 +945,24 @@ mlx5_pci_probe(struct rte_pci_driver *pci_drv, struct rte_pci_device *pci_dev) if (priv->ind_table_max_size > (unsigned int)ETH_RSS_RETA_SIZE_512) priv->ind_table_max_size = ETH_RSS_RETA_SIZE_512; - DEBUG("maximum RX indirection table size is %u", - priv->ind_table_max_size); + DRV_LOG(DEBUG, "maximum Rx indirection table size is %u", + priv->ind_table_max_size); priv->hw_vlan_strip = !!(device_attr_ex.raw_packet_caps & IBV_RAW_PACKET_CAP_CVLAN_STRIPPING); - DEBUG("VLAN stripping is %ssupported", - (priv->hw_vlan_strip ? "" : "not ")); + DRV_LOG(DEBUG, "VLAN stripping is %ssupported", + (priv->hw_vlan_strip ? "" : "not ")); - priv->hw_fcs_strip = - !!(device_attr_ex.orig_attr.device_cap_flags & - IBV_WQ_FLAGS_SCATTER_FCS); - DEBUG("FCS stripping configuration is %ssupported", - (priv->hw_fcs_strip ? "" : "not ")); + priv->hw_fcs_strip = !!(device_attr_ex.raw_packet_caps & + IBV_RAW_PACKET_CAP_SCATTER_FCS); + DRV_LOG(DEBUG, "FCS stripping configuration is %ssupported", + (priv->hw_fcs_strip ? "" : "not ")); #ifdef HAVE_IBV_WQ_FLAG_RX_END_PADDING priv->hw_padding = !!device_attr_ex.rx_pad_end_addr_align; #endif - DEBUG("hardware RX end alignment padding is %ssupported", - (priv->hw_padding ? "" : "not ")); - - priv_get_num_vfs(priv, &num_vfs); - priv->sriov = (num_vfs || sriov); + DRV_LOG(DEBUG, + "hardware Rx end alignment padding is %ssupported", + (priv->hw_padding ? "" : "not ")); priv->tso = ((priv->tso) && (device_attr_ex.tso_caps.max_tso > 0) && (device_attr_ex.tso_caps.supported_qpts & @@ -846,18 +971,21 @@ mlx5_pci_probe(struct rte_pci_driver *pci_drv, struct rte_pci_device *pci_dev) priv->max_tso_payload_sz = device_attr_ex.tso_caps.max_tso; if (priv->mps && !mps) { - ERROR("multi-packet send not supported on this device" - " (" MLX5_TXQ_MPW_EN ")"); + DRV_LOG(ERR, + "multi-packet send not supported on this device" + " (" MLX5_TXQ_MPW_EN ")"); err = ENOTSUP; goto port_error; } else if (priv->mps && priv->tso) { - WARN("multi-packet send not supported in conjunction " - "with TSO. MPS disabled"); + DRV_LOG(WARNING, + "multi-packet send not supported in conjunction" + " with TSO. MPS disabled"); priv->mps = 0; } - INFO("%sMPS is %s", - priv->mps == MLX5_MPW_ENHANCED ? "Enhanced " : "", - priv->mps != MLX5_MPW_DISABLED ? "enabled" : "disabled"); + DRV_LOG(INFO, "%s MPS is %s", + priv->mps == MLX5_MPW_ENHANCED ? "enhanced " : "", + priv->mps != MLX5_MPW_DISABLED ? "enabled" : + "disabled"); /* Set default values for Enhanced MPW, a.k.a MPWv2. 
*/ if (priv->mps == MLX5_MPW_ENHANCED) { if (args.txqs_inline == MLX5_ARG_UNSET) @@ -870,59 +998,71 @@ mlx5_pci_probe(struct rte_pci_driver *pci_drv, struct rte_pci_device *pci_dev) MLX5_WQE_SIZE; } if (priv->cqe_comp && !cqe_comp) { - WARN("Rx CQE compression isn't supported"); + DRV_LOG(WARNING, "Rx CQE compression isn't supported"); priv->cqe_comp = 0; } + eth_dev = rte_eth_dev_allocate(name); + if (eth_dev == NULL) { + DRV_LOG(ERR, "can not allocate rte ethdev"); + err = ENOMEM; + goto port_error; + } + eth_dev->data->dev_private = priv; + priv->dev_data = eth_dev->data; + eth_dev->data->mac_addrs = priv->mac; + eth_dev->device = &pci_dev->device; + rte_eth_copy_pci_info(eth_dev, pci_dev); + eth_dev->device->driver = &mlx5_driver.driver; + err = mlx5_uar_init_primary(eth_dev); + if (err) { + err = rte_errno; + goto port_error; + } /* Configure the first MAC address by default. */ - if (priv_get_mac(priv, &mac.addr_bytes)) { - ERROR("cannot get MAC address, is mlx5_en loaded?" - " (errno: %s)", strerror(errno)); + if (mlx5_get_mac(eth_dev, &mac.addr_bytes)) { + DRV_LOG(ERR, + "port %u cannot get MAC address, is mlx5_en" + " loaded? (errno: %s)", + eth_dev->data->port_id, strerror(errno)); err = ENODEV; goto port_error; } - INFO("port %u MAC address is %02x:%02x:%02x:%02x:%02x:%02x", - priv->port, - mac.addr_bytes[0], mac.addr_bytes[1], - mac.addr_bytes[2], mac.addr_bytes[3], - mac.addr_bytes[4], mac.addr_bytes[5]); + DRV_LOG(INFO, + "port %u MAC address is %02x:%02x:%02x:%02x:%02x:%02x", + eth_dev->data->port_id, + mac.addr_bytes[0], mac.addr_bytes[1], + mac.addr_bytes[2], mac.addr_bytes[3], + mac.addr_bytes[4], mac.addr_bytes[5]); #ifndef NDEBUG { char ifname[IF_NAMESIZE]; - if (priv_get_ifname(priv, &ifname) == 0) - DEBUG("port %u ifname is \"%s\"", - priv->port, ifname); + if (mlx5_get_ifname(eth_dev, &ifname) == 0) + DRV_LOG(DEBUG, "port %u ifname is \"%s\"", + eth_dev->data->port_id, ifname); else - DEBUG("port %u ifname is unknown", priv->port); + DRV_LOG(DEBUG, "port %u ifname is unknown", + eth_dev->data->port_id); } #endif /* Get actual MTU if possible. */ - priv_get_mtu(priv, &priv->mtu); - DEBUG("port %u MTU is %u", priv->port, priv->mtu); - - eth_dev = rte_eth_dev_allocate(name); - if (eth_dev == NULL) { - ERROR("can not allocate rte ethdev"); - err = ENOMEM; + err = mlx5_get_mtu(eth_dev, &priv->mtu); + if (err) { + err = rte_errno; goto port_error; } - eth_dev->data->dev_private = priv; - eth_dev->data->mac_addrs = priv->mac; - eth_dev->device = &pci_dev->device; - rte_eth_copy_pci_info(eth_dev, pci_dev); - eth_dev->device->driver = &mlx5_driver.driver; + DRV_LOG(DEBUG, "port %u MTU is %u", eth_dev->data->port_id, + priv->mtu); /* * Initialize burst functions to prevent crashes before link-up. */ eth_dev->rx_pkt_burst = removed_rx_burst; eth_dev->tx_pkt_burst = removed_tx_burst; - priv->dev = eth_dev; eth_dev->dev_ops = &mlx5_dev_ops; /* Register MAC address. */ claim_zero(mlx5_mac_addr_add(eth_dev, &mac, 0, 0)); TAILQ_INIT(&priv->flows); TAILQ_INIT(&priv->ctrl_flows); - /* Hint libmlx5 to use PMD allocator for data plane resources */ struct mlx5dv_ctx_allocators alctr = { .alloc = &mlx5_alloc_verbs_buf, @@ -931,12 +1071,17 @@ mlx5_pci_probe(struct rte_pci_driver *pci_drv, struct rte_pci_device *pci_dev) }; mlx5dv_set_context_attr(ctx, MLX5DV_CTX_ATTR_BUF_ALLOCATORS, (void *)((uintptr_t)&alctr)); - /* Bring Ethernet device up. 
*/ - DEBUG("forcing Ethernet interface up"); - priv_set_flags(priv, ~IFF_UP, IFF_UP); + DRV_LOG(DEBUG, "port %u forcing Ethernet interface up", + eth_dev->data->port_id); + mlx5_set_link_up(eth_dev); + /* + * Even though the interrupt handler is not installed yet, + * interrupts will still trigger on the asyn_fd from + * Verbs context returned by ibv_open_device(). + */ + mlx5_link_update(eth_dev, 0); continue; - port_error: if (priv) rte_free(priv); @@ -944,29 +1089,31 @@ port_error: claim_zero(ibv_dealloc_pd(pd)); if (ctx) claim_zero(ibv_close_device(ctx)); + if (eth_dev && rte_eal_process_type() == RTE_PROC_PRIMARY) + rte_eth_dev_release_port(eth_dev); break; } - /* * XXX if something went wrong in the loop above, there is a resource * leak (ctx, pd, priv, dpdk ethdev) but we can do nothing about it as * long as the dpdk does not provide a way to deallocate a ethdev and a * way to enumerate the registered ethdevs to free the previous ones. */ - /* no port found, complain */ if (!mlx5_dev[idx].ports) { - err = ENODEV; - goto error; + rte_errno = ENODEV; + err = rte_errno; } - error: if (attr_ctx) claim_zero(ibv_close_device(attr_ctx)); if (list) ibv_free_device_list(list); - assert(err >= 0); - return -err; + if (err) { + rte_errno = err; + return -rte_errno; + } + return 0; } static const struct rte_pci_id mlx5_pci_id_map[] = { @@ -1042,3 +1189,11 @@ rte_mlx5_pmd_init(void) RTE_PMD_EXPORT_NAME(net_mlx5, __COUNTER__); RTE_PMD_REGISTER_PCI_TABLE(net_mlx5, mlx5_pci_id_map); RTE_PMD_REGISTER_KMOD_DEP(net_mlx5, "* ib_uverbs & mlx5_core & mlx5_ib"); + +/** Initialize driver log type. */ +RTE_INIT(vdev_netvsc_init_log) +{ + mlx5_logtype = rte_log_register("pmd.net.mlx5"); + if (mlx5_logtype >= 0) + rte_log_set_level(mlx5_logtype, RTE_LOG_NOTICE); +} diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h index d49595bc..5e6027b8 100644 --- a/drivers/net/mlx5/mlx5.h +++ b/drivers/net/mlx5/mlx5.h @@ -90,8 +90,26 @@ struct mlx5_xstats_ctrl { /* Flow list . */ TAILQ_HEAD(mlx5_flows, rte_flow); +/** + * Type of objet being allocated. + */ +enum mlx5_verbs_alloc_type { + MLX5_VERBS_ALLOC_TYPE_NONE, + MLX5_VERBS_ALLOC_TYPE_TX_QUEUE, + MLX5_VERBS_ALLOC_TYPE_RX_QUEUE, +}; + +/** + * Verbs allocator needs a context to know in the callback which kind of + * resources it is allocating. + */ +struct mlx5_verbs_alloc_ctx { + enum mlx5_verbs_alloc_type type; /* Kind of object being allocated. */ + const void *obj; /* Pointer to the DPDK object. */ +}; + struct priv { - struct rte_eth_dev *dev; /* Ethernet device of master process. */ + struct rte_eth_dev_data *dev_data; /* Pointer to device data. */ struct ibv_context *ctx; /* Verbs context. */ struct ibv_device_attr_ex device_attr; /* Device properties. */ struct ibv_pd *pd; /* Protection Domain. */ @@ -107,11 +125,9 @@ struct priv { unsigned int hw_vlan_strip:1; /* VLAN stripping is supported. */ unsigned int hw_fcs_strip:1; /* FCS stripping is supported. */ unsigned int hw_padding:1; /* End alignment padding is supported. */ - unsigned int sriov:1; /* This is a VF or PF with VF devices. */ unsigned int mps:2; /* Multi-packet send mode (0: disabled). */ unsigned int mpw_hdr_dseg:1; /* Enable DSEGs in the title WQEBB. */ unsigned int cqe_comp:1; /* Whether CQE compression is enabled. */ - unsigned int pending_alarm:1; /* An alarm is pending. */ unsigned int tso:1; /* Whether TSO is supported. */ unsigned int tunnel_en:1; unsigned int isolated:1; /* Whether isolated mode is enabled. 
*/ @@ -146,51 +162,16 @@ struct priv { LIST_HEAD(ind_tables, mlx5_ind_table_ibv) ind_tbls; uint32_t link_speed_capa; /* Link speed capabilities. */ struct mlx5_xstats_ctrl xstats_ctrl; /* Extended stats control. */ - rte_spinlock_t lock; /* Lock for control functions. */ + rte_spinlock_t mr_lock; /* MR Lock. */ int primary_socket; /* Unix socket for primary process. */ + void *uar_base; /* Reserved address space for UAR mapping */ struct rte_intr_handle intr_handle_socket; /* Interrupt handler. */ + struct mlx5_verbs_alloc_ctx verbs_alloc_ctx; + /* Context for Verbs allocator. */ }; -/** - * Lock private structure to protect it from concurrent access in the - * control path. - * - * @param priv - * Pointer to private structure. - */ -static inline void -priv_lock(struct priv *priv) -{ - rte_spinlock_lock(&priv->lock); -} - -/** - * Try to lock private structure to protect it from concurrent access in the - * control path. - * - * @param priv - * Pointer to private structure. - * - * @return - * 1 if the lock is successfully taken; 0 otherwise. - */ -static inline int -priv_trylock(struct priv *priv) -{ - return rte_spinlock_trylock(&priv->lock); -} - -/** - * Unlock private structure. - * - * @param priv - * Pointer to private structure. - */ -static inline void -priv_unlock(struct priv *priv) -{ - rte_spinlock_unlock(&priv->lock); -} +#define PORT_ID(priv) ((priv)->dev_data->port_id) +#define ETH_DEV(priv) (&rte_eth_devices[PORT_ID(priv)]) /* mlx5.c */ @@ -198,130 +179,138 @@ int mlx5_getenv_int(const char *); /* mlx5_ethdev.c */ -struct priv *mlx5_get_priv(struct rte_eth_dev *dev); -int mlx5_is_secondary(void); -int priv_get_ifname(const struct priv *, char (*)[IF_NAMESIZE]); -int priv_ifreq(const struct priv *, int req, struct ifreq *); -int priv_is_ib_cntr(const char *); -int priv_get_cntr_sysfs(struct priv *, const char *, uint64_t *); -int priv_get_num_vfs(struct priv *, uint16_t *); -int priv_get_mtu(struct priv *, uint16_t *); -int priv_set_flags(struct priv *, unsigned int, unsigned int); -int mlx5_dev_configure(struct rte_eth_dev *); -void mlx5_dev_infos_get(struct rte_eth_dev *, struct rte_eth_dev_info *); +int mlx5_get_ifname(const struct rte_eth_dev *dev, char (*ifname)[IF_NAMESIZE]); +int mlx5_ifreq(const struct rte_eth_dev *dev, int req, struct ifreq *ifr); +int mlx5_get_mtu(struct rte_eth_dev *dev, uint16_t *mtu); +int mlx5_set_flags(struct rte_eth_dev *dev, unsigned int keep, + unsigned int flags); +int mlx5_dev_configure(struct rte_eth_dev *dev); +void mlx5_dev_infos_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *info); const uint32_t *mlx5_dev_supported_ptypes_get(struct rte_eth_dev *dev); -int priv_link_update(struct priv *, int); -int priv_force_link_status_change(struct priv *, int); -int mlx5_link_update(struct rte_eth_dev *, int); -int mlx5_dev_set_mtu(struct rte_eth_dev *, uint16_t); -int mlx5_dev_get_flow_ctrl(struct rte_eth_dev *, struct rte_eth_fc_conf *); -int mlx5_dev_set_flow_ctrl(struct rte_eth_dev *, struct rte_eth_fc_conf *); -int mlx5_ibv_device_to_pci_addr(const struct ibv_device *, - struct rte_pci_addr *); -void mlx5_dev_link_status_handler(void *); -void mlx5_dev_interrupt_handler(void *); -void priv_dev_interrupt_handler_uninstall(struct priv *, struct rte_eth_dev *); -void priv_dev_interrupt_handler_install(struct priv *, struct rte_eth_dev *); +int mlx5_link_update(struct rte_eth_dev *dev, int wait_to_complete); +int mlx5_force_link_status_change(struct rte_eth_dev *dev, int status); +int mlx5_dev_set_mtu(struct rte_eth_dev *dev, uint16_t 
mtu); +int mlx5_dev_get_flow_ctrl(struct rte_eth_dev *dev, + struct rte_eth_fc_conf *fc_conf); +int mlx5_dev_set_flow_ctrl(struct rte_eth_dev *dev, + struct rte_eth_fc_conf *fc_conf); +int mlx5_ibv_device_to_pci_addr(const struct ibv_device *device, + struct rte_pci_addr *pci_addr); +void mlx5_dev_link_status_handler(void *arg); +void mlx5_dev_interrupt_handler(void *arg); +void mlx5_dev_interrupt_handler_uninstall(struct rte_eth_dev *dev); +void mlx5_dev_interrupt_handler_install(struct rte_eth_dev *dev); int mlx5_set_link_down(struct rte_eth_dev *dev); int mlx5_set_link_up(struct rte_eth_dev *dev); -void priv_dev_select_tx_function(struct priv *priv, struct rte_eth_dev *dev); -void priv_dev_select_rx_function(struct priv *priv, struct rte_eth_dev *dev); +eth_tx_burst_t mlx5_select_tx_function(struct rte_eth_dev *dev); +eth_rx_burst_t mlx5_select_rx_function(struct rte_eth_dev *dev); /* mlx5_mac.c */ -int priv_get_mac(struct priv *, uint8_t (*)[ETHER_ADDR_LEN]); -void mlx5_mac_addr_remove(struct rte_eth_dev *, uint32_t); -int mlx5_mac_addr_add(struct rte_eth_dev *, struct ether_addr *, uint32_t, - uint32_t); -void mlx5_mac_addr_set(struct rte_eth_dev *, struct ether_addr *); +int mlx5_get_mac(struct rte_eth_dev *dev, uint8_t (*mac)[ETHER_ADDR_LEN]); +void mlx5_mac_addr_remove(struct rte_eth_dev *dev, uint32_t index); +int mlx5_mac_addr_add(struct rte_eth_dev *dev, struct ether_addr *mac, + uint32_t index, uint32_t vmdq); +void mlx5_mac_addr_set(struct rte_eth_dev *dev, struct ether_addr *mac_addr); /* mlx5_rss.c */ -int mlx5_rss_hash_update(struct rte_eth_dev *, struct rte_eth_rss_conf *); -int mlx5_rss_hash_conf_get(struct rte_eth_dev *, struct rte_eth_rss_conf *); -int priv_rss_reta_index_resize(struct priv *, unsigned int); -int mlx5_dev_rss_reta_query(struct rte_eth_dev *, - struct rte_eth_rss_reta_entry64 *, uint16_t); -int mlx5_dev_rss_reta_update(struct rte_eth_dev *, - struct rte_eth_rss_reta_entry64 *, uint16_t); +int mlx5_rss_hash_update(struct rte_eth_dev *dev, + struct rte_eth_rss_conf *rss_conf); +int mlx5_rss_hash_conf_get(struct rte_eth_dev *dev, + struct rte_eth_rss_conf *rss_conf); +int mlx5_rss_reta_index_resize(struct rte_eth_dev *dev, unsigned int reta_size); +int mlx5_dev_rss_reta_query(struct rte_eth_dev *dev, + struct rte_eth_rss_reta_entry64 *reta_conf, + uint16_t reta_size); +int mlx5_dev_rss_reta_update(struct rte_eth_dev *dev, + struct rte_eth_rss_reta_entry64 *reta_conf, + uint16_t reta_size); /* mlx5_rxmode.c */ -void mlx5_promiscuous_enable(struct rte_eth_dev *); -void mlx5_promiscuous_disable(struct rte_eth_dev *); -void mlx5_allmulticast_enable(struct rte_eth_dev *); -void mlx5_allmulticast_disable(struct rte_eth_dev *); +void mlx5_promiscuous_enable(struct rte_eth_dev *dev); +void mlx5_promiscuous_disable(struct rte_eth_dev *dev); +void mlx5_allmulticast_enable(struct rte_eth_dev *dev); +void mlx5_allmulticast_disable(struct rte_eth_dev *dev); /* mlx5_stats.c */ -void priv_xstats_init(struct priv *); -int mlx5_stats_get(struct rte_eth_dev *, struct rte_eth_stats *); -void mlx5_stats_reset(struct rte_eth_dev *); -int mlx5_xstats_get(struct rte_eth_dev *, - struct rte_eth_xstat *, unsigned int); -void mlx5_xstats_reset(struct rte_eth_dev *); -int mlx5_xstats_get_names(struct rte_eth_dev *, - struct rte_eth_xstat_name *, unsigned int); +void mlx5_xstats_init(struct rte_eth_dev *dev); +int mlx5_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats); +void mlx5_stats_reset(struct rte_eth_dev *dev); +int mlx5_xstats_get(struct rte_eth_dev *dev, struct 
rte_eth_xstat *stats, + unsigned int n); +void mlx5_xstats_reset(struct rte_eth_dev *dev); +int mlx5_xstats_get_names(struct rte_eth_dev *dev __rte_unused, + struct rte_eth_xstat_name *xstats_names, + unsigned int n); /* mlx5_vlan.c */ -int mlx5_vlan_filter_set(struct rte_eth_dev *, uint16_t, int); -int mlx5_vlan_offload_set(struct rte_eth_dev *, int); -void mlx5_vlan_strip_queue_set(struct rte_eth_dev *, uint16_t, int); +int mlx5_vlan_filter_set(struct rte_eth_dev *dev, uint16_t vlan_id, int on); +void mlx5_vlan_strip_queue_set(struct rte_eth_dev *dev, uint16_t queue, int on); +int mlx5_vlan_offload_set(struct rte_eth_dev *dev, int mask); /* mlx5_trigger.c */ -int mlx5_dev_start(struct rte_eth_dev *); -void mlx5_dev_stop(struct rte_eth_dev *); -int priv_dev_traffic_enable(struct priv *, struct rte_eth_dev *); -int priv_dev_traffic_disable(struct priv *, struct rte_eth_dev *); -int priv_dev_traffic_restart(struct priv *, struct rte_eth_dev *); -int mlx5_traffic_restart(struct rte_eth_dev *); +int mlx5_dev_start(struct rte_eth_dev *dev); +void mlx5_dev_stop(struct rte_eth_dev *dev); +int mlx5_traffic_enable(struct rte_eth_dev *dev); +void mlx5_traffic_disable(struct rte_eth_dev *dev); +int mlx5_traffic_restart(struct rte_eth_dev *dev); /* mlx5_flow.c */ -int mlx5_dev_filter_ctrl(struct rte_eth_dev *, enum rte_filter_type, - enum rte_filter_op, void *); -int mlx5_flow_validate(struct rte_eth_dev *, const struct rte_flow_attr *, - const struct rte_flow_item [], - const struct rte_flow_action [], - struct rte_flow_error *); -struct rte_flow *mlx5_flow_create(struct rte_eth_dev *, - const struct rte_flow_attr *, - const struct rte_flow_item [], - const struct rte_flow_action [], - struct rte_flow_error *); -int mlx5_flow_destroy(struct rte_eth_dev *, struct rte_flow *, - struct rte_flow_error *); -void priv_flow_flush(struct priv *, struct mlx5_flows *); -int mlx5_flow_flush(struct rte_eth_dev *, struct rte_flow_error *); -int mlx5_flow_query(struct rte_eth_dev *, struct rte_flow *, - enum rte_flow_action_type, void *, - struct rte_flow_error *); -int mlx5_flow_isolate(struct rte_eth_dev *, int, struct rte_flow_error *); -int priv_flow_start(struct priv *, struct mlx5_flows *); -void priv_flow_stop(struct priv *, struct mlx5_flows *); -int priv_flow_verify(struct priv *); -int mlx5_ctrl_flow_vlan(struct rte_eth_dev *, struct rte_flow_item_eth *, - struct rte_flow_item_eth *, struct rte_flow_item_vlan *, - struct rte_flow_item_vlan *); -int mlx5_ctrl_flow(struct rte_eth_dev *, struct rte_flow_item_eth *, - struct rte_flow_item_eth *); -int priv_flow_create_drop_queue(struct priv *); -void priv_flow_delete_drop_queue(struct priv *); +int mlx5_flow_validate(struct rte_eth_dev *dev, + const struct rte_flow_attr *attr, + const struct rte_flow_item items[], + const struct rte_flow_action actions[], + struct rte_flow_error *error); +struct rte_flow *mlx5_flow_create(struct rte_eth_dev *dev, + const struct rte_flow_attr *attr, + const struct rte_flow_item items[], + const struct rte_flow_action actions[], + struct rte_flow_error *error); +int mlx5_flow_destroy(struct rte_eth_dev *dev, struct rte_flow *flow, + struct rte_flow_error *error); +void mlx5_flow_list_flush(struct rte_eth_dev *dev, struct mlx5_flows *list); +int mlx5_flow_flush(struct rte_eth_dev *dev, struct rte_flow_error *error); +int mlx5_flow_query(struct rte_eth_dev *dev, struct rte_flow *flow, + enum rte_flow_action_type action, void *data, + struct rte_flow_error *error); +int mlx5_flow_isolate(struct rte_eth_dev *dev, int enable, + 
struct rte_flow_error *error); +int mlx5_dev_filter_ctrl(struct rte_eth_dev *dev, + enum rte_filter_type filter_type, + enum rte_filter_op filter_op, + void *arg); +int mlx5_flow_start(struct rte_eth_dev *dev, struct mlx5_flows *list); +void mlx5_flow_stop(struct rte_eth_dev *dev, struct mlx5_flows *list); +int mlx5_flow_verify(struct rte_eth_dev *dev); +int mlx5_ctrl_flow_vlan(struct rte_eth_dev *dev, + struct rte_flow_item_eth *eth_spec, + struct rte_flow_item_eth *eth_mask, + struct rte_flow_item_vlan *vlan_spec, + struct rte_flow_item_vlan *vlan_mask); +int mlx5_ctrl_flow(struct rte_eth_dev *dev, + struct rte_flow_item_eth *eth_spec, + struct rte_flow_item_eth *eth_mask); +int mlx5_flow_create_drop_queue(struct rte_eth_dev *dev); +void mlx5_flow_delete_drop_queue(struct rte_eth_dev *dev); /* mlx5_socket.c */ -int priv_socket_init(struct priv *priv); -int priv_socket_uninit(struct priv *priv); -void priv_socket_handle(struct priv *priv); -int priv_socket_connect(struct priv *priv); +int mlx5_socket_init(struct rte_eth_dev *priv); +void mlx5_socket_uninit(struct rte_eth_dev *priv); +void mlx5_socket_handle(struct rte_eth_dev *priv); +int mlx5_socket_connect(struct rte_eth_dev *priv); /* mlx5_mr.c */ -struct mlx5_mr *priv_mr_new(struct priv *, struct rte_mempool *); -struct mlx5_mr *priv_mr_get(struct priv *, struct rte_mempool *); -int priv_mr_release(struct priv *, struct mlx5_mr *); -int priv_mr_verify(struct priv *); +struct mlx5_mr *mlx5_mr_new(struct rte_eth_dev *dev, struct rte_mempool *mp); +struct mlx5_mr *mlx5_mr_get(struct rte_eth_dev *dev, struct rte_mempool *mp); +int mlx5_mr_release(struct mlx5_mr *mr); +int mlx5_mr_verify(struct rte_eth_dev *dev); #endif /* RTE_PMD_MLX5_H_ */ diff --git a/drivers/net/mlx5/mlx5_defs.h b/drivers/net/mlx5/mlx5_defs.h index 24caf7e7..d7063576 100644 --- a/drivers/net/mlx5/mlx5_defs.h +++ b/drivers/net/mlx5/mlx5_defs.h @@ -110,7 +110,17 @@ /* Supported RSS */ #define MLX5_RSS_HF_MASK (~(ETH_RSS_IP | ETH_RSS_UDP | ETH_RSS_TCP)) -/* Maximum number of attempts to query link status before giving up. */ -#define MLX5_MAX_LINK_QUERY_ATTEMPTS 5 +/* Timeout in seconds to get a valid link status. */ +#define MLX5_LINK_STATUS_TIMEOUT 10 + +/* Reserved address space for UAR mapping. */ +#define MLX5_UAR_SIZE (1ULL << 32) + +/* Offset of reserved UAR address space to hugepage memory. Offset is used here + * to minimize possibility of address next to hugepage being used by other code + * in either primary or secondary process, failing to map TX UAR would make TX + * packets invisible to HW. 
+ */ +#define MLX5_UAR_OFFSET (1ULL << 32) #endif /* RTE_PMD_MLX5_DEFS_H_ */ diff --git a/drivers/net/mlx5/mlx5_ethdev.c b/drivers/net/mlx5/mlx5_ethdev.c index ffe1cdd6..5edc7511 100644 --- a/drivers/net/mlx5/mlx5_ethdev.c +++ b/drivers/net/mlx5/mlx5_ethdev.c @@ -35,6 +35,7 @@ #include <stddef.h> #include <assert.h> +#include <inttypes.h> #include <unistd.h> #include <stdint.h> #include <stdio.h> @@ -45,14 +46,13 @@ #include <net/if.h> #include <sys/ioctl.h> #include <sys/socket.h> -#include <sys/utsname.h> #include <netinet/in.h> #include <linux/ethtool.h> #include <linux/sockios.h> -#include <linux/version.h> #include <fcntl.h> #include <stdalign.h> #include <sys/un.h> +#include <time.h> #include <rte_atomic.h> #include <rte_ethdev.h> @@ -60,7 +60,6 @@ #include <rte_mbuf.h> #include <rte_common.h> #include <rte_interrupts.h> -#include <rte_alarm.h> #include <rte_malloc.h> #include "mlx5.h" @@ -119,34 +118,20 @@ struct ethtool_link_settings { #endif /** - * Return private structure associated with an Ethernet device. - * - * @param dev - * Pointer to Ethernet device structure. - * - * @return - * Pointer to private structure. - */ -struct priv * -mlx5_get_priv(struct rte_eth_dev *dev) -{ - return dev->data->dev_private; -} - -/** * Get interface name from private structure. * - * @param[in] priv - * Pointer to private structure. + * @param[in] dev + * Pointer to Ethernet device. * @param[out] ifname * Interface name output buffer. * * @return - * 0 on success, -1 on failure and errno is set. + * 0 on success, a negative errno value otherwise and rte_errno is set. */ int -priv_get_ifname(const struct priv *priv, char (*ifname)[IF_NAMESIZE]) +mlx5_get_ifname(const struct rte_eth_dev *dev, char (*ifname)[IF_NAMESIZE]) { + struct priv *priv = dev->data->dev_private; DIR *dir; struct dirent *dent; unsigned int dev_type = 0; @@ -157,8 +142,10 @@ priv_get_ifname(const struct priv *priv, char (*ifname)[IF_NAMESIZE]) MKSTR(path, "%s/device/net", priv->ibdev_path); dir = opendir(path); - if (dir == NULL) - return -1; + if (dir == NULL) { + rte_errno = errno; + return -rte_errno; + } } while ((dent = readdir(dir)) != NULL) { char *name = dent->d_name; @@ -208,355 +195,131 @@ try_dev_id: snprintf(match, sizeof(match), "%s", name); } closedir(dir); - if (match[0] == '\0') - return -1; - strncpy(*ifname, match, sizeof(*ifname)); - return 0; -} - -/** - * Check if the counter is located on ib counters file. - * - * @param[in] cntr - * Counter name. - * - * @return - * 1 if counter is located on ib counters file , 0 otherwise. - */ -int -priv_is_ib_cntr(const char *cntr) -{ - if (!strcmp(cntr, "out_of_buffer")) - return 1; - return 0; -} - -/** - * Read from sysfs entry. - * - * @param[in] priv - * Pointer to private structure. - * @param[in] entry - * Entry name relative to sysfs path. - * @param[out] buf - * Data output buffer. - * @param size - * Buffer size. - * - * @return - * 0 on success, -1 on failure and errno is set. 
- */ -static int -priv_sysfs_read(const struct priv *priv, const char *entry, - char *buf, size_t size) -{ - char ifname[IF_NAMESIZE]; - FILE *file; - int ret; - int err; - - if (priv_get_ifname(priv, &ifname)) - return -1; - - if (priv_is_ib_cntr(entry)) { - MKSTR(path, "%s/ports/1/hw_counters/%s", - priv->ibdev_path, entry); - file = fopen(path, "rb"); - } else { - MKSTR(path, "%s/device/net/%s/%s", - priv->ibdev_path, ifname, entry); - file = fopen(path, "rb"); - } - if (file == NULL) - return -1; - ret = fread(buf, 1, size, file); - err = errno; - if (((size_t)ret < size) && (ferror(file))) - ret = -1; - else - ret = size; - fclose(file); - errno = err; - return ret; -} - -/** - * Write to sysfs entry. - * - * @param[in] priv - * Pointer to private structure. - * @param[in] entry - * Entry name relative to sysfs path. - * @param[in] buf - * Data buffer. - * @param size - * Buffer size. - * - * @return - * 0 on success, -1 on failure and errno is set. - */ -static int -priv_sysfs_write(const struct priv *priv, const char *entry, - char *buf, size_t size) -{ - char ifname[IF_NAMESIZE]; - FILE *file; - int ret; - int err; - - if (priv_get_ifname(priv, &ifname)) - return -1; - - MKSTR(path, "%s/device/net/%s/%s", priv->ibdev_path, ifname, entry); - - file = fopen(path, "wb"); - if (file == NULL) - return -1; - ret = fwrite(buf, 1, size, file); - err = errno; - if (((size_t)ret < size) || (ferror(file))) - ret = -1; - else - ret = size; - fclose(file); - errno = err; - return ret; -} - -/** - * Get unsigned long sysfs property. - * - * @param priv - * Pointer to private structure. - * @param[in] name - * Entry name relative to sysfs path. - * @param[out] value - * Value output buffer. - * - * @return - * 0 on success, -1 on failure and errno is set. - */ -static int -priv_get_sysfs_ulong(struct priv *priv, const char *name, unsigned long *value) -{ - int ret; - unsigned long value_ret; - char value_str[32]; - - ret = priv_sysfs_read(priv, name, value_str, (sizeof(value_str) - 1)); - if (ret == -1) { - DEBUG("cannot read %s value from sysfs: %s", - name, strerror(errno)); - return -1; - } - value_str[ret] = '\0'; - errno = 0; - value_ret = strtoul(value_str, NULL, 0); - if (errno) { - DEBUG("invalid %s value `%s': %s", name, value_str, - strerror(errno)); - return -1; - } - *value = value_ret; - return 0; -} - -/** - * Set unsigned long sysfs property. - * - * @param priv - * Pointer to private structure. - * @param[in] name - * Entry name relative to sysfs path. - * @param value - * Value to set. - * - * @return - * 0 on success, -1 on failure and errno is set. - */ -static int -priv_set_sysfs_ulong(struct priv *priv, const char *name, unsigned long value) -{ - int ret; - MKSTR(value_str, "%lu", value); - - ret = priv_sysfs_write(priv, name, value_str, (sizeof(value_str) - 1)); - if (ret == -1) { - DEBUG("cannot write %s `%s' (%lu) to sysfs: %s", - name, value_str, value, strerror(errno)); - return -1; + if (match[0] == '\0') { + rte_errno = ENOENT; + return -rte_errno; } + strncpy(*ifname, match, sizeof(*ifname)); return 0; } /** * Perform ifreq ioctl() on associated Ethernet device. * - * @param[in] priv - * Pointer to private structure. + * @param[in] dev + * Pointer to Ethernet device. * @param req * Request number to pass to ioctl(). * @param[out] ifr * Interface request structure output buffer. * * @return - * 0 on success, -1 on failure and errno is set. + * 0 on success, a negative errno value otherwise and rte_errno is set. 
*/ int -priv_ifreq(const struct priv *priv, int req, struct ifreq *ifr) +mlx5_ifreq(const struct rte_eth_dev *dev, int req, struct ifreq *ifr) { int sock = socket(PF_INET, SOCK_DGRAM, IPPROTO_IP); - int ret = -1; + int ret = 0; - if (sock == -1) - return ret; - if (priv_get_ifname(priv, &ifr->ifr_name) == 0) - ret = ioctl(sock, req, ifr); + if (sock == -1) { + rte_errno = errno; + return -rte_errno; + } + ret = mlx5_get_ifname(dev, &ifr->ifr_name); + if (ret) + goto error; + ret = ioctl(sock, req, ifr); + if (ret == -1) { + rte_errno = errno; + goto error; + } close(sock); - return ret; -} - -/** - * Return the number of active VFs for the current device. - * - * @param[in] priv - * Pointer to private structure. - * @param[out] num_vfs - * Number of active VFs. - * - * @return - * 0 on success, -1 on failure and errno is set. - */ -int -priv_get_num_vfs(struct priv *priv, uint16_t *num_vfs) -{ - /* The sysfs entry name depends on the operating system. */ - const char **name = (const char *[]){ - "device/sriov_numvfs", - "device/mlx5_num_vfs", - NULL, - }; - int ret; - - do { - unsigned long ulong_num_vfs; - - ret = priv_get_sysfs_ulong(priv, *name, &ulong_num_vfs); - if (!ret) - *num_vfs = ulong_num_vfs; - } while (*(++name) && ret); - return ret; + return 0; +error: + close(sock); + return -rte_errno; } /** * Get device MTU. * - * @param priv - * Pointer to private structure. + * @param dev + * Pointer to Ethernet device. * @param[out] mtu * MTU value output buffer. * * @return - * 0 on success, -1 on failure and errno is set. - */ -int -priv_get_mtu(struct priv *priv, uint16_t *mtu) -{ - unsigned long ulong_mtu; - - if (priv_get_sysfs_ulong(priv, "mtu", &ulong_mtu) == -1) - return -1; - *mtu = ulong_mtu; - return 0; -} - -/** - * Read device counter from sysfs. - * - * @param priv - * Pointer to private structure. - * @param name - * Counter name. - * @param[out] cntr - * Counter output buffer. - * - * @return - * 0 on success, -1 on failure and errno is set. + * 0 on success, a negative errno value otherwise and rte_errno is set. */ int -priv_get_cntr_sysfs(struct priv *priv, const char *name, uint64_t *cntr) +mlx5_get_mtu(struct rte_eth_dev *dev, uint16_t *mtu) { - unsigned long ulong_ctr; + struct ifreq request; + int ret = mlx5_ifreq(dev, SIOCGIFMTU, &request); - if (priv_get_sysfs_ulong(priv, name, &ulong_ctr) == -1) - return -1; - *cntr = ulong_ctr; + if (ret) + return ret; + *mtu = request.ifr_mtu; return 0; } /** * Set device MTU. * - * @param priv - * Pointer to private structure. + * @param dev + * Pointer to Ethernet device. * @param mtu * MTU value to set. * * @return - * 0 on success, -1 on failure and errno is set. + * 0 on success, a negative errno value otherwise and rte_errno is set. */ static int -priv_set_mtu(struct priv *priv, uint16_t mtu) +mlx5_set_mtu(struct rte_eth_dev *dev, uint16_t mtu) { - uint16_t new_mtu; + struct ifreq request = { .ifr_mtu = mtu, }; - if (priv_set_sysfs_ulong(priv, "mtu", mtu) || - priv_get_mtu(priv, &new_mtu)) - return -1; - if (new_mtu == mtu) - return 0; - errno = EINVAL; - return -1; + return mlx5_ifreq(dev, SIOCSIFMTU, &request); } /** * Set device flags. * - * @param priv - * Pointer to private structure. + * @param dev + * Pointer to Ethernet device. * @param keep * Bitmask for flags that must remain untouched. * @param flags * Bitmask for flags to modify. * * @return - * 0 on success, -1 on failure and errno is set. + * 0 on success, a negative errno value otherwise and rte_errno is set. 
*/ int -priv_set_flags(struct priv *priv, unsigned int keep, unsigned int flags) +mlx5_set_flags(struct rte_eth_dev *dev, unsigned int keep, unsigned int flags) { - unsigned long tmp; + struct ifreq request; + int ret = mlx5_ifreq(dev, SIOCGIFFLAGS, &request); - if (priv_get_sysfs_ulong(priv, "flags", &tmp) == -1) - return -1; - tmp &= keep; - tmp |= (flags & (~keep)); - return priv_set_sysfs_ulong(priv, "flags", tmp); + if (ret) + return ret; + request.ifr_flags &= keep; + request.ifr_flags |= flags & ~keep; + return mlx5_ifreq(dev, SIOCSIFFLAGS, &request); } /** - * Ethernet device configuration. - * - * Prepare the driver for a given number of TX and RX queues. + * DPDK callback for Ethernet device configuration. * * @param dev * Pointer to Ethernet device structure. * * @return - * 0 on success, errno value on failure. + * 0 on success, a negative errno value otherwise and rte_errno is set. */ -static int -dev_configure(struct rte_eth_dev *dev) +int +mlx5_dev_configure(struct rte_eth_dev *dev) { struct priv *priv = dev->data->dev_private; unsigned int rxqs_n = dev->data->nb_rx_queues; @@ -566,19 +329,24 @@ dev_configure(struct rte_eth_dev *dev) unsigned int reta_idx_n; const uint8_t use_app_rss_key = !!dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key; + int ret = 0; if (use_app_rss_key && (dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key_len != rss_hash_default_key_len)) { - /* MLX5 RSS only support 40bytes key. */ - return EINVAL; + DRV_LOG(ERR, "port %u RSS key len must be %zu Bytes long", + dev->data->port_id, rss_hash_default_key_len); + rte_errno = EINVAL; + return -rte_errno; } priv->rss_conf.rss_key = rte_realloc(priv->rss_conf.rss_key, rss_hash_default_key_len, 0); if (!priv->rss_conf.rss_key) { - ERROR("cannot allocate RSS hash key memory (%u)", rxqs_n); - return ENOMEM; + DRV_LOG(ERR, "port %u cannot allocate RSS hash key memory (%u)", + dev->data->port_id, rxqs_n); + rte_errno = ENOMEM; + return -rte_errno; } memcpy(priv->rss_conf.rss_key, use_app_rss_key ? @@ -590,18 +358,20 @@ dev_configure(struct rte_eth_dev *dev) priv->rxqs = (void *)dev->data->rx_queues; priv->txqs = (void *)dev->data->tx_queues; if (txqs_n != priv->txqs_n) { - INFO("%p: TX queues number update: %u -> %u", - (void *)dev, priv->txqs_n, txqs_n); + DRV_LOG(INFO, "port %u Tx queues number update: %u -> %u", + dev->data->port_id, priv->txqs_n, txqs_n); priv->txqs_n = txqs_n; } if (rxqs_n > priv->ind_table_max_size) { - ERROR("cannot handle this many RX queues (%u)", rxqs_n); - return EINVAL; + DRV_LOG(ERR, "port %u cannot handle this many Rx queues (%u)", + dev->data->port_id, rxqs_n); + rte_errno = EINVAL; + return -rte_errno; } if (rxqs_n == priv->rxqs_n) return 0; - INFO("%p: RX queues number update: %u -> %u", - (void *)dev, priv->rxqs_n, rxqs_n); + DRV_LOG(INFO, "port %u Rx queues number update: %u -> %u", + dev->data->port_id, priv->rxqs_n, rxqs_n); priv->rxqs_n = rxqs_n; /* If the requested number of RX queues is not a power of two, use the * maximum indirection table size for better balancing. @@ -609,8 +379,9 @@ dev_configure(struct rte_eth_dev *dev) reta_idx_n = (1 << log2above((rxqs_n & (rxqs_n - 1)) ? priv->ind_table_max_size : rxqs_n)); - if (priv_rss_reta_index_resize(priv, reta_idx_n)) - return ENOMEM; + ret = mlx5_rss_reta_index_resize(dev, reta_idx_n); + if (ret) + return ret; /* When the number of RX queues is not a power of two, the remaining * table entries are padded with reused WQs and hashes are not spread * uniformly. 
*/ @@ -623,28 +394,6 @@ dev_configure(struct rte_eth_dev *dev) } /** - * DPDK callback for Ethernet device configuration. - * - * @param dev - * Pointer to Ethernet device structure. - * - * @return - * 0 on success, negative errno value on failure. - */ -int -mlx5_dev_configure(struct rte_eth_dev *dev) -{ - struct priv *priv = dev->data->dev_private; - int ret; - - priv_lock(priv); - ret = dev_configure(dev); - assert(ret >= 0); - priv_unlock(priv); - return -ret; -} - -/** * DPDK callback to get information about the device. * * @param dev @@ -655,13 +404,11 @@ mlx5_dev_configure(struct rte_eth_dev *dev) void mlx5_dev_infos_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *info) { - struct priv *priv = mlx5_get_priv(dev); + struct priv *priv = dev->data->dev_private; unsigned int max; char ifname[IF_NAMESIZE]; info->pci_dev = RTE_ETH_DEV_TO_PCI(dev); - - priv_lock(priv); /* FIXME: we should ask the device for these values. */ info->min_rx_bufsize = 32; info->max_rx_pktlen = 65536; @@ -699,16 +446,24 @@ mlx5_dev_infos_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *info) info->tx_offload_capa |= (DEV_TX_OFFLOAD_OUTER_IPV4_CKSUM | DEV_TX_OFFLOAD_VXLAN_TNL_TSO | DEV_TX_OFFLOAD_GRE_TNL_TSO); - if (priv_get_ifname(priv, &ifname) == 0) + if (mlx5_get_ifname(dev, &ifname) == 0) info->if_index = if_nametoindex(ifname); info->reta_size = priv->reta_idx_n ? priv->reta_idx_n : priv->ind_table_max_size; - info->hash_key_size = priv->rss_conf.rss_key_len; + info->hash_key_size = rss_hash_default_key_len; info->speed_capa = priv->link_speed_capa; info->flow_type_rss_offloads = ~MLX5_RSS_HF_MASK; - priv_unlock(priv); } +/** + * Get supported packet types. + * + * @param dev + * Pointer to Ethernet device structure. + * + * @return + * A pointer to the supported Packet types array. + */ const uint32_t * mlx5_dev_supported_ptypes_get(struct rte_eth_dev *dev) { @@ -741,35 +496,41 @@ mlx5_dev_supported_ptypes_get(struct rte_eth_dev *dev) * * @param dev * Pointer to Ethernet device structure. - * @param wait_to_complete - * Wait for request completion (ignored). + * @param[out] link + * Storage for current link status. + * + * @return + * 0 on success, a negative errno value otherwise and rte_errno is set. */ static int -mlx5_link_update_unlocked_gset(struct rte_eth_dev *dev, int wait_to_complete) +mlx5_link_update_unlocked_gset(struct rte_eth_dev *dev, + struct rte_eth_link *link) { - struct priv *priv = mlx5_get_priv(dev); + struct priv *priv = dev->data->dev_private; struct ethtool_cmd edata = { .cmd = ETHTOOL_GSET /* Deprecated since Linux v4.5. */ }; struct ifreq ifr; struct rte_eth_link dev_link; int link_speed = 0; + int ret; - /* priv_lock() is not taken to allow concurrent calls. 
*/ - - (void)wait_to_complete; - if (priv_ifreq(priv, SIOCGIFFLAGS, &ifr)) { - WARN("ioctl(SIOCGIFFLAGS) failed: %s", strerror(errno)); - return -1; + ret = mlx5_ifreq(dev, SIOCGIFFLAGS, &ifr); + if (ret) { + DRV_LOG(WARNING, "port %u ioctl(SIOCGIFFLAGS) failed: %s", + dev->data->port_id, strerror(rte_errno)); + return ret; } memset(&dev_link, 0, sizeof(dev_link)); dev_link.link_status = ((ifr.ifr_flags & IFF_UP) && (ifr.ifr_flags & IFF_RUNNING)); ifr.ifr_data = (void *)&edata; - if (priv_ifreq(priv, SIOCETHTOOL, &ifr)) { - WARN("ioctl(SIOCETHTOOL, ETHTOOL_GSET) failed: %s", - strerror(errno)); - return -1; + ret = mlx5_ifreq(dev, SIOCETHTOOL, &ifr); + if (ret) { + DRV_LOG(WARNING, + "port %u ioctl(SIOCETHTOOL, ETHTOOL_GSET) failed: %s", + dev->data->port_id, strerror(rte_errno)); + return ret; } link_speed = ethtool_cmd_speed(&edata); if (link_speed == -1) @@ -793,13 +554,13 @@ mlx5_link_update_unlocked_gset(struct rte_eth_dev *dev, int wait_to_complete) ETH_LINK_HALF_DUPLEX : ETH_LINK_FULL_DUPLEX); dev_link.link_autoneg = !(dev->data->dev_conf.link_speeds & ETH_LINK_SPEED_FIXED); - if (memcmp(&dev_link, &dev->data->dev_link, sizeof(dev_link))) { - /* Link status changed. */ - dev->data->dev_link = dev_link; - return 0; + if ((dev_link.link_speed && !dev_link.link_status) || + (!dev_link.link_speed && dev_link.link_status)) { + rte_errno = EAGAIN; + return -rte_errno; } - /* Link status is still the same. */ - return -1; + *link = dev_link; + return 0; } /** @@ -807,31 +568,41 @@ mlx5_link_update_unlocked_gset(struct rte_eth_dev *dev, int wait_to_complete) * * @param dev * Pointer to Ethernet device structure. - * @param wait_to_complete - * Wait for request completion (ignored). + * @param[out] link + * Storage for current link status. + * + * @return + * 0 on success, a negative errno value otherwise and rte_errno is set. 
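+ *
+ * A simplified sketch of the retry loop in mlx5_link_update() below, which
+ * consumes the EAGAIN returned here when the reported status looks
+ * inconsistent:
+ *
+ * @code
+ * do {
+ *	ret = mlx5_link_update_unlocked_gs(dev, &dev_link);
+ *	if (ret)
+ *		ret = mlx5_link_update_unlocked_gset(dev, &dev_link);
+ * } while (wait_to_complete && ret == -EAGAIN);
+ * @endcode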
 */
 static int
-mlx5_link_update_unlocked_gs(struct rte_eth_dev *dev, int wait_to_complete)
+mlx5_link_update_unlocked_gs(struct rte_eth_dev *dev,
+			     struct rte_eth_link *link)
 {
-	struct priv *priv = mlx5_get_priv(dev);
+	struct priv *priv = dev->data->dev_private;
 	struct ethtool_link_settings gcmd = { .cmd = ETHTOOL_GLINKSETTINGS };
 	struct ifreq ifr;
 	struct rte_eth_link dev_link;
 	uint64_t sc;
+	int ret;
 
-	(void)wait_to_complete;
-	if (priv_ifreq(priv, SIOCGIFFLAGS, &ifr)) {
-		WARN("ioctl(SIOCGIFFLAGS) failed: %s", strerror(errno));
-		return -1;
+	ret = mlx5_ifreq(dev, SIOCGIFFLAGS, &ifr);
+	if (ret) {
+		DRV_LOG(WARNING, "port %u ioctl(SIOCGIFFLAGS) failed: %s",
+			dev->data->port_id, strerror(rte_errno));
+		return ret;
 	}
 	memset(&dev_link, 0, sizeof(dev_link));
 	dev_link.link_status = ((ifr.ifr_flags & IFF_UP) &&
 				(ifr.ifr_flags & IFF_RUNNING));
 	ifr.ifr_data = (void *)&gcmd;
-	if (priv_ifreq(priv, SIOCETHTOOL, &ifr)) {
-		DEBUG("ioctl(SIOCETHTOOL, ETHTOOL_GLINKSETTINGS) failed: %s",
-		      strerror(errno));
-		return -1;
+	ret = mlx5_ifreq(dev, SIOCETHTOOL, &ifr);
+	if (ret) {
+		DRV_LOG(DEBUG,
+			"port %u ioctl(SIOCETHTOOL, ETHTOOL_GLINKSETTINGS)"
+			" failed: %s",
+			dev->data->port_id, strerror(rte_errno));
+		return ret;
 	}
 	gcmd.link_mode_masks_nwords = -gcmd.link_mode_masks_nwords;
@@ -842,10 +613,13 @@ mlx5_link_update_unlocked_gs(struct rte_eth_dev *dev, int wait_to_complete)
 	*ecmd = gcmd;
 	ifr.ifr_data = (void *)ecmd;
-	if (priv_ifreq(priv, SIOCETHTOOL, &ifr)) {
-		DEBUG("ioctl(SIOCETHTOOL, ETHTOOL_GLINKSETTINGS) failed: %s",
-		      strerror(errno));
-		return -1;
+	ret = mlx5_ifreq(dev, SIOCETHTOOL, &ifr);
+	if (ret) {
+		DRV_LOG(DEBUG,
+			"port %u ioctl(SIOCETHTOOL, ETHTOOL_GLINKSETTINGS)"
+			" failed: %s",
+			dev->data->port_id, strerror(rte_errno));
+		return ret;
 	}
 	dev_link.link_speed = ecmd->speed;
 	sc = ecmd->link_mode_masks[0] |
@@ -889,121 +663,13 @@ mlx5_link_update_unlocked_gs(struct rte_eth_dev *dev, int wait_to_complete)
 			  ETH_LINK_HALF_DUPLEX : ETH_LINK_FULL_DUPLEX);
 	dev_link.link_autoneg = !(dev->data->dev_conf.link_speeds &
 				  ETH_LINK_SPEED_FIXED);
-	if (memcmp(&dev_link, &dev->data->dev_link, sizeof(dev_link))) {
-		/* Link status changed. */
-		dev->data->dev_link = dev_link;
-		return 0;
+	if ((dev_link.link_speed && !dev_link.link_status) ||
+	    (!dev_link.link_speed && dev_link.link_status)) {
+		rte_errno = EAGAIN;
+		return -rte_errno;
 	}
-	/* Link status is still the same. */
-	return -1;
-}
-
-/**
- * Enable receiving and transmitting traffic.
- *
- * @param priv
- *   Pointer to private structure.
- */
-static void
-priv_link_start(struct priv *priv)
-{
-	struct rte_eth_dev *dev = priv->dev;
-	int err;
-
-	priv_dev_select_tx_function(priv, dev);
-	priv_dev_select_rx_function(priv, dev);
-	err = priv_dev_traffic_enable(priv, dev);
-	if (err)
-		ERROR("%p: error occurred while configuring control flows: %s",
-		      (void *)priv, strerror(err));
-	err = priv_flow_start(priv, &priv->flows);
-	if (err)
-		ERROR("%p: error occurred while configuring flows: %s",
-		      (void *)priv, strerror(err));
-}
-
-/**
- * Disable receiving and transmitting traffic.
- *
- * @param priv
- *   Pointer to private structure.
- */
-static void
-priv_link_stop(struct priv *priv)
-{
-	struct rte_eth_dev *dev = priv->dev;
-
-	priv_flow_stop(priv, &priv->flows);
-	priv_dev_traffic_disable(priv, dev);
-	dev->rx_pkt_burst = removed_rx_burst;
-	dev->tx_pkt_burst = removed_tx_burst;
-}
-
-/**
- * Retrieve physical link information and update rx/tx_pkt_burst callbacks
- * accordingly.
- *
- * @param priv
- *   Pointer to private structure.
- * @param wait_to_complete - * Wait for request completion (ignored). - */ -int -priv_link_update(struct priv *priv, int wait_to_complete) -{ - struct rte_eth_dev *dev = priv->dev; - struct utsname utsname; - int ver[3]; - int ret; - struct rte_eth_link dev_link = dev->data->dev_link; - - if (uname(&utsname) == -1 || - sscanf(utsname.release, "%d.%d.%d", - &ver[0], &ver[1], &ver[2]) != 3 || - KERNEL_VERSION(ver[0], ver[1], ver[2]) < KERNEL_VERSION(4, 9, 0)) - ret = mlx5_link_update_unlocked_gset(dev, wait_to_complete); - else - ret = mlx5_link_update_unlocked_gs(dev, wait_to_complete); - /* If lsc interrupt is disabled, should always be ready for traffic. */ - if (!dev->data->dev_conf.intr_conf.lsc) { - priv_link_start(priv); - return ret; - } - /* Re-select burst callbacks only if link status has been changed. */ - if (!ret && dev_link.link_status != dev->data->dev_link.link_status) { - if (dev->data->dev_link.link_status == ETH_LINK_UP) - priv_link_start(priv); - else - priv_link_stop(priv); - } - return ret; -} - -/** - * Querying the link status till it changes to the desired state. - * Number of query attempts is bounded by MLX5_MAX_LINK_QUERY_ATTEMPTS. - * - * @param priv - * Pointer to private structure. - * @param status - * Link desired status. - * - * @return - * 0 on success, negative errno value on failure. - */ -int -priv_force_link_status_change(struct priv *priv, int status) -{ - int try = 0; - - while (try < MLX5_MAX_LINK_QUERY_ATTEMPTS) { - priv_link_update(priv, 0); - if (priv->dev->data->dev_link.link_status == status) - return 0; - try++; - sleep(1); - } - return -EAGAIN; + *link = dev_link; + return 0; } /** @@ -1012,17 +678,42 @@ priv_force_link_status_change(struct priv *priv, int status) * @param dev * Pointer to Ethernet device structure. * @param wait_to_complete - * Wait for request completion (ignored). + * Wait for request completion. + * + * @return + * 0 if link status was not updated, positive if it was, a negative errno + * value otherwise and rte_errno is set. */ int mlx5_link_update(struct rte_eth_dev *dev, int wait_to_complete) { - struct priv *priv = dev->data->dev_private; int ret; + struct rte_eth_link dev_link; + time_t start_time = time(NULL); - priv_lock(priv); - ret = priv_link_update(priv, wait_to_complete); - priv_unlock(priv); + do { + ret = mlx5_link_update_unlocked_gs(dev, &dev_link); + if (ret) + ret = mlx5_link_update_unlocked_gset(dev, &dev_link); + if (ret == 0) + break; + /* Handle wait to complete situation. */ + if (wait_to_complete && ret == -EAGAIN) { + if (abs((int)difftime(time(NULL), start_time)) < + MLX5_LINK_STATUS_TIMEOUT) { + usleep(0); + continue; + } else { + rte_errno = EBUSY; + return -rte_errno; + } + } else if (ret < 0) { + return ret; + } + } while (wait_to_complete); + ret = !!memcmp(&dev->data->dev_link, &dev_link, + sizeof(struct rte_eth_link)); + dev->data->dev_link = dev_link; return ret; } @@ -1035,39 +726,33 @@ mlx5_link_update(struct rte_eth_dev *dev, int wait_to_complete) * New MTU. * * @return - * 0 on success, negative errno value on failure. + * 0 on success, a negative errno value otherwise and rte_errno is set. */ int mlx5_dev_set_mtu(struct rte_eth_dev *dev, uint16_t mtu) { struct priv *priv = dev->data->dev_private; - uint16_t kern_mtu; - int ret = 0; + uint16_t kern_mtu = 0; + int ret; - priv_lock(priv); - ret = priv_get_mtu(priv, &kern_mtu); + ret = mlx5_get_mtu(dev, &kern_mtu); if (ret) - goto out; + return ret; /* Set kernel interface MTU first. 
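+	 * and read it back afterwards: priv->mtu is only updated once the
+	 * kernel reports that the requested value actually took effect,
+	 * otherwise EAGAIN is returned to the caller.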
*/ - ret = priv_set_mtu(priv, mtu); + ret = mlx5_set_mtu(dev, mtu); if (ret) - goto out; - ret = priv_get_mtu(priv, &kern_mtu); + return ret; + ret = mlx5_get_mtu(dev, &kern_mtu); if (ret) - goto out; + return ret; if (kern_mtu == mtu) { priv->mtu = mtu; - DEBUG("adapter port %u MTU set to %u", priv->port, mtu); + DRV_LOG(DEBUG, "port %u adapter MTU set to %u", + dev->data->port_id, mtu); + return 0; } - priv_unlock(priv); - return 0; -out: - ret = errno; - WARN("cannot set port %u MTU to %u: %s", priv->port, mtu, - strerror(ret)); - priv_unlock(priv); - assert(ret >= 0); - return -ret; + rte_errno = EAGAIN; + return -rte_errno; } /** @@ -1079,12 +764,11 @@ out: * Flow control output buffer. * * @return - * 0 on success, negative errno value on failure. + * 0 on success, a negative errno value otherwise and rte_errno is set. */ int mlx5_dev_get_flow_ctrl(struct rte_eth_dev *dev, struct rte_eth_fc_conf *fc_conf) { - struct priv *priv = dev->data->dev_private; struct ifreq ifr; struct ethtool_pauseparam ethpause = { .cmd = ETHTOOL_GPAUSEPARAM @@ -1092,15 +776,14 @@ mlx5_dev_get_flow_ctrl(struct rte_eth_dev *dev, struct rte_eth_fc_conf *fc_conf) int ret; ifr.ifr_data = (void *)ðpause; - priv_lock(priv); - if (priv_ifreq(priv, SIOCETHTOOL, &ifr)) { - ret = errno; - WARN("ioctl(SIOCETHTOOL, ETHTOOL_GPAUSEPARAM)" - " failed: %s", - strerror(ret)); - goto out; + ret = mlx5_ifreq(dev, SIOCETHTOOL, &ifr); + if (ret) { + DRV_LOG(WARNING, + "port %u ioctl(SIOCETHTOOL, ETHTOOL_GPAUSEPARAM) failed:" + " %s", + dev->data->port_id, strerror(rte_errno)); + return ret; } - fc_conf->autoneg = ethpause.autoneg; if (ethpause.rx_pause && ethpause.tx_pause) fc_conf->mode = RTE_FC_FULL; @@ -1110,12 +793,7 @@ mlx5_dev_get_flow_ctrl(struct rte_eth_dev *dev, struct rte_eth_fc_conf *fc_conf) fc_conf->mode = RTE_FC_TX_PAUSE; else fc_conf->mode = RTE_FC_NONE; - ret = 0; - -out: - priv_unlock(priv); - assert(ret >= 0); - return -ret; + return 0; } /** @@ -1127,12 +805,11 @@ out: * Flow control parameters. * * @return - * 0 on success, negative errno value on failure. + * 0 on success, a negative errno value otherwise and rte_errno is set. */ int mlx5_dev_set_flow_ctrl(struct rte_eth_dev *dev, struct rte_eth_fc_conf *fc_conf) { - struct priv *priv = dev->data->dev_private; struct ifreq ifr; struct ethtool_pauseparam ethpause = { .cmd = ETHTOOL_SPAUSEPARAM @@ -1152,21 +829,15 @@ mlx5_dev_set_flow_ctrl(struct rte_eth_dev *dev, struct rte_eth_fc_conf *fc_conf) ethpause.tx_pause = 1; else ethpause.tx_pause = 0; - - priv_lock(priv); - if (priv_ifreq(priv, SIOCETHTOOL, &ifr)) { - ret = errno; - WARN("ioctl(SIOCETHTOOL, ETHTOOL_SPAUSEPARAM)" - " failed: %s", - strerror(ret)); - goto out; + ret = mlx5_ifreq(dev, SIOCETHTOOL, &ifr); + if (ret) { + DRV_LOG(WARNING, + "port %u ioctl(SIOCETHTOOL, ETHTOOL_SPAUSEPARAM)" + " failed: %s", + dev->data->port_id, strerror(rte_errno)); + return ret; } - ret = 0; - -out: - priv_unlock(priv); - assert(ret >= 0); - return -ret; + return 0; } /** @@ -1178,7 +849,7 @@ out: * PCI bus address output buffer. * * @return - * 0 on success, -1 on failure and errno is set. + * 0 on success, a negative errno value otherwise and rte_errno is set. 
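+ *
+ * Minimal usage sketch (hypothetical caller, error handling elided):
+ *
+ * @code
+ * struct rte_pci_addr pci_addr;
+ *
+ * if (mlx5_ibv_device_to_pci_addr(ibv_dev, &pci_addr) == 0)
+ *	printf(PCI_PRI_FMT "\n", pci_addr.domain, pci_addr.bus,
+ *	       pci_addr.devid, pci_addr.function);
+ * @endcode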
 */
 int
 mlx5_ibv_device_to_pci_addr(const struct ibv_device *device,
@@ -1189,8 +860,10 @@ mlx5_ibv_device_to_pci_addr(const struct ibv_device *device,
 	MKSTR(path, "%s/device/uevent", device->ibdev_path);
 
 	file = fopen(path, "rb");
-	if (file == NULL)
-		return -1;
+	if (file == NULL) {
+		rte_errno = errno;
+		return -rte_errno;
+	}
 	while (fgets(line, sizeof(line), file) == line) {
 		size_t len = strlen(line);
 		int ret;
@@ -1220,46 +893,10 @@ mlx5_ibv_device_to_pci_addr(const struct ibv_device *device,
 }
 
 /**
- * Update the link status.
- *
- * @param priv
- *   Pointer to private structure.
- *
- * @return
- *   Zero if the callback process can be called immediately.
- */
-static int
-priv_link_status_update(struct priv *priv)
-{
-	struct rte_eth_link *link = &priv->dev->data->dev_link;
-
-	priv_link_update(priv, 0);
-	if (((link->link_speed == 0) && link->link_status) ||
-		((link->link_speed != 0) && !link->link_status)) {
-		/*
-		 * Inconsistent status. Event likely occurred before the
-		 * kernel netdevice exposes the new status.
-		 */
-		if (!priv->pending_alarm) {
-			priv->pending_alarm = 1;
-			rte_eal_alarm_set(MLX5_ALARM_TIMEOUT_US,
-					  mlx5_dev_link_status_handler,
-					  priv->dev);
-		}
-		return 1;
-	} else if (unlikely(priv->pending_alarm)) {
-		/* Link interrupt occurred while alarm is already scheduled. */
-		priv->pending_alarm = 0;
-		rte_eal_alarm_cancel(mlx5_dev_link_status_handler, priv->dev);
-	}
-	return 0;
-}
-
-/**
 * Device status handler.
 *
- * @param priv
- *   Pointer to private structure.
+ * @param dev
+ *   Pointer to Ethernet device.
 * @param events
 *   Pointer to event flags holder.
 *
@@ -1267,61 +904,37 @@ priv_link_status_update(struct priv *priv)
 *   Bitmap of events which the callback process can handle immediately.
 */
static uint32_t
-priv_dev_status_handler(struct priv *priv)
+mlx5_dev_status_handler(struct rte_eth_dev *dev)
{
+	struct priv *priv = dev->data->dev_private;
	struct ibv_async_event event;
	uint32_t ret = 0;
 
+	if (mlx5_link_update(dev, 0) == -EAGAIN) {
+		usleep(0);
+		return 0;
+	}
	/* Read all messages and acknowledge them. */
	for (;;) {
		if (ibv_get_async_event(priv->ctx, &event))
			break;
		if ((event.event_type == IBV_EVENT_PORT_ACTIVE ||
			event.event_type == IBV_EVENT_PORT_ERR) &&
-			(priv->dev->data->dev_conf.intr_conf.lsc == 1))
+			(dev->data->dev_conf.intr_conf.lsc == 1))
			ret |= (1 << RTE_ETH_EVENT_INTR_LSC);
		else if (event.event_type == IBV_EVENT_DEVICE_FATAL &&
-			priv->dev->data->dev_conf.intr_conf.rmv == 1)
+			dev->data->dev_conf.intr_conf.rmv == 1)
			ret |= (1 << RTE_ETH_EVENT_INTR_RMV);
		else
-			DEBUG("event type %d on port %d not handled",
-			      event.event_type, event.element.port_num);
+			DRV_LOG(DEBUG,
+				"port %u event type %d not handled",
+				dev->data->port_id, event.event_type);
		ibv_ack_async_event(&event);
	}
-	if (ret & (1 << RTE_ETH_EVENT_INTR_LSC))
-		if (priv_link_status_update(priv))
-			ret &= ~(1 << RTE_ETH_EVENT_INTR_LSC);
	return ret;
}
 
/**
- * Handle delayed link status event.
- *
- * @param arg
- *   Registered argument.
- */
-void
-mlx5_dev_link_status_handler(void *arg)
-{
-	struct rte_eth_dev *dev = arg;
-	struct priv *priv = dev->data->dev_private;
-	int ret;
-
-	while (!priv_trylock(priv)) {
-		/* Alarm is being canceled. */
-		if (priv->pending_alarm == 0)
-			return;
-		rte_pause();
-	}
-	priv->pending_alarm = 0;
-	ret = priv_link_status_update(priv);
-	priv_unlock(priv);
-	if (!ret)
-		_rte_eth_dev_callback_process(dev, RTE_ETH_EVENT_INTR_LSC, NULL,
-					      NULL);
-}
-
-/**
 * Handle interrupts from the NIC.
* * @param[in] intr_handle @@ -1333,12 +946,9 @@ void mlx5_dev_interrupt_handler(void *cb_arg) { struct rte_eth_dev *dev = cb_arg; - struct priv *priv = dev->data->dev_private; uint32_t events; - priv_lock(priv); - events = priv_dev_status_handler(priv); - priv_unlock(priv); + events = mlx5_dev_status_handler(dev); if (events & (1 << RTE_ETH_EVENT_INTR_LSC)) _rte_eth_dev_callback_process(dev, RTE_ETH_EVENT_INTR_LSC, NULL, NULL); @@ -1357,24 +967,21 @@ static void mlx5_dev_handler_socket(void *cb_arg) { struct rte_eth_dev *dev = cb_arg; - struct priv *priv = dev->data->dev_private; - priv_lock(priv); - priv_socket_handle(priv); - priv_unlock(priv); + mlx5_socket_handle(dev); } /** * Uninstall interrupt handler. * - * @param priv - * Pointer to private structure. * @param dev - * Pointer to the rte_eth_dev structure. + * Pointer to Ethernet device. */ void -priv_dev_interrupt_handler_uninstall(struct priv *priv, struct rte_eth_dev *dev) +mlx5_dev_interrupt_handler_uninstall(struct rte_eth_dev *dev) { + struct priv *priv = dev->data->dev_private; + if (dev->data->dev_conf.intr_conf.lsc || dev->data->dev_conf.intr_conf.rmv) rte_intr_callback_unregister(&priv->intr_handle, @@ -1382,10 +989,6 @@ priv_dev_interrupt_handler_uninstall(struct priv *priv, struct rte_eth_dev *dev) if (priv->primary_socket) rte_intr_callback_unregister(&priv->intr_handle_socket, mlx5_dev_handler_socket, dev); - if (priv->pending_alarm) { - priv->pending_alarm = 0; - rte_eal_alarm_cancel(mlx5_dev_link_status_handler, dev); - } priv->intr_handle.fd = 0; priv->intr_handle.type = RTE_INTR_HANDLE_UNKNOWN; priv->intr_handle_socket.fd = 0; @@ -1395,21 +998,24 @@ priv_dev_interrupt_handler_uninstall(struct priv *priv, struct rte_eth_dev *dev) /** * Install interrupt handler. * - * @param priv - * Pointer to private structure. * @param dev - * Pointer to the rte_eth_dev structure. + * Pointer to Ethernet device. */ void -priv_dev_interrupt_handler_install(struct priv *priv, struct rte_eth_dev *dev) +mlx5_dev_interrupt_handler_install(struct rte_eth_dev *dev) { - int rc, flags; + struct priv *priv = dev->data->dev_private; + int ret; + int flags; assert(priv->ctx->async_fd > 0); flags = fcntl(priv->ctx->async_fd, F_GETFL); - rc = fcntl(priv->ctx->async_fd, F_SETFL, flags | O_NONBLOCK); - if (rc < 0) { - INFO("failed to change file descriptor async event queue"); + ret = fcntl(priv->ctx->async_fd, F_SETFL, flags | O_NONBLOCK); + if (ret) { + DRV_LOG(INFO, + "port %u failed to change file descriptor async event" + " queue", + dev->data->port_id); dev->data->dev_conf.intr_conf.lsc = 0; dev->data->dev_conf.intr_conf.rmv = 0; } @@ -1420,9 +1026,11 @@ priv_dev_interrupt_handler_install(struct priv *priv, struct rte_eth_dev *dev) rte_intr_callback_register(&priv->intr_handle, mlx5_dev_interrupt_handler, dev); } - - rc = priv_socket_init(priv); - if (!rc && priv->primary_socket) { + ret = mlx5_socket_init(dev); + if (ret) + DRV_LOG(ERR, "port %u cannot initialise socket: %s", + dev->data->port_id, strerror(rte_errno)); + else if (priv->primary_socket) { priv->intr_handle_socket.fd = priv->primary_socket; priv->intr_handle_socket.type = RTE_INTR_HANDLE_EXT; rte_intr_callback_register(&priv->intr_handle_socket, @@ -1431,41 +1039,18 @@ priv_dev_interrupt_handler_install(struct priv *priv, struct rte_eth_dev *dev) } /** - * Change the link state (UP / DOWN). - * - * @param priv - * Pointer to private data structure. - * @param up - * Nonzero for link up, otherwise link down. - * - * @return - * 0 on success, errno value on failure. 
- */ -static int -priv_dev_set_link(struct priv *priv, int up) -{ - return priv_set_flags(priv, ~IFF_UP, up ? IFF_UP : ~IFF_UP); -} - -/** * DPDK callback to bring the link DOWN. * * @param dev * Pointer to Ethernet device structure. * * @return - * 0 on success, errno value on failure. + * 0 on success, a negative errno value otherwise and rte_errno is set. */ int mlx5_set_link_down(struct rte_eth_dev *dev) { - struct priv *priv = dev->data->dev_private; - int err; - - priv_lock(priv); - err = priv_dev_set_link(priv, 0); - priv_unlock(priv); - return err; + return mlx5_set_flags(dev, ~IFF_UP, ~IFF_UP); } /** @@ -1475,72 +1060,77 @@ mlx5_set_link_down(struct rte_eth_dev *dev) * Pointer to Ethernet device structure. * * @return - * 0 on success, errno value on failure. + * 0 on success, a negative errno value otherwise and rte_errno is set. */ int mlx5_set_link_up(struct rte_eth_dev *dev) { - struct priv *priv = dev->data->dev_private; - int err; - - priv_lock(priv); - err = priv_dev_set_link(priv, 1); - priv_unlock(priv); - return err; + return mlx5_set_flags(dev, ~IFF_UP, IFF_UP); } /** * Configure the TX function to use. * - * @param priv - * Pointer to private data structure. * @param dev * Pointer to rte_eth_dev structure. + * + * @return + * Pointer to selected Tx burst function. */ -void -priv_dev_select_tx_function(struct priv *priv, struct rte_eth_dev *dev) +eth_tx_burst_t +mlx5_select_tx_function(struct rte_eth_dev *dev) { - assert(priv != NULL); - assert(dev != NULL); - dev->tx_pkt_burst = mlx5_tx_burst; + struct priv *priv = dev->data->dev_private; + eth_tx_burst_t tx_pkt_burst = mlx5_tx_burst; + /* Select appropriate TX function. */ if (priv->mps == MLX5_MPW_ENHANCED) { - if (priv_check_vec_tx_support(priv) > 0) { - if (priv_check_raw_vec_tx_support(priv) > 0) - dev->tx_pkt_burst = mlx5_tx_burst_raw_vec; + if (mlx5_check_vec_tx_support(dev) > 0) { + if (mlx5_check_raw_vec_tx_support(dev) > 0) + tx_pkt_burst = mlx5_tx_burst_raw_vec; else - dev->tx_pkt_burst = mlx5_tx_burst_vec; - DEBUG("selected Enhanced MPW TX vectorized function"); + tx_pkt_burst = mlx5_tx_burst_vec; + DRV_LOG(DEBUG, + "port %u selected enhanced MPW Tx vectorized" + " function", + dev->data->port_id); } else { - dev->tx_pkt_burst = mlx5_tx_burst_empw; - DEBUG("selected Enhanced MPW TX function"); + tx_pkt_burst = mlx5_tx_burst_empw; + DRV_LOG(DEBUG, + "port %u selected enhanced MPW Tx function", + dev->data->port_id); } } else if (priv->mps && priv->txq_inline) { - dev->tx_pkt_burst = mlx5_tx_burst_mpw_inline; - DEBUG("selected MPW inline TX function"); + tx_pkt_burst = mlx5_tx_burst_mpw_inline; + DRV_LOG(DEBUG, "port %u selected MPW inline Tx function", + dev->data->port_id); } else if (priv->mps) { - dev->tx_pkt_burst = mlx5_tx_burst_mpw; - DEBUG("selected MPW TX function"); + tx_pkt_burst = mlx5_tx_burst_mpw; + DRV_LOG(DEBUG, "port %u selected MPW Tx function", + dev->data->port_id); } + return tx_pkt_burst; } /** * Configure the RX function to use. * - * @param priv - * Pointer to private data structure. * @param dev * Pointer to rte_eth_dev structure. + * + * @return + * Pointer to selected Rx burst function. 
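+ *
+ * The caller installs the result itself, e.g. (sketch of the intended use):
+ *
+ * @code
+ * dev->rx_pkt_burst = mlx5_select_rx_function(dev);
+ * dev->tx_pkt_burst = mlx5_select_tx_function(dev);
+ * @endcode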
 */
-void
-priv_dev_select_rx_function(struct priv *priv, struct rte_eth_dev *dev)
+eth_rx_burst_t
+mlx5_select_rx_function(struct rte_eth_dev *dev)
{
-	assert(priv != NULL);
+	eth_rx_burst_t rx_pkt_burst = mlx5_rx_burst;
+
	assert(dev != NULL);
-	if (priv_check_vec_rx_support(priv) > 0) {
-		dev->rx_pkt_burst = mlx5_rx_burst_vec;
-		DEBUG("selected RX vectorized function");
-	} else {
-		dev->rx_pkt_burst = mlx5_rx_burst;
+	if (mlx5_check_vec_rx_support(dev) > 0) {
+		rx_pkt_burst = mlx5_rx_burst_vec;
+		DRV_LOG(DEBUG, "port %u selected Rx vectorized function",
+			dev->data->port_id);
	}
+	return rx_pkt_burst;
}
diff --git a/drivers/net/mlx5/mlx5_flow.c b/drivers/net/mlx5/mlx5_flow.c
index 092644ff..57b654c3 100644
--- a/drivers/net/mlx5/mlx5_flow.c
+++ b/drivers/net/mlx5/mlx5_flow.c
@@ -44,10 +44,12 @@
 #pragma GCC diagnostic error "-Wpedantic"
 #endif
 
+#include <rte_common.h>
 #include <rte_ethdev.h>
 #include <rte_flow.h>
 #include <rte_flow_driver.h>
 #include <rte_malloc.h>
+#include <rte_ip.h>
 
 #include "mlx5.h"
 #include "mlx5_defs.h"
@@ -83,40 +85,46 @@ ibv_destroy_counter_set(struct ibv_counter_set *cs)
 extern const struct eth_dev_ops mlx5_dev_ops;
 extern const struct eth_dev_ops mlx5_dev_ops_isolate;
 
+/** Structure given to the conversion functions. */
+struct mlx5_flow_data {
+	struct mlx5_flow_parse *parser; /** Parser context. */
+	struct rte_flow_error *error; /** Error context. */
+};
+
 static int
 mlx5_flow_create_eth(const struct rte_flow_item *item,
		     const void *default_mask,
-		     void *data);
+		     struct mlx5_flow_data *data);
 
 static int
 mlx5_flow_create_vlan(const struct rte_flow_item *item,
		      const void *default_mask,
-		      void *data);
+		      struct mlx5_flow_data *data);
 
 static int
 mlx5_flow_create_ipv4(const struct rte_flow_item *item,
		      const void *default_mask,
-		      void *data);
+		      struct mlx5_flow_data *data);
 
 static int
 mlx5_flow_create_ipv6(const struct rte_flow_item *item,
		      const void *default_mask,
-		      void *data);
+		      struct mlx5_flow_data *data);
 
 static int
 mlx5_flow_create_udp(const struct rte_flow_item *item,
		     const void *default_mask,
-		     void *data);
+		     struct mlx5_flow_data *data);
 
 static int
 mlx5_flow_create_tcp(const struct rte_flow_item *item,
		     const void *default_mask,
-		     void *data);
+		     struct mlx5_flow_data *data);
 
 static int
 mlx5_flow_create_vxlan(const struct rte_flow_item *item,
		       const void *default_mask,
-		       void *data);
+		       struct mlx5_flow_data *data);
 
 struct mlx5_flow_parse;
 
@@ -128,7 +136,7 @@ static int
 mlx5_flow_create_flag_mark(struct mlx5_flow_parse *parser, uint32_t mark_id);
 
 static int
-mlx5_flow_create_count(struct priv *priv, struct mlx5_flow_parse *parser);
+mlx5_flow_create_count(struct rte_eth_dev *dev, struct mlx5_flow_parse *parser);
 
 /* Hash RX queue types.
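+ *
+ * hash_rxq_init[] below assigns one flow_priority per type: L4 types
+ * (TCP/UDP) get 1, L3 types (IPv4/IPv6) get 2 and the Ethernet catch-all
+ * gets 3, so more specific matches take precedence.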
 */
 enum hash_rxq_type {
@@ -157,7 +165,7 @@ const struct hash_rxq_init hash_rxq_init[] = {
				IBV_RX_HASH_SRC_PORT_TCP |
				IBV_RX_HASH_DST_PORT_TCP),
		.dpdk_rss_hf = ETH_RSS_NONFRAG_IPV4_TCP,
-		.flow_priority = 0,
+		.flow_priority = 1,
		.ip_version = MLX5_IPV4,
	},
	[HASH_RXQ_UDPV4] = {
@@ -166,7 +174,7 @@ const struct hash_rxq_init hash_rxq_init[] = {
				IBV_RX_HASH_SRC_PORT_UDP |
				IBV_RX_HASH_DST_PORT_UDP),
		.dpdk_rss_hf = ETH_RSS_NONFRAG_IPV4_UDP,
-		.flow_priority = 0,
+		.flow_priority = 1,
		.ip_version = MLX5_IPV4,
	},
	[HASH_RXQ_IPV4] = {
@@ -174,7 +182,7 @@ const struct hash_rxq_init hash_rxq_init[] = {
			      IBV_RX_HASH_DST_IPV4),
		.dpdk_rss_hf = (ETH_RSS_IPV4 |
				ETH_RSS_FRAG_IPV4),
-		.flow_priority = 1,
+		.flow_priority = 2,
		.ip_version = MLX5_IPV4,
	},
	[HASH_RXQ_TCPV6] = {
@@ -183,7 +191,7 @@ const struct hash_rxq_init hash_rxq_init[] = {
				IBV_RX_HASH_SRC_PORT_TCP |
				IBV_RX_HASH_DST_PORT_TCP),
		.dpdk_rss_hf = ETH_RSS_NONFRAG_IPV6_TCP,
-		.flow_priority = 0,
+		.flow_priority = 1,
		.ip_version = MLX5_IPV6,
	},
	[HASH_RXQ_UDPV6] = {
@@ -192,7 +200,7 @@ const struct hash_rxq_init hash_rxq_init[] = {
				IBV_RX_HASH_SRC_PORT_UDP |
				IBV_RX_HASH_DST_PORT_UDP),
		.dpdk_rss_hf = ETH_RSS_NONFRAG_IPV6_UDP,
-		.flow_priority = 0,
+		.flow_priority = 1,
		.ip_version = MLX5_IPV6,
	},
	[HASH_RXQ_IPV6] = {
@@ -200,13 +208,13 @@ const struct hash_rxq_init hash_rxq_init[] = {
			      IBV_RX_HASH_DST_IPV6),
		.dpdk_rss_hf = (ETH_RSS_IPV6 |
				ETH_RSS_FRAG_IPV6),
-		.flow_priority = 1,
+		.flow_priority = 2,
		.ip_version = MLX5_IPV6,
	},
	[HASH_RXQ_ETH] = {
		.hash_fields = 0,
		.dpdk_rss_hf = 0,
-		.flow_priority = 2,
+		.flow_priority = 3,
	},
};
 
@@ -286,11 +294,12 @@ struct mlx5_flow_items {
	 * Internal structure to store the conversion.
	 *
	 * @return
-	 *   0 on success, negative value otherwise.
+	 *   0 on success, a negative errno value otherwise and rte_errno is
+	 *   set.
	 */
	int (*convert)(const struct rte_flow_item *item,
		       const void *default_mask,
-		       void *data);
+		       struct mlx5_flow_data *data);
	/** Size in bytes of the destination structure. */
	const unsigned int dst_sz;
	/** List of possible following items. */
@@ -475,9 +484,17 @@ struct mlx5_fdir {
		struct rte_flow_item_ipv6 ipv6;
	} l3;
	union {
+		struct rte_flow_item_ipv4 ipv4;
+		struct rte_flow_item_ipv6 ipv6;
+	} l3_mask;
+	union {
		struct rte_flow_item_udp udp;
		struct rte_flow_item_tcp tcp;
	} l4;
+	union {
+		struct rte_flow_item_udp udp;
+		struct rte_flow_item_tcp tcp;
+	} l4_mask;
	struct rte_flow_action_queue queue;
};
 
@@ -488,7 +505,7 @@ struct ibv_spec_header {
};
 
/**
- * Check support for a given item.
+ * Check whether an item is fully supported by the NIC matching capability.
 *
 * @param item[in]
 *   Item specification.
 * @param mask[in]
 *   Bit-mask covering supported fields.
 * @param size
 *   Bit-Mask size in bytes.
 *
 * @return
- *   0 on success.
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
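+ *
+ * For instance (hypothetical snippet, not part of this patch), an Ethernet
+ * item can be validated against the generic rte_flow default mask:
+ *
+ * @code
+ * ret = mlx5_flow_item_validate(item,
+ *				 (const uint8_t *)&rte_flow_item_eth_mask,
+ *				 sizeof(struct rte_flow_item_eth));
+ * @endcode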
 */
 static int
 mlx5_flow_item_validate(const struct rte_flow_item *item,
			const uint8_t *mask, unsigned int size)
 {
-	int ret = 0;
-
-	if (!item->spec && (item->mask || item->last))
-		return -1;
-	if (item->spec && !item->mask) {
-		unsigned int i;
-		const uint8_t *spec = item->spec;
-
-		for (i = 0; i < size; ++i)
-			if ((spec[i] | mask[i]) != mask[i])
-				return -1;
-	}
-	if (item->last && !item->mask) {
-		unsigned int i;
-		const uint8_t *spec = item->last;
-
-		for (i = 0; i < size; ++i)
-			if ((spec[i] | mask[i]) != mask[i])
-				return -1;
-	}
-	if (item->mask) {
-		unsigned int i;
-		const uint8_t *spec = item->spec;
-
-		for (i = 0; i < size; ++i)
-			if ((spec[i] | mask[i]) != mask[i])
-				return -1;
-	}
-	if (item->spec && item->last) {
-		uint8_t spec[size];
-		uint8_t last[size];
-		const uint8_t *apply = mask;
-		unsigned int i;
+	unsigned int i;
+	const uint8_t *spec = item->spec;
+	const uint8_t *last = item->last;
+	const uint8_t *m = item->mask ? item->mask : mask;
 
-		if (item->mask)
-			apply = item->mask;
-		for (i = 0; i < size; ++i) {
-			spec[i] = ((const uint8_t *)item->spec)[i] & apply[i];
-			last[i] = ((const uint8_t *)item->last)[i] & apply[i];
-		}
-		ret = memcmp(spec, last, size);
+	if (!spec && (item->mask || last))
+		goto error;
+	if (!spec)
+		return 0;
+	/*
+	 * Single-pass check to make sure that:
+	 * - item->mask is supported, no bits are set outside mask.
+	 * - Both masked item->spec and item->last are equal (no range
+	 *   supported).
+	 */
+	for (i = 0; i < size; i++) {
+		if (!m[i])
+			continue;
+		if ((m[i] | mask[i]) != mask[i])
+			goto error;
+		if (last && ((spec[i] & m[i]) != (last[i] & m[i])))
+			goto error;
	}
-	return ret;
+	return 0;
+error:
+	rte_errno = ENOTSUP;
+	return -rte_errno;
 }
 
 /**
 * Copy the RSS configuration from the user one; if rss_conf is NULL,
 * use the driver default one.
 *
- * @param priv
- *   Pointer to private structure.
 * @param parser
 *   Internal parser structure.
 * @param rss_conf
 *   User RSS configuration to save.
 *
 * @return
- *   0 on success, errno value on failure.
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
 static int
-priv_flow_convert_rss_conf(struct priv *priv,
-			   struct mlx5_flow_parse *parser,
+mlx5_flow_convert_rss_conf(struct mlx5_flow_parse *parser,
			   const struct rte_eth_rss_conf *rss_conf)
 {
	/*
	 * This function is also called at the beginning of
-	 * priv_flow_convert_actions() to initialize the parser with the
+	 * mlx5_flow_convert_actions() to initialize the parser with the
	 * device default RSS configuration.
	 */
-	(void)priv;
	if (rss_conf) {
-		if (rss_conf->rss_hf & MLX5_RSS_HF_MASK)
-			return EINVAL;
-		if (rss_conf->rss_key_len != 40)
-			return EINVAL;
+		if (rss_conf->rss_hf & MLX5_RSS_HF_MASK) {
+			rte_errno = EINVAL;
+			return -rte_errno;
+		}
+		if (rss_conf->rss_key_len != 40) {
+			rte_errno = EINVAL;
+			return -rte_errno;
+		}
		if (rss_conf->rss_key_len && rss_conf->rss_key) {
			parser->rss_conf.rss_key_len = rss_conf->rss_key_len;
			memcpy(parser->rss_key, rss_conf->rss_key,
@@ -594,26 +595,18 @@ priv_flow_convert_rss_conf(struct priv *priv,
 /**
 * Extract attribute to the parser.
 *
- * @param priv
- *   Pointer to private structure.
 * @param[in] attr
 *   Flow rule attributes.
 * @param[out] error
 *   Perform verbose error reporting if not NULL.
- * @param[in, out] parser
- *   Internal parser structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
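+ *
+ * Only default ingress attributes pass this check, e.g. (sketch):
+ *
+ * @code
+ * const struct rte_flow_attr attr = { .ingress = 1 };
+ * @endcode
+ *
+ * A non-zero group, for instance, is rejected with ENOTSUP.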
*/ static int -priv_flow_convert_attributes(struct priv *priv, - const struct rte_flow_attr *attr, - struct rte_flow_error *error, - struct mlx5_flow_parse *parser) +mlx5_flow_convert_attributes(const struct rte_flow_attr *attr, + struct rte_flow_error *error) { - (void)priv; - (void)parser; if (attr->group) { rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ATTR_GROUP, @@ -648,8 +641,8 @@ priv_flow_convert_attributes(struct priv *priv, /** * Extract actions request to the parser. * - * @param priv - * Pointer to private structure. + * @param dev + * Pointer to Ethernet device. * @param[in] actions * Associated actions (list terminated by the END action). * @param[out] error @@ -661,18 +654,23 @@ priv_flow_convert_attributes(struct priv *priv, * 0 on success, a negative errno value otherwise and rte_errno is set. */ static int -priv_flow_convert_actions(struct priv *priv, +mlx5_flow_convert_actions(struct rte_eth_dev *dev, const struct rte_flow_action actions[], struct rte_flow_error *error, struct mlx5_flow_parse *parser) { + struct priv *priv = dev->data->dev_private; + int ret; + /* * Add default RSS configuration necessary for Verbs to create QP even * if no RSS is necessary. */ - priv_flow_convert_rss_conf(priv, parser, - (const struct rte_eth_rss_conf *) - &priv->rss_conf); + ret = mlx5_flow_convert_rss_conf(parser, + (const struct rte_eth_rss_conf *) + &priv->rss_conf); + if (ret) + return ret; for (; actions->type != RTE_FLOW_ACTION_TYPE_END; ++actions) { if (actions->type == RTE_FLOW_ACTION_TYPE_VOID) { continue; @@ -737,6 +735,14 @@ priv_flow_convert_actions(struct priv *priv, return -rte_errno; } } + if (rss->num > RTE_DIM(parser->queues)) { + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ACTION, + actions, + "too many queues for RSS" + " context"); + return -rte_errno; + } for (n = 0; n < rss->num; ++n) { if (rss->queue[n] >= priv->rxqs_n) { rte_flow_error_set(error, EINVAL, @@ -750,8 +756,7 @@ priv_flow_convert_actions(struct priv *priv, for (n = 0; n < rss->num; ++n) parser->queues[n] = rss->queue[n]; parser->queues_n = rss->num; - if (priv_flow_convert_rss_conf(priv, parser, - rss->rss_conf)) { + if (mlx5_flow_convert_rss_conf(parser, rss->rss_conf)) { rte_flow_error_set(error, EINVAL, RTE_FLOW_ERROR_TYPE_ACTION, actions, @@ -805,8 +810,6 @@ exit_action_not_supported: /** * Validate items. * - * @param priv - * Pointer to private structure. * @param[in] items * Pattern specification (list terminated by the END pattern item). * @param[out] error @@ -818,22 +821,20 @@ exit_action_not_supported: * 0 on success, a negative errno value otherwise and rte_errno is set. */ static int -priv_flow_convert_items_validate(struct priv *priv, - const struct rte_flow_item items[], +mlx5_flow_convert_items_validate(const struct rte_flow_item items[], struct rte_flow_error *error, struct mlx5_flow_parse *parser) { const struct mlx5_flow_items *cur_item = mlx5_flow_items; unsigned int i; + int ret = 0; - (void)priv; /* Initialise the offsets to start after verbs attribute. 
 */
	for (i = 0; i != hash_rxq_init_n; ++i)
		parser->queue[i].offset = sizeof(struct ibv_flow_attr);
	for (; items->type != RTE_FLOW_ITEM_TYPE_END; ++items) {
		const struct mlx5_flow_items *token = NULL;
		unsigned int n;
-		int err;
 
		if (items->type == RTE_FLOW_ITEM_TYPE_VOID)
			continue;
@@ -846,13 +847,15 @@ priv_flow_convert_items_validate(struct priv *priv,
				break;
			}
		}
-		if (!token)
+		if (!token) {
+			ret = -ENOTSUP;
			goto exit_item_not_supported;
+		}
		cur_item = token;
-		err = mlx5_flow_item_validate(items,
+		ret = mlx5_flow_item_validate(items,
					      (const uint8_t *)cur_item->mask,
					      cur_item->mask_sz);
-		if (err)
+		if (ret)
			goto exit_item_not_supported;
		if (items->type == RTE_FLOW_ITEM_TYPE_VXLAN) {
			if (parser->inner) {
@@ -889,57 +892,76 @@ priv_flow_convert_items_validate(struct priv *priv,
	}
	return 0;
exit_item_not_supported:
-	rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM,
-			   items, "item not supported");
-	return -rte_errno;
+	return rte_flow_error_set(error, -ret, RTE_FLOW_ERROR_TYPE_ITEM,
+				  items, "item not supported");
}
 
/**
 * Allocate memory space to store verbs flow attributes.
 *
- * @param priv
- *   Pointer to private structure.
- * @param[in] priority
- *   Flow priority.
 * @param[in] size
 *   Amount of bytes to allocate.
 * @param[out] error
 *   Perform verbose error reporting if not NULL.
 *
 * @return
- *   A verbs flow attribute on success, NULL otherwise.
+ *   A verbs flow attribute on success, NULL otherwise and rte_errno is set.
 */
-static struct ibv_flow_attr*
-priv_flow_convert_allocate(struct priv *priv,
-			   unsigned int priority,
-			   unsigned int size,
-			   struct rte_flow_error *error)
+static struct ibv_flow_attr *
+mlx5_flow_convert_allocate(unsigned int size, struct rte_flow_error *error)
{
	struct ibv_flow_attr *ibv_attr;
 
-	(void)priv;
	ibv_attr = rte_calloc(__func__, 1, size, 0);
	if (!ibv_attr) {
		rte_flow_error_set(error, ENOMEM,
				   RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
				   NULL,
-				   "cannot allocate verbs spec attributes.");
+				   "cannot allocate verbs spec attributes");
		return NULL;
	}
-	ibv_attr->priority = priority;
	return ibv_attr;
}
 
/**
+ * Give inner packet matching a higher priority than non-inner (outer)
+ * matching.
+ *
+ * @param[in, out] parser
+ *   Internal parser structure.
+ * @param attr
+ *   User flow attribute.
+ */
+static void
+mlx5_flow_update_priority(struct mlx5_flow_parse *parser,
+			  const struct rte_flow_attr *attr)
+{
+	unsigned int i;
+
+	if (parser->drop) {
+		parser->queue[HASH_RXQ_ETH].ibv_attr->priority =
+			attr->priority +
+			hash_rxq_init[HASH_RXQ_ETH].flow_priority;
+		return;
+	}
+	for (i = 0; i != hash_rxq_init_n; ++i) {
+		if (parser->queue[i].ibv_attr) {
+			parser->queue[i].ibv_attr->priority =
+				attr->priority +
+				hash_rxq_init[i].flow_priority -
+				(parser->inner ? 1 : 0);
+		}
+	}
+}
+
+/**
 * Finalise verbs flow attributes.
 *
- * @param priv
- *   Pointer to private structure.
 * @param[in, out] parser
 *   Internal parser structure.
 */
static void
-priv_flow_convert_finalise(struct priv *priv, struct mlx5_flow_parse *parser)
+mlx5_flow_convert_finalise(struct mlx5_flow_parse *parser)
{
	const unsigned int ipv4 =
		hash_rxq_init[parser->layer].ip_version == MLX5_IPV4;
@@ -950,7 +972,16 @@ priv_flow_convert_finalise(struct priv *priv, struct mlx5_flow_parse *parser)
	const enum hash_rxq_type ip =
		ipv4 ? HASH_RXQ_IPV4 : HASH_RXQ_IPV6;
	unsigned int i;
 
-	(void)priv;
+	/* Remove any other flow not matching the pattern.
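+	 * A single queue with no RSS hash function requested only needs the
+	 * HASH_RXQ_ETH attribute; the per-protocol attributes allocated
+	 * earlier are freed.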
*/ + if (parser->queues_n == 1 && !parser->rss_conf.rss_hf) { + for (i = 0; i != hash_rxq_init_n; ++i) { + if (i == HASH_RXQ_ETH) + continue; + rte_free(parser->queue[i].ibv_attr); + parser->queue[i].ibv_attr = NULL; + } + return; + } if (parser->layer == HASH_RXQ_ETH) { goto fill; } else { @@ -1049,8 +1080,8 @@ fill: /** * Validate and convert a flow supported by the NIC. * - * @param priv - * Pointer to private structure. + * @param dev + * Pointer to Ethernet device. * @param[in] attr * Flow rule attributes. * @param[in] pattern @@ -1066,7 +1097,7 @@ fill: * 0 on success, a negative errno value otherwise and rte_errno is set. */ static int -priv_flow_convert(struct priv *priv, +mlx5_flow_convert(struct rte_eth_dev *dev, const struct rte_flow_attr *attr, const struct rte_flow_item items[], const struct rte_flow_action actions[], @@ -1083,35 +1114,31 @@ priv_flow_convert(struct priv *priv, .layer = HASH_RXQ_ETH, .mark_id = MLX5_FLOW_MARK_DEFAULT, }; - ret = priv_flow_convert_attributes(priv, attr, error, parser); + ret = mlx5_flow_convert_attributes(attr, error); if (ret) return ret; - ret = priv_flow_convert_actions(priv, actions, error, parser); + ret = mlx5_flow_convert_actions(dev, actions, error, parser); if (ret) return ret; - ret = priv_flow_convert_items_validate(priv, items, error, parser); + ret = mlx5_flow_convert_items_validate(items, error, parser); if (ret) return ret; - priv_flow_convert_finalise(priv, parser); + mlx5_flow_convert_finalise(parser); /* * Second step. * Allocate the memory space to store verbs specifications. */ if (parser->drop) { + unsigned int offset = parser->queue[HASH_RXQ_ETH].offset; + parser->queue[HASH_RXQ_ETH].ibv_attr = - priv_flow_convert_allocate - (priv, attr->priority, - parser->queue[HASH_RXQ_ETH].offset, - error); + mlx5_flow_convert_allocate(offset, error); if (!parser->queue[HASH_RXQ_ETH].ibv_attr) - return ENOMEM; + goto exit_enomem; parser->queue[HASH_RXQ_ETH].offset = sizeof(struct ibv_flow_attr); } else { for (i = 0; i != hash_rxq_init_n; ++i) { - unsigned int priority = - attr->priority + - hash_rxq_init[i].flow_priority; unsigned int offset; if (!(parser->rss_conf.rss_hf & @@ -1120,8 +1147,7 @@ priv_flow_convert(struct priv *priv, continue; offset = parser->queue[i].offset; parser->queue[i].ibv_attr = - priv_flow_convert_allocate(priv, priority, - offset, error); + mlx5_flow_convert_allocate(offset, error); if (!parser->queue[i].ibv_attr) goto exit_enomem; parser->queue[i].offset = sizeof(struct ibv_flow_attr); @@ -1130,6 +1156,11 @@ priv_flow_convert(struct priv *priv, /* Third step. Conversion parse, fill the specifications. */ parser->inner = 0; for (; items->type != RTE_FLOW_ITEM_TYPE_END; ++items) { + struct mlx5_flow_data data = { + .parser = parser, + .error = error, + }; + if (items->type == RTE_FLOW_ITEM_TYPE_VOID) continue; cur_item = &mlx5_flow_items[items->type]; @@ -1137,18 +1168,14 @@ priv_flow_convert(struct priv *priv, (cur_item->default_mask ? cur_item->default_mask : cur_item->mask), - parser); - if (ret) { - rte_flow_error_set(error, ret, - RTE_FLOW_ERROR_TYPE_ITEM, - items, "item not supported"); + &data); + if (ret) goto exit_free; - } } if (parser->mark) mlx5_flow_create_flag_mark(parser, parser->mark_id); if (parser->count && parser->create) { - mlx5_flow_create_count(priv, parser); + mlx5_flow_create_count(dev, parser); if (!parser->cs) goto exit_count_error; } @@ -1156,13 +1183,9 @@ priv_flow_convert(struct priv *priv, * Last step. Complete missing specification to reach the RSS * configuration. 
*/ - if (!parser->drop) { - priv_flow_convert_finalise(priv, parser); - } else { - parser->queue[HASH_RXQ_ETH].ibv_attr->priority = - attr->priority + - hash_rxq_init[parser->layer].flow_priority; - } + if (!parser->drop) + mlx5_flow_convert_finalise(parser); + mlx5_flow_update_priority(parser, attr); exit_free: /* Only verification is expected, all resources should be released. */ if (!parser->create) { @@ -1181,13 +1204,13 @@ exit_enomem: parser->queue[i].ibv_attr = NULL; } } - rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_UNSPECIFIED, - NULL, "cannot allocate verbs spec attributes."); - return ret; + rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_UNSPECIFIED, + NULL, "cannot allocate verbs spec attributes"); + return -rte_errno; exit_count_error: rte_flow_error_set(error, EINVAL, RTE_FLOW_ERROR_TYPE_UNSPECIFIED, - NULL, "cannot create counter."); - return rte_errno; + NULL, "cannot create counter"); + return -rte_errno; } /** @@ -1233,15 +1256,18 @@ mlx5_flow_create_copy(struct mlx5_flow_parse *parser, void *src, * Default bit-masks to use when item->mask is not provided. * @param data[in, out] * User structure. + * + * @return + * 0 on success, a negative errno value otherwise and rte_errno is set. */ static int mlx5_flow_create_eth(const struct rte_flow_item *item, const void *default_mask, - void *data) + struct mlx5_flow_data *data) { const struct rte_flow_item_eth *spec = item->spec; const struct rte_flow_item_eth *mask = item->mask; - struct mlx5_flow_parse *parser = (struct mlx5_flow_parse *)data; + struct mlx5_flow_parse *parser = data->parser; const unsigned int eth_size = sizeof(struct ibv_flow_spec_eth); struct ibv_flow_spec_eth eth = { .type = parser->inner | IBV_FLOW_SPEC_ETH, @@ -1282,15 +1308,18 @@ mlx5_flow_create_eth(const struct rte_flow_item *item, * Default bit-masks to use when item->mask is not provided. * @param data[in, out] * User structure. + * + * @return + * 0 on success, a negative errno value otherwise and rte_errno is set. */ static int mlx5_flow_create_vlan(const struct rte_flow_item *item, const void *default_mask, - void *data) + struct mlx5_flow_data *data) { const struct rte_flow_item_vlan *spec = item->spec; const struct rte_flow_item_vlan *mask = item->mask; - struct mlx5_flow_parse *parser = (struct mlx5_flow_parse *)data; + struct mlx5_flow_parse *parser = data->parser; struct ibv_flow_spec_eth *eth; const unsigned int eth_size = sizeof(struct ibv_flow_spec_eth); @@ -1308,9 +1337,18 @@ mlx5_flow_create_vlan(const struct rte_flow_item *item, eth->val.vlan_tag = spec->tci; eth->mask.vlan_tag = mask->tci; eth->val.vlan_tag &= eth->mask.vlan_tag; + /* + * From verbs perspective an empty VLAN is equivalent + * to a packet without VLAN layer. + */ + if (!eth->mask.vlan_tag) + goto error; } + return 0; } - return 0; +error: + return rte_flow_error_set(data->error, EINVAL, RTE_FLOW_ERROR_TYPE_ITEM, + item, "VLAN cannot be empty"); } /** @@ -1322,15 +1360,18 @@ mlx5_flow_create_vlan(const struct rte_flow_item *item, * Default bit-masks to use when item->mask is not provided. * @param data[in, out] * User structure. + * + * @return + * 0 on success, a negative errno value otherwise and rte_errno is set. 
*/ static int mlx5_flow_create_ipv4(const struct rte_flow_item *item, const void *default_mask, - void *data) + struct mlx5_flow_data *data) { const struct rte_flow_item_ipv4 *spec = item->spec; const struct rte_flow_item_ipv4 *mask = item->mask; - struct mlx5_flow_parse *parser = (struct mlx5_flow_parse *)data; + struct mlx5_flow_parse *parser = data->parser; unsigned int ipv4_size = sizeof(struct ibv_flow_spec_ipv4_ext); struct ibv_flow_spec_ipv4_ext ipv4 = { .type = parser->inner | IBV_FLOW_SPEC_IPV4_EXT, @@ -1374,15 +1415,18 @@ mlx5_flow_create_ipv4(const struct rte_flow_item *item, * Default bit-masks to use when item->mask is not provided. * @param data[in, out] * User structure. + * + * @return + * 0 on success, a negative errno value otherwise and rte_errno is set. */ static int mlx5_flow_create_ipv6(const struct rte_flow_item *item, const void *default_mask, - void *data) + struct mlx5_flow_data *data) { const struct rte_flow_item_ipv6 *spec = item->spec; const struct rte_flow_item_ipv6 *mask = item->mask; - struct mlx5_flow_parse *parser = (struct mlx5_flow_parse *)data; + struct mlx5_flow_parse *parser = data->parser; unsigned int ipv6_size = sizeof(struct ibv_flow_spec_ipv6); struct ibv_flow_spec_ipv6 ipv6 = { .type = parser->inner | IBV_FLOW_SPEC_IPV6, @@ -1394,6 +1438,8 @@ mlx5_flow_create_ipv6(const struct rte_flow_item *item, parser->layer = HASH_RXQ_IPV6; if (spec) { unsigned int i; + uint32_t vtc_flow_val; + uint32_t vtc_flow_mask; if (!mask) mask = default_mask; @@ -1405,7 +1451,20 @@ mlx5_flow_create_ipv6(const struct rte_flow_item *item, RTE_DIM(ipv6.mask.src_ip)); memcpy(&ipv6.mask.dst_ip, mask->hdr.dst_addr, RTE_DIM(ipv6.mask.dst_ip)); - ipv6.mask.flow_label = mask->hdr.vtc_flow; + vtc_flow_val = rte_be_to_cpu_32(spec->hdr.vtc_flow); + vtc_flow_mask = rte_be_to_cpu_32(mask->hdr.vtc_flow); + ipv6.val.flow_label = + rte_cpu_to_be_32((vtc_flow_val & IPV6_HDR_FL_MASK) >> + IPV6_HDR_FL_SHIFT); + ipv6.val.traffic_class = (vtc_flow_val & IPV6_HDR_TC_MASK) >> + IPV6_HDR_TC_SHIFT; + ipv6.val.next_hdr = spec->hdr.proto; + ipv6.val.hop_limit = spec->hdr.hop_limits; + ipv6.mask.flow_label = + rte_cpu_to_be_32((vtc_flow_mask & IPV6_HDR_FL_MASK) >> + IPV6_HDR_FL_SHIFT); + ipv6.mask.traffic_class = (vtc_flow_mask & IPV6_HDR_TC_MASK) >> + IPV6_HDR_TC_SHIFT; ipv6.mask.next_hdr = mask->hdr.proto; ipv6.mask.hop_limit = mask->hdr.hop_limits; /* Remove unwanted bits from values. */ @@ -1414,6 +1473,7 @@ mlx5_flow_create_ipv6(const struct rte_flow_item *item, ipv6.val.dst_ip[i] &= ipv6.mask.dst_ip[i]; } ipv6.val.flow_label &= ipv6.mask.flow_label; + ipv6.val.traffic_class &= ipv6.mask.traffic_class; ipv6.val.next_hdr &= ipv6.mask.next_hdr; ipv6.val.hop_limit &= ipv6.mask.hop_limit; } @@ -1430,15 +1490,18 @@ mlx5_flow_create_ipv6(const struct rte_flow_item *item, * Default bit-masks to use when item->mask is not provided. * @param data[in, out] * User structure. + * + * @return + * 0 on success, a negative errno value otherwise and rte_errno is set. 
*/ static int mlx5_flow_create_udp(const struct rte_flow_item *item, const void *default_mask, - void *data) + struct mlx5_flow_data *data) { const struct rte_flow_item_udp *spec = item->spec; const struct rte_flow_item_udp *mask = item->mask; - struct mlx5_flow_parse *parser = (struct mlx5_flow_parse *)data; + struct mlx5_flow_parse *parser = data->parser; unsigned int udp_size = sizeof(struct ibv_flow_spec_tcp_udp); struct ibv_flow_spec_tcp_udp udp = { .type = parser->inner | IBV_FLOW_SPEC_UDP, @@ -1476,15 +1539,18 @@ mlx5_flow_create_udp(const struct rte_flow_item *item, * Default bit-masks to use when item->mask is not provided. * @param data[in, out] * User structure. + * + * @return + * 0 on success, a negative errno value otherwise and rte_errno is set. */ static int mlx5_flow_create_tcp(const struct rte_flow_item *item, const void *default_mask, - void *data) + struct mlx5_flow_data *data) { const struct rte_flow_item_tcp *spec = item->spec; const struct rte_flow_item_tcp *mask = item->mask; - struct mlx5_flow_parse *parser = (struct mlx5_flow_parse *)data; + struct mlx5_flow_parse *parser = data->parser; unsigned int tcp_size = sizeof(struct ibv_flow_spec_tcp_udp); struct ibv_flow_spec_tcp_udp tcp = { .type = parser->inner | IBV_FLOW_SPEC_TCP, @@ -1522,15 +1588,18 @@ mlx5_flow_create_tcp(const struct rte_flow_item *item, * Default bit-masks to use when item->mask is not provided. * @param data[in, out] * User structure. + * + * @return + * 0 on success, a negative errno value otherwise and rte_errno is set. */ static int mlx5_flow_create_vxlan(const struct rte_flow_item *item, const void *default_mask, - void *data) + struct mlx5_flow_data *data) { const struct rte_flow_item_vxlan *spec = item->spec; const struct rte_flow_item_vxlan *mask = item->mask; - struct mlx5_flow_parse *parser = (struct mlx5_flow_parse *)data; + struct mlx5_flow_parse *parser = data->parser; unsigned int size = sizeof(struct ibv_flow_spec_tunnel); struct ibv_flow_spec_tunnel vxlan = { .type = parser->inner | IBV_FLOW_SPEC_VXLAN_TUNNEL, @@ -1562,7 +1631,10 @@ mlx5_flow_create_vxlan(const struct rte_flow_item *item, * To avoid such situation, VNI 0 is currently refused. */ if (!vxlan.val.tunnel_id) - return EINVAL; + return rte_flow_error_set(data->error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM, + item, + "VxLAN vni cannot be 0"); mlx5_flow_create_copy(parser, &vxlan, size); return 0; } @@ -1574,6 +1646,9 @@ mlx5_flow_create_vxlan(const struct rte_flow_item *item, * Internal parser structure. * @param mark_id * Mark identifier. + * + * @return + * 0 on success, a negative errno value otherwise and rte_errno is set. */ static int mlx5_flow_create_flag_mark(struct mlx5_flow_parse *parser, uint32_t mark_id) @@ -1593,19 +1668,20 @@ mlx5_flow_create_flag_mark(struct mlx5_flow_parse *parser, uint32_t mark_id) /** * Convert count action to Verbs specification. * - * @param priv - * Pointer to private structure. + * @param dev + * Pointer to Ethernet device. * @param parser * Pointer to MLX5 flow parser structure. * * @return - * 0 on success, errno value on failure. + * 0 on success, a negative errno value otherwise and rte_errno is set. 
*/ static int -mlx5_flow_create_count(struct priv *priv __rte_unused, +mlx5_flow_create_count(struct rte_eth_dev *dev __rte_unused, struct mlx5_flow_parse *parser __rte_unused) { #ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT + struct priv *priv = dev->data->dev_private; unsigned int size = sizeof(struct ibv_flow_spec_counter_action); struct ibv_counter_set_init_attr init_attr = {0}; struct ibv_flow_spec_counter_action counter = { @@ -1616,8 +1692,10 @@ mlx5_flow_create_count(struct priv *priv __rte_unused, init_attr.counter_set_id = 0; parser->cs = ibv_create_counter_set(priv->ctx, &init_attr); - if (!parser->cs) - return EINVAL; + if (!parser->cs) { + rte_errno = EINVAL; + return -rte_errno; + } counter.counter_set_handle = parser->cs->handle; mlx5_flow_create_copy(parser, &counter, size); #endif @@ -1627,8 +1705,8 @@ mlx5_flow_create_count(struct priv *priv __rte_unused, /** * Complete flow rule creation with a drop queue. * - * @param priv - * Pointer to private structure. + * @param dev + * Pointer to Ethernet device. * @param parser * Internal parser structure. * @param flow @@ -1637,17 +1715,17 @@ mlx5_flow_create_count(struct priv *priv __rte_unused, * Perform verbose error reporting if not NULL. * * @return - * 0 on success, errno value on failure. + * 0 on success, a negative errno value otherwise and rte_errno is set. */ static int -priv_flow_create_action_queue_drop(struct priv *priv, +mlx5_flow_create_action_queue_drop(struct rte_eth_dev *dev, struct mlx5_flow_parse *parser, struct rte_flow *flow, struct rte_flow_error *error) { + struct priv *priv = dev->data->dev_private; struct ibv_flow_spec_action_drop *drop; unsigned int size = sizeof(struct ibv_flow_spec_action_drop); - int err = 0; assert(priv->pd); assert(priv->ctx); @@ -1664,7 +1742,7 @@ priv_flow_create_action_queue_drop(struct priv *priv, parser->queue[HASH_RXQ_ETH].ibv_attr; if (parser->count) flow->cs = parser->cs; - if (!priv->dev->data->dev_started) + if (!dev->data->dev_started) return 0; parser->queue[HASH_RXQ_ETH].ibv_attr = NULL; flow->frxq[HASH_RXQ_ETH].ibv_flow = @@ -1673,7 +1751,6 @@ priv_flow_create_action_queue_drop(struct priv *priv, if (!flow->frxq[HASH_RXQ_ETH].ibv_flow) { rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE, NULL, "flow rule creation failure"); - err = ENOMEM; goto error; } return 0; @@ -1692,14 +1769,14 @@ error: flow->cs = NULL; parser->cs = NULL; } - return err; + return -rte_errno; } /** * Create hash Rx queues when RSS is enabled. * - * @param priv - * Pointer to private structure. + * @param dev + * Pointer to Ethernet device. * @param parser * Internal parser structure. * @param flow @@ -1708,10 +1785,10 @@ error: * Perform verbose error reporting if not NULL. * * @return - * 0 on success, a errno value otherwise and rte_errno is set. + * 0 on success, a negative errno value otherwise and rte_errno is set. 
*/ static int -priv_flow_create_action_queue_rss(struct priv *priv, +mlx5_flow_create_action_queue_rss(struct rte_eth_dev *dev, struct mlx5_flow_parse *parser, struct rte_flow *flow, struct rte_flow_error *error) @@ -1726,29 +1803,29 @@ priv_flow_create_action_queue_rss(struct priv *priv, flow->frxq[i].ibv_attr = parser->queue[i].ibv_attr; parser->queue[i].ibv_attr = NULL; hash_fields = hash_rxq_init[i].hash_fields; - if (!priv->dev->data->dev_started) + if (!dev->data->dev_started) continue; flow->frxq[i].hrxq = - mlx5_priv_hrxq_get(priv, - parser->rss_conf.rss_key, - parser->rss_conf.rss_key_len, - hash_fields, - parser->queues, - parser->queues_n); + mlx5_hrxq_get(dev, + parser->rss_conf.rss_key, + parser->rss_conf.rss_key_len, + hash_fields, + parser->queues, + parser->queues_n); if (flow->frxq[i].hrxq) continue; flow->frxq[i].hrxq = - mlx5_priv_hrxq_new(priv, - parser->rss_conf.rss_key, - parser->rss_conf.rss_key_len, - hash_fields, - parser->queues, - parser->queues_n); + mlx5_hrxq_new(dev, + parser->rss_conf.rss_key, + parser->rss_conf.rss_key_len, + hash_fields, + parser->queues, + parser->queues_n); if (!flow->frxq[i].hrxq) { - rte_flow_error_set(error, ENOMEM, - RTE_FLOW_ERROR_TYPE_HANDLE, - NULL, "cannot create hash rxq"); - return ENOMEM; + return rte_flow_error_set(error, ENOMEM, + RTE_FLOW_ERROR_TYPE_HANDLE, + NULL, + "cannot create hash rxq"); } } return 0; @@ -1757,8 +1834,8 @@ priv_flow_create_action_queue_rss(struct priv *priv, /** * Complete flow rule creation. * - * @param priv - * Pointer to private structure. + * @param dev + * Pointer to Ethernet device. * @param parser * Internal parser structure. * @param flow @@ -1767,26 +1844,28 @@ priv_flow_create_action_queue_rss(struct priv *priv, * Perform verbose error reporting if not NULL. * * @return - * 0 on success, a errno value otherwise and rte_errno is set. + * 0 on success, a negative errno value otherwise and rte_errno is set. */ static int -priv_flow_create_action_queue(struct priv *priv, +mlx5_flow_create_action_queue(struct rte_eth_dev *dev, struct mlx5_flow_parse *parser, struct rte_flow *flow, struct rte_flow_error *error) { - int err = 0; + struct priv *priv __rte_unused = dev->data->dev_private; + int ret; unsigned int i; + unsigned int flows_n = 0; assert(priv->pd); assert(priv->ctx); assert(!parser->drop); - err = priv_flow_create_action_queue_rss(priv, parser, flow, error); - if (err) + ret = mlx5_flow_create_action_queue_rss(dev, parser, flow, error); + if (ret) goto error; if (parser->count) flow->cs = parser->cs; - if (!priv->dev->data->dev_started) + if (!dev->data->dev_started) return 0; for (i = 0; i != hash_rxq_init_n; ++i) { if (!flow->frxq[i].hrxq) @@ -1798,13 +1877,19 @@ priv_flow_create_action_queue(struct priv *priv, rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE, NULL, "flow rule creation failure"); - err = ENOMEM; goto error; } - DEBUG("%p type %d QP %p ibv_flow %p", - (void *)flow, i, - (void *)flow->frxq[i].hrxq, - (void *)flow->frxq[i].ibv_flow); + ++flows_n; + DRV_LOG(DEBUG, "port %u %p type %d QP %p ibv_flow %p", + dev->data->port_id, + (void *)flow, i, + (void *)flow->frxq[i].hrxq, + (void *)flow->frxq[i].ibv_flow); + } + if (!flows_n) { + rte_flow_error_set(error, EINVAL, RTE_FLOW_ERROR_TYPE_HANDLE, + NULL, "internal error in flow creation"); + goto error; } for (i = 0; i != parser->queues_n; ++i) { struct mlx5_rxq_data *q = @@ -1814,6 +1899,7 @@ priv_flow_create_action_queue(struct priv *priv, } return 0; error: + ret = rte_errno; /* Save rte_errno before cleanup. 
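+	 * The ibv_destroy_flow()/mlx5_hrxq_release() cleanup below may
+	 * overwrite rte_errno, hence the save here and the restore at the
+	 * end of this error path.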
*/ assert(flow); for (i = 0; i != hash_rxq_init_n; ++i) { if (flow->frxq[i].ibv_flow) { @@ -1822,7 +1908,7 @@ error: claim_zero(ibv_destroy_flow(ibv_flow)); } if (flow->frxq[i].hrxq) - mlx5_priv_hrxq_release(priv, flow->frxq[i].hrxq); + mlx5_hrxq_release(dev, flow->frxq[i].hrxq); if (flow->frxq[i].ibv_attr) rte_free(flow->frxq[i].ibv_attr); } @@ -1831,14 +1917,15 @@ error: flow->cs = NULL; parser->cs = NULL; } - return err; + rte_errno = ret; /* Restore rte_errno. */ + return -rte_errno; } /** * Convert a flow. * - * @param priv - * Pointer to private structure. + * @param dev + * Pointer to Ethernet device. * @param list * Pointer to a TAILQ flow list. * @param[in] attr @@ -1851,23 +1938,23 @@ error: * Perform verbose error reporting if not NULL. * * @return - * A flow on success, NULL otherwise. + * A flow on success, NULL otherwise and rte_errno is set. */ static struct rte_flow * -priv_flow_create(struct priv *priv, - struct mlx5_flows *list, - const struct rte_flow_attr *attr, - const struct rte_flow_item items[], - const struct rte_flow_action actions[], - struct rte_flow_error *error) +mlx5_flow_list_create(struct rte_eth_dev *dev, + struct mlx5_flows *list, + const struct rte_flow_attr *attr, + const struct rte_flow_item items[], + const struct rte_flow_action actions[], + struct rte_flow_error *error) { struct mlx5_flow_parse parser = { .create = 1, }; struct rte_flow *flow = NULL; unsigned int i; - int err; + int ret; - err = priv_flow_convert(priv, attr, items, actions, error, &parser); - if (err) + ret = mlx5_flow_convert(dev, attr, items, actions, error, &parser); + if (ret) goto exit; flow = rte_calloc(__func__, 1, sizeof(*flow) + parser.queues_n * sizeof(uint16_t), @@ -1890,14 +1977,15 @@ priv_flow_create(struct priv *priv, memcpy(flow->rss_key, parser.rss_key, parser.rss_conf.rss_key_len); /* finalise the flow. */ if (parser.drop) - err = priv_flow_create_action_queue_drop(priv, &parser, flow, + ret = mlx5_flow_create_action_queue_drop(dev, &parser, flow, error); else - err = priv_flow_create_action_queue(priv, &parser, flow, error); - if (err) + ret = mlx5_flow_create_action_queue(dev, &parser, flow, error); + if (ret) goto exit; TAILQ_INSERT_TAIL(list, flow, next); - DEBUG("Flow created %p", (void *)flow); + DRV_LOG(DEBUG, "port %u flow created %p", dev->data->port_id, + (void *)flow); return flow; exit: for (i = 0; i != hash_rxq_init_n; ++i) { @@ -1921,14 +2009,9 @@ mlx5_flow_validate(struct rte_eth_dev *dev, const struct rte_flow_action actions[], struct rte_flow_error *error) { - struct priv *priv = dev->data->dev_private; - int ret; struct mlx5_flow_parse parser = { .create = 0, }; - priv_lock(priv); - ret = priv_flow_convert(priv, attr, items, actions, error, &parser); - priv_unlock(priv); - return ret; + return mlx5_flow_convert(dev, attr, items, actions, error, &parser); } /** @@ -1945,30 +2028,26 @@ mlx5_flow_create(struct rte_eth_dev *dev, struct rte_flow_error *error) { struct priv *priv = dev->data->dev_private; - struct rte_flow *flow; - priv_lock(priv); - flow = priv_flow_create(priv, &priv->flows, attr, items, actions, - error); - priv_unlock(priv); - return flow; + return mlx5_flow_list_create(dev, &priv->flows, attr, items, actions, + error); } /** - * Destroy a flow. + * Destroy a flow in a list. * - * @param priv - * Pointer to private structure. + * @param dev + * Pointer to Ethernet device. * @param list * Pointer to a TAILQ flow list. * @param[in] flow * Flow to destroy. 
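Editor's note: mlx5_flow_create() is now a thin wrapper that appends the rule to priv->flows through mlx5_flow_list_create(). Seen from an application nothing changes; a minimal rte_flow call that exercises this path (port and queue numbers are arbitrary):

#include <rte_flow.h>

/* Steer all ingress Ethernet traffic to Rx queue 1; returns the
 * flow handle or NULL with rte_errno set, matching the convention
 * documented above. */
static struct rte_flow *
example_queue_flow(uint16_t port_id)
{
	struct rte_flow_attr attr = { .ingress = 1 };
	struct rte_flow_item pattern[] = {
		{ .type = RTE_FLOW_ITEM_TYPE_ETH },
		{ .type = RTE_FLOW_ITEM_TYPE_END },
	};
	struct rte_flow_action_queue queue = { .index = 1 };
	struct rte_flow_action actions[] = {
		{ .type = RTE_FLOW_ACTION_TYPE_QUEUE, .conf = &queue },
		{ .type = RTE_FLOW_ACTION_TYPE_END },
	};
	struct rte_flow_error error;

	return rte_flow_create(port_id, &attr, pattern, actions, &error);
}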
*/ static void -priv_flow_destroy(struct priv *priv, - struct mlx5_flows *list, - struct rte_flow *flow) +mlx5_flow_list_destroy(struct rte_eth_dev *dev, struct mlx5_flows *list, + struct rte_flow *flow) { + struct priv *priv = dev->data->dev_private; unsigned int i; if (flow->drop || !flow->mark) @@ -2015,7 +2094,7 @@ free: if (frxq->ibv_flow) claim_zero(ibv_destroy_flow(frxq->ibv_flow)); if (frxq->hrxq) - mlx5_priv_hrxq_release(priv, frxq->hrxq); + mlx5_hrxq_release(dev, frxq->hrxq); if (frxq->ibv_attr) rte_free(frxq->ibv_attr); } @@ -2025,53 +2104,60 @@ free: flow->cs = NULL; } TAILQ_REMOVE(list, flow, next); - DEBUG("Flow destroyed %p", (void *)flow); + DRV_LOG(DEBUG, "port %u flow destroyed %p", dev->data->port_id, + (void *)flow); rte_free(flow); } /** * Destroy all flows. * - * @param priv - * Pointer to private structure. + * @param dev + * Pointer to Ethernet device. * @param list * Pointer to a TAILQ flow list. */ void -priv_flow_flush(struct priv *priv, struct mlx5_flows *list) +mlx5_flow_list_flush(struct rte_eth_dev *dev, struct mlx5_flows *list) { while (!TAILQ_EMPTY(list)) { struct rte_flow *flow; flow = TAILQ_FIRST(list); - priv_flow_destroy(priv, list, flow); + mlx5_flow_list_destroy(dev, list, flow); } } /** * Create drop queue. * - * @param priv - * Pointer to private structure. + * @param dev + * Pointer to Ethernet device. * * @return - * 0 on success. + * 0 on success, a negative errno value otherwise and rte_errno is set. */ int -priv_flow_create_drop_queue(struct priv *priv) +mlx5_flow_create_drop_queue(struct rte_eth_dev *dev) { + struct priv *priv = dev->data->dev_private; struct mlx5_hrxq_drop *fdq = NULL; assert(priv->pd); assert(priv->ctx); fdq = rte_calloc(__func__, 1, sizeof(*fdq), 0); if (!fdq) { - WARN("cannot allocate memory for drop queue"); - goto error; + DRV_LOG(WARNING, + "port %u cannot allocate memory for drop queue", + dev->data->port_id); + rte_errno = ENOMEM; + return -rte_errno; } fdq->cq = ibv_create_cq(priv->ctx, 1, NULL, NULL, 0); if (!fdq->cq) { - WARN("cannot allocate CQ for drop queue"); + DRV_LOG(WARNING, "port %u cannot allocate CQ for drop queue", + dev->data->port_id); + rte_errno = errno; goto error; } fdq->wq = ibv_create_wq(priv->ctx, @@ -2083,7 +2169,9 @@ priv_flow_create_drop_queue(struct priv *priv) .cq = fdq->cq, }); if (!fdq->wq) { - WARN("cannot allocate WQ for drop queue"); + DRV_LOG(WARNING, "port %u cannot allocate WQ for drop queue", + dev->data->port_id); + rte_errno = errno; goto error; } fdq->ind_table = ibv_create_rwq_ind_table(priv->ctx, @@ -2093,7 +2181,11 @@ priv_flow_create_drop_queue(struct priv *priv) .comp_mask = 0, }); if (!fdq->ind_table) { - WARN("cannot allocate indirection table for drop queue"); + DRV_LOG(WARNING, + "port %u cannot allocate indirection table for drop" + " queue", + dev->data->port_id); + rte_errno = errno; goto error; } fdq->qp = ibv_create_qp_ex(priv->ctx, @@ -2114,7 +2206,9 @@ priv_flow_create_drop_queue(struct priv *priv) .pd = priv->pd }); if (!fdq->qp) { - WARN("cannot allocate QP for drop queue"); + DRV_LOG(WARNING, "port %u cannot allocate QP for drop queue", + dev->data->port_id); + rte_errno = errno; goto error; } priv->flow_drop_queue = fdq; @@ -2131,18 +2225,19 @@ error: if (fdq) rte_free(fdq); priv->flow_drop_queue = NULL; - return -1; + return -rte_errno; } /** * Delete drop queue. * - * @param priv - * Pointer to private structure. + * @param dev + * Pointer to Ethernet device. 
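Editor's note: mlx5_flow_create_drop_queue() above chains four Verbs objects (CQ, WQ, indirection table, QP) and every failure branch falls through to one error label. Destruction has to run in reverse creation order, since each object references its predecessor; the unwind shape as a standalone sketch:

#include <infiniband/verbs.h>

/* Tear down a drop queue chain in reverse order of creation; the
 * parameters mirror the fields of struct mlx5_hrxq_drop. Any of
 * them may be NULL when creation failed midway. */
static void
example_drop_queue_unwind(struct ibv_qp *qp,
			  struct ibv_rwq_ind_table *ind_table,
			  struct ibv_wq *wq, struct ibv_cq *cq)
{
	if (qp)
		(void)ibv_destroy_qp(qp);
	if (ind_table)
		(void)ibv_destroy_rwq_ind_table(ind_table);
	if (wq)
		(void)ibv_destroy_wq(wq);
	if (cq)
		(void)ibv_destroy_cq(cq);
}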
*/ void -priv_flow_delete_drop_queue(struct priv *priv) +mlx5_flow_delete_drop_queue(struct rte_eth_dev *dev) { + struct priv *priv = dev->data->dev_private; struct mlx5_hrxq_drop *fdq = priv->flow_drop_queue; if (!fdq) @@ -2162,14 +2257,15 @@ priv_flow_delete_drop_queue(struct priv *priv) /** * Remove all flows. * - * @param priv - * Pointer to private structure. + * @param dev + * Pointer to Ethernet device. * @param list * Pointer to a TAILQ flow list. */ void -priv_flow_stop(struct priv *priv, struct mlx5_flows *list) +mlx5_flow_stop(struct rte_eth_dev *dev, struct mlx5_flows *list) { + struct priv *priv = dev->data->dev_private; struct rte_flow *flow; TAILQ_FOREACH_REVERSE(flow, list, mlx5_flows, next) { @@ -2182,7 +2278,8 @@ priv_flow_stop(struct priv *priv, struct mlx5_flows *list) claim_zero(ibv_destroy_flow (flow->frxq[HASH_RXQ_ETH].ibv_flow)); flow->frxq[HASH_RXQ_ETH].ibv_flow = NULL; - DEBUG("Flow %p removed", (void *)flow); + DRV_LOG(DEBUG, "port %u flow %p removed", + dev->data->port_id, (void *)flow); /* Next flow. */ continue; } @@ -2211,27 +2308,29 @@ priv_flow_stop(struct priv *priv, struct mlx5_flows *list) continue; claim_zero(ibv_destroy_flow(flow->frxq[i].ibv_flow)); flow->frxq[i].ibv_flow = NULL; - mlx5_priv_hrxq_release(priv, flow->frxq[i].hrxq); + mlx5_hrxq_release(dev, flow->frxq[i].hrxq); flow->frxq[i].hrxq = NULL; } - DEBUG("Flow %p removed", (void *)flow); + DRV_LOG(DEBUG, "port %u flow %p removed", dev->data->port_id, + (void *)flow); } } /** * Add all flows. * - * @param priv - * Pointer to private structure. + * @param dev + * Pointer to Ethernet device. * @param list * Pointer to a TAILQ flow list. * * @return - * 0 on success, a errno value otherwise and rte_errno is set. + * 0 on success, a negative errno value otherwise and rte_errno is set. */ int -priv_flow_start(struct priv *priv, struct mlx5_flows *list) +mlx5_flow_start(struct rte_eth_dev *dev, struct mlx5_flows *list) { + struct priv *priv = dev->data->dev_private; struct rte_flow *flow; TAILQ_FOREACH(flow, list, next) { @@ -2243,12 +2342,14 @@ priv_flow_start(struct priv *priv, struct mlx5_flows *list) (priv->flow_drop_queue->qp, flow->frxq[HASH_RXQ_ETH].ibv_attr); if (!flow->frxq[HASH_RXQ_ETH].ibv_flow) { - DEBUG("Flow %p cannot be applied", - (void *)flow); + DRV_LOG(DEBUG, + "port %u flow %p cannot be applied", + dev->data->port_id, (void *)flow); rte_errno = EINVAL; - return rte_errno; + return -rte_errno; } - DEBUG("Flow %p applied", (void *)flow); + DRV_LOG(DEBUG, "port %u flow %p applied", + dev->data->port_id, (void *)flow); /* Next flow. 
*/ continue; } @@ -2256,36 +2357,39 @@ priv_flow_start(struct priv *priv, struct mlx5_flows *list) if (!flow->frxq[i].ibv_attr) continue; flow->frxq[i].hrxq = - mlx5_priv_hrxq_get(priv, flow->rss_conf.rss_key, - flow->rss_conf.rss_key_len, - hash_rxq_init[i].hash_fields, - (*flow->queues), - flow->queues_n); + mlx5_hrxq_get(dev, flow->rss_conf.rss_key, + flow->rss_conf.rss_key_len, + hash_rxq_init[i].hash_fields, + (*flow->queues), + flow->queues_n); if (flow->frxq[i].hrxq) goto flow_create; flow->frxq[i].hrxq = - mlx5_priv_hrxq_new(priv, flow->rss_conf.rss_key, - flow->rss_conf.rss_key_len, - hash_rxq_init[i].hash_fields, - (*flow->queues), - flow->queues_n); + mlx5_hrxq_new(dev, flow->rss_conf.rss_key, + flow->rss_conf.rss_key_len, + hash_rxq_init[i].hash_fields, + (*flow->queues), + flow->queues_n); if (!flow->frxq[i].hrxq) { - DEBUG("Flow %p cannot be applied", - (void *)flow); + DRV_LOG(DEBUG, + "port %u flow %p cannot be applied", + dev->data->port_id, (void *)flow); rte_errno = EINVAL; - return rte_errno; + return -rte_errno; } flow_create: flow->frxq[i].ibv_flow = ibv_create_flow(flow->frxq[i].hrxq->qp, flow->frxq[i].ibv_attr); if (!flow->frxq[i].ibv_flow) { - DEBUG("Flow %p cannot be applied", - (void *)flow); + DRV_LOG(DEBUG, + "port %u flow %p cannot be applied", + dev->data->port_id, (void *)flow); rte_errno = EINVAL; - return rte_errno; + return -rte_errno; } - DEBUG("Flow %p applied", (void *)flow); + DRV_LOG(DEBUG, "port %u flow %p applied", + dev->data->port_id, (void *)flow); } if (!flow->mark) continue; @@ -2298,20 +2402,21 @@ flow_create: /** * Verify the flow list is empty * - * @param priv - * Pointer to private structure. + * @param dev + * Pointer to Ethernet device. * * @return the number of flows not released. */ int -priv_flow_verify(struct priv *priv) +mlx5_flow_verify(struct rte_eth_dev *dev) { + struct priv *priv = dev->data->dev_private; struct rte_flow *flow; int ret = 0; TAILQ_FOREACH(flow, &priv->flows, next) { - DEBUG("%p: flow %p still referenced", (void *)priv, - (void *)flow); + DRV_LOG(DEBUG, "port %u flow %p still referenced", + dev->data->port_id, (void *)flow); ++ret; } return ret; @@ -2332,7 +2437,7 @@ priv_flow_verify(struct priv *priv) * A VLAN flow mask to apply. * * @return - * 0 on success. + * 0 on success, a negative errno value otherwise and rte_errno is set. */ int mlx5_ctrl_flow_vlan(struct rte_eth_dev *dev, @@ -2384,17 +2489,19 @@ mlx5_ctrl_flow_vlan(struct rte_eth_dev *dev, } local; } action_rss; - if (!priv->reta_idx_n) - return EINVAL; + if (!priv->reta_idx_n) { + rte_errno = EINVAL; + return -rte_errno; + } for (i = 0; i != priv->reta_idx_n; ++i) action_rss.local.queue[i] = (*priv->reta_idx)[i]; action_rss.local.rss_conf = &priv->rss_conf; action_rss.local.num = priv->reta_idx_n; actions[0].conf = (const void *)&action_rss.rss; - flow = priv_flow_create(priv, &priv->ctrl_flows, &attr, items, actions, - &error); + flow = mlx5_flow_list_create(dev, &priv->ctrl_flows, &attr, items, + actions, &error); if (!flow) - return rte_errno; + return -rte_errno; return 0; } @@ -2409,7 +2516,7 @@ mlx5_ctrl_flow_vlan(struct rte_eth_dev *dev, * An Ethernet flow mask to apply. * * @return - * 0 on success. + * 0 on success, a negative errno value otherwise and rte_errno is set. 
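Editor's note: mlx5_ctrl_flow_vlan() above populates its RSS action from the RETA table through a local union, because struct rte_flow_action_rss in this DPDK generation ends in a flexible queue[] array and cannot be placed on the stack directly. The trick in isolation (the queue bound is a made-up constant):

#include <stdint.h>
#include <rte_ethdev.h>
#include <rte_flow.h>

#define EXAMPLE_MAX_QUEUES 16 /* hypothetical upper bound */

/* Overlay the flexible-array action with sized storage, as done by
 * the action_rss union in mlx5_ctrl_flow_vlan(). */
union example_action_rss {
	struct rte_flow_action_rss rss;
	struct {
		const struct rte_eth_rss_conf *rss_conf;
		uint16_t num;
		uint16_t queue[EXAMPLE_MAX_QUEUES];
	} local;
};

static void
example_fill_rss(union example_action_rss *buf, const uint16_t *reta,
		 uint16_t n, const struct rte_eth_rss_conf *conf)
{
	uint16_t i;

	for (i = 0; i != n && i != EXAMPLE_MAX_QUEUES; ++i)
		buf->local.queue[i] = reta[i];
	buf->local.rss_conf = conf;
	buf->local.num = n;
}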
*/ int mlx5_ctrl_flow(struct rte_eth_dev *dev, @@ -2428,14 +2535,11 @@ mlx5_ctrl_flow(struct rte_eth_dev *dev, int mlx5_flow_destroy(struct rte_eth_dev *dev, struct rte_flow *flow, - struct rte_flow_error *error) + struct rte_flow_error *error __rte_unused) { struct priv *priv = dev->data->dev_private; - (void)error; - priv_lock(priv); - priv_flow_destroy(priv, &priv->flows, flow); - priv_unlock(priv); + mlx5_flow_list_destroy(dev, &priv->flows, flow); return 0; } @@ -2447,14 +2551,11 @@ mlx5_flow_destroy(struct rte_eth_dev *dev, */ int mlx5_flow_flush(struct rte_eth_dev *dev, - struct rte_flow_error *error) + struct rte_flow_error *error __rte_unused) { struct priv *priv = dev->data->dev_private; - (void)error; - priv_lock(priv); - priv_flow_flush(priv, &priv->flows); - priv_unlock(priv); + mlx5_flow_list_flush(dev, &priv->flows); return 0; } @@ -2468,10 +2569,10 @@ mlx5_flow_flush(struct rte_eth_dev *dev, * returned data from the counter. * * @return - * 0 on success, a errno value otherwise and rte_errno is set. + * 0 on success, a negative errno value otherwise and rte_errno is set. */ static int -priv_flow_query_count(struct ibv_counter_set *cs, +mlx5_flow_query_count(struct ibv_counter_set *cs, struct mlx5_flow_counter_stats *counter_stats, struct rte_flow_query_count *query_count, struct rte_flow_error *error) @@ -2485,15 +2586,13 @@ priv_flow_query_count(struct ibv_counter_set *cs, .out = counters, .outlen = 2 * sizeof(uint64_t), }; - int res = ibv_query_counter_set(&query_cs_attr, &query_out); + int err = ibv_query_counter_set(&query_cs_attr, &query_out); - if (res) { - rte_flow_error_set(error, -res, - RTE_FLOW_ERROR_TYPE_UNSPECIFIED, - NULL, - "cannot read counter"); - return -res; - } + if (err) + return rte_flow_error_set(error, err, + RTE_FLOW_ERROR_TYPE_UNSPECIFIED, + NULL, + "cannot read counter"); query_count->hits_set = 1; query_count->bytes_set = 1; query_count->hits = counters[0] - counter_stats->hits; @@ -2512,29 +2611,28 @@ priv_flow_query_count(struct ibv_counter_set *cs, * @see rte_flow_ops */ int -mlx5_flow_query(struct rte_eth_dev *dev, +mlx5_flow_query(struct rte_eth_dev *dev __rte_unused, struct rte_flow *flow, enum rte_flow_action_type action __rte_unused, void *data, struct rte_flow_error *error) { - struct priv *priv = dev->data->dev_private; - int res = EINVAL; - - priv_lock(priv); if (flow->cs) { - res = priv_flow_query_count(flow->cs, - &flow->counter_stats, - (struct rte_flow_query_count *)data, - error); + int ret; + + ret = mlx5_flow_query_count(flow->cs, + &flow->counter_stats, + (struct rte_flow_query_count *)data, + error); + if (ret) + return ret; } else { - rte_flow_error_set(error, res, - RTE_FLOW_ERROR_TYPE_UNSPECIFIED, - NULL, - "no counter found for flow"); + return rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_UNSPECIFIED, + NULL, + "no counter found for flow"); } - priv_unlock(priv); - return -res; + return 0; } #endif @@ -2551,48 +2649,50 @@ mlx5_flow_isolate(struct rte_eth_dev *dev, { struct priv *priv = dev->data->dev_private; - priv_lock(priv); if (dev->data->dev_started) { rte_flow_error_set(error, EBUSY, RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL, "port must be stopped first"); - priv_unlock(priv); return -rte_errno; } priv->isolated = !!enable; if (enable) - priv->dev->dev_ops = &mlx5_dev_ops_isolate; + dev->dev_ops = &mlx5_dev_ops_isolate; else - priv->dev->dev_ops = &mlx5_dev_ops; - priv_unlock(priv); + dev->dev_ops = &mlx5_dev_ops; return 0; } /** * Convert a flow director filter to a generic flow. 
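Editor's note: mlx5_flow_isolate() above swaps the entire dev_ops table instead of branching in every callback, and refuses to toggle while the port runs. The matching application call, which must happen before rte_eth_dev_start(); a sketch:

#include <rte_flow.h>

/* Restrict port_id to explicitly created rte_flow rules only; to
 * be called while the port is stopped. */
static int
example_enable_isolation(uint16_t port_id)
{
	struct rte_flow_error error;

	return rte_flow_isolate(port_id, 1, &error);
}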
* - * @param priv - * Private structure. + * @param dev + * Pointer to Ethernet device. * @param fdir_filter * Flow director filter to add. * @param attributes * Generic flow parameters structure. * * @return - * 0 on success, errno value on error. + * 0 on success, a negative errno value otherwise and rte_errno is set. */ static int -priv_fdir_filter_convert(struct priv *priv, +mlx5_fdir_filter_convert(struct rte_eth_dev *dev, const struct rte_eth_fdir_filter *fdir_filter, struct mlx5_fdir *attributes) { + struct priv *priv = dev->data->dev_private; const struct rte_eth_fdir_input *input = &fdir_filter->input; + const struct rte_eth_fdir_masks *mask = + &dev->data->dev_conf.fdir_conf.mask; /* Validate queue number. */ if (fdir_filter->action.rx_queue >= priv->rxqs_n) { - ERROR("invalid queue number %d", fdir_filter->action.rx_queue); - return EINVAL; + DRV_LOG(ERR, "port %u invalid queue number %d", + dev->data->port_id, fdir_filter->action.rx_queue); + rte_errno = EINVAL; + return -rte_errno; } attributes->attr.ingress = 1; attributes->items[0] = (struct rte_flow_item) { @@ -2613,134 +2713,140 @@ priv_fdir_filter_convert(struct priv *priv, }; break; default: - ERROR("invalid behavior %d", fdir_filter->action.behavior); - return ENOTSUP; + DRV_LOG(ERR, "port %u invalid behavior %d", + dev->data->port_id, + fdir_filter->action.behavior); + rte_errno = ENOTSUP; + return -rte_errno; } attributes->queue.index = fdir_filter->action.rx_queue; + /* Handle L3. */ switch (fdir_filter->input.flow_type) { case RTE_ETH_FLOW_NONFRAG_IPV4_UDP: + case RTE_ETH_FLOW_NONFRAG_IPV4_TCP: + case RTE_ETH_FLOW_NONFRAG_IPV4_OTHER: attributes->l3.ipv4.hdr = (struct ipv4_hdr){ - .src_addr = input->flow.udp4_flow.ip.src_ip, - .dst_addr = input->flow.udp4_flow.ip.dst_ip, - .time_to_live = input->flow.udp4_flow.ip.ttl, - .type_of_service = input->flow.udp4_flow.ip.tos, - .next_proto_id = input->flow.udp4_flow.ip.proto, + .src_addr = input->flow.ip4_flow.src_ip, + .dst_addr = input->flow.ip4_flow.dst_ip, + .time_to_live = input->flow.ip4_flow.ttl, + .type_of_service = input->flow.ip4_flow.tos, + .next_proto_id = input->flow.ip4_flow.proto, }; - attributes->l4.udp.hdr = (struct udp_hdr){ - .src_port = input->flow.udp4_flow.src_port, - .dst_port = input->flow.udp4_flow.dst_port, + attributes->l3_mask.ipv4.hdr = (struct ipv4_hdr){ + .src_addr = mask->ipv4_mask.src_ip, + .dst_addr = mask->ipv4_mask.dst_ip, + .time_to_live = mask->ipv4_mask.ttl, + .type_of_service = mask->ipv4_mask.tos, + .next_proto_id = mask->ipv4_mask.proto, }; attributes->items[1] = (struct rte_flow_item){ .type = RTE_FLOW_ITEM_TYPE_IPV4, .spec = &attributes->l3, + .mask = &attributes->l3_mask, + }; + break; + case RTE_ETH_FLOW_NONFRAG_IPV6_UDP: + case RTE_ETH_FLOW_NONFRAG_IPV6_TCP: + case RTE_ETH_FLOW_NONFRAG_IPV6_OTHER: + attributes->l3.ipv6.hdr = (struct ipv6_hdr){ + .hop_limits = input->flow.ipv6_flow.hop_limits, + .proto = input->flow.ipv6_flow.proto, + }; + + memcpy(attributes->l3.ipv6.hdr.src_addr, + input->flow.ipv6_flow.src_ip, + RTE_DIM(attributes->l3.ipv6.hdr.src_addr)); + memcpy(attributes->l3.ipv6.hdr.dst_addr, + input->flow.ipv6_flow.dst_ip, + RTE_DIM(attributes->l3.ipv6.hdr.src_addr)); + memcpy(attributes->l3_mask.ipv6.hdr.src_addr, + mask->ipv6_mask.src_ip, + RTE_DIM(attributes->l3_mask.ipv6.hdr.src_addr)); + memcpy(attributes->l3_mask.ipv6.hdr.dst_addr, + mask->ipv6_mask.dst_ip, + RTE_DIM(attributes->l3_mask.ipv6.hdr.src_addr)); + attributes->items[1] = (struct rte_flow_item){ + .type = RTE_FLOW_ITEM_TYPE_IPV6, + .spec = 
&attributes->l3, + .mask = &attributes->l3_mask, + }; + break; + default: + DRV_LOG(ERR, "port %u invalid flow type%d", + dev->data->port_id, fdir_filter->input.flow_type); + rte_errno = ENOTSUP; + return -rte_errno; + } + /* Handle L4. */ + switch (fdir_filter->input.flow_type) { + case RTE_ETH_FLOW_NONFRAG_IPV4_UDP: + attributes->l4.udp.hdr = (struct udp_hdr){ + .src_port = input->flow.udp4_flow.src_port, + .dst_port = input->flow.udp4_flow.dst_port, + }; + attributes->l4_mask.udp.hdr = (struct udp_hdr){ + .src_port = mask->src_port_mask, + .dst_port = mask->dst_port_mask, }; attributes->items[2] = (struct rte_flow_item){ .type = RTE_FLOW_ITEM_TYPE_UDP, .spec = &attributes->l4, + .mask = &attributes->l4_mask, }; break; case RTE_ETH_FLOW_NONFRAG_IPV4_TCP: - attributes->l3.ipv4.hdr = (struct ipv4_hdr){ - .src_addr = input->flow.tcp4_flow.ip.src_ip, - .dst_addr = input->flow.tcp4_flow.ip.dst_ip, - .time_to_live = input->flow.tcp4_flow.ip.ttl, - .type_of_service = input->flow.tcp4_flow.ip.tos, - .next_proto_id = input->flow.tcp4_flow.ip.proto, - }; attributes->l4.tcp.hdr = (struct tcp_hdr){ .src_port = input->flow.tcp4_flow.src_port, .dst_port = input->flow.tcp4_flow.dst_port, }; - attributes->items[1] = (struct rte_flow_item){ - .type = RTE_FLOW_ITEM_TYPE_IPV4, - .spec = &attributes->l3, + attributes->l4_mask.tcp.hdr = (struct tcp_hdr){ + .src_port = mask->src_port_mask, + .dst_port = mask->dst_port_mask, }; attributes->items[2] = (struct rte_flow_item){ .type = RTE_FLOW_ITEM_TYPE_TCP, .spec = &attributes->l4, - }; - break; - case RTE_ETH_FLOW_NONFRAG_IPV4_OTHER: - attributes->l3.ipv4.hdr = (struct ipv4_hdr){ - .src_addr = input->flow.ip4_flow.src_ip, - .dst_addr = input->flow.ip4_flow.dst_ip, - .time_to_live = input->flow.ip4_flow.ttl, - .type_of_service = input->flow.ip4_flow.tos, - .next_proto_id = input->flow.ip4_flow.proto, - }; - attributes->items[1] = (struct rte_flow_item){ - .type = RTE_FLOW_ITEM_TYPE_IPV4, - .spec = &attributes->l3, + .mask = &attributes->l4_mask, }; break; case RTE_ETH_FLOW_NONFRAG_IPV6_UDP: - attributes->l3.ipv6.hdr = (struct ipv6_hdr){ - .hop_limits = input->flow.udp6_flow.ip.hop_limits, - .proto = input->flow.udp6_flow.ip.proto, - }; - memcpy(attributes->l3.ipv6.hdr.src_addr, - input->flow.udp6_flow.ip.src_ip, - RTE_DIM(attributes->l3.ipv6.hdr.src_addr)); - memcpy(attributes->l3.ipv6.hdr.dst_addr, - input->flow.udp6_flow.ip.dst_ip, - RTE_DIM(attributes->l3.ipv6.hdr.src_addr)); attributes->l4.udp.hdr = (struct udp_hdr){ .src_port = input->flow.udp6_flow.src_port, .dst_port = input->flow.udp6_flow.dst_port, }; - attributes->items[1] = (struct rte_flow_item){ - .type = RTE_FLOW_ITEM_TYPE_IPV6, - .spec = &attributes->l3, + attributes->l4_mask.udp.hdr = (struct udp_hdr){ + .src_port = mask->src_port_mask, + .dst_port = mask->dst_port_mask, }; attributes->items[2] = (struct rte_flow_item){ .type = RTE_FLOW_ITEM_TYPE_UDP, .spec = &attributes->l4, + .mask = &attributes->l4_mask, }; break; case RTE_ETH_FLOW_NONFRAG_IPV6_TCP: - attributes->l3.ipv6.hdr = (struct ipv6_hdr){ - .hop_limits = input->flow.tcp6_flow.ip.hop_limits, - .proto = input->flow.tcp6_flow.ip.proto, - }; - memcpy(attributes->l3.ipv6.hdr.src_addr, - input->flow.tcp6_flow.ip.src_ip, - RTE_DIM(attributes->l3.ipv6.hdr.src_addr)); - memcpy(attributes->l3.ipv6.hdr.dst_addr, - input->flow.tcp6_flow.ip.dst_ip, - RTE_DIM(attributes->l3.ipv6.hdr.src_addr)); attributes->l4.tcp.hdr = (struct tcp_hdr){ .src_port = input->flow.tcp6_flow.src_port, .dst_port = input->flow.tcp6_flow.dst_port, }; - attributes->items[1] = 
(struct rte_flow_item){ - .type = RTE_FLOW_ITEM_TYPE_IPV6, - .spec = &attributes->l3, + attributes->l4_mask.tcp.hdr = (struct tcp_hdr){ + .src_port = mask->src_port_mask, + .dst_port = mask->dst_port_mask, }; attributes->items[2] = (struct rte_flow_item){ .type = RTE_FLOW_ITEM_TYPE_TCP, .spec = &attributes->l4, + .mask = &attributes->l4_mask, }; break; + case RTE_ETH_FLOW_NONFRAG_IPV4_OTHER: case RTE_ETH_FLOW_NONFRAG_IPV6_OTHER: - attributes->l3.ipv6.hdr = (struct ipv6_hdr){ - .hop_limits = input->flow.ipv6_flow.hop_limits, - .proto = input->flow.ipv6_flow.proto, - }; - memcpy(attributes->l3.ipv6.hdr.src_addr, - input->flow.ipv6_flow.src_ip, - RTE_DIM(attributes->l3.ipv6.hdr.src_addr)); - memcpy(attributes->l3.ipv6.hdr.dst_addr, - input->flow.ipv6_flow.dst_ip, - RTE_DIM(attributes->l3.ipv6.hdr.src_addr)); - attributes->items[1] = (struct rte_flow_item){ - .type = RTE_FLOW_ITEM_TYPE_IPV6, - .spec = &attributes->l3, - }; break; default: - ERROR("invalid flow type%d", - fdir_filter->input.flow_type); - return ENOTSUP; + DRV_LOG(ERR, "port %u invalid flow type%d", + dev->data->port_id, fdir_filter->input.flow_type); + rte_errno = ENOTSUP; + return -rte_errno; } return 0; } @@ -2748,18 +2854,19 @@ priv_fdir_filter_convert(struct priv *priv, /** * Add new flow director filter and store it in list. * - * @param priv - * Private structure. + * @param dev + * Pointer to Ethernet device. * @param fdir_filter * Flow director filter to add. * * @return - * 0 on success, errno value on failure. + * 0 on success, a negative errno value otherwise and rte_errno is set. */ static int -priv_fdir_filter_add(struct priv *priv, +mlx5_fdir_filter_add(struct rte_eth_dev *dev, const struct rte_eth_fdir_filter *fdir_filter) { + struct priv *priv = dev->data->dev_private; struct mlx5_fdir attributes = { .attr.group = 0, .l2_mask = { @@ -2775,41 +2882,40 @@ priv_fdir_filter_add(struct priv *priv, struct rte_flow *flow; int ret; - ret = priv_fdir_filter_convert(priv, fdir_filter, &attributes); + ret = mlx5_fdir_filter_convert(dev, fdir_filter, &attributes); if (ret) - return -ret; - ret = priv_flow_convert(priv, &attributes.attr, attributes.items, + return ret; + ret = mlx5_flow_convert(dev, &attributes.attr, attributes.items, attributes.actions, &error, &parser); if (ret) - return -ret; - flow = priv_flow_create(priv, - &priv->flows, - &attributes.attr, - attributes.items, - attributes.actions, - &error); + return ret; + flow = mlx5_flow_list_create(dev, &priv->flows, &attributes.attr, + attributes.items, attributes.actions, + &error); if (flow) { - DEBUG("FDIR created %p", (void *)flow); + DRV_LOG(DEBUG, "port %u FDIR created %p", dev->data->port_id, + (void *)flow); return 0; } - return ENOTSUP; + return -rte_errno; } /** * Delete specific filter. * - * @param priv - * Private structure. + * @param dev + * Pointer to Ethernet device. * @param fdir_filter * Filter to be deleted. * * @return - * 0 on success, errno value on failure. + * 0 on success, a negative errno value otherwise and rte_errno is set. 
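Editor's note: mlx5_fdir_filter_add() above converts the legacy filter into rte_flow attributes and reuses mlx5_flow_list_create(), so flow director rules and generic flows share one code path. A sketch of the legacy call it serves (port and queue values are arbitrary):

#include <rte_byteorder.h>
#include <rte_ethdev.h>

/* Direct non-fragmented IPv4/UDP packets with destination port
 * 4789 to Rx queue 3 through the legacy flow director API. */
static int
example_fdir_add(uint16_t port_id)
{
	struct rte_eth_fdir_filter filter = {
		.input = {
			.flow_type = RTE_ETH_FLOW_NONFRAG_IPV4_UDP,
			.flow.udp4_flow.dst_port = rte_cpu_to_be_16(4789),
		},
		.action = {
			.behavior = RTE_ETH_FDIR_ACCEPT,
			.rx_queue = 3,
		},
	};

	return rte_eth_dev_filter_ctrl(port_id, RTE_ETH_FILTER_FDIR,
				       RTE_ETH_FILTER_ADD, &filter);
}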
*/ static int -priv_fdir_filter_delete(struct priv *priv, +mlx5_fdir_filter_delete(struct rte_eth_dev *dev, const struct rte_eth_fdir_filter *fdir_filter) { + struct priv *priv = dev->data->dev_private; struct mlx5_fdir attributes = { .attr.group = 0, }; @@ -2822,10 +2928,10 @@ priv_fdir_filter_delete(struct priv *priv, unsigned int i; int ret; - ret = priv_fdir_filter_convert(priv, fdir_filter, &attributes); + ret = mlx5_fdir_filter_convert(dev, fdir_filter, &attributes); if (ret) - return -ret; - ret = priv_flow_convert(priv, &attributes.attr, attributes.items, + return ret; + ret = mlx5_flow_convert(dev, &attributes.attr, attributes.items, attributes.actions, &error, &parser); if (ret) goto exit; @@ -2853,11 +2959,14 @@ priv_fdir_filter_delete(struct priv *priv, struct ibv_spec_header *flow_h; void *flow_spec; unsigned int specs_n; + unsigned int queue_id = parser.drop ? HASH_RXQ_ETH : + parser.layer; - attr = parser.queue[HASH_RXQ_ETH].ibv_attr; - flow_attr = flow->frxq[HASH_RXQ_ETH].ibv_attr; + attr = parser.queue[queue_id].ibv_attr; + flow_attr = flow->frxq[queue_id].ibv_attr; /* Compare first the attributes. */ - if (memcmp(attr, flow_attr, sizeof(struct ibv_flow_attr))) + if (!flow_attr || + memcmp(attr, flow_attr, sizeof(struct ibv_flow_attr))) continue; if (attr->num_of_specs == 0) continue; @@ -2882,67 +2991,70 @@ wrong_flow: /* The flow does not match. */ continue; } + ret = rte_errno; /* Save rte_errno before cleanup. */ if (flow) - priv_flow_destroy(priv, &priv->flows, flow); + mlx5_flow_list_destroy(dev, &priv->flows, flow); exit: for (i = 0; i != hash_rxq_init_n; ++i) { if (parser.queue[i].ibv_attr) rte_free(parser.queue[i].ibv_attr); } - return -ret; + rte_errno = ret; /* Restore rte_errno. */ + return -rte_errno; } /** * Update queue for specific filter. * - * @param priv - * Private structure. + * @param dev + * Pointer to Ethernet device. * @param fdir_filter * Filter to be updated. * * @return - * 0 on success, errno value on failure. + * 0 on success, a negative errno value otherwise and rte_errno is set. */ static int -priv_fdir_filter_update(struct priv *priv, +mlx5_fdir_filter_update(struct rte_eth_dev *dev, const struct rte_eth_fdir_filter *fdir_filter) { int ret; - ret = priv_fdir_filter_delete(priv, fdir_filter); + ret = mlx5_fdir_filter_delete(dev, fdir_filter); if (ret) return ret; - ret = priv_fdir_filter_add(priv, fdir_filter); - return ret; + return mlx5_fdir_filter_add(dev, fdir_filter); } /** * Flush all filters. * - * @param priv - * Private structure. + * @param dev + * Pointer to Ethernet device. */ static void -priv_fdir_filter_flush(struct priv *priv) +mlx5_fdir_filter_flush(struct rte_eth_dev *dev) { - priv_flow_flush(priv, &priv->flows); + struct priv *priv = dev->data->dev_private; + + mlx5_flow_list_flush(dev, &priv->flows); } /** * Get flow director information. * - * @param priv - * Private structure. + * @param dev + * Pointer to Ethernet device. * @param[out] fdir_info * Resulting flow director information. 
*/ static void -priv_fdir_info_get(struct priv *priv, struct rte_eth_fdir_info *fdir_info) +mlx5_fdir_info_get(struct rte_eth_dev *dev, struct rte_eth_fdir_info *fdir_info) { struct rte_eth_fdir_masks *mask = - &priv->dev->data->dev_conf.fdir_conf.mask; + &dev->data->dev_conf.fdir_conf.mask; - fdir_info->mode = priv->dev->data->dev_conf.fdir_conf.mode; + fdir_info->mode = dev->data->dev_conf.fdir_conf.mode; fdir_info->guarant_spc = 0; rte_memcpy(&fdir_info->mask, mask, sizeof(fdir_info->mask)); fdir_info->max_flexpayload = 0; @@ -2956,54 +3068,52 @@ priv_fdir_info_get(struct priv *priv, struct rte_eth_fdir_info *fdir_info) /** * Deal with flow director operations. * - * @param priv - * Pointer to private structure. + * @param dev + * Pointer to Ethernet device. * @param filter_op * Operation to perform. * @param arg * Pointer to operation-specific structure. * * @return - * 0 on success, errno value on failure. + * 0 on success, a negative errno value otherwise and rte_errno is set. */ static int -priv_fdir_ctrl_func(struct priv *priv, enum rte_filter_op filter_op, void *arg) +mlx5_fdir_ctrl_func(struct rte_eth_dev *dev, enum rte_filter_op filter_op, + void *arg) { enum rte_fdir_mode fdir_mode = - priv->dev->data->dev_conf.fdir_conf.mode; - int ret = 0; + dev->data->dev_conf.fdir_conf.mode; if (filter_op == RTE_ETH_FILTER_NOP) return 0; if (fdir_mode != RTE_FDIR_MODE_PERFECT && fdir_mode != RTE_FDIR_MODE_PERFECT_MAC_VLAN) { - ERROR("%p: flow director mode %d not supported", - (void *)priv, fdir_mode); - return EINVAL; + DRV_LOG(ERR, "port %u flow director mode %d not supported", + dev->data->port_id, fdir_mode); + rte_errno = EINVAL; + return -rte_errno; } switch (filter_op) { case RTE_ETH_FILTER_ADD: - ret = priv_fdir_filter_add(priv, arg); - break; + return mlx5_fdir_filter_add(dev, arg); case RTE_ETH_FILTER_UPDATE: - ret = priv_fdir_filter_update(priv, arg); - break; + return mlx5_fdir_filter_update(dev, arg); case RTE_ETH_FILTER_DELETE: - ret = priv_fdir_filter_delete(priv, arg); - break; + return mlx5_fdir_filter_delete(dev, arg); case RTE_ETH_FILTER_FLUSH: - priv_fdir_filter_flush(priv); + mlx5_fdir_filter_flush(dev); break; case RTE_ETH_FILTER_INFO: - priv_fdir_info_get(priv, arg); + mlx5_fdir_info_get(dev, arg); break; default: - DEBUG("%p: unknown operation %u", (void *)priv, - filter_op); - ret = EINVAL; - break; + DRV_LOG(DEBUG, "port %u unknown operation %u", + dev->data->port_id, filter_op); + rte_errno = EINVAL; + return -rte_errno; } - return ret; + return 0; } /** @@ -3019,7 +3129,7 @@ priv_fdir_ctrl_func(struct priv *priv, enum rte_filter_op filter_op, void *arg) * Pointer to operation-specific structure. * * @return - * 0 on success, negative errno value on failure. + * 0 on success, a negative errno value otherwise and rte_errno is set. 
*/ int mlx5_dev_filter_ctrl(struct rte_eth_dev *dev, @@ -3027,24 +3137,21 @@ mlx5_dev_filter_ctrl(struct rte_eth_dev *dev, enum rte_filter_op filter_op, void *arg) { - int ret = EINVAL; - struct priv *priv = dev->data->dev_private; - switch (filter_type) { case RTE_ETH_FILTER_GENERIC: - if (filter_op != RTE_ETH_FILTER_GET) - return -EINVAL; + if (filter_op != RTE_ETH_FILTER_GET) { + rte_errno = EINVAL; + return -rte_errno; + } *(const void **)arg = &mlx5_flow_ops; return 0; case RTE_ETH_FILTER_FDIR: - priv_lock(priv); - ret = priv_fdir_ctrl_func(priv, filter_op, arg); - priv_unlock(priv); - break; + return mlx5_fdir_ctrl_func(dev, filter_op, arg); default: - ERROR("%p: filter type (%d) not supported", - (void *)dev, filter_type); - break; + DRV_LOG(ERR, "port %u filter type (%d) not supported", + dev->data->port_id, filter_type); + rte_errno = ENOTSUP; + return -rte_errno; } - return -ret; + return 0; } diff --git a/drivers/net/mlx5/mlx5_mac.c b/drivers/net/mlx5/mlx5_mac.c index 9fb5ba5e..9de35142 100644 --- a/drivers/net/mlx5/mlx5_mac.c +++ b/drivers/net/mlx5/mlx5_mac.c @@ -63,21 +63,23 @@ /** * Get MAC address by querying netdevice. * - * @param[in] priv - * struct priv for the requested device. + * @param[in] dev + * Pointer to Ethernet device. * @param[out] mac * MAC address output buffer. * * @return - * 0 on success, -1 on failure and errno is set. + * 0 on success, a negative errno value otherwise and rte_errno is set. */ int -priv_get_mac(struct priv *priv, uint8_t (*mac)[ETHER_ADDR_LEN]) +mlx5_get_mac(struct rte_eth_dev *dev, uint8_t (*mac)[ETHER_ADDR_LEN]) { struct ifreq request; + int ret; - if (priv_ifreq(priv, SIOCGIFHWADDR, &request)) - return -1; + ret = mlx5_ifreq(dev, SIOCGIFHWADDR, &request); + if (ret) + return ret; memcpy(mac, request.ifr_hwaddr.sa_data, ETHER_ADDR_LEN); return 0; } @@ -95,8 +97,13 @@ mlx5_mac_addr_remove(struct rte_eth_dev *dev, uint32_t index) { assert(index < MLX5_MAX_MAC_ADDRESSES); memset(&dev->data->mac_addrs[index], 0, sizeof(struct ether_addr)); - if (!dev->data->promiscuous && !dev->data->all_multicast) - mlx5_traffic_restart(dev); + if (!dev->data->promiscuous) { + int ret = mlx5_traffic_restart(dev); + + if (ret) + DRV_LOG(ERR, "port %u cannot remove mac address: %s", + dev->data->port_id, strerror(rte_errno)); + } } /** @@ -112,16 +119,14 @@ mlx5_mac_addr_remove(struct rte_eth_dev *dev, uint32_t index) * VMDq pool index to associate address with (ignored). * * @return - * 0 on success. + * 0 on success, a negative errno value otherwise and rte_errno is set. */ int mlx5_mac_addr_add(struct rte_eth_dev *dev, struct ether_addr *mac, - uint32_t index, uint32_t vmdq) + uint32_t index, uint32_t vmdq __rte_unused) { unsigned int i; - int ret = 0; - (void)vmdq; assert(index < MLX5_MAX_MAC_ADDRESSES); /* First, make sure this address isn't already configured. */ for (i = 0; (i != MLX5_MAX_MAC_ADDRESSES); ++i) { @@ -131,12 +136,13 @@ mlx5_mac_addr_add(struct rte_eth_dev *dev, struct ether_addr *mac, if (memcmp(&dev->data->mac_addrs[i], mac, sizeof(*mac))) continue; /* Address already configured elsewhere, return with error. 
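Editor's note: mlx5_mac_addr_add() scans every slot first and, as the following lines show, fails with EADDRINUSE when the address is already configured at another index, then restarts traffic flows unless the port is promiscuous. On the application side this remains the standard ethdev call (the address below is a locally administered example):

#include <rte_ethdev.h>
#include <rte_ether.h>

/* Add a secondary unicast MAC address; the VMDq pool argument is
 * ignored by this driver. */
static int
example_add_mac(uint16_t port_id)
{
	struct ether_addr mac = {
		.addr_bytes = { 0x02, 0x00, 0x00, 0x00, 0x00, 0x01 },
	};

	return rte_eth_dev_mac_addr_add(port_id, &mac, 0);
}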
*/ - return EADDRINUSE; + rte_errno = EADDRINUSE; + return -rte_errno; } dev->data->mac_addrs[index] = *mac; - if (!dev->data->promiscuous && !dev->data->all_multicast) - mlx5_traffic_restart(dev); - return ret; + if (!dev->data->promiscuous) + return mlx5_traffic_restart(dev); + return 0; } /** @@ -150,6 +156,13 @@ mlx5_mac_addr_add(struct rte_eth_dev *dev, struct ether_addr *mac, void mlx5_mac_addr_set(struct rte_eth_dev *dev, struct ether_addr *mac_addr) { - DEBUG("%p: setting primary MAC address", (void *)dev); - mlx5_mac_addr_add(dev, mac_addr, 0, 0); + int ret; + + DRV_LOG(DEBUG, "port %u setting primary MAC address", + dev->data->port_id); + + ret = mlx5_mac_addr_add(dev, mac_addr, 0, 0); + if (ret) + DRV_LOG(ERR, "port %u cannot set mac address: %s", + dev->data->port_id, strerror(rte_errno)); } diff --git a/drivers/net/mlx5/mlx5_mr.c b/drivers/net/mlx5/mlx5_mr.c index 2776dc70..a50c5208 100644 --- a/drivers/net/mlx5/mlx5_mr.c +++ b/drivers/net/mlx5/mlx5_mr.c @@ -55,15 +55,12 @@ struct mlx5_check_mempool_data { /* Called by mlx5_check_mempool() when iterating the memory chunks. */ static void -mlx5_check_mempool_cb(struct rte_mempool *mp, +mlx5_check_mempool_cb(struct rte_mempool *mp __rte_unused, void *opaque, struct rte_mempool_memhdr *memhdr, - unsigned int mem_idx) + unsigned int mem_idx __rte_unused) { struct mlx5_check_mempool_data *data = opaque; - (void)mp; - (void)mem_idx; - /* It already failed, skip the next chunks. */ if (data->ret != 0) return; @@ -98,8 +95,9 @@ mlx5_check_mempool_cb(struct rte_mempool *mp, * @return * 0 on success (mempool is virtually contiguous), -1 on error. */ -static int mlx5_check_mempool(struct rte_mempool *mp, uintptr_t *start, - uintptr_t *end) +static int +mlx5_check_mempool(struct rte_mempool *mp, uintptr_t *start, + uintptr_t *end) { struct mlx5_check_mempool_data data; @@ -107,7 +105,6 @@ static int mlx5_check_mempool(struct rte_mempool *mp, uintptr_t *start, rte_mempool_mem_iter(mp, mlx5_check_mempool_cb, &data); *start = (uintptr_t)data.start; *end = (uintptr_t)data.end; - return data.ret; } @@ -115,10 +112,6 @@ static int mlx5_check_mempool(struct rte_mempool *mp, uintptr_t *start, * Register a Memory Region (MR) <-> Memory Pool (MP) association in * txq->mp2mr[]. If mp2mr[] is full, remove an entry first. * - * This function should only be called by txq_mp2mr(). - * - * @param priv - * Pointer to private structure. * @param txq * Pointer to TX queue structure. * @param[in] mp @@ -127,71 +120,63 @@ static int mlx5_check_mempool(struct rte_mempool *mp, uintptr_t *start, * Index of the next available entry. * * @return - * mr on success, NULL on failure. + * mr on success, NULL on failure and rte_errno is set. */ -struct mlx5_mr* -priv_txq_mp2mr_reg(struct priv *priv, struct mlx5_txq_data *txq, - struct rte_mempool *mp, unsigned int idx) +struct mlx5_mr * +mlx5_txq_mp2mr_reg(struct mlx5_txq_data *txq, struct rte_mempool *mp, + unsigned int idx) { struct mlx5_txq_ctrl *txq_ctrl = container_of(txq, struct mlx5_txq_ctrl, txq); + struct rte_eth_dev *dev; struct mlx5_mr *mr; + rte_spinlock_lock(&txq_ctrl->priv->mr_lock); /* Add a new entry, register MR first. 
*/ - DEBUG("%p: discovered new memory pool \"%s\" (%p)", - (void *)txq_ctrl, mp->name, (void *)mp); - mr = priv_mr_get(priv, mp); - if (mr == NULL) - mr = priv_mr_new(priv, mp); + DRV_LOG(DEBUG, "port %u discovered new memory pool \"%s\" (%p)", + PORT_ID(txq_ctrl->priv), mp->name, (void *)mp); + dev = ETH_DEV(txq_ctrl->priv); + mr = mlx5_mr_get(dev, mp); + if (mr == NULL) { + if (rte_eal_process_type() != RTE_PROC_PRIMARY) { + DRV_LOG(DEBUG, + "port %u using unregistered mempool 0x%p(%s)" + " in secondary process, please create mempool" + " before rte_eth_dev_start()", + PORT_ID(txq_ctrl->priv), (void *)mp, mp->name); + rte_spinlock_unlock(&txq_ctrl->priv->mr_lock); + rte_errno = ENOTSUP; + return NULL; + } + mr = mlx5_mr_new(dev, mp); + } if (unlikely(mr == NULL)) { - DEBUG("%p: unable to configure MR, ibv_reg_mr() failed.", - (void *)txq_ctrl); + DRV_LOG(DEBUG, + "port %u unable to configure memory region," + " ibv_reg_mr() failed.", + PORT_ID(txq_ctrl->priv)); + rte_spinlock_unlock(&txq_ctrl->priv->mr_lock); return NULL; } if (unlikely(idx == RTE_DIM(txq->mp2mr))) { /* Table is full, remove oldest entry. */ - DEBUG("%p: MR <-> MP table full, dropping oldest entry.", - (void *)txq_ctrl); + DRV_LOG(DEBUG, + "port %u memory region <-> memory pool table full, " + " dropping oldest entry", + PORT_ID(txq_ctrl->priv)); --idx; - priv_mr_release(priv, txq->mp2mr[0]); + mlx5_mr_release(txq->mp2mr[0]); memmove(&txq->mp2mr[0], &txq->mp2mr[1], (sizeof(txq->mp2mr) - sizeof(txq->mp2mr[0]))); } /* Store the new entry. */ txq_ctrl->txq.mp2mr[idx] = mr; - DEBUG("%p: new MR lkey for MP \"%s\" (%p): 0x%08" PRIu32, - (void *)txq_ctrl, mp->name, (void *)mp, - txq_ctrl->txq.mp2mr[idx]->lkey); - return mr; -} - -/** - * Register a Memory Region (MR) <-> Memory Pool (MP) association in - * txq->mp2mr[]. If mp2mr[] is full, remove an entry first. - * - * This function should only be called by txq_mp2mr(). - * - * @param txq - * Pointer to TX queue structure. - * @param[in] mp - * Memory Pool for which a Memory Region lkey must be returned. - * @param idx - * Index of the next available entry. - * - * @return - * mr on success, NULL on failure. - */ -struct mlx5_mr* -mlx5_txq_mp2mr_reg(struct mlx5_txq_data *txq, struct rte_mempool *mp, - unsigned int idx) -{ - struct mlx5_txq_ctrl *txq_ctrl = - container_of(txq, struct mlx5_txq_ctrl, txq); - struct mlx5_mr *mr; - - priv_lock(txq_ctrl->priv); - mr = priv_txq_mp2mr_reg(txq_ctrl->priv, txq, mp, idx); - priv_unlock(txq_ctrl->priv); + DRV_LOG(DEBUG, + "port %u new memory region lkey for MP \"%s\" (%p): 0x%08" + PRIu32, + PORT_ID(txq_ctrl->priv), mp->name, (void *)mp, + txq_ctrl->txq.mp2mr[idx]->lkey); + rte_spinlock_unlock(&txq_ctrl->priv->mr_lock); return mr; } @@ -250,28 +235,33 @@ mlx5_mp2mr_iter(struct rte_mempool *mp, void *arg) if (rte_mempool_obj_iter(mp, txq_mp2mr_mbuf_check, &data) == 0 || data.ret == -1) return; - mr = priv_mr_get(priv, mp); + mr = mlx5_mr_get(ETH_DEV(priv), mp); if (mr) { - priv_mr_release(priv, mr); + mlx5_mr_release(mr); return; } - priv_mr_new(priv, mp); + mr = mlx5_mr_new(ETH_DEV(priv), mp); + if (!mr) + DRV_LOG(ERR, "port %u cannot create memory region: %s", + PORT_ID(priv), strerror(rte_errno)); } /** * Register a new memory region from the mempool and store it in the memory * region list. * - * @param priv - * Pointer to private structure. + * @param dev + * Pointer to Ethernet device. * @param mp * Pointer to the memory pool to register. + * * @return - * The memory region on success. 
+ * The memory region on success, NULL on failure and rte_errno is set. */ -struct mlx5_mr* -priv_mr_new(struct priv *priv, struct rte_mempool *mp) +struct mlx5_mr * +mlx5_mr_new(struct rte_eth_dev *dev, struct rte_mempool *mp) { + struct priv *priv = dev->data->dev_private; const struct rte_memseg *ms = rte_eal_get_physmem_layout(); uintptr_t start; uintptr_t end; @@ -280,17 +270,22 @@ priv_mr_new(struct priv *priv, struct rte_mempool *mp) mr = rte_zmalloc_socket(__func__, sizeof(*mr), 0, mp->socket_id); if (!mr) { - DEBUG("unable to configure MR, ibv_reg_mr() failed."); + DRV_LOG(DEBUG, + "port %u unable to configure memory region," + " ibv_reg_mr() failed.", + dev->data->port_id); + rte_errno = ENOMEM; return NULL; } if (mlx5_check_mempool(mp, &start, &end) != 0) { - ERROR("mempool %p: not virtually contiguous", - (void *)mp); + DRV_LOG(ERR, "port %u mempool %p: not virtually contiguous", + dev->data->port_id, (void *)mp); + rte_errno = ENOMEM; return NULL; } - DEBUG("mempool %p area start=%p end=%p size=%zu", - (void *)mp, (void *)start, (void *)end, - (size_t)(end - start)); + DRV_LOG(DEBUG, "port %u mempool %p area start=%p end=%p size=%zu", + dev->data->port_id, (void *)mp, (void *)start, (void *)end, + (size_t)(end - start)); /* Save original addresses for exact MR lookup. */ mr->start = start; mr->end = end; @@ -305,16 +300,22 @@ priv_mr_new(struct priv *priv, struct rte_mempool *mp) if ((end > addr) && (end < addr + len)) end = RTE_ALIGN_CEIL(end, align); } - DEBUG("mempool %p using start=%p end=%p size=%zu for MR", - (void *)mp, (void *)start, (void *)end, - (size_t)(end - start)); + DRV_LOG(DEBUG, + "port %u mempool %p using start=%p end=%p size=%zu for memory" + " region", + dev->data->port_id, (void *)mp, (void *)start, (void *)end, + (size_t)(end - start)); mr->mr = ibv_reg_mr(priv->pd, (void *)start, end - start, IBV_ACCESS_LOCAL_WRITE); + if (!mr->mr) { + rte_errno = ENOMEM; + return NULL; + } mr->mp = mp; mr->lkey = rte_cpu_to_be_32(mr->mr->lkey); rte_atomic32_inc(&mr->refcnt); - DEBUG("%p: new Memory Region %p refcnt: %d", (void *)priv, - (void *)mr, rte_atomic32_read(&mr->refcnt)); + DRV_LOG(DEBUG, "port %u new memory Region %p refcnt: %d", + dev->data->port_id, (void *)mr, rte_atomic32_read(&mr->refcnt)); LIST_INSERT_HEAD(&priv->mr, mr, next); return mr; } @@ -322,16 +323,18 @@ priv_mr_new(struct priv *priv, struct rte_mempool *mp) /** * Search the memory region object in the memory region list. * - * @param priv - * Pointer to private structure. + * @param dev + * Pointer to Ethernet device. * @param mp * Pointer to the memory pool to register. + * * @return * The memory region on success. */ -struct mlx5_mr* -priv_mr_get(struct priv *priv, struct rte_mempool *mp) +struct mlx5_mr * +mlx5_mr_get(struct rte_eth_dev *dev, struct rte_mempool *mp) { + struct priv *priv = dev->data->dev_private; struct mlx5_mr *mr; assert(mp); @@ -340,8 +343,9 @@ priv_mr_get(struct priv *priv, struct rte_mempool *mp) LIST_FOREACH(mr, &priv->mr, next) { if (mr->mp == mp) { rte_atomic32_inc(&mr->refcnt); - DEBUG("Memory Region %p refcnt: %d", - (void *)mr, rte_atomic32_read(&mr->refcnt)); + DRV_LOG(DEBUG, "port %u memory region %p refcnt: %d", + dev->data->port_id, (void *)mr, + rte_atomic32_read(&mr->refcnt)); return mr; } } @@ -355,41 +359,42 @@ priv_mr_get(struct priv *priv, struct rte_mempool *mp) * Pointer to memory region to release. * * @return - * 0 on success, errno on failure. + * 1 while a reference on it exists, 0 when freed. 
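Editor's note: the new return contract of mlx5_mr_release(), implemented just below, reports object lifetime rather than an error: 1 while other holders remain, 0 once the MR was deregistered and freed. The underlying counter scheme, generalized into a sketch:

#include <rte_atomic.h>

/* Generic form of the reference counting used by struct mlx5_mr:
 * a get bumps the counter, a release frees on the 1 -> 0 edge and
 * otherwise reports that references are still outstanding. */
struct example_obj {
	rte_atomic32_t refcnt;
};

static void
example_obj_get(struct example_obj *obj)
{
	rte_atomic32_inc(&obj->refcnt);
}

static int
example_obj_release(struct example_obj *obj)
{
	if (rte_atomic32_dec_and_test(&obj->refcnt)) {
		/* Last reference: tear down and free here. */
		return 0;
	}
	return 1;
}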
*/ int -priv_mr_release(struct priv *priv, struct mlx5_mr *mr) +mlx5_mr_release(struct mlx5_mr *mr) { - (void)priv; assert(mr); - DEBUG("Memory Region %p refcnt: %d", - (void *)mr, rte_atomic32_read(&mr->refcnt)); + DRV_LOG(DEBUG, "memory region %p refcnt: %d", (void *)mr, + rte_atomic32_read(&mr->refcnt)); if (rte_atomic32_dec_and_test(&mr->refcnt)) { claim_zero(ibv_dereg_mr(mr->mr)); LIST_REMOVE(mr, next); rte_free(mr); return 0; } - return EBUSY; + return 1; } /** * Verify the memory region list is empty * - * @param priv - * Pointer to private structure. + * @param dev + * Pointer to Ethernet device. * - * @return the number of object not released. + * @return + * The number of objects not released. */ int -priv_mr_verify(struct priv *priv) +mlx5_mr_verify(struct rte_eth_dev *dev) { + struct priv *priv = dev->data->dev_private; int ret = 0; struct mlx5_mr *mr; LIST_FOREACH(mr, &priv->mr, next) { - DEBUG("%p: mr %p still referenced", (void *)priv, - (void *)mr); + DRV_LOG(DEBUG, "port %u memory region %p still referenced", + dev->data->port_id, (void *)mr); ++ret; } return ret; diff --git a/drivers/net/mlx5/mlx5_rss.c b/drivers/net/mlx5/mlx5_rss.c index f47bda66..029e0ec4 100644 --- a/drivers/net/mlx5/mlx5_rss.c +++ b/drivers/net/mlx5/mlx5_rss.c @@ -63,35 +63,48 @@ * RSS configuration data. * * @return - * 0 on success, negative errno value on failure. + * 0 on success, a negative errno value otherwise and rte_errno is set. */ int mlx5_rss_hash_update(struct rte_eth_dev *dev, struct rte_eth_rss_conf *rss_conf) { struct priv *priv = dev->data->dev_private; - int ret = 0; + unsigned int i; + unsigned int idx; - priv_lock(priv); if (rss_conf->rss_hf & MLX5_RSS_HF_MASK) { - ret = -EINVAL; - goto out; + rte_errno = EINVAL; + return -rte_errno; } if (rss_conf->rss_key && rss_conf->rss_key_len) { + if (rss_conf->rss_key_len != rss_hash_default_key_len) { + DRV_LOG(ERR, + "port %u RSS key len must be %zu Bytes long", + dev->data->port_id, rss_hash_default_key_len); + rte_errno = EINVAL; + return -rte_errno; + } priv->rss_conf.rss_key = rte_realloc(priv->rss_conf.rss_key, rss_conf->rss_key_len, 0); if (!priv->rss_conf.rss_key) { - ret = -ENOMEM; - goto out; + rte_errno = ENOMEM; + return -rte_errno; + } memcpy(priv->rss_conf.rss_key, rss_conf->rss_key, rss_conf->rss_key_len); priv->rss_conf.rss_key_len = rss_conf->rss_key_len; } priv->rss_conf.rss_hf = rss_conf->rss_hf; -out: - priv_unlock(priv); - return ret; + /* Enable the RSS hash in all Rx queues. */ + for (i = 0, idx = 0; idx != priv->rxqs_n; ++i) { + if (!(*priv->rxqs)[i]) + continue; + (*priv->rxqs)[i]->rss_hash = !!rss_conf->rss_hf && + !!(dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS); + ++idx; + } + return 0; } /** @@ -103,7 +116,7 @@ out: * RSS configuration data. * * @return - * 0 on success, negative errno value on failure. + * 0 on success, a negative errno value otherwise and rte_errno is set. 
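Editor's note: mlx5_rss_hash_update() above now insists on a key of exactly rss_hash_default_key_len bytes and flips the rss_hash flag on every Rx queue. Assuming the usual 40-byte Toeplitz key length of this driver, the application-side call looks like:

#include <rte_ethdev.h>

/* Replace the RSS hash key and hash fields; key_len must match
 * rss_hash_default_key_len (40 bytes is assumed here). */
static int
example_set_rss(uint16_t port_id, uint8_t key[40])
{
	struct rte_eth_rss_conf conf = {
		.rss_key = key,
		.rss_key_len = 40,
		.rss_hf = ETH_RSS_IP | ETH_RSS_UDP | ETH_RSS_TCP,
	};

	return rte_eth_dev_rss_hash_update(port_id, &conf);
}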
*/ int mlx5_rss_hash_conf_get(struct rte_eth_dev *dev, @@ -111,9 +124,10 @@ mlx5_rss_hash_conf_get(struct rte_eth_dev *dev, { struct priv *priv = dev->data->dev_private; - if (!rss_conf) - return -EINVAL; - priv_lock(priv); + if (!rss_conf) { + rte_errno = EINVAL; + return -rte_errno; + } if (rss_conf->rss_key && (rss_conf->rss_key_len >= priv->rss_conf.rss_key_len)) { memcpy(rss_conf->rss_key, priv->rss_conf.rss_key, @@ -121,24 +135,24 @@ mlx5_rss_hash_conf_get(struct rte_eth_dev *dev, } rss_conf->rss_key_len = priv->rss_conf.rss_key_len; rss_conf->rss_hf = priv->rss_conf.rss_hf; - priv_unlock(priv); return 0; } /** * Allocate/reallocate RETA index table. * - * @param priv - * Pointer to private structure. + * @param dev + * Pointer to Ethernet device. * @param reta_size * The size of the array to allocate. * * @return - * 0 on success, errno value on failure. + * 0 on success, a negative errno value otherwise and rte_errno is set. */ int -priv_rss_reta_index_resize(struct priv *priv, unsigned int reta_size) +mlx5_rss_reta_index_resize(struct rte_eth_dev *dev, unsigned int reta_size) { + struct priv *priv = dev->data->dev_private; void *mem; unsigned int old_size = priv->reta_idx_n; @@ -147,11 +161,12 @@ priv_rss_reta_index_resize(struct priv *priv, unsigned int reta_size) mem = rte_realloc(priv->reta_idx, reta_size * sizeof((*priv->reta_idx)[0]), 0); - if (!mem) - return ENOMEM; + if (!mem) { + rte_errno = ENOMEM; + return -rte_errno; + } priv->reta_idx = mem; priv->reta_idx_n = reta_size; - if (old_size < reta_size) memset(&(*priv->reta_idx)[old_size], 0, (reta_size - old_size) * @@ -160,28 +175,31 @@ priv_rss_reta_index_resize(struct priv *priv, unsigned int reta_size) } /** - * Query RETA table. + * DPDK callback to get the RETA indirection table. * - * @param priv - * Pointer to private structure. - * @param[in, out] reta_conf - * Pointer to the first RETA configuration structure. + * @param dev + * Pointer to Ethernet device structure. + * @param reta_conf + * Pointer to RETA configuration structure array. * @param reta_size - * Number of entries. + * Size of the RETA table. * * @return - * 0 on success, errno value on failure. + * 0 on success, a negative errno value otherwise and rte_errno is set. */ -static int -priv_dev_rss_reta_query(struct priv *priv, +int +mlx5_dev_rss_reta_query(struct rte_eth_dev *dev, struct rte_eth_rss_reta_entry64 *reta_conf, - unsigned int reta_size) + uint16_t reta_size) { + struct priv *priv = dev->data->dev_private; unsigned int idx; unsigned int i; - if (!reta_size || reta_size > priv->reta_idx_n) - return EINVAL; + if (!reta_size || reta_size > priv->reta_idx_n) { + rte_errno = EINVAL; + return -rte_errno; + } /* Fill each entry of the table even if its bit is not set. */ for (idx = 0, i = 0; (i != reta_size); ++i) { idx = i / RTE_RETA_GROUP_SIZE; @@ -192,34 +210,36 @@ priv_dev_rss_reta_query(struct priv *priv, } /** - * Update RETA table. + * DPDK callback to update the RETA indirection table. * - * @param priv - * Pointer to private structure. - * @param[in] reta_conf - * Pointer to the first RETA configuration structure. + * @param dev + * Pointer to Ethernet device structure. + * @param reta_conf + * Pointer to RETA configuration structure array. + * @param reta_size - * Number of entries. + * Size of the RETA table. * * @return - * 0 on success, errno value on failure. + * 0 on success, a negative errno value otherwise and rte_errno is set. 
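Editor's note: mlx5_dev_rss_reta_update(), completed just below, resizes priv->reta_idx on demand and restarts a running port so the new table takes effect. Applications hand the table over in 64-entry groups; a sketch spreading one group across two queues:

#include <stdint.h>
#include <string.h>
#include <rte_ethdev.h>

/* Program a 64-entry RETA alternating between queues 0 and 1; real
 * table sizes should come from rte_eth_dev_info.reta_size. */
static int
example_set_reta(uint16_t port_id)
{
	struct rte_eth_rss_reta_entry64 reta_conf[1];
	unsigned int i;

	memset(reta_conf, 0, sizeof(reta_conf));
	reta_conf[0].mask = UINT64_MAX; /* update all 64 entries */
	for (i = 0; i != RTE_RETA_GROUP_SIZE; ++i)
		reta_conf[0].reta[i] = i % 2;
	return rte_eth_dev_rss_reta_update(port_id, reta_conf,
					   RTE_RETA_GROUP_SIZE);
}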
*/ -static int -priv_dev_rss_reta_update(struct priv *priv, +int +mlx5_dev_rss_reta_update(struct rte_eth_dev *dev, struct rte_eth_rss_reta_entry64 *reta_conf, - unsigned int reta_size) + uint16_t reta_size) { + int ret; + struct priv *priv = dev->data->dev_private; unsigned int idx; unsigned int i; unsigned int pos; - int ret; - if (!reta_size) - return EINVAL; - ret = priv_rss_reta_index_resize(priv, reta_size); + if (!reta_size) { + rte_errno = EINVAL; + return -rte_errno; + } + ret = mlx5_rss_reta_index_resize(dev, reta_size); if (ret) return ret; - for (idx = 0, i = 0; (i != reta_size); ++i) { idx = i / RTE_RETA_GROUP_SIZE; pos = i % RTE_RETA_GROUP_SIZE; @@ -228,63 +248,9 @@ priv_dev_rss_reta_update(struct priv *priv, assert(reta_conf[idx].reta[pos] < priv->rxqs_n); (*priv->reta_idx)[i] = reta_conf[idx].reta[pos]; } - return 0; -} - -/** - * DPDK callback to get the RETA indirection table. - * - * @param dev - * Pointer to Ethernet device structure. - * @param reta_conf - * Pointer to RETA configuration structure array. - * @param reta_size - * Size of the RETA table. - * - * @return - * 0 on success, negative errno value on failure. - */ -int -mlx5_dev_rss_reta_query(struct rte_eth_dev *dev, - struct rte_eth_rss_reta_entry64 *reta_conf, - uint16_t reta_size) -{ - int ret; - struct priv *priv = dev->data->dev_private; - - priv_lock(priv); - ret = priv_dev_rss_reta_query(priv, reta_conf, reta_size); - priv_unlock(priv); - return -ret; -} - -/** - * DPDK callback to update the RETA indirection table. - * - * @param dev - * Pointer to Ethernet device structure. - * @param reta_conf - * Pointer to RETA configuration structure array. - * @param reta_size - * Size of the RETA table. - * - * @return - * 0 on success, negative errno value on failure. - */ -int -mlx5_dev_rss_reta_update(struct rte_eth_dev *dev, - struct rte_eth_rss_reta_entry64 *reta_conf, - uint16_t reta_size) -{ - int ret; - struct priv *priv = dev->data->dev_private; - - priv_lock(priv); - ret = priv_dev_rss_reta_update(priv, reta_conf, reta_size); - priv_unlock(priv); if (dev->data->dev_started) { mlx5_dev_stop(dev); - mlx5_dev_start(dev); + return mlx5_dev_start(dev); } - return -ret; + return 0; } diff --git a/drivers/net/mlx5/mlx5_rxmode.c b/drivers/net/mlx5/mlx5_rxmode.c index 6fb245ba..23eae7c1 100644 --- a/drivers/net/mlx5/mlx5_rxmode.c +++ b/drivers/net/mlx5/mlx5_rxmode.c @@ -60,8 +60,13 @@ void mlx5_promiscuous_enable(struct rte_eth_dev *dev) { + int ret; + dev->data->promiscuous = 1; - mlx5_traffic_restart(dev); + ret = mlx5_traffic_restart(dev); + if (ret) + DRV_LOG(ERR, "port %u cannot enable promiscuous mode: %s", + dev->data->port_id, strerror(rte_errno)); } /** @@ -73,8 +78,13 @@ mlx5_promiscuous_enable(struct rte_eth_dev *dev) void mlx5_promiscuous_disable(struct rte_eth_dev *dev) { + int ret; + dev->data->promiscuous = 0; - mlx5_traffic_restart(dev); + ret = mlx5_traffic_restart(dev); + if (ret) + DRV_LOG(ERR, "port %u cannot disable promiscuous mode: %s", + dev->data->port_id, strerror(rte_errno)); } /** @@ -86,8 +96,13 @@ mlx5_promiscuous_disable(struct rte_eth_dev *dev) void mlx5_allmulticast_enable(struct rte_eth_dev *dev) { + int ret; + dev->data->all_multicast = 1; - mlx5_traffic_restart(dev); + ret = mlx5_traffic_restart(dev); + if (ret) + DRV_LOG(ERR, "port %u cannot enable allmulticast mode: %s", + dev->data->port_id, strerror(rte_errno)); } /** @@ -99,6 +114,11 @@ mlx5_allmulticast_enable(struct rte_eth_dev *dev) void mlx5_allmulticast_disable(struct rte_eth_dev *dev) { + int ret; + 
dev->data->all_multicast = 0; - mlx5_traffic_restart(dev); + ret = mlx5_traffic_restart(dev); + if (ret) + DRV_LOG(ERR, "port %u cannot disable allmulticast mode: %s", + dev->data->port_id, strerror(rte_errno)); } diff --git a/drivers/net/mlx5/mlx5_rxq.c b/drivers/net/mlx5/mlx5_rxq.c index 20f3ec6c..dcc5a87b 100644 --- a/drivers/net/mlx5/mlx5_rxq.c +++ b/drivers/net/mlx5/mlx5_rxq.c @@ -88,7 +88,7 @@ const size_t rss_hash_default_key_len = sizeof(rss_hash_default_key); * Pointer to RX queue structure. * * @return - * 0 on success, errno value on failure. + * 0 on success, a negative errno value otherwise and rte_errno is set. */ int rxq_alloc_elts(struct mlx5_rxq_ctrl *rxq_ctrl) @@ -96,7 +96,7 @@ rxq_alloc_elts(struct mlx5_rxq_ctrl *rxq_ctrl) { const unsigned int sges_n = 1 << rxq_ctrl->rxq.sges_n; unsigned int elts_n = 1 << rxq_ctrl->rxq.elts_n; unsigned int i; - int ret = 0; + int err; /* Iterate on segments. */ for (i = 0; (i != elts_n); ++i) { struct rte_mbuf *buf; buf = rte_pktmbuf_alloc(rxq_ctrl->rxq.mp); if (buf == NULL) { - ERROR("%p: empty mbuf pool", (void *)rxq_ctrl); - ret = ENOMEM; + DRV_LOG(ERR, "port %u empty mbuf pool", + PORT_ID(rxq_ctrl->priv)); + rte_errno = ENOMEM; goto error; } /* Headroom is reserved by rte_pktmbuf_alloc(). */ @@ -124,7 +125,7 @@ rxq_alloc_elts(struct mlx5_rxq_ctrl *rxq_ctrl) (*rxq_ctrl->rxq.elts)[i] = buf; } /* If Rx vector is activated. */ - if (rxq_check_vec_support(&rxq_ctrl->rxq) > 0) { + if (mlx5_rxq_check_vec_support(&rxq_ctrl->rxq) > 0) { struct mlx5_rxq_data *rxq = &rxq_ctrl->rxq; struct rte_mbuf *mbuf_init = &rxq->fake_mbuf; int j; @@ -145,20 +146,24 @@ rxq_alloc_elts(struct mlx5_rxq_ctrl *rxq_ctrl) for (j = 0; j < MLX5_VPMD_DESCS_PER_LOOP; ++j) (*rxq->elts)[elts_n + j] = &rxq->fake_mbuf; } - DEBUG("%p: allocated and configured %u segments (max %u packets)", - (void *)rxq_ctrl, elts_n, elts_n / (1 << rxq_ctrl->rxq.sges_n)); - assert(ret == 0); + DRV_LOG(DEBUG, + "port %u Rx queue %u allocated and configured %u segments" + " (max %u packets)", + PORT_ID(rxq_ctrl->priv), rxq_ctrl->idx, elts_n, + elts_n / (1 << rxq_ctrl->rxq.sges_n)); return 0; error: + err = rte_errno; /* Save rte_errno before cleanup. */ elts_n = i; for (i = 0; (i != elts_n); ++i) { if ((*rxq_ctrl->rxq.elts)[i] != NULL) rte_pktmbuf_free_seg((*rxq_ctrl->rxq.elts)[i]); (*rxq_ctrl->rxq.elts)[i] = NULL; } - DEBUG("%p: failed, freed everything", (void *)rxq_ctrl); - assert(ret > 0); - return ret; + DRV_LOG(DEBUG, "port %u Rx queue %u failed, freed everything", + PORT_ID(rxq_ctrl->priv), rxq_ctrl->idx); + rte_errno = err; /* Restore rte_errno. */ + return -rte_errno; } /** @@ -176,14 +181,15 @@ rxq_alloc_elts(struct mlx5_rxq_ctrl *rxq_ctrl) uint16_t used = q_n - (rxq->rq_ci - rxq->rq_pi); uint16_t i; - DEBUG("%p: freeing WRs", (void *)rxq_ctrl); + DRV_LOG(DEBUG, "port %u Rx queue %u freeing WRs", + PORT_ID(rxq_ctrl->priv), rxq_ctrl->idx); if (rxq->elts == NULL) return; /** * Some mbufs in the ring belong to the application. They cannot be * freed. 
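Editor's note: rxq_alloc_elts() above pre-fills the ring with mbufs and, when the pool runs dry, frees exactly the segments already taken before returning -rte_errno (note the save/restore of rte_errno around the cleanup loop). The allocate-or-unwind core, stripped of driver specifics:

#include <errno.h>
#include <rte_errno.h>
#include <rte_mbuf.h>

/* Fill elts[0..n-1] from mp; on failure release what was taken and
 * return -rte_errno, mirroring the structure of rxq_alloc_elts(). */
static int
example_fill_ring(struct rte_mempool *mp, struct rte_mbuf **elts,
		  unsigned int n)
{
	unsigned int i;

	for (i = 0; i != n; ++i) {
		elts[i] = rte_pktmbuf_alloc(mp);
		if (elts[i] == NULL) {
			rte_errno = ENOMEM;
			goto error;
		}
	}
	return 0;
error:
	while (i--)
		rte_pktmbuf_free_seg(elts[i]);
	return -rte_errno;
}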
*/ - if (rxq_check_vec_support(rxq) > 0) { + if (mlx5_rxq_check_vec_support(rxq) > 0) { for (i = 0; i < used; ++i) (*rxq->elts)[(rxq->rq_ci + i) & q_mask] = NULL; rxq->rq_pi = rxq->rq_ci; @@ -206,9 +212,10 @@ rxq_free_elts(struct mlx5_rxq_ctrl *rxq_ctrl) void mlx5_rxq_cleanup(struct mlx5_rxq_ctrl *rxq_ctrl) { - DEBUG("cleaning up %p", (void *)rxq_ctrl); + DRV_LOG(DEBUG, "port %u cleaning up Rx queue %u", + PORT_ID(rxq_ctrl->priv), rxq_ctrl->idx); if (rxq_ctrl->ibv) - mlx5_priv_rxq_ibv_release(rxq_ctrl->priv, rxq_ctrl->ibv); + mlx5_rxq_ibv_release(rxq_ctrl->ibv); memset(rxq_ctrl, 0, sizeof(*rxq_ctrl)); } @@ -228,55 +235,52 @@ mlx5_rxq_cleanup(struct mlx5_rxq_ctrl *rxq_ctrl) * Memory pool for buffer allocations. * * @return - * 0 on success, negative errno value on failure. + * 0 on success, a negative errno value otherwise and rte_errno is set. */ int mlx5_rx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc, - unsigned int socket, const struct rte_eth_rxconf *conf, + unsigned int socket, + const struct rte_eth_rxconf *conf __rte_unused, struct rte_mempool *mp) { struct priv *priv = dev->data->dev_private; struct mlx5_rxq_data *rxq = (*priv->rxqs)[idx]; struct mlx5_rxq_ctrl *rxq_ctrl = container_of(rxq, struct mlx5_rxq_ctrl, rxq); - int ret = 0; - (void)conf; - priv_lock(priv); if (!rte_is_power_of_2(desc)) { desc = 1 << log2above(desc); - WARN("%p: increased number of descriptors in RX queue %u" - " to the next power of two (%d)", - (void *)dev, idx, desc); + DRV_LOG(WARNING, + "port %u increased number of descriptors in Rx queue %u" + " to the next power of two (%d)", + dev->data->port_id, idx, desc); } - DEBUG("%p: configuring queue %u for %u descriptors", - (void *)dev, idx, desc); + DRV_LOG(DEBUG, "port %u configuring Rx queue %u for %u descriptors", + dev->data->port_id, idx, desc); if (idx >= priv->rxqs_n) { - ERROR("%p: queue index out of range (%u >= %u)", - (void *)dev, idx, priv->rxqs_n); - priv_unlock(priv); - return -EOVERFLOW; + DRV_LOG(ERR, "port %u Rx queue index out of range (%u >= %u)", + dev->data->port_id, idx, priv->rxqs_n); + rte_errno = EOVERFLOW; + return -rte_errno; } - if (!mlx5_priv_rxq_releasable(priv, idx)) { - ret = EBUSY; - ERROR("%p: unable to release queue index %u", - (void *)dev, idx); - goto out; + if (!mlx5_rxq_releasable(dev, idx)) { + DRV_LOG(ERR, "port %u unable to release queue index %u", + dev->data->port_id, idx); + rte_errno = EBUSY; + return -rte_errno; } - mlx5_priv_rxq_release(priv, idx); - rxq_ctrl = mlx5_priv_rxq_new(priv, idx, desc, socket, mp); + mlx5_rxq_release(dev, idx); + rxq_ctrl = mlx5_rxq_new(dev, idx, desc, socket, mp); if (!rxq_ctrl) { - ERROR("%p: unable to allocate queue index %u", - (void *)dev, idx); - ret = ENOMEM; - goto out; + DRV_LOG(ERR, "port %u unable to allocate queue index %u", + dev->data->port_id, idx); + rte_errno = ENOMEM; + return -rte_errno; } - DEBUG("%p: adding RX queue %p to list", - (void *)dev, (void *)rxq_ctrl); + DRV_LOG(DEBUG, "port %u adding Rx queue %u to list", + dev->data->port_id, idx); (*priv->rxqs)[idx] = &rxq_ctrl->rxq; -out: - priv_unlock(priv); - return -ret; + return 0; } /** @@ -296,45 +300,48 @@ mlx5_rx_queue_release(void *dpdk_rxq) return; rxq_ctrl = container_of(rxq, struct mlx5_rxq_ctrl, rxq); priv = rxq_ctrl->priv; - priv_lock(priv); - if (!mlx5_priv_rxq_releasable(priv, rxq_ctrl->rxq.stats.idx)) - rte_panic("Rx queue %p is still used by a flow and cannot be" - " removed\n", (void *)rxq_ctrl); - mlx5_priv_rxq_release(priv, rxq_ctrl->rxq.stats.idx); - priv_unlock(priv); + if 
(!mlx5_rxq_releasable(ETH_DEV(priv), rxq_ctrl->rxq.stats.idx)) + rte_panic("port %u Rx queue %u is still used by a flow and" + " cannot be removed\n", + PORT_ID(priv), rxq_ctrl->idx); + mlx5_rxq_release(ETH_DEV(priv), rxq_ctrl->rxq.stats.idx); } /** * Allocate queue vector and fill epoll fd list for Rx interrupts. * - * @param priv - * Pointer to private structure. + * @param dev + * Pointer to Ethernet device. * * @return - * 0 on success, negative on failure. + * 0 on success, a negative errno value otherwise and rte_errno is set. */ int -priv_rx_intr_vec_enable(struct priv *priv) +mlx5_rx_intr_vec_enable(struct rte_eth_dev *dev) { + struct priv *priv = dev->data->dev_private; unsigned int i; unsigned int rxqs_n = priv->rxqs_n; unsigned int n = RTE_MIN(rxqs_n, (uint32_t)RTE_MAX_RXTX_INTR_VEC_ID); unsigned int count = 0; - struct rte_intr_handle *intr_handle = priv->dev->intr_handle; + struct rte_intr_handle *intr_handle = dev->intr_handle; - if (!priv->dev->data->dev_conf.intr_conf.rxq) + if (!dev->data->dev_conf.intr_conf.rxq) return 0; - priv_rx_intr_vec_disable(priv); + mlx5_rx_intr_vec_disable(dev); intr_handle->intr_vec = malloc(n * sizeof(intr_handle->intr_vec[0])); if (intr_handle->intr_vec == NULL) { - ERROR("failed to allocate memory for interrupt vector," - " Rx interrupts will not be supported"); - return -ENOMEM; + DRV_LOG(ERR, + "port %u failed to allocate memory for interrupt" + " vector, Rx interrupts will not be supported", + dev->data->port_id); + rte_errno = ENOMEM; + return -rte_errno; } intr_handle->type = RTE_INTR_HANDLE_EXT; for (i = 0; i != n; ++i) { /* This rxq ibv must not be released in this function. */ - struct mlx5_rxq_ibv *rxq_ibv = mlx5_priv_rxq_ibv_get(priv, i); + struct mlx5_rxq_ibv *rxq_ibv = mlx5_rxq_ibv_get(dev, i); int fd; int flags; int rc; @@ -348,27 +355,34 @@ priv_rx_intr_vec_enable(struct priv *priv) continue; } if (count >= RTE_MAX_RXTX_INTR_VEC_ID) { - ERROR("too many Rx queues for interrupt vector size" - " (%d), Rx interrupts cannot be enabled", - RTE_MAX_RXTX_INTR_VEC_ID); - priv_rx_intr_vec_disable(priv); - return -1; + DRV_LOG(ERR, + "port %u too many Rx queues for interrupt" + " vector size (%d), Rx interrupts cannot be" + " enabled", + dev->data->port_id, RTE_MAX_RXTX_INTR_VEC_ID); + mlx5_rx_intr_vec_disable(dev); + rte_errno = ENOMEM; + return -rte_errno; } fd = rxq_ibv->channel->fd; flags = fcntl(fd, F_GETFL); rc = fcntl(fd, F_SETFL, flags | O_NONBLOCK); if (rc < 0) { - ERROR("failed to make Rx interrupt file descriptor" - " %d non-blocking for queue index %d", fd, i); - priv_rx_intr_vec_disable(priv); - return -1; + rte_errno = errno; + DRV_LOG(ERR, + "port %u failed to make Rx interrupt file" + " descriptor %d non-blocking for queue index" + " %d", + dev->data->port_id, fd, i); + mlx5_rx_intr_vec_disable(dev); + return -rte_errno; } intr_handle->intr_vec[i] = RTE_INTR_VEC_RXTX_OFFSET + count; intr_handle->efds[count] = fd; count++; } if (!count) - priv_rx_intr_vec_disable(priv); + mlx5_rx_intr_vec_disable(dev); else intr_handle->nb_efd = count; return 0; @@ -377,18 +391,19 @@ priv_rx_intr_vec_enable(struct priv *priv) /** * Clean up Rx interrupts handler. * - * @param priv - * Pointer to private structure. + * @param dev + * Pointer to Ethernet device. 
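* Besides freeing the epoll fd list, this drops the extra reference taken
* on each queue's Verbs object by mlx5_rx_intr_vec_enable(). For context, a
* minimal sketch of how an application drives these handlers through the
* public ethdev API (illustrative only; port_id, queue_id and the queue
* counts are placeholders, error handling elided):
*
*   struct rte_eth_conf conf = { .intr_conf = { .rxq = 1 } };
*
*   rte_eth_dev_configure(port_id, rxqs_n, txqs_n, &conf);
*   rte_eth_dev_rx_intr_enable(port_id, queue_id);
*   ... wait for the event, receive a burst, then re-arm or ...
*   rte_eth_dev_rx_intr_disable(port_id, queue_id);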
*/ void -priv_rx_intr_vec_disable(struct priv *priv) +mlx5_rx_intr_vec_disable(struct rte_eth_dev *dev) { - struct rte_intr_handle *intr_handle = priv->dev->intr_handle; + struct priv *priv = dev->data->dev_private; + struct rte_intr_handle *intr_handle = dev->intr_handle; unsigned int i; unsigned int rxqs_n = priv->rxqs_n; unsigned int n = RTE_MIN(rxqs_n, (uint32_t)RTE_MAX_RXTX_INTR_VEC_ID); - if (!priv->dev->data->dev_conf.intr_conf.rxq) + if (!dev->data->dev_conf.intr_conf.rxq) return; if (!intr_handle->intr_vec) goto free; @@ -405,7 +420,7 @@ priv_rx_intr_vec_disable(struct priv *priv) */ rxq_data = (*priv->rxqs)[i]; rxq_ctrl = container_of(rxq_data, struct mlx5_rxq_ctrl, rxq); - mlx5_priv_rxq_ibv_release(priv, rxq_ctrl->ibv); + mlx5_rxq_ibv_release(rxq_ctrl->ibv); } free: rte_intr_free_epoll_fd(intr_handle); @@ -449,39 +464,33 @@ mlx5_arm_cq(struct mlx5_rxq_data *rxq, int sq_n_rxq) * Rx queue number. * * @return - * 0 on success, negative on failure. + * 0 on success, a negative errno value otherwise and rte_errno is set. */ int mlx5_rx_intr_enable(struct rte_eth_dev *dev, uint16_t rx_queue_id) { - struct priv *priv = mlx5_get_priv(dev); + struct priv *priv = dev->data->dev_private; struct mlx5_rxq_data *rxq_data; struct mlx5_rxq_ctrl *rxq_ctrl; - int ret = 0; - priv_lock(priv); rxq_data = (*priv->rxqs)[rx_queue_id]; if (!rxq_data) { - ret = EINVAL; - goto exit; + rte_errno = EINVAL; + return -rte_errno; } rxq_ctrl = container_of(rxq_data, struct mlx5_rxq_ctrl, rxq); if (rxq_ctrl->irq) { struct mlx5_rxq_ibv *rxq_ibv; - rxq_ibv = mlx5_priv_rxq_ibv_get(priv, rx_queue_id); + rxq_ibv = mlx5_rxq_ibv_get(dev, rx_queue_id); if (!rxq_ibv) { - ret = EINVAL; - goto exit; + rte_errno = EINVAL; + return -rte_errno; } mlx5_arm_cq(rxq_data, rxq_data->cq_arm_sn); - mlx5_priv_rxq_ibv_release(priv, rxq_ibv); + mlx5_rxq_ibv_release(rxq_ibv); } -exit: - priv_unlock(priv); - if (ret) - WARN("unable to arm interrupt on rx queue %d", rx_queue_id); - return -ret; + return 0; } /** @@ -493,64 +502,65 @@ exit: * Rx queue number. * * @return - * 0 on success, negative on failure. + * 0 on success, a negative errno value otherwise and rte_errno is set. */ int mlx5_rx_intr_disable(struct rte_eth_dev *dev, uint16_t rx_queue_id) { - struct priv *priv = mlx5_get_priv(dev); + struct priv *priv = dev->data->dev_private; struct mlx5_rxq_data *rxq_data; struct mlx5_rxq_ctrl *rxq_ctrl; struct mlx5_rxq_ibv *rxq_ibv = NULL; struct ibv_cq *ev_cq; void *ev_ctx; - int ret = 0; + int ret; - priv_lock(priv); rxq_data = (*priv->rxqs)[rx_queue_id]; if (!rxq_data) { - ret = EINVAL; - goto exit; + rte_errno = EINVAL; + return -rte_errno; } rxq_ctrl = container_of(rxq_data, struct mlx5_rxq_ctrl, rxq); if (!rxq_ctrl->irq) - goto exit; - rxq_ibv = mlx5_priv_rxq_ibv_get(priv, rx_queue_id); + return 0; + rxq_ibv = mlx5_rxq_ibv_get(dev, rx_queue_id); if (!rxq_ibv) { - ret = EINVAL; - goto exit; + rte_errno = EINVAL; + return -rte_errno; } ret = ibv_get_cq_event(rxq_ibv->channel, &ev_cq, &ev_ctx); if (ret || ev_cq != rxq_ibv->cq) { - ret = EINVAL; + rte_errno = EINVAL; goto exit; } rxq_data->cq_arm_sn++; ibv_ack_cq_events(rxq_ibv->cq, 1); + return 0; exit: + ret = rte_errno; /* Save rte_errno before cleanup. 
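The release helper invoked below may itself fail and overwrite rte_errno, so the original error cause is latched here and restored once cleanup is done.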
*/ if (rxq_ibv) - mlx5_priv_rxq_ibv_release(priv, rxq_ibv); - priv_unlock(priv); - if (ret) - WARN("unable to disable interrupt on rx queue %d", - rx_queue_id); - return -ret; + mlx5_rxq_ibv_release(rxq_ibv); + DRV_LOG(WARNING, "port %u unable to disable interrupt on Rx queue %d", + dev->data->port_id, rx_queue_id); + rte_errno = ret; /* Restore rte_errno. */ + return -rte_errno; } /** * Create the Rx queue Verbs object. * - * @param priv - * Pointer to private structure. + * @param dev + * Pointer to Ethernet device. * @param idx * Queue index in DPDK Rx queue array * * @return - * The Verbs object initialised if it can be created. + * The Verbs object initialised, NULL otherwise and rte_errno is set. */ -struct mlx5_rxq_ibv* -mlx5_priv_rxq_ibv_new(struct priv *priv, uint16_t idx) +struct mlx5_rxq_ibv * +mlx5_rxq_ibv_new(struct rte_eth_dev *dev, uint16_t idx) { + struct priv *priv = dev->data->dev_private; struct mlx5_rxq_data *rxq_data = (*priv->rxqs)[idx]; struct mlx5_rxq_ctrl *rxq_ctrl = container_of(rxq_data, struct mlx5_rxq_ctrl, rxq); @@ -573,28 +583,34 @@ mlx5_priv_rxq_ibv_new(struct priv *priv, uint16_t idx) assert(rxq_data); assert(!rxq_ctrl->ibv); + priv->verbs_alloc_ctx.type = MLX5_VERBS_ALLOC_TYPE_RX_QUEUE; + priv->verbs_alloc_ctx.obj = rxq_ctrl; tmpl = rte_calloc_socket(__func__, 1, sizeof(*tmpl), 0, rxq_ctrl->socket); if (!tmpl) { - ERROR("%p: cannot allocate verbs resources", - (void *)rxq_ctrl); + DRV_LOG(ERR, + "port %u Rx queue %u cannot allocate verbs resources", + dev->data->port_id, rxq_ctrl->idx); + rte_errno = ENOMEM; goto error; } tmpl->rxq_ctrl = rxq_ctrl; /* Use the entire RX mempool as the memory region. */ - tmpl->mr = priv_mr_get(priv, rxq_data->mp); + tmpl->mr = mlx5_mr_get(dev, rxq_data->mp); if (!tmpl->mr) { - tmpl->mr = priv_mr_new(priv, rxq_data->mp); + tmpl->mr = mlx5_mr_new(dev, rxq_data->mp); if (!tmpl->mr) { - ERROR("%p: MR creation failure", (void *)rxq_ctrl); + DRV_LOG(ERR, "port %u: memory region creation failure", + dev->data->port_id); goto error; } } if (rxq_ctrl->irq) { tmpl->channel = ibv_create_comp_channel(priv->ctx); if (!tmpl->channel) { - ERROR("%p: Comp Channel creation failure", - (void *)rxq_ctrl); + DRV_LOG(ERR, "port %u: comp channel creation failure", + dev->data->port_id); + rte_errno = ENOMEM; goto error; } } @@ -614,21 +630,26 @@ mlx5_priv_rxq_ibv_new(struct priv *priv, uint16_t idx) * For vectorized Rx, it must not be doubled in order to * make cq_ci and rq_ci aligned. 
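* Presumably the vectorized routines step cq_ci and rq_ci together in
* fixed-size batches, which a doubled CQ would break.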
*/ - if (rxq_check_vec_support(rxq_data) < 0) + if (mlx5_rxq_check_vec_support(rxq_data) < 0) attr.cq.ibv.cqe *= 2; } else if (priv->cqe_comp && rxq_data->hw_timestamp) { - DEBUG("Rx CQE compression is disabled for HW timestamp"); + DRV_LOG(DEBUG, + "port %u Rx CQE compression is disabled for HW" + " timestamp", + dev->data->port_id); } tmpl->cq = ibv_cq_ex_to_cq(mlx5dv_create_cq(priv->ctx, &attr.cq.ibv, &attr.cq.mlx5)); if (tmpl->cq == NULL) { - ERROR("%p: CQ creation failure", (void *)rxq_ctrl); + DRV_LOG(ERR, "port %u Rx queue %u CQ creation failure", + dev->data->port_id, idx); + rte_errno = ENOMEM; goto error; } - DEBUG("priv->device_attr.max_qp_wr is %d", - priv->device_attr.orig_attr.max_qp_wr); - DEBUG("priv->device_attr.max_sge is %d", - priv->device_attr.orig_attr.max_sge); + DRV_LOG(DEBUG, "port %u priv->device_attr.max_qp_wr is %d", + dev->data->port_id, priv->device_attr.orig_attr.max_qp_wr); + DRV_LOG(DEBUG, "port %u priv->device_attr.max_sge is %d", + dev->data->port_id, priv->device_attr.orig_attr.max_sge); attr.wq = (struct ibv_wq_init_attr){ .wq_context = NULL, /* Could be useful in the future. */ .wq_type = IBV_WQT_RQ, @@ -658,7 +679,9 @@ mlx5_priv_rxq_ibv_new(struct priv *priv, uint16_t idx) #endif tmpl->wq = ibv_create_wq(priv->ctx, &attr.wq); if (tmpl->wq == NULL) { - ERROR("%p: WQ creation failure", (void *)rxq_ctrl); + DRV_LOG(ERR, "port %u Rx queue %u WQ creation failure", + dev->data->port_id, idx); + rte_errno = ENOMEM; goto error; } /* @@ -668,11 +691,14 @@ mlx5_priv_rxq_ibv_new(struct priv *priv, uint16_t idx) if (((int)attr.wq.max_wr != ((1 << rxq_data->elts_n) >> rxq_data->sges_n)) || ((int)attr.wq.max_sge != (1 << rxq_data->sges_n))) { - ERROR("%p: requested %u*%u but got %u*%u WRs*SGEs", - (void *)rxq_ctrl, - ((1 << rxq_data->elts_n) >> rxq_data->sges_n), - (1 << rxq_data->sges_n), - attr.wq.max_wr, attr.wq.max_sge); + DRV_LOG(ERR, + "port %u Rx queue %u requested %u*%u but got %u*%u" + " WRs*SGEs", + dev->data->port_id, idx, + ((1 << rxq_data->elts_n) >> rxq_data->sges_n), + (1 << rxq_data->sges_n), + attr.wq.max_wr, attr.wq.max_sge); + rte_errno = EINVAL; goto error; } /* Change queue state to ready. */ @@ -682,8 +708,10 @@ mlx5_priv_rxq_ibv_new(struct priv *priv, uint16_t idx) }; ret = ibv_modify_wq(tmpl->wq, &mod); if (ret) { - ERROR("%p: WQ state to IBV_WQS_RDY failed", - (void *)rxq_ctrl); + DRV_LOG(ERR, + "port %u Rx queue %u WQ state to IBV_WQS_RDY failed", + dev->data->port_id, idx); + rte_errno = ret; goto error; } obj.cq.in = tmpl->cq; @@ -691,11 +719,16 @@ mlx5_priv_rxq_ibv_new(struct priv *priv, uint16_t idx) obj.rwq.in = tmpl->wq; obj.rwq.out = &rwq; ret = mlx5dv_init_obj(&obj, MLX5DV_OBJ_CQ | MLX5DV_OBJ_RWQ); - if (ret != 0) + if (ret) { + rte_errno = ret; goto error; + } if (cq_info.cqe_size != RTE_CACHE_LINE_SIZE) { - ERROR("Wrong MLX5_CQE_SIZE environment variable value: " - "it should be set to %u", RTE_CACHE_LINE_SIZE); + DRV_LOG(ERR, + "port %u wrong MLX5_CQE_SIZE environment variable" + " value: it should be set to %u", + dev->data->port_id, RTE_CACHE_LINE_SIZE); + rte_errno = EINVAL; goto error; } /* Fill the rings. 
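In the loop elided by the diff here, each receive descriptor is seeded with the address, byte count and lkey of one preallocated mbuf; rq_ci is then published through rte_wmb() and the doorbell record just below.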
*/ @@ -731,13 +764,16 @@ mlx5_priv_rxq_ibv_new(struct priv *priv, uint16_t idx) rxq_data->rq_ci = (1 << rxq_data->elts_n) >> rxq_data->sges_n; rte_wmb(); *rxq_data->rq_db = rte_cpu_to_be_32(rxq_data->rq_ci); - DEBUG("%p: rxq updated with %p", (void *)rxq_ctrl, (void *)&tmpl); + DRV_LOG(DEBUG, "port %u rxq %u updated with %p", dev->data->port_id, + idx, (void *)&tmpl); rte_atomic32_inc(&tmpl->refcnt); - DEBUG("%p: Verbs Rx queue %p: refcnt %d", (void *)priv, - (void *)tmpl, rte_atomic32_read(&tmpl->refcnt)); + DRV_LOG(DEBUG, "port %u Verbs Rx queue %u: refcnt %d", + dev->data->port_id, idx, rte_atomic32_read(&tmpl->refcnt)); LIST_INSERT_HEAD(&priv->rxqsibv, tmpl, next); + priv->verbs_alloc_ctx.type = MLX5_VERBS_ALLOC_TYPE_NONE; return tmpl; error: + ret = rte_errno; /* Save rte_errno before cleanup. */ if (tmpl->wq) claim_zero(ibv_destroy_wq(tmpl->wq)); if (tmpl->cq) @@ -745,24 +781,27 @@ error: if (tmpl->channel) claim_zero(ibv_destroy_comp_channel(tmpl->channel)); if (tmpl->mr) - priv_mr_release(priv, tmpl->mr); + mlx5_mr_release(tmpl->mr); + priv->verbs_alloc_ctx.type = MLX5_VERBS_ALLOC_TYPE_NONE; + rte_errno = ret; /* Restore rte_errno. */ return NULL; } /** * Get an Rx queue Verbs object. * - * @param priv - * Pointer to private structure. + * @param dev + * Pointer to Ethernet device. * @param idx * Queue index in DPDK Rx queue array * * @return * The Verbs object if it exists. */ -struct mlx5_rxq_ibv* -mlx5_priv_rxq_ibv_get(struct priv *priv, uint16_t idx) +struct mlx5_rxq_ibv * +mlx5_rxq_ibv_get(struct rte_eth_dev *dev, uint16_t idx) { + struct priv *priv = dev->data->dev_private; struct mlx5_rxq_data *rxq_data = (*priv->rxqs)[idx]; struct mlx5_rxq_ctrl *rxq_ctrl; @@ -772,11 +811,11 @@ mlx5_priv_rxq_ibv_get(struct priv *priv, uint16_t idx) return NULL; rxq_ctrl = container_of(rxq_data, struct mlx5_rxq_ctrl, rxq); if (rxq_ctrl->ibv) { - priv_mr_get(priv, rxq_data->mp); + mlx5_mr_get(dev, rxq_data->mp); rte_atomic32_inc(&rxq_ctrl->ibv->refcnt); - DEBUG("%p: Verbs Rx queue %p: refcnt %d", (void *)priv, - (void *)rxq_ctrl->ibv, - rte_atomic32_read(&rxq_ctrl->ibv->refcnt)); + DRV_LOG(DEBUG, "port %u Verbs Rx queue %u: refcnt %d", + dev->data->port_id, rxq_ctrl->idx, + rte_atomic32_read(&rxq_ctrl->ibv->refcnt)); } return rxq_ctrl->ibv; } @@ -784,16 +823,14 @@ mlx5_priv_rxq_ibv_get(struct priv *priv, uint16_t idx) /** * Release an Rx verbs queue object. * - * @param priv - * Pointer to private structure. * @param rxq_ibv * Verbs Rx queue object. * * @return - * 0 on success, errno value on failure. + * 1 while a reference on it exists, 0 when freed. 
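* Under the new convention a caller clears its pointer only once the object
* is really gone, e.g. (pattern used by the Rx queue release path below):
*
*   if (rxq_ctrl->ibv && !mlx5_rxq_ibv_release(rxq_ctrl->ibv))
*           rxq_ctrl->ibv = NULL;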
*/ int -mlx5_priv_rxq_ibv_release(struct priv *priv, struct mlx5_rxq_ibv *rxq_ibv) +mlx5_rxq_ibv_release(struct mlx5_rxq_ibv *rxq_ibv) { int ret; @@ -801,11 +838,12 @@ mlx5_priv_rxq_ibv_release(struct priv *priv, struct mlx5_rxq_ibv *rxq_ibv) assert(rxq_ibv->wq); assert(rxq_ibv->cq); assert(rxq_ibv->mr); - ret = priv_mr_release(priv, rxq_ibv->mr); + ret = mlx5_mr_release(rxq_ibv->mr); if (!ret) rxq_ibv->mr = NULL; - DEBUG("%p: Verbs Rx queue %p: refcnt %d", (void *)priv, - (void *)rxq_ibv, rte_atomic32_read(&rxq_ibv->refcnt)); + DRV_LOG(DEBUG, "port %u Verbs Rx queue %u: refcnt %d", + PORT_ID(rxq_ibv->rxq_ctrl->priv), + rxq_ibv->rxq_ctrl->idx, rte_atomic32_read(&rxq_ibv->refcnt)); if (rte_atomic32_dec_and_test(&rxq_ibv->refcnt)) { rxq_free_elts(rxq_ibv->rxq_ctrl); claim_zero(ibv_destroy_wq(rxq_ibv->wq)); @@ -816,26 +854,28 @@ mlx5_priv_rxq_ibv_release(struct priv *priv, struct mlx5_rxq_ibv *rxq_ibv) rte_free(rxq_ibv); return 0; } - return EBUSY; + return 1; } /** * Verify the Verbs Rx queue list is empty * - * @param priv - * Pointer to private structure. + * @param dev + * Pointer to Ethernet device. * - * @return the number of object not released. + * @return + * The number of object not released. */ int -mlx5_priv_rxq_ibv_verify(struct priv *priv) +mlx5_rxq_ibv_verify(struct rte_eth_dev *dev) { + struct priv *priv = dev->data->dev_private; int ret = 0; struct mlx5_rxq_ibv *rxq_ibv; LIST_FOREACH(rxq_ibv, &priv->rxqsibv, next) { - DEBUG("%p: Verbs Rx queue %p still referenced", (void *)priv, - (void *)rxq_ibv); + DRV_LOG(DEBUG, "port %u Verbs Rx queue %u still referenced", + dev->data->port_id, rxq_ibv->rxq_ctrl->idx); ++ret; } return ret; @@ -844,15 +884,12 @@ mlx5_priv_rxq_ibv_verify(struct priv *priv) /** * Return true if a single reference exists on the object. * - * @param priv - * Pointer to private structure. * @param rxq_ibv * Verbs Rx queue object. */ int -mlx5_priv_rxq_ibv_releasable(struct priv *priv, struct mlx5_rxq_ibv *rxq_ibv) +mlx5_rxq_ibv_releasable(struct mlx5_rxq_ibv *rxq_ibv) { - (void)priv; assert(rxq_ibv); return (rte_atomic32_read(&rxq_ibv->refcnt) == 1); } @@ -860,8 +897,8 @@ mlx5_priv_rxq_ibv_releasable(struct priv *priv, struct mlx5_rxq_ibv *rxq_ibv) /** * Create a DPDK Rx queue. * - * @param priv - * Pointer to private structure. + * @param dev + * Pointer to Ethernet device. * @param idx * TX queue index. * @param desc @@ -870,13 +907,13 @@ mlx5_priv_rxq_ibv_releasable(struct priv *priv, struct mlx5_rxq_ibv *rxq_ibv) * NUMA socket on which memory must be allocated. * * @return - * A DPDK queue object on success. + * A DPDK queue object on success, NULL otherwise and rte_errno is set. */ -struct mlx5_rxq_ctrl* -mlx5_priv_rxq_new(struct priv *priv, uint16_t idx, uint16_t desc, - unsigned int socket, struct rte_mempool *mp) +struct mlx5_rxq_ctrl * +mlx5_rxq_new(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc, + unsigned int socket, struct rte_mempool *mp) { - struct rte_eth_dev *dev = priv->dev; + struct priv *priv = dev->data->dev_private; struct mlx5_rxq_ctrl *tmpl; const uint16_t desc_n = desc + priv->rx_vec_en * MLX5_VPMD_DESCS_PER_LOOP; @@ -886,10 +923,12 @@ mlx5_priv_rxq_new(struct priv *priv, uint16_t idx, uint16_t desc, sizeof(*tmpl) + desc_n * sizeof(struct rte_mbuf *), 0, socket); - if (!tmpl) + if (!tmpl) { + rte_errno = ENOMEM; return NULL; + } tmpl->socket = socket; - if (priv->dev->data->dev_conf.intr_conf.rxq) + if (dev->data->dev_conf.intr_conf.rxq) tmpl->irq = 1; /* Enable scattered packets support for this queue if necessary. 
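A frame larger than one mbuf is spread across 1 << sges_n segments; the checks that follow verify the segments can actually cover max_rx_pkt_len and that desc is a multiple of the per-packet segment count.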
*/ assert(mb_len >= RTE_PKTMBUF_HEADROOM); @@ -912,29 +951,34 @@ mlx5_priv_rxq_new(struct priv *priv, uint16_t idx, uint16_t desc, size = mb_len * (1 << tmpl->rxq.sges_n); size -= RTE_PKTMBUF_HEADROOM; if (size < dev->data->dev_conf.rxmode.max_rx_pkt_len) { - ERROR("%p: too many SGEs (%u) needed to handle" - " requested maximum packet size %u", - (void *)dev, - 1 << sges_n, - dev->data->dev_conf.rxmode.max_rx_pkt_len); + DRV_LOG(ERR, + "port %u too many SGEs (%u) needed to handle" + " requested maximum packet size %u", + dev->data->port_id, + 1 << sges_n, + dev->data->dev_conf.rxmode.max_rx_pkt_len); + rte_errno = EOVERFLOW; goto error; } } else { - WARN("%p: the requested maximum Rx packet size (%u) is" - " larger than a single mbuf (%u) and scattered" - " mode has not been requested", - (void *)dev, - dev->data->dev_conf.rxmode.max_rx_pkt_len, - mb_len - RTE_PKTMBUF_HEADROOM); + DRV_LOG(WARNING, + "port %u the requested maximum Rx packet size (%u) is" + " larger than a single mbuf (%u) and scattered mode has" + " not been requested", + dev->data->port_id, + dev->data->dev_conf.rxmode.max_rx_pkt_len, + mb_len - RTE_PKTMBUF_HEADROOM); } - DEBUG("%p: maximum number of segments per packet: %u", - (void *)dev, 1 << tmpl->rxq.sges_n); + DRV_LOG(DEBUG, "port %u maximum number of segments per packet: %u", + dev->data->port_id, 1 << tmpl->rxq.sges_n); if (desc % (1 << tmpl->rxq.sges_n)) { - ERROR("%p: number of RX queue descriptors (%u) is not a" - " multiple of SGEs per packet (%u)", - (void *)dev, - desc, - 1 << tmpl->rxq.sges_n); + DRV_LOG(ERR, + "port %u number of Rx queue descriptors (%u) is not a" + " multiple of SGEs per packet (%u)", + dev->data->port_id, + desc, + 1 << tmpl->rxq.sges_n); + rte_errno = EINVAL; goto error; } /* Toggle RX checksum offload if hardware supports it. */ @@ -954,19 +998,22 @@ mlx5_priv_rxq_new(struct priv *priv, uint16_t idx, uint16_t desc, } else if (priv->hw_fcs_strip) { tmpl->rxq.crc_present = 1; } else { - WARN("%p: CRC stripping has been disabled but will still" - " be performed by hardware, make sure MLNX_OFED and" - " firmware are up to date", - (void *)dev); + DRV_LOG(WARNING, + "port %u CRC stripping has been disabled but will" + " still be performed by hardware, make sure MLNX_OFED" + " and firmware are up to date", + dev->data->port_id); tmpl->rxq.crc_present = 0; } - DEBUG("%p: CRC stripping is %s, %u bytes will be subtracted from" - " incoming frames to hide it", - (void *)dev, - tmpl->rxq.crc_present ? "disabled" : "enabled", - tmpl->rxq.crc_present << 2); + DRV_LOG(DEBUG, + "port %u CRC stripping is %s, %u bytes will be subtracted from" + " incoming frames to hide it", + dev->data->port_id, + tmpl->rxq.crc_present ? "disabled" : "enabled", + tmpl->rxq.crc_present << 2); /* Save port ID. 
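Note the rss_hash flag just below: it is now derived from the application actually requesting RSS (a non-zero rss_hf and ETH_MQ_RX_RSS in mq_mode) instead of merely having more than one Rx queue.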
*/ - tmpl->rxq.rss_hash = priv->rxqs_n > 1; + tmpl->rxq.rss_hash = !!priv->rss_conf.rss_hf && + (!!(dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS)); tmpl->rxq.port_id = dev->data->port_id; tmpl->priv = priv; tmpl->rxq.mp = mp; @@ -974,9 +1021,10 @@ mlx5_priv_rxq_new(struct priv *priv, uint16_t idx, uint16_t desc, tmpl->rxq.elts_n = log2above(desc); tmpl->rxq.elts = (struct rte_mbuf *(*)[1 << tmpl->rxq.elts_n])(tmpl + 1); + tmpl->idx = idx; rte_atomic32_inc(&tmpl->refcnt); - DEBUG("%p: Rx queue %p: refcnt %d", (void *)priv, - (void *)tmpl, rte_atomic32_read(&tmpl->refcnt)); + DRV_LOG(DEBUG, "port %u Rx queue %u: refcnt %d", dev->data->port_id, + idx, rte_atomic32_read(&tmpl->refcnt)); LIST_INSERT_HEAD(&priv->rxqsctrl, tmpl, next); return tmpl; error: @@ -987,28 +1035,29 @@ error: /** * Get a Rx queue. * - * @param priv - * Pointer to private structure. + * @param dev + * Pointer to Ethernet device. * @param idx * TX queue index. * * @return - * A pointer to the queue if it exists. + * A pointer to the queue if it exists, NULL otherwise. */ -struct mlx5_rxq_ctrl* -mlx5_priv_rxq_get(struct priv *priv, uint16_t idx) +struct mlx5_rxq_ctrl * +mlx5_rxq_get(struct rte_eth_dev *dev, uint16_t idx) { + struct priv *priv = dev->data->dev_private; struct mlx5_rxq_ctrl *rxq_ctrl = NULL; if ((*priv->rxqs)[idx]) { rxq_ctrl = container_of((*priv->rxqs)[idx], struct mlx5_rxq_ctrl, rxq); - - mlx5_priv_rxq_ibv_get(priv, idx); + mlx5_rxq_ibv_get(dev, idx); rte_atomic32_inc(&rxq_ctrl->refcnt); - DEBUG("%p: Rx queue %p: refcnt %d", (void *)priv, - (void *)rxq_ctrl, rte_atomic32_read(&rxq_ctrl->refcnt)); + DRV_LOG(DEBUG, "port %u Rx queue %u: refcnt %d", + dev->data->port_id, rxq_ctrl->idx, + rte_atomic32_read(&rxq_ctrl->refcnt)); } return rxq_ctrl; } @@ -1016,59 +1065,59 @@ mlx5_priv_rxq_get(struct priv *priv, uint16_t idx) /** * Release a Rx queue. * - * @param priv - * Pointer to private structure. + * @param dev + * Pointer to Ethernet device. * @param idx * TX queue index. * * @return - * 0 on success, errno value on failure. + * 1 while a reference on it exists, 0 when freed. */ int -mlx5_priv_rxq_release(struct priv *priv, uint16_t idx) +mlx5_rxq_release(struct rte_eth_dev *dev, uint16_t idx) { + struct priv *priv = dev->data->dev_private; struct mlx5_rxq_ctrl *rxq_ctrl; if (!(*priv->rxqs)[idx]) return 0; rxq_ctrl = container_of((*priv->rxqs)[idx], struct mlx5_rxq_ctrl, rxq); assert(rxq_ctrl->priv); - if (rxq_ctrl->ibv) { - int ret; - - ret = mlx5_priv_rxq_ibv_release(rxq_ctrl->priv, rxq_ctrl->ibv); - if (!ret) - rxq_ctrl->ibv = NULL; - } - DEBUG("%p: Rx queue %p: refcnt %d", (void *)priv, - (void *)rxq_ctrl, rte_atomic32_read(&rxq_ctrl->refcnt)); + if (rxq_ctrl->ibv && !mlx5_rxq_ibv_release(rxq_ctrl->ibv)) + rxq_ctrl->ibv = NULL; + DRV_LOG(DEBUG, "port %u Rx queue %u: refcnt %d", dev->data->port_id, + rxq_ctrl->idx, rte_atomic32_read(&rxq_ctrl->refcnt)); if (rte_atomic32_dec_and_test(&rxq_ctrl->refcnt)) { LIST_REMOVE(rxq_ctrl, next); rte_free(rxq_ctrl); (*priv->rxqs)[idx] = NULL; return 0; } - return EBUSY; + return 1; } /** * Verify if the queue can be released. * - * @param priv - * Pointer to private structure. + * @param dev + * Pointer to Ethernet device. * @param idx * TX queue index. * * @return - * 1 if the queue can be released. + * 1 if the queue can be released, negative errno otherwise and rte_errno is + * set. 
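* (A return of 0 means additional references are still held and the queue
* therefore cannot be released yet.)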
*/ int -mlx5_priv_rxq_releasable(struct priv *priv, uint16_t idx) +mlx5_rxq_releasable(struct rte_eth_dev *dev, uint16_t idx) { + struct priv *priv = dev->data->dev_private; struct mlx5_rxq_ctrl *rxq_ctrl; - if (!(*priv->rxqs)[idx]) - return -1; + if (!(*priv->rxqs)[idx]) { + rte_errno = EINVAL; + return -rte_errno; + } rxq_ctrl = container_of((*priv->rxqs)[idx], struct mlx5_rxq_ctrl, rxq); return (rte_atomic32_read(&rxq_ctrl->refcnt) == 1); } @@ -1076,20 +1125,22 @@ mlx5_priv_rxq_releasable(struct priv *priv, uint16_t idx) /** * Verify the Rx Queue list is empty * - * @param priv - * Pointer to private structure. + * @param dev + * Pointer to Ethernet device. * - * @return the number of object not released. + * @return + * The number of object not released. */ int -mlx5_priv_rxq_verify(struct priv *priv) +mlx5_rxq_verify(struct rte_eth_dev *dev) { + struct priv *priv = dev->data->dev_private; struct mlx5_rxq_ctrl *rxq_ctrl; int ret = 0; LIST_FOREACH(rxq_ctrl, &priv->rxqsctrl, next) { - DEBUG("%p: Rx Queue %p still referenced", (void *)priv, - (void *)rxq_ctrl); + DRV_LOG(DEBUG, "port %u Rx Queue %u still referenced", + dev->data->port_id, rxq_ctrl->idx); ++ret; } return ret; @@ -1098,20 +1149,21 @@ mlx5_priv_rxq_verify(struct priv *priv) /** * Create an indirection table. * - * @param priv - * Pointer to private structure. + * @param dev + * Pointer to Ethernet device. * @param queues * Queues entering in the indirection table. * @param queues_n * Number of queues in the array. * * @return - * A new indirection table. + * The Verbs object initialised, NULL otherwise and rte_errno is set. */ -struct mlx5_ind_table_ibv* -mlx5_priv_ind_table_ibv_new(struct priv *priv, uint16_t queues[], - uint16_t queues_n) +struct mlx5_ind_table_ibv * +mlx5_ind_table_ibv_new(struct rte_eth_dev *dev, uint16_t queues[], + uint16_t queues_n) { + struct priv *priv = dev->data->dev_private; struct mlx5_ind_table_ibv *ind_tbl; const unsigned int wq_n = rte_is_power_of_2(queues_n) ? log2above(queues_n) : @@ -1122,11 +1174,12 @@ mlx5_priv_ind_table_ibv_new(struct priv *priv, uint16_t queues[], ind_tbl = rte_calloc(__func__, 1, sizeof(*ind_tbl) + queues_n * sizeof(uint16_t), 0); - if (!ind_tbl) + if (!ind_tbl) { + rte_errno = ENOMEM; return NULL; + } for (i = 0; i != queues_n; ++i) { - struct mlx5_rxq_ctrl *rxq = - mlx5_priv_rxq_get(priv, queues[i]); + struct mlx5_rxq_ctrl *rxq = mlx5_rxq_get(dev, queues[i]); if (!rxq) goto error; @@ -1144,24 +1197,28 @@ mlx5_priv_ind_table_ibv_new(struct priv *priv, uint16_t queues[], .ind_tbl = wq, .comp_mask = 0, }); - if (!ind_tbl->ind_table) + if (!ind_tbl->ind_table) { + rte_errno = errno; goto error; + } rte_atomic32_inc(&ind_tbl->refcnt); LIST_INSERT_HEAD(&priv->ind_tbls, ind_tbl, next); - DEBUG("%p: Indirection table %p: refcnt %d", (void *)priv, - (void *)ind_tbl, rte_atomic32_read(&ind_tbl->refcnt)); + DRV_LOG(DEBUG, "port %u indirection table %p: refcnt %d", + dev->data->port_id, (void *)ind_tbl, + rte_atomic32_read(&ind_tbl->refcnt)); return ind_tbl; error: rte_free(ind_tbl); - DEBUG("%p cannot create indirection table", (void *)priv); + DRV_LOG(DEBUG, "port %u cannot create indirection table", + dev->data->port_id); return NULL; } /** * Get an indirection table. * - * @param priv - * Pointer to private structure. + * @param dev + * Pointer to Ethernet device. * @param queues * Queues entering in the indirection table. * @param queues_n @@ -1170,10 +1227,11 @@ error: * @return * An indirection table if found. 
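* On a hit the table's refcount is incremented and mlx5_rxq_get() is called
* on every member queue, so the caller ends up owning one reference to each.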
*/ -struct mlx5_ind_table_ibv* -mlx5_priv_ind_table_ibv_get(struct priv *priv, uint16_t queues[], - uint16_t queues_n) +struct mlx5_ind_table_ibv * +mlx5_ind_table_ibv_get(struct rte_eth_dev *dev, uint16_t queues[], + uint16_t queues_n) { + struct priv *priv = dev->data->dev_private; struct mlx5_ind_table_ibv *ind_tbl; LIST_FOREACH(ind_tbl, &priv->ind_tbls, next) { @@ -1187,10 +1245,11 @@ mlx5_priv_ind_table_ibv_get(struct priv *priv, uint16_t queues[], unsigned int i; rte_atomic32_inc(&ind_tbl->refcnt); - DEBUG("%p: Indirection table %p: refcnt %d", (void *)priv, - (void *)ind_tbl, rte_atomic32_read(&ind_tbl->refcnt)); + DRV_LOG(DEBUG, "port %u indirection table %p: refcnt %d", + dev->data->port_id, (void *)ind_tbl, + rte_atomic32_read(&ind_tbl->refcnt)); for (i = 0; i != ind_tbl->queues_n; ++i) - mlx5_priv_rxq_get(priv, ind_tbl->queues[i]); + mlx5_rxq_get(dev, ind_tbl->queues[i]); } return ind_tbl; } @@ -1198,51 +1257,55 @@ mlx5_priv_ind_table_ibv_get(struct priv *priv, uint16_t queues[], /** * Release an indirection table. * - * @param priv - * Pointer to private structure. + * @param dev + * Pointer to Ethernet device. * @param ind_table * Indirection table to release. * * @return - * 0 on success, errno value on failure. + * 1 while a reference on it exists, 0 when freed. */ int -mlx5_priv_ind_table_ibv_release(struct priv *priv, - struct mlx5_ind_table_ibv *ind_tbl) +mlx5_ind_table_ibv_release(struct rte_eth_dev *dev, + struct mlx5_ind_table_ibv *ind_tbl) { unsigned int i; - DEBUG("%p: Indirection table %p: refcnt %d", (void *)priv, - (void *)ind_tbl, rte_atomic32_read(&ind_tbl->refcnt)); + DRV_LOG(DEBUG, "port %u indirection table %p: refcnt %d", + dev->data->port_id, (void *)ind_tbl, + rte_atomic32_read(&ind_tbl->refcnt)); if (rte_atomic32_dec_and_test(&ind_tbl->refcnt)) claim_zero(ibv_destroy_rwq_ind_table(ind_tbl->ind_table)); for (i = 0; i != ind_tbl->queues_n; ++i) - claim_nonzero(mlx5_priv_rxq_release(priv, ind_tbl->queues[i])); + claim_nonzero(mlx5_rxq_release(dev, ind_tbl->queues[i])); if (!rte_atomic32_read(&ind_tbl->refcnt)) { LIST_REMOVE(ind_tbl, next); rte_free(ind_tbl); return 0; } - return EBUSY; + return 1; } /** * Verify the Rx Queue list is empty * - * @param priv - * Pointer to private structure. + * @param dev + * Pointer to Ethernet device. * - * @return the number of object not released. + * @return + * The number of object not released. */ int -mlx5_priv_ind_table_ibv_verify(struct priv *priv) +mlx5_ind_table_ibv_verify(struct rte_eth_dev *dev) { + struct priv *priv = dev->data->dev_private; struct mlx5_ind_table_ibv *ind_tbl; int ret = 0; LIST_FOREACH(ind_tbl, &priv->ind_tbls, next) { - DEBUG("%p: Verbs indirection table %p still referenced", - (void *)priv, (void *)ind_tbl); + DRV_LOG(DEBUG, + "port %u Verbs indirection table %p still referenced", + dev->data->port_id, (void *)ind_tbl); ++ret; } return ret; @@ -1251,8 +1314,8 @@ mlx5_priv_ind_table_ibv_verify(struct priv *priv) /** * Create an Rx Hash queue. * - * @param priv - * Pointer to private structure. + * @param dev + * Pointer to Ethernet device. * @param rss_key * RSS key for the Rx hash queue. * @param rss_key_len @@ -1266,22 +1329,26 @@ mlx5_priv_ind_table_ibv_verify(struct priv *priv) * Number of queues. * * @return - * An hash Rx queue on success. + * The Verbs object initialised, NULL otherwise and rte_errno is set. 
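* Callers presumably follow the same get-or-create pattern this function
* applies to its own indirection table (sketch, error handling elided):
*
*   hrxq = mlx5_hrxq_get(dev, rss_key, rss_key_len, hash_fields,
*                        queues, queues_n);
*   if (!hrxq)
*           hrxq = mlx5_hrxq_new(dev, rss_key, rss_key_len, hash_fields,
*                                queues, queues_n);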
*/ -struct mlx5_hrxq* -mlx5_priv_hrxq_new(struct priv *priv, uint8_t *rss_key, uint8_t rss_key_len, - uint64_t hash_fields, uint16_t queues[], uint16_t queues_n) +struct mlx5_hrxq * +mlx5_hrxq_new(struct rte_eth_dev *dev, uint8_t *rss_key, uint8_t rss_key_len, + uint64_t hash_fields, uint16_t queues[], uint16_t queues_n) { + struct priv *priv = dev->data->dev_private; struct mlx5_hrxq *hrxq; struct mlx5_ind_table_ibv *ind_tbl; struct ibv_qp *qp; + int err; queues_n = hash_fields ? queues_n : 1; - ind_tbl = mlx5_priv_ind_table_ibv_get(priv, queues, queues_n); - if (!ind_tbl) - ind_tbl = mlx5_priv_ind_table_ibv_new(priv, queues, queues_n); + ind_tbl = mlx5_ind_table_ibv_get(dev, queues, queues_n); if (!ind_tbl) + ind_tbl = mlx5_ind_table_ibv_new(dev, queues, queues_n); + if (!ind_tbl) { + rte_errno = ENOMEM; return NULL; + } qp = ibv_create_qp_ex( priv->ctx, &(struct ibv_qp_init_attr_ex){ @@ -1299,8 +1366,10 @@ mlx5_priv_hrxq_new(struct priv *priv, uint8_t *rss_key, uint8_t rss_key_len, .rwq_ind_tbl = ind_tbl->ind_table, .pd = priv->pd, }); - if (!qp) + if (!qp) { + rte_errno = errno; goto error; + } hrxq = rte_calloc(__func__, 1, sizeof(*hrxq) + rss_key_len, 0); if (!hrxq) goto error; @@ -1311,21 +1380,24 @@ mlx5_priv_hrxq_new(struct priv *priv, uint8_t *rss_key, uint8_t rss_key_len, memcpy(hrxq->rss_key, rss_key, rss_key_len); rte_atomic32_inc(&hrxq->refcnt); LIST_INSERT_HEAD(&priv->hrxqs, hrxq, next); - DEBUG("%p: Hash Rx queue %p: refcnt %d", (void *)priv, - (void *)hrxq, rte_atomic32_read(&hrxq->refcnt)); + DRV_LOG(DEBUG, "port %u hash Rx queue %p: refcnt %d", + dev->data->port_id, (void *)hrxq, + rte_atomic32_read(&hrxq->refcnt)); return hrxq; error: - mlx5_priv_ind_table_ibv_release(priv, ind_tbl); + err = rte_errno; /* Save rte_errno before cleanup. */ + mlx5_ind_table_ibv_release(dev, ind_tbl); if (qp) claim_zero(ibv_destroy_qp(qp)); + rte_errno = err; /* Restore rte_errno. */ return NULL; } /** * Get an Rx Hash queue. * - * @param priv - * Pointer to private structure. + * @param dev + * Pointer to Ethernet device. * @param rss_conf * RSS configuration for the Rx hash queue. * @param queues @@ -1337,10 +1409,11 @@ error: * @return * An hash Rx queue on success. */ -struct mlx5_hrxq* -mlx5_priv_hrxq_get(struct priv *priv, uint8_t *rss_key, uint8_t rss_key_len, - uint64_t hash_fields, uint16_t queues[], uint16_t queues_n) +struct mlx5_hrxq * +mlx5_hrxq_get(struct rte_eth_dev *dev, uint8_t *rss_key, uint8_t rss_key_len, + uint64_t hash_fields, uint16_t queues[], uint16_t queues_n) { + struct priv *priv = dev->data->dev_private; struct mlx5_hrxq *hrxq; queues_n = hash_fields ? queues_n : 1; @@ -1353,16 +1426,17 @@ mlx5_priv_hrxq_get(struct priv *priv, uint8_t *rss_key, uint8_t rss_key_len, continue; if (hrxq->hash_fields != hash_fields) continue; - ind_tbl = mlx5_priv_ind_table_ibv_get(priv, queues, queues_n); + ind_tbl = mlx5_ind_table_ibv_get(dev, queues, queues_n); if (!ind_tbl) continue; if (ind_tbl != hrxq->ind_table) { - mlx5_priv_ind_table_ibv_release(priv, ind_tbl); + mlx5_ind_table_ibv_release(dev, ind_tbl); continue; } rte_atomic32_inc(&hrxq->refcnt); - DEBUG("%p: Hash Rx queue %p: refcnt %d", (void *)priv, - (void *)hrxq, rte_atomic32_read(&hrxq->refcnt)); + DRV_LOG(DEBUG, "port %u hash Rx queue %p: refcnt %d", + dev->data->port_id, (void *)hrxq, + rte_atomic32_read(&hrxq->refcnt)); return hrxq; } return NULL; @@ -1371,47 +1445,51 @@ mlx5_priv_hrxq_get(struct priv *priv, uint8_t *rss_key, uint8_t rss_key_len, /** * Release the hash Rx queue. 
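* Dropping the last reference destroys the QP and gives back the reference
* held on the underlying indirection table.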
* - * @param priv - * Pointer to private structure. + * @param dev + * Pointer to Ethernet device. * @param hrxq * Pointer to Hash Rx queue to release. * * @return - * 0 on success, errno value on failure. + * 1 while a reference on it exists, 0 when freed. */ int -mlx5_priv_hrxq_release(struct priv *priv, struct mlx5_hrxq *hrxq) +mlx5_hrxq_release(struct rte_eth_dev *dev, struct mlx5_hrxq *hrxq) { - DEBUG("%p: Hash Rx queue %p: refcnt %d", (void *)priv, - (void *)hrxq, rte_atomic32_read(&hrxq->refcnt)); + DRV_LOG(DEBUG, "port %u hash Rx queue %p: refcnt %d", + dev->data->port_id, (void *)hrxq, + rte_atomic32_read(&hrxq->refcnt)); if (rte_atomic32_dec_and_test(&hrxq->refcnt)) { claim_zero(ibv_destroy_qp(hrxq->qp)); - mlx5_priv_ind_table_ibv_release(priv, hrxq->ind_table); + mlx5_ind_table_ibv_release(dev, hrxq->ind_table); LIST_REMOVE(hrxq, next); rte_free(hrxq); return 0; } - claim_nonzero(mlx5_priv_ind_table_ibv_release(priv, hrxq->ind_table)); - return EBUSY; + claim_nonzero(mlx5_ind_table_ibv_release(dev, hrxq->ind_table)); + return 1; } /** * Verify the Rx Queue list is empty * - * @param priv - * Pointer to private structure. + * @param dev + * Pointer to Ethernet device. * - * @return the number of object not released. + * @return + * The number of object not released. */ int -mlx5_priv_hrxq_ibv_verify(struct priv *priv) +mlx5_hrxq_ibv_verify(struct rte_eth_dev *dev) { + struct priv *priv = dev->data->dev_private; struct mlx5_hrxq *hrxq; int ret = 0; LIST_FOREACH(hrxq, &priv->hrxqs, next) { - DEBUG("%p: Verbs Hash Rx queue %p still referenced", - (void *)priv, (void *)hrxq); + DRV_LOG(DEBUG, + "port %u Verbs hash Rx queue %p still referenced", + dev->data->port_id, (void *)hrxq); ++ret; } return ret; diff --git a/drivers/net/mlx5/mlx5_rxtx.c b/drivers/net/mlx5/mlx5_rxtx.c index 32bfa307..2e003aea 100644 --- a/drivers/net/mlx5/mlx5_rxtx.c +++ b/drivers/net/mlx5/mlx5_rxtx.c @@ -114,6 +114,14 @@ mlx5_set_ptype_table(void) RTE_PTYPE_L4_TCP; (*p)[0x06] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_TCP; + (*p)[0x0d] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | + RTE_PTYPE_L4_TCP; + (*p)[0x0e] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | + RTE_PTYPE_L4_TCP; + (*p)[0x11] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | + RTE_PTYPE_L4_TCP; + (*p)[0x12] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | + RTE_PTYPE_L4_TCP; /* UDP */ (*p)[0x09] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | RTE_PTYPE_L4_UDP; @@ -132,6 +140,14 @@ mlx5_set_ptype_table(void) RTE_PTYPE_L4_TCP; (*p)[0x86] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_TCP; + (*p)[0x8d] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | + RTE_PTYPE_L4_TCP; + (*p)[0x8e] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | + RTE_PTYPE_L4_TCP; + (*p)[0x91] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | + RTE_PTYPE_L4_TCP; + (*p)[0x92] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | + RTE_PTYPE_L4_TCP; (*p)[0x89] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | RTE_PTYPE_L4_UDP; (*p)[0x8a] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | @@ -169,12 +185,36 @@ mlx5_set_ptype_table(void) (*p)[0x46] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_INNER_L4_TCP; + (*p)[0x4d] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | + RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | + RTE_PTYPE_INNER_L4_TCP; + (*p)[0x4e] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | + 
RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | + RTE_PTYPE_INNER_L4_TCP; + (*p)[0x51] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | + RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | + RTE_PTYPE_INNER_L4_TCP; + (*p)[0x52] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | + RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | + RTE_PTYPE_INNER_L4_TCP; (*p)[0xc5] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | RTE_PTYPE_INNER_L4_TCP; (*p)[0xc6] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_INNER_L4_TCP; + (*p)[0xcd] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | + RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | + RTE_PTYPE_INNER_L4_TCP; + (*p)[0xce] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | + RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | + RTE_PTYPE_INNER_L4_TCP; + (*p)[0xd1] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | + RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | + RTE_PTYPE_INNER_L4_TCP; + (*p)[0xd2] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | + RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | + RTE_PTYPE_INNER_L4_TCP; /* Tunneled - UDP */ (*p)[0x49] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | @@ -370,7 +410,6 @@ mlx5_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n) unsigned int ds = 0; unsigned int sg = 0; /* counter of additional segs attached. */ uintptr_t addr; - uint64_t naddr; uint16_t pkt_inline_sz = MLX5_WQE_DWORD_SIZE + 2; uint16_t tso_header_sz = 0; uint16_t ehdr; @@ -594,12 +633,12 @@ mlx5_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n) ds = 3; use_dseg: /* Add the remaining packet as a simple ds. */ - naddr = rte_cpu_to_be_64(addr); + addr = rte_cpu_to_be_64(addr); *dseg = (rte_v128u32_t){ rte_cpu_to_be_32(length), mlx5_tx_mb2mr(txq, buf), - naddr, - naddr >> 32, + addr, + addr >> 32, }; ++ds; if (!segs_n) @@ -633,12 +672,12 @@ next_seg: total_length += length; #endif /* Store segment information. */ - naddr = rte_cpu_to_be_64(rte_pktmbuf_mtod(buf, uintptr_t)); + addr = rte_cpu_to_be_64(rte_pktmbuf_mtod(buf, uintptr_t)); *dseg = (rte_v128u32_t){ rte_cpu_to_be_32(length), mlx5_tx_mb2mr(txq, buf), - naddr, - naddr >> 32, + addr, + addr >> 32, }; (*txq->elts)[++elts_head & elts_m] = buf; ++sg; @@ -1339,8 +1378,6 @@ mlx5_tx_burst_empw(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n) do { struct rte_mbuf *buf = *(pkts++); uintptr_t addr; - uint64_t naddr; - unsigned int n; unsigned int do_inline = 0; /* Whether inline is possible. */ uint32_t length; unsigned int segs_n = buf->nb_segs; @@ -1459,7 +1496,7 @@ mlx5_tx_burst_empw(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n) max_wqe--; else max_wqe -= 2; - } else if (do_inline) { + } else if (max_inline && do_inline) { /* Inline packet into WQE. 
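The added max_inline guard keeps this branch from being taken when the inline budget is zero, even if do_inline was computed earlier in the loop.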
*/ unsigned int max; @@ -1517,16 +1554,13 @@ mlx5_tx_burst_empw(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n) ((uintptr_t)mpw.data.raw + inl_pad); (*txq->elts)[elts_head++ & elts_m] = buf; - addr = rte_pktmbuf_mtod(buf, uintptr_t); - for (n = 0; n * RTE_CACHE_LINE_SIZE < length; n++) - rte_prefetch2((void *)(addr + - n * RTE_CACHE_LINE_SIZE)); - naddr = rte_cpu_to_be_64(addr); + addr = rte_cpu_to_be_64(rte_pktmbuf_mtod(buf, + uintptr_t)); *dseg = (rte_v128u32_t) { rte_cpu_to_be_32(length), mlx5_tx_mb2mr(txq, buf), - naddr, - naddr >> 32, + addr, + addr >> 32, }; mpw.data.raw = (volatile void *)(dseg + 1); mpw.total_len += (inl_pad + sizeof(*dseg)); @@ -1677,6 +1711,7 @@ mlx5_rx_poll_len(struct mlx5_rxq_data *rxq, volatile struct mlx5_cqe *cqe, return 0; ++rxq->cq_ci; op_own = cqe->op_own; + rte_io_rmb(); if (MLX5_CQE_FORMAT(op_own) == MLX5_COMPRESSED) { volatile struct mlx5_mini_cqe8 (*mc)[8] = (volatile struct mlx5_mini_cqe8 (*)[8]) @@ -1934,11 +1969,10 @@ skip: * Number of packets successfully transmitted (<= pkts_n). */ uint16_t -removed_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n) +removed_tx_burst(void *dpdk_txq __rte_unused, + struct rte_mbuf **pkts __rte_unused, + uint16_t pkts_n __rte_unused) { - (void)dpdk_txq; - (void)pkts; - (void)pkts_n; return 0; } @@ -1959,11 +1993,10 @@ removed_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n) * Number of packets successfully received (<= pkts_n). */ uint16_t -removed_rx_burst(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n) +removed_rx_burst(void *dpdk_txq __rte_unused, + struct rte_mbuf **pkts __rte_unused, + uint16_t pkts_n __rte_unused) { - (void)dpdk_rxq; - (void)pkts; - (void)pkts_n; return 0; } @@ -1975,56 +2008,49 @@ removed_rx_burst(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n) */ uint16_t __attribute__((weak)) -mlx5_tx_burst_raw_vec(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n) +mlx5_tx_burst_raw_vec(void *dpdk_txq __rte_unused, + struct rte_mbuf **pkts __rte_unused, + uint16_t pkts_n __rte_unused) { - (void)dpdk_txq; - (void)pkts; - (void)pkts_n; return 0; } uint16_t __attribute__((weak)) -mlx5_tx_burst_vec(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n) +mlx5_tx_burst_vec(void *dpdk_txq __rte_unused, + struct rte_mbuf **pkts __rte_unused, + uint16_t pkts_n __rte_unused) { - (void)dpdk_txq; - (void)pkts; - (void)pkts_n; return 0; } uint16_t __attribute__((weak)) -mlx5_rx_burst_vec(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n) +mlx5_rx_burst_vec(void *dpdk_txq __rte_unused, + struct rte_mbuf **pkts __rte_unused, + uint16_t pkts_n __rte_unused) { - (void)dpdk_rxq; - (void)pkts; - (void)pkts_n; return 0; } int __attribute__((weak)) -priv_check_raw_vec_tx_support(struct priv *priv) +mlx5_check_raw_vec_tx_support(struct rte_eth_dev *dev __rte_unused) { - (void)priv; return -ENOTSUP; } int __attribute__((weak)) -priv_check_vec_tx_support(struct priv *priv) +mlx5_check_vec_tx_support(struct rte_eth_dev *dev __rte_unused) { - (void)priv; return -ENOTSUP; } int __attribute__((weak)) -rxq_check_vec_support(struct mlx5_rxq_data *rxq) +mlx5_rxq_check_vec_support(struct mlx5_rxq_data *rxq __rte_unused) { - (void)rxq; return -ENOTSUP; } int __attribute__((weak)) -priv_check_vec_rx_support(struct priv *priv) +mlx5_check_vec_rx_support(struct rte_eth_dev *dev __rte_unused) { - (void)priv; return -ENOTSUP; } diff --git a/drivers/net/mlx5/mlx5_rxtx.h b/drivers/net/mlx5/mlx5_rxtx.h index de5b769e..29019f79 100644 --- a/drivers/net/mlx5/mlx5_rxtx.h +++ 
b/drivers/net/mlx5/mlx5_rxtx.h @@ -154,6 +154,7 @@ struct mlx5_rxq_ctrl { struct mlx5_rxq_data rxq; /* Data path structure. */ unsigned int socket; /* CPU socket ID for allocations. */ unsigned int irq:1; /* Whether IRQ is enabled. */ + uint16_t idx; /* Queue index. */ }; /* Indirection table. */ @@ -204,7 +205,7 @@ struct mlx5_txq_data { volatile void *wqes; /* Work queue (use volatile to write into). */ volatile uint32_t *qp_db; /* Work queue doorbell. */ volatile uint32_t *cq_db; /* Completion queue doorbell. */ - volatile void *bf_reg; /* Blueflame register. */ + volatile void *bf_reg; /* Blueflame register remapped. */ struct mlx5_mr *mp2mr[MLX5_PMD_TX_MP_CACHE]; /* MR translation table. */ struct rte_mbuf *(*elts)[]; /* TX elements. */ struct mlx5_txq_stats stats; /* TX queue counters. */ @@ -214,6 +215,7 @@ struct mlx5_txq_data { struct mlx5_txq_ibv { LIST_ENTRY(mlx5_txq_ibv) next; /* Pointer to the next element. */ rte_atomic32_t refcnt; /* Reference counter. */ + struct mlx5_txq_ctrl *txq_ctrl; /* Pointer to the control queue. */ struct ibv_cq *cq; /* Completion Queue. */ struct ibv_qp *qp; /* Queue Pair. */ }; @@ -229,6 +231,8 @@ struct mlx5_txq_ctrl { struct mlx5_txq_ibv *ibv; /* Verbs queue object. */ struct mlx5_txq_data txq; /* Data path structure. */ off_t uar_mmap_offset; /* UAR mmap offset for non-primary process. */ + volatile void *bf_reg_orig; /* Blueflame register from verbs. */ + uint16_t idx; /* Queue index. */ }; /* mlx5_rxq.c */ @@ -236,93 +240,104 @@ struct mlx5_txq_ctrl { extern uint8_t rss_hash_default_key[]; extern const size_t rss_hash_default_key_len; -void mlx5_rxq_cleanup(struct mlx5_rxq_ctrl *); -int mlx5_rx_queue_setup(struct rte_eth_dev *, uint16_t, uint16_t, unsigned int, - const struct rte_eth_rxconf *, struct rte_mempool *); -void mlx5_rx_queue_release(void *); -int priv_rx_intr_vec_enable(struct priv *priv); -void priv_rx_intr_vec_disable(struct priv *priv); +void mlx5_rxq_cleanup(struct mlx5_rxq_ctrl *rxq_ctrl); +int mlx5_rx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc, + unsigned int socket, const struct rte_eth_rxconf *conf, + struct rte_mempool *mp); +void mlx5_rx_queue_release(void *dpdk_rxq); +int mlx5_rx_intr_vec_enable(struct rte_eth_dev *dev); +void mlx5_rx_intr_vec_disable(struct rte_eth_dev *dev); int mlx5_rx_intr_enable(struct rte_eth_dev *dev, uint16_t rx_queue_id); int mlx5_rx_intr_disable(struct rte_eth_dev *dev, uint16_t rx_queue_id); -struct mlx5_rxq_ibv *mlx5_priv_rxq_ibv_new(struct priv *, uint16_t); -struct mlx5_rxq_ibv *mlx5_priv_rxq_ibv_get(struct priv *, uint16_t); -int mlx5_priv_rxq_ibv_release(struct priv *, struct mlx5_rxq_ibv *); -int mlx5_priv_rxq_ibv_releasable(struct priv *, struct mlx5_rxq_ibv *); -int mlx5_priv_rxq_ibv_verify(struct priv *); -struct mlx5_rxq_ctrl *mlx5_priv_rxq_new(struct priv *, uint16_t, - uint16_t, unsigned int, - struct rte_mempool *); -struct mlx5_rxq_ctrl *mlx5_priv_rxq_get(struct priv *, uint16_t); -int mlx5_priv_rxq_release(struct priv *, uint16_t); -int mlx5_priv_rxq_releasable(struct priv *, uint16_t); -int mlx5_priv_rxq_verify(struct priv *); -int rxq_alloc_elts(struct mlx5_rxq_ctrl *); -struct mlx5_ind_table_ibv *mlx5_priv_ind_table_ibv_new(struct priv *, - uint16_t [], - uint16_t); -struct mlx5_ind_table_ibv *mlx5_priv_ind_table_ibv_get(struct priv *, - uint16_t [], - uint16_t); -int mlx5_priv_ind_table_ibv_release(struct priv *, struct mlx5_ind_table_ibv *); -int mlx5_priv_ind_table_ibv_verify(struct priv *); -struct mlx5_hrxq *mlx5_priv_hrxq_new(struct priv *, uint8_t 
*, uint8_t, - uint64_t, uint16_t [], uint16_t); -struct mlx5_hrxq *mlx5_priv_hrxq_get(struct priv *, uint8_t *, uint8_t, - uint64_t, uint16_t [], uint16_t); -int mlx5_priv_hrxq_release(struct priv *, struct mlx5_hrxq *); -int mlx5_priv_hrxq_ibv_verify(struct priv *); +struct mlx5_rxq_ibv *mlx5_rxq_ibv_new(struct rte_eth_dev *dev, uint16_t idx); +struct mlx5_rxq_ibv *mlx5_rxq_ibv_get(struct rte_eth_dev *dev, uint16_t idx); +int mlx5_rxq_ibv_release(struct mlx5_rxq_ibv *rxq_ibv); +int mlx5_rxq_ibv_releasable(struct mlx5_rxq_ibv *rxq_ibv); +int mlx5_rxq_ibv_verify(struct rte_eth_dev *dev); +struct mlx5_rxq_ctrl *mlx5_rxq_new(struct rte_eth_dev *dev, uint16_t idx, + uint16_t desc, unsigned int socket, + struct rte_mempool *mp); +struct mlx5_rxq_ctrl *mlx5_rxq_get(struct rte_eth_dev *dev, uint16_t idx); +int mlx5_rxq_release(struct rte_eth_dev *dev, uint16_t idx); +int mlx5_rxq_releasable(struct rte_eth_dev *dev, uint16_t idx); +int mlx5_rxq_verify(struct rte_eth_dev *dev); +int rxq_alloc_elts(struct mlx5_rxq_ctrl *rxq_ctrl); +struct mlx5_ind_table_ibv *mlx5_ind_table_ibv_new(struct rte_eth_dev *dev, + uint16_t queues[], + uint16_t queues_n); +struct mlx5_ind_table_ibv *mlx5_ind_table_ibv_get(struct rte_eth_dev *dev, + uint16_t queues[], + uint16_t queues_n); +int mlx5_ind_table_ibv_release(struct rte_eth_dev *dev, + struct mlx5_ind_table_ibv *ind_tbl); +int mlx5_ind_table_ibv_verify(struct rte_eth_dev *dev); +struct mlx5_hrxq *mlx5_hrxq_new(struct rte_eth_dev *dev, uint8_t *rss_key, + uint8_t rss_key_len, uint64_t hash_fields, + uint16_t queues[], uint16_t queues_n); +struct mlx5_hrxq *mlx5_hrxq_get(struct rte_eth_dev *dev, uint8_t *rss_key, + uint8_t rss_key_len, uint64_t hash_fields, + uint16_t queues[], uint16_t queues_n); +int mlx5_hrxq_release(struct rte_eth_dev *dev, struct mlx5_hrxq *hxrq); +int mlx5_hrxq_ibv_verify(struct rte_eth_dev *dev); /* mlx5_txq.c */ -int mlx5_tx_queue_setup(struct rte_eth_dev *, uint16_t, uint16_t, unsigned int, - const struct rte_eth_txconf *); -void mlx5_tx_queue_release(void *); -int priv_tx_uar_remap(struct priv *priv, int fd); -struct mlx5_txq_ibv *mlx5_priv_txq_ibv_new(struct priv *, uint16_t); -struct mlx5_txq_ibv *mlx5_priv_txq_ibv_get(struct priv *, uint16_t); -int mlx5_priv_txq_ibv_release(struct priv *, struct mlx5_txq_ibv *); -int mlx5_priv_txq_ibv_releasable(struct priv *, struct mlx5_txq_ibv *); -int mlx5_priv_txq_ibv_verify(struct priv *); -struct mlx5_txq_ctrl *mlx5_priv_txq_new(struct priv *, uint16_t, - uint16_t, unsigned int, - const struct rte_eth_txconf *); -struct mlx5_txq_ctrl *mlx5_priv_txq_get(struct priv *, uint16_t); -int mlx5_priv_txq_release(struct priv *, uint16_t); -int mlx5_priv_txq_releasable(struct priv *, uint16_t); -int mlx5_priv_txq_verify(struct priv *); -void txq_alloc_elts(struct mlx5_txq_ctrl *); +int mlx5_tx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc, + unsigned int socket, const struct rte_eth_txconf *conf); +void mlx5_tx_queue_release(void *dpdk_txq); +int mlx5_tx_uar_remap(struct rte_eth_dev *dev, int fd); +struct mlx5_txq_ibv *mlx5_txq_ibv_new(struct rte_eth_dev *dev, uint16_t idx); +struct mlx5_txq_ibv *mlx5_txq_ibv_get(struct rte_eth_dev *dev, uint16_t idx); +int mlx5_txq_ibv_release(struct mlx5_txq_ibv *txq_ibv); +int mlx5_txq_ibv_releasable(struct mlx5_txq_ibv *txq_ibv); +int mlx5_txq_ibv_verify(struct rte_eth_dev *dev); +struct mlx5_txq_ctrl *mlx5_txq_new(struct rte_eth_dev *dev, uint16_t idx, + uint16_t desc, unsigned int socket, + const struct rte_eth_txconf *conf); +struct mlx5_txq_ctrl 
*mlx5_txq_get(struct rte_eth_dev *dev, uint16_t idx); +int mlx5_txq_release(struct rte_eth_dev *dev, uint16_t idx); +int mlx5_txq_releasable(struct rte_eth_dev *dev, uint16_t idx); +int mlx5_txq_verify(struct rte_eth_dev *dev); +void txq_alloc_elts(struct mlx5_txq_ctrl *txq_ctrl); /* mlx5_rxtx.c */ extern uint32_t mlx5_ptype_table[]; void mlx5_set_ptype_table(void); -uint16_t mlx5_tx_burst(void *, struct rte_mbuf **, uint16_t); -uint16_t mlx5_tx_burst_mpw(void *, struct rte_mbuf **, uint16_t); -uint16_t mlx5_tx_burst_mpw_inline(void *, struct rte_mbuf **, uint16_t); -uint16_t mlx5_tx_burst_empw(void *, struct rte_mbuf **, uint16_t); -uint16_t mlx5_rx_burst(void *, struct rte_mbuf **, uint16_t); -uint16_t removed_tx_burst(void *, struct rte_mbuf **, uint16_t); -uint16_t removed_rx_burst(void *, struct rte_mbuf **, uint16_t); -int mlx5_rx_descriptor_status(void *, uint16_t); -int mlx5_tx_descriptor_status(void *, uint16_t); +uint16_t mlx5_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, + uint16_t pkts_n); +uint16_t mlx5_tx_burst_mpw(void *dpdk_txq, struct rte_mbuf **pkts, + uint16_t pkts_n); +uint16_t mlx5_tx_burst_mpw_inline(void *dpdk_txq, struct rte_mbuf **pkts, + uint16_t pkts_n); +uint16_t mlx5_tx_burst_empw(void *dpdk_txq, struct rte_mbuf **pkts, + uint16_t pkts_n); +uint16_t mlx5_rx_burst(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n); +uint16_t removed_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, + uint16_t pkts_n); +uint16_t removed_rx_burst(void *dpdk_rxq, struct rte_mbuf **pkts, + uint16_t pkts_n); +int mlx5_rx_descriptor_status(void *rx_queue, uint16_t offset); +int mlx5_tx_descriptor_status(void *tx_queue, uint16_t offset); /* Vectorized version of mlx5_rxtx.c */ -int priv_check_raw_vec_tx_support(struct priv *); -int priv_check_vec_tx_support(struct priv *); -int rxq_check_vec_support(struct mlx5_rxq_data *); -int priv_check_vec_rx_support(struct priv *); -uint16_t mlx5_tx_burst_raw_vec(void *, struct rte_mbuf **, uint16_t); -uint16_t mlx5_tx_burst_vec(void *, struct rte_mbuf **, uint16_t); -uint16_t mlx5_rx_burst_vec(void *, struct rte_mbuf **, uint16_t); +int mlx5_check_raw_vec_tx_support(struct rte_eth_dev *dev); +int mlx5_check_vec_tx_support(struct rte_eth_dev *dev); +int mlx5_rxq_check_vec_support(struct mlx5_rxq_data *rxq_data); +int mlx5_check_vec_rx_support(struct rte_eth_dev *dev); +uint16_t mlx5_tx_burst_raw_vec(void *dpdk_txq, struct rte_mbuf **pkts, + uint16_t pkts_n); +uint16_t mlx5_tx_burst_vec(void *dpdk_txq, struct rte_mbuf **pkts, + uint16_t pkts_n); +uint16_t mlx5_rx_burst_vec(void *dpdk_txq, struct rte_mbuf **pkts, + uint16_t pkts_n); /* mlx5_mr.c */ -void mlx5_mp2mr_iter(struct rte_mempool *, void *); -struct mlx5_mr *priv_txq_mp2mr_reg(struct priv *priv, struct mlx5_txq_data *, - struct rte_mempool *, unsigned int); -struct mlx5_mr *mlx5_txq_mp2mr_reg(struct mlx5_txq_data *, struct rte_mempool *, - unsigned int); +void mlx5_mp2mr_iter(struct rte_mempool *mp, void *arg); +struct mlx5_mr *mlx5_txq_mp2mr_reg(struct mlx5_txq_data *txq, + struct rte_mempool *mp, unsigned int idx); #ifndef NDEBUG /** @@ -385,9 +400,10 @@ check_cqe(volatile struct mlx5_cqe *cqe, (syndrome == MLX5_CQE_SYNDROME_REMOTE_ABORTED_ERR)) return 0; if (!check_cqe_seen(cqe)) { - ERROR("unexpected CQE error %u (0x%02x)" - " syndrome 0x%02x", - op_code, op_code, syndrome); + DRV_LOG(ERR, + "unexpected CQE error %u (0x%02x) syndrome" + " 0x%02x", + op_code, op_code, syndrome); rte_hexdump(stderr, "MLX5 Error CQE:", (const void *)((uintptr_t)err_cqe), sizeof(*err_cqe)); @@ 
-396,8 +412,8 @@ check_cqe(volatile struct mlx5_cqe *cqe, } else if ((op_code != MLX5_CQE_RESP_SEND) && (op_code != MLX5_CQE_REQ)) { if (!check_cqe_seen(cqe)) { - ERROR("unexpected CQE opcode %u (0x%02x)", - op_code, op_code); + DRV_LOG(ERR, "unexpected CQE opcode %u (0x%02x)", + op_code, op_code); rte_hexdump(stderr, "MLX5 CQE:", (const void *)((uintptr_t)cqe), sizeof(*cqe)); @@ -457,7 +473,7 @@ mlx5_tx_complete(struct mlx5_txq_data *txq) if ((MLX5_CQE_OPCODE(cqe->op_own) == MLX5_CQE_RESP_ERR) || (MLX5_CQE_OPCODE(cqe->op_own) == MLX5_CQE_REQ_ERR)) { if (!check_cqe_seen(cqe)) { - ERROR("unexpected error CQE, TX stopped"); + DRV_LOG(ERR, "unexpected error CQE, Tx stopped"); rte_hexdump(stderr, "MLX5 TXQ:", (const void *)((uintptr_t)txq->wqes), ((1 << txq->wqe_n) * @@ -558,8 +574,6 @@ mlx5_tx_mb2mr(struct mlx5_txq_data *txq, struct rte_mbuf *mb) if (txq->mp2mr[i]->start <= addr && txq->mp2mr[i]->end > addr) { assert(txq->mp2mr[i]->lkey != (uint32_t)-1); - assert(rte_cpu_to_be_32(txq->mp2mr[i]->mr->lkey) == - txq->mp2mr[i]->lkey); txq->mr_cache_idx = i; return txq->mp2mr[i]->lkey; } @@ -573,6 +587,11 @@ mlx5_tx_mb2mr(struct mlx5_txq_data *txq, struct rte_mbuf *mb) rte_atomic32_inc(&mr->refcnt); txq->mr_cache_idx = i >= RTE_DIM(txq->mp2mr) ? i - 1 : i; return mr->lkey; + } else { + struct rte_mempool *mp = mlx5_tx_mb2mp(mb); + + DRV_LOG(WARNING, "failed to register mempool 0x%p(%s)", + (void *)mp, mp->name); } return (uint32_t)-1; } diff --git a/drivers/net/mlx5/mlx5_rxtx_vec.c b/drivers/net/mlx5/mlx5_rxtx_vec.c index 101aa156..982b8f1f 100644 --- a/drivers/net/mlx5/mlx5_rxtx_vec.c +++ b/drivers/net/mlx5/mlx5_rxtx_vec.c @@ -276,15 +276,16 @@ mlx5_rx_burst_vec(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n) /** * Check Tx queue flags are set for raw vectorized Tx. * - * @param priv - * Pointer to private structure. + * @param dev + * Pointer to Ethernet device. * * @return * 1 if supported, negative errno value if not. */ int __attribute__((cold)) -priv_check_raw_vec_tx_support(struct priv *priv) +mlx5_check_raw_vec_tx_support(struct rte_eth_dev *dev) { + struct priv *priv = dev->data->dev_private; uint16_t i; /* All the configured queues should support. */ @@ -303,15 +304,17 @@ priv_check_raw_vec_tx_support(struct priv *priv) /** * Check a device can support vectorized TX. * - * @param priv - * Pointer to private structure. + * @param dev + * Pointer to Ethernet device. * * @return * 1 if supported, negative errno value if not. */ int __attribute__((cold)) -priv_check_vec_tx_support(struct priv *priv) +mlx5_check_vec_tx_support(struct rte_eth_dev *dev) { + struct priv *priv = dev->data->dev_private; + if (!priv->tx_vec_en || priv->txqs_n > MLX5_VPMD_MIN_TXQS || priv->mps != MLX5_MPW_ENHANCED || @@ -330,7 +333,7 @@ priv_check_vec_tx_support(struct priv *priv) * 1 if supported, negative errno value if not. */ int __attribute__((cold)) -rxq_check_vec_support(struct mlx5_rxq_data *rxq) +mlx5_rxq_check_vec_support(struct mlx5_rxq_data *rxq) { struct mlx5_rxq_ctrl *ctrl = container_of(rxq, struct mlx5_rxq_ctrl, rxq); @@ -343,15 +346,16 @@ rxq_check_vec_support(struct mlx5_rxq_data *rxq) /** * Check a device can support vectorized RX. * - * @param priv - * Pointer to private structure. + * @param dev + * Pointer to Ethernet device. * * @return * 1 if supported, negative errno value if not. 
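 *
 * Editorial illustration, not part of this patch: a burst-function
 * selector (the driver's real one is mlx5_select_rx_function(); this
 * helper name is hypothetical) would branch on this check:
 *
 *   static eth_rx_burst_t
 *   select_rx_burst(struct rte_eth_dev *dev)
 *   {
 *           if (mlx5_check_vec_rx_support(dev) > 0)
 *                   return mlx5_rx_burst_vec;
 *           return mlx5_rx_burst;
 *   }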
*/ int __attribute__((cold)) -priv_check_vec_rx_support(struct priv *priv) +mlx5_check_vec_rx_support(struct rte_eth_dev *dev) { + struct priv *priv = dev->data->dev_private; uint16_t i; if (!priv->rx_vec_en) @@ -362,7 +366,7 @@ priv_check_vec_rx_support(struct priv *priv) if (!rxq) continue; - if (rxq_check_vec_support(rxq) < 0) + if (mlx5_rxq_check_vec_support(rxq) < 0) break; } if (i != priv->rxqs_n) diff --git a/drivers/net/mlx5/mlx5_rxtx_vec_neon.h b/drivers/net/mlx5/mlx5_rxtx_vec_neon.h index 06f83ef1..cf424778 100644 --- a/drivers/net/mlx5/mlx5_rxtx_vec_neon.h +++ b/drivers/net/mlx5/mlx5_rxtx_vec_neon.h @@ -193,8 +193,8 @@ txq_scatter_v(struct mlx5_txq_data *txq, struct rte_mbuf **pkts, vst1q_u8((void *)t_wqe, ctrl); /* Fill ESEG in the header. */ vst1q_u16((void *)(t_wqe + 1), - (uint16x8_t) { 0, 0, cs_flags, rte_cpu_to_be_16(len), - 0, 0, 0, 0 }); + ((uint16x8_t) { 0, 0, cs_flags, rte_cpu_to_be_16(len), + 0, 0, 0, 0 })); txq->wqe_ci = wqe_ci; } if (!n) @@ -320,10 +320,10 @@ txq_burst_v(struct mlx5_txq_data *txq, struct rte_mbuf **pkts, uint16_t pkts_n, vst1q_u8((void *)t_wqe, ctrl); /* Fill ESEG in the header. */ vst1q_u8((void *)(t_wqe + 1), - (uint8x16_t) { 0, 0, 0, 0, - cs_flags, 0, 0, 0, - 0, 0, 0, 0, - 0, 0, 0, 0 }); + ((uint8x16_t) { 0, 0, 0, 0, + cs_flags, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0 })); #ifdef MLX5_PMD_SOFT_COUNTERS txq->stats.opackets += pkts_n; #endif @@ -806,6 +806,7 @@ rxq_burst_v(struct mlx5_rxq_data *rxq, struct rte_mbuf **pkts, uint16_t pkts_n, uint16x4_t mask; uint16x4_t byte_cnt; uint32x4_t ptype_info, flow_tag; + register uint64x2_t c0, c1, c2, c3; uint8_t *p0, *p1, *p2, *p3; uint8_t *e0 = (void *)&elts[pos]->pkt_len; uint8_t *e1 = (void *)&elts[pos + 1]->pkt_len; @@ -822,6 +823,16 @@ rxq_burst_v(struct mlx5_rxq_data *rxq, struct rte_mbuf **pkts, uint16_t pkts_n, p1 = p0 + (pkts_n - pos > 1) * sizeof(struct mlx5_cqe); p2 = p1 + (pkts_n - pos > 2) * sizeof(struct mlx5_cqe); p3 = p2 + (pkts_n - pos > 3) * sizeof(struct mlx5_cqe); + /* B.0 (CQE 3) load a block having op_own. */ + c3 = vld1q_u64((uint64_t *)(p3 + 48)); + /* B.0 (CQE 2) load a block having op_own. */ + c2 = vld1q_u64((uint64_t *)(p2 + 48)); + /* B.0 (CQE 1) load a block having op_own. */ + c1 = vld1q_u64((uint64_t *)(p1 + 48)); + /* B.0 (CQE 0) load a block having op_own. */ + c0 = vld1q_u64((uint64_t *)(p0 + 48)); + /* Synchronize for loading the rest of blocks. */ + rte_io_rmb(); /* Prefetch next 4 CQEs. */ if (pkts_n - pos >= 2 * MLX5_VPMD_DESCS_PER_LOOP) { unsigned int next = pos + MLX5_VPMD_DESCS_PER_LOOP; @@ -831,50 +842,46 @@ rxq_burst_v(struct mlx5_rxq_data *rxq, struct rte_mbuf **pkts, uint16_t pkts_n, rte_prefetch_non_temporal(&cq[next + 3]); } __asm__ volatile ( - /* B.1 (CQE 3) load a block having op_own. */ - "ld1 {v19.16b}, [%[p3]] \n\t" - "sub %[p3], %[p3], #48 \n\t" - /* B.2 (CQE 3) load the rest blocks. */ + /* B.1 (CQE 3) load the rest of blocks. */ "ld1 {v16.16b - v18.16b}, [%[p3]] \n\t" + /* B.2 (CQE 3) move the block having op_own. */ + "mov v19.16b, %[c3].16b \n\t" /* B.3 (CQE 3) extract 16B fields. */ "tbl v23.16b, {v16.16b - v19.16b}, %[cqe_shuf_m].16b \n\t" + /* B.1 (CQE 2) load the rest of blocks. */ + "ld1 {v16.16b - v18.16b}, [%[p2]] \n\t" /* B.4 (CQE 3) adjust CRC length. */ "sub v23.8h, v23.8h, %[crc_adj].8h \n\t" - /* B.1 (CQE 2) load a block having op_own. */ - "ld1 {v19.16b}, [%[p2]] \n\t" - "sub %[p2], %[p2], #48 \n\t" /* C.1 (CQE 3) generate final structure for mbuf. */ "tbl v15.16b, {v23.16b}, %[mb_shuf_m].16b \n\t" - /* B.2 (CQE 2) load the rest blocks. 
*/ - "ld1 {v16.16b - v18.16b}, [%[p2]] \n\t" + /* B.2 (CQE 2) move the block having op_own. */ + "mov v19.16b, %[c2].16b \n\t" /* B.3 (CQE 2) extract 16B fields. */ "tbl v22.16b, {v16.16b - v19.16b}, %[cqe_shuf_m].16b \n\t" + /* B.1 (CQE 1) load the rest of blocks. */ + "ld1 {v16.16b - v18.16b}, [%[p1]] \n\t" /* B.4 (CQE 2) adjust CRC length. */ "sub v22.8h, v22.8h, %[crc_adj].8h \n\t" - /* B.1 (CQE 1) load a block having op_own. */ - "ld1 {v19.16b}, [%[p1]] \n\t" - "sub %[p1], %[p1], #48 \n\t" /* C.1 (CQE 2) generate final structure for mbuf. */ "tbl v14.16b, {v22.16b}, %[mb_shuf_m].16b \n\t" - /* B.2 (CQE 1) load the rest blocks. */ - "ld1 {v16.16b - v18.16b}, [%[p1]] \n\t" + /* B.2 (CQE 1) move the block having op_own. */ + "mov v19.16b, %[c1].16b \n\t" /* B.3 (CQE 1) extract 16B fields. */ "tbl v21.16b, {v16.16b - v19.16b}, %[cqe_shuf_m].16b \n\t" + /* B.1 (CQE 0) load the rest of blocks. */ + "ld1 {v16.16b - v18.16b}, [%[p0]] \n\t" /* B.4 (CQE 1) adjust CRC length. */ "sub v21.8h, v21.8h, %[crc_adj].8h \n\t" - /* B.1 (CQE 0) load a block having op_own. */ - "ld1 {v19.16b}, [%[p0]] \n\t" - "sub %[p0], %[p0], #48 \n\t" /* C.1 (CQE 1) generate final structure for mbuf. */ "tbl v13.16b, {v21.16b}, %[mb_shuf_m].16b \n\t" - /* B.2 (CQE 0) load the rest blocks. */ - "ld1 {v16.16b - v18.16b}, [%[p0]] \n\t" + /* B.2 (CQE 0) move the block having op_own. */ + "mov v19.16b, %[c0].16b \n\t" + /* A.1 load mbuf pointers. */ + "ld1 {v24.2d - v25.2d}, [%[elts_p]] \n\t" /* B.3 (CQE 0) extract 16B fields. */ "tbl v20.16b, {v16.16b - v19.16b}, %[cqe_shuf_m].16b \n\t" /* B.4 (CQE 0) adjust CRC length. */ "sub v20.8h, v20.8h, %[crc_adj].8h \n\t" - /* A.1 load mbuf pointers. */ - "ld1 {v24.2d - v25.2d}, [%[elts_p]] \n\t" /* D.1 extract op_own byte. */ "tbl %[op_own].8b, {v20.16b - v23.16b}, %[owner_shuf_m].8b \n\t" /* C.2 (CQE 3) adjust flow mark. */ @@ -909,9 +916,9 @@ rxq_burst_v(struct mlx5_rxq_data *rxq, struct rte_mbuf **pkts, uint16_t pkts_n, [byte_cnt]"=&w"(byte_cnt), [ptype_info]"=&w"(ptype_info), [flow_tag]"=&w"(flow_tag) - :[p3]"r"(p3 + 48), [p2]"r"(p2 + 48), - [p1]"r"(p1 + 48), [p0]"r"(p0 + 48), + :[p3]"r"(p3), [p2]"r"(p2), [p1]"r"(p1), [p0]"r"(p0), [e3]"r"(e3), [e2]"r"(e2), [e1]"r"(e1), [e0]"r"(e0), + [c3]"w"(c3), [c2]"w"(c2), [c1]"w"(c1), [c0]"w"(c0), [elts_p]"r"(elts_p), [pkts_p]"r"(pkts_p), [cqe_shuf_m]"w"(cqe_shuf_m), diff --git a/drivers/net/mlx5/mlx5_rxtx_vec_sse.h b/drivers/net/mlx5/mlx5_rxtx_vec_sse.h index 7ef2c59e..79314292 100644 --- a/drivers/net/mlx5/mlx5_rxtx_vec_sse.h +++ b/drivers/net/mlx5/mlx5_rxtx_vec_sse.h @@ -825,7 +825,7 @@ rxq_burst_v(struct mlx5_rxq_data *rxq, struct rte_mbuf **pkts, uint16_t pkts_n, /* B.2 copy mbuf pointers. */ _mm_storeu_si128((__m128i *)&pkts[pos], mbp1); _mm_storeu_si128((__m128i *)&pkts[pos + 2], mbp2); - rte_compiler_barrier(); + rte_io_rmb(); /* C.1 load remained CQE data and extract necessary fields. */ cqe_tmp2 = _mm_load_si128((__m128i *)&cq[pos + p3]); cqe_tmp1 = _mm_load_si128((__m128i *)&cq[pos + p2]); diff --git a/drivers/net/mlx5/mlx5_socket.c b/drivers/net/mlx5/mlx5_socket.c index 5cd1ab80..7ab31000 100644 --- a/drivers/net/mlx5/mlx5_socket.c +++ b/drivers/net/mlx5/mlx5_socket.c @@ -45,21 +45,21 @@ /** * Initialise the socket to communicate with the secondary process * - * @param[in] priv - * Pointer to private structure. + * @param[in] dev + * Pointer to Ethernet device. * * @return - * 0 on success, errno value on failure. + * 0 on success, a negative errno value otherwise and rte_errno is set. 
*/ int -priv_socket_init(struct priv *priv) +mlx5_socket_init(struct rte_eth_dev *dev) { + struct priv *priv = dev->data->dev_private; struct sockaddr_un sun = { .sun_family = AF_UNIX, }; int ret; int flags; - struct stat file_stat; /* * Initialise the socket to communicate with the secondary @@ -67,70 +67,77 @@ priv_socket_init(struct priv *priv) */ ret = socket(AF_UNIX, SOCK_STREAM, 0); if (ret < 0) { - WARN("secondary process not supported: %s", strerror(errno)); - return ret; + rte_errno = errno; + DRV_LOG(WARNING, "port %u secondary process not supported: %s", + dev->data->port_id, strerror(errno)); + goto error; } priv->primary_socket = ret; flags = fcntl(priv->primary_socket, F_GETFL, 0); - if (flags == -1) - goto out; + if (flags == -1) { + rte_errno = errno; + goto error; + } ret = fcntl(priv->primary_socket, F_SETFL, flags | O_NONBLOCK); - if (ret < 0) - goto out; + if (ret < 0) { + rte_errno = errno; + goto error; + } snprintf(sun.sun_path, sizeof(sun.sun_path), "/var/tmp/%s_%d", MLX5_DRIVER_NAME, priv->primary_socket); - ret = stat(sun.sun_path, &file_stat); - if (!ret) - claim_zero(remove(sun.sun_path)); + remove(sun.sun_path); ret = bind(priv->primary_socket, (const struct sockaddr *)&sun, sizeof(sun)); if (ret < 0) { - WARN("cannot bind socket, secondary process not supported: %s", - strerror(errno)); + rte_errno = errno; + DRV_LOG(WARNING, + "port %u cannot bind socket, secondary process not" + " supported: %s", + dev->data->port_id, strerror(errno)); goto close; } ret = listen(priv->primary_socket, 0); if (ret < 0) { - WARN("Secondary process not supported: %s", strerror(errno)); + rte_errno = errno; + DRV_LOG(WARNING, "port %u secondary process not supported: %s", + dev->data->port_id, strerror(errno)); goto close; } - return ret; + return 0; close: remove(sun.sun_path); -out: +error: claim_zero(close(priv->primary_socket)); priv->primary_socket = 0; - return -(ret); + return -rte_errno; } /** * Un-Initialise the socket to communicate with the secondary process * - * @param[in] priv - * Pointer to private structure. - * - * @return - * 0 on success, errno value on failure. + * @param[in] dev */ -int -priv_socket_uninit(struct priv *priv) +void +mlx5_socket_uninit(struct rte_eth_dev *dev) { + struct priv *priv = dev->data->dev_private; + MKSTR(path, "/var/tmp/%s_%d", MLX5_DRIVER_NAME, priv->primary_socket); claim_zero(close(priv->primary_socket)); priv->primary_socket = 0; claim_zero(remove(path)); - return 0; } /** * Handle socket interrupts. * - * @param priv - * Pointer to private structure. + * @param dev + * Pointer to Ethernet device. */ void -priv_socket_handle(struct priv *priv) +mlx5_socket_handle(struct rte_eth_dev *dev) { + struct priv *priv = dev->data->dev_private; int conn_sock; int ret = 0; struct cmsghdr *cmsg = NULL; @@ -152,25 +159,30 @@ priv_socket_handle(struct priv *priv) /* Accept the connection from the client. 
*/ conn_sock = accept(priv->primary_socket, NULL, NULL); if (conn_sock < 0) { - WARN("connection failed: %s", strerror(errno)); + DRV_LOG(WARNING, "port %u connection failed: %s", + dev->data->port_id, strerror(errno)); return; } ret = setsockopt(conn_sock, SOL_SOCKET, SO_PASSCRED, &(int){1}, sizeof(int)); if (ret < 0) { - WARN("cannot change socket options"); - goto out; + ret = errno; + DRV_LOG(WARNING, "port %u cannot change socket options: %s", + dev->data->port_id, strerror(ret)); + goto error; } ret = recvmsg(conn_sock, &msg, MSG_WAITALL); if (ret < 0) { - WARN("received an empty message: %s", strerror(errno)); - goto out; + ret = errno; + DRV_LOG(WARNING, "port %u received an empty message: %s", + dev->data->port_id, strerror(ret)); + goto error; } /* Expect to receive credentials only. */ cmsg = CMSG_FIRSTHDR(&msg); if (cmsg == NULL) { - WARN("no message"); - goto out; + DRV_LOG(WARNING, "port %u no message", dev->data->port_id); + goto error; } if ((cmsg->cmsg_type == SCM_CREDENTIALS) && (cmsg->cmsg_len >= sizeof(*cred))) { @@ -179,14 +191,16 @@ priv_socket_handle(struct priv *priv) } cmsg = CMSG_NXTHDR(&msg, cmsg); if (cmsg != NULL) { - WARN("Message wrongly formatted"); - goto out; + DRV_LOG(WARNING, "port %u message wrongly formatted", + dev->data->port_id); + goto error; } /* Make sure all the ancillary data was received and valid. */ if ((cred == NULL) || (cred->uid != getuid()) || (cred->gid != getgid())) { - WARN("wrong credentials"); - goto out; + DRV_LOG(WARNING, "port %u wrong credentials", + dev->data->port_id); + goto error; } /* Set-up the ancillary data. */ cmsg = CMSG_FIRSTHDR(&msg); @@ -198,27 +212,29 @@ priv_socket_handle(struct priv *priv) *fd = priv->ctx->cmd_fd; ret = sendmsg(conn_sock, &msg, 0); if (ret < 0) - WARN("cannot send response"); -out: + DRV_LOG(WARNING, "port %u cannot send response", + dev->data->port_id); +error: close(conn_sock); } /** * Connect to the primary process. * - * @param[in] priv - * Pointer to private structure. + * @param[in] dev + * Pointer to Ethernet device. * * @return - * fd on success, negative errno value on failure. + * fd on success, negative errno value otherwise and rte_errno is set. 
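 *
 * Editorial sketch, not part of this patch: a secondary process
 * retrieves the Verbs command file descriptor like this:
 *
 *   int fd = mlx5_socket_connect(dev);
 *
 *   if (fd < 0)
 *           DRV_LOG(WARNING, "port %u cannot get command fd: %s",
 *                   dev->data->port_id, strerror(rte_errno));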
*/ int -priv_socket_connect(struct priv *priv) +mlx5_socket_connect(struct rte_eth_dev *dev) { + struct priv *priv = dev->data->dev_private; struct sockaddr_un sun = { .sun_family = AF_UNIX, }; - int socket_fd; + int socket_fd = -1; int *fd = NULL; int ret; struct ucred *cred; @@ -238,57 +254,75 @@ priv_socket_connect(struct priv *priv) ret = socket(AF_UNIX, SOCK_STREAM, 0); if (ret < 0) { - WARN("cannot connect to primary"); - return ret; + rte_errno = errno; + DRV_LOG(WARNING, "port %u cannot connect to primary", + dev->data->port_id); + goto error; } socket_fd = ret; snprintf(sun.sun_path, sizeof(sun.sun_path), "/var/tmp/%s_%d", MLX5_DRIVER_NAME, priv->primary_socket); ret = connect(socket_fd, (const struct sockaddr *)&sun, sizeof(sun)); if (ret < 0) { - WARN("cannot connect to primary"); - goto out; + rte_errno = errno; + DRV_LOG(WARNING, "port %u cannot connect to primary", + dev->data->port_id); + goto error; } cmsg = CMSG_FIRSTHDR(&msg); if (cmsg == NULL) { - DEBUG("cannot get first message"); - goto out; + rte_errno = EINVAL; + DRV_LOG(DEBUG, "port %u cannot get first message", + dev->data->port_id); + goto error; } cmsg->cmsg_level = SOL_SOCKET; cmsg->cmsg_type = SCM_CREDENTIALS; cmsg->cmsg_len = CMSG_LEN(sizeof(*cred)); cred = (struct ucred *)CMSG_DATA(cmsg); if (cred == NULL) { - DEBUG("no credentials received"); - goto out; + rte_errno = EINVAL; + DRV_LOG(DEBUG, "port %u no credentials received", + dev->data->port_id); + goto error; } cred->pid = getpid(); cred->uid = getuid(); cred->gid = getgid(); ret = sendmsg(socket_fd, &msg, MSG_DONTWAIT); if (ret < 0) { - WARN("cannot send credentials to primary: %s", - strerror(errno)); - goto out; + rte_errno = errno; + DRV_LOG(WARNING, + "port %u cannot send credentials to primary: %s", + dev->data->port_id, strerror(errno)); + goto error; } ret = recvmsg(socket_fd, &msg, MSG_WAITALL); if (ret <= 0) { - WARN("no message from primary: %s", strerror(errno)); - goto out; + rte_errno = errno; + DRV_LOG(WARNING, "port %u no message from primary: %s", + dev->data->port_id, strerror(errno)); + goto error; } cmsg = CMSG_FIRSTHDR(&msg); if (cmsg == NULL) { - WARN("No file descriptor received"); - goto out; + rte_errno = EINVAL; + DRV_LOG(WARNING, "port %u no file descriptor received", + dev->data->port_id); + goto error; } fd = (int *)CMSG_DATA(cmsg); - if (*fd <= 0) { - WARN("no file descriptor received: %s", strerror(errno)); - ret = *fd; - goto out; + if (*fd < 0) { + DRV_LOG(WARNING, "port %u no file descriptor received", + dev->data->port_id); + rte_errno = EINVAL; + goto error; } ret = *fd; -out: close(socket_fd); return ret; +error: + if (socket_fd != -1) + close(socket_fd); + return -rte_errno; } diff --git a/drivers/net/mlx5/mlx5_stats.c b/drivers/net/mlx5/mlx5_stats.c index 2427585f..345ed707 100644 --- a/drivers/net/mlx5/mlx5_stats.c +++ b/drivers/net/mlx5/mlx5_stats.c @@ -31,8 +31,11 @@ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ +#include <inttypes.h> #include <linux/sockios.h> #include <linux/ethtool.h> +#include <stdint.h> +#include <stdio.h> #include <rte_ethdev.h> #include <rte_common.h> @@ -47,6 +50,7 @@ struct mlx5_counter_ctrl { char dpdk_name[RTE_ETH_XSTATS_NAME_SIZE]; /* Name of the counter on the device table. */ char ctr_name[RTE_ETH_XSTATS_NAME_SIZE]; + uint32_t ib:1; /**< Nonzero for IB counters. 
*/ }; static const struct mlx5_counter_ctrl mlx5_counters_init[] = { @@ -121,6 +125,7 @@ static const struct mlx5_counter_ctrl mlx5_counters_init[] = { { .dpdk_name = "rx_out_of_buffer", .ctr_name = "out_of_buffer", + .ib = 1, }, }; @@ -129,39 +134,56 @@ static const unsigned int xstats_n = RTE_DIM(mlx5_counters_init); /** * Read device counters table. * - * @param priv - * Pointer to private structure. + * @param dev + * Pointer to Ethernet device. * @param[out] stats * Counters table output buffer. * * @return - * 0 on success and stats is filled, negative on error. + * 0 on success and stats is filled, negative errno value otherwise and + * rte_errno is set. */ static int -priv_read_dev_counters(struct priv *priv, uint64_t *stats) +mlx5_read_dev_counters(struct rte_eth_dev *dev, uint64_t *stats) { + struct priv *priv = dev->data->dev_private; struct mlx5_xstats_ctrl *xstats_ctrl = &priv->xstats_ctrl; unsigned int i; struct ifreq ifr; unsigned int stats_sz = xstats_ctrl->stats_n * sizeof(uint64_t); unsigned char et_stat_buf[sizeof(struct ethtool_stats) + stats_sz]; struct ethtool_stats *et_stats = (struct ethtool_stats *)et_stat_buf; + int ret; et_stats->cmd = ETHTOOL_GSTATS; et_stats->n_stats = xstats_ctrl->stats_n; ifr.ifr_data = (caddr_t)et_stats; - if (priv_ifreq(priv, SIOCETHTOOL, &ifr) != 0) { - WARN("unable to read statistic values from device"); - return -1; + ret = mlx5_ifreq(dev, SIOCETHTOOL, &ifr); + if (ret) { + DRV_LOG(WARNING, + "port %u unable to read statistic values from device", + dev->data->port_id); + return ret; } for (i = 0; i != xstats_n; ++i) { - if (priv_is_ib_cntr(mlx5_counters_init[i].ctr_name)) - priv_get_cntr_sysfs(priv, - mlx5_counters_init[i].ctr_name, - &stats[i]); - else + if (mlx5_counters_init[i].ib) { + FILE *file; + MKSTR(path, "%s/ports/1/hw_counters/%s", + priv->ibdev_path, + mlx5_counters_init[i].ctr_name); + + file = fopen(path, "rb"); + if (file) { + int n = fscanf(file, "%" SCNu64, &stats[i]); + + fclose(file); + if (n != 1) + stats[i] = 0; + } + } else { stats[i] = (uint64_t) et_stats->data[xstats_ctrl->dev_table_idx[i]]; + } } return 0; } @@ -169,22 +191,26 @@ priv_read_dev_counters(struct priv *priv, uint64_t *stats) /** * Query the number of statistics provided by ETHTOOL. * - * @param priv - * Pointer to private structure. + * @param dev + * Pointer to Ethernet device. * * @return - * Number of statistics on success, -1 on error. + * Number of statistics on success, negative errno value otherwise and + * rte_errno is set. */ static int -priv_ethtool_get_stats_n(struct priv *priv) { +mlx5_ethtool_get_stats_n(struct rte_eth_dev *dev) { struct ethtool_drvinfo drvinfo; struct ifreq ifr; + int ret; drvinfo.cmd = ETHTOOL_GDRVINFO; ifr.ifr_data = (caddr_t)&drvinfo; - if (priv_ifreq(priv, SIOCETHTOOL, &ifr) != 0) { - WARN("unable to query number of statistics"); - return -1; + ret = mlx5_ifreq(dev, SIOCETHTOOL, &ifr); + if (ret) { + DRV_LOG(WARNING, "port %u unable to query number of statistics", + dev->data->port_id); + return ret; } return drvinfo.n_stats; } @@ -192,12 +218,13 @@ priv_ethtool_get_stats_n(struct priv *priv) { /** * Init the structures to read device counters. * - * @param priv - * Pointer to private structure. + * @param dev + * Pointer to Ethernet device. 
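 *
 * Editorial note: counters flagged with .ib above are read from sysfs
 * instead of ethtool, e.g. (path shown for illustration only):
 *
 *   /sys/class/infiniband/mlx5_0/ports/1/hw_counters/out_of_buffer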
*/ void -priv_xstats_init(struct priv *priv) +mlx5_xstats_init(struct rte_eth_dev *dev) { + struct priv *priv = dev->data->dev_private; struct mlx5_xstats_ctrl *xstats_ctrl = &priv->xstats_ctrl; unsigned int i; unsigned int j; @@ -205,12 +232,15 @@ priv_xstats_init(struct priv *priv) struct ethtool_gstrings *strings = NULL; unsigned int dev_stats_n; unsigned int str_sz; + int ret; - dev_stats_n = priv_ethtool_get_stats_n(priv); - if (dev_stats_n < 1) { - WARN("no extended statistics available"); + ret = mlx5_ethtool_get_stats_n(dev); + if (ret < 0) { + DRV_LOG(WARNING, "port %u no extended statistics available", + dev->data->port_id); return; } + dev_stats_n = ret; xstats_ctrl->stats_n = dev_stats_n; /* Allocate memory to grab stat names and values. */ str_sz = dev_stats_n * ETH_GSTRING_LEN; @@ -218,15 +248,18 @@ priv_xstats_init(struct priv *priv) rte_malloc("xstats_strings", str_sz + sizeof(struct ethtool_gstrings), 0); if (!strings) { - WARN("unable to allocate memory for xstats"); + DRV_LOG(WARNING, "port %u unable to allocate memory for xstats", + dev->data->port_id); return; } strings->cmd = ETHTOOL_GSTRINGS; strings->string_set = ETH_SS_STATS; strings->len = dev_stats_n; ifr.ifr_data = (caddr_t)strings; - if (priv_ifreq(priv, SIOCETHTOOL, &ifr) != 0) { - WARN("unable to get statistic names"); + ret = mlx5_ifreq(dev, SIOCETHTOOL, &ifr); + if (ret) { + DRV_LOG(WARNING, "port %u unable to get statistic names", + dev->data->port_id); goto free; } for (j = 0; j != xstats_n; ++j) @@ -244,68 +277,67 @@ priv_xstats_init(struct priv *priv) } } for (j = 0; j != xstats_n; ++j) { - if (priv_is_ib_cntr(mlx5_counters_init[j].ctr_name)) + if (mlx5_counters_init[j].ib) continue; if (xstats_ctrl->dev_table_idx[j] >= dev_stats_n) { - WARN("counter \"%s\" is not recognized", - mlx5_counters_init[j].dpdk_name); + DRV_LOG(WARNING, + "port %u counter \"%s\" is not recognized", + dev->data->port_id, + mlx5_counters_init[j].dpdk_name); goto free; } } /* Copy to base at first time. */ assert(xstats_n <= MLX5_MAX_XSTATS); - priv_read_dev_counters(priv, xstats_ctrl->base); + ret = mlx5_read_dev_counters(dev, xstats_ctrl->base); + if (ret) + DRV_LOG(ERR, "port %u cannot read device counters: %s", + dev->data->port_id, strerror(rte_errno)); free: rte_free(strings); } /** - * Get device extended statistics. + * DPDK callback to get extended device statistics. * - * @param priv - * Pointer to private structure. + * @param dev + * Pointer to Ethernet device. * @param[out] stats * Pointer to rte extended stats table. + * @param n + * The size of the stats table. * * @return * Number of extended stats on success and stats is filled, - * negative on error. + * negative on error and rte_errno is set. */ -static int -priv_xstats_get(struct priv *priv, struct rte_eth_xstat *stats) +int +mlx5_xstats_get(struct rte_eth_dev *dev, struct rte_eth_xstat *stats, + unsigned int n) { - struct mlx5_xstats_ctrl *xstats_ctrl = &priv->xstats_ctrl; + struct priv *priv = dev->data->dev_private; unsigned int i; - unsigned int n = xstats_n; uint64_t counters[n]; - if (priv_read_dev_counters(priv, counters) < 0) - return -1; - for (i = 0; i != xstats_n; ++i) { - stats[i].id = i; - stats[i].value = (counters[i] - xstats_ctrl->base[i]); - } - return n; -} - -/** - * Reset device extended statistics. - * - * @param priv - * Pointer to private structure. 
- */ -static void -priv_xstats_reset(struct priv *priv) -{ - struct mlx5_xstats_ctrl *xstats_ctrl = &priv->xstats_ctrl; - unsigned int i; - unsigned int n = xstats_n; - uint64_t counters[n]; + if (n >= xstats_n && stats) { + struct mlx5_xstats_ctrl *xstats_ctrl = &priv->xstats_ctrl; + int stats_n; + int ret; - if (priv_read_dev_counters(priv, counters) < 0) - return; - for (i = 0; i != n; ++i) - xstats_ctrl->base[i] = counters[i]; + stats_n = mlx5_ethtool_get_stats_n(dev); + if (stats_n < 0) + return stats_n; + if (xstats_ctrl->stats_n != stats_n) + mlx5_xstats_init(dev); + ret = mlx5_read_dev_counters(dev, counters); + if (ret) + return ret; + for (i = 0; i != xstats_n; ++i) { + stats[i].id = i; + stats[i].value = (counters[i] - xstats_ctrl->base[i]); + } + } + return xstats_n; } /** @@ -315,16 +347,19 @@ priv_xstats_reset(struct priv *priv) * Pointer to Ethernet device structure. * @param[out] stats * Stats structure output buffer. + * + * @return + * 0 on success and stats is filled, negative errno value otherwise and + * rte_errno is set. */ int mlx5_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats) { - struct priv *priv = mlx5_get_priv(dev); + struct priv *priv = dev->data->dev_private; struct rte_eth_stats tmp = {0}; unsigned int i; unsigned int idx; - priv_lock(priv); /* Add software counters. */ for (i = 0; (i != priv->rxqs_n); ++i) { struct mlx5_rxq_data *rxq = (*priv->rxqs)[i]; @@ -370,7 +405,6 @@ mlx5_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats) /* FIXME: retrieve and add hardware counters. */ #endif *stats = tmp; - priv_unlock(priv); return 0; } @@ -387,7 +421,6 @@ mlx5_stats_reset(struct rte_eth_dev *dev) unsigned int i; unsigned int idx; - priv_lock(priv); for (i = 0; (i != priv->rxqs_n); ++i) { if ((*priv->rxqs)[i] == NULL) continue; @@ -405,45 +438,6 @@ mlx5_stats_reset(struct rte_eth_dev *dev) #ifndef MLX5_PMD_SOFT_COUNTERS /* FIXME: reset hardware counters. */ #endif - priv_unlock(priv); -} - -/** - * DPDK callback to get extended device statistics. - * - * @param dev - * Pointer to Ethernet device structure. - * @param[out] stats - * Stats table output buffer. - * @param n - * The size of the stats table. - * - * @return - * Number of xstats on success, negative on failure. 
- */ -int -mlx5_xstats_get(struct rte_eth_dev *dev, - struct rte_eth_xstat *stats, unsigned int n) -{ - struct priv *priv = mlx5_get_priv(dev); - int ret = xstats_n; - - if (n >= xstats_n && stats) { - struct mlx5_xstats_ctrl *xstats_ctrl = &priv->xstats_ctrl; - int stats_n; - - priv_lock(priv); - stats_n = priv_ethtool_get_stats_n(priv); - if (stats_n < 0) { - priv_unlock(priv); - return -1; - } - if (xstats_ctrl->stats_n != stats_n) - priv_xstats_init(priv); - ret = priv_xstats_get(priv, stats); - priv_unlock(priv); - } - return ret; } /** @@ -455,19 +449,30 @@ mlx5_xstats_get(struct rte_eth_dev *dev, void mlx5_xstats_reset(struct rte_eth_dev *dev) { - struct priv *priv = mlx5_get_priv(dev); + struct priv *priv = dev->data->dev_private; struct mlx5_xstats_ctrl *xstats_ctrl = &priv->xstats_ctrl; int stats_n; + unsigned int i; + unsigned int n = xstats_n; + uint64_t counters[n]; + int ret; - priv_lock(priv); - stats_n = priv_ethtool_get_stats_n(priv); - if (stats_n < 0) - goto unlock; + stats_n = mlx5_ethtool_get_stats_n(dev); + if (stats_n < 0) { + DRV_LOG(ERR, "port %u cannot get stats: %s", dev->data->port_id, + strerror(-stats_n)); + return; + } if (xstats_ctrl->stats_n != stats_n) - priv_xstats_init(priv); - priv_xstats_reset(priv); -unlock: - priv_unlock(priv); + mlx5_xstats_init(dev); + ret = mlx5_read_dev_counters(dev, counters); + if (ret) { + DRV_LOG(ERR, "port %u cannot read device counters: %s", + dev->data->port_id, strerror(rte_errno)); + return; + } + for (i = 0; i != n; ++i) + xstats_ctrl->base[i] = counters[i]; } /** @@ -484,21 +489,18 @@ unlock: * Number of xstats names. */ int -mlx5_xstats_get_names(struct rte_eth_dev *dev, - struct rte_eth_xstat_name *xstats_names, unsigned int n) +mlx5_xstats_get_names(struct rte_eth_dev *dev __rte_unused, + struct rte_eth_xstat_name *xstats_names, unsigned int n) { - struct priv *priv = mlx5_get_priv(dev); unsigned int i; if (n >= xstats_n && xstats_names) { - priv_lock(priv); for (i = 0; i != xstats_n; ++i) { strncpy(xstats_names[i].name, mlx5_counters_init[i].dpdk_name, RTE_ETH_XSTATS_NAME_SIZE); xstats_names[i].name[RTE_ETH_XSTATS_NAME_SIZE - 1] = 0; } - priv_unlock(priv); } return xstats_n; } diff --git a/drivers/net/mlx5/mlx5_trigger.c b/drivers/net/mlx5/mlx5_trigger.c index d682ea2c..214543f8 100644 --- a/drivers/net/mlx5/mlx5_trigger.c +++ b/drivers/net/mlx5/mlx5_trigger.c @@ -41,80 +41,119 @@ #include "mlx5_rxtx.h" #include "mlx5_utils.h" +/** + * Stop traffic on Tx queues. + * + * @param dev + * Pointer to Ethernet device structure. + */ static void -priv_txq_stop(struct priv *priv) +mlx5_txq_stop(struct rte_eth_dev *dev) { + struct priv *priv = dev->data->dev_private; unsigned int i; for (i = 0; i != priv->txqs_n; ++i) - mlx5_priv_txq_release(priv, i); + mlx5_txq_release(dev, i); } +/** + * Start traffic on Tx queues. + * + * @param dev + * Pointer to Ethernet device structure. + * + * @return + * 0 on success, a negative errno value otherwise and rte_errno is set. + */ static int -priv_txq_start(struct priv *priv) +mlx5_txq_start(struct rte_eth_dev *dev) { + struct priv *priv = dev->data->dev_private; unsigned int i; - int ret = 0; + int ret; /* Add memory regions to Tx queues. 
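 * Up to MLX5_PMD_TX_MP_CACHE mempools are pre-registered per queue so
 * the data path does not have to take the registration slow path on
 * first use.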
*/ for (i = 0; i != priv->txqs_n; ++i) { unsigned int idx = 0; struct mlx5_mr *mr; - struct mlx5_txq_ctrl *txq_ctrl = mlx5_priv_txq_get(priv, i); + struct mlx5_txq_ctrl *txq_ctrl = mlx5_txq_get(dev, i); if (!txq_ctrl) continue; LIST_FOREACH(mr, &priv->mr, next) { - priv_txq_mp2mr_reg(priv, &txq_ctrl->txq, mr->mp, idx++); + mlx5_txq_mp2mr_reg(&txq_ctrl->txq, mr->mp, idx++); if (idx == MLX5_PMD_TX_MP_CACHE) break; } txq_alloc_elts(txq_ctrl); - txq_ctrl->ibv = mlx5_priv_txq_ibv_new(priv, i); + txq_ctrl->ibv = mlx5_txq_ibv_new(dev, i); if (!txq_ctrl->ibv) { - ret = ENOMEM; + rte_errno = ENOMEM; goto error; } } - return -ret; + ret = mlx5_tx_uar_remap(dev, priv->ctx->cmd_fd); + if (ret) + goto error; + return 0; error: - priv_txq_stop(priv); - return -ret; + ret = rte_errno; /* Save rte_errno before cleanup. */ + mlx5_txq_stop(dev); + rte_errno = ret; /* Restore rte_errno. */ + return -rte_errno; } +/** + * Stop traffic on Rx queues. + * + * @param dev + * Pointer to Ethernet device structure. + */ static void -priv_rxq_stop(struct priv *priv) +mlx5_rxq_stop(struct rte_eth_dev *dev) { + struct priv *priv = dev->data->dev_private; unsigned int i; for (i = 0; i != priv->rxqs_n; ++i) - mlx5_priv_rxq_release(priv, i); + mlx5_rxq_release(dev, i); } +/** + * Start traffic on Rx queues. + * + * @param dev + * Pointer to Ethernet device structure. + * + * @return + * 0 on success, a negative errno value otherwise and rte_errno is set. + */ static int -priv_rxq_start(struct priv *priv) +mlx5_rxq_start(struct rte_eth_dev *dev) { + struct priv *priv = dev->data->dev_private; unsigned int i; int ret = 0; for (i = 0; i != priv->rxqs_n; ++i) { - struct mlx5_rxq_ctrl *rxq_ctrl = mlx5_priv_rxq_get(priv, i); + struct mlx5_rxq_ctrl *rxq_ctrl = mlx5_rxq_get(dev, i); if (!rxq_ctrl) continue; ret = rxq_alloc_elts(rxq_ctrl); if (ret) goto error; - rxq_ctrl->ibv = mlx5_priv_rxq_ibv_new(priv, i); - if (!rxq_ctrl->ibv) { - ret = ENOMEM; + rxq_ctrl->ibv = mlx5_rxq_ibv_new(dev, i); + if (!rxq_ctrl->ibv) goto error; - } } - return -ret; + return 0; error: - priv_rxq_stop(priv); - return -ret; + ret = rte_errno; /* Save rte_errno before cleanup. */ + mlx5_rxq_stop(dev); + rte_errno = ret; /* Restore rte_errno. */ + return -rte_errno; } /** @@ -126,68 +165,73 @@ error: * Pointer to Ethernet device structure. * * @return - * 0 on success, negative errno value on failure. + * 0 on success, a negative errno value otherwise and rte_errno is set. 
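 *
 * Editorial note on the error path below: the rollback helpers may
 * clobber rte_errno, hence the save/restore idiom used across this
 * patch:
 *
 *   ret = rte_errno;   (save before the cleanup calls)
 *   ... rollback ...
 *   rte_errno = ret;   (restore)
 *   return -rte_errno;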
*/ int mlx5_dev_start(struct rte_eth_dev *dev) { struct priv *priv = dev->data->dev_private; struct mlx5_mr *mr = NULL; - int err; + int ret; dev->data->dev_started = 1; - priv_lock(priv); - err = priv_flow_create_drop_queue(priv); - if (err) { - ERROR("%p: Drop queue allocation failed: %s", - (void *)dev, strerror(err)); + ret = mlx5_flow_create_drop_queue(dev); + if (ret) { + DRV_LOG(ERR, "port %u drop queue allocation failed: %s", + dev->data->port_id, strerror(rte_errno)); goto error; } - DEBUG("%p: allocating and configuring hash RX queues", (void *)dev); + DRV_LOG(DEBUG, "port %u allocating and configuring hash Rx queues", + dev->data->port_id); rte_mempool_walk(mlx5_mp2mr_iter, priv); - err = priv_txq_start(priv); - if (err) { - ERROR("%p: TXQ allocation failed: %s", - (void *)dev, strerror(err)); + ret = mlx5_txq_start(dev); + if (ret) { + DRV_LOG(ERR, "port %u Tx queue allocation failed: %s", + dev->data->port_id, strerror(rte_errno)); + goto error; + } + ret = mlx5_rxq_start(dev); + if (ret) { + DRV_LOG(ERR, "port %u Rx queue allocation failed: %s", + dev->data->port_id, strerror(rte_errno)); goto error; } - err = priv_rxq_start(priv); - if (err) { - ERROR("%p: RXQ allocation failed: %s", - (void *)dev, strerror(err)); + ret = mlx5_rx_intr_vec_enable(dev); + if (ret) { + DRV_LOG(ERR, "port %u Rx interrupt vector creation failed", + dev->data->port_id); goto error; } - err = priv_rx_intr_vec_enable(priv); - if (err) { - ERROR("%p: RX interrupt vector creation failed", - (void *)priv); + mlx5_xstats_init(dev); + ret = mlx5_traffic_enable(dev); + if (ret) { + DRV_LOG(DEBUG, "port %u failed to set defaults flows", + dev->data->port_id); goto error; } - priv_xstats_init(priv); - /* Update link status and Tx/Rx callbacks for the first time. */ - memset(&dev->data->dev_link, 0, sizeof(struct rte_eth_link)); - INFO("Forcing port %u link to be up", dev->data->port_id); - err = priv_force_link_status_change(priv, ETH_LINK_UP); - if (err) { - DEBUG("Failed to set port %u link to be up", - dev->data->port_id); + ret = mlx5_flow_start(dev, &priv->flows); + if (ret) { + DRV_LOG(DEBUG, "port %u failed to set flows", + dev->data->port_id); goto error; } - priv_dev_interrupt_handler_install(priv, dev); - priv_unlock(priv); + dev->tx_pkt_burst = mlx5_select_tx_function(dev); + dev->rx_pkt_burst = mlx5_select_rx_function(dev); + mlx5_dev_interrupt_handler_install(dev); return 0; error: + ret = rte_errno; /* Save rte_errno before cleanup. */ /* Rollback. */ dev->data->dev_started = 0; for (mr = LIST_FIRST(&priv->mr); mr; mr = LIST_FIRST(&priv->mr)) - priv_mr_release(priv, mr); - priv_flow_stop(priv, &priv->flows); - priv_dev_traffic_disable(priv, dev); - priv_txq_stop(priv); - priv_rxq_stop(priv); - priv_flow_delete_drop_queue(priv); - priv_unlock(priv); - return err; + mlx5_mr_release(mr); + mlx5_flow_stop(dev, &priv->flows); + mlx5_traffic_disable(dev); + mlx5_txq_stop(dev); + mlx5_rxq_stop(dev); + mlx5_flow_delete_drop_queue(dev); + rte_errno = ret; /* Restore rte_errno. */ + return -rte_errno; } /** @@ -204,40 +248,40 @@ mlx5_dev_stop(struct rte_eth_dev *dev) struct priv *priv = dev->data->dev_private; struct mlx5_mr *mr; - priv_lock(priv); dev->data->dev_started = 0; /* Prevent crashes when queues are still in use. 
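 * Swapping in removed_rx_burst()/removed_tx_burst() first and then
 * sleeping lets bursts already running on other lcores drain before
 * the queues are destroyed; rte_wmb() publishes the new function
 * pointers before the wait begins.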
*/ dev->rx_pkt_burst = removed_rx_burst; dev->tx_pkt_burst = removed_tx_burst; rte_wmb(); usleep(1000 * priv->rxqs_n); - DEBUG("%p: cleaning up and destroying hash RX queues", (void *)dev); - priv_flow_stop(priv, &priv->flows); - priv_dev_traffic_disable(priv, dev); - priv_rx_intr_vec_disable(priv); - priv_dev_interrupt_handler_uninstall(priv, dev); - priv_txq_stop(priv); - priv_rxq_stop(priv); + DRV_LOG(DEBUG, "port %u cleaning up and destroying hash Rx queues", + dev->data->port_id); + mlx5_flow_stop(dev, &priv->flows); + mlx5_traffic_disable(dev); + mlx5_rx_intr_vec_disable(dev); + mlx5_dev_interrupt_handler_uninstall(dev); + mlx5_txq_stop(dev); + mlx5_rxq_stop(dev); for (mr = LIST_FIRST(&priv->mr); mr; mr = LIST_FIRST(&priv->mr)) - priv_mr_release(priv, mr); - priv_flow_delete_drop_queue(priv); - priv_unlock(priv); + mlx5_mr_release(mr); + mlx5_flow_delete_drop_queue(dev); } /** * Enable traffic flows configured by control plane * - * @param priv + * @param dev * Pointer to Ethernet device private data. * @param dev * Pointer to Ethernet device structure. * * @return - * 0 on success. + * 0 on success, a negative errno value otherwise and rte_errno is set. */ int -priv_dev_traffic_enable(struct priv *priv, struct rte_eth_dev *dev) +mlx5_traffic_enable(struct rte_eth_dev *dev) { + struct priv *priv = dev->data->dev_private; struct rte_flow_item_eth bcast = { .dst.addr_bytes = "\xff\xff\xff\xff\xff\xff", }; @@ -270,8 +314,9 @@ priv_dev_traffic_enable(struct priv *priv, struct rte_eth_dev *dev) .type = 0, }; - claim_zero(mlx5_ctrl_flow(dev, &promisc, &promisc)); - return 0; + ret = mlx5_ctrl_flow(dev, &promisc, &promisc); + if (ret) + goto error; } if (dev->data->all_multicast) { struct rte_flow_item_eth multicast = { @@ -280,7 +325,9 @@ priv_dev_traffic_enable(struct priv *priv, struct rte_eth_dev *dev) .type = 0, }; - claim_zero(mlx5_ctrl_flow(dev, &multicast, &multicast)); + ret = mlx5_ctrl_flow(dev, &multicast, &multicast); + if (ret) + goto error; } else { /* Add broadcast/multicast flows. */ for (i = 0; i != vlan_filter_n; ++i) { @@ -340,74 +387,49 @@ priv_dev_traffic_enable(struct priv *priv, struct rte_eth_dev *dev) goto error; } if (!vlan_filter_n) { - ret = mlx5_ctrl_flow(dev, &unicast, - &unicast_mask); + ret = mlx5_ctrl_flow(dev, &unicast, &unicast_mask); if (ret) goto error; } } return 0; error: - return rte_errno; + ret = rte_errno; /* Save rte_errno before cleanup. */ + mlx5_flow_list_flush(dev, &priv->ctrl_flows); + rte_errno = ret; /* Restore rte_errno. */ + return -rte_errno; } /** * Disable traffic flows configured by control plane * - * @param priv - * Pointer to Ethernet device private data. * @param dev - * Pointer to Ethernet device structure. - * - * @return - * 0 on success. - */ -int -priv_dev_traffic_disable(struct priv *priv, struct rte_eth_dev *dev) -{ - (void)dev; - priv_flow_flush(priv, &priv->ctrl_flows); - return 0; -} - -/** - * Restart traffic flows configured by control plane - * - * @param priv * Pointer to Ethernet device private data. - * @param dev - * Pointer to Ethernet device structure. - * - * @return - * 0 on success. 
*/ -int -priv_dev_traffic_restart(struct priv *priv, struct rte_eth_dev *dev) +void +mlx5_traffic_disable(struct rte_eth_dev *dev) { - if (dev->data->dev_started) { - priv_dev_traffic_disable(priv, dev); - priv_dev_traffic_enable(priv, dev); - } - return 0; + struct priv *priv = dev->data->dev_private; + + mlx5_flow_list_flush(dev, &priv->ctrl_flows); } /** * Restart traffic flows configured by control plane * * @param dev - * Pointer to Ethernet device structure. + * Pointer to Ethernet device private data. * * @return - * 0 on success. + * 0 on success, a negative errno value otherwise and rte_errno is set. */ int mlx5_traffic_restart(struct rte_eth_dev *dev) { - struct priv *priv = dev->data->dev_private; - - priv_lock(priv); - priv_dev_traffic_restart(priv, dev); - priv_unlock(priv); + if (dev->data->dev_started) { + mlx5_traffic_disable(dev); + return mlx5_traffic_enable(dev); + } return 0; } diff --git a/drivers/net/mlx5/mlx5_txq.c b/drivers/net/mlx5/mlx5_txq.c index 7ca99f5a..a5c6b585 100644 --- a/drivers/net/mlx5/mlx5_txq.c +++ b/drivers/net/mlx5/mlx5_txq.c @@ -74,7 +74,8 @@ txq_alloc_elts(struct mlx5_txq_ctrl *txq_ctrl) for (i = 0; (i != elts_n); ++i) (*txq_ctrl->txq.elts)[i] = NULL; - DEBUG("%p: allocated and configured %u WRs", (void *)txq_ctrl, elts_n); + DRV_LOG(DEBUG, "port %u Tx queue %u allocated and configured %u WRs", + PORT_ID(txq_ctrl->priv), txq_ctrl->idx, elts_n); txq_ctrl->txq.elts_head = 0; txq_ctrl->txq.elts_tail = 0; txq_ctrl->txq.elts_comp = 0; @@ -95,7 +96,8 @@ txq_free_elts(struct mlx5_txq_ctrl *txq_ctrl) uint16_t elts_tail = txq_ctrl->txq.elts_tail; struct rte_mbuf *(*elts)[elts_n] = txq_ctrl->txq.elts; - DEBUG("%p: freeing WRs", (void *)txq_ctrl); + DRV_LOG(DEBUG, "port %u Tx queue %u freeing WRs", + PORT_ID(txq_ctrl->priv), txq_ctrl->idx); txq_ctrl->txq.elts_head = 0; txq_ctrl->txq.elts_tail = 0; txq_ctrl->txq.elts_comp = 0; @@ -130,7 +132,7 @@ txq_free_elts(struct mlx5_txq_ctrl *txq_ctrl) * Thresholds parameters. * * @return - * 0 on success, negative errno value on failure. + * 0 on success, a negative errno value otherwise and rte_errno is set. 
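 *
 * Editorial sketch with hypothetical values, not from this patch: an
 * application reaches this callback through the generic ethdev API:
 *
 *   if (rte_eth_tx_queue_setup(port_id, 0, 512, rte_socket_id(), NULL))
 *           rte_exit(EXIT_FAILURE, "cannot set up Tx queue");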
*/ int mlx5_tx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc, @@ -140,50 +142,47 @@ mlx5_tx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc, struct mlx5_txq_data *txq = (*priv->txqs)[idx]; struct mlx5_txq_ctrl *txq_ctrl = container_of(txq, struct mlx5_txq_ctrl, txq); - int ret = 0; - priv_lock(priv); if (desc <= MLX5_TX_COMP_THRESH) { - WARN("%p: number of descriptors requested for TX queue %u" - " must be higher than MLX5_TX_COMP_THRESH, using" - " %u instead of %u", - (void *)dev, idx, MLX5_TX_COMP_THRESH + 1, desc); + DRV_LOG(WARNING, + "port %u number of descriptors requested for Tx queue" + " %u must be higher than MLX5_TX_COMP_THRESH, using %u" + " instead of %u", + dev->data->port_id, idx, MLX5_TX_COMP_THRESH + 1, desc); desc = MLX5_TX_COMP_THRESH + 1; } if (!rte_is_power_of_2(desc)) { desc = 1 << log2above(desc); - WARN("%p: increased number of descriptors in TX queue %u" - " to the next power of two (%d)", - (void *)dev, idx, desc); + DRV_LOG(WARNING, + "port %u increased number of descriptors in Tx queue" + " %u to the next power of two (%d)", + dev->data->port_id, idx, desc); } - DEBUG("%p: configuring queue %u for %u descriptors", - (void *)dev, idx, desc); + DRV_LOG(DEBUG, "port %u configuring queue %u for %u descriptors", + dev->data->port_id, idx, desc); if (idx >= priv->txqs_n) { - ERROR("%p: queue index out of range (%u >= %u)", - (void *)dev, idx, priv->txqs_n); - priv_unlock(priv); - return -EOVERFLOW; + DRV_LOG(ERR, "port %u Tx queue index out of range (%u >= %u)", + dev->data->port_id, idx, priv->txqs_n); + rte_errno = EOVERFLOW; + return -rte_errno; } - if (!mlx5_priv_txq_releasable(priv, idx)) { - ret = EBUSY; - ERROR("%p: unable to release queue index %u", - (void *)dev, idx); - goto out; + if (!mlx5_txq_releasable(dev, idx)) { + rte_errno = EBUSY; + DRV_LOG(ERR, "port %u unable to release queue index %u", + dev->data->port_id, idx); + return -rte_errno; } - mlx5_priv_txq_release(priv, idx); - txq_ctrl = mlx5_priv_txq_new(priv, idx, desc, socket, conf); + mlx5_txq_release(dev, idx); + txq_ctrl = mlx5_txq_new(dev, idx, desc, socket, conf); if (!txq_ctrl) { - ERROR("%p: unable to allocate queue index %u", - (void *)dev, idx); - ret = ENOMEM; - goto out; + DRV_LOG(ERR, "port %u unable to allocate queue index %u", + dev->data->port_id, idx); + return -rte_errno; } - DEBUG("%p: adding TX queue %p to list", - (void *)dev, (void *)txq_ctrl); + DRV_LOG(DEBUG, "port %u adding Tx queue %u to list", + dev->data->port_id, idx); (*priv->txqs)[idx] = &txq_ctrl->txq; -out: - priv_unlock(priv); - return -ret; + return 0; } /** @@ -204,37 +203,40 @@ mlx5_tx_queue_release(void *dpdk_txq) return; txq_ctrl = container_of(txq, struct mlx5_txq_ctrl, txq); priv = txq_ctrl->priv; - priv_lock(priv); for (i = 0; (i != priv->txqs_n); ++i) if ((*priv->txqs)[i] == txq) { - DEBUG("%p: removing TX queue %p from list", - (void *)priv->dev, (void *)txq_ctrl); - mlx5_priv_txq_release(priv, i); + DRV_LOG(DEBUG, "port %u removing Tx queue %u from list", + PORT_ID(priv), txq_ctrl->idx); + mlx5_txq_release(ETH_DEV(priv), i); break; } - priv_unlock(priv); } /** - * Map locally UAR used in Tx queues for BlueFlame doorbell. + * Mmap Tx UAR (HW doorbell) pages into the reserved UAR address space. + * Both the primary and secondary processes perform the mmap so that the + * UAR addresses stay identically aligned in both. * - * @param[in] priv - * Pointer to private structure. + * @param[in] dev + * Pointer to Ethernet device. * @param fd * Verbs file descriptor to map UAR pages. 
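 *
 * Editorial example of the address split done below, assuming a
 * 4096-byte page: a UAR address of 0x7f0000001840 yields the page
 * address 0x7f0000001000 (RTE_ALIGN_FLOOR) and the in-page offset
 * 0x840 (uar_va & (page_size - 1)); only the page is mmap'ed and the
 * offset is added back to form txq.bf_reg.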
* * @return - * 0 on success, errno value on failure. + * 0 on success, a negative errno value otherwise and rte_errno is set. */ int -priv_tx_uar_remap(struct priv *priv, int fd) +mlx5_tx_uar_remap(struct rte_eth_dev *dev, int fd) { + struct priv *priv = dev->data->dev_private; unsigned int i, j; uintptr_t pages[priv->txqs_n]; unsigned int pages_n = 0; uintptr_t uar_va; + uintptr_t off; void *addr; + void *ret; struct mlx5_txq_data *txq; struct mlx5_txq_ctrl *txq_ctrl; int already_mapped; @@ -251,8 +253,11 @@ priv_tx_uar_remap(struct priv *priv, int fd) continue; txq = (*priv->txqs)[i]; txq_ctrl = container_of(txq, struct mlx5_txq_ctrl, txq); - uar_va = (uintptr_t)txq_ctrl->txq.bf_reg; - uar_va = RTE_ALIGN_FLOOR(uar_va, page_size); + assert(txq_ctrl->idx == (uint16_t)i); + /* UAR addr form verbs used to find dup and offset in page. */ + uar_va = (uintptr_t)txq_ctrl->bf_reg_orig; + off = uar_va & (page_size - 1); /* offset in page. */ + uar_va = RTE_ALIGN_FLOOR(uar_va, page_size); /* page addr. */ already_mapped = 0; for (j = 0; j != pages_n; ++j) { if (pages[j] == uar_va) { @@ -260,16 +265,32 @@ priv_tx_uar_remap(struct priv *priv, int fd) break; } } - if (already_mapped) - continue; - pages[pages_n++] = uar_va; - addr = mmap((void *)uar_va, page_size, - PROT_WRITE, MAP_FIXED | MAP_SHARED, fd, - txq_ctrl->uar_mmap_offset); - if (addr != (void *)uar_va) { - ERROR("call to mmap failed on UAR for txq %d\n", i); - return -1; + /* new address in reserved UAR address space. */ + addr = RTE_PTR_ADD(priv->uar_base, + uar_va & (MLX5_UAR_SIZE - 1)); + if (!already_mapped) { + pages[pages_n++] = uar_va; + /* fixed mmap to specified address in reserved + * address space. + */ + ret = mmap(addr, page_size, + PROT_WRITE, MAP_FIXED | MAP_SHARED, fd, + txq_ctrl->uar_mmap_offset); + if (ret != addr) { + /* fixed mmap have to return same address */ + DRV_LOG(ERR, + "port %u call to mmap failed on UAR" + " for txq %u", + dev->data->port_id, txq_ctrl->idx); + rte_errno = ENXIO; + return -rte_errno; + } } + if (rte_eal_process_type() == RTE_PROC_PRIMARY) /* save once */ + txq_ctrl->txq.bf_reg = RTE_PTR_ADD((void *)addr, off); + else + assert(txq_ctrl->txq.bf_reg == + RTE_PTR_ADD((void *)addr, off)); } return 0; } @@ -277,17 +298,18 @@ priv_tx_uar_remap(struct priv *priv, int fd) /** * Create the Tx queue Verbs object. * - * @param priv - * Pointer to private structure. + * @param dev + * Pointer to Ethernet device. * @param idx * Queue index in DPDK Rx queue array * * @return - * The Verbs object initialised if it can be created. + * The Verbs object initialised, NULL otherwise and rte_errno is set. */ -struct mlx5_txq_ibv* -mlx5_priv_txq_ibv_new(struct priv *priv, uint16_t idx) +struct mlx5_txq_ibv * +mlx5_txq_ibv_new(struct rte_eth_dev *dev, uint16_t idx) { + struct priv *priv = dev->data->dev_private; struct mlx5_txq_data *txq_data = (*priv->txqs)[idx]; struct mlx5_txq_ctrl *txq_ctrl = container_of(txq_data, struct mlx5_txq_ctrl, txq); @@ -307,9 +329,14 @@ mlx5_priv_txq_ibv_new(struct priv *priv, uint16_t idx) int ret = 0; assert(txq_data); + priv->verbs_alloc_ctx.type = MLX5_VERBS_ALLOC_TYPE_TX_QUEUE; + priv->verbs_alloc_ctx.obj = txq_ctrl; if (mlx5_getenv_int("MLX5_ENABLE_CQE_COMPRESSION")) { - ERROR("MLX5_ENABLE_CQE_COMPRESSION must never be set"); - goto error; + DRV_LOG(ERR, + "port %u MLX5_ENABLE_CQE_COMPRESSION must never be set", + dev->data->port_id); + rte_errno = EINVAL; + return NULL; } memset(&tmpl, 0, sizeof(struct mlx5_txq_ibv)); /* MRs will be registered in mp2mr[] later. 
*/ @@ -322,7 +349,9 @@ mlx5_priv_txq_ibv_new(struct priv *priv, uint16_t idx) cqe_n += MLX5_TX_COMP_THRESH_INLINE_DIV; tmpl.cq = ibv_create_cq(priv->ctx, cqe_n, NULL, NULL, 0); if (tmpl.cq == NULL) { - ERROR("%p: CQ creation failure", (void *)txq_ctrl); + DRV_LOG(ERR, "port %u Tx queue %u CQ creation failure", + dev->data->port_id, idx); + rte_errno = errno; goto error; } attr.init = (struct ibv_qp_init_attr_ex){ @@ -363,7 +392,9 @@ mlx5_priv_txq_ibv_new(struct priv *priv, uint16_t idx) } tmpl.qp = ibv_create_qp_ex(priv->ctx, &attr.init); if (tmpl.qp == NULL) { - ERROR("%p: QP creation failure", (void *)txq_ctrl); + DRV_LOG(ERR, "port %u Tx queue %u QP creation failure", + dev->data->port_id, idx); + rte_errno = errno; goto error; } attr.mod = (struct ibv_qp_attr){ @@ -374,7 +405,10 @@ mlx5_priv_txq_ibv_new(struct priv *priv, uint16_t idx) }; ret = ibv_modify_qp(tmpl.qp, &attr.mod, (IBV_QP_STATE | IBV_QP_PORT)); if (ret) { - ERROR("%p: QP state to IBV_QPS_INIT failed", (void *)txq_ctrl); + DRV_LOG(ERR, + "port %u Tx queue %u QP state to IBV_QPS_INIT failed", + dev->data->port_id, idx); + rte_errno = errno; goto error; } attr.mod = (struct ibv_qp_attr){ @@ -382,19 +416,27 @@ mlx5_priv_txq_ibv_new(struct priv *priv, uint16_t idx) }; ret = ibv_modify_qp(tmpl.qp, &attr.mod, IBV_QP_STATE); if (ret) { - ERROR("%p: QP state to IBV_QPS_RTR failed", (void *)txq_ctrl); + DRV_LOG(ERR, + "port %u Tx queue %u QP state to IBV_QPS_RTR failed", + dev->data->port_id, idx); + rte_errno = errno; goto error; } attr.mod.qp_state = IBV_QPS_RTS; ret = ibv_modify_qp(tmpl.qp, &attr.mod, IBV_QP_STATE); if (ret) { - ERROR("%p: QP state to IBV_QPS_RTS failed", (void *)txq_ctrl); + DRV_LOG(ERR, + "port %u Tx queue %u QP state to IBV_QPS_RTS failed", + dev->data->port_id, idx); + rte_errno = errno; goto error; } txq_ibv = rte_calloc_socket(__func__, 1, sizeof(struct mlx5_txq_ibv), 0, txq_ctrl->socket); if (!txq_ibv) { - ERROR("%p: cannot allocate memory", (void *)txq_ctrl); + DRV_LOG(ERR, "port %u Tx queue %u cannot allocate memory", + dev->data->port_id, idx); + rte_errno = ENOMEM; goto error; } obj.cq.in = tmpl.cq; @@ -402,11 +444,16 @@ mlx5_priv_txq_ibv_new(struct priv *priv, uint16_t idx) obj.qp.in = tmpl.qp; obj.qp.out = &qp; ret = mlx5dv_init_obj(&obj, MLX5DV_OBJ_CQ | MLX5DV_OBJ_QP); - if (ret != 0) + if (ret != 0) { + rte_errno = errno; goto error; + } if (cq_info.cqe_size != RTE_CACHE_LINE_SIZE) { - ERROR("Wrong MLX5_CQE_SIZE environment variable value: " - "it should be set to %u", RTE_CACHE_LINE_SIZE); + DRV_LOG(ERR, + "port %u wrong MLX5_CQE_SIZE environment variable" + " value: it should be set to %u", + dev->data->port_id, RTE_CACHE_LINE_SIZE); + rte_errno = EINVAL; goto error; } txq_data->cqe_n = log2above(cq_info.cqe_cnt); @@ -414,7 +461,7 @@ mlx5_priv_txq_ibv_new(struct priv *priv, uint16_t idx) txq_data->wqes = qp.sq.buf; txq_data->wqe_n = log2above(qp.sq.wqe_cnt); txq_data->qp_db = &qp.dbrec[MLX5_SND_DBR]; - txq_data->bf_reg = qp.bf.reg; + txq_ctrl->bf_reg_orig = qp.bf.reg; txq_data->cq_db = cq_info.dbrec; txq_data->cqes = (volatile struct mlx5_cqe (*)[]) @@ -429,35 +476,45 @@ mlx5_priv_txq_ibv_new(struct priv *priv, uint16_t idx) if (qp.comp_mask & MLX5DV_QP_MASK_UAR_MMAP_OFFSET) { txq_ctrl->uar_mmap_offset = qp.uar_mmap_offset; } else { - ERROR("Failed to retrieve UAR info, invalid libmlx5.so version"); + DRV_LOG(ERR, + "port %u failed to retrieve UAR info, invalid" + " libmlx5.so", + dev->data->port_id); + rte_errno = EINVAL; goto error; } - DEBUG("%p: Verbs Tx queue %p: refcnt %d", (void *)priv, - 
(void *)txq_ibv, rte_atomic32_read(&txq_ibv->refcnt)); + DRV_LOG(DEBUG, "port %u Verbs Tx queue %u: refcnt %d", + dev->data->port_id, idx, rte_atomic32_read(&txq_ibv->refcnt)); LIST_INSERT_HEAD(&priv->txqsibv, txq_ibv, next); + txq_ibv->txq_ctrl = txq_ctrl; + priv->verbs_alloc_ctx.type = MLX5_VERBS_ALLOC_TYPE_NONE; return txq_ibv; error: + ret = rte_errno; /* Save rte_errno before cleanup. */ if (tmpl.cq) claim_zero(ibv_destroy_cq(tmpl.cq)); if (tmpl.qp) claim_zero(ibv_destroy_qp(tmpl.qp)); + priv->verbs_alloc_ctx.type = MLX5_VERBS_ALLOC_TYPE_NONE; + rte_errno = ret; /* Restore rte_errno. */ return NULL; } /** * Get an Tx queue Verbs object. * - * @param priv - * Pointer to private structure. + * @param dev + * Pointer to Ethernet device. * @param idx * Queue index in DPDK Rx queue array * * @return * The Verbs object if it exists. */ -struct mlx5_txq_ibv* -mlx5_priv_txq_ibv_get(struct priv *priv, uint16_t idx) +struct mlx5_txq_ibv * +mlx5_txq_ibv_get(struct rte_eth_dev *dev, uint16_t idx) { + struct priv *priv = dev->data->dev_private; struct mlx5_txq_ctrl *txq_ctrl; if (idx >= priv->txqs_n) @@ -467,8 +524,8 @@ mlx5_priv_txq_ibv_get(struct priv *priv, uint16_t idx) txq_ctrl = container_of((*priv->txqs)[idx], struct mlx5_txq_ctrl, txq); if (txq_ctrl->ibv) { rte_atomic32_inc(&txq_ctrl->ibv->refcnt); - DEBUG("%p: Verbs Tx queue %p: refcnt %d", (void *)priv, - (void *)txq_ctrl->ibv, + DRV_LOG(DEBUG, "port %u Verbs Tx queue %u: refcnt %d", + dev->data->port_id, txq_ctrl->idx, rte_atomic32_read(&txq_ctrl->ibv->refcnt)); } return txq_ctrl->ibv; @@ -477,21 +534,19 @@ mlx5_priv_txq_ibv_get(struct priv *priv, uint16_t idx) /** * Release an Tx verbs queue object. * - * @param priv - * Pointer to private structure. * @param txq_ibv * Verbs Tx queue object. * * @return - * 0 on success, errno on failure. + * 1 while a reference on it exists, 0 when freed. */ int -mlx5_priv_txq_ibv_release(struct priv *priv, struct mlx5_txq_ibv *txq_ibv) +mlx5_txq_ibv_release(struct mlx5_txq_ibv *txq_ibv) { - (void)priv; assert(txq_ibv); - DEBUG("%p: Verbs Tx queue %p: refcnt %d", (void *)priv, - (void *)txq_ibv, rte_atomic32_read(&txq_ibv->refcnt)); + DRV_LOG(DEBUG, "port %u Verbs Tx queue %u: refcnt %d", + PORT_ID(txq_ibv->txq_ctrl->priv), + txq_ibv->txq_ctrl->idx, rte_atomic32_read(&txq_ibv->refcnt)); if (rte_atomic32_dec_and_test(&txq_ibv->refcnt)) { claim_zero(ibv_destroy_qp(txq_ibv->qp)); claim_zero(ibv_destroy_cq(txq_ibv->cq)); @@ -499,21 +554,18 @@ mlx5_priv_txq_ibv_release(struct priv *priv, struct mlx5_txq_ibv *txq_ibv) rte_free(txq_ibv); return 0; } - return EBUSY; + return 1; } /** * Return true if a single reference exists on the object. * - * @param priv - * Pointer to private structure. * @param txq_ibv * Verbs Tx queue object. */ int -mlx5_priv_txq_ibv_releasable(struct priv *priv, struct mlx5_txq_ibv *txq_ibv) +mlx5_txq_ibv_releasable(struct mlx5_txq_ibv *txq_ibv) { - (void)priv; assert(txq_ibv); return (rte_atomic32_read(&txq_ibv->refcnt) == 1); } @@ -521,20 +573,22 @@ mlx5_priv_txq_ibv_releasable(struct priv *priv, struct mlx5_txq_ibv *txq_ibv) /** * Verify the Verbs Tx queue list is empty * - * @param priv - * Pointer to private structure. + * @param dev + * Pointer to Ethernet device. * - * @return the number of object not released. + * @return + * The number of object not released. 
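 *
 * Editorial sketch, not part of this patch: a close path can use this
 * to report leaked objects:
 *
 *   if (mlx5_txq_ibv_verify(dev))
 *           DRV_LOG(WARNING, "port %u some Verbs Tx queues remain",
 *                   dev->data->port_id);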
*/ int -mlx5_priv_txq_ibv_verify(struct priv *priv) +mlx5_txq_ibv_verify(struct rte_eth_dev *dev) { + struct priv *priv = dev->data->dev_private; int ret = 0; struct mlx5_txq_ibv *txq_ibv; LIST_FOREACH(txq_ibv, &priv->txqsibv, next) { - DEBUG("%p: Verbs Tx queue %p still referenced", (void *)priv, - (void *)txq_ibv); + DRV_LOG(DEBUG, "port %u Verbs Tx queue %u still referenced", + dev->data->port_id, txq_ibv->txq_ctrl->idx); ++ret; } return ret; @@ -543,8 +597,8 @@ mlx5_priv_txq_ibv_verify(struct priv *priv) /** * Create a DPDK Tx queue. * - * @param priv - * Pointer to private structure. + * @param dev + * Pointer to Ethernet device. * @param idx * TX queue index. * @param desc @@ -555,13 +609,13 @@ mlx5_priv_txq_ibv_verify(struct priv *priv) * Thresholds parameters. * * @return - * A DPDK queue object on success. + * A DPDK queue object on success, NULL otherwise and rte_errno is set. */ -struct mlx5_txq_ctrl* -mlx5_priv_txq_new(struct priv *priv, uint16_t idx, uint16_t desc, - unsigned int socket, - const struct rte_eth_txconf *conf) +struct mlx5_txq_ctrl * +mlx5_txq_new(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc, + unsigned int socket, const struct rte_eth_txconf *conf) { + struct priv *priv = dev->data->dev_private; const unsigned int max_tso_inline = ((MLX5_MAX_TSO_HEADER + (RTE_CACHE_LINE_SIZE - 1)) / RTE_CACHE_LINE_SIZE); @@ -571,20 +625,23 @@ mlx5_priv_txq_new(struct priv *priv, uint16_t idx, uint16_t desc, sizeof(*tmpl) + desc * sizeof(struct rte_mbuf *), 0, socket); - if (!tmpl) + if (!tmpl) { + rte_errno = ENOMEM; return NULL; + } assert(desc > MLX5_TX_COMP_THRESH); tmpl->txq.flags = conf->txq_flags; tmpl->priv = priv; tmpl->socket = socket; tmpl->txq.elts_n = log2above(desc); + tmpl->idx = idx; if (priv->mps == MLX5_MPW_ENHANCED) tmpl->txq.mpw_hdr_dseg = priv->mpw_hdr_dseg; /* MRs will be registered in mp2mr[] later. */ - DEBUG("priv->device_attr.max_qp_wr is %d", - priv->device_attr.orig_attr.max_qp_wr); - DEBUG("priv->device_attr.max_sge is %d", - priv->device_attr.orig_attr.max_sge); + DRV_LOG(DEBUG, "port %u priv->device_attr.max_qp_wr is %d", + dev->data->port_id, priv->device_attr.orig_attr.max_qp_wr); + DRV_LOG(DEBUG, "port %u priv->device_attr.max_sge is %d", + dev->data->port_id, priv->device_attr.orig_attr.max_sge); if (priv->txq_inline && (priv->txqs_n >= priv->txqs_inline)) { unsigned int ds_cnt; @@ -605,17 +662,6 @@ mlx5_priv_txq_new(struct priv *priv, uint16_t idx, uint16_t desc, priv->inline_max_packet_sz) + (RTE_CACHE_LINE_SIZE - 1)) / RTE_CACHE_LINE_SIZE) * RTE_CACHE_LINE_SIZE; - } else if (priv->tso) { - int inline_diff = tmpl->txq.max_inline - max_tso_inline; - - /* - * Adjust inline value as Verbs aggregates - * tso_inline and txq_inline fields. - */ - tmpl->max_inline_data = inline_diff > 0 ? 
-					  inline_diff *
-					  RTE_CACHE_LINE_SIZE :
-					  0;
 	} else {
 		tmpl->max_inline_data =
 			tmpl->txq.max_inline * RTE_CACHE_LINE_SIZE;
@@ -635,9 +681,10 @@ mlx5_priv_txq_new(struct priv *priv, uint16_t idx, uint16_t desc,
 			max_inline = max_inline - (max_inline %
 						   RTE_CACHE_LINE_SIZE);
-			WARN("txq inline is too large (%d) setting it to "
-			     "the maximum possible: %d\n",
-			     priv->txq_inline, max_inline);
+			DRV_LOG(WARNING,
+				"port %u txq inline is too large (%d),"
+				" setting it to the maximum possible: %d",
+				PORT_ID(priv), priv->txq_inline, max_inline);
 			tmpl->txq.max_inline = max_inline / RTE_CACHE_LINE_SIZE;
 		}
 	}
@@ -653,8 +700,8 @@ mlx5_priv_txq_new(struct priv *priv, uint16_t idx, uint16_t desc,
 		(struct rte_mbuf *(*)[1 << tmpl->txq.elts_n])(tmpl + 1);
 	tmpl->txq.stats.idx = idx;
 	rte_atomic32_inc(&tmpl->refcnt);
-	DEBUG("%p: Tx queue %p: refcnt %d", (void *)priv,
-	      (void *)tmpl, rte_atomic32_read(&tmpl->refcnt));
+	DRV_LOG(DEBUG, "port %u Tx queue %u: refcnt %d", dev->data->port_id,
+		idx, rte_atomic32_read(&tmpl->refcnt));
 	LIST_INSERT_HEAD(&priv->txqsctrl, tmpl, next);
 	return tmpl;
 }
@@ -662,17 +709,18 @@ mlx5_priv_txq_new(struct priv *priv, uint16_t idx, uint16_t desc,
 /**
  * Get a Tx queue.
  *
- * @param priv
- *   Pointer to private structure.
+ * @param dev
+ *   Pointer to Ethernet device.
  * @param idx
  *   TX queue index.
  *
 * @return
 *   A pointer to the queue if it exists.
 */
-struct mlx5_txq_ctrl*
-mlx5_priv_txq_get(struct priv *priv, uint16_t idx)
+struct mlx5_txq_ctrl *
+mlx5_txq_get(struct rte_eth_dev *dev, uint16_t idx)
 {
+	struct priv *priv = dev->data->dev_private;
 	struct mlx5_txq_ctrl *ctrl = NULL;
 
 	if ((*priv->txqs)[idx]) {
@@ -680,19 +728,17 @@ mlx5_priv_txq_get(struct priv *priv, uint16_t idx)
 			    txq);
 		unsigned int i;
 
-		mlx5_priv_txq_ibv_get(priv, idx);
+		mlx5_txq_ibv_get(dev, idx);
 		for (i = 0; i != MLX5_PMD_TX_MP_CACHE; ++i) {
-			struct mlx5_mr *mr = NULL;
-
-			(void)mr;
-			if (ctrl->txq.mp2mr[i]) {
-				mr = priv_mr_get(priv, ctrl->txq.mp2mr[i]->mp);
-				assert(mr);
-			}
+			if (ctrl->txq.mp2mr[i])
+				claim_nonzero
+					(mlx5_mr_get(dev,
+						     ctrl->txq.mp2mr[i]->mp));
 		}
 		rte_atomic32_inc(&ctrl->refcnt);
-		DEBUG("%p: Tx queue %p: refcnt %d", (void *)priv,
-		      (void *)ctrl, rte_atomic32_read(&ctrl->refcnt));
+		DRV_LOG(DEBUG, "port %u Tx queue %u: refcnt %d",
+			dev->data->port_id,
+			ctrl->idx, rte_atomic32_read(&ctrl->refcnt));
 	}
 	return ctrl;
 }
@@ -700,38 +746,38 @@ mlx5_priv_txq_get(struct priv *priv, uint16_t idx)
 /**
  * Release a Tx queue.
  *
- * @param priv
- *   Pointer to private structure.
+ * @param dev
+ *   Pointer to Ethernet device.
 * @param idx
 *   TX queue index.
 *
 * @return
- *   0 on success, errno on failure.
+ *   1 while a reference on it exists, 0 when freed.
 */
 int
-mlx5_priv_txq_release(struct priv *priv, uint16_t idx)
+mlx5_txq_release(struct rte_eth_dev *dev, uint16_t idx)
 {
+	struct priv *priv = dev->data->dev_private;
 	unsigned int i;
 	struct mlx5_txq_ctrl *txq;
+	size_t page_size = sysconf(_SC_PAGESIZE);
 
 	if (!(*priv->txqs)[idx])
 		return 0;
 	txq = container_of((*priv->txqs)[idx], struct mlx5_txq_ctrl, txq);
-	DEBUG("%p: Tx queue %p: refcnt %d", (void *)priv,
-	      (void *)txq, rte_atomic32_read(&txq->refcnt));
-	if (txq->ibv) {
-		int ret;
-
-		ret = mlx5_priv_txq_ibv_release(priv, txq->ibv);
-		if (!ret)
-			txq->ibv = NULL;
-	}
+	DRV_LOG(DEBUG, "port %u Tx queue %u: refcnt %d", dev->data->port_id,
+		txq->idx, rte_atomic32_read(&txq->refcnt));
+	if (txq->ibv && !mlx5_txq_ibv_release(txq->ibv))
+		txq->ibv = NULL;
 	for (i = 0; i != MLX5_PMD_TX_MP_CACHE; ++i) {
 		if (txq->txq.mp2mr[i]) {
-			priv_mr_release(priv, txq->txq.mp2mr[i]);
+			mlx5_mr_release(txq->txq.mp2mr[i]);
 			txq->txq.mp2mr[i] = NULL;
 		}
 	}
+	if (priv->uar_base)
+		munmap((void *)RTE_ALIGN_FLOOR((uintptr_t)txq->txq.bf_reg,
+		       page_size), page_size);
 	if (rte_atomic32_dec_and_test(&txq->refcnt)) {
 		txq_free_elts(txq);
 		LIST_REMOVE(txq, next);
@@ -739,14 +785,14 @@ mlx5_priv_txq_release(struct priv *priv, uint16_t idx)
 		(*priv->txqs)[idx] = NULL;
 		return 0;
 	}
-	return EBUSY;
+	return 1;
 }
 
 /**
  * Verify if the queue can be released.
 *
- * @param priv
- *   Pointer to private structure.
+ * @param dev
+ *   Pointer to Ethernet device.
 * @param idx
 *   TX queue index.
 *
@@ -754,8 +800,9 @@ mlx5_priv_txq_release(struct priv *priv, uint16_t idx)
 *   1 if the queue can be released.
 */
 int
-mlx5_priv_txq_releasable(struct priv *priv, uint16_t idx)
+mlx5_txq_releasable(struct rte_eth_dev *dev, uint16_t idx)
 {
+	struct priv *priv = dev->data->dev_private;
 	struct mlx5_txq_ctrl *txq;
 
 	if (!(*priv->txqs)[idx])
@@ -767,20 +814,22 @@ mlx5_priv_txq_releasable(struct priv *priv, uint16_t idx)
 /**
  * Verify the Tx queue list is empty.
 *
- * @param priv
- *   Pointer to private structure.
+ * @param dev
+ *   Pointer to Ethernet device.
 *
- * @return the number of object not released.
+ * @return
+ *   The number of objects not released.
 */
 int
-mlx5_priv_txq_verify(struct priv *priv)
+mlx5_txq_verify(struct rte_eth_dev *dev)
 {
+	struct priv *priv = dev->data->dev_private;
 	struct mlx5_txq_ctrl *txq;
 	int ret = 0;
 
 	LIST_FOREACH(txq, &priv->txqsctrl, next) {
-		DEBUG("%p: Tx Queue %p still referenced", (void *)priv,
-		      (void *)txq);
+		DRV_LOG(DEBUG, "port %u Tx queue %u still referenced",
+			dev->data->port_id, txq->idx);
 		++ret;
 	}
 	return ret;
diff --git a/drivers/net/mlx5/mlx5_utils.h b/drivers/net/mlx5/mlx5_utils.h
index 2fbd10b1..e48352f3 100644
--- a/drivers/net/mlx5/mlx5_utils.h
+++ b/drivers/net/mlx5/mlx5_utils.h
@@ -89,14 +89,21 @@ pmd_drv_log_basename(const char *s)
 	return s;
 }
 
+extern int mlx5_logtype;
+
+#define PMD_DRV_LOG___(level, ...) \
+	rte_log(RTE_LOG_ ## level, \
+		mlx5_logtype, \
+		RTE_FMT(MLX5_DRIVER_NAME ": " \
+			RTE_FMT_HEAD(__VA_ARGS__,), \
+		RTE_FMT_TAIL(__VA_ARGS__,)))
+
 /*
  * When debugging is enabled (NDEBUG not defined), file, line and function
  * information replace the driver name (MLX5_DRIVER_NAME) in log messages.
  */
 #ifndef NDEBUG
-#define PMD_DRV_LOG___(level, ...) \
-	ERRNO_SAFE(RTE_LOG(level, PMD, __VA_ARGS__))
 #define PMD_DRV_LOG__(level, ...) \
 	PMD_DRV_LOG___(level, "%s:%u: %s(): " __VA_ARGS__)
 #define PMD_DRV_LOG_(level, s, ...) \
@@ -108,9 +115,6 @@ pmd_drv_log_basename(const char *s)
 		__VA_ARGS__)
 
 #else /* NDEBUG */
-
-#define PMD_DRV_LOG___(level, ...)
\ - ERRNO_SAFE(RTE_LOG(level, PMD, MLX5_DRIVER_NAME ": " __VA_ARGS__)) #define PMD_DRV_LOG__(level, ...) \ PMD_DRV_LOG___(level, __VA_ARGS__) #define PMD_DRV_LOG_(level, s, ...) \ @@ -119,18 +123,15 @@ pmd_drv_log_basename(const char *s) #endif /* NDEBUG */ /* Generic printf()-like logging macro with automatic line feed. */ -#define PMD_DRV_LOG(level, ...) \ +#define DRV_LOG(level, ...) \ PMD_DRV_LOG_(level, \ __VA_ARGS__ PMD_DRV_LOG_STRIP PMD_DRV_LOG_OPAREN, \ PMD_DRV_LOG_CPAREN) -/* - * Like assert(), DEBUG() becomes a no-op and claim_zero() does not perform - * any check when debugging is disabled. - */ +/* claim_zero() does not perform any check when debugging is disabled. */ #ifndef NDEBUG -#define DEBUG(...) PMD_DRV_LOG(DEBUG, __VA_ARGS__) +#define DEBUG(...) DRV_LOG(DEBUG, __VA_ARGS__) #define claim_zero(...) assert((__VA_ARGS__) == 0) #define claim_nonzero(...) assert((__VA_ARGS__) != 0) @@ -142,9 +143,9 @@ pmd_drv_log_basename(const char *s) #endif /* NDEBUG */ -#define INFO(...) PMD_DRV_LOG(INFO, __VA_ARGS__) -#define WARN(...) PMD_DRV_LOG(WARNING, __VA_ARGS__) -#define ERROR(...) PMD_DRV_LOG(ERR, __VA_ARGS__) +#define INFO(...) DRV_LOG(INFO, __VA_ARGS__) +#define WARN(...) DRV_LOG(WARNING, __VA_ARGS__) +#define ERROR(...) DRV_LOG(ERR, __VA_ARGS__) /* Convenience macros for accessing mbuf fields. */ #define NEXT(m) ((m)->next) diff --git a/drivers/net/mlx5/mlx5_vlan.c b/drivers/net/mlx5/mlx5_vlan.c index 198a69e3..dbfa8a0c 100644 --- a/drivers/net/mlx5/mlx5_vlan.c +++ b/drivers/net/mlx5/mlx5_vlan.c @@ -54,26 +54,24 @@ * Toggle filter. * * @return - * 0 on success, negative errno value on failure. + * 0 on success, a negative errno value otherwise and rte_errno is set. */ int mlx5_vlan_filter_set(struct rte_eth_dev *dev, uint16_t vlan_id, int on) { struct priv *priv = dev->data->dev_private; unsigned int i; - int ret = 0; - priv_lock(priv); - DEBUG("%p: %s VLAN filter ID %" PRIu16, - (void *)dev, (on ? "enable" : "disable"), vlan_id); + DRV_LOG(DEBUG, "port %u %s VLAN filter ID %" PRIu16, + dev->data->port_id, (on ? "enable" : "disable"), vlan_id); assert(priv->vlan_filter_n <= RTE_DIM(priv->vlan_filter)); for (i = 0; (i != priv->vlan_filter_n); ++i) if (priv->vlan_filter[i] == vlan_id) break; /* Check if there's room for another VLAN filter. */ if (i == RTE_DIM(priv->vlan_filter)) { - ret = -ENOMEM; - goto out; + rte_errno = ENOMEM; + return -rte_errno; } if (i < priv->vlan_filter_n) { assert(priv->vlan_filter_n != 0); @@ -96,37 +94,49 @@ mlx5_vlan_filter_set(struct rte_eth_dev *dev, uint16_t vlan_id, int on) priv->vlan_filter[priv->vlan_filter_n] = vlan_id; ++priv->vlan_filter_n; } - if (dev->data->dev_started) - priv_dev_traffic_restart(priv, dev); out: - priv_unlock(priv); - return ret; + if (dev->data->dev_started) + return mlx5_traffic_restart(dev); + return 0; } /** - * Set/reset VLAN stripping for a specific queue. + * Callback to set/reset VLAN stripping for a specific queue. * - * @param priv - * Pointer to private structure. - * @param idx + * @param dev + * Pointer to Ethernet device structure. + * @param queue * RX queue index. * @param on * Enable/disable VLAN stripping. 
 */
-static void
-priv_vlan_strip_queue_set(struct priv *priv, uint16_t idx, int on)
+void
+mlx5_vlan_strip_queue_set(struct rte_eth_dev *dev, uint16_t queue, int on)
 {
-	struct mlx5_rxq_data *rxq = (*priv->rxqs)[idx];
+	struct priv *priv = dev->data->dev_private;
+	struct mlx5_rxq_data *rxq = (*priv->rxqs)[queue];
 	struct mlx5_rxq_ctrl *rxq_ctrl =
 		container_of(rxq, struct mlx5_rxq_ctrl, rxq);
 	struct ibv_wq_attr mod;
 	uint16_t vlan_offloads =
 		(on ? IBV_WQ_FLAGS_CVLAN_STRIPPING : 0) |
 		0;
-	int err;
+	int ret;
 
-	DEBUG("set VLAN offloads 0x%x for port %d queue %d",
-	      vlan_offloads, rxq->port_id, idx);
+	/* Validate hw support */
+	if (!priv->hw_vlan_strip) {
+		DRV_LOG(ERR, "port %u VLAN stripping is not supported",
+			dev->data->port_id);
+		return;
+	}
+	/* Validate queue number */
+	if (queue >= priv->rxqs_n) {
+		DRV_LOG(ERR, "port %u VLAN stripping, invalid queue number %d",
+			dev->data->port_id, queue);
+		return;
+	}
+	DRV_LOG(DEBUG, "port %u set VLAN offloads 0x%x for port %u queue %d",
+		dev->data->port_id, vlan_offloads, rxq->port_id, queue);
 	if (!rxq_ctrl->ibv) {
 		/* Update related bits in RX queue. */
 		rxq->vlan_strip = !!on;
@@ -137,57 +147,26 @@ priv_vlan_strip_queue_set(struct priv *priv, uint16_t idx, int on)
 		.flags_mask = IBV_WQ_FLAGS_CVLAN_STRIPPING,
 		.flags = vlan_offloads,
 	};
-
-	err = ibv_modify_wq(rxq_ctrl->ibv->wq, &mod);
-	if (err) {
-		ERROR("%p: failed to modified stripping mode: %s",
-		      (void *)priv, strerror(err));
+	ret = ibv_modify_wq(rxq_ctrl->ibv->wq, &mod);
+	if (ret) {
+		DRV_LOG(ERR, "port %u failed to modify stripping mode: %s",
+			dev->data->port_id, strerror(ret));
 		return;
 	}
-	/* Update related bits in RX queue. */
 	rxq->vlan_strip = !!on;
 }
 
 /**
- * Callback to set/reset VLAN stripping for a specific queue.
- *
- * @param dev
- *   Pointer to Ethernet device structure.
- * @param queue
- *   RX queue index.
- * @param on
- *   Enable/disable VLAN stripping.
- */
-void
-mlx5_vlan_strip_queue_set(struct rte_eth_dev *dev, uint16_t queue, int on)
-{
-	struct priv *priv = dev->data->dev_private;
-
-	/* Validate hw support */
-	if (!priv->hw_vlan_strip) {
-		ERROR("VLAN stripping is not supported");
-		return;
-	}
-
-	/* Validate queue number */
-	if (queue >= priv->rxqs_n) {
-		ERROR("VLAN stripping, invalid queue number %d", queue);
-		return;
-	}
-
-	priv_lock(priv);
-	priv_vlan_strip_queue_set(priv, queue, on);
-	priv_unlock(priv);
-}
-
-/**
  * Callback to set/reset VLAN offloads for a port.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 * @param mask
 *   VLAN offload bit mask.
+ *
+ * @return
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
 int
 mlx5_vlan_offload_set(struct rte_eth_dev *dev, int mask)
@@ -199,16 +178,13 @@ mlx5_vlan_offload_set(struct rte_eth_dev *dev, int mask)
 		int hw_vlan_strip = !!dev->data->dev_conf.rxmode.hw_vlan_strip;
 
 		if (!priv->hw_vlan_strip) {
-			ERROR("VLAN stripping is not supported");
+			DRV_LOG(ERR, "port %u VLAN stripping is not supported",
+				dev->data->port_id);
 			return 0;
 		}
-		/* Run on every RX queue and set/reset VLAN stripping. */
-		priv_lock(priv);
 		for (i = 0; (i != priv->rxqs_n); i++)
-			priv_vlan_strip_queue_set(priv, i, hw_vlan_strip);
-		priv_unlock(priv);
+			mlx5_vlan_strip_queue_set(dev, i, hw_vlan_strip);
 	}
-
 	return 0;
 }
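
A note on the release convention above: mlx5_txq_ibv_release() and mlx5_txq_release() no longer return EBUSY; they return 1 while a reference on the object still exists and 0 once it has been freed, so a caller clears its pointer only when the object is really gone (exactly what mlx5_txq_release() does with txq->ibv). A minimal caller-side sketch; the drop_txq() helper is hypothetical, not part of this patch:

    /* Hypothetical helper: drop one reference on Tx queue 'idx'. */
    static void
    drop_txq(struct rte_eth_dev *dev, uint16_t idx)
    {
            /* Returns 1 while other references remain, 0 when freed. */
            if (mlx5_txq_release(dev, idx) == 0)
                    DRV_LOG(DEBUG, "port %u Tx queue %u freed",
                            dev->data->port_id, idx);
    }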
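
The same patch standardizes error reporting: control-path functions such as mlx5_vlan_filter_set() now return a negative errno value with rte_errno set, and cleanup paths save and restore rte_errno around calls that may clobber it (see the error: label in the first mlx5_txq.c hunk). A caller-side sketch under that convention; enable_vlan() is a hypothetical wrapper:

    #include <string.h>
    #include <rte_errno.h>

    /* Hypothetical wrapper: enable one VLAN filter and log failures. */
    static int
    enable_vlan(struct rte_eth_dev *dev, uint16_t vlan_id)
    {
            int ret = mlx5_vlan_filter_set(dev, vlan_id, 1);

            if (ret < 0) /* ret == -rte_errno; rte_errno holds the code. */
                    DRV_LOG(ERR, "port %u cannot enable VLAN %u: %s",
                            dev->data->port_id, vlan_id, strerror(rte_errno));
            return ret;
    }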
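
On the munmap() added to mlx5_txq_release(): munmap() works on whole pages, so the doorbell register address is first rounded down to its page boundary with RTE_ALIGN_FLOOR() and the single page containing it is unmapped. A sketch of the arithmetic, assuming only that the register lies inside one mapped page; unmap_reg_page() is a hypothetical helper:

    #include <stdint.h>
    #include <sys/mman.h>
    #include <unistd.h>

    /* Hypothetical helper: unmap the page containing register 'reg'. */
    static void
    unmap_reg_page(void *reg)
    {
            size_t page_size = sysconf(_SC_PAGESIZE); /* e.g. 4096 */
            /* Equivalent to RTE_ALIGN_FLOOR(reg, page_size) for a
             * power-of-two page size. */
            uintptr_t base = (uintptr_t)reg & ~((uintptr_t)page_size - 1);

            munmap((void *)base, page_size);
    }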
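
Finally, the mlx5_utils.h hunk reroutes DRV_LOG() through the dynamic log type mlx5_logtype instead of the compile-time RTE_LOG(..., PMD, ...). A sketch of how such a log type is typically registered at startup; the "pmd.net.mlx5" name and the constructor are illustrative assumptions, not shown in this diff:

    #include <rte_log.h>

    int mlx5_logtype;

    /* Assumed one-time registration, run before the PMD probes. */
    static void __attribute__((constructor))
    mlx5_log_init(void)
    {
            mlx5_logtype = rte_log_register("pmd.net.mlx5"); /* assumed name */
            if (mlx5_logtype >= 0)
                    rte_log_set_level(mlx5_logtype, RTE_LOG_NOTICE);
    }

With that in place, DRV_LOG(DEBUG, ...) expands to rte_log(RTE_LOG_DEBUG, mlx5_logtype, ...) with the MLX5_DRIVER_NAME prefix, and the level can be raised at runtime through the EAL --log-level option instead of rebuilding with debug logging compiled in.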