Diffstat (limited to 'src/dpdk/drivers/net')
-rw-r--r--  src/dpdk/drivers/net/mlx5/mlx5.c          | 531
-rw-r--r--  src/dpdk/drivers/net/mlx5/mlx5.h          |  43
-rw-r--r--  src/dpdk/drivers/net/mlx5/mlx5_defs.h     |   2
-rw-r--r--  src/dpdk/drivers/net/mlx5/mlx5_ethdev.c   | 178
-rw-r--r--  src/dpdk/drivers/net/mlx5/mlx5_fdir.c     | 376
-rw-r--r--  src/dpdk/drivers/net/mlx5/mlx5_mac.c      |   8
-rw-r--r--  src/dpdk/drivers/net/mlx5/mlx5_mr.c       |   8
-rw-r--r--  src/dpdk/drivers/net/mlx5/mlx5_prm.h      | 117
-rw-r--r--  src/dpdk/drivers/net/mlx5/mlx5_rss.c      |   8
-rw-r--r--  src/dpdk/drivers/net/mlx5/mlx5_rxmode.c   |   8
-rw-r--r--  src/dpdk/drivers/net/mlx5/mlx5_rxq.c      |  34
-rw-r--r--  src/dpdk/drivers/net/mlx5/mlx5_rxtx.c     | 892
-rw-r--r--  src/dpdk/drivers/net/mlx5/mlx5_rxtx.h     |  50
-rw-r--r--  src/dpdk/drivers/net/mlx5/mlx5_stats.c    | 325
-rw-r--r--  src/dpdk/drivers/net/mlx5/mlx5_trigger.c  |   4
-rw-r--r--  src/dpdk/drivers/net/mlx5/mlx5_txq.c      |  35
-rw-r--r--  src/dpdk/drivers/net/mlx5/mlx5_vlan.c     |   7
17 files changed, 1439 insertions, 1187 deletions
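
The mlx5_ethdev.c changes below query link speed and speed capabilities with the ETHTOOL_GLINKSETTINGS ioctl (Linux 4.5+) and fall back to the deprecated ETHTOOL_GSET when the new request fails. A minimal standalone sketch of that pattern follows; it is an illustration only, and query_link_speed() is a hypothetical helper: the real driver issues the request through priv_ifreq() and also decodes the link mode bitmaps, with fuller error handling.

/*
 * Sketch: query link speed via ETHTOOL_GLINKSETTINGS, falling back to
 * the deprecated ETHTOOL_GSET on older kernels. The caller supplies a
 * socket fd, e.g. from socket(AF_INET, SOCK_DGRAM, 0).
 */
#include <limits.h>
#include <stdint.h>
#include <string.h>
#include <sys/ioctl.h>
#include <net/if.h>
#include <linux/ethtool.h>
#include <linux/sockios.h>

static int
query_link_speed(int fd, const char *ifname, uint32_t *speed)
{
	struct ifreq ifr;

	memset(&ifr, 0, sizeof(ifr));
	strncpy(ifr.ifr_name, ifname, sizeof(ifr.ifr_name) - 1);
#ifdef ETHTOOL_GLINKSETTINGS
	{
		/* Room for the variable-length link mode bitmaps. */
		struct {
			struct ethtool_link_settings req;
			uint32_t modes[3 * SCHAR_MAX];
		} gls;

		memset(&gls, 0, sizeof(gls));
		gls.req.cmd = ETHTOOL_GLINKSETTINGS;
		ifr.ifr_data = (void *)&gls;
		/* Handshake: the kernel answers a zero-sized request with
		 * the negated bitmap word count it expects. */
		if (ioctl(fd, SIOCETHTOOL, &ifr) == 0 &&
		    gls.req.link_mode_masks_nwords < 0) {
			gls.req.link_mode_masks_nwords =
				-gls.req.link_mode_masks_nwords;
			if (ioctl(fd, SIOCETHTOOL, &ifr) == 0) {
				*speed = gls.req.speed;
				return 0;
			}
		}
	}
#endif
	/* Fallback for kernels older than 4.5. */
	{
		struct ethtool_cmd ecmd = { .cmd = ETHTOOL_GSET };

		ifr.ifr_data = (void *)&ecmd;
		if (ioctl(fd, SIOCETHTOOL, &ifr) != 0)
			return -1;
		*speed = ethtool_cmd_speed(&ecmd);
	}
	return 0;
}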
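
Separately, the mlx5_rxq.c/mlx5_txq.c changes below switch elts_n and cqe_n from element counts to the log2 of the ring size, so the datapath recovers sizes with (1 << n) and queue setup converts with log2above(). The sketch below matches the semantics the diff relies on (smallest n such that (1 << n) >= v); the driver's own helper lives in mlx5_utils.h.

/* Smallest n such that (1 << n) >= v. */
static unsigned int
log2above(unsigned int v)
{
	unsigned int l;
	unsigned int r;

	for (l = 0, r = 0; (v >> 1); ++l, v >>= 1)
		r |= (v & 1);
	return l + r;
}
/* Example: a queue created with desc = 512 stores elts_n = 9, and
 * completion handling derives the element count as 1 << txq->elts_n. */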
diff --git a/src/dpdk/drivers/net/mlx5/mlx5.c b/src/dpdk/drivers/net/mlx5/mlx5.c index 303b917b..7fc6ccf5 100644 --- a/src/dpdk/drivers/net/mlx5/mlx5.c +++ b/src/dpdk/drivers/net/mlx5/mlx5.c @@ -43,16 +43,16 @@ /* Verbs header. */ /* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */ #ifdef PEDANTIC -#pragma GCC diagnostic ignored "-pedantic" +#pragma GCC diagnostic ignored "-Wpedantic" #endif #include <infiniband/verbs.h> #ifdef PEDANTIC -#pragma GCC diagnostic error "-pedantic" +#pragma GCC diagnostic error "-Wpedantic" #endif /* DPDK headers don't like -pedantic. */ #ifdef PEDANTIC -#pragma GCC diagnostic ignored "-pedantic" +#pragma GCC diagnostic ignored "-Wpedantic" #endif #include <rte_malloc.h> #include <rte_ethdev.h> @@ -60,7 +60,7 @@ #include <rte_common.h> #include <rte_kvargs.h> #ifdef PEDANTIC -#pragma GCC diagnostic error "-pedantic" +#pragma GCC diagnostic error "-Wpedantic" #endif #include "mlx5.h" @@ -181,9 +181,6 @@ mlx5_dev_close(struct rte_eth_dev *dev) } if (priv->reta_idx != NULL) rte_free(priv->reta_idx); - - mlx5_stats_free(dev); - priv_unlock(priv); memset(priv, 0, sizeof(*priv)); } @@ -257,6 +254,7 @@ mlx5_dev_idx(struct rte_pci_addr *pci_addr) return ret; } + /** * Verify and store value for device argument. * @@ -297,6 +295,8 @@ mlx5_args_check(const char *key, const char *val, void *opaque) return 0; } + + /** * Parse device parameters. * @@ -341,6 +341,8 @@ mlx5_args(struct priv *priv, struct rte_devargs *devargs) return 0; } + + static struct eth_driver mlx5_driver; /** @@ -527,7 +529,7 @@ mlx5_pci_devinit(struct rte_pci_driver *pci_drv, struct rte_pci_device *pci_dev) /* TREX PATCH */ /* set for maximum performance default */ - priv->txq_inline =128; + priv->txq_inline =64; priv->txqs_inline =4; @@ -781,3 +783,516 @@ static struct rte_driver rte_mlx5_driver = { PMD_REGISTER_DRIVER(rte_mlx5_driver, mlx5); DRIVER_REGISTER_PCI_TABLE(mlx5, mlx5_pci_id_map); + + + + + + +#if 0 +/** + * Verify and store value for device argument. + * + * @param[in] key + * Key argument to verify. + * @param[in] val + * Value associated with key. + * @param opaque + * User data. + * + * @return + * 0 on success, negative errno value on failure. + */ +static int +mlx5_args_check(const char *key, const char *val, void *opaque) +{ + struct priv *priv = opaque; + unsigned long tmp; + + errno = 0; + tmp = strtoul(val, NULL, 0); + if (errno) { + WARN("%s: \"%s\" is not a valid integer", key, val); + return errno; + } + if (strcmp(MLX5_RXQ_CQE_COMP_EN, key) == 0) { + priv->cqe_comp = !!tmp; + } else if (strcmp(MLX5_TXQ_INLINE, key) == 0) { + priv->txq_inline = tmp; + } else if (strcmp(MLX5_TXQS_MIN_INLINE, key) == 0) { + priv->txqs_inline = tmp; + } else if (strcmp(MLX5_TXQ_MPW_EN, key) == 0) { + priv->mps = !!tmp; + } else { + WARN("%s: unknown parameter", key); + return -EINVAL; + } + return 0; +} + +/** + * Parse device parameters. + * + * @param priv + * Pointer to private structure. + * @param devargs + * Device arguments structure. + * + * @return + * 0 on success, errno value on failure. + */ +static int +mlx5_args(struct priv *priv, struct rte_devargs *devargs) +{ + const char **params = (const char *[]){ + MLX5_RXQ_CQE_COMP_EN, + MLX5_TXQ_INLINE, + MLX5_TXQS_MIN_INLINE, + MLX5_TXQ_MPW_EN, + NULL, + }; + struct rte_kvargs *kvlist; + int ret = 0; + int i; + + if (devargs == NULL) + return 0; + /* Following UGLY cast is done to pass checkpatch. */ + kvlist = rte_kvargs_parse(devargs->args, params); + if (kvlist == NULL) + return 0; + /* Process parameters. 
*/ + for (i = 0; (params[i] != NULL); ++i) { + if (rte_kvargs_count(kvlist, params[i])) { + ret = rte_kvargs_process(kvlist, params[i], + mlx5_args_check, priv); + if (ret != 0) + return ret; + } + } + rte_kvargs_free(kvlist); + return 0; +} + +static struct eth_driver mlx5_driver; + +/** + * DPDK callback to register a PCI device. + * + * This function creates an Ethernet device for each port of a given + * PCI device. + * + * @param[in] pci_drv + * PCI driver structure (mlx5_driver). + * @param[in] pci_dev + * PCI device information. + * + * @return + * 0 on success, negative errno value on failure. + */ +static int +mlx5_pci_probe(struct rte_pci_driver *pci_drv, struct rte_pci_device *pci_dev) +{ + struct ibv_device **list; + struct ibv_device *ibv_dev; + int err = 0; + struct ibv_context *attr_ctx = NULL; + struct ibv_device_attr device_attr; + unsigned int sriov; + unsigned int mps; + int idx; + int i; + + (void)pci_drv; + assert(pci_drv == &mlx5_driver.pci_drv); + /* Get mlx5_dev[] index. */ + idx = mlx5_dev_idx(&pci_dev->addr); + if (idx == -1) { + ERROR("this driver cannot support any more adapters"); + return -ENOMEM; + } + DEBUG("using driver device index %d", idx); + + /* Save PCI address. */ + mlx5_dev[idx].pci_addr = pci_dev->addr; + list = ibv_get_device_list(&i); + if (list == NULL) { + assert(errno); + if (errno == ENOSYS) { + WARN("cannot list devices, is ib_uverbs loaded?"); + return 0; + } + return -errno; + } + assert(i >= 0); + /* + * For each listed device, check related sysfs entry against + * the provided PCI ID. + */ + while (i != 0) { + struct rte_pci_addr pci_addr; + + --i; + DEBUG("checking device \"%s\"", list[i]->name); + if (mlx5_ibv_device_to_pci_addr(list[i], &pci_addr)) + continue; + if ((pci_dev->addr.domain != pci_addr.domain) || + (pci_dev->addr.bus != pci_addr.bus) || + (pci_dev->addr.devid != pci_addr.devid) || + (pci_dev->addr.function != pci_addr.function)) + continue; + sriov = ((pci_dev->id.device_id == + PCI_DEVICE_ID_MELLANOX_CONNECTX4VF) || + (pci_dev->id.device_id == + PCI_DEVICE_ID_MELLANOX_CONNECTX4LXVF)); + /* Multi-packet send is only supported by ConnectX-4 Lx PF. */ + mps = (pci_dev->id.device_id == + PCI_DEVICE_ID_MELLANOX_CONNECTX4LX); + INFO("PCI information matches, using device \"%s\"" + " (SR-IOV: %s, MPS: %s)", + list[i]->name, + sriov ? "true" : "false", + mps ? 
"true" : "false"); + attr_ctx = ibv_open_device(list[i]); + err = errno; + break; + } + if (attr_ctx == NULL) { + ibv_free_device_list(list); + switch (err) { + case 0: + WARN("cannot access device, is mlx5_ib loaded?"); + return 0; + case EINVAL: + WARN("cannot use device, are drivers up to date?"); + return 0; + } + assert(err > 0); + return -err; + } + ibv_dev = list[i]; + + DEBUG("device opened"); + if (ibv_query_device(attr_ctx, &device_attr)) + goto error; + INFO("%u port(s) detected", device_attr.phys_port_cnt); + + for (i = 0; i < device_attr.phys_port_cnt; i++) { + uint32_t port = i + 1; /* ports are indexed from one */ + uint32_t test = (1 << i); + struct ibv_context *ctx = NULL; + struct ibv_port_attr port_attr; + struct ibv_pd *pd = NULL; + struct priv *priv = NULL; + struct rte_eth_dev *eth_dev; + struct ibv_exp_device_attr exp_device_attr; + struct ether_addr mac; + uint16_t num_vfs = 0; + + exp_device_attr.comp_mask = + IBV_EXP_DEVICE_ATTR_EXP_CAP_FLAGS | + IBV_EXP_DEVICE_ATTR_RX_HASH | + IBV_EXP_DEVICE_ATTR_VLAN_OFFLOADS | + IBV_EXP_DEVICE_ATTR_RX_PAD_END_ALIGN | + 0; + + DEBUG("using port %u (%08" PRIx32 ")", port, test); + + ctx = ibv_open_device(ibv_dev); + if (ctx == NULL) + goto port_error; + + /* Check port status. */ + err = ibv_query_port(ctx, port, &port_attr); + if (err) { + ERROR("port query failed: %s", strerror(err)); + goto port_error; + } + + if (port_attr.link_layer != IBV_LINK_LAYER_ETHERNET) { + ERROR("port %d is not configured in Ethernet mode", + port); + goto port_error; + } + + if (port_attr.state != IBV_PORT_ACTIVE) + DEBUG("port %d is not active: \"%s\" (%d)", + port, ibv_port_state_str(port_attr.state), + port_attr.state); + + /* Allocate protection domain. */ + pd = ibv_alloc_pd(ctx); + if (pd == NULL) { + ERROR("PD allocation failure"); + err = ENOMEM; + goto port_error; + } + + mlx5_dev[idx].ports |= test; + + /* from rte_ethdev.c */ + priv = rte_zmalloc("ethdev private structure", + sizeof(*priv), + RTE_CACHE_LINE_SIZE); + if (priv == NULL) { + ERROR("priv allocation failure"); + err = ENOMEM; + goto port_error; + } + + priv->ctx = ctx; + priv->device_attr = device_attr; + priv->port = port; + priv->pd = pd; + priv->mtu = ETHER_MTU; + priv->mps = mps; /* Enable MPW by default if supported. */ + priv->cqe_comp = 1; /* Enable compression by default. */ + err = mlx5_args(priv, pci_dev->device.devargs); + if (err) { + ERROR("failed to process device arguments: %s", + strerror(err)); + goto port_error; + } + if (ibv_exp_query_device(ctx, &exp_device_attr)) { + ERROR("ibv_exp_query_device() failed"); + goto port_error; + } + + priv->hw_csum = + ((exp_device_attr.exp_device_cap_flags & + IBV_EXP_DEVICE_RX_CSUM_TCP_UDP_PKT) && + (exp_device_attr.exp_device_cap_flags & + IBV_EXP_DEVICE_RX_CSUM_IP_PKT)); + DEBUG("checksum offloading is %ssupported", + (priv->hw_csum ? "" : "not ")); + + priv->hw_csum_l2tun = !!(exp_device_attr.exp_device_cap_flags & + IBV_EXP_DEVICE_VXLAN_SUPPORT); + DEBUG("L2 tunnel checksum offloads are %ssupported", + (priv->hw_csum_l2tun ? "" : "not ")); + + priv->ind_table_max_size = exp_device_attr.rx_hash_caps.max_rwq_indirection_table_size; + /* Remove this check once DPDK supports larger/variable + * indirection tables. 
*/ + if (priv->ind_table_max_size > (unsigned int)RSS_INDIRECTION_TABLE_SIZE) + priv->ind_table_max_size = RSS_INDIRECTION_TABLE_SIZE; + DEBUG("maximum RX indirection table size is %u", + priv->ind_table_max_size); + priv->hw_vlan_strip = !!(exp_device_attr.wq_vlan_offloads_cap & + IBV_EXP_RECEIVE_WQ_CVLAN_STRIP); + DEBUG("VLAN stripping is %ssupported", + (priv->hw_vlan_strip ? "" : "not ")); + + priv->hw_fcs_strip = !!(exp_device_attr.exp_device_cap_flags & + IBV_EXP_DEVICE_SCATTER_FCS); + DEBUG("FCS stripping configuration is %ssupported", + (priv->hw_fcs_strip ? "" : "not ")); + + priv->hw_padding = !!exp_device_attr.rx_pad_end_addr_align; + DEBUG("hardware RX end alignment padding is %ssupported", + (priv->hw_padding ? "" : "not ")); + + priv_get_num_vfs(priv, &num_vfs); + priv->sriov = (num_vfs || sriov); + if (priv->mps && !mps) { + ERROR("multi-packet send not supported on this device" + " (" MLX5_TXQ_MPW_EN ")"); + err = ENOTSUP; + goto port_error; + } + /* Allocate and register default RSS hash keys. */ + priv->rss_conf = rte_calloc(__func__, hash_rxq_init_n, + sizeof((*priv->rss_conf)[0]), 0); + if (priv->rss_conf == NULL) { + err = ENOMEM; + goto port_error; + } + err = rss_hash_rss_conf_new_key(priv, + rss_hash_default_key, + rss_hash_default_key_len, + ETH_RSS_PROTO_MASK); + if (err) + goto port_error; + /* Configure the first MAC address by default. */ + if (priv_get_mac(priv, &mac.addr_bytes)) { + ERROR("cannot get MAC address, is mlx5_en loaded?" + " (errno: %s)", strerror(errno)); + goto port_error; + } + INFO("port %u MAC address is %02x:%02x:%02x:%02x:%02x:%02x", + priv->port, + mac.addr_bytes[0], mac.addr_bytes[1], + mac.addr_bytes[2], mac.addr_bytes[3], + mac.addr_bytes[4], mac.addr_bytes[5]); + /* Register MAC address. */ + claim_zero(priv_mac_addr_add(priv, 0, + (const uint8_t (*)[ETHER_ADDR_LEN]) + mac.addr_bytes)); + /* Initialize FD filters list. */ + err = fdir_init_filters_list(priv); + if (err) + goto port_error; +#ifndef NDEBUG + { + char ifname[IF_NAMESIZE]; + + if (priv_get_ifname(priv, &ifname) == 0) + DEBUG("port %u ifname is \"%s\"", + priv->port, ifname); + else + DEBUG("port %u ifname is unknown", priv->port); + } +#endif + /* Get actual MTU if possible. */ + priv_get_mtu(priv, &priv->mtu); + DEBUG("port %u MTU is %u", priv->port, priv->mtu); + + /* from rte_ethdev.c */ + { + char name[RTE_ETH_NAME_MAX_LEN]; + + snprintf(name, sizeof(name), "%s port %u", + ibv_get_device_name(ibv_dev), port); + eth_dev = rte_eth_dev_allocate(name); + } + if (eth_dev == NULL) { + ERROR("can not allocate rte ethdev"); + err = ENOMEM; + goto port_error; + } + + /* Secondary processes have to use local storage for their + * private data as well as a copy of eth_dev->data, but this + * pointer must not be modified before burst functions are + * actually called. 
*/ + if (mlx5_is_secondary()) { + struct mlx5_secondary_data *sd = + &mlx5_secondary_data[eth_dev->data->port_id]; + sd->primary_priv = eth_dev->data->dev_private; + if (sd->primary_priv == NULL) { + ERROR("no private data for port %u", + eth_dev->data->port_id); + err = EINVAL; + goto port_error; + } + sd->shared_dev_data = eth_dev->data; + rte_spinlock_init(&sd->lock); + memcpy(sd->data.name, sd->shared_dev_data->name, + sizeof(sd->data.name)); + sd->data.dev_private = priv; + sd->data.rx_mbuf_alloc_failed = 0; + sd->data.mtu = ETHER_MTU; + sd->data.port_id = sd->shared_dev_data->port_id; + sd->data.mac_addrs = priv->mac; + eth_dev->tx_pkt_burst = mlx5_tx_burst_secondary_setup; + eth_dev->rx_pkt_burst = mlx5_rx_burst_secondary_setup; + } else { + eth_dev->data->dev_private = priv; + eth_dev->data->rx_mbuf_alloc_failed = 0; + eth_dev->data->mtu = ETHER_MTU; + eth_dev->data->mac_addrs = priv->mac; + } + + eth_dev->pci_dev = pci_dev; + rte_eth_copy_pci_info(eth_dev, pci_dev); + eth_dev->driver = &mlx5_driver; + priv->dev = eth_dev; + eth_dev->dev_ops = &mlx5_dev_ops; + + TAILQ_INIT(ð_dev->link_intr_cbs); + + /* Bring Ethernet device up. */ + DEBUG("forcing Ethernet interface up"); + priv_set_flags(priv, ~IFF_UP, IFF_UP); + mlx5_link_update_unlocked(priv->dev, 1); + continue; + +port_error: + if (priv) { + rte_free(priv->rss_conf); + rte_free(priv); + } + if (pd) + claim_zero(ibv_dealloc_pd(pd)); + if (ctx) + claim_zero(ibv_close_device(ctx)); + break; + } + + /* + * XXX if something went wrong in the loop above, there is a resource + * leak (ctx, pd, priv, dpdk ethdev) but we can do nothing about it as + * long as the dpdk does not provide a way to deallocate a ethdev and a + * way to enumerate the registered ethdevs to free the previous ones. + */ + + /* no port found, complain */ + if (!mlx5_dev[idx].ports) { + err = ENODEV; + goto error; + } + +error: + if (attr_ctx) + claim_zero(ibv_close_device(attr_ctx)); + if (list) + ibv_free_device_list(list); + assert(err >= 0); + return -err; +} + +static const struct rte_pci_id mlx5_pci_id_map[] = { + { + RTE_PCI_DEVICE(PCI_VENDOR_ID_MELLANOX, + PCI_DEVICE_ID_MELLANOX_CONNECTX4) + }, + { + RTE_PCI_DEVICE(PCI_VENDOR_ID_MELLANOX, + PCI_DEVICE_ID_MELLANOX_CONNECTX4VF) + }, + { + RTE_PCI_DEVICE(PCI_VENDOR_ID_MELLANOX, + PCI_DEVICE_ID_MELLANOX_CONNECTX4LX) + }, + { + RTE_PCI_DEVICE(PCI_VENDOR_ID_MELLANOX, + PCI_DEVICE_ID_MELLANOX_CONNECTX4LXVF) + }, + { + .vendor_id = 0 + } +}; + +static struct eth_driver mlx5_driver = { + .pci_drv = { + .driver = { + .name = MLX5_DRIVER_NAME + }, + .id_table = mlx5_pci_id_map, + .probe = mlx5_pci_probe, + .drv_flags = RTE_PCI_DRV_INTR_LSC, + }, + .dev_private_size = sizeof(struct priv) +}; + +/** + * Driver initialization routine. + */ +RTE_INIT(rte_mlx5_pmd_init); +static void +rte_mlx5_pmd_init(void) +{ + /* + * RDMAV_HUGEPAGES_SAFE tells ibv_fork_init() we intend to use + * huge pages. Calling ibv_fork_init() during init allows + * applications to use fork() safely for purposes other than + * using this PMD, which is not supported in forked processes. + */ + setenv("RDMAV_HUGEPAGES_SAFE", "1", 1); + ibv_fork_init(); + rte_eal_pci_register(&mlx5_driver.pci_drv); +} + +RTE_PMD_EXPORT_NAME(net_mlx5, __COUNTER__); +RTE_PMD_REGISTER_PCI_TABLE(net_mlx5, mlx5_pci_id_map); +#endif diff --git a/src/dpdk/drivers/net/mlx5/mlx5.h b/src/dpdk/drivers/net/mlx5/mlx5.h index 68bad904..79b7a600 100644 --- a/src/dpdk/drivers/net/mlx5/mlx5.h +++ b/src/dpdk/drivers/net/mlx5/mlx5.h @@ -43,16 +43,16 @@ /* Verbs header. 
*/ /* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */ #ifdef PEDANTIC -#pragma GCC diagnostic ignored "-pedantic" +#pragma GCC diagnostic ignored "-Wpedantic" #endif #include <infiniband/verbs.h> #ifdef PEDANTIC -#pragma GCC diagnostic error "-pedantic" +#pragma GCC diagnostic error "-Wpedantic" #endif /* DPDK headers don't like -pedantic. */ #ifdef PEDANTIC -#pragma GCC diagnostic ignored "-pedantic" +#pragma GCC diagnostic ignored "-Wpedantic" #endif #include <rte_ether.h> #include <rte_ethdev.h> @@ -60,7 +60,7 @@ #include <rte_interrupts.h> #include <rte_errno.h> #ifdef PEDANTIC -#pragma GCC diagnostic error "-pedantic" +#pragma GCC diagnostic error "-Wpedantic" #endif #include "mlx5_utils.h" @@ -84,34 +84,6 @@ enum { PCI_DEVICE_ID_MELLANOX_CONNECTX4LXVF = 0x1016, }; -struct mlx5_stats_priv { - - struct rte_eth_stats m_shadow; - uint32_t n_stats; /* number of counters */ - - void * et_stats ;/* point to ethtool counter struct ethtool_stats*/ - - /* index into ethtool */ - uint16_t inx_rx_vport_unicast_bytes; - uint16_t inx_rx_vport_multicast_bytes; - uint16_t inx_rx_vport_broadcast_bytes; - uint16_t inx_rx_vport_unicast_packets; - uint16_t inx_rx_vport_multicast_packets; - uint16_t inx_rx_vport_broadcast_packets; - uint16_t inx_tx_vport_unicast_bytes; - uint16_t inx_tx_vport_multicast_bytes; - uint16_t inx_tx_vport_broadcast_bytes; - uint16_t inx_tx_vport_unicast_packets; - uint16_t inx_tx_vport_multicast_packets; - uint16_t inx_tx_vport_broadcast_packets; - uint16_t inx_rx_wqe_err; - uint16_t inx_rx_crc_errors_phy; - uint16_t inx_rx_in_range_len_errors_phy; - uint16_t inx_rx_symbol_err_phy; - uint16_t inx_tx_errors_phy; -}; - - struct priv { struct rte_eth_dev *dev; /* Ethernet device. */ struct ibv_context *ctx; /* Verbs context. */ @@ -162,8 +134,9 @@ struct priv { unsigned int (*reta_idx)[]; /* RETA index table. */ unsigned int reta_idx_n; /* RETA index size. */ struct fdir_filter_list *fdir_filter_list; /* Flow director rules. */ + struct fdir_queue *fdir_drop_queue; /* Flow director drop queue. */ + uint32_t link_speed_capa; /* Link speed capabilities. */ rte_spinlock_t lock; /* Lock for control functions. */ - struct mlx5_stats_priv m_stats; }; /* Local storage for secondary process data. 
*/ @@ -215,6 +188,7 @@ int priv_set_flags(struct priv *, unsigned int, unsigned int); int mlx5_dev_configure(struct rte_eth_dev *); void mlx5_dev_infos_get(struct rte_eth_dev *, struct rte_eth_dev_info *); const uint32_t *mlx5_dev_supported_ptypes_get(struct rte_eth_dev *dev); +int mlx5_link_update_unlocked(struct rte_eth_dev *, int); int mlx5_link_update(struct rte_eth_dev *, int); int mlx5_dev_set_mtu(struct rte_eth_dev *, uint16_t); int mlx5_dev_get_flow_ctrl(struct rte_eth_dev *, struct rte_eth_fc_conf *); @@ -272,8 +246,6 @@ void mlx5_allmulticast_disable(struct rte_eth_dev *); void mlx5_stats_get(struct rte_eth_dev *, struct rte_eth_stats *); void mlx5_stats_reset(struct rte_eth_dev *); -void mlx5_stats_free(struct rte_eth_dev *dev); - /* mlx5_vlan.c */ @@ -288,6 +260,7 @@ void mlx5_dev_stop(struct rte_eth_dev *); /* mlx5_fdir.c */ +void priv_fdir_queue_destroy(struct priv *, struct fdir_queue *); int fdir_init_filters_list(struct priv *); void priv_fdir_delete_filters_list(struct priv *); void priv_fdir_disable(struct priv *); diff --git a/src/dpdk/drivers/net/mlx5/mlx5_defs.h b/src/dpdk/drivers/net/mlx5/mlx5_defs.h index cc2a6f3e..b32816e6 100644 --- a/src/dpdk/drivers/net/mlx5/mlx5_defs.h +++ b/src/dpdk/drivers/net/mlx5/mlx5_defs.h @@ -37,7 +37,7 @@ #include "mlx5_autoconf.h" /* Reported driver name. */ -#define MLX5_DRIVER_NAME "librte_pmd_mlx5" +#define MLX5_DRIVER_NAME "net_mlx5" /* Maximum number of simultaneous MAC addresses. */ #define MLX5_MAX_MAC_ADDRESSES 128 diff --git a/src/dpdk/drivers/net/mlx5/mlx5_ethdev.c b/src/dpdk/drivers/net/mlx5/mlx5_ethdev.c index 130e15d5..85b81360 100644 --- a/src/dpdk/drivers/net/mlx5/mlx5_ethdev.c +++ b/src/dpdk/drivers/net/mlx5/mlx5_ethdev.c @@ -50,7 +50,7 @@ /* DPDK headers don't like -pedantic. */ #ifdef PEDANTIC -#pragma GCC diagnostic ignored "-pedantic" +#pragma GCC diagnostic ignored "-Wpedantic" #endif #include <rte_atomic.h> #include <rte_ethdev.h> @@ -60,7 +60,7 @@ #include <rte_alarm.h> #include <rte_malloc.h> #ifdef PEDANTIC -#pragma GCC diagnostic error "-pedantic" +#pragma GCC diagnostic error "-Wpedantic" #endif #include "mlx5.h" @@ -583,7 +583,8 @@ mlx5_dev_infos_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *info) (DEV_RX_OFFLOAD_IPV4_CKSUM | DEV_RX_OFFLOAD_UDP_CKSUM | DEV_RX_OFFLOAD_TCP_CKSUM) : - 0); + 0) | + (priv->hw_vlan_strip ? DEV_RX_OFFLOAD_VLAN_STRIP : 0); if (!priv->mps) info->tx_offload_capa = DEV_TX_OFFLOAD_VLAN_INSERT; if (priv->hw_csum) @@ -599,15 +600,10 @@ mlx5_dev_infos_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *info) * size if it is not fixed. * The API should be updated to solve this problem. */ info->reta_size = priv->ind_table_max_size; - info->speed_capa = - ETH_LINK_SPEED_1G | - ETH_LINK_SPEED_10G | - ETH_LINK_SPEED_20G | - ETH_LINK_SPEED_25G | - ETH_LINK_SPEED_40G | - ETH_LINK_SPEED_50G | - ETH_LINK_SPEED_56G | - ETH_LINK_SPEED_100G; + info->hash_key_size = ((*priv->rss_conf) ? + (*priv->rss_conf)[0]->rss_key_len : + 0); + info->speed_capa = priv->link_speed_capa; priv_unlock(priv); } @@ -630,7 +626,7 @@ mlx5_dev_supported_ptypes_get(struct rte_eth_dev *dev) } /** - * DPDK callback to retrieve physical link information (unlocked version). + * Retrieve physical link information (unlocked version using legacy ioctl). * * @param dev * Pointer to Ethernet device structure. @@ -638,11 +634,11 @@ mlx5_dev_supported_ptypes_get(struct rte_eth_dev *dev) * Wait for request completion (ignored). 
*/ static int -mlx5_link_update_unlocked(struct rte_eth_dev *dev, int wait_to_complete) +mlx5_link_update_unlocked_gset(struct rte_eth_dev *dev, int wait_to_complete) { struct priv *priv = mlx5_get_priv(dev); struct ethtool_cmd edata = { - .cmd = ETHTOOL_GSET + .cmd = ETHTOOL_GSET /* Deprecated since Linux v4.5. */ }; struct ifreq ifr; struct rte_eth_link dev_link; @@ -667,6 +663,19 @@ mlx5_link_update_unlocked(struct rte_eth_dev *dev, int wait_to_complete) dev_link.link_speed = 0; else dev_link.link_speed = link_speed; + priv->link_speed_capa = 0; + if (edata.supported & SUPPORTED_Autoneg) + priv->link_speed_capa |= ETH_LINK_SPEED_AUTONEG; + if (edata.supported & (SUPPORTED_1000baseT_Full | + SUPPORTED_1000baseKX_Full)) + priv->link_speed_capa |= ETH_LINK_SPEED_1G; + if (edata.supported & SUPPORTED_10000baseKR_Full) + priv->link_speed_capa |= ETH_LINK_SPEED_10G; + if (edata.supported & (SUPPORTED_40000baseKR4_Full | + SUPPORTED_40000baseCR4_Full | + SUPPORTED_40000baseSR4_Full | + SUPPORTED_40000baseLR4_Full)) + priv->link_speed_capa |= ETH_LINK_SPEED_40G; dev_link.link_duplex = ((edata.duplex == DUPLEX_HALF) ? ETH_LINK_HALF_DUPLEX : ETH_LINK_FULL_DUPLEX); dev_link.link_autoneg = !(dev->data->dev_conf.link_speeds & @@ -681,6 +690,123 @@ mlx5_link_update_unlocked(struct rte_eth_dev *dev, int wait_to_complete) } /** + * Retrieve physical link information (unlocked version using new ioctl from + * Linux 4.5). + * + * @param dev + * Pointer to Ethernet device structure. + * @param wait_to_complete + * Wait for request completion (ignored). + */ +static int +mlx5_link_update_unlocked_gs(struct rte_eth_dev *dev, int wait_to_complete) +{ +#ifdef ETHTOOL_GLINKSETTINGS + struct priv *priv = mlx5_get_priv(dev); + struct ethtool_link_settings edata = { + .cmd = ETHTOOL_GLINKSETTINGS, + }; + struct ifreq ifr; + struct rte_eth_link dev_link; + uint64_t sc; + + (void)wait_to_complete; + if (priv_ifreq(priv, SIOCGIFFLAGS, &ifr)) { + WARN("ioctl(SIOCGIFFLAGS) failed: %s", strerror(errno)); + return -1; + } + memset(&dev_link, 0, sizeof(dev_link)); + dev_link.link_status = ((ifr.ifr_flags & IFF_UP) && + (ifr.ifr_flags & IFF_RUNNING)); + ifr.ifr_data = (void *)&edata; + if (priv_ifreq(priv, SIOCETHTOOL, &ifr)) { + DEBUG("ioctl(SIOCETHTOOL, ETHTOOL_GLINKSETTINGS) failed: %s", + strerror(errno)); + return -1; + } + dev_link.link_speed = edata.speed; + sc = edata.link_mode_masks[0] | + ((uint64_t)edata.link_mode_masks[1] << 32); + priv->link_speed_capa = 0; + /* Link speeds available in kernel v4.5. 
*/ + if (sc & ETHTOOL_LINK_MODE_Autoneg_BIT) + priv->link_speed_capa |= ETH_LINK_SPEED_AUTONEG; + if (sc & (ETHTOOL_LINK_MODE_1000baseT_Full_BIT | + ETHTOOL_LINK_MODE_1000baseKX_Full_BIT)) + priv->link_speed_capa |= ETH_LINK_SPEED_1G; + if (sc & (ETHTOOL_LINK_MODE_10000baseKX4_Full_BIT | + ETHTOOL_LINK_MODE_10000baseKR_Full_BIT | + ETHTOOL_LINK_MODE_10000baseR_FEC_BIT)) + priv->link_speed_capa |= ETH_LINK_SPEED_10G; + if (sc & (ETHTOOL_LINK_MODE_20000baseMLD2_Full_BIT | + ETHTOOL_LINK_MODE_20000baseKR2_Full_BIT)) + priv->link_speed_capa |= ETH_LINK_SPEED_20G; + if (sc & (ETHTOOL_LINK_MODE_40000baseKR4_Full_BIT | + ETHTOOL_LINK_MODE_40000baseCR4_Full_BIT | + ETHTOOL_LINK_MODE_40000baseSR4_Full_BIT | + ETHTOOL_LINK_MODE_40000baseLR4_Full_BIT)) + priv->link_speed_capa |= ETH_LINK_SPEED_40G; + if (sc & (ETHTOOL_LINK_MODE_56000baseKR4_Full_BIT | + ETHTOOL_LINK_MODE_56000baseCR4_Full_BIT | + ETHTOOL_LINK_MODE_56000baseSR4_Full_BIT | + ETHTOOL_LINK_MODE_56000baseLR4_Full_BIT)) + priv->link_speed_capa |= ETH_LINK_SPEED_56G; + /* Link speeds available in kernel v4.6. */ +#ifdef HAVE_ETHTOOL_LINK_MODE_25G + if (sc & (ETHTOOL_LINK_MODE_25000baseCR_Full_BIT | + ETHTOOL_LINK_MODE_25000baseKR_Full_BIT | + ETHTOOL_LINK_MODE_25000baseSR_Full_BIT)) + priv->link_speed_capa |= ETH_LINK_SPEED_25G; +#endif +#ifdef HAVE_ETHTOOL_LINK_MODE_50G + if (sc & (ETHTOOL_LINK_MODE_50000baseCR2_Full_BIT | + ETHTOOL_LINK_MODE_50000baseKR2_Full_BIT)) + priv->link_speed_capa |= ETH_LINK_SPEED_50G; +#endif +#ifdef HAVE_ETHTOOL_LINK_MODE_100G + if (sc & (ETHTOOL_LINK_MODE_100000baseKR4_Full_BIT | + ETHTOOL_LINK_MODE_100000baseSR4_Full_BIT | + ETHTOOL_LINK_MODE_100000baseCR4_Full_BIT | + ETHTOOL_LINK_MODE_100000baseLR4_ER4_Full_BIT)) + priv->link_speed_capa |= ETH_LINK_SPEED_100G; +#endif + dev_link.link_duplex = ((edata.duplex == DUPLEX_HALF) ? + ETH_LINK_HALF_DUPLEX : ETH_LINK_FULL_DUPLEX); + dev_link.link_autoneg = !(dev->data->dev_conf.link_speeds & + ETH_LINK_SPEED_FIXED); + if (memcmp(&dev_link, &dev->data->dev_link, sizeof(dev_link))) { + /* Link status changed. */ + dev->data->dev_link = dev_link; + return 0; + } +#else + (void)dev; + (void)wait_to_complete; +#endif + /* Link status is still the same. */ + return -1; +} + +/** + * DPDK callback to retrieve physical link information (unlocked version). + * + * @param dev + * Pointer to Ethernet device structure. + * @param wait_to_complete + * Wait for request completion (ignored). + */ +int +mlx5_link_update_unlocked(struct rte_eth_dev *dev, int wait_to_complete) +{ + int ret; + + ret = mlx5_link_update_unlocked_gs(dev, wait_to_complete); + if (ret < 0) + ret = mlx5_link_update_unlocked_gset(dev, wait_to_complete); + return ret; +} + +/** * DPDK callback to retrieve physical link information. 
* * @param dev @@ -807,7 +933,7 @@ recover: if (rehash) ret = rxq_rehash(dev, rxq_ctrl); else - ret = rxq_ctrl_setup(dev, rxq_ctrl, rxq->elts_n, + ret = rxq_ctrl_setup(dev, rxq_ctrl, 1 << rxq->elts_n, rxq_ctrl->socket, NULL, rxq->mp); if (!ret) continue; @@ -1067,8 +1193,8 @@ mlx5_dev_link_status_handler(void *arg) assert(priv->pending_alarm == 1); ret = priv_dev_link_status_handler(priv, dev); priv_unlock(priv); - if (ret) - _rte_eth_dev_callback_process(dev, RTE_ETH_EVENT_INTR_LSC); + //if (ret) + // _rte_eth_dev_callback_process(dev, RTE_ETH_EVENT_INTR_LSC, NULL); } /** @@ -1090,8 +1216,8 @@ mlx5_dev_interrupt_handler(struct rte_intr_handle *intr_handle, void *cb_arg) priv_lock(priv); ret = priv_dev_link_status_handler(priv, dev); priv_unlock(priv); - if (ret) - _rte_eth_dev_callback_process(dev, RTE_ETH_EVENT_INTR_LSC); + //if (ret) + // _rte_eth_dev_callback_process(dev, RTE_ETH_EVENT_INTR_LSC, NULL); } /** @@ -1308,12 +1434,14 @@ mlx5_secondary_data_setup(struct priv *priv) continue; primary_txq_ctrl = container_of(primary_txq, struct txq_ctrl, txq); - txq_ctrl = rte_calloc_socket("TXQ", 1, sizeof(*txq_ctrl), 0, + txq_ctrl = rte_calloc_socket("TXQ", 1, sizeof(*txq_ctrl) + + (1 << primary_txq->elts_n) * + sizeof(struct rte_mbuf *), 0, primary_txq_ctrl->socket); if (txq_ctrl != NULL) { if (txq_ctrl_setup(priv->dev, - primary_txq_ctrl, - primary_txq->elts_n, + txq_ctrl, + 1 << primary_txq->elts_n, primary_txq_ctrl->socket, NULL) == 0) { txq_ctrl->txq.stats.idx = @@ -1397,10 +1525,6 @@ priv_select_tx_function(struct priv *priv) } else if ((priv->sriov == 0) && priv->mps) { priv->dev->tx_pkt_burst = mlx5_tx_burst_mpw; DEBUG("selected MPW TX function"); - } else if (priv->txq_inline && (priv->txqs_n >= priv->txqs_inline)) { - priv->dev->tx_pkt_burst = mlx5_tx_burst_inline; - DEBUG("selected inline TX function (%u >= %u queues)", - priv->txqs_n, priv->txqs_inline); } } diff --git a/src/dpdk/drivers/net/mlx5/mlx5_fdir.c b/src/dpdk/drivers/net/mlx5/mlx5_fdir.c index 4ba3bb9f..1acf6826 100644 --- a/src/dpdk/drivers/net/mlx5/mlx5_fdir.c +++ b/src/dpdk/drivers/net/mlx5/mlx5_fdir.c @@ -37,28 +37,26 @@ #include <string.h> #include <errno.h> -#define TREX_PATCH - /* Verbs header. */ /* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */ #ifdef PEDANTIC -#pragma GCC diagnostic ignored "-pedantic" +#pragma GCC diagnostic ignored "-Wpedantic" #endif -#include <infiniband/verbs_exp.h> +#include <infiniband/verbs.h> #ifdef PEDANTIC -#pragma GCC diagnostic error "-pedantic" +#pragma GCC diagnostic error "-Wpedantic" #endif /* DPDK headers don't like -pedantic. */ #ifdef PEDANTIC -#pragma GCC diagnostic ignored "-pedantic" +#pragma GCC diagnostic ignored "-Wpedantic" #endif #include <rte_ether.h> #include <rte_malloc.h> #include <rte_ethdev.h> #include <rte_common.h> #ifdef PEDANTIC -#pragma GCC diagnostic error "-pedantic" +#pragma GCC diagnostic error "-Wpedantic" #endif #include "mlx5.h" @@ -69,10 +67,6 @@ struct fdir_flow_desc { uint16_t src_port; uint32_t src_ip[4]; uint32_t dst_ip[4]; - uint8_t tos; - uint8_t ip_id; - uint8_t proto; - uint8_t mac[6]; uint16_t vlan_tag; enum hash_rxq_type type; @@ -81,6 +75,7 @@ struct fdir_flow_desc { struct mlx5_fdir_filter { LIST_ENTRY(mlx5_fdir_filter) next; uint16_t queue; /* Queue assigned to if FDIR match. 
*/ + enum rte_eth_fdir_behavior behavior; struct fdir_flow_desc desc; struct ibv_exp_flow *flow; }; @@ -108,7 +103,6 @@ fdir_filter_to_flow_desc(const struct rte_eth_fdir_filter *fdir_filter, desc->vlan_tag = fdir_filter->input.flow_ext.vlan_tci; /* Set MAC address. */ -#ifndef TREX_PATCH if (mode == RTE_FDIR_MODE_PERFECT_MAC_VLAN) { rte_memcpy(desc->mac, fdir_filter->input.flow.mac_vlan_flow.mac_addr. @@ -117,13 +111,6 @@ fdir_filter_to_flow_desc(const struct rte_eth_fdir_filter *fdir_filter, desc->type = HASH_RXQ_ETH; return; } -#else - if (fdir_filter->input.flow.ip4_flow.ip_id == 2) { - desc->type = HASH_RXQ_ETH; - desc->ip_id = fdir_filter->input.flow.ip4_flow.ip_id; - return; - } -#endif /* Set mode */ switch (fdir_filter->input.flow_type) { @@ -155,13 +142,9 @@ fdir_filter_to_flow_desc(const struct rte_eth_fdir_filter *fdir_filter, case RTE_ETH_FLOW_NONFRAG_IPV4_TCP: desc->src_port = fdir_filter->input.flow.udp4_flow.src_port; desc->dst_port = fdir_filter->input.flow.udp4_flow.dst_port; - case RTE_ETH_FLOW_NONFRAG_IPV4_OTHER: desc->src_ip[0] = fdir_filter->input.flow.ip4_flow.src_ip; desc->dst_ip[0] = fdir_filter->input.flow.ip4_flow.dst_ip; - desc->tos = fdir_filter->input.flow.ip4_flow.ttl; /* TTL is mapped to TOS TREX_PATCH */ - desc->ip_id = fdir_filter->input.flow.ip4_flow.ip_id; - desc->proto = fdir_filter->input.flow.ip4_flow.proto; break; case RTE_ETH_FLOW_NONFRAG_IPV6_UDP: case RTE_ETH_FLOW_NONFRAG_IPV6_TCP: @@ -175,17 +158,12 @@ fdir_filter_to_flow_desc(const struct rte_eth_fdir_filter *fdir_filter, rte_memcpy(desc->dst_ip, fdir_filter->input.flow.ipv6_flow.dst_ip, sizeof(desc->dst_ip)); - desc->tos = (uint8_t)fdir_filter->input.flow.ipv6_flow.hop_limits; /* TTL is mapped to TOS - TREX_PATCH */ - desc->ip_id = (uint8_t)fdir_filter->input.flow.ipv6_flow.flow_label; - desc->proto = fdir_filter->input.flow.ipv6_flow.proto; - break; default: break; } } - /** * Check if two flow descriptors overlap according to configured mask. * @@ -220,12 +198,6 @@ priv_fdir_overlap(const struct priv *priv, ((desc1->dst_port & mask->dst_port_mask) != (desc2->dst_port & mask->dst_port_mask))) return 0; - - if ( (desc1->tos != desc2->tos) || - (desc1->ip_id != desc2->ip_id) || - (desc1->proto != desc2->proto) ) - return 0; - switch (desc1->type) { case HASH_RXQ_IPV4: case HASH_RXQ_UDPV4: @@ -233,9 +205,8 @@ priv_fdir_overlap(const struct priv *priv, if (((desc1->src_ip[0] & mask->ipv4_mask.src_ip) != (desc2->src_ip[0] & mask->ipv4_mask.src_ip)) || ((desc1->dst_ip[0] & mask->ipv4_mask.dst_ip) != - (desc2->dst_ip[0] & mask->ipv4_mask.dst_ip))) + (desc2->dst_ip[0] & mask->ipv4_mask.dst_ip))) return 0; - break; case HASH_RXQ_IPV6: case HASH_RXQ_UDPV6: @@ -281,8 +252,8 @@ priv_fdir_flow_add(struct priv *priv, struct ibv_exp_flow_attr *attr = &data->attr; uintptr_t spec_offset = (uintptr_t)&data->spec; struct ibv_exp_flow_spec_eth *spec_eth; - struct ibv_exp_flow_spec_ipv4_ext *spec_ipv4; - struct ibv_exp_flow_spec_ipv6_ext *spec_ipv6; + struct ibv_exp_flow_spec_ipv4 *spec_ipv4; + struct ibv_exp_flow_spec_ipv6 *spec_ipv6; struct ibv_exp_flow_spec_tcp_udp *spec_tcp_udp; struct mlx5_fdir_filter *iter_fdir_filter; unsigned int i; @@ -294,10 +265,8 @@ priv_fdir_flow_add(struct priv *priv, (iter_fdir_filter->flow != NULL) && (priv_fdir_overlap(priv, &mlx5_fdir_filter->desc, - &iter_fdir_filter->desc))){ - ERROR("overlap rules, please check your rules"); - return EEXIST; - } + &iter_fdir_filter->desc))) + return EEXIST; /* * No padding must be inserted by the compiler between attr and spec. 
@@ -320,7 +289,6 @@ priv_fdir_flow_add(struct priv *priv, /* Update priority */ attr->priority = 2; -#ifndef TREX_PATCH if (fdir_mode == RTE_FDIR_MODE_PERFECT_MAC_VLAN) { /* MAC Address */ for (i = 0; i != RTE_DIM(spec_eth->mask.dst_mac); ++i) { @@ -330,14 +298,6 @@ priv_fdir_flow_add(struct priv *priv, } goto create_flow; } -#else - // empty mask means "match everything". This rule will match all packets, no matter what is the ether type - if (desc->ip_id == 2) { - spec_eth->val.ether_type = 0x0806; - spec_eth->mask.ether_type = 0x0000; - goto create_flow; - } -#endif switch (desc->type) { case HASH_RXQ_IPV4: @@ -346,10 +306,10 @@ priv_fdir_flow_add(struct priv *priv, spec_offset += spec_eth->size; /* Set IP spec */ - spec_ipv4 = (struct ibv_exp_flow_spec_ipv4_ext *)spec_offset; + spec_ipv4 = (struct ibv_exp_flow_spec_ipv4 *)spec_offset; /* The second specification must be IP. */ - assert(spec_ipv4->type == IBV_EXP_FLOW_SPEC_IPV4_EXT); + assert(spec_ipv4->type == IBV_EXP_FLOW_SPEC_IPV4); assert(spec_ipv4->size == sizeof(*spec_ipv4)); spec_ipv4->val.src_ip = @@ -359,21 +319,6 @@ priv_fdir_flow_add(struct priv *priv, spec_ipv4->mask.src_ip = mask->ipv4_mask.src_ip; spec_ipv4->mask.dst_ip = mask->ipv4_mask.dst_ip; - /* PROTO */ - spec_ipv4->val.proto = desc->proto & mask->ipv4_mask.proto; - spec_ipv4->mask.proto = mask->ipv4_mask.proto; - -#ifdef TREX_PATCH - /* TOS */ - if (desc->ip_id == 1) { - spec_ipv4->mask.tos = 0x1; - spec_ipv4->val.tos = 0x1; - } else { - spec_ipv4->mask.tos = 0x0; - spec_ipv4->val.tos = 0x0; - } - // spec_ipv4->val.tos = desc->tos & spec_ipv4->mask.tos;// & mask->ipv4_mask.tos; -#endif /* Update priority */ attr->priority = 1; @@ -388,10 +333,10 @@ priv_fdir_flow_add(struct priv *priv, spec_offset += spec_eth->size; /* Set IP spec */ - spec_ipv6 = (struct ibv_exp_flow_spec_ipv6_ext *)spec_offset; + spec_ipv6 = (struct ibv_exp_flow_spec_ipv6 *)spec_offset; /* The second specification must be IP. */ - assert(spec_ipv6->type == IBV_EXP_FLOW_SPEC_IPV6_EXT); + assert(spec_ipv6->type == IBV_EXP_FLOW_SPEC_IPV6); assert(spec_ipv6->size == sizeof(*spec_ipv6)); for (i = 0; i != RTE_DIM(desc->src_ip); ++i) { @@ -407,20 +352,6 @@ priv_fdir_flow_add(struct priv *priv, mask->ipv6_mask.dst_ip, sizeof(spec_ipv6->mask.dst_ip)); - spec_ipv6->val.next_hdr = desc->proto & mask->ipv6_mask.proto; - spec_ipv6->mask.next_hdr = mask->ipv6_mask.proto; - -#ifdef TREX_PATCH - /* TOS */ - if (desc->ip_id == 1) { - spec_ipv6->mask.traffic_class = 0x1; - spec_ipv6->val.traffic_class = 0x1; - } else { - spec_ipv6->mask.traffic_class = 0; - spec_ipv6->val.traffic_class = 0; - } -#endif - /* Update priority */ attr->priority = 1; @@ -470,6 +401,145 @@ create_flow: } /** + * Destroy a flow director queue. + * + * @param fdir_queue + * Flow director queue to be destroyed. + */ +void +priv_fdir_queue_destroy(struct priv *priv, struct fdir_queue *fdir_queue) +{ + struct mlx5_fdir_filter *fdir_filter; + + /* Disable filter flows still applying to this queue. 
*/ + LIST_FOREACH(fdir_filter, priv->fdir_filter_list, next) { + unsigned int idx = fdir_filter->queue; + struct rxq_ctrl *rxq_ctrl = + container_of((*priv->rxqs)[idx], struct rxq_ctrl, rxq); + + assert(idx < priv->rxqs_n); + if (fdir_queue == rxq_ctrl->fdir_queue && + fdir_filter->flow != NULL) { + claim_zero(ibv_exp_destroy_flow(fdir_filter->flow)); + fdir_filter->flow = NULL; + } + } + assert(fdir_queue->qp); + claim_zero(ibv_destroy_qp(fdir_queue->qp)); + assert(fdir_queue->ind_table); + claim_zero(ibv_exp_destroy_rwq_ind_table(fdir_queue->ind_table)); + if (fdir_queue->wq) + claim_zero(ibv_exp_destroy_wq(fdir_queue->wq)); + if (fdir_queue->cq) + claim_zero(ibv_destroy_cq(fdir_queue->cq)); +#ifndef NDEBUG + memset(fdir_queue, 0x2a, sizeof(*fdir_queue)); +#endif + rte_free(fdir_queue); +} + +/** + * Create a flow director queue. + * + * @param priv + * Private structure. + * @param wq + * Work queue to route matched packets to, NULL if one needs to + * be created. + * + * @return + * Related flow director queue on success, NULL otherwise. + */ +static struct fdir_queue * +priv_fdir_queue_create(struct priv *priv, struct ibv_exp_wq *wq, + unsigned int socket) +{ + struct fdir_queue *fdir_queue; + + fdir_queue = rte_calloc_socket(__func__, 1, sizeof(*fdir_queue), + 0, socket); + if (!fdir_queue) { + ERROR("cannot allocate flow director queue"); + return NULL; + } + assert(priv->pd); + assert(priv->ctx); + if (!wq) { + fdir_queue->cq = ibv_exp_create_cq( + priv->ctx, 1, NULL, NULL, 0, + &(struct ibv_exp_cq_init_attr){ + .comp_mask = 0, + }); + if (!fdir_queue->cq) { + ERROR("cannot create flow director CQ"); + goto error; + } + fdir_queue->wq = ibv_exp_create_wq( + priv->ctx, + &(struct ibv_exp_wq_init_attr){ + .wq_type = IBV_EXP_WQT_RQ, + .max_recv_wr = 1, + .max_recv_sge = 1, + .pd = priv->pd, + .cq = fdir_queue->cq, + }); + if (!fdir_queue->wq) { + ERROR("cannot create flow director WQ"); + goto error; + } + wq = fdir_queue->wq; + } + fdir_queue->ind_table = ibv_exp_create_rwq_ind_table( + priv->ctx, + &(struct ibv_exp_rwq_ind_table_init_attr){ + .pd = priv->pd, + .log_ind_tbl_size = 0, + .ind_tbl = &wq, + .comp_mask = 0, + }); + if (!fdir_queue->ind_table) { + ERROR("cannot create flow director indirection table"); + goto error; + } + fdir_queue->qp = ibv_exp_create_qp( + priv->ctx, + &(struct ibv_exp_qp_init_attr){ + .qp_type = IBV_QPT_RAW_PACKET, + .comp_mask = + IBV_EXP_QP_INIT_ATTR_PD | + IBV_EXP_QP_INIT_ATTR_PORT | + IBV_EXP_QP_INIT_ATTR_RX_HASH, + .pd = priv->pd, + .rx_hash_conf = &(struct ibv_exp_rx_hash_conf){ + .rx_hash_function = + IBV_EXP_RX_HASH_FUNC_TOEPLITZ, + .rx_hash_key_len = rss_hash_default_key_len, + .rx_hash_key = rss_hash_default_key, + .rx_hash_fields_mask = 0, + .rwq_ind_tbl = fdir_queue->ind_table, + }, + .port_num = priv->port, + }); + if (!fdir_queue->qp) { + ERROR("cannot create flow director hash RX QP"); + goto error; + } + return fdir_queue; +error: + assert(fdir_queue); + assert(!fdir_queue->qp); + if (fdir_queue->ind_table) + claim_zero(ibv_exp_destroy_rwq_ind_table + (fdir_queue->ind_table)); + if (fdir_queue->wq) + claim_zero(ibv_exp_destroy_wq(fdir_queue->wq)); + if (fdir_queue->cq) + claim_zero(ibv_destroy_cq(fdir_queue->cq)); + rte_free(fdir_queue); + return NULL; +} + +/** * Get flow director queue for a specific RX queue, create it in case * it does not exist. 
* @@ -486,74 +556,42 @@ priv_get_fdir_queue(struct priv *priv, uint16_t idx) { struct rxq_ctrl *rxq_ctrl = container_of((*priv->rxqs)[idx], struct rxq_ctrl, rxq); - struct fdir_queue *fdir_queue = &rxq_ctrl->fdir_queue; - struct ibv_exp_rwq_ind_table *ind_table = NULL; - struct ibv_qp *qp = NULL; - struct ibv_exp_rwq_ind_table_init_attr ind_init_attr; - struct ibv_exp_rx_hash_conf hash_conf; - struct ibv_exp_qp_init_attr qp_init_attr; - int err = 0; - - /* Return immediately if it has already been created. */ - if (fdir_queue->qp != NULL) - return fdir_queue; - - ind_init_attr = (struct ibv_exp_rwq_ind_table_init_attr){ - .pd = priv->pd, - .log_ind_tbl_size = 0, - .ind_tbl = &rxq_ctrl->wq, - .comp_mask = 0, - }; + struct fdir_queue *fdir_queue = rxq_ctrl->fdir_queue; - errno = 0; - ind_table = ibv_exp_create_rwq_ind_table(priv->ctx, - &ind_init_attr); - if (ind_table == NULL) { - /* Not clear whether errno is set. */ - err = (errno ? errno : EINVAL); - ERROR("RX indirection table creation failed with error %d: %s", - err, strerror(err)); - goto error; - } - - /* Create fdir_queue qp. */ - hash_conf = (struct ibv_exp_rx_hash_conf){ - .rx_hash_function = IBV_EXP_RX_HASH_FUNC_TOEPLITZ, - .rx_hash_key_len = rss_hash_default_key_len, - .rx_hash_key = rss_hash_default_key, - .rx_hash_fields_mask = 0, - .rwq_ind_tbl = ind_table, - }; - qp_init_attr = (struct ibv_exp_qp_init_attr){ - .max_inl_recv = 0, /* Currently not supported. */ - .qp_type = IBV_QPT_RAW_PACKET, - .comp_mask = (IBV_EXP_QP_INIT_ATTR_PD | - IBV_EXP_QP_INIT_ATTR_RX_HASH), - .pd = priv->pd, - .rx_hash_conf = &hash_conf, - .port_num = priv->port, - }; - - qp = ibv_exp_create_qp(priv->ctx, &qp_init_attr); - if (qp == NULL) { - err = (errno ? errno : EINVAL); - ERROR("hash RX QP creation failure: %s", strerror(err)); - goto error; + assert(rxq_ctrl->wq); + if (fdir_queue == NULL) { + fdir_queue = priv_fdir_queue_create(priv, rxq_ctrl->wq, + rxq_ctrl->socket); + rxq_ctrl->fdir_queue = fdir_queue; } - - fdir_queue->ind_table = ind_table; - fdir_queue->qp = qp; - return fdir_queue; +} -error: - if (qp != NULL) - claim_zero(ibv_destroy_qp(qp)); - - if (ind_table != NULL) - claim_zero(ibv_exp_destroy_rwq_ind_table(ind_table)); +/** + * Get or flow director drop queue. Create it if it does not exist. + * + * @param priv + * Private structure. + * + * @return + * Flow director drop queue on success, NULL otherwise. + */ +static struct fdir_queue * +priv_get_fdir_drop_queue(struct priv *priv) +{ + struct fdir_queue *fdir_queue = priv->fdir_drop_queue; - return NULL; + if (fdir_queue == NULL) { + unsigned int socket = SOCKET_ID_ANY; + + /* Select a known NUMA socket if possible. */ + if (priv->rxqs_n && (*priv->rxqs)[0]) + socket = container_of((*priv->rxqs)[0], + struct rxq_ctrl, rxq)->socket; + fdir_queue = priv_fdir_queue_create(priv, NULL, socket); + priv->fdir_drop_queue = fdir_queue; + } + return fdir_queue; } /** @@ -578,7 +616,11 @@ priv_fdir_filter_enable(struct priv *priv, return 0; /* Get fdir_queue for specific queue. 
*/ - fdir_queue = priv_get_fdir_queue(priv, mlx5_fdir_filter->queue); + if (mlx5_fdir_filter->behavior == RTE_ETH_FDIR_REJECT) + fdir_queue = priv_get_fdir_drop_queue(priv); + else + fdir_queue = priv_get_fdir_queue(priv, + mlx5_fdir_filter->queue); if (fdir_queue == NULL) { ERROR("failed to create flow director rxq for queue %d", @@ -671,7 +713,6 @@ priv_fdir_disable(struct priv *priv) { unsigned int i; struct mlx5_fdir_filter *mlx5_fdir_filter; - struct fdir_queue *fdir_queue; /* Run on every flow director filter and destroy flow handle. */ LIST_FOREACH(mlx5_fdir_filter, priv->fdir_filter_list, next) { @@ -688,23 +729,19 @@ priv_fdir_disable(struct priv *priv) } } - /* Run on every RX queue to destroy related flow director QP and - * indirection table. */ + /* Destroy flow director context in each RX queue. */ for (i = 0; (i != priv->rxqs_n); i++) { struct rxq_ctrl *rxq_ctrl = container_of((*priv->rxqs)[i], struct rxq_ctrl, rxq); - fdir_queue = &rxq_ctrl->fdir_queue; - if (fdir_queue->qp != NULL) { - claim_zero(ibv_destroy_qp(fdir_queue->qp)); - fdir_queue->qp = NULL; - } - - if (fdir_queue->ind_table != NULL) { - claim_zero(ibv_exp_destroy_rwq_ind_table - (fdir_queue->ind_table)); - fdir_queue->ind_table = NULL; - } + if (!rxq_ctrl->fdir_queue) + continue; + priv_fdir_queue_destroy(priv, rxq_ctrl->fdir_queue); + rxq_ctrl->fdir_queue = NULL; + } + if (priv->fdir_drop_queue) { + priv_fdir_queue_destroy(priv, priv->fdir_drop_queue); + priv->fdir_drop_queue = NULL; } } @@ -792,10 +829,8 @@ priv_fdir_filter_add(struct priv *priv, /* Duplicate filters are currently unsupported. */ mlx5_fdir_filter = priv_find_filter_in_list(priv, fdir_filter); if (mlx5_fdir_filter != NULL) { -#ifndef TREX_PATCH ERROR("filter already exists"); -#endif - return EEXIST; + return EINVAL; } /* Create new flow director filter. */ @@ -808,8 +843,9 @@ priv_fdir_filter_add(struct priv *priv, return err; } - /* Set queue. */ + /* Set action parameters. */ mlx5_fdir_filter->queue = fdir_filter->action.rx_queue; + mlx5_fdir_filter->behavior = fdir_filter->action.behavior; /* Convert to mlx5 filter descriptor. */ fdir_filter_to_flow_desc(fdir_filter, @@ -919,11 +955,9 @@ priv_fdir_filter_delete(struct priv *priv, return 0; } -#ifndef TREX_PATCH ERROR("%p: flow director delete failed, cannot find filter", (void *)priv); -#endif - return ENOENT; + return EINVAL; } /** @@ -1029,7 +1063,7 @@ mlx5_dev_filter_ctrl(struct rte_eth_dev *dev, enum rte_filter_op filter_op, void *arg) { - int ret = -EINVAL; + int ret = EINVAL; struct priv *priv = dev->data->dev_private; switch (filter_type) { @@ -1044,5 +1078,5 @@ mlx5_dev_filter_ctrl(struct rte_eth_dev *dev, break; } - return ret; + return -ret; } diff --git a/src/dpdk/drivers/net/mlx5/mlx5_mac.c b/src/dpdk/drivers/net/mlx5/mlx5_mac.c index f6b27bb8..4fcfd3b8 100644 --- a/src/dpdk/drivers/net/mlx5/mlx5_mac.c +++ b/src/dpdk/drivers/net/mlx5/mlx5_mac.c @@ -44,22 +44,22 @@ /* Verbs header. */ /* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */ #ifdef PEDANTIC -#pragma GCC diagnostic ignored "-pedantic" +#pragma GCC diagnostic ignored "-Wpedantic" #endif #include <infiniband/verbs.h> #ifdef PEDANTIC -#pragma GCC diagnostic error "-pedantic" +#pragma GCC diagnostic error "-Wpedantic" #endif /* DPDK headers don't like -pedantic. 
*/ #ifdef PEDANTIC -#pragma GCC diagnostic ignored "-pedantic" +#pragma GCC diagnostic ignored "-Wpedantic" #endif #include <rte_ether.h> #include <rte_ethdev.h> #include <rte_common.h> #ifdef PEDANTIC -#pragma GCC diagnostic error "-pedantic" +#pragma GCC diagnostic error "-Wpedantic" #endif #include "mlx5.h" diff --git a/src/dpdk/drivers/net/mlx5/mlx5_mr.c b/src/dpdk/drivers/net/mlx5/mlx5_mr.c index 67dfefa8..0a363846 100644 --- a/src/dpdk/drivers/net/mlx5/mlx5_mr.c +++ b/src/dpdk/drivers/net/mlx5/mlx5_mr.c @@ -34,20 +34,20 @@ /* Verbs header. */ /* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */ #ifdef PEDANTIC -#pragma GCC diagnostic ignored "-pedantic" +#pragma GCC diagnostic ignored "-Wpedantic" #endif #include <infiniband/verbs.h> #ifdef PEDANTIC -#pragma GCC diagnostic error "-pedantic" +#pragma GCC diagnostic error "-Wpedantic" #endif /* DPDK headers don't like -pedantic. */ #ifdef PEDANTIC -#pragma GCC diagnostic ignored "-pedantic" +#pragma GCC diagnostic ignored "-Wpedantic" #endif #include <rte_mempool.h> #ifdef PEDANTIC -#pragma GCC diagnostic error "-pedantic" +#pragma GCC diagnostic error "-Wpedantic" #endif #include "mlx5.h" diff --git a/src/dpdk/drivers/net/mlx5/mlx5_prm.h b/src/dpdk/drivers/net/mlx5/mlx5_prm.h index 5db219b3..8426adb3 100644 --- a/src/dpdk/drivers/net/mlx5/mlx5_prm.h +++ b/src/dpdk/drivers/net/mlx5/mlx5_prm.h @@ -37,13 +37,15 @@ /* Verbs header. */ /* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */ #ifdef PEDANTIC -#pragma GCC diagnostic ignored "-pedantic" +#pragma GCC diagnostic ignored "-Wpedantic" #endif #include <infiniband/mlx5_hw.h> #ifdef PEDANTIC -#pragma GCC diagnostic error "-pedantic" +#pragma GCC diagnostic error "-Wpedantic" #endif +#include "mlx5_autoconf.h" + /* Get CQE owner bit. */ #define MLX5_CQE_OWNER(op_own) ((op_own) & MLX5_CQE_OWNER_MASK) @@ -65,12 +67,44 @@ /* Maximum number of packets a multi-packet WQE can handle. */ #define MLX5_MPW_DSEG_MAX 5 -/* Room for inline data in regular work queue element. */ -#define MLX5_WQE64_INL_DATA 12 +/* WQE DWORD size */ +#define MLX5_WQE_DWORD_SIZE 16 + +/* WQE size */ +#define MLX5_WQE_SIZE (4 * MLX5_WQE_DWORD_SIZE) + +/* Compute the number of DS. */ +#define MLX5_WQE_DS(n) \ + (((n) + MLX5_WQE_DWORD_SIZE - 1) / MLX5_WQE_DWORD_SIZE) /* Room for inline data in multi-packet WQE. */ #define MLX5_MWQE64_INL_DATA 28 +//#ifndef HAVE_VERBS_MLX5_OPCODE_TSO +//#define MLX5_OPCODE_TSO MLX5_OPCODE_LSO_MPW /* Compat with OFED 3.3. */ +//#endif + +/* IPv4 packet. */ +#define MLX5_CQE_RX_IPV4_PACKET (1u << 2) + +/* IPv6 packet. */ +#define MLX5_CQE_RX_IPV6_PACKET (1u << 3) + +/* Outer IPv4 packet. */ +#define MLX5_CQE_RX_OUTER_IPV4_PACKET (1u << 7) + +/* Outer IPv6 packet. */ +#define MLX5_CQE_RX_OUTER_IPV6_PACKET (1u << 8) + +/* Tunnel packet bit in the CQE. */ +#define MLX5_CQE_RX_TUNNEL_PACKET (1u << 4) + +/* Outer IP checksum OK. */ +#define MLX5_CQE_RX_OUTER_IP_CSUM_OK (1u << 5) + +/* Outer UDP header and checksum OK. */ +#define MLX5_CQE_RX_OUTER_TCP_UDP_CSUM_OK (1u << 6) + /* Subset of struct mlx5_wqe_eth_seg. */ struct mlx5_wqe_eth_seg_small { uint32_t rsvd0; @@ -79,59 +113,26 @@ struct mlx5_wqe_eth_seg_small { uint16_t mss; uint32_t rsvd2; uint16_t inline_hdr_sz; + uint8_t inline_hdr[2]; }; -/* Regular WQE. */ -struct mlx5_wqe_regular { - union { - struct mlx5_wqe_ctrl_seg ctrl; - uint32_t data[4]; - } ctrl; - struct mlx5_wqe_eth_seg eseg; - struct mlx5_wqe_data_seg dseg; -} __rte_aligned(64); - -/* Inline WQE. 
*/ -struct mlx5_wqe_inl { - union { - struct mlx5_wqe_ctrl_seg ctrl; - uint32_t data[4]; - } ctrl; - struct mlx5_wqe_eth_seg eseg; +struct mlx5_wqe_inl_small { uint32_t byte_cnt; - uint8_t data[MLX5_WQE64_INL_DATA]; -} __rte_aligned(64); + uint8_t raw; +}; -/* Multi-packet WQE. */ -struct mlx5_wqe_mpw { - union { - struct mlx5_wqe_ctrl_seg ctrl; - uint32_t data[4]; - } ctrl; +/* Small common part of the WQE. */ +struct mlx5_wqe { + uint32_t ctrl[4]; struct mlx5_wqe_eth_seg_small eseg; - struct mlx5_wqe_data_seg dseg[2]; -} __rte_aligned(64); +}; -/* Multi-packet WQE with inline. */ -struct mlx5_wqe_mpw_inl { - union { - struct mlx5_wqe_ctrl_seg ctrl; - uint32_t data[4]; - } ctrl; - struct mlx5_wqe_eth_seg_small eseg; - uint32_t byte_cnt; - uint8_t data[MLX5_MWQE64_INL_DATA]; +/* WQE. */ +struct mlx5_wqe64 { + struct mlx5_wqe hdr; + uint8_t raw[32]; } __rte_aligned(64); -/* Union of all WQE types. */ -union mlx5_wqe { - struct mlx5_wqe_regular wqe; - struct mlx5_wqe_inl inl; - struct mlx5_wqe_mpw mpw; - struct mlx5_wqe_mpw_inl mpw_inl; - uint8_t data[64]; -}; - /* MPW session status. */ enum mlx5_mpw_state { MLX5_MPW_STATE_OPENED, @@ -145,7 +146,7 @@ struct mlx5_mpw { unsigned int pkts_n; unsigned int len; unsigned int total_len; - volatile union mlx5_wqe *wqe; + volatile struct mlx5_wqe *wqe; union { volatile struct mlx5_wqe_data_seg *dseg[MLX5_MPW_DSEG_MAX]; volatile uint8_t *raw; @@ -157,7 +158,21 @@ struct mlx5_cqe { #if (RTE_CACHE_LINE_SIZE == 128) uint8_t padding[64]; #endif - struct mlx5_cqe64 cqe64; + uint8_t pkt_info; + uint8_t rsvd0[11]; + uint32_t rx_hash_res; + uint8_t rx_hash_type; + uint8_t rsvd1[11]; + uint8_t hds_ip_ext; + uint8_t l4_hdr_type_etc; + uint16_t vlan_info; + uint8_t rsvd2[12]; + uint32_t byte_cnt; + uint64_t timestamp; + uint8_t rsvd3[4]; + uint16_t wqe_counter; + uint8_t rsvd4; + uint8_t op_own; }; #endif /* RTE_PMD_MLX5_PRM_H_ */ diff --git a/src/dpdk/drivers/net/mlx5/mlx5_rss.c b/src/dpdk/drivers/net/mlx5/mlx5_rss.c index 639e935b..0bed74ee 100644 --- a/src/dpdk/drivers/net/mlx5/mlx5_rss.c +++ b/src/dpdk/drivers/net/mlx5/mlx5_rss.c @@ -40,21 +40,21 @@ /* Verbs header. */ /* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */ #ifdef PEDANTIC -#pragma GCC diagnostic ignored "-pedantic" +#pragma GCC diagnostic ignored "-Wpedantic" #endif #include <infiniband/verbs.h> #ifdef PEDANTIC -#pragma GCC diagnostic error "-pedantic" +#pragma GCC diagnostic error "-Wpedantic" #endif /* DPDK headers don't like -pedantic. */ #ifdef PEDANTIC -#pragma GCC diagnostic ignored "-pedantic" +#pragma GCC diagnostic ignored "-Wpedantic" #endif #include <rte_malloc.h> #include <rte_ethdev.h> #ifdef PEDANTIC -#pragma GCC diagnostic error "-pedantic" +#pragma GCC diagnostic error "-Wpedantic" #endif #include "mlx5.h" diff --git a/src/dpdk/drivers/net/mlx5/mlx5_rxmode.c b/src/dpdk/drivers/net/mlx5/mlx5_rxmode.c index 8b585554..173e6e84 100644 --- a/src/dpdk/drivers/net/mlx5/mlx5_rxmode.c +++ b/src/dpdk/drivers/net/mlx5/mlx5_rxmode.c @@ -38,20 +38,20 @@ /* Verbs header. */ /* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */ #ifdef PEDANTIC -#pragma GCC diagnostic ignored "-pedantic" +#pragma GCC diagnostic ignored "-Wpedantic" #endif #include <infiniband/verbs.h> #ifdef PEDANTIC -#pragma GCC diagnostic error "-pedantic" +#pragma GCC diagnostic error "-Wpedantic" #endif /* DPDK headers don't like -pedantic. 
*/ #ifdef PEDANTIC -#pragma GCC diagnostic ignored "-pedantic" +#pragma GCC diagnostic ignored "-Wpedantic" #endif #include <rte_ethdev.h> #ifdef PEDANTIC -#pragma GCC diagnostic error "-pedantic" +#pragma GCC diagnostic error "-Wpedantic" #endif #include "mlx5.h" diff --git a/src/dpdk/drivers/net/mlx5/mlx5_rxq.c b/src/dpdk/drivers/net/mlx5/mlx5_rxq.c index 6be01d39..28e93d3e 100644 --- a/src/dpdk/drivers/net/mlx5/mlx5_rxq.c +++ b/src/dpdk/drivers/net/mlx5/mlx5_rxq.c @@ -40,25 +40,25 @@ /* Verbs header. */ /* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */ #ifdef PEDANTIC -#pragma GCC diagnostic ignored "-pedantic" +#pragma GCC diagnostic ignored "-Wpedantic" #endif #include <infiniband/verbs.h> #include <infiniband/arch.h> #include <infiniband/mlx5_hw.h> #ifdef PEDANTIC -#pragma GCC diagnostic error "-pedantic" +#pragma GCC diagnostic error "-Wpedantic" #endif /* DPDK headers don't like -pedantic. */ #ifdef PEDANTIC -#pragma GCC diagnostic ignored "-pedantic" +#pragma GCC diagnostic ignored "-Wpedantic" #endif #include <rte_mbuf.h> #include <rte_malloc.h> #include <rte_ethdev.h> #include <rte_common.h> #ifdef PEDANTIC -#pragma GCC diagnostic error "-pedantic" +#pragma GCC diagnostic error "-Wpedantic" #endif #include "mlx5.h" @@ -102,7 +102,7 @@ const struct hash_rxq_init hash_rxq_init[] = { ETH_RSS_FRAG_IPV4), .flow_priority = 1, .flow_spec.ipv4 = { - .type = IBV_EXP_FLOW_SPEC_IPV4_EXT, + .type = IBV_EXP_FLOW_SPEC_IPV4, .size = sizeof(hash_rxq_init[0].flow_spec.ipv4), }, .underlayer = &hash_rxq_init[HASH_RXQ_ETH], @@ -140,7 +140,7 @@ const struct hash_rxq_init hash_rxq_init[] = { ETH_RSS_FRAG_IPV6), .flow_priority = 1, .flow_spec.ipv6 = { - .type = IBV_EXP_FLOW_SPEC_IPV6_EXT, + .type = IBV_EXP_FLOW_SPEC_IPV6, .size = sizeof(hash_rxq_init[0].flow_spec.ipv6), }, .underlayer = &hash_rxq_init[HASH_RXQ_ETH], @@ -723,7 +723,7 @@ rxq_free_elts(struct rxq_ctrl *rxq_ctrl) if (rxq_ctrl->rxq.elts == NULL) return; - for (i = 0; (i != rxq_ctrl->rxq.elts_n); ++i) { + for (i = 0; (i != (1u << rxq_ctrl->rxq.elts_n)); ++i) { if ((*rxq_ctrl->rxq.elts)[i] != NULL) rte_pktmbuf_free_seg((*rxq_ctrl->rxq.elts)[i]); (*rxq_ctrl->rxq.elts)[i] = NULL; @@ -745,6 +745,8 @@ rxq_cleanup(struct rxq_ctrl *rxq_ctrl) DEBUG("cleaning up %p", (void *)rxq_ctrl); rxq_free_elts(rxq_ctrl); + if (rxq_ctrl->fdir_queue != NULL) + priv_fdir_queue_destroy(rxq_ctrl->priv, rxq_ctrl->fdir_queue); if (rxq_ctrl->if_wq != NULL) { assert(rxq_ctrl->priv != NULL); assert(rxq_ctrl->priv->ctx != NULL); @@ -805,7 +807,7 @@ rxq_cleanup(struct rxq_ctrl *rxq_ctrl) int rxq_rehash(struct rte_eth_dev *dev, struct rxq_ctrl *rxq_ctrl) { - unsigned int elts_n = rxq_ctrl->rxq.elts_n; + unsigned int elts_n = 1 << rxq_ctrl->rxq.elts_n; unsigned int i; struct ibv_exp_wq_attr mod; int err; @@ -868,7 +870,7 @@ rxq_setup(struct rxq_ctrl *tmpl) struct ibv_cq *ibcq = tmpl->cq; struct mlx5_cq *cq = to_mxxx(cq, cq); struct mlx5_rwq *rwq = container_of(tmpl->wq, struct mlx5_rwq, wq); - struct rte_mbuf *(*elts)[tmpl->rxq.elts_n] = + struct rte_mbuf *(*elts)[1 << tmpl->rxq.elts_n] = rte_calloc_socket("RXQ", 1, sizeof(*elts), 0, tmpl->socket); if (cq->cqe_sz != RTE_CACHE_LINE_SIZE) { @@ -879,7 +881,7 @@ rxq_setup(struct rxq_ctrl *tmpl) if (elts == NULL) return ENOMEM; tmpl->rxq.rq_db = rwq->rq.db; - tmpl->rxq.cqe_n = ibcq->cqe + 1; + tmpl->rxq.cqe_n = log2above(ibcq->cqe); tmpl->rxq.cq_ci = 0; tmpl->rxq.rq_ci = 0; tmpl->rxq.cq_db = cq->dbrec; @@ -922,8 +924,9 @@ rxq_ctrl_setup(struct rte_eth_dev *dev, struct rxq_ctrl *rxq_ctrl, .priv = priv, 
.socket = socket, .rxq = { - .elts_n = desc, + .elts_n = log2above(desc), .mp = mp, + .rss_hash = priv->rxqs_n > 1, }, }; struct ibv_exp_wq_attr mod; @@ -943,6 +946,11 @@ rxq_ctrl_setup(struct rte_eth_dev *dev, struct rxq_ctrl *rxq_ctrl, (void)conf; /* Thresholds configuration (ignored). */ /* Enable scattered packets support for this queue if necessary. */ assert(mb_len >= RTE_PKTMBUF_HEADROOM); + /* If smaller than MRU, multi-segment support must be enabled. */ + if (mb_len < (priv->mtu > dev->data->dev_conf.rxmode.max_rx_pkt_len ? + dev->data->dev_conf.rxmode.max_rx_pkt_len : + priv->mtu)) + dev->data->dev_conf.rxmode.jumbo_frame = 1; if ((dev->data->dev_conf.rxmode.jumbo_frame) && (dev->data->dev_conf.rxmode.max_rx_pkt_len > (mb_len - RTE_PKTMBUF_HEADROOM))) { @@ -1146,7 +1154,7 @@ rxq_ctrl_setup(struct rte_eth_dev *dev, struct rxq_ctrl *rxq_ctrl, } /* Reuse buffers from original queue if possible. */ if (rxq_ctrl->rxq.elts_n) { - assert(rxq_ctrl->rxq.elts_n == desc); + assert(1 << rxq_ctrl->rxq.elts_n == desc); assert(rxq_ctrl->rxq.elts != tmpl.rxq.elts); ret = rxq_alloc_elts(&tmpl, desc, rxq_ctrl->rxq.elts); } else @@ -1259,7 +1267,7 @@ mlx5_rx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc, (void *)dev, (void *)rxq_ctrl); (*priv->rxqs)[idx] = &rxq_ctrl->rxq; /* Update receive callback. */ - dev->rx_pkt_burst = mlx5_rx_burst; + priv_select_rx_function(priv); } priv_unlock(priv); return -ret; diff --git a/src/dpdk/drivers/net/mlx5/mlx5_rxtx.c b/src/dpdk/drivers/net/mlx5/mlx5_rxtx.c index c0bcfd03..b56c0a11 100644 --- a/src/dpdk/drivers/net/mlx5/mlx5_rxtx.c +++ b/src/dpdk/drivers/net/mlx5/mlx5_rxtx.c @@ -39,18 +39,18 @@ /* Verbs header. */ /* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */ #ifdef PEDANTIC -#pragma GCC diagnostic ignored "-pedantic" +#pragma GCC diagnostic ignored "-Wpedantic" #endif #include <infiniband/verbs.h> #include <infiniband/mlx5_hw.h> #include <infiniband/arch.h> #ifdef PEDANTIC -#pragma GCC diagnostic error "-pedantic" +#pragma GCC diagnostic error "-Wpedantic" #endif /* DPDK headers don't like -pedantic. */ #ifdef PEDANTIC -#pragma GCC diagnostic ignored "-pedantic" +#pragma GCC diagnostic ignored "-Wpedantic" #endif #include <rte_mbuf.h> #include <rte_mempool.h> @@ -59,7 +59,7 @@ #include <rte_branch_prediction.h> #include <rte_ether.h> #ifdef PEDANTIC -#pragma GCC diagnostic error "-pedantic" +#pragma GCC diagnostic error "-Wpedantic" #endif #include "mlx5.h" @@ -69,6 +69,8 @@ #include "mlx5_defs.h" #include "mlx5_prm.h" +//#define MLX5_OPCODE_TSO 0xe + #ifndef NDEBUG /** @@ -81,10 +83,10 @@ * 0 the first time. */ static inline int -check_cqe64_seen(volatile struct mlx5_cqe64 *cqe) +check_cqe_seen(volatile struct mlx5_cqe *cqe) { static const uint8_t magic[] = "seen"; - volatile uint8_t (*buf)[sizeof(cqe->rsvd40)] = &cqe->rsvd40; + volatile uint8_t (*buf)[sizeof(cqe->rsvd3)] = &cqe->rsvd3; int ret = 1; unsigned int i; @@ -99,9 +101,9 @@ check_cqe64_seen(volatile struct mlx5_cqe64 *cqe) #endif /* NDEBUG */ static inline int -check_cqe64(volatile struct mlx5_cqe64 *cqe, - unsigned int cqes_n, const uint16_t ci) - __attribute__((always_inline)); +check_cqe(volatile struct mlx5_cqe *cqe, + unsigned int cqes_n, const uint16_t ci) + __attribute__((always_inline)); /** * Check whether CQE is valid. @@ -117,8 +119,8 @@ check_cqe64(volatile struct mlx5_cqe64 *cqe, * 0 on success, 1 on failure. 
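Several hunks above switch elts_n, cqe_n and wqe_n from element counts to their base-2 logarithm via log2above(), and every consumer now recovers the count with 1 << n. A self-contained sketch of that convention, with log2above() written here as an assumed equivalent of the driver's helper:

#include <assert.h>

/* Assumed equivalent of the driver's log2above(): ceil(log2(v)). */
static unsigned int
log2above(unsigned int v)
{
	unsigned int l;
	int r;

	for (l = 0, r = 0; (v >> 1); ++l, v >>= 1)
		r |= (v & 1);
	return l + r;
}

int
main(void)
{
	unsigned int elts_n = log2above(512); /* fits a 4-bit field */
	unsigned int count = 1u << elts_n;    /* back to the element count */
	unsigned int mask = count - 1;        /* ring index wrap mask */

	assert(elts_n == 9 && count == 512 && mask == 511);
	assert(log2above(600) == 10);         /* non-powers round up */
	return 0;
}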
*/ static inline int -check_cqe64(volatile struct mlx5_cqe64 *cqe, - unsigned int cqes_n, const uint16_t ci) +check_cqe(volatile struct mlx5_cqe *cqe, + unsigned int cqes_n, const uint16_t ci) { uint16_t idx = ci & cqes_n; uint8_t op_own = cqe->op_own; @@ -136,14 +138,14 @@ check_cqe64(volatile struct mlx5_cqe64 *cqe, if ((syndrome == MLX5_CQE_SYNDROME_LOCAL_LENGTH_ERR) || (syndrome == MLX5_CQE_SYNDROME_REMOTE_ABORTED_ERR)) return 0; - if (!check_cqe64_seen(cqe)) + if (!check_cqe_seen(cqe)) ERROR("unexpected CQE error %u (0x%02x)" " syndrome 0x%02x", op_code, op_code, syndrome); return 1; } else if ((op_code != MLX5_CQE_RESP_SEND) && (op_code != MLX5_CQE_REQ)) { - if (!check_cqe64_seen(cqe)) + if (!check_cqe_seen(cqe)) ERROR("unexpected CQE opcode %u (0x%02x)", op_code, op_code); return 1; @@ -152,6 +154,9 @@ check_cqe64(volatile struct mlx5_cqe64 *cqe, return 0; } +static inline void +txq_complete(struct txq *txq) __attribute__((always_inline)); + /** * Manage TX completions. * @@ -160,34 +165,34 @@ check_cqe64(volatile struct mlx5_cqe64 *cqe, * @param txq * Pointer to TX queue structure. */ -static void +static inline void txq_complete(struct txq *txq) { - const unsigned int elts_n = txq->elts_n; - const unsigned int cqe_n = txq->cqe_n; + const unsigned int elts_n = 1 << txq->elts_n; + const unsigned int cqe_n = 1 << txq->cqe_n; const unsigned int cqe_cnt = cqe_n - 1; uint16_t elts_free = txq->elts_tail; uint16_t elts_tail; uint16_t cq_ci = txq->cq_ci; - volatile struct mlx5_cqe64 *cqe = NULL; - volatile union mlx5_wqe *wqe; + volatile struct mlx5_cqe *cqe = NULL; + volatile struct mlx5_wqe *wqe; do { - volatile struct mlx5_cqe64 *tmp; + volatile struct mlx5_cqe *tmp; - tmp = &(*txq->cqes)[cq_ci & cqe_cnt].cqe64; - if (check_cqe64(tmp, cqe_n, cq_ci)) + tmp = &(*txq->cqes)[cq_ci & cqe_cnt]; + if (check_cqe(tmp, cqe_n, cq_ci)) break; cqe = tmp; #ifndef NDEBUG if (MLX5_CQE_FORMAT(cqe->op_own) == MLX5_COMPRESSED) { - if (!check_cqe64_seen(cqe)) + if (!check_cqe_seen(cqe)) ERROR("unexpected compressed CQE, TX stopped"); return; } if ((MLX5_CQE_OPCODE(cqe->op_own) == MLX5_CQE_RESP_ERR) || (MLX5_CQE_OPCODE(cqe->op_own) == MLX5_CQE_REQ_ERR)) { - if (!check_cqe64_seen(cqe)) + if (!check_cqe_seen(cqe)) ERROR("unexpected error CQE, TX stopped"); return; } @@ -196,9 +201,10 @@ txq_complete(struct txq *txq) } while (1); if (unlikely(cqe == NULL)) return; - wqe = &(*txq->wqes)[htons(cqe->wqe_counter) & (txq->wqe_n - 1)]; - elts_tail = wqe->wqe.ctrl.data[3]; - assert(elts_tail < txq->wqe_n); + wqe = &(*txq->wqes)[htons(cqe->wqe_counter) & + ((1 << txq->wqe_n) - 1)].hdr; + elts_tail = wqe->ctrl[3]; + assert(elts_tail < (1 << txq->wqe_n)); /* Free buffers. */ while (elts_free != elts_tail) { struct rte_mbuf *elt = (*txq->elts)[elts_free]; @@ -284,235 +290,6 @@ txq_mp2mr(struct txq *txq, struct rte_mempool *mp) } /** - * Write a regular WQE. - * - * @param txq - * Pointer to TX queue structure. - * @param wqe - * Pointer to the WQE to fill. - * @param addr - * Buffer data address. - * @param length - * Packet length. - * @param lkey - * Memory region lkey. 
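In the reworked txq_complete above, the completion CQE's wqe_counter locates the last finished WQE, whose ctrl[3] holds the elts_head that was saved when completion was requested; buffers are then freed up to that index. A hedged sketch of the underlying ring walk over a power-of-two array (names are illustrative, not the driver's):

#include <assert.h>
#include <stddef.h>
#include <stdint.h>

#define ELTS_LOG2 4
#define ELTS_N (1u << ELTS_LOG2)

static void *elts[ELTS_N];

/* Stand-in for rte_pktmbuf_free_seg(). */
static void
free_buf(void *buf)
{
	(void)buf;
}

/* Free ring entries from tail up to (not including) new_tail. */
static uint16_t
ring_complete(uint16_t tail, uint16_t new_tail)
{
	while (tail != new_tail) {
		if (elts[tail] != NULL) {
			free_buf(elts[tail]);
			elts[tail] = NULL;
		}
		tail = (tail + 1) & (ELTS_N - 1);
	}
	return tail;
}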
- */ -static inline void -mlx5_wqe_write(struct txq *txq, volatile union mlx5_wqe *wqe, - uintptr_t addr, uint32_t length, uint32_t lkey) -{ - wqe->wqe.ctrl.data[0] = htonl((txq->wqe_ci << 8) | MLX5_OPCODE_SEND); - wqe->wqe.ctrl.data[1] = htonl((txq->qp_num_8s) | 4); - wqe->wqe.ctrl.data[2] = 0; - wqe->wqe.ctrl.data[3] = 0; - wqe->inl.eseg.rsvd0 = 0; - wqe->inl.eseg.rsvd1 = 0; - wqe->inl.eseg.mss = 0; - wqe->inl.eseg.rsvd2 = 0; - wqe->wqe.eseg.inline_hdr_sz = htons(MLX5_ETH_INLINE_HEADER_SIZE); - /* Copy the first 16 bytes into inline header. */ - rte_memcpy((uint8_t *)(uintptr_t)wqe->wqe.eseg.inline_hdr_start, - (uint8_t *)(uintptr_t)addr, - MLX5_ETH_INLINE_HEADER_SIZE); - addr += MLX5_ETH_INLINE_HEADER_SIZE; - length -= MLX5_ETH_INLINE_HEADER_SIZE; - /* Store remaining data in data segment. */ - wqe->wqe.dseg.byte_count = htonl(length); - wqe->wqe.dseg.lkey = lkey; - wqe->wqe.dseg.addr = htonll(addr); - /* Increment consumer index. */ - ++txq->wqe_ci; -} - -/** - * Write a regular WQE with VLAN. - * - * @param txq - * Pointer to TX queue structure. - * @param wqe - * Pointer to the WQE to fill. - * @param addr - * Buffer data address. - * @param length - * Packet length. - * @param lkey - * Memory region lkey. - * @param vlan_tci - * VLAN field to insert in packet. - */ -static inline void -mlx5_wqe_write_vlan(struct txq *txq, volatile union mlx5_wqe *wqe, - uintptr_t addr, uint32_t length, uint32_t lkey, - uint16_t vlan_tci) -{ - uint32_t vlan = htonl(0x81000000 | vlan_tci); - - wqe->wqe.ctrl.data[0] = htonl((txq->wqe_ci << 8) | MLX5_OPCODE_SEND); - wqe->wqe.ctrl.data[1] = htonl((txq->qp_num_8s) | 4); - wqe->wqe.ctrl.data[2] = 0; - wqe->wqe.ctrl.data[3] = 0; - wqe->inl.eseg.rsvd0 = 0; - wqe->inl.eseg.rsvd1 = 0; - wqe->inl.eseg.mss = 0; - wqe->inl.eseg.rsvd2 = 0; - wqe->wqe.eseg.inline_hdr_sz = htons(MLX5_ETH_VLAN_INLINE_HEADER_SIZE); - /* - * Copy 12 bytes of source & destination MAC address. - * Copy 4 bytes of VLAN. - * Copy 2 bytes of Ether type. - */ - rte_memcpy((uint8_t *)(uintptr_t)wqe->wqe.eseg.inline_hdr_start, - (uint8_t *)(uintptr_t)addr, 12); - rte_memcpy((uint8_t *)((uintptr_t)wqe->wqe.eseg.inline_hdr_start + 12), - &vlan, sizeof(vlan)); - rte_memcpy((uint8_t *)((uintptr_t)wqe->wqe.eseg.inline_hdr_start + 16), - (uint8_t *)((uintptr_t)addr + 12), 2); - addr += MLX5_ETH_VLAN_INLINE_HEADER_SIZE - sizeof(vlan); - length -= MLX5_ETH_VLAN_INLINE_HEADER_SIZE - sizeof(vlan); - /* Store remaining data in data segment. */ - wqe->wqe.dseg.byte_count = htonl(length); - wqe->wqe.dseg.lkey = lkey; - wqe->wqe.dseg.addr = htonll(addr); - /* Increment consumer index. */ - ++txq->wqe_ci; -} - -/** - * Write a inline WQE. - * - * @param txq - * Pointer to TX queue structure. - * @param wqe - * Pointer to the WQE to fill. - * @param addr - * Buffer data address. - * @param length - * Packet length. - * @param lkey - * Memory region lkey. - */ -static inline void -mlx5_wqe_write_inline(struct txq *txq, volatile union mlx5_wqe *wqe, - uintptr_t addr, uint32_t length) -{ - uint32_t size; - uint16_t wqe_cnt = txq->wqe_n - 1; - uint16_t wqe_ci = txq->wqe_ci + 1; - - /* Copy the first 16 bytes into inline header. 
*/ - rte_memcpy((void *)(uintptr_t)wqe->inl.eseg.inline_hdr_start, - (void *)(uintptr_t)addr, - MLX5_ETH_INLINE_HEADER_SIZE); - addr += MLX5_ETH_INLINE_HEADER_SIZE; - length -= MLX5_ETH_INLINE_HEADER_SIZE; - size = 3 + ((4 + length + 15) / 16); - wqe->inl.byte_cnt = htonl(length | MLX5_INLINE_SEG); - rte_memcpy((void *)(uintptr_t)&wqe->inl.data[0], - (void *)addr, MLX5_WQE64_INL_DATA); - addr += MLX5_WQE64_INL_DATA; - length -= MLX5_WQE64_INL_DATA; - while (length) { - volatile union mlx5_wqe *wqe_next = - &(*txq->wqes)[wqe_ci & wqe_cnt]; - uint32_t copy_bytes = (length > sizeof(*wqe)) ? - sizeof(*wqe) : - length; - - rte_mov64((uint8_t *)(uintptr_t)&wqe_next->data[0], - (uint8_t *)addr); - addr += copy_bytes; - length -= copy_bytes; - ++wqe_ci; - } - assert(size < 64); - wqe->inl.ctrl.data[0] = htonl((txq->wqe_ci << 8) | MLX5_OPCODE_SEND); - wqe->inl.ctrl.data[1] = htonl(txq->qp_num_8s | size); - wqe->inl.ctrl.data[2] = 0; - wqe->inl.ctrl.data[3] = 0; - wqe->inl.eseg.rsvd0 = 0; - wqe->inl.eseg.rsvd1 = 0; - wqe->inl.eseg.mss = 0; - wqe->inl.eseg.rsvd2 = 0; - wqe->inl.eseg.inline_hdr_sz = htons(MLX5_ETH_INLINE_HEADER_SIZE); - /* Increment consumer index. */ - txq->wqe_ci = wqe_ci; -} - -/** - * Write a inline WQE with VLAN. - * - * @param txq - * Pointer to TX queue structure. - * @param wqe - * Pointer to the WQE to fill. - * @param addr - * Buffer data address. - * @param length - * Packet length. - * @param lkey - * Memory region lkey. - * @param vlan_tci - * VLAN field to insert in packet. - */ -static inline void -mlx5_wqe_write_inline_vlan(struct txq *txq, volatile union mlx5_wqe *wqe, - uintptr_t addr, uint32_t length, uint16_t vlan_tci) -{ - uint32_t size; - uint32_t wqe_cnt = txq->wqe_n - 1; - uint16_t wqe_ci = txq->wqe_ci + 1; - uint32_t vlan = htonl(0x81000000 | vlan_tci); - - /* - * Copy 12 bytes of source & destination MAC address. - * Copy 4 bytes of VLAN. - * Copy 2 bytes of Ether type. - */ - rte_memcpy((uint8_t *)(uintptr_t)wqe->inl.eseg.inline_hdr_start, - (uint8_t *)addr, 12); - rte_memcpy((uint8_t *)(uintptr_t)wqe->inl.eseg.inline_hdr_start + 12, - &vlan, sizeof(vlan)); - rte_memcpy((uint8_t *)((uintptr_t)wqe->inl.eseg.inline_hdr_start + 16), - (uint8_t *)(addr + 12), 2); - addr += MLX5_ETH_VLAN_INLINE_HEADER_SIZE - sizeof(vlan); - length -= MLX5_ETH_VLAN_INLINE_HEADER_SIZE - sizeof(vlan); - size = (sizeof(wqe->inl.ctrl.ctrl) + - sizeof(wqe->inl.eseg) + - sizeof(wqe->inl.byte_cnt) + - length + 15) / 16; - wqe->inl.byte_cnt = htonl(length | MLX5_INLINE_SEG); - rte_memcpy((void *)(uintptr_t)&wqe->inl.data[0], - (void *)addr, MLX5_WQE64_INL_DATA); - addr += MLX5_WQE64_INL_DATA; - length -= MLX5_WQE64_INL_DATA; - while (length) { - volatile union mlx5_wqe *wqe_next = - &(*txq->wqes)[wqe_ci & wqe_cnt]; - uint32_t copy_bytes = (length > sizeof(*wqe)) ? - sizeof(*wqe) : - length; - - rte_mov64((uint8_t *)(uintptr_t)&wqe_next->data[0], - (uint8_t *)addr); - addr += copy_bytes; - length -= copy_bytes; - ++wqe_ci; - } - assert(size < 64); - wqe->inl.ctrl.data[0] = htonl((txq->wqe_ci << 8) | MLX5_OPCODE_SEND); - wqe->inl.ctrl.data[1] = htonl(txq->qp_num_8s | size); - wqe->inl.ctrl.data[2] = 0; - wqe->inl.ctrl.data[3] = 0; - wqe->inl.eseg.rsvd0 = 0; - wqe->inl.eseg.rsvd1 = 0; - wqe->inl.eseg.mss = 0; - wqe->inl.eseg.rsvd2 = 0; - wqe->inl.eseg.inline_hdr_sz = htons(MLX5_ETH_VLAN_INLINE_HEADER_SIZE); - /* Increment consumer index. */ - txq->wqe_ci = wqe_ci; -} - -/** * Ring TX queue doorbell. 
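The inline writers removed above size their WQEs in 16-byte units: the fixed control/Ethernet part counts as 3 units and the inlined payload, prefixed by a 4-byte byte count, is rounded up, hence size = 3 + ((4 + length + 15) / 16). The replacement code expresses the same rounding through MLX5_WQE_DS(). A worked check, assuming MLX5_WQE_DS(n) is ceil(n / 16):

#include <assert.h>

#define WQE_DWORD 16                                  /* one DS unit */
#define WQE_DS(n) (((n) + WQE_DWORD - 1) / WQE_DWORD) /* assumed MLX5_WQE_DS */

int
main(void)
{
	unsigned int length = 60; /* inline payload after the 16-byte header */
	/* 3 fixed units plus the 4-byte byte_cnt and the payload: */
	unsigned int size = 3 + WQE_DS(4 + length);

	assert(WQE_DS(4 + length) == 4); /* 64 bytes round to 4 units */
	assert(size == 7);               /* 7 * 16 = 112 bytes of WQE */
	return 0;
}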
* * @param txq @@ -532,8 +309,8 @@ mlx5_tx_dbrec(struct txq *txq) *txq->qp_db = htonl(txq->wqe_ci); /* Ensure ordering between DB record and BF copy. */ rte_wmb(); - rte_mov16(dst, (uint8_t *)data); - txq->bf_offset ^= txq->bf_buf_size; + memcpy(dst, (uint8_t *)data, 16); + txq->bf_offset ^= (1 << txq->bf_buf_size); } /** @@ -547,9 +324,9 @@ mlx5_tx_dbrec(struct txq *txq) static inline void tx_prefetch_cqe(struct txq *txq, uint16_t ci) { - volatile struct mlx5_cqe64 *cqe; + volatile struct mlx5_cqe *cqe; - cqe = &(*txq->cqes)[ci & (txq->cqe_n - 1)].cqe64; + cqe = &(*txq->cqes)[ci & ((1 << txq->cqe_n) - 1)]; rte_prefetch0(cqe); } @@ -564,9 +341,9 @@ tx_prefetch_cqe(struct txq *txq, uint16_t ci) static inline void tx_prefetch_wqe(struct txq *txq, uint16_t ci) { - volatile union mlx5_wqe *wqe; + volatile struct mlx5_wqe64 *wqe; - wqe = &(*txq->wqes)[ci & (txq->wqe_n - 1)]; + wqe = &(*txq->wqes)[ci & ((1 << txq->wqe_n) - 1)]; rte_prefetch0(wqe); } @@ -588,12 +365,15 @@ mlx5_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n) { struct txq *txq = (struct txq *)dpdk_txq; uint16_t elts_head = txq->elts_head; - const unsigned int elts_n = txq->elts_n; + const unsigned int elts_n = 1 << txq->elts_n; unsigned int i = 0; unsigned int j = 0; unsigned int max; unsigned int comp; - volatile union mlx5_wqe *wqe = NULL; + volatile struct mlx5_wqe *wqe = NULL; + unsigned int segs_n = 0; + struct rte_mbuf *buf = NULL; + uint8_t *raw; if (unlikely(!pkts_n)) return 0; @@ -607,15 +387,17 @@ mlx5_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n) if (max > elts_n) max -= elts_n; do { - struct rte_mbuf *buf = *(pkts++); - unsigned int elts_head_next; - uintptr_t addr; + volatile struct mlx5_wqe_data_seg *dseg = NULL; uint32_t length; - uint32_t lkey; - unsigned int segs_n = buf->nb_segs; - volatile struct mlx5_wqe_data_seg *dseg; - unsigned int ds = sizeof(*wqe) / 16; + unsigned int ds = 0; + uintptr_t addr; +#ifdef MLX5_PMD_SOFT_COUNTERS + uint32_t total_length = 0; +#endif + /* first_seg */ + buf = *(pkts++); + segs_n = buf->nb_segs; /* * Make sure there is enough room to store this packet and * that one ring entry remains unused. @@ -624,235 +406,180 @@ mlx5_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n) if (max < segs_n + 1) break; max -= segs_n; - --pkts_n; - elts_head_next = (elts_head + 1) & (elts_n - 1); - wqe = &(*txq->wqes)[txq->wqe_ci & (txq->wqe_n - 1)]; - dseg = &wqe->wqe.dseg; - rte_prefetch0(wqe); - if (pkts_n) + --segs_n; + if (!segs_n) + --pkts_n; + wqe = &(*txq->wqes)[txq->wqe_ci & + ((1 << txq->wqe_n) - 1)].hdr; + tx_prefetch_wqe(txq, txq->wqe_ci + 1); + if (pkts_n > 1) rte_prefetch0(*pkts); - /* Retrieve buffer information. */ addr = rte_pktmbuf_mtod(buf, uintptr_t); length = DATA_LEN(buf); +#ifdef MLX5_PMD_SOFT_COUNTERS + total_length = length; +#endif + assert(length >= MLX5_WQE_DWORD_SIZE); /* Update element. */ (*txq->elts)[elts_head] = buf; + elts_head = (elts_head + 1) & (elts_n - 1); /* Prefetch next buffer data. */ - if (pkts_n) - rte_prefetch0(rte_pktmbuf_mtod(*pkts, - volatile void *)); - /* Retrieve Memory Region key for this memory pool. 
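mlx5_tx_dbrec above now copies the 16 doorbell bytes with memcpy and toggles bf_offset by (1 << txq->bf_buf_size), since the BlueFlame size is stored as a log2. The essential ordering is unchanged: publish the producer index in the doorbell record, fence, then write the start of the WQE to the BlueFlame register. A sketch with stand-in names and a generic barrier:

#include <stdint.h>
#include <string.h>
#include <arpa/inet.h>   /* htonl() */

#define wmb() __sync_synchronize() /* stand-in for rte_wmb() */

struct txq_sketch {
	volatile uint32_t *qp_db; /* doorbell record */
	uint8_t *bf_reg;          /* BlueFlame register base */
	uint16_t bf_offset;
	uint16_t bf_buf_size;     /* log2 of the BlueFlame buffer size */
	uint16_t wqe_ci;
};

static void
ring_doorbell(struct txq_sketch *txq, const void *wqe)
{
	*txq->qp_db = htonl(txq->wqe_ci);  /* publish producer index */
	wmb();                             /* DB record before BF copy */
	memcpy(txq->bf_reg + txq->bf_offset, wqe, 16);
	txq->bf_offset ^= (1 << txq->bf_buf_size); /* alternate halves */
}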
*/ - lkey = txq_mp2mr(txq, txq_mb2mp(buf)); - if (buf->ol_flags & PKT_TX_VLAN_PKT) - mlx5_wqe_write_vlan(txq, wqe, addr, length, lkey, - buf->vlan_tci); - else - mlx5_wqe_write(txq, wqe, addr, length, lkey); + if (pkts_n > 1) { + volatile void *pkt_addr; + + pkt_addr = rte_pktmbuf_mtod(*pkts, volatile void *); + rte_prefetch0(pkt_addr); + } /* Should we enable HW CKSUM offload */ if (buf->ol_flags & (PKT_TX_IP_CKSUM | PKT_TX_TCP_CKSUM | PKT_TX_UDP_CKSUM)) { - wqe->wqe.eseg.cs_flags = + wqe->eseg.cs_flags = MLX5_ETH_WQE_L3_CSUM | MLX5_ETH_WQE_L4_CSUM; } else { - wqe->wqe.eseg.cs_flags = 0; + wqe->eseg.cs_flags = 0; + } + raw = (uint8_t *)(uintptr_t)&wqe->eseg.inline_hdr[0]; + /* Start the known and common part of the WQE structure. */ + wqe->ctrl[0] = htonl((txq->wqe_ci << 8) | MLX5_OPCODE_SEND); + wqe->ctrl[2] = 0; + wqe->ctrl[3] = 0; + wqe->eseg.rsvd0 = 0; + wqe->eseg.rsvd1 = 0; + wqe->eseg.mss = 0; + wqe->eseg.rsvd2 = 0; + /* Start by copying the Ethernet Header. */ + memcpy((uint8_t *)raw, ((uint8_t *)addr), 16); + length -= MLX5_WQE_DWORD_SIZE; + addr += MLX5_WQE_DWORD_SIZE; + /* Replace the Ethernet type by the VLAN if necessary. */ + if (buf->ol_flags & PKT_TX_VLAN_PKT) { + uint32_t vlan = htonl(0x81000000 | buf->vlan_tci); + + memcpy((uint8_t *)(raw + MLX5_WQE_DWORD_SIZE - + sizeof(vlan)), + &vlan, sizeof(vlan)); + addr -= sizeof(vlan); + length += sizeof(vlan); } - while (--segs_n) { + /* Inline if enough room. */ + if (txq->max_inline != 0) { + uintptr_t end = + (uintptr_t)&(*txq->wqes)[1 << txq->wqe_n]; + uint16_t max_inline = + txq->max_inline * RTE_CACHE_LINE_SIZE; + uint16_t pkt_inline_sz = MLX5_WQE_DWORD_SIZE; + uint16_t room; + + raw += MLX5_WQE_DWORD_SIZE; + room = end - (uintptr_t)raw; + if (room > max_inline) { + uintptr_t addr_end = (addr + max_inline) & + ~(RTE_CACHE_LINE_SIZE - 1); + uint16_t copy_b = ((addr_end - addr) > length) ? + length : + (addr_end - addr); + + rte_memcpy((void *)raw, (void *)addr, copy_b); + addr += copy_b; + length -= copy_b; + pkt_inline_sz += copy_b; + /* Sanity check. */ + assert(addr <= addr_end); + } + /* Store the inlined packet size in the WQE. */ + wqe->eseg.inline_hdr_sz = htons(pkt_inline_sz); + /* + * 2 DWORDs consumed by the WQE header + 1 DSEG + + * the size of the inline part of the packet. + */ + ds = 2 + MLX5_WQE_DS(pkt_inline_sz - 2); + if (length > 0) { + dseg = (struct mlx5_wqe_data_seg *) + ((uintptr_t)wqe + + (ds * MLX5_WQE_DWORD_SIZE)); + if ((uintptr_t)dseg >= end) + dseg = (struct mlx5_wqe_data_seg *) + ((uintptr_t)&(*txq->wqes)[0]); + goto use_dseg; + } else if (!segs_n) { + goto next_pkt; + } else { + goto next_seg; + } + } else { /* - * Spill on next WQE when the current one does not have - * enough room left. Size of WQE must a be a multiple - * of data segment size. + * No inline has been done in the packet, only the + * Ethernet Header has been stored. */ - assert(!(sizeof(*wqe) % sizeof(*dseg))); - if (!(ds % (sizeof(*wqe) / 16))) - dseg = (volatile void *) - &(*txq->wqes)[txq->wqe_ci++ & - (txq->wqe_n - 1)]; - else - ++dseg; + wqe->eseg.inline_hdr_sz = htons(MLX5_WQE_DWORD_SIZE); + dseg = (struct mlx5_wqe_data_seg *) + ((uintptr_t)wqe + (3 * MLX5_WQE_DWORD_SIZE)); + ds = 3; +use_dseg: + /* Add the remaining packet as a simple ds. */ + *dseg = (struct mlx5_wqe_data_seg) { + .addr = htonll(addr), + .byte_count = htonl(length), + .lkey = txq_mp2mr(txq, txq_mb2mp(buf)), + }; ++ds; - buf = buf->next; - assert(buf); - /* Store segment information. 
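The new inline branch above copies at most txq->max_inline * RTE_CACHE_LINE_SIZE bytes and stops at a cache-line-aligned address, leaving the remainder for a data segment. A worked example of that capping arithmetic, assuming a 64-byte cache line:

#include <assert.h>
#include <stdint.h>

#define CACHE_LINE 64u /* assumed RTE_CACHE_LINE_SIZE */

int
main(void)
{
	uintptr_t addr = 0x1010;                  /* packet data start */
	unsigned int max_inline = 2 * CACHE_LINE; /* txq->max_inline == 2 */
	unsigned int length = 300;

	/* End the inline copy on a cache-line boundary. */
	uintptr_t addr_end = (addr + max_inline) &
			     ~(uintptr_t)(CACHE_LINE - 1);
	unsigned int copy_b = ((addr_end - addr) > length) ?
			      length : (unsigned int)(addr_end - addr);

	assert(addr_end == 0x1080); /* 0x1090 rounded down */
	assert(copy_b == 112);      /* inlined; the rest goes in a DSEG */
	return 0;
}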
*/ - dseg->byte_count = htonl(DATA_LEN(buf)); - dseg->lkey = txq_mp2mr(txq, txq_mb2mp(buf)); - dseg->addr = htonll(rte_pktmbuf_mtod(buf, uintptr_t)); - (*txq->elts)[elts_head_next] = buf; - elts_head_next = (elts_head_next + 1) & (elts_n - 1); -#ifdef MLX5_PMD_SOFT_COUNTERS - length += DATA_LEN(buf); -#endif - ++j; + if (!segs_n) + goto next_pkt; } - /* Update DS field in WQE. */ - wqe->wqe.ctrl.data[1] &= htonl(0xffffffc0); - wqe->wqe.ctrl.data[1] |= htonl(ds & 0x3f); - elts_head = elts_head_next; -#ifdef MLX5_PMD_SOFT_COUNTERS - /* Increment sent bytes counter. */ - txq->stats.obytes += length; -#endif - elts_head = elts_head_next; - ++i; - } while (pkts_n); - /* Take a shortcut if nothing must be sent. */ - if (unlikely(i == 0)) - return 0; - /* Check whether completion threshold has been reached. */ - comp = txq->elts_comp + i + j; - if (comp >= MLX5_TX_COMP_THRESH) { - /* Request completion on last WQE. */ - wqe->wqe.ctrl.data[2] = htonl(8); - /* Save elts_head in unused "immediate" field of WQE. */ - wqe->wqe.ctrl.data[3] = elts_head; - txq->elts_comp = 0; - } else { - txq->elts_comp = comp; - } -#ifdef MLX5_PMD_SOFT_COUNTERS - /* Increment sent packets counter. */ - txq->stats.opackets += i; -#endif - /* Ring QP doorbell. */ - mlx5_tx_dbrec(txq); - txq->elts_head = elts_head; - return i; -} - -/** - * DPDK callback for TX with inline support. - * - * @param dpdk_txq - * Generic pointer to TX queue structure. - * @param[in] pkts - * Packets to transmit. - * @param pkts_n - * Number of packets in array. - * - * @return - * Number of packets successfully transmitted (<= pkts_n). - */ -uint16_t -mlx5_tx_burst_inline(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n) -{ - struct txq *txq = (struct txq *)dpdk_txq; - uint16_t elts_head = txq->elts_head; - const unsigned int elts_n = txq->elts_n; - unsigned int i = 0; - unsigned int j = 0; - unsigned int max; - unsigned int comp; - volatile union mlx5_wqe *wqe = NULL; - unsigned int max_inline = txq->max_inline; - - if (unlikely(!pkts_n)) - return 0; - /* Prefetch first packet cacheline. */ - tx_prefetch_cqe(txq, txq->cq_ci); - tx_prefetch_cqe(txq, txq->cq_ci + 1); - rte_prefetch0(*pkts); - /* Start processing. */ - txq_complete(txq); - max = (elts_n - (elts_head - txq->elts_tail)); - if (max > elts_n) - max -= elts_n; - do { - struct rte_mbuf *buf = *(pkts++); - unsigned int elts_head_next; - uintptr_t addr; - uint32_t length; - uint32_t lkey; - unsigned int segs_n = buf->nb_segs; - volatile struct mlx5_wqe_data_seg *dseg; - unsigned int ds = sizeof(*wqe) / 16; - +next_seg: + assert(buf); + assert(ds); + assert(wqe); /* - * Make sure there is enough room to store this packet and - * that one ring entry remains unused. + * Spill on next WQE when the current one does not have + * enough room left. Size of WQE must be a multiple + * of data segment size. 
*/ - assert(segs_n); - if (max < segs_n + 1) - break; - max -= segs_n; - --pkts_n; - elts_head_next = (elts_head + 1) & (elts_n - 1); - wqe = &(*txq->wqes)[txq->wqe_ci & (txq->wqe_n - 1)]; - dseg = &wqe->wqe.dseg; - tx_prefetch_wqe(txq, txq->wqe_ci); - tx_prefetch_wqe(txq, txq->wqe_ci + 1); - if (pkts_n) - rte_prefetch0(*pkts); - /* Should we enable HW CKSUM offload */ - if (buf->ol_flags & - (PKT_TX_IP_CKSUM | PKT_TX_TCP_CKSUM | PKT_TX_UDP_CKSUM)) { - wqe->inl.eseg.cs_flags = - MLX5_ETH_WQE_L3_CSUM | - MLX5_ETH_WQE_L4_CSUM; - } else { - wqe->inl.eseg.cs_flags = 0; + assert(!(MLX5_WQE_SIZE % MLX5_WQE_DWORD_SIZE)); + if (!(ds % (MLX5_WQE_SIZE / MLX5_WQE_DWORD_SIZE))) { + unsigned int n = (txq->wqe_ci + ((ds + 3) / 4)) & + ((1 << txq->wqe_n) - 1); + + dseg = (struct mlx5_wqe_data_seg *) + ((uintptr_t)&(*txq->wqes)[n]); + tx_prefetch_wqe(txq, n + 1); + } else if (!dseg) { + dseg = (struct mlx5_wqe_data_seg *) + ((uintptr_t)wqe + + (ds * MLX5_WQE_DWORD_SIZE)); + } else { + ++dseg; } - /* Retrieve buffer information. */ - addr = rte_pktmbuf_mtod(buf, uintptr_t); + ++ds; + buf = buf->next; + assert(buf); length = DATA_LEN(buf); - /* Update element. */ - (*txq->elts)[elts_head] = buf; - /* Prefetch next buffer data. */ - if (pkts_n) - rte_prefetch0(rte_pktmbuf_mtod(*pkts, - volatile void *)); - if ((length <= max_inline) && (segs_n == 1)) { - if (buf->ol_flags & PKT_TX_VLAN_PKT) - mlx5_wqe_write_inline_vlan(txq, wqe, - addr, length, - buf->vlan_tci); - else - mlx5_wqe_write_inline(txq, wqe, addr, length); - goto skip_segs; - } else { - /* Retrieve Memory Region key for this memory pool. */ - lkey = txq_mp2mr(txq, txq_mb2mp(buf)); - if (buf->ol_flags & PKT_TX_VLAN_PKT) - mlx5_wqe_write_vlan(txq, wqe, addr, length, - lkey, buf->vlan_tci); - else - mlx5_wqe_write(txq, wqe, addr, length, lkey); - } - while (--segs_n) { - /* - * Spill on next WQE when the current one does not have - * enough room left. Size of WQE must a be a multiple - * of data segment size. - */ - assert(!(sizeof(*wqe) % sizeof(*dseg))); - if (!(ds % (sizeof(*wqe) / 16))) - dseg = (volatile void *) - &(*txq->wqes)[txq->wqe_ci++ & - (txq->wqe_n - 1)]; - else - ++dseg; - ++ds; - buf = buf->next; - assert(buf); - /* Store segment information. */ - dseg->byte_count = htonl(DATA_LEN(buf)); - dseg->lkey = txq_mp2mr(txq, txq_mb2mp(buf)); - dseg->addr = htonll(rte_pktmbuf_mtod(buf, uintptr_t)); - (*txq->elts)[elts_head_next] = buf; - elts_head_next = (elts_head_next + 1) & (elts_n - 1); #ifdef MLX5_PMD_SOFT_COUNTERS - length += DATA_LEN(buf); + total_length += length; #endif - ++j; - } - /* Update DS field in WQE. */ - wqe->inl.ctrl.data[1] &= htonl(0xffffffc0); - wqe->inl.ctrl.data[1] |= htonl(ds & 0x3f); -skip_segs: - elts_head = elts_head_next; + /* Store segment information. */ + *dseg = (struct mlx5_wqe_data_seg) { + .addr = htonll(rte_pktmbuf_mtod(buf, uintptr_t)), + .byte_count = htonl(length), + .lkey = txq_mp2mr(txq, txq_mb2mp(buf)), + }; + (*txq->elts)[elts_head] = buf; + elts_head = (elts_head + 1) & (elts_n - 1); + ++j; + --segs_n; + if (segs_n) + goto next_seg; + else + --pkts_n; +next_pkt: + ++i; + wqe->ctrl[1] = htonl(txq->qp_num_8s | ds); + txq->wqe_ci += (ds + 3) / 4; #ifdef MLX5_PMD_SOFT_COUNTERS /* Increment sent bytes counter. */ - txq->stats.obytes += length; + txq->stats.obytes += total_length; #endif - ++i; } while (pkts_n); /* Take a shortcut if nothing must be sent. 
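Once all segments are written, the code above patches the DS count into ctrl[1] and advances wqe_ci by (ds + 3) / 4, since each 64-byte WQE slot holds four 16-byte units. A worked check:

#include <assert.h>

int
main(void)
{
	/* e.g. ctrl + eseg (2 units) plus three data segments: */
	unsigned int ds = 5;
	unsigned int slots = (ds + 3) / 4; /* 64-byte slots consumed */

	assert(slots == 2); /* the WQE spills into the next slot */
	return 0;
}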
*/ if (unlikely(i == 0)) @@ -861,9 +588,9 @@ skip_segs: comp = txq->elts_comp + i + j; if (comp >= MLX5_TX_COMP_THRESH) { /* Request completion on last WQE. */ - wqe->inl.ctrl.data[2] = htonl(8); + wqe->ctrl[2] = htonl(8); /* Save elts_head in unused "immediate" field of WQE. */ - wqe->inl.ctrl.data[3] = elts_head; + wqe->ctrl[3] = elts_head; txq->elts_comp = 0; } else { txq->elts_comp = comp; @@ -891,28 +618,29 @@ skip_segs: static inline void mlx5_mpw_new(struct txq *txq, struct mlx5_mpw *mpw, uint32_t length) { - uint16_t idx = txq->wqe_ci & (txq->wqe_n - 1); + uint16_t idx = txq->wqe_ci & ((1 << txq->wqe_n) - 1); volatile struct mlx5_wqe_data_seg (*dseg)[MLX5_MPW_DSEG_MAX] = (volatile struct mlx5_wqe_data_seg (*)[]) - (uintptr_t)&(*txq->wqes)[(idx + 1) & (txq->wqe_n - 1)]; + (uintptr_t)&(*txq->wqes)[(idx + 1) & ((1 << txq->wqe_n) - 1)]; mpw->state = MLX5_MPW_STATE_OPENED; mpw->pkts_n = 0; mpw->len = length; mpw->total_len = 0; - mpw->wqe = &(*txq->wqes)[idx]; - mpw->wqe->mpw.eseg.mss = htons(length); - mpw->wqe->mpw.eseg.inline_hdr_sz = 0; - mpw->wqe->mpw.eseg.rsvd0 = 0; - mpw->wqe->mpw.eseg.rsvd1 = 0; - mpw->wqe->mpw.eseg.rsvd2 = 0; - mpw->wqe->mpw.ctrl.data[0] = htonl((MLX5_OPC_MOD_MPW << 24) | - (txq->wqe_ci << 8) | - MLX5_OPCODE_TSO); - mpw->wqe->mpw.ctrl.data[2] = 0; - mpw->wqe->mpw.ctrl.data[3] = 0; - mpw->data.dseg[0] = &mpw->wqe->mpw.dseg[0]; - mpw->data.dseg[1] = &mpw->wqe->mpw.dseg[1]; + mpw->wqe = (volatile struct mlx5_wqe *)&(*txq->wqes)[idx].hdr; + mpw->wqe->eseg.mss = htons(length); + mpw->wqe->eseg.inline_hdr_sz = 0; + mpw->wqe->eseg.rsvd0 = 0; + mpw->wqe->eseg.rsvd1 = 0; + mpw->wqe->eseg.rsvd2 = 0; + mpw->wqe->ctrl[0] = htonl((MLX5_OPC_MOD_MPW << 24) | + (txq->wqe_ci << 8) | MLX5_OPCODE_TSO); + mpw->wqe->ctrl[2] = 0; + mpw->wqe->ctrl[3] = 0; + mpw->data.dseg[0] = (volatile struct mlx5_wqe_data_seg *) + (((uintptr_t)mpw->wqe) + (2 * MLX5_WQE_DWORD_SIZE)); + mpw->data.dseg[1] = (volatile struct mlx5_wqe_data_seg *) + (((uintptr_t)mpw->wqe) + (3 * MLX5_WQE_DWORD_SIZE)); mpw->data.dseg[2] = &(*dseg)[0]; mpw->data.dseg[3] = &(*dseg)[1]; mpw->data.dseg[4] = &(*dseg)[2]; @@ -935,7 +663,7 @@ mlx5_mpw_close(struct txq *txq, struct mlx5_mpw *mpw) * Store size in multiple of 16 bytes. Control and Ethernet segments * count as 2. */ - mpw->wqe->mpw.ctrl.data[1] = htonl(txq->qp_num_8s | (2 + num)); + mpw->wqe->ctrl[1] = htonl(txq->qp_num_8s | (2 + num)); mpw->state = MLX5_MPW_STATE_CLOSED; if (num < 3) ++txq->wqe_ci; @@ -963,7 +691,7 @@ mlx5_tx_burst_mpw(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n) { struct txq *txq = (struct txq *)dpdk_txq; uint16_t elts_head = txq->elts_head; - const unsigned int elts_n = txq->elts_n; + const unsigned int elts_n = 1 << txq->elts_n; unsigned int i = 0; unsigned int j = 0; unsigned int max; @@ -1013,11 +741,11 @@ mlx5_tx_burst_mpw(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n) if ((mpw.state == MLX5_MPW_STATE_OPENED) && ((mpw.len != length) || (segs_n != 1) || - (mpw.wqe->mpw.eseg.cs_flags != cs_flags))) + (mpw.wqe->eseg.cs_flags != cs_flags))) mlx5_mpw_close(txq, &mpw); if (mpw.state == MLX5_MPW_STATE_CLOSED) { mlx5_mpw_new(txq, &mpw, length); - mpw.wqe->mpw.eseg.cs_flags = cs_flags; + mpw.wqe->eseg.cs_flags = cs_flags; } /* Multi-segment packets must be alone in their MPW. */ assert((segs_n == 1) || (mpw.pkts_n == 0)); @@ -1063,12 +791,12 @@ mlx5_tx_burst_mpw(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n) /* "j" includes both packets and segments. 
*/ comp = txq->elts_comp + j; if (comp >= MLX5_TX_COMP_THRESH) { - volatile union mlx5_wqe *wqe = mpw.wqe; + volatile struct mlx5_wqe *wqe = mpw.wqe; /* Request completion on last WQE. */ - wqe->mpw.ctrl.data[2] = htonl(8); + wqe->ctrl[2] = htonl(8); /* Save elts_head in unused "immediate" field of WQE. */ - wqe->mpw.ctrl.data[3] = elts_head; + wqe->ctrl[3] = elts_head; txq->elts_comp = 0; } else { txq->elts_comp = comp; @@ -1098,25 +826,28 @@ mlx5_tx_burst_mpw(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n) static inline void mlx5_mpw_inline_new(struct txq *txq, struct mlx5_mpw *mpw, uint32_t length) { - uint16_t idx = txq->wqe_ci & (txq->wqe_n - 1); + uint16_t idx = txq->wqe_ci & ((1 << txq->wqe_n) - 1); + struct mlx5_wqe_inl_small *inl; mpw->state = MLX5_MPW_INL_STATE_OPENED; mpw->pkts_n = 0; mpw->len = length; mpw->total_len = 0; - mpw->wqe = &(*txq->wqes)[idx]; - mpw->wqe->mpw_inl.ctrl.data[0] = htonl((MLX5_OPC_MOD_MPW << 24) | - (txq->wqe_ci << 8) | - MLX5_OPCODE_TSO); - mpw->wqe->mpw_inl.ctrl.data[2] = 0; - mpw->wqe->mpw_inl.ctrl.data[3] = 0; - mpw->wqe->mpw_inl.eseg.mss = htons(length); - mpw->wqe->mpw_inl.eseg.inline_hdr_sz = 0; - mpw->wqe->mpw_inl.eseg.cs_flags = 0; - mpw->wqe->mpw_inl.eseg.rsvd0 = 0; - mpw->wqe->mpw_inl.eseg.rsvd1 = 0; - mpw->wqe->mpw_inl.eseg.rsvd2 = 0; - mpw->data.raw = &mpw->wqe->mpw_inl.data[0]; + mpw->wqe = (volatile struct mlx5_wqe *)&(*txq->wqes)[idx].hdr; + mpw->wqe->ctrl[0] = htonl((MLX5_OPC_MOD_MPW << 24) | + (txq->wqe_ci << 8) | + MLX5_OPCODE_TSO); + mpw->wqe->ctrl[2] = 0; + mpw->wqe->ctrl[3] = 0; + mpw->wqe->eseg.mss = htons(length); + mpw->wqe->eseg.inline_hdr_sz = 0; + mpw->wqe->eseg.cs_flags = 0; + mpw->wqe->eseg.rsvd0 = 0; + mpw->wqe->eseg.rsvd1 = 0; + mpw->wqe->eseg.rsvd2 = 0; + inl = (struct mlx5_wqe_inl_small *) + (((uintptr_t)mpw->wqe) + 2 * MLX5_WQE_DWORD_SIZE); + mpw->data.raw = (uint8_t *)&inl->raw; } /** @@ -1131,17 +862,18 @@ static inline void mlx5_mpw_inline_close(struct txq *txq, struct mlx5_mpw *mpw) { unsigned int size; + struct mlx5_wqe_inl_small *inl = (struct mlx5_wqe_inl_small *) + (((uintptr_t)mpw->wqe) + (2 * MLX5_WQE_DWORD_SIZE)); - size = sizeof(*mpw->wqe) - MLX5_MWQE64_INL_DATA + mpw->total_len; + size = MLX5_WQE_SIZE - MLX5_MWQE64_INL_DATA + mpw->total_len; /* * Store size in multiple of 16 bytes. Control and Ethernet segments * count as 2. 
*/ - mpw->wqe->mpw_inl.ctrl.data[1] = - htonl(txq->qp_num_8s | ((size + 15) / 16)); + mpw->wqe->ctrl[1] = htonl(txq->qp_num_8s | MLX5_WQE_DS(size)); mpw->state = MLX5_MPW_STATE_CLOSED; - mpw->wqe->mpw_inl.byte_cnt = htonl(mpw->total_len | MLX5_INLINE_SEG); - txq->wqe_ci += (size + (sizeof(*mpw->wqe) - 1)) / sizeof(*mpw->wqe); + inl->byte_cnt = htonl(mpw->total_len | MLX5_INLINE_SEG); + txq->wqe_ci += (size + (MLX5_WQE_SIZE - 1)) / MLX5_WQE_SIZE; } /** @@ -1163,12 +895,12 @@ mlx5_tx_burst_mpw_inline(void *dpdk_txq, struct rte_mbuf **pkts, { struct txq *txq = (struct txq *)dpdk_txq; uint16_t elts_head = txq->elts_head; - const unsigned int elts_n = txq->elts_n; + const unsigned int elts_n = 1 << txq->elts_n; unsigned int i = 0; unsigned int j = 0; unsigned int max; unsigned int comp; - unsigned int inline_room = txq->max_inline; + unsigned int inline_room = txq->max_inline * RTE_CACHE_LINE_SIZE; struct mlx5_mpw mpw = { .state = MLX5_MPW_STATE_CLOSED, }; @@ -1214,31 +946,33 @@ mlx5_tx_burst_mpw_inline(void *dpdk_txq, struct rte_mbuf **pkts, if (mpw.state == MLX5_MPW_STATE_OPENED) { if ((mpw.len != length) || (segs_n != 1) || - (mpw.wqe->mpw.eseg.cs_flags != cs_flags)) + (mpw.wqe->eseg.cs_flags != cs_flags)) mlx5_mpw_close(txq, &mpw); } else if (mpw.state == MLX5_MPW_INL_STATE_OPENED) { if ((mpw.len != length) || (segs_n != 1) || (length > inline_room) || - (mpw.wqe->mpw_inl.eseg.cs_flags != cs_flags)) { + (mpw.wqe->eseg.cs_flags != cs_flags)) { mlx5_mpw_inline_close(txq, &mpw); - inline_room = txq->max_inline; + inline_room = + txq->max_inline * RTE_CACHE_LINE_SIZE; } } if (mpw.state == MLX5_MPW_STATE_CLOSED) { if ((segs_n != 1) || (length > inline_room)) { mlx5_mpw_new(txq, &mpw, length); - mpw.wqe->mpw.eseg.cs_flags = cs_flags; + mpw.wqe->eseg.cs_flags = cs_flags; } else { mlx5_mpw_inline_new(txq, &mpw, length); - mpw.wqe->mpw_inl.eseg.cs_flags = cs_flags; + mpw.wqe->eseg.cs_flags = cs_flags; } } /* Multi-segment packets must be alone in their MPW. */ assert((segs_n == 1) || (mpw.pkts_n == 0)); if (mpw.state == MLX5_MPW_STATE_OPENED) { - assert(inline_room == txq->max_inline); + assert(inline_room == + txq->max_inline * RTE_CACHE_LINE_SIZE); #if defined(MLX5_PMD_SOFT_COUNTERS) || !defined(NDEBUG) length = 0; #endif @@ -1277,7 +1011,7 @@ mlx5_tx_burst_mpw_inline(void *dpdk_txq, struct rte_mbuf **pkts, addr = rte_pktmbuf_mtod(buf, uintptr_t); (*txq->elts)[elts_head] = buf; /* Maximum number of bytes before wrapping. */ - max = ((uintptr_t)&(*txq->wqes)[txq->wqe_n] - + max = ((uintptr_t)&(*txq->wqes)[1 << txq->wqe_n] - (uintptr_t)mpw.data.raw); if (length > max) { rte_memcpy((void *)(uintptr_t)mpw.data.raw, @@ -1296,14 +1030,15 @@ mlx5_tx_burst_mpw_inline(void *dpdk_txq, struct rte_mbuf **pkts, mpw.data.raw += length; } if ((uintptr_t)mpw.data.raw == - (uintptr_t)&(*txq->wqes)[txq->wqe_n]) + (uintptr_t)&(*txq->wqes)[1 << txq->wqe_n]) mpw.data.raw = (volatile void *)&(*txq->wqes)[0]; ++mpw.pkts_n; ++j; if (mpw.pkts_n == MLX5_MPW_DSEG_MAX) { mlx5_mpw_inline_close(txq, &mpw); - inline_room = txq->max_inline; + inline_room = + txq->max_inline * RTE_CACHE_LINE_SIZE; } else { inline_room -= length; } @@ -1323,12 +1058,12 @@ mlx5_tx_burst_mpw_inline(void *dpdk_txq, struct rte_mbuf **pkts, /* "j" includes both packets and segments. */ comp = txq->elts_comp + j; if (comp >= MLX5_TX_COMP_THRESH) { - volatile union mlx5_wqe *wqe = mpw.wqe; + volatile struct mlx5_wqe *wqe = mpw.wqe; /* Request completion on last WQE. 
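mlx5_mpw_inline_close above computes the session size as the fixed header bytes (MLX5_WQE_SIZE - MLX5_MWQE64_INL_DATA) plus the inlined total, then advances wqe_ci by whole 64-byte slots. A worked sketch; MLX5_MWQE64_INL_DATA = 28 is an assumption derived from the 64-byte inline-MPW layout removed earlier:

#include <assert.h>

#define WQE_SIZE 64
#define MWQE64_INL_DATA 28 /* assumed: 64 - 16 ctrl - 16 eseg - 4 byte_cnt */
#define WQE_DS(n) (((n) + 15) / 16)

int
main(void)
{
	unsigned int total_len = 100; /* bytes inlined in this session */
	unsigned int size = WQE_SIZE - MWQE64_INL_DATA + total_len;
	unsigned int ds = WQE_DS(size);
	unsigned int slots = (size + WQE_SIZE - 1) / WQE_SIZE;

	assert(size == 136 && ds == 9 && slots == 3);
	return 0;
}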
*/ - wqe->mpw_inl.ctrl.data[2] = htonl(8); + wqe->ctrl[2] = htonl(8); /* Save elts_head in unused "immediate" field of WQE. */ - wqe->mpw_inl.ctrl.data[3] = elts_head; + wqe->ctrl[3] = elts_head; txq->elts_comp = 0; } else { txq->elts_comp = comp; @@ -1359,25 +1094,24 @@ mlx5_tx_burst_mpw_inline(void *dpdk_txq, struct rte_mbuf **pkts, * Packet type for struct rte_mbuf. */ static inline uint32_t -rxq_cq_to_pkt_type(volatile struct mlx5_cqe64 *cqe) +rxq_cq_to_pkt_type(volatile struct mlx5_cqe *cqe) { uint32_t pkt_type; uint8_t flags = cqe->l4_hdr_type_etc; - uint8_t info = cqe->rsvd0[0]; - if (info & IBV_EXP_CQ_RX_TUNNEL_PACKET) + if (cqe->pkt_info & MLX5_CQE_RX_TUNNEL_PACKET) pkt_type = TRANSPOSE(flags, - IBV_EXP_CQ_RX_OUTER_IPV4_PACKET, + MLX5_CQE_RX_OUTER_IPV4_PACKET, RTE_PTYPE_L3_IPV4) | TRANSPOSE(flags, - IBV_EXP_CQ_RX_OUTER_IPV6_PACKET, + MLX5_CQE_RX_OUTER_IPV6_PACKET, RTE_PTYPE_L3_IPV6) | TRANSPOSE(flags, - IBV_EXP_CQ_RX_IPV4_PACKET, + MLX5_CQE_RX_IPV4_PACKET, RTE_PTYPE_INNER_L3_IPV4) | TRANSPOSE(flags, - IBV_EXP_CQ_RX_IPV6_PACKET, + MLX5_CQE_RX_IPV6_PACKET, RTE_PTYPE_INNER_L3_IPV6); else pkt_type = @@ -1399,14 +1133,16 @@ rxq_cq_to_pkt_type(volatile struct mlx5_cqe64 *cqe) * Pointer to RX queue. * @param cqe * CQE to process. + * @param[out] rss_hash + * Packet RSS Hash result. * * @return * Packet size in bytes (0 if there is none), -1 in case of completion * with error. */ static inline int -mlx5_rx_poll_len(struct rxq *rxq, volatile struct mlx5_cqe64 *cqe, - uint16_t cqe_cnt) +mlx5_rx_poll_len(struct rxq *rxq, volatile struct mlx5_cqe *cqe, + uint16_t cqe_cnt, uint32_t *rss_hash) { struct rxq_zip *zip = &rxq->zip; uint16_t cqe_n = cqe_cnt + 1; @@ -1416,9 +1152,10 @@ mlx5_rx_poll_len(struct rxq *rxq, volatile struct mlx5_cqe64 *cqe, if (zip->ai) { volatile struct mlx5_mini_cqe8 (*mc)[8] = (volatile struct mlx5_mini_cqe8 (*)[8]) - (uintptr_t)(&(*rxq->cqes)[zip->ca & cqe_cnt].cqe64); + (uintptr_t)(&(*rxq->cqes)[zip->ca & cqe_cnt]); len = ntohl((*mc)[zip->ai & 7].byte_cnt); + *rss_hash = ntohl((*mc)[zip->ai & 7].rx_hash_result); if ((++zip->ai & 7) == 0) { /* * Increment consumer index to skip the number of @@ -1433,7 +1170,7 @@ mlx5_rx_poll_len(struct rxq *rxq, volatile struct mlx5_cqe64 *cqe, uint16_t end = zip->cq_ci; while (idx != end) { - (*rxq->cqes)[idx & cqe_cnt].cqe64.op_own = + (*rxq->cqes)[idx & cqe_cnt].op_own = MLX5_CQE_INVALIDATE; ++idx; } @@ -1445,7 +1182,7 @@ mlx5_rx_poll_len(struct rxq *rxq, volatile struct mlx5_cqe64 *cqe, int ret; int8_t op_own; - ret = check_cqe64(cqe, cqe_n, rxq->cq_ci); + ret = check_cqe(cqe, cqe_n, rxq->cq_ci); if (unlikely(ret == 1)) return 0; ++rxq->cq_ci; @@ -1454,7 +1191,7 @@ mlx5_rx_poll_len(struct rxq *rxq, volatile struct mlx5_cqe64 *cqe, volatile struct mlx5_mini_cqe8 (*mc)[8] = (volatile struct mlx5_mini_cqe8 (*)[8]) (uintptr_t)(&(*rxq->cqes)[rxq->cq_ci & - cqe_cnt].cqe64); + cqe_cnt]); /* Fix endianness. */ zip->cqe_cnt = ntohl(cqe->byte_cnt); @@ -1473,9 +1210,11 @@ mlx5_rx_poll_len(struct rxq *rxq, volatile struct mlx5_cqe64 *cqe, zip->cq_ci = rxq->cq_ci + zip->cqe_cnt; /* Get packet size to return. */ len = ntohl((*mc)[0].byte_cnt); + *rss_hash = ntohl((*mc)[0].rx_hash_result); zip->ai = 1; } else { len = ntohl(cqe->byte_cnt); + *rss_hash = ntohl(cqe->rx_hash_res); } /* Error while receiving packet. */ if (unlikely(MLX5_CQE_OPCODE(op_own) == MLX5_CQE_RESP_ERR)) @@ -1496,38 +1235,32 @@ mlx5_rx_poll_len(struct rxq *rxq, volatile struct mlx5_cqe64 *cqe, * Offload flags (ol_flags) for struct rte_mbuf. 
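mlx5_rx_poll_len above now also returns the RSS hash, including for compressed sessions where a title CQE is followed by arrays of eight 8-byte mini-CQEs. A minimal sketch of reading one mini-CQE entry; the field layout is assumed from the accesses in the code above:

#include <stdint.h>
#include <arpa/inet.h> /* ntohl() */

/* Assumed shape of one mini-CQE, matching the accesses above. */
struct mini_cqe8 {
	uint32_t rx_hash_result;
	uint32_t byte_cnt;
};

/* Read entry 'ai' (0..7) of a compressed-session mini-CQE array. */
static void
mini_cqe_get(volatile const struct mini_cqe8 (*mc)[8], unsigned int ai,
	     uint32_t *len, uint32_t *rss)
{
	*len = ntohl((*mc)[ai & 7].byte_cnt);
	*rss = ntohl((*mc)[ai & 7].rx_hash_result);
}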
*/ static inline uint32_t -rxq_cq_to_ol_flags(struct rxq *rxq, volatile struct mlx5_cqe64 *cqe) +rxq_cq_to_ol_flags(struct rxq *rxq, volatile struct mlx5_cqe *cqe) { uint32_t ol_flags = 0; uint8_t l3_hdr = (cqe->l4_hdr_type_etc) & MLX5_CQE_L3_HDR_TYPE_MASK; uint8_t l4_hdr = (cqe->l4_hdr_type_etc) & MLX5_CQE_L4_HDR_TYPE_MASK; - uint8_t info = cqe->rsvd0[0]; if ((l3_hdr == MLX5_CQE_L3_HDR_TYPE_IPV4) || (l3_hdr == MLX5_CQE_L3_HDR_TYPE_IPV6)) - ol_flags |= - (!(cqe->hds_ip_ext & MLX5_CQE_L3_OK) * - PKT_RX_IP_CKSUM_BAD); + ol_flags |= TRANSPOSE(cqe->hds_ip_ext, + MLX5_CQE_L3_OK, + PKT_RX_IP_CKSUM_GOOD); if ((l4_hdr == MLX5_CQE_L4_HDR_TYPE_TCP) || (l4_hdr == MLX5_CQE_L4_HDR_TYPE_TCP_EMP_ACK) || (l4_hdr == MLX5_CQE_L4_HDR_TYPE_TCP_ACK) || (l4_hdr == MLX5_CQE_L4_HDR_TYPE_UDP)) + ol_flags |= TRANSPOSE(cqe->hds_ip_ext, + MLX5_CQE_L4_OK, + PKT_RX_L4_CKSUM_GOOD); + if ((cqe->pkt_info & MLX5_CQE_RX_TUNNEL_PACKET) && (rxq->csum_l2tun)) ol_flags |= - (!(cqe->hds_ip_ext & MLX5_CQE_L4_OK) * - PKT_RX_L4_CKSUM_BAD); - /* - * PKT_RX_IP_CKSUM_BAD and PKT_RX_L4_CKSUM_BAD are used in place - * of PKT_RX_EIP_CKSUM_BAD because the latter is not functional - * (its value is 0). - */ - if ((info & IBV_EXP_CQ_RX_TUNNEL_PACKET) && (rxq->csum_l2tun)) - ol_flags |= - TRANSPOSE(~cqe->l4_hdr_type_etc, - IBV_EXP_CQ_RX_OUTER_IP_CSUM_OK, - PKT_RX_IP_CKSUM_BAD) | - TRANSPOSE(~cqe->l4_hdr_type_etc, - IBV_EXP_CQ_RX_OUTER_TCP_UDP_CSUM_OK, - PKT_RX_L4_CKSUM_BAD); + TRANSPOSE(cqe->l4_hdr_type_etc, + MLX5_CQE_RX_OUTER_IP_CSUM_OK, + PKT_RX_IP_CKSUM_GOOD) | + TRANSPOSE(cqe->l4_hdr_type_etc, + MLX5_CQE_RX_OUTER_TCP_UDP_CSUM_OK, + PKT_RX_L4_CKSUM_GOOD); return ol_flags; } @@ -1548,21 +1281,22 @@ uint16_t mlx5_rx_burst(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n) { struct rxq *rxq = dpdk_rxq; - const unsigned int wqe_cnt = rxq->elts_n - 1; - const unsigned int cqe_cnt = rxq->cqe_n - 1; + const unsigned int wqe_cnt = (1 << rxq->elts_n) - 1; + const unsigned int cqe_cnt = (1 << rxq->cqe_n) - 1; const unsigned int sges_n = rxq->sges_n; struct rte_mbuf *pkt = NULL; struct rte_mbuf *seg = NULL; - volatile struct mlx5_cqe64 *cqe = - &(*rxq->cqes)[rxq->cq_ci & cqe_cnt].cqe64; + volatile struct mlx5_cqe *cqe = + &(*rxq->cqes)[rxq->cq_ci & cqe_cnt]; unsigned int i = 0; unsigned int rq_ci = rxq->rq_ci << sges_n; - int len; + int len; /* keep its value across iterations. */ while (pkts_n) { unsigned int idx = rq_ci & wqe_cnt; volatile struct mlx5_wqe_data_seg *wqe = &(*rxq->wqes)[idx]; struct rte_mbuf *rep = (*rxq->elts)[idx]; + uint32_t rss_hash_res = 0; if (pkt) NEXT(seg) = rep; @@ -1572,6 +1306,14 @@ mlx5_rx_burst(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n) rte_prefetch0(wqe); rep = rte_mbuf_raw_alloc(rxq->mp); if (unlikely(rep == NULL)) { + ++rxq->stats.rx_nombuf; + if (!pkt) { + /* + * no buffers before we even started, + * bail out silently. 
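rxq_cq_to_ol_flags above now transposes the hardware OK bits directly into PKT_RX_*_GOOD flags rather than deriving BAD flags by negation. TRANSPOSE moves a bit selected by one mask into the position of another; a plausible definition and a worked check, with the flag values below purely hypothetical:

#include <assert.h>
#include <stdint.h>

/* Plausible TRANSPOSE: move the bit under 'from' to the position of 'to'. */
#define TRANSPOSE(val, from, to) \
	(((from) >= (to)) ? \
	 (((val) & (from)) / ((from) / (to))) : \
	 (((val) & (from)) * ((to) / (from))))

int
main(void)
{
	uint8_t hds_ip_ext = 0x10;  /* pretend L3_OK is bit 4 */
	uint32_t l3_ok = 0x10;      /* hypothetical source mask */
	uint32_t cksum_good = 0x80; /* hypothetical mbuf flag */

	assert(TRANSPOSE(hds_ip_ext, l3_ok, cksum_good) == cksum_good);
	assert(TRANSPOSE(0, l3_ok, cksum_good) == 0);
	return 0;
}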
+ */ + break; + } while (pkt != seg) { assert(pkt != (*rxq->elts)[idx]); seg = NEXT(pkt); @@ -1579,13 +1321,13 @@ mlx5_rx_burst(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n) __rte_mbuf_raw_free(pkt); pkt = seg; } - ++rxq->stats.rx_nombuf; break; } if (!pkt) { - cqe = &(*rxq->cqes)[rxq->cq_ci & cqe_cnt].cqe64; - len = mlx5_rx_poll_len(rxq, cqe, cqe_cnt); - if (len == 0) { + cqe = &(*rxq->cqes)[rxq->cq_ci & cqe_cnt]; + len = mlx5_rx_poll_len(rxq, cqe, cqe_cnt, + &rss_hash_res); + if (!len) { rte_mbuf_refcnt_set(rep, 0); __rte_mbuf_raw_free(rep); break; @@ -1602,12 +1344,16 @@ mlx5_rx_burst(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n) /* Update packet information. */ pkt->packet_type = 0; pkt->ol_flags = 0; + if (rxq->rss_hash) { + pkt->hash.rss = rss_hash_res; + pkt->ol_flags = PKT_RX_RSS_HASH; + } if (rxq->csum | rxq->csum_l2tun | rxq->vlan_strip | rxq->crc_present) { if (rxq->csum) { pkt->packet_type = rxq_cq_to_pkt_type(cqe); - pkt->ol_flags = + pkt->ol_flags |= rxq_cq_to_ol_flags(rxq, cqe); } if (cqe->l4_hdr_type_etc & diff --git a/src/dpdk/drivers/net/mlx5/mlx5_rxtx.h b/src/dpdk/drivers/net/mlx5/mlx5_rxtx.h index d87dd19b..5579f89c 100644 --- a/src/dpdk/drivers/net/mlx5/mlx5_rxtx.h +++ b/src/dpdk/drivers/net/mlx5/mlx5_rxtx.h @@ -40,22 +40,23 @@ /* Verbs header. */ /* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */ #ifdef PEDANTIC -#pragma GCC diagnostic ignored "-pedantic" +#pragma GCC diagnostic ignored "-Wpedantic" #endif #include <infiniband/verbs.h> #include <infiniband/mlx5_hw.h> #ifdef PEDANTIC -#pragma GCC diagnostic error "-pedantic" +#pragma GCC diagnostic error "-Wpedantic" #endif /* DPDK headers don't like -pedantic. */ #ifdef PEDANTIC -#pragma GCC diagnostic ignored "-pedantic" +#pragma GCC diagnostic ignored "-Wpedantic" #endif #include <rte_mbuf.h> #include <rte_mempool.h> +#include <rte_common.h> #ifdef PEDANTIC -#pragma GCC diagnostic error "-pedantic" +#pragma GCC diagnostic error "-Wpedantic" #endif #include "mlx5_utils.h" @@ -87,6 +88,8 @@ struct mlx5_txq_stats { struct fdir_queue { struct ibv_qp *qp; /* Associated RX QP. */ struct ibv_exp_rwq_ind_table *ind_table; /* Indirection table. */ + struct ibv_exp_wq *wq; /* Work queue. */ + struct ibv_cq *cq; /* Completion queue. */ }; struct priv; @@ -107,16 +110,18 @@ struct rxq { unsigned int vlan_strip:1; /* Enable VLAN stripping. */ unsigned int crc_present:1; /* CRC must be subtracted. */ unsigned int sges_n:2; /* Log 2 of SGEs (max buffers per packet). */ + unsigned int cqe_n:4; /* Log 2 of CQ elements. */ + unsigned int elts_n:4; /* Log 2 of Mbufs. */ + unsigned int port_id:8; + unsigned int rss_hash:1; /* RSS hash result is enabled. */ + unsigned int :9; /* Remaining bits. */ + volatile uint32_t *rq_db; + volatile uint32_t *cq_db; uint16_t rq_ci; uint16_t cq_ci; - uint16_t elts_n; - uint16_t cqe_n; /* Number of CQ elements. */ - uint16_t port_id; volatile struct mlx5_wqe_data_seg(*wqes)[]; volatile struct mlx5_cqe(*cqes)[]; struct rxq_zip zip; /* Compressed context. */ - volatile uint32_t *rq_db; - volatile uint32_t *cq_db; struct rte_mbuf *(*elts)[]; struct rte_mempool *mp; struct mlx5_rxq_stats stats; @@ -128,7 +133,7 @@ struct rxq_ctrl { struct ibv_cq *cq; /* Completion Queue. */ struct ibv_exp_wq *wq; /* Work Queue. */ struct ibv_exp_res_domain *rd; /* Resource Domain. */ - struct fdir_queue fdir_queue; /* Flow director queue. */ + struct fdir_queue *fdir_queue; /* Flow director queue. */ struct ibv_mr *mr; /* Memory Region (for mp). 
*/ struct ibv_exp_wq_family *if_wq; /* WQ burst interface. */ struct ibv_exp_cq_family_v1 *if_cq; /* CQ interface. */ @@ -173,8 +178,8 @@ struct hash_rxq_init { uint16_t size; } hdr; struct ibv_exp_flow_spec_tcp_udp tcp_udp; - struct ibv_exp_flow_spec_ipv4_ext ipv4; - struct ibv_exp_flow_spec_ipv6_ext ipv6; + struct ibv_exp_flow_spec_ipv4 ipv4; + struct ibv_exp_flow_spec_ipv6 ipv6; struct ibv_exp_flow_spec_eth eth; } flow_spec; /* Flow specification template. */ const struct hash_rxq_init *underlayer; /* Pointer to underlayer. */ @@ -235,22 +240,30 @@ struct hash_rxq { [MLX5_MAX_SPECIAL_FLOWS][MLX5_MAX_VLAN_IDS]; }; +/** C extension macro for environments lacking C11 features. */ +#if !defined(__STDC_VERSION__) || __STDC_VERSION__ < 201112L +#define RTE_STD_C11 __extension__ +#else +#define RTE_STD_C11 +#endif + /* TX queue descriptor. */ +RTE_STD_C11 struct txq { uint16_t elts_head; /* Current index in (*elts)[]. */ uint16_t elts_tail; /* First element awaiting completion. */ uint16_t elts_comp; /* Counter since last completion request. */ - uint16_t elts_n; /* (*elts)[] length. */ uint16_t cq_ci; /* Consumer index for completion queue. */ - uint16_t cqe_n; /* Number of CQ elements. */ uint16_t wqe_ci; /* Consumer index for work queue. */ - uint16_t wqe_n; /* Number of WQ elements. */ + uint16_t elts_n:4; /* (*elts)[] length (in log2). */ + uint16_t cqe_n:4; /* Number of CQ elements (in log2). */ + uint16_t wqe_n:4; /* Number of WQE elements (in log2). */ + uint16_t bf_buf_size:4; /* Log2 Blueflame size. */ uint16_t bf_offset; /* Blueflame offset. */ - uint16_t bf_buf_size; /* Blueflame size. */ - uint16_t max_inline; /* Maximum size to inline in a WQE. */ + uint16_t max_inline; /* Multiple of RTE_CACHE_LINE_SIZE to inline. */ uint32_t qp_num_8s; /* QP number shifted by 8. */ volatile struct mlx5_cqe (*cqes)[]; /* Completion queue. */ - volatile union mlx5_wqe (*wqes)[]; /* Work queue. */ + volatile struct mlx5_wqe64 (*wqes)[]; /* Work queue. */ volatile uint32_t *qp_db; /* Work queue doorbell. */ volatile uint32_t *cq_db; /* Completion queue doorbell. */ volatile void *bf_reg; /* Blueflame register. */ @@ -312,7 +325,6 @@ uint16_t mlx5_tx_burst_secondary_setup(void *, struct rte_mbuf **, uint16_t); /* mlx5_rxtx.c */ uint16_t mlx5_tx_burst(void *, struct rte_mbuf **, uint16_t); -uint16_t mlx5_tx_burst_inline(void *, struct rte_mbuf **, uint16_t); uint16_t mlx5_tx_burst_mpw(void *, struct rte_mbuf **, uint16_t); uint16_t mlx5_tx_burst_mpw_inline(void *, struct rte_mbuf **, uint16_t); uint16_t mlx5_rx_burst(void *, struct rte_mbuf **, uint16_t); diff --git a/src/dpdk/drivers/net/mlx5/mlx5_stats.c b/src/dpdk/drivers/net/mlx5/mlx5_stats.c index 788ef939..f2b5781a 100644 --- a/src/dpdk/drivers/net/mlx5/mlx5_stats.c +++ b/src/dpdk/drivers/net/mlx5/mlx5_stats.c @@ -33,21 +33,17 @@ /* DPDK headers don't like -pedantic. */ #ifdef PEDANTIC -#pragma GCC diagnostic ignored "-pedantic" +#pragma GCC diagnostic ignored "-Wpedantic" #endif #include <rte_ethdev.h> #ifdef PEDANTIC -#pragma GCC diagnostic error "-pedantic" +#pragma GCC diagnostic error "-Wpedantic" #endif #include "mlx5.h" #include "mlx5_rxtx.h" #include "mlx5_defs.h" - -#include <linux/ethtool.h> -#include <linux/sockios.h> - /** * DPDK callback to get device statistics. * * @param dev * Pointer to Ethernet device structure. * @param[out] stats * Stats structure output buffer. 
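The RTE_STD_C11 fallback defined above expands to __extension__ on pre-C11 compilers so that declarations using C11-only features survive -pedantic builds. A minimal usage sketch with a hypothetical struct containing an anonymous union:

#if !defined(__STDC_VERSION__) || __STDC_VERSION__ < 201112L
#define RTE_STD_C11 __extension__
#else
#define RTE_STD_C11
#endif

/* Hypothetical example: anonymous unions are a C11 feature. */
RTE_STD_C11
struct sample {
	int kind;
	union {
		int as_int;
		float as_float;
	}; /* unnamed member, accepted under C11 or GNU C */
};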
*/ - - -static void -mlx5_stats_read_hw(struct rte_eth_dev *dev, - struct rte_eth_stats *stats){ - struct priv *priv = mlx5_get_priv(dev); - struct mlx5_stats_priv * lps = &priv->m_stats; - unsigned int i; - - struct rte_eth_stats tmp = {0}; - struct ethtool_stats *et_stats = (struct ethtool_stats *)lps->et_stats; - struct ifreq ifr; - - et_stats->cmd = ETHTOOL_GSTATS; - et_stats->n_stats = lps->n_stats; - - ifr.ifr_data = (caddr_t) et_stats; - - if (priv_ifreq(priv, SIOCETHTOOL, &ifr) != 0) { - WARN("unable to get statistic values for mlnx5 "); - } - - tmp.ibytes += et_stats->data[lps->inx_rx_vport_unicast_bytes] + - et_stats->data[lps->inx_rx_vport_multicast_bytes] + - et_stats->data[lps->inx_rx_vport_broadcast_bytes]; - - tmp.ipackets += et_stats->data[lps->inx_rx_vport_unicast_packets] + - et_stats->data[lps->inx_rx_vport_multicast_packets] + - et_stats->data[lps->inx_rx_vport_broadcast_packets]; - - tmp.ierrors += (et_stats->data[lps->inx_rx_wqe_err] + - et_stats->data[lps->inx_rx_crc_errors_phy] + - et_stats->data[lps->inx_rx_in_range_len_errors_phy] + - et_stats->data[lps->inx_rx_symbol_err_phy]); - - tmp.obytes += et_stats->data[lps->inx_tx_vport_unicast_bytes] + - et_stats->data[lps->inx_tx_vport_multicast_bytes] + - et_stats->data[lps->inx_tx_vport_broadcast_bytes]; - - tmp.opackets += (et_stats->data[lps->inx_tx_vport_unicast_packets] + - et_stats->data[lps->inx_tx_vport_multicast_packets] + - et_stats->data[lps->inx_tx_vport_broadcast_packets]); - - tmp.oerrors += et_stats->data[lps->inx_tx_errors_phy]; - - /* SW Rx */ - for (i = 0; (i != priv->rxqs_n); ++i) { - struct rxq *rxq = (*priv->rxqs)[i]; - if (rxq) { - tmp.imissed += rxq->stats.idropped; - tmp.rx_nombuf += rxq->stats.rx_nombuf; - } - } - - /*SW Tx */ - for (i = 0; (i != priv->txqs_n); ++i) { - struct txq *txq = (*priv->txqs)[i]; - if (txq) { - tmp.oerrors += txq->stats.odropped; - } - } - - *stats =tmp; -} - -void -mlx5_stats_free(struct rte_eth_dev *dev) -{ - struct priv *priv = mlx5_get_priv(dev); - struct mlx5_stats_priv * lps = &priv->m_stats; - - if ( lps->et_stats ){ - free(lps->et_stats); - lps->et_stats=0; - } -} - - -static void -mlx5_stats_init(struct rte_eth_dev *dev) -{ - struct priv *priv = mlx5_get_priv(dev); - struct mlx5_stats_priv * lps = &priv->m_stats; - struct rte_eth_stats tmp = {0}; - - unsigned int i; - unsigned int idx; - char ifname[IF_NAMESIZE]; - struct ifreq ifr; - - struct ethtool_stats *et_stats = NULL; - struct ethtool_drvinfo drvinfo; - struct ethtool_gstrings *strings = NULL; - unsigned int n_stats, sz_str, sz_stats; - - if (priv_get_ifname(priv, &ifname)) { - WARN("unable to get interface name"); - return; - } - /* How many statistics are available ? 
*/ - drvinfo.cmd = ETHTOOL_GDRVINFO; - ifr.ifr_data = (caddr_t) &drvinfo; - if (priv_ifreq(priv, SIOCETHTOOL, &ifr) != 0) { - WARN("unable to get driver info for %s", ifname); - return; - } - - n_stats = drvinfo.n_stats; - if (n_stats < 1) { - WARN("no statistics available for %s", ifname); - return; - } - lps->n_stats = n_stats; - - /* Allocate memory to grab stat names and values */ - sz_str = n_stats * ETH_GSTRING_LEN; - sz_stats = n_stats * sizeof(uint64_t); - strings = calloc(1, sz_str + sizeof(struct ethtool_gstrings)); - if (!strings) { - WARN("unable to allocate memory for strings"); - return; - } - - et_stats = calloc(1, sz_stats + sizeof(struct ethtool_stats)); - if (!et_stats) { - free(strings); - WARN("unable to allocate memory for stats"); - } - - strings->cmd = ETHTOOL_GSTRINGS; - strings->string_set = ETH_SS_STATS; - strings->len = n_stats; - ifr.ifr_data = (caddr_t) strings; - if (priv_ifreq(priv, SIOCETHTOOL, &ifr) != 0) { - WARN("unable to get statistic names for %s", ifname); - free(strings); - free(et_stats); - return; - } - - for (i = 0; (i != n_stats); ++i) { - - const char * curr_string = (const char*) &(strings->data[i * ETH_GSTRING_LEN]); - - if (!strcmp("rx_vport_unicast_bytes", curr_string)) lps->inx_rx_vport_unicast_bytes = i; - if (!strcmp("rx_vport_multicast_bytes", curr_string)) lps->inx_rx_vport_multicast_bytes = i; - if (!strcmp("rx_vport_broadcast_bytes", curr_string)) lps->inx_rx_vport_broadcast_bytes = i; - - if (!strcmp("rx_vport_unicast_packets", curr_string)) lps->inx_rx_vport_unicast_packets = i; - if (!strcmp("rx_vport_multicast_packets", curr_string)) lps->inx_rx_vport_multicast_packets = i; - if (!strcmp("rx_vport_broadcast_packets", curr_string)) lps->inx_rx_vport_broadcast_packets = i; - - if (!strcmp("tx_vport_unicast_bytes", curr_string)) lps->inx_tx_vport_unicast_bytes = i; - if (!strcmp("tx_vport_multicast_bytes", curr_string)) lps->inx_tx_vport_multicast_bytes = i; - if (!strcmp("tx_vport_broadcast_bytes", curr_string)) lps->inx_tx_vport_broadcast_bytes = i; - - if (!strcmp("tx_vport_unicast_packets", curr_string)) lps->inx_tx_vport_unicast_packets = i; - if (!strcmp("tx_vport_multicast_packets", curr_string)) lps->inx_tx_vport_multicast_packets = i; - if (!strcmp("tx_vport_broadcast_packets", curr_string)) lps->inx_tx_vport_broadcast_packets = i; - - if (!strcmp("rx_wqe_err", curr_string)) lps->inx_rx_wqe_err = i; - if (!strcmp("rx_crc_errors_phy", curr_string)) lps->inx_rx_crc_errors_phy = i; - if (!strcmp("rx_in_range_len_errors_phy", curr_string)) lps->inx_rx_in_range_len_errors_phy = i; - if (!strcmp("rx_symbol_err_phy", curr_string)) lps->inx_rx_symbol_err_phy = i; - - if (!strcmp("tx_errors_phy", curr_string)) lps->inx_tx_errors_phy = i; - } - - lps->et_stats =(void *)et_stats; - - if (!lps->inx_rx_vport_unicast_bytes || - !lps->inx_rx_vport_multicast_bytes || - !lps->inx_rx_vport_broadcast_bytes || - !lps->inx_rx_vport_unicast_packets || - !lps->inx_rx_vport_multicast_packets || - !lps->inx_rx_vport_broadcast_packets || - !lps->inx_tx_vport_unicast_bytes || - !lps->inx_tx_vport_multicast_bytes || - !lps->inx_tx_vport_broadcast_bytes || - !lps->inx_tx_vport_unicast_packets || - !lps->inx_tx_vport_multicast_packets || - !lps->inx_tx_vport_broadcast_packets || - !lps->inx_rx_wqe_err || - !lps->inx_rx_crc_errors_phy || - !lps->inx_rx_in_range_len_errors_phy) { - WARN("Counters are not recognized %s", ifname); - return; - } - - mlx5_stats_read_hw(dev,&tmp); - - /* copy yo shadow at first time */ - lps->m_shadow = tmp; - - free(strings); 
-} - - -static void -mlx5_stats_diff(struct rte_eth_stats *a, - struct rte_eth_stats *b, - struct rte_eth_stats *c){ - #define MLX5_DIFF(cnt) { a->cnt = (b->cnt - c->cnt); } - - MLX5_DIFF(ipackets); - MLX5_DIFF(opackets); - MLX5_DIFF(ibytes); - MLX5_DIFF(obytes); - MLX5_DIFF(imissed); - - MLX5_DIFF(ierrors); - MLX5_DIFF(oerrors); - MLX5_DIFF(rx_nombuf); -} - - void mlx5_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats) { struct priv *priv = mlx5_get_priv(dev); - - struct mlx5_stats_priv * lps = &priv->m_stats; - priv_lock(priv); - - if (lps->et_stats == NULL) { - mlx5_stats_init(dev); - } - struct rte_eth_stats tmp = {0}; - - mlx5_stats_read_hw(dev,&tmp); - - mlx5_stats_diff(stats, - &tmp, - &lps->m_shadow); - + struct rte_eth_stats tmp = {0}; + unsigned int i; + unsigned int idx; + + priv_lock(priv); + /* Add software counters. */ + for (i = 0; (i != priv->rxqs_n); ++i) { + struct rxq *rxq = (*priv->rxqs)[i]; + + if (rxq == NULL) + continue; + idx = rxq->stats.idx; + if (idx < RTE_ETHDEV_QUEUE_STAT_CNTRS) { +#ifdef MLX5_PMD_SOFT_COUNTERS + tmp.q_ipackets[idx] += rxq->stats.ipackets; + tmp.q_ibytes[idx] += rxq->stats.ibytes; +#endif + tmp.q_errors[idx] += (rxq->stats.idropped + + rxq->stats.rx_nombuf); + } +#ifdef MLX5_PMD_SOFT_COUNTERS + tmp.ipackets += rxq->stats.ipackets; + tmp.ibytes += rxq->stats.ibytes; +#endif + tmp.ierrors += rxq->stats.idropped; + tmp.rx_nombuf += rxq->stats.rx_nombuf; + } + for (i = 0; (i != priv->txqs_n); ++i) { + struct txq *txq = (*priv->txqs)[i]; + + if (txq == NULL) + continue; + idx = txq->stats.idx; + if (idx < RTE_ETHDEV_QUEUE_STAT_CNTRS) { +#ifdef MLX5_PMD_SOFT_COUNTERS + tmp.q_opackets[idx] += txq->stats.opackets; + tmp.q_obytes[idx] += txq->stats.obytes; +#endif + tmp.q_errors[idx] += txq->stats.odropped; + } +#ifdef MLX5_PMD_SOFT_COUNTERS + tmp.opackets += txq->stats.opackets; + tmp.obytes += txq->stats.obytes; +#endif + tmp.oerrors += txq->stats.odropped; + } +#ifndef MLX5_PMD_SOFT_COUNTERS + /* FIXME: retrieve and add hardware counters. */ +#endif + *stats = tmp; priv_unlock(priv); } @@ -304,20 +119,26 @@ void mlx5_stats_reset(struct rte_eth_dev *dev) { struct priv *priv = dev->data->dev_private; - struct mlx5_stats_priv * lps = &priv->m_stats; - - priv_lock(priv); - - if (lps->et_stats == NULL) { - mlx5_stats_init(dev); - } - struct rte_eth_stats tmp = {0}; - - - mlx5_stats_read_hw(dev,&tmp); - - /* copy to shadow */ - lps->m_shadow = tmp; - + unsigned int i; + unsigned int idx; + + priv_lock(priv); + for (i = 0; (i != priv->rxqs_n); ++i) { + if ((*priv->rxqs)[i] == NULL) + continue; + idx = (*priv->rxqs)[i]->stats.idx; + (*priv->rxqs)[i]->stats = + (struct mlx5_rxq_stats){ .idx = idx }; + } + for (i = 0; (i != priv->txqs_n); ++i) { + if ((*priv->txqs)[i] == NULL) + continue; + idx = (*priv->txqs)[i]->stats.idx; + (*priv->txqs)[i]->stats = + (struct mlx5_txq_stats){ .idx = idx }; + } +#ifndef MLX5_PMD_SOFT_COUNTERS + /* FIXME: reset hardware counters. */ +#endif priv_unlock(priv); } diff --git a/src/dpdk/drivers/net/mlx5/mlx5_trigger.c b/src/dpdk/drivers/net/mlx5/mlx5_trigger.c index e9b9a293..d4dccd88 100644 --- a/src/dpdk/drivers/net/mlx5/mlx5_trigger.c +++ b/src/dpdk/drivers/net/mlx5/mlx5_trigger.c @@ -33,14 +33,14 @@ /* DPDK headers don't like -pedantic. 
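The rewritten mlx5_stats_reset above zeroes each queue's counters by reassigning a compound literal while preserving the stats index. The same pattern in isolation:

#include <assert.h>
#include <stdint.h>

struct q_stats {
	uint64_t packets;
	uint64_t bytes;
	unsigned int idx; /* position in the per-queue stats table */
};

int
main(void)
{
	struct q_stats s = { .packets = 10, .bytes = 1500, .idx = 3 };
	unsigned int idx = s.idx;

	/* Zero every counter while keeping the index. */
	s = (struct q_stats){ .idx = idx };
	assert(s.packets == 0 && s.bytes == 0 && s.idx == 3);
	return 0;
}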
diff --git a/src/dpdk/drivers/net/mlx5/mlx5_trigger.c b/src/dpdk/drivers/net/mlx5/mlx5_trigger.c
index e9b9a293..d4dccd88 100644
--- a/src/dpdk/drivers/net/mlx5/mlx5_trigger.c
+++ b/src/dpdk/drivers/net/mlx5/mlx5_trigger.c
@@ -33,14 +33,14 @@
 /* DPDK headers don't like -pedantic. */
 #ifdef PEDANTIC
-#pragma GCC diagnostic ignored "-pedantic"
+#pragma GCC diagnostic ignored "-Wpedantic"
 #endif
 #include <rte_ether.h>
 #include <rte_ethdev.h>
 #include <rte_interrupts.h>
 #include <rte_alarm.h>
 #ifdef PEDANTIC
-#pragma GCC diagnostic error "-pedantic"
+#pragma GCC diagnostic error "-Wpedantic"
 #endif
 
 #include "mlx5.h"
diff --git a/src/dpdk/drivers/net/mlx5/mlx5_txq.c b/src/dpdk/drivers/net/mlx5/mlx5_txq.c
index 6fe61c4a..053665d5 100644
--- a/src/dpdk/drivers/net/mlx5/mlx5_txq.c
+++ b/src/dpdk/drivers/net/mlx5/mlx5_txq.c
@@ -40,23 +40,23 @@
 /* Verbs header. */
 /* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */
 #ifdef PEDANTIC
-#pragma GCC diagnostic ignored "-pedantic"
+#pragma GCC diagnostic ignored "-Wpedantic"
 #endif
 #include <infiniband/verbs.h>
 #ifdef PEDANTIC
-#pragma GCC diagnostic error "-pedantic"
+#pragma GCC diagnostic error "-Wpedantic"
 #endif
 
 /* DPDK headers don't like -pedantic. */
 #ifdef PEDANTIC
-#pragma GCC diagnostic ignored "-pedantic"
+#pragma GCC diagnostic ignored "-Wpedantic"
 #endif
 #include <rte_mbuf.h>
 #include <rte_malloc.h>
 #include <rte_ethdev.h>
 #include <rte_common.h>
 #ifdef PEDANTIC
-#pragma GCC diagnostic error "-pedantic"
+#pragma GCC diagnostic error "-Wpedantic"
 #endif
 
 #include "mlx5_utils.h"
@@ -81,8 +81,8 @@ txq_alloc_elts(struct txq_ctrl *txq_ctrl, unsigned int elts_n)
 	for (i = 0; (i != elts_n); ++i)
 		(*txq_ctrl->txq.elts)[i] = NULL;
-	for (i = 0; (i != txq_ctrl->txq.wqe_n); ++i) {
-		volatile union mlx5_wqe *wqe = &(*txq_ctrl->txq.wqes)[i];
+	for (i = 0; (i != (1u << txq_ctrl->txq.wqe_n)); ++i) {
+		volatile struct mlx5_wqe64 *wqe = &(*txq_ctrl->txq.wqes)[i];
 
 		memset((void *)(uintptr_t)wqe, 0x0, sizeof(*wqe));
 	}
@@ -101,7 +101,7 @@ txq_alloc_elts(struct txq_ctrl *txq_ctrl, unsigned int elts_n)
 static void
 txq_free_elts(struct txq_ctrl *txq_ctrl)
 {
-	unsigned int elts_n = txq_ctrl->txq.elts_n;
+	unsigned int elts_n = 1 << txq_ctrl->txq.elts_n;
 	unsigned int elts_head = txq_ctrl->txq.elts_head;
 	unsigned int elts_tail = txq_ctrl->txq.elts_tail;
 	struct rte_mbuf *(*elts)[elts_n] = txq_ctrl->txq.elts;
@@ -212,22 +212,22 @@ txq_setup(struct txq_ctrl *tmpl, struct txq_ctrl *txq_ctrl)
 		  "it should be set to %u", RTE_CACHE_LINE_SIZE);
 		return EINVAL;
 	}
-	tmpl->txq.cqe_n = ibcq->cqe + 1;
+	tmpl->txq.cqe_n = log2above(ibcq->cqe);
 	tmpl->txq.qp_num_8s = qp->ctrl_seg.qp_num << 8;
 	tmpl->txq.wqes =
-		(volatile union mlx5_wqe (*)[])
+		(volatile struct mlx5_wqe64 (*)[])
 		(uintptr_t)qp->gen_data.sqstart;
-	tmpl->txq.wqe_n = qp->sq.wqe_cnt;
+	tmpl->txq.wqe_n = log2above(qp->sq.wqe_cnt);
 	tmpl->txq.qp_db = &qp->gen_data.db[MLX5_SND_DBR];
 	tmpl->txq.bf_reg = qp->gen_data.bf->reg;
 	tmpl->txq.bf_offset = qp->gen_data.bf->offset;
-	tmpl->txq.bf_buf_size = qp->gen_data.bf->buf_size;
+	tmpl->txq.bf_buf_size = log2above(qp->gen_data.bf->buf_size);
 	tmpl->txq.cq_db = cq->dbrec;
 	tmpl->txq.cqes =
 		(volatile struct mlx5_cqe (*)[])
 		(uintptr_t)cq->active_buf->buf;
 	tmpl->txq.elts =
-		(struct rte_mbuf *(*)[tmpl->txq.elts_n])
+		(struct rte_mbuf *(*)[1 << tmpl->txq.elts_n])
 		((uintptr_t)txq_ctrl + sizeof(*txq_ctrl));
 	return 0;
 }
@@ -277,7 +277,7 @@ txq_ctrl_setup(struct rte_eth_dev *dev, struct txq_ctrl *txq_ctrl,
 	}
 	(void)conf; /* Thresholds configuration (ignored). */
 	assert(desc > MLX5_TX_COMP_THRESH);
-	tmpl.txq.elts_n = desc;
+	tmpl.txq.elts_n = log2above(desc);
 	/* MRs will be registered in mp2mr[] later. */
 	attr.rd = (struct ibv_exp_res_domain_init_attr){
 		.comp_mask = (IBV_EXP_RES_DOMAIN_THREAD_MODEL |
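The mlx5_txq.c hunks above switch elts_n, wqe_n, cqe_n and bf_buf_size from element counts to log2 values: the size is recovered as (1 << n), and a free-running ring index wraps with the mask ((1 << n) - 1) instead of a modulo. A small sketch of the convention; log2above() here mirrors the DPDK helper of the same name (smallest n such that (1 << n) >= v):

    #include <assert.h>
    #include <stdint.h>
    #include <stdio.h>

    /* Smallest n with (1 << n) >= v; exact powers map to their exponent. */
    static unsigned int log2above(unsigned int v)
    {
    	unsigned int l;
    	unsigned int r;

    	for (l = 0, r = 0; (v >> 1); ++l, v >>= 1)
    		r |= (v & 1);
    	return l + r;
    }

    int main(void)
    {
    	uint16_t elts_n = log2above(512); /* stored exponent: 9 */
    	unsigned int size = 1u << elts_n; /* recovered ring size */
    	unsigned int mask = size - 1;     /* wraparound mask */
    	unsigned int head = 1022;         /* free-running index */

    	assert(size == 512);
    	/* The head wraps without a division or branch. */
    	printf("slot = %u\n", head & mask); /* 1022 & 511 == 510 */
    	return 0;
    }

Storing only the exponent also shrinks the hot-path txq/rxq structures, which is presumably why the change touches every size field at once.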
@@ -338,9 +338,12 @@ txq_ctrl_setup(struct rte_eth_dev *dev, struct txq_ctrl *txq_ctrl,
 		.comp_mask = (IBV_EXP_QP_INIT_ATTR_PD |
 			      IBV_EXP_QP_INIT_ATTR_RES_DOMAIN),
 	};
-	if (priv->txq_inline && priv->txqs_n >= priv->txqs_inline) {
-		tmpl.txq.max_inline = priv->txq_inline;
-		attr.init.cap.max_inline_data = tmpl.txq.max_inline;
+	if (priv->txq_inline && (priv->txqs_n >= priv->txqs_inline)) {
+		tmpl.txq.max_inline =
+			((priv->txq_inline + (RTE_CACHE_LINE_SIZE - 1)) /
+			 RTE_CACHE_LINE_SIZE);
+		attr.init.cap.max_inline_data =
+			tmpl.txq.max_inline * RTE_CACHE_LINE_SIZE;
 	}
 	tmpl.qp = ibv_exp_create_qp(priv->ctx, &attr.init);
 	if (tmpl.qp == NULL) {
diff --git a/src/dpdk/drivers/net/mlx5/mlx5_vlan.c b/src/dpdk/drivers/net/mlx5/mlx5_vlan.c
index 4719e697..1b0fa40a 100644
--- a/src/dpdk/drivers/net/mlx5/mlx5_vlan.c
+++ b/src/dpdk/drivers/net/mlx5/mlx5_vlan.c
@@ -38,12 +38,12 @@
 
 /* DPDK headers don't like -pedantic. */
 #ifdef PEDANTIC
-#pragma GCC diagnostic ignored "-pedantic"
+#pragma GCC diagnostic ignored "-Wpedantic"
 #endif
 #include <rte_ethdev.h>
 #include <rte_common.h>
 #ifdef PEDANTIC
-#pragma GCC diagnostic error "-pedantic"
+#pragma GCC diagnostic error "-Wpedantic"
 #endif
 
 #include "mlx5_utils.h"
@@ -87,7 +87,8 @@ vlan_filter_set(struct rte_eth_dev *dev, uint16_t vlan_id, int on)
 		--priv->vlan_filter_n;
 		memmove(&priv->vlan_filter[i],
 			&priv->vlan_filter[i + 1],
-			priv->vlan_filter_n - i);
+			sizeof(priv->vlan_filter[i]) *
+			(priv->vlan_filter_n - i));
 		priv->vlan_filter[priv->vlan_filter_n] = 0;
 	} else {
 		assert(i == priv->vlan_filter_n);
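The max_inline hunk above stores the requested inline budget in whole cache lines, rounding up with the usual (x + N - 1) / N idiom, and converts back to bytes when filling max_inline_data for QP creation. A quick self-contained check of that arithmetic, assuming the common 64-byte cache line (DPDK defines RTE_CACHE_LINE_SIZE per architecture):

    #include <assert.h>

    #define RTE_CACHE_LINE_SIZE 64 /* assumed; per-arch in DPDK */

    int main(void)
    {
    	unsigned int txq_inline = 128; /* requested bytes */
    	unsigned int max_inline =
    		(txq_inline + (RTE_CACHE_LINE_SIZE - 1)) /
    		RTE_CACHE_LINE_SIZE;

    	assert(max_inline == 2);                         /* 128 B -> 2 lines */
    	assert(max_inline * RTE_CACHE_LINE_SIZE == 128); /* bytes handed to verbs */
    	/* A non-multiple request rounds up: 100 B -> 2 lines -> 128 B. */
    	assert((100 + RTE_CACHE_LINE_SIZE - 1) / RTE_CACHE_LINE_SIZE == 2);
    	return 0;
    }

Keeping max_inline in cache-line units lets the TX burst path size WQE inline segments in the same units the hardware consumes them, at the cost of rounding every request up to a full line.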
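The mlx5_vlan.c fix corrects a classic memmove() bug: the old length counted elements while memmove() counts bytes, so only part of the tail was shifted whenever the element type is wider than one byte. A self-contained illustration of the corrected deletion; the uint16_t array here merely stands in for priv->vlan_filter:

    #include <assert.h>
    #include <stdint.h>
    #include <string.h>

    int main(void)
    {
    	uint16_t vlan_filter[] = { 100, 200, 300, 400 };
    	unsigned int n = 4; /* elements in use */
    	unsigned int i = 1; /* delete vlan_filter[1] */

    	--n;
    	/* Shift (n - i) *elements*; memmove() needs the byte count. */
    	memmove(&vlan_filter[i], &vlan_filter[i + 1],
    		sizeof(vlan_filter[i]) * (n - i));
    	vlan_filter[n] = 0; /* clear the vacated slot */
    	assert(vlan_filter[0] == 100 && vlan_filter[1] == 300 &&
    	       vlan_filter[2] == 400 && vlan_filter[3] == 0);
    	return 0;
    }

Without the sizeof() factor, deleting an entry would move only (n - i) bytes, silently corrupting the filter table past the first element.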