Diffstat (limited to 'drivers/net')
68 files changed, 1311 insertions, 716 deletions
diff --git a/drivers/net/af_packet/rte_eth_af_packet.c b/drivers/net/af_packet/rte_eth_af_packet.c index 95a98c6b..264cfc08 100644 --- a/drivers/net/af_packet/rte_eth_af_packet.c +++ b/drivers/net/af_packet/rte_eth_af_packet.c @@ -433,8 +433,7 @@ eth_dev_mtu_set(struct rte_eth_dev *dev, uint16_t mtu) int ret; int s; unsigned int data_size = internals->req.tp_frame_size - - TPACKET2_HDRLEN - - sizeof(struct sockaddr_ll); + TPACKET2_HDRLEN; if (mtu > data_size) return -EINVAL; diff --git a/drivers/net/atlantic/atl_rxtx.c b/drivers/net/atlantic/atl_rxtx.c index fd909476..40c91379 100644 --- a/drivers/net/atlantic/atl_rxtx.c +++ b/drivers/net/atlantic/atl_rxtx.c @@ -21,6 +21,8 @@ #define ATL_TX_OFFLOAD_MASK ( \ PKT_TX_VLAN | \ + PKT_TX_IPV6 | \ + PKT_TX_IPV4 | \ PKT_TX_IP_CKSUM | \ PKT_TX_L4_MASK | \ PKT_TX_TCP_SEG) diff --git a/drivers/net/avf/base/avf_adminq_cmd.h b/drivers/net/avf/base/avf_adminq_cmd.h index 1709f317..79549118 100644 --- a/drivers/net/avf/base/avf_adminq_cmd.h +++ b/drivers/net/avf/base/avf_adminq_cmd.h @@ -1435,8 +1435,7 @@ struct avf_aqc_add_remove_cloud_filters_element_data { }; /* avf_aqc_add_rm_cloud_filt_elem_ext is used when - * AVF_AQC_ADD_REM_CLOUD_CMD_BIG_BUFFER flag is set. refer to - * DCR288 + * AVF_AQC_ADD_REM_CLOUD_CMD_BIG_BUFFER flag is set. */ struct avf_aqc_add_rm_cloud_filt_elem_ext { struct avf_aqc_add_remove_cloud_filters_element_data element; diff --git a/drivers/net/bnx2x/bnx2x.c b/drivers/net/bnx2x/bnx2x.c index 86c79c28..26b3828e 100644 --- a/drivers/net/bnx2x/bnx2x.c +++ b/drivers/net/bnx2x/bnx2x.c @@ -4201,6 +4201,9 @@ static uint16_t bnx2x_update_dsb_idx(struct bnx2x_softc *sc) struct host_sp_status_block *def_sb = sc->def_sb; uint16_t rc = 0; + if (!def_sb) + return 0; + mb(); /* status block is written to by the chip */ if (sc->def_att_idx != def_sb->atten_status_block.attn_bits_index) { @@ -4525,6 +4528,10 @@ static void bnx2x_handle_fp_tq(struct bnx2x_fastpath *fp, int scan_fp) struct bnx2x_softc *sc = fp->sc; uint8_t more_rx = FALSE; + /* Make sure FP is initialized */ + if (!fp->sb_running_index) + return; + PMD_DEBUG_PERIODIC_LOG(DEBUG, sc, "---> FP TASK QUEUE (%d) <--", fp->index); @@ -5809,7 +5816,7 @@ static int bnx2x_set_power_state(struct bnx2x_softc *sc, uint8_t state) /* If there is no power capability, silently succeed */ if (!(sc->devinfo.pcie_cap_flags & BNX2X_PM_CAPABLE_FLAG)) { - PMD_DRV_LOG(WARNING, sc, "No power capability"); + PMD_DRV_LOG(INFO, sc, "No power capability"); return 0; } @@ -6918,19 +6925,19 @@ static void bnx2x_link_report_locked(struct bnx2x_softc *sc) return; } - PMD_DRV_LOG(INFO, sc, "Change in link status : cur_data = %lx, last_reported_link = %lx\n", - cur_data.link_report_flags, - sc->last_reported_link.link_report_flags); + ELINK_DEBUG_P2(sc, "Change in link status : cur_data = %lx, last_reported_link = %lx", + cur_data.link_report_flags, + sc->last_reported_link.link_report_flags); sc->link_cnt++; - PMD_DRV_LOG(INFO, sc, "link status change count = %x\n", sc->link_cnt); + ELINK_DEBUG_P1(sc, "link status change count = %x", sc->link_cnt); /* report new link params and remember the state for the next time */ rte_memcpy(&sc->last_reported_link, &cur_data, sizeof(cur_data)); if (bnx2x_test_bit(BNX2X_LINK_REPORT_LINK_DOWN, &cur_data.link_report_flags)) { - PMD_DRV_LOG(INFO, sc, "NIC Link is Down"); + ELINK_DEBUG_P0(sc, "NIC Link is Down"); } else { __rte_unused const char *duplex; __rte_unused const char *flow; @@ -6938,8 +6945,10 @@ static void bnx2x_link_report_locked(struct bnx2x_softc *sc) if 
(bnx2x_test_and_clear_bit(BNX2X_LINK_REPORT_FULL_DUPLEX, &cur_data.link_report_flags)) { duplex = "full"; + ELINK_DEBUG_P0(sc, "link set to full duplex"); } else { duplex = "half"; + ELINK_DEBUG_P0(sc, "link set to half duplex"); } /* @@ -7123,7 +7132,7 @@ void bnx2x_periodic_callout(struct bnx2x_softc *sc) { if ((sc->state != BNX2X_STATE_OPEN) || (atomic_load_acq_long(&sc->periodic_flags) == PERIODIC_STOP)) { - PMD_DRV_LOG(INFO, sc, "periodic callout exit (state=0x%x)", + PMD_DRV_LOG(DEBUG, sc, "periodic callout exit (state=0x%x)", sc->state); return; } @@ -8317,7 +8326,7 @@ static int bnx2x_get_device_info(struct bnx2x_softc *sc) ((sc->devinfo.bc_ver >> 24) & 0xff), ((sc->devinfo.bc_ver >> 16) & 0xff), ((sc->devinfo.bc_ver >> 8) & 0xff)); - PMD_DRV_LOG(INFO, sc, "Bootcode version: %s", sc->devinfo.bc_ver_str); + PMD_DRV_LOG(DEBUG, sc, "Bootcode version: %s", sc->devinfo.bc_ver_str); /* get the bootcode shmem address */ sc->devinfo.mf_cfg_base = bnx2x_get_shmem_mf_cfg_base(sc); @@ -11743,42 +11752,36 @@ static const char *get_bnx2x_flags(uint32_t flags) return flag_str; } -/* - * Prints useful adapter info. - */ +/* Prints useful adapter info. */ void bnx2x_print_adapter_info(struct bnx2x_softc *sc) { int i = 0; - __rte_unused uint32_t ext_phy_type; - PMD_INIT_FUNC_TRACE(sc); - if (sc->link_vars.phy_flags & PHY_XGXS_FLAG) - ext_phy_type = ELINK_XGXS_EXT_PHY_TYPE(REG_RD(sc, - sc-> - devinfo.shmem_base - + offsetof(struct - shmem_region, - dev_info.port_hw_config - [0].external_phy_config))); - else - ext_phy_type = ELINK_SERDES_EXT_PHY_TYPE(REG_RD(sc, - sc-> - devinfo.shmem_base - + - offsetof(struct - shmem_region, - dev_info.port_hw_config - [0].external_phy_config))); - - PMD_DRV_LOG(INFO, sc, "\n\n===================================\n"); + PMD_DRV_LOG(INFO, sc, "========================================"); + /* DPDK and Driver versions */ + PMD_DRV_LOG(INFO, sc, "%12s : %s", "DPDK", + rte_version()); + PMD_DRV_LOG(INFO, sc, "%12s : %s", "Driver", + bnx2x_pmd_version()); + /* Firmware versions. */ + PMD_DRV_LOG(INFO, sc, "%12s : %d.%d.%d", + "Firmware", + BNX2X_5710_FW_MAJOR_VERSION, + BNX2X_5710_FW_MINOR_VERSION, + BNX2X_5710_FW_REVISION_VERSION); + PMD_DRV_LOG(INFO, sc, "%12s : %s", + "Bootcode", sc->devinfo.bc_ver_str); /* Hardware chip info. */ PMD_DRV_LOG(INFO, sc, "%12s : %#08x", "ASIC", sc->devinfo.chip_id); PMD_DRV_LOG(INFO, sc, "%12s : %c%d", "Rev", (CHIP_REV(sc) >> 12) + 'A', (CHIP_METAL(sc) >> 4)); - - /* Bus info. */ - PMD_DRV_LOG(INFO, sc, - "%12s : %d, ", "Bus PCIe", sc->devinfo.pcie_link_width); + /* Bus PCIe info. */ + PMD_DRV_LOG(INFO, sc, "%12s : 0x%x", "Vendor Id", + sc->devinfo.vendor_id); + PMD_DRV_LOG(INFO, sc, "%12s : 0x%x", "Device Id", + sc->devinfo.device_id); + PMD_DRV_LOG(INFO, sc, "%12s : width x%d, ", "Bus PCIe", + sc->devinfo.pcie_link_width); switch (sc->devinfo.pcie_link_speed) { case 1: PMD_DRV_LOG(INFO, sc, "%23s", "2.5 Gbps"); @@ -11792,62 +11795,45 @@ void bnx2x_print_adapter_info(struct bnx2x_softc *sc) default: PMD_DRV_LOG(INFO, sc, "%33s", "Unknown link speed"); } - /* Device features. */ PMD_DRV_LOG(INFO, sc, "%12s : ", "Flags"); - /* Miscellaneous flags. */ if (sc->devinfo.pcie_cap_flags & BNX2X_MSI_CAPABLE_FLAG) { PMD_DRV_LOG(INFO, sc, "%18s", "MSI"); i++; } - if (sc->devinfo.pcie_cap_flags & BNX2X_MSIX_CAPABLE_FLAG) { if (i > 0) PMD_DRV_LOG(INFO, sc, "|"); PMD_DRV_LOG(INFO, sc, "%20s", "MSI-X"); i++; } + PMD_DRV_LOG(INFO, sc, "%12s : %s", "OVLAN", (OVLAN(sc) ? "YES" : "NO")); + PMD_DRV_LOG(INFO, sc, "%12s : %s", "MF", (IS_MF(sc) ? 
"YES" : "NO")); + PMD_DRV_LOG(INFO, sc, "========================================"); +} - if (IS_PF(sc)) { - PMD_DRV_LOG(INFO, sc, "%12s : ", "Queues"); - switch (sc->sp->rss_rdata.rss_mode) { - case ETH_RSS_MODE_DISABLED: - PMD_DRV_LOG(INFO, sc, "%19s", "None"); - break; - case ETH_RSS_MODE_REGULAR: - PMD_DRV_LOG(INFO, sc, - "%18s : %d", "RSS", sc->num_queues); - break; - default: - PMD_DRV_LOG(INFO, sc, "%22s", "Unknown"); - break; - } - } - - /* RTE and Driver versions */ - PMD_DRV_LOG(INFO, sc, "%12s : %s", "DPDK", - rte_version()); - PMD_DRV_LOG(INFO, sc, "%12s : %s", "Driver", - bnx2x_pmd_version()); +/* Prints useful device info. */ +void bnx2x_print_device_info(struct bnx2x_softc *sc) +{ + __rte_unused uint32_t ext_phy_type; + uint32_t offset, reg_val; - /* Firmware versions and device features. */ - PMD_DRV_LOG(INFO, sc, "%12s : %d.%d.%d", - "Firmware", - BNX2X_5710_FW_MAJOR_VERSION, - BNX2X_5710_FW_MINOR_VERSION, - BNX2X_5710_FW_REVISION_VERSION); - PMD_DRV_LOG(INFO, sc, "%12s : %s", - "Bootcode", sc->devinfo.bc_ver_str); + PMD_INIT_FUNC_TRACE(sc); + offset = offsetof(struct shmem_region, + dev_info.port_hw_config[0].external_phy_config); + reg_val = REG_RD(sc, sc->devinfo.shmem_base + offset); + if (sc->link_vars.phy_flags & PHY_XGXS_FLAG) + ext_phy_type = ELINK_XGXS_EXT_PHY_TYPE(reg_val); + else + ext_phy_type = ELINK_SERDES_EXT_PHY_TYPE(reg_val); - PMD_DRV_LOG(INFO, sc, "\n\n===================================\n"); + /* Device features. */ PMD_DRV_LOG(INFO, sc, "%12s : %u", "Bnx2x Func", sc->pcie_func); PMD_DRV_LOG(INFO, sc, "%12s : %s", "Bnx2x Flags", get_bnx2x_flags(sc->flags)); PMD_DRV_LOG(INFO, sc, "%12s : %s", "DMAE Is", (sc->dmae_ready ? "Ready" : "Not Ready")); - PMD_DRV_LOG(INFO, sc, "%12s : %s", "OVLAN", (OVLAN(sc) ? "YES" : "NO")); - PMD_DRV_LOG(INFO, sc, "%12s : %s", "MF", (IS_MF(sc) ? "YES" : "NO")); PMD_DRV_LOG(INFO, sc, "%12s : %u", "MTU", sc->mtu); PMD_DRV_LOG(INFO, sc, "%12s : %s", "PHY Type", get_ext_phy_type(ext_phy_type)); @@ -11863,9 +11849,30 @@ void bnx2x_print_adapter_info(struct bnx2x_softc *sc) if (sc->recovery_state) PMD_DRV_LOG(INFO, sc, "%12s : %s", "Recovery", get_recovery_state(sc->recovery_state)); + /* Queue info. 
*/ + if (IS_PF(sc)) { + switch (sc->sp->rss_rdata.rss_mode) { + case ETH_RSS_MODE_DISABLED: + PMD_DRV_LOG(INFO, sc, "%12s : %s", "Queues", "RSS mode - None"); + break; + case ETH_RSS_MODE_REGULAR: + PMD_DRV_LOG(INFO, sc, "%12s : %s,", "Queues", "RSS mode - Regular"); + PMD_DRV_LOG(INFO, sc, "%16d", sc->num_queues); + break; + default: + PMD_DRV_LOG(INFO, sc, "%12s : %s", "Queues", "RSS mode - Unknown"); + break; + } + } PMD_DRV_LOG(INFO, sc, "%12s : CQ = %lx, EQ = %lx", "SPQ Left", sc->cq_spq_left, sc->eq_spq_left); + PMD_DRV_LOG(INFO, sc, "%12s : %x", "Switch", sc->link_params.switch_cfg); - PMD_DRV_LOG(INFO, sc, "\n\n===================================\n"); + PMD_DRV_LOG(INFO, sc, "pcie_bus=%d, pcie_device=%d", + sc->pcie_bus, sc->pcie_device); + PMD_DRV_LOG(INFO, sc, "bar0.addr=%p, bar1.addr=%p", + sc->bar[BAR0].base_addr, sc->bar[BAR1].base_addr); + PMD_DRV_LOG(INFO, sc, "port=%d, path=%d, vnic=%d, func=%d", + PORT_ID(sc), PATH_ID(sc), VNIC_ID(sc), FUNC_ID(sc)); } diff --git a/drivers/net/bnx2x/bnx2x.h b/drivers/net/bnx2x/bnx2x.h index 74780725..32a12294 100644 --- a/drivers/net/bnx2x/bnx2x.h +++ b/drivers/net/bnx2x/bnx2x.h @@ -1937,6 +1937,7 @@ void bnx2x_dump_tx_chain(struct bnx2x_fastpath * fp, int bd_prod, int count); int bnx2x_tx_encap(struct bnx2x_tx_queue *txq, struct rte_mbuf *m0); uint8_t bnx2x_txeof(struct bnx2x_softc *sc, struct bnx2x_fastpath *fp); void bnx2x_print_adapter_info(struct bnx2x_softc *sc); +void bnx2x_print_device_info(struct bnx2x_softc *sc); int bnx2x_intr_legacy(struct bnx2x_softc *sc, int scan_fp); void bnx2x_link_status_update(struct bnx2x_softc *sc); int bnx2x_complete_sp(struct bnx2x_softc *sc); @@ -1984,7 +1985,7 @@ bnx2x_set_rx_mode(struct bnx2x_softc *sc) bnx2x_vf_set_rx_mode(sc); } } else { - PMD_DRV_LOG(NOTICE, sc, "Card is not ready to change mode"); + PMD_DRV_LOG(INFO, sc, "Card is not ready to change mode"); } } diff --git a/drivers/net/bnx2x/bnx2x_ethdev.c b/drivers/net/bnx2x/bnx2x_ethdev.c index 0057843b..cc7816dd 100644 --- a/drivers/net/bnx2x/bnx2x_ethdev.c +++ b/drivers/net/bnx2x/bnx2x_ethdev.c @@ -245,8 +245,7 @@ bnx2x_dev_start(struct rte_eth_dev *dev) return -3; } - /* Print important adapter info for the user. */ - bnx2x_print_adapter_info(sc); + bnx2x_print_device_info(sc); return ret; } @@ -574,6 +573,7 @@ bnx2x_common_dev_init(struct rte_eth_dev *eth_dev, int is_vf) struct rte_pci_device *pci_dev; struct rte_pci_addr pci_addr; struct bnx2x_softc *sc; + static bool adapter_info = true; /* Extract key data structures */ sc = eth_dev->data->dev_private; @@ -632,8 +632,15 @@ bnx2x_common_dev_init(struct rte_eth_dev *eth_dev, int is_vf) return ret; } + /* Print important adapter info for the user. 
*/ + if (adapter_info) { + bnx2x_print_adapter_info(sc); + adapter_info = false; + } + /* schedule periodic poll for slowpath link events */ if (IS_PF(sc)) { + PMD_DRV_LOG(DEBUG, sc, "Scheduling periodic poll for slowpath link events"); ret = rte_eal_alarm_set(BNX2X_SP_TIMER_PERIOD, bnx2x_periodic_start, (void *)eth_dev); if (ret) { @@ -645,15 +652,6 @@ bnx2x_common_dev_init(struct rte_eth_dev *eth_dev, int is_vf) eth_dev->data->mac_addrs = (struct ether_addr *)sc->link_params.mac_addr; - PMD_DRV_LOG(INFO, sc, "pcie_bus=%d, pcie_device=%d", - sc->pcie_bus, sc->pcie_device); - PMD_DRV_LOG(INFO, sc, "bar0.addr=%p, bar1.addr=%p", - sc->bar[BAR0].base_addr, sc->bar[BAR1].base_addr); - PMD_DRV_LOG(INFO, sc, "port=%d, path=%d, vnic=%d, func=%d", - PORT_ID(sc), PATH_ID(sc), VNIC_ID(sc), FUNC_ID(sc)); - PMD_DRV_LOG(INFO, sc, "portID=%d vendorID=0x%x deviceID=0x%x", - eth_dev->data->port_id, pci_dev->id.vendor_id, pci_dev->id.device_id); - if (IS_VF(sc)) { rte_spinlock_init(&sc->vf2pf_lock); diff --git a/drivers/net/bnx2x/bnx2x_ethdev.h b/drivers/net/bnx2x/bnx2x_ethdev.h index 807ba178..45958db9 100644 --- a/drivers/net/bnx2x/bnx2x_ethdev.h +++ b/drivers/net/bnx2x/bnx2x_ethdev.h @@ -47,6 +47,7 @@ #define FALSE 0 #define TRUE 1 +typedef int bool; #define false 0 #define true 1 #define min(a,b) RTE_MIN(a,b) diff --git a/drivers/net/bnx2x/ecore_sp.c b/drivers/net/bnx2x/ecore_sp.c index ab730abe..6d2bb815 100644 --- a/drivers/net/bnx2x/ecore_sp.c +++ b/drivers/net/bnx2x/ecore_sp.c @@ -530,17 +530,15 @@ static void __ecore_vlan_mac_h_read_unlock(struct bnx2x_softc *sc, #endif } else { o->head_reader--; - PMD_DRV_LOG(INFO, sc, - "vlan_mac_lock - decreased readers to %d", - o->head_reader); + ECORE_MSG(sc, "vlan_mac_lock - decreased readers to %d", + o->head_reader); } /* It's possible a new pending execution was added, and that this reader * was last - if so we need to execute the command. 
*/ if (!o->head_reader && o->head_exe_request) { - PMD_DRV_LOG(INFO, sc, - "vlan_mac_lock - reader release encountered a pending request"); + ECORE_MSG(sc, "vlan_mac_lock - reader release encountered a pending request"); /* Writer release will do the trick */ __ecore_vlan_mac_h_write_unlock(sc, o); diff --git a/drivers/net/bonding/rte_eth_bond_api.c b/drivers/net/bonding/rte_eth_bond_api.c index 21bcd504..ac084c4f 100644 --- a/drivers/net/bonding/rte_eth_bond_api.c +++ b/drivers/net/bonding/rte_eth_bond_api.c @@ -19,7 +19,10 @@ int check_for_bonded_ethdev(const struct rte_eth_dev *eth_dev) { /* Check valid pointer */ - if (eth_dev->device->driver->name == NULL) + if (eth_dev == NULL || + eth_dev->device == NULL || + eth_dev->device->driver == NULL || + eth_dev->device->driver->name == NULL) return -1; /* return 0 if driver name matches */ @@ -126,6 +129,12 @@ deactivate_slave(struct rte_eth_dev *eth_dev, uint16_t port_id) RTE_ASSERT(active_count < RTE_DIM(internals->active_slaves)); internals->active_slave_count = active_count; + /* Resetting active_slave when reaches to max + * no of slaves in active list + */ + if (internals->active_slave >= active_count) + internals->active_slave = 0; + if (eth_dev->data->dev_started) { if (internals->mode == BONDING_MODE_8023AD) { bond_mode_8023ad_start(eth_dev); diff --git a/drivers/net/bonding/rte_eth_bond_pmd.c b/drivers/net/bonding/rte_eth_bond_pmd.c index 44deaf11..7ed69b38 100644 --- a/drivers/net/bonding/rte_eth_bond_pmd.c +++ b/drivers/net/bonding/rte_eth_bond_pmd.c @@ -84,7 +84,7 @@ bond_ethdev_rx_burst(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts) active_slave = 0; } - if (++internals->active_slave == slave_count) + if (++internals->active_slave >= slave_count) internals->active_slave = 0; return num_rx_total; } @@ -288,7 +288,7 @@ bond_ethdev_rx_burst_8023ad_fast_queue(void *queue, struct rte_mbuf **bufs, active_slave = 0; } - if (++internals->active_slave == slave_count) + if (++internals->active_slave >= slave_count) internals->active_slave = 0; return num_rx_total; @@ -474,7 +474,7 @@ bond_ethdev_rx_burst_8023ad(void *queue, struct rte_mbuf **bufs, idx = 0; } - if (++internals->active_slave == slave_count) + if (++internals->active_slave >= slave_count) internals->active_slave = 0; return num_rx_total; diff --git a/drivers/net/cxgbe/cxgbe_flow.c b/drivers/net/cxgbe/cxgbe_flow.c index 4deaff8f..7b87bdf5 100644 --- a/drivers/net/cxgbe/cxgbe_flow.c +++ b/drivers/net/cxgbe/cxgbe_flow.c @@ -732,6 +732,10 @@ cxgbe_rtef_parse_items(struct rte_flow *flow, "parse items cannot be repeated (except void)"); repeat[i->type] = 1; + /* No spec found for this pattern item. 
Skip it */ + if (!i->spec) + break; + /* validate the item */ ret = cxgbe_validate_item(i, e); if (ret) diff --git a/drivers/net/cxgbe/cxgbe_main.c b/drivers/net/cxgbe/cxgbe_main.c index ec080e5d..5fa6cdd0 100644 --- a/drivers/net/cxgbe/cxgbe_main.c +++ b/drivers/net/cxgbe/cxgbe_main.c @@ -122,6 +122,7 @@ int setup_sge_ctrl_txq(struct adapter *adapter) int err = 0, i = 0; for_each_port(adapter, i) { + struct port_info *pi = adap2pinfo(adapter, i); char name[RTE_ETH_NAME_MAX_LEN]; struct sge_ctrl_txq *q = &s->ctrlq[i]; @@ -135,16 +136,19 @@ int setup_sge_ctrl_txq(struct adapter *adapter) err); goto out; } - snprintf(name, sizeof(name), "cxgbe_ctrl_pool_%d", i); + snprintf(name, sizeof(name), "%s_ctrl_pool_%d", + pi->eth_dev->device->driver->name, + pi->eth_dev->data->port_id); q->mb_pool = rte_pktmbuf_pool_create(name, s->ctrlq[i].q.size, RTE_CACHE_LINE_SIZE, RTE_MBUF_PRIV_ALIGN, RTE_MBUF_DEFAULT_BUF_SIZE, SOCKET_ID_ANY); if (!q->mb_pool) { - dev_err(adapter, "Can't create ctrl pool for port: %d", - i); - err = -ENOMEM; + err = -rte_errno; + dev_err(adapter, + "Can't create ctrl pool for port %d. Err: %d\n", + pi->eth_dev->data->port_id, err); goto out; } } @@ -411,7 +415,7 @@ static int tid_init(struct tid_info *t) return -ENOMEM; t->atid_tab = (union aopen_entry *)&t->tid_tab[t->ntids]; - t->ftid_tab = (struct filter_entry *)&t->tid_tab[t->natids]; + t->ftid_tab = (struct filter_entry *)&t->atid_tab[t->natids]; t->ftid_bmap_array = t4_os_alloc(ftid_bmap_size); if (!t->ftid_bmap_array) { tid_free(t); diff --git a/drivers/net/dpaa/dpaa_ethdev.c b/drivers/net/dpaa/dpaa_ethdev.c index d0572b3d..5448a2ca 100644 --- a/drivers/net/dpaa/dpaa_ethdev.c +++ b/drivers/net/dpaa/dpaa_ethdev.c @@ -1223,8 +1223,12 @@ dpaa_dev_init(struct rte_eth_dev *eth_dev) PMD_INIT_FUNC_TRACE(); /* For secondary processes, the primary has done all the work */ - if (rte_eal_process_type() != RTE_PROC_PRIMARY) + if (rte_eal_process_type() != RTE_PROC_PRIMARY) { + eth_dev->dev_ops = &dpaa_devops; + /* Plugging of UCODE burst API not supported in Secondary */ + eth_dev->rx_pkt_burst = dpaa_eth_queue_rx; return 0; + } dpaa_device = DEV_TO_DPAA_DEVICE(eth_dev->device); dev_id = dpaa_device->id.dev_id; diff --git a/drivers/net/dpaa2/dpaa2_ethdev.c b/drivers/net/dpaa2/dpaa2_ethdev.c index fa71807e..39f85ae7 100644 --- a/drivers/net/dpaa2/dpaa2_ethdev.c +++ b/drivers/net/dpaa2/dpaa2_ethdev.c @@ -311,8 +311,7 @@ dpaa2_free_rx_tx_queues(struct rte_eth_dev *dev) /* cleanup tx queue cscn */ for (i = 0; i < priv->nb_tx_queues; i++) { dpaa2_q = (struct dpaa2_queue *)priv->tx_vq[i]; - if (!dpaa2_q->cscn) - rte_free(dpaa2_q->cscn); + rte_free(dpaa2_q->cscn); } /*free memory for all queues (RX+TX) */ rte_free(priv->rx_vq[0]); @@ -1919,8 +1918,15 @@ dpaa2_dev_init(struct rte_eth_dev *eth_dev) PMD_INIT_FUNC_TRACE(); /* For secondary processes, the primary has done all the work */ - if (rte_eal_process_type() != RTE_PROC_PRIMARY) + if (rte_eal_process_type() != RTE_PROC_PRIMARY) { + /* In case of secondary, only burst and ops API need to be + * plugged. 
+ */ + eth_dev->dev_ops = &dpaa2_ethdev_ops; + eth_dev->rx_pkt_burst = dpaa2_dev_prefetch_rx; + eth_dev->tx_pkt_burst = dpaa2_dev_tx; return 0; + } dpaa2_dev = container_of(dev, struct rte_dpaa2_device, device); diff --git a/drivers/net/e1000/em_rxtx.c b/drivers/net/e1000/em_rxtx.c index a9cd7651..005e1ea9 100644 --- a/drivers/net/e1000/em_rxtx.c +++ b/drivers/net/e1000/em_rxtx.c @@ -50,6 +50,8 @@ #define E1000_RXDCTL_GRAN 0x01000000 /* RXDCTL Granularity */ #define E1000_TX_OFFLOAD_MASK ( \ + PKT_TX_IPV6 | \ + PKT_TX_IPV4 | \ PKT_TX_IP_CKSUM | \ PKT_TX_L4_MASK | \ PKT_TX_VLAN_PKT) diff --git a/drivers/net/e1000/igb_ethdev.c b/drivers/net/e1000/igb_ethdev.c index d9d29d22..87c9aedf 100644 --- a/drivers/net/e1000/igb_ethdev.c +++ b/drivers/net/e1000/igb_ethdev.c @@ -68,6 +68,9 @@ #define E1000_VET_VET_EXT 0xFFFF0000 #define E1000_VET_VET_EXT_SHIFT 16 +/* MSI-X other interrupt vector */ +#define IGB_MSIX_OTHER_INTR_VEC 0 + static int eth_igb_configure(struct rte_eth_dev *dev); static int eth_igb_start(struct rte_eth_dev *dev); static void eth_igb_stop(struct rte_eth_dev *dev); @@ -138,7 +141,7 @@ static void igb_vlan_hw_extend_disable(struct rte_eth_dev *dev); static int eth_igb_led_on(struct rte_eth_dev *dev); static int eth_igb_led_off(struct rte_eth_dev *dev); -static void igb_intr_disable(struct e1000_hw *hw); +static void igb_intr_disable(struct rte_eth_dev *dev); static int igb_get_rx_buffer_size(struct e1000_hw *hw); static int eth_igb_rar_set(struct rte_eth_dev *dev, struct ether_addr *mac_addr, @@ -538,14 +541,31 @@ igb_intr_enable(struct rte_eth_dev *dev) E1000_DEV_PRIVATE_TO_INTR(dev->data->dev_private); struct e1000_hw *hw = E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private); + struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(dev); + struct rte_intr_handle *intr_handle = &pci_dev->intr_handle; + + if (rte_intr_allow_others(intr_handle) && + dev->data->dev_conf.intr_conf.lsc != 0) { + E1000_WRITE_REG(hw, E1000_EIMS, 1 << IGB_MSIX_OTHER_INTR_VEC); + } E1000_WRITE_REG(hw, E1000_IMS, intr->mask); E1000_WRITE_FLUSH(hw); } static void -igb_intr_disable(struct e1000_hw *hw) +igb_intr_disable(struct rte_eth_dev *dev) { + struct e1000_hw *hw = + E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private); + struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(dev); + struct rte_intr_handle *intr_handle = &pci_dev->intr_handle; + + if (rte_intr_allow_others(intr_handle) && + dev->data->dev_conf.intr_conf.lsc != 0) { + E1000_WRITE_REG(hw, E1000_EIMC, 1 << IGB_MSIX_OTHER_INTR_VEC); + } + E1000_WRITE_REG(hw, E1000_IMC, ~0); E1000_WRITE_FLUSH(hw); } @@ -1486,7 +1506,7 @@ eth_igb_stop(struct rte_eth_dev *dev) eth_igb_rxtx_control(dev, false); - igb_intr_disable(hw); + igb_intr_disable(dev); /* disable intr eventfd mapping */ rte_intr_disable(intr_handle); @@ -2768,12 +2788,15 @@ static int eth_igb_rxq_interrupt_setup(struct rte_eth_dev *dev) uint32_t mask, regval; struct e1000_hw *hw = E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private); + struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(dev); + struct rte_intr_handle *intr_handle = &pci_dev->intr_handle; + int misc_shift = rte_intr_allow_others(intr_handle) ? 
1 : 0; struct rte_eth_dev_info dev_info; memset(&dev_info, 0, sizeof(dev_info)); eth_igb_infos_get(dev, &dev_info); - mask = 0xFFFFFFFF >> (32 - dev_info.max_rx_queues); + mask = (0xFFFFFFFF >> (32 - dev_info.max_rx_queues)) << misc_shift; regval = E1000_READ_REG(hw, E1000_EIMS); E1000_WRITE_REG(hw, E1000_EIMS, regval | mask); @@ -2800,7 +2823,7 @@ eth_igb_interrupt_get_status(struct rte_eth_dev *dev) struct e1000_interrupt *intr = E1000_DEV_PRIVATE_TO_INTR(dev->data->dev_private); - igb_intr_disable(hw); + igb_intr_disable(dev); /* read-on-clear nic registers here */ icr = E1000_READ_REG(hw, E1000_ICR); @@ -5583,13 +5606,17 @@ eth_igb_configure_msix_intr(struct rte_eth_dev *dev) E1000_GPIE_NSICR); intr_mask = RTE_LEN2MASK(intr_handle->nb_efd, uint32_t) << misc_shift; + + if (dev->data->dev_conf.intr_conf.lsc != 0) + intr_mask |= (1 << IGB_MSIX_OTHER_INTR_VEC); + regval = E1000_READ_REG(hw, E1000_EIAC); E1000_WRITE_REG(hw, E1000_EIAC, regval | intr_mask); /* enable msix_other interrupt */ regval = E1000_READ_REG(hw, E1000_EIMS); E1000_WRITE_REG(hw, E1000_EIMS, regval | intr_mask); - tmpval = (dev->data->nb_rx_queues | E1000_IVAR_VALID) << 8; + tmpval = (IGB_MSIX_OTHER_INTR_VEC | E1000_IVAR_VALID) << 8; E1000_WRITE_REG(hw, E1000_IVAR_MISC, tmpval); } @@ -5598,6 +5625,10 @@ eth_igb_configure_msix_intr(struct rte_eth_dev *dev) */ intr_mask = RTE_LEN2MASK(intr_handle->nb_efd, uint32_t) << misc_shift; + + if (dev->data->dev_conf.intr_conf.lsc != 0) + intr_mask |= (1 << IGB_MSIX_OTHER_INTR_VEC); + regval = E1000_READ_REG(hw, E1000_EIAM); E1000_WRITE_REG(hw, E1000_EIAM, regval | intr_mask); diff --git a/drivers/net/ena/ena_ethdev.c b/drivers/net/ena/ena_ethdev.c index a07bd2b4..6f424337 100644 --- a/drivers/net/ena/ena_ethdev.c +++ b/drivers/net/ena/ena_ethdev.c @@ -189,6 +189,8 @@ static const struct ena_stats ena_stats_ena_com_strings[] = { #define ENA_TX_OFFLOAD_MASK (\ PKT_TX_L4_MASK | \ + PKT_TX_IPV6 | \ + PKT_TX_IPV4 | \ PKT_TX_IP_CKSUM | \ PKT_TX_TCP_SEG) @@ -240,10 +242,12 @@ static void ena_tx_queue_release_bufs(struct ena_ring *ring); static int ena_link_update(struct rte_eth_dev *dev, int wait_to_complete); static int ena_create_io_queue(struct ena_ring *ring); -static void ena_free_io_queues_all(struct ena_adapter *adapter); -static int ena_queue_restart(struct ena_ring *ring); -static int ena_queue_restart_all(struct rte_eth_dev *dev, - enum ena_ring_type ring_type); +static void ena_queue_stop(struct ena_ring *ring); +static void ena_queue_stop_all(struct rte_eth_dev *dev, + enum ena_ring_type ring_type); +static int ena_queue_start(struct ena_ring *ring); +static int ena_queue_start_all(struct rte_eth_dev *dev, + enum ena_ring_type ring_type); static void ena_stats_restart(struct rte_eth_dev *dev); static void ena_infos_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info); @@ -256,6 +260,8 @@ static int ena_rss_reta_query(struct rte_eth_dev *dev, static int ena_get_sset_count(struct rte_eth_dev *dev, int sset); static void ena_interrupt_handler_rte(void *cb_arg); static void ena_timer_wd_callback(struct rte_timer *timer, void *arg); +static void ena_destroy_device(struct rte_eth_dev *eth_dev); +static int eth_ena_dev_init(struct rte_eth_dev *eth_dev); static const struct eth_dev_ops ena_dev_ops = { .dev_configure = ena_dev_configure, @@ -539,64 +545,14 @@ static void ena_close(struct rte_eth_dev *dev) static int ena_dev_reset(struct rte_eth_dev *dev) { - struct rte_mempool *mb_pool_rx[ENA_MAX_NUM_QUEUES]; - struct rte_eth_dev *eth_dev; - struct rte_pci_device *pci_dev; - 
struct rte_intr_handle *intr_handle; - struct ena_com_dev *ena_dev; - struct ena_com_dev_get_features_ctx get_feat_ctx; - struct ena_adapter *adapter; - int nb_queues; - int rc, i; - bool wd_state; - - adapter = (struct ena_adapter *)(dev->data->dev_private); - ena_dev = &adapter->ena_dev; - eth_dev = adapter->rte_dev; - pci_dev = RTE_ETH_DEV_TO_PCI(eth_dev); - intr_handle = &pci_dev->intr_handle; - nb_queues = eth_dev->data->nb_rx_queues; - - ena_com_set_admin_running_state(ena_dev, false); + int rc = 0; - rc = ena_com_dev_reset(ena_dev, adapter->reset_reason); + ena_destroy_device(dev); + rc = eth_ena_dev_init(dev); if (rc) - RTE_LOG(ERR, PMD, "Device reset failed\n"); - - for (i = 0; i < nb_queues; i++) - mb_pool_rx[i] = adapter->rx_ring[i].mb_pool; - - ena_rx_queue_release_all(eth_dev); - ena_tx_queue_release_all(eth_dev); - - rte_intr_disable(intr_handle); - - ena_com_abort_admin_commands(ena_dev); - ena_com_wait_for_abort_completion(ena_dev); - ena_com_admin_destroy(ena_dev); - ena_com_mmio_reg_read_request_destroy(ena_dev); - - rc = ena_device_init(ena_dev, &get_feat_ctx, &wd_state); - if (rc) { PMD_INIT_LOG(CRIT, "Cannot initialize device\n"); - return rc; - } - adapter->wd_state = wd_state; - - rte_intr_enable(intr_handle); - ena_com_set_admin_polling_mode(ena_dev, false); - ena_com_admin_aenq_enable(ena_dev); - - for (i = 0; i < nb_queues; ++i) - ena_rx_queue_setup(eth_dev, i, adapter->rx_ring_size, 0, NULL, - mb_pool_rx[i]); - - for (i = 0; i < nb_queues; ++i) - ena_tx_queue_setup(eth_dev, i, adapter->tx_ring_size, 0, NULL); - adapter->trigger_reset = false; - - return 0; + return rc; } static int ena_rss_reta_update(struct rte_eth_dev *dev, @@ -766,11 +722,6 @@ static void ena_rx_queue_release(void *queue) { struct ena_ring *ring = (struct ena_ring *)queue; - ena_assert_msg(ring->configured, - "API violation - releasing not configured queue"); - ena_assert_msg(ring->adapter->state != ENA_ADAPTER_STATE_RUNNING, - "API violation"); - /* Free ring resources */ if (ring->rx_buffer_info) rte_free(ring->rx_buffer_info); @@ -794,14 +745,6 @@ static void ena_tx_queue_release(void *queue) { struct ena_ring *ring = (struct ena_ring *)queue; - ena_assert_msg(ring->configured, - "API violation. 
Releasing not configured queue"); - ena_assert_msg(ring->adapter->state != ENA_ADAPTER_STATE_RUNNING, - "API violation"); - - /* Free all bufs */ - ena_tx_queue_release_bufs(ring); - /* Free ring resources */ if (ring->tx_buffer_info) rte_free(ring->tx_buffer_info); @@ -820,17 +763,13 @@ static void ena_tx_queue_release(void *queue) static void ena_rx_queue_release_bufs(struct ena_ring *ring) { - unsigned int ring_mask = ring->ring_size - 1; - - while (ring->next_to_clean != ring->next_to_use) { - struct rte_mbuf *m = - ring->rx_buffer_info[ring->next_to_clean & ring_mask]; - - if (m) - rte_mbuf_raw_free(m); + unsigned int i; - ring->next_to_clean++; - } + for (i = 0; i < ring->ring_size; ++i) + if (ring->rx_buffer_info[i]) { + rte_mbuf_raw_free(ring->rx_buffer_info[i]); + ring->rx_buffer_info[i] = NULL; + } } static void ena_tx_queue_release_bufs(struct ena_ring *ring) @@ -842,8 +781,6 @@ static void ena_tx_queue_release_bufs(struct ena_ring *ring) if (tx_buf->mbuf) rte_pktmbuf_free(tx_buf->mbuf); - - ring->next_to_clean++; } } @@ -862,8 +799,8 @@ static int ena_link_update(struct rte_eth_dev *dev, return 0; } -static int ena_queue_restart_all(struct rte_eth_dev *dev, - enum ena_ring_type ring_type) +static int ena_queue_start_all(struct rte_eth_dev *dev, + enum ena_ring_type ring_type) { struct ena_adapter *adapter = (struct ena_adapter *)(dev->data->dev_private); @@ -891,18 +828,25 @@ static int ena_queue_restart_all(struct rte_eth_dev *dev, "Inconsistent state of tx queues\n"); } - rc = ena_queue_restart(&queues[i]); + rc = ena_queue_start(&queues[i]); if (rc) { PMD_INIT_LOG(ERR, - "failed to restart queue %d type(%d)", + "failed to start queue %d type(%d)", i, ring_type); - return rc; + goto err; } } } return 0; + +err: + while (i--) + if (queues[i].configured) + ena_queue_stop(&queues[i]); + + return rc; } static uint32_t ena_get_mtu_conf(struct ena_adapter *adapter) @@ -1051,19 +995,19 @@ static int ena_start(struct rte_eth_dev *dev) if (rc) return rc; - rc = ena_queue_restart_all(dev, ENA_RING_TYPE_RX); + rc = ena_queue_start_all(dev, ENA_RING_TYPE_RX); if (rc) return rc; - rc = ena_queue_restart_all(dev, ENA_RING_TYPE_TX); + rc = ena_queue_start_all(dev, ENA_RING_TYPE_TX); if (rc) - return rc; + goto err_start_tx; if (adapter->rte_dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS_FLAG && adapter->rte_dev->data->nb_rx_queues > 0) { rc = ena_rss_init_default(adapter); if (rc) - return rc; + goto err_rss_init; } ena_stats_restart(dev); @@ -1078,15 +1022,30 @@ static int ena_start(struct rte_eth_dev *dev) adapter->state = ENA_ADAPTER_STATE_RUNNING; return 0; + +err_rss_init: + ena_queue_stop_all(dev, ENA_RING_TYPE_TX); +err_start_tx: + ena_queue_stop_all(dev, ENA_RING_TYPE_RX); + return rc; } static void ena_stop(struct rte_eth_dev *dev) { struct ena_adapter *adapter = (struct ena_adapter *)(dev->data->dev_private); + struct ena_com_dev *ena_dev = &adapter->ena_dev; + int rc; rte_timer_stop_sync(&adapter->timer_wd); - ena_free_io_queues_all(adapter); + ena_queue_stop_all(dev, ENA_RING_TYPE_TX); + ena_queue_stop_all(dev, ENA_RING_TYPE_RX); + + if (adapter->trigger_reset) { + rc = ena_com_dev_reset(ena_dev, adapter->reset_reason); + if (rc) + RTE_LOG(ERR, PMD, "Device reset failed rc=%d\n", rc); + } adapter->state = ENA_ADAPTER_STATE_STOPPED; } @@ -1149,36 +1108,46 @@ static int ena_create_io_queue(struct ena_ring *ring) return 0; } -static void ena_free_io_queues_all(struct ena_adapter *adapter) +static void ena_queue_stop(struct ena_ring *ring) { - struct rte_eth_dev *eth_dev = 
adapter->rte_dev; - struct ena_com_dev *ena_dev = &adapter->ena_dev; - int i; - uint16_t ena_qid; - uint16_t nb_rxq = eth_dev->data->nb_rx_queues; - uint16_t nb_txq = eth_dev->data->nb_tx_queues; + struct ena_com_dev *ena_dev = &ring->adapter->ena_dev; - for (i = 0; i < nb_txq; ++i) { - ena_qid = ENA_IO_TXQ_IDX(i); - ena_com_destroy_io_queue(ena_dev, ena_qid); - - ena_tx_queue_release_bufs(&adapter->tx_ring[i]); + if (ring->type == ENA_RING_TYPE_RX) { + ena_com_destroy_io_queue(ena_dev, ENA_IO_RXQ_IDX(ring->id)); + ena_rx_queue_release_bufs(ring); + } else { + ena_com_destroy_io_queue(ena_dev, ENA_IO_TXQ_IDX(ring->id)); + ena_tx_queue_release_bufs(ring); } +} - for (i = 0; i < nb_rxq; ++i) { - ena_qid = ENA_IO_RXQ_IDX(i); - ena_com_destroy_io_queue(ena_dev, ena_qid); +static void ena_queue_stop_all(struct rte_eth_dev *dev, + enum ena_ring_type ring_type) +{ + struct ena_adapter *adapter = + (struct ena_adapter *)(dev->data->dev_private); + struct ena_ring *queues = NULL; + uint16_t nb_queues, i; - ena_rx_queue_release_bufs(&adapter->rx_ring[i]); + if (ring_type == ENA_RING_TYPE_RX) { + queues = adapter->rx_ring; + nb_queues = dev->data->nb_rx_queues; + } else { + queues = adapter->tx_ring; + nb_queues = dev->data->nb_tx_queues; } + + for (i = 0; i < nb_queues; ++i) + if (queues[i].configured) + ena_queue_stop(&queues[i]); } -static int ena_queue_restart(struct ena_ring *ring) +static int ena_queue_start(struct ena_ring *ring) { int rc, bufs_num; ena_assert_msg(ring->configured == 1, - "Trying to restart unconfigured queue\n"); + "Trying to start unconfigured queue\n"); rc = ena_create_io_queue(ring); if (rc) { @@ -1195,6 +1164,8 @@ static int ena_queue_restart(struct ena_ring *ring) bufs_num = ring->ring_size - 1; rc = ena_populate_rx_queue(ring, bufs_num); if (rc != bufs_num) { + ena_com_destroy_io_queue(&ring->adapter->ena_dev, + ENA_IO_RXQ_IDX(ring->id)); PMD_INIT_LOG(ERR, "Failed to populate rx ring !"); return ENA_COM_FAULT; } @@ -1346,7 +1317,7 @@ static int ena_rx_queue_setup(struct rte_eth_dev *dev, } for (i = 0; i < nb_desc; i++) - rxq->empty_tx_reqs[i] = i; + rxq->empty_rx_reqs[i] = i; /* Store pointer to this queue in upper layer */ rxq->configured = 1; @@ -1603,19 +1574,20 @@ static int eth_ena_dev_init(struct rte_eth_dev *eth_dev) static int adapters_found; bool wd_state; - memset(adapter, 0, sizeof(struct ena_adapter)); - ena_dev = &adapter->ena_dev; - eth_dev->dev_ops = &ena_dev_ops; eth_dev->rx_pkt_burst = &eth_ena_recv_pkts; eth_dev->tx_pkt_burst = &eth_ena_xmit_pkts; eth_dev->tx_pkt_prepare = &eth_ena_prep_pkts; - adapter->rte_eth_dev_data = eth_dev->data; - adapter->rte_dev = eth_dev; if (rte_eal_process_type() != RTE_PROC_PRIMARY) return 0; + memset(adapter, 0, sizeof(struct ena_adapter)); + ena_dev = &adapter->ena_dev; + + adapter->rte_eth_dev_data = eth_dev->data; + adapter->rte_dev = eth_dev; + pci_dev = RTE_ETH_DEV_TO_PCI(eth_dev); adapter->pdev = pci_dev; @@ -1726,24 +1698,43 @@ err: return rc; } -static int eth_ena_dev_uninit(struct rte_eth_dev *eth_dev) +static void ena_destroy_device(struct rte_eth_dev *eth_dev) { struct ena_adapter *adapter = (struct ena_adapter *)(eth_dev->data->dev_private); + struct ena_com_dev *ena_dev = &adapter->ena_dev; - if (rte_eal_process_type() != RTE_PROC_PRIMARY) - return 0; + if (adapter->state == ENA_ADAPTER_STATE_FREE) + return; + + ena_com_set_admin_running_state(ena_dev, false); if (adapter->state != ENA_ADAPTER_STATE_CLOSED) ena_close(eth_dev); + ena_com_delete_debug_area(ena_dev); + ena_com_delete_host_info(ena_dev); + +
ena_com_abort_admin_commands(ena_dev); + ena_com_wait_for_abort_completion(ena_dev); + ena_com_admin_destroy(ena_dev); + ena_com_mmio_reg_read_request_destroy(ena_dev); + + adapter->state = ENA_ADAPTER_STATE_FREE; +} + +static int eth_ena_dev_uninit(struct rte_eth_dev *eth_dev) +{ + if (rte_eal_process_type() != RTE_PROC_PRIMARY) + return 0; + + ena_destroy_device(eth_dev); + eth_dev->dev_ops = NULL; eth_dev->rx_pkt_burst = NULL; eth_dev->tx_pkt_burst = NULL; eth_dev->tx_pkt_prepare = NULL; - adapter->state = ENA_ADAPTER_STATE_FREE; - return 0; } @@ -1843,6 +1834,9 @@ static void ena_infos_get(struct rte_eth_dev *dev, dev_info->tx_offload_capa = tx_feat; dev_info->tx_queue_offload_capa = tx_feat; + dev_info->flow_type_rss_offloads = ETH_RSS_IP | ETH_RSS_TCP | + ETH_RSS_UDP; + dev_info->min_rx_bufsize = ENA_MIN_FRAME_LEN; dev_info->max_rx_pktlen = adapter->max_mtu; dev_info->max_mac_addrs = 1; @@ -1907,6 +1901,8 @@ static uint16_t eth_ena_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, &ena_rx_ctx); if (unlikely(rc)) { RTE_LOG(ERR, PMD, "ena_com_rx_pkt error %d\n", rc); + rx_ring->adapter->reset_reason = + ENA_REGS_RESET_TOO_MANY_RX_DESCS; rx_ring->adapter->trigger_reset = true; return 0; } @@ -1917,10 +1913,14 @@ static uint16_t eth_ena_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, while (segments < ena_rx_ctx.descs) { req_id = ena_rx_ctx.ena_bufs[segments].req_id; rc = validate_rx_req_id(rx_ring, req_id); - if (unlikely(rc)) + if (unlikely(rc)) { + if (segments != 0) + rte_mbuf_raw_free(mbuf_head); break; + } mbuf = rx_buff_info[req_id]; + rx_buff_info[req_id] = NULL; mbuf->data_len = ena_rx_ctx.ena_bufs[segments].len; mbuf->data_off = RTE_PKTMBUF_HEADROOM; mbuf->refcnt = 1; @@ -1942,6 +1942,8 @@ static uint16_t eth_ena_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, segments++; next_to_clean++; } + if (unlikely(rc)) + break; /* fill mbuf attributes if any */ ena_rx_mbuf_prepare(mbuf_head, &ena_rx_ctx); @@ -1956,8 +1958,10 @@ static uint16_t eth_ena_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, desc_in_use = desc_in_use - completed + 1; /* Burst refill to save doorbells, memory barriers, const interval */ - if (ring_size - desc_in_use > ENA_RING_DESCS_RATIO(ring_size)) + if (ring_size - desc_in_use > ENA_RING_DESCS_RATIO(ring_size)) { + ena_com_update_dev_comp_head(rx_ring->ena_com_io_cq); ena_populate_rx_queue(rx_ring, ring_size - desc_in_use); + } return recv_idx; } @@ -2004,14 +2008,14 @@ eth_ena_prep_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, if ((ol_flags & ENA_TX_OFFLOAD_NOTSUP_MASK) != 0 || (ol_flags & PKT_TX_L4_MASK) == PKT_TX_SCTP_CKSUM) { - rte_errno = -ENOTSUP; + rte_errno = ENOTSUP; return i; } #ifdef RTE_LIBRTE_ETHDEV_DEBUG ret = rte_validate_tx_offload(m); if (ret != 0) { - rte_errno = ret; + rte_errno = -ret; return i; } #endif @@ -2024,7 +2028,7 @@ eth_ena_prep_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, ret = rte_net_intel_cksum_flags_prepare(m, ol_flags & ~PKT_TX_TCP_SEG); if (ret != 0) { - rte_errno = ret; + rte_errno = -ret; return i; } } @@ -2207,8 +2211,9 @@ static uint16_t eth_ena_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, if (total_tx_descs > 0) { /* acknowledge completion of sent packets */ - ena_com_comp_ack(tx_ring->ena_com_io_sq, total_tx_descs); tx_ring->next_to_clean = next_to_clean; + ena_com_comp_ack(tx_ring->ena_com_io_sq, total_tx_descs); + ena_com_update_dev_comp_head(tx_ring->ena_com_io_cq); } return sent_idx; diff --git a/drivers/net/enic/enic_clsf.c b/drivers/net/enic/enic_clsf.c index 9d95201e..9e9e548c 
100644 --- a/drivers/net/enic/enic_clsf.c +++ b/drivers/net/enic/enic_clsf.c @@ -3,8 +3,6 @@ * Copyright 2007 Nuova Systems, Inc. All rights reserved. */ -#include <libgen.h> - #include <rte_ethdev_driver.h> #include <rte_malloc.h> #include <rte_hash.h> diff --git a/drivers/net/enic/enic_main.c b/drivers/net/enic/enic_main.c index c3869de3..406f92a8 100644 --- a/drivers/net/enic/enic_main.c +++ b/drivers/net/enic/enic_main.c @@ -8,7 +8,6 @@ #include <sys/stat.h> #include <sys/mman.h> #include <fcntl.h> -#include <libgen.h> #include <rte_pci.h> #include <rte_bus_pci.h> diff --git a/drivers/net/fm10k/fm10k_ethdev.c b/drivers/net/fm10k/fm10k_ethdev.c index 85fb6c5c..caf4d1bc 100644 --- a/drivers/net/fm10k/fm10k_ethdev.c +++ b/drivers/net/fm10k/fm10k_ethdev.c @@ -3003,6 +3003,7 @@ fm10k_params_init(struct rte_eth_dev *dev) hw->bus.payload = fm10k_bus_payload_256; info->rx_vec_allowed = true; + info->sm_down = false; } static int diff --git a/drivers/net/fm10k/fm10k_rxtx.c b/drivers/net/fm10k/fm10k_rxtx.c index 4a5b46ec..1d0f09d2 100644 --- a/drivers/net/fm10k/fm10k_rxtx.c +++ b/drivers/net/fm10k/fm10k_rxtx.c @@ -39,6 +39,8 @@ static inline void dump_rxd(union fm10k_rx_desc *rxd) #define FM10K_TX_OFFLOAD_MASK ( \ PKT_TX_VLAN_PKT | \ + PKT_TX_IPV6 | \ + PKT_TX_IPV4 | \ PKT_TX_IP_CKSUM | \ PKT_TX_L4_MASK | \ PKT_TX_TCP_SEG) diff --git a/drivers/net/i40e/base/i40e_type.h b/drivers/net/i40e/base/i40e_type.h index 77562f24..7ba62cc1 100644 --- a/drivers/net/i40e/base/i40e_type.h +++ b/drivers/net/i40e/base/i40e_type.h @@ -670,6 +670,7 @@ struct i40e_hw { u8 revision_id; u8 port; bool adapter_stopped; + bool adapter_closed; /* capabilities for entire device and PCI func */ struct i40e_hw_capabilities dev_caps; diff --git a/drivers/net/i40e/i40e_ethdev.c b/drivers/net/i40e/i40e_ethdev.c index 7030eb1f..dca61f03 100644 --- a/drivers/net/i40e/i40e_ethdev.c +++ b/drivers/net/i40e/i40e_ethdev.c @@ -1273,7 +1273,7 @@ eth_i40e_dev_init(struct rte_eth_dev *dev, void *init_params __rte_unused) struct i40e_hw *hw = I40E_DEV_PRIVATE_TO_HW(dev->data->dev_private); struct i40e_vsi *vsi; int ret; - uint32_t len; + uint32_t len, val; uint8_t aq_fail = 0; PMD_INIT_FUNC_TRACE(); @@ -1316,6 +1316,7 @@ eth_i40e_dev_init(struct rte_eth_dev *dev, void *init_params __rte_unused) hw->bus.device = pci_dev->addr.devid; hw->bus.func = pci_dev->addr.function; hw->adapter_stopped = 0; + hw->adapter_closed = 0; /* * Switch Tag value should not be identical to either the First Tag @@ -1324,6 +1325,15 @@ eth_i40e_dev_init(struct rte_eth_dev *dev, void *init_params __rte_unused) */ hw->switch_tag = 0xffff; + val = I40E_READ_REG(hw, I40E_GL_FWSTS); + if (val & I40E_GL_FWSTS_FWS1B_MASK) { + PMD_INIT_LOG(ERR, "\nERROR: " + "Firmware recovery mode detected. 
Limiting functionality.\n" + "Refer to the Intel(R) Ethernet Adapters and Devices " + "User Guide for details on firmware recovery mode."); + return -EIO; + } + /* Check if need to support multi-driver */ i40e_support_multi_driver(dev); /* Check if users want the latest supported vec path */ @@ -1483,9 +1493,6 @@ eth_i40e_dev_init(struct rte_eth_dev *dev, void *init_params __rte_unused) goto err_setup_pf_switch; } - /* reset all stats of the device, including pf and main vsi */ - i40e_dev_stats_reset(dev); - vsi = pf->main_vsi; /* Disable double vlan by default */ @@ -1580,6 +1587,9 @@ eth_i40e_dev_init(struct rte_eth_dev *dev, void *init_params __rte_unused) memset(&pf->rss_info, 0, sizeof(struct i40e_rte_flow_rss_conf)); + /* reset all stats of the device, including pf and main vsi */ + i40e_dev_stats_reset(dev); + return 0; err_init_fdir_filter_list: @@ -1704,7 +1714,7 @@ eth_i40e_dev_uninit(struct rte_eth_dev *dev) if (ret) PMD_INIT_LOG(WARNING, "failed to free switch domain: %d", ret); - if (hw->adapter_stopped == 0) + if (hw->adapter_closed == 0) i40e_dev_close(dev); dev->dev_ops = NULL; @@ -2444,6 +2454,8 @@ i40e_dev_stop(struct rte_eth_dev *dev) pf->tm_conf.committed = false; hw->adapter_stopped = 1; + + pf->adapter->rss_reta_updated = 0; } static void @@ -2523,6 +2535,8 @@ i40e_dev_close(struct rte_eth_dev *dev) I40E_WRITE_REG(hw, I40E_PFGEN_CTRL, (reg | I40E_PFGEN_CTRL_PFSWR_MASK)); I40E_WRITE_FLUSH(hw); + + hw->adapter_closed = 1; } /* @@ -3160,20 +3174,20 @@ i40e_dev_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats) struct i40e_pf *pf = I40E_DEV_PRIVATE_TO_PF(dev->data->dev_private); struct i40e_hw *hw = I40E_DEV_PRIVATE_TO_HW(dev->data->dev_private); struct i40e_hw_port_stats *ns = &pf->stats; /* new stats */ + struct i40e_vsi *vsi; unsigned i; /* call read registers - updates values, now write them to struct */ i40e_read_stats_registers(pf, hw); - stats->ipackets = ns->eth.rx_unicast + - ns->eth.rx_multicast + - ns->eth.rx_broadcast - - ns->eth.rx_discards - + stats->ipackets = pf->main_vsi->eth_stats.rx_unicast + + pf->main_vsi->eth_stats.rx_multicast + + pf->main_vsi->eth_stats.rx_broadcast - pf->main_vsi->eth_stats.rx_discards; stats->opackets = ns->eth.tx_unicast + ns->eth.tx_multicast + ns->eth.tx_broadcast; - stats->ibytes = ns->eth.rx_bytes; + stats->ibytes = pf->main_vsi->eth_stats.rx_bytes; stats->obytes = ns->eth.tx_bytes; stats->oerrors = ns->eth.tx_errors + pf->main_vsi->eth_stats.tx_errors; @@ -3185,6 +3199,21 @@ i40e_dev_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats) ns->rx_length_errors + ns->rx_undersize + ns->rx_oversize + ns->rx_fragments + ns->rx_jabber; + if (pf->vfs) { + for (i = 0; i < pf->vf_num; i++) { + vsi = pf->vfs[i].vsi; + i40e_update_vsi_stats(vsi); + + stats->ipackets += (vsi->eth_stats.rx_unicast + + vsi->eth_stats.rx_multicast + + vsi->eth_stats.rx_broadcast - + vsi->eth_stats.rx_discards); + stats->ibytes += vsi->eth_stats.rx_bytes; + stats->oerrors += vsi->eth_stats.tx_errors; + stats->imissed += vsi->eth_stats.rx_discards; + } + } + PMD_DRV_LOG(DEBUG, "***************** PF stats start *******************"); PMD_DRV_LOG(DEBUG, "rx_bytes: %"PRIu64"", ns->eth.rx_bytes); PMD_DRV_LOG(DEBUG, "rx_unicast: %"PRIu64"", ns->eth.rx_unicast); @@ -3431,6 +3460,31 @@ i40e_fw_version_get(struct rte_eth_dev *dev, char *fw_version, size_t fw_size) return 0; } +/* + * When using NVM 6.01(for X710 XL710 XXV710)/3.33(for X722) or later, + * the Rx data path does not hang if the FW LLDP is stopped. 
+ * return true if lldp need to stop + * return false if we cannot disable the LLDP to avoid Rx data path blocking. + */ +static bool +i40e_need_stop_lldp(struct rte_eth_dev *dev) +{ + double nvm_ver; + char ver_str[64] = {0}; + struct i40e_hw *hw = I40E_DEV_PRIVATE_TO_HW(dev->data->dev_private); + + i40e_fw_version_get(dev, ver_str, 64); + nvm_ver = atof(ver_str); + if ((hw->mac.type == I40E_MAC_X722 || + hw->mac.type == I40E_MAC_X722_VF) && + ((uint32_t)(nvm_ver * 1000) >= (uint32_t)(3.33 * 1000))) + return true; + else if ((uint32_t)(nvm_ver * 1000) >= (uint32_t)(6.01 * 1000)) + return true; + + return false; +} + static void i40e_dev_info_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info) { @@ -4154,7 +4208,8 @@ i40e_get_rss_lut(struct i40e_vsi *vsi, uint8_t *lut, uint16_t lut_size) return -EINVAL; if (pf->flags & I40E_FLAG_RSS_AQ_CAPABLE) { - ret = i40e_aq_get_rss_lut(hw, vsi->vsi_id, TRUE, + ret = i40e_aq_get_rss_lut(hw, vsi->vsi_id, + vsi->type != I40E_VSI_SRIOV, lut, lut_size); if (ret) { PMD_DRV_LOG(ERR, "Failed to get RSS lookup table"); @@ -4193,7 +4248,8 @@ i40e_set_rss_lut(struct i40e_vsi *vsi, uint8_t *lut, uint16_t lut_size) hw = I40E_VSI_TO_HW(vsi); if (pf->flags & I40E_FLAG_RSS_AQ_CAPABLE) { - ret = i40e_aq_set_rss_lut(hw, vsi->vsi_id, TRUE, + ret = i40e_aq_set_rss_lut(hw, vsi->vsi_id, + vsi->type != I40E_VSI_SRIOV, lut, lut_size); if (ret) { PMD_DRV_LOG(ERR, "Failed to set RSS lookup table"); @@ -4255,6 +4311,8 @@ i40e_dev_rss_reta_update(struct rte_eth_dev *dev, } ret = i40e_set_rss_lut(pf->main_vsi, lut, reta_size); + pf->adapter->rss_reta_updated = 1; + out: rte_free(lut); @@ -7376,7 +7434,7 @@ i40e_get_rss_key(struct i40e_vsi *vsi, uint8_t *key, uint8_t *key_len) int ret; if (!key || !key_len) - return -EINVAL; + return 0; if (pf->flags & I40E_FLAG_RSS_AQ_CAPABLE) { ret = i40e_aq_get_rss_key(hw, vsi->vsi_id, @@ -7459,9 +7517,15 @@ i40e_dev_rss_hash_conf_get(struct rte_eth_dev *dev, struct i40e_pf *pf = I40E_DEV_PRIVATE_TO_PF(dev->data->dev_private); struct i40e_hw *hw = I40E_DEV_PRIVATE_TO_HW(dev->data->dev_private); uint64_t hena; + int ret; + + if (!rss_conf) + return -EINVAL; - i40e_get_rss_key(pf->main_vsi, rss_conf->rss_key, + ret = i40e_get_rss_key(pf->main_vsi, rss_conf->rss_key, &rss_conf->rss_key_len); + if (ret) + return ret; hena = (uint64_t)i40e_read_rx_ctl(hw, I40E_PFQF_HENA(0)); hena |= ((uint64_t)i40e_read_rx_ctl(hw, I40E_PFQF_HENA(1))) << 32; @@ -8489,13 +8553,16 @@ i40e_pf_config_rss(struct i40e_pf *pf) return -ENOTSUP; } - for (i = 0, j = 0; i < hw->func_caps.rss_table_size; i++, j++) { - if (j == num) - j = 0; - lut = (lut << 8) | (j & ((0x1 << - hw->func_caps.rss_table_entry_width) - 1)); - if ((i & 3) == 3) - I40E_WRITE_REG(hw, I40E_PFQF_HLUT(i >> 2), lut); + if (pf->adapter->rss_reta_updated == 0) { + for (i = 0, j = 0; i < hw->func_caps.rss_table_size; i++, j++) { + if (j == num) + j = 0; + lut = (lut << 8) | (j & ((0x1 << + hw->func_caps.rss_table_entry_width) - 1)); + if ((i & 3) == 3) + I40E_WRITE_REG(hw, I40E_PFQF_HLUT(i >> 2), + rte_bswap32(lut)); + } } rss_conf = pf->dev_data->dev_conf.rx_adv_conf.rss_conf; @@ -11385,11 +11452,7 @@ i40e_dcb_init_configure(struct rte_eth_dev *dev, bool sw_dcb) * LLDP MIB change event. */ if (sw_dcb == TRUE) { - /* When using NVM 6.01 or later, the RX data path does - * not hang if the FW LLDP is stopped. 
- */ - if (((hw->nvm.version >> 12) & 0xf) >= 6 && - ((hw->nvm.version >> 4) & 0xff) >= 1) { + if (i40e_need_stop_lldp(dev)) { ret = i40e_aq_stop_lldp(hw, TRUE, NULL); if (ret != I40E_SUCCESS) PMD_INIT_LOG(DEBUG, "Failed to stop lldp"); diff --git a/drivers/net/i40e/i40e_ethdev.h b/drivers/net/i40e/i40e_ethdev.h index 11ecfc30..930eb9ab 100644 --- a/drivers/net/i40e/i40e_ethdev.h +++ b/drivers/net/i40e/i40e_ethdev.h @@ -1081,6 +1081,9 @@ struct i40e_adapter { /* For devargs */ uint8_t use_latest_vec; + + /* For RSS reta table update */ + uint8_t rss_reta_updated; }; /** diff --git a/drivers/net/i40e/i40e_ethdev_vf.c b/drivers/net/i40e/i40e_ethdev_vf.c index ae55b9b1..100e71cc 100644 --- a/drivers/net/i40e/i40e_ethdev_vf.c +++ b/drivers/net/i40e/i40e_ethdev_vf.c @@ -1080,9 +1080,11 @@ i40evf_enable_irq0(struct i40e_hw *hw) } static int -i40evf_check_vf_reset_done(struct i40e_hw *hw) +i40evf_check_vf_reset_done(struct rte_eth_dev *dev) { int i, reset; + struct i40e_hw *hw = I40E_DEV_PRIVATE_TO_HW(dev->data->dev_private); + struct i40e_vf *vf = I40EVF_DEV_PRIVATE_TO_VF(dev->data->dev_private); for (i = 0; i < MAX_RESET_WAIT_CNT; i++) { reset = I40E_READ_REG(hw, I40E_VFGEN_RSTAT) & @@ -1097,12 +1099,16 @@ i40evf_check_vf_reset_done(struct i40e_hw *hw) if (i >= MAX_RESET_WAIT_CNT) return -1; + vf->vf_reset = false; + vf->pend_msg &= ~PFMSG_RESET_IMPENDING; + return 0; } static int -i40evf_reset_vf(struct i40e_hw *hw) +i40evf_reset_vf(struct rte_eth_dev *dev) { int ret; + struct i40e_hw *hw = I40E_DEV_PRIVATE_TO_HW(dev->data->dev_private); if (i40e_vf_reset(hw) != I40E_SUCCESS) { PMD_INIT_LOG(ERR, "Reset VF NIC failed"); @@ -1119,7 +1125,7 @@ i40evf_reset_vf(struct i40e_hw *hw) */ rte_delay_ms(200); - ret = i40evf_check_vf_reset_done(hw); + ret = i40evf_check_vf_reset_done(dev); if (ret) { PMD_INIT_LOG(ERR, "VF is still resetting"); return ret; @@ -1145,7 +1151,7 @@ i40evf_init_vf(struct rte_eth_dev *dev) goto err; } - err = i40evf_check_vf_reset_done(hw); + err = i40evf_check_vf_reset_done(dev); if (err) goto err; @@ -1157,7 +1163,7 @@ i40evf_init_vf(struct rte_eth_dev *dev) } /* Reset VF and wait until it's complete */ - if (i40evf_reset_vf(hw)) { + if (i40evf_reset_vf(dev)) { PMD_INIT_LOG(ERR, "reset NIC failed"); goto err_aq; } @@ -1256,7 +1262,7 @@ i40evf_uninit_vf(struct rte_eth_dev *dev) PMD_INIT_FUNC_TRACE(); - if (hw->adapter_stopped == 0) + if (hw->adapter_closed == 0) i40evf_dev_close(dev); rte_free(vf->vf_res); vf->vf_res = NULL; @@ -1438,6 +1444,7 @@ i40evf_dev_init(struct rte_eth_dev *eth_dev) hw->bus.func = pci_dev->addr.function; hw->hw_addr = (void *)pci_dev->mem_resource[0].addr; hw->adapter_stopped = 0; + hw->adapter_closed = 0; if(i40evf_init_vf(eth_dev) != 0) { PMD_INIT_LOG(ERR, "Init vf failed"); @@ -2256,10 +2263,11 @@ i40evf_dev_close(struct rte_eth_dev *dev) i40evf_dev_promiscuous_disable(dev); i40evf_dev_allmulticast_disable(dev); - i40evf_reset_vf(hw); + i40evf_reset_vf(dev); i40e_shutdown_adminq(hw); i40evf_disable_irq0(hw); rte_eal_alarm_cancel(i40evf_dev_alarm_handler, dev); + hw->adapter_closed = 1; } /* diff --git a/drivers/net/i40e/i40e_pf.c b/drivers/net/i40e/i40e_pf.c index dd3962d3..1e2d1746 100644 --- a/drivers/net/i40e/i40e_pf.c +++ b/drivers/net/i40e/i40e_pf.c @@ -1232,6 +1232,7 @@ i40e_pf_host_handle_vf_msg(struct rte_eth_dev *dev, uint16_t vf_id = abs_vf_id - hw->func_caps.vf_base_id; struct rte_pmd_i40e_mb_event_param ret_param; bool b_op = TRUE; + int ret; if (vf_id > pf->vf_num - 1 || !pf->vfs) { PMD_DRV_LOG(ERR, "invalid argument"); @@ -1246,6 
+1247,30 @@ i40e_pf_host_handle_vf_msg(struct rte_eth_dev *dev, return; } + /* perform basic checks on the msg */ + ret = virtchnl_vc_validate_vf_msg(&vf->version, opcode, msg, msglen); + + /* perform additional checks specific to this driver */ + if (opcode == VIRTCHNL_OP_CONFIG_RSS_KEY) { + struct virtchnl_rss_key *vrk = (struct virtchnl_rss_key *)msg; + + if (vrk->key_len != ((I40E_PFQF_HKEY_MAX_INDEX + 1) * 4)) + ret = VIRTCHNL_ERR_PARAM; + } else if (opcode == VIRTCHNL_OP_CONFIG_RSS_LUT) { + struct virtchnl_rss_lut *vrl = (struct virtchnl_rss_lut *)msg; + + if (vrl->lut_entries != ((I40E_VFQF_HLUT1_MAX_INDEX + 1) * 4)) + ret = VIRTCHNL_ERR_PARAM; + } + + if (ret) { + PMD_DRV_LOG(ERR, "Invalid message from VF %u, opcode %u, len %u", + vf_id, opcode, msglen); + i40e_pf_host_send_msg_to_vf(vf, opcode, + I40E_ERR_PARAM, NULL, 0); + return; + } + /** * initialise structure to send to user application * will return response from user in retval field diff --git a/drivers/net/i40e/i40e_rxtx.c b/drivers/net/i40e/i40e_rxtx.c index e1152ff0..8f727fae 100644 --- a/drivers/net/i40e/i40e_rxtx.c +++ b/drivers/net/i40e/i40e_rxtx.c @@ -69,7 +69,7 @@ I40E_TX_IEEE1588_TMST) #define I40E_TX_OFFLOAD_NOTSUP_MASK \ - ~(PKT_TX_OFFLOAD_MASK & I40E_TX_OFFLOAD_MASK) + (PKT_TX_OFFLOAD_MASK ^ I40E_TX_OFFLOAD_MASK) static inline void i40e_rxd_to_vlan_tci(struct rte_mbuf *mb, volatile union i40e_rx_desc *rxdp) @@ -2753,7 +2753,6 @@ i40e_dev_free_queues(struct rte_eth_dev *dev) i40e_dev_rx_queue_release(dev->data->rx_queues[i]); dev->data->rx_queues[i] = NULL; } - dev->data->nb_rx_queues = 0; for (i = 0; i < dev->data->nb_tx_queues; i++) { if (!dev->data->tx_queues[i]) @@ -2761,7 +2760,6 @@ i40e_dev_free_queues(struct rte_eth_dev *dev) i40e_dev_tx_queue_release(dev->data->tx_queues[i]); dev->data->tx_queues[i] = NULL; } - dev->data->nb_tx_queues = 0; } #define I40E_FDIR_NUM_TX_DESC I40E_MIN_RING_DESC @@ -3184,7 +3182,7 @@ i40e_set_default_pctype_table(struct rte_eth_dev *dev) } } -/* Stubs needed for linkage when CONFIG_RTE_I40E_INC_VECTOR is set to 'n' */ +/* Stubs needed for linkage when CONFIG_RTE_LIBRTE_I40E_INC_VECTOR is set to 'n' */ __rte_weak int i40e_rx_vec_dev_conf_condition_check(struct rte_eth_dev __rte_unused *dev) { diff --git a/drivers/net/i40e/rte_pmd_i40e.c b/drivers/net/i40e/rte_pmd_i40e.c index 7ce5d02f..c49c872b 100644 --- a/drivers/net/i40e/rte_pmd_i40e.c +++ b/drivers/net/i40e/rte_pmd_i40e.c @@ -2818,13 +2818,23 @@ i40e_queue_region_dcb_configure(struct i40e_hw *hw, struct i40e_dcbx_config *old_cfg = &hw->local_dcbx_config; int32_t ret = -EINVAL; uint16_t i, j, prio_index, region_index; - uint8_t tc_map, tc_bw, bw_lf; + uint8_t tc_map, tc_bw, bw_lf, dcb_flag = 0; if (!info->queue_region_number) { PMD_DRV_LOG(ERR, "No queue region been set before"); return ret; } + for (i = 0; i < info->queue_region_number; i++) { + if (info->region[i].user_priority_num) { + dcb_flag = 1; + break; + } + } + + if (dcb_flag == 0) + return 0; + dcb_cfg = &dcb_cfg_local; memset(dcb_cfg, 0, sizeof(struct i40e_dcbx_config)); diff --git a/drivers/net/ifc/base/ifcvf.h b/drivers/net/ifc/base/ifcvf.h index f026c70a..c15c6910 100644 --- a/drivers/net/ifc/base/ifcvf.h +++ b/drivers/net/ifc/base/ifcvf.h @@ -121,7 +121,7 @@ struct ifcvf_hw { u8 notify_region; u32 notify_off_multiplier; struct ifcvf_pci_common_cfg *common_cfg; - struct ifcvf_net_device_config *dev_cfg; + struct ifcvf_net_config *dev_cfg; u8 *isr; u16 *notify_base; u16 *notify_addr[IFCVF_MAX_QUEUES * 2]; diff --git a/drivers/net/ifc/ifcvf_vdpa.c 
b/drivers/net/ifc/ifcvf_vdpa.c index 97a57f18..698d14f8 100644 --- a/drivers/net/ifc/ifcvf_vdpa.c +++ b/drivers/net/ifc/ifcvf_vdpa.c @@ -773,15 +773,15 @@ ifcvf_pci_probe(struct rte_pci_driver *pci_drv __rte_unused, internal->dev_addr.type = PCI_ADDR; list->internal = internal; - pthread_mutex_lock(&internal_list_lock); - TAILQ_INSERT_TAIL(&internal_list, list, next); - pthread_mutex_unlock(&internal_list_lock); - internal->did = rte_vdpa_register_device(&internal->dev_addr, &ifcvf_ops); if (internal->did < 0) goto error; + pthread_mutex_lock(&internal_list_lock); + TAILQ_INSERT_TAIL(&internal_list, list, next); + pthread_mutex_unlock(&internal_list_lock); + rte_atomic32_set(&internal->started, 1); update_datapath(internal); diff --git a/drivers/net/ixgbe/base/ixgbe_82599.c b/drivers/net/ixgbe/base/ixgbe_82599.c index 7de753fd..96bdde62 100644 --- a/drivers/net/ixgbe/base/ixgbe_82599.c +++ b/drivers/net/ixgbe/base/ixgbe_82599.c @@ -392,6 +392,8 @@ s32 ixgbe_get_link_capabilities_82599(struct ixgbe_hw *hw, /* Check if 1G SFP module. */ if (hw->phy.sfp_type == ixgbe_sfp_type_1g_cu_core0 || hw->phy.sfp_type == ixgbe_sfp_type_1g_cu_core1 || + hw->phy.sfp_type == ixgbe_sfp_type_1g_lha_core0 || + hw->phy.sfp_type == ixgbe_sfp_type_1g_lha_core1 || hw->phy.sfp_type == ixgbe_sfp_type_1g_lx_core0 || hw->phy.sfp_type == ixgbe_sfp_type_1g_lx_core1 || hw->phy.sfp_type == ixgbe_sfp_type_1g_sx_core0 || diff --git a/drivers/net/ixgbe/base/ixgbe_phy.c b/drivers/net/ixgbe/base/ixgbe_phy.c index 6cdd8fba..dd118f91 100644 --- a/drivers/net/ixgbe/base/ixgbe_phy.c +++ b/drivers/net/ixgbe/base/ixgbe_phy.c @@ -1402,6 +1402,13 @@ s32 ixgbe_identify_sfp_module_generic(struct ixgbe_hw *hw) else hw->phy.sfp_type = ixgbe_sfp_type_1g_lx_core1; + } else if (comp_codes_1g & IXGBE_SFF_1GBASELHA_CAPABLE) { + if (hw->bus.lan_id == 0) + hw->phy.sfp_type = + ixgbe_sfp_type_1g_lha_core0; + else + hw->phy.sfp_type = + ixgbe_sfp_type_1g_lha_core1; } else { hw->phy.sfp_type = ixgbe_sfp_type_unknown; } @@ -1489,6 +1496,8 @@ s32 ixgbe_identify_sfp_module_generic(struct ixgbe_hw *hw) if (comp_codes_10g == 0 && !(hw->phy.sfp_type == ixgbe_sfp_type_1g_cu_core1 || hw->phy.sfp_type == ixgbe_sfp_type_1g_cu_core0 || + hw->phy.sfp_type == ixgbe_sfp_type_1g_lha_core0 || + hw->phy.sfp_type == ixgbe_sfp_type_1g_lha_core1 || hw->phy.sfp_type == ixgbe_sfp_type_1g_lx_core0 || hw->phy.sfp_type == ixgbe_sfp_type_1g_lx_core1 || hw->phy.sfp_type == ixgbe_sfp_type_1g_sx_core0 || @@ -1508,6 +1517,8 @@ s32 ixgbe_identify_sfp_module_generic(struct ixgbe_hw *hw) if (!(enforce_sfp & IXGBE_DEVICE_CAPS_ALLOW_ANY_SFP) && !(hw->phy.sfp_type == ixgbe_sfp_type_1g_cu_core0 || hw->phy.sfp_type == ixgbe_sfp_type_1g_cu_core1 || + hw->phy.sfp_type == ixgbe_sfp_type_1g_lha_core0 || + hw->phy.sfp_type == ixgbe_sfp_type_1g_lha_core1 || hw->phy.sfp_type == ixgbe_sfp_type_1g_lx_core0 || hw->phy.sfp_type == ixgbe_sfp_type_1g_lx_core1 || hw->phy.sfp_type == ixgbe_sfp_type_1g_sx_core0 || @@ -1835,11 +1846,13 @@ s32 ixgbe_get_sfp_init_sequence_offsets(struct ixgbe_hw *hw, */ if (sfp_type == ixgbe_sfp_type_da_act_lmt_core0 || sfp_type == ixgbe_sfp_type_1g_lx_core0 || + sfp_type == ixgbe_sfp_type_1g_lha_core0 || sfp_type == ixgbe_sfp_type_1g_cu_core0 || sfp_type == ixgbe_sfp_type_1g_sx_core0) sfp_type = ixgbe_sfp_type_srlr_core0; else if (sfp_type == ixgbe_sfp_type_da_act_lmt_core1 || sfp_type == ixgbe_sfp_type_1g_lx_core1 || + sfp_type == ixgbe_sfp_type_1g_lha_core1 || sfp_type == ixgbe_sfp_type_1g_cu_core1 || sfp_type == ixgbe_sfp_type_1g_sx_core1) sfp_type = 
ixgbe_sfp_type_srlr_core1; diff --git a/drivers/net/ixgbe/base/ixgbe_phy.h b/drivers/net/ixgbe/base/ixgbe_phy.h index 132fa542..f1605f2c 100644 --- a/drivers/net/ixgbe/base/ixgbe_phy.h +++ b/drivers/net/ixgbe/base/ixgbe_phy.h @@ -41,6 +41,7 @@ #define IXGBE_SFF_1GBASESX_CAPABLE 0x1 #define IXGBE_SFF_1GBASELX_CAPABLE 0x2 #define IXGBE_SFF_1GBASET_CAPABLE 0x8 +#define IXGBE_SFF_1GBASELHA_CAPABLE 0x10 #define IXGBE_SFF_10GBASESR_CAPABLE 0x10 #define IXGBE_SFF_10GBASELR_CAPABLE 0x20 #define IXGBE_SFF_SOFT_RS_SELECT_MASK 0x8 diff --git a/drivers/net/ixgbe/base/ixgbe_type.h b/drivers/net/ixgbe/base/ixgbe_type.h index cee6ba2e..077b8f01 100644 --- a/drivers/net/ixgbe/base/ixgbe_type.h +++ b/drivers/net/ixgbe/base/ixgbe_type.h @@ -3724,6 +3724,8 @@ enum ixgbe_sfp_type { ixgbe_sfp_type_1g_sx_core1 = 12, ixgbe_sfp_type_1g_lx_core0 = 13, ixgbe_sfp_type_1g_lx_core1 = 14, + ixgbe_sfp_type_1g_lha_core0 = 15, + ixgbe_sfp_type_1g_lha_core1 = 16, ixgbe_sfp_type_not_present = 0xFFFE, ixgbe_sfp_type_unknown = 0xFFFF }; diff --git a/drivers/net/ixgbe/base/ixgbe_x550.c b/drivers/net/ixgbe/base/ixgbe_x550.c index f7b98af5..a920a146 100644 --- a/drivers/net/ixgbe/base/ixgbe_x550.c +++ b/drivers/net/ixgbe/base/ixgbe_x550.c @@ -1534,6 +1534,8 @@ STATIC s32 ixgbe_supported_sfp_modules_X550em(struct ixgbe_hw *hw, bool *linear) case ixgbe_sfp_type_1g_sx_core1: case ixgbe_sfp_type_1g_lx_core0: case ixgbe_sfp_type_1g_lx_core1: + case ixgbe_sfp_type_1g_lha_core0: + case ixgbe_sfp_type_1g_lha_core1: *linear = false; break; case ixgbe_sfp_type_unknown: @@ -1874,6 +1876,8 @@ s32 ixgbe_get_link_capabilities_X550em(struct ixgbe_hw *hw, /* Check if 1G SFP module. */ if (hw->phy.sfp_type == ixgbe_sfp_type_1g_sx_core0 || hw->phy.sfp_type == ixgbe_sfp_type_1g_sx_core1 + || hw->phy.sfp_type == ixgbe_sfp_type_1g_lha_core0 || + hw->phy.sfp_type == ixgbe_sfp_type_1g_lha_core1 || hw->phy.sfp_type == ixgbe_sfp_type_1g_lx_core0 || hw->phy.sfp_type == ixgbe_sfp_type_1g_lx_core1) { *speed = IXGBE_LINK_SPEED_1GB_FULL; diff --git a/drivers/net/ixgbe/ixgbe_ethdev.c b/drivers/net/ixgbe/ixgbe_ethdev.c index 91ba6201..e9533e5a 100644 --- a/drivers/net/ixgbe/ixgbe_ethdev.c +++ b/drivers/net/ixgbe/ixgbe_ethdev.c @@ -1336,6 +1336,9 @@ eth_ixgbe_dev_uninit(struct rte_eth_dev *eth_dev) rte_delay_ms(100); } while (retries++ < (10 + IXGBE_LINK_UP_TIME)); + /* cancel the delay handler before removing the device */ + rte_eal_alarm_cancel(ixgbe_dev_interrupt_delayed_handler, eth_dev); + /* uninitialize PF if max_vfs not zero */ ixgbe_pf_host_uninit(eth_dev); @@ -2790,6 +2793,8 @@ static void ixgbe_dev_stop(struct rte_eth_dev *dev) { struct rte_eth_link link; + struct ixgbe_adapter *adapter = + (struct ixgbe_adapter *)dev->data->dev_private; struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private); struct ixgbe_vf_info *vfinfo = @@ -2850,6 +2855,8 @@ ixgbe_dev_stop(struct rte_eth_dev *dev) /* reset hierarchy commit */ tm_conf->committed = false; + + adapter->rss_reta_updated = 0; } /* @@ -4779,6 +4786,8 @@ ixgbe_dev_rss_reta_update(struct rte_eth_dev *dev, uint8_t j, mask; uint32_t reta, r; uint16_t idx, shift; + struct ixgbe_adapter *adapter = + (struct ixgbe_adapter *)dev->data->dev_private; struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private); uint32_t reta_reg; @@ -4820,6 +4829,7 @@ ixgbe_dev_rss_reta_update(struct rte_eth_dev *dev, } IXGBE_WRITE_REG(hw, reta_reg, reta); } + adapter->rss_reta_updated = 1; return 0; } @@ -5143,6 +5153,8 @@ static void ixgbevf_dev_stop(struct rte_eth_dev *dev) { struct ixgbe_hw *hw =
IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private); + struct ixgbe_adapter *adapter = + (struct ixgbe_adapter *)dev->data->dev_private; struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(dev); struct rte_intr_handle *intr_handle = &pci_dev->intr_handle; @@ -5172,6 +5184,8 @@ ixgbevf_dev_stop(struct rte_eth_dev *dev) rte_free(intr_handle->intr_vec); intr_handle->intr_vec = NULL; } + + adapter->rss_reta_updated = 0; } static void diff --git a/drivers/net/ixgbe/ixgbe_ethdev.h b/drivers/net/ixgbe/ixgbe_ethdev.h index d0b93968..565c69c9 100644 --- a/drivers/net/ixgbe/ixgbe_ethdev.h +++ b/drivers/net/ixgbe/ixgbe_ethdev.h @@ -490,6 +490,9 @@ struct ixgbe_adapter { struct rte_timecounter rx_tstamp_tc; struct rte_timecounter tx_tstamp_tc; struct ixgbe_tm_conf tm_conf; + + /* For RSS reta table update */ + uint8_t rss_reta_updated; }; struct ixgbe_vf_representor { diff --git a/drivers/net/ixgbe/ixgbe_pf.c b/drivers/net/ixgbe/ixgbe_pf.c index 4b833ffa..be0c0768 100644 --- a/drivers/net/ixgbe/ixgbe_pf.c +++ b/drivers/net/ixgbe/ixgbe_pf.c @@ -351,7 +351,7 @@ ixgbe_vf_reset_event(struct rte_eth_dev *dev, uint16_t vf) int rar_entry = hw->mac.num_rar_entries - (vf + 1); uint32_t vmolr = IXGBE_READ_REG(hw, IXGBE_VMOLR(vf)); - vmolr |= (IXGBE_VMOLR_ROPE | IXGBE_VMOLR_ROMPE | + vmolr |= (IXGBE_VMOLR_ROPE | IXGBE_VMOLR_BAM | IXGBE_VMOLR_AUPE); IXGBE_WRITE_REG(hw, IXGBE_VMOLR(vf), vmolr); @@ -503,6 +503,7 @@ ixgbe_vf_set_multicast(struct rte_eth_dev *dev, uint32_t vf, uint32_t *msgbuf) const uint32_t IXGBE_MTA_BIT_MASK = (0x1 << IXGBE_MTA_BIT_SHIFT) - 1; uint32_t reg_val; int i; + u32 vmolr = IXGBE_READ_REG(hw, IXGBE_VMOLR(vf)); /* Disable multicast promiscuous first */ ixgbe_disable_vf_mc_promisc(dev, vf); @@ -516,6 +517,12 @@ ixgbe_vf_set_multicast(struct rte_eth_dev *dev, uint32_t vf, uint32_t *msgbuf) vfinfo->vf_mc_hashes[i] = hash_list[i]; } + if (nb_entries == 0) { + vmolr &= ~IXGBE_VMOLR_ROMPE; + IXGBE_WRITE_REG(hw, IXGBE_VMOLR(vf), vmolr); + return 0; + } + for (i = 0; i < vfinfo->num_vf_mc_hashes; i++) { mta_idx = (vfinfo->vf_mc_hashes[i] >> IXGBE_MTA_BIT_SHIFT) & IXGBE_MTA_INDEX_MASK; @@ -525,6 +532,9 @@ ixgbe_vf_set_multicast(struct rte_eth_dev *dev, uint32_t vf, uint32_t *msgbuf) IXGBE_WRITE_REG(hw, IXGBE_MTA(mta_idx), reg_val); } + vmolr |= IXGBE_VMOLR_ROMPE; + IXGBE_WRITE_REG(hw, IXGBE_VMOLR(vf), vmolr); + return 0; } diff --git a/drivers/net/ixgbe/ixgbe_rxtx.c b/drivers/net/ixgbe/ixgbe_rxtx.c index ddc7efa8..9a79d18e 100644 --- a/drivers/net/ixgbe/ixgbe_rxtx.c +++ b/drivers/net/ixgbe/ixgbe_rxtx.c @@ -2867,7 +2867,8 @@ ixgbe_get_rx_port_offloads(struct rte_eth_dev *dev) * mode. */ if ((hw->mac.type == ixgbe_mac_82599EB || - hw->mac.type == ixgbe_mac_X540) && + hw->mac.type == ixgbe_mac_X540 || + hw->mac.type == ixgbe_mac_X550) && !RTE_ETH_DEV_SRIOV(dev).active) offloads |= DEV_RX_OFFLOAD_TCP_LRO; @@ -3417,6 +3418,7 @@ static void ixgbe_rss_configure(struct rte_eth_dev *dev) { struct rte_eth_rss_conf rss_conf; + struct ixgbe_adapter *adapter; struct ixgbe_hw *hw; uint32_t reta; uint16_t i; @@ -3425,6 +3427,7 @@ ixgbe_rss_configure(struct rte_eth_dev *dev) uint32_t reta_reg; PMD_INIT_FUNC_TRACE(); + adapter = (struct ixgbe_adapter *)dev->data->dev_private; hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private); sp_reta_size = ixgbe_reta_size_get(hw->mac.type); @@ -3434,16 +3437,18 @@ ixgbe_rss_configure(struct rte_eth_dev *dev) * The byte-swap is needed because NIC registers are in * little-endian order. 
*/ - reta = 0; - for (i = 0, j = 0; i < sp_reta_size; i++, j++) { - reta_reg = ixgbe_reta_reg_get(hw->mac.type, i); - - if (j == dev->data->nb_rx_queues) - j = 0; - reta = (reta << 8) | j; - if ((i & 3) == 3) - IXGBE_WRITE_REG(hw, reta_reg, - rte_bswap32(reta)); + if (adapter->rss_reta_updated == 0) { + reta = 0; + for (i = 0, j = 0; i < sp_reta_size; i++, j++) { + reta_reg = ixgbe_reta_reg_get(hw->mac.type, i); + + if (j == dev->data->nb_rx_queues) + j = 0; + reta = (reta << 8) | j; + if ((i & 3) == 3) + IXGBE_WRITE_REG(hw, reta_reg, + rte_bswap32(reta)); + } } /* diff --git a/drivers/net/mlx5/Makefile b/drivers/net/mlx5/Makefile index 895cdfee..63dfa45b 100644 --- a/drivers/net/mlx5/Makefile +++ b/drivers/net/mlx5/Makefile @@ -118,6 +118,11 @@ mlx5_autoconf.h.new: $(RTE_SDK)/buildtools/auto-config-h.sh enum IBV_FLOW_SPEC_MPLS \ $(AUTOCONF_OUTPUT) $Q sh -- '$<' '$@' \ + HAVE_IBV_WQ_FLAGS_PCI_WRITE_END_PADDING \ + infiniband/verbs.h \ + enum IBV_WQ_FLAGS_PCI_WRITE_END_PADDING \ + $(AUTOCONF_OUTPUT) + $Q sh -- '$<' '$@' \ HAVE_IBV_WQ_FLAG_RX_END_PADDING \ infiniband/verbs.h \ enum IBV_WQ_FLAG_RX_END_PADDING \ diff --git a/drivers/net/mlx5/meson.build b/drivers/net/mlx5/meson.build index 28938db0..de0c32bc 100644 --- a/drivers/net/mlx5/meson.build +++ b/drivers/net/mlx5/meson.build @@ -102,6 +102,8 @@ if build 'mlx5dv_create_flow_action_packet_reformat' ], [ 'HAVE_IBV_DEVICE_MPLS_SUPPORT', 'infiniband/verbs.h', 'IBV_FLOW_SPEC_MPLS' ], + [ 'HAVE_IBV_WQ_FLAGS_PCI_WRITE_END_PADDING', 'infiniband/verbs.h', + 'IBV_WQ_FLAGS_PCI_WRITE_END_PADDING' ], [ 'HAVE_IBV_WQ_FLAG_RX_END_PADDING', 'infiniband/verbs.h', 'IBV_WQ_FLAG_RX_END_PADDING' ], [ 'HAVE_SUPPORTED_40000baseKR4_Full', 'linux/ethtool.h', diff --git a/drivers/net/mlx5/mlx5.c b/drivers/net/mlx5/mlx5.c index 9e5cab16..e7668bd5 100644 --- a/drivers/net/mlx5/mlx5.c +++ b/drivers/net/mlx5/mlx5.c @@ -54,6 +54,9 @@ /* Device parameter to enable RX completion entry padding to 128B. */ #define MLX5_RXQ_CQE_PAD_EN "rxq_cqe_pad_en" +/* Device parameter to enable padding Rx packet to cacheline size. */ +#define MLX5_RXQ_PKT_PAD_EN "rxq_pkt_pad_en" + /* Device parameter to enable Multi-Packet Rx queue. */ #define MLX5_RX_MPRQ_EN "mprq_en" @@ -486,6 +489,8 @@ mlx5_args_check(const char *key, const char *val, void *opaque) config->cqe_comp = !!tmp; } else if (strcmp(MLX5_RXQ_CQE_PAD_EN, key) == 0) { config->cqe_pad = !!tmp; + } else if (strcmp(MLX5_RXQ_PKT_PAD_EN, key) == 0) { + config->hw_padding = !!tmp; } else if (strcmp(MLX5_RX_MPRQ_EN, key) == 0) { config->mprq.enabled = !!tmp; } else if (strcmp(MLX5_RX_MPRQ_LOG_STRIDE_NUM, key) == 0) { @@ -541,6 +546,7 @@ mlx5_args(struct mlx5_dev_config *config, struct rte_devargs *devargs) const char **params = (const char *[]){ MLX5_RXQ_CQE_COMP_EN, MLX5_RXQ_CQE_PAD_EN, + MLX5_RXQ_PKT_PAD_EN, MLX5_RX_MPRQ_EN, MLX5_RX_MPRQ_LOG_STRIDE_NUM, MLX5_RX_MPRQ_MAX_MEMCPY_LEN, @@ -735,6 +741,7 @@ mlx5_dev_spawn(struct rte_device *dpdk_dev, struct rte_eth_dev *eth_dev = NULL; struct priv *priv = NULL; int err = 0; + unsigned int hw_padding = 0; unsigned int mps; unsigned int cqe_comp; unsigned int cqe_pad = 0; @@ -1053,11 +1060,18 @@ mlx5_dev_spawn(struct rte_device *dpdk_dev, IBV_RAW_PACKET_CAP_SCATTER_FCS); DRV_LOG(DEBUG, "FCS stripping configuration is %ssupported", (config.hw_fcs_strip ? 
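The ixgbe_rss_configure() hunk above wraps the default round-robin RETA programming in an rss_reta_updated check, so a table installed by the application via rte_eth_dev_rss_reta_update() is preserved across reconfiguration and only cleared again in dev_stop. A condensed sketch of the guard, with a hypothetical write_reta_reg() standing in for IXGBE_WRITE_REG():

#include <stdint.h>
#include <rte_byteorder.h>

void write_reta_reg(uint16_t reg_idx, uint32_t val); /* hypothetical */

/* Program a round-robin default RETA unless the application installed
 * its own table (rss_reta_updated != 0). */
static void rss_default_reta(uint8_t rss_reta_updated, uint16_t reta_size,
			     uint16_t nb_rx_queues)
{
	uint32_t reta = 0;
	uint16_t i, j;

	if (rss_reta_updated)
		return; /* keep the user-provided table */
	for (i = 0, j = 0; i < reta_size; i++, j++) {
		if (j == nb_rx_queues)
			j = 0;
		reta = (reta << 8) | j;   /* one queue index per byte */
		if ((i & 3) == 3)         /* four entries per register */
			write_reta_reg(i / 4, rte_bswap32(reta));
	}
}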
"" : "not ")); -#ifdef HAVE_IBV_WQ_FLAG_RX_END_PADDING - config.hw_padding = !!attr.rx_pad_end_addr_align; +#if defined(HAVE_IBV_WQ_FLAG_RX_END_PADDING) + hw_padding = !!attr.rx_pad_end_addr_align; +#elif defined(HAVE_IBV_WQ_FLAGS_PCI_WRITE_END_PADDING) + hw_padding = !!(attr.device_cap_flags_ex & + IBV_DEVICE_PCI_WRITE_END_PADDING); #endif - DRV_LOG(DEBUG, "hardware Rx end alignment padding is %ssupported", - (config.hw_padding ? "" : "not ")); + if (config.hw_padding && !hw_padding) { + DRV_LOG(DEBUG, "Rx end alignment padding isn't supported"); + config.hw_padding = 0; + } else if (config.hw_padding) { + DRV_LOG(DEBUG, "Rx end alignment padding is enabled"); + } config.tso = (attr.tso_caps.max_tso > 0 && (attr.tso_caps.supported_qpts & (1 << IBV_QPT_RAW_PACKET))); @@ -1434,6 +1448,7 @@ mlx5_pci_probe(struct rte_pci_driver *pci_drv __rte_unused, qsort(list, n, sizeof(*list), mlx5_dev_spawn_data_cmp); /* Default configuration. */ dev_config = (struct mlx5_dev_config){ + .hw_padding = 0, .mps = MLX5_ARG_UNSET, .tx_vec_en = 1, .rx_vec_en = 1, diff --git a/drivers/net/mlx5/mlx5_flow.c b/drivers/net/mlx5/mlx5_flow.c index 97dc3e1f..ee129b97 100644 --- a/drivers/net/mlx5/mlx5_flow.c +++ b/drivers/net/mlx5/mlx5_flow.c @@ -2314,7 +2314,7 @@ mlx5_ctrl_flow_vlan(struct rte_eth_dev *dev, struct rte_flow_error error; unsigned int i; - if (!priv->reta_idx_n) { + if (!priv->reta_idx_n || !priv->rxqs_n) { rte_errno = EINVAL; return -rte_errno; } diff --git a/drivers/net/mlx5/mlx5_flow_tcf.c b/drivers/net/mlx5/mlx5_flow_tcf.c index fb284c35..96b9dd72 100644 --- a/drivers/net/mlx5/mlx5_flow_tcf.c +++ b/drivers/net/mlx5/mlx5_flow_tcf.c @@ -28,6 +28,7 @@ #include <rte_flow.h> #include <rte_malloc.h> #include <rte_common.h> +#include <rte_cycles.h> #include "mlx5.h" #include "mlx5_flow.h" @@ -320,6 +321,11 @@ struct tc_tunnel_key { #define MLX5_VXLAN_PORT_MIN 30000 #define MLX5_VXLAN_PORT_MAX 60000 #define MLX5_VXLAN_DEVICE_PFX "vmlx_" +/** + * Timeout in milliseconds to wait VXLAN UDP offloaded port + * registration completed within the mlx5 driver. + */ +#define MLX5_VXLAN_WAIT_PORT_REG_MS 250 /** Tunnel action type, used for @p type in header structure. */ enum flow_tcf_tunact_type { @@ -403,7 +409,8 @@ struct tcf_vtep { unsigned int ifindex; /**< Own interface index. */ unsigned int ifouter; /**< Index of device attached to. */ uint16_t port; - uint8_t created; + uint32_t created:1; /**< Actually created by PMD. */ + uint32_t waitreg:1; /**< Wait for VXLAN UDP port registration. */ }; /** Tunnel descriptor header, common for all tunnel types. */ @@ -436,7 +443,7 @@ struct flow_tcf_vxlan_encap { uint8_t src[IPV6_ADDR_LEN]; } ipv6; }; -struct { + struct { rte_be16_t src; rte_be16_t dst; } udp; @@ -463,7 +470,9 @@ static const union { struct rte_flow_item_tcp tcp; struct rte_flow_item_udp udp; struct rte_flow_item_vxlan vxlan; -} flow_tcf_mask_empty; +} flow_tcf_mask_empty = { + {0}, +}; /** Supported masks for known item types. */ static const struct { @@ -1279,7 +1288,7 @@ flow_tcf_validate_vxlan_encap_ipv4(const struct rte_flow_item *item, * Pointer to the error structure. * * @return - * 0 on success, a negative errno value otherwise and rte_ernno is set. + * 0 on success, a negative errno value otherwise and rte_errno is set. **/ static int flow_tcf_validate_vxlan_encap_ipv6(const struct rte_flow_item *item, @@ -1365,7 +1374,7 @@ flow_tcf_validate_vxlan_encap_ipv6(const struct rte_flow_item *item, * Pointer to the error structure. 
* * @return - * 0 on success, a negative errno value otherwise and rte_ernno is set. + * 0 on success, a negative errno value otherwise and rte_errno is set. **/ static int flow_tcf_validate_vxlan_encap_udp(const struct rte_flow_item *item, @@ -1433,7 +1442,7 @@ flow_tcf_validate_vxlan_encap_udp(const struct rte_flow_item *item, * Pointer to the error structure. * * @return - * 0 on success, a negative errno value otherwise and rte_ernno is set. + * 0 on success, a negative errno value otherwise and rte_errno is set. **/ static int flow_tcf_validate_vxlan_encap_vni(const struct rte_flow_item *item, @@ -1481,7 +1490,7 @@ flow_tcf_validate_vxlan_encap_vni(const struct rte_flow_item *item, * Pointer to the error structure. * * @return - * 0 on success, a negative errno value otherwise and rte_ernno is set. + * 0 on success, a negative errno value otherwise and rte_errno is set. **/ static int flow_tcf_validate_vxlan_encap(const struct rte_flow_action *action, @@ -1584,141 +1593,8 @@ flow_tcf_validate_vxlan_encap(const struct rte_flow_action *action, } /** - * Validate RTE_FLOW_ITEM_TYPE_IPV4 item if VXLAN_DECAP action - * is present in actions list. - * - * @param[in] ipv4 - * Outer IPv4 address item (if any, NULL otherwise). - * @param[out] error - * Pointer to the error structure. - * - * @return - * 0 on success, a negative errno value otherwise and rte_ernno is set. - **/ -static int -flow_tcf_validate_vxlan_decap_ipv4(const struct rte_flow_item *ipv4, - struct rte_flow_error *error) -{ - const struct rte_flow_item_ipv4 *spec = ipv4->spec; - const struct rte_flow_item_ipv4 *mask = ipv4->mask; - - if (!spec) { - /* - * Specification for IP addresses cannot be empty - * because it is required as decap parameter. - */ - return rte_flow_error_set(error, EINVAL, - RTE_FLOW_ERROR_TYPE_ITEM, ipv4, - "NULL outer ipv4 address" - " specification for vxlan" - " for vxlan decapsulation"); - } - if (!mask) - mask = &rte_flow_item_ipv4_mask; - if (mask->hdr.dst_addr != RTE_BE32(0x00000000)) { - if (mask->hdr.dst_addr != RTE_BE32(0xffffffff)) - return rte_flow_error_set - (error, ENOTSUP, - RTE_FLOW_ERROR_TYPE_ITEM_MASK, mask, - "no support for partial mask on" - " \"ipv4.hdr.dst_addr\" field"); - /* More IP address validations can be put here. */ - } else { - /* - * Kernel uses the destination IP address - * to determine the ingress network interface - * for traffic being decapsulated. - */ - return rte_flow_error_set(error, EINVAL, - RTE_FLOW_ERROR_TYPE_ITEM, ipv4, - "outer ipv4 destination address" - " must be specified for" - " vxlan decapsulation"); - } - /* Source IP address is optional for decap. */ - if (mask->hdr.src_addr != RTE_BE32(0x00000000) && - mask->hdr.src_addr != RTE_BE32(0xffffffff)) - return rte_flow_error_set(error, ENOTSUP, - RTE_FLOW_ERROR_TYPE_ITEM_MASK, mask, - "no support for partial mask on" - " \"ipv4.hdr.src_addr\" field"); - return 0; -} - -/** - * Validate RTE_FLOW_ITEM_TYPE_IPV6 item if VXLAN_DECAP action - * is present in actions list. - * - * @param[in] ipv6 - * Outer IPv6 address item (if any, NULL otherwise). - * @param[out] error - * Pointer to the error structure. - * - * @return - * 0 on success, a negative errno value otherwise and rte_ernno is set. 
- **/ -static int -flow_tcf_validate_vxlan_decap_ipv6(const struct rte_flow_item *ipv6, - struct rte_flow_error *error) -{ - const struct rte_flow_item_ipv6 *spec = ipv6->spec; - const struct rte_flow_item_ipv6 *mask = ipv6->mask; - - if (!spec) { - /* - * Specification for IP addresses cannot be empty - * because it is required as decap parameter. - */ - return rte_flow_error_set(error, EINVAL, - RTE_FLOW_ERROR_TYPE_ITEM, ipv6, - "NULL outer ipv6 address" - " specification for vxlan" - " decapsulation"); - } - if (!mask) - mask = &rte_flow_item_ipv6_mask; - if (memcmp(&mask->hdr.dst_addr, - &flow_tcf_mask_empty.ipv6.hdr.dst_addr, - IPV6_ADDR_LEN)) { - if (memcmp(&mask->hdr.dst_addr, - &rte_flow_item_ipv6_mask.hdr.dst_addr, - IPV6_ADDR_LEN)) - return rte_flow_error_set - (error, ENOTSUP, - RTE_FLOW_ERROR_TYPE_ITEM_MASK, mask, - "no support for partial mask on" - " \"ipv6.hdr.dst_addr\" field"); - /* More IP address validations can be put here. */ - } else { - /* - * Kernel uses the destination IP address - * to determine the ingress network interface - * for traffic being decapsulated. - */ - return rte_flow_error_set(error, EINVAL, - RTE_FLOW_ERROR_TYPE_ITEM, ipv6, - "outer ipv6 destination address must be " - "specified for vxlan decapsulation"); - } - /* Source IP address is optional for decap. */ - if (memcmp(&mask->hdr.src_addr, - &flow_tcf_mask_empty.ipv6.hdr.src_addr, - IPV6_ADDR_LEN)) { - if (memcmp(&mask->hdr.src_addr, - &rte_flow_item_ipv6_mask.hdr.src_addr, - IPV6_ADDR_LEN)) - return rte_flow_error_set - (error, ENOTSUP, - RTE_FLOW_ERROR_TYPE_ITEM_MASK, mask, - "no support for partial mask on" - " \"ipv6.hdr.src_addr\" field"); - } - return 0; -} - -/** - * Validate RTE_FLOW_ITEM_TYPE_UDP item if VXLAN_DECAP action - * is present in actions list. + * Validate outer RTE_FLOW_ITEM_TYPE_UDP item if tunnel item + * RTE_FLOW_ITEM_TYPE_VXLAN is present in item list. * * @param[in] udp * Outer UDP layer item (if any, NULL otherwise). @@ -1726,7 +1602,7 @@ flow_tcf_validate_vxlan_decap_ipv6(const struct rte_flow_item *ipv6, * Pointer to the error structure. * * @return - * 0 on success, a negative errno value otherwise and rte_ernno is set. + * 0 on success, a negative errno value otherwise and rte_errno is set. **/ static int flow_tcf_validate_vxlan_decap_udp(const struct rte_flow_item *udp, @@ -1794,7 +1670,7 @@ flow_tcf_validate_vxlan_decap_udp(const struct rte_flow_item *udp, * Pointer to the error structure. * * @return - * 0 on success, a negative errno value otherwise and rte_ernno is set. + * 0 on success, a negative errno value otherwise and rte_errno is set. 
*/ static int flow_tcf_validate(struct rte_eth_dev *dev, @@ -1825,9 +1701,13 @@ flow_tcf_validate(struct rte_eth_dev *dev, const struct rte_flow_action_set_ipv4 *set_ipv4; const struct rte_flow_action_set_ipv6 *set_ipv6; } conf; + const struct rte_flow_item *outer_udp = NULL; + rte_be16_t inner_etype = RTE_BE16(ETH_P_ALL); + rte_be16_t outer_etype = RTE_BE16(ETH_P_ALL); + rte_be16_t vlan_etype = RTE_BE16(ETH_P_ALL); uint64_t item_flags = 0; uint64_t action_flags = 0; - uint8_t next_protocol = -1; + uint8_t next_protocol = 0xff; unsigned int tcm_ifindex = 0; uint8_t pedit_validated = 0; struct flow_tcf_ptoi ptoi[PTOI_TABLE_SZ_MAX(dev)]; @@ -2011,17 +1891,16 @@ flow_tcf_validate(struct rte_eth_dev *dev, for (; items->type != RTE_FLOW_ITEM_TYPE_END; items++) { unsigned int i; - if ((item_flags & MLX5_FLOW_LAYER_TUNNEL) && - items->type != RTE_FLOW_ITEM_TYPE_ETH) - return rte_flow_error_set(error, ENOTSUP, - RTE_FLOW_ERROR_TYPE_ITEM, - items, - "only L2 inner item" - " is supported"); switch (items->type) { case RTE_FLOW_ITEM_TYPE_VOID: break; case RTE_FLOW_ITEM_TYPE_PORT_ID: + if (item_flags & MLX5_FLOW_LAYER_TUNNEL) + return rte_flow_error_set + (error, ENOTSUP, + RTE_FLOW_ERROR_TYPE_ITEM, items, + "inner tunnel port id" + " item is not supported"); mask.port_id = flow_tcf_item_mask (items, &rte_flow_item_port_id_mask, &flow_tcf_mask_supported.port_id, @@ -2072,8 +1951,8 @@ flow_tcf_validate(struct rte_eth_dev *dev, if (ret < 0) return ret; item_flags |= (item_flags & MLX5_FLOW_LAYER_TUNNEL) ? - MLX5_FLOW_LAYER_INNER_L2 : - MLX5_FLOW_LAYER_OUTER_L2; + MLX5_FLOW_LAYER_INNER_L2 : + MLX5_FLOW_LAYER_OUTER_L2; /* TODO: * Redundant check due to different supported mask. * Same for the rest of items. @@ -2094,8 +1973,40 @@ flow_tcf_validate(struct rte_eth_dev *dev, mask.eth, "no support for partial mask on" " \"type\" field"); + assert(items->spec); + spec.eth = items->spec; + if (mask.eth->type && + (item_flags & MLX5_FLOW_LAYER_TUNNEL) && + inner_etype != RTE_BE16(ETH_P_ALL) && + inner_etype != spec.eth->type) + return rte_flow_error_set + (error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM, + items, + "inner eth_type conflict"); + if (mask.eth->type && + !(item_flags & MLX5_FLOW_LAYER_TUNNEL) && + outer_etype != RTE_BE16(ETH_P_ALL) && + outer_etype != spec.eth->type) + return rte_flow_error_set + (error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM, + items, + "outer eth_type conflict"); + if (mask.eth->type) { + if (item_flags & MLX5_FLOW_LAYER_TUNNEL) + inner_etype = spec.eth->type; + else + outer_etype = spec.eth->type; + } break; case RTE_FLOW_ITEM_TYPE_VLAN: + if (item_flags & MLX5_FLOW_LAYER_TUNNEL) + return rte_flow_error_set + (error, ENOTSUP, + RTE_FLOW_ERROR_TYPE_ITEM, items, + "inner tunnel VLAN" + " is not supported"); ret = mlx5_flow_validate_item_vlan(items, item_flags, error); if (ret < 0) @@ -2124,13 +2035,36 @@ flow_tcf_validate(struct rte_eth_dev *dev, "no support for partial masks on" " \"tci\" (PCP and VID parts) and" " \"inner_type\" fields"); + if (outer_etype != RTE_BE16(ETH_P_ALL) && + outer_etype != RTE_BE16(ETH_P_8021Q)) + return rte_flow_error_set + (error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM, + items, + "outer eth_type conflict," + " must be 802.1Q"); + outer_etype = RTE_BE16(ETH_P_8021Q); + assert(items->spec); + spec.vlan = items->spec; + if (mask.vlan->inner_type && + vlan_etype != RTE_BE16(ETH_P_ALL) && + vlan_etype != spec.vlan->inner_type) + return rte_flow_error_set + (error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM, + items, + "vlan eth_type conflict"); + if (mask.vlan->inner_type) + 
vlan_etype = spec.vlan->inner_type; break; case RTE_FLOW_ITEM_TYPE_IPV4: ret = mlx5_flow_validate_item_ipv4(items, item_flags, error); if (ret < 0) return ret; - item_flags |= MLX5_FLOW_LAYER_OUTER_L3_IPV4; + item_flags |= (item_flags & MLX5_FLOW_LAYER_TUNNEL) ? + MLX5_FLOW_LAYER_INNER_L3_IPV4 : + MLX5_FLOW_LAYER_OUTER_L3_IPV4; mask.ipv4 = flow_tcf_item_mask (items, &rte_flow_item_ipv4_mask, &flow_tcf_mask_supported.ipv4, @@ -2151,11 +2085,36 @@ flow_tcf_validate(struct rte_eth_dev *dev, next_protocol = ((const struct rte_flow_item_ipv4 *) (items->spec))->hdr.next_proto_id; - if (action_flags & MLX5_FLOW_ACTION_VXLAN_DECAP) { - ret = flow_tcf_validate_vxlan_decap_ipv4 - (items, error); - if (ret < 0) - return ret; + if (item_flags & MLX5_FLOW_LAYER_TUNNEL) { + if (inner_etype != RTE_BE16(ETH_P_ALL) && + inner_etype != RTE_BE16(ETH_P_IP)) + return rte_flow_error_set + (error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM, + items, + "inner eth_type conflict," + " IPv4 is required"); + inner_etype = RTE_BE16(ETH_P_IP); + } else if (item_flags & MLX5_FLOW_LAYER_OUTER_VLAN) { + if (vlan_etype != RTE_BE16(ETH_P_ALL) && + vlan_etype != RTE_BE16(ETH_P_IP)) + return rte_flow_error_set + (error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM, + items, + "vlan eth_type conflict," + " IPv4 is required"); + vlan_etype = RTE_BE16(ETH_P_IP); + } else { + if (outer_etype != RTE_BE16(ETH_P_ALL) && + outer_etype != RTE_BE16(ETH_P_IP)) + return rte_flow_error_set + (error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM, + items, + "eth_type conflict," + " IPv4 is required"); + outer_etype = RTE_BE16(ETH_P_IP); } break; case RTE_FLOW_ITEM_TYPE_IPV6: @@ -2163,7 +2122,9 @@ flow_tcf_validate(struct rte_eth_dev *dev, error); if (ret < 0) return ret; - item_flags |= MLX5_FLOW_LAYER_OUTER_L3_IPV6; + item_flags |= (item_flags & MLX5_FLOW_LAYER_TUNNEL) ? + MLX5_FLOW_LAYER_INNER_L3_IPV6 : + MLX5_FLOW_LAYER_OUTER_L3_IPV6; mask.ipv6 = flow_tcf_item_mask (items, &rte_flow_item_ipv6_mask, &flow_tcf_mask_supported.ipv6, @@ -2184,11 +2145,36 @@ flow_tcf_validate(struct rte_eth_dev *dev, next_protocol = ((const struct rte_flow_item_ipv6 *) (items->spec))->hdr.proto; - if (action_flags & MLX5_FLOW_ACTION_VXLAN_DECAP) { - ret = flow_tcf_validate_vxlan_decap_ipv6 - (items, error); - if (ret < 0) - return ret; + if (item_flags & MLX5_FLOW_LAYER_TUNNEL) { + if (inner_etype != RTE_BE16(ETH_P_ALL) && + inner_etype != RTE_BE16(ETH_P_IPV6)) + return rte_flow_error_set + (error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM, + items, + "inner eth_type conflict," + " IPv6 is required"); + inner_etype = RTE_BE16(ETH_P_IPV6); + } else if (item_flags & MLX5_FLOW_LAYER_OUTER_VLAN) { + if (vlan_etype != RTE_BE16(ETH_P_ALL) && + vlan_etype != RTE_BE16(ETH_P_IPV6)) + return rte_flow_error_set + (error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM, + items, + "vlan eth_type conflict," + " IPv6 is required"); + vlan_etype = RTE_BE16(ETH_P_IPV6); + } else { + if (outer_etype != RTE_BE16(ETH_P_ALL) && + outer_etype != RTE_BE16(ETH_P_IPV6)) + return rte_flow_error_set + (error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM, + items, + "eth_type conflict," + " IPv6 is required"); + outer_etype = RTE_BE16(ETH_P_IPV6); } break; case RTE_FLOW_ITEM_TYPE_UDP: @@ -2196,7 +2182,9 @@ flow_tcf_validate(struct rte_eth_dev *dev, next_protocol, error); if (ret < 0) return ret; - item_flags |= MLX5_FLOW_LAYER_OUTER_L4_UDP; + item_flags |= (item_flags & MLX5_FLOW_LAYER_TUNNEL) ? 
+ MLX5_FLOW_LAYER_INNER_L4_UDP : + MLX5_FLOW_LAYER_OUTER_L4_UDP; mask.udp = flow_tcf_item_mask (items, &rte_flow_item_udp_mask, &flow_tcf_mask_supported.udp, @@ -2205,12 +2193,12 @@ flow_tcf_validate(struct rte_eth_dev *dev, error); if (!mask.udp) return -rte_errno; - if (action_flags & MLX5_FLOW_ACTION_VXLAN_DECAP) { - ret = flow_tcf_validate_vxlan_decap_udp - (items, error); - if (ret < 0) - return ret; - } + /* + * Save the presumed outer UDP item for an extra check + * if the tunnel item is found later in the list. + */ + if (!(item_flags & MLX5_FLOW_LAYER_TUNNEL)) + outer_udp = items; break; case RTE_FLOW_ITEM_TYPE_TCP: ret = mlx5_flow_validate_item_tcp @@ -2220,7 +2208,9 @@ flow_tcf_validate(struct rte_eth_dev *dev, error); if (ret < 0) return ret; - item_flags |= MLX5_FLOW_LAYER_OUTER_L4_TCP; + item_flags |= (item_flags & MLX5_FLOW_LAYER_TUNNEL) ? + MLX5_FLOW_LAYER_INNER_L4_TCP : + MLX5_FLOW_LAYER_OUTER_L4_TCP; mask.tcp = flow_tcf_item_mask (items, &rte_flow_item_tcp_mask, &flow_tcf_mask_supported.tcp, @@ -2231,13 +2221,12 @@ flow_tcf_validate(struct rte_eth_dev *dev, return -rte_errno; break; case RTE_FLOW_ITEM_TYPE_VXLAN: - if (!(action_flags & MLX5_FLOW_ACTION_VXLAN_DECAP)) + if (item_flags & MLX5_FLOW_LAYER_OUTER_VLAN) return rte_flow_error_set (error, ENOTSUP, - RTE_FLOW_ERROR_TYPE_ITEM, - items, - "vni pattern should be followed by" - " vxlan decapsulation action"); + RTE_FLOW_ERROR_TYPE_ITEM, items, + "vxlan tunnel over vlan" + " is not supported"); ret = mlx5_flow_validate_item_vxlan(items, item_flags, error); if (ret < 0) @@ -2259,6 +2248,45 @@ flow_tcf_validate(struct rte_eth_dev *dev, mask.vxlan, "no support for partial or " "empty mask on \"vxlan.vni\" field"); + /* + * The VNI item assumes the VXLAN tunnel; it requires + * at least that the outer destination UDP port be + * specified without wildcards to allow the kernel to + * select the virtual VXLAN device by port. Also an outer + * IPv4 or IPv6 item must be specified (wildcards or even + * a zero mask are allowed) to let the driver know the + * tunnel IP version and process UDP traffic correctly. + */ + if (!(item_flags & + (MLX5_FLOW_LAYER_OUTER_L3_IPV4 | + MLX5_FLOW_LAYER_OUTER_L3_IPV6))) + return rte_flow_error_set + (error, EINVAL, + RTE_FLOW_ERROR_TYPE_ACTION, + NULL, + "no outer IP pattern found" + " for vxlan tunnel"); + if (!(item_flags & MLX5_FLOW_LAYER_OUTER_L4_UDP)) + return rte_flow_error_set + (error, EINVAL, + RTE_FLOW_ERROR_TYPE_ACTION, + NULL, + "no outer UDP pattern found" + " for vxlan tunnel"); + /* + * All items preceding the tunnel item become outer + * ones and we should do extra validation for them + * due to tc limitations for tunnel outer parameters. + * Currently only the outer UDP item requires an extra + * check; use the saved pointer instead of rescanning + * the item list. + */ + assert(outer_udp); + ret = flow_tcf_validate_vxlan_decap_udp + (outer_udp, error); + if (ret < 0) + return ret; + /* Reset L4 protocol for inner parameters.
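Under the validation above, a VXLAN rule must place an outer L3 item and an exact-match outer UDP destination port before the VXLAN item. A sketch of a pattern that satisfies those checks (port and VNI values are illustrative):

#include <rte_byteorder.h>
#include <rte_flow.h>

static const struct rte_flow_item_udp udp_spec = {
	.hdr = { .dst_port = RTE_BE16(4789) },   /* IANA VXLAN port */
};
static const struct rte_flow_item_udp udp_mask = {
	.hdr = { .dst_port = RTE_BE16(0xffff) }, /* no wildcards allowed */
};
static const struct rte_flow_item_vxlan vxlan_spec = {
	.vni = { 0x00, 0x00, 0x2a },             /* VNI 42 */
};
static const struct rte_flow_item vxlan_pattern[] = {
	{ .type = RTE_FLOW_ITEM_TYPE_ETH },
	/* Outer IP item: a zero mask is fine, its presence conveys
	 * the tunnel IP version. */
	{ .type = RTE_FLOW_ITEM_TYPE_IPV4 },
	{ .type = RTE_FLOW_ITEM_TYPE_UDP,
	  .spec = &udp_spec, .mask = &udp_mask },
	{ .type = RTE_FLOW_ITEM_TYPE_VXLAN, .spec = &vxlan_spec },
	{ .type = RTE_FLOW_ITEM_TYPE_END },
};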
*/ + next_protocol = 0xff; break; default: return rte_flow_error_set(error, ENOTSUP, @@ -2361,28 +2389,20 @@ flow_tcf_validate(struct rte_eth_dev *dev, "no ethernet found in" " pattern"); } - if (action_flags & MLX5_FLOW_ACTION_VXLAN_DECAP) { - if (!(item_flags & - (MLX5_FLOW_LAYER_OUTER_L3_IPV4 | - MLX5_FLOW_LAYER_OUTER_L3_IPV6))) - return rte_flow_error_set(error, EINVAL, - RTE_FLOW_ERROR_TYPE_ACTION, - NULL, - "no outer IP pattern found" - " for vxlan decap action"); - if (!(item_flags & MLX5_FLOW_LAYER_OUTER_L4_UDP)) - return rte_flow_error_set(error, EINVAL, - RTE_FLOW_ERROR_TYPE_ACTION, - NULL, - "no outer UDP pattern found" - " for vxlan decap action"); - if (!(item_flags & MLX5_FLOW_LAYER_VXLAN)) - return rte_flow_error_set(error, EINVAL, - RTE_FLOW_ERROR_TYPE_ACTION, - NULL, - "no VNI pattern found" - " for vxlan decap action"); - } + if ((action_flags & MLX5_FLOW_ACTION_VXLAN_DECAP) && + !(item_flags & MLX5_FLOW_LAYER_VXLAN)) + return rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ACTION, + NULL, + "no VNI pattern found" + " for vxlan decap action"); + if ((action_flags & MLX5_FLOW_ACTION_VXLAN_ENCAP) && + (item_flags & MLX5_FLOW_LAYER_TUNNEL)) + return rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ACTION, + NULL, + "vxlan encap not supported" + " for tunneled traffic"); return 0; } @@ -2393,17 +2413,21 @@ flow_tcf_validate(struct rte_eth_dev *dev, * Pointer to the flow attributes. * @param[in] items * Pointer to the list of items. + * @param[out] action_flags + * Pointer to the detected actions. * * @return * Maximum size of memory for items. */ static int flow_tcf_get_items_size(const struct rte_flow_attr *attr, - const struct rte_flow_item items[]) + const struct rte_flow_item items[], + uint64_t *action_flags) { int size = 0; size += SZ_NLATTR_STRZ_OF("flower") + + SZ_NLATTR_TYPE_OF(uint16_t) + /* Outer ether type. */ SZ_NLATTR_NEST + /* TCA_OPTIONS. */ SZ_NLATTR_TYPE_OF(uint32_t); /* TCA_CLS_FLAGS_SKIP_SW. */ if (attr->group > 0) @@ -2415,26 +2439,22 @@ flow_tcf_get_items_size(const struct rte_flow_attr *attr, case RTE_FLOW_ITEM_TYPE_PORT_ID: break; case RTE_FLOW_ITEM_TYPE_ETH: - size += SZ_NLATTR_TYPE_OF(uint16_t) + /* Ether type. */ - SZ_NLATTR_DATA_OF(ETHER_ADDR_LEN) * 4; + size += SZ_NLATTR_DATA_OF(ETHER_ADDR_LEN) * 4; /* dst/src MAC addr and mask. */ break; case RTE_FLOW_ITEM_TYPE_VLAN: - size += SZ_NLATTR_TYPE_OF(uint16_t) + /* Ether type. */ - SZ_NLATTR_TYPE_OF(uint16_t) + + size += SZ_NLATTR_TYPE_OF(uint16_t) + /* VLAN Ether type. */ SZ_NLATTR_TYPE_OF(uint8_t) + /* VLAN prio. */ SZ_NLATTR_TYPE_OF(uint16_t); /* VLAN ID. */ break; case RTE_FLOW_ITEM_TYPE_IPV4: - size += SZ_NLATTR_TYPE_OF(uint16_t) + /* Ether type. */ - SZ_NLATTR_TYPE_OF(uint8_t) + /* IP proto. */ + size += SZ_NLATTR_TYPE_OF(uint8_t) + /* IP proto. */ SZ_NLATTR_TYPE_OF(uint32_t) * 4; /* dst/src IP addr and mask. */ break; case RTE_FLOW_ITEM_TYPE_IPV6: - size += SZ_NLATTR_TYPE_OF(uint16_t) + /* Ether type. */ - SZ_NLATTR_TYPE_OF(uint8_t) + /* IP proto. */ + size += SZ_NLATTR_TYPE_OF(uint8_t) + /* IP proto. */ SZ_NLATTR_DATA_OF(IPV6_ADDR_LEN) * 4; /* dst/src IP addr and mask. */ break; @@ -2450,6 +2470,16 @@ flow_tcf_get_items_size(const struct rte_flow_attr *attr, break; case RTE_FLOW_ITEM_TYPE_VXLAN: size += SZ_NLATTR_TYPE_OF(uint32_t); + /* + * There might be no VXLAN decap action in the action + * list, nonetheless the VXLAN tunnel flow requires + * the decap structure to be correctly applied to + * VXLAN device, set the flag to create the structure. 
+ * Translation routine will not put the decap action + * in the Netlink message if there is no actual action + * in the list. + */ + *action_flags |= MLX5_FLOW_ACTION_VXLAN_DECAP; break; default: DRV_LOG(WARNING, @@ -2542,7 +2572,7 @@ flow_tcf_get_actions_and_size(const struct rte_flow_action actions[], uint64_t *action_flags) { int size = 0; - uint64_t flags = 0; + uint64_t flags = *action_flags; size += SZ_NLATTR_NEST; /* TCA_FLOWER_ACT. */ for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) { @@ -2643,27 +2673,6 @@ action_of_vlan: } /** - * Brand rtnetlink buffer with unique handle. - * - * This handle should be unique for a given network interface to avoid - * collisions. - * - * @param nlh - * Pointer to Netlink message. - * @param handle - * Unique 32-bit handle to use. - */ -static void -flow_tcf_nl_brand(struct nlmsghdr *nlh, uint32_t handle) -{ - struct tcmsg *tcm = mnl_nlmsg_get_payload(nlh); - - tcm->tcm_handle = handle; - DRV_LOG(DEBUG, "Netlink msg %p is branded with handle %x", - (void *)nlh, handle); -} - -/** * Prepare a flow object for Linux TC flower. It calculates the maximum size of * memory required, allocates the memory, initializes Netlink message headers * and set unique TC message handle. @@ -2679,7 +2688,7 @@ flow_tcf_nl_brand(struct nlmsghdr *nlh, uint32_t handle) * * @return * Pointer to mlx5_flow object on success, - * otherwise NULL and rte_ernno is set. + * otherwise NULL and rte_errno is set. */ static struct mlx5_flow * flow_tcf_prepare(const struct rte_flow_attr *attr, @@ -2698,7 +2707,7 @@ flow_tcf_prepare(const struct rte_flow_attr *attr, struct tcmsg *tcm; uint8_t *sp, *tun = NULL; - size += flow_tcf_get_items_size(attr, items); + size += flow_tcf_get_items_size(attr, items, &action_flags); size += flow_tcf_get_actions_and_size(actions, &action_flags); dev_flow = rte_zmalloc(__func__, size, MNL_ALIGNTO); if (!dev_flow) { @@ -2753,20 +2762,6 @@ flow_tcf_prepare(const struct rte_flow_attr *attr, dev_flow->tcf.tunnel->type = FLOW_TCF_TUNACT_VXLAN_DECAP; else if (action_flags & MLX5_FLOW_ACTION_VXLAN_ENCAP) dev_flow->tcf.tunnel->type = FLOW_TCF_TUNACT_VXLAN_ENCAP; - /* - * Generate a reasonably unique handle based on the address of the - * target buffer. - * - * This is straightforward on 32-bit systems where the flow pointer can - * be used directly. Otherwise, its least significant part is taken - * after shifting it by the previous power of two of the pointed buffer - * size. - */ - if (sizeof(dev_flow) <= 4) - flow_tcf_nl_brand(nlh, (uintptr_t)dev_flow); - else - flow_tcf_nl_brand(nlh, (uintptr_t)dev_flow >> - rte_log2_u32(rte_align32prevpow2(size))); return dev_flow; } @@ -3054,7 +3049,7 @@ flow_tcf_vxlan_encap_parse(const struct rte_flow_action *action, * Pointer to the error structure. * * @return - * 0 on success, a negative errno value otherwise and rte_ernno is set. + * 0 on success, a negative errno value otherwise and rte_errno is set.
*/ static int flow_tcf_translate(struct rte_eth_dev *dev, struct mlx5_flow *dev_flow, @@ -3098,10 +3093,11 @@ flow_tcf_translate(struct rte_eth_dev *dev, struct mlx5_flow *dev_flow, struct nlmsghdr *nlh = dev_flow->tcf.nlh; struct tcmsg *tcm = dev_flow->tcf.tcm; uint32_t na_act_index_cur; - bool eth_type_set = 0; - bool vlan_present = 0; - bool vlan_eth_type_set = 0; + rte_be16_t inner_etype = RTE_BE16(ETH_P_ALL); + rte_be16_t outer_etype = RTE_BE16(ETH_P_ALL); + rte_be16_t vlan_etype = RTE_BE16(ETH_P_ALL); bool ip_proto_set = 0; + bool tunnel_outer = 0; struct nlattr *na_flower; struct nlattr *na_flower_act; struct nlattr *na_vlan_id = NULL; @@ -3115,6 +3111,7 @@ flow_tcf_translate(struct rte_eth_dev *dev, struct mlx5_flow *dev_flow, switch (dev_flow->tcf.tunnel->type) { case FLOW_TCF_TUNACT_VXLAN_DECAP: decap.vxlan = dev_flow->tcf.vxlan_decap; + tunnel_outer = 1; break; case FLOW_TCF_TUNACT_VXLAN_ENCAP: encap.vxlan = dev_flow->tcf.vxlan_encap; @@ -3136,8 +3133,7 @@ flow_tcf_translate(struct rte_eth_dev *dev, struct mlx5_flow *dev_flow, * Priority cannot be zero to prevent the kernel from picking one * automatically. */ - tcm->tcm_info = TC_H_MAKE((attr->priority + 1) << 16, - RTE_BE16(ETH_P_ALL)); + tcm->tcm_info = TC_H_MAKE((attr->priority + 1) << 16, outer_etype); if (attr->group > 0) mnl_attr_put_u32(nlh, TCA_CHAIN, attr->group); mnl_attr_put_strz(nlh, TCA_KIND, "flower"); @@ -3169,7 +3165,7 @@ flow_tcf_translate(struct rte_eth_dev *dev, struct mlx5_flow *dev_flow, tcm->tcm_ifindex = ptoi[i].ifindex; break; case RTE_FLOW_ITEM_TYPE_ETH: - item_flags |= (item_flags & MLX5_FLOW_LAYER_VXLAN) ? + item_flags |= (item_flags & MLX5_FLOW_LAYER_TUNNEL) ? MLX5_FLOW_LAYER_INNER_L2 : MLX5_FLOW_LAYER_OUTER_L2; mask.eth = flow_tcf_item_mask @@ -3182,19 +3178,19 @@ flow_tcf_translate(struct rte_eth_dev *dev, struct mlx5_flow *dev_flow, if (mask.eth == &flow_tcf_mask_empty.eth) break; spec.eth = items->spec; - if (decap.vxlan && - !(item_flags & MLX5_FLOW_LAYER_VXLAN)) { + if (mask.eth->type) { + if (item_flags & MLX5_FLOW_LAYER_TUNNEL) + inner_etype = spec.eth->type; + else + outer_etype = spec.eth->type; + } + if (tunnel_outer) { DRV_LOG(WARNING, - "outer L2 addresses cannot be forced" - " for vxlan decapsulation, parameter" - " ignored"); + "outer L2 addresses cannot be" + " forced as outer ones for tunnel," + " parameter is ignored"); break; } - if (mask.eth->type) { - mnl_attr_put_u16(nlh, TCA_FLOWER_KEY_ETH_TYPE, - spec.eth->type); - eth_type_set = 1; - } if (!is_zero_ether_addr(&mask.eth->dst)) { mnl_attr_put(nlh, TCA_FLOWER_KEY_ETH_DST, ETHER_ADDR_LEN, @@ -3216,6 +3212,7 @@ flow_tcf_translate(struct rte_eth_dev *dev, struct mlx5_flow *dev_flow, case RTE_FLOW_ITEM_TYPE_VLAN: assert(!encap.hdr); assert(!decap.hdr); + assert(!tunnel_outer); item_flags |= MLX5_FLOW_LAYER_OUTER_VLAN; mask.vlan = flow_tcf_item_mask (items, &rte_flow_item_vlan_mask, @@ -3224,20 +3221,14 @@ flow_tcf_translate(struct rte_eth_dev *dev, struct mlx5_flow *dev_flow, sizeof(flow_tcf_mask_supported.vlan), error); assert(mask.vlan); - if (!eth_type_set) - mnl_attr_put_u16(nlh, TCA_FLOWER_KEY_ETH_TYPE, - RTE_BE16(ETH_P_8021Q)); - eth_type_set = 1; - vlan_present = 1; if (mask.vlan == &flow_tcf_mask_empty.vlan) break; spec.vlan = items->spec; - if (mask.vlan->inner_type) { - mnl_attr_put_u16(nlh, - TCA_FLOWER_KEY_VLAN_ETH_TYPE, - spec.vlan->inner_type); - vlan_eth_type_set = 1; - } + assert(outer_etype == RTE_BE16(ETH_P_ALL) || + outer_etype == RTE_BE16(ETH_P_8021Q)); + outer_etype = RTE_BE16(ETH_P_8021Q); + if
(mask.vlan->inner_type) + vlan_etype = spec.vlan->inner_type; if (mask.vlan->tci & RTE_BE16(0xe000)) mnl_attr_put_u8(nlh, TCA_FLOWER_KEY_VLAN_PRIO, (rte_be_to_cpu_16 @@ -3250,7 +3241,9 @@ flow_tcf_translate(struct rte_eth_dev *dev, struct mlx5_flow *dev_flow, assert(dev_flow->tcf.nlsize >= nlh->nlmsg_len); break; case RTE_FLOW_ITEM_TYPE_IPV4: - item_flags |= MLX5_FLOW_LAYER_OUTER_L3_IPV4; + item_flags |= (item_flags & MLX5_FLOW_LAYER_TUNNEL) ? + MLX5_FLOW_LAYER_INNER_L3_IPV4 : + MLX5_FLOW_LAYER_OUTER_L3_IPV4; mask.ipv4 = flow_tcf_item_mask (items, &rte_flow_item_ipv4_mask, &flow_tcf_mask_supported.ipv4, @@ -3258,57 +3251,83 @@ flow_tcf_translate(struct rte_eth_dev *dev, struct mlx5_flow *dev_flow, sizeof(flow_tcf_mask_supported.ipv4), error); assert(mask.ipv4); + if (item_flags & MLX5_FLOW_LAYER_TUNNEL) { + assert(inner_etype == RTE_BE16(ETH_P_ALL) || + inner_etype == RTE_BE16(ETH_P_IP)); + inner_etype = RTE_BE16(ETH_P_IP); + } else if (outer_etype == RTE_BE16(ETH_P_8021Q)) { + assert(vlan_etype == RTE_BE16(ETH_P_ALL) || + vlan_etype == RTE_BE16(ETH_P_IP)); + vlan_etype = RTE_BE16(ETH_P_IP); + } else { + assert(outer_etype == RTE_BE16(ETH_P_ALL) || + outer_etype == RTE_BE16(ETH_P_IP)); + outer_etype = RTE_BE16(ETH_P_IP); + } spec.ipv4 = items->spec; - if (!decap.vxlan) { - if (!eth_type_set || - (!vlan_eth_type_set && vlan_present)) - mnl_attr_put_u16 - (nlh, - vlan_present ? - TCA_FLOWER_KEY_VLAN_ETH_TYPE : - TCA_FLOWER_KEY_ETH_TYPE, - RTE_BE16(ETH_P_IP)); - eth_type_set = 1; - vlan_eth_type_set = 1; - if (mask.ipv4 == &flow_tcf_mask_empty.ipv4) + if (!tunnel_outer && mask.ipv4->hdr.next_proto_id) { + /* + * No way to set IP protocol for outer tunnel + * layers. Usually it is fixed, for example, + * to UDP for VXLAN/GPE. + */ + assert(spec.ipv4); /* Mask is not empty. */ + mnl_attr_put_u8(nlh, TCA_FLOWER_KEY_IP_PROTO, + spec.ipv4->hdr.next_proto_id); + ip_proto_set = 1; + } + if (mask.ipv4 == &flow_tcf_mask_empty.ipv4 || + (!mask.ipv4->hdr.src_addr && + !mask.ipv4->hdr.dst_addr)) { + if (!tunnel_outer) break; - if (mask.ipv4->hdr.next_proto_id) { - mnl_attr_put_u8 - (nlh, TCA_FLOWER_KEY_IP_PROTO, - spec.ipv4->hdr.next_proto_id); - ip_proto_set = 1; - } - } else { - assert(mask.ipv4 != &flow_tcf_mask_empty.ipv4); + /* + * For the tunnel outer layer we must set the + * outer IP key anyway, even if the + * specification/mask is empty. There is no + * other way to tell the kernel about the outer + * layer protocol. + */ + mnl_attr_put_u32 + (nlh, TCA_FLOWER_KEY_ENC_IPV4_SRC, + mask.ipv4->hdr.src_addr); + mnl_attr_put_u32 + (nlh, TCA_FLOWER_KEY_ENC_IPV4_SRC_MASK, + mask.ipv4->hdr.src_addr); + assert(dev_flow->tcf.nlsize >= nlh->nlmsg_len); + break; } if (mask.ipv4->hdr.src_addr) { mnl_attr_put_u32 - (nlh, decap.vxlan ? + (nlh, tunnel_outer ? TCA_FLOWER_KEY_ENC_IPV4_SRC : TCA_FLOWER_KEY_IPV4_SRC, spec.ipv4->hdr.src_addr); mnl_attr_put_u32 - (nlh, decap.vxlan ? + (nlh, tunnel_outer ? TCA_FLOWER_KEY_ENC_IPV4_SRC_MASK : TCA_FLOWER_KEY_IPV4_SRC_MASK, mask.ipv4->hdr.src_addr); } if (mask.ipv4->hdr.dst_addr) { mnl_attr_put_u32 - (nlh, decap.vxlan ? + (nlh, tunnel_outer ? TCA_FLOWER_KEY_ENC_IPV4_DST : TCA_FLOWER_KEY_IPV4_DST, spec.ipv4->hdr.dst_addr); mnl_attr_put_u32 - (nlh, decap.vxlan ? + (nlh, tunnel_outer ?
TCA_FLOWER_KEY_ENC_IPV4_DST_MASK : TCA_FLOWER_KEY_IPV4_DST_MASK, mask.ipv4->hdr.dst_addr); } assert(dev_flow->tcf.nlsize >= nlh->nlmsg_len); break; - case RTE_FLOW_ITEM_TYPE_IPV6: - item_flags |= MLX5_FLOW_LAYER_OUTER_L3_IPV6; + case RTE_FLOW_ITEM_TYPE_IPV6: { + bool ipv6_src, ipv6_dst; + + item_flags |= (item_flags & MLX5_FLOW_LAYER_TUNNEL) ? + MLX5_FLOW_LAYER_INNER_L3_IPV6 : + MLX5_FLOW_LAYER_OUTER_L3_IPV6; mask.ipv6 = flow_tcf_item_mask (items, &rte_flow_item_ipv6_mask, &flow_tcf_mask_supported.ipv6, @@ -3316,48 +3335,75 @@ flow_tcf_translate(struct rte_eth_dev *dev, struct mlx5_flow *dev_flow, sizeof(flow_tcf_mask_supported.ipv6), error); assert(mask.ipv6); + if (item_flags & MLX5_FLOW_LAYER_TUNNEL) { + assert(inner_etype == RTE_BE16(ETH_P_ALL) || + inner_etype == RTE_BE16(ETH_P_IPV6)); + inner_etype = RTE_BE16(ETH_P_IPV6); + } else if (outer_etype == RTE_BE16(ETH_P_8021Q)) { + assert(vlan_etype == RTE_BE16(ETH_P_ALL) || + vlan_etype == RTE_BE16(ETH_P_IPV6)); + vlan_etype = RTE_BE16(ETH_P_IPV6); + } else { + assert(outer_etype == RTE_BE16(ETH_P_ALL) || + outer_etype == RTE_BE16(ETH_P_IPV6)); + outer_etype = RTE_BE16(ETH_P_IPV6); + } spec.ipv6 = items->spec; - if (!decap.vxlan) { - if (!eth_type_set || - (!vlan_eth_type_set && vlan_present)) - mnl_attr_put_u16 - (nlh, - vlan_present ? - TCA_FLOWER_KEY_VLAN_ETH_TYPE : - TCA_FLOWER_KEY_ETH_TYPE, - RTE_BE16(ETH_P_IPV6)); - eth_type_set = 1; - vlan_eth_type_set = 1; - if (mask.ipv6 == &flow_tcf_mask_empty.ipv6) + if (!tunnel_outer && mask.ipv6->hdr.proto) { + /* + * No way to set IP protocol for outer tunnel + * layers. Usually it is fixed, for example, + * to UDP for VXLAN/GPE. + */ + assert(spec.ipv6); /* Mask is not empty. */ + mnl_attr_put_u8(nlh, TCA_FLOWER_KEY_IP_PROTO, + spec.ipv6->hdr.proto); + ip_proto_set = 1; + } + ipv6_dst = !IN6_IS_ADDR_UNSPECIFIED + (mask.ipv6->hdr.dst_addr); + ipv6_src = !IN6_IS_ADDR_UNSPECIFIED + (mask.ipv6->hdr.src_addr); + if (mask.ipv6 == &flow_tcf_mask_empty.ipv6 || + (!ipv6_dst && !ipv6_src)) { + if (!tunnel_outer) break; - if (mask.ipv6->hdr.proto) { - mnl_attr_put_u8 - (nlh, TCA_FLOWER_KEY_IP_PROTO, - spec.ipv6->hdr.proto); - ip_proto_set = 1; - } - } else { - assert(mask.ipv6 != &flow_tcf_mask_empty.ipv6); + /* + * For the tunnel outer layer we must set the + * outer IP key anyway, even if the + * specification/mask is empty. There is no + * other way to tell the kernel about the outer + * layer protocol. + */ + mnl_attr_put(nlh, + TCA_FLOWER_KEY_ENC_IPV6_SRC, + IPV6_ADDR_LEN, + mask.ipv6->hdr.src_addr); + mnl_attr_put(nlh, + TCA_FLOWER_KEY_ENC_IPV6_SRC_MASK, + IPV6_ADDR_LEN, + mask.ipv6->hdr.src_addr); + assert(dev_flow->tcf.nlsize >= nlh->nlmsg_len); + break; } - if (!IN6_IS_ADDR_UNSPECIFIED(mask.ipv6->hdr.src_addr)) { - mnl_attr_put(nlh, decap.vxlan ? + if (ipv6_src) { + mnl_attr_put(nlh, tunnel_outer ? TCA_FLOWER_KEY_ENC_IPV6_SRC : TCA_FLOWER_KEY_IPV6_SRC, IPV6_ADDR_LEN, spec.ipv6->hdr.src_addr); - mnl_attr_put(nlh, decap.vxlan ? + mnl_attr_put(nlh, tunnel_outer ? TCA_FLOWER_KEY_ENC_IPV6_SRC_MASK : TCA_FLOWER_KEY_IPV6_SRC_MASK, IPV6_ADDR_LEN, mask.ipv6->hdr.src_addr); } - if (!IN6_IS_ADDR_UNSPECIFIED(mask.ipv6->hdr.dst_addr)) { - mnl_attr_put(nlh, decap.vxlan ? + if (ipv6_dst) { + mnl_attr_put(nlh, tunnel_outer ? TCA_FLOWER_KEY_ENC_IPV6_DST : TCA_FLOWER_KEY_IPV6_DST, IPV6_ADDR_LEN, spec.ipv6->hdr.dst_addr); - mnl_attr_put(nlh, decap.vxlan ? + mnl_attr_put(nlh, tunnel_outer ?
TCA_FLOWER_KEY_ENC_IPV6_DST_MASK : TCA_FLOWER_KEY_IPV6_DST_MASK, IPV6_ADDR_LEN, @@ -3365,8 +3411,11 @@ flow_tcf_translate(struct rte_eth_dev *dev, struct mlx5_flow *dev_flow, } assert(dev_flow->tcf.nlsize >= nlh->nlmsg_len); break; + } case RTE_FLOW_ITEM_TYPE_UDP: - item_flags |= MLX5_FLOW_LAYER_OUTER_L4_UDP; + item_flags |= (item_flags & MLX5_FLOW_LAYER_TUNNEL) ? + MLX5_FLOW_LAYER_INNER_L4_UDP : + MLX5_FLOW_LAYER_OUTER_L4_UDP; mask.udp = flow_tcf_item_mask (items, &rte_flow_item_udp_mask, &flow_tcf_mask_supported.udp, @@ -3375,7 +3424,7 @@ flow_tcf_translate(struct rte_eth_dev *dev, struct mlx5_flow *dev_flow, error); assert(mask.udp); spec.udp = items->spec; - if (!decap.vxlan) { + if (!tunnel_outer) { if (!ip_proto_set) mnl_attr_put_u8 (nlh, TCA_FLOWER_KEY_IP_PROTO, @@ -3390,24 +3439,24 @@ flow_tcf_translate(struct rte_eth_dev *dev, struct mlx5_flow *dev_flow, } if (mask.udp->hdr.src_port) { mnl_attr_put_u16 - (nlh, decap.vxlan ? + (nlh, tunnel_outer ? TCA_FLOWER_KEY_ENC_UDP_SRC_PORT : TCA_FLOWER_KEY_UDP_SRC, spec.udp->hdr.src_port); mnl_attr_put_u16 - (nlh, decap.vxlan ? + (nlh, tunnel_outer ? TCA_FLOWER_KEY_ENC_UDP_SRC_PORT_MASK : TCA_FLOWER_KEY_UDP_SRC_MASK, mask.udp->hdr.src_port); } if (mask.udp->hdr.dst_port) { mnl_attr_put_u16 - (nlh, decap.vxlan ? + (nlh, tunnel_outer ? TCA_FLOWER_KEY_ENC_UDP_DST_PORT : TCA_FLOWER_KEY_UDP_DST, spec.udp->hdr.dst_port); mnl_attr_put_u16 - (nlh, decap.vxlan ? + (nlh, tunnel_outer ? TCA_FLOWER_KEY_ENC_UDP_DST_PORT_MASK : TCA_FLOWER_KEY_UDP_DST_MASK, mask.udp->hdr.dst_port); @@ -3415,7 +3464,9 @@ flow_tcf_translate(struct rte_eth_dev *dev, struct mlx5_flow *dev_flow, assert(dev_flow->tcf.nlsize >= nlh->nlmsg_len); break; case RTE_FLOW_ITEM_TYPE_TCP: - item_flags |= MLX5_FLOW_LAYER_OUTER_L4_TCP; + item_flags |= (item_flags & MLX5_FLOW_LAYER_TUNNEL) ? + MLX5_FLOW_LAYER_INNER_L4_TCP : + MLX5_FLOW_LAYER_OUTER_L4_TCP; mask.tcp = flow_tcf_item_mask (items, &rte_flow_item_tcp_mask, &flow_tcf_mask_supported.tcp, @@ -3459,6 +3510,7 @@ flow_tcf_translate(struct rte_eth_dev *dev, struct mlx5_flow *dev_flow, break; case RTE_FLOW_ITEM_TYPE_VXLAN: assert(decap.vxlan); + tunnel_outer = 0; item_flags |= MLX5_FLOW_LAYER_VXLAN; spec.vxlan = items->spec; mnl_attr_put_u32(nlh, @@ -3472,6 +3524,34 @@ flow_tcf_translate(struct rte_eth_dev *dev, struct mlx5_flow *dev_flow, NULL, "item not supported"); } } + /* + * Set the ether_type flower key and tc rule protocol: + * - if there is neither VLAN nor VXLAN the key is taken from the + * eth item directly or deduced from L3 items. + * - if there is a vlan item then the key is fixed to 802.1q. + * - if there is a vxlan item then the key is set to the inner tunnel type. + * - simultaneous vlan and vxlan items are prohibited.
+ */ + if (outer_etype != RTE_BE16(ETH_P_ALL)) { + tcm->tcm_info = TC_H_MAKE((attr->priority + 1) << 16, + outer_etype); + if (item_flags & MLX5_FLOW_LAYER_TUNNEL) { + if (inner_etype != RTE_BE16(ETH_P_ALL)) + mnl_attr_put_u16(nlh, + TCA_FLOWER_KEY_ETH_TYPE, + inner_etype); + } else { + mnl_attr_put_u16(nlh, + TCA_FLOWER_KEY_ETH_TYPE, + outer_etype); + if (outer_etype == RTE_BE16(ETH_P_8021Q) && + vlan_etype != RTE_BE16(ETH_P_ALL)) + mnl_attr_put_u16(nlh, + TCA_FLOWER_KEY_VLAN_ETH_TYPE, + vlan_etype); + } + assert(dev_flow->tcf.nlsize >= nlh->nlmsg_len); + } na_flower_act = mnl_attr_nest_start(nlh, TCA_FLOWER_ACT); na_act_index_cur = 1; for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) { @@ -3505,6 +3585,10 @@ flow_tcf_translate(struct rte_eth_dev *dev, struct mlx5_flow *dev_flow, mnl_attr_get_payload (mnl_nlmsg_get_payload_tail (nlh)))->ifindex; + } else if (decap.hdr) { + assert(dev_flow->tcf.tunnel); + dev_flow->tcf.tunnel->ifindex_ptr = + (unsigned int *)&tcm->tcm_ifindex; } mnl_attr_put(nlh, TCA_MIRRED_PARMS, sizeof(struct tc_mirred), @@ -4266,8 +4350,8 @@ flow_tcf_collect_vxlan_cb(const struct nlmsghdr *nlh, void *arg) /** * Cleanup the outer interface. Removes all found vxlan devices - * attached to specified index, flushes the meigh and local IP - * datavase. + * attached to specified index, flushes the neigh and local IP + * database. * * @param[in] tcf * Context object initialized by mlx5_flow_tcf_context_create(). @@ -4815,6 +4899,7 @@ flow_tcf_vtep_create(struct mlx5_flow_tcf_context *tcf, * when we do not need it anymore. */ vtep->created = 1; + vtep->waitreg = 1; } /* Try to get ifindex of created of pre-existing device. */ ret = if_nametoindex(name); @@ -5240,6 +5325,7 @@ flow_tcf_remove(struct rte_eth_dev *dev, struct rte_flow *flow) struct mlx5_flow_tcf_context *ctx = priv->tcf_context; struct mlx5_flow *dev_flow; struct nlmsghdr *nlh; + struct tcmsg *tcm; if (!flow) return; @@ -5260,11 +5346,54 @@ flow_tcf_remove(struct rte_eth_dev *dev, struct rte_flow *flow) dev_flow); dev_flow->tcf.tunnel->vtep = NULL; } + /* Cleanup the rule handle value. */ + tcm = mnl_nlmsg_get_payload(nlh); + tcm->tcm_handle = 0; dev_flow->tcf.applied = 0; } } /** + * Fetch the applied rule handle. This is a callback routine called by + * libmnl mnl_cb_run() in a loop for every message in the received packet. + * When the NLM_F_ECHO flag is specified the kernel sends the created + * rule descriptor back to the application and we can retrieve the + * actual rule handle from the updated descriptor. + * + * @param[in] nlh + * Pointer to reply header. + * @param[in, out] arg + * Context pointer for this callback. + * + * @return + * A positive, nonzero value on success (required by libmnl + * to continue message processing). + */ +static int +flow_tcf_collect_apply_cb(const struct nlmsghdr *nlh, void *arg) +{ + struct nlmsghdr *nlhrq = arg; + struct tcmsg *tcmrq = mnl_nlmsg_get_payload(nlhrq); + struct tcmsg *tcm = mnl_nlmsg_get_payload(nlh); + struct nlattr *na; + + if (nlh->nlmsg_type != RTM_NEWTFILTER || + nlh->nlmsg_seq != nlhrq->nlmsg_seq) + return 1; + mnl_attr_for_each(na, nlh, sizeof(*tcm)) { + switch (mnl_attr_get_type(na)) { + case TCA_KIND: + if (strcmp(mnl_attr_get_payload(na), "flower")) { + /* Not a flower filter, drop entire message. */ + return 1; + } + tcmrq->tcm_handle = tcm->tcm_handle; + return 1; + } + } + return 1; +} +/** * Apply flow to E-Switch by sending Netlink message.
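flow_tcf_collect_apply_cb() above only sees the kernel-assigned handle because the request goes out with NLM_F_ECHO and the echoed reply is fed through mnl_cb_run() with the request header as the callback argument. A sketch of that send-and-collect sequence using plain libmnl calls:

#include <libmnl/libmnl.h>

static int send_and_collect(struct mnl_socket *nl, struct nlmsghdr *nlh,
			    mnl_cb_t cb)
{
	char buf[MNL_SOCKET_BUFFER_SIZE];
	ssize_t len;

	if (mnl_socket_sendto(nl, nlh, nlh->nlmsg_len) < 0)
		return -1;
	len = mnl_socket_recvfrom(nl, buf, sizeof(buf));
	if (len < 0)
		return -1;
	/* cb runs for each reply message; the echoed RTM_NEWTFILTER
	 * carries the tcm_handle chosen by the kernel. */
	return mnl_cb_run(buf, len, nlh->nlmsg_seq,
			  mnl_socket_get_portid(nl), cb, nlh);
}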
* * @param[in] dev @@ -5275,7 +5404,7 @@ flow_tcf_remove(struct rte_eth_dev *dev, struct rte_flow *flow) * Pointer to the error structure. * * @return - * 0 on success, a negative errno value otherwise and rte_ernno is set. + * 0 on success, a negative errno value otherwise and rte_errno is set. */ static int flow_tcf_apply(struct rte_eth_dev *dev, struct rte_flow *flow, @@ -5285,6 +5414,10 @@ flow_tcf_apply(struct rte_eth_dev *dev, struct rte_flow *flow, struct mlx5_flow_tcf_context *ctx = priv->tcf_context; struct mlx5_flow *dev_flow; struct nlmsghdr *nlh; + struct tcmsg *tcm; + uint64_t start = 0; + uint64_t twait = 0; + int ret; dev_flow = LIST_FIRST(&flow->dev_flows); /* E-Switch flow can't be expanded. */ @@ -5293,7 +5426,11 @@ flow_tcf_apply(struct rte_eth_dev *dev, struct rte_flow *flow, return 0; nlh = dev_flow->tcf.nlh; nlh->nlmsg_type = RTM_NEWTFILTER; - nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL; + nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE | + NLM_F_EXCL | NLM_F_ECHO; + tcm = mnl_nlmsg_get_payload(nlh); + /* Allow kernel to assign handle on its own. */ + tcm->tcm_handle = 0; if (dev_flow->tcf.tunnel) { /* * Replace the interface index, target for @@ -5313,8 +5450,52 @@ flow_tcf_apply(struct rte_eth_dev *dev, struct rte_flow *flow, dev_flow->tcf.tunnel->ifindex_org); *dev_flow->tcf.tunnel->ifindex_ptr = dev_flow->tcf.tunnel->vtep->ifindex; + if (dev_flow->tcf.tunnel->vtep->waitreg) { + /* Clear wait flag for VXLAN port registration. */ + dev_flow->tcf.tunnel->vtep->waitreg = 0; + twait = rte_get_timer_hz(); + assert(twait > MS_PER_S); + twait = twait * MLX5_VXLAN_WAIT_PORT_REG_MS; + twait = twait / MS_PER_S; + start = rte_get_timer_cycles(); + } } - if (!flow_tcf_nl_ack(ctx, nlh, NULL, NULL)) { + /* + * The kernel creates the VXLAN devices and registers the UDP ports + * to be hardware offloaded within the NIC kernel drivers. The + * registration process is performed in the context of a working + * kernel thread and race conditions might happen. The VXLAN device + * is created and success is returned to the calling application, but + * the UDP port registration process is not completed yet. The next + * applied rule may be rejected by the driver with an ENOTSUP code. + * We are going to wait a bit, allowing the registration process to + * complete. The waiting is performed once after the device has been + * created. + */ + do { + struct timespec onems; + + ret = flow_tcf_nl_ack(ctx, nlh, + flow_tcf_collect_apply_cb, nlh); + if (!ret || ret != -ENOTSUP || !twait) + break; + /* Wait one millisecond and try again until timeout. */ + onems.tv_sec = 0; + onems.tv_nsec = NS_PER_S / MS_PER_S; + nanosleep(&onems, 0); + if ((rte_get_timer_cycles() - start) > twait) { + /* Timeout elapsed, try once more and exit.
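The loop above is a bounded retry: the millisecond budget (MLX5_VXLAN_WAIT_PORT_REG_MS) is converted to timer cycles once, each -ENOTSUP failure is followed by a one-millisecond sleep, and one final attempt is allowed after the deadline passes. A generic sketch of the same shape, assuming an op() callback that transiently returns -ENOTSUP:

#include <errno.h>
#include <stdint.h>
#include <time.h>
#include <rte_cycles.h>

static int retry_bounded(int (*op)(void *), void *arg, unsigned int timeout_ms)
{
	uint64_t twait = rte_get_timer_hz() * timeout_ms / MS_PER_S;
	uint64_t start = rte_get_timer_cycles();
	int ret;

	for (;;) {
		struct timespec onems = {
			.tv_sec = 0,
			.tv_nsec = NS_PER_S / MS_PER_S,
		};

		ret = op(arg);
		if (ret != -ENOTSUP || !twait)
			return ret; /* success, hard error, or timed out */
		nanosleep(&onems, NULL);
		if (rte_get_timer_cycles() - start > twait)
			twait = 0; /* deadline passed: one last attempt */
	}
}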
*/ + twait = 0; + } + } while (true); + if (!ret) { + if (!tcm->tcm_handle) { + flow_tcf_remove(dev, flow); + return rte_flow_error_set + (error, ENOENT, + RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL, + "netlink: rule zero handle returned"); + } dev_flow->tcf.applied = 1; if (*dev_flow->tcf.ptc_flags & TCA_CLS_FLAGS_SKIP_SW) return 0; diff --git a/drivers/net/mlx5/mlx5_flow_verbs.c b/drivers/net/mlx5/mlx5_flow_verbs.c index 81ec59d7..409e1cd0 100644 --- a/drivers/net/mlx5/mlx5_flow_verbs.c +++ b/drivers/net/mlx5/mlx5_flow_verbs.c @@ -121,13 +121,13 @@ flow_verbs_counter_new(struct rte_eth_dev *dev, uint32_t shared, uint32_t id) struct mlx5_flow_counter *cnt; int ret; - LIST_FOREACH(cnt, &priv->flow_counters, next) { - if (!cnt->shared || cnt->shared != shared) - continue; - if (cnt->id != id) - continue; - cnt->ref_cnt++; - return cnt; + if (shared) { + LIST_FOREACH(cnt, &priv->flow_counters, next) { + if (cnt->shared && cnt->id == id) { + cnt->ref_cnt++; + return cnt; + } + } } cnt = rte_calloc(__func__, 1, sizeof(*cnt), 0); if (!cnt) { diff --git a/drivers/net/mlx5/mlx5_rxq.c b/drivers/net/mlx5/mlx5_rxq.c index 183da0e2..10b6ce0c 100644 --- a/drivers/net/mlx5/mlx5_rxq.c +++ b/drivers/net/mlx5/mlx5_rxq.c @@ -881,12 +881,15 @@ mlx5_rxq_ibv_new(struct rte_eth_dev *dev, uint16_t idx) attr.wq.ibv.create_flags |= IBV_WQ_FLAGS_SCATTER_FCS; attr.wq.ibv.comp_mask |= IBV_WQ_INIT_ATTR_FLAGS; } -#ifdef HAVE_IBV_WQ_FLAG_RX_END_PADDING if (config->hw_padding) { +#if defined(HAVE_IBV_WQ_FLAG_RX_END_PADDING) attr.wq.ibv.create_flags |= IBV_WQ_FLAG_RX_END_PADDING; attr.wq.ibv.comp_mask |= IBV_WQ_INIT_ATTR_FLAGS; - } +#elif defined(HAVE_IBV_WQ_FLAGS_PCI_WRITE_END_PADDING) + attr.wq.ibv.create_flags |= IBV_WQ_FLAGS_PCI_WRITE_END_PADDING; + attr.wq.ibv.comp_mask |= IBV_WQ_INIT_ATTR_FLAGS; #endif + } #ifdef HAVE_IBV_DEVICE_STRIDING_RQ_SUPPORT attr.wq.mlx5 = (struct mlx5dv_wq_init_attr){ .comp_mask = 0, @@ -1179,6 +1182,7 @@ mlx5_mprq_free_mp(struct rte_eth_dev *dev) continue; rxq->mprq_mp = NULL; } + priv->mprq_mp = NULL; return 0; } diff --git a/drivers/net/mlx5/mlx5_rxtx.h b/drivers/net/mlx5/mlx5_rxtx.h index f47d327c..75194a3f 100644 --- a/drivers/net/mlx5/mlx5_rxtx.h +++ b/drivers/net/mlx5/mlx5_rxtx.h @@ -733,10 +733,6 @@ mlx5_tx_dbrec(struct mlx5_txq_data *txq, volatile struct mlx5_wqe *wqe) * Pointer to the Tx queue. * @param buf * Pointer to the mbuf. - * @param tso - * TSO offloads enabled. - * @param vlan - * VLAN offloads enabled * @param offsets * Pointer to the SWP header offsets. 
* @param swp_types diff --git a/drivers/net/netvsc/hn_ethdev.c b/drivers/net/netvsc/hn_ethdev.c index b330bf3d..da76b0db 100644 --- a/drivers/net/netvsc/hn_ethdev.c +++ b/drivers/net/netvsc/hn_ethdev.c @@ -732,6 +732,7 @@ eth_hn_dev_init(struct rte_eth_dev *eth_dev) hv->chim_res = &vmbus->resource[HV_SEND_BUF_MAP]; hv->port_id = eth_dev->data->port_id; hv->latency = HN_CHAN_LATENCY_NS; + hv->max_queues = 1; err = hn_parse_args(eth_dev); if (err) @@ -770,6 +771,10 @@ eth_hn_dev_init(struct rte_eth_dev *eth_dev) if (err) goto failed; + /* Multi-queue requires later versions of Windows Server */ + if (hv->nvs_ver < NVS_VERSION_5) + return 0; + max_chan = rte_vmbus_max_channels(vmbus); PMD_INIT_LOG(DEBUG, "VMBus max channels %d", max_chan); if (max_chan <= 0) @@ -786,7 +791,7 @@ eth_hn_dev_init(struct rte_eth_dev *eth_dev) err = hn_vf_add(eth_dev, hv); if (err) - goto failed; + hv->vf_present = 0; } return 0; @@ -794,6 +799,7 @@ eth_hn_dev_init(struct rte_eth_dev *eth_dev) failed: PMD_INIT_LOG(NOTICE, "device init failed"); + hn_tx_pool_uninit(eth_dev); hn_detach(hv); return err; } @@ -816,6 +822,7 @@ eth_hn_dev_uninit(struct rte_eth_dev *eth_dev) eth_dev->rx_pkt_burst = NULL; hn_detach(hv); + hn_tx_pool_uninit(eth_dev); rte_vmbus_chan_close(hv->primary->chan); rte_free(hv->primary); rte_eth_dev_owner_delete(hv->owner.id); diff --git a/drivers/net/netvsc/hn_nvs.c b/drivers/net/netvsc/hn_nvs.c index 9690c5f8..d58770e0 100644 --- a/drivers/net/netvsc/hn_nvs.c +++ b/drivers/net/netvsc/hn_nvs.c @@ -326,9 +326,9 @@ hn_nvs_conf_ndis(struct hn_data *hv, unsigned int mtu) conf.mtu = mtu + ETHER_HDR_LEN; conf.caps = NVS_NDIS_CONF_VLAN; - /* TODO enable SRIOV */ - //if (hv->nvs_ver >= NVS_VERSION_5) - // conf.caps |= NVS_NDIS_CONF_SRIOV; + /* Enable SRIOV */ + if (hv->nvs_ver >= NVS_VERSION_5) + conf.caps |= NVS_NDIS_CONF_SRIOV; /* NOTE: No response.
*/ error = hn_nvs_req_send(hv, &conf, sizeof(conf)); diff --git a/drivers/net/netvsc/hn_rxtx.c b/drivers/net/netvsc/hn_rxtx.c index f4a36641..487f7646 100644 --- a/drivers/net/netvsc/hn_rxtx.c +++ b/drivers/net/netvsc/hn_rxtx.c @@ -199,6 +199,17 @@ hn_tx_pool_init(struct rte_eth_dev *dev) return 0; } +void +hn_tx_pool_uninit(struct rte_eth_dev *dev) +{ + struct hn_data *hv = dev->data->dev_private; + + if (hv->tx_pool) { + rte_mempool_free(hv->tx_pool); + hv->tx_pool = NULL; + } +} + static void hn_reset_txagg(struct hn_tx_queue *txq) { txq->agg_szleft = txq->agg_szmax; diff --git a/drivers/net/netvsc/hn_var.h b/drivers/net/netvsc/hn_var.h index e1072c7c..a6516c1e 100644 --- a/drivers/net/netvsc/hn_var.h +++ b/drivers/net/netvsc/hn_var.h @@ -149,6 +149,7 @@ uint16_t hn_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts); int hn_tx_pool_init(struct rte_eth_dev *dev); +void hn_tx_pool_uninit(struct rte_eth_dev *dev); int hn_dev_link_update(struct rte_eth_dev *dev, int wait); int hn_dev_tx_queue_setup(struct rte_eth_dev *dev, uint16_t queue_idx, uint16_t nb_desc, unsigned int socket_id, diff --git a/drivers/net/qede/qede_rxtx.c b/drivers/net/qede/qede_rxtx.c index 0e33be1a..eda19b2b 100644 --- a/drivers/net/qede/qede_rxtx.c +++ b/drivers/net/qede/qede_rxtx.c @@ -35,6 +35,52 @@ static inline int qede_alloc_rx_buffer(struct qede_rx_queue *rxq) return 0; } +#define QEDE_MAX_BULK_ALLOC_COUNT 512 + +static inline int qede_alloc_rx_bulk_mbufs(struct qede_rx_queue *rxq, int count) +{ + void *obj_p[QEDE_MAX_BULK_ALLOC_COUNT] __rte_cache_aligned; + struct rte_mbuf *mbuf = NULL; + struct eth_rx_bd *rx_bd; + dma_addr_t mapping; + int i, ret = 0; + uint16_t idx; + + idx = rxq->sw_rx_prod & NUM_RX_BDS(rxq); + + if (count > QEDE_MAX_BULK_ALLOC_COUNT) + count = QEDE_MAX_BULK_ALLOC_COUNT; + + ret = rte_mempool_get_bulk(rxq->mb_pool, obj_p, count); + if (unlikely(ret)) { + PMD_RX_LOG(ERR, rxq, + "Failed to allocate %d rx buffers " + "sw_rx_prod %u sw_rx_cons %u mp entries %u free %u", + count, idx, rxq->sw_rx_cons & NUM_RX_BDS(rxq), + rte_mempool_avail_count(rxq->mb_pool), + rte_mempool_in_use_count(rxq->mb_pool)); + return -ENOMEM; + } + + for (i = 0; i < count; i++) { + mbuf = obj_p[i]; + if (likely(i < count - 1)) + rte_prefetch0(obj_p[i + 1]); + + idx = rxq->sw_rx_prod & NUM_RX_BDS(rxq); + rxq->sw_rx_ring[idx].mbuf = mbuf; + rxq->sw_rx_ring[idx].page_offset = 0; + mapping = rte_mbuf_data_iova_default(mbuf); + rx_bd = (struct eth_rx_bd *) + ecore_chain_produce(&rxq->rx_bd_ring); + rx_bd->addr.hi = rte_cpu_to_le_32(U64_HI(mapping)); + rx_bd->addr.lo = rte_cpu_to_le_32(U64_LO(mapping)); + rxq->sw_rx_prod++; + } + + return 0; +} + /* Criteria for calculating Rx buffer size - * 1) rx_buf_size should not exceed the size of mbuf * 2) In scattered_rx mode - minimum rx_buf_size should be @@ -1131,7 +1177,7 @@ qede_reuse_page(__rte_unused struct qede_dev *qdev, struct qede_rx_queue *rxq, struct qede_rx_entry *curr_cons) { struct eth_rx_bd *rx_bd_prod = ecore_chain_produce(&rxq->rx_bd_ring); - uint16_t idx = rxq->sw_rx_cons & NUM_RX_BDS(rxq); + uint16_t idx = rxq->sw_rx_prod & NUM_RX_BDS(rxq); struct qede_rx_entry *curr_prod; dma_addr_t new_mapping; @@ -1364,7 +1410,6 @@ qede_recv_pkts(void *p_rxq, struct rte_mbuf **rx_pkts, uint16_t nb_pkts) uint8_t bitfield_val; #endif uint8_t tunn_parse_flag; - uint8_t j; struct eth_fast_path_rx_tpa_start_cqe *cqe_start_tpa; uint64_t ol_flags; uint32_t packet_type; @@ -1373,6 +1418,7 @@ qede_recv_pkts(void *p_rxq, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
uint8_t offset, tpa_agg_idx, flags; struct qede_agg_info *tpa_info = NULL; uint32_t rss_hash; + int rx_alloc_count = 0; hw_comp_cons = rte_le_to_cpu_16(*rxq->hw_cons_ptr); sw_comp_cons = ecore_chain_get_cons_idx(&rxq->rx_comp_ring); @@ -1382,6 +1428,25 @@ if (hw_comp_cons == sw_comp_cons) return 0; + /* Allocate buffers that we used in previous loop */ + if (rxq->rx_alloc_count) { + if (unlikely(qede_alloc_rx_bulk_mbufs(rxq, + rxq->rx_alloc_count))) { + struct rte_eth_dev *dev; + + PMD_RX_LOG(ERR, rxq, + "New buffer allocation failed, " + "dropping incoming packet\n"); + dev = &rte_eth_devices[rxq->port_id]; + dev->data->rx_mbuf_alloc_failed += + rxq->rx_alloc_count; + rxq->rx_alloc_errors += rxq->rx_alloc_count; + return 0; + } + qede_update_rx_prod(qdev, rxq); + rxq->rx_alloc_count = 0; + } + while (sw_comp_cons != hw_comp_cons) { ol_flags = 0; packet_type = RTE_PTYPE_UNKNOWN; @@ -1553,16 +1618,7 @@ qede_recv_pkts(void *p_rxq, struct rte_mbuf **rx_pkts, uint16_t nb_pkts) rx_mb->hash.rss = rss_hash; } - if (unlikely(qede_alloc_rx_buffer(rxq) != 0)) { - PMD_RX_LOG(ERR, rxq, - "New buffer allocation failed," - "dropping incoming packet\n"); - qede_recycle_rx_bd_ring(rxq, qdev, fp_cqe->bd_num); - rte_eth_devices[rxq->port_id]. - data->rx_mbuf_alloc_failed++; - rxq->rx_alloc_errors++; - break; - } + rx_alloc_count++; qede_rx_bd_ring_consume(rxq); if (!tpa_start_flg && fp_cqe->bd_num > 1) { @@ -1574,17 +1630,9 @@ qede_recv_pkts(void *p_rxq, struct rte_mbuf **rx_pkts, uint16_t nb_pkts) if (qede_process_sg_pkts(p_rxq, seg1, num_segs, pkt_len - len)) goto next_cqe; - for (j = 0; j < num_segs; j++) { - if (qede_alloc_rx_buffer(rxq)) { - PMD_RX_LOG(ERR, rxq, - "Buffer allocation failed"); - rte_eth_devices[rxq->port_id].
- data->rx_mbuf_alloc_failed++; - rxq->rx_alloc_errors++; - break; - } - rxq->rx_segs++; - } + + rx_alloc_count += num_segs; + rxq->rx_segs += num_segs; } rxq->rx_segs++; /* for the first segment */ @@ -1626,7 +1674,8 @@ next_cqe: } } - qede_update_rx_prod(qdev, rxq); + /* Request number of buffers to be allocated in next loop */ + rxq->rx_alloc_count = rx_alloc_count; rxq->rcv_pkts += rx_pkt; @@ -2132,7 +2181,6 @@ qede_xmit_pkts(void *p_txq, struct rte_mbuf **tx_pkts, uint16_t nb_pkts) txq->nb_tx_avail -= bd1->data.nbds; txq->sw_tx_prod++; - rte_prefetch0(txq->sw_tx_ring[TX_PROD(txq)].mbuf); bd_prod = rte_cpu_to_le_16(ecore_chain_get_prod_idx(&txq->tx_pbl)); #ifdef RTE_LIBRTE_QEDE_DEBUG_TX diff --git a/drivers/net/qede/qede_rxtx.h b/drivers/net/qede/qede_rxtx.h index 454daa07..5b249cbb 100644 --- a/drivers/net/qede/qede_rxtx.h +++ b/drivers/net/qede/qede_rxtx.h @@ -192,6 +192,8 @@ struct qede_rx_queue { uint16_t queue_id; uint16_t port_id; uint16_t rx_buf_size; + uint16_t rx_alloc_count; + uint16_t unused; uint64_t rcv_pkts; uint64_t rx_segs; uint64_t rx_hw_errors; diff --git a/drivers/net/sfc/base/efx_tx.c b/drivers/net/sfc/base/efx_tx.c index bf1180a1..9fa9e2ed 100644 --- a/drivers/net/sfc/base/efx_tx.c +++ b/drivers/net/sfc/base/efx_tx.c @@ -926,7 +926,8 @@ siena_tx_qcreate( EFX_STATIC_ASSERT(ISP2(EFX_TXQ_MINNDESCS)); if (!ISP2(ndescs) || - (ndescs < EFX_TXQ_MINNDESCS) || (ndescs > EFX_EVQ_MAXNEVS)) { + (ndescs < EFX_TXQ_MINNDESCS) || + (ndescs > encp->enc_txq_max_ndescs)) { rc = EINVAL; goto fail1; } diff --git a/drivers/net/sfc/efsys.h b/drivers/net/sfc/efsys.h index 0b4795da..f7bcc74d 100644 --- a/drivers/net/sfc/efsys.h +++ b/drivers/net/sfc/efsys.h @@ -84,7 +84,7 @@ typedef bool boolean_t; #define P2ALIGN(_x, _a) ((_x) & -(_a)) #endif -#ifndef IS2P +#ifndef ISP2 #define ISP2(x) rte_is_power_of_2(x) #endif diff --git a/drivers/net/sfc/sfc.h b/drivers/net/sfc/sfc.h index 51be4403..a94ca8e7 100644 --- a/drivers/net/sfc/sfc.h +++ b/drivers/net/sfc/sfc.h @@ -149,6 +149,8 @@ struct sfc_port { uint64_t mac_stats_last_request_timestamp; uint32_t mac_stats_mask[EFX_MAC_STATS_MASK_NPAGES]; + + uint64_t ipackets; }; struct sfc_rss_hf_rte_to_efx { @@ -251,7 +253,7 @@ struct sfc_adapter { /* * Shared memory copy of the Tx datapath name to be used by - * the secondary process to find Rx datapath to be used. + * the secondary process to find Tx datapath to be used. */ char *dp_tx_name; const struct sfc_dp_tx *dp_tx; diff --git a/drivers/net/sfc/sfc_ef10_tx.c b/drivers/net/sfc/sfc_ef10_tx.c index bcd3153f..ff6d5b48 100644 --- a/drivers/net/sfc/sfc_ef10_tx.c +++ b/drivers/net/sfc/sfc_ef10_tx.c @@ -381,6 +381,9 @@ sfc_ef10_xmit_tso_pkt(struct sfc_ef10_txq * const txq, struct rte_mbuf *m_seg, hdr_addr = rte_pktmbuf_mtod(m_seg, uint8_t *); hdr_iova = rte_mbuf_data_iova(m_seg); if (rte_pktmbuf_data_len(m_seg) == header_len) { + /* Cannot send a packet that consists only of header */ + if (unlikely(m_seg->next == NULL)) + return EMSGSIZE; /* * Associate header mbuf with header descriptor * which is located after TSO descriptors.
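Both guards added to sfc_ef10_xmit_tso_pkt() (the hunk just above and the one that follows) enforce the same rule: a TSO packet whose mbuf chain holds nothing beyond the header cannot be segmented, so it must be rejected with EMSGSIZE before any descriptors are written. A minimal standalone sketch of the first guard; the helper name is hypothetical, not part of the patch:

	#include <errno.h>
	#include <rte_mbuf.h>

	/*
	 * Sketch of the header-only TSO guard: if the first segment is
	 * exactly the header and no payload segment follows, there is
	 * nothing left to segment and the packet must be rejected.
	 */
	static int
	tso_reject_header_only(const struct rte_mbuf *m, size_t header_len)
	{
		if (rte_pktmbuf_data_len(m) == header_len && m->next == NULL)
			return EMSGSIZE;
		return 0;
	}
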
@@ -409,6 +412,10 @@ sfc_ef10_xmit_tso_pkt(struct sfc_ef10_txq * const txq, struct rte_mbuf *m_seg, copied_segs = sfc_tso_prepare_header(hdr_addr, header_len, &m_seg, &in_off); + /* Cannot send a packet that consists only of header */ + if (unlikely(m_seg == NULL)) + return EMSGSIZE; + m_seg_to_free_up_to = m_seg; /* * Reduce the number of needed descriptors by the number of diff --git a/drivers/net/sfc/sfc_ethdev.c b/drivers/net/sfc/sfc_ethdev.c index 3886daf7..a7322a1e 100644 --- a/drivers/net/sfc/sfc_ethdev.c +++ b/drivers/net/sfc/sfc_ethdev.c @@ -503,6 +503,29 @@ sfc_tx_queue_release(void *queue) sfc_adapter_unlock(sa); } +/* + * Some statistics are computed as A - B where A and B each increase + * monotonically with some hardware counter(s) and the counters are read + * asynchronously. + * + * If packet X is counted in A, but not counted in B yet, the computed + * value is greater than the real one. + * + * If packet X is not counted in A at the moment of reading the counter, + * but counted in B at the moment of reading the counter, the computed + * value is less than the real one. + * + * However, a counter which goes backward is a worse evil than a slightly + * wrong value, so let's try to guarantee that it never happens, except + * perhaps when the MAC stats are zeroed as a result of a NIC reset. + */ +static void +sfc_update_diff_stat(uint64_t *stat, uint64_t newval) +{ + if ((int64_t)(newval - *stat) > 0 || newval == 0) + *stat = newval; +} + static int sfc_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats) { @@ -537,11 +560,9 @@ sfc_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats) mac_stats[EFX_MAC_VADAPTER_TX_UNICAST_BYTES] + mac_stats[EFX_MAC_VADAPTER_TX_MULTICAST_BYTES] + mac_stats[EFX_MAC_VADAPTER_TX_BROADCAST_BYTES]; - stats->imissed = mac_stats[EFX_MAC_VADAPTER_RX_OVERFLOW]; - stats->ierrors = mac_stats[EFX_MAC_VADAPTER_RX_BAD_PACKETS]; + stats->imissed = mac_stats[EFX_MAC_VADAPTER_RX_BAD_PACKETS]; stats->oerrors = mac_stats[EFX_MAC_VADAPTER_TX_BAD_PACKETS]; } else { - stats->ipackets = mac_stats[EFX_MAC_RX_PKTS]; stats->opackets = mac_stats[EFX_MAC_TX_PKTS]; stats->ibytes = mac_stats[EFX_MAC_RX_OCTETS]; stats->obytes = mac_stats[EFX_MAC_TX_OCTETS]; @@ -567,6 +588,13 @@ sfc_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats) mac_stats[EFX_MAC_RX_ALIGN_ERRORS] + mac_stats[EFX_MAC_RX_JABBER_PKTS]; /* no oerrors counters supported on EF10 */ + + /* Exclude missed, errors and pauses from Rx packets */ + sfc_update_diff_stat(&port->ipackets, + mac_stats[EFX_MAC_RX_PKTS] - + mac_stats[EFX_MAC_RX_PAUSE_PKTS] - + stats->imissed - stats->ierrors); + stats->ipackets = port->ipackets; } unlock: @@ -1863,13 +1891,13 @@ sfc_eth_dev_secondary_set_ops(struct rte_eth_dev *dev) dp_rx = sfc_dp_find_rx_by_name(&sfc_dp_head, sa->dp_rx_name); if (dp_rx == NULL) { - sfc_err(sa, "cannot find %s Rx datapath", sa->dp_tx_name); + sfc_err(sa, "cannot find %s Rx datapath", sa->dp_rx_name); rc = ENOENT; goto fail_dp_rx; } if (~dp_rx->features & SFC_DP_RX_FEAT_MULTI_PROCESS) { sfc_err(sa, "%s Rx datapath does not support multi-process", - sa->dp_tx_name); + sa->dp_rx_name); rc = EINVAL; goto fail_dp_rx_multi_process; } diff --git a/drivers/net/sfc/sfc_log.h b/drivers/net/sfc/sfc_log.h index d6f34352..b11fbfb0 100644 --- a/drivers/net/sfc/sfc_log.h +++ b/drivers/net/sfc/sfc_log.h @@ -37,7 +37,8 @@ extern uint32_t sfc_logtype_driver; const struct sfc_adapter *__sa = (sa); \ \ rte_log(level, type, \ - RTE_FMT("PMD: sfc_efx " PCI_PRI_FMT " #%" PRIu8 \ + RTE_FMT("PMD: sfc_efx " \ +
PCI_PRI_FMT " #%" PRIu16 \ ": " RTE_FMT_HEAD(__VA_ARGS__ ,) "\n", \ __sa->pci_addr.domain, \ __sa->pci_addr.bus, \ diff --git a/drivers/net/sfc/sfc_port.c b/drivers/net/sfc/sfc_port.c index 5384dbbd..5eb4b3ac 100644 --- a/drivers/net/sfc/sfc_port.c +++ b/drivers/net/sfc/sfc_port.c @@ -87,6 +87,18 @@ sfc_port_update_mac_stats(struct sfc_adapter *sa) return 0; } +static void +sfc_port_reset_sw_stats(struct sfc_adapter *sa) +{ + struct sfc_port *port = &sa->port; + + /* + * Reset diff stats explicitly since check which does not allow + * the statistics to grow backward could deny it. + */ + port->ipackets = 0; +} + int sfc_port_reset_mac_stats(struct sfc_adapter *sa) { @@ -95,6 +107,8 @@ sfc_port_reset_mac_stats(struct sfc_adapter *sa) rte_spinlock_lock(&port->mac_stats_lock); rc = efx_mac_stats_clear(sa->nic); + if (rc == 0) + sfc_port_reset_sw_stats(sa); rte_spinlock_unlock(&port->mac_stats_lock); return rc; diff --git a/drivers/net/sfc/sfc_tso.h b/drivers/net/sfc/sfc_tso.h index 3d2faf54..f89aef07 100644 --- a/drivers/net/sfc/sfc_tso.h +++ b/drivers/net/sfc/sfc_tso.h @@ -7,6 +7,13 @@ * for Solarflare) and Solarflare Communications, Inc. */ +#ifndef _SFC_TSO_H +#define _SFC_TSO_H + +#ifdef __cplusplus +extern "C" { +#endif + /** Standard TSO header length */ #define SFC_TSOH_STD_LEN 256 @@ -21,3 +28,9 @@ unsigned int sfc_tso_prepare_header(uint8_t *tsoh, size_t header_len, struct rte_mbuf **in_seg, size_t *in_off); + +#ifdef __cplusplus +} +#endif + +#endif /* _SFC_TSO_H */ diff --git a/drivers/net/sfc/sfc_tx.c b/drivers/net/sfc/sfc_tx.c index 147f9336..aa73d264 100644 --- a/drivers/net/sfc/sfc_tx.c +++ b/drivers/net/sfc/sfc_tx.c @@ -451,7 +451,7 @@ sfc_tx_qstart(struct sfc_adapter *sa, unsigned int sw_index) if (txq->offloads & DEV_TX_OFFLOAD_TCP_TSO) flags |= EFX_TXQ_FATSOV2; - rc = efx_tx_qcreate(sa->nic, sw_index, 0, &txq->mem, + rc = efx_tx_qcreate(sa->nic, txq->hw_index, 0, &txq->mem, txq_info->entries, 0 /* not used on EF10 */, flags, evq->common, &txq->common, &desc_index); @@ -712,6 +712,7 @@ sfc_efx_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts) for (pkts_sent = 0, pktp = &tx_pkts[0]; (pkts_sent < nb_pkts) && (fill_level <= soft_max_fill); pkts_sent++, pktp++) { + uint16_t hw_vlan_tci_prev = txq->hw_vlan_tci; struct rte_mbuf *m_seg = *pktp; size_t pkt_len = m_seg->pkt_len; unsigned int pkt_descs = 0; @@ -750,6 +751,7 @@ sfc_efx_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts) * mbuf shouldn't be orphaned */ pend -= pkt_descs; + txq->hw_vlan_tci = hw_vlan_tci_prev; rte_pktmbuf_free(*pktp); @@ -819,10 +821,12 @@ sfc_efx_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts) fill_level = added - txq->completed; if (fill_level > hard_max_fill) { pend -= pkt_descs; + txq->hw_vlan_tci = hw_vlan_tci_prev; break; } } else { pend -= pkt_descs; + txq->hw_vlan_tci = hw_vlan_tci_prev; break; } } diff --git a/drivers/net/tap/rte_eth_tap.c b/drivers/net/tap/rte_eth_tap.c index 49afd38d..a9342997 100644 --- a/drivers/net/tap/rte_eth_tap.c +++ b/drivers/net/tap/rte_eth_tap.c @@ -78,9 +78,6 @@ static const char *valid_arguments[] = { NULL }; -static unsigned int tap_unit; -static unsigned int tun_unit; - static char tuntap_name[8]; static volatile uint32_t tap_trigger; /* Rx trigger */ @@ -150,8 +147,6 @@ tun_alloc(struct pmd_internals *pmd, int is_keepalive) IFF_TAP : IFF_TUN | IFF_POINTOPOINT; snprintf(ifr.ifr_name, IFNAMSIZ, "%s", pmd->name); - TAP_LOG(DEBUG, "ifr_name '%s'", ifr.ifr_name); - fd = open(TUN_TAP_DEV_PATH, O_RDWR); if 
(fd < 0) { TAP_LOG(ERR, "Unable to create %s interface", tuntap_name); goto error; } @@ -185,6 +180,13 @@ tun_alloc(struct pmd_internals *pmd, int is_keepalive) goto error; } + /* + * The name passed to the kernel might be a wildcard like dtun%d, + * so we need to find the resulting device name. + */ + TAP_LOG(DEBUG, "Device name is '%s'", ifr.ifr_name); + strlcpy(pmd->name, ifr.ifr_name, RTE_ETH_NAME_MAX_LEN); + if (is_keepalive) { /* * Detach the TUN/TAP keep-alive queue @@ -281,13 +283,27 @@ tap_verify_csum(struct rte_mbuf *mbuf) l3_len = 4 * (iph->version_ihl & 0xf); if (unlikely(l2_len + l3_len > rte_pktmbuf_data_len(mbuf))) return; + /* check that the total length reported by the header is not + * greater than the total received size + */ + if (l2_len + rte_be_to_cpu_16(iph->total_length) > + rte_pktmbuf_data_len(mbuf)) + return; cksum = ~rte_raw_cksum(iph, l3_len); mbuf->ol_flags |= cksum ? PKT_RX_IP_CKSUM_BAD : PKT_RX_IP_CKSUM_GOOD; } else if (l3 == RTE_PTYPE_L3_IPV6) { + struct ipv6_hdr *iph = l3_hdr; + l3_len = sizeof(struct ipv6_hdr); + /* check that the total length reported by the header is not + * greater than the total received size + */ + if (l2_len + l3_len + rte_be_to_cpu_16(iph->payload_len) > + rte_pktmbuf_data_len(mbuf)) + return; } else { /* IPv6 extensions are not supported */ return; @@ -1741,6 +1757,7 @@ eth_dev_tap_create(struct rte_vdev_device *vdev, char *tap_name, TAP_LOG(ERR, "Unable to create %s interface", tuntap_name); goto error_exit; } + TAP_LOG(DEBUG, "allocated %s", pmd->name); ifr.ifr_mtu = dev->data->mtu; if (tap_ioctl(pmd, SIOCSIFMTU, &ifr, 1, LOCAL_AND_REMOTE) < 0) @@ -1878,10 +1895,10 @@ set_interface_name(const char *key __rte_unused, char *name = (char *)extra_args; if (value) - strlcpy(name, value, RTE_ETH_NAME_MAX_LEN - 1); + strlcpy(name, value, RTE_ETH_NAME_MAX_LEN); else - snprintf(name, RTE_ETH_NAME_MAX_LEN - 1, "%s%d", - DEFAULT_TAP_NAME, (tap_unit - 1)); + /* use tap%d, which causes the kernel to choose the next available name */ + strlcpy(name, DEFAULT_TAP_NAME "%d", RTE_ETH_NAME_MAX_LEN); return 0; } @@ -1988,8 +2005,8 @@ rte_pmd_tun_probe(struct rte_vdev_device *dev) return 0; } - snprintf(tun_name, sizeof(tun_name), "%s%u", - DEFAULT_TUN_NAME, tun_unit++); + /* use tun%d, which causes the kernel to choose the next available name */ + strlcpy(tun_name, DEFAULT_TUN_NAME "%d", RTE_ETH_NAME_MAX_LEN); if (params && (params[0] != '\0')) { TAP_LOG(DEBUG, "parameters (%s)", params); @@ -2009,17 +2026,15 @@ rte_pmd_tun_probe(struct rte_vdev_device *dev) } pmd_link.link_speed = ETH_SPEED_NUM_10G; - TAP_LOG(NOTICE, "Initializing pmd_tun for %s as %s", - name, tun_name); + TAP_LOG(NOTICE, "Initializing pmd_tun for %s", name); ret = eth_dev_tap_create(dev, tun_name, remote_iface, 0, - ETH_TUNTAP_TYPE_TUN); + ETH_TUNTAP_TYPE_TUN); leave: if (ret == -1) { TAP_LOG(ERR, "Failed to create pmd for %s as %s", name, tun_name); - tun_unit--; /* Restore the unit number */ } rte_kvargs_free(kvlist); @@ -2175,8 +2190,9 @@ rte_pmd_tap_probe(struct rte_vdev_device *dev) } speed = ETH_SPEED_NUM_10G; - snprintf(tap_name, sizeof(tap_name), "%s%u", - DEFAULT_TAP_NAME, tap_unit++); + + /* use tap%d, which causes the kernel to choose the next available name */ + strlcpy(tap_name, DEFAULT_TAP_NAME "%d", RTE_ETH_NAME_MAX_LEN); memset(remote_iface, 0, RTE_ETH_NAME_MAX_LEN); if (params && (params[0] != '\0')) { @@ -2240,7 +2256,6 @@ leave: rte_mp_action_unregister(TAP_MP_KEY); tap_devices_count--; } - tap_unit--; /* Restore the unit number */ } rte_kvargs_free(kvlist); diff --git a/drivers/net/tap/tap_tcmsgs.c b/drivers/net/tap/tap_tcmsgs.c index
3c9d0366..b478b595 100644 --- a/drivers/net/tap/tap_tcmsgs.c +++ b/drivers/net/tap/tap_tcmsgs.c @@ -116,7 +116,7 @@ error: int qdisc_add_multiq(int nlsk_fd, uint16_t ifindex) { - struct tc_multiq_qopt opt; + struct tc_multiq_qopt opt = {0}; struct nlmsg msg; tc_init_msg(&msg, ifindex, RTM_NEWQDISC, diff --git a/drivers/net/vhost/rte_eth_vhost.c b/drivers/net/vhost/rte_eth_vhost.c index 42bdfcbd..b2cda048 100644 --- a/drivers/net/vhost/rte_eth_vhost.c +++ b/drivers/net/vhost/rte_eth_vhost.c @@ -1000,7 +1000,6 @@ eth_dev_close(struct rte_eth_dev *dev) for (i = 0; i < dev->data->nb_tx_queues; i++) rte_free(dev->data->tx_queues[i]); - rte_free(dev->data->mac_addrs); free(internal->dev_name); free(internal->iface_name); rte_free(internal); diff --git a/drivers/net/virtio/virtio_user/virtio_user_dev.c b/drivers/net/virtio/virtio_user/virtio_user_dev.c index 20816c93..9c8bcd2c 100644 --- a/drivers/net/virtio/virtio_user/virtio_user_dev.c +++ b/drivers/net/virtio/virtio_user/virtio_user_dev.c @@ -624,7 +624,7 @@ virtio_user_handle_cq(struct virtio_user_dev *dev, uint16_t queue_idx) /* Update used ring */ uep = &vring->used->ring[avail_idx]; - uep->id = avail_idx; + uep->id = desc_idx; uep->len = n_descs; vring->used->idx++; diff --git a/drivers/net/virtio/virtqueue.h b/drivers/net/virtio/virtqueue.h index 26518ed9..2e2abf15 100644 --- a/drivers/net/virtio/virtqueue.h +++ b/drivers/net/virtio/virtqueue.h @@ -336,6 +336,11 @@ vq_update_avail_ring(struct virtqueue *vq, uint16_t desc_idx) static inline int virtqueue_kick_prepare(struct virtqueue *vq) { + /* + * Ensure updated avail->idx is visible to vhost before reading + * the used->flags. + */ + virtio_mb(); return !(vq->vq_ring.used->flags & VRING_USED_F_NO_NOTIFY); } diff --git a/drivers/net/vmxnet3/vmxnet3_rxtx.c b/drivers/net/vmxnet3/vmxnet3_rxtx.c index cf85f3d6..d30914a8 100644 --- a/drivers/net/vmxnet3/vmxnet3_rxtx.c +++ b/drivers/net/vmxnet3/vmxnet3_rxtx.c @@ -50,6 +50,8 @@ #define VMXNET3_TX_OFFLOAD_MASK ( \ PKT_TX_VLAN_PKT | \ + PKT_TX_IPV6 | \ + PKT_TX_IPV4 | \ PKT_TX_L4_MASK | \ PKT_TX_TCP_SEG) |
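A few of the less obvious changes in this chunk deserve sketches. First, the mlx5 flow_tcf_apply() rework above sets NLM_F_ECHO and passes flow_tcf_collect_apply_cb to flow_tcf_nl_ack(), but the callback body is not part of this diff. Assuming it follows the usual libmnl mnl_cb_t shape, it could look roughly like this (a sketch under that assumption, not the patch's actual code):

	#include <libmnl/libmnl.h>
	#include <linux/rtnetlink.h>

	/*
	 * Sketch of an NLM_F_ECHO collect callback: the kernel echoes the
	 * created filter back, so the kernel-assigned handle can be copied
	 * into the original request, where flow_tcf_apply() then checks
	 * tcm->tcm_handle against zero.
	 */
	static int
	collect_apply_cb(const struct nlmsghdr *nlh, void *arg)
	{
		struct nlmsghdr *nlhrq = arg;	/* the original request */
		struct tcmsg *tcmrq = mnl_nlmsg_get_payload(nlhrq);
		const struct tcmsg *tcm = mnl_nlmsg_get_payload(nlh);

		if (nlh->nlmsg_type == RTM_NEWTFILTER &&
		    nlh->nlmsg_seq == nlhrq->nlmsg_seq)
			tcmrq->tcm_handle = tcm->tcm_handle;
		return 1;	/* MNL_CB_OK: continue parsing */
	}
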
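Second, the qede Rx rework moves buffer replenishment out of the per-packet hot loop: qede_recv_pkts() only counts consumed buffers in rx_alloc_count, and the next poll replenishes them with a single rte_mempool_get_bulk() call, amortizing the mempool access cost. A compressed sketch of the pattern; the miniature ring type and its size are hypothetical:

	#include <rte_mbuf.h>
	#include <rte_mempool.h>

	/* Hypothetical miniature Rx queue, only to illustrate deferred refill. */
	struct mini_rxq {
		struct rte_mempool *mb_pool;
		struct rte_mbuf *ring[512];	/* power-of-2 ring */
		uint16_t prod, cons;		/* free-running, masked on use */
		uint16_t pending;		/* buffers consumed by the last poll */
	};

	static uint16_t
	mini_rx_poll(struct mini_rxq *q, struct rte_mbuf **pkts, uint16_t n)
	{
		void *objs[512];
		uint16_t got = 0;

		/* Refill everything the previous poll consumed, in one call. */
		if (q->pending) {
			if (rte_mempool_get_bulk(q->mb_pool, objs, q->pending))
				return 0;	/* like qede: drop this poll on failure */
			for (uint16_t i = 0; i < q->pending; i++)
				q->ring[q->prod++ & 511] = objs[i];
			q->pending = 0;
		}
		/* Hand out filled buffers; only count what must be refilled later. */
		while (got < n && q->cons != q->prod) {
			pkts[got++] = q->ring[q->cons++ & 511];
			q->pending++;
		}
		return got;
	}
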
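Third, sfc_update_diff_stat() in the sfc_ethdev.c hunk relies on unsigned wraparound to accept only forward movement of a computed statistic: newval - *stat is evaluated modulo 2^64 and then reinterpreted as a signed value, so a stale, smaller reading produces a negative difference and is ignored. A worked example as a self-contained test (the standalone wrapper is illustrative; the logic matches the patch):

	#include <assert.h>
	#include <stdint.h>

	/* Same logic as the sfc_update_diff_stat() added in the patch. */
	static void
	update_diff_stat(uint64_t *stat, uint64_t newval)
	{
		if ((int64_t)(newval - *stat) > 0 || newval == 0)
			*stat = newval;
	}

	int
	main(void)
	{
		uint64_t s = 100;

		update_diff_stat(&s, 98);	/* stale read: (int64_t)-2, skipped */
		assert(s == 100);
		update_diff_stat(&s, 105);	/* forward movement: +5, stored */
		assert(s == 105);
		update_diff_stat(&s, 0);	/* NIC reset case: explicitly allowed */
		assert(s == 0);
		return 0;
	}
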
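Finally, the virtqueue_kick_prepare() change above is a store-load ordering fix: the driver's store to avail->idx must become visible before its load of used->flags, otherwise the CPU may hoist the load, the driver reads a stale VRING_USED_F_NO_NOTIFY, skips the kick, and the device, having stopped polling without seeing the new index, sleeps on a non-empty ring. A reduced sketch of the handshake with generic stand-in names, where rte_mb() plays the role the patch gives virtio_mb():

	#include <stdint.h>
	#include <rte_atomic.h>

	#define NO_NOTIFY 0x1	/* stands in for VRING_USED_F_NO_NOTIFY */

	/* Generic stand-ins for the split-ring fields involved in the kick. */
	struct kick_state {
		volatile uint16_t avail_idx;	/* written by the driver */
		volatile uint16_t used_flags;	/* written by the device */
	};

	static int
	kick_prepare(struct kick_state *ks, uint16_t new_avail_idx)
	{
		ks->avail_idx = new_avail_idx;	/* expose the new buffers */
		/*
		 * Full barrier: order the avail_idx store before the
		 * used_flags load, so notification suppression is never
		 * judged on a state older than the buffers just posted.
		 */
		rte_mb();
		return !(ks->used_flags & NO_NOTIFY);
	}
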