Diffstat (limited to 'src/dpdk/drivers/net')
153 files changed, 24690 insertions, 7529 deletions
diff --git a/src/dpdk/drivers/net/af_packet/rte_eth_af_packet.c b/src/dpdk/drivers/net/af_packet/rte_eth_af_packet.c index f7955662..2f875539 100644 --- a/src/dpdk/drivers/net/af_packet/rte_eth_af_packet.c +++ b/src/dpdk/drivers/net/af_packet/rte_eth_af_packet.c @@ -40,7 +40,7 @@ #include <rte_ethdev.h> #include <rte_malloc.h> #include <rte_kvargs.h> -#include <rte_dev.h> +#include <rte_vdev.h> #include <linux/if_ether.h> #include <linux/if_packet.h> @@ -83,6 +83,7 @@ struct pkt_rx_queue { struct pkt_tx_queue { int sockfd; + unsigned int frame_data_size; struct iovec *rd; uint8_t *map; @@ -98,6 +99,7 @@ struct pmd_internals { unsigned nb_queues; int if_index; + char *if_name; struct ether_addr eth_addr; struct tpacket_req req; @@ -115,8 +117,6 @@ static const char *valid_arguments[] = { NULL }; -static const char *drivername = "AF_PACKET PMD"; - static struct rte_eth_link pmd_link = { .link_speed = ETH_SPEED_NUM_10G, .link_duplex = ETH_LINK_FULL_DUPLEX, @@ -161,6 +161,12 @@ eth_af_packet_rx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts) pbuf = (uint8_t *) ppd + ppd->tp_mac; memcpy(rte_pktmbuf_mtod(mbuf, void *), pbuf, rte_pktmbuf_data_len(mbuf)); + /* check for vlan info */ + if (ppd->tp_status & TP_STATUS_VLAN_VALID) { + mbuf->vlan_tci = ppd->tp_vlan_tci; + mbuf->ol_flags |= (PKT_RX_VLAN_PKT | PKT_RX_VLAN_STRIPPED); + } + /* release incoming frame and advance ring buffer */ ppd->tp_status = TP_STATUS_KERNEL; if (++framenum >= framecount) @@ -206,13 +212,28 @@ eth_af_packet_tx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts) framenum = pkt_q->framenum; ppd = (struct tpacket2_hdr *) pkt_q->rd[framenum].iov_base; for (i = 0; i < nb_pkts; i++) { + mbuf = *bufs++; + + /* drop oversized packets */ + if (rte_pktmbuf_data_len(mbuf) > pkt_q->frame_data_size) { + rte_pktmbuf_free(mbuf); + continue; + } + + /* insert vlan info if necessary */ + if (mbuf->ol_flags & PKT_TX_VLAN_PKT) { + if (rte_vlan_insert(&mbuf)) { + rte_pktmbuf_free(mbuf); + continue; + } + } + /* point at the next incoming frame */ if ((ppd->tp_status != TP_STATUS_AVAILABLE) && (poll(&pfd, 1, -1) < 0)) - continue; + break; /* copy the tx frame data */ - mbuf = bufs[num_tx]; pbuf = (uint8_t *) ppd + TPACKET2_HDRLEN - sizeof(struct sockaddr_ll); memcpy(pbuf, rte_pktmbuf_mtod(mbuf, void*), rte_pktmbuf_data_len(mbuf)); @@ -231,13 +252,13 @@ eth_af_packet_tx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts) /* kick-off transmits */ if (sendto(pkt_q->sockfd, NULL, 0, MSG_DONTWAIT, NULL, 0) == -1) - return 0; /* error sending -- no packets transmitted */ + num_tx = 0; /* error sending -- no packets transmitted */ pkt_q->framenum = framenum; pkt_q->tx_pkts += num_tx; - pkt_q->err_pkts += nb_pkts - num_tx; + pkt_q->err_pkts += i - num_tx; pkt_q->tx_bytes += num_tx_bytes; - return num_tx; + return i; } static int @@ -261,9 +282,16 @@ eth_dev_stop(struct rte_eth_dev *dev) sockfd = internals->rx_queue[i].sockfd; if (sockfd != -1) close(sockfd); - sockfd = internals->tx_queue[i].sockfd; - if (sockfd != -1) - close(sockfd); + + /* Prevent use after free in case tx fd == rx fd */ + if (sockfd != internals->tx_queue[i].sockfd) { + sockfd = internals->tx_queue[i].sockfd; + if (sockfd != -1) + close(sockfd); + } + + internals->rx_queue[i].sockfd = -1; + internals->tx_queue[i].sockfd = -1; } dev->data->dev_link.link_status = ETH_LINK_DOWN; @@ -280,14 +308,12 @@ eth_dev_info(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info) { struct pmd_internals *internals = dev->data->dev_private; - dev_info->driver_name = drivername; 
dev_info->if_index = internals->if_index; dev_info->max_mac_addrs = 1; dev_info->max_rx_pktlen = (uint32_t)ETH_FRAME_LEN; dev_info->max_rx_queues = (uint16_t)internals->nb_queues; dev_info->max_tx_queues = (uint16_t)internals->nb_queues; dev_info->min_rx_bufsize = 0; - dev_info->pci_dev = NULL; } static void @@ -370,18 +396,20 @@ eth_rx_queue_setup(struct rte_eth_dev *dev, { struct pmd_internals *internals = dev->data->dev_private; struct pkt_rx_queue *pkt_q = &internals->rx_queue[rx_queue_id]; - uint16_t buf_size; + unsigned int buf_size, data_size; pkt_q->mb_pool = mb_pool; /* Now get the space available for data in the mbuf */ - buf_size = (uint16_t)(rte_pktmbuf_data_room_size(pkt_q->mb_pool) - - RTE_PKTMBUF_HEADROOM); + buf_size = rte_pktmbuf_data_room_size(pkt_q->mb_pool) - + RTE_PKTMBUF_HEADROOM; + data_size = internals->req.tp_frame_size; + data_size -= TPACKET2_HDRLEN - sizeof(struct sockaddr_ll); - if (ETH_FRAME_LEN > buf_size) { + if (data_size > buf_size) { RTE_LOG(ERR, PMD, "%s: %d bytes will not fit in mbuf (%d bytes)\n", - dev->data->name, ETH_FRAME_LEN, buf_size); + dev->data->name, data_size, buf_size); return -ENOMEM; } @@ -405,12 +433,80 @@ eth_tx_queue_setup(struct rte_eth_dev *dev, return 0; } +static int +eth_dev_mtu_set(struct rte_eth_dev *dev, uint16_t mtu) +{ + struct pmd_internals *internals = dev->data->dev_private; + struct ifreq ifr = { .ifr_mtu = mtu }; + int ret; + int s; + unsigned int data_size = internals->req.tp_frame_size - + TPACKET2_HDRLEN - + sizeof(struct sockaddr_ll); + + if (mtu > data_size) + return -EINVAL; + + s = socket(PF_INET, SOCK_DGRAM, 0); + if (s < 0) + return -EINVAL; + + strncpy(ifr.ifr_name, internals->if_name, IFNAMSIZ); + ret = ioctl(s, SIOCSIFMTU, &ifr); + close(s); + + if (ret < 0) + return -EINVAL; + + return 0; +} + +static void +eth_dev_change_flags(char *if_name, uint32_t flags, uint32_t mask) +{ + struct ifreq ifr; + int s; + + s = socket(PF_INET, SOCK_DGRAM, 0); + if (s < 0) + return; + + strncpy(ifr.ifr_name, if_name, IFNAMSIZ); + if (ioctl(s, SIOCGIFFLAGS, &ifr) < 0) + goto out; + ifr.ifr_flags &= mask; + ifr.ifr_flags |= flags; + if (ioctl(s, SIOCSIFFLAGS, &ifr) < 0) + goto out; +out: + close(s); +} + +static void +eth_dev_promiscuous_enable(struct rte_eth_dev *dev) +{ + struct pmd_internals *internals = dev->data->dev_private; + + eth_dev_change_flags(internals->if_name, IFF_PROMISC, ~0); +} + +static void +eth_dev_promiscuous_disable(struct rte_eth_dev *dev) +{ + struct pmd_internals *internals = dev->data->dev_private; + + eth_dev_change_flags(internals->if_name, 0, ~IFF_PROMISC); +} + static const struct eth_dev_ops ops = { .dev_start = eth_dev_start, .dev_stop = eth_dev_stop, .dev_close = eth_dev_close, .dev_configure = eth_dev_configure, .dev_infos_get = eth_dev_info, + .mtu_set = eth_dev_mtu_set, + .promiscuous_enable = eth_dev_promiscuous_enable, + .promiscuous_disable = eth_dev_promiscuous_disable, .rx_queue_setup = eth_rx_queue_setup, .tx_queue_setup = eth_tx_queue_setup, .rx_queue_release = eth_queue_release, @@ -440,6 +536,8 @@ open_packet_iface(const char *key __rte_unused, return 0; } +static struct rte_vdev_driver pmd_af_packet_drv; + static int rte_pmd_init_internals(const char *name, const int sockfd, @@ -524,6 +622,7 @@ rte_pmd_init_internals(const char *name, name); goto error_early; } + (*internals)->if_name = strdup(pair->value); (*internals)->if_index = ifr.ifr_ifindex; if (ioctl(sockfd, SIOCGIFHWADDR, &ifr) == -1) { @@ -633,6 +732,9 @@ rte_pmd_init_internals(const char *name, tx_queue = 
&((*internals)->tx_queue[q]); tx_queue->framecount = req->tp_frame_nr; + tx_queue->frame_data_size = req->tp_frame_size; + tx_queue->frame_data_size -= TPACKET2_HDRLEN - + sizeof(struct sockaddr_ll); tx_queue->map = rx_queue->map + req->tp_block_size * req->tp_block_nr; @@ -666,7 +768,7 @@ rte_pmd_init_internals(const char *name, } /* reserve an ethdev entry */ - *eth_dev = rte_eth_dev_allocate(name, RTE_ETH_DEV_VIRTUAL); + *eth_dev = rte_eth_dev_allocate(name); if (*eth_dev == NULL) goto error; @@ -693,7 +795,7 @@ rte_pmd_init_internals(const char *name, (*eth_dev)->dev_ops = &ops; (*eth_dev)->driver = NULL; (*eth_dev)->data->dev_flags = RTE_ETH_DEV_DETACHABLE; - (*eth_dev)->data->drv_name = drivername; + (*eth_dev)->data->drv_name = pmd_af_packet_drv.driver.name; (*eth_dev)->data->kdrv = RTE_KDRV_NONE; (*eth_dev)->data->numa_node = numa_node; @@ -712,6 +814,7 @@ error: ((*internals)->rx_queue[q].sockfd != qsockfd)) close((*internals)->rx_queue[q].sockfd); } + free((*internals)->if_name); rte_free(*internals); error_early: rte_free(data); @@ -820,7 +923,7 @@ rte_eth_from_packet(const char *name, } static int -rte_pmd_af_packet_devinit(const char *name, const char *params) +rte_pmd_af_packet_probe(const char *name, const char *params) { unsigned numa_node; int ret = 0; @@ -858,7 +961,7 @@ exit: } static int -rte_pmd_af_packet_devuninit(const char *name) +rte_pmd_af_packet_remove(const char *name) { struct rte_eth_dev *eth_dev = NULL; struct pmd_internals *internals; @@ -880,6 +983,7 @@ rte_pmd_af_packet_devuninit(const char *name) rte_free(internals->rx_queue[q].rd); rte_free(internals->tx_queue[q].rd); } + free(internals->if_name); rte_free(eth_dev->data->dev_private); rte_free(eth_dev->data); @@ -889,14 +993,14 @@ rte_pmd_af_packet_devuninit(const char *name) return 0; } -static struct rte_driver pmd_af_packet_drv = { - .type = PMD_VDEV, - .init = rte_pmd_af_packet_devinit, - .uninit = rte_pmd_af_packet_devuninit, +static struct rte_vdev_driver pmd_af_packet_drv = { + .probe = rte_pmd_af_packet_probe, + .remove = rte_pmd_af_packet_remove, }; -PMD_REGISTER_DRIVER(pmd_af_packet_drv, eth_af_packet); -DRIVER_REGISTER_PARAM_STRING(eth_af_packet, +RTE_PMD_REGISTER_VDEV(net_af_packet, pmd_af_packet_drv); +RTE_PMD_REGISTER_ALIAS(net_af_packet, eth_af_packet); +RTE_PMD_REGISTER_PARAM_STRING(net_af_packet, "iface=<string> " "qpairs=<int> " "blocksz=<int> " diff --git a/src/dpdk/drivers/net/bnx2x/bnx2x.c b/src/dpdk/drivers/net/bnx2x/bnx2x.c index a49a07fb..cc380bd5 100644 --- a/src/dpdk/drivers/net/bnx2x/bnx2x.c +++ b/src/dpdk/drivers/net/bnx2x/bnx2x.c @@ -178,7 +178,7 @@ bnx2x_dma_alloc(struct bnx2x_softc *sc, size_t size, struct bnx2x_dma *dma, /* Caller must take care that strlen(mz_name) < RTE_MEMZONE_NAMESIZE */ z = rte_memzone_reserve_aligned(mz_name, (uint64_t) (size), - rte_lcore_to_socket_id(rte_lcore_id()), + SOCKET_ID_ANY, 0, align); if (z == NULL) { PMD_DRV_LOG(ERR, "DMA alloc failed for %s", msg); @@ -1397,10 +1397,10 @@ bnx2x_del_all_macs(struct bnx2x_softc *sc, struct ecore_vlan_mac_obj *mac_obj, return rc; } -int +static int bnx2x_fill_accept_flags(struct bnx2x_softc *sc, uint32_t rx_mode, - unsigned long *rx_accept_flags, - unsigned long *tx_accept_flags) + unsigned long *rx_accept_flags, + unsigned long *tx_accept_flags) { /* Clear the flags first */ *rx_accept_flags = 0; @@ -1438,6 +1438,7 @@ bnx2x_fill_accept_flags(struct bnx2x_softc *sc, uint32_t rx_mode, break; + case BNX2X_RX_MODE_ALLMULTI_PROMISC: case BNX2X_RX_MODE_PROMISC: /* * According to deffinition of SI mode, iface in 
promisc mode @@ -2219,7 +2220,7 @@ int bnx2x_tx_encap(struct bnx2x_tx_queue *txq, struct rte_mbuf *m0) } PMD_TX_LOG(DEBUG, - "start bd: nbytes %d flags %x vlan %x\n", + "start bd: nbytes %d flags %x vlan %x", tx_start_bd->nbytes, tx_start_bd->bd_flags.as_bitfield, tx_start_bd->vlan_or_ethertype); @@ -7016,34 +7017,6 @@ static int bnx2x_initial_phy_init(struct bnx2x_softc *sc, int load_mode) bnx2x_set_requested_fc(sc); - if (CHIP_REV_IS_SLOW(sc)) { - uint32_t bond = CHIP_BOND_ID(sc); - uint32_t feat = 0; - - if (CHIP_IS_E2(sc) && CHIP_IS_MODE_4_PORT(sc)) { - feat |= ELINK_FEATURE_CONFIG_EMUL_DISABLE_BMAC; - } else if (bond & 0x4) { - if (CHIP_IS_E3(sc)) { - feat |= ELINK_FEATURE_CONFIG_EMUL_DISABLE_XMAC; - } else { - feat |= ELINK_FEATURE_CONFIG_EMUL_DISABLE_BMAC; - } - } else if (bond & 0x8) { - if (CHIP_IS_E3(sc)) { - feat |= ELINK_FEATURE_CONFIG_EMUL_DISABLE_UMAC; - } else { - feat |= ELINK_FEATURE_CONFIG_EMUL_DISABLE_EMAC; - } - } - -/* disable EMAC for E3 and above */ - if (bond & 0x2) { - feat |= ELINK_FEATURE_CONFIG_EMUL_DISABLE_EMAC; - } - - sc->link_params.feature_config_flags |= feat; - } - if (load_mode == LOAD_DIAG) { lp->loopback_mode = ELINK_LOOPBACK_XGXS; /* Prefer doing PHY loopback at 10G speed, if possible */ @@ -9556,8 +9529,8 @@ static void bnx2x_init_rte(struct bnx2x_softc *sc) sc->max_rx_queues = min(BNX2X_VF_MAX_QUEUES_PER_VF, sc->igu_sb_cnt); } else { - sc->max_tx_queues = 128; - sc->max_rx_queues = 128; + sc->max_rx_queues = BNX2X_MAX_RSS_COUNT(sc); + sc->max_tx_queues = sc->max_rx_queues; } } diff --git a/src/dpdk/drivers/net/bnx2x/bnx2x.h b/src/dpdk/drivers/net/bnx2x/bnx2x.h index 78757a8d..b3cd5fcc 100644 --- a/src/dpdk/drivers/net/bnx2x/bnx2x.h +++ b/src/dpdk/drivers/net/bnx2x/bnx2x.h @@ -17,6 +17,8 @@ #define __BNX2X_H__ #include <rte_byteorder.h> +#include <rte_spinlock.h> +#include <rte_io.h> #if RTE_BYTE_ORDER == RTE_LITTLE_ENDIAN #ifndef __LITTLE_ENDIAN @@ -304,10 +306,7 @@ struct bnx2x_device_type { /* TCP with Timestamp Option (32) + IPv6 (40) */ /* max supported alignment is 256 (8 shift) */ -#define BNX2X_RX_ALIGN_SHIFT 8 -/* FW uses 2 cache lines alignment for start packet and size */ -#define BNX2X_FW_RX_ALIGN_START (1 << BNX2X_RX_ALIGN_SHIFT) -#define BNX2X_FW_RX_ALIGN_END (1 << BNX2X_RX_ALIGN_SHIFT) +#define BNX2X_RX_ALIGN_SHIFT RTE_MAX(6, min(8, RTE_CACHE_LINE_SIZE_LOG2)) #define BNX2X_PXP_DRAM_ALIGN (BNX2X_RX_ALIGN_SHIFT - 5) @@ -1031,12 +1030,13 @@ struct bnx2x_softc { struct bnx2x_mac_ops mac_ops; /* structures for VF mbox/response/bulletin */ - struct bnx2x_vf_mbx_msg *vf2pf_mbox; - struct bnx2x_dma vf2pf_mbox_mapping; - struct vf_acquire_resp_tlv acquire_resp; + struct bnx2x_vf_mbx_msg *vf2pf_mbox; + struct bnx2x_dma vf2pf_mbox_mapping; + struct vf_acquire_resp_tlv acquire_resp; struct bnx2x_vf_bulletin *pf2vf_bulletin; - struct bnx2x_dma pf2vf_bulletin_mapping; - struct bnx2x_vf_bulletin old_bulletin; + struct bnx2x_dma pf2vf_bulletin_mapping; + struct bnx2x_vf_bulletin old_bulletin; + rte_spinlock_t vf2pf_lock; int media; @@ -1147,11 +1147,12 @@ struct bnx2x_softc { #define BNX2X_RECOVERY_NIC_LOADING 5 uint32_t rx_mode; -#define BNX2X_RX_MODE_NONE 0 -#define BNX2X_RX_MODE_NORMAL 1 -#define BNX2X_RX_MODE_ALLMULTI 2 -#define BNX2X_RX_MODE_PROMISC 3 -#define BNX2X_MAX_MULTICAST 64 +#define BNX2X_RX_MODE_NONE 0 +#define BNX2X_RX_MODE_NORMAL 1 +#define BNX2X_RX_MODE_ALLMULTI 2 +#define BNX2X_RX_MODE_ALLMULTI_PROMISC 3 +#define BNX2X_RX_MODE_PROMISC 4 +#define BNX2X_MAX_MULTICAST 64 struct bnx2x_port port; @@ -1415,34 +1416,90 @@ struct 
bnx2x_func_init_params { #define BAR1 2 #define BAR2 4 +static inline void +bnx2x_reg_write8(struct bnx2x_softc *sc, size_t offset, uint8_t val) +{ + PMD_DEBUG_PERIODIC_LOG(DEBUG, "offset=0x%08lx val=0x%02x", + (unsigned long)offset, val); + rte_write8(val, ((uint8_t *)sc->bar[BAR0].base_addr + offset)); +} + +static inline void +bnx2x_reg_write16(struct bnx2x_softc *sc, size_t offset, uint16_t val) +{ #ifdef RTE_LIBRTE_BNX2X_DEBUG_PERIODIC -uint8_t bnx2x_reg_read8(struct bnx2x_softc *sc, size_t offset); -uint16_t bnx2x_reg_read16(struct bnx2x_softc *sc, size_t offset); -uint32_t bnx2x_reg_read32(struct bnx2x_softc *sc, size_t offset); + if ((offset % 2) != 0) + PMD_DRV_LOG(NOTICE, "Unaligned 16-bit write to 0x%08lx", + (unsigned long)offset); +#endif + PMD_DEBUG_PERIODIC_LOG(DEBUG, "offset=0x%08lx val=0x%04x", + (unsigned long)offset, val); + rte_write16(val, ((uint8_t *)sc->bar[BAR0].base_addr + offset)); -void bnx2x_reg_write8(struct bnx2x_softc *sc, size_t offset, uint8_t val); -void bnx2x_reg_write16(struct bnx2x_softc *sc, size_t offset, uint16_t val); -void bnx2x_reg_write32(struct bnx2x_softc *sc, size_t offset, uint32_t val); -#else -#define bnx2x_reg_write8(sc, offset, val)\ - *((volatile uint8_t*)((uintptr_t)sc->bar[BAR0].base_addr + offset)) = val +} -#define bnx2x_reg_write16(sc, offset, val)\ - *((volatile uint16_t*)((uintptr_t)sc->bar[BAR0].base_addr + offset)) = val +static inline void +bnx2x_reg_write32(struct bnx2x_softc *sc, size_t offset, uint32_t val) +{ +#ifdef RTE_LIBRTE_BNX2X_DEBUG_PERIODIC + if ((offset % 4) != 0) + PMD_DRV_LOG(NOTICE, "Unaligned 32-bit write to 0x%08lx", + (unsigned long)offset); +#endif -#define bnx2x_reg_write32(sc, offset, val)\ - *((volatile uint32_t*)((uintptr_t)sc->bar[BAR0].base_addr + offset)) = val + PMD_DEBUG_PERIODIC_LOG(DEBUG, "offset=0x%08lx val=0x%08x", + (unsigned long)offset, val); + rte_write32(val, ((uint8_t *)sc->bar[BAR0].base_addr + offset)); +} -#define bnx2x_reg_read8(sc, offset)\ - (*((volatile uint8_t*)((uintptr_t)sc->bar[BAR0].base_addr + offset))) +static inline uint8_t +bnx2x_reg_read8(struct bnx2x_softc *sc, size_t offset) +{ + uint8_t val; -#define bnx2x_reg_read16(sc, offset)\ - (*((volatile uint16_t*)((uintptr_t)sc->bar[BAR0].base_addr + offset))) + val = rte_read8((uint8_t *)sc->bar[BAR0].base_addr + offset); + PMD_DEBUG_PERIODIC_LOG(DEBUG, "offset=0x%08lx val=0x%02x", + (unsigned long)offset, val); -#define bnx2x_reg_read32(sc, offset)\ - (*((volatile uint32_t*)((uintptr_t)sc->bar[BAR0].base_addr + offset))) + return val; +} + +static inline uint16_t +bnx2x_reg_read16(struct bnx2x_softc *sc, size_t offset) +{ + uint16_t val; + +#ifdef RTE_LIBRTE_BNX2X_DEBUG_PERIODIC + if ((offset % 2) != 0) + PMD_DRV_LOG(NOTICE, "Unaligned 16-bit read from 0x%08lx", + (unsigned long)offset); +#endif + + val = rte_read16(((uint8_t *)sc->bar[BAR0].base_addr + offset)); + PMD_DEBUG_PERIODIC_LOG(DEBUG, "offset=0x%08lx val=0x%08x", + (unsigned long)offset, val); + + return val; +} + +static inline uint32_t +bnx2x_reg_read32(struct bnx2x_softc *sc, size_t offset) +{ + uint32_t val; + +#ifdef RTE_LIBRTE_BNX2X_DEBUG_PERIODIC + if ((offset % 4) != 0) + PMD_DRV_LOG(NOTICE, "Unaligned 32-bit read from 0x%08lx", + (unsigned long)offset); #endif + val = rte_read32(((uint8_t *)sc->bar[BAR0].base_addr + offset)); + PMD_DEBUG_PERIODIC_LOG(DEBUG, "offset=0x%08lx val=0x%08x", + (unsigned long)offset, val); + + return val; +} + #define REG_ADDR(sc, offset) (((uint64_t)sc->bar[BAR0].base_addr) + (offset)) #define REG_RD8(sc, offset) 
bnx2x_reg_read8(sc, (offset)) @@ -1500,11 +1557,9 @@ void bnx2x_reg_write32(struct bnx2x_softc *sc, size_t offset, uint32_t val); #define DPM_TRIGGER_TYPE 0x40 /* Doorbell macro */ -#define BNX2X_DB_WRITE(db_bar, val) \ - *((volatile uint32_t *)(db_bar)) = (val) +#define BNX2X_DB_WRITE(db_bar, val) rte_write32_relaxed((val), (db_bar)) -#define BNX2X_DB_READ(db_bar) \ - *((volatile uint32_t *)(db_bar)) +#define BNX2X_DB_READ(db_bar) rte_read32_relaxed(db_bar) #define DOORBELL_ADDR(sc, offset) \ (volatile uint32_t *)(((char *)(sc)->bar[BAR1].base_addr + (offset))) @@ -1883,8 +1938,6 @@ int bnx2x_vf_setup_queue(struct bnx2x_softc *sc, struct bnx2x_fastpath *fp, int leading); void bnx2x_free_hsi_mem(struct bnx2x_softc *sc); int bnx2x_vf_set_rx_mode(struct bnx2x_softc *sc); -int bnx2x_fill_accept_flags(struct bnx2x_softc *sc, uint32_t rx_mode, - unsigned long *rx_accept_flags, unsigned long *tx_accept_flags); int bnx2x_check_bull(struct bnx2x_softc *sc); //#define BNX2X_PULSE diff --git a/src/dpdk/drivers/net/bnx2x/bnx2x_ethdev.c b/src/dpdk/drivers/net/bnx2x/bnx2x_ethdev.c index f3ab3550..a0b0dfab 100644 --- a/src/dpdk/drivers/net/bnx2x/bnx2x_ethdev.c +++ b/src/dpdk/drivers/net/bnx2x/bnx2x_ethdev.c @@ -17,7 +17,7 @@ * The set of PCI devices this driver supports */ #define BROADCOM_PCI_VENDOR_ID 0x14E4 -static struct rte_pci_id pci_id_bnx2x_map[] = { +static const struct rte_pci_id pci_id_bnx2x_map[] = { { RTE_PCI_DEVICE(BROADCOM_PCI_VENDOR_ID, CHIP_NUM_57800) }, { RTE_PCI_DEVICE(BROADCOM_PCI_VENDOR_ID, CHIP_NUM_57711) }, { RTE_PCI_DEVICE(BROADCOM_PCI_VENDOR_ID, CHIP_NUM_57810) }, @@ -33,7 +33,7 @@ static struct rte_pci_id pci_id_bnx2x_map[] = { { .vendor_id = 0, } }; -static struct rte_pci_id pci_id_bnx2xvf_map[] = { +static const struct rte_pci_id pci_id_bnx2xvf_map[] = { { RTE_PCI_DEVICE(BROADCOM_PCI_VENDOR_ID, CHIP_NUM_57800_VF) }, { RTE_PCI_DEVICE(BROADCOM_PCI_VENDOR_ID, CHIP_NUM_57810_VF) }, { RTE_PCI_DEVICE(BROADCOM_PCI_VENDOR_ID, CHIP_NUM_57811_VF) }, @@ -119,12 +119,12 @@ bnx2x_interrupt_action(struct rte_eth_dev *dev) } static __rte_unused void -bnx2x_interrupt_handler(__rte_unused struct rte_intr_handle *handle, void *param) +bnx2x_interrupt_handler(struct rte_intr_handle *handle, void *param) { struct rte_eth_dev *dev = (struct rte_eth_dev *)param; bnx2x_interrupt_action(dev); - rte_intr_enable(&(dev->pci_dev->intr_handle)); + rte_intr_enable(handle); } /* @@ -187,10 +187,10 @@ bnx2x_dev_start(struct rte_eth_dev *dev) } if (IS_PF(sc)) { - rte_intr_callback_register(&(dev->pci_dev->intr_handle), + rte_intr_callback_register(&sc->pci_dev->intr_handle, bnx2x_interrupt_handler, (void *)dev); - if(rte_intr_enable(&(dev->pci_dev->intr_handle))) + if (rte_intr_enable(&sc->pci_dev->intr_handle)) PMD_DRV_LOG(ERR, "rte_intr_enable failed"); } @@ -203,8 +203,6 @@ bnx2x_dev_start(struct rte_eth_dev *dev) /* Print important adapter info for the user. 
*/ bnx2x_print_adapter_info(sc); - DELAY_MS(2500); - return ret; } @@ -217,8 +215,8 @@ bnx2x_dev_stop(struct rte_eth_dev *dev) PMD_INIT_FUNC_TRACE(); if (IS_PF(sc)) { - rte_intr_disable(&(dev->pci_dev->intr_handle)); - rte_intr_callback_unregister(&(dev->pci_dev->intr_handle), + rte_intr_disable(&sc->pci_dev->intr_handle); + rte_intr_callback_unregister(&sc->pci_dev->intr_handle, bnx2x_interrupt_handler, (void *)dev); } @@ -258,6 +256,8 @@ bnx2x_promisc_enable(struct rte_eth_dev *dev) PMD_INIT_FUNC_TRACE(); sc->rx_mode = BNX2X_RX_MODE_PROMISC; + if (rte_eth_allmulticast_get(dev->data->port_id) == 1) + sc->rx_mode = BNX2X_RX_MODE_ALLMULTI_PROMISC; bnx2x_set_rx_mode(sc); } @@ -268,6 +268,8 @@ bnx2x_promisc_disable(struct rte_eth_dev *dev) PMD_INIT_FUNC_TRACE(); sc->rx_mode = BNX2X_RX_MODE_NORMAL; + if (rte_eth_allmulticast_get(dev->data->port_id) == 1) + sc->rx_mode = BNX2X_RX_MODE_ALLMULTI; bnx2x_set_rx_mode(sc); } @@ -278,6 +280,8 @@ bnx2x_dev_allmulticast_enable(struct rte_eth_dev *dev) PMD_INIT_FUNC_TRACE(); sc->rx_mode = BNX2X_RX_MODE_ALLMULTI; + if (rte_eth_promiscuous_get(dev->data->port_id) == 1) + sc->rx_mode = BNX2X_RX_MODE_ALLMULTI_PROMISC; bnx2x_set_rx_mode(sc); } @@ -288,6 +292,8 @@ bnx2x_dev_allmulticast_disable(struct rte_eth_dev *dev) PMD_INIT_FUNC_TRACE(); sc->rx_mode = BNX2X_RX_MODE_NORMAL; + if (rte_eth_promiscuous_get(dev->data->port_id) == 1) + sc->rx_mode = BNX2X_RX_MODE_PROMISC; bnx2x_set_rx_mode(sc); } @@ -424,6 +430,7 @@ bnx2x_dev_xstats_get(struct rte_eth_dev *dev, struct rte_eth_xstat *xstats, xstats[num].value = *(uint64_t *)((char *)&sc->eth_stats + bnx2x_xstats_strings[num].offset_lo); + xstats[num].id = num; } return num; @@ -433,6 +440,7 @@ static void bnx2x_dev_infos_get(struct rte_eth_dev *dev, __rte_unused struct rte_eth_dev_info *dev_info) { struct bnx2x_softc *sc = dev->data->dev_private; + dev_info->pci_dev = RTE_DEV_TO_PCI(dev->device); dev_info->max_rx_queues = sc->max_rx_queues; dev_info->max_tx_queues = sc->max_tx_queues; dev_info->min_rx_bufsize = BNX2X_MIN_RX_BUF_SIZE; @@ -518,7 +526,7 @@ bnx2x_common_dev_init(struct rte_eth_dev *eth_dev, int is_vf) PMD_INIT_FUNC_TRACE(); eth_dev->dev_ops = is_vf ? 
&bnx2xvf_eth_dev_ops : &bnx2x_eth_dev_ops; - pci_dev = eth_dev->pci_dev; + pci_dev = RTE_DEV_TO_PCI(eth_dev->device); rte_eth_copy_pci_info(eth_dev, pci_dev); @@ -577,6 +585,8 @@ bnx2x_common_dev_init(struct rte_eth_dev *eth_dev, int is_vf) eth_dev->data->port_id, pci_dev->id.vendor_id, pci_dev->id.device_id); if (IS_VF(sc)) { + rte_spinlock_init(&sc->vf2pf_lock); + if (bnx2x_dma_alloc(sc, sizeof(struct bnx2x_vf_mbx_msg), &sc->vf2pf_mbox_mapping, "vf2pf_mbox", RTE_CACHE_LINE_SIZE) != 0) @@ -618,9 +628,10 @@ eth_bnx2xvf_dev_init(struct rte_eth_dev *eth_dev) static struct eth_driver rte_bnx2x_pmd = { .pci_drv = { - .name = "rte_bnx2x_pmd", .id_table = pci_id_bnx2x_map, .drv_flags = RTE_PCI_DRV_NEED_MAPPING | RTE_PCI_DRV_INTR_LSC, + .probe = rte_eth_dev_pci_probe, + .remove = rte_eth_dev_pci_remove, }, .eth_dev_init = eth_bnx2x_dev_init, .dev_private_size = sizeof(struct bnx2x_softc), @@ -631,41 +642,18 @@ static struct eth_driver rte_bnx2x_pmd = { */ static struct eth_driver rte_bnx2xvf_pmd = { .pci_drv = { - .name = "rte_bnx2xvf_pmd", .id_table = pci_id_bnx2xvf_map, .drv_flags = RTE_PCI_DRV_NEED_MAPPING, + .probe = rte_eth_dev_pci_probe, + .remove = rte_eth_dev_pci_remove, }, .eth_dev_init = eth_bnx2xvf_dev_init, .dev_private_size = sizeof(struct bnx2x_softc), }; -static int rte_bnx2x_pmd_init(const char *name __rte_unused, const char *params __rte_unused) -{ - PMD_INIT_FUNC_TRACE(); - rte_eth_driver_register(&rte_bnx2x_pmd); - - return 0; -} - -static int rte_bnx2xvf_pmd_init(const char *name __rte_unused, const char *params __rte_unused) -{ - PMD_INIT_FUNC_TRACE(); - rte_eth_driver_register(&rte_bnx2xvf_pmd); - - return 0; -} - -static struct rte_driver rte_bnx2x_driver = { - .type = PMD_PDEV, - .init = rte_bnx2x_pmd_init, -}; - -static struct rte_driver rte_bnx2xvf_driver = { - .type = PMD_PDEV, - .init = rte_bnx2xvf_pmd_init, -}; - -PMD_REGISTER_DRIVER(rte_bnx2x_driver, bnx2x); -DRIVER_REGISTER_PCI_TABLE(bnx2x, pci_id_bnx2x_map); -PMD_REGISTER_DRIVER(rte_bnx2xvf_driver, bnx2xvf); -DRIVER_REGISTER_PCI_TABLE(bnx2xvf, pci_id_bnx2xvf_map); +RTE_PMD_REGISTER_PCI(net_bnx2x, rte_bnx2x_pmd.pci_drv); +RTE_PMD_REGISTER_PCI_TABLE(net_bnx2x, pci_id_bnx2x_map); +RTE_PMD_REGISTER_KMOD_DEP(net_bnx2x, "* igb_uio | uio_pci_generic | vfio"); +RTE_PMD_REGISTER_PCI(net_bnx2xvf, rte_bnx2xvf_pmd.pci_drv); +RTE_PMD_REGISTER_PCI_TABLE(net_bnx2xvf, pci_id_bnx2xvf_map); +RTE_PMD_REGISTER_KMOD_DEP(net_bnx2xvf, "* igb_uio | vfio"); diff --git a/src/dpdk/drivers/net/bnx2x/bnx2x_rxtx.c b/src/dpdk/drivers/net/bnx2x/bnx2x_rxtx.c index 0ec4f899..170e48fb 100644 --- a/src/dpdk/drivers/net/bnx2x/bnx2x_rxtx.c +++ b/src/dpdk/drivers/net/bnx2x/bnx2x_rxtx.c @@ -19,7 +19,8 @@ ring_dma_zone_reserve(struct rte_eth_dev *dev, const char *ring_name, const struct rte_memzone *mz; snprintf(z_name, sizeof(z_name), "%s_%s_%d_%d", - dev->driver->pci_drv.name, ring_name, dev->data->port_id, queue_id); + dev->driver->pci_drv.driver.name, ring_name, + dev->data->port_id, queue_id); mz = rte_memzone_lookup(z_name); if (mz) @@ -59,7 +60,7 @@ bnx2x_dev_rx_queue_setup(struct rte_eth_dev *dev, uint16_t queue_idx, uint16_t nb_desc, unsigned int socket_id, - const struct rte_eth_rxconf *rx_conf, + __rte_unused const struct rte_eth_rxconf *rx_conf, struct rte_mempool *mp) { uint16_t j, idx; @@ -84,7 +85,6 @@ bnx2x_dev_rx_queue_setup(struct rte_eth_dev *dev, rxq->mb_pool = mp; rxq->queue_id = queue_idx; rxq->port_id = dev->data->port_id; - rxq->crc_len = (uint8_t)((dev->data->dev_conf.rxmode.hw_strip_crc) ? 
0 : ETHER_CRC_LEN); rxq->nb_rx_pages = 1; while (USABLE_RX_BD(rxq) < nb_desc) @@ -94,13 +94,9 @@ bnx2x_dev_rx_queue_setup(struct rte_eth_dev *dev, sc->rx_ring_size = USABLE_RX_BD(rxq); rxq->nb_cq_pages = RCQ_BD_PAGES(rxq); - rxq->rx_free_thresh = rx_conf->rx_free_thresh ? - rx_conf->rx_free_thresh : DEFAULT_RX_FREE_THRESH; - - PMD_INIT_LOG(DEBUG, "fp[%02d] req_bd=%u, thresh=%u, usable_bd=%lu, " + PMD_INIT_LOG(DEBUG, "fp[%02d] req_bd=%u, usable_bd=%lu, " "total_bd=%lu, rx_pages=%u, cq_pages=%u", - queue_idx, nb_desc, rxq->rx_free_thresh, - (unsigned long)USABLE_RX_BD(rxq), + queue_idx, nb_desc, (unsigned long)USABLE_RX_BD(rxq), (unsigned long)TOTAL_RX_BD(rxq), rxq->nb_rx_pages, rxq->nb_cq_pages); @@ -135,7 +131,6 @@ bnx2x_dev_rx_queue_setup(struct rte_eth_dev *dev, } /* Initialize software ring entries */ - rxq->rx_mbuf_alloc = 0; for (idx = 0; idx < rxq->nb_rx_desc; idx = NEXT_RX_BD(idx)) { mbuf = rte_mbuf_raw_alloc(mp); if (NULL == mbuf) { @@ -146,7 +141,6 @@ bnx2x_dev_rx_queue_setup(struct rte_eth_dev *dev, } rxq->sw_ring[idx] = mbuf; rxq->rx_ring[idx] = mbuf->buf_physaddr; - rxq->rx_mbuf_alloc++; } rxq->pkt_first_seg = NULL; rxq->pkt_last_seg = NULL; diff --git a/src/dpdk/drivers/net/bnx2x/bnx2x_rxtx.h b/src/dpdk/drivers/net/bnx2x/bnx2x_rxtx.h index ccb22fc1..dd251aaf 100644 --- a/src/dpdk/drivers/net/bnx2x/bnx2x_rxtx.h +++ b/src/dpdk/drivers/net/bnx2x/bnx2x_rxtx.h @@ -11,8 +11,6 @@ #ifndef _BNX2X_RXTX_H_ #define _BNX2X_RXTX_H_ - -#define DEFAULT_RX_FREE_THRESH 0 #define DEFAULT_TX_FREE_THRESH 512 #define RTE_PMD_BNX2X_TX_MAX_BURST 1 @@ -42,13 +40,9 @@ struct bnx2x_rx_queue { uint16_t rx_bd_tail; /**< Index of last rx bd. */ uint16_t rx_cq_head; /**< Index of current rcq bd. */ uint16_t rx_cq_tail; /**< Index of last rcq bd. */ - uint16_t nb_rx_hold; /**< number of held free RX desc. */ - uint16_t rx_free_thresh; /**< max free RX desc to hold. */ uint16_t queue_id; /**< RX queue index. */ uint8_t port_id; /**< Device port identifier. */ - uint8_t crc_len; /**< 0 if CRC stripped, 4 otherwise. */ struct bnx2x_softc *sc; /**< Ptr to dev_private data. */ - uint64_t rx_mbuf_alloc; /**< Number of allocated mbufs. 
*/ }; /** diff --git a/src/dpdk/drivers/net/bnx2x/bnx2x_vfpf.c b/src/dpdk/drivers/net/bnx2x/bnx2x_vfpf.c index 1c895f88..0ca0df87 100644 --- a/src/dpdk/drivers/net/bnx2x/bnx2x_vfpf.c +++ b/src/dpdk/drivers/net/bnx2x/bnx2x_vfpf.c @@ -64,25 +64,46 @@ bnx2x_check_bull(struct bnx2x_softc *sc) return TRUE; } -/* add tlv to a buffer */ -#define BNX2X_TLV_APPEND(_tlvs, _offset, _type, _length) \ - ((struct vf_first_tlv *)((unsigned long)_tlvs + _offset))->type = _type; \ - ((struct vf_first_tlv *)((unsigned long)_tlvs + _offset))->length = _length +/* place a given tlv on the tlv buffer at a given offset */ +static void +bnx2x_add_tlv(__rte_unused struct bnx2x_softc *sc, void *tlvs_list, + uint16_t offset, uint16_t type, uint16_t length) +{ + struct channel_tlv *tl = (struct channel_tlv *) + ((unsigned long)tlvs_list + offset); + + tl->type = type; + tl->length = length; +} /* Initiliaze header of the first tlv and clear mailbox*/ static void -bnx2x_init_first_tlv(struct bnx2x_softc *sc, struct vf_first_tlv *tlv, - uint16_t type, uint16_t len) +bnx2x_vf_prep(struct bnx2x_softc *sc, struct vf_first_tlv *first_tlv, + uint16_t type, uint16_t length) { struct bnx2x_vf_mbx_msg *mbox = sc->vf2pf_mbox; + + rte_spinlock_lock(&sc->vf2pf_lock); + PMD_DRV_LOG(DEBUG, "Preparing %d tlv for sending", type); memset(mbox, 0, sizeof(struct bnx2x_vf_mbx_msg)); - BNX2X_TLV_APPEND(tlv, 0, type, len); + bnx2x_add_tlv(sc, &first_tlv->tl, 0, type, length); /* Initialize header of the first tlv */ - tlv->reply_offset = sizeof(mbox->query); + first_tlv->reply_offset = sizeof(mbox->query); +} + +/* releases the mailbox */ +static void +bnx2x_vf_finalize(struct bnx2x_softc *sc, + __rte_unused struct vf_first_tlv *first_tlv) +{ + PMD_DRV_LOG(DEBUG, "done sending [%d] tlv over vf pf channel", + first_tlv->tl.type); + + rte_spinlock_unlock(&sc->vf2pf_lock); } #define BNX2X_VF_CMD_ADDR_LO PXP_VF_ADDR_CSDM_GLOBAL_START @@ -97,39 +118,36 @@ bnx2x_do_req4pf(struct bnx2x_softc *sc, phys_addr_t phys_addr) uint8_t *status = &sc->vf2pf_mbox->resp.common_reply.status; uint8_t i; - if (!*status) { - bnx2x_check_bull(sc); - if (sc->old_bulletin.valid_bitmap & (1 << CHANNEL_DOWN)) { - PMD_DRV_LOG(ERR, "channel is down. Aborting message sending"); - *status = BNX2X_VF_STATUS_SUCCESS; - return 0; - } + if (*status) { + PMD_DRV_LOG(ERR, "status should be zero before message" + " to pf was sent"); + return -EINVAL; + } - REG_WR(sc, BNX2X_VF_CMD_ADDR_LO, U64_LO(phys_addr)); - REG_WR(sc, BNX2X_VF_CMD_ADDR_HI, U64_HI(phys_addr)); + bnx2x_check_bull(sc); + if (sc->old_bulletin.valid_bitmap & (1 << CHANNEL_DOWN)) { + PMD_DRV_LOG(ERR, "channel is down. Aborting message sending"); + return -EINVAL; + } - /* memory barrier to ensure that FW can read phys_addr */ - wmb(); + REG_WR(sc, BNX2X_VF_CMD_ADDR_LO, U64_LO(phys_addr)); + REG_WR(sc, BNX2X_VF_CMD_ADDR_HI, U64_HI(phys_addr)); - REG_WR8(sc, BNX2X_VF_CMD_TRIGGER, 1); + /* memory barrier to ensure that FW can read phys_addr */ + wmb(); - /* Do several attempts until PF completes - * "." 
is used to show progress - */ - for (i = 0; i < BNX2X_VF_CHANNEL_TRIES; i++) { - DELAY_MS(BNX2X_VF_CHANNEL_DELAY); - if (*status) - break; - } + REG_WR8(sc, BNX2X_VF_CMD_TRIGGER, 1); - if (!*status) { - PMD_DRV_LOG(ERR, "Response from PF timed out"); - return -EAGAIN; - } - } else { - PMD_DRV_LOG(ERR, "status should be zero before message" - "to pf was sent"); - return -EINVAL; + /* Do several attempts until PF completes */ + for (i = 0; i < BNX2X_VF_CHANNEL_TRIES; i++) { + DELAY_MS(BNX2X_VF_CHANNEL_DELAY); + if (*status) + break; + } + + if (!*status) { + PMD_DRV_LOG(ERR, "Response from PF timed out"); + return -EAGAIN; } PMD_DRV_LOG(DEBUG, "Response from PF was received"); @@ -168,31 +186,23 @@ static inline int bnx2x_read_vf_id(struct bnx2x_softc *sc) #define BNX2X_VF_OBTAIN_MAC_FILTERS 1 #define BNX2X_VF_OBTAIN_MC_FILTERS 10 -struct bnx2x_obtain_status { - int success; - int err_code; -}; - static -struct bnx2x_obtain_status bnx2x_loop_obtain_resources(struct bnx2x_softc *sc) +int bnx2x_loop_obtain_resources(struct bnx2x_softc *sc) { - int tries = 0; struct vf_acquire_resp_tlv *resp = &sc->vf2pf_mbox->resp.acquire_resp, - *sc_resp = &sc->acquire_resp; - struct vf_resource_query *res_query; - struct vf_resc *resc; - struct bnx2x_obtain_status status; + *sc_resp = &sc->acquire_resp; + struct vf_resource_query *res_query; + struct vf_resc *resc; int res_obtained = false; + int tries = 0; + int rc; do { PMD_DRV_LOG(DEBUG, "trying to get resources"); - if (bnx2x_do_req4pf(sc, sc->vf2pf_mbox_mapping.paddr)) { - /* timeout */ - status.success = 0; - status.err_code = -EAGAIN; - return status; - } + rc = bnx2x_do_req4pf(sc, sc->vf2pf_mbox_mapping.paddr); + if (rc) + return rc; memcpy(sc_resp, resp, sizeof(sc->acquire_resp)); @@ -203,12 +213,12 @@ struct bnx2x_obtain_status bnx2x_loop_obtain_resources(struct bnx2x_softc *sc) PMD_DRV_LOG(DEBUG, "resources obtained successfully"); res_obtained = true; } else if (sc_resp->status == BNX2X_VF_STATUS_NO_RESOURCES && - tries < BNX2X_VF_OBTAIN_MAX_TRIES) { + tries < BNX2X_VF_OBTAIN_MAX_TRIES) { PMD_DRV_LOG(DEBUG, "PF cannot allocate requested amount of resources"); res_query = &sc->vf2pf_mbox->query[0].acquire.res_query; - resc = &sc_resp->resc; + resc = &sc_resp->resc; /* PF refused our request. Try to decrease request params */ res_query->num_txqs = min(res_query->num_txqs, resc->num_txqs); @@ -220,30 +230,30 @@ struct bnx2x_obtain_status bnx2x_loop_obtain_resources(struct bnx2x_softc *sc) memset(&sc->vf2pf_mbox->resp, 0, sizeof(union resp_tlvs)); } else { - PMD_DRV_LOG(ERR, "Resources cannot be obtained. Status of handling: %d. 
Aborting", - sc_resp->status); - status.success = 0; - status.err_code = -EAGAIN; - return status; + PMD_DRV_LOG(ERR, "Failed to get the requested " + "amount of resources: %d.", + sc_resp->status); + return -EINVAL; } } while (!res_obtained); - status.success = 1; - return status; + return 0; } int bnx2x_vf_get_resources(struct bnx2x_softc *sc, uint8_t tx_count, uint8_t rx_count) { struct vf_acquire_tlv *acq = &sc->vf2pf_mbox->query[0].acquire; int vf_id; - struct bnx2x_obtain_status obtain_status; + int rc; bnx2x_vf_close(sc); - bnx2x_init_first_tlv(sc, &acq->first_tlv, BNX2X_VF_TLV_ACQUIRE, sizeof(*acq)); + bnx2x_vf_prep(sc, &acq->first_tlv, BNX2X_VF_TLV_ACQUIRE, sizeof(*acq)); vf_id = bnx2x_read_vf_id(sc); - if (vf_id < 0) - return -EAGAIN; + if (vf_id < 0) { + rc = -EAGAIN; + goto out; + } acq->vf_id = vf_id; @@ -256,19 +266,19 @@ int bnx2x_vf_get_resources(struct bnx2x_softc *sc, uint8_t tx_count, uint8_t rx_ acq->bulletin_addr = sc->pf2vf_bulletin_mapping.paddr; /* Request physical port identifier */ - BNX2X_TLV_APPEND(acq, acq->first_tlv.length, - BNX2X_VF_TLV_PHYS_PORT_ID, - sizeof(struct channel_tlv)); + bnx2x_add_tlv(sc, acq, acq->first_tlv.tl.length, + BNX2X_VF_TLV_PHYS_PORT_ID, + sizeof(struct channel_tlv)); - BNX2X_TLV_APPEND(acq, - (acq->first_tlv.length + sizeof(struct channel_tlv)), - BNX2X_VF_TLV_LIST_END, - sizeof(struct channel_list_end_tlv)); + bnx2x_add_tlv(sc, acq, + (acq->first_tlv.tl.length + sizeof(struct channel_tlv)), + BNX2X_VF_TLV_LIST_END, + sizeof(struct channel_list_end_tlv)); /* requesting the resources in loop */ - obtain_status = bnx2x_loop_obtain_resources(sc); - if (!obtain_status.success) - return obtain_status.err_code; + rc = bnx2x_loop_obtain_resources(sc); + if (rc) + goto out; struct vf_acquire_resp_tlv sc_resp = sc->acquire_resp; @@ -299,7 +309,10 @@ int bnx2x_vf_get_resources(struct bnx2x_softc *sc, uint8_t tx_count, uint8_t rx_ else eth_random_addr(sc->link_params.mac_addr); - return 0; +out: + bnx2x_vf_finalize(sc, &acq->first_tlv); + + return rc; } /* Ask PF to release VF's resources */ @@ -309,19 +322,23 @@ bnx2x_vf_close(struct bnx2x_softc *sc) struct vf_release_tlv *query; struct vf_common_reply_tlv *reply = &sc->vf2pf_mbox->resp.common_reply; int vf_id = bnx2x_read_vf_id(sc); + int rc; if (vf_id >= 0) { query = &sc->vf2pf_mbox->query[0].release; - bnx2x_init_first_tlv(sc, &query->first_tlv, BNX2X_VF_TLV_RELEASE, - sizeof(*query)); + bnx2x_vf_prep(sc, &query->first_tlv, BNX2X_VF_TLV_RELEASE, + sizeof(*query)); query->vf_id = vf_id; - BNX2X_TLV_APPEND(query, query->first_tlv.length, BNX2X_VF_TLV_LIST_END, - sizeof(struct channel_list_end_tlv)); + bnx2x_add_tlv(sc, query, query->first_tlv.tl.length, + BNX2X_VF_TLV_LIST_END, + sizeof(struct channel_list_end_tlv)); - bnx2x_do_req4pf(sc, sc->vf2pf_mbox_mapping.paddr); - if (reply->status != BNX2X_VF_STATUS_SUCCESS) + rc = bnx2x_do_req4pf(sc, sc->vf2pf_mbox_mapping.paddr); + if (rc || reply->status != BNX2X_VF_STATUS_SUCCESS) PMD_DRV_LOG(ERR, "Failed to release VF"); + + bnx2x_vf_finalize(sc, &query->first_tlv); } } @@ -331,11 +348,11 @@ bnx2x_vf_init(struct bnx2x_softc *sc) { struct vf_init_tlv *query; struct vf_common_reply_tlv *reply = &sc->vf2pf_mbox->resp.common_reply; - int i; + int i, rc; query = &sc->vf2pf_mbox->query[0].init; - bnx2x_init_first_tlv(sc, &query->first_tlv, BNX2X_VF_TLV_INIT, - sizeof(*query)); + bnx2x_vf_prep(sc, &query->first_tlv, BNX2X_VF_TLV_INIT, + sizeof(*query)); FOR_EACH_QUEUE(sc, i) { query->sb_addr[i] = (unsigned long)(sc->fp[i].sb_dma.paddr); @@ -345,17 +362,23 
@@ bnx2x_vf_init(struct bnx2x_softc *sc) query->stats_addr = sc->fw_stats_data_mapping + offsetof(struct bnx2x_fw_stats_data, queue_stats); - BNX2X_TLV_APPEND(query, query->first_tlv.length, BNX2X_VF_TLV_LIST_END, - sizeof(struct channel_list_end_tlv)); + bnx2x_add_tlv(sc, query, query->first_tlv.tl.length, + BNX2X_VF_TLV_LIST_END, + sizeof(struct channel_list_end_tlv)); - bnx2x_do_req4pf(sc, sc->vf2pf_mbox_mapping.paddr); + rc = bnx2x_do_req4pf(sc, sc->vf2pf_mbox_mapping.paddr); + if (rc) + goto out; if (reply->status != BNX2X_VF_STATUS_SUCCESS) { PMD_DRV_LOG(ERR, "Failed to init VF"); - return -EINVAL; + rc = -EINVAL; + goto out; } PMD_DRV_LOG(DEBUG, "VF was initialized"); - return 0; +out: + bnx2x_vf_finalize(sc, &query->first_tlv); + return rc; } void @@ -364,44 +387,49 @@ bnx2x_vf_unload(struct bnx2x_softc *sc) struct vf_close_tlv *query; struct vf_common_reply_tlv *reply = &sc->vf2pf_mbox->resp.common_reply; struct vf_q_op_tlv *query_op; - int i, vf_id; + int i, vf_id, rc; vf_id = bnx2x_read_vf_id(sc); if (vf_id > 0) { FOR_EACH_QUEUE(sc, i) { query_op = &sc->vf2pf_mbox->query[0].q_op; - bnx2x_init_first_tlv(sc, &query_op->first_tlv, - BNX2X_VF_TLV_TEARDOWN_Q, - sizeof(*query_op)); + bnx2x_vf_prep(sc, &query_op->first_tlv, + BNX2X_VF_TLV_TEARDOWN_Q, + sizeof(*query_op)); query_op->vf_qid = i; - BNX2X_TLV_APPEND(query_op, query_op->first_tlv.length, - BNX2X_VF_TLV_LIST_END, - sizeof(struct channel_list_end_tlv)); + bnx2x_add_tlv(sc, query_op, + query_op->first_tlv.tl.length, + BNX2X_VF_TLV_LIST_END, + sizeof(struct channel_list_end_tlv)); - bnx2x_do_req4pf(sc, sc->vf2pf_mbox_mapping.paddr); - if (reply->status != BNX2X_VF_STATUS_SUCCESS) + rc = bnx2x_do_req4pf(sc, sc->vf2pf_mbox_mapping.paddr); + if (rc || reply->status != BNX2X_VF_STATUS_SUCCESS) PMD_DRV_LOG(ERR, "Bad reply for vf_q %d teardown", i); + + bnx2x_vf_finalize(sc, &query_op->first_tlv); } bnx2x_vf_set_mac(sc, false); query = &sc->vf2pf_mbox->query[0].close; - bnx2x_init_first_tlv(sc, &query->first_tlv, BNX2X_VF_TLV_CLOSE, - sizeof(*query)); + bnx2x_vf_prep(sc, &query->first_tlv, BNX2X_VF_TLV_CLOSE, + sizeof(*query)); query->vf_id = vf_id; - BNX2X_TLV_APPEND(query, query->first_tlv.length, - BNX2X_VF_TLV_LIST_END, - sizeof(struct channel_list_end_tlv)); + bnx2x_add_tlv(sc, query, query->first_tlv.tl.length, + BNX2X_VF_TLV_LIST_END, + sizeof(struct channel_list_end_tlv)); - bnx2x_do_req4pf(sc, sc->vf2pf_mbox_mapping.paddr); - if (reply->status != BNX2X_VF_STATUS_SUCCESS) + rc = bnx2x_do_req4pf(sc, sc->vf2pf_mbox_mapping.paddr); + if (rc || reply->status != BNX2X_VF_STATUS_SUCCESS) PMD_DRV_LOG(ERR, "Bad reply from PF for close message"); + + bnx2x_vf_finalize(sc, &query->first_tlv); } } @@ -466,10 +494,11 @@ bnx2x_vf_setup_queue(struct bnx2x_softc *sc, struct bnx2x_fastpath *fp, int lead struct vf_setup_q_tlv *query; struct vf_common_reply_tlv *reply = &sc->vf2pf_mbox->resp.common_reply; uint16_t flags = bnx2x_vf_q_flags(leading); + int rc; query = &sc->vf2pf_mbox->query[0].setup_q; - bnx2x_init_first_tlv(sc, &query->first_tlv, BNX2X_VF_TLV_SETUP_Q, - sizeof(*query)); + bnx2x_vf_prep(sc, &query->first_tlv, BNX2X_VF_TLV_SETUP_Q, + sizeof(*query)); query->vf_qid = fp->index; query->param_valid = VF_RXQ_VALID | VF_TXQ_VALID; @@ -477,17 +506,22 @@ bnx2x_vf_setup_queue(struct bnx2x_softc *sc, struct bnx2x_fastpath *fp, int lead bnx2x_vf_rx_q_prep(sc, fp, &query->rxq, flags); bnx2x_vf_tx_q_prep(sc, fp, &query->txq, flags); - BNX2X_TLV_APPEND(query, query->first_tlv.length, BNX2X_VF_TLV_LIST_END, - sizeof(struct 
channel_list_end_tlv)); + bnx2x_add_tlv(sc, query, query->first_tlv.tl.length, + BNX2X_VF_TLV_LIST_END, + sizeof(struct channel_list_end_tlv)); - bnx2x_do_req4pf(sc, sc->vf2pf_mbox_mapping.paddr); + rc = bnx2x_do_req4pf(sc, sc->vf2pf_mbox_mapping.paddr); + if (rc) + goto out; if (reply->status != BNX2X_VF_STATUS_SUCCESS) { PMD_DRV_LOG(ERR, "Failed to setup VF queue[%d]", fp->index); - return -EINVAL; + rc = -EINVAL; } +out: + bnx2x_vf_finalize(sc, &query->first_tlv); - return 0; + return rc; } int @@ -495,9 +529,10 @@ bnx2x_vf_set_mac(struct bnx2x_softc *sc, int set) { struct vf_set_q_filters_tlv *query; struct vf_common_reply_tlv *reply; + int rc; query = &sc->vf2pf_mbox->query[0].set_q_filters; - bnx2x_init_first_tlv(sc, &query->first_tlv, BNX2X_VF_TLV_SET_Q_FILTERS, + bnx2x_vf_prep(sc, &query->first_tlv, BNX2X_VF_TLV_SET_Q_FILTERS, sizeof(*query)); query->vf_qid = sc->fp->index; @@ -511,10 +546,13 @@ bnx2x_vf_set_mac(struct bnx2x_softc *sc, int set) rte_memcpy(query->filters[0].mac, sc->link_params.mac_addr, ETH_ALEN); - BNX2X_TLV_APPEND(query, query->first_tlv.length, BNX2X_VF_TLV_LIST_END, - sizeof(struct channel_list_end_tlv)); + bnx2x_add_tlv(sc, query, query->first_tlv.tl.length, + BNX2X_VF_TLV_LIST_END, + sizeof(struct channel_list_end_tlv)); - bnx2x_do_req4pf(sc, sc->vf2pf_mbox_mapping.paddr); + rc = bnx2x_do_req4pf(sc, sc->vf2pf_mbox_mapping.paddr); + if (rc) + goto out; reply = &sc->vf2pf_mbox->resp.common_reply; while (BNX2X_VF_STATUS_FAILURE == reply->status && @@ -525,16 +563,20 @@ bnx2x_vf_set_mac(struct bnx2x_softc *sc, int set) rte_memcpy(query->filters[0].mac, sc->pf2vf_bulletin->mac, ETH_ALEN); - bnx2x_do_req4pf(sc, sc->vf2pf_mbox_mapping.paddr); + rc = bnx2x_do_req4pf(sc, sc->vf2pf_mbox_mapping.paddr); + if (rc) + goto out; } if (BNX2X_VF_STATUS_SUCCESS != reply->status) { PMD_DRV_LOG(ERR, "Bad reply from PF for SET MAC message: %d", reply->status); - return -EINVAL; + rc = -EINVAL; } +out: + bnx2x_vf_finalize(sc, &query->first_tlv); - return 0; + return rc; } int @@ -543,15 +585,17 @@ bnx2x_vf_config_rss(struct bnx2x_softc *sc, { struct vf_rss_tlv *query; struct vf_common_reply_tlv *reply = &sc->vf2pf_mbox->resp.common_reply; + int rc; query = &sc->vf2pf_mbox->query[0].update_rss; - bnx2x_init_first_tlv(sc, &query->first_tlv, BNX2X_VF_TLV_UPDATE_RSS, + bnx2x_vf_prep(sc, &query->first_tlv, BNX2X_VF_TLV_UPDATE_RSS, sizeof(*query)); /* add list termination tlv */ - BNX2X_TLV_APPEND(query, query->first_tlv.length, BNX2X_VF_TLV_LIST_END, - sizeof(struct channel_list_end_tlv)); + bnx2x_add_tlv(sc, query, query->first_tlv.tl.length, + BNX2X_VF_TLV_LIST_END, + sizeof(struct channel_list_end_tlv)); rte_memcpy(query->rss_key, params->rss_key, sizeof(params->rss_key)); query->rss_key_size = T_ETH_RSS_KEY; @@ -562,13 +606,18 @@ bnx2x_vf_config_rss(struct bnx2x_softc *sc, query->rss_result_mask = params->rss_result_mask; query->rss_flags = params->rss_flags; - bnx2x_do_req4pf(sc, sc->vf2pf_mbox_mapping.paddr); + rc = bnx2x_do_req4pf(sc, sc->vf2pf_mbox_mapping.paddr); + if (rc) + goto out; + if (reply->status != BNX2X_VF_STATUS_SUCCESS) { PMD_DRV_LOG(ERR, "Failed to configure RSS"); - return -EINVAL; + rc = -EINVAL; } +out: + bnx2x_vf_finalize(sc, &query->first_tlv); - return 0; + return rc; } int @@ -576,27 +625,56 @@ bnx2x_vf_set_rx_mode(struct bnx2x_softc *sc) { struct vf_set_q_filters_tlv *query; struct vf_common_reply_tlv *reply = &sc->vf2pf_mbox->resp.common_reply; - unsigned long tx_mask; + int rc; query = &sc->vf2pf_mbox->query[0].set_q_filters; - bnx2x_init_first_tlv(sc, 
&query->first_tlv, BNX2X_VF_TLV_SET_Q_FILTERS, + bnx2x_vf_prep(sc, &query->first_tlv, BNX2X_VF_TLV_SET_Q_FILTERS, sizeof(*query)); query->vf_qid = 0; query->flags = BNX2X_VF_RX_MASK_CHANGED; - if (bnx2x_fill_accept_flags(sc, sc->rx_mode, &query->rx_mask, &tx_mask)) { - return -EINVAL; + switch (sc->rx_mode) { + case BNX2X_RX_MODE_NONE: /* no Rx */ + query->rx_mask = VFPF_RX_MASK_ACCEPT_NONE; + break; + case BNX2X_RX_MODE_NORMAL: + query->rx_mask = VFPF_RX_MASK_ACCEPT_MATCHED_MULTICAST; + query->rx_mask |= VFPF_RX_MASK_ACCEPT_MATCHED_UNICAST; + query->rx_mask |= VFPF_RX_MASK_ACCEPT_BROADCAST; + break; + case BNX2X_RX_MODE_ALLMULTI: + query->rx_mask = VFPF_RX_MASK_ACCEPT_ALL_MULTICAST; + query->rx_mask |= VFPF_RX_MASK_ACCEPT_MATCHED_UNICAST; + query->rx_mask |= VFPF_RX_MASK_ACCEPT_BROADCAST; + break; + case BNX2X_RX_MODE_ALLMULTI_PROMISC: + case BNX2X_RX_MODE_PROMISC: + query->rx_mask = VFPF_RX_MASK_ACCEPT_ALL_UNICAST; + query->rx_mask |= VFPF_RX_MASK_ACCEPT_ALL_MULTICAST; + query->rx_mask |= VFPF_RX_MASK_ACCEPT_BROADCAST; + break; + default: + PMD_DRV_LOG(ERR, "BAD rx mode (%d)", sc->rx_mode); + rc = -EINVAL; + goto out; } - BNX2X_TLV_APPEND(query, query->first_tlv.length, BNX2X_VF_TLV_LIST_END, - sizeof(struct channel_list_end_tlv)); + bnx2x_add_tlv(sc, query, query->first_tlv.tl.length, + BNX2X_VF_TLV_LIST_END, + sizeof(struct channel_list_end_tlv)); + + rc = bnx2x_do_req4pf(sc, sc->vf2pf_mbox_mapping.paddr); + if (rc) + goto out; - bnx2x_do_req4pf(sc, sc->vf2pf_mbox_mapping.paddr); if (reply->status != BNX2X_VF_STATUS_SUCCESS) { PMD_DRV_LOG(ERR, "Failed to set RX mode"); - return -EINVAL; + rc = -EINVAL; } - return 0; +out: + bnx2x_vf_finalize(sc, &query->first_tlv); + + return rc; } diff --git a/src/dpdk/drivers/net/bnx2x/bnx2x_vfpf.h b/src/dpdk/drivers/net/bnx2x/bnx2x_vfpf.h index f854d81b..955ea982 100644 --- a/src/dpdk/drivers/net/bnx2x/bnx2x_vfpf.h +++ b/src/dpdk/drivers/net/bnx2x/bnx2x_vfpf.h @@ -40,6 +40,13 @@ struct vf_resource_query { #define TLV_BUFFER_SIZE 1024 +#define VFPF_RX_MASK_ACCEPT_NONE 0x00000000 +#define VFPF_RX_MASK_ACCEPT_MATCHED_UNICAST 0x00000001 +#define VFPF_RX_MASK_ACCEPT_MATCHED_MULTICAST 0x00000002 +#define VFPF_RX_MASK_ACCEPT_ALL_UNICAST 0x00000004 +#define VFPF_RX_MASK_ACCEPT_ALL_MULTICAST 0x00000008 +#define VFPF_RX_MASK_ACCEPT_BROADCAST 0x00000010 + /* general tlv header (used for both vf->pf request and pf->vf response) */ struct channel_tlv { uint16_t type; @@ -47,8 +54,7 @@ struct channel_tlv { }; struct vf_first_tlv { - uint16_t type; - uint16_t length; + struct channel_tlv tl; uint32_t reply_offset; }; @@ -58,16 +64,14 @@ struct tlv_buffer_size { /* tlv struct for all PF replies except acquire */ struct vf_common_reply_tlv { - uint16_t type; - uint16_t length; + struct channel_tlv tl; uint8_t status; uint8_t pad[3]; }; /* used to terminate and pad a tlv list */ struct channel_list_end_tlv { - uint16_t type; - uint16_t length; + struct channel_tlv tl; uint32_t pad; }; @@ -327,7 +331,6 @@ struct bnx2x_vf_mbx_msg { union resp_tlvs resp; }; -void bnx2x_add_tlv(void *tlvs_list, uint16_t offset, uint16_t type, uint16_t length); int bnx2x_vf_set_mac(struct bnx2x_softc *sc, int set); int bnx2x_vf_config_rss(struct bnx2x_softc *sc, struct ecore_config_rss_params *params); diff --git a/src/dpdk/drivers/net/bnx2x/debug.c b/src/dpdk/drivers/net/bnx2x/debug.c deleted file mode 100644 index cc50845c..00000000 --- a/src/dpdk/drivers/net/bnx2x/debug.c +++ /dev/null @@ -1,96 +0,0 @@ -/*- - * Copyright (c) 2007-2013 QLogic Corporation. All rights reserved. 
- * - * Eric Davis <edavis@broadcom.com> - * David Christensen <davidch@broadcom.com> - * Gary Zambrano <zambrano@broadcom.com> - * - * Copyright (c) 2013-2015 Brocade Communications Systems, Inc. - * Copyright (c) 2015 QLogic Corporation. - * All rights reserved. - * www.qlogic.com - * - * See LICENSE.bnx2x_pmd for copyright and licensing details. - */ - -#include "bnx2x.h" - - -/* - * Debug versions of the 8/16/32 bit OS register read/write functions to - * capture/display values read/written from/to the controller. - */ -void -bnx2x_reg_write8(struct bnx2x_softc *sc, size_t offset, uint8_t val) -{ - PMD_DEBUG_PERIODIC_LOG(DEBUG, "offset=0x%08lx val=0x%02x", (unsigned long)offset, val); - *((volatile uint8_t*)((uintptr_t)sc->bar[BAR0].base_addr + offset)) = val; -} - -void -bnx2x_reg_write16(struct bnx2x_softc *sc, size_t offset, uint16_t val) -{ - if ((offset % 2) != 0) { - PMD_DRV_LOG(NOTICE, "Unaligned 16-bit write to 0x%08lx", - (unsigned long)offset); - } - - PMD_DEBUG_PERIODIC_LOG(DEBUG, "offset=0x%08lx val=0x%04x", (unsigned long)offset, val); - *((volatile uint16_t*)((uintptr_t)sc->bar[BAR0].base_addr + offset)) = val; -} - -void -bnx2x_reg_write32(struct bnx2x_softc *sc, size_t offset, uint32_t val) -{ - if ((offset % 4) != 0) { - PMD_DRV_LOG(NOTICE, "Unaligned 32-bit write to 0x%08lx", - (unsigned long)offset); - } - - PMD_DEBUG_PERIODIC_LOG(DEBUG, "offset=0x%08lx val=0x%08x", (unsigned long)offset, val); - *((volatile uint32_t*)((uintptr_t)sc->bar[BAR0].base_addr + offset)) = val; -} - -uint8_t -bnx2x_reg_read8(struct bnx2x_softc *sc, size_t offset) -{ - uint8_t val; - - val = (uint8_t)(*((volatile uint8_t*)((uintptr_t)sc->bar[BAR0].base_addr + offset))); - PMD_DEBUG_PERIODIC_LOG(DEBUG, "offset=0x%08lx val=0x%02x", (unsigned long)offset, val); - - return val; -} - -uint16_t -bnx2x_reg_read16(struct bnx2x_softc *sc, size_t offset) -{ - uint16_t val; - - if ((offset % 2) != 0) { - PMD_DRV_LOG(NOTICE, "Unaligned 16-bit read from 0x%08lx", - (unsigned long)offset); - } - - val = (uint16_t)(*((volatile uint16_t*)((uintptr_t)sc->bar[BAR0].base_addr + offset))); - PMD_DEBUG_PERIODIC_LOG(DEBUG, "offset=0x%08lx val=0x%08x", (unsigned long)offset, val); - - return val; -} - -uint32_t -bnx2x_reg_read32(struct bnx2x_softc *sc, size_t offset) -{ - uint32_t val; - - if ((offset % 4) != 0) { - PMD_DRV_LOG(NOTICE, "Unaligned 32-bit read from 0x%08lx", - (unsigned long)offset); - return 0; - } - - val = (uint32_t)(*((volatile uint32_t*)((uintptr_t)sc->bar[BAR0].base_addr + offset))); - PMD_DEBUG_PERIODIC_LOG(DEBUG, "offset=0x%08lx val=0x%08x", (unsigned long)offset, val); - - return val; -} diff --git a/src/dpdk/drivers/net/bnx2x/elink.c b/src/dpdk/drivers/net/bnx2x/elink.c index 149cc975..53293962 100644 --- a/src/dpdk/drivers/net/bnx2x/elink.c +++ b/src/dpdk/drivers/net/bnx2x/elink.c @@ -1586,26 +1586,6 @@ static elink_status_t elink_emac_enable(struct elink_params *params, /* enable emac and not bmac */ REG_WR(sc, NIG_REG_EGRESS_EMAC0_PORT + port * 4, 1); -#ifdef ELINK_INCLUDE_EMUL - /* for paladium */ - if (CHIP_REV_IS_EMUL(sc)) { - /* Use lane 1 (of lanes 0-3) */ - REG_WR(sc, NIG_REG_XGXS_LANE_SEL_P0 + port * 4, 1); - REG_WR(sc, NIG_REG_XGXS_SERDES0_MODE_SEL + port * 4, 1); - } - /* for fpga */ - else -#endif -#ifdef ELINK_INCLUDE_FPGA - if (CHIP_REV_IS_FPGA(sc)) { - /* Use lane 1 (of lanes 0-3) */ - PMD_DRV_LOG(DEBUG, "elink_emac_enable: Setting FPGA"); - - REG_WR(sc, NIG_REG_XGXS_LANE_SEL_P0 + port * 4, 1); - REG_WR(sc, NIG_REG_XGXS_SERDES0_MODE_SEL + port * 4, 0); - } else -#endif - /* 
ASIC */ if (vars->phy_flags & PHY_XGXS_FLAG) { uint32_t ser_lane = ((params->lane_config & PORT_HW_CFG_LANE_SWAP_CFG_MASTER_MASK) >> @@ -1628,39 +1608,28 @@ static elink_status_t elink_emac_enable(struct elink_params *params, elink_bits_en(sc, emac_base + EMAC_REG_EMAC_TX_MODE, EMAC_TX_MODE_RESET); -#if defined(ELINK_INCLUDE_EMUL) || defined(ELINK_INCLUDE_FPGA) - if (CHIP_REV_IS_SLOW(sc)) { - /* config GMII mode */ - val = REG_RD(sc, emac_base + EMAC_REG_EMAC_MODE); - elink_cb_reg_write(sc, emac_base + EMAC_REG_EMAC_MODE, - (val | EMAC_MODE_PORT_GMII)); - } else { /* ASIC */ -#endif - /* pause enable/disable */ - elink_bits_dis(sc, emac_base + EMAC_REG_EMAC_RX_MODE, - EMAC_RX_MODE_FLOW_EN); + /* pause enable/disable */ + elink_bits_dis(sc, emac_base + EMAC_REG_EMAC_RX_MODE, + EMAC_RX_MODE_FLOW_EN); - elink_bits_dis(sc, emac_base + EMAC_REG_EMAC_TX_MODE, - (EMAC_TX_MODE_EXT_PAUSE_EN | - EMAC_TX_MODE_FLOW_EN)); - if (!(params->feature_config_flags & - ELINK_FEATURE_CONFIG_PFC_ENABLED)) { - if (vars->flow_ctrl & ELINK_FLOW_CTRL_RX) - elink_bits_en(sc, emac_base + - EMAC_REG_EMAC_RX_MODE, - EMAC_RX_MODE_FLOW_EN); - - if (vars->flow_ctrl & ELINK_FLOW_CTRL_TX) - elink_bits_en(sc, emac_base + - EMAC_REG_EMAC_TX_MODE, - (EMAC_TX_MODE_EXT_PAUSE_EN | - EMAC_TX_MODE_FLOW_EN)); - } else - elink_bits_en(sc, emac_base + EMAC_REG_EMAC_TX_MODE, - EMAC_TX_MODE_FLOW_EN); -#if defined(ELINK_INCLUDE_EMUL) || defined(ELINK_INCLUDE_FPGA) - } -#endif + elink_bits_dis(sc, emac_base + EMAC_REG_EMAC_TX_MODE, + (EMAC_TX_MODE_EXT_PAUSE_EN | + EMAC_TX_MODE_FLOW_EN)); + if (!(params->feature_config_flags & + ELINK_FEATURE_CONFIG_PFC_ENABLED)) { + if (vars->flow_ctrl & ELINK_FLOW_CTRL_RX) + elink_bits_en(sc, emac_base + + EMAC_REG_EMAC_RX_MODE, + EMAC_RX_MODE_FLOW_EN); + + if (vars->flow_ctrl & ELINK_FLOW_CTRL_TX) + elink_bits_en(sc, emac_base + + EMAC_REG_EMAC_TX_MODE, + (EMAC_TX_MODE_EXT_PAUSE_EN | + EMAC_TX_MODE_FLOW_EN)); + } else + elink_bits_en(sc, emac_base + EMAC_REG_EMAC_TX_MODE, + EMAC_TX_MODE_FLOW_EN); /* KEEP_VLAN_TAG, promiscuous */ val = REG_RD(sc, emac_base + EMAC_REG_EMAC_RX_MODE); @@ -1727,17 +1696,7 @@ static elink_status_t elink_emac_enable(struct elink_params *params, REG_WR(sc, NIG_REG_EMAC0_PAUSE_OUT_EN + port * 4, val); REG_WR(sc, NIG_REG_EGRESS_EMAC0_OUT_EN + port * 4, 0x1); -#ifdef ELINK_INCLUDE_EMUL - if (CHIP_REV_IS_EMUL(sc)) { - /* Take the BigMac out of reset */ - REG_WR(sc, GRCBASE_MISC + MISC_REGISTERS_RESET_REG_2_SET, - (MISC_REGISTERS_RESET_REG_2_RST_BMAC0 << port)); - - /* Enable access for bmac registers */ - REG_WR(sc, NIG_REG_BMAC0_REGS_OUT_EN + port * 4, 0x1); - } else -#endif - REG_WR(sc, NIG_REG_BMAC0_REGS_OUT_EN + port * 4, 0x0); + REG_WR(sc, NIG_REG_BMAC0_REGS_OUT_EN + port * 4, 0x0); vars->mac_type = ELINK_MAC_TYPE_EMAC; return ELINK_STATUS_OK; @@ -2137,15 +2096,6 @@ static elink_status_t elink_bmac1_enable(struct elink_params *params, wb_data[1] = 0; REG_WR_DMAE(sc, bmac_addr + BIGMAC_REGISTER_RX_LLFC_MSG_FLDS, wb_data, 2); -#ifdef ELINK_INCLUDE_EMUL - /* Fix for emulation */ - if (CHIP_REV_IS_EMUL(sc)) { - wb_data[0] = 0xf000; - wb_data[1] = 0; - REG_WR_DMAE(sc, bmac_addr + BIGMAC_REGISTER_TX_PAUSE_THRESHOLD, - wb_data, 2); - } -#endif return ELINK_STATUS_OK; } @@ -5922,11 +5872,6 @@ elink_status_t elink_set_led(struct elink_params *params, params, mode); } } -#ifdef ELINK_INCLUDE_EMUL - if (params->feature_config_flags & - ELINK_FEATURE_CONFIG_EMUL_DISABLE_EMAC) - return rc; -#endif switch (mode) { case ELINK_LED_MODE_FRONT_PANEL_OFF: @@ -6645,7 +6590,7 @@ static 
elink_status_t elink_8073_8727_external_rom_boot(struct bnx2x_softc *sc, uint8_t port) { uint32_t count = 0; - uint16_t fw_ver1, fw_msgout; + uint16_t fw_ver1 = 0, fw_msgout; elink_status_t rc = ELINK_STATUS_OK; /* Boot port from external ROM */ @@ -11671,10 +11616,7 @@ elink_status_t elink_phy_probe(struct elink_params * params) struct elink_phy *phy; params->num_phys = 0; PMD_DRV_LOG(DEBUG, "Begin phy probe"); -#ifdef ELINK_INCLUDE_EMUL - if (CHIP_REV_IS_EMUL(sc)) - return ELINK_STATUS_OK; -#endif + phy_config_swapped = params->multi_phy_config & PORT_HW_CFG_PHY_SWAPPED_ENABLED; @@ -11739,182 +11681,6 @@ elink_status_t elink_phy_probe(struct elink_params * params) return ELINK_STATUS_OK; } -#ifdef ELINK_INCLUDE_EMUL -static elink_status_t elink_init_e3_emul_mac(struct elink_params *params, - struct elink_vars *vars) -{ - struct bnx2x_softc *sc = params->sc; - vars->line_speed = params->req_line_speed[0]; - /* In case link speed is auto, set speed the highest as possible */ - if (params->req_line_speed[0] == ELINK_SPEED_AUTO_NEG) { - if (params->feature_config_flags & - ELINK_FEATURE_CONFIG_EMUL_DISABLE_XMAC) - vars->line_speed = ELINK_SPEED_2500; - else if (elink_is_4_port_mode(sc)) - vars->line_speed = ELINK_SPEED_10000; - else - vars->line_speed = ELINK_SPEED_20000; - } - if (vars->line_speed < ELINK_SPEED_10000) { - if ((params->feature_config_flags & - ELINK_FEATURE_CONFIG_EMUL_DISABLE_UMAC)) { - PMD_DRV_LOG(DEBUG, "Invalid line speed %d while UMAC is" - " disabled!", params->req_line_speed[0]); - return ELINK_STATUS_ERROR; - } - switch (vars->line_speed) { - case ELINK_SPEED_10: - vars->link_status = ELINK_LINK_10TFD; - break; - case ELINK_SPEED_100: - vars->link_status = ELINK_LINK_100TXFD; - break; - case ELINK_SPEED_1000: - vars->link_status = ELINK_LINK_1000TFD; - break; - case ELINK_SPEED_2500: - vars->link_status = ELINK_LINK_2500TFD; - break; - default: - PMD_DRV_LOG(DEBUG, "Invalid line speed %d for UMAC", - vars->line_speed); - return ELINK_STATUS_ERROR; - } - vars->link_status |= LINK_STATUS_LINK_UP; - - if (params->loopback_mode == ELINK_LOOPBACK_UMAC) - elink_umac_enable(params, vars, 1); - else - elink_umac_enable(params, vars, 0); - } else { - /* Link speed >= 10000 requires XMAC enabled */ - if (params->feature_config_flags & - ELINK_FEATURE_CONFIG_EMUL_DISABLE_XMAC) { - PMD_DRV_LOG(DEBUG, "Invalid line speed %d while XMAC is" - " disabled!", params->req_line_speed[0]); - return ELINK_STATUS_ERROR; - } - /* Check link speed */ - switch (vars->line_speed) { - case ELINK_SPEED_10000: - vars->link_status = ELINK_LINK_10GTFD; - break; - case ELINK_SPEED_20000: - vars->link_status = ELINK_LINK_20GTFD; - break; - default: - PMD_DRV_LOG(DEBUG, "Invalid line speed %d for XMAC", - vars->line_speed); - return ELINK_STATUS_ERROR; - } - vars->link_status |= LINK_STATUS_LINK_UP; - if (params->loopback_mode == ELINK_LOOPBACK_XMAC) - elink_xmac_enable(params, vars, 1); - else - elink_xmac_enable(params, vars, 0); - } - return ELINK_STATUS_OK; -} - -static elink_status_t elink_init_emul(struct elink_params *params, - struct elink_vars *vars) -{ - struct bnx2x_softc *sc = params->sc; - if (CHIP_IS_E3(sc)) { - if (elink_init_e3_emul_mac(params, vars) != ELINK_STATUS_OK) - return ELINK_STATUS_ERROR; - } else { - if (params->feature_config_flags & - ELINK_FEATURE_CONFIG_EMUL_DISABLE_BMAC) { - vars->line_speed = ELINK_SPEED_1000; - vars->link_status = (LINK_STATUS_LINK_UP | - ELINK_LINK_1000XFD); - if (params->loopback_mode == ELINK_LOOPBACK_EMAC) - elink_emac_enable(params, vars, 1); - 
else - elink_emac_enable(params, vars, 0); - } else { - vars->line_speed = ELINK_SPEED_10000; - vars->link_status = (LINK_STATUS_LINK_UP | - ELINK_LINK_10GTFD); - if (params->loopback_mode == ELINK_LOOPBACK_BMAC) - elink_bmac_enable(params, vars, 1, 1); - else - elink_bmac_enable(params, vars, 0, 1); - } - } - vars->link_up = 1; - vars->duplex = DUPLEX_FULL; - vars->flow_ctrl = ELINK_FLOW_CTRL_NONE; - - if (CHIP_IS_E1x(sc)) - elink_pbf_update(params, vars->flow_ctrl, vars->line_speed); - /* Disable drain */ - REG_WR(sc, NIG_REG_EGRESS_DRAIN0_MODE + params->port * 4, 0); - - /* update shared memory */ - elink_update_mng(params, vars->link_status); - return ELINK_STATUS_OK; -} -#endif -#ifdef ELINK_INCLUDE_FPGA -static elink_status_t elink_init_fpga(struct elink_params *params, - struct elink_vars *vars) -{ - /* Enable on E1.5 FPGA */ - struct bnx2x_softc *sc = params->sc; - vars->duplex = DUPLEX_FULL; - vars->flow_ctrl = ELINK_FLOW_CTRL_NONE; - vars->flow_ctrl = (ELINK_FLOW_CTRL_TX | ELINK_FLOW_CTRL_RX); - vars->link_status |= (LINK_STATUS_TX_FLOW_CONTROL_ENABLED | - LINK_STATUS_RX_FLOW_CONTROL_ENABLED); - if (CHIP_IS_E3(sc)) { - vars->line_speed = params->req_line_speed[0]; - switch (vars->line_speed) { - case ELINK_SPEED_AUTO_NEG: - vars->line_speed = ELINK_SPEED_2500; - case ELINK_SPEED_2500: - vars->link_status = ELINK_LINK_2500TFD; - break; - case ELINK_SPEED_1000: - vars->link_status = ELINK_LINK_1000XFD; - break; - case ELINK_SPEED_100: - vars->link_status = ELINK_LINK_100TXFD; - break; - case ELINK_SPEED_10: - vars->link_status = ELINK_LINK_10TFD; - break; - default: - PMD_DRV_LOG(DEBUG, "Invalid link speed %d", - params->req_line_speed[0]); - return ELINK_STATUS_ERROR; - } - vars->link_status |= LINK_STATUS_LINK_UP; - if (params->loopback_mode == ELINK_LOOPBACK_UMAC) - elink_umac_enable(params, vars, 1); - else - elink_umac_enable(params, vars, 0); - } else { - vars->line_speed = ELINK_SPEED_10000; - vars->link_status = (LINK_STATUS_LINK_UP | ELINK_LINK_10GTFD); - if (params->loopback_mode == ELINK_LOOPBACK_EMAC) - elink_emac_enable(params, vars, 1); - else - elink_emac_enable(params, vars, 0); - } - vars->link_up = 1; - - if (CHIP_IS_E1x(sc)) - elink_pbf_update(params, vars->flow_ctrl, vars->line_speed); - /* Disable drain */ - REG_WR(sc, NIG_REG_EGRESS_DRAIN0_MODE + params->port * 4, 0); - - /* Update shared memory */ - elink_update_mng(params, vars->link_status); - return ELINK_STATUS_OK; -} -#endif static void elink_init_bmac_loopback(struct elink_params *params, struct elink_vars *vars) { @@ -12236,12 +12002,8 @@ elink_status_t elink_phy_init(struct elink_params *params, ELINK_NIG_MASK_XGXS0_LINK10G | ELINK_NIG_MASK_SERDES0_LINK_STATUS | ELINK_NIG_MASK_MI_INT)); -#ifdef ELINK_INCLUDE_EMUL - if (!(params->feature_config_flags & - ELINK_FEATURE_CONFIG_EMUL_DISABLE_EMAC)) -#endif - elink_emac_init(params); + elink_emac_init(params); if (params->feature_config_flags & ELINK_FEATURE_CONFIG_PFC_ENABLED) vars->link_status |= LINK_STATUS_PFC_ENABLED; @@ -12253,45 +12015,36 @@ elink_status_t elink_phy_init(struct elink_params *params, set_phy_vars(params, vars); PMD_DRV_LOG(DEBUG, "Num of phys on board: %d", params->num_phys); -#ifdef ELINK_INCLUDE_FPGA - if (CHIP_REV_IS_FPGA(sc)) { - return elink_init_fpga(params, vars); - } else -#endif -#ifdef ELINK_INCLUDE_EMUL - if (CHIP_REV_IS_EMUL(sc)) { - return elink_init_emul(params, vars); - } else -#endif - switch (params->loopback_mode) { - case ELINK_LOOPBACK_BMAC: - elink_init_bmac_loopback(params, vars); - break; - case 
ELINK_LOOPBACK_EMAC: - elink_init_emac_loopback(params, vars); - break; - case ELINK_LOOPBACK_XMAC: - elink_init_xmac_loopback(params, vars); - break; - case ELINK_LOOPBACK_UMAC: - elink_init_umac_loopback(params, vars); - break; - case ELINK_LOOPBACK_XGXS: - case ELINK_LOOPBACK_EXT_PHY: - elink_init_xgxs_loopback(params, vars); - break; - default: - if (!CHIP_IS_E3(sc)) { - if (params->switch_cfg == ELINK_SWITCH_CFG_10G) - elink_xgxs_deassert(params); - else - elink_serdes_deassert(sc, params->port); - } - elink_link_initialize(params, vars); - DELAY(1000 * 30); - elink_link_int_enable(params); - break; + + switch (params->loopback_mode) { + case ELINK_LOOPBACK_BMAC: + elink_init_bmac_loopback(params, vars); + break; + case ELINK_LOOPBACK_EMAC: + elink_init_emac_loopback(params, vars); + break; + case ELINK_LOOPBACK_XMAC: + elink_init_xmac_loopback(params, vars); + break; + case ELINK_LOOPBACK_UMAC: + elink_init_umac_loopback(params, vars); + break; + case ELINK_LOOPBACK_XGXS: + case ELINK_LOOPBACK_EXT_PHY: + elink_init_xgxs_loopback(params, vars); + break; + default: + if (!CHIP_IS_E3(sc)) { + if (params->switch_cfg == ELINK_SWITCH_CFG_10G) + elink_xgxs_deassert(params); + else + elink_serdes_deassert(sc, params->port); } + elink_link_initialize(params, vars); + DELAY(1000 * 30); + elink_link_int_enable(params); + break; + } elink_update_mng(params, vars->link_status); elink_update_mng_eee(params, vars->eee_status); @@ -12325,22 +12078,12 @@ static elink_status_t elink_link_reset(struct elink_params *params, REG_WR(sc, NIG_REG_BMAC0_OUT_EN + port * 4, 0); REG_WR(sc, NIG_REG_EGRESS_EMAC0_OUT_EN + port * 4, 0); } -#ifdef ELINK_INCLUDE_EMUL - /* Stop BigMac rx */ - if (!(params->feature_config_flags & - ELINK_FEATURE_CONFIG_EMUL_DISABLE_BMAC)) -#endif - if (!CHIP_IS_E3(sc)) - elink_set_bmac_rx(sc, port, 0); -#ifdef ELINK_INCLUDE_EMUL - /* Stop XMAC/UMAC rx */ - if (!(params->feature_config_flags & - ELINK_FEATURE_CONFIG_EMUL_DISABLE_XMAC)) -#endif - if (CHIP_IS_E3(sc) && !CHIP_REV_IS_FPGA(sc)) { - elink_set_xmac_rxtx(params, 0); - elink_set_umac_rxtx(params, 0); - } + if (!CHIP_IS_E3(sc)) + elink_set_bmac_rx(sc, port, 0); + if (CHIP_IS_E3(sc) && !CHIP_REV_IS_FPGA(sc)) { + elink_set_xmac_rxtx(params, 0); + elink_set_umac_rxtx(params, 0); + } /* Disable emac */ if (!CHIP_IS_E3(sc)) REG_WR(sc, NIG_REG_NIG_EMAC0_EN + port * 4, 0); @@ -12376,14 +12119,11 @@ static elink_status_t elink_link_reset(struct elink_params *params, elink_bits_dis(sc, NIG_REG_LATCH_BC_0 + port * 4, 1 << ELINK_NIG_LATCH_BC_ENABLE_MI_INT); } -#if defined(ELINK_INCLUDE_EMUL) || defined(ELINK_INCLUDE_FPGA) - if (!CHIP_REV_IS_SLOW(sc)) -#endif - if (params->phy[ELINK_INT_PHY].link_reset) - params->phy[ELINK_INT_PHY].link_reset(¶ms-> - phy - [ELINK_INT_PHY], - params); + if (params->phy[ELINK_INT_PHY].link_reset) + params->phy[ELINK_INT_PHY].link_reset(¶ms-> + phy + [ELINK_INT_PHY], + params); /* Disable nig ingress interface */ if (!CHIP_IS_E3(sc)) { @@ -12868,10 +12608,6 @@ elink_status_t elink_common_init_phy(struct bnx2x_softc * sc, uint32_t phy_ver, val; uint8_t phy_index = 0; uint32_t ext_phy_type, ext_phy_config; -#if defined(ELINK_INCLUDE_EMUL) || defined(ELINK_INCLUDE_FPGA) - if (CHIP_REV_IS_EMUL(sc) || CHIP_REV_IS_FPGA(sc)) - return ELINK_STATUS_OK; -#endif elink_set_mdio_clk(sc, GRCBASE_EMAC0); elink_set_mdio_clk(sc, GRCBASE_EMAC1); diff --git a/src/dpdk/drivers/net/bnx2x/elink.h b/src/dpdk/drivers/net/bnx2x/elink.h index c4f886a7..9401b7cd 100644 --- a/src/dpdk/drivers/net/bnx2x/elink.h +++ 
b/src/dpdk/drivers/net/bnx2x/elink.h @@ -359,10 +359,6 @@ struct elink_params { #define ELINK_FEATURE_CONFIG_PFC_ENABLED (1<<1) #define ELINK_FEATURE_CONFIG_BC_SUPPORTS_OPT_MDL_VRFY (1<<2) #define ELINK_FEATURE_CONFIG_BC_SUPPORTS_DUAL_PHY_OPT_MDL_VRFY (1<<3) -#define ELINK_FEATURE_CONFIG_EMUL_DISABLE_EMAC (1<<4) -#define ELINK_FEATURE_CONFIG_EMUL_DISABLE_BMAC (1<<5) -#define ELINK_FEATURE_CONFIG_EMUL_DISABLE_UMAC (1<<6) -#define ELINK_FEATURE_CONFIG_EMUL_DISABLE_XMAC (1<<7) #define ELINK_FEATURE_CONFIG_BC_SUPPORTS_AFEX (1<<8) #define ELINK_FEATURE_CONFIG_AUTOGREEEN_ENABLED (1<<9) #define ELINK_FEATURE_CONFIG_BC_SUPPORTS_SFP_TX_DISABLED (1<<10) diff --git a/src/dpdk/drivers/net/bonding/rte_eth_bond_api.c b/src/dpdk/drivers/net/bonding/rte_eth_bond_api.c index 203ebe9e..f552d969 100644 --- a/src/dpdk/drivers/net/bonding/rte_eth_bond_api.c +++ b/src/dpdk/drivers/net/bonding/rte_eth_bond_api.c @@ -37,6 +37,7 @@ #include <rte_malloc.h> #include <rte_ethdev.h> #include <rte_tcp.h> +#include <rte_vdev.h> #include "rte_eth_bond.h" #include "rte_eth_bond_private.h" @@ -44,8 +45,6 @@ #define DEFAULT_POLLING_INTERVAL_10_MS (10) -const char pmd_bond_driver_name[] = "rte_bond_pmd"; - int check_for_bonded_ethdev(const struct rte_eth_dev *eth_dev) { @@ -54,7 +53,7 @@ check_for_bonded_ethdev(const struct rte_eth_dev *eth_dev) return -1; /* return 0 if driver name matches */ - return eth_dev->data->drv_name != pmd_bond_driver_name; + return eth_dev->data->drv_name != pmd_bond_drv.driver.name; } int @@ -166,6 +165,7 @@ rte_eth_bond_create(const char *name, uint8_t mode, uint8_t socket_id) { struct bond_dev_private *internals = NULL; struct rte_eth_dev *eth_dev = NULL; + uint32_t vlan_filter_bmp_size; /* now do all data allocation - for eth_dev structure, dummy pci driver * and internal (private) data @@ -189,7 +189,7 @@ rte_eth_bond_create(const char *name, uint8_t mode, uint8_t socket_id) } /* reserve an ethdev entry */ - eth_dev = rte_eth_dev_allocate(name, RTE_ETH_DEV_VIRTUAL); + eth_dev = rte_eth_dev_allocate(name); if (eth_dev == NULL) { RTE_BOND_LOG(ERR, "Unable to allocate rte_eth_dev"); goto err; @@ -199,10 +199,6 @@ rte_eth_bond_create(const char *name, uint8_t mode, uint8_t socket_id) eth_dev->data->nb_rx_queues = (uint16_t)1; eth_dev->data->nb_tx_queues = (uint16_t)1; - TAILQ_INIT(&(eth_dev->link_intr_cbs)); - - eth_dev->data->dev_link.link_status = ETH_LINK_DOWN; - eth_dev->data->mac_addrs = rte_zmalloc_socket(name, ETHER_ADDR_LEN, 0, socket_id); if (eth_dev->data->mac_addrs == NULL) { @@ -210,17 +206,12 @@ rte_eth_bond_create(const char *name, uint8_t mode, uint8_t socket_id) goto err; } - eth_dev->data->dev_started = 0; - eth_dev->data->promiscuous = 0; - eth_dev->data->scattered_rx = 0; - eth_dev->data->all_multicast = 0; - eth_dev->dev_ops = &default_dev_ops; eth_dev->data->dev_flags = RTE_ETH_DEV_INTR_LSC | RTE_ETH_DEV_DETACHABLE; eth_dev->driver = NULL; eth_dev->data->kdrv = RTE_KDRV_NONE; - eth_dev->data->drv_name = pmd_bond_driver_name; + eth_dev->data->drv_name = pmd_bond_drv.driver.name; eth_dev->data->numa_node = socket_id; rte_spinlock_init(&internals->lock); @@ -260,6 +251,27 @@ rte_eth_bond_create(const char *name, uint8_t mode, uint8_t socket_id) goto err; } + vlan_filter_bmp_size = + rte_bitmap_get_memory_footprint(ETHER_MAX_VLAN_ID + 1); + internals->vlan_filter_bmpmem = rte_malloc(name, vlan_filter_bmp_size, + RTE_CACHE_LINE_SIZE); + if (internals->vlan_filter_bmpmem == NULL) { + RTE_BOND_LOG(ERR, + "Failed to allocate vlan bitmap for bonded device %u\n", + 
eth_dev->data->port_id); + goto err; + } + + internals->vlan_filter_bmp = rte_bitmap_init(ETHER_MAX_VLAN_ID + 1, + internals->vlan_filter_bmpmem, vlan_filter_bmp_size); + if (internals->vlan_filter_bmp == NULL) { + RTE_BOND_LOG(ERR, + "Failed to init vlan bitmap for bonded device %u\n", + eth_dev->data->port_id); + rte_free(internals->vlan_filter_bmpmem); + goto err; + } + return eth_dev->data->port_id; err: @@ -299,6 +311,9 @@ rte_eth_bond_free(const char *name) eth_dev->rx_pkt_burst = NULL; eth_dev->tx_pkt_burst = NULL; + internals = eth_dev->data->dev_private; + rte_bitmap_free(internals->vlan_filter_bmp); + rte_free(internals->vlan_filter_bmpmem); rte_free(eth_dev->data->dev_private); rte_free(eth_dev->data->mac_addrs); @@ -308,6 +323,46 @@ rte_eth_bond_free(const char *name) } static int +slave_vlan_filter_set(uint8_t bonded_port_id, uint8_t slave_port_id) +{ + struct rte_eth_dev *bonded_eth_dev; + struct bond_dev_private *internals; + int found; + int res = 0; + uint64_t slab = 0; + uint32_t pos = 0; + uint16_t first; + + bonded_eth_dev = &rte_eth_devices[bonded_port_id]; + if (bonded_eth_dev->data->dev_conf.rxmode.hw_vlan_filter == 0) + return 0; + + internals = bonded_eth_dev->data->dev_private; + found = rte_bitmap_scan(internals->vlan_filter_bmp, &pos, &slab); + first = pos; + + if (!found) + return 0; + + do { + uint32_t i; + uint64_t mask; + + for (i = 0, mask = 1; + i < RTE_BITMAP_SLAB_BIT_SIZE; + i ++, mask <<= 1) { + if (unlikely(slab & mask)) + res = rte_eth_dev_vlan_filter(slave_port_id, + (uint16_t)pos, 1); + } + found = rte_bitmap_scan(internals->vlan_filter_bmp, + &pos, &slab); + } while (found && first != pos && res == 0); + + return res; +} + +static int __eth_bond_slave_add_lock_free(uint8_t bonded_port_id, uint8_t slave_port_id) { struct rte_eth_dev *bonded_eth_dev, *slave_eth_dev; @@ -373,21 +428,6 @@ __eth_bond_slave_add_lock_free(uint8_t bonded_port_id, uint8_t slave_port_id) internals->candidate_max_rx_pktlen = dev_info.max_rx_pktlen; } else { - /* Check slave link properties are supported if props are set, - * all slaves must be the same */ - if (internals->link_props_set) { - if (link_properties_valid(&(bonded_eth_dev->data->dev_link), - &(slave_eth_dev->data->dev_link))) { - slave_eth_dev->data->dev_flags &= (~RTE_ETH_DEV_BONDED_SLAVE); - RTE_BOND_LOG(ERR, - "Slave port %d link speed/duplex not supported", - slave_port_id); - return -1; - } - } else { - link_properties_set(bonded_eth_dev, - &(slave_eth_dev->data->dev_link)); - } internals->rx_offload_capa &= dev_info.rx_offload_capa; internals->tx_offload_capa &= dev_info.tx_offload_capa; internals->flow_type_rss_offloads &= dev_info.flow_type_rss_offloads; @@ -442,6 +482,9 @@ __eth_bond_slave_add_lock_free(uint8_t bonded_port_id, uint8_t slave_port_id) activate_slave(bonded_eth_dev, slave_port_id); } } + + slave_vlan_filter_set(bonded_port_id, slave_port_id); + return 0; } diff --git a/src/dpdk/drivers/net/bonding/rte_eth_bond_args.c b/src/dpdk/drivers/net/bonding/rte_eth_bond_args.c index 02ecde64..3dca273d 100644 --- a/src/dpdk/drivers/net/bonding/rte_eth_bond_args.c +++ b/src/dpdk/drivers/net/bonding/rte_eth_bond_args.c @@ -54,15 +54,23 @@ const char *pmd_bond_init_valid_arguments[] = { static inline int find_port_id_by_pci_addr(const struct rte_pci_addr *pci_addr) { + struct rte_pci_device *pci_dev; struct rte_pci_addr *eth_pci_addr; unsigned i; for (i = 0; i < rte_eth_dev_count(); i++) { - if (rte_eth_devices[i].pci_dev == NULL) + /* Currently populated by rte_eth_copy_pci_info(). 
+ * + * TODO: Once the PCI bus has arrived we should have a better + * way to test for being a PCI device or not. + */ + if (rte_eth_devices[i].data->kdrv == RTE_KDRV_UNKNOWN || + rte_eth_devices[i].data->kdrv == RTE_KDRV_NONE) continue; - eth_pci_addr = &(rte_eth_devices[i].pci_dev->addr); + pci_dev = RTE_DEV_TO_PCI(rte_eth_devices[i].device); + eth_pci_addr = &pci_dev->addr; if (pci_addr->bus == eth_pci_addr->bus && pci_addr->devid == eth_pci_addr->devid && diff --git a/src/dpdk/drivers/net/bonding/rte_eth_bond_pmd.c b/src/dpdk/drivers/net/bonding/rte_eth_bond_pmd.c index b20a2729..f3ac9e27 100644 --- a/src/dpdk/drivers/net/bonding/rte_eth_bond_pmd.c +++ b/src/dpdk/drivers/net/bonding/rte_eth_bond_pmd.c @@ -42,7 +42,7 @@ #include <rte_ip_frag.h> #include <rte_devargs.h> #include <rte_kvargs.h> -#include <rte_dev.h> +#include <rte_vdev.h> #include <rte_alarm.h> #include <rte_cycles.h> @@ -122,6 +122,15 @@ bond_ethdev_rx_burst_active_backup(void *queue, struct rte_mbuf **bufs, bd_rx_q->queue_id, bufs, nb_pkts); } +static inline uint8_t +is_lacp_packets(uint16_t ethertype, uint8_t subtype, uint16_t vlan_tci) +{ + const uint16_t ether_type_slow_be = rte_be_to_cpu_16(ETHER_TYPE_SLOW); + + return !vlan_tci && (ethertype == ether_type_slow_be && + (subtype == SLOW_SUBTYPE_MARKER || subtype == SLOW_SUBTYPE_LACP)); +} + static uint16_t bond_ethdev_rx_burst_8023ad(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts) @@ -141,6 +150,7 @@ bond_ethdev_rx_burst_8023ad(void *queue, struct rte_mbuf **bufs, uint8_t collecting; /* current slave collecting status */ const uint8_t promisc = internals->promiscuous_en; uint8_t i, j, k; + uint8_t subtype; rte_eth_macaddr_get(internals->port_id, &bond_mac); /* Copy slave list to protect against slave up/down changes during tx @@ -166,10 +176,12 @@ bond_ethdev_rx_burst_8023ad(void *queue, struct rte_mbuf **bufs, rte_prefetch0(rte_pktmbuf_mtod(bufs[j + 3], void *)); hdr = rte_pktmbuf_mtod(bufs[j], struct ether_hdr *); + subtype = ((struct slow_protocol_frame *)hdr)->slow_protocol.subtype; + /* Remove packet from array if it is slow packet or slave is not * in collecting state or bondign interface is not in promiscus * mode and packet address does not match. 
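[Editor's note] The is_lacp_packets() helper added above keeps 802.3ad control frames (LACPDUs and marker PDUs) out of the bonded receive path even when a slave delivers them mixed with data traffic. Below is a minimal standalone sketch of the same classification, with the constants written out as assumptions rather than taken from the DPDK headers; it is illustrative only and not part of the patch.

/* Sketch only: classify a raw, untagged Ethernet frame as an 802.3ad
 * "slow protocols" frame (LACPDU or marker PDU). Constant values are
 * assumptions, not taken from rte_ether.h / rte_eth_bond_8023ad.h. */
#include <stdint.h>

#define ETHERTYPE_SLOW 0x8809 /* IEEE 802.3ad slow protocols */
#define SUBTYPE_LACP   0x01
#define SUBTYPE_MARKER 0x02

static int frame_is_slow_protocol(const uint8_t *frame, uint16_t stripped_vlan_tci)
{
    /* EtherType follows the two 6-byte MAC addresses; the slow-protocol
     * subtype is the first payload byte. */
    uint16_t ethertype = (uint16_t)((frame[12] << 8) | frame[13]);
    uint8_t subtype = frame[14];

    return stripped_vlan_tci == 0 &&
           ethertype == ETHERTYPE_SLOW &&
           (subtype == SUBTYPE_LACP || subtype == SUBTYPE_MARKER);
}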
*/ - if (unlikely(hdr->ether_type == ether_type_slow_be || + if (unlikely(is_lacp_packets(hdr->ether_type, subtype, bufs[j]->vlan_tci) || !collecting || (!promisc && !is_multicast_ether_addr(&hdr->d_addr) && !is_same_ether_addr(&bond_mac, &hdr->d_addr)))) { @@ -888,7 +900,6 @@ bond_ethdev_tx_burst_alb(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts) } num_tx_total += num_send; - num_not_send += slave_bufs_pkts[RTE_MAX_ETHPORTS] - num_send; } return num_tx_total; @@ -1305,8 +1316,6 @@ slave_configure(struct rte_eth_dev *bonded_eth_dev, struct bond_rx_queue *bd_rx_q; struct bond_tx_queue *bd_tx_q; - uint16_t old_nb_tx_queues = slave_eth_dev->data->nb_tx_queues; - uint16_t old_nb_rx_queues = slave_eth_dev->data->nb_rx_queues; int errval; uint16_t q_id; @@ -1335,6 +1344,9 @@ slave_configure(struct rte_eth_dev *bonded_eth_dev, bonded_eth_dev->data->dev_conf.rxmode.mq_mode; } + slave_eth_dev->data->dev_conf.rxmode.hw_vlan_filter = + bonded_eth_dev->data->dev_conf.rxmode.hw_vlan_filter; + /* Configure device */ errval = rte_eth_dev_configure(slave_eth_dev->data->port_id, bonded_eth_dev->data->nb_rx_queues, @@ -1347,9 +1359,7 @@ slave_configure(struct rte_eth_dev *bonded_eth_dev, } /* Setup Rx Queues */ - /* Use existing queues, if any */ - for (q_id = old_nb_rx_queues; - q_id < bonded_eth_dev->data->nb_rx_queues; q_id++) { + for (q_id = 0; q_id < bonded_eth_dev->data->nb_rx_queues; q_id++) { bd_rx_q = (struct bond_rx_queue *)bonded_eth_dev->data->rx_queues[q_id]; errval = rte_eth_rx_queue_setup(slave_eth_dev->data->port_id, q_id, @@ -1365,9 +1375,7 @@ slave_configure(struct rte_eth_dev *bonded_eth_dev, } /* Setup Tx Queues */ - /* Use existing queues, if any */ - for (q_id = old_nb_tx_queues; - q_id < bonded_eth_dev->data->nb_tx_queues; q_id++) { + for (q_id = 0; q_id < bonded_eth_dev->data->nb_tx_queues; q_id++) { bd_tx_q = (struct bond_tx_queue *)bonded_eth_dev->data->tx_queues[q_id]; errval = rte_eth_tx_queue_setup(slave_eth_dev->data->port_id, q_id, @@ -1439,6 +1447,9 @@ slave_remove(struct bond_dev_private *internals, (internals->slave_count - i - 1)); internals->slave_count--; + + /* force reconfiguration of slave interfaces */ + _rte_eth_dev_reset(slave_eth_dev); } static void @@ -1637,7 +1648,10 @@ bond_ethdev_stop(struct rte_eth_dev *eth_dev) void bond_ethdev_close(struct rte_eth_dev *dev) { + struct bond_dev_private *internals = dev->data->dev_private; + bond_ethdev_free_queues(dev); + rte_bitmap_reset(internals->vlan_filter_bmp); } /* forward declaration */ @@ -1657,7 +1671,6 @@ bond_ethdev_info(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info) dev_info->max_tx_queues = (uint16_t)512; dev_info->min_rx_bufsize = 0; - dev_info->pci_dev = NULL; dev_info->rx_offload_capa = internals->rx_offload_capa; dev_info->tx_offload_capa = internals->tx_offload_capa; @@ -1667,6 +1680,35 @@ bond_ethdev_info(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info) } static int +bond_ethdev_vlan_filter_set(struct rte_eth_dev *dev, uint16_t vlan_id, int on) +{ + int res; + uint8_t i; + struct bond_dev_private *internals = dev->data->dev_private; + + /* don't do this while a slave is being added */ + rte_spinlock_lock(&internals->lock); + + if (on) + rte_bitmap_set(internals->vlan_filter_bmp, vlan_id); + else + rte_bitmap_clear(internals->vlan_filter_bmp, vlan_id); + + for (i = 0; i < internals->slave_count; i++) { + uint8_t port_id = internals->slaves[i].port_id; + + res = rte_eth_dev_vlan_filter(port_id, vlan_id, on); + if (res == ENOTSUP) + RTE_LOG(WARNING, PMD, + "Setting VLAN filter 
on slave port %u not supported.\n", + port_id); + } + + rte_spinlock_unlock(&internals->lock); + return 0; +} + +static int bond_ethdev_rx_queue_setup(struct rte_eth_dev *dev, uint16_t rx_queue_id, uint16_t nb_rx_desc, unsigned int socket_id __rte_unused, const struct rte_eth_rxconf *rx_conf, struct rte_mempool *mb_pool) @@ -1923,7 +1965,7 @@ bond_ethdev_delayed_lsc_propagation(void *arg) return; _rte_eth_dev_callback_process((struct rte_eth_dev *)arg, - RTE_ETH_EVENT_INTR_LSC); + RTE_ETH_EVENT_INTR_LSC, NULL); } void @@ -1985,6 +2027,16 @@ bond_ethdev_lsc_event_callback(uint8_t port_id, enum rte_eth_event_type type, /* Inherit eth dev link properties from first active slave */ link_properties_set(bonded_eth_dev, &(slave_eth_dev->data->dev_link)); + } else { + if (link_properties_valid( + &bonded_eth_dev->data->dev_link, &link) != 0) { + slave_eth_dev->data->dev_flags &= + (~RTE_ETH_DEV_BONDED_SLAVE); + RTE_LOG(ERR, PMD, + "port %u invalid speed/duplex\n", + port_id); + return; + } } activate_slave(bonded_eth_dev, port_id); @@ -2034,7 +2086,7 @@ bond_ethdev_lsc_event_callback(uint8_t port_id, enum rte_eth_event_type type, (void *)bonded_eth_dev); else _rte_eth_dev_callback_process(bonded_eth_dev, - RTE_ETH_EVENT_INTR_LSC); + RTE_ETH_EVENT_INTR_LSC, NULL); } else { if (internals->link_down_delay_ms > 0) @@ -2043,7 +2095,7 @@ bond_ethdev_lsc_event_callback(uint8_t port_id, enum rte_eth_event_type type, (void *)bonded_eth_dev); else _rte_eth_dev_callback_process(bonded_eth_dev, - RTE_ETH_EVENT_INTR_LSC); + RTE_ETH_EVENT_INTR_LSC, NULL); } } } @@ -2161,6 +2213,7 @@ const struct eth_dev_ops default_dev_ops = { .dev_close = bond_ethdev_close, .dev_configure = bond_ethdev_configure, .dev_infos_get = bond_ethdev_info, + .vlan_filter_set = bond_ethdev_vlan_filter_set, .rx_queue_setup = bond_ethdev_rx_queue_setup, .tx_queue_setup = bond_ethdev_tx_queue_setup, .rx_queue_release = bond_ethdev_rx_queue_release, @@ -2177,7 +2230,7 @@ const struct eth_dev_ops default_dev_ops = { }; static int -bond_init(const char *name, const char *params) +bond_probe(const char *name, const char *params) { struct bond_dev_private *internals; struct rte_kvargs *kvlist; @@ -2244,7 +2297,7 @@ parse_error: } static int -bond_uninit(const char *name) +bond_remove(const char *name) { int ret; @@ -2508,15 +2561,15 @@ bond_ethdev_configure(struct rte_eth_dev *dev) return 0; } -static struct rte_driver bond_drv = { - .type = PMD_VDEV, - .init = bond_init, - .uninit = bond_uninit, +struct rte_vdev_driver pmd_bond_drv = { + .probe = bond_probe, + .remove = bond_remove, }; -PMD_REGISTER_DRIVER(bond_drv, eth_bond); +RTE_PMD_REGISTER_VDEV(net_bonding, pmd_bond_drv); +RTE_PMD_REGISTER_ALIAS(net_bonding, eth_bond); -DRIVER_REGISTER_PARAM_STRING(eth_bond, +RTE_PMD_REGISTER_PARAM_STRING(net_bonding, "slave=<ifc> " "primary=<ifc> " "mode=[0-6] " diff --git a/src/dpdk/drivers/net/bonding/rte_eth_bond_private.h b/src/dpdk/drivers/net/bonding/rte_eth_bond_private.h index 2bdc9efa..5a411e22 100644 --- a/src/dpdk/drivers/net/bonding/rte_eth_bond_private.h +++ b/src/dpdk/drivers/net/bonding/rte_eth_bond_private.h @@ -36,6 +36,7 @@ #include <rte_ethdev.h> #include <rte_spinlock.h> +#include <rte_bitmap.h> #include "rte_eth_bond.h" #include "rte_eth_bond_8023ad_private.h" @@ -62,7 +63,7 @@ extern const char *pmd_bond_init_valid_arguments[]; -extern const char pmd_bond_driver_name[]; +extern struct rte_vdev_driver pmd_bond_drv; /** Port Queue Mapping Structure */ struct bond_rx_queue { @@ -172,6 +173,9 @@ struct bond_dev_private { uint32_t 
candidate_max_rx_pktlen; uint32_t max_rx_pktlen; + + void *vlan_filter_bmpmem; /* enabled vlan filter bitmap */ + struct rte_bitmap *vlan_filter_bmp; }; extern const struct eth_dev_ops default_dev_ops; diff --git a/src/dpdk/drivers/net/cxgbe/base/adapter.h b/src/dpdk/drivers/net/cxgbe/base/adapter.h index 5e3bd509..beb1e3e6 100644 --- a/src/dpdk/drivers/net/cxgbe/base/adapter.h +++ b/src/dpdk/drivers/net/cxgbe/base/adapter.h @@ -37,6 +37,7 @@ #define __T4_ADAPTER_H__ #include <rte_mbuf.h> +#include <rte_io.h> #include "cxgbe_compat.h" #include "t4_regs_values.h" @@ -324,7 +325,7 @@ struct adapter { int use_unpacked_mode; /* unpacked rx mode state */ }; -#define CXGBE_PCI_REG(reg) (*((volatile uint32_t *)(reg))) +#define CXGBE_PCI_REG(reg) rte_read32(reg) static inline uint64_t cxgbe_read_addr64(volatile void *addr) { @@ -350,16 +351,21 @@ static inline uint32_t cxgbe_read_addr(volatile void *addr) #define CXGBE_READ_REG64(adap, reg) \ cxgbe_read_addr64(CXGBE_PCI_REG_ADDR((adap), (reg))) -#define CXGBE_PCI_REG_WRITE(reg, value) ({ \ - CXGBE_PCI_REG((reg)) = (value); }) +#define CXGBE_PCI_REG_WRITE(reg, value) rte_write32((value), (reg)) + +#define CXGBE_PCI_REG_WRITE_RELAXED(reg, value) \ + rte_write32_relaxed((value), (reg)) #define CXGBE_WRITE_REG(adap, reg, value) \ CXGBE_PCI_REG_WRITE(CXGBE_PCI_REG_ADDR((adap), (reg)), (value)) +#define CXGBE_WRITE_REG_RELAXED(adap, reg, value) \ + CXGBE_PCI_REG_WRITE_RELAXED(CXGBE_PCI_REG_ADDR((adap), (reg)), (value)) + static inline uint64_t cxgbe_write_addr64(volatile void *addr, uint64_t val) { - CXGBE_PCI_REG(addr) = val; - CXGBE_PCI_REG(((volatile uint8_t *)(addr) + 4)) = (val >> 32); + CXGBE_PCI_REG_WRITE(addr, val); + CXGBE_PCI_REG_WRITE(((volatile uint8_t *)(addr) + 4), (val >> 32)); return val; } @@ -383,7 +389,7 @@ static inline u32 t4_read_reg(struct adapter *adapter, u32 reg_addr) } /** - * t4_write_reg - write a HW register + * t4_write_reg - write a HW register with barrier * @adapter: the adapter * @reg_addr: the register address * @val: the value to write @@ -398,6 +404,22 @@ static inline void t4_write_reg(struct adapter *adapter, u32 reg_addr, u32 val) } /** + * t4_write_reg_relaxed - write a HW register with no barrier + * @adapter: the adapter + * @reg_addr: the register address + * @val: the value to write + * + * Write a 32-bit value into the given HW register. + */ +static inline void t4_write_reg_relaxed(struct adapter *adapter, u32 reg_addr, + u32 val) +{ + CXGBE_DEBUG_REG(adapter, "setting register 0x%x to 0x%x\n", reg_addr, + val); + CXGBE_WRITE_REG_RELAXED(adapter, reg_addr, val); +} + +/** * t4_read_reg64 - read a 64-bit HW register * @adapter: the adapter * @reg_addr: the register address diff --git a/src/dpdk/drivers/net/cxgbe/base/t4_hw.c b/src/dpdk/drivers/net/cxgbe/base/t4_hw.c index 7e79adf6..c089b068 100644 --- a/src/dpdk/drivers/net/cxgbe/base/t4_hw.c +++ b/src/dpdk/drivers/net/cxgbe/base/t4_hw.c @@ -1532,7 +1532,7 @@ int t4_seeprom_write(struct adapter *adapter, u32 addr, u32 data) { unsigned int base = adapter->params.pci.vpd_cap_addr; int ret; - u32 stats_reg; + u32 stats_reg = 0; int max_poll; /* VPD Accesses must alway be 4-byte aligned! diff --git a/src/dpdk/drivers/net/cxgbe/cxgbe_compat.h b/src/dpdk/drivers/net/cxgbe/cxgbe_compat.h index e68f8f59..1551cbf5 100644 --- a/src/dpdk/drivers/net/cxgbe/cxgbe_compat.h +++ b/src/dpdk/drivers/net/cxgbe/cxgbe_compat.h @@ -45,6 +45,7 @@ #include <rte_cycles.h> #include <rte_spinlock.h> #include <rte_log.h> +#include <rte_io.h> #define dev_printf(level, fmt, args...) 
\ RTE_LOG(level, PMD, "rte_cxgbe_pmd: " fmt, ## args) @@ -254,7 +255,7 @@ static inline unsigned long ilog2(unsigned long n) static inline void writel(unsigned int val, volatile void __iomem *addr) { - *(volatile unsigned int *)addr = val; + rte_write32(val, addr); } static inline void writeq(u64 val, volatile void __iomem *addr) @@ -263,4 +264,9 @@ static inline void writeq(u64 val, volatile void __iomem *addr) writel(val >> 32, (void *)((uintptr_t)addr + 4)); } +static inline void writel_relaxed(unsigned int val, volatile void __iomem *addr) +{ + rte_write32_relaxed(val, addr); +} + #endif /* _CXGBE_COMPAT_H_ */ diff --git a/src/dpdk/drivers/net/cxgbe/cxgbe_ethdev.c b/src/dpdk/drivers/net/cxgbe/cxgbe_ethdev.c index 9208a615..4d543a7f 100644 --- a/src/dpdk/drivers/net/cxgbe/cxgbe_ethdev.c +++ b/src/dpdk/drivers/net/cxgbe/cxgbe_ethdev.c @@ -68,7 +68,7 @@ * Macros needed to support the PCI Device ID Table ... */ #define CH_PCI_DEVICE_ID_TABLE_DEFINE_BEGIN \ - static struct rte_pci_id cxgb4_pci_tbl[] = { + static const struct rte_pci_id cxgb4_pci_tbl[] = { #define CH_PCI_DEVICE_ID_FUNCTION 0x4 #define PCI_VENDOR_ID_CHELSIO 0x1425 @@ -147,6 +147,8 @@ static void cxgbe_dev_info_get(struct rte_eth_dev *eth_dev, .nb_align = 1, }; + device_info->pci_dev = RTE_DEV_TO_PCI(eth_dev->device); + device_info->min_rx_bufsize = CXGBE_MIN_RX_BUFSIZE; device_info->max_rx_pktlen = CXGBE_MAX_RX_PKTLEN; device_info->max_rx_queues = max_queues; @@ -1005,7 +1007,7 @@ static int eth_cxgbe_dev_init(struct rte_eth_dev *eth_dev) if (rte_eal_process_type() != RTE_PROC_PRIMARY) return 0; - pci_dev = eth_dev->pci_dev; + pci_dev = RTE_DEV_TO_PCI(eth_dev->device); snprintf(name, sizeof(name), "cxgbeadapter%d", eth_dev->data->port_id); adapter = rte_zmalloc(name, sizeof(*adapter), 0); @@ -1039,33 +1041,15 @@ out_free_adapter: static struct eth_driver rte_cxgbe_pmd = { .pci_drv = { - .name = "rte_cxgbe_pmd", .id_table = cxgb4_pci_tbl, .drv_flags = RTE_PCI_DRV_NEED_MAPPING | RTE_PCI_DRV_INTR_LSC, + .probe = rte_eth_dev_pci_probe, + .remove = rte_eth_dev_pci_remove, }, .eth_dev_init = eth_cxgbe_dev_init, .dev_private_size = sizeof(struct port_info), }; -/* - * Driver initialization routine. - * Invoked once at EAL init time. - * Register itself as the [Poll Mode] Driver of PCI CXGBE devices. 
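[Editor's note] The cxgbe changes in this hunk and the surrounding ones replace direct dereferences of mapped BAR addresses with the rte_io accessors and add *_relaxed variants for doorbell writes whose ordering is already enforced elsewhere. A sketch of the intended usage pattern follows, assuming only the public rte_write32_relaxed() and rte_wmb() APIs; it is illustrative only and not part of the patch.

#include <rte_atomic.h>
#include <rte_io.h>

/* Sketch only: descriptor writes are made visible with an explicit write
 * barrier, after which the doorbell can use the relaxed accessor because
 * no further ordering is required for that single MMIO store. */
static inline void ring_doorbell(volatile void *db_reg, uint32_t val)
{
    rte_wmb();                        /* flush descriptor writes first */
    rte_write32_relaxed(val, db_reg); /* doorbell: no extra barrier needed */
}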
- */ -static int rte_cxgbe_pmd_init(const char *name __rte_unused, - const char *params __rte_unused) -{ - CXGBE_FUNC_TRACE(); - - rte_eth_driver_register(&rte_cxgbe_pmd); - return 0; -} - -static struct rte_driver rte_cxgbe_driver = { - .type = PMD_PDEV, - .init = rte_cxgbe_pmd_init, -}; - -PMD_REGISTER_DRIVER(rte_cxgbe_driver, cxgb4); -DRIVER_REGISTER_PCI_TABLE(cxgb4, cxgb4_pci_tbl); - +RTE_PMD_REGISTER_PCI(net_cxgbe, rte_cxgbe_pmd.pci_drv); +RTE_PMD_REGISTER_PCI_TABLE(net_cxgbe, cxgb4_pci_tbl); +RTE_PMD_REGISTER_KMOD_DEP(net_cxgbe, "* igb_uio | uio_pci_generic | vfio"); diff --git a/src/dpdk/drivers/net/cxgbe/cxgbe_main.c b/src/dpdk/drivers/net/cxgbe/cxgbe_main.c index ceaf5ab2..541fc40e 100644 --- a/src/dpdk/drivers/net/cxgbe/cxgbe_main.c +++ b/src/dpdk/drivers/net/cxgbe/cxgbe_main.c @@ -959,7 +959,7 @@ int setup_rss(struct port_info *pi) dev_debug(adapter, "%s: pi->rss_size = %u; pi->n_rx_qsets = %u\n", __func__, pi->rss_size, pi->n_rx_qsets); - if (!pi->flags & PORT_RSS_DONE) { + if (!(pi->flags & PORT_RSS_DONE)) { if (adapter->flags & FULL_INIT_DONE) { /* Fill default values with equal distribution */ for (j = 0; j < pi->rss_size; j++) @@ -1150,7 +1150,7 @@ int cxgbe_probe(struct adapter *adapter) */ /* reserve an ethdev entry */ - pi->eth_dev = rte_eth_dev_allocate(name, RTE_ETH_DEV_PCI); + pi->eth_dev = rte_eth_dev_allocate(name); if (!pi->eth_dev) goto out_free; @@ -1163,16 +1163,14 @@ int cxgbe_probe(struct adapter *adapter) pi->eth_dev->data = data; allocate_mac: - pi->eth_dev->pci_dev = adapter->pdev; + pi->eth_dev->device = &adapter->pdev->device; pi->eth_dev->data->dev_private = pi; pi->eth_dev->driver = adapter->eth_dev->driver; pi->eth_dev->dev_ops = adapter->eth_dev->dev_ops; pi->eth_dev->tx_pkt_burst = adapter->eth_dev->tx_pkt_burst; pi->eth_dev->rx_pkt_burst = adapter->eth_dev->rx_pkt_burst; - rte_eth_copy_pci_info(pi->eth_dev, pi->eth_dev->pci_dev); - - TAILQ_INIT(&pi->eth_dev->link_intr_cbs); + rte_eth_copy_pci_info(pi->eth_dev, adapter->pdev); pi->eth_dev->data->mac_addrs = rte_zmalloc(name, ETHER_ADDR_LEN, 0); diff --git a/src/dpdk/drivers/net/cxgbe/sge.c b/src/dpdk/drivers/net/cxgbe/sge.c index ab5a842a..37b60904 100644 --- a/src/dpdk/drivers/net/cxgbe/sge.c +++ b/src/dpdk/drivers/net/cxgbe/sge.c @@ -338,12 +338,12 @@ static inline void ring_fl_db(struct adapter *adap, struct sge_fl *q) * mechanism. */ if (unlikely(!q->bar2_addr)) { - t4_write_reg(adap, MYPF_REG(A_SGE_PF_KDOORBELL), - val | V_QID(q->cntxt_id)); + t4_write_reg_relaxed(adap, MYPF_REG(A_SGE_PF_KDOORBELL), + val | V_QID(q->cntxt_id)); } else { - writel(val | V_QID(q->bar2_qid), - (void *)((uintptr_t)q->bar2_addr + - SGE_UDB_KDOORBELL)); + writel_relaxed(val | V_QID(q->bar2_qid), + (void *)((uintptr_t)q->bar2_addr + + SGE_UDB_KDOORBELL)); /* * This Write memory Barrier will force the write to @@ -890,15 +890,11 @@ static inline int should_tx_packet_coalesce(struct sge_eth_txq *txq, struct sge_txq *q = &txq->q; unsigned int flits, ndesc; unsigned char type = 0; - int credits, hw_cidx = ntohs(q->stat->cidx); - int in_use = q->pidx - hw_cidx + flits_to_desc(q->coalesce.flits); + int credits; /* use coal WR type 1 when no frags are present */ type = (mbuf->nb_segs == 1) ? 
1 : 0; - if (in_use < 0) - in_use += q->size; - if (unlikely(type != q->coalesce.type && q->coalesce.idx)) ship_tx_pkt_coalesce_wr(adap, txq); @@ -1645,7 +1641,8 @@ int t4_sge_alloc_rxq(struct adapter *adap, struct sge_rspq *iq, bool fwevtq, iq->size = cxgbe_roundup(iq->size, 16); snprintf(z_name, sizeof(z_name), "%s_%s_%d_%d", - eth_dev->driver->pci_drv.name, fwevtq ? "fwq_ring" : "rx_ring", + eth_dev->driver->pci_drv.driver.name, + fwevtq ? "fwq_ring" : "rx_ring", eth_dev->data->port_id, queue_id); snprintf(z_name_sw, sizeof(z_name_sw), "%s_sw_ring", z_name); @@ -1697,7 +1694,7 @@ int t4_sge_alloc_rxq(struct adapter *adap, struct sge_rspq *iq, bool fwevtq, fl->size = cxgbe_roundup(fl->size, 8); snprintf(z_name, sizeof(z_name), "%s_%s_%d_%d", - eth_dev->driver->pci_drv.name, + eth_dev->driver->pci_drv.driver.name, fwevtq ? "fwq_ring" : "fl_ring", eth_dev->data->port_id, queue_id); snprintf(z_name_sw, sizeof(z_name_sw), "%s_sw_ring", z_name); @@ -1893,7 +1890,7 @@ int t4_sge_alloc_eth_txq(struct adapter *adap, struct sge_eth_txq *txq, nentries = txq->q.size + s->stat_len / sizeof(struct tx_desc); snprintf(z_name, sizeof(z_name), "%s_%s_%d_%d", - eth_dev->driver->pci_drv.name, "tx_ring", + eth_dev->driver->pci_drv.driver.name, "tx_ring", eth_dev->data->port_id, queue_id); snprintf(z_name_sw, sizeof(z_name_sw), "%s_sw_ring", z_name); diff --git a/src/dpdk/drivers/net/e1000/base/e1000_82575.c b/src/dpdk/drivers/net/e1000/base/e1000_82575.c index 723885d7..c6400bde 100644 --- a/src/dpdk/drivers/net/e1000/base/e1000_82575.c +++ b/src/dpdk/drivers/net/e1000/base/e1000_82575.c @@ -100,7 +100,6 @@ STATIC s32 e1000_validate_nvm_checksum_with_offset(struct e1000_hw *hw, u16 offset); STATIC s32 e1000_validate_nvm_checksum_i350(struct e1000_hw *hw); STATIC s32 e1000_update_nvm_checksum_i350(struct e1000_hw *hw); -STATIC void e1000_write_vfta_i350(struct e1000_hw *hw, u32 offset, u32 value); STATIC void e1000_clear_vfta_i350(struct e1000_hw *hw); STATIC void e1000_i2c_start(struct e1000_hw *hw); diff --git a/src/dpdk/drivers/net/e1000/base/e1000_82575.h b/src/dpdk/drivers/net/e1000/base/e1000_82575.h index c4986841..4133cdd8 100644 --- a/src/dpdk/drivers/net/e1000/base/e1000_82575.h +++ b/src/dpdk/drivers/net/e1000/base/e1000_82575.h @@ -492,6 +492,7 @@ enum e1000_promisc_type { void e1000_vfta_set_vf(struct e1000_hw *, u16, bool); void e1000_rlpml_set_vf(struct e1000_hw *, u16); s32 e1000_promisc_set_vf(struct e1000_hw *, enum e1000_promisc_type type); +void e1000_write_vfta_i350(struct e1000_hw *hw, u32 offset, u32 value); u16 e1000_rxpbs_adjust_82580(u32 data); s32 e1000_read_emi_reg(struct e1000_hw *hw, u16 addr, u16 *data); s32 e1000_set_eee_i350(struct e1000_hw *hw, bool adv1G, bool adv100M); diff --git a/src/dpdk/drivers/net/e1000/base/e1000_api.c b/src/dpdk/drivers/net/e1000/base/e1000_api.c index 22b96fd1..f7cf83b6 100644 --- a/src/dpdk/drivers/net/e1000/base/e1000_api.c +++ b/src/dpdk/drivers/net/e1000/base/e1000_api.c @@ -298,6 +298,23 @@ s32 e1000_set_mac_type(struct e1000_hw *hw) case E1000_DEV_ID_PCH_I218_V3: mac->type = e1000_pch_lpt; break; + case E1000_DEV_ID_PCH_SPT_I219_LM: + case E1000_DEV_ID_PCH_SPT_I219_V: + case E1000_DEV_ID_PCH_SPT_I219_LM2: + case E1000_DEV_ID_PCH_SPT_I219_V2: + case E1000_DEV_ID_PCH_LBG_I219_LM3: + case E1000_DEV_ID_PCH_SPT_I219_LM4: + case E1000_DEV_ID_PCH_SPT_I219_V4: + case E1000_DEV_ID_PCH_SPT_I219_LM5: + case E1000_DEV_ID_PCH_SPT_I219_V5: + mac->type = e1000_pch_spt; + break; + case E1000_DEV_ID_PCH_CNP_I219_LM6: + case E1000_DEV_ID_PCH_CNP_I219_V6: + 
case E1000_DEV_ID_PCH_CNP_I219_LM7: + case E1000_DEV_ID_PCH_CNP_I219_V7: + mac->type = e1000_pch_cnp; + break; case E1000_DEV_ID_82575EB_COPPER: case E1000_DEV_ID_82575EB_FIBER_SERDES: case E1000_DEV_ID_82575GB_QUAD_COPPER: @@ -448,6 +465,8 @@ s32 e1000_setup_init_funcs(struct e1000_hw *hw, bool init_device) case e1000_pchlan: case e1000_pch2lan: case e1000_pch_lpt: + case e1000_pch_spt: + case e1000_pch_cnp: e1000_init_function_pointers_ich8lan(hw); break; case e1000_82575: @@ -632,8 +651,6 @@ s32 e1000_reset_hw(struct e1000_hw *hw) return -E1000_ERR_CONFIG; } -//TREX_PATCH -extern int eal_err_read_from_file_is_error; /** * e1000_init_hw - Initialize hardware * @hw: pointer to the HW structure @@ -643,9 +660,6 @@ extern int eal_err_read_from_file_is_error; **/ s32 e1000_init_hw(struct e1000_hw *hw) { - //TREX_PATCH - eal_err_read_from_file_is_error = 0; - if (hw->mac.ops.init_hw) return hw->mac.ops.init_hw(hw); diff --git a/src/dpdk/drivers/net/e1000/base/e1000_defines.h b/src/dpdk/drivers/net/e1000/base/e1000_defines.h index 69aa1f23..dbc2bbbe 100644 --- a/src/dpdk/drivers/net/e1000/base/e1000_defines.h +++ b/src/dpdk/drivers/net/e1000/base/e1000_defines.h @@ -198,6 +198,7 @@ POSSIBILITY OF SUCH DAMAGE. #define E1000_RCTL_DTYP_PS 0x00000400 /* Packet Split descriptor */ #define E1000_RCTL_RDMTS_HALF 0x00000000 /* Rx desc min thresh size */ #define E1000_RCTL_RDMTS_HEX 0x00010000 +#define E1000_RCTL_RDMTS1_HEX E1000_RCTL_RDMTS_HEX #define E1000_RCTL_MO_SHIFT 12 /* multicast offset shift */ #define E1000_RCTL_MO_3 0x00003000 /* multicast offset 15:4 */ #define E1000_RCTL_BAM 0x00008000 /* broadcast enable */ @@ -468,6 +469,8 @@ POSSIBILITY OF SUCH DAMAGE. #define ETHERNET_FCS_SIZE 4 #define MAX_JUMBO_FRAME_SIZE 0x3F00 +/* The datasheet maximum supported RX size is 9.5KB (9728 bytes) */ +#define MAX_RX_JUMBO_FRAME_SIZE 0x2600 #define E1000_TX_PTR_GAP 0x1F /* Extended Configuration Control and Size */ @@ -751,6 +754,12 @@ POSSIBILITY OF SUCH DAMAGE. 
#define E1000_TSYNCTXCTL_VALID 0x00000001 /* Tx timestamp valid */ #define E1000_TSYNCTXCTL_ENABLED 0x00000010 /* enable Tx timestamping */ +/* HH Time Sync */ +#define E1000_TSYNCTXCTL_MAX_ALLOWED_DLY_MASK 0x0000F000 /* max delay */ +#define E1000_TSYNCTXCTL_SYNC_COMP_ERR 0x20000000 /* sync err */ +#define E1000_TSYNCTXCTL_SYNC_COMP 0x40000000 /* sync complete */ +#define E1000_TSYNCTXCTL_START_SYNC 0x80000000 /* initiate sync */ + #define E1000_TSYNCRXCTL_VALID 0x00000001 /* Rx timestamp valid */ #define E1000_TSYNCRXCTL_TYPE_MASK 0x0000000E /* Rx type mask */ #define E1000_TSYNCRXCTL_TYPE_L2_V2 0x00 diff --git a/src/dpdk/drivers/net/e1000/base/e1000_hw.h b/src/dpdk/drivers/net/e1000/base/e1000_hw.h index e4e4f764..d9de9fc1 100644 --- a/src/dpdk/drivers/net/e1000/base/e1000_hw.h +++ b/src/dpdk/drivers/net/e1000/base/e1000_hw.h @@ -136,6 +136,19 @@ struct e1000_hw; #define E1000_DEV_ID_PCH_I218_V2 0x15A1 #define E1000_DEV_ID_PCH_I218_LM3 0x15A2 /* Wildcat Point PCH */ #define E1000_DEV_ID_PCH_I218_V3 0x15A3 /* Wildcat Point PCH */ +#define E1000_DEV_ID_PCH_SPT_I219_LM 0x156F /* Sunrise Point PCH */ +#define E1000_DEV_ID_PCH_SPT_I219_V 0x1570 /* Sunrise Point PCH */ +#define E1000_DEV_ID_PCH_SPT_I219_LM2 0x15B7 /* Sunrise Point-H PCH */ +#define E1000_DEV_ID_PCH_SPT_I219_V2 0x15B8 /* Sunrise Point-H PCH */ +#define E1000_DEV_ID_PCH_LBG_I219_LM3 0x15B9 /* LEWISBURG PCH */ +#define E1000_DEV_ID_PCH_SPT_I219_LM4 0x15D7 +#define E1000_DEV_ID_PCH_SPT_I219_V4 0x15D8 +#define E1000_DEV_ID_PCH_SPT_I219_LM5 0x15E3 +#define E1000_DEV_ID_PCH_SPT_I219_V5 0x15D6 +#define E1000_DEV_ID_PCH_CNP_I219_LM6 0x15BD +#define E1000_DEV_ID_PCH_CNP_I219_V6 0x15BE +#define E1000_DEV_ID_PCH_CNP_I219_LM7 0x15BB +#define E1000_DEV_ID_PCH_CNP_I219_V7 0x15BC #define E1000_DEV_ID_82576 0x10C9 #define E1000_DEV_ID_82576_FIBER 0x10E6 #define E1000_DEV_ID_82576_SERDES 0x10E7 @@ -221,6 +234,8 @@ enum e1000_mac_type { e1000_pchlan, e1000_pch2lan, e1000_pch_lpt, + e1000_pch_spt, + e1000_pch_cnp, e1000_82575, e1000_82576, e1000_82580, @@ -950,11 +965,15 @@ struct e1000_dev_spec_ich8lan { E1000_MUTEX nvm_mutex; E1000_MUTEX swflag_mutex; bool nvm_k1_enabled; + bool disable_k1_off; bool eee_disable; u16 eee_lp_ability; #ifdef ULP_SUPPORT enum e1000_ulp_state ulp_state; -#endif /* NAHUM6LP_HW && ULP_SUPPORT */ + bool ulp_capability_disabled; + bool during_suspend_flow; + bool during_dpg_exit; +#endif /* ULP_SUPPORT */ u16 lat_enc; u16 max_ltr_enc; bool smbus_disable; diff --git a/src/dpdk/drivers/net/e1000/base/e1000_ich8lan.c b/src/dpdk/drivers/net/e1000/base/e1000_ich8lan.c index 89d07e90..6dd046d2 100644 --- a/src/dpdk/drivers/net/e1000/base/e1000_ich8lan.c +++ b/src/dpdk/drivers/net/e1000/base/e1000_ich8lan.c @@ -94,10 +94,13 @@ STATIC s32 e1000_set_d3_lplu_state_ich8lan(struct e1000_hw *hw, bool active); STATIC s32 e1000_read_nvm_ich8lan(struct e1000_hw *hw, u16 offset, u16 words, u16 *data); +STATIC s32 e1000_read_nvm_spt(struct e1000_hw *hw, u16 offset, u16 words, + u16 *data); STATIC s32 e1000_write_nvm_ich8lan(struct e1000_hw *hw, u16 offset, u16 words, u16 *data); STATIC s32 e1000_validate_nvm_checksum_ich8lan(struct e1000_hw *hw); STATIC s32 e1000_update_nvm_checksum_ich8lan(struct e1000_hw *hw); +STATIC s32 e1000_update_nvm_checksum_spt(struct e1000_hw *hw); STATIC s32 e1000_valid_led_default_ich8lan(struct e1000_hw *hw, u16 *data); STATIC s32 e1000_id_led_init_pchlan(struct e1000_hw *hw); @@ -125,6 +128,14 @@ STATIC s32 e1000_read_flash_byte_ich8lan(struct e1000_hw *hw, u32 offset, u8 *data); STATIC s32 
e1000_read_flash_data_ich8lan(struct e1000_hw *hw, u32 offset, u8 size, u16 *data); +STATIC s32 e1000_read_flash_data32_ich8lan(struct e1000_hw *hw, u32 offset, + u32 *data); +STATIC s32 e1000_read_flash_dword_ich8lan(struct e1000_hw *hw, + u32 offset, u32 *data); +STATIC s32 e1000_write_flash_data32_ich8lan(struct e1000_hw *hw, + u32 offset, u32 data); +STATIC s32 e1000_retry_write_flash_dword_ich8lan(struct e1000_hw *hw, + u32 offset, u32 dword); STATIC s32 e1000_read_flash_word_ich8lan(struct e1000_hw *hw, u32 offset, u16 *data); STATIC s32 e1000_retry_write_flash_byte_ich8lan(struct e1000_hw *hw, @@ -233,7 +244,7 @@ STATIC bool e1000_phy_is_accessible_pchlan(struct e1000_hw *hw) if (ret_val) return false; out: - if (hw->mac.type == e1000_pch_lpt) { + if (hw->mac.type >= e1000_pch_lpt) { /* Only unforce SMBus if ME is not active */ if (!(E1000_READ_REG(hw, E1000_FWSM) & E1000_ICH_FWSM_FW_VALID)) { @@ -277,7 +288,7 @@ STATIC void e1000_toggle_lanphypc_pch_lpt(struct e1000_hw *hw) mac_reg &= ~E1000_CTRL_LANPHYPC_VALUE; E1000_WRITE_REG(hw, E1000_CTRL, mac_reg); E1000_WRITE_FLUSH(hw); - usec_delay(10); + msec_delay(1); mac_reg &= ~E1000_CTRL_LANPHYPC_OVERRIDE; E1000_WRITE_REG(hw, E1000_CTRL, mac_reg); E1000_WRITE_FLUSH(hw); @@ -334,6 +345,8 @@ STATIC s32 e1000_init_phy_workarounds_pchlan(struct e1000_hw *hw) */ switch (hw->mac.type) { case e1000_pch_lpt: + case e1000_pch_spt: + case e1000_pch_cnp: if (e1000_phy_is_accessible_pchlan(hw)) break; @@ -481,6 +494,8 @@ STATIC s32 e1000_init_phy_params_pchlan(struct e1000_hw *hw) /* fall-through */ case e1000_pch2lan: case e1000_pch_lpt: + case e1000_pch_spt: + case e1000_pch_cnp: /* In case the PHY needs to be in mdio slow mode, * set slow mode and try to get the PHY id again. */ @@ -623,36 +638,57 @@ STATIC s32 e1000_init_nvm_params_ich8lan(struct e1000_hw *hw) struct e1000_dev_spec_ich8lan *dev_spec = &hw->dev_spec.ich8lan; u32 gfpreg, sector_base_addr, sector_end_addr; u16 i; + u32 nvm_size; DEBUGFUNC("e1000_init_nvm_params_ich8lan"); - /* Can't read flash registers if the register set isn't mapped. */ nvm->type = e1000_nvm_flash_sw; - if (!hw->flash_address) { - DEBUGOUT("ERROR: Flash registers not mapped\n"); - return -E1000_ERR_CONFIG; - } - gfpreg = E1000_READ_FLASH_REG(hw, ICH_FLASH_GFPREG); + if (hw->mac.type >= e1000_pch_spt) { + /* in SPT, gfpreg doesn't exist. NVM size is taken from the + * STRAP register. This is because in SPT the GbE Flash region + * is no longer accessed through the flash registers. Instead, + * the mechanism has changed, and the Flash region access + * registers are now implemented in GbE memory space. + */ + nvm->flash_base_addr = 0; + nvm_size = + (((E1000_READ_REG(hw, E1000_STRAP) >> 1) & 0x1F) + 1) + * NVM_SIZE_MULTIPLIER; + nvm->flash_bank_size = nvm_size / 2; + /* Adjust to word count */ + nvm->flash_bank_size /= sizeof(u16); + /* Set the base address for flash register access */ + hw->flash_address = hw->hw_addr + E1000_FLASH_BASE_ADDR; + } else { + /* Can't read flash registers if register set isn't mapped. */ + if (!hw->flash_address) { + DEBUGOUT("ERROR: Flash registers not mapped\n"); + return -E1000_ERR_CONFIG; + } - /* sector_X_addr is a "sector"-aligned address (4096 bytes) - * Add 1 to sector_end_addr since this sector is included in - * the overall size. 
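[Editor's note] On SPT the GbE flash region is no longer described by GFPREG; as the hunk above shows, the size is derived from a 5-bit field in the STRAP register. A small worked sketch of that bank-size arithmetic, assuming NVM_SIZE_MULTIPLIER is 4096 bytes as in the e1000 base code; it is illustrative only and not part of the patch.

#include <stdint.h>

/* Sketch only: a strap field of 0x1F gives (31 + 1) * 4096 = 128 KB of
 * flash, i.e. 64 KB per bank, or 32768 16-bit words per bank. */
static uint32_t spt_flash_bank_size_words(uint32_t strap_reg)
{
    uint32_t nvm_bytes = (((strap_reg >> 1) & 0x1F) + 1) * 4096u;
    return (nvm_bytes / 2) / (uint32_t)sizeof(uint16_t);
}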
- */ - sector_base_addr = gfpreg & FLASH_GFPREG_BASE_MASK; - sector_end_addr = ((gfpreg >> 16) & FLASH_GFPREG_BASE_MASK) + 1; + gfpreg = E1000_READ_FLASH_REG(hw, ICH_FLASH_GFPREG); - /* flash_base_addr is byte-aligned */ - nvm->flash_base_addr = sector_base_addr << FLASH_SECTOR_ADDR_SHIFT; + /* sector_X_addr is a "sector"-aligned address (4096 bytes) + * Add 1 to sector_end_addr since this sector is included in + * the overall size. + */ + sector_base_addr = gfpreg & FLASH_GFPREG_BASE_MASK; + sector_end_addr = ((gfpreg >> 16) & FLASH_GFPREG_BASE_MASK) + 1; - /* find total size of the NVM, then cut in half since the total - * size represents two separate NVM banks. - */ - nvm->flash_bank_size = ((sector_end_addr - sector_base_addr) - << FLASH_SECTOR_ADDR_SHIFT); - nvm->flash_bank_size /= 2; - /* Adjust to word count */ - nvm->flash_bank_size /= sizeof(u16); + /* flash_base_addr is byte-aligned */ + nvm->flash_base_addr = sector_base_addr + << FLASH_SECTOR_ADDR_SHIFT; + + /* find total size of the NVM, then cut in half since the total + * size represents two separate NVM banks. + */ + nvm->flash_bank_size = ((sector_end_addr - sector_base_addr) + << FLASH_SECTOR_ADDR_SHIFT); + nvm->flash_bank_size /= 2; + /* Adjust to word count */ + nvm->flash_bank_size /= sizeof(u16); + } nvm->word_size = E1000_SHADOW_RAM_WORDS; @@ -668,8 +704,13 @@ STATIC s32 e1000_init_nvm_params_ich8lan(struct e1000_hw *hw) /* Function Pointers */ nvm->ops.acquire = e1000_acquire_nvm_ich8lan; nvm->ops.release = e1000_release_nvm_ich8lan; - nvm->ops.read = e1000_read_nvm_ich8lan; - nvm->ops.update = e1000_update_nvm_checksum_ich8lan; + if (hw->mac.type >= e1000_pch_spt) { + nvm->ops.read = e1000_read_nvm_spt; + nvm->ops.update = e1000_update_nvm_checksum_spt; + } else { + nvm->ops.read = e1000_read_nvm_ich8lan; + nvm->ops.update = e1000_update_nvm_checksum_ich8lan; + } nvm->ops.valid_led_default = e1000_valid_led_default_ich8lan; nvm->ops.validate = e1000_validate_nvm_checksum_ich8lan; nvm->ops.write = e1000_write_nvm_ich8lan; @@ -758,6 +799,8 @@ STATIC s32 e1000_init_mac_params_ich8lan(struct e1000_hw *hw) mac->ops.rar_set = e1000_rar_set_pch2lan; /* fall-through */ case e1000_pch_lpt: + case e1000_pch_spt: + case e1000_pch_cnp: #ifndef NO_NON_BLOCKING_PHY_MTA_UPDATE_SUPPORT /* multicast address update for pch2 */ mac->ops.update_mc_addr_list = @@ -768,7 +811,13 @@ STATIC s32 e1000_init_mac_params_ich8lan(struct e1000_hw *hw) #if defined(QV_RELEASE) || !defined(NO_PCH_LPT_B0_SUPPORT) /* save PCH revision_id */ e1000_read_pci_cfg(hw, E1000_PCI_REVISION_ID_REG, &pci_cfg); - hw->revision_id = (u8)(pci_cfg &= 0x000F); + /* SPT uses full byte for revision ID, + * as opposed to previous generations + */ + if (hw->mac.type >= e1000_pch_spt) + hw->revision_id = (u8)(pci_cfg &= 0x00FF); + else + hw->revision_id = (u8)(pci_cfg &= 0x000F); #endif /* QV_RELEASE || !defined(NO_PCH_LPT_B0_SUPPORT) */ /* check management mode */ mac->ops.check_mng_mode = e1000_check_mng_mode_pchlan; @@ -786,7 +835,7 @@ STATIC s32 e1000_init_mac_params_ich8lan(struct e1000_hw *hw) break; } - if (mac->type == e1000_pch_lpt) { + if (mac->type >= e1000_pch_lpt) { mac->rar_entry_count = E1000_PCH_LPT_RAR_ENTRIES; mac->ops.rar_set = e1000_rar_set_pch_lpt; mac->ops.setup_physical_interface = e1000_setup_copper_link_pch_lpt; @@ -1015,8 +1064,9 @@ release: /* clear FEXTNVM6 bit 8 on link down or 10/100 */ fextnvm6 &= ~E1000_FEXTNVM6_REQ_PLL_CLK; - if (!link || ((status & E1000_STATUS_SPEED_100) && - (status & E1000_STATUS_FD))) + if ((hw->phy.revision > 5) || 
!link || + ((status & E1000_STATUS_SPEED_100) && + (status & E1000_STATUS_FD))) goto update_fextnvm6; ret_val = hw->phy.ops.read_reg(hw, I217_INBAND_CTRL, ®); @@ -1068,6 +1118,7 @@ s32 e1000_enable_ulp_lpt_lp(struct e1000_hw *hw, bool to_sx) u32 mac_reg; s32 ret_val = E1000_SUCCESS; u16 phy_reg; + u16 oem_reg = 0; if ((hw->mac.type < e1000_pch_lpt) || (hw->device_id == E1000_DEV_ID_PCH_LPT_I217_LM) || @@ -1128,6 +1179,25 @@ s32 e1000_enable_ulp_lpt_lp(struct e1000_hw *hw, bool to_sx) mac_reg |= E1000_CTRL_EXT_FORCE_SMBUS; E1000_WRITE_REG(hw, E1000_CTRL_EXT, mac_reg); + /* Si workaround for ULP entry flow on i127/rev6 h/w. Enable + * LPLU and disable Gig speed when entering ULP + */ + if ((hw->phy.type == e1000_phy_i217) && (hw->phy.revision == 6)) { + ret_val = e1000_read_phy_reg_hv_locked(hw, HV_OEM_BITS, + &oem_reg); + if (ret_val) + goto release; + + phy_reg = oem_reg; + phy_reg |= HV_OEM_BITS_LPLU | HV_OEM_BITS_GBE_DIS; + + ret_val = e1000_write_phy_reg_hv_locked(hw, HV_OEM_BITS, + phy_reg); + + if (ret_val) + goto release; + } + skip_smbus: if (!to_sx) { /* Change the 'Link Status Change' interrupt to trigger @@ -1184,6 +1254,14 @@ skip_smbus: E1000_WRITE_REG(hw, E1000_TCTL, mac_reg); } + if ((hw->phy.type == e1000_phy_i217) && (hw->phy.revision == 6) && + to_sx && (E1000_READ_REG(hw, E1000_STATUS) & E1000_STATUS_LU)) { + ret_val = e1000_write_phy_reg_hv_locked(hw, HV_OEM_BITS, + oem_reg); + if (ret_val) + goto release; + } + release: hw->phy.ops.release(hw); out: @@ -1240,10 +1318,10 @@ s32 e1000_disable_ulp_lpt_lp(struct e1000_hw *hw, bool force) E1000_WRITE_REG(hw, E1000_H2ME, mac_reg); } - /* Poll up to 100msec for ME to clear ULP_CFG_DONE */ + /* Poll up to 300msec for ME to clear ULP_CFG_DONE. */ while (E1000_READ_REG(hw, E1000_FWSM) & E1000_FWSM_ULP_CFG_DONE) { - if (i++ == 10) { + if (i++ == 30) { ret_val = -E1000_ERR_PHY; goto out; } @@ -1343,6 +1421,8 @@ s32 e1000_disable_ulp_lpt_lp(struct e1000_hw *hw, bool force) I218_ULP_CONFIG1_RESET_TO_SMBUS | I218_ULP_CONFIG1_WOL_HOST | I218_ULP_CONFIG1_INBAND_EXIT | + I218_ULP_CONFIG1_EN_ULP_LANPHYPC | + I218_ULP_CONFIG1_DIS_CLR_STICKY_ON_PERST | I218_ULP_CONFIG1_DISABLE_SMB_PERST); e1000_write_phy_reg_hv_locked(hw, I218_ULP_CONFIG1, phy_reg); @@ -1360,6 +1440,8 @@ s32 e1000_disable_ulp_lpt_lp(struct e1000_hw *hw, bool force) if (hw->mac.autoneg) e1000_phy_setup_autoneg(hw); + else + e1000_setup_copper_link_generic(hw); e1000_sw_lcd_config_ich8lan(hw); @@ -1397,6 +1479,8 @@ out: } #endif /* ULP_SUPPORT */ + + /** * e1000_check_for_copper_link_ich8lan - Check for link (Copper) * @hw: pointer to the HW structure @@ -1456,8 +1540,7 @@ STATIC s32 e1000_check_for_copper_link_ich8lan(struct e1000_hw *hw) * aggressive resulting in many collisions. To avoid this, increase * the IPG and reduce Rx latency in the PHY. 
*/ - if (((hw->mac.type == e1000_pch2lan) || - (hw->mac.type == e1000_pch_lpt)) && link) { + if ((hw->mac.type >= e1000_pch2lan) && link) { u16 speed, duplex; e1000_get_speed_and_duplex_copper_generic(hw, &speed, &duplex); @@ -1468,6 +1551,10 @@ STATIC s32 e1000_check_for_copper_link_ich8lan(struct e1000_hw *hw) tipg_reg |= 0xFF; /* Reduce Rx latency in analog PHY */ emi_val = 0; + } else if (hw->mac.type >= e1000_pch_spt && + duplex == FULL_DUPLEX && speed != SPEED_1000) { + tipg_reg |= 0xC; + emi_val = 1; } else { /* Roll back the default values */ tipg_reg |= 0x08; @@ -1486,10 +1573,78 @@ STATIC s32 e1000_check_for_copper_link_ich8lan(struct e1000_hw *hw) emi_addr = I217_RX_CONFIG; ret_val = e1000_write_emi_reg_locked(hw, emi_addr, emi_val); + + if (hw->mac.type >= e1000_pch_lpt) { + u16 phy_reg; + + hw->phy.ops.read_reg_locked(hw, I217_PLL_CLOCK_GATE_REG, + &phy_reg); + phy_reg &= ~I217_PLL_CLOCK_GATE_MASK; + if (speed == SPEED_100 || speed == SPEED_10) + phy_reg |= 0x3E8; + else + phy_reg |= 0xFA; + hw->phy.ops.write_reg_locked(hw, + I217_PLL_CLOCK_GATE_REG, + phy_reg); + + if (speed == SPEED_1000) { + hw->phy.ops.read_reg_locked(hw, HV_PM_CTRL, + &phy_reg); + + phy_reg |= HV_PM_CTRL_K1_CLK_REQ; + + hw->phy.ops.write_reg_locked(hw, HV_PM_CTRL, + phy_reg); + } + } hw->phy.ops.release(hw); if (ret_val) return ret_val; + + if (hw->mac.type >= e1000_pch_spt) { + u16 data; + u16 ptr_gap; + + if (speed == SPEED_1000) { + ret_val = hw->phy.ops.acquire(hw); + if (ret_val) + return ret_val; + + ret_val = hw->phy.ops.read_reg_locked(hw, + PHY_REG(776, 20), + &data); + if (ret_val) { + hw->phy.ops.release(hw); + return ret_val; + } + + ptr_gap = (data & (0x3FF << 2)) >> 2; + if (ptr_gap < 0x18) { + data &= ~(0x3FF << 2); + data |= (0x18 << 2); + ret_val = + hw->phy.ops.write_reg_locked(hw, + PHY_REG(776, 20), data); + } + hw->phy.ops.release(hw); + if (ret_val) + return ret_val; + } else { + ret_val = hw->phy.ops.acquire(hw); + if (ret_val) + return ret_val; + + ret_val = hw->phy.ops.write_reg_locked(hw, + PHY_REG(776, 20), + 0xC023); + hw->phy.ops.release(hw); + if (ret_val) + return ret_val; + + } + } } /* I217 Packet Loss issue: @@ -1497,7 +1652,7 @@ STATIC s32 e1000_check_for_copper_link_ich8lan(struct e1000_hw *hw) * on power up. 
* Set the Beacon Duration for I217 to 8 usec */ - if (hw->mac.type == e1000_pch_lpt) { + if (hw->mac.type >= e1000_pch_lpt) { u32 mac_reg; mac_reg = E1000_READ_REG(hw, E1000_FEXTNVM4); @@ -1519,10 +1674,29 @@ STATIC s32 e1000_check_for_copper_link_ich8lan(struct e1000_hw *hw) hw->dev_spec.ich8lan.eee_lp_ability = 0; /* Configure K0s minimum time */ - if (hw->mac.type == e1000_pch_lpt) { + if (hw->mac.type >= e1000_pch_lpt) { e1000_configure_k0s_lpt(hw, K1_ENTRY_LATENCY, K1_MIN_TIME); } + if (hw->mac.type >= e1000_pch_lpt) { + u32 fextnvm6 = E1000_READ_REG(hw, E1000_FEXTNVM6); + + if (hw->mac.type == e1000_pch_spt) { + /* FEXTNVM6 K1-off workaround - for SPT only */ + u32 pcieanacfg = E1000_READ_REG(hw, E1000_PCIEANACFG); + + if (pcieanacfg & E1000_FEXTNVM6_K1_OFF_ENABLE) + fextnvm6 |= E1000_FEXTNVM6_K1_OFF_ENABLE; + else + fextnvm6 &= ~E1000_FEXTNVM6_K1_OFF_ENABLE; + } + + if (hw->dev_spec.ich8lan.disable_k1_off == true) + fextnvm6 &= ~E1000_FEXTNVM6_K1_OFF_ENABLE; + + E1000_WRITE_REG(hw, E1000_FEXTNVM6, fextnvm6); + } + if (!link) return E1000_SUCCESS; /* No link detected */ @@ -1616,6 +1790,8 @@ void e1000_init_function_pointers_ich8lan(struct e1000_hw *hw) case e1000_pchlan: case e1000_pch2lan: case e1000_pch_lpt: + case e1000_pch_spt: + case e1000_pch_cnp: hw->phy.ops.init_params = e1000_init_phy_params_pchlan; break; default: @@ -2081,6 +2257,8 @@ STATIC s32 e1000_sw_lcd_config_ich8lan(struct e1000_hw *hw) case e1000_pchlan: case e1000_pch2lan: case e1000_pch_lpt: + case e1000_pch_spt: + case e1000_pch_cnp: sw_cfg_mask = E1000_FEXTNVM_SW_CONFIG_ICH8M; break; default: @@ -3204,6 +3382,41 @@ STATIC s32 e1000_valid_nvm_bank_detect_ich8lan(struct e1000_hw *hw, u32 *bank) DEBUGFUNC("e1000_valid_nvm_bank_detect_ich8lan"); switch (hw->mac.type) { + case e1000_pch_spt: + case e1000_pch_cnp: + bank1_offset = nvm->flash_bank_size; + act_offset = E1000_ICH_NVM_SIG_WORD; + + /* set bank to 0 in case flash read fails */ + *bank = 0; + + /* Check bank 0 */ + ret_val = e1000_read_flash_dword_ich8lan(hw, act_offset, + &nvm_dword); + if (ret_val) + return ret_val; + sig_byte = (u8)((nvm_dword & 0xFF00) >> 8); + if ((sig_byte & E1000_ICH_NVM_VALID_SIG_MASK) == + E1000_ICH_NVM_SIG_VALUE) { + *bank = 0; + return E1000_SUCCESS; + } + + /* Check bank 1 */ + ret_val = e1000_read_flash_dword_ich8lan(hw, act_offset + + bank1_offset, + &nvm_dword); + if (ret_val) + return ret_val; + sig_byte = (u8)((nvm_dword & 0xFF00) >> 8); + if ((sig_byte & E1000_ICH_NVM_VALID_SIG_MASK) == + E1000_ICH_NVM_SIG_VALUE) { + *bank = 1; + return E1000_SUCCESS; + } + + DEBUGOUT("ERROR: No valid NVM bank present\n"); + return -E1000_ERR_NVM; case e1000_ich8lan: case e1000_ich9lan: eecd = E1000_READ_REG(hw, E1000_EECD); @@ -3251,6 +3464,99 @@ STATIC s32 e1000_valid_nvm_bank_detect_ich8lan(struct e1000_hw *hw, u32 *bank) } /** + * e1000_read_nvm_spt - NVM access for SPT + * @hw: pointer to the HW structure + * @offset: The offset (in bytes) of the word(s) to read. + * @words: Size of data to read in words. + * @data: pointer to the word(s) to read at offset. 
+ * + * Reads a word(s) from the NVM + **/ +STATIC s32 e1000_read_nvm_spt(struct e1000_hw *hw, u16 offset, u16 words, + u16 *data) +{ + struct e1000_nvm_info *nvm = &hw->nvm; + struct e1000_dev_spec_ich8lan *dev_spec = &hw->dev_spec.ich8lan; + u32 act_offset; + s32 ret_val = E1000_SUCCESS; + u32 bank = 0; + u32 dword = 0; + u16 offset_to_read; + u16 i; + + DEBUGFUNC("e1000_read_nvm_spt"); + + if ((offset >= nvm->word_size) || (words > nvm->word_size - offset) || + (words == 0)) { + DEBUGOUT("nvm parameter(s) out of bounds\n"); + ret_val = -E1000_ERR_NVM; + goto out; + } + + nvm->ops.acquire(hw); + + ret_val = e1000_valid_nvm_bank_detect_ich8lan(hw, &bank); + if (ret_val != E1000_SUCCESS) { + DEBUGOUT("Could not detect valid bank, assuming bank 0\n"); + bank = 0; + } + + act_offset = (bank) ? nvm->flash_bank_size : 0; + act_offset += offset; + + ret_val = E1000_SUCCESS; + + for (i = 0; i < words; i += 2) { + if (words - i == 1) { + if (dev_spec->shadow_ram[offset+i].modified) { + data[i] = dev_spec->shadow_ram[offset+i].value; + } else { + offset_to_read = act_offset + i - + ((act_offset + i) % 2); + ret_val = + e1000_read_flash_dword_ich8lan(hw, + offset_to_read, + &dword); + if (ret_val) + break; + if ((act_offset + i) % 2 == 0) + data[i] = (u16)(dword & 0xFFFF); + else + data[i] = (u16)((dword >> 16) & 0xFFFF); + } + } else { + offset_to_read = act_offset + i; + if (!(dev_spec->shadow_ram[offset+i].modified) || + !(dev_spec->shadow_ram[offset+i+1].modified)) { + ret_val = + e1000_read_flash_dword_ich8lan(hw, + offset_to_read, + &dword); + if (ret_val) + break; + } + if (dev_spec->shadow_ram[offset+i].modified) + data[i] = dev_spec->shadow_ram[offset+i].value; + else + data[i] = (u16) (dword & 0xFFFF); + if (dev_spec->shadow_ram[offset+i].modified) + data[i+1] = + dev_spec->shadow_ram[offset+i+1].value; + else + data[i+1] = (u16) (dword >> 16 & 0xFFFF); + } + } + + nvm->ops.release(hw); + +out: + if (ret_val) + DEBUGOUT1("NVM read error: %d\n", ret_val); + + return ret_val; +} + +/** * e1000_read_nvm_ich8lan - Read word(s) from the NVM * @hw: pointer to the HW structure * @offset: The offset (in bytes) of the word(s) to read. @@ -3337,7 +3643,11 @@ STATIC s32 e1000_flash_cycle_init_ich8lan(struct e1000_hw *hw) /* Clear FCERR and DAEL in hw status by writing 1 */ hsfsts.hsf_status.flcerr = 1; hsfsts.hsf_status.dael = 1; - E1000_WRITE_FLASH_REG16(hw, ICH_FLASH_HSFSTS, hsfsts.regval); + if (hw->mac.type >= e1000_pch_spt) + E1000_WRITE_FLASH_REG(hw, ICH_FLASH_HSFSTS, + hsfsts.regval & 0xFFFF); + else + E1000_WRITE_FLASH_REG16(hw, ICH_FLASH_HSFSTS, hsfsts.regval); /* Either we should have a hardware SPI cycle in progress * bit to check against, in order to start a new cycle or @@ -3353,7 +3663,12 @@ STATIC s32 e1000_flash_cycle_init_ich8lan(struct e1000_hw *hw) * Begin by setting Flash Cycle Done. */ hsfsts.hsf_status.flcdone = 1; - E1000_WRITE_FLASH_REG16(hw, ICH_FLASH_HSFSTS, hsfsts.regval); + if (hw->mac.type >= e1000_pch_spt) + E1000_WRITE_FLASH_REG(hw, ICH_FLASH_HSFSTS, + hsfsts.regval & 0xFFFF); + else + E1000_WRITE_FLASH_REG16(hw, ICH_FLASH_HSFSTS, + hsfsts.regval); ret_val = E1000_SUCCESS; } else { s32 i; @@ -3375,8 +3690,12 @@ STATIC s32 e1000_flash_cycle_init_ich8lan(struct e1000_hw *hw) * now set the Flash Cycle Done. 
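[Editor's note] Because SPT flash is only dword-accessible, e1000_read_nvm_spt() above reads a 32-bit dword per flash cycle and splits it into two 16-bit words by offset parity. A sketch of that unpacking, independent of the driver structures; illustrative only and not part of the patch.

#include <stdint.h>

/* Sketch only: a word at an even flash offset sits in the low half of the
 * dword read from flash, a word at an odd offset in the high half. */
static uint16_t nvm_word_from_dword(uint32_t dword, uint32_t word_offset)
{
    return (word_offset % 2 == 0) ? (uint16_t)(dword & 0xFFFF)
                                  : (uint16_t)((dword >> 16) & 0xFFFF);
}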
*/ hsfsts.hsf_status.flcdone = 1; - E1000_WRITE_FLASH_REG16(hw, ICH_FLASH_HSFSTS, - hsfsts.regval); + if (hw->mac.type >= e1000_pch_spt) + E1000_WRITE_FLASH_REG(hw, ICH_FLASH_HSFSTS, + hsfsts.regval & 0xFFFF); + else + E1000_WRITE_FLASH_REG16(hw, ICH_FLASH_HSFSTS, + hsfsts.regval); } else { DEBUGOUT("Flash controller busy, cannot get access\n"); } @@ -3401,10 +3720,17 @@ STATIC s32 e1000_flash_cycle_ich8lan(struct e1000_hw *hw, u32 timeout) DEBUGFUNC("e1000_flash_cycle_ich8lan"); /* Start a cycle by writing 1 in Flash Cycle Go in Hw Flash Control */ - hsflctl.regval = E1000_READ_FLASH_REG16(hw, ICH_FLASH_HSFCTL); + if (hw->mac.type >= e1000_pch_spt) + hsflctl.regval = E1000_READ_FLASH_REG(hw, ICH_FLASH_HSFSTS)>>16; + else + hsflctl.regval = E1000_READ_FLASH_REG16(hw, ICH_FLASH_HSFCTL); hsflctl.hsf_ctrl.flcgo = 1; - E1000_WRITE_FLASH_REG16(hw, ICH_FLASH_HSFCTL, hsflctl.regval); + if (hw->mac.type >= e1000_pch_spt) + E1000_WRITE_FLASH_REG(hw, ICH_FLASH_HSFSTS, + hsflctl.regval << 16); + else + E1000_WRITE_FLASH_REG16(hw, ICH_FLASH_HSFCTL, hsflctl.regval); /* wait till FDONE bit is set to 1 */ do { @@ -3421,6 +3747,29 @@ STATIC s32 e1000_flash_cycle_ich8lan(struct e1000_hw *hw, u32 timeout) } /** + * e1000_read_flash_dword_ich8lan - Read dword from flash + * @hw: pointer to the HW structure + * @offset: offset to data location + * @data: pointer to the location for storing the data + * + * Reads the flash dword at offset into data. Offset is converted + * to bytes before read. + **/ +STATIC s32 e1000_read_flash_dword_ich8lan(struct e1000_hw *hw, u32 offset, + u32 *data) +{ + DEBUGFUNC("e1000_read_flash_dword_ich8lan"); + + if (!data) + return -E1000_ERR_NVM; + + /* Must convert word offset into bytes. */ + offset <<= 1; + + return e1000_read_flash_data32_ich8lan(hw, offset, data); +} + +/** * e1000_read_flash_word_ich8lan - Read word from flash * @hw: pointer to the HW structure * @offset: offset to data location @@ -3457,7 +3806,13 @@ STATIC s32 e1000_read_flash_byte_ich8lan(struct e1000_hw *hw, u32 offset, s32 ret_val; u16 word = 0; - ret_val = e1000_read_flash_data_ich8lan(hw, offset, 1, &word); + /* In SPT, only 32 bits access is supported, + * so this function should not be called. + */ + if (hw->mac.type >= e1000_pch_spt) + return -E1000_ERR_NVM; + else + ret_val = e1000_read_flash_data_ich8lan(hw, offset, 1, &word); if (ret_val) return ret_val; @@ -3543,6 +3898,83 @@ STATIC s32 e1000_read_flash_data_ich8lan(struct e1000_hw *hw, u32 offset, return ret_val; } +/** + * e1000_read_flash_data32_ich8lan - Read dword from NVM + * @hw: pointer to the HW structure + * @offset: The offset (in bytes) of the dword to read. + * @data: Pointer to the dword to store the value read. + * + * Reads a byte or word from the NVM using the flash access registers. + **/ +STATIC s32 e1000_read_flash_data32_ich8lan(struct e1000_hw *hw, u32 offset, + u32 *data) +{ + union ich8_hws_flash_status hsfsts; + union ich8_hws_flash_ctrl hsflctl; + u32 flash_linear_addr; + s32 ret_val = -E1000_ERR_NVM; + u8 count = 0; + + DEBUGFUNC("e1000_read_flash_data_ich8lan"); + + if (offset > ICH_FLASH_LINEAR_ADDR_MASK || + hw->mac.type < e1000_pch_spt) + return -E1000_ERR_NVM; + flash_linear_addr = ((ICH_FLASH_LINEAR_ADDR_MASK & offset) + + hw->nvm.flash_base_addr); + + do { + usec_delay(1); + /* Steps */ + ret_val = e1000_flash_cycle_init_ich8lan(hw); + if (ret_val != E1000_SUCCESS) + break; + /* In SPT, This register is in Lan memory space, not flash. 
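The `>> 16` on read and `<< 16` on write in e1000_flash_cycle_ich8lan() reflect the SPT layout in which HSFCTL no longer has its own 16-bit register: the control word occupies the upper half of the combined 32-bit HSFSTS/HSFCTL location in LAN memory space. A minimal model of that packing with generic helpers:

#include <stdint.h>

/* Illustrative packing: the 16-bit control value occupies bits 31:16 of the
 * combined HSFSTS/HSFCTL word, the status half stays in bits 15:0.
 */
static uint16_t hsfctl_from_combined(uint32_t combined)
{
    return (uint16_t)(combined >> 16);
}

static uint32_t combined_from_hsfctl(uint16_t hsfctl)
{
    return (uint32_t)hsfctl << 16;   /* status half left as zero */
}

Writing the control half back with the low 16 bits at zero is what the diff does with `hsflctl.regval << 16`.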
+ * Therefore, only 32 bit access is supported + */ + hsflctl.regval = E1000_READ_FLASH_REG(hw, ICH_FLASH_HSFSTS)>>16; + + /* 0b/1b corresponds to 1 or 2 byte size, respectively. */ + hsflctl.hsf_ctrl.fldbcount = sizeof(u32) - 1; + hsflctl.hsf_ctrl.flcycle = ICH_CYCLE_READ; + /* In SPT, This register is in Lan memory space, not flash. + * Therefore, only 32 bit access is supported + */ + E1000_WRITE_FLASH_REG(hw, ICH_FLASH_HSFSTS, + (u32)hsflctl.regval << 16); + E1000_WRITE_FLASH_REG(hw, ICH_FLASH_FADDR, flash_linear_addr); + + ret_val = e1000_flash_cycle_ich8lan(hw, + ICH_FLASH_READ_COMMAND_TIMEOUT); + + /* Check if FCERR is set to 1, if set to 1, clear it + * and try the whole sequence a few more times, else + * read in (shift in) the Flash Data0, the order is + * least significant byte first msb to lsb + */ + if (ret_val == E1000_SUCCESS) { + *data = E1000_READ_FLASH_REG(hw, ICH_FLASH_FDATA0); + break; + } else { + /* If we've gotten here, then things are probably + * completely hosed, but if the error condition is + * detected, it won't hurt to give it another try... + * ICH_FLASH_CYCLE_REPEAT_COUNT times. + */ + hsfsts.regval = E1000_READ_FLASH_REG16(hw, + ICH_FLASH_HSFSTS); + if (hsfsts.hsf_status.flcerr) { + /* Repeat for some time before giving up. */ + continue; + } else if (!hsfsts.hsf_status.flcdone) { + DEBUGOUT("Timeout error - flash cycle did not complete.\n"); + break; + } + } + } while (count++ < ICH_FLASH_CYCLE_REPEAT_COUNT); + + return ret_val; +} /** * e1000_write_nvm_ich8lan - Write word(s) to the NVM @@ -3581,6 +4013,175 @@ STATIC s32 e1000_write_nvm_ich8lan(struct e1000_hw *hw, u16 offset, u16 words, } /** + * e1000_update_nvm_checksum_spt - Update the checksum for NVM + * @hw: pointer to the HW structure + * + * The NVM checksum is updated by calling the generic update_nvm_checksum, + * which writes the checksum to the shadow ram. The changes in the shadow + * ram are then committed to the EEPROM by processing each bank at a time + * checking for the modified bit and writing only the pending changes. + * After a successful commit, the shadow ram is cleared and is ready for + * future writes. + **/ +STATIC s32 e1000_update_nvm_checksum_spt(struct e1000_hw *hw) +{ + struct e1000_nvm_info *nvm = &hw->nvm; + struct e1000_dev_spec_ich8lan *dev_spec = &hw->dev_spec.ich8lan; + u32 i, act_offset, new_bank_offset, old_bank_offset, bank; + s32 ret_val; + u32 dword = 0; + + DEBUGFUNC("e1000_update_nvm_checksum_spt"); + + ret_val = e1000_update_nvm_checksum_generic(hw); + if (ret_val) + goto out; + + if (nvm->type != e1000_nvm_flash_sw) + goto out; + + nvm->ops.acquire(hw); + + /* We're writing to the opposite bank so if we're on bank 1, + * write to bank 0 etc. 
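The read loop in e1000_read_flash_data32_ich8lan() above distinguishes two failure modes: a set FLCERR bit means the cycle can simply be retried, up to ICH_FLASH_CYCLE_REPEAT_COUNT times, while a missing FLCDONE bit is treated as a timeout and aborts. A generic sketch of that policy, using placeholder types rather than the driver's register unions:

#include <stdbool.h>
#include <stdio.h>

#define MAX_RETRIES 10   /* stands in for ICH_FLASH_CYCLE_REPEAT_COUNT */

struct cycle_status {
    bool error;   /* models the FLCERR bit  */
    bool done;    /* models the FLCDONE bit */
};

/* Illustrative retry policy: an error bit means the cycle may be retried,
 * a cycle that never reports done is treated as a timeout and aborted.
 */
static int run_flash_cycle(struct cycle_status (*do_cycle)(void))
{
    int count = 0;

    do {
        struct cycle_status st = do_cycle();

        if (st.done && !st.error)
            return 0;                     /* data is valid */
        if (!st.error && !st.done) {
            fprintf(stderr, "flash cycle did not complete\n");
            return -1;                    /* timeout, give up */
        }
        /* error bit set: loop again and retry the cycle */
    } while (++count < MAX_RETRIES);

    return -1;
}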
We also need to erase the segment that + * is going to be written + */ + ret_val = e1000_valid_nvm_bank_detect_ich8lan(hw, &bank); + if (ret_val != E1000_SUCCESS) { + DEBUGOUT("Could not detect valid bank, assuming bank 0\n"); + bank = 0; + } + + if (bank == 0) { + new_bank_offset = nvm->flash_bank_size; + old_bank_offset = 0; + ret_val = e1000_erase_flash_bank_ich8lan(hw, 1); + if (ret_val) + goto release; + } else { + old_bank_offset = nvm->flash_bank_size; + new_bank_offset = 0; + ret_val = e1000_erase_flash_bank_ich8lan(hw, 0); + if (ret_val) + goto release; + } + for (i = 0; i < E1000_SHADOW_RAM_WORDS; i += 2) { + /* Determine whether to write the value stored + * in the other NVM bank or a modified value stored + * in the shadow RAM + */ + ret_val = e1000_read_flash_dword_ich8lan(hw, + i + old_bank_offset, + &dword); + + if (dev_spec->shadow_ram[i].modified) { + dword &= 0xffff0000; + dword |= (dev_spec->shadow_ram[i].value & 0xffff); + } + if (dev_spec->shadow_ram[i + 1].modified) { + dword &= 0x0000ffff; + dword |= ((dev_spec->shadow_ram[i + 1].value & 0xffff) + << 16); + } + if (ret_val) + break; + + /* If the word is 0x13, then make sure the signature bits + * (15:14) are 11b until the commit has completed. + * This will allow us to write 10b which indicates the + * signature is valid. We want to do this after the write + * has completed so that we don't mark the segment valid + * while the write is still in progress + */ + if (i == E1000_ICH_NVM_SIG_WORD - 1) + dword |= E1000_ICH_NVM_SIG_MASK << 16; + + /* Convert offset to bytes. */ + act_offset = (i + new_bank_offset) << 1; + + usec_delay(100); + + /* Write the data to the new bank. Offset in words*/ + act_offset = i + new_bank_offset; + ret_val = e1000_retry_write_flash_dword_ich8lan(hw, act_offset, + dword); + if (ret_val) + break; + } + + /* Don't bother writing the segment valid bits if sector + * programming failed. + */ + if (ret_val) { + DEBUGOUT("Flash commit failed.\n"); + goto release; + } + + /* Finally validate the new segment by setting bit 15:14 + * to 10b in word 0x13 , this can be done without an + * erase as well since these bits are 11 to start with + * and we need to change bit 14 to 0b + */ + act_offset = new_bank_offset + E1000_ICH_NVM_SIG_WORD; + + /*offset in words but we read dword*/ + --act_offset; + ret_val = e1000_read_flash_dword_ich8lan(hw, act_offset, &dword); + + if (ret_val) + goto release; + + dword &= 0xBFFFFFFF; + ret_val = e1000_retry_write_flash_dword_ich8lan(hw, act_offset, dword); + + if (ret_val) + goto release; + + /* And invalidate the previously valid segment by setting + * its signature word (0x13) high_byte to 0b. This can be + * done without an erase because flash erase sets all bits + * to 1's. We can write 1's to 0's without an erase + */ + act_offset = (old_bank_offset + E1000_ICH_NVM_SIG_WORD) * 2 + 1; + + /* offset in words but we read dword*/ + act_offset = old_bank_offset + E1000_ICH_NVM_SIG_WORD - 1; + ret_val = e1000_read_flash_dword_ich8lan(hw, act_offset, &dword); + + if (ret_val) + goto release; + + dword &= 0x00FFFFFF; + ret_val = e1000_retry_write_flash_dword_ich8lan(hw, act_offset, dword); + + if (ret_val) + goto release; + + /* Great! Everything worked, we can now clear the cached entries. */ + for (i = 0; i < E1000_SHADOW_RAM_WORDS; i++) { + dev_spec->shadow_ram[i].modified = false; + dev_spec->shadow_ram[i].value = 0xFFFF; + } + +release: + nvm->ops.release(hw); + + /* Reload the EEPROM, or else modifications will not appear + * until after the next adapter reset. 
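When committing to the opposite bank, e1000_update_nvm_checksum_spt() folds any modified shadow-RAM words into the dword it just read from the currently valid bank, the even offset into the low word and the odd offset into the high word. A self-contained sketch of that merge (struct and function names are illustrative):

#include <stdint.h>
#include <stdbool.h>

struct shadow_word {
    bool     modified;
    uint16_t value;
};

/* Illustrative merge: overlay modified shadow-RAM words onto the dword
 * read from the old bank before it is written to the new one.
 */
static uint32_t merge_shadow(uint32_t dword,
                             const struct shadow_word *lo,
                             const struct shadow_word *hi)
{
    if (lo->modified) {
        dword &= 0xFFFF0000u;
        dword |= lo->value;
    }
    if (hi->modified) {
        dword &= 0x0000FFFFu;
        dword |= (uint32_t)hi->value << 16;
    }
    return dword;
}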
+ */ + if (!ret_val) { + nvm->ops.reload(hw); + msec_delay(10); + } + +out: + if (ret_val) + DEBUGOUT1("NVM update error: %d\n", ret_val); + + return ret_val; +} + +/** * e1000_update_nvm_checksum_ich8lan - Update the checksum for NVM * @hw: pointer to the HW structure * @@ -3757,6 +4358,8 @@ STATIC s32 e1000_validate_nvm_checksum_ich8lan(struct e1000_hw *hw) */ switch (hw->mac.type) { case e1000_pch_lpt: + case e1000_pch_spt: + case e1000_pch_cnp: word = NVM_COMPAT; valid_csum_mask = NVM_COMPAT_VALID_CSUM; break; @@ -3804,8 +4407,13 @@ STATIC s32 e1000_write_flash_data_ich8lan(struct e1000_hw *hw, u32 offset, DEBUGFUNC("e1000_write_ich8_data"); - if (size < 1 || size > 2 || offset > ICH_FLASH_LINEAR_ADDR_MASK) - return -E1000_ERR_NVM; + if (hw->mac.type >= e1000_pch_spt) { + if (size != 4 || offset > ICH_FLASH_LINEAR_ADDR_MASK) + return -E1000_ERR_NVM; + } else { + if (size < 1 || size > 2 || offset > ICH_FLASH_LINEAR_ADDR_MASK) + return -E1000_ERR_NVM; + } flash_linear_addr = ((ICH_FLASH_LINEAR_ADDR_MASK & offset) + hw->nvm.flash_base_addr); @@ -3816,12 +4424,29 @@ STATIC s32 e1000_write_flash_data_ich8lan(struct e1000_hw *hw, u32 offset, ret_val = e1000_flash_cycle_init_ich8lan(hw); if (ret_val != E1000_SUCCESS) break; - hsflctl.regval = E1000_READ_FLASH_REG16(hw, ICH_FLASH_HSFCTL); + /* In SPT, This register is in Lan memory space, not + * flash. Therefore, only 32 bit access is supported + */ + if (hw->mac.type >= e1000_pch_spt) + hsflctl.regval = + E1000_READ_FLASH_REG(hw, ICH_FLASH_HSFSTS)>>16; + else + hsflctl.regval = + E1000_READ_FLASH_REG16(hw, ICH_FLASH_HSFCTL); /* 0b/1b corresponds to 1 or 2 byte size, respectively. */ hsflctl.hsf_ctrl.fldbcount = size - 1; hsflctl.hsf_ctrl.flcycle = ICH_CYCLE_WRITE; - E1000_WRITE_FLASH_REG16(hw, ICH_FLASH_HSFCTL, hsflctl.regval); + /* In SPT, This register is in Lan memory space, + * not flash. Therefore, only 32 bit access is + * supported + */ + if (hw->mac.type >= e1000_pch_spt) + E1000_WRITE_FLASH_REG(hw, ICH_FLASH_HSFSTS, + hsflctl.regval << 16); + else + E1000_WRITE_FLASH_REG16(hw, ICH_FLASH_HSFCTL, + hsflctl.regval); E1000_WRITE_FLASH_REG(hw, ICH_FLASH_FADDR, flash_linear_addr); @@ -3859,6 +4484,94 @@ STATIC s32 e1000_write_flash_data_ich8lan(struct e1000_hw *hw, u32 offset, return ret_val; } +/** +* e1000_write_flash_data32_ich8lan - Writes 4 bytes to the NVM +* @hw: pointer to the HW structure +* @offset: The offset (in bytes) of the dwords to read. +* @data: The 4 bytes to write to the NVM. +* +* Writes one/two/four bytes to the NVM using the flash access registers. +**/ +STATIC s32 e1000_write_flash_data32_ich8lan(struct e1000_hw *hw, u32 offset, + u32 data) +{ + union ich8_hws_flash_status hsfsts; + union ich8_hws_flash_ctrl hsflctl; + u32 flash_linear_addr; + s32 ret_val; + u8 count = 0; + + DEBUGFUNC("e1000_write_flash_data32_ich8lan"); + + if (hw->mac.type >= e1000_pch_spt) { + if (offset > ICH_FLASH_LINEAR_ADDR_MASK) + return -E1000_ERR_NVM; + } + flash_linear_addr = ((ICH_FLASH_LINEAR_ADDR_MASK & offset) + + hw->nvm.flash_base_addr); + do { + usec_delay(1); + /* Steps */ + ret_val = e1000_flash_cycle_init_ich8lan(hw); + if (ret_val != E1000_SUCCESS) + break; + + /* In SPT, This register is in Lan memory space, not + * flash. 
Therefore, only 32 bit access is supported + */ + if (hw->mac.type >= e1000_pch_spt) + hsflctl.regval = E1000_READ_FLASH_REG(hw, + ICH_FLASH_HSFSTS) + >> 16; + else + hsflctl.regval = E1000_READ_FLASH_REG16(hw, + ICH_FLASH_HSFCTL); + + hsflctl.hsf_ctrl.fldbcount = sizeof(u32) - 1; + hsflctl.hsf_ctrl.flcycle = ICH_CYCLE_WRITE; + + /* In SPT, This register is in Lan memory space, + * not flash. Therefore, only 32 bit access is + * supported + */ + if (hw->mac.type >= e1000_pch_spt) + E1000_WRITE_FLASH_REG(hw, ICH_FLASH_HSFSTS, + hsflctl.regval << 16); + else + E1000_WRITE_FLASH_REG16(hw, ICH_FLASH_HSFCTL, + hsflctl.regval); + + E1000_WRITE_FLASH_REG(hw, ICH_FLASH_FADDR, flash_linear_addr); + + E1000_WRITE_FLASH_REG(hw, ICH_FLASH_FDATA0, data); + + /* check if FCERR is set to 1 , if set to 1, clear it + * and try the whole sequence a few more times else done + */ + ret_val = e1000_flash_cycle_ich8lan(hw, + ICH_FLASH_WRITE_COMMAND_TIMEOUT); + + if (ret_val == E1000_SUCCESS) + break; + + /* If we're here, then things are most likely + * completely hosed, but if the error condition + * is detected, it won't hurt to give it another + * try...ICH_FLASH_CYCLE_REPEAT_COUNT times. + */ + hsfsts.regval = E1000_READ_FLASH_REG16(hw, ICH_FLASH_HSFSTS); + + if (hsfsts.hsf_status.flcerr) + /* Repeat for some time before giving up. */ + continue; + if (!hsfsts.hsf_status.flcdone) { + DEBUGOUT("Timeout error - flash cycle did not complete.\n"); + break; + } + } while (count++ < ICH_FLASH_CYCLE_REPEAT_COUNT); + + return ret_val; +} /** * e1000_write_flash_byte_ich8lan - Write a single byte to NVM @@ -3878,7 +4591,42 @@ STATIC s32 e1000_write_flash_byte_ich8lan(struct e1000_hw *hw, u32 offset, return e1000_write_flash_data_ich8lan(hw, offset, 1, word); } +/** +* e1000_retry_write_flash_dword_ich8lan - Writes a dword to NVM +* @hw: pointer to the HW structure +* @offset: The offset of the word to write. +* @dword: The dword to write to the NVM. +* +* Writes a single dword to the NVM using the flash access registers. +* Goes through a retry algorithm before giving up. +**/ +STATIC s32 e1000_retry_write_flash_dword_ich8lan(struct e1000_hw *hw, + u32 offset, u32 dword) +{ + s32 ret_val; + u16 program_retries; + DEBUGFUNC("e1000_retry_write_flash_dword_ich8lan"); + + /* Must convert word offset into bytes. 
*/ + offset <<= 1; + + ret_val = e1000_write_flash_data32_ich8lan(hw, offset, dword); + + if (!ret_val) + return ret_val; + for (program_retries = 0; program_retries < 100; program_retries++) { + DEBUGOUT2("Retrying Byte %8.8X at offset %u\n", dword, offset); + usec_delay(100); + ret_val = e1000_write_flash_data32_ich8lan(hw, offset, dword); + if (ret_val == E1000_SUCCESS) + break; + } + if (program_retries == 100) + return -E1000_ERR_NVM; + + return E1000_SUCCESS; +} /** * e1000_retry_write_flash_byte_ich8lan - Writes a single byte to NVM @@ -3988,12 +4736,22 @@ STATIC s32 e1000_erase_flash_bank_ich8lan(struct e1000_hw *hw, u32 bank) /* Write a value 11 (block Erase) in Flash * Cycle field in hw flash control */ - hsflctl.regval = - E1000_READ_FLASH_REG16(hw, ICH_FLASH_HSFCTL); + if (hw->mac.type >= e1000_pch_spt) + hsflctl.regval = + E1000_READ_FLASH_REG(hw, + ICH_FLASH_HSFSTS)>>16; + else + hsflctl.regval = + E1000_READ_FLASH_REG16(hw, + ICH_FLASH_HSFCTL); hsflctl.hsf_ctrl.flcycle = ICH_CYCLE_ERASE; - E1000_WRITE_FLASH_REG16(hw, ICH_FLASH_HSFCTL, - hsflctl.regval); + if (hw->mac.type >= e1000_pch_spt) + E1000_WRITE_FLASH_REG(hw, ICH_FLASH_HSFSTS, + hsflctl.regval << 16); + else + E1000_WRITE_FLASH_REG16(hw, ICH_FLASH_HSFCTL, + hsflctl.regval); /* Write the last 24 bits of an index within the * block into Flash Linear address field in Flash @@ -4426,7 +5184,7 @@ STATIC void e1000_initialize_hw_bits_ich8lan(struct e1000_hw *hw) E1000_WRITE_REG(hw, E1000_RFCTL, reg); /* Enable ECC on Lynxpoint */ - if (hw->mac.type == e1000_pch_lpt) { + if (hw->mac.type >= e1000_pch_lpt) { reg = E1000_READ_REG(hw, E1000_PBECCSTS); reg |= E1000_PBECCSTS_ECC_ENABLE; E1000_WRITE_REG(hw, E1000_PBECCSTS, reg); @@ -4858,7 +5616,8 @@ void e1000_suspend_workarounds_ich8lan(struct e1000_hw *hw) if ((device_id == E1000_DEV_ID_PCH_LPTLP_I218_LM) || (device_id == E1000_DEV_ID_PCH_LPTLP_I218_V) || (device_id == E1000_DEV_ID_PCH_I218_LM3) || - (device_id == E1000_DEV_ID_PCH_I218_V3)) { + (device_id == E1000_DEV_ID_PCH_I218_V3) || + (hw->mac.type >= e1000_pch_spt)) { u32 fextnvm6 = E1000_READ_REG(hw, E1000_FEXTNVM6); E1000_WRITE_REG(hw, E1000_FEXTNVM6, diff --git a/src/dpdk/drivers/net/e1000/base/e1000_ich8lan.h b/src/dpdk/drivers/net/e1000/base/e1000_ich8lan.h index 33e77fb8..bc4ed1dd 100644 --- a/src/dpdk/drivers/net/e1000/base/e1000_ich8lan.h +++ b/src/dpdk/drivers/net/e1000/base/e1000_ich8lan.h @@ -121,6 +121,18 @@ POSSIBILITY OF SUCH DAMAGE. #if !defined(EXTERNAL_RELEASE) || defined(ULP_SUPPORT) #define E1000_FEXTNVM7_DISABLE_SMB_PERST 0x00000020 #endif /* !EXTERNAL_RELEASE || ULP_SUPPORT */ +#define E1000_FEXTNVM9_IOSFSB_CLKGATE_DIS 0x00000800 +#define E1000_FEXTNVM9_IOSFSB_CLKREQ_DIS 0x00001000 +#define E1000_FEXTNVM11_DISABLE_PB_READ 0x00000200 +#define E1000_FEXTNVM11_DISABLE_MULR_FIX 0x00002000 + +/* bit24: RXDCTL thresholds granularity: 0 - cache lines, 1 - descriptors */ +#define E1000_RXDCTL_THRESH_UNIT_DESC 0x01000000 + +#define NVM_SIZE_MULTIPLIER 4096 /*multiplier for NVMS field*/ +#define E1000_FLASH_BASE_ADDR 0xE000 /*offset of NVM access regs*/ +#define E1000_CTRL_EXT_NVMVS 0x3 /*NVM valid sector */ +#define E1000_TARC0_CB_MULTIQ_3_REQ (1 << 28 | 1 << 29) #define PCIE_ICH8_SNOOP_ALL PCIE_NO_SNOOP_ALL #define E1000_ICH_RAR_ENTRIES 7 @@ -198,6 +210,10 @@ POSSIBILITY OF SUCH DAMAGE. 
#define I218_ULP_CONFIG1_INBAND_EXIT 0x0020 /* Inband on ULP exit */ #define I218_ULP_CONFIG1_WOL_HOST 0x0040 /* WoL Host on ULP exit */ #define I218_ULP_CONFIG1_RESET_TO_SMBUS 0x0100 /* Reset to SMBus mode */ +/* enable ULP even if when phy powered down via lanphypc */ +#define I218_ULP_CONFIG1_EN_ULP_LANPHYPC 0x0400 +/* disable clear of sticky ULP on PERST */ +#define I218_ULP_CONFIG1_DIS_CLR_STICKY_ON_PERST 0x0800 #define I218_ULP_CONFIG1_DISABLE_SMB_PERST 0x1000 /* Disable on PERST# */ #endif /* !EXTERNAL_RELEASE || ULP_SUPPORT */ @@ -234,9 +250,12 @@ POSSIBILITY OF SUCH DAMAGE. /* PHY Power Management Control */ #define HV_PM_CTRL PHY_REG(770, 17) -#define HV_PM_CTRL_PLL_STOP_IN_K1_GIGA 0x100 +#define HV_PM_CTRL_K1_CLK_REQ 0x200 #define HV_PM_CTRL_K1_ENABLE 0x4000 +#define I217_PLL_CLOCK_GATE_REG PHY_REG(772, 28) +#define I217_PLL_CLOCK_GATE_MASK 0x07FF + #define SW_FLAG_TIMEOUT 1000 /* SW Semaphore flag timeout in ms */ /* Inband Control */ diff --git a/src/dpdk/drivers/net/e1000/base/e1000_mbx.c b/src/dpdk/drivers/net/e1000/base/e1000_mbx.c index 6daf16b0..a92fd22e 100644 --- a/src/dpdk/drivers/net/e1000/base/e1000_mbx.c +++ b/src/dpdk/drivers/net/e1000/base/e1000_mbx.c @@ -430,15 +430,21 @@ STATIC s32 e1000_check_for_rst_vf(struct e1000_hw *hw, STATIC s32 e1000_obtain_mbx_lock_vf(struct e1000_hw *hw) { s32 ret_val = -E1000_ERR_MBX; + int count = 10; DEBUGFUNC("e1000_obtain_mbx_lock_vf"); - /* Take ownership of the buffer */ - E1000_WRITE_REG(hw, E1000_V2PMAILBOX(0), E1000_V2PMAILBOX_VFU); + do { + /* Take ownership of the buffer */ + E1000_WRITE_REG(hw, E1000_V2PMAILBOX(0), E1000_V2PMAILBOX_VFU); - /* reserve mailbox for vf use */ - if (e1000_read_v2p_mailbox(hw) & E1000_V2PMAILBOX_VFU) - ret_val = E1000_SUCCESS; + /* reserve mailbox for vf use */ + if (e1000_read_v2p_mailbox(hw) & E1000_V2PMAILBOX_VFU) { + ret_val = E1000_SUCCESS; + break; + } + usec_delay(1000); + } while (count-- > 0); return ret_val; } @@ -645,18 +651,26 @@ STATIC s32 e1000_obtain_mbx_lock_pf(struct e1000_hw *hw, u16 vf_number) { s32 ret_val = -E1000_ERR_MBX; u32 p2v_mailbox; + int count = 10; DEBUGFUNC("e1000_obtain_mbx_lock_pf"); - /* Take ownership of the buffer */ - E1000_WRITE_REG(hw, E1000_P2VMAILBOX(vf_number), E1000_P2VMAILBOX_PFU); + do { + /* Take ownership of the buffer */ + E1000_WRITE_REG(hw, E1000_P2VMAILBOX(vf_number), + E1000_P2VMAILBOX_PFU); - /* reserve mailbox for vf use */ - p2v_mailbox = E1000_READ_REG(hw, E1000_P2VMAILBOX(vf_number)); - if (p2v_mailbox & E1000_P2VMAILBOX_PFU) - ret_val = E1000_SUCCESS; + /* reserve mailbox for pf use */ + p2v_mailbox = E1000_READ_REG(hw, E1000_P2VMAILBOX(vf_number)); + if (p2v_mailbox & E1000_P2VMAILBOX_PFU) { + ret_val = E1000_SUCCESS; + break; + } + usec_delay(1000); + } while (count-- > 0); return ret_val; + } /** diff --git a/src/dpdk/drivers/net/e1000/base/e1000_nvm.c b/src/dpdk/drivers/net/e1000/base/e1000_nvm.c index 762acd16..75c22827 100644 --- a/src/dpdk/drivers/net/e1000/base/e1000_nvm.c +++ b/src/dpdk/drivers/net/e1000/base/e1000_nvm.c @@ -1295,6 +1295,7 @@ void e1000_get_fw_version(struct e1000_hw *hw, struct e1000_fw_version *fw_vers) case e1000_82575: case e1000_82576: case e1000_82580: + case e1000_i354: hw->nvm.ops.read(hw, NVM_ETRACK_HIWORD, 1, &etrack_test); /* Use this format, unless EETRACK ID exists, * then use alternate format diff --git a/src/dpdk/drivers/net/e1000/base/e1000_osdep.h b/src/dpdk/drivers/net/e1000/base/e1000_osdep.h index 47a19481..b8868049 100644 --- a/src/dpdk/drivers/net/e1000/base/e1000_osdep.h +++ 
b/src/dpdk/drivers/net/e1000/base/e1000_osdep.h @@ -44,6 +44,7 @@ #include <rte_log.h> #include <rte_debug.h> #include <rte_byteorder.h> +#include <rte_io.h> #include "../e1000_logs.h" @@ -94,17 +95,18 @@ typedef int bool; #define E1000_WRITE_FLUSH(a) E1000_READ_REG(a, E1000_STATUS) -#define E1000_PCI_REG(reg) (*((volatile uint32_t *)(reg))) +#define E1000_PCI_REG(reg) rte_read32(reg) -#define E1000_PCI_REG16(reg) (*((volatile uint16_t *)(reg))) +#define E1000_PCI_REG16(reg) rte_read16(reg) -#define E1000_PCI_REG_WRITE(reg, value) do { \ - E1000_PCI_REG((reg)) = (rte_cpu_to_le_32(value)); \ -} while (0) +#define E1000_PCI_REG_WRITE(reg, value) \ + rte_write32((rte_cpu_to_le_32(value)), reg) -#define E1000_PCI_REG_WRITE16(reg, value) do { \ - E1000_PCI_REG16((reg)) = (rte_cpu_to_le_16(value)); \ -} while (0) +#define E1000_PCI_REG_WRITE_RELAXED(reg, value) \ + rte_write32_relaxed((rte_cpu_to_le_32(value)), reg) + +#define E1000_PCI_REG_WRITE16(reg, value) \ + rte_write16((rte_cpu_to_le_16(value)), reg) #define E1000_PCI_REG_ADDR(hw, reg) \ ((volatile uint32_t *)((char *)(hw)->hw_addr + (reg))) diff --git a/src/dpdk/drivers/net/e1000/base/e1000_regs.h b/src/dpdk/drivers/net/e1000/base/e1000_regs.h index 84531a99..364a7261 100644 --- a/src/dpdk/drivers/net/e1000/base/e1000_regs.h +++ b/src/dpdk/drivers/net/e1000/base/e1000_regs.h @@ -66,6 +66,8 @@ POSSIBILITY OF SUCH DAMAGE. #define E1000_FEXTNVM4 0x00024 /* Future Extended NVM 4 - RW */ #define E1000_FEXTNVM6 0x00010 /* Future Extended NVM 6 - RW */ #define E1000_FEXTNVM7 0x000E4 /* Future Extended NVM 7 - RW */ +#define E1000_FEXTNVM9 0x5BB4 /* Future Extended NVM 9 - RW */ +#define E1000_FEXTNVM11 0x5BBC /* Future Extended NVM 11 - RW */ #define E1000_PCIEANACFG 0x00F18 /* PCIE Analog Config */ #define E1000_FCT 0x00030 /* Flow Control Type - RW */ #define E1000_CONNSW 0x00034 /* Copper/Fiber switch control - RW */ @@ -109,6 +111,7 @@ POSSIBILITY OF SUCH DAMAGE. #define E1000_PBA 0x01000 /* Packet Buffer Allocation - RW */ #define E1000_PBS 0x01008 /* Packet Buffer Size */ #define E1000_PBECCSTS 0x0100C /* Packet Buffer ECC Status - RW */ +#define E1000_IOSFPC 0x00F28 /* TX corrupted data */ #define E1000_EEMNGCTL 0x01010 /* MNG EEprom Control */ #define E1000_EEMNGCTL_I210 0x01010 /* i210 MNG EEprom Mode Control */ #define E1000_EEARBC 0x01024 /* EEPROM Auto Read Bus Control */ @@ -591,6 +594,10 @@ POSSIBILITY OF SUCH DAMAGE. 
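The e1000_osdep.h hunk above swaps the raw volatile dereferences for rte_read32()/rte_write32() from rte_io.h, still converting the value to little-endian before it reaches the device; the new E1000_PCI_REG_WRITE_RELAXED maps to rte_write32_relaxed(), which omits the I/O write barrier of the plain write and is used later for the TX tail update. A rough model of what such a 32-bit register write amounts to, independent of the DPDK helpers:

#include <stdint.h>

/* Rough model of a 32-bit MMIO register write: convert the CPU value to
 * little-endian, then perform a single 32-bit store to the mapped address.
 * On a little-endian host the conversion is a no-op.
 */
static inline uint32_t cpu_to_le32_model(uint32_t v)
{
#if defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)
    return __builtin_bswap32(v);
#else
    return v;
#endif
}

static inline void reg_write32_model(volatile void *reg, uint32_t value)
{
    *(volatile uint32_t *)reg = cpu_to_le32_model(value);
}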
#define E1000_TIMADJL 0x0B60C /* Time sync time adjustment offset Low - RW */ #define E1000_TIMADJH 0x0B610 /* Time sync time adjustment offset High - RW */ #define E1000_TSAUXC 0x0B640 /* Timesync Auxiliary Control register */ +#define E1000_SYSSTMPL 0x0B648 /* HH Timesync system stamp low register */ +#define E1000_SYSSTMPH 0x0B64C /* HH Timesync system stamp hi register */ +#define E1000_PLTSTMPL 0x0B640 /* HH Timesync platform stamp low register */ +#define E1000_PLTSTMPH 0x0B644 /* HH Timesync platform stamp hi register */ #define E1000_SYSTIMR 0x0B6F8 /* System time register Residue */ #define E1000_TSICR 0x0B66C /* Interrupt Cause Register */ #define E1000_TSIM 0x0B674 /* Interrupt Mask Register */ diff --git a/src/dpdk/drivers/net/e1000/e1000_ethdev.h b/src/dpdk/drivers/net/e1000/e1000_ethdev.h index 6c25c8da..81a6dbb7 100644 --- a/src/dpdk/drivers/net/e1000/e1000_ethdev.h +++ b/src/dpdk/drivers/net/e1000/e1000_ethdev.h @@ -138,6 +138,11 @@ #define E1000_MISC_VEC_ID RTE_INTR_VEC_ZERO_OFFSET #define E1000_RX_VEC_START RTE_INTR_VEC_RXTX_OFFSET +#define IGB_TX_MAX_SEG UINT8_MAX +#define IGB_TX_MAX_MTU_SEG UINT8_MAX +#define EM_TX_MAX_SEG UINT8_MAX +#define EM_TX_MAX_MTU_SEG UINT8_MAX + /* structure for interrupt relative data */ struct e1000_interrupt { uint32_t flags; @@ -286,6 +291,8 @@ struct e1000_adapter { #define E1000_DEV_PRIVATE_TO_FILTER_INFO(adapter) \ (&((struct e1000_adapter *)adapter)->filter) +#define E1000_DEV_TO_PCI(eth_dev) \ + RTE_DEV_TO_PCI((eth_dev)->device) /* * RX/TX IGB function prototypes */ @@ -315,6 +322,9 @@ void eth_igb_tx_init(struct rte_eth_dev *dev); uint16_t eth_igb_xmit_pkts(void *txq, struct rte_mbuf **tx_pkts, uint16_t nb_pkts); +uint16_t eth_igb_prep_pkts(void *txq, struct rte_mbuf **tx_pkts, + uint16_t nb_pkts); + uint16_t eth_igb_recv_pkts(void *rxq, struct rte_mbuf **rx_pkts, uint16_t nb_pkts); @@ -376,6 +386,9 @@ void eth_em_tx_init(struct rte_eth_dev *dev); uint16_t eth_em_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts); +uint16_t eth_em_prep_pkts(void *txq, struct rte_mbuf **tx_pkts, + uint16_t nb_pkts); + uint16_t eth_em_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts); diff --git a/src/dpdk/drivers/net/e1000/em_ethdev.c b/src/dpdk/drivers/net/e1000/em_ethdev.c index ad104ed7..d778785d 100644 --- a/src/dpdk/drivers/net/e1000/em_ethdev.c +++ b/src/dpdk/drivers/net/e1000/em_ethdev.c @@ -1,7 +1,7 @@ /*- * BSD LICENSE * - * Copyright(c) 2010-2015 Intel Corporation. All rights reserved. + * Copyright(c) 2010-2016 Intel Corporation. All rights reserved. * All rights reserved. 
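The new E1000_DEV_TO_PCI() macro in e1000_ethdev.h recovers the rte_pci_device from the generic device pointer embedded in the ethdev, deferring to RTE_DEV_TO_PCI. The usual way to implement that kind of conversion is an offsetof-based container lookup; a generic sketch with stand-in struct names, not the rte_* types:

#include <stddef.h>

/* Stand-in structures: a generic device handle embedded inside the
 * bus-specific one, in the spirit of rte_device inside rte_pci_device.
 */
struct generic_device { int numa_node; };

struct pci_device_model {
    unsigned int id;
    struct generic_device device;   /* embedded generic handle */
};

/* Recover the enclosing pci_device_model from a pointer to its embedded
 * generic_device member via an offsetof-based container lookup.
 */
#define DEV_TO_PCI_MODEL(dev_ptr) \
    ((struct pci_device_model *)((char *)(dev_ptr) - \
        offsetof(struct pci_device_model, device)))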
* * Redistribution and use in source and binary forms, with or without @@ -83,7 +83,8 @@ static int eth_em_flow_ctrl_set(struct rte_eth_dev *dev, static int eth_em_interrupt_setup(struct rte_eth_dev *dev); static int eth_em_rxq_interrupt_setup(struct rte_eth_dev *dev); static int eth_em_interrupt_get_status(struct rte_eth_dev *dev); -static int eth_em_interrupt_action(struct rte_eth_dev *dev); +static int eth_em_interrupt_action(struct rte_eth_dev *dev, + struct rte_intr_handle *handle); static void eth_em_interrupt_handler(struct rte_intr_handle *handle, void *param); @@ -168,6 +169,19 @@ static const struct rte_pci_id pci_id_em_map[] = { { RTE_PCI_DEVICE(E1000_INTEL_VENDOR_ID, E1000_DEV_ID_PCH_I218_V2) }, { RTE_PCI_DEVICE(E1000_INTEL_VENDOR_ID, E1000_DEV_ID_PCH_I218_LM3) }, { RTE_PCI_DEVICE(E1000_INTEL_VENDOR_ID, E1000_DEV_ID_PCH_I218_V3) }, + { RTE_PCI_DEVICE(E1000_INTEL_VENDOR_ID, E1000_DEV_ID_PCH_SPT_I219_LM) }, + { RTE_PCI_DEVICE(E1000_INTEL_VENDOR_ID, E1000_DEV_ID_PCH_SPT_I219_V) }, + { RTE_PCI_DEVICE(E1000_INTEL_VENDOR_ID, E1000_DEV_ID_PCH_SPT_I219_LM2) }, + { RTE_PCI_DEVICE(E1000_INTEL_VENDOR_ID, E1000_DEV_ID_PCH_SPT_I219_V2) }, + { RTE_PCI_DEVICE(E1000_INTEL_VENDOR_ID, E1000_DEV_ID_PCH_LBG_I219_LM3) }, + { RTE_PCI_DEVICE(E1000_INTEL_VENDOR_ID, E1000_DEV_ID_PCH_SPT_I219_LM4) }, + { RTE_PCI_DEVICE(E1000_INTEL_VENDOR_ID, E1000_DEV_ID_PCH_SPT_I219_V4) }, + { RTE_PCI_DEVICE(E1000_INTEL_VENDOR_ID, E1000_DEV_ID_PCH_SPT_I219_LM5) }, + { RTE_PCI_DEVICE(E1000_INTEL_VENDOR_ID, E1000_DEV_ID_PCH_SPT_I219_V5) }, + { RTE_PCI_DEVICE(E1000_INTEL_VENDOR_ID, E1000_DEV_ID_PCH_CNP_I219_LM6) }, + { RTE_PCI_DEVICE(E1000_INTEL_VENDOR_ID, E1000_DEV_ID_PCH_CNP_I219_V6) }, + { RTE_PCI_DEVICE(E1000_INTEL_VENDOR_ID, E1000_DEV_ID_PCH_CNP_I219_LM7) }, + { RTE_PCI_DEVICE(E1000_INTEL_VENDOR_ID, E1000_DEV_ID_PCH_CNP_I219_V7) }, { .vendor_id = 0, /* sentinel */ }, }; @@ -278,6 +292,19 @@ eth_em_dev_is_ich8(struct e1000_hw *hw) case E1000_DEV_ID_PCH_I218_LM2: case E1000_DEV_ID_PCH_I218_V3: case E1000_DEV_ID_PCH_I218_LM3: + case E1000_DEV_ID_PCH_SPT_I219_LM: + case E1000_DEV_ID_PCH_SPT_I219_V: + case E1000_DEV_ID_PCH_SPT_I219_LM2: + case E1000_DEV_ID_PCH_SPT_I219_V2: + case E1000_DEV_ID_PCH_LBG_I219_LM3: + case E1000_DEV_ID_PCH_SPT_I219_LM4: + case E1000_DEV_ID_PCH_SPT_I219_V4: + case E1000_DEV_ID_PCH_SPT_I219_LM5: + case E1000_DEV_ID_PCH_SPT_I219_V5: + case E1000_DEV_ID_PCH_CNP_I219_LM6: + case E1000_DEV_ID_PCH_CNP_I219_V6: + case E1000_DEV_ID_PCH_CNP_I219_LM7: + case E1000_DEV_ID_PCH_CNP_I219_V7: return 1; default: return 0; @@ -287,7 +314,8 @@ eth_em_dev_is_ich8(struct e1000_hw *hw) static int eth_em_dev_init(struct rte_eth_dev *eth_dev) { - struct rte_pci_device *pci_dev; + struct rte_pci_device *pci_dev = E1000_DEV_TO_PCI(eth_dev); + struct rte_intr_handle *intr_handle = &pci_dev->intr_handle; struct e1000_adapter *adapter = E1000_DEV_PRIVATE(eth_dev->data->dev_private); struct e1000_hw *hw = @@ -295,11 +323,10 @@ eth_em_dev_init(struct rte_eth_dev *eth_dev) struct e1000_vfta * shadow_vfta = E1000_DEV_PRIVATE_TO_VFTA(eth_dev->data->dev_private); - pci_dev = eth_dev->pci_dev; - eth_dev->dev_ops = ð_em_ops; eth_dev->rx_pkt_burst = (eth_rx_burst_t)ð_em_recv_pkts; eth_dev->tx_pkt_burst = (eth_tx_burst_t)ð_em_xmit_pkts; + eth_dev->tx_pkt_prepare = (eth_tx_prep_t)ð_em_prep_pkts; /* for secondary processes, we don't initialise any further as primary * has already done this work. 
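pci_id_em_map above now lists the SPT and CNP I219 devices explicitly and is closed by a zeroed sentinel entry, so probing code can walk the array until it reaches a vendor_id of 0. A small sketch of that lookup shape (types are placeholders, not the rte_pci_id definition):

#include <stdint.h>

struct pci_id_model {
    uint16_t vendor_id;
    uint16_t device_id;
};

/* Illustrative match loop over a sentinel-terminated ID table, the same
 * shape as pci_id_em_map.
 */
static int id_table_match(const struct pci_id_model *tbl,
                          uint16_t vendor, uint16_t device)
{
    for (; tbl->vendor_id != 0; tbl++) {
        if (tbl->vendor_id == vendor && tbl->device_id == device)
            return 1;
    }
    return 0;
}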
Only check we don't need a different @@ -312,6 +339,7 @@ eth_em_dev_init(struct rte_eth_dev *eth_dev) } rte_eth_copy_pci_info(eth_dev, pci_dev); + eth_dev->data->dev_flags = RTE_ETH_DEV_DETACHABLE; hw->hw_addr = (void *)pci_dev->mem_resource[0].addr; hw->device_id = pci_dev->id.device_id; @@ -351,8 +379,8 @@ eth_em_dev_init(struct rte_eth_dev *eth_dev) eth_dev->data->port_id, pci_dev->id.vendor_id, pci_dev->id.device_id); - rte_intr_callback_register(&(pci_dev->intr_handle), - eth_em_interrupt_handler, (void *)eth_dev); + rte_intr_callback_register(intr_handle, + eth_em_interrupt_handler, eth_dev); return 0; } @@ -360,17 +388,16 @@ eth_em_dev_init(struct rte_eth_dev *eth_dev) static int eth_em_dev_uninit(struct rte_eth_dev *eth_dev) { - struct rte_pci_device *pci_dev; + struct rte_pci_device *pci_dev = E1000_DEV_TO_PCI(eth_dev); struct e1000_adapter *adapter = E1000_DEV_PRIVATE(eth_dev->data->dev_private); + struct rte_intr_handle *intr_handle = &pci_dev->intr_handle; PMD_INIT_FUNC_TRACE(); if (rte_eal_process_type() != RTE_PROC_PRIMARY) return -EPERM; - pci_dev = eth_dev->pci_dev; - if (adapter->stopped == 0) eth_em_close(eth_dev); @@ -382,19 +409,19 @@ eth_em_dev_uninit(struct rte_eth_dev *eth_dev) eth_dev->data->mac_addrs = NULL; /* disable uio intr before callback unregister */ - rte_intr_disable(&(pci_dev->intr_handle)); - rte_intr_callback_unregister(&(pci_dev->intr_handle), - eth_em_interrupt_handler, (void *)eth_dev); + rte_intr_disable(intr_handle); + rte_intr_callback_unregister(intr_handle, + eth_em_interrupt_handler, eth_dev); return 0; } static struct eth_driver rte_em_pmd = { .pci_drv = { - .name = "rte_em_pmd", .id_table = pci_id_em_map, - .drv_flags = RTE_PCI_DRV_NEED_MAPPING | RTE_PCI_DRV_INTR_LSC | - RTE_PCI_DRV_DETACHABLE, + .drv_flags = RTE_PCI_DRV_NEED_MAPPING | RTE_PCI_DRV_INTR_LSC, + .probe = rte_eth_dev_pci_probe, + .remove = rte_eth_dev_pci_remove, }, .eth_dev_init = eth_em_dev_init, .eth_dev_uninit = eth_em_dev_uninit, @@ -402,13 +429,6 @@ static struct eth_driver rte_em_pmd = { }; static int -rte_em_pmd_init(const char *name __rte_unused, const char *params __rte_unused) -{ - rte_eth_driver_register(&rte_em_pmd); - return 0; -} - -static int em_hw_init(struct e1000_hw *hw) { int diag; @@ -546,6 +566,8 @@ em_set_pba(struct e1000_hw *hw) case e1000_pchlan: case e1000_pch2lan: case e1000_pch_lpt: + case e1000_pch_spt: + case e1000_pch_cnp: pba = E1000_PBA_26K; break; default: @@ -562,7 +584,9 @@ eth_em_start(struct rte_eth_dev *dev) E1000_DEV_PRIVATE(dev->data->dev_private); struct e1000_hw *hw = E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private); - struct rte_intr_handle *intr_handle = &dev->pci_dev->intr_handle; + struct rte_pci_device *pci_dev = + E1000_DEV_TO_PCI(dev); + struct rte_intr_handle *intr_handle = &pci_dev->intr_handle; int ret, mask; uint32_t intr_vector = 0; uint32_t *speeds; @@ -615,7 +639,7 @@ eth_em_start(struct rte_eth_dev *dev) dev->data->nb_rx_queues * sizeof(int), 0); if (intr_handle->intr_vec == NULL) { PMD_INIT_LOG(ERR, "Failed to allocate %d rx_queues" - " intr_vec\n", dev->data->nb_rx_queues); + " intr_vec", dev->data->nb_rx_queues); return -ENOMEM; } @@ -645,6 +669,7 @@ eth_em_start(struct rte_eth_dev *dev) speeds = &dev->data->dev_conf.link_speeds; if (*speeds == ETH_LINK_SPEED_AUTONEG) { hw->phy.autoneg_advertised = E1000_ALL_SPEED_DUPLEX; + hw->mac.autoneg = 1; } else { num_speeds = 0; autoneg = (*speeds & ETH_LINK_SPEED_FIXED) == 0; @@ -680,6 +705,17 @@ eth_em_start(struct rte_eth_dev *dev) } if (num_speeds == 0 || (!autoneg && 
(num_speeds > 1))) goto error_invalid_config; + + /* Set/reset the mac.autoneg based on the link speed, + * fixed or not + */ + if (!autoneg) { + hw->mac.autoneg = 0; + hw->mac.forced_speed_duplex = + hw->phy.autoneg_advertised; + } else { + hw->mac.autoneg = 1; + } } e1000_setup_link(hw); @@ -700,7 +736,7 @@ eth_em_start(struct rte_eth_dev *dev) (void *)dev); if (dev->data->dev_conf.intr_conf.lsc != 0) PMD_INIT_LOG(INFO, "lsc won't enable because of" - " no intr multiplex\n"); + " no intr multiplexn"); } /* check if rxq interrupt is enabled */ if (dev->data->dev_conf.intr_conf.rxq != 0) @@ -732,7 +768,8 @@ eth_em_stop(struct rte_eth_dev *dev) { struct rte_eth_link link; struct e1000_hw *hw = E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private); - struct rte_intr_handle *intr_handle = &dev->pci_dev->intr_handle; + struct rte_pci_device *pci_dev = E1000_DEV_TO_PCI(dev); + struct rte_intr_handle *intr_handle = &pci_dev->intr_handle; em_rxq_intr_disable(hw); em_lsc_intr_disable(hw); @@ -847,7 +884,9 @@ em_hardware_init(struct e1000_hw *hw) hw->fc.low_water = 0x5048; hw->fc.pause_time = 0x0650; hw->fc.refresh_time = 0x0400; - } else if (hw->mac.type == e1000_pch_lpt) { + } else if (hw->mac.type == e1000_pch_lpt || + hw->mac.type == e1000_pch_spt || + hw->mac.type == e1000_pch_cnp) { hw->fc.requested_mode = e1000_fc_full; } @@ -993,9 +1032,11 @@ static int eth_em_rx_queue_intr_enable(struct rte_eth_dev *dev, __rte_unused uint16_t queue_id) { struct e1000_hw *hw = E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private); + struct rte_pci_device *pci_dev = E1000_DEV_TO_PCI(dev); + struct rte_intr_handle *intr_handle = &pci_dev->intr_handle; em_rxq_intr_enable(hw); - rte_intr_enable(&dev->pci_dev->intr_handle); + rte_intr_enable(intr_handle); return 0; } @@ -1020,6 +1061,8 @@ em_get_max_pktlen(const struct e1000_hw *hw) case e1000_ich10lan: case e1000_pch2lan: case e1000_pch_lpt: + case e1000_pch_spt: + case e1000_pch_cnp: case e1000_82574: case e1000_80003es2lan: /* 9K Jumbo Frame size */ case e1000_82583: @@ -1039,6 +1082,7 @@ eth_em_infos_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info) { struct e1000_hw *hw = E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private); + dev_info->pci_dev = RTE_DEV_TO_PCI(dev->device); dev_info->min_rx_bufsize = 256; /* See BSIZE field of RCTL register. */ dev_info->max_rx_pktlen = em_get_max_pktlen(hw); dev_info->max_mac_addrs = hw->mac.rar_entry_count; @@ -1073,6 +1117,8 @@ eth_em_infos_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info) .nb_max = E1000_MAX_RING_DESC, .nb_min = E1000_MIN_RING_DESC, .nb_align = EM_TXD_ALIGN, + .nb_seg_max = EM_TX_MAX_SEG, + .nb_mtu_seg_max = EM_TX_MAX_MTU_SEG, }; dev_info->speed_capa = ETH_LINK_SPEED_10M_HD | ETH_LINK_SPEED_10M | @@ -1530,8 +1576,10 @@ eth_em_interrupt_get_status(struct rte_eth_dev *dev) * - On failure, a negative value. 
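The link-speed handling added to eth_em_start() treats a request of ETH_LINK_SPEED_AUTONEG (0) as full autonegotiation and otherwise derives autoneg from the FIXED bit, rejecting a fixed request that names more than one speed; when autoneg ends up off, mac.autoneg is cleared and forced_speed_duplex is taken from the advertised mask. A compact sketch of the validation step with placeholder speed flags:

#include <stdint.h>
#include <stdbool.h>

/* Placeholder flags modelling the link_speeds bitmap: bit 0 requests a
 * fixed (non-negotiated) speed, the remaining bits each select one speed.
 */
#define SPEED_FIXED (1u << 0)
#define SPEED_10M   (1u << 1)
#define SPEED_100M  (1u << 2)
#define SPEED_1G    (1u << 3)

/* Illustrative version of the check: a fixed-speed request must select
 * exactly one speed, otherwise autonegotiation stays on.
 */
static int validate_speeds(uint32_t speeds, bool *autoneg)
{
    unsigned int num = __builtin_popcount(speeds & ~SPEED_FIXED);

    if (speeds == 0) {            /* 0 means "autonegotiate everything" */
        *autoneg = true;
        return 0;
    }
    *autoneg = (speeds & SPEED_FIXED) == 0;
    if (num == 0 || (!*autoneg && num > 1))
        return -1;
    return 0;
}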
*/ static int -eth_em_interrupt_action(struct rte_eth_dev *dev) +eth_em_interrupt_action(struct rte_eth_dev *dev, + struct rte_intr_handle *intr_handle) { + struct rte_pci_device *pci_dev = E1000_DEV_TO_PCI(dev); struct e1000_hw *hw = E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private); struct e1000_interrupt *intr = @@ -1544,7 +1592,7 @@ eth_em_interrupt_action(struct rte_eth_dev *dev) return -1; intr->flags &= ~E1000_FLAG_NEED_LINK_UPDATE; - rte_intr_enable(&(dev->pci_dev->intr_handle)); + rte_intr_enable(intr_handle); /* set get_link_status to check register later */ hw->mac.get_link_status = 1; @@ -1565,8 +1613,8 @@ eth_em_interrupt_action(struct rte_eth_dev *dev) PMD_INIT_LOG(INFO, " Port %d: Link Down", dev->data->port_id); } PMD_INIT_LOG(DEBUG, "PCI Address: %04d:%02d:%02d:%d", - dev->pci_dev->addr.domain, dev->pci_dev->addr.bus, - dev->pci_dev->addr.devid, dev->pci_dev->addr.function); + pci_dev->addr.domain, pci_dev->addr.bus, + pci_dev->addr.devid, pci_dev->addr.function); tctl = E1000_READ_REG(hw, E1000_TCTL); rctl = E1000_READ_REG(hw, E1000_RCTL); @@ -1598,14 +1646,14 @@ eth_em_interrupt_action(struct rte_eth_dev *dev) * void */ static void -eth_em_interrupt_handler(__rte_unused struct rte_intr_handle *handle, - void *param) +eth_em_interrupt_handler(struct rte_intr_handle *handle, + void *param) { struct rte_eth_dev *dev = (struct rte_eth_dev *)param; eth_em_interrupt_get_status(dev); - eth_em_interrupt_action(dev); - _rte_eth_dev_callback_process(dev, RTE_ETH_EVENT_INTR_LSC); + eth_em_interrupt_action(dev, handle); + _rte_eth_dev_callback_process(dev, RTE_ETH_EVENT_INTR_LSC, NULL); } static int @@ -1799,10 +1847,6 @@ eth_em_set_mc_addr_list(struct rte_eth_dev *dev, return 0; } -struct rte_driver em_pmd_drv = { - .type = PMD_PDEV, - .init = rte_em_pmd_init, -}; - -PMD_REGISTER_DRIVER(em_pmd_drv, em); -DRIVER_REGISTER_PCI_TABLE(em, pci_id_em_map); +RTE_PMD_REGISTER_PCI(net_e1000_em, rte_em_pmd.pci_drv); +RTE_PMD_REGISTER_PCI_TABLE(net_e1000_em, pci_id_em_map); +RTE_PMD_REGISTER_KMOD_DEP(net_e1000_em, "* igb_uio | uio_pci_generic | vfio"); diff --git a/src/dpdk/drivers/net/e1000/em_rxtx.c b/src/dpdk/drivers/net/e1000/em_rxtx.c index 6d8750a8..d099d6a2 100644 --- a/src/dpdk/drivers/net/e1000/em_rxtx.c +++ b/src/dpdk/drivers/net/e1000/em_rxtx.c @@ -1,7 +1,7 @@ /*- * BSD LICENSE * - * Copyright(c) 2010-2015 Intel Corporation. All rights reserved. + * Copyright(c) 2010-2016 Intel Corporation. All rights reserved. * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -56,7 +56,6 @@ #include <rte_lcore.h> #include <rte_atomic.h> #include <rte_branch_prediction.h> -#include <rte_ring.h> #include <rte_mempool.h> #include <rte_malloc.h> #include <rte_mbuf.h> @@ -67,6 +66,7 @@ #include <rte_udp.h> #include <rte_tcp.h> #include <rte_sctp.h> +#include <rte_net.h> #include <rte_string_fns.h> #include "e1000_logs.h" @@ -78,6 +78,14 @@ #define E1000_RXDCTL_GRAN 0x01000000 /* RXDCTL Granularity */ +#define E1000_TX_OFFLOAD_MASK ( \ + PKT_TX_IP_CKSUM | \ + PKT_TX_L4_MASK | \ + PKT_TX_VLAN_PKT) + +#define E1000_TX_OFFLOAD_NOTSUP_MASK \ + (PKT_TX_OFFLOAD_MASK ^ E1000_TX_OFFLOAD_MASK) + /** * Structure associated with each descriptor of the RX ring of a RX queue. 
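The new E1000_TX_OFFLOAD_NOTSUP_MASK is built by XOR-ing the set of all known TX offload flags with the subset this driver handles, which leaves exactly the unsupported bits; eth_em_prep_pkts() then rejects any mbuf whose ol_flags intersect that mask. The same trick in a self-contained form with made-up flag values:

#include <stdint.h>

/* Made-up flag values; the real ones are the PKT_TX_* mbuf offload bits. */
#define OFF_IP_CKSUM (1ull << 0)
#define OFF_L4_MASK  (3ull << 1)
#define OFF_VLAN     (1ull << 3)
#define OFF_TSO      (1ull << 4)   /* known, but unsupported in this sketch */

#define OFF_ALL_KNOWN (OFF_IP_CKSUM | OFF_L4_MASK | OFF_VLAN | OFF_TSO)
#define OFF_SUPPORTED (OFF_IP_CKSUM | OFF_L4_MASK | OFF_VLAN)
/* XOR of "everything known" with "supported" leaves only unsupported bits. */
#define OFF_NOT_SUP   (OFF_ALL_KNOWN ^ OFF_SUPPORTED)

static int offloads_ok(uint64_t ol_flags)
{
    return (ol_flags & OFF_NOT_SUP) == 0;
}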
*/ @@ -611,7 +619,7 @@ end_of_tx: PMD_TX_LOG(DEBUG, "port_id=%u queue_id=%u tx_tail=%u nb_tx=%u", (unsigned) txq->port_id, (unsigned) txq->queue_id, (unsigned) tx_id, (unsigned) nb_tx); - E1000_PCI_REG_WRITE(txq->tdt_reg_addr, tx_id); + E1000_PCI_REG_WRITE_RELAXED(txq->tdt_reg_addr, tx_id); txq->tx_tail = tx_id; return nb_tx; @@ -619,6 +627,43 @@ end_of_tx: /********************************************************************* * + * TX prep functions + * + **********************************************************************/ +uint16_t +eth_em_prep_pkts(__rte_unused void *tx_queue, struct rte_mbuf **tx_pkts, + uint16_t nb_pkts) +{ + int i, ret; + struct rte_mbuf *m; + + for (i = 0; i < nb_pkts; i++) { + m = tx_pkts[i]; + + if (m->ol_flags & E1000_TX_OFFLOAD_NOTSUP_MASK) { + rte_errno = -ENOTSUP; + return i; + } + +#ifdef RTE_LIBRTE_ETHDEV_DEBUG + ret = rte_validate_tx_offload(m); + if (ret != 0) { + rte_errno = ret; + return i; + } +#endif + ret = rte_net_intel_cksum_prepare(m); + if (ret != 0) { + rte_errno = ret; + return i; + } + } + + return i; +} + +/********************************************************************* + * * RX functions * **********************************************************************/ diff --git a/src/dpdk/drivers/net/e1000/igb_ethdev.c b/src/dpdk/drivers/net/e1000/igb_ethdev.c index fbf4d090..d9397744 100644 --- a/src/dpdk/drivers/net/e1000/igb_ethdev.c +++ b/src/dpdk/drivers/net/e1000/igb_ethdev.c @@ -120,6 +120,8 @@ static int eth_igb_xstats_get_names(struct rte_eth_dev *dev, unsigned limit); static void eth_igb_stats_reset(struct rte_eth_dev *dev); static void eth_igb_xstats_reset(struct rte_eth_dev *dev); +static int eth_igb_fw_version_get(struct rte_eth_dev *dev, + char *fw_version, size_t fw_size); static void eth_igb_infos_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info); static const uint32_t *eth_igb_supported_ptypes_get(struct rte_eth_dev *dev); @@ -132,7 +134,8 @@ static int eth_igb_flow_ctrl_set(struct rte_eth_dev *dev, static int eth_igb_lsc_interrupt_setup(struct rte_eth_dev *dev); static int eth_igb_rxq_interrupt_setup(struct rte_eth_dev *dev); static int eth_igb_interrupt_get_status(struct rte_eth_dev *dev); -static int eth_igb_interrupt_action(struct rte_eth_dev *dev); +static int eth_igb_interrupt_action(struct rte_eth_dev *dev, + struct rte_intr_handle *handle); static void eth_igb_interrupt_handler(struct rte_intr_handle *handle, void *param); static int igb_hardware_init(struct e1000_hw *hw); @@ -306,22 +309,57 @@ static enum e1000_fc_mode igb_fc_setting = e1000_fc_full; * The set of PCI devices this driver supports */ static const struct rte_pci_id pci_id_igb_map[] = { - -#define RTE_PCI_DEV_ID_DECL_IGB(vend, dev) {RTE_PCI_DEVICE(vend, dev)}, -#include "rte_pci_dev_ids.h" - -{0}, + { RTE_PCI_DEVICE(E1000_INTEL_VENDOR_ID, E1000_DEV_ID_82576) }, + { RTE_PCI_DEVICE(E1000_INTEL_VENDOR_ID, E1000_DEV_ID_82576_FIBER) }, + { RTE_PCI_DEVICE(E1000_INTEL_VENDOR_ID, E1000_DEV_ID_82576_SERDES) }, + { RTE_PCI_DEVICE(E1000_INTEL_VENDOR_ID, E1000_DEV_ID_82576_QUAD_COPPER) }, + { RTE_PCI_DEVICE(E1000_INTEL_VENDOR_ID, E1000_DEV_ID_82576_QUAD_COPPER_ET2) }, + { RTE_PCI_DEVICE(E1000_INTEL_VENDOR_ID, E1000_DEV_ID_82576_NS) }, + { RTE_PCI_DEVICE(E1000_INTEL_VENDOR_ID, E1000_DEV_ID_82576_NS_SERDES) }, + { RTE_PCI_DEVICE(E1000_INTEL_VENDOR_ID, E1000_DEV_ID_82576_SERDES_QUAD) }, + + { RTE_PCI_DEVICE(E1000_INTEL_VENDOR_ID, E1000_DEV_ID_82575EB_COPPER) }, + { RTE_PCI_DEVICE(E1000_INTEL_VENDOR_ID, E1000_DEV_ID_82575EB_FIBER_SERDES) }, + { 
RTE_PCI_DEVICE(E1000_INTEL_VENDOR_ID, E1000_DEV_ID_82575GB_QUAD_COPPER) }, + + { RTE_PCI_DEVICE(E1000_INTEL_VENDOR_ID, E1000_DEV_ID_82580_COPPER) }, + { RTE_PCI_DEVICE(E1000_INTEL_VENDOR_ID, E1000_DEV_ID_82580_FIBER) }, + { RTE_PCI_DEVICE(E1000_INTEL_VENDOR_ID, E1000_DEV_ID_82580_SERDES) }, + { RTE_PCI_DEVICE(E1000_INTEL_VENDOR_ID, E1000_DEV_ID_82580_SGMII) }, + { RTE_PCI_DEVICE(E1000_INTEL_VENDOR_ID, E1000_DEV_ID_82580_COPPER_DUAL) }, + { RTE_PCI_DEVICE(E1000_INTEL_VENDOR_ID, E1000_DEV_ID_82580_QUAD_FIBER) }, + + { RTE_PCI_DEVICE(E1000_INTEL_VENDOR_ID, E1000_DEV_ID_I350_COPPER) }, + { RTE_PCI_DEVICE(E1000_INTEL_VENDOR_ID, E1000_DEV_ID_I350_FIBER) }, + { RTE_PCI_DEVICE(E1000_INTEL_VENDOR_ID, E1000_DEV_ID_I350_SERDES) }, + { RTE_PCI_DEVICE(E1000_INTEL_VENDOR_ID, E1000_DEV_ID_I350_SGMII) }, + { RTE_PCI_DEVICE(E1000_INTEL_VENDOR_ID, E1000_DEV_ID_I350_DA4) }, + { RTE_PCI_DEVICE(E1000_INTEL_VENDOR_ID, E1000_DEV_ID_I210_COPPER) }, + { RTE_PCI_DEVICE(E1000_INTEL_VENDOR_ID, E1000_DEV_ID_I210_COPPER_OEM1) }, + { RTE_PCI_DEVICE(E1000_INTEL_VENDOR_ID, E1000_DEV_ID_I210_COPPER_IT) }, + { RTE_PCI_DEVICE(E1000_INTEL_VENDOR_ID, E1000_DEV_ID_I210_FIBER) }, + { RTE_PCI_DEVICE(E1000_INTEL_VENDOR_ID, E1000_DEV_ID_I210_SERDES) }, + { RTE_PCI_DEVICE(E1000_INTEL_VENDOR_ID, E1000_DEV_ID_I210_SGMII) }, + { RTE_PCI_DEVICE(E1000_INTEL_VENDOR_ID, E1000_DEV_ID_I211_COPPER) }, + { RTE_PCI_DEVICE(E1000_INTEL_VENDOR_ID, E1000_DEV_ID_I354_BACKPLANE_1GBPS) }, + { RTE_PCI_DEVICE(E1000_INTEL_VENDOR_ID, E1000_DEV_ID_I354_SGMII) }, + { RTE_PCI_DEVICE(E1000_INTEL_VENDOR_ID, E1000_DEV_ID_I354_BACKPLANE_2_5GBPS) }, + { RTE_PCI_DEVICE(E1000_INTEL_VENDOR_ID, E1000_DEV_ID_DH89XXCC_SGMII) }, + { RTE_PCI_DEVICE(E1000_INTEL_VENDOR_ID, E1000_DEV_ID_DH89XXCC_SERDES) }, + { RTE_PCI_DEVICE(E1000_INTEL_VENDOR_ID, E1000_DEV_ID_DH89XXCC_BACKPLANE) }, + { RTE_PCI_DEVICE(E1000_INTEL_VENDOR_ID, E1000_DEV_ID_DH89XXCC_SFP) }, + { .vendor_id = 0, /* sentinel */ }, }; /* * The set of PCI devices this driver supports (for 82576&I350 VF) */ static const struct rte_pci_id pci_id_igbvf_map[] = { - -#define RTE_PCI_DEV_ID_DECL_IGBVF(vend, dev) {RTE_PCI_DEVICE(vend, dev)}, -#include "rte_pci_dev_ids.h" - -{0}, + { RTE_PCI_DEVICE(E1000_INTEL_VENDOR_ID, E1000_DEV_ID_82576_VF) }, + { RTE_PCI_DEVICE(E1000_INTEL_VENDOR_ID, E1000_DEV_ID_82576_VF_HV) }, + { RTE_PCI_DEVICE(E1000_INTEL_VENDOR_ID, E1000_DEV_ID_I350_VF) }, + { RTE_PCI_DEVICE(E1000_INTEL_VENDOR_ID, E1000_DEV_ID_I350_VF_HV) }, + { .vendor_id = 0, /* sentinel */ }, }; static const struct rte_eth_desc_lim rx_desc_lim = { @@ -334,6 +372,8 @@ static const struct rte_eth_desc_lim tx_desc_lim = { .nb_max = E1000_MAX_RING_DESC, .nb_min = E1000_MIN_RING_DESC, .nb_align = IGB_RXD_ALIGN, + .nb_seg_max = IGB_TX_MAX_SEG, + .nb_mtu_seg_max = IGB_TX_MAX_MTU_SEG, }; static const struct eth_dev_ops eth_igb_ops = { @@ -353,6 +393,7 @@ static const struct eth_dev_ops eth_igb_ops = { .xstats_get_names = eth_igb_xstats_get_names, .stats_reset = eth_igb_stats_reset, .xstats_reset = eth_igb_xstats_reset, + .fw_version_get = eth_igb_fw_version_get, .dev_infos_get = eth_igb_infos_get, .dev_supported_ptypes_get = eth_igb_supported_ptypes_get, .mtu_set = eth_igb_mtu_set, @@ -633,15 +674,16 @@ igb_pf_reset_hw(struct e1000_hw *hw) } static void -igb_identify_hardware(struct rte_eth_dev *dev) +igb_identify_hardware(struct rte_eth_dev *dev, struct rte_pci_device *pci_dev) { struct e1000_hw *hw = E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private); - hw->vendor_id = dev->pci_dev->id.vendor_id; - hw->device_id = 
dev->pci_dev->id.device_id; - hw->subsystem_vendor_id = dev->pci_dev->id.subsystem_vendor_id; - hw->subsystem_device_id = dev->pci_dev->id.subsystem_device_id; + + hw->vendor_id = pci_dev->id.vendor_id; + hw->device_id = pci_dev->id.device_id; + hw->subsystem_vendor_id = pci_dev->id.subsystem_vendor_id; + hw->subsystem_device_id = pci_dev->id.subsystem_device_id; e1000_set_mac_type(hw); @@ -708,7 +750,7 @@ static int eth_igb_dev_init(struct rte_eth_dev *eth_dev) { int error = 0; - struct rte_pci_device *pci_dev; + struct rte_pci_device *pci_dev = E1000_DEV_TO_PCI(eth_dev); struct e1000_hw *hw = E1000_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private); struct e1000_vfta * shadow_vfta = @@ -720,11 +762,10 @@ eth_igb_dev_init(struct rte_eth_dev *eth_dev) uint32_t ctrl_ext; - pci_dev = eth_dev->pci_dev; - eth_dev->dev_ops = ð_igb_ops; eth_dev->rx_pkt_burst = ð_igb_recv_pkts; eth_dev->tx_pkt_burst = ð_igb_xmit_pkts; + eth_dev->tx_pkt_prepare = ð_igb_prep_pkts; /* for secondary processes, we don't initialise any further as primary * has already done this work. Only check we don't need a different @@ -736,10 +777,11 @@ eth_igb_dev_init(struct rte_eth_dev *eth_dev) } rte_eth_copy_pci_info(eth_dev, pci_dev); + eth_dev->data->dev_flags = RTE_ETH_DEV_DETACHABLE; hw->hw_addr= (void *)pci_dev->mem_resource[0].addr; - igb_identify_hardware(eth_dev); + igb_identify_hardware(eth_dev, pci_dev); if (e1000_setup_init_funcs(hw, FALSE) != E1000_SUCCESS) { error = -EIO; goto err_late; @@ -873,6 +915,7 @@ static int eth_igb_dev_uninit(struct rte_eth_dev *eth_dev) { struct rte_pci_device *pci_dev; + struct rte_intr_handle *intr_handle; struct e1000_hw *hw; struct e1000_adapter *adapter = E1000_DEV_PRIVATE(eth_dev->data->dev_private); @@ -883,7 +926,8 @@ eth_igb_dev_uninit(struct rte_eth_dev *eth_dev) return -EPERM; hw = E1000_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private); - pci_dev = eth_dev->pci_dev; + pci_dev = E1000_DEV_TO_PCI(eth_dev); + intr_handle = &pci_dev->intr_handle; if (adapter->stopped == 0) eth_igb_close(eth_dev); @@ -902,9 +946,9 @@ eth_igb_dev_uninit(struct rte_eth_dev *eth_dev) igb_pf_host_uninit(eth_dev); /* disable uio intr before callback unregister */ - rte_intr_disable(&(pci_dev->intr_handle)); - rte_intr_callback_unregister(&(pci_dev->intr_handle), - eth_igb_interrupt_handler, (void *)eth_dev); + rte_intr_disable(intr_handle); + rte_intr_callback_unregister(intr_handle, + eth_igb_interrupt_handler, eth_dev); return 0; } @@ -916,6 +960,7 @@ static int eth_igbvf_dev_init(struct rte_eth_dev *eth_dev) { struct rte_pci_device *pci_dev; + struct rte_intr_handle *intr_handle; struct e1000_adapter *adapter = E1000_DEV_PRIVATE(eth_dev->data->dev_private); struct e1000_hw *hw = @@ -928,6 +973,7 @@ eth_igbvf_dev_init(struct rte_eth_dev *eth_dev) eth_dev->dev_ops = &igbvf_eth_dev_ops; eth_dev->rx_pkt_burst = ð_igb_recv_pkts; eth_dev->tx_pkt_burst = ð_igb_xmit_pkts; + eth_dev->tx_pkt_prepare = ð_igb_prep_pkts; /* for secondary processes, we don't initialise any further as primary * has already done this work. 
Only check we don't need a different @@ -938,9 +984,9 @@ eth_igbvf_dev_init(struct rte_eth_dev *eth_dev) return 0; } - pci_dev = eth_dev->pci_dev; - + pci_dev = E1000_DEV_TO_PCI(eth_dev); rte_eth_copy_pci_info(eth_dev, pci_dev); + eth_dev->data->dev_flags = RTE_ETH_DEV_DETACHABLE; hw->device_id = pci_dev->id.device_id; hw->vendor_id = pci_dev->id.vendor_id; @@ -1003,9 +1049,9 @@ eth_igbvf_dev_init(struct rte_eth_dev *eth_dev) eth_dev->data->port_id, pci_dev->id.vendor_id, pci_dev->id.device_id, "igb_mac_82576_vf"); - rte_intr_callback_register(&pci_dev->intr_handle, - eth_igbvf_interrupt_handler, - (void *)eth_dev); + intr_handle = &pci_dev->intr_handle; + rte_intr_callback_register(intr_handle, + eth_igbvf_interrupt_handler, eth_dev); return 0; } @@ -1015,7 +1061,7 @@ eth_igbvf_dev_uninit(struct rte_eth_dev *eth_dev) { struct e1000_adapter *adapter = E1000_DEV_PRIVATE(eth_dev->data->dev_private); - struct rte_pci_device *pci_dev = eth_dev->pci_dev; + struct rte_pci_device *pci_dev = E1000_DEV_TO_PCI(eth_dev); PMD_INIT_FUNC_TRACE(); @@ -1043,10 +1089,10 @@ eth_igbvf_dev_uninit(struct rte_eth_dev *eth_dev) static struct eth_driver rte_igb_pmd = { .pci_drv = { - .name = "rte_igb_pmd", .id_table = pci_id_igb_map, - .drv_flags = RTE_PCI_DRV_NEED_MAPPING | RTE_PCI_DRV_INTR_LSC | - RTE_PCI_DRV_DETACHABLE, + .drv_flags = RTE_PCI_DRV_NEED_MAPPING | RTE_PCI_DRV_INTR_LSC, + .probe = rte_eth_dev_pci_probe, + .remove = rte_eth_dev_pci_remove, }, .eth_dev_init = eth_igb_dev_init, .eth_dev_uninit = eth_igb_dev_uninit, @@ -1058,22 +1104,16 @@ static struct eth_driver rte_igb_pmd = { */ static struct eth_driver rte_igbvf_pmd = { .pci_drv = { - .name = "rte_igbvf_pmd", .id_table = pci_id_igbvf_map, - .drv_flags = RTE_PCI_DRV_NEED_MAPPING | RTE_PCI_DRV_DETACHABLE, + .drv_flags = RTE_PCI_DRV_NEED_MAPPING, + .probe = rte_eth_dev_pci_probe, + .remove = rte_eth_dev_pci_remove, }, .eth_dev_init = eth_igbvf_dev_init, .eth_dev_uninit = eth_igbvf_dev_uninit, .dev_private_size = sizeof(struct e1000_adapter), }; -static int -rte_igb_pmd_init(const char *name __rte_unused, const char *params __rte_unused) -{ - rte_eth_driver_register(&rte_igb_pmd); - return 0; -} - static void igb_vmdq_vlan_hw_filter_enable(struct rte_eth_dev *dev) { @@ -1085,20 +1125,6 @@ igb_vmdq_vlan_hw_filter_enable(struct rte_eth_dev *dev) E1000_WRITE_REG(hw, E1000_RCTL, rctl); } -/* - * VF Driver initialization routine. - * Invoked one at EAL init time. - * Register itself as the [Virtual Poll Mode] Driver of PCI IGB devices. 
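The rte_em_pmd/rte_igb_pmd/rte_igbvf_pmd updates drop the old name-plus-init registration in favour of generic probe/remove callbacks carried in the driver structure. A bare-bones sketch of that ops-struct pattern (names here are invented, not DPDK symbols):

#include <stdio.h>

/* Illustrative ops-struct registration: the bus layer invokes the driver's
 * probe/remove hooks instead of calling a per-driver init function.
 */
struct drv_ops {
    int (*probe)(const char *name);
    int (*remove)(const char *name);
};

static int model_probe(const char *name)
{
    printf("probe %s\n", name);
    return 0;
}

static int model_remove(const char *name)
{
    printf("remove %s\n", name);
    return 0;
}

static const struct drv_ops em_drv_model = {
    .probe  = model_probe,
    .remove = model_remove,
};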
- */ -static int -rte_igbvf_pmd_init(const char *name __rte_unused, const char *params __rte_unused) -{ - PMD_INIT_FUNC_TRACE(); - - rte_eth_driver_register(&rte_igbvf_pmd); - return 0; -} - static int igb_check_mq_mode(struct rte_eth_dev *dev) { @@ -1201,7 +1227,8 @@ eth_igb_start(struct rte_eth_dev *dev) E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private); struct e1000_adapter *adapter = E1000_DEV_PRIVATE(dev->data->dev_private); - struct rte_intr_handle *intr_handle = &dev->pci_dev->intr_handle; + struct rte_pci_device *pci_dev = E1000_DEV_TO_PCI(dev); + struct rte_intr_handle *intr_handle = &pci_dev->intr_handle; int ret, mask; uint32_t intr_vector = 0; uint32_t ctrl_ext; @@ -1265,7 +1292,7 @@ eth_igb_start(struct rte_eth_dev *dev) dev->data->nb_rx_queues * sizeof(int), 0); if (intr_handle->intr_vec == NULL) { PMD_INIT_LOG(ERR, "Failed to allocate %d rx_queues" - " intr_vec\n", dev->data->nb_rx_queues); + " intr_vec", dev->data->nb_rx_queues); return -ENOMEM; } } @@ -1311,6 +1338,7 @@ eth_igb_start(struct rte_eth_dev *dev) speeds = &dev->data->dev_conf.link_speeds; if (*speeds == ETH_LINK_SPEED_AUTONEG) { hw->phy.autoneg_advertised = E1000_ALL_SPEED_DUPLEX; + hw->mac.autoneg = 1; } else { num_speeds = 0; autoneg = (*speeds & ETH_LINK_SPEED_FIXED) == 0; @@ -1346,6 +1374,17 @@ eth_igb_start(struct rte_eth_dev *dev) } if (num_speeds == 0 || (!autoneg && (num_speeds > 1))) goto error_invalid_config; + + /* Set/reset the mac.autoneg based on the link speed, + * fixed or not + */ + if (!autoneg) { + hw->mac.autoneg = 0; + hw->mac.forced_speed_duplex = + hw->phy.autoneg_advertised; + } else { + hw->mac.autoneg = 1; + } } e1000_setup_link(hw); @@ -1360,7 +1399,7 @@ eth_igb_start(struct rte_eth_dev *dev) (void *)dev); if (dev->data->dev_conf.intr_conf.lsc != 0) PMD_INIT_LOG(INFO, "lsc won't enable because of" - " no intr multiplex\n"); + " no intr multiplex"); } /* check if rxq interrupt is enabled */ @@ -1397,11 +1436,12 @@ eth_igb_stop(struct rte_eth_dev *dev) struct e1000_hw *hw = E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private); struct e1000_filter_info *filter_info = E1000_DEV_PRIVATE_TO_FILTER_INFO(dev->data->dev_private); + struct rte_pci_device *pci_dev = E1000_DEV_TO_PCI(dev); struct rte_eth_link link; struct e1000_flex_filter *p_flex; struct e1000_5tuple_filter *p_5tuple, *p_5tuple_next; struct e1000_2tuple_filter *p_2tuple, *p_2tuple_next; - struct rte_intr_handle *intr_handle = &dev->pci_dev->intr_handle; + struct rte_intr_handle *intr_handle = &pci_dev->intr_handle; igb_intr_disable(hw); @@ -1501,7 +1541,8 @@ eth_igb_close(struct rte_eth_dev *dev) struct e1000_adapter *adapter = E1000_DEV_PRIVATE(dev->data->dev_private); struct rte_eth_link link; - struct rte_pci_device *pci_dev; + struct rte_pci_device *pci_dev = E1000_DEV_TO_PCI(dev); + struct rte_intr_handle *intr_handle = &pci_dev->intr_handle; eth_igb_stop(dev); adapter->stopped = 1; @@ -1521,10 +1562,9 @@ eth_igb_close(struct rte_eth_dev *dev) igb_dev_free_queues(dev); - pci_dev = dev->pci_dev; - if (pci_dev->intr_handle.intr_vec) { - rte_free(pci_dev->intr_handle.intr_vec); - pci_dev->intr_handle.intr_vec = NULL; + if (intr_handle->intr_vec) { + rte_free(intr_handle->intr_vec); + intr_handle->intr_vec = NULL; } memset(&link, 0, sizeof(link)); @@ -1948,11 +1988,64 @@ eth_igbvf_stats_reset(struct rte_eth_dev *dev) offsetof(struct e1000_vf_stats, gprc)); } +static int +eth_igb_fw_version_get(struct rte_eth_dev *dev, char *fw_version, + size_t fw_size) +{ + struct e1000_hw *hw = E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private); + 
struct e1000_fw_version fw; + int ret; + + e1000_get_fw_version(hw, &fw); + + switch (hw->mac.type) { + case e1000_i210: + case e1000_i211: + if (!(e1000_get_flash_presence_i210(hw))) { + ret = snprintf(fw_version, fw_size, + "%2d.%2d-%d", + fw.invm_major, fw.invm_minor, + fw.invm_img_type); + break; + } + /* fall through */ + default: + /* if option rom is valid, display its version too */ + if (fw.or_valid) { + ret = snprintf(fw_version, fw_size, + "%d.%d, 0x%08x, %d.%d.%d", + fw.eep_major, fw.eep_minor, fw.etrack_id, + fw.or_major, fw.or_build, fw.or_patch); + /* no option rom */ + } else { + if (fw.etrack_id != 0X0000) { + ret = snprintf(fw_version, fw_size, + "%d.%d, 0x%08x", + fw.eep_major, fw.eep_minor, + fw.etrack_id); + } else { + ret = snprintf(fw_version, fw_size, + "%d.%d.%d", + fw.eep_major, fw.eep_minor, + fw.eep_build); + } + } + break; + } + + ret += 1; /* add the size of '\0' */ + if (fw_size < (u32)ret) + return ret; + else + return 0; +} + static void eth_igb_infos_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info) { struct e1000_hw *hw = E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private); + dev_info->pci_dev = RTE_DEV_TO_PCI(dev->device); dev_info->min_rx_bufsize = 256; /* See BSIZE field of RCTL register. */ dev_info->max_rx_pktlen = 0x3FFF; /* See RLPML register. */ dev_info->max_mac_addrs = hw->mac.rar_entry_count; @@ -2081,6 +2174,7 @@ eth_igbvf_infos_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info) { struct e1000_hw *hw = E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private); + dev_info->pci_dev = RTE_DEV_TO_PCI(dev->device); dev_info->min_rx_bufsize = 256; /* See BSIZE field of RCTL register. */ dev_info->max_rx_pktlen = 0x3FFF; /* See RLPML register. */ dev_info->max_mac_addrs = hw->mac.rar_entry_count; @@ -2605,12 +2699,14 @@ eth_igb_interrupt_get_status(struct rte_eth_dev *dev) * - On failure, a negative value. 
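eth_igb_fw_version_get() formats into the caller-supplied buffer and then uses the snprintf return value, plus one byte for the terminating NUL, to report how much space would have been needed when the buffer is too small; 0 means the string fit. A standalone sketch of that contract (format_fw_version is hypothetical):

#include <stdio.h>

/* Illustrative contract: return 0 when the string fits, otherwise the
 * number of bytes, including the NUL, that the caller must provide.
 */
static int format_fw_version(char *buf, size_t buf_size,
                             int major, int minor, unsigned int etrack)
{
    int ret = snprintf(buf, buf_size, "%d.%d, 0x%08x", major, minor, etrack);

    if (ret < 0)
        return ret;
    ret += 1;                    /* account for the trailing '\0' */
    if (buf_size < (size_t)ret)
        return ret;              /* buffer too small: report needed size */
    return 0;
}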
*/ static int -eth_igb_interrupt_action(struct rte_eth_dev *dev) +eth_igb_interrupt_action(struct rte_eth_dev *dev, + struct rte_intr_handle *intr_handle) { struct e1000_hw *hw = E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private); struct e1000_interrupt *intr = E1000_DEV_PRIVATE_TO_INTR(dev->data->dev_private); + struct rte_pci_device *pci_dev = E1000_DEV_TO_PCI(dev); uint32_t tctl, rctl; struct rte_eth_link link; int ret; @@ -2621,7 +2717,7 @@ eth_igb_interrupt_action(struct rte_eth_dev *dev) } igb_intr_enable(dev); - rte_intr_enable(&(dev->pci_dev->intr_handle)); + rte_intr_enable(intr_handle); if (intr->flags & E1000_FLAG_NEED_LINK_UPDATE) { intr->flags &= ~E1000_FLAG_NEED_LINK_UPDATE; @@ -2649,10 +2745,10 @@ eth_igb_interrupt_action(struct rte_eth_dev *dev) } PMD_INIT_LOG(DEBUG, "PCI Address: %04d:%02d:%02d:%d", - dev->pci_dev->addr.domain, - dev->pci_dev->addr.bus, - dev->pci_dev->addr.devid, - dev->pci_dev->addr.function); + pci_dev->addr.domain, + pci_dev->addr.bus, + pci_dev->addr.devid, + pci_dev->addr.function); tctl = E1000_READ_REG(hw, E1000_TCTL); rctl = E1000_READ_REG(hw, E1000_RCTL); if (link.link_status) { @@ -2667,7 +2763,7 @@ eth_igb_interrupt_action(struct rte_eth_dev *dev) E1000_WRITE_REG(hw, E1000_TCTL, tctl); E1000_WRITE_REG(hw, E1000_RCTL, rctl); E1000_WRITE_FLUSH(hw); - _rte_eth_dev_callback_process(dev, RTE_ETH_EVENT_INTR_LSC); + _rte_eth_dev_callback_process(dev, RTE_ETH_EVENT_INTR_LSC, NULL); } return 0; @@ -2685,13 +2781,12 @@ eth_igb_interrupt_action(struct rte_eth_dev *dev) * void */ static void -eth_igb_interrupt_handler(__rte_unused struct rte_intr_handle *handle, - void *param) +eth_igb_interrupt_handler(struct rte_intr_handle *handle, void *param) { struct rte_eth_dev *dev = (struct rte_eth_dev *)param; eth_igb_interrupt_get_status(dev); - eth_igb_interrupt_action(dev); + eth_igb_interrupt_action(dev, handle); } static int @@ -2727,11 +2822,11 @@ void igbvf_mbx_process(struct rte_eth_dev *dev) /* PF reset VF event */ if (in_msg == E1000_PF_CONTROL_MSG) - _rte_eth_dev_callback_process(dev, RTE_ETH_EVENT_INTR_RESET); + _rte_eth_dev_callback_process(dev, RTE_ETH_EVENT_INTR_RESET, NULL); } static int -eth_igbvf_interrupt_action(struct rte_eth_dev *dev) +eth_igbvf_interrupt_action(struct rte_eth_dev *dev, struct rte_intr_handle *intr_handle) { struct e1000_interrupt *intr = E1000_DEV_PRIVATE_TO_INTR(dev->data->dev_private); @@ -2742,19 +2837,19 @@ eth_igbvf_interrupt_action(struct rte_eth_dev *dev) } igbvf_intr_enable(dev); - rte_intr_enable(&dev->pci_dev->intr_handle); + rte_intr_enable(intr_handle); return 0; } static void -eth_igbvf_interrupt_handler(__rte_unused struct rte_intr_handle *handle, +eth_igbvf_interrupt_handler(struct rte_intr_handle *handle, void *param) { struct rte_eth_dev *dev = (struct rte_eth_dev *)param; eth_igbvf_interrupt_get_status(dev); - eth_igbvf_interrupt_action(dev); + eth_igbvf_interrupt_action(dev, handle); } static int @@ -3027,8 +3122,9 @@ igbvf_dev_start(struct rte_eth_dev *dev) E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private); struct e1000_adapter *adapter = E1000_DEV_PRIVATE(dev->data->dev_private); + struct rte_pci_device *pci_dev = E1000_DEV_TO_PCI(dev); + struct rte_intr_handle *intr_handle = &pci_dev->intr_handle; int ret; - struct rte_intr_handle *intr_handle = &dev->pci_dev->intr_handle; uint32_t intr_vector = 0; PMD_INIT_FUNC_TRACE(); @@ -3063,7 +3159,7 @@ igbvf_dev_start(struct rte_eth_dev *dev) dev->data->nb_rx_queues * sizeof(int), 0); if (!intr_handle->intr_vec) { PMD_INIT_LOG(ERR, "Failed to allocate %d rx_queues" - 
" intr_vec\n", dev->data->nb_rx_queues); + " intr_vec", dev->data->nb_rx_queues); return -ENOMEM; } } @@ -3082,7 +3178,8 @@ igbvf_dev_start(struct rte_eth_dev *dev) static void igbvf_dev_stop(struct rte_eth_dev *dev) { - struct rte_intr_handle *intr_handle = &dev->pci_dev->intr_handle; + struct rte_pci_device *pci_dev = E1000_DEV_TO_PCI(dev); + struct rte_intr_handle *intr_handle = &pci_dev->intr_handle; PMD_INIT_FUNC_TRACE(); @@ -3281,7 +3378,7 @@ eth_igb_rss_reta_update(struct rte_eth_dev *dev, if (reta_size != ETH_RSS_RETA_SIZE_128) { PMD_DRV_LOG(ERR, "The size of hash lookup table configured " "(%d) doesn't match the number hardware can supported " - "(%d)\n", reta_size, ETH_RSS_RETA_SIZE_128); + "(%d)", reta_size, ETH_RSS_RETA_SIZE_128); return -EINVAL; } @@ -3322,7 +3419,7 @@ eth_igb_rss_reta_query(struct rte_eth_dev *dev, if (reta_size != ETH_RSS_RETA_SIZE_128) { PMD_DRV_LOG(ERR, "The size of hash lookup table configured " "(%d) doesn't match the number hardware can supported " - "(%d)\n", reta_size, ETH_RSS_RETA_SIZE_128); + "(%d)", reta_size, ETH_RSS_RETA_SIZE_128); return -EINVAL; } @@ -3443,7 +3540,7 @@ eth_igb_syn_filter_handle(struct rte_eth_dev *dev, (struct rte_eth_syn_filter *)arg); break; default: - PMD_DRV_LOG(ERR, "unsupported operation %u\n", filter_op); + PMD_DRV_LOG(ERR, "unsupported operation %u", filter_op); ret = -EINVAL; break; } @@ -5049,16 +5146,6 @@ eth_igb_set_eeprom(struct rte_eth_dev *dev, return nvm->ops.write(hw, first, length, data); } -static struct rte_driver pmd_igb_drv = { - .type = PMD_PDEV, - .init = rte_igb_pmd_init, -}; - -static struct rte_driver pmd_igbvf_drv = { - .type = PMD_PDEV, - .init = rte_igbvf_pmd_init, -}; - static int eth_igb_rx_queue_intr_disable(struct rte_eth_dev *dev, uint16_t queue_id) { @@ -5077,6 +5164,8 @@ eth_igb_rx_queue_intr_enable(struct rte_eth_dev *dev, uint16_t queue_id) { struct e1000_hw *hw = E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private); + struct rte_pci_device *pci_dev = E1000_DEV_TO_PCI(dev); + struct rte_intr_handle *intr_handle = &pci_dev->intr_handle; uint32_t mask = 1 << queue_id; uint32_t regval; @@ -5084,7 +5173,7 @@ eth_igb_rx_queue_intr_enable(struct rte_eth_dev *dev, uint16_t queue_id) E1000_WRITE_REG(hw, E1000_EIMS, regval | mask); E1000_WRITE_FLUSH(hw); - rte_intr_enable(&dev->pci_dev->intr_handle); + rte_intr_enable(intr_handle); return 0; } @@ -5148,8 +5237,8 @@ eth_igb_configure_msix_intr(struct rte_eth_dev *dev) uint32_t vec = E1000_MISC_VEC_ID; uint32_t base = E1000_MISC_VEC_ID; uint32_t misc_shift = 0; - - struct rte_intr_handle *intr_handle = &dev->pci_dev->intr_handle; + struct rte_pci_device *pci_dev = E1000_DEV_TO_PCI(dev); + struct rte_intr_handle *intr_handle = &pci_dev->intr_handle; /* won't configure msix register if no mapping is done * between intr vector and event fd @@ -5220,7 +5309,9 @@ eth_igb_configure_msix_intr(struct rte_eth_dev *dev) E1000_WRITE_FLUSH(hw); } -PMD_REGISTER_DRIVER(pmd_igb_drv, igb); -DRIVER_REGISTER_PCI_TABLE(igb, pci_id_igb_map); -PMD_REGISTER_DRIVER(pmd_igbvf_drv, igbvf); -DRIVER_REGISTER_PCI_TABLE(igbvf, pci_id_igbvf_map); +RTE_PMD_REGISTER_PCI(net_e1000_igb, rte_igb_pmd.pci_drv); +RTE_PMD_REGISTER_PCI_TABLE(net_e1000_igb, pci_id_igb_map); +RTE_PMD_REGISTER_KMOD_DEP(net_e1000_igb, "* igb_uio | uio_pci_generic | vfio"); +RTE_PMD_REGISTER_PCI(net_e1000_igb_vf, rte_igbvf_pmd.pci_drv); +RTE_PMD_REGISTER_PCI_TABLE(net_e1000_igb_vf, pci_id_igbvf_map); +RTE_PMD_REGISTER_KMOD_DEP(net_e1000_igb_vf, "* igb_uio | vfio"); diff --git a/src/dpdk/drivers/net/e1000/igb_pf.c 
b/src/dpdk/drivers/net/e1000/igb_pf.c index 5845bc22..67da3c24 100644 --- a/src/dpdk/drivers/net/e1000/igb_pf.c +++ b/src/dpdk/drivers/net/e1000/igb_pf.c @@ -57,7 +57,9 @@ static inline uint16_t dev_num_vf(struct rte_eth_dev *eth_dev) { - return eth_dev->pci_dev->max_vfs; + struct rte_pci_device *pci_dev = E1000_DEV_TO_PCI(eth_dev); + + return pci_dev->max_vfs; } static inline diff --git a/src/dpdk/drivers/net/e1000/igb_rxtx.c b/src/dpdk/drivers/net/e1000/igb_rxtx.c index 9d80a0b3..45f3f249 100644 --- a/src/dpdk/drivers/net/e1000/igb_rxtx.c +++ b/src/dpdk/drivers/net/e1000/igb_rxtx.c @@ -1,7 +1,7 @@ /*- * BSD LICENSE * - * Copyright(c) 2010-2015 Intel Corporation. All rights reserved. + * Copyright(c) 2010-2016 Intel Corporation. All rights reserved. * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -56,7 +56,6 @@ #include <rte_lcore.h> #include <rte_atomic.h> #include <rte_branch_prediction.h> -#include <rte_ring.h> #include <rte_mempool.h> #include <rte_malloc.h> #include <rte_mbuf.h> @@ -66,6 +65,7 @@ #include <rte_udp.h> #include <rte_tcp.h> #include <rte_sctp.h> +#include <rte_net.h> #include <rte_string_fns.h> #include "e1000_logs.h" @@ -79,6 +79,9 @@ PKT_TX_L4_MASK | \ PKT_TX_TCP_SEG) +#define IGB_TX_OFFLOAD_NOTSUP_MASK \ + (PKT_TX_OFFLOAD_MASK ^ IGB_TX_OFFLOAD_MASK) + /** * Structure associated with each descriptor of the RX ring of a RX queue. */ @@ -606,7 +609,7 @@ eth_igb_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, /* * Set the Transmit Descriptor Tail (TDT). */ - E1000_PCI_REG_WRITE(txq->tdt_reg_addr, tx_id); + E1000_PCI_REG_WRITE_RELAXED(txq->tdt_reg_addr, tx_id); PMD_TX_LOG(DEBUG, "port_id=%u queue_id=%u tx_tail=%u nb_tx=%u", (unsigned) txq->port_id, (unsigned) txq->queue_id, (unsigned) tx_id, (unsigned) nb_tx); @@ -617,6 +620,52 @@ eth_igb_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, /********************************************************************* * + * TX prep functions + * + **********************************************************************/ +uint16_t +eth_igb_prep_pkts(__rte_unused void *tx_queue, struct rte_mbuf **tx_pkts, + uint16_t nb_pkts) +{ + int i, ret; + struct rte_mbuf *m; + + for (i = 0; i < nb_pkts; i++) { + m = tx_pkts[i]; + + /* Check some limitations for TSO in hardware */ + if (m->ol_flags & PKT_TX_TCP_SEG) + if ((m->tso_segsz > IGB_TSO_MAX_MSS) || + (m->l2_len + m->l3_len + m->l4_len > + IGB_TSO_MAX_HDRLEN)) { + rte_errno = -EINVAL; + return i; + } + + if (m->ol_flags & IGB_TX_OFFLOAD_NOTSUP_MASK) { + rte_errno = -ENOTSUP; + return i; + } + +#ifdef RTE_LIBRTE_ETHDEV_DEBUG + ret = rte_validate_tx_offload(m); + if (ret != 0) { + rte_errno = ret; + return i; + } +#endif + ret = rte_net_intel_cksum_prepare(m); + if (ret != 0) { + rte_errno = ret; + return i; + } + } + + return i; +} + +/********************************************************************* + * * RX functions * **********************************************************************/ @@ -748,7 +797,9 @@ rx_desc_error_to_pkt_flags(uint32_t rx_status) */ static uint64_t error_to_pkt_flags_map[4] = { - 0, PKT_RX_L4_CKSUM_BAD, PKT_RX_IP_CKSUM_BAD, + PKT_RX_IP_CKSUM_GOOD | PKT_RX_L4_CKSUM_GOOD, + PKT_RX_IP_CKSUM_GOOD | PKT_RX_L4_CKSUM_BAD, + PKT_RX_IP_CKSUM_BAD | PKT_RX_L4_CKSUM_GOOD, PKT_RX_IP_CKSUM_BAD | PKT_RX_L4_CKSUM_BAD }; return error_to_pkt_flags_map[(rx_status >> @@ -1363,6 +1414,7 @@ eth_igb_tx_queue_setup(struct rte_eth_dev *dev, igb_reset_tx_queue(txq, dev); dev->tx_pkt_burst = eth_igb_xmit_pkts; + dev->tx_pkt_prepare 
= ð_igb_prep_pkts; dev->data->tx_queues[queue_idx] = txq; return 0; @@ -1528,7 +1580,7 @@ eth_igb_rx_queue_count(struct rte_eth_dev *dev, uint16_t rx_queue_id) desc - rxq->nb_rx_desc]); } - return 0; + return desc; } int diff --git a/src/dpdk/drivers/net/enic/base/vnic_dev.c b/src/dpdk/drivers/net/enic/base/vnic_dev.c index dddb1dcd..84e4840a 100644 --- a/src/dpdk/drivers/net/enic/base/vnic_dev.c +++ b/src/dpdk/drivers/net/enic/base/vnic_dev.c @@ -266,32 +266,35 @@ void vnic_dev_clear_desc_ring(struct vnic_dev_ring *ring) memset(ring->descs, 0, ring->size); } -int vnic_dev_alloc_desc_ring(__attribute__((unused)) struct vnic_dev *vdev, +int vnic_dev_alloc_desc_ring(struct vnic_dev *vdev, struct vnic_dev_ring *ring, - unsigned int desc_count, unsigned int desc_size, unsigned int socket_id, + unsigned int desc_count, unsigned int desc_size, + __attribute__((unused)) unsigned int socket_id, char *z_name) { - const struct rte_memzone *rz; + void *alloc_addr = NULL; + dma_addr_t alloc_pa = 0; vnic_dev_desc_ring_size(ring, desc_count, desc_size); - - rz = rte_memzone_reserve_aligned(z_name, - ring->size_unaligned, socket_id, - 0, ENIC_ALIGN); - if (!rz) { + alloc_addr = vdev->alloc_consistent(vdev->priv, + ring->size_unaligned, + &alloc_pa, (u8 *)z_name); + if (!alloc_addr) { pr_err("Failed to allocate ring (size=%d), aborting\n", (int)ring->size); return -ENOMEM; } - - ring->descs_unaligned = rz->addr; - if (!ring->descs_unaligned) { + ring->descs_unaligned = alloc_addr; + if (!alloc_pa) { pr_err("Failed to map allocated ring (size=%d), aborting\n", (int)ring->size); + vdev->free_consistent(vdev->priv, + ring->size_unaligned, + alloc_addr, + alloc_pa); return -ENOMEM; } - - ring->base_addr_unaligned = (dma_addr_t)rz->phys_addr; + ring->base_addr_unaligned = alloc_pa; ring->base_addr = VNIC_ALIGN(ring->base_addr_unaligned, ring->base_align); @@ -308,8 +311,13 @@ int vnic_dev_alloc_desc_ring(__attribute__((unused)) struct vnic_dev *vdev, void vnic_dev_free_desc_ring(__attribute__((unused)) struct vnic_dev *vdev, struct vnic_dev_ring *ring) { - if (ring->descs) + if (ring->descs) { + vdev->free_consistent(vdev->priv, + ring->size_unaligned, + ring->descs_unaligned, + ring->base_addr_unaligned); ring->descs = NULL; + } } static int _vnic_dev_cmd(struct vnic_dev *vdev, enum vnic_devcmd_cmd cmd, @@ -668,7 +676,6 @@ int vnic_dev_packet_filter(struct vnic_dev *vdev, int directed, int multicast, (allmulti ? 
CMD_PFILTER_ALL_MULTICAST : 0); err = vnic_dev_cmd(vdev, CMD_PACKET_FILTER, &a0, &a1, wait); - if (err) pr_err("Can't set packet filter\n"); diff --git a/src/dpdk/drivers/net/enic/base/vnic_rq.c b/src/dpdk/drivers/net/enic/base/vnic_rq.c index 0e700a12..10a40c1b 100644 --- a/src/dpdk/drivers/net/enic/base/vnic_rq.c +++ b/src/dpdk/drivers/net/enic/base/vnic_rq.c @@ -87,9 +87,11 @@ void vnic_rq_init_start(struct vnic_rq *rq, unsigned int cq_index, iowrite32(0, &rq->ctrl->error_status); iowrite32(fetch_index, &rq->ctrl->fetch_index); iowrite32(posted_index, &rq->ctrl->posted_index); - if (rq->is_sop) - iowrite32(((rq->is_sop << 10) | rq->data_queue_idx), + if (rq->data_queue_enable) + iowrite32(((1 << 10) | rq->data_queue_idx), &rq->ctrl->data_ring); + else + iowrite32(0, &rq->ctrl->data_ring); } void vnic_rq_init(struct vnic_rq *rq, unsigned int cq_index, diff --git a/src/dpdk/drivers/net/enic/base/vnic_rq.h b/src/dpdk/drivers/net/enic/base/vnic_rq.h index fd9e1704..f3fd39f7 100644 --- a/src/dpdk/drivers/net/enic/base/vnic_rq.h +++ b/src/dpdk/drivers/net/enic/base/vnic_rq.h @@ -91,11 +91,13 @@ struct vnic_rq { uint16_t rxst_idx; uint32_t tot_pkts; uint16_t data_queue_idx; + uint8_t data_queue_enable; uint8_t is_sop; uint8_t in_use; struct rte_mbuf *pkt_first_seg; struct rte_mbuf *pkt_last_seg; unsigned int max_mbufs_per_pkt; + uint16_t tot_nb_desc; }; static inline unsigned int vnic_rq_desc_avail(struct vnic_rq *rq) diff --git a/src/dpdk/drivers/net/enic/enic.h b/src/dpdk/drivers/net/enic/enic.h index 9117cc76..a4540178 100644 --- a/src/dpdk/drivers/net/enic/enic.h +++ b/src/dpdk/drivers/net/enic/enic.h @@ -60,6 +60,7 @@ #define ENIC_RQ_MAX 16 #define ENIC_CQ_MAX (ENIC_WQ_MAX + (ENIC_RQ_MAX / 2)) #define ENIC_INTR_MAX (ENIC_CQ_MAX + 2) +#define ENIC_MAX_MAC_ADDR 64 #define VLAN_ETH_HLEN 18 @@ -97,13 +98,11 @@ struct enic_fdir { void (*copy_fltr_fn)(struct filter_v2 *filt, struct rte_eth_fdir_input *input, struct rte_eth_fdir_masks *masks); - }; struct enic_soft_stats { rte_atomic64_t rx_nombuf; rte_atomic64_t rx_packet_errors; - rte_atomic64_t tx_oversized; }; struct enic_memzone_entry { @@ -168,17 +167,32 @@ struct enic { /* linked list storing memory allocations */ LIST_HEAD(enic_memzone_list, enic_memzone_entry) memzone_list; rte_spinlock_t memzone_list_lock; + rte_spinlock_t mtu_lock; }; -static inline unsigned int enic_sop_rq(unsigned int rq) +/* Get the CQ index from a Start of Packet(SOP) RQ index */ +static inline unsigned int enic_sop_rq_idx_to_cq_idx(unsigned int sop_idx) { - return rq * 2; + return sop_idx / 2; } -static inline unsigned int enic_data_rq(unsigned int rq) +/* Get the RTE RQ index from a Start of Packet(SOP) RQ index */ +static inline unsigned int enic_sop_rq_idx_to_rte_idx(unsigned int sop_idx) { - return rq * 2 + 1; + return sop_idx / 2; +} + +/* Get the Start of Packet(SOP) RQ index from a RTE RQ index */ +static inline unsigned int enic_rte_rq_idx_to_sop_idx(unsigned int rte_idx) +{ + return rte_idx * 2; +} + +/* Get the Data RQ index from a RTE RQ index */ +static inline unsigned int enic_rte_rq_idx_to_data_idx(unsigned int rte_idx) +{ + return rte_idx * 2 + 1; } static inline unsigned int enic_vnic_rq_count(struct enic *enic) @@ -252,7 +266,7 @@ extern int enic_stop_rq(struct enic *enic, uint16_t queue_idx); extern void enic_free_rq(void *rxq); extern int enic_alloc_rq(struct enic *enic, uint16_t queue_idx, unsigned int socket_id, struct rte_mempool *mp, - uint16_t nb_desc); + uint16_t nb_desc, uint16_t free_thresh); extern int enic_set_rss_nic_cfg(struct enic 
*enic); extern int enic_set_vnic_res(struct enic *enic); extern void enic_set_hdr_split_size(struct enic *enic, u16 split_hdr_size); @@ -264,8 +278,8 @@ extern void enic_dev_stats_get(struct enic *enic, struct rte_eth_stats *r_stats); extern void enic_dev_stats_clear(struct enic *enic); extern void enic_add_packet_filter(struct enic *enic); -extern void enic_set_mac_address(struct enic *enic, uint8_t *mac_addr); -extern void enic_del_mac_address(struct enic *enic); +void enic_set_mac_address(struct enic *enic, uint8_t *mac_addr); +void enic_del_mac_address(struct enic *enic, int mac_index); extern unsigned int enic_cleanup_wq(struct enic *enic, struct vnic_wq *wq); extern void enic_send_pkt(struct enic *enic, struct vnic_wq *wq, struct rte_mbuf *tx_pkt, unsigned short len, @@ -278,14 +292,17 @@ extern int enic_clsf_init(struct enic *enic); extern void enic_clsf_destroy(struct enic *enic); uint16_t enic_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts); +uint16_t enic_dummy_recv_pkts(__rte_unused void *rx_queue, + __rte_unused struct rte_mbuf **rx_pkts, + __rte_unused uint16_t nb_pkts); uint16_t enic_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts); int enic_set_mtu(struct enic *enic, uint16_t new_mtu); +int enic_link_update(struct enic *enic); void enic_fdir_info(struct enic *enic); void enic_fdir_info_get(struct enic *enic, struct rte_eth_fdir_info *stats); void copy_fltr_v1(struct filter_v2 *fltr, struct rte_eth_fdir_input *input, - struct rte_eth_fdir_masks *masks); -void copy_fltr_v2(__rte_unused struct filter_v2 *fltr, - __rte_unused struct rte_eth_fdir_input *input, __rte_unused struct rte_eth_fdir_masks *masks); +void copy_fltr_v2(struct filter_v2 *fltr, struct rte_eth_fdir_input *input, + struct rte_eth_fdir_masks *masks); #endif /* _ENIC_H_ */ diff --git a/src/dpdk/drivers/net/enic/enic_clsf.c b/src/dpdk/drivers/net/enic/enic_clsf.c index 1610951d..bcf479ac 100644 --- a/src/dpdk/drivers/net/enic/enic_clsf.c +++ b/src/dpdk/drivers/net/enic/enic_clsf.c @@ -74,7 +74,7 @@ void enic_fdir_stats_get(struct enic *enic, struct rte_eth_fdir_stats *stats) void enic_fdir_info_get(struct enic *enic, struct rte_eth_fdir_info *info) { - info->mode = enic->fdir.modes; + info->mode = (enum rte_fdir_mode)enic->fdir.modes; info->flow_types_mask[0] = enic->fdir.types_mask; } @@ -107,7 +107,6 @@ enic_set_layer(struct filter_generic_1 *gp, unsigned int flag, memcpy(gp->layer[layer].val, val, len); } - /* Copy Flow Director filter to a VIC ipv4 filter (for Cisco VICs * without advanced filter support. */ @@ -133,28 +132,6 @@ copy_fltr_v1(struct filter_v2 *fltr, struct rte_eth_fdir_input *input, fltr->u.ipv4.flags = FILTER_FIELDS_IPV4_5TUPLE; } -#define TREX_PATCH -#ifdef TREX_PATCH -void -copy_fltr_recv_all(struct filter_v2 *fltr, struct rte_eth_fdir_input *input, - struct rte_eth_fdir_masks *masks) { - struct filter_generic_1 *gp = &fltr->u.generic_1; - memset(gp, 0, sizeof(*gp)); - - struct ether_hdr eth_mask, eth_val; - memset(ð_mask, 0, sizeof(eth_mask)); - memset(ð_val, 0, sizeof(eth_val)); - - eth_val.ether_type = 0xdead; - eth_mask.ether_type = 0; - - gp->position = 0; - enic_set_layer(gp, 0, FILTER_GENERIC_1_L2, - ð_mask, ð_val, sizeof(struct ether_hdr)); - -} -#endif - /* Copy Flow Director filter to a VIC generic filter (requires advanced * filter support. 
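The enic.h helpers shown a little earlier fix the mapping between the RX queue index a DPDK application sees and the pair of hardware RQs (plus shared CQ) that back it. A small self-contained sketch of that arithmetic, with illustrative function names:

#include <stdio.h>

/* Every DPDK RX queue i is backed by a start-of-packet RQ (2*i) and a
 * data RQ (2*i + 1); both report completions on CQ i. */
static unsigned int rte_idx_to_sop_idx(unsigned int i)  { return i * 2; }
static unsigned int rte_idx_to_data_idx(unsigned int i) { return i * 2 + 1; }
static unsigned int sop_idx_to_cq_idx(unsigned int sop) { return sop / 2; }

int main(void)
{
    for (unsigned int q = 0; q < 4; q++)
        printf("rte rxq %u -> sop rq %u, data rq %u, cq %u\n",
               q, rte_idx_to_sop_idx(q), rte_idx_to_data_idx(q),
               sop_idx_to_cq_idx(rte_idx_to_sop_idx(q)));
    return 0;
}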
*/ @@ -165,15 +142,8 @@ copy_fltr_v2(struct filter_v2 *fltr, struct rte_eth_fdir_input *input, struct filter_generic_1 *gp = &fltr->u.generic_1; int i; - RTE_ASSERT(enic->adv_filters); - fltr->type = FILTER_DPDK_1; memset(gp, 0, sizeof(*gp)); -#ifdef TREX_PATCH - // important for this to be below 2. - // If added with position 2, IPv6 UDP and ICMP seems to be caught by some other rule - gp->position = 1; -#endif if (input->flow_type == RTE_ETH_FLOW_NONFRAG_IPV4_UDP) { struct udp_hdr udp_mask, udp_val; @@ -185,7 +155,7 @@ copy_fltr_v2(struct filter_v2 *fltr, struct rte_eth_fdir_input *input, udp_val.src_port = input->flow.udp4_flow.src_port; } if (input->flow.udp4_flow.dst_port) { - udp_mask.src_port = masks->dst_port_mask; + udp_mask.dst_port = masks->dst_port_mask; udp_val.dst_port = input->flow.udp4_flow.dst_port; } @@ -241,13 +211,9 @@ copy_fltr_v2(struct filter_v2 *fltr, struct rte_eth_fdir_input *input, memset(&ip4_val, 0, sizeof(struct ipv4_hdr)); if (input->flow.ip4_flow.tos) { - ip4_mask.type_of_service = masks->ipv4_mask.tos; + ip4_mask.type_of_service = 0xff; ip4_val.type_of_service = input->flow.ip4_flow.tos; } - if (input->flow.ip4_flow.ip_id) { - ip4_mask.packet_id = 0xffff; - ip4_val.packet_id = input->flow.ip4_flow.ip_id; - } if (input->flow.ip4_flow.ttl) { ip4_mask.time_to_live = 0xff; ip4_val.time_to_live = input->flow.ip4_flow.ttl; @@ -333,7 +299,7 @@ copy_fltr_v2(struct filter_v2 *fltr, struct rte_eth_fdir_input *input, memset(&ipv6_val, 0, sizeof(struct ipv6_hdr)); if (input->flow.ipv6_flow.proto) { - ipv6_mask.proto = masks->ipv6_mask.proto; + ipv6_mask.proto = 0xff; ipv6_val.proto = input->flow.ipv6_flow.proto; } for (i = 0; i < 4; i++) { @@ -349,8 +315,8 @@ copy_fltr_v2(struct filter_v2 *fltr, struct rte_eth_fdir_input *input, input->flow.ipv6_flow.dst_ip[i]; } if (input->flow.ipv6_flow.tc) { - ipv6_mask.vtc_flow = ((uint32_t)masks->ipv6_mask.tc<<12); - ipv6_val.vtc_flow = input->flow.ipv6_flow.tc << 12; + ipv6_mask.vtc_flow = 0x00ff0000; + ipv6_val.vtc_flow = input->flow.ipv6_flow.tc << 16; } if (input->flow.ipv6_flow.hop_limits) { ipv6_mask.hop_limits = 0xff; @@ -372,11 +338,7 @@ int enic_fdir_del_fltr(struct enic *enic, struct rte_eth_fdir_filter *params) case -EINVAL: case -ENOENT: enic->fdir.stats.f_remove++; -#ifdef TREX_PATCH - return pos; -#else return -EINVAL; -#endif default: /* The entry is present in the table */ key = enic->fdir.nodes[pos]; @@ -420,7 +382,7 @@ int enic_fdir_add_fltr(struct enic *enic, struct rte_eth_fdir_filter *params) } /* Get the enicpmd RQ from the DPDK Rx queue */ - queue = enic_sop_rq(params->action.rx_queue); + queue = enic_rte_rq_idx_to_sop_idx(params->action.rx_queue); if (!enic->rq[queue].in_use) return -EINVAL; @@ -487,18 +449,8 @@ int enic_fdir_add_fltr(struct enic *enic, struct rte_eth_fdir_filter *params) key->filter = *params; key->rq_index = queue; -#ifdef TREX_PATCH - switch (params->soft_id) { - case 100: - copy_fltr_recv_all(&fltr, ¶ms->input, &enic->rte_dev->data->dev_conf.fdir_conf.mask); - break; - default: -#endif - enic->fdir.copy_fltr_fn(&fltr, ¶ms->input, - &enic->rte_dev->data->dev_conf.fdir_conf.mask); -#ifdef TREX_PATCH - } -#endif + enic->fdir.copy_fltr_fn(&fltr, ¶ms->input, + &enic->rte_dev->data->dev_conf.fdir_conf.mask); if (!vnic_dev_classifier(enic->vdev, CLSF_ADD, &queue, &fltr)) { key->fltr_id = queue; diff --git a/src/dpdk/drivers/net/enic/enic_compat.h b/src/dpdk/drivers/net/enic/enic_compat.h index 5dbd983b..fc58bb41 100644 --- a/src/dpdk/drivers/net/enic/enic_compat.h +++ 
b/src/dpdk/drivers/net/enic/enic_compat.h @@ -41,6 +41,7 @@ #include <rte_atomic.h> #include <rte_malloc.h> #include <rte_log.h> +#include <rte_io.h> #define ENIC_PAGE_ALIGN 4096UL #define ENIC_ALIGN ENIC_PAGE_ALIGN @@ -95,42 +96,52 @@ typedef unsigned long long dma_addr_t; static inline uint32_t ioread32(volatile void *addr) { - return *(volatile uint32_t *)addr; + return rte_read32(addr); } static inline uint16_t ioread16(volatile void *addr) { - return *(volatile uint16_t *)addr; + return rte_read16(addr); } static inline uint8_t ioread8(volatile void *addr) { - return *(volatile uint8_t *)addr; + return rte_read8(addr); } static inline void iowrite32(uint32_t val, volatile void *addr) { - *(volatile uint32_t *)addr = val; + rte_write32(val, addr); +} + +static inline void iowrite32_relaxed(uint32_t val, volatile void *addr) +{ + rte_write32_relaxed(val, addr); } static inline void iowrite16(uint16_t val, volatile void *addr) { - *(volatile uint16_t *)addr = val; + rte_write16(val, addr); } static inline void iowrite8(uint8_t val, volatile void *addr) { - *(volatile uint8_t *)addr = val; + rte_write8(val, addr); } static inline unsigned int readl(volatile void __iomem *addr) { - return *(volatile unsigned int *)addr; + return rte_read32(addr); +} + +static inline unsigned int readl_relaxed(volatile void __iomem *addr) +{ + return rte_read32_relaxed(addr); } static inline void writel(unsigned int val, volatile void __iomem *addr) { - *(volatile unsigned int *)addr = val; + rte_write32(val, addr); } #define min_t(type, x, y) ({ \ diff --git a/src/dpdk/drivers/net/enic/enic_ethdev.c b/src/dpdk/drivers/net/enic/enic_ethdev.c index 6a86e23f..bffa8700 100644 --- a/src/dpdk/drivers/net/enic/enic_ethdev.c +++ b/src/dpdk/drivers/net/enic/enic_ethdev.c @@ -154,7 +154,7 @@ static int enicpmd_dev_setup_intr(struct enic *enic) return 0; /* check start of packet (SOP) RQs only in case scatter is disabled. 
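The enic_compat.h wrappers above now route through the rte_io accessors; as documented in DPDK, rte_write32() orders the store after earlier memory writes while rte_write32_relaxed() leaves ordering to the caller. A rough plain-C illustration of that split, where __sync_synchronize() merely stands in for the real I/O write barrier:

#include <stdint.h>

/* Stand-in for a device doorbell register. */
static volatile uint32_t fake_doorbell;

/* Relaxed store: no ordering beyond the volatile access itself. */
static inline void write32_relaxed(uint32_t val, volatile uint32_t *addr)
{
    *addr = val;
}

/* Ordered store: make earlier descriptor writes visible first. */
static inline void write32(uint32_t val, volatile uint32_t *addr)
{
    __sync_synchronize();        /* stand-in for the I/O write barrier */
    *addr = val;
}

int main(void)
{
    write32_relaxed(1, &fake_doorbell);  /* caller handles ordering itself */
    write32(2, &fake_doorbell);          /* barrier then store, like iowrite32() */
    return 0;
}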
*/ for (index = 0; index < enic->rq_count; index++) { - if (!enic->rq[enic_sop_rq(index)].ctrl) + if (!enic->rq[enic_rte_rq_idx_to_sop_idx(index)].ctrl) break; } if (enic->rq_count != index) @@ -262,6 +262,35 @@ static void enicpmd_dev_rx_queue_release(void *rxq) enic_free_rq(rxq); } +static uint32_t enicpmd_dev_rx_queue_count(struct rte_eth_dev *dev, + uint16_t rx_queue_id) +{ + struct enic *enic = pmd_priv(dev); + uint32_t queue_count = 0; + struct vnic_cq *cq; + uint32_t cq_tail; + uint16_t cq_idx; + int rq_num; + + if (rx_queue_id >= dev->data->nb_rx_queues) { + dev_err(enic, "Invalid RX queue id=%d", rx_queue_id); + return 0; + } + + rq_num = enic_rte_rq_idx_to_sop_idx(rx_queue_id); + cq = &enic->cq[enic_cq_rq(enic, rq_num)]; + cq_idx = cq->to_clean; + + cq_tail = ioread32(&cq->ctrl->cq_tail); + + if (cq_tail < cq_idx) + cq_tail += cq->ring.desc_count; + + queue_count = cq_tail - cq_idx; + + return queue_count; +} + static int enicpmd_dev_rx_queue_setup(struct rte_eth_dev *eth_dev, uint16_t queue_idx, uint16_t nb_desc, @@ -284,18 +313,15 @@ static int enicpmd_dev_rx_queue_setup(struct rte_eth_dev *eth_dev, } eth_dev->data->rx_queues[queue_idx] = - (void *)&enic->rq[enic_sop_rq(queue_idx)]; + (void *)&enic->rq[enic_rte_rq_idx_to_sop_idx(queue_idx)]; - ret = enic_alloc_rq(enic, queue_idx, socket_id, mp, nb_desc); + ret = enic_alloc_rq(enic, queue_idx, socket_id, mp, nb_desc, + rx_conf->rx_free_thresh); if (ret) { dev_err(enic, "error in allocating rq\n"); return ret; } - enic->rq[queue_idx].rx_free_thresh = rx_conf->rx_free_thresh; - dev_debug(enic, "Set queue_id:%u free thresh:%u\n", queue_idx, - enic->rq[queue_idx].rx_free_thresh); - return enicpmd_dev_setup_intr(enic); } @@ -405,17 +431,9 @@ static int enicpmd_dev_link_update(struct rte_eth_dev *eth_dev, __rte_unused int wait_to_complete) { struct enic *enic = pmd_priv(eth_dev); - int ret; - int link_status = 0; ENICPMD_FUNC_TRACE(); - link_status = enic_get_link_status(enic); - ret = (link_status == enic->link_status); - enic->link_status = link_status; - eth_dev->data->dev_link.link_status = link_status; - eth_dev->data->dev_link.link_duplex = ETH_LINK_FULL_DUPLEX; - eth_dev->data->dev_link.link_speed = vnic_dev_port_speed(enic->vdev); - return ret; + return enic_link_update(enic); } static void enicpmd_dev_stats_get(struct rte_eth_dev *eth_dev, @@ -435,22 +453,19 @@ static void enicpmd_dev_stats_reset(struct rte_eth_dev *eth_dev) enic_dev_stats_clear(enic); } - - - static void enicpmd_dev_info_get(struct rte_eth_dev *eth_dev, struct rte_eth_dev_info *device_info) { struct enic *enic = pmd_priv(eth_dev); ENICPMD_FUNC_TRACE(); + device_info->pci_dev = RTE_DEV_TO_PCI(eth_dev->device); /* Scattered Rx uses two receive queues per rx queue exposed to dpdk */ device_info->max_rx_queues = enic->conf_rq_count / 2; device_info->max_tx_queues = enic->conf_wq_count; device_info->min_rx_bufsize = ENIC_MIN_MTU; - device_info->max_rx_pktlen = enic->rte_dev->data->mtu - + ETHER_HDR_LEN + 4; - device_info->max_mac_addrs = 1; + device_info->max_rx_pktlen = enic->max_mtu + ETHER_HDR_LEN + 4; + device_info->max_mac_addrs = ENIC_MAX_MAC_ADDR; device_info->rx_offload_capa = DEV_RX_OFFLOAD_VLAN_STRIP | DEV_RX_OFFLOAD_IPV4_CKSUM | @@ -460,17 +475,18 @@ static void enicpmd_dev_info_get(struct rte_eth_dev *eth_dev, DEV_TX_OFFLOAD_VLAN_INSERT | DEV_TX_OFFLOAD_IPV4_CKSUM | DEV_TX_OFFLOAD_UDP_CKSUM | - DEV_TX_OFFLOAD_TCP_CKSUM; + DEV_TX_OFFLOAD_TCP_CKSUM | + DEV_TX_OFFLOAD_TCP_TSO; device_info->default_rxconf = (struct rte_eth_rxconf) { .rx_free_thresh = 
ENIC_DEFAULT_RX_FREE_THRESH }; - - device_info->speed_capa = ETH_LINK_SPEED_40G; } static const uint32_t *enicpmd_dev_supported_ptypes_get(struct rte_eth_dev *dev) { static const uint32_t ptypes[] = { + RTE_PTYPE_L2_ETHER, + RTE_PTYPE_L2_ETHER_VLAN, RTE_PTYPE_L3_IPV4_EXT_UNKNOWN, RTE_PTYPE_L3_IPV6_EXT_UNKNOWN, RTE_PTYPE_L4_TCP, @@ -531,12 +547,12 @@ static void enicpmd_add_mac_addr(struct rte_eth_dev *eth_dev, enic_set_mac_address(enic, mac_addr->addr_bytes); } -static void enicpmd_remove_mac_addr(struct rte_eth_dev *eth_dev, __rte_unused uint32_t index) +static void enicpmd_remove_mac_addr(struct rte_eth_dev *eth_dev, uint32_t index) { struct enic *enic = pmd_priv(eth_dev); ENICPMD_FUNC_TRACE(); - enic_del_mac_address(enic); + enic_del_mac_address(enic, index); } static int enicpmd_mtu_set(struct rte_eth_dev *eth_dev, uint16_t mtu) @@ -575,7 +591,7 @@ static const struct eth_dev_ops enicpmd_eth_dev_ops = { .tx_queue_stop = enicpmd_dev_tx_queue_stop, .rx_queue_setup = enicpmd_dev_rx_queue_setup, .rx_queue_release = enicpmd_dev_rx_queue_release, - .rx_queue_count = NULL, + .rx_queue_count = enicpmd_dev_rx_queue_count, .rx_descriptor_done = NULL, .tx_queue_setup = enicpmd_dev_tx_queue_setup, .tx_queue_release = enicpmd_dev_tx_queue_release, @@ -607,7 +623,7 @@ static int eth_enicpmd_dev_init(struct rte_eth_dev *eth_dev) eth_dev->rx_pkt_burst = &enic_recv_pkts; eth_dev->tx_pkt_burst = &enic_xmit_pkts; - pdev = eth_dev->pci_dev; + pdev = RTE_DEV_TO_PCI(eth_dev->device); rte_eth_copy_pci_info(eth_dev, pdev); enic->pdev = pdev; addr = &pdev->addr; @@ -620,32 +636,15 @@ static int eth_enicpmd_dev_init(struct rte_eth_dev *eth_dev) static struct eth_driver rte_enic_pmd = { .pci_drv = { - .name = "rte_enic_pmd", .id_table = pci_id_enic_map, - .drv_flags = RTE_PCI_DRV_NEED_MAPPING, + .drv_flags = RTE_PCI_DRV_NEED_MAPPING | RTE_PCI_DRV_INTR_LSC, + .probe = rte_eth_dev_pci_probe, + .remove = rte_eth_dev_pci_remove, }, .eth_dev_init = eth_enicpmd_dev_init, .dev_private_size = sizeof(struct enic), }; -/* Driver initialization routine. - * Invoked once at EAL init time. - * Register as the [Poll Mode] Driver of Cisco ENIC device. 
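The enicpmd_dev_rx_queue_count() handler added above derives the number of pending completions from the hardware CQ tail, adding the ring size when the tail has wrapped below the software index. The wrap-around arithmetic in isolation, with illustrative names and values:

#include <stdio.h>
#include <stdint.h>

/* Pending completions between the software index and the hardware tail
 * on a ring of desc_count entries. */
static uint32_t pending_completions(uint32_t sw_idx, uint32_t hw_tail,
                                    uint32_t desc_count)
{
    if (hw_tail < sw_idx)        /* hardware tail wrapped past the end */
        hw_tail += desc_count;
    return hw_tail - sw_idx;
}

int main(void)
{
    printf("%u\n", pending_completions(10, 14, 64));  /* 4, no wrap   */
    printf("%u\n", pending_completions(60, 2, 64));   /* 6, wrapped   */
    return 0;
}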
- */ -static int -rte_enic_pmd_init(__rte_unused const char *name, - __rte_unused const char *params) -{ - ENICPMD_FUNC_TRACE(); - - rte_eth_driver_register(&rte_enic_pmd); - return 0; -} - -static struct rte_driver rte_enic_driver = { - .type = PMD_PDEV, - .init = rte_enic_pmd_init, -}; - -PMD_REGISTER_DRIVER(rte_enic_driver, enic); -DRIVER_REGISTER_PCI_TABLE(enic, pci_id_enic_map); +RTE_PMD_REGISTER_PCI(net_enic, rte_enic_pmd.pci_drv); +RTE_PMD_REGISTER_PCI_TABLE(net_enic, pci_id_enic_map); +RTE_PMD_REGISTER_KMOD_DEP(net_enic, "* igb_uio | uio_pci_generic | vfio"); diff --git a/src/dpdk/drivers/net/enic/enic_main.c b/src/dpdk/drivers/net/enic/enic_main.c index 4530dcf4..21e8edeb 100644 --- a/src/dpdk/drivers/net/enic/enic_main.c +++ b/src/dpdk/drivers/net/enic/enic_main.c @@ -137,7 +137,6 @@ static void enic_clear_soft_stats(struct enic *enic) struct enic_soft_stats *soft_stats = &enic->soft_stats; rte_atomic64_clear(&soft_stats->rx_nombuf); rte_atomic64_clear(&soft_stats->rx_packet_errors); - rte_atomic64_clear(&soft_stats->tx_oversized); } static void enic_init_soft_stats(struct enic *enic) @@ -145,7 +144,6 @@ static void enic_init_soft_stats(struct enic *enic) struct enic_soft_stats *soft_stats = &enic->soft_stats; rte_atomic64_init(&soft_stats->rx_nombuf); rte_atomic64_init(&soft_stats->rx_packet_errors); - rte_atomic64_init(&soft_stats->tx_oversized); enic_clear_soft_stats(enic); } @@ -168,7 +166,6 @@ void enic_dev_stats_get(struct enic *enic, struct rte_eth_stats *r_stats) return; } - /* The number of truncated packets can only be calculated by * subtracting a hardware counter from error packets received by * the driver. Note: this causes transient inaccuracies in the @@ -177,26 +174,28 @@ void enic_dev_stats_get(struct enic *enic, struct rte_eth_stats *r_stats) * which can make ibytes be slightly higher than it should be. 
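The comment above describes deriving the truncated-packet count by subtracting the NIC's own error counter from the errors the driver observed. A toy calculation with made-up counter values, only to make the subtraction concrete:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
    uint64_t sw_rx_packet_errors = 120;   /* counted by the PMD        */
    uint64_t hw_rx_errors        = 100;   /* NIC error counter         */
    uint64_t hw_rx_frames_ok     = 5000;
    uint64_t hw_rx_no_bufs       = 7;

    uint64_t rx_truncated = sw_rx_packet_errors - hw_rx_errors;

    printf("ipackets = %llu\n",
           (unsigned long long)(hw_rx_frames_ok - rx_truncated));
    printf("imissed  = %llu\n",
           (unsigned long long)(hw_rx_no_bufs + rx_truncated));
    return 0;
}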
*/ rx_packet_errors = rte_atomic64_read(&soft_stats->rx_packet_errors); - rx_truncated = rx_packet_errors - stats->rx.rx_errors - - stats->rx.rx_no_bufs; + rx_truncated = rx_packet_errors - stats->rx.rx_errors; r_stats->ipackets = stats->rx.rx_frames_ok - rx_truncated; r_stats->opackets = stats->tx.tx_frames_ok; - r_stats->ibytes = stats->rx.rx_unicast_bytes_ok+stats->rx.rx_multicast_bytes_ok+stats->rx.rx_broadcast_bytes_ok; + r_stats->ibytes = stats->rx.rx_bytes_ok; r_stats->obytes = stats->tx.tx_bytes_ok; r_stats->ierrors = stats->rx.rx_errors + stats->rx.rx_drop; - r_stats->oerrors = stats->tx.tx_errors + rte_atomic64_read(&soft_stats->tx_oversized); + r_stats->oerrors = stats->tx.tx_errors; r_stats->imissed = stats->rx.rx_no_bufs + rx_truncated; r_stats->rx_nombuf = rte_atomic64_read(&soft_stats->rx_nombuf); } -void enic_del_mac_address(struct enic *enic) +void enic_del_mac_address(struct enic *enic, int mac_index) { - if (vnic_dev_del_addr(enic->vdev, enic->mac_addr)) + struct rte_eth_dev *eth_dev = enic->rte_dev; + uint8_t *mac_addr = eth_dev->data->mac_addrs[mac_index].addr_bytes; + + if (vnic_dev_del_addr(enic->vdev, mac_addr)) dev_err(enic, "del mac addr failed\n"); } @@ -209,15 +208,6 @@ void enic_set_mac_address(struct enic *enic, uint8_t *mac_addr) return; } - err = vnic_dev_del_addr(enic->vdev, enic->mac_addr); - if (err) { - dev_err(enic, "del mac addr failed\n"); - return; - } - - ether_addr_copy((struct ether_addr *)mac_addr, - (struct ether_addr *)enic->mac_addr); - err = vnic_dev_add_addr(enic->vdev, mac_addr); if (err) { dev_err(enic, "add mac addr failed\n"); @@ -244,14 +234,14 @@ void enic_init_vnic_resources(struct enic *enic) struct vnic_rq *data_rq; for (index = 0; index < enic->rq_count; index++) { - cq_idx = enic_cq_rq(enic, enic_sop_rq(index)); + cq_idx = enic_cq_rq(enic, enic_rte_rq_idx_to_sop_idx(index)); - vnic_rq_init(&enic->rq[enic_sop_rq(index)], + vnic_rq_init(&enic->rq[enic_rte_rq_idx_to_sop_idx(index)], cq_idx, error_interrupt_enable, error_interrupt_offset); - data_rq = &enic->rq[enic_data_rq(index)]; + data_rq = &enic->rq[enic_rte_rq_idx_to_data_idx(index)]; if (data_rq->in_use) vnic_rq_init(data_rq, cq_idx, @@ -414,14 +404,32 @@ enic_free_consistent(void *priv, rte_free(mze); } +int enic_link_update(struct enic *enic) +{ + struct rte_eth_dev *eth_dev = enic->rte_dev; + int ret; + int link_status = 0; + + link_status = enic_get_link_status(enic); + ret = (link_status == enic->link_status); + enic->link_status = link_status; + eth_dev->data->dev_link.link_status = link_status; + eth_dev->data->dev_link.link_duplex = ETH_LINK_FULL_DUPLEX; + eth_dev->data->dev_link.link_speed = vnic_dev_port_speed(enic->vdev); + return ret; +} + static void enic_intr_handler(__rte_unused struct rte_intr_handle *handle, void *arg) { - struct enic *enic = pmd_priv((struct rte_eth_dev *)arg); + struct rte_eth_dev *dev = (struct rte_eth_dev *)arg; + struct enic *enic = pmd_priv(dev); vnic_intr_return_all_credits(&enic->intr); + enic_link_update(enic); + _rte_eth_dev_callback_process(dev, RTE_ETH_EVENT_INTR_LSC, NULL); enic_log_q_error(enic); } @@ -433,7 +441,13 @@ int enic_enable(struct enic *enic) eth_dev->data->dev_link.link_speed = vnic_dev_port_speed(enic->vdev); eth_dev->data->dev_link.link_duplex = ETH_LINK_FULL_DUPLEX; - vnic_dev_notify_set(enic->vdev, -1); /* No Intr for notify */ + + /* vnic notification of link status has already been turned on in + * enic_dev_init() which is called during probe time. Here we are + * just turning on interrupt vector 0 if needed. 
+ */ + if (eth_dev->data->dev_conf.intr_conf.lsc) + vnic_dev_notify_set(enic->vdev, 0); if (enic_clsf_init(enic)) dev_warning(enic, "Init of hash table for clsf failed."\ @@ -441,17 +455,17 @@ int enic_enable(struct enic *enic) for (index = 0; index < enic->rq_count; index++) { err = enic_alloc_rx_queue_mbufs(enic, - &enic->rq[enic_sop_rq(index)]); + &enic->rq[enic_rte_rq_idx_to_sop_idx(index)]); if (err) { dev_err(enic, "Failed to alloc sop RX queue mbufs\n"); return err; } err = enic_alloc_rx_queue_mbufs(enic, - &enic->rq[enic_data_rq(index)]); + &enic->rq[enic_rte_rq_idx_to_data_idx(index)]); if (err) { /* release the allocated mbufs for the sop rq*/ enic_rxmbuf_queue_release(enic, - &enic->rq[enic_sop_rq(index)]); + &enic->rq[enic_rte_rq_idx_to_sop_idx(index)]); dev_err(enic, "Failed to alloc data RX queue mbufs\n"); return err; @@ -520,7 +534,10 @@ void enic_free_rq(void *rxq) if (rq_data->in_use) vnic_rq_free(rq_data); - vnic_cq_free(&enic->cq[rq_sop->index]); + vnic_cq_free(&enic->cq[enic_sop_rq_idx_to_cq_idx(rq_sop->index)]); + + rq_sop->in_use = 0; + rq_data->in_use = 0; } void enic_start_wq(struct enic *enic, uint16_t queue_idx) @@ -545,8 +562,10 @@ int enic_stop_wq(struct enic *enic, uint16_t queue_idx) void enic_start_rq(struct enic *enic, uint16_t queue_idx) { - struct vnic_rq *rq_sop = &enic->rq[enic_sop_rq(queue_idx)]; - struct vnic_rq *rq_data = &enic->rq[rq_sop->data_queue_idx]; + struct vnic_rq *rq_sop; + struct vnic_rq *rq_data; + rq_sop = &enic->rq[enic_rte_rq_idx_to_sop_idx(queue_idx)]; + rq_data = &enic->rq[rq_sop->data_queue_idx]; struct rte_eth_dev *eth_dev = enic->rte_dev; if (rq_data->in_use) @@ -560,8 +579,10 @@ int enic_stop_rq(struct enic *enic, uint16_t queue_idx) { int ret1 = 0, ret2 = 0; struct rte_eth_dev *eth_dev = enic->rte_dev; - struct vnic_rq *rq_sop = &enic->rq[enic_sop_rq(queue_idx)]; - struct vnic_rq *rq_data = &enic->rq[rq_sop->data_queue_idx]; + struct vnic_rq *rq_sop; + struct vnic_rq *rq_data; + rq_sop = &enic->rq[enic_rte_rq_idx_to_sop_idx(queue_idx)]; + rq_data = &enic->rq[rq_sop->data_queue_idx]; ret2 = vnic_rq_disable(rq_sop); rte_mb(); @@ -579,16 +600,17 @@ int enic_stop_rq(struct enic *enic, uint16_t queue_idx) int enic_alloc_rq(struct enic *enic, uint16_t queue_idx, unsigned int socket_id, struct rte_mempool *mp, - uint16_t nb_desc) + uint16_t nb_desc, uint16_t free_thresh) { int rc; - uint16_t sop_queue_idx = enic_sop_rq(queue_idx); - uint16_t data_queue_idx = enic_data_rq(queue_idx); + uint16_t sop_queue_idx = enic_rte_rq_idx_to_sop_idx(queue_idx); + uint16_t data_queue_idx = enic_rte_rq_idx_to_data_idx(queue_idx); struct vnic_rq *rq_sop = &enic->rq[sop_queue_idx]; struct vnic_rq *rq_data = &enic->rq[data_queue_idx]; unsigned int mbuf_size, mbufs_per_pkt; unsigned int nb_sop_desc, nb_data_desc; uint16_t min_sop, max_sop, min_data, max_data; + uint16_t mtu = enic->rte_dev->data->mtu; rq_sop->is_sop = 1; rq_sop->data_queue_idx = data_queue_idx; @@ -599,14 +621,18 @@ int enic_alloc_rq(struct enic *enic, uint16_t queue_idx, rq_data->socket_id = socket_id; rq_data->mp = mp; rq_sop->in_use = 1; + rq_sop->rx_free_thresh = free_thresh; + rq_data->rx_free_thresh = free_thresh; + dev_debug(enic, "Set queue_id:%u free thresh:%u\n", queue_idx, + free_thresh); mbuf_size = (uint16_t)(rte_pktmbuf_data_room_size(mp) - RTE_PKTMBUF_HEADROOM); if (enic->rte_dev->data->dev_conf.rxmode.enable_scatter) { - dev_info(enic, "Scatter rx mode enabled\n"); + dev_info(enic, "Rq %u Scatter rx mode enabled\n", queue_idx); /* ceil((mtu + ETHER_HDR_LEN + 4)/mbuf_size) 
*/ - mbufs_per_pkt = ((enic->config.mtu + ETHER_HDR_LEN + 4) + + mbufs_per_pkt = ((mtu + ETHER_HDR_LEN + 4) + (mbuf_size - 1)) / mbuf_size; } else { dev_info(enic, "Scatter rx mode disabled\n"); @@ -614,10 +640,13 @@ int enic_alloc_rq(struct enic *enic, uint16_t queue_idx, } if (mbufs_per_pkt > 1) { - dev_info(enic, "Scatter rx mode in use\n"); + dev_info(enic, "Rq %u Scatter rx mode in use\n", queue_idx); + rq_sop->data_queue_enable = 1; rq_data->in_use = 1; } else { - dev_info(enic, "Scatter rx mode not being used\n"); + dev_info(enic, "Rq %u Scatter rx mode not being used\n", + queue_idx); + rq_sop->data_queue_enable = 0; rq_data->in_use = 0; } @@ -654,7 +683,7 @@ int enic_alloc_rq(struct enic *enic, uint16_t queue_idx, } if (mbufs_per_pkt > 1) { dev_info(enic, "For mtu %d and mbuf size %d valid rx descriptor range is %d to %d\n", - enic->config.mtu, mbuf_size, min_sop + min_data, + mtu, mbuf_size, min_sop + min_data, max_sop + max_data); } dev_info(enic, "Using %d rx descriptors (sop %d, data %d)\n", @@ -705,6 +734,8 @@ int enic_alloc_rq(struct enic *enic, uint16_t queue_idx, goto err_free_sop_mbuf; } + rq_sop->tot_nb_desc = nb_desc; /* squirl away for MTU update function */ + return 0; err_free_sop_mbuf: @@ -801,6 +832,10 @@ int enic_disable(struct enic *enic) vnic_intr_mask(&enic->intr); (void)vnic_intr_masked(&enic->intr); /* flush write */ + rte_intr_disable(&enic->pdev->intr_handle); + rte_intr_callback_unregister(&enic->pdev->intr_handle, + enic_intr_handler, + (void *)enic->rte_dev); vnic_dev_disable(enic->vdev); @@ -822,8 +857,14 @@ int enic_disable(struct enic *enic) } } + /* If we were using interrupts, set the interrupt vector to -1 + * to disable interrupts. We are not disabling link notifcations, + * though, as we want the polling of link status to continue working. 
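enic_alloc_rq(), shown above, sizes the scatter chain as ceil((mtu + ETHER_HDR_LEN + 4) / mbuf_size); the extra 4 bytes presumably allow for a VLAN tag. A standalone version of that ceiling division with sample values:

#include <stdio.h>

#define ETHER_HDR_LEN 14         /* assumption: untagged Ethernet header */

static unsigned int mbufs_per_pkt(unsigned int mtu, unsigned int mbuf_size)
{
    unsigned int max_frame = mtu + ETHER_HDR_LEN + 4;  /* + VLAN allowance */
    return (max_frame + mbuf_size - 1) / mbuf_size;    /* integer ceiling  */
}

int main(void)
{
    printf("mtu 1500, 2048B mbufs -> %u mbuf(s) per packet\n",
           mbufs_per_pkt(1500, 2048));
    printf("mtu 9000, 2048B mbufs -> %u mbuf(s) per packet\n",
           mbufs_per_pkt(9000, 2048));
    return 0;
}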
+ */ + if (enic->rte_dev->data->dev_conf.intr_conf.lsc) + vnic_dev_notify_set(enic->vdev, -1); + vnic_dev_set_reset_flag(enic->vdev, 1); - vnic_dev_notify_unset(enic->vdev); for (i = 0; i < enic->wq_count; i++) vnic_wq_clean(&enic->wq[i], enic_free_wq_buf); @@ -925,7 +966,7 @@ static int enic_set_rsscpu(struct enic *enic, u8 rss_hash_bits) for (i = 0; i < (1 << rss_hash_bits); i++) (*rss_cpu_buf_va).cpu[i / 4].b[i % 4] = - enic_sop_rq(i % enic->rq_count); + enic_rte_rq_idx_to_sop_idx(i % enic->rq_count); err = enic_set_rss_cpu(enic, rss_cpu_buf_pa, @@ -1025,6 +1066,9 @@ static void enic_dev_deinit(struct enic *enic) { struct rte_eth_dev *eth_dev = enic->rte_dev; + /* stop link status checking */ + vnic_dev_notify_unset(enic->vdev); + rte_free(eth_dev->data->mac_addrs); } @@ -1066,6 +1110,56 @@ int enic_set_vnic_res(struct enic *enic) return rc; } +/* Initialize the completion queue for an RQ */ +static int +enic_reinit_rq(struct enic *enic, unsigned int rq_idx) +{ + struct vnic_rq *sop_rq, *data_rq; + unsigned int cq_idx = enic_cq_rq(enic, rq_idx); + int rc = 0; + + sop_rq = &enic->rq[enic_rte_rq_idx_to_sop_idx(rq_idx)]; + data_rq = &enic->rq[enic_rte_rq_idx_to_data_idx(rq_idx)]; + + vnic_cq_clean(&enic->cq[cq_idx]); + vnic_cq_init(&enic->cq[cq_idx], + 0 /* flow_control_enable */, + 1 /* color_enable */, + 0 /* cq_head */, + 0 /* cq_tail */, + 1 /* cq_tail_color */, + 0 /* interrupt_enable */, + 1 /* cq_entry_enable */, + 0 /* cq_message_enable */, + 0 /* interrupt offset */, + 0 /* cq_message_addr */); + + + vnic_rq_init_start(sop_rq, enic_cq_rq(enic, + enic_rte_rq_idx_to_sop_idx(rq_idx)), 0, + sop_rq->ring.desc_count - 1, 1, 0); + if (data_rq->in_use) { + vnic_rq_init_start(data_rq, + enic_cq_rq(enic, + enic_rte_rq_idx_to_data_idx(rq_idx)), 0, + data_rq->ring.desc_count - 1, 1, 0); + } + + rc = enic_alloc_rx_queue_mbufs(enic, sop_rq); + if (rc) + return rc; + + if (data_rq->in_use) { + rc = enic_alloc_rx_queue_mbufs(enic, data_rq); + if (rc) { + enic_rxmbuf_queue_release(enic, sop_rq); + return rc; + } + } + + return 0; +} + /* The Cisco NIC can send and receive packets up to a max packet size * determined by the NIC type and firmware. There is also an MTU * configured into the NIC via the CIMC/UCSM management interface @@ -1075,6 +1169,9 @@ int enic_set_vnic_res(struct enic *enic) */ int enic_set_mtu(struct enic *enic, uint16_t new_mtu) { + unsigned int rq_idx; + struct vnic_rq *rq; + int rc = 0; uint16_t old_mtu; /* previous setting */ uint16_t config_mtu; /* Value configured into NIC via CIMC/UCSM */ struct rte_eth_dev *eth_dev = enic->rte_dev; @@ -1082,10 +1179,6 @@ int enic_set_mtu(struct enic *enic, uint16_t new_mtu) old_mtu = eth_dev->data->mtu; config_mtu = enic->config.mtu; - /* only works with Rx scatter disabled */ - if (enic->rte_dev->data->dev_conf.rxmode.enable_scatter) - return -ENOTSUP; - if (new_mtu > enic->max_mtu) { dev_err(enic, "MTU not updated: requested (%u) greater than max (%u)\n", @@ -1103,11 +1196,83 @@ int enic_set_mtu(struct enic *enic, uint16_t new_mtu) "MTU (%u) is greater than value configured in NIC (%u)\n", new_mtu, config_mtu); + /* The easy case is when scatter is disabled. However if the MTU + * becomes greater than the mbuf data size, packet drops will ensue. + */ + if (!enic->rte_dev->data->dev_conf.rxmode.enable_scatter) { + eth_dev->data->mtu = new_mtu; + goto set_mtu_done; + } + + /* Rx scatter is enabled so reconfigure RQ's on the fly. The point is to + * change Rx scatter mode if necessary for better performance. I.e. 
if + * MTU was greater than the mbuf size and now it's less, scatter Rx + * doesn't have to be used and vice versa. + */ + rte_spinlock_lock(&enic->mtu_lock); + + /* Stop traffic on all RQs */ + for (rq_idx = 0; rq_idx < enic->rq_count * 2; rq_idx++) { + rq = &enic->rq[rq_idx]; + if (rq->is_sop && rq->in_use) { + rc = enic_stop_rq(enic, + enic_sop_rq_idx_to_rte_idx(rq_idx)); + if (rc) { + dev_err(enic, "Failed to stop Rq %u\n", rq_idx); + goto set_mtu_done; + } + } + } + + /* replace Rx funciton with a no-op to avoid getting stale pkts */ + eth_dev->rx_pkt_burst = enic_dummy_recv_pkts; + rte_mb(); + + /* Allow time for threads to exit the real Rx function. */ + usleep(100000); + + /* now it is safe to reconfigure the RQs */ + /* update the mtu */ eth_dev->data->mtu = new_mtu; + /* free and reallocate RQs with the new MTU */ + for (rq_idx = 0; rq_idx < enic->rq_count; rq_idx++) { + rq = &enic->rq[enic_rte_rq_idx_to_sop_idx(rq_idx)]; + + enic_free_rq(rq); + rc = enic_alloc_rq(enic, rq_idx, rq->socket_id, rq->mp, + rq->tot_nb_desc, rq->rx_free_thresh); + if (rc) { + dev_err(enic, + "Fatal MTU alloc error- No traffic will pass\n"); + goto set_mtu_done; + } + + rc = enic_reinit_rq(enic, rq_idx); + if (rc) { + dev_err(enic, + "Fatal MTU RQ reinit- No traffic will pass\n"); + goto set_mtu_done; + } + } + + /* put back the real receive function */ + rte_mb(); + eth_dev->rx_pkt_burst = enic_recv_pkts; + rte_mb(); + + /* restart Rx traffic */ + for (rq_idx = 0; rq_idx < enic->rq_count; rq_idx++) { + rq = &enic->rq[enic_rte_rq_idx_to_sop_idx(rq_idx)]; + if (rq->is_sop && rq->in_use) + enic_start_rq(enic, rq_idx); + } + +set_mtu_done: dev_info(enic, "MTU changed from %u to %u\n", old_mtu, new_mtu); - return 0; + rte_spinlock_unlock(&enic->mtu_lock); + return rc; } static int enic_dev_init(struct enic *enic) @@ -1125,24 +1290,32 @@ static int enic_dev_init(struct enic *enic) return err; } + /* Get available resource counts */ + enic_get_res_counts(enic); + if (enic->conf_rq_count == 1) { + dev_err(enic, "Running with only 1 RQ configured in the vNIC is not supported.\n"); + dev_err(enic, "Please configure 2 RQs in the vNIC for each Rx queue used by DPDK.\n"); + dev_err(enic, "See the ENIC PMD guide for more information.\n"); + return -EINVAL; + } + /* Get the supported filters */ enic_fdir_info(enic); - eth_dev->data->mac_addrs = rte_zmalloc("enic_mac_addr", ETH_ALEN, 0); + eth_dev->data->mac_addrs = rte_zmalloc("enic_mac_addr", ETH_ALEN + * ENIC_MAX_MAC_ADDR, 0); if (!eth_dev->data->mac_addrs) { dev_err(enic, "mac addr storage alloc failed, aborting.\n"); return -1; } ether_addr_copy((struct ether_addr *) enic->mac_addr, - ð_dev->data->mac_addrs[0]); - - - /* Get available resource counts - */ - enic_get_res_counts(enic); + eth_dev->data->mac_addrs); vnic_dev_set_reset_flag(enic->vdev, 0); + /* set up link status checking */ + vnic_dev_notify_set(enic->vdev, -1); /* No Intr for notify */ + return 0; } diff --git a/src/dpdk/drivers/net/enic/enic_res.c b/src/dpdk/drivers/net/enic/enic_res.c index 140c6efb..8a230a16 100644 --- a/src/dpdk/drivers/net/enic/enic_res.c +++ b/src/dpdk/drivers/net/enic/enic_res.c @@ -89,11 +89,10 @@ int enic_get_vnic_config(struct enic *enic) /* max packet size is only defined in newer VIC firmware * and will be 0 for legacy firmware and VICs */ - if (c->max_pkt_size > ENIC_DEFAULT_RX_MAX_PKT_SIZE) + if (c->max_pkt_size > ENIC_DEFAULT_MAX_PKT_SIZE) enic->max_mtu = c->max_pkt_size - (ETHER_HDR_LEN + 4); else - enic->max_mtu = ENIC_DEFAULT_RX_MAX_PKT_SIZE - - (ETHER_HDR_LEN + 
4); + enic->max_mtu = ENIC_DEFAULT_MAX_PKT_SIZE - (ETHER_HDR_LEN + 4); if (c->mtu == 0) c->mtu = 1500; diff --git a/src/dpdk/drivers/net/enic/enic_res.h b/src/dpdk/drivers/net/enic/enic_res.h index cda2da1e..303530ef 100644 --- a/src/dpdk/drivers/net/enic/enic_res.h +++ b/src/dpdk/drivers/net/enic/enic_res.h @@ -48,11 +48,7 @@ #define ENIC_MIN_MTU 68 /* Does not include (possible) inserted VLAN tag and FCS */ -#define ENIC_DEFAULT_RX_MAX_PKT_SIZE 9022 - -/* Does not include (possible) inserted VLAN tag and FCS */ -#define ENIC_TX_MAX_PKT_SIZE 9208 - +#define ENIC_DEFAULT_MAX_PKT_SIZE 9022 #define ENIC_MULTICAST_PERFECT_FILTERS 32 #define ENIC_UNICAST_PERFECT_FILTERS 32 diff --git a/src/dpdk/drivers/net/enic/enic_rxtx.c b/src/dpdk/drivers/net/enic/enic_rxtx.c index cd155024..26b83ae9 100644 --- a/src/dpdk/drivers/net/enic/enic_rxtx.c +++ b/src/dpdk/drivers/net/enic/enic_rxtx.c @@ -37,6 +37,9 @@ #include "enic_compat.h" #include "rq_enet_desc.h" #include "enic.h" +#include <rte_ether.h> +#include <rte_ip.h> +#include <rte_tcp.h> #define RTE_PMD_USE_PREFETCH @@ -129,6 +132,60 @@ enic_cq_rx_desc_n_bytes(struct cq_desc *cqd) CQ_ENET_RQ_DESC_BYTES_WRITTEN_MASK; } +/* Find the offset to L5. This is needed by enic TSO implementation. + * Return 0 if not a TCP packet or can't figure out the length. + */ +static inline uint8_t tso_header_len(struct rte_mbuf *mbuf) +{ + struct ether_hdr *eh; + struct vlan_hdr *vh; + struct ipv4_hdr *ip4; + struct ipv6_hdr *ip6; + struct tcp_hdr *th; + uint8_t hdr_len; + uint16_t ether_type; + + /* offset past Ethernet header */ + eh = rte_pktmbuf_mtod(mbuf, struct ether_hdr *); + ether_type = eh->ether_type; + hdr_len = sizeof(struct ether_hdr); + if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_VLAN)) { + vh = rte_pktmbuf_mtod_offset(mbuf, struct vlan_hdr *, hdr_len); + ether_type = vh->eth_proto; + hdr_len += sizeof(struct vlan_hdr); + } + + /* offset past IP header */ + switch (rte_be_to_cpu_16(ether_type)) { + case ETHER_TYPE_IPv4: + ip4 = rte_pktmbuf_mtod_offset(mbuf, struct ipv4_hdr *, hdr_len); + if (ip4->next_proto_id != IPPROTO_TCP) + return 0; + hdr_len += (ip4->version_ihl & 0xf) * 4; + break; + case ETHER_TYPE_IPv6: + ip6 = rte_pktmbuf_mtod_offset(mbuf, struct ipv6_hdr *, hdr_len); + if (ip6->proto != IPPROTO_TCP) + return 0; + hdr_len += sizeof(struct ipv6_hdr); + break; + default: + return 0; + } + + if ((hdr_len + sizeof(struct tcp_hdr)) > mbuf->pkt_len) + return 0; + + /* offset past TCP header */ + th = rte_pktmbuf_mtod_offset(mbuf, struct tcp_hdr *, hdr_len); + hdr_len += (th->data_off >> 4) * 4; + + if (hdr_len > mbuf->pkt_len) + return 0; + + return hdr_len; +} + static inline uint8_t enic_cq_rx_check_err(struct cq_desc *cqd) { @@ -149,30 +206,18 @@ enic_cq_rx_flags_to_pkt_type(struct cq_desc *cqd) uint8_t cqrd_flags = cqrd->flags; static const uint32_t cq_type_table[128] __rte_cache_aligned = { [0x00] = RTE_PTYPE_UNKNOWN, - [0x20] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN - | RTE_PTYPE_L4_NONFRAG, - [0x22] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN - | RTE_PTYPE_L4_UDP, - [0x24] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN - | RTE_PTYPE_L4_TCP, - [0x60] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN - | RTE_PTYPE_L4_FRAG, - [0x62] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN - | RTE_PTYPE_L4_UDP, - [0x64] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN - | RTE_PTYPE_L4_TCP, - [0x10] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN - | RTE_PTYPE_L4_NONFRAG, - [0x12] = RTE_PTYPE_L2_ETHER | 
RTE_PTYPE_L3_IPV6_EXT_UNKNOWN - | RTE_PTYPE_L4_UDP, - [0x14] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN - | RTE_PTYPE_L4_TCP, - [0x50] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN - | RTE_PTYPE_L4_FRAG, - [0x52] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN - | RTE_PTYPE_L4_UDP, - [0x54] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN - | RTE_PTYPE_L4_TCP, + [0x20] = RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_NONFRAG, + [0x22] = RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP, + [0x24] = RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_TCP, + [0x60] = RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_FRAG, + [0x62] = RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP, + [0x64] = RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_TCP, + [0x10] = RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | RTE_PTYPE_L4_NONFRAG, + [0x12] = RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | RTE_PTYPE_L4_UDP, + [0x14] = RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | RTE_PTYPE_L4_TCP, + [0x50] = RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | RTE_PTYPE_L4_FRAG, + [0x52] = RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | RTE_PTYPE_L4_UDP, + [0x54] = RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | RTE_PTYPE_L4_TCP, /* All others reserved */ }; cqrd_flags &= CQ_ENET_RQ_DESC_FLAGS_IPV4_FRAGMENT @@ -185,9 +230,10 @@ static inline void enic_cq_rx_to_pkt_flags(struct cq_desc *cqd, struct rte_mbuf *mbuf) { struct cq_enet_rq_desc *cqrd = (struct cq_enet_rq_desc *)cqd; - uint16_t ciflags, bwflags, pkt_flags = 0; + uint16_t ciflags, bwflags, pkt_flags = 0, vlan_tci; ciflags = enic_cq_rx_desc_ciflags(cqrd); bwflags = enic_cq_rx_desc_bwflags(cqrd); + vlan_tci = enic_cq_rx_desc_vlan(cqrd); mbuf->ol_flags = 0; @@ -195,13 +241,17 @@ enic_cq_rx_to_pkt_flags(struct cq_desc *cqd, struct rte_mbuf *mbuf) if (unlikely(!enic_cq_rx_desc_eop(ciflags))) goto mbuf_flags_done; - /* VLAN stripping */ + /* VLAN STRIPPED flag. The L2 packet type updated here also */ if (bwflags & CQ_ENET_RQ_DESC_FLAGS_VLAN_STRIPPED) { pkt_flags |= PKT_RX_VLAN_PKT | PKT_RX_VLAN_STRIPPED; - mbuf->vlan_tci = enic_cq_rx_desc_vlan(cqrd); + mbuf->packet_type |= RTE_PTYPE_L2_ETHER; } else { - mbuf->vlan_tci = 0; + if (vlan_tci != 0) + mbuf->packet_type |= RTE_PTYPE_L2_ETHER_VLAN; + else + mbuf->packet_type |= RTE_PTYPE_L2_ETHER; } + mbuf->vlan_tci = vlan_tci; /* RSS flag */ if (enic_cq_rx_desc_rss_type(cqrd)) { @@ -210,13 +260,25 @@ enic_cq_rx_to_pkt_flags(struct cq_desc *cqd, struct rte_mbuf *mbuf) } /* checksum flags */ - if (!enic_cq_rx_desc_csum_not_calc(cqrd) && - (mbuf->packet_type & RTE_PTYPE_L3_IPV4)) { - if (unlikely(!enic_cq_rx_desc_ipv4_csum_ok(cqrd))) - pkt_flags |= PKT_RX_IP_CKSUM_BAD; - if (mbuf->packet_type & (RTE_PTYPE_L4_UDP | RTE_PTYPE_L4_TCP)) { - if (unlikely(!enic_cq_rx_desc_tcp_udp_csum_ok(cqrd))) - pkt_flags |= PKT_RX_L4_CKSUM_BAD; + if (mbuf->packet_type & RTE_PTYPE_L3_IPV4) { + if (enic_cq_rx_desc_csum_not_calc(cqrd)) + pkt_flags |= (PKT_RX_IP_CKSUM_UNKNOWN & + PKT_RX_L4_CKSUM_UNKNOWN); + else { + uint32_t l4_flags; + l4_flags = mbuf->packet_type & RTE_PTYPE_L4_MASK; + + if (enic_cq_rx_desc_ipv4_csum_ok(cqrd)) + pkt_flags |= PKT_RX_IP_CKSUM_GOOD; + else + pkt_flags |= PKT_RX_IP_CKSUM_BAD; + + if (l4_flags & (RTE_PTYPE_L4_UDP | RTE_PTYPE_L4_TCP)) { + if (enic_cq_rx_desc_tcp_udp_csum_ok(cqrd)) + pkt_flags |= PKT_RX_L4_CKSUM_GOOD; + else + pkt_flags |= PKT_RX_L4_CKSUM_BAD; + } } } @@ -224,6 +286,17 @@ enic_cq_rx_to_pkt_flags(struct cq_desc *cqd, struct rte_mbuf *mbuf) mbuf->ol_flags = pkt_flags; } +/* dummy receive function to replace actual function in + * order to do safe reconfiguration operations. 
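The reworked enic_cq_rx_to_pkt_flags() above reports checksum status three ways: nothing when the NIC did not compute a checksum, and explicit GOOD or BAD flags otherwise. The sketch below captures only that decision tree; the flag values are invented and the real code additionally restricts the L4 flags to TCP/UDP packets:

#include <stdio.h>

#define IP_CKSUM_GOOD  0x1
#define IP_CKSUM_BAD   0x2
#define L4_CKSUM_GOOD  0x4
#define L4_CKSUM_BAD   0x8

static unsigned int rx_csum_flags(int csum_not_calc, int ipv4_ok, int l4_ok)
{
    unsigned int flags = 0;

    if (csum_not_calc)           /* leave both layers "unknown" */
        return 0;
    flags |= ipv4_ok ? IP_CKSUM_GOOD : IP_CKSUM_BAD;
    flags |= l4_ok ? L4_CKSUM_GOOD : L4_CKSUM_BAD;
    return flags;
}

int main(void)
{
    printf("0x%x\n", rx_csum_flags(0, 1, 1));  /* both good   -> 0x5 */
    printf("0x%x\n", rx_csum_flags(0, 1, 0));  /* l4 bad      -> 0x9 */
    printf("0x%x\n", rx_csum_flags(1, 0, 0));  /* not calc'd  -> 0x0 */
    return 0;
}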
+ */ +uint16_t +enic_dummy_recv_pkts(__rte_unused void *rx_queue, + __rte_unused struct rte_mbuf **rx_pkts, + __rte_unused uint16_t nb_pkts) +{ + return 0; +} + uint16_t enic_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts) @@ -373,10 +446,11 @@ enic_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, rte_mb(); if (data_rq->in_use) - iowrite32(data_rq->posted_index, - &data_rq->ctrl->posted_index); + iowrite32_relaxed(data_rq->posted_index, + &data_rq->ctrl->posted_index); rte_compiler_barrier(); - iowrite32(sop_rq->posted_index, &sop_rq->ctrl->posted_index); + iowrite32_relaxed(sop_rq->posted_index, + &sop_rq->ctrl->posted_index); } @@ -459,6 +533,8 @@ uint16_t enic_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint8_t vlan_tag_insert; uint8_t eop; uint64_t bus_addr; + uint8_t offload_mode; + uint16_t header_len; enic_cleanup_wq(enic, wq); wq_desc_avail = vnic_wq_desc_avail(wq); @@ -470,23 +546,16 @@ uint16_t enic_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, for (index = 0; index < nb_pkts; index++) { tx_pkt = *tx_pkts++; - pkt_len = tx_pkt->pkt_len; - data_len = tx_pkt->data_len; - ol_flags = tx_pkt->ol_flags; nb_segs = tx_pkt->nb_segs; - - if (pkt_len > ENIC_TX_MAX_PKT_SIZE) { - rte_pktmbuf_free(tx_pkt); - rte_atomic64_inc(&enic->soft_stats.tx_oversized); - continue; - } - if (nb_segs > wq_desc_avail) { if (index > 0) goto post; goto done; } + pkt_len = tx_pkt->pkt_len; + data_len = tx_pkt->data_len; + ol_flags = tx_pkt->ol_flags; mss = 0; vlan_id = 0; vlan_tag_insert = 0; @@ -497,13 +566,17 @@ uint16_t enic_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, desc_p = descs + head_idx; eop = (data_len == pkt_len); - - if (ol_flags & ol_flags_mask) { - if (ol_flags & PKT_TX_VLAN_PKT) { - vlan_tag_insert = 1; - vlan_id = tx_pkt->vlan_tci; + offload_mode = WQ_ENET_OFFLOAD_MODE_CSUM; + header_len = 0; + + if (tx_pkt->tso_segsz) { + header_len = tso_header_len(tx_pkt); + if (header_len) { + offload_mode = WQ_ENET_OFFLOAD_MODE_TSO; + mss = tx_pkt->tso_segsz; } - + } + if ((ol_flags & ol_flags_mask) && (header_len == 0)) { if (ol_flags & PKT_TX_IP_CKSUM) mss |= ENIC_CALC_IP_CKSUM; @@ -516,8 +589,14 @@ uint16_t enic_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, } } - wq_enet_desc_enc(&desc_tmp, bus_addr, data_len, mss, 0, 0, eop, - eop, 0, vlan_tag_insert, vlan_id, 0); + if (ol_flags & PKT_TX_VLAN_PKT) { + vlan_tag_insert = 1; + vlan_id = tx_pkt->vlan_tci; + } + + wq_enet_desc_enc(&desc_tmp, bus_addr, data_len, mss, header_len, + offload_mode, eop, eop, 0, vlan_tag_insert, + vlan_id, 0); *desc_p = desc_tmp; buf = &wq->bufs[head_idx]; @@ -537,8 +616,9 @@ uint16_t enic_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, + tx_pkt->data_off); wq_enet_desc_enc((struct wq_enet_desc *) &desc_tmp, bus_addr, data_len, - mss, 0, 0, eop, eop, 0, - vlan_tag_insert, vlan_id, 0); + mss, 0, offload_mode, eop, eop, + 0, vlan_tag_insert, vlan_id, + 0); *desc_p = desc_tmp; buf = &wq->bufs[head_idx]; @@ -550,7 +630,7 @@ uint16_t enic_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, } post: rte_wmb(); - iowrite32(head_idx, &wq->ctrl->posted_index); + iowrite32_relaxed(head_idx, &wq->ctrl->posted_index); done: wq->ring.desc_avail = wq_desc_avail; wq->head_idx = head_idx; diff --git a/src/dpdk/drivers/net/fm10k/base/fm10k_osdep.h b/src/dpdk/drivers/net/fm10k/base/fm10k_osdep.h index a21daa2a..f07b678f 100644 --- a/src/dpdk/drivers/net/fm10k/base/fm10k_osdep.h +++ b/src/dpdk/drivers/net/fm10k/base/fm10k_osdep.h @@ -39,6 +39,8 @@ POSSIBILITY OF SUCH DAMAGE. 
#include <rte_atomic.h> #include <rte_byteorder.h> #include <rte_cycles.h> +#include <rte_io.h> + #include "../fm10k_logs.h" /* TODO: this does not look like it should be used... */ @@ -88,17 +90,16 @@ typedef int bool; #endif /* offsets are WORD offsets, not BYTE offsets */ -#define FM10K_WRITE_REG(hw, reg, val) \ - ((((volatile uint32_t *)(hw)->hw_addr)[(reg)]) = ((uint32_t)(val))) -#define FM10K_READ_REG(hw, reg) \ - (((volatile uint32_t *)(hw)->hw_addr)[(reg)]) +#define FM10K_WRITE_REG(hw, reg, val) \ + rte_write32((val), ((hw)->hw_addr + (reg))) + +#define FM10K_READ_REG(hw, reg) rte_read32(((hw)->hw_addr + (reg))) + #define FM10K_WRITE_FLUSH(a) FM10K_READ_REG(a, FM10K_CTRL) -#define FM10K_PCI_REG(reg) (*((volatile uint32_t *)(reg))) +#define FM10K_PCI_REG(reg) rte_read32(reg) -#define FM10K_PCI_REG_WRITE(reg, value) do { \ - FM10K_PCI_REG((reg)) = (value); \ -} while (0) +#define FM10K_PCI_REG_WRITE(reg, value) rte_write32((value), (reg)) /* not implemented */ #define FM10K_READ_PCI_WORD(hw, reg) 0 diff --git a/src/dpdk/drivers/net/fm10k/fm10k.h b/src/dpdk/drivers/net/fm10k/fm10k.h index 05aa1a25..c6fed215 100644 --- a/src/dpdk/drivers/net/fm10k/fm10k.h +++ b/src/dpdk/drivers/net/fm10k/fm10k.h @@ -69,6 +69,9 @@ #define FM10K_MAX_RX_DESC (FM10K_MAX_RX_RING_SZ / sizeof(union fm10k_rx_desc)) #define FM10K_MAX_TX_DESC (FM10K_MAX_TX_RING_SZ / sizeof(struct fm10k_tx_desc)) +#define FM10K_TX_MAX_SEG UINT8_MAX +#define FM10K_TX_MAX_MTU_SEG UINT8_MAX + /* * byte aligment for HW RX data buffer * Datasheet requires RX buffer addresses shall either be 512-byte aligned or @@ -356,6 +359,9 @@ fm10k_dev_rx_descriptor_done(void *rx_queue, uint16_t offset); uint16_t fm10k_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts); +uint16_t fm10k_prep_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, + uint16_t nb_pkts); + int fm10k_rxq_vec_setup(struct fm10k_rx_queue *rxq); int fm10k_rx_vec_condition_check(struct rte_eth_dev *); void fm10k_rx_queue_release_mbufs_vec(struct fm10k_rx_queue *rxq); diff --git a/src/dpdk/drivers/net/fm10k/fm10k_ethdev.c b/src/dpdk/drivers/net/fm10k/fm10k_ethdev.c index 01f4a72c..dd021e46 100644 --- a/src/dpdk/drivers/net/fm10k/fm10k_ethdev.c +++ b/src/dpdk/drivers/net/fm10k/fm10k_ethdev.c @@ -52,12 +52,14 @@ #define MAX_QUERY_SWITCH_STATE_TIMES 10 /* Wait interval to get switch status */ #define WAIT_SWITCH_MSG_US 100000 +/* A period of quiescence for switch */ +#define FM10K_SWITCH_QUIESCE_US 10000 /* Number of chars per uint32 type */ #define CHARS_PER_UINT32 (sizeof(uint32_t)) #define BIT_MASK_PER_UINT32 ((1 << CHARS_PER_UINT32) - 1) /* default 1:1 map from queue ID to interrupt vector ID */ -#define Q2V(dev, queue_id) (dev->pci_dev->intr_handle.intr_vec[queue_id]) +#define Q2V(pci_dev, queue_id) ((pci_dev)->intr_handle.intr_vec[queue_id]) /* First 64 Logical ports for PF/VMDQ, second 64 for Flow director */ #define MAX_LPORT_NUM 128 @@ -675,7 +677,7 @@ fm10k_dev_tx_init(struct rte_eth_dev *dev) /* Enable use of FTAG bit in TX descriptor, PFVTCTL * register is read-only for VF. 
*/ - if (fm10k_check_ftag(dev->pci_dev->devargs)) { + if (fm10k_check_ftag(dev->device->devargs)) { if (hw->mac.type == fm10k_mac_pf) { FM10K_WRITE_REG(hw, FM10K_PFVTCTL(i), FM10K_PFVTCTL_FTAG_DESC_ENABLE); @@ -693,8 +695,9 @@ fm10k_dev_tx_init(struct rte_eth_dev *dev) base_addr >> (CHAR_BIT * sizeof(uint32_t))); FM10K_WRITE_REG(hw, FM10K_TDLEN(i), size); - /* assign default SGLORT for each TX queue */ - FM10K_WRITE_REG(hw, FM10K_TX_SGLORT(i), hw->mac.dglort_map); + /* assign default SGLORT for each TX queue by PF */ + if (hw->mac.type == fm10k_mac_pf) + FM10K_WRITE_REG(hw, FM10K_TX_SGLORT(i), hw->mac.dglort_map); } /* set up vector or scalar TX function as appropriate */ @@ -708,7 +711,8 @@ fm10k_dev_rx_init(struct rte_eth_dev *dev) { struct fm10k_hw *hw = FM10K_DEV_PRIVATE_TO_HW(dev->data->dev_private); struct fm10k_macvlan_filter_info *macvlan; - struct rte_intr_handle *intr_handle = &dev->pci_dev->intr_handle; + struct rte_pci_device *pdev = RTE_DEV_TO_PCI(dev->device); + struct rte_intr_handle *intr_handle = &pdev->intr_handle; int i, ret; struct fm10k_rx_queue *rxq; uint64_t base_addr; @@ -722,13 +726,13 @@ fm10k_dev_rx_init(struct rte_eth_dev *dev) i = 0; if (rte_intr_dp_is_en(intr_handle)) { for (; i < dev->data->nb_rx_queues; i++) { - FM10K_WRITE_REG(hw, FM10K_RXINT(i), Q2V(dev, i)); + FM10K_WRITE_REG(hw, FM10K_RXINT(i), Q2V(pdev, i)); if (hw->mac.type == fm10k_mac_pf) - FM10K_WRITE_REG(hw, FM10K_ITR(Q2V(dev, i)), + FM10K_WRITE_REG(hw, FM10K_ITR(Q2V(pdev, i)), FM10K_ITR_AUTOMASK | FM10K_ITR_MASK_CLEAR); else - FM10K_WRITE_REG(hw, FM10K_VFITR(Q2V(dev, i)), + FM10K_WRITE_REG(hw, FM10K_VFITR(Q2V(pdev, i)), FM10K_ITR_AUTOMASK | FM10K_ITR_MASK_CLEAR); } @@ -1168,7 +1172,8 @@ static void fm10k_dev_stop(struct rte_eth_dev *dev) { struct fm10k_hw *hw = FM10K_DEV_PRIVATE_TO_HW(dev->data->dev_private); - struct rte_intr_handle *intr_handle = &dev->pci_dev->intr_handle; + struct rte_pci_device *pdev = RTE_DEV_TO_PCI(dev->device); + struct rte_intr_handle *intr_handle = &pdev->intr_handle; int i; PMD_INIT_FUNC_TRACE(); @@ -1187,10 +1192,10 @@ fm10k_dev_stop(struct rte_eth_dev *dev) FM10K_WRITE_REG(hw, FM10K_RXINT(i), 3 << FM10K_RXINT_TIMER_SHIFT); if (hw->mac.type == fm10k_mac_pf) - FM10K_WRITE_REG(hw, FM10K_ITR(Q2V(dev, i)), + FM10K_WRITE_REG(hw, FM10K_ITR(Q2V(pdev, i)), FM10K_ITR_MASK_SET); else - FM10K_WRITE_REG(hw, FM10K_VFITR(Q2V(dev, i)), + FM10K_WRITE_REG(hw, FM10K_VFITR(Q2V(pdev, i)), FM10K_ITR_MASK_SET); } } @@ -1233,6 +1238,9 @@ fm10k_dev_close(struct rte_eth_dev *dev) MAX_LPORT_NUM, false); fm10k_mbx_unlock(hw); + /* allow 10ms for device to quiesce */ + rte_delay_us(FM10K_SWITCH_QUIESCE_US); + /* Stop mailbox service first */ fm10k_close_mbx_service(hw); fm10k_dev_stop(dev); @@ -1309,6 +1317,7 @@ fm10k_xstats_get(struct rte_eth_dev *dev, struct rte_eth_xstat *xstats, for (i = 0; i < FM10K_NB_HW_XSTATS; i++) { xstats[count].value = *(uint64_t *)(((char *)hw_stats) + fm10k_hw_stats_strings[count].offset); + xstats[count].id = count; count++; } @@ -1318,12 +1327,14 @@ fm10k_xstats_get(struct rte_eth_dev *dev, struct rte_eth_xstat *xstats, xstats[count].value = *(uint64_t *)(((char *)&hw_stats->q[q]) + fm10k_hw_stats_rx_q_strings[i].offset); + xstats[count].id = count; count++; } for (i = 0; i < FM10K_NB_TX_Q_XSTATS; i++) { xstats[count].value = *(uint64_t *)(((char *)&hw_stats->q[q]) + fm10k_hw_stats_tx_q_strings[i].offset); + xstats[count].id = count; count++; } } @@ -1381,16 +1392,18 @@ fm10k_dev_infos_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info) { struct 
fm10k_hw *hw = FM10K_DEV_PRIVATE_TO_HW(dev->data->dev_private); + struct rte_pci_device *pdev = RTE_DEV_TO_PCI(dev->device); PMD_INIT_FUNC_TRACE(); + dev_info->pci_dev = pdev; dev_info->min_rx_bufsize = FM10K_MIN_RX_BUF_SIZE; dev_info->max_rx_pktlen = FM10K_MAX_PKT_SIZE; dev_info->max_rx_queues = hw->mac.max_queues; dev_info->max_tx_queues = hw->mac.max_queues; dev_info->max_mac_addrs = FM10K_MAX_MACADDR_NUM; dev_info->max_hash_mac_addrs = 0; - dev_info->max_vfs = dev->pci_dev->max_vfs; + dev_info->max_vfs = pdev->max_vfs; dev_info->vmdq_pool_base = 0; dev_info->vmdq_queue_base = 0; dev_info->max_vmdq_pools = ETH_32_POOLS; @@ -1441,6 +1454,8 @@ fm10k_dev_infos_get(struct rte_eth_dev *dev, .nb_max = FM10K_MAX_TX_DESC, .nb_min = FM10K_MIN_TX_DESC, .nb_align = FM10K_MULT_TX_DESC, + .nb_seg_max = FM10K_TX_MAX_SEG, + .nb_mtu_seg_max = FM10K_TX_MAX_MTU_SEG, }; dev_info->speed_capa = ETH_LINK_SPEED_1G | ETH_LINK_SPEED_2_5G | @@ -2327,15 +2342,16 @@ static int fm10k_dev_rx_queue_intr_enable(struct rte_eth_dev *dev, uint16_t queue_id) { struct fm10k_hw *hw = FM10K_DEV_PRIVATE_TO_HW(dev->data->dev_private); + struct rte_pci_device *pdev = RTE_DEV_TO_PCI(dev->device); /* Enable ITR */ if (hw->mac.type == fm10k_mac_pf) - FM10K_WRITE_REG(hw, FM10K_ITR(Q2V(dev, queue_id)), + FM10K_WRITE_REG(hw, FM10K_ITR(Q2V(pdev, queue_id)), FM10K_ITR_AUTOMASK | FM10K_ITR_MASK_CLEAR); else - FM10K_WRITE_REG(hw, FM10K_VFITR(Q2V(dev, queue_id)), + FM10K_WRITE_REG(hw, FM10K_VFITR(Q2V(pdev, queue_id)), FM10K_ITR_AUTOMASK | FM10K_ITR_MASK_CLEAR); - rte_intr_enable(&dev->pci_dev->intr_handle); + rte_intr_enable(&pdev->intr_handle); return 0; } @@ -2343,13 +2359,14 @@ static int fm10k_dev_rx_queue_intr_disable(struct rte_eth_dev *dev, uint16_t queue_id) { struct fm10k_hw *hw = FM10K_DEV_PRIVATE_TO_HW(dev->data->dev_private); + struct rte_pci_device *pdev = RTE_DEV_TO_PCI(dev->device); /* Disable ITR */ if (hw->mac.type == fm10k_mac_pf) - FM10K_WRITE_REG(hw, FM10K_ITR(Q2V(dev, queue_id)), + FM10K_WRITE_REG(hw, FM10K_ITR(Q2V(pdev, queue_id)), FM10K_ITR_MASK_SET); else - FM10K_WRITE_REG(hw, FM10K_VFITR(Q2V(dev, queue_id)), + FM10K_WRITE_REG(hw, FM10K_VFITR(Q2V(pdev, queue_id)), FM10K_ITR_MASK_SET); return 0; } @@ -2358,7 +2375,8 @@ static int fm10k_dev_rxq_interrupt_setup(struct rte_eth_dev *dev) { struct fm10k_hw *hw = FM10K_DEV_PRIVATE_TO_HW(dev->data->dev_private); - struct rte_intr_handle *intr_handle = &dev->pci_dev->intr_handle; + struct rte_pci_device *pdev = RTE_DEV_TO_PCI(dev->device); + struct rte_intr_handle *intr_handle = &pdev->intr_handle; uint32_t intr_vector, vec; uint16_t queue_id; int result = 0; @@ -2374,7 +2392,7 @@ fm10k_dev_rxq_interrupt_setup(struct rte_eth_dev *dev) intr_vector = dev->data->nb_rx_queues; /* disable interrupt first */ - rte_intr_disable(&dev->pci_dev->intr_handle); + rte_intr_disable(intr_handle); if (hw->mac.type == fm10k_mac_pf) fm10k_dev_disable_intr_pf(dev); else @@ -2409,7 +2427,7 @@ fm10k_dev_rxq_interrupt_setup(struct rte_eth_dev *dev) fm10k_dev_enable_intr_pf(dev); else fm10k_dev_enable_intr_vf(dev); - rte_intr_enable(&dev->pci_dev->intr_handle); + rte_intr_enable(intr_handle); hw->mac.ops.update_int_moderator(hw); return result; } @@ -2524,7 +2542,7 @@ error: */ static void fm10k_dev_interrupt_handler_pf( - __rte_unused struct rte_intr_handle *handle, + struct rte_intr_handle *handle, void *param) { struct rte_eth_dev *dev = (struct rte_eth_dev *)param; @@ -2575,7 +2593,7 @@ fm10k_dev_interrupt_handler_pf( FM10K_WRITE_REG(hw, FM10K_ITR(0), FM10K_ITR_AUTOMASK | 
FM10K_ITR_MASK_CLEAR); /* Re-enable interrupt from host side */ - rte_intr_enable(&(dev->pci_dev->intr_handle)); + rte_intr_enable(handle); } /** @@ -2591,7 +2609,7 @@ fm10k_dev_interrupt_handler_pf( */ static void fm10k_dev_interrupt_handler_vf( - __rte_unused struct rte_intr_handle *handle, + struct rte_intr_handle *handle, void *param) { struct rte_eth_dev *dev = (struct rte_eth_dev *)param; @@ -2609,7 +2627,7 @@ fm10k_dev_interrupt_handler_vf( FM10K_WRITE_REG(hw, FM10K_VFITR(0), FM10K_ITR_AUTOMASK | FM10K_ITR_MASK_CLEAR); /* Re-enable interrupt from host side */ - rte_intr_enable(&(dev->pci_dev->intr_handle)); + rte_intr_enable(handle); } /* Mailbox message handler in VF */ @@ -2731,7 +2749,7 @@ fm10k_set_tx_function(struct rte_eth_dev *dev) int use_sse = 1; uint16_t tx_ftag_en = 0; - if (fm10k_check_ftag(dev->pci_dev->devargs)) + if (fm10k_check_ftag(dev->device->devargs)) tx_ftag_en = 1; for (i = 0; i < dev->data->nb_tx_queues; i++) { @@ -2749,8 +2767,10 @@ fm10k_set_tx_function(struct rte_eth_dev *dev) fm10k_txq_vec_setup(txq); } dev->tx_pkt_burst = fm10k_xmit_pkts_vec; + dev->tx_pkt_prepare = NULL; } else { dev->tx_pkt_burst = fm10k_xmit_pkts; + dev->tx_pkt_prepare = fm10k_prep_pkts; PMD_INIT_LOG(DEBUG, "Use regular Tx func"); } } @@ -2762,7 +2782,7 @@ fm10k_set_rx_function(struct rte_eth_dev *dev) uint16_t i, rx_using_sse; uint16_t rx_ftag_en = 0; - if (fm10k_check_ftag(dev->pci_dev->devargs)) + if (fm10k_check_ftag(dev->device->devargs)) rx_ftag_en = 1; /* In order to allow Vector Rx there are a few configuration @@ -2821,6 +2841,8 @@ static int eth_fm10k_dev_init(struct rte_eth_dev *dev) { struct fm10k_hw *hw = FM10K_DEV_PRIVATE_TO_HW(dev->data->dev_private); + struct rte_pci_device *pdev = RTE_DEV_TO_PCI(dev->device); + struct rte_intr_handle *intr_handle = &pdev->intr_handle; int diag, i; struct fm10k_macvlan_filter_info *macvlan; @@ -2829,23 +2851,25 @@ eth_fm10k_dev_init(struct rte_eth_dev *dev) dev->dev_ops = &fm10k_eth_dev_ops; dev->rx_pkt_burst = &fm10k_recv_pkts; dev->tx_pkt_burst = &fm10k_xmit_pkts; + dev->tx_pkt_prepare = &fm10k_prep_pkts; /* only initialize in the primary process */ if (rte_eal_process_type() != RTE_PROC_PRIMARY) return 0; - rte_eth_copy_pci_info(dev, dev->pci_dev); + rte_eth_copy_pci_info(dev, pdev); + dev->data->dev_flags = RTE_ETH_DEV_DETACHABLE; macvlan = FM10K_DEV_PRIVATE_TO_MACVLAN(dev->data->dev_private); memset(macvlan, 0, sizeof(*macvlan)); /* Vendor and Device ID need to be set before init of shared code */ memset(hw, 0, sizeof(*hw)); - hw->device_id = dev->pci_dev->id.device_id; - hw->vendor_id = dev->pci_dev->id.vendor_id; - hw->subsystem_device_id = dev->pci_dev->id.subsystem_device_id; - hw->subsystem_vendor_id = dev->pci_dev->id.subsystem_vendor_id; + hw->device_id = pdev->id.device_id; + hw->vendor_id = pdev->id.vendor_id; + hw->subsystem_device_id = pdev->id.subsystem_device_id; + hw->subsystem_vendor_id = pdev->id.subsystem_vendor_id; hw->revision_id = 0; - hw->hw_addr = (void *)dev->pci_dev->mem_resource[0].addr; + hw->hw_addr = (void *)pdev->mem_resource[0].addr; if (hw->hw_addr == NULL) { PMD_INIT_LOG(ERR, "Bad mem resource." 
" Try to blacklist unused devices."); @@ -2915,20 +2939,20 @@ eth_fm10k_dev_init(struct rte_eth_dev *dev) /*PF/VF has different interrupt handling mechanism */ if (hw->mac.type == fm10k_mac_pf) { /* register callback func to eal lib */ - rte_intr_callback_register(&(dev->pci_dev->intr_handle), + rte_intr_callback_register(intr_handle, fm10k_dev_interrupt_handler_pf, (void *)dev); /* enable MISC interrupt */ fm10k_dev_enable_intr_pf(dev); } else { /* VF */ - rte_intr_callback_register(&(dev->pci_dev->intr_handle), + rte_intr_callback_register(intr_handle, fm10k_dev_interrupt_handler_vf, (void *)dev); fm10k_dev_enable_intr_vf(dev); } /* Enable intr after callback registered */ - rte_intr_enable(&(dev->pci_dev->intr_handle)); + rte_intr_enable(intr_handle); hw->mac.ops.update_int_moderator(hw); @@ -2998,7 +3022,8 @@ static int eth_fm10k_dev_uninit(struct rte_eth_dev *dev) { struct fm10k_hw *hw = FM10K_DEV_PRIVATE_TO_HW(dev->data->dev_private); - + struct rte_pci_device *pdev = RTE_DEV_TO_PCI(dev->device); + struct rte_intr_handle *intr_handle = &pdev->intr_handle; PMD_INIT_FUNC_TRACE(); /* only uninitialize in the primary process */ @@ -3013,7 +3038,7 @@ eth_fm10k_dev_uninit(struct rte_eth_dev *dev) dev->tx_pkt_burst = NULL; /* disable uio/vfio intr */ - rte_intr_disable(&(dev->pci_dev->intr_handle)); + rte_intr_disable(intr_handle); /*PF/VF has different interrupt handling mechanism */ if (hw->mac.type == fm10k_mac_pf) { @@ -3021,13 +3046,13 @@ eth_fm10k_dev_uninit(struct rte_eth_dev *dev) fm10k_dev_disable_intr_pf(dev); /* unregister callback func to eal lib */ - rte_intr_callback_unregister(&(dev->pci_dev->intr_handle), + rte_intr_callback_unregister(intr_handle, fm10k_dev_interrupt_handler_pf, (void *)dev); } else { /* disable interrupt */ fm10k_dev_disable_intr_vf(dev); - rte_intr_callback_unregister(&(dev->pci_dev->intr_handle), + rte_intr_callback_unregister(intr_handle, fm10k_dev_interrupt_handler_vf, (void *)dev); } @@ -3055,34 +3080,16 @@ static const struct rte_pci_id pci_id_fm10k_map[] = { static struct eth_driver rte_pmd_fm10k = { .pci_drv = { - .name = "rte_pmd_fm10k", .id_table = pci_id_fm10k_map, - .drv_flags = RTE_PCI_DRV_NEED_MAPPING | RTE_PCI_DRV_INTR_LSC | - RTE_PCI_DRV_DETACHABLE, + .drv_flags = RTE_PCI_DRV_NEED_MAPPING | RTE_PCI_DRV_INTR_LSC, + .probe = rte_eth_dev_pci_probe, + .remove = rte_eth_dev_pci_remove, }, .eth_dev_init = eth_fm10k_dev_init, .eth_dev_uninit = eth_fm10k_dev_uninit, .dev_private_size = sizeof(struct fm10k_adapter), }; -/* - * Driver initialization routine. - * Invoked once at EAL init time. - * Register itself as the [Poll Mode] Driver of PCI FM10K devices. - */ -static int -rte_pmd_fm10k_init(__rte_unused const char *name, - __rte_unused const char *params) -{ - PMD_INIT_FUNC_TRACE(); - rte_eth_driver_register(&rte_pmd_fm10k); - return 0; -} - -static struct rte_driver rte_fm10k_driver = { - .type = PMD_PDEV, - .init = rte_pmd_fm10k_init, -}; - -PMD_REGISTER_DRIVER(rte_fm10k_driver, fm10k); -DRIVER_REGISTER_PCI_TABLE(fm10k, pci_id_fm10k_map); +RTE_PMD_REGISTER_PCI(net_fm10k, rte_pmd_fm10k.pci_drv); +RTE_PMD_REGISTER_PCI_TABLE(net_fm10k, pci_id_fm10k_map); +RTE_PMD_REGISTER_KMOD_DEP(net_fm10k, "* igb_uio | uio_pci_generic | vfio"); diff --git a/src/dpdk/drivers/net/fm10k/fm10k_rxtx.c b/src/dpdk/drivers/net/fm10k/fm10k_rxtx.c index 5b2d04bf..144e5e6b 100644 --- a/src/dpdk/drivers/net/fm10k/fm10k_rxtx.c +++ b/src/dpdk/drivers/net/fm10k/fm10k_rxtx.c @@ -1,7 +1,7 @@ /*- * BSD LICENSE * - * Copyright(c) 2013-2015 Intel Corporation. All rights reserved. 
+ * Copyright(c) 2013-2016 Intel Corporation. All rights reserved. * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -35,6 +35,7 @@ #include <rte_ethdev.h> #include <rte_common.h> +#include <rte_net.h> #include "fm10k.h" #include "base/fm10k_type.h" @@ -65,6 +66,15 @@ static inline void dump_rxd(union fm10k_rx_desc *rxd) } #endif +#define FM10K_TX_OFFLOAD_MASK ( \ + PKT_TX_VLAN_PKT | \ + PKT_TX_IP_CKSUM | \ + PKT_TX_L4_MASK | \ + PKT_TX_TCP_SEG) + +#define FM10K_TX_OFFLOAD_NOTSUP_MASK \ + (PKT_TX_OFFLOAD_MASK ^ FM10K_TX_OFFLOAD_MASK) + /* @note: When this function is changed, make corresponding change to * fm10k_dev_supported_ptypes_get() */ @@ -96,6 +106,20 @@ rx_desc_to_ol_flags(struct rte_mbuf *m, const union fm10k_rx_desc *d) if (d->w.pkt_info & FM10K_RXD_RSSTYPE_MASK) m->ol_flags |= PKT_RX_RSS_HASH; + + if (unlikely((d->d.staterr & + (FM10K_RXD_STATUS_IPCS | FM10K_RXD_STATUS_IPE)) == + (FM10K_RXD_STATUS_IPCS | FM10K_RXD_STATUS_IPE))) + m->ol_flags |= PKT_RX_IP_CKSUM_BAD; + else + m->ol_flags |= PKT_RX_IP_CKSUM_GOOD; + + if (unlikely((d->d.staterr & + (FM10K_RXD_STATUS_L4CS | FM10K_RXD_STATUS_L4E)) == + (FM10K_RXD_STATUS_L4CS | FM10K_RXD_STATUS_L4E))) + m->ol_flags |= PKT_RX_L4_CKSUM_BAD; + else + m->ol_flags |= PKT_RX_L4_CKSUM_GOOD; } uint16_t @@ -583,3 +607,41 @@ fm10k_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, return count; } + +uint16_t +fm10k_prep_pkts(__rte_unused void *tx_queue, struct rte_mbuf **tx_pkts, + uint16_t nb_pkts) +{ + int i, ret; + struct rte_mbuf *m; + + for (i = 0; i < nb_pkts; i++) { + m = tx_pkts[i]; + + if ((m->ol_flags & PKT_TX_TCP_SEG) && + (m->tso_segsz < FM10K_TSO_MINMSS)) { + rte_errno = -EINVAL; + return i; + } + + if (m->ol_flags & FM10K_TX_OFFLOAD_NOTSUP_MASK) { + rte_errno = -ENOTSUP; + return i; + } + +#ifdef RTE_LIBRTE_ETHDEV_DEBUG + ret = rte_validate_tx_offload(m); + if (ret != 0) { + rte_errno = ret; + return i; + } +#endif + ret = rte_net_intel_cksum_prepare(m); + if (ret != 0) { + rte_errno = ret; + return i; + } + } + + return i; +} diff --git a/src/dpdk/drivers/net/fm10k/fm10k_rxtx_vec.c b/src/dpdk/drivers/net/fm10k/fm10k_rxtx_vec.c index 9ea747e1..27f3e43f 100644 --- a/src/dpdk/drivers/net/fm10k/fm10k_rxtx_vec.c +++ b/src/dpdk/drivers/net/fm10k/fm10k_rxtx_vec.c @@ -67,6 +67,8 @@ fm10k_reset_tx_queue(struct fm10k_tx_queue *txq); #define RXEFLAG_SHIFT (13) /* IPE/L4E flag shift */ #define L3L4EFLAG_SHIFT (14) +/* shift PKT_RX_L4_CKSUM_GOOD into one byte by 1 bit */ +#define CKSUM_SHIFT (1) static inline void fm10k_desc_to_olflags_v(__m128i descs[4], struct rte_mbuf **rx_pkts) @@ -92,11 +94,18 @@ fm10k_desc_to_olflags_v(__m128i descs[4], struct rte_mbuf **rx_pkts) 0x0000, 0x0000, 0x0000, 0x0000, 0x0001, 0x0001, 0x0001, 0x0001); + /* mask the lower byte of ol_flags */ + const __m128i ol_flags_msk = _mm_set_epi16( + 0x0000, 0x0000, 0x0000, 0x0000, + 0x00FF, 0x00FF, 0x00FF, 0x00FF); + const __m128i l3l4cksum_flag = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - PKT_RX_IP_CKSUM_BAD | PKT_RX_L4_CKSUM_BAD, - PKT_RX_IP_CKSUM_BAD, PKT_RX_L4_CKSUM_BAD, 0); + (PKT_RX_IP_CKSUM_BAD | PKT_RX_L4_CKSUM_BAD) >> CKSUM_SHIFT, + (PKT_RX_IP_CKSUM_BAD | PKT_RX_L4_CKSUM_GOOD) >> CKSUM_SHIFT, + (PKT_RX_IP_CKSUM_GOOD | PKT_RX_L4_CKSUM_BAD) >> CKSUM_SHIFT, + (PKT_RX_IP_CKSUM_GOOD | PKT_RX_L4_CKSUM_GOOD) >> CKSUM_SHIFT); const __m128i rxe_flag = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, @@ -139,6 +148,10 @@ fm10k_desc_to_olflags_v(__m128i descs[4], struct rte_mbuf **rx_pkts) /* Process L4/L3 checksum error flags */ 
cksumflag = _mm_srli_epi16(cksumflag, L3L4EFLAG_SHIFT); cksumflag = _mm_shuffle_epi8(l3l4cksum_flag, cksumflag); + + /* clean the higher byte and shift back the flag bits */ + cksumflag = _mm_and_si128(cksumflag, ol_flags_msk); + cksumflag = _mm_slli_epi16(cksumflag, CKSUM_SHIFT); vtag1 = _mm_or_si128(cksumflag, vtag1); vol.dword = _mm_cvtsi128_si64(vtag1); @@ -234,11 +247,8 @@ fm10k_rx_vec_condition_check(struct rte_eth_dev *dev) if (fconf->mode != RTE_FDIR_MODE_NONE) return -1; - /* - no csum error report support - * - no header split support - */ - if (rxmode->hw_ip_checksum == 1 || - rxmode->header_split == 1) + /* no header split support */ + if (rxmode->header_split == 1) return -1; return 0; @@ -406,7 +416,7 @@ fm10k_recv_raw_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts, */ rxdp = rxq->hw_ring + next_dd; - _mm_prefetch((const void *)rxdp, _MM_HINT_T0); + rte_prefetch0(rxdp); /* See if we need to rearm the RX queue - gives the prefetch a bit * of time to act @@ -468,6 +478,7 @@ fm10k_recv_raw_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts, /* Read desc statuses backwards to avoid race condition */ /* A.1 load 4 pkts desc */ descs0[3] = _mm_loadu_si128((__m128i *)(rxdp + 3)); + rte_compiler_barrier(); /* B.2 copy 2 mbuf point into rx_pkts */ _mm_storeu_si128((__m128i *)&rx_pkts[pos], mbp1); @@ -476,8 +487,10 @@ fm10k_recv_raw_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts, mbp2 = _mm_loadu_si128((__m128i *)&mbufp[pos+2]); descs0[2] = _mm_loadu_si128((__m128i *)(rxdp + 2)); + rte_compiler_barrier(); /* B.1 load 2 mbuf point */ descs0[1] = _mm_loadu_si128((__m128i *)(rxdp + 1)); + rte_compiler_barrier(); descs0[0] = _mm_loadu_si128((__m128i *)(rxdp)); /* B.2 copy 2 mbuf point into rx_pkts */ diff --git a/src/dpdk/drivers/net/i40e/base/i40e_adminq.c b/src/dpdk/drivers/net/i40e/base/i40e_adminq.c index 0d3a83fa..5bdf3f77 100644 --- a/src/dpdk/drivers/net/i40e/base/i40e_adminq.c +++ b/src/dpdk/drivers/net/i40e/base/i40e_adminq.c @@ -1077,11 +1077,11 @@ enum i40e_status_code i40e_clean_arq_element(struct i40e_hw *hw, desc = I40E_ADMINQ_DESC(hw->aq.arq, ntc); desc_idx = ntc; + hw->aq.arq_last_status = + (enum i40e_admin_queue_err)LE16_TO_CPU(desc->retval); flags = LE16_TO_CPU(desc->flags); if (flags & I40E_AQ_FLAG_ERR) { ret_code = I40E_ERR_ADMIN_QUEUE_ERROR; - hw->aq.arq_last_status = - (enum i40e_admin_queue_err)LE16_TO_CPU(desc->retval); i40e_debug(hw, I40E_DEBUG_AQ_MESSAGE, "AQRX: Event received with error 0x%X.\n", diff --git a/src/dpdk/drivers/net/i40e/base/i40e_adminq_cmd.h b/src/dpdk/drivers/net/i40e/base/i40e_adminq_cmd.h index 2b7a7608..67cef7cf 100644 --- a/src/dpdk/drivers/net/i40e/base/i40e_adminq_cmd.h +++ b/src/dpdk/drivers/net/i40e/base/i40e_adminq_cmd.h @@ -139,12 +139,10 @@ enum i40e_admin_queue_opc { i40e_aqc_opc_list_func_capabilities = 0x000A, i40e_aqc_opc_list_dev_capabilities = 0x000B, -#ifdef X722_SUPPORT /* Proxy commands */ i40e_aqc_opc_set_proxy_config = 0x0104, i40e_aqc_opc_set_ns_proxy_table_entry = 0x0105, -#endif /* LAA */ i40e_aqc_opc_mac_address_read = 0x0107, i40e_aqc_opc_mac_address_write = 0x0108, @@ -152,12 +150,11 @@ enum i40e_admin_queue_opc { /* PXE */ i40e_aqc_opc_clear_pxe_mode = 0x0110, -#ifdef X722_SUPPORT /* WoL commands */ i40e_aqc_opc_set_wol_filter = 0x0120, i40e_aqc_opc_get_wake_reason = 0x0121, + i40e_aqc_opc_clear_all_wol_filters = 0x025E, -#endif /* internal switch commands */ i40e_aqc_opc_get_switch_config = 0x0200, i40e_aqc_opc_add_statistics = 0x0201, @@ -196,6 +193,7 @@ enum i40e_admin_queue_opc { 
i40e_aqc_opc_remove_control_packet_filter = 0x025B, i40e_aqc_opc_add_cloud_filters = 0x025C, i40e_aqc_opc_remove_cloud_filters = 0x025D, + i40e_aqc_opc_clear_wol_switch_filters = 0x025E, i40e_aqc_opc_add_mirror_rule = 0x0260, i40e_aqc_opc_delete_mirror_rule = 0x0261, @@ -223,6 +221,9 @@ enum i40e_admin_queue_opc { i40e_aqc_opc_suspend_port_tx = 0x041B, i40e_aqc_opc_resume_port_tx = 0x041C, i40e_aqc_opc_configure_partition_bw = 0x041D, + /* hmc */ + i40e_aqc_opc_query_hmc_resource_profile = 0x0500, + i40e_aqc_opc_set_hmc_resource_profile = 0x0501, /* phy commands*/ i40e_aqc_opc_get_phy_abilities = 0x0600, @@ -278,12 +279,10 @@ enum i40e_admin_queue_opc { /* Tunnel commands */ i40e_aqc_opc_add_udp_tunnel = 0x0B00, i40e_aqc_opc_del_udp_tunnel = 0x0B01, -#ifdef X722_SUPPORT i40e_aqc_opc_set_rss_key = 0x0B02, i40e_aqc_opc_set_rss_lut = 0x0B03, i40e_aqc_opc_get_rss_key = 0x0B04, i40e_aqc_opc_get_rss_lut = 0x0B05, -#endif /* Async Events */ i40e_aqc_opc_event_lan_overflow = 0x1001, @@ -471,13 +470,15 @@ I40E_CHECK_CMD_LENGTH(i40e_aqc_cppm_configuration); /* Set ARP Proxy command / response (indirect 0x0104) */ struct i40e_aqc_arp_proxy_data { __le16 command_flags; -#define I40E_AQ_ARP_INIT_IPV4 0x0008 -#define I40E_AQ_ARP_UNSUP_CTL 0x0010 -#define I40E_AQ_ARP_ENA 0x0020 -#define I40E_AQ_ARP_ADD_IPV4 0x0040 -#define I40E_AQ_ARP_DEL_IPV4 0x0080 +#define I40E_AQ_ARP_INIT_IPV4 0x0800 +#define I40E_AQ_ARP_UNSUP_CTL 0x1000 +#define I40E_AQ_ARP_ENA 0x2000 +#define I40E_AQ_ARP_ADD_IPV4 0x4000 +#define I40E_AQ_ARP_DEL_IPV4 0x8000 __le16 table_id; - __le32 pfpm_proxyfc; + __le32 enabled_offloads; +#define I40E_AQ_ARP_DIRECTED_OFFLOAD_ENABLE 0x00000020 +#define I40E_AQ_ARP_OFFLOAD_ENABLE 0x00000800 __le32 ip_addr; u8 mac_addr[6]; u8 reserved[2]; @@ -492,17 +493,19 @@ struct i40e_aqc_ns_proxy_data { __le16 table_idx_ipv6_0; __le16 table_idx_ipv6_1; __le16 control; -#define I40E_AQ_NS_PROXY_ADD_0 0x0100 -#define I40E_AQ_NS_PROXY_DEL_0 0x0200 -#define I40E_AQ_NS_PROXY_ADD_1 0x0400 -#define I40E_AQ_NS_PROXY_DEL_1 0x0800 -#define I40E_AQ_NS_PROXY_ADD_IPV6_0 0x1000 -#define I40E_AQ_NS_PROXY_DEL_IPV6_0 0x2000 -#define I40E_AQ_NS_PROXY_ADD_IPV6_1 0x4000 -#define I40E_AQ_NS_PROXY_DEL_IPV6_1 0x8000 -#define I40E_AQ_NS_PROXY_COMMAND_SEQ 0x0001 -#define I40E_AQ_NS_PROXY_INIT_IPV6_TBL 0x0002 -#define I40E_AQ_NS_PROXY_INIT_MAC_TBL 0x0004 +#define I40E_AQ_NS_PROXY_ADD_0 0x0001 +#define I40E_AQ_NS_PROXY_DEL_0 0x0002 +#define I40E_AQ_NS_PROXY_ADD_1 0x0004 +#define I40E_AQ_NS_PROXY_DEL_1 0x0008 +#define I40E_AQ_NS_PROXY_ADD_IPV6_0 0x0010 +#define I40E_AQ_NS_PROXY_DEL_IPV6_0 0x0020 +#define I40E_AQ_NS_PROXY_ADD_IPV6_1 0x0040 +#define I40E_AQ_NS_PROXY_DEL_IPV6_1 0x0080 +#define I40E_AQ_NS_PROXY_COMMAND_SEQ 0x0100 +#define I40E_AQ_NS_PROXY_INIT_IPV6_TBL 0x0200 +#define I40E_AQ_NS_PROXY_INIT_MAC_TBL 0x0400 +#define I40E_AQ_NS_PROXY_OFFLOAD_ENABLE 0x0800 +#define I40E_AQ_NS_PROXY_DIRECTED_OFFLOAD_ENABLE 0x1000 u8 mac_addr_0[6]; u8 mac_addr_1[6]; u8 local_mac_addr[6]; @@ -532,7 +535,8 @@ struct i40e_aqc_mac_address_read { #define I40E_AQC_PORT_ADDR_VALID 0x40 #define I40E_AQC_WOL_ADDR_VALID 0x80 #define I40E_AQC_MC_MAG_EN_VALID 0x100 -#define I40E_AQC_ADDR_VALID_MASK 0x1F0 +#define I40E_AQC_WOL_PRESERVE_STATUS 0x200 +#define I40E_AQC_ADDR_VALID_MASK 0x3F0 u8 reserved[6]; __le32 addr_high; __le32 addr_low; @@ -552,6 +556,8 @@ I40E_CHECK_STRUCT_LEN(24, i40e_aqc_mac_address_read_data); /* Manage MAC Address Write Command (0x0108) */ struct i40e_aqc_mac_address_write { __le16 command_flags; +#define I40E_AQC_MC_MAG_EN 0x0100 +#define 
I40E_AQC_WOL_PRESERVE_ON_PFR 0x0200 #define I40E_AQC_WRITE_TYPE_LAA_ONLY 0x0000 #define I40E_AQC_WRITE_TYPE_LAA_WOL 0x4000 #define I40E_AQC_WRITE_TYPE_PORT 0x8000 @@ -575,15 +581,24 @@ struct i40e_aqc_clear_pxe { I40E_CHECK_CMD_LENGTH(i40e_aqc_clear_pxe); -#ifdef X722_SUPPORT /* Set WoL Filter (0x0120) */ struct i40e_aqc_set_wol_filter { __le16 filter_index; #define I40E_AQC_MAX_NUM_WOL_FILTERS 8 +#define I40E_AQC_SET_WOL_FILTER_TYPE_MAGIC_SHIFT 15 +#define I40E_AQC_SET_WOL_FILTER_TYPE_MAGIC_MASK (0x1 << \ + I40E_AQC_SET_WOL_FILTER_TYPE_MAGIC_SHIFT) + +#define I40E_AQC_SET_WOL_FILTER_INDEX_SHIFT 0 +#define I40E_AQC_SET_WOL_FILTER_INDEX_MASK (0x7 << \ + I40E_AQC_SET_WOL_FILTER_INDEX_SHIFT) __le16 cmd_flags; #define I40E_AQC_SET_WOL_FILTER 0x8000 #define I40E_AQC_SET_WOL_FILTER_NO_TCO_WOL 0x4000 +#define I40E_AQC_SET_WOL_FILTER_WOL_PRESERVE_ON_PFR 0x2000 +#define I40E_AQC_SET_WOL_FILTER_ACTION_CLEAR 0 +#define I40E_AQC_SET_WOL_FILTER_ACTION_SET 1 __le16 valid_flags; #define I40E_AQC_SET_WOL_FILTER_ACTION_VALID 0x8000 #define I40E_AQC_SET_WOL_FILTER_NO_TCO_ACTION_VALID 0x4000 @@ -594,24 +609,29 @@ struct i40e_aqc_set_wol_filter { I40E_CHECK_CMD_LENGTH(i40e_aqc_set_wol_filter); +struct i40e_aqc_set_wol_filter_data { + u8 filter[128]; + u8 mask[16]; +}; + +I40E_CHECK_STRUCT_LEN(0x90, i40e_aqc_set_wol_filter_data); + /* Get Wake Reason (0x0121) */ struct i40e_aqc_get_wake_reason_completion { u8 reserved_1[2]; __le16 wake_reason; +#define I40E_AQC_GET_WAKE_UP_REASON_WOL_REASON_MATCHED_INDEX_SHIFT 0 +#define I40E_AQC_GET_WAKE_UP_REASON_WOL_REASON_MATCHED_INDEX_MASK (0xFF << \ + I40E_AQC_GET_WAKE_UP_REASON_WOL_REASON_MATCHED_INDEX_SHIFT) +#define I40E_AQC_GET_WAKE_UP_REASON_WOL_REASON_RESERVED_SHIFT 8 +#define I40E_AQC_GET_WAKE_UP_REASON_WOL_REASON_RESERVED_MASK (0xFF << \ + I40E_AQC_GET_WAKE_UP_REASON_WOL_REASON_RESERVED_SHIFT) u8 reserved_2[12]; }; I40E_CHECK_CMD_LENGTH(i40e_aqc_get_wake_reason_completion); -struct i40e_aqc_set_wol_filter_data { - u8 filter[128]; - u8 mask[16]; -}; - -I40E_CHECK_STRUCT_LEN(0x90, i40e_aqc_set_wol_filter_data); - -#endif /* X722_SUPPORT */ /* Switch configuration commands (0x02xx) */ /* Used by many indirect commands that only pass an seid and a buffer in the @@ -694,6 +714,8 @@ struct i40e_aqc_set_port_parameters { #define I40E_AQ_SET_P_PARAMS_PAD_SHORT_PACKETS 2 /* must set! 
*/ #define I40E_AQ_SET_P_PARAMS_DOUBLE_VLAN_ENA 4 __le16 bad_frame_vsi; +#define I40E_AQ_SET_P_PARAMS_BFRAME_SEID_SHIFT 0x0 +#define I40E_AQ_SET_P_PARAMS_BFRAME_SEID_MASK 0x3FF __le16 default_seid; /* reserved for command */ u8 reserved[10]; }; @@ -745,6 +767,7 @@ I40E_CHECK_STRUCT_LEN(0x10, i40e_aqc_switch_resource_alloc_element_resp); /* Set Switch Configuration (direct 0x0205) */ struct i40e_aqc_set_switch_config { __le16 flags; +/* flags used for both fields below */ #define I40E_AQ_SET_SWITCH_CFG_PROMISC 0x0001 #define I40E_AQ_SET_SWITCH_CFG_L2_FILTER 0x0002 __le16 valid_flags; @@ -913,16 +936,12 @@ struct i40e_aqc_vsi_properties_data { I40E_AQ_VSI_TC_QUE_NUMBER_SHIFT) /* queueing option section */ u8 queueing_opt_flags; -#ifdef X722_SUPPORT #define I40E_AQ_VSI_QUE_OPT_MULTICAST_UDP_ENA 0x04 #define I40E_AQ_VSI_QUE_OPT_UNICAST_UDP_ENA 0x08 -#endif #define I40E_AQ_VSI_QUE_OPT_TCP_ENA 0x10 #define I40E_AQ_VSI_QUE_OPT_FCOE_ENA 0x20 -#ifdef X722_SUPPORT #define I40E_AQ_VSI_QUE_OPT_RSS_LUT_PF 0x00 #define I40E_AQ_VSI_QUE_OPT_RSS_LUT_VSI 0x40 -#endif u8 queueing_opt_reserved[3]; /* scheduler section */ u8 up_enable_bits; @@ -1644,6 +1663,24 @@ struct i40e_aqc_configure_partition_bw_data { I40E_CHECK_STRUCT_LEN(0x22, i40e_aqc_configure_partition_bw_data); +/* Get and set the active HMC resource profile and status. + * (direct 0x0500) and (direct 0x0501) + */ +struct i40e_aq_get_set_hmc_resource_profile { + u8 pm_profile; + u8 pe_vf_enabled; + u8 reserved[14]; +}; + +I40E_CHECK_CMD_LENGTH(i40e_aq_get_set_hmc_resource_profile); + +enum i40e_aq_hmc_profile { + /* I40E_HMC_PROFILE_NO_CHANGE = 0, reserved */ + I40E_HMC_PROFILE_DEFAULT = 1, + I40E_HMC_PROFILE_FAVOR_VF = 2, + I40E_HMC_PROFILE_EQUAL = 3, +}; + /* Get PHY Abilities (indirect 0x0600) uses the generic indirect struct */ /* set in param0 for get phy abilities to report qualified modules */ @@ -1691,6 +1728,7 @@ enum i40e_aq_phy_type { #define I40E_LINK_SPEED_10GB_SHIFT 0x3 #define I40E_LINK_SPEED_40GB_SHIFT 0x4 #define I40E_LINK_SPEED_20GB_SHIFT 0x5 +#define I40E_LINK_SPEED_25GB_SHIFT 0x6 enum i40e_aq_link_speed { I40E_LINK_SPEED_UNKNOWN = 0, @@ -1698,7 +1736,8 @@ enum i40e_aq_link_speed { I40E_LINK_SPEED_1GB = (1 << I40E_LINK_SPEED_1000MB_SHIFT), I40E_LINK_SPEED_10GB = (1 << I40E_LINK_SPEED_10GB_SHIFT), I40E_LINK_SPEED_40GB = (1 << I40E_LINK_SPEED_40GB_SHIFT), - I40E_LINK_SPEED_20GB = (1 << I40E_LINK_SPEED_20GB_SHIFT) + I40E_LINK_SPEED_20GB = (1 << I40E_LINK_SPEED_20GB_SHIFT), + I40E_LINK_SPEED_25GB = (1 << I40E_LINK_SPEED_25GB_SHIFT), }; struct i40e_aqc_module_desc { @@ -1721,6 +1760,8 @@ struct i40e_aq_get_phy_abilities_resp { #define I40E_AQ_PHY_LINK_ENABLED 0x08 #define I40E_AQ_PHY_AN_ENABLED 0x10 #define I40E_AQ_PHY_FLAG_MODULE_QUAL 0x20 +#define I40E_AQ_PHY_FEC_ABILITY_KR 0x40 +#define I40E_AQ_PHY_FEC_ABILITY_RS 0x80 __le16 eee_capability; #define I40E_AQ_EEE_100BASE_TX 0x0002 #define I40E_AQ_EEE_1000BASE_T 0x0004 @@ -1731,7 +1772,22 @@ struct i40e_aq_get_phy_abilities_resp { __le32 eeer_val; u8 d3_lpan; #define I40E_AQ_SET_PHY_D3_LPAN_ENA 0x01 - u8 reserved[3]; + u8 phy_type_ext; +#define I40E_AQ_PHY_TYPE_EXT_25G_KR 0x01 +#define I40E_AQ_PHY_TYPE_EXT_25G_CR 0x02 +#define I40E_AQ_PHY_TYPE_EXT_25G_SR 0x04 +#define I40E_AQ_PHY_TYPE_EXT_25G_LR 0x08 + u8 fec_cfg_curr_mod_ext_info; +#define I40E_AQ_ENABLE_FEC_KR 0x01 +#define I40E_AQ_ENABLE_FEC_RS 0x02 +#define I40E_AQ_REQUEST_FEC_KR 0x04 +#define I40E_AQ_REQUEST_FEC_RS 0x08 +#define I40E_AQ_ENABLE_FEC_AUTO 0x10 +#define I40E_AQ_FEC +#define I40E_AQ_MODULE_TYPE_EXT_MASK 0xE0 +#define 
I40E_AQ_MODULE_TYPE_EXT_SHIFT 5 + + u8 ext_comp_code; u8 phy_id[4]; u8 module_type[3]; u8 qualified_module_count; @@ -1753,7 +1809,16 @@ struct i40e_aq_set_phy_config { /* same bits as above in all */ __le16 eee_capability; __le32 eeer; u8 low_power_ctrl; - u8 reserved[3]; + u8 phy_type_ext; + u8 fec_config; +#define I40E_AQ_SET_FEC_ABILITY_KR BIT(0) +#define I40E_AQ_SET_FEC_ABILITY_RS BIT(1) +#define I40E_AQ_SET_FEC_REQUEST_KR BIT(2) +#define I40E_AQ_SET_FEC_REQUEST_RS BIT(3) +#define I40E_AQ_SET_FEC_AUTO BIT(4) +#define I40E_AQ_PHY_FEC_CONFIG_SHIFT 0x0 +#define I40E_AQ_PHY_FEC_CONFIG_MASK (0x1F << I40E_AQ_PHY_FEC_CONFIG_SHIFT) + u8 reserved; }; I40E_CHECK_CMD_LENGTH(i40e_aq_set_phy_config); @@ -1833,16 +1898,26 @@ struct i40e_aqc_get_link_status { #define I40E_AQ_LINK_TX_DRAINED 0x01 #define I40E_AQ_LINK_TX_FLUSHED 0x03 #define I40E_AQ_LINK_FORCED_40G 0x10 +/* 25G Error Codes */ +#define I40E_AQ_25G_NO_ERR 0X00 +#define I40E_AQ_25G_NOT_PRESENT 0X01 +#define I40E_AQ_25G_NVM_CRC_ERR 0X02 +#define I40E_AQ_25G_SBUS_UCODE_ERR 0X03 +#define I40E_AQ_25G_SERDES_UCODE_ERR 0X04 +#define I40E_AQ_25G_NIMB_UCODE_ERR 0X05 u8 loopback; /* use defines from i40e_aqc_set_lb_mode */ __le16 max_frame_size; u8 config; +#define I40E_AQ_CONFIG_FEC_KR_ENA 0x01 +#define I40E_AQ_CONFIG_FEC_RS_ENA 0x02 #define I40E_AQ_CONFIG_CRC_ENA 0x04 #define I40E_AQ_CONFIG_PACING_MASK 0x78 - u8 external_power_ability; + u8 power_desc; #define I40E_AQ_LINK_POWER_CLASS_1 0x00 #define I40E_AQ_LINK_POWER_CLASS_2 0x01 #define I40E_AQ_LINK_POWER_CLASS_3 0x02 #define I40E_AQ_LINK_POWER_CLASS_4 0x03 +#define I40E_AQ_PWR_CLASS_MASK 0x03 u8 reserved[4]; }; @@ -2340,7 +2415,6 @@ struct i40e_aqc_del_udp_tunnel_completion { }; I40E_CHECK_CMD_LENGTH(i40e_aqc_del_udp_tunnel_completion); -#ifdef X722_SUPPORT struct i40e_aqc_get_set_rss_key { #define I40E_AQC_SET_RSS_KEY_VSI_VALID (0x1 << 15) @@ -2381,7 +2455,6 @@ struct i40e_aqc_get_set_rss_lut { }; I40E_CHECK_CMD_LENGTH(i40e_aqc_get_set_rss_lut); -#endif /* tunnel key structure 0x0B10 */ diff --git a/src/dpdk/drivers/net/i40e/base/i40e_common.c b/src/dpdk/drivers/net/i40e/base/i40e_common.c index 98ed4b68..b8d81651 100644 --- a/src/dpdk/drivers/net/i40e/base/i40e_common.c +++ b/src/dpdk/drivers/net/i40e/base/i40e_common.c @@ -71,7 +71,6 @@ STATIC enum i40e_status_code i40e_set_mac_type(struct i40e_hw *hw) case I40E_DEV_ID_25G_SFP28: hw->mac.type = I40E_MAC_XL710; break; -#ifdef X722_SUPPORT #ifdef X722_A0_SUPPORT case I40E_DEV_ID_X722_A0: #endif @@ -81,21 +80,16 @@ STATIC enum i40e_status_code i40e_set_mac_type(struct i40e_hw *hw) case I40E_DEV_ID_1G_BASE_T_X722: case I40E_DEV_ID_10G_BASE_T_X722: case I40E_DEV_ID_SFP_I_X722: - case I40E_DEV_ID_QSFP_I_X722: hw->mac.type = I40E_MAC_X722; break; -#endif -#ifdef X722_SUPPORT #if defined(INTEGRATED_VF) || defined(VF_DRIVER) case I40E_DEV_ID_X722_VF: - case I40E_DEV_ID_X722_VF_HV: #ifdef X722_A0_SUPPORT case I40E_DEV_ID_X722_A0_VF: #endif hw->mac.type = I40E_MAC_X722_VF; break; #endif /* INTEGRATED_VF || VF_DRIVER */ -#endif /* X722_SUPPORT */ #if defined(INTEGRATED_VF) || defined(VF_DRIVER) case I40E_DEV_ID_VF: case I40E_DEV_ID_VF_HV: @@ -115,7 +109,6 @@ STATIC enum i40e_status_code i40e_set_mac_type(struct i40e_hw *hw) return status; } -#ifndef I40E_NDIS_SUPPORT /** * i40e_aq_str - convert AQ err code to a string * @hw: pointer to the HW structure @@ -322,7 +315,6 @@ const char *i40e_stat_str(struct i40e_hw *hw, enum i40e_status_code stat_err) return hw->err_str; } -#endif /* I40E_NDIS_SUPPORT */ /** * i40e_debug_aq * @hw: debug mask related to 
admin queue @@ -383,8 +375,7 @@ void i40e_debug_aq(struct i40e_hw *hw, enum i40e_debug_mask mask, void *desc, d_buf[j] = buf[i]; i40e_debug(hw, mask, "\t0x%04X %02X %02X %02X %02X %02X %02X %02X %02X %02X %02X %02X %02X %02X %02X %02X %02X\n", - i_sav, d_buf[0], d_buf[1], - d_buf[2], d_buf[3], + i_sav, d_buf[0], d_buf[1], d_buf[2], d_buf[3], d_buf[4], d_buf[5], d_buf[6], d_buf[7], d_buf[8], d_buf[9], d_buf[10], d_buf[11], d_buf[12], d_buf[13], d_buf[14], d_buf[15]); @@ -449,7 +440,6 @@ enum i40e_status_code i40e_aq_queue_shutdown(struct i40e_hw *hw, return status; } -#ifdef X722_SUPPORT /** * i40e_aq_get_set_rss_lut @@ -608,7 +598,6 @@ enum i40e_status_code i40e_aq_set_rss_key(struct i40e_hw *hw, { return i40e_aq_get_set_rss_key(hw, vsi_id, key, true); } -#endif /* X722_SUPPORT */ /* The i40e_ptype_lookup table is used to convert from the 8-bit ptype in the * hardware to a bit-field that can be used by SW to more easily determine the @@ -773,7 +762,7 @@ struct i40e_rx_ptype_decoded i40e_ptype_lookup[] = { /* Non Tunneled IPv6 */ I40E_PTT(88, IP, IPV6, FRG, NONE, NONE, NOF, NONE, PAY3), I40E_PTT(89, IP, IPV6, NOF, NONE, NONE, NOF, NONE, PAY3), - I40E_PTT(90, IP, IPV6, NOF, NONE, NONE, NOF, UDP, PAY3), + I40E_PTT(90, IP, IPV6, NOF, NONE, NONE, NOF, UDP, PAY4), I40E_PTT_UNUSED_ENTRY(91), I40E_PTT(92, IP, IPV6, NOF, NONE, NONE, NOF, TCP, PAY4), I40E_PTT(93, IP, IPV6, NOF, NONE, NONE, NOF, SCTP, PAY4), @@ -1024,9 +1013,7 @@ enum i40e_status_code i40e_init_shared_code(struct i40e_hw *hw) switch (hw->mac.type) { case I40E_MAC_XL710: -#ifdef X722_SUPPORT case I40E_MAC_X722: -#endif break; default: return I40E_ERR_DEVICE_NOT_SUPPORTED; @@ -1046,11 +1033,9 @@ enum i40e_status_code i40e_init_shared_code(struct i40e_hw *hw) else hw->pf_id = (u8)(func_rid & 0x7); -#ifdef X722_SUPPORT if (hw->mac.type == I40E_MAC_X722) hw->flags |= I40E_HW_FLAG_AQ_SRCTL_ACCESS_ENABLE; -#endif status = i40e_init_nvm(hw); return status; } @@ -1128,7 +1113,8 @@ enum i40e_status_code i40e_get_mac_addr(struct i40e_hw *hw, u8 *mac_addr) status = i40e_aq_mac_address_read(hw, &flags, &addrs, NULL); if (flags & I40E_AQC_LAN_ADDR_VALID) - memcpy(mac_addr, &addrs.pf_lan_mac, sizeof(addrs.pf_lan_mac)); + i40e_memcpy(mac_addr, &addrs.pf_lan_mac, sizeof(addrs.pf_lan_mac), + I40E_NONDMA_TO_NONDMA); return status; } @@ -1151,7 +1137,8 @@ enum i40e_status_code i40e_get_port_mac_addr(struct i40e_hw *hw, u8 *mac_addr) return status; if (flags & I40E_AQC_PORT_ADDR_VALID) - memcpy(mac_addr, &addrs.port_mac, sizeof(addrs.port_mac)); + i40e_memcpy(mac_addr, &addrs.port_mac, sizeof(addrs.port_mac), + I40E_NONDMA_TO_NONDMA); else status = I40E_ERR_INVALID_MAC_ADDR; @@ -1191,6 +1178,33 @@ void i40e_pre_tx_queue_cfg(struct i40e_hw *hw, u32 queue, bool enable) } /** + * i40e_get_san_mac_addr - get SAN MAC address + * @hw: pointer to the HW structure + * @mac_addr: pointer to SAN MAC address + * + * Reads the adapter's SAN MAC address from NVM + **/ +enum i40e_status_code i40e_get_san_mac_addr(struct i40e_hw *hw, + u8 *mac_addr) +{ + struct i40e_aqc_mac_address_read_data addrs; + enum i40e_status_code status; + u16 flags = 0; + + status = i40e_aq_mac_address_read(hw, &flags, &addrs, NULL); + if (status) + return status; + + if (flags & I40E_AQC_SAN_ADDR_VALID) + i40e_memcpy(mac_addr, &addrs.pf_san_mac, sizeof(addrs.pf_san_mac), + I40E_NONDMA_TO_NONDMA); + else + status = I40E_ERR_INVALID_MAC_ADDR; + + return status; +} + +/** * i40e_read_pba_string - Reads part number string from EEPROM * @hw: pointer to hardware structure * @pba_num: stores the 
part number string from the EEPROM @@ -1264,6 +1278,8 @@ STATIC enum i40e_media_type i40e_get_media_type(struct i40e_hw *hw) case I40E_PHY_TYPE_1000BASE_LX: case I40E_PHY_TYPE_40GBASE_SR4: case I40E_PHY_TYPE_40GBASE_LR4: + case I40E_PHY_TYPE_25GBASE_LR: + case I40E_PHY_TYPE_25GBASE_SR: media = I40E_MEDIA_TYPE_FIBER; break; case I40E_PHY_TYPE_100BASE_TX: @@ -1278,6 +1294,7 @@ STATIC enum i40e_media_type i40e_get_media_type(struct i40e_hw *hw) case I40E_PHY_TYPE_10GBASE_SFPP_CU: case I40E_PHY_TYPE_40GBASE_AOC: case I40E_PHY_TYPE_10GBASE_AOC: + case I40E_PHY_TYPE_25GBASE_CR: media = I40E_MEDIA_TYPE_DA; break; case I40E_PHY_TYPE_1000BASE_KX: @@ -1285,6 +1302,7 @@ STATIC enum i40e_media_type i40e_get_media_type(struct i40e_hw *hw) case I40E_PHY_TYPE_10GBASE_KR: case I40E_PHY_TYPE_40GBASE_KR4: case I40E_PHY_TYPE_20GBASE_KR2: + case I40E_PHY_TYPE_25GBASE_KR: media = I40E_MEDIA_TYPE_BACKPLANE; break; case I40E_PHY_TYPE_SGMII: @@ -1670,8 +1688,10 @@ enum i40e_status_code i40e_aq_get_phy_capabilities(struct i40e_hw *hw, if (hw->aq.asq_last_status == I40E_AQ_RC_EIO) status = I40E_ERR_UNKNOWN_PHY; - if (report_init) + if (report_init) { hw->phy.phy_types = LE32_TO_CPU(abilities->phy_type); + hw->phy.phy_types |= ((u64)abilities->phy_type_ext << 32); + } return status; } @@ -1763,10 +1783,13 @@ enum i40e_status_code i40e_set_fc(struct i40e_hw *hw, u8 *aq_failures, config.abilities |= I40E_AQ_PHY_ENABLE_ATOMIC_LINK; /* Copy over all the old settings */ config.phy_type = abilities.phy_type; + config.phy_type_ext = abilities.phy_type_ext; config.link_speed = abilities.link_speed; config.eee_capability = abilities.eee_capability; config.eeer = abilities.eeer_val; config.low_power_ctrl = abilities.d3_lpan; + config.fec_config = abilities.fec_cfg_curr_mod_ext_info & + I40E_AQ_PHY_FEC_CONFIG_MASK; status = i40e_aq_set_phy_config(hw, &config, NULL); if (status) @@ -1926,6 +1949,8 @@ enum i40e_status_code i40e_aq_get_link_info(struct i40e_hw *hw, hw_link_info->link_speed = (enum i40e_aq_link_speed)resp->link_speed; hw_link_info->link_info = resp->link_info; hw_link_info->an_info = resp->an_info; + hw_link_info->fec_info = resp->config & (I40E_AQ_CONFIG_FEC_KR_ENA | + I40E_AQ_CONFIG_FEC_RS_ENA); hw_link_info->ext_info = resp->ext_info; hw_link_info->loopback = resp->loopback; hw_link_info->max_frame_size = LE16_TO_CPU(resp->max_frame_size); @@ -1948,12 +1973,13 @@ enum i40e_status_code i40e_aq_get_link_info(struct i40e_hw *hw, else hw_link_info->crc_enable = false; - if (resp->command_flags & CPU_TO_LE16(I40E_AQ_LSE_ENABLE)) + if (resp->command_flags & CPU_TO_LE16(I40E_AQ_LSE_IS_ENABLED)) hw_link_info->lse_enable = true; else hw_link_info->lse_enable = false; - if ((hw->aq.fw_maj_ver < 4 || (hw->aq.fw_maj_ver == 4 && + if ((hw->mac.type == I40E_MAC_XL710) && + (hw->aq.fw_maj_ver < 4 || (hw->aq.fw_maj_ver == 4 && hw->aq.fw_min_ver < 40)) && hw_link_info->phy_type == 0xE) hw_link_info->phy_type = I40E_PHY_TYPE_10GBASE_SFPP_CU; @@ -2215,6 +2241,34 @@ enum i40e_status_code i40e_aq_set_default_vsi(struct i40e_hw *hw, } /** + * i40e_aq_clear_default_vsi + * @hw: pointer to the hw struct + * @seid: vsi number + * @cmd_details: pointer to command details structure or NULL + **/ +enum i40e_status_code i40e_aq_clear_default_vsi(struct i40e_hw *hw, + u16 seid, + struct i40e_asq_cmd_details *cmd_details) +{ + struct i40e_aq_desc desc; + struct i40e_aqc_set_vsi_promiscuous_modes *cmd = + (struct i40e_aqc_set_vsi_promiscuous_modes *) + &desc.params.raw; + enum i40e_status_code status; + + 
i40e_fill_default_direct_cmd_desc(&desc, + i40e_aqc_opc_set_vsi_promiscuous_modes); + + cmd->promiscuous_flags = CPU_TO_LE16(0); + cmd->valid_flags = CPU_TO_LE16(I40E_AQC_SET_VSI_DEFAULT); + cmd->seid = CPU_TO_LE16(seid); + + status = i40e_asq_send_command(hw, &desc, NULL, 0, cmd_details); + + return status; +} + +/** * i40e_aq_set_vsi_unicast_promiscuous * @hw: pointer to the hw struct * @seid: vsi number @@ -2290,6 +2344,43 @@ enum i40e_status_code i40e_aq_set_vsi_multicast_promiscuous(struct i40e_hw *hw, } /** +* i40e_aq_set_vsi_full_promiscuous +* @hw: pointer to the hw struct +* @seid: VSI number +* @set: set promiscuous enable/disable +* @cmd_details: pointer to command details structure or NULL +**/ +enum i40e_status_code i40e_aq_set_vsi_full_promiscuous(struct i40e_hw *hw, + u16 seid, bool set, + struct i40e_asq_cmd_details *cmd_details) +{ + struct i40e_aq_desc desc; + struct i40e_aqc_set_vsi_promiscuous_modes *cmd = + (struct i40e_aqc_set_vsi_promiscuous_modes *)&desc.params.raw; + enum i40e_status_code status; + u16 flags = 0; + + i40e_fill_default_direct_cmd_desc(&desc, + i40e_aqc_opc_set_vsi_promiscuous_modes); + + if (set) + flags = I40E_AQC_SET_VSI_PROMISC_UNICAST | + I40E_AQC_SET_VSI_PROMISC_MULTICAST | + I40E_AQC_SET_VSI_PROMISC_BROADCAST; + + cmd->promiscuous_flags = CPU_TO_LE16(flags); + + cmd->valid_flags = CPU_TO_LE16(I40E_AQC_SET_VSI_PROMISC_UNICAST | + I40E_AQC_SET_VSI_PROMISC_MULTICAST | + I40E_AQC_SET_VSI_PROMISC_BROADCAST); + + cmd->seid = CPU_TO_LE16(seid); + status = i40e_asq_send_command(hw, &desc, NULL, 0, cmd_details); + + return status; +} + +/** * i40e_aq_set_vsi_mc_promisc_on_vlan * @hw: pointer to the hw struct * @seid: vsi number @@ -2358,6 +2449,40 @@ enum i40e_status_code i40e_aq_set_vsi_uc_promisc_on_vlan(struct i40e_hw *hw, } /** + * i40e_aq_set_vsi_bc_promisc_on_vlan + * @hw: pointer to the hw struct + * @seid: vsi number + * @enable: set broadcast promiscuous enable/disable for a given VLAN + * @vid: The VLAN tag filter - capture any broadcast packet with this VLAN tag + * @cmd_details: pointer to command details structure or NULL + **/ +enum i40e_status_code i40e_aq_set_vsi_bc_promisc_on_vlan(struct i40e_hw *hw, + u16 seid, bool enable, u16 vid, + struct i40e_asq_cmd_details *cmd_details) +{ + struct i40e_aq_desc desc; + struct i40e_aqc_set_vsi_promiscuous_modes *cmd = + (struct i40e_aqc_set_vsi_promiscuous_modes *)&desc.params.raw; + enum i40e_status_code status; + u16 flags = 0; + + i40e_fill_default_direct_cmd_desc(&desc, + i40e_aqc_opc_set_vsi_promiscuous_modes); + + if (enable) + flags |= I40E_AQC_SET_VSI_PROMISC_BROADCAST; + + cmd->promiscuous_flags = CPU_TO_LE16(flags); + cmd->valid_flags = CPU_TO_LE16(I40E_AQC_SET_VSI_PROMISC_BROADCAST); + cmd->seid = CPU_TO_LE16(seid); + cmd->vlan_tag = CPU_TO_LE16(vid | I40E_AQC_SET_VSI_VLAN_VALID); + + status = i40e_asq_send_command(hw, &desc, NULL, 0, cmd_details); + + return status; +} + +/** * i40e_aq_set_vsi_broadcast * @hw: pointer to the hw struct * @seid: vsi number @@ -2691,14 +2816,17 @@ enum i40e_status_code i40e_update_link_info(struct i40e_hw *hw) if (status) return status; - if (hw->phy.link_info.link_info & I40E_AQ_MEDIA_AVAILABLE) { + /* extra checking needed to ensure link info to user is timely */ + if ((hw->phy.link_info.link_info & I40E_AQ_MEDIA_AVAILABLE) && + ((hw->phy.link_info.link_info & I40E_AQ_LINK_UP) || + !(hw->phy.link_info_old.link_info & I40E_AQ_LINK_UP))) { status = i40e_aq_get_phy_capabilities(hw, false, false, &abilities, NULL); if (status) return status; - 
memcpy(hw->phy.link_info.module_type, &abilities.module_type, - sizeof(hw->phy.link_info.module_type)); + i40e_memcpy(hw->phy.link_info.module_type, &abilities.module_type, + sizeof(hw->phy.link_info.module_type), I40E_NONDMA_TO_NONDMA); } return status; } @@ -3549,6 +3677,14 @@ STATIC void i40e_parse_discover_capabilities(struct i40e_hw *hw, void *buff, break; case I40E_AQ_CAP_ID_MNG_MODE: p->management_mode = number; + if (major_rev > 1) { + p->mng_protocols_over_mctp = logical_id; + i40e_debug(hw, I40E_DEBUG_INIT, + "HW Capability: Protocols over MCTP = %d\n", + p->mng_protocols_over_mctp); + } else { + p->mng_protocols_over_mctp = 0; + } i40e_debug(hw, I40E_DEBUG_INIT, "HW Capability: Management Mode = %d\n", p->management_mode); @@ -3768,7 +3904,6 @@ STATIC void i40e_parse_discover_capabilities(struct i40e_hw *hw, void *buff, if (number & I40E_NVM_MGMT_UPDATE_DISABLED) p->update_disabled = true; break; -#ifdef X722_SUPPORT case I40E_AQ_CAP_ID_WOL_AND_PROXY: hw->num_wol_proxy_filters = (u16)number; hw->wol_proxy_vsi_seid = (u16)logical_id; @@ -3778,12 +3913,10 @@ STATIC void i40e_parse_discover_capabilities(struct i40e_hw *hw, void *buff, else p->acpi_prog_method = I40E_ACPI_PROGRAMMING_METHOD_HW_FVL; p->proxy_support = (phys_id & I40E_PROXY_SUPPORT_MASK) ? 1 : 0; - p->proxy_support = p->proxy_support; i40e_debug(hw, I40E_DEBUG_INIT, "HW Capability: WOL proxy filters = %d\n", hw->num_wol_proxy_filters); break; -#endif default: break; } @@ -3792,16 +3925,8 @@ STATIC void i40e_parse_discover_capabilities(struct i40e_hw *hw, void *buff, if (p->fcoe) i40e_debug(hw, I40E_DEBUG_ALL, "device is FCoE capable\n"); -#ifdef I40E_FCOE_ENA - /* Software override ensuring FCoE is disabled if npar or mfp - * mode because it is not supported in these modes. - */ - if (p->npar_enable || p->flex10_enable) - p->fcoe = false; -#else /* Always disable FCoE if compiled without the I40E_FCOE_ENA flag */ p->fcoe = false; -#endif /* count the enabled ports (aka the "not disabled" ports) */ hw->num_ports = 0; @@ -3828,8 +3953,10 @@ STATIC void i40e_parse_discover_capabilities(struct i40e_hw *hw, void *buff, /* partition id is 1-based, and functions are evenly spread * across the ports as partitions */ - hw->partition_id = (hw->pf_id / hw->num_ports) + 1; - hw->num_partitions = num_functions / hw->num_ports; + if (hw->num_ports != 0) { + hw->partition_id = (hw->pf_id / hw->num_ports) + 1; + hw->num_partitions = num_functions / hw->num_ports; + } /* additional HW specific goodies that might * someday be HW version specific @@ -4314,11 +4441,15 @@ enum i40e_status_code i40e_aq_start_stop_dcbx(struct i40e_hw *hw, /** * i40e_aq_add_udp_tunnel * @hw: pointer to the hw struct - * @udp_port: the UDP port to add + * @udp_port: the UDP port to add in Host byte order * @header_len: length of the tunneling header length in DWords * @protocol_index: protocol index type * @filter_index: pointer to filter index * @cmd_details: pointer to command details structure or NULL + * + * Note: Firmware expects the udp_port value to be in Little Endian format, + * and this function will call CPU_TO_LE16 to convert from Host byte order to + * Little Endian order. 
**/ enum i40e_status_code i40e_aq_add_udp_tunnel(struct i40e_hw *hw, u16 udp_port, u8 protocol_index, @@ -5452,12 +5583,12 @@ STATIC void i40e_fix_up_geneve_vni( u16 tnl_type; u32 ti; - tnl_type = (le16_to_cpu(f[i].flags) & + tnl_type = (LE16_TO_CPU(f[i].flags) & I40E_AQC_ADD_CLOUD_TNL_TYPE_MASK) >> I40E_AQC_ADD_CLOUD_TNL_TYPE_SHIFT; if (tnl_type == I40E_AQC_ADD_CLOUD_TNL_TYPE_GENEVE) { - ti = le32_to_cpu(f[i].tenant_id); - f[i].tenant_id = cpu_to_le32(ti << 8); + ti = LE32_TO_CPU(f[i].tenant_id); + f[i].tenant_id = CPU_TO_LE32(ti << 8); } } } @@ -5961,9 +6092,6 @@ enum i40e_status_code i40e_aq_configure_partition_bw(struct i40e_hw *hw, desc.flags |= CPU_TO_LE16((u16)I40E_AQ_FLAG_BUF); desc.flags |= CPU_TO_LE16((u16)I40E_AQ_FLAG_RD); - if (bwd_size > I40E_AQ_LARGE_BUF) - desc.flags |= CPU_TO_LE16((u16)I40E_AQ_FLAG_LB); - desc.datalen = CPU_TO_LE16(bwd_size); status = i40e_asq_send_command(hw, &desc, bw_data, bwd_size, cmd_details); @@ -5972,7 +6100,92 @@ enum i40e_status_code i40e_aq_configure_partition_bw(struct i40e_hw *hw, } /** - * i40e_read_phy_register + * i40e_read_phy_register_clause22 + * @hw: pointer to the HW structure + * @reg: register address in the page + * @phy_adr: PHY address on MDIO interface + * @value: PHY register value + * + * Reads specified PHY register value + **/ +enum i40e_status_code i40e_read_phy_register_clause22(struct i40e_hw *hw, + u16 reg, u8 phy_addr, u16 *value) +{ + enum i40e_status_code status = I40E_ERR_TIMEOUT; + u8 port_num = (u8)hw->func_caps.mdio_port_num; + u32 command = 0; + u16 retry = 1000; + + command = (reg << I40E_GLGEN_MSCA_DEVADD_SHIFT) | + (phy_addr << I40E_GLGEN_MSCA_PHYADD_SHIFT) | + (I40E_MDIO_CLAUSE22_OPCODE_READ_MASK) | + (I40E_MDIO_CLAUSE22_STCODE_MASK) | + (I40E_GLGEN_MSCA_MDICMD_MASK); + wr32(hw, I40E_GLGEN_MSCA(port_num), command); + do { + command = rd32(hw, I40E_GLGEN_MSCA(port_num)); + if (!(command & I40E_GLGEN_MSCA_MDICMD_MASK)) { + status = I40E_SUCCESS; + break; + } + i40e_usec_delay(10); + retry--; + } while (retry); + + if (status) { + i40e_debug(hw, I40E_DEBUG_PHY, + "PHY: Can't write command to external PHY.\n"); + } else { + command = rd32(hw, I40E_GLGEN_MSRWD(port_num)); + *value = (command & I40E_GLGEN_MSRWD_MDIRDDATA_MASK) >> + I40E_GLGEN_MSRWD_MDIRDDATA_SHIFT; + } + + return status; +} + +/** + * i40e_write_phy_register_clause22 + * @hw: pointer to the HW structure + * @reg: register address in the page + * @phy_adr: PHY address on MDIO interface + * @value: PHY register value + * + * Writes specified PHY register value + **/ +enum i40e_status_code i40e_write_phy_register_clause22(struct i40e_hw *hw, + u16 reg, u8 phy_addr, u16 value) +{ + enum i40e_status_code status = I40E_ERR_TIMEOUT; + u8 port_num = (u8)hw->func_caps.mdio_port_num; + u32 command = 0; + u16 retry = 1000; + + command = value << I40E_GLGEN_MSRWD_MDIWRDATA_SHIFT; + wr32(hw, I40E_GLGEN_MSRWD(port_num), command); + + command = (reg << I40E_GLGEN_MSCA_DEVADD_SHIFT) | + (phy_addr << I40E_GLGEN_MSCA_PHYADD_SHIFT) | + (I40E_MDIO_CLAUSE22_OPCODE_WRITE_MASK) | + (I40E_MDIO_CLAUSE22_STCODE_MASK) | + (I40E_GLGEN_MSCA_MDICMD_MASK); + + wr32(hw, I40E_GLGEN_MSCA(port_num), command); + do { + command = rd32(hw, I40E_GLGEN_MSCA(port_num)); + if (!(command & I40E_GLGEN_MSCA_MDICMD_MASK)) { + status = I40E_SUCCESS; + break; + } + i40e_usec_delay(10); + retry--; + } while (retry); + + return status; +} + +/** + * i40e_read_phy_register_clause45 * @hw: pointer to the HW structure * @page: registers page number * @reg: register address in the page @@ -5981,9 
+6194,8 @@ enum i40e_status_code i40e_aq_configure_partition_bw(struct i40e_hw *hw, * * Reads specified PHY register value **/ -enum i40e_status_code i40e_read_phy_register(struct i40e_hw *hw, - u8 page, u16 reg, u8 phy_addr, - u16 *value) +enum i40e_status_code i40e_read_phy_register_clause45(struct i40e_hw *hw, + u8 page, u16 reg, u8 phy_addr, u16 *value) { enum i40e_status_code status = I40E_ERR_TIMEOUT; u32 command = 0; @@ -5993,8 +6205,8 @@ enum i40e_status_code i40e_read_phy_register(struct i40e_hw *hw, command = (reg << I40E_GLGEN_MSCA_MDIADD_SHIFT) | (page << I40E_GLGEN_MSCA_DEVADD_SHIFT) | (phy_addr << I40E_GLGEN_MSCA_PHYADD_SHIFT) | - (I40E_MDIO_OPCODE_ADDRESS) | - (I40E_MDIO_STCODE) | + (I40E_MDIO_CLAUSE45_OPCODE_ADDRESS_MASK) | + (I40E_MDIO_CLAUSE45_STCODE_MASK) | (I40E_GLGEN_MSCA_MDICMD_MASK) | (I40E_GLGEN_MSCA_MDIINPROGEN_MASK); wr32(hw, I40E_GLGEN_MSCA(port_num), command); @@ -6016,8 +6228,8 @@ enum i40e_status_code i40e_read_phy_register(struct i40e_hw *hw, command = (page << I40E_GLGEN_MSCA_DEVADD_SHIFT) | (phy_addr << I40E_GLGEN_MSCA_PHYADD_SHIFT) | - (I40E_MDIO_OPCODE_READ) | - (I40E_MDIO_STCODE) | + (I40E_MDIO_CLAUSE45_OPCODE_READ_MASK) | + (I40E_MDIO_CLAUSE45_STCODE_MASK) | (I40E_GLGEN_MSCA_MDICMD_MASK) | (I40E_GLGEN_MSCA_MDIINPROGEN_MASK); status = I40E_ERR_TIMEOUT; @@ -6047,7 +6259,7 @@ phy_read_end: } /** - * i40e_write_phy_register + * i40e_write_phy_register_clause45 * @hw: pointer to the HW structure * @page: registers page number * @reg: register address in the page @@ -6056,9 +6268,8 @@ phy_read_end: * * Writes value to specified PHY register **/ -enum i40e_status_code i40e_write_phy_register(struct i40e_hw *hw, - u8 page, u16 reg, u8 phy_addr, - u16 value) +enum i40e_status_code i40e_write_phy_register_clause45(struct i40e_hw *hw, + u8 page, u16 reg, u8 phy_addr, u16 value) { enum i40e_status_code status = I40E_ERR_TIMEOUT; u32 command = 0; @@ -6068,8 +6279,8 @@ enum i40e_status_code i40e_write_phy_register(struct i40e_hw *hw, command = (reg << I40E_GLGEN_MSCA_MDIADD_SHIFT) | (page << I40E_GLGEN_MSCA_DEVADD_SHIFT) | (phy_addr << I40E_GLGEN_MSCA_PHYADD_SHIFT) | - (I40E_MDIO_OPCODE_ADDRESS) | - (I40E_MDIO_STCODE) | + (I40E_MDIO_CLAUSE45_OPCODE_ADDRESS_MASK) | + (I40E_MDIO_CLAUSE45_STCODE_MASK) | (I40E_GLGEN_MSCA_MDICMD_MASK) | (I40E_GLGEN_MSCA_MDIINPROGEN_MASK); wr32(hw, I40E_GLGEN_MSCA(port_num), command); @@ -6093,8 +6304,8 @@ enum i40e_status_code i40e_write_phy_register(struct i40e_hw *hw, command = (page << I40E_GLGEN_MSCA_DEVADD_SHIFT) | (phy_addr << I40E_GLGEN_MSCA_PHYADD_SHIFT) | - (I40E_MDIO_OPCODE_WRITE) | - (I40E_MDIO_STCODE) | + (I40E_MDIO_CLAUSE45_OPCODE_WRITE_MASK) | + (I40E_MDIO_CLAUSE45_STCODE_MASK) | (I40E_GLGEN_MSCA_MDICMD_MASK) | (I40E_GLGEN_MSCA_MDIINPROGEN_MASK); status = I40E_ERR_TIMEOUT; @@ -6115,6 +6326,78 @@ phy_write_end: } /** + * i40e_write_phy_register + * @hw: pointer to the HW structure + * @page: registers page number + * @reg: register address in the page + * @phy_adr: PHY address on MDIO interface + * @value: PHY register value + * + * Writes value to specified PHY register + **/ +enum i40e_status_code i40e_write_phy_register(struct i40e_hw *hw, + u8 page, u16 reg, u8 phy_addr, u16 value) +{ + enum i40e_status_code status; + + switch (hw->device_id) { + case I40E_DEV_ID_1G_BASE_T_X722: + status = i40e_write_phy_register_clause22(hw, + reg, phy_addr, value); + break; + case I40E_DEV_ID_10G_BASE_T: + case I40E_DEV_ID_10G_BASE_T4: + case I40E_DEV_ID_10G_BASE_T_X722: + case I40E_DEV_ID_25G_B: + case I40E_DEV_ID_25G_SFP28: + status = 
i40e_write_phy_register_clause45(hw, + page, reg, phy_addr, value); + break; + default: + status = I40E_ERR_UNKNOWN_PHY; + break; + } + + return status; +} + +/** + * i40e_read_phy_register + * @hw: pointer to the HW structure + * @page: registers page number + * @reg: register address in the page + * @phy_adr: PHY address on MDIO interface + * @value: PHY register value + * + * Reads specified PHY register value + **/ +enum i40e_status_code i40e_read_phy_register(struct i40e_hw *hw, + u8 page, u16 reg, u8 phy_addr, u16 *value) +{ + enum i40e_status_code status; + + switch (hw->device_id) { + case I40E_DEV_ID_1G_BASE_T_X722: + status = i40e_read_phy_register_clause22(hw, reg, phy_addr, + value); + break; + case I40E_DEV_ID_10G_BASE_T: + case I40E_DEV_ID_10G_BASE_T4: + case I40E_DEV_ID_10G_BASE_T_X722: + case I40E_DEV_ID_25G_B: + case I40E_DEV_ID_25G_SFP28: + status = i40e_read_phy_register_clause45(hw, page, reg, + phy_addr, value); + break; + default: + status = I40E_ERR_UNKNOWN_PHY; + break; + } + + return status; +} + +/** * i40e_get_phy_address * @hw: pointer to the HW structure * @dev_num: PHY port num that address we want @@ -6156,14 +6439,16 @@ enum i40e_status_code i40e_blink_phy_link_led(struct i40e_hw *hw, for (gpio_led_port = 0; gpio_led_port < 3; gpio_led_port++, led_addr++) { - status = i40e_read_phy_register(hw, I40E_PHY_COM_REG_PAGE, - led_addr, phy_addr, &led_reg); + status = i40e_read_phy_register_clause45(hw, + I40E_PHY_COM_REG_PAGE, + led_addr, phy_addr, + &led_reg); if (status) goto phy_blinking_end; led_ctl = led_reg; if (led_reg & I40E_PHY_LED_LINK_MODE_MASK) { led_reg = 0; - status = i40e_write_phy_register(hw, + status = i40e_write_phy_register_clause45(hw, I40E_PHY_COM_REG_PAGE, led_addr, phy_addr, led_reg); @@ -6175,20 +6460,18 @@ enum i40e_status_code i40e_blink_phy_link_led(struct i40e_hw *hw, if (time > 0 && interval > 0) { for (i = 0; i < time * 1000; i += interval) { - status = i40e_read_phy_register(hw, - I40E_PHY_COM_REG_PAGE, - led_addr, phy_addr, - &led_reg); + status = i40e_read_phy_register_clause45(hw, + I40E_PHY_COM_REG_PAGE, + led_addr, phy_addr, &led_reg); if (status) goto restore_config; if (led_reg & I40E_PHY_LED_MANUAL_ON) led_reg = 0; else led_reg = I40E_PHY_LED_MANUAL_ON; - status = i40e_write_phy_register(hw, - I40E_PHY_COM_REG_PAGE, - led_addr, phy_addr, - led_reg); + status = i40e_write_phy_register_clause45(hw, + I40E_PHY_COM_REG_PAGE, + led_addr, phy_addr, led_reg); if (status) goto restore_config; i40e_msec_delay(interval); @@ -6196,8 +6479,9 @@ enum i40e_status_code i40e_blink_phy_link_led(struct i40e_hw *hw, } restore_config: - status = i40e_write_phy_register(hw, I40E_PHY_COM_REG_PAGE, led_addr, - phy_addr, led_ctl); + status = i40e_write_phy_register_clause45(hw, + I40E_PHY_COM_REG_PAGE, + led_addr, phy_addr, led_ctl); phy_blinking_end: return status; @@ -6228,8 +6512,10 @@ enum i40e_status_code i40e_led_get_phy(struct i40e_hw *hw, u16 *led_addr, for (gpio_led_port = 0; gpio_led_port < 3; gpio_led_port++, temp_addr++) { - status = i40e_read_phy_register(hw, I40E_PHY_COM_REG_PAGE, - temp_addr, phy_addr, ®_val); + status = i40e_read_phy_register_clause45(hw, + I40E_PHY_COM_REG_PAGE, + temp_addr, phy_addr, + ®_val); if (status) return status; *val = reg_val; @@ -6262,41 +6548,42 @@ enum i40e_status_code i40e_led_set_phy(struct i40e_hw *hw, bool on, i = rd32(hw, I40E_PFGEN_PORTNUM); port_num = (u8)(i & I40E_PFGEN_PORTNUM_PORT_NUM_MASK); phy_addr = i40e_get_phy_address(hw, port_num); - - status = i40e_read_phy_register(hw, 
I40E_PHY_COM_REG_PAGE, led_addr, - phy_addr, &led_reg); + status = i40e_read_phy_register_clause45(hw, I40E_PHY_COM_REG_PAGE, + led_addr, phy_addr, &led_reg); if (status) return status; led_ctl = led_reg; if (led_reg & I40E_PHY_LED_LINK_MODE_MASK) { led_reg = 0; - status = i40e_write_phy_register(hw, I40E_PHY_COM_REG_PAGE, - led_addr, phy_addr, led_reg); + status = i40e_write_phy_register_clause45(hw, + I40E_PHY_COM_REG_PAGE, + led_addr, phy_addr, + led_reg); if (status) return status; } - status = i40e_read_phy_register(hw, I40E_PHY_COM_REG_PAGE, - led_addr, phy_addr, &led_reg); + status = i40e_read_phy_register_clause45(hw, I40E_PHY_COM_REG_PAGE, + led_addr, phy_addr, &led_reg); if (status) goto restore_config; if (on) led_reg = I40E_PHY_LED_MANUAL_ON; else led_reg = 0; - status = i40e_write_phy_register(hw, I40E_PHY_COM_REG_PAGE, - led_addr, phy_addr, led_reg); + status = i40e_write_phy_register_clause45(hw, I40E_PHY_COM_REG_PAGE, + led_addr, phy_addr, led_reg); if (status) goto restore_config; if (mode & I40E_PHY_LED_MODE_ORIG) { led_ctl = (mode & I40E_PHY_LED_MODE_MASK); - status = i40e_write_phy_register(hw, + status = i40e_write_phy_register_clause45(hw, I40E_PHY_COM_REG_PAGE, led_addr, phy_addr, led_ctl); } return status; restore_config: - status = i40e_write_phy_register(hw, I40E_PHY_COM_REG_PAGE, led_addr, - phy_addr, led_ctl); + status = i40e_write_phy_register_clause45(hw, I40E_PHY_COM_REG_PAGE, + led_addr, phy_addr, led_ctl); return status; } #endif /* PF_DRIVER */ @@ -6522,7 +6809,6 @@ enum i40e_status_code i40e_vf_reset(struct i40e_hw *hw) I40E_SUCCESS, NULL, 0, NULL); } #endif /* VF_DRIVER */ -#ifdef X722_SUPPORT /** * i40e_aq_set_arp_proxy_config @@ -6545,10 +6831,13 @@ enum i40e_status_code i40e_aq_set_arp_proxy_config(struct i40e_hw *hw, i40e_fill_default_direct_cmd_desc(&desc, i40e_aqc_opc_set_proxy_config); + desc.flags |= CPU_TO_LE16((u16)I40E_AQ_FLAG_BUF); + desc.flags |= CPU_TO_LE16((u16)I40E_AQ_FLAG_RD); desc.params.external.addr_high = CPU_TO_LE32(I40E_HI_DWORD((u64)proxy_config)); desc.params.external.addr_low = CPU_TO_LE32(I40E_LO_DWORD((u64)proxy_config)); + desc.datalen = CPU_TO_LE16(sizeof(struct i40e_aqc_arp_proxy_data)); status = i40e_asq_send_command(hw, &desc, proxy_config, sizeof(struct i40e_aqc_arp_proxy_data), @@ -6579,10 +6868,13 @@ enum i40e_status_code i40e_aq_set_ns_proxy_table_entry(struct i40e_hw *hw, i40e_fill_default_direct_cmd_desc(&desc, i40e_aqc_opc_set_ns_proxy_table_entry); + desc.flags |= CPU_TO_LE16((u16)I40E_AQ_FLAG_BUF); + desc.flags |= CPU_TO_LE16((u16)I40E_AQ_FLAG_RD); desc.params.external.addr_high = CPU_TO_LE32(I40E_HI_DWORD((u64)ns_proxy_table_entry)); desc.params.external.addr_low = CPU_TO_LE32(I40E_LO_DWORD((u64)ns_proxy_table_entry)); + desc.datalen = CPU_TO_LE16(sizeof(struct i40e_aqc_ns_proxy_data)); status = i40e_asq_send_command(hw, &desc, ns_proxy_table_entry, sizeof(struct i40e_aqc_ns_proxy_data), @@ -6629,9 +6921,11 @@ enum i40e_status_code i40e_aq_set_clear_wol_filter(struct i40e_hw *hw, if (set_filter) { if (!filter) return I40E_ERR_PARAM; + cmd_flags |= I40E_AQC_SET_WOL_FILTER; - buff_len = sizeof(*filter); + cmd_flags |= I40E_AQC_SET_WOL_FILTER_WOL_PRESERVE_ON_PFR; } + if (no_wol_tco) cmd_flags |= I40E_AQC_SET_WOL_FILTER_NO_TCO_WOL; cmd->cmd_flags = CPU_TO_LE16(cmd_flags); @@ -6642,6 +6936,12 @@ enum i40e_status_code i40e_aq_set_clear_wol_filter(struct i40e_hw *hw, valid_flags |= I40E_AQC_SET_WOL_FILTER_NO_TCO_ACTION_VALID; cmd->valid_flags = CPU_TO_LE16(valid_flags); + buff_len = sizeof(*filter); + desc.datalen = 
CPU_TO_LE16(buff_len); + + desc.flags |= CPU_TO_LE16((u16)I40E_AQ_FLAG_BUF); + desc.flags |= CPU_TO_LE16((u16)I40E_AQ_FLAG_RD); + cmd->address_high = CPU_TO_LE32(I40E_HI_DWORD((u64)filter)); cmd->address_low = CPU_TO_LE32(I40E_LO_DWORD((u64)filter)); @@ -6678,4 +6978,23 @@ enum i40e_status_code i40e_aq_get_wake_event_reason(struct i40e_hw *hw, return status; } -#endif /* X722_SUPPORT */ +/** +* i40e_aq_clear_all_wol_filters +* @hw: pointer to the hw struct +* @cmd_details: pointer to command details structure or NULL +* +* Get information for the reason of a Wake Up event +**/ +enum i40e_status_code i40e_aq_clear_all_wol_filters(struct i40e_hw *hw, + struct i40e_asq_cmd_details *cmd_details) +{ + struct i40e_aq_desc desc; + enum i40e_status_code status; + + i40e_fill_default_direct_cmd_desc(&desc, + i40e_aqc_opc_clear_all_wol_filters); + + status = i40e_asq_send_command(hw, &desc, NULL, 0, cmd_details); + + return status; +}
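
The clause-22/clause-45 split above keeps i40e_read_phy_register()/i40e_write_phy_register() as the generic entry points and dispatches on hw->device_id, so existing callers keep working on both the X710/XL710 parts (clause 45) and the 1G X722 ports (clause 22). A minimal caller sketch follows, assuming the i40e base headers (i40e_prototype.h, i40e_type.h) are included; the example_force_led_on() helper is hypothetical and not part of the patch:

/* Hypothetical helper, for illustration only: read-modify-write one PHY LED
 * control register through the generic wrappers. Whether clause 22 or
 * clause 45 MDIO framing is used is decided inside the wrappers from
 * hw->device_id, so the caller does not need to know the PHY flavour. */
static enum i40e_status_code
example_force_led_on(struct i40e_hw *hw, u16 led_addr, u8 phy_addr)
{
	enum i40e_status_code status;
	u16 led_reg = 0;

	status = i40e_read_phy_register(hw, I40E_PHY_COM_REG_PAGE,
					led_addr, phy_addr, &led_reg);
	if (status != I40E_SUCCESS)
		return status;

	led_reg &= ~I40E_PHY_LED_LINK_MODE_MASK;	/* leave link-driven mode */
	led_reg |= I40E_PHY_LED_MANUAL_ON;		/* drive the LED manually */

	return i40e_write_phy_register(hw, I40E_PHY_COM_REG_PAGE,
				       led_addr, phy_addr, led_reg);
}

Unknown device IDs fall through to I40E_ERR_UNKNOWN_PHY inside the wrappers, so a caller like this needs no device-specific fallback of its own.
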
\ No newline at end of file diff --git a/src/dpdk/drivers/net/i40e/base/i40e_devids.h b/src/dpdk/drivers/net/i40e/base/i40e_devids.h index ed73e1d2..4546689a 100644 --- a/src/dpdk/drivers/net/i40e/base/i40e_devids.h +++ b/src/dpdk/drivers/net/i40e/base/i40e_devids.h @@ -55,7 +55,6 @@ POSSIBILITY OF SUCH DAMAGE. #define I40E_DEV_ID_VF 0x154C #define I40E_DEV_ID_VF_HV 0x1571 #endif /* VF_DRIVER */ -#ifdef X722_SUPPORT #ifdef X722_A0_SUPPORT #define I40E_DEV_ID_X722_A0 0x374C #if defined(INTEGRATED_VF) || defined(VF_DRIVER) @@ -68,12 +67,9 @@ POSSIBILITY OF SUCH DAMAGE. #define I40E_DEV_ID_1G_BASE_T_X722 0x37D1 #define I40E_DEV_ID_10G_BASE_T_X722 0x37D2 #define I40E_DEV_ID_SFP_I_X722 0x37D3 -#define I40E_DEV_ID_QSFP_I_X722 0x37D4 #if defined(INTEGRATED_VF) || defined(VF_DRIVER) || defined(I40E_NDIS_SUPPORT) #define I40E_DEV_ID_X722_VF 0x37CD -#define I40E_DEV_ID_X722_VF_HV 0x37D9 #endif /* VF_DRIVER */ -#endif /* X722_SUPPORT */ #define i40e_is_40G_device(d) ((d) == I40E_DEV_ID_QSFP_A || \ (d) == I40E_DEV_ID_QSFP_B || \ diff --git a/src/dpdk/drivers/net/i40e/base/i40e_lan_hmc.c b/src/dpdk/drivers/net/i40e/base/i40e_lan_hmc.c index 22606484..f03f3813 100644 --- a/src/dpdk/drivers/net/i40e/base/i40e_lan_hmc.c +++ b/src/dpdk/drivers/net/i40e/base/i40e_lan_hmc.c @@ -1239,11 +1239,6 @@ enum i40e_status_code i40e_hmc_get_object_va(struct i40e_hw *hw, u64 obj_offset_in_fpm; u32 sd_idx, sd_lmt; - if (NULL == hmc_info) { - ret_code = I40E_ERR_BAD_PTR; - DEBUGOUT("i40e_hmc_get_object_va: bad hmc_info ptr\n"); - goto exit; - } if (NULL == hmc_info->hmc_obj) { ret_code = I40E_ERR_BAD_PTR; DEBUGOUT("i40e_hmc_get_object_va: bad hmc_info->hmc_obj ptr\n"); diff --git a/src/dpdk/drivers/net/i40e/base/i40e_nvm.c b/src/dpdk/drivers/net/i40e/base/i40e_nvm.c index 4fa1220b..e8965024 100644 --- a/src/dpdk/drivers/net/i40e/base/i40e_nvm.c +++ b/src/dpdk/drivers/net/i40e/base/i40e_nvm.c @@ -219,19 +219,15 @@ enum i40e_status_code i40e_read_nvm_word(struct i40e_hw *hw, u16 offset, { enum i40e_status_code ret_code = I40E_SUCCESS; -#ifdef X722_SUPPORT - if (hw->flags & I40E_HW_FLAG_AQ_SRCTL_ACCESS_ENABLE) { - ret_code = i40e_acquire_nvm(hw, I40E_RESOURCE_READ); - if (!ret_code) { + ret_code = i40e_acquire_nvm(hw, I40E_RESOURCE_READ); + if (!ret_code) { + if (hw->flags & I40E_HW_FLAG_AQ_SRCTL_ACCESS_ENABLE) { ret_code = i40e_read_nvm_word_aq(hw, offset, data); - i40e_release_nvm(hw); + } else { + ret_code = i40e_read_nvm_word_srctl(hw, offset, data); } - } else { - ret_code = i40e_read_nvm_word_srctl(hw, offset, data); + i40e_release_nvm(hw); } -#else - ret_code = i40e_read_nvm_word_srctl(hw, offset, data); -#endif return ret_code; } @@ -249,14 +245,10 @@ enum i40e_status_code __i40e_read_nvm_word(struct i40e_hw *hw, { enum i40e_status_code ret_code = I40E_SUCCESS; -#ifdef X722_SUPPORT if (hw->flags & I40E_HW_FLAG_AQ_SRCTL_ACCESS_ENABLE) ret_code = i40e_read_nvm_word_aq(hw, offset, data); else ret_code = i40e_read_nvm_word_srctl(hw, offset, data); -#else - ret_code = i40e_read_nvm_word_srctl(hw, offset, data); -#endif return ret_code; } @@ -348,14 +340,10 @@ enum i40e_status_code __i40e_read_nvm_buffer(struct i40e_hw *hw, { enum i40e_status_code ret_code = I40E_SUCCESS; -#ifdef X722_SUPPORT if (hw->flags & I40E_HW_FLAG_AQ_SRCTL_ACCESS_ENABLE) ret_code = i40e_read_nvm_buffer_aq(hw, offset, words, data); else ret_code = i40e_read_nvm_buffer_srctl(hw, offset, words, data); -#else - ret_code = i40e_read_nvm_buffer_srctl(hw, offset, words, data); -#endif return ret_code; } @@ -375,7 +363,6 @@ enum i40e_status_code 
i40e_read_nvm_buffer(struct i40e_hw *hw, u16 offset, { enum i40e_status_code ret_code = I40E_SUCCESS; -#ifdef X722_SUPPORT if (hw->flags & I40E_HW_FLAG_AQ_SRCTL_ACCESS_ENABLE) { ret_code = i40e_acquire_nvm(hw, I40E_RESOURCE_READ); if (!ret_code) { @@ -386,9 +373,6 @@ enum i40e_status_code i40e_read_nvm_buffer(struct i40e_hw *hw, u16 offset, } else { ret_code = i40e_read_nvm_buffer_srctl(hw, offset, words, data); } -#else - ret_code = i40e_read_nvm_buffer_srctl(hw, offset, words, data); -#endif return ret_code; } @@ -901,9 +885,20 @@ enum i40e_status_code i40e_nvmupd_command(struct i40e_hw *hw, *((u16 *)&bytes[2]) = hw->nvm_wait_opcode; } + /* Clear error status on read */ + if (hw->nvmupd_state == I40E_NVMUPD_STATE_ERROR) + hw->nvmupd_state = I40E_NVMUPD_STATE_INIT; + return I40E_SUCCESS; } + /* Clear status even it is not read and log */ + if (hw->nvmupd_state == I40E_NVMUPD_STATE_ERROR) { + i40e_debug(hw, I40E_DEBUG_NVM, + "Clearing I40E_NVMUPD_STATE_ERROR state without reading\n"); + hw->nvmupd_state = I40E_NVMUPD_STATE_INIT; + } + switch (hw->nvmupd_state) { case I40E_NVMUPD_STATE_INIT: status = i40e_nvmupd_state_init(hw, cmd, bytes, perrno); @@ -1253,6 +1248,7 @@ retry: void i40e_nvmupd_check_wait_event(struct i40e_hw *hw, u16 opcode) { if (opcode == hw->nvm_wait_opcode) { + i40e_debug(hw, I40E_DEBUG_NVM, "NVMUPD: clearing wait on opcode 0x%04x\n", opcode); if (hw->nvm_release_on_done) { @@ -1261,6 +1257,11 @@ void i40e_nvmupd_check_wait_event(struct i40e_hw *hw, u16 opcode) } hw->nvm_wait_opcode = 0; + if (hw->aq.arq_last_status) { + hw->nvmupd_state = I40E_NVMUPD_STATE_ERROR; + return; + } + switch (hw->nvmupd_state) { case I40E_NVMUPD_STATE_INIT_WAIT: hw->nvmupd_state = I40E_NVMUPD_STATE_INIT; @@ -1423,7 +1424,8 @@ STATIC enum i40e_status_code i40e_nvmupd_exec_aq(struct i40e_hw *hw, if (hw->nvm_buff.va) { buff = hw->nvm_buff.va; - memcpy(buff, &bytes[aq_desc_len], aq_data_len); + i40e_memcpy(buff, &bytes[aq_desc_len], aq_data_len, + I40E_NONDMA_TO_NONDMA); } } @@ -1496,7 +1498,7 @@ STATIC enum i40e_status_code i40e_nvmupd_get_aq_result(struct i40e_hw *hw, __func__, cmd->offset, cmd->offset + len); buff = ((u8 *)&hw->nvm_wb_desc) + cmd->offset; - memcpy(bytes, buff, len); + i40e_memcpy(bytes, buff, len, I40E_NONDMA_TO_NONDMA); bytes += len; remainder -= len; @@ -1510,7 +1512,7 @@ STATIC enum i40e_status_code i40e_nvmupd_get_aq_result(struct i40e_hw *hw, i40e_debug(hw, I40E_DEBUG_NVM, "%s: databuf bytes %d to %d\n", __func__, start_byte, start_byte + remainder); - memcpy(bytes, buff, remainder); + i40e_memcpy(bytes, buff, remainder, I40E_NONDMA_TO_NONDMA); } return I40E_SUCCESS; diff --git a/src/dpdk/drivers/net/i40e/base/i40e_osdep.h b/src/dpdk/drivers/net/i40e/base/i40e_osdep.h index 38e7ba5b..c57ecded 100644 --- a/src/dpdk/drivers/net/i40e/base/i40e_osdep.h +++ b/src/dpdk/drivers/net/i40e/base/i40e_osdep.h @@ -44,6 +44,7 @@ #include <rte_cycles.h> #include <rte_spinlock.h> #include <rte_log.h> +#include <rte_io.h> #include "../i40e_logs.h" @@ -153,15 +154,18 @@ do { \ * I40E_PRTQF_FD_MSK */ -#define I40E_PCI_REG(reg) (*((volatile uint32_t *)(reg))) +#define I40E_PCI_REG(reg) rte_read32(reg) #define I40E_PCI_REG_ADDR(a, reg) \ ((volatile uint32_t *)((char *)(a)->hw_addr + (reg))) static inline uint32_t i40e_read_addr(volatile void *addr) { return rte_le_to_cpu_32(I40E_PCI_REG(addr)); } -#define I40E_PCI_REG_WRITE(reg, value) \ - do { I40E_PCI_REG((reg)) = rte_cpu_to_le_32(value); } while (0) + +#define I40E_PCI_REG_WRITE(reg, value) \ + rte_write32((rte_cpu_to_le_32(value)), reg) 
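
With the osdep change above, I40E_PCI_REG_WRITE() now maps to rte_write32(), which carries an I/O barrier, while the relaxed variant added just below maps to rte_write32_relaxed() and leaves ordering to the caller. A short sketch of the intended difference; the example_* helpers are illustrative only, and rte_wmb() is assumed to be available from rte_atomic.h:

/* Illustrative only: the ordered macro suits configuration registers, while a
 * hot-path doorbell update can use the relaxed macro together with one
 * explicit barrier once the descriptor stores are visible. */
static inline void
example_write_cfg_reg(struct i40e_hw *hw, uint32_t reg, uint32_t val)
{
	/* rte_write32() orders this store against earlier MMIO accesses. */
	I40E_PCI_REG_WRITE(I40E_PCI_REG_ADDR(hw, reg), val);
}

static inline void
example_ring_tail(struct i40e_hw *hw, uint32_t tail_reg, uint16_t tail)
{
	/* Make queued descriptor writes visible before ringing the doorbell,
	 * then use the relaxed write to avoid paying for a second barrier. */
	rte_wmb();
	I40E_PCI_REG_WRITE_RELAXED(I40E_PCI_REG_ADDR(hw, tail_reg), tail);
}

Keeping the barrier out of the relaxed macro lets a hot path pay for exactly one ordering point instead of two.
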
+#define I40E_PCI_REG_WRITE_RELAXED(reg, value) \ + rte_write32_relaxed((rte_cpu_to_le_32(value)), reg) #define I40E_WRITE_FLUSH(a) I40E_READ_REG(a, I40E_GLGEN_STAT) #define I40EVF_WRITE_FLUSH(a) I40E_READ_REG(a, I40E_VFGEN_RSTAT) diff --git a/src/dpdk/drivers/net/i40e/base/i40e_prototype.h b/src/dpdk/drivers/net/i40e/base/i40e_prototype.h index 03dda937..109d3c56 100644 --- a/src/dpdk/drivers/net/i40e/base/i40e_prototype.h +++ b/src/dpdk/drivers/net/i40e/base/i40e_prototype.h @@ -78,7 +78,6 @@ void i40e_debug_aq(struct i40e_hw *hw, enum i40e_debug_mask mask, void i40e_idle_aq(struct i40e_hw *hw); bool i40e_check_asq_alive(struct i40e_hw *hw); enum i40e_status_code i40e_aq_queue_shutdown(struct i40e_hw *hw, bool unloading); -#ifdef X722_SUPPORT enum i40e_status_code i40e_aq_get_rss_lut(struct i40e_hw *hw, u16 seid, bool pf_lut, u8 *lut, u16 lut_size); @@ -90,11 +89,8 @@ enum i40e_status_code i40e_aq_get_rss_key(struct i40e_hw *hw, enum i40e_status_code i40e_aq_set_rss_key(struct i40e_hw *hw, u16 seid, struct i40e_aqc_get_set_rss_key_data *key); -#endif -#ifndef I40E_NDIS_SUPPORT const char *i40e_aq_str(struct i40e_hw *hw, enum i40e_admin_queue_err aq_err); const char *i40e_stat_str(struct i40e_hw *hw, enum i40e_status_code stat_err); -#endif /* I40E_NDIS_SUPPORT */ #ifdef PF_DRIVER @@ -124,6 +120,8 @@ enum i40e_status_code i40e_aq_set_phy_debug(struct i40e_hw *hw, u8 cmd_flags, struct i40e_asq_cmd_details *cmd_details); enum i40e_status_code i40e_aq_set_default_vsi(struct i40e_hw *hw, u16 vsi_id, struct i40e_asq_cmd_details *cmd_details); +enum i40e_status_code i40e_aq_clear_default_vsi(struct i40e_hw *hw, u16 vsi_id, + struct i40e_asq_cmd_details *cmd_details); enum i40e_status_code i40e_aq_get_phy_capabilities(struct i40e_hw *hw, bool qualified_modules, bool report_init, struct i40e_aq_get_phy_abilities_resp *abilities, @@ -170,12 +168,18 @@ enum i40e_status_code i40e_aq_set_vsi_unicast_promiscuous(struct i40e_hw *hw, bool rx_only_promisc); enum i40e_status_code i40e_aq_set_vsi_multicast_promiscuous(struct i40e_hw *hw, u16 vsi_id, bool set, struct i40e_asq_cmd_details *cmd_details); +enum i40e_status_code i40e_aq_set_vsi_full_promiscuous(struct i40e_hw *hw, + u16 seid, bool set, + struct i40e_asq_cmd_details *cmd_details); enum i40e_status_code i40e_aq_set_vsi_mc_promisc_on_vlan(struct i40e_hw *hw, u16 seid, bool enable, u16 vid, struct i40e_asq_cmd_details *cmd_details); enum i40e_status_code i40e_aq_set_vsi_uc_promisc_on_vlan(struct i40e_hw *hw, u16 seid, bool enable, u16 vid, struct i40e_asq_cmd_details *cmd_details); +enum i40e_status_code i40e_aq_set_vsi_bc_promisc_on_vlan(struct i40e_hw *hw, + u16 seid, bool enable, u16 vid, + struct i40e_asq_cmd_details *cmd_details); enum i40e_status_code i40e_aq_set_vsi_vlan_promisc(struct i40e_hw *hw, u16 seid, bool enable, struct i40e_asq_cmd_details *cmd_details); @@ -438,6 +442,7 @@ enum i40e_status_code i40e_get_port_mac_addr(struct i40e_hw *hw, u8 *mac_addr); enum i40e_status_code i40e_read_pba_string(struct i40e_hw *hw, u8 *pba_num, u32 pba_num_size); void i40e_pre_tx_queue_cfg(struct i40e_hw *hw, u32 queue, bool enable); +enum i40e_status_code i40e_get_san_mac_addr(struct i40e_hw *hw, u8 *mac_addr); enum i40e_aq_link_speed i40e_get_link_speed(struct i40e_hw *hw); /* prototype for functions used for NVM access */ enum i40e_status_code i40e_init_nvm(struct i40e_hw *hw); @@ -518,7 +523,6 @@ enum i40e_status_code i40e_aq_rx_ctl_write_register(struct i40e_hw *hw, u32 reg_addr, u32 reg_val, struct i40e_asq_cmd_details *cmd_details); void 
i40e_write_rx_ctl(struct i40e_hw *hw, u32 reg_addr, u32 reg_val); -#ifdef X722_SUPPORT enum i40e_status_code i40e_aq_set_arp_proxy_config(struct i40e_hw *hw, struct i40e_aqc_arp_proxy_data *proxy_config, struct i40e_asq_cmd_details *cmd_details); @@ -534,11 +538,20 @@ enum i40e_status_code i40e_aq_set_clear_wol_filter(struct i40e_hw *hw, enum i40e_status_code i40e_aq_get_wake_event_reason(struct i40e_hw *hw, u16 *wake_reason, struct i40e_asq_cmd_details *cmd_details); -#endif -enum i40e_status_code i40e_read_phy_register(struct i40e_hw *hw, u8 page, - u16 reg, u8 phy_addr, u16 *value); -enum i40e_status_code i40e_write_phy_register(struct i40e_hw *hw, u8 page, - u16 reg, u8 phy_addr, u16 value); +enum i40e_status_code i40e_aq_clear_all_wol_filters(struct i40e_hw *hw, + struct i40e_asq_cmd_details *cmd_details); +enum i40e_status_code i40e_read_phy_register_clause22(struct i40e_hw *hw, + u16 reg, u8 phy_addr, u16 *value); +enum i40e_status_code i40e_write_phy_register_clause22(struct i40e_hw *hw, + u16 reg, u8 phy_addr, u16 value); +enum i40e_status_code i40e_read_phy_register_clause45(struct i40e_hw *hw, + u8 page, u16 reg, u8 phy_addr, u16 *value); +enum i40e_status_code i40e_write_phy_register_clause45(struct i40e_hw *hw, + u8 page, u16 reg, u8 phy_addr, u16 value); +enum i40e_status_code i40e_read_phy_register(struct i40e_hw *hw, + u8 page, u16 reg, u8 phy_addr, u16 *value); +enum i40e_status_code i40e_write_phy_register(struct i40e_hw *hw, + u8 page, u16 reg, u8 phy_addr, u16 value); u8 i40e_get_phy_address(struct i40e_hw *hw, u8 dev_num); enum i40e_status_code i40e_blink_phy_link_led(struct i40e_hw *hw, u32 time, u32 interval); diff --git a/src/dpdk/drivers/net/i40e/base/i40e_register.h b/src/dpdk/drivers/net/i40e/base/i40e_register.h index fd0a7230..3a305b67 100644 --- a/src/dpdk/drivers/net/i40e/base/i40e_register.h +++ b/src/dpdk/drivers/net/i40e/base/i40e_register.h @@ -3401,7 +3401,6 @@ POSSIBILITY OF SUCH DAMAGE. #define I40E_VFQF_HREGION_OVERRIDE_ENA_7_MASK I40E_MASK(0x1, I40E_VFQF_HREGION_OVERRIDE_ENA_7_SHIFT) #define I40E_VFQF_HREGION_REGION_7_SHIFT 29 #define I40E_VFQF_HREGION_REGION_7_MASK I40E_MASK(0x7, I40E_VFQF_HREGION_REGION_7_SHIFT) -#ifdef X722_SUPPORT #ifdef PF_DRIVER #define I40E_MNGSB_FDCRC 0x000B7050 /* Reset: POR */ @@ -5366,5 +5365,4 @@ POSSIBILITY OF SUCH DAMAGE. 
#define I40E_VFPE_WQEALLOC1_WQE_DESC_INDEX_SHIFT 20 #define I40E_VFPE_WQEALLOC1_WQE_DESC_INDEX_MASK I40E_MASK(0xFFF, I40E_VFPE_WQEALLOC1_WQE_DESC_INDEX_SHIFT) -#endif /* X722_SUPPORT */ #endif /* _I40E_REGISTER_H_ */ diff --git a/src/dpdk/drivers/net/i40e/base/i40e_type.h b/src/dpdk/drivers/net/i40e/base/i40e_type.h index 5349419f..590d97c7 100644 --- a/src/dpdk/drivers/net/i40e/base/i40e_type.h +++ b/src/dpdk/drivers/net/i40e/base/i40e_type.h @@ -157,13 +157,22 @@ enum i40e_debug_mask { #define I40E_PCI_LINK_SPEED_5000 0x2 #define I40E_PCI_LINK_SPEED_8000 0x3 -#define I40E_MDIO_STCODE 0 -#define I40E_MDIO_OPCODE_ADDRESS 0 -#define I40E_MDIO_OPCODE_WRITE I40E_MASK(1, \ +#define I40E_MDIO_CLAUSE22_STCODE_MASK I40E_MASK(1, \ + I40E_GLGEN_MSCA_STCODE_SHIFT) +#define I40E_MDIO_CLAUSE22_OPCODE_WRITE_MASK I40E_MASK(1, \ I40E_GLGEN_MSCA_OPCODE_SHIFT) -#define I40E_MDIO_OPCODE_READ_INC_ADDR I40E_MASK(2, \ +#define I40E_MDIO_CLAUSE22_OPCODE_READ_MASK I40E_MASK(2, \ I40E_GLGEN_MSCA_OPCODE_SHIFT) -#define I40E_MDIO_OPCODE_READ I40E_MASK(3, \ + +#define I40E_MDIO_CLAUSE45_STCODE_MASK I40E_MASK(0, \ + I40E_GLGEN_MSCA_STCODE_SHIFT) +#define I40E_MDIO_CLAUSE45_OPCODE_ADDRESS_MASK I40E_MASK(0, \ + I40E_GLGEN_MSCA_OPCODE_SHIFT) +#define I40E_MDIO_CLAUSE45_OPCODE_WRITE_MASK I40E_MASK(1, \ + I40E_GLGEN_MSCA_OPCODE_SHIFT) +#define I40E_MDIO_CLAUSE45_OPCODE_READ_INC_ADDR_MASK I40E_MASK(2, \ + I40E_GLGEN_MSCA_OPCODE_SHIFT) +#define I40E_MDIO_CLAUSE45_OPCODE_READ_MASK I40E_MASK(3, \ I40E_GLGEN_MSCA_OPCODE_SHIFT) #define I40E_PHY_COM_REG_PAGE 0x1E @@ -187,9 +196,7 @@ enum i40e_memcpy_type { I40E_DMA_TO_NONDMA }; -#ifdef X722_SUPPORT #define I40E_FW_API_VERSION_MINOR_X722 0x0005 -#endif #define I40E_FW_API_VERSION_MINOR_X710 0x0005 @@ -203,13 +210,10 @@ enum i40e_memcpy_type { */ enum i40e_mac_type { I40E_MAC_UNKNOWN = 0, - I40E_MAC_X710, I40E_MAC_XL710, I40E_MAC_VF, -#ifdef X722_SUPPORT I40E_MAC_X722, I40E_MAC_X722_VF, -#endif I40E_MAC_GENERIC, }; @@ -264,6 +268,7 @@ struct i40e_link_status { enum i40e_aq_link_speed link_speed; u8 link_info; u8 an_info; + u8 fec_info; u8 ext_info; u8 loopback; /* is Link Status Event notification to SW enabled */ @@ -292,61 +297,73 @@ struct i40e_link_status { #define I40E_MODULE_TYPE_1000BASE_T 0x08 }; -enum i40e_aq_capabilities_phy_type { - I40E_CAP_PHY_TYPE_SGMII = BIT(I40E_PHY_TYPE_SGMII), - I40E_CAP_PHY_TYPE_1000BASE_KX = BIT(I40E_PHY_TYPE_1000BASE_KX), - I40E_CAP_PHY_TYPE_10GBASE_KX4 = BIT(I40E_PHY_TYPE_10GBASE_KX4), - I40E_CAP_PHY_TYPE_10GBASE_KR = BIT(I40E_PHY_TYPE_10GBASE_KR), - I40E_CAP_PHY_TYPE_40GBASE_KR4 = BIT(I40E_PHY_TYPE_40GBASE_KR4), - I40E_CAP_PHY_TYPE_XAUI = BIT(I40E_PHY_TYPE_XAUI), - I40E_CAP_PHY_TYPE_XFI = BIT(I40E_PHY_TYPE_XFI), - I40E_CAP_PHY_TYPE_SFI = BIT(I40E_PHY_TYPE_SFI), - I40E_CAP_PHY_TYPE_XLAUI = BIT(I40E_PHY_TYPE_XLAUI), - I40E_CAP_PHY_TYPE_XLPPI = BIT(I40E_PHY_TYPE_XLPPI), - I40E_CAP_PHY_TYPE_40GBASE_CR4_CU = BIT(I40E_PHY_TYPE_40GBASE_CR4_CU), - I40E_CAP_PHY_TYPE_10GBASE_CR1_CU = BIT(I40E_PHY_TYPE_10GBASE_CR1_CU), - I40E_CAP_PHY_TYPE_10GBASE_AOC = BIT(I40E_PHY_TYPE_10GBASE_AOC), - I40E_CAP_PHY_TYPE_40GBASE_AOC = BIT(I40E_PHY_TYPE_40GBASE_AOC), - I40E_CAP_PHY_TYPE_100BASE_TX = BIT(I40E_PHY_TYPE_100BASE_TX), - I40E_CAP_PHY_TYPE_1000BASE_T = BIT(I40E_PHY_TYPE_1000BASE_T), - I40E_CAP_PHY_TYPE_10GBASE_T = BIT(I40E_PHY_TYPE_10GBASE_T), - I40E_CAP_PHY_TYPE_10GBASE_SR = BIT(I40E_PHY_TYPE_10GBASE_SR), - I40E_CAP_PHY_TYPE_10GBASE_LR = BIT(I40E_PHY_TYPE_10GBASE_LR), - I40E_CAP_PHY_TYPE_10GBASE_SFPP_CU = BIT(I40E_PHY_TYPE_10GBASE_SFPP_CU), - 
I40E_CAP_PHY_TYPE_10GBASE_CR1 = BIT(I40E_PHY_TYPE_10GBASE_CR1), - I40E_CAP_PHY_TYPE_40GBASE_CR4 = BIT(I40E_PHY_TYPE_40GBASE_CR4), - I40E_CAP_PHY_TYPE_40GBASE_SR4 = BIT(I40E_PHY_TYPE_40GBASE_SR4), - I40E_CAP_PHY_TYPE_40GBASE_LR4 = BIT(I40E_PHY_TYPE_40GBASE_LR4), - I40E_CAP_PHY_TYPE_1000BASE_SX = BIT(I40E_PHY_TYPE_1000BASE_SX), - I40E_CAP_PHY_TYPE_1000BASE_LX = BIT(I40E_PHY_TYPE_1000BASE_LX), - I40E_CAP_PHY_TYPE_1000BASE_T_OPTICAL = BIT(I40E_PHY_TYPE_1000BASE_T_OPTICAL), - I40E_CAP_PHY_TYPE_20GBASE_KR2 = BIT(I40E_PHY_TYPE_20GBASE_KR2) -}; - struct i40e_phy_info { struct i40e_link_status link_info; struct i40e_link_status link_info_old; bool get_link_info; enum i40e_media_type media_type; /* all the phy types the NVM is capable of */ - u32 phy_types; -}; - + u64 phy_types; +}; + +#define I40E_CAP_PHY_TYPE_SGMII BIT_ULL(I40E_PHY_TYPE_SGMII) +#define I40E_CAP_PHY_TYPE_1000BASE_KX BIT_ULL(I40E_PHY_TYPE_1000BASE_KX) +#define I40E_CAP_PHY_TYPE_10GBASE_KX4 BIT_ULL(I40E_PHY_TYPE_10GBASE_KX4) +#define I40E_CAP_PHY_TYPE_10GBASE_KR BIT_ULL(I40E_PHY_TYPE_10GBASE_KR) +#define I40E_CAP_PHY_TYPE_40GBASE_KR4 BIT_ULL(I40E_PHY_TYPE_40GBASE_KR4) +#define I40E_CAP_PHY_TYPE_XAUI BIT_ULL(I40E_PHY_TYPE_XAUI) +#define I40E_CAP_PHY_TYPE_XFI BIT_ULL(I40E_PHY_TYPE_XFI) +#define I40E_CAP_PHY_TYPE_SFI BIT_ULL(I40E_PHY_TYPE_SFI) +#define I40E_CAP_PHY_TYPE_XLAUI BIT_ULL(I40E_PHY_TYPE_XLAUI) +#define I40E_CAP_PHY_TYPE_XLPPI BIT_ULL(I40E_PHY_TYPE_XLPPI) +#define I40E_CAP_PHY_TYPE_40GBASE_CR4_CU BIT_ULL(I40E_PHY_TYPE_40GBASE_CR4_CU) +#define I40E_CAP_PHY_TYPE_10GBASE_CR1_CU BIT_ULL(I40E_PHY_TYPE_10GBASE_CR1_CU) +#define I40E_CAP_PHY_TYPE_10GBASE_AOC BIT_ULL(I40E_PHY_TYPE_10GBASE_AOC) +#define I40E_CAP_PHY_TYPE_40GBASE_AOC BIT_ULL(I40E_PHY_TYPE_40GBASE_AOC) +#define I40E_CAP_PHY_TYPE_100BASE_TX BIT_ULL(I40E_PHY_TYPE_100BASE_TX) +#define I40E_CAP_PHY_TYPE_1000BASE_T BIT_ULL(I40E_PHY_TYPE_1000BASE_T) +#define I40E_CAP_PHY_TYPE_10GBASE_T BIT_ULL(I40E_PHY_TYPE_10GBASE_T) +#define I40E_CAP_PHY_TYPE_10GBASE_SR BIT_ULL(I40E_PHY_TYPE_10GBASE_SR) +#define I40E_CAP_PHY_TYPE_10GBASE_LR BIT_ULL(I40E_PHY_TYPE_10GBASE_LR) +#define I40E_CAP_PHY_TYPE_10GBASE_SFPP_CU BIT_ULL(I40E_PHY_TYPE_10GBASE_SFPP_CU) +#define I40E_CAP_PHY_TYPE_10GBASE_CR1 BIT_ULL(I40E_PHY_TYPE_10GBASE_CR1) +#define I40E_CAP_PHY_TYPE_40GBASE_CR4 BIT_ULL(I40E_PHY_TYPE_40GBASE_CR4) +#define I40E_CAP_PHY_TYPE_40GBASE_SR4 BIT_ULL(I40E_PHY_TYPE_40GBASE_SR4) +#define I40E_CAP_PHY_TYPE_40GBASE_LR4 BIT_ULL(I40E_PHY_TYPE_40GBASE_LR4) +#define I40E_CAP_PHY_TYPE_1000BASE_SX BIT_ULL(I40E_PHY_TYPE_1000BASE_SX) +#define I40E_CAP_PHY_TYPE_1000BASE_LX BIT_ULL(I40E_PHY_TYPE_1000BASE_LX) +#define I40E_CAP_PHY_TYPE_1000BASE_T_OPTICAL \ + BIT_ULL(I40E_PHY_TYPE_1000BASE_T_OPTICAL) +#define I40E_CAP_PHY_TYPE_20GBASE_KR2 BIT_ULL(I40E_PHY_TYPE_20GBASE_KR2) +/* + * Defining the macro I40E_TYPE_OFFSET to implement a bit shift for some + * PHY types. There is an unused bit (31) in the I40E_CAP_PHY_TYPE_* bit + * fields but no corresponding gap in the i40e_aq_phy_type enumeration. So, + * a shift is needed to adjust for this with values larger than 31. The + * only affected values are I40E_PHY_TYPE_25GBASE_*. 
+ */ +#define I40E_PHY_TYPE_OFFSET 1 +#define I40E_CAP_PHY_TYPE_25GBASE_KR BIT_ULL(I40E_PHY_TYPE_25GBASE_KR + \ + I40E_PHY_TYPE_OFFSET) +#define I40E_CAP_PHY_TYPE_25GBASE_CR BIT_ULL(I40E_PHY_TYPE_25GBASE_CR + \ + I40E_PHY_TYPE_OFFSET) +#define I40E_CAP_PHY_TYPE_25GBASE_SR BIT_ULL(I40E_PHY_TYPE_25GBASE_SR + \ + I40E_PHY_TYPE_OFFSET) +#define I40E_CAP_PHY_TYPE_25GBASE_LR BIT_ULL(I40E_PHY_TYPE_25GBASE_LR + \ + I40E_PHY_TYPE_OFFSET) #define I40E_HW_CAP_MAX_GPIO 30 #define I40E_HW_CAP_MDIO_PORT_MODE_MDIO 0 #define I40E_HW_CAP_MDIO_PORT_MODE_I2C 1 -#ifdef X722_SUPPORT enum i40e_acpi_programming_method { I40E_ACPI_PROGRAMMING_METHOD_HW_FVL = 0, I40E_ACPI_PROGRAMMING_METHOD_AQC_FPK = 1 }; -#define I40E_WOL_SUPPORT_MASK 1 -#define I40E_ACPI_PROGRAMMING_METHOD_MASK (1 << 1) -#define I40E_PROXY_SUPPORT_MASK (1 << 2) +#define I40E_WOL_SUPPORT_MASK 0x1 +#define I40E_ACPI_PROGRAMMING_METHOD_MASK 0x2 +#define I40E_PROXY_SUPPORT_MASK 0x4 -#endif /* Capabilities of a PF or a VF or the whole device */ struct i40e_hw_capabilities { u32 switch_mode; @@ -355,6 +372,10 @@ struct i40e_hw_capabilities { #define I40E_NVM_IMAGE_TYPE_UDP_CLOUD 0x3 u32 management_mode; + u32 mng_protocols_over_mctp; +#define I40E_MNG_PROTOCOL_PLDM 0x2 +#define I40E_MNG_PROTOCOL_OEM_COMMANDS 0x4 +#define I40E_MNG_PROTOCOL_NCSI 0x8 u32 npar_enable; u32 os2bmc; u32 valid_functions; @@ -410,11 +431,9 @@ struct i40e_hw_capabilities { u32 enabled_tcmap; u32 maxtc; u64 wr_csr_prot; -#ifdef X722_SUPPORT bool apm_wol_support; enum i40e_acpi_programming_method acpi_prog_method; bool proxy_support; -#endif }; struct i40e_mac_info { @@ -472,6 +491,7 @@ enum i40e_nvmupd_state { I40E_NVMUPD_STATE_WRITING, I40E_NVMUPD_STATE_INIT_WAIT, I40E_NVMUPD_STATE_WRITE_WAIT, + I40E_NVMUPD_STATE_ERROR }; /* nvm_access definition and its masks/shifts need to be accessible to @@ -550,6 +570,7 @@ struct i40e_bus_info { u16 func; u16 device; u16 lan_id; + u16 bus_id; }; /* Flow control (FC) parameters */ @@ -674,30 +695,22 @@ struct i40e_hw { struct i40e_dcbx_config remote_dcbx_config; /* Peer Cfg */ struct i40e_dcbx_config desired_dcbx_config; /* CEE Desired Cfg */ -#ifdef X722_SUPPORT /* WoL and proxy support */ u16 num_wol_proxy_filters; u16 wol_proxy_vsi_seid; -#endif #define I40E_HW_FLAG_AQ_SRCTL_ACCESS_ENABLE BIT_ULL(0) u64 flags; /* debug mask */ u32 debug_mask; -#ifndef I40E_NDIS_SUPPORT char err_str[16]; -#endif /* I40E_NDIS_SUPPORT */ }; STATIC INLINE bool i40e_is_vf(struct i40e_hw *hw) { -#ifdef X722_SUPPORT return (hw->mac.type == I40E_MAC_VF || hw->mac.type == I40E_MAC_X722_VF); -#else - return hw->mac.type == I40E_MAC_VF; -#endif } struct i40e_driver_version { @@ -801,11 +814,7 @@ enum i40e_rx_desc_status_bits { I40E_RX_DESC_STATUS_CRCP_SHIFT = 4, I40E_RX_DESC_STATUS_TSYNINDX_SHIFT = 5, /* 2 BITS */ I40E_RX_DESC_STATUS_TSYNVALID_SHIFT = 7, -#ifdef X722_SUPPORT I40E_RX_DESC_STATUS_EXT_UDP_0_SHIFT = 8, -#else - I40E_RX_DESC_STATUS_RESERVED1_SHIFT = 8, -#endif I40E_RX_DESC_STATUS_UMBCAST_SHIFT = 9, /* 2 BITS */ I40E_RX_DESC_STATUS_FLM_SHIFT = 11, @@ -813,11 +822,7 @@ enum i40e_rx_desc_status_bits { I40E_RX_DESC_STATUS_LPBK_SHIFT = 14, I40E_RX_DESC_STATUS_IPV6EXADD_SHIFT = 15, I40E_RX_DESC_STATUS_RESERVED2_SHIFT = 16, /* 2 BITS */ -#ifdef X722_SUPPORT I40E_RX_DESC_STATUS_INT_UDP_0_SHIFT = 18, -#else - I40E_RX_DESC_STATUS_UDP_0_SHIFT = 18, -#endif I40E_RX_DESC_STATUS_LAST /* this entry must be last!!! 
*/ }; @@ -1195,10 +1200,8 @@ enum i40e_tx_ctx_desc_eipt_offload { #define I40E_TXD_CTX_QW0_DECTTL_MASK (0xFULL << \ I40E_TXD_CTX_QW0_DECTTL_SHIFT) -#ifdef X722_SUPPORT #define I40E_TXD_CTX_QW0_L4T_CS_SHIFT 23 #define I40E_TXD_CTX_QW0_L4T_CS_MASK BIT_ULL(I40E_TXD_CTX_QW0_L4T_CS_SHIFT) -#endif struct i40e_nop_desc { __le64 rsvd; __le64 dtype_cmd; @@ -1235,38 +1238,24 @@ struct i40e_filter_program_desc { /* Packet Classifier Types for filters */ enum i40e_filter_pctype { -#ifdef X722_SUPPORT /* Note: Values 0-28 are reserved for future use. * Value 29, 30, 32 are not supported on XL710 and X710. */ I40E_FILTER_PCTYPE_NONF_UNICAST_IPV4_UDP = 29, I40E_FILTER_PCTYPE_NONF_MULTICAST_IPV4_UDP = 30, -#else - /* Note: Values 0-30 are reserved for future use */ -#endif I40E_FILTER_PCTYPE_NONF_IPV4_UDP = 31, -#ifdef X722_SUPPORT I40E_FILTER_PCTYPE_NONF_IPV4_TCP_SYN_NO_ACK = 32, -#else - /* Note: Value 32 is reserved for future use */ -#endif I40E_FILTER_PCTYPE_NONF_IPV4_TCP = 33, I40E_FILTER_PCTYPE_NONF_IPV4_SCTP = 34, I40E_FILTER_PCTYPE_NONF_IPV4_OTHER = 35, I40E_FILTER_PCTYPE_FRAG_IPV4 = 36, -#ifdef X722_SUPPORT /* Note: Values 37-38 are reserved for future use. * Value 39, 40, 42 are not supported on XL710 and X710. */ I40E_FILTER_PCTYPE_NONF_UNICAST_IPV6_UDP = 39, I40E_FILTER_PCTYPE_NONF_MULTICAST_IPV6_UDP = 40, -#else - /* Note: Values 37-40 are reserved for future use */ -#endif I40E_FILTER_PCTYPE_NONF_IPV6_UDP = 41, -#ifdef X722_SUPPORT I40E_FILTER_PCTYPE_NONF_IPV6_TCP_SYN_NO_ACK = 42, -#endif I40E_FILTER_PCTYPE_NONF_IPV6_TCP = 43, I40E_FILTER_PCTYPE_NONF_IPV6_SCTP = 44, I40E_FILTER_PCTYPE_NONF_IPV6_OTHER = 45, @@ -1321,12 +1310,10 @@ enum i40e_filter_program_desc_pcmd { I40E_TXD_FLTR_QW1_CMD_SHIFT) #define I40E_TXD_FLTR_QW1_FD_STATUS_MASK (0x3ULL << \ I40E_TXD_FLTR_QW1_FD_STATUS_SHIFT) -#ifdef X722_SUPPORT #define I40E_TXD_FLTR_QW1_ATR_SHIFT (0xEULL + \ I40E_TXD_FLTR_QW1_CMD_SHIFT) #define I40E_TXD_FLTR_QW1_ATR_MASK BIT_ULL(I40E_TXD_FLTR_QW1_ATR_SHIFT) -#endif #define I40E_TXD_FLTR_QW1_CNTINDEX_SHIFT 20 #define I40E_TXD_FLTR_QW1_CNTINDEX_MASK (0x1FFUL << \ @@ -1388,6 +1375,23 @@ struct i40e_veb_tc_stats { u64 tc_tx_bytes[I40E_MAX_TRAFFIC_CLASS]; }; +/* Statistics collected per function for FCoE */ +struct i40e_fcoe_stats { + u64 rx_fcoe_packets; /* fcoeprc */ + u64 rx_fcoe_dwords; /* focedwrc */ + u64 rx_fcoe_dropped; /* fcoerpdc */ + u64 tx_fcoe_packets; /* fcoeptc */ + u64 tx_fcoe_dwords; /* focedwtc */ + u64 fcoe_bad_fccrc; /* fcoecrc */ + u64 fcoe_last_error; /* fcoelast */ + u64 fcoe_ddp_count; /* fcoeddpc */ +}; + +/* offset to per function FCoE statistics block */ +#define I40E_FCOE_VF_STAT_OFFSET 0 +#define I40E_FCOE_PF_STAT_OFFSET 128 +#define I40E_FCOE_STAT_MAX (I40E_FCOE_PF_STAT_OFFSET + I40E_MAX_PF) + /* Statistics collected by the MAC */ struct i40e_hw_port_stats { /* eth stats collected by the port */ @@ -1481,6 +1485,7 @@ struct i40e_hw_port_stats { #define I40E_SR_EMPR_REGS_AUTO_LOAD_PTR 0x3A #define I40E_SR_GLOBR_REGS_AUTO_LOAD_PTR 0x3B #define I40E_SR_CORER_REGS_AUTO_LOAD_PTR 0x3C +#define I40E_SR_PHY_ACTIVITY_LIST_PTR 0x3D #define I40E_SR_PCIE_ALT_AUTO_LOAD_PTR 0x3E #define I40E_SR_SW_CHECKSUM_WORD 0x3F #define I40E_SR_1ST_FREE_PROVISION_AREA_PTR 0x40 @@ -1509,6 +1514,208 @@ struct i40e_hw_port_stats { #define I40E_SRRD_SRCTL_ATTEMPTS 100000 +/* FCoE Tx context descriptor - Use the i40e_tx_context_desc struct */ + +enum i40E_fcoe_tx_ctx_desc_cmd_bits { + I40E_FCOE_TX_CTX_DESC_OPCODE_SINGLE_SEND = 0x00, /* 4 BITS */ + I40E_FCOE_TX_CTX_DESC_OPCODE_TSO_FC_CLASS2 = 0x01, /* 4 
BITS */ + I40E_FCOE_TX_CTX_DESC_OPCODE_TSO_FC_CLASS3 = 0x05, /* 4 BITS */ + I40E_FCOE_TX_CTX_DESC_OPCODE_ETSO_FC_CLASS2 = 0x02, /* 4 BITS */ + I40E_FCOE_TX_CTX_DESC_OPCODE_ETSO_FC_CLASS3 = 0x06, /* 4 BITS */ + I40E_FCOE_TX_CTX_DESC_OPCODE_DWO_FC_CLASS2 = 0x03, /* 4 BITS */ + I40E_FCOE_TX_CTX_DESC_OPCODE_DWO_FC_CLASS3 = 0x07, /* 4 BITS */ + I40E_FCOE_TX_CTX_DESC_OPCODE_DDP_CTX_INVL = 0x08, /* 4 BITS */ + I40E_FCOE_TX_CTX_DESC_OPCODE_DWO_CTX_INVL = 0x09, /* 4 BITS */ + I40E_FCOE_TX_CTX_DESC_RELOFF = 0x10, + I40E_FCOE_TX_CTX_DESC_CLRSEQ = 0x20, + I40E_FCOE_TX_CTX_DESC_DIFENA = 0x40, + I40E_FCOE_TX_CTX_DESC_IL2TAG2 = 0x80 +}; + +/* FCoE DIF/DIX Context descriptor */ +struct i40e_fcoe_difdix_context_desc { + __le64 flags_buff0_buff1_ref; + __le64 difapp_msk_bias; +}; + +#define I40E_FCOE_DIFDIX_CTX_QW0_FLAGS_SHIFT 0 +#define I40E_FCOE_DIFDIX_CTX_QW0_FLAGS_MASK (0xFFFULL << \ + I40E_FCOE_DIFDIX_CTX_QW0_FLAGS_SHIFT) + +enum i40e_fcoe_difdix_ctx_desc_flags_bits { + /* 2 BITS */ + I40E_FCOE_DIFDIX_CTX_DESC_RSVD = 0x0000, + /* 1 BIT */ + I40E_FCOE_DIFDIX_CTX_DESC_APPTYPE_TAGCHK = 0x0000, + /* 1 BIT */ + I40E_FCOE_DIFDIX_CTX_DESC_APPTYPE_TAGNOTCHK = 0x0004, + /* 2 BITS */ + I40E_FCOE_DIFDIX_CTX_DESC_GTYPE_OPAQUE = 0x0000, + /* 2 BITS */ + I40E_FCOE_DIFDIX_CTX_DESC_GTYPE_CHKINTEGRITY = 0x0008, + /* 2 BITS */ + I40E_FCOE_DIFDIX_CTX_DESC_GTYPE_CHKINTEGRITY_APPTAG = 0x0010, + /* 2 BITS */ + I40E_FCOE_DIFDIX_CTX_DESC_GTYPE_CHKINTEGRITY_APPREFTAG = 0x0018, + /* 2 BITS */ + I40E_FCOE_DIFDIX_CTX_DESC_REFTYPE_CNST = 0x0000, + /* 2 BITS */ + I40E_FCOE_DIFDIX_CTX_DESC_REFTYPE_INC1BLK = 0x0020, + /* 2 BITS */ + I40E_FCOE_DIFDIX_CTX_DESC_REFTYPE_APPTAG = 0x0040, + /* 2 BITS */ + I40E_FCOE_DIFDIX_CTX_DESC_REFTYPE_RSVD = 0x0060, + /* 1 BIT */ + I40E_FCOE_DIFDIX_CTX_DESC_DIXMODE_XSUM = 0x0000, + /* 1 BIT */ + I40E_FCOE_DIFDIX_CTX_DESC_DIXMODE_CRC = 0x0080, + /* 2 BITS */ + I40E_FCOE_DIFDIX_CTX_DESC_DIFHOST_UNTAG = 0x0000, + /* 2 BITS */ + I40E_FCOE_DIFDIX_CTX_DESC_DIFHOST_BUF = 0x0100, + /* 2 BITS */ + I40E_FCOE_DIFDIX_CTX_DESC_DIFHOST_RSVD = 0x0200, + /* 2 BITS */ + I40E_FCOE_DIFDIX_CTX_DESC_DIFHOST_EMBDTAGS = 0x0300, + /* 1 BIT */ + I40E_FCOE_DIFDIX_CTX_DESC_DIFLAN_UNTAG = 0x0000, + /* 1 BIT */ + I40E_FCOE_DIFDIX_CTX_DESC_DIFLAN_TAG = 0x0400, + /* 1 BIT */ + I40E_FCOE_DIFDIX_CTX_DESC_DIFBLK_512B = 0x0000, + /* 1 BIT */ + I40E_FCOE_DIFDIX_CTX_DESC_DIFBLK_4K = 0x0800 +}; + +#define I40E_FCOE_DIFDIX_CTX_QW0_BUFF0_SHIFT 12 +#define I40E_FCOE_DIFDIX_CTX_QW0_BUFF0_MASK (0x3FFULL << \ + I40E_FCOE_DIFDIX_CTX_QW0_BUFF0_SHIFT) + +#define I40E_FCOE_DIFDIX_CTX_QW0_BUFF1_SHIFT 22 +#define I40E_FCOE_DIFDIX_CTX_QW0_BUFF1_MASK (0x3FFULL << \ + I40E_FCOE_DIFDIX_CTX_QW0_BUFF1_SHIFT) + +#define I40E_FCOE_DIFDIX_CTX_QW0_REF_SHIFT 32 +#define I40E_FCOE_DIFDIX_CTX_QW0_REF_MASK (0xFFFFFFFFULL << \ + I40E_FCOE_DIFDIX_CTX_QW0_REF_SHIFT) + +#define I40E_FCOE_DIFDIX_CTX_QW1_APP_SHIFT 0 +#define I40E_FCOE_DIFDIX_CTX_QW1_APP_MASK (0xFFFFULL << \ + I40E_FCOE_DIFDIX_CTX_QW1_APP_SHIFT) + +#define I40E_FCOE_DIFDIX_CTX_QW1_APP_MSK_SHIFT 16 +#define I40E_FCOE_DIFDIX_CTX_QW1_APP_MSK_MASK (0xFFFFULL << \ + I40E_FCOE_DIFDIX_CTX_QW1_APP_MSK_SHIFT) + +#define I40E_FCOE_DIFDIX_CTX_QW1_REF_BIAS_SHIFT 32 +#define I40E_FCOE_DIFDIX_CTX_QW0_REF_BIAS_MASK (0xFFFFFFFFULL << \ + I40E_FCOE_DIFDIX_CTX_QW1_REF_BIAS_SHIFT) + +/* FCoE DIF/DIX Buffers descriptor */ +struct i40e_fcoe_difdix_buffers_desc { + __le64 buff_addr0; + __le64 buff_addr1; +}; + +/* FCoE DDP Context descriptor */ +struct i40e_fcoe_ddp_context_desc { + __le64 rsvd; + __le64 type_cmd_foff_lsize; 
+}; + +#define I40E_FCOE_DDP_CTX_QW1_DTYPE_SHIFT 0 +#define I40E_FCOE_DDP_CTX_QW1_DTYPE_MASK (0xFULL << \ + I40E_FCOE_DDP_CTX_QW1_DTYPE_SHIFT) + +#define I40E_FCOE_DDP_CTX_QW1_CMD_SHIFT 4 +#define I40E_FCOE_DDP_CTX_QW1_CMD_MASK (0xFULL << \ + I40E_FCOE_DDP_CTX_QW1_CMD_SHIFT) + +enum i40e_fcoe_ddp_ctx_desc_cmd_bits { + I40E_FCOE_DDP_CTX_DESC_BSIZE_512B = 0x00, /* 2 BITS */ + I40E_FCOE_DDP_CTX_DESC_BSIZE_4K = 0x01, /* 2 BITS */ + I40E_FCOE_DDP_CTX_DESC_BSIZE_8K = 0x02, /* 2 BITS */ + I40E_FCOE_DDP_CTX_DESC_BSIZE_16K = 0x03, /* 2 BITS */ + I40E_FCOE_DDP_CTX_DESC_DIFENA = 0x04, /* 1 BIT */ + I40E_FCOE_DDP_CTX_DESC_LASTSEQH = 0x08, /* 1 BIT */ +}; + +#define I40E_FCOE_DDP_CTX_QW1_FOFF_SHIFT 16 +#define I40E_FCOE_DDP_CTX_QW1_FOFF_MASK (0x3FFFULL << \ + I40E_FCOE_DDP_CTX_QW1_FOFF_SHIFT) + +#define I40E_FCOE_DDP_CTX_QW1_LSIZE_SHIFT 32 +#define I40E_FCOE_DDP_CTX_QW1_LSIZE_MASK (0x3FFFULL << \ + I40E_FCOE_DDP_CTX_QW1_LSIZE_SHIFT) + +/* FCoE DDP/DWO Queue Context descriptor */ +struct i40e_fcoe_queue_context_desc { + __le64 dmaindx_fbase; /* 0:11 DMAINDX, 12:63 FBASE */ + __le64 flen_tph; /* 0:12 FLEN, 13:15 TPH */ +}; + +#define I40E_FCOE_QUEUE_CTX_QW0_DMAINDX_SHIFT 0 +#define I40E_FCOE_QUEUE_CTX_QW0_DMAINDX_MASK (0xFFFULL << \ + I40E_FCOE_QUEUE_CTX_QW0_DMAINDX_SHIFT) + +#define I40E_FCOE_QUEUE_CTX_QW0_FBASE_SHIFT 12 +#define I40E_FCOE_QUEUE_CTX_QW0_FBASE_MASK (0xFFFFFFFFFFFFFULL << \ + I40E_FCOE_QUEUE_CTX_QW0_FBASE_SHIFT) + +#define I40E_FCOE_QUEUE_CTX_QW1_FLEN_SHIFT 0 +#define I40E_FCOE_QUEUE_CTX_QW1_FLEN_MASK (0x1FFFULL << \ + I40E_FCOE_QUEUE_CTX_QW1_FLEN_SHIFT) + +#define I40E_FCOE_QUEUE_CTX_QW1_TPH_SHIFT 13 +#define I40E_FCOE_QUEUE_CTX_QW1_TPH_MASK (0x7ULL << \ + I40E_FCOE_QUEUE_CTX_QW1_FLEN_SHIFT) + +enum i40e_fcoe_queue_ctx_desc_tph_bits { + I40E_FCOE_QUEUE_CTX_DESC_TPHRDESC = 0x1, + I40E_FCOE_QUEUE_CTX_DESC_TPHDATA = 0x2 +}; + +#define I40E_FCOE_QUEUE_CTX_QW1_RECIPE_SHIFT 30 +#define I40E_FCOE_QUEUE_CTX_QW1_RECIPE_MASK (0x3ULL << \ + I40E_FCOE_QUEUE_CTX_QW1_RECIPE_SHIFT) + +/* FCoE DDP/DWO Filter Context descriptor */ +struct i40e_fcoe_filter_context_desc { + __le32 param; + __le16 seqn; + + /* 48:51(0:3) RSVD, 52:63(4:15) DMAINDX */ + __le16 rsvd_dmaindx; + + /* 0:7 FLAGS, 8:52 RSVD, 53:63 LANQ */ + __le64 flags_rsvd_lanq; +}; + +#define I40E_FCOE_FILTER_CTX_QW0_DMAINDX_SHIFT 4 +#define I40E_FCOE_FILTER_CTX_QW0_DMAINDX_MASK (0xFFF << \ + I40E_FCOE_FILTER_CTX_QW0_DMAINDX_SHIFT) + +enum i40e_fcoe_filter_ctx_desc_flags_bits { + I40E_FCOE_FILTER_CTX_DESC_CTYP_DDP = 0x00, + I40E_FCOE_FILTER_CTX_DESC_CTYP_DWO = 0x01, + I40E_FCOE_FILTER_CTX_DESC_ENODE_INIT = 0x00, + I40E_FCOE_FILTER_CTX_DESC_ENODE_RSP = 0x02, + I40E_FCOE_FILTER_CTX_DESC_FC_CLASS2 = 0x00, + I40E_FCOE_FILTER_CTX_DESC_FC_CLASS3 = 0x04 +}; + +#define I40E_FCOE_FILTER_CTX_QW1_FLAGS_SHIFT 0 +#define I40E_FCOE_FILTER_CTX_QW1_FLAGS_MASK (0xFFULL << \ + I40E_FCOE_FILTER_CTX_QW1_FLAGS_SHIFT) + +#define I40E_FCOE_FILTER_CTX_QW1_PCTYPE_SHIFT 8 +#define I40E_FCOE_FILTER_CTX_QW1_PCTYPE_MASK (0x3FULL << \ + I40E_FCOE_FILTER_CTX_QW1_PCTYPE_SHIFT) + +#define I40E_FCOE_FILTER_CTX_QW1_LANQINDX_SHIFT 53 +#define I40E_FCOE_FILTER_CTX_QW1_LANQINDX_MASK (0x7FFULL << \ + I40E_FCOE_FILTER_CTX_QW1_LANQINDX_SHIFT) + enum i40e_switch_element_types { I40E_SWITCH_ELEMENT_TYPE_MAC = 1, I40E_SWITCH_ELEMENT_TYPE_PF = 2, diff --git a/src/dpdk/drivers/net/i40e/base/i40e_virtchnl.h b/src/dpdk/drivers/net/i40e/base/i40e_virtchnl.h index fd51ec32..8fba6081 100644 --- a/src/dpdk/drivers/net/i40e/base/i40e_virtchnl.h +++ b/src/dpdk/drivers/net/i40e/base/i40e_virtchnl.h 
@@ -170,6 +170,11 @@ struct i40e_virtchnl_vsi_resource { #define I40E_VIRTCHNL_VF_OFFLOAD_RX_POLLING 0x00020000 #define I40E_VIRTCHNL_VF_OFFLOAD_RSS_PCTYPE_V2 0x00040000 #define I40E_VIRTCHNL_VF_OFFLOAD_RSS_PF 0X00080000 +#define I40E_VIRTCHNL_VF_OFFLOAD_ENCAP_CSUM 0X00100000 + +#define I40E_VF_BASE_MODE_OFFLOADS (I40E_VIRTCHNL_VF_OFFLOAD_L2 | \ + I40E_VIRTCHNL_VF_OFFLOAD_VLAN | \ + I40E_VIRTCHNL_VF_OFFLOAD_RSS_PF) struct i40e_virtchnl_vf_resource { u16 num_vsis; diff --git a/src/dpdk/drivers/net/i40e/i40e_ethdev.c b/src/dpdk/drivers/net/i40e/i40e_ethdev.c index ca1a4808..4492bcc1 100644 --- a/src/dpdk/drivers/net/i40e/i40e_ethdev.c +++ b/src/dpdk/drivers/net/i40e/i40e_ethdev.c @@ -1,7 +1,7 @@ /*- * BSD LICENSE * - * Copyright(c) 2010-2016 Intel Corporation. All rights reserved. + * Copyright(c) 2010-2017 Intel Corporation. All rights reserved. * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -51,6 +51,7 @@ #include <rte_dev.h> #include <rte_eth_ctrl.h> #include <rte_tailq.h> +#include <rte_hash_crc.h> #include "i40e_logs.h" #include "base/i40e_prototype.h" @@ -62,6 +63,7 @@ #include "i40e_rxtx.h" #include "i40e_pf.h" #include "i40e_regs.h" +#include "rte_pmd_i40e.h" #define ETH_I40E_FLOATING_VEB_ARG "enable_floating_veb" #define ETH_I40E_FLOATING_VEB_LIST_ARG "floating_veb_list" @@ -108,7 +110,6 @@ I40E_PFINT_ICR0_ENA_GRST_MASK | \ I40E_PFINT_ICR0_ENA_PCI_EXCEPTION_MASK | \ I40E_PFINT_ICR0_ENA_STORM_DETECT_MASK | \ - I40E_PFINT_ICR0_ENA_LINK_STAT_CHANGE_MASK | \ I40E_PFINT_ICR0_ENA_HMC_ERR_MASK | \ I40E_PFINT_ICR0_ENA_PE_CRITERR_MASK | \ I40E_PFINT_ICR0_ENA_VFLR_MASK | \ @@ -139,60 +140,6 @@ #define I40E_DEFAULT_DCB_APP_NUM 1 #define I40E_DEFAULT_DCB_APP_PRIO 3 -#define I40E_INSET_NONE 0x00000000000000000ULL - -/* bit0 ~ bit 7 */ -#define I40E_INSET_DMAC 0x0000000000000001ULL -#define I40E_INSET_SMAC 0x0000000000000002ULL -#define I40E_INSET_VLAN_OUTER 0x0000000000000004ULL -#define I40E_INSET_VLAN_INNER 0x0000000000000008ULL -#define I40E_INSET_VLAN_TUNNEL 0x0000000000000010ULL - -/* bit 8 ~ bit 15 */ -#define I40E_INSET_IPV4_SRC 0x0000000000000100ULL -#define I40E_INSET_IPV4_DST 0x0000000000000200ULL -#define I40E_INSET_IPV6_SRC 0x0000000000000400ULL -#define I40E_INSET_IPV6_DST 0x0000000000000800ULL -#define I40E_INSET_SRC_PORT 0x0000000000001000ULL -#define I40E_INSET_DST_PORT 0x0000000000002000ULL -#define I40E_INSET_SCTP_VT 0x0000000000004000ULL - -/* bit 16 ~ bit 31 */ -#define I40E_INSET_IPV4_TOS 0x0000000000010000ULL -#define I40E_INSET_IPV4_PROTO 0x0000000000020000ULL -#define I40E_INSET_IPV4_TTL 0x0000000000040000ULL -#define I40E_INSET_IPV6_TC 0x0000000000080000ULL -#define I40E_INSET_IPV6_FLOW 0x0000000000100000ULL -#define I40E_INSET_IPV6_NEXT_HDR 0x0000000000200000ULL -#define I40E_INSET_IPV6_HOP_LIMIT 0x0000000000400000ULL -#define I40E_INSET_TCP_FLAGS 0x0000000000800000ULL - -/* bit 32 ~ bit 47, tunnel fields */ -#define I40E_INSET_TUNNEL_IPV4_DST 0x0000000100000000ULL -#define I40E_INSET_TUNNEL_IPV6_DST 0x0000000200000000ULL -#define I40E_INSET_TUNNEL_DMAC 0x0000000400000000ULL -#define I40E_INSET_TUNNEL_SRC_PORT 0x0000000800000000ULL -#define I40E_INSET_TUNNEL_DST_PORT 0x0000001000000000ULL -#define I40E_INSET_TUNNEL_ID 0x0000002000000000ULL - -/* bit 48 ~ bit 55 */ -#define I40E_INSET_LAST_ETHER_TYPE 0x0001000000000000ULL - -/* bit 56 ~ bit 63, Flex Payload */ -#define I40E_INSET_FLEX_PAYLOAD_W1 0x0100000000000000ULL -#define I40E_INSET_FLEX_PAYLOAD_W2 0x0200000000000000ULL -#define I40E_INSET_FLEX_PAYLOAD_W3 
0x0400000000000000ULL -#define I40E_INSET_FLEX_PAYLOAD_W4 0x0800000000000000ULL -#define I40E_INSET_FLEX_PAYLOAD_W5 0x1000000000000000ULL -#define I40E_INSET_FLEX_PAYLOAD_W6 0x2000000000000000ULL -#define I40E_INSET_FLEX_PAYLOAD_W7 0x4000000000000000ULL -#define I40E_INSET_FLEX_PAYLOAD_W8 0x8000000000000000ULL -#define I40E_INSET_FLEX_PAYLOAD \ - (I40E_INSET_FLEX_PAYLOAD_W1 | I40E_INSET_FLEX_PAYLOAD_W2 | \ - I40E_INSET_FLEX_PAYLOAD_W3 | I40E_INSET_FLEX_PAYLOAD_W4 | \ - I40E_INSET_FLEX_PAYLOAD_W5 | I40E_INSET_FLEX_PAYLOAD_W6 | \ - I40E_INSET_FLEX_PAYLOAD_W7 | I40E_INSET_FLEX_PAYLOAD_W8) - /** * Below are values for writing un-exposed registers suggested * by silicon experts @@ -202,7 +149,7 @@ /* Source MAC address */ #define I40E_REG_INSET_L2_SMAC 0x1C00000000000000ULL /* Outer (S-Tag) VLAN tag in the outer L2 header */ -#define I40E_REG_INSET_L2_OUTER_VLAN 0x0200000000000000ULL +#define I40E_REG_INSET_L2_OUTER_VLAN 0x0000000004000000ULL /* Inner (C-Tag) or single VLAN tag in the outer L2 header */ #define I40E_REG_INSET_L2_INNER_VLAN 0x0080000000000000ULL /* Single VLAN tag in the inner L2 header */ @@ -211,6 +158,14 @@ #define I40E_REG_INSET_L3_SRC_IP4 0x0001800000000000ULL /* Destination IPv4 address */ #define I40E_REG_INSET_L3_DST_IP4 0x0000001800000000ULL +/* Source IPv4 address for X722 */ +#define I40E_X722_REG_INSET_L3_SRC_IP4 0x0006000000000000ULL +/* Destination IPv4 address for X722 */ +#define I40E_X722_REG_INSET_L3_DST_IP4 0x0000060000000000ULL +/* IPv4 Protocol for X722 */ +#define I40E_X722_REG_INSET_L3_IP4_PROTO 0x0010000000000000ULL +/* IPv4 Time to Live for X722 */ +#define I40E_X722_REG_INSET_L3_IP4_TTL 0x0010000000000000ULL /* IPv4 Type of Service (TOS) */ #define I40E_REG_INSET_L3_IP4_TOS 0x0040000000000000ULL /* IPv4 Protocol */ @@ -277,11 +232,6 @@ #define I40E_INSET_IPV6_HOP_LIMIT_MASK 0x000CFF00UL #define I40E_INSET_IPV6_NEXT_HDR_MASK 0x000C00FFUL -#define I40E_GL_SWT_L2TAGCTRL(_i) (0x001C0A70 + ((_i) * 4)) -#define I40E_GL_SWT_L2TAGCTRL_ETHERTYPE_SHIFT 16 -#define I40E_GL_SWT_L2TAGCTRL_ETHERTYPE_MASK \ - I40E_MASK(0xFFFF, I40E_GL_SWT_L2TAGCTRL_ETHERTYPE_SHIFT) - /* PCI offset for querying capability */ #define PCI_DEV_CAP_REG 0xA4 /* PCI offset for enabling/disabling Extended Tag */ @@ -317,6 +267,8 @@ static int i40e_dev_queue_stats_mapping_set(struct rte_eth_dev *dev, uint16_t queue_id, uint8_t stat_idx, uint8_t is_rx); +static int i40e_fw_version_get(struct rte_eth_dev *dev, + char *fw_version, size_t fw_size); static void i40e_dev_info_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info); static int i40e_vlan_filter_set(struct rte_eth_dev *dev, @@ -366,8 +318,8 @@ static void i40e_stat_update_48(struct i40e_hw *hw, uint64_t *offset, uint64_t *stat); static void i40e_pf_config_irq0(struct i40e_hw *hw, bool no_queue); -static void i40e_dev_interrupt_handler( - __rte_unused struct rte_intr_handle *handle, void *param); +static void i40e_dev_interrupt_handler(struct rte_intr_handle *handle, + void *param); static int i40e_res_pool_init(struct i40e_res_pool_info *pool, uint32_t base, uint32_t num); static void i40e_res_pool_destroy(struct i40e_res_pool_info *pool); @@ -399,9 +351,6 @@ static int i40e_dev_udp_tunnel_port_add(struct rte_eth_dev *dev, static int i40e_dev_udp_tunnel_port_del(struct rte_eth_dev *dev, struct rte_eth_udp_tunnel *udp_tunnel); static void i40e_filter_input_set_init(struct i40e_pf *pf); -static int i40e_ethertype_filter_set(struct i40e_pf *pf, - struct rte_eth_ethertype_filter *filter, - bool add); static int 
i40e_ethertype_filter_handle(struct rte_eth_dev *dev, enum rte_filter_op filter_op, void *arg); @@ -411,6 +360,7 @@ static int i40e_dev_filter_ctrl(struct rte_eth_dev *dev, void *arg); static int i40e_dev_get_dcb_info(struct rte_eth_dev *dev, struct rte_eth_dcb_info *dcb_info); +static int i40e_dev_sync_phy_type(struct i40e_hw *hw); static void i40e_configure_registers(struct i40e_hw *hw); static void i40e_hw_init(struct rte_eth_dev *dev); static int i40e_config_qinq(struct i40e_hw *hw, struct i40e_vsi *vsi); @@ -453,6 +403,22 @@ static void i40e_set_default_mac_addr(struct rte_eth_dev *dev, static int i40e_dev_mtu_set(struct rte_eth_dev *dev, uint16_t mtu); +static int i40e_ethertype_filter_convert( + const struct rte_eth_ethertype_filter *input, + struct i40e_ethertype_filter *filter); +static int i40e_sw_ethertype_filter_insert(struct i40e_pf *pf, + struct i40e_ethertype_filter *filter); + +static int i40e_tunnel_filter_convert( + struct i40e_aqc_add_remove_cloud_filters_element_data *cld_filter, + struct i40e_tunnel_filter *tunnel_filter); +static int i40e_sw_tunnel_filter_insert(struct i40e_pf *pf, + struct i40e_tunnel_filter *tunnel_filter); + +static void i40e_ethertype_filter_restore(struct i40e_pf *pf); +static void i40e_tunnel_filter_restore(struct i40e_pf *pf); +static void i40e_filter_restore(struct i40e_pf *pf); + static const struct rte_pci_id pci_id_i40e_map[] = { { RTE_PCI_DEVICE(I40E_INTEL_VENDOR_ID, I40E_DEV_ID_SFP_XL710) }, { RTE_PCI_DEVICE(I40E_INTEL_VENDOR_ID, I40E_DEV_ID_QEMU) }, @@ -474,7 +440,6 @@ static const struct rte_pci_id pci_id_i40e_map[] = { { RTE_PCI_DEVICE(I40E_INTEL_VENDOR_ID, I40E_DEV_ID_1G_BASE_T_X722) }, { RTE_PCI_DEVICE(I40E_INTEL_VENDOR_ID, I40E_DEV_ID_10G_BASE_T_X722) }, { RTE_PCI_DEVICE(I40E_INTEL_VENDOR_ID, I40E_DEV_ID_SFP_I_X722) }, - { RTE_PCI_DEVICE(I40E_INTEL_VENDOR_ID, I40E_DEV_ID_QSFP_I_X722) }, { .vendor_id = 0, /* sentinel */ }, }; @@ -496,6 +461,7 @@ static const struct eth_dev_ops i40e_eth_dev_ops = { .stats_reset = i40e_dev_stats_reset, .xstats_reset = i40e_dev_stats_reset, .queue_stats_mapping_set = i40e_dev_queue_stats_mapping_set, + .fw_version_get = i40e_fw_version_get, .dev_infos_get = i40e_dev_info_get, .dev_supported_ptypes_get = i40e_dev_supported_ptypes_get, .vlan_filter_set = i40e_vlan_filter_set, @@ -663,10 +629,10 @@ static const struct rte_i40e_xstats_name_off rte_i40e_txq_prio_strings[] = { static struct eth_driver rte_i40e_pmd = { .pci_drv = { - .name = "rte_i40e_pmd", .id_table = pci_id_i40e_map, - .drv_flags = RTE_PCI_DRV_NEED_MAPPING | RTE_PCI_DRV_INTR_LSC | - RTE_PCI_DRV_DETACHABLE, + .drv_flags = RTE_PCI_DRV_NEED_MAPPING | RTE_PCI_DRV_INTR_LSC, + .probe = rte_eth_dev_pci_probe, + .remove = rte_eth_dev_pci_remove, }, .eth_dev_init = eth_i40e_dev_init, .eth_dev_uninit = eth_i40e_dev_uninit, @@ -701,33 +667,10 @@ rte_i40e_dev_atomic_write_link_status(struct rte_eth_dev *dev, return 0; } -/* - * Driver initialization routine. - * Invoked once at EAL init time. - * Register itself as the [Poll Mode] Driver of PCI IXGBE devices. 
- */ -static int -rte_i40e_pmd_init(const char *name __rte_unused, - const char *params __rte_unused) -{ - PMD_INIT_FUNC_TRACE(); - rte_eth_driver_register(&rte_i40e_pmd); - - return 0; -} - -static struct rte_driver rte_i40e_driver = { - .type = PMD_PDEV, - .init = rte_i40e_pmd_init, -}; - -PMD_REGISTER_DRIVER(rte_i40e_driver, i40e); -DRIVER_REGISTER_PCI_TABLE(i40e, pci_id_i40e_map); +RTE_PMD_REGISTER_PCI(net_i40e, rte_i40e_pmd.pci_drv); +RTE_PMD_REGISTER_PCI_TABLE(net_i40e, pci_id_i40e_map); +RTE_PMD_REGISTER_KMOD_DEP(net_i40e, "* igb_uio | uio_pci_generic | vfio"); -/* - * Initialize registers for flexible payload, which should be set by NVM. - * This should be removed from code once it is fixed in NVM. - */ #ifndef I40E_GLQF_ORT #define I40E_GLQF_ORT(_i) (0x00268900 + ((_i) * 4)) #endif @@ -735,8 +678,12 @@ DRIVER_REGISTER_PCI_TABLE(i40e, pci_id_i40e_map); #define I40E_GLQF_PIT(_i) (0x00268C80 + ((_i) * 4)) #endif -static inline void i40e_flex_payload_reg_init(struct i40e_hw *hw) +static inline void i40e_GLQF_reg_init(struct i40e_hw *hw) { + /* + * Initialize registers for flexible payload, which should be set by NVM. + * This should be removed from code once it is fixed in NVM. + */ I40E_WRITE_REG(hw, I40E_GLQF_ORT(18), 0x00000030); I40E_WRITE_REG(hw, I40E_GLQF_ORT(19), 0x00000030); I40E_WRITE_REG(hw, I40E_GLQF_ORT(26), 0x0000002B); @@ -747,17 +694,16 @@ static inline void i40e_flex_payload_reg_init(struct i40e_hw *hw) I40E_WRITE_REG(hw, I40E_GLQF_ORT(20), 0x00000031); I40E_WRITE_REG(hw, I40E_GLQF_ORT(23), 0x00000031); I40E_WRITE_REG(hw, I40E_GLQF_ORT(63), 0x0000002D); - - /* GLQF_PIT Registers */ I40E_WRITE_REG(hw, I40E_GLQF_PIT(16), 0x00007480); I40E_WRITE_REG(hw, I40E_GLQF_PIT(17), 0x00007440); + + /* Initialize registers for parsing packet type of QinQ */ + I40E_WRITE_REG(hw, I40E_GLQF_ORT(40), 0x00000029); + I40E_WRITE_REG(hw, I40E_GLQF_PIT(9), 0x00009420); } #define I40E_FLOW_CONTROL_ETHERTYPE 0x8808 -#define TREX_PATCH -#define TREX_PATCH_LOW_LATENCY - /* * Add a ethertype filter to drop all flow control frames transmitted * from VSIs. 
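
Stepping back to the registration change earlier in this hunk: the driver-private rte_i40e_pmd_init()/rte_driver pair is removed, and the PMD now plugs the generic rte_eth_dev_pci_probe()/rte_eth_dev_pci_remove() callbacks into its rte_pci_driver and announces itself with the RTE_PMD_REGISTER_* macros. The assembled pattern looks roughly like the sketch below; every "example" name and the PCI IDs are placeholders, not part of the i40e driver:

/* Illustrative registration skeleton for a PCI ethdev PMD on this API. */
#include <rte_ethdev.h>
#include <rte_pci.h>
#include <rte_dev.h>

struct example_adapter { int dummy; };	/* placeholder private data */

static int example_dev_init(struct rte_eth_dev *dev)   { (void)dev; return 0; }
static int example_dev_uninit(struct rte_eth_dev *dev) { (void)dev; return 0; }

static const struct rte_pci_id example_pci_id_map[] = {
	{ RTE_PCI_DEVICE(0x8086, 0x1572) },	/* IDs are placeholders */
	{ .vendor_id = 0, /* sentinel */ },
};

static struct eth_driver example_pmd = {
	.pci_drv = {
		.id_table = example_pci_id_map,
		.drv_flags = RTE_PCI_DRV_NEED_MAPPING | RTE_PCI_DRV_INTR_LSC,
		.probe = rte_eth_dev_pci_probe,
		.remove = rte_eth_dev_pci_remove,
	},
	.eth_dev_init = example_dev_init,
	.eth_dev_uninit = example_dev_uninit,
	.dev_private_size = sizeof(struct example_adapter),
};

RTE_PMD_REGISTER_PCI(net_example, example_pmd.pci_drv);
RTE_PMD_REGISTER_PCI_TABLE(net_example, example_pci_id_map);
RTE_PMD_REGISTER_KMOD_DEP(net_example, "* igb_uio | uio_pci_generic | vfio");

RTE_PMD_REGISTER_KMOD_DEP() records the PMD's kernel-module requirements in its pmdinfo string so external tooling can query them.
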
@@ -776,8 +722,8 @@ i40e_add_tx_flow_control_drop_filter(struct i40e_pf *pf) pf->main_vsi_seid, 0, TRUE, NULL, NULL); if (ret) - PMD_INIT_LOG(ERR, "Failed to add filter to drop flow control " - " frames from VSIs."); + PMD_INIT_LOG(ERR, + "Failed to add filter to drop flow control frames from VSIs."); } static int @@ -920,25 +866,159 @@ is_floating_veb_supported(struct rte_devargs *devargs) static void config_floating_veb(struct rte_eth_dev *dev) { - struct rte_pci_device *pci_dev = dev->pci_dev; + struct rte_pci_device *pci_dev = I40E_DEV_TO_PCI(dev); struct i40e_pf *pf = I40E_DEV_PRIVATE_TO_PF(dev->data->dev_private); struct i40e_hw *hw = I40E_DEV_PRIVATE_TO_HW(dev->data->dev_private); memset(pf->floating_veb_list, 0, sizeof(pf->floating_veb_list)); if (hw->aq.fw_maj_ver >= FLOATING_VEB_SUPPORTED_FW_MAJ) { - pf->floating_veb = is_floating_veb_supported(pci_dev->devargs); - config_vf_floating_veb(pci_dev->devargs, pf->floating_veb, + pf->floating_veb = + is_floating_veb_supported(pci_dev->device.devargs); + config_vf_floating_veb(pci_dev->device.devargs, + pf->floating_veb, pf->floating_veb_list); } else { pf->floating_veb = false; } } +#define I40E_L2_TAGS_S_TAG_SHIFT 1 +#define I40E_L2_TAGS_S_TAG_MASK I40E_MASK(0x1, I40E_L2_TAGS_S_TAG_SHIFT) + +static int +i40e_init_ethtype_filter_list(struct rte_eth_dev *dev) +{ + struct i40e_pf *pf = I40E_DEV_PRIVATE_TO_PF(dev->data->dev_private); + struct i40e_ethertype_rule *ethertype_rule = &pf->ethertype; + char ethertype_hash_name[RTE_HASH_NAMESIZE]; + int ret; + + struct rte_hash_parameters ethertype_hash_params = { + .name = ethertype_hash_name, + .entries = I40E_MAX_ETHERTYPE_FILTER_NUM, + .key_len = sizeof(struct i40e_ethertype_filter_input), + .hash_func = rte_hash_crc, + }; + + /* Initialize ethertype filter rule list and hash */ + TAILQ_INIT(ðertype_rule->ethertype_list); + snprintf(ethertype_hash_name, RTE_HASH_NAMESIZE, + "ethertype_%s", dev->data->name); + ethertype_rule->hash_table = rte_hash_create(ðertype_hash_params); + if (!ethertype_rule->hash_table) { + PMD_INIT_LOG(ERR, "Failed to create ethertype hash table!"); + return -EINVAL; + } + ethertype_rule->hash_map = rte_zmalloc("i40e_ethertype_hash_map", + sizeof(struct i40e_ethertype_filter *) * + I40E_MAX_ETHERTYPE_FILTER_NUM, + 0); + if (!ethertype_rule->hash_map) { + PMD_INIT_LOG(ERR, + "Failed to allocate memory for ethertype hash map!"); + ret = -ENOMEM; + goto err_ethertype_hash_map_alloc; + } + + return 0; + +err_ethertype_hash_map_alloc: + rte_hash_free(ethertype_rule->hash_table); + + return ret; +} + +static int +i40e_init_tunnel_filter_list(struct rte_eth_dev *dev) +{ + struct i40e_pf *pf = I40E_DEV_PRIVATE_TO_PF(dev->data->dev_private); + struct i40e_tunnel_rule *tunnel_rule = &pf->tunnel; + char tunnel_hash_name[RTE_HASH_NAMESIZE]; + int ret; + + struct rte_hash_parameters tunnel_hash_params = { + .name = tunnel_hash_name, + .entries = I40E_MAX_TUNNEL_FILTER_NUM, + .key_len = sizeof(struct i40e_tunnel_filter_input), + .hash_func = rte_hash_crc, + }; + + /* Initialize tunnel filter rule list and hash */ + TAILQ_INIT(&tunnel_rule->tunnel_list); + snprintf(tunnel_hash_name, RTE_HASH_NAMESIZE, + "tunnel_%s", dev->data->name); + tunnel_rule->hash_table = rte_hash_create(&tunnel_hash_params); + if (!tunnel_rule->hash_table) { + PMD_INIT_LOG(ERR, "Failed to create tunnel hash table!"); + return -EINVAL; + } + tunnel_rule->hash_map = rte_zmalloc("i40e_tunnel_hash_map", + sizeof(struct i40e_tunnel_filter *) * + I40E_MAX_TUNNEL_FILTER_NUM, + 0); + if (!tunnel_rule->hash_map) { + 
PMD_INIT_LOG(ERR, + "Failed to allocate memory for tunnel hash map!"); + ret = -ENOMEM; + goto err_tunnel_hash_map_alloc; + } + + return 0; + +err_tunnel_hash_map_alloc: + rte_hash_free(tunnel_rule->hash_table); + + return ret; +} + +static int +i40e_init_fdir_filter_list(struct rte_eth_dev *dev) +{ + struct i40e_pf *pf = I40E_DEV_PRIVATE_TO_PF(dev->data->dev_private); + struct i40e_fdir_info *fdir_info = &pf->fdir; + char fdir_hash_name[RTE_HASH_NAMESIZE]; + int ret; + + struct rte_hash_parameters fdir_hash_params = { + .name = fdir_hash_name, + .entries = I40E_MAX_FDIR_FILTER_NUM, + .key_len = sizeof(struct rte_eth_fdir_input), + .hash_func = rte_hash_crc, + }; + + /* Initialize flow director filter rule list and hash */ + TAILQ_INIT(&fdir_info->fdir_list); + snprintf(fdir_hash_name, RTE_HASH_NAMESIZE, + "fdir_%s", dev->data->name); + fdir_info->hash_table = rte_hash_create(&fdir_hash_params); + if (!fdir_info->hash_table) { + PMD_INIT_LOG(ERR, "Failed to create fdir hash table!"); + return -EINVAL; + } + fdir_info->hash_map = rte_zmalloc("i40e_fdir_hash_map", + sizeof(struct i40e_fdir_filter *) * + I40E_MAX_FDIR_FILTER_NUM, + 0); + if (!fdir_info->hash_map) { + PMD_INIT_LOG(ERR, + "Failed to allocate memory for fdir hash map!"); + ret = -ENOMEM; + goto err_fdir_hash_map_alloc; + } + return 0; + +err_fdir_hash_map_alloc: + rte_hash_free(fdir_info->hash_table); + + return ret; +} + static int eth_i40e_dev_init(struct rte_eth_dev *dev) { struct rte_pci_device *pci_dev; + struct rte_intr_handle *intr_handle; struct i40e_pf *pf = I40E_DEV_PRIVATE_TO_PF(dev->data->dev_private); struct i40e_hw *hw = I40E_DEV_PRIVATE_TO_HW(dev->data->dev_private); struct i40e_vsi *vsi; @@ -951,6 +1031,7 @@ eth_i40e_dev_init(struct rte_eth_dev *dev) dev->dev_ops = &i40e_eth_dev_ops; dev->rx_pkt_burst = i40e_recv_pkts; dev->tx_pkt_burst = i40e_xmit_pkts; + dev->tx_pkt_prepare = i40e_prep_pkts; /* for secondary processes, we don't initialise any further as primary * has already done this work. Only check we don't need a different @@ -960,9 +1041,11 @@ eth_i40e_dev_init(struct rte_eth_dev *dev) i40e_set_tx_function(dev); return 0; } - pci_dev = dev->pci_dev; + pci_dev = I40E_DEV_TO_PCI(dev); + intr_handle = &pci_dev->intr_handle; rte_eth_copy_pci_info(dev, pci_dev); + dev->data->dev_flags = RTE_ETH_DEV_DETACHABLE; pf->adapter = I40E_DEV_PRIVATE_TO_ADAPTER(dev->data->dev_private); pf->adapter->eth_dev = dev; @@ -971,8 +1054,8 @@ eth_i40e_dev_init(struct rte_eth_dev *dev) hw->back = I40E_PF_TO_ADAPTER(pf); hw->hw_addr = (uint8_t *)(pci_dev->mem_resource[0].addr); if (!hw->hw_addr) { - PMD_INIT_LOG(ERR, "Hardware is not available, " - "as address is NULL"); + PMD_INIT_LOG(ERR, + "Hardware is not available, as address is NULL"); return -ENODEV; } @@ -1005,11 +1088,12 @@ eth_i40e_dev_init(struct rte_eth_dev *dev) } /* - * To work around the NVM issue,initialize registers - * for flexible payload by software. - * It should be removed once issues are fixed in NVM. + * To work around the NVM issue, initialize registers + * for flexible payload and packet type of QinQ by + * software. It should be removed once issues are fixed + * in NVM. 
*/ - i40e_flex_payload_reg_init(hw); + i40e_GLQF_reg_init(hw); /* Initialize the input set for filters (hash and fd) to default value */ i40e_filter_input_set_init(pf); @@ -1032,7 +1116,11 @@ eth_i40e_dev_init(struct rte_eth_dev *dev) config_floating_veb(dev); /* Clear PXE mode */ i40e_clear_pxe_mode(hw); - + ret = i40e_dev_sync_phy_type(hw); + if (ret) { + PMD_INIT_LOG(ERR, "Failed to sync phy type: %d", ret); + goto err_sync_phy_type; + } /* * On X710, performance number is far from the expectation on recent * firmware versions. The fix for this issue may not be integrated in @@ -1103,8 +1191,8 @@ eth_i40e_dev_init(struct rte_eth_dev *dev) /* Set the global registers with default ether type value */ ret = i40e_vlan_tpid_set(dev, ETH_VLAN_TYPE_OUTER, ETHER_TYPE_VLAN); if (ret != I40E_SUCCESS) { - PMD_INIT_LOG(ERR, "Failed to set the default outer " - "VLAN ether type"); + PMD_INIT_LOG(ERR, + "Failed to set the default outer VLAN ether type"); goto err_setup_pf_switch; } @@ -1123,6 +1211,15 @@ eth_i40e_dev_init(struct rte_eth_dev *dev) /* Disable double vlan by default */ i40e_vsi_config_double_vlan(vsi, FALSE); + /* Disable S-TAG identification when floating_veb is disabled */ + if (!pf->floating_veb) { + ret = I40E_READ_REG(hw, I40E_PRT_L2TAGSEN); + if (ret & I40E_L2_TAGS_S_TAG_MASK) { + ret &= ~I40E_L2_TAGS_S_TAG_MASK; + I40E_WRITE_REG(hw, I40E_PRT_L2TAGSEN, ret); + } + } + if (!vsi->max_macaddrs) len = ETHER_ADDR_LEN; else @@ -1131,8 +1228,8 @@ eth_i40e_dev_init(struct rte_eth_dev *dev) /* Should be after VSI initialized */ dev->data->mac_addrs = rte_zmalloc("i40e", len, 0); if (!dev->data->mac_addrs) { - PMD_INIT_LOG(ERR, "Failed to allocated memory " - "for storing mac address"); + PMD_INIT_LOG(ERR, + "Failed to allocated memory for storing mac address"); goto err_mac_alloc; } ether_addr_copy((struct ether_addr *)hw->mac.perm_addr, @@ -1142,15 +1239,15 @@ eth_i40e_dev_init(struct rte_eth_dev *dev) i40e_pf_host_init(dev); /* register callback func to eal lib */ - rte_intr_callback_register(&(pci_dev->intr_handle), - i40e_dev_interrupt_handler, (void *)dev); + rte_intr_callback_register(intr_handle, + i40e_dev_interrupt_handler, dev); /* configure and enable device interrupt */ i40e_pf_config_irq0(hw, TRUE); i40e_pf_enable_irq0(hw); /* enable uio intr after callback register */ - rte_intr_enable(&(pci_dev->intr_handle)); + rte_intr_enable(intr_handle); /* * Add an ethertype filter to drop all flow control frames transmitted * from VSIs. 
By doing so, we stop VF from sending out PAUSE or PFC @@ -1173,8 +1270,26 @@ eth_i40e_dev_init(struct rte_eth_dev *dev) pf->flags &= ~I40E_FLAG_DCB; } + ret = i40e_init_ethtype_filter_list(dev); + if (ret < 0) + goto err_init_ethtype_filter_list; + ret = i40e_init_tunnel_filter_list(dev); + if (ret < 0) + goto err_init_tunnel_filter_list; + ret = i40e_init_fdir_filter_list(dev); + if (ret < 0) + goto err_init_fdir_filter_list; + return 0; +err_init_fdir_filter_list: + rte_free(pf->tunnel.hash_table); + rte_free(pf->tunnel.hash_map); +err_init_tunnel_filter_list: + rte_free(pf->ethertype.hash_table); + rte_free(pf->ethertype.hash_map); +err_init_ethtype_filter_list: + rte_free(dev->data->mac_addrs); err_mac_alloc: i40e_vsi_release(pf->main_vsi); err_setup_pf_switch: @@ -1188,17 +1303,79 @@ err_msix_pool_init: err_qp_pool_init: err_parameter_init: err_get_capabilities: +err_sync_phy_type: (void)i40e_shutdown_adminq(hw); return ret; } +static void +i40e_rm_ethtype_filter_list(struct i40e_pf *pf) +{ + struct i40e_ethertype_filter *p_ethertype; + struct i40e_ethertype_rule *ethertype_rule; + + ethertype_rule = &pf->ethertype; + /* Remove all ethertype filter rules and hash */ + if (ethertype_rule->hash_map) + rte_free(ethertype_rule->hash_map); + if (ethertype_rule->hash_table) + rte_hash_free(ethertype_rule->hash_table); + + while ((p_ethertype = TAILQ_FIRST(ðertype_rule->ethertype_list))) { + TAILQ_REMOVE(ðertype_rule->ethertype_list, + p_ethertype, rules); + rte_free(p_ethertype); + } +} + +static void +i40e_rm_tunnel_filter_list(struct i40e_pf *pf) +{ + struct i40e_tunnel_filter *p_tunnel; + struct i40e_tunnel_rule *tunnel_rule; + + tunnel_rule = &pf->tunnel; + /* Remove all tunnel director rules and hash */ + if (tunnel_rule->hash_map) + rte_free(tunnel_rule->hash_map); + if (tunnel_rule->hash_table) + rte_hash_free(tunnel_rule->hash_table); + + while ((p_tunnel = TAILQ_FIRST(&tunnel_rule->tunnel_list))) { + TAILQ_REMOVE(&tunnel_rule->tunnel_list, p_tunnel, rules); + rte_free(p_tunnel); + } +} + +static void +i40e_rm_fdir_filter_list(struct i40e_pf *pf) +{ + struct i40e_fdir_filter *p_fdir; + struct i40e_fdir_info *fdir_info; + + fdir_info = &pf->fdir; + /* Remove all flow director rules and hash */ + if (fdir_info->hash_map) + rte_free(fdir_info->hash_map); + if (fdir_info->hash_table) + rte_hash_free(fdir_info->hash_table); + + while ((p_fdir = TAILQ_FIRST(&fdir_info->fdir_list))) { + TAILQ_REMOVE(&fdir_info->fdir_list, p_fdir, rules); + rte_free(p_fdir); + } +} + static int eth_i40e_dev_uninit(struct rte_eth_dev *dev) { + struct i40e_pf *pf; struct rte_pci_device *pci_dev; + struct rte_intr_handle *intr_handle; struct i40e_hw *hw; struct i40e_filter_control_settings settings; + struct rte_flow *p_flow; int ret; uint8_t aq_fail = 0; @@ -1207,8 +1384,10 @@ eth_i40e_dev_uninit(struct rte_eth_dev *dev) if (rte_eal_process_type() != RTE_PROC_PRIMARY) return 0; + pf = I40E_DEV_PRIVATE_TO_PF(dev->data->dev_private); hw = I40E_DEV_PRIVATE_TO_HW(dev->data->dev_private); - pci_dev = dev->pci_dev; + pci_dev = I40E_DEV_TO_PCI(dev); + intr_handle = &pci_dev->intr_handle; if (hw->adapter_stopped == 0) i40e_dev_close(dev); @@ -1217,11 +1396,6 @@ eth_i40e_dev_uninit(struct rte_eth_dev *dev) dev->rx_pkt_burst = NULL; dev->tx_pkt_burst = NULL; - /* Disable LLDP */ - ret = i40e_aq_stop_lldp(hw, true, NULL); - if (ret != I40E_SUCCESS) /* Its failure can be ignored */ - PMD_INIT_LOG(INFO, "Failed to stop lldp"); - /* Clear PXE mode */ i40e_clear_pxe_mode(hw); @@ -1243,11 +1417,21 @@ 
eth_i40e_dev_uninit(struct rte_eth_dev *dev) dev->data->mac_addrs = NULL; /* disable uio intr before callback unregister */ - rte_intr_disable(&(pci_dev->intr_handle)); + rte_intr_disable(intr_handle); /* register callback func to eal lib */ - rte_intr_callback_unregister(&(pci_dev->intr_handle), - i40e_dev_interrupt_handler, (void *)dev); + rte_intr_callback_unregister(intr_handle, + i40e_dev_interrupt_handler, dev); + + i40e_rm_ethtype_filter_list(pf); + i40e_rm_tunnel_filter_list(pf); + i40e_rm_fdir_filter_list(pf); + + /* Remove all flows */ + while ((p_flow = TAILQ_FIRST(&pf->flow_list))) { + TAILQ_REMOVE(&pf->flow_list, p_flow, node); + rte_free(p_flow); + } return 0; } @@ -1313,6 +1497,8 @@ i40e_dev_configure(struct rte_eth_dev *dev) } } + TAILQ_INIT(&pf->flow_list); + return 0; err_dcb: @@ -1333,7 +1519,8 @@ void i40e_vsi_queues_unbind_intr(struct i40e_vsi *vsi) { struct rte_eth_dev *dev = vsi->adapter->eth_dev; - struct rte_intr_handle *intr_handle = &dev->pci_dev->intr_handle; + struct rte_pci_device *pci_dev = I40E_DEV_TO_PCI(dev); + struct rte_intr_handle *intr_handle = &pci_dev->intr_handle; struct i40e_hw *hw = I40E_VSI_TO_HW(vsi); uint16_t msix_vect = vsi->msix_intr; uint16_t i; @@ -1446,7 +1633,8 @@ void i40e_vsi_queues_bind_intr(struct i40e_vsi *vsi) { struct rte_eth_dev *dev = vsi->adapter->eth_dev; - struct rte_intr_handle *intr_handle = &dev->pci_dev->intr_handle; + struct rte_pci_device *pci_dev = I40E_DEV_TO_PCI(dev); + struct rte_intr_handle *intr_handle = &pci_dev->intr_handle; struct i40e_hw *hw = I40E_VSI_TO_HW(vsi); uint16_t msix_vect = vsi->msix_intr; uint16_t nb_msix = RTE_MIN(vsi->nb_msix, intr_handle->nb_efd); @@ -1517,7 +1705,8 @@ static void i40e_vsi_enable_queues_intr(struct i40e_vsi *vsi) { struct rte_eth_dev *dev = vsi->adapter->eth_dev; - struct rte_intr_handle *intr_handle = &dev->pci_dev->intr_handle; + struct rte_pci_device *pci_dev = I40E_DEV_TO_PCI(dev); + struct rte_intr_handle *intr_handle = &pci_dev->intr_handle; struct i40e_hw *hw = I40E_VSI_TO_HW(vsi); uint16_t interval = i40e_calc_itr_interval(\ RTE_LIBRTE_I40E_ITR_INTERVAL); @@ -1548,7 +1737,8 @@ static void i40e_vsi_disable_queues_intr(struct i40e_vsi *vsi) { struct rte_eth_dev *dev = vsi->adapter->eth_dev; - struct rte_intr_handle *intr_handle = &dev->pci_dev->intr_handle; + struct rte_pci_device *pci_dev = I40E_DEV_TO_PCI(dev); + struct rte_intr_handle *intr_handle = &pci_dev->intr_handle; struct i40e_hw *hw = I40E_VSI_TO_HW(vsi); uint16_t msix_intr, i; @@ -1571,6 +1761,8 @@ i40e_parse_link_speeds(uint16_t link_speeds) if (link_speeds & ETH_LINK_SPEED_40G) link_speed |= I40E_LINK_SPEED_40GB; + if (link_speeds & ETH_LINK_SPEED_25G) + link_speed |= I40E_LINK_SPEED_25GB; if (link_speeds & ETH_LINK_SPEED_20G) link_speed |= I40E_LINK_SPEED_20GB; if (link_speeds & ETH_LINK_SPEED_10G) @@ -1596,6 +1788,7 @@ i40e_phy_conf_link(struct i40e_hw *hw, I40E_AQ_PHY_FLAG_PAUSE_RX | I40E_AQ_PHY_FLAG_LOW_POWER; const uint8_t advt = I40E_LINK_SPEED_40GB | + I40E_LINK_SPEED_25GB | I40E_LINK_SPEED_10GB | I40E_LINK_SPEED_1GB | I40E_LINK_SPEED_100MB; @@ -1623,6 +1816,8 @@ i40e_phy_conf_link(struct i40e_hw *hw, /* use get_phy_abilities_resp value for the rest */ phy_conf.phy_type = phy_ab.phy_type; + phy_conf.phy_type_ext = phy_ab.phy_type_ext; + phy_conf.fec_config = phy_ab.fec_cfg_curr_mod_ext_info; phy_conf.eee_capability = phy_ab.eee_capability; phy_conf.eeer = phy_ab.eeer_val; phy_conf.low_power_ctrl = phy_ab.d3_lpan; @@ -1654,7 +1849,7 @@ i40e_apply_link_speed(struct rte_eth_dev *dev) abilities |= 
I40E_AQ_PHY_LINK_ENABLED; /* Skip changing speed on 40G interfaces, FW does not support */ - if (i40e_is_40G_device(hw->device_id)) { + if (I40E_PHY_TYPE_SUPPORT_40G(hw->phy.phy_types)) { speed = I40E_LINK_SPEED_UNKNOWN; abilities |= I40E_AQ_PHY_AN_ENABLED; } @@ -1669,7 +1864,8 @@ i40e_dev_start(struct rte_eth_dev *dev) struct i40e_hw *hw = I40E_DEV_PRIVATE_TO_HW(dev->data->dev_private); struct i40e_vsi *main_vsi = pf->main_vsi; int ret, i; - struct rte_intr_handle *intr_handle = &dev->pci_dev->intr_handle; + struct rte_pci_device *pci_dev = I40E_DEV_TO_PCI(dev); + struct rte_intr_handle *intr_handle = &pci_dev->intr_handle; uint32_t intr_vector = 0; hw->adapter_stopped = 0; @@ -1686,8 +1882,9 @@ i40e_dev_start(struct rte_eth_dev *dev) !RTE_ETH_DEV_SRIOV(dev).active) && dev->data->dev_conf.intr_conf.rxq != 0) { intr_vector = dev->data->nb_rx_queues; - if (rte_intr_efd_enable(intr_handle, intr_vector)) - return -1; + ret = rte_intr_efd_enable(intr_handle, intr_vector); + if (ret) + return ret; } if (rte_intr_dp_is_en(intr_handle) && !intr_handle->intr_vec) { @@ -1696,8 +1893,9 @@ i40e_dev_start(struct rte_eth_dev *dev) dev->data->nb_rx_queues * sizeof(int), 0); if (!intr_handle->intr_vec) { - PMD_INIT_LOG(ERR, "Failed to allocate %d rx_queues" - " intr_vec\n", dev->data->nb_rx_queues); + PMD_INIT_LOG(ERR, + "Failed to allocate %d rx_queues intr_vec", + dev->data->nb_rx_queues); return -ENOMEM; } } @@ -1750,7 +1948,8 @@ i40e_dev_start(struct rte_eth_dev *dev) /* Apply link configure */ if (dev->data->dev_conf.link_speeds & ~(ETH_LINK_SPEED_100M | ETH_LINK_SPEED_1G | ETH_LINK_SPEED_10G | - ETH_LINK_SPEED_20G | ETH_LINK_SPEED_40G)) { + ETH_LINK_SPEED_20G | ETH_LINK_SPEED_25G | + ETH_LINK_SPEED_40G)) { PMD_DRV_LOG(ERR, "Invalid link setting"); goto err_up; } @@ -1769,13 +1968,25 @@ i40e_dev_start(struct rte_eth_dev *dev) i40e_pf_enable_irq0(hw); if (dev->data->dev_conf.intr_conf.lsc != 0) - PMD_INIT_LOG(INFO, "lsc won't enable because of" - " no intr multiplex\n"); + PMD_INIT_LOG(INFO, + "lsc won't enable because of no intr multiplex"); + } else if (dev->data->dev_conf.intr_conf.lsc != 0) { + ret = i40e_aq_set_phy_int_mask(hw, + ~(I40E_AQ_EVENT_LINK_UPDOWN | + I40E_AQ_EVENT_MODULE_QUAL_FAIL | + I40E_AQ_EVENT_MEDIA_NA), NULL); + if (ret != I40E_SUCCESS) + PMD_DRV_LOG(WARNING, "Fail to set phy mask"); + + /* Call get_link_info aq commond to enable LSE */ + i40e_dev_link_update(dev, 0); } /* enable uio intr after callback register */ rte_intr_enable(intr_handle); + i40e_filter_restore(pf); + return I40E_SUCCESS; err_up: @@ -1791,7 +2002,8 @@ i40e_dev_stop(struct rte_eth_dev *dev) struct i40e_pf *pf = I40E_DEV_PRIVATE_TO_PF(dev->data->dev_private); struct i40e_vsi *main_vsi = pf->main_vsi; struct i40e_mirror_rule *p_mirror; - struct rte_intr_handle *intr_handle = &dev->pci_dev->intr_handle; + struct rte_pci_device *pci_dev = I40E_DEV_TO_PCI(dev); + struct rte_intr_handle *intr_handle = &pci_dev->intr_handle; int i; /* Disable all queues */ @@ -1842,6 +2054,8 @@ i40e_dev_close(struct rte_eth_dev *dev) { struct i40e_pf *pf = I40E_DEV_PRIVATE_TO_PF(dev->data->dev_private); struct i40e_hw *hw = I40E_DEV_PRIVATE_TO_HW(dev->data->dev_private); + struct rte_pci_device *pci_dev = I40E_DEV_TO_PCI(dev); + struct rte_intr_handle *intr_handle = &pci_dev->intr_handle; uint32_t reg; int i; @@ -1853,23 +2067,22 @@ i40e_dev_close(struct rte_eth_dev *dev) /* Disable interrupt */ i40e_pf_disable_irq0(hw); - rte_intr_disable(&(dev->pci_dev->intr_handle)); + rte_intr_disable(intr_handle); /* shutdown and destroy the 
HMC */ i40e_shutdown_lan_hmc(hw); - /* release all the existing VSIs and VEBs */ - i40e_fdir_teardown(pf); - i40e_vsi_release(pf->main_vsi); - for (i = 0; i < pf->nb_cfg_vmdq_vsi; i++) { i40e_vsi_release(pf->vmdq[i].vsi); pf->vmdq[i].vsi = NULL; } - rte_free(pf->vmdq); pf->vmdq = NULL; + /* release all the existing VSIs and VEBs */ + i40e_fdir_teardown(pf); + i40e_vsi_release(pf->main_vsi); + /* shutdown the adminq */ i40e_aq_queue_shutdown(hw, true); i40e_shutdown_adminq(hw); @@ -1970,9 +2183,10 @@ static int i40e_dev_set_link_down(struct rte_eth_dev *dev) { uint8_t speed = I40E_LINK_SPEED_UNKNOWN; - uint8_t abilities = I40E_AQ_PHY_ENABLE_ATOMIC_LINK; + uint8_t abilities = 0; struct i40e_hw *hw = I40E_DEV_PRIVATE_TO_HW(dev->data->dev_private); + abilities = I40E_AQ_PHY_ENABLE_ATOMIC_LINK; return i40e_phy_conf_link(hw, abilities, speed); } @@ -1987,6 +2201,7 @@ i40e_dev_link_update(struct rte_eth_dev *dev, struct rte_eth_link link, old; int status; unsigned rep_cnt = MAX_REPEAT_TIME; + bool enable_lse = dev->data->dev_conf.intr_conf.lsc ? true : false; memset(&link, 0, sizeof(link)); memset(&old, 0, sizeof(old)); @@ -1995,7 +2210,8 @@ i40e_dev_link_update(struct rte_eth_dev *dev, do { /* Get link status information from hardware */ - status = i40e_aq_get_link_info(hw, false, &link_status, NULL); + status = i40e_aq_get_link_info(hw, enable_lse, + &link_status, NULL); if (status != I40E_SUCCESS) { link.link_speed = ETH_SPEED_NUM_100M; link.link_duplex = ETH_LINK_FULL_DUPLEX; @@ -2030,6 +2246,9 @@ i40e_dev_link_update(struct rte_eth_dev *dev, case I40E_LINK_SPEED_20GB: link.link_speed = ETH_SPEED_NUM_20G; break; + case I40E_LINK_SPEED_25GB: + link.link_speed = ETH_SPEED_NUM_25G; + break; case I40E_LINK_SPEED_40GB: link.link_speed = ETH_SPEED_NUM_40G; break; @@ -2296,11 +2515,9 @@ i40e_read_stats_registers(struct i40e_pf *pf, struct i40e_hw *hw) I40E_GLPRT_PTC9522L(hw->port), pf->offset_loaded, &os->tx_size_big, &ns->tx_size_big); -#ifndef TREX_PATCH i40e_stat_update_32(hw, I40E_GLQF_PCNT(pf->fdir.match_counter_index), pf->offset_loaded, &os->fd_sb_match, &ns->fd_sb_match); -#endif /* GLPRT_MSPDC not supported */ /* GLPRT_XEC not supported */ @@ -2310,46 +2527,6 @@ i40e_read_stats_registers(struct i40e_pf *pf, struct i40e_hw *hw) i40e_update_vsi_stats(pf->main_vsi); } -//TREX_PATCH -// fill stats array with fdir rules match count statistics -// Notice that we read statistics from start to start + len, but we fill the stats are -// starting from 0 with len values -void -i40e_trex_fdir_stats_get(struct rte_eth_dev *dev, uint32_t *stats, uint32_t start, uint32_t len) -{ - int i; - struct i40e_hw *hw = I40E_DEV_PRIVATE_TO_HW(dev->data->dev_private); - - for (i = 0; i < len; i++) { - stats[i] = I40E_READ_REG(hw, I40E_GLQF_PCNT(i + start)); - } -} - -// TREX_PATCH -void -i40e_trex_fdir_stats_reset(struct rte_eth_dev *dev, uint32_t *stats, uint32_t start, uint32_t len) -{ - int i; - struct i40e_hw *hw = I40E_DEV_PRIVATE_TO_HW(dev->data->dev_private); - - for (i = 0; i < len; i++) { - if (stats) { - stats[i] = I40E_READ_REG(hw, I40E_GLQF_PCNT(i + start)); - } - I40E_WRITE_REG(hw, I40E_GLQF_PCNT(i + start), 0xffffffff); - } -} - -// TREX_PATCH -int -i40e_trex_get_fw_ver(struct rte_eth_dev *dev, uint32_t *nvm_ver) -{ - struct i40e_hw *hw = I40E_DEV_PRIVATE_TO_HW(dev->data->dev_private); - - *nvm_ver = hw->nvm.version; - return 0; -} - /* Get all statistics of a port */ static void i40e_dev_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats) @@ -2366,17 +2543,10 @@ i40e_dev_stats_get(struct 
rte_eth_dev *dev, struct rte_eth_stats *stats) pf->main_vsi->eth_stats.rx_multicast + pf->main_vsi->eth_stats.rx_broadcast - pf->main_vsi->eth_stats.rx_discards; -#ifndef TREX_PATCH stats->opackets = pf->main_vsi->eth_stats.tx_unicast + pf->main_vsi->eth_stats.tx_multicast + pf->main_vsi->eth_stats.tx_broadcast; stats->ibytes = ns->eth.rx_bytes; -#else - /* Hanoch: move to global transmit and not pf->vsi and we have two high and low priorty */ - stats->opackets = ns->eth.tx_unicast +ns->eth.tx_multicast +ns->eth.tx_broadcast; - stats->ibytes = pf->main_vsi->eth_stats.rx_bytes; -#endif - stats->obytes = ns->eth.tx_bytes; stats->oerrors = ns->eth.tx_errors + pf->main_vsi->eth_stats.tx_errors; @@ -2557,6 +2727,7 @@ i40e_dev_xstats_get(struct rte_eth_dev *dev, struct rte_eth_xstat *xstats, for (i = 0; i < I40E_NB_ETH_XSTATS; i++) { xstats[count].value = *(uint64_t *)(((char *)&hw_stats->eth) + rte_i40e_stats_strings[i].offset); + xstats[count].id = count; count++; } @@ -2564,6 +2735,7 @@ i40e_dev_xstats_get(struct rte_eth_dev *dev, struct rte_eth_xstat *xstats, for (i = 0; i < I40E_NB_HW_PORT_XSTATS; i++) { xstats[count].value = *(uint64_t *)(((char *)hw_stats) + rte_i40e_hw_port_strings[i].offset); + xstats[count].id = count; count++; } @@ -2573,6 +2745,7 @@ i40e_dev_xstats_get(struct rte_eth_dev *dev, struct rte_eth_xstat *xstats, *(uint64_t *)(((char *)hw_stats) + rte_i40e_rxq_prio_strings[i].offset + (sizeof(uint64_t) * prio)); + xstats[count].id = count; count++; } } @@ -2583,6 +2756,7 @@ i40e_dev_xstats_get(struct rte_eth_dev *dev, struct rte_eth_xstat *xstats, *(uint64_t *)(((char *)hw_stats) + rte_i40e_txq_prio_strings[i].offset + (sizeof(uint64_t) * prio)); + xstats[count].id = count; count++; } } @@ -2601,19 +2775,49 @@ i40e_dev_queue_stats_mapping_set(__rte_unused struct rte_eth_dev *dev, return -ENOSYS; } +static int +i40e_fw_version_get(struct rte_eth_dev *dev, char *fw_version, size_t fw_size) +{ + struct i40e_hw *hw = I40E_DEV_PRIVATE_TO_HW(dev->data->dev_private); + u32 full_ver; + u8 ver, patch; + u16 build; + int ret; + + full_ver = hw->nvm.oem_ver; + ver = (u8)(full_ver >> 24); + build = (u16)((full_ver >> 8) & 0xffff); + patch = (u8)(full_ver & 0xff); + + ret = snprintf(fw_version, fw_size, + "%d.%d%d 0x%08x %d.%d.%d", + ((hw->nvm.version >> 12) & 0xf), + ((hw->nvm.version >> 4) & 0xff), + (hw->nvm.version & 0xf), hw->nvm.eetrack, + ver, build, patch); + + ret += 1; /* add the size of '\0' */ + if (fw_size < (u32)ret) + return ret; + else + return 0; +} + static void i40e_dev_info_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info) { struct i40e_pf *pf = I40E_DEV_PRIVATE_TO_PF(dev->data->dev_private); struct i40e_hw *hw = I40E_DEV_PRIVATE_TO_HW(dev->data->dev_private); struct i40e_vsi *vsi = pf->main_vsi; + struct rte_pci_device *pci_dev = I40E_DEV_TO_PCI(dev); + dev_info->pci_dev = pci_dev; dev_info->max_rx_queues = vsi->nb_qps; dev_info->max_tx_queues = vsi->nb_qps; dev_info->min_rx_bufsize = I40E_BUF_SIZE_MIN; dev_info->max_rx_pktlen = I40E_FRAME_SIZE_MAX; dev_info->max_mac_addrs = vsi->max_macaddrs; - dev_info->max_vfs = dev->pci_dev->max_vfs; + dev_info->max_vfs = pci_dev->max_vfs; dev_info->rx_offload_capa = DEV_RX_OFFLOAD_VLAN_STRIP | DEV_RX_OFFLOAD_QINQ_STRIP | @@ -2628,7 +2832,11 @@ i40e_dev_info_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info) DEV_TX_OFFLOAD_TCP_CKSUM | DEV_TX_OFFLOAD_SCTP_CKSUM | DEV_TX_OFFLOAD_OUTER_IPV4_CKSUM | - DEV_TX_OFFLOAD_TCP_TSO; + DEV_TX_OFFLOAD_TCP_TSO | + DEV_TX_OFFLOAD_VXLAN_TNL_TSO | + 
DEV_TX_OFFLOAD_GRE_TNL_TSO | + DEV_TX_OFFLOAD_IPIP_TNL_TSO | + DEV_TX_OFFLOAD_GENEVE_TNL_TSO; dev_info->hash_key_size = (I40E_PFQF_HKEY_MAX_INDEX + 1) * sizeof(uint32_t); dev_info->reta_size = pf->hash_lut_size; @@ -2666,6 +2874,8 @@ i40e_dev_info_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info) .nb_max = I40E_MAX_RING_DESC, .nb_min = I40E_MIN_RING_DESC, .nb_align = I40E_ALIGN_RING_DESC, + .nb_seg_max = I40E_TX_MAX_SEG, + .nb_mtu_seg_max = I40E_TX_MAX_MTU_SEG, }; if (pf->flags & I40E_FLAG_VMDQ) { @@ -2678,9 +2888,12 @@ i40e_dev_info_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info) dev_info->max_tx_queues += dev_info->vmdq_queue_num; } - if (i40e_is_40G_device(hw->device_id)) + if (I40E_PHY_TYPE_SUPPORT_40G(hw->phy.phy_types)) /* For XL710 */ dev_info->speed_capa = ETH_LINK_SPEED_40G; + else if (I40E_PHY_TYPE_SUPPORT_25G(hw->phy.phy_types)) + /* For XXV710 */ + dev_info->speed_capa = ETH_LINK_SPEED_25G; else /* For X710 */ dev_info->speed_capa = ETH_LINK_SPEED_1G | ETH_LINK_SPEED_10G; @@ -2723,7 +2936,7 @@ i40e_vlan_tpid_set(struct rte_eth_dev *dev, else { ret = -EINVAL; PMD_DRV_LOG(ERR, - "Unsupported vlan type in single vlan.\n"); + "Unsupported vlan type in single vlan."); return ret; } break; @@ -2735,13 +2948,15 @@ i40e_vlan_tpid_set(struct rte_eth_dev *dev, ret = i40e_aq_debug_read_register(hw, I40E_GL_SWT_L2TAGCTRL(reg_id), ®_r, NULL); if (ret != I40E_SUCCESS) { - PMD_DRV_LOG(ERR, "Fail to debug read from " - "I40E_GL_SWT_L2TAGCTRL[%d]", reg_id); + PMD_DRV_LOG(ERR, + "Fail to debug read from I40E_GL_SWT_L2TAGCTRL[%d]", + reg_id); ret = -EIO; return ret; } - PMD_DRV_LOG(DEBUG, "Debug read from I40E_GL_SWT_L2TAGCTRL[%d]: " - "0x%08"PRIx64"", reg_id, reg_r); + PMD_DRV_LOG(DEBUG, + "Debug read from I40E_GL_SWT_L2TAGCTRL[%d]: 0x%08"PRIx64, + reg_id, reg_r); reg_w = reg_r & (~(I40E_GL_SWT_L2TAGCTRL_ETHERTYPE_MASK)); reg_w |= ((uint64_t)tpid << I40E_GL_SWT_L2TAGCTRL_ETHERTYPE_SHIFT); @@ -2755,12 +2970,14 @@ i40e_vlan_tpid_set(struct rte_eth_dev *dev, reg_w, NULL); if (ret != I40E_SUCCESS) { ret = -EIO; - PMD_DRV_LOG(ERR, "Fail to debug write to " - "I40E_GL_SWT_L2TAGCTRL[%d]", reg_id); + PMD_DRV_LOG(ERR, + "Fail to debug write to I40E_GL_SWT_L2TAGCTRL[%d]", + reg_id); return ret; } - PMD_DRV_LOG(DEBUG, "Debug write 0x%08"PRIx64" to " - "I40E_GL_SWT_L2TAGCTRL[%d]", reg_w, reg_id); + PMD_DRV_LOG(DEBUG, + "Debug write 0x%08"PRIx64" to I40E_GL_SWT_L2TAGCTRL[%d]", + reg_w, reg_id); return ret; } @@ -2904,8 +3121,9 @@ i40e_flow_ctrl_set(struct rte_eth_dev *dev, struct rte_eth_fc_conf *fc_conf) max_high_water = I40E_RXPBSIZE >> I40E_KILOSHIFT; if ((fc_conf->high_water > max_high_water) || (fc_conf->high_water < fc_conf->low_water)) { - PMD_INIT_LOG(ERR, "Invalid high/low water setup value in KB, " - "High_water must <= %d.", max_high_water); + PMD_INIT_LOG(ERR, + "Invalid high/low water setup value in KB, High_water must be <= %d.", + max_high_water); return -EINVAL; } @@ -2926,7 +3144,7 @@ i40e_flow_ctrl_set(struct rte_eth_dev *dev, struct rte_eth_fc_conf *fc_conf) if (err < 0) return -ENOSYS; - if (i40e_is_40G_device(hw->device_id)) { + if (I40E_PHY_TYPE_SUPPORT_40G(hw->phy.phy_types)) { /* Configure flow control refresh threshold, * the value for stat_tx_pause_refresh_timer[8] * is used for global pause operation. 
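Note on the i40e_fw_version_get() hunk above: the 32-bit hw->nvm.oem_ver word is decoded into OEM major/build/patch fields and formatted together with the NVM map version and eetrack id; the function returns the required size (snprintf result plus one for the terminating NUL) only when the caller's buffer is too small, and 0 otherwise. A minimal standalone sketch of the same bit-field decoding, where the helper name and the printf output are illustrative and not part of the driver:

#include <stdint.h>
#include <stdio.h>

/* Decode the 32-bit OEM version word the way the hunk above does:
 * bits 31:24 = OEM major, bits 23:8 = build number, bits 7:0 = patch. */
static void oem_ver_decode(uint32_t oem_ver)
{
	uint8_t  major = (uint8_t)(oem_ver >> 24);
	uint16_t build = (uint16_t)((oem_ver >> 8) & 0xffff);
	uint8_t  patch = (uint8_t)(oem_ver & 0xff);

	printf("%u.%u.%u\n", major, build, patch);
}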
@@ -3077,8 +3295,8 @@ i40e_macaddr_remove(struct rte_eth_dev *dev, uint32_t index) /* No VMDQ pool enabled or configured */ if (!(pf->flags & I40E_FLAG_VMDQ) || (i > pf->nb_cfg_vmdq_vsi)) { - PMD_DRV_LOG(ERR, "No VMDQ pool enabled" - "/configured"); + PMD_DRV_LOG(ERR, + "No VMDQ pool enabled/configured"); return; } vsi = pf->vmdq[i - 1].vsi; @@ -3279,9 +3497,9 @@ i40e_dev_rss_reta_update(struct rte_eth_dev *dev, if (reta_size != lut_size || reta_size > ETH_RSS_RETA_SIZE_512) { - PMD_DRV_LOG(ERR, "The size of hash lookup table configured " - "(%d) doesn't match the number hardware can supported " - "(%d)\n", reta_size, lut_size); + PMD_DRV_LOG(ERR, + "The size of hash lookup table configured (%d) doesn't match the number hardware can supported (%d)", + reta_size, lut_size); return -EINVAL; } @@ -3320,9 +3538,9 @@ i40e_dev_rss_reta_query(struct rte_eth_dev *dev, if (reta_size != lut_size || reta_size > ETH_RSS_RETA_SIZE_512) { - PMD_DRV_LOG(ERR, "The size of hash lookup table configured " - "(%d) doesn't match the number hardware can supported " - "(%d)\n", reta_size, lut_size); + PMD_DRV_LOG(ERR, + "The size of hash lookup table configured (%d) doesn't match the number hardware can supported (%d)", + reta_size, lut_size); return -EINVAL; } @@ -3377,8 +3595,9 @@ i40e_allocate_dma_mem_d(__attribute__((unused)) struct i40e_hw *hw, mem->va = mz->addr; mem->pa = rte_mem_phy2mch(mz->memseg_id, mz->phys_addr); mem->zone = (const void *)mz; - PMD_DRV_LOG(DEBUG, "memzone %s allocated with physical address: " - "%"PRIu64, mz->name, mem->pa); + PMD_DRV_LOG(DEBUG, + "memzone %s allocated with physical address: %"PRIu64, + mz->name, mem->pa); return I40E_SUCCESS; } @@ -3395,9 +3614,9 @@ i40e_free_dma_mem_d(__attribute__((unused)) struct i40e_hw *hw, if (!mem) return I40E_ERR_PARAM; - PMD_DRV_LOG(DEBUG, "memzone %s to be freed with physical address: " - "%"PRIu64, ((const struct rte_memzone *)mem->zone)->name, - mem->pa); + PMD_DRV_LOG(DEBUG, + "memzone %s to be freed with physical address: %"PRIu64, + ((const struct rte_memzone *)mem->zone)->name, mem->pa); rte_memzone_free((const struct rte_memzone *)mem->zone); mem->zone = NULL; mem->va = NULL; @@ -3508,9 +3727,10 @@ i40e_pf_parameter_init(struct rte_eth_dev *dev) { struct i40e_pf *pf = I40E_DEV_PRIVATE_TO_PF(dev->data->dev_private); struct i40e_hw *hw = I40E_PF_TO_HW(pf); + struct rte_pci_device *pci_dev = I40E_DEV_TO_PCI(dev); uint16_t qp_count = 0, vsi_count = 0; - if (dev->pci_dev->max_vfs && !hw->func_caps.sr_iov_1_1) { + if (pci_dev->max_vfs && !hw->func_caps.sr_iov_1_1) { PMD_INIT_LOG(ERR, "HW configuration doesn't support SRIOV"); return -EINVAL; } @@ -3551,13 +3771,13 @@ i40e_pf_parameter_init(struct rte_eth_dev *dev) /* VF queue/VSI allocation */ pf->vf_qp_offset = pf->lan_qp_offset + pf->lan_nb_qps; - if (hw->func_caps.sr_iov_1_1 && dev->pci_dev->max_vfs) { + if (hw->func_caps.sr_iov_1_1 && pci_dev->max_vfs) { pf->flags |= I40E_FLAG_SRIOV; pf->vf_nb_qps = RTE_LIBRTE_I40E_QUEUE_NUM_PER_VF; - pf->vf_num = dev->pci_dev->max_vfs; - PMD_DRV_LOG(DEBUG, "%u VF VSIs, %u queues per VF VSI, " - "in total %u queues", pf->vf_num, pf->vf_nb_qps, - pf->vf_nb_qps * pf->vf_num); + pf->vf_num = pci_dev->max_vfs; + PMD_DRV_LOG(DEBUG, + "%u VF VSIs, %u queues per VF VSI, in total %u queues", + pf->vf_num, pf->vf_nb_qps, pf->vf_nb_qps * pf->vf_num); } else { pf->vf_nb_qps = 0; pf->vf_num = 0; @@ -3585,14 +3805,13 @@ i40e_pf_parameter_init(struct rte_eth_dev *dev) if (pf->max_nb_vmdq_vsi) { pf->flags |= I40E_FLAG_VMDQ; pf->vmdq_nb_qps = pf->vmdq_nb_qp_max; - 
PMD_DRV_LOG(DEBUG, "%u VMDQ VSIs, %u queues " - "per VMDQ VSI, in total %u queues", - pf->max_nb_vmdq_vsi, - pf->vmdq_nb_qps, pf->vmdq_nb_qps * - pf->max_nb_vmdq_vsi); + PMD_DRV_LOG(DEBUG, + "%u VMDQ VSIs, %u queues per VMDQ VSI, in total %u queues", + pf->max_nb_vmdq_vsi, pf->vmdq_nb_qps, + pf->vmdq_nb_qps * pf->max_nb_vmdq_vsi); } else { - PMD_DRV_LOG(INFO, "No enough queues left for " - "VMDq"); + PMD_DRV_LOG(INFO, + "No enough queues left for VMDq"); } } else { PMD_DRV_LOG(INFO, "No queue or VSI left for VMDq"); @@ -3605,15 +3824,15 @@ i40e_pf_parameter_init(struct rte_eth_dev *dev) pf->flags |= I40E_FLAG_DCB; if (qp_count > hw->func_caps.num_tx_qp) { - PMD_DRV_LOG(ERR, "Failed to allocate %u queues, which exceeds " - "the hardware maximum %u", qp_count, - hw->func_caps.num_tx_qp); + PMD_DRV_LOG(ERR, + "Failed to allocate %u queues, which exceeds the hardware maximum %u", + qp_count, hw->func_caps.num_tx_qp); return -EINVAL; } if (vsi_count > hw->func_caps.num_vsis) { - PMD_DRV_LOG(ERR, "Failed to allocate %u VSIs, which exceeds " - "the hardware maximum %u", vsi_count, - hw->func_caps.num_vsis); + PMD_DRV_LOG(ERR, + "Failed to allocate %u VSIs, which exceeds the hardware maximum %u", + vsi_count, hw->func_caps.num_vsis); return -EINVAL; } @@ -3859,8 +4078,8 @@ i40e_res_pool_alloc(struct i40e_res_pool_info *pool, */ entry = rte_zmalloc("res_pool", sizeof(*entry), 0); if (entry == NULL) { - PMD_DRV_LOG(ERR, "Failed to allocate memory for " - "resource pool"); + PMD_DRV_LOG(ERR, + "Failed to allocate memory for resource pool"); return -ENOMEM; } entry->base = valid_entry->base; @@ -3900,9 +4119,9 @@ validate_tcmap_parameter(struct i40e_vsi *vsi, uint8_t enabled_tcmap) } if (!bitmap_is_subset(hw->func_caps.enabled_tcmap, enabled_tcmap)) { - PMD_DRV_LOG(ERR, "Enabled TC map 0x%x not applicable to " - "HW support 0x%x", hw->func_caps.enabled_tcmap, - enabled_tcmap); + PMD_DRV_LOG(ERR, + "Enabled TC map 0x%x not applicable to HW support 0x%x", + hw->func_caps.enabled_tcmap, enabled_tcmap); return I40E_NOT_SUPPORTED; } return I40E_SUCCESS; @@ -4108,18 +4327,10 @@ i40e_veb_setup(struct i40e_pf *pf, struct i40e_vsi *vsi) /* create floating veb if vsi is NULL */ if (vsi != NULL) { ret = i40e_aq_add_veb(hw, veb->uplink_seid, vsi->seid, -#ifdef TREX_PATCH_LOW_LATENCY - vsi->enabled_tc, false, -#else - I40E_DEFAULT_TCMAP, false, -#endif + I40E_DEFAULT_TCMAP, false, &veb->seid, false, NULL); } else { -#ifdef TREX_PATCH_LOW_LATENCY - ret = i40e_aq_add_veb(hw, 0, 0, vsi->enabled_tc, -#else ret = i40e_aq_add_veb(hw, 0, 0, I40E_DEFAULT_TCMAP, -#endif true, &veb->seid, false, NULL); } @@ -4133,7 +4344,7 @@ i40e_veb_setup(struct i40e_pf *pf, struct i40e_vsi *vsi) ret = i40e_aq_get_veb_parameters(hw, veb->seid, NULL, NULL, &veb->stats_idx, NULL, NULL, NULL); if (ret != I40E_SUCCESS) { - PMD_DRV_LOG(ERR, "Get veb statics index failed, aq_err: %d", + PMD_DRV_LOG(ERR, "Get veb statistics index failed, aq_err: %d", hw->aq.asq_last_status); goto fail; } @@ -4157,11 +4368,16 @@ i40e_vsi_release(struct i40e_vsi *vsi) void *temp; int ret; struct i40e_mac_filter *f; - uint16_t user_param = vsi->user_param; + uint16_t user_param; if (!vsi) return I40E_SUCCESS; + if (!vsi->adapter) + return -EFAULT; + + user_param = vsi->user_param; + pf = I40E_VSI_TO_PF(vsi); hw = I40E_VSI_TO_HW(vsi); @@ -4250,8 +4466,8 @@ i40e_update_default_filter_setting(struct i40e_vsi *vsi) struct i40e_mac_filter *f; struct ether_addr *mac; - PMD_DRV_LOG(WARNING, "Cannot remove the default " - "macvlan filter"); + PMD_DRV_LOG(WARNING, + "Cannot 
remove the default macvlan filter"); /* It needs to add the permanent mac into mac list */ f = rte_zmalloc("macv_filter", sizeof(*f), 0); if (f == NULL) { @@ -4273,57 +4489,6 @@ i40e_update_default_filter_setting(struct i40e_vsi *vsi) return i40e_vsi_add_mac(vsi, &filter); } -#ifdef TREX_PATCH_LOW_LATENCY -static int -i40e_vsi_update_tc_max_bw(struct i40e_vsi *vsi, u16 credit){ - struct i40e_hw *hw = I40E_VSI_TO_HW(vsi); - int ret; - - if (!vsi->seid) { - PMD_DRV_LOG(ERR, "seid not valid"); - return -EINVAL; - } - - ret = i40e_aq_config_vsi_bw_limit(hw, vsi->seid, credit,0, NULL); - if (ret != I40E_SUCCESS) { - PMD_DRV_LOG(ERR, "Failed to configure TC BW"); - return ret; - } - return (0); -} - -static int -i40e_vsi_update_tc_bandwidth_ex(struct i40e_vsi *vsi) -{ - struct i40e_hw *hw = I40E_VSI_TO_HW(vsi); - int i, ret; - struct i40e_aqc_configure_vsi_ets_sla_bw_data tc_bw_data; - struct i40e_aqc_configure_vsi_tc_bw_data * res_buffer; - - if (!vsi->seid) { - PMD_DRV_LOG(ERR, "seid not valid"); - return -EINVAL; - } - - memset(&tc_bw_data, 0, sizeof(tc_bw_data)); - tc_bw_data.tc_valid_bits = 3; - - /* enable TC 0,1 */ - ret = i40e_aq_config_vsi_ets_sla_bw_limit(hw, vsi->seid, &tc_bw_data, NULL); - if (ret != I40E_SUCCESS) { - PMD_DRV_LOG(ERR, "Failed to configure TC BW"); - return ret; - } - - vsi->enabled_tc=3; - res_buffer = ( struct i40e_aqc_configure_vsi_tc_bw_data *)&tc_bw_data; - (void)rte_memcpy(vsi->info.qs_handle, res_buffer->qs_handles, - sizeof(vsi->info.qs_handle)); - - return I40E_SUCCESS; -} -#endif - /* * i40e_vsi_get_bw_config - Query VSI BW Information * @vsi: the VSI to be queried @@ -4352,8 +4517,9 @@ i40e_vsi_get_bw_config(struct i40e_vsi *vsi) ret = i40e_aq_query_vsi_ets_sla_config(hw, vsi->seid, &ets_sla_config, NULL); if (ret != I40E_SUCCESS) { - PMD_DRV_LOG(ERR, "VSI failed to get TC bandwdith " - "configuration %u", hw->aq.asq_last_status); + PMD_DRV_LOG(ERR, + "VSI failed to get TC bandwdith configuration %u", + hw->aq.asq_last_status); return ret; } @@ -4399,8 +4565,7 @@ i40e_enable_pf_lb(struct i40e_pf *pf) /* Use the FW API if FW >= v5.0 */ if (hw->aq.fw_maj_ver < 5) { - //TREX_PATCH - changed from ERR to INFO. Most of our customers do not have latest FW - PMD_INIT_LOG(INFO, "FW < v5.0, cannot enable loopback"); + PMD_INIT_LOG(ERR, "FW < v5.0, cannot enable loopback"); return; } @@ -4421,7 +4586,7 @@ i40e_enable_pf_lb(struct i40e_pf *pf) ret = i40e_aq_update_vsi_params(hw, &ctxt, NULL); if (ret) - PMD_DRV_LOG(ERR, "update vsi switch failed, aq_err=%d\n", + PMD_DRV_LOG(ERR, "update vsi switch failed, aq_err=%d", hw->aq.asq_last_status); } @@ -4442,14 +4607,14 @@ i40e_vsi_setup(struct i40e_pf *pf, if (type != I40E_VSI_MAIN && type != I40E_VSI_SRIOV && uplink_vsi == NULL) { - PMD_DRV_LOG(ERR, "VSI setup failed, " - "VSI link shouldn't be NULL"); + PMD_DRV_LOG(ERR, + "VSI setup failed, VSI link shouldn't be NULL"); return NULL; } if (type == I40E_VSI_MAIN && uplink_vsi != NULL) { - PMD_DRV_LOG(ERR, "VSI setup failed, MAIN VSI " - "uplink VSI should be NULL"); + PMD_DRV_LOG(ERR, + "VSI setup failed, MAIN VSI uplink VSI should be NULL"); return NULL; } @@ -4493,6 +4658,7 @@ i40e_vsi_setup(struct i40e_pf *pf, vsi->max_macaddrs = I40E_NUM_MACADDR_MAX; vsi->parent_vsi = uplink_vsi ? 
uplink_vsi : pf->main_vsi; vsi->user_param = user_param; + vsi->vlan_anti_spoof_on = 0; /* Allocate queues */ switch (vsi->type) { case I40E_VSI_MAIN : @@ -4600,8 +4766,8 @@ i40e_vsi_setup(struct i40e_pf *pf, ret = i40e_vsi_config_tc_queue_mapping(vsi, &ctxt.info, I40E_DEFAULT_TCMAP); if (ret != I40E_SUCCESS) { - PMD_DRV_LOG(ERR, "Failed to configure " - "TC queue mapping"); + PMD_DRV_LOG(ERR, + "Failed to configure TC queue mapping"); goto fail_msix_alloc; } ctxt.seid = vsi->seid; @@ -4671,8 +4837,8 @@ i40e_vsi_setup(struct i40e_pf *pf, ret = i40e_vsi_config_tc_queue_mapping(vsi, &ctxt.info, I40E_DEFAULT_TCMAP); if (ret != I40E_SUCCESS) { - PMD_DRV_LOG(ERR, "Failed to configure " - "TC queue mapping"); + PMD_DRV_LOG(ERR, + "Failed to configure TC queue mapping"); goto fail_msix_alloc; } ctxt.info.up_enable_bits = I40E_DEFAULT_TCMAP; @@ -4714,8 +4880,8 @@ i40e_vsi_setup(struct i40e_pf *pf, ret = i40e_vsi_config_tc_queue_mapping(vsi, &ctxt.info, I40E_DEFAULT_TCMAP); if (ret != I40E_SUCCESS) { - PMD_DRV_LOG(ERR, "Failed to configure " - "TC queue mapping"); + PMD_DRV_LOG(ERR, + "Failed to configure TC queue mapping"); goto fail_msix_alloc; } ctxt.info.up_enable_bits = I40E_DEFAULT_TCMAP; @@ -4732,8 +4898,8 @@ i40e_vsi_setup(struct i40e_pf *pf, ret = i40e_vsi_config_tc_queue_mapping(vsi, &ctxt.info, I40E_DEFAULT_TCMAP); if (ret != I40E_SUCCESS) { - PMD_DRV_LOG(ERR, "Failed to configure " - "TC queue mapping."); + PMD_DRV_LOG(ERR, + "Failed to configure TC queue mapping."); goto fail_msix_alloc; } ctxt.info.up_enable_bits = I40E_DEFAULT_TCMAP; @@ -4996,8 +5162,9 @@ i40e_pf_setup(struct i40e_pf *pf) /* make queue allocated first, let FDIR use queue pair 0*/ ret = i40e_res_pool_alloc(&pf->qp_pool, I40E_DEFAULT_QP_NUM_FDIR); if (ret != I40E_FDIR_QUEUE_ID) { - PMD_DRV_LOG(ERR, "queue allocation fails for FDIR :" - " ret =%d", ret); + PMD_DRV_LOG(ERR, + "queue allocation fails for FDIR: ret =%d", + ret); pf->flags &= ~I40E_FLAG_FDIR; } } @@ -5016,12 +5183,12 @@ i40e_pf_setup(struct i40e_pf *pf) else if (hw->func_caps.rss_table_size == ETH_RSS_RETA_SIZE_512) settings.hash_lut_size = I40E_HASH_LUT_SIZE_512; else { - PMD_DRV_LOG(ERR, "Hash lookup table size (%u) not supported\n", - hw->func_caps.rss_table_size); + PMD_DRV_LOG(ERR, "Hash lookup table size (%u) not supported", + hw->func_caps.rss_table_size); return I40E_ERR_PARAM; } - PMD_DRV_LOG(INFO, "Hardware capability of hash lookup table " - "size: %u\n", hw->func_caps.rss_table_size); + PMD_DRV_LOG(INFO, "Hardware capability of hash lookup table size: %u", + hw->func_caps.rss_table_size); pf->hash_lut_size = hw->func_caps.rss_table_size; /* Enable ethtype and macvlan filters */ @@ -5271,8 +5438,8 @@ i40e_dev_rx_init(struct i40e_pf *pf) ret = i40e_rx_queue_init(rxq); if (ret != I40E_SUCCESS) { - PMD_DRV_LOG(ERR, "Failed to do RX queue " - "initialization"); + PMD_DRV_LOG(ERR, + "Failed to do RX queue initialization"); break; } } @@ -5519,6 +5686,24 @@ i40e_dev_handle_vfr_event(struct rte_eth_dev *dev) } static void +i40e_notify_all_vfs_link_status(struct rte_eth_dev *dev) +{ + struct i40e_pf *pf = I40E_DEV_PRIVATE_TO_PF(dev->data->dev_private); + struct i40e_virtchnl_pf_event event; + int i; + + event.event = I40E_VIRTCHNL_EVENT_LINK_CHANGE; + event.event_data.link_event.link_status = + dev->data->dev_link.link_status; + event.event_data.link_event.link_speed = + (enum i40e_aq_link_speed)dev->data->dev_link.link_speed; + + for (i = 0; i < pf->vf_num; i++) + i40e_pf_host_send_msg_to_vf(&pf->vfs[i], I40E_VIRTCHNL_OP_EVENT, + I40E_SUCCESS, (uint8_t 
*)&event, sizeof(event)); +} + +static void i40e_dev_handle_aq_msg(struct rte_eth_dev *dev) { struct i40e_hw *hw = I40E_DEV_PRIVATE_TO_HW(dev->data->dev_private); @@ -5538,8 +5723,9 @@ i40e_dev_handle_aq_msg(struct rte_eth_dev *dev) ret = i40e_clean_arq_element(hw, &info, &pending); if (ret != I40E_SUCCESS) { - PMD_DRV_LOG(INFO, "Failed to read msg from AdminQ, " - "aq_err: %u", hw->aq.asq_last_status); + PMD_DRV_LOG(INFO, + "Failed to read msg from AdminQ, aq_err: %u", + hw->aq.asq_last_status); break; } opcode = rte_le_to_cpu_16(info.desc.opcode); @@ -5554,6 +5740,14 @@ i40e_dev_handle_aq_msg(struct rte_eth_dev *dev) info.msg_buf, info.msg_len); break; + case i40e_aqc_opc_get_link_status: + ret = i40e_dev_link_update(dev, 0); + if (!ret) { + i40e_notify_all_vfs_link_status(dev); + _rte_eth_dev_callback_process(dev, + RTE_ETH_EVENT_INTR_LSC, NULL); + } + break; default: PMD_DRV_LOG(ERR, "Request %u is not supported yet", opcode); @@ -5563,57 +5757,6 @@ i40e_dev_handle_aq_msg(struct rte_eth_dev *dev) rte_free(info.msg_buf); } -/* - * Interrupt handler is registered as the alarm callback for handling LSC - * interrupt in a definite of time, in order to wait the NIC into a stable - * state. Currently it waits 1 sec in i40e for the link up interrupt, and - * no need for link down interrupt. - */ -static void -i40e_dev_interrupt_delayed_handler(void *param) -{ - struct rte_eth_dev *dev = (struct rte_eth_dev *)param; - struct i40e_hw *hw = I40E_DEV_PRIVATE_TO_HW(dev->data->dev_private); - uint32_t icr0; - - /* read interrupt causes again */ - icr0 = I40E_READ_REG(hw, I40E_PFINT_ICR0); - -#ifdef RTE_LIBRTE_I40E_DEBUG_DRIVER - if (icr0 & I40E_PFINT_ICR0_ECC_ERR_MASK) - PMD_DRV_LOG(ERR, "ICR0: unrecoverable ECC error\n"); - if (icr0 & I40E_PFINT_ICR0_MAL_DETECT_MASK) - PMD_DRV_LOG(ERR, "ICR0: malicious programming detected\n"); - if (icr0 & I40E_PFINT_ICR0_GRST_MASK) - PMD_DRV_LOG(INFO, "ICR0: global reset requested\n"); - if (icr0 & I40E_PFINT_ICR0_PCI_EXCEPTION_MASK) - PMD_DRV_LOG(INFO, "ICR0: PCI exception\n activated\n"); - if (icr0 & I40E_PFINT_ICR0_STORM_DETECT_MASK) - PMD_DRV_LOG(INFO, "ICR0: a change in the storm control " - "state\n"); - if (icr0 & I40E_PFINT_ICR0_HMC_ERR_MASK) - PMD_DRV_LOG(ERR, "ICR0: HMC error\n"); - if (icr0 & I40E_PFINT_ICR0_PE_CRITERR_MASK) - PMD_DRV_LOG(ERR, "ICR0: protocol engine critical error\n"); -#endif /* RTE_LIBRTE_I40E_DEBUG_DRIVER */ - - if (icr0 & I40E_PFINT_ICR0_VFLR_MASK) { - PMD_DRV_LOG(INFO, "INT:VF reset detected\n"); - i40e_dev_handle_vfr_event(dev); - } - if (icr0 & I40E_PFINT_ICR0_ADMINQ_MASK) { - PMD_DRV_LOG(INFO, "INT:ADMINQ event\n"); - i40e_dev_handle_aq_msg(dev); - } - - /* handle the link up interrupt in an alarm callback */ - i40e_dev_link_update(dev, 0); - _rte_eth_dev_callback_process(dev, RTE_ETH_EVENT_INTR_LSC); - - i40e_pf_enable_irq0(hw); - rte_intr_enable(&(dev->pci_dev->intr_handle)); -} - /** * Interrupt handler triggered by NIC for handling * specific interrupt. 
@@ -5627,7 +5770,7 @@ i40e_dev_interrupt_delayed_handler(void *param) * void */ static void -i40e_dev_interrupt_handler(__rte_unused struct rte_intr_handle *handle, +i40e_dev_interrupt_handler(struct rte_intr_handle *intr_handle, void *param) { struct rte_eth_dev *dev = (struct rte_eth_dev *)param; @@ -5671,34 +5814,10 @@ i40e_dev_interrupt_handler(__rte_unused struct rte_intr_handle *handle, i40e_dev_handle_aq_msg(dev); } - /* Link Status Change interrupt */ - if (icr0 & I40E_PFINT_ICR0_LINK_STAT_CHANGE_MASK) { -#define I40E_US_PER_SECOND 1000000 - struct rte_eth_link link; - - PMD_DRV_LOG(INFO, "ICR0: link status changed\n"); - memset(&link, 0, sizeof(link)); - rte_i40e_dev_atomic_read_link_status(dev, &link); - i40e_dev_link_update(dev, 0); - - /* - * For link up interrupt, it needs to wait 1 second to let the - * hardware be a stable state. Otherwise several consecutive - * interrupts can be observed. - * For link down interrupt, no need to wait. - */ - if (!link.link_status && rte_eal_alarm_set(I40E_US_PER_SECOND, - i40e_dev_interrupt_delayed_handler, (void *)dev) >= 0) - return; - else - _rte_eth_dev_callback_process(dev, - RTE_ETH_EVENT_INTR_LSC); - } - done: /* Enable interrupt */ i40e_pf_enable_irq0(hw); - rte_intr_enable(&(dev->pci_dev->intr_handle)); + rte_intr_enable(intr_handle); } static int @@ -5751,7 +5870,7 @@ i40e_add_macvlan_filters(struct i40e_vsi *vsi, flags = I40E_AQC_MACVLAN_ADD_HASH_MATCH; break; default: - PMD_DRV_LOG(ERR, "Invalid MAC match type\n"); + PMD_DRV_LOG(ERR, "Invalid MAC match type"); ret = I40E_ERR_PARAM; goto DONE; } @@ -5826,7 +5945,7 @@ i40e_remove_macvlan_filters(struct i40e_vsi *vsi, flags = I40E_AQC_MACVLAN_DEL_HASH_MATCH; break; default: - PMD_DRV_LOG(ERR, "Invalid MAC filter type\n"); + PMD_DRV_LOG(ERR, "Invalid MAC filter type"); ret = I40E_ERR_PARAM; goto DONE; } @@ -5881,14 +6000,11 @@ i40e_find_vlan_filter(struct i40e_vsi *vsi, } static void -i40e_set_vlan_filter(struct i40e_vsi *vsi, - uint16_t vlan_id, bool on) +i40e_store_vlan_filter(struct i40e_vsi *vsi, + uint16_t vlan_id, bool on) { uint32_t vid_idx, vid_bit; - if (vlan_id > ETH_VLAN_ID_MAX) - return; - vid_idx = I40E_VFTA_IDX(vlan_id); vid_bit = I40E_VFTA_BIT(vlan_id); @@ -5898,6 +6014,38 @@ i40e_set_vlan_filter(struct i40e_vsi *vsi, vsi->vfta[vid_idx] &= ~vid_bit; } +static void +i40e_set_vlan_filter(struct i40e_vsi *vsi, + uint16_t vlan_id, bool on) +{ + struct i40e_hw *hw = I40E_VSI_TO_HW(vsi); + struct i40e_aqc_add_remove_vlan_element_data vlan_data = {0}; + int ret; + + if (vlan_id > ETH_VLAN_ID_MAX) + return; + + i40e_store_vlan_filter(vsi, vlan_id, on); + + if (!vsi->vlan_anti_spoof_on || !vlan_id) + return; + + vlan_data.vlan_tag = rte_cpu_to_le_16(vlan_id); + + if (on) { + ret = i40e_aq_add_vlan(hw, vsi->seid, + &vlan_data, 1, NULL); + if (ret != I40E_SUCCESS) + PMD_DRV_LOG(ERR, "Failed to add vlan filter"); + } else { + ret = i40e_aq_remove_vlan(hw, vsi->seid, + &vlan_data, 1, NULL); + if (ret != I40E_SUCCESS) + PMD_DRV_LOG(ERR, + "Failed to remove vlan filter"); + } +} + /** * Find all vlan options for specific mac addr, * return with actual vlan found. 
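Note on the i40e_store_vlan_filter()/i40e_set_vlan_filter() split above: the VLAN id is first recorded in a software VFTA shadow bitmap via I40E_VFTA_IDX()/I40E_VFTA_BIT(), and the admin-queue add/remove VLAN call is only issued when anti-spoofing is enabled on the VSI and the id is non-zero. The macro bodies are not part of this hunk; assuming the usual 4096-bit table split into 32-bit words, the bookkeeping amounts to the following sketch (array size and helper name are illustrative):

#include <stdbool.h>
#include <stdint.h>

#define VFTA_WORDS 128	/* 128 x 32 bits covers VLAN ids 0..4095 */

/* Shadow-bitmap bookkeeping equivalent to i40e_store_vlan_filter():
 * the upper bits of the VLAN id select a 32-bit word, the low 5 bits
 * select the bit inside that word. */
static void vfta_store(uint32_t vfta[VFTA_WORDS], uint16_t vlan_id, bool on)
{
	uint32_t idx = (vlan_id >> 5) & (VFTA_WORDS - 1);
	uint32_t bit = UINT32_C(1) << (vlan_id & 0x1f);

	if (on)
		vfta[idx] |= bit;
	else
		vfta[idx] &= ~bit;
}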
@@ -5923,8 +6071,8 @@ i40e_find_all_vlan_for_mac(struct i40e_vsi *vsi, for (k = 0; k < I40E_UINT32_BIT_SIZE; k++) { if (vsi->vfta[j] & (1 << k)) { if (i > num - 1) { - PMD_DRV_LOG(ERR, "vlan number " - "not match"); + PMD_DRV_LOG(ERR, + "vlan number doesn't match"); return I40E_ERR_PARAM; } (void)rte_memcpy(&mv_f[i].macaddr, @@ -5969,7 +6117,7 @@ i40e_find_all_mac_for_vlan(struct i40e_vsi *vsi, static int i40e_vsi_remove_all_macvlan_filter(struct i40e_vsi *vsi) { - int i, num; + int i, j, num; struct i40e_mac_filter *f; struct i40e_macvlan_filter *mv_f; int ret = I40E_SUCCESS; @@ -5994,6 +6142,7 @@ i40e_vsi_remove_all_macvlan_filter(struct i40e_vsi *vsi) TAILQ_FOREACH(f, &vsi->mac_list, next) { (void)rte_memcpy(&mv_f[i].macaddr, &f->mac_info.mac_addr, ETH_ADDR_LEN); + mv_f[i].filter_type = f->mac_info.filter_type; mv_f[i].vlan_id = 0; i++; } @@ -6003,6 +6152,8 @@ i40e_vsi_remove_all_macvlan_filter(struct i40e_vsi *vsi) vsi->vlan_num, &f->mac_info.mac_addr); if (ret != I40E_SUCCESS) goto DONE; + for (j = i; j < i + vsi->vlan_num; j++) + mv_f[j].filter_type = f->mac_info.filter_type; i += vsi->vlan_num; } } @@ -6214,7 +6365,7 @@ i40e_vsi_delete_mac(struct i40e_vsi *vsi, struct ether_addr *addr) if (filter_type == RTE_MACVLAN_PERFECT_MATCH || filter_type == RTE_MACVLAN_HASH_MATCH) { if (vlan_num == 0) { - PMD_DRV_LOG(ERR, "VLAN number shouldn't be 0\n"); + PMD_DRV_LOG(ERR, "VLAN number shouldn't be 0"); return I40E_ERR_PARAM; } } else if (filter_type == RTE_MAC_PERFECT_MATCH || @@ -6256,7 +6407,7 @@ DONE: /* Configure hash enable flags for RSS */ uint64_t -i40e_config_hena(uint64_t flags) +i40e_config_hena(uint64_t flags, enum i40e_mac_type type) { uint64_t hena = 0; @@ -6265,20 +6416,42 @@ i40e_config_hena(uint64_t flags) if (flags & ETH_RSS_FRAG_IPV4) hena |= 1ULL << I40E_FILTER_PCTYPE_FRAG_IPV4; - if (flags & ETH_RSS_NONFRAG_IPV4_TCP) - hena |= 1ULL << I40E_FILTER_PCTYPE_NONF_IPV4_TCP; - if (flags & ETH_RSS_NONFRAG_IPV4_UDP) - hena |= 1ULL << I40E_FILTER_PCTYPE_NONF_IPV4_UDP; + if (flags & ETH_RSS_NONFRAG_IPV4_TCP) { + if (type == I40E_MAC_X722) { + hena |= (1ULL << I40E_FILTER_PCTYPE_NONF_IPV4_TCP) | + (1ULL << I40E_FILTER_PCTYPE_NONF_IPV4_TCP_SYN_NO_ACK); + } else + hena |= 1ULL << I40E_FILTER_PCTYPE_NONF_IPV4_TCP; + } + if (flags & ETH_RSS_NONFRAG_IPV4_UDP) { + if (type == I40E_MAC_X722) { + hena |= (1ULL << I40E_FILTER_PCTYPE_NONF_IPV4_UDP) | + (1ULL << I40E_FILTER_PCTYPE_NONF_UNICAST_IPV4_UDP) | + (1ULL << I40E_FILTER_PCTYPE_NONF_MULTICAST_IPV4_UDP); + } else + hena |= 1ULL << I40E_FILTER_PCTYPE_NONF_IPV4_UDP; + } if (flags & ETH_RSS_NONFRAG_IPV4_SCTP) hena |= 1ULL << I40E_FILTER_PCTYPE_NONF_IPV4_SCTP; if (flags & ETH_RSS_NONFRAG_IPV4_OTHER) hena |= 1ULL << I40E_FILTER_PCTYPE_NONF_IPV4_OTHER; if (flags & ETH_RSS_FRAG_IPV6) hena |= 1ULL << I40E_FILTER_PCTYPE_FRAG_IPV6; - if (flags & ETH_RSS_NONFRAG_IPV6_TCP) - hena |= 1ULL << I40E_FILTER_PCTYPE_NONF_IPV6_TCP; - if (flags & ETH_RSS_NONFRAG_IPV6_UDP) - hena |= 1ULL << I40E_FILTER_PCTYPE_NONF_IPV6_UDP; + if (flags & ETH_RSS_NONFRAG_IPV6_TCP) { + if (type == I40E_MAC_X722) { + hena |= (1ULL << I40E_FILTER_PCTYPE_NONF_IPV6_TCP) | + (1ULL << I40E_FILTER_PCTYPE_NONF_IPV6_TCP_SYN_NO_ACK); + } else + hena |= 1ULL << I40E_FILTER_PCTYPE_NONF_IPV6_TCP; + } + if (flags & ETH_RSS_NONFRAG_IPV6_UDP) { + if (type == I40E_MAC_X722) { + hena |= (1ULL << I40E_FILTER_PCTYPE_NONF_IPV6_UDP) | + (1ULL << I40E_FILTER_PCTYPE_NONF_UNICAST_IPV6_UDP) | + (1ULL << I40E_FILTER_PCTYPE_NONF_MULTICAST_IPV6_UDP); + } else + hena |= 1ULL << 
I40E_FILTER_PCTYPE_NONF_IPV6_UDP; + } if (flags & ETH_RSS_NONFRAG_IPV6_SCTP) hena |= 1ULL << I40E_FILTER_PCTYPE_NONF_IPV6_SCTP; if (flags & ETH_RSS_NONFRAG_IPV6_OTHER) @@ -6301,8 +6474,14 @@ i40e_parse_hena(uint64_t flags) rss_hf |= ETH_RSS_FRAG_IPV4; if (flags & (1ULL << I40E_FILTER_PCTYPE_NONF_IPV4_TCP)) rss_hf |= ETH_RSS_NONFRAG_IPV4_TCP; + if (flags & (1ULL << I40E_FILTER_PCTYPE_NONF_IPV4_TCP_SYN_NO_ACK)) + rss_hf |= ETH_RSS_NONFRAG_IPV4_TCP; if (flags & (1ULL << I40E_FILTER_PCTYPE_NONF_IPV4_UDP)) rss_hf |= ETH_RSS_NONFRAG_IPV4_UDP; + if (flags & (1ULL << I40E_FILTER_PCTYPE_NONF_UNICAST_IPV4_UDP)) + rss_hf |= ETH_RSS_NONFRAG_IPV4_UDP; + if (flags & (1ULL << I40E_FILTER_PCTYPE_NONF_MULTICAST_IPV4_UDP)) + rss_hf |= ETH_RSS_NONFRAG_IPV4_UDP; if (flags & (1ULL << I40E_FILTER_PCTYPE_NONF_IPV4_SCTP)) rss_hf |= ETH_RSS_NONFRAG_IPV4_SCTP; if (flags & (1ULL << I40E_FILTER_PCTYPE_NONF_IPV4_OTHER)) @@ -6311,8 +6490,14 @@ i40e_parse_hena(uint64_t flags) rss_hf |= ETH_RSS_FRAG_IPV6; if (flags & (1ULL << I40E_FILTER_PCTYPE_NONF_IPV6_TCP)) rss_hf |= ETH_RSS_NONFRAG_IPV6_TCP; + if (flags & (1ULL << I40E_FILTER_PCTYPE_NONF_IPV6_TCP_SYN_NO_ACK)) + rss_hf |= ETH_RSS_NONFRAG_IPV6_TCP; if (flags & (1ULL << I40E_FILTER_PCTYPE_NONF_IPV6_UDP)) rss_hf |= ETH_RSS_NONFRAG_IPV6_UDP; + if (flags & (1ULL << I40E_FILTER_PCTYPE_NONF_UNICAST_IPV6_UDP)) + rss_hf |= ETH_RSS_NONFRAG_IPV6_UDP; + if (flags & (1ULL << I40E_FILTER_PCTYPE_NONF_MULTICAST_IPV6_UDP)) + rss_hf |= ETH_RSS_NONFRAG_IPV6_UDP; if (flags & (1ULL << I40E_FILTER_PCTYPE_NONF_IPV6_SCTP)) rss_hf |= ETH_RSS_NONFRAG_IPV6_SCTP; if (flags & (1ULL << I40E_FILTER_PCTYPE_NONF_IPV6_OTHER)) @@ -6332,7 +6517,10 @@ i40e_pf_disable_rss(struct i40e_pf *pf) hena = (uint64_t)i40e_read_rx_ctl(hw, I40E_PFQF_HENA(0)); hena |= ((uint64_t)i40e_read_rx_ctl(hw, I40E_PFQF_HENA(1))) << 32; - hena &= ~I40E_RSS_HENA_ALL; + if (hw->mac.type == I40E_MAC_X722) + hena &= ~I40E_RSS_HENA_ALL_X722; + else + hena &= ~I40E_RSS_HENA_ALL; i40e_write_rx_ctl(hw, I40E_PFQF_HENA(0), (uint32_t)hena); i40e_write_rx_ctl(hw, I40E_PFQF_HENA(1), (uint32_t)(hena >> 32)); I40E_WRITE_FLUSH(hw); @@ -6360,8 +6548,7 @@ i40e_set_rss_key(struct i40e_vsi *vsi, uint8_t *key, uint8_t key_len) ret = i40e_aq_set_rss_key(hw, vsi->vsi_id, key_dw); if (ret) - PMD_INIT_LOG(ERR, "Failed to configure RSS key " - "via AQ"); + PMD_INIT_LOG(ERR, "Failed to configure RSS key via AQ"); } else { uint32_t *hash_key = (uint32_t *)key; uint16_t i; @@ -6419,8 +6606,11 @@ i40e_hw_rss_hash_set(struct i40e_pf *pf, struct rte_eth_rss_conf *rss_conf) rss_hf = rss_conf->rss_hf; hena = (uint64_t)i40e_read_rx_ctl(hw, I40E_PFQF_HENA(0)); hena |= ((uint64_t)i40e_read_rx_ctl(hw, I40E_PFQF_HENA(1))) << 32; - hena &= ~I40E_RSS_HENA_ALL; - hena |= i40e_config_hena(rss_hf); + if (hw->mac.type == I40E_MAC_X722) + hena &= ~I40E_RSS_HENA_ALL_X722; + else + hena &= ~I40E_RSS_HENA_ALL; + hena |= i40e_config_hena(rss_hf, hw->mac.type); i40e_write_rx_ctl(hw, I40E_PFQF_HENA(0), (uint32_t)hena); i40e_write_rx_ctl(hw, I40E_PFQF_HENA(1), (uint32_t)(hena >> 32)); I40E_WRITE_FLUSH(hw); @@ -6439,7 +6629,9 @@ i40e_dev_rss_hash_update(struct rte_eth_dev *dev, hena = (uint64_t)i40e_read_rx_ctl(hw, I40E_PFQF_HENA(0)); hena |= ((uint64_t)i40e_read_rx_ctl(hw, I40E_PFQF_HENA(1))) << 32; - if (!(hena & I40E_RSS_HENA_ALL)) { /* RSS disabled */ + if (!(hena & ((hw->mac.type == I40E_MAC_X722) + ? 
I40E_RSS_HENA_ALL_X722 + : I40E_RSS_HENA_ALL))) { /* RSS disabled */ if (rss_hf != 0) /* Enable RSS */ return -EINVAL; return 0; /* Nothing to do */ @@ -6502,7 +6694,86 @@ i40e_dev_get_filter_type(uint16_t filter_type, uint16_t *flag) return 0; } +/* Convert tunnel filter structure */ static int +i40e_tunnel_filter_convert(struct i40e_aqc_add_remove_cloud_filters_element_data + *cld_filter, + struct i40e_tunnel_filter *tunnel_filter) +{ + ether_addr_copy((struct ether_addr *)&cld_filter->outer_mac, + (struct ether_addr *)&tunnel_filter->input.outer_mac); + ether_addr_copy((struct ether_addr *)&cld_filter->inner_mac, + (struct ether_addr *)&tunnel_filter->input.inner_mac); + tunnel_filter->input.inner_vlan = cld_filter->inner_vlan; + tunnel_filter->input.flags = cld_filter->flags; + tunnel_filter->input.tenant_id = cld_filter->tenant_id; + tunnel_filter->queue = cld_filter->queue_number; + + return 0; +} + +/* Check if there exists the tunnel filter */ +struct i40e_tunnel_filter * +i40e_sw_tunnel_filter_lookup(struct i40e_tunnel_rule *tunnel_rule, + const struct i40e_tunnel_filter_input *input) +{ + int ret; + + ret = rte_hash_lookup(tunnel_rule->hash_table, (const void *)input); + if (ret < 0) + return NULL; + + return tunnel_rule->hash_map[ret]; +} + +/* Add a tunnel filter into the SW list */ +static int +i40e_sw_tunnel_filter_insert(struct i40e_pf *pf, + struct i40e_tunnel_filter *tunnel_filter) +{ + struct i40e_tunnel_rule *rule = &pf->tunnel; + int ret; + + ret = rte_hash_add_key(rule->hash_table, &tunnel_filter->input); + if (ret < 0) { + PMD_DRV_LOG(ERR, + "Failed to insert tunnel filter to hash table %d!", + ret); + return ret; + } + rule->hash_map[ret] = tunnel_filter; + + TAILQ_INSERT_TAIL(&rule->tunnel_list, tunnel_filter, rules); + + return 0; +} + +/* Delete a tunnel filter from the SW list */ +int +i40e_sw_tunnel_filter_del(struct i40e_pf *pf, + struct i40e_tunnel_filter_input *input) +{ + struct i40e_tunnel_rule *rule = &pf->tunnel; + struct i40e_tunnel_filter *tunnel_filter; + int ret; + + ret = rte_hash_del_key(rule->hash_table, input); + if (ret < 0) { + PMD_DRV_LOG(ERR, + "Failed to delete tunnel filter to hash table %d!", + ret); + return ret; + } + tunnel_filter = rule->hash_map[ret]; + rule->hash_map[ret] = NULL; + + TAILQ_REMOVE(&rule->tunnel_list, tunnel_filter, rules); + rte_free(tunnel_filter); + + return 0; +} + +int i40e_dev_tunnel_filter_set(struct i40e_pf *pf, struct rte_eth_tunnel_filter_conf *tunnel_filter, uint8_t add) @@ -6517,6 +6788,9 @@ i40e_dev_tunnel_filter_set(struct i40e_pf *pf, struct i40e_vsi *vsi = pf->main_vsi; struct i40e_aqc_add_remove_cloud_filters_element_data *cld_filter; struct i40e_aqc_add_remove_cloud_filters_element_data *pfilter; + struct i40e_tunnel_rule *tunnel_rule = &pf->tunnel; + struct i40e_tunnel_filter *tunnel, *node; + struct i40e_tunnel_filter check_filter; /* Check if filter exists */ cld_filter = rte_zmalloc("tunnel_filter", sizeof(struct i40e_aqc_add_remove_cloud_filters_element_data), @@ -6579,11 +6853,38 @@ i40e_dev_tunnel_filter_set(struct i40e_pf *pf, pfilter->tenant_id = rte_cpu_to_le_32(tunnel_filter->tenant_id); pfilter->queue_number = rte_cpu_to_le_16(tunnel_filter->queue_id); - if (add) + /* Check if there is the filter in SW list */ + memset(&check_filter, 0, sizeof(check_filter)); + i40e_tunnel_filter_convert(cld_filter, &check_filter); + node = i40e_sw_tunnel_filter_lookup(tunnel_rule, &check_filter.input); + if (add && node) { + PMD_DRV_LOG(ERR, "Conflict with existing tunnel rules!"); + return -EINVAL; + } + 
+ if (!add && !node) { + PMD_DRV_LOG(ERR, "There's no corresponding tunnel filter!"); + return -EINVAL; + } + + if (add) { ret = i40e_aq_add_cloud_filters(hw, vsi->seid, cld_filter, 1); - else + if (ret < 0) { + PMD_DRV_LOG(ERR, "Failed to add a tunnel filter."); + return ret; + } + tunnel = rte_zmalloc("tunnel_filter", sizeof(*tunnel), 0); + rte_memcpy(tunnel, &check_filter, sizeof(check_filter)); + ret = i40e_sw_tunnel_filter_insert(pf, tunnel); + } else { ret = i40e_aq_remove_cloud_filters(hw, vsi->seid, - cld_filter, 1); + cld_filter, 1); + if (ret < 0) { + PMD_DRV_LOG(ERR, "Failed to delete a tunnel filter."); + return ret; + } + ret = i40e_sw_tunnel_filter_del(pf, &node->input); + } rte_free(cld_filter); return ret; @@ -6620,8 +6921,9 @@ i40e_add_vxlan_port(struct i40e_pf *pf, uint16_t port) /* Now check if there is space to add the new port */ idx = i40e_get_vxlan_port_idx(pf, 0); if (idx < 0) { - PMD_DRV_LOG(ERR, "Maximum number of UDP ports reached," - "not adding port %d", port); + PMD_DRV_LOG(ERR, + "Maximum number of UDP ports reached, not adding port %d", + port); return -ENOSPC; } @@ -6860,7 +7162,7 @@ i40e_dev_set_gre_key_len(struct i40e_hw *hw, uint8_t len) int ret = -EINVAL; val = I40E_READ_REG(hw, I40E_GL_PRS_FVBM(2)); - PMD_DRV_LOG(DEBUG, "Read original GL_PRS_FVBM with 0x%08x\n", val); + PMD_DRV_LOG(DEBUG, "Read original GL_PRS_FVBM with 0x%08x", val); if (len == 3) { reg = val | I40E_GL_PRS_FVBM_MSK_ENA; @@ -6879,7 +7181,7 @@ i40e_dev_set_gre_key_len(struct i40e_hw *hw, uint8_t len) } else { ret = 0; } - PMD_DRV_LOG(DEBUG, "Read modified GL_PRS_FVBM with 0x%08x\n", + PMD_DRV_LOG(DEBUG, "Read modified GL_PRS_FVBM with 0x%08x", I40E_READ_REG(hw, I40E_GL_PRS_FVBM(2))); return ret; @@ -6992,15 +7294,15 @@ i40e_set_symmetric_hash_enable_per_port(struct i40e_hw *hw, uint8_t enable) if (enable > 0) { if (reg & I40E_PRTQF_CTL_0_HSYM_ENA_MASK) { - PMD_DRV_LOG(INFO, "Symmetric hash has already " - "been enabled"); + PMD_DRV_LOG(INFO, + "Symmetric hash has already been enabled"); return; } reg |= I40E_PRTQF_CTL_0_HSYM_ENA_MASK; } else { if (!(reg & I40E_PRTQF_CTL_0_HSYM_ENA_MASK)) { - PMD_DRV_LOG(INFO, "Symmetric hash has already " - "been disabled"); + PMD_DRV_LOG(INFO, + "Symmetric hash has already been disabled"); return; } reg &= ~I40E_PRTQF_CTL_0_HSYM_ENA_MASK; @@ -7124,16 +7426,16 @@ i40e_set_hash_filter_global_config(struct i40e_hw *hw, if (g_cfg->hash_func == RTE_ETH_HASH_FUNCTION_TOEPLITZ) { /* Toeplitz */ if (reg & I40E_GLQF_CTL_HTOEP_MASK) { - PMD_DRV_LOG(DEBUG, "Hash function already set to " - "Toeplitz"); + PMD_DRV_LOG(DEBUG, + "Hash function already set to Toeplitz"); goto out; } reg |= I40E_GLQF_CTL_HTOEP_MASK; } else if (g_cfg->hash_func == RTE_ETH_HASH_FUNCTION_SIMPLE_XOR) { /* Simple XOR */ if (!(reg & I40E_GLQF_CTL_HTOEP_MASK)) { - PMD_DRV_LOG(DEBUG, "Hash function already set to " - "Simple XOR"); + PMD_DRV_LOG(DEBUG, + "Hash function already set to Simple XOR"); goto out; } reg &= ~I40E_GLQF_CTL_HTOEP_MASK; @@ -7176,6 +7478,24 @@ i40e_get_valid_input_set(enum i40e_filter_pctype pctype, I40E_INSET_IPV4_SRC | I40E_INSET_IPV4_DST | I40E_INSET_SRC_PORT | I40E_INSET_DST_PORT | I40E_INSET_FLEX_PAYLOAD, + [I40E_FILTER_PCTYPE_NONF_UNICAST_IPV4_UDP] = + I40E_INSET_DMAC | I40E_INSET_SMAC | + I40E_INSET_VLAN_OUTER | I40E_INSET_VLAN_INNER | + I40E_INSET_VLAN_TUNNEL | I40E_INSET_IPV4_TOS | + I40E_INSET_IPV4_PROTO | I40E_INSET_IPV4_TTL | + I40E_INSET_TUNNEL_DMAC | I40E_INSET_TUNNEL_ID | + I40E_INSET_IPV4_SRC | I40E_INSET_IPV4_DST | + I40E_INSET_SRC_PORT | 
I40E_INSET_DST_PORT | + I40E_INSET_FLEX_PAYLOAD, + [I40E_FILTER_PCTYPE_NONF_MULTICAST_IPV4_UDP] = + I40E_INSET_DMAC | I40E_INSET_SMAC | + I40E_INSET_VLAN_OUTER | I40E_INSET_VLAN_INNER | + I40E_INSET_VLAN_TUNNEL | I40E_INSET_IPV4_TOS | + I40E_INSET_IPV4_PROTO | I40E_INSET_IPV4_TTL | + I40E_INSET_TUNNEL_DMAC | I40E_INSET_TUNNEL_ID | + I40E_INSET_IPV4_SRC | I40E_INSET_IPV4_DST | + I40E_INSET_SRC_PORT | I40E_INSET_DST_PORT | + I40E_INSET_FLEX_PAYLOAD, [I40E_FILTER_PCTYPE_NONF_IPV4_TCP] = I40E_INSET_DMAC | I40E_INSET_SMAC | I40E_INSET_VLAN_OUTER | I40E_INSET_VLAN_INNER | @@ -7185,6 +7505,15 @@ i40e_get_valid_input_set(enum i40e_filter_pctype pctype, I40E_INSET_IPV4_SRC | I40E_INSET_IPV4_DST | I40E_INSET_SRC_PORT | I40E_INSET_DST_PORT | I40E_INSET_TCP_FLAGS | I40E_INSET_FLEX_PAYLOAD, + [I40E_FILTER_PCTYPE_NONF_IPV4_TCP_SYN_NO_ACK] = + I40E_INSET_DMAC | I40E_INSET_SMAC | + I40E_INSET_VLAN_OUTER | I40E_INSET_VLAN_INNER | + I40E_INSET_VLAN_TUNNEL | I40E_INSET_IPV4_TOS | + I40E_INSET_IPV4_PROTO | I40E_INSET_IPV4_TTL | + I40E_INSET_TUNNEL_DMAC | I40E_INSET_TUNNEL_ID | + I40E_INSET_IPV4_SRC | I40E_INSET_IPV4_DST | + I40E_INSET_SRC_PORT | I40E_INSET_DST_PORT | + I40E_INSET_TCP_FLAGS | I40E_INSET_FLEX_PAYLOAD, [I40E_FILTER_PCTYPE_NONF_IPV4_SCTP] = I40E_INSET_DMAC | I40E_INSET_SMAC | I40E_INSET_VLAN_OUTER | I40E_INSET_VLAN_INNER | @@ -7218,6 +7547,24 @@ i40e_get_valid_input_set(enum i40e_filter_pctype pctype, I40E_INSET_IPV6_HOP_LIMIT | I40E_INSET_IPV6_SRC | I40E_INSET_IPV6_DST | I40E_INSET_SRC_PORT | I40E_INSET_DST_PORT | I40E_INSET_FLEX_PAYLOAD, + [I40E_FILTER_PCTYPE_NONF_UNICAST_IPV6_UDP] = + I40E_INSET_DMAC | I40E_INSET_SMAC | + I40E_INSET_VLAN_OUTER | I40E_INSET_VLAN_INNER | + I40E_INSET_VLAN_TUNNEL | I40E_INSET_IPV6_TC | + I40E_INSET_IPV6_FLOW | I40E_INSET_IPV6_NEXT_HDR | + I40E_INSET_IPV6_HOP_LIMIT | I40E_INSET_IPV6_SRC | + I40E_INSET_IPV6_DST | I40E_INSET_SRC_PORT | + I40E_INSET_DST_PORT | I40E_INSET_TCP_FLAGS | + I40E_INSET_FLEX_PAYLOAD, + [I40E_FILTER_PCTYPE_NONF_MULTICAST_IPV6_UDP] = + I40E_INSET_DMAC | I40E_INSET_SMAC | + I40E_INSET_VLAN_OUTER | I40E_INSET_VLAN_INNER | + I40E_INSET_VLAN_TUNNEL | I40E_INSET_IPV6_TC | + I40E_INSET_IPV6_FLOW | I40E_INSET_IPV6_NEXT_HDR | + I40E_INSET_IPV6_HOP_LIMIT | I40E_INSET_IPV6_SRC | + I40E_INSET_IPV6_DST | I40E_INSET_SRC_PORT | + I40E_INSET_DST_PORT | I40E_INSET_TCP_FLAGS | + I40E_INSET_FLEX_PAYLOAD, [I40E_FILTER_PCTYPE_NONF_IPV6_TCP] = I40E_INSET_DMAC | I40E_INSET_SMAC | I40E_INSET_VLAN_OUTER | I40E_INSET_VLAN_INNER | @@ -7227,6 +7574,15 @@ i40e_get_valid_input_set(enum i40e_filter_pctype pctype, I40E_INSET_IPV6_DST | I40E_INSET_SRC_PORT | I40E_INSET_DST_PORT | I40E_INSET_TCP_FLAGS | I40E_INSET_FLEX_PAYLOAD, + [I40E_FILTER_PCTYPE_NONF_IPV6_TCP_SYN_NO_ACK] = + I40E_INSET_DMAC | I40E_INSET_SMAC | + I40E_INSET_VLAN_OUTER | I40E_INSET_VLAN_INNER | + I40E_INSET_VLAN_TUNNEL | I40E_INSET_IPV6_TC | + I40E_INSET_IPV6_FLOW | I40E_INSET_IPV6_NEXT_HDR | + I40E_INSET_IPV6_HOP_LIMIT | I40E_INSET_IPV6_SRC | + I40E_INSET_IPV6_DST | I40E_INSET_SRC_PORT | + I40E_INSET_DST_PORT | I40E_INSET_TCP_FLAGS | + I40E_INSET_FLEX_PAYLOAD, [I40E_FILTER_PCTYPE_NONF_IPV6_SCTP] = I40E_INSET_DMAC | I40E_INSET_SMAC | I40E_INSET_VLAN_OUTER | I40E_INSET_VLAN_INNER | @@ -7266,11 +7622,26 @@ i40e_get_valid_input_set(enum i40e_filter_pctype pctype, I40E_INSET_IPV4_SRC | I40E_INSET_IPV4_DST | I40E_INSET_IPV4_TOS | I40E_INSET_IPV4_TTL | I40E_INSET_SRC_PORT | I40E_INSET_DST_PORT, + [I40E_FILTER_PCTYPE_NONF_UNICAST_IPV4_UDP] = + I40E_INSET_VLAN_OUTER | I40E_INSET_VLAN_INNER | + I40E_INSET_IPV4_SRC | 
I40E_INSET_IPV4_DST | + I40E_INSET_IPV4_TOS | I40E_INSET_IPV4_TTL | + I40E_INSET_SRC_PORT | I40E_INSET_DST_PORT, + [I40E_FILTER_PCTYPE_NONF_MULTICAST_IPV4_UDP] = + I40E_INSET_VLAN_OUTER | I40E_INSET_VLAN_INNER | + I40E_INSET_IPV4_SRC | I40E_INSET_IPV4_DST | + I40E_INSET_IPV4_TOS | I40E_INSET_IPV4_TTL | + I40E_INSET_SRC_PORT | I40E_INSET_DST_PORT, [I40E_FILTER_PCTYPE_NONF_IPV4_TCP] = I40E_INSET_VLAN_OUTER | I40E_INSET_VLAN_INNER | I40E_INSET_IPV4_SRC | I40E_INSET_IPV4_DST | I40E_INSET_IPV4_TOS | I40E_INSET_IPV4_TTL | I40E_INSET_SRC_PORT | I40E_INSET_DST_PORT, + [I40E_FILTER_PCTYPE_NONF_IPV4_TCP_SYN_NO_ACK] = + I40E_INSET_VLAN_OUTER | I40E_INSET_VLAN_INNER | + I40E_INSET_IPV4_SRC | I40E_INSET_IPV4_DST | + I40E_INSET_IPV4_TOS | I40E_INSET_IPV4_TTL | + I40E_INSET_SRC_PORT | I40E_INSET_DST_PORT, [I40E_FILTER_PCTYPE_NONF_IPV4_SCTP] = I40E_INSET_VLAN_OUTER | I40E_INSET_VLAN_INNER | I40E_INSET_IPV4_SRC | I40E_INSET_IPV4_DST | @@ -7292,11 +7663,26 @@ i40e_get_valid_input_set(enum i40e_filter_pctype pctype, I40E_INSET_IPV6_SRC | I40E_INSET_IPV6_DST | I40E_INSET_IPV6_TC | I40E_INSET_IPV6_HOP_LIMIT | I40E_INSET_SRC_PORT | I40E_INSET_DST_PORT, + [I40E_FILTER_PCTYPE_NONF_UNICAST_IPV6_UDP] = + I40E_INSET_VLAN_OUTER | I40E_INSET_VLAN_INNER | + I40E_INSET_IPV6_SRC | I40E_INSET_IPV6_DST | + I40E_INSET_IPV6_TC | I40E_INSET_IPV6_HOP_LIMIT | + I40E_INSET_SRC_PORT | I40E_INSET_DST_PORT, + [I40E_FILTER_PCTYPE_NONF_MULTICAST_IPV6_UDP] = + I40E_INSET_VLAN_OUTER | I40E_INSET_VLAN_INNER | + I40E_INSET_IPV6_SRC | I40E_INSET_IPV6_DST | + I40E_INSET_IPV6_TC | I40E_INSET_IPV6_HOP_LIMIT | + I40E_INSET_SRC_PORT | I40E_INSET_DST_PORT, [I40E_FILTER_PCTYPE_NONF_IPV6_TCP] = I40E_INSET_VLAN_OUTER | I40E_INSET_VLAN_INNER | I40E_INSET_IPV6_SRC | I40E_INSET_IPV6_DST | I40E_INSET_IPV6_TC | I40E_INSET_IPV6_HOP_LIMIT | I40E_INSET_SRC_PORT | I40E_INSET_DST_PORT, + [I40E_FILTER_PCTYPE_NONF_IPV6_TCP_SYN_NO_ACK] = + I40E_INSET_VLAN_OUTER | I40E_INSET_VLAN_INNER | + I40E_INSET_IPV6_SRC | I40E_INSET_IPV6_DST | + I40E_INSET_IPV6_TC | I40E_INSET_IPV6_HOP_LIMIT | + I40E_INSET_SRC_PORT | I40E_INSET_DST_PORT, [I40E_FILTER_PCTYPE_NONF_IPV6_SCTP] = I40E_INSET_VLAN_OUTER | I40E_INSET_VLAN_INNER | I40E_INSET_IPV6_SRC | I40E_INSET_IPV6_DST | @@ -7340,7 +7726,7 @@ i40e_validate_input_set(enum i40e_filter_pctype pctype, } /* default input set fields combination per pctype */ -static uint64_t +uint64_t i40e_get_default_input_set(uint16_t pctype) { static const uint64_t default_inset_table[] = { @@ -7349,9 +7735,18 @@ i40e_get_default_input_set(uint16_t pctype) [I40E_FILTER_PCTYPE_NONF_IPV4_UDP] = I40E_INSET_IPV4_SRC | I40E_INSET_IPV4_DST | I40E_INSET_SRC_PORT | I40E_INSET_DST_PORT, + [I40E_FILTER_PCTYPE_NONF_UNICAST_IPV4_UDP] = + I40E_INSET_IPV4_SRC | I40E_INSET_IPV4_DST | + I40E_INSET_SRC_PORT | I40E_INSET_DST_PORT, + [I40E_FILTER_PCTYPE_NONF_MULTICAST_IPV4_UDP] = + I40E_INSET_IPV4_SRC | I40E_INSET_IPV4_DST | + I40E_INSET_SRC_PORT | I40E_INSET_DST_PORT, [I40E_FILTER_PCTYPE_NONF_IPV4_TCP] = I40E_INSET_IPV4_SRC | I40E_INSET_IPV4_DST | I40E_INSET_SRC_PORT | I40E_INSET_DST_PORT, + [I40E_FILTER_PCTYPE_NONF_IPV4_TCP_SYN_NO_ACK] = + I40E_INSET_IPV4_SRC | I40E_INSET_IPV4_DST | + I40E_INSET_SRC_PORT | I40E_INSET_DST_PORT, [I40E_FILTER_PCTYPE_NONF_IPV4_SCTP] = I40E_INSET_IPV4_SRC | I40E_INSET_IPV4_DST | I40E_INSET_SRC_PORT | I40E_INSET_DST_PORT | @@ -7363,9 +7758,18 @@ i40e_get_default_input_set(uint16_t pctype) [I40E_FILTER_PCTYPE_NONF_IPV6_UDP] = I40E_INSET_IPV6_SRC | I40E_INSET_IPV6_DST | I40E_INSET_SRC_PORT | I40E_INSET_DST_PORT, + 
[I40E_FILTER_PCTYPE_NONF_UNICAST_IPV6_UDP] = + I40E_INSET_IPV6_SRC | I40E_INSET_IPV6_DST | + I40E_INSET_SRC_PORT | I40E_INSET_DST_PORT, + [I40E_FILTER_PCTYPE_NONF_MULTICAST_IPV6_UDP] = + I40E_INSET_IPV6_SRC | I40E_INSET_IPV6_DST | + I40E_INSET_SRC_PORT | I40E_INSET_DST_PORT, [I40E_FILTER_PCTYPE_NONF_IPV6_TCP] = I40E_INSET_IPV6_SRC | I40E_INSET_IPV6_DST | I40E_INSET_SRC_PORT | I40E_INSET_DST_PORT, + [I40E_FILTER_PCTYPE_NONF_IPV6_TCP_SYN_NO_ACK] = + I40E_INSET_IPV6_SRC | I40E_INSET_IPV6_DST | + I40E_INSET_SRC_PORT | I40E_INSET_DST_PORT, [I40E_FILTER_PCTYPE_NONF_IPV6_SCTP] = I40E_INSET_IPV6_SRC | I40E_INSET_IPV6_DST | I40E_INSET_SRC_PORT | I40E_INSET_DST_PORT | @@ -7484,25 +7888,23 @@ i40e_parse_input_set(uint64_t *inset, * and vice versa */ static uint64_t -i40e_translate_input_set_reg(uint64_t input) +i40e_translate_input_set_reg(enum i40e_mac_type type, uint64_t input) { uint64_t val = 0; uint16_t i; - static const struct { + struct inset_map { uint64_t inset; uint64_t inset_reg; - } inset_map[] = { + }; + + static const struct inset_map inset_map_common[] = { {I40E_INSET_DMAC, I40E_REG_INSET_L2_DMAC}, {I40E_INSET_SMAC, I40E_REG_INSET_L2_SMAC}, {I40E_INSET_VLAN_OUTER, I40E_REG_INSET_L2_OUTER_VLAN}, {I40E_INSET_VLAN_INNER, I40E_REG_INSET_L2_INNER_VLAN}, {I40E_INSET_LAST_ETHER_TYPE, I40E_REG_INSET_LAST_ETHER_TYPE}, - {I40E_INSET_IPV4_SRC, I40E_REG_INSET_L3_SRC_IP4}, - {I40E_INSET_IPV4_DST, I40E_REG_INSET_L3_DST_IP4}, {I40E_INSET_IPV4_TOS, I40E_REG_INSET_L3_IP4_TOS}, - {I40E_INSET_IPV4_PROTO, I40E_REG_INSET_L3_IP4_PROTO}, - {I40E_INSET_IPV4_TTL, I40E_REG_INSET_L3_IP4_TTL}, {I40E_INSET_IPV6_SRC, I40E_REG_INSET_L3_SRC_IP6}, {I40E_INSET_IPV6_DST, I40E_REG_INSET_L3_DST_IP6}, {I40E_INSET_IPV6_TC, I40E_REG_INSET_L3_IP6_TC}, @@ -7531,13 +7933,40 @@ i40e_translate_input_set_reg(uint64_t input) {I40E_INSET_FLEX_PAYLOAD_W8, I40E_REG_INSET_FLEX_PAYLOAD_WORD8}, }; + /* some different registers map in x722*/ + static const struct inset_map inset_map_diff_x722[] = { + {I40E_INSET_IPV4_SRC, I40E_X722_REG_INSET_L3_SRC_IP4}, + {I40E_INSET_IPV4_DST, I40E_X722_REG_INSET_L3_DST_IP4}, + {I40E_INSET_IPV4_PROTO, I40E_X722_REG_INSET_L3_IP4_PROTO}, + {I40E_INSET_IPV4_TTL, I40E_X722_REG_INSET_L3_IP4_TTL}, + }; + + static const struct inset_map inset_map_diff_not_x722[] = { + {I40E_INSET_IPV4_SRC, I40E_REG_INSET_L3_SRC_IP4}, + {I40E_INSET_IPV4_DST, I40E_REG_INSET_L3_DST_IP4}, + {I40E_INSET_IPV4_PROTO, I40E_REG_INSET_L3_IP4_PROTO}, + {I40E_INSET_IPV4_TTL, I40E_REG_INSET_L3_IP4_TTL}, + }; + if (input == 0) return val; /* Translate input set to register aware inset */ - for (i = 0; i < RTE_DIM(inset_map); i++) { - if (input & inset_map[i].inset) - val |= inset_map[i].inset_reg; + if (type == I40E_MAC_X722) { + for (i = 0; i < RTE_DIM(inset_map_diff_x722); i++) { + if (input & inset_map_diff_x722[i].inset) + val |= inset_map_diff_x722[i].inset_reg; + } + } else { + for (i = 0; i < RTE_DIM(inset_map_diff_not_x722); i++) { + if (input & inset_map_diff_not_x722[i].inset) + val |= inset_map_diff_not_x722[i].inset_reg; + } + } + + for (i = 0; i < RTE_DIM(inset_map_common); i++) { + if (input & inset_map_common[i].inset) + val |= inset_map_common[i].inset_reg; } return val; @@ -7596,10 +8025,10 @@ i40e_check_write_reg(struct i40e_hw *hw, uint32_t addr, uint32_t val) { uint32_t reg = i40e_read_rx_ctl(hw, addr); - PMD_DRV_LOG(DEBUG, "[0x%08x] original: 0x%08x\n", addr, reg); + PMD_DRV_LOG(DEBUG, "[0x%08x] original: 0x%08x", addr, reg); if (reg != val) i40e_write_rx_ctl(hw, addr, val); - PMD_DRV_LOG(DEBUG, "[0x%08x] after: 
0x%08x\n", addr, + PMD_DRV_LOG(DEBUG, "[0x%08x] after: 0x%08x", addr, (uint32_t)i40e_read_rx_ctl(hw, addr)); } @@ -7614,15 +8043,22 @@ i40e_filter_input_set_init(struct i40e_pf *pf) for (pctype = I40E_FILTER_PCTYPE_NONF_IPV4_UDP; pctype <= I40E_FILTER_PCTYPE_L2_PAYLOAD; pctype++) { - if (!I40E_VALID_PCTYPE(pctype)) - continue; + if (hw->mac.type == I40E_MAC_X722) { + if (!I40E_VALID_PCTYPE_X722(pctype)) + continue; + } else { + if (!I40E_VALID_PCTYPE(pctype)) + continue; + } + input_set = i40e_get_default_input_set(pctype); num = i40e_generate_inset_mask_reg(input_set, mask_reg, I40E_INSET_MASK_NUM_REG); if (num < 0) return; - inset_reg = i40e_translate_input_set_reg(input_set); + inset_reg = i40e_translate_input_set_reg(hw->mac.type, + input_set); i40e_check_write_reg(hw, I40E_PRTQF_FD_INSET(pctype, 0), (uint32_t)(inset_reg & UINT32_MAX)); @@ -7680,7 +8116,15 @@ i40e_hash_filter_inset_select(struct i40e_hw *hw, PMD_DRV_LOG(ERR, "invalid flow_type input."); return -EINVAL; } - pctype = i40e_flowtype_to_pctype(conf->flow_type); + + if (hw->mac.type == I40E_MAC_X722) { + /* get translated pctype value in fd pctype register */ + pctype = (enum i40e_filter_pctype)i40e_read_rx_ctl(hw, + I40E_GLQF_FD_PCTYPES((int)i40e_flowtype_to_pctype( + conf->flow_type))); + } else + pctype = i40e_flowtype_to_pctype(conf->flow_type); + ret = i40e_parse_input_set(&input_set, pctype, conf->field, conf->inset_size); if (ret) { @@ -7704,7 +8148,7 @@ i40e_hash_filter_inset_select(struct i40e_hw *hw, if (num < 0) return -EINVAL; - inset_reg |= i40e_translate_input_set_reg(input_set); + inset_reg |= i40e_translate_input_set_reg(hw->mac.type, input_set); i40e_check_write_reg(hw, I40E_GLQF_HASH_INSET(0, pctype), (uint32_t)(inset_reg & UINT32_MAX)); @@ -7749,7 +8193,9 @@ i40e_fdir_filter_inset_select(struct i40e_pf *pf, PMD_DRV_LOG(ERR, "invalid flow_type input."); return -EINVAL; } + pctype = i40e_flowtype_to_pctype(conf->flow_type); + ret = i40e_parse_input_set(&input_set, pctype, conf->field, conf->inset_size); if (ret) { @@ -7780,7 +8226,7 @@ i40e_fdir_filter_inset_select(struct i40e_pf *pf, if (num < 0) return -EINVAL; - inset_reg |= i40e_translate_input_set_reg(input_set); + inset_reg |= i40e_translate_input_set_reg(hw->mac.type, input_set); i40e_check_write_reg(hw, I40E_PRTQF_FD_INSET(pctype, 0), (uint32_t)(inset_reg & UINT32_MAX)); @@ -7893,16 +8339,95 @@ i40e_hash_filter_ctrl(struct rte_eth_dev *dev, return ret; } +/* Convert ethertype filter structure */ +static int +i40e_ethertype_filter_convert(const struct rte_eth_ethertype_filter *input, + struct i40e_ethertype_filter *filter) +{ + rte_memcpy(&filter->input.mac_addr, &input->mac_addr, ETHER_ADDR_LEN); + filter->input.ether_type = input->ether_type; + filter->flags = input->flags; + filter->queue = input->queue; + + return 0; +} + +/* Check if there exists the ehtertype filter */ +struct i40e_ethertype_filter * +i40e_sw_ethertype_filter_lookup(struct i40e_ethertype_rule *ethertype_rule, + const struct i40e_ethertype_filter_input *input) +{ + int ret; + + ret = rte_hash_lookup(ethertype_rule->hash_table, (const void *)input); + if (ret < 0) + return NULL; + + return ethertype_rule->hash_map[ret]; +} + +/* Add ethertype filter in SW list */ +static int +i40e_sw_ethertype_filter_insert(struct i40e_pf *pf, + struct i40e_ethertype_filter *filter) +{ + struct i40e_ethertype_rule *rule = &pf->ethertype; + int ret; + + ret = rte_hash_add_key(rule->hash_table, &filter->input); + if (ret < 0) { + PMD_DRV_LOG(ERR, + "Failed to insert ethertype filter" + " to hash 
table %d!", + ret); + return ret; + } + rule->hash_map[ret] = filter; + + TAILQ_INSERT_TAIL(&rule->ethertype_list, filter, rules); + + return 0; +} + +/* Delete ethertype filter in SW list */ +int +i40e_sw_ethertype_filter_del(struct i40e_pf *pf, + struct i40e_ethertype_filter_input *input) +{ + struct i40e_ethertype_rule *rule = &pf->ethertype; + struct i40e_ethertype_filter *filter; + int ret; + + ret = rte_hash_del_key(rule->hash_table, input); + if (ret < 0) { + PMD_DRV_LOG(ERR, + "Failed to delete ethertype filter" + " to hash table %d!", + ret); + return ret; + } + filter = rule->hash_map[ret]; + rule->hash_map[ret] = NULL; + + TAILQ_REMOVE(&rule->ethertype_list, filter, rules); + rte_free(filter); + + return 0; +} + /* * Configure ethertype filter, which can director packet by filtering * with mac address and ether_type or only ether_type */ -static int +int i40e_ethertype_filter_set(struct i40e_pf *pf, struct rte_eth_ethertype_filter *filter, bool add) { struct i40e_hw *hw = I40E_PF_TO_HW(pf); + struct i40e_ethertype_rule *ethertype_rule = &pf->ethertype; + struct i40e_ethertype_filter *ethertype_filter, *node; + struct i40e_ethertype_filter check_filter; struct i40e_control_filter_stats stats; uint16_t flags = 0; int ret; @@ -7913,13 +8438,29 @@ i40e_ethertype_filter_set(struct i40e_pf *pf, } if (filter->ether_type == ETHER_TYPE_IPv4 || filter->ether_type == ETHER_TYPE_IPv6) { - PMD_DRV_LOG(ERR, "unsupported ether_type(0x%04x) in" - " control packet filter.", filter->ether_type); + PMD_DRV_LOG(ERR, + "unsupported ether_type(0x%04x) in control packet filter.", + filter->ether_type); return -EINVAL; } if (filter->ether_type == ETHER_TYPE_VLAN) - PMD_DRV_LOG(WARNING, "filter vlan ether_type in first tag is" - " not supported."); + PMD_DRV_LOG(WARNING, + "filter vlan ether_type in first tag is not supported."); + + /* Check if there is the filter in SW list */ + memset(&check_filter, 0, sizeof(check_filter)); + i40e_ethertype_filter_convert(filter, &check_filter); + node = i40e_sw_ethertype_filter_lookup(ethertype_rule, + &check_filter.input); + if (add && node) { + PMD_DRV_LOG(ERR, "Conflict with existing ethertype rules!"); + return -EINVAL; + } + + if (!add && !node) { + PMD_DRV_LOG(ERR, "There's no corresponding ethertype filter!"); + return -EINVAL; + } if (!(filter->flags & RTE_ETHTYPE_FLAGS_MAC)) flags |= I40E_AQC_ADD_CONTROL_PACKET_FLAGS_IGNORE_MAC; @@ -7934,14 +8475,25 @@ i40e_ethertype_filter_set(struct i40e_pf *pf, pf->main_vsi->seid, filter->queue, add, &stats, NULL); - PMD_DRV_LOG(INFO, "add/rem control packet filter, return %d," - " mac_etype_used = %u, etype_used = %u," - " mac_etype_free = %u, etype_free = %u\n", - ret, stats.mac_etype_used, stats.etype_used, - stats.mac_etype_free, stats.etype_free); + PMD_DRV_LOG(INFO, + "add/rem control packet filter, return %d, mac_etype_used = %u, etype_used = %u, mac_etype_free = %u, etype_free = %u", + ret, stats.mac_etype_used, stats.etype_used, + stats.mac_etype_free, stats.etype_free); if (ret < 0) return -ENOSYS; - return 0; + + /* Add or delete a filter in SW list */ + if (add) { + ethertype_filter = rte_zmalloc("ethertype_filter", + sizeof(*ethertype_filter), 0); + rte_memcpy(ethertype_filter, &check_filter, + sizeof(check_filter)); + ret = i40e_sw_ethertype_filter_insert(pf, ethertype_filter); + } else { + ret = i40e_sw_ethertype_filter_del(pf, &node->input); + } + + return ret; } /* @@ -7976,7 +8528,7 @@ i40e_ethertype_filter_handle(struct rte_eth_dev *dev, FALSE); break; default: - PMD_DRV_LOG(ERR, "unsupported 
operation %u\n", filter_op); + PMD_DRV_LOG(ERR, "unsupported operation %u", filter_op); ret = -ENOSYS; break; } @@ -8014,6 +8566,11 @@ i40e_dev_filter_ctrl(struct rte_eth_dev *dev, case RTE_ETH_FILTER_FDIR: ret = i40e_fdir_ctrl_func(dev, filter_op, arg); break; + case RTE_ETH_FILTER_GENERIC: + if (filter_op != RTE_ETH_FILTER_GET) + return -EINVAL; + *(const void **)arg = &i40e_flow_ops; + break; default: PMD_DRV_LOG(WARNING, "Filter type (%d) not supported", filter_type); @@ -8031,10 +8588,11 @@ i40e_dev_filter_ctrl(struct rte_eth_dev *dev, static void i40e_enable_extended_tag(struct rte_eth_dev *dev) { + struct rte_pci_device *pci_dev = I40E_DEV_TO_PCI(dev); uint32_t buf = 0; int ret; - ret = rte_eal_pci_read_config(dev->pci_dev, &buf, sizeof(buf), + ret = rte_eal_pci_read_config(pci_dev, &buf, sizeof(buf), PCI_DEV_CAP_REG); if (ret < 0) { PMD_DRV_LOG(ERR, "Failed to read PCI offset 0x%x", @@ -8047,7 +8605,7 @@ i40e_enable_extended_tag(struct rte_eth_dev *dev) } buf = 0; - ret = rte_eal_pci_read_config(dev->pci_dev, &buf, sizeof(buf), + ret = rte_eal_pci_read_config(pci_dev, &buf, sizeof(buf), PCI_DEV_CTRL_REG); if (ret < 0) { PMD_DRV_LOG(ERR, "Failed to read PCI offset 0x%x", @@ -8059,7 +8617,7 @@ i40e_enable_extended_tag(struct rte_eth_dev *dev) return; } buf |= PCI_DEV_CTRL_EXT_TAG_MASK; - ret = rte_eal_pci_write_config(dev->pci_dev, &buf, sizeof(buf), + ret = rte_eal_pci_write_config(pci_dev, &buf, sizeof(buf), PCI_DEV_CTRL_REG); if (ret < 0) { PMD_DRV_LOG(ERR, "Failed to write PCI offset 0x%x", @@ -8122,8 +8680,14 @@ i40e_pctype_to_flowtype(enum i40e_filter_pctype pctype) [I40E_FILTER_PCTYPE_FRAG_IPV4] = RTE_ETH_FLOW_FRAG_IPV4, [I40E_FILTER_PCTYPE_NONF_IPV4_UDP] = RTE_ETH_FLOW_NONFRAG_IPV4_UDP, + [I40E_FILTER_PCTYPE_NONF_UNICAST_IPV4_UDP] = + RTE_ETH_FLOW_NONFRAG_IPV4_UDP, + [I40E_FILTER_PCTYPE_NONF_MULTICAST_IPV4_UDP] = + RTE_ETH_FLOW_NONFRAG_IPV4_UDP, [I40E_FILTER_PCTYPE_NONF_IPV4_TCP] = RTE_ETH_FLOW_NONFRAG_IPV4_TCP, + [I40E_FILTER_PCTYPE_NONF_IPV4_TCP_SYN_NO_ACK] = + RTE_ETH_FLOW_NONFRAG_IPV4_TCP, [I40E_FILTER_PCTYPE_NONF_IPV4_SCTP] = RTE_ETH_FLOW_NONFRAG_IPV4_SCTP, [I40E_FILTER_PCTYPE_NONF_IPV4_OTHER] = @@ -8131,8 +8695,14 @@ i40e_pctype_to_flowtype(enum i40e_filter_pctype pctype) [I40E_FILTER_PCTYPE_FRAG_IPV6] = RTE_ETH_FLOW_FRAG_IPV6, [I40E_FILTER_PCTYPE_NONF_IPV6_UDP] = RTE_ETH_FLOW_NONFRAG_IPV6_UDP, + [I40E_FILTER_PCTYPE_NONF_UNICAST_IPV6_UDP] = + RTE_ETH_FLOW_NONFRAG_IPV6_UDP, + [I40E_FILTER_PCTYPE_NONF_MULTICAST_IPV6_UDP] = + RTE_ETH_FLOW_NONFRAG_IPV6_UDP, [I40E_FILTER_PCTYPE_NONF_IPV6_TCP] = RTE_ETH_FLOW_NONFRAG_IPV6_TCP, + [I40E_FILTER_PCTYPE_NONF_IPV6_TCP_SYN_NO_ACK] = + RTE_ETH_FLOW_NONFRAG_IPV6_TCP, [I40E_FILTER_PCTYPE_NONF_IPV6_SCTP] = RTE_ETH_FLOW_NONFRAG_IPV6_SCTP, [I40E_FILTER_PCTYPE_NONF_IPV6_OTHER] = @@ -8168,6 +8738,23 @@ i40e_pctype_to_flowtype(enum i40e_filter_pctype pctype) #define I40E_GL_SWR_PM_UP_THR_SF_VALUE 0x06060606 #define I40E_GL_SWR_PM_UP_THR 0x269FBC +static int +i40e_dev_sync_phy_type(struct i40e_hw *hw) +{ + enum i40e_status_code status; + struct i40e_aq_get_phy_abilities_resp phy_ab; + int ret = -ENOTSUP; + + status = i40e_aq_get_phy_capabilities(hw, false, true, &phy_ab, + NULL); + + if (status) + return ret; + + return 0; +} + + static void i40e_configure_registers(struct i40e_hw *hw) { @@ -8185,7 +8772,8 @@ i40e_configure_registers(struct i40e_hw *hw) for (i = 0; i < RTE_DIM(reg_table); i++) { if (reg_table[i].addr == I40E_GL_SWR_PM_UP_THR) { - if (i40e_is_40G_device(hw->device_id)) /* For XL710 */ + if 
(I40E_PHY_TYPE_SUPPORT_40G(hw->phy.phy_types) || /* For XL710 */ + I40E_PHY_TYPE_SUPPORT_25G(hw->phy.phy_types)) /* For XXV710 */ reg_table[i].val = I40E_GL_SWR_PM_UP_THR_SF_VALUE; else /* For X710 */ @@ -8208,9 +8796,9 @@ i40e_configure_registers(struct i40e_hw *hw) ret = i40e_aq_debug_write_register(hw, reg_table[i].addr, reg_table[i].val, NULL); if (ret < 0) { - PMD_DRV_LOG(ERR, "Failed to write 0x%"PRIx64" to the " - "address of 0x%"PRIx32, reg_table[i].val, - reg_table[i].addr); + PMD_DRV_LOG(ERR, + "Failed to write 0x%"PRIx64" to the address of 0x%"PRIx32, + reg_table[i].val, reg_table[i].addr); break; } PMD_DRV_LOG(DEBUG, "Write 0x%"PRIx64" to the address of " @@ -8255,8 +8843,9 @@ i40e_config_qinq(struct i40e_hw *hw, struct i40e_vsi *vsi) I40E_VSI_L2TAGSTXVALID( vsi->vsi_id), reg, NULL); if (ret < 0) { - PMD_DRV_LOG(ERR, "Failed to update " - "VSI_L2TAGSTXVALID[%d]", vsi->vsi_id); + PMD_DRV_LOG(ERR, + "Failed to update VSI_L2TAGSTXVALID[%d]", + vsi->vsi_id); return I40E_ERR_CONFIG; } } @@ -8307,11 +8896,10 @@ i40e_aq_add_mirror_rule(struct i40e_hw *hw, rte_memcpy(&desc.params.raw, &cmd, sizeof(cmd)); status = i40e_asq_send_command(hw, &desc, entries, buff_len, NULL); - PMD_DRV_LOG(INFO, "i40e_aq_add_mirror_rule, aq_status %d," - "rule_id = %u" - " mirror_rules_used = %u, mirror_rules_free = %u,", - hw->aq.asq_last_status, resp->rule_id, - resp->mirror_rules_used, resp->mirror_rules_free); + PMD_DRV_LOG(INFO, + "i40e_aq_add_mirror_rule, aq_status %d, rule_id = %u mirror_rules_used = %u, mirror_rules_free = %u,", + hw->aq.asq_last_status, resp->rule_id, + resp->mirror_rules_used, resp->mirror_rules_free); *rule_id = rte_le_to_cpu_16(resp->rule_id); return status; @@ -8389,8 +8977,8 @@ i40e_mirror_rule_set(struct rte_eth_dev *dev, PMD_DRV_LOG(DEBUG, "i40e_mirror_rule_set: sw_id = %d.", sw_id); if (pf->main_vsi->veb == NULL || pf->vfs == NULL) { - PMD_DRV_LOG(ERR, "mirror rule can not be configured" - " without veb or vfs."); + PMD_DRV_LOG(ERR, + "mirror rule can not be configured without veb or vfs."); return -ENOSYS; } if (pf->nb_mirror_rule > I40E_MAX_MIRROR_RULES) { @@ -8422,9 +9010,9 @@ i40e_mirror_rule_set(struct rte_eth_dev *dev, mirr_rule->entries, mirr_rule->num_entries, mirr_rule->id); if (ret < 0) { - PMD_DRV_LOG(ERR, "failed to remove mirror rule:" - " ret = %d, aq_err = %d.", - ret, hw->aq.asq_last_status); + PMD_DRV_LOG(ERR, + "failed to remove mirror rule: ret = %d, aq_err = %d.", + ret, hw->aq.asq_last_status); return -ENOSYS; } TAILQ_REMOVE(&pf->mirror_list, mirr_rule, rules); @@ -8513,9 +9101,9 @@ i40e_mirror_rule_set(struct rte_eth_dev *dev, mirr_rule->rule_type, mirr_rule->entries, j, &rule_id); if (ret < 0) { - PMD_DRV_LOG(ERR, "failed to add mirror rule:" - " ret = %d, aq_err = %d.", - ret, hw->aq.asq_last_status); + PMD_DRV_LOG(ERR, + "failed to add mirror rule: ret = %d, aq_err = %d.", + ret, hw->aq.asq_last_status); rte_free(mirr_rule); return -ENOSYS; } @@ -8567,9 +9155,9 @@ i40e_mirror_rule_reset(struct rte_eth_dev *dev, uint8_t sw_id) mirr_rule->entries, mirr_rule->num_entries, mirr_rule->id); if (ret < 0) { - PMD_DRV_LOG(ERR, "failed to remove mirror rule:" - " status = %d, aq_err = %d.", - ret, hw->aq.asq_last_status); + PMD_DRV_LOG(ERR, + "failed to remove mirror rule: status = %d, aq_err = %d.", + ret, hw->aq.asq_last_status); return -ENOSYS; } TAILQ_REMOVE(&pf->mirror_list, mirr_rule, rules); @@ -9001,9 +9589,9 @@ i40e_config_switch_comp_tc(struct i40e_veb *veb, uint8_t tc_map) ret = i40e_aq_config_switch_comp_bw_config(hw, veb->seid, &veb_bw, NULL); if 
(ret) { - PMD_INIT_LOG(ERR, "AQ command Config switch_comp BW allocation" - " per TC failed = %d", - hw->aq.asq_last_status); + PMD_INIT_LOG(ERR, + "AQ command Config switch_comp BW allocation per TC failed = %d", + hw->aq.asq_last_status); return ret; } @@ -9011,16 +9599,18 @@ i40e_config_switch_comp_tc(struct i40e_veb *veb, uint8_t tc_map) ret = i40e_aq_query_switch_comp_ets_config(hw, veb->seid, &ets_query, NULL); if (ret != I40E_SUCCESS) { - PMD_DRV_LOG(ERR, "Failed to get switch_comp ETS" - " configuration %u", hw->aq.asq_last_status); + PMD_DRV_LOG(ERR, + "Failed to get switch_comp ETS configuration %u", + hw->aq.asq_last_status); return ret; } memset(&bw_query, 0, sizeof(bw_query)); ret = i40e_aq_query_switch_comp_bw_config(hw, veb->seid, &bw_query, NULL); if (ret != I40E_SUCCESS) { - PMD_DRV_LOG(ERR, "Failed to get switch_comp bandwidth" - " configuration %u", hw->aq.asq_last_status); + PMD_DRV_LOG(ERR, + "Failed to get switch_comp bandwidth configuration %u", + hw->aq.asq_last_status); return ret; } @@ -9085,8 +9675,8 @@ i40e_vsi_config_tc(struct i40e_vsi *vsi, uint8_t tc_map) } ret = i40e_aq_config_vsi_tc_bw(hw, vsi->seid, &bw_data, NULL); if (ret) { - PMD_INIT_LOG(ERR, "AQ command Config VSI BW allocation" - " per TC failed = %d", + PMD_INIT_LOG(ERR, + "AQ command Config VSI BW allocation per TC failed = %d", hw->aq.asq_last_status); goto out; } @@ -9107,9 +9697,8 @@ i40e_vsi_config_tc(struct i40e_vsi *vsi, uint8_t tc_map) /* Update the VSI after updating the VSI queue-mapping information */ ret = i40e_aq_update_vsi_params(hw, &ctxt, NULL); if (ret) { - PMD_INIT_LOG(ERR, "Failed to configure " - "TC queue mapping = %d", - hw->aq.asq_last_status); + PMD_INIT_LOG(ERR, "Failed to configure TC queue mapping = %d", + hw->aq.asq_last_status); goto out; } /* update the local VSI info with updated queue map */ @@ -9161,8 +9750,8 @@ i40e_dcb_hw_configure(struct i40e_pf *pf, /* Use the FW API if FW > v4.4*/ if (!(((hw->aq.fw_maj_ver == 4) && (hw->aq.fw_min_ver >= 4)) || (hw->aq.fw_maj_ver >= 5))) { - PMD_INIT_LOG(ERR, "FW < v4.4, can not use FW LLDP API" - " to configure DCB"); + PMD_INIT_LOG(ERR, + "FW < v4.4, can not use FW LLDP API to configure DCB"); return I40E_ERR_FIRMWARE_API_VERSION; } @@ -9177,8 +9766,7 @@ i40e_dcb_hw_configure(struct i40e_pf *pf, old_cfg->etsrec = old_cfg->etscfg; ret = i40e_set_dcb_config(hw); if (ret) { - PMD_INIT_LOG(ERR, - "Set DCB Config failed, err %s aq_err %s\n", + PMD_INIT_LOG(ERR, "Set DCB Config failed, err %s aq_err %s", i40e_stat_str(hw, ret), i40e_aq_str(hw, hw->aq.asq_last_status)); return ret; @@ -9210,7 +9798,7 @@ i40e_dcb_hw_configure(struct i40e_pf *pf, ret = i40e_config_switch_comp_tc(main_vsi->veb, tc_map); if (ret) PMD_INIT_LOG(WARNING, - "Failed configuring TC for VEB seid=%d\n", + "Failed configuring TC for VEB seid=%d", main_vsi->veb->seid); } /* Update each VSI */ @@ -9228,8 +9816,8 @@ i40e_dcb_hw_configure(struct i40e_pf *pf, I40E_DEFAULT_TCMAP); if (ret) PMD_INIT_LOG(WARNING, - "Failed configuring TC for VSI seid=%d\n", - vsi_list->vsi->seid); + "Failed configuring TC for VSI seid=%d", + vsi_list->vsi->seid); /* continue */ } } @@ -9243,7 +9831,6 @@ i40e_dcb_hw_configure(struct i40e_pf *pf, * * Returns 0 on success, negative value on failure */ -//TREX_PATCH - changed all ERR to INFO in below func static int i40e_dcb_init_configure(struct rte_eth_dev *dev, bool sw_dcb) { @@ -9252,7 +9839,7 @@ i40e_dcb_init_configure(struct rte_eth_dev *dev, bool sw_dcb) int ret = 0; if ((pf->flags & I40E_FLAG_DCB) == 0) { - PMD_INIT_LOG(INFO, "HW 
doesn't support DCB"); + PMD_INIT_LOG(ERR, "HW doesn't support DCB"); return -ENOTSUP; } @@ -9261,29 +9848,21 @@ i40e_dcb_init_configure(struct rte_eth_dev *dev, bool sw_dcb) * LLDP MIB change event. */ if (sw_dcb == TRUE) { - ret = i40e_aq_stop_lldp(hw, TRUE, NULL); - if (ret != I40E_SUCCESS) - PMD_INIT_LOG(DEBUG, "Failed to stop lldp"); - ret = i40e_init_dcb(hw); - /* if sw_dcb, lldp agent is stopped, the return from + /* If lldp agent is stopped, the return value from * i40e_init_dcb we expect is failure with I40E_AQ_RC_EPERM - * adminq status. + * adminq status. Otherwise, it should return success. */ - if (ret != I40E_SUCCESS && - hw->aq.asq_last_status == I40E_AQ_RC_EPERM) { + if ((ret == I40E_SUCCESS) || (ret != I40E_SUCCESS && + hw->aq.asq_last_status == I40E_AQ_RC_EPERM)) { memset(&hw->local_dcbx_config, 0, sizeof(struct i40e_dcbx_config)); /* set dcb default configuration */ hw->local_dcbx_config.etscfg.willing = 0; hw->local_dcbx_config.etscfg.maxtcs = 0; hw->local_dcbx_config.etscfg.tcbwtable[0] = 100; - hw->local_dcbx_config.etscfg.tsatable[0] = I40E_IEEE_TSA_ETS; -#ifdef TREX_PATCH_LOW_LATENCY - hw->local_dcbx_config.etscfg.tcbwtable[1] = 0; - hw->local_dcbx_config.etscfg.tsatable[1] = I40E_IEEE_TSA_STRICT; - hw->local_dcbx_config.etscfg.prioritytable[1] = 1; -#endif + hw->local_dcbx_config.etscfg.tsatable[0] = + I40E_IEEE_TSA_ETS; hw->local_dcbx_config.etsrec = hw->local_dcbx_config.etscfg; hw->local_dcbx_config.pfc.willing = 0; @@ -9298,22 +9877,15 @@ i40e_dcb_init_configure(struct rte_eth_dev *dev, bool sw_dcb) I40E_APP_PROTOID_FCOE; ret = i40e_set_dcb_config(hw); if (ret) { - PMD_INIT_LOG(INFO, "default dcb config fails." - " err = %d, aq_err = %d.", ret, - hw->aq.asq_last_status); + PMD_INIT_LOG(ERR, + "default dcb config fails. err = %d, aq_err = %d.", + ret, hw->aq.asq_last_status); return -ENOSYS; } -#ifdef TREX_PATCH_LOW_LATENCY - if (i40e_vsi_update_tc_bandwidth_ex(pf->main_vsi) != - I40E_SUCCESS) { - PMD_DRV_LOG(ERR, "Failed to update TC bandwidth"); - return -ENOSYS; - } -#endif } else { - PMD_INIT_LOG(INFO, "DCBX configuration failed, err = %d," - " aq_err = %d.", ret, - hw->aq.asq_last_status); + PMD_INIT_LOG(ERR, + "DCB initialization in FW fails, err = %d, aq_err = %d.", + ret, hw->aq.asq_last_status); return -ENOTSUP; } } else { @@ -9324,14 +9896,14 @@ i40e_dcb_init_configure(struct rte_eth_dev *dev, bool sw_dcb) ret = i40e_init_dcb(hw); if (!ret) { if (hw->dcbx_status == I40E_DCBX_STATUS_DISABLED) { - PMD_INIT_LOG(INFO, "HW doesn't support" - " DCBX offload."); + PMD_INIT_LOG(ERR, + "HW doesn't support DCBX offload."); return -ENOTSUP; } } else { - PMD_INIT_LOG(INFO, "DCBX configuration failed, err = %d," - " aq_err = %d.", ret, - hw->aq.asq_last_status); + PMD_INIT_LOG(ERR, + "DCBX configuration failed, err = %d, aq_err = %d.", + ret, hw->aq.asq_last_status); return -ENOTSUP; } } @@ -9440,7 +10012,8 @@ i40e_dev_get_dcb_info(struct rte_eth_dev *dev, static int i40e_dev_rx_queue_intr_enable(struct rte_eth_dev *dev, uint16_t queue_id) { - struct rte_intr_handle *intr_handle = &dev->pci_dev->intr_handle; + struct rte_pci_device *pci_dev = I40E_DEV_TO_PCI(dev); + struct rte_intr_handle *intr_handle = &pci_dev->intr_handle; struct i40e_hw *hw = I40E_DEV_PRIVATE_TO_HW(dev->data->dev_private); uint16_t interval = i40e_calc_itr_interval(RTE_LIBRTE_I40E_ITR_INTERVAL); @@ -9465,7 +10038,7 @@ i40e_dev_rx_queue_intr_enable(struct rte_eth_dev *dev, uint16_t queue_id) I40E_PFINT_DYN_CTLN_INTERVAL_SHIFT)); I40E_WRITE_FLUSH(hw); - 
rte_intr_enable(&dev->pci_dev->intr_handle); + rte_intr_enable(&pci_dev->intr_handle); return 0; } @@ -9473,7 +10046,8 @@ i40e_dev_rx_queue_intr_enable(struct rte_eth_dev *dev, uint16_t queue_id) static int i40e_dev_rx_queue_intr_disable(struct rte_eth_dev *dev, uint16_t queue_id) { - struct rte_intr_handle *intr_handle = &dev->pci_dev->intr_handle; + struct rte_pci_device *pci_dev = I40E_DEV_TO_PCI(dev); + struct rte_intr_handle *intr_handle = &pci_dev->intr_handle; struct i40e_hw *hw = I40E_DEV_PRIVATE_TO_HW(dev->data->dev_private); uint16_t msix_intr; @@ -9605,8 +10179,7 @@ i40e_dev_mtu_set(struct rte_eth_dev *dev, uint16_t mtu) /* mtu setting is forbidden if port is start */ if (dev_data->dev_started) { - PMD_DRV_LOG(ERR, - "port %d must be stopped before configuration\n", + PMD_DRV_LOG(ERR, "port %d must be stopped before configuration", dev_data->port_id); return -EBUSY; } @@ -9620,3 +10193,997 @@ i40e_dev_mtu_set(struct rte_eth_dev *dev, uint16_t mtu) return ret; } + +/* Restore ethertype filter */ +static void +i40e_ethertype_filter_restore(struct i40e_pf *pf) +{ + struct i40e_hw *hw = I40E_PF_TO_HW(pf); + struct i40e_ethertype_filter_list + *ethertype_list = &pf->ethertype.ethertype_list; + struct i40e_ethertype_filter *f; + struct i40e_control_filter_stats stats; + uint16_t flags; + + TAILQ_FOREACH(f, ethertype_list, rules) { + flags = 0; + if (!(f->flags & RTE_ETHTYPE_FLAGS_MAC)) + flags |= I40E_AQC_ADD_CONTROL_PACKET_FLAGS_IGNORE_MAC; + if (f->flags & RTE_ETHTYPE_FLAGS_DROP) + flags |= I40E_AQC_ADD_CONTROL_PACKET_FLAGS_DROP; + flags |= I40E_AQC_ADD_CONTROL_PACKET_FLAGS_TO_QUEUE; + + memset(&stats, 0, sizeof(stats)); + i40e_aq_add_rem_control_packet_filter(hw, + f->input.mac_addr.addr_bytes, + f->input.ether_type, + flags, pf->main_vsi->seid, + f->queue, 1, &stats, NULL); + } + PMD_DRV_LOG(INFO, "Ethertype filter:" + " mac_etype_used = %u, etype_used = %u," + " mac_etype_free = %u, etype_free = %u", + stats.mac_etype_used, stats.etype_used, + stats.mac_etype_free, stats.etype_free); +} + +/* Restore tunnel filter */ +static void +i40e_tunnel_filter_restore(struct i40e_pf *pf) +{ + struct i40e_hw *hw = I40E_PF_TO_HW(pf); + struct i40e_vsi *vsi = pf->main_vsi; + struct i40e_tunnel_filter_list + *tunnel_list = &pf->tunnel.tunnel_list; + struct i40e_tunnel_filter *f; + struct i40e_aqc_add_remove_cloud_filters_element_data cld_filter; + + TAILQ_FOREACH(f, tunnel_list, rules) { + memset(&cld_filter, 0, sizeof(cld_filter)); + rte_memcpy(&cld_filter, &f->input, sizeof(f->input)); + cld_filter.queue_number = f->queue; + i40e_aq_add_cloud_filters(hw, vsi->seid, &cld_filter, 1); + } +} + +static void +i40e_filter_restore(struct i40e_pf *pf) +{ + i40e_ethertype_filter_restore(pf); + i40e_tunnel_filter_restore(pf); + i40e_fdir_filter_restore(pf); +} + +static int +is_i40e_pmd(const char *driver_name) +{ + if (!strstr(driver_name, "i40e")) + return -ENOTSUP; + + if (strstr(driver_name, "i40e_vf")) + return -ENOTSUP; + + return 0; +} + +int +rte_pmd_i40e_ping_vfs(uint8_t port, uint16_t vf) +{ + struct rte_eth_dev *dev; + struct i40e_pf *pf; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port, -ENODEV); + + dev = &rte_eth_devices[port]; + + if (is_i40e_pmd(dev->data->drv_name)) + return -ENOTSUP; + + pf = I40E_DEV_PRIVATE_TO_PF(dev->data->dev_private); + + if (vf >= pf->vf_num || !pf->vfs) { + PMD_DRV_LOG(ERR, "Invalid argument."); + return -EINVAL; + } + + i40e_notify_vf_link_status(dev, &pf->vfs[vf]); + + return 0; +} + +int +rte_pmd_i40e_set_vf_mac_anti_spoof(uint8_t port, uint16_t vf_id, uint8_t on) 
+{ + struct rte_eth_dev *dev; + struct i40e_pf *pf; + struct i40e_vsi *vsi; + struct i40e_hw *hw; + struct i40e_vsi_context ctxt; + int ret; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port, -ENODEV); + + dev = &rte_eth_devices[port]; + + if (is_i40e_pmd(dev->data->drv_name)) + return -ENOTSUP; + + pf = I40E_DEV_PRIVATE_TO_PF(dev->data->dev_private); + + if (vf_id >= pf->vf_num || !pf->vfs) { + PMD_DRV_LOG(ERR, "Invalid argument."); + return -EINVAL; + } + + vsi = pf->vfs[vf_id].vsi; + if (!vsi) { + PMD_DRV_LOG(ERR, "Invalid VSI."); + return -EINVAL; + } + + /* Check if it has been already on or off */ + if (vsi->info.valid_sections & + rte_cpu_to_le_16(I40E_AQ_VSI_PROP_SECURITY_VALID)) { + if (on) { + if ((vsi->info.sec_flags & + I40E_AQ_VSI_SEC_FLAG_ENABLE_MAC_CHK) == + I40E_AQ_VSI_SEC_FLAG_ENABLE_MAC_CHK) + return 0; /* already on */ + } else { + if ((vsi->info.sec_flags & + I40E_AQ_VSI_SEC_FLAG_ENABLE_MAC_CHK) == 0) + return 0; /* already off */ + } + } + + vsi->info.valid_sections = cpu_to_le16(I40E_AQ_VSI_PROP_SECURITY_VALID); + if (on) + vsi->info.sec_flags |= I40E_AQ_VSI_SEC_FLAG_ENABLE_MAC_CHK; + else + vsi->info.sec_flags &= ~I40E_AQ_VSI_SEC_FLAG_ENABLE_MAC_CHK; + + memset(&ctxt, 0, sizeof(ctxt)); + (void)rte_memcpy(&ctxt.info, &vsi->info, sizeof(vsi->info)); + ctxt.seid = vsi->seid; + + hw = I40E_VSI_TO_HW(vsi); + ret = i40e_aq_update_vsi_params(hw, &ctxt, NULL); + if (ret != I40E_SUCCESS) { + ret = -ENOTSUP; + PMD_DRV_LOG(ERR, "Failed to update VSI params"); + } + + return ret; +} + +static int +i40e_add_rm_all_vlan_filter(struct i40e_vsi *vsi, uint8_t add) +{ + uint32_t j, k; + uint16_t vlan_id; + struct i40e_hw *hw = I40E_VSI_TO_HW(vsi); + struct i40e_aqc_add_remove_vlan_element_data vlan_data = {0}; + int ret; + + for (j = 0; j < I40E_VFTA_SIZE; j++) { + if (!vsi->vfta[j]) + continue; + + for (k = 0; k < I40E_UINT32_BIT_SIZE; k++) { + if (!(vsi->vfta[j] & (1 << k))) + continue; + + vlan_id = j * I40E_UINT32_BIT_SIZE + k; + if (!vlan_id) + continue; + + vlan_data.vlan_tag = rte_cpu_to_le_16(vlan_id); + if (add) + ret = i40e_aq_add_vlan(hw, vsi->seid, + &vlan_data, 1, NULL); + else + ret = i40e_aq_remove_vlan(hw, vsi->seid, + &vlan_data, 1, NULL); + if (ret != I40E_SUCCESS) { + PMD_DRV_LOG(ERR, + "Failed to add/rm vlan filter"); + return ret; + } + } + } + + return I40E_SUCCESS; +} + +int +rte_pmd_i40e_set_vf_vlan_anti_spoof(uint8_t port, uint16_t vf_id, uint8_t on) +{ + struct rte_eth_dev *dev; + struct i40e_pf *pf; + struct i40e_vsi *vsi; + struct i40e_hw *hw; + struct i40e_vsi_context ctxt; + int ret; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port, -ENODEV); + + dev = &rte_eth_devices[port]; + + if (is_i40e_pmd(dev->data->drv_name)) + return -ENOTSUP; + + pf = I40E_DEV_PRIVATE_TO_PF(dev->data->dev_private); + + if (vf_id >= pf->vf_num || !pf->vfs) { + PMD_DRV_LOG(ERR, "Invalid argument."); + return -EINVAL; + } + + vsi = pf->vfs[vf_id].vsi; + if (!vsi) { + PMD_DRV_LOG(ERR, "Invalid VSI."); + return -EINVAL; + } + + /* Check if it has been already on or off */ + if (vsi->vlan_anti_spoof_on == on) + return 0; /* already on or off */ + + vsi->vlan_anti_spoof_on = on; + ret = i40e_add_rm_all_vlan_filter(vsi, on); + if (ret) { + PMD_DRV_LOG(ERR, "Failed to remove VLAN filters."); + return -ENOTSUP; + } + + vsi->info.valid_sections = cpu_to_le16(I40E_AQ_VSI_PROP_SECURITY_VALID); + if (on) + vsi->info.sec_flags |= I40E_AQ_VSI_SEC_FLAG_ENABLE_VLAN_CHK; + else + vsi->info.sec_flags &= ~I40E_AQ_VSI_SEC_FLAG_ENABLE_VLAN_CHK; + + memset(&ctxt, 0, sizeof(ctxt)); + (void)rte_memcpy(&ctxt.info, 
&vsi->info, sizeof(vsi->info)); + ctxt.seid = vsi->seid; + + hw = I40E_VSI_TO_HW(vsi); + ret = i40e_aq_update_vsi_params(hw, &ctxt, NULL); + if (ret != I40E_SUCCESS) { + ret = -ENOTSUP; + PMD_DRV_LOG(ERR, "Failed to update VSI params"); + } + + return ret; +} + +static int +i40e_vsi_rm_mac_filter(struct i40e_vsi *vsi) +{ + struct i40e_mac_filter *f; + struct i40e_macvlan_filter *mv_f; + int i, vlan_num; + enum rte_mac_filter_type filter_type; + int ret = I40E_SUCCESS; + void *temp; + + /* remove all the MACs */ + TAILQ_FOREACH_SAFE(f, &vsi->mac_list, next, temp) { + vlan_num = vsi->vlan_num; + filter_type = f->mac_info.filter_type; + if (filter_type == RTE_MACVLAN_PERFECT_MATCH || + filter_type == RTE_MACVLAN_HASH_MATCH) { + if (vlan_num == 0) { + PMD_DRV_LOG(ERR, "VLAN number shouldn't be 0"); + return I40E_ERR_PARAM; + } + } else if (filter_type == RTE_MAC_PERFECT_MATCH || + filter_type == RTE_MAC_HASH_MATCH) + vlan_num = 1; + + mv_f = rte_zmalloc("macvlan_data", vlan_num * sizeof(*mv_f), 0); + if (!mv_f) { + PMD_DRV_LOG(ERR, "failed to allocate memory"); + return I40E_ERR_NO_MEMORY; + } + + for (i = 0; i < vlan_num; i++) { + mv_f[i].filter_type = filter_type; + (void)rte_memcpy(&mv_f[i].macaddr, + &f->mac_info.mac_addr, + ETH_ADDR_LEN); + } + if (filter_type == RTE_MACVLAN_PERFECT_MATCH || + filter_type == RTE_MACVLAN_HASH_MATCH) { + ret = i40e_find_all_vlan_for_mac(vsi, mv_f, vlan_num, + &f->mac_info.mac_addr); + if (ret != I40E_SUCCESS) { + rte_free(mv_f); + return ret; + } + } + + ret = i40e_remove_macvlan_filters(vsi, mv_f, vlan_num); + if (ret != I40E_SUCCESS) { + rte_free(mv_f); + return ret; + } + + rte_free(mv_f); + ret = I40E_SUCCESS; + } + + return ret; +} + +static int +i40e_vsi_restore_mac_filter(struct i40e_vsi *vsi) +{ + struct i40e_mac_filter *f; + struct i40e_macvlan_filter *mv_f; + int i, vlan_num = 0; + int ret = I40E_SUCCESS; + void *temp; + + /* restore all the MACs */ + TAILQ_FOREACH_SAFE(f, &vsi->mac_list, next, temp) { + if ((f->mac_info.filter_type == RTE_MACVLAN_PERFECT_MATCH) || + (f->mac_info.filter_type == RTE_MACVLAN_HASH_MATCH)) { + /** + * If vlan_num is 0, that's the first time to add mac, + * set mask for vlan_id 0. 
+ */ + if (vsi->vlan_num == 0) { + i40e_set_vlan_filter(vsi, 0, 1); + vsi->vlan_num = 1; + } + vlan_num = vsi->vlan_num; + } else if ((f->mac_info.filter_type == RTE_MAC_PERFECT_MATCH) || + (f->mac_info.filter_type == RTE_MAC_HASH_MATCH)) + vlan_num = 1; + + mv_f = rte_zmalloc("macvlan_data", vlan_num * sizeof(*mv_f), 0); + if (!mv_f) { + PMD_DRV_LOG(ERR, "failed to allocate memory"); + return I40E_ERR_NO_MEMORY; + } + + for (i = 0; i < vlan_num; i++) { + mv_f[i].filter_type = f->mac_info.filter_type; + (void)rte_memcpy(&mv_f[i].macaddr, + &f->mac_info.mac_addr, + ETH_ADDR_LEN); + } + + if (f->mac_info.filter_type == RTE_MACVLAN_PERFECT_MATCH || + f->mac_info.filter_type == RTE_MACVLAN_HASH_MATCH) { + ret = i40e_find_all_vlan_for_mac(vsi, mv_f, vlan_num, + &f->mac_info.mac_addr); + if (ret != I40E_SUCCESS) { + rte_free(mv_f); + return ret; + } + } + + ret = i40e_add_macvlan_filters(vsi, mv_f, vlan_num); + if (ret != I40E_SUCCESS) { + rte_free(mv_f); + return ret; + } + + rte_free(mv_f); + ret = I40E_SUCCESS; + } + + return ret; +} + +static int +i40e_vsi_set_tx_loopback(struct i40e_vsi *vsi, uint8_t on) +{ + struct i40e_vsi_context ctxt; + struct i40e_hw *hw; + int ret; + + if (!vsi) + return -EINVAL; + + hw = I40E_VSI_TO_HW(vsi); + + /* Use the FW API if FW >= v5.0 */ + if (hw->aq.fw_maj_ver < 5) { + PMD_INIT_LOG(ERR, "FW < v5.0, cannot enable loopback"); + return -ENOTSUP; + } + + /* Check if it has been already on or off */ + if (vsi->info.valid_sections & + rte_cpu_to_le_16(I40E_AQ_VSI_PROP_SWITCH_VALID)) { + if (on) { + if ((vsi->info.switch_id & + I40E_AQ_VSI_SW_ID_FLAG_ALLOW_LB) == + I40E_AQ_VSI_SW_ID_FLAG_ALLOW_LB) + return 0; /* already on */ + } else { + if ((vsi->info.switch_id & + I40E_AQ_VSI_SW_ID_FLAG_ALLOW_LB) == 0) + return 0; /* already off */ + } + } + + /* remove all the MAC and VLAN first */ + ret = i40e_vsi_rm_mac_filter(vsi); + if (ret) { + PMD_INIT_LOG(ERR, "Failed to remove MAC filters."); + return ret; + } + if (vsi->vlan_anti_spoof_on) { + ret = i40e_add_rm_all_vlan_filter(vsi, 0); + if (ret) { + PMD_INIT_LOG(ERR, "Failed to remove VLAN filters."); + return ret; + } + } + + vsi->info.valid_sections = cpu_to_le16(I40E_AQ_VSI_PROP_SWITCH_VALID); + if (on) + vsi->info.switch_id |= I40E_AQ_VSI_SW_ID_FLAG_ALLOW_LB; + else + vsi->info.switch_id &= ~I40E_AQ_VSI_SW_ID_FLAG_ALLOW_LB; + + memset(&ctxt, 0, sizeof(ctxt)); + (void)rte_memcpy(&ctxt.info, &vsi->info, sizeof(vsi->info)); + ctxt.seid = vsi->seid; + + ret = i40e_aq_update_vsi_params(hw, &ctxt, NULL); + if (ret != I40E_SUCCESS) { + PMD_DRV_LOG(ERR, "Failed to update VSI params"); + return ret; + } + + /* add all the MAC and VLAN back */ + ret = i40e_vsi_restore_mac_filter(vsi); + if (ret) + return ret; + if (vsi->vlan_anti_spoof_on) { + ret = i40e_add_rm_all_vlan_filter(vsi, 1); + if (ret) + return ret; + } + + return ret; +} + +int +rte_pmd_i40e_set_tx_loopback(uint8_t port, uint8_t on) +{ + struct rte_eth_dev *dev; + struct i40e_pf *pf; + struct i40e_pf_vf *vf; + struct i40e_vsi *vsi; + uint16_t vf_id; + int ret; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port, -ENODEV); + + dev = &rte_eth_devices[port]; + + if (is_i40e_pmd(dev->data->drv_name)) + return -ENOTSUP; + + pf = I40E_DEV_PRIVATE_TO_PF(dev->data->dev_private); + + /* setup PF TX loopback */ + vsi = pf->main_vsi; + ret = i40e_vsi_set_tx_loopback(vsi, on); + if (ret) + return -ENOTSUP; + + /* setup TX loopback for all the VFs */ + if (!pf->vfs) { + /* if no VF, do nothing. 
*/ + return 0; + } + + for (vf_id = 0; vf_id < pf->vf_num; vf_id++) { + vf = &pf->vfs[vf_id]; + vsi = vf->vsi; + + ret = i40e_vsi_set_tx_loopback(vsi, on); + if (ret) + return -ENOTSUP; + } + + return ret; +} + +int +rte_pmd_i40e_set_vf_unicast_promisc(uint8_t port, uint16_t vf_id, uint8_t on) +{ + struct rte_eth_dev *dev; + struct i40e_pf *pf; + struct i40e_vsi *vsi; + struct i40e_hw *hw; + int ret; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port, -ENODEV); + + dev = &rte_eth_devices[port]; + + if (is_i40e_pmd(dev->data->drv_name)) + return -ENOTSUP; + + pf = I40E_DEV_PRIVATE_TO_PF(dev->data->dev_private); + + if (vf_id >= pf->vf_num || !pf->vfs) { + PMD_DRV_LOG(ERR, "Invalid argument."); + return -EINVAL; + } + + vsi = pf->vfs[vf_id].vsi; + if (!vsi) { + PMD_DRV_LOG(ERR, "Invalid VSI."); + return -EINVAL; + } + + hw = I40E_VSI_TO_HW(vsi); + + ret = i40e_aq_set_vsi_unicast_promiscuous(hw, vsi->seid, + on, NULL, true); + if (ret != I40E_SUCCESS) { + ret = -ENOTSUP; + PMD_DRV_LOG(ERR, "Failed to set unicast promiscuous mode"); + } + + return ret; +} + +int +rte_pmd_i40e_set_vf_multicast_promisc(uint8_t port, uint16_t vf_id, uint8_t on) +{ + struct rte_eth_dev *dev; + struct i40e_pf *pf; + struct i40e_vsi *vsi; + struct i40e_hw *hw; + int ret; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port, -ENODEV); + + dev = &rte_eth_devices[port]; + + if (is_i40e_pmd(dev->data->drv_name)) + return -ENOTSUP; + + pf = I40E_DEV_PRIVATE_TO_PF(dev->data->dev_private); + + if (vf_id >= pf->vf_num || !pf->vfs) { + PMD_DRV_LOG(ERR, "Invalid argument."); + return -EINVAL; + } + + vsi = pf->vfs[vf_id].vsi; + if (!vsi) { + PMD_DRV_LOG(ERR, "Invalid VSI."); + return -EINVAL; + } + + hw = I40E_VSI_TO_HW(vsi); + + ret = i40e_aq_set_vsi_multicast_promiscuous(hw, vsi->seid, + on, NULL); + if (ret != I40E_SUCCESS) { + ret = -ENOTSUP; + PMD_DRV_LOG(ERR, "Failed to set multicast promiscuous mode"); + } + + return ret; +} + +int +rte_pmd_i40e_set_vf_mac_addr(uint8_t port, uint16_t vf_id, + struct ether_addr *mac_addr) +{ + struct i40e_mac_filter *f; + struct rte_eth_dev *dev; + struct i40e_pf_vf *vf; + struct i40e_vsi *vsi; + struct i40e_pf *pf; + void *temp; + + if (i40e_validate_mac_addr((u8 *)mac_addr) != I40E_SUCCESS) + return -EINVAL; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port, -ENODEV); + + dev = &rte_eth_devices[port]; + + if (is_i40e_pmd(dev->data->drv_name)) + return -ENOTSUP; + + pf = I40E_DEV_PRIVATE_TO_PF(dev->data->dev_private); + + if (vf_id >= pf->vf_num || !pf->vfs) + return -EINVAL; + + vf = &pf->vfs[vf_id]; + vsi = vf->vsi; + if (!vsi) { + PMD_DRV_LOG(ERR, "Invalid VSI."); + return -EINVAL; + } + + ether_addr_copy(mac_addr, &vf->mac_addr); + + /* Remove all existing mac */ + TAILQ_FOREACH_SAFE(f, &vsi->mac_list, next, temp) + i40e_vsi_delete_mac(vsi, &f->mac_info.mac_addr); + + return 0; +} + +/* Set vlan strip on/off for specific VF from host */ +int +rte_pmd_i40e_set_vf_vlan_stripq(uint8_t port, uint16_t vf_id, uint8_t on) +{ + struct rte_eth_dev *dev; + struct i40e_pf *pf; + struct i40e_vsi *vsi; + int ret; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port, -ENODEV); + + dev = &rte_eth_devices[port]; + + if (is_i40e_pmd(dev->data->drv_name)) + return -ENOTSUP; + + pf = I40E_DEV_PRIVATE_TO_PF(dev->data->dev_private); + + if (vf_id >= pf->vf_num || !pf->vfs) { + PMD_DRV_LOG(ERR, "Invalid argument."); + return -EINVAL; + } + + vsi = pf->vfs[vf_id].vsi; + + if (!vsi) + return -EINVAL; + + ret = i40e_vsi_config_vlan_stripping(vsi, !!on); + if (ret != I40E_SUCCESS) { + ret = -ENOTSUP; + PMD_DRV_LOG(ERR, "Failed to set VLAN 
stripping!"); + } + + return ret; +} + +int rte_pmd_i40e_set_vf_vlan_insert(uint8_t port, uint16_t vf_id, + uint16_t vlan_id) +{ + struct rte_eth_dev *dev; + struct i40e_pf *pf; + struct i40e_hw *hw; + struct i40e_vsi *vsi; + struct i40e_vsi_context ctxt; + int ret; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port, -ENODEV); + + if (vlan_id > ETHER_MAX_VLAN_ID) { + PMD_DRV_LOG(ERR, "Invalid VLAN ID."); + return -EINVAL; + } + + dev = &rte_eth_devices[port]; + + if (is_i40e_pmd(dev->data->drv_name)) + return -ENOTSUP; + + pf = I40E_DEV_PRIVATE_TO_PF(dev->data->dev_private); + hw = I40E_PF_TO_HW(pf); + + /** + * return -ENODEV if SRIOV not enabled, VF number not configured + * or no queue assigned. + */ + if (!hw->func_caps.sr_iov_1_1 || pf->vf_num == 0 || + pf->vf_nb_qps == 0) + return -ENODEV; + + if (vf_id >= pf->vf_num || !pf->vfs) { + PMD_DRV_LOG(ERR, "Invalid VF ID."); + return -EINVAL; + } + + vsi = pf->vfs[vf_id].vsi; + if (!vsi) { + PMD_DRV_LOG(ERR, "Invalid VSI."); + return -EINVAL; + } + + vsi->info.valid_sections = cpu_to_le16(I40E_AQ_VSI_PROP_VLAN_VALID); + vsi->info.pvid = vlan_id; + if (vlan_id > 0) + vsi->info.port_vlan_flags |= I40E_AQ_VSI_PVLAN_INSERT_PVID; + else + vsi->info.port_vlan_flags &= ~I40E_AQ_VSI_PVLAN_INSERT_PVID; + + memset(&ctxt, 0, sizeof(ctxt)); + (void)rte_memcpy(&ctxt.info, &vsi->info, sizeof(vsi->info)); + ctxt.seid = vsi->seid; + + hw = I40E_VSI_TO_HW(vsi); + ret = i40e_aq_update_vsi_params(hw, &ctxt, NULL); + if (ret != I40E_SUCCESS) { + ret = -ENOTSUP; + PMD_DRV_LOG(ERR, "Failed to update VSI params"); + } + + return ret; +} + +int rte_pmd_i40e_set_vf_broadcast(uint8_t port, uint16_t vf_id, + uint8_t on) +{ + struct rte_eth_dev *dev; + struct i40e_pf *pf; + struct i40e_vsi *vsi; + struct i40e_hw *hw; + int ret; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port, -ENODEV); + + if (on > 1) { + PMD_DRV_LOG(ERR, "on should be 0 or 1."); + return -EINVAL; + } + + dev = &rte_eth_devices[port]; + + if (is_i40e_pmd(dev->data->drv_name)) + return -ENOTSUP; + + pf = I40E_DEV_PRIVATE_TO_PF(dev->data->dev_private); + hw = I40E_PF_TO_HW(pf); + + if (vf_id >= pf->vf_num || !pf->vfs) { + PMD_DRV_LOG(ERR, "Invalid VF ID."); + return -EINVAL; + } + + /** + * return -ENODEV if SRIOV not enabled, VF number not configured + * or no queue assigned. + */ + if (!hw->func_caps.sr_iov_1_1 || pf->vf_num == 0 || + pf->vf_nb_qps == 0) { + PMD_DRV_LOG(ERR, "SRIOV is not enabled or no queue."); + return -ENODEV; + } + + vsi = pf->vfs[vf_id].vsi; + if (!vsi) { + PMD_DRV_LOG(ERR, "Invalid VSI."); + return -EINVAL; + } + + hw = I40E_VSI_TO_HW(vsi); + + ret = i40e_aq_set_vsi_broadcast(hw, vsi->seid, on, NULL); + if (ret != I40E_SUCCESS) { + ret = -ENOTSUP; + PMD_DRV_LOG(ERR, "Failed to set VSI broadcast"); + } + + return ret; +} + +int rte_pmd_i40e_set_vf_vlan_tag(uint8_t port, uint16_t vf_id, uint8_t on) +{ + struct rte_eth_dev *dev; + struct i40e_pf *pf; + struct i40e_hw *hw; + struct i40e_vsi *vsi; + struct i40e_vsi_context ctxt; + int ret; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port, -ENODEV); + + if (on > 1) { + PMD_DRV_LOG(ERR, "on should be 0 or 1."); + return -EINVAL; + } + + dev = &rte_eth_devices[port]; + + if (is_i40e_pmd(dev->data->drv_name)) + return -ENOTSUP; + + pf = I40E_DEV_PRIVATE_TO_PF(dev->data->dev_private); + hw = I40E_PF_TO_HW(pf); + + /** + * return -ENODEV if SRIOV not enabled, VF number not configured + * or no queue assigned. 
+ */ + if (!hw->func_caps.sr_iov_1_1 || pf->vf_num == 0 || + pf->vf_nb_qps == 0) { + PMD_DRV_LOG(ERR, "SRIOV is not enabled or no queue."); + return -ENODEV; + } + + if (vf_id >= pf->vf_num || !pf->vfs) { + PMD_DRV_LOG(ERR, "Invalid VF ID."); + return -EINVAL; + } + + vsi = pf->vfs[vf_id].vsi; + if (!vsi) { + PMD_DRV_LOG(ERR, "Invalid VSI."); + return -EINVAL; + } + + vsi->info.valid_sections = cpu_to_le16(I40E_AQ_VSI_PROP_VLAN_VALID); + if (on) { + vsi->info.port_vlan_flags |= I40E_AQ_VSI_PVLAN_MODE_TAGGED; + vsi->info.port_vlan_flags &= ~I40E_AQ_VSI_PVLAN_MODE_UNTAGGED; + } else { + vsi->info.port_vlan_flags |= I40E_AQ_VSI_PVLAN_MODE_UNTAGGED; + vsi->info.port_vlan_flags &= ~I40E_AQ_VSI_PVLAN_MODE_TAGGED; + } + + memset(&ctxt, 0, sizeof(ctxt)); + (void)rte_memcpy(&ctxt.info, &vsi->info, sizeof(vsi->info)); + ctxt.seid = vsi->seid; + + hw = I40E_VSI_TO_HW(vsi); + ret = i40e_aq_update_vsi_params(hw, &ctxt, NULL); + if (ret != I40E_SUCCESS) { + ret = -ENOTSUP; + PMD_DRV_LOG(ERR, "Failed to update VSI params"); + } + + return ret; +} + +int rte_pmd_i40e_set_vf_vlan_filter(uint8_t port, uint16_t vlan_id, + uint64_t vf_mask, uint8_t on) +{ + struct rte_eth_dev *dev; + struct i40e_pf *pf; + struct i40e_hw *hw; + uint16_t vf_idx; + int ret = I40E_SUCCESS; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port, -ENODEV); + + dev = &rte_eth_devices[port]; + + if (is_i40e_pmd(dev->data->drv_name)) + return -ENOTSUP; + + if (vlan_id > ETHER_MAX_VLAN_ID) { + PMD_DRV_LOG(ERR, "Invalid VLAN ID."); + return -EINVAL; + } + + if (vf_mask == 0) { + PMD_DRV_LOG(ERR, "No VF."); + return -EINVAL; + } + + if (on > 1) { + PMD_DRV_LOG(ERR, "on is should be 0 or 1."); + return -EINVAL; + } + + pf = I40E_DEV_PRIVATE_TO_PF(dev->data->dev_private); + hw = I40E_PF_TO_HW(pf); + + /** + * return -ENODEV if SRIOV not enabled, VF number not configured + * or no queue assigned. 
+ */ + if (!hw->func_caps.sr_iov_1_1 || pf->vf_num == 0 || + pf->vf_nb_qps == 0) { + PMD_DRV_LOG(ERR, "SRIOV is not enabled or no queue."); + return -ENODEV; + } + + for (vf_idx = 0; vf_idx < 64 && ret == I40E_SUCCESS; vf_idx++) { + if (vf_mask & ((uint64_t)(1ULL << vf_idx))) { + if (on) + ret = i40e_vsi_add_vlan(pf->vfs[vf_idx].vsi, + vlan_id); + else + ret = i40e_vsi_delete_vlan(pf->vfs[vf_idx].vsi, + vlan_id); + } + } + + if (ret != I40E_SUCCESS) { + ret = -ENOTSUP; + PMD_DRV_LOG(ERR, "Failed to set VF VLAN filter, on = %d", on); + } + + return ret; +} + +int +rte_pmd_i40e_get_vf_stats(uint8_t port, + uint16_t vf_id, + struct rte_eth_stats *stats) +{ + struct rte_eth_dev *dev; + struct i40e_pf *pf; + struct i40e_vsi *vsi; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port, -ENODEV); + + dev = &rte_eth_devices[port]; + + if (is_i40e_pmd(dev->data->drv_name)) + return -ENOTSUP; + + pf = I40E_DEV_PRIVATE_TO_PF(dev->data->dev_private); + + if (vf_id >= pf->vf_num || !pf->vfs) { + PMD_DRV_LOG(ERR, "Invalid VF ID."); + return -EINVAL; + } + + vsi = pf->vfs[vf_id].vsi; + if (!vsi) { + PMD_DRV_LOG(ERR, "Invalid VSI."); + return -EINVAL; + } + + i40e_update_vsi_stats(vsi); + + stats->ipackets = vsi->eth_stats.rx_unicast + + vsi->eth_stats.rx_multicast + + vsi->eth_stats.rx_broadcast; + stats->opackets = vsi->eth_stats.tx_unicast + + vsi->eth_stats.tx_multicast + + vsi->eth_stats.tx_broadcast; + stats->ibytes = vsi->eth_stats.rx_bytes; + stats->obytes = vsi->eth_stats.tx_bytes; + stats->ierrors = vsi->eth_stats.rx_discards; + stats->oerrors = vsi->eth_stats.tx_errors + vsi->eth_stats.tx_discards; + + return 0; +} + +int +rte_pmd_i40e_reset_vf_stats(uint8_t port, + uint16_t vf_id) +{ + struct rte_eth_dev *dev; + struct i40e_pf *pf; + struct i40e_vsi *vsi; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port, -ENODEV); + + dev = &rte_eth_devices[port]; + + if (is_i40e_pmd(dev->data->drv_name)) + return -ENOTSUP; + + pf = I40E_DEV_PRIVATE_TO_PF(dev->data->dev_private); + + if (vf_id >= pf->vf_num || !pf->vfs) { + PMD_DRV_LOG(ERR, "Invalid VF ID."); + return -EINVAL; + } + + vsi = pf->vfs[vf_id].vsi; + if (!vsi) { + PMD_DRV_LOG(ERR, "Invalid VSI."); + return -EINVAL; + } + + vsi->offset_loaded = false; + i40e_update_vsi_stats(vsi); + + return 0; +} diff --git a/src/dpdk/drivers/net/i40e/i40e_ethdev.h b/src/dpdk/drivers/net/i40e/i40e_ethdev.h index 92c8fad0..9e2f7a28 100644 --- a/src/dpdk/drivers/net/i40e/i40e_ethdev.h +++ b/src/dpdk/drivers/net/i40e/i40e_ethdev.h @@ -37,6 +37,8 @@ #include <rte_eth_ctrl.h> #include <rte_time.h> #include <rte_kvargs.h> +#include <rte_hash.h> +#include <rte_flow_driver.h> #define I40E_VLAN_TAG_SIZE 4 @@ -126,6 +128,7 @@ enum i40e_flxpld_layer_idx { #define I40E_FLAG_FDIR (1ULL << 6) #define I40E_FLAG_VXLAN (1ULL << 7) #define I40E_FLAG_RSS_AQ_CAPABLE (1ULL << 8) +#define I40E_FLAG_VF_MAC_BY_PF (1ULL << 9) #define I40E_FLAG_ALL (I40E_FLAG_RSS | \ I40E_FLAG_DCB | \ I40E_FLAG_VMDQ | \ @@ -134,7 +137,8 @@ enum i40e_flxpld_layer_idx { I40E_FLAG_HEADER_SPLIT_ENABLED | \ I40E_FLAG_FDIR | \ I40E_FLAG_VXLAN | \ - I40E_FLAG_RSS_AQ_CAPABLE) + I40E_FLAG_RSS_AQ_CAPABLE | \ + I40E_FLAG_VF_MAC_BY_PF) #define I40E_RSS_OFFLOAD_ALL ( \ ETH_RSS_FRAG_IPV4 | \ @@ -149,6 +153,16 @@ enum i40e_flxpld_layer_idx { ETH_RSS_NONFRAG_IPV6_OTHER | \ ETH_RSS_L2_PAYLOAD) +/* All bits of RSS hash enable for X722*/ +#define I40E_RSS_HENA_ALL_X722 ( \ + (1ULL << I40E_FILTER_PCTYPE_NONF_UNICAST_IPV4_UDP) | \ + (1ULL << I40E_FILTER_PCTYPE_NONF_MULTICAST_IPV4_UDP) | \ + (1ULL << I40E_FILTER_PCTYPE_NONF_IPV4_TCP_SYN_NO_ACK) | \ + 
(1ULL << I40E_FILTER_PCTYPE_NONF_UNICAST_IPV6_UDP) | \ + (1ULL << I40E_FILTER_PCTYPE_NONF_MULTICAST_IPV6_UDP) | \ + (1ULL << I40E_FILTER_PCTYPE_NONF_IPV6_TCP_SYN_NO_ACK) | \ + I40E_RSS_HENA_ALL) + /* All bits of RSS hash enable */ #define I40E_RSS_HENA_ALL ( \ (1ULL << I40E_FILTER_PCTYPE_NONF_IPV4_UDP) | \ @@ -178,6 +192,65 @@ enum i40e_flxpld_layer_idx { #define FLOATING_VEB_SUPPORTED_FW_MAJ 5 #define FLOATING_VEB_SUPPORTED_FW_MIN 0 +#define I40E_GL_SWT_L2TAGCTRL(_i) (0x001C0A70 + ((_i) * 4)) +#define I40E_GL_SWT_L2TAGCTRL_ETHERTYPE_SHIFT 16 +#define I40E_GL_SWT_L2TAGCTRL_ETHERTYPE_MASK \ + I40E_MASK(0xFFFF, I40E_GL_SWT_L2TAGCTRL_ETHERTYPE_SHIFT) + +#define I40E_INSET_NONE 0x00000000000000000ULL + +/* bit0 ~ bit 7 */ +#define I40E_INSET_DMAC 0x0000000000000001ULL +#define I40E_INSET_SMAC 0x0000000000000002ULL +#define I40E_INSET_VLAN_OUTER 0x0000000000000004ULL +#define I40E_INSET_VLAN_INNER 0x0000000000000008ULL +#define I40E_INSET_VLAN_TUNNEL 0x0000000000000010ULL + +/* bit 8 ~ bit 15 */ +#define I40E_INSET_IPV4_SRC 0x0000000000000100ULL +#define I40E_INSET_IPV4_DST 0x0000000000000200ULL +#define I40E_INSET_IPV6_SRC 0x0000000000000400ULL +#define I40E_INSET_IPV6_DST 0x0000000000000800ULL +#define I40E_INSET_SRC_PORT 0x0000000000001000ULL +#define I40E_INSET_DST_PORT 0x0000000000002000ULL +#define I40E_INSET_SCTP_VT 0x0000000000004000ULL + +/* bit 16 ~ bit 31 */ +#define I40E_INSET_IPV4_TOS 0x0000000000010000ULL +#define I40E_INSET_IPV4_PROTO 0x0000000000020000ULL +#define I40E_INSET_IPV4_TTL 0x0000000000040000ULL +#define I40E_INSET_IPV6_TC 0x0000000000080000ULL +#define I40E_INSET_IPV6_FLOW 0x0000000000100000ULL +#define I40E_INSET_IPV6_NEXT_HDR 0x0000000000200000ULL +#define I40E_INSET_IPV6_HOP_LIMIT 0x0000000000400000ULL +#define I40E_INSET_TCP_FLAGS 0x0000000000800000ULL + +/* bit 32 ~ bit 47, tunnel fields */ +#define I40E_INSET_TUNNEL_IPV4_DST 0x0000000100000000ULL +#define I40E_INSET_TUNNEL_IPV6_DST 0x0000000200000000ULL +#define I40E_INSET_TUNNEL_DMAC 0x0000000400000000ULL +#define I40E_INSET_TUNNEL_SRC_PORT 0x0000000800000000ULL +#define I40E_INSET_TUNNEL_DST_PORT 0x0000001000000000ULL +#define I40E_INSET_TUNNEL_ID 0x0000002000000000ULL + +/* bit 48 ~ bit 55 */ +#define I40E_INSET_LAST_ETHER_TYPE 0x0001000000000000ULL + +/* bit 56 ~ bit 63, Flex Payload */ +#define I40E_INSET_FLEX_PAYLOAD_W1 0x0100000000000000ULL +#define I40E_INSET_FLEX_PAYLOAD_W2 0x0200000000000000ULL +#define I40E_INSET_FLEX_PAYLOAD_W3 0x0400000000000000ULL +#define I40E_INSET_FLEX_PAYLOAD_W4 0x0800000000000000ULL +#define I40E_INSET_FLEX_PAYLOAD_W5 0x1000000000000000ULL +#define I40E_INSET_FLEX_PAYLOAD_W6 0x2000000000000000ULL +#define I40E_INSET_FLEX_PAYLOAD_W7 0x4000000000000000ULL +#define I40E_INSET_FLEX_PAYLOAD_W8 0x8000000000000000ULL +#define I40E_INSET_FLEX_PAYLOAD \ + (I40E_INSET_FLEX_PAYLOAD_W1 | I40E_INSET_FLEX_PAYLOAD_W2 | \ + I40E_INSET_FLEX_PAYLOAD_W3 | I40E_INSET_FLEX_PAYLOAD_W4 | \ + I40E_INSET_FLEX_PAYLOAD_W5 | I40E_INSET_FLEX_PAYLOAD_W6 | \ + I40E_INSET_FLEX_PAYLOAD_W7 | I40E_INSET_FLEX_PAYLOAD_W8) + struct i40e_adapter; /** @@ -290,6 +363,7 @@ struct i40e_vsi { uint16_t msix_intr; /* The MSIX interrupt binds to VSI */ uint16_t nb_msix; /* The max number of msix vector */ uint8_t enabled_tc; /* The traffic class enabled */ + uint8_t vlan_anti_spoof_on; /* The VLAN anti-spoofing enabled */ struct i40e_bw_info bw_info; /* VSI bandwidth information */ }; @@ -366,6 +440,14 @@ struct i40e_fdir_flex_mask { }; #define I40E_FILTER_PCTYPE_MAX 64 +#define I40E_MAX_FDIR_FILTER_NUM (1024 * 8) + 
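[Editor's note] The rte_pmd_i40e.c hunks above export a per-VF control API from the PF driver: port-VLAN insertion, broadcast enable, VLAN tagging/filtering, and VF statistics get/reset. Purely as an illustrative sketch — the rte_pmd_i40e.h header name and the port/VF/VLAN values here are assumptions, not part of this diff — an application driving the PF could exercise the new calls roughly as follows:

#include <rte_ethdev.h>
#include <rte_pmd_i40e.h>   /* assumed exported header for the new API */

/* Hypothetical helper: configure VF 'vf_id' behind PF port 'port' and read its stats. */
static int
configure_vf_sketch(uint8_t port, uint16_t vf_id)
{
	struct rte_eth_stats st;
	int ret;

	/* Insert port VLAN 100 into frames transmitted by the VF. */
	ret = rte_pmd_i40e_set_vf_vlan_insert(port, vf_id, 100);
	if (ret < 0)
		return ret;

	/* Allow the VF to receive broadcast frames. */
	ret = rte_pmd_i40e_set_vf_broadcast(port, vf_id, 1);
	if (ret < 0)
		return ret;

	/* Admit VLAN 100 for this VF only: bit vf_id of the 64-bit VF mask. */
	ret = rte_pmd_i40e_set_vf_vlan_filter(port, 100, 1ULL << vf_id, 1);
	if (ret < 0)
		return ret;

	/* Fetch, then clear, the VF's VSI counters. */
	ret = rte_pmd_i40e_get_vf_stats(port, vf_id, &st);
	if (ret == 0)
		ret = rte_pmd_i40e_reset_vf_stats(port, vf_id);
	return ret;
}

Each of these entry points validates the port, the VF index and the SR-IOV state before touching the VSI, so callers can rely on the error codes shown in the hunks above (-ENODEV, -EINVAL, -ENOTSUP).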
+struct i40e_fdir_filter { + TAILQ_ENTRY(i40e_fdir_filter) rules; + struct rte_eth_fdir_filter fdir; +}; + +TAILQ_HEAD(i40e_fdir_filter_list, i40e_fdir_filter); /* * A structure used to define fields of a FDIR related info. */ @@ -384,6 +466,60 @@ struct i40e_fdir_info { */ struct i40e_fdir_flex_pit flex_set[I40E_MAX_FLXPLD_LAYER * I40E_MAX_FLXPLD_FIED]; struct i40e_fdir_flex_mask flex_mask[I40E_FILTER_PCTYPE_MAX]; + + struct i40e_fdir_filter_list fdir_list; + struct i40e_fdir_filter **hash_map; + struct rte_hash *hash_table; +}; + +/* Ethertype filter number HW supports */ +#define I40E_MAX_ETHERTYPE_FILTER_NUM 768 + +/* Ethertype filter struct */ +struct i40e_ethertype_filter_input { + struct ether_addr mac_addr; /* Mac address to match */ + uint16_t ether_type; /* Ether type to match */ +}; + +struct i40e_ethertype_filter { + TAILQ_ENTRY(i40e_ethertype_filter) rules; + struct i40e_ethertype_filter_input input; + uint16_t flags; /* Flags from RTE_ETHTYPE_FLAGS_* */ + uint16_t queue; /* Queue assigned to when match */ +}; + +TAILQ_HEAD(i40e_ethertype_filter_list, i40e_ethertype_filter); + +struct i40e_ethertype_rule { + struct i40e_ethertype_filter_list ethertype_list; + struct i40e_ethertype_filter **hash_map; + struct rte_hash *hash_table; +}; + +/* Tunnel filter number HW supports */ +#define I40E_MAX_TUNNEL_FILTER_NUM 400 + +/* Tunnel filter struct */ +struct i40e_tunnel_filter_input { + uint8_t outer_mac[6]; /* Outer mac address to match */ + uint8_t inner_mac[6]; /* Inner mac address to match */ + uint16_t inner_vlan; /* Inner vlan address to match */ + uint16_t flags; /* Filter type flag */ + uint32_t tenant_id; /* Tenant id to match */ +}; + +struct i40e_tunnel_filter { + TAILQ_ENTRY(i40e_tunnel_filter) rules; + struct i40e_tunnel_filter_input input; + uint16_t queue; /* Queue assigned to when match */ +}; + +TAILQ_HEAD(i40e_tunnel_filter_list, i40e_tunnel_filter); + +struct i40e_tunnel_rule { + struct i40e_tunnel_filter_list tunnel_list; + struct i40e_tunnel_filter **hash_map; + struct rte_hash *hash_table; }; #define I40E_MIRROR_MAX_ENTRIES_PER_RULE 64 @@ -408,6 +544,17 @@ struct i40e_mirror_rule { TAILQ_HEAD(i40e_mirror_rule_list, i40e_mirror_rule); /* + * Struct to store flow created. + */ +struct rte_flow { + TAILQ_ENTRY(rte_flow) node; + enum rte_filter_type filter_type; + void *rule; +}; + +TAILQ_HEAD(i40e_flow_list, rte_flow); + +/* * Structure to store private data specific for PF instance. 
*/ struct i40e_pf { @@ -456,12 +603,15 @@ struct i40e_pf { struct i40e_vmdq_info *vmdq; struct i40e_fdir_info fdir; /* flow director info */ + struct i40e_ethertype_rule ethertype; /* Ethertype filter rule */ + struct i40e_tunnel_rule tunnel; /* Tunnel filter rule */ struct i40e_fc_conf fc_conf; /* Flow control conf */ struct i40e_mirror_rule_list mirror_list; uint16_t nb_mirror_rule; /* The number of mirror rules */ bool floating_veb; /* The flag to use the floating VEB */ /* The floating enable flag for the specific VF */ bool floating_veb_list[I40E_MAX_VF]; + struct i40e_flow_list flow_list; }; enum pending_msg { @@ -517,7 +667,7 @@ struct i40e_vf { enum i40e_aq_link_speed link_speed; bool vf_reset; volatile uint32_t pend_cmd; /* pending command not finished yet */ - uint32_t cmd_retval; /* return value of the cmd response from PF */ + int32_t cmd_retval; /* return value of the cmd response from PF */ u16 pend_msg; /* flags indicates events from pf not handled yet */ uint8_t *aq_resp; /* buffer to store the adminq response from PF */ @@ -554,6 +704,25 @@ struct i40e_adapter { struct rte_timecounter tx_tstamp_tc; }; +extern const struct rte_flow_ops i40e_flow_ops; + +union i40e_filter_t { + struct rte_eth_ethertype_filter ethertype_filter; + struct rte_eth_fdir_filter fdir_filter; + struct rte_eth_tunnel_filter_conf tunnel_filter; +}; + +typedef int (*parse_filter_t)(struct rte_eth_dev *dev, + const struct rte_flow_attr *attr, + const struct rte_flow_item pattern[], + const struct rte_flow_action actions[], + struct rte_flow_error *error, + union i40e_filter_t *filter); +struct i40e_valid_pattern { + enum rte_flow_item_type *items; + parse_filter_t parse_filter; +}; + int i40e_dev_switch_queues(struct i40e_pf *pf, bool on); int i40e_vsi_release(struct i40e_vsi *vsi); struct i40e_vsi *i40e_vsi_setup(struct i40e_pf *pf, @@ -577,7 +746,7 @@ int i40e_vsi_vlan_pvid_set(struct i40e_vsi *vsi, struct i40e_vsi_vlan_pvid_info *info); int i40e_vsi_config_vlan_stripping(struct i40e_vsi *vsi, bool on); int i40e_vsi_config_vlan_filter(struct i40e_vsi *vsi, bool on); -uint64_t i40e_config_hena(uint64_t flags); +uint64_t i40e_config_hena(uint64_t flags, enum i40e_mac_type type); uint64_t i40e_parse_hena(uint64_t flags); enum i40e_status_code i40e_fdir_setup_tx_resources(struct i40e_pf *pf); enum i40e_status_code i40e_fdir_setup_rx_resources(struct i40e_pf *pf); @@ -595,15 +764,44 @@ int i40e_fdir_ctrl_func(struct rte_eth_dev *dev, int i40e_select_filter_input_set(struct i40e_hw *hw, struct rte_eth_input_set_conf *conf, enum rte_filter_type filter); +void i40e_fdir_filter_restore(struct i40e_pf *pf); int i40e_hash_filter_inset_select(struct i40e_hw *hw, struct rte_eth_input_set_conf *conf); int i40e_fdir_filter_inset_select(struct i40e_pf *pf, struct rte_eth_input_set_conf *conf); - +int i40e_pf_host_send_msg_to_vf(struct i40e_pf_vf *vf, uint32_t opcode, + uint32_t retval, uint8_t *msg, + uint16_t msglen); void i40e_rxq_info_get(struct rte_eth_dev *dev, uint16_t queue_id, struct rte_eth_rxq_info *qinfo); void i40e_txq_info_get(struct rte_eth_dev *dev, uint16_t queue_id, struct rte_eth_txq_info *qinfo); +struct i40e_ethertype_filter * +i40e_sw_ethertype_filter_lookup(struct i40e_ethertype_rule *ethertype_rule, + const struct i40e_ethertype_filter_input *input); +int i40e_sw_ethertype_filter_del(struct i40e_pf *pf, + struct i40e_ethertype_filter_input *input); +int i40e_sw_fdir_filter_del(struct i40e_pf *pf, + struct rte_eth_fdir_input *input); +struct i40e_tunnel_filter * 
+i40e_sw_tunnel_filter_lookup(struct i40e_tunnel_rule *tunnel_rule, + const struct i40e_tunnel_filter_input *input); +int i40e_sw_tunnel_filter_del(struct i40e_pf *pf, + struct i40e_tunnel_filter_input *input); +uint64_t i40e_get_default_input_set(uint16_t pctype); +int i40e_ethertype_filter_set(struct i40e_pf *pf, + struct rte_eth_ethertype_filter *filter, + bool add); +int i40e_add_del_fdir_filter(struct rte_eth_dev *dev, + const struct rte_eth_fdir_filter *filter, + bool add); +int i40e_dev_tunnel_filter_set(struct i40e_pf *pf, + struct rte_eth_tunnel_filter_conf *tunnel_filter, + uint8_t add); +int i40e_fdir_flush(struct rte_eth_dev *dev); + +#define I40E_DEV_TO_PCI(eth_dev) \ + RTE_DEV_TO_PCI((eth_dev)->device) /* I40E_DEV_PRIVATE_TO */ #define I40E_DEV_PRIVATE_TO_PF(adapter) \ @@ -699,6 +897,25 @@ i40e_calc_itr_interval(int16_t interval) (flow_type) == RTE_ETH_FLOW_NONFRAG_IPV6_OTHER || \ (flow_type) == RTE_ETH_FLOW_L2_PAYLOAD) +#define I40E_VALID_PCTYPE_X722(pctype) \ + ((pctype) == I40E_FILTER_PCTYPE_FRAG_IPV4 || \ + (pctype) == I40E_FILTER_PCTYPE_NONF_IPV4_TCP || \ + (pctype) == I40E_FILTER_PCTYPE_NONF_IPV4_TCP_SYN_NO_ACK || \ + (pctype) == I40E_FILTER_PCTYPE_NONF_IPV4_UDP || \ + (pctype) == I40E_FILTER_PCTYPE_NONF_UNICAST_IPV4_UDP || \ + (pctype) == I40E_FILTER_PCTYPE_NONF_MULTICAST_IPV4_UDP || \ + (pctype) == I40E_FILTER_PCTYPE_NONF_IPV4_SCTP || \ + (pctype) == I40E_FILTER_PCTYPE_NONF_IPV4_OTHER || \ + (pctype) == I40E_FILTER_PCTYPE_FRAG_IPV6 || \ + (pctype) == I40E_FILTER_PCTYPE_NONF_IPV6_UDP || \ + (pctype) == I40E_FILTER_PCTYPE_NONF_UNICAST_IPV6_UDP || \ + (pctype) == I40E_FILTER_PCTYPE_NONF_MULTICAST_IPV6_UDP || \ + (pctype) == I40E_FILTER_PCTYPE_NONF_IPV6_TCP || \ + (pctype) == I40E_FILTER_PCTYPE_NONF_IPV6_TCP_SYN_NO_ACK || \ + (pctype) == I40E_FILTER_PCTYPE_NONF_IPV6_SCTP || \ + (pctype) == I40E_FILTER_PCTYPE_NONF_IPV6_OTHER || \ + (pctype) == I40E_FILTER_PCTYPE_L2_PAYLOAD) + #define I40E_VALID_PCTYPE(pctype) \ ((pctype) == I40E_FILTER_PCTYPE_FRAG_IPV4 || \ (pctype) == I40E_FILTER_PCTYPE_NONF_IPV4_TCP || \ @@ -712,4 +929,18 @@ i40e_calc_itr_interval(int16_t interval) (pctype) == I40E_FILTER_PCTYPE_NONF_IPV6_OTHER || \ (pctype) == I40E_FILTER_PCTYPE_L2_PAYLOAD) +#define I40E_PHY_TYPE_SUPPORT_40G(phy_type) \ + (((phy_type) & I40E_CAP_PHY_TYPE_40GBASE_KR4) || \ + ((phy_type) & I40E_CAP_PHY_TYPE_40GBASE_CR4_CU) || \ + ((phy_type) & I40E_CAP_PHY_TYPE_40GBASE_AOC) || \ + ((phy_type) & I40E_CAP_PHY_TYPE_40GBASE_CR4) || \ + ((phy_type) & I40E_CAP_PHY_TYPE_40GBASE_SR4) || \ + ((phy_type) & I40E_CAP_PHY_TYPE_40GBASE_LR4)) + +#define I40E_PHY_TYPE_SUPPORT_25G(phy_type) \ + (((phy_type) & I40E_CAP_PHY_TYPE_25GBASE_KR) || \ + ((phy_type) & I40E_CAP_PHY_TYPE_25GBASE_CR) || \ + ((phy_type) & I40E_CAP_PHY_TYPE_25GBASE_SR) || \ + ((phy_type) & I40E_CAP_PHY_TYPE_25GBASE_LR)) + #endif /* _I40E_ETHDEV_H_ */ diff --git a/src/dpdk/drivers/net/i40e/i40e_ethdev_vf.c b/src/dpdk/drivers/net/i40e/i40e_ethdev_vf.c index a616ae0b..a606aefe 100644 --- a/src/dpdk/drivers/net/i40e/i40e_ethdev_vf.c +++ b/src/dpdk/drivers/net/i40e/i40e_ethdev_vf.c @@ -126,8 +126,6 @@ static void i40evf_dev_promiscuous_enable(struct rte_eth_dev *dev); static void i40evf_dev_promiscuous_disable(struct rte_eth_dev *dev); static void i40evf_dev_allmulticast_enable(struct rte_eth_dev *dev); static void i40evf_dev_allmulticast_disable(struct rte_eth_dev *dev); -static int i40evf_get_link_status(struct rte_eth_dev *dev, - struct rte_eth_link *link); static int i40evf_init_vlan(struct rte_eth_dev *dev); static int 
i40evf_dev_rx_queue_start(struct rte_eth_dev *dev, uint16_t rx_queue_id); @@ -153,6 +151,9 @@ static int i40evf_dev_rss_hash_update(struct rte_eth_dev *dev, struct rte_eth_rss_conf *rss_conf); static int i40evf_dev_rss_hash_conf_get(struct rte_eth_dev *dev, struct rte_eth_rss_conf *rss_conf); +static int i40evf_dev_mtu_set(struct rte_eth_dev *dev, uint16_t mtu); +static void i40evf_set_default_mac_addr(struct rte_eth_dev *dev, + struct ether_addr *mac_addr); static int i40evf_dev_rx_queue_intr_enable(struct rte_eth_dev *dev, uint16_t queue_id); static int @@ -178,11 +179,11 @@ static const struct rte_i40evf_xstats_name_off rte_i40evf_stats_strings[] = { {"rx_unknown_protocol_packets", offsetof(struct i40e_eth_stats, rx_unknown_protocol)}, {"tx_bytes", offsetof(struct i40e_eth_stats, tx_bytes)}, - {"tx_unicast_packets", offsetof(struct i40e_eth_stats, tx_bytes)}, - {"tx_multicast_packets", offsetof(struct i40e_eth_stats, tx_bytes)}, - {"tx_broadcast_packets", offsetof(struct i40e_eth_stats, tx_bytes)}, - {"tx_dropped_packets", offsetof(struct i40e_eth_stats, tx_bytes)}, - {"tx_error_packets", offsetof(struct i40e_eth_stats, tx_bytes)}, + {"tx_unicast_packets", offsetof(struct i40e_eth_stats, tx_unicast)}, + {"tx_multicast_packets", offsetof(struct i40e_eth_stats, tx_multicast)}, + {"tx_broadcast_packets", offsetof(struct i40e_eth_stats, tx_broadcast)}, + {"tx_dropped_packets", offsetof(struct i40e_eth_stats, tx_discards)}, + {"tx_error_packets", offsetof(struct i40e_eth_stats, tx_errors)}, }; #define I40EVF_NB_XSTATS (sizeof(rte_i40evf_stats_strings) / \ @@ -227,6 +228,8 @@ static const struct eth_dev_ops i40evf_eth_dev_ops = { .reta_query = i40evf_dev_rss_reta_query, .rss_hash_update = i40evf_dev_rss_hash_update, .rss_hash_conf_get = i40evf_dev_rss_hash_conf_get, + .mtu_set = i40evf_dev_mtu_set, + .mac_addr_set = i40evf_set_default_mac_addr, }; /* @@ -363,6 +366,7 @@ i40evf_execute_vf_cmd(struct rte_eth_dev *dev, struct vf_cmd_info *args) err = -1; do { ret = i40evf_read_pfmsg(dev, &info); + vf->cmd_retval = info.result; if (ret == I40EVF_MSG_CMD) { err = 0; break; @@ -641,7 +645,7 @@ i40evf_configure_vsi_queues(struct rte_eth_dev *dev) ret = i40evf_execute_vf_cmd(dev, &args); if (ret) PMD_DRV_LOG(ERR, "Failed to execute command of " - "I40E_VIRTCHNL_OP_CONFIG_VSI_QUEUES\n"); + "I40E_VIRTCHNL_OP_CONFIG_VSI_QUEUES"); return ret; } @@ -694,7 +698,7 @@ i40evf_configure_vsi_queues_ext(struct rte_eth_dev *dev) ret = i40evf_execute_vf_cmd(dev, &args); if (ret) PMD_DRV_LOG(ERR, "Failed to execute command of " - "I40E_VIRTCHNL_OP_CONFIG_VSI_QUEUES_EXT\n"); + "I40E_VIRTCHNL_OP_CONFIG_VSI_QUEUES_EXT"); return ret; } @@ -720,7 +724,8 @@ i40evf_config_irq_map(struct rte_eth_dev *dev) uint8_t cmd_buffer[sizeof(struct i40e_virtchnl_irq_map_info) + \ sizeof(struct i40e_virtchnl_vector_map)]; struct i40e_virtchnl_irq_map_info *map_info; - struct rte_intr_handle *intr_handle = &dev->pci_dev->intr_handle; + struct rte_pci_device *pci_dev = I40E_DEV_TO_PCI(dev); + struct rte_intr_handle *intr_handle = &pci_dev->intr_handle; uint32_t vector_id; int i, err; @@ -888,19 +893,16 @@ i40evf_add_mac_addr(struct rte_eth_dev *dev, } static void -i40evf_del_mac_addr(struct rte_eth_dev *dev, uint32_t index) +i40evf_del_mac_addr_by_addr(struct rte_eth_dev *dev, + struct ether_addr *addr) { struct i40e_virtchnl_ether_addr_list *list; struct i40e_vf *vf = I40EVF_DEV_PRIVATE_TO_VF(dev->data->dev_private); - struct rte_eth_dev_data *data = dev->data; - struct ether_addr *addr; uint8_t cmd_buffer[sizeof(struct 
i40e_virtchnl_ether_addr_list) + \ sizeof(struct i40e_virtchnl_ether_addr)]; int err; struct vf_cmd_info args; - addr = &(data->mac_addrs[index]); - if (i40e_validate_mac_addr(addr->addr_bytes) != I40E_SUCCESS) { PMD_DRV_LOG(ERR, "Invalid mac:%x-%x-%x-%x-%x-%x", addr->addr_bytes[0], addr->addr_bytes[1], @@ -927,6 +929,17 @@ i40evf_del_mac_addr(struct rte_eth_dev *dev, uint32_t index) return; } +static void +i40evf_del_mac_addr(struct rte_eth_dev *dev, uint32_t index) +{ + struct rte_eth_dev_data *data = dev->data; + struct ether_addr *addr; + + addr = &data->mac_addrs[index]; + + i40evf_del_mac_addr_by_addr(dev, addr); +} + static int i40evf_update_stats(struct rte_eth_dev *dev, struct i40e_eth_stats **pstats) { @@ -954,7 +967,7 @@ i40evf_update_stats(struct rte_eth_dev *dev, struct i40e_eth_stats **pstats) } static int -i40evf_get_statics(struct rte_eth_dev *dev, struct rte_eth_stats *stats) +i40evf_get_statistics(struct rte_eth_dev *dev, struct rte_eth_stats *stats) { int ret; struct i40e_eth_stats *pstats = NULL; @@ -1084,37 +1097,11 @@ i40evf_del_vlan(struct rte_eth_dev *dev, uint16_t vlanid) return err; } -static int -i40evf_get_link_status(struct rte_eth_dev *dev, struct rte_eth_link *link) -{ - struct i40e_vf *vf = I40EVF_DEV_PRIVATE_TO_VF(dev->data->dev_private); - int err; - struct vf_cmd_info args; - struct rte_eth_link *new_link; - - args.ops = (enum i40e_virtchnl_ops)I40E_VIRTCHNL_OP_GET_LINK_STAT; - args.in_args = NULL; - args.in_args_size = 0; - args.out_buffer = vf->aq_resp; - args.out_size = I40E_AQ_BUF_SZ; - err = i40evf_execute_vf_cmd(dev, &args); - if (err) { - PMD_DRV_LOG(ERR, "fail to execute command OP_GET_LINK_STAT"); - return err; - } - - new_link = (struct rte_eth_link *)args.out_buffer; - (void)rte_memcpy(link, new_link, sizeof(*link)); - - return 0; -} - static const struct rte_pci_id pci_id_i40evf_map[] = { { RTE_PCI_DEVICE(I40E_INTEL_VENDOR_ID, I40E_DEV_ID_VF) }, { RTE_PCI_DEVICE(I40E_INTEL_VENDOR_ID, I40E_DEV_ID_VF_HV) }, { RTE_PCI_DEVICE(I40E_INTEL_VENDOR_ID, I40E_DEV_ID_X722_A0_VF) }, { RTE_PCI_DEVICE(I40E_INTEL_VENDOR_ID, I40E_DEV_ID_X722_VF) }, - { RTE_PCI_DEVICE(I40E_INTEL_VENDOR_ID, I40E_DEV_ID_X722_VF_HV) }, { .vendor_id = 0, /* sentinel */ }, }; @@ -1208,7 +1195,6 @@ i40evf_init_vf(struct rte_eth_dev *dev) int i, err, bufsz; struct i40e_hw *hw = I40E_DEV_PRIVATE_TO_HW(dev->data->dev_private); struct i40e_vf *vf = I40EVF_DEV_PRIVATE_TO_VF(dev->data->dev_private); - struct ether_addr *p_mac_addr; uint16_t interval = i40e_calc_itr_interval(I40E_QUEUE_ITR_INTERVAL_MAX); @@ -1285,9 +1271,8 @@ i40evf_init_vf(struct rte_eth_dev *dev) vf->vsi.adapter = I40E_DEV_PRIVATE_TO_ADAPTER(dev->data->dev_private); /* Store the MAC address configured by host, or generate random one */ - p_mac_addr = (struct ether_addr *)(vf->vsi_res->default_mac_addr); - if (is_valid_assigned_ether_addr(p_mac_addr)) /* Configured by host */ - ether_addr_copy(p_mac_addr, (struct ether_addr *)hw->mac.addr); + if (is_valid_assigned_ether_addr((struct ether_addr *)hw->mac.addr)) + vf->flags |= I40E_FLAG_VF_MAC_BY_PF; else eth_random_addr(hw->mac.addr); /* Generate a random one */ @@ -1340,16 +1325,16 @@ i40evf_handle_pf_event(__rte_unused struct rte_eth_dev *dev, switch (pf_msg->event) { case I40E_VIRTCHNL_EVENT_RESET_IMPENDING: - PMD_DRV_LOG(DEBUG, "VIRTCHNL_EVENT_RESET_IMPENDING event\n"); - _rte_eth_dev_callback_process(dev, RTE_ETH_EVENT_INTR_RESET); + PMD_DRV_LOG(DEBUG, "VIRTCHNL_EVENT_RESET_IMPENDING event"); + _rte_eth_dev_callback_process(dev, RTE_ETH_EVENT_INTR_RESET, NULL); break; 
case I40E_VIRTCHNL_EVENT_LINK_CHANGE: - PMD_DRV_LOG(DEBUG, "VIRTCHNL_EVENT_LINK_CHANGE event\n"); + PMD_DRV_LOG(DEBUG, "VIRTCHNL_EVENT_LINK_CHANGE event"); vf->link_up = pf_msg->event_data.link_event.link_status; vf->link_speed = pf_msg->event_data.link_event.link_speed; break; case I40E_VIRTCHNL_EVENT_PF_DRIVER_CLOSE: - PMD_DRV_LOG(DEBUG, "VIRTCHNL_EVENT_PF_DRIVER_CLOSE event\n"); + PMD_DRV_LOG(DEBUG, "VIRTCHNL_EVENT_PF_DRIVER_CLOSE event"); break; default: PMD_DRV_LOG(ERR, " unknown event received %u", pf_msg->event); @@ -1363,8 +1348,9 @@ i40evf_handle_aq_msg(struct rte_eth_dev *dev) struct i40e_hw *hw = I40E_DEV_PRIVATE_TO_HW(dev->data->dev_private); struct i40e_vf *vf = I40EVF_DEV_PRIVATE_TO_VF(dev->data->dev_private); struct i40e_arq_event_info info; - struct i40e_virtchnl_msg *v_msg; - uint16_t pending, opcode; + uint16_t pending, aq_opc; + enum i40e_virtchnl_ops msg_opc; + enum i40e_status_code msg_ret; int ret; info.buf_len = I40E_AQ_BUF_SZ; @@ -1373,7 +1359,6 @@ i40evf_handle_aq_msg(struct rte_eth_dev *dev) return; } info.msg_buf = vf->aq_resp; - v_msg = (struct i40e_virtchnl_msg *)&info.desc; pending = 1; while (pending) { @@ -1384,32 +1369,39 @@ i40evf_handle_aq_msg(struct rte_eth_dev *dev) "ret: %d", ret); break; } - opcode = rte_le_to_cpu_16(info.desc.opcode); - - switch (opcode) { + aq_opc = rte_le_to_cpu_16(info.desc.opcode); + /* For the message sent from pf to vf, opcode is stored in + * cookie_high of struct i40e_aq_desc, while return error code + * are stored in cookie_low, Which is done by + * i40e_aq_send_msg_to_vf in PF driver.*/ + msg_opc = (enum i40e_virtchnl_ops)rte_le_to_cpu_32( + info.desc.cookie_high); + msg_ret = (enum i40e_status_code)rte_le_to_cpu_32( + info.desc.cookie_low); + switch (aq_opc) { case i40e_aqc_opc_send_msg_to_vf: - if (v_msg->v_opcode == I40E_VIRTCHNL_OP_EVENT) + if (msg_opc == I40E_VIRTCHNL_OP_EVENT) /* process event*/ i40evf_handle_pf_event(dev, info.msg_buf, info.msg_len); else { /* read message and it's expected one */ - if (v_msg->v_opcode == vf->pend_cmd) { - vf->cmd_retval = v_msg->v_retval; + if (msg_opc == vf->pend_cmd) { + vf->cmd_retval = msg_ret; /* prevent compiler reordering */ rte_compiler_barrier(); _clear_cmd(vf); } else PMD_DRV_LOG(ERR, "command mismatch," "expect %u, get %u", - vf->pend_cmd, v_msg->v_opcode); + vf->pend_cmd, msg_opc); PMD_DRV_LOG(DEBUG, "adminq response is received," - " opcode = %d\n", v_msg->v_opcode); + " opcode = %d", msg_opc); } break; default: PMD_DRV_LOG(ERR, "Request %u is not supported yet", - opcode); + aq_opc); break; } } @@ -1428,7 +1420,7 @@ i40evf_handle_aq_msg(struct rte_eth_dev *dev) * void */ static void -i40evf_dev_interrupt_handler(__rte_unused struct rte_intr_handle *handle, +i40evf_dev_interrupt_handler(struct rte_intr_handle *intr_handle, void *param) { struct rte_eth_dev *dev = (struct rte_eth_dev *)param; @@ -1442,31 +1434,31 @@ i40evf_dev_interrupt_handler(__rte_unused struct rte_intr_handle *handle, /* No interrupt event indicated */ if (!(icr0 & I40E_VFINT_ICR01_INTEVENT_MASK)) { - PMD_DRV_LOG(DEBUG, "No interrupt event, nothing to do\n"); + PMD_DRV_LOG(DEBUG, "No interrupt event, nothing to do"); goto done; } if (icr0 & I40E_VFINT_ICR01_ADMINQ_MASK) { - PMD_DRV_LOG(DEBUG, "ICR01_ADMINQ is reported\n"); + PMD_DRV_LOG(DEBUG, "ICR01_ADMINQ is reported"); i40evf_handle_aq_msg(dev); } /* Link Status Change interrupt */ if (icr0 & I40E_VFINT_ICR01_LINK_STAT_CHANGE_MASK) PMD_DRV_LOG(DEBUG, "LINK_STAT_CHANGE is reported," - " do nothing\n"); + " do nothing"); done: 
i40evf_enable_irq0(hw); - rte_intr_enable(&dev->pci_dev->intr_handle); + rte_intr_enable(intr_handle); } static int i40evf_dev_init(struct rte_eth_dev *eth_dev) { - struct i40e_hw *hw = I40E_DEV_PRIVATE_TO_HW(\ - eth_dev->data->dev_private); - struct rte_pci_device *pci_dev = eth_dev->pci_dev; + struct i40e_hw *hw + = I40E_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private); + struct rte_pci_device *pci_dev = I40E_DEV_TO_PCI(eth_dev); PMD_INIT_FUNC_TRACE(); @@ -1485,15 +1477,16 @@ i40evf_dev_init(struct rte_eth_dev *eth_dev) return 0; } - rte_eth_copy_pci_info(eth_dev, eth_dev->pci_dev); + rte_eth_copy_pci_info(eth_dev, pci_dev); + eth_dev->data->dev_flags = RTE_ETH_DEV_DETACHABLE; - hw->vendor_id = eth_dev->pci_dev->id.vendor_id; - hw->device_id = eth_dev->pci_dev->id.device_id; - hw->subsystem_vendor_id = eth_dev->pci_dev->id.subsystem_vendor_id; - hw->subsystem_device_id = eth_dev->pci_dev->id.subsystem_device_id; - hw->bus.device = eth_dev->pci_dev->addr.devid; - hw->bus.func = eth_dev->pci_dev->addr.function; - hw->hw_addr = (void *)eth_dev->pci_dev->mem_resource[0].addr; + hw->vendor_id = pci_dev->id.vendor_id; + hw->device_id = pci_dev->id.device_id; + hw->subsystem_vendor_id = pci_dev->id.subsystem_vendor_id; + hw->subsystem_device_id = pci_dev->id.subsystem_device_id; + hw->bus.device = pci_dev->addr.devid; + hw->bus.func = pci_dev->addr.function; + hw->hw_addr = (void *)pci_dev->mem_resource[0].addr; hw->adapter_stopped = 0; if(i40evf_init_vf(eth_dev) != 0) { @@ -1554,38 +1547,19 @@ i40evf_dev_uninit(struct rte_eth_dev *eth_dev) */ static struct eth_driver rte_i40evf_pmd = { .pci_drv = { - .name = "rte_i40evf_pmd", .id_table = pci_id_i40evf_map, - .drv_flags = RTE_PCI_DRV_NEED_MAPPING | RTE_PCI_DRV_DETACHABLE, + .drv_flags = RTE_PCI_DRV_NEED_MAPPING, + .probe = rte_eth_dev_pci_probe, + .remove = rte_eth_dev_pci_remove, }, .eth_dev_init = i40evf_dev_init, .eth_dev_uninit = i40evf_dev_uninit, .dev_private_size = sizeof(struct i40e_adapter), }; -/* - * VF Driver initialization routine. - * Invoked one at EAL init time. - * Register itself as the [Virtual Poll Mode] Driver of PCI Fortville devices. 
- */ -static int -rte_i40evf_pmd_init(const char *name __rte_unused, - const char *params __rte_unused) -{ - PMD_INIT_FUNC_TRACE(); - - rte_eth_driver_register(&rte_i40evf_pmd); - - return 0; -} - -static struct rte_driver rte_i40evf_driver = { - .type = PMD_PDEV, - .init = rte_i40evf_pmd_init, -}; - -PMD_REGISTER_DRIVER(rte_i40evf_driver, i40evf); -DRIVER_REGISTER_PCI_TABLE(i40evf, pci_id_i40evf_map); +RTE_PMD_REGISTER_PCI(net_i40e_vf, rte_i40evf_pmd.pci_drv); +RTE_PMD_REGISTER_PCI_TABLE(net_i40e_vf, pci_id_i40evf_map); +RTE_PMD_REGISTER_KMOD_DEP(net_i40e_vf, "* igb_uio | vfio"); static int i40evf_dev_configure(struct rte_eth_dev *dev) @@ -1900,7 +1874,8 @@ i40evf_enable_queues_intr(struct rte_eth_dev *dev) { struct i40e_vf *vf = I40EVF_DEV_PRIVATE_TO_VF(dev->data->dev_private); struct i40e_hw *hw = I40E_DEV_PRIVATE_TO_HW(dev->data->dev_private); - struct rte_intr_handle *intr_handle = &dev->pci_dev->intr_handle; + struct rte_pci_device *pci_dev = I40E_DEV_TO_PCI(dev); + struct rte_intr_handle *intr_handle = &pci_dev->intr_handle; if (!rte_intr_allow_others(intr_handle)) { I40E_WRITE_REG(hw, @@ -1932,7 +1907,8 @@ i40evf_disable_queues_intr(struct rte_eth_dev *dev) { struct i40e_vf *vf = I40EVF_DEV_PRIVATE_TO_VF(dev->data->dev_private); struct i40e_hw *hw = I40E_DEV_PRIVATE_TO_HW(dev->data->dev_private); - struct rte_intr_handle *intr_handle = &dev->pci_dev->intr_handle; + struct rte_pci_device *pci_dev = I40E_DEV_TO_PCI(dev); + struct rte_intr_handle *intr_handle = &pci_dev->intr_handle; if (!rte_intr_allow_others(intr_handle)) { I40E_WRITE_REG(hw, I40E_VFINT_DYN_CTL01, @@ -1958,7 +1934,8 @@ i40evf_disable_queues_intr(struct rte_eth_dev *dev) static int i40evf_dev_rx_queue_intr_enable(struct rte_eth_dev *dev, uint16_t queue_id) { - struct rte_intr_handle *intr_handle = &dev->pci_dev->intr_handle; + struct rte_pci_device *pci_dev = I40E_DEV_TO_PCI(dev); + struct rte_intr_handle *intr_handle = &pci_dev->intr_handle; struct i40e_hw *hw = I40E_DEV_PRIVATE_TO_HW(dev->data->dev_private); uint16_t interval = i40e_calc_itr_interval(RTE_LIBRTE_I40E_ITR_INTERVAL); @@ -1984,7 +1961,7 @@ i40evf_dev_rx_queue_intr_enable(struct rte_eth_dev *dev, uint16_t queue_id) I40EVF_WRITE_FLUSH(hw); - rte_intr_enable(&dev->pci_dev->intr_handle); + rte_intr_enable(&pci_dev->intr_handle); return 0; } @@ -1992,7 +1969,8 @@ i40evf_dev_rx_queue_intr_enable(struct rte_eth_dev *dev, uint16_t queue_id) static int i40evf_dev_rx_queue_intr_disable(struct rte_eth_dev *dev, uint16_t queue_id) { - struct rte_intr_handle *intr_handle = &dev->pci_dev->intr_handle; + struct rte_pci_device *pci_dev = I40E_DEV_TO_PCI(dev); + struct rte_intr_handle *intr_handle = &pci_dev->intr_handle; struct i40e_hw *hw = I40E_DEV_PRIVATE_TO_HW(dev->data->dev_private); uint16_t msix_intr; @@ -2072,7 +2050,8 @@ i40evf_dev_start(struct rte_eth_dev *dev) { struct i40e_vf *vf = I40EVF_DEV_PRIVATE_TO_VF(dev->data->dev_private); struct i40e_hw *hw = I40E_DEV_PRIVATE_TO_HW(dev->data->dev_private); - struct rte_intr_handle *intr_handle = &dev->pci_dev->intr_handle; + struct rte_pci_device *pci_dev = I40E_DEV_TO_PCI(dev); + struct rte_intr_handle *intr_handle = &pci_dev->intr_handle; uint32_t intr_vector = 0; PMD_INIT_FUNC_TRACE(); @@ -2096,7 +2075,7 @@ i40evf_dev_start(struct rte_eth_dev *dev) dev->data->nb_rx_queues * sizeof(int), 0); if (!intr_handle->intr_vec) { PMD_INIT_LOG(ERR, "Failed to allocate %d rx_queues" - " intr_vec\n", dev->data->nb_rx_queues); + " intr_vec", dev->data->nb_rx_queues); return -ENOMEM; } } @@ -2137,7 +2116,8 @@ err_queue: 
static void i40evf_dev_stop(struct rte_eth_dev *dev) { - struct rte_intr_handle *intr_handle = &dev->pci_dev->intr_handle; + struct rte_pci_device *pci_dev = I40E_DEV_TO_PCI(dev); + struct rte_intr_handle *intr_handle = &pci_dev->intr_handle; PMD_INIT_FUNC_TRACE(); @@ -2166,35 +2146,33 @@ i40evf_dev_link_update(struct rte_eth_dev *dev, * DPDK pf host provide interfacet to acquire link status * while Linux driver does not */ - if (vf->version_major == I40E_DPDK_VERSION_MAJOR) - i40evf_get_link_status(dev, &new_link); - else { - /* Linux driver PF host */ - switch (vf->link_speed) { - case I40E_LINK_SPEED_100MB: - new_link.link_speed = ETH_SPEED_NUM_100M; - break; - case I40E_LINK_SPEED_1GB: - new_link.link_speed = ETH_SPEED_NUM_1G; - break; - case I40E_LINK_SPEED_10GB: - new_link.link_speed = ETH_SPEED_NUM_10G; - break; - case I40E_LINK_SPEED_20GB: - new_link.link_speed = ETH_SPEED_NUM_20G; - break; - case I40E_LINK_SPEED_40GB: - new_link.link_speed = ETH_SPEED_NUM_40G; - break; - default: - new_link.link_speed = ETH_SPEED_NUM_100M; - break; - } - /* full duplex only */ - new_link.link_duplex = ETH_LINK_FULL_DUPLEX; - new_link.link_status = vf->link_up ? ETH_LINK_UP : - ETH_LINK_DOWN; + + /* Linux driver PF host */ + switch (vf->link_speed) { + case I40E_LINK_SPEED_100MB: + new_link.link_speed = ETH_SPEED_NUM_100M; + break; + case I40E_LINK_SPEED_1GB: + new_link.link_speed = ETH_SPEED_NUM_1G; + break; + case I40E_LINK_SPEED_10GB: + new_link.link_speed = ETH_SPEED_NUM_10G; + break; + case I40E_LINK_SPEED_20GB: + new_link.link_speed = ETH_SPEED_NUM_20G; + break; + case I40E_LINK_SPEED_40GB: + new_link.link_speed = ETH_SPEED_NUM_40G; + break; + default: + new_link.link_speed = ETH_SPEED_NUM_100M; + break; } + /* full duplex only */ + new_link.link_duplex = ETH_LINK_FULL_DUPLEX; + new_link.link_status = vf->link_up ? 
ETH_LINK_UP : + ETH_LINK_DOWN; + i40evf_dev_atomic_write_link_status(dev, &new_link); return 0; @@ -2266,6 +2244,7 @@ i40evf_dev_info_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info) struct i40e_vf *vf = I40EVF_DEV_PRIVATE_TO_VF(dev->data->dev_private); memset(dev_info, 0, sizeof(*dev_info)); + dev_info->pci_dev = RTE_DEV_TO_PCI(dev->device); dev_info->max_rx_queues = vf->vsi_res->num_queue_pairs; dev_info->max_tx_queues = vf->vsi_res->num_queue_pairs; dev_info->min_rx_bufsize = I40E_BUF_SIZE_MIN; @@ -2326,15 +2305,16 @@ i40evf_dev_info_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info) static void i40evf_dev_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats) { - if (i40evf_get_statics(dev, stats)) - PMD_DRV_LOG(ERR, "Get statics failed"); + if (i40evf_get_statistics(dev, stats)) + PMD_DRV_LOG(ERR, "Get statistics failed"); } static void i40evf_dev_close(struct rte_eth_dev *dev) { struct i40e_hw *hw = I40E_DEV_PRIVATE_TO_HW(dev->data->dev_private); - struct rte_pci_device *pci_dev = dev->pci_dev; + struct rte_pci_device *pci_dev = I40E_DEV_TO_PCI(dev); + struct rte_intr_handle *intr_handle = &pci_dev->intr_handle; i40evf_dev_stop(dev); hw->adapter_stopped = 1; @@ -2342,11 +2322,11 @@ i40evf_dev_close(struct rte_eth_dev *dev) i40evf_reset_vf(hw); i40e_shutdown_adminq(hw); /* disable uio intr before callback unregister */ - rte_intr_disable(&pci_dev->intr_handle); + rte_intr_disable(intr_handle); /* unregister callback func from eal lib */ - rte_intr_callback_unregister(&pci_dev->intr_handle, - i40evf_dev_interrupt_handler, (void *)dev); + rte_intr_callback_unregister(intr_handle, + i40evf_dev_interrupt_handler, dev); i40evf_disable_irq0(hw); } @@ -2423,7 +2403,7 @@ i40evf_dev_rss_reta_update(struct rte_eth_dev *dev, if (reta_size != ETH_RSS_RETA_SIZE_64) { PMD_DRV_LOG(ERR, "The size of hash lookup table configured " "(%d) doesn't match the number of hardware can " - "support (%d)\n", reta_size, ETH_RSS_RETA_SIZE_64); + "support (%d)", reta_size, ETH_RSS_RETA_SIZE_64); return -EINVAL; } @@ -2462,7 +2442,7 @@ i40evf_dev_rss_reta_query(struct rte_eth_dev *dev, if (reta_size != ETH_RSS_RETA_SIZE_64) { PMD_DRV_LOG(ERR, "The size of hash lookup table configured " "(%d) doesn't match the number of hardware can " - "support (%d)\n", reta_size, ETH_RSS_RETA_SIZE_64); + "support (%d)", reta_size, ETH_RSS_RETA_SIZE_64); return -EINVAL; } @@ -2568,8 +2548,11 @@ i40evf_hw_rss_hash_set(struct i40e_vf *vf, struct rte_eth_rss_conf *rss_conf) rss_hf = rss_conf->rss_hf; hena = (uint64_t)i40e_read_rx_ctl(hw, I40E_VFQF_HENA(0)); hena |= ((uint64_t)i40e_read_rx_ctl(hw, I40E_VFQF_HENA(1))) << 32; - hena &= ~I40E_RSS_HENA_ALL; - hena |= i40e_config_hena(rss_hf); + if (hw->mac.type == I40E_MAC_X722) + hena &= ~I40E_RSS_HENA_ALL_X722; + else + hena &= ~I40E_RSS_HENA_ALL; + hena |= i40e_config_hena(rss_hf, hw->mac.type); i40e_write_rx_ctl(hw, I40E_VFQF_HENA(0), (uint32_t)hena); i40e_write_rx_ctl(hw, I40E_VFQF_HENA(1), (uint32_t)(hena >> 32)); I40EVF_WRITE_FLUSH(hw); @@ -2585,7 +2568,10 @@ i40evf_disable_rss(struct i40e_vf *vf) hena = (uint64_t)i40e_read_rx_ctl(hw, I40E_VFQF_HENA(0)); hena |= ((uint64_t)i40e_read_rx_ctl(hw, I40E_VFQF_HENA(1))) << 32; - hena &= ~I40E_RSS_HENA_ALL; + if (hw->mac.type == I40E_MAC_X722) + hena &= ~I40E_RSS_HENA_ALL_X722; + else + hena &= ~I40E_RSS_HENA_ALL; i40e_write_rx_ctl(hw, I40E_VFQF_HENA(0), (uint32_t)hena); i40e_write_rx_ctl(hw, I40E_VFQF_HENA(1), (uint32_t)(hena >> 32)); I40EVF_WRITE_FLUSH(hw); @@ -2601,7 +2587,7 @@ 
i40evf_config_rss(struct i40e_vf *vf) if (vf->dev_data->dev_conf.rxmode.mq_mode != ETH_MQ_RX_RSS) { i40evf_disable_rss(vf); - PMD_DRV_LOG(DEBUG, "RSS not configured\n"); + PMD_DRV_LOG(DEBUG, "RSS not configured"); return 0; } @@ -2618,7 +2604,7 @@ i40evf_config_rss(struct i40e_vf *vf) rss_conf = vf->dev_data->dev_conf.rx_adv_conf.rss_conf; if ((rss_conf.rss_hf & I40E_RSS_OFFLOAD_ALL) == 0) { i40evf_disable_rss(vf); - PMD_DRV_LOG(DEBUG, "No hash flag is set\n"); + PMD_DRV_LOG(DEBUG, "No hash flag is set"); return 0; } @@ -2646,7 +2632,9 @@ i40evf_dev_rss_hash_update(struct rte_eth_dev *dev, hena = (uint64_t)i40e_read_rx_ctl(hw, I40E_VFQF_HENA(0)); hena |= ((uint64_t)i40e_read_rx_ctl(hw, I40E_VFQF_HENA(1))) << 32; - if (!(hena & I40E_RSS_HENA_ALL)) { /* RSS disabled */ + if (!(hena & ((hw->mac.type == I40E_MAC_X722) + ? I40E_RSS_HENA_ALL_X722 + : I40E_RSS_HENA_ALL))) { /* RSS disabled */ if (rss_hf != 0) /* Enable RSS */ return -EINVAL; return 0; @@ -2676,3 +2664,55 @@ i40evf_dev_rss_hash_conf_get(struct rte_eth_dev *dev, return 0; } + +static int +i40evf_dev_mtu_set(struct rte_eth_dev *dev, uint16_t mtu) +{ + struct i40e_vf *vf = I40EVF_DEV_PRIVATE_TO_VF(dev->data->dev_private); + struct rte_eth_dev_data *dev_data = vf->dev_data; + uint32_t frame_size = mtu + ETHER_HDR_LEN + + ETHER_CRC_LEN + I40E_VLAN_TAG_SIZE; + int ret = 0; + + /* check if mtu is within the allowed range */ + if ((mtu < ETHER_MIN_MTU) || (frame_size > I40E_FRAME_SIZE_MAX)) + return -EINVAL; + + /* mtu setting is forbidden if port is start */ + if (dev_data->dev_started) { + PMD_DRV_LOG(ERR, "port %d must be stopped before configuration", + dev_data->port_id); + return -EBUSY; + } + + if (frame_size > ETHER_MAX_LEN) + dev_data->dev_conf.rxmode.jumbo_frame = 1; + else + dev_data->dev_conf.rxmode.jumbo_frame = 0; + + dev_data->dev_conf.rxmode.max_rx_pkt_len = frame_size; + + return ret; +} + +static void +i40evf_set_default_mac_addr(struct rte_eth_dev *dev, + struct ether_addr *mac_addr) +{ + struct i40e_vf *vf = I40EVF_DEV_PRIVATE_TO_VF(dev->data->dev_private); + + if (!is_valid_assigned_ether_addr(mac_addr)) { + PMD_DRV_LOG(ERR, "Tried to set invalid MAC address."); + return; + } + + if (is_same_ether_addr(mac_addr, dev->data->mac_addrs)) + return; + + if (vf->flags & I40E_FLAG_VF_MAC_BY_PF) + return; + + i40evf_del_mac_addr_by_addr(dev, dev->data->mac_addrs); + + i40evf_add_mac_addr(dev, mac_addr, 0, 0); +} diff --git a/src/dpdk/drivers/net/i40e/i40e_fdir.c b/src/dpdk/drivers/net/i40e/i40e_fdir.c index 33cb6dab..0700253b 100644 --- a/src/dpdk/drivers/net/i40e/i40e_fdir.c +++ b/src/dpdk/drivers/net/i40e/i40e_fdir.c @@ -74,11 +74,8 @@ #define I40E_FDIR_UDP_DEFAULT_LEN 400 /* Wait count and interval for fdir filter programming */ -#define TREX_PATCH -// TREX_PATCH - Values were 10 and 1000. 
These numbers give much better performance when -// configuring large amount of rules -#define I40E_FDIR_WAIT_COUNT 100 -#define I40E_FDIR_WAIT_INTERVAL_US 100 +#define I40E_FDIR_WAIT_COUNT 10 +#define I40E_FDIR_WAIT_INTERVAL_US 1000 /* Wait count and interval for fdir filter flush */ #define I40E_FDIR_FLUSH_RETRY 50 @@ -122,7 +119,13 @@ static int i40e_fdir_filter_programming(struct i40e_pf *pf, enum i40e_filter_pctype pctype, const struct rte_eth_fdir_filter *filter, bool add); -static int i40e_fdir_flush(struct rte_eth_dev *dev); +static int i40e_fdir_filter_convert(const struct rte_eth_fdir_filter *input, + struct i40e_fdir_filter *filter); +static struct i40e_fdir_filter * +i40e_sw_fdir_filter_lookup(struct i40e_fdir_info *fdir_info, + const struct rte_eth_fdir_input *input); +static int i40e_sw_fdir_filter_insert(struct i40e_pf *pf, + struct i40e_fdir_filter *filter); static int i40e_fdir_rx_queue_init(struct i40e_rx_queue *rxq) @@ -254,7 +257,7 @@ i40e_fdir_setup(struct i40e_pf *pf) /* reserve memory for the fdir programming packet */ snprintf(z_name, sizeof(z_name), "%s_%s_%d", - eth_dev->driver->pci_drv.name, + eth_dev->driver->pci_drv.driver.name, I40E_FDIR_MZ_NAME, eth_dev->data->port_id); mz = i40e_memzone_reserve(z_name, I40E_FDIR_PKT_LEN, SOCKET_ID_ANY); @@ -356,8 +359,15 @@ i40e_init_flx_pld(struct i40e_pf *pf) /* initialize the masks */ for (pctype = I40E_FILTER_PCTYPE_NONF_IPV4_UDP; pctype <= I40E_FILTER_PCTYPE_L2_PAYLOAD; pctype++) { - if (!I40E_VALID_PCTYPE((enum i40e_filter_pctype)pctype)) - continue; + if (hw->mac.type == I40E_MAC_X722) { + if (!I40E_VALID_PCTYPE_X722( + (enum i40e_filter_pctype)pctype)) + continue; + } else { + if (!I40E_VALID_PCTYPE( + (enum i40e_filter_pctype)pctype)) + continue; + } pf->fdir.flex_mask[pctype].word_mask = 0; i40e_write_rx_ctl(hw, I40E_PRTQF_FD_FLXINSET(pctype), 0); for (i = 0; i < I40E_FDIR_BITMASK_NUM_WORD; i++) { @@ -667,7 +677,16 @@ i40e_fdir_configure(struct rte_eth_dev *dev) i40e_set_flx_pld_cfg(pf, &conf->flex_set[i]); /* configure flex mask*/ for (i = 0; i < conf->nb_flexmasks; i++) { - pctype = i40e_flowtype_to_pctype(conf->flex_mask[i].flow_type); + if (hw->mac.type == I40E_MAC_X722) { + /* get translated pctype value in fd pctype register */ + pctype = (enum i40e_filter_pctype)i40e_read_rx_ctl( + hw, I40E_GLQF_FD_PCTYPES( + (int)i40e_flowtype_to_pctype( + conf->flex_mask[i].flow_type))); + } else + pctype = i40e_flowtype_to_pctype( + conf->flex_mask[i].flow_type); + i40e_set_flex_mask_on_pctype(pf, pctype, &conf->flex_mask[i]); } @@ -732,9 +751,6 @@ i40e_fdir_fill_eth_ip_head(const struct rte_eth_fdir_input *fdir_input, fdir_input->flow.ip4_flow.ttl : I40E_FDIR_IP_DEFAULT_TTL; ip->type_of_service = fdir_input->flow.ip4_flow.tos; -#ifdef TREX_PATCH - ip->packet_id = rte_cpu_to_be_16(fdir_input->flow.ip4_flow.ip_id); -#endif /* * The source and destination fields in the transmitted packet * need to be presented in a reversed order with respect @@ -755,11 +771,7 @@ i40e_fdir_fill_eth_ip_head(const struct rte_eth_fdir_input *fdir_input, ip6->vtc_flow = rte_cpu_to_be_32(I40E_FDIR_IPv6_DEFAULT_VTC_FLOW | (fdir_input->flow.ipv6_flow.tc << - I40E_FDIR_IPv6_TC_OFFSET) -#ifdef TREX_PATCH - | (fdir_input->flow.ipv6_flow.flow_label & 0x000fffff) -#endif - ); + I40E_FDIR_IPv6_TC_OFFSET)); ip6->payload_len = rte_cpu_to_be_16(I40E_FDIR_IPv6_PAYLOAD_LEN); ip6->proto = fdir_input->flow.ipv6_flow.proto ? 
@@ -1011,20 +1023,92 @@ i40e_check_fdir_programming_status(struct i40e_rx_queue *rxq) return ret; } +static int +i40e_fdir_filter_convert(const struct rte_eth_fdir_filter *input, + struct i40e_fdir_filter *filter) +{ + rte_memcpy(&filter->fdir, input, sizeof(struct rte_eth_fdir_filter)); + return 0; +} + +/* Check if there exists the flow director filter */ +static struct i40e_fdir_filter * +i40e_sw_fdir_filter_lookup(struct i40e_fdir_info *fdir_info, + const struct rte_eth_fdir_input *input) +{ + int ret; + + ret = rte_hash_lookup(fdir_info->hash_table, (const void *)input); + if (ret < 0) + return NULL; + + return fdir_info->hash_map[ret]; +} + +/* Add a flow director filter into the SW list */ +static int +i40e_sw_fdir_filter_insert(struct i40e_pf *pf, struct i40e_fdir_filter *filter) +{ + struct i40e_fdir_info *fdir_info = &pf->fdir; + int ret; + + ret = rte_hash_add_key(fdir_info->hash_table, + &filter->fdir.input); + if (ret < 0) { + PMD_DRV_LOG(ERR, + "Failed to insert fdir filter to hash table %d!", + ret); + return ret; + } + fdir_info->hash_map[ret] = filter; + + TAILQ_INSERT_TAIL(&fdir_info->fdir_list, filter, rules); + + return 0; +} + +/* Delete a flow director filter from the SW list */ +int +i40e_sw_fdir_filter_del(struct i40e_pf *pf, struct rte_eth_fdir_input *input) +{ + struct i40e_fdir_info *fdir_info = &pf->fdir; + struct i40e_fdir_filter *filter; + int ret; + + ret = rte_hash_del_key(fdir_info->hash_table, input); + if (ret < 0) { + PMD_DRV_LOG(ERR, + "Failed to delete fdir filter to hash table %d!", + ret); + return ret; + } + filter = fdir_info->hash_map[ret]; + fdir_info->hash_map[ret] = NULL; + + TAILQ_REMOVE(&fdir_info->fdir_list, filter, rules); + rte_free(filter); + + return 0; +} + /* * i40e_add_del_fdir_filter - add or remove a flow director filter. 
* @pf: board private structure * @filter: fdir filter entry * @add: 0 - delete, 1 - add */ -static int +int i40e_add_del_fdir_filter(struct rte_eth_dev *dev, const struct rte_eth_fdir_filter *filter, bool add) { + struct i40e_hw *hw = I40E_DEV_PRIVATE_TO_HW(dev->data->dev_private); struct i40e_pf *pf = I40E_DEV_PRIVATE_TO_PF(dev->data->dev_private); unsigned char *pkt = (unsigned char *)pf->fdir.prg_pkt; enum i40e_filter_pctype pctype; + struct i40e_fdir_info *fdir_info = &pf->fdir; + struct i40e_fdir_filter *fdir_filter, *node; + struct i40e_fdir_filter check_filter; /* Check if the filter exists */ int ret = 0; if (dev->data->dev_conf.fdir_conf.mode != RTE_FDIR_MODE_PERFECT) { @@ -1047,6 +1131,22 @@ i40e_add_del_fdir_filter(struct rte_eth_dev *dev, return -EINVAL; } + /* Check if there is the filter in SW list */ + memset(&check_filter, 0, sizeof(check_filter)); + i40e_fdir_filter_convert(filter, &check_filter); + node = i40e_sw_fdir_filter_lookup(fdir_info, &check_filter.fdir.input); + if (add && node) { + PMD_DRV_LOG(ERR, + "Conflict with existing flow director rules!"); + return -EINVAL; + } + + if (!add && !node) { + PMD_DRV_LOG(ERR, + "There's no corresponding flow firector filter!"); + return -EINVAL; + } + memset(pkt, 0, I40E_FDIR_PKT_LEN); ret = i40e_fdir_construct_pkt(pf, &filter->input, pkt); @@ -1054,13 +1154,32 @@ i40e_add_del_fdir_filter(struct rte_eth_dev *dev, PMD_DRV_LOG(ERR, "construct packet for fdir fails."); return ret; } - pctype = i40e_flowtype_to_pctype(filter->input.flow_type); + + if (hw->mac.type == I40E_MAC_X722) { + /* get translated pctype value in fd pctype register */ + pctype = (enum i40e_filter_pctype)i40e_read_rx_ctl( + hw, I40E_GLQF_FD_PCTYPES( + (int)i40e_flowtype_to_pctype( + filter->input.flow_type))); + } else + pctype = i40e_flowtype_to_pctype(filter->input.flow_type); + ret = i40e_fdir_filter_programming(pf, pctype, filter, add); if (ret < 0) { PMD_DRV_LOG(ERR, "fdir programming fails for PCTYPE(%u).", pctype); return ret; } + + if (add) { + fdir_filter = rte_zmalloc("fdir_filter", + sizeof(*fdir_filter), 0); + rte_memcpy(fdir_filter, &check_filter, sizeof(check_filter)); + ret = i40e_sw_fdir_filter_insert(pf, fdir_filter); + } else { + ret = i40e_sw_fdir_filter_del(pf, &node->fdir.input); + } + return ret; } @@ -1153,12 +1272,8 @@ i40e_fdir_filter_programming(struct i40e_pf *pf, fdirdp->dtype_cmd_cntindex |= rte_cpu_to_le_32(I40E_TXD_FLTR_QW1_CNT_ENA_MASK); fdirdp->dtype_cmd_cntindex |= -#ifdef TREX_PATCH - rte_cpu_to_le_32((fdir_action->stat_count_index << -#else rte_cpu_to_le_32( ((uint32_t)pf->fdir.match_counter_index << -#endif I40E_TXD_FLTR_QW1_CNTINDEX_SHIFT) & I40E_TXD_FLTR_QW1_CNTINDEX_MASK); @@ -1182,17 +1297,11 @@ i40e_fdir_filter_programming(struct i40e_pf *pf, I40E_PCI_REG_WRITE(txq->qtx_tail, txq->tx_tail); for (i = 0; i < I40E_FDIR_WAIT_COUNT; i++) { -#ifndef TREX_PATCH - /* itay: moved this delay after the check to avoid first check */ rte_delay_us(I40E_FDIR_WAIT_INTERVAL_US); -#endif if ((txdp->cmd_type_offset_bsz & rte_cpu_to_le_64(I40E_TXD_QW1_DTYPE_MASK)) == rte_cpu_to_le_64(I40E_TX_DESC_DTYPE_DESC_DONE)) break; -#ifdef TREX_PATCH - rte_delay_us(I40E_FDIR_WAIT_INTERVAL_US); -#endif } if (i >= I40E_FDIR_WAIT_COUNT) { PMD_DRV_LOG(ERR, "Failed to program FDIR filter:" @@ -1200,10 +1309,7 @@ i40e_fdir_filter_programming(struct i40e_pf *pf, return -ETIMEDOUT; } /* totally delay 10 ms to check programming status*/ -#ifndef TREX_PATCH - /* itay: tests show this is not needed */ rte_delay_us((I40E_FDIR_WAIT_COUNT - i) * 
I40E_FDIR_WAIT_INTERVAL_US); -#endif if (i40e_check_fdir_programming_status(rxq) < 0) { PMD_DRV_LOG(ERR, "Failed to program FDIR filter:" " programming status reported."); @@ -1217,7 +1323,7 @@ i40e_fdir_filter_programming(struct i40e_pf *pf, * i40e_fdir_flush - clear all filters of Flow Director table * @pf: board private structure */ -static int +int i40e_fdir_flush(struct rte_eth_dev *dev) { struct i40e_pf *pf = I40E_DEV_PRIVATE_TO_PF(dev->data->dev_private); @@ -1296,6 +1402,7 @@ i40e_fdir_info_get_flex_mask(struct i40e_pf *pf, { struct i40e_fdir_flex_mask *mask; struct rte_eth_fdir_flex_mask *ptr = flex_mask; + struct i40e_hw *hw = I40E_PF_TO_HW(pf); uint16_t flow_type; uint8_t i, j; uint16_t off_bytes, mask_tmp; @@ -1304,8 +1411,13 @@ i40e_fdir_info_get_flex_mask(struct i40e_pf *pf, i <= I40E_FILTER_PCTYPE_L2_PAYLOAD; i++) { mask = &pf->fdir.flex_mask[i]; - if (!I40E_VALID_PCTYPE((enum i40e_filter_pctype)i)) - continue; + if (hw->mac.type == I40E_MAC_X722) { + if (!I40E_VALID_PCTYPE_X722((enum i40e_filter_pctype)i)) + continue; + } else { + if (!I40E_VALID_PCTYPE((enum i40e_filter_pctype)i)) + continue; + } flow_type = i40e_pctype_to_flowtype((enum i40e_filter_pctype)i); for (j = 0; j < I40E_FDIR_MAX_FLEXWORD_NUM; j++) { if (mask->word_mask & I40E_FLEX_WORD_MASK(j)) { @@ -1472,3 +1584,34 @@ i40e_fdir_ctrl_func(struct rte_eth_dev *dev, } return ret; } + +/* Restore flow director filter */ +void +i40e_fdir_filter_restore(struct i40e_pf *pf) +{ + struct rte_eth_dev *dev = I40E_VSI_TO_ETH_DEV(pf->main_vsi); + struct i40e_fdir_filter_list *fdir_list = &pf->fdir.fdir_list; + struct i40e_fdir_filter *f; +#ifdef RTE_LIBRTE_I40E_DEBUG_DRIVER + struct i40e_hw *hw = I40E_PF_TO_HW(pf); + uint32_t fdstat; + uint32_t guarant_cnt; /**< Number of filters in guaranteed spaces. */ + uint32_t best_cnt; /**< Number of filters in best effort spaces. */ +#endif /* RTE_LIBRTE_I40E_DEBUG_DRIVER */ + + TAILQ_FOREACH(f, fdir_list, rules) + i40e_add_del_fdir_filter(dev, &f->fdir, TRUE); + +#ifdef RTE_LIBRTE_I40E_DEBUG_DRIVER + fdstat = I40E_READ_REG(hw, I40E_PFQF_FDSTAT); + guarant_cnt = + (uint32_t)((fdstat & I40E_PFQF_FDSTAT_GUARANT_CNT_MASK) >> + I40E_PFQF_FDSTAT_GUARANT_CNT_SHIFT); + best_cnt = + (uint32_t)((fdstat & I40E_PFQF_FDSTAT_BEST_CNT_MASK) >> + I40E_PFQF_FDSTAT_BEST_CNT_SHIFT); +#endif /* RTE_LIBRTE_I40E_DEBUG_DRIVER */ + + PMD_DRV_LOG(INFO, "FDIR: Guarant count: %d, Best count: %d", + guarant_cnt, best_cnt); +} diff --git a/src/dpdk/drivers/net/i40e/i40e_flow.c b/src/dpdk/drivers/net/i40e/i40e_flow.c new file mode 100644 index 00000000..76bb3320 --- /dev/null +++ b/src/dpdk/drivers/net/i40e/i40e_flow.c @@ -0,0 +1,1849 @@ +/*- + * BSD LICENSE + * + * Copyright (c) 2016 Intel Corporation. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include <sys/queue.h> +#include <stdio.h> +#include <errno.h> +#include <stdint.h> +#include <string.h> +#include <unistd.h> +#include <stdarg.h> + +#include <rte_ether.h> +#include <rte_ethdev.h> +#include <rte_log.h> +#include <rte_memzone.h> +#include <rte_malloc.h> +#include <rte_eth_ctrl.h> +#include <rte_tailq.h> +#include <rte_flow_driver.h> + +#include "i40e_logs.h" +#include "base/i40e_type.h" +#include "base/i40e_prototype.h" +#include "i40e_ethdev.h" + +#define I40E_IPV4_TC_SHIFT 4 +#define I40E_IPV6_TC_MASK (0x00FF << I40E_IPV4_TC_SHIFT) +#define I40E_IPV6_FRAG_HEADER 44 +#define I40E_TENANT_ARRAY_NUM 3 +#define I40E_TCI_MASK 0xFFFF + +static int i40e_flow_validate(struct rte_eth_dev *dev, + const struct rte_flow_attr *attr, + const struct rte_flow_item pattern[], + const struct rte_flow_action actions[], + struct rte_flow_error *error); +static struct rte_flow *i40e_flow_create(struct rte_eth_dev *dev, + const struct rte_flow_attr *attr, + const struct rte_flow_item pattern[], + const struct rte_flow_action actions[], + struct rte_flow_error *error); +static int i40e_flow_destroy(struct rte_eth_dev *dev, + struct rte_flow *flow, + struct rte_flow_error *error); +static int i40e_flow_flush(struct rte_eth_dev *dev, + struct rte_flow_error *error); +static int +i40e_flow_parse_ethertype_pattern(struct rte_eth_dev *dev, + const struct rte_flow_item *pattern, + struct rte_flow_error *error, + struct rte_eth_ethertype_filter *filter); +static int i40e_flow_parse_ethertype_action(struct rte_eth_dev *dev, + const struct rte_flow_action *actions, + struct rte_flow_error *error, + struct rte_eth_ethertype_filter *filter); +static int i40e_flow_parse_fdir_pattern(struct rte_eth_dev *dev, + const struct rte_flow_item *pattern, + struct rte_flow_error *error, + struct rte_eth_fdir_filter *filter); +static int i40e_flow_parse_fdir_action(struct rte_eth_dev *dev, + const struct rte_flow_action *actions, + struct rte_flow_error *error, + struct rte_eth_fdir_filter *filter); +static int i40e_flow_parse_tunnel_pattern(__rte_unused struct rte_eth_dev *dev, + const struct rte_flow_item *pattern, + struct rte_flow_error *error, + struct rte_eth_tunnel_filter_conf *filter); +static int i40e_flow_parse_tunnel_action(struct rte_eth_dev *dev, + const struct rte_flow_action *actions, + struct rte_flow_error *error, + struct rte_eth_tunnel_filter_conf *filter); +static int i40e_flow_parse_attr(const struct rte_flow_attr *attr, + struct rte_flow_error *error); +static int i40e_flow_parse_ethertype_filter(struct rte_eth_dev *dev, + const struct rte_flow_attr *attr, + const struct rte_flow_item pattern[], + const struct rte_flow_action actions[], + struct rte_flow_error *error, + union i40e_filter_t *filter); 
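[Editor's note] The new i40e_flow.c declares the driver's rte_flow entry points (i40e_flow_validate/create/destroy/flush) and matches application-supplied patterns against static item-type tables such as pattern_fdir_ipv4_udp defined further down. As a schematic sketch only — the queue index and address are placeholders, and the i40e FDIR parser additionally expects matching masks and the full default input set for the flow type, so a production rule would populate more fields than shown — an application reaches these entry points through the generic flow API like so:

#include <rte_flow.h>
#include <rte_byteorder.h>

/* Hypothetical example: steer IPv4/UDP traffic for one destination to queue 1. */
static struct rte_flow *
install_ipv4_udp_rule_sketch(uint8_t port)
{
	struct rte_flow_attr attr = { .ingress = 1 };
	struct rte_flow_item_ipv4 ip_spec = {
		.hdr.dst_addr = rte_cpu_to_be_32(0xC0A80001), /* 192.168.0.1 */
	};
	struct rte_flow_item pattern[] = {
		{ .type = RTE_FLOW_ITEM_TYPE_IPV4, .spec = &ip_spec },
		{ .type = RTE_FLOW_ITEM_TYPE_UDP },
		{ .type = RTE_FLOW_ITEM_TYPE_END },
	};
	struct rte_flow_action_queue queue = { .index = 1 };
	struct rte_flow_action actions[] = {
		{ .type = RTE_FLOW_ACTION_TYPE_QUEUE, .conf = &queue },
		{ .type = RTE_FLOW_ACTION_TYPE_END },
	};
	struct rte_flow_error err;

	/* These generic calls dispatch into i40e_flow_validate()/i40e_flow_create(). */
	if (rte_flow_validate(port, &attr, pattern, actions, &err) != 0)
		return NULL;
	return rte_flow_create(port, &attr, pattern, actions, &err);
}

On success the returned rte_flow handle corresponds to the struct rte_flow node added to the PF's flow_list in the i40e_ethdev.h hunk above, and can later be passed to rte_flow_destroy() or released wholesale via rte_flow_flush().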
+static int i40e_flow_parse_fdir_filter(struct rte_eth_dev *dev, + const struct rte_flow_attr *attr, + const struct rte_flow_item pattern[], + const struct rte_flow_action actions[], + struct rte_flow_error *error, + union i40e_filter_t *filter); +static int i40e_flow_parse_tunnel_filter(struct rte_eth_dev *dev, + const struct rte_flow_attr *attr, + const struct rte_flow_item pattern[], + const struct rte_flow_action actions[], + struct rte_flow_error *error, + union i40e_filter_t *filter); +static int i40e_flow_destroy_ethertype_filter(struct i40e_pf *pf, + struct i40e_ethertype_filter *filter); +static int i40e_flow_destroy_tunnel_filter(struct i40e_pf *pf, + struct i40e_tunnel_filter *filter); +static int i40e_flow_flush_fdir_filter(struct i40e_pf *pf); +static int i40e_flow_flush_ethertype_filter(struct i40e_pf *pf); +static int i40e_flow_flush_tunnel_filter(struct i40e_pf *pf); + +const struct rte_flow_ops i40e_flow_ops = { + .validate = i40e_flow_validate, + .create = i40e_flow_create, + .destroy = i40e_flow_destroy, + .flush = i40e_flow_flush, +}; + +union i40e_filter_t cons_filter; +enum rte_filter_type cons_filter_type = RTE_ETH_FILTER_NONE; + +/* Pattern matched ethertype filter */ +static enum rte_flow_item_type pattern_ethertype[] = { + RTE_FLOW_ITEM_TYPE_ETH, + RTE_FLOW_ITEM_TYPE_END, +}; + +/* Pattern matched flow director filter */ +static enum rte_flow_item_type pattern_fdir_ipv4[] = { + RTE_FLOW_ITEM_TYPE_IPV4, + RTE_FLOW_ITEM_TYPE_END, +}; + +static enum rte_flow_item_type pattern_fdir_ipv4_ext[] = { + RTE_FLOW_ITEM_TYPE_ETH, + RTE_FLOW_ITEM_TYPE_IPV4, + RTE_FLOW_ITEM_TYPE_END, +}; + +static enum rte_flow_item_type pattern_fdir_ipv4_udp[] = { + RTE_FLOW_ITEM_TYPE_IPV4, + RTE_FLOW_ITEM_TYPE_UDP, + RTE_FLOW_ITEM_TYPE_END, +}; + +static enum rte_flow_item_type pattern_fdir_ipv4_udp_ext[] = { + RTE_FLOW_ITEM_TYPE_ETH, + RTE_FLOW_ITEM_TYPE_IPV4, + RTE_FLOW_ITEM_TYPE_UDP, + RTE_FLOW_ITEM_TYPE_END, +}; + +static enum rte_flow_item_type pattern_fdir_ipv4_tcp[] = { + RTE_FLOW_ITEM_TYPE_IPV4, + RTE_FLOW_ITEM_TYPE_TCP, + RTE_FLOW_ITEM_TYPE_END, +}; + +static enum rte_flow_item_type pattern_fdir_ipv4_tcp_ext[] = { + RTE_FLOW_ITEM_TYPE_ETH, + RTE_FLOW_ITEM_TYPE_IPV4, + RTE_FLOW_ITEM_TYPE_TCP, + RTE_FLOW_ITEM_TYPE_END, +}; + +static enum rte_flow_item_type pattern_fdir_ipv4_sctp[] = { + RTE_FLOW_ITEM_TYPE_IPV4, + RTE_FLOW_ITEM_TYPE_SCTP, + RTE_FLOW_ITEM_TYPE_END, +}; + +static enum rte_flow_item_type pattern_fdir_ipv4_sctp_ext[] = { + RTE_FLOW_ITEM_TYPE_ETH, + RTE_FLOW_ITEM_TYPE_IPV4, + RTE_FLOW_ITEM_TYPE_SCTP, + RTE_FLOW_ITEM_TYPE_END, +}; + +static enum rte_flow_item_type pattern_fdir_ipv6[] = { + RTE_FLOW_ITEM_TYPE_IPV6, + RTE_FLOW_ITEM_TYPE_END, +}; + +static enum rte_flow_item_type pattern_fdir_ipv6_ext[] = { + RTE_FLOW_ITEM_TYPE_ETH, + RTE_FLOW_ITEM_TYPE_IPV6, + RTE_FLOW_ITEM_TYPE_END, +}; + +static enum rte_flow_item_type pattern_fdir_ipv6_udp[] = { + RTE_FLOW_ITEM_TYPE_IPV6, + RTE_FLOW_ITEM_TYPE_UDP, + RTE_FLOW_ITEM_TYPE_END, +}; + +static enum rte_flow_item_type pattern_fdir_ipv6_udp_ext[] = { + RTE_FLOW_ITEM_TYPE_ETH, + RTE_FLOW_ITEM_TYPE_IPV6, + RTE_FLOW_ITEM_TYPE_UDP, + RTE_FLOW_ITEM_TYPE_END, +}; + +static enum rte_flow_item_type pattern_fdir_ipv6_tcp[] = { + RTE_FLOW_ITEM_TYPE_IPV6, + RTE_FLOW_ITEM_TYPE_TCP, + RTE_FLOW_ITEM_TYPE_END, +}; + +static enum rte_flow_item_type pattern_fdir_ipv6_tcp_ext[] = { + RTE_FLOW_ITEM_TYPE_ETH, + RTE_FLOW_ITEM_TYPE_IPV6, + RTE_FLOW_ITEM_TYPE_TCP, + RTE_FLOW_ITEM_TYPE_END, +}; + +static enum rte_flow_item_type pattern_fdir_ipv6_sctp[] = { 
+ RTE_FLOW_ITEM_TYPE_IPV6, + RTE_FLOW_ITEM_TYPE_SCTP, + RTE_FLOW_ITEM_TYPE_END, +}; + +static enum rte_flow_item_type pattern_fdir_ipv6_sctp_ext[] = { + RTE_FLOW_ITEM_TYPE_ETH, + RTE_FLOW_ITEM_TYPE_IPV6, + RTE_FLOW_ITEM_TYPE_SCTP, + RTE_FLOW_ITEM_TYPE_END, +}; + +/* Pattern matched tunnel filter */ +static enum rte_flow_item_type pattern_vxlan_1[] = { + RTE_FLOW_ITEM_TYPE_ETH, + RTE_FLOW_ITEM_TYPE_IPV4, + RTE_FLOW_ITEM_TYPE_UDP, + RTE_FLOW_ITEM_TYPE_VXLAN, + RTE_FLOW_ITEM_TYPE_ETH, + RTE_FLOW_ITEM_TYPE_END, +}; + +static enum rte_flow_item_type pattern_vxlan_2[] = { + RTE_FLOW_ITEM_TYPE_ETH, + RTE_FLOW_ITEM_TYPE_IPV6, + RTE_FLOW_ITEM_TYPE_UDP, + RTE_FLOW_ITEM_TYPE_VXLAN, + RTE_FLOW_ITEM_TYPE_ETH, + RTE_FLOW_ITEM_TYPE_END, +}; + +static enum rte_flow_item_type pattern_vxlan_3[] = { + RTE_FLOW_ITEM_TYPE_ETH, + RTE_FLOW_ITEM_TYPE_IPV4, + RTE_FLOW_ITEM_TYPE_UDP, + RTE_FLOW_ITEM_TYPE_VXLAN, + RTE_FLOW_ITEM_TYPE_ETH, + RTE_FLOW_ITEM_TYPE_VLAN, + RTE_FLOW_ITEM_TYPE_END, +}; + +static enum rte_flow_item_type pattern_vxlan_4[] = { + RTE_FLOW_ITEM_TYPE_ETH, + RTE_FLOW_ITEM_TYPE_IPV6, + RTE_FLOW_ITEM_TYPE_UDP, + RTE_FLOW_ITEM_TYPE_VXLAN, + RTE_FLOW_ITEM_TYPE_ETH, + RTE_FLOW_ITEM_TYPE_VLAN, + RTE_FLOW_ITEM_TYPE_END, +}; + +static struct i40e_valid_pattern i40e_supported_patterns[] = { + /* Ethertype */ + { pattern_ethertype, i40e_flow_parse_ethertype_filter }, + /* FDIR */ + { pattern_fdir_ipv4, i40e_flow_parse_fdir_filter }, + { pattern_fdir_ipv4_ext, i40e_flow_parse_fdir_filter }, + { pattern_fdir_ipv4_udp, i40e_flow_parse_fdir_filter }, + { pattern_fdir_ipv4_udp_ext, i40e_flow_parse_fdir_filter }, + { pattern_fdir_ipv4_tcp, i40e_flow_parse_fdir_filter }, + { pattern_fdir_ipv4_tcp_ext, i40e_flow_parse_fdir_filter }, + { pattern_fdir_ipv4_sctp, i40e_flow_parse_fdir_filter }, + { pattern_fdir_ipv4_sctp_ext, i40e_flow_parse_fdir_filter }, + { pattern_fdir_ipv6, i40e_flow_parse_fdir_filter }, + { pattern_fdir_ipv6_ext, i40e_flow_parse_fdir_filter }, + { pattern_fdir_ipv6_udp, i40e_flow_parse_fdir_filter }, + { pattern_fdir_ipv6_udp_ext, i40e_flow_parse_fdir_filter }, + { pattern_fdir_ipv6_tcp, i40e_flow_parse_fdir_filter }, + { pattern_fdir_ipv6_tcp_ext, i40e_flow_parse_fdir_filter }, + { pattern_fdir_ipv6_sctp, i40e_flow_parse_fdir_filter }, + { pattern_fdir_ipv6_sctp_ext, i40e_flow_parse_fdir_filter }, + /* tunnel */ + { pattern_vxlan_1, i40e_flow_parse_tunnel_filter }, + { pattern_vxlan_2, i40e_flow_parse_tunnel_filter }, + { pattern_vxlan_3, i40e_flow_parse_tunnel_filter }, + { pattern_vxlan_4, i40e_flow_parse_tunnel_filter }, +}; + +#define NEXT_ITEM_OF_ACTION(act, actions, index) \ + do { \ + act = actions + index; \ + while (act->type == RTE_FLOW_ACTION_TYPE_VOID) { \ + index++; \ + act = actions + index; \ + } \ + } while (0) + +/* Find the first VOID or non-VOID item pointer */ +static const struct rte_flow_item * +i40e_find_first_item(const struct rte_flow_item *item, bool is_void) +{ + bool is_find; + + while (item->type != RTE_FLOW_ITEM_TYPE_END) { + if (is_void) + is_find = item->type == RTE_FLOW_ITEM_TYPE_VOID; + else + is_find = item->type != RTE_FLOW_ITEM_TYPE_VOID; + if (is_find) + break; + item++; + } + return item; +} + +/* Skip all VOID items of the pattern */ +static void +i40e_pattern_skip_void_item(struct rte_flow_item *items, + const struct rte_flow_item *pattern) +{ + uint32_t cpy_count = 0; + const struct rte_flow_item *pb = pattern, *pe = pattern; + + for (;;) { + /* Find a non-void item first */ + pb = i40e_find_first_item(pb, false); + if (pb->type == RTE_FLOW_ITEM_TYPE_END) 
{ + pe = pb; + break; + } + + /* Find a void item */ + pe = i40e_find_first_item(pb + 1, true); + + cpy_count = pe - pb; + rte_memcpy(items, pb, sizeof(struct rte_flow_item) * cpy_count); + + items += cpy_count; + + if (pe->type == RTE_FLOW_ITEM_TYPE_END) { + pb = pe; + break; + } + + pb = pe + 1; + } + /* Copy the END item. */ + rte_memcpy(items, pe, sizeof(struct rte_flow_item)); +} + +/* Check if the pattern matches a supported item type array */ +static bool +i40e_match_pattern(enum rte_flow_item_type *item_array, + struct rte_flow_item *pattern) +{ + struct rte_flow_item *item = pattern; + + while ((*item_array == item->type) && + (*item_array != RTE_FLOW_ITEM_TYPE_END)) { + item_array++; + item++; + } + + return (*item_array == RTE_FLOW_ITEM_TYPE_END && + item->type == RTE_FLOW_ITEM_TYPE_END); +} + +/* Find if there's parse filter function matched */ +static parse_filter_t +i40e_find_parse_filter_func(struct rte_flow_item *pattern) +{ + parse_filter_t parse_filter = NULL; + uint8_t i = 0; + + for (; i < RTE_DIM(i40e_supported_patterns); i++) { + if (i40e_match_pattern(i40e_supported_patterns[i].items, + pattern)) { + parse_filter = i40e_supported_patterns[i].parse_filter; + break; + } + } + + return parse_filter; +} + +/* Parse attributes */ +static int +i40e_flow_parse_attr(const struct rte_flow_attr *attr, + struct rte_flow_error *error) +{ + /* Must be input direction */ + if (!attr->ingress) { + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ATTR_INGRESS, + attr, "Only support ingress."); + return -rte_errno; + } + + /* Not supported */ + if (attr->egress) { + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ATTR_EGRESS, + attr, "Not support egress."); + return -rte_errno; + } + + /* Not supported */ + if (attr->priority) { + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ATTR_PRIORITY, + attr, "Not support priority."); + return -rte_errno; + } + + /* Not supported */ + if (attr->group) { + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ATTR_GROUP, + attr, "Not support group."); + return -rte_errno; + } + + return 0; +} + +static uint16_t +i40e_get_outer_vlan(struct rte_eth_dev *dev) +{ + struct i40e_hw *hw = I40E_DEV_PRIVATE_TO_HW(dev->data->dev_private); + int qinq = dev->data->dev_conf.rxmode.hw_vlan_extend; + uint64_t reg_r = 0; + uint16_t reg_id; + uint16_t tpid; + + if (qinq) + reg_id = 2; + else + reg_id = 3; + + i40e_aq_debug_read_register(hw, I40E_GL_SWT_L2TAGCTRL(reg_id), + ®_r, NULL); + + tpid = (reg_r >> I40E_GL_SWT_L2TAGCTRL_ETHERTYPE_SHIFT) & 0xFFFF; + + return tpid; +} + +/* 1. Last in item should be NULL as range is not supported. + * 2. Supported filter types: MAC_ETHTYPE and ETHTYPE. + * 3. SRC mac_addr mask should be 00:00:00:00:00:00. + * 4. DST mac_addr mask should be 00:00:00:00:00:00 or + * FF:FF:FF:FF:FF:FF + * 5. Ether_type mask should be 0xFFFF. 
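 * An accepted rule therefore looks like (testpmd flow syntax, MAC and
 * EtherType values are illustrative only):
 *   flow create 0 ingress pattern eth dst is 00:11:22:33:44:55
 *     type is 0x88f7 / end actions queue index 2 / end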
+ */ +static int +i40e_flow_parse_ethertype_pattern(struct rte_eth_dev *dev, + const struct rte_flow_item *pattern, + struct rte_flow_error *error, + struct rte_eth_ethertype_filter *filter) +{ + const struct rte_flow_item *item = pattern; + const struct rte_flow_item_eth *eth_spec; + const struct rte_flow_item_eth *eth_mask; + enum rte_flow_item_type item_type; + uint16_t outer_tpid; + + outer_tpid = i40e_get_outer_vlan(dev); + + for (; item->type != RTE_FLOW_ITEM_TYPE_END; item++) { + if (item->last) { + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM, + item, + "Not support range"); + return -rte_errno; + } + item_type = item->type; + switch (item_type) { + case RTE_FLOW_ITEM_TYPE_ETH: + eth_spec = (const struct rte_flow_item_eth *)item->spec; + eth_mask = (const struct rte_flow_item_eth *)item->mask; + /* Get the MAC info. */ + if (!eth_spec || !eth_mask) { + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM, + item, + "NULL ETH spec/mask"); + return -rte_errno; + } + + /* Mask bits of source MAC address must be full of 0. + * Mask bits of destination MAC address must be full + * of 1 or full of 0. + */ + if (!is_zero_ether_addr(ð_mask->src) || + (!is_zero_ether_addr(ð_mask->dst) && + !is_broadcast_ether_addr(ð_mask->dst))) { + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM, + item, + "Invalid MAC_addr mask"); + return -rte_errno; + } + + if ((eth_mask->type & UINT16_MAX) != UINT16_MAX) { + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM, + item, + "Invalid ethertype mask"); + return -rte_errno; + } + + /* If mask bits of destination MAC address + * are full of 1, set RTE_ETHTYPE_FLAGS_MAC. + */ + if (is_broadcast_ether_addr(ð_mask->dst)) { + filter->mac_addr = eth_spec->dst; + filter->flags |= RTE_ETHTYPE_FLAGS_MAC; + } else { + filter->flags &= ~RTE_ETHTYPE_FLAGS_MAC; + } + filter->ether_type = rte_be_to_cpu_16(eth_spec->type); + + if (filter->ether_type == ETHER_TYPE_IPv4 || + filter->ether_type == ETHER_TYPE_IPv6 || + filter->ether_type == outer_tpid) { + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM, + item, + "Unsupported ether_type in" + " control packet filter."); + return -rte_errno; + } + break; + default: + break; + } + } + + return 0; +} + +/* Ethertype action only supports QUEUE or DROP. */ +static int +i40e_flow_parse_ethertype_action(struct rte_eth_dev *dev, + const struct rte_flow_action *actions, + struct rte_flow_error *error, + struct rte_eth_ethertype_filter *filter) +{ + struct i40e_pf *pf = I40E_DEV_PRIVATE_TO_PF(dev->data->dev_private); + const struct rte_flow_action *act; + const struct rte_flow_action_queue *act_q; + uint32_t index = 0; + + /* Check if the first non-void action is QUEUE or DROP. 
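 * In testpmd terms that is either "actions queue index <n> / end" or
 * "actions drop / end" (illustrative).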
*/ + NEXT_ITEM_OF_ACTION(act, actions, index); + if (act->type != RTE_FLOW_ACTION_TYPE_QUEUE && + act->type != RTE_FLOW_ACTION_TYPE_DROP) { + rte_flow_error_set(error, EINVAL, RTE_FLOW_ERROR_TYPE_ACTION, + act, "Not supported action."); + return -rte_errno; + } + + if (act->type == RTE_FLOW_ACTION_TYPE_QUEUE) { + act_q = (const struct rte_flow_action_queue *)act->conf; + filter->queue = act_q->index; + if (filter->queue >= pf->dev_data->nb_rx_queues) { + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ACTION, + act, "Invalid queue ID for" + " ethertype_filter."); + return -rte_errno; + } + } else { + filter->flags |= RTE_ETHTYPE_FLAGS_DROP; + } + + /* Check if the next non-void item is END */ + index++; + NEXT_ITEM_OF_ACTION(act, actions, index); + if (act->type != RTE_FLOW_ACTION_TYPE_END) { + rte_flow_error_set(error, EINVAL, RTE_FLOW_ERROR_TYPE_ACTION, + act, "Not supported action."); + return -rte_errno; + } + + return 0; +} + +static int +i40e_flow_parse_ethertype_filter(struct rte_eth_dev *dev, + const struct rte_flow_attr *attr, + const struct rte_flow_item pattern[], + const struct rte_flow_action actions[], + struct rte_flow_error *error, + union i40e_filter_t *filter) +{ + struct rte_eth_ethertype_filter *ethertype_filter = + &filter->ethertype_filter; + int ret; + + ret = i40e_flow_parse_ethertype_pattern(dev, pattern, error, + ethertype_filter); + if (ret) + return ret; + + ret = i40e_flow_parse_ethertype_action(dev, actions, error, + ethertype_filter); + if (ret) + return ret; + + ret = i40e_flow_parse_attr(attr, error); + if (ret) + return ret; + + cons_filter_type = RTE_ETH_FILTER_ETHERTYPE; + + return ret; +} + +/* 1. Last in item should be NULL as range is not supported. + * 2. Supported flow type and input set: refer to array + * default_inset_table in i40e_ethdev.c. + * 3. Mask of fields which need to be matched should be + * filled with 1. + * 4. Mask of fields which needn't to be matched should be + * filled with 0. 
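 * For example, an IPv4/UDP rule whose fully-masked fields line up with
 * the default input set (testpmd syntax, addresses and ports are
 * illustrative only):
 *   flow create 0 ingress pattern ipv4 src is 192.168.0.1
 *     dst is 192.168.0.2 / udp src is 1024 dst is 1025 / end
 *     actions queue index 1 / end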
+ */ +static int +i40e_flow_parse_fdir_pattern(struct rte_eth_dev *dev, + const struct rte_flow_item *pattern, + struct rte_flow_error *error, + struct rte_eth_fdir_filter *filter) +{ + struct i40e_pf *pf = I40E_DEV_PRIVATE_TO_PF(dev->data->dev_private); + const struct rte_flow_item *item = pattern; + const struct rte_flow_item_eth *eth_spec, *eth_mask; + const struct rte_flow_item_ipv4 *ipv4_spec, *ipv4_mask; + const struct rte_flow_item_ipv6 *ipv6_spec, *ipv6_mask; + const struct rte_flow_item_tcp *tcp_spec, *tcp_mask; + const struct rte_flow_item_udp *udp_spec, *udp_mask; + const struct rte_flow_item_sctp *sctp_spec, *sctp_mask; + const struct rte_flow_item_vf *vf_spec; + uint32_t flow_type = RTE_ETH_FLOW_UNKNOWN; + enum i40e_filter_pctype pctype; + uint64_t input_set = I40E_INSET_NONE; + uint16_t flag_offset; + enum rte_flow_item_type item_type; + enum rte_flow_item_type l3 = RTE_FLOW_ITEM_TYPE_END; + uint32_t j; + + for (; item->type != RTE_FLOW_ITEM_TYPE_END; item++) { + if (item->last) { + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM, + item, + "Not support range"); + return -rte_errno; + } + item_type = item->type; + switch (item_type) { + case RTE_FLOW_ITEM_TYPE_ETH: + eth_spec = (const struct rte_flow_item_eth *)item->spec; + eth_mask = (const struct rte_flow_item_eth *)item->mask; + if (eth_spec || eth_mask) { + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM, + item, + "Invalid ETH spec/mask"); + return -rte_errno; + } + break; + case RTE_FLOW_ITEM_TYPE_IPV4: + l3 = RTE_FLOW_ITEM_TYPE_IPV4; + ipv4_spec = + (const struct rte_flow_item_ipv4 *)item->spec; + ipv4_mask = + (const struct rte_flow_item_ipv4 *)item->mask; + if (!ipv4_spec || !ipv4_mask) { + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM, + item, + "NULL IPv4 spec/mask"); + return -rte_errno; + } + + /* Check IPv4 mask and update input set */ + if (ipv4_mask->hdr.version_ihl || + ipv4_mask->hdr.total_length || + ipv4_mask->hdr.packet_id || + ipv4_mask->hdr.fragment_offset || + ipv4_mask->hdr.hdr_checksum) { + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM, + item, + "Invalid IPv4 mask."); + return -rte_errno; + } + + if (ipv4_mask->hdr.src_addr == UINT32_MAX) + input_set |= I40E_INSET_IPV4_SRC; + if (ipv4_mask->hdr.dst_addr == UINT32_MAX) + input_set |= I40E_INSET_IPV4_DST; + if (ipv4_mask->hdr.type_of_service == UINT8_MAX) + input_set |= I40E_INSET_IPV4_TOS; + if (ipv4_mask->hdr.time_to_live == UINT8_MAX) + input_set |= I40E_INSET_IPV4_TTL; + if (ipv4_mask->hdr.next_proto_id == UINT8_MAX) + input_set |= I40E_INSET_IPV4_PROTO; + + /* Get filter info */ + flow_type = RTE_ETH_FLOW_NONFRAG_IPV4_OTHER; + /* Check if it is fragment. 
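 * (A non-zero fragment offset or the MF bit selects the FRAG_IPV4 flow
 * type instead of NONFRAG_IPV4_OTHER.)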
*/ + flag_offset = + rte_be_to_cpu_16(ipv4_spec->hdr.fragment_offset); + if (flag_offset & IPV4_HDR_OFFSET_MASK || + flag_offset & IPV4_HDR_MF_FLAG) + flow_type = RTE_ETH_FLOW_FRAG_IPV4; + + /* Get the filter info */ + filter->input.flow.ip4_flow.proto = + ipv4_spec->hdr.next_proto_id; + filter->input.flow.ip4_flow.tos = + ipv4_spec->hdr.type_of_service; + filter->input.flow.ip4_flow.ttl = + ipv4_spec->hdr.time_to_live; + filter->input.flow.ip4_flow.src_ip = + ipv4_spec->hdr.src_addr; + filter->input.flow.ip4_flow.dst_ip = + ipv4_spec->hdr.dst_addr; + + break; + case RTE_FLOW_ITEM_TYPE_IPV6: + l3 = RTE_FLOW_ITEM_TYPE_IPV6; + ipv6_spec = + (const struct rte_flow_item_ipv6 *)item->spec; + ipv6_mask = + (const struct rte_flow_item_ipv6 *)item->mask; + if (!ipv6_spec || !ipv6_mask) { + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM, + item, + "NULL IPv6 spec/mask"); + return -rte_errno; + } + + /* Check IPv6 mask and update input set */ + if (ipv6_mask->hdr.payload_len) { + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM, + item, + "Invalid IPv6 mask"); + return -rte_errno; + } + + /* SCR and DST address of IPv6 shouldn't be masked */ + for (j = 0; j < RTE_DIM(ipv6_mask->hdr.src_addr); j++) { + if (ipv6_mask->hdr.src_addr[j] != UINT8_MAX || + ipv6_mask->hdr.dst_addr[j] != UINT8_MAX) { + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM, + item, + "Invalid IPv6 mask"); + return -rte_errno; + } + } + + input_set |= I40E_INSET_IPV6_SRC; + input_set |= I40E_INSET_IPV6_DST; + + if ((ipv6_mask->hdr.vtc_flow & + rte_cpu_to_be_16(I40E_IPV6_TC_MASK)) + == rte_cpu_to_be_16(I40E_IPV6_TC_MASK)) + input_set |= I40E_INSET_IPV6_TC; + if (ipv6_mask->hdr.proto == UINT8_MAX) + input_set |= I40E_INSET_IPV6_NEXT_HDR; + if (ipv6_mask->hdr.hop_limits == UINT8_MAX) + input_set |= I40E_INSET_IPV6_HOP_LIMIT; + + /* Get filter info */ + filter->input.flow.ipv6_flow.tc = + (uint8_t)(ipv6_spec->hdr.vtc_flow << + I40E_IPV4_TC_SHIFT); + filter->input.flow.ipv6_flow.proto = + ipv6_spec->hdr.proto; + filter->input.flow.ipv6_flow.hop_limits = + ipv6_spec->hdr.hop_limits; + + rte_memcpy(filter->input.flow.ipv6_flow.src_ip, + ipv6_spec->hdr.src_addr, 16); + rte_memcpy(filter->input.flow.ipv6_flow.dst_ip, + ipv6_spec->hdr.dst_addr, 16); + + /* Check if it is fragment. 
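 * (For IPv6 this is signalled by a next-header value of 44, the
 * fragment extension header, i.e. I40E_IPV6_FRAG_HEADER above.)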
*/ + if (ipv6_spec->hdr.proto == I40E_IPV6_FRAG_HEADER) + flow_type = RTE_ETH_FLOW_FRAG_IPV6; + else + flow_type = RTE_ETH_FLOW_NONFRAG_IPV6_OTHER; + break; + case RTE_FLOW_ITEM_TYPE_TCP: + tcp_spec = (const struct rte_flow_item_tcp *)item->spec; + tcp_mask = (const struct rte_flow_item_tcp *)item->mask; + if (!tcp_spec || !tcp_mask) { + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM, + item, + "NULL TCP spec/mask"); + return -rte_errno; + } + + /* Check TCP mask and update input set */ + if (tcp_mask->hdr.sent_seq || + tcp_mask->hdr.recv_ack || + tcp_mask->hdr.data_off || + tcp_mask->hdr.tcp_flags || + tcp_mask->hdr.rx_win || + tcp_mask->hdr.cksum || + tcp_mask->hdr.tcp_urp) { + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM, + item, + "Invalid TCP mask"); + return -rte_errno; + } + + if (tcp_mask->hdr.src_port != UINT16_MAX || + tcp_mask->hdr.dst_port != UINT16_MAX) { + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM, + item, + "Invalid TCP mask"); + return -rte_errno; + } + + input_set |= I40E_INSET_SRC_PORT; + input_set |= I40E_INSET_DST_PORT; + + /* Get filter info */ + if (l3 == RTE_FLOW_ITEM_TYPE_IPV4) + flow_type = RTE_ETH_FLOW_NONFRAG_IPV4_TCP; + else if (l3 == RTE_FLOW_ITEM_TYPE_IPV6) + flow_type = RTE_ETH_FLOW_NONFRAG_IPV6_TCP; + + if (l3 == RTE_FLOW_ITEM_TYPE_IPV4) { + filter->input.flow.tcp4_flow.src_port = + tcp_spec->hdr.src_port; + filter->input.flow.tcp4_flow.dst_port = + tcp_spec->hdr.dst_port; + } else if (l3 == RTE_FLOW_ITEM_TYPE_IPV6) { + filter->input.flow.tcp6_flow.src_port = + tcp_spec->hdr.src_port; + filter->input.flow.tcp6_flow.dst_port = + tcp_spec->hdr.dst_port; + } + break; + case RTE_FLOW_ITEM_TYPE_UDP: + udp_spec = (const struct rte_flow_item_udp *)item->spec; + udp_mask = (const struct rte_flow_item_udp *)item->mask; + if (!udp_spec || !udp_mask) { + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM, + item, + "NULL UDP spec/mask"); + return -rte_errno; + } + + /* Check UDP mask and update input set*/ + if (udp_mask->hdr.dgram_len || + udp_mask->hdr.dgram_cksum) { + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM, + item, + "Invalid UDP mask"); + return -rte_errno; + } + + if (udp_mask->hdr.src_port != UINT16_MAX || + udp_mask->hdr.dst_port != UINT16_MAX) { + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM, + item, + "Invalid UDP mask"); + return -rte_errno; + } + + input_set |= I40E_INSET_SRC_PORT; + input_set |= I40E_INSET_DST_PORT; + + /* Get filter info */ + if (l3 == RTE_FLOW_ITEM_TYPE_IPV4) + flow_type = + RTE_ETH_FLOW_NONFRAG_IPV4_UDP; + else if (l3 == RTE_FLOW_ITEM_TYPE_IPV6) + flow_type = + RTE_ETH_FLOW_NONFRAG_IPV6_UDP; + + if (l3 == RTE_FLOW_ITEM_TYPE_IPV4) { + filter->input.flow.udp4_flow.src_port = + udp_spec->hdr.src_port; + filter->input.flow.udp4_flow.dst_port = + udp_spec->hdr.dst_port; + } else if (l3 == RTE_FLOW_ITEM_TYPE_IPV6) { + filter->input.flow.udp6_flow.src_port = + udp_spec->hdr.src_port; + filter->input.flow.udp6_flow.dst_port = + udp_spec->hdr.dst_port; + } + break; + case RTE_FLOW_ITEM_TYPE_SCTP: + sctp_spec = + (const struct rte_flow_item_sctp *)item->spec; + sctp_mask = + (const struct rte_flow_item_sctp *)item->mask; + if (!sctp_spec || !sctp_mask) { + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM, + item, + "NULL SCTP spec/mask"); + return -rte_errno; + } + + /* Check SCTP mask and update input set */ + if (sctp_mask->hdr.cksum) { + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM, + item, + "Invalid UDP mask"); + return 
-rte_errno; + } + + if (sctp_mask->hdr.src_port != UINT16_MAX || + sctp_mask->hdr.dst_port != UINT16_MAX || + sctp_mask->hdr.tag != UINT32_MAX) { + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM, + item, + "Invalid UDP mask"); + return -rte_errno; + } + input_set |= I40E_INSET_SRC_PORT; + input_set |= I40E_INSET_DST_PORT; + input_set |= I40E_INSET_SCTP_VT; + + /* Get filter info */ + if (l3 == RTE_FLOW_ITEM_TYPE_IPV4) + flow_type = RTE_ETH_FLOW_NONFRAG_IPV4_SCTP; + else if (l3 == RTE_FLOW_ITEM_TYPE_IPV6) + flow_type = RTE_ETH_FLOW_NONFRAG_IPV6_SCTP; + + if (l3 == RTE_FLOW_ITEM_TYPE_IPV4) { + filter->input.flow.sctp4_flow.src_port = + sctp_spec->hdr.src_port; + filter->input.flow.sctp4_flow.dst_port = + sctp_spec->hdr.dst_port; + filter->input.flow.sctp4_flow.verify_tag = + sctp_spec->hdr.tag; + } else if (l3 == RTE_FLOW_ITEM_TYPE_IPV6) { + filter->input.flow.sctp6_flow.src_port = + sctp_spec->hdr.src_port; + filter->input.flow.sctp6_flow.dst_port = + sctp_spec->hdr.dst_port; + filter->input.flow.sctp6_flow.verify_tag = + sctp_spec->hdr.tag; + } + break; + case RTE_FLOW_ITEM_TYPE_VF: + vf_spec = (const struct rte_flow_item_vf *)item->spec; + filter->input.flow_ext.is_vf = 1; + filter->input.flow_ext.dst_id = vf_spec->id; + if (filter->input.flow_ext.is_vf && + filter->input.flow_ext.dst_id >= pf->vf_num) { + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM, + item, + "Invalid VF ID for FDIR."); + return -rte_errno; + } + break; + default: + break; + } + } + + pctype = i40e_flowtype_to_pctype(flow_type); + if (pctype == 0 || pctype > I40E_FILTER_PCTYPE_L2_PAYLOAD) { + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM, item, + "Unsupported flow type"); + return -rte_errno; + } + + if (input_set != i40e_get_default_input_set(pctype)) { + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM, item, + "Invalid input set."); + return -rte_errno; + } + filter->input.flow_type = flow_type; + + return 0; +} + +/* Parse to get the action info of a FDIR filter. + * FDIR action supports QUEUE or (QUEUE + MARK). + */ +static int +i40e_flow_parse_fdir_action(struct rte_eth_dev *dev, + const struct rte_flow_action *actions, + struct rte_flow_error *error, + struct rte_eth_fdir_filter *filter) +{ + struct i40e_pf *pf = I40E_DEV_PRIVATE_TO_PF(dev->data->dev_private); + const struct rte_flow_action *act; + const struct rte_flow_action_queue *act_q; + const struct rte_flow_action_mark *mark_spec; + uint32_t index = 0; + + /* Check if the first non-void action is QUEUE or DROP. */ + NEXT_ITEM_OF_ACTION(act, actions, index); + if (act->type != RTE_FLOW_ACTION_TYPE_QUEUE && + act->type != RTE_FLOW_ACTION_TYPE_DROP) { + rte_flow_error_set(error, EINVAL, RTE_FLOW_ERROR_TYPE_ACTION, + act, "Invalid action."); + return -rte_errno; + } + + act_q = (const struct rte_flow_action_queue *)act->conf; + filter->action.flex_off = 0; + if (act->type == RTE_FLOW_ACTION_TYPE_QUEUE) + filter->action.behavior = RTE_ETH_FDIR_ACCEPT; + else + filter->action.behavior = RTE_ETH_FDIR_REJECT; + + filter->action.report_status = RTE_ETH_FDIR_REPORT_ID; + filter->action.rx_queue = act_q->index; + + if (filter->action.rx_queue >= pf->dev_data->nb_rx_queues) { + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ACTION, act, + "Invalid queue ID for FDIR."); + return -rte_errno; + } + + /* Check if the next non-void item is MARK or END. 
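 * (When a MARK action follows, its id becomes the filter's soft_id,
 * which the application can expect to read back from matching mbufs;
 * the exact mbuf fields depend on the FDIR reporting mode.)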
*/ + index++; + NEXT_ITEM_OF_ACTION(act, actions, index); + if (act->type != RTE_FLOW_ACTION_TYPE_MARK && + act->type != RTE_FLOW_ACTION_TYPE_END) { + rte_flow_error_set(error, EINVAL, RTE_FLOW_ERROR_TYPE_ACTION, + act, "Invalid action."); + return -rte_errno; + } + + if (act->type == RTE_FLOW_ACTION_TYPE_MARK) { + mark_spec = (const struct rte_flow_action_mark *)act->conf; + filter->soft_id = mark_spec->id; + + /* Check if the next non-void item is END */ + index++; + NEXT_ITEM_OF_ACTION(act, actions, index); + if (act->type != RTE_FLOW_ACTION_TYPE_END) { + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ACTION, + act, "Invalid action."); + return -rte_errno; + } + } + + return 0; +} + +static int +i40e_flow_parse_fdir_filter(struct rte_eth_dev *dev, + const struct rte_flow_attr *attr, + const struct rte_flow_item pattern[], + const struct rte_flow_action actions[], + struct rte_flow_error *error, + union i40e_filter_t *filter) +{ + struct rte_eth_fdir_filter *fdir_filter = + &filter->fdir_filter; + int ret; + + ret = i40e_flow_parse_fdir_pattern(dev, pattern, error, fdir_filter); + if (ret) + return ret; + + ret = i40e_flow_parse_fdir_action(dev, actions, error, fdir_filter); + if (ret) + return ret; + + ret = i40e_flow_parse_attr(attr, error); + if (ret) + return ret; + + cons_filter_type = RTE_ETH_FILTER_FDIR; + + if (dev->data->dev_conf.fdir_conf.mode != + RTE_FDIR_MODE_PERFECT) { + rte_flow_error_set(error, ENOTSUP, + RTE_FLOW_ERROR_TYPE_UNSPECIFIED, + NULL, + "Check the mode in fdir_conf."); + return -rte_errno; + } + + return 0; +} + +/* Parse to get the action info of a tunnle filter + * Tunnel action only supports QUEUE. + */ +static int +i40e_flow_parse_tunnel_action(struct rte_eth_dev *dev, + const struct rte_flow_action *actions, + struct rte_flow_error *error, + struct rte_eth_tunnel_filter_conf *filter) +{ + struct i40e_pf *pf = I40E_DEV_PRIVATE_TO_PF(dev->data->dev_private); + const struct rte_flow_action *act; + const struct rte_flow_action_queue *act_q; + uint32_t index = 0; + + /* Check if the first non-void action is QUEUE. */ + NEXT_ITEM_OF_ACTION(act, actions, index); + if (act->type != RTE_FLOW_ACTION_TYPE_QUEUE) { + rte_flow_error_set(error, EINVAL, RTE_FLOW_ERROR_TYPE_ACTION, + act, "Not supported action."); + return -rte_errno; + } + + act_q = (const struct rte_flow_action_queue *)act->conf; + filter->queue_id = act_q->index; + if (filter->queue_id >= pf->dev_data->nb_rx_queues) { + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ACTION, + act, "Invalid queue ID for tunnel filter"); + return -rte_errno; + } + + /* Check if the next non-void item is END */ + index++; + NEXT_ITEM_OF_ACTION(act, actions, index); + if (act->type != RTE_FLOW_ACTION_TYPE_END) { + rte_flow_error_set(error, EINVAL, RTE_FLOW_ERROR_TYPE_ACTION, + act, "Not supported action."); + return -rte_errno; + } + + return 0; +} + +static int +i40e_check_tenant_id_mask(const uint8_t *mask) +{ + uint32_t j; + int is_masked = 0; + + for (j = 0; j < I40E_TENANT_ARRAY_NUM; j++) { + if (*(mask + j) == UINT8_MAX) { + if (j > 0 && (*(mask + j) != *(mask + j - 1))) + return -EINVAL; + is_masked = 0; + } else if (*(mask + j) == 0) { + if (j > 0 && (*(mask + j) != *(mask + j - 1))) + return -EINVAL; + is_masked = 1; + } else { + return -EINVAL; + } + } + + return is_masked; +} + +/* 1. Last in item should be NULL as range is not supported. + * 2. Supported filter types: IMAC_IVLAN_TENID, IMAC_IVLAN, + * IMAC_TENID, OMAC_TENID_IMAC and IMAC. + * 3. 
Mask of fields which need to be matched should be + * filled with 1. + * 4. Mask of fields which needn't to be matched should be + * filled with 0. + */ +static int +i40e_flow_parse_vxlan_pattern(const struct rte_flow_item *pattern, + struct rte_flow_error *error, + struct rte_eth_tunnel_filter_conf *filter) +{ + const struct rte_flow_item *item = pattern; + const struct rte_flow_item_eth *eth_spec; + const struct rte_flow_item_eth *eth_mask; + const struct rte_flow_item_eth *o_eth_spec = NULL; + const struct rte_flow_item_eth *o_eth_mask = NULL; + const struct rte_flow_item_vxlan *vxlan_spec = NULL; + const struct rte_flow_item_vxlan *vxlan_mask = NULL; + const struct rte_flow_item_eth *i_eth_spec = NULL; + const struct rte_flow_item_eth *i_eth_mask = NULL; + const struct rte_flow_item_vlan *vlan_spec = NULL; + const struct rte_flow_item_vlan *vlan_mask = NULL; + bool is_vni_masked = 0; + enum rte_flow_item_type item_type; + bool vxlan_flag = 0; + + for (; item->type != RTE_FLOW_ITEM_TYPE_END; item++) { + if (item->last) { + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM, + item, + "Not support range"); + return -rte_errno; + } + item_type = item->type; + switch (item_type) { + case RTE_FLOW_ITEM_TYPE_ETH: + eth_spec = (const struct rte_flow_item_eth *)item->spec; + eth_mask = (const struct rte_flow_item_eth *)item->mask; + if ((!eth_spec && eth_mask) || + (eth_spec && !eth_mask)) { + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM, + item, + "Invalid ether spec/mask"); + return -rte_errno; + } + + if (eth_spec && eth_mask) { + /* DST address of inner MAC shouldn't be masked. + * SRC address of Inner MAC should be masked. + */ + if (!is_broadcast_ether_addr(ð_mask->dst) || + !is_zero_ether_addr(ð_mask->src) || + eth_mask->type) { + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM, + item, + "Invalid ether spec/mask"); + return -rte_errno; + } + + if (!vxlan_flag) + rte_memcpy(&filter->outer_mac, + ð_spec->dst, + ETHER_ADDR_LEN); + else + rte_memcpy(&filter->inner_mac, + ð_spec->dst, + ETHER_ADDR_LEN); + } + + if (!vxlan_flag) { + o_eth_spec = eth_spec; + o_eth_mask = eth_mask; + } else { + i_eth_spec = eth_spec; + i_eth_mask = eth_mask; + } + + break; + case RTE_FLOW_ITEM_TYPE_VLAN: + vlan_spec = + (const struct rte_flow_item_vlan *)item->spec; + vlan_mask = + (const struct rte_flow_item_vlan *)item->mask; + if (vxlan_flag) { + vlan_spec = + (const struct rte_flow_item_vlan *)item->spec; + vlan_mask = + (const struct rte_flow_item_vlan *)item->mask; + if (!(vlan_spec && vlan_mask)) { + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM, + item, + "Invalid vlan item"); + return -rte_errno; + } + } else { + if (vlan_spec || vlan_mask) + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM, + item, + "Invalid vlan item"); + return -rte_errno; + } + break; + case RTE_FLOW_ITEM_TYPE_IPV4: + case RTE_FLOW_ITEM_TYPE_IPV6: + case RTE_FLOW_ITEM_TYPE_UDP: + /* IPv4/IPv6/UDP are used to describe protocol, + * spec amd mask should be NULL. + */ + if (item->spec || item->mask) { + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM, + item, + "Invalid IPv4 item"); + return -rte_errno; + } + break; + case RTE_FLOW_ITEM_TYPE_VXLAN: + vxlan_spec = + (const struct rte_flow_item_vxlan *)item->spec; + vxlan_mask = + (const struct rte_flow_item_vxlan *)item->mask; + /* Check if VXLAN item is used to describe protocol. + * If yes, both spec and mask should be NULL. + * If no, either spec or mask shouldn't be NULL. 
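 * A complete tunnel rule accepted by this parser could look like
 * (testpmd syntax, MAC address and VNI are illustrative only):
 *   flow create 0 ingress pattern eth / ipv4 / udp / vxlan vni is 100 /
 *     eth dst is 00:11:22:33:44:55 / end actions queue index 3 / end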
+ */ + if ((!vxlan_spec && vxlan_mask) || + (vxlan_spec && !vxlan_mask)) { + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM, + item, + "Invalid VXLAN item"); + return -rte_errno; + } + + /* Check if VNI is masked. */ + if (vxlan_mask) { + is_vni_masked = + i40e_check_tenant_id_mask(vxlan_mask->vni); + if (is_vni_masked < 0) { + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM, + item, + "Invalid VNI mask"); + return -rte_errno; + } + } + vxlan_flag = 1; + break; + default: + break; + } + } + + /* Check specification and mask to get the filter type */ + if (vlan_spec && vlan_mask && + (vlan_mask->tci == rte_cpu_to_be_16(I40E_TCI_MASK))) { + /* If there's inner vlan */ + filter->inner_vlan = rte_be_to_cpu_16(vlan_spec->tci) + & I40E_TCI_MASK; + if (vxlan_spec && vxlan_mask && !is_vni_masked) { + /* If there's vxlan */ + rte_memcpy(&filter->tenant_id, vxlan_spec->vni, + RTE_DIM(vxlan_spec->vni)); + if (!o_eth_spec && !o_eth_mask && + i_eth_spec && i_eth_mask) + filter->filter_type = + RTE_TUNNEL_FILTER_IMAC_IVLAN_TENID; + else { + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM, + NULL, + "Invalid filter type"); + return -rte_errno; + } + } else if (!vxlan_spec && !vxlan_mask) { + /* If there's no vxlan */ + if (!o_eth_spec && !o_eth_mask && + i_eth_spec && i_eth_mask) + filter->filter_type = + RTE_TUNNEL_FILTER_IMAC_IVLAN; + else { + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM, + NULL, + "Invalid filter type"); + return -rte_errno; + } + } else { + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM, + NULL, + "Invalid filter type"); + return -rte_errno; + } + } else if ((!vlan_spec && !vlan_mask) || + (vlan_spec && vlan_mask && vlan_mask->tci == 0x0)) { + /* If there's no inner vlan */ + if (vxlan_spec && vxlan_mask && !is_vni_masked) { + /* If there's vxlan */ + rte_memcpy(&filter->tenant_id, vxlan_spec->vni, + RTE_DIM(vxlan_spec->vni)); + if (!o_eth_spec && !o_eth_mask && + i_eth_spec && i_eth_mask) + filter->filter_type = + RTE_TUNNEL_FILTER_IMAC_TENID; + else if (o_eth_spec && o_eth_mask && + i_eth_spec && i_eth_mask) + filter->filter_type = + RTE_TUNNEL_FILTER_OMAC_TENID_IMAC; + } else if (!vxlan_spec && !vxlan_mask) { + /* If there's no vxlan */ + if (!o_eth_spec && !o_eth_mask && + i_eth_spec && i_eth_mask) { + filter->filter_type = ETH_TUNNEL_FILTER_IMAC; + } else { + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM, NULL, + "Invalid filter type"); + return -rte_errno; + } + } else { + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM, NULL, + "Invalid filter type"); + return -rte_errno; + } + } else { + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM, NULL, + "Not supported by tunnel filter."); + return -rte_errno; + } + + filter->tunnel_type = RTE_TUNNEL_TYPE_VXLAN; + + return 0; +} + +static int +i40e_flow_parse_tunnel_pattern(__rte_unused struct rte_eth_dev *dev, + const struct rte_flow_item *pattern, + struct rte_flow_error *error, + struct rte_eth_tunnel_filter_conf *filter) +{ + int ret; + + ret = i40e_flow_parse_vxlan_pattern(pattern, error, filter); + + return ret; +} + +static int +i40e_flow_parse_tunnel_filter(struct rte_eth_dev *dev, + const struct rte_flow_attr *attr, + const struct rte_flow_item pattern[], + const struct rte_flow_action actions[], + struct rte_flow_error *error, + union i40e_filter_t *filter) +{ + struct rte_eth_tunnel_filter_conf *tunnel_filter = + &filter->tunnel_filter; + int ret; + + ret = i40e_flow_parse_tunnel_pattern(dev, pattern, + error, 
tunnel_filter); + if (ret) + return ret; + + ret = i40e_flow_parse_tunnel_action(dev, actions, error, tunnel_filter); + if (ret) + return ret; + + ret = i40e_flow_parse_attr(attr, error); + if (ret) + return ret; + + cons_filter_type = RTE_ETH_FILTER_TUNNEL; + + return ret; +} + +static int +i40e_flow_validate(struct rte_eth_dev *dev, + const struct rte_flow_attr *attr, + const struct rte_flow_item pattern[], + const struct rte_flow_action actions[], + struct rte_flow_error *error) +{ + struct rte_flow_item *items; /* internal pattern w/o VOID items */ + parse_filter_t parse_filter; + uint32_t item_num = 0; /* non-void item number of pattern*/ + uint32_t i = 0; + int ret; + + if (!pattern) { + rte_flow_error_set(error, EINVAL, RTE_FLOW_ERROR_TYPE_ITEM_NUM, + NULL, "NULL pattern."); + return -rte_errno; + } + + if (!actions) { + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ACTION_NUM, + NULL, "NULL action."); + return -rte_errno; + } + + if (!attr) { + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ATTR, + NULL, "NULL attribute."); + return -rte_errno; + } + + memset(&cons_filter, 0, sizeof(cons_filter)); + + /* Get the non-void item number of pattern */ + while ((pattern + i)->type != RTE_FLOW_ITEM_TYPE_END) { + if ((pattern + i)->type != RTE_FLOW_ITEM_TYPE_VOID) + item_num++; + i++; + } + item_num++; + + items = rte_zmalloc("i40e_pattern", + item_num * sizeof(struct rte_flow_item), 0); + if (!items) { + rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_ITEM_NUM, + NULL, "No memory for PMD internal items."); + return -ENOMEM; + } + + i40e_pattern_skip_void_item(items, pattern); + + /* Find if there's matched parse filter function */ + parse_filter = i40e_find_parse_filter_func(items); + if (!parse_filter) { + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM, + pattern, "Unsupported pattern"); + return -rte_errno; + } + + ret = parse_filter(dev, attr, items, actions, error, &cons_filter); + + rte_free(items); + + return ret; +} + +static struct rte_flow * +i40e_flow_create(struct rte_eth_dev *dev, + const struct rte_flow_attr *attr, + const struct rte_flow_item pattern[], + const struct rte_flow_action actions[], + struct rte_flow_error *error) +{ + struct i40e_pf *pf = I40E_DEV_PRIVATE_TO_PF(dev->data->dev_private); + struct rte_flow *flow; + int ret; + + flow = rte_zmalloc("i40e_flow", sizeof(struct rte_flow), 0); + if (!flow) { + rte_flow_error_set(error, ENOMEM, + RTE_FLOW_ERROR_TYPE_HANDLE, NULL, + "Failed to allocate memory"); + return flow; + } + + ret = i40e_flow_validate(dev, attr, pattern, actions, error); + if (ret < 0) + return NULL; + + switch (cons_filter_type) { + case RTE_ETH_FILTER_ETHERTYPE: + ret = i40e_ethertype_filter_set(pf, + &cons_filter.ethertype_filter, 1); + if (ret) + goto free_flow; + flow->rule = TAILQ_LAST(&pf->ethertype.ethertype_list, + i40e_ethertype_filter_list); + break; + case RTE_ETH_FILTER_FDIR: + ret = i40e_add_del_fdir_filter(dev, + &cons_filter.fdir_filter, 1); + if (ret) + goto free_flow; + flow->rule = TAILQ_LAST(&pf->fdir.fdir_list, + i40e_fdir_filter_list); + break; + case RTE_ETH_FILTER_TUNNEL: + ret = i40e_dev_tunnel_filter_set(pf, + &cons_filter.tunnel_filter, 1); + if (ret) + goto free_flow; + flow->rule = TAILQ_LAST(&pf->tunnel.tunnel_list, + i40e_tunnel_filter_list); + break; + default: + goto free_flow; + } + + flow->filter_type = cons_filter_type; + TAILQ_INSERT_TAIL(&pf->flow_list, flow, node); + return flow; + +free_flow: + rte_flow_error_set(error, -ret, + RTE_FLOW_ERROR_TYPE_HANDLE, NULL, + "Failed 
to create flow."); + rte_free(flow); + return NULL; +} + +static int +i40e_flow_destroy(struct rte_eth_dev *dev, + struct rte_flow *flow, + struct rte_flow_error *error) +{ + struct i40e_pf *pf = I40E_DEV_PRIVATE_TO_PF(dev->data->dev_private); + enum rte_filter_type filter_type = flow->filter_type; + int ret = 0; + + switch (filter_type) { + case RTE_ETH_FILTER_ETHERTYPE: + ret = i40e_flow_destroy_ethertype_filter(pf, + (struct i40e_ethertype_filter *)flow->rule); + break; + case RTE_ETH_FILTER_TUNNEL: + ret = i40e_flow_destroy_tunnel_filter(pf, + (struct i40e_tunnel_filter *)flow->rule); + break; + case RTE_ETH_FILTER_FDIR: + ret = i40e_add_del_fdir_filter(dev, + &((struct i40e_fdir_filter *)flow->rule)->fdir, 0); + break; + default: + PMD_DRV_LOG(WARNING, "Filter type (%d) not supported", + filter_type); + ret = -EINVAL; + break; + } + + if (!ret) { + TAILQ_REMOVE(&pf->flow_list, flow, node); + rte_free(flow); + } else + rte_flow_error_set(error, -ret, + RTE_FLOW_ERROR_TYPE_HANDLE, NULL, + "Failed to destroy flow."); + + return ret; +} + +static int +i40e_flow_destroy_ethertype_filter(struct i40e_pf *pf, + struct i40e_ethertype_filter *filter) +{ + struct i40e_hw *hw = I40E_PF_TO_HW(pf); + struct i40e_ethertype_rule *ethertype_rule = &pf->ethertype; + struct i40e_ethertype_filter *node; + struct i40e_control_filter_stats stats; + uint16_t flags = 0; + int ret = 0; + + if (!(filter->flags & RTE_ETHTYPE_FLAGS_MAC)) + flags |= I40E_AQC_ADD_CONTROL_PACKET_FLAGS_IGNORE_MAC; + if (filter->flags & RTE_ETHTYPE_FLAGS_DROP) + flags |= I40E_AQC_ADD_CONTROL_PACKET_FLAGS_DROP; + flags |= I40E_AQC_ADD_CONTROL_PACKET_FLAGS_TO_QUEUE; + + memset(&stats, 0, sizeof(stats)); + ret = i40e_aq_add_rem_control_packet_filter(hw, + filter->input.mac_addr.addr_bytes, + filter->input.ether_type, + flags, pf->main_vsi->seid, + filter->queue, 0, &stats, NULL); + if (ret < 0) + return ret; + + node = i40e_sw_ethertype_filter_lookup(ethertype_rule, &filter->input); + if (!node) + return -EINVAL; + + ret = i40e_sw_ethertype_filter_del(pf, &node->input); + + return ret; +} + +static int +i40e_flow_destroy_tunnel_filter(struct i40e_pf *pf, + struct i40e_tunnel_filter *filter) +{ + struct i40e_hw *hw = I40E_PF_TO_HW(pf); + struct i40e_vsi *vsi = pf->main_vsi; + struct i40e_aqc_add_remove_cloud_filters_element_data cld_filter; + struct i40e_tunnel_rule *tunnel_rule = &pf->tunnel; + struct i40e_tunnel_filter *node; + int ret = 0; + + memset(&cld_filter, 0, sizeof(cld_filter)); + ether_addr_copy((struct ether_addr *)&filter->input.outer_mac, + (struct ether_addr *)&cld_filter.outer_mac); + ether_addr_copy((struct ether_addr *)&filter->input.inner_mac, + (struct ether_addr *)&cld_filter.inner_mac); + cld_filter.inner_vlan = filter->input.inner_vlan; + cld_filter.flags = filter->input.flags; + cld_filter.tenant_id = filter->input.tenant_id; + cld_filter.queue_number = filter->queue; + + ret = i40e_aq_remove_cloud_filters(hw, vsi->seid, + &cld_filter, 1); + if (ret < 0) + return ret; + + node = i40e_sw_tunnel_filter_lookup(tunnel_rule, &filter->input); + if (!node) + return -EINVAL; + + ret = i40e_sw_tunnel_filter_del(pf, &node->input); + + return ret; +} + +static int +i40e_flow_flush(struct rte_eth_dev *dev, struct rte_flow_error *error) +{ + struct i40e_pf *pf = I40E_DEV_PRIVATE_TO_PF(dev->data->dev_private); + int ret; + + ret = i40e_flow_flush_fdir_filter(pf); + if (ret) { + rte_flow_error_set(error, -ret, + RTE_FLOW_ERROR_TYPE_HANDLE, NULL, + "Failed to flush FDIR flows."); + return -rte_errno; + } + + ret = 
i40e_flow_flush_ethertype_filter(pf); + if (ret) { + rte_flow_error_set(error, -ret, + RTE_FLOW_ERROR_TYPE_HANDLE, NULL, + "Failed to ethertype flush flows."); + return -rte_errno; + } + + ret = i40e_flow_flush_tunnel_filter(pf); + if (ret) { + rte_flow_error_set(error, -ret, + RTE_FLOW_ERROR_TYPE_HANDLE, NULL, + "Failed to flush tunnel flows."); + return -rte_errno; + } + + return ret; +} + +static int +i40e_flow_flush_fdir_filter(struct i40e_pf *pf) +{ + struct rte_eth_dev *dev = pf->adapter->eth_dev; + struct i40e_fdir_info *fdir_info = &pf->fdir; + struct i40e_fdir_filter *fdir_filter; + struct rte_flow *flow; + void *temp; + int ret; + + ret = i40e_fdir_flush(dev); + if (!ret) { + /* Delete FDIR filters in FDIR list. */ + while ((fdir_filter = TAILQ_FIRST(&fdir_info->fdir_list))) { + ret = i40e_sw_fdir_filter_del(pf, + &fdir_filter->fdir.input); + if (ret < 0) + return ret; + } + + /* Delete FDIR flows in flow list. */ + TAILQ_FOREACH_SAFE(flow, &pf->flow_list, node, temp) { + if (flow->filter_type == RTE_ETH_FILTER_FDIR) { + TAILQ_REMOVE(&pf->flow_list, flow, node); + rte_free(flow); + } + } + } + + return ret; +} + +/* Flush all ethertype filters */ +static int +i40e_flow_flush_ethertype_filter(struct i40e_pf *pf) +{ + struct i40e_ethertype_filter_list + *ethertype_list = &pf->ethertype.ethertype_list; + struct i40e_ethertype_filter *filter; + struct rte_flow *flow; + void *temp; + int ret = 0; + + while ((filter = TAILQ_FIRST(ethertype_list))) { + ret = i40e_flow_destroy_ethertype_filter(pf, filter); + if (ret) + return ret; + } + + /* Delete ethertype flows in flow list. */ + TAILQ_FOREACH_SAFE(flow, &pf->flow_list, node, temp) { + if (flow->filter_type == RTE_ETH_FILTER_ETHERTYPE) { + TAILQ_REMOVE(&pf->flow_list, flow, node); + rte_free(flow); + } + } + + return ret; +} + +/* Flush all tunnel filters */ +static int +i40e_flow_flush_tunnel_filter(struct i40e_pf *pf) +{ + struct i40e_tunnel_filter_list + *tunnel_list = &pf->tunnel.tunnel_list; + struct i40e_tunnel_filter *filter; + struct rte_flow *flow; + void *temp; + int ret = 0; + + while ((filter = TAILQ_FIRST(tunnel_list))) { + ret = i40e_flow_destroy_tunnel_filter(pf, filter); + if (ret) + return ret; + } + + /* Delete tunnel flows in flow list. */ + TAILQ_FOREACH_SAFE(flow, &pf->flow_list, node, temp) { + if (flow->filter_type == RTE_ETH_FILTER_TUNNEL) { + TAILQ_REMOVE(&pf->flow_list, flow, node); + rte_free(flow); + } + } + + return ret; +} diff --git a/src/dpdk/drivers/net/i40e/i40e_pf.c b/src/dpdk/drivers/net/i40e/i40e_pf.c index d5b2d450..f771dfb6 100644 --- a/src/dpdk/drivers/net/i40e/i40e_pf.c +++ b/src/dpdk/drivers/net/i40e/i40e_pf.c @@ -1,7 +1,7 @@ /*- * BSD LICENSE * - * Copyright(c) 2010-2015 Intel Corporation. All rights reserved. + * Copyright(c) 2010-2017 Intel Corporation. All rights reserved. * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without @@ -55,6 +55,7 @@ #include "i40e_ethdev.h" #include "i40e_rxtx.h" #include "i40e_pf.h" +#include "rte_pmd_i40e.h" #define I40E_CFG_CRCSTRIP_DEFAULT 1 @@ -138,7 +139,7 @@ i40e_pf_host_vf_reset(struct i40e_pf_vf *vf, bool do_hw_reset) abs_vf_id = vf_id + hw->func_caps.vf_base_id; /* Notify VF that we are in VFR progress */ - I40E_WRITE_REG(hw, I40E_VFGEN_RSTAT1(vf_id), I40E_PF_VFR_INPROGRESS); + I40E_WRITE_REG(hw, I40E_VFGEN_RSTAT1(vf_id), I40E_VFR_INPROGRESS); /* * If require a SW VF reset, a VFLR interrupt will be generated, @@ -219,7 +220,7 @@ i40e_pf_host_vf_reset(struct i40e_pf_vf *vf, bool do_hw_reset) } /* Reset done, Set COMPLETE flag and clear reset bit */ - I40E_WRITE_REG(hw, I40E_VFGEN_RSTAT1(vf_id), I40E_PF_VFR_COMPLETED); + I40E_WRITE_REG(hw, I40E_VFGEN_RSTAT1(vf_id), I40E_VFR_COMPLETED); val = I40E_READ_REG(hw, I40E_VPGEN_VFRTRIG(vf_id)); val &= ~I40E_VPGEN_VFRTRIG_VFSWR_MASK; I40E_WRITE_REG(hw, I40E_VPGEN_VFRTRIG(vf_id), val); @@ -247,10 +248,12 @@ i40e_pf_host_vf_reset(struct i40e_pf_vf *vf, bool do_hw_reset) return -EFAULT; } + I40E_WRITE_REG(hw, I40E_VFGEN_RSTAT1(vf_id), I40E_VFR_VFACTIVE); + return ret; } -static int +int i40e_pf_host_send_msg_to_vf(struct i40e_pf_vf *vf, uint32_t opcode, uint32_t retval, @@ -272,14 +275,30 @@ i40e_pf_host_send_msg_to_vf(struct i40e_pf_vf *vf, } static void -i40e_pf_host_process_cmd_version(struct i40e_pf_vf *vf) +i40e_pf_host_process_cmd_version(struct i40e_pf_vf *vf, bool b_op) { struct i40e_virtchnl_version_info info; - info.major = I40E_DPDK_VERSION_MAJOR; - info.minor = I40E_DPDK_VERSION_MINOR; - i40e_pf_host_send_msg_to_vf(vf, I40E_VIRTCHNL_OP_VERSION, - I40E_SUCCESS, (uint8_t *)&info, sizeof(info)); + /* Respond like a Linux PF host in order to support both DPDK VF and + * Linux VF driver. The expense is original DPDK host specific feature + * like CFG_VLAN_PVID and CONFIG_VSI_QUEUES_EXT will not available. + * + * DPDK VF also can't identify host driver by version number returned. + * It always assume talking with Linux PF. 
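 * (In practice the PF therefore reports virtchnl version 1.0 to every
 * VF, whether it runs the Linux or the DPDK driver.)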
+ */ + info.major = I40E_VIRTCHNL_VERSION_MAJOR; + info.minor = I40E_VIRTCHNL_VERSION_MINOR_NO_VF_CAPS; + + if (b_op) + i40e_pf_host_send_msg_to_vf(vf, I40E_VIRTCHNL_OP_VERSION, + I40E_SUCCESS, + (uint8_t *)&info, + sizeof(info)); + else + i40e_pf_host_send_msg_to_vf(vf, I40E_VIRTCHNL_OP_VERSION, + I40E_NOT_SUPPORTED, + (uint8_t *)&info, + sizeof(info)); } static int @@ -292,13 +311,20 @@ i40e_pf_host_process_cmd_reset_vf(struct i40e_pf_vf *vf) } static int -i40e_pf_host_process_cmd_get_vf_resource(struct i40e_pf_vf *vf) +i40e_pf_host_process_cmd_get_vf_resource(struct i40e_pf_vf *vf, bool b_op) { struct i40e_virtchnl_vf_resource *vf_res = NULL; struct i40e_hw *hw = I40E_PF_TO_HW(vf->pf); uint32_t len = 0; int ret = I40E_SUCCESS; + if (!b_op) { + i40e_pf_host_send_msg_to_vf(vf, + I40E_VIRTCHNL_OP_GET_VF_RESOURCES, + I40E_NOT_SUPPORTED, NULL, 0); + return ret; + } + /* only have 1 VSI by default */ len = sizeof(struct i40e_virtchnl_vf_resource) + I40E_DEFAULT_VF_VSI_NUM * @@ -321,8 +347,7 @@ i40e_pf_host_process_cmd_get_vf_resource(struct i40e_pf_vf *vf) /* Change below setting if PF host can support more VSIs for VF */ vf_res->vsi_res[0].vsi_type = I40E_VSI_SRIOV; - /* As assume Vf only has single VSI now, always return 0 */ - vf_res->vsi_res[0].vsi_id = 0; + vf_res->vsi_res[0].vsi_id = vf->vsi->vsi_id; vf_res->vsi_res[0].num_queue_pairs = vf->vsi->nb_qps; ether_addr_copy(&vf->mac_addr, (struct ether_addr *)vf_res->vsi_res[0].default_mac_addr); @@ -393,10 +418,12 @@ i40e_pf_host_hmc_config_txq(struct i40e_hw *hw, /* clear the context structure first */ memset(&tx_ctx, 0, sizeof(tx_ctx)); - tx_ctx.new_context = 1; tx_ctx.base = txq->dma_ring_addr / I40E_QUEUE_BASE_ADDR_UNIT; tx_ctx.qlen = txq->ring_len; tx_ctx.rdylist = rte_le_to_cpu_16(vf->vsi->info.qs_handle[0]); + tx_ctx.head_wb_ena = txq->headwb_enabled; + tx_ctx.head_wb_addr = txq->dma_headwb_addr; + err = i40e_clear_lan_tx_queue_context(hw, abs_queue_id); if (err != I40E_SUCCESS) return err; @@ -423,7 +450,8 @@ i40e_pf_host_hmc_config_txq(struct i40e_hw *hw, static int i40e_pf_host_process_cmd_config_vsi_queues(struct i40e_pf_vf *vf, uint8_t *msg, - uint16_t msglen) + uint16_t msglen, + bool b_op) { struct i40e_hw *hw = I40E_PF_TO_HW(vf->pf); struct i40e_vsi *vsi = vf->vsi; @@ -432,11 +460,18 @@ i40e_pf_host_process_cmd_config_vsi_queues(struct i40e_pf_vf *vf, struct i40e_virtchnl_queue_pair_info *vc_qpi; int i, ret = I40E_SUCCESS; + if (!b_op) { + i40e_pf_host_send_msg_to_vf(vf, + I40E_VIRTCHNL_OP_CONFIG_VSI_QUEUES, + I40E_NOT_SUPPORTED, NULL, 0); + return ret; + } + if (!msg || vc_vqci->num_queue_pairs > vsi->nb_qps || vc_vqci->num_queue_pairs > I40E_MAX_VSI_QP || msglen < I40E_VIRTCHNL_CONFIG_VSI_QUEUES_SIZE(vc_vqci, vc_vqci->num_queue_pairs)) { - PMD_DRV_LOG(ERR, "vsi_queue_config_info argument wrong\n"); + PMD_DRV_LOG(ERR, "vsi_queue_config_info argument wrong"); ret = I40E_ERR_PARAM; goto send_msg; } @@ -482,7 +517,8 @@ send_msg: static int i40e_pf_host_process_cmd_config_vsi_queues_ext(struct i40e_pf_vf *vf, uint8_t *msg, - uint16_t msglen) + uint16_t msglen, + bool b_op) { struct i40e_hw *hw = I40E_PF_TO_HW(vf->pf); struct i40e_vsi *vsi = vf->vsi; @@ -491,11 +527,19 @@ i40e_pf_host_process_cmd_config_vsi_queues_ext(struct i40e_pf_vf *vf, struct i40e_virtchnl_queue_pair_ext_info *vc_qpei; int i, ret = I40E_SUCCESS; + if (!b_op) { + i40e_pf_host_send_msg_to_vf( + vf, + I40E_VIRTCHNL_OP_CONFIG_VSI_QUEUES_EXT, + I40E_NOT_SUPPORTED, NULL, 0); + return ret; + } + if (!msg || vc_vqcei->num_queue_pairs > vsi->nb_qps || 
vc_vqcei->num_queue_pairs > I40E_MAX_VSI_QP || msglen < I40E_VIRTCHNL_CONFIG_VSI_QUEUES_SIZE(vc_vqcei, vc_vqcei->num_queue_pairs)) { - PMD_DRV_LOG(ERR, "vsi_queue_config_ext_info argument wrong\n"); + PMD_DRV_LOG(ERR, "vsi_queue_config_ext_info argument wrong"); ret = I40E_ERR_PARAM; goto send_msg; } @@ -537,13 +581,125 @@ send_msg: return ret; } +static void +i40e_pf_config_irq_link_list(struct i40e_pf_vf *vf, + struct i40e_virtchnl_vector_map *vvm) +{ +#define BITS_PER_CHAR 8 + uint64_t linklistmap = 0, tempmap; + struct i40e_hw *hw = I40E_PF_TO_HW(vf->pf); + uint16_t qid; + bool b_first_q = true; + enum i40e_queue_type qtype; + uint16_t vector_id; + uint32_t reg, reg_idx; + uint16_t itr_idx = 0, i; + + vector_id = vvm->vector_id; + /* setup the head */ + if (!vector_id) + reg_idx = I40E_VPINT_LNKLST0(vf->vf_idx); + else + reg_idx = I40E_VPINT_LNKLSTN( + ((hw->func_caps.num_msix_vectors_vf - 1) * vf->vf_idx) + + (vector_id - 1)); + + if (vvm->rxq_map == 0 && vvm->txq_map == 0) { + I40E_WRITE_REG(hw, reg_idx, + I40E_VPINT_LNKLST0_FIRSTQ_INDX_MASK); + goto cfg_irq_done; + } + + /* sort all rx and tx queues */ + tempmap = vvm->rxq_map; + for (i = 0; i < sizeof(vvm->rxq_map) * BITS_PER_CHAR; i++) { + if (tempmap & 0x1) + linklistmap |= (1 << (2 * i)); + tempmap >>= 1; + } + + tempmap = vvm->txq_map; + for (i = 0; i < sizeof(vvm->txq_map) * BITS_PER_CHAR; i++) { + if (tempmap & 0x1) + linklistmap |= (1 << (2 * i + 1)); + tempmap >>= 1; + } + + /* Link all rx and tx queues into a chained list */ + tempmap = linklistmap; + i = 0; + b_first_q = true; + do { + if (tempmap & 0x1) { + qtype = (enum i40e_queue_type)(i % 2); + qid = vf->vsi->base_queue + i / 2; + if (b_first_q) { + /* This is header */ + b_first_q = false; + reg = ((qtype << + I40E_VPINT_LNKLSTN_FIRSTQ_TYPE_SHIFT) + | qid); + } else { + /* element in the link list */ + reg = (vector_id) | + (qtype << I40E_QINT_RQCTL_NEXTQ_TYPE_SHIFT) | + (qid << I40E_QINT_RQCTL_NEXTQ_INDX_SHIFT) | + BIT(I40E_QINT_RQCTL_CAUSE_ENA_SHIFT) | + (itr_idx << I40E_QINT_RQCTL_ITR_INDX_SHIFT); + } + I40E_WRITE_REG(hw, reg_idx, reg); + /* find next register to program */ + switch (qtype) { + case I40E_QUEUE_TYPE_RX: + reg_idx = I40E_QINT_RQCTL(qid); + itr_idx = vvm->rxitr_idx; + break; + case I40E_QUEUE_TYPE_TX: + reg_idx = I40E_QINT_TQCTL(qid); + itr_idx = vvm->txitr_idx; + break; + default: + break; + } + } + i++; + tempmap >>= 1; + } while (tempmap); + + /* Terminate the link list */ + reg = (vector_id) | + (0 << I40E_QINT_RQCTL_NEXTQ_TYPE_SHIFT) | + (0x7FF << I40E_QINT_RQCTL_NEXTQ_INDX_SHIFT) | + BIT(I40E_QINT_RQCTL_CAUSE_ENA_SHIFT) | + (itr_idx << I40E_QINT_RQCTL_ITR_INDX_SHIFT); + I40E_WRITE_REG(hw, reg_idx, reg); + +cfg_irq_done: + I40E_WRITE_FLUSH(hw); +} + static int i40e_pf_host_process_cmd_config_irq_map(struct i40e_pf_vf *vf, - uint8_t *msg, uint16_t msglen) + uint8_t *msg, uint16_t msglen, + bool b_op) { int ret = I40E_SUCCESS; + struct i40e_pf *pf = vf->pf; + struct i40e_hw *hw = I40E_PF_TO_HW(vf->pf); struct i40e_virtchnl_irq_map_info *irqmap = (struct i40e_virtchnl_irq_map_info *)msg; + struct i40e_virtchnl_vector_map *map; + int i; + uint16_t vector_id; + unsigned long qbit_max; + + if (!b_op) { + i40e_pf_host_send_msg_to_vf( + vf, + I40E_VIRTCHNL_OP_CONFIG_IRQ_MAP, + I40E_NOT_SUPPORTED, NULL, 0); + return ret; + } if (msg == NULL || msglen < sizeof(struct i40e_virtchnl_irq_map_info)) { PMD_DRV_LOG(ERR, "buffer too short"); @@ -551,23 +707,46 @@ i40e_pf_host_process_cmd_config_irq_map(struct i40e_pf_vf *vf, goto send_msg; } - /* Assume VF only 
have 1 vector to bind all queues */ - if (irqmap->num_vectors != 1) { - PMD_DRV_LOG(ERR, "DKDK host only support 1 vector"); - ret = I40E_ERR_PARAM; + /* PF host will support both DPDK VF or Linux VF driver, identify by + * number of vectors requested. + */ + + /* DPDK VF only requires single vector */ + if (irqmap->num_vectors == 1) { + /* This MSIX intr store the intr in VF range */ + vf->vsi->msix_intr = irqmap->vecmap[0].vector_id; + vf->vsi->nb_msix = irqmap->num_vectors; + vf->vsi->nb_used_qps = vf->vsi->nb_qps; + + /* Don't care how the TX/RX queue mapping with this vector. + * Link all VF RX queues together. Only did mapping work. + * VF can disable/enable the intr by itself. + */ + i40e_vsi_queues_bind_intr(vf->vsi); goto send_msg; } - /* This MSIX intr store the intr in VF range */ - vf->vsi->msix_intr = irqmap->vecmap[0].vector_id; - vf->vsi->nb_msix = irqmap->num_vectors; - vf->vsi->nb_used_qps = vf->vsi->nb_qps; + /* Then, it's Linux VF driver */ + qbit_max = 1 << pf->vf_nb_qp_max; + for (i = 0; i < irqmap->num_vectors; i++) { + map = &irqmap->vecmap[i]; + + vector_id = map->vector_id; + /* validate msg params */ + if (vector_id >= hw->func_caps.num_msix_vectors_vf) { + ret = I40E_ERR_PARAM; + goto send_msg; + } + + if ((map->rxq_map < qbit_max) && (map->txq_map < qbit_max)) { + i40e_pf_config_irq_link_list(vf, map); + } else { + /* configured queue size excceed limit */ + ret = I40E_ERR_PARAM; + goto send_msg; + } + } - /* Don't care how the TX/RX queue mapping with this vector. - * Link all VF RX queues together. Only did mapping work. - * VF can disable/enable the intr by itself. - */ - i40e_vsi_queues_bind_intr(vf->vsi); send_msg: i40e_pf_host_send_msg_to_vf(vf, I40E_VIRTCHNL_OP_CONFIG_IRQ_MAP, ret, NULL, 0); @@ -646,12 +825,21 @@ send_msg: static int i40e_pf_host_process_cmd_disable_queues(struct i40e_pf_vf *vf, uint8_t *msg, - uint16_t msglen) + uint16_t msglen, + bool b_op) { int ret = I40E_SUCCESS; struct i40e_virtchnl_queue_select *q_sel = (struct i40e_virtchnl_queue_select *)msg; + if (!b_op) { + i40e_pf_host_send_msg_to_vf( + vf, + I40E_VIRTCHNL_OP_DISABLE_QUEUES, + I40E_NOT_SUPPORTED, NULL, 0); + return ret; + } + if (msg == NULL || msglen != sizeof(*q_sel)) { ret = I40E_ERR_PARAM; goto send_msg; @@ -669,7 +857,8 @@ send_msg: static int i40e_pf_host_process_cmd_add_ether_address(struct i40e_pf_vf *vf, uint8_t *msg, - uint16_t msglen) + uint16_t msglen, + bool b_op) { int ret = I40E_SUCCESS; struct i40e_virtchnl_ether_addr_list *addr_list = @@ -678,6 +867,14 @@ i40e_pf_host_process_cmd_add_ether_address(struct i40e_pf_vf *vf, int i; struct ether_addr *mac; + if (!b_op) { + i40e_pf_host_send_msg_to_vf( + vf, + I40E_VIRTCHNL_OP_ADD_ETHER_ADDRESS, + I40E_NOT_SUPPORTED, NULL, 0); + return ret; + } + memset(&filter, 0 , sizeof(struct i40e_mac_filter_info)); if (msg == NULL || msglen <= sizeof(*addr_list)) { @@ -690,8 +887,8 @@ i40e_pf_host_process_cmd_add_ether_address(struct i40e_pf_vf *vf, mac = (struct ether_addr *)(addr_list->list[i].addr); (void)rte_memcpy(&filter.mac_addr, mac, ETHER_ADDR_LEN); filter.filter_type = RTE_MACVLAN_PERFECT_MATCH; - if(!is_valid_assigned_ether_addr(mac) || - i40e_vsi_add_mac(vf->vsi, &filter)) { + if (is_zero_ether_addr(mac) || + i40e_vsi_add_mac(vf->vsi, &filter)) { ret = I40E_ERR_INVALID_MAC_ADDR; goto send_msg; } @@ -707,7 +904,8 @@ send_msg: static int i40e_pf_host_process_cmd_del_ether_address(struct i40e_pf_vf *vf, uint8_t *msg, - uint16_t msglen) + uint16_t msglen, + bool b_op) { int ret = I40E_SUCCESS; struct 
i40e_virtchnl_ether_addr_list *addr_list = @@ -715,6 +913,14 @@ i40e_pf_host_process_cmd_del_ether_address(struct i40e_pf_vf *vf, int i; struct ether_addr *mac; + if (!b_op) { + i40e_pf_host_send_msg_to_vf( + vf, + I40E_VIRTCHNL_OP_DEL_ETHER_ADDRESS, + I40E_NOT_SUPPORTED, NULL, 0); + return ret; + } + if (msg == NULL || msglen <= sizeof(*addr_list)) { PMD_DRV_LOG(ERR, "delete_ether_address argument too short"); ret = I40E_ERR_PARAM; @@ -723,7 +929,7 @@ i40e_pf_host_process_cmd_del_ether_address(struct i40e_pf_vf *vf, for (i = 0; i < addr_list->num_elements; i++) { mac = (struct ether_addr *)(addr_list->list[i].addr); - if(!is_valid_assigned_ether_addr(mac) || + if(is_zero_ether_addr(mac) || i40e_vsi_delete_mac(vf->vsi, mac)) { ret = I40E_ERR_INVALID_MAC_ADDR; goto send_msg; @@ -739,7 +945,8 @@ send_msg: static int i40e_pf_host_process_cmd_add_vlan(struct i40e_pf_vf *vf, - uint8_t *msg, uint16_t msglen) + uint8_t *msg, uint16_t msglen, + bool b_op) { int ret = I40E_SUCCESS; struct i40e_virtchnl_vlan_filter_list *vlan_filter_list = @@ -747,6 +954,14 @@ i40e_pf_host_process_cmd_add_vlan(struct i40e_pf_vf *vf, int i; uint16_t *vid; + if (!b_op) { + i40e_pf_host_send_msg_to_vf( + vf, + I40E_VIRTCHNL_OP_ADD_VLAN, + I40E_NOT_SUPPORTED, NULL, 0); + return ret; + } + if (msg == NULL || msglen <= sizeof(*vlan_filter_list)) { PMD_DRV_LOG(ERR, "add_vlan argument too short"); ret = I40E_ERR_PARAM; @@ -771,7 +986,8 @@ send_msg: static int i40e_pf_host_process_cmd_del_vlan(struct i40e_pf_vf *vf, uint8_t *msg, - uint16_t msglen) + uint16_t msglen, + bool b_op) { int ret = I40E_SUCCESS; struct i40e_virtchnl_vlan_filter_list *vlan_filter_list = @@ -779,6 +995,14 @@ i40e_pf_host_process_cmd_del_vlan(struct i40e_pf_vf *vf, int i; uint16_t *vid; + if (!b_op) { + i40e_pf_host_send_msg_to_vf( + vf, + I40E_VIRTCHNL_OP_DEL_VLAN, + I40E_NOT_SUPPORTED, NULL, 0); + return ret; + } + if (msg == NULL || msglen <= sizeof(*vlan_filter_list)) { PMD_DRV_LOG(ERR, "delete_vlan argument too short"); ret = I40E_ERR_PARAM; @@ -803,7 +1027,8 @@ static int i40e_pf_host_process_cmd_config_promisc_mode( struct i40e_pf_vf *vf, uint8_t *msg, - uint16_t msglen) + uint16_t msglen, + bool b_op) { int ret = I40E_SUCCESS; struct i40e_virtchnl_promisc_info *promisc = @@ -811,6 +1036,14 @@ i40e_pf_host_process_cmd_config_promisc_mode( struct i40e_hw *hw = I40E_PF_TO_HW(vf->pf); bool unicast = FALSE, multicast = FALSE; + if (!b_op) { + i40e_pf_host_send_msg_to_vf( + vf, + I40E_VIRTCHNL_OP_CONFIG_PROMISCUOUS_MODE, + I40E_NOT_SUPPORTED, NULL, 0); + return ret; + } + if (msg == NULL || msglen != sizeof(*promisc)) { ret = I40E_ERR_PARAM; goto send_msg; @@ -836,39 +1069,43 @@ send_msg: } static int -i40e_pf_host_process_cmd_get_stats(struct i40e_pf_vf *vf) +i40e_pf_host_process_cmd_get_stats(struct i40e_pf_vf *vf, bool b_op) { i40e_update_vsi_stats(vf->vsi); - i40e_pf_host_send_msg_to_vf(vf, I40E_VIRTCHNL_OP_GET_STATS, - I40E_SUCCESS, (uint8_t *)&vf->vsi->eth_stats, - sizeof(vf->vsi->eth_stats)); + if (b_op) + i40e_pf_host_send_msg_to_vf(vf, I40E_VIRTCHNL_OP_GET_STATS, + I40E_SUCCESS, + (uint8_t *)&vf->vsi->eth_stats, + sizeof(vf->vsi->eth_stats)); + else + i40e_pf_host_send_msg_to_vf(vf, I40E_VIRTCHNL_OP_GET_STATS, + I40E_NOT_SUPPORTED, + (uint8_t *)&vf->vsi->eth_stats, + sizeof(vf->vsi->eth_stats)); return I40E_SUCCESS; } -static void -i40e_pf_host_process_cmd_get_link_status(struct i40e_pf_vf *vf) -{ - struct rte_eth_dev *dev = I40E_VSI_TO_ETH_DEV(vf->pf->main_vsi); - - /* Update link status first to acquire latest link change */ - 
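
The MAC filter checks above are relaxed from is_valid_assigned_ether_addr() to is_zero_ether_addr(): the PF now rejects only the all-zero address, so a VF may add or delete multicast and locally administered addresses. A standalone sketch of the difference between the two predicates (mirroring, not including, the rte_ether.h helpers):

#include <stdbool.h>
#include <stdint.h>

struct mac { uint8_t b[6]; };

static bool mac_is_zero(const struct mac *m)
{
	int i;

	for (i = 0; i < 6; i++)
		if (m->b[i])
			return false;
	return true;
}

/* old criterion: unicast (I/G bit clear in the first octet) and non-zero */
static bool mac_is_valid_assigned(const struct mac *m)
{
	return !(m->b[0] & 0x01) && !mac_is_zero(m);
}

With the change, a multicast address such as 01:00:5e:00:00:01 passes the filter add/delete paths, while 00:00:00:00:00:00 is still refused.
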
i40e_dev_link_update(dev, 1); - i40e_pf_host_send_msg_to_vf(vf, I40E_VIRTCHNL_OP_GET_LINK_STAT, - I40E_SUCCESS, (uint8_t *)&dev->data->dev_link, - sizeof(struct rte_eth_link)); -} - static int i40e_pf_host_process_cmd_cfg_vlan_offload( struct i40e_pf_vf *vf, uint8_t *msg, - uint16_t msglen) + uint16_t msglen, + bool b_op) { int ret = I40E_SUCCESS; struct i40e_virtchnl_vlan_offload_info *offload = (struct i40e_virtchnl_vlan_offload_info *)msg; + if (!b_op) { + i40e_pf_host_send_msg_to_vf( + vf, + I40E_VIRTCHNL_OP_CFG_VLAN_OFFLOAD, + I40E_NOT_SUPPORTED, NULL, 0); + return ret; + } + if (msg == NULL || msglen != sizeof(*offload)) { ret = I40E_ERR_PARAM; goto send_msg; @@ -889,12 +1126,21 @@ send_msg: static int i40e_pf_host_process_cmd_cfg_pvid(struct i40e_pf_vf *vf, uint8_t *msg, - uint16_t msglen) + uint16_t msglen, + bool b_op) { int ret = I40E_SUCCESS; struct i40e_virtchnl_pvid_info *tpid_info = (struct i40e_virtchnl_pvid_info *)msg; + if (!b_op) { + i40e_pf_host_send_msg_to_vf( + vf, + I40E_VIRTCHNL_OP_CFG_VLAN_PVID, + I40E_NOT_SUPPORTED, NULL, 0); + return ret; + } + if (msg == NULL || msglen != sizeof(*tpid_info)) { ret = I40E_ERR_PARAM; goto send_msg; @@ -910,6 +1156,20 @@ send_msg: } void +i40e_notify_vf_link_status(struct rte_eth_dev *dev, struct i40e_pf_vf *vf) +{ + struct i40e_virtchnl_pf_event event; + + event.event = I40E_VIRTCHNL_EVENT_LINK_CHANGE; + event.event_data.link_event.link_status = + dev->data->dev_link.link_status; + event.event_data.link_event.link_speed = + (enum i40e_aq_link_speed)dev->data->dev_link.link_speed; + i40e_pf_host_send_msg_to_vf(vf, I40E_VIRTCHNL_OP_EVENT, + I40E_SUCCESS, (uint8_t *)&event, sizeof(event)); +} + +void i40e_pf_host_handle_vf_msg(struct rte_eth_dev *dev, uint16_t abs_vf_id, uint32_t opcode, __rte_unused uint32_t retval, @@ -921,6 +1181,8 @@ i40e_pf_host_handle_vf_msg(struct rte_eth_dev *dev, struct i40e_pf_vf *vf; /* AdminQ will pass absolute VF id, transfer to internal vf id */ uint16_t vf_id = abs_vf_id - hw->func_caps.vf_base_id; + struct rte_pmd_i40e_mb_event_param cb_param; + bool b_op = TRUE; if (vf_id > pf->vf_num - 1 || !pf->vfs) { PMD_DRV_LOG(ERR, "invalid argument"); @@ -935,10 +1197,35 @@ i40e_pf_host_handle_vf_msg(struct rte_eth_dev *dev, return; } + /** + * initialise structure to send to user application + * will return response from user in retval field + */ + cb_param.retval = RTE_PMD_I40E_MB_EVENT_PROCEED; + cb_param.vfid = vf_id; + cb_param.msg_type = opcode; + cb_param.msg = (void *)msg; + cb_param.msglen = msglen; + + /** + * Ask user application if we're allowed to perform those functions. + * If we get cb_param.retval == RTE_PMD_I40E_MB_EVENT_PROCEED, + * then business as usual. + * If RTE_PMD_I40E_MB_EVENT_NOOP_ACK or RTE_PMD_I40E_MB_EVENT_NOOP_NACK, + * do nothing and send not_supported to VF. As PF must send a response + * to VF and ACK/NACK is not defined. 
+ */ + _rte_eth_dev_callback_process(dev, RTE_ETH_EVENT_VF_MBOX, &cb_param); + if (cb_param.retval != RTE_PMD_I40E_MB_EVENT_PROCEED) { + PMD_DRV_LOG(WARNING, "VF to PF message(%d) is not permitted!", + opcode); + b_op = FALSE; + } + switch (opcode) { case I40E_VIRTCHNL_OP_VERSION : PMD_DRV_LOG(INFO, "OP_VERSION received"); - i40e_pf_host_process_cmd_version(vf); + i40e_pf_host_process_cmd_version(vf, b_op); break; case I40E_VIRTCHNL_OP_RESET_VF : PMD_DRV_LOG(INFO, "OP_RESET_VF received"); @@ -946,64 +1233,72 @@ i40e_pf_host_handle_vf_msg(struct rte_eth_dev *dev, break; case I40E_VIRTCHNL_OP_GET_VF_RESOURCES: PMD_DRV_LOG(INFO, "OP_GET_VF_RESOURCES received"); - i40e_pf_host_process_cmd_get_vf_resource(vf); + i40e_pf_host_process_cmd_get_vf_resource(vf, b_op); break; case I40E_VIRTCHNL_OP_CONFIG_VSI_QUEUES: PMD_DRV_LOG(INFO, "OP_CONFIG_VSI_QUEUES received"); - i40e_pf_host_process_cmd_config_vsi_queues(vf, msg, msglen); + i40e_pf_host_process_cmd_config_vsi_queues(vf, msg, + msglen, b_op); break; case I40E_VIRTCHNL_OP_CONFIG_VSI_QUEUES_EXT: PMD_DRV_LOG(INFO, "OP_CONFIG_VSI_QUEUES_EXT received"); i40e_pf_host_process_cmd_config_vsi_queues_ext(vf, msg, - msglen); + msglen, b_op); break; case I40E_VIRTCHNL_OP_CONFIG_IRQ_MAP: PMD_DRV_LOG(INFO, "OP_CONFIG_IRQ_MAP received"); - i40e_pf_host_process_cmd_config_irq_map(vf, msg, msglen); + i40e_pf_host_process_cmd_config_irq_map(vf, msg, msglen, b_op); break; case I40E_VIRTCHNL_OP_ENABLE_QUEUES: PMD_DRV_LOG(INFO, "OP_ENABLE_QUEUES received"); - i40e_pf_host_process_cmd_enable_queues(vf, msg, msglen); + if (b_op) { + i40e_pf_host_process_cmd_enable_queues(vf, msg, msglen); + i40e_notify_vf_link_status(dev, vf); + } else { + i40e_pf_host_send_msg_to_vf( + vf, I40E_VIRTCHNL_OP_ENABLE_QUEUES, + I40E_NOT_SUPPORTED, NULL, 0); + } break; case I40E_VIRTCHNL_OP_DISABLE_QUEUES: PMD_DRV_LOG(INFO, "OP_DISABLE_QUEUE received"); - i40e_pf_host_process_cmd_disable_queues(vf, msg, msglen); + i40e_pf_host_process_cmd_disable_queues(vf, msg, msglen, b_op); break; case I40E_VIRTCHNL_OP_ADD_ETHER_ADDRESS: PMD_DRV_LOG(INFO, "OP_ADD_ETHER_ADDRESS received"); - i40e_pf_host_process_cmd_add_ether_address(vf, msg, msglen); + i40e_pf_host_process_cmd_add_ether_address(vf, msg, + msglen, b_op); break; case I40E_VIRTCHNL_OP_DEL_ETHER_ADDRESS: PMD_DRV_LOG(INFO, "OP_DEL_ETHER_ADDRESS received"); - i40e_pf_host_process_cmd_del_ether_address(vf, msg, msglen); + i40e_pf_host_process_cmd_del_ether_address(vf, msg, + msglen, b_op); break; case I40E_VIRTCHNL_OP_ADD_VLAN: PMD_DRV_LOG(INFO, "OP_ADD_VLAN received"); - i40e_pf_host_process_cmd_add_vlan(vf, msg, msglen); + i40e_pf_host_process_cmd_add_vlan(vf, msg, msglen, b_op); break; case I40E_VIRTCHNL_OP_DEL_VLAN: PMD_DRV_LOG(INFO, "OP_DEL_VLAN received"); - i40e_pf_host_process_cmd_del_vlan(vf, msg, msglen); + i40e_pf_host_process_cmd_del_vlan(vf, msg, msglen, b_op); break; case I40E_VIRTCHNL_OP_CONFIG_PROMISCUOUS_MODE: PMD_DRV_LOG(INFO, "OP_CONFIG_PROMISCUOUS_MODE received"); - i40e_pf_host_process_cmd_config_promisc_mode(vf, msg, msglen); + i40e_pf_host_process_cmd_config_promisc_mode(vf, msg, + msglen, b_op); break; case I40E_VIRTCHNL_OP_GET_STATS: PMD_DRV_LOG(INFO, "OP_GET_STATS received"); - i40e_pf_host_process_cmd_get_stats(vf); - break; - case I40E_VIRTCHNL_OP_GET_LINK_STAT: - PMD_DRV_LOG(INFO, "OP_GET_LINK_STAT received"); - i40e_pf_host_process_cmd_get_link_status(vf); + i40e_pf_host_process_cmd_get_stats(vf, b_op); break; case I40E_VIRTCHNL_OP_CFG_VLAN_OFFLOAD: PMD_DRV_LOG(INFO, "OP_CFG_VLAN_OFFLOAD received"); - 
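
Before dispatching, every VF request is now offered to the host application through the RTE_ETH_EVENT_VF_MBOX ethdev callback; unless the application answers RTE_PMD_I40E_MB_EVENT_PROCEED, the handler replies I40E_NOT_SUPPORTED and leaves the VSI untouched. A hypothetical application-side policy hook could look like the sketch below (the structure and return values are assumed to come from rte_pmd_i40e.h; how the callback is registered via rte_eth_dev_callback_register() depends on the DPDK release):

#include <rte_pmd_i40e.h>

/* hypothetical policy: only VF 0 is allowed to reconfigure itself */
static void
vf_mbox_policy(struct rte_pmd_i40e_mb_event_param *p)
{
	p->retval = (p->vfid == 0) ? RTE_PMD_I40E_MB_EVENT_PROCEED
				   : RTE_PMD_I40E_MB_EVENT_NOOP_NACK;
}
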
i40e_pf_host_process_cmd_cfg_vlan_offload(vf, msg, msglen); + i40e_pf_host_process_cmd_cfg_vlan_offload(vf, msg, + msglen, b_op); break; case I40E_VIRTCHNL_OP_CFG_VLAN_PVID: PMD_DRV_LOG(INFO, "OP_CFG_VLAN_PVID received"); - i40e_pf_host_process_cmd_cfg_pvid(vf, msg, msglen); + i40e_pf_host_process_cmd_cfg_pvid(vf, msg, msglen, b_op); break; /* Don't add command supported below, which will * return an error code. diff --git a/src/dpdk/drivers/net/i40e/i40e_pf.h b/src/dpdk/drivers/net/i40e/i40e_pf.h index 9c01829a..b4c22876 100644 --- a/src/dpdk/drivers/net/i40e/i40e_pf.h +++ b/src/dpdk/drivers/net/i40e/i40e_pf.h @@ -1,7 +1,7 @@ /*- * BSD LICENSE * - * Copyright(c) 2010-2015 Intel Corporation. All rights reserved. + * Copyright(c) 2010-2017 Intel Corporation. All rights reserved. * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -48,20 +48,14 @@ #define I40E_DPDK_OFFSET 0x100 -enum i40e_pf_vfr_state { - I40E_PF_VFR_INPROGRESS = 0, - I40E_PF_VFR_COMPLETED = 1, -}; - /* DPDK pf driver specific command to VF */ enum i40e_virtchnl_ops_dpdk { /* * Keep some gap between Linux PF commands and * DPDK PF extended commands. */ - I40E_VIRTCHNL_OP_GET_LINK_STAT = I40E_VIRTCHNL_OP_VERSION + + I40E_VIRTCHNL_OP_CFG_VLAN_OFFLOAD = I40E_VIRTCHNL_OP_VERSION + I40E_DPDK_OFFSET, - I40E_VIRTCHNL_OP_CFG_VLAN_OFFLOAD, I40E_VIRTCHNL_OP_CFG_VLAN_PVID, I40E_VIRTCHNL_OP_CONFIG_VSI_QUEUES_EXT, }; @@ -124,5 +118,7 @@ void i40e_pf_host_handle_vf_msg(struct rte_eth_dev *dev, uint8_t *msg, uint16_t msglen); int i40e_pf_host_init(struct rte_eth_dev *dev); int i40e_pf_host_uninit(struct rte_eth_dev *dev); +void i40e_notify_vf_link_status(struct rte_eth_dev *dev, + struct i40e_pf_vf *vf); #endif /* _I40E_PF_H_ */ diff --git a/src/dpdk/drivers/net/i40e/i40e_rxtx.c b/src/dpdk/drivers/net/i40e/i40e_rxtx.c index 19b431c3..608685fa 100644 --- a/src/dpdk/drivers/net/i40e/i40e_rxtx.c +++ b/src/dpdk/drivers/net/i40e/i40e_rxtx.c @@ -1,7 +1,7 @@ /*- * BSD LICENSE * - * Copyright(c) 2010-2015 Intel Corporation. All rights reserved. + * Copyright(c) 2010-2016 Intel Corporation. All rights reserved. * All rights reserved. 
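
I40E_TX_OFFLOAD_NOTSUP_MASK is simply the complement, within PKT_TX_OFFLOAD_MASK, of the offloads this driver handles, so one AND per packet in the new prepare path detects a request the hardware cannot honour. A tiny sketch of the bit arithmetic with stand-in values (the real masks come from rte_mbuf.h):

#include <assert.h>
#include <stdint.h>

int main(void)
{
	const uint64_t all_offloads = 0xff;	/* stand-in for PKT_TX_OFFLOAD_MASK */
	const uint64_t supported    = 0x1b;	/* stand-in for I40E_TX_OFFLOAD_MASK */
	const uint64_t notsup       = all_offloads ^ supported;

	uint64_t ok_flags  = 0x0a;		/* subset of the supported bits */
	uint64_t bad_flags = 0x24;		/* contains one unsupported bit */

	assert((ok_flags & notsup) == 0);	/* accepted */
	assert((bad_flags & notsup) != 0);	/* rejected with rte_errno = -ENOTSUP */
	return 0;
}
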
* * Redistribution and use in source and binary forms, with or without @@ -50,6 +50,8 @@ #include <rte_tcp.h> #include <rte_sctp.h> #include <rte_udp.h> +#include <rte_ip.h> +#include <rte_net.h> #include "i40e_logs.h" #include "base/i40e_prototype.h" @@ -79,6 +81,17 @@ PKT_TX_TCP_SEG | \ PKT_TX_OUTER_IP_CKSUM) +#define I40E_TX_OFFLOAD_MASK ( \ + PKT_TX_IP_CKSUM | \ + PKT_TX_L4_MASK | \ + PKT_TX_OUTER_IP_CKSUM | \ + PKT_TX_TCP_SEG | \ + PKT_TX_QINQ_PKT | \ + PKT_TX_VLAN_PKT) + +#define I40E_TX_OFFLOAD_NOTSUP_MASK \ + (PKT_TX_OFFLOAD_MASK ^ I40E_TX_OFFLOAD_MASK) + static uint16_t i40e_xmit_pkts_simple(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts); @@ -138,12 +151,21 @@ i40e_rxd_error_to_pkt_flags(uint64_t qword) uint64_t error_bits = (qword >> I40E_RXD_QW1_ERROR_SHIFT); #define I40E_RX_ERR_BITS 0x3f - if (likely((error_bits & I40E_RX_ERR_BITS) == 0)) + if (likely((error_bits & I40E_RX_ERR_BITS) == 0)) { + flags |= (PKT_RX_IP_CKSUM_GOOD | PKT_RX_L4_CKSUM_GOOD); return flags; + } + if (unlikely(error_bits & (1 << I40E_RX_DESC_ERROR_IPE_SHIFT))) flags |= PKT_RX_IP_CKSUM_BAD; + else + flags |= PKT_RX_IP_CKSUM_GOOD; + if (unlikely(error_bits & (1 << I40E_RX_DESC_ERROR_L4E_SHIFT))) flags |= PKT_RX_L4_CKSUM_BAD; + else + flags |= PKT_RX_L4_CKSUM_GOOD; + if (unlikely(error_bits & (1 << I40E_RX_DESC_ERROR_EIPE_SHIFT))) flags |= PKT_RX_EIP_CKSUM_BAD; @@ -174,569 +196,6 @@ i40e_get_iee15888_flags(struct rte_mbuf *mb, uint64_t qword) } #endif -/* For each value it means, datasheet of hardware can tell more details - * - * @note: fix i40e_dev_supported_ptypes_get() if any change here. - */ -static inline uint32_t -i40e_rxd_pkt_type_mapping(uint8_t ptype) -{ - static const uint32_t type_table[UINT8_MAX + 1] __rte_cache_aligned = { - /* L2 types */ - /* [0] reserved */ - [1] = RTE_PTYPE_L2_ETHER, - [2] = RTE_PTYPE_L2_ETHER_TIMESYNC, - /* [3] - [5] reserved */ - [6] = RTE_PTYPE_L2_ETHER_LLDP, - /* [7] - [10] reserved */ - [11] = RTE_PTYPE_L2_ETHER_ARP, - /* [12] - [21] reserved */ - - /* Non tunneled IPv4 */ - [22] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | - RTE_PTYPE_L4_FRAG, - [23] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | - RTE_PTYPE_L4_NONFRAG, - [24] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | - RTE_PTYPE_L4_UDP, - /* [25] reserved */ - [26] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | - RTE_PTYPE_L4_TCP, - [27] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | - RTE_PTYPE_L4_SCTP, - [28] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | - RTE_PTYPE_L4_ICMP, - - /* IPv4 --> IPv4 */ - [29] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | - RTE_PTYPE_TUNNEL_IP | - RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | - RTE_PTYPE_INNER_L4_FRAG, - [30] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | - RTE_PTYPE_TUNNEL_IP | - RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | - RTE_PTYPE_INNER_L4_NONFRAG, - [31] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | - RTE_PTYPE_TUNNEL_IP | - RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | - RTE_PTYPE_INNER_L4_UDP, - /* [32] reserved */ - [33] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | - RTE_PTYPE_TUNNEL_IP | - RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | - RTE_PTYPE_INNER_L4_TCP, - [34] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | - RTE_PTYPE_TUNNEL_IP | - RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | - RTE_PTYPE_INNER_L4_SCTP, - [35] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | - RTE_PTYPE_TUNNEL_IP | - RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | - RTE_PTYPE_INNER_L4_ICMP, - - /* IPv4 --> IPv6 */ - 
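
i40e_rxd_error_to_pkt_flags() now reports positive results as well: when the error bits are clear the mbuf carries PKT_RX_IP_CKSUM_GOOD and PKT_RX_L4_CKSUM_GOOD, so an application can skip software verification instead of treating the absence of a BAD flag as "unknown". A sketch of the receive-side decision this enables (stand-in flag values; the real ones live in rte_mbuf.h):

#include <stdint.h>

#define IP_CKSUM_GOOD (1u << 0)	/* stand-in for PKT_RX_IP_CKSUM_GOOD */
#define IP_CKSUM_BAD  (1u << 1)	/* stand-in for PKT_RX_IP_CKSUM_BAD */

static int
ip_cksum_action(uint64_t ol_flags)
{
	if (ol_flags & IP_CKSUM_GOOD)
		return 0;		/* verified by hardware, nothing to do */
	if (ol_flags & IP_CKSUM_BAD)
		return -1;		/* drop or count as an error */
	return 1;			/* unknown: verify in software */
}
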
[36] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | - RTE_PTYPE_TUNNEL_IP | - RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | - RTE_PTYPE_INNER_L4_FRAG, - [37] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | - RTE_PTYPE_TUNNEL_IP | - RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | - RTE_PTYPE_INNER_L4_NONFRAG, - [38] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | - RTE_PTYPE_TUNNEL_IP | - RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | - RTE_PTYPE_INNER_L4_UDP, - /* [39] reserved */ - [40] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | - RTE_PTYPE_TUNNEL_IP | - RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | - RTE_PTYPE_INNER_L4_TCP, - [41] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | - RTE_PTYPE_TUNNEL_IP | - RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | - RTE_PTYPE_INNER_L4_SCTP, - [42] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | - RTE_PTYPE_TUNNEL_IP | - RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | - RTE_PTYPE_INNER_L4_ICMP, - - /* IPv4 --> GRE/Teredo/VXLAN */ - [43] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | - RTE_PTYPE_TUNNEL_GRENAT, - - /* IPv4 --> GRE/Teredo/VXLAN --> IPv4 */ - [44] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | - RTE_PTYPE_TUNNEL_GRENAT | - RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | - RTE_PTYPE_INNER_L4_FRAG, - [45] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | - RTE_PTYPE_TUNNEL_GRENAT | - RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | - RTE_PTYPE_INNER_L4_NONFRAG, - [46] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | - RTE_PTYPE_TUNNEL_GRENAT | - RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | - RTE_PTYPE_INNER_L4_UDP, - /* [47] reserved */ - [48] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | - RTE_PTYPE_TUNNEL_GRENAT | - RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | - RTE_PTYPE_INNER_L4_TCP, - [49] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | - RTE_PTYPE_TUNNEL_GRENAT | - RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | - RTE_PTYPE_INNER_L4_SCTP, - [50] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | - RTE_PTYPE_TUNNEL_GRENAT | - RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | - RTE_PTYPE_INNER_L4_ICMP, - - /* IPv4 --> GRE/Teredo/VXLAN --> IPv6 */ - [51] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | - RTE_PTYPE_TUNNEL_GRENAT | - RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | - RTE_PTYPE_INNER_L4_FRAG, - [52] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | - RTE_PTYPE_TUNNEL_GRENAT | - RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | - RTE_PTYPE_INNER_L4_NONFRAG, - [53] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | - RTE_PTYPE_TUNNEL_GRENAT | - RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | - RTE_PTYPE_INNER_L4_UDP, - /* [54] reserved */ - [55] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | - RTE_PTYPE_TUNNEL_GRENAT | - RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | - RTE_PTYPE_INNER_L4_TCP, - [56] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | - RTE_PTYPE_TUNNEL_GRENAT | - RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | - RTE_PTYPE_INNER_L4_SCTP, - [57] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | - RTE_PTYPE_TUNNEL_GRENAT | - RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | - RTE_PTYPE_INNER_L4_ICMP, - - /* IPv4 --> GRE/Teredo/VXLAN --> MAC */ - [58] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | - RTE_PTYPE_TUNNEL_GRENAT | RTE_PTYPE_INNER_L2_ETHER, - - /* IPv4 --> GRE/Teredo/VXLAN --> MAC --> IPv4 */ - [59] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | - RTE_PTYPE_TUNNEL_GRENAT | RTE_PTYPE_INNER_L2_ETHER | - RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | - RTE_PTYPE_INNER_L4_FRAG, - [60] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | - 
RTE_PTYPE_TUNNEL_GRENAT | RTE_PTYPE_INNER_L2_ETHER | - RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | - RTE_PTYPE_INNER_L4_NONFRAG, - [61] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | - RTE_PTYPE_TUNNEL_GRENAT | RTE_PTYPE_INNER_L2_ETHER | - RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | - RTE_PTYPE_INNER_L4_UDP, - /* [62] reserved */ - [63] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | - RTE_PTYPE_TUNNEL_GRENAT | RTE_PTYPE_INNER_L2_ETHER | - RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | - RTE_PTYPE_INNER_L4_TCP, - [64] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | - RTE_PTYPE_TUNNEL_GRENAT | RTE_PTYPE_INNER_L2_ETHER | - RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | - RTE_PTYPE_INNER_L4_SCTP, - [65] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | - RTE_PTYPE_TUNNEL_GRENAT | RTE_PTYPE_INNER_L2_ETHER | - RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | - RTE_PTYPE_INNER_L4_ICMP, - - /* IPv4 --> GRE/Teredo/VXLAN --> MAC --> IPv6 */ - [66] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | - RTE_PTYPE_TUNNEL_GRENAT | RTE_PTYPE_INNER_L2_ETHER | - RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | - RTE_PTYPE_INNER_L4_FRAG, - [67] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | - RTE_PTYPE_TUNNEL_GRENAT | RTE_PTYPE_INNER_L2_ETHER | - RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | - RTE_PTYPE_INNER_L4_NONFRAG, - [68] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | - RTE_PTYPE_TUNNEL_GRENAT | RTE_PTYPE_INNER_L2_ETHER | - RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | - RTE_PTYPE_INNER_L4_UDP, - /* [69] reserved */ - [70] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | - RTE_PTYPE_TUNNEL_GRENAT | RTE_PTYPE_INNER_L2_ETHER | - RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | - RTE_PTYPE_INNER_L4_TCP, - [71] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | - RTE_PTYPE_TUNNEL_GRENAT | RTE_PTYPE_INNER_L2_ETHER | - RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | - RTE_PTYPE_INNER_L4_SCTP, - [72] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | - RTE_PTYPE_TUNNEL_GRENAT | RTE_PTYPE_INNER_L2_ETHER | - RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | - RTE_PTYPE_INNER_L4_ICMP, - - /* IPv4 --> GRE/Teredo/VXLAN --> MAC/VLAN */ - [73] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | - RTE_PTYPE_TUNNEL_GRENAT | - RTE_PTYPE_INNER_L2_ETHER_VLAN, - - /* IPv4 --> GRE/Teredo/VXLAN --> MAC/VLAN --> IPv4 */ - [74] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | - RTE_PTYPE_TUNNEL_GRENAT | - RTE_PTYPE_INNER_L2_ETHER_VLAN | - RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | - RTE_PTYPE_INNER_L4_FRAG, - [75] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | - RTE_PTYPE_TUNNEL_GRENAT | - RTE_PTYPE_INNER_L2_ETHER_VLAN | - RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | - RTE_PTYPE_INNER_L4_NONFRAG, - [76] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | - RTE_PTYPE_TUNNEL_GRENAT | - RTE_PTYPE_INNER_L2_ETHER_VLAN | - RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | - RTE_PTYPE_INNER_L4_UDP, - /* [77] reserved */ - [78] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | - RTE_PTYPE_TUNNEL_GRENAT | - RTE_PTYPE_INNER_L2_ETHER_VLAN | - RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | - RTE_PTYPE_INNER_L4_TCP, - [79] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | - RTE_PTYPE_TUNNEL_GRENAT | - RTE_PTYPE_INNER_L2_ETHER_VLAN | - RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | - RTE_PTYPE_INNER_L4_SCTP, - [80] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | - RTE_PTYPE_TUNNEL_GRENAT | - RTE_PTYPE_INNER_L2_ETHER_VLAN | - RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | - RTE_PTYPE_INNER_L4_ICMP, - - /* IPv4 --> GRE/Teredo/VXLAN --> MAC/VLAN --> IPv6 */ - [81] = RTE_PTYPE_L2_ETHER | 
RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | - RTE_PTYPE_TUNNEL_GRENAT | - RTE_PTYPE_INNER_L2_ETHER_VLAN | - RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | - RTE_PTYPE_INNER_L4_FRAG, - [82] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | - RTE_PTYPE_TUNNEL_GRENAT | - RTE_PTYPE_INNER_L2_ETHER_VLAN | - RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | - RTE_PTYPE_INNER_L4_NONFRAG, - [83] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | - RTE_PTYPE_TUNNEL_GRENAT | - RTE_PTYPE_INNER_L2_ETHER_VLAN | - RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | - RTE_PTYPE_INNER_L4_UDP, - /* [84] reserved */ - [85] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | - RTE_PTYPE_TUNNEL_GRENAT | - RTE_PTYPE_INNER_L2_ETHER_VLAN | - RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | - RTE_PTYPE_INNER_L4_TCP, - [86] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | - RTE_PTYPE_TUNNEL_GRENAT | - RTE_PTYPE_INNER_L2_ETHER_VLAN | - RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | - RTE_PTYPE_INNER_L4_SCTP, - [87] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | - RTE_PTYPE_TUNNEL_GRENAT | - RTE_PTYPE_INNER_L2_ETHER_VLAN | - RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | - RTE_PTYPE_INNER_L4_ICMP, - - /* Non tunneled IPv6 */ - [88] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | - RTE_PTYPE_L4_FRAG, - [89] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | - RTE_PTYPE_L4_NONFRAG, - [90] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | - RTE_PTYPE_L4_UDP, - /* [91] reserved */ - [92] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | - RTE_PTYPE_L4_TCP, - [93] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | - RTE_PTYPE_L4_SCTP, - [94] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | - RTE_PTYPE_L4_ICMP, - - /* IPv6 --> IPv4 */ - [95] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | - RTE_PTYPE_TUNNEL_IP | - RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | - RTE_PTYPE_INNER_L4_FRAG, - [96] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | - RTE_PTYPE_TUNNEL_IP | - RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | - RTE_PTYPE_INNER_L4_NONFRAG, - [97] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | - RTE_PTYPE_TUNNEL_IP | - RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | - RTE_PTYPE_INNER_L4_UDP, - /* [98] reserved */ - [99] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | - RTE_PTYPE_TUNNEL_IP | - RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | - RTE_PTYPE_INNER_L4_TCP, - [100] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | - RTE_PTYPE_TUNNEL_IP | - RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | - RTE_PTYPE_INNER_L4_SCTP, - [101] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | - RTE_PTYPE_TUNNEL_IP | - RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | - RTE_PTYPE_INNER_L4_ICMP, - - /* IPv6 --> IPv6 */ - [102] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | - RTE_PTYPE_TUNNEL_IP | - RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | - RTE_PTYPE_INNER_L4_FRAG, - [103] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | - RTE_PTYPE_TUNNEL_IP | - RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | - RTE_PTYPE_INNER_L4_NONFRAG, - [104] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | - RTE_PTYPE_TUNNEL_IP | - RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | - RTE_PTYPE_INNER_L4_UDP, - /* [105] reserved */ - [106] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | - RTE_PTYPE_TUNNEL_IP | - RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | - RTE_PTYPE_INNER_L4_TCP, - [107] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | - RTE_PTYPE_TUNNEL_IP | - RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | - RTE_PTYPE_INNER_L4_SCTP, - [108] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | - 
RTE_PTYPE_TUNNEL_IP | - RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | - RTE_PTYPE_INNER_L4_ICMP, - - /* IPv6 --> GRE/Teredo/VXLAN */ - [109] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | - RTE_PTYPE_TUNNEL_GRENAT, - - /* IPv6 --> GRE/Teredo/VXLAN --> IPv4 */ - [110] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | - RTE_PTYPE_TUNNEL_GRENAT | - RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | - RTE_PTYPE_INNER_L4_FRAG, - [111] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | - RTE_PTYPE_TUNNEL_GRENAT | - RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | - RTE_PTYPE_INNER_L4_NONFRAG, - [112] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | - RTE_PTYPE_TUNNEL_GRENAT | - RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | - RTE_PTYPE_INNER_L4_UDP, - /* [113] reserved */ - [114] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | - RTE_PTYPE_TUNNEL_GRENAT | - RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | - RTE_PTYPE_INNER_L4_TCP, - [115] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | - RTE_PTYPE_TUNNEL_GRENAT | - RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | - RTE_PTYPE_INNER_L4_SCTP, - [116] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | - RTE_PTYPE_TUNNEL_GRENAT | - RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | - RTE_PTYPE_INNER_L4_ICMP, - - /* IPv6 --> GRE/Teredo/VXLAN --> IPv6 */ - [117] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | - RTE_PTYPE_TUNNEL_GRENAT | - RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | - RTE_PTYPE_INNER_L4_FRAG, - [118] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | - RTE_PTYPE_TUNNEL_GRENAT | - RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | - RTE_PTYPE_INNER_L4_NONFRAG, - [119] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | - RTE_PTYPE_TUNNEL_GRENAT | - RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | - RTE_PTYPE_INNER_L4_UDP, - /* [120] reserved */ - [121] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | - RTE_PTYPE_TUNNEL_GRENAT | - RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | - RTE_PTYPE_INNER_L4_TCP, - [122] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | - RTE_PTYPE_TUNNEL_GRENAT | - RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | - RTE_PTYPE_INNER_L4_SCTP, - [123] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | - RTE_PTYPE_TUNNEL_GRENAT | - RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | - RTE_PTYPE_INNER_L4_ICMP, - - /* IPv6 --> GRE/Teredo/VXLAN --> MAC */ - [124] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | - RTE_PTYPE_TUNNEL_GRENAT | RTE_PTYPE_INNER_L2_ETHER, - - /* IPv6 --> GRE/Teredo/VXLAN --> MAC --> IPv4 */ - [125] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | - RTE_PTYPE_TUNNEL_GRENAT | RTE_PTYPE_INNER_L2_ETHER | - RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | - RTE_PTYPE_INNER_L4_FRAG, - [126] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | - RTE_PTYPE_TUNNEL_GRENAT | RTE_PTYPE_INNER_L2_ETHER | - RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | - RTE_PTYPE_INNER_L4_NONFRAG, - [127] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | - RTE_PTYPE_TUNNEL_GRENAT | RTE_PTYPE_INNER_L2_ETHER | - RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | - RTE_PTYPE_INNER_L4_UDP, - /* [128] reserved */ - [129] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | - RTE_PTYPE_TUNNEL_GRENAT | RTE_PTYPE_INNER_L2_ETHER | - RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | - RTE_PTYPE_INNER_L4_TCP, - [130] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | - RTE_PTYPE_TUNNEL_GRENAT | RTE_PTYPE_INNER_L2_ETHER | - RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | - RTE_PTYPE_INNER_L4_SCTP, - [131] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | - RTE_PTYPE_TUNNEL_GRENAT | RTE_PTYPE_INNER_L2_ETHER | - 
RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | - RTE_PTYPE_INNER_L4_ICMP, - - /* IPv6 --> GRE/Teredo/VXLAN --> MAC --> IPv6 */ - [132] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | - RTE_PTYPE_TUNNEL_GRENAT | RTE_PTYPE_INNER_L2_ETHER | - RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | - RTE_PTYPE_INNER_L4_FRAG, - [133] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | - RTE_PTYPE_TUNNEL_GRENAT | RTE_PTYPE_INNER_L2_ETHER | - RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | - RTE_PTYPE_INNER_L4_NONFRAG, - [134] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | - RTE_PTYPE_TUNNEL_GRENAT | RTE_PTYPE_INNER_L2_ETHER | - RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | - RTE_PTYPE_INNER_L4_UDP, - /* [135] reserved */ - [136] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | - RTE_PTYPE_TUNNEL_GRENAT | RTE_PTYPE_INNER_L2_ETHER | - RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | - RTE_PTYPE_INNER_L4_TCP, - [137] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | - RTE_PTYPE_TUNNEL_GRENAT | RTE_PTYPE_INNER_L2_ETHER | - RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | - RTE_PTYPE_INNER_L4_SCTP, - [138] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | - RTE_PTYPE_TUNNEL_GRENAT | RTE_PTYPE_INNER_L2_ETHER | - RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | - RTE_PTYPE_INNER_L4_ICMP, - - /* IPv6 --> GRE/Teredo/VXLAN --> MAC/VLAN */ - [139] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | - RTE_PTYPE_TUNNEL_GRENAT | - RTE_PTYPE_INNER_L2_ETHER_VLAN, - - /* IPv6 --> GRE/Teredo/VXLAN --> MAC/VLAN --> IPv4 */ - [140] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | - RTE_PTYPE_TUNNEL_GRENAT | - RTE_PTYPE_INNER_L2_ETHER_VLAN | - RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | - RTE_PTYPE_INNER_L4_FRAG, - [141] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | - RTE_PTYPE_TUNNEL_GRENAT | - RTE_PTYPE_INNER_L2_ETHER_VLAN | - RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | - RTE_PTYPE_INNER_L4_NONFRAG, - [142] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | - RTE_PTYPE_TUNNEL_GRENAT | - RTE_PTYPE_INNER_L2_ETHER_VLAN | - RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | - RTE_PTYPE_INNER_L4_UDP, - /* [143] reserved */ - [144] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | - RTE_PTYPE_TUNNEL_GRENAT | - RTE_PTYPE_INNER_L2_ETHER_VLAN | - RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | - RTE_PTYPE_INNER_L4_TCP, - [145] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | - RTE_PTYPE_TUNNEL_GRENAT | - RTE_PTYPE_INNER_L2_ETHER_VLAN | - RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | - RTE_PTYPE_INNER_L4_SCTP, - [146] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | - RTE_PTYPE_TUNNEL_GRENAT | - RTE_PTYPE_INNER_L2_ETHER_VLAN | - RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | - RTE_PTYPE_INNER_L4_ICMP, - - /* IPv6 --> GRE/Teredo/VXLAN --> MAC/VLAN --> IPv6 */ - [147] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | - RTE_PTYPE_TUNNEL_GRENAT | - RTE_PTYPE_INNER_L2_ETHER_VLAN | - RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | - RTE_PTYPE_INNER_L4_FRAG, - [148] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | - RTE_PTYPE_TUNNEL_GRENAT | - RTE_PTYPE_INNER_L2_ETHER_VLAN | - RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | - RTE_PTYPE_INNER_L4_NONFRAG, - [149] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | - RTE_PTYPE_TUNNEL_GRENAT | - RTE_PTYPE_INNER_L2_ETHER_VLAN | - RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | - RTE_PTYPE_INNER_L4_UDP, - /* [150] reserved */ - [151] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | - RTE_PTYPE_TUNNEL_GRENAT | - RTE_PTYPE_INNER_L2_ETHER_VLAN | - RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | - RTE_PTYPE_INNER_L4_TCP, - [152] = RTE_PTYPE_L2_ETHER | 
RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | - RTE_PTYPE_TUNNEL_GRENAT | - RTE_PTYPE_INNER_L2_ETHER_VLAN | - RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | - RTE_PTYPE_INNER_L4_SCTP, - [153] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | - RTE_PTYPE_TUNNEL_GRENAT | - RTE_PTYPE_INNER_L2_ETHER_VLAN | - RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | - RTE_PTYPE_INNER_L4_ICMP, - - /* L2 NSH packet type */ - [154] = RTE_PTYPE_L2_ETHER_NSH, - [155] = RTE_PTYPE_L2_ETHER_NSH | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | - RTE_PTYPE_L4_FRAG, - [156] = RTE_PTYPE_L2_ETHER_NSH | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | - RTE_PTYPE_L4_NONFRAG, - [157] = RTE_PTYPE_L2_ETHER_NSH | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | - RTE_PTYPE_L4_UDP, - [158] = RTE_PTYPE_L2_ETHER_NSH | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | - RTE_PTYPE_L4_TCP, - [159] = RTE_PTYPE_L2_ETHER_NSH | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | - RTE_PTYPE_L4_SCTP, - [160] = RTE_PTYPE_L2_ETHER_NSH | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | - RTE_PTYPE_L4_ICMP, - [161] = RTE_PTYPE_L2_ETHER_NSH | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | - RTE_PTYPE_L4_FRAG, - [162] = RTE_PTYPE_L2_ETHER_NSH | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | - RTE_PTYPE_L4_NONFRAG, - [163] = RTE_PTYPE_L2_ETHER_NSH | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | - RTE_PTYPE_L4_UDP, - [164] = RTE_PTYPE_L2_ETHER_NSH | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | - RTE_PTYPE_L4_TCP, - [165] = RTE_PTYPE_L2_ETHER_NSH | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | - RTE_PTYPE_L4_SCTP, - [166] = RTE_PTYPE_L2_ETHER_NSH | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | - RTE_PTYPE_L4_ICMP, - - /* All others reserved */ - }; - - return type_table[ptype]; -} - #define I40E_RX_DESC_EXT_STATUS_FLEXBH_MASK 0x03 #define I40E_RX_DESC_EXT_STATUS_FLEXBH_FD_ID 0x01 #define I40E_RX_DESC_EXT_STATUS_FLEXBH_FLEX 0x02 @@ -779,33 +238,65 @@ i40e_rxd_build_fdir(volatile union i40e_rx_desc *rxdp, struct rte_mbuf *mb) #endif return flags; } + +static inline void +i40e_parse_tunneling_params(uint64_t ol_flags, + union i40e_tx_offload tx_offload, + uint32_t *cd_tunneling) +{ + /* EIPT: External (outer) IP header type */ + if (ol_flags & PKT_TX_OUTER_IP_CKSUM) + *cd_tunneling |= I40E_TX_CTX_EXT_IP_IPV4; + else if (ol_flags & PKT_TX_OUTER_IPV4) + *cd_tunneling |= I40E_TX_CTX_EXT_IP_IPV4_NO_CSUM; + else if (ol_flags & PKT_TX_OUTER_IPV6) + *cd_tunneling |= I40E_TX_CTX_EXT_IP_IPV6; + + /* EIPLEN: External (outer) IP header length, in DWords */ + *cd_tunneling |= (tx_offload.outer_l3_len >> 2) << + I40E_TXD_CTX_QW0_EXT_IPLEN_SHIFT; + + /* L4TUNT: L4 Tunneling Type */ + switch (ol_flags & PKT_TX_TUNNEL_MASK) { + case PKT_TX_TUNNEL_IPIP: + /* for non UDP / GRE tunneling, set to 00b */ + break; + case PKT_TX_TUNNEL_VXLAN: + case PKT_TX_TUNNEL_GENEVE: + *cd_tunneling |= I40E_TXD_CTX_UDP_TUNNELING; + break; + case PKT_TX_TUNNEL_GRE: + *cd_tunneling |= I40E_TXD_CTX_GRE_TUNNELING; + break; + default: + PMD_TX_LOG(ERR, "Tunnel type not supported"); + return; + } + + /* L4TUNLEN: L4 Tunneling Length, in Words + * + * We depend on app to set rte_mbuf.l2_len correctly. + * For IP in GRE it should be set to the length of the GRE + * header; + * for MAC in GRE or MAC in UDP it should be set to the length + * of the GRE or UDP headers plus the inner MAC up to including + * its last Ethertype. 
+ */ + *cd_tunneling |= (tx_offload.l2_len >> 1) << + I40E_TXD_CTX_QW0_NATLEN_SHIFT; +} + static inline void i40e_txd_enable_checksum(uint64_t ol_flags, uint32_t *td_cmd, uint32_t *td_offset, - union i40e_tx_offload tx_offload, - uint32_t *cd_tunneling) + union i40e_tx_offload tx_offload) { - /* UDP tunneling packet TX checksum offload */ - if (ol_flags & PKT_TX_OUTER_IP_CKSUM) { - + /* Set MACLEN */ + if (ol_flags & PKT_TX_TUNNEL_MASK) *td_offset |= (tx_offload.outer_l2_len >> 1) << I40E_TX_DESC_LENGTH_MACLEN_SHIFT; - - if (ol_flags & PKT_TX_OUTER_IP_CKSUM) - *cd_tunneling |= I40E_TX_CTX_EXT_IP_IPV4; - else if (ol_flags & PKT_TX_OUTER_IPV4) - *cd_tunneling |= I40E_TX_CTX_EXT_IP_IPV4_NO_CSUM; - else if (ol_flags & PKT_TX_OUTER_IPV6) - *cd_tunneling |= I40E_TX_CTX_EXT_IP_IPV6; - - /* Now set the ctx descriptor fields */ - *cd_tunneling |= (tx_offload.outer_l3_len >> 2) << - I40E_TXD_CTX_QW0_EXT_IPLEN_SHIFT | - (tx_offload.l2_len >> 1) << - I40E_TXD_CTX_QW0_NATLEN_SHIFT; - - } else + else *td_offset |= (tx_offload.l2_len >> 1) << I40E_TX_DESC_LENGTH_MACLEN_SHIFT; @@ -934,15 +425,6 @@ check_rx_burst_bulk_alloc_preconditions(__rte_unused struct i40e_rx_queue *rxq) "rxq->rx_free_thresh=%d", rxq->nb_rx_desc, rxq->rx_free_thresh); ret = -EINVAL; - } else if (!(rxq->nb_rx_desc < (I40E_MAX_RING_DESC - - RTE_PMD_I40E_RX_MAX_BURST))) { - PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions: " - "rxq->nb_rx_desc=%d, " - "I40E_MAX_RING_DESC=%d, " - "RTE_PMD_I40E_RX_MAX_BURST=%d", - rxq->nb_rx_desc, I40E_MAX_RING_DESC, - RTE_PMD_I40E_RX_MAX_BURST); - ret = -EINVAL; } #else ret = -EINVAL; @@ -994,6 +476,8 @@ i40e_rx_scan_hw_ring(struct i40e_rx_queue *rxq) I40E_RXD_QW1_STATUS_SHIFT; } + rte_smp_rmb(); + /* Compute how many status bits were set */ for (j = 0, nb_dd = 0; j < I40E_LOOK_AHEAD; j++) nb_dd += s[j] & (1 << I40E_RX_DESC_STATUS_DD_SHIFT); @@ -1104,7 +588,7 @@ i40e_rx_alloc_bufs(struct i40e_rx_queue *rxq) /* Update rx tail regsiter */ rte_wmb(); - I40E_PCI_REG_WRITE(rxq->qrx_tail, rxq->rx_free_trigger); + I40E_PCI_REG_WRITE_RELAXED(rxq->qrx_tail, rxq->rx_free_trigger); rxq->rx_free_trigger = (uint16_t)(rxq->rx_free_trigger + rxq->rx_free_thresh); @@ -1484,7 +968,8 @@ i40e_calc_context_desc(uint64_t flags) { static uint64_t mask = PKT_TX_OUTER_IP_CKSUM | PKT_TX_TCP_SEG | - PKT_TX_QINQ_PKT; + PKT_TX_QINQ_PKT | + PKT_TX_TUNNEL_MASK; #ifdef RTE_LIBRTE_IEEE1588 mask |= PKT_TX_IEEE1588_TMST; @@ -1506,7 +991,7 @@ i40e_set_tso_ctx(struct rte_mbuf *mbuf, union i40e_tx_offload tx_offload) } /** - * in case of tunneling packet, the outer_l2_len and + * in case of non tunneling packet, the outer_l2_len and * outer_l3_len must be 0. 
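
i40e_parse_tunneling_params() derives the context-descriptor tunnelling fields entirely from mbuf metadata, so the application has to describe the encapsulation exactly as the comment above states: outer_l2_len/outer_l3_len cover the outer headers and l2_len covers the UDP (or GRE) plus inner Ethernet headers. A hypothetical mbuf setup for a VXLAN-in-IPv4 packet, assuming the usual header sizes (field and flag names from rte_mbuf.h):

#include <rte_mbuf.h>

static void
set_vxlan_tx_offload(struct rte_mbuf *m)
{
	m->outer_l2_len = 14;		/* outer Ethernet */
	m->outer_l3_len = 20;		/* outer IPv4 */
	m->l2_len = 8 + 8 + 14;		/* UDP + VXLAN + inner Ethernet */
	m->l3_len = 20;			/* inner IPv4 */
	m->ol_flags |= PKT_TX_OUTER_IP_CKSUM | PKT_TX_OUTER_IPV4 |
		       PKT_TX_TUNNEL_VXLAN |
		       PKT_TX_IP_CKSUM | PKT_TX_IPV4 | PKT_TX_UDP_CKSUM;
}
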
*/ hdr_len = tx_offload.outer_l2_len + @@ -1623,12 +1108,15 @@ i40e_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts) /* Always enable CRC offload insertion */ td_cmd |= I40E_TX_DESC_CMD_ICRC; - /* Enable checksum offloading */ + /* Fill in tunneling parameters if necessary */ cd_tunneling_params = 0; - if (ol_flags & I40E_TX_CKSUM_OFFLOAD_MASK) { - i40e_txd_enable_checksum(ol_flags, &td_cmd, &td_offset, - tx_offload, &cd_tunneling_params); - } + if (ol_flags & PKT_TX_TUNNEL_MASK) + i40e_parse_tunneling_params(ol_flags, tx_offload, + &cd_tunneling_params); + /* Enable checksum offloading */ + if (ol_flags & I40E_TX_CKSUM_OFFLOAD_MASK) + i40e_txd_enable_checksum(ol_flags, &td_cmd, + &td_offset, tx_offload); if (nb_ctx) { /* Setup TX context descriptor if required */ @@ -1747,7 +1235,7 @@ end_of_tx: (unsigned) txq->port_id, (unsigned) txq->queue_id, (unsigned) tx_id, (unsigned) nb_tx); - I40E_PCI_REG_WRITE(txq->qtx_tail, tx_id); + I40E_PCI_REG_WRITE_RELAXED(txq->qtx_tail, tx_id); txq->tx_tail = tx_id; return nb_tx; @@ -1899,7 +1387,7 @@ tx_xmit_pkts(struct i40e_tx_queue *txq, /* Update the tx tail register */ rte_wmb(); - I40E_PCI_REG_WRITE(txq->qtx_tail, txq->tx_tail); + I40E_PCI_REG_WRITE_RELAXED(txq->qtx_tail, txq->tx_tail); return nb_pkts; } @@ -1930,6 +1418,63 @@ i40e_xmit_pkts_simple(void *tx_queue, return nb_tx; } +/********************************************************************* + * + * TX prep functions + * + **********************************************************************/ +uint16_t +i40e_prep_pkts(__rte_unused void *tx_queue, struct rte_mbuf **tx_pkts, + uint16_t nb_pkts) +{ + int i, ret; + uint64_t ol_flags; + struct rte_mbuf *m; + + for (i = 0; i < nb_pkts; i++) { + m = tx_pkts[i]; + ol_flags = m->ol_flags; + + /** + * m->nb_segs is uint8_t, so nb_segs is always less than + * I40E_TX_MAX_SEG. + * We check only a condition for nb_segs > I40E_TX_MAX_MTU_SEG. + */ + if (!(ol_flags & PKT_TX_TCP_SEG)) { + if (m->nb_segs > I40E_TX_MAX_MTU_SEG) { + rte_errno = -EINVAL; + return i; + } + } else if ((m->tso_segsz < I40E_MIN_TSO_MSS) || + (m->tso_segsz > I40E_MAX_TSO_MSS)) { + /* MSS outside the range (256B - 9674B) are considered + * malicious + */ + rte_errno = -EINVAL; + return i; + } + + if (ol_flags & I40E_TX_OFFLOAD_NOTSUP_MASK) { + rte_errno = -ENOTSUP; + return i; + } + +#ifdef RTE_LIBRTE_ETHDEV_DEBUG + ret = rte_validate_tx_offload(m); + if (ret != 0) { + rte_errno = ret; + return i; + } +#endif + ret = rte_net_intel_cksum_prepare(m); + if (ret != 0) { + rte_errno = ret; + return i; + } + } + return i; +} + /* * Find the VSI the queue belongs to. 'queue_idx' is the queue index * application used, which assume having sequential ones. 
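
With i40e_prep_pkts() wired up as the device's tx_pkt_prepare callback (see the end of this file's diff), an application can run rte_eth_tx_prepare() ahead of rte_eth_tx_burst() so that malformed requests -- too many segments, a TSO MSS outside 256..9674 bytes, or unsupported offload flags -- are reported through rte_errno instead of reaching the hardware. A hedged usage sketch with minimal error handling (note that this prepare path stores negative errno values in rte_errno):

#include <stdio.h>
#include <rte_ethdev.h>
#include <rte_errno.h>

static uint16_t
send_burst(uint8_t port, uint16_t queue, struct rte_mbuf **pkts, uint16_t n)
{
	uint16_t nb_ok = rte_eth_tx_prepare(port, queue, pkts, n);

	if (nb_ok != n)
		/* pkts[nb_ok] failed validation */
		printf("tx_prepare stopped at %u, rte_errno=%d\n",
		       nb_ok, rte_errno);

	return rte_eth_tx_burst(port, queue, pkts, nb_ok);
}
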
But from driver's @@ -2136,7 +1681,9 @@ i40e_dev_supported_ptypes_get(struct rte_eth_dev *dev) #ifdef RTE_LIBRTE_I40E_RX_ALLOW_BULK_ALLOC dev->rx_pkt_burst == i40e_recv_pkts_bulk_alloc || #endif - dev->rx_pkt_burst == i40e_recv_scattered_pkts) + dev->rx_pkt_burst == i40e_recv_scattered_pkts || + dev->rx_pkt_burst == i40e_recv_scattered_pkts_vec || + dev->rx_pkt_burst == i40e_recv_pkts_vec) return ptypes; return NULL; } @@ -2161,21 +1708,12 @@ i40e_dev_rx_queue_setup(struct rte_eth_dev *dev, uint16_t base, bsf, tc_mapping; int use_def_burst_func = 1; -#define TREX_PATCH_LOW_LATENCY -#ifdef TREX_PATCH_LOW_LATENCY - int is_vf = 0; -#endif - if (hw->mac.type == I40E_MAC_VF || hw->mac.type == I40E_MAC_X722_VF) { struct i40e_vf *vf = I40EVF_DEV_PRIVATE_TO_VF(dev->data->dev_private); vsi = &vf->vsi; -#ifdef TREX_PATCH_LOW_LATENCY - is_vf = 1; -#endif - } else { + } else vsi = i40e_pf_get_vsi_by_qindex(pf, queue_idx); - } if (vsi == NULL) { PMD_DRV_LOG(ERR, "VSI not available or queue " @@ -2224,8 +1762,19 @@ i40e_dev_rx_queue_setup(struct rte_eth_dev *dev, rxq->rx_deferred_start = rx_conf->rx_deferred_start; /* Allocate the maximun number of RX ring hardware descriptor. */ - ring_size = sizeof(union i40e_rx_desc) * I40E_MAX_RING_DESC; - ring_size = RTE_ALIGN(ring_size, I40E_DMA_MEM_ALIGN); + len = I40E_MAX_RING_DESC; + +#ifdef RTE_LIBRTE_I40E_RX_ALLOW_BULK_ALLOC + /** + * Allocating a little more memory because vectorized/bulk_alloc Rx + * functions doesn't check boundaries each time. + */ + len += RTE_PMD_I40E_RX_MAX_BURST; +#endif + + ring_size = RTE_ALIGN(len * sizeof(union i40e_rx_desc), + I40E_DMA_MEM_ALIGN); + rz = rte_eth_dma_zone_reserve(dev, "rx_ring", queue_idx, ring_size, I40E_RING_BASE_ALIGN, socket_id); if (!rz) { @@ -2280,12 +1829,6 @@ i40e_dev_rx_queue_setup(struct rte_eth_dev *dev, ad->rx_bulk_alloc_allowed = false; } -#ifdef TREX_PATCH_LOW_LATENCY - if (! is_vf) - rxq->dcb_tc =0; - else // The entire for below is in the else -#endif - for (i = 0; i < I40E_MAX_TRAFFIC_CLASS; i++) { if (!(vsi->enabled_tc & (1 << i))) continue; @@ -2393,25 +1936,12 @@ i40e_dev_tx_queue_setup(struct rte_eth_dev *dev, uint16_t tx_rs_thresh, tx_free_thresh; uint16_t i, base, bsf, tc_mapping; -#ifdef TREX_PATCH_LOW_LATENCY - u8 low_latency = 0; - int is_vf = 1; -#endif - if (hw->mac.type == I40E_MAC_VF || hw->mac.type == I40E_MAC_X722_VF) { struct i40e_vf *vf = I40EVF_DEV_PRIVATE_TO_VF(dev->data->dev_private); vsi = &vf->vsi; - } else { + } else vsi = i40e_pf_get_vsi_by_qindex(pf, queue_idx); -#ifdef TREX_PATCH_LOW_LATENCY - if (queue_idx == pf->dev_data->nb_tx_queues-1) { - low_latency = 1; - } - is_vf = 0; -#endif - } - if (vsi == NULL) { PMD_DRV_LOG(ERR, "VSI is NULL, or queue index (%u) " @@ -2461,8 +1991,7 @@ i40e_dev_tx_queue_setup(struct rte_eth_dev *dev, return I40E_ERR_PARAM; } if (tx_free_thresh >= (nb_desc - 3)) { - PMD_INIT_LOG(ERR, "tx_rs_thresh must be less than the " - "tx_free_thresh must be less than the " + PMD_INIT_LOG(ERR, "tx_free_thresh must be less than the " "number of TX descriptors minus 3. " "(tx_free_thresh=%u port=%d queue=%d)", (unsigned int)tx_free_thresh, @@ -2567,15 +2096,6 @@ i40e_dev_tx_queue_setup(struct rte_eth_dev *dev, /* Use a simple TX queue without offloads or multi segs if possible */ i40e_set_tx_function_flag(dev, txq); -#ifdef TREX_PATCH_LOW_LATENCY - if (! 
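
The RX ring is now always sized for I40E_MAX_RING_DESC plus RTE_PMD_I40E_RX_MAX_BURST extra descriptors, which is what lets the bulk-alloc and vector receive paths scan past the nominal ring end without a per-iteration bounds check (and why the old precondition on nb_rx_desc could be dropped). The arithmetic, as a sketch with stand-in values for the DPDK constants:

#include <stdint.h>
#include <stdio.h>

#define MAX_RING_DESC	4096U		/* I40E_MAX_RING_DESC */
#define RX_MAX_BURST	32U		/* RTE_PMD_I40E_RX_MAX_BURST */
#define DESC_SIZE	32U		/* sizeof(union i40e_rx_desc), 32B mode */
#define DMA_MEM_ALIGN	4096U		/* I40E_DMA_MEM_ALIGN, a power of two */

static uint32_t align_up(uint32_t v, uint32_t a)
{
	return (v + a - 1) & ~(a - 1);
}

int main(void)
{
	uint32_t len = MAX_RING_DESC + RX_MAX_BURST;
	uint32_t ring_size = align_up(len * DESC_SIZE, DMA_MEM_ALIGN);

	printf("descriptors reserved: %u, ring bytes: %u\n", len, ring_size);
	return 0;
}
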
is_vf) { - if (low_latency) { - txq->dcb_tc=1; - }else{ - txq->dcb_tc=0; - } - } else // The entire for below is in the else -#endif for (i = 0; i < I40E_MAX_TRAFFIC_CLASS; i++) { if (!(vsi->enabled_tc & (1 << i))) continue; @@ -2985,11 +2505,15 @@ i40e_dev_clear_queues(struct rte_eth_dev *dev) PMD_INIT_FUNC_TRACE(); for (i = 0; i < dev->data->nb_tx_queues; i++) { + if (!dev->data->tx_queues[i]) + continue; i40e_tx_queue_release_mbufs(dev->data->tx_queues[i]); i40e_reset_tx_queue(dev->data->tx_queues[i]); } for (i = 0; i < dev->data->nb_rx_queues; i++) { + if (!dev->data->rx_queues[i]) + continue; i40e_rx_queue_release_mbufs(dev->data->rx_queues[i]); i40e_reset_rx_queue(dev->data->rx_queues[i]); } @@ -3003,12 +2527,16 @@ i40e_dev_free_queues(struct rte_eth_dev *dev) PMD_INIT_FUNC_TRACE(); for (i = 0; i < dev->data->nb_rx_queues; i++) { + if (!dev->data->rx_queues[i]) + continue; i40e_dev_rx_queue_release(dev->data->rx_queues[i]); dev->data->rx_queues[i] = NULL; } dev->data->nb_rx_queues = 0; for (i = 0; i < dev->data->nb_tx_queues; i++) { + if (!dev->data->tx_queues[i]) + continue; i40e_dev_tx_queue_release(dev->data->tx_queues[i]); dev->data->tx_queues[i] = NULL; } @@ -3191,7 +2719,7 @@ i40e_set_rx_function(struct rte_eth_dev *dev) struct i40e_rx_queue *rxq = dev->data->rx_queues[i]; - if (i40e_rxq_vec_setup(rxq)) { + if (rxq && i40e_rxq_vec_setup(rxq)) { ad->rx_vec_allowed = false; break; } @@ -3253,7 +2781,8 @@ i40e_set_rx_function(struct rte_eth_dev *dev) for (i = 0; i < dev->data->nb_rx_queues; i++) { struct i40e_rx_queue *rxq = dev->data->rx_queues[i]; - rxq->rx_using_sse = rx_using_sse; + if (rxq) + rxq->rx_using_sse = rx_using_sse; } } } @@ -3292,7 +2821,7 @@ i40e_set_tx_function(struct rte_eth_dev *dev) struct i40e_tx_queue *txq = dev->data->tx_queues[i]; - if (i40e_txq_vec_setup(txq)) { + if (txq && i40e_txq_vec_setup(txq)) { ad->tx_vec_allowed = false; break; } @@ -3308,9 +2837,11 @@ i40e_set_tx_function(struct rte_eth_dev *dev) PMD_INIT_LOG(DEBUG, "Simple tx finally be used."); dev->tx_pkt_burst = i40e_xmit_pkts_simple; } + dev->tx_pkt_prepare = NULL; } else { PMD_INIT_LOG(DEBUG, "Xmit tx finally be used."); dev->tx_pkt_burst = i40e_xmit_pkts; + dev->tx_pkt_prepare = i40e_prep_pkts; } } diff --git a/src/dpdk/drivers/net/i40e/i40e_rxtx.h b/src/dpdk/drivers/net/i40e/i40e_rxtx.h index 98179f00..9df8a56f 100644 --- a/src/dpdk/drivers/net/i40e/i40e_rxtx.h +++ b/src/dpdk/drivers/net/i40e/i40e_rxtx.h @@ -63,6 +63,12 @@ #define I40E_MIN_RING_DESC 64 #define I40E_MAX_RING_DESC 4096 +#define I40E_MIN_TSO_MSS 256 +#define I40E_MAX_TSO_MSS 9674 + +#define I40E_TX_MAX_SEG UINT8_MAX +#define I40E_TX_MAX_MTU_SEG 8 + #undef container_of #define container_of(ptr, type, member) ({ \ typeof(((type *)0)->member)(*__mptr) = (ptr); \ @@ -223,6 +229,8 @@ uint16_t i40e_recv_scattered_pkts(void *rx_queue, uint16_t i40e_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts); +uint16_t i40e_prep_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, + uint16_t nb_pkts); int i40e_tx_queue_init(struct i40e_tx_queue *txq); int i40e_rx_queue_init(struct i40e_rx_queue *rxq); void i40e_free_tx_resources(struct i40e_tx_queue *txq); @@ -255,4 +263,567 @@ void i40e_set_tx_function_flag(struct rte_eth_dev *dev, struct i40e_tx_queue *txq); void i40e_set_tx_function(struct rte_eth_dev *dev); +/* For each value it means, datasheet of hardware can tell more details + * + * @note: fix i40e_dev_supported_ptypes_get() if any change here. 
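
The new header constants bound what i40e_prep_pkts() accepts: at most I40E_TX_MAX_MTU_SEG (8) segments for a non-TSO packet, and a TSO MSS between I40E_MIN_TSO_MSS (256) and I40E_MAX_TSO_MSS (9674) bytes. A hypothetical mbuf setup for an IPv4 TCP segmentation request that stays inside those limits (names from rte_mbuf.h; checksum field preparation is left to rte_eth_tx_prepare()):

#include <rte_mbuf.h>

static void
set_tso_request(struct rte_mbuf *m)
{
	m->l2_len = 14;			/* Ethernet */
	m->l3_len = 20;			/* IPv4 */
	m->l4_len = 20;			/* TCP */
	m->tso_segsz = 1448;		/* 256 <= MSS <= 9674 */
	m->ol_flags |= PKT_TX_TCP_SEG | PKT_TX_IP_CKSUM | PKT_TX_IPV4;
}
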
+ */ +static inline uint32_t +i40e_rxd_pkt_type_mapping(uint8_t ptype) +{ + static const uint32_t type_table[UINT8_MAX + 1] __rte_cache_aligned = { + /* L2 types */ + /* [0] reserved */ + [1] = RTE_PTYPE_L2_ETHER, + [2] = RTE_PTYPE_L2_ETHER_TIMESYNC, + /* [3] - [5] reserved */ + [6] = RTE_PTYPE_L2_ETHER_LLDP, + /* [7] - [10] reserved */ + [11] = RTE_PTYPE_L2_ETHER_ARP, + /* [12] - [21] reserved */ + + /* Non tunneled IPv4 */ + [22] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | + RTE_PTYPE_L4_FRAG, + [23] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | + RTE_PTYPE_L4_NONFRAG, + [24] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | + RTE_PTYPE_L4_UDP, + /* [25] reserved */ + [26] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | + RTE_PTYPE_L4_TCP, + [27] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | + RTE_PTYPE_L4_SCTP, + [28] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | + RTE_PTYPE_L4_ICMP, + + /* IPv4 --> IPv4 */ + [29] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | + RTE_PTYPE_TUNNEL_IP | + RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | + RTE_PTYPE_INNER_L4_FRAG, + [30] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | + RTE_PTYPE_TUNNEL_IP | + RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | + RTE_PTYPE_INNER_L4_NONFRAG, + [31] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | + RTE_PTYPE_TUNNEL_IP | + RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | + RTE_PTYPE_INNER_L4_UDP, + /* [32] reserved */ + [33] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | + RTE_PTYPE_TUNNEL_IP | + RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | + RTE_PTYPE_INNER_L4_TCP, + [34] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | + RTE_PTYPE_TUNNEL_IP | + RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | + RTE_PTYPE_INNER_L4_SCTP, + [35] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | + RTE_PTYPE_TUNNEL_IP | + RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | + RTE_PTYPE_INNER_L4_ICMP, + + /* IPv4 --> IPv6 */ + [36] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | + RTE_PTYPE_TUNNEL_IP | + RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | + RTE_PTYPE_INNER_L4_FRAG, + [37] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | + RTE_PTYPE_TUNNEL_IP | + RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | + RTE_PTYPE_INNER_L4_NONFRAG, + [38] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | + RTE_PTYPE_TUNNEL_IP | + RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | + RTE_PTYPE_INNER_L4_UDP, + /* [39] reserved */ + [40] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | + RTE_PTYPE_TUNNEL_IP | + RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | + RTE_PTYPE_INNER_L4_TCP, + [41] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | + RTE_PTYPE_TUNNEL_IP | + RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | + RTE_PTYPE_INNER_L4_SCTP, + [42] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | + RTE_PTYPE_TUNNEL_IP | + RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | + RTE_PTYPE_INNER_L4_ICMP, + + /* IPv4 --> GRE/Teredo/VXLAN */ + [43] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | + RTE_PTYPE_TUNNEL_GRENAT, + + /* IPv4 --> GRE/Teredo/VXLAN --> IPv4 */ + [44] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | + RTE_PTYPE_TUNNEL_GRENAT | + RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | + RTE_PTYPE_INNER_L4_FRAG, + [45] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | + RTE_PTYPE_TUNNEL_GRENAT | + RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | + RTE_PTYPE_INNER_L4_NONFRAG, + [46] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | + RTE_PTYPE_TUNNEL_GRENAT | + RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | + RTE_PTYPE_INNER_L4_UDP, + /* [47] reserved */ + [48] = 
RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | + RTE_PTYPE_TUNNEL_GRENAT | + RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | + RTE_PTYPE_INNER_L4_TCP, + [49] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | + RTE_PTYPE_TUNNEL_GRENAT | + RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | + RTE_PTYPE_INNER_L4_SCTP, + [50] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | + RTE_PTYPE_TUNNEL_GRENAT | + RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | + RTE_PTYPE_INNER_L4_ICMP, + + /* IPv4 --> GRE/Teredo/VXLAN --> IPv6 */ + [51] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | + RTE_PTYPE_TUNNEL_GRENAT | + RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | + RTE_PTYPE_INNER_L4_FRAG, + [52] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | + RTE_PTYPE_TUNNEL_GRENAT | + RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | + RTE_PTYPE_INNER_L4_NONFRAG, + [53] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | + RTE_PTYPE_TUNNEL_GRENAT | + RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | + RTE_PTYPE_INNER_L4_UDP, + /* [54] reserved */ + [55] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | + RTE_PTYPE_TUNNEL_GRENAT | + RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | + RTE_PTYPE_INNER_L4_TCP, + [56] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | + RTE_PTYPE_TUNNEL_GRENAT | + RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | + RTE_PTYPE_INNER_L4_SCTP, + [57] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | + RTE_PTYPE_TUNNEL_GRENAT | + RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | + RTE_PTYPE_INNER_L4_ICMP, + + /* IPv4 --> GRE/Teredo/VXLAN --> MAC */ + [58] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | + RTE_PTYPE_TUNNEL_GRENAT | RTE_PTYPE_INNER_L2_ETHER, + + /* IPv4 --> GRE/Teredo/VXLAN --> MAC --> IPv4 */ + [59] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | + RTE_PTYPE_TUNNEL_GRENAT | RTE_PTYPE_INNER_L2_ETHER | + RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | + RTE_PTYPE_INNER_L4_FRAG, + [60] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | + RTE_PTYPE_TUNNEL_GRENAT | RTE_PTYPE_INNER_L2_ETHER | + RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | + RTE_PTYPE_INNER_L4_NONFRAG, + [61] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | + RTE_PTYPE_TUNNEL_GRENAT | RTE_PTYPE_INNER_L2_ETHER | + RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | + RTE_PTYPE_INNER_L4_UDP, + /* [62] reserved */ + [63] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | + RTE_PTYPE_TUNNEL_GRENAT | RTE_PTYPE_INNER_L2_ETHER | + RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | + RTE_PTYPE_INNER_L4_TCP, + [64] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | + RTE_PTYPE_TUNNEL_GRENAT | RTE_PTYPE_INNER_L2_ETHER | + RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | + RTE_PTYPE_INNER_L4_SCTP, + [65] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | + RTE_PTYPE_TUNNEL_GRENAT | RTE_PTYPE_INNER_L2_ETHER | + RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | + RTE_PTYPE_INNER_L4_ICMP, + + /* IPv4 --> GRE/Teredo/VXLAN --> MAC --> IPv6 */ + [66] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | + RTE_PTYPE_TUNNEL_GRENAT | RTE_PTYPE_INNER_L2_ETHER | + RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | + RTE_PTYPE_INNER_L4_FRAG, + [67] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | + RTE_PTYPE_TUNNEL_GRENAT | RTE_PTYPE_INNER_L2_ETHER | + RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | + RTE_PTYPE_INNER_L4_NONFRAG, + [68] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | + RTE_PTYPE_TUNNEL_GRENAT | RTE_PTYPE_INNER_L2_ETHER | + RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | + RTE_PTYPE_INNER_L4_UDP, + /* [69] reserved */ + [70] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | + RTE_PTYPE_TUNNEL_GRENAT | RTE_PTYPE_INNER_L2_ETHER 
| + RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | + RTE_PTYPE_INNER_L4_TCP, + [71] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | + RTE_PTYPE_TUNNEL_GRENAT | RTE_PTYPE_INNER_L2_ETHER | + RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | + RTE_PTYPE_INNER_L4_SCTP, + [72] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | + RTE_PTYPE_TUNNEL_GRENAT | RTE_PTYPE_INNER_L2_ETHER | + RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | + RTE_PTYPE_INNER_L4_ICMP, + + /* IPv4 --> GRE/Teredo/VXLAN --> MAC/VLAN */ + [73] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | + RTE_PTYPE_TUNNEL_GRENAT | + RTE_PTYPE_INNER_L2_ETHER_VLAN, + + /* IPv4 --> GRE/Teredo/VXLAN --> MAC/VLAN --> IPv4 */ + [74] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | + RTE_PTYPE_TUNNEL_GRENAT | + RTE_PTYPE_INNER_L2_ETHER_VLAN | + RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | + RTE_PTYPE_INNER_L4_FRAG, + [75] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | + RTE_PTYPE_TUNNEL_GRENAT | + RTE_PTYPE_INNER_L2_ETHER_VLAN | + RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | + RTE_PTYPE_INNER_L4_NONFRAG, + [76] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | + RTE_PTYPE_TUNNEL_GRENAT | + RTE_PTYPE_INNER_L2_ETHER_VLAN | + RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | + RTE_PTYPE_INNER_L4_UDP, + /* [77] reserved */ + [78] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | + RTE_PTYPE_TUNNEL_GRENAT | + RTE_PTYPE_INNER_L2_ETHER_VLAN | + RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | + RTE_PTYPE_INNER_L4_TCP, + [79] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | + RTE_PTYPE_TUNNEL_GRENAT | + RTE_PTYPE_INNER_L2_ETHER_VLAN | + RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | + RTE_PTYPE_INNER_L4_SCTP, + [80] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | + RTE_PTYPE_TUNNEL_GRENAT | + RTE_PTYPE_INNER_L2_ETHER_VLAN | + RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | + RTE_PTYPE_INNER_L4_ICMP, + + /* IPv4 --> GRE/Teredo/VXLAN --> MAC/VLAN --> IPv6 */ + [81] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | + RTE_PTYPE_TUNNEL_GRENAT | + RTE_PTYPE_INNER_L2_ETHER_VLAN | + RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | + RTE_PTYPE_INNER_L4_FRAG, + [82] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | + RTE_PTYPE_TUNNEL_GRENAT | + RTE_PTYPE_INNER_L2_ETHER_VLAN | + RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | + RTE_PTYPE_INNER_L4_NONFRAG, + [83] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | + RTE_PTYPE_TUNNEL_GRENAT | + RTE_PTYPE_INNER_L2_ETHER_VLAN | + RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | + RTE_PTYPE_INNER_L4_UDP, + /* [84] reserved */ + [85] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | + RTE_PTYPE_TUNNEL_GRENAT | + RTE_PTYPE_INNER_L2_ETHER_VLAN | + RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | + RTE_PTYPE_INNER_L4_TCP, + [86] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | + RTE_PTYPE_TUNNEL_GRENAT | + RTE_PTYPE_INNER_L2_ETHER_VLAN | + RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | + RTE_PTYPE_INNER_L4_SCTP, + [87] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | + RTE_PTYPE_TUNNEL_GRENAT | + RTE_PTYPE_INNER_L2_ETHER_VLAN | + RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | + RTE_PTYPE_INNER_L4_ICMP, + + /* Non tunneled IPv6 */ + [88] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | + RTE_PTYPE_L4_FRAG, + [89] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | + RTE_PTYPE_L4_NONFRAG, + [90] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | + RTE_PTYPE_L4_UDP, + /* [91] reserved */ + [92] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | + RTE_PTYPE_L4_TCP, + [93] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | + RTE_PTYPE_L4_SCTP, + [94] = 
RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | + RTE_PTYPE_L4_ICMP, + + /* IPv6 --> IPv4 */ + [95] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | + RTE_PTYPE_TUNNEL_IP | + RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | + RTE_PTYPE_INNER_L4_FRAG, + [96] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | + RTE_PTYPE_TUNNEL_IP | + RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | + RTE_PTYPE_INNER_L4_NONFRAG, + [97] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | + RTE_PTYPE_TUNNEL_IP | + RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | + RTE_PTYPE_INNER_L4_UDP, + /* [98] reserved */ + [99] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | + RTE_PTYPE_TUNNEL_IP | + RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | + RTE_PTYPE_INNER_L4_TCP, + [100] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | + RTE_PTYPE_TUNNEL_IP | + RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | + RTE_PTYPE_INNER_L4_SCTP, + [101] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | + RTE_PTYPE_TUNNEL_IP | + RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | + RTE_PTYPE_INNER_L4_ICMP, + + /* IPv6 --> IPv6 */ + [102] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | + RTE_PTYPE_TUNNEL_IP | + RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | + RTE_PTYPE_INNER_L4_FRAG, + [103] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | + RTE_PTYPE_TUNNEL_IP | + RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | + RTE_PTYPE_INNER_L4_NONFRAG, + [104] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | + RTE_PTYPE_TUNNEL_IP | + RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | + RTE_PTYPE_INNER_L4_UDP, + /* [105] reserved */ + [106] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | + RTE_PTYPE_TUNNEL_IP | + RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | + RTE_PTYPE_INNER_L4_TCP, + [107] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | + RTE_PTYPE_TUNNEL_IP | + RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | + RTE_PTYPE_INNER_L4_SCTP, + [108] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | + RTE_PTYPE_TUNNEL_IP | + RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | + RTE_PTYPE_INNER_L4_ICMP, + + /* IPv6 --> GRE/Teredo/VXLAN */ + [109] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | + RTE_PTYPE_TUNNEL_GRENAT, + + /* IPv6 --> GRE/Teredo/VXLAN --> IPv4 */ + [110] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | + RTE_PTYPE_TUNNEL_GRENAT | + RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | + RTE_PTYPE_INNER_L4_FRAG, + [111] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | + RTE_PTYPE_TUNNEL_GRENAT | + RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | + RTE_PTYPE_INNER_L4_NONFRAG, + [112] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | + RTE_PTYPE_TUNNEL_GRENAT | + RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | + RTE_PTYPE_INNER_L4_UDP, + /* [113] reserved */ + [114] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | + RTE_PTYPE_TUNNEL_GRENAT | + RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | + RTE_PTYPE_INNER_L4_TCP, + [115] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | + RTE_PTYPE_TUNNEL_GRENAT | + RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | + RTE_PTYPE_INNER_L4_SCTP, + [116] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | + RTE_PTYPE_TUNNEL_GRENAT | + RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | + RTE_PTYPE_INNER_L4_ICMP, + + /* IPv6 --> GRE/Teredo/VXLAN --> IPv6 */ + [117] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | + RTE_PTYPE_TUNNEL_GRENAT | + RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | + RTE_PTYPE_INNER_L4_FRAG, + [118] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | + RTE_PTYPE_TUNNEL_GRENAT | + RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | + RTE_PTYPE_INNER_L4_NONFRAG, + [119] = RTE_PTYPE_L2_ETHER | 
RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | + RTE_PTYPE_TUNNEL_GRENAT | + RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | + RTE_PTYPE_INNER_L4_UDP, + /* [120] reserved */ + [121] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | + RTE_PTYPE_TUNNEL_GRENAT | + RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | + RTE_PTYPE_INNER_L4_TCP, + [122] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | + RTE_PTYPE_TUNNEL_GRENAT | + RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | + RTE_PTYPE_INNER_L4_SCTP, + [123] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | + RTE_PTYPE_TUNNEL_GRENAT | + RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | + RTE_PTYPE_INNER_L4_ICMP, + + /* IPv6 --> GRE/Teredo/VXLAN --> MAC */ + [124] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | + RTE_PTYPE_TUNNEL_GRENAT | RTE_PTYPE_INNER_L2_ETHER, + + /* IPv6 --> GRE/Teredo/VXLAN --> MAC --> IPv4 */ + [125] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | + RTE_PTYPE_TUNNEL_GRENAT | RTE_PTYPE_INNER_L2_ETHER | + RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | + RTE_PTYPE_INNER_L4_FRAG, + [126] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | + RTE_PTYPE_TUNNEL_GRENAT | RTE_PTYPE_INNER_L2_ETHER | + RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | + RTE_PTYPE_INNER_L4_NONFRAG, + [127] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | + RTE_PTYPE_TUNNEL_GRENAT | RTE_PTYPE_INNER_L2_ETHER | + RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | + RTE_PTYPE_INNER_L4_UDP, + /* [128] reserved */ + [129] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | + RTE_PTYPE_TUNNEL_GRENAT | RTE_PTYPE_INNER_L2_ETHER | + RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | + RTE_PTYPE_INNER_L4_TCP, + [130] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | + RTE_PTYPE_TUNNEL_GRENAT | RTE_PTYPE_INNER_L2_ETHER | + RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | + RTE_PTYPE_INNER_L4_SCTP, + [131] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | + RTE_PTYPE_TUNNEL_GRENAT | RTE_PTYPE_INNER_L2_ETHER | + RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | + RTE_PTYPE_INNER_L4_ICMP, + + /* IPv6 --> GRE/Teredo/VXLAN --> MAC --> IPv6 */ + [132] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | + RTE_PTYPE_TUNNEL_GRENAT | RTE_PTYPE_INNER_L2_ETHER | + RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | + RTE_PTYPE_INNER_L4_FRAG, + [133] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | + RTE_PTYPE_TUNNEL_GRENAT | RTE_PTYPE_INNER_L2_ETHER | + RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | + RTE_PTYPE_INNER_L4_NONFRAG, + [134] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | + RTE_PTYPE_TUNNEL_GRENAT | RTE_PTYPE_INNER_L2_ETHER | + RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | + RTE_PTYPE_INNER_L4_UDP, + /* [135] reserved */ + [136] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | + RTE_PTYPE_TUNNEL_GRENAT | RTE_PTYPE_INNER_L2_ETHER | + RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | + RTE_PTYPE_INNER_L4_TCP, + [137] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | + RTE_PTYPE_TUNNEL_GRENAT | RTE_PTYPE_INNER_L2_ETHER | + RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | + RTE_PTYPE_INNER_L4_SCTP, + [138] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | + RTE_PTYPE_TUNNEL_GRENAT | RTE_PTYPE_INNER_L2_ETHER | + RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | + RTE_PTYPE_INNER_L4_ICMP, + + /* IPv6 --> GRE/Teredo/VXLAN --> MAC/VLAN */ + [139] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | + RTE_PTYPE_TUNNEL_GRENAT | + RTE_PTYPE_INNER_L2_ETHER_VLAN, + + /* IPv6 --> GRE/Teredo/VXLAN --> MAC/VLAN --> IPv4 */ + [140] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | + RTE_PTYPE_TUNNEL_GRENAT | + RTE_PTYPE_INNER_L2_ETHER_VLAN | + RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | + 
RTE_PTYPE_INNER_L4_FRAG, + [141] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | + RTE_PTYPE_TUNNEL_GRENAT | + RTE_PTYPE_INNER_L2_ETHER_VLAN | + RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | + RTE_PTYPE_INNER_L4_NONFRAG, + [142] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | + RTE_PTYPE_TUNNEL_GRENAT | + RTE_PTYPE_INNER_L2_ETHER_VLAN | + RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | + RTE_PTYPE_INNER_L4_UDP, + /* [143] reserved */ + [144] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | + RTE_PTYPE_TUNNEL_GRENAT | + RTE_PTYPE_INNER_L2_ETHER_VLAN | + RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | + RTE_PTYPE_INNER_L4_TCP, + [145] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | + RTE_PTYPE_TUNNEL_GRENAT | + RTE_PTYPE_INNER_L2_ETHER_VLAN | + RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | + RTE_PTYPE_INNER_L4_SCTP, + [146] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | + RTE_PTYPE_TUNNEL_GRENAT | + RTE_PTYPE_INNER_L2_ETHER_VLAN | + RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | + RTE_PTYPE_INNER_L4_ICMP, + + /* IPv6 --> GRE/Teredo/VXLAN --> MAC/VLAN --> IPv6 */ + [147] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | + RTE_PTYPE_TUNNEL_GRENAT | + RTE_PTYPE_INNER_L2_ETHER_VLAN | + RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | + RTE_PTYPE_INNER_L4_FRAG, + [148] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | + RTE_PTYPE_TUNNEL_GRENAT | + RTE_PTYPE_INNER_L2_ETHER_VLAN | + RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | + RTE_PTYPE_INNER_L4_NONFRAG, + [149] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | + RTE_PTYPE_TUNNEL_GRENAT | + RTE_PTYPE_INNER_L2_ETHER_VLAN | + RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | + RTE_PTYPE_INNER_L4_UDP, + /* [150] reserved */ + [151] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | + RTE_PTYPE_TUNNEL_GRENAT | + RTE_PTYPE_INNER_L2_ETHER_VLAN | + RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | + RTE_PTYPE_INNER_L4_TCP, + [152] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | + RTE_PTYPE_TUNNEL_GRENAT | + RTE_PTYPE_INNER_L2_ETHER_VLAN | + RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | + RTE_PTYPE_INNER_L4_SCTP, + [153] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | + RTE_PTYPE_TUNNEL_GRENAT | + RTE_PTYPE_INNER_L2_ETHER_VLAN | + RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | + RTE_PTYPE_INNER_L4_ICMP, + + /* L2 NSH packet type */ + [154] = RTE_PTYPE_L2_ETHER_NSH, + [155] = RTE_PTYPE_L2_ETHER_NSH | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | + RTE_PTYPE_L4_FRAG, + [156] = RTE_PTYPE_L2_ETHER_NSH | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | + RTE_PTYPE_L4_NONFRAG, + [157] = RTE_PTYPE_L2_ETHER_NSH | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | + RTE_PTYPE_L4_UDP, + [158] = RTE_PTYPE_L2_ETHER_NSH | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | + RTE_PTYPE_L4_TCP, + [159] = RTE_PTYPE_L2_ETHER_NSH | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | + RTE_PTYPE_L4_SCTP, + [160] = RTE_PTYPE_L2_ETHER_NSH | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | + RTE_PTYPE_L4_ICMP, + [161] = RTE_PTYPE_L2_ETHER_NSH | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | + RTE_PTYPE_L4_FRAG, + [162] = RTE_PTYPE_L2_ETHER_NSH | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | + RTE_PTYPE_L4_NONFRAG, + [163] = RTE_PTYPE_L2_ETHER_NSH | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | + RTE_PTYPE_L4_UDP, + [164] = RTE_PTYPE_L2_ETHER_NSH | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | + RTE_PTYPE_L4_TCP, + [165] = RTE_PTYPE_L2_ETHER_NSH | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | + RTE_PTYPE_L4_SCTP, + [166] = RTE_PTYPE_L2_ETHER_NSH | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | + RTE_PTYPE_L4_ICMP, + + /* All others reserved */ + }; + + return type_table[ptype]; +} + #endif /* _I40E_RXTX_H_ */ diff --git a/src/dpdk/drivers/net/i40e/i40e_rxtx_vec_common.h 
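(Editor's aside, not part of the patch.) The translation table above is consumed through i40e_rxd_pkt_type_mapping(): the vector Rx paths added later in this diff pull the 8-bit PTYPE field out of each Rx descriptor, look it up once, and store the result in mbuf->packet_type. A minimal sketch of that flow, assuming only the helper shown above plus the generic RTE_PTYPE_* masks from rte_mbuf.h:

#include <rte_mbuf.h>

/* hw_ptype is the 8-bit PTYPE field extracted from the Rx descriptor,
 * as done by desc_to_ptype_v() further down in this diff. */
static inline void
example_set_packet_type(struct rte_mbuf *m, uint8_t hw_ptype)
{
	m->packet_type = i40e_rxd_pkt_type_mapping(hw_ptype);

	/* applications can then classify without reparsing headers */
	if ((m->packet_type & RTE_PTYPE_TUNNEL_MASK) == RTE_PTYPE_TUNNEL_GRENAT &&
	    (m->packet_type & RTE_PTYPE_INNER_L4_MASK) == RTE_PTYPE_INNER_L4_TCP) {
		/* inner TCP carried over a GRE/Teredo/VXLAN tunnel */
	}
}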
b/src/dpdk/drivers/net/i40e/i40e_rxtx_vec_common.h new file mode 100644 index 00000000..37455589 --- /dev/null +++ b/src/dpdk/drivers/net/i40e/i40e_rxtx_vec_common.h @@ -0,0 +1,251 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2015 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#ifndef _I40E_RXTX_VEC_COMMON_H_ +#define _I40E_RXTX_VEC_COMMON_H_ +#include <stdint.h> +#include <rte_ethdev.h> +#include <rte_malloc.h> + +#include "i40e_ethdev.h" +#include "i40e_rxtx.h" + +static inline uint16_t +reassemble_packets(struct i40e_rx_queue *rxq, struct rte_mbuf **rx_bufs, + uint16_t nb_bufs, uint8_t *split_flags) +{ + struct rte_mbuf *pkts[RTE_I40E_VPMD_RX_BURST]; /*finished pkts*/ + struct rte_mbuf *start = rxq->pkt_first_seg; + struct rte_mbuf *end = rxq->pkt_last_seg; + unsigned pkt_idx, buf_idx; + + for (buf_idx = 0, pkt_idx = 0; buf_idx < nb_bufs; buf_idx++) { + if (end != NULL) { + /* processing a split packet */ + end->next = rx_bufs[buf_idx]; + rx_bufs[buf_idx]->data_len += rxq->crc_len; + + start->nb_segs++; + start->pkt_len += rx_bufs[buf_idx]->data_len; + end = end->next; + + if (!split_flags[buf_idx]) { + /* it's the last packet of the set */ + start->hash = end->hash; + start->ol_flags = end->ol_flags; + /* we need to strip crc for the whole packet */ + start->pkt_len -= rxq->crc_len; + if (end->data_len > rxq->crc_len) + end->data_len -= rxq->crc_len; + else { + /* free up last mbuf */ + struct rte_mbuf *secondlast = start; + + start->nb_segs--; + while (secondlast->next != end) + secondlast = secondlast->next; + secondlast->data_len -= (rxq->crc_len - + end->data_len); + secondlast->next = NULL; + rte_pktmbuf_free_seg(end); + } + pkts[pkt_idx++] = start; + start = end = NULL; + } + } else { + /* not processing a split packet */ + if (!split_flags[buf_idx]) { + /* not a split packet, save and skip */ + pkts[pkt_idx++] = rx_bufs[buf_idx]; + continue; + } + end = start = rx_bufs[buf_idx]; + rx_bufs[buf_idx]->data_len += rxq->crc_len; + rx_bufs[buf_idx]->pkt_len += rxq->crc_len; + } + } + + /* save the partial packet for next time */ + rxq->pkt_first_seg = start; + rxq->pkt_last_seg = end; + memcpy(rx_bufs, pkts, pkt_idx * (sizeof(*pkts))); + return pkt_idx; +} + +static inline int __attribute__((always_inline)) +i40e_tx_free_bufs(struct i40e_tx_queue *txq) +{ + struct i40e_tx_entry *txep; + uint32_t n; + uint32_t i; + int nb_free = 0; + struct rte_mbuf *m, *free[RTE_I40E_TX_MAX_FREE_BUF_SZ]; + + /* check DD bits on threshold descriptor */ + if ((txq->tx_ring[txq->tx_next_dd].cmd_type_offset_bsz & + rte_cpu_to_le_64(I40E_TXD_QW1_DTYPE_MASK)) != + rte_cpu_to_le_64(I40E_TX_DESC_DTYPE_DESC_DONE)) + return 0; + + n = txq->tx_rs_thresh; + + /* first buffer to free from S/W ring is at index + * tx_next_dd - (tx_rs_thresh-1) + */ + txep = &txq->sw_ring[txq->tx_next_dd - (n - 1)]; + m = __rte_pktmbuf_prefree_seg(txep[0].mbuf); + if (likely(m != NULL)) { + free[0] = m; + nb_free = 1; + for (i = 1; i < n; i++) { + m = __rte_pktmbuf_prefree_seg(txep[i].mbuf); + if (likely(m != NULL)) { + if (likely(m->pool == free[0]->pool)) { + free[nb_free++] = m; + } else { + rte_mempool_put_bulk(free[0]->pool, + (void *)free, + nb_free); + free[0] = m; + nb_free = 1; + } + } + } + rte_mempool_put_bulk(free[0]->pool, (void **)free, nb_free); + } else { + for (i = 1; i < n; i++) { + m = __rte_pktmbuf_prefree_seg(txep[i].mbuf); + if (m != NULL) + rte_mempool_put(m->pool, m); + } + } + + /* buffers were freed, update counters */ + txq->nb_tx_free = (uint16_t)(txq->nb_tx_free + txq->tx_rs_thresh); + txq->tx_next_dd = (uint16_t)(txq->tx_next_dd + txq->tx_rs_thresh); + if (txq->tx_next_dd >= txq->nb_tx_desc) + txq->tx_next_dd = (uint16_t)(txq->tx_rs_thresh - 1); + + return txq->tx_rs_thresh; +} + +static inline void __attribute__((always_inline)) +tx_backlog_entry(struct i40e_tx_entry 
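/* Editor's note, not part of the patch: i40e_tx_free_bufs() above fires only
 * once the hardware has written the DESC_DONE dtype into the descriptor at
 * tx_next_dd; it then walks tx_rs_thresh entries of the software ring and
 * returns every mbuf it can reclaim. Mbufs from the same mempool are batched
 * into a single rte_mempool_put_bulk() call, with free[0]->pool acting as the
 * reference pool; a pool change flushes the batch and starts a new one.
 */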
*txep, + struct rte_mbuf **tx_pkts, uint16_t nb_pkts) +{ + int i; + + for (i = 0; i < (int)nb_pkts; ++i) + txep[i].mbuf = tx_pkts[i]; +} + +static inline void +_i40e_rx_queue_release_mbufs_vec(struct i40e_rx_queue *rxq) +{ + const unsigned mask = rxq->nb_rx_desc - 1; + unsigned i; + + if (rxq->sw_ring == NULL || rxq->rxrearm_nb >= rxq->nb_rx_desc) + return; + + /* free all mbufs that are valid in the ring */ + if (rxq->rxrearm_nb == 0) { + for (i = 0; i < rxq->nb_rx_desc; i++) { + if (rxq->sw_ring[i].mbuf != NULL) + rte_pktmbuf_free_seg(rxq->sw_ring[i].mbuf); + } + } else { + for (i = rxq->rx_tail; + i != rxq->rxrearm_start; + i = (i + 1) & mask) { + if (rxq->sw_ring[i].mbuf != NULL) + rte_pktmbuf_free_seg(rxq->sw_ring[i].mbuf); + } + } + + rxq->rxrearm_nb = rxq->nb_rx_desc; + + /* set all entries to NULL */ + memset(rxq->sw_ring, 0, sizeof(rxq->sw_ring[0]) * rxq->nb_rx_desc); +} + +static inline int +i40e_rxq_vec_setup_default(struct i40e_rx_queue *rxq) +{ + uintptr_t p; + struct rte_mbuf mb_def = { .buf_addr = 0 }; /* zeroed mbuf */ + + mb_def.nb_segs = 1; + mb_def.data_off = RTE_PKTMBUF_HEADROOM; + mb_def.port = rxq->port_id; + rte_mbuf_refcnt_set(&mb_def, 1); + + /* prevent compiler reordering: rearm_data covers previous fields */ + rte_compiler_barrier(); + p = (uintptr_t)&mb_def.rearm_data; + rxq->mbuf_initializer = *(uint64_t *)p; + return 0; +} + +static inline int +i40e_rx_vec_dev_conf_condition_check_default(struct rte_eth_dev *dev) +{ +#ifndef RTE_LIBRTE_IEEE1588 + struct rte_eth_rxmode *rxmode = &dev->data->dev_conf.rxmode; + struct rte_fdir_conf *fconf = &dev->data->dev_conf.fdir_conf; + +#ifndef RTE_LIBRTE_I40E_RX_OLFLAGS_ENABLE + /* whithout rx ol_flags, no VP flag report */ + if (rxmode->hw_vlan_strip != 0 || + rxmode->hw_vlan_extend != 0 || + rxmode->hw_ip_checksum != 0) + return -1; +#endif + + /* no fdir support */ + if (fconf->mode != RTE_FDIR_MODE_NONE) + return -1; + + /* - no csum error report support + * - no header split support + */ + if (rxmode->header_split == 1) + return -1; + + return 0; +#else + RTE_SET_USED(dev); + return -1; +#endif +} +#endif diff --git a/src/dpdk/drivers/net/i40e/i40e_rxtx_vec_neon.c b/src/dpdk/drivers/net/i40e/i40e_rxtx_vec_neon.c new file mode 100644 index 00000000..011c54e0 --- /dev/null +++ b/src/dpdk/drivers/net/i40e/i40e_rxtx_vec_neon.c @@ -0,0 +1,614 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2015 Intel Corporation. All rights reserved. + * Copyright(c) 2016, Linaro Limited + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include <stdint.h> +#include <rte_ethdev.h> +#include <rte_malloc.h> + +#include "base/i40e_prototype.h" +#include "base/i40e_type.h" +#include "i40e_ethdev.h" +#include "i40e_rxtx.h" +#include "i40e_rxtx_vec_common.h" + +#include <arm_neon.h> + +#pragma GCC diagnostic ignored "-Wcast-qual" + +static inline void +i40e_rxq_rearm(struct i40e_rx_queue *rxq) +{ + int i; + uint16_t rx_id; + volatile union i40e_rx_desc *rxdp; + struct i40e_rx_entry *rxep = &rxq->sw_ring[rxq->rxrearm_start]; + struct rte_mbuf *mb0, *mb1; + uint64x2_t dma_addr0, dma_addr1; + uint64x2_t zero = vdupq_n_u64(0); + uint64_t paddr; + uint8x8_t p; + + rxdp = rxq->rx_ring + rxq->rxrearm_start; + + /* Pull 'n' more MBUFs into the software ring */ + if (unlikely(rte_mempool_get_bulk(rxq->mp, + (void *)rxep, + RTE_I40E_RXQ_REARM_THRESH) < 0)) { + if (rxq->rxrearm_nb + RTE_I40E_RXQ_REARM_THRESH >= + rxq->nb_rx_desc) { + for (i = 0; i < RTE_I40E_DESCS_PER_LOOP; i++) { + rxep[i].mbuf = &rxq->fake_mbuf; + vst1q_u64((uint64_t *)&rxdp[i].read, zero); + } + } + rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed += + RTE_I40E_RXQ_REARM_THRESH; + return; + } + + p = vld1_u8((uint8_t *)&rxq->mbuf_initializer); + + /* Initialize the mbufs in vector, process 2 mbufs in one loop */ + for (i = 0; i < RTE_I40E_RXQ_REARM_THRESH; i += 2, rxep += 2) { + mb0 = rxep[0].mbuf; + mb1 = rxep[1].mbuf; + + /* Flush mbuf with pkt template. + * Data to be rearmed is 6 bytes long. + * Though, RX will overwrite ol_flags that are coming next + * anyway. So overwrite whole 8 bytes with one load: + * 6 bytes of rearm_data plus first 2 bytes of ol_flags. + */ + vst1_u8((uint8_t *)&mb0->rearm_data, p); + paddr = mb0->buf_physaddr + RTE_PKTMBUF_HEADROOM; + dma_addr0 = vdupq_n_u64(paddr); + + /* flush desc with pa dma_addr */ + vst1q_u64((uint64_t *)&rxdp++->read, dma_addr0); + + vst1_u8((uint8_t *)&mb1->rearm_data, p); + paddr = mb1->buf_physaddr + RTE_PKTMBUF_HEADROOM; + dma_addr1 = vdupq_n_u64(paddr); + vst1q_u64((uint64_t *)&rxdp++->read, dma_addr1); + } + + rxq->rxrearm_start += RTE_I40E_RXQ_REARM_THRESH; + if (rxq->rxrearm_start >= rxq->nb_rx_desc) + rxq->rxrearm_start = 0; + + rxq->rxrearm_nb -= RTE_I40E_RXQ_REARM_THRESH; + + rx_id = (uint16_t)((rxq->rxrearm_start == 0) ? + (rxq->nb_rx_desc - 1) : (rxq->rxrearm_start - 1)); + + /* Update the tail pointer on the NIC */ + I40E_PCI_REG_WRITE(rxq->qrx_tail, rx_id); +} + +/* Handling the offload flags (olflags) field takes computation + * time when receiving packets. Therefore we provide a flag to disable + * the processing of the olflags field when they are not needed. 
This + * gives improved performance, at the cost of losing the offload info + * in the received packet + */ +#ifdef RTE_LIBRTE_I40E_RX_OLFLAGS_ENABLE + +static inline void +desc_to_olflags_v(uint64x2_t descs[4], struct rte_mbuf **rx_pkts) +{ + uint32x4_t vlan0, vlan1, rss, l3_l4e; + + /* mask everything except RSS, flow director and VLAN flags + * bit2 is for VLAN tag, bit11 for flow director indication + * bit13:12 for RSS indication. + */ + const uint32x4_t rss_vlan_msk = { + 0x1c03804, 0x1c03804, 0x1c03804, 0x1c03804}; + + /* map rss and vlan type to rss hash and vlan flag */ + const uint8x16_t vlan_flags = { + 0, 0, 0, 0, + PKT_RX_VLAN_PKT | PKT_RX_VLAN_STRIPPED, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0}; + + const uint8x16_t rss_flags = { + 0, PKT_RX_FDIR, 0, 0, + 0, 0, PKT_RX_RSS_HASH, PKT_RX_RSS_HASH | PKT_RX_FDIR, + 0, 0, 0, 0, + 0, 0, 0, 0}; + + const uint8x16_t l3_l4e_flags = { + 0, + PKT_RX_IP_CKSUM_BAD, + PKT_RX_L4_CKSUM_BAD, + PKT_RX_L4_CKSUM_BAD | PKT_RX_IP_CKSUM_BAD, + PKT_RX_EIP_CKSUM_BAD, + PKT_RX_EIP_CKSUM_BAD | PKT_RX_IP_CKSUM_BAD, + PKT_RX_EIP_CKSUM_BAD | PKT_RX_L4_CKSUM_BAD, + PKT_RX_EIP_CKSUM_BAD | PKT_RX_L4_CKSUM_BAD | PKT_RX_IP_CKSUM_BAD, + 0, 0, 0, 0, 0, 0, 0, 0}; + + vlan0 = vzipq_u32(vreinterpretq_u32_u64(descs[0]), + vreinterpretq_u32_u64(descs[2])).val[1]; + vlan1 = vzipq_u32(vreinterpretq_u32_u64(descs[1]), + vreinterpretq_u32_u64(descs[3])).val[1]; + vlan0 = vzipq_u32(vlan0, vlan1).val[0]; + + vlan1 = vandq_u32(vlan0, rss_vlan_msk); + vlan0 = vreinterpretq_u32_u8(vqtbl1q_u8(vlan_flags, + vreinterpretq_u8_u32(vlan1))); + + rss = vshrq_n_u32(vlan1, 11); + rss = vreinterpretq_u32_u8(vqtbl1q_u8(rss_flags, + vreinterpretq_u8_u32(rss))); + + l3_l4e = vshrq_n_u32(vlan1, 22); + l3_l4e = vreinterpretq_u32_u8(vqtbl1q_u8(l3_l4e_flags, + vreinterpretq_u8_u32(l3_l4e))); + + + vlan0 = vorrq_u32(vlan0, rss); + vlan0 = vorrq_u32(vlan0, l3_l4e); + + rx_pkts[0]->ol_flags = vgetq_lane_u32(vlan0, 0); + rx_pkts[1]->ol_flags = vgetq_lane_u32(vlan0, 1); + rx_pkts[2]->ol_flags = vgetq_lane_u32(vlan0, 2); + rx_pkts[3]->ol_flags = vgetq_lane_u32(vlan0, 3); +} +#else +#define desc_to_olflags_v(descs, rx_pkts) do {} while (0) +#endif + +#define PKTLEN_SHIFT 10 + +#define I40E_VPMD_DESC_DD_MASK 0x0001000100010001ULL + +static inline void +desc_to_ptype_v(uint64x2_t descs[4], struct rte_mbuf **rx_pkts) +{ + int i; + uint8_t ptype; + uint8x16_t tmp; + + for (i = 0; i < 4; i++) { + tmp = vreinterpretq_u8_u64(vshrq_n_u64(descs[i], 30)); + ptype = vgetq_lane_u8(tmp, 8); + rx_pkts[0]->packet_type = i40e_rxd_pkt_type_mapping(ptype); + } + +} + + /* + * Notice: + * - nb_pkts < RTE_I40E_DESCS_PER_LOOP, just return no packet + * - nb_pkts > RTE_I40E_VPMD_RX_BURST, only scan RTE_I40E_VPMD_RX_BURST + * numbers of DD bits + */ +static inline uint16_t +_recv_raw_pkts_vec(struct i40e_rx_queue *rxq, struct rte_mbuf **rx_pkts, + uint16_t nb_pkts, uint8_t *split_packet) +{ + volatile union i40e_rx_desc *rxdp; + struct i40e_rx_entry *sw_ring; + uint16_t nb_pkts_recd; + int pos; + uint64_t var; + + /* mask to shuffle from desc. 
to mbuf */ + uint8x16_t shuf_msk = { + 0xFF, 0xFF, /* pkt_type set as unknown */ + 0xFF, 0xFF, /* pkt_type set as unknown */ + 14, 15, /* octet 15~14, low 16 bits pkt_len */ + 0xFF, 0xFF, /* skip high 16 bits pkt_len, zero out */ + 14, 15, /* octet 15~14, 16 bits data_len */ + 2, 3, /* octet 2~3, low 16 bits vlan_macip */ + 4, 5, 6, 7 /* octet 4~7, 32bits rss */ + }; + + uint8x16_t eop_check = { + 0x02, 0x00, 0x02, 0x00, + 0x02, 0x00, 0x02, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00 + }; + + uint16x8_t crc_adjust = { + 0, 0, /* ignore pkt_type field */ + rxq->crc_len, /* sub crc on pkt_len */ + 0, /* ignore high-16bits of pkt_len */ + rxq->crc_len, /* sub crc on data_len */ + 0, 0, 0 /* ignore non-length fields */ + }; + + /* nb_pkts shall be less equal than RTE_I40E_MAX_RX_BURST */ + nb_pkts = RTE_MIN(nb_pkts, RTE_I40E_MAX_RX_BURST); + + /* nb_pkts has to be floor-aligned to RTE_I40E_DESCS_PER_LOOP */ + nb_pkts = RTE_ALIGN_FLOOR(nb_pkts, RTE_I40E_DESCS_PER_LOOP); + + /* Just the act of getting into the function from the application is + * going to cost about 7 cycles + */ + rxdp = rxq->rx_ring + rxq->rx_tail; + + rte_prefetch_non_temporal(rxdp); + + /* See if we need to rearm the RX queue - gives the prefetch a bit + * of time to act + */ + if (rxq->rxrearm_nb > RTE_I40E_RXQ_REARM_THRESH) + i40e_rxq_rearm(rxq); + + /* Before we start moving massive data around, check to see if + * there is actually a packet available + */ + if (!(rxdp->wb.qword1.status_error_len & + rte_cpu_to_le_32(1 << I40E_RX_DESC_STATUS_DD_SHIFT))) + return 0; + + /* Cache is empty -> need to scan the buffer rings, but first move + * the next 'n' mbufs into the cache + */ + sw_ring = &rxq->sw_ring[rxq->rx_tail]; + + /* A. load 4 packet in one loop + * [A*. mask out 4 unused dirty field in desc] + * B. copy 4 mbuf point from swring to rx_pkts + * C. calc the number of DD bits among the 4 packets + * [C*. extract the end-of-packet bit, if requested] + * D. fill info. 
from desc to mbuf + */ + + for (pos = 0, nb_pkts_recd = 0; pos < nb_pkts; + pos += RTE_I40E_DESCS_PER_LOOP, + rxdp += RTE_I40E_DESCS_PER_LOOP) { + uint64x2_t descs[RTE_I40E_DESCS_PER_LOOP]; + uint8x16_t pkt_mb1, pkt_mb2, pkt_mb3, pkt_mb4; + uint16x8x2_t sterr_tmp1, sterr_tmp2; + uint64x2_t mbp1, mbp2; + uint16x8_t staterr; + uint16x8_t tmp; + uint64_t stat; + + int32x4_t len_shl = {0, 0, 0, PKTLEN_SHIFT}; + + /* B.1 load 1 mbuf point */ + mbp1 = vld1q_u64((uint64_t *)&sw_ring[pos]); + /* Read desc statuses backwards to avoid race condition */ + /* A.1 load 4 pkts desc */ + descs[3] = vld1q_u64((uint64_t *)(rxdp + 3)); + rte_rmb(); + + /* B.2 copy 2 mbuf point into rx_pkts */ + vst1q_u64((uint64_t *)&rx_pkts[pos], mbp1); + + /* B.1 load 1 mbuf point */ + mbp2 = vld1q_u64((uint64_t *)&sw_ring[pos + 2]); + + descs[2] = vld1q_u64((uint64_t *)(rxdp + 2)); + /* B.1 load 2 mbuf point */ + descs[1] = vld1q_u64((uint64_t *)(rxdp + 1)); + descs[0] = vld1q_u64((uint64_t *)(rxdp)); + + /* B.2 copy 2 mbuf point into rx_pkts */ + vst1q_u64((uint64_t *)&rx_pkts[pos + 2], mbp2); + + if (split_packet) { + rte_mbuf_prefetch_part2(rx_pkts[pos]); + rte_mbuf_prefetch_part2(rx_pkts[pos + 1]); + rte_mbuf_prefetch_part2(rx_pkts[pos + 2]); + rte_mbuf_prefetch_part2(rx_pkts[pos + 3]); + } + + /* avoid compiler reorder optimization */ + rte_compiler_barrier(); + + /* pkt 3,4 shift the pktlen field to be 16-bit aligned*/ + uint32x4_t len3 = vshlq_u32(vreinterpretq_u32_u64(descs[3]), + len_shl); + descs[3] = vreinterpretq_u64_u32(len3); + uint32x4_t len2 = vshlq_u32(vreinterpretq_u32_u64(descs[2]), + len_shl); + descs[2] = vreinterpretq_u64_u32(len2); + + /* D.1 pkt 3,4 convert format from desc to pktmbuf */ + pkt_mb4 = vqtbl1q_u8(vreinterpretq_u8_u64(descs[3]), shuf_msk); + pkt_mb3 = vqtbl1q_u8(vreinterpretq_u8_u64(descs[2]), shuf_msk); + + /* C.1 4=>2 filter staterr info only */ + sterr_tmp2 = vzipq_u16(vreinterpretq_u16_u64(descs[1]), + vreinterpretq_u16_u64(descs[3])); + /* C.1 4=>2 filter staterr info only */ + sterr_tmp1 = vzipq_u16(vreinterpretq_u16_u64(descs[0]), + vreinterpretq_u16_u64(descs[2])); + + /* C.2 get 4 pkts staterr value */ + staterr = vzipq_u16(sterr_tmp1.val[1], + sterr_tmp2.val[1]).val[0]; + stat = vgetq_lane_u64(vreinterpretq_u64_u16(staterr), 0); + + desc_to_olflags_v(descs, &rx_pkts[pos]); + + /* D.2 pkt 3,4 set in_port/nb_seg and remove crc */ + tmp = vsubq_u16(vreinterpretq_u16_u8(pkt_mb4), crc_adjust); + pkt_mb4 = vreinterpretq_u8_u16(tmp); + tmp = vsubq_u16(vreinterpretq_u16_u8(pkt_mb3), crc_adjust); + pkt_mb3 = vreinterpretq_u8_u16(tmp); + + /* pkt 1,2 shift the pktlen field to be 16-bit aligned*/ + uint32x4_t len1 = vshlq_u32(vreinterpretq_u32_u64(descs[1]), + len_shl); + descs[1] = vreinterpretq_u64_u32(len1); + uint32x4_t len0 = vshlq_u32(vreinterpretq_u32_u64(descs[0]), + len_shl); + descs[0] = vreinterpretq_u64_u32(len0); + + /* D.1 pkt 1,2 convert format from desc to pktmbuf */ + pkt_mb2 = vqtbl1q_u8(vreinterpretq_u8_u64(descs[1]), shuf_msk); + pkt_mb1 = vqtbl1q_u8(vreinterpretq_u8_u64(descs[0]), shuf_msk); + + /* D.3 copy final 3,4 data to rx_pkts */ + vst1q_u8((void *)&rx_pkts[pos + 3]->rx_descriptor_fields1, + pkt_mb4); + vst1q_u8((void *)&rx_pkts[pos + 2]->rx_descriptor_fields1, + pkt_mb3); + + /* D.2 pkt 1,2 set in_port/nb_seg and remove crc */ + tmp = vsubq_u16(vreinterpretq_u16_u8(pkt_mb2), crc_adjust); + pkt_mb2 = vreinterpretq_u8_u16(tmp); + tmp = vsubq_u16(vreinterpretq_u16_u8(pkt_mb1), crc_adjust); + pkt_mb1 = vreinterpretq_u8_u16(tmp); + + /* C* extract and record EOP 
bit */ + if (split_packet) { + uint8x16_t eop_shuf_mask = { + 0x00, 0x02, 0x04, 0x06, + 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF}; + uint8x16_t eop_bits; + + /* and with mask to extract bits, flipping 1-0 */ + eop_bits = vmvnq_u8(vreinterpretq_u8_u16(staterr)); + eop_bits = vandq_u8(eop_bits, eop_check); + /* the staterr values are not in order, as the count + * count of dd bits doesn't care. However, for end of + * packet tracking, we do care, so shuffle. This also + * compresses the 32-bit values to 8-bit + */ + eop_bits = vqtbl1q_u8(eop_bits, eop_shuf_mask); + + /* store the resulting 32-bit value */ + vst1q_lane_u32((uint32_t *)split_packet, + vreinterpretq_u32_u8(eop_bits), 0); + split_packet += RTE_I40E_DESCS_PER_LOOP; + + /* zero-out next pointers */ + rx_pkts[pos]->next = NULL; + rx_pkts[pos + 1]->next = NULL; + rx_pkts[pos + 2]->next = NULL; + rx_pkts[pos + 3]->next = NULL; + } + + rte_prefetch_non_temporal(rxdp + RTE_I40E_DESCS_PER_LOOP); + + /* D.3 copy final 1,2 data to rx_pkts */ + vst1q_u8((void *)&rx_pkts[pos + 1]->rx_descriptor_fields1, + pkt_mb2); + vst1q_u8((void *)&rx_pkts[pos]->rx_descriptor_fields1, + pkt_mb1); + desc_to_ptype_v(descs, &rx_pkts[pos]); + /* C.4 calc avaialbe number of desc */ + var = __builtin_popcountll(stat & I40E_VPMD_DESC_DD_MASK); + nb_pkts_recd += var; + if (likely(var != RTE_I40E_DESCS_PER_LOOP)) + break; + } + + /* Update our internal tail pointer */ + rxq->rx_tail = (uint16_t)(rxq->rx_tail + nb_pkts_recd); + rxq->rx_tail = (uint16_t)(rxq->rx_tail & (rxq->nb_rx_desc - 1)); + rxq->rxrearm_nb = (uint16_t)(rxq->rxrearm_nb + nb_pkts_recd); + + return nb_pkts_recd; +} + + /* + * Notice: + * - nb_pkts < RTE_I40E_DESCS_PER_LOOP, just return no packet + * - nb_pkts > RTE_I40E_VPMD_RX_BURST, only scan RTE_I40E_VPMD_RX_BURST + * numbers of DD bits + */ +uint16_t +i40e_recv_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts, + uint16_t nb_pkts) +{ + return _recv_raw_pkts_vec(rx_queue, rx_pkts, nb_pkts, NULL); +} + + /* vPMD receive routine that reassembles scattered packets + * Notice: + * - nb_pkts < RTE_I40E_DESCS_PER_LOOP, just return no packet + * - nb_pkts > RTE_I40E_VPMD_RX_BURST, only scan RTE_I40E_VPMD_RX_BURST + * numbers of DD bits + */ +uint16_t +i40e_recv_scattered_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts, + uint16_t nb_pkts) +{ + + struct i40e_rx_queue *rxq = rx_queue; + uint8_t split_flags[RTE_I40E_VPMD_RX_BURST] = {0}; + + /* get some new buffers */ + uint16_t nb_bufs = _recv_raw_pkts_vec(rxq, rx_pkts, nb_pkts, + split_flags); + if (nb_bufs == 0) + return 0; + + /* happy day case, full burst + no packets to be joined */ + const uint64_t *split_fl64 = (uint64_t *)split_flags; + + if (rxq->pkt_first_seg == NULL && + split_fl64[0] == 0 && split_fl64[1] == 0 && + split_fl64[2] == 0 && split_fl64[3] == 0) + return nb_bufs; + + /* reassemble any packets that need reassembly*/ + unsigned i = 0; + + if (rxq->pkt_first_seg == NULL) { + /* find the first split flag, and only reassemble then*/ + while (i < nb_bufs && !split_flags[i]) + i++; + if (i == nb_bufs) + return nb_bufs; + } + return i + reassemble_packets(rxq, &rx_pkts[i], nb_bufs - i, + &split_flags[i]); +} + +static inline void +vtx1(volatile struct i40e_tx_desc *txdp, + struct rte_mbuf *pkt, uint64_t flags) +{ + uint64_t high_qw = (I40E_TX_DESC_DTYPE_DATA | + ((uint64_t)flags << I40E_TXD_QW1_CMD_SHIFT) | + ((uint64_t)pkt->data_len << I40E_TXD_QW1_TX_BUF_SZ_SHIFT)); + + uint64x2_t descriptor = {pkt->buf_physaddr + pkt->data_off, high_qw}; + 
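	/* Editor's note, not part of the patch: the two lanes initialized above
	 * form one 16-byte Tx data descriptor - lane 0 is the DMA address of
	 * the packet data (buf_physaddr + data_off), lane 1 the qword built
	 * from dtype, command flags and buffer size - so the single vst1q_u64()
	 * below programs the whole descriptor in one store.
	 */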
vst1q_u64((uint64_t *)txdp, descriptor); +} + +static inline void +vtx(volatile struct i40e_tx_desc *txdp, + struct rte_mbuf **pkt, uint16_t nb_pkts, uint64_t flags) +{ + int i; + + for (i = 0; i < nb_pkts; ++i, ++txdp, ++pkt) + vtx1(txdp, *pkt, flags); +} + +uint16_t +i40e_xmit_pkts_vec(void *tx_queue, struct rte_mbuf **tx_pkts, + uint16_t nb_pkts) +{ + struct i40e_tx_queue *txq = (struct i40e_tx_queue *)tx_queue; + volatile struct i40e_tx_desc *txdp; + struct i40e_tx_entry *txep; + uint16_t n, nb_commit, tx_id; + uint64_t flags = I40E_TD_CMD; + uint64_t rs = I40E_TX_DESC_CMD_RS | I40E_TD_CMD; + int i; + + /* cross rx_thresh boundary is not allowed */ + nb_pkts = RTE_MIN(nb_pkts, txq->tx_rs_thresh); + + if (txq->nb_tx_free < txq->tx_free_thresh) + i40e_tx_free_bufs(txq); + + nb_commit = nb_pkts = (uint16_t)RTE_MIN(txq->nb_tx_free, nb_pkts); + if (unlikely(nb_pkts == 0)) + return 0; + + tx_id = txq->tx_tail; + txdp = &txq->tx_ring[tx_id]; + txep = &txq->sw_ring[tx_id]; + + txq->nb_tx_free = (uint16_t)(txq->nb_tx_free - nb_pkts); + + n = (uint16_t)(txq->nb_tx_desc - tx_id); + if (nb_commit >= n) { + tx_backlog_entry(txep, tx_pkts, n); + + for (i = 0; i < n - 1; ++i, ++tx_pkts, ++txdp) + vtx1(txdp, *tx_pkts, flags); + + vtx1(txdp, *tx_pkts++, rs); + + nb_commit = (uint16_t)(nb_commit - n); + + tx_id = 0; + txq->tx_next_rs = (uint16_t)(txq->tx_rs_thresh - 1); + + /* avoid reach the end of ring */ + txdp = &txq->tx_ring[tx_id]; + txep = &txq->sw_ring[tx_id]; + } + + tx_backlog_entry(txep, tx_pkts, nb_commit); + + vtx(txdp, tx_pkts, nb_commit, flags); + + tx_id = (uint16_t)(tx_id + nb_commit); + if (tx_id > txq->tx_next_rs) { + txq->tx_ring[txq->tx_next_rs].cmd_type_offset_bsz |= + rte_cpu_to_le_64(((uint64_t)I40E_TX_DESC_CMD_RS) << + I40E_TXD_QW1_CMD_SHIFT); + txq->tx_next_rs = + (uint16_t)(txq->tx_next_rs + txq->tx_rs_thresh); + } + + txq->tx_tail = tx_id; + + I40E_PCI_REG_WRITE(txq->qtx_tail, txq->tx_tail); + + return nb_pkts; +} + +void __attribute__((cold)) +i40e_rx_queue_release_mbufs_vec(struct i40e_rx_queue *rxq) +{ + _i40e_rx_queue_release_mbufs_vec(rxq); +} + +int __attribute__((cold)) +i40e_rxq_vec_setup(struct i40e_rx_queue *rxq) +{ + return i40e_rxq_vec_setup_default(rxq); +} + +int __attribute__((cold)) +i40e_txq_vec_setup(struct i40e_tx_queue __rte_unused *txq) +{ + return 0; +} + +int __attribute__((cold)) +i40e_rx_vec_dev_conf_condition_check(struct rte_eth_dev *dev) +{ + return i40e_rx_vec_dev_conf_condition_check_default(dev); +} diff --git a/src/dpdk/drivers/net/i40e/i40e_rxtx_vec.c b/src/dpdk/drivers/net/i40e/i40e_rxtx_vec_sse.c index 51fb282a..b95cc8e1 100644 --- a/src/dpdk/drivers/net/i40e/i40e_rxtx_vec.c +++ b/src/dpdk/drivers/net/i40e/i40e_rxtx_vec_sse.c @@ -39,6 +39,7 @@ #include "base/i40e_type.h" #include "i40e_ethdev.h" #include "i40e_rxtx.h" +#include "i40e_rxtx_vec_common.h" #include <tmmintrin.h> @@ -138,19 +139,28 @@ i40e_rxq_rearm(struct i40e_rx_queue *rxq) static inline void desc_to_olflags_v(__m128i descs[4], struct rte_mbuf **rx_pkts) { - __m128i vlan0, vlan1, rss; - union { - uint16_t e[4]; - uint64_t dword; - } vol; + __m128i vlan0, vlan1, rss, l3_l4e; /* mask everything except RSS, flow director and VLAN flags * bit2 is for VLAN tag, bit11 for flow director indication * bit13:12 for RSS indication. 
*/ - const __m128i rss_vlan_msk = _mm_set_epi16( - 0x0000, 0x0000, 0x0000, 0x0000, - 0x3804, 0x3804, 0x3804, 0x3804); + const __m128i rss_vlan_msk = _mm_set_epi32( + 0x1c03804, 0x1c03804, 0x1c03804, 0x1c03804); + + const __m128i cksum_mask = _mm_set_epi32( + PKT_RX_IP_CKSUM_GOOD | PKT_RX_IP_CKSUM_BAD | + PKT_RX_L4_CKSUM_GOOD | PKT_RX_L4_CKSUM_BAD | + PKT_RX_EIP_CKSUM_BAD, + PKT_RX_IP_CKSUM_GOOD | PKT_RX_IP_CKSUM_BAD | + PKT_RX_L4_CKSUM_GOOD | PKT_RX_L4_CKSUM_BAD | + PKT_RX_EIP_CKSUM_BAD, + PKT_RX_IP_CKSUM_GOOD | PKT_RX_IP_CKSUM_BAD | + PKT_RX_L4_CKSUM_GOOD | PKT_RX_L4_CKSUM_BAD | + PKT_RX_EIP_CKSUM_BAD, + PKT_RX_IP_CKSUM_GOOD | PKT_RX_IP_CKSUM_BAD | + PKT_RX_L4_CKSUM_GOOD | PKT_RX_L4_CKSUM_BAD | + PKT_RX_EIP_CKSUM_BAD); /* map rss and vlan type to rss hash and vlan flag */ const __m128i vlan_flags = _mm_set_epi8(0, 0, 0, 0, @@ -163,23 +173,43 @@ desc_to_olflags_v(__m128i descs[4], struct rte_mbuf **rx_pkts) PKT_RX_RSS_HASH | PKT_RX_FDIR, PKT_RX_RSS_HASH, 0, 0, 0, 0, PKT_RX_FDIR, 0); - vlan0 = _mm_unpackhi_epi16(descs[0], descs[1]); - vlan1 = _mm_unpackhi_epi16(descs[2], descs[3]); - vlan0 = _mm_unpacklo_epi32(vlan0, vlan1); + const __m128i l3_l4e_flags = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, + /* shift right 1 bit to make sure it not exceed 255 */ + (PKT_RX_EIP_CKSUM_BAD | PKT_RX_L4_CKSUM_BAD | + PKT_RX_IP_CKSUM_BAD) >> 1, + (PKT_RX_IP_CKSUM_GOOD | PKT_RX_EIP_CKSUM_BAD | + PKT_RX_L4_CKSUM_BAD) >> 1, + (PKT_RX_EIP_CKSUM_BAD | PKT_RX_IP_CKSUM_BAD) >> 1, + (PKT_RX_IP_CKSUM_GOOD | PKT_RX_EIP_CKSUM_BAD) >> 1, + (PKT_RX_L4_CKSUM_BAD | PKT_RX_IP_CKSUM_BAD) >> 1, + (PKT_RX_IP_CKSUM_GOOD | PKT_RX_L4_CKSUM_BAD) >> 1, + PKT_RX_IP_CKSUM_BAD >> 1, + (PKT_RX_IP_CKSUM_GOOD | PKT_RX_L4_CKSUM_GOOD) >> 1); + + vlan0 = _mm_unpackhi_epi32(descs[0], descs[1]); + vlan1 = _mm_unpackhi_epi32(descs[2], descs[3]); + vlan0 = _mm_unpacklo_epi64(vlan0, vlan1); vlan1 = _mm_and_si128(vlan0, rss_vlan_msk); vlan0 = _mm_shuffle_epi8(vlan_flags, vlan1); - rss = _mm_srli_epi16(vlan1, 11); + rss = _mm_srli_epi32(vlan1, 11); rss = _mm_shuffle_epi8(rss_flags, rss); + l3_l4e = _mm_srli_epi32(vlan1, 22); + l3_l4e = _mm_shuffle_epi8(l3_l4e_flags, l3_l4e); + /* then we shift left 1 bit */ + l3_l4e = _mm_slli_epi32(l3_l4e, 1); + /* we need to mask out the reduntant bits */ + l3_l4e = _mm_and_si128(l3_l4e, cksum_mask); + vlan0 = _mm_or_si128(vlan0, rss); - vol.dword = _mm_cvtsi128_si64(vlan0); + vlan0 = _mm_or_si128(vlan0, l3_l4e); - rx_pkts[0]->ol_flags = vol.e[0]; - rx_pkts[1]->ol_flags = vol.e[1]; - rx_pkts[2]->ol_flags = vol.e[2]; - rx_pkts[3]->ol_flags = vol.e[3]; + rx_pkts[0]->ol_flags = _mm_extract_epi16(vlan0, 0); + rx_pkts[1]->ol_flags = _mm_extract_epi16(vlan0, 2); + rx_pkts[2]->ol_flags = _mm_extract_epi16(vlan0, 4); + rx_pkts[3]->ol_flags = _mm_extract_epi16(vlan0, 6); } #else #define desc_to_olflags_v(desc, rx_pkts) do {} while (0) @@ -187,6 +217,21 @@ desc_to_olflags_v(__m128i descs[4], struct rte_mbuf **rx_pkts) #define PKTLEN_SHIFT 10 +static inline void +desc_to_ptype_v(__m128i descs[4], struct rte_mbuf **rx_pkts) +{ + __m128i ptype0 = _mm_unpackhi_epi64(descs[0], descs[1]); + __m128i ptype1 = _mm_unpackhi_epi64(descs[2], descs[3]); + + ptype0 = _mm_srli_epi64(ptype0, 30); + ptype1 = _mm_srli_epi64(ptype1, 30); + + rx_pkts[0]->packet_type = i40e_rxd_pkt_type_mapping(_mm_extract_epi8(ptype0, 0)); + rx_pkts[1]->packet_type = i40e_rxd_pkt_type_mapping(_mm_extract_epi8(ptype0, 8)); + rx_pkts[2]->packet_type = i40e_rxd_pkt_type_mapping(_mm_extract_epi8(ptype1, 0)); + rx_pkts[3]->packet_type = 
i40e_rxd_pkt_type_mapping(_mm_extract_epi8(ptype1, 8)); +} + /* * Notice: * - nb_pkts < RTE_I40E_DESCS_PER_LOOP, just return no packet @@ -224,7 +269,7 @@ _recv_raw_pkts_vec(struct i40e_rx_queue *rxq, struct rte_mbuf **rx_pkts, */ rxdp = rxq->rx_ring + rxq->rx_tail; - _mm_prefetch((const void *)rxdp, _MM_HINT_T0); + rte_prefetch0(rxdp); /* See if we need to rearm the RX queue - gives the prefetch a bit * of time to act @@ -282,6 +327,7 @@ _recv_raw_pkts_vec(struct i40e_rx_queue *rxq, struct rte_mbuf **rx_pkts, /* Read desc statuses backwards to avoid race condition */ /* A.1 load 4 pkts desc */ descs[3] = _mm_loadu_si128((__m128i *)(rxdp + 3)); + rte_compiler_barrier(); /* B.2 copy 2 mbuf point into rx_pkts */ _mm_storeu_si128((__m128i *)&rx_pkts[pos], mbp1); @@ -290,8 +336,10 @@ _recv_raw_pkts_vec(struct i40e_rx_queue *rxq, struct rte_mbuf **rx_pkts, mbp2 = _mm_loadu_si128((__m128i *)&sw_ring[pos+2]); descs[2] = _mm_loadu_si128((__m128i *)(rxdp + 2)); + rte_compiler_barrier(); /* B.1 load 2 mbuf point */ descs[1] = _mm_loadu_si128((__m128i *)(rxdp + 1)); + rte_compiler_barrier(); descs[0] = _mm_loadu_si128((__m128i *)(rxdp)); /* B.2 copy 2 mbuf point into rx_pkts */ @@ -393,6 +441,7 @@ _recv_raw_pkts_vec(struct i40e_rx_queue *rxq, struct rte_mbuf **rx_pkts, pkt_mb2); _mm_storeu_si128((void *)&rx_pkts[pos]->rx_descriptor_fields1, pkt_mb1); + desc_to_ptype_v(descs, &rx_pkts[pos]); /* C.4 calc avaialbe number of desc */ var = __builtin_popcountll(_mm_cvtsi128_si64(staterr)); nb_pkts_recd += var; @@ -421,68 +470,6 @@ i40e_recv_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts, return _recv_raw_pkts_vec(rx_queue, rx_pkts, nb_pkts, NULL); } -static inline uint16_t -reassemble_packets(struct i40e_rx_queue *rxq, struct rte_mbuf **rx_bufs, - uint16_t nb_bufs, uint8_t *split_flags) -{ - struct rte_mbuf *pkts[RTE_I40E_VPMD_RX_BURST]; /*finished pkts*/ - struct rte_mbuf *start = rxq->pkt_first_seg; - struct rte_mbuf *end = rxq->pkt_last_seg; - unsigned pkt_idx, buf_idx; - - for (buf_idx = 0, pkt_idx = 0; buf_idx < nb_bufs; buf_idx++) { - if (end != NULL) { - /* processing a split packet */ - end->next = rx_bufs[buf_idx]; - rx_bufs[buf_idx]->data_len += rxq->crc_len; - - start->nb_segs++; - start->pkt_len += rx_bufs[buf_idx]->data_len; - end = end->next; - - if (!split_flags[buf_idx]) { - /* it's the last packet of the set */ - start->hash = end->hash; - start->ol_flags = end->ol_flags; - /* we need to strip crc for the whole packet */ - start->pkt_len -= rxq->crc_len; - if (end->data_len > rxq->crc_len) { - end->data_len -= rxq->crc_len; - } else { - /* free up last mbuf */ - struct rte_mbuf *secondlast = start; - - while (secondlast->next != end) - secondlast = secondlast->next; - secondlast->data_len -= (rxq->crc_len - - end->data_len); - secondlast->next = NULL; - rte_pktmbuf_free_seg(end); - end = secondlast; - } - pkts[pkt_idx++] = start; - start = end = NULL; - } - } else { - /* not processing a split packet */ - if (!split_flags[buf_idx]) { - /* not a split packet, save and skip */ - pkts[pkt_idx++] = rx_bufs[buf_idx]; - continue; - } - end = start = rx_bufs[buf_idx]; - rx_bufs[buf_idx]->data_len += rxq->crc_len; - rx_bufs[buf_idx]->pkt_len += rxq->crc_len; - } - } - - /* save the partial packet for next time */ - rxq->pkt_first_seg = start; - rxq->pkt_last_seg = end; - memcpy(rx_bufs, pkts, pkt_idx * (sizeof(*pkts))); - return pkt_idx; -} - /* vPMD receive routine that reassembles scattered packets * Notice: * - nb_pkts < RTE_I40E_DESCS_PER_LOOP, just return no packet @@ -548,73 +535,6 @@ 
vtx(volatile struct i40e_tx_desc *txdp, vtx1(txdp, *pkt, flags); } -static inline int __attribute__((always_inline)) -i40e_tx_free_bufs(struct i40e_tx_queue *txq) -{ - struct i40e_tx_entry *txep; - uint32_t n; - uint32_t i; - int nb_free = 0; - struct rte_mbuf *m, *free[RTE_I40E_TX_MAX_FREE_BUF_SZ]; - - /* check DD bits on threshold descriptor */ - if ((txq->tx_ring[txq->tx_next_dd].cmd_type_offset_bsz & - rte_cpu_to_le_64(I40E_TXD_QW1_DTYPE_MASK)) != - rte_cpu_to_le_64(I40E_TX_DESC_DTYPE_DESC_DONE)) - return 0; - - n = txq->tx_rs_thresh; - - /* first buffer to free from S/W ring is at index - * tx_next_dd - (tx_rs_thresh-1) - */ - txep = &txq->sw_ring[txq->tx_next_dd - (n - 1)]; - m = __rte_pktmbuf_prefree_seg(txep[0].mbuf); - if (likely(m != NULL)) { - free[0] = m; - nb_free = 1; - for (i = 1; i < n; i++) { - m = __rte_pktmbuf_prefree_seg(txep[i].mbuf); - if (likely(m != NULL)) { - if (likely(m->pool == free[0]->pool)) { - free[nb_free++] = m; - } else { - rte_mempool_put_bulk(free[0]->pool, - (void *)free, - nb_free); - free[0] = m; - nb_free = 1; - } - } - } - rte_mempool_put_bulk(free[0]->pool, (void **)free, nb_free); - } else { - for (i = 1; i < n; i++) { - m = __rte_pktmbuf_prefree_seg(txep[i].mbuf); - if (m != NULL) - rte_mempool_put(m->pool, m); - } - } - - /* buffers were freed, update counters */ - txq->nb_tx_free = (uint16_t)(txq->nb_tx_free + txq->tx_rs_thresh); - txq->tx_next_dd = (uint16_t)(txq->tx_next_dd + txq->tx_rs_thresh); - if (txq->tx_next_dd >= txq->nb_tx_desc) - txq->tx_next_dd = (uint16_t)(txq->tx_rs_thresh - 1); - - return txq->tx_rs_thresh; -} - -static inline void __attribute__((always_inline)) -tx_backlog_entry(struct i40e_tx_entry *txep, - struct rte_mbuf **tx_pkts, uint16_t nb_pkts) -{ - int i; - - for (i = 0; i < (int)nb_pkts; ++i) - txep[i].mbuf = tx_pkts[i]; -} - uint16_t i40e_xmit_pkts_vec(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts) @@ -685,37 +605,13 @@ i40e_xmit_pkts_vec(void *tx_queue, struct rte_mbuf **tx_pkts, void __attribute__((cold)) i40e_rx_queue_release_mbufs_vec(struct i40e_rx_queue *rxq) { - const unsigned mask = rxq->nb_rx_desc - 1; - unsigned i; - - if (rxq->sw_ring == NULL || rxq->rxrearm_nb >= rxq->nb_rx_desc) - return; - - /* free all mbufs that are valid in the ring */ - for (i = rxq->rx_tail; i != rxq->rxrearm_start; i = (i + 1) & mask) - rte_pktmbuf_free_seg(rxq->sw_ring[i].mbuf); - rxq->rxrearm_nb = rxq->nb_rx_desc; - - /* set all entries to NULL */ - memset(rxq->sw_ring, 0, sizeof(rxq->sw_ring[0]) * rxq->nb_rx_desc); + _i40e_rx_queue_release_mbufs_vec(rxq); } int __attribute__((cold)) i40e_rxq_vec_setup(struct i40e_rx_queue *rxq) { - uintptr_t p; - struct rte_mbuf mb_def = { .buf_addr = 0 }; /* zeroed mbuf */ - - mb_def.nb_segs = 1; - mb_def.data_off = RTE_PKTMBUF_HEADROOM; - mb_def.port = rxq->port_id; - rte_mbuf_refcnt_set(&mb_def, 1); - - /* prevent compiler reordering: rearm_data covers previous fields */ - rte_compiler_barrier(); - p = (uintptr_t)&mb_def.rearm_data; - rxq->mbuf_initializer = *(uint64_t *)p; - return 0; + return i40e_rxq_vec_setup_default(rxq); } int __attribute__((cold)) @@ -728,34 +624,10 @@ int __attribute__((cold)) i40e_rx_vec_dev_conf_condition_check(struct rte_eth_dev *dev) { #ifndef RTE_LIBRTE_IEEE1588 - struct rte_eth_rxmode *rxmode = &dev->data->dev_conf.rxmode; - struct rte_fdir_conf *fconf = &dev->data->dev_conf.fdir_conf; - /* need SSE4.1 support */ if (!rte_cpu_get_flag_enabled(RTE_CPUFLAG_SSE4_1)) return -1; - -#ifndef RTE_LIBRTE_I40E_RX_OLFLAGS_ENABLE - /* whithout rx ol_flags, no 
VP flag report */ - if (rxmode->hw_vlan_strip != 0 || - rxmode->hw_vlan_extend != 0) - return -1; #endif - /* no fdir support */ - if (fconf->mode != RTE_FDIR_MODE_NONE) - return -1; - - /* - no csum error report support - * - no header split support - */ - if (rxmode->hw_ip_checksum == 1 || - rxmode->header_split == 1) - return -1; - - return 0; -#else - RTE_SET_USED(dev); - return -1; -#endif + return i40e_rx_vec_dev_conf_condition_check_default(dev); } diff --git a/src/dpdk/drivers/net/i40e/rte_pmd_i40e.h b/src/dpdk/drivers/net/i40e/rte_pmd_i40e.h new file mode 100644 index 00000000..a0ad88c6 --- /dev/null +++ b/src/dpdk/drivers/net/i40e/rte_pmd_i40e.h @@ -0,0 +1,335 @@ +/*- + * BSD LICENSE + * + * Copyright (c) 2017 Intel Corporation. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _PMD_I40E_H_ +#define _PMD_I40E_H_ + +/** + * @file rte_pmd_i40e.h + * + * i40e PMD specific functions. + * + * @b EXPERIMENTAL: this API may change, or be removed, without prior notice + * + */ + +#include <rte_ethdev.h> + +/** + * Response sent back to i40e driver from user app after callback + */ +enum rte_pmd_i40e_mb_event_rsp { + RTE_PMD_I40E_MB_EVENT_NOOP_ACK, /**< skip mbox request and ACK */ + RTE_PMD_I40E_MB_EVENT_NOOP_NACK, /**< skip mbox request and NACK */ + RTE_PMD_I40E_MB_EVENT_PROCEED, /**< proceed with mbox request */ + RTE_PMD_I40E_MB_EVENT_MAX /**< max value of this enum */ +}; + +/** + * Data sent to the user application when the callback is executed. + */ +struct rte_pmd_i40e_mb_event_param { + uint16_t vfid; /**< Virtual Function number */ + uint16_t msg_type; /**< VF to PF message type, see i40e_virtchnl_ops */ + uint16_t retval; /**< return value */ + void *msg; /**< pointer to message */ + uint16_t msglen; /**< length of the message */ +}; + +/** + * Notify VF when PF link status changes. + * + * @param port + * The port identifier of the Ethernet device. + * @param vf + * VF id. + * @return + * - (0) if successful. + * - (-ENODEV) if *port* invalid. + * - (-EINVAL) if *vf* invalid. 
+ */ +int rte_pmd_i40e_ping_vfs(uint8_t port, uint16_t vf); + +/** + * Enable/Disable VF MAC anti spoofing. + * + * @param port + * The port identifier of the Ethernet device. + * @param vf_id + * VF on which to set MAC anti spoofing. + * @param on + * 1 - Enable VFs MAC anti spoofing. + * 0 - Disable VFs MAC anti spoofing. + * @return + * - (0) if successful. + * - (-ENODEV) if *port* invalid. + * - (-EINVAL) if bad parameter. + */ +int rte_pmd_i40e_set_vf_mac_anti_spoof(uint8_t port, + uint16_t vf_id, + uint8_t on); + +/** + * Enable/Disable VF VLAN anti spoofing. + * + * @param port + * The port identifier of the Ethernet device. + * @param vf_id + * VF on which to set VLAN anti spoofing. + * @param on + * 1 - Enable VFs VLAN anti spoofing. + * 0 - Disable VFs VLAN anti spoofing. + * @return + * - (0) if successful. + * - (-ENODEV) if *port* invalid. + * - (-EINVAL) if bad parameter. + */ +int rte_pmd_i40e_set_vf_vlan_anti_spoof(uint8_t port, + uint16_t vf_id, + uint8_t on); + +/** + * Enable/Disable TX loopback on all the PF and VFs. + * + * @param port + * The port identifier of the Ethernet device. + * @param on + * 1 - Enable TX loopback. + * 0 - Disable TX loopback. + * @return + * - (0) if successful. + * - (-ENODEV) if *port* invalid. + * - (-EINVAL) if bad parameter. + */ +int rte_pmd_i40e_set_tx_loopback(uint8_t port, + uint8_t on); + +/** + * Enable/Disable VF unicast promiscuous mode. + * + * @param port + * The port identifier of the Ethernet device. + * @param vf_id + * VF on which to set. + * @param on + * 1 - Enable. + * 0 - Disable. + * @return + * - (0) if successful. + * - (-ENODEV) if *port* invalid. + * - (-EINVAL) if bad parameter. + */ +int rte_pmd_i40e_set_vf_unicast_promisc(uint8_t port, + uint16_t vf_id, + uint8_t on); + +/** + * Enable/Disable VF multicast promiscuous mode. + * + * @param port + * The port identifier of the Ethernet device. + * @param vf_id + * VF on which to set. + * @param on + * 1 - Enable. + * 0 - Disable. + * @return + * - (0) if successful. + * - (-ENODEV) if *port* invalid. + * - (-EINVAL) if bad parameter. + */ +int rte_pmd_i40e_set_vf_multicast_promisc(uint8_t port, + uint16_t vf_id, + uint8_t on); + +/** + * Set the VF MAC address. + * + * PF should set MAC address before VF initialized, if PF sets the MAC + * address after VF initialized, new MAC address won't be effective until + * VF reinitialize. + * + * This will remove all existing MAC filters. + * + * @param port + * The port identifier of the Ethernet device. + * @param vf_id + * VF id. + * @param mac_addr + * VF MAC address. + * @return + * - (0) if successful. + * - (-ENODEV) if *port* invalid. + * - (-EINVAL) if *vf* or *mac_addr* is invalid. + */ +int rte_pmd_i40e_set_vf_mac_addr(uint8_t port, uint16_t vf_id, + struct ether_addr *mac_addr); + +/** + * Enable/Disable vf vlan strip for all queues in a pool + * + * @param port + * The port identifier of the Ethernet device. + * @param vf + * ID specifying VF. + * @param on + * 1 - Enable VF's vlan strip on RX queues. + * 0 - Disable VF's vlan strip on RX queues. + * + * @return + * - (0) if successful. + * - (-ENODEV) if *port* invalid. + * - (-EINVAL) if bad parameter. + */ +int +rte_pmd_i40e_set_vf_vlan_stripq(uint8_t port, uint16_t vf, uint8_t on); + +/** + * Enable/Disable vf vlan insert + * + * @param port + * The port identifier of the Ethernet device. + * @param vf_id + * ID specifying VF. + * @param vlan_id + * 0 - Disable VF's vlan insert. + * n - Enable; n is inserted as the vlan id. 
+ * + * @return + * - (0) if successful. + * - (-ENODEV) if *port* invalid. + * - (-EINVAL) if bad parameter. + */ +int rte_pmd_i40e_set_vf_vlan_insert(uint8_t port, uint16_t vf_id, + uint16_t vlan_id); + +/** + * Enable/Disable vf broadcast mode + * + * @param port + * The port identifier of the Ethernet device. + * @param vf_id + * ID specifying VF. + * @param on + * 0 - Disable broadcast. + * 1 - Enable broadcast. + * + * @return + * - (0) if successful. + * - (-ENODEV) if *port* invalid. + * - (-EINVAL) if bad parameter. + */ +int rte_pmd_i40e_set_vf_broadcast(uint8_t port, uint16_t vf_id, + uint8_t on); + +/** + * Enable/Disable vf vlan tag + * + * @param port + * The port identifier of the Ethernet device. + * @param vf_id + * ID specifying VF. + * @param on + * 0 - Disable VF's vlan tag. + * n - Enable VF's vlan tag. + * + * @return + * - (0) if successful. + * - (-ENODEV) if *port* invalid. + * - (-EINVAL) if bad parameter. + */ +int rte_pmd_i40e_set_vf_vlan_tag(uint8_t port, uint16_t vf_id, uint8_t on); + +/** + * Enable/Disable VF VLAN filter + * + * @param port + * The port identifier of the Ethernet device. + * @param vlan_id + * ID specifying VLAN + * @param vf_mask + * Mask to filter VF's + * @param on + * 0 - Disable VF's VLAN filter. + * 1 - Enable VF's VLAN filter. + * + * @return + * - (0) if successful. + * - (-ENODEV) if *port* invalid. + * - (-EINVAL) if bad parameter. + * - (-ENOTSUP) not supported by firmware. + */ +int rte_pmd_i40e_set_vf_vlan_filter(uint8_t port, uint16_t vlan_id, + uint64_t vf_mask, uint8_t on); + +/** + * Get VF's statistics + * + * @param port + * The port identifier of the Ethernet device. + * @param vf_id + * VF on which to get. + * @param stats + * A pointer to a structure of type *rte_eth_stats* to be filled with + * the values of device counters for the following set of statistics: + * - *ipackets* with the total of successfully received packets. + * - *opackets* with the total of successfully transmitted packets. + * - *ibytes* with the total of successfully received bytes. + * - *obytes* with the total of successfully transmitted bytes. + * - *ierrors* with the total of erroneous received packets. + * - *oerrors* with the total of failed transmitted packets. + * @return + * - (0) if successful. + * - (-ENODEV) if *port* invalid. + * - (-EINVAL) if bad parameter. + */ + +int rte_pmd_i40e_get_vf_stats(uint8_t port, + uint16_t vf_id, + struct rte_eth_stats *stats); + +/** + * Clear VF's statistics + * + * @param port + * The port identifier of the Ethernet device. + * @param vf_id + * VF on which to get. + * @return + * - (0) if successful. + * - (-ENODEV) if *port* invalid. + * - (-EINVAL) if bad parameter. + */ +int rte_pmd_i40e_reset_vf_stats(uint8_t port, + uint16_t vf_id); + +#endif /* _PMD_I40E_H_ */ diff --git a/src/dpdk/drivers/net/ixgbe/base/ixgbe_82598.c b/src/dpdk/drivers/net/ixgbe/base/ixgbe_82598.c index db808801..724dcbbc 100644 --- a/src/dpdk/drivers/net/ixgbe/base/ixgbe_82598.c +++ b/src/dpdk/drivers/net/ixgbe/base/ixgbe_82598.c @@ -995,19 +995,19 @@ STATIC s32 ixgbe_clear_vmdq_82598(struct ixgbe_hw *hw, u32 rar, u32 vmdq) * @vlan: VLAN id to write to VLAN filter * @vind: VMDq output index that maps queue to VLAN id in VFTA * @vlan_on: boolean flag to turn on/off VLAN in VFTA - * @bypass_vlvf: boolean flag - unused + * @vlvf_bypass: boolean flag - unused * * Turn on/off specified VLAN in the VLAN filter table. 
**/ s32 ixgbe_set_vfta_82598(struct ixgbe_hw *hw, u32 vlan, u32 vind, - bool vlan_on, bool bypass_vlvf) + bool vlan_on, bool vlvf_bypass) { u32 regindex; u32 bitindex; u32 bits; u32 vftabyte; - UNREFERENCED_1PARAMETER(bypass_vlvf); + UNREFERENCED_1PARAMETER(vlvf_bypass); DEBUGFUNC("ixgbe_set_vfta_82598"); diff --git a/src/dpdk/drivers/net/ixgbe/base/ixgbe_82599.c b/src/dpdk/drivers/net/ixgbe/base/ixgbe_82599.c index 5bc7c2b9..832242ee 100644 --- a/src/dpdk/drivers/net/ixgbe/base/ixgbe_82599.c +++ b/src/dpdk/drivers/net/ixgbe/base/ixgbe_82599.c @@ -1178,6 +1178,7 @@ mac_reset_top: if (ixgbe_validate_mac_addr(hw->mac.san_addr) == 0) { /* Save the SAN MAC RAR index */ hw->mac.san_mac_rar_index = hw->mac.num_rar_entries - 1; + hw->mac.ops.set_rar(hw, hw->mac.san_mac_rar_index, hw->mac.san_addr, 0, IXGBE_RAH_AV); @@ -1809,14 +1810,23 @@ s32 ixgbe_fdir_set_input_mask_82599(struct ixgbe_hw *hw, } IXGBE_WRITE_REG_BE32(hw, IXGBE_FDIRIP6M, fdirip6m); - /* Set all bits in FDIRTCPM, FDIRUDPM, FDIRSIP4M and - * FDIRDIP4M in cloud mode to allow L3/L3 packets to - * tunnel. + /* Set all bits in FDIRTCPM, FDIRUDPM, FDIRSCTPM, + * FDIRSIP4M and FDIRDIP4M in cloud mode to allow + * L3/L3 packets to tunnel. */ IXGBE_WRITE_REG(hw, IXGBE_FDIRTCPM, 0xFFFFFFFF); IXGBE_WRITE_REG(hw, IXGBE_FDIRUDPM, 0xFFFFFFFF); IXGBE_WRITE_REG_BE32(hw, IXGBE_FDIRDIP4M, 0xFFFFFFFF); IXGBE_WRITE_REG_BE32(hw, IXGBE_FDIRSIP4M, 0xFFFFFFFF); + switch (hw->mac.type) { + case ixgbe_mac_X550: + case ixgbe_mac_X550EM_x: + case ixgbe_mac_X550EM_a: + IXGBE_WRITE_REG(hw, IXGBE_FDIRSCTPM, 0xFFFFFFFF); + break; + default: + break; + } } /* Now mask VM pool and destination IPv6 - bits 5 and 2 */ diff --git a/src/dpdk/drivers/net/ixgbe/base/ixgbe_api.c b/src/dpdk/drivers/net/ixgbe/base/ixgbe_api.c index 17868676..270a97dc 100644 --- a/src/dpdk/drivers/net/ixgbe/base/ixgbe_api.c +++ b/src/dpdk/drivers/net/ixgbe/base/ixgbe_api.c @@ -106,8 +106,10 @@ s32 ixgbe_init_shared_code(struct ixgbe_hw *hw) status = ixgbe_init_ops_X550(hw); break; case ixgbe_mac_X550EM_x: + status = ixgbe_init_ops_X550EM_x(hw); + break; case ixgbe_mac_X550EM_a: - status = ixgbe_init_ops_X550EM(hw); + status = ixgbe_init_ops_X550EM_a(hw); break; case ixgbe_mac_82599_vf: case ixgbe_mac_X540_vf: @@ -203,6 +205,7 @@ s32 ixgbe_set_mac_type(struct ixgbe_hw *hw) case IXGBE_DEV_ID_X550EM_X_10G_T: case IXGBE_DEV_ID_X550EM_X_1G_T: case IXGBE_DEV_ID_X550EM_X_SFP: + case IXGBE_DEV_ID_X550EM_X_XFI: hw->mac.type = ixgbe_mac_X550EM_x; hw->mvals = ixgbe_mvals_X550EM_x; break; @@ -1090,7 +1093,7 @@ s32 ixgbe_set_vfta(struct ixgbe_hw *hw, u32 vlan, u32 vind, bool vlan_on, bool vlvf_bypass) { return ixgbe_call_func(hw, hw->mac.ops.set_vfta, (hw, vlan, vind, - vlan_on, vlvf_bypass), IXGBE_NOT_IMPLEMENTED); + vlan_on, vlvf_bypass), IXGBE_NOT_IMPLEMENTED); } /** @@ -1100,7 +1103,7 @@ s32 ixgbe_set_vfta(struct ixgbe_hw *hw, u32 vlan, u32 vind, bool vlan_on, * @vind: VMDq output index that maps queue to VLAN id in VLVFB * @vlan_on: boolean flag to turn on/off VLAN in VLVF * @vfta_delta: pointer to the difference between the current value of VFTA - * and the desired value + * and the desired value * @vfta: the desired value of the VFTA * @vlvf_bypass: boolean flag indicating updating the default pool is okay * @@ -1110,7 +1113,7 @@ s32 ixgbe_set_vlvf(struct ixgbe_hw *hw, u32 vlan, u32 vind, bool vlan_on, u32 *vfta_delta, u32 vfta, bool vlvf_bypass) { return ixgbe_call_func(hw, hw->mac.ops.set_vlvf, (hw, vlan, vind, - vlan_on, vfta_delta, vfta, vlvf_bypass), + vlan_on, vfta_delta, vfta, vlvf_bypass), 
IXGBE_NOT_IMPLEMENTED); } @@ -1145,12 +1148,15 @@ s32 ixgbe_setup_fc(struct ixgbe_hw *hw) * @min: driver minor number to be sent to firmware * @build: driver build number to be sent to firmware * @ver: driver version number to be sent to firmware + * @len: length of driver_ver string + * @driver_ver: driver string **/ s32 ixgbe_set_fw_drv_ver(struct ixgbe_hw *hw, u8 maj, u8 min, u8 build, - u8 ver) + u8 ver, u16 len, char *driver_ver) { return ixgbe_call_func(hw, hw->mac.ops.set_fw_drv_ver, (hw, maj, min, - build, ver), IXGBE_NOT_IMPLEMENTED); + build, ver, len, driver_ver), + IXGBE_NOT_IMPLEMENTED); } @@ -1659,6 +1665,7 @@ void ixgbe_init_swfw_semaphore(struct ixgbe_hw *hw) hw->mac.ops.init_swfw_sync(hw); } + void ixgbe_disable_rx(struct ixgbe_hw *hw) { if (hw->mac.ops.disable_rx) diff --git a/src/dpdk/drivers/net/ixgbe/base/ixgbe_api.h b/src/dpdk/drivers/net/ixgbe/base/ixgbe_api.h index 3aad1da7..af85d4ea 100644 --- a/src/dpdk/drivers/net/ixgbe/base/ixgbe_api.h +++ b/src/dpdk/drivers/net/ixgbe/base/ixgbe_api.h @@ -45,6 +45,8 @@ extern s32 ixgbe_init_ops_82599(struct ixgbe_hw *hw); extern s32 ixgbe_init_ops_X540(struct ixgbe_hw *hw); extern s32 ixgbe_init_ops_X550(struct ixgbe_hw *hw); extern s32 ixgbe_init_ops_X550EM(struct ixgbe_hw *hw); +extern s32 ixgbe_init_ops_X550EM_x(struct ixgbe_hw *hw); +extern s32 ixgbe_init_ops_X550EM_a(struct ixgbe_hw *hw); extern s32 ixgbe_init_ops_vf(struct ixgbe_hw *hw); s32 ixgbe_set_mac_type(struct ixgbe_hw *hw); @@ -131,7 +133,7 @@ s32 ixgbe_set_vlvf(struct ixgbe_hw *hw, u32 vlan, u32 vind, s32 ixgbe_fc_enable(struct ixgbe_hw *hw); s32 ixgbe_setup_fc(struct ixgbe_hw *hw); s32 ixgbe_set_fw_drv_ver(struct ixgbe_hw *hw, u8 maj, u8 min, u8 build, - u8 ver); + u8 ver, u16 len, char *driver_ver); s32 ixgbe_get_thermal_sensor_data(struct ixgbe_hw *hw); s32 ixgbe_init_thermal_sensor_thresh(struct ixgbe_hw *hw); void ixgbe_set_mta(struct ixgbe_hw *hw, u8 *mc_addr); diff --git a/src/dpdk/drivers/net/ixgbe/base/ixgbe_common.c b/src/dpdk/drivers/net/ixgbe/base/ixgbe_common.c index 811875a4..96456678 100644 --- a/src/dpdk/drivers/net/ixgbe/base/ixgbe_common.c +++ b/src/dpdk/drivers/net/ixgbe/base/ixgbe_common.c @@ -113,6 +113,7 @@ s32 ixgbe_init_ops_generic(struct ixgbe_hw *hw) mac->ops.led_off = ixgbe_led_off_generic; mac->ops.blink_led_start = ixgbe_blink_led_start_generic; mac->ops.blink_led_stop = ixgbe_blink_led_stop_generic; + mac->ops.init_led_link_act = ixgbe_init_led_link_act_generic; /* RAR, Multicast, VLAN */ mac->ops.set_rar = ixgbe_set_rar_generic; @@ -168,13 +169,24 @@ bool ixgbe_device_supports_autoneg_fc(struct ixgbe_hw *hw) switch (hw->phy.media_type) { case ixgbe_media_type_fiber_qsfp: case ixgbe_media_type_fiber: - hw->mac.ops.check_link(hw, &speed, &link_up, false); - /* if link is down, assume supported */ - if (link_up) - supported = speed == IXGBE_LINK_SPEED_1GB_FULL ? + /* flow control autoneg black list */ + switch (hw->device_id) { + case IXGBE_DEV_ID_X550EM_A_SFP: + case IXGBE_DEV_ID_X550EM_A_SFP_N: + case IXGBE_DEV_ID_X550EM_A_QSFP: + case IXGBE_DEV_ID_X550EM_A_QSFP_N: + supported = false; + break; + default: + hw->mac.ops.check_link(hw, &speed, &link_up, false); + /* if link is down, assume supported */ + if (link_up) + supported = speed == IXGBE_LINK_SPEED_1GB_FULL ? 
true : false; - else - supported = true; + else + supported = true; + } + break; case ixgbe_media_type_backplane: supported = true; @@ -188,6 +200,9 @@ bool ixgbe_device_supports_autoneg_fc(struct ixgbe_hw *hw) case IXGBE_DEV_ID_X550T: case IXGBE_DEV_ID_X550T1: case IXGBE_DEV_ID_X550EM_X_10G_T: + case IXGBE_DEV_ID_X550EM_A_10G_T: + case IXGBE_DEV_ID_X550EM_A_1G_T: + case IXGBE_DEV_ID_X550EM_A_1G_T_L: supported = true; break; default: @@ -197,9 +212,10 @@ bool ixgbe_device_supports_autoneg_fc(struct ixgbe_hw *hw) break; } - ERROR_REPORT2(IXGBE_ERROR_UNSUPPORTED, - "Device %x does not support flow control autoneg", - hw->device_id); + if (!supported) + ERROR_REPORT2(IXGBE_ERROR_UNSUPPORTED, + "Device %x does not support flow control autoneg", + hw->device_id); return supported; } @@ -371,6 +387,7 @@ s32 ixgbe_start_hw_generic(struct ixgbe_hw *hw) { s32 ret_val; u32 ctrl_ext; + u16 device_caps; DEBUGFUNC("ixgbe_start_hw_generic"); @@ -393,14 +410,31 @@ s32 ixgbe_start_hw_generic(struct ixgbe_hw *hw) /* Setup flow control */ ret_val = ixgbe_setup_fc(hw); - if (ret_val != IXGBE_SUCCESS) - goto out; + if (ret_val != IXGBE_SUCCESS && ret_val != IXGBE_NOT_IMPLEMENTED) { + DEBUGOUT1("Flow control setup failed, returning %d\n", ret_val); + return ret_val; + } + + /* Cache bit indicating need for crosstalk fix */ + switch (hw->mac.type) { + case ixgbe_mac_82599EB: + case ixgbe_mac_X550EM_x: + case ixgbe_mac_X550EM_a: + hw->mac.ops.get_device_caps(hw, &device_caps); + if (device_caps & IXGBE_DEVICE_CAPS_NO_CROSSTALK_WR) + hw->need_crosstalk_fix = false; + else + hw->need_crosstalk_fix = true; + break; + default: + hw->need_crosstalk_fix = false; + break; + } /* Clear adapter stopped flag */ hw->adapter_stopped = false; -out: - return ret_val; + return IXGBE_SUCCESS; } /** @@ -466,6 +500,12 @@ s32 ixgbe_init_hw_generic(struct ixgbe_hw *hw) status = hw->mac.ops.start_hw(hw); } + /* Initialize the LED link active for LED blink support */ + hw->mac.ops.init_led_link_act(hw); + + if (status != IXGBE_SUCCESS) + DEBUGOUT1("Failed to initialize HW, STATUS = %d\n", status); + return status; } @@ -1046,7 +1086,7 @@ void ixgbe_set_lan_id_multi_port_pcie(struct ixgbe_hw *hw) if (hw->device_id == IXGBE_DEV_ID_X550EM_A_SFP) { hw->eeprom.ops.read(hw, IXGBE_EEPROM_CTRL_4, &ee_ctrl_4); bus->instance_id = (ee_ctrl_4 & IXGBE_EE_CTRL_4_INST_ID) >> - IXGBE_EE_CTRL_4_INST_ID_SHIFT; + IXGBE_EE_CTRL_4_INST_ID_SHIFT; } } @@ -1105,6 +1145,47 @@ s32 ixgbe_stop_adapter_generic(struct ixgbe_hw *hw) } /** + * ixgbe_init_led_link_act_generic - Store the LED index link/activity. + * @hw: pointer to hardware structure + * + * Store the index for the link active LED. This will be used to support + * blinking the LED. + **/ +s32 ixgbe_init_led_link_act_generic(struct ixgbe_hw *hw) +{ + struct ixgbe_mac_info *mac = &hw->mac; + u32 led_reg, led_mode; + u8 i; + + led_reg = IXGBE_READ_REG(hw, IXGBE_LEDCTL); + + /* Get LED link active from the LEDCTL register */ + for (i = 0; i < 4; i++) { + led_mode = led_reg >> IXGBE_LED_MODE_SHIFT(i); + + if ((led_mode & IXGBE_LED_MODE_MASK_BASE) == + IXGBE_LED_LINK_ACTIVE) { + mac->led_link_act = i; + return IXGBE_SUCCESS; + } + } + + /* + * If LEDCTL register does not have the LED link active set, then use + * known MAC defaults. + */ + switch (hw->mac.type) { + case ixgbe_mac_X550EM_a: + case ixgbe_mac_X550EM_x: + mac->led_link_act = 1; + break; + default: + mac->led_link_act = 2; + } + return IXGBE_SUCCESS; +} + +/** * ixgbe_led_on_generic - Turns on the software controllable LEDs. 
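The LEDCTL scan in ixgbe_init_led_link_act_generic() above is easier to follow with concrete numbers. The self-contained sketch below only mirrors the layout implied by the IXGBE_LED_* macros (one byte per LED, mode in the low nibble, mode value 0x4 meaning link/activity); the stand-in macro values and the sample register value are assumptions for the example, not taken from the patch.

#include <stdint.h>
#include <stdio.h>

#define LED_MODE_SHIFT(i)  ((i) * 8) /* stands in for IXGBE_LED_MODE_SHIFT */
#define LED_MODE_MASK_BASE 0x0F      /* stands in for IXGBE_LED_MODE_MASK_BASE */
#define LED_LINK_ACTIVE    0x4       /* stands in for IXGBE_LED_LINK_ACTIVE */

static int find_link_act_led(uint32_t ledctl)
{
        int i;

        for (i = 0; i < 4; i++)
                if (((ledctl >> LED_MODE_SHIFT(i)) & LED_MODE_MASK_BASE) ==
                    LED_LINK_ACTIVE)
                        return i;  /* this LED blinks on link/activity */
        return -1;                 /* fall back to the per-MAC default */
}

int main(void)
{
        /* LED1 programmed as link/activity: byte 1 low nibble == 0x4. */
        printf("link LED index: %d\n", find_link_act_led(0x00000400));
        return 0;
}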
* @hw: pointer to hardware structure * @index: led number to turn on @@ -1115,6 +1196,9 @@ s32 ixgbe_led_on_generic(struct ixgbe_hw *hw, u32 index) DEBUGFUNC("ixgbe_led_on_generic"); + if (index > 3) + return IXGBE_ERR_PARAM; + /* To turn on the LED, set mode to ON. */ led_reg &= ~IXGBE_LED_MODE_MASK(index); led_reg |= IXGBE_LED_ON << IXGBE_LED_MODE_SHIFT(index); @@ -1135,6 +1219,9 @@ s32 ixgbe_led_off_generic(struct ixgbe_hw *hw, u32 index) DEBUGFUNC("ixgbe_led_off_generic"); + if (index > 3) + return IXGBE_ERR_PARAM; + /* To turn off the LED, set mode to OFF. */ led_reg &= ~IXGBE_LED_MODE_MASK(index); led_reg |= IXGBE_LED_OFF << IXGBE_LED_MODE_SHIFT(index); @@ -2851,7 +2938,7 @@ out: * advertised settings **/ s32 ixgbe_negotiate_fc(struct ixgbe_hw *hw, u32 adv_reg, u32 lp_reg, - u32 adv_sym, u32 adv_asm, u32 lp_sym, u32 lp_asm) + u32 adv_sym, u32 adv_asm, u32 lp_sym, u32 lp_asm) { if ((!(adv_reg)) || (!(lp_reg))) { ERROR_REPORT3(IXGBE_ERROR_UNSUPPORTED, @@ -3323,7 +3410,7 @@ s32 prot_autoc_write_generic(struct ixgbe_hw *hw, u32 reg_val, bool locked) **/ s32 ixgbe_enable_sec_rx_path_generic(struct ixgbe_hw *hw) { - int secrxreg; + u32 secrxreg; DEBUGFUNC("ixgbe_enable_sec_rx_path_generic"); @@ -3370,6 +3457,9 @@ s32 ixgbe_blink_led_start_generic(struct ixgbe_hw *hw, u32 index) DEBUGFUNC("ixgbe_blink_led_start_generic"); + if (index > 3) + return IXGBE_ERR_PARAM; + /* * Link must be up to auto-blink the LEDs; * Force it if link is down. @@ -3415,6 +3505,10 @@ s32 ixgbe_blink_led_stop_generic(struct ixgbe_hw *hw, u32 index) DEBUGFUNC("ixgbe_blink_led_stop_generic"); + if (index > 3) + return IXGBE_ERR_PARAM; + + ret_val = hw->mac.ops.prot_autoc_read(hw, &locked, &autoc_reg); if (ret_val != IXGBE_SUCCESS) goto out; @@ -3720,7 +3814,8 @@ s32 ixgbe_clear_vmdq_generic(struct ixgbe_hw *hw, u32 rar, u32 vmdq) } /* was that the last pool using this rar? */ - if (mpsar_lo == 0 && mpsar_hi == 0 && rar != 0) + if (mpsar_lo == 0 && mpsar_hi == 0 && + rar != 0 && rar != hw->mac.san_mac_rar_index) hw->mac.ops.clear_rar(hw, rar); done: return IXGBE_SUCCESS; @@ -3887,7 +3982,8 @@ s32 ixgbe_set_vfta_generic(struct ixgbe_hw *hw, u32 vlan, u32 vind, vfta_delta = 1 << (vlan % 32); vfta = IXGBE_READ_REG(hw, IXGBE_VFTA(regidx)); - /* vfta_delta represents the difference between the current value + /* + * vfta_delta represents the difference between the current value * of vfta and the value we want in the register. 
Since the diff * is an XOR mask we can just update the vfta using an XOR */ @@ -3920,7 +4016,7 @@ vfta_update: * @vind: VMDq output index that maps queue to VLAN id in VLVFB * @vlan_on: boolean flag to turn on/off VLAN in VLVF * @vfta_delta: pointer to the difference between the current value of VFTA - * and the desired value + * and the desired value * @vfta: the desired value of the VFTA * @vlvf_bypass: boolean flag indicating updating default pool is okay * @@ -3947,6 +4043,7 @@ s32 ixgbe_set_vlvf_generic(struct ixgbe_hw *hw, u32 vlan, u32 vind, */ if (!(IXGBE_READ_REG(hw, IXGBE_VT_CTL) & IXGBE_VT_CTL_VT_ENABLE)) return IXGBE_SUCCESS; + vlvf_index = ixgbe_find_vlvf_slot(hw, vlan, vlvf_bypass); if (vlvf_index < 0) return vlvf_index; @@ -3967,7 +4064,7 @@ s32 ixgbe_set_vlvf_generic(struct ixgbe_hw *hw, u32 vlan, u32 vind, * we run the risk of stray packets leaking into * the PF via the default pool */ - if (vfta_delta) + if (*vfta_delta) IXGBE_WRITE_REG(hw, IXGBE_VFTA(vlan / 32), vfta); /* disable VLVF and clear remaining bit from pool */ @@ -3976,6 +4073,7 @@ s32 ixgbe_set_vlvf_generic(struct ixgbe_hw *hw, u32 vlan, u32 vind, return IXGBE_SUCCESS; } + /* If there are still bits set in the VLVFB registers * for the VLAN ID indicated we need to see if the * caller is requesting that we clear the VFTA entry bit. @@ -4025,6 +4123,32 @@ s32 ixgbe_clear_vfta_generic(struct ixgbe_hw *hw) } /** + * ixgbe_need_crosstalk_fix - Determine if we need to do cross talk fix + * @hw: pointer to hardware structure + * + * Contains the logic to identify if we need to verify link for the + * crosstalk fix + **/ +static bool ixgbe_need_crosstalk_fix(struct ixgbe_hw *hw) +{ + + /* Does FW say we need the fix */ + if (!hw->need_crosstalk_fix) + return false; + + /* Only consider SFP+ PHYs i.e. media type fiber */ + switch (hw->mac.ops.get_media_type(hw)) { + case ixgbe_media_type_fiber: + case ixgbe_media_type_fiber_qsfp: + break; + default: + return false; + } + + return true; +} + +/** * ixgbe_check_mac_link_generic - Determine link and speed status * @hw: pointer to hardware structure * @speed: pointer to link speed @@ -4041,6 +4165,35 @@ s32 ixgbe_check_mac_link_generic(struct ixgbe_hw *hw, ixgbe_link_speed *speed, DEBUGFUNC("ixgbe_check_mac_link_generic"); + /* If Crosstalk fix enabled do the sanity check of making sure + * the SFP+ cage is full. 
+ */ + if (ixgbe_need_crosstalk_fix(hw)) { + u32 sfp_cage_full; + + switch (hw->mac.type) { + case ixgbe_mac_82599EB: + sfp_cage_full = IXGBE_READ_REG(hw, IXGBE_ESDP) & + IXGBE_ESDP_SDP2; + break; + case ixgbe_mac_X550EM_x: + case ixgbe_mac_X550EM_a: + sfp_cage_full = IXGBE_READ_REG(hw, IXGBE_ESDP) & + IXGBE_ESDP_SDP0; + break; + default: + /* sanity check - No SFP+ devices here */ + sfp_cage_full = false; + break; + } + + if (!sfp_cage_full) { + *link_up = false; + *speed = IXGBE_LINK_SPEED_UNKNOWN; + return IXGBE_SUCCESS; + } + } + /* clear the old state */ links_orig = IXGBE_READ_REG(hw, IXGBE_LINKS); @@ -4082,11 +4235,18 @@ s32 ixgbe_check_mac_link_generic(struct ixgbe_hw *hw, ixgbe_link_speed *speed, break; case IXGBE_LINKS_SPEED_100_82599: *speed = IXGBE_LINK_SPEED_100_FULL; - if (hw->mac.type >= ixgbe_mac_X550) { + if (hw->mac.type == ixgbe_mac_X550) { if (links_reg & IXGBE_LINKS_SPEED_NON_STD) *speed = IXGBE_LINK_SPEED_5GB_FULL; } break; + case IXGBE_LINKS_SPEED_10_X550EM_A: + *speed = IXGBE_LINK_SPEED_UNKNOWN; + if (hw->device_id == IXGBE_DEV_ID_X550EM_A_1G_T || + hw->device_id == IXGBE_DEV_ID_X550EM_A_1G_T_L) { + *speed = IXGBE_LINK_SPEED_10_FULL; + } + break; default: *speed = IXGBE_LINK_SPEED_UNKNOWN; } @@ -4318,43 +4478,31 @@ u8 ixgbe_calculate_checksum(u8 *buffer, u32 length) } /** - * ixgbe_host_interface_command - Issue command to manageability block + * ixgbe_hic_unlocked - Issue command to manageability block unlocked * @hw: pointer to the HW structure - * @buffer: contains the command to write and where the return status will - * be placed + * @buffer: command to write and where the return status will be placed * @length: length of buffer, must be multiple of 4 bytes * @timeout: time in ms to wait for command completion - * @return_data: read and return data from the buffer (true) or not (false) - * Needed because FW structures are big endian and decoding of - * these fields can be 8 bit or 16 bit based on command. Decoding - * is not easily understood without making a table of commands. - * So we will leave this up to the caller to read back the data - * in these cases. * * Communicates with the manageability block. On success return IXGBE_SUCCESS * else returns semaphore error when encountering an error acquiring * semaphore or IXGBE_ERR_HOST_INTERFACE_COMMAND when command fails. + * + * This function assumes that the IXGBE_GSSR_SW_MNG_SM semaphore is held + * by the caller. **/ -s32 ixgbe_host_interface_command(struct ixgbe_hw *hw, u32 *buffer, - u32 length, u32 timeout, bool return_data) +s32 ixgbe_hic_unlocked(struct ixgbe_hw *hw, u32 *buffer, u32 length, + u32 timeout) { - u32 hicr, i, bi, fwsts; - u32 hdr_size = sizeof(struct ixgbe_hic_hdr); - u16 buf_len; + u32 hicr, i, fwsts; u16 dword_len; - s32 status; - DEBUGFUNC("ixgbe_host_interface_command"); + DEBUGFUNC("ixgbe_hic_unlocked"); - if (length == 0 || length > IXGBE_HI_MAX_BLOCK_BYTE_LENGTH) { + if (!length || length > IXGBE_HI_MAX_BLOCK_BYTE_LENGTH) { DEBUGOUT1("Buffer length failure buffersize=%d.\n", length); return IXGBE_ERR_HOST_INTERFACE_COMMAND; } - /* Take management host interface semaphore */ - status = hw->mac.ops.acquire_swfw_sync(hw, IXGBE_GSSR_SW_MNG_SM); - - if (status) - return status; /* Set bit 9 of FWSTS clearing FW reset indication */ fwsts = IXGBE_READ_REG(hw, IXGBE_FWSTS); @@ -4362,17 +4510,15 @@ s32 ixgbe_host_interface_command(struct ixgbe_hw *hw, u32 *buffer, /* Check that the host interface is enabled. 
*/ hicr = IXGBE_READ_REG(hw, IXGBE_HICR); - if ((hicr & IXGBE_HICR_EN) == 0) { + if (!(hicr & IXGBE_HICR_EN)) { DEBUGOUT("IXGBE_HOST_EN bit disabled.\n"); - status = IXGBE_ERR_HOST_INTERFACE_COMMAND; - goto rel_out; + return IXGBE_ERR_HOST_INTERFACE_COMMAND; } /* Calculate length in DWORDs. We must be DWORD aligned */ - if ((length % (sizeof(u32))) != 0) { + if (length % sizeof(u32)) { DEBUGOUT("Buffer length failure, not aligned to dword"); - status = IXGBE_ERR_INVALID_ARGUMENT; - goto rel_out; + return IXGBE_ERR_INVALID_ARGUMENT; } dword_len = length >> 2; @@ -4395,14 +4541,59 @@ s32 ixgbe_host_interface_command(struct ixgbe_hw *hw, u32 *buffer, } /* Check command completion */ - if ((timeout != 0 && i == timeout) || + if ((timeout && i == timeout) || !(IXGBE_READ_REG(hw, IXGBE_HICR) & IXGBE_HICR_SV)) { ERROR_REPORT1(IXGBE_ERROR_CAUTION, "Command has failed with no status valid.\n"); - status = IXGBE_ERR_HOST_INTERFACE_COMMAND; - goto rel_out; + return IXGBE_ERR_HOST_INTERFACE_COMMAND; } + return IXGBE_SUCCESS; +} + +/** + * ixgbe_host_interface_command - Issue command to manageability block + * @hw: pointer to the HW structure + * @buffer: contains the command to write and where the return status will + * be placed + * @length: length of buffer, must be multiple of 4 bytes + * @timeout: time in ms to wait for command completion + * @return_data: read and return data from the buffer (true) or not (false) + * Needed because FW structures are big endian and decoding of + * these fields can be 8 bit or 16 bit based on command. Decoding + * is not easily understood without making a table of commands. + * So we will leave this up to the caller to read back the data + * in these cases. + * + * Communicates with the manageability block. On success return IXGBE_SUCCESS + * else returns semaphore error when encountering an error acquiring + * semaphore or IXGBE_ERR_HOST_INTERFACE_COMMAND when command fails. + **/ +s32 ixgbe_host_interface_command(struct ixgbe_hw *hw, u32 *buffer, + u32 length, u32 timeout, bool return_data) +{ + u32 hdr_size = sizeof(struct ixgbe_hic_hdr); + u16 dword_len; + u16 buf_len; + s32 status; + u32 bi; + + DEBUGFUNC("ixgbe_host_interface_command"); + + if (length == 0 || length > IXGBE_HI_MAX_BLOCK_BYTE_LENGTH) { + DEBUGOUT1("Buffer length failure buffersize=%d.\n", length); + return IXGBE_ERR_HOST_INTERFACE_COMMAND; + } + + /* Take management host interface semaphore */ + status = hw->mac.ops.acquire_swfw_sync(hw, IXGBE_GSSR_SW_MNG_SM); + if (status) + return status; + + status = ixgbe_hic_unlocked(hw, buffer, length, timeout); + if (status) + goto rel_out; + if (!return_data) goto rel_out; @@ -4417,7 +4608,7 @@ s32 ixgbe_host_interface_command(struct ixgbe_hw *hw, u32 *buffer, /* If there is any thing in data position pull it in */ buf_len = ((struct ixgbe_hic_hdr *)buffer)->buf_len; - if (buf_len == 0) + if (!buf_len) goto rel_out; if (length < buf_len + hdr_size) { @@ -4455,13 +4646,15 @@ rel_out: * semaphore or IXGBE_ERR_HOST_INTERFACE_COMMAND when command fails. 
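A sketch of the calling convention that splitting ixgbe_hic_unlocked() out of ixgbe_host_interface_command() enables: a caller that must issue several manageability commands without dropping the semaphore in between can hold IXGBE_GSSR_SW_MNG_SM itself and use the unlocked variant for each command. The helper name, the command buffers and the timeout parameter are placeholders for illustration; building real command structures is command specific and elided.

#include "ixgbe_common.h" /* ixgbe_hic_unlocked(), IXGBE_GSSR_SW_MNG_SM */

static s32 send_two_hic_cmds(struct ixgbe_hw *hw, u32 *cmd1, u32 len1,
                             u32 *cmd2, u32 len2, u32 timeout)
{
        s32 status;

        /* Hold the SW/FW semaphore across both commands. */
        status = hw->mac.ops.acquire_swfw_sync(hw, IXGBE_GSSR_SW_MNG_SM);
        if (status)
                return status;

        status = ixgbe_hic_unlocked(hw, cmd1, len1, timeout);
        if (!status)
                status = ixgbe_hic_unlocked(hw, cmd2, len2, timeout);

        hw->mac.ops.release_swfw_sync(hw, IXGBE_GSSR_SW_MNG_SM);
        return status;
}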
**/ s32 ixgbe_set_fw_drv_ver_generic(struct ixgbe_hw *hw, u8 maj, u8 min, - u8 build, u8 sub) + u8 build, u8 sub, u16 len, + const char *driver_ver) { struct ixgbe_hic_drv_info fw_cmd; int i; s32 ret_val = IXGBE_SUCCESS; DEBUGFUNC("ixgbe_set_fw_drv_ver_generic"); + UNREFERENCED_2PARAMETER(len, driver_ver); fw_cmd.hdr.cmd = FW_CEM_CMD_DRIVER_INFO; fw_cmd.hdr.buf_len = FW_CEM_CMD_DRIVER_INFO_LEN; @@ -4923,14 +5116,6 @@ s32 ixgbe_setup_mac_link_multispeed_fiber(struct ixgbe_hw *hw, speedcnt++; highest_link_speed = IXGBE_LINK_SPEED_10GB_FULL; - /* If we already have link at this speed, just jump out */ - status = ixgbe_check_link(hw, &link_speed, &link_up, false); - if (status != IXGBE_SUCCESS) - return status; - - if ((link_speed == IXGBE_LINK_SPEED_10GB_FULL) && link_up) - goto out; - /* Set the module link speed */ switch (hw->phy.media_type) { case ixgbe_media_type_fiber: @@ -4981,14 +5166,6 @@ s32 ixgbe_setup_mac_link_multispeed_fiber(struct ixgbe_hw *hw, if (highest_link_speed == IXGBE_LINK_SPEED_UNKNOWN) highest_link_speed = IXGBE_LINK_SPEED_1GB_FULL; - /* If we already have link at this speed, just jump out */ - status = ixgbe_check_link(hw, &link_speed, &link_up, false); - if (status != IXGBE_SUCCESS) - return status; - - if ((link_speed == IXGBE_LINK_SPEED_1GB_FULL) && link_up) - goto out; - /* Set the module link speed */ switch (hw->phy.media_type) { case ixgbe_media_type_fiber: diff --git a/src/dpdk/drivers/net/ixgbe/base/ixgbe_common.h b/src/dpdk/drivers/net/ixgbe/base/ixgbe_common.h index 0545f85c..903f34d5 100644 --- a/src/dpdk/drivers/net/ixgbe/base/ixgbe_common.h +++ b/src/dpdk/drivers/net/ixgbe/base/ixgbe_common.h @@ -72,6 +72,7 @@ s32 ixgbe_stop_adapter_generic(struct ixgbe_hw *hw); s32 ixgbe_led_on_generic(struct ixgbe_hw *hw, u32 index); s32 ixgbe_led_off_generic(struct ixgbe_hw *hw, u32 index); +s32 ixgbe_init_led_link_act_generic(struct ixgbe_hw *hw); s32 ixgbe_init_eeprom_params_generic(struct ixgbe_hw *hw); s32 ixgbe_write_eeprom_generic(struct ixgbe_hw *hw, u16 offset, u16 data); @@ -133,7 +134,7 @@ s32 ixgbe_clear_vmdq_generic(struct ixgbe_hw *hw, u32 rar, u32 vmdq); s32 ixgbe_insert_mac_addr_generic(struct ixgbe_hw *hw, u8 *addr, u32 vmdq); s32 ixgbe_init_uta_tables_generic(struct ixgbe_hw *hw); s32 ixgbe_set_vfta_generic(struct ixgbe_hw *hw, u32 vlan, - u32 vind, bool vlan_on, bool vlvf_bypass); + u32 vind, bool vlan_on, bool vlvf_bypass); s32 ixgbe_set_vlvf_generic(struct ixgbe_hw *hw, u32 vlan, u32 vind, bool vlan_on, u32 *vfta_delta, u32 vfta, bool vlvf_bypass); @@ -155,11 +156,14 @@ void ixgbe_set_rxpba_generic(struct ixgbe_hw *hw, int num_pb, u32 headroom, int strategy); void ixgbe_enable_relaxed_ordering_gen2(struct ixgbe_hw *hw); s32 ixgbe_set_fw_drv_ver_generic(struct ixgbe_hw *hw, u8 maj, u8 min, - u8 build, u8 ver); + u8 build, u8 ver, u16 len, const char *str); u8 ixgbe_calculate_checksum(u8 *buffer, u32 length); s32 ixgbe_host_interface_command(struct ixgbe_hw *hw, u32 *buffer, u32 length, u32 timeout, bool return_data); - +s32 ixgbe_hic_unlocked(struct ixgbe_hw *, u32 *buffer, u32 length, u32 timeout); +s32 ixgbe_shutdown_fw_phy(struct ixgbe_hw *); +s32 ixgbe_fw_phy_activity(struct ixgbe_hw *, u16 activity, + u32 (*data)[FW_PHY_ACT_DATA_COUNT]); void ixgbe_clear_tx_pending(struct ixgbe_hw *hw); extern s32 ixgbe_reset_pipeline_82599(struct ixgbe_hw *hw); diff --git a/src/dpdk/drivers/net/ixgbe/base/ixgbe_hv_vf.c b/src/dpdk/drivers/net/ixgbe/base/ixgbe_hv_vf.c new file mode 100644 index 00000000..47143a26 --- /dev/null +++ 
b/src/dpdk/drivers/net/ixgbe/base/ixgbe_hv_vf.c @@ -0,0 +1,240 @@ +/******************************************************************************* + +Copyright (c) 2001-2015, Intel Corporation +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + 3. Neither the name of the Intel Corporation nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. + +***************************************************************************/ + +#include "ixgbe_vf.h" +#include "ixgbe_hv_vf.h" + +/** + * Hyper-V variant - just a stub. + */ +static s32 ixgbevf_hv_update_mc_addr_list_vf(struct ixgbe_hw *hw, u8 *mc_addr_list, + u32 mc_addr_count, ixgbe_mc_addr_itr next, + bool clear) +{ + UNREFERENCED_5PARAMETER(hw, mc_addr_list, mc_addr_count, next, clear); + + return IXGBE_ERR_FEATURE_NOT_SUPPORTED; +} + +/** + * Hyper-V variant - just a stub. + */ +static s32 ixgbevf_hv_update_xcast_mode(struct ixgbe_hw *hw, int xcast_mode) +{ + UNREFERENCED_2PARAMETER(hw, xcast_mode); + + return IXGBE_ERR_FEATURE_NOT_SUPPORTED; +} + +/** + * Hyper-V variant - just a stub. + */ +static s32 ixgbevf_hv_set_vfta_vf(struct ixgbe_hw *hw, u32 vlan, u32 vind, + bool vlan_on, bool vlvf_bypass) +{ + UNREFERENCED_5PARAMETER(hw, vlan, vind, vlan_on, vlvf_bypass); + + return IXGBE_ERR_FEATURE_NOT_SUPPORTED; +} + +static s32 ixgbevf_hv_set_uc_addr_vf(struct ixgbe_hw *hw, u32 index, u8 *addr) +{ + UNREFERENCED_3PARAMETER(hw, index, addr); + + return IXGBE_ERR_FEATURE_NOT_SUPPORTED; +} + +/** + * Hyper-V variant - just a stub. + */ +static s32 ixgbevf_hv_reset_hw_vf(struct ixgbe_hw *hw) +{ + UNREFERENCED_PARAMETER(hw); + + return IXGBE_ERR_FEATURE_NOT_SUPPORTED; +} + +/** + * Hyper-V variant - just a stub. + */ +static s32 ixgbevf_hv_set_rar_vf(struct ixgbe_hw *hw, u32 index, u8 *addr, u32 vlan, u32 vind) +{ + UNREFERENCED_5PARAMETER(hw, index, addr, vlan, vind); + + return IXGBE_ERR_FEATURE_NOT_SUPPORTED; +} + +/** + * Hyper-V variant; there is no mailbox communication. 
+ */ +static s32 ixgbevf_hv_check_mac_link_vf(struct ixgbe_hw *hw, + ixgbe_link_speed *speed, + bool *link_up, + bool autoneg_wait_to_complete) +{ + struct ixgbe_mbx_info *mbx = &hw->mbx; + struct ixgbe_mac_info *mac = &hw->mac; + u32 links_reg; + UNREFERENCED_1PARAMETER(autoneg_wait_to_complete); + + /* If we were hit with a reset drop the link */ + if (!mbx->ops.check_for_rst(hw, 0) || !mbx->timeout) + mac->get_link_status = true; + + if (!mac->get_link_status) + goto out; + + /* if link status is down no point in checking to see if pf is up */ + links_reg = IXGBE_READ_REG(hw, IXGBE_VFLINKS); + if (!(links_reg & IXGBE_LINKS_UP)) + goto out; + + /* for SFP+ modules and DA cables on 82599 it can take up to 500usecs + * before the link status is correct + */ + if (mac->type == ixgbe_mac_82599_vf) { + int i; + + for (i = 0; i < 5; i++) { + DELAY(100); + links_reg = IXGBE_READ_REG(hw, IXGBE_VFLINKS); + + if (!(links_reg & IXGBE_LINKS_UP)) + goto out; + } + } + + switch (links_reg & IXGBE_LINKS_SPEED_82599) { + case IXGBE_LINKS_SPEED_10G_82599: + *speed = IXGBE_LINK_SPEED_10GB_FULL; + if (hw->mac.type >= ixgbe_mac_X550) { + if (links_reg & IXGBE_LINKS_SPEED_NON_STD) + *speed = IXGBE_LINK_SPEED_2_5GB_FULL; + } + break; + case IXGBE_LINKS_SPEED_1G_82599: + *speed = IXGBE_LINK_SPEED_1GB_FULL; + break; + case IXGBE_LINKS_SPEED_100_82599: + *speed = IXGBE_LINK_SPEED_100_FULL; + if (hw->mac.type == ixgbe_mac_X550) { + if (links_reg & IXGBE_LINKS_SPEED_NON_STD) + *speed = IXGBE_LINK_SPEED_5GB_FULL; + } + break; + case IXGBE_LINKS_SPEED_10_X550EM_A: + *speed = IXGBE_LINK_SPEED_UNKNOWN; + /* Reserved for pre-x550 devices */ + if (hw->mac.type >= ixgbe_mac_X550) + *speed = IXGBE_LINK_SPEED_10_FULL; + break; + default: + *speed = IXGBE_LINK_SPEED_UNKNOWN; + } + + /* if we passed all the tests above then the link is up and we no + * longer need to check for link + */ + mac->get_link_status = false; + +out: + *link_up = !mac->get_link_status; + return IXGBE_SUCCESS; +} + +/** + * ixgbevf_hv_set_rlpml_vf - Set the maximum receive packet length + * @hw: pointer to the HW structure + * @max_size: value to assign to max frame size + * Hyper-V variant. + **/ +static s32 ixgbevf_hv_set_rlpml_vf(struct ixgbe_hw *hw, u16 max_size) +{ + u32 reg; + + /* If we are on Hyper-V, we implement this functionality + * differently. + */ + reg = IXGBE_READ_REG(hw, IXGBE_VFRXDCTL(0)); + /* CRC == 4 */ + reg |= ((max_size + 4) | IXGBE_RXDCTL_RLPML_EN); + IXGBE_WRITE_REG(hw, IXGBE_VFRXDCTL(0), reg); + + return IXGBE_SUCCESS; +} + +/** + * ixgbevf_hv_negotiate_api_version_vf - Negotiate supported API version + * @hw: pointer to the HW structure + * @api: integer containing requested API version + * Hyper-V version - only ixgbe_mbox_api_10 supported. + **/ +static int ixgbevf_hv_negotiate_api_version_vf(struct ixgbe_hw *hw, int api) +{ + UNREFERENCED_1PARAMETER(hw); + + /* Hyper-V only supports api version ixgbe_mbox_api_10 */ + if (api != ixgbe_mbox_api_10) + return IXGBE_ERR_INVALID_ARGUMENT; + + return IXGBE_SUCCESS; +} + +/** + * ixgbevf_hv_init_ops_vf - Initialize the pointers for vf + * @hw: pointer to hardware structure + * + * This will assign function pointers, adapter-specific functions can + * override the assignment of generic function pointers by assigning + * their own adapter-specific function pointers. + * Does not touch the hardware. 
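To see how the Hyper-V negotiate_api_version stub above behaves from the caller's side, here is a sketch of the usual negotiate-downwards pattern. The helper and the preference order are illustrative, not code from the patch, and it assumes the negotiate_api_version op has been populated (as ixgbevf_hv_init_ops_vf() below does).

#include "ixgbe_type.h" /* struct ixgbe_hw, IXGBE_SUCCESS */
#include "ixgbe_mbx.h"  /* enum ixgbe_pfvf_api_rev */

static void negotiate_mbox_api(struct ixgbe_hw *hw)
{
        /* Newest first; the Hyper-V stub rejects everything except
         * ixgbe_mbox_api_10, so on Hyper-V the loop settles on 1.0. */
        static const int prefs[] = {
                ixgbe_mbox_api_13,
                ixgbe_mbox_api_12,
                ixgbe_mbox_api_11,
                ixgbe_mbox_api_10,
        };
        unsigned int i;

        for (i = 0; i < sizeof(prefs) / sizeof(prefs[0]); i++)
                if (hw->mac.ops.negotiate_api_version(hw, prefs[i]) ==
                    IXGBE_SUCCESS)
                        break;
}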
+ **/ +s32 ixgbevf_hv_init_ops_vf(struct ixgbe_hw *hw) +{ + /* Set defaults for VF then override applicable Hyper-V + * specific functions + */ + ixgbe_init_ops_vf(hw); + + hw->mac.ops.reset_hw = ixgbevf_hv_reset_hw_vf; + hw->mac.ops.check_link = ixgbevf_hv_check_mac_link_vf; + hw->mac.ops.negotiate_api_version = ixgbevf_hv_negotiate_api_version_vf; + hw->mac.ops.set_rar = ixgbevf_hv_set_rar_vf; + hw->mac.ops.update_mc_addr_list = ixgbevf_hv_update_mc_addr_list_vf; + hw->mac.ops.update_xcast_mode = ixgbevf_hv_update_xcast_mode; + hw->mac.ops.set_uc_addr = ixgbevf_hv_set_uc_addr_vf; + hw->mac.ops.set_vfta = ixgbevf_hv_set_vfta_vf; + hw->mac.ops.set_rlpml = ixgbevf_hv_set_rlpml_vf; + + return IXGBE_SUCCESS; +} diff --git a/src/dpdk/drivers/net/ixgbe/base/ixgbe_hv_vf.h b/src/dpdk/drivers/net/ixgbe/base/ixgbe_hv_vf.h new file mode 100644 index 00000000..9119f29f --- /dev/null +++ b/src/dpdk/drivers/net/ixgbe/base/ixgbe_hv_vf.h @@ -0,0 +1,41 @@ +/******************************************************************************* + +Copyright (c) 2001-2016, Intel Corporation +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + 3. Neither the name of the Intel Corporation nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. 
+ +***************************************************************************/ + +#ifndef _IXGBE_HV_VF_H_ +#define _IXGBE_HV_VF_H_ + +#include "ixgbe_type.h" + +s32 ixgbevf_hv_init_ops_vf(struct ixgbe_hw *hw); + +#endif /* _IXGBE_HV_VF_H_ */ diff --git a/src/dpdk/drivers/net/ixgbe/base/ixgbe_mbx.h b/src/dpdk/drivers/net/ixgbe/base/ixgbe_mbx.h index d775142d..7556a818 100644 --- a/src/dpdk/drivers/net/ixgbe/base/ixgbe_mbx.h +++ b/src/dpdk/drivers/net/ixgbe/base/ixgbe_mbx.h @@ -90,6 +90,7 @@ enum ixgbe_pfvf_api_rev { ixgbe_mbox_api_20, /* API version 2.0, solaris Phase1 VF driver */ ixgbe_mbox_api_11, /* API version 1.1, linux/freebsd VF driver */ ixgbe_mbox_api_12, /* API version 1.2, linux/freebsd VF driver */ + ixgbe_mbox_api_13, /* API version 1.3, linux/freebsd VF driver */ /* This value should always be last */ ixgbe_mbox_api_unknown, /* indicates that API version is not known */ }; @@ -109,9 +110,9 @@ enum ixgbe_pfvf_api_rev { #define IXGBE_VF_GET_QUEUES 0x09 /* get queue configuration */ /* mailbox API, version 1.2 VF requests */ -#define IXGBE_VF_GET_RETA 0x0a /* VF request for RETA */ -#define IXGBE_VF_GET_RSS_KEY 0x0b /* get RSS key */ -#define IXGBE_VF_UPDATE_XCAST_MODE 0x0C +#define IXGBE_VF_GET_RETA 0x0a /* VF request for RETA */ +#define IXGBE_VF_GET_RSS_KEY 0x0b /* get RSS key */ +#define IXGBE_VF_UPDATE_XCAST_MODE 0x0c /* GET_QUEUES return data indices within the mailbox */ #define IXGBE_VF_TX_QUEUES 1 /* number of Tx queues supported */ diff --git a/src/dpdk/drivers/net/ixgbe/base/ixgbe_osdep.h b/src/dpdk/drivers/net/ixgbe/base/ixgbe_osdep.h index 06d1ee1c..4aab278d 100644 --- a/src/dpdk/drivers/net/ixgbe/base/ixgbe_osdep.h +++ b/src/dpdk/drivers/net/ixgbe/base/ixgbe_osdep.h @@ -44,6 +44,7 @@ #include <rte_cycles.h> #include <rte_log.h> #include <rte_byteorder.h> +#include <rte_io.h> #include "../ixgbe_logs.h" #include "../ixgbe_bypass_defines.h" @@ -81,6 +82,7 @@ #define UNREFERENCED_2PARAMETER(_p, _q) #define UNREFERENCED_3PARAMETER(_p, _q, _r) #define UNREFERENCED_4PARAMETER(_p, _q, _r, _s) +#define UNREFERENCED_5PARAMETER(_p, _q, _r, _s, _t) /* Shared code error reporting */ enum { @@ -95,11 +97,13 @@ enum { #define STATIC static #define IXGBE_NTOHL(_i) rte_be_to_cpu_32(_i) #define IXGBE_NTOHS(_i) rte_be_to_cpu_16(_i) +#define IXGBE_CPU_TO_LE16(_i) rte_cpu_to_le_16(_i) #define IXGBE_CPU_TO_LE32(_i) rte_cpu_to_le_32(_i) -#define IXGBE_LE32_TO_CPU(_i) rte_le_to_cpu_32(_i) +#define IXGBE_LE32_TO_CPU(_i) rte_le_to_cpu_32(_i) #define IXGBE_LE32_TO_CPUS(_i) rte_le_to_cpu_32(_i) #define IXGBE_CPU_TO_BE16(_i) rte_cpu_to_be_16(_i) #define IXGBE_CPU_TO_BE32(_i) rte_cpu_to_be_32(_i) +#define IXGBE_BE32_TO_CPU(_i) rte_be_to_cpu_32(_i) typedef uint8_t u8; typedef int8_t s8; @@ -120,16 +124,18 @@ typedef int bool; #define prefetch(x) rte_prefetch0(x) -#define IXGBE_PCI_REG(reg) (*((volatile uint32_t *)(reg))) +#define IXGBE_PCI_REG(reg) rte_read32(reg) static inline uint32_t ixgbe_read_addr(volatile void* addr) { return rte_le_to_cpu_32(IXGBE_PCI_REG(addr)); } -#define IXGBE_PCI_REG_WRITE(reg, value) do { \ - IXGBE_PCI_REG((reg)) = (rte_cpu_to_le_32(value)); \ -} while(0) +#define IXGBE_PCI_REG_WRITE(reg, value) \ + rte_write32((rte_cpu_to_le_32(value)), reg) + +#define IXGBE_PCI_REG_WRITE_RELAXED(reg, value) \ + rte_write32_relaxed((rte_cpu_to_le_32(value)), reg) #define IXGBE_PCI_REG_ADDR(hw, reg) \ ((volatile uint32_t *)((char *)(hw)->hw_addr + (reg))) diff --git a/src/dpdk/drivers/net/ixgbe/base/ixgbe_phy.c b/src/dpdk/drivers/net/ixgbe/base/ixgbe_phy.c index ed1b14f3..c9538056 
100644 --- a/src/dpdk/drivers/net/ixgbe/base/ixgbe_phy.c +++ b/src/dpdk/drivers/net/ixgbe/base/ixgbe_phy.c @@ -113,7 +113,7 @@ s32 ixgbe_read_i2c_combined_generic_int(struct ixgbe_hw *hw, u8 addr, u16 reg, u16 *val, bool lock) { u32 swfw_mask = hw->phy.phy_semaphore_mask; - int max_retry = 10; + int max_retry = 3; int retry = 0; u8 csum_byte; u8 high_bits; @@ -121,8 +121,6 @@ s32 ixgbe_read_i2c_combined_generic_int(struct ixgbe_hw *hw, u8 addr, u16 reg, u8 reg_high; u8 csum; - if (hw->mac.type >= ixgbe_mac_X550) - max_retry = 3; reg_high = ((reg >> 7) & 0xFE) | 1; /* Indicate read combined */ csum = ixgbe_ones_comp_byte_add(reg_high, reg & 0xFF); csum = ~csum; @@ -283,6 +281,42 @@ s32 ixgbe_init_phy_ops_generic(struct ixgbe_hw *hw) } /** + * ixgbe_probe_phy - Probe a single address for a PHY + * @hw: pointer to hardware structure + * @phy_addr: PHY address to probe + * + * Returns true if PHY found + */ +static bool ixgbe_probe_phy(struct ixgbe_hw *hw, u16 phy_addr) +{ + u16 ext_ability = 0; + + if (!ixgbe_validate_phy_addr(hw, phy_addr)) { + DEBUGOUT1("Unable to validate PHY address 0x%04X\n", + phy_addr); + return false; + } + + if (ixgbe_get_phy_id(hw)) + return false; + + hw->phy.type = ixgbe_get_phy_type_from_id(hw->phy.id); + + if (hw->phy.type == ixgbe_phy_unknown) { + hw->phy.ops.read_reg(hw, IXGBE_MDIO_PHY_EXT_ABILITY, + IXGBE_MDIO_PMA_PMD_DEV_TYPE, &ext_ability); + if (ext_ability & + (IXGBE_MDIO_PHY_10GBASET_ABILITY | + IXGBE_MDIO_PHY_1000BASET_ABILITY)) + hw->phy.type = ixgbe_phy_cu_unknown; + else + hw->phy.type = ixgbe_phy_generic; + } + + return true; +} + +/** * ixgbe_identify_phy_generic - Get physical layer module * @hw: pointer to hardware structure * @@ -291,8 +325,7 @@ s32 ixgbe_init_phy_ops_generic(struct ixgbe_hw *hw) s32 ixgbe_identify_phy_generic(struct ixgbe_hw *hw) { s32 status = IXGBE_ERR_PHY_ADDR_INVALID; - u32 phy_addr; - u16 ext_ability = 0; + u16 phy_addr; DEBUGFUNC("ixgbe_identify_phy_generic"); @@ -303,45 +336,33 @@ s32 ixgbe_identify_phy_generic(struct ixgbe_hw *hw) hw->phy.phy_semaphore_mask = IXGBE_GSSR_PHY0_SM; } - if (hw->phy.type == ixgbe_phy_unknown) { - for (phy_addr = 0; phy_addr < IXGBE_MAX_PHY_ADDR; phy_addr++) { - if (ixgbe_validate_phy_addr(hw, phy_addr)) { - hw->phy.addr = phy_addr; - ixgbe_get_phy_id(hw); - hw->phy.type = - ixgbe_get_phy_type_from_id(hw->phy.id); - - if (hw->phy.type == ixgbe_phy_unknown) { - hw->phy.ops.read_reg(hw, - IXGBE_MDIO_PHY_EXT_ABILITY, - IXGBE_MDIO_PMA_PMD_DEV_TYPE, - &ext_ability); - if (ext_ability & - (IXGBE_MDIO_PHY_10GBASET_ABILITY | - IXGBE_MDIO_PHY_1000BASET_ABILITY)) - hw->phy.type = - ixgbe_phy_cu_unknown; - else - hw->phy.type = - ixgbe_phy_generic; - } + if (hw->phy.type != ixgbe_phy_unknown) + return IXGBE_SUCCESS; - status = IXGBE_SUCCESS; - break; - } - } + if (hw->phy.nw_mng_if_sel) { + phy_addr = (hw->phy.nw_mng_if_sel & + IXGBE_NW_MNG_IF_SEL_MDIO_PHY_ADD) >> + IXGBE_NW_MNG_IF_SEL_MDIO_PHY_ADD_SHIFT; + if (ixgbe_probe_phy(hw, phy_addr)) + return IXGBE_SUCCESS; + else + return IXGBE_ERR_PHY_ADDR_INVALID; + } - /* Certain media types do not have a phy so an address will not - * be found and the code will take this path. Caller has to - * decide if it is an error or not. 
- */ - if (status != IXGBE_SUCCESS) { - hw->phy.addr = 0; + for (phy_addr = 0; phy_addr < IXGBE_MAX_PHY_ADDR; phy_addr++) { + if (ixgbe_probe_phy(hw, phy_addr)) { + status = IXGBE_SUCCESS; + break; } - } else { - status = IXGBE_SUCCESS; } + /* Certain media types do not have a phy so an address will not + * be found and the code will take this path. Caller has to + * decide if it is an error or not. + */ + if (status != IXGBE_SUCCESS) + hw->phy.addr = 0; + return status; } @@ -393,6 +414,8 @@ bool ixgbe_validate_phy_addr(struct ixgbe_hw *hw, u32 phy_addr) if (phy_id != 0xFFFF && phy_id != 0x0) valid = true; + DEBUGOUT1("PHY ID HIGH is 0x%04X\n", phy_id); + return valid; } @@ -421,6 +444,9 @@ s32 ixgbe_get_phy_id(struct ixgbe_hw *hw) hw->phy.id |= (u32)(phy_id_low & IXGBE_PHY_REVISION_MASK); hw->phy.revision = (u32)(phy_id_low & ~IXGBE_PHY_REVISION_MASK); } + DEBUGOUT2("PHY_ID_HIGH 0x%04X, PHY_ID_LOW 0x%04X\n", + phy_id_high, phy_id_low); + return status; } @@ -439,7 +465,6 @@ enum ixgbe_phy_type ixgbe_get_phy_type_from_id(u32 phy_id) case TN1010_PHY_ID: phy_type = ixgbe_phy_tn; break; - case X550_PHY_ID1: case X550_PHY_ID2: case X550_PHY_ID3: case X540_PHY_ID: @@ -452,10 +477,12 @@ enum ixgbe_phy_type ixgbe_get_phy_type_from_id(u32 phy_id) phy_type = ixgbe_phy_nl; break; case X557_PHY_ID: + case X557_PHY_ID2: phy_type = ixgbe_phy_x550em_ext_t; break; case IXGBE_M88E1500_E_PHY_ID: - phy_type = ixgbe_phy_m88; + case IXGBE_M88E1543_E_PHY_ID: + phy_type = ixgbe_phy_ext_1g_t; break; default: phy_type = ixgbe_phy_unknown; @@ -506,11 +533,30 @@ s32 ixgbe_reset_phy_generic(struct ixgbe_hw *hw) */ for (i = 0; i < 30; i++) { msec_delay(100); - hw->phy.ops.read_reg(hw, IXGBE_MDIO_PHY_XS_CONTROL, - IXGBE_MDIO_PHY_XS_DEV_TYPE, &ctrl); - if (!(ctrl & IXGBE_MDIO_PHY_XS_RESET)) { - usec_delay(2); - break; + if (hw->phy.type == ixgbe_phy_x550em_ext_t) { + status = hw->phy.ops.read_reg(hw, + IXGBE_MDIO_TX_VENDOR_ALARMS_3, + IXGBE_MDIO_PMA_PMD_DEV_TYPE, + &ctrl); + if (status != IXGBE_SUCCESS) + return status; + + if (ctrl & IXGBE_MDIO_TX_VENDOR_ALARMS_3_RST_MASK) { + usec_delay(2); + break; + } + } else { + status = hw->phy.ops.read_reg(hw, + IXGBE_MDIO_PHY_XS_CONTROL, + IXGBE_MDIO_PHY_XS_DEV_TYPE, + &ctrl); + if (status != IXGBE_SUCCESS) + return status; + + if (!(ctrl & IXGBE_MDIO_PHY_XS_RESET)) { + usec_delay(2); + break; + } } } @@ -532,7 +578,7 @@ out: * @phy_data: Pointer to read data from PHY register **/ s32 ixgbe_read_phy_reg_mdi(struct ixgbe_hw *hw, u32 reg_addr, u32 device_type, - u16 *phy_data) + u16 *phy_data) { u32 i, data, command; @@ -554,12 +600,13 @@ s32 ixgbe_read_phy_reg_mdi(struct ixgbe_hw *hw, u32 reg_addr, u32 device_type, command = IXGBE_READ_REG(hw, IXGBE_MSCA); if ((command & IXGBE_MSCA_MDI_COMMAND) == 0) - break; + break; } if ((command & IXGBE_MSCA_MDI_COMMAND) != 0) { ERROR_REPORT1(IXGBE_ERROR_POLLING, "PHY address command did not complete.\n"); + DEBUGOUT("PHY address command did not complete, returning IXGBE_ERR_PHY\n"); return IXGBE_ERR_PHY; } @@ -589,6 +636,7 @@ s32 ixgbe_read_phy_reg_mdi(struct ixgbe_hw *hw, u32 reg_addr, u32 device_type, if ((command & IXGBE_MSCA_MDI_COMMAND) != 0) { ERROR_REPORT1(IXGBE_ERROR_POLLING, "PHY read command didn't complete\n"); + DEBUGOUT("PHY read command didn't complete, returning IXGBE_ERR_PHY\n"); return IXGBE_ERR_PHY; } @@ -719,7 +767,7 @@ s32 ixgbe_write_phy_reg_generic(struct ixgbe_hw *hw, u32 reg_addr, DEBUGFUNC("ixgbe_write_phy_reg_generic"); if (hw->mac.ops.acquire_swfw_sync(hw, gssr) == IXGBE_SUCCESS) { - status = 
ixgbe_write_phy_reg_mdi(hw, reg_addr, device_type, + status = hw->phy.ops.write_reg_mdi(hw, reg_addr, device_type, phy_data); hw->mac.ops.release_swfw_sync(hw, gssr); } else { @@ -746,91 +794,63 @@ s32 ixgbe_setup_phy_link_generic(struct ixgbe_hw *hw) ixgbe_get_copper_link_capabilities_generic(hw, &speed, &autoneg); - if (speed & IXGBE_LINK_SPEED_10GB_FULL) { - /* Set or unset auto-negotiation 10G advertisement */ - hw->phy.ops.read_reg(hw, IXGBE_MII_10GBASE_T_AUTONEG_CTRL_REG, - IXGBE_MDIO_AUTO_NEG_DEV_TYPE, - &autoneg_reg); + /* Set or unset auto-negotiation 10G advertisement */ + hw->phy.ops.read_reg(hw, IXGBE_MII_10GBASE_T_AUTONEG_CTRL_REG, + IXGBE_MDIO_AUTO_NEG_DEV_TYPE, + &autoneg_reg); - autoneg_reg &= ~IXGBE_MII_10GBASE_T_ADVERTISE; - if (hw->phy.autoneg_advertised & IXGBE_LINK_SPEED_10GB_FULL) - autoneg_reg |= IXGBE_MII_10GBASE_T_ADVERTISE; + autoneg_reg &= ~IXGBE_MII_10GBASE_T_ADVERTISE; + if ((hw->phy.autoneg_advertised & IXGBE_LINK_SPEED_10GB_FULL) && + (speed & IXGBE_LINK_SPEED_10GB_FULL)) + autoneg_reg |= IXGBE_MII_10GBASE_T_ADVERTISE; - hw->phy.ops.write_reg(hw, IXGBE_MII_10GBASE_T_AUTONEG_CTRL_REG, - IXGBE_MDIO_AUTO_NEG_DEV_TYPE, - autoneg_reg); - } + hw->phy.ops.write_reg(hw, IXGBE_MII_10GBASE_T_AUTONEG_CTRL_REG, + IXGBE_MDIO_AUTO_NEG_DEV_TYPE, + autoneg_reg); - if (hw->mac.type == ixgbe_mac_X550) { - if (speed & IXGBE_LINK_SPEED_5GB_FULL) { - /* Set or unset auto-negotiation 5G advertisement */ - hw->phy.ops.read_reg(hw, - IXGBE_MII_AUTONEG_VENDOR_PROVISION_1_REG, - IXGBE_MDIO_AUTO_NEG_DEV_TYPE, - &autoneg_reg); - - autoneg_reg &= ~IXGBE_MII_5GBASE_T_ADVERTISE; - if (hw->phy.autoneg_advertised & - IXGBE_LINK_SPEED_5GB_FULL) - autoneg_reg |= IXGBE_MII_5GBASE_T_ADVERTISE; - - hw->phy.ops.write_reg(hw, - IXGBE_MII_AUTONEG_VENDOR_PROVISION_1_REG, - IXGBE_MDIO_AUTO_NEG_DEV_TYPE, - autoneg_reg); - } + hw->phy.ops.read_reg(hw, IXGBE_MII_AUTONEG_VENDOR_PROVISION_1_REG, + IXGBE_MDIO_AUTO_NEG_DEV_TYPE, + &autoneg_reg); - if (speed & IXGBE_LINK_SPEED_2_5GB_FULL) { - /* Set or unset auto-negotiation 2.5G advertisement */ - hw->phy.ops.read_reg(hw, - IXGBE_MII_AUTONEG_VENDOR_PROVISION_1_REG, - IXGBE_MDIO_AUTO_NEG_DEV_TYPE, - &autoneg_reg); - - autoneg_reg &= ~IXGBE_MII_2_5GBASE_T_ADVERTISE; - if (hw->phy.autoneg_advertised & - IXGBE_LINK_SPEED_2_5GB_FULL) - autoneg_reg |= IXGBE_MII_2_5GBASE_T_ADVERTISE; - - hw->phy.ops.write_reg(hw, - IXGBE_MII_AUTONEG_VENDOR_PROVISION_1_REG, - IXGBE_MDIO_AUTO_NEG_DEV_TYPE, - autoneg_reg); - } + if (hw->mac.type == ixgbe_mac_X550) { + /* Set or unset auto-negotiation 5G advertisement */ + autoneg_reg &= ~IXGBE_MII_5GBASE_T_ADVERTISE; + if ((hw->phy.autoneg_advertised & IXGBE_LINK_SPEED_5GB_FULL) && + (speed & IXGBE_LINK_SPEED_5GB_FULL)) + autoneg_reg |= IXGBE_MII_5GBASE_T_ADVERTISE; + + /* Set or unset auto-negotiation 2.5G advertisement */ + autoneg_reg &= ~IXGBE_MII_2_5GBASE_T_ADVERTISE; + if ((hw->phy.autoneg_advertised & + IXGBE_LINK_SPEED_2_5GB_FULL) && + (speed & IXGBE_LINK_SPEED_2_5GB_FULL)) + autoneg_reg |= IXGBE_MII_2_5GBASE_T_ADVERTISE; } - if (speed & IXGBE_LINK_SPEED_1GB_FULL) { - /* Set or unset auto-negotiation 1G advertisement */ - hw->phy.ops.read_reg(hw, - IXGBE_MII_AUTONEG_VENDOR_PROVISION_1_REG, - IXGBE_MDIO_AUTO_NEG_DEV_TYPE, - &autoneg_reg); + /* Set or unset auto-negotiation 1G advertisement */ + autoneg_reg &= ~IXGBE_MII_1GBASE_T_ADVERTISE; + if ((hw->phy.autoneg_advertised & IXGBE_LINK_SPEED_1GB_FULL) && + (speed & IXGBE_LINK_SPEED_1GB_FULL)) + autoneg_reg |= IXGBE_MII_1GBASE_T_ADVERTISE; - autoneg_reg &= 
~IXGBE_MII_1GBASE_T_ADVERTISE; - if (hw->phy.autoneg_advertised & IXGBE_LINK_SPEED_1GB_FULL) - autoneg_reg |= IXGBE_MII_1GBASE_T_ADVERTISE; + hw->phy.ops.write_reg(hw, IXGBE_MII_AUTONEG_VENDOR_PROVISION_1_REG, + IXGBE_MDIO_AUTO_NEG_DEV_TYPE, + autoneg_reg); - hw->phy.ops.write_reg(hw, - IXGBE_MII_AUTONEG_VENDOR_PROVISION_1_REG, - IXGBE_MDIO_AUTO_NEG_DEV_TYPE, - autoneg_reg); - } + /* Set or unset auto-negotiation 100M advertisement */ + hw->phy.ops.read_reg(hw, IXGBE_MII_AUTONEG_ADVERTISE_REG, + IXGBE_MDIO_AUTO_NEG_DEV_TYPE, + &autoneg_reg); - if (speed & IXGBE_LINK_SPEED_100_FULL) { - /* Set or unset auto-negotiation 100M advertisement */ - hw->phy.ops.read_reg(hw, IXGBE_MII_AUTONEG_ADVERTISE_REG, - IXGBE_MDIO_AUTO_NEG_DEV_TYPE, - &autoneg_reg); + autoneg_reg &= ~(IXGBE_MII_100BASE_T_ADVERTISE | + IXGBE_MII_100BASE_T_ADVERTISE_HALF); + if ((hw->phy.autoneg_advertised & IXGBE_LINK_SPEED_100_FULL) && + (speed & IXGBE_LINK_SPEED_100_FULL)) + autoneg_reg |= IXGBE_MII_100BASE_T_ADVERTISE; - autoneg_reg &= ~(IXGBE_MII_100BASE_T_ADVERTISE | - IXGBE_MII_100BASE_T_ADVERTISE_HALF); - if (hw->phy.autoneg_advertised & IXGBE_LINK_SPEED_100_FULL) - autoneg_reg |= IXGBE_MII_100BASE_T_ADVERTISE; - - hw->phy.ops.write_reg(hw, IXGBE_MII_AUTONEG_ADVERTISE_REG, - IXGBE_MDIO_AUTO_NEG_DEV_TYPE, - autoneg_reg); - } + hw->phy.ops.write_reg(hw, IXGBE_MII_AUTONEG_ADVERTISE_REG, + IXGBE_MDIO_AUTO_NEG_DEV_TYPE, + autoneg_reg); /* Blocked by MNG FW so don't reset PHY */ if (ixgbe_check_reset_blocked(hw)) @@ -882,6 +902,9 @@ s32 ixgbe_setup_phy_link_speed_generic(struct ixgbe_hw *hw, if (speed & IXGBE_LINK_SPEED_100_FULL) hw->phy.autoneg_advertised |= IXGBE_LINK_SPEED_100_FULL; + if (speed & IXGBE_LINK_SPEED_10_FULL) + hw->phy.autoneg_advertised |= IXGBE_LINK_SPEED_10_FULL; + /* Setup link based on the new speed settings */ ixgbe_setup_phy_link(hw); @@ -919,6 +942,7 @@ static s32 ixgbe_get_copper_speeds_supported(struct ixgbe_hw *hw) hw->phy.speeds_supported |= IXGBE_LINK_SPEED_5GB_FULL; break; case ixgbe_mac_X550EM_x: + case ixgbe_mac_X550EM_a: hw->phy.speeds_supported &= ~IXGBE_LINK_SPEED_100_FULL; break; default: diff --git a/src/dpdk/drivers/net/ixgbe/base/ixgbe_phy.h b/src/dpdk/drivers/net/ixgbe/base/ixgbe_phy.h index 281f9faf..820d4712 100644 --- a/src/dpdk/drivers/net/ixgbe/base/ixgbe_phy.h +++ b/src/dpdk/drivers/net/ixgbe/base/ixgbe_phy.h @@ -92,8 +92,9 @@ POSSIBILITY OF SUCH DAMAGE. #define IXGBE_CS4227_GLOBAL_ID_MSB 1 #define IXGBE_CS4227_SCRATCH 2 #define IXGBE_CS4227_GLOBAL_ID_VALUE 0x03E5 -#define IXGBE_CS4223_PHY_ID 0x7003/* Quad port */ -#define IXGBE_CS4227_PHY_ID 0x3003/* Dual port */ +#define IXGBE_CS4227_EFUSE_PDF_SKU 0x19F +#define IXGBE_CS4223_SKU_ID 0x0010 /* Quad port */ +#define IXGBE_CS4227_SKU_ID 0x0014 /* Dual port */ #define IXGBE_CS4227_RESET_PENDING 0x1357 #define IXGBE_CS4227_RESET_COMPLETE 0x5AA5 #define IXGBE_CS4227_RETRIES 15 diff --git a/src/dpdk/drivers/net/ixgbe/base/ixgbe_type.h b/src/dpdk/drivers/net/ixgbe/base/ixgbe_type.h index 83818a96..bb1f85b0 100644 --- a/src/dpdk/drivers/net/ixgbe/base/ixgbe_type.h +++ b/src/dpdk/drivers/net/ixgbe/base/ixgbe_type.h @@ -105,11 +105,11 @@ POSSIBILITY OF SUCH DAMAGE. 
#define IXGBE_SUBDEV_ID_82599_560FLR 0x17D0 #define IXGBE_SUBDEV_ID_82599_ECNA_DP 0x0470 #define IXGBE_SUBDEV_ID_82599_SP_560FLR 0x211B -#define IXGBE_SUBDEV_ID_82599_LOM_SFP 0x8976 #define IXGBE_SUBDEV_ID_82599_LOM_SNAP6 0x2159 #define IXGBE_SUBDEV_ID_82599_SFP_1OCP 0x000D #define IXGBE_SUBDEV_ID_82599_SFP_2OCP 0x0008 -#define IXGBE_SUBDEV_ID_82599_SFP_LOM 0x06EE +#define IXGBE_SUBDEV_ID_82599_SFP_LOM_OEM1 0x8976 +#define IXGBE_SUBDEV_ID_82599_SFP_LOM_OEM2 0x06EE #define IXGBE_DEV_ID_82599_BACKPLANE_FCOE 0x152A #define IXGBE_DEV_ID_82599_SFP_FCOE 0x1529 #define IXGBE_DEV_ID_82599_SFP_EM 0x1507 @@ -146,6 +146,7 @@ POSSIBILITY OF SUCH DAMAGE. #define IXGBE_DEV_ID_X550EM_X_SFP 0x15AC #define IXGBE_DEV_ID_X550EM_X_10G_T 0x15AD #define IXGBE_DEV_ID_X550EM_X_1G_T 0x15AE +#define IXGBE_DEV_ID_X550EM_X_XFI 0x15B0 #define IXGBE_DEV_ID_X550_VF_HV 0x1564 #define IXGBE_DEV_ID_X550_VF 0x1565 #define IXGBE_DEV_ID_X550EM_A_VF 0x15C5 @@ -565,6 +566,13 @@ struct ixgbe_thermal_sensor_data { #define IXGBE_PROXYFC 0x05F64 /* Proxying Filter Control Register */ #define IXGBE_VXLANCTRL 0x0000507C /* Rx filter VXLAN UDPPORT Register */ +/* masks for accessing VXLAN and GENEVE UDP ports */ +#define IXGBE_VXLANCTRL_VXLAN_UDPPORT_MASK 0x0000ffff /* VXLAN port */ +#define IXGBE_VXLANCTRL_GENEVE_UDPPORT_MASK 0xffff0000 /* GENEVE port */ +#define IXGBE_VXLANCTRL_ALL_UDPPORT_MASK 0xffffffff /* GENEVE/VXLAN */ + +#define IXGBE_VXLANCTRL_GENEVE_UDPPORT_SHIFT 16 + #define IXGBE_FHFT(_n) (0x09000 + ((_n) * 0x100)) /* Flex host filter table */ /* Ext Flexible Host Filter Table */ #define IXGBE_FHFT_EXT(_n) (0x09800 + ((_n) * 0x100)) @@ -1038,7 +1046,7 @@ struct ixgbe_dmac_config { #define IXGBE_FTFT 0x09400 /* 0x9400-0x97FC */ #define IXGBE_METF(_i) (0x05190 + ((_i) * 4)) /* 4 of these (0-3) */ #define IXGBE_MDEF_EXT(_i) (0x05160 + ((_i) * 4)) /* 8 of these (0-7) */ -#define IXGBE_LSWFW 0x15014 +#define IXGBE_LSWFW 0x15F14 #define IXGBE_BMCIP(_i) (0x05050 + ((_i) * 4)) /* 0x5050-0x505C */ #define IXGBE_BMCIPVAL 0x05060 #define IXGBE_BMCIP_IPADDR_TYPE 0x00000001 @@ -1640,17 +1648,17 @@ struct ixgbe_dmac_config { #define TN1010_PHY_ID 0x00A19410 #define TNX_FW_REV 0xB #define X540_PHY_ID 0x01540200 -#define X550_PHY_ID1 0x01540220 #define X550_PHY_ID2 0x01540223 #define X550_PHY_ID3 0x01540221 #define X557_PHY_ID 0x01540240 +#define X557_PHY_ID2 0x01540250 #define AQ_FW_REV 0x20 #define QT2022_PHY_ID 0x0043A400 #define ATH_PHY_ID 0x03429050 /* PHY Types */ -#define IXGBE_M88E1500_E_PHY_ID 0x01410DD0 -#define IXGBE_M88E1543_E_PHY_ID 0x01410EA0 +#define IXGBE_M88E1500_E_PHY_ID 0x01410DD0 +#define IXGBE_M88E1543_E_PHY_ID 0x01410EA0 /* Special PHY Init Routine */ #define IXGBE_PHY_INIT_OFFSET_NL 0x002B @@ -1765,6 +1773,8 @@ enum { #define IXGBE_VT_CTL_POOL_MASK (0x3F << IXGBE_VT_CTL_POOL_SHIFT) /* VMOLR bitmasks */ +#define IXGBE_VMOLR_UPE 0x00400000 /* unicast promiscuous */ +#define IXGBE_VMOLR_VPE 0x00800000 /* VLAN promiscuous */ #define IXGBE_VMOLR_AUPE 0x01000000 /* accept untagged packets */ #define IXGBE_VMOLR_ROMPE 0x02000000 /* accept packets in MTA tbl */ #define IXGBE_VMOLR_ROPE 0x04000000 /* accept packets in UC tbl */ @@ -2203,6 +2213,7 @@ enum { #define IXGBE_LINKS_SPEED_10G_82599 0x30000000 #define IXGBE_LINKS_SPEED_1G_82599 0x20000000 #define IXGBE_LINKS_SPEED_100_82599 0x10000000 +#define IXGBE_LINKS_SPEED_10_X550EM_A 0x00000000 #define IXGBE_LINK_UP_TIME 90 /* 9.0 Seconds */ #define IXGBE_AUTO_NEG_TIME 45 /* 4.5 Seconds */ @@ -2335,7 +2346,9 @@ enum { #define IXGBE_SAN_MAC_ADDR_PTR 0x28 #define 
IXGBE_DEVICE_CAPS 0x2C -#define IXGBE_SERIAL_NUMBER_MAC_ADDR 0x11 +#define IXGBE_82599_SERIAL_NUMBER_MAC_ADDR 0x11 +#define IXGBE_X550_SERIAL_NUMBER_MAC_ADDR 0x04 + #define IXGBE_PCIE_MSIX_82599_CAPS 0x72 #define IXGBE_MAX_MSIX_VECTORS_82599 0x40 #define IXGBE_PCIE_MSIX_82598_CAPS 0x62 @@ -2780,6 +2793,7 @@ enum { #define IXGBE_RXDADV_PKTTYPE_UDP 0x00000200 /* UDP hdr present */ #define IXGBE_RXDADV_PKTTYPE_SCTP 0x00000400 /* SCTP hdr present */ #define IXGBE_RXDADV_PKTTYPE_NFS 0x00000800 /* NFS hdr present */ +#define IXGBE_RXDADV_PKTTYPE_GENEVE 0x00000800 /* GENEVE hdr present */ #define IXGBE_RXDADV_PKTTYPE_VXLAN 0x00000800 /* VXLAN hdr present */ #define IXGBE_RXDADV_PKTTYPE_TUNNEL 0x00010000 /* Tunnel type */ #define IXGBE_RXDADV_PKTTYPE_IPSEC_ESP 0x00001000 /* IPSec ESP */ @@ -3023,6 +3037,7 @@ enum ixgbe_fdir_pballoc_type { #define FW_CEM_UNUSED_VER 0x0 #define FW_CEM_MAX_RETRIES 3 #define FW_CEM_RESP_STATUS_SUCCESS 0x1 +#define FW_CEM_DRIVER_VERSION_SIZE 39 /* +9 would send 48 bytes to fw */ #define FW_READ_SHADOW_RAM_CMD 0x31 #define FW_READ_SHADOW_RAM_LEN 0x6 #define FW_WRITE_SHADOW_RAM_CMD 0x33 @@ -3048,13 +3063,66 @@ enum ixgbe_fdir_pballoc_type { #define FW_INT_PHY_REQ_LEN 10 #define FW_INT_PHY_REQ_READ 0 #define FW_INT_PHY_REQ_WRITE 1 +#define FW_PHY_ACT_REQ_CMD 5 +#define FW_PHY_ACT_DATA_COUNT 4 +#define FW_PHY_ACT_REQ_LEN (4 + 4 * FW_PHY_ACT_DATA_COUNT) +#define FW_PHY_ACT_INIT_PHY 1 +#define FW_PHY_ACT_SETUP_LINK 2 +#define FW_PHY_ACT_LINK_SPEED_10 (1u << 0) +#define FW_PHY_ACT_LINK_SPEED_100 (1u << 1) +#define FW_PHY_ACT_LINK_SPEED_1G (1u << 2) +#define FW_PHY_ACT_LINK_SPEED_2_5G (1u << 3) +#define FW_PHY_ACT_LINK_SPEED_5G (1u << 4) +#define FW_PHY_ACT_LINK_SPEED_10G (1u << 5) +#define FW_PHY_ACT_LINK_SPEED_20G (1u << 6) +#define FW_PHY_ACT_LINK_SPEED_25G (1u << 7) +#define FW_PHY_ACT_LINK_SPEED_40G (1u << 8) +#define FW_PHY_ACT_LINK_SPEED_50G (1u << 9) +#define FW_PHY_ACT_LINK_SPEED_100G (1u << 10) +#define FW_PHY_ACT_SETUP_LINK_PAUSE_SHIFT 16 +#define FW_PHY_ACT_SETUP_LINK_PAUSE_MASK (3u << \ + FW_PHY_ACT_SETUP_LINK_PAUSE_SHIFT) +#define FW_PHY_ACT_SETUP_LINK_PAUSE_NONE 0u +#define FW_PHY_ACT_SETUP_LINK_PAUSE_TX 1u +#define FW_PHY_ACT_SETUP_LINK_PAUSE_RX 2u +#define FW_PHY_ACT_SETUP_LINK_PAUSE_RXTX 3u +#define FW_PHY_ACT_SETUP_LINK_LP (1u << 18) +#define FW_PHY_ACT_SETUP_LINK_HP (1u << 19) +#define FW_PHY_ACT_SETUP_LINK_EEE (1u << 20) +#define FW_PHY_ACT_SETUP_LINK_AN (1u << 22) +#define FW_PHY_ACT_SETUP_LINK_RSP_DOWN (1u << 0) +#define FW_PHY_ACT_GET_LINK_INFO 3 +#define FW_PHY_ACT_GET_LINK_INFO_EEE (1u << 19) +#define FW_PHY_ACT_GET_LINK_INFO_FC_TX (1u << 20) +#define FW_PHY_ACT_GET_LINK_INFO_FC_RX (1u << 21) +#define FW_PHY_ACT_GET_LINK_INFO_POWER (1u << 22) +#define FW_PHY_ACT_GET_LINK_INFO_AN_COMPLETE (1u << 24) +#define FW_PHY_ACT_GET_LINK_INFO_TEMP (1u << 25) +#define FW_PHY_ACT_GET_LINK_INFO_LP_FC_TX (1u << 28) +#define FW_PHY_ACT_GET_LINK_INFO_LP_FC_RX (1u << 29) +#define FW_PHY_ACT_FORCE_LINK_DOWN 4 +#define FW_PHY_ACT_FORCE_LINK_DOWN_OFF (1u << 0) +#define FW_PHY_ACT_PHY_SW_RESET 5 +#define FW_PHY_ACT_PHY_HW_RESET 6 +#define FW_PHY_ACT_GET_PHY_INFO 7 +#define FW_PHY_ACT_UD_2 0x1002 +#define FW_PHY_ACT_UD_2_10G_KR_EEE (1u << 6) +#define FW_PHY_ACT_UD_2_10G_KX4_EEE (1u << 5) +#define FW_PHY_ACT_UD_2_1G_KX_EEE (1u << 4) +#define FW_PHY_ACT_UD_2_10G_T_EEE (1u << 3) +#define FW_PHY_ACT_UD_2_1G_T_EEE (1u << 2) +#define FW_PHY_ACT_UD_2_100M_TX_EEE (1u << 1) +#define FW_PHY_ACT_RETRIES 50 +#define FW_PHY_INFO_SPEED_MASK 0xFFFu +#define FW_PHY_INFO_ID_HI_MASK 
0xFFFF0000u +#define FW_PHY_INFO_ID_LO_MASK 0x0000FFFFu /* Host Interface Command Structures */ #ifdef C99 #pragma pack(push, 1) #else -#pragma pack(1) +#pragma pack (1) #endif /* C99 */ struct ixgbe_hic_hdr { @@ -3097,6 +3165,16 @@ struct ixgbe_hic_drv_info { u16 pad2; /* end spacing to ensure length is mult. of dword2 */ }; +struct ixgbe_hic_drv_info2 { + struct ixgbe_hic_hdr hdr; + u8 port_num; + u8 ver_sub; + u8 ver_build; + u8 ver_min; + u8 ver_maj; + char driver_string[FW_CEM_DRIVER_VERSION_SIZE]; +}; + /* These need to be dword aligned */ struct ixgbe_hic_read_shadow_ram { union ixgbe_hic_hdr2 hdr; @@ -3136,13 +3214,26 @@ struct ixgbe_hic_internal_phy_req { u8 command_type; __be16 address; u16 rsv1; - __le32 write_data; + __be32 write_data; u16 pad; }; struct ixgbe_hic_internal_phy_resp { struct ixgbe_hic_hdr hdr; - __le32 read_data; + __be32 read_data; +}; + +struct ixgbe_hic_phy_activity_req { + struct ixgbe_hic_hdr hdr; + u8 port_number; + u8 pad; + __le16 activity_id; + __be32 data[FW_PHY_ACT_DATA_COUNT]; +}; + +struct ixgbe_hic_phy_activity_resp { + struct ixgbe_hic_hdr hdr; + __be32 data[FW_PHY_ACT_DATA_COUNT]; }; #ifdef C99 @@ -3305,7 +3396,7 @@ typedef u32 ixgbe_autoneg_advertised; /* Link speed */ typedef u32 ixgbe_link_speed; #define IXGBE_LINK_SPEED_UNKNOWN 0 -#define IXGBE_LINK_SPEED_10_FULL 0x0004 +#define IXGBE_LINK_SPEED_10_FULL 0x0002 #define IXGBE_LINK_SPEED_100_FULL 0x0008 #define IXGBE_LINK_SPEED_1GB_FULL 0x0020 #define IXGBE_LINK_SPEED_2_5GB_FULL 0x0400 @@ -3335,6 +3426,7 @@ typedef u32 ixgbe_physical_layer; #define IXGBE_PHYSICAL_LAYER_10GBASE_XAUI 0x1000 #define IXGBE_PHYSICAL_LAYER_SFP_ACTIVE_DA 0x2000 #define IXGBE_PHYSICAL_LAYER_1000BASE_SX 0x4000 +#define IXGBE_PHYSICAL_LAYER_10BASE_T 0x8000 /* Flow Control Data Sheet defined values * Calculation and defines taken from 802.1bb Annex O @@ -3553,7 +3645,9 @@ enum ixgbe_phy_type { ixgbe_phy_aq, ixgbe_phy_x550em_kr, ixgbe_phy_x550em_kx4, + ixgbe_phy_x550em_xfi, ixgbe_phy_x550em_ext_t, + ixgbe_phy_ext_1g_t, ixgbe_phy_cu_unknown, ixgbe_phy_qt, ixgbe_phy_xaui, @@ -3572,7 +3666,7 @@ enum ixgbe_phy_type { ixgbe_phy_qsfp_unknown, ixgbe_phy_sfp_unsupported, /*Enforce bit set with unsupported module*/ ixgbe_phy_sgmii, - ixgbe_phy_m88, + ixgbe_phy_fw, ixgbe_phy_generic }; @@ -3629,14 +3723,6 @@ enum ixgbe_fc_mode { ixgbe_fc_default }; -/* Master/slave control */ -enum ixgbe_ms_type { - ixgbe_ms_hw_default = 0, - ixgbe_ms_force_master, - ixgbe_ms_force_slave, - ixgbe_ms_auto -}; - /* Smart Speed Settings */ #define IXGBE_SMARTSPEED_MAX_RETRIES 3 enum ixgbe_smart_speed { @@ -3840,6 +3926,7 @@ struct ixgbe_mac_operations { void (*init_swfw_sync)(struct ixgbe_hw *); s32 (*prot_autoc_read)(struct ixgbe_hw *, bool *, u32 *); s32 (*prot_autoc_write)(struct ixgbe_hw *, u32, bool); + s32 (*negotiate_api_version)(struct ixgbe_hw *hw, int api); /* Link */ void (*disable_tx_laser)(struct ixgbe_hw *); @@ -3860,6 +3947,7 @@ struct ixgbe_mac_operations { s32 (*led_off)(struct ixgbe_hw *, u32); s32 (*blink_led_start)(struct ixgbe_hw *, u32); s32 (*blink_led_stop)(struct ixgbe_hw *, u32); + s32 (*init_led_link_act)(struct ixgbe_hw *); /* RAR, Multicast, VLAN */ s32 (*set_rar)(struct ixgbe_hw *, u32, u8 *, u32, u32); @@ -3883,6 +3971,8 @@ struct ixgbe_mac_operations { s32 (*init_uta_tables)(struct ixgbe_hw *); void (*set_mac_anti_spoofing)(struct ixgbe_hw *, bool, int); void (*set_vlan_anti_spoofing)(struct ixgbe_hw *, bool, int); + s32 (*update_xcast_mode)(struct ixgbe_hw *, int); + s32 (*set_rlpml)(struct ixgbe_hw *, u16); /* Flow 
Control */ s32 (*fc_enable)(struct ixgbe_hw *); @@ -3890,7 +3980,8 @@ struct ixgbe_mac_operations { void (*fc_autoneg)(struct ixgbe_hw *); /* Manageability interface */ - s32 (*set_fw_drv_ver)(struct ixgbe_hw *, u8, u8, u8, u8); + s32 (*set_fw_drv_ver)(struct ixgbe_hw *, u8, u8, u8, u8, u16, + const char *); s32 (*get_thermal_sensor_data)(struct ixgbe_hw *); s32 (*init_thermal_sensor_thresh)(struct ixgbe_hw *hw); void (*get_rtrup2tc)(struct ixgbe_hw *hw, u8 *map); @@ -4000,6 +4091,7 @@ struct ixgbe_mac_info { struct ixgbe_dmac_config dmac_config; bool set_lben; u32 max_link_up_time; + u8 led_link_act; }; struct ixgbe_phy_info { @@ -4015,8 +4107,8 @@ struct ixgbe_phy_info { bool reset_disable; ixgbe_autoneg_advertised autoneg_advertised; ixgbe_link_speed speeds_supported; - enum ixgbe_ms_type ms_type; - enum ixgbe_ms_type original_ms_type; + ixgbe_link_speed eee_speeds_supported; + ixgbe_link_speed eee_speeds_advertised; enum ixgbe_smart_speed smart_speed; bool smart_speed_active; bool multispeed_fiber; @@ -4078,6 +4170,7 @@ struct ixgbe_hw { bool force_full_reset; bool allow_unsupported_sfp; bool wol_enabled; + bool need_crosstalk_fix; }; #define ixgbe_call_func(hw, func, params, error) \ @@ -4136,16 +4229,35 @@ struct ixgbe_hw { #define IXGBE_KRM_LINK_S1(P) ((P) ? 0x8200 : 0x4200) #define IXGBE_KRM_LINK_CTRL_1(P) ((P) ? 0x820C : 0x420C) #define IXGBE_KRM_AN_CNTL_1(P) ((P) ? 0x822C : 0x422C) +#define IXGBE_KRM_AN_CNTL_4(P) ((P) ? 0x8238 : 0x4238) #define IXGBE_KRM_AN_CNTL_8(P) ((P) ? 0x8248 : 0x4248) +#define IXGBE_KRM_PCS_KX_AN(P) ((P) ? 0x9918 : 0x5918) +#define IXGBE_KRM_PCS_KX_AN_LP(P) ((P) ? 0x991C : 0x591C) #define IXGBE_KRM_SGMII_CTRL(P) ((P) ? 0x82A0 : 0x42A0) #define IXGBE_KRM_LP_BASE_PAGE_HIGH(P) ((P) ? 0x836C : 0x436C) #define IXGBE_KRM_DSP_TXFFE_STATE_4(P) ((P) ? 0x8634 : 0x4634) #define IXGBE_KRM_DSP_TXFFE_STATE_5(P) ((P) ? 0x8638 : 0x4638) #define IXGBE_KRM_RX_TRN_LINKUP_CTRL(P) ((P) ? 0x8B00 : 0x4B00) #define IXGBE_KRM_PMD_DFX_BURNIN(P) ((P) ? 0x8E00 : 0x4E00) +#define IXGBE_KRM_PMD_FLX_MASK_ST20(P) ((P) ? 0x9054 : 0x5054) #define IXGBE_KRM_TX_COEFF_CTRL_1(P) ((P) ? 0x9520 : 0x5520) #define IXGBE_KRM_RX_ANA_CTL(P) ((P) ? 
0x9A00 : 0x5A00) +#define IXGBE_KRM_PMD_FLX_MASK_ST20_SFI_10G_DA ~(0x3 << 20) +#define IXGBE_KRM_PMD_FLX_MASK_ST20_SFI_10G_SR (1u << 20) +#define IXGBE_KRM_PMD_FLX_MASK_ST20_SFI_10G_LR (0x2 << 20) +#define IXGBE_KRM_PMD_FLX_MASK_ST20_SGMII_EN (1u << 25) +#define IXGBE_KRM_PMD_FLX_MASK_ST20_AN37_EN (1u << 26) +#define IXGBE_KRM_PMD_FLX_MASK_ST20_AN_EN (1u << 27) +#define IXGBE_KRM_PMD_FLX_MASK_ST20_SPEED_10M ~(0x7 << 28) +#define IXGBE_KRM_PMD_FLX_MASK_ST20_SPEED_100M (1u << 28) +#define IXGBE_KRM_PMD_FLX_MASK_ST20_SPEED_1G (0x2 << 28) +#define IXGBE_KRM_PMD_FLX_MASK_ST20_SPEED_10G (0x3 << 28) +#define IXGBE_KRM_PMD_FLX_MASK_ST20_SPEED_AN (0x4 << 28) +#define IXGBE_KRM_PMD_FLX_MASK_ST20_SPEED_2_5G (0x7 << 28) +#define IXGBE_KRM_PMD_FLX_MASK_ST20_SPEED_MASK (0x7 << 28) +#define IXGBE_KRM_PMD_FLX_MASK_ST20_FW_AN_RESTART (1u << 31) + #define IXGBE_KRM_PORT_CAR_GEN_CTRL_NELB_32B (1 << 9) #define IXGBE_KRM_PORT_CAR_GEN_CTRL_NELB_KRPCS (1 << 11) @@ -4166,9 +4278,14 @@ struct ixgbe_hw { #define IXGBE_KRM_AN_CNTL_1_SYM_PAUSE (1 << 28) #define IXGBE_KRM_AN_CNTL_1_ASM_PAUSE (1 << 29) - +#define IXGBE_KRM_PCS_KX_AN_SYM_PAUSE (1 << 1) +#define IXGBE_KRM_PCS_KX_AN_ASM_PAUSE (1 << 2) +#define IXGBE_KRM_PCS_KX_AN_LP_SYM_PAUSE (1 << 2) +#define IXGBE_KRM_PCS_KX_AN_LP_ASM_PAUSE (1 << 3) +#define IXGBE_KRM_AN_CNTL_4_ECSR_AN37_OVER_73 (1 << 29) #define IXGBE_KRM_AN_CNTL_8_LINEAR (1 << 0) #define IXGBE_KRM_AN_CNTL_8_LIMITING (1 << 1) + #define IXGBE_KRM_LP_BASE_PAGE_HIGH_SYM_PAUSE (1 << 10) #define IXGBE_KRM_LP_BASE_PAGE_HIGH_ASM_PAUSE (1 << 11) @@ -4207,11 +4324,18 @@ struct ixgbe_hw { #define IXGBE_SB_IOSF_TARGET_KR_PHY 0 #define IXGBE_NW_MNG_IF_SEL 0x00011178 -#define IXGBE_NW_MNG_IF_SEL_MDIO_ACT (1 << 1) -#define IXGBE_NW_MNG_IF_SEL_ENABLE_10_100M (1 << 23) -#define IXGBE_NW_MNG_IF_SEL_INT_PHY_MODE (1 << 24) +#define IXGBE_NW_MNG_IF_SEL_MDIO_ACT (1u << 1) +#define IXGBE_NW_MNG_IF_SEL_MDIO_IF_MODE (1u << 2) +#define IXGBE_NW_MNG_IF_SEL_EN_SHARED_MDIO (1u << 13) +#define IXGBE_NW_MNG_IF_SEL_PHY_SPEED_10M (1u << 17) +#define IXGBE_NW_MNG_IF_SEL_PHY_SPEED_100M (1u << 18) +#define IXGBE_NW_MNG_IF_SEL_PHY_SPEED_1G (1u << 19) +#define IXGBE_NW_MNG_IF_SEL_PHY_SPEED_2_5G (1u << 20) +#define IXGBE_NW_MNG_IF_SEL_PHY_SPEED_10G (1u << 21) +#define IXGBE_NW_MNG_IF_SEL_SGMII_ENABLE (1u << 25) +#define IXGBE_NW_MNG_IF_SEL_INT_PHY_MODE (1 << 24) /* X552 reg field only */ #define IXGBE_NW_MNG_IF_SEL_MDIO_PHY_ADD_SHIFT 3 #define IXGBE_NW_MNG_IF_SEL_MDIO_PHY_ADD \ - (0x1F << IXGBE_NW_MNG_IF_SEL_MDIO_PHY_ADD_SHIFT) + (0x1F << IXGBE_NW_MNG_IF_SEL_MDIO_PHY_ADD_SHIFT) #endif /* _IXGBE_TYPE_H_ */ diff --git a/src/dpdk/drivers/net/ixgbe/base/ixgbe_vf.c b/src/dpdk/drivers/net/ixgbe/base/ixgbe_vf.c index a75074a5..8775ee51 100644 --- a/src/dpdk/drivers/net/ixgbe/base/ixgbe_vf.c +++ b/src/dpdk/drivers/net/ixgbe/base/ixgbe_vf.c @@ -64,6 +64,7 @@ s32 ixgbe_init_ops_vf(struct ixgbe_hw *hw) hw->mac.ops.get_mac_addr = ixgbe_get_mac_addr_vf; hw->mac.ops.stop_adapter = ixgbe_stop_adapter_vf; hw->mac.ops.get_bus_info = NULL; + hw->mac.ops.negotiate_api_version = ixgbevf_negotiate_api_version; /* Link */ hw->mac.ops.setup_link = ixgbe_setup_mac_link_vf; @@ -75,10 +76,12 @@ s32 ixgbe_init_ops_vf(struct ixgbe_hw *hw) hw->mac.ops.set_uc_addr = ixgbevf_set_uc_addr_vf; hw->mac.ops.init_rx_addrs = NULL; hw->mac.ops.update_mc_addr_list = ixgbe_update_mc_addr_list_vf; + hw->mac.ops.update_xcast_mode = ixgbevf_update_xcast_mode; hw->mac.ops.enable_mc = NULL; hw->mac.ops.disable_mc = NULL; hw->mac.ops.clear_vfta = NULL; hw->mac.ops.set_vfta = 
ixgbe_set_vfta_vf; + hw->mac.ops.set_rlpml = ixgbevf_rlpml_set_vf; hw->mac.max_tx_queues = 1; hw->mac.max_rx_queues = 1; @@ -322,15 +325,16 @@ STATIC s32 ixgbe_mta_vector(struct ixgbe_hw *hw, u8 *mc_addr) return vector; } -STATIC void ixgbevf_write_msg_read_ack(struct ixgbe_hw *hw, - u32 *msg, u16 size) +STATIC s32 ixgbevf_write_msg_read_ack(struct ixgbe_hw *hw, u32 *msg, + u32 *retmsg, u16 size) { struct ixgbe_mbx_info *mbx = &hw->mbx; - u32 retmsg[IXGBE_VFMAILBOX_SIZE]; s32 retval = mbx->ops.write_posted(hw, msg, size, 0); - if (!retval) - mbx->ops.read_posted(hw, retmsg, size, 0); + if (retval) + return retval; + + return mbx->ops.read_posted(hw, retmsg, size, 0); } /** @@ -344,7 +348,6 @@ STATIC void ixgbevf_write_msg_read_ack(struct ixgbe_hw *hw, s32 ixgbe_set_rar_vf(struct ixgbe_hw *hw, u32 index, u8 *addr, u32 vmdq, u32 enable_addr) { - struct ixgbe_mbx_info *mbx = &hw->mbx; u32 msgbuf[3]; u8 *msg_addr = (u8 *)(&msgbuf[1]); s32 ret_val; @@ -353,10 +356,7 @@ s32 ixgbe_set_rar_vf(struct ixgbe_hw *hw, u32 index, u8 *addr, u32 vmdq, memset(msgbuf, 0, 12); msgbuf[0] = IXGBE_VF_SET_MAC_ADDR; memcpy(msg_addr, addr, 6); - ret_val = mbx->ops.write_posted(hw, msgbuf, 3, 0); - - if (!ret_val) - ret_val = mbx->ops.read_posted(hw, msgbuf, 3, 0); + ret_val = ixgbevf_write_msg_read_ack(hw, msgbuf, msgbuf, 3); msgbuf[0] &= ~IXGBE_VT_MSGTYPE_CTS; @@ -419,17 +419,51 @@ s32 ixgbe_update_mc_addr_list_vf(struct ixgbe_hw *hw, u8 *mc_addr_list, } /** + * ixgbevf_update_xcast_mode - Update Multicast mode + * @hw: pointer to the HW structure + * @xcast_mode: new multicast mode + * + * Updates the Multicast Mode of VF. + **/ +s32 ixgbevf_update_xcast_mode(struct ixgbe_hw *hw, int xcast_mode) +{ + u32 msgbuf[2]; + s32 err; + + switch (hw->api_version) { + case ixgbe_mbox_api_12: + case ixgbe_mbox_api_13: + break; + default: + return IXGBE_ERR_FEATURE_NOT_SUPPORTED; + } + + msgbuf[0] = IXGBE_VF_UPDATE_XCAST_MODE; + msgbuf[1] = xcast_mode; + + err = ixgbevf_write_msg_read_ack(hw, msgbuf, msgbuf, 2); + if (err) + return err; + + msgbuf[0] &= ~IXGBE_VT_MSGTYPE_CTS; + if (msgbuf[0] == (IXGBE_VF_UPDATE_XCAST_MODE | IXGBE_VT_MSGTYPE_NACK)) + return IXGBE_ERR_FEATURE_NOT_SUPPORTED; + return IXGBE_SUCCESS; +} + +/** * ixgbe_set_vfta_vf - Set/Unset vlan filter table address * @hw: pointer to the HW structure * @vlan: 12 bit VLAN ID * @vind: unused by VF drivers * @vlan_on: if true then set bit, else clear bit * @vlvf_bypass: boolean flag indicating updating default pool is okay + * + * Turn on/off specified VLAN in the VLAN filter table. 
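 *
 * Illustrative mailbox layout (sketch only; the initial msgbuf setup sits
 * outside this hunk, so the first two assignments are assumptions):
 *
 *   msgbuf[0]  = IXGBE_VF_SET_VLAN;
 *   msgbuf[1]  = vlan;
 *   msgbuf[0] |= vlan_on << IXGBE_VT_MSGINFO_SHIFT;
 *   ret_val    = ixgbevf_write_msg_read_ack(hw, msgbuf, msgbuf, 2);
 *
 * A non-zero MSG INFO field asks the PF to add the VLAN, zero asks it to
 * remove it; the helper now returns the PF reply in msgbuf, so the caller
 * only has to test the ACK bit afterwards.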
**/ s32 ixgbe_set_vfta_vf(struct ixgbe_hw *hw, u32 vlan, u32 vind, bool vlan_on, bool vlvf_bypass) { - struct ixgbe_mbx_info *mbx = &hw->mbx; u32 msgbuf[2]; s32 ret_val; UNREFERENCED_2PARAMETER(vind, vlvf_bypass); @@ -439,10 +473,7 @@ s32 ixgbe_set_vfta_vf(struct ixgbe_hw *hw, u32 vlan, u32 vind, /* Setting the 8 bit field MSG INFO to TRUE indicates "add" */ msgbuf[0] |= vlan_on << IXGBE_VT_MSGINFO_SHIFT; - ret_val = mbx->ops.write_posted(hw, msgbuf, 2, 0); - if (!ret_val) - ret_val = mbx->ops.read_posted(hw, msgbuf, 1, 0); - + ret_val = ixgbevf_write_msg_read_ack(hw, msgbuf, msgbuf, 2); if (!ret_val && (msgbuf[0] & IXGBE_VT_MSGTYPE_ACK)) return IXGBE_SUCCESS; @@ -489,8 +520,7 @@ s32 ixgbe_get_mac_addr_vf(struct ixgbe_hw *hw, u8 *mac_addr) s32 ixgbevf_set_uc_addr_vf(struct ixgbe_hw *hw, u32 index, u8 *addr) { - struct ixgbe_mbx_info *mbx = &hw->mbx; - u32 msgbuf[3]; + u32 msgbuf[3], msgbuf_chk; u8 *msg_addr = (u8 *)(&msgbuf[1]); s32 ret_val; @@ -503,18 +533,17 @@ s32 ixgbevf_set_uc_addr_vf(struct ixgbe_hw *hw, u32 index, u8 *addr) */ msgbuf[0] |= index << IXGBE_VT_MSGINFO_SHIFT; msgbuf[0] |= IXGBE_VF_SET_MACVLAN; + msgbuf_chk = msgbuf[0]; if (addr) memcpy(msg_addr, addr, 6); - ret_val = mbx->ops.write_posted(hw, msgbuf, 3, 0); - if (!ret_val) - ret_val = mbx->ops.read_posted(hw, msgbuf, 3, 0); + ret_val = ixgbevf_write_msg_read_ack(hw, msgbuf, msgbuf, 3); + if (!ret_val) { + msgbuf[0] &= ~IXGBE_VT_MSGTYPE_CTS; - msgbuf[0] &= ~IXGBE_VT_MSGTYPE_CTS; - - if (!ret_val) - if (msgbuf[0] == (IXGBE_VF_SET_MACVLAN | IXGBE_VT_MSGTYPE_NACK)) - ret_val = IXGBE_ERR_OUT_OF_MEM; + if (msgbuf[0] == (msgbuf_chk | IXGBE_VT_MSGTYPE_NACK)) + return IXGBE_ERR_OUT_OF_MEM; + } return ret_val; } @@ -584,13 +613,29 @@ s32 ixgbe_check_mac_link_vf(struct ixgbe_hw *hw, ixgbe_link_speed *speed, switch (links_reg & IXGBE_LINKS_SPEED_82599) { case IXGBE_LINKS_SPEED_10G_82599: *speed = IXGBE_LINK_SPEED_10GB_FULL; + if (hw->mac.type >= ixgbe_mac_X550) { + if (links_reg & IXGBE_LINKS_SPEED_NON_STD) + *speed = IXGBE_LINK_SPEED_2_5GB_FULL; + } break; case IXGBE_LINKS_SPEED_1G_82599: *speed = IXGBE_LINK_SPEED_1GB_FULL; break; case IXGBE_LINKS_SPEED_100_82599: *speed = IXGBE_LINK_SPEED_100_FULL; + if (hw->mac.type == ixgbe_mac_X550) { + if (links_reg & IXGBE_LINKS_SPEED_NON_STD) + *speed = IXGBE_LINK_SPEED_5GB_FULL; + } + break; + case IXGBE_LINKS_SPEED_10_X550EM_A: + *speed = IXGBE_LINK_SPEED_UNKNOWN; + /* Since Reserved in older MAC's */ + if (hw->mac.type >= ixgbe_mac_X550) + *speed = IXGBE_LINK_SPEED_10_FULL; break; + default: + *speed = IXGBE_LINK_SPEED_UNKNOWN; } /* if the read failed it could just be a mailbox collision, best wait @@ -627,13 +672,22 @@ out: * @hw: pointer to the HW structure * @max_size: value to assign to max frame size **/ -void ixgbevf_rlpml_set_vf(struct ixgbe_hw *hw, u16 max_size) +s32 ixgbevf_rlpml_set_vf(struct ixgbe_hw *hw, u16 max_size) { u32 msgbuf[2]; + s32 retval; msgbuf[0] = IXGBE_VF_SET_LPE; msgbuf[1] = max_size; - ixgbevf_write_msg_read_ack(hw, msgbuf, 2); + + retval = ixgbevf_write_msg_read_ack(hw, msgbuf, msgbuf, 2); + if (retval) + return retval; + if ((msgbuf[0] & IXGBE_VF_SET_LPE) && + (msgbuf[0] & IXGBE_VT_MSGTYPE_NACK)) + return IXGBE_ERR_MBX; + + return 0; } /** @@ -650,11 +704,8 @@ int ixgbevf_negotiate_api_version(struct ixgbe_hw *hw, int api) msg[0] = IXGBE_VF_API_NEGOTIATE; msg[1] = api; msg[2] = 0; - err = hw->mbx.ops.write_posted(hw, msg, 3, 0); - - if (!err) - err = hw->mbx.ops.read_posted(hw, msg, 3, 0); + err = ixgbevf_write_msg_read_ack(hw, msg, msg, 3); if (!err) { 
msg[0] &= ~IXGBE_VT_MSGTYPE_CTS; @@ -680,6 +731,7 @@ int ixgbevf_get_queues(struct ixgbe_hw *hw, unsigned int *num_tcs, switch (hw->api_version) { case ixgbe_mbox_api_11: case ixgbe_mbox_api_12: + case ixgbe_mbox_api_13: break; default: return 0; @@ -688,11 +740,8 @@ int ixgbevf_get_queues(struct ixgbe_hw *hw, unsigned int *num_tcs, /* Fetch queue configuration from the PF */ msg[0] = IXGBE_VF_GET_QUEUES; msg[1] = msg[2] = msg[3] = msg[4] = 0; - err = hw->mbx.ops.write_posted(hw, msg, 5, 0); - - if (!err) - err = hw->mbx.ops.read_posted(hw, msg, 5, 0); + err = ixgbevf_write_msg_read_ack(hw, msg, msg, 5); if (!err) { msg[0] &= ~IXGBE_VT_MSGTYPE_CTS; diff --git a/src/dpdk/drivers/net/ixgbe/base/ixgbe_vf.h b/src/dpdk/drivers/net/ixgbe/base/ixgbe_vf.h index 8851cb82..3efffe82 100644 --- a/src/dpdk/drivers/net/ixgbe/base/ixgbe_vf.h +++ b/src/dpdk/drivers/net/ixgbe/base/ixgbe_vf.h @@ -34,6 +34,8 @@ POSSIBILITY OF SUCH DAMAGE. #ifndef _IXGBE_VF_H_ #define _IXGBE_VF_H_ +#include "ixgbe_type.h" + #define IXGBE_VF_IRQ_CLEAR_MASK 7 #define IXGBE_VF_MAX_TX_QUEUES 8 #define IXGBE_VF_MAX_RX_QUEUES 8 @@ -114,6 +116,7 @@ struct ixgbevf_hw_stats { u64 saved_reset_vfmprc; }; +s32 ixgbe_init_ops_vf(struct ixgbe_hw *hw); s32 ixgbe_init_hw_vf(struct ixgbe_hw *hw); s32 ixgbe_start_hw_vf(struct ixgbe_hw *hw); s32 ixgbe_reset_hw_vf(struct ixgbe_hw *hw); @@ -131,9 +134,10 @@ s32 ixgbevf_set_uc_addr_vf(struct ixgbe_hw *hw, u32 index, u8 *addr); s32 ixgbe_update_mc_addr_list_vf(struct ixgbe_hw *hw, u8 *mc_addr_list, u32 mc_addr_count, ixgbe_mc_addr_itr, bool clear); +s32 ixgbevf_update_xcast_mode(struct ixgbe_hw *hw, int xcast_mode); s32 ixgbe_set_vfta_vf(struct ixgbe_hw *hw, u32 vlan, u32 vind, bool vlan_on, bool vlvf_bypass); -void ixgbevf_rlpml_set_vf(struct ixgbe_hw *hw, u16 max_size); +s32 ixgbevf_rlpml_set_vf(struct ixgbe_hw *hw, u16 max_size); int ixgbevf_negotiate_api_version(struct ixgbe_hw *hw, int api); int ixgbevf_get_queues(struct ixgbe_hw *hw, unsigned int *num_tcs, unsigned int *default_tc); diff --git a/src/dpdk/drivers/net/ixgbe/base/ixgbe_x540.c b/src/dpdk/drivers/net/ixgbe/base/ixgbe_x540.c index 31dead0d..499b1fac 100644 --- a/src/dpdk/drivers/net/ixgbe/base/ixgbe_x540.c +++ b/src/dpdk/drivers/net/ixgbe/base/ixgbe_x540.c @@ -271,6 +271,7 @@ mac_reset_top: if (ixgbe_validate_mac_addr(hw->mac.san_addr) == 0) { /* Save the SAN MAC RAR index */ hw->mac.san_mac_rar_index = hw->mac.num_rar_entries - 1; + hw->mac.ops.set_rar(hw, hw->mac.san_mac_rar_index, hw->mac.san_addr, 0, IXGBE_RAH_AV); @@ -490,7 +491,6 @@ s32 ixgbe_calc_eeprom_checksum_X540(struct ixgbe_hw *hw) u16 length = 0; u16 pointer = 0; u16 word = 0; - u16 checksum_last_word = IXGBE_EEPROM_CHECKSUM; u16 ptr_start = IXGBE_PCIE_ANALOG_PTR; /* Do not use hw->eeprom.ops.read because we do not want to take @@ -500,14 +500,15 @@ s32 ixgbe_calc_eeprom_checksum_X540(struct ixgbe_hw *hw) DEBUGFUNC("ixgbe_calc_eeprom_checksum_X540"); - /* Include 0x0-0x3F in the checksum */ - for (i = 0; i <= checksum_last_word; i++) { + /* Include 0x0 up to IXGBE_EEPROM_CHECKSUM; do not include the + * checksum itself + */ + for (i = 0; i < IXGBE_EEPROM_CHECKSUM; i++) { if (ixgbe_read_eerd_generic(hw, i, &word)) { DEBUGOUT("EEPROM read failed\n"); return IXGBE_ERR_EEPROM; } - if (i != IXGBE_EEPROM_CHECKSUM) - checksum += word; + checksum += word; } /* Include all data from pointers 0x3, 0x6-0xE. 
This excludes the @@ -774,8 +775,10 @@ s32 ixgbe_acquire_swfw_sync_X540(struct ixgbe_hw *hw, u32 mask) /* SW NVM semaphore bit is used for access to all * SW_FW_SYNC bits (not just NVM) */ - if (ixgbe_get_swfw_sync_semaphore(hw)) + if (ixgbe_get_swfw_sync_semaphore(hw)) { + DEBUGOUT("Failed to get NVM access and register semaphore, returning IXGBE_ERR_SWFW_SYNC\n"); return IXGBE_ERR_SWFW_SYNC; + } swfw_sync = IXGBE_READ_REG(hw, IXGBE_SWFW_SYNC_BY_MAC(hw)); if (!(swfw_sync & (fwmask | swmask | hwmask))) { @@ -783,7 +786,6 @@ s32 ixgbe_acquire_swfw_sync_X540(struct ixgbe_hw *hw, u32 mask) IXGBE_WRITE_REG(hw, IXGBE_SWFW_SYNC_BY_MAC(hw), swfw_sync); ixgbe_release_swfw_sync_semaphore(hw); - msec_delay(5); return IXGBE_SUCCESS; } /* Firmware currently using resource (fwmask), hardware @@ -798,6 +800,7 @@ s32 ixgbe_acquire_swfw_sync_X540(struct ixgbe_hw *hw, u32 mask) if (swmask == IXGBE_GSSR_SW_MNG_SM) { ERROR_REPORT1(IXGBE_ERROR_POLLING, "Failed to get SW only semaphore"); + DEBUGOUT("Failed to get SW only semaphore, returning IXGBE_ERR_SWFW_SYNC\n"); return IXGBE_ERR_SWFW_SYNC; } @@ -806,8 +809,10 @@ s32 ixgbe_acquire_swfw_sync_X540(struct ixgbe_hw *hw, u32 mask) * of the requested resource(s) while ignoring the corresponding FW/HW * bits in the SW_FW_SYNC register. */ - if (ixgbe_get_swfw_sync_semaphore(hw)) + if (ixgbe_get_swfw_sync_semaphore(hw)) { + DEBUGOUT("Failed to get NVM sempahore and register semaphore while forcefully ignoring FW sempahore bit(s) and setting SW semaphore bit(s), returning IXGBE_ERR_SWFW_SYNC\n"); return IXGBE_ERR_SWFW_SYNC; + } swfw_sync = IXGBE_READ_REG(hw, IXGBE_SWFW_SYNC_BY_MAC(hw)); if (swfw_sync & (fwmask | hwmask)) { swfw_sync |= swmask; @@ -829,9 +834,11 @@ s32 ixgbe_acquire_swfw_sync_X540(struct ixgbe_hw *hw, u32 mask) rmask |= IXGBE_GSSR_I2C_MASK; ixgbe_release_swfw_sync_X540(hw, rmask); ixgbe_release_swfw_sync_semaphore(hw); + DEBUGOUT("Resource not released by other SW, returning IXGBE_ERR_SWFW_SYNC\n"); return IXGBE_ERR_SWFW_SYNC; } ixgbe_release_swfw_sync_semaphore(hw); + DEBUGOUT("Returning error IXGBE_ERR_SWFW_SYNC\n"); return IXGBE_ERR_SWFW_SYNC; } @@ -860,7 +867,7 @@ void ixgbe_release_swfw_sync_X540(struct ixgbe_hw *hw, u32 mask) IXGBE_WRITE_REG(hw, IXGBE_SWFW_SYNC_BY_MAC(hw), swfw_sync); ixgbe_release_swfw_sync_semaphore(hw); - msec_delay(5); + msec_delay(2); } /** @@ -982,6 +989,9 @@ s32 ixgbe_blink_led_start_X540(struct ixgbe_hw *hw, u32 index) DEBUGFUNC("ixgbe_blink_led_start_X540"); + if (index > 3) + return IXGBE_ERR_PARAM; + /* * Link should be up in order for the blink bit in the LED control * register to work. Force link and speed in the MAC if link is down. @@ -1016,6 +1026,9 @@ s32 ixgbe_blink_led_stop_X540(struct ixgbe_hw *hw, u32 index) u32 macc_reg; u32 ledctl_reg; + if (index > 3) + return IXGBE_ERR_PARAM; + DEBUGFUNC("ixgbe_blink_led_stop_X540"); /* Restore the LED to its default value. */ diff --git a/src/dpdk/drivers/net/ixgbe/base/ixgbe_x550.c b/src/dpdk/drivers/net/ixgbe/base/ixgbe_x550.c index aa6e859f..6f9c034b 100644 --- a/src/dpdk/drivers/net/ixgbe/base/ixgbe_x550.c +++ b/src/dpdk/drivers/net/ixgbe/base/ixgbe_x550.c @@ -41,6 +41,7 @@ POSSIBILITY OF SUCH DAMAGE. 
STATIC s32 ixgbe_setup_ixfi_x550em(struct ixgbe_hw *hw, ixgbe_link_speed *speed); STATIC s32 ixgbe_acquire_swfw_sync_X550a(struct ixgbe_hw *, u32 mask); STATIC void ixgbe_release_swfw_sync_X550a(struct ixgbe_hw *, u32 mask); +STATIC s32 ixgbe_read_mng_if_sel_x550em(struct ixgbe_hw *hw); /** * ixgbe_init_ops_X550 - Inits func ptrs and MAC type @@ -61,7 +62,7 @@ s32 ixgbe_init_ops_X550(struct ixgbe_hw *hw) mac->ops.dmac_config = ixgbe_dmac_config_X550; mac->ops.dmac_config_tcs = ixgbe_dmac_config_tcs_X550; mac->ops.dmac_update_tcs = ixgbe_dmac_update_tcs_X550; - mac->ops.setup_eee = ixgbe_setup_eee_X550; + mac->ops.setup_eee = NULL; mac->ops.set_source_address_pruning = ixgbe_set_source_address_pruning_X550; mac->ops.set_ethertype_anti_spoofing = @@ -82,6 +83,8 @@ s32 ixgbe_init_ops_X550(struct ixgbe_hw *hw) mac->ops.mdd_event = ixgbe_mdd_event_X550; mac->ops.restore_mdd_vf = ixgbe_restore_mdd_vf_X550; mac->ops.disable_rx = ixgbe_disable_rx_x550; + /* Manageability interface */ + mac->ops.set_fw_drv_ver = ixgbe_set_fw_drv_ver_x550; switch (hw->device_id) { case IXGBE_DEV_ID_X550EM_X_10G_T: case IXGBE_DEV_ID_X550EM_A_10G_T: @@ -342,11 +345,10 @@ STATIC s32 ixgbe_read_phy_reg_mdi_22(struct ixgbe_hw *hw, u32 reg_addr, UNREFERENCED_1PARAMETER(dev_type); /* Setup and write the read command */ - command = (reg_addr << IXGBE_MSCA_NP_ADDR_SHIFT) | - (reg_addr << IXGBE_MSCA_DEV_TYPE_SHIFT) | - (hw->phy.addr << IXGBE_MSCA_PHY_ADDR_SHIFT) | - IXGBE_MSCA_OLD_PROTOCOL | IXGBE_MSCA_READ | - IXGBE_MSCA_MDI_COMMAND; + command = (reg_addr << IXGBE_MSCA_DEV_TYPE_SHIFT) | + (hw->phy.addr << IXGBE_MSCA_PHY_ADDR_SHIFT) | + IXGBE_MSCA_OLD_PROTOCOL | IXGBE_MSCA_READ_AUTOINC | + IXGBE_MSCA_MDI_COMMAND; IXGBE_WRITE_REG(hw, IXGBE_MSCA, command); @@ -393,11 +395,10 @@ STATIC s32 ixgbe_write_phy_reg_mdi_22(struct ixgbe_hw *hw, u32 reg_addr, IXGBE_WRITE_REG(hw, IXGBE_MSRWD, (u32)phy_data); /* Setup and write the write command */ - command = (reg_addr << IXGBE_MSCA_NP_ADDR_SHIFT) | - (reg_addr << IXGBE_MSCA_DEV_TYPE_SHIFT) | - (hw->phy.addr << IXGBE_MSCA_PHY_ADDR_SHIFT) | - IXGBE_MSCA_OLD_PROTOCOL | IXGBE_MSCA_WRITE | - IXGBE_MSCA_MDI_COMMAND; + command = (reg_addr << IXGBE_MSCA_DEV_TYPE_SHIFT) | + (hw->phy.addr << IXGBE_MSCA_PHY_ADDR_SHIFT) | + IXGBE_MSCA_OLD_PROTOCOL | IXGBE_MSCA_WRITE | + IXGBE_MSCA_MDI_COMMAND; IXGBE_WRITE_REG(hw, IXGBE_MSCA, command); @@ -423,43 +424,6 @@ STATIC s32 ixgbe_write_phy_reg_mdi_22(struct ixgbe_hw *hw, u32 reg_addr, } /** - * ixgbe_identify_phy_1g - Get 1g PHY type based on device id - * @hw: pointer to hardware structure - * - * Returns error code - */ -STATIC s32 ixgbe_identify_phy_1g(struct ixgbe_hw *hw) -{ - u32 swfw_mask = hw->phy.phy_semaphore_mask; - u16 phy_id_high; - u16 phy_id_low; - s32 rc; - - rc = hw->mac.ops.acquire_swfw_sync(hw, swfw_mask); - if (rc) - return rc; - - rc = ixgbe_read_phy_reg_mdi_22(hw, IXGBE_MDIO_PHY_ID_HIGH, 0, - &phy_id_high); - if (rc) - goto rel_out; - - rc = ixgbe_read_phy_reg_mdi_22(hw, IXGBE_MDIO_PHY_ID_LOW, 0, - &phy_id_low); - if (rc) - goto rel_out; - - hw->phy.id = (u32)phy_id_high << 16; - hw->phy.id |= phy_id_low & IXGBE_PHY_REVISION_MASK; - hw->phy.revision = (u32)phy_id_low & ~IXGBE_PHY_REVISION_MASK; - -rel_out: - hw->mac.ops.release_swfw_sync(hw, swfw_mask); - - return rc; -} - -/** * ixgbe_identify_phy_x550em - Get PHY type based on device id * @hw: pointer to hardware structure * @@ -467,18 +431,15 @@ rel_out: */ STATIC s32 ixgbe_identify_phy_x550em(struct ixgbe_hw *hw) { + hw->mac.ops.set_lan_id(hw); + + ixgbe_read_mng_if_sel_x550em(hw); 
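/* Illustrative note (sketch, not introduced by this patch): when the
 * MDIO_ACT bit is set, the ixgbe_read_mng_if_sel_x550em() call above is
 * what derives the MDIO PHY address from NW_MNG_IF_SEL, roughly:
 *
 *     hw->phy.addr = (hw->phy.nw_mng_if_sel &
 *                     IXGBE_NW_MNG_IF_SEL_MDIO_PHY_ADD) >>
 *                     IXGBE_NW_MNG_IF_SEL_MDIO_PHY_ADD_SHIFT;
 */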
+ switch (hw->device_id) { case IXGBE_DEV_ID_X550EM_A_SFP: - hw->phy.ops.read_reg = ixgbe_read_phy_reg_x550a; - hw->phy.ops.write_reg = ixgbe_write_phy_reg_x550a; - if (hw->bus.lan_id) - hw->phy.phy_semaphore_mask |= IXGBE_GSSR_PHY1_SM; - else - hw->phy.phy_semaphore_mask |= IXGBE_GSSR_PHY0_SM; return ixgbe_identify_module_generic(hw); case IXGBE_DEV_ID_X550EM_X_SFP: /* set up for CS4227 usage */ - hw->phy.phy_semaphore_mask = IXGBE_GSSR_SHARED_I2C_SM; ixgbe_setup_mux_ctl(hw); ixgbe_check_cs4227(hw); /* Fallthrough */ @@ -489,30 +450,161 @@ STATIC s32 ixgbe_identify_phy_x550em(struct ixgbe_hw *hw) case IXGBE_DEV_ID_X550EM_X_KX4: hw->phy.type = ixgbe_phy_x550em_kx4; break; + case IXGBE_DEV_ID_X550EM_X_XFI: + hw->phy.type = ixgbe_phy_x550em_xfi; + break; case IXGBE_DEV_ID_X550EM_X_KR: case IXGBE_DEV_ID_X550EM_A_KR: case IXGBE_DEV_ID_X550EM_A_KR_L: hw->phy.type = ixgbe_phy_x550em_kr; break; + case IXGBE_DEV_ID_X550EM_A_10G_T: case IXGBE_DEV_ID_X550EM_X_1G_T: case IXGBE_DEV_ID_X550EM_X_10G_T: - case IXGBE_DEV_ID_X550EM_A_10G_T: return ixgbe_identify_phy_generic(hw); case IXGBE_DEV_ID_X550EM_A_1G_T: case IXGBE_DEV_ID_X550EM_A_1G_T_L: - hw->phy.ops.read_reg = ixgbe_read_phy_reg_x550a; - hw->phy.ops.write_reg = ixgbe_write_phy_reg_x550a; + hw->phy.type = ixgbe_phy_fw; + hw->phy.ops.read_reg = NULL; + hw->phy.ops.write_reg = NULL; if (hw->bus.lan_id) hw->phy.phy_semaphore_mask |= IXGBE_GSSR_PHY1_SM; else hw->phy.phy_semaphore_mask |= IXGBE_GSSR_PHY0_SM; - return ixgbe_identify_phy_1g(hw); + break; default: break; } return IXGBE_SUCCESS; } +/** + * ixgbe_fw_phy_activity - Perform an activity on a PHY + * @hw: pointer to hardware structure + * @activity: activity to perform + * @data: Pointer to 4 32-bit words of data + */ +s32 ixgbe_fw_phy_activity(struct ixgbe_hw *hw, u16 activity, + u32 (*data)[FW_PHY_ACT_DATA_COUNT]) +{ + union { + struct ixgbe_hic_phy_activity_req cmd; + struct ixgbe_hic_phy_activity_resp rsp; + } hic; + u16 retries = FW_PHY_ACT_RETRIES; + s32 rc; + u16 i; + + do { + memset(&hic, 0, sizeof(hic)); + hic.cmd.hdr.cmd = FW_PHY_ACT_REQ_CMD; + hic.cmd.hdr.buf_len = FW_PHY_ACT_REQ_LEN; + hic.cmd.hdr.checksum = FW_DEFAULT_CHECKSUM; + hic.cmd.port_number = hw->bus.lan_id; + hic.cmd.activity_id = IXGBE_CPU_TO_LE16(activity); + for (i = 0; i < FW_PHY_ACT_DATA_COUNT; ++i) + hic.cmd.data[i] = IXGBE_CPU_TO_BE32((*data)[i]); + + rc = ixgbe_host_interface_command(hw, (u32 *)&hic.cmd, + sizeof(hic.cmd), + IXGBE_HI_COMMAND_TIMEOUT, + true); + if (rc != IXGBE_SUCCESS) + return rc; + if (hic.rsp.hdr.cmd_or_resp.ret_status == + FW_CEM_RESP_STATUS_SUCCESS) { + for (i = 0; i < FW_PHY_ACT_DATA_COUNT; ++i) + (*data)[i] = IXGBE_BE32_TO_CPU(hic.rsp.data[i]); + return IXGBE_SUCCESS; + } + usec_delay(20); + --retries; + } while (retries > 0); + + return IXGBE_ERR_HOST_INTERFACE_COMMAND; +} + +static const struct { + u16 fw_speed; + ixgbe_link_speed phy_speed; +} ixgbe_fw_map[] = { + { FW_PHY_ACT_LINK_SPEED_10, IXGBE_LINK_SPEED_10_FULL }, + { FW_PHY_ACT_LINK_SPEED_100, IXGBE_LINK_SPEED_100_FULL }, + { FW_PHY_ACT_LINK_SPEED_1G, IXGBE_LINK_SPEED_1GB_FULL }, + { FW_PHY_ACT_LINK_SPEED_2_5G, IXGBE_LINK_SPEED_2_5GB_FULL }, + { FW_PHY_ACT_LINK_SPEED_5G, IXGBE_LINK_SPEED_5GB_FULL }, + { FW_PHY_ACT_LINK_SPEED_10G, IXGBE_LINK_SPEED_10GB_FULL }, +}; + +/** + * ixgbe_get_phy_id_fw - Get the phy ID via firmware command + * @hw: pointer to hardware structure + * + * Returns error code + */ +static s32 ixgbe_get_phy_id_fw(struct ixgbe_hw *hw) +{ + u32 info[FW_PHY_ACT_DATA_COUNT] = { 0 }; + u16 phy_speeds; + u16 phy_id_lo; 
+ s32 rc; + u16 i; + + rc = ixgbe_fw_phy_activity(hw, FW_PHY_ACT_GET_PHY_INFO, &info); + if (rc) + return rc; + + hw->phy.speeds_supported = 0; + phy_speeds = info[0] & FW_PHY_INFO_SPEED_MASK; + for (i = 0; i < sizeof(ixgbe_fw_map) / sizeof(ixgbe_fw_map[0]); ++i) { + if (phy_speeds & ixgbe_fw_map[i].fw_speed) + hw->phy.speeds_supported |= ixgbe_fw_map[i].phy_speed; + } + if (!hw->phy.autoneg_advertised) + hw->phy.autoneg_advertised = hw->phy.speeds_supported; + + hw->phy.id = info[0] & FW_PHY_INFO_ID_HI_MASK; + phy_id_lo = info[1] & FW_PHY_INFO_ID_LO_MASK; + hw->phy.id |= phy_id_lo & IXGBE_PHY_REVISION_MASK; + hw->phy.revision = phy_id_lo & ~IXGBE_PHY_REVISION_MASK; + if (!hw->phy.id || hw->phy.id == IXGBE_PHY_REVISION_MASK) + return IXGBE_ERR_PHY_ADDR_INVALID; + return IXGBE_SUCCESS; +} + +/** + * ixgbe_identify_phy_fw - Get PHY type based on firmware command + * @hw: pointer to hardware structure + * + * Returns error code + */ +static s32 ixgbe_identify_phy_fw(struct ixgbe_hw *hw) +{ + if (hw->bus.lan_id) + hw->phy.phy_semaphore_mask = IXGBE_GSSR_PHY1_SM; + else + hw->phy.phy_semaphore_mask = IXGBE_GSSR_PHY0_SM; + + hw->phy.type = ixgbe_phy_fw; + hw->phy.ops.read_reg = NULL; + hw->phy.ops.write_reg = NULL; + return ixgbe_get_phy_id_fw(hw); +} + +/** + * ixgbe_shutdown_fw_phy - Shutdown a firmware-controlled PHY + * @hw: pointer to hardware structure + * + * Returns error code + */ +s32 ixgbe_shutdown_fw_phy(struct ixgbe_hw *hw) +{ + u32 setup[FW_PHY_ACT_DATA_COUNT] = { 0 }; + + setup[0] = FW_PHY_ACT_FORCE_LINK_DOWN_OFF; + return ixgbe_fw_phy_activity(hw, FW_PHY_ACT_FORCE_LINK_DOWN, &setup); +} + STATIC s32 ixgbe_read_phy_reg_x550em(struct ixgbe_hw *hw, u32 reg_addr, u32 device_type, u16 *phy_data) { @@ -601,7 +693,6 @@ s32 ixgbe_init_ops_X550EM(struct ixgbe_hw *hw) struct ixgbe_mac_info *mac = &hw->mac; struct ixgbe_eeprom_info *eeprom = &hw->eeprom; struct ixgbe_phy_info *phy = &hw->phy; - struct ixgbe_link_info *link = &hw->link; s32 ret_val; DEBUGFUNC("ixgbe_init_ops_X550EM"); @@ -637,25 +728,6 @@ s32 ixgbe_init_ops_X550EM(struct ixgbe_hw *hw) hw->bus.type = ixgbe_bus_type_internal; mac->ops.get_bus_info = ixgbe_get_bus_info_X550em; - if (hw->mac.type == ixgbe_mac_X550EM_x) { - mac->ops.read_iosf_sb_reg = ixgbe_read_iosf_sb_reg_x550; - mac->ops.write_iosf_sb_reg = ixgbe_write_iosf_sb_reg_x550; - mac->ops.acquire_swfw_sync = ixgbe_acquire_swfw_sync_X550em; - mac->ops.release_swfw_sync = ixgbe_release_swfw_sync_X550em; - link->ops.read_link = ixgbe_read_i2c_combined_generic; - link->ops.read_link_unlocked = - ixgbe_read_i2c_combined_generic_unlocked; - link->ops.write_link = ixgbe_write_i2c_combined_generic; - link->ops.write_link_unlocked = - ixgbe_write_i2c_combined_generic_unlocked; - link->addr = IXGBE_CS4227; - } - if (hw->mac.type == ixgbe_mac_X550EM_a) { - mac->ops.read_iosf_sb_reg = ixgbe_read_iosf_sb_reg_x550; - mac->ops.write_iosf_sb_reg = ixgbe_write_iosf_sb_reg_x550; - mac->ops.acquire_swfw_sync = ixgbe_acquire_swfw_sync_X550a; - mac->ops.release_swfw_sync = ixgbe_release_swfw_sync_X550a; - } mac->ops.get_media_type = ixgbe_get_media_type_X550em; mac->ops.setup_sfp = ixgbe_setup_sfp_modules_X550em; @@ -666,25 +738,23 @@ s32 ixgbe_init_ops_X550EM(struct ixgbe_hw *hw) if (mac->ops.get_media_type(hw) == ixgbe_media_type_copper) mac->ops.setup_fc = ixgbe_setup_fc_generic; - else if (hw->mac.type == ixgbe_mac_X550EM_a) { - mac->ops.setup_fc = ixgbe_setup_fc_x550a; - mac->ops.fc_autoneg = ixgbe_fc_autoneg_x550a; - } else mac->ops.setup_fc = ixgbe_setup_fc_X550em; + /* PHY */ + 
phy->ops.init = ixgbe_init_phy_ops_X550em; switch (hw->device_id) { - case IXGBE_DEV_ID_X550EM_X_KR: - case IXGBE_DEV_ID_X550EM_A_KR: - case IXGBE_DEV_ID_X550EM_A_KR_L: + case IXGBE_DEV_ID_X550EM_A_1G_T: + case IXGBE_DEV_ID_X550EM_A_1G_T_L: + mac->ops.setup_fc = NULL; + phy->ops.identify = ixgbe_identify_phy_fw; + phy->ops.set_phy_power = NULL; + phy->ops.get_firmware_version = NULL; break; default: - mac->ops.setup_eee = NULL; + phy->ops.identify = ixgbe_identify_phy_x550em; } - /* PHY */ - phy->ops.init = ixgbe_init_phy_ops_X550em; - phy->ops.identify = ixgbe_identify_phy_x550em; if (mac->ops.get_media_type(hw) != ixgbe_media_type_copper) phy->ops.set_phy_power = NULL; @@ -703,6 +773,183 @@ s32 ixgbe_init_ops_X550EM(struct ixgbe_hw *hw) } /** + * ixgbe_setup_fw_link - Setup firmware-controlled PHYs + * @hw: pointer to hardware structure + */ +static s32 ixgbe_setup_fw_link(struct ixgbe_hw *hw) +{ + u32 setup[FW_PHY_ACT_DATA_COUNT] = { 0 }; + s32 rc; + u16 i; + + if (hw->phy.reset_disable || ixgbe_check_reset_blocked(hw)) + return 0; + + if (hw->fc.strict_ieee && hw->fc.requested_mode == ixgbe_fc_rx_pause) { + ERROR_REPORT1(IXGBE_ERROR_UNSUPPORTED, + "ixgbe_fc_rx_pause not valid in strict IEEE mode\n"); + return IXGBE_ERR_INVALID_LINK_SETTINGS; + } + + switch (hw->fc.requested_mode) { + case ixgbe_fc_full: + setup[0] |= FW_PHY_ACT_SETUP_LINK_PAUSE_RXTX << + FW_PHY_ACT_SETUP_LINK_PAUSE_SHIFT; + break; + case ixgbe_fc_rx_pause: + setup[0] |= FW_PHY_ACT_SETUP_LINK_PAUSE_RX << + FW_PHY_ACT_SETUP_LINK_PAUSE_SHIFT; + break; + case ixgbe_fc_tx_pause: + setup[0] |= FW_PHY_ACT_SETUP_LINK_PAUSE_TX << + FW_PHY_ACT_SETUP_LINK_PAUSE_SHIFT; + break; + default: + break; + } + + for (i = 0; i < sizeof(ixgbe_fw_map) / sizeof(ixgbe_fw_map[0]); ++i) { + if (hw->phy.autoneg_advertised & ixgbe_fw_map[i].phy_speed) + setup[0] |= ixgbe_fw_map[i].fw_speed; + } + setup[0] |= FW_PHY_ACT_SETUP_LINK_HP | FW_PHY_ACT_SETUP_LINK_AN; + + if (hw->phy.eee_speeds_advertised) + setup[0] |= FW_PHY_ACT_SETUP_LINK_EEE; + + rc = ixgbe_fw_phy_activity(hw, FW_PHY_ACT_SETUP_LINK, &setup); + if (rc) + return rc; + if (setup[0] == FW_PHY_ACT_SETUP_LINK_RSP_DOWN) + return IXGBE_ERR_OVERTEMP; + return IXGBE_SUCCESS; +} + +/** + * ixgbe_fc_autoneg_fw _ Set up flow control for FW-controlled PHYs + * @hw: pointer to hardware structure + * + * Called at init time to set up flow control. + */ +static s32 ixgbe_fc_autoneg_fw(struct ixgbe_hw *hw) +{ + if (hw->fc.requested_mode == ixgbe_fc_default) + hw->fc.requested_mode = ixgbe_fc_full; + + return ixgbe_setup_fw_link(hw); +} + +/** + * ixgbe_setup_eee_fw - Enable/disable EEE support + * @hw: pointer to the HW structure + * @enable_eee: boolean flag to enable EEE + * + * Enable/disable EEE based on enable_eee flag. + * This function controls EEE for firmware-based PHY implementations. + */ +static s32 ixgbe_setup_eee_fw(struct ixgbe_hw *hw, bool enable_eee) +{ + if (!!hw->phy.eee_speeds_advertised == enable_eee) + return IXGBE_SUCCESS; + if (enable_eee) + hw->phy.eee_speeds_advertised = hw->phy.eee_speeds_supported; + else + hw->phy.eee_speeds_advertised = 0; + return hw->phy.ops.setup_link(hw); +} + +/** +* ixgbe_init_ops_X550EM_a - Inits func ptrs and MAC type +* @hw: pointer to hardware structure +* +* Initialize the function pointers and for MAC type X550EM_a. +* Does not touch the hardware. 
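*
* For the X550EM_A_1G_T parts this routine also points mac->ops.setup_eee at
* the firmware-based ixgbe_setup_eee_fw() above, so EEE can be toggled with
* a call such as (illustrative sketch only):
*
*     hw->mac.ops.setup_eee(hw, true);    advertise all supported EEE speeds
*     hw->mac.ops.setup_eee(hw, false);   stop advertising EEE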
+**/ +s32 ixgbe_init_ops_X550EM_a(struct ixgbe_hw *hw) +{ + struct ixgbe_mac_info *mac = &hw->mac; + s32 ret_val; + + DEBUGFUNC("ixgbe_init_ops_X550EM_a"); + + /* Start with generic X550EM init */ + ret_val = ixgbe_init_ops_X550EM(hw); + + if (hw->device_id == IXGBE_DEV_ID_X550EM_A_SGMII || + hw->device_id == IXGBE_DEV_ID_X550EM_A_SGMII_L) { + mac->ops.read_iosf_sb_reg = ixgbe_read_iosf_sb_reg_x550; + mac->ops.write_iosf_sb_reg = ixgbe_write_iosf_sb_reg_x550; + } else { + mac->ops.read_iosf_sb_reg = ixgbe_read_iosf_sb_reg_x550a; + mac->ops.write_iosf_sb_reg = ixgbe_write_iosf_sb_reg_x550a; + } + mac->ops.acquire_swfw_sync = ixgbe_acquire_swfw_sync_X550a; + mac->ops.release_swfw_sync = ixgbe_release_swfw_sync_X550a; + + switch (mac->ops.get_media_type(hw)) { + case ixgbe_media_type_fiber: + mac->ops.setup_fc = NULL; + mac->ops.fc_autoneg = ixgbe_fc_autoneg_fiber_x550em_a; + break; + case ixgbe_media_type_backplane: + mac->ops.fc_autoneg = ixgbe_fc_autoneg_backplane_x550em_a; + mac->ops.setup_fc = ixgbe_setup_fc_backplane_x550em_a; + break; + default: + break; + } + + switch (hw->device_id) { + case IXGBE_DEV_ID_X550EM_A_1G_T: + case IXGBE_DEV_ID_X550EM_A_1G_T_L: + mac->ops.fc_autoneg = ixgbe_fc_autoneg_sgmii_x550em_a; + mac->ops.setup_fc = ixgbe_fc_autoneg_fw; + mac->ops.setup_eee = ixgbe_setup_eee_fw; + hw->phy.eee_speeds_supported = IXGBE_LINK_SPEED_100_FULL | + IXGBE_LINK_SPEED_1GB_FULL; + hw->phy.eee_speeds_advertised = hw->phy.eee_speeds_supported; + break; + default: + break; + } + + return ret_val; +} + +/** +* ixgbe_init_ops_X550EM_x - Inits func ptrs and MAC type +* @hw: pointer to hardware structure +* +* Initialize the function pointers and for MAC type X550EM_x. +* Does not touch the hardware. +**/ +s32 ixgbe_init_ops_X550EM_x(struct ixgbe_hw *hw) +{ + struct ixgbe_mac_info *mac = &hw->mac; + struct ixgbe_link_info *link = &hw->link; + s32 ret_val; + + DEBUGFUNC("ixgbe_init_ops_X550EM_x"); + + /* Start with generic X550EM init */ + ret_val = ixgbe_init_ops_X550EM(hw); + + mac->ops.read_iosf_sb_reg = ixgbe_read_iosf_sb_reg_x550; + mac->ops.write_iosf_sb_reg = ixgbe_write_iosf_sb_reg_x550; + mac->ops.acquire_swfw_sync = ixgbe_acquire_swfw_sync_X550em; + mac->ops.release_swfw_sync = ixgbe_release_swfw_sync_X550em; + link->ops.read_link = ixgbe_read_i2c_combined_generic; + link->ops.read_link_unlocked = ixgbe_read_i2c_combined_generic_unlocked; + link->ops.write_link = ixgbe_write_i2c_combined_generic; + link->ops.write_link_unlocked = + ixgbe_write_i2c_combined_generic_unlocked; + link->addr = IXGBE_CS4227; + + + return ret_val; +} + +/** * ixgbe_dmac_config_X550 * @hw: pointer to hardware structure * @@ -765,6 +1012,7 @@ s32 ixgbe_dmac_config_tcs_X550(struct ixgbe_hw *hw) /* Configure DMA coalescing enabled */ switch (hw->mac.dmac_config.link_speed) { + case IXGBE_LINK_SPEED_10_FULL: case IXGBE_LINK_SPEED_100_FULL: pb_headroom = IXGBE_DMACRXT_100M; break; @@ -865,158 +1113,6 @@ s32 ixgbe_init_eeprom_params_X550(struct ixgbe_hw *hw) } /** - * ixgbe_enable_eee_x550 - Enable EEE support - * @hw: pointer to hardware structure - */ -STATIC s32 ixgbe_enable_eee_x550(struct ixgbe_hw *hw) -{ - u16 autoneg_eee_reg; - u32 link_reg; - s32 status; - - if (hw->mac.type == ixgbe_mac_X550) { - /* Advertise EEE capability */ - hw->phy.ops.read_reg(hw, IXGBE_MDIO_AUTO_NEG_EEE_ADVT, - IXGBE_MDIO_AUTO_NEG_DEV_TYPE, - &autoneg_eee_reg); - - autoneg_eee_reg |= (IXGBE_AUTO_NEG_10GBASE_EEE_ADVT | - IXGBE_AUTO_NEG_1000BASE_EEE_ADVT | - IXGBE_AUTO_NEG_100BASE_EEE_ADVT); - - hw->phy.ops.write_reg(hw, 
IXGBE_MDIO_AUTO_NEG_EEE_ADVT, - IXGBE_MDIO_AUTO_NEG_DEV_TYPE, - autoneg_eee_reg); - return IXGBE_SUCCESS; - } - - switch (hw->device_id) { - case IXGBE_DEV_ID_X550EM_X_KR: - case IXGBE_DEV_ID_X550EM_A_KR: - case IXGBE_DEV_ID_X550EM_A_KR_L: - status = hw->mac.ops.read_iosf_sb_reg(hw, - IXGBE_KRM_LINK_CTRL_1(hw->bus.lan_id), - IXGBE_SB_IOSF_TARGET_KR_PHY, &link_reg); - if (status != IXGBE_SUCCESS) - return status; - - link_reg |= IXGBE_KRM_LINK_CTRL_1_TETH_EEE_CAP_KR | - IXGBE_KRM_LINK_CTRL_1_TETH_EEE_CAP_KX; - - /* Don't advertise FEC capability when EEE enabled. */ - link_reg &= ~IXGBE_KRM_LINK_CTRL_1_TETH_AN_CAP_FEC; - - status = hw->mac.ops.write_iosf_sb_reg(hw, - IXGBE_KRM_LINK_CTRL_1(hw->bus.lan_id), - IXGBE_SB_IOSF_TARGET_KR_PHY, link_reg); - if (status != IXGBE_SUCCESS) - return status; - break; - default: - break; - } - - return IXGBE_SUCCESS; -} - -/** - * ixgbe_disable_eee_x550 - Disable EEE support - * @hw: pointer to hardware structure - */ -STATIC s32 ixgbe_disable_eee_x550(struct ixgbe_hw *hw) -{ - u16 autoneg_eee_reg; - u32 link_reg; - s32 status; - - if (hw->mac.type == ixgbe_mac_X550) { - /* Disable advertised EEE capability */ - hw->phy.ops.read_reg(hw, IXGBE_MDIO_AUTO_NEG_EEE_ADVT, - IXGBE_MDIO_AUTO_NEG_DEV_TYPE, - &autoneg_eee_reg); - - autoneg_eee_reg &= ~(IXGBE_AUTO_NEG_10GBASE_EEE_ADVT | - IXGBE_AUTO_NEG_1000BASE_EEE_ADVT | - IXGBE_AUTO_NEG_100BASE_EEE_ADVT); - - hw->phy.ops.write_reg(hw, IXGBE_MDIO_AUTO_NEG_EEE_ADVT, - IXGBE_MDIO_AUTO_NEG_DEV_TYPE, - autoneg_eee_reg); - return IXGBE_SUCCESS; - } - - switch (hw->device_id) { - case IXGBE_DEV_ID_X550EM_X_KR: - case IXGBE_DEV_ID_X550EM_A_KR: - case IXGBE_DEV_ID_X550EM_A_KR_L: - status = hw->mac.ops.read_iosf_sb_reg(hw, - IXGBE_KRM_LINK_CTRL_1(hw->bus.lan_id), - IXGBE_SB_IOSF_TARGET_KR_PHY, &link_reg); - if (status != IXGBE_SUCCESS) - return status; - - link_reg &= ~(IXGBE_KRM_LINK_CTRL_1_TETH_EEE_CAP_KR | - IXGBE_KRM_LINK_CTRL_1_TETH_EEE_CAP_KX); - - /* Advertise FEC capability when EEE is disabled. */ - link_reg |= IXGBE_KRM_LINK_CTRL_1_TETH_AN_CAP_FEC; - - status = hw->mac.ops.write_iosf_sb_reg(hw, - IXGBE_KRM_LINK_CTRL_1(hw->bus.lan_id), - IXGBE_SB_IOSF_TARGET_KR_PHY, link_reg); - if (status != IXGBE_SUCCESS) - return status; - break; - default: - break; - } - - return IXGBE_SUCCESS; -} - -/** - * ixgbe_setup_eee_X550 - Enable/disable EEE support - * @hw: pointer to the HW structure - * @enable_eee: boolean flag to enable EEE - * - * Enable/disable EEE based on enable_eee flag. - * Auto-negotiation must be started after BASE-T EEE bits in PHY register 7.3C - * are modified. - * - **/ -s32 ixgbe_setup_eee_X550(struct ixgbe_hw *hw, bool enable_eee) -{ - s32 status; - u32 eeer; - - DEBUGFUNC("ixgbe_setup_eee_X550"); - - eeer = IXGBE_READ_REG(hw, IXGBE_EEER); - /* Enable or disable EEE per flag */ - if (enable_eee) { - eeer |= (IXGBE_EEER_TX_LPI_EN | IXGBE_EEER_RX_LPI_EN); - - /* Not supported on first revision of X550EM_x. 
*/ - if ((hw->mac.type == ixgbe_mac_X550EM_x) && - !(IXGBE_FUSES0_REV_MASK & - IXGBE_READ_REG(hw, IXGBE_FUSES0_GROUP(0)))) - return IXGBE_SUCCESS; - status = ixgbe_enable_eee_x550(hw); - if (status) - return status; - } else { - eeer &= ~(IXGBE_EEER_TX_LPI_EN | IXGBE_EEER_RX_LPI_EN); - - status = ixgbe_disable_eee_x550(hw); - if (status) - return status; - } - IXGBE_WRITE_REG(hw, IXGBE_EEER, eeer); - - return IXGBE_SUCCESS; -} - -/** * ixgbe_set_source_address_pruning_X550 - Enable/Disbale source address pruning * @hw: pointer to hardware structure * @enable: enable or disable source address pruning @@ -1102,8 +1198,8 @@ STATIC s32 ixgbe_iosf_wait(struct ixgbe_hw *hw, u32 *ctrl) } /** - * ixgbe_write_iosf_sb_reg_x550 - Writes a value to specified register of the IOSF - * device + * ixgbe_write_iosf_sb_reg_x550 - Writes a value to specified register + * of the IOSF device * @hw: pointer to hardware structure * @reg_addr: 32 bit PHY register to write * @device_type: 3 bit device type @@ -1149,12 +1245,11 @@ out: } /** - * ixgbe_read_iosf_sb_reg_x550 - Writes a value to specified register of the IOSF - * device + * ixgbe_read_iosf_sb_reg_x550 - Reads specified register of the IOSF device * @hw: pointer to hardware structure * @reg_addr: 32 bit PHY register to write * @device_type: 3 bit device type - * @phy_data: Pointer to read data from the register + * @data: Pointer to read data from the register **/ s32 ixgbe_read_iosf_sb_reg_x550(struct ixgbe_hw *hw, u32 reg_addr, u32 device_type, u32 *data) @@ -1216,13 +1311,20 @@ s32 ixgbe_get_phy_token(struct ixgbe_hw *hw) sizeof(token_cmd), IXGBE_HI_COMMAND_TIMEOUT, true); - if (status) + if (status) { + DEBUGOUT1("Issuing host interface command failed with Status = %d\n", + status); return status; + } if (token_cmd.hdr.cmd_or_resp.ret_status == FW_PHY_TOKEN_OK) return IXGBE_SUCCESS; - if (token_cmd.hdr.cmd_or_resp.ret_status != FW_PHY_TOKEN_RETRY) + if (token_cmd.hdr.cmd_or_resp.ret_status != FW_PHY_TOKEN_RETRY) { + DEBUGOUT1("Host interface command returned 0x%08x , returning IXGBE_ERR_FW_RESP_INVALID\n", + token_cmd.hdr.cmd_or_resp.ret_status); return IXGBE_ERR_FW_RESP_INVALID; + } + DEBUGOUT("Returning IXGBE_ERR_TOKEN_RETRY\n"); return IXGBE_ERR_TOKEN_RETRY; } @@ -1278,7 +1380,7 @@ s32 ixgbe_write_iosf_sb_reg_x550a(struct ixgbe_hw *hw, u32 reg_addr, write_cmd.port_number = hw->bus.lan_id; write_cmd.command_type = FW_INT_PHY_REQ_WRITE; write_cmd.address = IXGBE_CPU_TO_BE16(reg_addr); - write_cmd.write_data = IXGBE_CPU_TO_LE32(data); + write_cmd.write_data = IXGBE_CPU_TO_BE32(data); status = ixgbe_host_interface_command(hw, (u32 *)&write_cmd, sizeof(write_cmd), @@ -1288,8 +1390,7 @@ s32 ixgbe_write_iosf_sb_reg_x550a(struct ixgbe_hw *hw, u32 reg_addr, } /** - * ixgbe_read_iosf_sb_reg_x550a - Writes a value to specified register - * of the IOSF device. + * ixgbe_read_iosf_sb_reg_x550a - Reads specified register of the IOSF device * @hw: pointer to hardware structure * @reg_addr: 32 bit PHY register to write * @device_type: 3 bit device type @@ -1318,7 +1419,7 @@ s32 ixgbe_read_iosf_sb_reg_x550a(struct ixgbe_hw *hw, u32 reg_addr, IXGBE_HI_COMMAND_TIMEOUT, true); /* Extract the register value from the response. 
*/ - *data = IXGBE_LE32_TO_CPU(hic.rsp.read_data); + *data = IXGBE_BE32_TO_CPU(hic.rsp.read_data); return status; } @@ -1482,6 +1583,7 @@ enum ixgbe_media_type ixgbe_get_media_type_X550em(struct ixgbe_hw *hw) switch (hw->device_id) { case IXGBE_DEV_ID_X550EM_X_KR: case IXGBE_DEV_ID_X550EM_X_KX4: + case IXGBE_DEV_ID_X550EM_X_XFI: case IXGBE_DEV_ID_X550EM_A_KR: case IXGBE_DEV_ID_X550EM_A_KR_L: media_type = ixgbe_media_type_backplane; @@ -1506,7 +1608,6 @@ enum ixgbe_media_type ixgbe_get_media_type_X550em(struct ixgbe_hw *hw) case IXGBE_DEV_ID_X550EM_A_1G_T: case IXGBE_DEV_ID_X550EM_A_1G_T_L: media_type = ixgbe_media_type_copper; - hw->phy.type = ixgbe_phy_m88; break; default: media_type = ixgbe_media_type_unknown; @@ -1599,16 +1700,62 @@ s32 ixgbe_setup_sfp_modules_X550em(struct ixgbe_hw *hw) } /** +* ixgbe_restart_an_internal_phy_x550em - restart autonegotiation for the +* internal PHY +* @hw: pointer to hardware structure +**/ +STATIC s32 ixgbe_restart_an_internal_phy_x550em(struct ixgbe_hw *hw) +{ + s32 status; + u32 link_ctrl; + + /* Restart auto-negotiation. */ + status = hw->mac.ops.read_iosf_sb_reg(hw, + IXGBE_KRM_LINK_CTRL_1(hw->bus.lan_id), + IXGBE_SB_IOSF_TARGET_KR_PHY, &link_ctrl); + + if (status) { + DEBUGOUT("Auto-negotiation did not complete\n"); + return status; + } + + link_ctrl |= IXGBE_KRM_LINK_CTRL_1_TETH_AN_RESTART; + status = hw->mac.ops.write_iosf_sb_reg(hw, + IXGBE_KRM_LINK_CTRL_1(hw->bus.lan_id), + IXGBE_SB_IOSF_TARGET_KR_PHY, link_ctrl); + + if (hw->mac.type == ixgbe_mac_X550EM_a) { + u32 flx_mask_st20; + + /* Indicate to FW that AN restart has been asserted */ + status = hw->mac.ops.read_iosf_sb_reg(hw, + IXGBE_KRM_PMD_FLX_MASK_ST20(hw->bus.lan_id), + IXGBE_SB_IOSF_TARGET_KR_PHY, &flx_mask_st20); + + if (status) { + DEBUGOUT("Auto-negotiation did not complete\n"); + return status; + } + + flx_mask_st20 |= IXGBE_KRM_PMD_FLX_MASK_ST20_FW_AN_RESTART; + status = hw->mac.ops.write_iosf_sb_reg(hw, + IXGBE_KRM_PMD_FLX_MASK_ST20(hw->bus.lan_id), + IXGBE_SB_IOSF_TARGET_KR_PHY, flx_mask_st20); + } + + return status; +} + +/** * ixgbe_setup_sgmii - Set up link for sgmii * @hw: pointer to hardware structure */ STATIC s32 ixgbe_setup_sgmii(struct ixgbe_hw *hw, ixgbe_link_speed speed, - bool autoneg_wait_to_complete) + bool autoneg_wait) { struct ixgbe_mac_info *mac = &hw->mac; - u32 lval, sval; + u32 lval, sval, flx_val; s32 rc; - UNREFERENCED_2PARAMETER(speed, autoneg_wait_to_complete); rc = mac->ops.read_iosf_sb_reg(hw, IXGBE_KRM_LINK_CTRL_1(hw->bus.lan_id), @@ -1641,12 +1788,100 @@ STATIC s32 ixgbe_setup_sgmii(struct ixgbe_hw *hw, ixgbe_link_speed speed, if (rc) return rc; - lval |= IXGBE_KRM_LINK_CTRL_1_TETH_AN_RESTART; + rc = mac->ops.read_iosf_sb_reg(hw, + IXGBE_KRM_PMD_FLX_MASK_ST20(hw->bus.lan_id), + IXGBE_SB_IOSF_TARGET_KR_PHY, &flx_val); + if (rc) + return rc; + + flx_val &= ~IXGBE_KRM_PMD_FLX_MASK_ST20_SPEED_MASK; + flx_val |= IXGBE_KRM_PMD_FLX_MASK_ST20_SPEED_1G; + flx_val &= ~IXGBE_KRM_PMD_FLX_MASK_ST20_AN_EN; + flx_val |= IXGBE_KRM_PMD_FLX_MASK_ST20_SGMII_EN; + flx_val |= IXGBE_KRM_PMD_FLX_MASK_ST20_AN37_EN; + + rc = mac->ops.write_iosf_sb_reg(hw, + IXGBE_KRM_PMD_FLX_MASK_ST20(hw->bus.lan_id), + IXGBE_SB_IOSF_TARGET_KR_PHY, flx_val); + if (rc) + return rc; + + rc = ixgbe_restart_an_internal_phy_x550em(hw); + if (rc) + return rc; + + return hw->phy.ops.setup_link_speed(hw, speed, autoneg_wait); +} + +/** + * ixgbe_setup_sgmii_fw - Set up link for sgmii with firmware-controlled PHYs + * @hw: pointer to hardware structure + */ +STATIC s32 ixgbe_setup_sgmii_fw(struct 
ixgbe_hw *hw, ixgbe_link_speed speed, + bool autoneg_wait) +{ + struct ixgbe_mac_info *mac = &hw->mac; + u32 lval, sval, flx_val; + s32 rc; + + rc = mac->ops.read_iosf_sb_reg(hw, + IXGBE_KRM_LINK_CTRL_1(hw->bus.lan_id), + IXGBE_SB_IOSF_TARGET_KR_PHY, &lval); + if (rc) + return rc; + + lval &= ~IXGBE_KRM_LINK_CTRL_1_TETH_AN_ENABLE; + lval &= ~IXGBE_KRM_LINK_CTRL_1_TETH_FORCE_SPEED_MASK; + lval |= IXGBE_KRM_LINK_CTRL_1_TETH_AN_SGMII_EN; + lval |= IXGBE_KRM_LINK_CTRL_1_TETH_AN_CLAUSE_37_EN; + lval &= ~IXGBE_KRM_LINK_CTRL_1_TETH_FORCE_SPEED_1G; + rc = mac->ops.write_iosf_sb_reg(hw, + IXGBE_KRM_LINK_CTRL_1(hw->bus.lan_id), + IXGBE_SB_IOSF_TARGET_KR_PHY, lval); + if (rc) + return rc; + + rc = mac->ops.read_iosf_sb_reg(hw, + IXGBE_KRM_SGMII_CTRL(hw->bus.lan_id), + IXGBE_SB_IOSF_TARGET_KR_PHY, &sval); + if (rc) + return rc; + + sval &= ~IXGBE_KRM_SGMII_CTRL_MAC_TAR_FORCE_10_D; + sval &= ~IXGBE_KRM_SGMII_CTRL_MAC_TAR_FORCE_100_D; + rc = mac->ops.write_iosf_sb_reg(hw, + IXGBE_KRM_SGMII_CTRL(hw->bus.lan_id), + IXGBE_SB_IOSF_TARGET_KR_PHY, sval); + if (rc) + return rc; + rc = mac->ops.write_iosf_sb_reg(hw, IXGBE_KRM_LINK_CTRL_1(hw->bus.lan_id), IXGBE_SB_IOSF_TARGET_KR_PHY, lval); + if (rc) + return rc; + + rc = mac->ops.read_iosf_sb_reg(hw, + IXGBE_KRM_PMD_FLX_MASK_ST20(hw->bus.lan_id), + IXGBE_SB_IOSF_TARGET_KR_PHY, &flx_val); + if (rc) + return rc; - return rc; + flx_val &= ~IXGBE_KRM_PMD_FLX_MASK_ST20_SPEED_MASK; + flx_val |= IXGBE_KRM_PMD_FLX_MASK_ST20_SPEED_AN; + flx_val &= ~IXGBE_KRM_PMD_FLX_MASK_ST20_AN_EN; + flx_val |= IXGBE_KRM_PMD_FLX_MASK_ST20_SGMII_EN; + flx_val |= IXGBE_KRM_PMD_FLX_MASK_ST20_AN37_EN; + + rc = mac->ops.write_iosf_sb_reg(hw, + IXGBE_KRM_PMD_FLX_MASK_ST20(hw->bus.lan_id), + IXGBE_SB_IOSF_TARGET_KR_PHY, flx_val); + if (rc) + return rc; + + rc = ixgbe_restart_an_internal_phy_x550em(hw); + + return hw->phy.ops.setup_link_speed(hw, speed, autoneg_wait); } /** @@ -1670,17 +1905,30 @@ void ixgbe_init_mac_link_ops_X550em(struct ixgbe_hw *hw) mac->ops.setup_link = ixgbe_setup_mac_link_multispeed_fiber; mac->ops.set_rate_select_speed = ixgbe_set_soft_rate_select_speed; + if ((hw->device_id == IXGBE_DEV_ID_X550EM_A_SFP_N) || (hw->device_id == IXGBE_DEV_ID_X550EM_A_SFP)) mac->ops.setup_mac_link = - ixgbe_setup_mac_link_sfp_x550a; + ixgbe_setup_mac_link_sfp_x550a; else mac->ops.setup_mac_link = - ixgbe_setup_mac_link_sfp_x550em; + ixgbe_setup_mac_link_sfp_x550em; break; case ixgbe_media_type_copper: - mac->ops.setup_link = ixgbe_setup_mac_link_t_X550em; - mac->ops.check_link = ixgbe_check_link_t_X550em; + if (hw->mac.type == ixgbe_mac_X550EM_a) { + if (hw->device_id == IXGBE_DEV_ID_X550EM_A_1G_T || + hw->device_id == IXGBE_DEV_ID_X550EM_A_1G_T_L) { + mac->ops.setup_link = ixgbe_setup_sgmii_fw; + mac->ops.check_link = + ixgbe_check_mac_link_generic; + } else { + mac->ops.setup_link = + ixgbe_setup_mac_link_t_X550em; + } + } else { + mac->ops.setup_link = ixgbe_setup_mac_link_t_X550em; + mac->ops.check_link = ixgbe_check_link_t_X550em; + } break; case ixgbe_media_type_backplane: if (hw->device_id == IXGBE_DEV_ID_X550EM_A_SGMII || @@ -1704,6 +1952,13 @@ s32 ixgbe_get_link_capabilities_X550em(struct ixgbe_hw *hw, { DEBUGFUNC("ixgbe_get_link_capabilities_X550em"); + + if (hw->phy.type == ixgbe_phy_fw) { + *autoneg = true; + *speed = hw->phy.speeds_supported; + return 0; + } + /* SFP */ if (hw->phy.media_type == ixgbe_media_type_fiber) { @@ -1727,13 +1982,24 @@ s32 ixgbe_get_link_capabilities_X550em(struct ixgbe_hw *hw, *speed = IXGBE_LINK_SPEED_10GB_FULL; } else { switch (hw->phy.type) { - 
case ixgbe_phy_m88: - *speed = IXGBE_LINK_SPEED_100_FULL | - IXGBE_LINK_SPEED_1GB_FULL; - break; + case ixgbe_phy_ext_1g_t: case ixgbe_phy_sgmii: *speed = IXGBE_LINK_SPEED_1GB_FULL; break; + case ixgbe_phy_x550em_kr: + if (hw->mac.type == ixgbe_mac_X550EM_a) { + /* check different backplane modes */ + if (hw->phy.nw_mng_if_sel & + IXGBE_NW_MNG_IF_SEL_PHY_SPEED_2_5G) { + *speed = IXGBE_LINK_SPEED_2_5GB_FULL; + break; + } else if (hw->device_id == + IXGBE_DEV_ID_X550EM_A_KR_L) { + *speed = IXGBE_LINK_SPEED_1GB_FULL; + break; + } + } + /* fall through */ default: *speed = IXGBE_LINK_SPEED_10GB_FULL | IXGBE_LINK_SPEED_1GB_FULL; @@ -1854,19 +2120,32 @@ STATIC s32 ixgbe_enable_lasi_ext_t_x550em(struct ixgbe_hw *hw) status = ixgbe_get_lasi_ext_t_x550em(hw, &lsc); /* Enable link status change alarm */ - status = hw->phy.ops.read_reg(hw, IXGBE_MDIO_PMA_TX_VEN_LASI_INT_MASK, - IXGBE_MDIO_AUTO_NEG_DEV_TYPE, ®); - if (status != IXGBE_SUCCESS) - return status; + /* Enable the LASI interrupts on X552 devices to receive notifications + * of the link configurations of the external PHY and correspondingly + * support the configuration of the internal iXFI link, since iXFI does + * not support auto-negotiation. This is not required for X553 devices + * having KR support, which performs auto-negotiations and which is used + * as the internal link to the external PHY. Hence adding a check here + * to avoid enabling LASI interrupts for X553 devices. + */ + if (hw->mac.type != ixgbe_mac_X550EM_a) { + status = hw->phy.ops.read_reg(hw, + IXGBE_MDIO_PMA_TX_VEN_LASI_INT_MASK, + IXGBE_MDIO_AUTO_NEG_DEV_TYPE, ®); + + if (status != IXGBE_SUCCESS) + return status; - reg |= IXGBE_MDIO_PMA_TX_VEN_LASI_INT_EN; + reg |= IXGBE_MDIO_PMA_TX_VEN_LASI_INT_EN; - status = hw->phy.ops.write_reg(hw, IXGBE_MDIO_PMA_TX_VEN_LASI_INT_MASK, - IXGBE_MDIO_AUTO_NEG_DEV_TYPE, reg); + status = hw->phy.ops.write_reg(hw, + IXGBE_MDIO_PMA_TX_VEN_LASI_INT_MASK, + IXGBE_MDIO_AUTO_NEG_DEV_TYPE, reg); - if (status != IXGBE_SUCCESS) - return status; + if (status != IXGBE_SUCCESS) + return status; + } /* Enable high temperature failure and global fault alarms */ status = hw->phy.ops.read_reg(hw, IXGBE_MDIO_GLOBAL_INT_MASK, @@ -1935,8 +2214,8 @@ STATIC s32 ixgbe_setup_kr_speed_x550em(struct ixgbe_hw *hw, u32 reg_val; status = hw->mac.ops.read_iosf_sb_reg(hw, - IXGBE_KRM_LINK_CTRL_1(hw->bus.lan_id), - IXGBE_SB_IOSF_TARGET_KR_PHY, ®_val); + IXGBE_KRM_LINK_CTRL_1(hw->bus.lan_id), + IXGBE_SB_IOSF_TARGET_KR_PHY, ®_val); if (status) return status; @@ -1952,182 +2231,75 @@ STATIC s32 ixgbe_setup_kr_speed_x550em(struct ixgbe_hw *hw, if (speed & IXGBE_LINK_SPEED_1GB_FULL) reg_val |= IXGBE_KRM_LINK_CTRL_1_TETH_AN_CAP_KX; - /* Restart auto-negotiation. 
*/ - reg_val |= IXGBE_KRM_LINK_CTRL_1_TETH_AN_RESTART; status = hw->mac.ops.write_iosf_sb_reg(hw, - IXGBE_KRM_LINK_CTRL_1(hw->bus.lan_id), - IXGBE_SB_IOSF_TARGET_KR_PHY, reg_val); - - return status; -} + IXGBE_KRM_LINK_CTRL_1(hw->bus.lan_id), + IXGBE_SB_IOSF_TARGET_KR_PHY, reg_val); -/** - * ixgbe_set_master_slave_mode - Set up PHY for master/slave mode - * @hw: pointer to hardware structure - * - * Must be called while holding the PHY semaphore and token - */ -STATIC s32 ixgbe_set_master_slave_mode(struct ixgbe_hw *hw) -{ - u16 phy_data; - s32 rc; + if (hw->mac.type == ixgbe_mac_X550EM_a) { + /* Set lane mode to KR auto negotiation */ + status = hw->mac.ops.read_iosf_sb_reg(hw, + IXGBE_KRM_PMD_FLX_MASK_ST20(hw->bus.lan_id), + IXGBE_SB_IOSF_TARGET_KR_PHY, ®_val); - /* Resolve master/slave mode */ - rc = ixgbe_read_phy_reg_mdi_22(hw, IXGBE_M88E1500_1000T_CTRL, 0, - &phy_data); - if (rc) - return rc; + if (status) + return status; - /* load defaults for future use */ - if (phy_data & IXGBE_M88E1500_1000T_CTRL_MS_ENABLE) { - if (phy_data & IXGBE_M88E1500_1000T_CTRL_MS_VALUE) - hw->phy.original_ms_type = ixgbe_ms_force_master; - else - hw->phy.original_ms_type = ixgbe_ms_force_slave; - } else { - hw->phy.original_ms_type = ixgbe_ms_auto; - } + reg_val &= ~IXGBE_KRM_PMD_FLX_MASK_ST20_SPEED_MASK; + reg_val |= IXGBE_KRM_PMD_FLX_MASK_ST20_SPEED_AN; + reg_val |= IXGBE_KRM_PMD_FLX_MASK_ST20_AN_EN; + reg_val &= ~IXGBE_KRM_PMD_FLX_MASK_ST20_AN37_EN; + reg_val &= ~IXGBE_KRM_PMD_FLX_MASK_ST20_SGMII_EN; - switch (hw->phy.ms_type) { - case ixgbe_ms_force_master: - phy_data |= IXGBE_M88E1500_1000T_CTRL_MS_ENABLE; - phy_data |= IXGBE_M88E1500_1000T_CTRL_MS_VALUE; - break; - case ixgbe_ms_force_slave: - phy_data |= IXGBE_M88E1500_1000T_CTRL_MS_ENABLE; - phy_data &= ~IXGBE_M88E1500_1000T_CTRL_MS_VALUE; - break; - case ixgbe_ms_auto: - phy_data &= ~IXGBE_M88E1500_1000T_CTRL_MS_ENABLE; - break; - default: - break; + status = hw->mac.ops.write_iosf_sb_reg(hw, + IXGBE_KRM_PMD_FLX_MASK_ST20(hw->bus.lan_id), + IXGBE_SB_IOSF_TARGET_KR_PHY, reg_val); } - return ixgbe_write_phy_reg_mdi_22(hw, IXGBE_M88E1500_1000T_CTRL, 0, - phy_data); -} - -/** - * ixgbe_reset_phy_m88_nolock - Reset m88 PHY without locking - * @hw: pointer to hardware structure - * - * Must be called while holding the PHY semaphore and token - */ -STATIC s32 ixgbe_reset_phy_m88_nolock(struct ixgbe_hw *hw) -{ - s32 rc; - - rc = ixgbe_write_phy_reg_mdi_22(hw, IXGBE_M88E1500_PAGE_ADDR, 0, 1); - if (rc) - return rc; - - rc = ixgbe_write_phy_reg_mdi_22(hw, IXGBE_M88E1500_FIBER_CTRL, 0, - IXGBE_M88E1500_FIBER_CTRL_RESET | - IXGBE_M88E1500_FIBER_CTRL_DUPLEX_FULL | - IXGBE_M88E1500_FIBER_CTRL_SPEED_MSB); - if (rc) - goto res_out; - - rc = ixgbe_write_phy_reg_mdi_22(hw, IXGBE_M88E1500_PAGE_ADDR, 0, 18); - if (rc) - goto res_out; - - rc = ixgbe_write_phy_reg_mdi_22(hw, IXGBE_M88E1500_GEN_CTRL, 0, - IXGBE_M88E1500_GEN_CTRL_RESET | - IXGBE_M88E1500_GEN_CTRL_SGMII_COPPER); - if (rc) - goto res_out; - - rc = ixgbe_write_phy_reg_mdi_22(hw, IXGBE_M88E1500_PAGE_ADDR, 0, 0); - if (rc) - goto res_out; - - rc = ixgbe_write_phy_reg_mdi_22(hw, IXGBE_M88E1500_COPPER_CTRL, 0, - IXGBE_M88E1500_COPPER_CTRL_RESET | - IXGBE_M88E1500_COPPER_CTRL_AN_EN | - IXGBE_M88E1500_COPPER_CTRL_RESTART_AN | - IXGBE_M88E1500_COPPER_CTRL_FULL_DUPLEX | - IXGBE_M88E1500_COPPER_CTRL_SPEED_MSB); - -res_out: - ixgbe_write_phy_reg_mdi_22(hw, IXGBE_M88E1500_PAGE_ADDR, 0, 0); - return rc; + return ixgbe_restart_an_internal_phy_x550em(hw); } /** - * ixgbe_reset_phy_m88 - Reset m88 PHY + * 
ixgbe_reset_phy_fw - Reset firmware-controlled PHYs * @hw: pointer to hardware structure */ -STATIC s32 ixgbe_reset_phy_m88(struct ixgbe_hw *hw) +static s32 ixgbe_reset_phy_fw(struct ixgbe_hw *hw) { - u32 swfw_mask = hw->phy.phy_semaphore_mask; + u32 store[FW_PHY_ACT_DATA_COUNT] = { 0 }; s32 rc; if (hw->phy.reset_disable || ixgbe_check_reset_blocked(hw)) return IXGBE_SUCCESS; - rc = hw->mac.ops.acquire_swfw_sync(hw, swfw_mask); + rc = ixgbe_fw_phy_activity(hw, FW_PHY_ACT_PHY_SW_RESET, &store); if (rc) return rc; + memset(store, 0, sizeof(store)); - rc = ixgbe_reset_phy_m88_nolock(hw); + rc = ixgbe_fw_phy_activity(hw, FW_PHY_ACT_INIT_PHY, &store); + if (rc) + return rc; - hw->mac.ops.release_swfw_sync(hw, swfw_mask); - return rc; + return ixgbe_setup_fw_link(hw); } /** - * ixgbe_setup_m88 - setup m88 PHY + * ixgbe_check_overtemp_fw - Check firmware-controlled PHYs for overtemp * @hw: pointer to hardware structure */ -STATIC s32 ixgbe_setup_m88(struct ixgbe_hw *hw) +static s32 ixgbe_check_overtemp_fw(struct ixgbe_hw *hw) { - u32 swfw_mask = hw->phy.phy_semaphore_mask; - struct ixgbe_phy_info *phy = &hw->phy; - u16 phy_data; + u32 store[FW_PHY_ACT_DATA_COUNT] = { 0 }; s32 rc; - if (phy->reset_disable || ixgbe_check_reset_blocked(hw)) - return IXGBE_SUCCESS; - - rc = hw->mac.ops.acquire_swfw_sync(hw, swfw_mask); + rc = ixgbe_fw_phy_activity(hw, FW_PHY_ACT_GET_LINK_INFO, &store); if (rc) return rc; - rc = ixgbe_read_phy_reg_mdi_22(hw, IXGBE_M88E1500_PHY_SPEC_CTRL, 0, - &phy_data); - if (rc) - goto rel_out; - - /* Enable downshift and setting it to X6 */ - phy_data &= ~IXGBE_M88E1500_PSCR_DOWNSHIFT_ENABLE; - phy_data |= IXGBE_M88E1500_PSCR_DOWNSHIFT_6X; - phy_data |= IXGBE_M88E1500_PSCR_DOWNSHIFT_ENABLE; - rc = ixgbe_write_phy_reg_mdi_22(hw, - IXGBE_M88E1500_PHY_SPEC_CTRL, 0, - phy_data); - if (rc) - goto rel_out; - - ixgbe_write_phy_reg_mdi_22(hw, IXGBE_M88E1500_PAGE_ADDR, 0, 0); - - /* Commit the changes */ - rc = ixgbe_reset_phy_m88_nolock(hw); - if (rc) { - DEBUGOUT("Error committing the PHY changes\n"); - goto rel_out; + if (store[0] & FW_PHY_ACT_GET_LINK_INFO_TEMP) { + ixgbe_shutdown_fw_phy(hw); + return IXGBE_ERR_OVERTEMP; } - - rc = ixgbe_set_master_slave_mode(hw); - - hw->mac.ops.release_swfw_sync(hw, swfw_mask); - return rc; - -rel_out: - ixgbe_write_phy_reg_mdi_22(hw, IXGBE_M88E1500_PAGE_ADDR, 0, 0); - hw->mac.ops.release_swfw_sync(hw, swfw_mask); - return rc; + return IXGBE_SUCCESS; } /** @@ -2151,7 +2323,7 @@ STATIC s32 ixgbe_read_mng_if_sel_x550em(struct ixgbe_hw *hw) hw->phy.nw_mng_if_sel & IXGBE_NW_MNG_IF_SEL_MDIO_ACT) { hw->phy.addr = (hw->phy.nw_mng_if_sel & IXGBE_NW_MNG_IF_SEL_MDIO_PHY_ADD) >> - IXGBE_NW_MNG_IF_SEL_MDIO_PHY_ADD_SHIFT; + IXGBE_NW_MNG_IF_SEL_MDIO_PHY_ADD_SHIFT; } return IXGBE_SUCCESS; @@ -2173,7 +2345,6 @@ s32 ixgbe_init_phy_ops_X550em(struct ixgbe_hw *hw) DEBUGFUNC("ixgbe_init_phy_ops_X550em"); hw->mac.ops.set_lan_id(hw); - ixgbe_read_mng_if_sel_x550em(hw); if (hw->mac.ops.get_media_type(hw) == ixgbe_media_type_fiber) { @@ -2182,9 +2353,45 @@ s32 ixgbe_init_phy_ops_X550em(struct ixgbe_hw *hw) phy->ops.identify_sfp = ixgbe_identify_sfp_module_X550em; } + switch (hw->device_id) { + case IXGBE_DEV_ID_X550EM_A_1G_T: + case IXGBE_DEV_ID_X550EM_A_1G_T_L: + phy->ops.read_reg_mdi = ixgbe_read_phy_reg_mdi_22; + phy->ops.write_reg_mdi = ixgbe_write_phy_reg_mdi_22; + hw->phy.ops.read_reg = ixgbe_read_phy_reg_x550a; + hw->phy.ops.write_reg = ixgbe_write_phy_reg_x550a; + phy->ops.check_overtemp = ixgbe_check_overtemp_fw; + if (hw->bus.lan_id) + hw->phy.phy_semaphore_mask |= 
IXGBE_GSSR_PHY1_SM; + else + hw->phy.phy_semaphore_mask |= IXGBE_GSSR_PHY0_SM; + + break; + case IXGBE_DEV_ID_X550EM_A_10G_T: + case IXGBE_DEV_ID_X550EM_A_SFP: + hw->phy.ops.read_reg = ixgbe_read_phy_reg_x550a; + hw->phy.ops.write_reg = ixgbe_write_phy_reg_x550a; + if (hw->bus.lan_id) + hw->phy.phy_semaphore_mask |= IXGBE_GSSR_PHY1_SM; + else + hw->phy.phy_semaphore_mask |= IXGBE_GSSR_PHY0_SM; + break; + case IXGBE_DEV_ID_X550EM_X_SFP: + /* set up for CS4227 usage */ + hw->phy.phy_semaphore_mask = IXGBE_GSSR_SHARED_I2C_SM; + break; + case IXGBE_DEV_ID_X550EM_X_1G_T: + phy->ops.read_reg_mdi = ixgbe_read_phy_reg_mdi_22; + phy->ops.write_reg_mdi = ixgbe_write_phy_reg_mdi_22; + break; + default: + break; + } + /* Identify the PHY or SFP module */ ret_val = phy->ops.identify(hw); - if (ret_val == IXGBE_ERR_SFP_NOT_SUPPORTED) + if (ret_val == IXGBE_ERR_SFP_NOT_SUPPORTED || + ret_val == IXGBE_ERR_PHY_ADDR_INVALID) return ret_val; /* Setup function pointers based on detected hardware */ @@ -2204,6 +2411,16 @@ s32 ixgbe_init_phy_ops_X550em(struct ixgbe_hw *hw) phy->ops.read_reg = ixgbe_read_phy_reg_x550em; phy->ops.write_reg = ixgbe_write_phy_reg_x550em; break; + case ixgbe_phy_ext_1g_t: + /* link is managed by FW */ + phy->ops.setup_link = NULL; + break; + case ixgbe_phy_x550em_xfi: + /* link is managed by HW */ + phy->ops.setup_link = NULL; + phy->ops.read_reg = ixgbe_read_phy_reg_x550em; + phy->ops.write_reg = ixgbe_write_phy_reg_x550em; + break; case ixgbe_phy_x550em_ext_t: /* If internal link mode is XFI, then setup iXFI internal link, * else setup KR now. @@ -2223,11 +2440,9 @@ s32 ixgbe_init_phy_ops_X550em(struct ixgbe_hw *hw) case ixgbe_phy_sgmii: phy->ops.setup_link = NULL; break; - case ixgbe_phy_m88: - phy->ops.setup_link = ixgbe_setup_m88; - phy->ops.read_reg_mdi = ixgbe_read_phy_reg_mdi_22; - phy->ops.write_reg_mdi = ixgbe_write_phy_reg_mdi_22; - phy->ops.reset = ixgbe_reset_phy_m88; + case ixgbe_phy_fw: + phy->ops.setup_link = ixgbe_setup_fw_link; + phy->ops.reset = ixgbe_reset_phy_fw; break; default: break; @@ -2247,8 +2462,6 @@ STATIC void ixgbe_set_mdio_speed(struct ixgbe_hw *hw) case IXGBE_DEV_ID_X550EM_X_10G_T: case IXGBE_DEV_ID_X550EM_A_SGMII: case IXGBE_DEV_ID_X550EM_A_SGMII_L: - case IXGBE_DEV_ID_X550EM_A_1G_T: - case IXGBE_DEV_ID_X550EM_A_1G_T_L: case IXGBE_DEV_ID_X550EM_A_10G_T: case IXGBE_DEV_ID_X550EM_A_SFP: case IXGBE_DEV_ID_X550EM_A_QSFP: @@ -2257,6 +2470,13 @@ STATIC void ixgbe_set_mdio_speed(struct ixgbe_hw *hw) hlreg0 &= ~IXGBE_HLREG0_MDCSPD; IXGBE_WRITE_REG(hw, IXGBE_HLREG0, hlreg0); break; + case IXGBE_DEV_ID_X550EM_A_1G_T: + case IXGBE_DEV_ID_X550EM_A_1G_T_L: + /* Select fast MDIO clock speed for these devices */ + hlreg0 = IXGBE_READ_REG(hw, IXGBE_HLREG0); + hlreg0 |= IXGBE_HLREG0_MDCSPD; + IXGBE_WRITE_REG(hw, IXGBE_HLREG0, hlreg0); + break; default: break; } @@ -2282,9 +2502,10 @@ s32 ixgbe_reset_hw_X550em(struct ixgbe_hw *hw) /* Call adapter stop to disable Tx/Rx and clear interrupts */ status = hw->mac.ops.stop_adapter(hw); - if (status != IXGBE_SUCCESS) + if (status != IXGBE_SUCCESS) { + DEBUGOUT1("Failed to stop adapter, STATUS = %d\n", status); return status; - + } /* flush pending Tx transactions */ ixgbe_clear_tx_pending(hw); @@ -2293,14 +2514,23 @@ s32 ixgbe_reset_hw_X550em(struct ixgbe_hw *hw) /* PHY ops must be identified and initialized prior to reset */ status = hw->phy.ops.init(hw); - if (status == IXGBE_ERR_SFP_NOT_SUPPORTED) + if (status) + DEBUGOUT1("Failed to initialize PHY ops, STATUS = %d\n", + status); + + if (status == 
IXGBE_ERR_SFP_NOT_SUPPORTED) { + DEBUGOUT("Returning from reset HW since PHY ops init returned IXGBE_ERR_SFP_NOT_SUPPORTED\n"); return status; + } /* start the external PHY */ if (hw->phy.type == ixgbe_phy_x550em_ext_t) { status = ixgbe_init_ext_t_x550em(hw); - if (status) + if (status) { + DEBUGOUT1("Failed to start the external PHY, STATUS = %d\n", + status); return status; + } } /* Setup SFP module if there is one present. */ @@ -2313,8 +2543,10 @@ s32 ixgbe_reset_hw_X550em(struct ixgbe_hw *hw) return status; /* Reset PHY */ - if (!hw->phy.reset_disable && hw->phy.ops.reset) - hw->phy.ops.reset(hw); + if (!hw->phy.reset_disable && hw->phy.ops.reset) { + if (hw->phy.ops.reset(hw) == IXGBE_ERR_OVERTEMP) + return IXGBE_ERR_OVERTEMP; + } mac_reset_top: /* Issue global reset to the MAC. Needs to be SW reset if link is up. @@ -2372,6 +2604,9 @@ mac_reset_top: if (hw->device_id == IXGBE_DEV_ID_X550EM_X_SFP) ixgbe_setup_mux_ctl(hw); + if (status != IXGBE_SUCCESS) + DEBUGOUT1("Reset HW failed, STATUS = %d\n", status); + return status; } @@ -2421,12 +2656,11 @@ s32 ixgbe_init_ext_t_x550em(struct ixgbe_hw *hw) /** * ixgbe_setup_kr_x550em - Configure the KR PHY. * @hw: pointer to hardware structure - * - * Configures the integrated KR PHY for X550EM_x. **/ s32 ixgbe_setup_kr_x550em(struct ixgbe_hw *hw) { - if (hw->mac.type != ixgbe_mac_X550EM_x) + /* leave link alone for 2.5G */ + if (hw->phy.autoneg_advertised & IXGBE_LINK_SPEED_2_5GB_FULL) return IXGBE_SUCCESS; return ixgbe_setup_kr_speed_x550em(hw, hw->phy.autoneg_advertised); @@ -2511,14 +2745,63 @@ s32 ixgbe_setup_mac_link_sfp_x550em(struct ixgbe_hw *hw, } /** + * ixgbe_setup_sfi_x550a - Configure the internal PHY for native SFI mode + * @hw: pointer to hardware structure + * @speed: the link speed to force + * + * Configures the integrated PHY for native SFI mode. Used to connect the + * internal PHY directly to an SFP cage, without autonegotiation. + **/ +STATIC s32 ixgbe_setup_sfi_x550a(struct ixgbe_hw *hw, ixgbe_link_speed *speed) +{ + struct ixgbe_mac_info *mac = &hw->mac; + s32 status; + u32 reg_val; + + /* Disable all AN and force speed to 10G Serial. */ + status = mac->ops.read_iosf_sb_reg(hw, + IXGBE_KRM_PMD_FLX_MASK_ST20(hw->bus.lan_id), + IXGBE_SB_IOSF_TARGET_KR_PHY, ®_val); + if (status != IXGBE_SUCCESS) + return status; + + reg_val &= ~IXGBE_KRM_PMD_FLX_MASK_ST20_AN_EN; + reg_val &= ~IXGBE_KRM_PMD_FLX_MASK_ST20_AN37_EN; + reg_val &= ~IXGBE_KRM_PMD_FLX_MASK_ST20_SGMII_EN; + reg_val &= ~IXGBE_KRM_PMD_FLX_MASK_ST20_SPEED_MASK; + + /* Select forced link speed for internal PHY. */ + switch (*speed) { + case IXGBE_LINK_SPEED_10GB_FULL: + reg_val |= IXGBE_KRM_PMD_FLX_MASK_ST20_SPEED_10G; + break; + case IXGBE_LINK_SPEED_1GB_FULL: + reg_val |= IXGBE_KRM_PMD_FLX_MASK_ST20_SPEED_1G; + break; + default: + /* Other link speeds are not supported by internal PHY. */ + return IXGBE_ERR_LINK_SETUP; + } + + status = mac->ops.write_iosf_sb_reg(hw, + IXGBE_KRM_PMD_FLX_MASK_ST20(hw->bus.lan_id), + IXGBE_SB_IOSF_TARGET_KR_PHY, reg_val); + + /* Toggle port SW reset by AN reset. */ + status = ixgbe_restart_an_internal_phy_x550em(hw); + + return status; +} + +/** * ixgbe_setup_mac_link_sfp_x550a - Setup internal PHY for SFP * @hw: pointer to hardware structure * * Configure the the integrated PHY for SFP support. 
**/ s32 ixgbe_setup_mac_link_sfp_x550a(struct ixgbe_hw *hw, - ixgbe_link_speed speed, - bool autoneg_wait_to_complete) + ixgbe_link_speed speed, + bool autoneg_wait_to_complete) { s32 ret_val; u16 reg_phy_ext; @@ -2540,31 +2823,27 @@ s32 ixgbe_setup_mac_link_sfp_x550a(struct ixgbe_hw *hw, return ret_val; if (hw->device_id == IXGBE_DEV_ID_X550EM_A_SFP_N) { - /* Configure internal PHY for native SFI */ + /* Configure internal PHY for native SFI based on module type */ ret_val = hw->mac.ops.read_iosf_sb_reg(hw, - IXGBE_KRM_AN_CNTL_8(hw->bus.lan_id), - IXGBE_SB_IOSF_TARGET_KR_PHY, ®_phy_int); + IXGBE_KRM_PMD_FLX_MASK_ST20(hw->bus.lan_id), + IXGBE_SB_IOSF_TARGET_KR_PHY, ®_phy_int); if (ret_val != IXGBE_SUCCESS) return ret_val; - if (setup_linear) { - reg_phy_int &= ~IXGBE_KRM_AN_CNTL_8_LIMITING; - reg_phy_int |= IXGBE_KRM_AN_CNTL_8_LINEAR; - } else { - reg_phy_int |= IXGBE_KRM_AN_CNTL_8_LIMITING; - reg_phy_int &= ~IXGBE_KRM_AN_CNTL_8_LINEAR; - } + reg_phy_int &= IXGBE_KRM_PMD_FLX_MASK_ST20_SFI_10G_DA; + if (!setup_linear) + reg_phy_int |= IXGBE_KRM_PMD_FLX_MASK_ST20_SFI_10G_SR; ret_val = hw->mac.ops.write_iosf_sb_reg(hw, - IXGBE_KRM_AN_CNTL_8(hw->bus.lan_id), - IXGBE_SB_IOSF_TARGET_KR_PHY, reg_phy_int); + IXGBE_KRM_PMD_FLX_MASK_ST20(hw->bus.lan_id), + IXGBE_SB_IOSF_TARGET_KR_PHY, reg_phy_int); if (ret_val != IXGBE_SUCCESS) return ret_val; - /* Setup XFI/SFI internal link. */ - ret_val = ixgbe_setup_ixfi_x550em(hw, &speed); + /* Setup SFI internal link. */ + ret_val = ixgbe_setup_sfi_x550a(hw, &speed); } else { /* Configure internal PHY for KR/KX. */ ixgbe_setup_kr_speed_x550em(hw, speed); @@ -2575,9 +2854,9 @@ s32 ixgbe_setup_mac_link_sfp_x550a(struct ixgbe_hw *hw, return IXGBE_ERR_PHY_ADDR_INVALID; } - /* Get external PHY device id */ - ret_val = hw->phy.ops.read_reg(hw, IXGBE_CS4227_GLOBAL_ID_MSB, - IXGBE_MDIO_ZERO_DEV_TYPE, ®_phy_ext); + /* Get external PHY SKU id */ + ret_val = hw->phy.ops.read_reg(hw, IXGBE_CS4227_EFUSE_PDF_SKU, + IXGBE_MDIO_ZERO_DEV_TYPE, ®_phy_ext); if (ret_val != IXGBE_SUCCESS) return ret_val; @@ -2585,7 +2864,7 @@ s32 ixgbe_setup_mac_link_sfp_x550a(struct ixgbe_hw *hw, /* When configuring quad port CS4223, the MAC instance is part * of the slice offset. */ - if (reg_phy_ext == IXGBE_CS4223_PHY_ID) + if (reg_phy_ext == IXGBE_CS4223_SKU_ID) slice_offset = (hw->bus.lan_id + (hw->bus.instance_id << 1)) << 12; else @@ -2593,12 +2872,26 @@ s32 ixgbe_setup_mac_link_sfp_x550a(struct ixgbe_hw *hw, /* Configure CS4227/CS4223 LINE side to proper mode. */ reg_slice = IXGBE_CS4227_LINE_SPARE24_LSB + slice_offset; + + ret_val = hw->phy.ops.read_reg(hw, reg_slice, + IXGBE_MDIO_ZERO_DEV_TYPE, ®_phy_ext); + + if (ret_val != IXGBE_SUCCESS) + return ret_val; + + reg_phy_ext &= ~((IXGBE_CS4227_EDC_MODE_CX1 << 1) | + (IXGBE_CS4227_EDC_MODE_SR << 1)); + if (setup_linear) reg_phy_ext = (IXGBE_CS4227_EDC_MODE_CX1 << 1) | 0x1; else reg_phy_ext = (IXGBE_CS4227_EDC_MODE_SR << 1) | 0x1; ret_val = hw->phy.ops.write_reg(hw, reg_slice, - IXGBE_MDIO_ZERO_DEV_TYPE, reg_phy_ext); + IXGBE_MDIO_ZERO_DEV_TYPE, reg_phy_ext); + + /* Flush previous write with a read */ + ret_val = hw->phy.ops.read_reg(hw, reg_slice, + IXGBE_MDIO_ZERO_DEV_TYPE, ®_phy_ext); } return ret_val; } @@ -2611,24 +2904,25 @@ s32 ixgbe_setup_mac_link_sfp_x550a(struct ixgbe_hw *hw, **/ STATIC s32 ixgbe_setup_ixfi_x550em_x(struct ixgbe_hw *hw) { + struct ixgbe_mac_info *mac = &hw->mac; s32 status; u32 reg_val; /* Disable training protocol FSM. 
*/ - status = ixgbe_read_iosf_sb_reg_x550(hw, + status = mac->ops.read_iosf_sb_reg(hw, IXGBE_KRM_RX_TRN_LINKUP_CTRL(hw->bus.lan_id), IXGBE_SB_IOSF_TARGET_KR_PHY, ®_val); if (status != IXGBE_SUCCESS) return status; reg_val |= IXGBE_KRM_RX_TRN_LINKUP_CTRL_CONV_WO_PROTOCOL; - status = ixgbe_write_iosf_sb_reg_x550(hw, + status = mac->ops.write_iosf_sb_reg(hw, IXGBE_KRM_RX_TRN_LINKUP_CTRL(hw->bus.lan_id), IXGBE_SB_IOSF_TARGET_KR_PHY, reg_val); if (status != IXGBE_SUCCESS) return status; /* Disable Flex from training TXFFE. */ - status = ixgbe_read_iosf_sb_reg_x550(hw, + status = mac->ops.read_iosf_sb_reg(hw, IXGBE_KRM_DSP_TXFFE_STATE_4(hw->bus.lan_id), IXGBE_SB_IOSF_TARGET_KR_PHY, ®_val); if (status != IXGBE_SUCCESS) @@ -2636,12 +2930,12 @@ STATIC s32 ixgbe_setup_ixfi_x550em_x(struct ixgbe_hw *hw) reg_val &= ~IXGBE_KRM_DSP_TXFFE_STATE_C0_EN; reg_val &= ~IXGBE_KRM_DSP_TXFFE_STATE_CP1_CN1_EN; reg_val &= ~IXGBE_KRM_DSP_TXFFE_STATE_CO_ADAPT_EN; - status = ixgbe_write_iosf_sb_reg_x550(hw, + status = mac->ops.write_iosf_sb_reg(hw, IXGBE_KRM_DSP_TXFFE_STATE_4(hw->bus.lan_id), IXGBE_SB_IOSF_TARGET_KR_PHY, reg_val); if (status != IXGBE_SUCCESS) return status; - status = ixgbe_read_iosf_sb_reg_x550(hw, + status = mac->ops.read_iosf_sb_reg(hw, IXGBE_KRM_DSP_TXFFE_STATE_5(hw->bus.lan_id), IXGBE_SB_IOSF_TARGET_KR_PHY, ®_val); if (status != IXGBE_SUCCESS) @@ -2649,14 +2943,14 @@ STATIC s32 ixgbe_setup_ixfi_x550em_x(struct ixgbe_hw *hw) reg_val &= ~IXGBE_KRM_DSP_TXFFE_STATE_C0_EN; reg_val &= ~IXGBE_KRM_DSP_TXFFE_STATE_CP1_CN1_EN; reg_val &= ~IXGBE_KRM_DSP_TXFFE_STATE_CO_ADAPT_EN; - status = ixgbe_write_iosf_sb_reg_x550(hw, + status = mac->ops.write_iosf_sb_reg(hw, IXGBE_KRM_DSP_TXFFE_STATE_5(hw->bus.lan_id), IXGBE_SB_IOSF_TARGET_KR_PHY, reg_val); if (status != IXGBE_SUCCESS) return status; /* Enable override for coefficients. */ - status = ixgbe_read_iosf_sb_reg_x550(hw, + status = mac->ops.read_iosf_sb_reg(hw, IXGBE_KRM_TX_COEFF_CTRL_1(hw->bus.lan_id), IXGBE_SB_IOSF_TARGET_KR_PHY, ®_val); if (status != IXGBE_SUCCESS) @@ -2665,7 +2959,7 @@ STATIC s32 ixgbe_setup_ixfi_x550em_x(struct ixgbe_hw *hw) reg_val |= IXGBE_KRM_TX_COEFF_CTRL_1_CZERO_EN; reg_val |= IXGBE_KRM_TX_COEFF_CTRL_1_CPLUS1_OVRRD_EN; reg_val |= IXGBE_KRM_TX_COEFF_CTRL_1_CMINUS1_OVRRD_EN; - status = ixgbe_write_iosf_sb_reg_x550(hw, + status = mac->ops.write_iosf_sb_reg(hw, IXGBE_KRM_TX_COEFF_CTRL_1(hw->bus.lan_id), IXGBE_SB_IOSF_TARGET_KR_PHY, reg_val); return status; @@ -2681,11 +2975,16 @@ STATIC s32 ixgbe_setup_ixfi_x550em_x(struct ixgbe_hw *hw) **/ STATIC s32 ixgbe_setup_ixfi_x550em(struct ixgbe_hw *hw, ixgbe_link_speed *speed) { + struct ixgbe_mac_info *mac = &hw->mac; s32 status; u32 reg_val; + /* iXFI is only supported with X552 */ + if (mac->type != ixgbe_mac_X550EM_x) + return IXGBE_ERR_LINK_SETUP; + /* Disable AN and force speed to 10G Serial. */ - status = ixgbe_read_iosf_sb_reg_x550(hw, + status = mac->ops.read_iosf_sb_reg(hw, IXGBE_KRM_LINK_CTRL_1(hw->bus.lan_id), IXGBE_SB_IOSF_TARGET_KR_PHY, ®_val); if (status != IXGBE_SUCCESS) @@ -2707,7 +3006,7 @@ STATIC s32 ixgbe_setup_ixfi_x550em(struct ixgbe_hw *hw, ixgbe_link_speed *speed) return IXGBE_ERR_LINK_SETUP; } - status = ixgbe_write_iosf_sb_reg_x550(hw, + status = mac->ops.write_iosf_sb_reg(hw, IXGBE_KRM_LINK_CTRL_1(hw->bus.lan_id), IXGBE_SB_IOSF_TARGET_KR_PHY, reg_val); if (status != IXGBE_SUCCESS) @@ -2721,15 +3020,7 @@ STATIC s32 ixgbe_setup_ixfi_x550em(struct ixgbe_hw *hw, ixgbe_link_speed *speed) } /* Toggle port SW reset by AN reset. 
*/ - status = ixgbe_read_iosf_sb_reg_x550(hw, - IXGBE_KRM_LINK_CTRL_1(hw->bus.lan_id), - IXGBE_SB_IOSF_TARGET_KR_PHY, ®_val); - if (status != IXGBE_SUCCESS) - return status; - reg_val |= IXGBE_KRM_LINK_CTRL_1_TETH_AN_RESTART; - status = ixgbe_write_iosf_sb_reg_x550(hw, - IXGBE_KRM_LINK_CTRL_1(hw->bus.lan_id), - IXGBE_SB_IOSF_TARGET_KR_PHY, reg_val); + status = ixgbe_restart_an_internal_phy_x550em(hw); return status; } @@ -2788,7 +3079,8 @@ s32 ixgbe_setup_internal_phy_t_x550em(struct ixgbe_hw *hw) if (hw->mac.ops.get_media_type(hw) != ixgbe_media_type_copper) return IXGBE_ERR_CONFIG; - if (!(hw->phy.nw_mng_if_sel & IXGBE_NW_MNG_IF_SEL_INT_PHY_MODE)) { + if (hw->mac.type == ixgbe_mac_X550EM_x && + !(hw->phy.nw_mng_if_sel & IXGBE_NW_MNG_IF_SEL_INT_PHY_MODE)) { /* If link is down, there is no setup necessary so return */ status = ixgbe_ext_phy_t_x550em_get_link(hw, &link_up); if (status != IXGBE_SUCCESS) @@ -2847,56 +3139,56 @@ s32 ixgbe_setup_phy_loopback_x550em(struct ixgbe_hw *hw) /* Disable AN and force speed to 10G Serial. */ status = hw->mac.ops.read_iosf_sb_reg(hw, - IXGBE_KRM_LINK_CTRL_1(hw->bus.lan_id), - IXGBE_SB_IOSF_TARGET_KR_PHY, ®_val); + IXGBE_KRM_LINK_CTRL_1(hw->bus.lan_id), + IXGBE_SB_IOSF_TARGET_KR_PHY, ®_val); if (status != IXGBE_SUCCESS) return status; reg_val &= ~IXGBE_KRM_LINK_CTRL_1_TETH_AN_ENABLE; reg_val &= ~IXGBE_KRM_LINK_CTRL_1_TETH_FORCE_SPEED_MASK; reg_val |= IXGBE_KRM_LINK_CTRL_1_TETH_FORCE_SPEED_10G; status = hw->mac.ops.write_iosf_sb_reg(hw, - IXGBE_KRM_LINK_CTRL_1(hw->bus.lan_id), - IXGBE_SB_IOSF_TARGET_KR_PHY, reg_val); + IXGBE_KRM_LINK_CTRL_1(hw->bus.lan_id), + IXGBE_SB_IOSF_TARGET_KR_PHY, reg_val); if (status != IXGBE_SUCCESS) return status; /* Set near-end loopback clocks. */ status = hw->mac.ops.read_iosf_sb_reg(hw, - IXGBE_KRM_PORT_CAR_GEN_CTRL(hw->bus.lan_id), - IXGBE_SB_IOSF_TARGET_KR_PHY, ®_val); + IXGBE_KRM_PORT_CAR_GEN_CTRL(hw->bus.lan_id), + IXGBE_SB_IOSF_TARGET_KR_PHY, ®_val); if (status != IXGBE_SUCCESS) return status; reg_val |= IXGBE_KRM_PORT_CAR_GEN_CTRL_NELB_32B; reg_val |= IXGBE_KRM_PORT_CAR_GEN_CTRL_NELB_KRPCS; status = hw->mac.ops.write_iosf_sb_reg(hw, - IXGBE_KRM_PORT_CAR_GEN_CTRL(hw->bus.lan_id), - IXGBE_SB_IOSF_TARGET_KR_PHY, reg_val); + IXGBE_KRM_PORT_CAR_GEN_CTRL(hw->bus.lan_id), + IXGBE_SB_IOSF_TARGET_KR_PHY, reg_val); if (status != IXGBE_SUCCESS) return status; /* Set loopback enable. */ status = hw->mac.ops.read_iosf_sb_reg(hw, - IXGBE_KRM_PMD_DFX_BURNIN(hw->bus.lan_id), - IXGBE_SB_IOSF_TARGET_KR_PHY, ®_val); + IXGBE_KRM_PMD_DFX_BURNIN(hw->bus.lan_id), + IXGBE_SB_IOSF_TARGET_KR_PHY, ®_val); if (status != IXGBE_SUCCESS) return status; reg_val |= IXGBE_KRM_PMD_DFX_BURNIN_TX_RX_KR_LB_MASK; status = hw->mac.ops.write_iosf_sb_reg(hw, - IXGBE_KRM_PMD_DFX_BURNIN(hw->bus.lan_id), - IXGBE_SB_IOSF_TARGET_KR_PHY, reg_val); + IXGBE_KRM_PMD_DFX_BURNIN(hw->bus.lan_id), + IXGBE_SB_IOSF_TARGET_KR_PHY, reg_val); if (status != IXGBE_SUCCESS) return status; /* Training bypass. 
*/ status = hw->mac.ops.read_iosf_sb_reg(hw, - IXGBE_KRM_RX_TRN_LINKUP_CTRL(hw->bus.lan_id), - IXGBE_SB_IOSF_TARGET_KR_PHY, ®_val); + IXGBE_KRM_RX_TRN_LINKUP_CTRL(hw->bus.lan_id), + IXGBE_SB_IOSF_TARGET_KR_PHY, ®_val); if (status != IXGBE_SUCCESS) return status; reg_val |= IXGBE_KRM_RX_TRN_LINKUP_CTRL_PROTOCOL_BYPASS; status = hw->mac.ops.write_iosf_sb_reg(hw, - IXGBE_KRM_RX_TRN_LINKUP_CTRL(hw->bus.lan_id), - IXGBE_SB_IOSF_TARGET_KR_PHY, reg_val); + IXGBE_KRM_RX_TRN_LINKUP_CTRL(hw->bus.lan_id), + IXGBE_SB_IOSF_TARGET_KR_PHY, reg_val); return status; } @@ -2910,13 +3202,13 @@ s32 ixgbe_setup_phy_loopback_x550em(struct ixgbe_hw *hw) * * Reads a 16 bit word from the EEPROM using the hostif. **/ -s32 ixgbe_read_ee_hostif_data_X550(struct ixgbe_hw *hw, u16 offset, - u16 *data) +s32 ixgbe_read_ee_hostif_X550(struct ixgbe_hw *hw, u16 offset, u16 *data) { - s32 status; + const u32 mask = IXGBE_GSSR_SW_MNG_SM | IXGBE_GSSR_EEP_SM; struct ixgbe_hic_read_shadow_ram buffer; + s32 status; - DEBUGFUNC("ixgbe_read_ee_hostif_data_X550"); + DEBUGFUNC("ixgbe_read_ee_hostif_X550"); buffer.hdr.req.cmd = FW_READ_SHADOW_RAM_CMD; buffer.hdr.req.buf_lenh = 0; buffer.hdr.req.buf_lenl = FW_READ_SHADOW_RAM_LEN; @@ -2927,42 +3219,18 @@ s32 ixgbe_read_ee_hostif_data_X550(struct ixgbe_hw *hw, u16 offset, /* one word */ buffer.length = IXGBE_CPU_TO_BE16(sizeof(u16)); - status = ixgbe_host_interface_command(hw, (u32 *)&buffer, - sizeof(buffer), - IXGBE_HI_COMMAND_TIMEOUT, false); - + status = hw->mac.ops.acquire_swfw_sync(hw, mask); if (status) return status; - *data = (u16)IXGBE_READ_REG_ARRAY(hw, IXGBE_FLEX_MNG, - FW_NVM_DATA_OFFSET); - - return 0; -} - -/** - * ixgbe_read_ee_hostif_X550 - Read EEPROM word using a host interface command - * @hw: pointer to hardware structure - * @offset: offset of word in the EEPROM to read - * @data: word read from the EEPROM - * - * Reads a 16 bit word from the EEPROM using the hostif. - **/ -s32 ixgbe_read_ee_hostif_X550(struct ixgbe_hw *hw, u16 offset, - u16 *data) -{ - s32 status = IXGBE_SUCCESS; - - DEBUGFUNC("ixgbe_read_ee_hostif_X550"); - - if (hw->mac.ops.acquire_swfw_sync(hw, IXGBE_GSSR_EEP_SM) == - IXGBE_SUCCESS) { - status = ixgbe_read_ee_hostif_data_X550(hw, offset, data); - hw->mac.ops.release_swfw_sync(hw, IXGBE_GSSR_EEP_SM); - } else { - status = IXGBE_ERR_SWFW_SYNC; + status = ixgbe_hic_unlocked(hw, (u32 *)&buffer, sizeof(buffer), + IXGBE_HI_COMMAND_TIMEOUT); + if (!status) { + *data = (u16)IXGBE_READ_REG_ARRAY(hw, IXGBE_FLEX_MNG, + FW_NVM_DATA_OFFSET); } + hw->mac.ops.release_swfw_sync(hw, mask); return status; } @@ -2978,6 +3246,7 @@ s32 ixgbe_read_ee_hostif_X550(struct ixgbe_hw *hw, u16 offset, s32 ixgbe_read_ee_hostif_buffer_X550(struct ixgbe_hw *hw, u16 offset, u16 words, u16 *data) { + const u32 mask = IXGBE_GSSR_SW_MNG_SM | IXGBE_GSSR_EEP_SM; struct ixgbe_hic_read_shadow_ram buffer; u32 current_word = 0; u16 words_to_read; @@ -2987,11 +3256,12 @@ s32 ixgbe_read_ee_hostif_buffer_X550(struct ixgbe_hw *hw, DEBUGFUNC("ixgbe_read_ee_hostif_buffer_X550"); /* Take semaphore for the entire operation. 
*/ - status = hw->mac.ops.acquire_swfw_sync(hw, IXGBE_GSSR_EEP_SM); + status = hw->mac.ops.acquire_swfw_sync(hw, mask); if (status) { DEBUGOUT("EEPROM read buffer - semaphore failed\n"); return status; } + while (words) { if (words > FW_MAX_READ_BUFFER_SIZE / 2) words_to_read = FW_MAX_READ_BUFFER_SIZE / 2; @@ -3007,10 +3277,8 @@ s32 ixgbe_read_ee_hostif_buffer_X550(struct ixgbe_hw *hw, buffer.address = IXGBE_CPU_TO_BE32((offset + current_word) * 2); buffer.length = IXGBE_CPU_TO_BE16(words_to_read * 2); - status = ixgbe_host_interface_command(hw, (u32 *)&buffer, - sizeof(buffer), - IXGBE_HI_COMMAND_TIMEOUT, - false); + status = ixgbe_hic_unlocked(hw, (u32 *)&buffer, sizeof(buffer), + IXGBE_HI_COMMAND_TIMEOUT); if (status) { DEBUGOUT("Host interface command failed\n"); @@ -3035,7 +3303,7 @@ s32 ixgbe_read_ee_hostif_buffer_X550(struct ixgbe_hw *hw, } out: - hw->mac.ops.release_swfw_sync(hw, IXGBE_GSSR_EEP_SM); + hw->mac.ops.release_swfw_sync(hw, mask); return status; } @@ -3439,6 +3707,7 @@ u32 ixgbe_get_supported_physical_layer_X550em(struct ixgbe_hw *hw) switch (hw->phy.type) { case ixgbe_phy_x550em_kr: + case ixgbe_phy_x550em_xfi: physical_layer = IXGBE_PHYSICAL_LAYER_10GBASE_KR | IXGBE_PHYSICAL_LAYER_1000BASE_KX; break; @@ -3455,6 +3724,20 @@ u32 ixgbe_get_supported_physical_layer_X550em(struct ixgbe_hw *hw) if (ext_ability & IXGBE_MDIO_PHY_1000BASET_ABILITY) physical_layer |= IXGBE_PHYSICAL_LAYER_1000BASE_T; break; + case ixgbe_phy_fw: + if (hw->phy.speeds_supported & IXGBE_LINK_SPEED_1GB_FULL) + physical_layer |= IXGBE_PHYSICAL_LAYER_1000BASE_T; + if (hw->phy.speeds_supported & IXGBE_LINK_SPEED_100_FULL) + physical_layer |= IXGBE_PHYSICAL_LAYER_100BASE_TX; + if (hw->phy.speeds_supported & IXGBE_LINK_SPEED_10_FULL) + physical_layer |= IXGBE_PHYSICAL_LAYER_10BASE_T; + break; + case ixgbe_phy_sgmii: + physical_layer = IXGBE_PHYSICAL_LAYER_1000BASE_KX; + break; + case ixgbe_phy_ext_1g_t: + physical_layer |= IXGBE_PHYSICAL_LAYER_1000BASE_T; + break; default: break; } @@ -3737,19 +4020,19 @@ s32 ixgbe_setup_fc_X550em(struct ixgbe_hw *hw) case IXGBE_DEV_ID_X550EM_A_KR: case IXGBE_DEV_ID_X550EM_A_KR_L: ret_val = hw->mac.ops.read_iosf_sb_reg(hw, - IXGBE_KRM_AN_CNTL_1(hw->bus.lan_id), - IXGBE_SB_IOSF_TARGET_KR_PHY, ®_val); + IXGBE_KRM_AN_CNTL_1(hw->bus.lan_id), + IXGBE_SB_IOSF_TARGET_KR_PHY, ®_val); if (ret_val != IXGBE_SUCCESS) goto out; reg_val &= ~(IXGBE_KRM_AN_CNTL_1_SYM_PAUSE | - IXGBE_KRM_AN_CNTL_1_ASM_PAUSE); + IXGBE_KRM_AN_CNTL_1_ASM_PAUSE); if (pause) reg_val |= IXGBE_KRM_AN_CNTL_1_SYM_PAUSE; if (asm_dir) reg_val |= IXGBE_KRM_AN_CNTL_1_ASM_PAUSE; ret_val = hw->mac.ops.write_iosf_sb_reg(hw, - IXGBE_KRM_AN_CNTL_1(hw->bus.lan_id), - IXGBE_SB_IOSF_TARGET_KR_PHY, reg_val); + IXGBE_KRM_AN_CNTL_1(hw->bus.lan_id), + IXGBE_SB_IOSF_TARGET_KR_PHY, reg_val); /* This device does not fully support AN. */ hw->fc.disable_fc_autoneg = true; @@ -3763,12 +4046,12 @@ out: } /** - * ixgbe_fc_autoneg_x550a - Enable flow control IEEE clause 37 + * ixgbe_fc_autoneg_backplane_x550em_a - Enable flow control IEEE clause 37 * @hw: pointer to hardware structure * * Enable flow control according to IEEE clause 37. 
**/ -void ixgbe_fc_autoneg_x550a(struct ixgbe_hw *hw) +void ixgbe_fc_autoneg_backplane_x550em_a(struct ixgbe_hw *hw) { u32 link_s1, lp_an_page_low, an_cntl_1; s32 status = IXGBE_ERR_FC_NOT_NEGOTIATED; @@ -3782,7 +4065,7 @@ void ixgbe_fc_autoneg_x550a(struct ixgbe_hw *hw) */ if (hw->fc.disable_fc_autoneg) { ERROR_REPORT1(IXGBE_ERROR_UNSUPPORTED, - "Flow control autoneg is disabled"); + "Flow control autoneg is disabled"); goto out; } @@ -3794,12 +4077,13 @@ void ixgbe_fc_autoneg_x550a(struct ixgbe_hw *hw) /* Check at auto-negotiation has completed */ status = hw->mac.ops.read_iosf_sb_reg(hw, - IXGBE_KRM_LINK_S1(hw->bus.lan_id), - IXGBE_SB_IOSF_TARGET_KR_PHY, &link_s1); + IXGBE_KRM_LINK_S1(hw->bus.lan_id), + IXGBE_SB_IOSF_TARGET_KR_PHY, &link_s1); if (status != IXGBE_SUCCESS || (link_s1 & IXGBE_KRM_LINK_S1_MAC_AN_COMPLETE) == 0) { DEBUGOUT("Auto-Negotiation did not complete\n"); + status = IXGBE_ERR_FC_NOT_NEGOTIATED; goto out; } @@ -3807,8 +4091,8 @@ void ixgbe_fc_autoneg_x550a(struct ixgbe_hw *hw) * local flow control settings accordingly */ status = hw->mac.ops.read_iosf_sb_reg(hw, - IXGBE_KRM_AN_CNTL_1(hw->bus.lan_id), - IXGBE_SB_IOSF_TARGET_KR_PHY, &an_cntl_1); + IXGBE_KRM_AN_CNTL_1(hw->bus.lan_id), + IXGBE_SB_IOSF_TARGET_KR_PHY, &an_cntl_1); if (status != IXGBE_SUCCESS) { DEBUGOUT("Auto-Negotiation did not complete\n"); @@ -3816,8 +4100,8 @@ void ixgbe_fc_autoneg_x550a(struct ixgbe_hw *hw) } status = hw->mac.ops.read_iosf_sb_reg(hw, - IXGBE_KRM_LP_BASE_PAGE_HIGH(hw->bus.lan_id), - IXGBE_SB_IOSF_TARGET_KR_PHY, &lp_an_page_low); + IXGBE_KRM_LP_BASE_PAGE_HIGH(hw->bus.lan_id), + IXGBE_SB_IOSF_TARGET_KR_PHY, &lp_an_page_low); if (status != IXGBE_SUCCESS) { DEBUGOUT("Auto-Negotiation did not complete\n"); @@ -3840,22 +4124,88 @@ out: } /** - * ixgbe_setup_fc_x550em - Set up flow control + * ixgbe_fc_autoneg_fiber_x550em_a - passthrough FC settings + * @hw: pointer to hardware structure + * + **/ +void ixgbe_fc_autoneg_fiber_x550em_a(struct ixgbe_hw *hw) +{ + hw->fc.fc_was_autonegged = false; + hw->fc.current_mode = hw->fc.requested_mode; +} + +/** + * ixgbe_fc_autoneg_sgmii_x550em_a - Enable flow control IEEE clause 37 + * @hw: pointer to hardware structure + * + * Enable flow control according to IEEE clause 37. + **/ +void ixgbe_fc_autoneg_sgmii_x550em_a(struct ixgbe_hw *hw) +{ + s32 status = IXGBE_ERR_FC_NOT_NEGOTIATED; + u32 info[FW_PHY_ACT_DATA_COUNT] = { 0 }; + ixgbe_link_speed speed; + bool link_up; + + /* AN should have completed when the cable was plugged in. + * Look for reasons to bail out. Bail out if: + * - FC autoneg is disabled, or if + * - link is not up. 
+ */ + if (hw->fc.disable_fc_autoneg) { + ERROR_REPORT1(IXGBE_ERROR_UNSUPPORTED, + "Flow control autoneg is disabled"); + goto out; + } + + hw->mac.ops.check_link(hw, &speed, &link_up, false); + if (!link_up) { + ERROR_REPORT1(IXGBE_ERROR_SOFTWARE, "The link is down"); + goto out; + } + + /* Check if auto-negotiation has completed */ + status = ixgbe_fw_phy_activity(hw, FW_PHY_ACT_GET_LINK_INFO, &info); + if (status != IXGBE_SUCCESS || + !(info[0] & FW_PHY_ACT_GET_LINK_INFO_AN_COMPLETE)) { + DEBUGOUT("Auto-Negotiation did not complete\n"); + status = IXGBE_ERR_FC_NOT_NEGOTIATED; + goto out; + } + + /* Negotiate the flow control */ + status = ixgbe_negotiate_fc(hw, info[0], info[0], + FW_PHY_ACT_GET_LINK_INFO_FC_RX, + FW_PHY_ACT_GET_LINK_INFO_FC_TX, + FW_PHY_ACT_GET_LINK_INFO_LP_FC_RX, + FW_PHY_ACT_GET_LINK_INFO_LP_FC_TX); + +out: + if (status == IXGBE_SUCCESS) { + hw->fc.fc_was_autonegged = true; + } else { + hw->fc.fc_was_autonegged = false; + hw->fc.current_mode = hw->fc.requested_mode; + } +} + +/** + * ixgbe_setup_fc_backplane_x550em_a - Set up flow control * @hw: pointer to hardware structure * * Called at init time to set up flow control. **/ -s32 ixgbe_setup_fc_x550a(struct ixgbe_hw *hw) +s32 ixgbe_setup_fc_backplane_x550em_a(struct ixgbe_hw *hw) { s32 status = IXGBE_SUCCESS; - u32 an_cntl, link_ctrl = 0; + u32 an_cntl = 0; - DEBUGFUNC("ixgbe_setup_fc_x550em"); + DEBUGFUNC("ixgbe_setup_fc_backplane_x550em_a"); /* Validate the requested mode */ if (hw->fc.strict_ieee && hw->fc.requested_mode == ixgbe_fc_rx_pause) { ERROR_REPORT1(IXGBE_ERROR_UNSUPPORTED, - "ixgbe_fc_rx_pause not valid in strict IEEE mode\n"); + "ixgbe_fc_rx_pause not valid in strict IEEE mode\n"); return IXGBE_ERR_INVALID_LINK_SETTINGS; } @@ -3867,8 +4217,8 @@ s32 ixgbe_setup_fc_x550a(struct ixgbe_hw *hw) * we link at 10G, the 1G advertisement is harmless and vice versa. */ status = hw->mac.ops.read_iosf_sb_reg(hw, - IXGBE_KRM_AN_CNTL_1(hw->bus.lan_id), - IXGBE_SB_IOSF_TARGET_KR_PHY, &an_cntl); + IXGBE_KRM_AN_CNTL_1(hw->bus.lan_id), + IXGBE_SB_IOSF_TARGET_KR_PHY, &an_cntl); if (status != IXGBE_SUCCESS) { DEBUGOUT("Auto-Negotiation did not complete\n"); @@ -3909,7 +4259,7 @@ s32 ixgbe_setup_fc_x550a(struct ixgbe_hw *hw) case ixgbe_fc_full: /* Flow control (both Rx and Tx) is enabled by SW override. */ an_cntl |= IXGBE_KRM_AN_CNTL_1_SYM_PAUSE | - IXGBE_KRM_AN_CNTL_1_ASM_PAUSE; + IXGBE_KRM_AN_CNTL_1_ASM_PAUSE; break; default: ERROR_REPORT1(IXGBE_ERROR_ARGUMENT, @@ -3918,23 +4268,11 @@ s32 ixgbe_setup_fc_x550a(struct ixgbe_hw *hw) } status = hw->mac.ops.write_iosf_sb_reg(hw, - IXGBE_KRM_AN_CNTL_1(hw->bus.lan_id), - IXGBE_SB_IOSF_TARGET_KR_PHY, an_cntl); + IXGBE_KRM_AN_CNTL_1(hw->bus.lan_id), + IXGBE_SB_IOSF_TARGET_KR_PHY, an_cntl); /* Restart auto-negotiation. 
*/ - status = hw->mac.ops.read_iosf_sb_reg(hw, - IXGBE_KRM_LINK_CTRL_1(hw->bus.lan_id), - IXGBE_SB_IOSF_TARGET_KR_PHY, &link_ctrl); - - if (status != IXGBE_SUCCESS) { - DEBUGOUT("Auto-Negotiation did not complete\n"); - return status; - } - - link_ctrl |= IXGBE_KRM_LINK_CTRL_1_TETH_AN_RESTART; - status = hw->mac.ops.write_iosf_sb_reg(hw, - IXGBE_KRM_LINK_CTRL_1(hw->bus.lan_id), - IXGBE_SB_IOSF_TARGET_KR_PHY, link_ctrl); + status = ixgbe_restart_an_internal_phy_x550em(hw); return status; } @@ -4018,22 +4356,34 @@ STATIC s32 ixgbe_acquire_swfw_sync_X550a(struct ixgbe_hw *hw, u32 mask) status = IXGBE_SUCCESS; if (hmask) status = ixgbe_acquire_swfw_sync_X540(hw, hmask); - if (status) + if (status) { + DEBUGOUT1("Could not acquire SWFW semaphore, Status = %d\n", + status); return status; + } if (!(mask & IXGBE_GSSR_TOKEN_SM)) return IXGBE_SUCCESS; status = ixgbe_get_phy_token(hw); + if (status == IXGBE_ERR_TOKEN_RETRY) + DEBUGOUT1("Could not acquire PHY token, Status = %d\n", + status); + if (status == IXGBE_SUCCESS) return IXGBE_SUCCESS; if (hmask) ixgbe_release_swfw_sync_X540(hw, hmask); - if (status != IXGBE_ERR_TOKEN_RETRY) + + if (status != IXGBE_ERR_TOKEN_RETRY) { + DEBUGOUT1("Unable to retry acquiring the PHY token, Status = %d\n", + status); return status; - msec_delay(FW_PHY_TOKEN_DELAY); + } } + DEBUGOUT1("Semaphore acquisition retries failed!: PHY ID = 0x%08X\n", + hw->phy.id); return status; } @@ -4068,7 +4418,7 @@ STATIC void ixgbe_release_swfw_sync_X550a(struct ixgbe_hw *hw, u32 mask) * instances. **/ s32 ixgbe_read_phy_reg_x550a(struct ixgbe_hw *hw, u32 reg_addr, - u32 device_type, u16 *phy_data) + u32 device_type, u16 *phy_data) { s32 status; u32 mask = hw->phy.phy_semaphore_mask | IXGBE_GSSR_TOKEN_SM; @@ -4096,7 +4446,7 @@ s32 ixgbe_read_phy_reg_x550a(struct ixgbe_hw *hw, u32 reg_addr, * The PHY Token is needed since the MDIO is shared between to MAC instances. **/ s32 ixgbe_write_phy_reg_x550a(struct ixgbe_hw *hw, u32 reg_addr, - u32 device_type, u16 phy_data) + u32 device_type, u16 phy_data) { s32 status; u32 mask = hw->phy.phy_semaphore_mask | IXGBE_GSSR_TOKEN_SM; @@ -4104,7 +4454,7 @@ s32 ixgbe_write_phy_reg_x550a(struct ixgbe_hw *hw, u32 reg_addr, DEBUGFUNC("ixgbe_write_phy_reg_x550a"); if (hw->mac.ops.acquire_swfw_sync(hw, mask) == IXGBE_SUCCESS) { - status = ixgbe_write_phy_reg_mdi(hw, reg_addr, device_type, + status = hw->phy.ops.write_reg_mdi(hw, reg_addr, device_type, phy_data); hw->mac.ops.release_swfw_sync(hw, mask); } else { @@ -4169,8 +4519,10 @@ s32 ixgbe_setup_mac_link_t_X550em(struct ixgbe_hw *hw, else force_speed = IXGBE_LINK_SPEED_1GB_FULL; - /* If internal link mode is XFI, then setup XFI internal link. */ - if (!(hw->phy.nw_mng_if_sel & IXGBE_NW_MNG_IF_SEL_INT_PHY_MODE)) { + /* If X552 and internal link mode is XFI, then setup XFI internal link. + */ + if (hw->mac.type == ixgbe_mac_X550EM_x && + !(hw->phy.nw_mng_if_sel & IXGBE_NW_MNG_IF_SEL_INT_PHY_MODE)) { status = ixgbe_setup_ixfi_x550em(hw, &force_speed); if (status != IXGBE_SUCCESS) @@ -4193,7 +4545,7 @@ s32 ixgbe_check_link_t_X550em(struct ixgbe_hw *hw, ixgbe_link_speed *speed, bool *link_up, bool link_up_wait_to_complete) { u32 status; - u16 autoneg_status; + u16 i, autoneg_status = 0; if (hw->mac.ops.get_media_type(hw) != ixgbe_media_type_copper) return IXGBE_ERR_CONFIG; @@ -4206,21 +4558,18 @@ s32 ixgbe_check_link_t_X550em(struct ixgbe_hw *hw, ixgbe_link_speed *speed, return status; /* MAC link is up, so check external PHY link. - * Read this twice back to back to indicate current status. 
+ * X557 PHY. Link status is latching low, and can only be used to detect + * link drop, and not the current status of the link without performing + * back-to-back reads. */ - status = hw->phy.ops.read_reg(hw, IXGBE_MDIO_AUTO_NEG_STATUS, - IXGBE_MDIO_AUTO_NEG_DEV_TYPE, - &autoneg_status); - - if (status != IXGBE_SUCCESS) - return status; - - status = hw->phy.ops.read_reg(hw, IXGBE_MDIO_AUTO_NEG_STATUS, - IXGBE_MDIO_AUTO_NEG_DEV_TYPE, - &autoneg_status); + for (i = 0; i < 2; i++) { + status = hw->phy.ops.read_reg(hw, IXGBE_MDIO_AUTO_NEG_STATUS, + IXGBE_MDIO_AUTO_NEG_DEV_TYPE, + &autoneg_status); - if (status != IXGBE_SUCCESS) - return status; + if (status != IXGBE_SUCCESS) + return status; + } /* If external PHY link is not up, then indicate link not up */ if (!(autoneg_status & IXGBE_MDIO_AUTO_NEG_LINK_STATUS)) @@ -4294,3 +4643,63 @@ s32 ixgbe_led_off_t_X550em(struct ixgbe_hw *hw, u32 led_idx) return IXGBE_SUCCESS; } +/** + * ixgbe_set_fw_drv_ver_x550 - Sends driver version to firmware + * @hw: pointer to the HW structure + * @maj: driver version major number + * @min: driver version minor number + * @build: driver version build number + * @sub: driver version sub build number + * @len: length of driver_ver string + * @driver_ver: driver string + * + * Sends driver version number to firmware through the manageability + * block. On success return IXGBE_SUCCESS + * else returns IXGBE_ERR_SWFW_SYNC when encountering an error acquiring + * semaphore or IXGBE_ERR_HOST_INTERFACE_COMMAND when command fails. + **/ +s32 ixgbe_set_fw_drv_ver_x550(struct ixgbe_hw *hw, u8 maj, u8 min, + u8 build, u8 sub, u16 len, const char *driver_ver) +{ + struct ixgbe_hic_drv_info2 fw_cmd; + s32 ret_val = IXGBE_SUCCESS; + int i; + + DEBUGFUNC("ixgbe_set_fw_drv_ver_x550"); + + if ((len == 0) || (driver_ver == NULL) || + (len > sizeof(fw_cmd.driver_string))) + return IXGBE_ERR_INVALID_ARGUMENT; + + fw_cmd.hdr.cmd = FW_CEM_CMD_DRIVER_INFO; + fw_cmd.hdr.buf_len = FW_CEM_CMD_DRIVER_INFO_LEN + len; + fw_cmd.hdr.cmd_or_resp.cmd_resv = FW_CEM_CMD_RESERVED; + fw_cmd.port_num = (u8)hw->bus.func; + fw_cmd.ver_maj = maj; + fw_cmd.ver_min = min; + fw_cmd.ver_build = build; + fw_cmd.ver_sub = sub; + fw_cmd.hdr.checksum = 0; + memcpy(fw_cmd.driver_string, driver_ver, len); + fw_cmd.hdr.checksum = ixgbe_calculate_checksum((u8 *)&fw_cmd, + (FW_CEM_HDR_LEN + fw_cmd.hdr.buf_len)); + + for (i = 0; i <= FW_CEM_MAX_RETRIES; i++) { + ret_val = ixgbe_host_interface_command(hw, (u32 *)&fw_cmd, + sizeof(fw_cmd), + IXGBE_HI_COMMAND_TIMEOUT, + true); + if (ret_val != IXGBE_SUCCESS) + continue; + + if (fw_cmd.hdr.cmd_or_resp.ret_status == + FW_CEM_RESP_STATUS_SUCCESS) + ret_val = IXGBE_SUCCESS; + else + ret_val = IXGBE_ERR_HOST_INTERFACE_COMMAND; + + break; + } + + return ret_val; +} diff --git a/src/dpdk/drivers/net/ixgbe/base/ixgbe_x550.h b/src/dpdk/drivers/net/ixgbe/base/ixgbe_x550.h index 27d5d02f..30ca5df1 100644 --- a/src/dpdk/drivers/net/ixgbe/base/ixgbe_x550.h +++ b/src/dpdk/drivers/net/ixgbe/base/ixgbe_x550.h @@ -36,49 +36,6 @@ POSSIBILITY OF SUCH DAMAGE. 
#include "ixgbe_type.h" -/* More phy definitions */ -#define IXGBE_M88E1500_COPPER_CTRL 0x0/* Page 0 reg */ -#define IXGBE_M88E1500_COPPER_CTRL_RESET 0x8000 -#define IXGBE_M88E1500_COPPER_CTRL_AN_EN 0x1000 -#define IXGBE_M88E1500_COPPER_CTRL_RESTART_AN 0x0200 -#define IXGBE_M88E1500_COPPER_CTRL_FULL_DUPLEX 0x0100 -#define IXGBE_M88E1500_COPPER_CTRL_SPEED_MSB 0x0040 -#define IXGBE_M88E1500_1000T_CTRL 0x09 /* 1000Base-T Ctrl Reg */ -/* 1=Configure PHY as Master 0=Configure PHY as Slave */ -#define IXGBE_M88E1500_1000T_CTRL_MS_VALUE 0x0800 -/* 1=Master/Slave manual config value 0=Automatic Master/Slave config */ -#define IXGBE_M88E1500_1000T_CTRL_MS_ENABLE 0x1000 -#define IXGBE_M88E1500_1000T_STATUS 0x0A /* 1000Base-T Status Reg */ -#define IXGBE_M88E1500_AUTO_COPPER_SGMII 0x2 -#define IXGBE_M88E1500_AUTO_COPPER_BASEX 0x3 -#define IXGBE_M88E1500_STATUS_LINK 0x0004 /* Interface Link Bit */ -#define IXGBE_M88E1500_MAC_CTRL_1 0x10 -#define IXGBE_M88E1500_MAC_CTRL_1_MODE_MASK 0x0380 /* Mode Select */ -#define IXGBE_M88E1500_CFG_REG_1 0x0010 -#define IXGBE_M88E1500_CFG_REG_2 0x0011 -#define IXGBE_M88E1500_CFG_REG_3 0x0007 -#define IXGBE_M88E1500_MODE 0x0014 -#define IXGBE_M88E1500_PAGE_ADDR 0x16/* Page Offset reg */ -#define IXGBE_M88E1500_FIBER_CTRL 0x0/* Page 1 reg */ -#define IXGBE_M88E1500_FIBER_CTRL_RESET 0x8000 -#define IXGBE_M88E1500_FIBER_CTRL_SPEED_LSB 0x2000 -#define IXGBE_M88E1500_FIBER_CTRL_POWER_DOWN 0x0800 -#define IXGBE_M88E1500_FIBER_CTRL_DUPLEX_FULL 0x0100 -#define IXGBE_M88E1500_FIBER_CTRL_SPEED_MSB 0x0040 -#define IXGBE_M88E1500_EEE_CTRL_1 0x0/* Page 18 reg */ -#define IXGBE_M88E1500_EEE_CTRL_1_MS 0x0001/* EEE Master/Slave */ -#define IXGBE_M88E1500_GEN_CTRL 0x14/* Page 18 reg */ -#define IXGBE_M88E1500_GEN_CTRL_RESET 0x8000 -#define IXGBE_M88E1500_GEN_CTRL_SGMII_COPPER 0x0001/* Mode bits 0-2 */ - -/* M88E1500 Specific Registers */ -#define IXGBE_M88E1500_PHY_SPEC_CTRL 0x10 /* PHY Specific Ctrl Reg */ -#define IXGBE_M88E1500_PHY_SPEC_STATUS 0x11 /* PHY Specific Stat Reg */ - -#define IXGBE_M88E1500_PSCR_DOWNSHIFT_ENABLE 0x0800 -#define IXGBE_M88E1500_PSCR_DOWNSHIFT_MASK 0x7000 -#define IXGBE_M88E1500_PSCR_DOWNSHIFT_6X 0x5000 - s32 ixgbe_dmac_config_X550(struct ixgbe_hw *hw); s32 ixgbe_dmac_config_tcs_X550(struct ixgbe_hw *hw); s32 ixgbe_dmac_update_tcs_X550(struct ixgbe_hw *hw); @@ -98,12 +55,8 @@ s32 ixgbe_read_ee_hostif_buffer_X550(struct ixgbe_hw *hw, u16 offset, u16 words, u16 *data); s32 ixgbe_read_ee_hostif_X550(struct ixgbe_hw *hw, u16 offset, u16 *data); -s32 ixgbe_read_ee_hostif_data_X550(struct ixgbe_hw *hw, u16 offset, - u16 *data); s32 ixgbe_write_ee_hostif_data_X550(struct ixgbe_hw *hw, u16 offset, u16 data); -s32 ixgbe_set_eee_X550(struct ixgbe_hw *hw, bool enable_eee); -s32 ixgbe_setup_eee_X550(struct ixgbe_hw *hw, bool enable_eee); void ixgbe_set_source_address_pruning_X550(struct ixgbe_hw *hw, bool enable, unsigned int pool); void ixgbe_set_ethertype_anti_spoofing_X550(struct ixgbe_hw *hw, @@ -112,6 +65,8 @@ s32 ixgbe_write_iosf_sb_reg_x550(struct ixgbe_hw *hw, u32 reg_addr, u32 device_type, u32 data); s32 ixgbe_read_iosf_sb_reg_x550(struct ixgbe_hw *hw, u32 reg_addr, u32 device_type, u32 *data); +s32 ixgbe_set_fw_drv_ver_x550(struct ixgbe_hw *hw, u8 maj, u8 min, + u8 build, u8 ver, u16 len, const char *str); s32 ixgbe_get_phy_token(struct ixgbe_hw *); s32 ixgbe_put_phy_token(struct ixgbe_hw *); s32 ixgbe_write_iosf_sb_reg_x550a(struct ixgbe_hw *hw, u32 reg_addr, @@ -144,14 +99,18 @@ s32 ixgbe_setup_mac_link_sfp_x550em(struct ixgbe_hw *hw, ixgbe_link_speed 
speed, bool autoneg_wait_to_complete); s32 ixgbe_setup_mac_link_sfp_x550a(struct ixgbe_hw *hw, - ixgbe_link_speed speed, - bool autoneg_wait_to_complete); + ixgbe_link_speed speed, + bool autoneg_wait_to_complete); s32 ixgbe_read_phy_reg_x550a(struct ixgbe_hw *hw, u32 reg_addr, - u32 device_type, u16 *phy_data); + u32 device_type, u16 *phy_data); s32 ixgbe_write_phy_reg_x550a(struct ixgbe_hw *hw, u32 reg_addr, - u32 device_type, u16 phy_data); -s32 ixgbe_setup_fc_x550a(struct ixgbe_hw *hw); -void ixgbe_fc_autoneg_x550a(struct ixgbe_hw *hw); + u32 device_type, u16 phy_data); +s32 ixgbe_setup_fc_fiber_x550em_a(struct ixgbe_hw *hw); +s32 ixgbe_setup_fc_backplane_x550em_a(struct ixgbe_hw *hw); +s32 ixgbe_setup_fc_sgmii_x550em_a(struct ixgbe_hw *hw); +void ixgbe_fc_autoneg_fiber_x550em_a(struct ixgbe_hw *hw); +void ixgbe_fc_autoneg_backplane_x550em_a(struct ixgbe_hw *hw); +void ixgbe_fc_autoneg_sgmii_x550em_a(struct ixgbe_hw *hw); s32 ixgbe_handle_lasi_ext_t_x550em(struct ixgbe_hw *hw); s32 ixgbe_setup_mac_link_t_X550em(struct ixgbe_hw *hw, ixgbe_link_speed speed, diff --git a/src/dpdk/drivers/net/ixgbe/ixgbe_ethdev.c b/src/dpdk/drivers/net/ixgbe/ixgbe_ethdev.c index 72963a89..5b625a3d 100644 --- a/src/dpdk/drivers/net/ixgbe/ixgbe_ethdev.c +++ b/src/dpdk/drivers/net/ixgbe/ixgbe_ethdev.c @@ -1,7 +1,7 @@ /*- * BSD LICENSE * - * Copyright(c) 2010-2016 Intel Corporation. All rights reserved. + * Copyright(c) 2010-2017 Intel Corporation. All rights reserved. * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -60,6 +60,7 @@ #include <rte_malloc.h> #include <rte_random.h> #include <rte_dev.h> +#include <rte_hash_crc.h> #include "ixgbe_logs.h" #include "base/ixgbe_api.h" @@ -72,6 +73,8 @@ #include "base/ixgbe_phy.h" #include "ixgbe_regs.h" +#include "rte_pmd_ixgbe.h" + /* * High threshold controlling when to start sending XOFF frames. Must be at * least 8 bytes less than receive packet buffer size. 
This value is in units @@ -163,6 +166,11 @@ enum ixgbevf_xcast_modes { static int eth_ixgbe_dev_init(struct rte_eth_dev *eth_dev); static int eth_ixgbe_dev_uninit(struct rte_eth_dev *eth_dev); +static int ixgbe_fdir_filter_init(struct rte_eth_dev *eth_dev); +static int ixgbe_fdir_filter_uninit(struct rte_eth_dev *eth_dev); +static int ixgbe_l2_tn_filter_init(struct rte_eth_dev *eth_dev); +static int ixgbe_l2_tn_filter_uninit(struct rte_eth_dev *eth_dev); +static int ixgbe_ntuple_filter_uninit(struct rte_eth_dev *eth_dev); static int ixgbe_dev_configure(struct rte_eth_dev *dev); static int ixgbe_dev_start(struct rte_eth_dev *dev); static void ixgbe_dev_stop(struct rte_eth_dev *dev); @@ -191,6 +199,8 @@ static int ixgbe_dev_queue_stats_mapping_set(struct rte_eth_dev *eth_dev, uint16_t queue_id, uint8_t stat_idx, uint8_t is_rx); +static int ixgbe_fw_version_get(struct rte_eth_dev *dev, char *fw_version, + size_t fw_size); static void ixgbe_dev_info_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info); static const uint32_t *ixgbe_dev_supported_ptypes_get(struct rte_eth_dev *dev); @@ -229,9 +239,11 @@ static int ixgbe_dev_rss_reta_query(struct rte_eth_dev *dev, uint16_t reta_size); static void ixgbe_dev_link_status_print(struct rte_eth_dev *dev); static int ixgbe_dev_lsc_interrupt_setup(struct rte_eth_dev *dev); +static int ixgbe_dev_macsec_interrupt_setup(struct rte_eth_dev *dev); static int ixgbe_dev_rxq_interrupt_setup(struct rte_eth_dev *dev); static int ixgbe_dev_interrupt_get_status(struct rte_eth_dev *dev); -static int ixgbe_dev_interrupt_action(struct rte_eth_dev *dev); +static int ixgbe_dev_interrupt_action(struct rte_eth_dev *dev, + struct rte_intr_handle *handle); static void ixgbe_dev_interrupt_handler(struct rte_intr_handle *handle, void *param); static void ixgbe_dev_interrupt_delayed_handler(void *param); @@ -241,6 +253,7 @@ static void ixgbe_remove_rar(struct rte_eth_dev *dev, uint32_t index); static void ixgbe_set_default_mac_addr(struct rte_eth_dev *dev, struct ether_addr *mac_addr); static void ixgbe_dcb_init(struct ixgbe_hw *hw, struct ixgbe_dcb_config *dcb_config); +static int is_ixgbe_pmd(const char *driver_name); /* For Virtual Function support */ static int eth_ixgbevf_dev_init(struct rte_eth_dev *eth_dev); @@ -274,12 +287,6 @@ static void ixgbevf_dev_allmulticast_disable(struct rte_eth_dev *dev); static int ixgbe_uc_hash_table_set(struct rte_eth_dev *dev, struct ether_addr * mac_addr, uint8_t on); static int ixgbe_uc_all_hash_table_set(struct rte_eth_dev *dev, uint8_t on); -static int ixgbe_set_pool_rx_mode(struct rte_eth_dev *dev, uint16_t pool, - uint16_t rx_mask, uint8_t on); -static int ixgbe_set_pool_rx(struct rte_eth_dev *dev, uint16_t pool, uint8_t on); -static int ixgbe_set_pool_tx(struct rte_eth_dev *dev, uint16_t pool, uint8_t on); -static int ixgbe_set_pool_vlan_filter(struct rte_eth_dev *dev, uint16_t vlan, - uint64_t pool_mask, uint8_t vlan_on); static int ixgbe_mirror_rule_set(struct rte_eth_dev *dev, struct rte_eth_mirror_conf *mirror_conf, uint8_t rule_id, uint8_t on); @@ -295,8 +302,6 @@ static void ixgbe_configure_msix(struct rte_eth_dev *dev); static int ixgbe_set_queue_rate_limit(struct rte_eth_dev *dev, uint16_t queue_idx, uint16_t tx_rate); -static int ixgbe_set_vf_rate_limit(struct rte_eth_dev *dev, uint16_t vf, - uint16_t tx_rate, uint64_t q_msk); static void ixgbevf_add_mac_addr(struct rte_eth_dev *dev, struct ether_addr *mac_addr, @@ -304,9 +309,6 @@ static void ixgbevf_add_mac_addr(struct rte_eth_dev *dev, static void 
ixgbevf_remove_mac_addr(struct rte_eth_dev *dev, uint32_t index); static void ixgbevf_set_default_mac_addr(struct rte_eth_dev *dev, struct ether_addr *mac_addr); -static int ixgbe_syn_filter_set(struct rte_eth_dev *dev, - struct rte_eth_syn_filter *filter, - bool add); static int ixgbe_syn_filter_get(struct rte_eth_dev *dev, struct rte_eth_syn_filter *filter); static int ixgbe_syn_filter_handle(struct rte_eth_dev *dev, @@ -316,17 +318,11 @@ static int ixgbe_add_5tuple_filter(struct rte_eth_dev *dev, struct ixgbe_5tuple_filter *filter); static void ixgbe_remove_5tuple_filter(struct rte_eth_dev *dev, struct ixgbe_5tuple_filter *filter); -static int ixgbe_add_del_ntuple_filter(struct rte_eth_dev *dev, - struct rte_eth_ntuple_filter *filter, - bool add); static int ixgbe_ntuple_filter_handle(struct rte_eth_dev *dev, enum rte_filter_op filter_op, void *arg); static int ixgbe_get_ntuple_filter(struct rte_eth_dev *dev, struct rte_eth_ntuple_filter *filter); -static int ixgbe_add_del_ethertype_filter(struct rte_eth_dev *dev, - struct rte_eth_ethertype_filter *filter, - bool add); static int ixgbe_ethertype_filter_handle(struct rte_eth_dev *dev, enum rte_filter_op filter_op, void *arg); @@ -387,6 +383,8 @@ static int ixgbe_dev_udp_tunnel_port_add(struct rte_eth_dev *dev, struct rte_eth_udp_tunnel *udp_tunnel); static int ixgbe_dev_udp_tunnel_port_del(struct rte_eth_dev *dev, struct rte_eth_udp_tunnel *udp_tunnel); +static int ixgbe_filter_restore(struct rte_eth_dev *dev); +static void ixgbe_l2_tunnel_conf(struct rte_eth_dev *dev); /* * Define VF Stats MACRO for Non "cleared on read" register @@ -429,23 +427,80 @@ static int ixgbe_dev_udp_tunnel_port_del(struct rte_eth_dev *dev, * The set of PCI devices this driver supports */ static const struct rte_pci_id pci_id_ixgbe_map[] = { - -#define RTE_PCI_DEV_ID_DECL_IXGBE(vend, dev) {RTE_PCI_DEVICE(vend, dev)}, -#include "rte_pci_dev_ids.h" - -{ .vendor_id = 0, /* sentinel */ }, + { RTE_PCI_DEVICE(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598) }, + { RTE_PCI_DEVICE(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598_BX) }, + { RTE_PCI_DEVICE(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598AF_DUAL_PORT) }, + { RTE_PCI_DEVICE(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598AF_SINGLE_PORT) }, + { RTE_PCI_DEVICE(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598AT) }, + { RTE_PCI_DEVICE(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598AT2) }, + { RTE_PCI_DEVICE(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598EB_SFP_LOM) }, + { RTE_PCI_DEVICE(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598EB_CX4) }, + { RTE_PCI_DEVICE(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598_CX4_DUAL_PORT) }, + { RTE_PCI_DEVICE(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598_DA_DUAL_PORT) }, + { RTE_PCI_DEVICE(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598_SR_DUAL_PORT_EM) }, + { RTE_PCI_DEVICE(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598EB_XF_LR) }, + { RTE_PCI_DEVICE(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_KX4) }, + { RTE_PCI_DEVICE(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_KX4_MEZZ) }, + { RTE_PCI_DEVICE(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_KR) }, + { RTE_PCI_DEVICE(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_COMBO_BACKPLANE) }, + { RTE_PCI_DEVICE(IXGBE_INTEL_VENDOR_ID, IXGBE_SUBDEV_ID_82599_KX4_KR_MEZZ) }, + { RTE_PCI_DEVICE(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_CX4) }, + { RTE_PCI_DEVICE(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_SFP) }, + { RTE_PCI_DEVICE(IXGBE_INTEL_VENDOR_ID, IXGBE_SUBDEV_ID_82599_SFP) }, + { RTE_PCI_DEVICE(IXGBE_INTEL_VENDOR_ID, IXGBE_SUBDEV_ID_82599_RNDC) }, + { RTE_PCI_DEVICE(IXGBE_INTEL_VENDOR_ID, IXGBE_SUBDEV_ID_82599_560FLR) 
}, + { RTE_PCI_DEVICE(IXGBE_INTEL_VENDOR_ID, IXGBE_SUBDEV_ID_82599_ECNA_DP) }, + { RTE_PCI_DEVICE(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_BACKPLANE_FCOE) }, + { RTE_PCI_DEVICE(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_SFP_FCOE) }, + { RTE_PCI_DEVICE(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_SFP_EM) }, + { RTE_PCI_DEVICE(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_SFP_SF2) }, + { RTE_PCI_DEVICE(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_SFP_SF_QP) }, + { RTE_PCI_DEVICE(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_QSFP_SF_QP) }, + { RTE_PCI_DEVICE(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599EN_SFP) }, + { RTE_PCI_DEVICE(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_XAUI_LOM) }, + { RTE_PCI_DEVICE(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_T3_LOM) }, + { RTE_PCI_DEVICE(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_LS) }, + { RTE_PCI_DEVICE(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_X540T) }, + { RTE_PCI_DEVICE(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_X540T1) }, + { RTE_PCI_DEVICE(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_X550EM_X_SFP) }, + { RTE_PCI_DEVICE(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_X550EM_X_10G_T) }, + { RTE_PCI_DEVICE(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_X550EM_X_1G_T) }, + { RTE_PCI_DEVICE(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_X550T) }, + { RTE_PCI_DEVICE(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_X550T1) }, + { RTE_PCI_DEVICE(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_X550EM_A_KR) }, + { RTE_PCI_DEVICE(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_X550EM_A_KR_L) }, + { RTE_PCI_DEVICE(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_X550EM_A_SFP_N) }, + { RTE_PCI_DEVICE(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_X550EM_A_SGMII) }, + { RTE_PCI_DEVICE(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_X550EM_A_SGMII_L) }, + { RTE_PCI_DEVICE(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_X550EM_A_10G_T) }, + { RTE_PCI_DEVICE(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_X550EM_A_QSFP) }, + { RTE_PCI_DEVICE(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_X550EM_A_QSFP_N) }, + { RTE_PCI_DEVICE(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_X550EM_A_SFP) }, + { RTE_PCI_DEVICE(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_X550EM_A_1G_T) }, + { RTE_PCI_DEVICE(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_X550EM_A_1G_T_L) }, + { RTE_PCI_DEVICE(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_X550EM_X_KX4) }, + { RTE_PCI_DEVICE(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_X550EM_X_KR) }, +#ifdef RTE_NIC_BYPASS + { RTE_PCI_DEVICE(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_BYPASS) }, +#endif + { .vendor_id = 0, /* sentinel */ }, }; - /* * The set of PCI devices this driver supports (for 82599 VF) */ static const struct rte_pci_id pci_id_ixgbevf_map[] = { - -#define RTE_PCI_DEV_ID_DECL_IXGBEVF(vend, dev) {RTE_PCI_DEVICE(vend, dev)}, -#include "rte_pci_dev_ids.h" -{ .vendor_id = 0, /* sentinel */ }, - + { RTE_PCI_DEVICE(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_VF) }, + { RTE_PCI_DEVICE(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_VF_HV) }, + { RTE_PCI_DEVICE(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_X540_VF) }, + { RTE_PCI_DEVICE(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_X540_VF_HV) }, + { RTE_PCI_DEVICE(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_X550_VF_HV) }, + { RTE_PCI_DEVICE(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_X550_VF) }, + { RTE_PCI_DEVICE(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_X550EM_A_VF) }, + { RTE_PCI_DEVICE(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_X550EM_A_VF_HV) }, + { RTE_PCI_DEVICE(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_X550EM_X_VF) }, + { RTE_PCI_DEVICE(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_X550EM_X_VF_HV) }, + { .vendor_id = 0, /* sentinel */ }, }; static const struct rte_eth_desc_lim rx_desc_lim = { @@ -458,6 +513,8 @@ static const struct rte_eth_desc_lim tx_desc_lim = { .nb_max = 
IXGBE_MAX_RING_DESC, .nb_min = IXGBE_MIN_RING_DESC, .nb_align = IXGBE_TXD_ALIGN, + .nb_seg_max = IXGBE_TX_MAX_SEG, + .nb_mtu_seg_max = IXGBE_TX_MAX_SEG, }; static const struct eth_dev_ops ixgbe_eth_dev_ops = { @@ -478,6 +535,7 @@ static const struct eth_dev_ops ixgbe_eth_dev_ops = { .xstats_reset = ixgbe_dev_xstats_reset, .xstats_get_names = ixgbe_dev_xstats_get_names, .queue_stats_mapping_set = ixgbe_dev_queue_stats_mapping_set, + .fw_version_get = ixgbe_fw_version_get, .dev_infos_get = ixgbe_dev_info_get, .dev_supported_ptypes_get = ixgbe_dev_supported_ptypes_get, .mtu_set = ixgbe_dev_mtu_set, @@ -509,12 +567,7 @@ static const struct eth_dev_ops ixgbe_eth_dev_ops = { .uc_all_hash_table_set = ixgbe_uc_all_hash_table_set, .mirror_rule_set = ixgbe_mirror_rule_set, .mirror_rule_reset = ixgbe_mirror_rule_reset, - .set_vf_rx_mode = ixgbe_set_pool_rx_mode, - .set_vf_rx = ixgbe_set_pool_rx, - .set_vf_tx = ixgbe_set_pool_tx, - .set_vf_vlan_filter = ixgbe_set_pool_vlan_filter, .set_queue_rate_limit = ixgbe_set_queue_rate_limit, - .set_vf_rate_limit = ixgbe_set_vf_rate_limit, .reta_update = ixgbe_dev_rss_reta_update, .reta_query = ixgbe_dev_rss_reta_query, #ifdef RTE_NIC_BYPASS @@ -685,6 +738,51 @@ static const struct rte_ixgbe_xstats_name_off rte_ixgbe_stats_strings[] = { #define IXGBE_NB_HW_STATS (sizeof(rte_ixgbe_stats_strings) / \ sizeof(rte_ixgbe_stats_strings[0])) +/* MACsec statistics */ +static const struct rte_ixgbe_xstats_name_off rte_ixgbe_macsec_strings[] = { + {"out_pkts_untagged", offsetof(struct ixgbe_macsec_stats, + out_pkts_untagged)}, + {"out_pkts_encrypted", offsetof(struct ixgbe_macsec_stats, + out_pkts_encrypted)}, + {"out_pkts_protected", offsetof(struct ixgbe_macsec_stats, + out_pkts_protected)}, + {"out_octets_encrypted", offsetof(struct ixgbe_macsec_stats, + out_octets_encrypted)}, + {"out_octets_protected", offsetof(struct ixgbe_macsec_stats, + out_octets_protected)}, + {"in_pkts_untagged", offsetof(struct ixgbe_macsec_stats, + in_pkts_untagged)}, + {"in_pkts_badtag", offsetof(struct ixgbe_macsec_stats, + in_pkts_badtag)}, + {"in_pkts_nosci", offsetof(struct ixgbe_macsec_stats, + in_pkts_nosci)}, + {"in_pkts_unknownsci", offsetof(struct ixgbe_macsec_stats, + in_pkts_unknownsci)}, + {"in_octets_decrypted", offsetof(struct ixgbe_macsec_stats, + in_octets_decrypted)}, + {"in_octets_validated", offsetof(struct ixgbe_macsec_stats, + in_octets_validated)}, + {"in_pkts_unchecked", offsetof(struct ixgbe_macsec_stats, + in_pkts_unchecked)}, + {"in_pkts_delayed", offsetof(struct ixgbe_macsec_stats, + in_pkts_delayed)}, + {"in_pkts_late", offsetof(struct ixgbe_macsec_stats, + in_pkts_late)}, + {"in_pkts_ok", offsetof(struct ixgbe_macsec_stats, + in_pkts_ok)}, + {"in_pkts_invalid", offsetof(struct ixgbe_macsec_stats, + in_pkts_invalid)}, + {"in_pkts_notvalid", offsetof(struct ixgbe_macsec_stats, + in_pkts_notvalid)}, + {"in_pkts_unusedsa", offsetof(struct ixgbe_macsec_stats, + in_pkts_unusedsa)}, + {"in_pkts_notusingsa", offsetof(struct ixgbe_macsec_stats, + in_pkts_notusingsa)}, +}; + +#define IXGBE_NB_MACSEC_STATS (sizeof(rte_ixgbe_macsec_strings) / \ + sizeof(rte_ixgbe_macsec_strings[0])) + /* Per-queue statistics */ static const struct rte_ixgbe_xstats_name_off rte_ixgbe_rxq_strings[] = { {"mbuf_allocation_errors", offsetof(struct ixgbe_hw_stats, rnbc)}, @@ -800,6 +898,8 @@ ixgbe_pf_reset_hw(struct ixgbe_hw *hw) IXGBE_WRITE_REG(hw, IXGBE_CTRL_EXT, ctrl_ext); IXGBE_WRITE_FLUSH(hw); + if (status == IXGBE_ERR_SFP_NOT_PRESENT) + status = IXGBE_SUCCESS; return status; } @@ -1024,7 
+1124,8 @@ ixgbe_swfw_lock_reset(struct ixgbe_hw *hw) static int eth_ixgbe_dev_init(struct rte_eth_dev *eth_dev) { - struct rte_pci_device *pci_dev; + struct rte_pci_device *pci_dev = IXGBE_DEV_TO_PCI(eth_dev); + struct rte_intr_handle *intr_handle = &pci_dev->intr_handle; struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private); struct ixgbe_vfta *shadow_vfta = @@ -1044,6 +1145,7 @@ eth_ixgbe_dev_init(struct rte_eth_dev *eth_dev) eth_dev->dev_ops = &ixgbe_eth_dev_ops; eth_dev->rx_pkt_burst = &ixgbe_recv_pkts; eth_dev->tx_pkt_burst = &ixgbe_xmit_pkts; + eth_dev->tx_pkt_prepare = &ixgbe_prep_pkts; /* * For secondary processes, we don't initialise any further as primary @@ -1068,9 +1170,9 @@ eth_ixgbe_dev_init(struct rte_eth_dev *eth_dev) return 0; } - pci_dev = eth_dev->pci_dev; rte_eth_copy_pci_info(eth_dev, pci_dev); + eth_dev->data->dev_flags = RTE_ETH_DEV_DETACHABLE; /* Vendor and Device ID need to be set before init of shared code */ hw->device_id = pci_dev->id.device_id; @@ -1137,6 +1239,9 @@ eth_ixgbe_dev_init(struct rte_eth_dev *eth_dev) diag = ixgbe_init_hw(hw); } + if (diag == IXGBE_ERR_SFP_NOT_PRESENT) + diag = IXGBE_SUCCESS; + if (diag == IXGBE_ERR_EEPROM_VERSION) { PMD_INIT_LOG(ERR, "This device is a pre-production adapter/" "LOM. Please be aware there may be issues associated " @@ -1213,20 +1318,34 @@ eth_ixgbe_dev_init(struct rte_eth_dev *eth_dev) eth_dev->data->port_id, pci_dev->id.vendor_id, pci_dev->id.device_id); - rte_intr_callback_register(&pci_dev->intr_handle, - ixgbe_dev_interrupt_handler, - (void *)eth_dev); + rte_intr_callback_register(intr_handle, + ixgbe_dev_interrupt_handler, eth_dev); /* enable uio/vfio intr/eventfd mapping */ - rte_intr_enable(&pci_dev->intr_handle); + rte_intr_enable(intr_handle); /* enable support intr */ ixgbe_enable_intr(eth_dev); + /* initialize filter info */ + memset(filter_info, 0, + sizeof(struct ixgbe_filter_info)); + /* initialize 5tuple filter list */ TAILQ_INIT(&filter_info->fivetuple_list); - memset(filter_info->fivetuple_mask, 0, - sizeof(uint32_t) * IXGBE_5TUPLE_ARRAY_SIZE); + + /* initialize flow director filter list & hash */ + ixgbe_fdir_filter_init(eth_dev); + + /* initialize l2 tunnel filter list & hash */ + ixgbe_l2_tn_filter_init(eth_dev); + + TAILQ_INIT(&filter_ntuple_list); + TAILQ_INIT(&filter_ethertype_list); + TAILQ_INIT(&filter_syn_list); + TAILQ_INIT(&filter_fdir_list); + TAILQ_INIT(&filter_l2_tunnel_list); + TAILQ_INIT(&ixgbe_flow_list); return 0; } @@ -1234,7 +1353,8 @@ eth_ixgbe_dev_init(struct rte_eth_dev *eth_dev) static int eth_ixgbe_dev_uninit(struct rte_eth_dev *eth_dev) { - struct rte_pci_device *pci_dev; + struct rte_pci_device *pci_dev = IXGBE_DEV_TO_PCI(eth_dev); + struct rte_intr_handle *intr_handle = &pci_dev->intr_handle; struct ixgbe_hw *hw; PMD_INIT_FUNC_TRACE(); @@ -1243,7 +1363,6 @@ eth_ixgbe_dev_uninit(struct rte_eth_dev *eth_dev) return -EPERM; hw = IXGBE_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private); - pci_dev = eth_dev->pci_dev; if (hw->adapter_stopped == 0) ixgbe_dev_close(eth_dev); @@ -1256,9 +1375,9 @@ eth_ixgbe_dev_uninit(struct rte_eth_dev *eth_dev) ixgbe_swfw_lock_reset(hw); /* disable uio intr before callback unregister */ - rte_intr_disable(&(pci_dev->intr_handle)); - rte_intr_callback_unregister(&(pci_dev->intr_handle), - ixgbe_dev_interrupt_handler, (void *)eth_dev); + rte_intr_disable(intr_handle); + rte_intr_callback_unregister(intr_handle, + ixgbe_dev_interrupt_handler, eth_dev); /* uninitialize PF if max_vfs not zero */ ixgbe_pf_host_uninit(eth_dev); @@ -1269,9 
+1388,154 @@ eth_ixgbe_dev_uninit(struct rte_eth_dev *eth_dev) rte_free(eth_dev->data->hash_mac_addrs); eth_dev->data->hash_mac_addrs = NULL; + /* remove all the fdir filters & hash */ + ixgbe_fdir_filter_uninit(eth_dev); + + /* remove all the L2 tunnel filters & hash */ + ixgbe_l2_tn_filter_uninit(eth_dev); + + /* Remove all ntuple filters of the device */ + ixgbe_ntuple_filter_uninit(eth_dev); + + /* clear all the filters list */ + ixgbe_filterlist_flush(); + + return 0; +} + +static int ixgbe_ntuple_filter_uninit(struct rte_eth_dev *eth_dev) +{ + struct ixgbe_filter_info *filter_info = + IXGBE_DEV_PRIVATE_TO_FILTER_INFO(eth_dev->data->dev_private); + struct ixgbe_5tuple_filter *p_5tuple; + + while ((p_5tuple = TAILQ_FIRST(&filter_info->fivetuple_list))) { + TAILQ_REMOVE(&filter_info->fivetuple_list, + p_5tuple, + entries); + rte_free(p_5tuple); + } + memset(filter_info->fivetuple_mask, 0, + sizeof(uint32_t) * IXGBE_5TUPLE_ARRAY_SIZE); + return 0; } +static int ixgbe_fdir_filter_uninit(struct rte_eth_dev *eth_dev) +{ + struct ixgbe_hw_fdir_info *fdir_info = + IXGBE_DEV_PRIVATE_TO_FDIR_INFO(eth_dev->data->dev_private); + struct ixgbe_fdir_filter *fdir_filter; + + if (fdir_info->hash_map) + rte_free(fdir_info->hash_map); + if (fdir_info->hash_handle) + rte_hash_free(fdir_info->hash_handle); + + while ((fdir_filter = TAILQ_FIRST(&fdir_info->fdir_list))) { + TAILQ_REMOVE(&fdir_info->fdir_list, + fdir_filter, + entries); + rte_free(fdir_filter); + } + + return 0; +} + +static int ixgbe_l2_tn_filter_uninit(struct rte_eth_dev *eth_dev) +{ + struct ixgbe_l2_tn_info *l2_tn_info = + IXGBE_DEV_PRIVATE_TO_L2_TN_INFO(eth_dev->data->dev_private); + struct ixgbe_l2_tn_filter *l2_tn_filter; + + if (l2_tn_info->hash_map) + rte_free(l2_tn_info->hash_map); + if (l2_tn_info->hash_handle) + rte_hash_free(l2_tn_info->hash_handle); + + while ((l2_tn_filter = TAILQ_FIRST(&l2_tn_info->l2_tn_list))) { + TAILQ_REMOVE(&l2_tn_info->l2_tn_list, + l2_tn_filter, + entries); + rte_free(l2_tn_filter); + } + + return 0; +} + +static int ixgbe_fdir_filter_init(struct rte_eth_dev *eth_dev) +{ + struct ixgbe_hw_fdir_info *fdir_info = + IXGBE_DEV_PRIVATE_TO_FDIR_INFO(eth_dev->data->dev_private); + char fdir_hash_name[RTE_HASH_NAMESIZE]; + struct rte_hash_parameters fdir_hash_params = { + .name = fdir_hash_name, + .entries = IXGBE_MAX_FDIR_FILTER_NUM, + .key_len = sizeof(union ixgbe_atr_input), + .hash_func = rte_hash_crc, + .hash_func_init_val = 0, + .socket_id = rte_socket_id(), + }; + + TAILQ_INIT(&fdir_info->fdir_list); + snprintf(fdir_hash_name, RTE_HASH_NAMESIZE, + "fdir_%s", eth_dev->data->name); + fdir_info->hash_handle = rte_hash_create(&fdir_hash_params); + if (!fdir_info->hash_handle) { + PMD_INIT_LOG(ERR, "Failed to create fdir hash table!"); + return -EINVAL; + } + fdir_info->hash_map = rte_zmalloc("ixgbe", + sizeof(struct ixgbe_fdir_filter *) * + IXGBE_MAX_FDIR_FILTER_NUM, + 0); + if (!fdir_info->hash_map) { + PMD_INIT_LOG(ERR, + "Failed to allocate memory for fdir hash map!"); + return -ENOMEM; + } + fdir_info->mask_added = FALSE; + + return 0; +} + +static int ixgbe_l2_tn_filter_init(struct rte_eth_dev *eth_dev) +{ + struct ixgbe_l2_tn_info *l2_tn_info = + IXGBE_DEV_PRIVATE_TO_L2_TN_INFO(eth_dev->data->dev_private); + char l2_tn_hash_name[RTE_HASH_NAMESIZE]; + struct rte_hash_parameters l2_tn_hash_params = { + .name = l2_tn_hash_name, + .entries = IXGBE_MAX_L2_TN_FILTER_NUM, + .key_len = sizeof(struct ixgbe_l2_tn_key), + .hash_func = rte_hash_crc, + .hash_func_init_val = 0, + .socket_id = rte_socket_id(), + }; 
+ + TAILQ_INIT(&l2_tn_info->l2_tn_list); + snprintf(l2_tn_hash_name, RTE_HASH_NAMESIZE, + "l2_tn_%s", eth_dev->data->name); + l2_tn_info->hash_handle = rte_hash_create(&l2_tn_hash_params); + if (!l2_tn_info->hash_handle) { + PMD_INIT_LOG(ERR, "Failed to create L2 TN hash table!"); + return -EINVAL; + } + l2_tn_info->hash_map = rte_zmalloc("ixgbe", + sizeof(struct ixgbe_l2_tn_filter *) * + IXGBE_MAX_L2_TN_FILTER_NUM, + 0); + if (!l2_tn_info->hash_map) { + PMD_INIT_LOG(ERR, + "Failed to allocate memory for L2 TN hash map!"); + return -ENOMEM; + } + l2_tn_info->e_tag_en = FALSE; + l2_tn_info->e_tag_fwd_en = FALSE; + l2_tn_info->e_tag_ether_type = DEFAULT_ETAG_ETYPE; + + return 0; +} /* * Negotiate mailbox API version with the PF. * After reset API version is always set to the basic one (ixgbe_mbox_api_10). @@ -1322,7 +1586,8 @@ eth_ixgbevf_dev_init(struct rte_eth_dev *eth_dev) { int diag; uint32_t tc, tcs; - struct rte_pci_device *pci_dev; + struct rte_pci_device *pci_dev = IXGBE_DEV_TO_PCI(eth_dev); + struct rte_intr_handle *intr_handle = &pci_dev->intr_handle; struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private); struct ixgbe_vfta *shadow_vfta = @@ -1360,9 +1625,8 @@ eth_ixgbevf_dev_init(struct rte_eth_dev *eth_dev) return 0; } - pci_dev = eth_dev->pci_dev; - rte_eth_copy_pci_info(eth_dev, pci_dev); + eth_dev->data->dev_flags = RTE_ETH_DEV_DETACHABLE; hw->device_id = pci_dev->id.device_id; hw->vendor_id = pci_dev->id.vendor_id; @@ -1454,10 +1718,9 @@ eth_ixgbevf_dev_init(struct rte_eth_dev *eth_dev) return -EIO; } - rte_intr_callback_register(&pci_dev->intr_handle, - ixgbevf_dev_interrupt_handler, - (void *)eth_dev); - rte_intr_enable(&pci_dev->intr_handle); + rte_intr_callback_register(intr_handle, + ixgbevf_dev_interrupt_handler, eth_dev); + rte_intr_enable(intr_handle); ixgbevf_intr_enable(hw); PMD_INIT_LOG(DEBUG, "port %d vendorID=0x%x deviceID=0x%x mac.type=%s", @@ -1472,8 +1735,9 @@ eth_ixgbevf_dev_init(struct rte_eth_dev *eth_dev) static int eth_ixgbevf_dev_uninit(struct rte_eth_dev *eth_dev) { + struct rte_pci_device *pci_dev = IXGBE_DEV_TO_PCI(eth_dev); + struct rte_intr_handle *intr_handle = &pci_dev->intr_handle; struct ixgbe_hw *hw; - struct rte_pci_device *pci_dev = eth_dev->pci_dev; PMD_INIT_FUNC_TRACE(); @@ -1495,20 +1759,19 @@ eth_ixgbevf_dev_uninit(struct rte_eth_dev *eth_dev) rte_free(eth_dev->data->mac_addrs); eth_dev->data->mac_addrs = NULL; - rte_intr_disable(&pci_dev->intr_handle); - rte_intr_callback_unregister(&pci_dev->intr_handle, - ixgbevf_dev_interrupt_handler, - (void *)eth_dev); + rte_intr_disable(intr_handle); + rte_intr_callback_unregister(intr_handle, + ixgbevf_dev_interrupt_handler, eth_dev); return 0; } static struct eth_driver rte_ixgbe_pmd = { .pci_drv = { - .name = "rte_ixgbe_pmd", .id_table = pci_id_ixgbe_map, - .drv_flags = RTE_PCI_DRV_NEED_MAPPING | RTE_PCI_DRV_INTR_LSC | - RTE_PCI_DRV_DETACHABLE, + .drv_flags = RTE_PCI_DRV_NEED_MAPPING | RTE_PCI_DRV_INTR_LSC, + .probe = rte_eth_dev_pci_probe, + .remove = rte_eth_dev_pci_remove, }, .eth_dev_init = eth_ixgbe_dev_init, .eth_dev_uninit = eth_ixgbe_dev_uninit, @@ -1520,43 +1783,16 @@ static struct eth_driver rte_ixgbe_pmd = { */ static struct eth_driver rte_ixgbevf_pmd = { .pci_drv = { - .name = "rte_ixgbevf_pmd", .id_table = pci_id_ixgbevf_map, - .drv_flags = RTE_PCI_DRV_NEED_MAPPING | RTE_PCI_DRV_DETACHABLE, + .drv_flags = RTE_PCI_DRV_NEED_MAPPING, + .probe = rte_eth_dev_pci_probe, + .remove = rte_eth_dev_pci_remove, }, .eth_dev_init = eth_ixgbevf_dev_init, .eth_dev_uninit = 
eth_ixgbevf_dev_uninit, .dev_private_size = sizeof(struct ixgbe_adapter), }; -/* - * Driver initialization routine. - * Invoked once at EAL init time. - * Register itself as the [Poll Mode] Driver of PCI IXGBE devices. - */ -static int -rte_ixgbe_pmd_init(const char *name __rte_unused, const char *params __rte_unused) -{ - PMD_INIT_FUNC_TRACE(); - - rte_eth_driver_register(&rte_ixgbe_pmd); - return 0; -} - -/* - * VF Driver initialization routine. - * Invoked one at EAL init time. - * Register itself as the [Virtual Poll Mode] Driver of PCI niantic devices. - */ -static int -rte_ixgbevf_pmd_init(const char *name __rte_unused, const char *param __rte_unused) -{ - PMD_INIT_FUNC_TRACE(); - - rte_eth_driver_register(&rte_ixgbevf_pmd); - return 0; -} - static int ixgbe_vlan_filter_set(struct rte_eth_dev *dev, uint16_t vlan_id, int on) { @@ -1768,6 +2004,7 @@ ixgbe_vlan_hw_strip_disable_all(struct rte_eth_dev *dev) IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private); uint32_t ctrl; uint16_t i; + struct ixgbe_rx_queue *rxq; PMD_INIT_FUNC_TRACE(); @@ -1778,9 +2015,10 @@ ixgbe_vlan_hw_strip_disable_all(struct rte_eth_dev *dev) } else { /* Other 10G NIC, the VLAN strip can be setup per queue in RXDCTL */ for (i = 0; i < dev->data->nb_rx_queues; i++) { - ctrl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(i)); + rxq = dev->data->rx_queues[i]; + ctrl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(rxq->reg_idx)); ctrl &= ~IXGBE_RXDCTL_VME; - IXGBE_WRITE_REG(hw, IXGBE_RXDCTL(i), ctrl); + IXGBE_WRITE_REG(hw, IXGBE_RXDCTL(rxq->reg_idx), ctrl); /* record those setting for HW strip per queue */ ixgbe_vlan_hw_strip_bitmap_set(dev, i, 0); @@ -1795,6 +2033,7 @@ ixgbe_vlan_hw_strip_enable_all(struct rte_eth_dev *dev) IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private); uint32_t ctrl; uint16_t i; + struct ixgbe_rx_queue *rxq; PMD_INIT_FUNC_TRACE(); @@ -1805,9 +2044,10 @@ ixgbe_vlan_hw_strip_enable_all(struct rte_eth_dev *dev) } else { /* Other 10G NIC, the VLAN strip can be setup per queue in RXDCTL */ for (i = 0; i < dev->data->nb_rx_queues; i++) { - ctrl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(i)); + rxq = dev->data->rx_queues[i]; + ctrl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(rxq->reg_idx)); ctrl |= IXGBE_RXDCTL_VME; - IXGBE_WRITE_REG(hw, IXGBE_RXDCTL(i), ctrl); + IXGBE_WRITE_REG(hw, IXGBE_RXDCTL(rxq->reg_idx), ctrl); /* record those setting for HW strip per queue */ ixgbe_vlan_hw_strip_bitmap_set(dev, i, 1); @@ -1910,6 +2150,8 @@ ixgbe_vmdq_vlan_hw_filter_enable(struct rte_eth_dev *dev) static int ixgbe_check_vf_rss_rxq_num(struct rte_eth_dev *dev, uint16_t nb_rx_q) { + struct rte_pci_device *pci_dev = IXGBE_DEV_TO_PCI(dev); + switch (nb_rx_q) { case 1: case 2: @@ -1923,7 +2165,7 @@ ixgbe_check_vf_rss_rxq_num(struct rte_eth_dev *dev, uint16_t nb_rx_q) } RTE_ETH_DEV_SRIOV(dev).nb_q_per_pool = nb_rx_q; - RTE_ETH_DEV_SRIOV(dev).def_pool_q_idx = dev->pci_dev->max_vfs * nb_rx_q; + RTE_ETH_DEV_SRIOV(dev).def_pool_q_idx = pci_dev->max_vfs * nb_rx_q; return 0; } @@ -1940,6 +2182,8 @@ ixgbe_check_mq_mode(struct rte_eth_dev *dev) /* check multi-queue mode */ switch (dev_conf->rxmode.mq_mode) { case ETH_MQ_RX_VMDQ_DCB: + PMD_INIT_LOG(INFO, "ETH_MQ_RX_VMDQ_DCB mode supported in SRIOV"); + break; case ETH_MQ_RX_VMDQ_DCB_RSS: /* DCB/RSS VMDQ in SRIOV mode, not implement yet */ PMD_INIT_LOG(ERR, "SRIOV active," @@ -1975,11 +2219,9 @@ ixgbe_check_mq_mode(struct rte_eth_dev *dev) switch (dev_conf->txmode.mq_mode) { case ETH_MQ_TX_VMDQ_DCB: - /* DCB VMDQ in SRIOV mode, not implement yet */ - PMD_INIT_LOG(ERR, "SRIOV is active," - " unsupported VMDQ mq_mode tx %d.", - 
dev_conf->txmode.mq_mode); - return -EINVAL; + PMD_INIT_LOG(INFO, "ETH_MQ_TX_VMDQ_DCB mode supported in SRIOV"); + dev->data->dev_conf.txmode.mq_mode = ETH_MQ_TX_VMDQ_DCB; + break; default: /* ETH_MQ_TX_VMDQ_ONLY or ETH_MQ_TX_NONE */ dev->data->dev_conf.txmode.mq_mode = ETH_MQ_TX_VMDQ_ONLY; break; @@ -2154,7 +2396,8 @@ ixgbe_dev_start(struct rte_eth_dev *dev) IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private); struct ixgbe_vf_info *vfinfo = *IXGBE_DEV_PRIVATE_TO_P_VFDATA(dev->data->dev_private); - struct rte_intr_handle *intr_handle = &dev->pci_dev->intr_handle; + struct rte_pci_device *pci_dev = IXGBE_DEV_TO_PCI(dev); + struct rte_intr_handle *intr_handle = &pci_dev->intr_handle; uint32_t intr_vector = 0; int err, link_up = 0, negotiate = 0; uint32_t speed = 0; @@ -2216,7 +2459,7 @@ ixgbe_dev_start(struct rte_eth_dev *dev) dev->data->nb_rx_queues * sizeof(int), 0); if (intr_handle->intr_vec == NULL) { PMD_INIT_LOG(ERR, "Failed to allocate %d rx_queues" - " intr_vec\n", dev->data->nb_rx_queues); + " intr_vec", dev->data->nb_rx_queues); return -ENOMEM; } } @@ -2234,6 +2477,37 @@ ixgbe_dev_start(struct rte_eth_dev *dev) goto error; } + mask = ETH_VLAN_STRIP_MASK | ETH_VLAN_FILTER_MASK | + ETH_VLAN_EXTEND_MASK; + ixgbe_vlan_offload_set(dev, mask); + + if (dev->data->dev_conf.rxmode.mq_mode == ETH_MQ_RX_VMDQ_ONLY) { + /* Enable vlan filtering for VMDq */ + ixgbe_vmdq_vlan_hw_filter_enable(dev); + } + + /* Configure DCB hw */ + ixgbe_configure_dcb(dev); + + if (dev->data->dev_conf.fdir_conf.mode != RTE_FDIR_MODE_NONE) { + err = ixgbe_fdir_configure(dev); + if (err) + goto error; + } + + /* Restore vf rate limit */ + if (vfinfo != NULL) { + for (vf = 0; vf < pci_dev->max_vfs; vf++) + for (idx = 0; idx < IXGBE_MAX_QUEUE_NUM_PER_VF; idx++) + if (vfinfo[vf].tx_rate[idx] != 0) + rte_pmd_ixgbe_set_vf_rate_limit( + dev->data->port_id, vf, + vfinfo[vf].tx_rate[idx], + 1 << idx); + } + + ixgbe_restore_statistics_mapping(dev); + err = ixgbe_dev_rxtx_start(dev); if (err < 0) { PMD_INIT_LOG(ERR, "Unable to start rxtx queues"); @@ -2299,13 +2573,13 @@ skip_link_setup: /* check if lsc interrupt is enabled */ if (dev->data->dev_conf.intr_conf.lsc != 0) ixgbe_dev_lsc_interrupt_setup(dev); + ixgbe_dev_macsec_interrupt_setup(dev); } else { rte_intr_callback_unregister(intr_handle, - ixgbe_dev_interrupt_handler, - (void *)dev); + ixgbe_dev_interrupt_handler, dev); if (dev->data->dev_conf.intr_conf.lsc != 0) PMD_INIT_LOG(INFO, "lsc won't enable because of" - " no intr multiplex\n"); + " no intr multiplex"); } /* check if rxq interrupt is enabled */ @@ -2318,36 +2592,8 @@ skip_link_setup: /* resume enabled intr since hw reset */ ixgbe_enable_intr(dev); - - mask = ETH_VLAN_STRIP_MASK | ETH_VLAN_FILTER_MASK | - ETH_VLAN_EXTEND_MASK; - ixgbe_vlan_offload_set(dev, mask); - - if (dev->data->dev_conf.rxmode.mq_mode == ETH_MQ_RX_VMDQ_ONLY) { - /* Enable vlan filtering for VMDq */ - ixgbe_vmdq_vlan_hw_filter_enable(dev); - } - - /* Configure DCB hw */ - ixgbe_configure_dcb(dev); - - if (dev->data->dev_conf.fdir_conf.mode != RTE_FDIR_MODE_NONE) { - err = ixgbe_fdir_configure(dev); - if (err) - goto error; - } - - /* Restore vf rate limit */ - if (vfinfo != NULL) { - for (vf = 0; vf < dev->pci_dev->max_vfs; vf++) - for (idx = 0; idx < IXGBE_MAX_QUEUE_NUM_PER_VF; idx++) - if (vfinfo[vf].tx_rate[idx] != 0) - ixgbe_set_vf_rate_limit(dev, vf, - vfinfo[vf].tx_rate[idx], - 1 << idx); - } - - ixgbe_restore_statistics_mapping(dev); + ixgbe_l2_tunnel_conf(dev); + ixgbe_filter_restore(dev); return 0; @@ -2368,10 +2614,8 @@ 
ixgbe_dev_stop(struct rte_eth_dev *dev) IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private); struct ixgbe_vf_info *vfinfo = *IXGBE_DEV_PRIVATE_TO_P_VFDATA(dev->data->dev_private); - struct ixgbe_filter_info *filter_info = - IXGBE_DEV_PRIVATE_TO_FILTER_INFO(dev->data->dev_private); - struct ixgbe_5tuple_filter *p_5tuple, *p_5tuple_next; - struct rte_intr_handle *intr_handle = &dev->pci_dev->intr_handle; + struct rte_pci_device *pci_dev = IXGBE_DEV_TO_PCI(dev); + struct rte_intr_handle *intr_handle = &pci_dev->intr_handle; int vf; PMD_INIT_FUNC_TRACE(); @@ -2386,8 +2630,7 @@ ixgbe_dev_stop(struct rte_eth_dev *dev) /* stop adapter */ ixgbe_stop_adapter(hw); - for (vf = 0; vfinfo != NULL && - vf < dev->pci_dev->max_vfs; vf++) + for (vf = 0; vfinfo != NULL && vf < pci_dev->max_vfs; vf++) vfinfo[vf].clear_to_send = false; if (hw->mac.ops.get_media_type(hw) == ixgbe_media_type_copper) { @@ -2408,17 +2651,6 @@ ixgbe_dev_stop(struct rte_eth_dev *dev) memset(&link, 0, sizeof(link)); rte_ixgbe_dev_atomic_write_link_status(dev, &link); - /* Remove all ntuple filters of the device */ - for (p_5tuple = TAILQ_FIRST(&filter_info->fivetuple_list); - p_5tuple != NULL; p_5tuple = p_5tuple_next) { - p_5tuple_next = TAILQ_NEXT(p_5tuple, entries); - TAILQ_REMOVE(&filter_info->fivetuple_list, - p_5tuple, entries); - rte_free(p_5tuple); - } - memset(filter_info->fivetuple_mask, 0, - sizeof(uint32_t) * IXGBE_5TUPLE_ARRAY_SIZE); - if (!rte_intr_allow_others(intr_handle)) /* resume to the default handler */ rte_intr_callback_register(intr_handle, @@ -2520,6 +2752,7 @@ ixgbe_dev_close(struct rte_eth_dev *dev) static void ixgbe_read_stats_registers(struct ixgbe_hw *hw, struct ixgbe_hw_stats *hw_stats, + struct ixgbe_macsec_stats *macsec_stats, uint64_t *total_missed_rx, uint64_t *total_qbrc, uint64_t *total_qprc, uint64_t *total_qprdc) { @@ -2527,9 +2760,9 @@ ixgbe_read_stats_registers(struct ixgbe_hw *hw, uint32_t delta_gprc = 0; unsigned i; /* Workaround for RX byte count not including CRC bytes when CRC -+ * strip is enabled. CRC bytes are removed from counters when crc_strip + * strip is enabled. CRC bytes are removed from counters when crc_strip * is disabled. 
-+ */ + */ int crc_strip = (IXGBE_READ_REG(hw, IXGBE_HLREG0) & IXGBE_HLREG0_RXCRCSTRP); @@ -2689,6 +2922,40 @@ ixgbe_read_stats_registers(struct ixgbe_hw *hw, /* Flow Director Stats registers */ hw_stats->fdirmatch += IXGBE_READ_REG(hw, IXGBE_FDIRMATCH); hw_stats->fdirmiss += IXGBE_READ_REG(hw, IXGBE_FDIRMISS); + + /* MACsec Stats registers */ + macsec_stats->out_pkts_untagged += IXGBE_READ_REG(hw, IXGBE_LSECTXUT); + macsec_stats->out_pkts_encrypted += + IXGBE_READ_REG(hw, IXGBE_LSECTXPKTE); + macsec_stats->out_pkts_protected += + IXGBE_READ_REG(hw, IXGBE_LSECTXPKTP); + macsec_stats->out_octets_encrypted += + IXGBE_READ_REG(hw, IXGBE_LSECTXOCTE); + macsec_stats->out_octets_protected += + IXGBE_READ_REG(hw, IXGBE_LSECTXOCTP); + macsec_stats->in_pkts_untagged += IXGBE_READ_REG(hw, IXGBE_LSECRXUT); + macsec_stats->in_pkts_badtag += IXGBE_READ_REG(hw, IXGBE_LSECRXBAD); + macsec_stats->in_pkts_nosci += IXGBE_READ_REG(hw, IXGBE_LSECRXNOSCI); + macsec_stats->in_pkts_unknownsci += + IXGBE_READ_REG(hw, IXGBE_LSECRXUNSCI); + macsec_stats->in_octets_decrypted += + IXGBE_READ_REG(hw, IXGBE_LSECRXOCTD); + macsec_stats->in_octets_validated += + IXGBE_READ_REG(hw, IXGBE_LSECRXOCTV); + macsec_stats->in_pkts_unchecked += IXGBE_READ_REG(hw, IXGBE_LSECRXUNCH); + macsec_stats->in_pkts_delayed += IXGBE_READ_REG(hw, IXGBE_LSECRXDELAY); + macsec_stats->in_pkts_late += IXGBE_READ_REG(hw, IXGBE_LSECRXLATE); + for (i = 0; i < 2; i++) { + macsec_stats->in_pkts_ok += + IXGBE_READ_REG(hw, IXGBE_LSECRXOK(i)); + macsec_stats->in_pkts_invalid += + IXGBE_READ_REG(hw, IXGBE_LSECRXINV(i)); + macsec_stats->in_pkts_notvalid += + IXGBE_READ_REG(hw, IXGBE_LSECRXNV(i)); + } + macsec_stats->in_pkts_unusedsa += IXGBE_READ_REG(hw, IXGBE_LSECRXUNSA); + macsec_stats->in_pkts_notusingsa += + IXGBE_READ_REG(hw, IXGBE_LSECRXNUSA); } /* @@ -2701,6 +2968,9 @@ ixgbe_dev_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats) IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private); struct ixgbe_hw_stats *hw_stats = IXGBE_DEV_PRIVATE_TO_STATS(dev->data->dev_private); + struct ixgbe_macsec_stats *macsec_stats = + IXGBE_DEV_PRIVATE_TO_MACSEC_STATS( + dev->data->dev_private); uint64_t total_missed_rx, total_qbrc, total_qprc, total_qprdc; unsigned i; @@ -2709,8 +2979,8 @@ ixgbe_dev_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats) total_qprc = 0; total_qprdc = 0; - ixgbe_read_stats_registers(hw, hw_stats, &total_missed_rx, &total_qbrc, - &total_qprc, &total_qprdc); + ixgbe_read_stats_registers(hw, hw_stats, macsec_stats, &total_missed_rx, + &total_qbrc, &total_qprc, &total_qprdc); if (stats == NULL) return; @@ -2762,7 +3032,7 @@ ixgbe_dev_stats_reset(struct rte_eth_dev *dev) /* This function calculates the number of xstats based on the current config */ static unsigned ixgbe_xstats_calc_num(void) { - return IXGBE_NB_HW_STATS + + return IXGBE_NB_HW_STATS + IXGBE_NB_MACSEC_STATS + (IXGBE_NB_RXQ_PRIO_STATS * IXGBE_NB_RXQ_PRIO_VALUES) + (IXGBE_NB_TXQ_PRIO_STATS * IXGBE_NB_TXQ_PRIO_VALUES); } @@ -2789,6 +3059,15 @@ static int ixgbe_dev_xstats_get_names(__rte_unused struct rte_eth_dev *dev, count++; } + /* MACsec Stats */ + for (i = 0; i < IXGBE_NB_MACSEC_STATS; i++) { + snprintf(xstats_names[count].name, + sizeof(xstats_names[count].name), + "%s", + rte_ixgbe_macsec_strings[i].name); + count++; + } + /* RX Priority Stats */ for (stat = 0; stat < IXGBE_NB_RXQ_PRIO_STATS; stat++) { for (i = 0; i < IXGBE_NB_RXQ_PRIO_VALUES; i++) { @@ -2838,6 +3117,9 @@ ixgbe_dev_xstats_get(struct rte_eth_dev *dev, struct rte_eth_xstat *xstats, 
IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private); struct ixgbe_hw_stats *hw_stats = IXGBE_DEV_PRIVATE_TO_STATS(dev->data->dev_private); + struct ixgbe_macsec_stats *macsec_stats = + IXGBE_DEV_PRIVATE_TO_MACSEC_STATS( + dev->data->dev_private); uint64_t total_missed_rx, total_qbrc, total_qprc, total_qprdc; unsigned i, stat, count = 0; @@ -2851,8 +3133,8 @@ ixgbe_dev_xstats_get(struct rte_eth_dev *dev, struct rte_eth_xstat *xstats, total_qprc = 0; total_qprdc = 0; - ixgbe_read_stats_registers(hw, hw_stats, &total_missed_rx, &total_qbrc, - &total_qprc, &total_qprdc); + ixgbe_read_stats_registers(hw, hw_stats, macsec_stats, &total_missed_rx, + &total_qbrc, &total_qprc, &total_qprdc); /* If this is a reset xstats is NULL, and we have cleared the * registers by reading them. @@ -2865,6 +3147,15 @@ ixgbe_dev_xstats_get(struct rte_eth_dev *dev, struct rte_eth_xstat *xstats, for (i = 0; i < IXGBE_NB_HW_STATS; i++) { xstats[count].value = *(uint64_t *)(((char *)hw_stats) + rte_ixgbe_stats_strings[i].offset); + xstats[count].id = count; + count++; + } + + /* MACsec Stats */ + for (i = 0; i < IXGBE_NB_MACSEC_STATS; i++) { + xstats[count].value = *(uint64_t *)(((char *)macsec_stats) + + rte_ixgbe_macsec_strings[i].offset); + xstats[count].id = count; count++; } @@ -2874,6 +3165,7 @@ ixgbe_dev_xstats_get(struct rte_eth_dev *dev, struct rte_eth_xstat *xstats, xstats[count].value = *(uint64_t *)(((char *)hw_stats) + rte_ixgbe_rxq_strings[stat].offset + (sizeof(uint64_t) * i)); + xstats[count].id = count; count++; } } @@ -2884,6 +3176,7 @@ ixgbe_dev_xstats_get(struct rte_eth_dev *dev, struct rte_eth_xstat *xstats, xstats[count].value = *(uint64_t *)(((char *)hw_stats) + rte_ixgbe_txq_strings[stat].offset + (sizeof(uint64_t) * i)); + xstats[count].id = count; count++; } } @@ -2895,6 +3188,9 @@ ixgbe_dev_xstats_reset(struct rte_eth_dev *dev) { struct ixgbe_hw_stats *stats = IXGBE_DEV_PRIVATE_TO_STATS(dev->data->dev_private); + struct ixgbe_macsec_stats *macsec_stats = + IXGBE_DEV_PRIVATE_TO_MACSEC_STATS( + dev->data->dev_private); unsigned count = ixgbe_xstats_calc_num(); @@ -2903,6 +3199,7 @@ ixgbe_dev_xstats_reset(struct rte_eth_dev *dev) /* Reset software totals */ memset(stats, 0, sizeof(*stats)); + memset(macsec_stats, 0, sizeof(*macsec_stats)); } static void @@ -2991,12 +3288,35 @@ ixgbevf_dev_stats_reset(struct rte_eth_dev *dev) hw_stats->vfgotc = 0; } +static int +ixgbe_fw_version_get(struct rte_eth_dev *dev, char *fw_version, size_t fw_size) +{ + struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private); + u16 eeprom_verh, eeprom_verl; + u32 etrack_id; + int ret; + + ixgbe_read_eeprom(hw, 0x2e, &eeprom_verh); + ixgbe_read_eeprom(hw, 0x2d, &eeprom_verl); + + etrack_id = (eeprom_verh << 16) | eeprom_verl; + ret = snprintf(fw_version, fw_size, "0x%08x", etrack_id); + + ret += 1; /* add the size of '\0' */ + if (fw_size < (u32)ret) + return ret; + else + return 0; +} + static void ixgbe_dev_info_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info) { + struct rte_pci_device *pci_dev = IXGBE_DEV_TO_PCI(dev); struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private); struct rte_eth_conf *dev_conf = &dev->data->dev_conf; + dev_info->pci_dev = pci_dev; dev_info->max_rx_queues = (uint16_t)hw->mac.max_rx_queues; dev_info->max_tx_queues = (uint16_t)hw->mac.max_tx_queues; if (RTE_ETH_DEV_SRIOV(dev).active == 0) { @@ -3012,7 +3332,7 @@ ixgbe_dev_info_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info) dev_info->max_rx_pktlen = 15872; /* includes CRC, cf MAXFRS 
register */ dev_info->max_mac_addrs = hw->mac.num_rar_entries; dev_info->max_hash_mac_addrs = IXGBE_VMDQ_NUM_UC_MAC; - dev_info->max_vfs = dev->pci_dev->max_vfs; + dev_info->max_vfs = pci_dev->max_vfs; if (hw->mac.type == ixgbe_mac_82598EB) dev_info->max_vmdq_pools = ETH_16_POOLS; else @@ -3033,6 +3353,10 @@ ixgbe_dev_info_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info) !RTE_ETH_DEV_SRIOV(dev).active) dev_info->rx_offload_capa |= DEV_RX_OFFLOAD_TCP_LRO; + if (hw->mac.type == ixgbe_mac_82599EB || + hw->mac.type == ixgbe_mac_X540) + dev_info->rx_offload_capa |= DEV_RX_OFFLOAD_MACSEC_STRIP; + if (hw->mac.type == ixgbe_mac_X550 || hw->mac.type == ixgbe_mac_X550EM_x || hw->mac.type == ixgbe_mac_X550EM_a) @@ -3046,6 +3370,10 @@ ixgbe_dev_info_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info) DEV_TX_OFFLOAD_SCTP_CKSUM | DEV_TX_OFFLOAD_TCP_TSO; + if (hw->mac.type == ixgbe_mac_82599EB || + hw->mac.type == ixgbe_mac_X540) + dev_info->tx_offload_capa |= DEV_TX_OFFLOAD_MACSEC_INSERT; + if (hw->mac.type == ixgbe_mac_X550 || hw->mac.type == ixgbe_mac_X550EM_x || hw->mac.type == ixgbe_mac_X550EM_a) @@ -3126,15 +3454,17 @@ static void ixgbevf_dev_info_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info) { + struct rte_pci_device *pci_dev = IXGBE_DEV_TO_PCI(dev); struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private); + dev_info->pci_dev = pci_dev; dev_info->max_rx_queues = (uint16_t)hw->mac.max_rx_queues; dev_info->max_tx_queues = (uint16_t)hw->mac.max_tx_queues; dev_info->min_rx_bufsize = 1024; /* cf BSIZEPACKET in SRRCTL reg */ - dev_info->max_rx_pktlen = 15872; /* includes CRC, cf MAXFRS reg */ + dev_info->max_rx_pktlen = 9728; /* includes CRC, cf MAXFRS reg */ dev_info->max_mac_addrs = hw->mac.num_rar_entries; dev_info->max_hash_mac_addrs = IXGBE_VMDQ_NUM_UC_MAC; - dev_info->max_vfs = dev->pci_dev->max_vfs; + dev_info->max_vfs = pci_dev->max_vfs; if (hw->mac.type == ixgbe_mac_82598EB) dev_info->max_vmdq_pools = ETH_16_POOLS; else @@ -3341,6 +3671,28 @@ ixgbe_dev_rxq_interrupt_setup(struct rte_eth_dev *dev) return 0; } +/** + * It clears the interrupt causes and enables the interrupt. + * It will be called once only during nic initialized. + * + * @param dev + * Pointer to struct rte_eth_dev. + * + * @return + * - On success, zero. + * - On failure, a negative value. + */ +static int +ixgbe_dev_macsec_interrupt_setup(struct rte_eth_dev *dev) +{ + struct ixgbe_interrupt *intr = + IXGBE_DEV_PRIVATE_TO_INTR(dev->data->dev_private); + + intr->mask |= IXGBE_EICR_LINKSEC; + + return 0; +} + /* * It reads ICR and sets flag (IXGBE_EICR_LSC) for the link_update. 
* @@ -3375,6 +3727,9 @@ ixgbe_dev_interrupt_get_status(struct rte_eth_dev *dev) if (eicr & IXGBE_EICR_MAILBOX) intr->flags |= IXGBE_FLAG_MAILBOX; + if (eicr & IXGBE_EICR_LINKSEC) + intr->flags |= IXGBE_FLAG_MACSEC; + if (hw->mac.type == ixgbe_mac_X550EM_x && hw->phy.type == ixgbe_phy_x550em_ext_t && (eicr & IXGBE_EICR_GPI_SDP0_X550EM_x)) @@ -3396,6 +3751,7 @@ ixgbe_dev_interrupt_get_status(struct rte_eth_dev *dev) static void ixgbe_dev_link_status_print(struct rte_eth_dev *dev) { + struct rte_pci_device *pci_dev = IXGBE_DEV_TO_PCI(dev); struct rte_eth_link link; memset(&link, 0, sizeof(link)); @@ -3410,11 +3766,11 @@ ixgbe_dev_link_status_print(struct rte_eth_dev *dev) PMD_INIT_LOG(INFO, " Port %d: Link Down", (int)(dev->data->port_id)); } - PMD_INIT_LOG(DEBUG, "PCI Address: %04d:%02d:%02d:%d", - dev->pci_dev->addr.domain, - dev->pci_dev->addr.bus, - dev->pci_dev->addr.devid, - dev->pci_dev->addr.function); + PMD_INIT_LOG(DEBUG, "PCI Address: " PCI_PRI_FMT, + pci_dev->addr.domain, + pci_dev->addr.bus, + pci_dev->addr.devid, + pci_dev->addr.function); } /* @@ -3428,13 +3784,13 @@ ixgbe_dev_link_status_print(struct rte_eth_dev *dev) * - On failure, a negative value. */ static int -ixgbe_dev_interrupt_action(struct rte_eth_dev *dev) +ixgbe_dev_interrupt_action(struct rte_eth_dev *dev, + struct rte_intr_handle *intr_handle) { struct ixgbe_interrupt *intr = IXGBE_DEV_PRIVATE_TO_INTR(dev->data->dev_private); int64_t timeout; struct rte_eth_link link; - int intr_enable_delay = false; struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private); @@ -3467,20 +3823,19 @@ ixgbe_dev_interrupt_action(struct rte_eth_dev *dev) timeout = IXGBE_LINK_DOWN_CHECK_TIMEOUT; ixgbe_dev_link_status_print(dev); - - intr_enable_delay = true; - } - - if (intr_enable_delay) { + intr->mask_original = intr->mask; + /* only disable lsc interrupt */ + intr->mask &= ~IXGBE_EIMS_LSC; if (rte_eal_alarm_set(timeout * 1000, ixgbe_dev_interrupt_delayed_handler, (void *)dev) < 0) PMD_DRV_LOG(ERR, "Error setting alarm"); - } else { - PMD_DRV_LOG(DEBUG, "enable intr immediately"); - ixgbe_enable_intr(dev); - rte_intr_enable(&(dev->pci_dev->intr_handle)); + else + intr->mask = intr->mask_original; } + PMD_DRV_LOG(DEBUG, "enable intr immediately"); + ixgbe_enable_intr(dev); + rte_intr_enable(intr_handle); return 0; } @@ -3503,12 +3858,16 @@ static void ixgbe_dev_interrupt_delayed_handler(void *param) { struct rte_eth_dev *dev = (struct rte_eth_dev *)param; + struct rte_pci_device *pci_dev = IXGBE_DEV_TO_PCI(dev); + struct rte_intr_handle *intr_handle = &pci_dev->intr_handle; struct ixgbe_interrupt *intr = IXGBE_DEV_PRIVATE_TO_INTR(dev->data->dev_private); struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private); uint32_t eicr; + ixgbe_disable_intr(hw); + eicr = IXGBE_READ_REG(hw, IXGBE_EICR); if (eicr & IXGBE_EICR_MAILBOX) ixgbe_pf_mbx_process(dev); @@ -3522,12 +3881,22 @@ ixgbe_dev_interrupt_delayed_handler(void *param) ixgbe_dev_link_update(dev, 0); intr->flags &= ~IXGBE_FLAG_NEED_LINK_UPDATE; ixgbe_dev_link_status_print(dev); - _rte_eth_dev_callback_process(dev, RTE_ETH_EVENT_INTR_LSC); + _rte_eth_dev_callback_process(dev, RTE_ETH_EVENT_INTR_LSC, NULL); + } + + if (intr->flags & IXGBE_FLAG_MACSEC) { + _rte_eth_dev_callback_process(dev, RTE_ETH_EVENT_MACSEC, + NULL); + intr->flags &= ~IXGBE_FLAG_MACSEC; } + /* restore original mask */ + intr->mask = intr->mask_original; + intr->mask_original = 0; + PMD_DRV_LOG(DEBUG, "enable intr in delayed handler S[%08x]", eicr); ixgbe_enable_intr(dev); - 
rte_intr_enable(&(dev->pci_dev->intr_handle)); + rte_intr_enable(intr_handle); } /** @@ -3543,13 +3912,13 @@ ixgbe_dev_interrupt_delayed_handler(void *param) * void */ static void -ixgbe_dev_interrupt_handler(__rte_unused struct rte_intr_handle *handle, +ixgbe_dev_interrupt_handler(struct rte_intr_handle *handle, void *param) { struct rte_eth_dev *dev = (struct rte_eth_dev *)param; ixgbe_dev_interrupt_get_status(dev); - ixgbe_dev_interrupt_action(dev); + ixgbe_dev_interrupt_action(dev, handle); } static int @@ -3913,7 +4282,7 @@ ixgbe_dev_rss_reta_update(struct rte_eth_dev *dev, if (reta_size != sp_reta_size) { PMD_DRV_LOG(ERR, "The size of hash lookup table configured " "(%d) doesn't match the number hardware can supported " - "(%d)\n", reta_size, sp_reta_size); + "(%d)", reta_size, sp_reta_size); return -EINVAL; } @@ -3960,7 +4329,7 @@ ixgbe_dev_rss_reta_query(struct rte_eth_dev *dev, if (reta_size != sp_reta_size) { PMD_DRV_LOG(ERR, "The size of hash lookup table configured " "(%d) doesn't match the number hardware can supported " - "(%d)\n", reta_size, sp_reta_size); + "(%d)", reta_size, sp_reta_size); return -EINVAL; } @@ -4012,6 +4381,53 @@ ixgbe_set_default_mac_addr(struct rte_eth_dev *dev, struct ether_addr *addr) } static int +is_ixgbe_pmd(const char *driver_name) +{ + if (!strstr(driver_name, "ixgbe")) + return -ENOTSUP; + + if (strstr(driver_name, "ixgbe_vf")) + return -ENOTSUP; + + return 0; +} + +int +rte_pmd_ixgbe_set_vf_mac_addr(uint8_t port, uint16_t vf, + struct ether_addr *mac_addr) +{ + struct ixgbe_hw *hw; + struct ixgbe_vf_info *vfinfo; + int rar_entry; + uint8_t *new_mac = (uint8_t *)(mac_addr); + struct rte_eth_dev *dev; + struct rte_eth_dev_info dev_info; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port, -ENODEV); + + dev = &rte_eth_devices[port]; + rte_eth_dev_info_get(port, &dev_info); + + if (is_ixgbe_pmd(dev_info.driver_name) != 0) + return -ENOTSUP; + + if (vf >= dev_info.max_vfs) + return -EINVAL; + + hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private); + vfinfo = *(IXGBE_DEV_PRIVATE_TO_P_VFDATA(dev->data->dev_private)); + rar_entry = hw->mac.num_rar_entries - (vf + 1); + + if (is_valid_assigned_ether_addr((struct ether_addr *)new_mac)) { + rte_memcpy(vfinfo[vf].vf_mac_addresses, new_mac, + ETHER_ADDR_LEN); + return hw->mac.ops.set_rar(hw, rar_entry, new_mac, vf, + IXGBE_RAH_AV); + } + return -EINVAL; +} + +static int ixgbe_dev_mtu_set(struct rte_eth_dev *dev, uint16_t mtu) { uint32_t hlreg0; @@ -4127,7 +4543,8 @@ ixgbevf_dev_start(struct rte_eth_dev *dev) struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private); uint32_t intr_vector = 0; - struct rte_intr_handle *intr_handle = &dev->pci_dev->intr_handle; + struct rte_pci_device *pci_dev = IXGBE_DEV_TO_PCI(dev); + struct rte_intr_handle *intr_handle = &pci_dev->intr_handle; int err, mask = 0; @@ -4172,7 +4589,7 @@ ixgbevf_dev_start(struct rte_eth_dev *dev) dev->data->nb_rx_queues * sizeof(int), 0); if (intr_handle->intr_vec == NULL) { PMD_INIT_LOG(ERR, "Failed to allocate %d rx_queues" - " intr_vec\n", dev->data->nb_rx_queues); + " intr_vec", dev->data->nb_rx_queues); return -ENOMEM; } } @@ -4190,7 +4607,8 @@ static void ixgbevf_dev_stop(struct rte_eth_dev *dev) { struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private); - struct rte_intr_handle *intr_handle = &dev->pci_dev->intr_handle; + struct rte_pci_device *pci_dev = IXGBE_DEV_TO_PCI(dev); + struct rte_intr_handle *intr_handle = &pci_dev->intr_handle; PMD_INIT_FUNC_TRACE(); @@ -4332,14 +4750,14 @@ ixgbevf_vlan_offload_set(struct 
rte_eth_dev *dev, int mask) } static int -ixgbe_vmdq_mode_check(struct ixgbe_hw *hw) +ixgbe_vt_check(struct ixgbe_hw *hw) { uint32_t reg_val; - /* we only need to do this if VMDq is enabled */ + /* if Virtualization Technology is enabled */ reg_val = IXGBE_READ_REG(hw, IXGBE_VT_CTL); if (!(reg_val & IXGBE_VT_CTL_VT_ENABLE)) { - PMD_INIT_LOG(ERR, "VMDq must be enabled for this setting"); + PMD_INIT_LOG(ERR, "VT must be enabled for this setting"); return -1; } @@ -4477,22 +4895,274 @@ ixgbe_convert_vm_rx_mask_to_val(uint16_t rx_mask, uint32_t orig_val) return new_val; } -static int -ixgbe_set_pool_rx_mode(struct rte_eth_dev *dev, uint16_t pool, - uint16_t rx_mask, uint8_t on) + +int +rte_pmd_ixgbe_set_vf_vlan_anti_spoof(uint8_t port, uint16_t vf, uint8_t on) +{ + struct ixgbe_hw *hw; + struct ixgbe_mac_info *mac; + struct rte_eth_dev *dev; + struct rte_eth_dev_info dev_info; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port, -ENODEV); + + dev = &rte_eth_devices[port]; + rte_eth_dev_info_get(port, &dev_info); + + if (is_ixgbe_pmd(dev_info.driver_name) != 0) + return -ENOTSUP; + + if (vf >= dev_info.max_vfs) + return -EINVAL; + + if (on > 1) + return -EINVAL; + + hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private); + mac = &hw->mac; + + mac->ops.set_vlan_anti_spoofing(hw, on, vf); + + return 0; +} + +int +rte_pmd_ixgbe_set_vf_mac_anti_spoof(uint8_t port, uint16_t vf, uint8_t on) +{ + struct ixgbe_hw *hw; + struct ixgbe_mac_info *mac; + struct rte_eth_dev *dev; + struct rte_eth_dev_info dev_info; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port, -ENODEV); + + dev = &rte_eth_devices[port]; + rte_eth_dev_info_get(port, &dev_info); + + if (is_ixgbe_pmd(dev_info.driver_name) != 0) + return -ENOTSUP; + + if (vf >= dev_info.max_vfs) + return -EINVAL; + + if (on > 1) + return -EINVAL; + + hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private); + mac = &hw->mac; + mac->ops.set_mac_anti_spoofing(hw, on, vf); + + return 0; +} + +int +rte_pmd_ixgbe_set_vf_vlan_insert(uint8_t port, uint16_t vf, uint16_t vlan_id) +{ + struct ixgbe_hw *hw; + uint32_t ctrl; + struct rte_eth_dev *dev; + struct rte_eth_dev_info dev_info; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port, -ENODEV); + + dev = &rte_eth_devices[port]; + rte_eth_dev_info_get(port, &dev_info); + + if (is_ixgbe_pmd(dev_info.driver_name) != 0) + return -ENOTSUP; + + if (vf >= dev_info.max_vfs) + return -EINVAL; + + if (vlan_id > ETHER_MAX_VLAN_ID) + return -EINVAL; + + hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private); + ctrl = IXGBE_READ_REG(hw, IXGBE_VMVIR(vf)); + if (vlan_id) { + ctrl = vlan_id; + ctrl |= IXGBE_VMVIR_VLANA_DEFAULT; + } else { + ctrl = 0; + } + + IXGBE_WRITE_REG(hw, IXGBE_VMVIR(vf), ctrl); + + return 0; +} + +int +rte_pmd_ixgbe_set_tx_loopback(uint8_t port, uint8_t on) +{ + struct ixgbe_hw *hw; + uint32_t ctrl; + struct rte_eth_dev *dev; + struct rte_eth_dev_info dev_info; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port, -ENODEV); + + dev = &rte_eth_devices[port]; + rte_eth_dev_info_get(port, &dev_info); + + if (is_ixgbe_pmd(dev_info.driver_name) != 0) + return -ENOTSUP; + + if (on > 1) + return -EINVAL; + + hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private); + ctrl = IXGBE_READ_REG(hw, IXGBE_PFDTXGSWC); + /* enable or disable VMDQ loopback */ + if (on) + ctrl |= IXGBE_PFDTXGSWC_VT_LBEN; + else + ctrl &= ~IXGBE_PFDTXGSWC_VT_LBEN; + + IXGBE_WRITE_REG(hw, IXGBE_PFDTXGSWC, ctrl); + + return 0; +} + +int +rte_pmd_ixgbe_set_all_queues_drop_en(uint8_t port, uint8_t on) +{ + struct ixgbe_hw *hw; + uint32_t reg_value; + int i; + int num_queues = (int)(IXGBE_QDE_IDX_MASK 
>> IXGBE_QDE_IDX_SHIFT); + struct rte_eth_dev *dev; + struct rte_eth_dev_info dev_info; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port, -ENODEV); + + dev = &rte_eth_devices[port]; + rte_eth_dev_info_get(port, &dev_info); + + if (is_ixgbe_pmd(dev_info.driver_name) != 0) + return -ENOTSUP; + + if (on > 1) + return -EINVAL; + + hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private); + for (i = 0; i <= num_queues; i++) { + reg_value = IXGBE_QDE_WRITE | + (i << IXGBE_QDE_IDX_SHIFT) | + (on & IXGBE_QDE_ENABLE); + IXGBE_WRITE_REG(hw, IXGBE_QDE, reg_value); + } + + return 0; +} + +int +rte_pmd_ixgbe_set_vf_split_drop_en(uint8_t port, uint16_t vf, uint8_t on) +{ + struct ixgbe_hw *hw; + uint32_t reg_value; + struct rte_eth_dev *dev; + struct rte_eth_dev_info dev_info; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port, -ENODEV); + + dev = &rte_eth_devices[port]; + rte_eth_dev_info_get(port, &dev_info); + + if (is_ixgbe_pmd(dev_info.driver_name) != 0) + return -ENOTSUP; + + /* only support VF's 0 to 63 */ + if ((vf >= dev_info.max_vfs) || (vf > 63)) + return -EINVAL; + + if (on > 1) + return -EINVAL; + + hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private); + reg_value = IXGBE_READ_REG(hw, IXGBE_SRRCTL(vf)); + if (on) + reg_value |= IXGBE_SRRCTL_DROP_EN; + else + reg_value &= ~IXGBE_SRRCTL_DROP_EN; + + IXGBE_WRITE_REG(hw, IXGBE_SRRCTL(vf), reg_value); + + return 0; +} + +int +rte_pmd_ixgbe_set_vf_vlan_stripq(uint8_t port, uint16_t vf, uint8_t on) +{ + struct rte_eth_dev *dev; + struct rte_eth_dev_info dev_info; + uint16_t queues_per_pool; + uint32_t q; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port, -ENODEV); + + dev = &rte_eth_devices[port]; + rte_eth_dev_info_get(port, &dev_info); + + if (is_ixgbe_pmd(dev_info.driver_name) != 0) + return -ENOTSUP; + + if (vf >= dev_info.max_vfs) + return -EINVAL; + + if (on > 1) + return -EINVAL; + + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->vlan_strip_queue_set, -ENOTSUP); + + /* The PF has 128 queue pairs and in SRIOV configuration + * those queues will be assigned to VF's, so RXDCTL + * registers will be dealing with queues which will be + * assigned to VF's. + * Let's say we have SRIOV configured with 31 VF's then the + * first 124 queues 0-123 will be allocated to VF's and only + * the last 4 queues 123-127 will be assigned to the PF. 
+ */ + + queues_per_pool = dev_info.vmdq_queue_num / dev_info.max_vmdq_pools; + + for (q = 0; q < queues_per_pool; q++) + (*dev->dev_ops->vlan_strip_queue_set)(dev, + q + vf * queues_per_pool, on); + return 0; +} + +int +rte_pmd_ixgbe_set_vf_rxmode(uint8_t port, uint16_t vf, uint16_t rx_mask, uint8_t on) { int val = 0; + struct rte_eth_dev *dev; + struct rte_eth_dev_info dev_info; + struct ixgbe_hw *hw; + uint32_t vmolr; - struct ixgbe_hw *hw = - IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private); - uint32_t vmolr = IXGBE_READ_REG(hw, IXGBE_VMOLR(pool)); + RTE_ETH_VALID_PORTID_OR_ERR_RET(port, -ENODEV); + + dev = &rte_eth_devices[port]; + rte_eth_dev_info_get(port, &dev_info); + + if (is_ixgbe_pmd(dev_info.driver_name) != 0) + return -ENOTSUP; + + if (vf >= dev_info.max_vfs) + return -EINVAL; + + if (on > 1) + return -EINVAL; + + hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private); + vmolr = IXGBE_READ_REG(hw, IXGBE_VMOLR(vf)); if (hw->mac.type == ixgbe_mac_82598EB) { PMD_INIT_LOG(ERR, "setting VF receive mode set should be done" " on 82599 hardware and newer"); return -ENOTSUP; } - if (ixgbe_vmdq_mode_check(hw) < 0) + if (ixgbe_vt_check(hw) < 0) return -ENOTSUP; val = ixgbe_convert_vm_rx_mask_to_val(rx_mask, val); @@ -4502,34 +5172,47 @@ ixgbe_set_pool_rx_mode(struct rte_eth_dev *dev, uint16_t pool, else vmolr &= ~val; - IXGBE_WRITE_REG(hw, IXGBE_VMOLR(pool), vmolr); + IXGBE_WRITE_REG(hw, IXGBE_VMOLR(vf), vmolr); return 0; } -static int -ixgbe_set_pool_rx(struct rte_eth_dev *dev, uint16_t pool, uint8_t on) +int +rte_pmd_ixgbe_set_vf_rx(uint8_t port, uint16_t vf, uint8_t on) { + struct rte_eth_dev *dev; + struct rte_eth_dev_info dev_info; uint32_t reg, addr; uint32_t val; const uint8_t bit1 = 0x1; + struct ixgbe_hw *hw; - struct ixgbe_hw *hw = - IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private); + RTE_ETH_VALID_PORTID_OR_ERR_RET(port, -ENODEV); + + dev = &rte_eth_devices[port]; + rte_eth_dev_info_get(port, &dev_info); - if (ixgbe_vmdq_mode_check(hw) < 0) + if (is_ixgbe_pmd(dev_info.driver_name) != 0) return -ENOTSUP; - if (pool >= ETH_64_POOLS) + if (vf >= dev_info.max_vfs) + return -EINVAL; + + if (on > 1) return -EINVAL; - /* for pool >= 32, set bit in PFVFRE[1], otherwise PFVFRE[0] */ - if (pool >= 32) { + hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private); + + if (ixgbe_vt_check(hw) < 0) + return -ENOTSUP; + + /* for vf >= 32, set bit in PFVFRE[1], otherwise PFVFRE[0] */ + if (vf >= 32) { addr = IXGBE_VFRE(1); - val = bit1 << (pool - 32); + val = bit1 << (vf - 32); } else { addr = IXGBE_VFRE(0); - val = bit1 << pool; + val = bit1 << vf; } reg = IXGBE_READ_REG(hw, addr); @@ -4544,29 +5227,42 @@ ixgbe_set_pool_rx(struct rte_eth_dev *dev, uint16_t pool, uint8_t on) return 0; } -static int -ixgbe_set_pool_tx(struct rte_eth_dev *dev, uint16_t pool, uint8_t on) +int +rte_pmd_ixgbe_set_vf_tx(uint8_t port, uint16_t vf, uint8_t on) { + struct rte_eth_dev *dev; + struct rte_eth_dev_info dev_info; uint32_t reg, addr; uint32_t val; const uint8_t bit1 = 0x1; - struct ixgbe_hw *hw = - IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private); + struct ixgbe_hw *hw; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port, -ENODEV); + + dev = &rte_eth_devices[port]; + rte_eth_dev_info_get(port, &dev_info); - if (ixgbe_vmdq_mode_check(hw) < 0) + if (is_ixgbe_pmd(dev_info.driver_name) != 0) return -ENOTSUP; - if (pool >= ETH_64_POOLS) + if (vf >= dev_info.max_vfs) return -EINVAL; - /* for pool >= 32, set bit in PFVFTE[1], otherwise PFVFTE[0] */ - if (pool >= 32) { + if (on > 1) + return -EINVAL; + + hw = 
IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private); + if (ixgbe_vt_check(hw) < 0) + return -ENOTSUP; + + /* for vf >= 32, set bit in PFVFTE[1], otherwise PFVFTE[0] */ + if (vf >= 32) { addr = IXGBE_VFTE(1); - val = bit1 << (pool - 32); + val = bit1 << (vf - 32); } else { addr = IXGBE_VFTE(0); - val = bit1 << pool; + val = bit1 << vf; } reg = IXGBE_READ_REG(hw, addr); @@ -4581,20 +5277,34 @@ ixgbe_set_pool_tx(struct rte_eth_dev *dev, uint16_t pool, uint8_t on) return 0; } -static int -ixgbe_set_pool_vlan_filter(struct rte_eth_dev *dev, uint16_t vlan, - uint64_t pool_mask, uint8_t vlan_on) +int +rte_pmd_ixgbe_set_vf_vlan_filter(uint8_t port, uint16_t vlan, + uint64_t vf_mask, uint8_t vlan_on) { + struct rte_eth_dev *dev; + struct rte_eth_dev_info dev_info; int ret = 0; - uint16_t pool_idx; - struct ixgbe_hw *hw = - IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private); + uint16_t vf_idx; + struct ixgbe_hw *hw; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port, -ENODEV); + + dev = &rte_eth_devices[port]; + rte_eth_dev_info_get(port, &dev_info); + + if (is_ixgbe_pmd(dev_info.driver_name) != 0) + return -ENOTSUP; + + if ((vlan > ETHER_MAX_VLAN_ID) || (vf_mask == 0)) + return -EINVAL; - if (ixgbe_vmdq_mode_check(hw) < 0) + hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private); + if (ixgbe_vt_check(hw) < 0) return -ENOTSUP; - for (pool_idx = 0; pool_idx < ETH_64_POOLS; pool_idx++) { - if (pool_mask & ((uint64_t)(1ULL << pool_idx))) { - ret = hw->mac.ops.set_vfta(hw, vlan, pool_idx, + + for (vf_idx = 0; vf_idx < 64; vf_idx++) { + if (vf_mask & ((uint64_t)(1ULL << vf_idx))) { + ret = hw->mac.ops.set_vfta(hw, vlan, vf_idx, vlan_on, false); if (ret < 0) return ret; @@ -4604,6 +5314,88 @@ ixgbe_set_pool_vlan_filter(struct rte_eth_dev *dev, uint16_t vlan, return ret; } +int rte_pmd_ixgbe_set_vf_rate_limit(uint8_t port, uint16_t vf, + uint16_t tx_rate, uint64_t q_msk) +{ + struct rte_eth_dev *dev; + struct rte_eth_dev_info dev_info; + struct ixgbe_hw *hw; + struct ixgbe_vf_info *vfinfo; + struct rte_eth_link link; + uint8_t nb_q_per_pool; + uint32_t queue_stride; + uint32_t queue_idx, idx = 0, vf_idx; + uint32_t queue_end; + uint16_t total_rate = 0; + struct rte_pci_device *pci_dev; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port, -ENODEV); + + dev = &rte_eth_devices[port]; + rte_eth_dev_info_get(port, &dev_info); + rte_eth_link_get_nowait(port, &link); + + if (is_ixgbe_pmd(dev_info.driver_name) != 0) + return -ENOTSUP; + + if (vf >= dev_info.max_vfs) + return -EINVAL; + + if (tx_rate > link.link_speed) + return -EINVAL; + + if (q_msk == 0) + return 0; + + pci_dev = IXGBE_DEV_TO_PCI(dev); + hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private); + vfinfo = *(IXGBE_DEV_PRIVATE_TO_P_VFDATA(dev->data->dev_private)); + nb_q_per_pool = RTE_ETH_DEV_SRIOV(dev).nb_q_per_pool; + queue_stride = IXGBE_MAX_RX_QUEUE_NUM / RTE_ETH_DEV_SRIOV(dev).active; + queue_idx = vf * queue_stride; + queue_end = queue_idx + nb_q_per_pool - 1; + if (queue_end >= hw->mac.max_tx_queues) + return -EINVAL; + + if (vfinfo) { + for (vf_idx = 0; vf_idx < pci_dev->max_vfs; vf_idx++) { + if (vf_idx == vf) + continue; + for (idx = 0; idx < RTE_DIM(vfinfo[vf_idx].tx_rate); + idx++) + total_rate += vfinfo[vf_idx].tx_rate[idx]; + } + } else { + return -EINVAL; + } + + /* Store tx_rate for this vf. 
*/ + for (idx = 0; idx < nb_q_per_pool; idx++) { + if (((uint64_t)0x1 << idx) & q_msk) { + if (vfinfo[vf].tx_rate[idx] != tx_rate) + vfinfo[vf].tx_rate[idx] = tx_rate; + total_rate += tx_rate; + } + } + + if (total_rate > dev->data->dev_link.link_speed) { + /* Reset stored TX rate of the VF if it causes exceed + * link speed. + */ + memset(vfinfo[vf].tx_rate, 0, sizeof(vfinfo[vf].tx_rate)); + return -EINVAL; + } + + /* Set RTTBCNRC of each queue/pool for vf X */ + for (; queue_idx <= queue_end; queue_idx++) { + if (0x1 & q_msk) + ixgbe_set_queue_rate_limit(dev, queue_idx, tx_rate); + q_msk = q_msk >> 1; + } + + return 0; +} + #define IXGBE_MRCTL_VPME 0x01 /* Virtual Pool Mirroring. */ #define IXGBE_MRCTL_UPME 0x02 /* Uplink Port Mirroring. */ #define IXGBE_MRCTL_DPME 0x04 /* Downlink Port Mirroring. */ @@ -4638,7 +5430,7 @@ ixgbe_mirror_rule_set(struct rte_eth_dev *dev, IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private); uint8_t mirror_type = 0; - if (ixgbe_vmdq_mode_check(hw) < 0) + if (ixgbe_vt_check(hw) < 0) return -ENOTSUP; if (rule_id >= IXGBE_MAX_MIRROR_RULES) @@ -4759,7 +5551,7 @@ ixgbe_mirror_rule_reset(struct rte_eth_dev *dev, uint8_t rule_id) struct ixgbe_mirror_info *mr_info = (IXGBE_DEV_PRIVATE_TO_PFDATA(dev->data->dev_private)); - if (ixgbe_vmdq_mode_check(hw) < 0) + if (ixgbe_vt_check(hw) < 0) return -ENOTSUP; memset(&mr_info->mr_conf[rule_id], 0, @@ -4782,6 +5574,8 @@ ixgbe_mirror_rule_reset(struct rte_eth_dev *dev, uint8_t rule_id) static int ixgbevf_dev_rx_queue_intr_enable(struct rte_eth_dev *dev, uint16_t queue_id) { + struct rte_pci_device *pci_dev = IXGBE_DEV_TO_PCI(dev); + struct rte_intr_handle *intr_handle = &pci_dev->intr_handle; uint32_t mask; struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private); @@ -4791,7 +5585,7 @@ ixgbevf_dev_rx_queue_intr_enable(struct rte_eth_dev *dev, uint16_t queue_id) RTE_SET_USED(queue_id); IXGBE_WRITE_REG(hw, IXGBE_VTEIMS, mask); - rte_intr_enable(&dev->pci_dev->intr_handle); + rte_intr_enable(intr_handle); return 0; } @@ -4814,6 +5608,8 @@ ixgbevf_dev_rx_queue_intr_disable(struct rte_eth_dev *dev, uint16_t queue_id) static int ixgbe_dev_rx_queue_intr_enable(struct rte_eth_dev *dev, uint16_t queue_id) { + struct rte_pci_device *pci_dev = IXGBE_DEV_TO_PCI(dev); + struct rte_intr_handle *intr_handle = &pci_dev->intr_handle; uint32_t mask; struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private); @@ -4833,7 +5629,7 @@ ixgbe_dev_rx_queue_intr_enable(struct rte_eth_dev *dev, uint16_t queue_id) mask &= (1 << (queue_id - 32)); IXGBE_WRITE_REG(hw, IXGBE_EIMS_EX(1), mask); } - rte_intr_enable(&dev->pci_dev->intr_handle); + rte_intr_enable(intr_handle); return 0; } @@ -4937,7 +5733,8 @@ ixgbe_set_ivar_map(struct ixgbe_hw *hw, int8_t direction, static void ixgbevf_configure_msix(struct rte_eth_dev *dev) { - struct rte_intr_handle *intr_handle = &dev->pci_dev->intr_handle; + struct rte_pci_device *pci_dev = IXGBE_DEV_TO_PCI(dev); + struct rte_intr_handle *intr_handle = &pci_dev->intr_handle; struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private); uint32_t q_idx; @@ -4970,7 +5767,8 @@ ixgbevf_configure_msix(struct rte_eth_dev *dev) static void ixgbe_configure_msix(struct rte_eth_dev *dev) { - struct rte_intr_handle *intr_handle = &dev->pci_dev->intr_handle; + struct rte_pci_device *pci_dev = IXGBE_DEV_TO_PCI(dev); + struct rte_intr_handle *intr_handle = &pci_dev->intr_handle; struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private); uint32_t queue_id, base = IXGBE_MISC_VEC_ID; @@ -5085,61 
+5883,6 @@ static int ixgbe_set_queue_rate_limit(struct rte_eth_dev *dev, return 0; } -static int ixgbe_set_vf_rate_limit(struct rte_eth_dev *dev, uint16_t vf, - uint16_t tx_rate, uint64_t q_msk) -{ - struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private); - struct ixgbe_vf_info *vfinfo = - *(IXGBE_DEV_PRIVATE_TO_P_VFDATA(dev->data->dev_private)); - uint8_t nb_q_per_pool = RTE_ETH_DEV_SRIOV(dev).nb_q_per_pool; - uint32_t queue_stride = - IXGBE_MAX_RX_QUEUE_NUM / RTE_ETH_DEV_SRIOV(dev).active; - uint32_t queue_idx = vf * queue_stride, idx = 0, vf_idx; - uint32_t queue_end = queue_idx + nb_q_per_pool - 1; - uint16_t total_rate = 0; - - if (queue_end >= hw->mac.max_tx_queues) - return -EINVAL; - - if (vfinfo != NULL) { - for (vf_idx = 0; vf_idx < dev->pci_dev->max_vfs; vf_idx++) { - if (vf_idx == vf) - continue; - for (idx = 0; idx < RTE_DIM(vfinfo[vf_idx].tx_rate); - idx++) - total_rate += vfinfo[vf_idx].tx_rate[idx]; - } - } else - return -EINVAL; - - /* Store tx_rate for this vf. */ - for (idx = 0; idx < nb_q_per_pool; idx++) { - if (((uint64_t)0x1 << idx) & q_msk) { - if (vfinfo[vf].tx_rate[idx] != tx_rate) - vfinfo[vf].tx_rate[idx] = tx_rate; - total_rate += tx_rate; - } - } - - if (total_rate > dev->data->dev_link.link_speed) { - /* - * Reset stored TX rate of the VF if it causes exceed - * link speed. - */ - memset(vfinfo[vf].tx_rate, 0, sizeof(vfinfo[vf].tx_rate)); - return -EINVAL; - } - - /* Set RTTBCNRC of each queue/pool for vf X */ - for (; queue_idx <= queue_end; queue_idx++) { - if (0x1 & q_msk) - ixgbe_set_queue_rate_limit(dev, queue_idx, tx_rate); - q_msk = q_msk >> 1; - } - - return 0; -} - static void ixgbevf_add_mac_addr(struct rte_eth_dev *dev, struct ether_addr *mac_addr, __attribute__((unused)) uint32_t index, @@ -5224,21 +5967,24 @@ ixgbevf_set_default_mac_addr(struct rte_eth_dev *dev, struct ether_addr *addr) return -ENOTSUP;\ } while (0) -static int +int ixgbe_syn_filter_set(struct rte_eth_dev *dev, struct rte_eth_syn_filter *filter, bool add) { struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private); + struct ixgbe_filter_info *filter_info = + IXGBE_DEV_PRIVATE_TO_FILTER_INFO(dev->data->dev_private); + uint32_t syn_info; uint32_t synqf; if (filter->queue >= IXGBE_MAX_RX_QUEUE_NUM) return -EINVAL; - synqf = IXGBE_READ_REG(hw, IXGBE_SYNQF); + syn_info = filter_info->syn_info; if (add) { - if (synqf & IXGBE_SYN_FILTER_ENABLE) + if (syn_info & IXGBE_SYN_FILTER_ENABLE) return -EINVAL; synqf = (uint32_t)(((filter->queue << IXGBE_SYN_FILTER_QUEUE_SHIFT) & IXGBE_SYN_FILTER_QUEUE) | IXGBE_SYN_FILTER_ENABLE); @@ -5248,10 +5994,13 @@ ixgbe_syn_filter_set(struct rte_eth_dev *dev, else synqf &= ~IXGBE_SYN_FILTER_SYNQFP; } else { - if (!(synqf & IXGBE_SYN_FILTER_ENABLE)) + synqf = IXGBE_READ_REG(hw, IXGBE_SYNQF); + if (!(syn_info & IXGBE_SYN_FILTER_ENABLE)) return -ENOENT; synqf &= ~(IXGBE_SYN_FILTER_QUEUE | IXGBE_SYN_FILTER_ENABLE); } + + filter_info->syn_info = synqf; IXGBE_WRITE_REG(hw, IXGBE_SYNQF, synqf); IXGBE_WRITE_FLUSH(hw); return 0; @@ -5307,7 +6056,7 @@ ixgbe_syn_filter_handle(struct rte_eth_dev *dev, (struct rte_eth_syn_filter *)arg); break; default: - PMD_DRV_LOG(ERR, "unsupported operation %u\n", filter_op); + PMD_DRV_LOG(ERR, "unsupported operation %u", filter_op); ret = -EINVAL; break; } @@ -5329,6 +6078,52 @@ convert_protocol_type(uint8_t protocol_value) return IXGBE_FILTER_PROTOCOL_NONE; } +/* inject a 5-tuple filter to HW */ +static inline void +ixgbe_inject_5tuple_filter(struct rte_eth_dev *dev, + struct ixgbe_5tuple_filter 
*filter) +{ + struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private); + int i; + uint32_t ftqf, sdpqf; + uint32_t l34timir = 0; + uint8_t mask = 0xff; + + i = filter->index; + + sdpqf = (uint32_t)(filter->filter_info.dst_port << + IXGBE_SDPQF_DSTPORT_SHIFT); + sdpqf = sdpqf | (filter->filter_info.src_port & IXGBE_SDPQF_SRCPORT); + + ftqf = (uint32_t)(filter->filter_info.proto & + IXGBE_FTQF_PROTOCOL_MASK); + ftqf |= (uint32_t)((filter->filter_info.priority & + IXGBE_FTQF_PRIORITY_MASK) << IXGBE_FTQF_PRIORITY_SHIFT); + if (filter->filter_info.src_ip_mask == 0) /* 0 means compare. */ + mask &= IXGBE_FTQF_SOURCE_ADDR_MASK; + if (filter->filter_info.dst_ip_mask == 0) + mask &= IXGBE_FTQF_DEST_ADDR_MASK; + if (filter->filter_info.src_port_mask == 0) + mask &= IXGBE_FTQF_SOURCE_PORT_MASK; + if (filter->filter_info.dst_port_mask == 0) + mask &= IXGBE_FTQF_DEST_PORT_MASK; + if (filter->filter_info.proto_mask == 0) + mask &= IXGBE_FTQF_PROTOCOL_COMP_MASK; + ftqf |= mask << IXGBE_FTQF_5TUPLE_MASK_SHIFT; + ftqf |= IXGBE_FTQF_POOL_MASK_EN; + ftqf |= IXGBE_FTQF_QUEUE_ENABLE; + + IXGBE_WRITE_REG(hw, IXGBE_DAQF(i), filter->filter_info.dst_ip); + IXGBE_WRITE_REG(hw, IXGBE_SAQF(i), filter->filter_info.src_ip); + IXGBE_WRITE_REG(hw, IXGBE_SDPQF(i), sdpqf); + IXGBE_WRITE_REG(hw, IXGBE_FTQF(i), ftqf); + + l34timir |= IXGBE_L34T_IMIR_RESERVE; + l34timir |= (uint32_t)(filter->queue << + IXGBE_L34T_IMIR_QUEUE_SHIFT); + IXGBE_WRITE_REG(hw, IXGBE_L34T_IMIR(i), l34timir); +} + /* * add a 5tuple filter * @@ -5346,13 +6141,9 @@ static int ixgbe_add_5tuple_filter(struct rte_eth_dev *dev, struct ixgbe_5tuple_filter *filter) { - struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private); struct ixgbe_filter_info *filter_info = IXGBE_DEV_PRIVATE_TO_FILTER_INFO(dev->data->dev_private); int i, idx, shift; - uint32_t ftqf, sdpqf; - uint32_t l34timir = 0; - uint8_t mask = 0xff; /* * look for an unused 5tuple filter index, @@ -5375,37 +6166,8 @@ ixgbe_add_5tuple_filter(struct rte_eth_dev *dev, return -ENOSYS; } - sdpqf = (uint32_t)(filter->filter_info.dst_port << - IXGBE_SDPQF_DSTPORT_SHIFT); - sdpqf = sdpqf | (filter->filter_info.src_port & IXGBE_SDPQF_SRCPORT); + ixgbe_inject_5tuple_filter(dev, filter); - ftqf = (uint32_t)(filter->filter_info.proto & - IXGBE_FTQF_PROTOCOL_MASK); - ftqf |= (uint32_t)((filter->filter_info.priority & - IXGBE_FTQF_PRIORITY_MASK) << IXGBE_FTQF_PRIORITY_SHIFT); - if (filter->filter_info.src_ip_mask == 0) /* 0 means compare. */ - mask &= IXGBE_FTQF_SOURCE_ADDR_MASK; - if (filter->filter_info.dst_ip_mask == 0) - mask &= IXGBE_FTQF_DEST_ADDR_MASK; - if (filter->filter_info.src_port_mask == 0) - mask &= IXGBE_FTQF_SOURCE_PORT_MASK; - if (filter->filter_info.dst_port_mask == 0) - mask &= IXGBE_FTQF_DEST_PORT_MASK; - if (filter->filter_info.proto_mask == 0) - mask &= IXGBE_FTQF_PROTOCOL_COMP_MASK; - ftqf |= mask << IXGBE_FTQF_5TUPLE_MASK_SHIFT; - ftqf |= IXGBE_FTQF_POOL_MASK_EN; - ftqf |= IXGBE_FTQF_QUEUE_ENABLE; - - IXGBE_WRITE_REG(hw, IXGBE_DAQF(i), filter->filter_info.dst_ip); - IXGBE_WRITE_REG(hw, IXGBE_SAQF(i), filter->filter_info.src_ip); - IXGBE_WRITE_REG(hw, IXGBE_SDPQF(i), sdpqf); - IXGBE_WRITE_REG(hw, IXGBE_FTQF(i), ftqf); - - l34timir |= IXGBE_L34T_IMIR_RESERVE; - l34timir |= (uint32_t)(filter->queue << - IXGBE_L34T_IMIR_QUEUE_SHIFT); - IXGBE_WRITE_REG(hw, IXGBE_L34T_IMIR(i), l34timir); return 0; } @@ -5584,7 +6346,7 @@ ntuple_filter_to_5tuple(struct rte_eth_ntuple_filter *filter, * - On success, zero. * - On failure, a negative value. 
*/ -static int +int ixgbe_add_del_ntuple_filter(struct rte_eth_dev *dev, struct rte_eth_ntuple_filter *ntuple_filter, bool add) @@ -5729,48 +6491,7 @@ ixgbe_ntuple_filter_handle(struct rte_eth_dev *dev, return ret; } -static inline int -ixgbe_ethertype_filter_lookup(struct ixgbe_filter_info *filter_info, - uint16_t ethertype) -{ - int i; - - for (i = 0; i < IXGBE_MAX_ETQF_FILTERS; i++) { - if (filter_info->ethertype_filters[i] == ethertype && - (filter_info->ethertype_mask & (1 << i))) - return i; - } - return -1; -} - -static inline int -ixgbe_ethertype_filter_insert(struct ixgbe_filter_info *filter_info, - uint16_t ethertype) -{ - int i; - - for (i = 0; i < IXGBE_MAX_ETQF_FILTERS; i++) { - if (!(filter_info->ethertype_mask & (1 << i))) { - filter_info->ethertype_mask |= 1 << i; - filter_info->ethertype_filters[i] = ethertype; - return i; - } - } - return -1; -} - -static inline int -ixgbe_ethertype_filter_remove(struct ixgbe_filter_info *filter_info, - uint8_t idx) -{ - if (idx >= IXGBE_MAX_ETQF_FILTERS) - return -1; - filter_info->ethertype_mask &= ~(1 << idx); - filter_info->ethertype_filters[idx] = 0; - return idx; -} - -static int +int ixgbe_add_del_ethertype_filter(struct rte_eth_dev *dev, struct rte_eth_ethertype_filter *filter, bool add) @@ -5781,20 +6502,17 @@ ixgbe_add_del_ethertype_filter(struct rte_eth_dev *dev, uint32_t etqf = 0; uint32_t etqs = 0; int ret; + struct ixgbe_ethertype_filter ethertype_filter; if (filter->queue >= IXGBE_MAX_RX_QUEUE_NUM) return -EINVAL; -#define TREX_PATCH -#ifndef TREX_PATCH - // no real reason to block this. - // We configure rules using FDIR and ethertype that point to same queue, so there are no race condition issues. + if (filter->ether_type == ETHER_TYPE_IPv4 || filter->ether_type == ETHER_TYPE_IPv6) { PMD_DRV_LOG(ERR, "unsupported ether_type(0x%04x) in" " ethertype filter.", filter->ether_type); return -EINVAL; } -#endif if (filter->flags & RTE_ETHTYPE_FLAGS_MAC) { PMD_DRV_LOG(ERR, "mac compare is unsupported."); @@ -5818,18 +6536,23 @@ ixgbe_add_del_ethertype_filter(struct rte_eth_dev *dev, } if (add) { - ret = ixgbe_ethertype_filter_insert(filter_info, - filter->ether_type); - if (ret < 0) { - PMD_DRV_LOG(ERR, "ethertype filters are full."); - return -ENOSYS; - } etqf = IXGBE_ETQF_FILTER_EN; etqf |= (uint32_t)filter->ether_type; etqs |= (uint32_t)((filter->queue << IXGBE_ETQS_RX_QUEUE_SHIFT) & IXGBE_ETQS_RX_QUEUE); etqs |= IXGBE_ETQS_QUEUE_EN; + + ethertype_filter.ethertype = filter->ether_type; + ethertype_filter.etqf = etqf; + ethertype_filter.etqs = etqs; + ethertype_filter.conf = FALSE; + ret = ixgbe_ethertype_filter_insert(filter_info, + ðertype_filter); + if (ret < 0) { + PMD_DRV_LOG(ERR, "ethertype filters are full."); + return -ENOSPC; + } } else { ret = ixgbe_ethertype_filter_remove(filter_info, (uint8_t)ret); if (ret < 0) @@ -5925,7 +6648,7 @@ ixgbe_dev_filter_ctrl(struct rte_eth_dev *dev, enum rte_filter_op filter_op, void *arg) { - int ret = -EINVAL; + int ret = 0; switch (filter_type) { case RTE_ETH_FILTER_NTUPLE: @@ -5943,9 +6666,15 @@ ixgbe_dev_filter_ctrl(struct rte_eth_dev *dev, case RTE_ETH_FILTER_L2_TUNNEL: ret = ixgbe_dev_l2_tunnel_filter_handle(dev, filter_op, arg); break; + case RTE_ETH_FILTER_GENERIC: + if (filter_op != RTE_ETH_FILTER_GET) + return -EINVAL; + *(const void **)arg = &ixgbe_flow_ops; + break; default: PMD_DRV_LOG(WARNING, "Filter type (%d) not supported", filter_type); + ret = -EINVAL; break; } @@ -6593,12 +7322,15 @@ ixgbe_dev_l2_tunnel_eth_type_conf(struct rte_eth_dev *dev, { int ret = 0; struct 
ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private); + struct ixgbe_l2_tn_info *l2_tn_info = + IXGBE_DEV_PRIVATE_TO_L2_TN_INFO(dev->data->dev_private); if (l2_tunnel == NULL) return -EINVAL; switch (l2_tunnel->l2_tunnel_type) { case RTE_L2_TUNNEL_TYPE_E_TAG: + l2_tn_info->e_tag_ether_type = l2_tunnel->ether_type; ret = ixgbe_update_e_tag_eth_type(hw, l2_tunnel->ether_type); break; default: @@ -6637,9 +7369,12 @@ ixgbe_dev_l2_tunnel_enable(struct rte_eth_dev *dev, { int ret = 0; struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private); + struct ixgbe_l2_tn_info *l2_tn_info = + IXGBE_DEV_PRIVATE_TO_L2_TN_INFO(dev->data->dev_private); switch (l2_tunnel_type) { case RTE_L2_TUNNEL_TYPE_E_TAG: + l2_tn_info->e_tag_en = TRUE; ret = ixgbe_e_tag_enable(hw); break; default: @@ -6678,9 +7413,12 @@ ixgbe_dev_l2_tunnel_disable(struct rte_eth_dev *dev, { int ret = 0; struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private); + struct ixgbe_l2_tn_info *l2_tn_info = + IXGBE_DEV_PRIVATE_TO_L2_TN_INFO(dev->data->dev_private); switch (l2_tunnel_type) { case RTE_L2_TUNNEL_TYPE_E_TAG: + l2_tn_info->e_tag_en = FALSE; ret = ixgbe_e_tag_disable(hw); break; default: @@ -6769,12 +7507,108 @@ ixgbe_e_tag_filter_add(struct rte_eth_dev *dev, return -EINVAL; } +static inline struct ixgbe_l2_tn_filter * +ixgbe_l2_tn_filter_lookup(struct ixgbe_l2_tn_info *l2_tn_info, + struct ixgbe_l2_tn_key *key) +{ + int ret; + + ret = rte_hash_lookup(l2_tn_info->hash_handle, (const void *)key); + if (ret < 0) + return NULL; + + return l2_tn_info->hash_map[ret]; +} + +static inline int +ixgbe_insert_l2_tn_filter(struct ixgbe_l2_tn_info *l2_tn_info, + struct ixgbe_l2_tn_filter *l2_tn_filter) +{ + int ret; + + ret = rte_hash_add_key(l2_tn_info->hash_handle, + &l2_tn_filter->key); + + if (ret < 0) { + PMD_DRV_LOG(ERR, + "Failed to insert L2 tunnel filter" + " to hash table %d!", + ret); + return ret; + } + + l2_tn_info->hash_map[ret] = l2_tn_filter; + + TAILQ_INSERT_TAIL(&l2_tn_info->l2_tn_list, l2_tn_filter, entries); + + return 0; +} + +static inline int +ixgbe_remove_l2_tn_filter(struct ixgbe_l2_tn_info *l2_tn_info, + struct ixgbe_l2_tn_key *key) +{ + int ret; + struct ixgbe_l2_tn_filter *l2_tn_filter; + + ret = rte_hash_del_key(l2_tn_info->hash_handle, key); + + if (ret < 0) { + PMD_DRV_LOG(ERR, + "No such L2 tunnel filter to delete %d!", + ret); + return ret; + } + + l2_tn_filter = l2_tn_info->hash_map[ret]; + l2_tn_info->hash_map[ret] = NULL; + + TAILQ_REMOVE(&l2_tn_info->l2_tn_list, l2_tn_filter, entries); + rte_free(l2_tn_filter); + + return 0; +} + /* Add l2 tunnel filter */ -static int +int ixgbe_dev_l2_tunnel_filter_add(struct rte_eth_dev *dev, - struct rte_eth_l2_tunnel_conf *l2_tunnel) + struct rte_eth_l2_tunnel_conf *l2_tunnel, + bool restore) { - int ret = 0; + int ret; + struct ixgbe_l2_tn_info *l2_tn_info = + IXGBE_DEV_PRIVATE_TO_L2_TN_INFO(dev->data->dev_private); + struct ixgbe_l2_tn_key key; + struct ixgbe_l2_tn_filter *node; + + if (!restore) { + key.l2_tn_type = l2_tunnel->l2_tunnel_type; + key.tn_id = l2_tunnel->tunnel_id; + + node = ixgbe_l2_tn_filter_lookup(l2_tn_info, &key); + + if (node) { + PMD_DRV_LOG(ERR, + "The L2 tunnel filter already exists!"); + return -EINVAL; + } + + node = rte_zmalloc("ixgbe_l2_tn", + sizeof(struct ixgbe_l2_tn_filter), + 0); + if (!node) + return -ENOMEM; + + (void)rte_memcpy(&node->key, + &key, + sizeof(struct ixgbe_l2_tn_key)); + node->pool = l2_tunnel->pool; + ret = ixgbe_insert_l2_tn_filter(l2_tn_info, node); + if (ret < 0) { + rte_free(node); + 
return ret; + } + } switch (l2_tunnel->l2_tunnel_type) { case RTE_L2_TUNNEL_TYPE_E_TAG: @@ -6786,15 +7620,27 @@ ixgbe_dev_l2_tunnel_filter_add(struct rte_eth_dev *dev, break; } + if ((!restore) && (ret < 0)) + (void)ixgbe_remove_l2_tn_filter(l2_tn_info, &key); + return ret; } /* Delete l2 tunnel filter */ -static int +int ixgbe_dev_l2_tunnel_filter_del(struct rte_eth_dev *dev, struct rte_eth_l2_tunnel_conf *l2_tunnel) { - int ret = 0; + int ret; + struct ixgbe_l2_tn_info *l2_tn_info = + IXGBE_DEV_PRIVATE_TO_L2_TN_INFO(dev->data->dev_private); + struct ixgbe_l2_tn_key key; + + key.l2_tn_type = l2_tunnel->l2_tunnel_type; + key.tn_id = l2_tunnel->tunnel_id; + ret = ixgbe_remove_l2_tn_filter(l2_tn_info, &key); + if (ret < 0) + return ret; switch (l2_tunnel->l2_tunnel_type) { case RTE_L2_TUNNEL_TYPE_E_TAG: @@ -6820,7 +7666,7 @@ ixgbe_dev_l2_tunnel_filter_handle(struct rte_eth_dev *dev, enum rte_filter_op filter_op, void *arg) { - int ret = 0; + int ret; if (filter_op == RTE_ETH_FILTER_NOP) return 0; @@ -6835,7 +7681,8 @@ ixgbe_dev_l2_tunnel_filter_handle(struct rte_eth_dev *dev, case RTE_ETH_FILTER_ADD: ret = ixgbe_dev_l2_tunnel_filter_add (dev, - (struct rte_eth_l2_tunnel_conf *)arg); + (struct rte_eth_l2_tunnel_conf *)arg, + FALSE); break; case RTE_ETH_FILTER_DELETE: ret = ixgbe_dev_l2_tunnel_filter_del @@ -6878,10 +7725,13 @@ ixgbe_dev_l2_tunnel_forwarding_enable (struct rte_eth_dev *dev, enum rte_eth_tunnel_type l2_tunnel_type) { + struct ixgbe_l2_tn_info *l2_tn_info = + IXGBE_DEV_PRIVATE_TO_L2_TN_INFO(dev->data->dev_private); int ret = 0; switch (l2_tunnel_type) { case RTE_L2_TUNNEL_TYPE_E_TAG: + l2_tn_info->e_tag_fwd_en = TRUE; ret = ixgbe_e_tag_forwarding_en_dis(dev, 1); break; default: @@ -6899,10 +7749,13 @@ ixgbe_dev_l2_tunnel_forwarding_disable (struct rte_eth_dev *dev, enum rte_eth_tunnel_type l2_tunnel_type) { + struct ixgbe_l2_tn_info *l2_tn_info = + IXGBE_DEV_PRIVATE_TO_L2_TN_INFO(dev->data->dev_private); int ret = 0; switch (l2_tunnel_type) { case RTE_L2_TUNNEL_TYPE_E_TAG: + l2_tn_info->e_tag_fwd_en = FALSE; ret = ixgbe_e_tag_forwarding_en_dis(dev, 0); break; default: @@ -6919,15 +7772,16 @@ ixgbe_e_tag_insertion_en_dis(struct rte_eth_dev *dev, struct rte_eth_l2_tunnel_conf *l2_tunnel, bool en) { + struct rte_pci_device *pci_dev = IXGBE_DEV_TO_PCI(dev); int ret = 0; uint32_t vmtir, vmvir; struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private); - if (l2_tunnel->vf_id >= dev->pci_dev->max_vfs) { + if (l2_tunnel->vf_id >= pci_dev->max_vfs) { PMD_DRV_LOG(ERR, "VF id %u should be less than %u", l2_tunnel->vf_id, - dev->pci_dev->max_vfs); + pci_dev->max_vfs); return -EINVAL; } @@ -7240,51 +8094,12 @@ ixgbe_dev_udp_tunnel_port_del(struct rte_eth_dev *dev, return ret; } -/* ixgbevf_update_xcast_mode - Update Multicast mode - * @hw: pointer to the HW structure - * @netdev: pointer to net device structure - * @xcast_mode: new multicast mode - * - * Updates the Multicast Mode of VF. 
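
The new software list and hash bookkeeping above shadow what the application requests through the L2 tunnel filter type. A rough usage sketch, assuming an E-tag capable port; the tunnel id and pool are illustrative:

	#include <string.h>
	#include <rte_ethdev.h>

	/* Illustrative only: direct E-tag tunnel id 0x123 to pool/VF 1. */
	static int
	add_example_e_tag_filter(uint8_t port_id)
	{
		struct rte_eth_l2_tunnel_conf conf;

		memset(&conf, 0, sizeof(conf));
		conf.l2_tunnel_type = RTE_L2_TUNNEL_TYPE_E_TAG;
		conf.tunnel_id = 0x123;
		conf.pool = 1;

		return rte_eth_dev_filter_ctrl(port_id, RTE_ETH_FILTER_L2_TUNNEL,
					       RTE_ETH_FILTER_ADD, &conf);
	}

In practice the E-tag tunnel type also has to be enabled and its ether type configured first, via the generic l2-tunnel offload calls whose driver-side enable/disable paths are the ones being patched in this hunk.
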
- */ -static int ixgbevf_update_xcast_mode(struct ixgbe_hw *hw, - int xcast_mode) -{ - struct ixgbe_mbx_info *mbx = &hw->mbx; - u32 msgbuf[2]; - s32 err; - - switch (hw->api_version) { - case ixgbe_mbox_api_12: - break; - default: - return -EOPNOTSUPP; - } - - msgbuf[0] = IXGBE_VF_UPDATE_XCAST_MODE; - msgbuf[1] = xcast_mode; - - err = mbx->ops.write_posted(hw, msgbuf, 2, 0); - if (err) - return err; - - err = mbx->ops.read_posted(hw, msgbuf, 2, 0); - if (err) - return err; - - msgbuf[0] &= ~IXGBE_VT_MSGTYPE_CTS; - if (msgbuf[0] == (IXGBE_VF_UPDATE_XCAST_MODE | IXGBE_VT_MSGTYPE_NACK)) - return -EPERM; - - return 0; -} - static void ixgbevf_dev_allmulticast_enable(struct rte_eth_dev *dev) { struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private); - ixgbevf_update_xcast_mode(hw, IXGBEVF_XCAST_MODE_ALLMULTI); + hw->mac.ops.update_xcast_mode(hw, IXGBEVF_XCAST_MODE_ALLMULTI); } static void @@ -7292,7 +8107,7 @@ ixgbevf_dev_allmulticast_disable(struct rte_eth_dev *dev) { struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private); - ixgbevf_update_xcast_mode(hw, IXGBEVF_XCAST_MODE_NONE); + hw->mac.ops.update_xcast_mode(hw, IXGBEVF_XCAST_MODE_NONE); } static void ixgbevf_mbx_process(struct rte_eth_dev *dev) @@ -7305,7 +8120,7 @@ static void ixgbevf_mbx_process(struct rte_eth_dev *dev) /* PF reset VF event */ if (in_msg == IXGBE_PF_CONTROL_MSG) - _rte_eth_dev_callback_process(dev, RTE_ETH_EVENT_INTR_RESET); + _rte_eth_dev_callback_process(dev, RTE_ETH_EVENT_INTR_RESET, NULL); } static int @@ -7356,17 +8171,529 @@ ixgbevf_dev_interrupt_handler(__rte_unused struct rte_intr_handle *handle, ixgbevf_dev_interrupt_action(dev); } -static struct rte_driver rte_ixgbe_driver = { - .type = PMD_PDEV, - .init = rte_ixgbe_pmd_init, -}; +/** + * ixgbe_disable_sec_tx_path_generic - Stops the transmit data path + * @hw: pointer to hardware structure + * + * Stops the transmit data path and waits for the HW to internally empty + * the Tx security block + **/ +int ixgbe_disable_sec_tx_path_generic(struct ixgbe_hw *hw) +{ +#define IXGBE_MAX_SECTX_POLL 40 -static struct rte_driver rte_ixgbevf_driver = { - .type = PMD_PDEV, - .init = rte_ixgbevf_pmd_init, -}; + int i; + int sectxreg; + + sectxreg = IXGBE_READ_REG(hw, IXGBE_SECTXCTRL); + sectxreg |= IXGBE_SECTXCTRL_TX_DIS; + IXGBE_WRITE_REG(hw, IXGBE_SECTXCTRL, sectxreg); + for (i = 0; i < IXGBE_MAX_SECTX_POLL; i++) { + sectxreg = IXGBE_READ_REG(hw, IXGBE_SECTXSTAT); + if (sectxreg & IXGBE_SECTXSTAT_SECTX_RDY) + break; + /* Use interrupt-safe sleep just in case */ + usec_delay(1000); + } + + /* For informational purposes only */ + if (i >= IXGBE_MAX_SECTX_POLL) + PMD_DRV_LOG(DEBUG, "Tx unit being enabled before security " + "path fully disabled. Continuing with init."); + + return IXGBE_SUCCESS; +} + +/** + * ixgbe_enable_sec_tx_path_generic - Enables the transmit data path + * @hw: pointer to hardware structure + * + * Enables the transmit data path. 
+ **/ +int ixgbe_enable_sec_tx_path_generic(struct ixgbe_hw *hw) +{ + uint32_t sectxreg; + + sectxreg = IXGBE_READ_REG(hw, IXGBE_SECTXCTRL); + sectxreg &= ~IXGBE_SECTXCTRL_TX_DIS; + IXGBE_WRITE_REG(hw, IXGBE_SECTXCTRL, sectxreg); + IXGBE_WRITE_FLUSH(hw); + + return IXGBE_SUCCESS; +} + +int +rte_pmd_ixgbe_macsec_enable(uint8_t port, uint8_t en, uint8_t rp) +{ + struct ixgbe_hw *hw; + struct rte_eth_dev *dev; + struct rte_eth_dev_info dev_info; + uint32_t ctrl; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port, -ENODEV); + + rte_eth_dev_info_get(port, &dev_info); + if (is_ixgbe_pmd(dev_info.driver_name) != 0) + return -ENOTSUP; + + dev = &rte_eth_devices[port]; + hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private); + + /* Stop the data paths */ + if (ixgbe_disable_sec_rx_path(hw) != IXGBE_SUCCESS) + return -ENOTSUP; + /* + * Workaround: + * As no ixgbe_disable_sec_rx_path equivalent is + * implemented for tx in the base code, and we are + * not allowed to modify the base code in DPDK, so + * just call the hand-written one directly for now. + * The hardware support has been checked by + * ixgbe_disable_sec_rx_path(). + */ + ixgbe_disable_sec_tx_path_generic(hw); + + /* Enable Ethernet CRC (required by MACsec offload) */ + ctrl = IXGBE_READ_REG(hw, IXGBE_HLREG0); + ctrl |= IXGBE_HLREG0_TXCRCEN | IXGBE_HLREG0_RXCRCSTRP; + IXGBE_WRITE_REG(hw, IXGBE_HLREG0, ctrl); + + /* Enable the TX and RX crypto engines */ + ctrl = IXGBE_READ_REG(hw, IXGBE_SECTXCTRL); + ctrl &= ~IXGBE_SECTXCTRL_SECTX_DIS; + IXGBE_WRITE_REG(hw, IXGBE_SECTXCTRL, ctrl); + + ctrl = IXGBE_READ_REG(hw, IXGBE_SECRXCTRL); + ctrl &= ~IXGBE_SECRXCTRL_SECRX_DIS; + IXGBE_WRITE_REG(hw, IXGBE_SECRXCTRL, ctrl); + + ctrl = IXGBE_READ_REG(hw, IXGBE_SECTXMINIFG); + ctrl &= ~IXGBE_SECTX_MINSECIFG_MASK; + ctrl |= 0x3; + IXGBE_WRITE_REG(hw, IXGBE_SECTXMINIFG, ctrl); + + /* Enable SA lookup */ + ctrl = IXGBE_READ_REG(hw, IXGBE_LSECTXCTRL); + ctrl &= ~IXGBE_LSECTXCTRL_EN_MASK; + ctrl |= en ? IXGBE_LSECTXCTRL_AUTH_ENCRYPT : + IXGBE_LSECTXCTRL_AUTH; + ctrl |= IXGBE_LSECTXCTRL_AISCI; + ctrl &= ~IXGBE_LSECTXCTRL_PNTHRSH_MASK; + ctrl |= IXGBE_MACSEC_PNTHRSH & IXGBE_LSECTXCTRL_PNTHRSH_MASK; + IXGBE_WRITE_REG(hw, IXGBE_LSECTXCTRL, ctrl); + + ctrl = IXGBE_READ_REG(hw, IXGBE_LSECRXCTRL); + ctrl &= ~IXGBE_LSECRXCTRL_EN_MASK; + ctrl |= IXGBE_LSECRXCTRL_STRICT << IXGBE_LSECRXCTRL_EN_SHIFT; + ctrl &= ~IXGBE_LSECRXCTRL_PLSH; + if (rp) + ctrl |= IXGBE_LSECRXCTRL_RP; + else + ctrl &= ~IXGBE_LSECRXCTRL_RP; + IXGBE_WRITE_REG(hw, IXGBE_LSECRXCTRL, ctrl); + + /* Start the data paths */ + ixgbe_enable_sec_rx_path(hw); + /* + * Workaround: + * As no ixgbe_enable_sec_rx_path equivalent is + * implemented for tx in the base code, and we are + * not allowed to modify the base code in DPDK, so + * just call the hand-written one directly for now. 
+ */ + ixgbe_enable_sec_tx_path_generic(hw); + + return 0; +} + +int +rte_pmd_ixgbe_macsec_disable(uint8_t port) +{ + struct ixgbe_hw *hw; + struct rte_eth_dev *dev; + struct rte_eth_dev_info dev_info; + uint32_t ctrl; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port, -ENODEV); + + rte_eth_dev_info_get(port, &dev_info); + if (is_ixgbe_pmd(dev_info.driver_name) != 0) + return -ENOTSUP; + + dev = &rte_eth_devices[port]; + hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private); + + /* Stop the data paths */ + if (ixgbe_disable_sec_rx_path(hw) != IXGBE_SUCCESS) + return -ENOTSUP; + /* + * Workaround: + * As no ixgbe_disable_sec_rx_path equivalent is + * implemented for tx in the base code, and we are + * not allowed to modify the base code in DPDK, so + * just call the hand-written one directly for now. + * The hardware support has been checked by + * ixgbe_disable_sec_rx_path(). + */ + ixgbe_disable_sec_tx_path_generic(hw); + + /* Disable the TX and RX crypto engines */ + ctrl = IXGBE_READ_REG(hw, IXGBE_SECTXCTRL); + ctrl |= IXGBE_SECTXCTRL_SECTX_DIS; + IXGBE_WRITE_REG(hw, IXGBE_SECTXCTRL, ctrl); + + ctrl = IXGBE_READ_REG(hw, IXGBE_SECRXCTRL); + ctrl |= IXGBE_SECRXCTRL_SECRX_DIS; + IXGBE_WRITE_REG(hw, IXGBE_SECRXCTRL, ctrl); + + /* Disable SA lookup */ + ctrl = IXGBE_READ_REG(hw, IXGBE_LSECTXCTRL); + ctrl &= ~IXGBE_LSECTXCTRL_EN_MASK; + ctrl |= IXGBE_LSECTXCTRL_DISABLE; + IXGBE_WRITE_REG(hw, IXGBE_LSECTXCTRL, ctrl); + + ctrl = IXGBE_READ_REG(hw, IXGBE_LSECRXCTRL); + ctrl &= ~IXGBE_LSECRXCTRL_EN_MASK; + ctrl |= IXGBE_LSECRXCTRL_DISABLE << IXGBE_LSECRXCTRL_EN_SHIFT; + IXGBE_WRITE_REG(hw, IXGBE_LSECRXCTRL, ctrl); + + /* Start the data paths */ + ixgbe_enable_sec_rx_path(hw); + /* + * Workaround: + * As no ixgbe_enable_sec_rx_path equivalent is + * implemented for tx in the base code, and we are + * not allowed to modify the base code in DPDK, so + * just call the hand-written one directly for now. 
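
Taken together, the MACsec entry points added in this file are meant to be driven from the application in a fixed order: enable the crypto engines, program the TX and RX secure channels, then select a security association on each side. A minimal sketch using the signatures defined here; the MAC addresses, port identifier and key are made up, and a real deployment would of course never use an all-zero key:

	#include <rte_pmd_ixgbe.h>

	static int
	setup_example_macsec(uint8_t port_id)
	{
		uint8_t tx_mac[6] = { 0x02, 0x00, 0x00, 0x00, 0x00, 0x01 };
		uint8_t rx_mac[6] = { 0x02, 0x00, 0x00, 0x00, 0x00, 0x02 };
		uint8_t key[16] = { 0 };	/* placeholder key, illustration only */
		int ret;

		/* encrypt (en = 1) and enable replay protection (rp = 1) */
		ret = rte_pmd_ixgbe_macsec_enable(port_id, 1, 1);
		if (ret != 0)
			return ret;

		rte_pmd_ixgbe_macsec_config_txsc(port_id, tx_mac);
		rte_pmd_ixgbe_macsec_config_rxsc(port_id, rx_mac, 1 /* port identifier */);

		/* SA index 0, association number 0, starting packet number 1 */
		ret = rte_pmd_ixgbe_macsec_select_txsa(port_id, 0, 0, 1, key);
		if (ret != 0)
			return ret;

		return rte_pmd_ixgbe_macsec_select_rxsa(port_id, 0, 0, 1, key);
	}
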
+ */ + ixgbe_enable_sec_tx_path_generic(hw); + + return 0; +} + +int +rte_pmd_ixgbe_macsec_config_txsc(uint8_t port, uint8_t *mac) +{ + struct ixgbe_hw *hw; + struct rte_eth_dev *dev; + struct rte_eth_dev_info dev_info; + uint32_t ctrl; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port, -ENODEV); + + rte_eth_dev_info_get(port, &dev_info); + if (is_ixgbe_pmd(dev_info.driver_name) != 0) + return -ENOTSUP; + + dev = &rte_eth_devices[port]; + hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private); + + ctrl = mac[0] | (mac[1] << 8) | (mac[2] << 16) | (mac[3] << 24); + IXGBE_WRITE_REG(hw, IXGBE_LSECTXSCL, ctrl); + + ctrl = mac[4] | (mac[5] << 8); + IXGBE_WRITE_REG(hw, IXGBE_LSECTXSCH, ctrl); + + return 0; +} + +int +rte_pmd_ixgbe_macsec_config_rxsc(uint8_t port, uint8_t *mac, uint16_t pi) +{ + struct ixgbe_hw *hw; + struct rte_eth_dev *dev; + struct rte_eth_dev_info dev_info; + uint32_t ctrl; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port, -ENODEV); + + rte_eth_dev_info_get(port, &dev_info); + if (is_ixgbe_pmd(dev_info.driver_name) != 0) + return -ENOTSUP; + + dev = &rte_eth_devices[port]; + hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private); + + ctrl = mac[0] | (mac[1] << 8) | (mac[2] << 16) | (mac[3] << 24); + IXGBE_WRITE_REG(hw, IXGBE_LSECRXSCL, ctrl); + + pi = rte_cpu_to_be_16(pi); + ctrl = mac[4] | (mac[5] << 8) | (pi << 16); + IXGBE_WRITE_REG(hw, IXGBE_LSECRXSCH, ctrl); + + return 0; +} + +int +rte_pmd_ixgbe_macsec_select_txsa(uint8_t port, uint8_t idx, uint8_t an, + uint32_t pn, uint8_t *key) +{ + struct ixgbe_hw *hw; + struct rte_eth_dev *dev; + struct rte_eth_dev_info dev_info; + uint32_t ctrl, i; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port, -ENODEV); + + rte_eth_dev_info_get(port, &dev_info); + if (is_ixgbe_pmd(dev_info.driver_name) != 0) + return -ENOTSUP; + + dev = &rte_eth_devices[port]; + hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private); + + if (idx != 0 && idx != 1) + return -EINVAL; + + if (an >= 4) + return -EINVAL; + + hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private); + + /* Set the PN and key */ + pn = rte_cpu_to_be_32(pn); + if (idx == 0) { + IXGBE_WRITE_REG(hw, IXGBE_LSECTXPN0, pn); + + for (i = 0; i < 4; i++) { + ctrl = (key[i * 4 + 0] << 0) | + (key[i * 4 + 1] << 8) | + (key[i * 4 + 2] << 16) | + (key[i * 4 + 3] << 24); + IXGBE_WRITE_REG(hw, IXGBE_LSECTXKEY0(i), ctrl); + } + } else { + IXGBE_WRITE_REG(hw, IXGBE_LSECTXPN1, pn); + + for (i = 0; i < 4; i++) { + ctrl = (key[i * 4 + 0] << 0) | + (key[i * 4 + 1] << 8) | + (key[i * 4 + 2] << 16) | + (key[i * 4 + 3] << 24); + IXGBE_WRITE_REG(hw, IXGBE_LSECTXKEY1(i), ctrl); + } + } + + /* Set AN and select the SA */ + ctrl = (an << idx * 2) | (idx << 4); + IXGBE_WRITE_REG(hw, IXGBE_LSECTXSA, ctrl); + + return 0; +} + +int +rte_pmd_ixgbe_macsec_select_rxsa(uint8_t port, uint8_t idx, uint8_t an, + uint32_t pn, uint8_t *key) +{ + struct ixgbe_hw *hw; + struct rte_eth_dev *dev; + struct rte_eth_dev_info dev_info; + uint32_t ctrl, i; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port, -ENODEV); + + rte_eth_dev_info_get(port, &dev_info); + if (is_ixgbe_pmd(dev_info.driver_name) != 0) + return -ENOTSUP; + + dev = &rte_eth_devices[port]; + hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private); + + if (idx != 0 && idx != 1) + return -EINVAL; + + if (an >= 4) + return -EINVAL; + + /* Set the PN */ + pn = rte_cpu_to_be_32(pn); + IXGBE_WRITE_REG(hw, IXGBE_LSECRXPN(idx), pn); + + /* Set the key */ + for (i = 0; i < 4; i++) { + ctrl = (key[i * 4 + 0] << 0) | + (key[i * 4 + 1] << 8) | + (key[i * 4 + 2] << 16) | + (key[i * 4 + 3] << 24); + IXGBE_WRITE_REG(hw, 
IXGBE_LSECRXKEY(idx, i), ctrl); + } + + /* Set the AN and validate the SA */ + ctrl = an | (1 << 2); + IXGBE_WRITE_REG(hw, IXGBE_LSECRXSA(idx), ctrl); + + return 0; +} + +/* restore n-tuple filter */ +static inline void +ixgbe_ntuple_filter_restore(struct rte_eth_dev *dev) +{ + struct ixgbe_filter_info *filter_info = + IXGBE_DEV_PRIVATE_TO_FILTER_INFO(dev->data->dev_private); + struct ixgbe_5tuple_filter *node; + + TAILQ_FOREACH(node, &filter_info->fivetuple_list, entries) { + ixgbe_inject_5tuple_filter(dev, node); + } +} + +/* restore ethernet type filter */ +static inline void +ixgbe_ethertype_filter_restore(struct rte_eth_dev *dev) +{ + struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private); + struct ixgbe_filter_info *filter_info = + IXGBE_DEV_PRIVATE_TO_FILTER_INFO(dev->data->dev_private); + int i; + + for (i = 0; i < IXGBE_MAX_ETQF_FILTERS; i++) { + if (filter_info->ethertype_mask & (1 << i)) { + IXGBE_WRITE_REG(hw, IXGBE_ETQF(i), + filter_info->ethertype_filters[i].etqf); + IXGBE_WRITE_REG(hw, IXGBE_ETQS(i), + filter_info->ethertype_filters[i].etqs); + IXGBE_WRITE_FLUSH(hw); + } + } +} + +/* restore SYN filter */ +static inline void +ixgbe_syn_filter_restore(struct rte_eth_dev *dev) +{ + struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private); + struct ixgbe_filter_info *filter_info = + IXGBE_DEV_PRIVATE_TO_FILTER_INFO(dev->data->dev_private); + uint32_t synqf; + + synqf = filter_info->syn_info; + + if (synqf & IXGBE_SYN_FILTER_ENABLE) { + IXGBE_WRITE_REG(hw, IXGBE_SYNQF, synqf); + IXGBE_WRITE_FLUSH(hw); + } +} + +/* restore L2 tunnel filter */ +static inline void +ixgbe_l2_tn_filter_restore(struct rte_eth_dev *dev) +{ + struct ixgbe_l2_tn_info *l2_tn_info = + IXGBE_DEV_PRIVATE_TO_L2_TN_INFO(dev->data->dev_private); + struct ixgbe_l2_tn_filter *node; + struct rte_eth_l2_tunnel_conf l2_tn_conf; + + TAILQ_FOREACH(node, &l2_tn_info->l2_tn_list, entries) { + l2_tn_conf.l2_tunnel_type = node->key.l2_tn_type; + l2_tn_conf.tunnel_id = node->key.tn_id; + l2_tn_conf.pool = node->pool; + (void)ixgbe_dev_l2_tunnel_filter_add(dev, &l2_tn_conf, TRUE); + } +} + +static int +ixgbe_filter_restore(struct rte_eth_dev *dev) +{ + ixgbe_ntuple_filter_restore(dev); + ixgbe_ethertype_filter_restore(dev); + ixgbe_syn_filter_restore(dev); + ixgbe_fdir_filter_restore(dev); + ixgbe_l2_tn_filter_restore(dev); + + return 0; +} + +static void +ixgbe_l2_tunnel_conf(struct rte_eth_dev *dev) +{ + struct ixgbe_l2_tn_info *l2_tn_info = + IXGBE_DEV_PRIVATE_TO_L2_TN_INFO(dev->data->dev_private); + struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private); + + if (l2_tn_info->e_tag_en) + (void)ixgbe_e_tag_enable(hw); + + if (l2_tn_info->e_tag_fwd_en) + (void)ixgbe_e_tag_forwarding_en_dis(dev, 1); + + (void)ixgbe_update_e_tag_eth_type(hw, l2_tn_info->e_tag_ether_type); +} + +/* remove all the n-tuple filters */ +void +ixgbe_clear_all_ntuple_filter(struct rte_eth_dev *dev) +{ + struct ixgbe_filter_info *filter_info = + IXGBE_DEV_PRIVATE_TO_FILTER_INFO(dev->data->dev_private); + struct ixgbe_5tuple_filter *p_5tuple; + + while ((p_5tuple = TAILQ_FIRST(&filter_info->fivetuple_list))) + ixgbe_remove_5tuple_filter(dev, p_5tuple); +} + +/* remove all the ether type filters */ +void +ixgbe_clear_all_ethertype_filter(struct rte_eth_dev *dev) +{ + struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private); + struct ixgbe_filter_info *filter_info = + IXGBE_DEV_PRIVATE_TO_FILTER_INFO(dev->data->dev_private); + int i; + + for (i = 0; i < IXGBE_MAX_ETQF_FILTERS; i++) { + if 
(filter_info->ethertype_mask & (1 << i) && + !filter_info->ethertype_filters[i].conf) { + (void)ixgbe_ethertype_filter_remove(filter_info, + (uint8_t)i); + IXGBE_WRITE_REG(hw, IXGBE_ETQF(i), 0); + IXGBE_WRITE_REG(hw, IXGBE_ETQS(i), 0); + IXGBE_WRITE_FLUSH(hw); + } + } +} + +/* remove the SYN filter */ +void +ixgbe_clear_syn_filter(struct rte_eth_dev *dev) +{ + struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private); + struct ixgbe_filter_info *filter_info = + IXGBE_DEV_PRIVATE_TO_FILTER_INFO(dev->data->dev_private); + + if (filter_info->syn_info & IXGBE_SYN_FILTER_ENABLE) { + filter_info->syn_info = 0; + + IXGBE_WRITE_REG(hw, IXGBE_SYNQF, 0); + IXGBE_WRITE_FLUSH(hw); + } +} + +/* remove all the L2 tunnel filters */ +int +ixgbe_clear_all_l2_tn_filter(struct rte_eth_dev *dev) +{ + struct ixgbe_l2_tn_info *l2_tn_info = + IXGBE_DEV_PRIVATE_TO_L2_TN_INFO(dev->data->dev_private); + struct ixgbe_l2_tn_filter *l2_tn_filter; + struct rte_eth_l2_tunnel_conf l2_tn_conf; + int ret = 0; + + while ((l2_tn_filter = TAILQ_FIRST(&l2_tn_info->l2_tn_list))) { + l2_tn_conf.l2_tunnel_type = l2_tn_filter->key.l2_tn_type; + l2_tn_conf.tunnel_id = l2_tn_filter->key.tn_id; + l2_tn_conf.pool = l2_tn_filter->pool; + ret = ixgbe_dev_l2_tunnel_filter_del(dev, &l2_tn_conf); + if (ret < 0) + return ret; + } + + return 0; +} -PMD_REGISTER_DRIVER(rte_ixgbe_driver, ixgbe); -DRIVER_REGISTER_PCI_TABLE(ixgbe, pci_id_ixgbe_map); -PMD_REGISTER_DRIVER(rte_ixgbevf_driver, ixgbevf); -DRIVER_REGISTER_PCI_TABLE(ixgbevf, pci_id_ixgbevf_map); +RTE_PMD_REGISTER_PCI(net_ixgbe, rte_ixgbe_pmd.pci_drv); +RTE_PMD_REGISTER_PCI_TABLE(net_ixgbe, pci_id_ixgbe_map); +RTE_PMD_REGISTER_KMOD_DEP(net_ixgbe, "* igb_uio | uio_pci_generic | vfio"); +RTE_PMD_REGISTER_PCI(net_ixgbe_vf, rte_ixgbevf_pmd.pci_drv); +RTE_PMD_REGISTER_PCI_TABLE(net_ixgbe_vf, pci_id_ixgbevf_map); +RTE_PMD_REGISTER_KMOD_DEP(net_ixgbe_vf, "* igb_uio | vfio"); diff --git a/src/dpdk/drivers/net/ixgbe/ixgbe_ethdev.h b/src/dpdk/drivers/net/ixgbe/ixgbe_ethdev.h index 4ff6338e..680d5d93 100644 --- a/src/dpdk/drivers/net/ixgbe/ixgbe_ethdev.h +++ b/src/dpdk/drivers/net/ixgbe/ixgbe_ethdev.h @@ -1,7 +1,7 @@ /*- * BSD LICENSE * - * Copyright(c) 2010-2015 Intel Corporation. All rights reserved. + * Copyright(c) 2010-2016 Intel Corporation. All rights reserved. * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -38,11 +38,13 @@ #include "base/ixgbe_dcb_82598.h" #include "ixgbe_bypass.h" #include <rte_time.h> +#include <rte_hash.h> /* need update link, bit flag */ #define IXGBE_FLAG_NEED_LINK_UPDATE (uint32_t)(1 << 0) #define IXGBE_FLAG_MAILBOX (uint32_t)(1 << 1) #define IXGBE_FLAG_PHY_INTERRUPT (uint32_t)(1 << 2) +#define IXGBE_FLAG_MACSEC (uint32_t)(1 << 3) /* * Defines that were not part of ixgbe_type.h as they are not used by the @@ -130,10 +132,16 @@ #define IXGBE_MISC_VEC_ID RTE_INTR_VEC_ZERO_OFFSET #define IXGBE_RX_VEC_START RTE_INTR_VEC_RXTX_OFFSET +#define IXGBE_SECTX_MINSECIFG_MASK 0x0000000F + +#define IXGBE_MACSEC_PNTHRSH 0xFFFFFE00 + +#define IXGBE_MAX_FDIR_FILTER_NUM (1024 * 32) +#define IXGBE_MAX_L2_TN_FILTER_NUM 128 + /* * Information about the fdir mode. 
*/ - struct ixgbe_hw_fdir_mask { uint16_t vlan_tci_mask; uint32_t src_ipv4_mask; @@ -148,6 +156,28 @@ struct ixgbe_hw_fdir_mask { uint8_t tunnel_type_mask; }; +struct ixgbe_fdir_filter { + TAILQ_ENTRY(ixgbe_fdir_filter) entries; + union ixgbe_atr_input ixgbe_fdir; /* key of fdir filter*/ + uint32_t fdirflags; /* drop or forward */ + uint32_t fdirhash; /* hash value for fdir */ + uint8_t queue; /* assigned rx queue */ +}; + +/* list of fdir filters */ +TAILQ_HEAD(ixgbe_fdir_filter_list, ixgbe_fdir_filter); + +struct ixgbe_fdir_rule { + struct ixgbe_hw_fdir_mask mask; + union ixgbe_atr_input ixgbe_fdir; /* key of fdir filter*/ + bool b_spec; /* If TRUE, ixgbe_fdir, fdirflags, queue have meaning. */ + bool b_mask; /* If TRUE, mask has meaning. */ + enum rte_fdir_mode mode; /* IP, MAC VLAN, Tunnel */ + uint32_t fdirflags; /* drop or forward */ + uint32_t soft_id; /* an unique value for this rule */ + uint8_t queue; /* assigned rx queue */ +}; + struct ixgbe_hw_fdir_info { struct ixgbe_hw_fdir_mask mask; uint8_t flex_bytes_offset; @@ -159,12 +189,19 @@ struct ixgbe_hw_fdir_info { uint64_t remove; uint64_t f_add; uint64_t f_remove; + struct ixgbe_fdir_filter_list fdir_list; /* filter list*/ + /* store the pointers of the filters, index is the hash value. */ + struct ixgbe_fdir_filter **hash_map; + struct rte_hash *hash_handle; /* cuckoo hash handler */ + bool mask_added; /* If already got mask from consistent filter */ }; /* structure for interrupt relative data */ struct ixgbe_interrupt { uint32_t flags; uint32_t mask; + /*to save original mask during delayed handler */ + uint32_t mask_original; }; struct ixgbe_stat_mapping_registers { @@ -252,16 +289,131 @@ struct ixgbe_5tuple_filter { (RTE_ALIGN(IXGBE_MAX_FTQF_FILTERS, (sizeof(uint32_t) * NBBY)) / \ (sizeof(uint32_t) * NBBY)) +struct ixgbe_ethertype_filter { + uint16_t ethertype; + uint32_t etqf; + uint32_t etqs; + /** + * If this filter is added by configuration, + * it should not be removed. + */ + bool conf; +}; + /* * Structure to store filters' info. 
*/ struct ixgbe_filter_info { uint8_t ethertype_mask; /* Bit mask for every used ethertype filter */ /* store used ethertype filters*/ - uint16_t ethertype_filters[IXGBE_MAX_ETQF_FILTERS]; + struct ixgbe_ethertype_filter ethertype_filters[IXGBE_MAX_ETQF_FILTERS]; /* Bit mask for every used 5tuple filter */ uint32_t fivetuple_mask[IXGBE_5TUPLE_ARRAY_SIZE]; struct ixgbe_5tuple_filter_list fivetuple_list; + /* store the SYN filter info */ + uint32_t syn_info; +}; + +struct ixgbe_l2_tn_key { + enum rte_eth_tunnel_type l2_tn_type; + uint32_t tn_id; +}; + +struct ixgbe_l2_tn_filter { + TAILQ_ENTRY(ixgbe_l2_tn_filter) entries; + struct ixgbe_l2_tn_key key; + uint32_t pool; +}; + +TAILQ_HEAD(ixgbe_l2_tn_filter_list, ixgbe_l2_tn_filter); + +struct ixgbe_l2_tn_info { + struct ixgbe_l2_tn_filter_list l2_tn_list; + struct ixgbe_l2_tn_filter **hash_map; + struct rte_hash *hash_handle; + bool e_tag_en; /* e-tag enabled */ + bool e_tag_fwd_en; /* e-tag based forwarding enabled */ + bool e_tag_ether_type; /* ether type for e-tag */ +}; + +struct rte_flow { + enum rte_filter_type filter_type; + void *rule; +}; +/* ntuple filter list structure */ +struct ixgbe_ntuple_filter_ele { + TAILQ_ENTRY(ixgbe_ntuple_filter_ele) entries; + struct rte_eth_ntuple_filter filter_info; +}; +/* ethertype filter list structure */ +struct ixgbe_ethertype_filter_ele { + TAILQ_ENTRY(ixgbe_ethertype_filter_ele) entries; + struct rte_eth_ethertype_filter filter_info; +}; +/* syn filter list structure */ +struct ixgbe_eth_syn_filter_ele { + TAILQ_ENTRY(ixgbe_eth_syn_filter_ele) entries; + struct rte_eth_syn_filter filter_info; +}; +/* fdir filter list structure */ +struct ixgbe_fdir_rule_ele { + TAILQ_ENTRY(ixgbe_fdir_rule_ele) entries; + struct ixgbe_fdir_rule filter_info; +}; +/* l2_tunnel filter list structure */ +struct ixgbe_eth_l2_tunnel_conf_ele { + TAILQ_ENTRY(ixgbe_eth_l2_tunnel_conf_ele) entries; + struct rte_eth_l2_tunnel_conf filter_info; +}; +/* ixgbe_flow memory list structure */ +struct ixgbe_flow_mem { + TAILQ_ENTRY(ixgbe_flow_mem) entries; + struct rte_flow *flow; +}; + +TAILQ_HEAD(ixgbe_ntuple_filter_list, ixgbe_ntuple_filter_ele); +struct ixgbe_ntuple_filter_list filter_ntuple_list; +TAILQ_HEAD(ixgbe_ethertype_filter_list, ixgbe_ethertype_filter_ele); +struct ixgbe_ethertype_filter_list filter_ethertype_list; +TAILQ_HEAD(ixgbe_syn_filter_list, ixgbe_eth_syn_filter_ele); +struct ixgbe_syn_filter_list filter_syn_list; +TAILQ_HEAD(ixgbe_fdir_rule_filter_list, ixgbe_fdir_rule_ele); +struct ixgbe_fdir_rule_filter_list filter_fdir_list; +TAILQ_HEAD(ixgbe_l2_tunnel_filter_list, ixgbe_eth_l2_tunnel_conf_ele); +struct ixgbe_l2_tunnel_filter_list filter_l2_tunnel_list; +TAILQ_HEAD(ixgbe_flow_mem_list, ixgbe_flow_mem); +struct ixgbe_flow_mem_list ixgbe_flow_list; + +/* + * Statistics counters collected by the MACsec + */ +struct ixgbe_macsec_stats { + /* TX port statistics */ + uint64_t out_pkts_untagged; + uint64_t out_pkts_encrypted; + uint64_t out_pkts_protected; + uint64_t out_octets_encrypted; + uint64_t out_octets_protected; + + /* RX port statistics */ + uint64_t in_pkts_untagged; + uint64_t in_pkts_badtag; + uint64_t in_pkts_nosci; + uint64_t in_pkts_unknownsci; + uint64_t in_octets_decrypted; + uint64_t in_octets_validated; + + /* RX SC statistics */ + uint64_t in_pkts_unchecked; + uint64_t in_pkts_delayed; + uint64_t in_pkts_late; + + /* RX SA statistics */ + uint64_t in_pkts_ok; + uint64_t in_pkts_invalid; + uint64_t in_pkts_notvalid; + uint64_t in_pkts_unusedsa; + uint64_t in_pkts_notusingsa; }; /* @@ -270,6 
+422,7 @@ struct ixgbe_filter_info { struct ixgbe_adapter { struct ixgbe_hw hw; struct ixgbe_hw_stats stats; + struct ixgbe_macsec_stats macsec_stats; struct ixgbe_hw_fdir_info fdir; struct ixgbe_interrupt intr; struct ixgbe_stat_mapping_registers stat_mappings; @@ -283,6 +436,7 @@ struct ixgbe_adapter { struct ixgbe_bypass_info bps; #endif /* RTE_NIC_BYPASS */ struct ixgbe_filter_info filter; + struct ixgbe_l2_tn_info l2_tn; bool rx_bulk_alloc_allowed; bool rx_vec_allowed; @@ -291,12 +445,18 @@ struct ixgbe_adapter { struct rte_timecounter tx_tstamp_tc; }; +#define IXGBE_DEV_TO_PCI(eth_dev) \ + RTE_DEV_TO_PCI((eth_dev)->device) + #define IXGBE_DEV_PRIVATE_TO_HW(adapter)\ (&((struct ixgbe_adapter *)adapter)->hw) #define IXGBE_DEV_PRIVATE_TO_STATS(adapter) \ (&((struct ixgbe_adapter *)adapter)->stats) +#define IXGBE_DEV_PRIVATE_TO_MACSEC_STATS(adapter) \ + (&((struct ixgbe_adapter *)adapter)->macsec_stats) + #define IXGBE_DEV_PRIVATE_TO_INTR(adapter) \ (&((struct ixgbe_adapter *)adapter)->intr) @@ -327,6 +487,9 @@ struct ixgbe_adapter { #define IXGBE_DEV_PRIVATE_TO_FILTER_INFO(adapter) \ (&((struct ixgbe_adapter *)adapter)->filter) +#define IXGBE_DEV_PRIVATE_TO_L2_TN_INFO(adapter) \ + (&((struct ixgbe_adapter *)adapter)->l2_tn) + /* * RX/TX function prototypes */ @@ -396,6 +559,9 @@ uint16_t ixgbe_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t ixgbe_xmit_pkts_simple(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts); +uint16_t ixgbe_prep_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, + uint16_t nb_pkts); + int ixgbe_dev_rss_hash_update(struct rte_eth_dev *dev, struct rte_eth_rss_conf *rss_conf); @@ -412,10 +578,31 @@ uint32_t ixgbe_rssrk_reg_get(enum ixgbe_mac_type mac_type, uint8_t i); bool ixgbe_rss_update_sp(enum ixgbe_mac_type mac_type); +int ixgbe_add_del_ntuple_filter(struct rte_eth_dev *dev, + struct rte_eth_ntuple_filter *filter, + bool add); +int ixgbe_add_del_ethertype_filter(struct rte_eth_dev *dev, + struct rte_eth_ethertype_filter *filter, + bool add); +int ixgbe_syn_filter_set(struct rte_eth_dev *dev, + struct rte_eth_syn_filter *filter, + bool add); +int +ixgbe_dev_l2_tunnel_filter_add(struct rte_eth_dev *dev, + struct rte_eth_l2_tunnel_conf *l2_tunnel, + bool restore); +int +ixgbe_dev_l2_tunnel_filter_del(struct rte_eth_dev *dev, + struct rte_eth_l2_tunnel_conf *l2_tunnel); +void ixgbe_filterlist_flush(void); /* * Flow director function prototypes */ int ixgbe_fdir_configure(struct rte_eth_dev *dev); +int ixgbe_fdir_set_input_mask(struct rte_eth_dev *dev); +int ixgbe_fdir_filter_program(struct rte_eth_dev *dev, + struct ixgbe_fdir_rule *rule, + bool del, bool update); void ixgbe_configure_dcb(struct rte_eth_dev *dev); @@ -442,4 +629,69 @@ uint32_t ixgbe_convert_vm_rx_mask_to_val(uint16_t rx_mask, uint32_t orig_val); int ixgbe_fdir_ctrl_func(struct rte_eth_dev *dev, enum rte_filter_op filter_op, void *arg); +void ixgbe_fdir_filter_restore(struct rte_eth_dev *dev); +int ixgbe_clear_all_fdir_filter(struct rte_eth_dev *dev); + +extern const struct rte_flow_ops ixgbe_flow_ops; + +void ixgbe_clear_all_ethertype_filter(struct rte_eth_dev *dev); +void ixgbe_clear_all_ntuple_filter(struct rte_eth_dev *dev); +void ixgbe_clear_syn_filter(struct rte_eth_dev *dev); +int ixgbe_clear_all_l2_tn_filter(struct rte_eth_dev *dev); + +int ixgbe_disable_sec_tx_path_generic(struct ixgbe_hw *hw); + +int ixgbe_enable_sec_tx_path_generic(struct ixgbe_hw *hw); + +static inline int +ixgbe_ethertype_filter_lookup(struct ixgbe_filter_info *filter_info, + uint16_t ethertype) +{ + 
int i; + + for (i = 0; i < IXGBE_MAX_ETQF_FILTERS; i++) { + if (filter_info->ethertype_filters[i].ethertype == ethertype && + (filter_info->ethertype_mask & (1 << i))) + return i; + } + return -1; +} + +static inline int +ixgbe_ethertype_filter_insert(struct ixgbe_filter_info *filter_info, + struct ixgbe_ethertype_filter *ethertype_filter) +{ + int i; + + for (i = 0; i < IXGBE_MAX_ETQF_FILTERS; i++) { + if (!(filter_info->ethertype_mask & (1 << i))) { + filter_info->ethertype_mask |= 1 << i; + filter_info->ethertype_filters[i].ethertype = + ethertype_filter->ethertype; + filter_info->ethertype_filters[i].etqf = + ethertype_filter->etqf; + filter_info->ethertype_filters[i].etqs = + ethertype_filter->etqs; + filter_info->ethertype_filters[i].conf = + ethertype_filter->conf; + return i; + } + } + return -1; +} + +static inline int +ixgbe_ethertype_filter_remove(struct ixgbe_filter_info *filter_info, + uint8_t idx) +{ + if (idx >= IXGBE_MAX_ETQF_FILTERS) + return -1; + filter_info->ethertype_mask &= ~(1 << idx); + filter_info->ethertype_filters[idx].ethertype = 0; + filter_info->ethertype_filters[idx].etqf = 0; + filter_info->ethertype_filters[idx].etqs = 0; + filter_info->ethertype_filters[idx].etqs = FALSE; + return idx; +} + #endif /* _IXGBE_ETHDEV_H_ */ diff --git a/src/dpdk/drivers/net/ixgbe/ixgbe_fdir.c b/src/dpdk/drivers/net/ixgbe/ixgbe_fdir.c index c38ac97b..3b9d60ca 100644 --- a/src/dpdk/drivers/net/ixgbe/ixgbe_fdir.c +++ b/src/dpdk/drivers/net/ixgbe/ixgbe_fdir.c @@ -43,6 +43,7 @@ #include <rte_pci.h> #include <rte_ether.h> #include <rte_ethdev.h> +#include <rte_malloc.h> #include "ixgbe_logs.h" #include "base/ixgbe_api.h" @@ -111,10 +112,8 @@ static int fdir_erase_filter_82599(struct ixgbe_hw *hw, uint32_t fdirhash); static int fdir_set_input_mask(struct rte_eth_dev *dev, const struct rte_eth_fdir_masks *input_mask); -static int fdir_set_input_mask_82599(struct rte_eth_dev *dev, - const struct rte_eth_fdir_masks *input_mask); -static int fdir_set_input_mask_x550(struct rte_eth_dev *dev, - const struct rte_eth_fdir_masks *input_mask); +static int fdir_set_input_mask_82599(struct rte_eth_dev *dev); +static int fdir_set_input_mask_x550(struct rte_eth_dev *dev); static int ixgbe_set_fdir_flex_conf(struct rte_eth_dev *dev, const struct rte_eth_fdir_flex_conf *conf, uint32_t *fdirctrl); static int fdir_enable_82599(struct ixgbe_hw *hw, uint32_t fdirctrl); @@ -248,13 +247,8 @@ configure_fdir_flags(const struct rte_fdir_conf *conf, uint32_t *fdirctrl) return -EINVAL; }; -#define TREX_PATCH -#ifdef TREX_PATCH - *fdirctrl |= (conf->flexbytes_offset << IXGBE_FDIRCTRL_FLEX_SHIFT); -#else *fdirctrl |= (IXGBE_DEFAULT_FLEXBYTES_OFFSET / sizeof(uint16_t)) << IXGBE_FDIRCTRL_FLEX_SHIFT; -#endif if (conf->mode >= RTE_FDIR_MODE_PERFECT && conf->mode <= RTE_FDIR_MODE_PERFECT_TUNNEL) { @@ -299,8 +293,7 @@ reverse_fdir_bitmasks(uint16_t hi_dword, uint16_t lo_dword) * but makes use of the rte_fdir_masks structure to see which bits to set. */ static int -fdir_set_input_mask_82599(struct rte_eth_dev *dev, - const struct rte_eth_fdir_masks *input_mask) +fdir_set_input_mask_82599(struct rte_eth_dev *dev) { struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private); struct ixgbe_hw_fdir_info *info = @@ -312,8 +305,6 @@ fdir_set_input_mask_82599(struct rte_eth_dev *dev, uint32_t fdirm = IXGBE_FDIRM_POOL | IXGBE_FDIRM_DIPv6 | IXGBE_FDIRM_FLEX; uint32_t fdirtcpm; /* TCP source and destination port masks. */ uint32_t fdiripv6m; /* IPv6 source and destination masks. 
*/ - uint16_t dst_ipv6m = 0; - uint16_t src_ipv6m = 0; volatile uint32_t *reg; PMD_INIT_FUNC_TRACE(); @@ -324,31 +315,30 @@ fdir_set_input_mask_82599(struct rte_eth_dev *dev, * a VLAN of 0 is unspecified, so mask that out as well. L4type * cannot be masked out in this implementation. */ - if (input_mask->dst_port_mask == 0 && input_mask->src_port_mask == 0) + if (info->mask.dst_port_mask == 0 && info->mask.src_port_mask == 0) /* use the L4 protocol mask for raw IPv4/IPv6 traffic */ fdirm |= IXGBE_FDIRM_L4P; - if (input_mask->vlan_tci_mask == rte_cpu_to_be_16(0x0FFF)) + if (info->mask.vlan_tci_mask == rte_cpu_to_be_16(0x0FFF)) /* mask VLAN Priority */ fdirm |= IXGBE_FDIRM_VLANP; - else if (input_mask->vlan_tci_mask == rte_cpu_to_be_16(0xE000)) + else if (info->mask.vlan_tci_mask == rte_cpu_to_be_16(0xE000)) /* mask VLAN ID */ fdirm |= IXGBE_FDIRM_VLANID; - else if (input_mask->vlan_tci_mask == 0) + else if (info->mask.vlan_tci_mask == 0) /* mask VLAN ID and Priority */ fdirm |= IXGBE_FDIRM_VLANID | IXGBE_FDIRM_VLANP; - else if (input_mask->vlan_tci_mask != rte_cpu_to_be_16(0xEFFF)) { + else if (info->mask.vlan_tci_mask != rte_cpu_to_be_16(0xEFFF)) { PMD_INIT_LOG(ERR, "invalid vlan_tci_mask"); return -EINVAL; } - info->mask.vlan_tci_mask = input_mask->vlan_tci_mask; IXGBE_WRITE_REG(hw, IXGBE_FDIRM, fdirm); /* store the TCP/UDP port masks, bit reversed from port layout */ fdirtcpm = reverse_fdir_bitmasks( - rte_be_to_cpu_16(input_mask->dst_port_mask), - rte_be_to_cpu_16(input_mask->src_port_mask)); + rte_be_to_cpu_16(info->mask.dst_port_mask), + rte_be_to_cpu_16(info->mask.src_port_mask)); /* write all the same so that UDP, TCP and SCTP use the same mask * (little-endian) @@ -356,30 +346,23 @@ fdir_set_input_mask_82599(struct rte_eth_dev *dev, IXGBE_WRITE_REG(hw, IXGBE_FDIRTCPM, ~fdirtcpm); IXGBE_WRITE_REG(hw, IXGBE_FDIRUDPM, ~fdirtcpm); IXGBE_WRITE_REG(hw, IXGBE_FDIRSCTPM, ~fdirtcpm); - info->mask.src_port_mask = input_mask->src_port_mask; - info->mask.dst_port_mask = input_mask->dst_port_mask; /* Store source and destination IPv4 masks (big-endian), * can not use IXGBE_WRITE_REG. */ reg = IXGBE_PCI_REG_ADDR(hw, IXGBE_FDIRSIP4M); - *reg = ~(input_mask->ipv4_mask.src_ip); + *reg = ~(info->mask.src_ipv4_mask); reg = IXGBE_PCI_REG_ADDR(hw, IXGBE_FDIRDIP4M); - *reg = ~(input_mask->ipv4_mask.dst_ip); - info->mask.src_ipv4_mask = input_mask->ipv4_mask.src_ip; - info->mask.dst_ipv4_mask = input_mask->ipv4_mask.dst_ip; + *reg = ~(info->mask.dst_ipv4_mask); if (dev->data->dev_conf.fdir_conf.mode == RTE_FDIR_MODE_SIGNATURE) { /* * Store source and destination IPv6 masks (bit reversed) */ - IPV6_ADDR_TO_MASK(input_mask->ipv6_mask.src_ip, src_ipv6m); - IPV6_ADDR_TO_MASK(input_mask->ipv6_mask.dst_ip, dst_ipv6m); - fdiripv6m = (dst_ipv6m << 16) | src_ipv6m; + fdiripv6m = (info->mask.dst_ipv6_mask << 16) | + info->mask.src_ipv6_mask; IXGBE_WRITE_REG(hw, IXGBE_FDIRIP6M, ~fdiripv6m); - info->mask.src_ipv6_mask = src_ipv6m; - info->mask.dst_ipv6_mask = dst_ipv6m; } return IXGBE_SUCCESS; @@ -390,8 +373,7 @@ fdir_set_input_mask_82599(struct rte_eth_dev *dev, * but makes use of the rte_fdir_masks structure to see which bits to set. 
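
The masks that these functions now read back from info->mask are the ones the application supplies once at configure time, through the flow director part of the port configuration. A hedged sketch of that configuration; the mode, pballoc, drop queue and mask values are examples only:

	#include <stdint.h>
	#include <rte_ethdev.h>

	/* Illustrative only: perfect-mode flow director matching full IPv4 5-tuples. */
	static void
	set_example_fdir_conf(struct rte_eth_conf *port_conf)
	{
		struct rte_fdir_conf *fconf = &port_conf->fdir_conf;

		fconf->mode = RTE_FDIR_MODE_PERFECT;
		fconf->pballoc = RTE_FDIR_PBALLOC_64K;
		fconf->status = RTE_FDIR_REPORT_STATUS;
		fconf->drop_queue = 127;

		fconf->mask.vlan_tci_mask = 0;			/* ignore VLAN ID and priority */
		fconf->mask.ipv4_mask.src_ip = UINT32_MAX;	/* match whole addresses */
		fconf->mask.ipv4_mask.dst_ip = UINT32_MAX;
		fconf->mask.src_port_mask = UINT16_MAX;
		fconf->mask.dst_port_mask = UINT16_MAX;
	}
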
*/ static int -fdir_set_input_mask_x550(struct rte_eth_dev *dev, - const struct rte_eth_fdir_masks *input_mask) +fdir_set_input_mask_x550(struct rte_eth_dev *dev) { struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private); struct ixgbe_hw_fdir_info *info = @@ -414,20 +396,19 @@ fdir_set_input_mask_x550(struct rte_eth_dev *dev, /* some bits must be set for mac vlan or tunnel mode */ fdirm |= IXGBE_FDIRM_L4P | IXGBE_FDIRM_L3P; - if (input_mask->vlan_tci_mask == rte_cpu_to_be_16(0x0FFF)) + if (info->mask.vlan_tci_mask == rte_cpu_to_be_16(0x0FFF)) /* mask VLAN Priority */ fdirm |= IXGBE_FDIRM_VLANP; - else if (input_mask->vlan_tci_mask == rte_cpu_to_be_16(0xE000)) + else if (info->mask.vlan_tci_mask == rte_cpu_to_be_16(0xE000)) /* mask VLAN ID */ fdirm |= IXGBE_FDIRM_VLANID; - else if (input_mask->vlan_tci_mask == 0) + else if (info->mask.vlan_tci_mask == 0) /* mask VLAN ID and Priority */ fdirm |= IXGBE_FDIRM_VLANID | IXGBE_FDIRM_VLANP; - else if (input_mask->vlan_tci_mask != rte_cpu_to_be_16(0xEFFF)) { + else if (info->mask.vlan_tci_mask != rte_cpu_to_be_16(0xEFFF)) { PMD_INIT_LOG(ERR, "invalid vlan_tci_mask"); return -EINVAL; } - info->mask.vlan_tci_mask = input_mask->vlan_tci_mask; IXGBE_WRITE_REG(hw, IXGBE_FDIRM, fdirm); @@ -437,13 +418,12 @@ fdir_set_input_mask_x550(struct rte_eth_dev *dev, fdiripv6m |= IXGBE_FDIRIP6M_TUNNEL_TYPE | IXGBE_FDIRIP6M_TNI_VNI; - mac_mask = input_mask->mac_addr_byte_mask; - fdiripv6m |= (mac_mask << IXGBE_FDIRIP6M_INNER_MAC_SHIFT) - & IXGBE_FDIRIP6M_INNER_MAC; - info->mask.mac_addr_byte_mask = input_mask->mac_addr_byte_mask; - if (mode == RTE_FDIR_MODE_PERFECT_TUNNEL) { - switch (input_mask->tunnel_type_mask) { + mac_mask = info->mask.mac_addr_byte_mask; + fdiripv6m |= (mac_mask << IXGBE_FDIRIP6M_INNER_MAC_SHIFT) + & IXGBE_FDIRIP6M_INNER_MAC; + + switch (info->mask.tunnel_type_mask) { case 0: /* Mask turnnel type */ fdiripv6m |= IXGBE_FDIRIP6M_TUNNEL_TYPE; @@ -454,10 +434,8 @@ fdir_set_input_mask_x550(struct rte_eth_dev *dev, PMD_INIT_LOG(ERR, "invalid tunnel_type_mask"); return -EINVAL; } - info->mask.tunnel_type_mask = - input_mask->tunnel_type_mask; - switch (rte_be_to_cpu_32(input_mask->tunnel_id_mask)) { + switch (rte_be_to_cpu_32(info->mask.tunnel_id_mask)) { case 0x0: /* Mask vxlan id */ fdiripv6m |= IXGBE_FDIRIP6M_TNI_VNI; @@ -471,8 +449,6 @@ fdir_set_input_mask_x550(struct rte_eth_dev *dev, PMD_INIT_LOG(ERR, "invalid tunnel_id_mask"); return -EINVAL; } - info->mask.tunnel_id_mask = - input_mask->tunnel_id_mask; } IXGBE_WRITE_REG(hw, IXGBE_FDIRIP6M, fdiripv6m); @@ -486,22 +462,90 @@ fdir_set_input_mask_x550(struct rte_eth_dev *dev, } static int -fdir_set_input_mask(struct rte_eth_dev *dev, - const struct rte_eth_fdir_masks *input_mask) +ixgbe_fdir_store_input_mask_82599(struct rte_eth_dev *dev, + const struct rte_eth_fdir_masks *input_mask) +{ + struct ixgbe_hw_fdir_info *info = + IXGBE_DEV_PRIVATE_TO_FDIR_INFO(dev->data->dev_private); + uint16_t dst_ipv6m = 0; + uint16_t src_ipv6m = 0; + + memset(&info->mask, 0, sizeof(struct ixgbe_hw_fdir_mask)); + info->mask.vlan_tci_mask = input_mask->vlan_tci_mask; + info->mask.src_port_mask = input_mask->src_port_mask; + info->mask.dst_port_mask = input_mask->dst_port_mask; + info->mask.src_ipv4_mask = input_mask->ipv4_mask.src_ip; + info->mask.dst_ipv4_mask = input_mask->ipv4_mask.dst_ip; + IPV6_ADDR_TO_MASK(input_mask->ipv6_mask.src_ip, src_ipv6m); + IPV6_ADDR_TO_MASK(input_mask->ipv6_mask.dst_ip, dst_ipv6m); + info->mask.src_ipv6_mask = src_ipv6m; + info->mask.dst_ipv6_mask = dst_ipv6m; + + return 
IXGBE_SUCCESS; +} + +static int +ixgbe_fdir_store_input_mask_x550(struct rte_eth_dev *dev, + const struct rte_eth_fdir_masks *input_mask) +{ + struct ixgbe_hw_fdir_info *info = + IXGBE_DEV_PRIVATE_TO_FDIR_INFO(dev->data->dev_private); + + memset(&info->mask, 0, sizeof(struct ixgbe_hw_fdir_mask)); + info->mask.vlan_tci_mask = input_mask->vlan_tci_mask; + info->mask.mac_addr_byte_mask = input_mask->mac_addr_byte_mask; + info->mask.tunnel_type_mask = input_mask->tunnel_type_mask; + info->mask.tunnel_id_mask = input_mask->tunnel_id_mask; + + return IXGBE_SUCCESS; +} + +static int +ixgbe_fdir_store_input_mask(struct rte_eth_dev *dev, + const struct rte_eth_fdir_masks *input_mask) +{ + enum rte_fdir_mode mode = dev->data->dev_conf.fdir_conf.mode; + + if (mode >= RTE_FDIR_MODE_SIGNATURE && + mode <= RTE_FDIR_MODE_PERFECT) + return ixgbe_fdir_store_input_mask_82599(dev, input_mask); + else if (mode >= RTE_FDIR_MODE_PERFECT_MAC_VLAN && + mode <= RTE_FDIR_MODE_PERFECT_TUNNEL) + return ixgbe_fdir_store_input_mask_x550(dev, input_mask); + + PMD_DRV_LOG(ERR, "Not supported fdir mode - %d!", mode); + return -ENOTSUP; +} + +int +ixgbe_fdir_set_input_mask(struct rte_eth_dev *dev) { enum rte_fdir_mode mode = dev->data->dev_conf.fdir_conf.mode; if (mode >= RTE_FDIR_MODE_SIGNATURE && mode <= RTE_FDIR_MODE_PERFECT) - return fdir_set_input_mask_82599(dev, input_mask); + return fdir_set_input_mask_82599(dev); else if (mode >= RTE_FDIR_MODE_PERFECT_MAC_VLAN && mode <= RTE_FDIR_MODE_PERFECT_TUNNEL) - return fdir_set_input_mask_x550(dev, input_mask); + return fdir_set_input_mask_x550(dev); PMD_DRV_LOG(ERR, "Not supported fdir mode - %d!", mode); return -ENOTSUP; } +static int +fdir_set_input_mask(struct rte_eth_dev *dev, + const struct rte_eth_fdir_masks *input_mask) +{ + int ret; + + ret = ixgbe_fdir_store_input_mask(dev, input_mask); + if (ret) + return ret; + + return ixgbe_fdir_set_input_mask(dev); +} + /* * ixgbe_check_fdir_flex_conf -check if the flex payload and mask configuration * arguments are valid @@ -520,7 +564,7 @@ ixgbe_set_fdir_flex_conf(struct rte_eth_dev *dev, uint16_t i; fdirm = IXGBE_READ_REG(hw, IXGBE_FDIRM); -#ifndef TREX_PATCH + if (conf == NULL) { PMD_DRV_LOG(ERR, "NULL pointer."); return -EINVAL; @@ -561,11 +605,6 @@ ixgbe_set_fdir_flex_conf(struct rte_eth_dev *dev, return -EINVAL; } } -#else - fdirm &= ~IXGBE_FDIRM_FLEX; - flexbytes = 1; - // fdirctrl gets flex_bytes_offset in configure_fdir_flags -#endif IXGBE_WRITE_REG(hw, IXGBE_FDIRM, fdirm); info->mask.flex_bytes_mask = flexbytes ? UINT16_MAX : 0; info->flex_bytes_offset = (uint8_t)((*fdirctrl & @@ -597,9 +636,6 @@ ixgbe_fdir_configure(struct rte_eth_dev *dev) hw->mac.type != ixgbe_mac_X550EM_x && hw->mac.type != ixgbe_mac_X550EM_a && mode != RTE_FDIR_MODE_SIGNATURE && -#ifdef TREX_PATCH - mode != RTE_FDIR_MODE_PERFECT_MAC_VLAN && -#endif mode != RTE_FDIR_MODE_PERFECT) return -ENOSYS; @@ -1088,31 +1124,110 @@ fdir_erase_filter_82599(struct ixgbe_hw *hw, uint32_t fdirhash) } -/* - * ixgbe_add_del_fdir_filter - add or remove a flow diretor filter. 
- * @dev: pointer to the structure rte_eth_dev - * @fdir_filter: fdir filter entry - * @del: 1 - delete, 0 - add - * @update: 1 - update - */ +static inline struct ixgbe_fdir_filter * +ixgbe_fdir_filter_lookup(struct ixgbe_hw_fdir_info *fdir_info, + union ixgbe_atr_input *key) +{ + int ret; + + ret = rte_hash_lookup(fdir_info->hash_handle, (const void *)key); + if (ret < 0) + return NULL; + + return fdir_info->hash_map[ret]; +} + +static inline int +ixgbe_insert_fdir_filter(struct ixgbe_hw_fdir_info *fdir_info, + struct ixgbe_fdir_filter *fdir_filter) +{ + int ret; + + ret = rte_hash_add_key(fdir_info->hash_handle, + &fdir_filter->ixgbe_fdir); + + if (ret < 0) { + PMD_DRV_LOG(ERR, + "Failed to insert fdir filter to hash table %d!", + ret); + return ret; + } + + fdir_info->hash_map[ret] = fdir_filter; + + TAILQ_INSERT_TAIL(&fdir_info->fdir_list, fdir_filter, entries); + + return 0; +} + +static inline int +ixgbe_remove_fdir_filter(struct ixgbe_hw_fdir_info *fdir_info, + union ixgbe_atr_input *key) +{ + int ret; + struct ixgbe_fdir_filter *fdir_filter; + + ret = rte_hash_del_key(fdir_info->hash_handle, key); + + if (ret < 0) { + PMD_DRV_LOG(ERR, "No such fdir filter to delete %d!", ret); + return ret; + } + + fdir_filter = fdir_info->hash_map[ret]; + fdir_info->hash_map[ret] = NULL; + + TAILQ_REMOVE(&fdir_info->fdir_list, fdir_filter, entries); + rte_free(fdir_filter); + + return 0; +} + static int -ixgbe_add_del_fdir_filter(struct rte_eth_dev *dev, - const struct rte_eth_fdir_filter *fdir_filter, +ixgbe_interpret_fdir_filter(struct rte_eth_dev *dev, + const struct rte_eth_fdir_filter *fdir_filter, + struct ixgbe_fdir_rule *rule) +{ + enum rte_fdir_mode fdir_mode = dev->data->dev_conf.fdir_conf.mode; + int err; + + memset(rule, 0, sizeof(struct ixgbe_fdir_rule)); + + err = ixgbe_fdir_filter_to_atr_input(fdir_filter, + &rule->ixgbe_fdir, + fdir_mode); + if (err) + return err; + + rule->mode = fdir_mode; + if (fdir_filter->action.behavior == RTE_ETH_FDIR_REJECT) + rule->fdirflags = IXGBE_FDIRCMD_DROP; + rule->queue = fdir_filter->action.rx_queue; + rule->soft_id = fdir_filter->soft_id; + + return 0; +} + +int +ixgbe_fdir_filter_program(struct rte_eth_dev *dev, + struct ixgbe_fdir_rule *rule, bool del, bool update) { struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private); uint32_t fdircmd_flags; uint32_t fdirhash; - union ixgbe_atr_input input; uint8_t queue; bool is_perfect = FALSE; int err; struct ixgbe_hw_fdir_info *info = IXGBE_DEV_PRIVATE_TO_FDIR_INFO(dev->data->dev_private); enum rte_fdir_mode fdir_mode = dev->data->dev_conf.fdir_conf.mode; + struct ixgbe_fdir_filter *node; + bool add_node = FALSE; - if (fdir_mode == RTE_FDIR_MODE_NONE) + if (fdir_mode == RTE_FDIR_MODE_NONE || + fdir_mode != rule->mode) return -ENOTSUP; /* @@ -1125,7 +1240,7 @@ ixgbe_add_del_fdir_filter(struct rte_eth_dev *dev, (hw->mac.type == ixgbe_mac_X550 || hw->mac.type == ixgbe_mac_X550EM_x || hw->mac.type == ixgbe_mac_X550EM_a) && - (fdir_filter->input.flow_type == + (rule->ixgbe_fdir.formatted.flow_type == RTE_ETH_FLOW_NONFRAG_IPV4_OTHER) && (info->mask.src_port_mask != 0 || info->mask.dst_port_mask != 0)) { @@ -1139,31 +1254,26 @@ ixgbe_add_del_fdir_filter(struct rte_eth_dev *dev, fdir_mode <= RTE_FDIR_MODE_PERFECT_TUNNEL) is_perfect = TRUE; - memset(&input, 0, sizeof(input)); - - err = ixgbe_fdir_filter_to_atr_input(fdir_filter, &input, - fdir_mode); - if (err) - return err; - if (is_perfect) { -#ifndef TREX_PATCH - // No reason not to use IPV6 in perfect filters. It is working. 
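
The lookup, insert and remove helpers above all rely on a cuckoo hash whose returned key index doubles as a slot into the hash_map pointer array, plus a TAILQ for ordered traversal during restore. The table itself is created at device init, which is outside this hunk; a sketch of what that setup looks like, using the IXGBE_MAX_FDIR_FILTER_NUM limit and driver-internal types declared in ixgbe_ethdev.h (the hash name and map name here are illustrative):

	#include <sys/queue.h>
	#include <errno.h>
	#include <rte_hash.h>
	#include <rte_hash_crc.h>
	#include <rte_malloc.h>
	#include "ixgbe_ethdev.h"

	static int
	example_fdir_hash_init(struct ixgbe_hw_fdir_info *fdir_info, int socket_id)
	{
		struct rte_hash_parameters params = {
			.name = "fdir_example_hash",
			.entries = IXGBE_MAX_FDIR_FILTER_NUM,
			.key_len = sizeof(union ixgbe_atr_input),
			.hash_func = rte_hash_crc,
			.hash_func_init_val = 0,
			.socket_id = socket_id,
		};

		TAILQ_INIT(&fdir_info->fdir_list);

		fdir_info->hash_handle = rte_hash_create(&params);
		if (fdir_info->hash_handle == NULL)
			return -EINVAL;

		fdir_info->hash_map = rte_zmalloc("fdir_example_map",
				sizeof(struct ixgbe_fdir_filter *) *
				IXGBE_MAX_FDIR_FILTER_NUM, 0);
		if (fdir_info->hash_map == NULL) {
			rte_hash_free(fdir_info->hash_handle);
			return -ENOMEM;
		}

		return 0;
	}
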
- if (input.formatted.flow_type & IXGBE_ATR_L4TYPE_IPV6_MASK) { + if (rule->ixgbe_fdir.formatted.flow_type & + IXGBE_ATR_L4TYPE_IPV6_MASK) { PMD_DRV_LOG(ERR, "IPv6 is not supported in" " perfect mode!"); return -ENOTSUP; } -#endif - fdirhash = atr_compute_perfect_hash_82599(&input, + fdirhash = atr_compute_perfect_hash_82599(&rule->ixgbe_fdir, dev->data->dev_conf.fdir_conf.pballoc); - fdirhash |= fdir_filter->soft_id << + fdirhash |= rule->soft_id << IXGBE_FDIRHASH_SIG_SW_INDEX_SHIFT; } else - fdirhash = atr_compute_sig_hash_82599(&input, + fdirhash = atr_compute_sig_hash_82599(&rule->ixgbe_fdir, dev->data->dev_conf.fdir_conf.pballoc); if (del) { + err = ixgbe_remove_fdir_filter(info, &rule->ixgbe_fdir); + if (err < 0) + return err; + err = fdir_erase_filter_82599(hw, fdirhash); if (err < 0) PMD_DRV_LOG(ERR, "Fail to delete FDIR filter!"); @@ -1173,7 +1283,7 @@ ixgbe_add_del_fdir_filter(struct rte_eth_dev *dev, } /* add or update an fdir filter*/ fdircmd_flags = (update) ? IXGBE_FDIRCMD_FILTER_UPDATE : 0; - if (fdir_filter->action.behavior == RTE_ETH_FDIR_REJECT) { + if (rule->fdirflags & IXGBE_FDIRCMD_DROP) { if (is_perfect) { queue = dev->data->dev_conf.fdir_conf.drop_queue; fdircmd_flags |= IXGBE_FDIRCMD_DROP; @@ -1182,28 +1292,86 @@ ixgbe_add_del_fdir_filter(struct rte_eth_dev *dev, " signature mode."); return -EINVAL; } - } else if (fdir_filter->action.behavior == RTE_ETH_FDIR_ACCEPT && - fdir_filter->action.rx_queue < IXGBE_MAX_RX_QUEUE_NUM) - queue = (uint8_t)fdir_filter->action.rx_queue; + } else if (rule->queue < IXGBE_MAX_RX_QUEUE_NUM) + queue = (uint8_t)rule->queue; else return -EINVAL; + node = ixgbe_fdir_filter_lookup(info, &rule->ixgbe_fdir); + if (node) { + if (update) { + node->fdirflags = fdircmd_flags; + node->fdirhash = fdirhash; + node->queue = queue; + } else { + PMD_DRV_LOG(ERR, "Conflict with existing fdir filter!"); + return -EINVAL; + } + } else { + add_node = TRUE; + node = rte_zmalloc("ixgbe_fdir", + sizeof(struct ixgbe_fdir_filter), + 0); + if (!node) + return -ENOMEM; + (void)rte_memcpy(&node->ixgbe_fdir, + &rule->ixgbe_fdir, + sizeof(union ixgbe_atr_input)); + node->fdirflags = fdircmd_flags; + node->fdirhash = fdirhash; + node->queue = queue; + + err = ixgbe_insert_fdir_filter(info, node); + if (err < 0) { + rte_free(node); + return err; + } + } + if (is_perfect) { - err = fdir_write_perfect_filter_82599(hw, &input, queue, - fdircmd_flags, fdirhash, - fdir_mode); + err = fdir_write_perfect_filter_82599(hw, &rule->ixgbe_fdir, + queue, fdircmd_flags, + fdirhash, fdir_mode); } else { - err = fdir_add_signature_filter_82599(hw, &input, queue, - fdircmd_flags, fdirhash); + err = fdir_add_signature_filter_82599(hw, &rule->ixgbe_fdir, + queue, fdircmd_flags, + fdirhash); } - if (err < 0) + if (err < 0) { PMD_DRV_LOG(ERR, "Fail to add FDIR filter!"); - else + + if (add_node) + (void)ixgbe_remove_fdir_filter(info, &rule->ixgbe_fdir); + } else { PMD_DRV_LOG(DEBUG, "Success to add FDIR filter"); + } return err; } +/* ixgbe_add_del_fdir_filter - add or remove a flow diretor filter. 
+ * @dev: pointer to the structure rte_eth_dev + * @fdir_filter: fdir filter entry + * @del: 1 - delete, 0 - add + * @update: 1 - update + */ +static int +ixgbe_add_del_fdir_filter(struct rte_eth_dev *dev, + const struct rte_eth_fdir_filter *fdir_filter, + bool del, + bool update) +{ + struct ixgbe_fdir_rule rule; + int err; + + err = ixgbe_interpret_fdir_filter(dev, fdir_filter, &rule); + + if (err) + return err; + + return ixgbe_fdir_filter_program(dev, &rule, del, update); +} + static int ixgbe_fdir_flush(struct rte_eth_dev *dev) { @@ -1394,3 +1562,66 @@ ixgbe_fdir_ctrl_func(struct rte_eth_dev *dev, } return ret; } + +/* restore flow director filter */ +void +ixgbe_fdir_filter_restore(struct rte_eth_dev *dev) +{ + struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private); + struct ixgbe_hw_fdir_info *fdir_info = + IXGBE_DEV_PRIVATE_TO_FDIR_INFO(dev->data->dev_private); + struct ixgbe_fdir_filter *node; + bool is_perfect = FALSE; + enum rte_fdir_mode fdir_mode = dev->data->dev_conf.fdir_conf.mode; + + if (fdir_mode >= RTE_FDIR_MODE_PERFECT && + fdir_mode <= RTE_FDIR_MODE_PERFECT_TUNNEL) + is_perfect = TRUE; + + if (is_perfect) { + TAILQ_FOREACH(node, &fdir_info->fdir_list, entries) { + (void)fdir_write_perfect_filter_82599(hw, + &node->ixgbe_fdir, + node->queue, + node->fdirflags, + node->fdirhash, + fdir_mode); + } + } else { + TAILQ_FOREACH(node, &fdir_info->fdir_list, entries) { + (void)fdir_add_signature_filter_82599(hw, + &node->ixgbe_fdir, + node->queue, + node->fdirflags, + node->fdirhash); + } + } +} + +/* remove all the flow director filters */ +int +ixgbe_clear_all_fdir_filter(struct rte_eth_dev *dev) +{ + struct ixgbe_hw_fdir_info *fdir_info = + IXGBE_DEV_PRIVATE_TO_FDIR_INFO(dev->data->dev_private); + struct ixgbe_fdir_filter *fdir_filter; + struct ixgbe_fdir_filter *filter_flag; + int ret = 0; + + /* flush flow director */ + rte_hash_reset(fdir_info->hash_handle); + memset(fdir_info->hash_map, 0, + sizeof(struct ixgbe_fdir_filter *) * IXGBE_MAX_FDIR_FILTER_NUM); + filter_flag = TAILQ_FIRST(&fdir_info->fdir_list); + while ((fdir_filter = TAILQ_FIRST(&fdir_info->fdir_list))) { + TAILQ_REMOVE(&fdir_info->fdir_list, + fdir_filter, + entries); + rte_free(fdir_filter); + } + + if (filter_flag != NULL) + ret = ixgbe_fdir_flush(dev); + + return ret; +} diff --git a/src/dpdk/drivers/net/ixgbe/ixgbe_flow.c b/src/dpdk/drivers/net/ixgbe/ixgbe_flow.c new file mode 100644 index 00000000..82aceed7 --- /dev/null +++ b/src/dpdk/drivers/net/ixgbe/ixgbe_flow.c @@ -0,0 +1,2878 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2016 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. 
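With this split, ixgbe_fdir_filter_program() is the one place that writes flow director entries to hardware, while the list and hash bookkeeping make it cheap to replay or drop everything: ixgbe_fdir_filter_restore() rewrites all stored rules (presumably from the port start/reset path) and ixgbe_clear_all_fdir_filter() empties both the software state and the hardware table. A minimal sketch of driving the programming API with an already-parsed rule; the FALSE/TRUE arguments follow the del/update parameters above and error handling is trimmed:

/* Sketch: add a parsed rule, then remove it again later. */
static int
fdir_rule_lifecycle_sketch(struct rte_eth_dev *dev, struct ixgbe_fdir_rule *rule)
{
	int err;

	/* add: del = FALSE, update = FALSE */
	err = ixgbe_fdir_filter_program(dev, rule, FALSE, FALSE);
	if (err < 0)
		return err;

	/*
	 * ... traffic runs; after a reset, ixgbe_fdir_filter_restore(dev)
	 * can replay every rule still held in fdir_list ...
	 */

	/* delete: del = TRUE removes both the HW filter and the SW node */
	return ixgbe_fdir_filter_program(dev, rule, TRUE, FALSE);
}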
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include <sys/queue.h> +#include <stdio.h> +#include <errno.h> +#include <stdint.h> +#include <string.h> +#include <unistd.h> +#include <stdarg.h> +#include <inttypes.h> +#include <netinet/in.h> +#include <rte_byteorder.h> +#include <rte_common.h> +#include <rte_cycles.h> + +#include <rte_interrupts.h> +#include <rte_log.h> +#include <rte_debug.h> +#include <rte_pci.h> +#include <rte_atomic.h> +#include <rte_branch_prediction.h> +#include <rte_memory.h> +#include <rte_memzone.h> +#include <rte_eal.h> +#include <rte_alarm.h> +#include <rte_ether.h> +#include <rte_ethdev.h> +#include <rte_atomic.h> +#include <rte_malloc.h> +#include <rte_random.h> +#include <rte_dev.h> +#include <rte_hash_crc.h> +#include <rte_flow.h> +#include <rte_flow_driver.h> + +#include "ixgbe_logs.h" +#include "base/ixgbe_api.h" +#include "base/ixgbe_vf.h" +#include "base/ixgbe_common.h" +#include "ixgbe_ethdev.h" +#include "ixgbe_bypass.h" +#include "ixgbe_rxtx.h" +#include "base/ixgbe_type.h" +#include "base/ixgbe_phy.h" +#include "rte_pmd_ixgbe.h" + +static int ixgbe_flow_flush(struct rte_eth_dev *dev, + struct rte_flow_error *error); +static int +cons_parse_ntuple_filter(const struct rte_flow_attr *attr, + const struct rte_flow_item pattern[], + const struct rte_flow_action actions[], + struct rte_eth_ntuple_filter *filter, + struct rte_flow_error *error); +static int +ixgbe_parse_ntuple_filter(const struct rte_flow_attr *attr, + const struct rte_flow_item pattern[], + const struct rte_flow_action actions[], + struct rte_eth_ntuple_filter *filter, + struct rte_flow_error *error); +static int +cons_parse_ethertype_filter(const struct rte_flow_attr *attr, + const struct rte_flow_item *pattern, + const struct rte_flow_action *actions, + struct rte_eth_ethertype_filter *filter, + struct rte_flow_error *error); +static int +ixgbe_parse_ethertype_filter(const struct rte_flow_attr *attr, + const struct rte_flow_item pattern[], + const struct rte_flow_action actions[], + struct rte_eth_ethertype_filter *filter, + struct rte_flow_error *error); +static int +cons_parse_syn_filter(const struct rte_flow_attr *attr, + const struct rte_flow_item pattern[], + const struct rte_flow_action actions[], + struct rte_eth_syn_filter *filter, + struct rte_flow_error *error); +static int +ixgbe_parse_syn_filter(const struct rte_flow_attr *attr, + const struct rte_flow_item pattern[], + const struct rte_flow_action actions[], + struct rte_eth_syn_filter *filter, + struct rte_flow_error *error); +static int +cons_parse_l2_tn_filter(const struct rte_flow_attr *attr, + const struct rte_flow_item pattern[], + const struct rte_flow_action actions[], + struct rte_eth_l2_tunnel_conf *filter, + struct rte_flow_error *error); +static int 
+ixgbe_validate_l2_tn_filter(struct rte_eth_dev *dev, + const struct rte_flow_attr *attr, + const struct rte_flow_item pattern[], + const struct rte_flow_action actions[], + struct rte_eth_l2_tunnel_conf *rule, + struct rte_flow_error *error); +static int +ixgbe_validate_fdir_filter(struct rte_eth_dev *dev, + const struct rte_flow_attr *attr, + const struct rte_flow_item pattern[], + const struct rte_flow_action actions[], + struct ixgbe_fdir_rule *rule, + struct rte_flow_error *error); +static int +ixgbe_parse_fdir_filter_normal(const struct rte_flow_attr *attr, + const struct rte_flow_item pattern[], + const struct rte_flow_action actions[], + struct ixgbe_fdir_rule *rule, + struct rte_flow_error *error); +static int +ixgbe_parse_fdir_filter_tunnel(const struct rte_flow_attr *attr, + const struct rte_flow_item pattern[], + const struct rte_flow_action actions[], + struct ixgbe_fdir_rule *rule, + struct rte_flow_error *error); +static int +ixgbe_parse_fdir_filter(const struct rte_flow_attr *attr, + const struct rte_flow_item pattern[], + const struct rte_flow_action actions[], + struct ixgbe_fdir_rule *rule, + struct rte_flow_error *error); +static int +ixgbe_flow_validate(__rte_unused struct rte_eth_dev *dev, + const struct rte_flow_attr *attr, + const struct rte_flow_item pattern[], + const struct rte_flow_action actions[], + struct rte_flow_error *error); +static struct rte_flow *ixgbe_flow_create(struct rte_eth_dev *dev, + const struct rte_flow_attr *attr, + const struct rte_flow_item pattern[], + const struct rte_flow_action actions[], + struct rte_flow_error *error); +static int ixgbe_flow_destroy(struct rte_eth_dev *dev, + struct rte_flow *flow, + struct rte_flow_error *error); + +const struct rte_flow_ops ixgbe_flow_ops = { + ixgbe_flow_validate, + ixgbe_flow_create, + ixgbe_flow_destroy, + ixgbe_flow_flush, + NULL, +}; + +#define IXGBE_MIN_N_TUPLE_PRIO 1 +#define IXGBE_MAX_N_TUPLE_PRIO 7 +#define NEXT_ITEM_OF_PATTERN(item, pattern, index)\ + do { \ + item = pattern + index;\ + while (item->type == RTE_FLOW_ITEM_TYPE_VOID) {\ + index++; \ + item = pattern + index; \ + } \ + } while (0) + +#define NEXT_ITEM_OF_ACTION(act, actions, index)\ + do { \ + act = actions + index; \ + while (act->type == RTE_FLOW_ACTION_TYPE_VOID) {\ + index++; \ + act = actions + index; \ + } \ + } while (0) + +/** + * Please aware there's an asumption for all the parsers. + * rte_flow_item is using big endian, rte_flow_attr and + * rte_flow_action are using CPU order. + * Because the pattern is used to describe the packets, + * normally the packets should use network order. + */ + +/** + * Parse the rule to see if it is a n-tuple rule. + * And get the n-tuple filter info BTW. + * pattern: + * The first not void item can be ETH or IPV4. + * The second not void item must be IPV4 if the first one is ETH. + * The third not void item must be UDP or TCP. + * The next not void item must be END. + * action: + * The first not void action should be QUEUE. + * The next not void action should be END. + * pattern example: + * ITEM Spec Mask + * ETH NULL NULL + * IPV4 src_addr 192.168.1.20 0xFFFFFFFF + * dst_addr 192.167.3.50 0xFFFFFFFF + * next_proto_id 17 0xFF + * UDP/TCP src_port 80 0xFFFF + * dst_port 80 0xFFFF + * END + * other members in mask and spec should set to 0x00. + * item->last should be NULL. 
+ */ +static int +cons_parse_ntuple_filter(const struct rte_flow_attr *attr, + const struct rte_flow_item pattern[], + const struct rte_flow_action actions[], + struct rte_eth_ntuple_filter *filter, + struct rte_flow_error *error) +{ + const struct rte_flow_item *item; + const struct rte_flow_action *act; + const struct rte_flow_item_ipv4 *ipv4_spec; + const struct rte_flow_item_ipv4 *ipv4_mask; + const struct rte_flow_item_tcp *tcp_spec; + const struct rte_flow_item_tcp *tcp_mask; + const struct rte_flow_item_udp *udp_spec; + const struct rte_flow_item_udp *udp_mask; + uint32_t index; + + if (!pattern) { + rte_flow_error_set(error, + EINVAL, RTE_FLOW_ERROR_TYPE_ITEM_NUM, + NULL, "NULL pattern."); + return -rte_errno; + } + + if (!actions) { + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ACTION_NUM, + NULL, "NULL action."); + return -rte_errno; + } + if (!attr) { + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ATTR, + NULL, "NULL attribute."); + return -rte_errno; + } + + /* parse pattern */ + index = 0; + + /* the first not void item can be MAC or IPv4 */ + NEXT_ITEM_OF_PATTERN(item, pattern, index); + + if (item->type != RTE_FLOW_ITEM_TYPE_ETH && + item->type != RTE_FLOW_ITEM_TYPE_IPV4) { + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM, + item, "Not supported by ntuple filter"); + return -rte_errno; + } + /* Skip Ethernet */ + if (item->type == RTE_FLOW_ITEM_TYPE_ETH) { + /*Not supported last point for range*/ + if (item->last) { + rte_flow_error_set(error, + EINVAL, + RTE_FLOW_ERROR_TYPE_UNSPECIFIED, + item, "Not supported last point for range"); + return -rte_errno; + + } + /* if the first item is MAC, the content should be NULL */ + if (item->spec || item->mask) { + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM, + item, "Not supported by ntuple filter"); + return -rte_errno; + } + /* check if the next not void item is IPv4 */ + index++; + NEXT_ITEM_OF_PATTERN(item, pattern, index); + if (item->type != RTE_FLOW_ITEM_TYPE_IPV4) { + rte_flow_error_set(error, + EINVAL, RTE_FLOW_ERROR_TYPE_ITEM, + item, "Not supported by ntuple filter"); + return -rte_errno; + } + } + + /* get the IPv4 info */ + if (!item->spec || !item->mask) { + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM, + item, "Invalid ntuple mask"); + return -rte_errno; + } + /*Not supported last point for range*/ + if (item->last) { + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_UNSPECIFIED, + item, "Not supported last point for range"); + return -rte_errno; + + } + + ipv4_mask = (const struct rte_flow_item_ipv4 *)item->mask; + /** + * Only support src & dst addresses, protocol, + * others should be masked. 
+ */ + if (ipv4_mask->hdr.version_ihl || + ipv4_mask->hdr.type_of_service || + ipv4_mask->hdr.total_length || + ipv4_mask->hdr.packet_id || + ipv4_mask->hdr.fragment_offset || + ipv4_mask->hdr.time_to_live || + ipv4_mask->hdr.hdr_checksum) { + rte_flow_error_set(error, + EINVAL, RTE_FLOW_ERROR_TYPE_ITEM, + item, "Not supported by ntuple filter"); + return -rte_errno; + } + + filter->dst_ip_mask = ipv4_mask->hdr.dst_addr; + filter->src_ip_mask = ipv4_mask->hdr.src_addr; + filter->proto_mask = ipv4_mask->hdr.next_proto_id; + + ipv4_spec = (const struct rte_flow_item_ipv4 *)item->spec; + filter->dst_ip = ipv4_spec->hdr.dst_addr; + filter->src_ip = ipv4_spec->hdr.src_addr; + filter->proto = ipv4_spec->hdr.next_proto_id; + + /* check if the next not void item is TCP or UDP */ + index++; + NEXT_ITEM_OF_PATTERN(item, pattern, index); + if (item->type != RTE_FLOW_ITEM_TYPE_TCP && + item->type != RTE_FLOW_ITEM_TYPE_UDP) { + memset(filter, 0, sizeof(struct rte_eth_ntuple_filter)); + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM, + item, "Not supported by ntuple filter"); + return -rte_errno; + } + + /* get the TCP/UDP info */ + if (!item->spec || !item->mask) { + memset(filter, 0, sizeof(struct rte_eth_ntuple_filter)); + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM, + item, "Invalid ntuple mask"); + return -rte_errno; + } + + /*Not supported last point for range*/ + if (item->last) { + memset(filter, 0, sizeof(struct rte_eth_ntuple_filter)); + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_UNSPECIFIED, + item, "Not supported last point for range"); + return -rte_errno; + + } + + if (item->type == RTE_FLOW_ITEM_TYPE_TCP) { + tcp_mask = (const struct rte_flow_item_tcp *)item->mask; + + /** + * Only support src & dst ports, tcp flags, + * others should be masked. + */ + if (tcp_mask->hdr.sent_seq || + tcp_mask->hdr.recv_ack || + tcp_mask->hdr.data_off || + tcp_mask->hdr.rx_win || + tcp_mask->hdr.cksum || + tcp_mask->hdr.tcp_urp) { + memset(filter, 0, + sizeof(struct rte_eth_ntuple_filter)); + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM, + item, "Not supported by ntuple filter"); + return -rte_errno; + } + + filter->dst_port_mask = tcp_mask->hdr.dst_port; + filter->src_port_mask = tcp_mask->hdr.src_port; + if (tcp_mask->hdr.tcp_flags == 0xFF) { + filter->flags |= RTE_NTUPLE_FLAGS_TCP_FLAG; + } else if (!tcp_mask->hdr.tcp_flags) { + filter->flags &= ~RTE_NTUPLE_FLAGS_TCP_FLAG; + } else { + memset(filter, 0, sizeof(struct rte_eth_ntuple_filter)); + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM, + item, "Not supported by ntuple filter"); + return -rte_errno; + } + + tcp_spec = (const struct rte_flow_item_tcp *)item->spec; + filter->dst_port = tcp_spec->hdr.dst_port; + filter->src_port = tcp_spec->hdr.src_port; + filter->tcp_flags = tcp_spec->hdr.tcp_flags; + } else { + udp_mask = (const struct rte_flow_item_udp *)item->mask; + + /** + * Only support src & dst ports, + * others should be masked. 
+ */ + if (udp_mask->hdr.dgram_len || + udp_mask->hdr.dgram_cksum) { + memset(filter, 0, + sizeof(struct rte_eth_ntuple_filter)); + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM, + item, "Not supported by ntuple filter"); + return -rte_errno; + } + + filter->dst_port_mask = udp_mask->hdr.dst_port; + filter->src_port_mask = udp_mask->hdr.src_port; + + udp_spec = (const struct rte_flow_item_udp *)item->spec; + filter->dst_port = udp_spec->hdr.dst_port; + filter->src_port = udp_spec->hdr.src_port; + } + + /* check if the next not void item is END */ + index++; + NEXT_ITEM_OF_PATTERN(item, pattern, index); + if (item->type != RTE_FLOW_ITEM_TYPE_END) { + memset(filter, 0, sizeof(struct rte_eth_ntuple_filter)); + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM, + item, "Not supported by ntuple filter"); + return -rte_errno; + } + + /* parse action */ + index = 0; + + /** + * n-tuple only supports forwarding, + * check if the first not void action is QUEUE. + */ + NEXT_ITEM_OF_ACTION(act, actions, index); + if (act->type != RTE_FLOW_ACTION_TYPE_QUEUE) { + memset(filter, 0, sizeof(struct rte_eth_ntuple_filter)); + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ACTION, + item, "Not supported action."); + return -rte_errno; + } + filter->queue = + ((const struct rte_flow_action_queue *)act->conf)->index; + + /* check if the next not void item is END */ + index++; + NEXT_ITEM_OF_ACTION(act, actions, index); + if (act->type != RTE_FLOW_ACTION_TYPE_END) { + memset(filter, 0, sizeof(struct rte_eth_ntuple_filter)); + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ACTION, + act, "Not supported action."); + return -rte_errno; + } + + /* parse attr */ + /* must be input direction */ + if (!attr->ingress) { + memset(filter, 0, sizeof(struct rte_eth_ntuple_filter)); + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ATTR_INGRESS, + attr, "Only support ingress."); + return -rte_errno; + } + + /* not supported */ + if (attr->egress) { + memset(filter, 0, sizeof(struct rte_eth_ntuple_filter)); + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ATTR_EGRESS, + attr, "Not support egress."); + return -rte_errno; + } + + if (attr->priority > 0xFFFF) { + memset(filter, 0, sizeof(struct rte_eth_ntuple_filter)); + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ATTR_PRIORITY, + attr, "Error priority."); + return -rte_errno; + } + filter->priority = (uint16_t)attr->priority; + if (attr->priority < IXGBE_MIN_N_TUPLE_PRIO || + attr->priority > IXGBE_MAX_N_TUPLE_PRIO) + filter->priority = 1; + + return 0; +} + +/* a specific function for ixgbe because the flags is specific */ +static int +ixgbe_parse_ntuple_filter(const struct rte_flow_attr *attr, + const struct rte_flow_item pattern[], + const struct rte_flow_action actions[], + struct rte_eth_ntuple_filter *filter, + struct rte_flow_error *error) +{ + int ret; + + ret = cons_parse_ntuple_filter(attr, pattern, actions, filter, error); + + if (ret) + return ret; + + /* Ixgbe doesn't support tcp flags. */ + if (filter->flags & RTE_NTUPLE_FLAGS_TCP_FLAG) { + memset(filter, 0, sizeof(struct rte_eth_ntuple_filter)); + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM, + NULL, "Not supported by ntuple filter"); + return -rte_errno; + } + + /* Ixgbe doesn't support many priorities. 
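Everything cons_parse_ntuple_filter() accepts is summarised by the pattern example in its header comment: a bare ETH item, an IPv4 item with fully masked addresses and protocol, a TCP or UDP item with fully masked ports, one QUEUE action, ingress only. A sketch of an application building that exact request through the generic rte_flow API; the port id, addresses, ports and queue number are illustrative:

#include <netinet/in.h>     /* IPPROTO_UDP */
#include <rte_ip.h>         /* IPv4() helper */
#include <rte_byteorder.h>
#include <rte_flow.h>

static struct rte_flow *
ntuple_flow_sketch(uint8_t port_id)
{
	struct rte_flow_error err;
	struct rte_flow_attr attr = { .ingress = 1, .priority = 1 };
	struct rte_flow_item_ipv4 ip_spec = {
		.hdr = {
			.src_addr = rte_cpu_to_be_32(IPv4(192, 168, 1, 20)),
			.dst_addr = rte_cpu_to_be_32(IPv4(192, 167, 3, 50)),
			.next_proto_id = IPPROTO_UDP,
		},
	};
	struct rte_flow_item_ipv4 ip_mask = {
		.hdr = { .src_addr = 0xFFFFFFFF, .dst_addr = 0xFFFFFFFF,
			 .next_proto_id = 0xFF },
	};
	struct rte_flow_item_udp udp_spec = {
		.hdr = { .src_port = rte_cpu_to_be_16(80),
			 .dst_port = rte_cpu_to_be_16(80) },
	};
	struct rte_flow_item_udp udp_mask = {
		.hdr = { .src_port = 0xFFFF, .dst_port = 0xFFFF },
	};
	struct rte_flow_item pattern[] = {
		{ .type = RTE_FLOW_ITEM_TYPE_ETH },   /* spec/mask stay NULL */
		{ .type = RTE_FLOW_ITEM_TYPE_IPV4,
		  .spec = &ip_spec, .mask = &ip_mask },
		{ .type = RTE_FLOW_ITEM_TYPE_UDP,
		  .spec = &udp_spec, .mask = &udp_mask },
		{ .type = RTE_FLOW_ITEM_TYPE_END },
	};
	struct rte_flow_action_queue queue = { .index = 3 };
	struct rte_flow_action actions[] = {
		{ .type = RTE_FLOW_ACTION_TYPE_QUEUE, .conf = &queue },
		{ .type = RTE_FLOW_ACTION_TYPE_END },
	};

	return rte_flow_create(port_id, &attr, pattern, actions, &err);
}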
*/ + if (filter->priority < IXGBE_MIN_N_TUPLE_PRIO || + filter->priority > IXGBE_MAX_N_TUPLE_PRIO) { + memset(filter, 0, sizeof(struct rte_eth_ntuple_filter)); + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM, + NULL, "Priority not supported by ntuple filter"); + return -rte_errno; + } + + if (filter->queue >= IXGBE_MAX_RX_QUEUE_NUM || + filter->priority > IXGBE_5TUPLE_MAX_PRI || + filter->priority < IXGBE_5TUPLE_MIN_PRI) + return -rte_errno; + + /* fixed value for ixgbe */ + filter->flags = RTE_5TUPLE_FLAGS; + return 0; +} + +/** + * Parse the rule to see if it is a ethertype rule. + * And get the ethertype filter info BTW. + * pattern: + * The first not void item can be ETH. + * The next not void item must be END. + * action: + * The first not void action should be QUEUE. + * The next not void action should be END. + * pattern example: + * ITEM Spec Mask + * ETH type 0x0807 0xFFFF + * END + * other members in mask and spec should set to 0x00. + * item->last should be NULL. + */ +static int +cons_parse_ethertype_filter(const struct rte_flow_attr *attr, + const struct rte_flow_item *pattern, + const struct rte_flow_action *actions, + struct rte_eth_ethertype_filter *filter, + struct rte_flow_error *error) +{ + const struct rte_flow_item *item; + const struct rte_flow_action *act; + const struct rte_flow_item_eth *eth_spec; + const struct rte_flow_item_eth *eth_mask; + const struct rte_flow_action_queue *act_q; + uint32_t index; + + if (!pattern) { + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM_NUM, + NULL, "NULL pattern."); + return -rte_errno; + } + + if (!actions) { + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ACTION_NUM, + NULL, "NULL action."); + return -rte_errno; + } + + if (!attr) { + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ATTR, + NULL, "NULL attribute."); + return -rte_errno; + } + + /* Parse pattern */ + index = 0; + + /* The first non-void item should be MAC. */ + item = pattern + index; + while (item->type == RTE_FLOW_ITEM_TYPE_VOID) { + index++; + item = pattern + index; + } + if (item->type != RTE_FLOW_ITEM_TYPE_ETH) { + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM, + item, "Not supported by ethertype filter"); + return -rte_errno; + } + + /*Not supported last point for range*/ + if (item->last) { + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_UNSPECIFIED, + item, "Not supported last point for range"); + return -rte_errno; + } + + /* Get the MAC info. */ + if (!item->spec || !item->mask) { + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM, + item, "Not supported by ethertype filter"); + return -rte_errno; + } + + eth_spec = (const struct rte_flow_item_eth *)item->spec; + eth_mask = (const struct rte_flow_item_eth *)item->mask; + + /* Mask bits of source MAC address must be full of 0. + * Mask bits of destination MAC address must be full + * of 1 or full of 0. + */ + if (!is_zero_ether_addr(ð_mask->src) || + (!is_zero_ether_addr(ð_mask->dst) && + !is_broadcast_ether_addr(ð_mask->dst))) { + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM, + item, "Invalid ether address mask"); + return -rte_errno; + } + + if ((eth_mask->type & UINT16_MAX) != UINT16_MAX) { + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM, + item, "Invalid ethertype mask"); + return -rte_errno; + } + + /* If mask bits of destination MAC address + * are full of 1, set RTE_ETHTYPE_FLAGS_MAC. 
+ */ + if (is_broadcast_ether_addr(ð_mask->dst)) { + filter->mac_addr = eth_spec->dst; + filter->flags |= RTE_ETHTYPE_FLAGS_MAC; + } else { + filter->flags &= ~RTE_ETHTYPE_FLAGS_MAC; + } + filter->ether_type = rte_be_to_cpu_16(eth_spec->type); + + /* Check if the next non-void item is END. */ + index++; + item = pattern + index; + while (item->type == RTE_FLOW_ITEM_TYPE_VOID) { + index++; + item = pattern + index; + } + if (item->type != RTE_FLOW_ITEM_TYPE_END) { + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM, + item, "Not supported by ethertype filter."); + return -rte_errno; + } + + /* Parse action */ + + index = 0; + /* Check if the first non-void action is QUEUE or DROP. */ + act = actions + index; + while (act->type == RTE_FLOW_ACTION_TYPE_VOID) { + index++; + act = actions + index; + } + if (act->type != RTE_FLOW_ACTION_TYPE_QUEUE && + act->type != RTE_FLOW_ACTION_TYPE_DROP) { + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ACTION, + act, "Not supported action."); + return -rte_errno; + } + + if (act->type == RTE_FLOW_ACTION_TYPE_QUEUE) { + act_q = (const struct rte_flow_action_queue *)act->conf; + filter->queue = act_q->index; + } else { + filter->flags |= RTE_ETHTYPE_FLAGS_DROP; + } + + /* Check if the next non-void item is END */ + index++; + act = actions + index; + while (act->type == RTE_FLOW_ACTION_TYPE_VOID) { + index++; + act = actions + index; + } + if (act->type != RTE_FLOW_ACTION_TYPE_END) { + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ACTION, + act, "Not supported action."); + return -rte_errno; + } + + /* Parse attr */ + /* Must be input direction */ + if (!attr->ingress) { + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ATTR_INGRESS, + attr, "Only support ingress."); + return -rte_errno; + } + + /* Not supported */ + if (attr->egress) { + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ATTR_EGRESS, + attr, "Not support egress."); + return -rte_errno; + } + + /* Not supported */ + if (attr->priority) { + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ATTR_PRIORITY, + attr, "Not support priority."); + return -rte_errno; + } + + /* Not supported */ + if (attr->group) { + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ATTR_GROUP, + attr, "Not support group."); + return -rte_errno; + } + + return 0; +} + +static int +ixgbe_parse_ethertype_filter(const struct rte_flow_attr *attr, + const struct rte_flow_item pattern[], + const struct rte_flow_action actions[], + struct rte_eth_ethertype_filter *filter, + struct rte_flow_error *error) +{ + int ret; + + ret = cons_parse_ethertype_filter(attr, pattern, + actions, filter, error); + + if (ret) + return ret; + + /* Ixgbe doesn't support MAC address. 
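The ethertype path is the narrowest of the parsers: one ETH item with a fully masked EtherType, no MAC comparison, no IPv4/IPv6 EtherTypes, and (per the ixgbe-specific checks that follow) only a QUEUE action. A sketch of such a rule, reusing the headers from the n-tuple sketch; the EtherType 0x0807 comes from the header comment and the queue index is illustrative:

static struct rte_flow *
ethertype_flow_sketch(uint8_t port_id)
{
	struct rte_flow_error err;
	struct rte_flow_attr attr = { .ingress = 1 };  /* priority/group must stay 0 */
	struct rte_flow_item_eth eth_spec = {
		.type = rte_cpu_to_be_16(0x0807),
	};
	struct rte_flow_item_eth eth_mask = {
		.type = 0xFFFF,       /* EtherType fully masked */
		/* src/dst MAC masks stay all-zero: MAC compare is rejected */
	};
	struct rte_flow_item pattern[] = {
		{ .type = RTE_FLOW_ITEM_TYPE_ETH,
		  .spec = &eth_spec, .mask = &eth_mask },
		{ .type = RTE_FLOW_ITEM_TYPE_END },
	};
	struct rte_flow_action_queue queue = { .index = 1 };
	struct rte_flow_action actions[] = {
		{ .type = RTE_FLOW_ACTION_TYPE_QUEUE, .conf = &queue },
		{ .type = RTE_FLOW_ACTION_TYPE_END },
	};

	return rte_flow_create(port_id, &attr, pattern, actions, &err);
}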
*/ + if (filter->flags & RTE_ETHTYPE_FLAGS_MAC) { + memset(filter, 0, sizeof(struct rte_eth_ethertype_filter)); + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM, + NULL, "Not supported by ethertype filter"); + return -rte_errno; + } + + if (filter->queue >= IXGBE_MAX_RX_QUEUE_NUM) { + memset(filter, 0, sizeof(struct rte_eth_ethertype_filter)); + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM, + NULL, "queue index much too big"); + return -rte_errno; + } + + if (filter->ether_type == ETHER_TYPE_IPv4 || + filter->ether_type == ETHER_TYPE_IPv6) { + memset(filter, 0, sizeof(struct rte_eth_ethertype_filter)); + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM, + NULL, "IPv4/IPv6 not supported by ethertype filter"); + return -rte_errno; + } + + if (filter->flags & RTE_ETHTYPE_FLAGS_MAC) { + memset(filter, 0, sizeof(struct rte_eth_ethertype_filter)); + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM, + NULL, "mac compare is unsupported"); + return -rte_errno; + } + + if (filter->flags & RTE_ETHTYPE_FLAGS_DROP) { + memset(filter, 0, sizeof(struct rte_eth_ethertype_filter)); + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM, + NULL, "drop option is unsupported"); + return -rte_errno; + } + + return 0; +} + +/** + * Parse the rule to see if it is a TCP SYN rule. + * And get the TCP SYN filter info BTW. + * pattern: + * The first not void item must be ETH. + * The second not void item must be IPV4 or IPV6. + * The third not void item must be TCP. + * The next not void item must be END. + * action: + * The first not void action should be QUEUE. + * The next not void action should be END. + * pattern example: + * ITEM Spec Mask + * ETH NULL NULL + * IPV4/IPV6 NULL NULL + * TCP tcp_flags 0x02 0xFF + * END + * other members in mask and spec should set to 0x00. + * item->last should be NULL. 
+ */ +static int +cons_parse_syn_filter(const struct rte_flow_attr *attr, + const struct rte_flow_item pattern[], + const struct rte_flow_action actions[], + struct rte_eth_syn_filter *filter, + struct rte_flow_error *error) +{ + const struct rte_flow_item *item; + const struct rte_flow_action *act; + const struct rte_flow_item_tcp *tcp_spec; + const struct rte_flow_item_tcp *tcp_mask; + const struct rte_flow_action_queue *act_q; + uint32_t index; + + if (!pattern) { + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM_NUM, + NULL, "NULL pattern."); + return -rte_errno; + } + + if (!actions) { + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ACTION_NUM, + NULL, "NULL action."); + return -rte_errno; + } + + if (!attr) { + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ATTR, + NULL, "NULL attribute."); + return -rte_errno; + } + + /* parse pattern */ + index = 0; + + /* the first not void item should be MAC or IPv4 or IPv6 or TCP */ + NEXT_ITEM_OF_PATTERN(item, pattern, index); + if (item->type != RTE_FLOW_ITEM_TYPE_ETH && + item->type != RTE_FLOW_ITEM_TYPE_IPV4 && + item->type != RTE_FLOW_ITEM_TYPE_IPV6 && + item->type != RTE_FLOW_ITEM_TYPE_TCP) { + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM, + item, "Not supported by syn filter"); + return -rte_errno; + } + /*Not supported last point for range*/ + if (item->last) { + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_UNSPECIFIED, + item, "Not supported last point for range"); + return -rte_errno; + } + + /* Skip Ethernet */ + if (item->type == RTE_FLOW_ITEM_TYPE_ETH) { + /* if the item is MAC, the content should be NULL */ + if (item->spec || item->mask) { + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM, + item, "Invalid SYN address mask"); + return -rte_errno; + } + + /* check if the next not void item is IPv4 or IPv6 */ + index++; + NEXT_ITEM_OF_PATTERN(item, pattern, index); + if (item->type != RTE_FLOW_ITEM_TYPE_IPV4 && + item->type != RTE_FLOW_ITEM_TYPE_IPV6) { + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM, + item, "Not supported by syn filter"); + return -rte_errno; + } + } + + /* Skip IP */ + if (item->type == RTE_FLOW_ITEM_TYPE_IPV4 || + item->type == RTE_FLOW_ITEM_TYPE_IPV6) { + /* if the item is IP, the content should be NULL */ + if (item->spec || item->mask) { + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM, + item, "Invalid SYN mask"); + return -rte_errno; + } + + /* check if the next not void item is TCP */ + index++; + NEXT_ITEM_OF_PATTERN(item, pattern, index); + if (item->type != RTE_FLOW_ITEM_TYPE_TCP) { + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM, + item, "Not supported by syn filter"); + return -rte_errno; + } + } + + /* Get the TCP info. Only support SYN. 
*/ + if (!item->spec || !item->mask) { + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM, + item, "Invalid SYN mask"); + return -rte_errno; + } + /*Not supported last point for range*/ + if (item->last) { + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_UNSPECIFIED, + item, "Not supported last point for range"); + return -rte_errno; + } + + tcp_spec = (const struct rte_flow_item_tcp *)item->spec; + tcp_mask = (const struct rte_flow_item_tcp *)item->mask; + if (!(tcp_spec->hdr.tcp_flags & TCP_SYN_FLAG) || + tcp_mask->hdr.src_port || + tcp_mask->hdr.dst_port || + tcp_mask->hdr.sent_seq || + tcp_mask->hdr.recv_ack || + tcp_mask->hdr.data_off || + tcp_mask->hdr.tcp_flags != TCP_SYN_FLAG || + tcp_mask->hdr.rx_win || + tcp_mask->hdr.cksum || + tcp_mask->hdr.tcp_urp) { + memset(filter, 0, sizeof(struct rte_eth_syn_filter)); + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM, + item, "Not supported by syn filter"); + return -rte_errno; + } + + /* check if the next not void item is END */ + index++; + NEXT_ITEM_OF_PATTERN(item, pattern, index); + if (item->type != RTE_FLOW_ITEM_TYPE_END) { + memset(filter, 0, sizeof(struct rte_eth_syn_filter)); + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM, + item, "Not supported by syn filter"); + return -rte_errno; + } + + /* parse action */ + index = 0; + + /* check if the first not void action is QUEUE. */ + NEXT_ITEM_OF_ACTION(act, actions, index); + if (act->type != RTE_FLOW_ACTION_TYPE_QUEUE) { + memset(filter, 0, sizeof(struct rte_eth_syn_filter)); + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ACTION, + act, "Not supported action."); + return -rte_errno; + } + + act_q = (const struct rte_flow_action_queue *)act->conf; + filter->queue = act_q->index; + if (filter->queue >= IXGBE_MAX_RX_QUEUE_NUM) { + memset(filter, 0, sizeof(struct rte_eth_syn_filter)); + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ACTION, + act, "Not supported action."); + return -rte_errno; + } + + /* check if the next not void item is END */ + index++; + NEXT_ITEM_OF_ACTION(act, actions, index); + if (act->type != RTE_FLOW_ACTION_TYPE_END) { + memset(filter, 0, sizeof(struct rte_eth_syn_filter)); + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ACTION, + act, "Not supported action."); + return -rte_errno; + } + + /* parse attr */ + /* must be input direction */ + if (!attr->ingress) { + memset(filter, 0, sizeof(struct rte_eth_syn_filter)); + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ATTR_INGRESS, + attr, "Only support ingress."); + return -rte_errno; + } + + /* not supported */ + if (attr->egress) { + memset(filter, 0, sizeof(struct rte_eth_syn_filter)); + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ATTR_EGRESS, + attr, "Not support egress."); + return -rte_errno; + } + + /* Support 2 priorities, the lowest or highest. 
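For the SYN filter only the SYN bit matters: the ETH and IP items must be bare place-holders, the TCP item masks exactly tcp_flags == SYN, and the attribute priority selects the low (0) or high (UINT32_MAX) position, as the code just below shows. A sketch of that rule, again with illustrative port and queue values:

static struct rte_flow *
tcp_syn_flow_sketch(uint8_t port_id)
{
	struct rte_flow_error err;
	struct rte_flow_attr attr = { .ingress = 1, .priority = 0 }; /* 0 = low prio */
	struct rte_flow_item_tcp tcp_spec = {
		.hdr = { .tcp_flags = 0x02 },   /* SYN set in the spec */
	};
	struct rte_flow_item_tcp tcp_mask = {
		.hdr = { .tcp_flags = 0x02 },   /* only the SYN bit is compared */
	};
	struct rte_flow_item pattern[] = {
		{ .type = RTE_FLOW_ITEM_TYPE_ETH },   /* spec/mask must stay NULL */
		{ .type = RTE_FLOW_ITEM_TYPE_IPV4 },  /* likewise */
		{ .type = RTE_FLOW_ITEM_TYPE_TCP,
		  .spec = &tcp_spec, .mask = &tcp_mask },
		{ .type = RTE_FLOW_ITEM_TYPE_END },
	};
	struct rte_flow_action_queue queue = { .index = 2 };
	struct rte_flow_action actions[] = {
		{ .type = RTE_FLOW_ACTION_TYPE_QUEUE, .conf = &queue },
		{ .type = RTE_FLOW_ACTION_TYPE_END },
	};

	return rte_flow_create(port_id, &attr, pattern, actions, &err);
}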
*/ + if (!attr->priority) { + filter->hig_pri = 0; + } else if (attr->priority == (uint32_t)~0U) { + filter->hig_pri = 1; + } else { + memset(filter, 0, sizeof(struct rte_eth_syn_filter)); + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ATTR_PRIORITY, + attr, "Not support priority."); + return -rte_errno; + } + + return 0; +} + +static int +ixgbe_parse_syn_filter(const struct rte_flow_attr *attr, + const struct rte_flow_item pattern[], + const struct rte_flow_action actions[], + struct rte_eth_syn_filter *filter, + struct rte_flow_error *error) +{ + int ret; + + ret = cons_parse_syn_filter(attr, pattern, + actions, filter, error); + + if (ret) + return ret; + + return 0; +} + +/** + * Parse the rule to see if it is a L2 tunnel rule. + * And get the L2 tunnel filter info BTW. + * Only support E-tag now. + * pattern: + * The first not void item can be E_TAG. + * The next not void item must be END. + * action: + * The first not void action should be QUEUE. + * The next not void action should be END. + * pattern example: + * ITEM Spec Mask + * E_TAG grp 0x1 0x3 + e_cid_base 0x309 0xFFF + * END + * other members in mask and spec should set to 0x00. + * item->last should be NULL. + */ +static int +cons_parse_l2_tn_filter(const struct rte_flow_attr *attr, + const struct rte_flow_item pattern[], + const struct rte_flow_action actions[], + struct rte_eth_l2_tunnel_conf *filter, + struct rte_flow_error *error) +{ + const struct rte_flow_item *item; + const struct rte_flow_item_e_tag *e_tag_spec; + const struct rte_flow_item_e_tag *e_tag_mask; + const struct rte_flow_action *act; + const struct rte_flow_action_queue *act_q; + uint32_t index; + + if (!pattern) { + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM_NUM, + NULL, "NULL pattern."); + return -rte_errno; + } + + if (!actions) { + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ACTION_NUM, + NULL, "NULL action."); + return -rte_errno; + } + + if (!attr) { + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ATTR, + NULL, "NULL attribute."); + return -rte_errno; + } + /* parse pattern */ + index = 0; + + /* The first not void item should be e-tag. */ + NEXT_ITEM_OF_PATTERN(item, pattern, index); + if (item->type != RTE_FLOW_ITEM_TYPE_E_TAG) { + memset(filter, 0, sizeof(struct rte_eth_l2_tunnel_conf)); + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM, + item, "Not supported by L2 tunnel filter"); + return -rte_errno; + } + + if (!item->spec || !item->mask) { + memset(filter, 0, sizeof(struct rte_eth_l2_tunnel_conf)); + rte_flow_error_set(error, EINVAL, RTE_FLOW_ERROR_TYPE_ITEM, + item, "Not supported by L2 tunnel filter"); + return -rte_errno; + } + + /*Not supported last point for range*/ + if (item->last) { + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_UNSPECIFIED, + item, "Not supported last point for range"); + return -rte_errno; + } + + e_tag_spec = (const struct rte_flow_item_e_tag *)item->spec; + e_tag_mask = (const struct rte_flow_item_e_tag *)item->mask; + + /* Only care about GRP and E cid base. */ + if (e_tag_mask->epcp_edei_in_ecid_b || + e_tag_mask->in_ecid_e || + e_tag_mask->ecid_e || + e_tag_mask->rsvd_grp_ecid_b != rte_cpu_to_be_16(0x3FFF)) { + memset(filter, 0, sizeof(struct rte_eth_l2_tunnel_conf)); + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM, + item, "Not supported by L2 tunnel filter"); + return -rte_errno; + } + + filter->l2_tunnel_type = RTE_L2_TUNNEL_TYPE_E_TAG; + /** + * grp and e_cid_base are bit fields and only use 14 bits. 
+ * e-tag id is taken as little endian by HW. + */ + filter->tunnel_id = rte_be_to_cpu_16(e_tag_spec->rsvd_grp_ecid_b); + + /* check if the next not void item is END */ + index++; + NEXT_ITEM_OF_PATTERN(item, pattern, index); + if (item->type != RTE_FLOW_ITEM_TYPE_END) { + memset(filter, 0, sizeof(struct rte_eth_l2_tunnel_conf)); + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM, + item, "Not supported by L2 tunnel filter"); + return -rte_errno; + } + + /* parse attr */ + /* must be input direction */ + if (!attr->ingress) { + memset(filter, 0, sizeof(struct rte_eth_l2_tunnel_conf)); + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ATTR_INGRESS, + attr, "Only support ingress."); + return -rte_errno; + } + + /* not supported */ + if (attr->egress) { + memset(filter, 0, sizeof(struct rte_eth_l2_tunnel_conf)); + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ATTR_EGRESS, + attr, "Not support egress."); + return -rte_errno; + } + + /* not supported */ + if (attr->priority) { + memset(filter, 0, sizeof(struct rte_eth_l2_tunnel_conf)); + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ATTR_PRIORITY, + attr, "Not support priority."); + return -rte_errno; + } + + /* parse action */ + index = 0; + + /* check if the first not void action is QUEUE. */ + NEXT_ITEM_OF_ACTION(act, actions, index); + if (act->type != RTE_FLOW_ACTION_TYPE_QUEUE) { + memset(filter, 0, sizeof(struct rte_eth_l2_tunnel_conf)); + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ACTION, + act, "Not supported action."); + return -rte_errno; + } + + act_q = (const struct rte_flow_action_queue *)act->conf; + filter->pool = act_q->index; + + /* check if the next not void item is END */ + index++; + NEXT_ITEM_OF_ACTION(act, actions, index); + if (act->type != RTE_FLOW_ACTION_TYPE_END) { + memset(filter, 0, sizeof(struct rte_eth_l2_tunnel_conf)); + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ACTION, + act, "Not supported action."); + return -rte_errno; + } + + return 0; +} + +static int +ixgbe_validate_l2_tn_filter(struct rte_eth_dev *dev, + const struct rte_flow_attr *attr, + const struct rte_flow_item pattern[], + const struct rte_flow_action actions[], + struct rte_eth_l2_tunnel_conf *l2_tn_filter, + struct rte_flow_error *error) +{ + int ret = 0; + struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private); + + ret = cons_parse_l2_tn_filter(attr, pattern, + actions, l2_tn_filter, error); + + if (hw->mac.type != ixgbe_mac_X550 && + hw->mac.type != ixgbe_mac_X550EM_x && + hw->mac.type != ixgbe_mac_X550EM_a) { + memset(l2_tn_filter, 0, sizeof(struct rte_eth_l2_tunnel_conf)); + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM, + NULL, "Not supported by L2 tunnel filter"); + return -rte_errno; + } + + return ret; +} + +/* Parse to get the attr and action info of flow director rule. 
*/ +static int +ixgbe_parse_fdir_act_attr(const struct rte_flow_attr *attr, + const struct rte_flow_action actions[], + struct ixgbe_fdir_rule *rule, + struct rte_flow_error *error) +{ + const struct rte_flow_action *act; + const struct rte_flow_action_queue *act_q; + const struct rte_flow_action_mark *mark; + uint32_t index; + + /* parse attr */ + /* must be input direction */ + if (!attr->ingress) { + memset(rule, 0, sizeof(struct ixgbe_fdir_rule)); + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ATTR_INGRESS, + attr, "Only support ingress."); + return -rte_errno; + } + + /* not supported */ + if (attr->egress) { + memset(rule, 0, sizeof(struct ixgbe_fdir_rule)); + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ATTR_EGRESS, + attr, "Not support egress."); + return -rte_errno; + } + + /* not supported */ + if (attr->priority) { + memset(rule, 0, sizeof(struct ixgbe_fdir_rule)); + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ATTR_PRIORITY, + attr, "Not support priority."); + return -rte_errno; + } + + /* parse action */ + index = 0; + + /* check if the first not void action is QUEUE or DROP. */ + NEXT_ITEM_OF_ACTION(act, actions, index); + if (act->type != RTE_FLOW_ACTION_TYPE_QUEUE && + act->type != RTE_FLOW_ACTION_TYPE_DROP) { + memset(rule, 0, sizeof(struct ixgbe_fdir_rule)); + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ACTION, + act, "Not supported action."); + return -rte_errno; + } + + if (act->type == RTE_FLOW_ACTION_TYPE_QUEUE) { + act_q = (const struct rte_flow_action_queue *)act->conf; + rule->queue = act_q->index; + } else { /* drop */ + rule->fdirflags = IXGBE_FDIRCMD_DROP; + } + + /* check if the next not void item is MARK */ + index++; + NEXT_ITEM_OF_ACTION(act, actions, index); + if ((act->type != RTE_FLOW_ACTION_TYPE_MARK) && + (act->type != RTE_FLOW_ACTION_TYPE_END)) { + memset(rule, 0, sizeof(struct ixgbe_fdir_rule)); + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ACTION, + act, "Not supported action."); + return -rte_errno; + } + + rule->soft_id = 0; + + if (act->type == RTE_FLOW_ACTION_TYPE_MARK) { + mark = (const struct rte_flow_action_mark *)act->conf; + rule->soft_id = mark->id; + index++; + NEXT_ITEM_OF_ACTION(act, actions, index); + } + + /* check if the next not void item is END */ + if (act->type != RTE_FLOW_ACTION_TYPE_END) { + memset(rule, 0, sizeof(struct ixgbe_fdir_rule)); + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ACTION, + act, "Not supported action."); + return -rte_errno; + } + + return 0; +} + +/** + * Parse the rule to see if it is a IP or MAC VLAN flow director rule. + * And get the flow director filter info BTW. + * UDP/TCP/SCTP PATTERN: + * The first not void item can be ETH or IPV4. + * The second not void item must be IPV4 if the first one is ETH. + * The third not void item must be UDP or TCP or SCTP. + * The next not void item must be END. + * MAC VLAN PATTERN: + * The first not void item must be ETH. + * The second not void item must be MAC VLAN. + * The next not void item must be END. + * ACTION: + * The first not void action should be QUEUE or DROP. + * The second not void optional action should be MARK, + * mark_id is a uint32_t number. + * The next not void action should be END. 
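ixgbe_parse_fdir_act_attr() above is shared by both flow director parsers: the rule must be ingress with priority 0, the fate is QUEUE or DROP, and an optional MARK action supplies the 32-bit value stored as rule->soft_id. A sketch of the corresponding action list; the queue number and mark id are illustrative:

	/* "send matches to queue 5 and remember them under soft_id 0x1234" */
	struct rte_flow_action_queue fdir_queue = { .index = 5 };
	struct rte_flow_action_mark fdir_mark = { .id = 0x1234 };  /* becomes rule->soft_id */
	struct rte_flow_action fdir_actions[] = {
		{ .type = RTE_FLOW_ACTION_TYPE_QUEUE, .conf = &fdir_queue },
		{ .type = RTE_FLOW_ACTION_TYPE_MARK,  .conf = &fdir_mark },
		{ .type = RTE_FLOW_ACTION_TYPE_END },
	};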
+ * UDP/TCP/SCTP pattern example: + * ITEM Spec Mask + * ETH NULL NULL + * IPV4 src_addr 192.168.1.20 0xFFFFFFFF + * dst_addr 192.167.3.50 0xFFFFFFFF + * UDP/TCP/SCTP src_port 80 0xFFFF + * dst_port 80 0xFFFF + * END + * MAC VLAN pattern example: + * ITEM Spec Mask + * ETH dst_addr + {0xAC, 0x7B, 0xA1, {0xFF, 0xFF, 0xFF, + 0x2C, 0x6D, 0x36} 0xFF, 0xFF, 0xFF} + * MAC VLAN tci 0x2016 0xFFFF + * tpid 0x8100 0xFFFF + * END + * Other members in mask and spec should set to 0x00. + * Item->last should be NULL. + */ +static int +ixgbe_parse_fdir_filter_normal(const struct rte_flow_attr *attr, + const struct rte_flow_item pattern[], + const struct rte_flow_action actions[], + struct ixgbe_fdir_rule *rule, + struct rte_flow_error *error) +{ + const struct rte_flow_item *item; + const struct rte_flow_item_eth *eth_spec; + const struct rte_flow_item_eth *eth_mask; + const struct rte_flow_item_ipv4 *ipv4_spec; + const struct rte_flow_item_ipv4 *ipv4_mask; + const struct rte_flow_item_tcp *tcp_spec; + const struct rte_flow_item_tcp *tcp_mask; + const struct rte_flow_item_udp *udp_spec; + const struct rte_flow_item_udp *udp_mask; + const struct rte_flow_item_sctp *sctp_spec; + const struct rte_flow_item_sctp *sctp_mask; + const struct rte_flow_item_vlan *vlan_spec; + const struct rte_flow_item_vlan *vlan_mask; + + uint32_t index, j; + + if (!pattern) { + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM_NUM, + NULL, "NULL pattern."); + return -rte_errno; + } + + if (!actions) { + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ACTION_NUM, + NULL, "NULL action."); + return -rte_errno; + } + + if (!attr) { + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ATTR, + NULL, "NULL attribute."); + return -rte_errno; + } + + /** + * Some fields may not be provided. Set spec to 0 and mask to default + * value. So, we need not do anything for the not provided fields later. + */ + memset(rule, 0, sizeof(struct ixgbe_fdir_rule)); + memset(&rule->mask, 0xFF, sizeof(struct ixgbe_hw_fdir_mask)); + rule->mask.vlan_tci_mask = 0; + + /* parse pattern */ + index = 0; + + /** + * The first not void item should be + * MAC or IPv4 or TCP or UDP or SCTP. + */ + NEXT_ITEM_OF_PATTERN(item, pattern, index); + if (item->type != RTE_FLOW_ITEM_TYPE_ETH && + item->type != RTE_FLOW_ITEM_TYPE_IPV4 && + item->type != RTE_FLOW_ITEM_TYPE_TCP && + item->type != RTE_FLOW_ITEM_TYPE_UDP && + item->type != RTE_FLOW_ITEM_TYPE_SCTP) { + memset(rule, 0, sizeof(struct ixgbe_fdir_rule)); + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM, + item, "Not supported by fdir filter"); + return -rte_errno; + } + + rule->mode = RTE_FDIR_MODE_PERFECT; + + /*Not supported last point for range*/ + if (item->last) { + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_UNSPECIFIED, + item, "Not supported last point for range"); + return -rte_errno; + } + + /* Get the MAC info. */ + if (item->type == RTE_FLOW_ITEM_TYPE_ETH) { + /** + * Only support vlan and dst MAC address, + * others should be masked. + */ + if (item->spec && !item->mask) { + memset(rule, 0, sizeof(struct ixgbe_fdir_rule)); + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM, + item, "Not supported by fdir filter"); + return -rte_errno; + } + + if (item->spec) { + rule->b_spec = TRUE; + eth_spec = (const struct rte_flow_item_eth *)item->spec; + + /* Get the dst MAC. 
*/ + for (j = 0; j < ETHER_ADDR_LEN; j++) { + rule->ixgbe_fdir.formatted.inner_mac[j] = + eth_spec->dst.addr_bytes[j]; + } + } + + + if (item->mask) { + /* If ethernet has meaning, it means MAC VLAN mode. */ + rule->mode = RTE_FDIR_MODE_PERFECT_MAC_VLAN; + + rule->b_mask = TRUE; + eth_mask = (const struct rte_flow_item_eth *)item->mask; + + /* Ether type should be masked. */ + if (eth_mask->type) { + memset(rule, 0, sizeof(struct ixgbe_fdir_rule)); + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM, + item, "Not supported by fdir filter"); + return -rte_errno; + } + + /** + * src MAC address must be masked, + * and don't support dst MAC address mask. + */ + for (j = 0; j < ETHER_ADDR_LEN; j++) { + if (eth_mask->src.addr_bytes[j] || + eth_mask->dst.addr_bytes[j] != 0xFF) { + memset(rule, 0, + sizeof(struct ixgbe_fdir_rule)); + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM, + item, "Not supported by fdir filter"); + return -rte_errno; + } + } + + /* When no VLAN, considered as full mask. */ + rule->mask.vlan_tci_mask = rte_cpu_to_be_16(0xEFFF); + } + /*** If both spec and mask are item, + * it means don't care about ETH. + * Do nothing. + */ + + /** + * Check if the next not void item is vlan or ipv4. + * IPv6 is not supported. + */ + index++; + NEXT_ITEM_OF_PATTERN(item, pattern, index); + if (rule->mode == RTE_FDIR_MODE_PERFECT_MAC_VLAN) { + if (item->type != RTE_FLOW_ITEM_TYPE_VLAN) { + memset(rule, 0, sizeof(struct ixgbe_fdir_rule)); + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM, + item, "Not supported by fdir filter"); + return -rte_errno; + } + } else { + if (item->type != RTE_FLOW_ITEM_TYPE_IPV4) { + memset(rule, 0, sizeof(struct ixgbe_fdir_rule)); + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM, + item, "Not supported by fdir filter"); + return -rte_errno; + } + } + } + + if (item->type == RTE_FLOW_ITEM_TYPE_VLAN) { + if (!(item->spec && item->mask)) { + memset(rule, 0, sizeof(struct ixgbe_fdir_rule)); + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM, + item, "Not supported by fdir filter"); + return -rte_errno; + } + + /*Not supported last point for range*/ + if (item->last) { + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_UNSPECIFIED, + item, "Not supported last point for range"); + return -rte_errno; + } + + vlan_spec = (const struct rte_flow_item_vlan *)item->spec; + vlan_mask = (const struct rte_flow_item_vlan *)item->mask; + + if (vlan_spec->tpid != rte_cpu_to_be_16(ETHER_TYPE_VLAN)) { + memset(rule, 0, sizeof(struct ixgbe_fdir_rule)); + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM, + item, "Not supported by fdir filter"); + return -rte_errno; + } + + rule->ixgbe_fdir.formatted.vlan_id = vlan_spec->tci; + + if (vlan_mask->tpid != (uint16_t)~0U) { + memset(rule, 0, sizeof(struct ixgbe_fdir_rule)); + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM, + item, "Not supported by fdir filter"); + return -rte_errno; + } + rule->mask.vlan_tci_mask = vlan_mask->tci; + /* More than one tags are not supported. */ + + /** + * Check if the next not void item is not vlan. 
+ */ + index++; + NEXT_ITEM_OF_PATTERN(item, pattern, index); + if (item->type == RTE_FLOW_ITEM_TYPE_VLAN) { + memset(rule, 0, sizeof(struct ixgbe_fdir_rule)); + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM, + item, "Not supported by fdir filter"); + return -rte_errno; + } else if (item->type != RTE_FLOW_ITEM_TYPE_END) { + memset(rule, 0, sizeof(struct ixgbe_fdir_rule)); + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM, + item, "Not supported by fdir filter"); + return -rte_errno; + } + } + + /* Get the IP info. */ + if (item->type == RTE_FLOW_ITEM_TYPE_IPV4) { + /** + * Set the flow type even if there's no content + * as we must have a flow type. + */ + rule->ixgbe_fdir.formatted.flow_type = + IXGBE_ATR_FLOW_TYPE_IPV4; + /*Not supported last point for range*/ + if (item->last) { + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_UNSPECIFIED, + item, "Not supported last point for range"); + return -rte_errno; + } + /** + * Only care about src & dst addresses, + * others should be masked. + */ + if (!item->mask) { + memset(rule, 0, sizeof(struct ixgbe_fdir_rule)); + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM, + item, "Not supported by fdir filter"); + return -rte_errno; + } + rule->b_mask = TRUE; + ipv4_mask = + (const struct rte_flow_item_ipv4 *)item->mask; + if (ipv4_mask->hdr.version_ihl || + ipv4_mask->hdr.type_of_service || + ipv4_mask->hdr.total_length || + ipv4_mask->hdr.packet_id || + ipv4_mask->hdr.fragment_offset || + ipv4_mask->hdr.time_to_live || + ipv4_mask->hdr.next_proto_id || + ipv4_mask->hdr.hdr_checksum) { + memset(rule, 0, sizeof(struct ixgbe_fdir_rule)); + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM, + item, "Not supported by fdir filter"); + return -rte_errno; + } + rule->mask.dst_ipv4_mask = ipv4_mask->hdr.dst_addr; + rule->mask.src_ipv4_mask = ipv4_mask->hdr.src_addr; + + if (item->spec) { + rule->b_spec = TRUE; + ipv4_spec = + (const struct rte_flow_item_ipv4 *)item->spec; + rule->ixgbe_fdir.formatted.dst_ip[0] = + ipv4_spec->hdr.dst_addr; + rule->ixgbe_fdir.formatted.src_ip[0] = + ipv4_spec->hdr.src_addr; + } + + /** + * Check if the next not void item is + * TCP or UDP or SCTP or END. + */ + index++; + NEXT_ITEM_OF_PATTERN(item, pattern, index); + if (item->type != RTE_FLOW_ITEM_TYPE_TCP && + item->type != RTE_FLOW_ITEM_TYPE_UDP && + item->type != RTE_FLOW_ITEM_TYPE_SCTP && + item->type != RTE_FLOW_ITEM_TYPE_END) { + memset(rule, 0, sizeof(struct ixgbe_fdir_rule)); + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM, + item, "Not supported by fdir filter"); + return -rte_errno; + } + } + + /* Get the TCP info. */ + if (item->type == RTE_FLOW_ITEM_TYPE_TCP) { + /** + * Set the flow type even if there's no content + * as we must have a flow type. + */ + rule->ixgbe_fdir.formatted.flow_type = + IXGBE_ATR_FLOW_TYPE_TCPV4; + /*Not supported last point for range*/ + if (item->last) { + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_UNSPECIFIED, + item, "Not supported last point for range"); + return -rte_errno; + } + /** + * Only care about src & dst ports, + * others should be masked. 
+ */ + if (!item->mask) { + memset(rule, 0, sizeof(struct ixgbe_fdir_rule)); + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM, + item, "Not supported by fdir filter"); + return -rte_errno; + } + rule->b_mask = TRUE; + tcp_mask = (const struct rte_flow_item_tcp *)item->mask; + if (tcp_mask->hdr.sent_seq || + tcp_mask->hdr.recv_ack || + tcp_mask->hdr.data_off || + tcp_mask->hdr.tcp_flags || + tcp_mask->hdr.rx_win || + tcp_mask->hdr.cksum || + tcp_mask->hdr.tcp_urp) { + memset(rule, 0, sizeof(struct ixgbe_fdir_rule)); + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM, + item, "Not supported by fdir filter"); + return -rte_errno; + } + rule->mask.src_port_mask = tcp_mask->hdr.src_port; + rule->mask.dst_port_mask = tcp_mask->hdr.dst_port; + + if (item->spec) { + rule->b_spec = TRUE; + tcp_spec = (const struct rte_flow_item_tcp *)item->spec; + rule->ixgbe_fdir.formatted.src_port = + tcp_spec->hdr.src_port; + rule->ixgbe_fdir.formatted.dst_port = + tcp_spec->hdr.dst_port; + } + } + + /* Get the UDP info */ + if (item->type == RTE_FLOW_ITEM_TYPE_UDP) { + /** + * Set the flow type even if there's no content + * as we must have a flow type. + */ + rule->ixgbe_fdir.formatted.flow_type = + IXGBE_ATR_FLOW_TYPE_UDPV4; + /*Not supported last point for range*/ + if (item->last) { + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_UNSPECIFIED, + item, "Not supported last point for range"); + return -rte_errno; + } + /** + * Only care about src & dst ports, + * others should be masked. + */ + if (!item->mask) { + memset(rule, 0, sizeof(struct ixgbe_fdir_rule)); + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM, + item, "Not supported by fdir filter"); + return -rte_errno; + } + rule->b_mask = TRUE; + udp_mask = (const struct rte_flow_item_udp *)item->mask; + if (udp_mask->hdr.dgram_len || + udp_mask->hdr.dgram_cksum) { + memset(rule, 0, sizeof(struct ixgbe_fdir_rule)); + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM, + item, "Not supported by fdir filter"); + return -rte_errno; + } + rule->mask.src_port_mask = udp_mask->hdr.src_port; + rule->mask.dst_port_mask = udp_mask->hdr.dst_port; + + if (item->spec) { + rule->b_spec = TRUE; + udp_spec = (const struct rte_flow_item_udp *)item->spec; + rule->ixgbe_fdir.formatted.src_port = + udp_spec->hdr.src_port; + rule->ixgbe_fdir.formatted.dst_port = + udp_spec->hdr.dst_port; + } + } + + /* Get the SCTP info */ + if (item->type == RTE_FLOW_ITEM_TYPE_SCTP) { + /** + * Set the flow type even if there's no content + * as we must have a flow type. + */ + rule->ixgbe_fdir.formatted.flow_type = + IXGBE_ATR_FLOW_TYPE_SCTPV4; + /*Not supported last point for range*/ + if (item->last) { + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_UNSPECIFIED, + item, "Not supported last point for range"); + return -rte_errno; + } + /** + * Only care about src & dst ports, + * others should be masked. 
+ */ + if (!item->mask) { + memset(rule, 0, sizeof(struct ixgbe_fdir_rule)); + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM, + item, "Not supported by fdir filter"); + return -rte_errno; + } + rule->b_mask = TRUE; + sctp_mask = + (const struct rte_flow_item_sctp *)item->mask; + if (sctp_mask->hdr.tag || + sctp_mask->hdr.cksum) { + memset(rule, 0, sizeof(struct ixgbe_fdir_rule)); + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM, + item, "Not supported by fdir filter"); + return -rte_errno; + } + rule->mask.src_port_mask = sctp_mask->hdr.src_port; + rule->mask.dst_port_mask = sctp_mask->hdr.dst_port; + + if (item->spec) { + rule->b_spec = TRUE; + sctp_spec = + (const struct rte_flow_item_sctp *)item->spec; + rule->ixgbe_fdir.formatted.src_port = + sctp_spec->hdr.src_port; + rule->ixgbe_fdir.formatted.dst_port = + sctp_spec->hdr.dst_port; + } + } + + if (item->type != RTE_FLOW_ITEM_TYPE_END) { + /* check if the next not void item is END */ + index++; + NEXT_ITEM_OF_PATTERN(item, pattern, index); + if (item->type != RTE_FLOW_ITEM_TYPE_END) { + memset(rule, 0, sizeof(struct ixgbe_fdir_rule)); + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM, + item, "Not supported by fdir filter"); + return -rte_errno; + } + } + + return ixgbe_parse_fdir_act_attr(attr, actions, rule, error); +} + +#define NVGRE_PROTOCOL 0x6558 + +/** + * Parse the rule to see if it is a VxLAN or NVGRE flow director rule. + * And get the flow director filter info BTW. + * VxLAN PATTERN: + * The first not void item must be ETH. + * The second not void item must be IPV4/ IPV6. + * The third not void item must be NVGRE. + * The next not void item must be END. + * NVGRE PATTERN: + * The first not void item must be ETH. + * The second not void item must be IPV4/ IPV6. + * The third not void item must be NVGRE. + * The next not void item must be END. + * ACTION: + * The first not void action should be QUEUE or DROP. + * The second not void optional action should be MARK, + * mark_id is a uint32_t number. + * The next not void action should be END. + * VxLAN pattern example: + * ITEM Spec Mask + * ETH NULL NULL + * IPV4/IPV6 NULL NULL + * UDP NULL NULL + * VxLAN vni{0x00, 0x32, 0x54} {0xFF, 0xFF, 0xFF} + * END + * NEGRV pattern example: + * ITEM Spec Mask + * ETH NULL NULL + * IPV4/IPV6 NULL NULL + * NVGRE protocol 0x6558 0xFFFF + * tni{0x00, 0x32, 0x54} {0xFF, 0xFF, 0xFF} + * END + * other members in mask and spec should set to 0x00. + * item->last should be NULL. 
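Taken together with the action list sketched after ixgbe_parse_fdir_act_attr(), a perfect-mode IPv4/UDP rule for ixgbe_parse_fdir_filter_normal() differs from the earlier n-tuple sketch only in its masks and attributes: attr.priority must stay 0, and the IPv4 mask may cover the addresses but not next_proto_id, which the parser above rejects. Only the differing pieces, as an illustrative sketch:

	/* fdir variant of the n-tuple masks: addresses and ports only */
	struct rte_flow_item_ipv4 fdir_ip_mask = {
		.hdr = { .src_addr = 0xFFFFFFFF, .dst_addr = 0xFFFFFFFF },
	};
	struct rte_flow_item_udp fdir_udp_mask = {
		.hdr = { .src_port = 0xFFFF, .dst_port = 0xFFFF },
	};
	/* pattern: ETH (bare) / IPV4 / UDP / END, actions: QUEUE + MARK + END */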
+ */ +static int +ixgbe_parse_fdir_filter_tunnel(const struct rte_flow_attr *attr, + const struct rte_flow_item pattern[], + const struct rte_flow_action actions[], + struct ixgbe_fdir_rule *rule, + struct rte_flow_error *error) +{ + const struct rte_flow_item *item; + const struct rte_flow_item_vxlan *vxlan_spec; + const struct rte_flow_item_vxlan *vxlan_mask; + const struct rte_flow_item_nvgre *nvgre_spec; + const struct rte_flow_item_nvgre *nvgre_mask; + const struct rte_flow_item_eth *eth_spec; + const struct rte_flow_item_eth *eth_mask; + const struct rte_flow_item_vlan *vlan_spec; + const struct rte_flow_item_vlan *vlan_mask; + uint32_t index, j; + + if (!pattern) { + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM_NUM, + NULL, "NULL pattern."); + return -rte_errno; + } + + if (!actions) { + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ACTION_NUM, + NULL, "NULL action."); + return -rte_errno; + } + + if (!attr) { + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ATTR, + NULL, "NULL attribute."); + return -rte_errno; + } + + /** + * Some fields may not be provided. Set spec to 0 and mask to default + * value. So, we need not do anything for the not provided fields later. + */ + memset(rule, 0, sizeof(struct ixgbe_fdir_rule)); + memset(&rule->mask, 0xFF, sizeof(struct ixgbe_hw_fdir_mask)); + rule->mask.vlan_tci_mask = 0; + + /* parse pattern */ + index = 0; + + /** + * The first not void item should be + * MAC or IPv4 or IPv6 or UDP or VxLAN. + */ + NEXT_ITEM_OF_PATTERN(item, pattern, index); + if (item->type != RTE_FLOW_ITEM_TYPE_ETH && + item->type != RTE_FLOW_ITEM_TYPE_IPV4 && + item->type != RTE_FLOW_ITEM_TYPE_IPV6 && + item->type != RTE_FLOW_ITEM_TYPE_UDP && + item->type != RTE_FLOW_ITEM_TYPE_VXLAN && + item->type != RTE_FLOW_ITEM_TYPE_NVGRE) { + memset(rule, 0, sizeof(struct ixgbe_fdir_rule)); + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM, + item, "Not supported by fdir filter"); + return -rte_errno; + } + + rule->mode = RTE_FDIR_MODE_PERFECT_TUNNEL; + + /* Skip MAC. */ + if (item->type == RTE_FLOW_ITEM_TYPE_ETH) { + /* Only used to describe the protocol stack. */ + if (item->spec || item->mask) { + memset(rule, 0, sizeof(struct ixgbe_fdir_rule)); + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM, + item, "Not supported by fdir filter"); + return -rte_errno; + } + /*Not supported last point for range*/ + if (item->last) { + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_UNSPECIFIED, + item, "Not supported last point for range"); + return -rte_errno; + } + + /* Check if the next not void item is IPv4 or IPv6. */ + index++; + NEXT_ITEM_OF_PATTERN(item, pattern, index); + if (item->type != RTE_FLOW_ITEM_TYPE_IPV4 && + item->type != RTE_FLOW_ITEM_TYPE_IPV6) { + memset(rule, 0, sizeof(struct ixgbe_fdir_rule)); + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM, + item, "Not supported by fdir filter"); + return -rte_errno; + } + } + + /* Skip IP. */ + if (item->type == RTE_FLOW_ITEM_TYPE_IPV4 || + item->type == RTE_FLOW_ITEM_TYPE_IPV6) { + /* Only used to describe the protocol stack. 
*/ + if (item->spec || item->mask) { + memset(rule, 0, sizeof(struct ixgbe_fdir_rule)); + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM, + item, "Not supported by fdir filter"); + return -rte_errno; + } + /*Not supported last point for range*/ + if (item->last) { + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_UNSPECIFIED, + item, "Not supported last point for range"); + return -rte_errno; + } + + /* Check if the next not void item is UDP or NVGRE. */ + index++; + NEXT_ITEM_OF_PATTERN(item, pattern, index); + if (item->type != RTE_FLOW_ITEM_TYPE_UDP && + item->type != RTE_FLOW_ITEM_TYPE_NVGRE) { + memset(rule, 0, sizeof(struct ixgbe_fdir_rule)); + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM, + item, "Not supported by fdir filter"); + return -rte_errno; + } + } + + /* Skip UDP. */ + if (item->type == RTE_FLOW_ITEM_TYPE_UDP) { + /* Only used to describe the protocol stack. */ + if (item->spec || item->mask) { + memset(rule, 0, sizeof(struct ixgbe_fdir_rule)); + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM, + item, "Not supported by fdir filter"); + return -rte_errno; + } + /*Not supported last point for range*/ + if (item->last) { + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_UNSPECIFIED, + item, "Not supported last point for range"); + return -rte_errno; + } + + /* Check if the next not void item is VxLAN. */ + index++; + NEXT_ITEM_OF_PATTERN(item, pattern, index); + if (item->type != RTE_FLOW_ITEM_TYPE_VXLAN) { + memset(rule, 0, sizeof(struct ixgbe_fdir_rule)); + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM, + item, "Not supported by fdir filter"); + return -rte_errno; + } + } + + /* Get the VxLAN info */ + if (item->type == RTE_FLOW_ITEM_TYPE_VXLAN) { + rule->ixgbe_fdir.formatted.tunnel_type = + RTE_FDIR_TUNNEL_TYPE_VXLAN; + + /* Only care about VNI, others should be masked. */ + if (!item->mask) { + memset(rule, 0, sizeof(struct ixgbe_fdir_rule)); + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM, + item, "Not supported by fdir filter"); + return -rte_errno; + } + /*Not supported last point for range*/ + if (item->last) { + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_UNSPECIFIED, + item, "Not supported last point for range"); + return -rte_errno; + } + rule->b_mask = TRUE; + + /* Tunnel type is always meaningful. */ + rule->mask.tunnel_type_mask = 1; + + vxlan_mask = + (const struct rte_flow_item_vxlan *)item->mask; + if (vxlan_mask->flags) { + memset(rule, 0, sizeof(struct ixgbe_fdir_rule)); + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM, + item, "Not supported by fdir filter"); + return -rte_errno; + } + /* VNI must be totally masked or not. 
*/ + if ((vxlan_mask->vni[0] || vxlan_mask->vni[1] || + vxlan_mask->vni[2]) && + ((vxlan_mask->vni[0] != 0xFF) || + (vxlan_mask->vni[1] != 0xFF) || + (vxlan_mask->vni[2] != 0xFF))) { + memset(rule, 0, sizeof(struct ixgbe_fdir_rule)); + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM, + item, "Not supported by fdir filter"); + return -rte_errno; + } + + rte_memcpy(&rule->mask.tunnel_id_mask, vxlan_mask->vni, + RTE_DIM(vxlan_mask->vni)); + rule->mask.tunnel_id_mask <<= 8; + + if (item->spec) { + rule->b_spec = TRUE; + vxlan_spec = (const struct rte_flow_item_vxlan *) + item->spec; + rte_memcpy(&rule->ixgbe_fdir.formatted.tni_vni, + vxlan_spec->vni, RTE_DIM(vxlan_spec->vni)); + rule->ixgbe_fdir.formatted.tni_vni <<= 8; + } + } + + /* Get the NVGRE info */ + if (item->type == RTE_FLOW_ITEM_TYPE_NVGRE) { + rule->ixgbe_fdir.formatted.tunnel_type = + RTE_FDIR_TUNNEL_TYPE_NVGRE; + + /** + * Only care about flags0, flags1, protocol and TNI, + * others should be masked. + */ + if (!item->mask) { + memset(rule, 0, sizeof(struct ixgbe_fdir_rule)); + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM, + item, "Not supported by fdir filter"); + return -rte_errno; + } + /*Not supported last point for range*/ + if (item->last) { + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_UNSPECIFIED, + item, "Not supported last point for range"); + return -rte_errno; + } + rule->b_mask = TRUE; + + /* Tunnel type is always meaningful. */ + rule->mask.tunnel_type_mask = 1; + + nvgre_mask = + (const struct rte_flow_item_nvgre *)item->mask; + if (nvgre_mask->flow_id) { + memset(rule, 0, sizeof(struct ixgbe_fdir_rule)); + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM, + item, "Not supported by fdir filter"); + return -rte_errno; + } + if (nvgre_mask->c_k_s_rsvd0_ver != + rte_cpu_to_be_16(0x3000) || + nvgre_mask->protocol != 0xFFFF) { + memset(rule, 0, sizeof(struct ixgbe_fdir_rule)); + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM, + item, "Not supported by fdir filter"); + return -rte_errno; + } + /* TNI must be totally masked or not. 
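[Illustration, not part of the patch] The NVGRE mask checks above, together with the spec checks just below, effectively pin the header to "key present, no sequence number, protocol 0x6558 (transparent Ethernet bridging)" with a fully masked 24-bit TNI. Inside a rule-building function (analogous to the VxLAN sketch earlier, TNI value hypothetical) that is roughly:

        struct rte_flow_item_nvgre nvgre_spec = {
            .c_k_s_rsvd0_ver = rte_cpu_to_be_16(0x2000), /* K bit set, S clear */
            .protocol = rte_cpu_to_be_16(0x6558),
            .tni = { 0x00, 0x32, 0x54 },
        };
        struct rte_flow_item_nvgre nvgre_mask = {
            .c_k_s_rsvd0_ver = rte_cpu_to_be_16(0x3000), /* match K and S bits */
            .protocol = 0xFFFF,
            .tni = { 0xFF, 0xFF, 0xFF },
        };
        /* used in a pattern[] in place of the VXLAN item of the earlier sketch */
        struct rte_flow_item nvgre_item = {
            .type = RTE_FLOW_ITEM_TYPE_NVGRE,
            .spec = &nvgre_spec,
            .mask = &nvgre_mask,
        };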
*/ + if (nvgre_mask->tni[0] && + ((nvgre_mask->tni[0] != 0xFF) || + (nvgre_mask->tni[1] != 0xFF) || + (nvgre_mask->tni[2] != 0xFF))) { + memset(rule, 0, sizeof(struct ixgbe_fdir_rule)); + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM, + item, "Not supported by fdir filter"); + return -rte_errno; + } + /* tni is a 24-bits bit field */ + rte_memcpy(&rule->mask.tunnel_id_mask, nvgre_mask->tni, + RTE_DIM(nvgre_mask->tni)); + rule->mask.tunnel_id_mask <<= 8; + + if (item->spec) { + rule->b_spec = TRUE; + nvgre_spec = + (const struct rte_flow_item_nvgre *)item->spec; + if (nvgre_spec->c_k_s_rsvd0_ver != + rte_cpu_to_be_16(0x2000) || + nvgre_spec->protocol != + rte_cpu_to_be_16(NVGRE_PROTOCOL)) { + memset(rule, 0, sizeof(struct ixgbe_fdir_rule)); + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM, + item, "Not supported by fdir filter"); + return -rte_errno; + } + /* tni is a 24-bits bit field */ + rte_memcpy(&rule->ixgbe_fdir.formatted.tni_vni, + nvgre_spec->tni, RTE_DIM(nvgre_spec->tni)); + rule->ixgbe_fdir.formatted.tni_vni <<= 8; + } + } + + /* check if the next not void item is MAC */ + index++; + NEXT_ITEM_OF_PATTERN(item, pattern, index); + if (item->type != RTE_FLOW_ITEM_TYPE_ETH) { + memset(rule, 0, sizeof(struct ixgbe_fdir_rule)); + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM, + item, "Not supported by fdir filter"); + return -rte_errno; + } + + /** + * Only support vlan and dst MAC address, + * others should be masked. + */ + + if (!item->mask) { + memset(rule, 0, sizeof(struct ixgbe_fdir_rule)); + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM, + item, "Not supported by fdir filter"); + return -rte_errno; + } + /*Not supported last point for range*/ + if (item->last) { + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_UNSPECIFIED, + item, "Not supported last point for range"); + return -rte_errno; + } + rule->b_mask = TRUE; + eth_mask = (const struct rte_flow_item_eth *)item->mask; + + /* Ether type should be masked. */ + if (eth_mask->type) { + memset(rule, 0, sizeof(struct ixgbe_fdir_rule)); + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM, + item, "Not supported by fdir filter"); + return -rte_errno; + } + + /* src MAC address should be masked. */ + for (j = 0; j < ETHER_ADDR_LEN; j++) { + if (eth_mask->src.addr_bytes[j]) { + memset(rule, 0, + sizeof(struct ixgbe_fdir_rule)); + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM, + item, "Not supported by fdir filter"); + return -rte_errno; + } + } + rule->mask.mac_addr_byte_mask = 0; + for (j = 0; j < ETHER_ADDR_LEN; j++) { + /* It's a per byte mask. */ + if (eth_mask->dst.addr_bytes[j] == 0xFF) { + rule->mask.mac_addr_byte_mask |= 0x1 << j; + } else if (eth_mask->dst.addr_bytes[j]) { + memset(rule, 0, sizeof(struct ixgbe_fdir_rule)); + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM, + item, "Not supported by fdir filter"); + return -rte_errno; + } + } + + /* When no vlan, considered as full mask. */ + rule->mask.vlan_tci_mask = rte_cpu_to_be_16(0xEFFF); + + if (item->spec) { + rule->b_spec = TRUE; + eth_spec = (const struct rte_flow_item_eth *)item->spec; + + /* Get the dst MAC. */ + for (j = 0; j < ETHER_ADDR_LEN; j++) { + rule->ixgbe_fdir.formatted.inner_mac[j] = + eth_spec->dst.addr_bytes[j]; + } + } + + /** + * Check if the next not void item is vlan or ipv4. + * IPv6 is not supported. 
+ */ + index++; + NEXT_ITEM_OF_PATTERN(item, pattern, index); + if ((item->type != RTE_FLOW_ITEM_TYPE_VLAN) && + (item->type != RTE_FLOW_ITEM_TYPE_VLAN)) { + memset(rule, 0, sizeof(struct ixgbe_fdir_rule)); + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM, + item, "Not supported by fdir filter"); + return -rte_errno; + } + /*Not supported last point for range*/ + if (item->last) { + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_UNSPECIFIED, + item, "Not supported last point for range"); + return -rte_errno; + } + + if (item->type == RTE_FLOW_ITEM_TYPE_VLAN) { + if (!(item->spec && item->mask)) { + memset(rule, 0, sizeof(struct ixgbe_fdir_rule)); + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM, + item, "Not supported by fdir filter"); + return -rte_errno; + } + + vlan_spec = (const struct rte_flow_item_vlan *)item->spec; + vlan_mask = (const struct rte_flow_item_vlan *)item->mask; + + if (vlan_spec->tpid != rte_cpu_to_be_16(ETHER_TYPE_VLAN)) { + memset(rule, 0, sizeof(struct ixgbe_fdir_rule)); + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM, + item, "Not supported by fdir filter"); + return -rte_errno; + } + + rule->ixgbe_fdir.formatted.vlan_id = vlan_spec->tci; + + if (vlan_mask->tpid != (uint16_t)~0U) { + memset(rule, 0, sizeof(struct ixgbe_fdir_rule)); + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM, + item, "Not supported by fdir filter"); + return -rte_errno; + } + rule->mask.vlan_tci_mask = vlan_mask->tci; + /* More than one tags are not supported. */ + + /** + * Check if the next not void item is not vlan. + */ + index++; + NEXT_ITEM_OF_PATTERN(item, pattern, index); + if (item->type == RTE_FLOW_ITEM_TYPE_VLAN) { + memset(rule, 0, sizeof(struct ixgbe_fdir_rule)); + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM, + item, "Not supported by fdir filter"); + return -rte_errno; + } else if (item->type != RTE_FLOW_ITEM_TYPE_END) { + memset(rule, 0, sizeof(struct ixgbe_fdir_rule)); + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM, + item, "Not supported by fdir filter"); + return -rte_errno; + } + /* check if the next not void item is END */ + index++; + NEXT_ITEM_OF_PATTERN(item, pattern, index); + if (item->type != RTE_FLOW_ITEM_TYPE_END) { + memset(rule, 0, sizeof(struct ixgbe_fdir_rule)); + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ITEM, + item, "Not supported by fdir filter"); + return -rte_errno; + } + } + + /** + * If the tags is 0, it means don't care about the VLAN. + * Do nothing. 
+ */ + + return ixgbe_parse_fdir_act_attr(attr, actions, rule, error); +} + +static int +ixgbe_validate_fdir_filter(struct rte_eth_dev *dev, + const struct rte_flow_attr *attr, + const struct rte_flow_item pattern[], + const struct rte_flow_action actions[], + struct ixgbe_fdir_rule *rule, + struct rte_flow_error *error) +{ + int ret = 0; + + enum rte_fdir_mode fdir_mode = dev->data->dev_conf.fdir_conf.mode; + + ixgbe_parse_fdir_filter(attr, pattern, actions, + rule, error); + + + if (fdir_mode == RTE_FDIR_MODE_NONE || + fdir_mode != rule->mode) + return -ENOTSUP; + + return ret; +} + +static int +ixgbe_parse_fdir_filter(const struct rte_flow_attr *attr, + const struct rte_flow_item pattern[], + const struct rte_flow_action actions[], + struct ixgbe_fdir_rule *rule, + struct rte_flow_error *error) +{ + int ret; + + ret = ixgbe_parse_fdir_filter_normal(attr, pattern, + actions, rule, error); + + if (!ret) + return 0; + + ret = ixgbe_parse_fdir_filter_tunnel(attr, pattern, + actions, rule, error); + + return ret; +} + +void +ixgbe_filterlist_flush(void) +{ + struct ixgbe_ntuple_filter_ele *ntuple_filter_ptr; + struct ixgbe_ethertype_filter_ele *ethertype_filter_ptr; + struct ixgbe_eth_syn_filter_ele *syn_filter_ptr; + struct ixgbe_eth_l2_tunnel_conf_ele *l2_tn_filter_ptr; + struct ixgbe_fdir_rule_ele *fdir_rule_ptr; + struct ixgbe_flow_mem *ixgbe_flow_mem_ptr; + + while ((ntuple_filter_ptr = TAILQ_FIRST(&filter_ntuple_list))) { + TAILQ_REMOVE(&filter_ntuple_list, + ntuple_filter_ptr, + entries); + rte_free(ntuple_filter_ptr); + } + + while ((ethertype_filter_ptr = TAILQ_FIRST(&filter_ethertype_list))) { + TAILQ_REMOVE(&filter_ethertype_list, + ethertype_filter_ptr, + entries); + rte_free(ethertype_filter_ptr); + } + + while ((syn_filter_ptr = TAILQ_FIRST(&filter_syn_list))) { + TAILQ_REMOVE(&filter_syn_list, + syn_filter_ptr, + entries); + rte_free(syn_filter_ptr); + } + + while ((l2_tn_filter_ptr = TAILQ_FIRST(&filter_l2_tunnel_list))) { + TAILQ_REMOVE(&filter_l2_tunnel_list, + l2_tn_filter_ptr, + entries); + rte_free(l2_tn_filter_ptr); + } + + while ((fdir_rule_ptr = TAILQ_FIRST(&filter_fdir_list))) { + TAILQ_REMOVE(&filter_fdir_list, + fdir_rule_ptr, + entries); + rte_free(fdir_rule_ptr); + } + + while ((ixgbe_flow_mem_ptr = TAILQ_FIRST(&ixgbe_flow_list))) { + TAILQ_REMOVE(&ixgbe_flow_list, + ixgbe_flow_mem_ptr, + entries); + rte_free(ixgbe_flow_mem_ptr->flow); + rte_free(ixgbe_flow_mem_ptr); + } +} + +/** + * Create or destroy a flow rule. + * Theorically one rule can match more than one filters. + * We will let it use the filter which it hitt first. + * So, the sequence matters. 
+ */ +static struct rte_flow * +ixgbe_flow_create(struct rte_eth_dev *dev, + const struct rte_flow_attr *attr, + const struct rte_flow_item pattern[], + const struct rte_flow_action actions[], + struct rte_flow_error *error) +{ + int ret; + struct rte_eth_ntuple_filter ntuple_filter; + struct rte_eth_ethertype_filter ethertype_filter; + struct rte_eth_syn_filter syn_filter; + struct ixgbe_fdir_rule fdir_rule; + struct rte_eth_l2_tunnel_conf l2_tn_filter; + struct ixgbe_hw_fdir_info *fdir_info = + IXGBE_DEV_PRIVATE_TO_FDIR_INFO(dev->data->dev_private); + struct rte_flow *flow = NULL; + struct ixgbe_ntuple_filter_ele *ntuple_filter_ptr; + struct ixgbe_ethertype_filter_ele *ethertype_filter_ptr; + struct ixgbe_eth_syn_filter_ele *syn_filter_ptr; + struct ixgbe_eth_l2_tunnel_conf_ele *l2_tn_filter_ptr; + struct ixgbe_fdir_rule_ele *fdir_rule_ptr; + struct ixgbe_flow_mem *ixgbe_flow_mem_ptr; + + flow = rte_zmalloc("ixgbe_rte_flow", sizeof(struct rte_flow), 0); + if (!flow) { + PMD_DRV_LOG(ERR, "failed to allocate memory"); + return (struct rte_flow *)flow; + } + ixgbe_flow_mem_ptr = rte_zmalloc("ixgbe_flow_mem", + sizeof(struct ixgbe_flow_mem), 0); + if (!ixgbe_flow_mem_ptr) { + PMD_DRV_LOG(ERR, "failed to allocate memory"); + rte_free(flow); + return NULL; + } + ixgbe_flow_mem_ptr->flow = flow; + TAILQ_INSERT_TAIL(&ixgbe_flow_list, + ixgbe_flow_mem_ptr, entries); + + memset(&ntuple_filter, 0, sizeof(struct rte_eth_ntuple_filter)); + ret = ixgbe_parse_ntuple_filter(attr, pattern, + actions, &ntuple_filter, error); + if (!ret) { + ret = ixgbe_add_del_ntuple_filter(dev, &ntuple_filter, TRUE); + if (!ret) { + ntuple_filter_ptr = rte_zmalloc("ixgbe_ntuple_filter", + sizeof(struct ixgbe_ntuple_filter_ele), 0); + (void)rte_memcpy(&ntuple_filter_ptr->filter_info, + &ntuple_filter, + sizeof(struct rte_eth_ntuple_filter)); + TAILQ_INSERT_TAIL(&filter_ntuple_list, + ntuple_filter_ptr, entries); + flow->rule = ntuple_filter_ptr; + flow->filter_type = RTE_ETH_FILTER_NTUPLE; + return flow; + } + goto out; + } + + memset(ðertype_filter, 0, sizeof(struct rte_eth_ethertype_filter)); + ret = ixgbe_parse_ethertype_filter(attr, pattern, + actions, ðertype_filter, error); + if (!ret) { + ret = ixgbe_add_del_ethertype_filter(dev, + ðertype_filter, TRUE); + if (!ret) { + ethertype_filter_ptr = rte_zmalloc( + "ixgbe_ethertype_filter", + sizeof(struct ixgbe_ethertype_filter_ele), 0); + (void)rte_memcpy(ðertype_filter_ptr->filter_info, + ðertype_filter, + sizeof(struct rte_eth_ethertype_filter)); + TAILQ_INSERT_TAIL(&filter_ethertype_list, + ethertype_filter_ptr, entries); + flow->rule = ethertype_filter_ptr; + flow->filter_type = RTE_ETH_FILTER_ETHERTYPE; + return flow; + } + goto out; + } + + memset(&syn_filter, 0, sizeof(struct rte_eth_syn_filter)); + ret = cons_parse_syn_filter(attr, pattern, actions, &syn_filter, error); + if (!ret) { + ret = ixgbe_syn_filter_set(dev, &syn_filter, TRUE); + if (!ret) { + syn_filter_ptr = rte_zmalloc("ixgbe_syn_filter", + sizeof(struct ixgbe_eth_syn_filter_ele), 0); + (void)rte_memcpy(&syn_filter_ptr->filter_info, + &syn_filter, + sizeof(struct rte_eth_syn_filter)); + TAILQ_INSERT_TAIL(&filter_syn_list, + syn_filter_ptr, + entries); + flow->rule = syn_filter_ptr; + flow->filter_type = RTE_ETH_FILTER_SYN; + return flow; + } + goto out; + } + + memset(&fdir_rule, 0, sizeof(struct ixgbe_fdir_rule)); + ret = ixgbe_parse_fdir_filter(attr, pattern, + actions, &fdir_rule, error); + if (!ret) { + /* A mask cannot be deleted. 
*/ + if (fdir_rule.b_mask) { + if (!fdir_info->mask_added) { + /* It's the first time the mask is set. */ + rte_memcpy(&fdir_info->mask, + &fdir_rule.mask, + sizeof(struct ixgbe_hw_fdir_mask)); + ret = ixgbe_fdir_set_input_mask(dev); + if (ret) + goto out; + + fdir_info->mask_added = TRUE; + } else { + /** + * Only support one global mask, + * all the masks should be the same. + */ + ret = memcmp(&fdir_info->mask, + &fdir_rule.mask, + sizeof(struct ixgbe_hw_fdir_mask)); + if (ret) + goto out; + } + } + + if (fdir_rule.b_spec) { + ret = ixgbe_fdir_filter_program(dev, &fdir_rule, + FALSE, FALSE); + if (!ret) { + fdir_rule_ptr = rte_zmalloc("ixgbe_fdir_filter", + sizeof(struct ixgbe_fdir_rule_ele), 0); + (void)rte_memcpy(&fdir_rule_ptr->filter_info, + &fdir_rule, + sizeof(struct ixgbe_fdir_rule)); + TAILQ_INSERT_TAIL(&filter_fdir_list, + fdir_rule_ptr, entries); + flow->rule = fdir_rule_ptr; + flow->filter_type = RTE_ETH_FILTER_FDIR; + + return flow; + } + + if (ret) + goto out; + } + + goto out; + } + + memset(&l2_tn_filter, 0, sizeof(struct rte_eth_l2_tunnel_conf)); + ret = cons_parse_l2_tn_filter(attr, pattern, + actions, &l2_tn_filter, error); + if (!ret) { + ret = ixgbe_dev_l2_tunnel_filter_add(dev, &l2_tn_filter, FALSE); + if (!ret) { + l2_tn_filter_ptr = rte_zmalloc("ixgbe_l2_tn_filter", + sizeof(struct ixgbe_eth_l2_tunnel_conf_ele), 0); + (void)rte_memcpy(&l2_tn_filter_ptr->filter_info, + &l2_tn_filter, + sizeof(struct rte_eth_l2_tunnel_conf)); + TAILQ_INSERT_TAIL(&filter_l2_tunnel_list, + l2_tn_filter_ptr, entries); + flow->rule = l2_tn_filter_ptr; + flow->filter_type = RTE_ETH_FILTER_L2_TUNNEL; + return flow; + } + } + +out: + TAILQ_REMOVE(&ixgbe_flow_list, + ixgbe_flow_mem_ptr, entries); + rte_free(ixgbe_flow_mem_ptr); + rte_free(flow); + return NULL; +} + +/** + * Check if the flow rule is supported by ixgbe. + * It only checkes the format. Don't guarantee the rule can be programmed into + * the HW. Because there can be no enough room for the rule. + */ +static int +ixgbe_flow_validate(__rte_unused struct rte_eth_dev *dev, + const struct rte_flow_attr *attr, + const struct rte_flow_item pattern[], + const struct rte_flow_action actions[], + struct rte_flow_error *error) +{ + struct rte_eth_ntuple_filter ntuple_filter; + struct rte_eth_ethertype_filter ethertype_filter; + struct rte_eth_syn_filter syn_filter; + struct rte_eth_l2_tunnel_conf l2_tn_filter; + struct ixgbe_fdir_rule fdir_rule; + int ret; + + memset(&ntuple_filter, 0, sizeof(struct rte_eth_ntuple_filter)); + ret = ixgbe_parse_ntuple_filter(attr, pattern, + actions, &ntuple_filter, error); + if (!ret) + return 0; + + memset(ðertype_filter, 0, sizeof(struct rte_eth_ethertype_filter)); + ret = ixgbe_parse_ethertype_filter(attr, pattern, + actions, ðertype_filter, error); + if (!ret) + return 0; + + memset(&syn_filter, 0, sizeof(struct rte_eth_syn_filter)); + ret = ixgbe_parse_syn_filter(attr, pattern, + actions, &syn_filter, error); + if (!ret) + return 0; + + memset(&fdir_rule, 0, sizeof(struct ixgbe_fdir_rule)); + ret = ixgbe_validate_fdir_filter(dev, attr, pattern, + actions, &fdir_rule, error); + if (!ret) + return 0; + + memset(&l2_tn_filter, 0, sizeof(struct rte_eth_l2_tunnel_conf)); + ret = ixgbe_validate_l2_tn_filter(dev, attr, pattern, + actions, &l2_tn_filter, error); + + return ret; +} + +/* Destroy a flow rule on ixgbe. 
*/ +static int +ixgbe_flow_destroy(struct rte_eth_dev *dev, + struct rte_flow *flow, + struct rte_flow_error *error) +{ + int ret; + struct rte_flow *pmd_flow = flow; + enum rte_filter_type filter_type = pmd_flow->filter_type; + struct rte_eth_ntuple_filter ntuple_filter; + struct rte_eth_ethertype_filter ethertype_filter; + struct rte_eth_syn_filter syn_filter; + struct ixgbe_fdir_rule fdir_rule; + struct rte_eth_l2_tunnel_conf l2_tn_filter; + struct ixgbe_ntuple_filter_ele *ntuple_filter_ptr; + struct ixgbe_ethertype_filter_ele *ethertype_filter_ptr; + struct ixgbe_eth_syn_filter_ele *syn_filter_ptr; + struct ixgbe_eth_l2_tunnel_conf_ele *l2_tn_filter_ptr; + struct ixgbe_fdir_rule_ele *fdir_rule_ptr; + struct ixgbe_flow_mem *ixgbe_flow_mem_ptr; + + switch (filter_type) { + case RTE_ETH_FILTER_NTUPLE: + ntuple_filter_ptr = (struct ixgbe_ntuple_filter_ele *) + pmd_flow->rule; + (void)rte_memcpy(&ntuple_filter, + &ntuple_filter_ptr->filter_info, + sizeof(struct rte_eth_ntuple_filter)); + ret = ixgbe_add_del_ntuple_filter(dev, &ntuple_filter, FALSE); + if (!ret) { + TAILQ_REMOVE(&filter_ntuple_list, + ntuple_filter_ptr, entries); + rte_free(ntuple_filter_ptr); + } + break; + case RTE_ETH_FILTER_ETHERTYPE: + ethertype_filter_ptr = (struct ixgbe_ethertype_filter_ele *) + pmd_flow->rule; + (void)rte_memcpy(ðertype_filter, + ðertype_filter_ptr->filter_info, + sizeof(struct rte_eth_ethertype_filter)); + ret = ixgbe_add_del_ethertype_filter(dev, + ðertype_filter, FALSE); + if (!ret) { + TAILQ_REMOVE(&filter_ethertype_list, + ethertype_filter_ptr, entries); + rte_free(ethertype_filter_ptr); + } + break; + case RTE_ETH_FILTER_SYN: + syn_filter_ptr = (struct ixgbe_eth_syn_filter_ele *) + pmd_flow->rule; + (void)rte_memcpy(&syn_filter, + &syn_filter_ptr->filter_info, + sizeof(struct rte_eth_syn_filter)); + ret = ixgbe_syn_filter_set(dev, &syn_filter, FALSE); + if (!ret) { + TAILQ_REMOVE(&filter_syn_list, + syn_filter_ptr, entries); + rte_free(syn_filter_ptr); + } + break; + case RTE_ETH_FILTER_FDIR: + fdir_rule_ptr = (struct ixgbe_fdir_rule_ele *)pmd_flow->rule; + (void)rte_memcpy(&fdir_rule, + &fdir_rule_ptr->filter_info, + sizeof(struct ixgbe_fdir_rule)); + ret = ixgbe_fdir_filter_program(dev, &fdir_rule, TRUE, FALSE); + if (!ret) { + TAILQ_REMOVE(&filter_fdir_list, + fdir_rule_ptr, entries); + rte_free(fdir_rule_ptr); + } + break; + case RTE_ETH_FILTER_L2_TUNNEL: + l2_tn_filter_ptr = (struct ixgbe_eth_l2_tunnel_conf_ele *) + pmd_flow->rule; + (void)rte_memcpy(&l2_tn_filter, &l2_tn_filter_ptr->filter_info, + sizeof(struct rte_eth_l2_tunnel_conf)); + ret = ixgbe_dev_l2_tunnel_filter_del(dev, &l2_tn_filter); + if (!ret) { + TAILQ_REMOVE(&filter_l2_tunnel_list, + l2_tn_filter_ptr, entries); + rte_free(l2_tn_filter_ptr); + } + break; + default: + PMD_DRV_LOG(WARNING, "Filter type (%d) not supported", + filter_type); + ret = -EINVAL; + break; + } + + if (ret) { + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_HANDLE, + NULL, "Failed to destroy flow"); + return ret; + } + + TAILQ_FOREACH(ixgbe_flow_mem_ptr, &ixgbe_flow_list, entries) { + if (ixgbe_flow_mem_ptr->flow == pmd_flow) { + TAILQ_REMOVE(&ixgbe_flow_list, + ixgbe_flow_mem_ptr, entries); + rte_free(ixgbe_flow_mem_ptr); + } + } + rte_free(flow); + + return ret; +} + +/* Destroy all flow rules associated with a port on ixgbe. 
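[Illustration, not part of the patch] The create/validate/destroy callbacks above, and the flush callback that follows, are what the generic rte_flow calls dispatch to for ixgbe ports. A typical application sequence, sketched under the assumption that attr, pattern and actions were built as in the earlier examples and that port_id is valid:

        struct rte_flow_error err;
        struct rte_flow *flow = NULL;

        if (rte_flow_validate(port_id, &attr, pattern, actions, &err) == 0)
            flow = rte_flow_create(port_id, &attr, pattern, actions, &err);
        if (flow == NULL)
            printf("flow rule rejected: %s\n",
                   err.message ? err.message : "(no details)");

        /* ... later, tear down one rule or everything on the port ... */
        if (flow)
            rte_flow_destroy(port_id, flow, &err);
        rte_flow_flush(port_id, &err);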
*/ +static int +ixgbe_flow_flush(struct rte_eth_dev *dev, + struct rte_flow_error *error) +{ + int ret = 0; + + ixgbe_clear_all_ntuple_filter(dev); + ixgbe_clear_all_ethertype_filter(dev); + ixgbe_clear_syn_filter(dev); + + ret = ixgbe_clear_all_fdir_filter(dev); + if (ret < 0) { + rte_flow_error_set(error, EINVAL, RTE_FLOW_ERROR_TYPE_HANDLE, + NULL, "Failed to flush rule"); + return ret; + } + + ret = ixgbe_clear_all_l2_tn_filter(dev); + if (ret < 0) { + rte_flow_error_set(error, EINVAL, RTE_FLOW_ERROR_TYPE_HANDLE, + NULL, "Failed to flush rule"); + return ret; + } + + ixgbe_filterlist_flush(); + + return 0; +} diff --git a/src/dpdk/drivers/net/ixgbe/ixgbe_pf.c b/src/dpdk/drivers/net/ixgbe/ixgbe_pf.c index 56393ff2..4715045f 100644 --- a/src/dpdk/drivers/net/ixgbe/ixgbe_pf.c +++ b/src/dpdk/drivers/net/ixgbe/ixgbe_pf.c @@ -1,7 +1,7 @@ /*- * BSD LICENSE * - * Copyright(c) 2010-2015 Intel Corporation. All rights reserved. + * Copyright(c) 2010-2016 Intel Corporation. All rights reserved. * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -51,6 +51,7 @@ #include "base/ixgbe_common.h" #include "ixgbe_ethdev.h" +#include "rte_pmd_ixgbe.h" #define IXGBE_MAX_VFTA (128) #define IXGBE_VF_MSG_SIZE_DEFAULT 1 @@ -60,7 +61,9 @@ static inline uint16_t dev_num_vf(struct rte_eth_dev *eth_dev) { - return eth_dev->pci_dev->max_vfs; + struct rte_pci_device *pci_dev = IXGBE_DEV_TO_PCI(eth_dev); + + return pci_dev->max_vfs; } static inline @@ -175,6 +178,7 @@ ixgbe_add_tx_flow_control_drop_filter(struct rte_eth_dev *eth_dev) IXGBE_DEV_PRIVATE_TO_FILTER_INFO(eth_dev->data->dev_private); uint16_t vf_num; int i; + struct ixgbe_ethertype_filter ethertype_filter; if (!hw->mac.ops.set_ethertype_anti_spoofing) { RTE_LOG(INFO, PMD, "ether type anti-spoofing is not" @@ -182,16 +186,23 @@ ixgbe_add_tx_flow_control_drop_filter(struct rte_eth_dev *eth_dev) return; } - /* occupy an entity of ether type filter */ - for (i = 0; i < IXGBE_MAX_ETQF_FILTERS; i++) { - if (!(filter_info->ethertype_mask & (1 << i))) { - filter_info->ethertype_mask |= 1 << i; - filter_info->ethertype_filters[i] = - IXGBE_ETHERTYPE_FLOW_CTRL; - break; - } + i = ixgbe_ethertype_filter_lookup(filter_info, + IXGBE_ETHERTYPE_FLOW_CTRL); + if (i >= 0) { + RTE_LOG(ERR, PMD, "A ether type filter" + " entity for flow control already exists!\n"); + return; } - if (i == IXGBE_MAX_ETQF_FILTERS) { + + ethertype_filter.ethertype = IXGBE_ETHERTYPE_FLOW_CTRL; + ethertype_filter.etqf = IXGBE_ETQF_FILTER_EN | + IXGBE_ETQF_TX_ANTISPOOF | + IXGBE_ETHERTYPE_FLOW_CTRL; + ethertype_filter.etqs = 0; + ethertype_filter.conf = TRUE; + i = ixgbe_ethertype_filter_insert(filter_info, + ðertype_filter); + if (i < 0) { RTE_LOG(ERR, PMD, "Cannot find an unused ether type filter" " entity for flow control.\n"); return; @@ -660,6 +671,7 @@ ixgbe_rcv_msg_from_vf(struct rte_eth_dev *dev, uint16_t vf) struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private); struct ixgbe_vf_info *vfinfo = *IXGBE_DEV_PRIVATE_TO_P_VFDATA(dev->data->dev_private); + struct rte_pmd_ixgbe_mb_event_param cb_param; retval = ixgbe_read_mbx(hw, msgbuf, mbx_size, vf); if (retval) { @@ -674,27 +686,54 @@ ixgbe_rcv_msg_from_vf(struct rte_eth_dev *dev, uint16_t vf) /* flush the ack before we write any messages back */ IXGBE_WRITE_FLUSH(hw); + /** + * initialise structure to send to user application + * will return response from user in retval field + */ + cb_param.retval = RTE_PMD_IXGBE_MB_EVENT_PROCEED; + cb_param.vfid = vf; + cb_param.msg_type = msgbuf[0] 
& 0xFFFF; + cb_param.msg = (void *)msgbuf; + /* perform VF reset */ if (msgbuf[0] == IXGBE_VF_RESET) { int ret = ixgbe_vf_reset(dev, vf, msgbuf); vfinfo[vf].clear_to_send = true; + + /* notify application about VF reset */ + _rte_eth_dev_callback_process(dev, RTE_ETH_EVENT_VF_MBOX, &cb_param); return ret; } + /** + * ask user application if we allowed to perform those functions + * if we get cb_param.retval == RTE_PMD_IXGBE_MB_EVENT_PROCEED + * then business as usual, + * if 0, do nothing and send ACK to VF + * if cb_param.retval > 1, do nothing and send NAK to VF + */ + _rte_eth_dev_callback_process(dev, RTE_ETH_EVENT_VF_MBOX, &cb_param); + + retval = cb_param.retval; + /* check & process VF to PF mailbox message */ switch ((msgbuf[0] & 0xFFFF)) { case IXGBE_VF_SET_MAC_ADDR: - retval = ixgbe_vf_set_mac_addr(dev, vf, msgbuf); + if (retval == RTE_PMD_IXGBE_MB_EVENT_PROCEED) + retval = ixgbe_vf_set_mac_addr(dev, vf, msgbuf); break; case IXGBE_VF_SET_MULTICAST: - retval = ixgbe_vf_set_multicast(dev, vf, msgbuf); + if (retval == RTE_PMD_IXGBE_MB_EVENT_PROCEED) + retval = ixgbe_vf_set_multicast(dev, vf, msgbuf); break; case IXGBE_VF_SET_LPE: - retval = ixgbe_set_vf_lpe(dev, vf, msgbuf); + if (retval == RTE_PMD_IXGBE_MB_EVENT_PROCEED) + retval = ixgbe_set_vf_lpe(dev, vf, msgbuf); break; case IXGBE_VF_SET_VLAN: - retval = ixgbe_vf_set_vlan(dev, vf, msgbuf); + if (retval == RTE_PMD_IXGBE_MB_EVENT_PROCEED) + retval = ixgbe_vf_set_vlan(dev, vf, msgbuf); break; case IXGBE_VF_API_NEGOTIATE: retval = ixgbe_negotiate_vf_api(dev, vf, msgbuf); @@ -704,7 +743,8 @@ ixgbe_rcv_msg_from_vf(struct rte_eth_dev *dev, uint16_t vf) msg_size = IXGBE_VF_GET_QUEUE_MSG_SIZE; break; case IXGBE_VF_UPDATE_XCAST_MODE: - retval = ixgbe_set_vf_mc_promisc(dev, vf, msgbuf); + if (retval == RTE_PMD_IXGBE_MB_EVENT_PROCEED) + retval = ixgbe_set_vf_mc_promisc(dev, vf, msgbuf); break; default: PMD_DRV_LOG(DEBUG, "Unhandled Msg %8.8x", (unsigned)msgbuf[0]); diff --git a/src/dpdk/drivers/net/ixgbe/ixgbe_regs.h b/src/dpdk/drivers/net/ixgbe/ixgbe_regs.h index c7457a6f..2aa48201 100644 --- a/src/dpdk/drivers/net/ixgbe/ixgbe_regs.h +++ b/src/dpdk/drivers/net/ixgbe/ixgbe_regs.h @@ -41,7 +41,7 @@ struct reg_info { uint32_t count; uint32_t stride; const char *name; -} reg_info; +}; static const struct reg_info ixgbe_regs_general[] = { {IXGBE_CTRL, 1, 1, "IXGBE_CTRL"}, @@ -56,10 +56,10 @@ static const struct reg_info ixgbe_regs_general[] = { }; static const struct reg_info ixgbevf_regs_general[] = { - {IXGBE_CTRL, 1, 1, "IXGBE_CTRL"}, - {IXGBE_STATUS, 1, 1, "IXGBE_STATUS"}, + {IXGBE_VFCTRL, 1, 1, "IXGBE_VFCTRL"}, + {IXGBE_VFSTATUS, 1, 1, "IXGBE_VFSTATUS"}, {IXGBE_VFLINKS, 1, 1, "IXGBE_VFLINKS"}, - {IXGBE_FRTIMER, 1, 1, "IXGBE_FRTIMER"}, + {IXGBE_VFFRTIMER, 1, 1, "IXGBE_VFFRTIMER"}, {IXGBE_VFMAILBOX, 1, 1, "IXGBE_VFMAILBOX"}, {IXGBE_VFMBMEM, 16, 4, "IXGBE_VFMBMEM"}, {IXGBE_VFRXMEMWRAP, 1, 1, "IXGBE_VFRXMEMWRAP"}, @@ -145,17 +145,17 @@ static const struct reg_info ixgbe_regs_rxdma[] = { }; static const struct reg_info ixgbevf_regs_rxdma[] = { - {IXGBE_RDBAL(0), 8, 0x40, "IXGBE_RDBAL"}, - {IXGBE_RDBAH(0), 8, 0x40, "IXGBE_RDBAH"}, - {IXGBE_RDLEN(0), 8, 0x40, "IXGBE_RDLEN"}, - {IXGBE_RDH(0), 8, 0x40, "IXGBE_RDH"}, - {IXGBE_RDT(0), 8, 0x40, "IXGBE_RDT"}, - {IXGBE_RXDCTL(0), 8, 0x40, "IXGBE_RXDCTL"}, - {IXGBE_SRRCTL(0), 8, 0x40, "IXGBE_SRRCTL"}, + {IXGBE_VFRDBAL(0), 8, 0x40, "IXGBE_VFRDBAL"}, + {IXGBE_VFRDBAH(0), 8, 0x40, "IXGBE_VFRDBAH"}, + {IXGBE_VFRDLEN(0), 8, 0x40, "IXGBE_VFRDLEN"}, + {IXGBE_VFRDH(0), 8, 0x40, "IXGBE_VFRDH"}, + {IXGBE_VFRDT(0), 
8, 0x40, "IXGBE_VFRDT"}, + {IXGBE_VFRXDCTL(0), 8, 0x40, "IXGBE_VFRXDCTL"}, + {IXGBE_VFSRRCTL(0), 8, 0x40, "IXGBE_VFSRRCTL"}, {IXGBE_VFPSRTYPE, 1, 1, "IXGBE_VFPSRTYPE"}, {IXGBE_VFRSCCTL(0), 8, 0x40, "IXGBE_VFRSCCTL"}, - {IXGBE_PVFDCA_RXCTRL(0), 8, 0x40, "IXGBE_PVFDCA_RXCTRL"}, - {IXGBE_PVFDCA_TXCTRL(0), 8, 0x40, "IXGBE_PVFDCA_TXCTRL"}, + {IXGBE_VFDCA_RXCTRL(0), 8, 0x40, "IXGBE_VFDCA_RXCTRL"}, + {IXGBE_VFDCA_TXCTRL(0), 8, 0x40, "IXGBE_VFDCA_TXCTRL"}, {0, 0, 0, ""} }; @@ -193,14 +193,14 @@ static struct reg_info ixgbe_regs_tx[] = { }; static const struct reg_info ixgbevf_regs_tx[] = { - {IXGBE_TDBAL(0), 4, 0x40, "IXGBE_TDBAL"}, - {IXGBE_TDBAH(0), 4, 0x40, "IXGBE_TDBAH"}, - {IXGBE_TDLEN(0), 4, 0x40, "IXGBE_TDLEN"}, - {IXGBE_TDH(0), 4, 0x40, "IXGBE_TDH"}, - {IXGBE_TDT(0), 4, 0x40, "IXGBE_TDT"}, - {IXGBE_TXDCTL(0), 4, 0x40, "IXGBE_TXDCTL"}, - {IXGBE_TDWBAL(0), 4, 0x40, "IXGBE_TDWBAL"}, - {IXGBE_TDWBAH(0), 4, 0x40, "IXGBE_TDWBAH"}, + {IXGBE_VFTDBAL(0), 4, 0x40, "IXGBE_VFTDBAL"}, + {IXGBE_VFTDBAH(0), 4, 0x40, "IXGBE_VFTDBAH"}, + {IXGBE_VFTDLEN(0), 4, 0x40, "IXGBE_VFTDLEN"}, + {IXGBE_VFTDH(0), 4, 0x40, "IXGBE_VFTDH"}, + {IXGBE_VFTDT(0), 4, 0x40, "IXGBE_VFTDT"}, + {IXGBE_VFTXDCTL(0), 4, 0x40, "IXGBE_VFTXDCTL"}, + {IXGBE_VFTDWBAL(0), 4, 0x40, "IXGBE_VFTDWBAL"}, + {IXGBE_VFTDWBAH(0), 4, 0x40, "IXGBE_VFTDWBAH"}, {0, 0, 0, ""} }; diff --git a/src/dpdk/drivers/net/ixgbe/ixgbe_rxtx.c b/src/dpdk/drivers/net/ixgbe/ixgbe_rxtx.c index a018e926..36f1c020 100644 --- a/src/dpdk/drivers/net/ixgbe/ixgbe_rxtx.c +++ b/src/dpdk/drivers/net/ixgbe/ixgbe_rxtx.c @@ -1,7 +1,7 @@ /*- * BSD LICENSE * - * Copyright(c) 2010-2015 Intel Corporation. All rights reserved. + * Copyright(c) 2010-2016 Intel Corporation. All rights reserved. * Copyright 2014 6WIND S.A. * All rights reserved. * @@ -58,7 +58,6 @@ #include <rte_lcore.h> #include <rte_atomic.h> #include <rte_branch_prediction.h> -#include <rte_ring.h> #include <rte_mempool.h> #include <rte_malloc.h> #include <rte_mbuf.h> @@ -71,6 +70,7 @@ #include <rte_string_fns.h> #include <rte_errno.h> #include <rte_ip.h> +#include <rte_net.h> #include "ixgbe_logs.h" #include "base/ixgbe_api.h" @@ -86,8 +86,12 @@ PKT_TX_IP_CKSUM | \ PKT_TX_L4_MASK | \ PKT_TX_TCP_SEG | \ + PKT_TX_MACSEC | \ PKT_TX_OUTER_IP_CKSUM) +#define IXGBE_TX_OFFLOAD_NOTSUP_MASK \ + (PKT_TX_OFFLOAD_MASK ^ IXGBE_TX_OFFLOAD_MASK) + #if 1 #define RTE_PMD_USE_PREFETCH #endif @@ -322,7 +326,7 @@ tx_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, /* update tail pointer */ rte_wmb(); - IXGBE_PCI_REG_WRITE(txq->tdt_reg_addr, txq->tx_tail); + IXGBE_PCI_REG_WRITE_RELAXED(txq->tdt_reg_addr, txq->tx_tail); return nb_pkts; } @@ -520,6 +524,8 @@ tx_desc_ol_flags_to_cmdtype(uint64_t ol_flags) cmdtype |= IXGBE_ADVTXD_DCMD_TSE; if (ol_flags & PKT_TX_OUTER_IP_CKSUM) cmdtype |= (1 << IXGBE_ADVTXD_OUTERIPCS_SHIFT); + if (ol_flags & PKT_TX_MACSEC) + cmdtype |= IXGBE_ADVTXD_MAC_LINKSEC; return cmdtype; } @@ -898,7 +904,7 @@ end_of_tx: PMD_TX_LOG(DEBUG, "port_id=%u queue_id=%u tx_tail=%u nb_tx=%u", (unsigned) txq->port_id, (unsigned) txq->queue_id, (unsigned) tx_id, (unsigned) nb_tx); - IXGBE_PCI_REG_WRITE(txq->tdt_reg_addr, tx_id); + IXGBE_PCI_REG_WRITE_RELAXED(txq->tdt_reg_addr, tx_id); txq->tx_tail = tx_id; return nb_tx; @@ -906,6 +912,57 @@ end_of_tx: /********************************************************************* * + * TX prep functions + * + **********************************************************************/ +uint16_t +ixgbe_prep_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts) +{ + int i, ret; + 
uint64_t ol_flags; + struct rte_mbuf *m; + struct ixgbe_tx_queue *txq = (struct ixgbe_tx_queue *)tx_queue; + + for (i = 0; i < nb_pkts; i++) { + m = tx_pkts[i]; + ol_flags = m->ol_flags; + + /** + * Check if packet meets requirements for number of segments + * + * NOTE: for ixgbe it's always (40 - WTHRESH) for both TSO and + * non-TSO + */ + + if (m->nb_segs > IXGBE_TX_MAX_SEG - txq->wthresh) { + rte_errno = -EINVAL; + return i; + } + + if (ol_flags & IXGBE_TX_OFFLOAD_NOTSUP_MASK) { + rte_errno = -ENOTSUP; + return i; + } + +#ifdef RTE_LIBRTE_ETHDEV_DEBUG + ret = rte_validate_tx_offload(m); + if (ret != 0) { + rte_errno = ret; + return i; + } +#endif + ret = rte_net_intel_cksum_prepare(m); + if (ret != 0) { + rte_errno = ret; + return i; + } + } + + return i; +} + +/********************************************************************* + * * RX functions * **********************************************************************/ @@ -1345,7 +1402,9 @@ rx_desc_error_to_pkt_flags(uint32_t rx_status) * Bit 30: L4I, L4I integrity error */ static uint64_t error_to_pkt_flags_map[4] = { - 0, PKT_RX_L4_CKSUM_BAD, PKT_RX_IP_CKSUM_BAD, + PKT_RX_IP_CKSUM_GOOD | PKT_RX_L4_CKSUM_GOOD, + PKT_RX_IP_CKSUM_GOOD | PKT_RX_L4_CKSUM_BAD, + PKT_RX_IP_CKSUM_BAD | PKT_RX_L4_CKSUM_GOOD, PKT_RX_IP_CKSUM_BAD | PKT_RX_L4_CKSUM_BAD }; pkt_flags = error_to_pkt_flags_map[(rx_status >> @@ -1580,7 +1639,8 @@ rx_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, /* update tail pointer */ rte_wmb(); - IXGBE_PCI_REG_WRITE(rxq->rdt_reg_addr, cur_free_trigger); + IXGBE_PCI_REG_WRITE_RELAXED(rxq->rdt_reg_addr, + cur_free_trigger); } if (rxq->rx_tail >= rxq->nb_rx_desc) @@ -1984,8 +2044,8 @@ next_desc: if (!ixgbe_rx_alloc_bufs(rxq, false)) { rte_wmb(); - IXGBE_PCI_REG_WRITE(rxq->rdt_reg_addr, - next_rdt); + IXGBE_PCI_REG_WRITE_RELAXED(rxq->rdt_reg_addr, + next_rdt); nb_hold -= rxq->rx_free_thresh; } else { PMD_RX_LOG(DEBUG, "RX bulk alloc failed " @@ -2156,7 +2216,7 @@ next_desc: rxq->port_id, rxq->queue_id, rx_id, nb_hold, nb_rx); rte_wmb(); - IXGBE_PCI_REG_WRITE(rxq->rdt_reg_addr, prev_id); + IXGBE_PCI_REG_WRITE_RELAXED(rxq->rdt_reg_addr, prev_id); nb_hold = 0; } @@ -2281,6 +2341,7 @@ ixgbe_set_tx_function(struct rte_eth_dev *dev, struct ixgbe_tx_queue *txq) if (((txq->txq_flags & IXGBE_SIMPLE_FLAGS) == IXGBE_SIMPLE_FLAGS) && (txq->tx_rs_thresh >= RTE_PMD_IXGBE_TX_MAX_BURST)) { PMD_INIT_LOG(DEBUG, "Using simple tx code path"); + dev->tx_pkt_prepare = NULL; #ifdef RTE_IXGBE_INC_VECTOR if (txq->tx_rs_thresh <= RTE_IXGBE_TX_MAX_FREE_BUF_SZ && (rte_eal_process_type() != RTE_PROC_PRIMARY || @@ -2301,6 +2362,7 @@ ixgbe_set_tx_function(struct rte_eth_dev *dev, struct ixgbe_tx_queue *txq) (unsigned long)txq->tx_rs_thresh, (unsigned long)RTE_PMD_IXGBE_TX_MAX_BURST); dev->tx_pkt_burst = ixgbe_xmit_pkts; + dev->tx_pkt_prepare = ixgbe_prep_pkts; } } @@ -2584,7 +2646,6 @@ check_rx_burst_bulk_alloc_preconditions(struct ixgbe_rx_queue *rxq) * rxq->rx_free_thresh >= RTE_PMD_IXGBE_RX_MAX_BURST * rxq->rx_free_thresh < rxq->nb_rx_desc * (rxq->nb_rx_desc % rxq->rx_free_thresh) == 0 - * rxq->nb_rx_desc<(IXGBE_MAX_RING_DESC-RTE_PMD_IXGBE_RX_MAX_BURST) * Scattered packets are not supported. This should be checked * outside of this function. 
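[Illustration, not part of the patch] The ixgbe_prep_pkts() handler added above is the callback behind rte_eth_tx_prepare() for this PMD. A sketch of the intended application-side calling convention; port_id, queue_id, pkts and nb are assumed to be set up by the caller:

        uint16_t nb_prep = rte_eth_tx_prepare(port_id, queue_id, pkts, nb);

        if (nb_prep != nb) {
            /* pkts[nb_prep] failed the checks; rte_errno tells why, e.g. an
             * offload flag outside IXGBE_TX_OFFLOAD_MASK or too many
             * segments per packet. The remaining packets would be retried
             * or freed as well in a real application. */
            rte_pktmbuf_free(pkts[nb_prep]);
        }
        (void)rte_eth_tx_burst(port_id, queue_id, pkts, nb_prep);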
*/ @@ -2606,15 +2667,6 @@ check_rx_burst_bulk_alloc_preconditions(struct ixgbe_rx_queue *rxq) "rxq->rx_free_thresh=%d", rxq->nb_rx_desc, rxq->rx_free_thresh); ret = -EINVAL; - } else if (!(rxq->nb_rx_desc < - (IXGBE_MAX_RING_DESC - RTE_PMD_IXGBE_RX_MAX_BURST))) { - PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions: " - "rxq->nb_rx_desc=%d, " - "IXGBE_MAX_RING_DESC=%d, " - "RTE_PMD_IXGBE_RX_MAX_BURST=%d", - rxq->nb_rx_desc, IXGBE_MAX_RING_DESC, - RTE_PMD_IXGBE_RX_MAX_BURST); - ret = -EINVAL; } return ret; @@ -2631,12 +2683,7 @@ ixgbe_reset_rx_queue(struct ixgbe_adapter *adapter, struct ixgbe_rx_queue *rxq) /* * By default, the Rx queue setup function allocates enough memory for * IXGBE_MAX_RING_DESC. The Rx Burst bulk allocation function requires - * extra memory at the end of the descriptor ring to be zero'd out. A - * pre-condition for using the Rx burst bulk alloc function is that the - * number of descriptors is less than or equal to - * (IXGBE_MAX_RING_DESC - RTE_PMD_IXGBE_RX_MAX_BURST). Check all the - * constraints here to see if we need to zero out memory after the end - * of the H/W descriptor ring. + * extra memory at the end of the descriptor ring to be zero'd out. */ if (adapter->rx_bulk_alloc_allowed) /* zero out extra memory */ @@ -3312,15 +3359,16 @@ ixgbe_vmdq_dcb_configure(struct rte_eth_dev *dev) /** * ixgbe_dcb_config_tx_hw_config - Configure general DCB TX parameters - * @hw: pointer to hardware structure + * @dev: pointer to eth_dev structure * @dcb_config: pointer to ixgbe_dcb_config structure */ static void -ixgbe_dcb_tx_hw_config(struct ixgbe_hw *hw, +ixgbe_dcb_tx_hw_config(struct rte_eth_dev *dev, struct ixgbe_dcb_config *dcb_config) { uint32_t reg; uint32_t q; + struct ixgbe_hw *hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private); PMD_INIT_FUNC_TRACE(); if (hw->mac.type != ixgbe_mac_82598EB) { @@ -3339,10 +3387,17 @@ ixgbe_dcb_tx_hw_config(struct ixgbe_hw *hw, reg |= IXGBE_MTQC_VT_ENA; IXGBE_WRITE_REG(hw, IXGBE_MTQC, reg); - /* Disable drop for all queues */ - for (q = 0; q < 128; q++) - IXGBE_WRITE_REG(hw, IXGBE_QDE, - (IXGBE_QDE_WRITE | (q << IXGBE_QDE_IDX_SHIFT))); + if (RTE_ETH_DEV_SRIOV(dev).active == 0) { + /* Disable drop for all queues in VMDQ mode*/ + for (q = 0; q < 128; q++) + IXGBE_WRITE_REG(hw, IXGBE_QDE, + (IXGBE_QDE_WRITE | (q << IXGBE_QDE_IDX_SHIFT))); + } else { + /* Enable drop for all queues in SRIOV mode */ + for (q = 0; q < 128; q++) + IXGBE_WRITE_REG(hw, IXGBE_QDE, + (IXGBE_QDE_WRITE | (q << IXGBE_QDE_IDX_SHIFT) | IXGBE_QDE_ENABLE)); + } /* Enable the Tx desc arbiter */ reg = IXGBE_READ_REG(hw, IXGBE_RTTDCS); @@ -3377,7 +3432,7 @@ ixgbe_vmdq_dcb_hw_tx_config(struct rte_eth_dev *dev, vmdq_tx_conf->nb_queue_pools == ETH_16_POOLS ? 
0xFFFF : 0xFFFFFFFF); /*Configure general DCB TX parameters*/ - ixgbe_dcb_tx_hw_config(hw, dcb_config); + ixgbe_dcb_tx_hw_config(dev, dcb_config); } static void @@ -3660,7 +3715,7 @@ ixgbe_dcb_hw_configure(struct rte_eth_dev *dev, /*get DCB TX configuration parameters from rte_eth_conf*/ ixgbe_dcb_tx_config(dev, dcb_config); /*Configure general DCB TX parameters*/ - ixgbe_dcb_tx_hw_config(hw, dcb_config); + ixgbe_dcb_tx_hw_config(dev, dcb_config); break; default: PMD_INIT_LOG(ERR, "Incorrect DCB TX mode configuration"); @@ -3809,7 +3864,7 @@ void ixgbe_configure_dcb(struct rte_eth_dev *dev) (dev_conf->rxmode.mq_mode != ETH_MQ_RX_DCB_RSS)) return; - if (dev->data->nb_rx_queues != ETH_DCB_NUM_QUEUES) + if (dev->data->nb_rx_queues > ETH_DCB_NUM_QUEUES) return; /** Configure DCB hardware **/ @@ -4081,12 +4136,13 @@ ixgbe_dev_mq_rx_configure(struct rte_eth_dev *dev) case ETH_MQ_RX_VMDQ_RSS: ixgbe_config_vf_rss(dev); break; - - /* FIXME if support DCB/RSS together with VMDq & SRIOV */ case ETH_MQ_RX_VMDQ_DCB: + ixgbe_vmdq_dcb_configure(dev); + break; + /* FIXME if support DCB/RSS together with VMDq & SRIOV */ case ETH_MQ_RX_VMDQ_DCB_RSS: PMD_INIT_LOG(ERR, - "Could not support DCB with VMDq & SRIOV"); + "Could not support DCB/RSS with VMDq & SRIOV"); return -1; default: ixgbe_config_vf_default(dev); @@ -4913,8 +4969,7 @@ ixgbe_dev_rx_queue_stop(struct rte_eth_dev *dev, uint16_t rx_queue_id) rxdctl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(rxq->reg_idx)); } while (--poll_ms && (rxdctl & IXGBE_RXDCTL_ENABLE)); if (!poll_ms) - // TREX_PATCH - changed log level from ERR to DEBUG - PMD_INIT_LOG(DEBUG, "Could not disable Rx Queue %d", + PMD_INIT_LOG(ERR, "Could not disable Rx Queue %d", rx_queue_id); rte_delay_us(RTE_IXGBE_WAIT_100_US); diff --git a/src/dpdk/drivers/net/ixgbe/ixgbe_rxtx.h b/src/dpdk/drivers/net/ixgbe/ixgbe_rxtx.h index 2608b364..739fd198 100644 --- a/src/dpdk/drivers/net/ixgbe/ixgbe_rxtx.h +++ b/src/dpdk/drivers/net/ixgbe/ixgbe_rxtx.h @@ -67,7 +67,7 @@ #define RTE_IXGBE_MAX_RX_BURST RTE_IXGBE_RXQ_REARM_THRESH #endif -#define RX_RING_SZ ((IXGBE_MAX_RING_DESC + RTE_IXGBE_DESCS_PER_LOOP - 1) * \ +#define RX_RING_SZ ((IXGBE_MAX_RING_DESC + RTE_PMD_IXGBE_RX_MAX_BURST) * \ sizeof(union ixgbe_adv_rx_desc)) #ifdef RTE_PMD_PACKET_PREFETCH @@ -80,6 +80,8 @@ #define RTE_IXGBE_WAIT_100_US 100 #define RTE_IXGBE_VMTXSW_REGISTER_COUNT 2 +#define IXGBE_TX_MAX_SEG 40 + #define IXGBE_PACKET_TYPE_MASK_82599 0X7F #define IXGBE_PACKET_TYPE_MASK_X550 0X10FF #define IXGBE_PACKET_TYPE_MASK_TUNNEL 0XFF diff --git a/src/dpdk/drivers/net/ixgbe/ixgbe_rxtx_vec_common.h b/src/dpdk/drivers/net/ixgbe/ixgbe_rxtx_vec_common.h index 62b82013..a3473b98 100644 --- a/src/dpdk/drivers/net/ixgbe/ixgbe_rxtx_vec_common.h +++ b/src/dpdk/drivers/net/ixgbe/ixgbe_rxtx_vec_common.h @@ -204,8 +204,20 @@ _ixgbe_rx_queue_release_mbufs_vec(struct ixgbe_rx_queue *rxq) return; /* free all mbufs that are valid in the ring */ - for (i = rxq->rx_tail; i != rxq->rxrearm_start; i = (i + 1) & mask) - rte_pktmbuf_free_seg(rxq->sw_ring[i].mbuf); + if (rxq->rxrearm_nb == 0) { + for (i = 0; i < rxq->nb_rx_desc; i++) { + if (rxq->sw_ring[i].mbuf != NULL) + rte_pktmbuf_free_seg(rxq->sw_ring[i].mbuf); + } + } else { + for (i = rxq->rx_tail; + i != rxq->rxrearm_start; + i = (i + 1) & mask) { + if (rxq->sw_ring[i].mbuf != NULL) + rte_pktmbuf_free_seg(rxq->sw_ring[i].mbuf); + } + } + rxq->rxrearm_nb = rxq->nb_rx_desc; /* set all entries to NULL */ @@ -309,12 +321,8 @@ ixgbe_rx_vec_dev_conf_condition_check_default(struct rte_eth_dev *dev) if (fconf->mode != 
RTE_FDIR_MODE_NONE) return -1; - /* - * - no csum error report support - * - no header split support - */ - if (rxmode->hw_ip_checksum == 1 || - rxmode->header_split == 1) + /* no header split support */ + if (rxmode->header_split == 1) return -1; return 0; diff --git a/src/dpdk/drivers/net/ixgbe/ixgbe_rxtx_vec_neon.c b/src/dpdk/drivers/net/ixgbe/ixgbe_rxtx_vec_neon.c index 64a329ea..f96cc85c 100644 --- a/src/dpdk/drivers/net/ixgbe/ixgbe_rxtx_vec_neon.c +++ b/src/dpdk/drivers/net/ixgbe/ixgbe_rxtx_vec_neon.c @@ -556,5 +556,11 @@ ixgbe_txq_vec_setup(struct ixgbe_tx_queue *txq) int __attribute__((cold)) ixgbe_rx_vec_dev_conf_condition_check(struct rte_eth_dev *dev) { + struct rte_eth_rxmode *rxmode = &dev->data->dev_conf.rxmode; + + /* no csum error report support */ + if (rxmode->hw_ip_checksum == 1) + return -1; + return ixgbe_rx_vec_dev_conf_condition_check_default(dev); } diff --git a/src/dpdk/drivers/net/ixgbe/ixgbe_rxtx_vec_sse.c b/src/dpdk/drivers/net/ixgbe/ixgbe_rxtx_vec_sse.c index 1c4fd7c1..abbf2841 100644 --- a/src/dpdk/drivers/net/ixgbe/ixgbe_rxtx_vec_sse.c +++ b/src/dpdk/drivers/net/ixgbe/ixgbe_rxtx_vec_sse.c @@ -145,7 +145,7 @@ static inline void desc_to_olflags_v(__m128i descs[4], uint8_t vlan_flags, struct rte_mbuf **rx_pkts) { - __m128i ptype0, ptype1, vtag0, vtag1; + __m128i ptype0, ptype1, vtag0, vtag1, csum; union { uint16_t e[4]; uint64_t dword; @@ -156,24 +156,45 @@ desc_to_olflags_v(__m128i descs[4], uint8_t vlan_flags, 0x0000, 0x0000, 0x0000, 0x0000, 0x000F, 0x000F, 0x000F, 0x000F); + /* mask the lower byte of ol_flags */ + const __m128i ol_flags_msk = _mm_set_epi16( + 0x0000, 0x0000, 0x0000, 0x0000, + 0x00FF, 0x00FF, 0x00FF, 0x00FF); + /* map rss type to rss hash flag */ const __m128i rss_flags = _mm_set_epi8(PKT_RX_FDIR, 0, 0, 0, 0, 0, 0, PKT_RX_RSS_HASH, PKT_RX_RSS_HASH, 0, PKT_RX_RSS_HASH, 0, PKT_RX_RSS_HASH, PKT_RX_RSS_HASH, PKT_RX_RSS_HASH, 0); - /* mask everything except vlan present bit */ - const __m128i vlan_msk = _mm_set_epi16( - 0x0000, 0x0000, - 0x0000, 0x0000, - IXGBE_RXD_STAT_VP, IXGBE_RXD_STAT_VP, - IXGBE_RXD_STAT_VP, IXGBE_RXD_STAT_VP); - /* map vlan present (0x8) to ol_flags */ - const __m128i vlan_map = _mm_set_epi8( + /* mask everything except vlan present and l4/ip csum error */ + const __m128i vlan_csum_msk = _mm_set_epi16( + (IXGBE_RXDADV_ERR_TCPE | IXGBE_RXDADV_ERR_IPE) >> 16, + (IXGBE_RXDADV_ERR_TCPE | IXGBE_RXDADV_ERR_IPE) >> 16, + (IXGBE_RXDADV_ERR_TCPE | IXGBE_RXDADV_ERR_IPE) >> 16, + (IXGBE_RXDADV_ERR_TCPE | IXGBE_RXDADV_ERR_IPE) >> 16, + IXGBE_RXD_STAT_VP, IXGBE_RXD_STAT_VP, + IXGBE_RXD_STAT_VP, IXGBE_RXD_STAT_VP); + /* map vlan present (0x8), IPE (0x2), L4E (0x1) to ol_flags */ + const __m128i vlan_csum_map_lo = _mm_set_epi8( 0, 0, 0, 0, - 0, 0, 0, vlan_flags, + vlan_flags | PKT_RX_IP_CKSUM_BAD | PKT_RX_L4_CKSUM_BAD, + vlan_flags | PKT_RX_IP_CKSUM_BAD, + vlan_flags | PKT_RX_IP_CKSUM_GOOD | PKT_RX_L4_CKSUM_BAD, + vlan_flags | PKT_RX_IP_CKSUM_GOOD, 0, 0, 0, 0, - 0, 0, 0, 0); + PKT_RX_IP_CKSUM_BAD | PKT_RX_L4_CKSUM_BAD, + PKT_RX_IP_CKSUM_BAD, + PKT_RX_IP_CKSUM_GOOD | PKT_RX_L4_CKSUM_BAD, + PKT_RX_IP_CKSUM_GOOD); + + const __m128i vlan_csum_map_hi = _mm_set_epi8( + 0, 0, 0, 0, + 0, PKT_RX_L4_CKSUM_GOOD >> sizeof(uint8_t), 0, + PKT_RX_L4_CKSUM_GOOD >> sizeof(uint8_t), + 0, 0, 0, 0, + 0, PKT_RX_L4_CKSUM_GOOD >> sizeof(uint8_t), 0, + PKT_RX_L4_CKSUM_GOOD >> sizeof(uint8_t)); ptype0 = _mm_unpacklo_epi16(descs[0], descs[1]); ptype1 = _mm_unpacklo_epi16(descs[2], descs[3]); @@ -185,8 +206,26 @@ desc_to_olflags_v(__m128i descs[4], uint8_t 
vlan_flags, ptype0 = _mm_shuffle_epi8(rss_flags, ptype0); vtag1 = _mm_unpacklo_epi32(vtag0, vtag1); - vtag1 = _mm_and_si128(vtag1, vlan_msk); - vtag1 = _mm_shuffle_epi8(vlan_map, vtag1); + vtag1 = _mm_and_si128(vtag1, vlan_csum_msk); + + /* csum bits are in the most significant, to use shuffle we need to + * shift them. Change mask to 0xc000 to 0x0003. + */ + csum = _mm_srli_epi16(vtag1, 14); + + /* now or the most significant 64 bits containing the checksum + * flags with the vlan present flags. + */ + csum = _mm_srli_si128(csum, 8); + vtag1 = _mm_or_si128(csum, vtag1); + + /* convert VP, IPE, L4E to ol_flags */ + vtag0 = _mm_shuffle_epi8(vlan_csum_map_hi, vtag1); + vtag0 = _mm_slli_epi16(vtag0, sizeof(uint8_t)); + + vtag1 = _mm_shuffle_epi8(vlan_csum_map_lo, vtag1); + vtag1 = _mm_and_si128(vtag1, ol_flags_msk); + vtag1 = _mm_or_si128(vtag0, vtag1); vtag1 = _mm_or_si128(ptype0, vtag1); vol.dword = _mm_cvtsi128_si64(vtag1); @@ -210,7 +249,6 @@ desc_to_olflags_v(__m128i descs[4], uint8_t vlan_flags, * - nb_pkts > RTE_IXGBE_MAX_RX_BURST, only scan RTE_IXGBE_MAX_RX_BURST * numbers of DD bit * - floor align nb_pkts to a RTE_IXGBE_DESC_PER_LOOP power-of-two - * - don't support ol_flags for rss and csum err */ static inline uint16_t _recv_raw_pkts_vec(struct ixgbe_rx_queue *rxq, struct rte_mbuf **rx_pkts, @@ -243,7 +281,7 @@ _recv_raw_pkts_vec(struct ixgbe_rx_queue *rxq, struct rte_mbuf **rx_pkts, */ rxdp = rxq->rx_ring + rxq->rx_tail; - _mm_prefetch((const void *)rxdp, _MM_HINT_T0); + rte_prefetch0(rxdp); /* See if we need to rearm the RX queue - gives the prefetch a bit * of time to act @@ -305,6 +343,7 @@ _recv_raw_pkts_vec(struct ixgbe_rx_queue *rxq, struct rte_mbuf **rx_pkts, /* Read desc statuses backwards to avoid race condition */ /* A.1 load 4 pkts desc */ descs[3] = _mm_loadu_si128((__m128i *)(rxdp + 3)); + rte_compiler_barrier(); /* B.2 copy 2 mbuf point into rx_pkts */ _mm_storeu_si128((__m128i *)&rx_pkts[pos], mbp1); @@ -313,8 +352,10 @@ _recv_raw_pkts_vec(struct ixgbe_rx_queue *rxq, struct rte_mbuf **rx_pkts, mbp2 = _mm_loadu_si128((__m128i *)&sw_ring[pos+2]); descs[2] = _mm_loadu_si128((__m128i *)(rxdp + 2)); + rte_compiler_barrier(); /* B.1 load 2 mbuf point */ descs[1] = _mm_loadu_si128((__m128i *)(rxdp + 1)); + rte_compiler_barrier(); descs[0] = _mm_loadu_si128((__m128i *)(rxdp)); /* B.2 copy 2 mbuf point into rx_pkts */ @@ -425,7 +466,6 @@ _recv_raw_pkts_vec(struct ixgbe_rx_queue *rxq, struct rte_mbuf **rx_pkts, * - nb_pkts > RTE_IXGBE_MAX_RX_BURST, only scan RTE_IXGBE_MAX_RX_BURST * numbers of DD bit * - floor align nb_pkts to a RTE_IXGBE_DESC_PER_LOOP power-of-two - * - don't support ol_flags for rss and csum err */ uint16_t ixgbe_recv_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts, @@ -438,7 +478,6 @@ ixgbe_recv_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts, * vPMD receive routine that reassembles scattered packets * * Notice: - * - don't support ol_flags for rss and csum err * - nb_pkts < RTE_IXGBE_DESCS_PER_LOOP, just return no packet * - nb_pkts > RTE_IXGBE_MAX_RX_BURST, only scan RTE_IXGBE_MAX_RX_BURST * numbers of DD bit diff --git a/src/dpdk/drivers/net/ixgbe/rte_pmd_ixgbe.h b/src/dpdk/drivers/net/ixgbe/rte_pmd_ixgbe.h new file mode 100644 index 00000000..4d7b507d --- /dev/null +++ b/src/dpdk/drivers/net/ixgbe/rte_pmd_ixgbe.h @@ -0,0 +1,412 @@ +/*- + * BSD LICENSE + * + * Copyright (c) 2016 Intel Corporation. All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/** + * @file rte_pmd_ixgbe.h + * ixgbe PMD specific functions. + * + **/ + +#ifndef _PMD_IXGBE_H_ +#define _PMD_IXGBE_H_ + +#include <rte_ethdev.h> + +/** + * Set the VF MAC address. + * + * @param port + * The port identifier of the Ethernet device. + * @param vf + * VF id. + * @param mac_addr + * VF MAC address. + * @return + * - (0) if successful. + * - (-ENODEV) if *port* invalid. + * - (-EINVAL) if *vf* or *mac_addr* is invalid. + */ +int rte_pmd_ixgbe_set_vf_mac_addr(uint8_t port, uint16_t vf, + struct ether_addr *mac_addr); + +/** + * Enable/Disable VF VLAN anti spoofing. + * + * @param port + * The port identifier of the Ethernet device. + * @param vf + * VF on which to set VLAN anti spoofing. + * @param on + * 1 - Enable VFs VLAN anti spoofing. + * 0 - Disable VFs VLAN anti spoofing. + * @return + * - (0) if successful. + * - (-ENODEV) if *port* invalid. + * - (-EINVAL) if bad parameter. + */ +int rte_pmd_ixgbe_set_vf_vlan_anti_spoof(uint8_t port, uint16_t vf, uint8_t on); + +/** + * Enable/Disable VF MAC anti spoofing. + * + * @param port + * The port identifier of the Ethernet device. + * @param vf + * VF on which to set MAC anti spoofing. + * @param on + * 1 - Enable VFs MAC anti spoofing. + * 0 - Disable VFs MAC anti spoofing. + * @return + * - (0) if successful. + * - (-ENODEV) if *port* invalid. + * - (-EINVAL) if bad parameter. + */ +int rte_pmd_ixgbe_set_vf_mac_anti_spoof(uint8_t port, uint16_t vf, uint8_t on); + +/** + * Enable/Disable vf vlan insert + * + * @param port + * The port identifier of the Ethernet device. + * @param vf + * ID specifying VF. + * @param vlan_id + * 0 - Disable VF's vlan insert. + * n - Enable; n is inserted as the vlan id. + * + * @return + * - (0) if successful. + * - (-ENODEV) if *port* invalid. + * - (-EINVAL) if bad parameter. + */ +int rte_pmd_ixgbe_set_vf_vlan_insert(uint8_t port, uint16_t vf, + uint16_t vlan_id); + +/** + * Enable/Disable tx loopback + * + * @param port + * The port identifier of the Ethernet device. 
+ * @param on + * 1 - Enable tx loopback. + * 0 - Disable tx loopback. + * + * @return + * - (0) if successful. + * - (-ENODEV) if *port* invalid. + * - (-EINVAL) if bad parameter. + */ +int rte_pmd_ixgbe_set_tx_loopback(uint8_t port, uint8_t on); + +/** + * set all queues drop enable bit + * + * @param port + * The port identifier of the Ethernet device. + * @param on + * 1 - set the queue drop enable bit for all pools. + * 0 - reset the queue drop enable bit for all pools. + * + * @return + * - (0) if successful. + * - (-ENODEV) if *port* invalid. + * - (-EINVAL) if bad parameter. + */ +int rte_pmd_ixgbe_set_all_queues_drop_en(uint8_t port, uint8_t on); + +/** + * set drop enable bit in the VF split rx control register + * + * @param port + * The port identifier of the Ethernet device. + * @param vf + * ID specifying VF. + * @param on + * 1 - set the drop enable bit in the split rx control register. + * 0 - reset the drop enable bit in the split rx control register. + * + * @return + * - (0) if successful. + * - (-ENODEV) if *port* invalid. + * - (-EINVAL) if bad parameter. + */ + +int rte_pmd_ixgbe_set_vf_split_drop_en(uint8_t port, uint16_t vf, uint8_t on); + +/** + * Enable/Disable vf vlan strip for all queues in a pool + * + * @param port + * The port identifier of the Ethernet device. + * @param vf + * ID specifying VF. + * @param on + * 1 - Enable VF's vlan strip on RX queues. + * 0 - Disable VF's vlan strip on RX queues. + * + * @return + * - (0) if successful. + * - (-ENOTSUP) if hardware doesn't support this feature. + * - (-ENODEV) if *port* invalid. + * - (-EINVAL) if bad parameter. + */ +int +rte_pmd_ixgbe_set_vf_vlan_stripq(uint8_t port, uint16_t vf, uint8_t on); + +/** + * Enable MACsec offload. + * + * @param port + * The port identifier of the Ethernet device. + * @param en + * 1 - Enable encryption (encrypt and add integrity signature). + * 0 - Disable encryption (only add integrity signature). + * @param rp + * 1 - Enable replay protection. + * 0 - Disable replay protection. + * @return + * - (0) if successful. + * - (-ENODEV) if *port* invalid. + * - (-ENOTSUP) if hardware doesn't support this feature. + */ +int rte_pmd_ixgbe_macsec_enable(uint8_t port, uint8_t en, uint8_t rp); + +/** + * Disable MACsec offload. + * + * @param port + * The port identifier of the Ethernet device. + * @return + * - (0) if successful. + * - (-ENODEV) if *port* invalid. + * - (-ENOTSUP) if hardware doesn't support this feature. + */ +int rte_pmd_ixgbe_macsec_disable(uint8_t port); + +/** + * Configure Tx SC (Secure Connection). + * + * @param port + * The port identifier of the Ethernet device. + * @param mac + * The MAC address on the local side. + * @return + * - (0) if successful. + * - (-ENODEV) if *port* invalid. + * - (-ENOTSUP) if hardware doesn't support this feature. + */ +int rte_pmd_ixgbe_macsec_config_txsc(uint8_t port, uint8_t *mac); + +/** + * Configure Rx SC (Secure Connection). + * + * @param port + * The port identifier of the Ethernet device. + * @param mac + * The MAC address on the remote side. + * @param pi + * The PI (port identifier) on the remote side. + * @return + * - (0) if successful. + * - (-ENODEV) if *port* invalid. + * - (-ENOTSUP) if hardware doesn't support this feature. + */ +int rte_pmd_ixgbe_macsec_config_rxsc(uint8_t port, uint8_t *mac, uint16_t pi); + +/** + * Enable Tx SA (Secure Association). + * + * @param port + * The port identifier of the Ethernet device. + * @param idx + * The SA to be enabled (0 or 1). 
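The MACsec declarations above are typically used together. The sketch below shows the expected bring-up order — enable the offload, then configure the Tx and Rx secure channels — with placeholder MAC addresses and an arbitrary PI value; real key management and error recovery are omitted.

#include <stdint.h>
#include "rte_pmd_ixgbe.h"

static int
macsec_bringup(uint8_t port)
{
    uint8_t local_mac[6]  = { 0x02, 0x00, 0x00, 0x00, 0x00, 0x10 }; /* placeholder */
    uint8_t remote_mac[6] = { 0x02, 0x00, 0x00, 0x00, 0x00, 0x20 }; /* placeholder */
    int ret;

    /* Encrypt transmitted frames and enable replay protection. */
    ret = rte_pmd_ixgbe_macsec_enable(port, 1, 1);
    if (ret != 0)
        return ret;
    ret = rte_pmd_ixgbe_macsec_config_txsc(port, local_mac);
    if (ret != 0)
        return ret;
    /* 0x0001 is an arbitrary example port identifier (PI) for the peer. */
    return rte_pmd_ixgbe_macsec_config_rxsc(port, remote_mac, 0x0001);
}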
+ * @param an + * The association number on the local side. + * @param pn + * The packet number on the local side. + * @param key + * The key on the local side. + * @return + * - (0) if successful. + * - (-ENODEV) if *port* invalid. + * - (-ENOTSUP) if hardware doesn't support this feature. + * - (-EINVAL) if bad parameter. + */ +int rte_pmd_ixgbe_macsec_select_txsa(uint8_t port, uint8_t idx, uint8_t an, + uint32_t pn, uint8_t *key); + +/** + * Enable Rx SA (Secure Association). + * + * @param port + * The port identifier of the Ethernet device. + * @param idx + * The SA to be enabled (0 or 1) + * @param an + * The association number on the remote side. + * @param pn + * The packet number on the remote side. + * @param key + * The key on the remote side. + * @return + * - (0) if successful. + * - (-ENODEV) if *port* invalid. + * - (-ENOTSUP) if hardware doesn't support this feature. + * - (-EINVAL) if bad parameter. + */ +int rte_pmd_ixgbe_macsec_select_rxsa(uint8_t port, uint8_t idx, uint8_t an, + uint32_t pn, uint8_t *key); + +/** +* Set RX L2 Filtering mode of a VF of an Ethernet device. +* +* @param port +* The port identifier of the Ethernet device. +* @param vf +* VF id. +* @param rx_mask +* The RX mode mask, which is one or more of accepting Untagged Packets, +* packets that match the PFUTA table, Broadcast and Multicast Promiscuous. +* ETH_VMDQ_ACCEPT_UNTAG,ETH_VMDQ_ACCEPT_HASH_UC, +* ETH_VMDQ_ACCEPT_BROADCAST and ETH_VMDQ_ACCEPT_MULTICAST will be used +* in rx_mode. +* @param on +* 1 - Enable a VF RX mode. +* 0 - Disable a VF RX mode. +* @return +* - (0) if successful. +* - (-ENOTSUP) if hardware doesn't support. +* - (-ENODEV) if *port_id* invalid. +* - (-EINVAL) if bad parameter. +*/ +int +rte_pmd_ixgbe_set_vf_rxmode(uint8_t port, uint16_t vf, uint16_t rx_mask, uint8_t on); + +/** +* Enable or disable a VF traffic receive of an Ethernet device. +* +* @param port +* The port identifier of the Ethernet device. +* @param vf +* VF id. +* @param on +* 1 - Enable a VF traffic receive. +* 0 - Disable a VF traffic receive. +* @return +* - (0) if successful. +* - (-ENOTSUP) if hardware doesn't support. +* - (-ENODEV) if *port_id* invalid. +* - (-EINVAL) if bad parameter. +*/ +int +rte_pmd_ixgbe_set_vf_rx(uint8_t port, uint16_t vf, uint8_t on); + +/** +* Enable or disable a VF traffic transmit of the Ethernet device. +* +* @param port +* The port identifier of the Ethernet device. +* @param vf +* VF id. +* @param on +* 1 - Enable a VF traffic transmit. +* 0 - Disable a VF traffic transmit. +* @return +* - (0) if successful. +* - (-ENODEV) if *port_id* invalid. +* - (-ENOTSUP) if hardware doesn't support. +* - (-EINVAL) if bad parameter. +*/ +int +rte_pmd_ixgbe_set_vf_tx(uint8_t port, uint16_t vf, uint8_t on); + +/** +* Enable/Disable hardware VF VLAN filtering by an Ethernet device of +* received VLAN packets tagged with a given VLAN Tag Identifier. +* +* @param port +* The port identifier of the Ethernet device. +* @param vlan +* The VLAN Tag Identifier whose filtering must be enabled or disabled. +* @param vf_mask +* Bitmap listing which VFs participate in the VLAN filtering. +* @param vlan_on +* 1 - Enable VFs VLAN filtering. +* 0 - Disable VFs VLAN filtering. +* @return +* - (0) if successful. +* - (-ENOTSUP) if hardware doesn't support. +* - (-ENODEV) if *port_id* invalid. +* - (-EINVAL) if bad parameter. +*/ +int +rte_pmd_ixgbe_set_vf_vlan_filter(uint8_t port, uint16_t vlan, uint64_t vf_mask, uint8_t vlan_on); + +/** + * Set the rate limitation for a vf on an Ethernet device. 
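Continuing the MACsec example, installing the secure associations declared above could look like the following; the all-zero key and the packet numbers are placeholders only and must never be used as real key material.

#include <stdint.h>
#include "rte_pmd_ixgbe.h"

static int
macsec_install_sas(uint8_t port)
{
    uint8_t key[16] = { 0 };   /* placeholder key material */
    int ret;

    /* SA index 0, association number 0, starting packet number 1. */
    ret = rte_pmd_ixgbe_macsec_select_txsa(port, 0, 0, 1, key);
    if (ret != 0)
        return ret;
    return rte_pmd_ixgbe_macsec_select_rxsa(port, 0, 0, 1, key);
}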
+ * + * @param port + * The port identifier of the Ethernet device. + * @param vf + * VF id. + * @param tx_rate + * The tx rate allocated from the total link speed for this VF id. + * @param q_msk + * The queue mask which need to set the rate. + * @return + * - (0) if successful. + * - (-ENOTSUP) if hardware doesn't support this feature. + * - (-ENODEV) if *port_id* invalid. + * - (-EINVAL) if bad parameter. + */ +int rte_pmd_ixgbe_set_vf_rate_limit(uint8_t port, uint16_t vf, uint16_t tx_rate, uint64_t q_msk); + +/** + * Response sent back to ixgbe driver from user app after callback + */ +enum rte_pmd_ixgbe_mb_event_rsp { + RTE_PMD_IXGBE_MB_EVENT_NOOP_ACK, /**< skip mbox request and ACK */ + RTE_PMD_IXGBE_MB_EVENT_NOOP_NACK, /**< skip mbox request and NACK */ + RTE_PMD_IXGBE_MB_EVENT_PROCEED, /**< proceed with mbox request */ + RTE_PMD_IXGBE_MB_EVENT_MAX /**< max value of this enum */ +}; + +/** + * Data sent to the user application when the callback is executed. + */ +struct rte_pmd_ixgbe_mb_event_param { + uint16_t vfid; /**< Virtual Function number */ + uint16_t msg_type; /**< VF to PF message type, defined in ixgbe_mbx.h */ + uint16_t retval; /**< return value */ + void *msg; /**< pointer to message */ +}; +#endif /* _PMD_IXGBE_H_ */ diff --git a/src/dpdk/drivers/net/mlx4/mlx4.c b/src/dpdk/drivers/net/mlx4/mlx4.c index 304c8461..79efaaa3 100644 --- a/src/dpdk/drivers/net/mlx4/mlx4.c +++ b/src/dpdk/drivers/net/mlx4/mlx4.c @@ -61,16 +61,16 @@ /* Verbs header. */ /* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */ #ifdef PEDANTIC -#pragma GCC diagnostic ignored "-pedantic" +#pragma GCC diagnostic ignored "-Wpedantic" #endif #include <infiniband/verbs.h> #ifdef PEDANTIC -#pragma GCC diagnostic error "-pedantic" +#pragma GCC diagnostic error "-Wpedantic" #endif /* DPDK headers don't like -pedantic. */ #ifdef PEDANTIC -#pragma GCC diagnostic ignored "-pedantic" +#pragma GCC diagnostic ignored "-Wpedantic" #endif #include <rte_ether.h> #include <rte_ethdev.h> @@ -87,7 +87,7 @@ #include <rte_alarm.h> #include <rte_memory.h> #ifdef PEDANTIC -#pragma GCC diagnostic error "-pedantic" +#pragma GCC diagnostic error "-Wpedantic" #endif /* Generated configuration header. 
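A hedged sketch of how an application might act on the mailbox event data defined above: inspect the VF request and write one of the rte_pmd_ixgbe_mb_event_rsp codes back into the retval field. The policy shown is purely illustrative, and the callback registration wiring (how this helper gets invoked) is omitted and should be treated as an assumption.

#include <stdint.h>
#include "rte_pmd_ixgbe.h"

static void
handle_vf_mbox(struct rte_pmd_ixgbe_mb_event_param *p)
{
    /* Example policy: let VF 0 proceed, refuse everyone else. */
    if (p->vfid == 0)
        p->retval = RTE_PMD_IXGBE_MB_EVENT_PROCEED;
    else
        p->retval = RTE_PMD_IXGBE_MB_EVENT_NOOP_NACK;
}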
*/ @@ -2961,19 +2961,25 @@ rxq_cq_to_pkt_type(uint32_t flags) if (flags & IBV_EXP_CQ_RX_TUNNEL_PACKET) pkt_type = TRANSPOSE(flags, - IBV_EXP_CQ_RX_OUTER_IPV4_PACKET, RTE_PTYPE_L3_IPV4) | + IBV_EXP_CQ_RX_OUTER_IPV4_PACKET, + RTE_PTYPE_L3_IPV4_EXT_UNKNOWN) | TRANSPOSE(flags, - IBV_EXP_CQ_RX_OUTER_IPV6_PACKET, RTE_PTYPE_L3_IPV6) | + IBV_EXP_CQ_RX_OUTER_IPV6_PACKET, + RTE_PTYPE_L3_IPV6_EXT_UNKNOWN) | TRANSPOSE(flags, - IBV_EXP_CQ_RX_IPV4_PACKET, RTE_PTYPE_INNER_L3_IPV4) | + IBV_EXP_CQ_RX_IPV4_PACKET, + RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN) | TRANSPOSE(flags, - IBV_EXP_CQ_RX_IPV6_PACKET, RTE_PTYPE_INNER_L3_IPV6); + IBV_EXP_CQ_RX_IPV6_PACKET, + RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN); else pkt_type = TRANSPOSE(flags, - IBV_EXP_CQ_RX_IPV4_PACKET, RTE_PTYPE_L3_IPV4) | + IBV_EXP_CQ_RX_IPV4_PACKET, + RTE_PTYPE_L3_IPV4_EXT_UNKNOWN) | TRANSPOSE(flags, - IBV_EXP_CQ_RX_IPV6_PACKET, RTE_PTYPE_L3_IPV6); + IBV_EXP_CQ_RX_IPV6_PACKET, + RTE_PTYPE_L3_IPV6_EXT_UNKNOWN); return pkt_type; } @@ -2995,25 +3001,20 @@ rxq_cq_to_ol_flags(const struct rxq *rxq, uint32_t flags) if (rxq->csum) ol_flags |= - TRANSPOSE(~flags, + TRANSPOSE(flags, IBV_EXP_CQ_RX_IP_CSUM_OK, - PKT_RX_IP_CKSUM_BAD) | - TRANSPOSE(~flags, + PKT_RX_IP_CKSUM_GOOD) | + TRANSPOSE(flags, IBV_EXP_CQ_RX_TCP_UDP_CSUM_OK, - PKT_RX_L4_CKSUM_BAD); - /* - * PKT_RX_IP_CKSUM_BAD and PKT_RX_L4_CKSUM_BAD are used in place - * of PKT_RX_EIP_CKSUM_BAD because the latter is not functional - * (its value is 0). - */ + PKT_RX_L4_CKSUM_GOOD); if ((flags & IBV_EXP_CQ_RX_TUNNEL_PACKET) && (rxq->csum_l2tun)) ol_flags |= - TRANSPOSE(~flags, + TRANSPOSE(flags, IBV_EXP_CQ_RX_OUTER_IP_CSUM_OK, - PKT_RX_IP_CKSUM_BAD) | - TRANSPOSE(~flags, + PKT_RX_IP_CKSUM_GOOD) | + TRANSPOSE(flags, IBV_EXP_CQ_RX_OUTER_TCP_UDP_CSUM_OK, - PKT_RX_L4_CKSUM_BAD); + PKT_RX_L4_CKSUM_GOOD); return ol_flags; } @@ -4426,6 +4427,8 @@ mlx4_dev_infos_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *info) unsigned int max; char ifname[IF_NAMESIZE]; + info->pci_dev = RTE_DEV_TO_PCI(dev->device); + if (priv == NULL) return; priv_lock(priv); @@ -4826,7 +4829,7 @@ end: } /** - * DPDK callback to retrieve physical link information (unlocked version). + * DPDK callback to retrieve physical link information. * * @param dev * Pointer to Ethernet device structure. @@ -4834,9 +4837,9 @@ end: * Wait for request completion (ignored). */ static int -mlx4_link_update_unlocked(struct rte_eth_dev *dev, int wait_to_complete) +mlx4_link_update(struct rte_eth_dev *dev, int wait_to_complete) { - struct priv *priv = mlx4_get_priv(dev); + const struct priv *priv = mlx4_get_priv(dev); struct ethtool_cmd edata = { .cmd = ETHTOOL_GSET }; @@ -4844,6 +4847,8 @@ mlx4_link_update_unlocked(struct rte_eth_dev *dev, int wait_to_complete) struct rte_eth_link dev_link; int link_speed = 0; + /* priv_lock() is not taken to allow concurrent calls. */ + if (priv == NULL) return -EINVAL; (void)wait_to_complete; @@ -4879,28 +4884,6 @@ mlx4_link_update_unlocked(struct rte_eth_dev *dev, int wait_to_complete) } /** - * DPDK callback to retrieve physical link information. - * - * @param dev - * Pointer to Ethernet device structure. - * @param wait_to_complete - * Wait for request completion (ignored). - */ -static int -mlx4_link_update(struct rte_eth_dev *dev, int wait_to_complete) -{ - struct priv *priv = mlx4_get_priv(dev); - int ret; - - if (priv == NULL) - return -EINVAL; - priv_lock(priv); - ret = mlx4_link_update_unlocked(dev, wait_to_complete); - priv_unlock(priv); - return ret; -} - -/** * DPDK callback to change the MTU. 
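rxq_cq_to_ol_flags() above moves single completion-queue status bits into their mbuf ol_flags positions through a TRANSPOSE() helper defined elsewhere in mlx4.c. The snippet below re-implements that bit-transposition idiom from scratch as an illustration, so the macro body is an assumption rather than the driver's exact definition.

#include <stdint.h>
#include <stdio.h>

/* Move the bit selected by `from` in `val` to the position selected by `to`
 * (both masks must be single, power-of-two bits). */
#define TRANSPOSE_BIT(val, from, to) \
    (((from) >= (to)) ? \
     (((val) & (from)) / ((from) / (to))) : \
     (((val) & (from)) * ((to) / (from))))

int main(void)
{
    uint32_t cq_flags = 0x10;  /* pretend bit 4 means "IP checksum OK" */
    uint64_t ol_flags = TRANSPOSE_BIT((uint64_t)cq_flags, UINT64_C(0x10),
                                      UINT64_C(1) << 7);

    printf("ol_flags = 0x%llx\n", (unsigned long long)ol_flags); /* 0x80 */
    return 0;
}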
* * Setting the MTU affects hardware MRU (packets larger than the MTU cannot be @@ -5416,7 +5399,7 @@ priv_dev_link_status_handler(struct priv *priv, struct rte_eth_dev *dev) struct rte_eth_link *link = &dev->data->dev_link; priv->pending_alarm = 0; - mlx4_link_update_unlocked(dev, 0); + mlx4_link_update(dev, 0); if (((link->link_speed == 0) && link->link_status) || ((link->link_speed != 0) && !link->link_status)) { /* Inconsistent status, check again later. */ @@ -5448,7 +5431,7 @@ mlx4_dev_link_status_handler(void *arg) ret = priv_dev_link_status_handler(priv, dev); priv_unlock(priv); if (ret) - _rte_eth_dev_callback_process(dev, RTE_ETH_EVENT_INTR_LSC); + _rte_eth_dev_callback_process(dev, RTE_ETH_EVENT_INTR_LSC, NULL); } /** @@ -5471,7 +5454,7 @@ mlx4_dev_interrupt_handler(struct rte_intr_handle *intr_handle, void *cb_arg) ret = priv_dev_link_status_handler(priv, dev); priv_unlock(priv); if (ret) - _rte_eth_dev_callback_process(dev, RTE_ETH_EVENT_INTR_LSC); + _rte_eth_dev_callback_process(dev, RTE_ETH_EVENT_INTR_LSC, NULL); } /** @@ -5544,7 +5527,7 @@ static struct eth_driver mlx4_driver; * 0 on success, negative errno value on failure. */ static int -mlx4_pci_devinit(struct rte_pci_driver *pci_drv, struct rte_pci_device *pci_dev) +mlx4_pci_probe(struct rte_pci_driver *pci_drv, struct rte_pci_device *pci_dev) { struct ibv_device **list; struct ibv_device *ibv_dev; @@ -5803,7 +5786,7 @@ mlx4_pci_devinit(struct rte_pci_driver *pci_drv, struct rte_pci_device *pci_dev) snprintf(name, sizeof(name), "%s port %u", ibv_get_device_name(ibv_dev), port); - eth_dev = rte_eth_dev_allocate(name, RTE_ETH_DEV_PCI); + eth_dev = rte_eth_dev_allocate(name); } if (eth_dev == NULL) { ERROR("can not allocate rte ethdev"); @@ -5839,11 +5822,9 @@ mlx4_pci_devinit(struct rte_pci_driver *pci_drv, struct rte_pci_device *pci_dev) eth_dev->rx_pkt_burst = mlx4_rx_burst_secondary_setup; } else { eth_dev->data->dev_private = priv; - eth_dev->data->rx_mbuf_alloc_failed = 0; - eth_dev->data->mtu = ETHER_MTU; eth_dev->data->mac_addrs = priv->mac; } - eth_dev->pci_dev = pci_dev; + eth_dev->device = &pci_dev->device; rte_eth_copy_pci_info(eth_dev, pci_dev); @@ -5851,7 +5832,6 @@ mlx4_pci_devinit(struct rte_pci_driver *pci_drv, struct rte_pci_device *pci_dev) priv->dev = eth_dev; eth_dev->dev_ops = &mlx4_dev_ops; - TAILQ_INIT(ð_dev->link_intr_cbs); /* Bring Ethernet device up. */ DEBUG("forcing Ethernet interface up"); @@ -5911,9 +5891,11 @@ static const struct rte_pci_id mlx4_pci_id_map[] = { static struct eth_driver mlx4_driver = { .pci_drv = { - .name = MLX4_DRIVER_NAME, + .driver = { + .name = MLX4_DRIVER_NAME + }, .id_table = mlx4_pci_id_map, - .devinit = mlx4_pci_devinit, + .probe = mlx4_pci_probe, .drv_flags = RTE_PCI_DRV_INTR_LSC, }, .dev_private_size = sizeof(struct priv) @@ -5922,12 +5904,10 @@ static struct eth_driver mlx4_driver = { /** * Driver initialization routine. 
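On the application side, the LSC events that the handlers above forward through _rte_eth_dev_callback_process() are consumed with rte_eth_dev_callback_register(). The sketch below assumes the callback prototype of this DPDK generation (uint8_t port ids, no return parameter); other releases use a different signature.

#include <stdio.h>
#include <rte_ethdev.h>

static void
lsc_event_cb(uint8_t port_id, enum rte_eth_event_type type, void *arg)
{
    struct rte_eth_link link;

    (void)type;
    (void)arg;
    rte_eth_link_get_nowait(port_id, &link);
    printf("port %u link is %s\n", port_id,
           link.link_status ? "up" : "down");
}

/* Typically registered once after rte_eth_dev_configure():
 *   rte_eth_dev_callback_register(port_id, RTE_ETH_EVENT_INTR_LSC,
 *                                 lsc_event_cb, NULL);
 */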
*/ -static int -rte_mlx4_pmd_init(const char *name, const char *args) +RTE_INIT(rte_mlx4_pmd_init); +static void +rte_mlx4_pmd_init(void) { - (void)name; - (void)args; - RTE_BUILD_BUG_ON(sizeof(wr_id_t) != sizeof(uint64_t)); /* * RDMAV_HUGEPAGES_SAFE tells ibv_fork_init() we intend to use @@ -5938,13 +5918,9 @@ rte_mlx4_pmd_init(const char *name, const char *args) setenv("RDMAV_HUGEPAGES_SAFE", "1", 1); ibv_fork_init(); rte_eal_pci_register(&mlx4_driver.pci_drv); - return 0; } -static struct rte_driver rte_mlx4_driver = { - .type = PMD_PDEV, - .init = rte_mlx4_pmd_init, -}; - -PMD_REGISTER_DRIVER(rte_mlx4_driver, mlx4); -DRIVER_REGISTER_PCI_TABLE(mlx4, mlx4_pci_id_map); +RTE_PMD_EXPORT_NAME(net_mlx4, __COUNTER__); +RTE_PMD_REGISTER_PCI_TABLE(net_mlx4, mlx4_pci_id_map); +RTE_PMD_REGISTER_KMOD_DEP(net_mlx4, + "* ib_uverbs & mlx4_en & mlx4_core & mlx4_ib"); diff --git a/src/dpdk/drivers/net/mlx4/mlx4.h b/src/dpdk/drivers/net/mlx4/mlx4.h index d0c7bc29..4c7505e2 100644 --- a/src/dpdk/drivers/net/mlx4/mlx4.h +++ b/src/dpdk/drivers/net/mlx4/mlx4.h @@ -96,7 +96,7 @@ enum { PCI_DEVICE_ID_MELLANOX_CONNECTX3PRO = 0x1007, }; -#define MLX4_DRIVER_NAME "librte_pmd_mlx4" +#define MLX4_DRIVER_NAME "net_mlx4" /* Bit-field manipulation. */ #define BITFIELD_DECLARE(bf, type, size) \ diff --git a/src/dpdk/drivers/net/mlx5/mlx5.c b/src/dpdk/drivers/net/mlx5/mlx5.c index 7c072391..d4bd4696 100644 --- a/src/dpdk/drivers/net/mlx5/mlx5.c +++ b/src/dpdk/drivers/net/mlx5/mlx5.c @@ -181,9 +181,6 @@ mlx5_dev_close(struct rte_eth_dev *dev) } if (priv->reta_idx != NULL) rte_free(priv->reta_idx); - - mlx5_stats_free(dev); - priv_unlock(priv); memset(priv, 0, sizeof(*priv)); } @@ -202,6 +199,9 @@ static const struct eth_dev_ops mlx5_dev_ops = { .link_update = mlx5_link_update, .stats_get = mlx5_stats_get, .stats_reset = mlx5_stats_reset, + .xstats_get = mlx5_xstats_get, + .xstats_reset = mlx5_xstats_reset, + .xstats_get_names = mlx5_xstats_get_names, .dev_infos_get = mlx5_dev_infos_get, .dev_supported_ptypes_get = mlx5_dev_supported_ptypes_get, .vlan_filter_set = mlx5_vlan_filter_set, @@ -257,7 +257,6 @@ mlx5_dev_idx(struct rte_pci_addr *pci_addr) return ret; } - /** * Verify and store value for device argument. * @@ -290,7 +289,7 @@ mlx5_args_check(const char *key, const char *val, void *opaque) } else if (strcmp(MLX5_TXQS_MIN_INLINE, key) == 0) { priv->txqs_inline = tmp; } else if (strcmp(MLX5_TXQ_MPW_EN, key) == 0) { - priv->mps = !!tmp; + priv->mps &= !!tmp; /* Enable MPW only if HW supports */ } else { WARN("%s: unknown parameter", key); return -EINVAL; @@ -298,8 +297,6 @@ mlx5_args_check(const char *key, const char *val, void *opaque) return 0; } - - /** * Parse device parameters. * @@ -336,16 +333,16 @@ mlx5_args(struct priv *priv, struct rte_devargs *devargs) if (rte_kvargs_count(kvlist, params[i])) { ret = rte_kvargs_process(kvlist, params[i], mlx5_args_check, priv); - if (ret != 0) + if (ret != 0) { + rte_kvargs_free(kvlist); return ret; + } } } rte_kvargs_free(kvlist); return 0; } - - static struct eth_driver mlx5_driver; /** @@ -363,7 +360,7 @@ static struct eth_driver mlx5_driver; * 0 on success, negative errno value on failure. 
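The mlx5_args() change above plugs a leak by freeing the kvlist when rte_kvargs_process() fails. The standalone skeleton below shows that parse/process/free pattern with placeholder key names; it is a generic sketch of the rte_kvargs flow, not the mlx5 code itself.

#include <rte_kvargs.h>

static int
dummy_check(const char *key, const char *val, void *opaque)
{
    (void)key;
    (void)val;
    (void)opaque;
    return 0; /* validate and store the value here */
}

static int
parse_devargs(const char *args)
{
    const char *keys[] = { "txq_inline", "txqs_min_inline", NULL }; /* placeholders */
    struct rte_kvargs *kvlist;
    unsigned int i;
    int ret = 0;

    kvlist = rte_kvargs_parse(args, keys);
    if (kvlist == NULL)
        return 0;
    for (i = 0; keys[i] != NULL; ++i) {
        if (rte_kvargs_count(kvlist, keys[i])) {
            ret = rte_kvargs_process(kvlist, keys[i], dummy_check, NULL);
            if (ret != 0)
                break; /* fall through so kvlist is still freed */
        }
    }
    rte_kvargs_free(kvlist);
    return ret;
}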
*/ static int -mlx5_pci_devinit(struct rte_pci_driver *pci_drv, struct rte_pci_device *pci_dev) +mlx5_pci_probe(struct rte_pci_driver *pci_drv, struct rte_pci_device *pci_dev) { struct ibv_device **list; struct ibv_device *ibv_dev; @@ -374,13 +371,6 @@ mlx5_pci_devinit(struct rte_pci_driver *pci_drv, struct rte_pci_device *pci_dev) unsigned int mps; int idx; int i; - static int ibv_was_init=0; - - if (ibv_was_init==0) { - ibv_fork_init(); - ibv_was_init=1; - } - (void)pci_drv; assert(pci_drv == &mlx5_driver.pci_drv); @@ -423,10 +413,26 @@ mlx5_pci_devinit(struct rte_pci_driver *pci_drv, struct rte_pci_device *pci_dev) sriov = ((pci_dev->id.device_id == PCI_DEVICE_ID_MELLANOX_CONNECTX4VF) || (pci_dev->id.device_id == - PCI_DEVICE_ID_MELLANOX_CONNECTX4LXVF)); - /* Multi-packet send is only supported by ConnectX-4 Lx PF. */ - mps = (pci_dev->id.device_id == - PCI_DEVICE_ID_MELLANOX_CONNECTX4LX); + PCI_DEVICE_ID_MELLANOX_CONNECTX4LXVF) || + (pci_dev->id.device_id == + PCI_DEVICE_ID_MELLANOX_CONNECTX5VF) || + (pci_dev->id.device_id == + PCI_DEVICE_ID_MELLANOX_CONNECTX5EXVF)); + /* + * Multi-packet send is supported by ConnectX-4 Lx PF as well + * as all ConnectX-5 devices. + */ + switch (pci_dev->id.device_id) { + case PCI_DEVICE_ID_MELLANOX_CONNECTX4LX: + case PCI_DEVICE_ID_MELLANOX_CONNECTX5: + case PCI_DEVICE_ID_MELLANOX_CONNECTX5VF: + case PCI_DEVICE_ID_MELLANOX_CONNECTX5EX: + case PCI_DEVICE_ID_MELLANOX_CONNECTX5EXVF: + mps = 1; + break; + default: + mps = 0; + } INFO("PCI information matches, using device \"%s\"" " (SR-IOV: %s, MPS: %s)", list[i]->name, @@ -526,16 +532,7 @@ mlx5_pci_devinit(struct rte_pci_driver *pci_drv, struct rte_pci_device *pci_dev) priv->mtu = ETHER_MTU; priv->mps = mps; /* Enable MPW by default if supported. */ priv->cqe_comp = 1; /* Enable compression by default. */ - - - err = mlx5_args(priv, pci_dev->devargs); - - /* TREX PATCH */ - /* set for maximum performance default */ - priv->txq_inline =64; - priv->txqs_inline =4; - - + err = mlx5_args(priv, pci_dev->device.devargs); if (err) { ERROR("failed to process device arguments: %s", strerror(err)); @@ -562,8 +559,9 @@ mlx5_pci_devinit(struct rte_pci_driver *pci_drv, struct rte_pci_device *pci_dev) priv->ind_table_max_size = exp_device_attr.rx_hash_caps.max_rwq_indirection_table_size; /* Remove this check once DPDK supports larger/variable * indirection tables. 
*/ - if (priv->ind_table_max_size > (unsigned int)RSS_INDIRECTION_TABLE_SIZE) - priv->ind_table_max_size = RSS_INDIRECTION_TABLE_SIZE; + if (priv->ind_table_max_size > + (unsigned int)ETH_RSS_RETA_SIZE_512) + priv->ind_table_max_size = ETH_RSS_RETA_SIZE_512; DEBUG("maximum RX indirection table size is %u", priv->ind_table_max_size); priv->hw_vlan_strip = !!(exp_device_attr.wq_vlan_offloads_cap & @@ -641,7 +639,7 @@ mlx5_pci_devinit(struct rte_pci_driver *pci_drv, struct rte_pci_device *pci_dev) snprintf(name, sizeof(name), "%s port %u", ibv_get_device_name(ibv_dev), port); - eth_dev = rte_eth_dev_allocate(name, RTE_ETH_DEV_PCI); + eth_dev = rte_eth_dev_allocate(name); } if (eth_dev == NULL) { ERROR("can not allocate rte ethdev"); @@ -676,22 +674,19 @@ mlx5_pci_devinit(struct rte_pci_driver *pci_drv, struct rte_pci_device *pci_dev) eth_dev->rx_pkt_burst = mlx5_rx_burst_secondary_setup; } else { eth_dev->data->dev_private = priv; - eth_dev->data->rx_mbuf_alloc_failed = 0; - eth_dev->data->mtu = ETHER_MTU; eth_dev->data->mac_addrs = priv->mac; } - eth_dev->pci_dev = pci_dev; + eth_dev->device = &pci_dev->device; rte_eth_copy_pci_info(eth_dev, pci_dev); eth_dev->driver = &mlx5_driver; priv->dev = eth_dev; eth_dev->dev_ops = &mlx5_dev_ops; - TAILQ_INIT(ð_dev->link_intr_cbs); - /* Bring Ethernet device up. */ DEBUG("forcing Ethernet interface up"); priv_set_flags(priv, ~IFF_UP, IFF_UP); + mlx5_link_update(priv->dev, 1); continue; port_error: @@ -746,520 +741,20 @@ static const struct rte_pci_id mlx5_pci_id_map[] = { PCI_DEVICE_ID_MELLANOX_CONNECTX4LXVF) }, { - .vendor_id = 0 - } -}; - -static struct eth_driver mlx5_driver = { - .pci_drv = { - .name = MLX5_DRIVER_NAME, - .id_table = mlx5_pci_id_map, - .devinit = mlx5_pci_devinit, - .drv_flags = RTE_PCI_DRV_INTR_LSC, - }, - .dev_private_size = sizeof(struct priv) -}; - -/** - * Driver initialization routine. - */ -static int -rte_mlx5_pmd_init(const char *name, const char *args) -{ - (void)name; - (void)args; - /* - * RDMAV_HUGEPAGES_SAFE tells ibv_fork_init() we intend to use - * huge pages. Calling ibv_fork_init() during init allows - * applications to use fork() safely for purposes other than - * using this PMD, which is not supported in forked processes. - */ - setenv("RDMAV_HUGEPAGES_SAFE", "1", 1); - rte_eal_pci_register(&mlx5_driver.pci_drv); - return 0; -} - -static struct rte_driver rte_mlx5_driver = { - .type = PMD_PDEV, - .init = rte_mlx5_pmd_init, -}; - -PMD_REGISTER_DRIVER(rte_mlx5_driver, mlx5); -DRIVER_REGISTER_PCI_TABLE(mlx5, mlx5_pci_id_map); - - - - - - -#if 0 -/** - * Verify and store value for device argument. - * - * @param[in] key - * Key argument to verify. - * @param[in] val - * Value associated with key. - * @param opaque - * User data. - * - * @return - * 0 on success, negative errno value on failure. - */ -static int -mlx5_args_check(const char *key, const char *val, void *opaque) -{ - struct priv *priv = opaque; - unsigned long tmp; - - errno = 0; - tmp = strtoul(val, NULL, 0); - if (errno) { - WARN("%s: \"%s\" is not a valid integer", key, val); - return errno; - } - if (strcmp(MLX5_RXQ_CQE_COMP_EN, key) == 0) { - priv->cqe_comp = !!tmp; - } else if (strcmp(MLX5_TXQ_INLINE, key) == 0) { - priv->txq_inline = tmp; - } else if (strcmp(MLX5_TXQS_MIN_INLINE, key) == 0) { - priv->txqs_inline = tmp; - } else if (strcmp(MLX5_TXQ_MPW_EN, key) == 0) { - priv->mps = !!tmp; - } else { - WARN("%s: unknown parameter", key); - return -EINVAL; - } - return 0; -} - -/** - * Parse device parameters. 
- * - * @param priv - * Pointer to private structure. - * @param devargs - * Device arguments structure. - * - * @return - * 0 on success, errno value on failure. - */ -static int -mlx5_args(struct priv *priv, struct rte_devargs *devargs) -{ - const char **params = (const char *[]){ - MLX5_RXQ_CQE_COMP_EN, - MLX5_TXQ_INLINE, - MLX5_TXQS_MIN_INLINE, - MLX5_TXQ_MPW_EN, - NULL, - }; - struct rte_kvargs *kvlist; - int ret = 0; - int i; - - if (devargs == NULL) - return 0; - /* Following UGLY cast is done to pass checkpatch. */ - kvlist = rte_kvargs_parse(devargs->args, params); - if (kvlist == NULL) - return 0; - /* Process parameters. */ - for (i = 0; (params[i] != NULL); ++i) { - if (rte_kvargs_count(kvlist, params[i])) { - ret = rte_kvargs_process(kvlist, params[i], - mlx5_args_check, priv); - if (ret != 0) - return ret; - } - } - rte_kvargs_free(kvlist); - return 0; -} - -static struct eth_driver mlx5_driver; - -/** - * DPDK callback to register a PCI device. - * - * This function creates an Ethernet device for each port of a given - * PCI device. - * - * @param[in] pci_drv - * PCI driver structure (mlx5_driver). - * @param[in] pci_dev - * PCI device information. - * - * @return - * 0 on success, negative errno value on failure. - */ -static int -mlx5_pci_probe(struct rte_pci_driver *pci_drv, struct rte_pci_device *pci_dev) -{ - struct ibv_device **list; - struct ibv_device *ibv_dev; - int err = 0; - struct ibv_context *attr_ctx = NULL; - struct ibv_device_attr device_attr; - unsigned int sriov; - unsigned int mps; - int idx; - int i; - - (void)pci_drv; - assert(pci_drv == &mlx5_driver.pci_drv); - /* Get mlx5_dev[] index. */ - idx = mlx5_dev_idx(&pci_dev->addr); - if (idx == -1) { - ERROR("this driver cannot support any more adapters"); - return -ENOMEM; - } - DEBUG("using driver device index %d", idx); - - /* Save PCI address. */ - mlx5_dev[idx].pci_addr = pci_dev->addr; - list = ibv_get_device_list(&i); - if (list == NULL) { - assert(errno); - if (errno == ENOSYS) { - WARN("cannot list devices, is ib_uverbs loaded?"); - return 0; - } - return -errno; - } - assert(i >= 0); - /* - * For each listed device, check related sysfs entry against - * the provided PCI ID. - */ - while (i != 0) { - struct rte_pci_addr pci_addr; - - --i; - DEBUG("checking device \"%s\"", list[i]->name); - if (mlx5_ibv_device_to_pci_addr(list[i], &pci_addr)) - continue; - if ((pci_dev->addr.domain != pci_addr.domain) || - (pci_dev->addr.bus != pci_addr.bus) || - (pci_dev->addr.devid != pci_addr.devid) || - (pci_dev->addr.function != pci_addr.function)) - continue; - sriov = ((pci_dev->id.device_id == - PCI_DEVICE_ID_MELLANOX_CONNECTX4VF) || - (pci_dev->id.device_id == - PCI_DEVICE_ID_MELLANOX_CONNECTX4LXVF)); - /* Multi-packet send is only supported by ConnectX-4 Lx PF. */ - mps = (pci_dev->id.device_id == - PCI_DEVICE_ID_MELLANOX_CONNECTX4LX); - INFO("PCI information matches, using device \"%s\"" - " (SR-IOV: %s, MPS: %s)", - list[i]->name, - sriov ? "true" : "false", - mps ? 
"true" : "false"); - attr_ctx = ibv_open_device(list[i]); - err = errno; - break; - } - if (attr_ctx == NULL) { - ibv_free_device_list(list); - switch (err) { - case 0: - WARN("cannot access device, is mlx5_ib loaded?"); - return 0; - case EINVAL: - WARN("cannot use device, are drivers up to date?"); - return 0; - } - assert(err > 0); - return -err; - } - ibv_dev = list[i]; - - DEBUG("device opened"); - if (ibv_query_device(attr_ctx, &device_attr)) - goto error; - INFO("%u port(s) detected", device_attr.phys_port_cnt); - - for (i = 0; i < device_attr.phys_port_cnt; i++) { - uint32_t port = i + 1; /* ports are indexed from one */ - uint32_t test = (1 << i); - struct ibv_context *ctx = NULL; - struct ibv_port_attr port_attr; - struct ibv_pd *pd = NULL; - struct priv *priv = NULL; - struct rte_eth_dev *eth_dev; - struct ibv_exp_device_attr exp_device_attr; - struct ether_addr mac; - uint16_t num_vfs = 0; - - exp_device_attr.comp_mask = - IBV_EXP_DEVICE_ATTR_EXP_CAP_FLAGS | - IBV_EXP_DEVICE_ATTR_RX_HASH | - IBV_EXP_DEVICE_ATTR_VLAN_OFFLOADS | - IBV_EXP_DEVICE_ATTR_RX_PAD_END_ALIGN | - 0; - - DEBUG("using port %u (%08" PRIx32 ")", port, test); - - ctx = ibv_open_device(ibv_dev); - if (ctx == NULL) - goto port_error; - - /* Check port status. */ - err = ibv_query_port(ctx, port, &port_attr); - if (err) { - ERROR("port query failed: %s", strerror(err)); - goto port_error; - } - - if (port_attr.link_layer != IBV_LINK_LAYER_ETHERNET) { - ERROR("port %d is not configured in Ethernet mode", - port); - goto port_error; - } - - if (port_attr.state != IBV_PORT_ACTIVE) - DEBUG("port %d is not active: \"%s\" (%d)", - port, ibv_port_state_str(port_attr.state), - port_attr.state); - - /* Allocate protection domain. */ - pd = ibv_alloc_pd(ctx); - if (pd == NULL) { - ERROR("PD allocation failure"); - err = ENOMEM; - goto port_error; - } - - mlx5_dev[idx].ports |= test; - - /* from rte_ethdev.c */ - priv = rte_zmalloc("ethdev private structure", - sizeof(*priv), - RTE_CACHE_LINE_SIZE); - if (priv == NULL) { - ERROR("priv allocation failure"); - err = ENOMEM; - goto port_error; - } - - priv->ctx = ctx; - priv->device_attr = device_attr; - priv->port = port; - priv->pd = pd; - priv->mtu = ETHER_MTU; - priv->mps = mps; /* Enable MPW by default if supported. */ - priv->cqe_comp = 1; /* Enable compression by default. */ - err = mlx5_args(priv, pci_dev->device.devargs); - if (err) { - ERROR("failed to process device arguments: %s", - strerror(err)); - goto port_error; - } - if (ibv_exp_query_device(ctx, &exp_device_attr)) { - ERROR("ibv_exp_query_device() failed"); - goto port_error; - } - - priv->hw_csum = - ((exp_device_attr.exp_device_cap_flags & - IBV_EXP_DEVICE_RX_CSUM_TCP_UDP_PKT) && - (exp_device_attr.exp_device_cap_flags & - IBV_EXP_DEVICE_RX_CSUM_IP_PKT)); - DEBUG("checksum offloading is %ssupported", - (priv->hw_csum ? "" : "not ")); - - priv->hw_csum_l2tun = !!(exp_device_attr.exp_device_cap_flags & - IBV_EXP_DEVICE_VXLAN_SUPPORT); - DEBUG("L2 tunnel checksum offloads are %ssupported", - (priv->hw_csum_l2tun ? "" : "not ")); - - priv->ind_table_max_size = exp_device_attr.rx_hash_caps.max_rwq_indirection_table_size; - /* Remove this check once DPDK supports larger/variable - * indirection tables. 
*/ - if (priv->ind_table_max_size > (unsigned int)RSS_INDIRECTION_TABLE_SIZE) - priv->ind_table_max_size = RSS_INDIRECTION_TABLE_SIZE; - DEBUG("maximum RX indirection table size is %u", - priv->ind_table_max_size); - priv->hw_vlan_strip = !!(exp_device_attr.wq_vlan_offloads_cap & - IBV_EXP_RECEIVE_WQ_CVLAN_STRIP); - DEBUG("VLAN stripping is %ssupported", - (priv->hw_vlan_strip ? "" : "not ")); - - priv->hw_fcs_strip = !!(exp_device_attr.exp_device_cap_flags & - IBV_EXP_DEVICE_SCATTER_FCS); - DEBUG("FCS stripping configuration is %ssupported", - (priv->hw_fcs_strip ? "" : "not ")); - - priv->hw_padding = !!exp_device_attr.rx_pad_end_addr_align; - DEBUG("hardware RX end alignment padding is %ssupported", - (priv->hw_padding ? "" : "not ")); - - priv_get_num_vfs(priv, &num_vfs); - priv->sriov = (num_vfs || sriov); - if (priv->mps && !mps) { - ERROR("multi-packet send not supported on this device" - " (" MLX5_TXQ_MPW_EN ")"); - err = ENOTSUP; - goto port_error; - } - /* Allocate and register default RSS hash keys. */ - priv->rss_conf = rte_calloc(__func__, hash_rxq_init_n, - sizeof((*priv->rss_conf)[0]), 0); - if (priv->rss_conf == NULL) { - err = ENOMEM; - goto port_error; - } - err = rss_hash_rss_conf_new_key(priv, - rss_hash_default_key, - rss_hash_default_key_len, - ETH_RSS_PROTO_MASK); - if (err) - goto port_error; - /* Configure the first MAC address by default. */ - if (priv_get_mac(priv, &mac.addr_bytes)) { - ERROR("cannot get MAC address, is mlx5_en loaded?" - " (errno: %s)", strerror(errno)); - goto port_error; - } - INFO("port %u MAC address is %02x:%02x:%02x:%02x:%02x:%02x", - priv->port, - mac.addr_bytes[0], mac.addr_bytes[1], - mac.addr_bytes[2], mac.addr_bytes[3], - mac.addr_bytes[4], mac.addr_bytes[5]); - /* Register MAC address. */ - claim_zero(priv_mac_addr_add(priv, 0, - (const uint8_t (*)[ETHER_ADDR_LEN]) - mac.addr_bytes)); - /* Initialize FD filters list. */ - err = fdir_init_filters_list(priv); - if (err) - goto port_error; -#ifndef NDEBUG - { - char ifname[IF_NAMESIZE]; - - if (priv_get_ifname(priv, &ifname) == 0) - DEBUG("port %u ifname is \"%s\"", - priv->port, ifname); - else - DEBUG("port %u ifname is unknown", priv->port); - } -#endif - /* Get actual MTU if possible. */ - priv_get_mtu(priv, &priv->mtu); - DEBUG("port %u MTU is %u", priv->port, priv->mtu); - - /* from rte_ethdev.c */ - { - char name[RTE_ETH_NAME_MAX_LEN]; - - snprintf(name, sizeof(name), "%s port %u", - ibv_get_device_name(ibv_dev), port); - eth_dev = rte_eth_dev_allocate(name); - } - if (eth_dev == NULL) { - ERROR("can not allocate rte ethdev"); - err = ENOMEM; - goto port_error; - } - - /* Secondary processes have to use local storage for their - * private data as well as a copy of eth_dev->data, but this - * pointer must not be modified before burst functions are - * actually called. 
*/ - if (mlx5_is_secondary()) { - struct mlx5_secondary_data *sd = - &mlx5_secondary_data[eth_dev->data->port_id]; - sd->primary_priv = eth_dev->data->dev_private; - if (sd->primary_priv == NULL) { - ERROR("no private data for port %u", - eth_dev->data->port_id); - err = EINVAL; - goto port_error; - } - sd->shared_dev_data = eth_dev->data; - rte_spinlock_init(&sd->lock); - memcpy(sd->data.name, sd->shared_dev_data->name, - sizeof(sd->data.name)); - sd->data.dev_private = priv; - sd->data.rx_mbuf_alloc_failed = 0; - sd->data.mtu = ETHER_MTU; - sd->data.port_id = sd->shared_dev_data->port_id; - sd->data.mac_addrs = priv->mac; - eth_dev->tx_pkt_burst = mlx5_tx_burst_secondary_setup; - eth_dev->rx_pkt_burst = mlx5_rx_burst_secondary_setup; - } else { - eth_dev->data->dev_private = priv; - eth_dev->data->rx_mbuf_alloc_failed = 0; - eth_dev->data->mtu = ETHER_MTU; - eth_dev->data->mac_addrs = priv->mac; - } - - eth_dev->pci_dev = pci_dev; - rte_eth_copy_pci_info(eth_dev, pci_dev); - eth_dev->driver = &mlx5_driver; - priv->dev = eth_dev; - eth_dev->dev_ops = &mlx5_dev_ops; - - TAILQ_INIT(ð_dev->link_intr_cbs); - - /* Bring Ethernet device up. */ - DEBUG("forcing Ethernet interface up"); - priv_set_flags(priv, ~IFF_UP, IFF_UP); - mlx5_link_update_unlocked(priv->dev, 1); - continue; - -port_error: - if (priv) { - rte_free(priv->rss_conf); - rte_free(priv); - } - if (pd) - claim_zero(ibv_dealloc_pd(pd)); - if (ctx) - claim_zero(ibv_close_device(ctx)); - break; - } - - /* - * XXX if something went wrong in the loop above, there is a resource - * leak (ctx, pd, priv, dpdk ethdev) but we can do nothing about it as - * long as the dpdk does not provide a way to deallocate a ethdev and a - * way to enumerate the registered ethdevs to free the previous ones. - */ - - /* no port found, complain */ - if (!mlx5_dev[idx].ports) { - err = ENODEV; - goto error; - } - -error: - if (attr_ctx) - claim_zero(ibv_close_device(attr_ctx)); - if (list) - ibv_free_device_list(list); - assert(err >= 0); - return -err; -} - -static const struct rte_pci_id mlx5_pci_id_map[] = { - { RTE_PCI_DEVICE(PCI_VENDOR_ID_MELLANOX, - PCI_DEVICE_ID_MELLANOX_CONNECTX4) + PCI_DEVICE_ID_MELLANOX_CONNECTX5) }, { RTE_PCI_DEVICE(PCI_VENDOR_ID_MELLANOX, - PCI_DEVICE_ID_MELLANOX_CONNECTX4VF) + PCI_DEVICE_ID_MELLANOX_CONNECTX5VF) }, { RTE_PCI_DEVICE(PCI_VENDOR_ID_MELLANOX, - PCI_DEVICE_ID_MELLANOX_CONNECTX4LX) + PCI_DEVICE_ID_MELLANOX_CONNECTX5EX) }, { RTE_PCI_DEVICE(PCI_VENDOR_ID_MELLANOX, - PCI_DEVICE_ID_MELLANOX_CONNECTX4LXVF) + PCI_DEVICE_ID_MELLANOX_CONNECTX5EXVF) }, { .vendor_id = 0 @@ -1292,9 +787,10 @@ rte_mlx5_pmd_init(void) * using this PMD, which is not supported in forked processes. 
*/ setenv("RDMAV_HUGEPAGES_SAFE", "1", 1); + ibv_fork_init(); rte_eal_pci_register(&mlx5_driver.pci_drv); } RTE_PMD_EXPORT_NAME(net_mlx5, __COUNTER__); RTE_PMD_REGISTER_PCI_TABLE(net_mlx5, mlx5_pci_id_map); -#endif +RTE_PMD_REGISTER_KMOD_DEP(net_mlx5, "* ib_uverbs & mlx5_core & mlx5_ib"); diff --git a/src/dpdk/drivers/net/mlx5/mlx5.h b/src/dpdk/drivers/net/mlx5/mlx5.h index 83b29e18..879da5ef 100644 --- a/src/dpdk/drivers/net/mlx5/mlx5.h +++ b/src/dpdk/drivers/net/mlx5/mlx5.h @@ -59,6 +59,7 @@ #include <rte_spinlock.h> #include <rte_interrupts.h> #include <rte_errno.h> +#include <rte_flow.h> #ifdef PEDANTIC #pragma GCC diagnostic error "-Wpedantic" #endif @@ -82,36 +83,20 @@ enum { PCI_DEVICE_ID_MELLANOX_CONNECTX4VF = 0x1014, PCI_DEVICE_ID_MELLANOX_CONNECTX4LX = 0x1015, PCI_DEVICE_ID_MELLANOX_CONNECTX4LXVF = 0x1016, + PCI_DEVICE_ID_MELLANOX_CONNECTX5 = 0x1017, + PCI_DEVICE_ID_MELLANOX_CONNECTX5VF = 0x1018, + PCI_DEVICE_ID_MELLANOX_CONNECTX5EX = 0x1019, + PCI_DEVICE_ID_MELLANOX_CONNECTX5EXVF = 0x101a, }; -struct mlx5_stats_priv { - - struct rte_eth_stats m_shadow; - uint32_t n_stats; /* number of counters */ - - void * et_stats ;/* point to ethtool counter struct ethtool_stats*/ - - /* index into ethtool */ - uint16_t inx_rx_vport_unicast_bytes; - uint16_t inx_rx_vport_multicast_bytes; - uint16_t inx_rx_vport_broadcast_bytes; - uint16_t inx_rx_vport_unicast_packets; - uint16_t inx_rx_vport_multicast_packets; - uint16_t inx_rx_vport_broadcast_packets; - uint16_t inx_tx_vport_unicast_bytes; - uint16_t inx_tx_vport_multicast_bytes; - uint16_t inx_tx_vport_broadcast_bytes; - uint16_t inx_tx_vport_unicast_packets; - uint16_t inx_tx_vport_multicast_packets; - uint16_t inx_tx_vport_broadcast_packets; - uint16_t inx_rx_wqe_err; - uint16_t inx_rx_crc_errors_phy; - uint16_t inx_rx_in_range_len_errors_phy; - uint16_t inx_rx_symbol_err_phy; - uint16_t inx_tx_errors_phy; +struct mlx5_xstats_ctrl { + /* Number of device stats. */ + uint16_t stats_n; + /* Index in the device counters table. */ + uint16_t dev_table_idx[MLX5_MAX_XSTATS]; + uint64_t base[MLX5_MAX_XSTATS]; }; - struct priv { struct rte_eth_dev *dev; /* Ethernet device. */ struct ibv_context *ctx; /* Verbs context. */ @@ -163,9 +148,10 @@ struct priv { unsigned int reta_idx_n; /* RETA index size. */ struct fdir_filter_list *fdir_filter_list; /* Flow director rules. */ struct fdir_queue *fdir_drop_queue; /* Flow director drop queue. */ + LIST_HEAD(mlx5_flows, rte_flow) flows; /* RTE Flow rules. */ uint32_t link_speed_capa; /* Link speed capabilities. */ + struct mlx5_xstats_ctrl xstats_ctrl; /* Extended stats control. */ rte_spinlock_t lock; /* Lock for control functions. */ - struct mlx5_stats_priv m_stats; }; /* Local storage for secondary process data. 
*/ @@ -217,7 +203,6 @@ int priv_set_flags(struct priv *, unsigned int, unsigned int); int mlx5_dev_configure(struct rte_eth_dev *); void mlx5_dev_infos_get(struct rte_eth_dev *, struct rte_eth_dev_info *); const uint32_t *mlx5_dev_supported_ptypes_get(struct rte_eth_dev *dev); -int mlx5_link_update_unlocked(struct rte_eth_dev *, int); int mlx5_link_update(struct rte_eth_dev *, int); int mlx5_dev_set_mtu(struct rte_eth_dev *, uint16_t); int mlx5_dev_get_flow_ctrl(struct rte_eth_dev *, struct rte_eth_fc_conf *); @@ -273,9 +258,14 @@ void mlx5_allmulticast_disable(struct rte_eth_dev *); /* mlx5_stats.c */ +void priv_xstats_init(struct priv *); void mlx5_stats_get(struct rte_eth_dev *, struct rte_eth_stats *); void mlx5_stats_reset(struct rte_eth_dev *); -void mlx5_stats_free(struct rte_eth_dev *dev); +int mlx5_xstats_get(struct rte_eth_dev *, + struct rte_eth_xstat *, unsigned int); +void mlx5_xstats_reset(struct rte_eth_dev *); +int mlx5_xstats_get_names(struct rte_eth_dev *, + struct rte_eth_xstat_name *, unsigned int); /* mlx5_vlan.c */ @@ -298,4 +288,21 @@ void priv_fdir_enable(struct priv *); int mlx5_dev_filter_ctrl(struct rte_eth_dev *, enum rte_filter_type, enum rte_filter_op, void *); +/* mlx5_flow.c */ + +int mlx5_flow_validate(struct rte_eth_dev *, const struct rte_flow_attr *, + const struct rte_flow_item [], + const struct rte_flow_action [], + struct rte_flow_error *); +struct rte_flow *mlx5_flow_create(struct rte_eth_dev *, + const struct rte_flow_attr *, + const struct rte_flow_item [], + const struct rte_flow_action [], + struct rte_flow_error *); +int mlx5_flow_destroy(struct rte_eth_dev *, struct rte_flow *, + struct rte_flow_error *); +int mlx5_flow_flush(struct rte_eth_dev *, struct rte_flow_error *); +int priv_flow_start(struct priv *); +void priv_flow_stop(struct priv *); + #endif /* RTE_PMD_MLX5_H_ */ diff --git a/src/dpdk/drivers/net/mlx5/mlx5_defs.h b/src/dpdk/drivers/net/mlx5/mlx5_defs.h index 30adfebb..e91d2454 100644 --- a/src/dpdk/drivers/net/mlx5/mlx5_defs.h +++ b/src/dpdk/drivers/net/mlx5/mlx5_defs.h @@ -54,9 +54,6 @@ */ #define MLX5_TX_COMP_THRESH 32 -/* RSS Indirection table size. */ -#define RSS_INDIRECTION_TABLE_SIZE 256 - /* * Maximum number of cached Memory Pools (MPs) per TX queue. Each RTE MP * from which buffers are to be transmitted will have to be mapped by this @@ -79,41 +76,7 @@ /* Alarm timeout. */ #define MLX5_ALARM_TIMEOUT_US 100000 - -//#ifdef TREX_PATCH_DPDK PATH for DPDK16.11 should be removed - -/** - * Mask of bits used to determine the status of RX IP checksum. - * - PKT_RX_IP_CKSUM_UNKNOWN: no information about the RX IP checksum - * - PKT_RX_IP_CKSUM_BAD: the IP checksum in the packet is wrong - * - PKT_RX_IP_CKSUM_GOOD: the IP checksum in the packet is valid - * - PKT_RX_IP_CKSUM_NONE: the IP checksum is not correct in the packet - * data, but the integrity of the IP header is verified. - */ -#define PKT_RX_IP_CKSUM_MASK ((1ULL << 4) | (1ULL << 7)) - -#define PKT_RX_IP_CKSUM_UNKNOWN 0 -#define PKT_RX_IP_CKSUM_BAD (1ULL << 4) -#define PKT_RX_IP_CKSUM_GOOD (1ULL << 7) -#define PKT_RX_IP_CKSUM_NONE ((1ULL << 4) | (1ULL << 7)) - -/** - * Mask of bits used to determine the status of RX L4 checksum. - * - PKT_RX_L4_CKSUM_UNKNOWN: no information about the RX L4 checksum - * - PKT_RX_L4_CKSUM_BAD: the L4 checksum in the packet is wrong - * - PKT_RX_L4_CKSUM_GOOD: the L4 checksum in the packet is valid - * - PKT_RX_L4_CKSUM_NONE: the L4 checksum is not correct in the packet - * data, but the integrity of the L4 data is verified. 
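From an application, the new mlx5 xstats callbacks declared above are reached through the generic ethdev API. A minimal read loop might look like the following; buffer sizing and error handling are simplified, and the exact set of counter names depends on the device.

#include <inttypes.h>
#include <stdio.h>
#include <stdlib.h>
#include <rte_ethdev.h>

static void
dump_xstats(uint8_t port_id)
{
    int n = rte_eth_xstats_get_names(port_id, NULL, 0);
    struct rte_eth_xstat_name *names;
    struct rte_eth_xstat *values;
    int i, ret;

    if (n <= 0)
        return;
    names = calloc(n, sizeof(*names));
    values = calloc(n, sizeof(*values));
    if (names == NULL || values == NULL)
        goto out;
    if (rte_eth_xstats_get_names(port_id, names, n) != n)
        goto out;
    ret = rte_eth_xstats_get(port_id, values, n);
    if (ret < 0 || ret > n)
        goto out;
    for (i = 0; i < ret; i++)
        printf("%s: %" PRIu64 "\n", names[values[i].id].name, values[i].value);
out:
    free(names);
    free(values);
}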
- */ -#define PKT_RX_L4_CKSUM_MASK ((1ULL << 3) | (1ULL << 8)) - -#define PKT_RX_L4_CKSUM_UNKNOWN 0 -#define PKT_RX_L4_CKSUM_BAD (1ULL << 3) -#define PKT_RX_L4_CKSUM_GOOD (1ULL << 8) -#define PKT_RX_L4_CKSUM_NONE ((1ULL << 3) | (1ULL << 8)) - - -//#endif - +/* Maximum number of extended statistics counters. */ +#define MLX5_MAX_XSTATS 32 #endif /* RTE_PMD_MLX5_DEFS_H_ */ diff --git a/src/dpdk/drivers/net/mlx5/mlx5_ethdev.c b/src/dpdk/drivers/net/mlx5/mlx5_ethdev.c index 85b81360..2145965f 100644 --- a/src/dpdk/drivers/net/mlx5/mlx5_ethdev.c +++ b/src/dpdk/drivers/net/mlx5/mlx5_ethdev.c @@ -43,9 +43,11 @@ #include <net/if.h> #include <sys/ioctl.h> #include <sys/socket.h> +#include <sys/utsname.h> #include <netinet/in.h> #include <linux/ethtool.h> #include <linux/sockios.h> +#include <linux/version.h> #include <fcntl.h> /* DPDK headers don't like -pedantic. */ @@ -67,6 +69,57 @@ #include "mlx5_rxtx.h" #include "mlx5_utils.h" +/* Add defines in case the running kernel is not the same as user headers. */ +#ifndef ETHTOOL_GLINKSETTINGS +struct ethtool_link_settings { + uint32_t cmd; + uint32_t speed; + uint8_t duplex; + uint8_t port; + uint8_t phy_address; + uint8_t autoneg; + uint8_t mdio_support; + uint8_t eth_to_mdix; + uint8_t eth_tp_mdix_ctrl; + int8_t link_mode_masks_nwords; + uint32_t reserved[8]; + uint32_t link_mode_masks[]; +}; + +#define ETHTOOL_GLINKSETTINGS 0x0000004c +#define ETHTOOL_LINK_MODE_1000baseT_Full_BIT 5 +#define ETHTOOL_LINK_MODE_Autoneg_BIT 6 +#define ETHTOOL_LINK_MODE_1000baseKX_Full_BIT 17 +#define ETHTOOL_LINK_MODE_10000baseKX4_Full_BIT 18 +#define ETHTOOL_LINK_MODE_10000baseKR_Full_BIT 19 +#define ETHTOOL_LINK_MODE_10000baseR_FEC_BIT 20 +#define ETHTOOL_LINK_MODE_20000baseMLD2_Full_BIT 21 +#define ETHTOOL_LINK_MODE_20000baseKR2_Full_BIT 22 +#define ETHTOOL_LINK_MODE_40000baseKR4_Full_BIT 23 +#define ETHTOOL_LINK_MODE_40000baseCR4_Full_BIT 24 +#define ETHTOOL_LINK_MODE_40000baseSR4_Full_BIT 25 +#define ETHTOOL_LINK_MODE_40000baseLR4_Full_BIT 26 +#define ETHTOOL_LINK_MODE_56000baseKR4_Full_BIT 27 +#define ETHTOOL_LINK_MODE_56000baseCR4_Full_BIT 28 +#define ETHTOOL_LINK_MODE_56000baseSR4_Full_BIT 29 +#define ETHTOOL_LINK_MODE_56000baseLR4_Full_BIT 30 +#endif +#ifndef HAVE_ETHTOOL_LINK_MODE_25G +#define ETHTOOL_LINK_MODE_25000baseCR_Full_BIT 31 +#define ETHTOOL_LINK_MODE_25000baseKR_Full_BIT 32 +#define ETHTOOL_LINK_MODE_25000baseSR_Full_BIT 33 +#endif +#ifndef HAVE_ETHTOOL_LINK_MODE_50G +#define ETHTOOL_LINK_MODE_50000baseCR2_Full_BIT 34 +#define ETHTOOL_LINK_MODE_50000baseKR2_Full_BIT 35 +#endif +#ifndef HAVE_ETHTOOL_LINK_MODE_100G +#define ETHTOOL_LINK_MODE_100000baseKR4_Full_BIT 36 +#define ETHTOOL_LINK_MODE_100000baseSR4_Full_BIT 37 +#define ETHTOOL_LINK_MODE_100000baseCR4_Full_BIT 38 +#define ETHTOOL_LINK_MODE_100000baseLR4_ER4_Full_BIT 39 +#endif + /** * Return private structure associated with an Ethernet device. * @@ -562,6 +615,8 @@ mlx5_dev_infos_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *info) unsigned int max; char ifname[IF_NAMESIZE]; + info->pci_dev = RTE_DEV_TO_PCI(dev->device); + priv_lock(priv); /* FIXME: we should ask the device for these values. */ info->min_rx_bufsize = 32; @@ -626,7 +681,7 @@ mlx5_dev_supported_ptypes_get(struct rte_eth_dev *dev) } /** - * Retrieve physical link information (unlocked version using legacy ioctl). + * DPDK callback to retrieve physical link information. * * @param dev * Pointer to Ethernet device structure. 
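The fallback definitions above let the PMD issue ETHTOOL_GLINKSETTINGS even when built against older kernel headers. Outside the driver, the same request follows a two-step handshake, sketched below with a placeholder interface name: the first ioctl only reports, as a negative value, how many 32-bit link-mode words the kernel expects, and the second, properly sized call returns speed and duplex. This assumes kernel headers that provide struct ethtool_link_settings (or fallback defines like the ones above).

#include <stdio.h>
#include <stdint.h>
#include <string.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <net/if.h>
#include <linux/ethtool.h>
#include <linux/sockios.h>

int main(void)
{
    uint32_t buf[(sizeof(struct ethtool_link_settings) +
                  3 * 32 * sizeof(uint32_t)) / sizeof(uint32_t)];
    struct ethtool_link_settings *ls = (struct ethtool_link_settings *)buf;
    struct ifreq ifr;
    int fd = socket(AF_INET, SOCK_DGRAM, 0);

    if (fd < 0)
        return 1;
    memset(&ifr, 0, sizeof(ifr));
    strncpy(ifr.ifr_name, "eth0", sizeof(ifr.ifr_name) - 1); /* placeholder */
    ifr.ifr_data = (void *)ls;

    memset(buf, 0, sizeof(buf));
    ls->cmd = ETHTOOL_GLINKSETTINGS;
    /* Pass 1: nwords == 0, the kernel answers with -(required words). */
    if (ioctl(fd, SIOCETHTOOL, &ifr) < 0 || ls->link_mode_masks_nwords >= 0)
        goto out; /* old kernel: fall back to ETHTOOL_GSET instead */
    ls->link_mode_masks_nwords = -ls->link_mode_masks_nwords;
    /* Pass 2: properly sized request returns speed, duplex and masks. */
    if (ioctl(fd, SIOCETHTOOL, &ifr) == 0)
        printf("speed %u Mb/s, duplex %u\n", ls->speed, ls->duplex);
out:
    close(fd);
    return 0;
}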
@@ -644,6 +699,8 @@ mlx5_link_update_unlocked_gset(struct rte_eth_dev *dev, int wait_to_complete) struct rte_eth_link dev_link; int link_speed = 0; + /* priv_lock() is not taken to allow concurrent calls. */ + (void)wait_to_complete; if (priv_ifreq(priv, SIOCGIFFLAGS, &ifr)) { WARN("ioctl(SIOCGIFFLAGS) failed: %s", strerror(errno)); @@ -690,8 +747,7 @@ mlx5_link_update_unlocked_gset(struct rte_eth_dev *dev, int wait_to_complete) } /** - * Retrieve physical link information (unlocked version using new ioctl from - * Linux 4.5). + * Retrieve physical link information (unlocked version using new ioctl). * * @param dev * Pointer to Ethernet device structure. @@ -701,7 +757,6 @@ mlx5_link_update_unlocked_gset(struct rte_eth_dev *dev, int wait_to_complete) static int mlx5_link_update_unlocked_gs(struct rte_eth_dev *dev, int wait_to_complete) { -#ifdef ETHTOOL_GLINKSETTINGS struct priv *priv = mlx5_get_priv(dev); struct ethtool_link_settings edata = { .cmd = ETHTOOL_GLINKSETTINGS, @@ -728,7 +783,6 @@ mlx5_link_update_unlocked_gs(struct rte_eth_dev *dev, int wait_to_complete) sc = edata.link_mode_masks[0] | ((uint64_t)edata.link_mode_masks[1] << 32); priv->link_speed_capa = 0; - /* Link speeds available in kernel v4.5. */ if (sc & ETHTOOL_LINK_MODE_Autoneg_BIT) priv->link_speed_capa |= ETH_LINK_SPEED_AUTONEG; if (sc & (ETHTOOL_LINK_MODE_1000baseT_Full_BIT | @@ -751,25 +805,18 @@ mlx5_link_update_unlocked_gs(struct rte_eth_dev *dev, int wait_to_complete) ETHTOOL_LINK_MODE_56000baseSR4_Full_BIT | ETHTOOL_LINK_MODE_56000baseLR4_Full_BIT)) priv->link_speed_capa |= ETH_LINK_SPEED_56G; - /* Link speeds available in kernel v4.6. */ -#ifdef HAVE_ETHTOOL_LINK_MODE_25G if (sc & (ETHTOOL_LINK_MODE_25000baseCR_Full_BIT | ETHTOOL_LINK_MODE_25000baseKR_Full_BIT | ETHTOOL_LINK_MODE_25000baseSR_Full_BIT)) priv->link_speed_capa |= ETH_LINK_SPEED_25G; -#endif -#ifdef HAVE_ETHTOOL_LINK_MODE_50G if (sc & (ETHTOOL_LINK_MODE_50000baseCR2_Full_BIT | ETHTOOL_LINK_MODE_50000baseKR2_Full_BIT)) priv->link_speed_capa |= ETH_LINK_SPEED_50G; -#endif -#ifdef HAVE_ETHTOOL_LINK_MODE_100G if (sc & (ETHTOOL_LINK_MODE_100000baseKR4_Full_BIT | ETHTOOL_LINK_MODE_100000baseSR4_Full_BIT | ETHTOOL_LINK_MODE_100000baseCR4_Full_BIT | ETHTOOL_LINK_MODE_100000baseLR4_ER4_Full_BIT)) priv->link_speed_capa |= ETH_LINK_SPEED_100G; -#endif dev_link.link_duplex = ((edata.duplex == DUPLEX_HALF) ? ETH_LINK_HALF_DUPLEX : ETH_LINK_FULL_DUPLEX); dev_link.link_autoneg = !(dev->data->dev_conf.link_speeds & @@ -779,34 +826,11 @@ mlx5_link_update_unlocked_gs(struct rte_eth_dev *dev, int wait_to_complete) dev->data->dev_link = dev_link; return 0; } -#else - (void)dev; - (void)wait_to_complete; -#endif /* Link status is still the same. */ return -1; } /** - * DPDK callback to retrieve physical link information (unlocked version). - * - * @param dev - * Pointer to Ethernet device structure. - * @param wait_to_complete - * Wait for request completion (ignored). - */ -int -mlx5_link_update_unlocked(struct rte_eth_dev *dev, int wait_to_complete) -{ - int ret; - - ret = mlx5_link_update_unlocked_gs(dev, wait_to_complete); - if (ret < 0) - ret = mlx5_link_update_unlocked_gset(dev, wait_to_complete); - return ret; -} - -/** * DPDK callback to retrieve physical link information. 
* * @param dev @@ -817,13 +841,15 @@ mlx5_link_update_unlocked(struct rte_eth_dev *dev, int wait_to_complete) int mlx5_link_update(struct rte_eth_dev *dev, int wait_to_complete) { - struct priv *priv = mlx5_get_priv(dev); - int ret; - - priv_lock(priv); - ret = mlx5_link_update_unlocked(dev, wait_to_complete); - priv_unlock(priv); - return ret; + struct utsname utsname; + int ver[3]; + + if (uname(&utsname) == -1 || + sscanf(utsname.release, "%d.%d.%d", + &ver[0], &ver[1], &ver[2]) != 3 || + KERNEL_VERSION(ver[0], ver[1], ver[2]) < KERNEL_VERSION(4, 9, 0)) + return mlx5_link_update_unlocked_gset(dev, wait_to_complete); + return mlx5_link_update_unlocked_gs(dev, wait_to_complete); } /** @@ -1141,7 +1167,7 @@ static int priv_dev_link_status_handler(struct priv *priv, struct rte_eth_dev *dev) { struct ibv_async_event event; - int port_change = 0; + struct rte_eth_link *link = &dev->data->dev_link; int ret = 0; /* Read all message and acknowledge them. */ @@ -1149,29 +1175,24 @@ priv_dev_link_status_handler(struct priv *priv, struct rte_eth_dev *dev) if (ibv_get_async_event(priv->ctx, &event)) break; - if (event.event_type == IBV_EVENT_PORT_ACTIVE || - event.event_type == IBV_EVENT_PORT_ERR) - port_change = 1; - else + if (event.event_type != IBV_EVENT_PORT_ACTIVE && + event.event_type != IBV_EVENT_PORT_ERR) DEBUG("event type %d on port %d not handled", event.event_type, event.element.port_num); ibv_ack_async_event(&event); } - - if (port_change ^ priv->pending_alarm) { - struct rte_eth_link *link = &dev->data->dev_link; - - priv->pending_alarm = 0; - mlx5_link_update_unlocked(dev, 0); - if (((link->link_speed == 0) && link->link_status) || - ((link->link_speed != 0) && !link->link_status)) { + mlx5_link_update(dev, 0); + if (((link->link_speed == 0) && link->link_status) || + ((link->link_speed != 0) && !link->link_status)) { + if (!priv->pending_alarm) { /* Inconsistent status, check again later. */ priv->pending_alarm = 1; rte_eal_alarm_set(MLX5_ALARM_TIMEOUT_US, mlx5_dev_link_status_handler, dev); - } else - ret = 1; + } + } else { + ret = 1; } return ret; } @@ -1191,10 +1212,11 @@ mlx5_dev_link_status_handler(void *arg) priv_lock(priv); assert(priv->pending_alarm == 1); + priv->pending_alarm = 0; ret = priv_dev_link_status_handler(priv, dev); priv_unlock(priv); - //if (ret) - // _rte_eth_dev_callback_process(dev, RTE_ETH_EVENT_INTR_LSC, NULL); + if (ret) + _rte_eth_dev_callback_process(dev, RTE_ETH_EVENT_INTR_LSC, NULL); } /** @@ -1216,8 +1238,8 @@ mlx5_dev_interrupt_handler(struct rte_intr_handle *intr_handle, void *cb_arg) priv_lock(priv); ret = priv_dev_link_status_handler(priv, dev); priv_unlock(priv); - //if (ret) - // _rte_eth_dev_callback_process(dev, RTE_ETH_EVENT_INTR_LSC, NULL); + if (ret) + _rte_eth_dev_callback_process(dev, RTE_ETH_EVENT_INTR_LSC, NULL); } /** @@ -1515,14 +1537,11 @@ void priv_select_tx_function(struct priv *priv) { priv->dev->tx_pkt_burst = mlx5_tx_burst; - /* Display warning for unsupported configurations. */ - if (priv->sriov && priv->mps) - WARN("multi-packet send WQE cannot be used on a SR-IOV setup"); /* Select appropriate TX function. 
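mlx5_link_update() above now picks the ioctl variant at run time from the kernel release string instead of relying on compile-time macros. The same check, extracted into a standalone program, looks like this.

#include <stdio.h>
#include <sys/utsname.h>
#include <linux/version.h>

int main(void)
{
    struct utsname u;
    int v[3] = { 0, 0, 0 };

    if (uname(&u) == -1 ||
        sscanf(u.release, "%d.%d.%d", &v[0], &v[1], &v[2]) != 3)
        return 1;
    if (KERNEL_VERSION(v[0], v[1], v[2]) < KERNEL_VERSION(4, 9, 0))
        printf("use the legacy ETHTOOL_GSET path\n");
    else
        printf("use the ETHTOOL_GLINKSETTINGS path\n");
    return 0;
}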
*/ - if ((priv->sriov == 0) && priv->mps && priv->txq_inline) { + if (priv->mps && priv->txq_inline) { priv->dev->tx_pkt_burst = mlx5_tx_burst_mpw_inline; DEBUG("selected MPW inline TX function"); - } else if ((priv->sriov == 0) && priv->mps) { + } else if (priv->mps) { priv->dev->tx_pkt_burst = mlx5_tx_burst_mpw; DEBUG("selected MPW TX function"); } diff --git a/src/dpdk/drivers/net/mlx5/mlx5_fdir.c b/src/dpdk/drivers/net/mlx5/mlx5_fdir.c index f03e95ef..f80c58b4 100644 --- a/src/dpdk/drivers/net/mlx5/mlx5_fdir.c +++ b/src/dpdk/drivers/net/mlx5/mlx5_fdir.c @@ -37,14 +37,12 @@ #include <string.h> #include <errno.h> -#define TREX_PATCH - /* Verbs header. */ /* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */ #ifdef PEDANTIC #pragma GCC diagnostic ignored "-Wpedantic" #endif -#include <infiniband/verbs_exp.h> +#include <infiniband/verbs.h> #ifdef PEDANTIC #pragma GCC diagnostic error "-Wpedantic" #endif @@ -57,6 +55,8 @@ #include <rte_malloc.h> #include <rte_ethdev.h> #include <rte_common.h> +#include <rte_flow.h> +#include <rte_flow_driver.h> #ifdef PEDANTIC #pragma GCC diagnostic error "-Wpedantic" #endif @@ -69,9 +69,6 @@ struct fdir_flow_desc { uint16_t src_port; uint32_t src_ip[4]; uint32_t dst_ip[4]; - uint8_t tos; - uint8_t ip_id; - uint8_t proto; uint8_t mac[6]; uint16_t vlan_tag; enum hash_rxq_type type; @@ -107,7 +104,6 @@ fdir_filter_to_flow_desc(const struct rte_eth_fdir_filter *fdir_filter, /* Set VLAN ID. */ desc->vlan_tag = fdir_filter->input.flow_ext.vlan_tci; -#ifndef TREX_PATCH /* Set MAC address. */ if (mode == RTE_FDIR_MODE_PERFECT_MAC_VLAN) { rte_memcpy(desc->mac, @@ -117,14 +113,6 @@ fdir_filter_to_flow_desc(const struct rte_eth_fdir_filter *fdir_filter, desc->type = HASH_RXQ_ETH; return; } -#else - if (fdir_filter->input.flow.ip4_flow.ip_id == 2) { - desc->type = HASH_RXQ_ETH; - desc->ip_id = fdir_filter->input.flow.ip4_flow.ip_id; - return; - } -#endif - /* Set mode */ switch (fdir_filter->input.flow_type) { @@ -159,9 +147,6 @@ fdir_filter_to_flow_desc(const struct rte_eth_fdir_filter *fdir_filter, case RTE_ETH_FLOW_NONFRAG_IPV4_OTHER: desc->src_ip[0] = fdir_filter->input.flow.ip4_flow.src_ip; desc->dst_ip[0] = fdir_filter->input.flow.ip4_flow.dst_ip; - desc->tos = fdir_filter->input.flow.ip4_flow.ttl; /* TTL is mapped to TOS TREX_PATCH */ - desc->ip_id = fdir_filter->input.flow.ip4_flow.ip_id; - desc->proto = fdir_filter->input.flow.ip4_flow.proto; break; case RTE_ETH_FLOW_NONFRAG_IPV6_UDP: case RTE_ETH_FLOW_NONFRAG_IPV6_TCP: @@ -175,9 +160,6 @@ fdir_filter_to_flow_desc(const struct rte_eth_fdir_filter *fdir_filter, rte_memcpy(desc->dst_ip, fdir_filter->input.flow.ipv6_flow.dst_ip, sizeof(desc->dst_ip)); - desc->tos = (uint8_t)fdir_filter->input.flow.ipv6_flow.hop_limits; /* TTL is mapped to TOS - TREX_PATCH */ - desc->ip_id = (uint8_t)fdir_filter->input.flow.ipv6_flow.flow_label; - desc->proto = fdir_filter->input.flow.ipv6_flow.proto; break; default: break; @@ -218,11 +200,6 @@ priv_fdir_overlap(const struct priv *priv, ((desc1->dst_port & mask->dst_port_mask) != (desc2->dst_port & mask->dst_port_mask))) return 0; - if ( (desc1->tos != desc2->tos) || - (desc1->ip_id != desc2->ip_id) || - (desc1->proto != desc2->proto) ) - return 0; - switch (desc1->type) { case HASH_RXQ_IPV4: case HASH_RXQ_UDPV4: @@ -277,8 +254,8 @@ priv_fdir_flow_add(struct priv *priv, struct ibv_exp_flow_attr *attr = &data->attr; uintptr_t spec_offset = (uintptr_t)&data->spec; struct ibv_exp_flow_spec_eth *spec_eth; - struct ibv_exp_flow_spec_ipv4_ext *spec_ipv4; - struct 
ibv_exp_flow_spec_ipv6_ext *spec_ipv6; + struct ibv_exp_flow_spec_ipv4 *spec_ipv4; + struct ibv_exp_flow_spec_ipv6 *spec_ipv6; struct ibv_exp_flow_spec_tcp_udp *spec_tcp_udp; struct mlx5_fdir_filter *iter_fdir_filter; unsigned int i; @@ -290,10 +267,8 @@ priv_fdir_flow_add(struct priv *priv, (iter_fdir_filter->flow != NULL) && (priv_fdir_overlap(priv, &mlx5_fdir_filter->desc, - &iter_fdir_filter->desc))){ - ERROR("overlap rules, please check your rules"); - return EEXIST; - } + &iter_fdir_filter->desc))) + return EEXIST; /* * No padding must be inserted by the compiler between attr and spec. @@ -316,7 +291,6 @@ priv_fdir_flow_add(struct priv *priv, /* Update priority */ attr->priority = 2; -#ifndef TREX_PATCH if (fdir_mode == RTE_FDIR_MODE_PERFECT_MAC_VLAN) { /* MAC Address */ for (i = 0; i != RTE_DIM(spec_eth->mask.dst_mac); ++i) { @@ -326,14 +300,6 @@ priv_fdir_flow_add(struct priv *priv, } goto create_flow; } -#else - // empty mask means "match everything". This rule will match all packets, no matter what is the ether type - if (desc->ip_id == 2) { - spec_eth->val.ether_type = 0x0806; - spec_eth->mask.ether_type = 0x0000; - goto create_flow; - } -#endif switch (desc->type) { case HASH_RXQ_IPV4: @@ -342,10 +308,10 @@ priv_fdir_flow_add(struct priv *priv, spec_offset += spec_eth->size; /* Set IP spec */ - spec_ipv4 = (struct ibv_exp_flow_spec_ipv4_ext *)spec_offset; + spec_ipv4 = (struct ibv_exp_flow_spec_ipv4 *)spec_offset; /* The second specification must be IP. */ - assert(spec_ipv4->type == IBV_EXP_FLOW_SPEC_IPV4_EXT); + assert(spec_ipv4->type == IBV_EXP_FLOW_SPEC_IPV4); assert(spec_ipv4->size == sizeof(*spec_ipv4)); spec_ipv4->val.src_ip = @@ -355,21 +321,6 @@ priv_fdir_flow_add(struct priv *priv, spec_ipv4->mask.src_ip = mask->ipv4_mask.src_ip; spec_ipv4->mask.dst_ip = mask->ipv4_mask.dst_ip; - /* PROTO */ - spec_ipv4->val.proto = desc->proto & mask->ipv4_mask.proto; - spec_ipv4->mask.proto = mask->ipv4_mask.proto; - -#ifdef TREX_PATCH - /* TOS */ - if (desc->ip_id == 1) { - spec_ipv4->mask.tos = 0x1; - spec_ipv4->val.tos = 0x1; - } else { - spec_ipv4->mask.tos = 0x0; - spec_ipv4->val.tos = 0x0; - } -#endif - /* Update priority */ attr->priority = 1; @@ -384,10 +335,10 @@ priv_fdir_flow_add(struct priv *priv, spec_offset += spec_eth->size; /* Set IP spec */ - spec_ipv6 = (struct ibv_exp_flow_spec_ipv6_ext *)spec_offset; + spec_ipv6 = (struct ibv_exp_flow_spec_ipv6 *)spec_offset; /* The second specification must be IP. */ - assert(spec_ipv6->type == IBV_EXP_FLOW_SPEC_IPV6_EXT); + assert(spec_ipv6->type == IBV_EXP_FLOW_SPEC_IPV6); assert(spec_ipv6->size == sizeof(*spec_ipv6)); for (i = 0; i != RTE_DIM(desc->src_ip); ++i) { @@ -403,20 +354,6 @@ priv_fdir_flow_add(struct priv *priv, mask->ipv6_mask.dst_ip, sizeof(spec_ipv6->mask.dst_ip)); - spec_ipv6->val.next_hdr = desc->proto & mask->ipv6_mask.proto; - spec_ipv6->mask.next_hdr = mask->ipv6_mask.proto; - -#ifdef TREX_PATCH - /* TOS */ - if (desc->ip_id == 1) { - spec_ipv6->mask.traffic_class = 0x1; - spec_ipv6->val.traffic_class = 0x1; - } else { - spec_ipv6->mask.traffic_class = 0; - spec_ipv6->val.traffic_class = 0; - } -#endif - /* Update priority */ attr->priority = 1; @@ -894,10 +831,8 @@ priv_fdir_filter_add(struct priv *priv, /* Duplicate filters are currently unsupported. */ mlx5_fdir_filter = priv_find_filter_in_list(priv, fdir_filter); if (mlx5_fdir_filter != NULL) { -#ifndef TREX_PATCH ERROR("filter already exists"); -#endif - return EEXIST; + return EINVAL; } /* Create new flow director filter. 
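For reference, an application reaches priv_fdir_filter_add() above through the generic filter API. The sketch below installs a simple IPv4/UDP perfect-match rule; it is an illustration of the call shape only, with placeholder addresses, port and queue numbers, and no mask configuration.

#include <string.h>
#include <rte_ethdev.h>
#include <rte_eth_ctrl.h>
#include <rte_byteorder.h>

static int
add_udp4_fdir_rule(uint8_t port_id, uint16_t rx_queue)
{
    struct rte_eth_fdir_filter f;

    memset(&f, 0, sizeof(f));
    f.soft_id = 1;
    f.input.flow_type = RTE_ETH_FLOW_NONFRAG_IPV4_UDP;
    f.input.flow.udp4_flow.ip.src_ip = rte_cpu_to_be_32(0x0a000001); /* 10.0.0.1 */
    f.input.flow.udp4_flow.ip.dst_ip = rte_cpu_to_be_32(0x0a000002); /* 10.0.0.2 */
    f.input.flow.udp4_flow.dst_port = rte_cpu_to_be_16(5000);
    f.action.rx_queue = rx_queue;
    f.action.behavior = RTE_ETH_FDIR_ACCEPT;
    f.action.report_status = RTE_ETH_FDIR_REPORT_ID;
    return rte_eth_dev_filter_ctrl(port_id, RTE_ETH_FILTER_FDIR,
                                   RTE_ETH_FILTER_ADD, &f);
}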
*/ @@ -1022,11 +957,9 @@ priv_fdir_filter_delete(struct priv *priv, return 0; } -#ifndef TREX_PATCH ERROR("%p: flow director delete failed, cannot find filter", (void *)priv); -#endif - return ENOENT; + return EINVAL; } /** @@ -1111,6 +1044,14 @@ priv_fdir_ctrl_func(struct priv *priv, enum rte_filter_op filter_op, void *arg) return ret; } +static const struct rte_flow_ops mlx5_flow_ops = { + .validate = mlx5_flow_validate, + .create = mlx5_flow_create, + .destroy = mlx5_flow_destroy, + .flush = mlx5_flow_flush, + .query = NULL, +}; + /** * Manage filter operations. * @@ -1136,6 +1077,11 @@ mlx5_dev_filter_ctrl(struct rte_eth_dev *dev, struct priv *priv = dev->data->dev_private; switch (filter_type) { + case RTE_ETH_FILTER_GENERIC: + if (filter_op != RTE_ETH_FILTER_GET) + return -EINVAL; + *(const void **)arg = &mlx5_flow_ops; + return 0; case RTE_ETH_FILTER_FDIR: priv_lock(priv); ret = priv_fdir_ctrl_func(priv, filter_op, arg); diff --git a/src/dpdk/drivers/net/mlx5/mlx5_flow.c b/src/dpdk/drivers/net/mlx5/mlx5_flow.c new file mode 100644 index 00000000..23c1b5ef --- /dev/null +++ b/src/dpdk/drivers/net/mlx5/mlx5_flow.c @@ -0,0 +1,1247 @@ +/*- + * BSD LICENSE + * + * Copyright 2016 6WIND S.A. + * Copyright 2016 Mellanox. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of 6WIND S.A. nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include <sys/queue.h> +#include <string.h> + +/* Verbs header. */ +/* ISO C doesn't support unnamed structs/unions, disabling -pedantic. 
*/ +#ifdef PEDANTIC +#pragma GCC diagnostic ignored "-Wpedantic" +#endif +#include <infiniband/verbs.h> +#ifdef PEDANTIC +#pragma GCC diagnostic error "-Wpedantic" +#endif + +#include <rte_ethdev.h> +#include <rte_flow.h> +#include <rte_flow_driver.h> +#include <rte_malloc.h> + +#include "mlx5.h" +#include "mlx5_prm.h" + +static int +mlx5_flow_create_eth(const struct rte_flow_item *item, + const void *default_mask, + void *data); + +static int +mlx5_flow_create_vlan(const struct rte_flow_item *item, + const void *default_mask, + void *data); + +static int +mlx5_flow_create_ipv4(const struct rte_flow_item *item, + const void *default_mask, + void *data); + +static int +mlx5_flow_create_ipv6(const struct rte_flow_item *item, + const void *default_mask, + void *data); + +static int +mlx5_flow_create_udp(const struct rte_flow_item *item, + const void *default_mask, + void *data); + +static int +mlx5_flow_create_tcp(const struct rte_flow_item *item, + const void *default_mask, + void *data); + +static int +mlx5_flow_create_vxlan(const struct rte_flow_item *item, + const void *default_mask, + void *data); + +struct rte_flow { + LIST_ENTRY(rte_flow) next; /**< Pointer to the next flow structure. */ + struct ibv_exp_flow_attr *ibv_attr; /**< Pointer to Verbs attributes. */ + struct ibv_exp_rwq_ind_table *ind_table; /**< Indirection table. */ + struct ibv_qp *qp; /**< Verbs queue pair. */ + struct ibv_exp_flow *ibv_flow; /**< Verbs flow. */ + struct ibv_exp_wq *wq; /**< Verbs work queue. */ + struct ibv_cq *cq; /**< Verbs completion queue. */ + struct rxq *rxq; /**< Pointer to the queue, NULL if drop queue. */ + uint32_t mark:1; /**< Set if the flow is marked. */ +}; + +/** Static initializer for items. */ +#define ITEMS(...) \ + (const enum rte_flow_item_type []){ \ + __VA_ARGS__, RTE_FLOW_ITEM_TYPE_END, \ + } + +/** Structure to generate a simple graph of layers supported by the NIC. */ +struct mlx5_flow_items { + /** List of possible actions for these items. */ + const enum rte_flow_action_type *const actions; + /** Bit-masks corresponding to the possibilities for the item. */ + const void *mask; + /** + * Default bit-masks to use when item->mask is not provided. When + * \default_mask is also NULL, the full supported bit-mask (\mask) is + * used instead. + */ + const void *default_mask; + /** Bit-masks size in bytes. */ + const unsigned int mask_sz; + /** + * Conversion function from rte_flow to NIC specific flow. + * + * @param item + * rte_flow item to convert. + * @param default_mask + * Default bit-masks to use when item->mask is not provided. + * @param data + * Internal structure to store the conversion. + * + * @return + * 0 on success, negative value otherwise. + */ + int (*convert)(const struct rte_flow_item *item, + const void *default_mask, + void *data); + /** Size in bytes of the destination structure. */ + const unsigned int dst_sz; + /** List of possible following items. */ + const enum rte_flow_item_type *const items; +}; + +/** Valid action for this PMD. */ +static const enum rte_flow_action_type valid_actions[] = { + RTE_FLOW_ACTION_TYPE_DROP, + RTE_FLOW_ACTION_TYPE_QUEUE, + RTE_FLOW_ACTION_TYPE_MARK, + RTE_FLOW_ACTION_TYPE_END, +}; + +/** Graph of supported items and associated actions. 
*/ +static const struct mlx5_flow_items mlx5_flow_items[] = { + [RTE_FLOW_ITEM_TYPE_END] = { + .items = ITEMS(RTE_FLOW_ITEM_TYPE_ETH, + RTE_FLOW_ITEM_TYPE_VXLAN), + }, + [RTE_FLOW_ITEM_TYPE_ETH] = { + .items = ITEMS(RTE_FLOW_ITEM_TYPE_VLAN, + RTE_FLOW_ITEM_TYPE_IPV4, + RTE_FLOW_ITEM_TYPE_IPV6), + .actions = valid_actions, + .mask = &(const struct rte_flow_item_eth){ + .dst.addr_bytes = "\xff\xff\xff\xff\xff\xff", + .src.addr_bytes = "\xff\xff\xff\xff\xff\xff", + }, + .default_mask = &rte_flow_item_eth_mask, + .mask_sz = sizeof(struct rte_flow_item_eth), + .convert = mlx5_flow_create_eth, + .dst_sz = sizeof(struct ibv_exp_flow_spec_eth), + }, + [RTE_FLOW_ITEM_TYPE_VLAN] = { + .items = ITEMS(RTE_FLOW_ITEM_TYPE_IPV4, + RTE_FLOW_ITEM_TYPE_IPV6), + .actions = valid_actions, + .mask = &(const struct rte_flow_item_vlan){ + .tci = -1, + }, + .default_mask = &rte_flow_item_vlan_mask, + .mask_sz = sizeof(struct rte_flow_item_vlan), + .convert = mlx5_flow_create_vlan, + .dst_sz = 0, + }, + [RTE_FLOW_ITEM_TYPE_IPV4] = { + .items = ITEMS(RTE_FLOW_ITEM_TYPE_UDP, + RTE_FLOW_ITEM_TYPE_TCP), + .actions = valid_actions, + .mask = &(const struct rte_flow_item_ipv4){ + .hdr = { + .src_addr = -1, + .dst_addr = -1, + .type_of_service = -1, + .next_proto_id = -1, + }, + }, + .default_mask = &rte_flow_item_ipv4_mask, + .mask_sz = sizeof(struct rte_flow_item_ipv4), + .convert = mlx5_flow_create_ipv4, + .dst_sz = sizeof(struct ibv_exp_flow_spec_ipv4_ext), + }, + [RTE_FLOW_ITEM_TYPE_IPV6] = { + .items = ITEMS(RTE_FLOW_ITEM_TYPE_UDP, + RTE_FLOW_ITEM_TYPE_TCP), + .actions = valid_actions, + .mask = &(const struct rte_flow_item_ipv6){ + .hdr = { + .src_addr = { + 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, + }, + .dst_addr = { + 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, + }, + }, + }, + .default_mask = &rte_flow_item_ipv6_mask, + .mask_sz = sizeof(struct rte_flow_item_ipv6), + .convert = mlx5_flow_create_ipv6, + .dst_sz = sizeof(struct ibv_exp_flow_spec_ipv6), + }, + [RTE_FLOW_ITEM_TYPE_UDP] = { + .items = ITEMS(RTE_FLOW_ITEM_TYPE_VXLAN), + .actions = valid_actions, + .mask = &(const struct rte_flow_item_udp){ + .hdr = { + .src_port = -1, + .dst_port = -1, + }, + }, + .default_mask = &rte_flow_item_udp_mask, + .mask_sz = sizeof(struct rte_flow_item_udp), + .convert = mlx5_flow_create_udp, + .dst_sz = sizeof(struct ibv_exp_flow_spec_tcp_udp), + }, + [RTE_FLOW_ITEM_TYPE_TCP] = { + .actions = valid_actions, + .mask = &(const struct rte_flow_item_tcp){ + .hdr = { + .src_port = -1, + .dst_port = -1, + }, + }, + .default_mask = &rte_flow_item_tcp_mask, + .mask_sz = sizeof(struct rte_flow_item_tcp), + .convert = mlx5_flow_create_tcp, + .dst_sz = sizeof(struct ibv_exp_flow_spec_tcp_udp), + }, + [RTE_FLOW_ITEM_TYPE_VXLAN] = { + .items = ITEMS(RTE_FLOW_ITEM_TYPE_ETH), + .actions = valid_actions, + .mask = &(const struct rte_flow_item_vxlan){ + .vni = "\xff\xff\xff", + }, + .default_mask = &rte_flow_item_vxlan_mask, + .mask_sz = sizeof(struct rte_flow_item_vxlan), + .convert = mlx5_flow_create_vxlan, + .dst_sz = sizeof(struct ibv_exp_flow_spec_tunnel), + }, +}; + +/** Structure to pass to the conversion function. */ +struct mlx5_flow { + struct ibv_exp_flow_attr *ibv_attr; /**< Verbs attribute. */ + unsigned int offset; /**< Offset in bytes in the ibv_attr buffer. */ + uint32_t inner; /**< Set once VXLAN is encountered. */ +}; + +struct mlx5_flow_action { + uint32_t queue:1; /**< Target is a receive queue. 
*/ + uint32_t drop:1; /**< Target is a drop queue. */ + uint32_t mark:1; /**< Mark is present in the flow. */ + uint32_t queue_id; /**< Identifier of the queue. */ + uint32_t mark_id; /**< Mark identifier. */ +}; + +/** + * Check support for a given item. + * + * @param item[in] + * Item specification. + * @param mask[in] + * Bit-masks covering supported fields to compare with spec, last and mask in + * \item. + * @param size + * Bit-Mask size in bytes. + * + * @return + * 0 on success. + */ +static int +mlx5_flow_item_validate(const struct rte_flow_item *item, + const uint8_t *mask, unsigned int size) +{ + int ret = 0; + + if (!item->spec && (item->mask || item->last)) + return -1; + if (item->spec && !item->mask) { + unsigned int i; + const uint8_t *spec = item->spec; + + for (i = 0; i < size; ++i) + if ((spec[i] | mask[i]) != mask[i]) + return -1; + } + if (item->last && !item->mask) { + unsigned int i; + const uint8_t *spec = item->last; + + for (i = 0; i < size; ++i) + if ((spec[i] | mask[i]) != mask[i]) + return -1; + } + if (item->mask) { + unsigned int i; + const uint8_t *spec = item->mask; + + for (i = 0; i < size; ++i) + if ((spec[i] | mask[i]) != mask[i]) + return -1; + } + if (item->spec && item->last) { + uint8_t spec[size]; + uint8_t last[size]; + const uint8_t *apply = mask; + unsigned int i; + + if (item->mask) + apply = item->mask; + for (i = 0; i < size; ++i) { + spec[i] = ((const uint8_t *)item->spec)[i] & apply[i]; + last[i] = ((const uint8_t *)item->last)[i] & apply[i]; + } + ret = memcmp(spec, last, size); + } + return ret; +} + +/** + * Validate a flow supported by the NIC. + * + * @param priv + * Pointer to private structure. + * @param[in] attr + * Flow rule attributes. + * @param[in] pattern + * Pattern specification (list terminated by the END pattern item). + * @param[in] actions + * Associated actions (list terminated by the END action). + * @param[out] error + * Perform verbose error reporting if not NULL. + * @param[in, out] flow + * Flow structure to update. + * + * @return + * 0 on success, a negative errno value otherwise and rte_errno is set. 
+ */ +static int +priv_flow_validate(struct priv *priv, + const struct rte_flow_attr *attr, + const struct rte_flow_item items[], + const struct rte_flow_action actions[], + struct rte_flow_error *error, + struct mlx5_flow *flow) +{ + const struct mlx5_flow_items *cur_item = mlx5_flow_items; + struct mlx5_flow_action action = { + .queue = 0, + .drop = 0, + .mark = 0, + }; + + (void)priv; + if (attr->group) { + rte_flow_error_set(error, ENOTSUP, + RTE_FLOW_ERROR_TYPE_ATTR_GROUP, + NULL, + "groups are not supported"); + return -rte_errno; + } + if (attr->priority) { + rte_flow_error_set(error, ENOTSUP, + RTE_FLOW_ERROR_TYPE_ATTR_PRIORITY, + NULL, + "priorities are not supported"); + return -rte_errno; + } + if (attr->egress) { + rte_flow_error_set(error, ENOTSUP, + RTE_FLOW_ERROR_TYPE_ATTR_EGRESS, + NULL, + "egress is not supported"); + return -rte_errno; + } + if (!attr->ingress) { + rte_flow_error_set(error, ENOTSUP, + RTE_FLOW_ERROR_TYPE_ATTR_INGRESS, + NULL, + "only ingress is supported"); + return -rte_errno; + } + for (; items->type != RTE_FLOW_ITEM_TYPE_END; ++items) { + const struct mlx5_flow_items *token = NULL; + unsigned int i; + int err; + + if (items->type == RTE_FLOW_ITEM_TYPE_VOID) + continue; + for (i = 0; + cur_item->items && + cur_item->items[i] != RTE_FLOW_ITEM_TYPE_END; + ++i) { + if (cur_item->items[i] == items->type) { + token = &mlx5_flow_items[items->type]; + break; + } + } + if (!token) + goto exit_item_not_supported; + cur_item = token; + err = mlx5_flow_item_validate(items, + (const uint8_t *)cur_item->mask, + cur_item->mask_sz); + if (err) + goto exit_item_not_supported; + if (flow->ibv_attr && cur_item->convert) { + err = cur_item->convert(items, + (cur_item->default_mask ? + cur_item->default_mask : + cur_item->mask), + flow); + if (err) + goto exit_item_not_supported; + } + flow->offset += cur_item->dst_sz; + } + for (; actions->type != RTE_FLOW_ACTION_TYPE_END; ++actions) { + if (actions->type == RTE_FLOW_ACTION_TYPE_VOID) { + continue; + } else if (actions->type == RTE_FLOW_ACTION_TYPE_DROP) { + action.drop = 1; + } else if (actions->type == RTE_FLOW_ACTION_TYPE_QUEUE) { + const struct rte_flow_action_queue *queue = + (const struct rte_flow_action_queue *) + actions->conf; + + if (!queue || (queue->index > (priv->rxqs_n - 1))) + goto exit_action_not_supported; + action.queue = 1; + } else if (actions->type == RTE_FLOW_ACTION_TYPE_MARK) { + const struct rte_flow_action_mark *mark = + (const struct rte_flow_action_mark *) + actions->conf; + + if (!mark) { + rte_flow_error_set(error, EINVAL, + RTE_FLOW_ERROR_TYPE_ACTION, + actions, + "mark must be defined"); + return -rte_errno; + } else if (mark->id >= MLX5_FLOW_MARK_MAX) { + rte_flow_error_set(error, ENOTSUP, + RTE_FLOW_ERROR_TYPE_ACTION, + actions, + "mark must be between 0" + " and 16777199"); + return -rte_errno; + } + action.mark = 1; + } else { + goto exit_action_not_supported; + } + } + if (action.mark && !flow->ibv_attr && !action.drop) + flow->offset += sizeof(struct ibv_exp_flow_spec_action_tag); + if (!action.queue && !action.drop) { + rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_HANDLE, + NULL, "no valid action"); + return -rte_errno; + } + return 0; +exit_item_not_supported: + rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM, + items, "item not supported"); + return -rte_errno; +exit_action_not_supported: + rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION, + actions, "action not supported"); + return -rte_errno; +} + +/** + * Validate a flow supported by the NIC. 
+ * + * @see rte_flow_validate() + * @see rte_flow_ops + */ +int +mlx5_flow_validate(struct rte_eth_dev *dev, + const struct rte_flow_attr *attr, + const struct rte_flow_item items[], + const struct rte_flow_action actions[], + struct rte_flow_error *error) +{ + struct priv *priv = dev->data->dev_private; + int ret; + struct mlx5_flow flow = { .offset = sizeof(struct ibv_exp_flow_attr) }; + + priv_lock(priv); + ret = priv_flow_validate(priv, attr, items, actions, error, &flow); + priv_unlock(priv); + return ret; +} + +/** + * Convert Ethernet item to Verbs specification. + * + * @param item[in] + * Item specification. + * @param default_mask[in] + * Default bit-masks to use when item->mask is not provided. + * @param data[in, out] + * User structure. + */ +static int +mlx5_flow_create_eth(const struct rte_flow_item *item, + const void *default_mask, + void *data) +{ + const struct rte_flow_item_eth *spec = item->spec; + const struct rte_flow_item_eth *mask = item->mask; + struct mlx5_flow *flow = (struct mlx5_flow *)data; + struct ibv_exp_flow_spec_eth *eth; + const unsigned int eth_size = sizeof(struct ibv_exp_flow_spec_eth); + unsigned int i; + + ++flow->ibv_attr->num_of_specs; + flow->ibv_attr->priority = 2; + eth = (void *)((uintptr_t)flow->ibv_attr + flow->offset); + *eth = (struct ibv_exp_flow_spec_eth) { + .type = flow->inner | IBV_EXP_FLOW_SPEC_ETH, + .size = eth_size, + }; + if (!spec) + return 0; + if (!mask) + mask = default_mask; + memcpy(eth->val.dst_mac, spec->dst.addr_bytes, ETHER_ADDR_LEN); + memcpy(eth->val.src_mac, spec->src.addr_bytes, ETHER_ADDR_LEN); + memcpy(eth->mask.dst_mac, mask->dst.addr_bytes, ETHER_ADDR_LEN); + memcpy(eth->mask.src_mac, mask->src.addr_bytes, ETHER_ADDR_LEN); + /* Remove unwanted bits from values. */ + for (i = 0; i < ETHER_ADDR_LEN; ++i) { + eth->val.dst_mac[i] &= eth->mask.dst_mac[i]; + eth->val.src_mac[i] &= eth->mask.src_mac[i]; + } + return 0; +} + +/** + * Convert VLAN item to Verbs specification. + * + * @param item[in] + * Item specification. + * @param default_mask[in] + * Default bit-masks to use when item->mask is not provided. + * @param data[in, out] + * User structure. + */ +static int +mlx5_flow_create_vlan(const struct rte_flow_item *item, + const void *default_mask, + void *data) +{ + const struct rte_flow_item_vlan *spec = item->spec; + const struct rte_flow_item_vlan *mask = item->mask; + struct mlx5_flow *flow = (struct mlx5_flow *)data; + struct ibv_exp_flow_spec_eth *eth; + const unsigned int eth_size = sizeof(struct ibv_exp_flow_spec_eth); + + eth = (void *)((uintptr_t)flow->ibv_attr + flow->offset - eth_size); + if (!spec) + return 0; + if (!mask) + mask = default_mask; + eth->val.vlan_tag = spec->tci; + eth->mask.vlan_tag = mask->tci; + eth->val.vlan_tag &= eth->mask.vlan_tag; + return 0; +} + +/** + * Convert IPv4 item to Verbs specification. + * + * @param item[in] + * Item specification. + * @param default_mask[in] + * Default bit-masks to use when item->mask is not provided. + * @param data[in, out] + * User structure. 
+ */ +static int +mlx5_flow_create_ipv4(const struct rte_flow_item *item, + const void *default_mask, + void *data) +{ + const struct rte_flow_item_ipv4 *spec = item->spec; + const struct rte_flow_item_ipv4 *mask = item->mask; + struct mlx5_flow *flow = (struct mlx5_flow *)data; + struct ibv_exp_flow_spec_ipv4_ext *ipv4; + unsigned int ipv4_size = sizeof(struct ibv_exp_flow_spec_ipv4_ext); + + ++flow->ibv_attr->num_of_specs; + flow->ibv_attr->priority = 1; + ipv4 = (void *)((uintptr_t)flow->ibv_attr + flow->offset); + *ipv4 = (struct ibv_exp_flow_spec_ipv4_ext) { + .type = flow->inner | IBV_EXP_FLOW_SPEC_IPV4_EXT, + .size = ipv4_size, + }; + if (!spec) + return 0; + if (!mask) + mask = default_mask; + ipv4->val = (struct ibv_exp_flow_ipv4_ext_filter){ + .src_ip = spec->hdr.src_addr, + .dst_ip = spec->hdr.dst_addr, + .proto = spec->hdr.next_proto_id, + .tos = spec->hdr.type_of_service, + }; + ipv4->mask = (struct ibv_exp_flow_ipv4_ext_filter){ + .src_ip = mask->hdr.src_addr, + .dst_ip = mask->hdr.dst_addr, + .proto = mask->hdr.next_proto_id, + .tos = mask->hdr.type_of_service, + }; + /* Remove unwanted bits from values. */ + ipv4->val.src_ip &= ipv4->mask.src_ip; + ipv4->val.dst_ip &= ipv4->mask.dst_ip; + ipv4->val.proto &= ipv4->mask.proto; + ipv4->val.tos &= ipv4->mask.tos; + return 0; +} + +/** + * Convert IPv6 item to Verbs specification. + * + * @param item[in] + * Item specification. + * @param default_mask[in] + * Default bit-masks to use when item->mask is not provided. + * @param data[in, out] + * User structure. + */ +static int +mlx5_flow_create_ipv6(const struct rte_flow_item *item, + const void *default_mask, + void *data) +{ + const struct rte_flow_item_ipv6 *spec = item->spec; + const struct rte_flow_item_ipv6 *mask = item->mask; + struct mlx5_flow *flow = (struct mlx5_flow *)data; + struct ibv_exp_flow_spec_ipv6 *ipv6; + unsigned int ipv6_size = sizeof(struct ibv_exp_flow_spec_ipv6); + unsigned int i; + + ++flow->ibv_attr->num_of_specs; + flow->ibv_attr->priority = 1; + ipv6 = (void *)((uintptr_t)flow->ibv_attr + flow->offset); + *ipv6 = (struct ibv_exp_flow_spec_ipv6) { + .type = flow->inner | IBV_EXP_FLOW_SPEC_IPV6, + .size = ipv6_size, + }; + if (!spec) + return 0; + if (!mask) + mask = default_mask; + memcpy(ipv6->val.src_ip, spec->hdr.src_addr, + RTE_DIM(ipv6->val.src_ip)); + memcpy(ipv6->val.dst_ip, spec->hdr.dst_addr, + RTE_DIM(ipv6->val.dst_ip)); + memcpy(ipv6->mask.src_ip, mask->hdr.src_addr, + RTE_DIM(ipv6->mask.src_ip)); + memcpy(ipv6->mask.dst_ip, mask->hdr.dst_addr, + RTE_DIM(ipv6->mask.dst_ip)); + /* Remove unwanted bits from values. */ + for (i = 0; i < RTE_DIM(ipv6->val.src_ip); ++i) { + ipv6->val.src_ip[i] &= ipv6->mask.src_ip[i]; + ipv6->val.dst_ip[i] &= ipv6->mask.dst_ip[i]; + } + return 0; +} + +/** + * Convert UDP item to Verbs specification. + * + * @param item[in] + * Item specification. + * @param default_mask[in] + * Default bit-masks to use when item->mask is not provided. + * @param data[in, out] + * User structure. 
+ */ +static int +mlx5_flow_create_udp(const struct rte_flow_item *item, + const void *default_mask, + void *data) +{ + const struct rte_flow_item_udp *spec = item->spec; + const struct rte_flow_item_udp *mask = item->mask; + struct mlx5_flow *flow = (struct mlx5_flow *)data; + struct ibv_exp_flow_spec_tcp_udp *udp; + unsigned int udp_size = sizeof(struct ibv_exp_flow_spec_tcp_udp); + + ++flow->ibv_attr->num_of_specs; + flow->ibv_attr->priority = 0; + udp = (void *)((uintptr_t)flow->ibv_attr + flow->offset); + *udp = (struct ibv_exp_flow_spec_tcp_udp) { + .type = flow->inner | IBV_EXP_FLOW_SPEC_UDP, + .size = udp_size, + }; + if (!spec) + return 0; + if (!mask) + mask = default_mask; + udp->val.dst_port = spec->hdr.dst_port; + udp->val.src_port = spec->hdr.src_port; + udp->mask.dst_port = mask->hdr.dst_port; + udp->mask.src_port = mask->hdr.src_port; + /* Remove unwanted bits from values. */ + udp->val.src_port &= udp->mask.src_port; + udp->val.dst_port &= udp->mask.dst_port; + return 0; +} + +/** + * Convert TCP item to Verbs specification. + * + * @param item[in] + * Item specification. + * @param default_mask[in] + * Default bit-masks to use when item->mask is not provided. + * @param data[in, out] + * User structure. + */ +static int +mlx5_flow_create_tcp(const struct rte_flow_item *item, + const void *default_mask, + void *data) +{ + const struct rte_flow_item_tcp *spec = item->spec; + const struct rte_flow_item_tcp *mask = item->mask; + struct mlx5_flow *flow = (struct mlx5_flow *)data; + struct ibv_exp_flow_spec_tcp_udp *tcp; + unsigned int tcp_size = sizeof(struct ibv_exp_flow_spec_tcp_udp); + + ++flow->ibv_attr->num_of_specs; + flow->ibv_attr->priority = 0; + tcp = (void *)((uintptr_t)flow->ibv_attr + flow->offset); + *tcp = (struct ibv_exp_flow_spec_tcp_udp) { + .type = flow->inner | IBV_EXP_FLOW_SPEC_TCP, + .size = tcp_size, + }; + if (!spec) + return 0; + if (!mask) + mask = default_mask; + tcp->val.dst_port = spec->hdr.dst_port; + tcp->val.src_port = spec->hdr.src_port; + tcp->mask.dst_port = mask->hdr.dst_port; + tcp->mask.src_port = mask->hdr.src_port; + /* Remove unwanted bits from values. */ + tcp->val.src_port &= tcp->mask.src_port; + tcp->val.dst_port &= tcp->mask.dst_port; + return 0; +} + +/** + * Convert VXLAN item to Verbs specification. + * + * @param item[in] + * Item specification. + * @param default_mask[in] + * Default bit-masks to use when item->mask is not provided. + * @param data[in, out] + * User structure. + */ +static int +mlx5_flow_create_vxlan(const struct rte_flow_item *item, + const void *default_mask, + void *data) +{ + const struct rte_flow_item_vxlan *spec = item->spec; + const struct rte_flow_item_vxlan *mask = item->mask; + struct mlx5_flow *flow = (struct mlx5_flow *)data; + struct ibv_exp_flow_spec_tunnel *vxlan; + unsigned int size = sizeof(struct ibv_exp_flow_spec_tunnel); + union vni { + uint32_t vlan_id; + uint8_t vni[4]; + } id; + + ++flow->ibv_attr->num_of_specs; + flow->ibv_attr->priority = 0; + id.vni[0] = 0; + vxlan = (void *)((uintptr_t)flow->ibv_attr + flow->offset); + *vxlan = (struct ibv_exp_flow_spec_tunnel) { + .type = flow->inner | IBV_EXP_FLOW_SPEC_VXLAN_TUNNEL, + .size = size, + }; + flow->inner = IBV_EXP_FLOW_SPEC_INNER; + if (!spec) + return 0; + if (!mask) + mask = default_mask; + memcpy(&id.vni[1], spec->vni, 3); + vxlan->val.tunnel_id = id.vlan_id; + memcpy(&id.vni[1], mask->vni, 3); + vxlan->mask.tunnel_id = id.vlan_id; + /* Remove unwanted bits from values. 
*/ + vxlan->val.tunnel_id &= vxlan->mask.tunnel_id; + return 0; +} + +/** + * Convert mark/flag action to Verbs specification. + * + * @param flow + * Pointer to MLX5 flow structure. + * @param mark_id + * Mark identifier. + */ +static int +mlx5_flow_create_flag_mark(struct mlx5_flow *flow, uint32_t mark_id) +{ + struct ibv_exp_flow_spec_action_tag *tag; + unsigned int size = sizeof(struct ibv_exp_flow_spec_action_tag); + + tag = (void *)((uintptr_t)flow->ibv_attr + flow->offset); + *tag = (struct ibv_exp_flow_spec_action_tag){ + .type = IBV_EXP_FLOW_SPEC_ACTION_TAG, + .size = size, + .tag_id = mlx5_flow_mark_set(mark_id), + }; + ++flow->ibv_attr->num_of_specs; + return 0; +} + +/** + * Complete flow rule creation. + * + * @param priv + * Pointer to private structure. + * @param ibv_attr + * Verbs flow attributes. + * @param action + * Target action structure. + * @param[out] error + * Perform verbose error reporting if not NULL. + * + * @return + * A flow if the rule could be created. + */ +static struct rte_flow * +priv_flow_create_action_queue(struct priv *priv, + struct ibv_exp_flow_attr *ibv_attr, + struct mlx5_flow_action *action, + struct rte_flow_error *error) +{ + struct rxq_ctrl *rxq; + struct rte_flow *rte_flow; + + assert(priv->pd); + assert(priv->ctx); + rte_flow = rte_calloc(__func__, 1, sizeof(*rte_flow), 0); + if (!rte_flow) { + rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE, + NULL, "cannot allocate flow memory"); + return NULL; + } + if (action->drop) { + rte_flow->cq = + ibv_exp_create_cq(priv->ctx, 1, NULL, NULL, 0, + &(struct ibv_exp_cq_init_attr){ + .comp_mask = 0, + }); + if (!rte_flow->cq) { + rte_flow_error_set(error, ENOMEM, + RTE_FLOW_ERROR_TYPE_HANDLE, + NULL, "cannot allocate CQ"); + goto error; + } + rte_flow->wq = ibv_exp_create_wq(priv->ctx, + &(struct ibv_exp_wq_init_attr){ + .wq_type = IBV_EXP_WQT_RQ, + .max_recv_wr = 1, + .max_recv_sge = 1, + .pd = priv->pd, + .cq = rte_flow->cq, + }); + } else { + rxq = container_of((*priv->rxqs)[action->queue_id], + struct rxq_ctrl, rxq); + rte_flow->rxq = &rxq->rxq; + rxq->rxq.mark |= action->mark; + rte_flow->wq = rxq->wq; + } + rte_flow->mark = action->mark; + rte_flow->ibv_attr = ibv_attr; + rte_flow->ind_table = ibv_exp_create_rwq_ind_table( + priv->ctx, + &(struct ibv_exp_rwq_ind_table_init_attr){ + .pd = priv->pd, + .log_ind_tbl_size = 0, + .ind_tbl = &rte_flow->wq, + .comp_mask = 0, + }); + if (!rte_flow->ind_table) { + rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE, + NULL, "cannot allocate indirection table"); + goto error; + } + rte_flow->qp = ibv_exp_create_qp( + priv->ctx, + &(struct ibv_exp_qp_init_attr){ + .qp_type = IBV_QPT_RAW_PACKET, + .comp_mask = + IBV_EXP_QP_INIT_ATTR_PD | + IBV_EXP_QP_INIT_ATTR_PORT | + IBV_EXP_QP_INIT_ATTR_RX_HASH, + .pd = priv->pd, + .rx_hash_conf = &(struct ibv_exp_rx_hash_conf){ + .rx_hash_function = + IBV_EXP_RX_HASH_FUNC_TOEPLITZ, + .rx_hash_key_len = rss_hash_default_key_len, + .rx_hash_key = rss_hash_default_key, + .rx_hash_fields_mask = 0, + .rwq_ind_tbl = rte_flow->ind_table, + }, + .port_num = priv->port, + }); + if (!rte_flow->qp) { + rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE, + NULL, "cannot allocate QP"); + goto error; + } + if (!priv->started) + return rte_flow; + rte_flow->ibv_flow = ibv_exp_create_flow(rte_flow->qp, + rte_flow->ibv_attr); + if (!rte_flow->ibv_flow) { + rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE, + NULL, "flow rule creation failure"); + goto error; + } + return rte_flow; +error: + 
assert(rte_flow); + if (rte_flow->qp) + ibv_destroy_qp(rte_flow->qp); + if (rte_flow->ind_table) + ibv_exp_destroy_rwq_ind_table(rte_flow->ind_table); + if (!rte_flow->rxq && rte_flow->wq) + ibv_exp_destroy_wq(rte_flow->wq); + if (!rte_flow->rxq && rte_flow->cq) + ibv_destroy_cq(rte_flow->cq); + rte_free(rte_flow->ibv_attr); + rte_free(rte_flow); + return NULL; +} + +/** + * Convert a flow. + * + * @param priv + * Pointer to private structure. + * @param[in] attr + * Flow rule attributes. + * @param[in] pattern + * Pattern specification (list terminated by the END pattern item). + * @param[in] actions + * Associated actions (list terminated by the END action). + * @param[out] error + * Perform verbose error reporting if not NULL. + * + * @return + * A flow on success, NULL otherwise. + */ +static struct rte_flow * +priv_flow_create(struct priv *priv, + const struct rte_flow_attr *attr, + const struct rte_flow_item items[], + const struct rte_flow_action actions[], + struct rte_flow_error *error) +{ + struct rte_flow *rte_flow; + struct mlx5_flow_action action; + struct mlx5_flow flow = { .offset = sizeof(struct ibv_exp_flow_attr), }; + int err; + + err = priv_flow_validate(priv, attr, items, actions, error, &flow); + if (err) + goto exit; + flow.ibv_attr = rte_malloc(__func__, flow.offset, 0); + flow.offset = sizeof(struct ibv_exp_flow_attr); + if (!flow.ibv_attr) { + rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE, + NULL, "cannot allocate ibv_attr memory"); + goto exit; + } + *flow.ibv_attr = (struct ibv_exp_flow_attr){ + .type = IBV_EXP_FLOW_ATTR_NORMAL, + .size = sizeof(struct ibv_exp_flow_attr), + .priority = attr->priority, + .num_of_specs = 0, + .port = 0, + .flags = 0, + .reserved = 0, + }; + flow.inner = 0; + claim_zero(priv_flow_validate(priv, attr, items, actions, + error, &flow)); + action = (struct mlx5_flow_action){ + .queue = 0, + .drop = 0, + .mark = 0, + .mark_id = MLX5_FLOW_MARK_DEFAULT, + }; + for (; actions->type != RTE_FLOW_ACTION_TYPE_END; ++actions) { + if (actions->type == RTE_FLOW_ACTION_TYPE_VOID) { + continue; + } else if (actions->type == RTE_FLOW_ACTION_TYPE_QUEUE) { + action.queue = 1; + action.queue_id = + ((const struct rte_flow_action_queue *) + actions->conf)->index; + } else if (actions->type == RTE_FLOW_ACTION_TYPE_DROP) { + action.drop = 1; + action.mark = 0; + } else if (actions->type == RTE_FLOW_ACTION_TYPE_MARK) { + const struct rte_flow_action_mark *mark = + (const struct rte_flow_action_mark *) + actions->conf; + + if (mark) + action.mark_id = mark->id; + action.mark = !action.drop; + } else { + rte_flow_error_set(error, ENOTSUP, + RTE_FLOW_ERROR_TYPE_ACTION, + actions, "unsupported action"); + goto exit; + } + } + if (action.mark) { + mlx5_flow_create_flag_mark(&flow, action.mark_id); + flow.offset += sizeof(struct ibv_exp_flow_spec_action_tag); + } + rte_flow = priv_flow_create_action_queue(priv, flow.ibv_attr, + &action, error); + return rte_flow; +exit: + rte_free(flow.ibv_attr); + return NULL; +} + +/** + * Create a flow. 
+ * + * @see rte_flow_create() + * @see rte_flow_ops + */ +struct rte_flow * +mlx5_flow_create(struct rte_eth_dev *dev, + const struct rte_flow_attr *attr, + const struct rte_flow_item items[], + const struct rte_flow_action actions[], + struct rte_flow_error *error) +{ + struct priv *priv = dev->data->dev_private; + struct rte_flow *flow; + + priv_lock(priv); + flow = priv_flow_create(priv, attr, items, actions, error); + if (flow) { + LIST_INSERT_HEAD(&priv->flows, flow, next); + DEBUG("Flow created %p", (void *)flow); + } + priv_unlock(priv); + return flow; +} + +/** + * Destroy a flow. + * + * @param priv + * Pointer to private structure. + * @param[in] flow + * Flow to destroy. + */ +static void +priv_flow_destroy(struct priv *priv, + struct rte_flow *flow) +{ + (void)priv; + LIST_REMOVE(flow, next); + if (flow->ibv_flow) + claim_zero(ibv_exp_destroy_flow(flow->ibv_flow)); + if (flow->qp) + claim_zero(ibv_destroy_qp(flow->qp)); + if (flow->ind_table) + claim_zero(ibv_exp_destroy_rwq_ind_table(flow->ind_table)); + if (!flow->rxq && flow->wq) + claim_zero(ibv_exp_destroy_wq(flow->wq)); + if (!flow->rxq && flow->cq) + claim_zero(ibv_destroy_cq(flow->cq)); + if (flow->mark) { + struct rte_flow *tmp; + uint32_t mark_n = 0; + + for (tmp = LIST_FIRST(&priv->flows); + tmp; + tmp = LIST_NEXT(tmp, next)) { + if ((flow->rxq == tmp->rxq) && tmp->mark) + ++mark_n; + } + flow->rxq->mark = !!mark_n; + } + rte_free(flow->ibv_attr); + DEBUG("Flow destroyed %p", (void *)flow); + rte_free(flow); +} + +/** + * Destroy a flow. + * + * @see rte_flow_destroy() + * @see rte_flow_ops + */ +int +mlx5_flow_destroy(struct rte_eth_dev *dev, + struct rte_flow *flow, + struct rte_flow_error *error) +{ + struct priv *priv = dev->data->dev_private; + + (void)error; + priv_lock(priv); + priv_flow_destroy(priv, flow); + priv_unlock(priv); + return 0; +} + +/** + * Destroy all flows. + * + * @param priv + * Pointer to private structure. + */ +static void +priv_flow_flush(struct priv *priv) +{ + while (!LIST_EMPTY(&priv->flows)) { + struct rte_flow *flow; + + flow = LIST_FIRST(&priv->flows); + priv_flow_destroy(priv, flow); + } +} + +/** + * Destroy all flows. + * + * @see rte_flow_flush() + * @see rte_flow_ops + */ +int +mlx5_flow_flush(struct rte_eth_dev *dev, + struct rte_flow_error *error) +{ + struct priv *priv = dev->data->dev_private; + + (void)error; + priv_lock(priv); + priv_flow_flush(priv); + priv_unlock(priv); + return 0; +} + +/** + * Remove all flows. + * + * Called by dev_stop() to remove all flows. + * + * @param priv + * Pointer to private structure. + */ +void +priv_flow_stop(struct priv *priv) +{ + struct rte_flow *flow; + + for (flow = LIST_FIRST(&priv->flows); + flow; + flow = LIST_NEXT(flow, next)) { + claim_zero(ibv_exp_destroy_flow(flow->ibv_flow)); + flow->ibv_flow = NULL; + if (flow->mark) + flow->rxq->mark = 0; + DEBUG("Flow %p removed", (void *)flow); + } +} + +/** + * Add all flows. + * + * @param priv + * Pointer to private structure. + * + * @return + * 0 on success, a errno value otherwise and rte_errno is set. 
+ */ +int +priv_flow_start(struct priv *priv) +{ + struct rte_flow *flow; + + for (flow = LIST_FIRST(&priv->flows); + flow; + flow = LIST_NEXT(flow, next)) { + flow->ibv_flow = ibv_exp_create_flow(flow->qp, + flow->ibv_attr); + if (!flow->ibv_flow) { + DEBUG("Flow %p cannot be applied", (void *)flow); + rte_errno = EINVAL; + return rte_errno; + } + DEBUG("Flow %p applied", (void *)flow); + if (flow->rxq) + flow->rxq->mark |= flow->mark; + } + return 0; +} diff --git a/src/dpdk/drivers/net/mlx5/mlx5_prm.h b/src/dpdk/drivers/net/mlx5/mlx5_prm.h index 8426adb3..755b5d77 100644 --- a/src/dpdk/drivers/net/mlx5/mlx5_prm.h +++ b/src/dpdk/drivers/net/mlx5/mlx5_prm.h @@ -34,6 +34,8 @@ #ifndef RTE_PMD_MLX5_PRM_H_ #define RTE_PMD_MLX5_PRM_H_ +#include <assert.h> + /* Verbs header. */ /* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */ #ifdef PEDANTIC @@ -44,6 +46,7 @@ #pragma GCC diagnostic error "-Wpedantic" #endif +#include <rte_vect.h> #include "mlx5_autoconf.h" /* Get CQE owner bit. */ @@ -61,9 +64,6 @@ /* Invalidate a CQE. */ #define MLX5_CQE_INVALIDATE (MLX5_CQE_INVALID << 4) -/* CQE value to inform that VLAN is stripped. */ -#define MLX5_CQE_VLAN_STRIPPED 0x1 - /* Maximum number of packets a multi-packet WQE can handle. */ #define MLX5_MPW_DSEG_MAX 5 @@ -80,30 +80,54 @@ /* Room for inline data in multi-packet WQE. */ #define MLX5_MWQE64_INL_DATA 28 -//#ifndef HAVE_VERBS_MLX5_OPCODE_TSO -//#define MLX5_OPCODE_TSO MLX5_OPCODE_LSO_MPW /* Compat with OFED 3.3. */ -//#endif +#ifndef HAVE_VERBS_MLX5_OPCODE_TSO +#define MLX5_OPCODE_TSO MLX5_OPCODE_LSO_MPW /* Compat with OFED 3.3. */ +#endif + +/* CQE value to inform that VLAN is stripped. */ +#define MLX5_CQE_VLAN_STRIPPED (1u << 0) -/* IPv4 packet. */ -#define MLX5_CQE_RX_IPV4_PACKET (1u << 2) +/* IPv4 options. */ +#define MLX5_CQE_RX_IP_EXT_OPTS_PACKET (1u << 1) /* IPv6 packet. */ -#define MLX5_CQE_RX_IPV6_PACKET (1u << 3) +#define MLX5_CQE_RX_IPV6_PACKET (1u << 2) + +/* IPv4 packet. */ +#define MLX5_CQE_RX_IPV4_PACKET (1u << 3) + +/* TCP packet. */ +#define MLX5_CQE_RX_TCP_PACKET (1u << 4) + +/* UDP packet. */ +#define MLX5_CQE_RX_UDP_PACKET (1u << 5) + +/* IP is fragmented. */ +#define MLX5_CQE_RX_IP_FRAG_PACKET (1u << 7) -/* Outer IPv4 packet. */ -#define MLX5_CQE_RX_OUTER_IPV4_PACKET (1u << 7) +/* L2 header is valid. */ +#define MLX5_CQE_RX_L2_HDR_VALID (1u << 8) -/* Outer IPv6 packet. */ -#define MLX5_CQE_RX_OUTER_IPV6_PACKET (1u << 8) +/* L3 header is valid. */ +#define MLX5_CQE_RX_L3_HDR_VALID (1u << 9) + +/* L4 header is valid. */ +#define MLX5_CQE_RX_L4_HDR_VALID (1u << 10) + +/* Outer packet, 0 IPv4, 1 IPv6. */ +#define MLX5_CQE_RX_OUTER_PACKET (1u << 1) /* Tunnel packet bit in the CQE. */ -#define MLX5_CQE_RX_TUNNEL_PACKET (1u << 4) +#define MLX5_CQE_RX_TUNNEL_PACKET (1u << 0) -/* Outer IP checksum OK. */ -#define MLX5_CQE_RX_OUTER_IP_CSUM_OK (1u << 5) +/* INVALID is used by packets matching no flow rules. */ +#define MLX5_FLOW_MARK_INVALID 0 -/* Outer UDP header and checksum OK. */ -#define MLX5_CQE_RX_OUTER_TCP_UDP_CSUM_OK (1u << 6) +/* Maximum allowed value to mark a packet. */ +#define MLX5_FLOW_MARK_MAX 0xfffff0 + +/* Default mark value used when none is provided. */ +#define MLX5_FLOW_MARK_DEFAULT 0xffffff /* Subset of struct mlx5_wqe_eth_seg. 
*/ struct mlx5_wqe_eth_seg_small { @@ -114,12 +138,19 @@ struct mlx5_wqe_eth_seg_small { uint32_t rsvd2; uint16_t inline_hdr_sz; uint8_t inline_hdr[2]; -}; +} __rte_aligned(MLX5_WQE_DWORD_SIZE); struct mlx5_wqe_inl_small { uint32_t byte_cnt; uint8_t raw; -}; +} __rte_aligned(MLX5_WQE_DWORD_SIZE); + +struct mlx5_wqe_ctrl { + uint32_t ctrl0; + uint32_t ctrl1; + uint32_t ctrl2; + uint32_t ctrl3; +} __rte_aligned(MLX5_WQE_DWORD_SIZE); /* Small common part of the WQE. */ struct mlx5_wqe { @@ -127,11 +158,17 @@ struct mlx5_wqe { struct mlx5_wqe_eth_seg_small eseg; }; +/* Vectorize WQE header. */ +struct mlx5_wqe_v { + rte_v128u32_t ctrl; + rte_v128u32_t eseg; +}; + /* WQE. */ struct mlx5_wqe64 { struct mlx5_wqe hdr; uint8_t raw[32]; -} __rte_aligned(64); +} __rte_aligned(MLX5_WQE_SIZE); /* MPW session status. */ enum mlx5_mpw_state { @@ -163,16 +200,72 @@ struct mlx5_cqe { uint32_t rx_hash_res; uint8_t rx_hash_type; uint8_t rsvd1[11]; - uint8_t hds_ip_ext; - uint8_t l4_hdr_type_etc; + uint16_t hdr_type_etc; uint16_t vlan_info; uint8_t rsvd2[12]; uint32_t byte_cnt; uint64_t timestamp; - uint8_t rsvd3[4]; + uint32_t sop_drop_qpn; uint16_t wqe_counter; uint8_t rsvd4; uint8_t op_own; }; +/** + * Convert a user mark to flow mark. + * + * @param val + * Mark value to convert. + * + * @return + * Converted mark value. + */ +static inline uint32_t +mlx5_flow_mark_set(uint32_t val) +{ + uint32_t ret; + + /* + * Add one to the user value to differentiate un-marked flows from + * marked flows. + */ + ++val; +#if RTE_BYTE_ORDER == RTE_LITTLE_ENDIAN + /* + * Mark is 24 bits (minus reserved values) but is stored on a 32 bit + * word, byte-swapped by the kernel on little-endian systems. In this + * case, left-shifting the resulting big-endian value ensures the + * least significant 24 bits are retained when converting it back. + */ + ret = rte_cpu_to_be_32(val) >> 8; +#else + ret = val; +#endif + assert(ret <= MLX5_FLOW_MARK_MAX); + return ret; +} + +/** + * Convert a mark to user mark. + * + * @param val + * Mark value to convert. + * + * @return + * Converted mark value. + */ +static inline uint32_t +mlx5_flow_mark_get(uint32_t val) +{ + /* + * Subtract one from the retrieved value. It was added by + * mlx5_flow_mark_set() to distinguish unmarked flows. 
+ */ +#if RTE_BYTE_ORDER == RTE_LITTLE_ENDIAN + return (val >> 8) - 1; +#else + return val - 1; +#endif +} + #endif /* RTE_PMD_MLX5_PRM_H_ */ diff --git a/src/dpdk/drivers/net/mlx5/mlx5_rxq.c b/src/dpdk/drivers/net/mlx5/mlx5_rxq.c index c5746fa0..28e93d3e 100644 --- a/src/dpdk/drivers/net/mlx5/mlx5_rxq.c +++ b/src/dpdk/drivers/net/mlx5/mlx5_rxq.c @@ -102,7 +102,7 @@ const struct hash_rxq_init hash_rxq_init[] = { ETH_RSS_FRAG_IPV4), .flow_priority = 1, .flow_spec.ipv4 = { - .type = IBV_EXP_FLOW_SPEC_IPV4_EXT, + .type = IBV_EXP_FLOW_SPEC_IPV4, .size = sizeof(hash_rxq_init[0].flow_spec.ipv4), }, .underlayer = &hash_rxq_init[HASH_RXQ_ETH], @@ -140,7 +140,7 @@ const struct hash_rxq_init hash_rxq_init[] = { ETH_RSS_FRAG_IPV6), .flow_priority = 1, .flow_spec.ipv6 = { - .type = IBV_EXP_FLOW_SPEC_IPV6_EXT, + .type = IBV_EXP_FLOW_SPEC_IPV6, .size = sizeof(hash_rxq_init[0].flow_spec.ipv6), }, .underlayer = &hash_rxq_init[HASH_RXQ_ETH], diff --git a/src/dpdk/drivers/net/mlx5/mlx5_rxtx.c b/src/dpdk/drivers/net/mlx5/mlx5_rxtx.c index b56c0a11..88b03544 100644 --- a/src/dpdk/drivers/net/mlx5/mlx5_rxtx.c +++ b/src/dpdk/drivers/net/mlx5/mlx5_rxtx.c @@ -69,7 +69,34 @@ #include "mlx5_defs.h" #include "mlx5_prm.h" -//#define MLX5_OPCODE_TSO 0xe +static inline int +check_cqe(volatile struct mlx5_cqe *cqe, + unsigned int cqes_n, const uint16_t ci) + __attribute__((always_inline)); + +static inline void +txq_complete(struct txq *txq) __attribute__((always_inline)); + +static inline uint32_t +txq_mp2mr(struct txq *txq, struct rte_mempool *mp) + __attribute__((always_inline)); + +static inline void +mlx5_tx_dbrec(struct txq *txq, volatile struct mlx5_wqe *wqe) + __attribute__((always_inline)); + +static inline uint32_t +rxq_cq_to_pkt_type(volatile struct mlx5_cqe *cqe) + __attribute__((always_inline)); + +static inline int +mlx5_rx_poll_len(struct rxq *rxq, volatile struct mlx5_cqe *cqe, + uint16_t cqe_cnt, uint32_t *rss_hash) + __attribute__((always_inline)); + +static inline uint32_t +rxq_cq_to_ol_flags(struct rxq *rxq, volatile struct mlx5_cqe *cqe) + __attribute__((always_inline)); #ifndef NDEBUG @@ -86,7 +113,7 @@ static inline int check_cqe_seen(volatile struct mlx5_cqe *cqe) { static const uint8_t magic[] = "seen"; - volatile uint8_t (*buf)[sizeof(cqe->rsvd3)] = &cqe->rsvd3; + volatile uint8_t (*buf)[sizeof(cqe->rsvd0)] = &cqe->rsvd0; int ret = 1; unsigned int i; @@ -100,11 +127,6 @@ check_cqe_seen(volatile struct mlx5_cqe *cqe) #endif /* NDEBUG */ -static inline int -check_cqe(volatile struct mlx5_cqe *cqe, - unsigned int cqes_n, const uint16_t ci) - __attribute__((always_inline)); - /** * Check whether CQE is valid. * @@ -154,8 +176,23 @@ check_cqe(volatile struct mlx5_cqe *cqe, return 0; } -static inline void -txq_complete(struct txq *txq) __attribute__((always_inline)); +/** + * Return the address of the WQE. + * + * @param txq + * Pointer to TX queue structure. + * @param wqe_ci + * WQE consumer index. + * + * @return + * WQE address. + */ +static inline uintptr_t * +tx_mlx5_wqe(struct txq *txq, uint16_t ci) +{ + ci &= ((1 << txq->wqe_n) - 1); + return (uintptr_t *)((uintptr_t)txq->wqes + ci * MLX5_WQE_SIZE); +} /** * Manage TX completions. 
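The tx_mlx5_wqe() helper introduced in the hunk above locates a WQE by masking the consumer index with the power-of-two ring size and scaling by MLX5_WQE_SIZE. The standalone sketch below illustrates only that wrap-around arithmetic; the example_* names, the ring size of 16 slots and the 64-byte WQE size are assumptions made for this illustration and are not taken from the driver.

#include <assert.h>
#include <stdint.h>

#define EXAMPLE_WQE_LOG_N 4u                  /* stand-in for txq->wqe_n */
#define EXAMPLE_WQE_N     (1u << EXAMPLE_WQE_LOG_N)
#define EXAMPLE_WQE_SIZE  64u                 /* assumed WQE stride in bytes */

/* Mirrors the index arithmetic of tx_mlx5_wqe(): mask the index, then scale. */
static uintptr_t
example_wqe_addr(uintptr_t ring_base, uint16_t ci)
{
	ci &= EXAMPLE_WQE_N - 1;              /* wrap with a power-of-two mask */
	return ring_base + (uintptr_t)ci * EXAMPLE_WQE_SIZE;
}

int
main(void)
{
	uintptr_t base = 0x1000;

	/* Indices beyond the ring size wrap back to the start of the ring. */
	assert(example_wqe_addr(base, 0) == base);
	assert(example_wqe_addr(base, EXAMPLE_WQE_N) == base);
	assert(example_wqe_addr(base, EXAMPLE_WQE_N + 3) ==
	       base + 3 * EXAMPLE_WQE_SIZE);
	return 0;
}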
@@ -175,7 +212,7 @@ txq_complete(struct txq *txq) uint16_t elts_tail; uint16_t cq_ci = txq->cq_ci; volatile struct mlx5_cqe *cqe = NULL; - volatile struct mlx5_wqe *wqe; + volatile struct mlx5_wqe_ctrl *ctrl; do { volatile struct mlx5_cqe *tmp; @@ -201,9 +238,10 @@ txq_complete(struct txq *txq) } while (1); if (unlikely(cqe == NULL)) return; - wqe = &(*txq->wqes)[htons(cqe->wqe_counter) & - ((1 << txq->wqe_n) - 1)].hdr; - elts_tail = wqe->ctrl[3]; + txq->wqe_pi = ntohs(cqe->wqe_counter); + ctrl = (volatile struct mlx5_wqe_ctrl *) + tx_mlx5_wqe(txq, txq->wqe_pi); + elts_tail = ctrl->ctrl3; assert(elts_tail < (1 << txq->wqe_n)); /* Free buffers. */ while (elts_free != elts_tail) { @@ -248,10 +286,6 @@ txq_mb2mp(struct rte_mbuf *buf) return buf->pool; } -static inline uint32_t -txq_mp2mr(struct txq *txq, struct rte_mempool *mp) - __attribute__((always_inline)); - /** * Get Memory Region (MR) <-> Memory Pool (MP) association from txq->mp2mr[]. * Add MP to txq->mp2mr[] if it's not registered yet. If mp2mr[] is full, @@ -294,57 +328,20 @@ txq_mp2mr(struct txq *txq, struct rte_mempool *mp) * * @param txq * Pointer to TX queue structure. + * @param wqe + * Pointer to the last WQE posted in the NIC. */ static inline void -mlx5_tx_dbrec(struct txq *txq) +mlx5_tx_dbrec(struct txq *txq, volatile struct mlx5_wqe *wqe) { - uint8_t *dst = (uint8_t *)((uintptr_t)txq->bf_reg + txq->bf_offset); - uint32_t data[4] = { - htonl((txq->wqe_ci << 8) | MLX5_OPCODE_SEND), - htonl(txq->qp_num_8s), - 0, - 0, - }; + uint64_t *dst = (uint64_t *)((uintptr_t)txq->bf_reg); + volatile uint64_t *src = ((volatile uint64_t *)wqe); + rte_wmb(); *txq->qp_db = htonl(txq->wqe_ci); /* Ensure ordering between DB record and BF copy. */ rte_wmb(); - memcpy(dst, (uint8_t *)data, 16); - txq->bf_offset ^= (1 << txq->bf_buf_size); -} - -/** - * Prefetch a CQE. - * - * @param txq - * Pointer to TX queue structure. - * @param cqe_ci - * CQE consumer index. - */ -static inline void -tx_prefetch_cqe(struct txq *txq, uint16_t ci) -{ - volatile struct mlx5_cqe *cqe; - - cqe = &(*txq->cqes)[ci & ((1 << txq->cqe_n) - 1)]; - rte_prefetch0(cqe); -} - -/** - * Prefetch a WQE. - * - * @param txq - * Pointer to TX queue structure. - * @param wqe_ci - * WQE consumer index. - */ -static inline void -tx_prefetch_wqe(struct txq *txq, uint16_t ci) -{ - volatile struct mlx5_wqe64 *wqe; - - wqe = &(*txq->wqes)[ci & ((1 << txq->wqe_n) - 1)]; - rte_prefetch0(wqe); + *dst = *src; } /** @@ -369,8 +366,9 @@ mlx5_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n) unsigned int i = 0; unsigned int j = 0; unsigned int max; + uint16_t max_wqe; unsigned int comp; - volatile struct mlx5_wqe *wqe = NULL; + volatile struct mlx5_wqe_v *wqe = NULL; unsigned int segs_n = 0; struct rte_mbuf *buf = NULL; uint8_t *raw; @@ -378,19 +376,24 @@ mlx5_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n) if (unlikely(!pkts_n)) return 0; /* Prefetch first packet cacheline. */ - tx_prefetch_cqe(txq, txq->cq_ci); - tx_prefetch_cqe(txq, txq->cq_ci + 1); rte_prefetch0(*pkts); /* Start processing. 
*/ txq_complete(txq); max = (elts_n - (elts_head - txq->elts_tail)); if (max > elts_n) max -= elts_n; + max_wqe = (1u << txq->wqe_n) - (txq->wqe_ci - txq->wqe_pi); + if (unlikely(!max_wqe)) + return 0; do { - volatile struct mlx5_wqe_data_seg *dseg = NULL; + volatile rte_v128u32_t *dseg = NULL; uint32_t length; unsigned int ds = 0; uintptr_t addr; + uint64_t naddr; + uint16_t pkt_inline_sz = MLX5_WQE_DWORD_SIZE + 2; + uint16_t ehdr; + uint8_t cs_flags = 0; #ifdef MLX5_PMD_SOFT_COUNTERS uint32_t total_length = 0; #endif @@ -409,13 +412,17 @@ mlx5_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n) --segs_n; if (!segs_n) --pkts_n; - wqe = &(*txq->wqes)[txq->wqe_ci & - ((1 << txq->wqe_n) - 1)].hdr; - tx_prefetch_wqe(txq, txq->wqe_ci + 1); + if (unlikely(--max_wqe == 0)) + break; + wqe = (volatile struct mlx5_wqe_v *) + tx_mlx5_wqe(txq, txq->wqe_ci); + rte_prefetch0(tx_mlx5_wqe(txq, txq->wqe_ci + 1)); if (pkts_n > 1) rte_prefetch0(*pkts); addr = rte_pktmbuf_mtod(buf, uintptr_t); length = DATA_LEN(buf); + ehdr = (((uint8_t *)addr)[1] << 8) | + ((uint8_t *)addr)[0]; #ifdef MLX5_PMD_SOFT_COUNTERS total_length = length; #endif @@ -433,78 +440,88 @@ mlx5_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n) /* Should we enable HW CKSUM offload */ if (buf->ol_flags & (PKT_TX_IP_CKSUM | PKT_TX_TCP_CKSUM | PKT_TX_UDP_CKSUM)) { - wqe->eseg.cs_flags = - MLX5_ETH_WQE_L3_CSUM | - MLX5_ETH_WQE_L4_CSUM; - } else { - wqe->eseg.cs_flags = 0; + cs_flags = MLX5_ETH_WQE_L3_CSUM | MLX5_ETH_WQE_L4_CSUM; } - raw = (uint8_t *)(uintptr_t)&wqe->eseg.inline_hdr[0]; - /* Start the know and common part of the WQE structure. */ - wqe->ctrl[0] = htonl((txq->wqe_ci << 8) | MLX5_OPCODE_SEND); - wqe->ctrl[2] = 0; - wqe->ctrl[3] = 0; - wqe->eseg.rsvd0 = 0; - wqe->eseg.rsvd1 = 0; - wqe->eseg.mss = 0; - wqe->eseg.rsvd2 = 0; - /* Start by copying the Ethernet Header. */ - memcpy((uint8_t *)raw, ((uint8_t *)addr), 16); - length -= MLX5_WQE_DWORD_SIZE; - addr += MLX5_WQE_DWORD_SIZE; + raw = ((uint8_t *)(uintptr_t)wqe) + 2 * MLX5_WQE_DWORD_SIZE; /* Replace the Ethernet type by the VLAN if necessary. */ if (buf->ol_flags & PKT_TX_VLAN_PKT) { uint32_t vlan = htonl(0x81000000 | buf->vlan_tci); - - memcpy((uint8_t *)(raw + MLX5_WQE_DWORD_SIZE - - sizeof(vlan)), - &vlan, sizeof(vlan)); - addr -= sizeof(vlan); - length += sizeof(vlan); + unsigned int len = 2 * ETHER_ADDR_LEN - 2; + + addr += 2; + length -= 2; + /* Copy Destination and source mac address. */ + memcpy((uint8_t *)raw, ((uint8_t *)addr), len); + /* Copy VLAN. */ + memcpy((uint8_t *)raw + len, &vlan, sizeof(vlan)); + /* Copy missing two bytes to end the DSeg. */ + memcpy((uint8_t *)raw + len + sizeof(vlan), + ((uint8_t *)addr) + len, 2); + addr += len + 2; + length -= (len + 2); + } else { + memcpy((uint8_t *)raw, ((uint8_t *)addr) + 2, + MLX5_WQE_DWORD_SIZE); + length -= pkt_inline_sz; + addr += pkt_inline_sz; } /* Inline if enough room. */ - if (txq->max_inline != 0) { - uintptr_t end = - (uintptr_t)&(*txq->wqes)[1 << txq->wqe_n]; - uint16_t max_inline = - txq->max_inline * RTE_CACHE_LINE_SIZE; - uint16_t pkt_inline_sz = MLX5_WQE_DWORD_SIZE; - uint16_t room; + if (txq->max_inline) { + uintptr_t end = (uintptr_t) + (((uintptr_t)txq->wqes) + + (1 << txq->wqe_n) * MLX5_WQE_SIZE); + unsigned int max_inline = txq->max_inline * + RTE_CACHE_LINE_SIZE - + MLX5_WQE_DWORD_SIZE; + uintptr_t addr_end = (addr + max_inline) & + ~(RTE_CACHE_LINE_SIZE - 1); + unsigned int copy_b = (addr_end > addr) ? 
+ RTE_MIN((addr_end - addr), length) : + 0; raw += MLX5_WQE_DWORD_SIZE; - room = end - (uintptr_t)raw; - if (room > max_inline) { - uintptr_t addr_end = (addr + max_inline) & - ~(RTE_CACHE_LINE_SIZE - 1); - uint16_t copy_b = ((addr_end - addr) > length) ? - length : - (addr_end - addr); + if (copy_b && ((end - (uintptr_t)raw) > copy_b)) { + /* + * One Dseg remains in the current WQE. To + * keep the computation positive, it is + * removed after the bytes to Dseg conversion. + */ + uint16_t n = (MLX5_WQE_DS(copy_b) - 1 + 3) / 4; + if (unlikely(max_wqe < n)) + break; + max_wqe -= n; rte_memcpy((void *)raw, (void *)addr, copy_b); addr += copy_b; length -= copy_b; pkt_inline_sz += copy_b; - /* Sanity check. */ - assert(addr <= addr_end); } - /* Store the inlined packet size in the WQE. */ - wqe->eseg.inline_hdr_sz = htons(pkt_inline_sz); /* - * 2 DWORDs consumed by the WQE header + 1 DSEG + + * 2 DWORDs consumed by the WQE header + ETH segment + * the size of the inline part of the packet. */ ds = 2 + MLX5_WQE_DS(pkt_inline_sz - 2); if (length > 0) { - dseg = (struct mlx5_wqe_data_seg *) - ((uintptr_t)wqe + - (ds * MLX5_WQE_DWORD_SIZE)); - if ((uintptr_t)dseg >= end) - dseg = (struct mlx5_wqe_data_seg *) - ((uintptr_t)&(*txq->wqes)[0]); + if (ds % (MLX5_WQE_SIZE / + MLX5_WQE_DWORD_SIZE) == 0) { + if (unlikely(--max_wqe == 0)) + break; + dseg = (volatile rte_v128u32_t *) + tx_mlx5_wqe(txq, txq->wqe_ci + + ds / 4); + } else { + dseg = (volatile rte_v128u32_t *) + ((uintptr_t)wqe + + (ds * MLX5_WQE_DWORD_SIZE)); + } goto use_dseg; } else if (!segs_n) { goto next_pkt; } else { + /* dseg will be advance as part of next_seg */ + dseg = (volatile rte_v128u32_t *) + ((uintptr_t)wqe + + ((ds - 1) * MLX5_WQE_DWORD_SIZE)); goto next_seg; } } else { @@ -512,16 +529,17 @@ mlx5_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n) * No inline has been done in the packet, only the * Ethernet Header as been stored. */ - wqe->eseg.inline_hdr_sz = htons(MLX5_WQE_DWORD_SIZE); - dseg = (struct mlx5_wqe_data_seg *) + dseg = (volatile rte_v128u32_t *) ((uintptr_t)wqe + (3 * MLX5_WQE_DWORD_SIZE)); ds = 3; use_dseg: /* Add the remaining packet as a simple ds. */ - *dseg = (struct mlx5_wqe_data_seg) { - .addr = htonll(addr), - .byte_count = htonl(length), - .lkey = txq_mp2mr(txq, txq_mb2mp(buf)), + naddr = htonll(addr); + *dseg = (rte_v128u32_t){ + htonl(length), + txq_mp2mr(txq, txq_mb2mp(buf)), + naddr, + naddr >> 32, }; ++ds; if (!segs_n) @@ -538,17 +556,13 @@ next_seg: */ assert(!(MLX5_WQE_SIZE % MLX5_WQE_DWORD_SIZE)); if (!(ds % (MLX5_WQE_SIZE / MLX5_WQE_DWORD_SIZE))) { - unsigned int n = (txq->wqe_ci + ((ds + 3) / 4)) & - ((1 << txq->wqe_n) - 1); - - dseg = (struct mlx5_wqe_data_seg *) - ((uintptr_t)&(*txq->wqes)[n]); - tx_prefetch_wqe(txq, n + 1); - } else if (!dseg) { - dseg = (struct mlx5_wqe_data_seg *) - ((uintptr_t)wqe + - (ds * MLX5_WQE_DWORD_SIZE)); - } else { + if (unlikely(--max_wqe == 0)) + break; + dseg = (volatile rte_v128u32_t *) + tx_mlx5_wqe(txq, txq->wqe_ci + ds / 4); + rte_prefetch0(tx_mlx5_wqe(txq, + txq->wqe_ci + ds / 4 + 1)); + } else { ++dseg; } ++ds; @@ -559,10 +573,12 @@ next_seg: total_length += length; #endif /* Store segment information. 
*/ - *dseg = (struct mlx5_wqe_data_seg) { - .addr = htonll(rte_pktmbuf_mtod(buf, uintptr_t)), - .byte_count = htonl(length), - .lkey = txq_mp2mr(txq, txq_mb2mp(buf)), + naddr = htonll(rte_pktmbuf_mtod(buf, uintptr_t)); + *dseg = (rte_v128u32_t){ + htonl(length), + txq_mp2mr(txq, txq_mb2mp(buf)), + naddr, + naddr >> 32, }; (*txq->elts)[elts_head] = buf; elts_head = (elts_head + 1) & (elts_n - 1); @@ -574,7 +590,19 @@ next_seg: --pkts_n; next_pkt: ++i; - wqe->ctrl[1] = htonl(txq->qp_num_8s | ds); + /* Initialize known and common part of the WQE structure. */ + wqe->ctrl = (rte_v128u32_t){ + htonl((txq->wqe_ci << 8) | MLX5_OPCODE_SEND), + htonl(txq->qp_num_8s | ds), + 0, + 0, + }; + wqe->eseg = (rte_v128u32_t){ + 0, + cs_flags, + 0, + (ehdr << 16) | htons(pkt_inline_sz), + }; txq->wqe_ci += (ds + 3) / 4; #ifdef MLX5_PMD_SOFT_COUNTERS /* Increment sent bytes counter. */ @@ -587,10 +615,13 @@ next_pkt: /* Check whether completion threshold has been reached. */ comp = txq->elts_comp + i + j; if (comp >= MLX5_TX_COMP_THRESH) { + volatile struct mlx5_wqe_ctrl *w = + (volatile struct mlx5_wqe_ctrl *)wqe; + /* Request completion on last WQE. */ - wqe->ctrl[2] = htonl(8); + w->ctrl2 = htonl(8); /* Save elts_head in unused "immediate" field of WQE. */ - wqe->ctrl[3] = elts_head; + w->ctrl3 = elts_head; txq->elts_comp = 0; } else { txq->elts_comp = comp; @@ -600,7 +631,7 @@ next_pkt: txq->stats.opackets += i; #endif /* Ring QP doorbell. */ - mlx5_tx_dbrec(txq); + mlx5_tx_dbrec(txq, (volatile struct mlx5_wqe *)wqe); txq->elts_head = elts_head; return i; } @@ -621,13 +652,13 @@ mlx5_mpw_new(struct txq *txq, struct mlx5_mpw *mpw, uint32_t length) uint16_t idx = txq->wqe_ci & ((1 << txq->wqe_n) - 1); volatile struct mlx5_wqe_data_seg (*dseg)[MLX5_MPW_DSEG_MAX] = (volatile struct mlx5_wqe_data_seg (*)[]) - (uintptr_t)&(*txq->wqes)[(idx + 1) & ((1 << txq->wqe_n) - 1)]; + tx_mlx5_wqe(txq, idx + 1); mpw->state = MLX5_MPW_STATE_OPENED; mpw->pkts_n = 0; mpw->len = length; mpw->total_len = 0; - mpw->wqe = (volatile struct mlx5_wqe *)&(*txq->wqes)[idx].hdr; + mpw->wqe = (volatile struct mlx5_wqe *)tx_mlx5_wqe(txq, idx); mpw->wqe->eseg.mss = htons(length); mpw->wqe->eseg.inline_hdr_sz = 0; mpw->wqe->eseg.rsvd0 = 0; @@ -669,8 +700,8 @@ mlx5_mpw_close(struct txq *txq, struct mlx5_mpw *mpw) ++txq->wqe_ci; else txq->wqe_ci += 2; - tx_prefetch_wqe(txq, txq->wqe_ci); - tx_prefetch_wqe(txq, txq->wqe_ci + 1); + rte_prefetch0(tx_mlx5_wqe(txq, txq->wqe_ci)); + rte_prefetch0(tx_mlx5_wqe(txq, txq->wqe_ci + 1)); } /** @@ -695,6 +726,7 @@ mlx5_tx_burst_mpw(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n) unsigned int i = 0; unsigned int j = 0; unsigned int max; + uint16_t max_wqe; unsigned int comp; struct mlx5_mpw mpw = { .state = MLX5_MPW_STATE_CLOSED, @@ -703,14 +735,16 @@ mlx5_tx_burst_mpw(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n) if (unlikely(!pkts_n)) return 0; /* Prefetch first packet cacheline. */ - tx_prefetch_cqe(txq, txq->cq_ci); - tx_prefetch_wqe(txq, txq->wqe_ci); - tx_prefetch_wqe(txq, txq->wqe_ci + 1); + rte_prefetch0(tx_mlx5_wqe(txq, txq->wqe_ci)); + rte_prefetch0(tx_mlx5_wqe(txq, txq->wqe_ci + 1)); /* Start processing. 
*/ txq_complete(txq); max = (elts_n - (elts_head - txq->elts_tail)); if (max > elts_n) max -= elts_n; + max_wqe = (1u << txq->wqe_n) - (txq->wqe_ci - txq->wqe_pi); + if (unlikely(!max_wqe)) + return 0; do { struct rte_mbuf *buf = *(pkts++); unsigned int elts_head_next; @@ -744,6 +778,14 @@ mlx5_tx_burst_mpw(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n) (mpw.wqe->eseg.cs_flags != cs_flags))) mlx5_mpw_close(txq, &mpw); if (mpw.state == MLX5_MPW_STATE_CLOSED) { + /* + * Multi-Packet WQE consumes at most two WQE. + * mlx5_mpw_new() expects to be able to use such + * resources. + */ + if (unlikely(max_wqe < 2)) + break; + max_wqe -= 2; mlx5_mpw_new(txq, &mpw, length); mpw.wqe->eseg.cs_flags = cs_flags; } @@ -808,7 +850,7 @@ mlx5_tx_burst_mpw(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n) /* Ring QP doorbell. */ if (mpw.state == MLX5_MPW_STATE_OPENED) mlx5_mpw_close(txq, &mpw); - mlx5_tx_dbrec(txq); + mlx5_tx_dbrec(txq, mpw.wqe); txq->elts_head = elts_head; return i; } @@ -833,7 +875,7 @@ mlx5_mpw_inline_new(struct txq *txq, struct mlx5_mpw *mpw, uint32_t length) mpw->pkts_n = 0; mpw->len = length; mpw->total_len = 0; - mpw->wqe = (volatile struct mlx5_wqe *)&(*txq->wqes)[idx].hdr; + mpw->wqe = (volatile struct mlx5_wqe *)tx_mlx5_wqe(txq, idx); mpw->wqe->ctrl[0] = htonl((MLX5_OPC_MOD_MPW << 24) | (txq->wqe_ci << 8) | MLX5_OPCODE_TSO); @@ -899,18 +941,30 @@ mlx5_tx_burst_mpw_inline(void *dpdk_txq, struct rte_mbuf **pkts, unsigned int i = 0; unsigned int j = 0; unsigned int max; + uint16_t max_wqe; unsigned int comp; unsigned int inline_room = txq->max_inline * RTE_CACHE_LINE_SIZE; struct mlx5_mpw mpw = { .state = MLX5_MPW_STATE_CLOSED, }; + /* + * Compute the maximum number of WQE which can be consumed by inline + * code. + * - 2 DSEG for: + * - 1 control segment, + * - 1 Ethernet segment, + * - N Dseg from the inline request. + */ + const unsigned int wqe_inl_n = + ((2 * MLX5_WQE_DWORD_SIZE + + txq->max_inline * RTE_CACHE_LINE_SIZE) + + RTE_CACHE_LINE_SIZE - 1) / RTE_CACHE_LINE_SIZE; if (unlikely(!pkts_n)) return 0; /* Prefetch first packet cacheline. */ - tx_prefetch_cqe(txq, txq->cq_ci); - tx_prefetch_wqe(txq, txq->wqe_ci); - tx_prefetch_wqe(txq, txq->wqe_ci + 1); + rte_prefetch0(tx_mlx5_wqe(txq, txq->wqe_ci)); + rte_prefetch0(tx_mlx5_wqe(txq, txq->wqe_ci + 1)); /* Start processing. */ txq_complete(txq); max = (elts_n - (elts_head - txq->elts_tail)); @@ -936,6 +990,11 @@ mlx5_tx_burst_mpw_inline(void *dpdk_txq, struct rte_mbuf **pkts, break; max -= segs_n; --pkts_n; + /* + * Compute max_wqe in case less WQE were consumed in previous + * iteration. + */ + max_wqe = (1u << txq->wqe_n) - (txq->wqe_ci - txq->wqe_pi); /* Should we enable HW CKSUM offload */ if (buf->ol_flags & (PKT_TX_IP_CKSUM | PKT_TX_TCP_CKSUM | PKT_TX_UDP_CKSUM)) @@ -961,9 +1020,20 @@ mlx5_tx_burst_mpw_inline(void *dpdk_txq, struct rte_mbuf **pkts, if (mpw.state == MLX5_MPW_STATE_CLOSED) { if ((segs_n != 1) || (length > inline_room)) { + /* + * Multi-Packet WQE consumes at most two WQE. + * mlx5_mpw_new() expects to be able to use + * such resources. 
+ */ + if (unlikely(max_wqe < 2)) + break; + max_wqe -= 2; mlx5_mpw_new(txq, &mpw, length); mpw.wqe->eseg.cs_flags = cs_flags; } else { + if (unlikely(max_wqe < wqe_inl_n)) + break; + max_wqe -= wqe_inl_n; mlx5_mpw_inline_new(txq, &mpw, length); mpw.wqe->eseg.cs_flags = cs_flags; } @@ -1011,14 +1081,15 @@ mlx5_tx_burst_mpw_inline(void *dpdk_txq, struct rte_mbuf **pkts, addr = rte_pktmbuf_mtod(buf, uintptr_t); (*txq->elts)[elts_head] = buf; /* Maximum number of bytes before wrapping. */ - max = ((uintptr_t)&(*txq->wqes)[1 << txq->wqe_n] - + max = ((((uintptr_t)(txq->wqes)) + + (1 << txq->wqe_n) * + MLX5_WQE_SIZE) - (uintptr_t)mpw.data.raw); if (length > max) { rte_memcpy((void *)(uintptr_t)mpw.data.raw, (void *)addr, max); - mpw.data.raw = - (volatile void *)&(*txq->wqes)[0]; + mpw.data.raw = (volatile void *)txq->wqes; rte_memcpy((void *)(uintptr_t)mpw.data.raw, (void *)(addr + max), length - max); @@ -1027,13 +1098,15 @@ mlx5_tx_burst_mpw_inline(void *dpdk_txq, struct rte_mbuf **pkts, rte_memcpy((void *)(uintptr_t)mpw.data.raw, (void *)addr, length); - mpw.data.raw += length; + + if (length == max) + mpw.data.raw = + (volatile void *)txq->wqes; + else + mpw.data.raw += length; } - if ((uintptr_t)mpw.data.raw == - (uintptr_t)&(*txq->wqes)[1 << txq->wqe_n]) - mpw.data.raw = - (volatile void *)&(*txq->wqes)[0]; ++mpw.pkts_n; + mpw.total_len += length; ++j; if (mpw.pkts_n == MLX5_MPW_DSEG_MAX) { mlx5_mpw_inline_close(txq, &mpw); @@ -1043,7 +1116,6 @@ mlx5_tx_burst_mpw_inline(void *dpdk_txq, struct rte_mbuf **pkts, inline_room -= length; } } - mpw.total_len += length; elts_head = elts_head_next; #ifdef MLX5_PMD_SOFT_COUNTERS /* Increment sent bytes counter. */ @@ -1077,7 +1149,7 @@ mlx5_tx_burst_mpw_inline(void *dpdk_txq, struct rte_mbuf **pkts, mlx5_mpw_inline_close(txq, &mpw); else if (mpw.state == MLX5_MPW_STATE_OPENED) mlx5_mpw_close(txq, &mpw); - mlx5_tx_dbrec(txq); + mlx5_tx_dbrec(txq, mpw.wqe); txq->elts_head = elts_head; return i; } @@ -1097,30 +1169,28 @@ static inline uint32_t rxq_cq_to_pkt_type(volatile struct mlx5_cqe *cqe) { uint32_t pkt_type; - uint8_t flags = cqe->l4_hdr_type_etc; + uint16_t flags = ntohs(cqe->hdr_type_etc); - if (cqe->pkt_info & MLX5_CQE_RX_TUNNEL_PACKET) + if (cqe->pkt_info & MLX5_CQE_RX_TUNNEL_PACKET) { pkt_type = TRANSPOSE(flags, - MLX5_CQE_RX_OUTER_IPV4_PACKET, - RTE_PTYPE_L3_IPV4) | - TRANSPOSE(flags, - MLX5_CQE_RX_OUTER_IPV6_PACKET, - RTE_PTYPE_L3_IPV6) | - TRANSPOSE(flags, MLX5_CQE_RX_IPV4_PACKET, - RTE_PTYPE_INNER_L3_IPV4) | + RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN) | TRANSPOSE(flags, MLX5_CQE_RX_IPV6_PACKET, - RTE_PTYPE_INNER_L3_IPV6); - else + RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN); + pkt_type |= ((cqe->pkt_info & MLX5_CQE_RX_OUTER_PACKET) ? + RTE_PTYPE_L3_IPV6_EXT_UNKNOWN : + RTE_PTYPE_L3_IPV4_EXT_UNKNOWN); + } else { pkt_type = TRANSPOSE(flags, MLX5_CQE_L3_HDR_TYPE_IPV6, - RTE_PTYPE_L3_IPV6) | + RTE_PTYPE_L3_IPV6_EXT_UNKNOWN) | TRANSPOSE(flags, MLX5_CQE_L3_HDR_TYPE_IPV4, - RTE_PTYPE_L3_IPV4); + RTE_PTYPE_L3_IPV4_EXT_UNKNOWN); + } return pkt_type; } @@ -1147,6 +1217,7 @@ mlx5_rx_poll_len(struct rxq *rxq, volatile struct mlx5_cqe *cqe, struct rxq_zip *zip = &rxq->zip; uint16_t cqe_n = cqe_cnt + 1; int len = 0; + uint16_t idx, end; /* Process compressed data in the CQE and mini arrays. 
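The inline MPW path above copies packet bytes into the circular work-queue buffer, splitting the copy when it would run past the end of the buffer and resetting the write pointer when it lands exactly on the end. An illustrative standalone sketch of that split-copy-with-wrap pattern (not the driver code):

#include <stdint.h>
#include <string.h>
#include <stdio.h>

struct circ_buf {
	uint8_t *base;   /* start of the ring buffer */
	size_t   size;   /* total size in bytes      */
	uint8_t *raw;    /* current write position   */
};

static void circ_copy(struct circ_buf *b, const void *src, size_t len)
{
	size_t max = (size_t)(b->base + b->size - b->raw); /* bytes before wrap */

	if (len > max) {
		memcpy(b->raw, src, max);
		memcpy(b->base, (const uint8_t *)src + max, len - max);
		b->raw = b->base + (len - max);
	} else {
		memcpy(b->raw, src, len);
		b->raw = (len == max) ? b->base : b->raw + len;
	}
}

int main(void)
{
	uint8_t ring[8] = {0}, data[] = {1, 2, 3, 4, 5};
	struct circ_buf b = { ring, sizeof(ring), ring + 6 };

	circ_copy(&b, data, sizeof(data)); /* 2 bytes at the end, 3 from the start */
	printf("ring: %u %u %u ... %u %u\n",
	       ring[0], ring[1], ring[2], ring[6], ring[7]);
	return 0;
}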
*/ if (zip->ai) { @@ -1157,6 +1228,14 @@ mlx5_rx_poll_len(struct rxq *rxq, volatile struct mlx5_cqe *cqe, len = ntohl((*mc)[zip->ai & 7].byte_cnt); *rss_hash = ntohl((*mc)[zip->ai & 7].rx_hash_result); if ((++zip->ai & 7) == 0) { + /* Invalidate consumed CQEs */ + idx = zip->ca; + end = zip->na; + while (idx != end) { + (*rxq->cqes)[idx & cqe_cnt].op_own = + MLX5_CQE_INVALIDATE; + ++idx; + } /* * Increment consumer index to skip the number of * CQEs consumed. Hardware leaves holes in the CQ @@ -1166,8 +1245,9 @@ mlx5_rx_poll_len(struct rxq *rxq, volatile struct mlx5_cqe *cqe, zip->na += 8; } if (unlikely(rxq->zip.ai == rxq->zip.cqe_cnt)) { - uint16_t idx = rxq->cq_ci; - uint16_t end = zip->cq_ci; + /* Invalidate the rest */ + idx = zip->ca; + end = zip->cq_ci; while (idx != end) { (*rxq->cqes)[idx & cqe_cnt].op_own = @@ -1203,7 +1283,7 @@ mlx5_rx_poll_len(struct rxq *rxq, volatile struct mlx5_cqe *cqe, * special case the second one is located 7 CQEs after * the initial CQE instead of 8 for subsequent ones. */ - zip->ca = rxq->cq_ci & cqe_cnt; + zip->ca = rxq->cq_ci; zip->na = zip->ca + 7; /* Compute the next non compressed CQE. */ --rxq->cq_ci; @@ -1212,6 +1292,13 @@ mlx5_rx_poll_len(struct rxq *rxq, volatile struct mlx5_cqe *cqe, len = ntohl((*mc)[0].byte_cnt); *rss_hash = ntohl((*mc)[0].rx_hash_result); zip->ai = 1; + /* Prefetch all the entries to be invalidated */ + idx = zip->ca; + end = zip->cq_ci; + while (idx != end) { + rte_prefetch0(&(*rxq->cqes)[(idx) & cqe_cnt]); + ++idx; + } } else { len = ntohl(cqe->byte_cnt); *rss_hash = ntohl(cqe->rx_hash_res); @@ -1238,28 +1325,22 @@ static inline uint32_t rxq_cq_to_ol_flags(struct rxq *rxq, volatile struct mlx5_cqe *cqe) { uint32_t ol_flags = 0; - uint8_t l3_hdr = (cqe->l4_hdr_type_etc) & MLX5_CQE_L3_HDR_TYPE_MASK; - uint8_t l4_hdr = (cqe->l4_hdr_type_etc) & MLX5_CQE_L4_HDR_TYPE_MASK; - - if ((l3_hdr == MLX5_CQE_L3_HDR_TYPE_IPV4) || - (l3_hdr == MLX5_CQE_L3_HDR_TYPE_IPV6)) - ol_flags |= TRANSPOSE(cqe->hds_ip_ext, - MLX5_CQE_L3_OK, - PKT_RX_IP_CKSUM_GOOD); - if ((l4_hdr == MLX5_CQE_L4_HDR_TYPE_TCP) || - (l4_hdr == MLX5_CQE_L4_HDR_TYPE_TCP_EMP_ACK) || - (l4_hdr == MLX5_CQE_L4_HDR_TYPE_TCP_ACK) || - (l4_hdr == MLX5_CQE_L4_HDR_TYPE_UDP)) - ol_flags |= TRANSPOSE(cqe->hds_ip_ext, - MLX5_CQE_L4_OK, - PKT_RX_L4_CKSUM_GOOD); + uint16_t flags = ntohs(cqe->hdr_type_etc); + + ol_flags = + TRANSPOSE(flags, + MLX5_CQE_RX_L3_HDR_VALID, + PKT_RX_IP_CKSUM_GOOD) | + TRANSPOSE(flags, + MLX5_CQE_RX_L4_HDR_VALID, + PKT_RX_L4_CKSUM_GOOD); if ((cqe->pkt_info & MLX5_CQE_RX_TUNNEL_PACKET) && (rxq->csum_l2tun)) ol_flags |= - TRANSPOSE(cqe->l4_hdr_type_etc, - MLX5_CQE_RX_OUTER_IP_CSUM_OK, + TRANSPOSE(flags, + MLX5_CQE_RX_L3_HDR_VALID, PKT_RX_IP_CKSUM_GOOD) | - TRANSPOSE(cqe->l4_hdr_type_etc, - MLX5_CQE_RX_OUTER_TCP_UDP_CSUM_OK, + TRANSPOSE(flags, + MLX5_CQE_RX_L4_HDR_VALID, PKT_RX_L4_CKSUM_GOOD); return ol_flags; } @@ -1316,10 +1397,10 @@ mlx5_rx_burst(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n) } while (pkt != seg) { assert(pkt != (*rxq->elts)[idx]); - seg = NEXT(pkt); + rep = NEXT(pkt); rte_mbuf_refcnt_set(pkt, 0); __rte_mbuf_raw_free(pkt); - pkt = seg; + pkt = rep; } break; } @@ -1344,10 +1425,20 @@ mlx5_rx_burst(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n) /* Update packet information. 
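rxq_cq_to_pkt_type() and rxq_cq_to_ol_flags() above rely on a TRANSPOSE(val, from, to) helper to move a flag bit from its hardware position to the position expected in mbuf ol_flags. The macro below is a generic reimplementation written for this sketch, with placeholder bit values, not the one from the mlx5 headers:

#include <stdint.h>
#include <stdio.h>

#define TRANSPOSE_BIT(val, from, to) \
	((from) >= (to) ? (((val) & (from)) / ((from) / (to))) \
			: (((val) & (from)) * ((to) / (from))))

#define HW_L3_OK   (1u << 2)   /* placeholder hardware "L3 checksum ok" bit */
#define SW_IP_GOOD (1u << 7)   /* placeholder mbuf "IP checksum good" flag  */

int main(void)
{
	uint32_t hw_flags = HW_L3_OK;
	uint32_t ol_flags = TRANSPOSE_BIT(hw_flags, HW_L3_OK, SW_IP_GOOD);

	printf("ol_flags = 0x%x (expect 0x%x)\n",
	       (unsigned)ol_flags, (unsigned)SW_IP_GOOD);
	return 0;
}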
*/ pkt->packet_type = 0; pkt->ol_flags = 0; - if (rxq->rss_hash) { + if (rss_hash_res && rxq->rss_hash) { pkt->hash.rss = rss_hash_res; pkt->ol_flags = PKT_RX_RSS_HASH; } + if (rxq->mark && + ((cqe->sop_drop_qpn != + htonl(MLX5_FLOW_MARK_INVALID)) || + (cqe->sop_drop_qpn != + htonl(MLX5_FLOW_MARK_DEFAULT)))) { + pkt->hash.fdir.hi = + mlx5_flow_mark_get(cqe->sop_drop_qpn); + pkt->ol_flags &= ~PKT_RX_RSS_HASH; + pkt->ol_flags |= PKT_RX_FDIR | PKT_RX_FDIR_ID; + } if (rxq->csum | rxq->csum_l2tun | rxq->vlan_strip | rxq->crc_present) { if (rxq->csum) { @@ -1356,7 +1447,7 @@ mlx5_rx_burst(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n) pkt->ol_flags |= rxq_cq_to_ol_flags(rxq, cqe); } - if (cqe->l4_hdr_type_etc & + if (cqe->hdr_type_etc & MLX5_CQE_VLAN_STRIPPED) { pkt->ol_flags |= PKT_RX_VLAN_PKT | PKT_RX_VLAN_STRIPPED; diff --git a/src/dpdk/drivers/net/mlx5/mlx5_rxtx.h b/src/dpdk/drivers/net/mlx5/mlx5_rxtx.h index f45e3f51..41a34d7f 100644 --- a/src/dpdk/drivers/net/mlx5/mlx5_rxtx.h +++ b/src/dpdk/drivers/net/mlx5/mlx5_rxtx.h @@ -114,7 +114,8 @@ struct rxq { unsigned int elts_n:4; /* Log 2 of Mbufs. */ unsigned int port_id:8; unsigned int rss_hash:1; /* RSS hash result is enabled. */ - unsigned int :9; /* Remaining bits. */ + unsigned int mark:1; /* Marked flow available on the queue. */ + unsigned int :8; /* Remaining bits. */ volatile uint32_t *rq_db; volatile uint32_t *cq_db; uint16_t rq_ci; @@ -178,8 +179,8 @@ struct hash_rxq_init { uint16_t size; } hdr; struct ibv_exp_flow_spec_tcp_udp tcp_udp; - struct ibv_exp_flow_spec_ipv4_ext ipv4; - struct ibv_exp_flow_spec_ipv6_ext ipv6; + struct ibv_exp_flow_spec_ipv4 ipv4; + struct ibv_exp_flow_spec_ipv6 ipv6; struct ibv_exp_flow_spec_eth eth; } flow_spec; /* Flow specification template. */ const struct hash_rxq_init *underlayer; /* Pointer to underlayer. */ @@ -240,13 +241,6 @@ struct hash_rxq { [MLX5_MAX_SPECIAL_FLOWS][MLX5_MAX_VLAN_IDS]; }; -/** C extension macro for environments lacking C11 features. */ -#if !defined(__STDC_VERSION__) || __STDC_VERSION__ < 201112L -#define RTE_STD_C11 __extension__ -#else -#define RTE_STD_C11 -#endif - /* TX queue descriptor. */ RTE_STD_C11 struct txq { @@ -255,15 +249,14 @@ struct txq { uint16_t elts_comp; /* Counter since last completion request. */ uint16_t cq_ci; /* Consumer index for completion queue. */ uint16_t wqe_ci; /* Consumer index for work queue. */ + uint16_t wqe_pi; /* Producer index for work queue. */ uint16_t elts_n:4; /* (*elts)[] length (in log2). */ uint16_t cqe_n:4; /* Number of CQ elements (in log2). */ uint16_t wqe_n:4; /* Number of of WQ elements (in log2). */ - uint16_t bf_buf_size:4; /* Log2 Blueflame size. */ - uint16_t bf_offset; /* Blueflame offset. */ uint16_t max_inline; /* Multiple of RTE_CACHE_LINE_SIZE to inline. */ uint32_t qp_num_8s; /* QP number shifted by 8. */ volatile struct mlx5_cqe (*cqes)[]; /* Completion queue. */ - volatile struct mlx5_wqe64 (*wqes)[]; /* Work queue. */ + volatile void *wqes; /* Work queue (use volatile to write into). */ volatile uint32_t *qp_db; /* Work queue doorbell. */ volatile uint32_t *cq_db; /* Completion queue doorbell. */ volatile void *bf_reg; /* Blueflame register. */ diff --git a/src/dpdk/drivers/net/mlx5/mlx5_stats.c b/src/dpdk/drivers/net/mlx5/mlx5_stats.c index c6087d4e..20c957e8 100644 --- a/src/dpdk/drivers/net/mlx5/mlx5_stats.c +++ b/src/dpdk/drivers/net/mlx5/mlx5_stats.c @@ -31,11 +31,16 @@ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ +#include <linux/sockios.h> +#include <linux/ethtool.h> + /* DPDK headers don't like -pedantic. */ #ifdef PEDANTIC #pragma GCC diagnostic ignored "-Wpedantic" #endif #include <rte_ethdev.h> +#include <rte_common.h> +#include <rte_malloc.h> #ifdef PEDANTIC #pragma GCC diagnostic error "-Wpedantic" #endif @@ -44,243 +49,314 @@ #include "mlx5_rxtx.h" #include "mlx5_defs.h" -#include <linux/ethtool.h> -#include <linux/sockios.h> - -static void -mlx5_stats_read_hw(struct rte_eth_dev *dev, - struct rte_eth_stats *stats){ - struct priv *priv = mlx5_get_priv(dev); - struct mlx5_stats_priv * lps = &priv->m_stats; - unsigned int i; - - struct rte_eth_stats tmp = {0}; - struct ethtool_stats *et_stats = (struct ethtool_stats *)lps->et_stats; - struct ifreq ifr; - - et_stats->cmd = ETHTOOL_GSTATS; - et_stats->n_stats = lps->n_stats; - - ifr.ifr_data = (caddr_t) et_stats; - - if (priv_ifreq(priv, SIOCETHTOOL, &ifr) != 0) { - WARN("unable to get statistic values for mlnx5 "); - } - - tmp.ibytes += et_stats->data[lps->inx_rx_vport_unicast_bytes] + - et_stats->data[lps->inx_rx_vport_multicast_bytes] + - et_stats->data[lps->inx_rx_vport_broadcast_bytes]; - - tmp.ipackets += et_stats->data[lps->inx_rx_vport_unicast_packets] + - et_stats->data[lps->inx_rx_vport_multicast_packets] + - et_stats->data[lps->inx_rx_vport_broadcast_packets]; - - tmp.ierrors += (et_stats->data[lps->inx_rx_wqe_err] + - et_stats->data[lps->inx_rx_crc_errors_phy] + - et_stats->data[lps->inx_rx_in_range_len_errors_phy] + - et_stats->data[lps->inx_rx_symbol_err_phy]); +struct mlx5_counter_ctrl { + /* Name of the counter. */ + char dpdk_name[RTE_ETH_XSTATS_NAME_SIZE]; + /* Name of the counter on the device table. */ + char ctr_name[RTE_ETH_XSTATS_NAME_SIZE]; +}; + +static const struct mlx5_counter_ctrl mlx5_counters_init[] = { + { + .dpdk_name = "rx_port_unicast_bytes", + .ctr_name = "rx_vport_unicast_bytes", + }, + { + .dpdk_name = "rx_port_multicast_bytes", + .ctr_name = "rx_vport_multicast_bytes", + }, + { + .dpdk_name = "rx_port_broadcast_bytes", + .ctr_name = "rx_vport_broadcast_bytes", + }, + { + .dpdk_name = "rx_port_unicast_packets", + .ctr_name = "rx_vport_unicast_packets", + }, + { + .dpdk_name = "rx_port_multicast_packets", + .ctr_name = "rx_vport_multicast_packets", + }, + { + .dpdk_name = "rx_port_broadcast_packets", + .ctr_name = "rx_vport_broadcast_packets", + }, + { + .dpdk_name = "tx_port_unicast_bytes", + .ctr_name = "tx_vport_unicast_bytes", + }, + { + .dpdk_name = "tx_port_multicast_bytes", + .ctr_name = "tx_vport_multicast_bytes", + }, + { + .dpdk_name = "tx_port_broadcast_bytes", + .ctr_name = "tx_vport_broadcast_bytes", + }, + { + .dpdk_name = "tx_port_unicast_packets", + .ctr_name = "tx_vport_unicast_packets", + }, + { + .dpdk_name = "tx_port_multicast_packets", + .ctr_name = "tx_vport_multicast_packets", + }, + { + .dpdk_name = "tx_port_broadcast_packets", + .ctr_name = "tx_vport_broadcast_packets", + }, + { + .dpdk_name = "rx_wqe_err", + .ctr_name = "rx_wqe_err", + }, + { + .dpdk_name = "rx_crc_errors_phy", + .ctr_name = "rx_crc_errors_phy", + }, + { + .dpdk_name = "rx_in_range_len_errors_phy", + .ctr_name = "rx_in_range_len_errors_phy", + }, + { + .dpdk_name = "rx_symbol_err_phy", + .ctr_name = "rx_symbol_err_phy", + }, + { + .dpdk_name = "tx_errors_phy", + .ctr_name = "tx_errors_phy", + }, +}; + +static const unsigned int xstats_n = RTE_DIM(mlx5_counters_init); - tmp.obytes += et_stats->data[lps->inx_tx_vport_unicast_bytes] + - et_stats->data[lps->inx_tx_vport_multicast_bytes] + - 
et_stats->data[lps->inx_tx_vport_broadcast_bytes]; - - tmp.opackets += (et_stats->data[lps->inx_tx_vport_unicast_packets] + - et_stats->data[lps->inx_tx_vport_multicast_packets] + - et_stats->data[lps->inx_tx_vport_broadcast_packets]); - - tmp.oerrors += et_stats->data[lps->inx_tx_errors_phy]; - - /* SW Rx */ - for (i = 0; (i != priv->rxqs_n); ++i) { - struct rxq *rxq = (*priv->rxqs)[i]; - if (rxq) { - tmp.imissed += rxq->stats.idropped; - tmp.rx_nombuf += rxq->stats.rx_nombuf; - } - } - - /*SW Tx */ - for (i = 0; (i != priv->txqs_n); ++i) { - struct txq *txq = (*priv->txqs)[i]; - if (txq) { - tmp.oerrors += txq->stats.odropped; - } - } - - *stats =tmp; +/** + * Read device counters table. + * + * @param priv + * Pointer to private structure. + * @param[out] stats + * Counters table output buffer. + * + * @return + * 0 on success and stats is filled, negative on error. + */ +static int +priv_read_dev_counters(struct priv *priv, uint64_t *stats) +{ + struct mlx5_xstats_ctrl *xstats_ctrl = &priv->xstats_ctrl; + unsigned int i; + struct ifreq ifr; + unsigned int stats_sz = (xstats_ctrl->stats_n * sizeof(uint64_t)) + + sizeof(struct ethtool_stats); + struct ethtool_stats et_stats[(stats_sz + ( + sizeof(struct ethtool_stats) - 1)) / + sizeof(struct ethtool_stats)]; + + et_stats->cmd = ETHTOOL_GSTATS; + et_stats->n_stats = xstats_ctrl->stats_n; + ifr.ifr_data = (caddr_t)et_stats; + if (priv_ifreq(priv, SIOCETHTOOL, &ifr) != 0) { + WARN("unable to read statistic values from device"); + return -1; + } + for (i = 0; i != xstats_n; ++i) + stats[i] = (uint64_t) + et_stats->data[xstats_ctrl->dev_table_idx[i]]; + return 0; } +/** + * Init the structures to read device counters. + * + * @param priv + * Pointer to private structure. + */ void -mlx5_stats_free(struct rte_eth_dev *dev) +priv_xstats_init(struct priv *priv) { - struct priv *priv = mlx5_get_priv(dev); - struct mlx5_stats_priv * lps = &priv->m_stats; - - if ( lps->et_stats ){ - free(lps->et_stats); - lps->et_stats=0; - } + struct mlx5_xstats_ctrl *xstats_ctrl = &priv->xstats_ctrl; + unsigned int i; + unsigned int j; + char ifname[IF_NAMESIZE]; + struct ifreq ifr; + struct ethtool_drvinfo drvinfo; + struct ethtool_gstrings *strings = NULL; + unsigned int dev_stats_n; + unsigned int str_sz; + + if (priv_get_ifname(priv, &ifname)) { + WARN("unable to get interface name"); + return; + } + /* How many statistics are available. */ + drvinfo.cmd = ETHTOOL_GDRVINFO; + ifr.ifr_data = (caddr_t)&drvinfo; + if (priv_ifreq(priv, SIOCETHTOOL, &ifr) != 0) { + WARN("unable to get driver info"); + return; + } + dev_stats_n = drvinfo.n_stats; + if (dev_stats_n < 1) { + WARN("no extended statistics available"); + return; + } + xstats_ctrl->stats_n = dev_stats_n; + /* Allocate memory to grab stat names and values. 
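priv_read_dev_counters() above pulls the device counter table through the ETHTOOL_GSTATS ioctl, issued via priv_ifreq(). A standalone Linux-only sketch of that ioctl follows; the interface name and counter count are assumptions and error handling is minimal.

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdint.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <net/if.h>
#include <linux/ethtool.h>
#include <linux/sockios.h>

int main(void)
{
	const char *ifname = "eth0";     /* assumed interface name */
	unsigned int n_stats = 64;       /* normally taken from ETHTOOL_GDRVINFO */
	struct ethtool_stats *st;
	struct ifreq ifr;
	int fd = socket(AF_INET, SOCK_DGRAM, 0);

	if (fd < 0)
		return 1;
	st = calloc(1, sizeof(*st) + n_stats * sizeof(uint64_t));
	if (!st)
		return 1;
	st->cmd = ETHTOOL_GSTATS;
	st->n_stats = n_stats;
	memset(&ifr, 0, sizeof(ifr));
	strncpy(ifr.ifr_name, ifname, IFNAMSIZ - 1);
	ifr.ifr_data = (void *)st;
	if (ioctl(fd, SIOCETHTOOL, &ifr) == 0)
		printf("first counter: %llu\n", (unsigned long long)st->data[0]);
	else
		perror("SIOCETHTOOL");
	free(st);
	close(fd);
	return 0;
}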
*/ + str_sz = dev_stats_n * ETH_GSTRING_LEN; + strings = (struct ethtool_gstrings *) + rte_malloc("xstats_strings", + str_sz + sizeof(struct ethtool_gstrings), 0); + if (!strings) { + WARN("unable to allocate memory for xstats"); + return; + } + strings->cmd = ETHTOOL_GSTRINGS; + strings->string_set = ETH_SS_STATS; + strings->len = dev_stats_n; + ifr.ifr_data = (caddr_t)strings; + if (priv_ifreq(priv, SIOCETHTOOL, &ifr) != 0) { + WARN("unable to get statistic names"); + goto free; + } + for (j = 0; j != xstats_n; ++j) + xstats_ctrl->dev_table_idx[j] = dev_stats_n; + for (i = 0; i != dev_stats_n; ++i) { + const char *curr_string = (const char *) + &strings->data[i * ETH_GSTRING_LEN]; + + for (j = 0; j != xstats_n; ++j) { + if (!strcmp(mlx5_counters_init[j].ctr_name, + curr_string)) { + xstats_ctrl->dev_table_idx[j] = i; + break; + } + } + } + for (j = 0; j != xstats_n; ++j) { + if (xstats_ctrl->dev_table_idx[j] >= dev_stats_n) { + WARN("counter \"%s\" is not recognized", + mlx5_counters_init[j].dpdk_name); + goto free; + } + } + /* Copy to base at first time. */ + assert(xstats_n <= MLX5_MAX_XSTATS); + priv_read_dev_counters(priv, xstats_ctrl->base); +free: + rte_free(strings); } - -static void -mlx5_stats_init(struct rte_eth_dev *dev) +/** + * Get device extended statistics. + * + * @param priv + * Pointer to private structure. + * @param[out] stats + * Pointer to rte extended stats table. + * + * @return + * Number of extended stats on success and stats is filled, + * negative on error. + */ +static int +priv_xstats_get(struct priv *priv, struct rte_eth_xstat *stats) { - struct priv *priv = mlx5_get_priv(dev); - struct mlx5_stats_priv * lps = &priv->m_stats; - struct rte_eth_stats tmp = {0}; - - unsigned int i; - unsigned int idx; - char ifname[IF_NAMESIZE]; - struct ifreq ifr; - - struct ethtool_stats *et_stats = NULL; - struct ethtool_drvinfo drvinfo; - struct ethtool_gstrings *strings = NULL; - unsigned int n_stats, sz_str, sz_stats; - - if (priv_get_ifname(priv, &ifname)) { - WARN("unable to get interface name"); - return; - } - /* How many statistics are available ? 
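The index-building loop in priv_xstats_init() above walks the device string table once and records, for each counter the PMD wants to expose, its position in the device table (or a "not found" sentinel). A small self-contained sketch of that matching step, with example names standing in for the ETHTOOL_GSTRINGS output and ctr_name table:

#include <stdio.h>
#include <string.h>

int main(void)
{
	const char *dev_names[] = {   /* as returned by ETHTOOL_GSTRINGS */
		"rx_packets", "rx_vport_unicast_bytes", "tx_errors_phy",
	};
	const char *wanted[] = {      /* ctr_name entries of interest */
		"rx_vport_unicast_bytes", "rx_wqe_err",
	};
	unsigned int dev_n = 3, want_n = 2, idx[2], i, j;

	for (j = 0; j != want_n; ++j)
		idx[j] = dev_n;       /* sentinel: not found */
	for (i = 0; i != dev_n; ++i)
		for (j = 0; j != want_n; ++j)
			if (!strcmp(wanted[j], dev_names[i])) {
				idx[j] = i;
				break;
			}
	for (j = 0; j != want_n; ++j)
		if (idx[j] >= dev_n)
			printf("counter \"%s\" is not recognized\n", wanted[j]);
		else
			printf("%s -> device index %u\n", wanted[j], idx[j]);
	return 0;
}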
*/ - drvinfo.cmd = ETHTOOL_GDRVINFO; - ifr.ifr_data = (caddr_t) &drvinfo; - if (priv_ifreq(priv, SIOCETHTOOL, &ifr) != 0) { - WARN("unable to get driver info for %s", ifname); - return; - } - - n_stats = drvinfo.n_stats; - if (n_stats < 1) { - WARN("no statistics available for %s", ifname); - return; - } - lps->n_stats = n_stats; - - /* Allocate memory to grab stat names and values */ - sz_str = n_stats * ETH_GSTRING_LEN; - sz_stats = n_stats * sizeof(uint64_t); - strings = calloc(1, sz_str + sizeof(struct ethtool_gstrings)); - if (!strings) { - WARN("unable to allocate memory for strings"); - return; - } - - et_stats = calloc(1, sz_stats + sizeof(struct ethtool_stats)); - if (!et_stats) { - free(strings); - WARN("unable to allocate memory for stats"); - } - - strings->cmd = ETHTOOL_GSTRINGS; - strings->string_set = ETH_SS_STATS; - strings->len = n_stats; - ifr.ifr_data = (caddr_t) strings; - if (priv_ifreq(priv, SIOCETHTOOL, &ifr) != 0) { - WARN("unable to get statistic names for %s", ifname); - free(strings); - free(et_stats); - return; - } - - for (i = 0; (i != n_stats); ++i) { - - const char * curr_string = (const char*) &(strings->data[i * ETH_GSTRING_LEN]); - - if (!strcmp("rx_vport_unicast_bytes", curr_string)) lps->inx_rx_vport_unicast_bytes = i; - if (!strcmp("rx_vport_multicast_bytes", curr_string)) lps->inx_rx_vport_multicast_bytes = i; - if (!strcmp("rx_vport_broadcast_bytes", curr_string)) lps->inx_rx_vport_broadcast_bytes = i; - - if (!strcmp("rx_vport_unicast_packets", curr_string)) lps->inx_rx_vport_unicast_packets = i; - if (!strcmp("rx_vport_multicast_packets", curr_string)) lps->inx_rx_vport_multicast_packets = i; - if (!strcmp("rx_vport_broadcast_packets", curr_string)) lps->inx_rx_vport_broadcast_packets = i; - - if (!strcmp("tx_vport_unicast_bytes", curr_string)) lps->inx_tx_vport_unicast_bytes = i; - if (!strcmp("tx_vport_multicast_bytes", curr_string)) lps->inx_tx_vport_multicast_bytes = i; - if (!strcmp("tx_vport_broadcast_bytes", curr_string)) lps->inx_tx_vport_broadcast_bytes = i; - - if (!strcmp("tx_vport_unicast_packets", curr_string)) lps->inx_tx_vport_unicast_packets = i; - if (!strcmp("tx_vport_multicast_packets", curr_string)) lps->inx_tx_vport_multicast_packets = i; - if (!strcmp("tx_vport_broadcast_packets", curr_string)) lps->inx_tx_vport_broadcast_packets = i; - - if (!strcmp("rx_wqe_err", curr_string)) lps->inx_rx_wqe_err = i; - if (!strcmp("rx_crc_errors_phy", curr_string)) lps->inx_rx_crc_errors_phy = i; - if (!strcmp("rx_in_range_len_errors_phy", curr_string)) lps->inx_rx_in_range_len_errors_phy = i; - if (!strcmp("rx_symbol_err_phy", curr_string)) lps->inx_rx_symbol_err_phy = i; - - if (!strcmp("tx_errors_phy", curr_string)) lps->inx_tx_errors_phy = i; - } - - lps->et_stats =(void *)et_stats; - - if (!lps->inx_rx_vport_unicast_bytes || - !lps->inx_rx_vport_multicast_bytes || - !lps->inx_rx_vport_broadcast_bytes || - !lps->inx_rx_vport_unicast_packets || - !lps->inx_rx_vport_multicast_packets || - !lps->inx_rx_vport_broadcast_packets || - !lps->inx_tx_vport_unicast_bytes || - !lps->inx_tx_vport_multicast_bytes || - !lps->inx_tx_vport_broadcast_bytes || - !lps->inx_tx_vport_unicast_packets || - !lps->inx_tx_vport_multicast_packets || - !lps->inx_tx_vport_broadcast_packets || - !lps->inx_rx_wqe_err || - !lps->inx_rx_crc_errors_phy || - !lps->inx_rx_in_range_len_errors_phy) { - WARN("Counters are not recognized %s", ifname); - return; - } - - mlx5_stats_read_hw(dev,&tmp); - - /* copy yo shadow at first time */ - lps->m_shadow = tmp; - - free(strings); 
+ struct mlx5_xstats_ctrl *xstats_ctrl = &priv->xstats_ctrl; + unsigned int i; + unsigned int n = xstats_n; + uint64_t counters[n]; + + if (priv_read_dev_counters(priv, counters) < 0) + return -1; + for (i = 0; i != xstats_n; ++i) { + stats[i].id = i; + stats[i].value = (counters[i] - xstats_ctrl->base[i]); + } + return n; } - +/** + * Reset device extended statistics. + * + * @param priv + * Pointer to private structure. + */ static void -mlx5_stats_diff(struct rte_eth_stats *a, - struct rte_eth_stats *b, - struct rte_eth_stats *c){ - #define MLX5_DIFF(cnt) { a->cnt = (b->cnt - c->cnt); } - - MLX5_DIFF(ipackets); - MLX5_DIFF(opackets); - MLX5_DIFF(ibytes); - MLX5_DIFF(obytes); - MLX5_DIFF(imissed); - - MLX5_DIFF(ierrors); - MLX5_DIFF(oerrors); - MLX5_DIFF(rx_nombuf); +priv_xstats_reset(struct priv *priv) +{ + struct mlx5_xstats_ctrl *xstats_ctrl = &priv->xstats_ctrl; + unsigned int i; + unsigned int n = xstats_n; + uint64_t counters[n]; + + if (priv_read_dev_counters(priv, counters) < 0) + return; + for (i = 0; i != n; ++i) + xstats_ctrl->base[i] = counters[i]; } - - +/** + * DPDK callback to get device statistics. + * + * @param dev + * Pointer to Ethernet device structure. + * @param[out] stats + * Stats structure output buffer. + */ void mlx5_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats) { struct priv *priv = mlx5_get_priv(dev); - - struct mlx5_stats_priv * lps = &priv->m_stats; - priv_lock(priv); - - if (lps->et_stats == NULL) { - mlx5_stats_init(dev); - } - struct rte_eth_stats tmp = {0}; - - mlx5_stats_read_hw(dev,&tmp); - - mlx5_stats_diff(stats, - &tmp, - &lps->m_shadow); - + struct rte_eth_stats tmp = {0}; + unsigned int i; + unsigned int idx; + + priv_lock(priv); + /* Add software counters. */ + for (i = 0; (i != priv->rxqs_n); ++i) { + struct rxq *rxq = (*priv->rxqs)[i]; + + if (rxq == NULL) + continue; + idx = rxq->stats.idx; + if (idx < RTE_ETHDEV_QUEUE_STAT_CNTRS) { +#ifdef MLX5_PMD_SOFT_COUNTERS + tmp.q_ipackets[idx] += rxq->stats.ipackets; + tmp.q_ibytes[idx] += rxq->stats.ibytes; +#endif + tmp.q_errors[idx] += (rxq->stats.idropped + + rxq->stats.rx_nombuf); + } +#ifdef MLX5_PMD_SOFT_COUNTERS + tmp.ipackets += rxq->stats.ipackets; + tmp.ibytes += rxq->stats.ibytes; +#endif + tmp.ierrors += rxq->stats.idropped; + tmp.rx_nombuf += rxq->stats.rx_nombuf; + } + for (i = 0; (i != priv->txqs_n); ++i) { + struct txq *txq = (*priv->txqs)[i]; + + if (txq == NULL) + continue; + idx = txq->stats.idx; + if (idx < RTE_ETHDEV_QUEUE_STAT_CNTRS) { +#ifdef MLX5_PMD_SOFT_COUNTERS + tmp.q_opackets[idx] += txq->stats.opackets; + tmp.q_obytes[idx] += txq->stats.obytes; +#endif + tmp.q_errors[idx] += txq->stats.odropped; + } +#ifdef MLX5_PMD_SOFT_COUNTERS + tmp.opackets += txq->stats.opackets; + tmp.obytes += txq->stats.obytes; +#endif + tmp.oerrors += txq->stats.odropped; + } +#ifndef MLX5_PMD_SOFT_COUNTERS + /* FIXME: retrieve and add hardware counters. 
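priv_xstats_get() and priv_xstats_reset() above report values relative to a base snapshot: each reported counter is the raw device value minus the base captured at init, and reset simply re-captures the base without touching the hardware. A toy illustration of that scheme:

#include <stdint.h>
#include <stdio.h>

#define N 2

static uint64_t raw[N] = { 1000, 50 };   /* pretend device counters */
static uint64_t base[N];

static void xstats_reset(void)
{
	for (int i = 0; i < N; i++)
		base[i] = raw[i];
}

static uint64_t xstats_get(int i)
{
	return raw[i] - base[i];
}

int main(void)
{
	xstats_reset();                /* snapshot at init */
	raw[0] += 10;                  /* traffic happens  */
	raw[1] += 3;
	printf("c0=%llu c1=%llu\n",
	       (unsigned long long)xstats_get(0),
	       (unsigned long long)xstats_get(1));  /* prints 10 and 3 */
	xstats_reset();
	printf("after reset c0=%llu\n", (unsigned long long)xstats_get(0)); /* 0 */
	return 0;
}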
*/ +#endif + *stats = tmp; priv_unlock(priv); } @@ -294,20 +370,103 @@ void mlx5_stats_reset(struct rte_eth_dev *dev) { struct priv *priv = dev->data->dev_private; - struct mlx5_stats_priv * lps = &priv->m_stats; - - priv_lock(priv); - - if (lps->et_stats == NULL) { - mlx5_stats_init(dev); - } - struct rte_eth_stats tmp = {0}; - + unsigned int i; + unsigned int idx; + + priv_lock(priv); + for (i = 0; (i != priv->rxqs_n); ++i) { + if ((*priv->rxqs)[i] == NULL) + continue; + idx = (*priv->rxqs)[i]->stats.idx; + (*priv->rxqs)[i]->stats = + (struct mlx5_rxq_stats){ .idx = idx }; + } + for (i = 0; (i != priv->txqs_n); ++i) { + if ((*priv->txqs)[i] == NULL) + continue; + idx = (*priv->txqs)[i]->stats.idx; + (*priv->txqs)[i]->stats = + (struct mlx5_txq_stats){ .idx = idx }; + } +#ifndef MLX5_PMD_SOFT_COUNTERS + /* FIXME: reset hardware counters. */ +#endif + priv_unlock(priv); +} - mlx5_stats_read_hw(dev,&tmp); +/** + * DPDK callback to get extended device statistics. + * + * @param dev + * Pointer to Ethernet device structure. + * @param[out] stats + * Stats table output buffer. + * @param n + * The size of the stats table. + * + * @return + * Number of xstats on success, negative on failure. + */ +int +mlx5_xstats_get(struct rte_eth_dev *dev, + struct rte_eth_xstat *stats, unsigned int n) +{ + struct priv *priv = mlx5_get_priv(dev); + int ret = xstats_n; + + if (n >= xstats_n && stats) { + priv_lock(priv); + ret = priv_xstats_get(priv, stats); + priv_unlock(priv); + } + return ret; +} - /* copy to shadow */ - lps->m_shadow = tmp; +/** + * DPDK callback to clear device extended statistics. + * + * @param dev + * Pointer to Ethernet device structure. + */ +void +mlx5_xstats_reset(struct rte_eth_dev *dev) +{ + struct priv *priv = mlx5_get_priv(dev); + priv_lock(priv); + priv_xstats_reset(priv); priv_unlock(priv); } + +/** + * DPDK callback to retrieve names of extended device statistics + * + * @param dev + * Pointer to Ethernet device structure. + * @param[out] xstats_names + * Buffer to insert names into. + * @param n + * Number of names. + * + * @return + * Number of xstats names. 
+ */ +int +mlx5_xstats_get_names(struct rte_eth_dev *dev, + struct rte_eth_xstat_name *xstats_names, unsigned int n) +{ + struct priv *priv = mlx5_get_priv(dev); + unsigned int i; + + if (n >= xstats_n && xstats_names) { + priv_lock(priv); + for (i = 0; i != xstats_n; ++i) { + strncpy(xstats_names[i].name, + mlx5_counters_init[i].dpdk_name, + RTE_ETH_XSTATS_NAME_SIZE); + xstats_names[i].name[RTE_ETH_XSTATS_NAME_SIZE - 1] = 0; + } + priv_unlock(priv); + } + return xstats_n; +} diff --git a/src/dpdk/drivers/net/mlx5/mlx5_trigger.c b/src/dpdk/drivers/net/mlx5/mlx5_trigger.c index d4dccd88..30addd2d 100644 --- a/src/dpdk/drivers/net/mlx5/mlx5_trigger.c +++ b/src/dpdk/drivers/net/mlx5/mlx5_trigger.c @@ -90,6 +90,8 @@ mlx5_dev_start(struct rte_eth_dev *dev) if (dev->data->dev_conf.fdir_conf.mode != RTE_FDIR_MODE_NONE) priv_fdir_enable(priv); priv_dev_interrupt_handler_install(priv, dev); + err = priv_flow_start(priv); + priv_xstats_init(priv); priv_unlock(priv); return -err; } @@ -120,6 +122,7 @@ mlx5_dev_stop(struct rte_eth_dev *dev) priv_mac_addrs_disable(priv); priv_destroy_hash_rxqs(priv); priv_fdir_disable(priv); + priv_flow_stop(priv); priv_dev_interrupt_handler_uninstall(priv, dev); priv->started = 0; priv_unlock(priv); diff --git a/src/dpdk/drivers/net/mlx5/mlx5_txq.c b/src/dpdk/drivers/net/mlx5/mlx5_txq.c index 053665d5..949035bd 100644 --- a/src/dpdk/drivers/net/mlx5/mlx5_txq.c +++ b/src/dpdk/drivers/net/mlx5/mlx5_txq.c @@ -82,7 +82,9 @@ txq_alloc_elts(struct txq_ctrl *txq_ctrl, unsigned int elts_n) for (i = 0; (i != elts_n); ++i) (*txq_ctrl->txq.elts)[i] = NULL; for (i = 0; (i != (1u << txq_ctrl->txq.wqe_n)); ++i) { - volatile struct mlx5_wqe64 *wqe = &(*txq_ctrl->txq.wqes)[i]; + volatile struct mlx5_wqe64 *wqe = + (volatile struct mlx5_wqe64 *) + txq_ctrl->txq.wqes + i; memset((void *)(uintptr_t)wqe, 0x0, sizeof(*wqe)); } @@ -214,14 +216,10 @@ txq_setup(struct txq_ctrl *tmpl, struct txq_ctrl *txq_ctrl) } tmpl->txq.cqe_n = log2above(ibcq->cqe); tmpl->txq.qp_num_8s = qp->ctrl_seg.qp_num << 8; - tmpl->txq.wqes = - (volatile struct mlx5_wqe64 (*)[]) - (uintptr_t)qp->gen_data.sqstart; + tmpl->txq.wqes = qp->gen_data.sqstart; tmpl->txq.wqe_n = log2above(qp->sq.wqe_cnt); tmpl->txq.qp_db = &qp->gen_data.db[MLX5_SND_DBR]; tmpl->txq.bf_reg = qp->gen_data.bf->reg; - tmpl->txq.bf_offset = qp->gen_data.bf->offset; - tmpl->txq.bf_buf_size = log2above(qp->gen_data.bf->buf_size); tmpl->txq.cq_db = cq->dbrec; tmpl->txq.cqes = (volatile struct mlx5_cqe (*)[]) @@ -412,7 +410,7 @@ txq_ctrl_setup(struct rte_eth_dev *dev, struct txq_ctrl *txq_ctrl, .obj = tmpl.qp, /* Enable multi-packet send if supported. */ .family_flags = - ((priv->mps && !priv->sriov) ? + (priv->mps ? 
IBV_EXP_QP_BURST_CREATE_ENABLE_MULTI_PACKET_SEND_WR : 0), }; diff --git a/src/dpdk/drivers/net/null/rte_eth_null.c b/src/dpdk/drivers/net/null/rte_eth_null.c index 7a248842..57203e2e 100644 --- a/src/dpdk/drivers/net/null/rte_eth_null.c +++ b/src/dpdk/drivers/net/null/rte_eth_null.c @@ -35,7 +35,7 @@ #include <rte_ethdev.h> #include <rte_malloc.h> #include <rte_memcpy.h> -#include <rte_dev.h> +#include <rte_vdev.h> #include <rte_kvargs.h> #include <rte_spinlock.h> @@ -88,7 +88,6 @@ struct pmd_internals { static struct ether_addr eth_addr = { .addr_bytes = {0} }; -static const char *drivername = "Null PMD"; static struct rte_eth_link pmd_link = { .link_speed = ETH_SPEED_NUM_10G, .link_duplex = ETH_LINK_FULL_DUPLEX, @@ -295,13 +294,11 @@ eth_dev_info(struct rte_eth_dev *dev, return; internals = dev->data->dev_private; - dev_info->driver_name = drivername; dev_info->max_mac_addrs = 1; dev_info->max_rx_pktlen = (uint32_t)-1; dev_info->max_rx_queues = RTE_DIM(internals->rx_null_queues); dev_info->max_tx_queues = RTE_DIM(internals->tx_null_queues); dev_info->min_rx_bufsize = 0; - dev_info->pci_dev = NULL; dev_info->reta_size = internals->reta_size; dev_info->flow_type_rss_offloads = internals->flow_type_rss_offloads; } @@ -480,6 +477,8 @@ static const struct eth_dev_ops ops = { .rss_hash_conf_get = eth_rss_hash_conf_get }; +static struct rte_vdev_driver pmd_null_drv; + int eth_dev_null_create(const char *name, const unsigned numa_node, @@ -517,7 +516,7 @@ eth_dev_null_create(const char *name, goto error; /* reserve an ethdev entry */ - eth_dev = rte_eth_dev_allocate(name, RTE_ETH_DEV_VIRTUAL); + eth_dev = rte_eth_dev_allocate(name); if (eth_dev == NULL) goto error; @@ -550,12 +549,10 @@ eth_dev_null_create(const char *name, eth_dev->data = data; eth_dev->dev_ops = &ops; - TAILQ_INIT(ð_dev->link_intr_cbs); - eth_dev->driver = NULL; data->dev_flags = RTE_ETH_DEV_DETACHABLE; data->kdrv = RTE_KDRV_NONE; - data->drv_name = drivername; + data->drv_name = pmd_null_drv.driver.name; data->numa_node = numa_node; /* finally assign rx and tx ops */ @@ -611,7 +608,7 @@ get_packet_copy_arg(const char *key __rte_unused, } static int -rte_pmd_null_devinit(const char *name, const char *params) +rte_pmd_null_probe(const char *name, const char *params) { unsigned numa_node; unsigned packet_size = default_packet_size; @@ -663,7 +660,7 @@ free_kvlist: } static int -rte_pmd_null_devuninit(const char *name) +rte_pmd_null_remove(const char *name) { struct rte_eth_dev *eth_dev = NULL; @@ -686,13 +683,13 @@ rte_pmd_null_devuninit(const char *name) return 0; } -static struct rte_driver pmd_null_drv = { - .type = PMD_VDEV, - .init = rte_pmd_null_devinit, - .uninit = rte_pmd_null_devuninit, +static struct rte_vdev_driver pmd_null_drv = { + .probe = rte_pmd_null_probe, + .remove = rte_pmd_null_remove, }; -PMD_REGISTER_DRIVER(pmd_null_drv, eth_null); -DRIVER_REGISTER_PARAM_STRING(eth_null, +RTE_PMD_REGISTER_VDEV(net_null, pmd_null_drv); +RTE_PMD_REGISTER_ALIAS(net_null, eth_null); +RTE_PMD_REGISTER_PARAM_STRING(net_null, "size=<int> " "copy=<int>"); diff --git a/src/dpdk/drivers/net/ring/rte_eth_ring.c b/src/dpdk/drivers/net/ring/rte_eth_ring.c index a7048c77..6f9cc1a6 100644 --- a/src/dpdk/drivers/net/ring/rte_eth_ring.c +++ b/src/dpdk/drivers/net/ring/rte_eth_ring.c @@ -38,7 +38,7 @@ #include <rte_memcpy.h> #include <rte_memzone.h> #include <rte_string_fns.h> -#include <rte_dev.h> +#include <rte_vdev.h> #include <rte_kvargs.h> #include <rte_errno.h> @@ -75,7 +75,6 @@ struct pmd_internals { }; -static const char *drivername 
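The null (and, further down, ring) PMD hunks migrate from the old PMD_VDEV init/uninit registration to an rte_vdev_driver with probe/remove callbacks registered by RTE_PMD_REGISTER_VDEV. The plain-C analogue below mimics that pattern without DPDK headers; the constructor-based registration is only a rough stand-in for what the real macro does.

#include <stdio.h>

struct vdev_driver {
	const char *name;
	int (*probe)(const char *name, const char *params);
	int (*remove)(const char *name);
	struct vdev_driver *next;
};

static struct vdev_driver *driver_list;

static int demo_probe(const char *name, const char *params)
{
	printf("probe %s (params: %s)\n", name, params ? params : "");
	return 0;
}

static int demo_remove(const char *name)
{
	printf("remove %s\n", name);
	return 0;
}

static struct vdev_driver demo_drv = {
	.name = "net_demo", .probe = demo_probe, .remove = demo_remove,
};

__attribute__((constructor))
static void register_demo(void)   /* rough analogue of RTE_PMD_REGISTER_VDEV */
{
	demo_drv.next = driver_list;
	driver_list = &demo_drv;
}

int main(void)
{
	struct vdev_driver *d;

	for (d = driver_list; d != NULL; d = d->next)
		if (d->probe(d->name, "size=64") == 0)
			d->remove(d->name);
	return 0;
}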
= "Rings PMD"; static struct rte_eth_link pmd_link = { .link_speed = ETH_SPEED_NUM_10G, .link_duplex = ETH_LINK_FULL_DUPLEX, @@ -173,13 +172,11 @@ eth_dev_info(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info) { struct pmd_internals *internals = dev->data->dev_private; - dev_info->driver_name = drivername; dev_info->max_mac_addrs = 1; dev_info->max_rx_pktlen = (uint32_t)-1; dev_info->max_rx_queues = (uint16_t)internals->max_rx_queues; dev_info->max_tx_queues = (uint16_t)internals->max_tx_queues; dev_info->min_rx_bufsize = 0; - dev_info->pci_dev = NULL; } static void @@ -259,6 +256,8 @@ static const struct eth_dev_ops ops = { .mac_addr_add = eth_mac_addr_add, }; +static struct rte_vdev_driver pmd_ring_drv; + static int do_eth_dev_ring_create(const char *name, struct rte_ring * const rx_queues[], const unsigned nb_rx_queues, @@ -303,7 +302,7 @@ do_eth_dev_ring_create(const char *name, } /* reserve an ethdev entry */ - eth_dev = rte_eth_dev_allocate(name, RTE_ETH_DEV_VIRTUAL); + eth_dev = rte_eth_dev_allocate(name); if (eth_dev == NULL) { rte_errno = ENOSPC; goto error; @@ -343,11 +342,9 @@ do_eth_dev_ring_create(const char *name, eth_dev->dev_ops = &ops; data->dev_flags = RTE_ETH_DEV_DETACHABLE; data->kdrv = RTE_KDRV_NONE; - data->drv_name = drivername; + data->drv_name = pmd_ring_drv.driver.name; data->numa_node = numa_node; - TAILQ_INIT(&(eth_dev->link_intr_cbs)); - /* finally assign rx and tx ops */ eth_dev->rx_pkt_burst = eth_ring_rx; eth_dev->tx_pkt_burst = eth_ring_tx; @@ -505,7 +502,7 @@ out: } static int -rte_pmd_ring_devinit(const char *name, const char *params) +rte_pmd_ring_probe(const char *name, const char *params) { struct rte_kvargs *kvlist = NULL; int ret = 0; @@ -557,7 +554,7 @@ rte_pmd_ring_devinit(const char *name, const char *params) goto out_free; for (info->count = 0; info->count < info->total; info->count++) { - ret = eth_dev_ring_create(name, + ret = eth_dev_ring_create(info->list[info->count].name, info->list[info->count].node, info->list[info->count].action); if ((ret == -1) && @@ -580,7 +577,7 @@ out_free: } static int -rte_pmd_ring_devuninit(const char *name) +rte_pmd_ring_remove(const char *name) { struct rte_eth_dev *eth_dev = NULL; struct pmd_internals *internals = NULL; @@ -599,36 +596,34 @@ rte_pmd_ring_devuninit(const char *name) eth_dev_stop(eth_dev); - if (eth_dev->data) { - internals = eth_dev->data->dev_private; - if (internals->action == DEV_CREATE) { - /* - * it is only necessary to delete the rings in rx_queues because - * they are the same used in tx_queues - */ - for (i = 0; i < eth_dev->data->nb_rx_queues; i++) { - r = eth_dev->data->rx_queues[i]; - rte_ring_free(r->rng); - } + internals = eth_dev->data->dev_private; + if (internals->action == DEV_CREATE) { + /* + * it is only necessary to delete the rings in rx_queues because + * they are the same used in tx_queues + */ + for (i = 0; i < eth_dev->data->nb_rx_queues; i++) { + r = eth_dev->data->rx_queues[i]; + rte_ring_free(r->rng); } - - rte_free(eth_dev->data->rx_queues); - rte_free(eth_dev->data->tx_queues); - rte_free(eth_dev->data->dev_private); } + rte_free(eth_dev->data->rx_queues); + rte_free(eth_dev->data->tx_queues); + rte_free(eth_dev->data->dev_private); + rte_free(eth_dev->data); rte_eth_dev_release_port(eth_dev); return 0; } -static struct rte_driver pmd_ring_drv = { - .type = PMD_VDEV, - .init = rte_pmd_ring_devinit, - .uninit = rte_pmd_ring_devuninit, +static struct rte_vdev_driver pmd_ring_drv = { + .probe = rte_pmd_ring_probe, + .remove = rte_pmd_ring_remove, }; 
-PMD_REGISTER_DRIVER(pmd_ring_drv, eth_ring); -DRIVER_REGISTER_PARAM_STRING(eth_ring, - "nodeaction=[attach|detach]"); +RTE_PMD_REGISTER_VDEV(net_ring, pmd_ring_drv); +RTE_PMD_REGISTER_ALIAS(net_ring, eth_ring); +RTE_PMD_REGISTER_PARAM_STRING(net_ring, + ETH_RING_NUMA_NODE_ACTION_ARG "=name:node:action(ATTACH|CREATE)"); diff --git a/src/dpdk/drivers/net/szedata2/rte_eth_szedata2.c b/src/dpdk/drivers/net/szedata2/rte_eth_szedata2.c index 483d7894..fe7a6b3b 100644 --- a/src/dpdk/drivers/net/szedata2/rte_eth_szedata2.c +++ b/src/dpdk/drivers/net/szedata2/rte_eth_szedata2.c @@ -62,7 +62,7 @@ */ #define RTE_SZE2_PACKET_HEADER_SIZE_ALIGNED 8 -#define RTE_SZEDATA2_DRIVER_NAME rte_szedata2_pmd +#define RTE_SZEDATA2_DRIVER_NAME net_szedata2 #define RTE_SZEDATA2_PCI_DRIVER_NAME "rte_szedata2_pmd" #define SZEDATA2_DEV_PATH_FMT "/dev/szedataII%u" @@ -91,6 +91,7 @@ struct pmd_internals { uint16_t max_rx_queues; uint16_t max_tx_queues; char sze_dev[PATH_MAX]; + struct rte_mem_resource *pci_rsc; }; static struct ether_addr eth_addr = { @@ -1030,6 +1031,7 @@ eth_dev_info(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info) { struct pmd_internals *internals = dev->data->dev_private; + dev_info->pci_dev = RTE_DEV_TO_PCI(dev->device); dev_info->if_index = 0; dev_info->max_mac_addrs = 1; dev_info->max_rx_pktlen = (uint32_t)-1; @@ -1144,8 +1146,10 @@ eth_link_update(struct rte_eth_dev *dev, struct rte_eth_link link; struct rte_eth_link *link_ptr = &link; struct rte_eth_link *dev_link = &dev->data->dev_link; + struct pmd_internals *internals = (struct pmd_internals *) + dev->data->dev_private; volatile struct szedata2_cgmii_ibuf *ibuf = SZEDATA2_PCI_RESOURCE_PTR( - dev, SZEDATA2_CGMII_IBUF_BASE_OFF, + internals->pci_rsc, SZEDATA2_CGMII_IBUF_BASE_OFF, volatile struct szedata2_cgmii_ibuf *); switch (cgmii_link_speed(ibuf)) { @@ -1180,11 +1184,13 @@ eth_link_update(struct rte_eth_dev *dev, static int eth_dev_set_link_up(struct rte_eth_dev *dev) { + struct pmd_internals *internals = (struct pmd_internals *) + dev->data->dev_private; volatile struct szedata2_cgmii_ibuf *ibuf = SZEDATA2_PCI_RESOURCE_PTR( - dev, SZEDATA2_CGMII_IBUF_BASE_OFF, + internals->pci_rsc, SZEDATA2_CGMII_IBUF_BASE_OFF, volatile struct szedata2_cgmii_ibuf *); volatile struct szedata2_cgmii_obuf *obuf = SZEDATA2_PCI_RESOURCE_PTR( - dev, SZEDATA2_CGMII_OBUF_BASE_OFF, + internals->pci_rsc, SZEDATA2_CGMII_OBUF_BASE_OFF, volatile struct szedata2_cgmii_obuf *); cgmii_ibuf_enable(ibuf); @@ -1195,11 +1201,13 @@ eth_dev_set_link_up(struct rte_eth_dev *dev) static int eth_dev_set_link_down(struct rte_eth_dev *dev) { + struct pmd_internals *internals = (struct pmd_internals *) + dev->data->dev_private; volatile struct szedata2_cgmii_ibuf *ibuf = SZEDATA2_PCI_RESOURCE_PTR( - dev, SZEDATA2_CGMII_IBUF_BASE_OFF, + internals->pci_rsc, SZEDATA2_CGMII_IBUF_BASE_OFF, volatile struct szedata2_cgmii_ibuf *); volatile struct szedata2_cgmii_obuf *obuf = SZEDATA2_PCI_RESOURCE_PTR( - dev, SZEDATA2_CGMII_OBUF_BASE_OFF, + internals->pci_rsc, SZEDATA2_CGMII_OBUF_BASE_OFF, volatile struct szedata2_cgmii_obuf *); cgmii_ibuf_disable(ibuf); @@ -1281,8 +1289,10 @@ eth_mac_addr_set(struct rte_eth_dev *dev __rte_unused, static void eth_promiscuous_enable(struct rte_eth_dev *dev) { + struct pmd_internals *internals = (struct pmd_internals *) + dev->data->dev_private; volatile struct szedata2_cgmii_ibuf *ibuf = SZEDATA2_PCI_RESOURCE_PTR( - dev, SZEDATA2_CGMII_IBUF_BASE_OFF, + internals->pci_rsc, SZEDATA2_CGMII_IBUF_BASE_OFF, volatile struct szedata2_cgmii_ibuf *); 
cgmii_ibuf_mac_mode_write(ibuf, SZEDATA2_MAC_CHMODE_PROMISC); } @@ -1290,8 +1300,10 @@ eth_promiscuous_enable(struct rte_eth_dev *dev) static void eth_promiscuous_disable(struct rte_eth_dev *dev) { + struct pmd_internals *internals = (struct pmd_internals *) + dev->data->dev_private; volatile struct szedata2_cgmii_ibuf *ibuf = SZEDATA2_PCI_RESOURCE_PTR( - dev, SZEDATA2_CGMII_IBUF_BASE_OFF, + internals->pci_rsc, SZEDATA2_CGMII_IBUF_BASE_OFF, volatile struct szedata2_cgmii_ibuf *); cgmii_ibuf_mac_mode_write(ibuf, SZEDATA2_MAC_CHMODE_ONLY_VALID); } @@ -1299,8 +1311,10 @@ eth_promiscuous_disable(struct rte_eth_dev *dev) static void eth_allmulticast_enable(struct rte_eth_dev *dev) { + struct pmd_internals *internals = (struct pmd_internals *) + dev->data->dev_private; volatile struct szedata2_cgmii_ibuf *ibuf = SZEDATA2_PCI_RESOURCE_PTR( - dev, SZEDATA2_CGMII_IBUF_BASE_OFF, + internals->pci_rsc, SZEDATA2_CGMII_IBUF_BASE_OFF, volatile struct szedata2_cgmii_ibuf *); cgmii_ibuf_mac_mode_write(ibuf, SZEDATA2_MAC_CHMODE_ALL_MULTICAST); } @@ -1308,8 +1322,10 @@ eth_allmulticast_enable(struct rte_eth_dev *dev) static void eth_allmulticast_disable(struct rte_eth_dev *dev) { + struct pmd_internals *internals = (struct pmd_internals *) + dev->data->dev_private; volatile struct szedata2_cgmii_ibuf *ibuf = SZEDATA2_PCI_RESOURCE_PTR( - dev, SZEDATA2_CGMII_IBUF_BASE_OFF, + internals->pci_rsc, SZEDATA2_CGMII_IBUF_BASE_OFF, volatile struct szedata2_cgmii_ibuf *); cgmii_ibuf_mac_mode_write(ibuf, SZEDATA2_MAC_CHMODE_ONLY_VALID); } @@ -1349,7 +1365,7 @@ static const struct eth_dev_ops ops = { * -1 on error */ static int -get_szedata2_index(struct rte_eth_dev *dev, uint32_t *index) +get_szedata2_index(const struct rte_pci_addr *pcislot_addr, uint32_t *index) { DIR *dir; struct dirent *entry; @@ -1357,7 +1373,6 @@ get_szedata2_index(struct rte_eth_dev *dev, uint32_t *index) uint32_t tmp_index; FILE *fd; char pcislot_path[PATH_MAX]; - struct rte_pci_addr pcislot_addr = dev->pci_dev->addr; uint32_t domain; uint32_t bus; uint32_t devid; @@ -1392,10 +1407,10 @@ get_szedata2_index(struct rte_eth_dev *dev, uint32_t *index) if (ret != 4) continue; - if (pcislot_addr.domain == domain && - pcislot_addr.bus == bus && - pcislot_addr.devid == devid && - pcislot_addr.function == function) { + if (pcislot_addr->domain == domain && + pcislot_addr->bus == bus && + pcislot_addr->devid == devid && + pcislot_addr->function == function) { *index = tmp_index; closedir(dir); return 0; @@ -1415,9 +1430,10 @@ rte_szedata2_eth_dev_init(struct rte_eth_dev *dev) struct szedata *szedata_temp; int ret; uint32_t szedata2_index; - struct rte_pci_addr *pci_addr = &dev->pci_dev->addr; - struct rte_pci_resource *pci_rsc = - &dev->pci_dev->mem_resource[PCI_RESOURCE_NUMBER]; + struct rte_pci_device *pci_dev = RTE_DEV_TO_PCI(dev->device); + struct rte_pci_addr *pci_addr = &pci_dev->addr; + struct rte_mem_resource *pci_rsc = + &pci_dev->mem_resource[PCI_RESOURCE_NUMBER]; char rsc_filename[PATH_MAX]; void *pci_resource_ptr = NULL; int fd; @@ -1427,7 +1443,7 @@ rte_szedata2_eth_dev_init(struct rte_eth_dev *dev) pci_addr->function); /* Get index of szedata2 device file and create path to device file */ - ret = get_szedata2_index(dev, &szedata2_index); + ret = get_szedata2_index(pci_addr, &szedata2_index); if (ret != 0) { RTE_LOG(ERR, PMD, "Failed to get szedata2 device index!\n"); return -ENODEV; @@ -1471,10 +1487,10 @@ rte_szedata2_eth_dev_init(struct rte_eth_dev *dev) /* Set function callbacks for Ethernet API */ dev->dev_ops = &ops; - 
rte_eth_copy_pci_info(dev, dev->pci_dev); + rte_eth_copy_pci_info(dev, pci_dev); - /* mmap pci resource0 file to rte_pci_resource structure */ - if (dev->pci_dev->mem_resource[PCI_RESOURCE_NUMBER].phys_addr == + /* mmap pci resource0 file to rte_mem_resource structure */ + if (pci_dev->mem_resource[PCI_RESOURCE_NUMBER].phys_addr == 0) { RTE_LOG(ERR, PMD, "Missing resource%u file\n", PCI_RESOURCE_NUMBER); @@ -1491,7 +1507,7 @@ rte_szedata2_eth_dev_init(struct rte_eth_dev *dev) } pci_resource_ptr = mmap(0, - dev->pci_dev->mem_resource[PCI_RESOURCE_NUMBER].len, + pci_dev->mem_resource[PCI_RESOURCE_NUMBER].len, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); close(fd); if (pci_resource_ptr == NULL) { @@ -1499,8 +1515,8 @@ rte_szedata2_eth_dev_init(struct rte_eth_dev *dev) rsc_filename, fd); return -EINVAL; } - dev->pci_dev->mem_resource[PCI_RESOURCE_NUMBER].addr = - pci_resource_ptr; + pci_dev->mem_resource[PCI_RESOURCE_NUMBER].addr = pci_resource_ptr; + internals->pci_rsc = pci_rsc; RTE_LOG(DEBUG, PMD, "resource%u phys_addr = 0x%llx len = %llu " "virt addr = %llx\n", PCI_RESOURCE_NUMBER, @@ -1516,8 +1532,8 @@ rte_szedata2_eth_dev_init(struct rte_eth_dev *dev) RTE_CACHE_LINE_SIZE); if (data->mac_addrs == NULL) { RTE_LOG(ERR, PMD, "Could not alloc space for MAC address!\n"); - munmap(dev->pci_dev->mem_resource[PCI_RESOURCE_NUMBER].addr, - dev->pci_dev->mem_resource[PCI_RESOURCE_NUMBER].len); + munmap(pci_dev->mem_resource[PCI_RESOURCE_NUMBER].addr, + pci_dev->mem_resource[PCI_RESOURCE_NUMBER].len); return -EINVAL; } @@ -1537,12 +1553,13 @@ rte_szedata2_eth_dev_init(struct rte_eth_dev *dev) static int rte_szedata2_eth_dev_uninit(struct rte_eth_dev *dev) { - struct rte_pci_addr *pci_addr = &dev->pci_dev->addr; + struct rte_pci_device *pci_dev = RTE_DEV_TO_PCI(dev->device); + struct rte_pci_addr *pci_addr = &pci_dev->addr; rte_free(dev->data->mac_addrs); dev->data->mac_addrs = NULL; - munmap(dev->pci_dev->mem_resource[PCI_RESOURCE_NUMBER].addr, - dev->pci_dev->mem_resource[PCI_RESOURCE_NUMBER].len); + munmap(pci_dev->mem_resource[PCI_RESOURCE_NUMBER].addr, + pci_dev->mem_resource[PCI_RESOURCE_NUMBER].len); RTE_LOG(INFO, PMD, "szedata2 device (" PCI_PRI_FMT ") successfully uninitialized\n", @@ -1572,33 +1589,16 @@ static const struct rte_pci_id rte_szedata2_pci_id_table[] = { static struct eth_driver szedata2_eth_driver = { .pci_drv = { - .name = RTE_SZEDATA2_PCI_DRIVER_NAME, .id_table = rte_szedata2_pci_id_table, + .probe = rte_eth_dev_pci_probe, + .remove = rte_eth_dev_pci_remove, }, .eth_dev_init = rte_szedata2_eth_dev_init, .eth_dev_uninit = rte_szedata2_eth_dev_uninit, .dev_private_size = sizeof(struct pmd_internals), }; -static int -rte_szedata2_init(const char *name __rte_unused, - const char *args __rte_unused) -{ - rte_eth_driver_register(&szedata2_eth_driver); - return 0; -} - -static int -rte_szedata2_uninit(const char *name __rte_unused) -{ - return 0; -} - -static struct rte_driver rte_szedata2_driver = { - .type = PMD_PDEV, - .init = rte_szedata2_init, - .uninit = rte_szedata2_uninit, -}; - -PMD_REGISTER_DRIVER(rte_szedata2_driver, RTE_SZEDATA2_DRIVER_NAME); -DRIVER_REGISTER_PCI_TABLE(RTE_SZEDATA2_DRIVER_NAME, rte_szedata2_pci_id_table); +RTE_PMD_REGISTER_PCI(RTE_SZEDATA2_DRIVER_NAME, szedata2_eth_driver.pci_drv); +RTE_PMD_REGISTER_PCI_TABLE(RTE_SZEDATA2_DRIVER_NAME, rte_szedata2_pci_id_table); +RTE_PMD_REGISTER_KMOD_DEP(RTE_SZEDATA2_DRIVER_NAME, + "* combo6core & combov3 & szedata2 & szedata2_cv3"); diff --git a/src/dpdk/drivers/net/szedata2/rte_eth_szedata2.h 
b/src/dpdk/drivers/net/szedata2/rte_eth_szedata2.h index 522cf47f..afe8a383 100644 --- a/src/dpdk/drivers/net/szedata2/rte_eth_szedata2.h +++ b/src/dpdk/drivers/net/szedata2/rte_eth_szedata2.h @@ -117,94 +117,82 @@ struct szedata { * @return Byte from PCI resource at offset "offset". */ static inline uint8_t -pci_resource_read8(struct rte_eth_dev *dev, uint32_t offset) +pci_resource_read8(struct rte_mem_resource *rsc, uint32_t offset) { - return *((uint8_t *)((uint8_t *) - dev->pci_dev->mem_resource[PCI_RESOURCE_NUMBER].addr + - offset)); + return *((uint8_t *)((uint8_t *)rsc->addr + offset)); } /* * @return Two bytes from PCI resource starting at offset "offset". */ static inline uint16_t -pci_resource_read16(struct rte_eth_dev *dev, uint32_t offset) +pci_resource_read16(struct rte_mem_resource *rsc, uint32_t offset) { - return rte_le_to_cpu_16(*((uint16_t *)((uint8_t *) - dev->pci_dev->mem_resource[PCI_RESOURCE_NUMBER].addr + - offset))); + return rte_le_to_cpu_16(*((uint16_t *)((uint8_t *)rsc->addr + + offset))); } /* * @return Four bytes from PCI resource starting at offset "offset". */ static inline uint32_t -pci_resource_read32(struct rte_eth_dev *dev, uint32_t offset) +pci_resource_read32(struct rte_mem_resource *rsc, uint32_t offset) { - return rte_le_to_cpu_32(*((uint32_t *)((uint8_t *) - dev->pci_dev->mem_resource[PCI_RESOURCE_NUMBER].addr + - offset))); + return rte_le_to_cpu_32(*((uint32_t *)((uint8_t *)rsc->addr + + offset))); } /* * @return Eight bytes from PCI resource starting at offset "offset". */ static inline uint64_t -pci_resource_read64(struct rte_eth_dev *dev, uint32_t offset) +pci_resource_read64(struct rte_mem_resource *rsc, uint32_t offset) { - return rte_le_to_cpu_64(*((uint64_t *)((uint8_t *) - dev->pci_dev->mem_resource[PCI_RESOURCE_NUMBER].addr + - offset))); + return rte_le_to_cpu_64(*((uint64_t *)((uint8_t *)rsc->addr + + offset))); } /* * Write one byte to PCI resource address space at offset "offset". */ static inline void -pci_resource_write8(struct rte_eth_dev *dev, uint32_t offset, uint8_t val) +pci_resource_write8(struct rte_mem_resource *rsc, uint32_t offset, uint8_t val) { - *((uint8_t *)((uint8_t *) - dev->pci_dev->mem_resource[PCI_RESOURCE_NUMBER].addr + - offset)) = val; + *((uint8_t *)((uint8_t *)rsc->addr + offset)) = val; } /* * Write two bytes to PCI resource address space at offset "offset". */ static inline void -pci_resource_write16(struct rte_eth_dev *dev, uint32_t offset, uint16_t val) +pci_resource_write16(struct rte_mem_resource *rsc, uint32_t offset, + uint16_t val) { - *((uint16_t *)((uint8_t *) - dev->pci_dev->mem_resource[PCI_RESOURCE_NUMBER].addr + - offset)) = rte_cpu_to_le_16(val); + *((uint16_t *)((uint8_t *)rsc->addr + offset)) = rte_cpu_to_le_16(val); } /* * Write four bytes to PCI resource address space at offset "offset". */ static inline void -pci_resource_write32(struct rte_eth_dev *dev, uint32_t offset, uint32_t val) +pci_resource_write32(struct rte_mem_resource *rsc, uint32_t offset, + uint32_t val) { - *((uint32_t *)((uint8_t *) - dev->pci_dev->mem_resource[PCI_RESOURCE_NUMBER].addr + - offset)) = rte_cpu_to_le_32(val); + *((uint32_t *)((uint8_t *)rsc->addr + offset)) = rte_cpu_to_le_32(val); } /* * Write eight bytes to PCI resource address space at offset "offset". 
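The reworked szedata2 helpers below take a pointer to the mapped resource and read/write fixed-width little-endian values at a byte offset. A self-contained sketch of that accessor style, using a local buffer as the "resource" and open-coded assumptions in place of rte_le_to_cpu_*()/rte_cpu_to_le_*():

#include <stdint.h>
#include <string.h>
#include <stdio.h>

struct mem_resource { void *addr; size_t len; };

static uint32_t res_read32(const struct mem_resource *rsc, uint32_t off)
{
	uint32_t le;

	memcpy(&le, (const uint8_t *)rsc->addr + off, sizeof(le));
	return le;   /* assumes a little-endian host; otherwise byte-swap here */
}

static void res_write32(struct mem_resource *rsc, uint32_t off, uint32_t val)
{
	memcpy((uint8_t *)rsc->addr + off, &val, sizeof(val));
}

int main(void)
{
	uint8_t bar[64] = {0};
	struct mem_resource rsc = { bar, sizeof(bar) };

	res_write32(&rsc, 0x10, 0xdeadbeef);
	printf("0x%08x\n", (unsigned)res_read32(&rsc, 0x10));
	return 0;
}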
*/ static inline void -pci_resource_write64(struct rte_eth_dev *dev, uint32_t offset, uint64_t val) +pci_resource_write64(struct rte_mem_resource *rsc, uint32_t offset, + uint64_t val) { - *((uint64_t *)((uint8_t *) - dev->pci_dev->mem_resource[PCI_RESOURCE_NUMBER].addr + - offset)) = rte_cpu_to_le_64(val); + *((uint64_t *)((uint8_t *)rsc->addr + offset)) = rte_cpu_to_le_64(val); } -#define SZEDATA2_PCI_RESOURCE_PTR(dev, offset, type) \ - ((type)((uint8_t *) \ - ((dev)->pci_dev->mem_resource[PCI_RESOURCE_NUMBER].addr) \ - + (offset))) +#define SZEDATA2_PCI_RESOURCE_PTR(rsc, offset, type) \ + ((type)(((uint8_t *)(rsc)->addr) + (offset))) enum szedata2_link_speed { SZEDATA2_LINK_SPEED_DEFAULT = 0, diff --git a/src/dpdk/drivers/net/virtio/virtio_ethdev.c b/src/dpdk/drivers/net/virtio/virtio_ethdev.c index 35e67b90..1d572b5d 100644 --- a/src/dpdk/drivers/net/virtio/virtio_ethdev.c +++ b/src/dpdk/drivers/net/virtio/virtio_ethdev.c @@ -103,7 +103,8 @@ static int virtio_dev_queue_stats_mapping_set( * The set of PCI devices this driver supports */ static const struct rte_pci_id pci_id_virtio_map[] = { - { RTE_PCI_DEVICE(VIRTIO_PCI_VENDORID, VIRTIO_PCI_DEVICEID_MIN) }, + { RTE_PCI_DEVICE(VIRTIO_PCI_VENDORID, VIRTIO_PCI_LEGACY_DEVICEID_NET) }, + { RTE_PCI_DEVICE(VIRTIO_PCI_VENDORID, VIRTIO_PCI_MODERN_DEVICEID_NET) }, { .vendor_id = 0, /* sentinel */ }, }; @@ -125,8 +126,8 @@ static const struct rte_virtio_xstats_name_off rte_virtio_rxq_stat_strings[] = { {"size_128_255_packets", offsetof(struct virtnet_rx, stats.size_bins[3])}, {"size_256_511_packets", offsetof(struct virtnet_rx, stats.size_bins[4])}, {"size_512_1023_packets", offsetof(struct virtnet_rx, stats.size_bins[5])}, - {"size_1024_1517_packets", offsetof(struct virtnet_rx, stats.size_bins[6])}, - {"size_1518_max_packets", offsetof(struct virtnet_rx, stats.size_bins[7])}, + {"size_1024_1518_packets", offsetof(struct virtnet_rx, stats.size_bins[6])}, + {"size_1519_max_packets", offsetof(struct virtnet_rx, stats.size_bins[7])}, }; /* [rt]x_qX_ is prepended to the name string here */ @@ -142,8 +143,8 @@ static const struct rte_virtio_xstats_name_off rte_virtio_txq_stat_strings[] = { {"size_128_255_packets", offsetof(struct virtnet_tx, stats.size_bins[3])}, {"size_256_511_packets", offsetof(struct virtnet_tx, stats.size_bins[4])}, {"size_512_1023_packets", offsetof(struct virtnet_tx, stats.size_bins[5])}, - {"size_1024_1517_packets", offsetof(struct virtnet_tx, stats.size_bins[6])}, - {"size_1518_max_packets", offsetof(struct virtnet_tx, stats.size_bins[7])}, + {"size_1024_1518_packets", offsetof(struct virtnet_tx, stats.size_bins[6])}, + {"size_1519_max_packets", offsetof(struct virtnet_tx, stats.size_bins[7])}, }; #define VIRTIO_NB_RXQ_XSTATS (sizeof(rte_virtio_rxq_stat_strings) / \ @@ -151,6 +152,8 @@ static const struct rte_virtio_xstats_name_off rte_virtio_txq_stat_strings[] = { #define VIRTIO_NB_TXQ_XSTATS (sizeof(rte_virtio_txq_stat_strings) / \ sizeof(rte_virtio_txq_stat_strings[0])) +struct virtio_hw_internal virtio_hw_internal[RTE_MAX_ETHPORTS]; + static int virtio_send_command(struct virtnet_ctl *cvq, struct virtio_pmd_ctrl *ctrl, int *dlen, int pkt_num) @@ -279,28 +282,65 @@ virtio_set_multiple_queues(struct rte_eth_dev *dev, uint16_t nb_queues) return 0; } -void -virtio_dev_queue_release(struct virtqueue *vq) +static void +virtio_dev_queue_release(void *queue __rte_unused) { - struct virtio_hw *hw; + /* do nothing */ +} - if (vq) { - hw = vq->hw; - if (vq->configured) - hw->vtpci_ops->del_queue(hw, vq); +static int 
+virtio_get_queue_type(struct virtio_hw *hw, uint16_t vtpci_queue_idx) +{ + if (vtpci_queue_idx == hw->max_queue_pairs * 2) + return VTNET_CQ; + else if (vtpci_queue_idx % 2 == 0) + return VTNET_RQ; + else + return VTNET_TQ; +} - rte_free(vq->sw_ring); - rte_free(vq); - } +static uint16_t +virtio_get_nr_vq(struct virtio_hw *hw) +{ + uint16_t nr_vq = hw->max_queue_pairs * 2; + + if (vtpci_with_feature(hw, VIRTIO_NET_F_CTRL_VQ)) + nr_vq += 1; + + return nr_vq; +} + +static void +virtio_init_vring(struct virtqueue *vq) +{ + int size = vq->vq_nentries; + struct vring *vr = &vq->vq_ring; + uint8_t *ring_mem = vq->vq_ring_virt_mem; + + PMD_INIT_FUNC_TRACE(); + + /* + * Reinitialise since virtio port might have been stopped and restarted + */ + memset(ring_mem, 0, vq->vq_ring_size); + vring_init(vr, size, ring_mem, VIRTIO_PCI_VRING_ALIGN); + vq->vq_used_cons_idx = 0; + vq->vq_desc_head_idx = 0; + vq->vq_avail_idx = 0; + vq->vq_desc_tail_idx = (uint16_t)(vq->vq_nentries - 1); + vq->vq_free_cnt = vq->vq_nentries; + memset(vq->vq_descx, 0, sizeof(struct vq_desc_extra) * vq->vq_nentries); + + vring_desc_init(vr->desc, size); + + /* + * Disable device(host) interrupting guest + */ + virtqueue_disable_intr(vq); } -int virtio_dev_queue_setup(struct rte_eth_dev *dev, - int queue_type, - uint16_t queue_idx, - uint16_t vtpci_queue_idx, - uint16_t nb_desc, - unsigned int socket_id, - void **pvq) +static int +virtio_init_queue(struct rte_eth_dev *dev, uint16_t vtpci_queue_idx) { char vq_name[VIRTQUEUE_MAX_NAME_SZ]; char vq_hdr_name[VIRTQUEUE_MAX_NAME_SZ]; @@ -311,9 +351,9 @@ int virtio_dev_queue_setup(struct rte_eth_dev *dev, struct virtnet_tx *txvq = NULL; struct virtnet_ctl *cvq = NULL; struct virtqueue *vq; - const char *queue_names[] = {"rvq", "txq", "cvq"}; - size_t sz_vq, sz_q = 0, sz_hdr_mz = 0; + size_t sz_hdr_mz = 0; void *sw_ring = NULL; + int queue_type = virtio_get_queue_type(hw, vtpci_queue_idx); int ret; PMD_INIT_LOG(DEBUG, "setting up queue: %u", vtpci_queue_idx); @@ -322,8 +362,8 @@ int virtio_dev_queue_setup(struct rte_eth_dev *dev, * Read the virtqueue size from the Queue Size field * Always power of 2 and if 0 virtqueue does not exist */ - vq_size = hw->vtpci_ops->get_queue_num(hw, vtpci_queue_idx); - PMD_INIT_LOG(DEBUG, "vq_size: %u nb_desc:%u", vq_size, nb_desc); + vq_size = VTPCI_OPS(hw)->get_queue_num(hw, vtpci_queue_idx); + PMD_INIT_LOG(DEBUG, "vq_size: %u", vq_size); if (vq_size == 0) { PMD_INIT_LOG(ERR, "virtqueue does not exist"); return -EINVAL; @@ -334,40 +374,35 @@ int virtio_dev_queue_setup(struct rte_eth_dev *dev, return -EINVAL; } - snprintf(vq_name, sizeof(vq_name), "port%d_%s%d", - dev->data->port_id, queue_names[queue_type], queue_idx); + snprintf(vq_name, sizeof(vq_name), "port%d_vq%d", + dev->data->port_id, vtpci_queue_idx); - sz_vq = RTE_ALIGN_CEIL(sizeof(*vq) + + size = RTE_ALIGN_CEIL(sizeof(*vq) + vq_size * sizeof(struct vq_desc_extra), RTE_CACHE_LINE_SIZE); - if (queue_type == VTNET_RQ) { - sz_q = sz_vq + sizeof(*rxvq); - } else if (queue_type == VTNET_TQ) { - sz_q = sz_vq + sizeof(*txvq); + if (queue_type == VTNET_TQ) { /* * For each xmit packet, allocate a virtio_net_hdr * and indirect ring elements */ sz_hdr_mz = vq_size * sizeof(struct virtio_tx_region); } else if (queue_type == VTNET_CQ) { - sz_q = sz_vq + sizeof(*cvq); /* Allocate a page for control vq command, data and status */ sz_hdr_mz = PAGE_SIZE; } - vq = rte_zmalloc_socket(vq_name, sz_q, RTE_CACHE_LINE_SIZE, socket_id); + vq = rte_zmalloc_socket(vq_name, size, RTE_CACHE_LINE_SIZE, + SOCKET_ID_ANY); if 
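virtio_get_queue_type() above relies on the virtio queue-index layout: queues come in RX/TX pairs (even index = RX, odd = TX) and the control queue, when present, sits right after the last pair. A tiny standalone illustration of that mapping:

#include <stdio.h>

enum vq_type { VQ_RX, VQ_TX, VQ_CTRL };

static enum vq_type queue_type(unsigned int idx, unsigned int max_queue_pairs)
{
	if (idx == max_queue_pairs * 2)
		return VQ_CTRL;
	return (idx % 2 == 0) ? VQ_RX : VQ_TX;
}

int main(void)
{
	static const char *names[] = { "RX", "TX", "CTRL" };
	unsigned int i, pairs = 2;   /* 2 queue pairs + 1 control queue */

	for (i = 0; i <= pairs * 2; i++)
		printf("vq %u -> %s\n", i, names[queue_type(i, pairs)]);
	return 0;
}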
(vq == NULL) { PMD_INIT_LOG(ERR, "can not allocate vq"); return -ENOMEM; } + hw->vqs[vtpci_queue_idx] = vq; + vq->hw = hw; vq->vq_queue_index = vtpci_queue_idx; vq->vq_nentries = vq_size; - if (nb_desc == 0 || nb_desc > vq_size) - nb_desc = vq_size; - vq->vq_free_cnt = nb_desc; - /* * Reserve a memzone for vring elements */ @@ -376,7 +411,8 @@ int virtio_dev_queue_setup(struct rte_eth_dev *dev, PMD_INIT_LOG(DEBUG, "vring_size: %d, rounded_vring_size: %d", size, vq->vq_ring_size); - mz = rte_memzone_reserve_aligned(vq_name, vq->vq_ring_size, socket_id, + mz = rte_memzone_reserve_aligned(vq_name, vq->vq_ring_size, + SOCKET_ID_ANY, 0, VIRTIO_PCI_VRING_ALIGN); if (mz == NULL) { if (rte_errno == EEXIST) @@ -396,12 +432,13 @@ int virtio_dev_queue_setup(struct rte_eth_dev *dev, PMD_INIT_LOG(DEBUG, "vq->vq_ring_virt_mem: 0x%" PRIx64, (uint64_t)(uintptr_t)mz->addr); + virtio_init_vring(vq); + if (sz_hdr_mz) { - snprintf(vq_hdr_name, sizeof(vq_hdr_name), "port%d_%s%d_hdr", - dev->data->port_id, queue_names[queue_type], - queue_idx); + snprintf(vq_hdr_name, sizeof(vq_hdr_name), "port%d_vq%d_hdr", + dev->data->port_id, vtpci_queue_idx); hdr_mz = rte_memzone_reserve_aligned(vq_hdr_name, sz_hdr_mz, - socket_id, 0, + SOCKET_ID_ANY, 0, RTE_CACHE_LINE_SIZE); if (hdr_mz == NULL) { if (rte_errno == EEXIST) @@ -418,7 +455,7 @@ int virtio_dev_queue_setup(struct rte_eth_dev *dev, sizeof(vq->sw_ring[0]); sw_ring = rte_zmalloc_socket("sw_ring", sz_sw, - RTE_CACHE_LINE_SIZE, socket_id); + RTE_CACHE_LINE_SIZE, SOCKET_ID_ANY); if (!sw_ring) { PMD_INIT_LOG(ERR, "can not allocate RX soft ring"); ret = -ENOMEM; @@ -426,37 +463,33 @@ int virtio_dev_queue_setup(struct rte_eth_dev *dev, } vq->sw_ring = sw_ring; - rxvq = (struct virtnet_rx *)RTE_PTR_ADD(vq, sz_vq); + rxvq = &vq->rxq; rxvq->vq = vq; rxvq->port_id = dev->data->port_id; - rxvq->queue_id = queue_idx; rxvq->mz = mz; - *pvq = rxvq; } else if (queue_type == VTNET_TQ) { - txvq = (struct virtnet_tx *)RTE_PTR_ADD(vq, sz_vq); + txvq = &vq->txq; txvq->vq = vq; txvq->port_id = dev->data->port_id; - txvq->queue_id = queue_idx; txvq->mz = mz; txvq->virtio_net_hdr_mz = hdr_mz; txvq->virtio_net_hdr_mem = hdr_mz->phys_addr; - - *pvq = txvq; } else if (queue_type == VTNET_CQ) { - cvq = (struct virtnet_ctl *)RTE_PTR_ADD(vq, sz_vq); + cvq = &vq->cq; cvq->vq = vq; cvq->mz = mz; cvq->virtio_net_hdr_mz = hdr_mz; cvq->virtio_net_hdr_mem = hdr_mz->phys_addr; memset(cvq->virtio_net_hdr_mz->addr, 0, PAGE_SIZE); - *pvq = cvq; + + hw->cvq = cvq; } - /* For virtio_user case (that is when dev->pci_dev is NULL), we use + /* For virtio_user case (that is when hw->dev is NULL), we use * virtual address. And we need properly set _offset_, please see * VIRTIO_MBUF_DATA_DMA_ADDR in virtqueue.h for more information. 
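/* Editor's note -- illustrative sketch, not part of the patch. It shows the
 * idea behind vq->offset referred to in the comment above: the enqueue path
 * reads the buffer address through one offsetof()-selected field of the mbuf,
 * so the same datapath code yields a physical address for PCI devices and a
 * virtual address for virtio_user. The mbuf layout below is a reduced
 * stand-in, not the real struct rte_mbuf, and it assumes a 64-bit build so
 * both fields are 8 bytes wide. */
#include <stdint.h>
#include <stddef.h>

struct toy_mbuf {
	void    *buf_addr;      /* virtual address of the data buffer */
	uint64_t buf_physaddr;  /* physical address of the same buffer */
	uint16_t data_off;      /* start of packet data inside the buffer */
};

static uint64_t
toy_data_dma_addr(const struct toy_mbuf *m, size_t offset)
{
	/* offset == offsetof(struct toy_mbuf, buf_physaddr) for PCI ports,
	 * offset == offsetof(struct toy_mbuf, buf_addr) for virtio_user */
	uint64_t base = *(const uint64_t *)((const char *)m + offset);

	return base + m->data_off;
}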
*/ - if (dev->pci_dev) + if (!hw->virtio_user_dev) vq->offset = offsetof(struct rte_mbuf, buf_physaddr); else { vq->vq_ring_mem = (uintptr_t)mz->addr; @@ -488,13 +521,11 @@ int virtio_dev_queue_setup(struct rte_eth_dev *dev, } } - if (hw->vtpci_ops->setup_queue(hw, vq) < 0) { + if (VTPCI_OPS(hw)->setup_queue(hw, vq) < 0) { PMD_INIT_LOG(ERR, "setup_queue failed"); - virtio_dev_queue_release(vq); return -EINVAL; } - vq->configured = 1; return 0; fail_q_alloc: @@ -506,58 +537,88 @@ fail_q_alloc: return ret; } -static int -virtio_dev_cq_queue_setup(struct rte_eth_dev *dev, uint16_t vtpci_queue_idx, - uint32_t socket_id) +static void +virtio_free_queues(struct virtio_hw *hw) { - struct virtnet_ctl *cvq; - int ret; - struct virtio_hw *hw = dev->data->dev_private; + uint16_t nr_vq = virtio_get_nr_vq(hw); + struct virtqueue *vq; + int queue_type; + uint16_t i; - PMD_INIT_FUNC_TRACE(); - ret = virtio_dev_queue_setup(dev, VTNET_CQ, VTNET_SQ_CQ_QUEUE_IDX, - vtpci_queue_idx, 0, socket_id, (void **)&cvq); - if (ret < 0) { - PMD_INIT_LOG(ERR, "control vq initialization failed"); - return ret; + for (i = 0; i < nr_vq; i++) { + vq = hw->vqs[i]; + if (!vq) + continue; + + queue_type = virtio_get_queue_type(hw, i); + if (queue_type == VTNET_RQ) { + rte_free(vq->sw_ring); + rte_memzone_free(vq->rxq.mz); + } else if (queue_type == VTNET_TQ) { + rte_memzone_free(vq->txq.mz); + rte_memzone_free(vq->txq.virtio_net_hdr_mz); + } else { + rte_memzone_free(vq->cq.mz); + rte_memzone_free(vq->cq.virtio_net_hdr_mz); + } + + rte_free(vq); } - hw->cvq = cvq; - return 0; + rte_free(hw->vqs); } -static void -virtio_free_queues(struct rte_eth_dev *dev) +static int +virtio_alloc_queues(struct rte_eth_dev *dev) { - unsigned int i; - - for (i = 0; i < dev->data->nb_rx_queues; i++) - virtio_dev_rx_queue_release(dev->data->rx_queues[i]); + struct virtio_hw *hw = dev->data->dev_private; + uint16_t nr_vq = virtio_get_nr_vq(hw); + uint16_t i; + int ret; - dev->data->nb_rx_queues = 0; + hw->vqs = rte_zmalloc(NULL, sizeof(struct virtqueue *) * nr_vq, 0); + if (!hw->vqs) { + PMD_INIT_LOG(ERR, "failed to allocate vqs"); + return -ENOMEM; + } - for (i = 0; i < dev->data->nb_tx_queues; i++) - virtio_dev_tx_queue_release(dev->data->tx_queues[i]); + for (i = 0; i < nr_vq; i++) { + ret = virtio_init_queue(dev, i); + if (ret < 0) { + virtio_free_queues(hw); + return ret; + } + } - dev->data->nb_tx_queues = 0; + return 0; } +static void virtio_queues_unbind_intr(struct rte_eth_dev *dev); + static void virtio_dev_close(struct rte_eth_dev *dev) { struct virtio_hw *hw = dev->data->dev_private; + struct rte_intr_conf *intr_conf = &dev->data->dev_conf.intr_conf; PMD_INIT_LOG(DEBUG, "virtio_dev_close"); - if (hw->started == 1) - virtio_dev_stop(dev); - /* reset the NIC */ if (dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC) - vtpci_irq_config(hw, VIRTIO_MSI_NO_VECTOR); + VTPCI_OPS(hw)->set_config_irq(hw, VIRTIO_MSI_NO_VECTOR); + if (intr_conf->rxq) + virtio_queues_unbind_intr(dev); + + if (intr_conf->lsc || intr_conf->rxq) { + rte_intr_disable(dev->intr_handle); + rte_intr_efd_disable(dev->intr_handle); + rte_free(dev->intr_handle->intr_vec); + dev->intr_handle->intr_vec = NULL; + } + vtpci_reset(hw); virtio_dev_free_mbufs(dev); - virtio_free_queues(dev); + virtio_free_queues(hw); } static void @@ -569,7 +630,7 @@ virtio_dev_promiscuous_enable(struct rte_eth_dev *dev) int ret; if (!vtpci_with_feature(hw, VIRTIO_NET_F_CTRL_RX)) { - PMD_INIT_LOG(INFO, "host does not support rx control\n"); + PMD_INIT_LOG(INFO, "host does not support rx control"); return; 
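/* Editor's note -- illustrative sketch, not part of the patch. The rx-mode
 * handlers touched above all follow the same control-queue pattern: check
 * that the host negotiated VIRTIO_NET_F_CTRL_RX, fill a one-byte command,
 * and post it on the control virtqueue. Roughly as below; treat the exact
 * field and constant names as assumptions drawn from the rest of this
 * driver rather than from the hunks shown here. */
static void
toy_promiscuous_enable(struct rte_eth_dev *dev)
{
	struct virtio_hw *hw = dev->data->dev_private;
	struct virtio_pmd_ctrl ctrl;
	int dlen[1];

	if (!vtpci_with_feature(hw, VIRTIO_NET_F_CTRL_RX))
		return;                            /* host cannot do rx control */

	ctrl.hdr.class = VIRTIO_NET_CTRL_RX;       /* rx-mode command class */
	ctrl.hdr.cmd = VIRTIO_NET_CTRL_RX_PROMISC;
	ctrl.data[0] = 1;                          /* 1 = enable, 0 = disable */
	dlen[0] = 1;

	if (virtio_send_command(hw->cvq, &ctrl, dlen, 1))
		PMD_INIT_LOG(ERR, "Failed to enable promisc");
}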
} @@ -592,7 +653,7 @@ virtio_dev_promiscuous_disable(struct rte_eth_dev *dev) int ret; if (!vtpci_with_feature(hw, VIRTIO_NET_F_CTRL_RX)) { - PMD_INIT_LOG(INFO, "host does not support rx control\n"); + PMD_INIT_LOG(INFO, "host does not support rx control"); return; } @@ -615,7 +676,7 @@ virtio_dev_allmulticast_enable(struct rte_eth_dev *dev) int ret; if (!vtpci_with_feature(hw, VIRTIO_NET_F_CTRL_RX)) { - PMD_INIT_LOG(INFO, "host does not support rx control\n"); + PMD_INIT_LOG(INFO, "host does not support rx control"); return; } @@ -638,7 +699,7 @@ virtio_dev_allmulticast_disable(struct rte_eth_dev *dev) int ret; if (!vtpci_with_feature(hw, VIRTIO_NET_F_CTRL_RX)) { - PMD_INIT_LOG(INFO, "host does not support rx control\n"); + PMD_INIT_LOG(INFO, "host does not support rx control"); return; } @@ -652,6 +713,43 @@ virtio_dev_allmulticast_disable(struct rte_eth_dev *dev) PMD_INIT_LOG(ERR, "Failed to disable allmulticast"); } +#define VLAN_TAG_LEN 4 /* 802.3ac tag (not DMA'd) */ +static int +virtio_mtu_set(struct rte_eth_dev *dev, uint16_t mtu) +{ + struct virtio_hw *hw = dev->data->dev_private; + uint32_t ether_hdr_len = ETHER_HDR_LEN + VLAN_TAG_LEN + + hw->vtnet_hdr_size; + uint32_t frame_size = mtu + ether_hdr_len; + + if (mtu < ETHER_MIN_MTU || frame_size > VIRTIO_MAX_RX_PKTLEN) { + PMD_INIT_LOG(ERR, "MTU should be between %d and %d", + ETHER_MIN_MTU, VIRTIO_MAX_RX_PKTLEN - ether_hdr_len); + return -EINVAL; + } + return 0; +} + +static int +virtio_dev_rx_queue_intr_enable(struct rte_eth_dev *dev, uint16_t queue_id) +{ + struct virtnet_rx *rxvq = dev->data->rx_queues[queue_id]; + struct virtqueue *vq = rxvq->vq; + + virtqueue_enable_intr(vq); + return 0; +} + +static int +virtio_dev_rx_queue_intr_disable(struct rte_eth_dev *dev, uint16_t queue_id) +{ + struct virtnet_rx *rxvq = dev->data->rx_queues[queue_id]; + struct virtqueue *vq = rxvq->vq; + + virtqueue_disable_intr(vq); + return 0; +} + /* * dev_ops for virtio, bare necessities for basic operation */ @@ -664,7 +762,7 @@ static const struct eth_dev_ops virtio_eth_dev_ops = { .promiscuous_disable = virtio_dev_promiscuous_disable, .allmulticast_enable = virtio_dev_allmulticast_enable, .allmulticast_disable = virtio_dev_allmulticast_disable, - + .mtu_set = virtio_mtu_set, .dev_infos_get = virtio_dev_info_get, .stats_get = virtio_dev_stats_get, .xstats_get = virtio_dev_xstats_get, @@ -673,9 +771,12 @@ static const struct eth_dev_ops virtio_eth_dev_ops = { .xstats_reset = virtio_dev_stats_reset, .link_update = virtio_dev_link_update, .rx_queue_setup = virtio_dev_rx_queue_setup, - .rx_queue_release = virtio_dev_rx_queue_release, + .rx_queue_intr_enable = virtio_dev_rx_queue_intr_enable, + .rx_queue_intr_disable = virtio_dev_rx_queue_intr_disable, + .rx_queue_release = virtio_dev_queue_release, + .rx_descriptor_done = virtio_dev_rx_queue_done, .tx_queue_setup = virtio_dev_tx_queue_setup, - .tx_queue_release = virtio_dev_tx_queue_release, + .tx_queue_release = virtio_dev_queue_release, /* collect stats per queue */ .queue_stats_mapping_set = virtio_dev_queue_stats_mapping_set, .vlan_filter_set = virtio_vlan_filter_set, @@ -830,6 +931,7 @@ virtio_dev_xstats_get(struct rte_eth_dev *dev, struct rte_eth_xstat *xstats, for (t = 0; t < VIRTIO_NB_RXQ_XSTATS; t++) { xstats[count].value = *(uint64_t *)(((char *)rxvq) + rte_virtio_rxq_stat_strings[t].offset); + xstats[count].id = count; count++; } } @@ -845,6 +947,7 @@ virtio_dev_xstats_get(struct rte_eth_dev *dev, struct rte_eth_xstat *xstats, for (t = 0; t < VIRTIO_NB_TXQ_XSTATS; t++) { 
xstats[count].value = *(uint64_t *)(((char *)txvq) + rte_virtio_txq_stat_strings[t].offset); + xstats[count].id = count; count++; } } @@ -1042,17 +1145,16 @@ virtio_vlan_filter_set(struct rte_eth_dev *dev, uint16_t vlan_id, int on) } static int -virtio_negotiate_features(struct virtio_hw *hw) +virtio_negotiate_features(struct virtio_hw *hw, uint64_t req_features) { uint64_t host_features; /* Prepare guest_features: feature that driver wants to support */ - hw->guest_features = VIRTIO_PMD_GUEST_FEATURES; PMD_INIT_LOG(DEBUG, "guest_features before negotiate = %" PRIx64, - hw->guest_features); + req_features); /* Read device(host) feature bits */ - host_features = hw->vtpci_ops->get_features(hw); + host_features = VTPCI_OPS(hw)->get_features(hw); PMD_INIT_LOG(DEBUG, "host_features before negotiate = %" PRIx64, host_features); @@ -1060,6 +1162,7 @@ virtio_negotiate_features(struct virtio_hw *hw) * Negotiate features: Subset of device feature bits are written back * guest feature bits. */ + hw->guest_features = req_features; hw->guest_features = vtpci_negotiate_features(hw, host_features); PMD_INIT_LOG(DEBUG, "features after negotiate = %" PRIx64, hw->guest_features); @@ -1078,6 +1181,8 @@ virtio_negotiate_features(struct virtio_hw *hw) } } + hw->req_guest_features = req_features; + return 0; } @@ -1086,7 +1191,7 @@ virtio_negotiate_features(struct virtio_hw *hw) * if link state changed. */ static void -virtio_interrupt_handler(__rte_unused struct rte_intr_handle *handle, +virtio_interrupt_handler(struct rte_intr_handle *handle, void *param) { struct rte_eth_dev *dev = param; @@ -1097,13 +1202,13 @@ virtio_interrupt_handler(__rte_unused struct rte_intr_handle *handle, isr = vtpci_isr(hw); PMD_DRV_LOG(INFO, "interrupt status = %#x", isr); - if (rte_intr_enable(&dev->pci_dev->intr_handle) < 0) + if (rte_intr_enable(handle) < 0) PMD_DRV_LOG(ERR, "interrupt enable failed"); if (isr & VIRTIO_PCI_ISR_CONFIG) { if (virtio_dev_link_update(dev, 0) == 0) _rte_eth_dev_callback_process(dev, - RTE_ETH_EVENT_INTR_LSC); + RTE_ETH_EVENT_INTR_LSC, NULL); } } @@ -1118,47 +1223,105 @@ rx_func_get(struct rte_eth_dev *eth_dev) eth_dev->rx_pkt_burst = &virtio_recv_pkts; } -/* - * This function is based on probe() function in virtio_pci.c - * It returns 0 on success. +/* Only support 1:1 queue/interrupt mapping so far. + * TODO: support n:1 queue/interrupt mapping when there are limited number of + * interrupt vectors (<N+1). 
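/* Editor's note -- illustrative sketch, not part of the patch. With the 1:1
 * mapping described in the comment above, a port with N rx queues needs
 * N + 1 MSI-X vectors: vector 0 stays reserved for config/link-state
 * changes and rx queue i gets vector i + 1, which is exactly what
 * virtio_queues_bind_intr() programs below. A toy version of the
 * assignment: */
#include <stdint.h>

static void
toy_bind_rx_vectors(int *intr_vec, uint16_t nb_rx_queues)
{
	uint16_t i;

	for (i = 0; i < nb_rx_queues; i++)
		intr_vec[i] = i + 1;   /* vector 0 is kept for LSC/config */
}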
*/ -int -eth_virtio_dev_init(struct rte_eth_dev *eth_dev) +static int +virtio_queues_bind_intr(struct rte_eth_dev *dev) { - struct virtio_hw *hw = eth_dev->data->dev_private; - struct virtio_net_config *config; - struct virtio_net_config local_config; - struct rte_pci_device *pci_dev; - uint32_t dev_flags = RTE_ETH_DEV_DETACHABLE; - int ret; + uint32_t i; + struct virtio_hw *hw = dev->data->dev_private; - RTE_BUILD_BUG_ON(RTE_PKTMBUF_HEADROOM < sizeof(struct virtio_net_hdr_mrg_rxbuf)); + PMD_INIT_LOG(INFO, "queue/interrupt binding"); + for (i = 0; i < dev->data->nb_rx_queues; ++i) { + dev->intr_handle->intr_vec[i] = i + 1; + if (VTPCI_OPS(hw)->set_queue_irq(hw, hw->vqs[i * 2], i + 1) == + VIRTIO_MSI_NO_VECTOR) { + PMD_DRV_LOG(ERR, "failed to set queue vector"); + return -EBUSY; + } + } - eth_dev->dev_ops = &virtio_eth_dev_ops; - eth_dev->tx_pkt_burst = &virtio_xmit_pkts; + return 0; +} - if (rte_eal_process_type() == RTE_PROC_SECONDARY) { - rx_func_get(eth_dev); - return 0; +static void +virtio_queues_unbind_intr(struct rte_eth_dev *dev) +{ + uint32_t i; + struct virtio_hw *hw = dev->data->dev_private; + + PMD_INIT_LOG(INFO, "queue/interrupt unbinding"); + for (i = 0; i < dev->data->nb_rx_queues; ++i) + VTPCI_OPS(hw)->set_queue_irq(hw, + hw->vqs[i * VTNET_CQ], + VIRTIO_MSI_NO_VECTOR); +} + +static int +virtio_configure_intr(struct rte_eth_dev *dev) +{ + struct virtio_hw *hw = dev->data->dev_private; + + if (!rte_intr_cap_multiple(dev->intr_handle)) { + PMD_INIT_LOG(ERR, "Multiple intr vector not supported"); + return -ENOTSUP; } - /* Allocate memory for storing MAC addresses */ - eth_dev->data->mac_addrs = rte_zmalloc("virtio", VIRTIO_MAX_MAC_ADDRS * ETHER_ADDR_LEN, 0); - if (eth_dev->data->mac_addrs == NULL) { - PMD_INIT_LOG(ERR, - "Failed to allocate %d bytes needed to store MAC addresses", - VIRTIO_MAX_MAC_ADDRS * ETHER_ADDR_LEN); - return -ENOMEM; + if (rte_intr_efd_enable(dev->intr_handle, dev->data->nb_rx_queues)) { + PMD_INIT_LOG(ERR, "Fail to create eventfd"); + return -1; } - pci_dev = eth_dev->pci_dev; + if (!dev->intr_handle->intr_vec) { + dev->intr_handle->intr_vec = + rte_zmalloc("intr_vec", + hw->max_queue_pairs * sizeof(int), 0); + if (!dev->intr_handle->intr_vec) { + PMD_INIT_LOG(ERR, "Failed to allocate %u rxq vectors", + hw->max_queue_pairs); + return -ENOMEM; + } + } - if (pci_dev) { - ret = vtpci_init(pci_dev, hw, &dev_flags); - if (ret) - return ret; + /* Re-register callback to update max_intr */ + rte_intr_callback_unregister(dev->intr_handle, + virtio_interrupt_handler, + dev); + rte_intr_callback_register(dev->intr_handle, + virtio_interrupt_handler, + dev); + + /* DO NOT try to remove this! This function will enable msix, or QEMU + * will encounter SIGSEGV when DRIVER_OK is sent. + * And for legacy devices, this should be done before queue/vec binding + * to change the config size from 20 to 24, or VIRTIO_MSI_QUEUE_VECTOR + * (22) will be ignored. 
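/* Editor's note -- worked illustration, not part of the patch. In the legacy
 * virtio I/O layout the common header occupies offsets 0..19. Enabling MSI-X
 * inserts two 16-bit vector registers -- the config vector at offset 20
 * (VIRTIO_MSI_CONFIG_VECTOR) and the queue vector at offset 22
 * (VIRTIO_MSI_QUEUE_VECTOR) -- and pushes the device-specific config from
 * offset 20 to 24. If MSI-X were enabled only after queue/vector binding,
 * the write to offset 22 would land in dead space, which is what the
 * comment above warns about. A sketch of the offset selection: */
static unsigned int
toy_legacy_config_offset(int use_msix)
{
	const unsigned int common_hdr = 20;   /* legacy header, no MSI-X */
	const unsigned int msix_regs  = 4;    /* two 16-bit vector registers */

	return use_msix ? common_hdr + msix_regs : common_hdr;  /* 24 or 20 */
}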
+ */ + if (rte_intr_enable(dev->intr_handle) < 0) { + PMD_DRV_LOG(ERR, "interrupt enable failed"); + return -1; } + if (virtio_queues_bind_intr(dev) < 0) { + PMD_INIT_LOG(ERR, "Failed to bind queue/interrupt"); + return -1; + } + + return 0; +} + +/* reset device and renegotiate features if needed */ +static int +virtio_init_device(struct rte_eth_dev *eth_dev, uint64_t req_features) +{ + struct virtio_hw *hw = eth_dev->data->dev_private; + struct virtio_net_config *config; + struct virtio_net_config local_config; + struct rte_pci_device *pci_dev = NULL; + int ret; + /* Reset the device although not necessary at startup */ vtpci_reset(hw); @@ -1167,15 +1330,19 @@ eth_virtio_dev_init(struct rte_eth_dev *eth_dev) /* Tell the host we've known how to drive the device. */ vtpci_set_status(hw, VIRTIO_CONFIG_STATUS_DRIVER); - if (virtio_negotiate_features(hw) < 0) + if (virtio_negotiate_features(hw, req_features) < 0) return -1; + if (eth_dev->device) { + pci_dev = RTE_DEV_TO_PCI(eth_dev->device); + rte_eth_copy_pci_info(eth_dev, pci_dev); + } + /* If host does not support status then disable LSC */ if (!vtpci_with_feature(hw, VIRTIO_NET_F_STATUS)) - dev_flags &= ~RTE_ETH_DEV_INTR_LSC; - - rte_eth_copy_pci_info(eth_dev, pci_dev); - eth_dev->data->dev_flags = dev_flags; + eth_dev->data->dev_flags &= ~RTE_ETH_DEV_INTR_LSC; + else + eth_dev->data->dev_flags |= RTE_ETH_DEV_INTR_LSC; rx_func_get(eth_dev); @@ -1223,16 +1390,7 @@ eth_virtio_dev_init(struct rte_eth_dev *eth_dev) config->max_virtqueue_pairs = 1; } - hw->max_rx_queues = - (VIRTIO_MAX_RX_QUEUES < config->max_virtqueue_pairs) ? - VIRTIO_MAX_RX_QUEUES : config->max_virtqueue_pairs; - hw->max_tx_queues = - (VIRTIO_MAX_TX_QUEUES < config->max_virtqueue_pairs) ? - VIRTIO_MAX_TX_QUEUES : config->max_virtqueue_pairs; - - virtio_dev_cq_queue_setup(eth_dev, - config->max_virtqueue_pairs * 2, - SOCKET_ID_ANY); + hw->max_queue_pairs = config->max_virtqueue_pairs; PMD_INIT_LOG(DEBUG, "config->max_virtqueue_pairs=%d", config->max_virtqueue_pairs); @@ -1243,23 +1401,142 @@ eth_virtio_dev_init(struct rte_eth_dev *eth_dev) config->mac[2], config->mac[3], config->mac[4], config->mac[5]); } else { - hw->max_rx_queues = 1; - hw->max_tx_queues = 1; + PMD_INIT_LOG(DEBUG, "config->max_virtqueue_pairs=1"); + hw->max_queue_pairs = 1; } - PMD_INIT_LOG(DEBUG, "hw->max_rx_queues=%d hw->max_tx_queues=%d", - hw->max_rx_queues, hw->max_tx_queues); + ret = virtio_alloc_queues(eth_dev); + if (ret < 0) + return ret; + + if (eth_dev->data->dev_conf.intr_conf.rxq) { + if (virtio_configure_intr(eth_dev) < 0) { + PMD_INIT_LOG(ERR, "failed to configure interrupt"); + return -1; + } + } + + vtpci_reinit_complete(hw); + if (pci_dev) PMD_INIT_LOG(DEBUG, "port %d vendorID=0x%x deviceID=0x%x", eth_dev->data->port_id, pci_dev->id.vendor_id, pci_dev->id.device_id); + return 0; +} + +/* + * Remap the PCI device again (IO port map for legacy device and + * memory map for modern device), so that the secondary process + * could have the PCI initiated correctly. + */ +static int +virtio_remap_pci(struct rte_pci_device *pci_dev, struct virtio_hw *hw) +{ + if (hw->modern) { + /* + * We don't have to re-parse the PCI config space, since + * rte_eal_pci_map_device() makes sure the mapped address + * in secondary process would equal to the one mapped in + * the primary process: error will be returned if that + * requirement is not met. + * + * That said, we could simply reuse all cap pointers + * (such as dev_cfg, common_cfg, etc.) 
parsed from the + * primary process, which is stored in shared memory. + */ + if (rte_eal_pci_map_device(pci_dev)) { + PMD_INIT_LOG(DEBUG, "failed to map pci device!"); + return -1; + } + } else { + if (rte_eal_pci_ioport_map(pci_dev, 0, VTPCI_IO(hw)) < 0) + return -1; + } + + return 0; +} + +static void +virtio_set_vtpci_ops(struct virtio_hw *hw) +{ +#ifdef RTE_VIRTIO_USER + if (hw->virtio_user_dev) + VTPCI_OPS(hw) = &virtio_user_ops; + else +#endif + if (hw->modern) + VTPCI_OPS(hw) = &modern_ops; + else + VTPCI_OPS(hw) = &legacy_ops; +} + +/* + * This function is based on probe() function in virtio_pci.c + * It returns 0 on success. + */ +int +eth_virtio_dev_init(struct rte_eth_dev *eth_dev) +{ + struct virtio_hw *hw = eth_dev->data->dev_private; + uint32_t dev_flags = RTE_ETH_DEV_DETACHABLE; + int ret; + + RTE_BUILD_BUG_ON(RTE_PKTMBUF_HEADROOM < sizeof(struct virtio_net_hdr_mrg_rxbuf)); + + eth_dev->dev_ops = &virtio_eth_dev_ops; + eth_dev->tx_pkt_burst = &virtio_xmit_pkts; + + if (rte_eal_process_type() == RTE_PROC_SECONDARY) { + if (!hw->virtio_user_dev) { + ret = virtio_remap_pci(RTE_DEV_TO_PCI(eth_dev->device), + hw); + if (ret) + return ret; + } + + virtio_set_vtpci_ops(hw); + if (hw->use_simple_rxtx) { + eth_dev->tx_pkt_burst = virtio_xmit_pkts_simple; + eth_dev->rx_pkt_burst = virtio_recv_pkts_vec; + } else { + rx_func_get(eth_dev); + } + return 0; + } + + /* Allocate memory for storing MAC addresses */ + eth_dev->data->mac_addrs = rte_zmalloc("virtio", VIRTIO_MAX_MAC_ADDRS * ETHER_ADDR_LEN, 0); + if (eth_dev->data->mac_addrs == NULL) { + PMD_INIT_LOG(ERR, + "Failed to allocate %d bytes needed to store MAC addresses", + VIRTIO_MAX_MAC_ADDRS * ETHER_ADDR_LEN); + return -ENOMEM; + } + + hw->port_id = eth_dev->data->port_id; + /* For virtio_user case the hw->virtio_user_dev is populated by + * virtio_user_eth_dev_alloc() before eth_virtio_dev_init() is called. 
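/* Editor's note -- illustrative sketch, not part of the patch. The init path
 * above no longer keeps a struct rte_pci_device pointer inside the ethdev;
 * it recovers it on demand from the generic rte_device handle via
 * RTE_DEV_TO_PCI(), a container_of-style conversion. A hand-rolled
 * equivalent, using stand-in types and an assumed layout, looks like: */
#include <stddef.h>

struct toy_device { int numa_node; };

struct toy_pci_device {
	struct toy_device device;   /* generic device embedded in the PCI one */
	unsigned int devid;
};

static struct toy_pci_device *
toy_dev_to_pci(struct toy_device *dev)
{
	/* subtract the member offset to get back to the enclosing struct */
	return (struct toy_pci_device *)
		((char *)dev - offsetof(struct toy_pci_device, device));
}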
+ */ + if (!hw->virtio_user_dev) { + ret = vtpci_init(RTE_DEV_TO_PCI(eth_dev->device), hw, + &dev_flags); + if (ret) + return ret; + } + + eth_dev->data->dev_flags = dev_flags; + + /* reset device and negotiate default features */ + ret = virtio_init_device(eth_dev, VIRTIO_PMD_DEFAULT_GUEST_FEATURES); + if (ret < 0) + return ret; + /* Setup interrupt callback */ if (eth_dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC) - rte_intr_callback_register(&pci_dev->intr_handle, - virtio_interrupt_handler, eth_dev); - - virtio_dev_cq_start(eth_dev); + rte_intr_callback_register(eth_dev->intr_handle, + virtio_interrupt_handler, eth_dev); return 0; } @@ -1267,35 +1544,28 @@ eth_virtio_dev_init(struct rte_eth_dev *eth_dev) static int eth_virtio_dev_uninit(struct rte_eth_dev *eth_dev) { - struct rte_pci_device *pci_dev; - struct virtio_hw *hw = eth_dev->data->dev_private; - PMD_INIT_FUNC_TRACE(); if (rte_eal_process_type() == RTE_PROC_SECONDARY) return -EPERM; - /* Close it anyway since there's no way to know if closed */ + virtio_dev_stop(eth_dev); virtio_dev_close(eth_dev); - pci_dev = eth_dev->pci_dev; - eth_dev->dev_ops = NULL; eth_dev->tx_pkt_burst = NULL; eth_dev->rx_pkt_burst = NULL; - if (hw->cvq) - virtio_dev_queue_release(hw->cvq->vq); - rte_free(eth_dev->data->mac_addrs); eth_dev->data->mac_addrs = NULL; /* reset interrupt callback */ if (eth_dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC) - rte_intr_callback_unregister(&pci_dev->intr_handle, + rte_intr_callback_unregister(eth_dev->intr_handle, virtio_interrupt_handler, eth_dev); - rte_eal_pci_unmap_device(pci_dev); + if (eth_dev->device) + rte_eal_pci_unmap_device(RTE_DEV_TO_PCI(eth_dev->device)); PMD_INIT_LOG(DEBUG, "dev_uninit completed"); @@ -1304,32 +1574,29 @@ eth_virtio_dev_uninit(struct rte_eth_dev *eth_dev) static struct eth_driver rte_virtio_pmd = { .pci_drv = { - .name = "rte_virtio_pmd", + .driver = { + .name = "net_virtio", + }, .id_table = pci_id_virtio_map, - .drv_flags = RTE_PCI_DRV_DETACHABLE, + .drv_flags = 0, + .probe = rte_eth_dev_pci_probe, + .remove = rte_eth_dev_pci_remove, }, .eth_dev_init = eth_virtio_dev_init, .eth_dev_uninit = eth_virtio_dev_uninit, .dev_private_size = sizeof(struct virtio_hw), }; -/* - * Driver initialization routine. - * Invoked once at EAL init time. - * Register itself as the [Poll Mode] Driver of PCI virtio devices. - * Returns 0 on success. 
- */ -static int -rte_virtio_pmd_init(const char *name __rte_unused, - const char *param __rte_unused) +RTE_INIT(rte_virtio_pmd_init); +static void +rte_virtio_pmd_init(void) { if (rte_eal_iopl_init() != 0) { PMD_INIT_LOG(ERR, "IOPL call failed - cannot use virtio PMD"); - return -1; + return; } - rte_eth_driver_register(&rte_virtio_pmd); - return 0; + rte_eal_pci_register(&rte_virtio_pmd.pci_drv); } /* @@ -1341,14 +1608,44 @@ virtio_dev_configure(struct rte_eth_dev *dev) { const struct rte_eth_rxmode *rxmode = &dev->data->dev_conf.rxmode; struct virtio_hw *hw = dev->data->dev_private; + uint64_t req_features; + int ret; PMD_INIT_LOG(DEBUG, "configure"); + req_features = VIRTIO_PMD_DEFAULT_GUEST_FEATURES; + if (rxmode->hw_ip_checksum) + req_features |= (1ULL << VIRTIO_NET_F_GUEST_CSUM); + if (rxmode->enable_lro) + req_features |= + (1ULL << VIRTIO_NET_F_GUEST_TSO4) | + (1ULL << VIRTIO_NET_F_GUEST_TSO6); + + /* if request features changed, reinit the device */ + if (req_features != hw->req_guest_features) { + ret = virtio_init_device(dev, req_features); + if (ret < 0) + return ret; + } - if (rxmode->hw_ip_checksum) { - PMD_DRV_LOG(ERR, "HW IP checksum not supported"); - return -EINVAL; + if (rxmode->hw_ip_checksum && + !vtpci_with_feature(hw, VIRTIO_NET_F_GUEST_CSUM)) { + PMD_DRV_LOG(NOTICE, + "rx ip checksum not available on this host"); + return -ENOTSUP; + } + + if (rxmode->enable_lro && + (!vtpci_with_feature(hw, VIRTIO_NET_F_GUEST_TSO4) || + !vtpci_with_feature(hw, VIRTIO_NET_F_GUEST_TSO4))) { + PMD_DRV_LOG(NOTICE, + "lro not available on this host"); + return -ENOTSUP; } + /* start control queue */ + if (vtpci_with_feature(hw, VIRTIO_NET_F_CTRL_VQ)) + virtio_dev_cq_start(dev); + hw->vlan_strip = rxmode->hw_vlan_strip; if (rxmode->hw_vlan_filter @@ -1359,7 +1656,9 @@ virtio_dev_configure(struct rte_eth_dev *dev) } if (dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC) - if (vtpci_irq_config(hw, 0) == VIRTIO_MSI_NO_VECTOR) { + /* Enable vector (0) for Link State Intrerrupt */ + if (VTPCI_OPS(hw)->set_config_irq(hw, 0) == + VIRTIO_MSI_NO_VECTOR) { PMD_DRV_LOG(ERR, "failed to set config vector"); return -EBUSY; } @@ -1372,9 +1671,9 @@ static int virtio_dev_start(struct rte_eth_dev *dev) { uint16_t nb_queues, i; - struct virtio_hw *hw = dev->data->dev_private; struct virtnet_rx *rxvq; struct virtnet_tx *txvq __rte_unused; + struct virtio_hw *hw = dev->data->dev_private; /* check if lsc interrupt feature is enabled */ if (dev->data->dev_conf.intr_conf.lsc) { @@ -1382,8 +1681,17 @@ virtio_dev_start(struct rte_eth_dev *dev) PMD_DRV_LOG(ERR, "link status not supported by host"); return -ENOTSUP; } + } + + /* Enable uio/vfio intr/eventfd mapping: althrough we already did that + * in device configure, but it could be unmapped when device is + * stopped. + */ + if (dev->data->dev_conf.intr_conf.lsc || + dev->data->dev_conf.intr_conf.rxq) { + rte_intr_disable(dev->intr_handle); - if (rte_intr_enable(&dev->pci_dev->intr_handle) < 0) { + if (rte_intr_enable(dev->intr_handle) < 0) { PMD_DRV_LOG(ERR, "interrupt enable failed"); return -EIO; } @@ -1392,29 +1700,19 @@ virtio_dev_start(struct rte_eth_dev *dev) /* Initialize Link state */ virtio_dev_link_update(dev, 0); - /* On restart after stop do not touch queues */ - if (hw->started) - return 0; - - /* Do final configuration before rx/tx engine starts */ - virtio_dev_rxtx_start(dev); - vtpci_reinit_complete(hw); - - hw->started = 1; - /*Notify the backend *Otherwise the tap backend might already stop its queue due to fullness. 
*vhost backend will have no chance to be waked up */ - nb_queues = dev->data->nb_rx_queues; - if (nb_queues > 1) { + nb_queues = RTE_MAX(dev->data->nb_rx_queues, dev->data->nb_tx_queues); + if (hw->max_queue_pairs > 1) { if (virtio_set_multiple_queues(dev, nb_queues) != 0) return -EINVAL; } PMD_INIT_LOG(DEBUG, "nb_queues=%d", nb_queues); - for (i = 0; i < nb_queues; i++) { + for (i = 0; i < dev->data->nb_rx_queues; i++) { rxvq = dev->data->rx_queues[i]; virtqueue_notify(rxvq->vq); } @@ -1486,14 +1784,12 @@ static void virtio_dev_stop(struct rte_eth_dev *dev) { struct rte_eth_link link; - struct virtio_hw *hw = dev->data->dev_private; + struct rte_intr_conf *intr_conf = &dev->data->dev_conf.intr_conf; PMD_INIT_LOG(DEBUG, "stop"); - hw->started = 0; - - if (dev->data->dev_conf.intr_conf.lsc) - rte_intr_disable(&dev->pci_dev->intr_handle); + if (intr_conf->lsc || intr_conf->rxq) + rte_intr_disable(dev->intr_handle); memset(&link, 0, sizeof(link)); virtio_dev_atomic_write_link_status(dev, &link); @@ -1536,21 +1832,43 @@ virtio_dev_link_update(struct rte_eth_dev *dev, __rte_unused int wait_to_complet static void virtio_dev_info_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info) { + uint64_t tso_mask, host_features; struct virtio_hw *hw = dev->data->dev_private; - if (dev->pci_dev) - dev_info->driver_name = dev->driver->pci_drv.name; - else - dev_info->driver_name = "virtio_user PMD"; - dev_info->max_rx_queues = (uint16_t)hw->max_rx_queues; - dev_info->max_tx_queues = (uint16_t)hw->max_tx_queues; + dev_info->pci_dev = dev->device ? RTE_DEV_TO_PCI(dev->device) : NULL; + dev_info->max_rx_queues = + RTE_MIN(hw->max_queue_pairs, VIRTIO_MAX_RX_QUEUES); + dev_info->max_tx_queues = + RTE_MIN(hw->max_queue_pairs, VIRTIO_MAX_TX_QUEUES); dev_info->min_rx_bufsize = VIRTIO_MIN_RX_BUFSIZE; dev_info->max_rx_pktlen = VIRTIO_MAX_RX_PKTLEN; dev_info->max_mac_addrs = VIRTIO_MAX_MAC_ADDRS; dev_info->default_txconf = (struct rte_eth_txconf) { .txq_flags = ETH_TXQ_FLAGS_NOOFFLOADS }; - /* TRex patch */ + + host_features = VTPCI_OPS(hw)->get_features(hw); + dev_info->rx_offload_capa = 0; + if (host_features & (1ULL << VIRTIO_NET_F_GUEST_CSUM)) { + dev_info->rx_offload_capa |= + DEV_RX_OFFLOAD_TCP_CKSUM | + DEV_RX_OFFLOAD_UDP_CKSUM; + } + tso_mask = (1ULL << VIRTIO_NET_F_GUEST_TSO4) | + (1ULL << VIRTIO_NET_F_GUEST_TSO6); + if ((host_features & tso_mask) == tso_mask) + dev_info->rx_offload_capa |= DEV_RX_OFFLOAD_TCP_LRO; + + dev_info->tx_offload_capa = 0; + if (hw->guest_features & (1ULL << VIRTIO_NET_F_CSUM)) { + dev_info->tx_offload_capa |= + DEV_TX_OFFLOAD_UDP_CKSUM | + DEV_TX_OFFLOAD_TCP_CKSUM; + } + tso_mask = (1ULL << VIRTIO_NET_F_HOST_TSO4) | + (1ULL << VIRTIO_NET_F_HOST_TSO6); + if ((hw->guest_features & tso_mask) == tso_mask) + dev_info->tx_offload_capa |= DEV_TX_OFFLOAD_TCP_TSO; dev_info->speed_capa = ETH_LINK_SPEED_10G; } @@ -1565,10 +1883,6 @@ __rte_unused uint8_t is_rx) return 0; } -static struct rte_driver rte_virtio_driver = { - .type = PMD_PDEV, - .init = rte_virtio_pmd_init, -}; - -PMD_REGISTER_DRIVER(rte_virtio_driver, virtio_net); -DRIVER_REGISTER_PCI_TABLE(virtio_net, pci_id_virtio_map); +RTE_PMD_EXPORT_NAME(net_virtio, __COUNTER__); +RTE_PMD_REGISTER_PCI_TABLE(net_virtio, pci_id_virtio_map); +RTE_PMD_REGISTER_KMOD_DEP(net_virtio, "* igb_uio | uio_pci_generic | vfio"); diff --git a/src/dpdk/drivers/net/virtio/virtio_ethdev.h b/src/dpdk/drivers/net/virtio/virtio_ethdev.h index 2ecec6eb..777a14be 100644 --- a/src/dpdk/drivers/net/virtio/virtio_ethdev.h +++ 
b/src/dpdk/drivers/net/virtio/virtio_ethdev.h @@ -47,14 +47,14 @@ #define PAGE_SIZE 4096 #endif -#define VIRTIO_MAX_RX_QUEUES 128 -#define VIRTIO_MAX_TX_QUEUES 128 +#define VIRTIO_MAX_RX_QUEUES 128U +#define VIRTIO_MAX_TX_QUEUES 128U #define VIRTIO_MAX_MAC_ADDRS 64 #define VIRTIO_MIN_RX_BUFSIZE 64 #define VIRTIO_MAX_RX_PKTLEN 9728 /* Features desired/implemented by this driver. */ -#define VIRTIO_PMD_GUEST_FEATURES \ +#define VIRTIO_PMD_DEFAULT_GUEST_FEATURES \ (1u << VIRTIO_NET_F_MAC | \ 1u << VIRTIO_NET_F_STATUS | \ 1u << VIRTIO_NET_F_MQ | \ @@ -62,9 +62,19 @@ 1u << VIRTIO_NET_F_CTRL_VQ | \ 1u << VIRTIO_NET_F_CTRL_RX | \ 1u << VIRTIO_NET_F_CTRL_VLAN | \ + 1u << VIRTIO_NET_F_CSUM | \ + 1u << VIRTIO_NET_F_HOST_TSO4 | \ + 1u << VIRTIO_NET_F_HOST_TSO6 | \ 1u << VIRTIO_NET_F_MRG_RXBUF | \ - 1ULL << VIRTIO_F_VERSION_1) - + 1u << VIRTIO_RING_F_INDIRECT_DESC | \ + 1ULL << VIRTIO_F_VERSION_1 | \ + 1ULL << VIRTIO_F_IOMMU_PLATFORM) + +#define VIRTIO_PMD_SUPPORTED_GUEST_FEATURES \ + (VIRTIO_PMD_DEFAULT_GUEST_FEATURES | \ + 1u << VIRTIO_NET_F_GUEST_CSUM | \ + 1u << VIRTIO_NET_F_GUEST_TSO4 | \ + 1u << VIRTIO_NET_F_GUEST_TSO6) /* * CQ function prototype */ @@ -73,31 +83,18 @@ void virtio_dev_cq_start(struct rte_eth_dev *dev); /* * RX/TX function prototypes */ -void virtio_dev_rxtx_start(struct rte_eth_dev *dev); - -int virtio_dev_queue_setup(struct rte_eth_dev *dev, - int queue_type, - uint16_t queue_idx, - uint16_t vtpci_queue_idx, - uint16_t nb_desc, - unsigned int socket_id, - void **pvq); -void virtio_dev_queue_release(struct virtqueue *vq); +int virtio_dev_rx_queue_done(void *rxq, uint16_t offset); int virtio_dev_rx_queue_setup(struct rte_eth_dev *dev, uint16_t rx_queue_id, uint16_t nb_rx_desc, unsigned int socket_id, const struct rte_eth_rxconf *rx_conf, struct rte_mempool *mb_pool); -void virtio_dev_rx_queue_release(void *rxq); - int virtio_dev_tx_queue_setup(struct rte_eth_dev *dev, uint16_t tx_queue_id, uint16_t nb_tx_desc, unsigned int socket_id, const struct rte_eth_txconf *tx_conf); -void virtio_dev_tx_queue_release(void *txq); - uint16_t virtio_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts); @@ -115,13 +112,4 @@ uint16_t virtio_xmit_pkts_simple(void *tx_queue, struct rte_mbuf **tx_pkts, int eth_virtio_dev_init(struct rte_eth_dev *eth_dev); -/* - * The VIRTIO_NET_F_GUEST_TSO[46] features permit the host to send us - * frames larger than 1514 bytes. We do not yet support software LRO - * via tcp_lro_rx(). 
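/* Editor's note -- illustrative sketch, not part of the patch. The split into
 * DEFAULT and SUPPORTED guest feature sets works together with the reworked
 * virtio_dev_configure() earlier in this diff: the PMD negotiates only the
 * default set at probe time and re-runs virtio_init_device() with extra
 * guest bits when the application actually asks for rx checksum or LRO.
 * Roughly: */
static void
toy_build_req_features(const struct rte_eth_rxmode *rxmode, uint64_t *req)
{
	*req = VIRTIO_PMD_DEFAULT_GUEST_FEATURES;

	if (rxmode->hw_ip_checksum)
		*req |= 1ULL << VIRTIO_NET_F_GUEST_CSUM;    /* host checksums rx */
	if (rxmode->enable_lro)
		*req |= (1ULL << VIRTIO_NET_F_GUEST_TSO4) |
			(1ULL << VIRTIO_NET_F_GUEST_TSO6);  /* host may send large frames */
}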
- */ -#define VTNET_LRO_FEATURES (VIRTIO_NET_F_GUEST_TSO4 | \ - VIRTIO_NET_F_GUEST_TSO6 | VIRTIO_NET_F_GUEST_ECN) - - #endif /* _VIRTIO_ETHDEV_H_ */ diff --git a/src/dpdk/drivers/net/virtio/virtio_pci.c b/src/dpdk/drivers/net/virtio/virtio_pci.c index f1a7ca7e..ce9a9d3f 100644 --- a/src/dpdk/drivers/net/virtio/virtio_pci.c +++ b/src/dpdk/drivers/net/virtio/virtio_pci.c @@ -37,6 +37,8 @@ #include <fcntl.h> #endif +#include <rte_io.h> + #include "virtio_pci.h" #include "virtio_logs.h" #include "virtqueue.h" @@ -92,17 +94,17 @@ legacy_read_dev_config(struct virtio_hw *hw, size_t offset, while (length > 0) { if (length >= 4) { size = 4; - rte_eal_pci_ioport_read(&hw->io, dst, size, + rte_eal_pci_ioport_read(VTPCI_IO(hw), dst, size, VIRTIO_PCI_CONFIG(hw) + offset); *(uint32_t *)dst = rte_be_to_cpu_32(*(uint32_t *)dst); } else if (length >= 2) { size = 2; - rte_eal_pci_ioport_read(&hw->io, dst, size, + rte_eal_pci_ioport_read(VTPCI_IO(hw), dst, size, VIRTIO_PCI_CONFIG(hw) + offset); *(uint16_t *)dst = rte_be_to_cpu_16(*(uint16_t *)dst); } else { size = 1; - rte_eal_pci_ioport_read(&hw->io, dst, size, + rte_eal_pci_ioport_read(VTPCI_IO(hw), dst, size, VIRTIO_PCI_CONFIG(hw) + offset); } @@ -111,7 +113,7 @@ legacy_read_dev_config(struct virtio_hw *hw, size_t offset, length -= size; } #else - rte_eal_pci_ioport_read(&hw->io, dst, length, + rte_eal_pci_ioport_read(VTPCI_IO(hw), dst, length, VIRTIO_PCI_CONFIG(hw) + offset); #endif } @@ -131,16 +133,16 @@ legacy_write_dev_config(struct virtio_hw *hw, size_t offset, if (length >= 4) { size = 4; tmp.u32 = rte_cpu_to_be_32(*(const uint32_t *)src); - rte_eal_pci_ioport_write(&hw->io, &tmp.u32, size, + rte_eal_pci_ioport_write(VTPCI_IO(hw), &tmp.u32, size, VIRTIO_PCI_CONFIG(hw) + offset); } else if (length >= 2) { size = 2; tmp.u16 = rte_cpu_to_be_16(*(const uint16_t *)src); - rte_eal_pci_ioport_write(&hw->io, &tmp.u16, size, + rte_eal_pci_ioport_write(VTPCI_IO(hw), &tmp.u16, size, VIRTIO_PCI_CONFIG(hw) + offset); } else { size = 1; - rte_eal_pci_ioport_write(&hw->io, src, size, + rte_eal_pci_ioport_write(VTPCI_IO(hw), src, size, VIRTIO_PCI_CONFIG(hw) + offset); } @@ -149,7 +151,7 @@ legacy_write_dev_config(struct virtio_hw *hw, size_t offset, length -= size; } #else - rte_eal_pci_ioport_write(&hw->io, src, length, + rte_eal_pci_ioport_write(VTPCI_IO(hw), src, length, VIRTIO_PCI_CONFIG(hw) + offset); #endif } @@ -159,7 +161,8 @@ legacy_get_features(struct virtio_hw *hw) { uint32_t dst; - rte_eal_pci_ioport_read(&hw->io, &dst, 4, VIRTIO_PCI_HOST_FEATURES); + rte_eal_pci_ioport_read(VTPCI_IO(hw), &dst, 4, + VIRTIO_PCI_HOST_FEATURES); return dst; } @@ -171,7 +174,7 @@ legacy_set_features(struct virtio_hw *hw, uint64_t features) "only 32 bit features are allowed for legacy virtio!"); return; } - rte_eal_pci_ioport_write(&hw->io, &features, 4, + rte_eal_pci_ioport_write(VTPCI_IO(hw), &features, 4, VIRTIO_PCI_GUEST_FEATURES); } @@ -180,14 +183,14 @@ legacy_get_status(struct virtio_hw *hw) { uint8_t dst; - rte_eal_pci_ioport_read(&hw->io, &dst, 1, VIRTIO_PCI_STATUS); + rte_eal_pci_ioport_read(VTPCI_IO(hw), &dst, 1, VIRTIO_PCI_STATUS); return dst; } static void legacy_set_status(struct virtio_hw *hw, uint8_t status) { - rte_eal_pci_ioport_write(&hw->io, &status, 1, VIRTIO_PCI_STATUS); + rte_eal_pci_ioport_write(VTPCI_IO(hw), &status, 1, VIRTIO_PCI_STATUS); } static void @@ -201,7 +204,7 @@ legacy_get_isr(struct virtio_hw *hw) { uint8_t dst; - rte_eal_pci_ioport_read(&hw->io, &dst, 1, VIRTIO_PCI_ISR); + rte_eal_pci_ioport_read(VTPCI_IO(hw), &dst, 1, VIRTIO_PCI_ISR); 
return dst; } @@ -211,8 +214,23 @@ legacy_set_config_irq(struct virtio_hw *hw, uint16_t vec) { uint16_t dst; - rte_eal_pci_ioport_write(&hw->io, &vec, 2, VIRTIO_MSI_CONFIG_VECTOR); - rte_eal_pci_ioport_read(&hw->io, &dst, 2, VIRTIO_MSI_CONFIG_VECTOR); + rte_eal_pci_ioport_write(VTPCI_IO(hw), &vec, 2, + VIRTIO_MSI_CONFIG_VECTOR); + rte_eal_pci_ioport_read(VTPCI_IO(hw), &dst, 2, + VIRTIO_MSI_CONFIG_VECTOR); + return dst; +} + +static uint16_t +legacy_set_queue_irq(struct virtio_hw *hw, struct virtqueue *vq, uint16_t vec) +{ + uint16_t dst; + + rte_eal_pci_ioport_write(VTPCI_IO(hw), &vq->vq_queue_index, 2, + VIRTIO_PCI_QUEUE_SEL); + rte_eal_pci_ioport_write(VTPCI_IO(hw), &vec, 2, + VIRTIO_MSI_QUEUE_VECTOR); + rte_eal_pci_ioport_read(VTPCI_IO(hw), &dst, 2, VIRTIO_MSI_QUEUE_VECTOR); return dst; } @@ -221,8 +239,9 @@ legacy_get_queue_num(struct virtio_hw *hw, uint16_t queue_id) { uint16_t dst; - rte_eal_pci_ioport_write(&hw->io, &queue_id, 2, VIRTIO_PCI_QUEUE_SEL); - rte_eal_pci_ioport_read(&hw->io, &dst, 2, VIRTIO_PCI_QUEUE_NUM); + rte_eal_pci_ioport_write(VTPCI_IO(hw), &queue_id, 2, + VIRTIO_PCI_QUEUE_SEL); + rte_eal_pci_ioport_read(VTPCI_IO(hw), &dst, 2, VIRTIO_PCI_QUEUE_NUM); return dst; } @@ -234,10 +253,10 @@ legacy_setup_queue(struct virtio_hw *hw, struct virtqueue *vq) if (!check_vq_phys_addr_ok(vq)) return -1; - rte_eal_pci_ioport_write(&hw->io, &vq->vq_queue_index, 2, + rte_eal_pci_ioport_write(VTPCI_IO(hw), &vq->vq_queue_index, 2, VIRTIO_PCI_QUEUE_SEL); src = vq->vq_ring_mem >> VIRTIO_PCI_QUEUE_ADDR_SHIFT; - rte_eal_pci_ioport_write(&hw->io, &src, 4, VIRTIO_PCI_QUEUE_PFN); + rte_eal_pci_ioport_write(VTPCI_IO(hw), &src, 4, VIRTIO_PCI_QUEUE_PFN); return 0; } @@ -247,15 +266,15 @@ legacy_del_queue(struct virtio_hw *hw, struct virtqueue *vq) { uint32_t src = 0; - rte_eal_pci_ioport_write(&hw->io, &vq->vq_queue_index, 2, + rte_eal_pci_ioport_write(VTPCI_IO(hw), &vq->vq_queue_index, 2, VIRTIO_PCI_QUEUE_SEL); - rte_eal_pci_ioport_write(&hw->io, &src, 4, VIRTIO_PCI_QUEUE_PFN); + rte_eal_pci_ioport_write(VTPCI_IO(hw), &src, 4, VIRTIO_PCI_QUEUE_PFN); } static void legacy_notify_queue(struct virtio_hw *hw, struct virtqueue *vq) { - rte_eal_pci_ioport_write(&hw->io, &vq->vq_queue_index, 2, + rte_eal_pci_ioport_write(VTPCI_IO(hw), &vq->vq_queue_index, 2, VIRTIO_PCI_QUEUE_NOTIFY); } @@ -289,7 +308,7 @@ static int legacy_virtio_resource_init(struct rte_pci_device *pci_dev, struct virtio_hw *hw, uint32_t *dev_flags) { - if (rte_eal_pci_ioport_map(pci_dev, 0, &hw->io) < 0) + if (rte_eal_pci_ioport_map(pci_dev, 0, VTPCI_IO(hw)) < 0) return -1; if (pci_dev->intr_handle.type != RTE_INTR_HANDLE_UNKNOWN) @@ -300,7 +319,7 @@ legacy_virtio_resource_init(struct rte_pci_device *pci_dev, return 0; } -static const struct virtio_pci_ops legacy_ops = { +const struct virtio_pci_ops legacy_ops = { .read_dev_cfg = legacy_read_dev_config, .write_dev_cfg = legacy_write_dev_config, .reset = legacy_reset, @@ -310,54 +329,18 @@ static const struct virtio_pci_ops legacy_ops = { .set_features = legacy_set_features, .get_isr = legacy_get_isr, .set_config_irq = legacy_set_config_irq, + .set_queue_irq = legacy_set_queue_irq, .get_queue_num = legacy_get_queue_num, .setup_queue = legacy_setup_queue, .del_queue = legacy_del_queue, .notify_queue = legacy_notify_queue, }; - -static inline uint8_t -io_read8(uint8_t *addr) -{ - return *(volatile uint8_t *)addr; -} - -static inline void -io_write8(uint8_t val, uint8_t *addr) -{ - *(volatile uint8_t *)addr = val; -} - -static inline uint16_t -io_read16(uint16_t *addr) -{ - return 
*(volatile uint16_t *)addr; -} - -static inline void -io_write16(uint16_t val, uint16_t *addr) -{ - *(volatile uint16_t *)addr = val; -} - -static inline uint32_t -io_read32(uint32_t *addr) -{ - return *(volatile uint32_t *)addr; -} - -static inline void -io_write32(uint32_t val, uint32_t *addr) -{ - *(volatile uint32_t *)addr = val; -} - static inline void io_write64_twopart(uint64_t val, uint32_t *lo, uint32_t *hi) { - io_write32(val & ((1ULL << 32) - 1), lo); - io_write32(val >> 32, hi); + rte_write32(val & ((1ULL << 32) - 1), lo); + rte_write32(val >> 32, hi); } static void @@ -369,13 +352,13 @@ modern_read_dev_config(struct virtio_hw *hw, size_t offset, uint8_t old_gen, new_gen; do { - old_gen = io_read8(&hw->common_cfg->config_generation); + old_gen = rte_read8(&hw->common_cfg->config_generation); p = dst; for (i = 0; i < length; i++) - *p++ = io_read8((uint8_t *)hw->dev_cfg + offset + i); + *p++ = rte_read8((uint8_t *)hw->dev_cfg + offset + i); - new_gen = io_read8(&hw->common_cfg->config_generation); + new_gen = rte_read8(&hw->common_cfg->config_generation); } while (old_gen != new_gen); } @@ -387,7 +370,7 @@ modern_write_dev_config(struct virtio_hw *hw, size_t offset, const uint8_t *p = src; for (i = 0; i < length; i++) - io_write8(*p++, (uint8_t *)hw->dev_cfg + offset + i); + rte_write8((*p++), (((uint8_t *)hw->dev_cfg) + offset + i)); } static uint64_t @@ -395,11 +378,11 @@ modern_get_features(struct virtio_hw *hw) { uint32_t features_lo, features_hi; - io_write32(0, &hw->common_cfg->device_feature_select); - features_lo = io_read32(&hw->common_cfg->device_feature); + rte_write32(0, &hw->common_cfg->device_feature_select); + features_lo = rte_read32(&hw->common_cfg->device_feature); - io_write32(1, &hw->common_cfg->device_feature_select); - features_hi = io_read32(&hw->common_cfg->device_feature); + rte_write32(1, &hw->common_cfg->device_feature_select); + features_hi = rte_read32(&hw->common_cfg->device_feature); return ((uint64_t)features_hi << 32) | features_lo; } @@ -407,25 +390,25 @@ modern_get_features(struct virtio_hw *hw) static void modern_set_features(struct virtio_hw *hw, uint64_t features) { - io_write32(0, &hw->common_cfg->guest_feature_select); - io_write32(features & ((1ULL << 32) - 1), - &hw->common_cfg->guest_feature); + rte_write32(0, &hw->common_cfg->guest_feature_select); + rte_write32(features & ((1ULL << 32) - 1), + &hw->common_cfg->guest_feature); - io_write32(1, &hw->common_cfg->guest_feature_select); - io_write32(features >> 32, - &hw->common_cfg->guest_feature); + rte_write32(1, &hw->common_cfg->guest_feature_select); + rte_write32(features >> 32, + &hw->common_cfg->guest_feature); } static uint8_t modern_get_status(struct virtio_hw *hw) { - return io_read8(&hw->common_cfg->device_status); + return rte_read8(&hw->common_cfg->device_status); } static void modern_set_status(struct virtio_hw *hw, uint8_t status) { - io_write8(status, &hw->common_cfg->device_status); + rte_write8(status, &hw->common_cfg->device_status); } static void @@ -438,21 +421,29 @@ modern_reset(struct virtio_hw *hw) static uint8_t modern_get_isr(struct virtio_hw *hw) { - return io_read8(hw->isr); + return rte_read8(hw->isr); } static uint16_t modern_set_config_irq(struct virtio_hw *hw, uint16_t vec) { - io_write16(vec, &hw->common_cfg->msix_config); - return io_read16(&hw->common_cfg->msix_config); + rte_write16(vec, &hw->common_cfg->msix_config); + return rte_read16(&hw->common_cfg->msix_config); +} + +static uint16_t +modern_set_queue_irq(struct virtio_hw *hw, struct virtqueue *vq, 
uint16_t vec) +{ + rte_write16(vq->vq_queue_index, &hw->common_cfg->queue_select); + rte_write16(vec, &hw->common_cfg->queue_msix_vector); + return rte_read16(&hw->common_cfg->queue_msix_vector); } static uint16_t modern_get_queue_num(struct virtio_hw *hw, uint16_t queue_id) { - io_write16(queue_id, &hw->common_cfg->queue_select); - return io_read16(&hw->common_cfg->queue_size); + rte_write16(queue_id, &hw->common_cfg->queue_select); + return rte_read16(&hw->common_cfg->queue_size); } static int @@ -470,7 +461,7 @@ modern_setup_queue(struct virtio_hw *hw, struct virtqueue *vq) ring[vq->vq_nentries]), VIRTIO_PCI_VRING_ALIGN); - io_write16(vq->vq_queue_index, &hw->common_cfg->queue_select); + rte_write16(vq->vq_queue_index, &hw->common_cfg->queue_select); io_write64_twopart(desc_addr, &hw->common_cfg->queue_desc_lo, &hw->common_cfg->queue_desc_hi); @@ -479,11 +470,11 @@ modern_setup_queue(struct virtio_hw *hw, struct virtqueue *vq) io_write64_twopart(used_addr, &hw->common_cfg->queue_used_lo, &hw->common_cfg->queue_used_hi); - notify_off = io_read16(&hw->common_cfg->queue_notify_off); + notify_off = rte_read16(&hw->common_cfg->queue_notify_off); vq->notify_addr = (void *)((uint8_t *)hw->notify_base + notify_off * hw->notify_off_multiplier); - io_write16(1, &hw->common_cfg->queue_enable); + rte_write16(1, &hw->common_cfg->queue_enable); PMD_INIT_LOG(DEBUG, "queue %u addresses:", vq->vq_queue_index); PMD_INIT_LOG(DEBUG, "\t desc_addr: %" PRIx64, desc_addr); @@ -498,7 +489,7 @@ modern_setup_queue(struct virtio_hw *hw, struct virtqueue *vq) static void modern_del_queue(struct virtio_hw *hw, struct virtqueue *vq) { - io_write16(vq->vq_queue_index, &hw->common_cfg->queue_select); + rte_write16(vq->vq_queue_index, &hw->common_cfg->queue_select); io_write64_twopart(0, &hw->common_cfg->queue_desc_lo, &hw->common_cfg->queue_desc_hi); @@ -507,16 +498,16 @@ modern_del_queue(struct virtio_hw *hw, struct virtqueue *vq) io_write64_twopart(0, &hw->common_cfg->queue_used_lo, &hw->common_cfg->queue_used_hi); - io_write16(0, &hw->common_cfg->queue_enable); + rte_write16(0, &hw->common_cfg->queue_enable); } static void modern_notify_queue(struct virtio_hw *hw __rte_unused, struct virtqueue *vq) { - io_write16(1, vq->notify_addr); + rte_write16(1, vq->notify_addr); } -static const struct virtio_pci_ops modern_ops = { +const struct virtio_pci_ops modern_ops = { .read_dev_cfg = modern_read_dev_config, .write_dev_cfg = modern_write_dev_config, .reset = modern_reset, @@ -526,6 +517,7 @@ static const struct virtio_pci_ops modern_ops = { .set_features = modern_set_features, .get_isr = modern_get_isr, .set_config_irq = modern_set_config_irq, + .set_queue_irq = modern_set_queue_irq, .get_queue_num = modern_get_queue_num, .setup_queue = modern_setup_queue, .del_queue = modern_del_queue, @@ -537,14 +529,14 @@ void vtpci_read_dev_config(struct virtio_hw *hw, size_t offset, void *dst, int length) { - hw->vtpci_ops->read_dev_cfg(hw, offset, dst, length); + VTPCI_OPS(hw)->read_dev_cfg(hw, offset, dst, length); } void vtpci_write_dev_config(struct virtio_hw *hw, size_t offset, const void *src, int length) { - hw->vtpci_ops->write_dev_cfg(hw, offset, src, length); + VTPCI_OPS(hw)->write_dev_cfg(hw, offset, src, length); } uint64_t @@ -557,7 +549,7 @@ vtpci_negotiate_features(struct virtio_hw *hw, uint64_t host_features) * host all support. 
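/* Editor's note -- illustrative sketch, not part of the patch. The modern
 * BAR exposes 64-bit queue addresses as lo/hi register pairs, so the driver
 * splits each value into two 32-bit MMIO writes; with the rte_io.h accessors
 * adopted above this is simply: */
#include <stdint.h>
#include <rte_io.h>

static void
toy_write64_twopart(uint64_t val, volatile uint32_t *lo, volatile uint32_t *hi)
{
	rte_write32((uint32_t)(val & 0xffffffffu), lo);  /* low 32 bits first */
	rte_write32((uint32_t)(val >> 32), hi);          /* then high 32 bits */
}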
*/ features = host_features & hw->guest_features; - hw->vtpci_ops->set_features(hw, features); + VTPCI_OPS(hw)->set_features(hw, features); return features; } @@ -565,9 +557,9 @@ vtpci_negotiate_features(struct virtio_hw *hw, uint64_t host_features) void vtpci_reset(struct virtio_hw *hw) { - hw->vtpci_ops->set_status(hw, VIRTIO_CONFIG_STATUS_RESET); + VTPCI_OPS(hw)->set_status(hw, VIRTIO_CONFIG_STATUS_RESET); /* flush status write */ - hw->vtpci_ops->get_status(hw); + VTPCI_OPS(hw)->get_status(hw); } void @@ -580,29 +572,21 @@ void vtpci_set_status(struct virtio_hw *hw, uint8_t status) { if (status != VIRTIO_CONFIG_STATUS_RESET) - status |= hw->vtpci_ops->get_status(hw); + status |= VTPCI_OPS(hw)->get_status(hw); - hw->vtpci_ops->set_status(hw, status); + VTPCI_OPS(hw)->set_status(hw, status); } uint8_t vtpci_get_status(struct virtio_hw *hw) { - return hw->vtpci_ops->get_status(hw); + return VTPCI_OPS(hw)->get_status(hw); } uint8_t vtpci_isr(struct virtio_hw *hw) { - return hw->vtpci_ops->get_isr(hw); -} - - -/* Enable one vector (0) for Link State Intrerrupt */ -uint16_t -vtpci_irq_config(struct virtio_hw *hw, uint16_t vec) -{ - return hw->vtpci_ops->set_config_irq(hw, vec); + return VTPCI_OPS(hw)->get_isr(hw); } static void * @@ -727,8 +711,6 @@ int vtpci_init(struct rte_pci_device *dev, struct virtio_hw *hw, uint32_t *dev_flags) { - hw->dev = dev; - /* * Try if we can succeed reading virtio pci caps, which exists * only on modern pci device. If failed, we fallback to legacy @@ -736,8 +718,8 @@ vtpci_init(struct rte_pci_device *dev, struct virtio_hw *hw, */ if (virtio_read_caps(dev, hw) == 0) { PMD_INIT_LOG(INFO, "modern virtio pci detected."); - hw->vtpci_ops = &modern_ops; - hw->modern = 1; + virtio_hw_internal[hw->port_id].vtpci_ops = &modern_ops; + hw->modern = 1; *dev_flags |= RTE_ETH_DEV_INTR_LSC; return 0; } @@ -745,8 +727,9 @@ vtpci_init(struct rte_pci_device *dev, struct virtio_hw *hw, PMD_INIT_LOG(INFO, "trying with legacy virtio pci."); if (legacy_virtio_resource_init(dev, hw, dev_flags) < 0) { if (dev->kdrv == RTE_KDRV_UNKNOWN && - (!dev->devargs || - dev->devargs->type != RTE_DEVTYPE_WHITELISTED_PCI)) { + (!dev->device.devargs || + dev->device.devargs->type != + RTE_DEVTYPE_WHITELISTED_PCI)) { PMD_INIT_LOG(INFO, "skip kernel managed virtio device."); return 1; @@ -754,7 +737,7 @@ vtpci_init(struct rte_pci_device *dev, struct virtio_hw *hw, return -1; } - hw->vtpci_ops = &legacy_ops; + virtio_hw_internal[hw->port_id].vtpci_ops = &legacy_ops; hw->use_msix = legacy_virtio_has_msix(&dev->addr); hw->modern = 0; diff --git a/src/dpdk/drivers/net/virtio/virtio_pci.h b/src/dpdk/drivers/net/virtio/virtio_pci.h index dd7693fe..59e45c4d 100644 --- a/src/dpdk/drivers/net/virtio/virtio_pci.h +++ b/src/dpdk/drivers/net/virtio/virtio_pci.h @@ -44,8 +44,8 @@ struct virtnet_ctl; /* VirtIO PCI vendor/device ID. */ #define VIRTIO_PCI_VENDORID 0x1AF4 -#define VIRTIO_PCI_DEVICEID_MIN 0x1000 -#define VIRTIO_PCI_DEVICEID_MAX 0x103F +#define VIRTIO_PCI_LEGACY_DEVICEID_NET 0x1000 +#define VIRTIO_PCI_MODERN_DEVICEID_NET 0x1041 /* VirtIO ABI version, this must match exactly. */ #define VIRTIO_PCI_ABI_VERSION 0 @@ -138,6 +138,7 @@ struct virtnet_ctl; #define VIRTIO_RING_F_INDIRECT_DESC 28 #define VIRTIO_F_VERSION_1 32 +#define VIRTIO_F_IOMMU_PLATFORM 33 /* * Some VirtIO feature bits (currently bits 28 through 31) are @@ -145,7 +146,7 @@ struct virtnet_ctl; * rest are per-device feature bits. 
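/* Editor's note -- illustrative sketch, not part of the patch. struct
 * virtio_hw lives in shared memory, so a function-pointer table stored in it
 * would only be valid in the process that filled it in; text addresses
 * differ between primary and secondary processes. Keeping the ops pointer in
 * a per-process array indexed by port id, as virtio_hw_internal and the
 * VTPCI_OPS()/VTPCI_IO() macros do, sidesteps that. A stand-alone
 * illustration of the pattern (toy types, not the driver's): */
#define TOY_MAX_PORTS 32

struct toy_ops { int (*start)(void *hw); };

/* one slot per port, allocated in *each* process's private memory */
static const struct toy_ops *toy_ops_tbl[TOY_MAX_PORTS];

struct toy_hw { unsigned char port_id; /* lives in shared memory */ };

#define TOY_OPS(hw) (toy_ops_tbl[(hw)->port_id])

static int
toy_start(struct toy_hw *hw)
{
	/* each process installed its own pointer into toy_ops_tbl earlier */
	return TOY_OPS(hw)->start(hw);
}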
*/ #define VIRTIO_TRANSPORT_F_START 28 -#define VIRTIO_TRANSPORT_F_END 32 +#define VIRTIO_TRANSPORT_F_END 34 /* The Guest publishes the used index for which it expects an interrupt * at the end of the avail ring. Host should ignore the avail->flags field. */ @@ -234,6 +235,9 @@ struct virtio_pci_ops { uint16_t (*set_config_irq)(struct virtio_hw *hw, uint16_t vec); + uint16_t (*set_queue_irq)(struct virtio_hw *hw, struct virtqueue *vq, + uint16_t vec); + uint16_t (*get_queue_num)(struct virtio_hw *hw, uint16_t queue_id); int (*setup_queue)(struct virtio_hw *hw, struct virtqueue *vq); void (*del_queue)(struct virtio_hw *hw, struct virtqueue *vq); @@ -244,26 +248,43 @@ struct virtio_net_config; struct virtio_hw { struct virtnet_ctl *cvq; - struct rte_pci_ioport io; + uint64_t req_guest_features; uint64_t guest_features; - uint32_t max_tx_queues; - uint32_t max_rx_queues; + uint32_t max_queue_pairs; uint16_t vtnet_hdr_size; uint8_t vlan_strip; uint8_t use_msix; - uint8_t started; uint8_t modern; + uint8_t use_simple_rxtx; + uint8_t port_id; uint8_t mac_addr[ETHER_ADDR_LEN]; uint32_t notify_off_multiplier; uint8_t *isr; uint16_t *notify_base; - struct rte_pci_device *dev; struct virtio_pci_common_cfg *common_cfg; struct virtio_net_config *dev_cfg; - const struct virtio_pci_ops *vtpci_ops; void *virtio_user_dev; + + struct virtqueue **vqs; +}; + + +/* + * While virtio_hw is stored in shared memory, this structure stores + * some infos that may vary in the multiple process model locally. + * For example, the vtpci_ops pointer. + */ +struct virtio_hw_internal { + const struct virtio_pci_ops *vtpci_ops; + struct rte_pci_ioport io; }; +#define VTPCI_OPS(hw) (virtio_hw_internal[(hw)->port_id].vtpci_ops) +#define VTPCI_IO(hw) (&virtio_hw_internal[(hw)->port_id].io) + +extern struct virtio_hw_internal virtio_hw_internal[RTE_MAX_ETHPORTS]; + + /* * This structure is just a reference to read * net device specific config space; it just a chodu structure @@ -312,6 +333,8 @@ void vtpci_read_dev_config(struct virtio_hw *, size_t, void *, int); uint8_t vtpci_isr(struct virtio_hw *); -uint16_t vtpci_irq_config(struct virtio_hw *, uint16_t); +extern const struct virtio_pci_ops legacy_ops; +extern const struct virtio_pci_ops modern_ops; +extern const struct virtio_pci_ops virtio_user_ops; #endif /* _VIRTIO_PCI_H_ */ diff --git a/src/dpdk/drivers/net/virtio/virtio_rxtx.c b/src/dpdk/drivers/net/virtio/virtio_rxtx.c index 724517e2..cab6e8fc 100644 --- a/src/dpdk/drivers/net/virtio/virtio_rxtx.c +++ b/src/dpdk/drivers/net/virtio/virtio_rxtx.c @@ -50,6 +50,11 @@ #include <rte_string_fns.h> #include <rte_errno.h> #include <rte_byteorder.h> +#include <rte_cpuflags.h> +#include <rte_net.h> +#include <rte_ip.h> +#include <rte_udp.h> +#include <rte_tcp.h> #include "virtio_logs.h" #include "virtio_ethdev.h" @@ -67,9 +72,14 @@ #define VIRTIO_SIMPLE_FLAGS ((uint32_t)ETH_TXQ_FLAGS_NOMULTSEGS | \ ETH_TXQ_FLAGS_NOOFFLOADS) -#ifdef RTE_MACHINE_CPUFLAG_SSSE3 -static int use_simple_rxtx; -#endif +int +virtio_dev_rx_queue_done(void *rxq, uint16_t offset) +{ + struct virtnet_rx *rxvq = rxq; + struct virtqueue *vq = rxvq->vq; + + return VIRTQUEUE_NUSED(vq) >= offset; +} static void vq_ring_free_chain(struct virtqueue *vq, uint16_t desc_idx) @@ -123,7 +133,7 @@ virtqueue_dequeue_burst_rx(struct virtqueue *vq, struct rte_mbuf **rx_pkts, cookie = (struct rte_mbuf *)vq->vq_descx[desc_idx].cookie; if (unlikely(cookie == NULL)) { - PMD_DRV_LOG(ERR, "vring descriptor with no mbuf cookie at %u\n", + PMD_DRV_LOG(ERR, "vring descriptor with no 
mbuf cookie at %u", vq->vq_used_cons_idx); break; } @@ -208,18 +218,76 @@ virtqueue_enqueue_recv_refill(struct virtqueue *vq, struct rte_mbuf *cookie) return 0; } +/* When doing TSO, the IP length is not included in the pseudo header + * checksum of the packet given to the PMD, but for virtio it is + * expected. + */ +static void +virtio_tso_fix_cksum(struct rte_mbuf *m) +{ + /* common case: header is not fragmented */ + if (likely(rte_pktmbuf_data_len(m) >= m->l2_len + m->l3_len + + m->l4_len)) { + struct ipv4_hdr *iph; + struct ipv6_hdr *ip6h; + struct tcp_hdr *th; + uint16_t prev_cksum, new_cksum, ip_len, ip_paylen; + uint32_t tmp; + + iph = rte_pktmbuf_mtod_offset(m, struct ipv4_hdr *, m->l2_len); + th = RTE_PTR_ADD(iph, m->l3_len); + if ((iph->version_ihl >> 4) == 4) { + iph->hdr_checksum = 0; + iph->hdr_checksum = rte_ipv4_cksum(iph); + ip_len = iph->total_length; + ip_paylen = rte_cpu_to_be_16(rte_be_to_cpu_16(ip_len) - + m->l3_len); + } else { + ip6h = (struct ipv6_hdr *)iph; + ip_paylen = ip6h->payload_len; + } + + /* calculate the new phdr checksum not including ip_paylen */ + prev_cksum = th->cksum; + tmp = prev_cksum; + tmp += ip_paylen; + tmp = (tmp & 0xffff) + (tmp >> 16); + new_cksum = tmp; + + /* replace it in the packet */ + th->cksum = new_cksum; + } +} + +static inline int +tx_offload_enabled(struct virtio_hw *hw) +{ + return vtpci_with_feature(hw, VIRTIO_NET_F_CSUM) || + vtpci_with_feature(hw, VIRTIO_NET_F_HOST_TSO4) || + vtpci_with_feature(hw, VIRTIO_NET_F_HOST_TSO6); +} + +/* avoid write operation when necessary, to lessen cache issues */ +#define ASSIGN_UNLESS_EQUAL(var, val) do { \ + if ((var) != (val)) \ + (var) = (val); \ +} while (0) + static inline void virtqueue_enqueue_xmit(struct virtnet_tx *txvq, struct rte_mbuf *cookie, uint16_t needed, int use_indirect, int can_push) { + struct virtio_tx_region *txr = txvq->virtio_net_hdr_mz->addr; struct vq_desc_extra *dxp; struct virtqueue *vq = txvq->vq; struct vring_desc *start_dp; uint16_t seg_num = cookie->nb_segs; uint16_t head_idx, idx; uint16_t head_size = vq->hw->vtnet_hdr_size; - unsigned long offs; + struct virtio_net_hdr *hdr; + int offload; + offload = tx_offload_enabled(vq->hw); head_idx = vq->vq_desc_head_idx; idx = head_idx; dxp = &vq->vq_descx[idx]; @@ -229,10 +297,18 @@ virtqueue_enqueue_xmit(struct virtnet_tx *txvq, struct rte_mbuf *cookie, start_dp = vq->vq_ring.desc; if (can_push) { - /* put on zero'd transmit header (no offloads) */ - void *hdr = rte_pktmbuf_prepend(cookie, head_size); - - memset(hdr, 0, head_size); + /* prepend cannot fail, checked by caller */ + hdr = (struct virtio_net_hdr *) + rte_pktmbuf_prepend(cookie, head_size); + /* if offload disabled, it is not zeroed below, do it now */ + if (offload == 0) { + ASSIGN_UNLESS_EQUAL(hdr->csum_start, 0); + ASSIGN_UNLESS_EQUAL(hdr->csum_offset, 0); + ASSIGN_UNLESS_EQUAL(hdr->flags, 0); + ASSIGN_UNLESS_EQUAL(hdr->gso_type, 0); + ASSIGN_UNLESS_EQUAL(hdr->gso_size, 0); + ASSIGN_UNLESS_EQUAL(hdr->hdr_len, 0); + } } else if (use_indirect) { /* setup tx ring slot to point to indirect * descriptor list stored in reserved region. 
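/* Editor's note -- worked example, not part of the patch. Applications doing
 * TSO seed th->cksum with a pseudo-header checksum that leaves the payload
 * length out; virtio expects it included. virtio_tso_fix_cksum() above
 * therefore just adds the IP payload length to the existing 16-bit sum with
 * end-around carry. With made-up numbers: if the seeded pseudo-header sum is
 * 0xfffe and the payload length is 0x0010, then
 *     0xfffe + 0x0010 = 0x1000e  ->  fold: 0x000e + 0x0001 = 0x000f,
 * and 0x000f is written back into the TCP header. The core step in code: */
#include <stdint.h>

static uint16_t
toy_cksum_add(uint16_t cksum, uint16_t ip_paylen)
{
	/* both values kept in stored byte order, as the driver does */
	uint32_t tmp = (uint32_t)cksum + ip_paylen;

	tmp = (tmp & 0xffff) + (tmp >> 16);   /* one fold is always enough */
	return (uint16_t)tmp;
}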
@@ -240,14 +316,11 @@ virtqueue_enqueue_xmit(struct virtnet_tx *txvq, struct rte_mbuf *cookie, * the first slot in indirect ring is already preset * to point to the header in reserved region */ - struct virtio_tx_region *txr = txvq->virtio_net_hdr_mz->addr; - - offs = idx * sizeof(struct virtio_tx_region) - + offsetof(struct virtio_tx_region, tx_indir); - - start_dp[idx].addr = txvq->virtio_net_hdr_mem + offs; + start_dp[idx].addr = txvq->virtio_net_hdr_mem + + RTE_PTR_DIFF(&txr[idx].tx_indir, txr); start_dp[idx].len = (seg_num + 1) * sizeof(struct vring_desc); start_dp[idx].flags = VRING_DESC_F_INDIRECT; + hdr = (struct virtio_net_hdr *)&txr[idx].tx_hdr; /* loop below will fill in rest of the indirect elements */ start_dp = txr[idx].tx_indir; @@ -256,15 +329,59 @@ virtqueue_enqueue_xmit(struct virtnet_tx *txvq, struct rte_mbuf *cookie, /* setup first tx ring slot to point to header * stored in reserved region. */ - offs = idx * sizeof(struct virtio_tx_region) - + offsetof(struct virtio_tx_region, tx_hdr); - - start_dp[idx].addr = txvq->virtio_net_hdr_mem + offs; + start_dp[idx].addr = txvq->virtio_net_hdr_mem + + RTE_PTR_DIFF(&txr[idx].tx_hdr, txr); start_dp[idx].len = vq->hw->vtnet_hdr_size; start_dp[idx].flags = VRING_DESC_F_NEXT; + hdr = (struct virtio_net_hdr *)&txr[idx].tx_hdr; + idx = start_dp[idx].next; } + /* Checksum Offload / TSO */ + if (offload) { + if (cookie->ol_flags & PKT_TX_TCP_SEG) + cookie->ol_flags |= PKT_TX_TCP_CKSUM; + + switch (cookie->ol_flags & PKT_TX_L4_MASK) { + case PKT_TX_UDP_CKSUM: + hdr->csum_start = cookie->l2_len + cookie->l3_len; + hdr->csum_offset = offsetof(struct udp_hdr, + dgram_cksum); + hdr->flags = VIRTIO_NET_HDR_F_NEEDS_CSUM; + break; + + case PKT_TX_TCP_CKSUM: + hdr->csum_start = cookie->l2_len + cookie->l3_len; + hdr->csum_offset = offsetof(struct tcp_hdr, cksum); + hdr->flags = VIRTIO_NET_HDR_F_NEEDS_CSUM; + break; + + default: + ASSIGN_UNLESS_EQUAL(hdr->csum_start, 0); + ASSIGN_UNLESS_EQUAL(hdr->csum_offset, 0); + ASSIGN_UNLESS_EQUAL(hdr->flags, 0); + break; + } + + /* TCP Segmentation Offload */ + if (cookie->ol_flags & PKT_TX_TCP_SEG) { + virtio_tso_fix_cksum(cookie); + hdr->gso_type = (cookie->ol_flags & PKT_TX_IPV6) ? 
+ VIRTIO_NET_HDR_GSO_TCPV6 : + VIRTIO_NET_HDR_GSO_TCPV4; + hdr->gso_size = cookie->tso_segsz; + hdr->hdr_len = + cookie->l2_len + + cookie->l3_len + + cookie->l4_len; + } else { + ASSIGN_UNLESS_EQUAL(hdr->gso_type, 0); + ASSIGN_UNLESS_EQUAL(hdr->gso_size, 0); + ASSIGN_UNLESS_EQUAL(hdr->hdr_len, 0); + } + } + do { start_dp[idx].addr = VIRTIO_MBUF_DATA_DMA_ADDR(cookie, vq); start_dp[idx].len = cookie->data_len; @@ -282,207 +399,120 @@ virtqueue_enqueue_xmit(struct virtnet_tx *txvq, struct rte_mbuf *cookie, vq_update_avail_ring(vq, head_idx); } -static void -virtio_dev_vring_start(struct virtqueue *vq) -{ - int size = vq->vq_nentries; - struct vring *vr = &vq->vq_ring; - uint8_t *ring_mem = vq->vq_ring_virt_mem; - - PMD_INIT_FUNC_TRACE(); - - /* - * Reinitialise since virtio port might have been stopped and restarted - */ - memset(vq->vq_ring_virt_mem, 0, vq->vq_ring_size); - vring_init(vr, size, ring_mem, VIRTIO_PCI_VRING_ALIGN); - vq->vq_used_cons_idx = 0; - vq->vq_desc_head_idx = 0; - vq->vq_avail_idx = 0; - vq->vq_desc_tail_idx = (uint16_t)(vq->vq_nentries - 1); - vq->vq_free_cnt = vq->vq_nentries; - memset(vq->vq_descx, 0, sizeof(struct vq_desc_extra) * vq->vq_nentries); - - vring_desc_init(vr->desc, size); - - /* - * Disable device(host) interrupting guest - */ - virtqueue_disable_intr(vq); -} - void virtio_dev_cq_start(struct rte_eth_dev *dev) { struct virtio_hw *hw = dev->data->dev_private; if (hw->cvq && hw->cvq->vq) { - virtio_dev_vring_start(hw->cvq->vq); VIRTQUEUE_DUMP((struct virtqueue *)hw->cvq->vq); } } -void -virtio_dev_rxtx_start(struct rte_eth_dev *dev) +int +virtio_dev_rx_queue_setup(struct rte_eth_dev *dev, + uint16_t queue_idx, + uint16_t nb_desc, + unsigned int socket_id __rte_unused, + __rte_unused const struct rte_eth_rxconf *rx_conf, + struct rte_mempool *mp) { - /* - * Start receive and transmit vrings - * - Setup vring structure for all queues - * - Initialize descriptor for the rx vring - * - Allocate blank mbufs for the each rx descriptor - * - */ - uint16_t i; + uint16_t vtpci_queue_idx = 2 * queue_idx + VTNET_SQ_RQ_QUEUE_IDX; + struct virtio_hw *hw = dev->data->dev_private; + struct virtqueue *vq = hw->vqs[vtpci_queue_idx]; + struct virtnet_rx *rxvq; + int error, nbufs; + struct rte_mbuf *m; uint16_t desc_idx; PMD_INIT_FUNC_TRACE(); - /* Start rx vring. 
*/ - for (i = 0; i < dev->data->nb_rx_queues; i++) { - struct virtnet_rx *rxvq = dev->data->rx_queues[i]; - struct virtqueue *vq = rxvq->vq; - int error, nbufs; - struct rte_mbuf *m; - - virtio_dev_vring_start(vq); - if (rxvq->mpool == NULL) { - rte_exit(EXIT_FAILURE, - "Cannot allocate mbufs for rx virtqueue"); - } - - /* Allocate blank mbufs for the each rx descriptor */ - nbufs = 0; - error = ENOSPC; - -#ifdef RTE_MACHINE_CPUFLAG_SSSE3 - if (use_simple_rxtx) { - for (desc_idx = 0; desc_idx < vq->vq_nentries; - desc_idx++) { - vq->vq_ring.avail->ring[desc_idx] = desc_idx; - vq->vq_ring.desc[desc_idx].flags = - VRING_DESC_F_WRITE; - } - } -#endif - memset(&rxvq->fake_mbuf, 0, sizeof(rxvq->fake_mbuf)); - for (desc_idx = 0; desc_idx < RTE_PMD_VIRTIO_RX_MAX_BURST; - desc_idx++) { - vq->sw_ring[vq->vq_nentries + desc_idx] = - &rxvq->fake_mbuf; - } - - while (!virtqueue_full(vq)) { - m = rte_mbuf_raw_alloc(rxvq->mpool); - if (m == NULL) - break; + if (nb_desc == 0 || nb_desc > vq->vq_nentries) + nb_desc = vq->vq_nentries; + vq->vq_free_cnt = RTE_MIN(vq->vq_free_cnt, nb_desc); - /****************************************** - * Enqueue allocated buffers * - *******************************************/ -#ifdef RTE_MACHINE_CPUFLAG_SSSE3 - if (use_simple_rxtx) - error = virtqueue_enqueue_recv_refill_simple(vq, m); - else -#endif - error = virtqueue_enqueue_recv_refill(vq, m); - if (error) { - rte_pktmbuf_free(m); - break; - } - nbufs++; - } + rxvq = &vq->rxq; + rxvq->queue_id = queue_idx; + rxvq->mpool = mp; + if (rxvq->mpool == NULL) { + rte_exit(EXIT_FAILURE, + "Cannot allocate mbufs for rx virtqueue"); + } + dev->data->rx_queues[queue_idx] = rxvq; - vq_update_avail_idx(vq); - PMD_INIT_LOG(DEBUG, "Allocated %d bufs", nbufs); + /* Allocate blank mbufs for the each rx descriptor */ + nbufs = 0; + error = ENOSPC; - VIRTQUEUE_DUMP(vq); + if (hw->use_simple_rxtx) { + for (desc_idx = 0; desc_idx < vq->vq_nentries; + desc_idx++) { + vq->vq_ring.avail->ring[desc_idx] = desc_idx; + vq->vq_ring.desc[desc_idx].flags = + VRING_DESC_F_WRITE; + } } - /* Start tx vring. 
*/ - for (i = 0; i < dev->data->nb_tx_queues; i++) { - struct virtnet_tx *txvq = dev->data->tx_queues[i]; - struct virtqueue *vq = txvq->vq; - - virtio_dev_vring_start(vq); -#ifdef RTE_MACHINE_CPUFLAG_SSSE3 - if (use_simple_rxtx) { - uint16_t mid_idx = vq->vq_nentries >> 1; - - for (desc_idx = 0; desc_idx < mid_idx; desc_idx++) { - vq->vq_ring.avail->ring[desc_idx] = - desc_idx + mid_idx; - vq->vq_ring.desc[desc_idx + mid_idx].next = - desc_idx; - vq->vq_ring.desc[desc_idx + mid_idx].addr = - txvq->virtio_net_hdr_mem + - offsetof(struct virtio_tx_region, tx_hdr); - vq->vq_ring.desc[desc_idx + mid_idx].len = - vq->hw->vtnet_hdr_size; - vq->vq_ring.desc[desc_idx + mid_idx].flags = - VRING_DESC_F_NEXT; - vq->vq_ring.desc[desc_idx].flags = 0; - } - for (desc_idx = mid_idx; desc_idx < vq->vq_nentries; - desc_idx++) - vq->vq_ring.avail->ring[desc_idx] = desc_idx; - } -#endif - VIRTQUEUE_DUMP(vq); + memset(&rxvq->fake_mbuf, 0, sizeof(rxvq->fake_mbuf)); + for (desc_idx = 0; desc_idx < RTE_PMD_VIRTIO_RX_MAX_BURST; + desc_idx++) { + vq->sw_ring[vq->vq_nentries + desc_idx] = + &rxvq->fake_mbuf; } -} -int -virtio_dev_rx_queue_setup(struct rte_eth_dev *dev, - uint16_t queue_idx, - uint16_t nb_desc, - unsigned int socket_id, - __rte_unused const struct rte_eth_rxconf *rx_conf, - struct rte_mempool *mp) -{ - uint16_t vtpci_queue_idx = 2 * queue_idx + VTNET_SQ_RQ_QUEUE_IDX; - struct virtnet_rx *rxvq; - int ret; + while (!virtqueue_full(vq)) { + m = rte_mbuf_raw_alloc(rxvq->mpool); + if (m == NULL) + break; - PMD_INIT_FUNC_TRACE(); - ret = virtio_dev_queue_setup(dev, VTNET_RQ, queue_idx, vtpci_queue_idx, - nb_desc, socket_id, (void **)&rxvq); - if (ret < 0) { - PMD_INIT_LOG(ERR, "rvq initialization failed"); - return ret; + /* Enqueue allocated buffers */ + if (hw->use_simple_rxtx) + error = virtqueue_enqueue_recv_refill_simple(vq, m); + else + error = virtqueue_enqueue_recv_refill(vq, m); + + if (error) { + rte_pktmbuf_free(m); + break; + } + nbufs++; } - /* Create mempool for rx mbuf allocation */ - rxvq->mpool = mp; + vq_update_avail_idx(vq); - dev->data->rx_queues[queue_idx] = rxvq; + PMD_INIT_LOG(DEBUG, "Allocated %d bufs", nbufs); -#ifdef RTE_MACHINE_CPUFLAG_SSSE3 virtio_rxq_vec_setup(rxvq); -#endif + + VIRTQUEUE_DUMP(vq); return 0; } -void -virtio_dev_rx_queue_release(void *rxq) +static void +virtio_update_rxtx_handler(struct rte_eth_dev *dev, + const struct rte_eth_txconf *tx_conf) { - struct virtnet_rx *rxvq = rxq; - struct virtqueue *vq; - const struct rte_memzone *mz; - - if (rxvq == NULL) - return; - - /* - * rxvq is freed when vq is freed, and as mz should be freed after the - * del_queue, so we reserve the mz pointer first. 
- */ - vq = rxvq->vq; - mz = rxvq->mz; + uint8_t use_simple_rxtx = 0; + struct virtio_hw *hw = dev->data->dev_private; - virtio_dev_queue_release(vq); - rte_memzone_free(mz); +#if defined RTE_ARCH_X86 + if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_SSE3)) + use_simple_rxtx = 1; +#elif defined RTE_ARCH_ARM64 || defined CONFIG_RTE_ARCH_ARM + if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_NEON)) + use_simple_rxtx = 1; +#endif + /* Use simple rx/tx func if single segment and no offloads */ + if (use_simple_rxtx && + (tx_conf->txq_flags & VIRTIO_SIMPLE_FLAGS) == VIRTIO_SIMPLE_FLAGS && + !vtpci_with_feature(hw, VIRTIO_NET_F_MRG_RXBUF)) { + PMD_INIT_LOG(INFO, "Using simple rx/tx path"); + dev->tx_pkt_burst = virtio_xmit_pkts_simple; + dev->rx_pkt_burst = virtio_recv_pkts_vec; + hw->use_simple_rxtx = use_simple_rxtx; + } } /* @@ -496,45 +526,26 @@ int virtio_dev_tx_queue_setup(struct rte_eth_dev *dev, uint16_t queue_idx, uint16_t nb_desc, - unsigned int socket_id, + unsigned int socket_id __rte_unused, const struct rte_eth_txconf *tx_conf) { uint8_t vtpci_queue_idx = 2 * queue_idx + VTNET_SQ_TQ_QUEUE_IDX; - -#ifdef RTE_MACHINE_CPUFLAG_SSSE3 struct virtio_hw *hw = dev->data->dev_private; -#endif + struct virtqueue *vq = hw->vqs[vtpci_queue_idx]; struct virtnet_tx *txvq; - struct virtqueue *vq; uint16_t tx_free_thresh; - int ret; + uint16_t desc_idx; PMD_INIT_FUNC_TRACE(); - if ((tx_conf->txq_flags & ETH_TXQ_FLAGS_NOXSUMS) - != ETH_TXQ_FLAGS_NOXSUMS) { - PMD_INIT_LOG(ERR, "TX checksum offload not supported\n"); - return -EINVAL; - } + virtio_update_rxtx_handler(dev, tx_conf); -#ifdef RTE_MACHINE_CPUFLAG_SSSE3 - /* Use simple rx/tx func if single segment and no offloads */ - if ((tx_conf->txq_flags & VIRTIO_SIMPLE_FLAGS) == VIRTIO_SIMPLE_FLAGS && - !vtpci_with_feature(hw, VIRTIO_NET_F_MRG_RXBUF)) { - PMD_INIT_LOG(INFO, "Using simple rx/tx path"); - dev->tx_pkt_burst = virtio_xmit_pkts_simple; - dev->rx_pkt_burst = virtio_recv_pkts_vec; - use_simple_rxtx = 1; - } -#endif + if (nb_desc == 0 || nb_desc > vq->vq_nentries) + nb_desc = vq->vq_nentries; + vq->vq_free_cnt = RTE_MIN(vq->vq_free_cnt, nb_desc); - ret = virtio_dev_queue_setup(dev, VTNET_TQ, queue_idx, vtpci_queue_idx, - nb_desc, socket_id, (void **)&txvq); - if (ret < 0) { - PMD_INIT_LOG(ERR, "tvq initialization failed"); - return ret; - } - vq = txvq->vq; + txvq = &vq->txq; + txvq->queue_id = queue_idx; tx_free_thresh = tx_conf->tx_free_thresh; if (tx_free_thresh == 0) @@ -552,32 +563,32 @@ virtio_dev_tx_queue_setup(struct rte_eth_dev *dev, vq->vq_free_thresh = tx_free_thresh; - dev->data->tx_queues[queue_idx] = txvq; - return 0; -} - -void -virtio_dev_tx_queue_release(void *txq) -{ - struct virtnet_tx *txvq = txq; - struct virtqueue *vq; - const struct rte_memzone *mz; - const struct rte_memzone *hdr_mz; - - if (txvq == NULL) - return; + if (hw->use_simple_rxtx) { + uint16_t mid_idx = vq->vq_nentries >> 1; + + for (desc_idx = 0; desc_idx < mid_idx; desc_idx++) { + vq->vq_ring.avail->ring[desc_idx] = + desc_idx + mid_idx; + vq->vq_ring.desc[desc_idx + mid_idx].next = + desc_idx; + vq->vq_ring.desc[desc_idx + mid_idx].addr = + txvq->virtio_net_hdr_mem + + offsetof(struct virtio_tx_region, tx_hdr); + vq->vq_ring.desc[desc_idx + mid_idx].len = + vq->hw->vtnet_hdr_size; + vq->vq_ring.desc[desc_idx + mid_idx].flags = + VRING_DESC_F_NEXT; + vq->vq_ring.desc[desc_idx].flags = 0; + } + for (desc_idx = mid_idx; desc_idx < vq->vq_nentries; + desc_idx++) + vq->vq_ring.avail->ring[desc_idx] = desc_idx; + } - /* - * txvq is freed when vq is freed, and as mz should be 
freed after the - * del_queue, so we reserve the mz pointer first. - */ - vq = txvq->vq; - mz = txvq->mz; - hdr_mz = txvq->virtio_net_hdr_mz; + VIRTQUEUE_DUMP(vq); - virtio_dev_queue_release(vq); - rte_memzone_free(mz); - rte_memzone_free(hdr_mz); + dev->data->tx_queues[queue_idx] = txvq; + return 0; } static void @@ -627,6 +638,86 @@ virtio_update_packet_stats(struct virtnet_stats *stats, struct rte_mbuf *mbuf) } } +/* Optionally fill offload information in structure */ +static int +virtio_rx_offload(struct rte_mbuf *m, struct virtio_net_hdr *hdr) +{ + struct rte_net_hdr_lens hdr_lens; + uint32_t hdrlen, ptype; + int l4_supported = 0; + + /* nothing to do */ + if (hdr->flags == 0 && hdr->gso_type == VIRTIO_NET_HDR_GSO_NONE) + return 0; + + m->ol_flags |= PKT_RX_IP_CKSUM_UNKNOWN; + + ptype = rte_net_get_ptype(m, &hdr_lens, RTE_PTYPE_ALL_MASK); + m->packet_type = ptype; + if ((ptype & RTE_PTYPE_L4_MASK) == RTE_PTYPE_L4_TCP || + (ptype & RTE_PTYPE_L4_MASK) == RTE_PTYPE_L4_UDP || + (ptype & RTE_PTYPE_L4_MASK) == RTE_PTYPE_L4_SCTP) + l4_supported = 1; + + if (hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) { + hdrlen = hdr_lens.l2_len + hdr_lens.l3_len + hdr_lens.l4_len; + if (hdr->csum_start <= hdrlen && l4_supported) { + m->ol_flags |= PKT_RX_L4_CKSUM_NONE; + } else { + /* Unknown proto or tunnel, do sw cksum. We can assume + * the cksum field is in the first segment since the + * buffers we provided to the host are large enough. + * In case of SCTP, this will be wrong since it's a CRC + * but there's nothing we can do. + */ + uint16_t csum, off; + + rte_raw_cksum_mbuf(m, hdr->csum_start, + rte_pktmbuf_pkt_len(m) - hdr->csum_start, + &csum); + if (likely(csum != 0xffff)) + csum = ~csum; + off = hdr->csum_offset + hdr->csum_start; + if (rte_pktmbuf_data_len(m) >= off + 1) + *rte_pktmbuf_mtod_offset(m, uint16_t *, + off) = csum; + } + } else if (hdr->flags & VIRTIO_NET_HDR_F_DATA_VALID && l4_supported) { + m->ol_flags |= PKT_RX_L4_CKSUM_GOOD; + } + + /* GSO request, save required information in mbuf */ + if (hdr->gso_type != VIRTIO_NET_HDR_GSO_NONE) { + /* Check unsupported modes */ + if ((hdr->gso_type & VIRTIO_NET_HDR_GSO_ECN) || + (hdr->gso_size == 0)) { + return -EINVAL; + } + + /* Update mss lengthes in mbuf */ + m->tso_segsz = hdr->gso_size; + switch (hdr->gso_type & ~VIRTIO_NET_HDR_GSO_ECN) { + case VIRTIO_NET_HDR_GSO_TCPV4: + case VIRTIO_NET_HDR_GSO_TCPV6: + m->ol_flags |= PKT_RX_LRO | \ + PKT_RX_L4_CKSUM_NONE; + break; + default: + return -EINVAL; + } + } + + return 0; +} + +static inline int +rx_offload_enabled(struct virtio_hw *hw) +{ + return vtpci_with_feature(hw, VIRTIO_NET_F_GUEST_CSUM) || + vtpci_with_feature(hw, VIRTIO_NET_F_GUEST_TSO4) || + vtpci_with_feature(hw, VIRTIO_NET_F_GUEST_TSO6); +} + #define VIRTIO_MBUF_BURST_SZ 64 #define DESC_PER_CACHELINE (RTE_CACHE_LINE_SIZE / sizeof(struct vring_desc)) uint16_t @@ -642,6 +733,8 @@ virtio_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts) int error; uint32_t i, nb_enqueued; uint32_t hdr_size; + int offload; + struct virtio_net_hdr *hdr; nb_used = VIRTQUEUE_NUSED(vq); @@ -659,6 +752,7 @@ virtio_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts) nb_rx = 0; nb_enqueued = 0; hdr_size = hw->vtnet_hdr_size; + offload = rx_offload_enabled(hw); for (i = 0; i < num ; i++) { rxm = rcv_pkts[i]; @@ -683,9 +777,18 @@ virtio_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts) rxm->pkt_len = (uint32_t)(len[i] - hdr_size); rxm->data_len = (uint16_t)(len[i] - hdr_size); + hdr = (struct 
virtio_net_hdr *)((char *)rxm->buf_addr + + RTE_PKTMBUF_HEADROOM - hdr_size); + if (hw->vlan_strip) rte_vlan_strip(rxm); + if (offload && virtio_rx_offload(rxm, hdr) < 0) { + virtio_discard_rxbuf(vq, rxm); + rxvq->stats.errors++; + continue; + } + VIRTIO_DUMP_PACKET(rxm, rxm->data_len); rx_pkts[nb_rx++] = rxm; @@ -745,6 +848,7 @@ virtio_recv_mergeable_pkts(void *rx_queue, uint16_t extra_idx; uint32_t seg_res; uint32_t hdr_size; + int offload; nb_used = VIRTQUEUE_NUSED(vq); @@ -760,6 +864,7 @@ virtio_recv_mergeable_pkts(void *rx_queue, extra_idx = 0; seg_res = 0; hdr_size = hw->vtnet_hdr_size; + offload = rx_offload_enabled(hw); while (i < nb_used) { struct virtio_net_hdr_mrg_rxbuf *header; @@ -805,6 +910,12 @@ virtio_recv_mergeable_pkts(void *rx_queue, rx_pkts[nb_rx] = rxm; prev = rxm; + if (offload && virtio_rx_offload(rxm, &header->hdr) < 0) { + virtio_discard_rxbuf(vq, rxm); + rxvq->stats.errors++; + continue; + } + seg_res = seg_num - 1; while (seg_res != 0) { @@ -925,7 +1036,8 @@ virtio_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts) } /* optimize ring usage */ - if (vtpci_with_feature(hw, VIRTIO_F_ANY_LAYOUT) && + if ((vtpci_with_feature(hw, VIRTIO_F_ANY_LAYOUT) || + vtpci_with_feature(hw, VIRTIO_F_VERSION_1)) && rte_mbuf_refcnt_read(txm) == 1 && RTE_MBUF_DIRECT(txm) && txm->nb_segs == 1 && diff --git a/src/dpdk/drivers/net/virtio/virtio_rxtx.h b/src/dpdk/drivers/net/virtio/virtio_rxtx.h index 058b56a1..28f82d6a 100644 --- a/src/dpdk/drivers/net/virtio/virtio_rxtx.h +++ b/src/dpdk/drivers/net/virtio/virtio_rxtx.h @@ -86,10 +86,9 @@ struct virtnet_ctl { const struct rte_memzone *mz; /**< mem zone to populate RX ring. */ }; -#ifdef RTE_MACHINE_CPUFLAG_SSSE3 int virtio_rxq_vec_setup(struct virtnet_rx *rxvq); int virtqueue_enqueue_recv_refill_simple(struct virtqueue *vq, struct rte_mbuf *m); -#endif + #endif /* _VIRTIO_RXTX_H_ */ diff --git a/src/dpdk/drivers/net/virtio/virtio_rxtx_simple.c b/src/dpdk/drivers/net/virtio/virtio_rxtx_simple.c index 6517aa80..b651e53b 100644 --- a/src/dpdk/drivers/net/virtio/virtio_rxtx_simple.c +++ b/src/dpdk/drivers/net/virtio/virtio_rxtx_simple.c @@ -37,8 +37,6 @@ #include <string.h> #include <errno.h> -#include <tmmintrin.h> - #include <rte_cycles.h> #include <rte_memory.h> #include <rte_memzone.h> @@ -53,14 +51,7 @@ #include <rte_errno.h> #include <rte_byteorder.h> -#include "virtio_logs.h" -#include "virtio_ethdev.h" -#include "virtqueue.h" -#include "virtio_rxtx.h" - -#define RTE_VIRTIO_VPMD_RX_BURST 32 -#define RTE_VIRTIO_DESC_PER_LOOP 8 -#define RTE_VIRTIO_VPMD_RX_REARM_THRESH RTE_VIRTIO_VPMD_RX_BURST +#include "virtio_rxtx_simple.h" #ifndef __INTEL_COMPILER #pragma GCC diagnostic ignored "-Wcast-qual" @@ -92,257 +83,6 @@ virtqueue_enqueue_recv_refill_simple(struct virtqueue *vq, return 0; } -static inline void -virtio_rxq_rearm_vec(struct virtnet_rx *rxvq) -{ - int i; - uint16_t desc_idx; - struct rte_mbuf **sw_ring; - struct vring_desc *start_dp; - int ret; - struct virtqueue *vq = rxvq->vq; - - desc_idx = vq->vq_avail_idx & (vq->vq_nentries - 1); - sw_ring = &vq->sw_ring[desc_idx]; - start_dp = &vq->vq_ring.desc[desc_idx]; - - ret = rte_mempool_get_bulk(rxvq->mpool, (void **)sw_ring, - RTE_VIRTIO_VPMD_RX_REARM_THRESH); - if (unlikely(ret)) { - rte_eth_devices[rxvq->port_id].data->rx_mbuf_alloc_failed += - RTE_VIRTIO_VPMD_RX_REARM_THRESH; - return; - } - - for (i = 0; i < RTE_VIRTIO_VPMD_RX_REARM_THRESH; i++) { - uintptr_t p; - - p = (uintptr_t)&sw_ring[i]->rearm_data; - *(uint64_t *)p = rxvq->mbuf_initializer; - - 
start_dp[i].addr = - VIRTIO_MBUF_ADDR(sw_ring[i], vq) + - RTE_PKTMBUF_HEADROOM - vq->hw->vtnet_hdr_size; - start_dp[i].len = sw_ring[i]->buf_len - - RTE_PKTMBUF_HEADROOM + vq->hw->vtnet_hdr_size; - } - - vq->vq_avail_idx += RTE_VIRTIO_VPMD_RX_REARM_THRESH; - vq->vq_free_cnt -= RTE_VIRTIO_VPMD_RX_REARM_THRESH; - vq_update_avail_idx(vq); -} - -/* virtio vPMD receive routine, only accept(nb_pkts >= RTE_VIRTIO_DESC_PER_LOOP) - * - * This routine is for non-mergeable RX, one desc for each guest buffer. - * This routine is based on the RX ring layout optimization. Each entry in the - * avail ring points to the desc with the same index in the desc ring and this - * will never be changed in the driver. - * - * - nb_pkts < RTE_VIRTIO_DESC_PER_LOOP, just return no packet - */ -uint16_t -virtio_recv_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts, - uint16_t nb_pkts) -{ - struct virtnet_rx *rxvq = rx_queue; - struct virtqueue *vq = rxvq->vq; - uint16_t nb_used; - uint16_t desc_idx; - struct vring_used_elem *rused; - struct rte_mbuf **sw_ring; - struct rte_mbuf **sw_ring_end; - uint16_t nb_pkts_received; - __m128i shuf_msk1, shuf_msk2, len_adjust; - - shuf_msk1 = _mm_set_epi8( - 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, /* vlan tci */ - 5, 4, /* dat len */ - 0xFF, 0xFF, 5, 4, /* pkt len */ - 0xFF, 0xFF, 0xFF, 0xFF /* packet type */ - - ); - - shuf_msk2 = _mm_set_epi8( - 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, /* vlan tci */ - 13, 12, /* dat len */ - 0xFF, 0xFF, 13, 12, /* pkt len */ - 0xFF, 0xFF, 0xFF, 0xFF /* packet type */ - ); - - /* Subtract the header length. - * In which case do we need the header length in used->len ? - */ - len_adjust = _mm_set_epi16( - 0, 0, - 0, - (uint16_t)-vq->hw->vtnet_hdr_size, - 0, (uint16_t)-vq->hw->vtnet_hdr_size, - 0, 0); - - if (unlikely(nb_pkts < RTE_VIRTIO_DESC_PER_LOOP)) - return 0; - - nb_used = VIRTQUEUE_NUSED(vq); - - rte_compiler_barrier(); - - if (unlikely(nb_used == 0)) - return 0; - - nb_pkts = RTE_ALIGN_FLOOR(nb_pkts, RTE_VIRTIO_DESC_PER_LOOP); - nb_used = RTE_MIN(nb_used, nb_pkts); - - desc_idx = (uint16_t)(vq->vq_used_cons_idx & (vq->vq_nentries - 1)); - rused = &vq->vq_ring.used->ring[desc_idx]; - sw_ring = &vq->sw_ring[desc_idx]; - sw_ring_end = &vq->sw_ring[vq->vq_nentries]; - - _mm_prefetch((const void *)rused, _MM_HINT_T0); - - if (vq->vq_free_cnt >= RTE_VIRTIO_VPMD_RX_REARM_THRESH) { - virtio_rxq_rearm_vec(rxvq); - if (unlikely(virtqueue_kick_prepare(vq))) - virtqueue_notify(vq); - } - - for (nb_pkts_received = 0; - nb_pkts_received < nb_used;) { - __m128i desc[RTE_VIRTIO_DESC_PER_LOOP / 2]; - __m128i mbp[RTE_VIRTIO_DESC_PER_LOOP / 2]; - __m128i pkt_mb[RTE_VIRTIO_DESC_PER_LOOP]; - - mbp[0] = _mm_loadu_si128((__m128i *)(sw_ring + 0)); - desc[0] = _mm_loadu_si128((__m128i *)(rused + 0)); - _mm_storeu_si128((__m128i *)&rx_pkts[0], mbp[0]); - - mbp[1] = _mm_loadu_si128((__m128i *)(sw_ring + 2)); - desc[1] = _mm_loadu_si128((__m128i *)(rused + 2)); - _mm_storeu_si128((__m128i *)&rx_pkts[2], mbp[1]); - - mbp[2] = _mm_loadu_si128((__m128i *)(sw_ring + 4)); - desc[2] = _mm_loadu_si128((__m128i *)(rused + 4)); - _mm_storeu_si128((__m128i *)&rx_pkts[4], mbp[2]); - - mbp[3] = _mm_loadu_si128((__m128i *)(sw_ring + 6)); - desc[3] = _mm_loadu_si128((__m128i *)(rused + 6)); - _mm_storeu_si128((__m128i *)&rx_pkts[6], mbp[3]); - - pkt_mb[1] = _mm_shuffle_epi8(desc[0], shuf_msk2); - pkt_mb[0] = _mm_shuffle_epi8(desc[0], shuf_msk1); - pkt_mb[1] = _mm_add_epi16(pkt_mb[1], len_adjust); - pkt_mb[0] = _mm_add_epi16(pkt_mb[0], len_adjust); - _mm_storeu_si128((void 
*)&rx_pkts[1]->rx_descriptor_fields1, - pkt_mb[1]); - _mm_storeu_si128((void *)&rx_pkts[0]->rx_descriptor_fields1, - pkt_mb[0]); - - pkt_mb[3] = _mm_shuffle_epi8(desc[1], shuf_msk2); - pkt_mb[2] = _mm_shuffle_epi8(desc[1], shuf_msk1); - pkt_mb[3] = _mm_add_epi16(pkt_mb[3], len_adjust); - pkt_mb[2] = _mm_add_epi16(pkt_mb[2], len_adjust); - _mm_storeu_si128((void *)&rx_pkts[3]->rx_descriptor_fields1, - pkt_mb[3]); - _mm_storeu_si128((void *)&rx_pkts[2]->rx_descriptor_fields1, - pkt_mb[2]); - - pkt_mb[5] = _mm_shuffle_epi8(desc[2], shuf_msk2); - pkt_mb[4] = _mm_shuffle_epi8(desc[2], shuf_msk1); - pkt_mb[5] = _mm_add_epi16(pkt_mb[5], len_adjust); - pkt_mb[4] = _mm_add_epi16(pkt_mb[4], len_adjust); - _mm_storeu_si128((void *)&rx_pkts[5]->rx_descriptor_fields1, - pkt_mb[5]); - _mm_storeu_si128((void *)&rx_pkts[4]->rx_descriptor_fields1, - pkt_mb[4]); - - pkt_mb[7] = _mm_shuffle_epi8(desc[3], shuf_msk2); - pkt_mb[6] = _mm_shuffle_epi8(desc[3], shuf_msk1); - pkt_mb[7] = _mm_add_epi16(pkt_mb[7], len_adjust); - pkt_mb[6] = _mm_add_epi16(pkt_mb[6], len_adjust); - _mm_storeu_si128((void *)&rx_pkts[7]->rx_descriptor_fields1, - pkt_mb[7]); - _mm_storeu_si128((void *)&rx_pkts[6]->rx_descriptor_fields1, - pkt_mb[6]); - - if (unlikely(nb_used <= RTE_VIRTIO_DESC_PER_LOOP)) { - if (sw_ring + nb_used <= sw_ring_end) - nb_pkts_received += nb_used; - else - nb_pkts_received += sw_ring_end - sw_ring; - break; - } else { - if (unlikely(sw_ring + RTE_VIRTIO_DESC_PER_LOOP >= - sw_ring_end)) { - nb_pkts_received += sw_ring_end - sw_ring; - break; - } else { - nb_pkts_received += RTE_VIRTIO_DESC_PER_LOOP; - - rx_pkts += RTE_VIRTIO_DESC_PER_LOOP; - sw_ring += RTE_VIRTIO_DESC_PER_LOOP; - rused += RTE_VIRTIO_DESC_PER_LOOP; - nb_used -= RTE_VIRTIO_DESC_PER_LOOP; - } - } - } - - vq->vq_used_cons_idx += nb_pkts_received; - vq->vq_free_cnt += nb_pkts_received; - rxvq->stats.packets += nb_pkts_received; - return nb_pkts_received; -} - -#define VIRTIO_TX_FREE_THRESH 32 -#define VIRTIO_TX_MAX_FREE_BUF_SZ 32 -#define VIRTIO_TX_FREE_NR 32 -/* TODO: vq->tx_free_cnt could mean num of free slots so we could avoid shift */ -static inline void -virtio_xmit_cleanup(struct virtqueue *vq) -{ - uint16_t i, desc_idx; - uint32_t nb_free = 0; - struct rte_mbuf *m, *free[VIRTIO_TX_MAX_FREE_BUF_SZ]; - - desc_idx = (uint16_t)(vq->vq_used_cons_idx & - ((vq->vq_nentries >> 1) - 1)); - m = (struct rte_mbuf *)vq->vq_descx[desc_idx++].cookie; - m = __rte_pktmbuf_prefree_seg(m); - if (likely(m != NULL)) { - free[0] = m; - nb_free = 1; - for (i = 1; i < VIRTIO_TX_FREE_NR; i++) { - m = (struct rte_mbuf *)vq->vq_descx[desc_idx++].cookie; - m = __rte_pktmbuf_prefree_seg(m); - if (likely(m != NULL)) { - if (likely(m->pool == free[0]->pool)) - free[nb_free++] = m; - else { - rte_mempool_put_bulk(free[0]->pool, - (void **)free, - RTE_MIN(RTE_DIM(free), - nb_free)); - free[0] = m; - nb_free = 1; - } - } - } - rte_mempool_put_bulk(free[0]->pool, (void **)free, - RTE_MIN(RTE_DIM(free), nb_free)); - } else { - for (i = 1; i < VIRTIO_TX_FREE_NR; i++) { - m = (struct rte_mbuf *)vq->vq_descx[desc_idx++].cookie; - m = __rte_pktmbuf_prefree_seg(m); - if (m != NULL) - rte_mempool_put(m->pool, m); - } - } - - vq->vq_used_cons_idx += VIRTIO_TX_FREE_NR; - vq->vq_free_cnt += (VIRTIO_TX_FREE_NR << 1); -} - uint16_t virtio_xmit_pkts_simple(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts) @@ -423,3 +163,13 @@ virtio_rxq_vec_setup(struct virtnet_rx *rxq) return 0; } + +/* Stub for linkage when arch specific implementation is not available */ +uint16_t 
__attribute__((weak)) +virtio_recv_pkts_vec(void *rx_queue __rte_unused, + struct rte_mbuf **rx_pkts __rte_unused, + uint16_t nb_pkts __rte_unused) +{ + rte_panic("Wrong weak function linked by linker\n"); + return 0; +} diff --git a/src/dpdk/drivers/net/virtio/virtio_rxtx_simple.h b/src/dpdk/drivers/net/virtio/virtio_rxtx_simple.h new file mode 100644 index 00000000..b08f8594 --- /dev/null +++ b/src/dpdk/drivers/net/virtio/virtio_rxtx_simple.h @@ -0,0 +1,136 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2016 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#ifndef _VIRTIO_RXTX_SIMPLE_H_ +#define _VIRTIO_RXTX_SIMPLE_H_ + +#include <stdint.h> + +#include "virtio_logs.h" +#include "virtio_ethdev.h" +#include "virtqueue.h" +#include "virtio_rxtx.h" + +#define RTE_VIRTIO_VPMD_RX_BURST 32 +#define RTE_VIRTIO_VPMD_RX_REARM_THRESH RTE_VIRTIO_VPMD_RX_BURST + +static inline void +virtio_rxq_rearm_vec(struct virtnet_rx *rxvq) +{ + int i; + uint16_t desc_idx; + struct rte_mbuf **sw_ring; + struct vring_desc *start_dp; + int ret; + struct virtqueue *vq = rxvq->vq; + + desc_idx = vq->vq_avail_idx & (vq->vq_nentries - 1); + sw_ring = &vq->sw_ring[desc_idx]; + start_dp = &vq->vq_ring.desc[desc_idx]; + + ret = rte_mempool_get_bulk(rxvq->mpool, (void **)sw_ring, + RTE_VIRTIO_VPMD_RX_REARM_THRESH); + if (unlikely(ret)) { + rte_eth_devices[rxvq->port_id].data->rx_mbuf_alloc_failed += + RTE_VIRTIO_VPMD_RX_REARM_THRESH; + return; + } + + for (i = 0; i < RTE_VIRTIO_VPMD_RX_REARM_THRESH; i++) { + uintptr_t p; + + p = (uintptr_t)&sw_ring[i]->rearm_data; + *(uint64_t *)p = rxvq->mbuf_initializer; + + start_dp[i].addr = + VIRTIO_MBUF_ADDR(sw_ring[i], vq) + + RTE_PKTMBUF_HEADROOM - vq->hw->vtnet_hdr_size; + start_dp[i].len = sw_ring[i]->buf_len - + RTE_PKTMBUF_HEADROOM + vq->hw->vtnet_hdr_size; + } + + vq->vq_avail_idx += RTE_VIRTIO_VPMD_RX_REARM_THRESH; + vq->vq_free_cnt -= RTE_VIRTIO_VPMD_RX_REARM_THRESH; + vq_update_avail_idx(vq); +} + +#define VIRTIO_TX_FREE_THRESH 32 +#define VIRTIO_TX_MAX_FREE_BUF_SZ 32 +#define VIRTIO_TX_FREE_NR 32 +/* TODO: vq->tx_free_cnt could mean num of free slots so we could avoid shift */ +static inline void +virtio_xmit_cleanup(struct virtqueue *vq) +{ + uint16_t i, desc_idx; + uint32_t nb_free = 0; + struct rte_mbuf *m, *free[VIRTIO_TX_MAX_FREE_BUF_SZ]; + + desc_idx = (uint16_t)(vq->vq_used_cons_idx & + ((vq->vq_nentries >> 1) - 1)); + m = (struct rte_mbuf *)vq->vq_descx[desc_idx++].cookie; + m = __rte_pktmbuf_prefree_seg(m); + if (likely(m != NULL)) { + free[0] = m; + nb_free = 1; + for (i = 1; i < VIRTIO_TX_FREE_NR; i++) { + m = (struct rte_mbuf *)vq->vq_descx[desc_idx++].cookie; + m = __rte_pktmbuf_prefree_seg(m); + if (likely(m != NULL)) { + if (likely(m->pool == free[0]->pool)) + free[nb_free++] = m; + else { + rte_mempool_put_bulk(free[0]->pool, + (void **)free, + RTE_MIN(RTE_DIM(free), + nb_free)); + free[0] = m; + nb_free = 1; + } + } + } + rte_mempool_put_bulk(free[0]->pool, (void **)free, + RTE_MIN(RTE_DIM(free), nb_free)); + } else { + for (i = 1; i < VIRTIO_TX_FREE_NR; i++) { + m = (struct rte_mbuf *)vq->vq_descx[desc_idx++].cookie; + m = __rte_pktmbuf_prefree_seg(m); + if (m != NULL) + rte_mempool_put(m->pool, m); + } + } + + vq->vq_used_cons_idx += VIRTIO_TX_FREE_NR; + vq->vq_free_cnt += (VIRTIO_TX_FREE_NR << 1); +} + +#endif /* _VIRTIO_RXTX_SIMPLE_H_ */ diff --git a/src/dpdk/drivers/net/virtio/virtio_rxtx_simple_neon.c b/src/dpdk/drivers/net/virtio/virtio_rxtx_simple_neon.c new file mode 100644 index 00000000..793eefbe --- /dev/null +++ b/src/dpdk/drivers/net/virtio/virtio_rxtx_simple_neon.c @@ -0,0 +1,235 @@ +/* + * BSD LICENSE + * + * Copyright (C) Cavium networks Ltd. 2016 + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. 
+ * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Cavium networks nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +#include <stdint.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <errno.h> + +#include <rte_byteorder.h> +#include <rte_branch_prediction.h> +#include <rte_cycles.h> +#include <rte_ether.h> +#include <rte_ethdev.h> +#include <rte_errno.h> +#include <rte_memory.h> +#include <rte_memzone.h> +#include <rte_mempool.h> +#include <rte_malloc.h> +#include <rte_mbuf.h> +#include <rte_prefetch.h> +#include <rte_string_fns.h> +#include <rte_vect.h> + +#include "virtio_rxtx_simple.h" + +#define RTE_VIRTIO_VPMD_RX_BURST 32 +#define RTE_VIRTIO_DESC_PER_LOOP 8 +#define RTE_VIRTIO_VPMD_RX_REARM_THRESH RTE_VIRTIO_VPMD_RX_BURST + +/* virtio vPMD receive routine, only accept(nb_pkts >= RTE_VIRTIO_DESC_PER_LOOP) + * + * This routine is for non-mergeable RX, one desc for each guest buffer. + * This routine is based on the RX ring layout optimization. Each entry in the + * avail ring points to the desc with the same index in the desc ring and this + * will never be changed in the driver. + * + * - nb_pkts < RTE_VIRTIO_DESC_PER_LOOP, just return no packet + */ +uint16_t +virtio_recv_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts, + uint16_t nb_pkts) +{ + struct virtnet_rx *rxvq = rx_queue; + struct virtqueue *vq = rxvq->vq; + uint16_t nb_used; + uint16_t desc_idx; + struct vring_used_elem *rused; + struct rte_mbuf **sw_ring; + struct rte_mbuf **sw_ring_end; + uint16_t nb_pkts_received; + + uint8x16_t shuf_msk1 = { + 0xFF, 0xFF, 0xFF, 0xFF, /* packet type */ + 4, 5, 0xFF, 0xFF, /* pkt len */ + 4, 5, /* dat len */ + 0xFF, 0xFF, /* vlan tci */ + 0xFF, 0xFF, 0xFF, 0xFF + }; + + uint8x16_t shuf_msk2 = { + 0xFF, 0xFF, 0xFF, 0xFF, /* packet type */ + 12, 13, 0xFF, 0xFF, /* pkt len */ + 12, 13, /* dat len */ + 0xFF, 0xFF, /* vlan tci */ + 0xFF, 0xFF, 0xFF, 0xFF + }; + + /* Subtract the header length. + * In which case do we need the header length in used->len ? 
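+ * (The host reports used->len including the vnet header, so the packet
+ * length and data length lanes are reduced by vtnet_hdr_size below.)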
+ */ + uint16x8_t len_adjust = { + 0, 0, + (uint16_t)vq->hw->vtnet_hdr_size, 0, + (uint16_t)vq->hw->vtnet_hdr_size, + 0, + 0, 0 + }; + + if (unlikely(nb_pkts < RTE_VIRTIO_DESC_PER_LOOP)) + return 0; + + nb_used = VIRTQUEUE_NUSED(vq); + + rte_rmb(); + + if (unlikely(nb_used == 0)) + return 0; + + nb_pkts = RTE_ALIGN_FLOOR(nb_pkts, RTE_VIRTIO_DESC_PER_LOOP); + nb_used = RTE_MIN(nb_used, nb_pkts); + + desc_idx = (uint16_t)(vq->vq_used_cons_idx & (vq->vq_nentries - 1)); + rused = &vq->vq_ring.used->ring[desc_idx]; + sw_ring = &vq->sw_ring[desc_idx]; + sw_ring_end = &vq->sw_ring[vq->vq_nentries]; + + rte_prefetch_non_temporal(rused); + + if (vq->vq_free_cnt >= RTE_VIRTIO_VPMD_RX_REARM_THRESH) { + virtio_rxq_rearm_vec(rxvq); + if (unlikely(virtqueue_kick_prepare(vq))) + virtqueue_notify(vq); + } + + for (nb_pkts_received = 0; + nb_pkts_received < nb_used;) { + uint64x2_t desc[RTE_VIRTIO_DESC_PER_LOOP / 2]; + uint64x2_t mbp[RTE_VIRTIO_DESC_PER_LOOP / 2]; + uint64x2_t pkt_mb[RTE_VIRTIO_DESC_PER_LOOP]; + + mbp[0] = vld1q_u64((uint64_t *)(sw_ring + 0)); + desc[0] = vld1q_u64((uint64_t *)(rused + 0)); + vst1q_u64((uint64_t *)&rx_pkts[0], mbp[0]); + + mbp[1] = vld1q_u64((uint64_t *)(sw_ring + 2)); + desc[1] = vld1q_u64((uint64_t *)(rused + 2)); + vst1q_u64((uint64_t *)&rx_pkts[2], mbp[1]); + + mbp[2] = vld1q_u64((uint64_t *)(sw_ring + 4)); + desc[2] = vld1q_u64((uint64_t *)(rused + 4)); + vst1q_u64((uint64_t *)&rx_pkts[4], mbp[2]); + + mbp[3] = vld1q_u64((uint64_t *)(sw_ring + 6)); + desc[3] = vld1q_u64((uint64_t *)(rused + 6)); + vst1q_u64((uint64_t *)&rx_pkts[6], mbp[3]); + + pkt_mb[1] = vreinterpretq_u64_u8(vqtbl1q_u8( + vreinterpretq_u8_u64(desc[0]), shuf_msk2)); + pkt_mb[0] = vreinterpretq_u64_u8(vqtbl1q_u8( + vreinterpretq_u8_u64(desc[0]), shuf_msk1)); + pkt_mb[1] = vreinterpretq_u64_u16(vsubq_u16( + vreinterpretq_u16_u64(pkt_mb[1]), len_adjust)); + pkt_mb[0] = vreinterpretq_u64_u16(vsubq_u16( + vreinterpretq_u16_u64(pkt_mb[0]), len_adjust)); + vst1q_u64((void *)&rx_pkts[1]->rx_descriptor_fields1, + pkt_mb[1]); + vst1q_u64((void *)&rx_pkts[0]->rx_descriptor_fields1, + pkt_mb[0]); + + pkt_mb[3] = vreinterpretq_u64_u8(vqtbl1q_u8( + vreinterpretq_u8_u64(desc[1]), shuf_msk2)); + pkt_mb[2] = vreinterpretq_u64_u8(vqtbl1q_u8( + vreinterpretq_u8_u64(desc[1]), shuf_msk1)); + pkt_mb[3] = vreinterpretq_u64_u16(vsubq_u16( + vreinterpretq_u16_u64(pkt_mb[3]), len_adjust)); + pkt_mb[2] = vreinterpretq_u64_u16(vsubq_u16( + vreinterpretq_u16_u64(pkt_mb[2]), len_adjust)); + vst1q_u64((void *)&rx_pkts[3]->rx_descriptor_fields1, + pkt_mb[3]); + vst1q_u64((void *)&rx_pkts[2]->rx_descriptor_fields1, + pkt_mb[2]); + + pkt_mb[5] = vreinterpretq_u64_u8(vqtbl1q_u8( + vreinterpretq_u8_u64(desc[2]), shuf_msk2)); + pkt_mb[4] = vreinterpretq_u64_u8(vqtbl1q_u8( + vreinterpretq_u8_u64(desc[2]), shuf_msk1)); + pkt_mb[5] = vreinterpretq_u64_u16(vsubq_u16( + vreinterpretq_u16_u64(pkt_mb[5]), len_adjust)); + pkt_mb[4] = vreinterpretq_u64_u16(vsubq_u16( + vreinterpretq_u16_u64(pkt_mb[4]), len_adjust)); + vst1q_u64((void *)&rx_pkts[5]->rx_descriptor_fields1, + pkt_mb[5]); + vst1q_u64((void *)&rx_pkts[4]->rx_descriptor_fields1, + pkt_mb[4]); + + pkt_mb[7] = vreinterpretq_u64_u8(vqtbl1q_u8( + vreinterpretq_u8_u64(desc[3]), shuf_msk2)); + pkt_mb[6] = vreinterpretq_u64_u8(vqtbl1q_u8( + vreinterpretq_u8_u64(desc[3]), shuf_msk1)); + pkt_mb[7] = vreinterpretq_u64_u16(vsubq_u16( + vreinterpretq_u16_u64(pkt_mb[7]), len_adjust)); + pkt_mb[6] = vreinterpretq_u64_u16(vsubq_u16( + vreinterpretq_u16_u64(pkt_mb[6]), len_adjust)); + vst1q_u64((void 
*)&rx_pkts[7]->rx_descriptor_fields1, + pkt_mb[7]); + vst1q_u64((void *)&rx_pkts[6]->rx_descriptor_fields1, + pkt_mb[6]); + + if (unlikely(nb_used <= RTE_VIRTIO_DESC_PER_LOOP)) { + if (sw_ring + nb_used <= sw_ring_end) + nb_pkts_received += nb_used; + else + nb_pkts_received += sw_ring_end - sw_ring; + break; + } else { + if (unlikely(sw_ring + RTE_VIRTIO_DESC_PER_LOOP >= + sw_ring_end)) { + nb_pkts_received += sw_ring_end - sw_ring; + break; + } else { + nb_pkts_received += RTE_VIRTIO_DESC_PER_LOOP; + + rx_pkts += RTE_VIRTIO_DESC_PER_LOOP; + sw_ring += RTE_VIRTIO_DESC_PER_LOOP; + rused += RTE_VIRTIO_DESC_PER_LOOP; + nb_used -= RTE_VIRTIO_DESC_PER_LOOP; + } + } + } + + vq->vq_used_cons_idx += nb_pkts_received; + vq->vq_free_cnt += nb_pkts_received; + rxvq->stats.packets += nb_pkts_received; + return nb_pkts_received; +} diff --git a/src/dpdk/drivers/net/virtio/virtio_rxtx_simple_sse.c b/src/dpdk/drivers/net/virtio/virtio_rxtx_simple_sse.c new file mode 100644 index 00000000..87bb5c63 --- /dev/null +++ b/src/dpdk/drivers/net/virtio/virtio_rxtx_simple_sse.c @@ -0,0 +1,222 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2015 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include <stdint.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <errno.h> + +#include <tmmintrin.h> + +#include <rte_byteorder.h> +#include <rte_branch_prediction.h> +#include <rte_cycles.h> +#include <rte_ether.h> +#include <rte_ethdev.h> +#include <rte_errno.h> +#include <rte_memory.h> +#include <rte_memzone.h> +#include <rte_mempool.h> +#include <rte_malloc.h> +#include <rte_mbuf.h> +#include <rte_prefetch.h> +#include <rte_string_fns.h> + +#include "virtio_rxtx_simple.h" + +#define RTE_VIRTIO_VPMD_RX_BURST 32 +#define RTE_VIRTIO_DESC_PER_LOOP 8 +#define RTE_VIRTIO_VPMD_RX_REARM_THRESH RTE_VIRTIO_VPMD_RX_BURST + +/* virtio vPMD receive routine, only accept(nb_pkts >= RTE_VIRTIO_DESC_PER_LOOP) + * + * This routine is for non-mergeable RX, one desc for each guest buffer. 
+ * This routine is based on the RX ring layout optimization. Each entry in the + * avail ring points to the desc with the same index in the desc ring and this + * will never be changed in the driver. + * + * - nb_pkts < RTE_VIRTIO_DESC_PER_LOOP, just return no packet + */ +uint16_t +virtio_recv_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts, + uint16_t nb_pkts) +{ + struct virtnet_rx *rxvq = rx_queue; + struct virtqueue *vq = rxvq->vq; + uint16_t nb_used; + uint16_t desc_idx; + struct vring_used_elem *rused; + struct rte_mbuf **sw_ring; + struct rte_mbuf **sw_ring_end; + uint16_t nb_pkts_received; + __m128i shuf_msk1, shuf_msk2, len_adjust; + + shuf_msk1 = _mm_set_epi8( + 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, /* vlan tci */ + 5, 4, /* dat len */ + 0xFF, 0xFF, 5, 4, /* pkt len */ + 0xFF, 0xFF, 0xFF, 0xFF /* packet type */ + + ); + + shuf_msk2 = _mm_set_epi8( + 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, /* vlan tci */ + 13, 12, /* dat len */ + 0xFF, 0xFF, 13, 12, /* pkt len */ + 0xFF, 0xFF, 0xFF, 0xFF /* packet type */ + ); + + /* Subtract the header length. + * In which case do we need the header length in used->len ? + */ + len_adjust = _mm_set_epi16( + 0, 0, + 0, + (uint16_t)-vq->hw->vtnet_hdr_size, + 0, (uint16_t)-vq->hw->vtnet_hdr_size, + 0, 0); + + if (unlikely(nb_pkts < RTE_VIRTIO_DESC_PER_LOOP)) + return 0; + + nb_used = VIRTQUEUE_NUSED(vq); + + rte_compiler_barrier(); + + if (unlikely(nb_used == 0)) + return 0; + + nb_pkts = RTE_ALIGN_FLOOR(nb_pkts, RTE_VIRTIO_DESC_PER_LOOP); + nb_used = RTE_MIN(nb_used, nb_pkts); + + desc_idx = (uint16_t)(vq->vq_used_cons_idx & (vq->vq_nentries - 1)); + rused = &vq->vq_ring.used->ring[desc_idx]; + sw_ring = &vq->sw_ring[desc_idx]; + sw_ring_end = &vq->sw_ring[vq->vq_nentries]; + + rte_prefetch0(rused); + + if (vq->vq_free_cnt >= RTE_VIRTIO_VPMD_RX_REARM_THRESH) { + virtio_rxq_rearm_vec(rxvq); + if (unlikely(virtqueue_kick_prepare(vq))) + virtqueue_notify(vq); + } + + for (nb_pkts_received = 0; + nb_pkts_received < nb_used;) { + __m128i desc[RTE_VIRTIO_DESC_PER_LOOP / 2]; + __m128i mbp[RTE_VIRTIO_DESC_PER_LOOP / 2]; + __m128i pkt_mb[RTE_VIRTIO_DESC_PER_LOOP]; + + mbp[0] = _mm_loadu_si128((__m128i *)(sw_ring + 0)); + desc[0] = _mm_loadu_si128((__m128i *)(rused + 0)); + _mm_storeu_si128((__m128i *)&rx_pkts[0], mbp[0]); + + mbp[1] = _mm_loadu_si128((__m128i *)(sw_ring + 2)); + desc[1] = _mm_loadu_si128((__m128i *)(rused + 2)); + _mm_storeu_si128((__m128i *)&rx_pkts[2], mbp[1]); + + mbp[2] = _mm_loadu_si128((__m128i *)(sw_ring + 4)); + desc[2] = _mm_loadu_si128((__m128i *)(rused + 4)); + _mm_storeu_si128((__m128i *)&rx_pkts[4], mbp[2]); + + mbp[3] = _mm_loadu_si128((__m128i *)(sw_ring + 6)); + desc[3] = _mm_loadu_si128((__m128i *)(rused + 6)); + _mm_storeu_si128((__m128i *)&rx_pkts[6], mbp[3]); + + pkt_mb[1] = _mm_shuffle_epi8(desc[0], shuf_msk2); + pkt_mb[0] = _mm_shuffle_epi8(desc[0], shuf_msk1); + pkt_mb[1] = _mm_add_epi16(pkt_mb[1], len_adjust); + pkt_mb[0] = _mm_add_epi16(pkt_mb[0], len_adjust); + _mm_storeu_si128((void *)&rx_pkts[1]->rx_descriptor_fields1, + pkt_mb[1]); + _mm_storeu_si128((void *)&rx_pkts[0]->rx_descriptor_fields1, + pkt_mb[0]); + + pkt_mb[3] = _mm_shuffle_epi8(desc[1], shuf_msk2); + pkt_mb[2] = _mm_shuffle_epi8(desc[1], shuf_msk1); + pkt_mb[3] = _mm_add_epi16(pkt_mb[3], len_adjust); + pkt_mb[2] = _mm_add_epi16(pkt_mb[2], len_adjust); + _mm_storeu_si128((void *)&rx_pkts[3]->rx_descriptor_fields1, + pkt_mb[3]); + _mm_storeu_si128((void *)&rx_pkts[2]->rx_descriptor_fields1, + pkt_mb[2]); + + pkt_mb[5] = _mm_shuffle_epi8(desc[2], 
shuf_msk2); + pkt_mb[4] = _mm_shuffle_epi8(desc[2], shuf_msk1); + pkt_mb[5] = _mm_add_epi16(pkt_mb[5], len_adjust); + pkt_mb[4] = _mm_add_epi16(pkt_mb[4], len_adjust); + _mm_storeu_si128((void *)&rx_pkts[5]->rx_descriptor_fields1, + pkt_mb[5]); + _mm_storeu_si128((void *)&rx_pkts[4]->rx_descriptor_fields1, + pkt_mb[4]); + + pkt_mb[7] = _mm_shuffle_epi8(desc[3], shuf_msk2); + pkt_mb[6] = _mm_shuffle_epi8(desc[3], shuf_msk1); + pkt_mb[7] = _mm_add_epi16(pkt_mb[7], len_adjust); + pkt_mb[6] = _mm_add_epi16(pkt_mb[6], len_adjust); + _mm_storeu_si128((void *)&rx_pkts[7]->rx_descriptor_fields1, + pkt_mb[7]); + _mm_storeu_si128((void *)&rx_pkts[6]->rx_descriptor_fields1, + pkt_mb[6]); + + if (unlikely(nb_used <= RTE_VIRTIO_DESC_PER_LOOP)) { + if (sw_ring + nb_used <= sw_ring_end) + nb_pkts_received += nb_used; + else + nb_pkts_received += sw_ring_end - sw_ring; + break; + } else { + if (unlikely(sw_ring + RTE_VIRTIO_DESC_PER_LOOP >= + sw_ring_end)) { + nb_pkts_received += sw_ring_end - sw_ring; + break; + } else { + nb_pkts_received += RTE_VIRTIO_DESC_PER_LOOP; + + rx_pkts += RTE_VIRTIO_DESC_PER_LOOP; + sw_ring += RTE_VIRTIO_DESC_PER_LOOP; + rused += RTE_VIRTIO_DESC_PER_LOOP; + nb_used -= RTE_VIRTIO_DESC_PER_LOOP; + } + } + } + + vq->vq_used_cons_idx += nb_pkts_received; + vq->vq_free_cnt += nb_pkts_received; + rxvq->stats.packets += nb_pkts_received; + return nb_pkts_received; +} diff --git a/src/dpdk/drivers/net/virtio/virtio_user/vhost.h b/src/dpdk/drivers/net/virtio/virtio_user/vhost.h new file mode 100644 index 00000000..5c983bd4 --- /dev/null +++ b/src/dpdk/drivers/net/virtio/virtio_user/vhost.h @@ -0,0 +1,123 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2016 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#ifndef _VHOST_NET_USER_H +#define _VHOST_NET_USER_H + +#include <stdint.h> +#include <linux/types.h> +#include <linux/ioctl.h> + +#include "../virtio_pci.h" +#include "../virtio_logs.h" +#include "../virtqueue.h" + +struct vhost_vring_state { + unsigned int index; + unsigned int num; +}; + +struct vhost_vring_file { + unsigned int index; + int fd; +}; + +struct vhost_vring_addr { + unsigned int index; + /* Option flags. */ + unsigned int flags; + /* Flag values: */ + /* Whether log address is valid. If set enables logging. */ +#define VHOST_VRING_F_LOG 0 + + /* Start of array of descriptors (virtually contiguous) */ + uint64_t desc_user_addr; + /* Used structure address. Must be 32 bit aligned */ + uint64_t used_user_addr; + /* Available structure address. Must be 16 bit aligned */ + uint64_t avail_user_addr; + /* Logging support. */ + /* Log writes to used structure, at offset calculated from specified + * address. Address must be 32 bit aligned. + */ + uint64_t log_guest_addr; +}; + +enum vhost_user_request { + VHOST_USER_NONE = 0, + VHOST_USER_GET_FEATURES = 1, + VHOST_USER_SET_FEATURES = 2, + VHOST_USER_SET_OWNER = 3, + VHOST_USER_RESET_OWNER = 4, + VHOST_USER_SET_MEM_TABLE = 5, + VHOST_USER_SET_LOG_BASE = 6, + VHOST_USER_SET_LOG_FD = 7, + VHOST_USER_SET_VRING_NUM = 8, + VHOST_USER_SET_VRING_ADDR = 9, + VHOST_USER_SET_VRING_BASE = 10, + VHOST_USER_GET_VRING_BASE = 11, + VHOST_USER_SET_VRING_KICK = 12, + VHOST_USER_SET_VRING_CALL = 13, + VHOST_USER_SET_VRING_ERR = 14, + VHOST_USER_GET_PROTOCOL_FEATURES = 15, + VHOST_USER_SET_PROTOCOL_FEATURES = 16, + VHOST_USER_GET_QUEUE_NUM = 17, + VHOST_USER_SET_VRING_ENABLE = 18, + VHOST_USER_MAX +}; + +const char * const vhost_msg_strings[VHOST_USER_MAX]; + +struct vhost_memory_region { + uint64_t guest_phys_addr; + uint64_t memory_size; /* bytes */ + uint64_t userspace_addr; + uint64_t mmap_offset; +}; + +struct virtio_user_dev; + +struct virtio_user_backend_ops { + int (*setup)(struct virtio_user_dev *dev); + int (*send_request)(struct virtio_user_dev *dev, + enum vhost_user_request req, + void *arg); + int (*enable_qp)(struct virtio_user_dev *dev, + uint16_t pair_idx, + int enable); +}; + +struct virtio_user_backend_ops ops_user; +struct virtio_user_backend_ops ops_kernel; + +#endif diff --git a/src/dpdk/drivers/net/virtio/virtio_user/vhost_kernel.c b/src/dpdk/drivers/net/virtio/virtio_user/vhost_kernel.c new file mode 100644 index 00000000..05aa6c6d --- /dev/null +++ b/src/dpdk/drivers/net/virtio/virtio_user/vhost_kernel.c @@ -0,0 +1,403 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2016 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <unistd.h> + +#include <rte_memory.h> +#include <rte_eal_memconfig.h> + +#include "vhost.h" +#include "virtio_user_dev.h" +#include "vhost_kernel_tap.h" + +struct vhost_memory_kernel { + uint32_t nregions; + uint32_t padding; + struct vhost_memory_region regions[0]; +}; + +/* vhost kernel ioctls */ +#define VHOST_VIRTIO 0xAF +#define VHOST_GET_FEATURES _IOR(VHOST_VIRTIO, 0x00, __u64) +#define VHOST_SET_FEATURES _IOW(VHOST_VIRTIO, 0x00, __u64) +#define VHOST_SET_OWNER _IO(VHOST_VIRTIO, 0x01) +#define VHOST_RESET_OWNER _IO(VHOST_VIRTIO, 0x02) +#define VHOST_SET_MEM_TABLE _IOW(VHOST_VIRTIO, 0x03, struct vhost_memory_kernel) +#define VHOST_SET_LOG_BASE _IOW(VHOST_VIRTIO, 0x04, __u64) +#define VHOST_SET_LOG_FD _IOW(VHOST_VIRTIO, 0x07, int) +#define VHOST_SET_VRING_NUM _IOW(VHOST_VIRTIO, 0x10, struct vhost_vring_state) +#define VHOST_SET_VRING_ADDR _IOW(VHOST_VIRTIO, 0x11, struct vhost_vring_addr) +#define VHOST_SET_VRING_BASE _IOW(VHOST_VIRTIO, 0x12, struct vhost_vring_state) +#define VHOST_GET_VRING_BASE _IOWR(VHOST_VIRTIO, 0x12, struct vhost_vring_state) +#define VHOST_SET_VRING_KICK _IOW(VHOST_VIRTIO, 0x20, struct vhost_vring_file) +#define VHOST_SET_VRING_CALL _IOW(VHOST_VIRTIO, 0x21, struct vhost_vring_file) +#define VHOST_SET_VRING_ERR _IOW(VHOST_VIRTIO, 0x22, struct vhost_vring_file) +#define VHOST_NET_SET_BACKEND _IOW(VHOST_VIRTIO, 0x30, struct vhost_vring_file) + +static uint64_t max_regions = 64; + +static void +get_vhost_kernel_max_regions(void) +{ + int fd; + char buf[20] = {'\0'}; + + fd = open("/sys/module/vhost/parameters/max_mem_regions", O_RDONLY); + if (fd < 0) + return; + + if (read(fd, buf, sizeof(buf) - 1) > 0) + max_regions = strtoull(buf, NULL, 10); + + close(fd); +} + +static uint64_t vhost_req_user_to_kernel[] = { + [VHOST_USER_SET_OWNER] = VHOST_SET_OWNER, + [VHOST_USER_RESET_OWNER] = VHOST_RESET_OWNER, + [VHOST_USER_SET_FEATURES] = VHOST_SET_FEATURES, + [VHOST_USER_GET_FEATURES] = VHOST_GET_FEATURES, + [VHOST_USER_SET_VRING_CALL] = VHOST_SET_VRING_CALL, + [VHOST_USER_SET_VRING_NUM] = VHOST_SET_VRING_NUM, + [VHOST_USER_SET_VRING_BASE] = VHOST_SET_VRING_BASE, + [VHOST_USER_GET_VRING_BASE] = VHOST_GET_VRING_BASE, + [VHOST_USER_SET_VRING_ADDR] = VHOST_SET_VRING_ADDR, + [VHOST_USER_SET_VRING_KICK] = VHOST_SET_VRING_KICK, + [VHOST_USER_SET_MEM_TABLE] = VHOST_SET_MEM_TABLE, +}; + +/* By default, vhost kernel module allows 64 regions, but DPDK allows + * 256 segments. As a relief, below function merges those virtually + * adjacent memsegs into one region. 
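+ * For example, a memseg that ends exactly at the start address of an
+ * already-recorded region, or starts exactly where one ends, is folded
+ * into that region instead of consuming a new slot.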
+ */ +static struct vhost_memory_kernel * +prepare_vhost_memory_kernel(void) +{ + uint32_t i, j, k = 0; + struct rte_memseg *seg; + struct vhost_memory_region *mr; + struct vhost_memory_kernel *vm; + + vm = malloc(sizeof(struct vhost_memory_kernel) + + max_regions * + sizeof(struct vhost_memory_region)); + if (!vm) + return NULL; + + for (i = 0; i < RTE_MAX_MEMSEG; ++i) { + seg = &rte_eal_get_configuration()->mem_config->memseg[i]; + if (!seg->addr) + break; + + int new_region = 1; + + for (j = 0; j < k; ++j) { + mr = &vm->regions[j]; + + if (mr->userspace_addr + mr->memory_size == + (uint64_t)(uintptr_t)seg->addr) { + mr->memory_size += seg->len; + new_region = 0; + break; + } + + if ((uint64_t)(uintptr_t)seg->addr + seg->len == + mr->userspace_addr) { + mr->guest_phys_addr = + (uint64_t)(uintptr_t)seg->addr; + mr->userspace_addr = + (uint64_t)(uintptr_t)seg->addr; + mr->memory_size += seg->len; + new_region = 0; + break; + } + } + + if (new_region == 0) + continue; + + mr = &vm->regions[k++]; + /* use vaddr here! */ + mr->guest_phys_addr = (uint64_t)(uintptr_t)seg->addr; + mr->userspace_addr = (uint64_t)(uintptr_t)seg->addr; + mr->memory_size = seg->len; + mr->mmap_offset = 0; + + if (k >= max_regions) { + free(vm); + return NULL; + } + } + + vm->nregions = k; + vm->padding = 0; + return vm; +} + +/* with below features, vhost kernel does not need to do the checksum and TSO, + * these info will be passed to virtio_user through virtio net header. + */ +#define VHOST_KERNEL_GUEST_OFFLOADS_MASK \ + ((1ULL << VIRTIO_NET_F_GUEST_CSUM) | \ + (1ULL << VIRTIO_NET_F_GUEST_TSO4) | \ + (1ULL << VIRTIO_NET_F_GUEST_TSO6) | \ + (1ULL << VIRTIO_NET_F_GUEST_ECN) | \ + (1ULL << VIRTIO_NET_F_GUEST_UFO)) + +/* with below features, when flows from virtio_user to vhost kernel + * (1) if flows goes up through the kernel networking stack, it does not need + * to verify checksum, which can save CPU cycles; + * (2) if flows goes through a Linux bridge and outside from an interface + * (kernel driver), checksum and TSO will be done by GSO in kernel or even + * offloaded into real physical device. 
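+ * Either way it is safe to advertise these bits to the virtio_user
+ * frontend even though vhost-net does not negotiate them itself; they are
+ * stripped before VHOST_SET_FEATURES and added back after
+ * VHOST_GET_FEATURES in vhost_kernel_ioctl() below.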
+ */ +#define VHOST_KERNEL_HOST_OFFLOADS_MASK \ + ((1ULL << VIRTIO_NET_F_HOST_TSO4) | \ + (1ULL << VIRTIO_NET_F_HOST_TSO6) | \ + (1ULL << VIRTIO_NET_F_CSUM)) + +static int +tap_supporte_mq(void) +{ + int tapfd; + unsigned int tap_features; + + tapfd = open(PATH_NET_TUN, O_RDWR); + if (tapfd < 0) { + PMD_DRV_LOG(ERR, "fail to open %s: %s", + PATH_NET_TUN, strerror(errno)); + return -1; + } + + if (ioctl(tapfd, TUNGETFEATURES, &tap_features) == -1) { + PMD_DRV_LOG(ERR, "TUNGETFEATURES failed: %s", strerror(errno)); + close(tapfd); + return -1; + } + + close(tapfd); + return tap_features & IFF_MULTI_QUEUE; +} + +static int +vhost_kernel_ioctl(struct virtio_user_dev *dev, + enum vhost_user_request req, + void *arg) +{ + int ret = -1; + unsigned int i; + uint64_t req_kernel; + struct vhost_memory_kernel *vm = NULL; + int vhostfd; + unsigned int queue_sel; + + PMD_DRV_LOG(INFO, "%s", vhost_msg_strings[req]); + + req_kernel = vhost_req_user_to_kernel[req]; + + if (req_kernel == VHOST_SET_MEM_TABLE) { + vm = prepare_vhost_memory_kernel(); + if (!vm) + return -1; + arg = (void *)vm; + } + + if (req_kernel == VHOST_SET_FEATURES) { + /* We don't need memory protection here */ + *(uint64_t *)arg &= ~(1ULL << VIRTIO_F_IOMMU_PLATFORM); + + /* VHOST kernel does not know about below flags */ + *(uint64_t *)arg &= ~VHOST_KERNEL_GUEST_OFFLOADS_MASK; + *(uint64_t *)arg &= ~VHOST_KERNEL_HOST_OFFLOADS_MASK; + + *(uint64_t *)arg &= ~(1ULL << VIRTIO_NET_F_MQ); + } + + switch (req_kernel) { + case VHOST_SET_VRING_NUM: + case VHOST_SET_VRING_ADDR: + case VHOST_SET_VRING_BASE: + case VHOST_GET_VRING_BASE: + case VHOST_SET_VRING_KICK: + case VHOST_SET_VRING_CALL: + queue_sel = *(unsigned int *)arg; + vhostfd = dev->vhostfds[queue_sel / 2]; + *(unsigned int *)arg = queue_sel % 2; + PMD_DRV_LOG(DEBUG, "vhostfd=%d, index=%u", + vhostfd, *(unsigned int *)arg); + break; + default: + vhostfd = -1; + } + if (vhostfd == -1) { + for (i = 0; i < dev->max_queue_pairs; ++i) { + if (dev->vhostfds[i] < 0) + continue; + + ret = ioctl(dev->vhostfds[i], req_kernel, arg); + if (ret < 0) + break; + } + } else { + ret = ioctl(vhostfd, req_kernel, arg); + } + + if (!ret && req_kernel == VHOST_GET_FEATURES) { + /* with tap as the backend, all these features are supported + * but not claimed by vhost-net, so we add them back when + * reporting to upper layer. + */ + *((uint64_t *)arg) |= VHOST_KERNEL_GUEST_OFFLOADS_MASK; + *((uint64_t *)arg) |= VHOST_KERNEL_HOST_OFFLOADS_MASK; + + /* vhost_kernel will not declare this feature, but it does + * support multi-queue. + */ + if (tap_supporte_mq()) + *(uint64_t *)arg |= (1ull << VIRTIO_NET_F_MQ); + } + + if (vm) + free(vm); + + if (ret < 0) + PMD_DRV_LOG(ERR, "%s failed: %s", + vhost_msg_strings[req], strerror(errno)); + + return ret; +} + +/** + * Set up environment to talk with a vhost kernel backend. + * + * @return + * - (-1) if fail to set up; + * - (>=0) if successful. 
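+ *
+ * Note: one vhost-net fd is opened per queue pair and stored in
+ * dev->vhostfds[].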
+ */ +static int +vhost_kernel_setup(struct virtio_user_dev *dev) +{ + int vhostfd; + uint32_t i; + + get_vhost_kernel_max_regions(); + + for (i = 0; i < dev->max_queue_pairs; ++i) { + vhostfd = open(dev->path, O_RDWR); + if (vhostfd < 0) { + PMD_DRV_LOG(ERR, "fail to open %s, %s", + dev->path, strerror(errno)); + return -1; + } + + dev->vhostfds[i] = vhostfd; + } + + return 0; +} + +static int +vhost_kernel_set_backend(int vhostfd, int tapfd) +{ + struct vhost_vring_file f; + + f.fd = tapfd; + f.index = 0; + if (ioctl(vhostfd, VHOST_NET_SET_BACKEND, &f) < 0) { + PMD_DRV_LOG(ERR, "VHOST_NET_SET_BACKEND fails, %s", + strerror(errno)); + return -1; + } + + f.index = 1; + if (ioctl(vhostfd, VHOST_NET_SET_BACKEND, &f) < 0) { + PMD_DRV_LOG(ERR, "VHOST_NET_SET_BACKEND fails, %s", + strerror(errno)); + return -1; + } + + return 0; +} + +static int +vhost_kernel_enable_queue_pair(struct virtio_user_dev *dev, + uint16_t pair_idx, + int enable) +{ + int hdr_size; + int vhostfd; + int tapfd; + int req_mq = (dev->max_queue_pairs > 1); + + vhostfd = dev->vhostfds[pair_idx]; + + if (!enable) { + if (dev->tapfds[pair_idx]) { + close(dev->tapfds[pair_idx]); + dev->tapfds[pair_idx] = -1; + } + return vhost_kernel_set_backend(vhostfd, -1); + } else if (dev->tapfds[pair_idx] >= 0) { + return 0; + } + + if ((dev->features & (1ULL << VIRTIO_NET_F_MRG_RXBUF)) || + (dev->features & (1ULL << VIRTIO_F_VERSION_1))) + hdr_size = sizeof(struct virtio_net_hdr_mrg_rxbuf); + else + hdr_size = sizeof(struct virtio_net_hdr); + + tapfd = vhost_kernel_open_tap(&dev->ifname, hdr_size, req_mq); + if (tapfd < 0) { + PMD_DRV_LOG(ERR, "fail to open tap for vhost kernel"); + return -1; + } + + if (vhost_kernel_set_backend(vhostfd, tapfd) < 0) { + PMD_DRV_LOG(ERR, "fail to set backend for vhost kernel"); + close(tapfd); + return -1; + } + + dev->tapfds[pair_idx] = tapfd; + return 0; +} + +struct virtio_user_backend_ops ops_kernel = { + .setup = vhost_kernel_setup, + .send_request = vhost_kernel_ioctl, + .enable_qp = vhost_kernel_enable_queue_pair +}; diff --git a/src/dpdk/drivers/net/virtio/virtio_user/vhost_kernel_tap.c b/src/dpdk/drivers/net/virtio/virtio_user/vhost_kernel_tap.c new file mode 100644 index 00000000..f585de8c --- /dev/null +++ b/src/dpdk/drivers/net/virtio/virtio_user/vhost_kernel_tap.c @@ -0,0 +1,133 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2016 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include <unistd.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <net/if.h> +#include <errno.h> +#include <string.h> +#include <limits.h> + +#include "vhost_kernel_tap.h" +#include "../virtio_logs.h" + +int +vhost_kernel_open_tap(char **p_ifname, int hdr_size, int req_mq) +{ + unsigned int tap_features; + int sndbuf = INT_MAX; + struct ifreq ifr; + int tapfd; + unsigned int offload = + TUN_F_CSUM | + TUN_F_TSO4 | + TUN_F_TSO6 | + TUN_F_TSO_ECN | + TUN_F_UFO; + + /* TODO: + * 1. verify we can get/set vnet_hdr_len, tap_probe_vnet_hdr_len + * 2. get number of memory regions from vhost module parameter + * max_mem_regions, supported in newer version linux kernel + */ + tapfd = open(PATH_NET_TUN, O_RDWR); + if (tapfd < 0) { + PMD_DRV_LOG(ERR, "fail to open %s: %s", + PATH_NET_TUN, strerror(errno)); + return -1; + } + + /* Construct ifr */ + memset(&ifr, 0, sizeof(ifr)); + ifr.ifr_flags = IFF_TAP | IFF_NO_PI; + + if (ioctl(tapfd, TUNGETFEATURES, &tap_features) == -1) { + PMD_DRV_LOG(ERR, "TUNGETFEATURES failed: %s", strerror(errno)); + goto error; + } + if (tap_features & IFF_ONE_QUEUE) + ifr.ifr_flags |= IFF_ONE_QUEUE; + + /* Let tap instead of vhost-net handle vnet header, as the latter does + * not support offloading. And in this case, we should not set feature + * bit VHOST_NET_F_VIRTIO_NET_HDR. + */ + if (tap_features & IFF_VNET_HDR) { + ifr.ifr_flags |= IFF_VNET_HDR; + } else { + PMD_DRV_LOG(ERR, "TAP does not support IFF_VNET_HDR"); + goto error; + } + + if (req_mq) + ifr.ifr_flags |= IFF_MULTI_QUEUE; + + if (*p_ifname) + strncpy(ifr.ifr_name, *p_ifname, IFNAMSIZ); + else + strncpy(ifr.ifr_name, "tap%d", IFNAMSIZ); + if (ioctl(tapfd, TUNSETIFF, (void *)&ifr) == -1) { + PMD_DRV_LOG(ERR, "TUNSETIFF failed: %s", strerror(errno)); + goto error; + } + + fcntl(tapfd, F_SETFL, O_NONBLOCK); + + if (ioctl(tapfd, TUNSETVNETHDRSZ, &hdr_size) < 0) { + PMD_DRV_LOG(ERR, "TUNSETVNETHDRSZ failed: %s", strerror(errno)); + goto error; + } + + if (ioctl(tapfd, TUNSETSNDBUF, &sndbuf) < 0) { + PMD_DRV_LOG(ERR, "TUNSETSNDBUF failed: %s", strerror(errno)); + goto error; + } + + /* TODO: before set the offload capabilities, we'd better (1) check + * negotiated features to see if necessary to offload; (2) query tap + * to see if it supports the offload capabilities. + */ + if (ioctl(tapfd, TUNSETOFFLOAD, offload) != 0) + PMD_DRV_LOG(ERR, "TUNSETOFFLOAD ioctl() failed: %s", + strerror(errno)); + + if (!(*p_ifname)) + *p_ifname = strdup(ifr.ifr_name); + + return tapfd; +error: + close(tapfd); + return -1; +} diff --git a/src/dpdk/drivers/net/virtio/virtio_user/vhost_kernel_tap.h b/src/dpdk/drivers/net/virtio/virtio_user/vhost_kernel_tap.h new file mode 100644 index 00000000..eae340cc --- /dev/null +++ b/src/dpdk/drivers/net/virtio/virtio_user/vhost_kernel_tap.h @@ -0,0 +1,67 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2016 Intel Corporation. All rights reserved. + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include <sys/ioctl.h> + +/* TUN ioctls */ +#define TUNSETIFF _IOW('T', 202, int) +#define TUNGETFEATURES _IOR('T', 207, unsigned int) +#define TUNSETOFFLOAD _IOW('T', 208, unsigned int) +#define TUNGETIFF _IOR('T', 210, unsigned int) +#define TUNSETSNDBUF _IOW('T', 212, int) +#define TUNGETVNETHDRSZ _IOR('T', 215, int) +#define TUNSETVNETHDRSZ _IOW('T', 216, int) +#define TUNSETQUEUE _IOW('T', 217, int) +#define TUNSETVNETLE _IOW('T', 220, int) +#define TUNSETVNETBE _IOW('T', 222, int) + +/* TUNSETIFF ifr flags */ +#define IFF_TAP 0x0002 +#define IFF_NO_PI 0x1000 +#define IFF_ONE_QUEUE 0x2000 +#define IFF_VNET_HDR 0x4000 +#define IFF_MULTI_QUEUE 0x0100 +#define IFF_ATTACH_QUEUE 0x0200 +#define IFF_DETACH_QUEUE 0x0400 + +/* Features for GSO (TUNSETOFFLOAD). */ +#define TUN_F_CSUM 0x01 /* You can hand me unchecksummed packets. */ +#define TUN_F_TSO4 0x02 /* I can handle TSO for IPv4 packets */ +#define TUN_F_TSO6 0x04 /* I can handle TSO for IPv6 packets */ +#define TUN_F_TSO_ECN 0x08 /* I can handle TSO with ECN bits. */ +#define TUN_F_UFO 0x10 /* I can handle UFO packets */ + +/* Constants */ +#define PATH_NET_TUN "/dev/net/tun" + +int vhost_kernel_open_tap(char **p_ifname, int hdr_size, int req_mq); diff --git a/src/dpdk/drivers/net/virtio/virtio_user/vhost_user.c b/src/dpdk/drivers/net/virtio/virtio_user/vhost_user.c new file mode 100644 index 00000000..4ad7b21b --- /dev/null +++ b/src/dpdk/drivers/net/virtio/virtio_user/vhost_user.c @@ -0,0 +1,467 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2016 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. 
+ * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include <sys/socket.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <unistd.h> +#include <fcntl.h> +#include <sys/un.h> +#include <string.h> +#include <errno.h> + +#include "vhost.h" +#include "virtio_user_dev.h" + +/* The version of the protocol we support */ +#define VHOST_USER_VERSION 0x1 + +#define VHOST_MEMORY_MAX_NREGIONS 8 +struct vhost_memory { + uint32_t nregions; + uint32_t padding; + struct vhost_memory_region regions[VHOST_MEMORY_MAX_NREGIONS]; +}; + +struct vhost_user_msg { + enum vhost_user_request request; + +#define VHOST_USER_VERSION_MASK 0x3 +#define VHOST_USER_REPLY_MASK (0x1 << 2) + uint32_t flags; + uint32_t size; /* the following payload size */ + union { +#define VHOST_USER_VRING_IDX_MASK 0xff +#define VHOST_USER_VRING_NOFD_MASK (0x1 << 8) + uint64_t u64; + struct vhost_vring_state state; + struct vhost_vring_addr addr; + struct vhost_memory memory; + } payload; + int fds[VHOST_MEMORY_MAX_NREGIONS]; +} __attribute((packed)); + +#define VHOST_USER_HDR_SIZE offsetof(struct vhost_user_msg, payload.u64) +#define VHOST_USER_PAYLOAD_SIZE \ + (sizeof(struct vhost_user_msg) - VHOST_USER_HDR_SIZE) + +static int +vhost_user_write(int fd, void *buf, int len, int *fds, int fd_num) +{ + int r; + struct msghdr msgh; + struct iovec iov; + size_t fd_size = fd_num * sizeof(int); + char control[CMSG_SPACE(fd_size)]; + struct cmsghdr *cmsg; + + memset(&msgh, 0, sizeof(msgh)); + memset(control, 0, sizeof(control)); + + iov.iov_base = (uint8_t *)buf; + iov.iov_len = len; + + msgh.msg_iov = &iov; + msgh.msg_iovlen = 1; + msgh.msg_control = control; + msgh.msg_controllen = sizeof(control); + + cmsg = CMSG_FIRSTHDR(&msgh); + cmsg->cmsg_len = CMSG_LEN(fd_size); + cmsg->cmsg_level = SOL_SOCKET; + cmsg->cmsg_type = SCM_RIGHTS; + memcpy(CMSG_DATA(cmsg), fds, fd_size); + + do { + r = sendmsg(fd, &msgh, 0); + } while (r < 0 && errno == EINTR); + + return r; +} + +static int +vhost_user_read(int fd, struct vhost_user_msg *msg) +{ + uint32_t valid_flags = VHOST_USER_REPLY_MASK | VHOST_USER_VERSION; + int ret, sz_hdr = VHOST_USER_HDR_SIZE, sz_payload; + + ret = recv(fd, (void *)msg, sz_hdr, 0); + if (ret < sz_hdr) { + PMD_DRV_LOG(ERR, "Failed to recv msg hdr: %d instead of %d.", + ret, sz_hdr); + goto fail; + } + + /* validate msg flags */ + if (msg->flags != (valid_flags)) { + 
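+ /* a well-formed reply must carry exactly
+ * (VHOST_USER_REPLY_MASK | VHOST_USER_VERSION) in its flags
+ */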
PMD_DRV_LOG(ERR, "Failed to recv msg: flags %x instead of %x.", + msg->flags, valid_flags); + goto fail; + } + + sz_payload = msg->size; + if (sz_payload) { + ret = recv(fd, (void *)((char *)msg + sz_hdr), sz_payload, 0); + if (ret < sz_payload) { + PMD_DRV_LOG(ERR, + "Failed to recv msg payload: %d instead of %d.", + ret, msg->size); + goto fail; + } + } + + return 0; + +fail: + return -1; +} + +struct hugepage_file_info { + uint64_t addr; /**< virtual addr */ + size_t size; /**< the file size */ + char path[PATH_MAX]; /**< path to backing file */ +}; + +/* Two possible options: + * 1. Match HUGEPAGE_INFO_FMT to find the file storing struct hugepage_file + * array. This is simple but cannot be used in secondary process because + * secondary process will close and munmap that file. + * 2. Match HUGEFILE_FMT to find hugepage files directly. + * + * We choose option 2. + */ +static int +get_hugepage_file_info(struct hugepage_file_info huges[], int max) +{ + int idx; + FILE *f; + char buf[BUFSIZ], *tmp, *tail; + char *str_underline, *str_start; + int huge_index; + uint64_t v_start, v_end; + + f = fopen("/proc/self/maps", "r"); + if (!f) { + PMD_DRV_LOG(ERR, "cannot open /proc/self/maps"); + return -1; + } + + idx = 0; + while (fgets(buf, sizeof(buf), f) != NULL) { + if (sscanf(buf, "%" PRIx64 "-%" PRIx64, &v_start, &v_end) < 2) { + PMD_DRV_LOG(ERR, "Failed to parse address"); + goto error; + } + + tmp = strchr(buf, ' ') + 1; /** skip address */ + tmp = strchr(tmp, ' ') + 1; /** skip perm */ + tmp = strchr(tmp, ' ') + 1; /** skip offset */ + tmp = strchr(tmp, ' ') + 1; /** skip dev */ + tmp = strchr(tmp, ' ') + 1; /** skip inode */ + while (*tmp == ' ') /** skip spaces */ + tmp++; + tail = strrchr(tmp, '\n'); /** remove newline if exists */ + if (tail) + *tail = '\0'; + + /* Match HUGEFILE_FMT, aka "%s/%smap_%d", + * which is defined in eal_filesystem.h + */ + str_underline = strrchr(tmp, '_'); + if (!str_underline) + continue; + + str_start = str_underline - strlen("map"); + if (str_start < tmp) + continue; + + if (sscanf(str_start, "map_%d", &huge_index) != 1) + continue; + + if (idx >= max) { + PMD_DRV_LOG(ERR, "Exceed maximum of %d", max); + goto error; + } + huges[idx].addr = v_start; + huges[idx].size = v_end - v_start; + snprintf(huges[idx].path, PATH_MAX, "%s", tmp); + idx++; + } + + fclose(f); + return idx; + +error: + fclose(f); + return -1; +} + +static int +prepare_vhost_memory_user(struct vhost_user_msg *msg, int fds[]) +{ + int i, num; + struct hugepage_file_info huges[VHOST_MEMORY_MAX_NREGIONS]; + struct vhost_memory_region *mr; + + num = get_hugepage_file_info(huges, VHOST_MEMORY_MAX_NREGIONS); + if (num < 0) { + PMD_INIT_LOG(ERR, "Failed to prepare memory for vhost-user"); + return -1; + } + + for (i = 0; i < num; ++i) { + mr = &msg->payload.memory.regions[i]; + mr->guest_phys_addr = huges[i].addr; /* use vaddr! 
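+ * For vhost-user the region is really identified by the hugepage file
+ * descriptor sent alongside the message (via SCM_RIGHTS, see
+ * vhost_user_write()), so the "guest physical" address only needs to be a
+ * value both sides agree on; the local virtual address serves, with
+ * mmap_offset left at 0.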
*/ + mr->userspace_addr = huges[i].addr; + mr->memory_size = huges[i].size; + mr->mmap_offset = 0; + fds[i] = open(huges[i].path, O_RDWR); + } + + msg->payload.memory.nregions = num; + msg->payload.memory.padding = 0; + + return 0; +} + +static struct vhost_user_msg m; + +const char * const vhost_msg_strings[] = { + [VHOST_USER_SET_OWNER] = "VHOST_SET_OWNER", + [VHOST_USER_RESET_OWNER] = "VHOST_RESET_OWNER", + [VHOST_USER_SET_FEATURES] = "VHOST_SET_FEATURES", + [VHOST_USER_GET_FEATURES] = "VHOST_GET_FEATURES", + [VHOST_USER_SET_VRING_CALL] = "VHOST_SET_VRING_CALL", + [VHOST_USER_SET_VRING_NUM] = "VHOST_SET_VRING_NUM", + [VHOST_USER_SET_VRING_BASE] = "VHOST_SET_VRING_BASE", + [VHOST_USER_GET_VRING_BASE] = "VHOST_GET_VRING_BASE", + [VHOST_USER_SET_VRING_ADDR] = "VHOST_SET_VRING_ADDR", + [VHOST_USER_SET_VRING_KICK] = "VHOST_SET_VRING_KICK", + [VHOST_USER_SET_MEM_TABLE] = "VHOST_SET_MEM_TABLE", + [VHOST_USER_SET_VRING_ENABLE] = "VHOST_SET_VRING_ENABLE", +}; + +static int +vhost_user_sock(struct virtio_user_dev *dev, + enum vhost_user_request req, + void *arg) +{ + struct vhost_user_msg msg; + struct vhost_vring_file *file = 0; + int need_reply = 0; + int fds[VHOST_MEMORY_MAX_NREGIONS]; + int fd_num = 0; + int i, len; + int vhostfd = dev->vhostfd; + + RTE_SET_USED(m); + + PMD_DRV_LOG(INFO, "%s", vhost_msg_strings[req]); + + msg.request = req; + msg.flags = VHOST_USER_VERSION; + msg.size = 0; + + switch (req) { + case VHOST_USER_GET_FEATURES: + need_reply = 1; + break; + + case VHOST_USER_SET_FEATURES: + case VHOST_USER_SET_LOG_BASE: + msg.payload.u64 = *((__u64 *)arg); + msg.size = sizeof(m.payload.u64); + break; + + case VHOST_USER_SET_OWNER: + case VHOST_USER_RESET_OWNER: + break; + + case VHOST_USER_SET_MEM_TABLE: + if (prepare_vhost_memory_user(&msg, fds) < 0) + return -1; + fd_num = msg.payload.memory.nregions; + msg.size = sizeof(m.payload.memory.nregions); + msg.size += sizeof(m.payload.memory.padding); + msg.size += fd_num * sizeof(struct vhost_memory_region); + break; + + case VHOST_USER_SET_LOG_FD: + fds[fd_num++] = *((int *)arg); + break; + + case VHOST_USER_SET_VRING_NUM: + case VHOST_USER_SET_VRING_BASE: + case VHOST_USER_SET_VRING_ENABLE: + memcpy(&msg.payload.state, arg, sizeof(msg.payload.state)); + msg.size = sizeof(m.payload.state); + break; + + case VHOST_USER_GET_VRING_BASE: + memcpy(&msg.payload.state, arg, sizeof(msg.payload.state)); + msg.size = sizeof(m.payload.state); + need_reply = 1; + break; + + case VHOST_USER_SET_VRING_ADDR: + memcpy(&msg.payload.addr, arg, sizeof(msg.payload.addr)); + msg.size = sizeof(m.payload.addr); + break; + + case VHOST_USER_SET_VRING_KICK: + case VHOST_USER_SET_VRING_CALL: + case VHOST_USER_SET_VRING_ERR: + file = arg; + msg.payload.u64 = file->index & VHOST_USER_VRING_IDX_MASK; + msg.size = sizeof(m.payload.u64); + if (file->fd > 0) + fds[fd_num++] = file->fd; + else + msg.payload.u64 |= VHOST_USER_VRING_NOFD_MASK; + break; + + default: + PMD_DRV_LOG(ERR, "trying to send unhandled msg type"); + return -1; + } + + len = VHOST_USER_HDR_SIZE + msg.size; + if (vhost_user_write(vhostfd, &msg, len, fds, fd_num) < 0) { + PMD_DRV_LOG(ERR, "%s failed: %s", + vhost_msg_strings[req], strerror(errno)); + return -1; + } + + if (req == VHOST_USER_SET_MEM_TABLE) + for (i = 0; i < fd_num; ++i) + close(fds[i]); + + if (need_reply) { + if (vhost_user_read(vhostfd, &msg) < 0) { + PMD_DRV_LOG(ERR, "Received msg failed: %s", + strerror(errno)); + return -1; + } + + if (req != msg.request) { + PMD_DRV_LOG(ERR, "Received unexpected msg type"); + return -1; + } + 
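+ /*
+ * Only VHOST_USER_GET_FEATURES and VHOST_USER_GET_VRING_BASE expect a reply
+ * payload here. A minimal caller-side sketch, similar to how
+ * virtio_user_dev_init() retrieves dev->device_features through the ops table:
+ *
+ *   uint64_t features = 0;
+ *   if (dev->ops->send_request(dev, VHOST_USER_GET_FEATURES, &features) < 0)
+ *       return -1;
+ *   on success, features holds the backend's feature bits
+ */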
+ switch (req) { + case VHOST_USER_GET_FEATURES: + if (msg.size != sizeof(m.payload.u64)) { + PMD_DRV_LOG(ERR, "Received bad msg size"); + return -1; + } + *((__u64 *)arg) = msg.payload.u64; + break; + case VHOST_USER_GET_VRING_BASE: + if (msg.size != sizeof(m.payload.state)) { + PMD_DRV_LOG(ERR, "Received bad msg size"); + return -1; + } + memcpy(arg, &msg.payload.state, + sizeof(struct vhost_vring_state)); + break; + default: + PMD_DRV_LOG(ERR, "Received unexpected msg type"); + return -1; + } + } + + return 0; +} + +/** + * Set up environment to talk with a vhost user backend. + * + * @return + * - (-1) if fail; + * - (0) if succeed. + */ +static int +vhost_user_setup(struct virtio_user_dev *dev) +{ + int fd; + int flag; + struct sockaddr_un un; + + fd = socket(AF_UNIX, SOCK_STREAM, 0); + if (fd < 0) { + PMD_DRV_LOG(ERR, "socket() error, %s", strerror(errno)); + return -1; + } + + flag = fcntl(fd, F_GETFD); + if (fcntl(fd, F_SETFD, flag | FD_CLOEXEC) < 0) + PMD_DRV_LOG(WARNING, "fcntl failed, %s", strerror(errno)); + + memset(&un, 0, sizeof(un)); + un.sun_family = AF_UNIX; + snprintf(un.sun_path, sizeof(un.sun_path), "%s", dev->path); + if (connect(fd, (struct sockaddr *)&un, sizeof(un)) < 0) { + PMD_DRV_LOG(ERR, "connect error, %s", strerror(errno)); + close(fd); + return -1; + } + + dev->vhostfd = fd; + return 0; +} + +static int +vhost_user_enable_queue_pair(struct virtio_user_dev *dev, + uint16_t pair_idx, + int enable) +{ + int i; + + for (i = 0; i < 2; ++i) { + struct vhost_vring_state state = { + .index = pair_idx * 2 + i, + .num = enable, + }; + + if (vhost_user_sock(dev, VHOST_USER_SET_VRING_ENABLE, &state)) + return -1; + } + + return 0; +} + +struct virtio_user_backend_ops ops_user = { + .setup = vhost_user_setup, + .send_request = vhost_user_sock, + .enable_qp = vhost_user_enable_queue_pair +}; diff --git a/src/dpdk/drivers/net/virtio/virtio_user/virtio_user_dev.c b/src/dpdk/drivers/net/virtio/virtio_user/virtio_user_dev.c new file mode 100644 index 00000000..21ed00d7 --- /dev/null +++ b/src/dpdk/drivers/net/virtio/virtio_user/virtio_user_dev.c @@ -0,0 +1,414 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2016 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include <stdint.h> +#include <stdio.h> +#include <fcntl.h> +#include <string.h> +#include <errno.h> +#include <sys/mman.h> +#include <unistd.h> +#include <sys/eventfd.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <unistd.h> + +#include "vhost.h" +#include "virtio_user_dev.h" +#include "../virtio_ethdev.h" + +static int +virtio_user_create_queue(struct virtio_user_dev *dev, uint32_t queue_sel) +{ + /* Of all per virtqueue MSGs, make sure VHOST_SET_VRING_CALL come + * firstly because vhost depends on this msg to allocate virtqueue + * pair. + */ + int callfd; + struct vhost_vring_file file; + + /* May use invalid flag, but some backend leverages kickfd and callfd as + * criteria to judge if dev is alive. so finally we use real event_fd. + */ + callfd = eventfd(0, EFD_CLOEXEC | EFD_NONBLOCK); + if (callfd < 0) { + PMD_DRV_LOG(ERR, "callfd error, %s", strerror(errno)); + return -1; + } + file.index = queue_sel; + file.fd = callfd; + dev->ops->send_request(dev, VHOST_USER_SET_VRING_CALL, &file); + dev->callfds[queue_sel] = callfd; + + return 0; +} + +static int +virtio_user_kick_queue(struct virtio_user_dev *dev, uint32_t queue_sel) +{ + int kickfd; + struct vhost_vring_file file; + struct vhost_vring_state state; + struct vring *vring = &dev->vrings[queue_sel]; + struct vhost_vring_addr addr = { + .index = queue_sel, + .desc_user_addr = (uint64_t)(uintptr_t)vring->desc, + .avail_user_addr = (uint64_t)(uintptr_t)vring->avail, + .used_user_addr = (uint64_t)(uintptr_t)vring->used, + .log_guest_addr = 0, + .flags = 0, /* disable log */ + }; + + state.index = queue_sel; + state.num = vring->num; + dev->ops->send_request(dev, VHOST_USER_SET_VRING_NUM, &state); + + state.index = queue_sel; + state.num = 0; /* no reservation */ + dev->ops->send_request(dev, VHOST_USER_SET_VRING_BASE, &state); + + dev->ops->send_request(dev, VHOST_USER_SET_VRING_ADDR, &addr); + + /* Of all per virtqueue MSGs, make sure VHOST_USER_SET_VRING_KICK comes + * lastly because vhost depends on this msg to judge if + * virtio is ready. 
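+ * The resulting per-queue request order is therefore:
+ *   SET_VRING_NUM -> SET_VRING_BASE -> SET_VRING_ADDR -> SET_VRING_KICK
+ * (SET_VRING_CALL was already sent from virtio_user_create_queue()).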
+ */ + kickfd = eventfd(0, EFD_CLOEXEC | EFD_NONBLOCK); + if (kickfd < 0) { + PMD_DRV_LOG(ERR, "kickfd error, %s", strerror(errno)); + return -1; + } + file.index = queue_sel; + file.fd = kickfd; + dev->ops->send_request(dev, VHOST_USER_SET_VRING_KICK, &file); + dev->kickfds[queue_sel] = kickfd; + + return 0; +} + +static int +virtio_user_queue_setup(struct virtio_user_dev *dev, + int (*fn)(struct virtio_user_dev *, uint32_t)) +{ + uint32_t i, queue_sel; + + for (i = 0; i < dev->max_queue_pairs; ++i) { + queue_sel = 2 * i + VTNET_SQ_RQ_QUEUE_IDX; + if (fn(dev, queue_sel) < 0) { + PMD_DRV_LOG(INFO, "setup rx vq fails: %u", i); + return -1; + } + } + for (i = 0; i < dev->max_queue_pairs; ++i) { + queue_sel = 2 * i + VTNET_SQ_TQ_QUEUE_IDX; + if (fn(dev, queue_sel) < 0) { + PMD_DRV_LOG(INFO, "setup tx vq fails: %u", i); + return -1; + } + } + + return 0; +} + +int +virtio_user_start_device(struct virtio_user_dev *dev) +{ + uint64_t features; + int ret; + + /* Step 0: tell vhost to create queues */ + if (virtio_user_queue_setup(dev, virtio_user_create_queue) < 0) + goto error; + + /* Step 1: set features */ + features = dev->features; + /* Strip VIRTIO_NET_F_MAC, as MAC address is handled in vdev init */ + features &= ~(1ull << VIRTIO_NET_F_MAC); + /* Strip VIRTIO_NET_F_CTRL_VQ, as devices do not really need to know */ + features &= ~(1ull << VIRTIO_NET_F_CTRL_VQ); + ret = dev->ops->send_request(dev, VHOST_USER_SET_FEATURES, &features); + if (ret < 0) + goto error; + PMD_DRV_LOG(INFO, "set features: %" PRIx64, features); + + /* Step 2: share memory regions */ + ret = dev->ops->send_request(dev, VHOST_USER_SET_MEM_TABLE, NULL); + if (ret < 0) + goto error; + + /* Step 3: kick queues */ + if (virtio_user_queue_setup(dev, virtio_user_kick_queue) < 0) + goto error; + + /* Step 4: enable queues + * we enable the 1st queue pair by default. 
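+ * Remaining pairs are enabled later from virtio_user_handle_mq() when the
+ * driver issues VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET on the control queue.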
+ */ + dev->ops->enable_qp(dev, 0, 1); + + return 0; +error: + /* TODO: free resource here or caller to check */ + return -1; +} + +int virtio_user_stop_device(struct virtio_user_dev *dev) +{ + uint32_t i; + + for (i = 0; i < dev->max_queue_pairs * 2; ++i) { + close(dev->callfds[i]); + close(dev->kickfds[i]); + } + + for (i = 0; i < dev->max_queue_pairs; ++i) + dev->ops->enable_qp(dev, i, 0); + + free(dev->ifname); + dev->ifname = NULL; + + return 0; +} + +static inline void +parse_mac(struct virtio_user_dev *dev, const char *mac) +{ + int i, r; + uint32_t tmp[ETHER_ADDR_LEN]; + + if (!mac) + return; + + r = sscanf(mac, "%x:%x:%x:%x:%x:%x", &tmp[0], + &tmp[1], &tmp[2], &tmp[3], &tmp[4], &tmp[5]); + if (r == ETHER_ADDR_LEN) { + for (i = 0; i < ETHER_ADDR_LEN; ++i) + dev->mac_addr[i] = (uint8_t)tmp[i]; + dev->mac_specified = 1; + } else { + /* ignore the wrong mac, use random mac */ + PMD_DRV_LOG(ERR, "wrong format of mac: %s", mac); + } +} + +static int +is_vhost_user_by_type(const char *path) +{ + struct stat sb; + + if (stat(path, &sb) == -1) + return 0; + + return S_ISSOCK(sb.st_mode); +} + +static int +virtio_user_dev_setup(struct virtio_user_dev *dev) +{ + uint32_t i, q; + + dev->vhostfd = -1; + for (i = 0; i < VIRTIO_MAX_VIRTQUEUES * 2 + 1; ++i) { + dev->kickfds[i] = -1; + dev->callfds[i] = -1; + } + + dev->vhostfds = NULL; + dev->tapfds = NULL; + + if (is_vhost_user_by_type(dev->path)) { + dev->ops = &ops_user; + } else { + dev->ops = &ops_kernel; + + dev->vhostfds = malloc(dev->max_queue_pairs * sizeof(int)); + dev->tapfds = malloc(dev->max_queue_pairs * sizeof(int)); + if (!dev->vhostfds || !dev->tapfds) { + PMD_INIT_LOG(ERR, "Failed to malloc"); + return -1; + } + + for (q = 0; q < dev->max_queue_pairs; ++q) { + dev->vhostfds[q] = -1; + dev->tapfds[q] = -1; + } + } + + return dev->ops->setup(dev); +} + +int +virtio_user_dev_init(struct virtio_user_dev *dev, char *path, int queues, + int cq, int queue_size, const char *mac) +{ + snprintf(dev->path, PATH_MAX, "%s", path); + dev->max_queue_pairs = queues; + dev->queue_pairs = 1; /* mq disabled by default */ + dev->queue_size = queue_size; + dev->mac_specified = 0; + parse_mac(dev, mac); + + if (virtio_user_dev_setup(dev) < 0) { + PMD_INIT_LOG(ERR, "backend set up fails"); + return -1; + } + if (dev->ops->send_request(dev, VHOST_USER_SET_OWNER, NULL) < 0) { + PMD_INIT_LOG(ERR, "set_owner fails: %s", strerror(errno)); + return -1; + } + + if (dev->ops->send_request(dev, VHOST_USER_GET_FEATURES, + &dev->device_features) < 0) { + PMD_INIT_LOG(ERR, "get_features failed: %s", strerror(errno)); + return -1; + } + if (dev->mac_specified) + dev->device_features |= (1ull << VIRTIO_NET_F_MAC); + + if (cq) { + /* device does not really need to know anything about CQ, + * so if necessary, we just claim to support CQ + */ + dev->device_features |= (1ull << VIRTIO_NET_F_CTRL_VQ); + } else { + dev->device_features &= ~(1ull << VIRTIO_NET_F_CTRL_VQ); + /* Also disable features depends on VIRTIO_NET_F_CTRL_VQ */ + dev->device_features &= ~(1ull << VIRTIO_NET_F_CTRL_RX); + dev->device_features &= ~(1ull << VIRTIO_NET_F_CTRL_VLAN); + dev->device_features &= ~(1ull << VIRTIO_NET_F_GUEST_ANNOUNCE); + dev->device_features &= ~(1ull << VIRTIO_NET_F_MQ); + dev->device_features &= ~(1ull << VIRTIO_NET_F_CTRL_MAC_ADDR); + } + + return 0; +} + +void +virtio_user_dev_uninit(struct virtio_user_dev *dev) +{ + uint32_t i; + + virtio_user_stop_device(dev); + + close(dev->vhostfd); + + if (dev->vhostfds) { + for (i = 0; i < dev->max_queue_pairs; ++i) + 
close(dev->vhostfds[i]); + free(dev->vhostfds); + free(dev->tapfds); + } +} + +static uint8_t +virtio_user_handle_mq(struct virtio_user_dev *dev, uint16_t q_pairs) +{ + uint16_t i; + uint8_t ret = 0; + + if (q_pairs > dev->max_queue_pairs) { + PMD_INIT_LOG(ERR, "multi-q config %u, but only %u supported", + q_pairs, dev->max_queue_pairs); + return -1; + } + + for (i = 0; i < q_pairs; ++i) + ret |= dev->ops->enable_qp(dev, i, 1); + for (i = q_pairs; i < dev->max_queue_pairs; ++i) + ret |= dev->ops->enable_qp(dev, i, 0); + + dev->queue_pairs = q_pairs; + + return ret; +} + +static uint32_t +virtio_user_handle_ctrl_msg(struct virtio_user_dev *dev, struct vring *vring, + uint16_t idx_hdr) +{ + struct virtio_net_ctrl_hdr *hdr; + virtio_net_ctrl_ack status = ~0; + uint16_t i, idx_data, idx_status; + uint32_t n_descs = 0; + + /* locate desc for header, data, and status */ + idx_data = vring->desc[idx_hdr].next; + n_descs++; + + i = idx_data; + while (vring->desc[i].flags == VRING_DESC_F_NEXT) { + i = vring->desc[i].next; + n_descs++; + } + + /* locate desc for status */ + idx_status = i; + n_descs++; + + hdr = (void *)(uintptr_t)vring->desc[idx_hdr].addr; + if (hdr->class == VIRTIO_NET_CTRL_MQ && + hdr->cmd == VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET) { + uint16_t queues; + + queues = *(uint16_t *)(uintptr_t)vring->desc[idx_data].addr; + status = virtio_user_handle_mq(dev, queues); + } + + /* Update status */ + *(virtio_net_ctrl_ack *)(uintptr_t)vring->desc[idx_status].addr = status; + + return n_descs; +} + +void +virtio_user_handle_cq(struct virtio_user_dev *dev, uint16_t queue_idx) +{ + uint16_t avail_idx, desc_idx; + struct vring_used_elem *uep; + uint32_t n_descs; + struct vring *vring = &dev->vrings[queue_idx]; + + /* Consume avail ring, using used ring idx as first one */ + while (vring->used->idx != vring->avail->idx) { + avail_idx = (vring->used->idx) & (vring->num - 1); + desc_idx = vring->avail->ring[avail_idx]; + + n_descs = virtio_user_handle_ctrl_msg(dev, vring, desc_idx); + + /* Update used ring */ + uep = &vring->used->ring[avail_idx]; + uep->id = avail_idx; + uep->len = n_descs; + + vring->used->idx++; + } +} diff --git a/src/dpdk/drivers/net/virtio/virtio_user/virtio_user_dev.h b/src/dpdk/drivers/net/virtio/virtio_user/virtio_user_dev.h new file mode 100644 index 00000000..0d39f40c --- /dev/null +++ b/src/dpdk/drivers/net/virtio/virtio_user/virtio_user_dev.h @@ -0,0 +1,75 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2016 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _VIRTIO_USER_DEV_H +#define _VIRTIO_USER_DEV_H + +#include <limits.h> +#include "../virtio_pci.h" +#include "../virtio_ring.h" +#include "vhost.h" + +struct virtio_user_dev { + /* for vhost_user backend */ + int vhostfd; + + /* for vhost_kernel backend */ + char *ifname; + int *vhostfds; + int *tapfds; + + /* for both vhost_user and vhost_kernel */ + int callfds[VIRTIO_MAX_VIRTQUEUES * 2 + 1]; + int kickfds[VIRTIO_MAX_VIRTQUEUES * 2 + 1]; + int mac_specified; + uint32_t max_queue_pairs; + uint32_t queue_pairs; + uint32_t queue_size; + uint64_t features; /* the negotiated features with driver, + * and will be sync with device + */ + uint64_t device_features; /* supported features by device */ + uint8_t status; + uint8_t mac_addr[ETHER_ADDR_LEN]; + char path[PATH_MAX]; + struct vring vrings[VIRTIO_MAX_VIRTQUEUES * 2 + 1]; + struct virtio_user_backend_ops *ops; +}; + +int virtio_user_start_device(struct virtio_user_dev *dev); +int virtio_user_stop_device(struct virtio_user_dev *dev); +int virtio_user_dev_init(struct virtio_user_dev *dev, char *path, int queues, + int cq, int queue_size, const char *mac); +void virtio_user_dev_uninit(struct virtio_user_dev *dev); +void virtio_user_handle_cq(struct virtio_user_dev *dev, uint16_t queue_idx); +#endif diff --git a/src/dpdk/drivers/net/virtio/virtio_user_ethdev.c b/src/dpdk/drivers/net/virtio/virtio_user_ethdev.c index daef09bd..0b226ac7 100644 --- a/src/dpdk/drivers/net/virtio/virtio_user_ethdev.c +++ b/src/dpdk/drivers/net/virtio/virtio_user_ethdev.c @@ -37,6 +37,7 @@ #include <rte_malloc.h> #include <rte_kvargs.h> +#include <rte_vdev.h> #include "virtio_ethdev.h" #include "virtio_logs.h" @@ -81,26 +82,29 @@ virtio_user_write_dev_config(struct virtio_hw *hw, size_t offset, for (i = 0; i < ETHER_ADDR_LEN; ++i) dev->mac_addr[i] = ((const uint8_t *)src)[i]; else - PMD_DRV_LOG(ERR, "not supported offset=%zu, len=%d\n", + PMD_DRV_LOG(ERR, "not supported offset=%zu, len=%d", offset, length); } static void -virtio_user_set_status(struct virtio_hw *hw, uint8_t status) +virtio_user_reset(struct virtio_hw *hw) { struct virtio_user_dev *dev = virtio_user_get_dev(hw); - if (status & VIRTIO_CONFIG_STATUS_DRIVER_OK) - virtio_user_start_device(dev); - dev->status = status; + if (dev->status & VIRTIO_CONFIG_STATUS_DRIVER_OK) + virtio_user_stop_device(dev); } static void -virtio_user_reset(struct virtio_hw *hw) +virtio_user_set_status(struct virtio_hw *hw, uint8_t status) { struct virtio_user_dev *dev = virtio_user_get_dev(hw); - virtio_user_stop_device(dev); + if (status & VIRTIO_CONFIG_STATUS_DRIVER_OK) + virtio_user_start_device(dev); + else if (status == VIRTIO_CONFIG_STATUS_RESET) + virtio_user_reset(hw); + dev->status = status; } static uint8_t @@ -116,7 +120,8 @@ virtio_user_get_features(struct virtio_hw *hw) { struct virtio_user_dev *dev = virtio_user_get_dev(hw); - return dev->features; + /* unmask feature bits defined in vhost user protocol */ + return dev->device_features & 
VIRTIO_PMD_SUPPORTED_GUEST_FEATURES; } static void @@ -124,7 +129,7 @@ virtio_user_set_features(struct virtio_hw *hw, uint64_t features) { struct virtio_user_dev *dev = virtio_user_get_dev(hw); - dev->features = features; + dev->features = features & dev->device_features; } static uint8_t @@ -207,11 +212,11 @@ virtio_user_notify_queue(struct virtio_hw *hw, struct virtqueue *vq) } if (write(dev->kickfds[vq->vq_queue_index], &buf, sizeof(buf)) < 0) - PMD_DRV_LOG(ERR, "failed to kick backend: %s\n", + PMD_DRV_LOG(ERR, "failed to kick backend: %s", strerror(errno)); } -static const struct virtio_pci_ops virtio_user_ops = { +const struct virtio_pci_ops virtio_user_ops = { .read_dev_cfg = virtio_user_read_dev_config, .write_dev_cfg = virtio_user_write_dev_config, .reset = virtio_user_reset, @@ -269,6 +274,8 @@ get_integer_arg(const char *key __rte_unused, return 0; } +static struct rte_vdev_driver virtio_user_driver; + static struct rte_eth_dev * virtio_user_eth_dev_alloc(const char *name) { @@ -277,7 +284,7 @@ virtio_user_eth_dev_alloc(const char *name) struct virtio_hw *hw; struct virtio_user_dev *dev; - eth_dev = rte_eth_dev_allocate(name, RTE_ETH_DEV_VIRTUAL); + eth_dev = rte_eth_dev_allocate(name); if (!eth_dev) { PMD_INIT_LOG(ERR, "cannot alloc rte_eth_dev"); return NULL; @@ -300,25 +307,38 @@ virtio_user_eth_dev_alloc(const char *name) return NULL; } - hw->vtpci_ops = &virtio_user_ops; + hw->port_id = data->port_id; + virtio_hw_internal[hw->port_id].vtpci_ops = &virtio_user_ops; hw->use_msix = 0; hw->modern = 0; + hw->use_simple_rxtx = 0; hw->virtio_user_dev = dev; data->dev_private = hw; + data->drv_name = virtio_user_driver.driver.name; data->numa_node = SOCKET_ID_ANY; data->kdrv = RTE_KDRV_NONE; data->dev_flags = RTE_ETH_DEV_DETACHABLE; - eth_dev->pci_dev = NULL; eth_dev->driver = NULL; return eth_dev; } +static void +virtio_user_eth_dev_free(struct rte_eth_dev *eth_dev) +{ + struct rte_eth_dev_data *data = eth_dev->data; + struct virtio_hw *hw = data->dev_private; + + rte_free(hw->virtio_user_dev); + rte_free(hw); + rte_eth_dev_release_port(eth_dev); +} + /* Dev initialization routine. Invoked once for each virtio vdev at * EAL init time, see rte_eal_dev_init(). * Returns 0 on success. 
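 *
 * Illustrative example only (key names follow the VIRTIO_USER_ARG_* macros
 * defined earlier in this file; path and numeric values are placeholders):
 *
 *   --vdev=virtio_user0,path=/dev/vhost-net,queues=1,queue_size=256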
*/ static int -virtio_user_pmd_devinit(const char *name, const char *params) +virtio_user_pmd_probe(const char *name, const char *params) { struct rte_kvargs *kvlist = NULL; struct rte_eth_dev *eth_dev; @@ -343,23 +363,21 @@ virtio_user_pmd_devinit(const char *name, const char *params) } if (rte_kvargs_count(kvlist, VIRTIO_USER_ARG_PATH) == 1) { - ret = rte_kvargs_process(kvlist, VIRTIO_USER_ARG_PATH, - &get_string_arg, &path); - if (ret < 0) { + if (rte_kvargs_process(kvlist, VIRTIO_USER_ARG_PATH, + &get_string_arg, &path) < 0) { PMD_INIT_LOG(ERR, "error to parse %s", VIRTIO_USER_ARG_PATH); goto end; } } else { - PMD_INIT_LOG(ERR, "arg %s is mandatory for virtio_user\n", + PMD_INIT_LOG(ERR, "arg %s is mandatory for virtio_user", VIRTIO_USER_ARG_QUEUE_SIZE); goto end; } if (rte_kvargs_count(kvlist, VIRTIO_USER_ARG_MAC) == 1) { - ret = rte_kvargs_process(kvlist, VIRTIO_USER_ARG_MAC, - &get_string_arg, &mac_addr); - if (ret < 0) { + if (rte_kvargs_process(kvlist, VIRTIO_USER_ARG_MAC, + &get_string_arg, &mac_addr) < 0) { PMD_INIT_LOG(ERR, "error to parse %s", VIRTIO_USER_ARG_MAC); goto end; @@ -367,9 +385,8 @@ virtio_user_pmd_devinit(const char *name, const char *params) } if (rte_kvargs_count(kvlist, VIRTIO_USER_ARG_QUEUE_SIZE) == 1) { - ret = rte_kvargs_process(kvlist, VIRTIO_USER_ARG_QUEUE_SIZE, - &get_integer_arg, &queue_size); - if (ret < 0) { + if (rte_kvargs_process(kvlist, VIRTIO_USER_ARG_QUEUE_SIZE, + &get_integer_arg, &queue_size) < 0) { PMD_INIT_LOG(ERR, "error to parse %s", VIRTIO_USER_ARG_QUEUE_SIZE); goto end; @@ -377,9 +394,8 @@ virtio_user_pmd_devinit(const char *name, const char *params) } if (rte_kvargs_count(kvlist, VIRTIO_USER_ARG_QUEUES_NUM) == 1) { - ret = rte_kvargs_process(kvlist, VIRTIO_USER_ARG_QUEUES_NUM, - &get_integer_arg, &queues); - if (ret < 0) { + if (rte_kvargs_process(kvlist, VIRTIO_USER_ARG_QUEUES_NUM, + &get_integer_arg, &queues) < 0) { PMD_INIT_LOG(ERR, "error to parse %s", VIRTIO_USER_ARG_QUEUES_NUM); goto end; @@ -387,9 +403,8 @@ virtio_user_pmd_devinit(const char *name, const char *params) } if (rte_kvargs_count(kvlist, VIRTIO_USER_ARG_CQ_NUM) == 1) { - ret = rte_kvargs_process(kvlist, VIRTIO_USER_ARG_CQ_NUM, - &get_integer_arg, &cq); - if (ret < 0) { + if (rte_kvargs_process(kvlist, VIRTIO_USER_ARG_CQ_NUM, + &get_integer_arg, &cq) < 0) { PMD_INIT_LOG(ERR, "error to parse %s", VIRTIO_USER_ARG_CQ_NUM); goto end; @@ -411,12 +426,16 @@ virtio_user_pmd_devinit(const char *name, const char *params) hw = eth_dev->data->dev_private; if (virtio_user_dev_init(hw->virtio_user_dev, path, queues, cq, - queue_size, mac_addr) < 0) + queue_size, mac_addr) < 0) { + PMD_INIT_LOG(ERR, "virtio_user_dev_init fails"); + virtio_user_eth_dev_free(eth_dev); goto end; + } /* previously called by rte_eal_pci_probe() for physical dev */ if (eth_virtio_dev_init(eth_dev) < 0) { PMD_INIT_LOG(ERR, "eth_virtio_dev_init fails"); + virtio_user_eth_dev_free(eth_dev); goto end; } ret = 0; @@ -433,7 +452,7 @@ end: /** Called by rte_eth_dev_detach() */ static int -virtio_user_pmd_devuninit(const char *name) +virtio_user_pmd_remove(const char *name) { struct rte_eth_dev *eth_dev; struct virtio_hw *hw; @@ -442,7 +461,7 @@ virtio_user_pmd_devuninit(const char *name) if (!name) return -EINVAL; - PMD_DRV_LOG(INFO, "Un-Initializing %s\n", name); + PMD_DRV_LOG(INFO, "Un-Initializing %s", name); eth_dev = rte_eth_dev_allocated(name); if (!eth_dev) return -ENODEV; @@ -461,14 +480,14 @@ virtio_user_pmd_devuninit(const char *name) return 0; } -static struct rte_driver virtio_user_driver = { - .type = 
PMD_VDEV, - .init = virtio_user_pmd_devinit, - .uninit = virtio_user_pmd_devuninit, +static struct rte_vdev_driver virtio_user_driver = { + .probe = virtio_user_pmd_probe, + .remove = virtio_user_pmd_remove, }; -PMD_REGISTER_DRIVER(virtio_user_driver, virtio_user); -DRIVER_REGISTER_PARAM_STRING(virtio_user, +RTE_PMD_REGISTER_VDEV(net_virtio_user, virtio_user_driver); +RTE_PMD_REGISTER_ALIAS(net_virtio_user, virtio_user); +RTE_PMD_REGISTER_PARAM_STRING(net_virtio_user, "path=<path> " "mac=<mac addr> " "cq=<int> " diff --git a/src/dpdk/drivers/net/virtio/virtqueue.c b/src/dpdk/drivers/net/virtio/virtqueue.c index 7f60e3ef..9ad77b8a 100644 --- a/src/dpdk/drivers/net/virtio/virtqueue.c +++ b/src/dpdk/drivers/net/virtio/virtqueue.c @@ -38,17 +38,6 @@ #include "virtio_logs.h" #include "virtio_pci.h" -void -virtqueue_disable_intr(struct virtqueue *vq) -{ - /* - * Set VRING_AVAIL_F_NO_INTERRUPT to hint host - * not to interrupt when it consumes packets - * Note: this is only considered a hint to the host - */ - vq->vq_ring.avail->flags |= VRING_AVAIL_F_NO_INTERRUPT; -} - /* * Two types of mbuf to be cleaned: * 1) mbuf that has been consumed by backend but not used by virtio. diff --git a/src/dpdk/drivers/net/virtio/virtqueue.h b/src/dpdk/drivers/net/virtio/virtqueue.h index 6737b81d..f9e37367 100644 --- a/src/dpdk/drivers/net/virtio/virtqueue.h +++ b/src/dpdk/drivers/net/virtio/virtqueue.h @@ -44,6 +44,7 @@ #include "virtio_pci.h" #include "virtio_ring.h" #include "virtio_logs.h" +#include "virtio_rxtx.h" struct rte_mbuf; @@ -191,6 +192,12 @@ struct virtqueue { void *vq_ring_virt_mem; /**< linear address of vring*/ unsigned int vq_ring_size; + union { + struct virtnet_rx rxq; + struct virtnet_tx txq; + struct virtnet_ctl cq; + }; + phys_addr_t vq_ring_mem; /**< physical address of vring, * or virtual address for virtio_user. */ @@ -204,7 +211,6 @@ struct virtqueue { uint16_t vq_queue_index; /**< PCI queue index */ uint16_t offset; /**< relative offset to obtain addr in mbuf */ uint16_t *notify_addr; - int configured; struct rte_mbuf **sw_ring; /**< RX software ring. */ struct vq_desc_extra vq_descx[0]; }; @@ -223,6 +229,7 @@ struct virtqueue { */ struct virtio_net_hdr { #define VIRTIO_NET_HDR_F_NEEDS_CSUM 1 /**< Use csum_start,csum_offset*/ +#define VIRTIO_NET_HDR_F_DATA_VALID 2 /**< Checksum is valid */ uint8_t flags; #define VIRTIO_NET_HDR_GSO_NONE 0 /**< Not a GSO frame */ #define VIRTIO_NET_HDR_GSO_TCPV4 1 /**< GSO frame, IPv4 TCP (TSO) */ @@ -267,7 +274,21 @@ vring_desc_init(struct vring_desc *dp, uint16_t n) /** * Tell the backend not to interrupt us. */ -void virtqueue_disable_intr(struct virtqueue *vq); +static inline void +virtqueue_disable_intr(struct virtqueue *vq) +{ + vq->vq_ring.avail->flags |= VRING_AVAIL_F_NO_INTERRUPT; +} + +/** + * Tell the backend to interrupt us. + */ +static inline void +virtqueue_enable_intr(struct virtqueue *vq) +{ + vq->vq_ring.avail->flags &= (~VRING_AVAIL_F_NO_INTERRUPT); +} + /** * Dump virtqueue internal structures, for debug purpose only. */ @@ -323,7 +344,7 @@ virtqueue_notify(struct virtqueue *vq) * For virtio on IA, the notificaiton is through io port operation * which is a serialization instruction itself. 
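 * For virtio_user there is no I/O port: the notify hook reached through
 * VTPCI_OPS() is virtio_user_notify_queue() (see virtio_user_ethdev.c above),
 * which instead writes to the per-queue kickfd eventfd to wake the backend.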
*/ - vq->hw->vtpci_ops->notify_queue(vq->hw, vq); + VTPCI_OPS(vq->hw)->notify_queue(vq->hw, vq); } #ifdef RTE_LIBRTE_VIRTIO_DEBUG_DUMP diff --git a/src/dpdk/drivers/net/vmxnet3/vmxnet3_ethdev.c b/src/dpdk/drivers/net/vmxnet3/vmxnet3_ethdev.c index 47fdc3ec..ff63a536 100644 --- a/src/dpdk/drivers/net/vmxnet3/vmxnet3_ethdev.c +++ b/src/dpdk/drivers/net/vmxnet3/vmxnet3_ethdev.c @@ -69,6 +69,8 @@ #define PROCESS_SYS_EVENTS 0 +#define VMXNET3_TX_MAX_SEG UINT8_MAX + static int eth_vmxnet3_dev_init(struct rte_eth_dev *eth_dev); static int eth_vmxnet3_dev_uninit(struct rte_eth_dev *eth_dev); static int vmxnet3_dev_configure(struct rte_eth_dev *dev); @@ -81,11 +83,11 @@ static void vmxnet3_dev_promiscuous_disable(struct rte_eth_dev *dev); static void vmxnet3_dev_allmulticast_enable(struct rte_eth_dev *dev); static void vmxnet3_dev_allmulticast_disable(struct rte_eth_dev *dev); static int vmxnet3_dev_link_update(struct rte_eth_dev *dev, - int wait_to_complete); + int wait_to_complete); static void vmxnet3_dev_stats_get(struct rte_eth_dev *dev, - struct rte_eth_stats *stats); + struct rte_eth_stats *stats); static void vmxnet3_dev_info_get(struct rte_eth_dev *dev, - struct rte_eth_dev_info *dev_info); + struct rte_eth_dev_info *dev_info); static const uint32_t * vmxnet3_dev_supported_ptypes_get(struct rte_eth_dev *dev); static int vmxnet3_dev_vlan_filter_set(struct rte_eth_dev *dev, @@ -118,7 +120,7 @@ static const struct eth_dev_ops vmxnet3_eth_dev_ops = { .allmulticast_disable = vmxnet3_dev_allmulticast_disable, .link_update = vmxnet3_dev_link_update, .stats_get = vmxnet3_dev_stats_get, - .mac_addr_set = vmxnet3_mac_addr_set, + .mac_addr_set = vmxnet3_mac_addr_set, .dev_infos_get = vmxnet3_dev_info_get, .dev_supported_ptypes_get = vmxnet3_dev_supported_ptypes_get, .vlan_filter_set = vmxnet3_dev_vlan_filter_set, @@ -131,20 +133,27 @@ static const struct eth_dev_ops vmxnet3_eth_dev_ops = { static const struct rte_memzone * gpa_zone_reserve(struct rte_eth_dev *dev, uint32_t size, - const char *post_string, int socket_id, uint16_t align) + const char *post_string, int socket_id, + uint16_t align, bool reuse) { char z_name[RTE_MEMZONE_NAMESIZE]; const struct rte_memzone *mz; snprintf(z_name, sizeof(z_name), "%s_%d_%s", - dev->driver->pci_drv.name, dev->data->port_id, post_string); + dev->data->drv_name, dev->data->port_id, post_string); mz = rte_memzone_lookup(z_name); + if (!reuse) { + if (mz) + rte_memzone_free(mz); + return rte_memzone_reserve_aligned(z_name, size, socket_id, + 0, align); + } + if (mz) return mz; - return rte_memzone_reserve_aligned(z_name, size, - socket_id, 0, align); + return rte_memzone_reserve_aligned(z_name, size, socket_id, 0, align); } /** @@ -194,7 +203,7 @@ vmxnet3_dev_atomic_write_link_status(struct rte_eth_dev *dev, struct rte_eth_link *src = link; if (rte_atomic64_cmpset((uint64_t *)dst, *(uint64_t *)dst, - *(uint64_t *)src) == 0) + *(uint64_t *)src) == 0) return -1; return 0; @@ -212,7 +221,7 @@ vmxnet3_disable_intr(struct vmxnet3_hw *hw) hw->shared->devRead.intrConf.intrCtrl |= VMXNET3_IC_DISABLE_ALL; for (i = 0; i < VMXNET3_MAX_INTRS; i++) - VMXNET3_WRITE_BAR0_REG(hw, VMXNET3_REG_IMR + i * 8, 1); + VMXNET3_WRITE_BAR0_REG(hw, VMXNET3_REG_IMR + i * 8, 1); } /* @@ -230,7 +239,8 @@ eth_vmxnet3_dev_init(struct rte_eth_dev *eth_dev) eth_dev->dev_ops = &vmxnet3_eth_dev_ops; eth_dev->rx_pkt_burst = &vmxnet3_recv_pkts; eth_dev->tx_pkt_burst = &vmxnet3_xmit_pkts; - pci_dev = eth_dev->pci_dev; + eth_dev->tx_pkt_prepare = vmxnet3_prep_pkts; + pci_dev = 
RTE_DEV_TO_PCI(eth_dev->device); /* * for secondary processes, we don't initialize any further as primary @@ -240,6 +250,7 @@ eth_vmxnet3_dev_init(struct rte_eth_dev *eth_dev) return 0; rte_eth_copy_pci_info(eth_dev, pci_dev); + eth_dev->data->dev_flags = RTE_ETH_DEV_DETACHABLE; /* Vendor and Device ID need to be set before init of shared code */ hw->device_id = pci_dev->id.device_id; @@ -274,8 +285,8 @@ eth_vmxnet3_dev_init(struct rte_eth_dev *eth_dev) /* Getting MAC Address */ mac_lo = VMXNET3_READ_BAR1_REG(hw, VMXNET3_REG_MACL); mac_hi = VMXNET3_READ_BAR1_REG(hw, VMXNET3_REG_MACH); - memcpy(hw->perm_addr , &mac_lo, 4); - memcpy(hw->perm_addr+4, &mac_hi, 2); + memcpy(hw->perm_addr, &mac_lo, 4); + memcpy(hw->perm_addr + 4, &mac_hi, 2); /* Allocate memory for storing MAC addresses */ eth_dev->data->mac_addrs = rte_zmalloc("vmxnet3", ETHER_ADDR_LEN * @@ -319,6 +330,7 @@ eth_vmxnet3_dev_uninit(struct rte_eth_dev *eth_dev) eth_dev->dev_ops = NULL; eth_dev->rx_pkt_burst = NULL; eth_dev->tx_pkt_burst = NULL; + eth_dev->tx_pkt_prepare = NULL; rte_free(eth_dev->data->mac_addrs); eth_dev->data->mac_addrs = NULL; @@ -328,29 +340,16 @@ eth_vmxnet3_dev_uninit(struct rte_eth_dev *eth_dev) static struct eth_driver rte_vmxnet3_pmd = { .pci_drv = { - .name = "rte_vmxnet3_pmd", .id_table = pci_id_vmxnet3_map, - .drv_flags = RTE_PCI_DRV_NEED_MAPPING | RTE_PCI_DRV_DETACHABLE, + .drv_flags = RTE_PCI_DRV_NEED_MAPPING, + .probe = rte_eth_dev_pci_probe, + .remove = rte_eth_dev_pci_remove, }, .eth_dev_init = eth_vmxnet3_dev_init, .eth_dev_uninit = eth_vmxnet3_dev_uninit, .dev_private_size = sizeof(struct vmxnet3_hw), }; -/* - * Driver initialization routine. - * Invoked once at EAL init time. - * Register itself as the [Poll Mode] Driver of Virtual PCI VMXNET3 devices. - */ -static int -rte_vmxnet3_pmd_init(const char *name __rte_unused, const char *param __rte_unused) -{ - PMD_INIT_FUNC_TRACE(); - - rte_eth_driver_register(&rte_vmxnet3_pmd); - return 0; -} - static int vmxnet3_dev_configure(struct rte_eth_dev *dev) { @@ -360,9 +359,16 @@ vmxnet3_dev_configure(struct rte_eth_dev *dev) PMD_INIT_FUNC_TRACE(); - if (dev->data->nb_rx_queues > UINT8_MAX || - dev->data->nb_tx_queues > UINT8_MAX) + if (dev->data->nb_tx_queues > VMXNET3_MAX_TX_QUEUES || + dev->data->nb_rx_queues > VMXNET3_MAX_RX_QUEUES) { + PMD_INIT_LOG(ERR, "ERROR: Number of queues not supported"); return -EINVAL; + } + + if (!rte_is_power_of_2(dev->data->nb_rx_queues)) { + PMD_INIT_LOG(ERR, "ERROR: Number of rx queues not power of 2"); + return -EINVAL; + } size = dev->data->nb_rx_queues * sizeof(struct Vmxnet3_TxQueueDesc) + dev->data->nb_tx_queues * sizeof(struct Vmxnet3_RxQueueDesc); @@ -378,7 +384,7 @@ vmxnet3_dev_configure(struct rte_eth_dev *dev) * on current socket */ mz = gpa_zone_reserve(dev, sizeof(struct Vmxnet3_DriverShared), - "shared", rte_socket_id(), 8); + "shared", rte_socket_id(), 8, 1); if (mz == NULL) { PMD_INIT_LOG(ERR, "ERROR: Creating shared zone"); @@ -391,10 +397,14 @@ vmxnet3_dev_configure(struct rte_eth_dev *dev) /* * Allocate a memzone for Vmxnet3_RxQueueDesc - Vmxnet3_TxQueueDesc - * on current socket + * on current socket. + * + * We cannot reuse this memzone from previous allocation as its size + * depends on the number of tx and rx queues, which could be different + * from one config to another. 
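+ * Hence gpa_zone_reserve() is called with reuse == 0 here: an existing zone
+ * of the same name is freed and a fresh one of the right size is reserved,
+ * whereas reuse == 1 (the "shared" and "rss_conf" zones) returns the old zone.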
*/ - mz = gpa_zone_reserve(dev, size, "queuedesc", - rte_socket_id(), VMXNET3_QUEUE_DESC_ALIGN); + mz = gpa_zone_reserve(dev, size, "queuedesc", rte_socket_id(), + VMXNET3_QUEUE_DESC_ALIGN, 0); if (mz == NULL) { PMD_INIT_LOG(ERR, "ERROR: Creating queue descriptors zone"); return -ENOMEM; @@ -408,10 +418,10 @@ vmxnet3_dev_configure(struct rte_eth_dev *dev) hw->queue_desc_len = (uint16_t)size; if (dev->data->dev_conf.rxmode.mq_mode == ETH_MQ_RX_RSS) { - /* Allocate memory structure for UPT1_RSSConf and configure */ - mz = gpa_zone_reserve(dev, sizeof(struct VMXNET3_RSSConf), "rss_conf", - rte_socket_id(), RTE_CACHE_LINE_SIZE); + mz = gpa_zone_reserve(dev, sizeof(struct VMXNET3_RSSConf), + "rss_conf", rte_socket_id(), + RTE_CACHE_LINE_SIZE, 1); if (mz == NULL) { PMD_INIT_LOG(ERR, "ERROR: Creating rss_conf structure zone"); @@ -459,8 +469,7 @@ vmxnet3_setup_driver_shared(struct rte_eth_dev *dev) /* Setting up Guest OS information */ devRead->misc.driverInfo.gos.gosBits = sizeof(void *) == 4 ? - VMXNET3_GOS_BITS_32 : - VMXNET3_GOS_BITS_64; + VMXNET3_GOS_BITS_32 : VMXNET3_GOS_BITS_64; devRead->misc.driverInfo.gos.gosType = VMXNET3_GOS_TYPE_LINUX; devRead->misc.driverInfo.vmxnet3RevSpt = 1; devRead->misc.driverInfo.uptVerSpt = 1; @@ -523,6 +532,11 @@ vmxnet3_setup_driver_shared(struct rte_eth_dev *dev) if (dev->data->dev_conf.rxmode.hw_ip_checksum) devRead->misc.uptFeatures |= VMXNET3_F_RXCSUM; + if (dev->data->dev_conf.rxmode.enable_lro) { + devRead->misc.uptFeatures |= VMXNET3_F_LRO; + devRead->misc.maxNumRxSG = 0; + } + if (port_conf.rxmode.mq_mode == ETH_MQ_RX_RSS) { ret = vmxnet3_rss_configure(dev); if (ret != VMXNET3_SUCCESS) @@ -535,7 +549,7 @@ vmxnet3_setup_driver_shared(struct rte_eth_dev *dev) } vmxnet3_dev_vlan_offload_set(dev, - ETH_VLAN_STRIP_MASK | ETH_VLAN_FILTER_MASK); + ETH_VLAN_STRIP_MASK | ETH_VLAN_FILTER_MASK); vmxnet3_write_mac(hw, hw->perm_addr); @@ -550,7 +564,7 @@ vmxnet3_setup_driver_shared(struct rte_eth_dev *dev) static int vmxnet3_dev_start(struct rte_eth_dev *dev) { - int status, ret; + int ret; struct vmxnet3_hw *hw = dev->data->dev_private; PMD_INIT_FUNC_TRACE(); @@ -567,11 +581,11 @@ vmxnet3_dev_start(struct rte_eth_dev *dev) /* Activate device by register write */ VMXNET3_WRITE_BAR1_REG(hw, VMXNET3_REG_CMD, VMXNET3_CMD_ACTIVATE_DEV); - status = VMXNET3_READ_BAR1_REG(hw, VMXNET3_REG_CMD); + ret = VMXNET3_READ_BAR1_REG(hw, VMXNET3_REG_CMD); - if (status != 0) { + if (ret != 0) { PMD_INIT_LOG(ERR, "Device activation: UNSUCCESSFUL"); - return -1; + return -EINVAL; } /* Disable interrupts */ @@ -583,7 +597,7 @@ vmxnet3_dev_start(struct rte_eth_dev *dev) */ ret = vmxnet3_dev_rxtx_init(dev); if (ret != VMXNET3_SUCCESS) { - PMD_INIT_LOG(ERR, "Device receive init: UNSUCCESSFUL"); + PMD_INIT_LOG(ERR, "Device queue init: UNSUCCESSFUL"); return ret; } @@ -598,7 +612,7 @@ vmxnet3_dev_start(struct rte_eth_dev *dev) PMD_INIT_LOG(DEBUG, "Reading events: 0x%X", events); vmxnet3_process_events(hw); #endif - return status; + return VMXNET3_SUCCESS; } /* @@ -664,16 +678,15 @@ vmxnet3_dev_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats) struct UPT1_TxStats *txStats = &hw->tqd_start[i].stats; stats->q_opackets[i] = txStats->ucastPktsTxOK + - txStats->mcastPktsTxOK + - txStats->bcastPktsTxOK; + txStats->mcastPktsTxOK + + txStats->bcastPktsTxOK; stats->q_obytes[i] = txStats->ucastBytesTxOK + - txStats->mcastBytesTxOK + - txStats->bcastBytesTxOK; + txStats->mcastBytesTxOK + + txStats->bcastBytesTxOK; stats->opackets += stats->q_opackets[i]; stats->obytes += 
stats->q_obytes[i]; - stats->oerrors += txStats->pktsTxError + - txStats->pktsTxDiscard; + stats->oerrors += txStats->pktsTxError + txStats->pktsTxDiscard; } RTE_BUILD_BUG_ON(RTE_ETHDEV_QUEUE_STAT_CNTRS < VMXNET3_MAX_RX_QUEUES); @@ -681,12 +694,12 @@ vmxnet3_dev_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats) struct UPT1_RxStats *rxStats = &hw->rqd_start[i].stats; stats->q_ipackets[i] = rxStats->ucastPktsRxOK + - rxStats->mcastPktsRxOK + - rxStats->bcastPktsRxOK; + rxStats->mcastPktsRxOK + + rxStats->bcastPktsRxOK; stats->q_ibytes[i] = rxStats->ucastBytesRxOK + - rxStats->mcastBytesRxOK + - rxStats->bcastBytesRxOK; + rxStats->mcastBytesRxOK + + rxStats->bcastBytesRxOK; stats->ipackets += stats->q_ipackets[i]; stats->ibytes += stats->q_ibytes[i]; @@ -698,16 +711,17 @@ vmxnet3_dev_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats) } static void -vmxnet3_dev_info_get(__attribute__((unused))struct rte_eth_dev *dev, +vmxnet3_dev_info_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info) { + dev_info->pci_dev = RTE_DEV_TO_PCI(dev->device); + dev_info->max_rx_queues = VMXNET3_MAX_RX_QUEUES; dev_info->max_tx_queues = VMXNET3_MAX_TX_QUEUES; dev_info->min_rx_bufsize = 1518 + RTE_PKTMBUF_HEADROOM; dev_info->max_rx_pktlen = 16384; /* includes CRC, cf MAXFRS register */ - dev_info->max_mac_addrs = VMXNET3_MAX_MAC_ADDRS; - /* TRex patch */ dev_info->speed_capa = ETH_LINK_SPEED_10G; + dev_info->max_mac_addrs = VMXNET3_MAX_MAC_ADDRS; dev_info->default_txconf.txq_flags = ETH_TXQ_FLAGS_NOXSUMSCTP; dev_info->flow_type_rss_offloads = VMXNET3_RSS_OFFLOAD_ALL; @@ -722,12 +736,15 @@ vmxnet3_dev_info_get(__attribute__((unused))struct rte_eth_dev *dev, .nb_max = VMXNET3_TX_RING_MAX_SIZE, .nb_min = VMXNET3_DEF_TX_RING_SIZE, .nb_align = 1, + .nb_seg_max = VMXNET3_TX_MAX_SEG, + .nb_mtu_seg_max = VMXNET3_MAX_TXD_PER_PKT, }; dev_info->rx_offload_capa = DEV_RX_OFFLOAD_VLAN_STRIP | DEV_RX_OFFLOAD_UDP_CKSUM | - DEV_RX_OFFLOAD_TCP_CKSUM; + DEV_RX_OFFLOAD_TCP_CKSUM | + DEV_RX_OFFLOAD_TCP_LRO; dev_info->tx_offload_capa = DEV_TX_OFFLOAD_VLAN_INSERT | @@ -760,14 +777,16 @@ vmxnet3_mac_addr_set(struct rte_eth_dev *dev, struct ether_addr *mac_addr) /* return 0 means link status changed, -1 means not changed */ static int -vmxnet3_dev_link_update(struct rte_eth_dev *dev, __attribute__((unused)) int wait_to_complete) +vmxnet3_dev_link_update(struct rte_eth_dev *dev, + __rte_unused int wait_to_complete) { struct vmxnet3_hw *hw = dev->data->dev_private; struct rte_eth_link old, link; uint32_t ret; + /* Link status doesn't change for stopped dev */ if (dev->data->dev_started == 0) - return -1; /* Link status doesn't change for stopped dev */ + return -1; memset(&link, 0, sizeof(link)); vmxnet3_dev_atomic_read_link_status(dev, &old); @@ -789,8 +808,8 @@ vmxnet3_dev_link_update(struct rte_eth_dev *dev, __attribute__((unused)) int wai /* Updating rxmode through Vmxnet3_DriverShared structure in adapter */ static void -vmxnet3_dev_set_rxmode(struct vmxnet3_hw *hw, uint32_t feature, int set) { - +vmxnet3_dev_set_rxmode(struct vmxnet3_hw *hw, uint32_t feature, int set) +{ struct Vmxnet3_RxFilterConf *rxConf = &hw->shared->devRead.rxFilterConf; if (set) @@ -923,11 +942,13 @@ vmxnet3_process_events(struct vmxnet3_hw *hw) /* Check if link state has changed */ if (events & VMXNET3_ECR_LINK) PMD_INIT_LOG(ERR, - "Process events in %s(): VMXNET3_ECR_LINK event", __func__); + "Process events in %s(): VMXNET3_ECR_LINK event", + __func__); /* Check if there is an error on xmit/recv queues */ if (events & 
(VMXNET3_ECR_TQERR | VMXNET3_ECR_RQERR)) { - VMXNET3_WRITE_BAR1_REG(hw, VMXNET3_REG_CMD, VMXNET3_CMD_GET_QUEUE_STATUS); + VMXNET3_WRITE_BAR1_REG(hw, VMXNET3_REG_CMD, + VMXNET3_CMD_GET_QUEUE_STATUS); if (hw->tqd_start->status.stopped) PMD_INIT_LOG(ERR, "tq error 0x%x", @@ -946,14 +967,9 @@ vmxnet3_process_events(struct vmxnet3_hw *hw) if (events & VMXNET3_ECR_DEBUG) PMD_INIT_LOG(ERR, "Debug event generated by device."); - } #endif -static struct rte_driver rte_vmxnet3_driver = { - .type = PMD_PDEV, - .init = rte_vmxnet3_pmd_init, -}; - -PMD_REGISTER_DRIVER(rte_vmxnet3_driver, vmxnet3); -DRIVER_REGISTER_PCI_TABLE(vmxnet3, pci_id_vmxnet3_map); +RTE_PMD_REGISTER_PCI(net_vmxnet3, rte_vmxnet3_pmd.pci_drv); +RTE_PMD_REGISTER_PCI_TABLE(net_vmxnet3, pci_id_vmxnet3_map); +RTE_PMD_REGISTER_KMOD_DEP(net_vmxnet3, "* igb_uio | uio_pci_generic | vfio"); diff --git a/src/dpdk/drivers/net/vmxnet3/vmxnet3_ethdev.h b/src/dpdk/drivers/net/vmxnet3/vmxnet3_ethdev.h index 1be833ab..348c840b 100644 --- a/src/dpdk/drivers/net/vmxnet3/vmxnet3_ethdev.h +++ b/src/dpdk/drivers/net/vmxnet3/vmxnet3_ethdev.h @@ -34,6 +34,8 @@ #ifndef _VMXNET3_ETHDEV_H_ #define _VMXNET3_ETHDEV_H_ +#include <rte_io.h> + #define VMXNET3_MAX_MAC_ADDRS 1 /* UPT feature to negotiate */ @@ -62,8 +64,7 @@ ETH_RSS_NONFRAG_IPV6_TCP) /* RSS configuration structure - shared with device through GPA */ -typedef -struct VMXNET3_RSSConf { +typedef struct VMXNET3_RSSConf { uint16_t hashType; uint16_t hashFunc; uint16_t hashKeySize; @@ -76,15 +77,13 @@ struct VMXNET3_RSSConf { uint8_t indTable[VMXNET3_RSS_MAX_IND_TABLE_SIZE]; } VMXNET3_RSSConf; -typedef -struct vmxnet3_mf_table { +typedef struct vmxnet3_mf_table { void *mfTableBase; /* Multicast addresses list */ uint64_t mfTablePA; /* Physical address of the list */ uint16_t num_addrs; /* number of multicast addrs */ } vmxnet3_mf_table_t; struct vmxnet3_hw { - uint8_t *hw_addr0; /* BAR0: PT-Passthrough Regs */ uint8_t *hw_addr1; /* BAR1: VD-Virtual Device Regs */ /* BAR2: MSI-X Regs */ @@ -111,10 +110,10 @@ struct vmxnet3_hw { uint64_t queueDescPA; uint16_t queue_desc_len; - VMXNET3_RSSConf *rss_conf; - uint64_t rss_confPA; - vmxnet3_mf_table_t *mf_table; - uint32_t shadow_vfta[VMXNET3_VFT_SIZE]; + VMXNET3_RSSConf *rss_conf; + uint64_t rss_confPA; + vmxnet3_mf_table_t *mf_table; + uint32_t shadow_vfta[VMXNET3_VFT_SIZE]; #define VMXNET3_VFT_TABLE_SIZE (VMXNET3_VFT_SIZE * sizeof(uint32_t)) }; @@ -123,16 +122,15 @@ struct vmxnet3_hw { /* Config space read/writes */ -#define VMXNET3_PCI_REG(reg) (*((volatile uint32_t *)(reg))) +#define VMXNET3_PCI_REG(reg) rte_read32(reg) -static inline uint32_t vmxnet3_read_addr(volatile void *addr) +static inline uint32_t +vmxnet3_read_addr(volatile void *addr) { return VMXNET3_PCI_REG(addr); } -#define VMXNET3_PCI_REG_WRITE(reg, value) do { \ - VMXNET3_PCI_REG((reg)) = (value); \ -} while(0) +#define VMXNET3_PCI_REG_WRITE(reg, value) rte_write32((value), (reg)) #define VMXNET3_PCI_BAR0_REG_ADDR(hw, reg) \ ((volatile uint32_t *)((char *)(hw)->hw_addr0 + (reg))) @@ -158,20 +156,22 @@ void vmxnet3_dev_rx_queue_release(void *rxq); void vmxnet3_dev_tx_queue_release(void *txq); int vmxnet3_dev_rx_queue_setup(struct rte_eth_dev *dev, uint16_t rx_queue_id, - uint16_t nb_rx_desc, unsigned int socket_id, - const struct rte_eth_rxconf *rx_conf, - struct rte_mempool *mb_pool); + uint16_t nb_rx_desc, unsigned int socket_id, + const struct rte_eth_rxconf *rx_conf, + struct rte_mempool *mb_pool); int vmxnet3_dev_tx_queue_setup(struct rte_eth_dev *dev, uint16_t tx_queue_id, - uint16_t 
nb_tx_desc, unsigned int socket_id, - const struct rte_eth_txconf *tx_conf); + uint16_t nb_tx_desc, unsigned int socket_id, + const struct rte_eth_txconf *tx_conf); int vmxnet3_dev_rxtx_init(struct rte_eth_dev *dev); int vmxnet3_rss_configure(struct rte_eth_dev *dev); uint16_t vmxnet3_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, - uint16_t nb_pkts); + uint16_t nb_pkts); uint16_t vmxnet3_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, - uint16_t nb_pkts); + uint16_t nb_pkts); +uint16_t vmxnet3_prep_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, + uint16_t nb_pkts); #endif /* _VMXNET3_ETHDEV_H_ */ diff --git a/src/dpdk/drivers/net/vmxnet3/vmxnet3_ring.h b/src/dpdk/drivers/net/vmxnet3/vmxnet3_ring.h index 69ff2ded..b50d2b00 100644 --- a/src/dpdk/drivers/net/vmxnet3/vmxnet3_ring.h +++ b/src/dpdk/drivers/net/vmxnet3/vmxnet3_ring.h @@ -96,12 +96,12 @@ vmxnet3_cmd_ring_desc_empty(struct vmxnet3_cmd_ring *ring) } typedef struct vmxnet3_comp_ring { - uint32_t size; - uint32_t next2proc; - uint8_t gen; - uint8_t intr_idx; + uint32_t size; + uint32_t next2proc; + uint8_t gen; + uint8_t intr_idx; Vmxnet3_GenericDesc *base; - uint64_t basePA; + uint64_t basePA; } vmxnet3_comp_ring_t; struct vmxnet3_data_ring { @@ -121,13 +121,13 @@ vmxnet3_comp_ring_adv_next2proc(struct vmxnet3_comp_ring *ring) } struct vmxnet3_txq_stats { - uint64_t drop_total; /* # of pkts dropped by the driver, + uint64_t drop_total; /* # of pkts dropped by the driver, * the counters below track droppings due to * different reasons */ - uint64_t drop_too_many_segs; - uint64_t drop_tso; - uint64_t tx_ring_full; + uint64_t drop_too_many_segs; + uint64_t drop_tso; + uint64_t tx_ring_full; }; typedef struct vmxnet3_tx_queue { @@ -158,8 +158,8 @@ typedef struct vmxnet3_rx_queue { uint32_t qid1; uint32_t qid2; Vmxnet3_RxQueueDesc *shared; - struct rte_mbuf *start_seg; - struct rte_mbuf *last_seg; + struct rte_mbuf *start_seg; + struct rte_mbuf *last_seg; struct vmxnet3_rxq_stats stats; bool stopped; uint16_t queue_id; /**< Device RX queue index. */ diff --git a/src/dpdk/drivers/net/vmxnet3/vmxnet3_rxtx.c b/src/dpdk/drivers/net/vmxnet3/vmxnet3_rxtx.c index 9deeb3ff..b246884b 100644 --- a/src/dpdk/drivers/net/vmxnet3/vmxnet3_rxtx.c +++ b/src/dpdk/drivers/net/vmxnet3/vmxnet3_rxtx.c @@ -57,7 +57,6 @@ #include <rte_lcore.h> #include <rte_atomic.h> #include <rte_branch_prediction.h> -#include <rte_ring.h> #include <rte_mempool.h> #include <rte_malloc.h> #include <rte_mbuf.h> @@ -70,6 +69,7 @@ #include <rte_sctp.h> #include <rte_string_fns.h> #include <rte_errno.h> +#include <rte_net.h> #include "base/vmxnet3_defs.h" #include "vmxnet3_ring.h" @@ -77,6 +77,14 @@ #include "vmxnet3_logs.h" #include "vmxnet3_ethdev.h" +#define VMXNET3_TX_OFFLOAD_MASK ( \ + PKT_TX_VLAN_PKT | \ + PKT_TX_L4_MASK | \ + PKT_TX_TCP_SEG) + +#define VMXNET3_TX_OFFLOAD_NOTSUP_MASK \ + (PKT_TX_OFFLOAD_MASK ^ VMXNET3_TX_OFFLOAD_MASK) + static const uint32_t rxprod_reg[2] = {VMXNET3_REG_RXPROD, VMXNET3_REG_RXPROD2}; static int vmxnet3_post_rx_bufs(vmxnet3_rx_queue_t*, uint8_t); @@ -141,10 +149,10 @@ vmxnet3_txq_dump(struct vmxnet3_tx_queue *txq) #endif static void -vmxnet3_cmd_ring_release_mbufs(vmxnet3_cmd_ring_t *ring) +vmxnet3_tx_cmd_ring_release_mbufs(vmxnet3_cmd_ring_t *ring) { while (ring->next2comp != ring->next2fill) { - /* No need to worry about tx desc ownership, device is quiesced by now. */ + /* No need to worry about desc ownership, device is quiesced by now. 
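The VMXNET3_TX_OFFLOAD_MASK / VMXNET3_TX_OFFLOAD_NOTSUP_MASK pair defined above backs the new vmxnet3_prep_pkts() callback installed as tx_pkt_prepare earlier in this patch. On the application side such a callback is driven through rte_eth_tx_prepare() ahead of the burst call; a minimal usage sketch, assuming the DPDK API of this generation (the port_id width differs between releases):

#include <rte_ethdev.h>
#include <rte_mbuf.h>

/* Run the PMD's prepare stage, drop anything it rejects, transmit the rest.
 * rte_errno reports why a packet was rejected (e.g. -ENOTSUP for an
 * unsupported offload, as set by vmxnet3_prep_pkts() below).
 */
static uint16_t
send_burst(uint8_t port_id, uint16_t queue_id,
	   struct rte_mbuf **pkts, uint16_t nb_pkts)
{
	uint16_t nb_prep = rte_eth_tx_prepare(port_id, queue_id, pkts, nb_pkts);
	uint16_t i;

	for (i = nb_prep; i < nb_pkts; i++)
		rte_pktmbuf_free(pkts[i]);

	return rte_eth_tx_burst(port_id, queue_id, pkts, nb_prep);
}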
*/ vmxnet3_buf_info_t *buf_info = ring->buf_info + ring->next2comp; if (buf_info->m) { @@ -158,20 +166,39 @@ vmxnet3_cmd_ring_release_mbufs(vmxnet3_cmd_ring_t *ring) } static void +vmxnet3_rx_cmd_ring_release_mbufs(vmxnet3_cmd_ring_t *ring) +{ + uint32_t i; + + for (i = 0; i < ring->size; i++) { + /* No need to worry about desc ownership, device is quiesced by now. */ + vmxnet3_buf_info_t *buf_info = &ring->buf_info[i]; + + if (buf_info->m) { + rte_pktmbuf_free_seg(buf_info->m); + buf_info->m = NULL; + buf_info->bufPA = 0; + buf_info->len = 0; + } + vmxnet3_cmd_ring_adv_next2comp(ring); + } +} + +static void vmxnet3_cmd_ring_release(vmxnet3_cmd_ring_t *ring) { - vmxnet3_cmd_ring_release_mbufs(ring); rte_free(ring->buf_info); ring->buf_info = NULL; } - void vmxnet3_dev_tx_queue_release(void *txq) { vmxnet3_tx_queue_t *tq = txq; if (tq != NULL) { + /* Release mbufs */ + vmxnet3_tx_cmd_ring_release_mbufs(&tq->cmd_ring); /* Release the cmd_ring */ vmxnet3_cmd_ring_release(&tq->cmd_ring); } @@ -184,6 +211,10 @@ vmxnet3_dev_rx_queue_release(void *rxq) vmxnet3_rx_queue_t *rq = rxq; if (rq != NULL) { + /* Release mbufs */ + for (i = 0; i < VMXNET3_RX_CMDRING_SIZE; i++) + vmxnet3_rx_cmd_ring_release_mbufs(&rq->cmd_ring[i]); + /* Release both the cmd_rings */ for (i = 0; i < VMXNET3_RX_CMDRING_SIZE; i++) vmxnet3_cmd_ring_release(&rq->cmd_ring[i]); @@ -201,7 +232,7 @@ vmxnet3_dev_tx_queue_reset(void *txq) if (tq != NULL) { /* Release the cmd_ring mbufs */ - vmxnet3_cmd_ring_release_mbufs(&tq->cmd_ring); + vmxnet3_tx_cmd_ring_release_mbufs(&tq->cmd_ring); } /* Tx vmxnet rings structure initialization*/ @@ -230,7 +261,7 @@ vmxnet3_dev_rx_queue_reset(void *rxq) if (rq != NULL) { /* Release both the cmd_rings mbufs */ for (i = 0; i < VMXNET3_RX_CMDRING_SIZE; i++) - vmxnet3_cmd_ring_release_mbufs(&rq->cmd_ring[i]); + vmxnet3_rx_cmd_ring_release_mbufs(&rq->cmd_ring[i]); } ring0 = &rq->cmd_ring[0]; @@ -328,6 +359,53 @@ vmxnet3_tq_tx_complete(vmxnet3_tx_queue_t *txq) } uint16_t +vmxnet3_prep_pkts(__rte_unused void *tx_queue, struct rte_mbuf **tx_pkts, + uint16_t nb_pkts) +{ + int32_t ret; + uint32_t i; + uint64_t ol_flags; + struct rte_mbuf *m; + + for (i = 0; i != nb_pkts; i++) { + m = tx_pkts[i]; + ol_flags = m->ol_flags; + + /* Non-TSO packet cannot occupy more than + * VMXNET3_MAX_TXD_PER_PKT TX descriptors. + */ + if ((ol_flags & PKT_TX_TCP_SEG) == 0 && + m->nb_segs > VMXNET3_MAX_TXD_PER_PKT) { + rte_errno = -EINVAL; + return i; + } + + /* check that only supported TX offloads are requested. 
*/ + if ((ol_flags & VMXNET3_TX_OFFLOAD_NOTSUP_MASK) != 0 || + (ol_flags & PKT_TX_L4_MASK) == + PKT_TX_SCTP_CKSUM) { + rte_errno = -ENOTSUP; + return i; + } + +#ifdef RTE_LIBRTE_ETHDEV_DEBUG + ret = rte_validate_tx_offload(m); + if (ret != 0) { + rte_errno = ret; + return i; + } +#endif + ret = rte_net_intel_cksum_prepare(m); + if (ret != 0) { + rte_errno = ret; + return i; + } + } + + return i; +} + +uint16_t vmxnet3_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts) { @@ -392,7 +470,8 @@ vmxnet3_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, continue; } - if (txm->nb_segs == 1 && rte_pktmbuf_pkt_len(txm) <= VMXNET3_HDR_COPY_SIZE) { + if (txm->nb_segs == 1 && + rte_pktmbuf_pkt_len(txm) <= VMXNET3_HDR_COPY_SIZE) { struct Vmxnet3_TxDataDesc *tdd; tdd = txq->data_ring.base + txq->cmd_ring.next2fill; @@ -414,8 +493,8 @@ vmxnet3_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, gdesc = txq->cmd_ring.base + txq->cmd_ring.next2fill; if (copy_size) gdesc->txd.addr = rte_cpu_to_le_64(txq->data_ring.basePA + - txq->cmd_ring.next2fill * - sizeof(struct Vmxnet3_TxDataDesc)); + txq->cmd_ring.next2fill * + sizeof(struct Vmxnet3_TxDataDesc)); else gdesc->txd.addr = rte_mbuf_data_dma_addr(m_seg); @@ -495,16 +574,41 @@ vmxnet3_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, return nb_tx; } +static inline void +vmxnet3_renew_desc(vmxnet3_rx_queue_t *rxq, uint8_t ring_id, + struct rte_mbuf *mbuf) +{ + uint32_t val = 0; + struct vmxnet3_cmd_ring *ring = &rxq->cmd_ring[ring_id]; + struct Vmxnet3_RxDesc *rxd = + (struct Vmxnet3_RxDesc *)(ring->base + ring->next2fill); + vmxnet3_buf_info_t *buf_info = &ring->buf_info[ring->next2fill]; + + if (ring_id == 0) + val = VMXNET3_RXD_BTYPE_HEAD; + else + val = VMXNET3_RXD_BTYPE_BODY; + + buf_info->m = mbuf; + buf_info->len = (uint16_t)(mbuf->buf_len - RTE_PKTMBUF_HEADROOM); + buf_info->bufPA = rte_mbuf_data_dma_addr_default(mbuf); + + rxd->addr = buf_info->bufPA; + rxd->btype = val; + rxd->len = buf_info->len; + rxd->gen = ring->gen; + + vmxnet3_cmd_ring_adv_next2fill(ring); +} /* * Allocates mbufs and clusters. Post rx descriptors with buffer details * so that device can receive packets in those buffers. - * Ring layout: - * Among the two rings, 1st ring contains buffers of type 0 and type1. + * Ring layout: + * Among the two rings, 1st ring contains buffers of type 0 and type 1. * bufs_per_pkt is set such that for non-LRO cases all the buffers required * by a frame will fit in 1st ring (1st buf of type0 and rest of type1). * 2nd ring contains buffers of type 1 alone. Second ring mostly be used * only for LRO. - * */ static int vmxnet3_post_rx_bufs(vmxnet3_rx_queue_t *rxq, uint8_t ring_id) @@ -549,8 +653,7 @@ vmxnet3_post_rx_bufs(vmxnet3_rx_queue_t *rxq, uint8_t ring_id) buf_info->m = mbuf; buf_info->len = (uint16_t)(mbuf->buf_len - RTE_PKTMBUF_HEADROOM); - buf_info->bufPA = - rte_mbuf_data_dma_addr_default(mbuf); + buf_info->bufPA = rte_mbuf_data_dma_addr_default(mbuf); /* Load Rx Descriptor with the buffer's GPA */ rxd->addr = buf_info->bufPA; @@ -636,9 +739,18 @@ vmxnet3_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts) } while (rcd->gen == rxq->comp_ring.gen) { + struct rte_mbuf *newm; + if (nb_rx >= nb_pkts) break; + newm = rte_mbuf_raw_alloc(rxq->mp); + if (unlikely(newm == NULL)) { + PMD_RX_LOG(ERR, "Error allocating mbuf"); + rxq->stats.rx_buf_alloc_failure++; + break; + } + idx = rcd->rxdIdx; ring_idx = (uint8_t)((rcd->rqID == rxq->qid1) ? 
0 : 1); rxd = (Vmxnet3_RxDesc *)rxq->cmd_ring[ring_idx].base + idx; @@ -676,7 +788,6 @@ vmxnet3_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts) goto rcd_done; } - /* Initialize newly received packet buffer */ rxm->port = rxq->port_id; rxm->nb_segs = 1; @@ -736,10 +847,11 @@ vmxnet3_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts) rcd_done: rxq->cmd_ring[ring_idx].next2comp = idx; - VMXNET3_INC_RING_IDX_ONLY(rxq->cmd_ring[ring_idx].next2comp, rxq->cmd_ring[ring_idx].size); + VMXNET3_INC_RING_IDX_ONLY(rxq->cmd_ring[ring_idx].next2comp, + rxq->cmd_ring[ring_idx].size); - /* It's time to allocate some new buf and renew descriptors */ - vmxnet3_post_rx_bufs(rxq, ring_idx); + /* It's time to renew descriptors */ + vmxnet3_renew_desc(rxq, ring_idx, newm); if (unlikely(rxq->shared->ctrl.updateRxProd)) { VMXNET3_WRITE_BAR0_REG(hw, rxprod_reg[ring_idx] + (rxq->queue_id * VMXNET3_REG_ALIGN), rxq->cmd_ring[ring_idx].next2fill); @@ -751,8 +863,7 @@ rcd_done: rcd = &rxq->comp_ring.base[rxq->comp_ring.next2proc].rcd; nb_rxd++; if (nb_rxd > rxq->cmd_ring[0].size) { - PMD_RX_LOG(ERR, - "Used up quota of receiving packets," + PMD_RX_LOG(ERR, "Used up quota of receiving packets," " relinquish control."); break; } @@ -774,15 +885,15 @@ ring_dma_zone_reserve(struct rte_eth_dev *dev, const char *ring_name, const struct rte_memzone *mz; snprintf(z_name, sizeof(z_name), "%s_%s_%d_%d", - dev->driver->pci_drv.name, ring_name, - dev->data->port_id, queue_id); + dev->driver->pci_drv.driver.name, ring_name, + dev->data->port_id, queue_id); mz = rte_memzone_lookup(z_name); if (mz) return mz; return rte_memzone_reserve_aligned(z_name, ring_size, - socket_id, 0, VMXNET3_RING_BA_ALIGN); + socket_id, 0, VMXNET3_RING_BA_ALIGN); } int @@ -790,7 +901,7 @@ vmxnet3_dev_tx_queue_setup(struct rte_eth_dev *dev, uint16_t queue_idx, uint16_t nb_desc, unsigned int socket_id, - __attribute__((unused)) const struct rte_eth_txconf *tx_conf) + __rte_unused const struct rte_eth_txconf *tx_conf) { struct vmxnet3_hw *hw = dev->data->dev_private; const struct rte_memzone *mz; @@ -808,7 +919,8 @@ vmxnet3_dev_tx_queue_setup(struct rte_eth_dev *dev, return -EINVAL; } - txq = rte_zmalloc("ethdev_tx_queue", sizeof(struct vmxnet3_tx_queue), RTE_CACHE_LINE_SIZE); + txq = rte_zmalloc("ethdev_tx_queue", sizeof(struct vmxnet3_tx_queue), + RTE_CACHE_LINE_SIZE); if (txq == NULL) { PMD_INIT_LOG(ERR, "Can not allocate tx queue structure"); return -ENOMEM; @@ -891,12 +1003,12 @@ vmxnet3_dev_rx_queue_setup(struct rte_eth_dev *dev, uint16_t queue_idx, uint16_t nb_desc, unsigned int socket_id, - __attribute__((unused)) const struct rte_eth_rxconf *rx_conf, + __rte_unused const struct rte_eth_rxconf *rx_conf, struct rte_mempool *mp) { const struct rte_memzone *mz; struct vmxnet3_rx_queue *rxq; - struct vmxnet3_hw *hw = dev->data->dev_private; + struct vmxnet3_hw *hw = dev->data->dev_private; struct vmxnet3_cmd_ring *ring0, *ring1, *ring; struct vmxnet3_comp_ring *comp_ring; int size; @@ -905,7 +1017,8 @@ vmxnet3_dev_rx_queue_setup(struct rte_eth_dev *dev, PMD_INIT_FUNC_TRACE(); - rxq = rte_zmalloc("ethdev_rx_queue", sizeof(struct vmxnet3_rx_queue), RTE_CACHE_LINE_SIZE); + rxq = rte_zmalloc("ethdev_rx_queue", sizeof(struct vmxnet3_rx_queue), + RTE_CACHE_LINE_SIZE); if (rxq == NULL) { PMD_INIT_LOG(ERR, "Can not allocate rx queue structure"); return -ENOMEM; @@ -979,7 +1092,9 @@ vmxnet3_dev_rx_queue_setup(struct rte_eth_dev *dev, ring->rid = i; snprintf(mem_name, sizeof(mem_name), "rx_ring_%d_buf_info", i); - 
ring->buf_info = rte_zmalloc(mem_name, ring->size * sizeof(vmxnet3_buf_info_t), RTE_CACHE_LINE_SIZE); + ring->buf_info = rte_zmalloc(mem_name, + ring->size * sizeof(vmxnet3_buf_info_t), + RTE_CACHE_LINE_SIZE); if (ring->buf_info == NULL) { PMD_INIT_LOG(ERR, "ERROR: Creating rx_buf_info structure"); return -ENOMEM; @@ -1013,10 +1128,15 @@ vmxnet3_dev_rxtx_init(struct rte_eth_dev *dev) /* Passing 0 as alloc_num will allocate full ring */ ret = vmxnet3_post_rx_bufs(rxq, j); if (ret <= 0) { - PMD_INIT_LOG(ERR, "ERROR: Posting Rxq: %d buffers ring: %d", i, j); + PMD_INIT_LOG(ERR, + "ERROR: Posting Rxq: %d buffers ring: %d", + i, j); return -ret; } - /* Updating device with the index:next2fill to fill the mbufs for coming packets */ + /* + * Updating device with the index:next2fill to fill the + * mbufs for coming packets. + */ if (unlikely(rxq->shared->ctrl.updateRxProd)) { VMXNET3_WRITE_BAR0_REG(hw, rxprod_reg[j] + (rxq->queue_id * VMXNET3_REG_ALIGN), rxq->cmd_ring[j].next2fill); @@ -1064,7 +1184,7 @@ vmxnet3_rss_configure(struct rte_eth_dev *dev) dev_rss_conf->hashFunc = VMXNET3_RSS_HASH_FUNC_TOEPLITZ; /* loading hashKeySize */ dev_rss_conf->hashKeySize = VMXNET3_RSS_MAX_KEY_SIZE; - /* loading indTableSize : Must not exceed VMXNET3_RSS_MAX_IND_TABLE_SIZE (128)*/ + /* loading indTableSize: Must not exceed VMXNET3_RSS_MAX_IND_TABLE_SIZE (128)*/ dev_rss_conf->indTableSize = (uint16_t)(hw->num_rx_queues * 4); if (port_rss_conf->rss_key == NULL) { @@ -1073,7 +1193,8 @@ vmxnet3_rss_configure(struct rte_eth_dev *dev) } /* loading hashKey */ - memcpy(&dev_rss_conf->hashKey[0], port_rss_conf->rss_key, dev_rss_conf->hashKeySize); + memcpy(&dev_rss_conf->hashKey[0], port_rss_conf->rss_key, + dev_rss_conf->hashKeySize); /* loading indTable */ for (i = 0, j = 0; i < dev_rss_conf->indTableSize; i++, j++) { diff --git a/src/dpdk/drivers/net/xenvirt/rte_eth_xenvirt.c b/src/dpdk/drivers/net/xenvirt/rte_eth_xenvirt.c index 99f6cc81..19bc09a3 100644 --- a/src/dpdk/drivers/net/xenvirt/rte_eth_xenvirt.c +++ b/src/dpdk/drivers/net/xenvirt/rte_eth_xenvirt.c @@ -56,7 +56,7 @@ #include <rte_malloc.h> #include <rte_memcpy.h> #include <rte_string_fns.h> -#include <rte_dev.h> +#include <rte_vdev.h> #include <cmdline_parse.h> #include <cmdline_parse_etheraddr.h> @@ -70,8 +70,6 @@ /* virtio_idx is increased after new device is created.*/ static int virtio_idx = 0; -static const char *drivername = "xen virtio PMD"; - static struct rte_eth_link pmd_link = { .link_speed = ETH_SPEED_NUM_10G, .link_duplex = ETH_LINK_FULL_DUPLEX, @@ -331,13 +329,11 @@ eth_dev_info(struct rte_eth_dev *dev, struct pmd_internals *internals = dev->data->dev_private; RTE_SET_USED(internals); - dev_info->driver_name = drivername; dev_info->max_mac_addrs = 1; dev_info->max_rx_pktlen = (uint32_t)2048; dev_info->max_rx_queues = (uint16_t)1; dev_info->max_tx_queues = (uint16_t)1; dev_info->min_rx_bufsize = 0; - dev_info->pci_dev = NULL; } static void @@ -620,6 +616,7 @@ enum dev_action { DEV_ATTACH }; +static struct rte_vdev_driver pmd_xenvirt_drv; static int eth_dev_xenvirt_create(const char *name, const char *params, @@ -654,7 +651,7 @@ eth_dev_xenvirt_create(const char *name, const char *params, goto err; /* reserve an ethdev entry */ - eth_dev = rte_eth_dev_allocate(name, RTE_ETH_DEV_VIRTUAL); + eth_dev = rte_eth_dev_allocate(name); if (eth_dev == NULL) goto err; @@ -673,9 +670,9 @@ eth_dev_xenvirt_create(const char *name, const char *params, eth_dev->data = data; eth_dev->dev_ops = &ops; - eth_dev->data->dev_flags = RTE_PCI_DRV_DETACHABLE; + 
eth_dev->data->dev_flags = RTE_ETH_DEV_DETACHABLE; eth_dev->data->kdrv = RTE_KDRV_NONE; - eth_dev->data->drv_name = drivername; + eth_dev->data->drv_name = pmd_xenvirt_drv.driver.name; eth_dev->driver = NULL; eth_dev->data->numa_node = numa_node; @@ -729,7 +726,7 @@ eth_dev_xenvirt_free(const char *name, const unsigned numa_node) /*TODO: Support multiple process model */ static int -rte_pmd_xenvirt_devinit(const char *name, const char *params) +rte_pmd_xenvirt_probe(const char *name, const char *params) { if (virtio_idx == 0) { if (xenstore_init() != 0) { @@ -746,7 +743,7 @@ rte_pmd_xenvirt_devinit(const char *name, const char *params) } static int -rte_pmd_xenvirt_devuninit(const char *name) +rte_pmd_xenvirt_remove(const char *name) { eth_dev_xenvirt_free(name, rte_socket_id()); @@ -759,12 +756,12 @@ rte_pmd_xenvirt_devuninit(const char *name) return 0; } -static struct rte_driver pmd_xenvirt_drv = { - .type = PMD_VDEV, - .init = rte_pmd_xenvirt_devinit, - .uninit = rte_pmd_xenvirt_devuninit, +static struct rte_vdev_driver pmd_xenvirt_drv = { + .probe = rte_pmd_xenvirt_probe, + .remove = rte_pmd_xenvirt_remove, }; -PMD_REGISTER_DRIVER(pmd_xenvirt_drv, eth_xenvirt); -DRIVER_REGISTER_PARAM_STRING(eth_xenvirt, +RTE_PMD_REGISTER_VDEV(net_xenvirt, pmd_xenvirt_drv); +RTE_PMD_REGISTER_ALIAS(net_xenvirt, eth_xenvirt); +RTE_PMD_REGISTER_PARAM_STRING(net_xenvirt, "mac=<mac addr>"); diff --git a/src/dpdk/drivers/net/xenvirt/rte_eth_xenvirt.h b/src/dpdk/drivers/net/xenvirt/rte_eth_xenvirt.h index 4995a9b4..598adc6f 100644 --- a/src/dpdk/drivers/net/xenvirt/rte_eth_xenvirt.h +++ b/src/dpdk/drivers/net/xenvirt/rte_eth_xenvirt.h @@ -39,7 +39,6 @@ extern "C" { #endif #include <rte_mempool.h> -#include <rte_ring.h> /** * Creates mempool for xen virtio PMD. |
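The xenvirt hunks above complete the conversion from the old rte_driver init/uninit model to the rte_vdev_driver probe/remove model used throughout this update. A bare-bones sketch of that registration pattern with hypothetical driver names (the macros are the ones appearing in the hunk):

#include <rte_vdev.h>
#include <rte_log.h>

/* Skeleton virtual-device PMD using the probe/remove interface that replaces
 * the old PMD_REGISTER_DRIVER() path. Names are illustrative only.
 */
static int
pmd_example_probe(const char *name, const char *params)
{
	RTE_LOG(INFO, PMD, "probing vdev %s (args: %s)\n",
		name, params ? params : "");
	/* parse params, call rte_eth_dev_allocate(name), set dev_ops here */
	return 0;
}

static int
pmd_example_remove(const char *name)
{
	RTE_LOG(INFO, PMD, "removing vdev %s\n", name);
	return 0;
}

static struct rte_vdev_driver pmd_example_drv = {
	.probe = pmd_example_probe,
	.remove = pmd_example_remove,
};

RTE_PMD_REGISTER_VDEV(net_example, pmd_example_drv);
RTE_PMD_REGISTER_ALIAS(net_example, eth_example);
RTE_PMD_REGISTER_PARAM_STRING(net_example, "mac=<mac addr>");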