Diffstat (limited to 'drivers/net/enic')
-rw-r--r--  drivers/net/enic/Makefile             |  28
-rw-r--r--  drivers/net/enic/base/vnic_dev.c      | 105
-rw-r--r--  drivers/net/enic/base/vnic_dev.h      |   8
-rw-r--r--  drivers/net/enic/base/vnic_devcmd.h   |  72
-rw-r--r--  drivers/net/enic/enic.h               |  12
-rw-r--r--  drivers/net/enic/enic_ethdev.c        |  61
-rw-r--r--  drivers/net/enic/enic_flow.c          | 180
-rw-r--r--  drivers/net/enic/enic_main.c          |  81
-rw-r--r--  drivers/net/enic/enic_res.c           |  11
-rw-r--r--  drivers/net/enic/enic_rxtx.c          | 286
-rw-r--r--  drivers/net/enic/enic_rxtx_common.h   | 271
-rw-r--r--  drivers/net/enic/enic_rxtx_vec_avx2.c | 831
-rw-r--r--  drivers/net/enic/meson.build          |  16
13 files changed, 1645 insertions, 317 deletions
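
The hunks below fall into two groups. The flow-counter work (vnic_dev.c, vnic_devcmd.h, enic_flow.c) wires the new CMD_COUNTER_* devcmds into an rte_flow COUNT action and a query callback; the Rx/Tx work (enic_ethdev.c, enic_main.c, enic_rxtx*.c) adds an AVX2 receive handler selected by the new enable-avx2-rx devarg and extends the simple Tx handler with checksum offload and VLAN insertion. As a reading aid, here is a minimal sketch (not part of the patch) of how an application would exercise the counter path through the generic rte_flow API; the port id, queue index and match pattern are illustrative assumptions only, and error reporting is omitted:

#include <stdio.h>
#include <inttypes.h>
#include <rte_flow.h>

/* Illustrative only: create a flow with a COUNT action and read its counter.
 * Assumes the port is started and the pattern below suits the NIC's
 * configured filter mode.
 */
static int
flow_count_example(uint16_t port_id)
{
	struct rte_flow_error error;
	struct rte_flow_attr attr = { .ingress = 1 };
	struct rte_flow_item pattern[] = {
		{ .type = RTE_FLOW_ITEM_TYPE_ETH },
		{ .type = RTE_FLOW_ITEM_TYPE_IPV4 },
		{ .type = RTE_FLOW_ITEM_TYPE_END },
	};
	struct rte_flow_action_queue queue = { .index = 0 };
	struct rte_flow_action actions[] = {
		{ .type = RTE_FLOW_ACTION_TYPE_QUEUE, .conf = &queue },
		{ .type = RTE_FLOW_ACTION_TYPE_COUNT },
		{ .type = RTE_FLOW_ACTION_TYPE_END },
	};
	struct rte_flow_query_count counts = { .reset = 0 };
	struct rte_flow *flow;

	flow = rte_flow_create(port_id, &attr, pattern, actions, &error);
	if (flow == NULL)
		return -1;
	/* &actions[1] is the COUNT action; the PMD fills hits/bytes */
	if (rte_flow_query(port_id, flow, &actions[1], &counts, &error) == 0 &&
	    counts.hits_set && counts.bytes_set)
		printf("hits=%" PRIu64 " bytes=%" PRIu64 "\n",
		       counts.hits, counts.bytes);
	return rte_flow_destroy(port_id, flow, &error);
}

A query with .reset set is forwarded to the adapter as CMD_COUNTER_QUERY, while reset == 0 reads the values that CMD_COUNTER_DMA_CONFIG periodically DMAs into host memory (see vnic_dev_counter_query below). The vectorized Rx path, by contrast, is opt-in via the enable-avx2-rx=1 devarg registered in enic_ethdev.c.
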
diff --git a/drivers/net/enic/Makefile b/drivers/net/enic/Makefile index 7c6c29cc..e39e4763 100644 --- a/drivers/net/enic/Makefile +++ b/drivers/net/enic/Makefile @@ -39,4 +39,32 @@ SRCS-$(CONFIG_RTE_LIBRTE_ENIC_PMD) += base/vnic_intr.c SRCS-$(CONFIG_RTE_LIBRTE_ENIC_PMD) += base/vnic_rq.c SRCS-$(CONFIG_RTE_LIBRTE_ENIC_PMD) += base/vnic_rss.c +# The current implementation assumes 64-bit pointers +CC_AVX2_SUPPORT=0 +ifeq ($(CONFIG_RTE_ARCH_X86_64),y) +# Figure out if the compiler supports avx2. The extra check using +# -march=core-avx2 is necessary to support users who build for the +# 'default' machine (corei7 which has no avx2) and run the binary on +# newer CPUs that have avx2. +# This part is verbatim from i40e makefile. +ifeq ($(findstring RTE_MACHINE_CPUFLAG_AVX2,$(CFLAGS)),RTE_MACHINE_CPUFLAG_AVX2) + CC_AVX2_SUPPORT=1 +else + CC_AVX2_SUPPORT=\ + $(shell $(CC) -march=core-avx2 -dM -E - </dev/null 2>&1 | \ + grep -q AVX2 && echo 1) + ifeq ($(CC_AVX2_SUPPORT), 1) + ifeq ($(CONFIG_RTE_TOOLCHAIN_ICC),y) + CFLAGS_enic_rxtx_vec_avx2.o += -march=core-avx2 + else + CFLAGS_enic_rxtx_vec_avx2.o += -mavx2 + endif + endif +endif +endif + +ifeq ($(CC_AVX2_SUPPORT), 1) + SRCS-$(CONFIG_RTE_LIBRTE_ENIC_PMD) += enic_rxtx_vec_avx2.c +endif + include $(RTE_SDK)/mk/rte.lib.mk diff --git a/drivers/net/enic/base/vnic_dev.c b/drivers/net/enic/base/vnic_dev.c index 16e8814a..fd303fec 100644 --- a/drivers/net/enic/base/vnic_dev.c +++ b/drivers/net/enic/base/vnic_dev.c @@ -57,6 +57,9 @@ struct vnic_dev { void (*free_consistent)(void *priv, size_t size, void *vaddr, dma_addr_t dma_handle); + struct vnic_counter_counts *flow_counters; + dma_addr_t flow_counters_pa; + u8 flow_counters_dma_active; }; #define VNIC_MAX_RES_HDR_SIZE \ @@ -64,6 +67,8 @@ struct vnic_dev { sizeof(struct vnic_resource) * RES_TYPE_MAX) #define VNIC_RES_STRIDE 128 +#define VNIC_MAX_FLOW_COUNTERS 2048 + void *vnic_dev_priv(struct vnic_dev *vdev) { return vdev->priv; @@ -611,6 +616,35 @@ int vnic_dev_stats_dump(struct vnic_dev *vdev, struct vnic_stats **stats) return vnic_dev_cmd(vdev, CMD_STATS_DUMP, &a0, &a1, wait); } +/* + * Configure counter DMA + */ +int vnic_dev_counter_dma_cfg(struct vnic_dev *vdev, u32 period, + u32 num_counters) +{ + u64 args[3]; + int wait = 1000; + int err; + + if (num_counters > VNIC_MAX_FLOW_COUNTERS) + return -ENOMEM; + if (period > 0 && (period < VNIC_COUNTER_DMA_MIN_PERIOD || + num_counters == 0)) + return -EINVAL; + + args[0] = num_counters; + args[1] = vdev->flow_counters_pa; + args[2] = period; + err = vnic_dev_cmd_args(vdev, CMD_COUNTER_DMA_CONFIG, args, 3, wait); + + /* record if DMAs need to be stopped on close */ + if (!err) + vdev->flow_counters_dma_active = (num_counters != 0 && + period != 0); + + return err; +} + int vnic_dev_close(struct vnic_dev *vdev) { u64 a0 = 0, a1 = 0; @@ -939,6 +973,24 @@ int vnic_dev_alloc_stats_mem(struct vnic_dev *vdev) return vdev->stats == NULL ? -ENOMEM : 0; } +/* + * Initialize for up to VNIC_MAX_FLOW_COUNTERS + */ +int vnic_dev_alloc_counter_mem(struct vnic_dev *vdev) +{ + char name[NAME_MAX]; + static u32 instance; + + snprintf((char *)name, sizeof(name), "vnic_flow_ctrs-%u", instance++); + vdev->flow_counters = vdev->alloc_consistent(vdev->priv, + sizeof(struct vnic_counter_counts) + * VNIC_MAX_FLOW_COUNTERS, + &vdev->flow_counters_pa, + (u8 *)name); + vdev->flow_counters_dma_active = 0; + return vdev->flow_counters == NULL ? 
-ENOMEM : 0; +} + void vnic_dev_unregister(struct vnic_dev *vdev) { if (vdev) { @@ -951,6 +1003,16 @@ void vnic_dev_unregister(struct vnic_dev *vdev) vdev->free_consistent(vdev->priv, sizeof(struct vnic_stats), vdev->stats, vdev->stats_pa); + if (vdev->flow_counters) { + /* turn off counter DMAs before freeing memory */ + if (vdev->flow_counters_dma_active) + vnic_dev_counter_dma_cfg(vdev, 0, 0); + + vdev->free_consistent(vdev->priv, + sizeof(struct vnic_counter_counts) + * VNIC_MAX_FLOW_COUNTERS, + vdev->flow_counters, vdev->flow_counters_pa); + } if (vdev->fw_info) vdev->free_consistent(vdev->priv, sizeof(struct vnic_devcmd_fw_info), @@ -1094,3 +1156,46 @@ int vnic_dev_capable_vxlan(struct vnic_dev *vdev) (a1 & (FEATURE_VXLAN_IPV6 | FEATURE_VXLAN_MULTI_WQ)) == (FEATURE_VXLAN_IPV6 | FEATURE_VXLAN_MULTI_WQ); } + +bool vnic_dev_counter_alloc(struct vnic_dev *vdev, uint32_t *idx) +{ + u64 a0 = 0; + u64 a1 = 0; + int wait = 1000; + + if (vnic_dev_cmd(vdev, CMD_COUNTER_ALLOC, &a0, &a1, wait)) + return false; + *idx = (uint32_t)a0; + return true; +} + +bool vnic_dev_counter_free(struct vnic_dev *vdev, uint32_t idx) +{ + u64 a0 = idx; + u64 a1 = 0; + int wait = 1000; + + return vnic_dev_cmd(vdev, CMD_COUNTER_FREE, &a0, &a1, + wait) == 0; +} + +bool vnic_dev_counter_query(struct vnic_dev *vdev, uint32_t idx, + bool reset, uint64_t *packets, uint64_t *bytes) +{ + u64 a0 = idx; + u64 a1 = reset ? 1 : 0; + int wait = 1000; + + if (reset) { + /* query/reset returns updated counters */ + if (vnic_dev_cmd(vdev, CMD_COUNTER_QUERY, &a0, &a1, wait)) + return false; + *packets = a0; + *bytes = a1; + } else { + /* Get values DMA'd from the adapter */ + *packets = vdev->flow_counters[idx].vcc_packets; + *bytes = vdev->flow_counters[idx].vcc_bytes; + } + return true; +} diff --git a/drivers/net/enic/base/vnic_dev.h b/drivers/net/enic/base/vnic_dev.h index 270a47bd..de2645c4 100644 --- a/drivers/net/enic/base/vnic_dev.h +++ b/drivers/net/enic/base/vnic_dev.h @@ -118,6 +118,8 @@ int vnic_dev_spec(struct vnic_dev *vdev, unsigned int offset, size_t size, void *value); int vnic_dev_stats_clear(struct vnic_dev *vdev); int vnic_dev_stats_dump(struct vnic_dev *vdev, struct vnic_stats **stats); +int vnic_dev_counter_dma_cfg(struct vnic_dev *vdev, u32 period, + u32 num_counters); int vnic_dev_hang_notify(struct vnic_dev *vdev); int vnic_dev_packet_filter(struct vnic_dev *vdev, int directed, int multicast, int broadcast, int promisc, int allmulti); @@ -170,6 +172,7 @@ struct vnic_dev *vnic_dev_register(struct vnic_dev *vdev, unsigned int num_bars); struct rte_pci_device *vnic_dev_get_pdev(struct vnic_dev *vdev); int vnic_dev_alloc_stats_mem(struct vnic_dev *vdev); +int vnic_dev_alloc_counter_mem(struct vnic_dev *vdev); int vnic_dev_cmd_init(struct vnic_dev *vdev, int fallback); int vnic_dev_get_size(void); int vnic_dev_int13(struct vnic_dev *vdev, u64 arg, u32 op); @@ -187,4 +190,9 @@ int vnic_dev_overlay_offload_ctrl(struct vnic_dev *vdev, int vnic_dev_overlay_offload_cfg(struct vnic_dev *vdev, u8 overlay, u16 vxlan_udp_port_number); int vnic_dev_capable_vxlan(struct vnic_dev *vdev); +bool vnic_dev_counter_alloc(struct vnic_dev *vdev, uint32_t *idx); +bool vnic_dev_counter_free(struct vnic_dev *vdev, uint32_t idx); +bool vnic_dev_counter_query(struct vnic_dev *vdev, uint32_t idx, + bool reset, uint64_t *packets, uint64_t *bytes); + #endif /* _VNIC_DEV_H_ */ diff --git a/drivers/net/enic/base/vnic_devcmd.h b/drivers/net/enic/base/vnic_devcmd.h index a22d8a76..3aad2dbd 100644 --- a/drivers/net/enic/base/vnic_devcmd.h +++ 
b/drivers/net/enic/base/vnic_devcmd.h @@ -598,6 +598,48 @@ enum vnic_devcmd_cmd { * a3 = bitmask of supported actions */ CMD_ADD_ADV_FILTER = _CMDC(_CMD_DIR_RW, _CMD_VTYPE_ENET, 77), + + /* + * Allocate a counter for use with CMD_ADD_FILTER + * out:(u32) a0 = counter index + */ + CMD_COUNTER_ALLOC = _CMDC(_CMD_DIR_READ, _CMD_VTYPE_ENET, 85), + + /* + * Free a counter + * in: (u32) a0 = counter_id + */ + CMD_COUNTER_FREE = _CMDC(_CMD_DIR_WRITE, _CMD_VTYPE_ENET, 86), + + /* + * Read a counter + * in: (u32) a0 = counter_id + * (u32) a1 = clear counter if non-zero + * out:(u64) a0 = packet count + * (u64) a1 = byte count + */ + CMD_COUNTER_QUERY = _CMDC(_CMD_DIR_RW, _CMD_VTYPE_ENET, 87), + + /* + * Configure periodic counter DMA. This will trigger an immediate + * DMA of the counters (unless period == 0), and then schedule a DMA + * of the counters every <period> seconds until disdabled. + * Each new COUNTER_DMA_CONFIG will override all previous commands on + * this vnic. + * Setting a2 (period) = 0 will disable periodic DMAs + * If a0 (num_counters) != 0, an immediate DMA will always be done, + * irrespective of the value in a2. + * in: (u32) a0 = number of counters to DMA + * (u64) a1 = host target DMA address + * (u32) a2 = DMA period in milliseconds (0 to disable) + */ + CMD_COUNTER_DMA_CONFIG = _CMDC(_CMD_DIR_WRITE, _CMD_VTYPE_ENET, 88), +#define VNIC_COUNTER_DMA_MIN_PERIOD 500 + + /* + * Clear all counters on a vnic + */ + CMD_COUNTER_CLEAR_ALL = _CMDC(_CMD_DIR_NONE, _CMD_VTYPE_ENET, 89), }; /* Modes for exchanging advanced filter capabilities. The modes supported by @@ -863,9 +905,11 @@ struct filter_action { #define FILTER_ACTION_RQ_STEERING_FLAG (1 << 0) #define FILTER_ACTION_FILTER_ID_FLAG (1 << 1) #define FILTER_ACTION_DROP_FLAG (1 << 2) +#define FILTER_ACTION_COUNTER_FLAG (1 << 3) #define FILTER_ACTION_V2_ALL (FILTER_ACTION_RQ_STEERING_FLAG \ + | FILTER_ACTION_FILTER_ID_FLAG \ | FILTER_ACTION_DROP_FLAG \ - | FILTER_ACTION_FILTER_ID_FLAG) + | FILTER_ACTION_COUNTER_FLAG) /* Version 2 of filter action must be a strict extension of struct filter_action * where the first fields exactly match in size and meaning. @@ -875,7 +919,8 @@ struct filter_action_v2 { u32 rq_idx; u32 flags; /* use FILTER_ACTION_XXX_FLAG defines */ u16 filter_id; - u_int8_t reserved[32]; /* for future expansion */ + u32 counter_index; + uint8_t reserved[28]; /* for future expansion */ } __attribute__((packed)); /* Specifies the filter type. */ @@ -941,9 +986,9 @@ enum { }; struct filter_tlv { - u_int32_t type; - u_int32_t length; - u_int32_t val[0]; + uint32_t type; + uint32_t length; + uint32_t val[0]; }; /* Data for CMD_ADD_FILTER is 2 TLV and filter + action structs */ @@ -957,10 +1002,10 @@ struct filter_tlv { * drivers should use this instead of "sizeof (struct filter_v2)" when * computing length for TLV. */ -static inline u_int32_t +static inline uint32_t vnic_filter_size(struct filter_v2 *fp) { - u_int32_t size; + uint32_t size; switch (fp->type) { case FILTER_USNIC_ID: @@ -999,10 +1044,10 @@ enum { * drivers should use this instead of "sizeof (struct filter_action_v2)" * when computing length for TLV. 
*/ -static inline u_int32_t +static inline uint32_t vnic_action_size(struct filter_action_v2 *fap) { - u_int32_t size; + uint32_t size; switch (fap->type) { case FILTER_ACTION_RQ_STEERING: @@ -1122,4 +1167,13 @@ typedef enum { GRPINTR_UPD_VECT, } grpintr_subcmd_t; +/* + * Structure for counter DMA + * (DMAed by CMD_COUNTER_DMA_CONFIG) + */ +struct vnic_counter_counts { + u64 vcc_packets; + u64 vcc_bytes; +}; + #endif /* _VNIC_DEVCMD_H_ */ diff --git a/drivers/net/enic/enic.h b/drivers/net/enic/enic.h index 7c27bd51..7bca3cad 100644 --- a/drivers/net/enic/enic.h +++ b/drivers/net/enic/enic.h @@ -39,6 +39,9 @@ #define PAGE_ROUND_UP(x) \ ((((unsigned long)(x)) + ENIC_PAGE_SIZE-1) & (~(ENIC_PAGE_SIZE-1))) +/* must be >= VNIC_COUNTER_DMA_MIN_PERIOD */ +#define VNIC_FLOW_COUNTER_UPDATE_MSECS 500 + #define ENICPMD_VFIO_PATH "/dev/vfio/vfio" /*#define ENIC_DESC_COUNT_MAKE_ODD (x) do{if ((~(x)) & 1) { (x)--; } }while(0)*/ @@ -94,6 +97,7 @@ struct rte_flow { LIST_ENTRY(rte_flow) next; u16 enic_filter_id; struct filter_v2 enic_filter; + int counter_idx; /* NIC allocated counter index (-1 = invalid) */ }; /* Per-instance private data structure */ @@ -104,6 +108,11 @@ struct enic { struct vnic_dev_bar bar0; struct vnic_dev *vdev; + /* + * mbuf_initializer contains 64 bits of mbuf rearm_data, used by + * the avx2 handler at this time. + */ + uint64_t mbuf_initializer; unsigned int port_id; bool overlay_offload; struct rte_eth_dev *rte_dev; @@ -126,6 +135,7 @@ struct enic { u8 filter_actions; /* HW supported actions */ bool vxlan; bool disable_overlay; /* devargs disable_overlay=1 */ + uint8_t enable_avx2_rx; /* devargs enable-avx2-rx=1 */ bool nic_cfg_chk; /* NIC_CFG_CHK available */ bool udp_rss_weak; /* Bodega style UDP RSS */ uint8_t ig_vlan_rewrite_mode; /* devargs ig-vlan-rewrite */ @@ -165,6 +175,7 @@ struct enic { rte_spinlock_t mtu_lock; LIST_HEAD(enic_flows, rte_flow) flows; + int max_flow_counter; rte_spinlock_t flows_lock; /* RSS */ @@ -326,6 +337,7 @@ uint16_t enic_prep_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts); int enic_set_mtu(struct enic *enic, uint16_t new_mtu); int enic_link_update(struct enic *enic); +bool enic_use_vector_rx_handler(struct enic *enic); void enic_fdir_info(struct enic *enic); void enic_fdir_info_get(struct enic *enic, struct rte_eth_fdir_info *stats); void copy_fltr_v1(struct filter_v2 *fltr, struct rte_eth_fdir_input *input, diff --git a/drivers/net/enic/enic_ethdev.c b/drivers/net/enic/enic_ethdev.c index b3d57771..996bb554 100644 --- a/drivers/net/enic/enic_ethdev.c +++ b/drivers/net/enic/enic_ethdev.c @@ -37,6 +37,7 @@ static const struct rte_pci_id pci_id_enic_map[] = { }; #define ENIC_DEVARG_DISABLE_OVERLAY "disable-overlay" +#define ENIC_DEVARG_ENABLE_AVX2_RX "enable-avx2-rx" #define ENIC_DEVARG_IG_VLAN_REWRITE "ig-vlan-rewrite" RTE_INIT(enicpmd_init_log) @@ -521,10 +522,34 @@ static const uint32_t *enicpmd_dev_supported_ptypes_get(struct rte_eth_dev *dev) RTE_PTYPE_L4_NONFRAG, RTE_PTYPE_UNKNOWN }; + static const uint32_t ptypes_overlay[] = { + RTE_PTYPE_L2_ETHER, + RTE_PTYPE_L2_ETHER_VLAN, + RTE_PTYPE_L3_IPV4_EXT_UNKNOWN, + RTE_PTYPE_L3_IPV6_EXT_UNKNOWN, + RTE_PTYPE_L4_TCP, + RTE_PTYPE_L4_UDP, + RTE_PTYPE_L4_FRAG, + RTE_PTYPE_L4_NONFRAG, + RTE_PTYPE_TUNNEL_GRENAT, + RTE_PTYPE_INNER_L2_ETHER, + RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN, + RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN, + RTE_PTYPE_INNER_L4_TCP, + RTE_PTYPE_INNER_L4_UDP, + RTE_PTYPE_INNER_L4_FRAG, + RTE_PTYPE_INNER_L4_NONFRAG, + RTE_PTYPE_UNKNOWN + }; - if (dev->rx_pkt_burst == 
enic_recv_pkts || - dev->rx_pkt_burst == enic_noscatter_recv_pkts) - return ptypes; + if (dev->rx_pkt_burst != enic_dummy_recv_pkts && + dev->rx_pkt_burst != NULL) { + struct enic *enic = pmd_priv(dev); + if (enic->overlay_offload) + return ptypes_overlay; + else + return ptypes; + } return NULL; } @@ -915,22 +940,27 @@ static const struct eth_dev_ops enicpmd_eth_dev_ops = { .udp_tunnel_port_del = enicpmd_dev_udp_tunnel_port_del, }; -static int enic_parse_disable_overlay(__rte_unused const char *key, - const char *value, - void *opaque) +static int enic_parse_zero_one(const char *key, + const char *value, + void *opaque) { struct enic *enic; + bool b; enic = (struct enic *)opaque; if (strcmp(value, "0") == 0) { - enic->disable_overlay = false; + b = false; } else if (strcmp(value, "1") == 0) { - enic->disable_overlay = true; + b = true; } else { - dev_err(enic, "Invalid value for " ENIC_DEVARG_DISABLE_OVERLAY - ": expected=0|1 given=%s\n", value); + dev_err(enic, "Invalid value for %s" + ": expected=0|1 given=%s\n", key, value); return -EINVAL; } + if (strcmp(key, ENIC_DEVARG_DISABLE_OVERLAY) == 0) + enic->disable_overlay = b; + if (strcmp(key, ENIC_DEVARG_ENABLE_AVX2_RX) == 0) + enic->enable_avx2_rx = b; return 0; } @@ -971,6 +1001,7 @@ static int enic_check_devargs(struct rte_eth_dev *dev) { static const char *const valid_keys[] = { ENIC_DEVARG_DISABLE_OVERLAY, + ENIC_DEVARG_ENABLE_AVX2_RX, ENIC_DEVARG_IG_VLAN_REWRITE, NULL}; struct enic *enic = pmd_priv(dev); @@ -979,6 +1010,7 @@ static int enic_check_devargs(struct rte_eth_dev *dev) ENICPMD_FUNC_TRACE(); enic->disable_overlay = false; + enic->enable_avx2_rx = false; enic->ig_vlan_rewrite_mode = IG_VLAN_REWRITE_MODE_PASS_THRU; if (!dev->device->devargs) return 0; @@ -986,7 +1018,9 @@ static int enic_check_devargs(struct rte_eth_dev *dev) if (!kvlist) return -EINVAL; if (rte_kvargs_process(kvlist, ENIC_DEVARG_DISABLE_OVERLAY, - enic_parse_disable_overlay, enic) < 0 || + enic_parse_zero_one, enic) < 0 || + rte_kvargs_process(kvlist, ENIC_DEVARG_ENABLE_AVX2_RX, + enic_parse_zero_one, enic) < 0 || rte_kvargs_process(kvlist, ENIC_DEVARG_IG_VLAN_REWRITE, enic_parse_ig_vlan_rewrite, enic) < 0) { rte_kvargs_free(kvlist); @@ -996,7 +1030,6 @@ static int enic_check_devargs(struct rte_eth_dev *dev) return 0; } -struct enic *enicpmd_list_head = NULL; /* Initialize the driver * It returns 0 on success. 
*/ @@ -1044,7 +1077,8 @@ static int eth_enic_pci_remove(struct rte_pci_device *pci_dev) static struct rte_pci_driver rte_enic_pmd = { .id_table = pci_id_enic_map, - .drv_flags = RTE_PCI_DRV_NEED_MAPPING | RTE_PCI_DRV_INTR_LSC, + .drv_flags = RTE_PCI_DRV_NEED_MAPPING | RTE_PCI_DRV_INTR_LSC | + RTE_PCI_DRV_IOVA_AS_VA, .probe = eth_enic_pci_probe, .remove = eth_enic_pci_remove, }; @@ -1054,4 +1088,5 @@ RTE_PMD_REGISTER_PCI_TABLE(net_enic, pci_id_enic_map); RTE_PMD_REGISTER_KMOD_DEP(net_enic, "* igb_uio | uio_pci_generic | vfio-pci"); RTE_PMD_REGISTER_PARAM_STRING(net_enic, ENIC_DEVARG_DISABLE_OVERLAY "=0|1 " + ENIC_DEVARG_ENABLE_AVX2_RX "=0|1 " ENIC_DEVARG_IG_VLAN_REWRITE "=trunk|untag|priority|pass"); diff --git a/drivers/net/enic/enic_flow.c b/drivers/net/enic/enic_flow.c index 0cf04aef..bb9ed037 100644 --- a/drivers/net/enic/enic_flow.c +++ b/drivers/net/enic/enic_flow.c @@ -289,6 +289,15 @@ static const enum rte_flow_action_type enic_supported_actions_v2_drop[] = { RTE_FLOW_ACTION_TYPE_END, }; +static const enum rte_flow_action_type enic_supported_actions_v2_count[] = { + RTE_FLOW_ACTION_TYPE_QUEUE, + RTE_FLOW_ACTION_TYPE_MARK, + RTE_FLOW_ACTION_TYPE_FLAG, + RTE_FLOW_ACTION_TYPE_DROP, + RTE_FLOW_ACTION_TYPE_COUNT, + RTE_FLOW_ACTION_TYPE_END, +}; + /** Action capabilities indexed by NIC version information */ static const struct enic_action_cap enic_action_cap[] = { [FILTER_ACTION_RQ_STEERING_FLAG] = { @@ -303,6 +312,10 @@ static const struct enic_action_cap enic_action_cap[] = { .actions = enic_supported_actions_v2_drop, .copy_fn = enic_copy_action_v2, }, + [FILTER_ACTION_COUNTER_FLAG] = { + .actions = enic_supported_actions_v2_count, + .copy_fn = enic_copy_action_v2, + }, }; static int @@ -1068,6 +1081,10 @@ enic_copy_action_v2(const struct rte_flow_action actions[], enic_action->flags |= FILTER_ACTION_DROP_FLAG; break; } + case RTE_FLOW_ACTION_TYPE_COUNT: { + enic_action->flags |= FILTER_ACTION_COUNTER_FLAG; + break; + } case RTE_FLOW_ACTION_TYPE_VOID: continue; default: @@ -1112,7 +1129,9 @@ enic_get_action_cap(struct enic *enic) uint8_t actions; actions = enic->filter_actions; - if (actions & FILTER_ACTION_DROP_FLAG) + if (actions & FILTER_ACTION_COUNTER_FLAG) + ea = &enic_action_cap[FILTER_ACTION_COUNTER_FLAG]; + else if (actions & FILTER_ACTION_DROP_FLAG) ea = &enic_action_cap[FILTER_ACTION_DROP_FLAG]; else if (actions & FILTER_ACTION_FILTER_ID_FLAG) ea = &enic_action_cap[FILTER_ACTION_FILTER_ID_FLAG]; @@ -1395,8 +1414,10 @@ enic_flow_add_filter(struct enic *enic, struct filter_v2 *enic_filter, struct rte_flow_error *error) { struct rte_flow *flow; - int ret; - u16 entry; + int err; + uint16_t entry; + int ctr_idx; + int last_max_flow_ctr; FLOW_TRACE(); @@ -1407,20 +1428,64 @@ enic_flow_add_filter(struct enic *enic, struct filter_v2 *enic_filter, return NULL; } + flow->counter_idx = -1; + last_max_flow_ctr = -1; + if (enic_action->flags & FILTER_ACTION_COUNTER_FLAG) { + if (!vnic_dev_counter_alloc(enic->vdev, (uint32_t *)&ctr_idx)) { + rte_flow_error_set(error, ENOMEM, + RTE_FLOW_ERROR_TYPE_ACTION_CONF, + NULL, "cannot allocate counter"); + goto unwind_flow_alloc; + } + flow->counter_idx = ctr_idx; + enic_action->counter_index = ctr_idx; + + /* If index is the largest, increase the counter DMA size */ + if (ctr_idx > enic->max_flow_counter) { + err = vnic_dev_counter_dma_cfg(enic->vdev, + VNIC_FLOW_COUNTER_UPDATE_MSECS, + ctr_idx + 1); + if (err) { + rte_flow_error_set(error, -err, + RTE_FLOW_ERROR_TYPE_ACTION_CONF, + NULL, "counter DMA config failed"); + goto unwind_ctr_alloc; + } 
+ last_max_flow_ctr = enic->max_flow_counter; + enic->max_flow_counter = ctr_idx; + } + } + /* entry[in] is the queue id, entry[out] is the filter Id for delete */ entry = enic_action->rq_idx; - ret = vnic_dev_classifier(enic->vdev, CLSF_ADD, &entry, enic_filter, + err = vnic_dev_classifier(enic->vdev, CLSF_ADD, &entry, enic_filter, enic_action); - if (!ret) { - flow->enic_filter_id = entry; - flow->enic_filter = *enic_filter; - } else { - rte_flow_error_set(error, ret, RTE_FLOW_ERROR_TYPE_HANDLE, + if (err) { + rte_flow_error_set(error, -err, RTE_FLOW_ERROR_TYPE_HANDLE, NULL, "vnic_dev_classifier error"); - rte_free(flow); - return NULL; + goto unwind_ctr_dma_cfg; } + + flow->enic_filter_id = entry; + flow->enic_filter = *enic_filter; + return flow; + +/* unwind if there are errors */ +unwind_ctr_dma_cfg: + if (last_max_flow_ctr != -1) { + /* reduce counter DMA size */ + vnic_dev_counter_dma_cfg(enic->vdev, + VNIC_FLOW_COUNTER_UPDATE_MSECS, + last_max_flow_ctr + 1); + enic->max_flow_counter = last_max_flow_ctr; + } +unwind_ctr_alloc: + if (flow->counter_idx != -1) + vnic_dev_counter_free(enic->vdev, ctr_idx); +unwind_flow_alloc: + rte_free(flow); + return NULL; } /** @@ -1435,18 +1500,29 @@ enic_flow_add_filter(struct enic *enic, struct filter_v2 *enic_filter, * @param error[out] */ static int -enic_flow_del_filter(struct enic *enic, u16 filter_id, +enic_flow_del_filter(struct enic *enic, struct rte_flow *flow, struct rte_flow_error *error) { - int ret; + u16 filter_id; + int err; FLOW_TRACE(); - ret = vnic_dev_classifier(enic->vdev, CLSF_DEL, &filter_id, NULL, NULL); - if (!ret) - rte_flow_error_set(error, ret, RTE_FLOW_ERROR_TYPE_HANDLE, + filter_id = flow->enic_filter_id; + err = vnic_dev_classifier(enic->vdev, CLSF_DEL, &filter_id, NULL, NULL); + if (err) { + rte_flow_error_set(error, -err, RTE_FLOW_ERROR_TYPE_HANDLE, NULL, "vnic_dev_classifier failed"); - return ret; + return -err; + } + + if (flow->counter_idx != -1) { + if (!vnic_dev_counter_free(enic->vdev, flow->counter_idx)) + dev_err(enic, "counter free failed, idx: %d\n", + flow->counter_idx); + flow->counter_idx = -1; + } + return 0; } /* @@ -1529,9 +1605,10 @@ enic_flow_destroy(struct rte_eth_dev *dev, struct rte_flow *flow, FLOW_TRACE(); rte_spinlock_lock(&enic->flows_lock); - enic_flow_del_filter(enic, flow->enic_filter_id, error); + enic_flow_del_filter(enic, flow, error); LIST_REMOVE(flow, next); rte_spinlock_unlock(&enic->flows_lock); + rte_free(flow); return 0; } @@ -1553,13 +1630,77 @@ enic_flow_flush(struct rte_eth_dev *dev, struct rte_flow_error *error) while (!LIST_EMPTY(&enic->flows)) { flow = LIST_FIRST(&enic->flows); - enic_flow_del_filter(enic, flow->enic_filter_id, error); + enic_flow_del_filter(enic, flow, error); LIST_REMOVE(flow, next); + rte_free(flow); } rte_spinlock_unlock(&enic->flows_lock); return 0; } +static int +enic_flow_query_count(struct rte_eth_dev *dev, + struct rte_flow *flow, void *data, + struct rte_flow_error *error) +{ + struct enic *enic = pmd_priv(dev); + struct rte_flow_query_count *query; + uint64_t packets, bytes; + + FLOW_TRACE(); + + if (flow->counter_idx == -1) { + return rte_flow_error_set(error, ENOTSUP, + RTE_FLOW_ERROR_TYPE_UNSPECIFIED, + NULL, + "flow does not have counter"); + } + query = (struct rte_flow_query_count *)data; + if (!vnic_dev_counter_query(enic->vdev, flow->counter_idx, + !!query->reset, &packets, &bytes)) { + return rte_flow_error_set + (error, EINVAL, + RTE_FLOW_ERROR_TYPE_UNSPECIFIED, + NULL, + "cannot read counter"); + } + query->hits_set = 1; + 
query->bytes_set = 1; + query->hits = packets; + query->bytes = bytes; + return 0; +} + +static int +enic_flow_query(struct rte_eth_dev *dev, + struct rte_flow *flow, + const struct rte_flow_action *actions, + void *data, + struct rte_flow_error *error) +{ + int ret = 0; + + FLOW_TRACE(); + + for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) { + switch (actions->type) { + case RTE_FLOW_ACTION_TYPE_VOID: + break; + case RTE_FLOW_ACTION_TYPE_COUNT: + ret = enic_flow_query_count(dev, flow, data, error); + break; + default: + return rte_flow_error_set(error, ENOTSUP, + RTE_FLOW_ERROR_TYPE_ACTION, + actions, + "action not supported"); + } + if (ret < 0) + return ret; + } + return 0; +} + /** * Flow callback registration. * @@ -1570,4 +1711,5 @@ const struct rte_flow_ops enic_flow_ops = { .create = enic_flow_create, .destroy = enic_flow_destroy, .flush = enic_flow_flush, + .query = enic_flow_query, }; diff --git a/drivers/net/enic/enic_main.c b/drivers/net/enic/enic_main.c index fd940c58..e81c3f3b 100644 --- a/drivers/net/enic/enic_main.c +++ b/drivers/net/enic/enic_main.c @@ -514,12 +514,29 @@ static void enic_prep_wq_for_simple_tx(struct enic *enic, uint16_t queue_idx) } } +/* + * The 'strong' version is in enic_rxtx_vec_avx2.c. This weak version is used + * used when that file is not compiled. + */ +bool __attribute__((weak)) +enic_use_vector_rx_handler(__rte_unused struct enic *enic) +{ + return false; +} + static void pick_rx_handler(struct enic *enic) { struct rte_eth_dev *eth_dev; - /* Use the non-scatter, simplified RX handler if possible. */ + /* + * Preference order: + * 1. The vectorized handler if possible and requested. + * 2. The non-scatter, simplified handler if scatter Rx is not used. + * 3. The default handler as a fallback. + */ eth_dev = enic->rte_dev; + if (enic_use_vector_rx_handler(enic)) + return; if (enic->rq_count > 0 && enic->rq[0].data_queue_enable == 0) { PMD_INIT_LOG(DEBUG, " use the non-scatter Rx handler"); eth_dev->rx_pkt_burst = &enic_noscatter_recv_pkts; @@ -534,6 +551,25 @@ int enic_enable(struct enic *enic) unsigned int index; int err; struct rte_eth_dev *eth_dev = enic->rte_dev; + uint64_t simple_tx_offloads; + uintptr_t p; + + if (enic->enable_avx2_rx) { + struct rte_mbuf mb_def = { .buf_addr = 0 }; + + /* + * mbuf_initializer contains const-after-init fields of + * receive mbufs (i.e. 64 bits of fields from rearm_data). + * It is currently used by the vectorized handler. + */ + mb_def.nb_segs = 1; + mb_def.data_off = RTE_PKTMBUF_HEADROOM; + mb_def.port = enic->port_id; + rte_mbuf_refcnt_set(&mb_def, 1); + rte_compiler_barrier(); + p = (uintptr_t)&mb_def.rearm_data; + enic->mbuf_initializer = *(uint64_t *)p; + } eth_dev->data->dev_link.link_speed = vnic_dev_port_speed(enic->vdev); eth_dev->data->dev_link.link_duplex = ETH_LINK_FULL_DUPLEX; @@ -572,10 +608,17 @@ int enic_enable(struct enic *enic) } /* - * Use the simple TX handler if possible. All offloads must be - * disabled. + * Use the simple TX handler if possible. Only checksum offloads + * and vlan insertion are supported. 
*/ - if (eth_dev->data->dev_conf.txmode.offloads == 0) { + simple_tx_offloads = enic->tx_offload_capa & + (DEV_TX_OFFLOAD_OUTER_IPV4_CKSUM | + DEV_TX_OFFLOAD_VLAN_INSERT | + DEV_TX_OFFLOAD_IPV4_CKSUM | + DEV_TX_OFFLOAD_UDP_CKSUM | + DEV_TX_OFFLOAD_TCP_CKSUM); + if ((eth_dev->data->dev_conf.txmode.offloads & + ~simple_tx_offloads) == 0) { PMD_INIT_LOG(DEBUG, " use the simple tx handler"); eth_dev->tx_pkt_burst = &enic_simple_xmit_pkts; for (index = 0; index < enic->wq_count; index++) @@ -1639,11 +1682,25 @@ static int enic_dev_init(struct enic *enic) LIST_INIT(&enic->flows); rte_spinlock_init(&enic->flows_lock); + enic->max_flow_counter = -1; /* set up link status checking */ vnic_dev_notify_set(enic->vdev, -1); /* No Intr for notify */ enic->overlay_offload = false; + if (enic->disable_overlay && enic->vxlan) { + /* + * Explicitly disable overlay offload as the setting is + * sticky, and resetting vNIC does not disable it. + */ + if (vnic_dev_overlay_offload_ctrl(enic->vdev, + OVERLAY_FEATURE_VXLAN, + OVERLAY_OFFLOAD_DISABLE)) { + dev_err(enic, "failed to disable overlay offload\n"); + } else { + dev_info(enic, "Overlay offload is disabled\n"); + } + } if (!enic->disable_overlay && enic->vxlan && /* 'VXLAN feature' enables VXLAN, NVGRE, and GENEVE. */ vnic_dev_overlay_offload_ctrl(enic->vdev, @@ -1653,11 +1710,9 @@ static int enic_dev_init(struct enic *enic) DEV_TX_OFFLOAD_OUTER_IPV4_CKSUM | DEV_TX_OFFLOAD_GENEVE_TNL_TSO | DEV_TX_OFFLOAD_VXLAN_TNL_TSO; - /* - * Do not add PKT_TX_OUTER_{IPV4,IPV6} as they are not - * 'offload' flags (i.e. not part of PKT_TX_OFFLOAD_MASK). - */ enic->tx_offload_mask |= + PKT_TX_OUTER_IPV6 | + PKT_TX_OUTER_IPV4 | PKT_TX_OUTER_IP_CKSUM | PKT_TX_TUNNEL_MASK; enic->overlay_offload = true; @@ -1708,14 +1763,20 @@ int enic_probe(struct enic *enic) enic_free_consistent); /* - * Allocate the consistent memory for stats upfront so both primary and - * secondary processes can dump stats. + * Allocate the consistent memory for stats and counters upfront so + * both primary and secondary processes can access them. */ err = vnic_dev_alloc_stats_mem(enic->vdev); if (err) { dev_err(enic, "Failed to allocate cmd memory, aborting\n"); goto err_out_unregister; } + err = vnic_dev_alloc_counter_mem(enic->vdev); + if (err) { + dev_err(enic, "Failed to allocate counter memory, aborting\n"); + goto err_out_unregister; + } + /* Issue device open to get device in known state */ err = enic_dev_open(enic); if (err) { diff --git a/drivers/net/enic/enic_res.c b/drivers/net/enic/enic_res.c index 8d493ffe..24b2844f 100644 --- a/drivers/net/enic/enic_res.c +++ b/drivers/net/enic/enic_res.c @@ -85,7 +85,7 @@ int enic_get_vnic_config(struct enic *enic) vnic_dev_capable_udp_rss_weak(enic->vdev, &enic->nic_cfg_chk, &enic->udp_rss_weak); - dev_info(enic, "Flow api filter mode: %s Actions: %s%s%s\n", + dev_info(enic, "Flow api filter mode: %s Actions: %s%s%s%s\n", ((enic->flow_filter_mode == FILTER_DPDK_1) ? "DPDK" : ((enic->flow_filter_mode == FILTER_USNIC_IP) ? "USNIC" : ((enic->flow_filter_mode == FILTER_IPV4_5TUPLE) ? "5TUPLE" : @@ -95,7 +95,9 @@ int enic_get_vnic_config(struct enic *enic) ((enic->filter_actions & FILTER_ACTION_FILTER_ID_FLAG) ? "tag " : ""), ((enic->filter_actions & FILTER_ACTION_DROP_FLAG) ? - "drop " : "")); + "drop " : ""), + ((enic->filter_actions & FILTER_ACTION_COUNTER_FLAG) ? 
+ "count " : "")); c->wq_desc_count = min_t(u32, ENIC_MAX_WQ_DESCS, @@ -195,13 +197,14 @@ int enic_get_vnic_config(struct enic *enic) enic->rx_offload_capa = DEV_RX_OFFLOAD_SCATTER | DEV_RX_OFFLOAD_JUMBO_FRAME | - DEV_RX_OFFLOAD_CRC_STRIP | DEV_RX_OFFLOAD_VLAN_STRIP | DEV_RX_OFFLOAD_IPV4_CKSUM | DEV_RX_OFFLOAD_UDP_CKSUM | DEV_RX_OFFLOAD_TCP_CKSUM; enic->tx_offload_mask = - PKT_TX_VLAN_PKT | + PKT_TX_IPV6 | + PKT_TX_IPV4 | + PKT_TX_VLAN | PKT_TX_IP_CKSUM | PKT_TX_L4_MASK | PKT_TX_TCP_SEG; diff --git a/drivers/net/enic/enic_rxtx.c b/drivers/net/enic/enic_rxtx.c index 7129e121..5189ee63 100644 --- a/drivers/net/enic/enic_rxtx.c +++ b/drivers/net/enic/enic_rxtx.c @@ -11,6 +11,7 @@ #include "enic_compat.h" #include "rq_enet_desc.h" #include "enic.h" +#include "enic_rxtx_common.h" #include <rte_ether.h> #include <rte_ip.h> #include <rte_tcp.h> @@ -30,266 +31,6 @@ #define rte_packet_prefetch(p) do {} while (0) #endif -static inline uint16_t -enic_cq_rx_desc_ciflags(struct cq_enet_rq_desc *crd) -{ - return le16_to_cpu(crd->completed_index_flags) & ~CQ_DESC_COMP_NDX_MASK; -} - -static inline uint16_t -enic_cq_rx_desc_bwflags(struct cq_enet_rq_desc *crd) -{ - return le16_to_cpu(crd->bytes_written_flags) & - ~CQ_ENET_RQ_DESC_BYTES_WRITTEN_MASK; -} - -static inline uint8_t -enic_cq_rx_desc_packet_error(uint16_t bwflags) -{ - return (bwflags & CQ_ENET_RQ_DESC_FLAGS_TRUNCATED) == - CQ_ENET_RQ_DESC_FLAGS_TRUNCATED; -} - -static inline uint8_t -enic_cq_rx_desc_eop(uint16_t ciflags) -{ - return (ciflags & CQ_ENET_RQ_DESC_FLAGS_EOP) - == CQ_ENET_RQ_DESC_FLAGS_EOP; -} - -static inline uint8_t -enic_cq_rx_desc_csum_not_calc(struct cq_enet_rq_desc *cqrd) -{ - return (le16_to_cpu(cqrd->q_number_rss_type_flags) & - CQ_ENET_RQ_DESC_FLAGS_CSUM_NOT_CALC) == - CQ_ENET_RQ_DESC_FLAGS_CSUM_NOT_CALC; -} - -static inline uint8_t -enic_cq_rx_desc_ipv4_csum_ok(struct cq_enet_rq_desc *cqrd) -{ - return (cqrd->flags & CQ_ENET_RQ_DESC_FLAGS_IPV4_CSUM_OK) == - CQ_ENET_RQ_DESC_FLAGS_IPV4_CSUM_OK; -} - -static inline uint8_t -enic_cq_rx_desc_tcp_udp_csum_ok(struct cq_enet_rq_desc *cqrd) -{ - return (cqrd->flags & CQ_ENET_RQ_DESC_FLAGS_TCP_UDP_CSUM_OK) == - CQ_ENET_RQ_DESC_FLAGS_TCP_UDP_CSUM_OK; -} - -static inline uint8_t -enic_cq_rx_desc_rss_type(struct cq_enet_rq_desc *cqrd) -{ - return (uint8_t)((le16_to_cpu(cqrd->q_number_rss_type_flags) >> - CQ_DESC_Q_NUM_BITS) & CQ_ENET_RQ_DESC_RSS_TYPE_MASK); -} - -static inline uint32_t -enic_cq_rx_desc_rss_hash(struct cq_enet_rq_desc *cqrd) -{ - return le32_to_cpu(cqrd->rss_hash); -} - -static inline uint16_t -enic_cq_rx_desc_vlan(struct cq_enet_rq_desc *cqrd) -{ - return le16_to_cpu(cqrd->vlan); -} - -static inline uint16_t -enic_cq_rx_desc_n_bytes(struct cq_desc *cqd) -{ - struct cq_enet_rq_desc *cqrd = (struct cq_enet_rq_desc *)cqd; - return le16_to_cpu(cqrd->bytes_written_flags) & - CQ_ENET_RQ_DESC_BYTES_WRITTEN_MASK; -} - - -static inline uint8_t -enic_cq_rx_check_err(struct cq_desc *cqd) -{ - struct cq_enet_rq_desc *cqrd = (struct cq_enet_rq_desc *)cqd; - uint16_t bwflags; - - bwflags = enic_cq_rx_desc_bwflags(cqrd); - if (unlikely(enic_cq_rx_desc_packet_error(bwflags))) - return 1; - return 0; -} - -/* Lookup table to translate RX CQ flags to mbuf flags. */ -static inline uint32_t -enic_cq_rx_flags_to_pkt_type(struct cq_desc *cqd, uint8_t tnl) -{ - struct cq_enet_rq_desc *cqrd = (struct cq_enet_rq_desc *)cqd; - uint8_t cqrd_flags = cqrd->flags; - /* - * Odd-numbered entries are for tunnel packets. 
All packet type info - * applies to the inner packet, and there is no info on the outer - * packet. The outer flags in these entries exist only to avoid - * changing enic_cq_rx_to_pkt_flags(). They are cleared from mbuf - * afterwards. - * - * Also, as there is no tunnel type info (VXLAN, NVGRE, or GENEVE), set - * RTE_PTYPE_TUNNEL_GRENAT.. - */ - static const uint32_t cq_type_table[128] __rte_cache_aligned = { - [0x00] = RTE_PTYPE_UNKNOWN, - [0x01] = RTE_PTYPE_UNKNOWN | - RTE_PTYPE_TUNNEL_GRENAT | - RTE_PTYPE_INNER_L2_ETHER, - [0x20] = RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_NONFRAG, - [0x21] = RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_NONFRAG | - RTE_PTYPE_TUNNEL_GRENAT | - RTE_PTYPE_INNER_L2_ETHER | - RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | - RTE_PTYPE_INNER_L4_NONFRAG, - [0x22] = RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP, - [0x23] = RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP | - RTE_PTYPE_TUNNEL_GRENAT | - RTE_PTYPE_INNER_L2_ETHER | - RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | - RTE_PTYPE_INNER_L4_UDP, - [0x24] = RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_TCP, - [0x25] = RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_TCP | - RTE_PTYPE_TUNNEL_GRENAT | - RTE_PTYPE_INNER_L2_ETHER | - RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | - RTE_PTYPE_INNER_L4_TCP, - [0x60] = RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_FRAG, - [0x61] = RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_FRAG | - RTE_PTYPE_TUNNEL_GRENAT | - RTE_PTYPE_INNER_L2_ETHER | - RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | - RTE_PTYPE_INNER_L4_FRAG, - [0x62] = RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_FRAG, - [0x63] = RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_FRAG | - RTE_PTYPE_TUNNEL_GRENAT | - RTE_PTYPE_INNER_L2_ETHER | - RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | - RTE_PTYPE_INNER_L4_FRAG, - [0x64] = RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_FRAG, - [0x65] = RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_FRAG | - RTE_PTYPE_TUNNEL_GRENAT | - RTE_PTYPE_INNER_L2_ETHER | - RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | - RTE_PTYPE_INNER_L4_FRAG, - [0x10] = RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | RTE_PTYPE_L4_NONFRAG, - [0x11] = RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | RTE_PTYPE_L4_NONFRAG | - RTE_PTYPE_TUNNEL_GRENAT | - RTE_PTYPE_INNER_L2_ETHER | - RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | - RTE_PTYPE_INNER_L4_NONFRAG, - [0x12] = RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | RTE_PTYPE_L4_UDP, - [0x13] = RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | RTE_PTYPE_L4_UDP | - RTE_PTYPE_TUNNEL_GRENAT | - RTE_PTYPE_INNER_L2_ETHER | - RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | - RTE_PTYPE_INNER_L4_UDP, - [0x14] = RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | RTE_PTYPE_L4_TCP, - [0x15] = RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | RTE_PTYPE_L4_TCP | - RTE_PTYPE_TUNNEL_GRENAT | - RTE_PTYPE_INNER_L2_ETHER | - RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | - RTE_PTYPE_INNER_L4_TCP, - [0x50] = RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | RTE_PTYPE_L4_FRAG, - [0x51] = RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | RTE_PTYPE_L4_FRAG | - RTE_PTYPE_TUNNEL_GRENAT | - RTE_PTYPE_INNER_L2_ETHER | - RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | - RTE_PTYPE_INNER_L4_FRAG, - [0x52] = RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | RTE_PTYPE_L4_FRAG, - [0x53] = RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | RTE_PTYPE_L4_FRAG | - RTE_PTYPE_TUNNEL_GRENAT | - RTE_PTYPE_INNER_L2_ETHER | - RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | - RTE_PTYPE_INNER_L4_FRAG, - [0x54] = RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | RTE_PTYPE_L4_FRAG, - [0x55] = RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | RTE_PTYPE_L4_FRAG | - RTE_PTYPE_TUNNEL_GRENAT | - RTE_PTYPE_INNER_L2_ETHER | - RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | - RTE_PTYPE_INNER_L4_FRAG, - /* All others reserved */ 
- }; - cqrd_flags &= CQ_ENET_RQ_DESC_FLAGS_IPV4_FRAGMENT - | CQ_ENET_RQ_DESC_FLAGS_IPV4 | CQ_ENET_RQ_DESC_FLAGS_IPV6 - | CQ_ENET_RQ_DESC_FLAGS_TCP | CQ_ENET_RQ_DESC_FLAGS_UDP; - return cq_type_table[cqrd_flags + tnl]; -} - -static inline void -enic_cq_rx_to_pkt_flags(struct cq_desc *cqd, struct rte_mbuf *mbuf) -{ - struct cq_enet_rq_desc *cqrd = (struct cq_enet_rq_desc *)cqd; - uint16_t bwflags, pkt_flags = 0, vlan_tci; - bwflags = enic_cq_rx_desc_bwflags(cqrd); - vlan_tci = enic_cq_rx_desc_vlan(cqrd); - - /* VLAN STRIPPED flag. The L2 packet type updated here also */ - if (bwflags & CQ_ENET_RQ_DESC_FLAGS_VLAN_STRIPPED) { - pkt_flags |= PKT_RX_VLAN | PKT_RX_VLAN_STRIPPED; - mbuf->packet_type |= RTE_PTYPE_L2_ETHER; - } else { - if (vlan_tci != 0) - mbuf->packet_type |= RTE_PTYPE_L2_ETHER_VLAN; - else - mbuf->packet_type |= RTE_PTYPE_L2_ETHER; - } - mbuf->vlan_tci = vlan_tci; - - if ((cqd->type_color & CQ_DESC_TYPE_MASK) == CQ_DESC_TYPE_CLASSIFIER) { - struct cq_enet_rq_clsf_desc *clsf_cqd; - uint16_t filter_id; - clsf_cqd = (struct cq_enet_rq_clsf_desc *)cqd; - filter_id = clsf_cqd->filter_id; - if (filter_id) { - pkt_flags |= PKT_RX_FDIR; - if (filter_id != ENIC_MAGIC_FILTER_ID) { - mbuf->hash.fdir.hi = clsf_cqd->filter_id; - pkt_flags |= PKT_RX_FDIR_ID; - } - } - } else if (enic_cq_rx_desc_rss_type(cqrd)) { - /* RSS flag */ - pkt_flags |= PKT_RX_RSS_HASH; - mbuf->hash.rss = enic_cq_rx_desc_rss_hash(cqrd); - } - - /* checksum flags */ - if (mbuf->packet_type & (RTE_PTYPE_L3_IPV4 | RTE_PTYPE_L3_IPV6)) { - if (!enic_cq_rx_desc_csum_not_calc(cqrd)) { - uint32_t l4_flags; - l4_flags = mbuf->packet_type & RTE_PTYPE_L4_MASK; - - /* - * When overlay offload is enabled, the NIC may - * set ipv4_csum_ok=1 if the inner packet is IPv6.. - * So, explicitly check for IPv4 before checking - * ipv4_csum_ok. - */ - if (mbuf->packet_type & RTE_PTYPE_L3_IPV4) { - if (enic_cq_rx_desc_ipv4_csum_ok(cqrd)) - pkt_flags |= PKT_RX_IP_CKSUM_GOOD; - else - pkt_flags |= PKT_RX_IP_CKSUM_BAD; - } - - if (l4_flags == RTE_PTYPE_L4_UDP || - l4_flags == RTE_PTYPE_L4_TCP) { - if (enic_cq_rx_desc_tcp_udp_csum_ok(cqrd)) - pkt_flags |= PKT_RX_L4_CKSUM_GOOD; - else - pkt_flags |= PKT_RX_L4_CKSUM_BAD; - } - } - } - - mbuf->ol_flags = pkt_flags; -} - /* dummy receive function to replace actual function in * order to do safe reconfiguration operations. 
*/ @@ -707,7 +448,7 @@ uint16_t enic_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, wq_desc_avail = vnic_wq_desc_avail(wq); head_idx = wq->head_idx; desc_count = wq->ring.desc_count; - ol_flags_mask = PKT_TX_VLAN_PKT | PKT_TX_IP_CKSUM | PKT_TX_L4_MASK; + ol_flags_mask = PKT_TX_VLAN | PKT_TX_IP_CKSUM | PKT_TX_L4_MASK; tx_oversized = &enic->soft_stats.tx_oversized; nb_pkts = RTE_MIN(nb_pkts, ENIC_TX_XMIT_MAX); @@ -735,7 +476,7 @@ uint16_t enic_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, mss = 0; vlan_id = tx_pkt->vlan_tci; - vlan_tag_insert = !!(ol_flags & PKT_TX_VLAN_PKT); + vlan_tag_insert = !!(ol_flags & PKT_TX_VLAN); bus_addr = (dma_addr_t) (tx_pkt->buf_iova + tx_pkt->data_off); @@ -840,12 +581,33 @@ static void enqueue_simple_pkts(struct rte_mbuf **pkts, struct enic *enic) { struct rte_mbuf *p; + uint16_t mss; while (n) { n--; p = *pkts++; desc->address = p->buf_iova + p->data_off; desc->length = p->pkt_len; + /* VLAN insert */ + desc->vlan_tag = p->vlan_tci; + desc->header_length_flags &= + ((1 << WQ_ENET_FLAGS_EOP_SHIFT) | + (1 << WQ_ENET_FLAGS_CQ_ENTRY_SHIFT)); + if (p->ol_flags & PKT_TX_VLAN) { + desc->header_length_flags |= + 1 << WQ_ENET_FLAGS_VLAN_TAG_INSERT_SHIFT; + } + /* + * Checksum offload. We use WQ_ENET_OFFLOAD_MODE_CSUM, which + * is 0, so no need to set offload_mode. + */ + mss = 0; + if (p->ol_flags & PKT_TX_IP_CKSUM) + mss |= ENIC_CALC_IP_CKSUM << WQ_ENET_MSS_SHIFT; + if (p->ol_flags & PKT_TX_L4_MASK) + mss |= ENIC_CALC_TCP_UDP_CKSUM << WQ_ENET_MSS_SHIFT; + desc->mss_loopback = mss; + /* * The app should not send oversized * packets. tx_pkt_prepare includes a check as diff --git a/drivers/net/enic/enic_rxtx_common.h b/drivers/net/enic/enic_rxtx_common.h new file mode 100644 index 00000000..bfbb4909 --- /dev/null +++ b/drivers/net/enic/enic_rxtx_common.h @@ -0,0 +1,271 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright 2008-2018 Cisco Systems, Inc. All rights reserved. + * Copyright 2007 Nuova Systems, Inc. All rights reserved. 
+ */ + +#ifndef _ENIC_RXTX_COMMON_H_ +#define _ENIC_RXTX_COMMON_H_ + +static inline uint16_t +enic_cq_rx_desc_ciflags(struct cq_enet_rq_desc *crd) +{ + return le16_to_cpu(crd->completed_index_flags) & ~CQ_DESC_COMP_NDX_MASK; +} + +static inline uint16_t +enic_cq_rx_desc_bwflags(struct cq_enet_rq_desc *crd) +{ + return le16_to_cpu(crd->bytes_written_flags) & + ~CQ_ENET_RQ_DESC_BYTES_WRITTEN_MASK; +} + +static inline uint8_t +enic_cq_rx_desc_packet_error(uint16_t bwflags) +{ + return (bwflags & CQ_ENET_RQ_DESC_FLAGS_TRUNCATED) == + CQ_ENET_RQ_DESC_FLAGS_TRUNCATED; +} + +static inline uint8_t +enic_cq_rx_desc_eop(uint16_t ciflags) +{ + return (ciflags & CQ_ENET_RQ_DESC_FLAGS_EOP) + == CQ_ENET_RQ_DESC_FLAGS_EOP; +} + +static inline uint8_t +enic_cq_rx_desc_csum_not_calc(struct cq_enet_rq_desc *cqrd) +{ + return (le16_to_cpu(cqrd->q_number_rss_type_flags) & + CQ_ENET_RQ_DESC_FLAGS_CSUM_NOT_CALC) == + CQ_ENET_RQ_DESC_FLAGS_CSUM_NOT_CALC; +} + +static inline uint8_t +enic_cq_rx_desc_ipv4_csum_ok(struct cq_enet_rq_desc *cqrd) +{ + return (cqrd->flags & CQ_ENET_RQ_DESC_FLAGS_IPV4_CSUM_OK) == + CQ_ENET_RQ_DESC_FLAGS_IPV4_CSUM_OK; +} + +static inline uint8_t +enic_cq_rx_desc_tcp_udp_csum_ok(struct cq_enet_rq_desc *cqrd) +{ + return (cqrd->flags & CQ_ENET_RQ_DESC_FLAGS_TCP_UDP_CSUM_OK) == + CQ_ENET_RQ_DESC_FLAGS_TCP_UDP_CSUM_OK; +} + +static inline uint8_t +enic_cq_rx_desc_rss_type(struct cq_enet_rq_desc *cqrd) +{ + return (uint8_t)((le16_to_cpu(cqrd->q_number_rss_type_flags) >> + CQ_DESC_Q_NUM_BITS) & CQ_ENET_RQ_DESC_RSS_TYPE_MASK); +} + +static inline uint32_t +enic_cq_rx_desc_rss_hash(struct cq_enet_rq_desc *cqrd) +{ + return le32_to_cpu(cqrd->rss_hash); +} + +static inline uint16_t +enic_cq_rx_desc_vlan(struct cq_enet_rq_desc *cqrd) +{ + return le16_to_cpu(cqrd->vlan); +} + +static inline uint16_t +enic_cq_rx_desc_n_bytes(struct cq_desc *cqd) +{ + struct cq_enet_rq_desc *cqrd = (struct cq_enet_rq_desc *)cqd; + return le16_to_cpu(cqrd->bytes_written_flags) & + CQ_ENET_RQ_DESC_BYTES_WRITTEN_MASK; +} + + +static inline uint8_t +enic_cq_rx_check_err(struct cq_desc *cqd) +{ + struct cq_enet_rq_desc *cqrd = (struct cq_enet_rq_desc *)cqd; + uint16_t bwflags; + + bwflags = enic_cq_rx_desc_bwflags(cqrd); + if (unlikely(enic_cq_rx_desc_packet_error(bwflags))) + return 1; + return 0; +} + +/* Lookup table to translate RX CQ flags to mbuf flags. */ +static uint32_t +enic_cq_rx_flags_to_pkt_type(struct cq_desc *cqd, uint8_t tnl) +{ + struct cq_enet_rq_desc *cqrd = (struct cq_enet_rq_desc *)cqd; + uint8_t cqrd_flags = cqrd->flags; + /* + * Odd-numbered entries are for tunnel packets. All packet type info + * applies to the inner packet, and there is no info on the outer + * packet. The outer flags in these entries exist only to avoid + * changing enic_cq_rx_to_pkt_flags(). They are cleared from mbuf + * afterwards. + * + * Also, as there is no tunnel type info (VXLAN, NVGRE, or GENEVE), set + * RTE_PTYPE_TUNNEL_GRENAT.. 
+ */ + static const uint32_t cq_type_table[128] __rte_cache_aligned = { + [0x00] = RTE_PTYPE_UNKNOWN, + [0x01] = RTE_PTYPE_UNKNOWN | + RTE_PTYPE_TUNNEL_GRENAT | + RTE_PTYPE_INNER_L2_ETHER, + [0x20] = RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_NONFRAG, + [0x21] = RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_NONFRAG | + RTE_PTYPE_TUNNEL_GRENAT | + RTE_PTYPE_INNER_L2_ETHER | + RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | + RTE_PTYPE_INNER_L4_NONFRAG, + [0x22] = RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP, + [0x23] = RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP | + RTE_PTYPE_TUNNEL_GRENAT | + RTE_PTYPE_INNER_L2_ETHER | + RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | + RTE_PTYPE_INNER_L4_UDP, + [0x24] = RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_TCP, + [0x25] = RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_TCP | + RTE_PTYPE_TUNNEL_GRENAT | + RTE_PTYPE_INNER_L2_ETHER | + RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | + RTE_PTYPE_INNER_L4_TCP, + [0x60] = RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_FRAG, + [0x61] = RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_FRAG | + RTE_PTYPE_TUNNEL_GRENAT | + RTE_PTYPE_INNER_L2_ETHER | + RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | + RTE_PTYPE_INNER_L4_FRAG, + [0x62] = RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_FRAG, + [0x63] = RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_FRAG | + RTE_PTYPE_TUNNEL_GRENAT | + RTE_PTYPE_INNER_L2_ETHER | + RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | + RTE_PTYPE_INNER_L4_FRAG, + [0x64] = RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_FRAG, + [0x65] = RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_FRAG | + RTE_PTYPE_TUNNEL_GRENAT | + RTE_PTYPE_INNER_L2_ETHER | + RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | + RTE_PTYPE_INNER_L4_FRAG, + [0x10] = RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | RTE_PTYPE_L4_NONFRAG, + [0x11] = RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | RTE_PTYPE_L4_NONFRAG | + RTE_PTYPE_TUNNEL_GRENAT | + RTE_PTYPE_INNER_L2_ETHER | + RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | + RTE_PTYPE_INNER_L4_NONFRAG, + [0x12] = RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | RTE_PTYPE_L4_UDP, + [0x13] = RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | RTE_PTYPE_L4_UDP | + RTE_PTYPE_TUNNEL_GRENAT | + RTE_PTYPE_INNER_L2_ETHER | + RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | + RTE_PTYPE_INNER_L4_UDP, + [0x14] = RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | RTE_PTYPE_L4_TCP, + [0x15] = RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | RTE_PTYPE_L4_TCP | + RTE_PTYPE_TUNNEL_GRENAT | + RTE_PTYPE_INNER_L2_ETHER | + RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | + RTE_PTYPE_INNER_L4_TCP, + [0x50] = RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | RTE_PTYPE_L4_FRAG, + [0x51] = RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | RTE_PTYPE_L4_FRAG | + RTE_PTYPE_TUNNEL_GRENAT | + RTE_PTYPE_INNER_L2_ETHER | + RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | + RTE_PTYPE_INNER_L4_FRAG, + [0x52] = RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | RTE_PTYPE_L4_FRAG, + [0x53] = RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | RTE_PTYPE_L4_FRAG | + RTE_PTYPE_TUNNEL_GRENAT | + RTE_PTYPE_INNER_L2_ETHER | + RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | + RTE_PTYPE_INNER_L4_FRAG, + [0x54] = RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | RTE_PTYPE_L4_FRAG, + [0x55] = RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | RTE_PTYPE_L4_FRAG | + RTE_PTYPE_TUNNEL_GRENAT | + RTE_PTYPE_INNER_L2_ETHER | + RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | + RTE_PTYPE_INNER_L4_FRAG, + /* All others reserved */ + }; + cqrd_flags &= CQ_ENET_RQ_DESC_FLAGS_IPV4_FRAGMENT + | CQ_ENET_RQ_DESC_FLAGS_IPV4 | CQ_ENET_RQ_DESC_FLAGS_IPV6 + | CQ_ENET_RQ_DESC_FLAGS_TCP | CQ_ENET_RQ_DESC_FLAGS_UDP; + return cq_type_table[cqrd_flags + tnl]; +} + +static void +enic_cq_rx_to_pkt_flags(struct cq_desc *cqd, struct rte_mbuf *mbuf) +{ + struct cq_enet_rq_desc *cqrd = 
(struct cq_enet_rq_desc *)cqd; + uint16_t bwflags, pkt_flags = 0, vlan_tci; + bwflags = enic_cq_rx_desc_bwflags(cqrd); + vlan_tci = enic_cq_rx_desc_vlan(cqrd); + + /* VLAN STRIPPED flag. The L2 packet type updated here also */ + if (bwflags & CQ_ENET_RQ_DESC_FLAGS_VLAN_STRIPPED) { + pkt_flags |= PKT_RX_VLAN | PKT_RX_VLAN_STRIPPED; + mbuf->packet_type |= RTE_PTYPE_L2_ETHER; + } else { + if (vlan_tci != 0) { + pkt_flags |= PKT_RX_VLAN; + mbuf->packet_type |= RTE_PTYPE_L2_ETHER_VLAN; + } else { + mbuf->packet_type |= RTE_PTYPE_L2_ETHER; + } + } + mbuf->vlan_tci = vlan_tci; + + if ((cqd->type_color & CQ_DESC_TYPE_MASK) == CQ_DESC_TYPE_CLASSIFIER) { + struct cq_enet_rq_clsf_desc *clsf_cqd; + uint16_t filter_id; + clsf_cqd = (struct cq_enet_rq_clsf_desc *)cqd; + filter_id = clsf_cqd->filter_id; + if (filter_id) { + pkt_flags |= PKT_RX_FDIR; + if (filter_id != ENIC_MAGIC_FILTER_ID) { + mbuf->hash.fdir.hi = clsf_cqd->filter_id; + pkt_flags |= PKT_RX_FDIR_ID; + } + } + } else if (enic_cq_rx_desc_rss_type(cqrd)) { + /* RSS flag */ + pkt_flags |= PKT_RX_RSS_HASH; + mbuf->hash.rss = enic_cq_rx_desc_rss_hash(cqrd); + } + + /* checksum flags */ + if (mbuf->packet_type & (RTE_PTYPE_L3_IPV4 | RTE_PTYPE_L3_IPV6)) { + if (!enic_cq_rx_desc_csum_not_calc(cqrd)) { + uint32_t l4_flags; + l4_flags = mbuf->packet_type & RTE_PTYPE_L4_MASK; + + /* + * When overlay offload is enabled, the NIC may + * set ipv4_csum_ok=1 if the inner packet is IPv6.. + * So, explicitly check for IPv4 before checking + * ipv4_csum_ok. + */ + if (mbuf->packet_type & RTE_PTYPE_L3_IPV4) { + if (enic_cq_rx_desc_ipv4_csum_ok(cqrd)) + pkt_flags |= PKT_RX_IP_CKSUM_GOOD; + else + pkt_flags |= PKT_RX_IP_CKSUM_BAD; + } + + if (l4_flags == RTE_PTYPE_L4_UDP || + l4_flags == RTE_PTYPE_L4_TCP) { + if (enic_cq_rx_desc_tcp_udp_csum_ok(cqrd)) + pkt_flags |= PKT_RX_L4_CKSUM_GOOD; + else + pkt_flags |= PKT_RX_L4_CKSUM_BAD; + } + } + } + + mbuf->ol_flags = pkt_flags; +} + +#endif /* _ENIC_RXTX_COMMON_H_ */ diff --git a/drivers/net/enic/enic_rxtx_vec_avx2.c b/drivers/net/enic/enic_rxtx_vec_avx2.c new file mode 100644 index 00000000..d2185490 --- /dev/null +++ b/drivers/net/enic/enic_rxtx_vec_avx2.c @@ -0,0 +1,831 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright 2008-2018 Cisco Systems, Inc. All rights reserved. + * Copyright 2007 Nuova Systems, Inc. All rights reserved. 
+ */ + +#include <rte_mbuf.h> +#include <rte_ethdev_driver.h> + +#include "enic_compat.h" +#include "rq_enet_desc.h" +#include "enic.h" +#include "enic_rxtx_common.h" + +#include <x86intrin.h> + +static struct rte_mbuf * +rx_one(struct cq_enet_rq_desc *cqd, struct rte_mbuf *mb, struct enic *enic) +{ + bool tnl; + + *(uint64_t *)&mb->rearm_data = enic->mbuf_initializer; + mb->data_len = cqd->bytes_written_flags & + CQ_ENET_RQ_DESC_BYTES_WRITTEN_MASK; + mb->pkt_len = mb->data_len; + tnl = enic->overlay_offload && (cqd->completed_index_flags & + CQ_ENET_RQ_DESC_FLAGS_FCOE) != 0; + mb->packet_type = + enic_cq_rx_flags_to_pkt_type((struct cq_desc *)cqd, tnl); + enic_cq_rx_to_pkt_flags((struct cq_desc *)cqd, mb); + /* Wipe the outer types set by enic_cq_rx_flags_to_pkt_type() */ + if (tnl) { + mb->packet_type &= ~(RTE_PTYPE_L3_MASK | + RTE_PTYPE_L4_MASK); + } + return mb; +} + +static uint16_t +enic_noscatter_vec_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, + uint16_t nb_pkts) +{ + struct rte_mbuf **rx, **rxmb; + uint16_t cq_idx, nb_rx, max_rx; + struct cq_enet_rq_desc *cqd; + struct rq_enet_desc *rqd; + struct vnic_cq *cq; + struct vnic_rq *rq; + struct enic *enic; + uint8_t color; + + rq = rx_queue; + enic = vnic_dev_priv(rq->vdev); + cq = &enic->cq[enic_cq_rq(enic, rq->index)]; + cq_idx = cq->to_clean; + + /* + * Fill up the reserve of free mbufs. Below, we restock the receive + * ring with these mbufs to avoid allocation failures. + */ + if (rq->num_free_mbufs == 0) { + if (rte_mempool_get_bulk(rq->mp, (void **)rq->free_mbufs, + ENIC_RX_BURST_MAX)) + return 0; + rq->num_free_mbufs = ENIC_RX_BURST_MAX; + } + /* Receive until the end of the ring, at most. */ + max_rx = RTE_MIN(nb_pkts, rq->num_free_mbufs); + max_rx = RTE_MIN(max_rx, cq->ring.desc_count - cq_idx); + + rxmb = rq->mbuf_ring + cq_idx; + color = cq->last_color; + cqd = (struct cq_enet_rq_desc *)(cq->ring.descs) + cq_idx; + rx = rx_pkts; + if (max_rx == 0 || + (cqd->type_color & CQ_DESC_COLOR_MASK_NOSHIFT) == color) + return 0; + + /* Step 1: Process one packet to do aligned 256-bit load below */ + if (cq_idx & 0x1) { + if (unlikely(cqd->bytes_written_flags & + CQ_ENET_RQ_DESC_FLAGS_TRUNCATED)) { + rte_pktmbuf_free(*rxmb++); + rte_atomic64_inc(&enic->soft_stats.rx_packet_errors); + } else { + *rx++ = rx_one(cqd, *rxmb++, enic); + } + cqd++; + max_rx--; + } + + const __m256i mask = + _mm256_set_epi8(/* Second descriptor */ + 0xff, /* type_color */ + (CQ_ENET_RQ_DESC_FLAGS_IPV4_FRAGMENT | + CQ_ENET_RQ_DESC_FLAGS_IPV4 | + CQ_ENET_RQ_DESC_FLAGS_IPV6 | + CQ_ENET_RQ_DESC_FLAGS_TCP | + CQ_ENET_RQ_DESC_FLAGS_UDP), /* flags */ + 0, 0, /* checksum_fcoe */ + 0xff, 0xff, /* vlan */ + 0x3f, 0xff, /* bytes_written_flags */ + 0xff, 0xff, 0xff, 0xff, /* rss_hash */ + 0xff, 0xff, /* q_number_rss_type_flags */ + 0, 0, /* completed_index_flags */ + /* First descriptor */ + 0xff, /* type_color */ + (CQ_ENET_RQ_DESC_FLAGS_IPV4_FRAGMENT | + CQ_ENET_RQ_DESC_FLAGS_IPV4 | + CQ_ENET_RQ_DESC_FLAGS_IPV6 | + CQ_ENET_RQ_DESC_FLAGS_TCP | + CQ_ENET_RQ_DESC_FLAGS_UDP), /* flags */ + 0, 0, /* checksum_fcoe */ + 0xff, 0xff, /* vlan */ + 0x3f, 0xff, /* bytes_written_flags */ + 0xff, 0xff, 0xff, 0xff, /* rss_hash */ + 0xff, 0xff, /* q_number_rss_type_flags */ + 0, 0 /* completed_index_flags */ + ); + const __m256i shuffle_mask = + _mm256_set_epi8(/* Second descriptor */ + 7, 6, 5, 4, /* rss = rss_hash */ + 11, 10, /* vlan_tci = vlan */ + 9, 8, /* data_len = bytes_written */ + 0x80, 0x80, 9, 8, /* pkt_len = bytes_written */ + 0x80, 0x80, 0x80, 0x80, /* 
packet_type = 0 */ + /* First descriptor */ + 7, 6, 5, 4, /* rss = rss_hash */ + 11, 10, /* vlan_tci = vlan */ + 9, 8, /* data_len = bytes_written */ + 0x80, 0x80, 9, 8, /* pkt_len = bytes_written */ + 0x80, 0x80, 0x80, 0x80 /* packet_type = 0 */ + ); + /* Used to collect 8 flags from 8 desc into one register */ + const __m256i flags_shuffle_mask = + _mm256_set_epi8(/* Second descriptor */ + 1, 3, 9, 14, + 1, 3, 9, 14, + 1, 3, 9, 14, + 1, 3, 9, 14, + /* First descriptor */ + 1, 3, 9, 14, + 1, 3, 9, 14, + 1, 3, 9, 14, + /* + * Byte 3: upper byte of completed_index_flags + * bit 5 = fcoe (tunnel) + * Byte 2: upper byte of q_number_rss_type_flags + * bits 2,3,4,5 = rss type + * bit 6 = csum_not_calc + * Byte 1: upper byte of bytes_written_flags + * bit 6 = truncated + * bit 7 = vlan stripped + * Byte 0: flags + */ + 1, 3, 9, 14 + ); + /* Used to collect 8 VLAN IDs from 8 desc into one register */ + const __m256i vlan_shuffle_mask = + _mm256_set_epi8(/* Second descriptor */ + 0x80, 0x80, 11, 10, + 0x80, 0x80, 11, 10, + 0x80, 0x80, 11, 10, + 0x80, 0x80, 11, 10, + /* First descriptor */ + 0x80, 0x80, 11, 10, + 0x80, 0x80, 11, 10, + 0x80, 0x80, 11, 10, + 0x80, 0x80, 11, 10); + /* PKT_RX_RSS_HASH is 1<<1 so fits in 8-bit integer */ + const __m256i rss_shuffle = + _mm256_set_epi8(/* second 128 bits */ + PKT_RX_RSS_HASH, PKT_RX_RSS_HASH, PKT_RX_RSS_HASH, + PKT_RX_RSS_HASH, PKT_RX_RSS_HASH, PKT_RX_RSS_HASH, + PKT_RX_RSS_HASH, PKT_RX_RSS_HASH, PKT_RX_RSS_HASH, + PKT_RX_RSS_HASH, PKT_RX_RSS_HASH, PKT_RX_RSS_HASH, + PKT_RX_RSS_HASH, PKT_RX_RSS_HASH, PKT_RX_RSS_HASH, + 0, /* rss_types = 0 */ + /* first 128 bits */ + PKT_RX_RSS_HASH, PKT_RX_RSS_HASH, PKT_RX_RSS_HASH, + PKT_RX_RSS_HASH, PKT_RX_RSS_HASH, PKT_RX_RSS_HASH, + PKT_RX_RSS_HASH, PKT_RX_RSS_HASH, PKT_RX_RSS_HASH, + PKT_RX_RSS_HASH, PKT_RX_RSS_HASH, PKT_RX_RSS_HASH, + PKT_RX_RSS_HASH, PKT_RX_RSS_HASH, PKT_RX_RSS_HASH, + 0 /* rss_types = 0 */); + /* + * VLAN offload flags. + * shuffle index: + * vlan_stripped => bit 0 + * vlan_id == 0 => bit 1 + */ + const __m256i vlan_shuffle = + _mm256_set_epi32(0, 0, 0, 0, + PKT_RX_VLAN | PKT_RX_VLAN_STRIPPED, 0, + PKT_RX_VLAN | PKT_RX_VLAN_STRIPPED, PKT_RX_VLAN); + /* Use the same shuffle index as vlan_shuffle */ + const __m256i vlan_ptype_shuffle = + _mm256_set_epi32(0, 0, 0, 0, + RTE_PTYPE_L2_ETHER, + RTE_PTYPE_L2_ETHER, + RTE_PTYPE_L2_ETHER, + RTE_PTYPE_L2_ETHER_VLAN); + /* + * CKSUM flags. Shift right so they fit int 8-bit integers. 
+ * shuffle index: + * ipv4_csum_ok => bit 3 + * ip4 => bit 2 + * tcp_or_udp => bit 1 + * tcp_udp_csum_ok => bit 0 + */ + const __m256i csum_shuffle = + _mm256_set_epi8(/* second 128 bits */ + /* 1111 ip4+ip4_ok+l4+l4_ok */ + ((PKT_RX_IP_CKSUM_GOOD | PKT_RX_L4_CKSUM_GOOD) >> 1), + /* 1110 ip4_ok+ip4+l4+!l4_ok */ + ((PKT_RX_IP_CKSUM_GOOD | PKT_RX_L4_CKSUM_BAD) >> 1), + (PKT_RX_IP_CKSUM_GOOD >> 1), /* 1101 ip4+ip4_ok */ + (PKT_RX_IP_CKSUM_GOOD >> 1), /* 1100 ip4_ok+ip4 */ + (PKT_RX_L4_CKSUM_GOOD >> 1), /* 1011 l4+l4_ok */ + (PKT_RX_L4_CKSUM_BAD >> 1), /* 1010 l4+!l4_ok */ + 0, /* 1001 */ + 0, /* 1000 */ + /* 0111 !ip4_ok+ip4+l4+l4_ok */ + ((PKT_RX_IP_CKSUM_BAD | PKT_RX_L4_CKSUM_GOOD) >> 1), + /* 0110 !ip4_ok+ip4+l4+!l4_ok */ + ((PKT_RX_IP_CKSUM_BAD | PKT_RX_L4_CKSUM_BAD) >> 1), + (PKT_RX_IP_CKSUM_BAD >> 1), /* 0101 !ip4_ok+ip4 */ + (PKT_RX_IP_CKSUM_BAD >> 1), /* 0100 !ip4_ok+ip4 */ + (PKT_RX_L4_CKSUM_GOOD >> 1), /* 0011 l4+l4_ok */ + (PKT_RX_L4_CKSUM_BAD >> 1), /* 0010 l4+!l4_ok */ + 0, /* 0001 */ + 0, /* 0000 */ + /* first 128 bits */ + ((PKT_RX_IP_CKSUM_GOOD | PKT_RX_L4_CKSUM_GOOD) >> 1), + ((PKT_RX_IP_CKSUM_GOOD | PKT_RX_L4_CKSUM_BAD) >> 1), + (PKT_RX_IP_CKSUM_GOOD >> 1), + (PKT_RX_IP_CKSUM_GOOD >> 1), + (PKT_RX_L4_CKSUM_GOOD >> 1), + (PKT_RX_L4_CKSUM_BAD >> 1), + 0, 0, + ((PKT_RX_IP_CKSUM_BAD | PKT_RX_L4_CKSUM_GOOD) >> 1), + ((PKT_RX_IP_CKSUM_BAD | PKT_RX_L4_CKSUM_BAD) >> 1), + (PKT_RX_IP_CKSUM_BAD >> 1), + (PKT_RX_IP_CKSUM_BAD >> 1), + (PKT_RX_L4_CKSUM_GOOD >> 1), + (PKT_RX_L4_CKSUM_BAD >> 1), + 0, 0); + /* + * Non-fragment PTYPEs. + * Shuffle 4-bit index: + * ip6 => bit 0 + * ip4 => bit 1 + * udp => bit 2 + * tcp => bit 3 + * bit + * 3 2 1 0 + * ------- + * 0 0 0 0 unknown + * 0 0 0 1 ip6 | nonfrag + * 0 0 1 0 ip4 | nonfrag + * 0 0 1 1 unknown + * 0 1 0 0 unknown + * 0 1 0 1 ip6 | udp + * 0 1 1 0 ip4 | udp + * 0 1 1 1 unknown + * 1 0 0 0 unknown + * 1 0 0 1 ip6 | tcp + * 1 0 1 0 ip4 | tcp + * 1 0 1 1 unknown + * 1 1 0 0 unknown + * 1 1 0 1 unknown + * 1 1 1 0 unknown + * 1 1 1 1 unknown + * + * PTYPEs do not fit in 8 bits, so shift right 4.. + */ + const __m256i nonfrag_ptype_shuffle = + _mm256_set_epi8(/* second 128 bits */ + RTE_PTYPE_UNKNOWN, + RTE_PTYPE_UNKNOWN, RTE_PTYPE_UNKNOWN, + RTE_PTYPE_UNKNOWN, RTE_PTYPE_UNKNOWN, + (RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_TCP) >> 4, + (RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | RTE_PTYPE_L4_TCP) >> 4, + RTE_PTYPE_UNKNOWN, RTE_PTYPE_UNKNOWN, + (RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP) >> 4, + (RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | RTE_PTYPE_L4_UDP) >> 4, + RTE_PTYPE_UNKNOWN, RTE_PTYPE_UNKNOWN, + (RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | + RTE_PTYPE_L4_NONFRAG) >> 4, + (RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | + RTE_PTYPE_L4_NONFRAG) >> 4, + RTE_PTYPE_UNKNOWN, + /* first 128 bits */ + RTE_PTYPE_UNKNOWN, + RTE_PTYPE_UNKNOWN, RTE_PTYPE_UNKNOWN, + RTE_PTYPE_UNKNOWN, RTE_PTYPE_UNKNOWN, + (RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_TCP) >> 4, + (RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | RTE_PTYPE_L4_TCP) >> 4, + RTE_PTYPE_UNKNOWN, RTE_PTYPE_UNKNOWN, + (RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP) >> 4, + (RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | RTE_PTYPE_L4_UDP) >> 4, + RTE_PTYPE_UNKNOWN, RTE_PTYPE_UNKNOWN, + (RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | + RTE_PTYPE_L4_NONFRAG) >> 4, + (RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | + RTE_PTYPE_L4_NONFRAG) >> 4, + RTE_PTYPE_UNKNOWN); + /* Fragment PTYPEs. Use the same shuffle index as above. 
*/ + const __m256i frag_ptype_shuffle = + _mm256_set_epi8(/* second 128 bits */ + RTE_PTYPE_UNKNOWN, + RTE_PTYPE_UNKNOWN, RTE_PTYPE_UNKNOWN, + RTE_PTYPE_UNKNOWN, RTE_PTYPE_UNKNOWN, + (RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | + RTE_PTYPE_L4_FRAG) >> 4, + (RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | + RTE_PTYPE_L4_FRAG) >> 4, + RTE_PTYPE_UNKNOWN, RTE_PTYPE_UNKNOWN, + (RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | + RTE_PTYPE_L4_FRAG) >> 4, + (RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | + RTE_PTYPE_L4_FRAG) >> 4, + RTE_PTYPE_UNKNOWN, RTE_PTYPE_UNKNOWN, + (RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | + RTE_PTYPE_L4_FRAG) >> 4, + (RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | + RTE_PTYPE_L4_FRAG) >> 4, + RTE_PTYPE_UNKNOWN, + /* first 128 bits */ + RTE_PTYPE_UNKNOWN, + RTE_PTYPE_UNKNOWN, RTE_PTYPE_UNKNOWN, + RTE_PTYPE_UNKNOWN, RTE_PTYPE_UNKNOWN, + (RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | + RTE_PTYPE_L4_FRAG) >> 4, + (RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | + RTE_PTYPE_L4_FRAG) >> 4, + RTE_PTYPE_UNKNOWN, RTE_PTYPE_UNKNOWN, + (RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | + RTE_PTYPE_L4_FRAG) >> 4, + (RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | + RTE_PTYPE_L4_FRAG) >> 4, + RTE_PTYPE_UNKNOWN, RTE_PTYPE_UNKNOWN, + (RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | + RTE_PTYPE_L4_FRAG) >> 4, + (RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | + RTE_PTYPE_L4_FRAG) >> 4, + RTE_PTYPE_UNKNOWN); + /* + * Tunnel PTYPEs. Use the same shuffle index as above. + * L4 types are not part of this table. They come from non-tunnel + * types above. + */ + const __m256i tnl_l3_ptype_shuffle = + _mm256_set_epi8(/* second 128 bits */ + RTE_PTYPE_UNKNOWN, + RTE_PTYPE_UNKNOWN, RTE_PTYPE_UNKNOWN, + RTE_PTYPE_UNKNOWN, RTE_PTYPE_UNKNOWN, + RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN >> 16, + RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN >> 16, + RTE_PTYPE_UNKNOWN, RTE_PTYPE_UNKNOWN, + RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN >> 16, + RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN >> 16, + RTE_PTYPE_UNKNOWN, RTE_PTYPE_UNKNOWN, + RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN >> 16, + RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN >> 16, + RTE_PTYPE_UNKNOWN, + /* first 128 bits */ + RTE_PTYPE_UNKNOWN, + RTE_PTYPE_UNKNOWN, RTE_PTYPE_UNKNOWN, + RTE_PTYPE_UNKNOWN, RTE_PTYPE_UNKNOWN, + RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN >> 16, + RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN >> 16, + RTE_PTYPE_UNKNOWN, RTE_PTYPE_UNKNOWN, + RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN >> 16, + RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN >> 16, + RTE_PTYPE_UNKNOWN, RTE_PTYPE_UNKNOWN, + RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN >> 16, + RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN >> 16, + RTE_PTYPE_UNKNOWN); + + const __m256i mbuf_init = _mm256_set_epi64x(0, enic->mbuf_initializer, + 0, enic->mbuf_initializer); + + /* + * --- cq desc fields --- offset + * completed_index_flags - 0 use: fcoe + * q_number_rss_type_flags - 2 use: rss types, csum_not_calc + * rss_hash - 4 ==> mbuf.hash.rss + * bytes_written_flags - 8 ==> mbuf.pkt_len,data_len + * use: truncated, vlan_stripped + * vlan - 10 ==> mbuf.vlan_tci + * checksum_fcoe - 12 (unused) + * flags - 14 use: all bits + * type_color - 15 (unused) + * + * --- mbuf fields --- offset + * rearm_data ---- 16 + * data_off - 0 (mbuf_init) -+ + * refcnt - 2 (mbuf_init) | + * nb_segs - 4 (mbuf_init) | 16B 128b + * port - 6 (mbuf_init) | + * ol_flag - 8 (from cqd) -+ + * rx_descriptor_fields1 ---- 32 + * packet_type - 0 (from cqd) -+ + * pkt_len - 4 (from cqd) | + * data_len - 8 (from cqd) | 16B 128b + * vlan_tci - 10 (from cqd) | + * rss - 12 (from cqd) -+ + */ + + __m256i overlay_enabled = + _mm256_set1_epi32((uint32_t)enic->overlay_offload); + + /* Step 2: Process 8 packets per loop using SIMD */ + while (max_rx > 7 && (((cqd + 7)->type_color & + 
CQ_DESC_COLOR_MASK_NOSHIFT) != color)) { + /* Load 8 16B CQ descriptors */ + __m256i cqd01 = _mm256_load_si256((void *)cqd); + __m256i cqd23 = _mm256_load_si256((void *)(cqd + 2)); + __m256i cqd45 = _mm256_load_si256((void *)(cqd + 4)); + __m256i cqd67 = _mm256_load_si256((void *)(cqd + 6)); + /* Copy 8 mbuf pointers to rx_pkts */ + _mm256_storeu_si256((void *)rx, + _mm256_loadu_si256((void *)rxmb)); + _mm256_storeu_si256((void *)(rx + 4), + _mm256_loadu_si256((void *)(rxmb + 4))); + + /* + * Collect 8 flags (each 32 bits) into one register. + * 4 shuffles, 3 blends, 1 permute for 8 desc: 1 inst/desc + */ + __m256i flags01 = + _mm256_shuffle_epi8(cqd01, flags_shuffle_mask); + /* + * Shuffle above produces 8 x 32-bit flags for 8 descriptors + * in this order: 0, 0, 0, 0, 1, 1, 1, 1 + * The duplicates in each 128-bit lane simplifies blending + * below. + */ + __m256i flags23 = + _mm256_shuffle_epi8(cqd23, flags_shuffle_mask); + __m256i flags45 = + _mm256_shuffle_epi8(cqd45, flags_shuffle_mask); + __m256i flags67 = + _mm256_shuffle_epi8(cqd67, flags_shuffle_mask); + /* 1st blend produces flags for desc: 0, 2, 0, 0, 1, 3, 1, 1 */ + __m256i flags0_3 = _mm256_blend_epi32(flags01, flags23, 0x22); + /* 2nd blend produces flags for desc: 4, 4, 4, 6, 5, 5, 5, 7 */ + __m256i flags4_7 = _mm256_blend_epi32(flags45, flags67, 0x88); + /* 3rd blend produces flags for desc: 0, 2, 4, 6, 1, 3, 5, 7 */ + __m256i flags0_7 = _mm256_blend_epi32(flags0_3, flags4_7, 0xcc); + /* + * Swap to reorder flags in this order: 1, 3, 5, 7, 0, 2, 4, 6 + * This order simplifies blend operations way below that + * produce 'rearm' data for each mbuf. + */ + flags0_7 = _mm256_permute4x64_epi64(flags0_7, + (1 << 6) + (0 << 4) + (3 << 2) + 2); + + /* + * Check truncated bits and bail out early on. + * 6 avx inst, 1 or, 1 if-then-else for 8 desc: 1 inst/desc + */ + __m256i trunc = + _mm256_srli_epi32(_mm256_slli_epi32(flags0_7, 17), 31); + trunc = _mm256_add_epi64(trunc, _mm256_permute4x64_epi64(trunc, + (1 << 6) + (0 << 4) + (3 << 2) + 2)); + /* 0:63 contains 1+3+0+2 and 64:127 contains 5+7+4+6 */ + if (_mm256_extract_epi64(trunc, 0) || + _mm256_extract_epi64(trunc, 1)) + break; + + /* + * Compute PKT_RX_RSS_HASH. + * Use 2 shifts and 1 shuffle for 8 desc: 0.375 inst/desc + * RSS types in byte 0, 4, 8, 12, 16, 20, 24, 28 + * Everything else is zero. + */ + __m256i rss_types = + _mm256_srli_epi32(_mm256_slli_epi32(flags0_7, 10), 28); + /* + * RSS flags (PKT_RX_RSS_HASH) are in + * byte 0, 4, 8, 12, 16, 20, 24, 28 + * Everything else is zero. + */ + __m256i rss_flags = _mm256_shuffle_epi8(rss_shuffle, rss_types); + + /* + * Compute CKSUM flags. First build the index and then + * use it to shuffle csum_shuffle. 
+ * 20 instructions including const loads: 2.5 inst/desc + */ + /* + * csum_not_calc (bit 22) + * csum_not_calc (0) => 0xffffffff + * csum_not_calc (1) => 0x0 + */ + const __m256i zero4 = _mm256_setzero_si256(); + const __m256i mask22 = _mm256_set1_epi32(0x400000); + __m256i csum_not_calc = _mm256_cmpeq_epi32(zero4, + _mm256_and_si256(flags0_7, mask22)); + /* + * (tcp|udp) && !fragment => bit 1 + * tcp = bit 2, udp = bit 1, frag = bit 6 + */ + const __m256i mask1 = _mm256_set1_epi32(0x2); + __m256i tcp_udp = + _mm256_andnot_si256(_mm256_srli_epi32(flags0_7, 5), + _mm256_or_si256(flags0_7, + _mm256_srli_epi32(flags0_7, 1))); + tcp_udp = _mm256_and_si256(tcp_udp, mask1); + /* ipv4 (bit 5) => bit 2 */ + const __m256i mask2 = _mm256_set1_epi32(0x4); + __m256i ipv4 = _mm256_and_si256(mask2, + _mm256_srli_epi32(flags0_7, 3)); + /* + * ipv4_csum_ok (bit 3) => bit 3 + * tcp_udp_csum_ok (bit 0) => bit 0 + * 0x9 + */ + const __m256i mask0_3 = _mm256_set1_epi32(0x9); + __m256i csum_idx = _mm256_and_si256(flags0_7, mask0_3); + csum_idx = _mm256_and_si256(csum_not_calc, + _mm256_or_si256(_mm256_or_si256(csum_idx, ipv4), + tcp_udp)); + __m256i csum_flags = + _mm256_shuffle_epi8(csum_shuffle, csum_idx); + /* Shift left to restore CKSUM flags. See csum_shuffle. */ + csum_flags = _mm256_slli_epi32(csum_flags, 1); + /* Combine csum flags and offload flags: 0.125 inst/desc */ + rss_flags = _mm256_or_si256(rss_flags, csum_flags); + + /* + * Collect 8 VLAN IDs and compute vlan_id != 0 on each. + * 4 shuffles, 3 blends, 1 permute, 1 cmp, 1 sub for 8 desc: + * 1.25 inst/desc + */ + __m256i vlan01 = _mm256_shuffle_epi8(cqd01, vlan_shuffle_mask); + __m256i vlan23 = _mm256_shuffle_epi8(cqd23, vlan_shuffle_mask); + __m256i vlan45 = _mm256_shuffle_epi8(cqd45, vlan_shuffle_mask); + __m256i vlan67 = _mm256_shuffle_epi8(cqd67, vlan_shuffle_mask); + __m256i vlan0_3 = _mm256_blend_epi32(vlan01, vlan23, 0x22); + __m256i vlan4_7 = _mm256_blend_epi32(vlan45, vlan67, 0x88); + /* desc: 0, 2, 4, 6, 1, 3, 5, 7 */ + __m256i vlan0_7 = _mm256_blend_epi32(vlan0_3, vlan4_7, 0xcc); + /* desc: 1, 3, 5, 7, 0, 2, 4, 6 */ + vlan0_7 = _mm256_permute4x64_epi64(vlan0_7, + (1 << 6) + (0 << 4) + (3 << 2) + 2); + /* + * Compare 0 == vlan_id produces 0xffffffff (-1) if + * vlan 0 and 0 if vlan non-0. Then subtracting the + * result from 0 produces 0 - (-1) = 1 for vlan 0, and + * 0 - 0 = 0 for vlan non-0. + */ + vlan0_7 = _mm256_cmpeq_epi32(zero4, vlan0_7); + /* vlan_id != 0 => 0, vlan_id == 0 => 1 */ + vlan0_7 = _mm256_sub_epi32(zero4, vlan0_7); + + /* + * Compute PKT_RX_VLAN and PKT_RX_VLAN_STRIPPED. + * Use 3 shifts, 1 or, 1 shuffle for 8 desc: 0.625 inst/desc + * VLAN offload flags in byte 0, 4, 8, 12, 16, 20, 24, 28 + * Everything else is zero. + */ + __m256i vlan_idx = + _mm256_or_si256(/* vlan_stripped => bit 0 */ + _mm256_srli_epi32(_mm256_slli_epi32(flags0_7, + 16), 31), + /* (vlan_id == 0) => bit 1 */ + _mm256_slli_epi32(vlan0_7, 1)); + /* + * The index captures 4 cases. + * stripped, id = 0 ==> 11b = 3 + * stripped, id != 0 ==> 01b = 1 + * not strip, id == 0 ==> 10b = 2 + * not strip, id != 0 ==> 00b = 0 + */ + __m256i vlan_flags = _mm256_permutevar8x32_epi32(vlan_shuffle, + vlan_idx); + /* Combine vlan and offload flags: 0.125 inst/desc */ + rss_flags = _mm256_or_si256(rss_flags, vlan_flags); + + /* + * Compute non-tunnel PTYPEs. 
+ * 17 inst / 8 desc = 2.125 inst/desc + */ + /* ETHER and ETHER_VLAN */ + __m256i vlan_ptype = + _mm256_permutevar8x32_epi32(vlan_ptype_shuffle, + vlan_idx); + /* Build the ptype index from flags */ + tcp_udp = _mm256_slli_epi32(flags0_7, 29); + tcp_udp = _mm256_slli_epi32(_mm256_srli_epi32(tcp_udp, 30), 2); + __m256i ip4_ip6 = + _mm256_srli_epi32(_mm256_slli_epi32(flags0_7, 26), 30); + __m256i ptype_idx = _mm256_or_si256(tcp_udp, ip4_ip6); + __m256i frag_bit = + _mm256_srli_epi32(_mm256_slli_epi32(flags0_7, 25), 31); + __m256i nonfrag_ptype = + _mm256_shuffle_epi8(nonfrag_ptype_shuffle, ptype_idx); + __m256i frag_ptype = + _mm256_shuffle_epi8(frag_ptype_shuffle, ptype_idx); + /* + * Zero out the unwanted types and combine the remaining bits. + * The effect is same as selecting non-frag or frag types + * depending on the frag bit. + */ + nonfrag_ptype = _mm256_and_si256(nonfrag_ptype, + _mm256_cmpeq_epi32(zero4, frag_bit)); + frag_ptype = _mm256_and_si256(frag_ptype, + _mm256_cmpgt_epi32(frag_bit, zero4)); + __m256i ptype = _mm256_or_si256(nonfrag_ptype, frag_ptype); + ptype = _mm256_slli_epi32(ptype, 4); + /* + * Compute tunnel PTYPEs. + * 15 inst / 8 desc = 1.875 inst/desc + */ + __m256i tnl_l3_ptype = + _mm256_shuffle_epi8(tnl_l3_ptype_shuffle, ptype_idx); + tnl_l3_ptype = _mm256_slli_epi32(tnl_l3_ptype, 16); + /* + * Shift non-tunnel L4 types to make them tunnel types. + * RTE_PTYPE_L4_TCP << 16 == RTE_PTYPE_INNER_L4_TCP + */ + __m256i tnl_l4_ptype = + _mm256_slli_epi32(_mm256_and_si256(ptype, + _mm256_set1_epi32(RTE_PTYPE_L4_MASK)), 16); + __m256i tnl_ptype = + _mm256_or_si256(tnl_l3_ptype, tnl_l4_ptype); + tnl_ptype = _mm256_or_si256(tnl_ptype, + _mm256_set1_epi32(RTE_PTYPE_TUNNEL_GRENAT | + RTE_PTYPE_INNER_L2_ETHER)); + /* + * Select non-tunnel or tunnel types by zeroing out the + * unwanted ones. + */ + __m256i tnl_flags = _mm256_and_si256(overlay_enabled, + _mm256_srli_epi32(_mm256_slli_epi32(flags0_7, 2), 31)); + tnl_ptype = _mm256_and_si256(tnl_ptype, + _mm256_sub_epi32(zero4, tnl_flags)); + ptype = _mm256_and_si256(ptype, + _mm256_cmpeq_epi32(zero4, tnl_flags)); + /* + * Combine types and swap to have ptypes in the same order + * as desc. + * desc: 0 2 4 6 1 3 5 7 + * 3 inst / 8 desc = 0.375 inst/desc + */ + ptype = _mm256_or_si256(ptype, tnl_ptype); + ptype = _mm256_or_si256(ptype, vlan_ptype); + ptype = _mm256_permute4x64_epi64(ptype, + (1 << 6) + (0 << 4) + (3 << 2) + 2); + + /* + * Mask packet length. + * Use 4 ands: 0.5 instructions/desc + */ + cqd01 = _mm256_and_si256(cqd01, mask); + cqd23 = _mm256_and_si256(cqd23, mask); + cqd45 = _mm256_and_si256(cqd45, mask); + cqd67 = _mm256_and_si256(cqd67, mask); + /* + * Shuffle. Two 16B sets of the mbuf fields. + * packet_type, pkt_len, data_len, vlan_tci, rss + */ + __m256i rearm01 = _mm256_shuffle_epi8(cqd01, shuffle_mask); + __m256i rearm23 = _mm256_shuffle_epi8(cqd23, shuffle_mask); + __m256i rearm45 = _mm256_shuffle_epi8(cqd45, shuffle_mask); + __m256i rearm67 = _mm256_shuffle_epi8(cqd67, shuffle_mask); + + /* + * Blend in ptypes + * 4 blends and 3 shuffles for 8 desc: 0.875 inst/desc + */ + rearm01 = _mm256_blend_epi32(rearm01, ptype, 0x11); + rearm23 = _mm256_blend_epi32(rearm23, + _mm256_shuffle_epi32(ptype, 1), 0x11); + rearm45 = _mm256_blend_epi32(rearm45, + _mm256_shuffle_epi32(ptype, 2), 0x11); + rearm67 = _mm256_blend_epi32(rearm67, + _mm256_shuffle_epi32(ptype, 3), 0x11); + + /* + * Move rss_flags into ol_flags in mbuf_init. 
+ * Use 1 shift and 1 blend for each desc: 2 inst/desc + */ + __m256i mbuf_init4_5 = _mm256_blend_epi32(mbuf_init, + rss_flags, 0x44); + __m256i mbuf_init2_3 = _mm256_blend_epi32(mbuf_init, + _mm256_slli_si256(rss_flags, 4), 0x44); + __m256i mbuf_init0_1 = _mm256_blend_epi32(mbuf_init, + _mm256_slli_si256(rss_flags, 8), 0x44); + __m256i mbuf_init6_7 = _mm256_blend_epi32(mbuf_init, + _mm256_srli_si256(rss_flags, 4), 0x44); + + /* + * Build rearm, one per desc. + * 8 blends and 4 permutes: 1.5 inst/desc + */ + __m256i rearm0 = _mm256_blend_epi32(rearm01, + mbuf_init0_1, 0xf0); + __m256i rearm1 = _mm256_blend_epi32(mbuf_init0_1, + rearm01, 0xf0); + __m256i rearm2 = _mm256_blend_epi32(rearm23, + mbuf_init2_3, 0xf0); + __m256i rearm3 = _mm256_blend_epi32(mbuf_init2_3, + rearm23, 0xf0); + /* Swap upper and lower 64 bits */ + rearm0 = _mm256_permute4x64_epi64(rearm0, + (1 << 6) + (0 << 4) + (3 << 2) + 2); + rearm2 = _mm256_permute4x64_epi64(rearm2, + (1 << 6) + (0 << 4) + (3 << 2) + 2); + /* Second set of 4 descriptors */ + __m256i rearm4 = _mm256_blend_epi32(rearm45, + mbuf_init4_5, 0xf0); + __m256i rearm5 = _mm256_blend_epi32(mbuf_init4_5, + rearm45, 0xf0); + __m256i rearm6 = _mm256_blend_epi32(rearm67, + mbuf_init6_7, 0xf0); + __m256i rearm7 = _mm256_blend_epi32(mbuf_init6_7, + rearm67, 0xf0); + rearm4 = _mm256_permute4x64_epi64(rearm4, + (1 << 6) + (0 << 4) + (3 << 2) + 2); + rearm6 = _mm256_permute4x64_epi64(rearm6, + (1 << 6) + (0 << 4) + (3 << 2) + 2); + + /* + * Write out 32B of mbuf fields. + * data_off - off 0 (mbuf_init) + * refcnt - 2 (mbuf_init) + * nb_segs - 4 (mbuf_init) + * port - 6 (mbuf_init) + * ol_flag - 8 (from cqd) + * packet_type - 16 (from cqd) + * pkt_len - 20 (from cqd) + * data_len - 24 (from cqd) + * vlan_tci - 26 (from cqd) + * rss - 28 (from cqd) + */ + _mm256_storeu_si256((__m256i *)&rxmb[0]->rearm_data, rearm0); + _mm256_storeu_si256((__m256i *)&rxmb[1]->rearm_data, rearm1); + _mm256_storeu_si256((__m256i *)&rxmb[2]->rearm_data, rearm2); + _mm256_storeu_si256((__m256i *)&rxmb[3]->rearm_data, rearm3); + _mm256_storeu_si256((__m256i *)&rxmb[4]->rearm_data, rearm4); + _mm256_storeu_si256((__m256i *)&rxmb[5]->rearm_data, rearm5); + _mm256_storeu_si256((__m256i *)&rxmb[6]->rearm_data, rearm6); + _mm256_storeu_si256((__m256i *)&rxmb[7]->rearm_data, rearm7); + + max_rx -= 8; + cqd += 8; + rx += 8; + rxmb += 8; + } + + /* + * Step 3: Slow path to handle a small (<8) number of packets and + * occasional truncated packets. 
+ */ + while (max_rx && ((cqd->type_color & + CQ_DESC_COLOR_MASK_NOSHIFT) != color)) { + if (unlikely(cqd->bytes_written_flags & + CQ_ENET_RQ_DESC_FLAGS_TRUNCATED)) { + rte_pktmbuf_free(*rxmb++); + rte_atomic64_inc(&enic->soft_stats.rx_packet_errors); + } else { + *rx++ = rx_one(cqd, *rxmb++, enic); + } + cqd++; + max_rx--; + } + + /* Number of descriptors visited */ + nb_rx = cqd - (struct cq_enet_rq_desc *)(cq->ring.descs) - cq_idx; + if (nb_rx == 0) + return 0; + rqd = ((struct rq_enet_desc *)rq->ring.descs) + cq_idx; + rxmb = rq->mbuf_ring + cq_idx; + cq_idx += nb_rx; + rq->rx_nb_hold += nb_rx; + if (unlikely(cq_idx == cq->ring.desc_count)) { + cq_idx = 0; + cq->last_color ^= CQ_DESC_COLOR_MASK_NOSHIFT; + } + cq->to_clean = cq_idx; + + /* Step 4: Restock RQ with new mbufs */ + memcpy(rxmb, rq->free_mbufs + ENIC_RX_BURST_MAX - rq->num_free_mbufs, + sizeof(struct rte_mbuf *) * nb_rx); + rq->num_free_mbufs -= nb_rx; + while (nb_rx) { + rqd->address = (*rxmb)->buf_iova + RTE_PKTMBUF_HEADROOM; + nb_rx--; + rqd++; + rxmb++; + } + if (rq->rx_nb_hold > rq->rx_free_thresh) { + rq->posted_index = enic_ring_add(rq->ring.desc_count, + rq->posted_index, + rq->rx_nb_hold); + rq->rx_nb_hold = 0; + rte_wmb(); + iowrite32_relaxed(rq->posted_index, + &rq->ctrl->posted_index); + } + + return rx - rx_pkts; +} + +bool +enic_use_vector_rx_handler(struct enic *enic) +{ + struct rte_eth_dev *eth_dev; + struct rte_fdir_conf *fconf; + + eth_dev = enic->rte_dev; + /* User needs to request for the avx2 handler */ + if (!enic->enable_avx2_rx) + return false; + /* Do not support scatter Rx */ + if (!(enic->rq_count > 0 && enic->rq[0].data_queue_enable == 0)) + return false; + /* Do not support fdir/flow */ + fconf = ð_dev->data->dev_conf.fdir_conf; + if (fconf->mode != RTE_FDIR_MODE_NONE) + return false; + if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX2)) { + PMD_INIT_LOG(DEBUG, " use the non-scatter avx2 Rx handler"); + eth_dev->rx_pkt_burst = &enic_noscatter_vec_recv_pkts; + return true; + } + return false; +} diff --git a/drivers/net/enic/meson.build b/drivers/net/enic/meson.build index bfd4e237..06448711 100644 --- a/drivers/net/enic/meson.build +++ b/drivers/net/enic/meson.build @@ -17,3 +17,19 @@ sources = files( ) deps += ['hash'] includes += include_directories('base') + +# The current implementation assumes 64-bit pointers +if dpdk_conf.has('RTE_MACHINE_CPUFLAG_AVX2') and cc.sizeof('void *') == 8 + sources += files('enic_rxtx_vec_avx2.c') +# Build the avx2 handler if the compiler supports it, even though 'machine' +# does not. This is to support users who build for the min supported machine +# and need to run the binary on newer CPUs too. +# This part is from i40e meson.build +elif cc.has_argument('-mavx2') and cc.sizeof('void *') == 8 + enic_avx2_lib = static_library('enic_avx2_lib', + 'enic_rxtx_vec_avx2.c', + dependencies: [static_rte_ethdev, static_rte_bus_pci], + include_directories: includes, + c_args: [cflags, '-mavx2']) + objs += enic_avx2_lib.extract_objects('enic_rxtx_vec_avx2.c') +endif |
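As an aside, the vpshufb-as-lookup-table idiom behind rss_shuffle and csum_shuffle in the hunks above can be shown in isolation. The sketch below is not part of the patch: GOOD and BAD are made-up one-byte stand-ins for the real PKT_RX_* flag bits (which do not all fit in a byte, hence the ">> 1" trick in the driver), and the index values are arbitrary. It builds a 16-entry byte table, replicates it into both 128-bit lanes (because _mm256_shuffle_epi8 looks up within each lane independently), and translates one 4-bit index per 32-bit slot into a per-packet flag byte with a single shuffle, which is the same pattern the driver uses to compute checksum and RSS ol_flags for 8 descriptors at a time.

#include <stdint.h>
#include <stdio.h>
#include <immintrin.h>

int main(void)
{
	/* Hypothetical one-byte flag values, standing in for PKT_RX_* bits. */
	const uint8_t GOOD = 0x1, BAD = 0x2;

	/* 16-entry lookup table, replicated into both 128-bit lanes. */
	const __m256i table = _mm256_broadcastsi128_si256(
		_mm_setr_epi8(0, 0, BAD, GOOD, 0, 0, BAD, GOOD,
			      0, 0, BAD, GOOD, 0, 0, BAD, GOOD));

	/* One 4-bit index in the low byte of each 32-bit slot,
	 * i.e. one index per "descriptor". */
	__m256i idx = _mm256_setr_epi32(3, 2, 7, 0, 15, 1, 6, 3);

	/* Byte 0 of each slot now holds the looked-up flag byte;
	 * the remaining bytes pick up table[0] == 0. */
	__m256i flags = _mm256_shuffle_epi8(table, idx);

	uint32_t out[8];
	_mm256_storeu_si256((__m256i *)out, flags);
	for (int i = 0; i < 8; i++)
		printf("desc %d -> flags 0x%x\n", i, (unsigned)out[i]);
	return 0;
}

Built with -mavx2 (for example, gcc -mavx2 sketch.c), each 32-bit slot ends up holding the table entry selected by its low 4 bits. The build changes above follow the same split between compile-time and run-time capability: the Makefile and meson.build compile enic_rxtx_vec_avx2.c with -mavx2 even when the configured machine lacks AVX2, while enic_use_vector_rx_handler() only installs enic_noscatter_vec_recv_pkts after rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX2) confirms the running CPU actually supports it.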