author    Luca Boccassi <luca.boccassi@gmail.com>  2018-11-01 11:59:50 +0000
committer Luca Boccassi <luca.boccassi@gmail.com>  2018-11-01 12:00:19 +0000
commit    8d01b9cd70a67cdafd5b965a70420c3bd7fb3f82 (patch)
tree      208e3bc33c220854d89d010e3abf720a2e62e546 /drivers/net/enic
parent    b63264c8342e6a1b6971c79550d2af2024b6a4de (diff)

New upstream version 18.11-rc1 (upstream/18.11-rc1)

Change-Id: Iaa71986dd6332e878d8f4bf493101b2bbc6313bb
Signed-off-by: Luca Boccassi <luca.boccassi@gmail.com>
Diffstat (limited to 'drivers/net/enic')
-rw-r--r--  drivers/net/enic/Makefile               |  28
-rw-r--r--  drivers/net/enic/base/vnic_dev.c        | 105
-rw-r--r--  drivers/net/enic/base/vnic_dev.h        |   8
-rw-r--r--  drivers/net/enic/base/vnic_devcmd.h     |  72
-rw-r--r--  drivers/net/enic/enic.h                 |  12
-rw-r--r--  drivers/net/enic/enic_ethdev.c          |  61
-rw-r--r--  drivers/net/enic/enic_flow.c            | 180
-rw-r--r--  drivers/net/enic/enic_main.c            |  81
-rw-r--r--  drivers/net/enic/enic_res.c             |  11
-rw-r--r--  drivers/net/enic/enic_rxtx.c            | 286
-rw-r--r--  drivers/net/enic/enic_rxtx_common.h     | 271
-rw-r--r--  drivers/net/enic/enic_rxtx_vec_avx2.c   | 831
-rw-r--r--  drivers/net/enic/meson.build            |  16
13 files changed, 1645 insertions(+), 317 deletions(-)
diff --git a/drivers/net/enic/Makefile b/drivers/net/enic/Makefile
index 7c6c29cc..e39e4763 100644
--- a/drivers/net/enic/Makefile
+++ b/drivers/net/enic/Makefile
@@ -39,4 +39,32 @@ SRCS-$(CONFIG_RTE_LIBRTE_ENIC_PMD) += base/vnic_intr.c
SRCS-$(CONFIG_RTE_LIBRTE_ENIC_PMD) += base/vnic_rq.c
SRCS-$(CONFIG_RTE_LIBRTE_ENIC_PMD) += base/vnic_rss.c
+# The current implementation assumes 64-bit pointers
+CC_AVX2_SUPPORT=0
+ifeq ($(CONFIG_RTE_ARCH_X86_64),y)
+# Figure out if the compiler supports avx2. The extra check using
+# -march=core-avx2 is necessary to support users who build for the
+# 'default' machine (corei7 which has no avx2) and run the binary on
+# newer CPUs that have avx2.
+# This part is verbatim from i40e makefile.
+ifeq ($(findstring RTE_MACHINE_CPUFLAG_AVX2,$(CFLAGS)),RTE_MACHINE_CPUFLAG_AVX2)
+ CC_AVX2_SUPPORT=1
+else
+ CC_AVX2_SUPPORT=\
+ $(shell $(CC) -march=core-avx2 -dM -E - </dev/null 2>&1 | \
+ grep -q AVX2 && echo 1)
+ ifeq ($(CC_AVX2_SUPPORT), 1)
+ ifeq ($(CONFIG_RTE_TOOLCHAIN_ICC),y)
+ CFLAGS_enic_rxtx_vec_avx2.o += -march=core-avx2
+ else
+ CFLAGS_enic_rxtx_vec_avx2.o += -mavx2
+ endif
+ endif
+endif
+endif
+
+ifeq ($(CC_AVX2_SUPPORT), 1)
+ SRCS-$(CONFIG_RTE_LIBRTE_ENIC_PMD) += enic_rxtx_vec_avx2.c
+endif
+
include $(RTE_SDK)/mk/rte.lib.mk
diff --git a/drivers/net/enic/base/vnic_dev.c b/drivers/net/enic/base/vnic_dev.c
index 16e8814a..fd303fec 100644
--- a/drivers/net/enic/base/vnic_dev.c
+++ b/drivers/net/enic/base/vnic_dev.c
@@ -57,6 +57,9 @@ struct vnic_dev {
void (*free_consistent)(void *priv,
size_t size, void *vaddr,
dma_addr_t dma_handle);
+ struct vnic_counter_counts *flow_counters;
+ dma_addr_t flow_counters_pa;
+ u8 flow_counters_dma_active;
};
#define VNIC_MAX_RES_HDR_SIZE \
@@ -64,6 +67,8 @@ struct vnic_dev {
sizeof(struct vnic_resource) * RES_TYPE_MAX)
#define VNIC_RES_STRIDE 128
+#define VNIC_MAX_FLOW_COUNTERS 2048
+
void *vnic_dev_priv(struct vnic_dev *vdev)
{
return vdev->priv;
@@ -611,6 +616,35 @@ int vnic_dev_stats_dump(struct vnic_dev *vdev, struct vnic_stats **stats)
return vnic_dev_cmd(vdev, CMD_STATS_DUMP, &a0, &a1, wait);
}
+/*
+ * Configure counter DMA
+ */
+int vnic_dev_counter_dma_cfg(struct vnic_dev *vdev, u32 period,
+ u32 num_counters)
+{
+ u64 args[3];
+ int wait = 1000;
+ int err;
+
+ if (num_counters > VNIC_MAX_FLOW_COUNTERS)
+ return -ENOMEM;
+ if (period > 0 && (period < VNIC_COUNTER_DMA_MIN_PERIOD ||
+ num_counters == 0))
+ return -EINVAL;
+
+ args[0] = num_counters;
+ args[1] = vdev->flow_counters_pa;
+ args[2] = period;
+ err = vnic_dev_cmd_args(vdev, CMD_COUNTER_DMA_CONFIG, args, 3, wait);
+
+ /* record if DMAs need to be stopped on close */
+ if (!err)
+ vdev->flow_counters_dma_active = (num_counters != 0 &&
+ period != 0);
+
+ return err;
+}
+
int vnic_dev_close(struct vnic_dev *vdev)
{
u64 a0 = 0, a1 = 0;
@@ -939,6 +973,24 @@ int vnic_dev_alloc_stats_mem(struct vnic_dev *vdev)
return vdev->stats == NULL ? -ENOMEM : 0;
}
+/*
+ * Initialize for up to VNIC_MAX_FLOW_COUNTERS
+ */
+int vnic_dev_alloc_counter_mem(struct vnic_dev *vdev)
+{
+ char name[NAME_MAX];
+ static u32 instance;
+
+ snprintf((char *)name, sizeof(name), "vnic_flow_ctrs-%u", instance++);
+ vdev->flow_counters = vdev->alloc_consistent(vdev->priv,
+ sizeof(struct vnic_counter_counts)
+ * VNIC_MAX_FLOW_COUNTERS,
+ &vdev->flow_counters_pa,
+ (u8 *)name);
+ vdev->flow_counters_dma_active = 0;
+ return vdev->flow_counters == NULL ? -ENOMEM : 0;
+}
+
void vnic_dev_unregister(struct vnic_dev *vdev)
{
if (vdev) {
@@ -951,6 +1003,16 @@ void vnic_dev_unregister(struct vnic_dev *vdev)
vdev->free_consistent(vdev->priv,
sizeof(struct vnic_stats),
vdev->stats, vdev->stats_pa);
+ if (vdev->flow_counters) {
+ /* turn off counter DMAs before freeing memory */
+ if (vdev->flow_counters_dma_active)
+ vnic_dev_counter_dma_cfg(vdev, 0, 0);
+
+ vdev->free_consistent(vdev->priv,
+ sizeof(struct vnic_counter_counts)
+ * VNIC_MAX_FLOW_COUNTERS,
+ vdev->flow_counters, vdev->flow_counters_pa);
+ }
if (vdev->fw_info)
vdev->free_consistent(vdev->priv,
sizeof(struct vnic_devcmd_fw_info),
@@ -1094,3 +1156,46 @@ int vnic_dev_capable_vxlan(struct vnic_dev *vdev)
(a1 & (FEATURE_VXLAN_IPV6 | FEATURE_VXLAN_MULTI_WQ)) ==
(FEATURE_VXLAN_IPV6 | FEATURE_VXLAN_MULTI_WQ);
}
+
+bool vnic_dev_counter_alloc(struct vnic_dev *vdev, uint32_t *idx)
+{
+ u64 a0 = 0;
+ u64 a1 = 0;
+ int wait = 1000;
+
+ if (vnic_dev_cmd(vdev, CMD_COUNTER_ALLOC, &a0, &a1, wait))
+ return false;
+ *idx = (uint32_t)a0;
+ return true;
+}
+
+bool vnic_dev_counter_free(struct vnic_dev *vdev, uint32_t idx)
+{
+ u64 a0 = idx;
+ u64 a1 = 0;
+ int wait = 1000;
+
+ return vnic_dev_cmd(vdev, CMD_COUNTER_FREE, &a0, &a1,
+ wait) == 0;
+}
+
+bool vnic_dev_counter_query(struct vnic_dev *vdev, uint32_t idx,
+ bool reset, uint64_t *packets, uint64_t *bytes)
+{
+ u64 a0 = idx;
+ u64 a1 = reset ? 1 : 0;
+ int wait = 1000;
+
+ if (reset) {
+ /* query/reset returns updated counters */
+ if (vnic_dev_cmd(vdev, CMD_COUNTER_QUERY, &a0, &a1, wait))
+ return false;
+ *packets = a0;
+ *bytes = a1;
+ } else {
+ /* Get values DMA'd from the adapter */
+ *packets = vdev->flow_counters[idx].vcc_packets;
+ *bytes = vdev->flow_counters[idx].vcc_bytes;
+ }
+ return true;
+}
diff --git a/drivers/net/enic/base/vnic_dev.h b/drivers/net/enic/base/vnic_dev.h
index 270a47bd..de2645c4 100644
--- a/drivers/net/enic/base/vnic_dev.h
+++ b/drivers/net/enic/base/vnic_dev.h
@@ -118,6 +118,8 @@ int vnic_dev_spec(struct vnic_dev *vdev, unsigned int offset, size_t size,
void *value);
int vnic_dev_stats_clear(struct vnic_dev *vdev);
int vnic_dev_stats_dump(struct vnic_dev *vdev, struct vnic_stats **stats);
+int vnic_dev_counter_dma_cfg(struct vnic_dev *vdev, u32 period,
+ u32 num_counters);
int vnic_dev_hang_notify(struct vnic_dev *vdev);
int vnic_dev_packet_filter(struct vnic_dev *vdev, int directed, int multicast,
int broadcast, int promisc, int allmulti);
@@ -170,6 +172,7 @@ struct vnic_dev *vnic_dev_register(struct vnic_dev *vdev,
unsigned int num_bars);
struct rte_pci_device *vnic_dev_get_pdev(struct vnic_dev *vdev);
int vnic_dev_alloc_stats_mem(struct vnic_dev *vdev);
+int vnic_dev_alloc_counter_mem(struct vnic_dev *vdev);
int vnic_dev_cmd_init(struct vnic_dev *vdev, int fallback);
int vnic_dev_get_size(void);
int vnic_dev_int13(struct vnic_dev *vdev, u64 arg, u32 op);
@@ -187,4 +190,9 @@ int vnic_dev_overlay_offload_ctrl(struct vnic_dev *vdev,
int vnic_dev_overlay_offload_cfg(struct vnic_dev *vdev, u8 overlay,
u16 vxlan_udp_port_number);
int vnic_dev_capable_vxlan(struct vnic_dev *vdev);
+bool vnic_dev_counter_alloc(struct vnic_dev *vdev, uint32_t *idx);
+bool vnic_dev_counter_free(struct vnic_dev *vdev, uint32_t idx);
+bool vnic_dev_counter_query(struct vnic_dev *vdev, uint32_t idx,
+ bool reset, uint64_t *packets, uint64_t *bytes);
+
#endif /* _VNIC_DEV_H_ */
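Taken together, the counter helpers declared above form a small lifecycle: allocate an index, enable periodic DMA sized to cover that index, read the host copy (or query/reset through firmware), and free the index. Below is a minimal sketch of that flow, assuming only the declarations above; the helper name and the 500 ms period are illustrative and error handling is abbreviated.

#include "vnic_dev.h"

/* Hypothetical sketch: allocate a flow counter, enable periodic DMA,
 * read it once, and release it. "500" stands in for the DMA period in
 * milliseconds and must be >= VNIC_COUNTER_DMA_MIN_PERIOD when non-zero.
 */
static int example_counter_lifecycle(struct vnic_dev *vdev)
{
	uint64_t packets, bytes;
	uint32_t idx;
	int err;

	if (!vnic_dev_counter_alloc(vdev, &idx))	/* CMD_COUNTER_ALLOC */
		return -1;
	/* DMA counters [0, idx] to the buffer set up by
	 * vnic_dev_alloc_counter_mem() every 500 ms.
	 */
	err = vnic_dev_counter_dma_cfg(vdev, 500, idx + 1);
	if (!err) {
		/* reset=false reads the host copy written by the NIC;
		 * reset=true issues CMD_COUNTER_QUERY and clears it.
		 */
		if (!vnic_dev_counter_query(vdev, idx, false,
					    &packets, &bytes))
			err = -1;
	}
	vnic_dev_counter_free(vdev, idx);		/* CMD_COUNTER_FREE */
	return err;
}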
diff --git a/drivers/net/enic/base/vnic_devcmd.h b/drivers/net/enic/base/vnic_devcmd.h
index a22d8a76..3aad2dbd 100644
--- a/drivers/net/enic/base/vnic_devcmd.h
+++ b/drivers/net/enic/base/vnic_devcmd.h
@@ -598,6 +598,48 @@ enum vnic_devcmd_cmd {
* a3 = bitmask of supported actions
*/
CMD_ADD_ADV_FILTER = _CMDC(_CMD_DIR_RW, _CMD_VTYPE_ENET, 77),
+
+ /*
+ * Allocate a counter for use with CMD_ADD_FILTER
+ * out:(u32) a0 = counter index
+ */
+ CMD_COUNTER_ALLOC = _CMDC(_CMD_DIR_READ, _CMD_VTYPE_ENET, 85),
+
+ /*
+ * Free a counter
+ * in: (u32) a0 = counter_id
+ */
+ CMD_COUNTER_FREE = _CMDC(_CMD_DIR_WRITE, _CMD_VTYPE_ENET, 86),
+
+ /*
+ * Read a counter
+ * in: (u32) a0 = counter_id
+ * (u32) a1 = clear counter if non-zero
+ * out:(u64) a0 = packet count
+ * (u64) a1 = byte count
+ */
+ CMD_COUNTER_QUERY = _CMDC(_CMD_DIR_RW, _CMD_VTYPE_ENET, 87),
+
+ /*
+ * Configure periodic counter DMA. This will trigger an immediate
+ * DMA of the counters (unless period == 0), and then schedule a DMA
+ * of the counters every <period> milliseconds until disabled.
+ * Each new COUNTER_DMA_CONFIG will override all previous commands on
+ * this vnic.
+ * Setting a2 (period) = 0 will disable periodic DMAs
+ * If a0 (num_counters) != 0, an immediate DMA will always be done,
+ * irrespective of the value in a2.
+ * in: (u32) a0 = number of counters to DMA
+ * (u64) a1 = host target DMA address
+ * (u32) a2 = DMA period in milliseconds (0 to disable)
+ */
+ CMD_COUNTER_DMA_CONFIG = _CMDC(_CMD_DIR_WRITE, _CMD_VTYPE_ENET, 88),
+#define VNIC_COUNTER_DMA_MIN_PERIOD 500
+
+ /*
+ * Clear all counters on a vnic
+ */
+ CMD_COUNTER_CLEAR_ALL = _CMDC(_CMD_DIR_NONE, _CMD_VTYPE_ENET, 89),
};
/* Modes for exchanging advanced filter capabilities. The modes supported by
@@ -863,9 +905,11 @@ struct filter_action {
#define FILTER_ACTION_RQ_STEERING_FLAG (1 << 0)
#define FILTER_ACTION_FILTER_ID_FLAG (1 << 1)
#define FILTER_ACTION_DROP_FLAG (1 << 2)
+#define FILTER_ACTION_COUNTER_FLAG (1 << 3)
#define FILTER_ACTION_V2_ALL (FILTER_ACTION_RQ_STEERING_FLAG \
+ | FILTER_ACTION_FILTER_ID_FLAG \
| FILTER_ACTION_DROP_FLAG \
- | FILTER_ACTION_FILTER_ID_FLAG)
+ | FILTER_ACTION_COUNTER_FLAG)
/* Version 2 of filter action must be a strict extension of struct filter_action
* where the first fields exactly match in size and meaning.
@@ -875,7 +919,8 @@ struct filter_action_v2 {
u32 rq_idx;
u32 flags; /* use FILTER_ACTION_XXX_FLAG defines */
u16 filter_id;
- u_int8_t reserved[32]; /* for future expansion */
+ u32 counter_index;
+ uint8_t reserved[28]; /* for future expansion */
} __attribute__((packed));
/* Specifies the filter type. */
@@ -941,9 +986,9 @@ enum {
};
struct filter_tlv {
- u_int32_t type;
- u_int32_t length;
- u_int32_t val[0];
+ uint32_t type;
+ uint32_t length;
+ uint32_t val[0];
};
/* Data for CMD_ADD_FILTER is 2 TLV and filter + action structs */
@@ -957,10 +1002,10 @@ struct filter_tlv {
* drivers should use this instead of "sizeof (struct filter_v2)" when
* computing length for TLV.
*/
-static inline u_int32_t
+static inline uint32_t
vnic_filter_size(struct filter_v2 *fp)
{
- u_int32_t size;
+ uint32_t size;
switch (fp->type) {
case FILTER_USNIC_ID:
@@ -999,10 +1044,10 @@ enum {
* drivers should use this instead of "sizeof (struct filter_action_v2)"
* when computing length for TLV.
*/
-static inline u_int32_t
+static inline uint32_t
vnic_action_size(struct filter_action_v2 *fap)
{
- u_int32_t size;
+ uint32_t size;
switch (fap->type) {
case FILTER_ACTION_RQ_STEERING:
@@ -1122,4 +1167,13 @@ typedef enum {
GRPINTR_UPD_VECT,
} grpintr_subcmd_t;
+/*
+ * Structure for counter DMA
+ * (DMAed by CMD_COUNTER_DMA_CONFIG)
+ */
+struct vnic_counter_counts {
+ u64 vcc_packets;
+ u64 vcc_bytes;
+};
+
#endif /* _VNIC_DEVCMD_H_ */
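Note how the filter_action_v2 change above keeps the structure size stable: the new u32 counter_index occupies 4 of the former 32 reserved bytes, so the packed layout (and the TLV length derived from it) does not grow. A self-contained illustration of that invariant, using stand-in structs that model only the tail of the old and new layouts (not the real definitions):

#include <stdint.h>

/* Stand-ins for the tail of struct filter_action_v2, before and after
 * the change above; illustrative only. */
struct old_action_tail {
	uint16_t filter_id;
	uint8_t  reserved[32];
} __attribute__((packed));

struct new_action_tail {
	uint16_t filter_id;
	uint32_t counter_index;
	uint8_t  reserved[28];
} __attribute__((packed));

/* 4 bytes of counter_index + 28 reserved == the former 32 reserved bytes */
_Static_assert(sizeof(struct old_action_tail) ==
	       sizeof(struct new_action_tail),
	       "counter_index must reuse the reserved space");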
diff --git a/drivers/net/enic/enic.h b/drivers/net/enic/enic.h
index 7c27bd51..7bca3cad 100644
--- a/drivers/net/enic/enic.h
+++ b/drivers/net/enic/enic.h
@@ -39,6 +39,9 @@
#define PAGE_ROUND_UP(x) \
((((unsigned long)(x)) + ENIC_PAGE_SIZE-1) & (~(ENIC_PAGE_SIZE-1)))
+/* must be >= VNIC_COUNTER_DMA_MIN_PERIOD */
+#define VNIC_FLOW_COUNTER_UPDATE_MSECS 500
+
#define ENICPMD_VFIO_PATH "/dev/vfio/vfio"
/*#define ENIC_DESC_COUNT_MAKE_ODD (x) do{if ((~(x)) & 1) { (x)--; } }while(0)*/
@@ -94,6 +97,7 @@ struct rte_flow {
LIST_ENTRY(rte_flow) next;
u16 enic_filter_id;
struct filter_v2 enic_filter;
+ int counter_idx; /* NIC allocated counter index (-1 = invalid) */
};
/* Per-instance private data structure */
@@ -104,6 +108,11 @@ struct enic {
struct vnic_dev_bar bar0;
struct vnic_dev *vdev;
+ /*
+ * mbuf_initializer contains 64 bits of mbuf rearm_data, used by
+ * the avx2 handler at this time.
+ */
+ uint64_t mbuf_initializer;
unsigned int port_id;
bool overlay_offload;
struct rte_eth_dev *rte_dev;
@@ -126,6 +135,7 @@ struct enic {
u8 filter_actions; /* HW supported actions */
bool vxlan;
bool disable_overlay; /* devargs disable_overlay=1 */
+ uint8_t enable_avx2_rx; /* devargs enable-avx2-rx=1 */
bool nic_cfg_chk; /* NIC_CFG_CHK available */
bool udp_rss_weak; /* Bodega style UDP RSS */
uint8_t ig_vlan_rewrite_mode; /* devargs ig-vlan-rewrite */
@@ -165,6 +175,7 @@ struct enic {
rte_spinlock_t mtu_lock;
LIST_HEAD(enic_flows, rte_flow) flows;
+ int max_flow_counter;
rte_spinlock_t flows_lock;
/* RSS */
@@ -326,6 +337,7 @@ uint16_t enic_prep_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
uint16_t nb_pkts);
int enic_set_mtu(struct enic *enic, uint16_t new_mtu);
int enic_link_update(struct enic *enic);
+bool enic_use_vector_rx_handler(struct enic *enic);
void enic_fdir_info(struct enic *enic);
void enic_fdir_info_get(struct enic *enic, struct rte_eth_fdir_info *stats);
void copy_fltr_v1(struct filter_v2 *fltr, struct rte_eth_fdir_input *input,
diff --git a/drivers/net/enic/enic_ethdev.c b/drivers/net/enic/enic_ethdev.c
index b3d57771..996bb554 100644
--- a/drivers/net/enic/enic_ethdev.c
+++ b/drivers/net/enic/enic_ethdev.c
@@ -37,6 +37,7 @@ static const struct rte_pci_id pci_id_enic_map[] = {
};
#define ENIC_DEVARG_DISABLE_OVERLAY "disable-overlay"
+#define ENIC_DEVARG_ENABLE_AVX2_RX "enable-avx2-rx"
#define ENIC_DEVARG_IG_VLAN_REWRITE "ig-vlan-rewrite"
RTE_INIT(enicpmd_init_log)
@@ -521,10 +522,34 @@ static const uint32_t *enicpmd_dev_supported_ptypes_get(struct rte_eth_dev *dev)
RTE_PTYPE_L4_NONFRAG,
RTE_PTYPE_UNKNOWN
};
+ static const uint32_t ptypes_overlay[] = {
+ RTE_PTYPE_L2_ETHER,
+ RTE_PTYPE_L2_ETHER_VLAN,
+ RTE_PTYPE_L3_IPV4_EXT_UNKNOWN,
+ RTE_PTYPE_L3_IPV6_EXT_UNKNOWN,
+ RTE_PTYPE_L4_TCP,
+ RTE_PTYPE_L4_UDP,
+ RTE_PTYPE_L4_FRAG,
+ RTE_PTYPE_L4_NONFRAG,
+ RTE_PTYPE_TUNNEL_GRENAT,
+ RTE_PTYPE_INNER_L2_ETHER,
+ RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN,
+ RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN,
+ RTE_PTYPE_INNER_L4_TCP,
+ RTE_PTYPE_INNER_L4_UDP,
+ RTE_PTYPE_INNER_L4_FRAG,
+ RTE_PTYPE_INNER_L4_NONFRAG,
+ RTE_PTYPE_UNKNOWN
+ };
- if (dev->rx_pkt_burst == enic_recv_pkts ||
- dev->rx_pkt_burst == enic_noscatter_recv_pkts)
- return ptypes;
+ if (dev->rx_pkt_burst != enic_dummy_recv_pkts &&
+ dev->rx_pkt_burst != NULL) {
+ struct enic *enic = pmd_priv(dev);
+ if (enic->overlay_offload)
+ return ptypes_overlay;
+ else
+ return ptypes;
+ }
return NULL;
}
@@ -915,22 +940,27 @@ static const struct eth_dev_ops enicpmd_eth_dev_ops = {
.udp_tunnel_port_del = enicpmd_dev_udp_tunnel_port_del,
};
-static int enic_parse_disable_overlay(__rte_unused const char *key,
- const char *value,
- void *opaque)
+static int enic_parse_zero_one(const char *key,
+ const char *value,
+ void *opaque)
{
struct enic *enic;
+ bool b;
enic = (struct enic *)opaque;
if (strcmp(value, "0") == 0) {
- enic->disable_overlay = false;
+ b = false;
} else if (strcmp(value, "1") == 0) {
- enic->disable_overlay = true;
+ b = true;
} else {
- dev_err(enic, "Invalid value for " ENIC_DEVARG_DISABLE_OVERLAY
- ": expected=0|1 given=%s\n", value);
+ dev_err(enic, "Invalid value for %s"
+ ": expected=0|1 given=%s\n", key, value);
return -EINVAL;
}
+ if (strcmp(key, ENIC_DEVARG_DISABLE_OVERLAY) == 0)
+ enic->disable_overlay = b;
+ if (strcmp(key, ENIC_DEVARG_ENABLE_AVX2_RX) == 0)
+ enic->enable_avx2_rx = b;
return 0;
}
@@ -971,6 +1001,7 @@ static int enic_check_devargs(struct rte_eth_dev *dev)
{
static const char *const valid_keys[] = {
ENIC_DEVARG_DISABLE_OVERLAY,
+ ENIC_DEVARG_ENABLE_AVX2_RX,
ENIC_DEVARG_IG_VLAN_REWRITE,
NULL};
struct enic *enic = pmd_priv(dev);
@@ -979,6 +1010,7 @@ static int enic_check_devargs(struct rte_eth_dev *dev)
ENICPMD_FUNC_TRACE();
enic->disable_overlay = false;
+ enic->enable_avx2_rx = false;
enic->ig_vlan_rewrite_mode = IG_VLAN_REWRITE_MODE_PASS_THRU;
if (!dev->device->devargs)
return 0;
@@ -986,7 +1018,9 @@ static int enic_check_devargs(struct rte_eth_dev *dev)
if (!kvlist)
return -EINVAL;
if (rte_kvargs_process(kvlist, ENIC_DEVARG_DISABLE_OVERLAY,
- enic_parse_disable_overlay, enic) < 0 ||
+ enic_parse_zero_one, enic) < 0 ||
+ rte_kvargs_process(kvlist, ENIC_DEVARG_ENABLE_AVX2_RX,
+ enic_parse_zero_one, enic) < 0 ||
rte_kvargs_process(kvlist, ENIC_DEVARG_IG_VLAN_REWRITE,
enic_parse_ig_vlan_rewrite, enic) < 0) {
rte_kvargs_free(kvlist);
@@ -996,7 +1030,6 @@ static int enic_check_devargs(struct rte_eth_dev *dev)
return 0;
}
-struct enic *enicpmd_list_head = NULL;
/* Initialize the driver
* It returns 0 on success.
*/
@@ -1044,7 +1077,8 @@ static int eth_enic_pci_remove(struct rte_pci_device *pci_dev)
static struct rte_pci_driver rte_enic_pmd = {
.id_table = pci_id_enic_map,
- .drv_flags = RTE_PCI_DRV_NEED_MAPPING | RTE_PCI_DRV_INTR_LSC,
+ .drv_flags = RTE_PCI_DRV_NEED_MAPPING | RTE_PCI_DRV_INTR_LSC |
+ RTE_PCI_DRV_IOVA_AS_VA,
.probe = eth_enic_pci_probe,
.remove = eth_enic_pci_remove,
};
@@ -1054,4 +1088,5 @@ RTE_PMD_REGISTER_PCI_TABLE(net_enic, pci_id_enic_map);
RTE_PMD_REGISTER_KMOD_DEP(net_enic, "* igb_uio | uio_pci_generic | vfio-pci");
RTE_PMD_REGISTER_PARAM_STRING(net_enic,
ENIC_DEVARG_DISABLE_OVERLAY "=0|1 "
+ ENIC_DEVARG_ENABLE_AVX2_RX "=0|1 "
ENIC_DEVARG_IG_VLAN_REWRITE "=trunk|untag|priority|pass");
diff --git a/drivers/net/enic/enic_flow.c b/drivers/net/enic/enic_flow.c
index 0cf04aef..bb9ed037 100644
--- a/drivers/net/enic/enic_flow.c
+++ b/drivers/net/enic/enic_flow.c
@@ -289,6 +289,15 @@ static const enum rte_flow_action_type enic_supported_actions_v2_drop[] = {
RTE_FLOW_ACTION_TYPE_END,
};
+static const enum rte_flow_action_type enic_supported_actions_v2_count[] = {
+ RTE_FLOW_ACTION_TYPE_QUEUE,
+ RTE_FLOW_ACTION_TYPE_MARK,
+ RTE_FLOW_ACTION_TYPE_FLAG,
+ RTE_FLOW_ACTION_TYPE_DROP,
+ RTE_FLOW_ACTION_TYPE_COUNT,
+ RTE_FLOW_ACTION_TYPE_END,
+};
+
/** Action capabilities indexed by NIC version information */
static const struct enic_action_cap enic_action_cap[] = {
[FILTER_ACTION_RQ_STEERING_FLAG] = {
@@ -303,6 +312,10 @@ static const struct enic_action_cap enic_action_cap[] = {
.actions = enic_supported_actions_v2_drop,
.copy_fn = enic_copy_action_v2,
},
+ [FILTER_ACTION_COUNTER_FLAG] = {
+ .actions = enic_supported_actions_v2_count,
+ .copy_fn = enic_copy_action_v2,
+ },
};
static int
@@ -1068,6 +1081,10 @@ enic_copy_action_v2(const struct rte_flow_action actions[],
enic_action->flags |= FILTER_ACTION_DROP_FLAG;
break;
}
+ case RTE_FLOW_ACTION_TYPE_COUNT: {
+ enic_action->flags |= FILTER_ACTION_COUNTER_FLAG;
+ break;
+ }
case RTE_FLOW_ACTION_TYPE_VOID:
continue;
default:
@@ -1112,7 +1129,9 @@ enic_get_action_cap(struct enic *enic)
uint8_t actions;
actions = enic->filter_actions;
- if (actions & FILTER_ACTION_DROP_FLAG)
+ if (actions & FILTER_ACTION_COUNTER_FLAG)
+ ea = &enic_action_cap[FILTER_ACTION_COUNTER_FLAG];
+ else if (actions & FILTER_ACTION_DROP_FLAG)
ea = &enic_action_cap[FILTER_ACTION_DROP_FLAG];
else if (actions & FILTER_ACTION_FILTER_ID_FLAG)
ea = &enic_action_cap[FILTER_ACTION_FILTER_ID_FLAG];
@@ -1395,8 +1414,10 @@ enic_flow_add_filter(struct enic *enic, struct filter_v2 *enic_filter,
struct rte_flow_error *error)
{
struct rte_flow *flow;
- int ret;
- u16 entry;
+ int err;
+ uint16_t entry;
+ int ctr_idx;
+ int last_max_flow_ctr;
FLOW_TRACE();
@@ -1407,20 +1428,64 @@ enic_flow_add_filter(struct enic *enic, struct filter_v2 *enic_filter,
return NULL;
}
+ flow->counter_idx = -1;
+ last_max_flow_ctr = -1;
+ if (enic_action->flags & FILTER_ACTION_COUNTER_FLAG) {
+ if (!vnic_dev_counter_alloc(enic->vdev, (uint32_t *)&ctr_idx)) {
+ rte_flow_error_set(error, ENOMEM,
+ RTE_FLOW_ERROR_TYPE_ACTION_CONF,
+ NULL, "cannot allocate counter");
+ goto unwind_flow_alloc;
+ }
+ flow->counter_idx = ctr_idx;
+ enic_action->counter_index = ctr_idx;
+
+ /* If index is the largest, increase the counter DMA size */
+ if (ctr_idx > enic->max_flow_counter) {
+ err = vnic_dev_counter_dma_cfg(enic->vdev,
+ VNIC_FLOW_COUNTER_UPDATE_MSECS,
+ ctr_idx + 1);
+ if (err) {
+ rte_flow_error_set(error, -err,
+ RTE_FLOW_ERROR_TYPE_ACTION_CONF,
+ NULL, "counter DMA config failed");
+ goto unwind_ctr_alloc;
+ }
+ last_max_flow_ctr = enic->max_flow_counter;
+ enic->max_flow_counter = ctr_idx;
+ }
+ }
+
/* entry[in] is the queue id, entry[out] is the filter Id for delete */
entry = enic_action->rq_idx;
- ret = vnic_dev_classifier(enic->vdev, CLSF_ADD, &entry, enic_filter,
+ err = vnic_dev_classifier(enic->vdev, CLSF_ADD, &entry, enic_filter,
enic_action);
- if (!ret) {
- flow->enic_filter_id = entry;
- flow->enic_filter = *enic_filter;
- } else {
- rte_flow_error_set(error, ret, RTE_FLOW_ERROR_TYPE_HANDLE,
+ if (err) {
+ rte_flow_error_set(error, -err, RTE_FLOW_ERROR_TYPE_HANDLE,
NULL, "vnic_dev_classifier error");
- rte_free(flow);
- return NULL;
+ goto unwind_ctr_dma_cfg;
}
+
+ flow->enic_filter_id = entry;
+ flow->enic_filter = *enic_filter;
+
return flow;
+
+/* unwind if there are errors */
+unwind_ctr_dma_cfg:
+ if (last_max_flow_ctr != -1) {
+ /* reduce counter DMA size */
+ vnic_dev_counter_dma_cfg(enic->vdev,
+ VNIC_FLOW_COUNTER_UPDATE_MSECS,
+ last_max_flow_ctr + 1);
+ enic->max_flow_counter = last_max_flow_ctr;
+ }
+unwind_ctr_alloc:
+ if (flow->counter_idx != -1)
+ vnic_dev_counter_free(enic->vdev, ctr_idx);
+unwind_flow_alloc:
+ rte_free(flow);
+ return NULL;
}
/**
@@ -1435,18 +1500,29 @@ enic_flow_add_filter(struct enic *enic, struct filter_v2 *enic_filter,
* @param error[out]
*/
static int
-enic_flow_del_filter(struct enic *enic, u16 filter_id,
+enic_flow_del_filter(struct enic *enic, struct rte_flow *flow,
struct rte_flow_error *error)
{
- int ret;
+ u16 filter_id;
+ int err;
FLOW_TRACE();
- ret = vnic_dev_classifier(enic->vdev, CLSF_DEL, &filter_id, NULL, NULL);
- if (!ret)
- rte_flow_error_set(error, ret, RTE_FLOW_ERROR_TYPE_HANDLE,
+ filter_id = flow->enic_filter_id;
+ err = vnic_dev_classifier(enic->vdev, CLSF_DEL, &filter_id, NULL, NULL);
+ if (err) {
+ rte_flow_error_set(error, -err, RTE_FLOW_ERROR_TYPE_HANDLE,
NULL, "vnic_dev_classifier failed");
- return ret;
+ return -err;
+ }
+
+ if (flow->counter_idx != -1) {
+ if (!vnic_dev_counter_free(enic->vdev, flow->counter_idx))
+ dev_err(enic, "counter free failed, idx: %d\n",
+ flow->counter_idx);
+ flow->counter_idx = -1;
+ }
+ return 0;
}
/*
@@ -1529,9 +1605,10 @@ enic_flow_destroy(struct rte_eth_dev *dev, struct rte_flow *flow,
FLOW_TRACE();
rte_spinlock_lock(&enic->flows_lock);
- enic_flow_del_filter(enic, flow->enic_filter_id, error);
+ enic_flow_del_filter(enic, flow, error);
LIST_REMOVE(flow, next);
rte_spinlock_unlock(&enic->flows_lock);
+ rte_free(flow);
return 0;
}
@@ -1553,13 +1630,77 @@ enic_flow_flush(struct rte_eth_dev *dev, struct rte_flow_error *error)
while (!LIST_EMPTY(&enic->flows)) {
flow = LIST_FIRST(&enic->flows);
- enic_flow_del_filter(enic, flow->enic_filter_id, error);
+ enic_flow_del_filter(enic, flow, error);
LIST_REMOVE(flow, next);
+ rte_free(flow);
}
rte_spinlock_unlock(&enic->flows_lock);
return 0;
}
+static int
+enic_flow_query_count(struct rte_eth_dev *dev,
+ struct rte_flow *flow, void *data,
+ struct rte_flow_error *error)
+{
+ struct enic *enic = pmd_priv(dev);
+ struct rte_flow_query_count *query;
+ uint64_t packets, bytes;
+
+ FLOW_TRACE();
+
+ if (flow->counter_idx == -1) {
+ return rte_flow_error_set(error, ENOTSUP,
+ RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
+ NULL,
+ "flow does not have counter");
+ }
+ query = (struct rte_flow_query_count *)data;
+ if (!vnic_dev_counter_query(enic->vdev, flow->counter_idx,
+ !!query->reset, &packets, &bytes)) {
+ return rte_flow_error_set
+ (error, EINVAL,
+ RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
+ NULL,
+ "cannot read counter");
+ }
+ query->hits_set = 1;
+ query->bytes_set = 1;
+ query->hits = packets;
+ query->bytes = bytes;
+ return 0;
+}
+
+static int
+enic_flow_query(struct rte_eth_dev *dev,
+ struct rte_flow *flow,
+ const struct rte_flow_action *actions,
+ void *data,
+ struct rte_flow_error *error)
+{
+ int ret = 0;
+
+ FLOW_TRACE();
+
+ for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
+ switch (actions->type) {
+ case RTE_FLOW_ACTION_TYPE_VOID:
+ break;
+ case RTE_FLOW_ACTION_TYPE_COUNT:
+ ret = enic_flow_query_count(dev, flow, data, error);
+ break;
+ default:
+ return rte_flow_error_set(error, ENOTSUP,
+ RTE_FLOW_ERROR_TYPE_ACTION,
+ actions,
+ "action not supported");
+ }
+ if (ret < 0)
+ return ret;
+ }
+ return 0;
+}
+
/**
* Flow callback registration.
*
@@ -1570,4 +1711,5 @@ const struct rte_flow_ops enic_flow_ops = {
.create = enic_flow_create,
.destroy = enic_flow_destroy,
.flush = enic_flow_flush,
+ .query = enic_flow_query,
};
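With the COUNT action and the .query callback registered above, an application can attach a counter to a flow rule and read it back through the generic rte_flow API. A minimal sketch, assuming an already-started port; the helper name, the ETH-only pattern, and queue index 0 are illustrative:

#include <stdio.h>
#include <inttypes.h>
#include <rte_flow.h>

static int example_count_flow(uint16_t port_id)
{
	struct rte_flow_attr attr = { .ingress = 1 };
	struct rte_flow_item pattern[] = {
		{ .type = RTE_FLOW_ITEM_TYPE_ETH },
		{ .type = RTE_FLOW_ITEM_TYPE_END },
	};
	struct rte_flow_action_queue queue = { .index = 0 };
	struct rte_flow_action actions[] = {
		{ .type = RTE_FLOW_ACTION_TYPE_QUEUE, .conf = &queue },
		{ .type = RTE_FLOW_ACTION_TYPE_COUNT },
		{ .type = RTE_FLOW_ACTION_TYPE_END },
	};
	struct rte_flow_query_count counts = { .reset = 0 };
	struct rte_flow_error error;
	struct rte_flow *flow;

	flow = rte_flow_create(port_id, &attr, pattern, actions, &error);
	if (flow == NULL)
		return -1;
	/* Serviced by enic_flow_query() -> enic_flow_query_count() above */
	if (rte_flow_query(port_id, flow, &actions[1], &counts, &error)) {
		rte_flow_destroy(port_id, flow, &error);
		return -1;
	}
	printf("hits=%" PRIu64 " bytes=%" PRIu64 "\n",
	       counts.hits, counts.bytes);
	return rte_flow_destroy(port_id, flow, &error);
}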
diff --git a/drivers/net/enic/enic_main.c b/drivers/net/enic/enic_main.c
index fd940c58..e81c3f3b 100644
--- a/drivers/net/enic/enic_main.c
+++ b/drivers/net/enic/enic_main.c
@@ -514,12 +514,29 @@ static void enic_prep_wq_for_simple_tx(struct enic *enic, uint16_t queue_idx)
}
}
+/*
+ * The 'strong' version is in enic_rxtx_vec_avx2.c. This weak version is
+ * used when that file is not compiled.
+ */
+bool __attribute__((weak))
+enic_use_vector_rx_handler(__rte_unused struct enic *enic)
+{
+ return false;
+}
+
static void pick_rx_handler(struct enic *enic)
{
struct rte_eth_dev *eth_dev;
- /* Use the non-scatter, simplified RX handler if possible. */
+ /*
+ * Preference order:
+ * 1. The vectorized handler if possible and requested.
+ * 2. The non-scatter, simplified handler if scatter Rx is not used.
+ * 3. The default handler as a fallback.
+ */
eth_dev = enic->rte_dev;
+ if (enic_use_vector_rx_handler(enic))
+ return;
if (enic->rq_count > 0 && enic->rq[0].data_queue_enable == 0) {
PMD_INIT_LOG(DEBUG, " use the non-scatter Rx handler");
eth_dev->rx_pkt_burst = &enic_noscatter_recv_pkts;
@@ -534,6 +551,25 @@ int enic_enable(struct enic *enic)
unsigned int index;
int err;
struct rte_eth_dev *eth_dev = enic->rte_dev;
+ uint64_t simple_tx_offloads;
+ uintptr_t p;
+
+ if (enic->enable_avx2_rx) {
+ struct rte_mbuf mb_def = { .buf_addr = 0 };
+
+ /*
+ * mbuf_initializer contains const-after-init fields of
+ * receive mbufs (i.e. 64 bits of fields from rearm_data).
+ * It is currently used by the vectorized handler.
+ */
+ mb_def.nb_segs = 1;
+ mb_def.data_off = RTE_PKTMBUF_HEADROOM;
+ mb_def.port = enic->port_id;
+ rte_mbuf_refcnt_set(&mb_def, 1);
+ rte_compiler_barrier();
+ p = (uintptr_t)&mb_def.rearm_data;
+ enic->mbuf_initializer = *(uint64_t *)p;
+ }
eth_dev->data->dev_link.link_speed = vnic_dev_port_speed(enic->vdev);
eth_dev->data->dev_link.link_duplex = ETH_LINK_FULL_DUPLEX;
@@ -572,10 +608,17 @@ int enic_enable(struct enic *enic)
}
/*
- * Use the simple TX handler if possible. All offloads must be
- * disabled.
+ * Use the simple TX handler if possible. Only checksum offloads
+ * and vlan insertion are supported.
*/
- if (eth_dev->data->dev_conf.txmode.offloads == 0) {
+ simple_tx_offloads = enic->tx_offload_capa &
+ (DEV_TX_OFFLOAD_OUTER_IPV4_CKSUM |
+ DEV_TX_OFFLOAD_VLAN_INSERT |
+ DEV_TX_OFFLOAD_IPV4_CKSUM |
+ DEV_TX_OFFLOAD_UDP_CKSUM |
+ DEV_TX_OFFLOAD_TCP_CKSUM);
+ if ((eth_dev->data->dev_conf.txmode.offloads &
+ ~simple_tx_offloads) == 0) {
PMD_INIT_LOG(DEBUG, " use the simple tx handler");
eth_dev->tx_pkt_burst = &enic_simple_xmit_pkts;
for (index = 0; index < enic->wq_count; index++)
@@ -1639,11 +1682,25 @@ static int enic_dev_init(struct enic *enic)
LIST_INIT(&enic->flows);
rte_spinlock_init(&enic->flows_lock);
+ enic->max_flow_counter = -1;
/* set up link status checking */
vnic_dev_notify_set(enic->vdev, -1); /* No Intr for notify */
enic->overlay_offload = false;
+ if (enic->disable_overlay && enic->vxlan) {
+ /*
+ * Explicitly disable overlay offload as the setting is
+ * sticky, and resetting vNIC does not disable it.
+ */
+ if (vnic_dev_overlay_offload_ctrl(enic->vdev,
+ OVERLAY_FEATURE_VXLAN,
+ OVERLAY_OFFLOAD_DISABLE)) {
+ dev_err(enic, "failed to disable overlay offload\n");
+ } else {
+ dev_info(enic, "Overlay offload is disabled\n");
+ }
+ }
if (!enic->disable_overlay && enic->vxlan &&
/* 'VXLAN feature' enables VXLAN, NVGRE, and GENEVE. */
vnic_dev_overlay_offload_ctrl(enic->vdev,
@@ -1653,11 +1710,9 @@ static int enic_dev_init(struct enic *enic)
DEV_TX_OFFLOAD_OUTER_IPV4_CKSUM |
DEV_TX_OFFLOAD_GENEVE_TNL_TSO |
DEV_TX_OFFLOAD_VXLAN_TNL_TSO;
- /*
- * Do not add PKT_TX_OUTER_{IPV4,IPV6} as they are not
- * 'offload' flags (i.e. not part of PKT_TX_OFFLOAD_MASK).
- */
enic->tx_offload_mask |=
+ PKT_TX_OUTER_IPV6 |
+ PKT_TX_OUTER_IPV4 |
PKT_TX_OUTER_IP_CKSUM |
PKT_TX_TUNNEL_MASK;
enic->overlay_offload = true;
@@ -1708,14 +1763,20 @@ int enic_probe(struct enic *enic)
enic_free_consistent);
/*
- * Allocate the consistent memory for stats upfront so both primary and
- * secondary processes can dump stats.
+ * Allocate the consistent memory for stats and counters upfront so
+ * both primary and secondary processes can access them.
*/
err = vnic_dev_alloc_stats_mem(enic->vdev);
if (err) {
dev_err(enic, "Failed to allocate cmd memory, aborting\n");
goto err_out_unregister;
}
+ err = vnic_dev_alloc_counter_mem(enic->vdev);
+ if (err) {
+ dev_err(enic, "Failed to allocate counter memory, aborting\n");
+ goto err_out_unregister;
+ }
+
/* Issue device open to get device in known state */
err = enic_dev_open(enic);
if (err) {
diff --git a/drivers/net/enic/enic_res.c b/drivers/net/enic/enic_res.c
index 8d493ffe..24b2844f 100644
--- a/drivers/net/enic/enic_res.c
+++ b/drivers/net/enic/enic_res.c
@@ -85,7 +85,7 @@ int enic_get_vnic_config(struct enic *enic)
vnic_dev_capable_udp_rss_weak(enic->vdev, &enic->nic_cfg_chk,
&enic->udp_rss_weak);
- dev_info(enic, "Flow api filter mode: %s Actions: %s%s%s\n",
+ dev_info(enic, "Flow api filter mode: %s Actions: %s%s%s%s\n",
((enic->flow_filter_mode == FILTER_DPDK_1) ? "DPDK" :
((enic->flow_filter_mode == FILTER_USNIC_IP) ? "USNIC" :
((enic->flow_filter_mode == FILTER_IPV4_5TUPLE) ? "5TUPLE" :
@@ -95,7 +95,9 @@ int enic_get_vnic_config(struct enic *enic)
((enic->filter_actions & FILTER_ACTION_FILTER_ID_FLAG) ?
"tag " : ""),
((enic->filter_actions & FILTER_ACTION_DROP_FLAG) ?
- "drop " : ""));
+ "drop " : ""),
+ ((enic->filter_actions & FILTER_ACTION_COUNTER_FLAG) ?
+ "count " : ""));
c->wq_desc_count =
min_t(u32, ENIC_MAX_WQ_DESCS,
@@ -195,13 +197,14 @@ int enic_get_vnic_config(struct enic *enic)
enic->rx_offload_capa =
DEV_RX_OFFLOAD_SCATTER |
DEV_RX_OFFLOAD_JUMBO_FRAME |
- DEV_RX_OFFLOAD_CRC_STRIP |
DEV_RX_OFFLOAD_VLAN_STRIP |
DEV_RX_OFFLOAD_IPV4_CKSUM |
DEV_RX_OFFLOAD_UDP_CKSUM |
DEV_RX_OFFLOAD_TCP_CKSUM;
enic->tx_offload_mask =
- PKT_TX_VLAN_PKT |
+ PKT_TX_IPV6 |
+ PKT_TX_IPV4 |
+ PKT_TX_VLAN |
PKT_TX_IP_CKSUM |
PKT_TX_L4_MASK |
PKT_TX_TCP_SEG;
diff --git a/drivers/net/enic/enic_rxtx.c b/drivers/net/enic/enic_rxtx.c
index 7129e121..5189ee63 100644
--- a/drivers/net/enic/enic_rxtx.c
+++ b/drivers/net/enic/enic_rxtx.c
@@ -11,6 +11,7 @@
#include "enic_compat.h"
#include "rq_enet_desc.h"
#include "enic.h"
+#include "enic_rxtx_common.h"
#include <rte_ether.h>
#include <rte_ip.h>
#include <rte_tcp.h>
@@ -30,266 +31,6 @@
#define rte_packet_prefetch(p) do {} while (0)
#endif
-static inline uint16_t
-enic_cq_rx_desc_ciflags(struct cq_enet_rq_desc *crd)
-{
- return le16_to_cpu(crd->completed_index_flags) & ~CQ_DESC_COMP_NDX_MASK;
-}
-
-static inline uint16_t
-enic_cq_rx_desc_bwflags(struct cq_enet_rq_desc *crd)
-{
- return le16_to_cpu(crd->bytes_written_flags) &
- ~CQ_ENET_RQ_DESC_BYTES_WRITTEN_MASK;
-}
-
-static inline uint8_t
-enic_cq_rx_desc_packet_error(uint16_t bwflags)
-{
- return (bwflags & CQ_ENET_RQ_DESC_FLAGS_TRUNCATED) ==
- CQ_ENET_RQ_DESC_FLAGS_TRUNCATED;
-}
-
-static inline uint8_t
-enic_cq_rx_desc_eop(uint16_t ciflags)
-{
- return (ciflags & CQ_ENET_RQ_DESC_FLAGS_EOP)
- == CQ_ENET_RQ_DESC_FLAGS_EOP;
-}
-
-static inline uint8_t
-enic_cq_rx_desc_csum_not_calc(struct cq_enet_rq_desc *cqrd)
-{
- return (le16_to_cpu(cqrd->q_number_rss_type_flags) &
- CQ_ENET_RQ_DESC_FLAGS_CSUM_NOT_CALC) ==
- CQ_ENET_RQ_DESC_FLAGS_CSUM_NOT_CALC;
-}
-
-static inline uint8_t
-enic_cq_rx_desc_ipv4_csum_ok(struct cq_enet_rq_desc *cqrd)
-{
- return (cqrd->flags & CQ_ENET_RQ_DESC_FLAGS_IPV4_CSUM_OK) ==
- CQ_ENET_RQ_DESC_FLAGS_IPV4_CSUM_OK;
-}
-
-static inline uint8_t
-enic_cq_rx_desc_tcp_udp_csum_ok(struct cq_enet_rq_desc *cqrd)
-{
- return (cqrd->flags & CQ_ENET_RQ_DESC_FLAGS_TCP_UDP_CSUM_OK) ==
- CQ_ENET_RQ_DESC_FLAGS_TCP_UDP_CSUM_OK;
-}
-
-static inline uint8_t
-enic_cq_rx_desc_rss_type(struct cq_enet_rq_desc *cqrd)
-{
- return (uint8_t)((le16_to_cpu(cqrd->q_number_rss_type_flags) >>
- CQ_DESC_Q_NUM_BITS) & CQ_ENET_RQ_DESC_RSS_TYPE_MASK);
-}
-
-static inline uint32_t
-enic_cq_rx_desc_rss_hash(struct cq_enet_rq_desc *cqrd)
-{
- return le32_to_cpu(cqrd->rss_hash);
-}
-
-static inline uint16_t
-enic_cq_rx_desc_vlan(struct cq_enet_rq_desc *cqrd)
-{
- return le16_to_cpu(cqrd->vlan);
-}
-
-static inline uint16_t
-enic_cq_rx_desc_n_bytes(struct cq_desc *cqd)
-{
- struct cq_enet_rq_desc *cqrd = (struct cq_enet_rq_desc *)cqd;
- return le16_to_cpu(cqrd->bytes_written_flags) &
- CQ_ENET_RQ_DESC_BYTES_WRITTEN_MASK;
-}
-
-
-static inline uint8_t
-enic_cq_rx_check_err(struct cq_desc *cqd)
-{
- struct cq_enet_rq_desc *cqrd = (struct cq_enet_rq_desc *)cqd;
- uint16_t bwflags;
-
- bwflags = enic_cq_rx_desc_bwflags(cqrd);
- if (unlikely(enic_cq_rx_desc_packet_error(bwflags)))
- return 1;
- return 0;
-}
-
-/* Lookup table to translate RX CQ flags to mbuf flags. */
-static inline uint32_t
-enic_cq_rx_flags_to_pkt_type(struct cq_desc *cqd, uint8_t tnl)
-{
- struct cq_enet_rq_desc *cqrd = (struct cq_enet_rq_desc *)cqd;
- uint8_t cqrd_flags = cqrd->flags;
- /*
- * Odd-numbered entries are for tunnel packets. All packet type info
- * applies to the inner packet, and there is no info on the outer
- * packet. The outer flags in these entries exist only to avoid
- * changing enic_cq_rx_to_pkt_flags(). They are cleared from mbuf
- * afterwards.
- *
- * Also, as there is no tunnel type info (VXLAN, NVGRE, or GENEVE), set
- * RTE_PTYPE_TUNNEL_GRENAT..
- */
- static const uint32_t cq_type_table[128] __rte_cache_aligned = {
- [0x00] = RTE_PTYPE_UNKNOWN,
- [0x01] = RTE_PTYPE_UNKNOWN |
- RTE_PTYPE_TUNNEL_GRENAT |
- RTE_PTYPE_INNER_L2_ETHER,
- [0x20] = RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_NONFRAG,
- [0x21] = RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_NONFRAG |
- RTE_PTYPE_TUNNEL_GRENAT |
- RTE_PTYPE_INNER_L2_ETHER |
- RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN |
- RTE_PTYPE_INNER_L4_NONFRAG,
- [0x22] = RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP,
- [0x23] = RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
- RTE_PTYPE_TUNNEL_GRENAT |
- RTE_PTYPE_INNER_L2_ETHER |
- RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN |
- RTE_PTYPE_INNER_L4_UDP,
- [0x24] = RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_TCP,
- [0x25] = RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_TCP |
- RTE_PTYPE_TUNNEL_GRENAT |
- RTE_PTYPE_INNER_L2_ETHER |
- RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN |
- RTE_PTYPE_INNER_L4_TCP,
- [0x60] = RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_FRAG,
- [0x61] = RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_FRAG |
- RTE_PTYPE_TUNNEL_GRENAT |
- RTE_PTYPE_INNER_L2_ETHER |
- RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN |
- RTE_PTYPE_INNER_L4_FRAG,
- [0x62] = RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_FRAG,
- [0x63] = RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_FRAG |
- RTE_PTYPE_TUNNEL_GRENAT |
- RTE_PTYPE_INNER_L2_ETHER |
- RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN |
- RTE_PTYPE_INNER_L4_FRAG,
- [0x64] = RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_FRAG,
- [0x65] = RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_FRAG |
- RTE_PTYPE_TUNNEL_GRENAT |
- RTE_PTYPE_INNER_L2_ETHER |
- RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN |
- RTE_PTYPE_INNER_L4_FRAG,
- [0x10] = RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | RTE_PTYPE_L4_NONFRAG,
- [0x11] = RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | RTE_PTYPE_L4_NONFRAG |
- RTE_PTYPE_TUNNEL_GRENAT |
- RTE_PTYPE_INNER_L2_ETHER |
- RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN |
- RTE_PTYPE_INNER_L4_NONFRAG,
- [0x12] = RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | RTE_PTYPE_L4_UDP,
- [0x13] = RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
- RTE_PTYPE_TUNNEL_GRENAT |
- RTE_PTYPE_INNER_L2_ETHER |
- RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN |
- RTE_PTYPE_INNER_L4_UDP,
- [0x14] = RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | RTE_PTYPE_L4_TCP,
- [0x15] = RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | RTE_PTYPE_L4_TCP |
- RTE_PTYPE_TUNNEL_GRENAT |
- RTE_PTYPE_INNER_L2_ETHER |
- RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN |
- RTE_PTYPE_INNER_L4_TCP,
- [0x50] = RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | RTE_PTYPE_L4_FRAG,
- [0x51] = RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | RTE_PTYPE_L4_FRAG |
- RTE_PTYPE_TUNNEL_GRENAT |
- RTE_PTYPE_INNER_L2_ETHER |
- RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN |
- RTE_PTYPE_INNER_L4_FRAG,
- [0x52] = RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | RTE_PTYPE_L4_FRAG,
- [0x53] = RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | RTE_PTYPE_L4_FRAG |
- RTE_PTYPE_TUNNEL_GRENAT |
- RTE_PTYPE_INNER_L2_ETHER |
- RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN |
- RTE_PTYPE_INNER_L4_FRAG,
- [0x54] = RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | RTE_PTYPE_L4_FRAG,
- [0x55] = RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | RTE_PTYPE_L4_FRAG |
- RTE_PTYPE_TUNNEL_GRENAT |
- RTE_PTYPE_INNER_L2_ETHER |
- RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN |
- RTE_PTYPE_INNER_L4_FRAG,
- /* All others reserved */
- };
- cqrd_flags &= CQ_ENET_RQ_DESC_FLAGS_IPV4_FRAGMENT
- | CQ_ENET_RQ_DESC_FLAGS_IPV4 | CQ_ENET_RQ_DESC_FLAGS_IPV6
- | CQ_ENET_RQ_DESC_FLAGS_TCP | CQ_ENET_RQ_DESC_FLAGS_UDP;
- return cq_type_table[cqrd_flags + tnl];
-}
-
-static inline void
-enic_cq_rx_to_pkt_flags(struct cq_desc *cqd, struct rte_mbuf *mbuf)
-{
- struct cq_enet_rq_desc *cqrd = (struct cq_enet_rq_desc *)cqd;
- uint16_t bwflags, pkt_flags = 0, vlan_tci;
- bwflags = enic_cq_rx_desc_bwflags(cqrd);
- vlan_tci = enic_cq_rx_desc_vlan(cqrd);
-
- /* VLAN STRIPPED flag. The L2 packet type updated here also */
- if (bwflags & CQ_ENET_RQ_DESC_FLAGS_VLAN_STRIPPED) {
- pkt_flags |= PKT_RX_VLAN | PKT_RX_VLAN_STRIPPED;
- mbuf->packet_type |= RTE_PTYPE_L2_ETHER;
- } else {
- if (vlan_tci != 0)
- mbuf->packet_type |= RTE_PTYPE_L2_ETHER_VLAN;
- else
- mbuf->packet_type |= RTE_PTYPE_L2_ETHER;
- }
- mbuf->vlan_tci = vlan_tci;
-
- if ((cqd->type_color & CQ_DESC_TYPE_MASK) == CQ_DESC_TYPE_CLASSIFIER) {
- struct cq_enet_rq_clsf_desc *clsf_cqd;
- uint16_t filter_id;
- clsf_cqd = (struct cq_enet_rq_clsf_desc *)cqd;
- filter_id = clsf_cqd->filter_id;
- if (filter_id) {
- pkt_flags |= PKT_RX_FDIR;
- if (filter_id != ENIC_MAGIC_FILTER_ID) {
- mbuf->hash.fdir.hi = clsf_cqd->filter_id;
- pkt_flags |= PKT_RX_FDIR_ID;
- }
- }
- } else if (enic_cq_rx_desc_rss_type(cqrd)) {
- /* RSS flag */
- pkt_flags |= PKT_RX_RSS_HASH;
- mbuf->hash.rss = enic_cq_rx_desc_rss_hash(cqrd);
- }
-
- /* checksum flags */
- if (mbuf->packet_type & (RTE_PTYPE_L3_IPV4 | RTE_PTYPE_L3_IPV6)) {
- if (!enic_cq_rx_desc_csum_not_calc(cqrd)) {
- uint32_t l4_flags;
- l4_flags = mbuf->packet_type & RTE_PTYPE_L4_MASK;
-
- /*
- * When overlay offload is enabled, the NIC may
- * set ipv4_csum_ok=1 if the inner packet is IPv6..
- * So, explicitly check for IPv4 before checking
- * ipv4_csum_ok.
- */
- if (mbuf->packet_type & RTE_PTYPE_L3_IPV4) {
- if (enic_cq_rx_desc_ipv4_csum_ok(cqrd))
- pkt_flags |= PKT_RX_IP_CKSUM_GOOD;
- else
- pkt_flags |= PKT_RX_IP_CKSUM_BAD;
- }
-
- if (l4_flags == RTE_PTYPE_L4_UDP ||
- l4_flags == RTE_PTYPE_L4_TCP) {
- if (enic_cq_rx_desc_tcp_udp_csum_ok(cqrd))
- pkt_flags |= PKT_RX_L4_CKSUM_GOOD;
- else
- pkt_flags |= PKT_RX_L4_CKSUM_BAD;
- }
- }
- }
-
- mbuf->ol_flags = pkt_flags;
-}
-
/* dummy receive function to replace actual function in
* order to do safe reconfiguration operations.
*/
@@ -707,7 +448,7 @@ uint16_t enic_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
wq_desc_avail = vnic_wq_desc_avail(wq);
head_idx = wq->head_idx;
desc_count = wq->ring.desc_count;
- ol_flags_mask = PKT_TX_VLAN_PKT | PKT_TX_IP_CKSUM | PKT_TX_L4_MASK;
+ ol_flags_mask = PKT_TX_VLAN | PKT_TX_IP_CKSUM | PKT_TX_L4_MASK;
tx_oversized = &enic->soft_stats.tx_oversized;
nb_pkts = RTE_MIN(nb_pkts, ENIC_TX_XMIT_MAX);
@@ -735,7 +476,7 @@ uint16_t enic_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
mss = 0;
vlan_id = tx_pkt->vlan_tci;
- vlan_tag_insert = !!(ol_flags & PKT_TX_VLAN_PKT);
+ vlan_tag_insert = !!(ol_flags & PKT_TX_VLAN);
bus_addr = (dma_addr_t)
(tx_pkt->buf_iova + tx_pkt->data_off);
@@ -840,12 +581,33 @@ static void enqueue_simple_pkts(struct rte_mbuf **pkts,
struct enic *enic)
{
struct rte_mbuf *p;
+ uint16_t mss;
while (n) {
n--;
p = *pkts++;
desc->address = p->buf_iova + p->data_off;
desc->length = p->pkt_len;
+ /* VLAN insert */
+ desc->vlan_tag = p->vlan_tci;
+ desc->header_length_flags &=
+ ((1 << WQ_ENET_FLAGS_EOP_SHIFT) |
+ (1 << WQ_ENET_FLAGS_CQ_ENTRY_SHIFT));
+ if (p->ol_flags & PKT_TX_VLAN) {
+ desc->header_length_flags |=
+ 1 << WQ_ENET_FLAGS_VLAN_TAG_INSERT_SHIFT;
+ }
+ /*
+ * Checksum offload. We use WQ_ENET_OFFLOAD_MODE_CSUM, which
+ * is 0, so no need to set offload_mode.
+ */
+ mss = 0;
+ if (p->ol_flags & PKT_TX_IP_CKSUM)
+ mss |= ENIC_CALC_IP_CKSUM << WQ_ENET_MSS_SHIFT;
+ if (p->ol_flags & PKT_TX_L4_MASK)
+ mss |= ENIC_CALC_TCP_UDP_CKSUM << WQ_ENET_MSS_SHIFT;
+ desc->mss_loopback = mss;
+
/*
* The app should not send oversized
* packets. tx_pkt_prepare includes a check as
diff --git a/drivers/net/enic/enic_rxtx_common.h b/drivers/net/enic/enic_rxtx_common.h
new file mode 100644
index 00000000..bfbb4909
--- /dev/null
+++ b/drivers/net/enic/enic_rxtx_common.h
@@ -0,0 +1,271 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright 2008-2018 Cisco Systems, Inc. All rights reserved.
+ * Copyright 2007 Nuova Systems, Inc. All rights reserved.
+ */
+
+#ifndef _ENIC_RXTX_COMMON_H_
+#define _ENIC_RXTX_COMMON_H_
+
+static inline uint16_t
+enic_cq_rx_desc_ciflags(struct cq_enet_rq_desc *crd)
+{
+ return le16_to_cpu(crd->completed_index_flags) & ~CQ_DESC_COMP_NDX_MASK;
+}
+
+static inline uint16_t
+enic_cq_rx_desc_bwflags(struct cq_enet_rq_desc *crd)
+{
+ return le16_to_cpu(crd->bytes_written_flags) &
+ ~CQ_ENET_RQ_DESC_BYTES_WRITTEN_MASK;
+}
+
+static inline uint8_t
+enic_cq_rx_desc_packet_error(uint16_t bwflags)
+{
+ return (bwflags & CQ_ENET_RQ_DESC_FLAGS_TRUNCATED) ==
+ CQ_ENET_RQ_DESC_FLAGS_TRUNCATED;
+}
+
+static inline uint8_t
+enic_cq_rx_desc_eop(uint16_t ciflags)
+{
+ return (ciflags & CQ_ENET_RQ_DESC_FLAGS_EOP)
+ == CQ_ENET_RQ_DESC_FLAGS_EOP;
+}
+
+static inline uint8_t
+enic_cq_rx_desc_csum_not_calc(struct cq_enet_rq_desc *cqrd)
+{
+ return (le16_to_cpu(cqrd->q_number_rss_type_flags) &
+ CQ_ENET_RQ_DESC_FLAGS_CSUM_NOT_CALC) ==
+ CQ_ENET_RQ_DESC_FLAGS_CSUM_NOT_CALC;
+}
+
+static inline uint8_t
+enic_cq_rx_desc_ipv4_csum_ok(struct cq_enet_rq_desc *cqrd)
+{
+ return (cqrd->flags & CQ_ENET_RQ_DESC_FLAGS_IPV4_CSUM_OK) ==
+ CQ_ENET_RQ_DESC_FLAGS_IPV4_CSUM_OK;
+}
+
+static inline uint8_t
+enic_cq_rx_desc_tcp_udp_csum_ok(struct cq_enet_rq_desc *cqrd)
+{
+ return (cqrd->flags & CQ_ENET_RQ_DESC_FLAGS_TCP_UDP_CSUM_OK) ==
+ CQ_ENET_RQ_DESC_FLAGS_TCP_UDP_CSUM_OK;
+}
+
+static inline uint8_t
+enic_cq_rx_desc_rss_type(struct cq_enet_rq_desc *cqrd)
+{
+ return (uint8_t)((le16_to_cpu(cqrd->q_number_rss_type_flags) >>
+ CQ_DESC_Q_NUM_BITS) & CQ_ENET_RQ_DESC_RSS_TYPE_MASK);
+}
+
+static inline uint32_t
+enic_cq_rx_desc_rss_hash(struct cq_enet_rq_desc *cqrd)
+{
+ return le32_to_cpu(cqrd->rss_hash);
+}
+
+static inline uint16_t
+enic_cq_rx_desc_vlan(struct cq_enet_rq_desc *cqrd)
+{
+ return le16_to_cpu(cqrd->vlan);
+}
+
+static inline uint16_t
+enic_cq_rx_desc_n_bytes(struct cq_desc *cqd)
+{
+ struct cq_enet_rq_desc *cqrd = (struct cq_enet_rq_desc *)cqd;
+ return le16_to_cpu(cqrd->bytes_written_flags) &
+ CQ_ENET_RQ_DESC_BYTES_WRITTEN_MASK;
+}
+
+
+static inline uint8_t
+enic_cq_rx_check_err(struct cq_desc *cqd)
+{
+ struct cq_enet_rq_desc *cqrd = (struct cq_enet_rq_desc *)cqd;
+ uint16_t bwflags;
+
+ bwflags = enic_cq_rx_desc_bwflags(cqrd);
+ if (unlikely(enic_cq_rx_desc_packet_error(bwflags)))
+ return 1;
+ return 0;
+}
+
+/* Lookup table to translate RX CQ flags to mbuf flags. */
+static uint32_t
+enic_cq_rx_flags_to_pkt_type(struct cq_desc *cqd, uint8_t tnl)
+{
+ struct cq_enet_rq_desc *cqrd = (struct cq_enet_rq_desc *)cqd;
+ uint8_t cqrd_flags = cqrd->flags;
+ /*
+ * Odd-numbered entries are for tunnel packets. All packet type info
+ * applies to the inner packet, and there is no info on the outer
+ * packet. The outer flags in these entries exist only to avoid
+ * changing enic_cq_rx_to_pkt_flags(). They are cleared from mbuf
+ * afterwards.
+ *
+ * Also, as there is no tunnel type info (VXLAN, NVGRE, or GENEVE), set
+ * RTE_PTYPE_TUNNEL_GRENAT.
+ */
+ static const uint32_t cq_type_table[128] __rte_cache_aligned = {
+ [0x00] = RTE_PTYPE_UNKNOWN,
+ [0x01] = RTE_PTYPE_UNKNOWN |
+ RTE_PTYPE_TUNNEL_GRENAT |
+ RTE_PTYPE_INNER_L2_ETHER,
+ [0x20] = RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_NONFRAG,
+ [0x21] = RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_NONFRAG |
+ RTE_PTYPE_TUNNEL_GRENAT |
+ RTE_PTYPE_INNER_L2_ETHER |
+ RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN |
+ RTE_PTYPE_INNER_L4_NONFRAG,
+ [0x22] = RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP,
+ [0x23] = RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
+ RTE_PTYPE_TUNNEL_GRENAT |
+ RTE_PTYPE_INNER_L2_ETHER |
+ RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN |
+ RTE_PTYPE_INNER_L4_UDP,
+ [0x24] = RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_TCP,
+ [0x25] = RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_TCP |
+ RTE_PTYPE_TUNNEL_GRENAT |
+ RTE_PTYPE_INNER_L2_ETHER |
+ RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN |
+ RTE_PTYPE_INNER_L4_TCP,
+ [0x60] = RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_FRAG,
+ [0x61] = RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_FRAG |
+ RTE_PTYPE_TUNNEL_GRENAT |
+ RTE_PTYPE_INNER_L2_ETHER |
+ RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN |
+ RTE_PTYPE_INNER_L4_FRAG,
+ [0x62] = RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_FRAG,
+ [0x63] = RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_FRAG |
+ RTE_PTYPE_TUNNEL_GRENAT |
+ RTE_PTYPE_INNER_L2_ETHER |
+ RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN |
+ RTE_PTYPE_INNER_L4_FRAG,
+ [0x64] = RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_FRAG,
+ [0x65] = RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_FRAG |
+ RTE_PTYPE_TUNNEL_GRENAT |
+ RTE_PTYPE_INNER_L2_ETHER |
+ RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN |
+ RTE_PTYPE_INNER_L4_FRAG,
+ [0x10] = RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | RTE_PTYPE_L4_NONFRAG,
+ [0x11] = RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | RTE_PTYPE_L4_NONFRAG |
+ RTE_PTYPE_TUNNEL_GRENAT |
+ RTE_PTYPE_INNER_L2_ETHER |
+ RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN |
+ RTE_PTYPE_INNER_L4_NONFRAG,
+ [0x12] = RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | RTE_PTYPE_L4_UDP,
+ [0x13] = RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
+ RTE_PTYPE_TUNNEL_GRENAT |
+ RTE_PTYPE_INNER_L2_ETHER |
+ RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN |
+ RTE_PTYPE_INNER_L4_UDP,
+ [0x14] = RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | RTE_PTYPE_L4_TCP,
+ [0x15] = RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | RTE_PTYPE_L4_TCP |
+ RTE_PTYPE_TUNNEL_GRENAT |
+ RTE_PTYPE_INNER_L2_ETHER |
+ RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN |
+ RTE_PTYPE_INNER_L4_TCP,
+ [0x50] = RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | RTE_PTYPE_L4_FRAG,
+ [0x51] = RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | RTE_PTYPE_L4_FRAG |
+ RTE_PTYPE_TUNNEL_GRENAT |
+ RTE_PTYPE_INNER_L2_ETHER |
+ RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN |
+ RTE_PTYPE_INNER_L4_FRAG,
+ [0x52] = RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | RTE_PTYPE_L4_FRAG,
+ [0x53] = RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | RTE_PTYPE_L4_FRAG |
+ RTE_PTYPE_TUNNEL_GRENAT |
+ RTE_PTYPE_INNER_L2_ETHER |
+ RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN |
+ RTE_PTYPE_INNER_L4_FRAG,
+ [0x54] = RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | RTE_PTYPE_L4_FRAG,
+ [0x55] = RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | RTE_PTYPE_L4_FRAG |
+ RTE_PTYPE_TUNNEL_GRENAT |
+ RTE_PTYPE_INNER_L2_ETHER |
+ RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN |
+ RTE_PTYPE_INNER_L4_FRAG,
+ /* All others reserved */
+ };
+ cqrd_flags &= CQ_ENET_RQ_DESC_FLAGS_IPV4_FRAGMENT
+ | CQ_ENET_RQ_DESC_FLAGS_IPV4 | CQ_ENET_RQ_DESC_FLAGS_IPV6
+ | CQ_ENET_RQ_DESC_FLAGS_TCP | CQ_ENET_RQ_DESC_FLAGS_UDP;
+ return cq_type_table[cqrd_flags + tnl];
+}
+
+static void
+enic_cq_rx_to_pkt_flags(struct cq_desc *cqd, struct rte_mbuf *mbuf)
+{
+ struct cq_enet_rq_desc *cqrd = (struct cq_enet_rq_desc *)cqd;
+ uint16_t bwflags, pkt_flags = 0, vlan_tci;
+ bwflags = enic_cq_rx_desc_bwflags(cqrd);
+ vlan_tci = enic_cq_rx_desc_vlan(cqrd);
+
+ /* VLAN STRIPPED flag. The L2 packet type updated here also */
+ if (bwflags & CQ_ENET_RQ_DESC_FLAGS_VLAN_STRIPPED) {
+ pkt_flags |= PKT_RX_VLAN | PKT_RX_VLAN_STRIPPED;
+ mbuf->packet_type |= RTE_PTYPE_L2_ETHER;
+ } else {
+ if (vlan_tci != 0) {
+ pkt_flags |= PKT_RX_VLAN;
+ mbuf->packet_type |= RTE_PTYPE_L2_ETHER_VLAN;
+ } else {
+ mbuf->packet_type |= RTE_PTYPE_L2_ETHER;
+ }
+ }
+ mbuf->vlan_tci = vlan_tci;
+
+ if ((cqd->type_color & CQ_DESC_TYPE_MASK) == CQ_DESC_TYPE_CLASSIFIER) {
+ struct cq_enet_rq_clsf_desc *clsf_cqd;
+ uint16_t filter_id;
+ clsf_cqd = (struct cq_enet_rq_clsf_desc *)cqd;
+ filter_id = clsf_cqd->filter_id;
+ if (filter_id) {
+ pkt_flags |= PKT_RX_FDIR;
+ if (filter_id != ENIC_MAGIC_FILTER_ID) {
+ mbuf->hash.fdir.hi = clsf_cqd->filter_id;
+ pkt_flags |= PKT_RX_FDIR_ID;
+ }
+ }
+ } else if (enic_cq_rx_desc_rss_type(cqrd)) {
+ /* RSS flag */
+ pkt_flags |= PKT_RX_RSS_HASH;
+ mbuf->hash.rss = enic_cq_rx_desc_rss_hash(cqrd);
+ }
+
+ /* checksum flags */
+ if (mbuf->packet_type & (RTE_PTYPE_L3_IPV4 | RTE_PTYPE_L3_IPV6)) {
+ if (!enic_cq_rx_desc_csum_not_calc(cqrd)) {
+ uint32_t l4_flags;
+ l4_flags = mbuf->packet_type & RTE_PTYPE_L4_MASK;
+
+ /*
+ * When overlay offload is enabled, the NIC may
+ * set ipv4_csum_ok=1 if the inner packet is IPv6.
+ * So, explicitly check for IPv4 before checking
+ * ipv4_csum_ok.
+ */
+ if (mbuf->packet_type & RTE_PTYPE_L3_IPV4) {
+ if (enic_cq_rx_desc_ipv4_csum_ok(cqrd))
+ pkt_flags |= PKT_RX_IP_CKSUM_GOOD;
+ else
+ pkt_flags |= PKT_RX_IP_CKSUM_BAD;
+ }
+
+ if (l4_flags == RTE_PTYPE_L4_UDP ||
+ l4_flags == RTE_PTYPE_L4_TCP) {
+ if (enic_cq_rx_desc_tcp_udp_csum_ok(cqrd))
+ pkt_flags |= PKT_RX_L4_CKSUM_GOOD;
+ else
+ pkt_flags |= PKT_RX_L4_CKSUM_BAD;
+ }
+ }
+ }
+
+ mbuf->ol_flags = pkt_flags;
+}
+
+#endif /* _ENIC_RXTX_COMMON_H_ */
diff --git a/drivers/net/enic/enic_rxtx_vec_avx2.c b/drivers/net/enic/enic_rxtx_vec_avx2.c
new file mode 100644
index 00000000..d2185490
--- /dev/null
+++ b/drivers/net/enic/enic_rxtx_vec_avx2.c
@@ -0,0 +1,831 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright 2008-2018 Cisco Systems, Inc. All rights reserved.
+ * Copyright 2007 Nuova Systems, Inc. All rights reserved.
+ */
+
+#include <rte_mbuf.h>
+#include <rte_ethdev_driver.h>
+
+#include "enic_compat.h"
+#include "rq_enet_desc.h"
+#include "enic.h"
+#include "enic_rxtx_common.h"
+
+#include <x86intrin.h>
+
+static struct rte_mbuf *
+rx_one(struct cq_enet_rq_desc *cqd, struct rte_mbuf *mb, struct enic *enic)
+{
+ bool tnl;
+
+ *(uint64_t *)&mb->rearm_data = enic->mbuf_initializer;
+ mb->data_len = cqd->bytes_written_flags &
+ CQ_ENET_RQ_DESC_BYTES_WRITTEN_MASK;
+ mb->pkt_len = mb->data_len;
+ tnl = enic->overlay_offload && (cqd->completed_index_flags &
+ CQ_ENET_RQ_DESC_FLAGS_FCOE) != 0;
+ mb->packet_type =
+ enic_cq_rx_flags_to_pkt_type((struct cq_desc *)cqd, tnl);
+ enic_cq_rx_to_pkt_flags((struct cq_desc *)cqd, mb);
+ /* Wipe the outer types set by enic_cq_rx_flags_to_pkt_type() */
+ if (tnl) {
+ mb->packet_type &= ~(RTE_PTYPE_L3_MASK |
+ RTE_PTYPE_L4_MASK);
+ }
+ return mb;
+}
+
+static uint16_t
+enic_noscatter_vec_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
+ uint16_t nb_pkts)
+{
+ struct rte_mbuf **rx, **rxmb;
+ uint16_t cq_idx, nb_rx, max_rx;
+ struct cq_enet_rq_desc *cqd;
+ struct rq_enet_desc *rqd;
+ struct vnic_cq *cq;
+ struct vnic_rq *rq;
+ struct enic *enic;
+ uint8_t color;
+
+ rq = rx_queue;
+ enic = vnic_dev_priv(rq->vdev);
+ cq = &enic->cq[enic_cq_rq(enic, rq->index)];
+ cq_idx = cq->to_clean;
+
+ /*
+ * Fill up the reserve of free mbufs. Below, we restock the receive
+ * ring with these mbufs to avoid allocation failures.
+ */
+ if (rq->num_free_mbufs == 0) {
+ if (rte_mempool_get_bulk(rq->mp, (void **)rq->free_mbufs,
+ ENIC_RX_BURST_MAX))
+ return 0;
+ rq->num_free_mbufs = ENIC_RX_BURST_MAX;
+ }
+ /* Receive until the end of the ring, at most. */
+ max_rx = RTE_MIN(nb_pkts, rq->num_free_mbufs);
+ max_rx = RTE_MIN(max_rx, cq->ring.desc_count - cq_idx);
+
+ rxmb = rq->mbuf_ring + cq_idx;
+ color = cq->last_color;
+ cqd = (struct cq_enet_rq_desc *)(cq->ring.descs) + cq_idx;
+ rx = rx_pkts;
+ if (max_rx == 0 ||
+ (cqd->type_color & CQ_DESC_COLOR_MASK_NOSHIFT) == color)
+ return 0;
+
+ /* Step 1: Process one packet to do aligned 256-bit load below */
+ if (cq_idx & 0x1) {
+ if (unlikely(cqd->bytes_written_flags &
+ CQ_ENET_RQ_DESC_FLAGS_TRUNCATED)) {
+ rte_pktmbuf_free(*rxmb++);
+ rte_atomic64_inc(&enic->soft_stats.rx_packet_errors);
+ } else {
+ *rx++ = rx_one(cqd, *rxmb++, enic);
+ }
+ cqd++;
+ max_rx--;
+ }
+
+ const __m256i mask =
+ _mm256_set_epi8(/* Second descriptor */
+ 0xff, /* type_color */
+ (CQ_ENET_RQ_DESC_FLAGS_IPV4_FRAGMENT |
+ CQ_ENET_RQ_DESC_FLAGS_IPV4 |
+ CQ_ENET_RQ_DESC_FLAGS_IPV6 |
+ CQ_ENET_RQ_DESC_FLAGS_TCP |
+ CQ_ENET_RQ_DESC_FLAGS_UDP), /* flags */
+ 0, 0, /* checksum_fcoe */
+ 0xff, 0xff, /* vlan */
+ 0x3f, 0xff, /* bytes_written_flags */
+ 0xff, 0xff, 0xff, 0xff, /* rss_hash */
+ 0xff, 0xff, /* q_number_rss_type_flags */
+ 0, 0, /* completed_index_flags */
+ /* First descriptor */
+ 0xff, /* type_color */
+ (CQ_ENET_RQ_DESC_FLAGS_IPV4_FRAGMENT |
+ CQ_ENET_RQ_DESC_FLAGS_IPV4 |
+ CQ_ENET_RQ_DESC_FLAGS_IPV6 |
+ CQ_ENET_RQ_DESC_FLAGS_TCP |
+ CQ_ENET_RQ_DESC_FLAGS_UDP), /* flags */
+ 0, 0, /* checksum_fcoe */
+ 0xff, 0xff, /* vlan */
+ 0x3f, 0xff, /* bytes_written_flags */
+ 0xff, 0xff, 0xff, 0xff, /* rss_hash */
+ 0xff, 0xff, /* q_number_rss_type_flags */
+ 0, 0 /* completed_index_flags */
+ );
+ const __m256i shuffle_mask =
+ _mm256_set_epi8(/* Second descriptor */
+ 7, 6, 5, 4, /* rss = rss_hash */
+ 11, 10, /* vlan_tci = vlan */
+ 9, 8, /* data_len = bytes_written */
+ 0x80, 0x80, 9, 8, /* pkt_len = bytes_written */
+ 0x80, 0x80, 0x80, 0x80, /* packet_type = 0 */
+ /* First descriptor */
+ 7, 6, 5, 4, /* rss = rss_hash */
+ 11, 10, /* vlan_tci = vlan */
+ 9, 8, /* data_len = bytes_written */
+ 0x80, 0x80, 9, 8, /* pkt_len = bytes_written */
+ 0x80, 0x80, 0x80, 0x80 /* packet_type = 0 */
+ );
+ /* Used to collect 8 flags from 8 desc into one register */
+ const __m256i flags_shuffle_mask =
+ _mm256_set_epi8(/* Second descriptor */
+ 1, 3, 9, 14,
+ 1, 3, 9, 14,
+ 1, 3, 9, 14,
+ 1, 3, 9, 14,
+ /* First descriptor */
+ 1, 3, 9, 14,
+ 1, 3, 9, 14,
+ 1, 3, 9, 14,
+ /*
+ * Byte 3: upper byte of completed_index_flags
+ * bit 5 = fcoe (tunnel)
+ * Byte 2: upper byte of q_number_rss_type_flags
+ * bits 2,3,4,5 = rss type
+ * bit 6 = csum_not_calc
+ * Byte 1: upper byte of bytes_written_flags
+ * bit 6 = truncated
+ * bit 7 = vlan stripped
+ * Byte 0: flags
+ */
+ 1, 3, 9, 14
+ );
+ /* Used to collect 8 VLAN IDs from 8 desc into one register */
+ const __m256i vlan_shuffle_mask =
+ _mm256_set_epi8(/* Second descriptor */
+ 0x80, 0x80, 11, 10,
+ 0x80, 0x80, 11, 10,
+ 0x80, 0x80, 11, 10,
+ 0x80, 0x80, 11, 10,
+ /* First descriptor */
+ 0x80, 0x80, 11, 10,
+ 0x80, 0x80, 11, 10,
+ 0x80, 0x80, 11, 10,
+ 0x80, 0x80, 11, 10);
+ /* PKT_RX_RSS_HASH is 1<<1 so fits in 8-bit integer */
+ const __m256i rss_shuffle =
+ _mm256_set_epi8(/* second 128 bits */
+ PKT_RX_RSS_HASH, PKT_RX_RSS_HASH, PKT_RX_RSS_HASH,
+ PKT_RX_RSS_HASH, PKT_RX_RSS_HASH, PKT_RX_RSS_HASH,
+ PKT_RX_RSS_HASH, PKT_RX_RSS_HASH, PKT_RX_RSS_HASH,
+ PKT_RX_RSS_HASH, PKT_RX_RSS_HASH, PKT_RX_RSS_HASH,
+ PKT_RX_RSS_HASH, PKT_RX_RSS_HASH, PKT_RX_RSS_HASH,
+ 0, /* rss_types = 0 */
+ /* first 128 bits */
+ PKT_RX_RSS_HASH, PKT_RX_RSS_HASH, PKT_RX_RSS_HASH,
+ PKT_RX_RSS_HASH, PKT_RX_RSS_HASH, PKT_RX_RSS_HASH,
+ PKT_RX_RSS_HASH, PKT_RX_RSS_HASH, PKT_RX_RSS_HASH,
+ PKT_RX_RSS_HASH, PKT_RX_RSS_HASH, PKT_RX_RSS_HASH,
+ PKT_RX_RSS_HASH, PKT_RX_RSS_HASH, PKT_RX_RSS_HASH,
+ 0 /* rss_types = 0 */);
+ /*
+ * VLAN offload flags.
+ * shuffle index:
+ * vlan_stripped => bit 0
+ * vlan_id == 0 => bit 1
+ */
+ const __m256i vlan_shuffle =
+ _mm256_set_epi32(0, 0, 0, 0,
+ PKT_RX_VLAN | PKT_RX_VLAN_STRIPPED, 0,
+ PKT_RX_VLAN | PKT_RX_VLAN_STRIPPED, PKT_RX_VLAN);
+ /* Use the same shuffle index as vlan_shuffle */
+ const __m256i vlan_ptype_shuffle =
+ _mm256_set_epi32(0, 0, 0, 0,
+ RTE_PTYPE_L2_ETHER,
+ RTE_PTYPE_L2_ETHER,
+ RTE_PTYPE_L2_ETHER,
+ RTE_PTYPE_L2_ETHER_VLAN);
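+	/*
+	 * Resulting mapping, with index = vlan_stripped | (vlan_id == 0) << 1:
+	 *  0: tag present, not stripped -> PKT_RX_VLAN, L2_ETHER_VLAN
+	 *  1: tag stripped, id != 0 -> PKT_RX_VLAN | PKT_RX_VLAN_STRIPPED, L2_ETHER
+	 *  2: no tag -> no flags, L2_ETHER
+	 *  3: tag stripped, id == 0 -> PKT_RX_VLAN | PKT_RX_VLAN_STRIPPED, L2_ETHER
+	 */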
+ /*
+	 * CKSUM flags. Shift right so they fit into 8-bit integers.
+ * shuffle index:
+ * ipv4_csum_ok => bit 3
+ * ip4 => bit 2
+ * tcp_or_udp => bit 1
+ * tcp_udp_csum_ok => bit 0
+ */
+ const __m256i csum_shuffle =
+ _mm256_set_epi8(/* second 128 bits */
+ /* 1111 ip4+ip4_ok+l4+l4_ok */
+ ((PKT_RX_IP_CKSUM_GOOD | PKT_RX_L4_CKSUM_GOOD) >> 1),
+ /* 1110 ip4_ok+ip4+l4+!l4_ok */
+ ((PKT_RX_IP_CKSUM_GOOD | PKT_RX_L4_CKSUM_BAD) >> 1),
+ (PKT_RX_IP_CKSUM_GOOD >> 1), /* 1101 ip4+ip4_ok */
+ (PKT_RX_IP_CKSUM_GOOD >> 1), /* 1100 ip4_ok+ip4 */
+ (PKT_RX_L4_CKSUM_GOOD >> 1), /* 1011 l4+l4_ok */
+ (PKT_RX_L4_CKSUM_BAD >> 1), /* 1010 l4+!l4_ok */
+ 0, /* 1001 */
+ 0, /* 1000 */
+ /* 0111 !ip4_ok+ip4+l4+l4_ok */
+ ((PKT_RX_IP_CKSUM_BAD | PKT_RX_L4_CKSUM_GOOD) >> 1),
+ /* 0110 !ip4_ok+ip4+l4+!l4_ok */
+ ((PKT_RX_IP_CKSUM_BAD | PKT_RX_L4_CKSUM_BAD) >> 1),
+ (PKT_RX_IP_CKSUM_BAD >> 1), /* 0101 !ip4_ok+ip4 */
+ (PKT_RX_IP_CKSUM_BAD >> 1), /* 0100 !ip4_ok+ip4 */
+ (PKT_RX_L4_CKSUM_GOOD >> 1), /* 0011 l4+l4_ok */
+ (PKT_RX_L4_CKSUM_BAD >> 1), /* 0010 l4+!l4_ok */
+ 0, /* 0001 */
+ 0, /* 0000 */
+ /* first 128 bits */
+ ((PKT_RX_IP_CKSUM_GOOD | PKT_RX_L4_CKSUM_GOOD) >> 1),
+ ((PKT_RX_IP_CKSUM_GOOD | PKT_RX_L4_CKSUM_BAD) >> 1),
+ (PKT_RX_IP_CKSUM_GOOD >> 1),
+ (PKT_RX_IP_CKSUM_GOOD >> 1),
+ (PKT_RX_L4_CKSUM_GOOD >> 1),
+ (PKT_RX_L4_CKSUM_BAD >> 1),
+ 0, 0,
+ ((PKT_RX_IP_CKSUM_BAD | PKT_RX_L4_CKSUM_GOOD) >> 1),
+ ((PKT_RX_IP_CKSUM_BAD | PKT_RX_L4_CKSUM_BAD) >> 1),
+ (PKT_RX_IP_CKSUM_BAD >> 1),
+ (PKT_RX_IP_CKSUM_BAD >> 1),
+ (PKT_RX_L4_CKSUM_GOOD >> 1),
+ (PKT_RX_L4_CKSUM_BAD >> 1),
+ 0, 0);
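+	/*
+	 * Example: index 1110 (ip4 csum ok, ip4, tcp/udp, bad L4 csum) selects
+	 * (PKT_RX_IP_CKSUM_GOOD | PKT_RX_L4_CKSUM_BAD) >> 1; the shuffle result
+	 * is shifted left by 1 in the loop below to restore the real flag values.
+	 */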
+ /*
+ * Non-fragment PTYPEs.
+ * Shuffle 4-bit index:
+ * ip6 => bit 0
+ * ip4 => bit 1
+ * udp => bit 2
+ * tcp => bit 3
+ * bit
+ * 3 2 1 0
+ * -------
+ * 0 0 0 0 unknown
+ * 0 0 0 1 ip6 | nonfrag
+ * 0 0 1 0 ip4 | nonfrag
+ * 0 0 1 1 unknown
+ * 0 1 0 0 unknown
+ * 0 1 0 1 ip6 | udp
+ * 0 1 1 0 ip4 | udp
+ * 0 1 1 1 unknown
+ * 1 0 0 0 unknown
+ * 1 0 0 1 ip6 | tcp
+ * 1 0 1 0 ip4 | tcp
+ * 1 0 1 1 unknown
+ * 1 1 0 0 unknown
+ * 1 1 0 1 unknown
+ * 1 1 1 0 unknown
+ * 1 1 1 1 unknown
+ *
+	 * PTYPEs do not fit in 8 bits, so shift right by 4.
+ */
+ const __m256i nonfrag_ptype_shuffle =
+ _mm256_set_epi8(/* second 128 bits */
+ RTE_PTYPE_UNKNOWN,
+ RTE_PTYPE_UNKNOWN, RTE_PTYPE_UNKNOWN,
+ RTE_PTYPE_UNKNOWN, RTE_PTYPE_UNKNOWN,
+ (RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_TCP) >> 4,
+ (RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | RTE_PTYPE_L4_TCP) >> 4,
+ RTE_PTYPE_UNKNOWN, RTE_PTYPE_UNKNOWN,
+ (RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP) >> 4,
+ (RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | RTE_PTYPE_L4_UDP) >> 4,
+ RTE_PTYPE_UNKNOWN, RTE_PTYPE_UNKNOWN,
+ (RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
+ RTE_PTYPE_L4_NONFRAG) >> 4,
+ (RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
+ RTE_PTYPE_L4_NONFRAG) >> 4,
+ RTE_PTYPE_UNKNOWN,
+ /* first 128 bits */
+ RTE_PTYPE_UNKNOWN,
+ RTE_PTYPE_UNKNOWN, RTE_PTYPE_UNKNOWN,
+ RTE_PTYPE_UNKNOWN, RTE_PTYPE_UNKNOWN,
+ (RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_TCP) >> 4,
+ (RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | RTE_PTYPE_L4_TCP) >> 4,
+ RTE_PTYPE_UNKNOWN, RTE_PTYPE_UNKNOWN,
+ (RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP) >> 4,
+ (RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | RTE_PTYPE_L4_UDP) >> 4,
+ RTE_PTYPE_UNKNOWN, RTE_PTYPE_UNKNOWN,
+ (RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
+ RTE_PTYPE_L4_NONFRAG) >> 4,
+ (RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
+ RTE_PTYPE_L4_NONFRAG) >> 4,
+ RTE_PTYPE_UNKNOWN);
+ /* Fragment PTYPEs. Use the same shuffle index as above. */
+ const __m256i frag_ptype_shuffle =
+ _mm256_set_epi8(/* second 128 bits */
+ RTE_PTYPE_UNKNOWN,
+ RTE_PTYPE_UNKNOWN, RTE_PTYPE_UNKNOWN,
+ RTE_PTYPE_UNKNOWN, RTE_PTYPE_UNKNOWN,
+ (RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
+ RTE_PTYPE_L4_FRAG) >> 4,
+ (RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
+ RTE_PTYPE_L4_FRAG) >> 4,
+ RTE_PTYPE_UNKNOWN, RTE_PTYPE_UNKNOWN,
+ (RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
+ RTE_PTYPE_L4_FRAG) >> 4,
+ (RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
+ RTE_PTYPE_L4_FRAG) >> 4,
+ RTE_PTYPE_UNKNOWN, RTE_PTYPE_UNKNOWN,
+ (RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
+ RTE_PTYPE_L4_FRAG) >> 4,
+ (RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
+ RTE_PTYPE_L4_FRAG) >> 4,
+ RTE_PTYPE_UNKNOWN,
+ /* first 128 bits */
+ RTE_PTYPE_UNKNOWN,
+ RTE_PTYPE_UNKNOWN, RTE_PTYPE_UNKNOWN,
+ RTE_PTYPE_UNKNOWN, RTE_PTYPE_UNKNOWN,
+ (RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
+ RTE_PTYPE_L4_FRAG) >> 4,
+ (RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
+ RTE_PTYPE_L4_FRAG) >> 4,
+ RTE_PTYPE_UNKNOWN, RTE_PTYPE_UNKNOWN,
+ (RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
+ RTE_PTYPE_L4_FRAG) >> 4,
+ (RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
+ RTE_PTYPE_L4_FRAG) >> 4,
+ RTE_PTYPE_UNKNOWN, RTE_PTYPE_UNKNOWN,
+ (RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
+ RTE_PTYPE_L4_FRAG) >> 4,
+ (RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
+ RTE_PTYPE_L4_FRAG) >> 4,
+ RTE_PTYPE_UNKNOWN);
+ /*
+ * Tunnel PTYPEs. Use the same shuffle index as above.
+ * L4 types are not part of this table. They come from non-tunnel
+ * types above.
+ */
+ const __m256i tnl_l3_ptype_shuffle =
+ _mm256_set_epi8(/* second 128 bits */
+ RTE_PTYPE_UNKNOWN,
+ RTE_PTYPE_UNKNOWN, RTE_PTYPE_UNKNOWN,
+ RTE_PTYPE_UNKNOWN, RTE_PTYPE_UNKNOWN,
+ RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN >> 16,
+ RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN >> 16,
+ RTE_PTYPE_UNKNOWN, RTE_PTYPE_UNKNOWN,
+ RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN >> 16,
+ RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN >> 16,
+ RTE_PTYPE_UNKNOWN, RTE_PTYPE_UNKNOWN,
+ RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN >> 16,
+ RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN >> 16,
+ RTE_PTYPE_UNKNOWN,
+ /* first 128 bits */
+ RTE_PTYPE_UNKNOWN,
+ RTE_PTYPE_UNKNOWN, RTE_PTYPE_UNKNOWN,
+ RTE_PTYPE_UNKNOWN, RTE_PTYPE_UNKNOWN,
+ RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN >> 16,
+ RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN >> 16,
+ RTE_PTYPE_UNKNOWN, RTE_PTYPE_UNKNOWN,
+ RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN >> 16,
+ RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN >> 16,
+ RTE_PTYPE_UNKNOWN, RTE_PTYPE_UNKNOWN,
+ RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN >> 16,
+ RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN >> 16,
+ RTE_PTYPE_UNKNOWN);
+
+ const __m256i mbuf_init = _mm256_set_epi64x(0, enic->mbuf_initializer,
+ 0, enic->mbuf_initializer);
+
+ /*
+ * --- cq desc fields --- offset
+ * completed_index_flags - 0 use: fcoe
+ * q_number_rss_type_flags - 2 use: rss types, csum_not_calc
+ * rss_hash - 4 ==> mbuf.hash.rss
+ * bytes_written_flags - 8 ==> mbuf.pkt_len,data_len
+ * use: truncated, vlan_stripped
+ * vlan - 10 ==> mbuf.vlan_tci
+ * checksum_fcoe - 12 (unused)
+ * flags - 14 use: all bits
+ * type_color - 15 (unused)
+ *
+ * --- mbuf fields --- offset
+ * rearm_data ---- 16
+ * data_off - 0 (mbuf_init) -+
+ * refcnt - 2 (mbuf_init) |
+ * nb_segs - 4 (mbuf_init) | 16B 128b
+ * port - 6 (mbuf_init) |
+ * ol_flag - 8 (from cqd) -+
+ * rx_descriptor_fields1 ---- 32
+ * packet_type - 0 (from cqd) -+
+ * pkt_len - 4 (from cqd) |
+ * data_len - 8 (from cqd) | 16B 128b
+ * vlan_tci - 10 (from cqd) |
+ * rss - 12 (from cqd) -+
+ */
+
+ __m256i overlay_enabled =
+ _mm256_set1_epi32((uint32_t)enic->overlay_offload);
+
+ /* Step 2: Process 8 packets per loop using SIMD */
+ while (max_rx > 7 && (((cqd + 7)->type_color &
+ CQ_DESC_COLOR_MASK_NOSHIFT) != color)) {
+ /* Load 8 16B CQ descriptors */
+ __m256i cqd01 = _mm256_load_si256((void *)cqd);
+ __m256i cqd23 = _mm256_load_si256((void *)(cqd + 2));
+ __m256i cqd45 = _mm256_load_si256((void *)(cqd + 4));
+ __m256i cqd67 = _mm256_load_si256((void *)(cqd + 6));
+ /* Copy 8 mbuf pointers to rx_pkts */
+ _mm256_storeu_si256((void *)rx,
+ _mm256_loadu_si256((void *)rxmb));
+ _mm256_storeu_si256((void *)(rx + 4),
+ _mm256_loadu_si256((void *)(rxmb + 4)));
+
+ /*
+ * Collect 8 flags (each 32 bits) into one register.
+ * 4 shuffles, 3 blends, 1 permute for 8 desc: 1 inst/desc
+ */
+ __m256i flags01 =
+ _mm256_shuffle_epi8(cqd01, flags_shuffle_mask);
+ /*
+ * Shuffle above produces 8 x 32-bit flags for 8 descriptors
+ * in this order: 0, 0, 0, 0, 1, 1, 1, 1
+		 * The duplicates in each 128-bit lane simplify the blending
+ * below.
+ */
+ __m256i flags23 =
+ _mm256_shuffle_epi8(cqd23, flags_shuffle_mask);
+ __m256i flags45 =
+ _mm256_shuffle_epi8(cqd45, flags_shuffle_mask);
+ __m256i flags67 =
+ _mm256_shuffle_epi8(cqd67, flags_shuffle_mask);
+ /* 1st blend produces flags for desc: 0, 2, 0, 0, 1, 3, 1, 1 */
+ __m256i flags0_3 = _mm256_blend_epi32(flags01, flags23, 0x22);
+ /* 2nd blend produces flags for desc: 4, 4, 4, 6, 5, 5, 5, 7 */
+ __m256i flags4_7 = _mm256_blend_epi32(flags45, flags67, 0x88);
+ /* 3rd blend produces flags for desc: 0, 2, 4, 6, 1, 3, 5, 7 */
+ __m256i flags0_7 = _mm256_blend_epi32(flags0_3, flags4_7, 0xcc);
+ /*
+ * Swap to reorder flags in this order: 1, 3, 5, 7, 0, 2, 4, 6
+		 * This order simplifies the blend operations further below
+		 * that produce the 'rearm' data for each mbuf.
+ */
+ flags0_7 = _mm256_permute4x64_epi64(flags0_7,
+ (1 << 6) + (0 << 4) + (3 << 2) + 2);
+
+ /*
+		 * Check the truncated bits and bail out early if any are set.
+ * 6 avx inst, 1 or, 1 if-then-else for 8 desc: 1 inst/desc
+ */
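+		/*
+		 * The slli(17)/srli(31) pair isolates bit 14 of each collected
+		 * flag word, i.e. the 'truncated' bit in the upper byte of
+		 * bytes_written_flags. The add/permute below folds the 8
+		 * per-desc results into two 64-bit sums, so two scalar tests
+		 * cover all 8 descriptors.
+		 */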
+ __m256i trunc =
+ _mm256_srli_epi32(_mm256_slli_epi32(flags0_7, 17), 31);
+ trunc = _mm256_add_epi64(trunc, _mm256_permute4x64_epi64(trunc,
+ (1 << 6) + (0 << 4) + (3 << 2) + 2));
+ /* 0:63 contains 1+3+0+2 and 64:127 contains 5+7+4+6 */
+ if (_mm256_extract_epi64(trunc, 0) ||
+ _mm256_extract_epi64(trunc, 1))
+ break;
+
+ /*
+ * Compute PKT_RX_RSS_HASH.
+ * Use 2 shifts and 1 shuffle for 8 desc: 0.375 inst/desc
+ * RSS types in byte 0, 4, 8, 12, 16, 20, 24, 28
+ * Everything else is zero.
+ */
+ __m256i rss_types =
+ _mm256_srli_epi32(_mm256_slli_epi32(flags0_7, 10), 28);
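+		/*
+		 * slli(10)/srli(28) isolates bits 18..21 of each flag word:
+		 * the 4-bit rss type from the upper byte of
+		 * q_number_rss_type_flags.
+		 */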
+ /*
+ * RSS flags (PKT_RX_RSS_HASH) are in
+ * byte 0, 4, 8, 12, 16, 20, 24, 28
+ * Everything else is zero.
+ */
+ __m256i rss_flags = _mm256_shuffle_epi8(rss_shuffle, rss_types);
+
+ /*
+ * Compute CKSUM flags. First build the index and then
+ * use it to shuffle csum_shuffle.
+ * 20 instructions including const loads: 2.5 inst/desc
+ */
+ /*
+ * csum_not_calc (bit 22)
+ * csum_not_calc (0) => 0xffffffff
+ * csum_not_calc (1) => 0x0
+ */
+ const __m256i zero4 = _mm256_setzero_si256();
+ const __m256i mask22 = _mm256_set1_epi32(0x400000);
+ __m256i csum_not_calc = _mm256_cmpeq_epi32(zero4,
+ _mm256_and_si256(flags0_7, mask22));
+ /*
+ * (tcp|udp) && !fragment => bit 1
+ * tcp = bit 2, udp = bit 1, frag = bit 6
+ */
+ const __m256i mask1 = _mm256_set1_epi32(0x2);
+ __m256i tcp_udp =
+ _mm256_andnot_si256(_mm256_srli_epi32(flags0_7, 5),
+ _mm256_or_si256(flags0_7,
+ _mm256_srli_epi32(flags0_7, 1)));
+ tcp_udp = _mm256_and_si256(tcp_udp, mask1);
+ /* ipv4 (bit 5) => bit 2 */
+ const __m256i mask2 = _mm256_set1_epi32(0x4);
+ __m256i ipv4 = _mm256_and_si256(mask2,
+ _mm256_srli_epi32(flags0_7, 3));
+ /*
+ * ipv4_csum_ok (bit 3) => bit 3
+ * tcp_udp_csum_ok (bit 0) => bit 0
+ * 0x9
+ */
+ const __m256i mask0_3 = _mm256_set1_epi32(0x9);
+ __m256i csum_idx = _mm256_and_si256(flags0_7, mask0_3);
+ csum_idx = _mm256_and_si256(csum_not_calc,
+ _mm256_or_si256(_mm256_or_si256(csum_idx, ipv4),
+ tcp_udp));
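+		/*
+		 * If the descriptor's csum_not_calc bit is set, csum_not_calc
+		 * above is all-zero and the AND forces index 0000, i.e. no
+		 * checksum flags.
+		 */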
+ __m256i csum_flags =
+ _mm256_shuffle_epi8(csum_shuffle, csum_idx);
+ /* Shift left to restore CKSUM flags. See csum_shuffle. */
+ csum_flags = _mm256_slli_epi32(csum_flags, 1);
+ /* Combine csum flags and offload flags: 0.125 inst/desc */
+ rss_flags = _mm256_or_si256(rss_flags, csum_flags);
+
+ /*
+ * Collect 8 VLAN IDs and compute vlan_id != 0 on each.
+ * 4 shuffles, 3 blends, 1 permute, 1 cmp, 1 sub for 8 desc:
+ * 1.25 inst/desc
+ */
+ __m256i vlan01 = _mm256_shuffle_epi8(cqd01, vlan_shuffle_mask);
+ __m256i vlan23 = _mm256_shuffle_epi8(cqd23, vlan_shuffle_mask);
+ __m256i vlan45 = _mm256_shuffle_epi8(cqd45, vlan_shuffle_mask);
+ __m256i vlan67 = _mm256_shuffle_epi8(cqd67, vlan_shuffle_mask);
+ __m256i vlan0_3 = _mm256_blend_epi32(vlan01, vlan23, 0x22);
+ __m256i vlan4_7 = _mm256_blend_epi32(vlan45, vlan67, 0x88);
+ /* desc: 0, 2, 4, 6, 1, 3, 5, 7 */
+ __m256i vlan0_7 = _mm256_blend_epi32(vlan0_3, vlan4_7, 0xcc);
+ /* desc: 1, 3, 5, 7, 0, 2, 4, 6 */
+ vlan0_7 = _mm256_permute4x64_epi64(vlan0_7,
+ (1 << 6) + (0 << 4) + (3 << 2) + 2);
+ /*
+ * Compare 0 == vlan_id produces 0xffffffff (-1) if
+ * vlan 0 and 0 if vlan non-0. Then subtracting the
+ * result from 0 produces 0 - (-1) = 1 for vlan 0, and
+ * 0 - 0 = 0 for vlan non-0.
+ */
+ vlan0_7 = _mm256_cmpeq_epi32(zero4, vlan0_7);
+ /* vlan_id != 0 => 0, vlan_id == 0 => 1 */
+ vlan0_7 = _mm256_sub_epi32(zero4, vlan0_7);
+
+ /*
+ * Compute PKT_RX_VLAN and PKT_RX_VLAN_STRIPPED.
+ * Use 3 shifts, 1 or, 1 shuffle for 8 desc: 0.625 inst/desc
+ * VLAN offload flags in byte 0, 4, 8, 12, 16, 20, 24, 28
+ * Everything else is zero.
+ */
+ __m256i vlan_idx =
+ _mm256_or_si256(/* vlan_stripped => bit 0 */
+ _mm256_srli_epi32(_mm256_slli_epi32(flags0_7,
+ 16), 31),
+ /* (vlan_id == 0) => bit 1 */
+ _mm256_slli_epi32(vlan0_7, 1));
+ /*
+ * The index captures 4 cases.
+ * stripped, id = 0 ==> 11b = 3
+ * stripped, id != 0 ==> 01b = 1
+ * not strip, id == 0 ==> 10b = 2
+ * not strip, id != 0 ==> 00b = 0
+ */
+ __m256i vlan_flags = _mm256_permutevar8x32_epi32(vlan_shuffle,
+ vlan_idx);
+ /* Combine vlan and offload flags: 0.125 inst/desc */
+ rss_flags = _mm256_or_si256(rss_flags, vlan_flags);
+
+ /*
+ * Compute non-tunnel PTYPEs.
+ * 17 inst / 8 desc = 2.125 inst/desc
+ */
+ /* ETHER and ETHER_VLAN */
+ __m256i vlan_ptype =
+ _mm256_permutevar8x32_epi32(vlan_ptype_shuffle,
+ vlan_idx);
+ /* Build the ptype index from flags */
+ tcp_udp = _mm256_slli_epi32(flags0_7, 29);
+ tcp_udp = _mm256_slli_epi32(_mm256_srli_epi32(tcp_udp, 30), 2);
+ __m256i ip4_ip6 =
+ _mm256_srli_epi32(_mm256_slli_epi32(flags0_7, 26), 30);
+ __m256i ptype_idx = _mm256_or_si256(tcp_udp, ip4_ip6);
+ __m256i frag_bit =
+ _mm256_srli_epi32(_mm256_slli_epi32(flags0_7, 25), 31);
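+		/*
+		 * The shifts above build the 4-bit index: flags bits 1..2
+		 * (udp, tcp) move to index bits 2..3, flags bits 4..5
+		 * (ip6, ip4) move to index bits 0..1, and frag_bit isolates
+		 * the ipv4 fragment bit (bit 6).
+		 */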
+ __m256i nonfrag_ptype =
+ _mm256_shuffle_epi8(nonfrag_ptype_shuffle, ptype_idx);
+ __m256i frag_ptype =
+ _mm256_shuffle_epi8(frag_ptype_shuffle, ptype_idx);
+ /*
+ * Zero out the unwanted types and combine the remaining bits.
+ * The effect is same as selecting non-frag or frag types
+ * depending on the frag bit.
+ */
+ nonfrag_ptype = _mm256_and_si256(nonfrag_ptype,
+ _mm256_cmpeq_epi32(zero4, frag_bit));
+ frag_ptype = _mm256_and_si256(frag_ptype,
+ _mm256_cmpgt_epi32(frag_bit, zero4));
+ __m256i ptype = _mm256_or_si256(nonfrag_ptype, frag_ptype);
+ ptype = _mm256_slli_epi32(ptype, 4);
+ /*
+ * Compute tunnel PTYPEs.
+ * 15 inst / 8 desc = 1.875 inst/desc
+ */
+ __m256i tnl_l3_ptype =
+ _mm256_shuffle_epi8(tnl_l3_ptype_shuffle, ptype_idx);
+ tnl_l3_ptype = _mm256_slli_epi32(tnl_l3_ptype, 16);
+ /*
+ * Shift non-tunnel L4 types to make them tunnel types.
+ * RTE_PTYPE_L4_TCP << 16 == RTE_PTYPE_INNER_L4_TCP
+ */
+ __m256i tnl_l4_ptype =
+ _mm256_slli_epi32(_mm256_and_si256(ptype,
+ _mm256_set1_epi32(RTE_PTYPE_L4_MASK)), 16);
+ __m256i tnl_ptype =
+ _mm256_or_si256(tnl_l3_ptype, tnl_l4_ptype);
+ tnl_ptype = _mm256_or_si256(tnl_ptype,
+ _mm256_set1_epi32(RTE_PTYPE_TUNNEL_GRENAT |
+ RTE_PTYPE_INNER_L2_ETHER));
+ /*
+ * Select non-tunnel or tunnel types by zeroing out the
+ * unwanted ones.
+ */
+ __m256i tnl_flags = _mm256_and_si256(overlay_enabled,
+ _mm256_srli_epi32(_mm256_slli_epi32(flags0_7, 2), 31));
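+		/*
+		 * slli(2)/srli(31) isolates bit 29: the fcoe/tunnel bit from
+		 * the upper byte of completed_index_flags. The subtraction
+		 * below turns the 0/1 result into a full 32-bit select mask.
+		 */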
+ tnl_ptype = _mm256_and_si256(tnl_ptype,
+ _mm256_sub_epi32(zero4, tnl_flags));
+ ptype = _mm256_and_si256(ptype,
+ _mm256_cmpeq_epi32(zero4, tnl_flags));
+ /*
+ * Combine types and swap to have ptypes in the same order
+ * as desc.
+ * desc: 0 2 4 6 1 3 5 7
+ * 3 inst / 8 desc = 0.375 inst/desc
+ */
+ ptype = _mm256_or_si256(ptype, tnl_ptype);
+ ptype = _mm256_or_si256(ptype, vlan_ptype);
+ ptype = _mm256_permute4x64_epi64(ptype,
+ (1 << 6) + (0 << 4) + (3 << 2) + 2);
+
+ /*
+ * Mask packet length.
+ * Use 4 ands: 0.5 instructions/desc
+ */
+ cqd01 = _mm256_and_si256(cqd01, mask);
+ cqd23 = _mm256_and_si256(cqd23, mask);
+ cqd45 = _mm256_and_si256(cqd45, mask);
+ cqd67 = _mm256_and_si256(cqd67, mask);
+ /*
+ * Shuffle. Two 16B sets of the mbuf fields.
+ * packet_type, pkt_len, data_len, vlan_tci, rss
+ */
+ __m256i rearm01 = _mm256_shuffle_epi8(cqd01, shuffle_mask);
+ __m256i rearm23 = _mm256_shuffle_epi8(cqd23, shuffle_mask);
+ __m256i rearm45 = _mm256_shuffle_epi8(cqd45, shuffle_mask);
+ __m256i rearm67 = _mm256_shuffle_epi8(cqd67, shuffle_mask);
+
+ /*
+ * Blend in ptypes
+ * 4 blends and 3 shuffles for 8 desc: 0.875 inst/desc
+ */
+ rearm01 = _mm256_blend_epi32(rearm01, ptype, 0x11);
+ rearm23 = _mm256_blend_epi32(rearm23,
+ _mm256_shuffle_epi32(ptype, 1), 0x11);
+ rearm45 = _mm256_blend_epi32(rearm45,
+ _mm256_shuffle_epi32(ptype, 2), 0x11);
+ rearm67 = _mm256_blend_epi32(rearm67,
+ _mm256_shuffle_epi32(ptype, 3), 0x11);
+
+ /*
+ * Move rss_flags into ol_flags in mbuf_init.
+		 * Use at most 1 shift and 1 blend per pair of desc: ~1 inst/desc
+ */
+ __m256i mbuf_init4_5 = _mm256_blend_epi32(mbuf_init,
+ rss_flags, 0x44);
+ __m256i mbuf_init2_3 = _mm256_blend_epi32(mbuf_init,
+ _mm256_slli_si256(rss_flags, 4), 0x44);
+ __m256i mbuf_init0_1 = _mm256_blend_epi32(mbuf_init,
+ _mm256_slli_si256(rss_flags, 8), 0x44);
+ __m256i mbuf_init6_7 = _mm256_blend_epi32(mbuf_init,
+ _mm256_srli_si256(rss_flags, 4), 0x44);
+
+ /*
+ * Build rearm, one per desc.
+ * 8 blends and 4 permutes: 1.5 inst/desc
+ */
+ __m256i rearm0 = _mm256_blend_epi32(rearm01,
+ mbuf_init0_1, 0xf0);
+ __m256i rearm1 = _mm256_blend_epi32(mbuf_init0_1,
+ rearm01, 0xf0);
+ __m256i rearm2 = _mm256_blend_epi32(rearm23,
+ mbuf_init2_3, 0xf0);
+ __m256i rearm3 = _mm256_blend_epi32(mbuf_init2_3,
+ rearm23, 0xf0);
+ /* Swap upper and lower 64 bits */
+ rearm0 = _mm256_permute4x64_epi64(rearm0,
+ (1 << 6) + (0 << 4) + (3 << 2) + 2);
+ rearm2 = _mm256_permute4x64_epi64(rearm2,
+ (1 << 6) + (0 << 4) + (3 << 2) + 2);
+ /* Second set of 4 descriptors */
+ __m256i rearm4 = _mm256_blend_epi32(rearm45,
+ mbuf_init4_5, 0xf0);
+ __m256i rearm5 = _mm256_blend_epi32(mbuf_init4_5,
+ rearm45, 0xf0);
+ __m256i rearm6 = _mm256_blend_epi32(rearm67,
+ mbuf_init6_7, 0xf0);
+ __m256i rearm7 = _mm256_blend_epi32(mbuf_init6_7,
+ rearm67, 0xf0);
+ rearm4 = _mm256_permute4x64_epi64(rearm4,
+ (1 << 6) + (0 << 4) + (3 << 2) + 2);
+ rearm6 = _mm256_permute4x64_epi64(rearm6,
+ (1 << 6) + (0 << 4) + (3 << 2) + 2);
+
+ /*
+ * Write out 32B of mbuf fields.
+ * data_off - off 0 (mbuf_init)
+ * refcnt - 2 (mbuf_init)
+ * nb_segs - 4 (mbuf_init)
+ * port - 6 (mbuf_init)
+ * ol_flag - 8 (from cqd)
+ * packet_type - 16 (from cqd)
+ * pkt_len - 20 (from cqd)
+ * data_len - 24 (from cqd)
+ * vlan_tci - 26 (from cqd)
+ * rss - 28 (from cqd)
+ */
+ _mm256_storeu_si256((__m256i *)&rxmb[0]->rearm_data, rearm0);
+ _mm256_storeu_si256((__m256i *)&rxmb[1]->rearm_data, rearm1);
+ _mm256_storeu_si256((__m256i *)&rxmb[2]->rearm_data, rearm2);
+ _mm256_storeu_si256((__m256i *)&rxmb[3]->rearm_data, rearm3);
+ _mm256_storeu_si256((__m256i *)&rxmb[4]->rearm_data, rearm4);
+ _mm256_storeu_si256((__m256i *)&rxmb[5]->rearm_data, rearm5);
+ _mm256_storeu_si256((__m256i *)&rxmb[6]->rearm_data, rearm6);
+ _mm256_storeu_si256((__m256i *)&rxmb[7]->rearm_data, rearm7);
+
+ max_rx -= 8;
+ cqd += 8;
+ rx += 8;
+ rxmb += 8;
+ }
+
+ /*
+ * Step 3: Slow path to handle a small (<8) number of packets and
+ * occasional truncated packets.
+ */
+ while (max_rx && ((cqd->type_color &
+ CQ_DESC_COLOR_MASK_NOSHIFT) != color)) {
+ if (unlikely(cqd->bytes_written_flags &
+ CQ_ENET_RQ_DESC_FLAGS_TRUNCATED)) {
+ rte_pktmbuf_free(*rxmb++);
+ rte_atomic64_inc(&enic->soft_stats.rx_packet_errors);
+ } else {
+ *rx++ = rx_one(cqd, *rxmb++, enic);
+ }
+ cqd++;
+ max_rx--;
+ }
+
+ /* Number of descriptors visited */
+ nb_rx = cqd - (struct cq_enet_rq_desc *)(cq->ring.descs) - cq_idx;
+ if (nb_rx == 0)
+ return 0;
+ rqd = ((struct rq_enet_desc *)rq->ring.descs) + cq_idx;
+ rxmb = rq->mbuf_ring + cq_idx;
+ cq_idx += nb_rx;
+ rq->rx_nb_hold += nb_rx;
+ if (unlikely(cq_idx == cq->ring.desc_count)) {
+ cq_idx = 0;
+ cq->last_color ^= CQ_DESC_COLOR_MASK_NOSHIFT;
+ }
+ cq->to_clean = cq_idx;
+
+ /* Step 4: Restock RQ with new mbufs */
+ memcpy(rxmb, rq->free_mbufs + ENIC_RX_BURST_MAX - rq->num_free_mbufs,
+ sizeof(struct rte_mbuf *) * nb_rx);
+ rq->num_free_mbufs -= nb_rx;
+ while (nb_rx) {
+ rqd->address = (*rxmb)->buf_iova + RTE_PKTMBUF_HEADROOM;
+ nb_rx--;
+ rqd++;
+ rxmb++;
+ }
+ if (rq->rx_nb_hold > rq->rx_free_thresh) {
+ rq->posted_index = enic_ring_add(rq->ring.desc_count,
+ rq->posted_index,
+ rq->rx_nb_hold);
+ rq->rx_nb_hold = 0;
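+		/*
+		 * Ensure the descriptor address writes above are visible to
+		 * the NIC before posting the new index.
+		 */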
+ rte_wmb();
+ iowrite32_relaxed(rq->posted_index,
+ &rq->ctrl->posted_index);
+ }
+
+ return rx - rx_pkts;
+}
+
+bool
+enic_use_vector_rx_handler(struct enic *enic)
+{
+ struct rte_eth_dev *eth_dev;
+ struct rte_fdir_conf *fconf;
+
+ eth_dev = enic->rte_dev;
+	/* The user must explicitly request the avx2 handler */
+ if (!enic->enable_avx2_rx)
+ return false;
+	/* Scatter Rx is not supported */
+ if (!(enic->rq_count > 0 && enic->rq[0].data_queue_enable == 0))
+ return false;
+	/* Fdir/flow is not supported */
+ fconf = &eth_dev->data->dev_conf.fdir_conf;
+ if (fconf->mode != RTE_FDIR_MODE_NONE)
+ return false;
+ if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX2)) {
+ PMD_INIT_LOG(DEBUG, " use the non-scatter avx2 Rx handler");
+ eth_dev->rx_pkt_burst = &enic_noscatter_vec_recv_pkts;
+ return true;
+ }
+ return false;
+}
diff --git a/drivers/net/enic/meson.build b/drivers/net/enic/meson.build
index bfd4e237..06448711 100644
--- a/drivers/net/enic/meson.build
+++ b/drivers/net/enic/meson.build
@@ -17,3 +17,19 @@ sources = files(
)
deps += ['hash']
includes += include_directories('base')
+
+# The current implementation assumes 64-bit pointers
+if dpdk_conf.has('RTE_MACHINE_CPUFLAG_AVX2') and cc.sizeof('void *') == 8
+ sources += files('enic_rxtx_vec_avx2.c')
+# Build the avx2 handler if the compiler supports it, even though 'machine'
+# does not. This is to support users who build for the min supported machine
+# and need to run the binary on newer CPUs too.
+# This part is from i40e meson.build
+elif cc.has_argument('-mavx2') and cc.sizeof('void *') == 8
+ enic_avx2_lib = static_library('enic_avx2_lib',
+ 'enic_rxtx_vec_avx2.c',
+ dependencies: [static_rte_ethdev, static_rte_bus_pci],
+ include_directories: includes,
+ c_args: [cflags, '-mavx2'])
+ objs += enic_avx2_lib.extract_objects('enic_rxtx_vec_avx2.c')
+endif