author    Luca Boccassi <luca.boccassi@gmail.com>    2018-11-01 11:59:50 +0000
committer Luca Boccassi <luca.boccassi@gmail.com>    2018-11-01 12:00:19 +0000
commit    8d01b9cd70a67cdafd5b965a70420c3bd7fb3f82 (patch)
tree      208e3bc33c220854d89d010e3abf720a2e62e546 /drivers/net/mlx5
parent    b63264c8342e6a1b6971c79550d2af2024b6a4de (diff)

New upstream version 18.11-rc1 (upstream/18.11-rc1)

Change-Id: Iaa71986dd6332e878d8f4bf493101b2bbc6313bb
Signed-off-by: Luca Boccassi <luca.boccassi@gmail.com>
Diffstat (limited to 'drivers/net/mlx5')
-rw-r--r-- drivers/net/mlx5/Makefile              |   43
-rw-r--r-- drivers/net/mlx5/meson.build           |  244
-rw-r--r-- drivers/net/mlx5/mlx5.c                |  154
-rw-r--r-- drivers/net/mlx5/mlx5.h                |   51
-rw-r--r-- drivers/net/mlx5/mlx5_ethdev.c         |   72
-rw-r--r-- drivers/net/mlx5/mlx5_flow.c           | 3351
-rw-r--r-- drivers/net/mlx5/mlx5_flow.h           |  375
-rw-r--r-- drivers/net/mlx5/mlx5_flow_dv.c        | 1492
-rw-r--r-- drivers/net/mlx5/mlx5_flow_tcf.c       | 2913
-rw-r--r-- drivers/net/mlx5/mlx5_flow_verbs.c     | 1825
-rw-r--r-- drivers/net/mlx5/mlx5_glue.c           |  113
-rw-r--r-- drivers/net/mlx5/mlx5_glue.h           |   34
-rw-r--r-- drivers/net/mlx5/mlx5_mac.c            |    2
-rw-r--r-- drivers/net/mlx5/mlx5_mr.c             |  155
-rw-r--r-- drivers/net/mlx5/mlx5_nl_flow.c        | 1248
-rw-r--r-- drivers/net/mlx5/mlx5_prm.h            |  222
-rw-r--r-- drivers/net/mlx5/mlx5_rxq.c            |    3
-rw-r--r-- drivers/net/mlx5/mlx5_rxtx.c           |   48
-rw-r--r-- drivers/net/mlx5/mlx5_rxtx.h           |   41
-rw-r--r-- drivers/net/mlx5/mlx5_rxtx_vec.c       |   46
-rw-r--r-- drivers/net/mlx5/mlx5_rxtx_vec.h       |    1
-rw-r--r-- drivers/net/mlx5/mlx5_rxtx_vec_neon.h  |   11
-rw-r--r-- drivers/net/mlx5/mlx5_rxtx_vec_sse.h   |   10
-rw-r--r-- drivers/net/mlx5/mlx5_socket.c         |    2
-rw-r--r-- drivers/net/mlx5/mlx5_stats.c          |   87
-rw-r--r-- drivers/net/mlx5/mlx5_txq.c            |    5
26 files changed, 8910 insertions, 3638 deletions
diff --git a/drivers/net/mlx5/Makefile b/drivers/net/mlx5/Makefile
index 2e70dec5..fecb57c1 100644
--- a/drivers/net/mlx5/Makefile
+++ b/drivers/net/mlx5/Makefile
@@ -8,7 +8,7 @@ include $(RTE_SDK)/mk/rte.vars.mk
LIB = librte_pmd_mlx5.a
LIB_GLUE = $(LIB_GLUE_BASE).$(LIB_GLUE_VERSION)
LIB_GLUE_BASE = librte_pmd_mlx5_glue.so
-LIB_GLUE_VERSION = 18.05.0
+LIB_GLUE_VERSION = 18.11.0
# Sources.
SRCS-$(CONFIG_RTE_LIBRTE_MLX5_PMD) += mlx5.c
@@ -31,9 +31,11 @@ SRCS-$(CONFIG_RTE_LIBRTE_MLX5_PMD) += mlx5_stats.c
SRCS-$(CONFIG_RTE_LIBRTE_MLX5_PMD) += mlx5_rss.c
SRCS-$(CONFIG_RTE_LIBRTE_MLX5_PMD) += mlx5_mr.c
SRCS-$(CONFIG_RTE_LIBRTE_MLX5_PMD) += mlx5_flow.c
+SRCS-$(CONFIG_RTE_LIBRTE_MLX5_PMD) += mlx5_flow_dv.c
+SRCS-$(CONFIG_RTE_LIBRTE_MLX5_PMD) += mlx5_flow_tcf.c
+SRCS-$(CONFIG_RTE_LIBRTE_MLX5_PMD) += mlx5_flow_verbs.c
SRCS-$(CONFIG_RTE_LIBRTE_MLX5_PMD) += mlx5_socket.c
SRCS-$(CONFIG_RTE_LIBRTE_MLX5_PMD) += mlx5_nl.c
-SRCS-$(CONFIG_RTE_LIBRTE_MLX5_PMD) += mlx5_nl_flow.c
ifeq ($(CONFIG_RTE_LIBRTE_MLX5_DLOPEN_DEPS),y)
INSTALL-$(CONFIG_RTE_LIBRTE_MLX5_PMD)-lib += $(LIB_GLUE)
@@ -135,6 +137,11 @@ mlx5_autoconf.h.new: $(RTE_SDK)/buildtools/auto-config-h.sh
enum MLX5DV_CONTEXT_FLAGS_CQE_128B_COMP \
$(AUTOCONF_OUTPUT)
$Q sh -- '$<' '$@' \
+ HAVE_IBV_FLOW_DV_SUPPORT \
+ infiniband/mlx5dv.h \
+ enum MLX5DV_FLOW_ACTION_TAG \
+ $(AUTOCONF_OUTPUT)
+ $Q sh -- '$<' '$@' \
HAVE_ETHTOOL_LINK_MODE_25G \
/usr/include/linux/ethtool.h \
enum ETHTOOL_LINK_MODE_25000baseCR_Full_BIT \
@@ -150,11 +157,16 @@ mlx5_autoconf.h.new: $(RTE_SDK)/buildtools/auto-config-h.sh
enum ETHTOOL_LINK_MODE_100000baseKR4_Full_BIT \
$(AUTOCONF_OUTPUT)
$Q sh -- '$<' '$@' \
- HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT \
+ HAVE_IBV_DEVICE_COUNTERS_SET_V42 \
infiniband/verbs.h \
type 'struct ibv_counter_set_init_attr' \
$(AUTOCONF_OUTPUT)
$Q sh -- '$<' '$@' \
+ HAVE_IBV_DEVICE_COUNTERS_SET_V45 \
+ infiniband/verbs.h \
+ type 'struct ibv_counters_init_attr' \
+ $(AUTOCONF_OUTPUT)
+ $Q sh -- '$<' '$@' \
HAVE_RDMA_NL_NLDEV \
rdma/rdma_netlink.h \
enum RDMA_NL_NLDEV \
@@ -200,6 +212,11 @@ mlx5_autoconf.h.new: $(RTE_SDK)/buildtools/auto-config-h.sh
enum IFLA_PHYS_PORT_NAME \
$(AUTOCONF_OUTPUT)
$Q sh -- '$<' '$@' \
+ HAVE_TCA_CHAIN \
+ linux/rtnetlink.h \
+ enum TCA_CHAIN \
+ $(AUTOCONF_OUTPUT)
+ $Q sh -- '$<' '$@' \
HAVE_TCA_FLOWER_ACT \
linux/pkt_cls.h \
enum TCA_FLOWER_ACT \
@@ -335,11 +352,31 @@ mlx5_autoconf.h.new: $(RTE_SDK)/buildtools/auto-config-h.sh
enum TCA_FLOWER_KEY_VLAN_ETH_TYPE \
$(AUTOCONF_OUTPUT)
$Q sh -- '$<' '$@' \
+ HAVE_TCA_FLOWER_KEY_TCP_FLAGS \
+ linux/pkt_cls.h \
+ enum TCA_FLOWER_KEY_TCP_FLAGS \
+ $(AUTOCONF_OUTPUT)
+ $Q sh -- '$<' '$@' \
+ HAVE_TCA_FLOWER_KEY_TCP_FLAGS_MASK \
+ linux/pkt_cls.h \
+ enum TCA_FLOWER_KEY_TCP_FLAGS_MASK \
+ $(AUTOCONF_OUTPUT)
+ $Q sh -- '$<' '$@' \
+ HAVE_TC_ACT_GOTO_CHAIN \
+ linux/pkt_cls.h \
+ define TC_ACT_GOTO_CHAIN \
+ $(AUTOCONF_OUTPUT)
+ $Q sh -- '$<' '$@' \
HAVE_TC_ACT_VLAN \
linux/tc_act/tc_vlan.h \
enum TCA_VLAN_PUSH_VLAN_PRIORITY \
$(AUTOCONF_OUTPUT)
$Q sh -- '$<' '$@' \
+ HAVE_TC_ACT_PEDIT \
+ linux/tc_act/tc_pedit.h \
+ enum TCA_PEDIT_KEY_EX_HDR_TYPE_UDP \
+ $(AUTOCONF_OUTPUT)
+ $Q sh -- '$<' '$@' \
HAVE_SUPPORTED_40000baseKR4_Full \
/usr/include/linux/ethtool.h \
define SUPPORTED_40000baseKR4_Full \
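
The auto-config rules above only emit HAVE_* macros into the generated mlx5_autoconf.h; driver sources then compile optional paths conditionally on them. A minimal sketch of that pattern follows (illustrative only, not part of this patch; the helper name is invented, the macro comes from the check added above):

    /* Sketch: gate optional code on a generated mlx5_autoconf.h macro. */
    #include "mlx5_autoconf.h"

    static inline int
    mlx5_dv_flow_compiled_in(void)
    {
    #ifdef HAVE_IBV_FLOW_DV_SUPPORT
            return 1; /* rdma-core headers expose MLX5DV_FLOW_ACTION_TAG. */
    #else
            return 0; /* Direct Verbs flow engine is compiled out. */
    #endif
    }
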
diff --git a/drivers/net/mlx5/meson.build b/drivers/net/mlx5/meson.build
new file mode 100644
index 00000000..e8cbe3ee
--- /dev/null
+++ b/drivers/net/mlx5/meson.build
@@ -0,0 +1,244 @@
+# SPDX-License-Identifier: BSD-3-Clause
+# Copyright 2018 6WIND S.A.
+# Copyright 2018 Mellanox Technologies, Ltd
+
+pmd_dlopen = get_option('enable_driver_mlx_glue')
+LIB_GLUE_BASE = 'librte_pmd_mlx5_glue.so'
+LIB_GLUE_VERSION = '18.11.0'
+LIB_GLUE = LIB_GLUE_BASE + '.' + LIB_GLUE_VERSION
+if pmd_dlopen
+ dpdk_conf.set('RTE_LIBRTE_MLX5_DLOPEN_DEPS', 1)
+ cflags += [
+ '-DMLX5_GLUE="@0@"'.format(LIB_GLUE),
+ '-DMLX5_GLUE_VERSION="@0@"'.format(LIB_GLUE_VERSION),
+ ]
+endif
+libs = [
+ cc.find_library('mnl', required:false),
+ cc.find_library('mlx5', required:false),
+ cc.find_library('ibverbs', required:false),
+]
+build = true
+foreach lib:libs
+ if not lib.found()
+ build = false
+ endif
+endforeach
+if build
+ allow_experimental_apis = true
+ ext_deps += libs
+ sources = files(
+ 'mlx5.c',
+ 'mlx5_ethdev.c',
+ 'mlx5_flow.c',
+ 'mlx5_flow_dv.c',
+ 'mlx5_flow_tcf.c',
+ 'mlx5_flow_verbs.c',
+ 'mlx5_mac.c',
+ 'mlx5_mr.c',
+ 'mlx5_nl.c',
+ 'mlx5_rss.c',
+ 'mlx5_rxmode.c',
+ 'mlx5_rxq.c',
+ 'mlx5_rxtx.c',
+ 'mlx5_socket.c',
+ 'mlx5_stats.c',
+ 'mlx5_trigger.c',
+ 'mlx5_txq.c',
+ 'mlx5_vlan.c',
+ )
+ if dpdk_conf.has('RTE_ARCH_X86_64') or dpdk_conf.has('RTE_ARCH_ARM64')
+ sources += files('mlx5_rxtx_vec.c')
+ endif
+ if not pmd_dlopen
+ sources += files('mlx5_glue.c')
+ endif
+ cflags_options = [
+ '-Wextra',
+ '-std=c11',
+ '-Wno-strict-prototypes',
+ '-D_BSD_SOURCE',
+ '-D_DEFAULT_SOURCE',
+ '-D_XOPEN_SOURCE=600'
+ ]
+ foreach option:cflags_options
+ if cc.has_argument(option)
+ cflags += option
+ endif
+ endforeach
+ if get_option('buildtype').contains('debug')
+ cflags += [ '-pedantic', '-UNDEBUG', '-DPEDANTIC' ]
+ else
+ cflags += [ '-DNDEBUG', '-UPEDANTIC' ]
+ endif
+ # To maintain the compatibility with the make build system
+ # mlx5_autoconf.h file is still generated.
+ # input array for meson member search:
+ # [ "MACRO to define if found", "header for the search",
+ # "symbol to search", "struct member to search" ]
+ has_member_args = [
+ [ 'HAVE_IBV_MLX5_MOD_SWP', 'infiniband/mlx5dv.h',
+ 'struct mlx5dv_sw_parsing_caps', 'sw_parsing_offloads' ],
+ [ 'HAVE_IBV_DEVICE_COUNTERS_SET_V42', 'infiniband/verbs.h',
+ 'struct ibv_counter_set_init_attr', 'counter_set_id' ],
+ [ 'HAVE_IBV_DEVICE_COUNTERS_SET_V45', 'infiniband/verbs.h',
+ 'struct ibv_counters_init_attr', 'comp_mask' ],
+ ]
+ # input array for meson symbol search:
+ # [ "MACRO to define if found", "header for the search",
+ # "symbol to search" ]
+ has_sym_args = [
+ [ 'HAVE_IBV_DEVICE_STRIDING_RQ_SUPPORT', 'infiniband/mlx5dv.h',
+ 'MLX5DV_CQE_RES_FORMAT_CSUM_STRIDX' ],
+ [ 'HAVE_IBV_DEVICE_TUNNEL_SUPPORT', 'infiniband/mlx5dv.h',
+ 'MLX5DV_CONTEXT_MASK_TUNNEL_OFFLOADS' ],
+ [ 'HAVE_IBV_MLX5_MOD_MPW', 'infiniband/mlx5dv.h',
+ 'MLX5DV_CONTEXT_FLAGS_MPW_ALLOWED' ],
+ [ 'HAVE_IBV_MLX5_MOD_CQE_128B_COMP', 'infiniband/mlx5dv.h',
+ 'MLX5DV_CONTEXT_FLAGS_CQE_128B_COMP' ],
+ [ 'HAVE_IBV_FLOW_DV_SUPPORT', 'infiniband/mlx5dv.h',
+ 'MLX5DV_FLOW_ACTION_TAG' ],
+ [ 'HAVE_IBV_DEVICE_MPLS_SUPPORT', 'infiniband/verbs.h',
+ 'IBV_FLOW_SPEC_MPLS' ],
+ [ 'HAVE_IBV_WQ_FLAG_RX_END_PADDING', 'infiniband/verbs.h',
+ 'IBV_WQ_FLAG_RX_END_PADDING' ],
+ [ 'HAVE_SUPPORTED_40000baseKR4_Full', 'linux/ethtool.h',
+ 'SUPPORTED_40000baseKR4_Full' ],
+ [ 'HAVE_SUPPORTED_40000baseCR4_Full', 'linux/ethtool.h',
+ 'SUPPORTED_40000baseCR4_Full' ],
+ [ 'HAVE_SUPPORTED_40000baseSR4_Full', 'linux/ethtool.h',
+ 'SUPPORTED_40000baseSR4_Full' ],
+ [ 'HAVE_SUPPORTED_40000baseLR4_Full', 'linux/ethtool.h',
+ 'SUPPORTED_40000baseLR4_Full' ],
+ [ 'HAVE_SUPPORTED_56000baseKR4_Full', 'linux/ethtool.h',
+ 'SUPPORTED_56000baseKR4_Full' ],
+ [ 'HAVE_SUPPORTED_56000baseCR4_Full', 'linux/ethtool.h',
+ 'SUPPORTED_56000baseCR4_Full' ],
+ [ 'HAVE_SUPPORTED_56000baseSR4_Full', 'linux/ethtool.h',
+ 'SUPPORTED_56000baseSR4_Full' ],
+ [ 'HAVE_SUPPORTED_56000baseLR4_Full', 'linux/ethtool.h',
+ 'SUPPORTED_56000baseLR4_Full' ],
+ [ 'HAVE_ETHTOOL_LINK_MODE_25G', 'linux/ethtool.h',
+ 'ETHTOOL_LINK_MODE_25000baseCR_Full_BIT' ],
+ [ 'HAVE_ETHTOOL_LINK_MODE_50G', 'linux/ethtool.h',
+ 'ETHTOOL_LINK_MODE_50000baseCR2_Full_BIT' ],
+ [ 'HAVE_ETHTOOL_LINK_MODE_100G', 'linux/ethtool.h',
+ 'ETHTOOL_LINK_MODE_100000baseKR4_Full_BIT' ],
+ [ 'HAVE_IFLA_PHYS_SWITCH_ID', 'linux/if_link.h',
+ 'IFLA_PHYS_SWITCH_ID' ],
+ [ 'HAVE_IFLA_PHYS_PORT_NAME', 'linux/if_link.h',
+ 'IFLA_PHYS_PORT_NAME' ],
+ [ 'HAVE_TCA_CHAIN', 'linux/rtnetlink.h',
+ 'TCA_CHAIN' ],
+ [ 'HAVE_TCA_FLOWER_ACT', 'linux/pkt_cls.h',
+ 'TCA_FLOWER_ACT' ],
+ [ 'HAVE_TCA_FLOWER_FLAGS', 'linux/pkt_cls.h',
+ 'TCA_FLOWER_FLAGS' ],
+ [ 'HAVE_TCA_FLOWER_KEY_ETH_TYPE', 'linux/pkt_cls.h',
+ 'TCA_FLOWER_KEY_ETH_TYPE' ],
+ [ 'HAVE_TCA_FLOWER_KEY_ETH_DST', 'linux/pkt_cls.h',
+ 'TCA_FLOWER_KEY_ETH_DST' ],
+ [ 'HAVE_TCA_FLOWER_KEY_ETH_DST_MASK', 'linux/pkt_cls.h',
+ 'TCA_FLOWER_KEY_ETH_DST_MASK' ],
+ [ 'HAVE_TCA_FLOWER_KEY_ETH_SRC', 'linux/pkt_cls.h',
+ 'TCA_FLOWER_KEY_ETH_SRC' ],
+ [ 'HAVE_TCA_FLOWER_KEY_ETH_SRC_MASK', 'linux/pkt_cls.h',
+ 'TCA_FLOWER_KEY_ETH_SRC_MASK' ],
+ [ 'HAVE_TCA_FLOWER_KEY_IP_PROTO', 'linux/pkt_cls.h',
+ 'TCA_FLOWER_KEY_IP_PROTO' ],
+ [ 'HAVE_TCA_FLOWER_KEY_IPV4_SRC', 'linux/pkt_cls.h',
+ 'TCA_FLOWER_KEY_IPV4_SRC' ],
+ [ 'HAVE_TCA_FLOWER_KEY_IPV4_SRC_MASK', 'linux/pkt_cls.h',
+ 'TCA_FLOWER_KEY_IPV4_SRC_MASK' ],
+ [ 'HAVE_TCA_FLOWER_KEY_IPV4_DST', 'linux/pkt_cls.h',
+ 'TCA_FLOWER_KEY_IPV4_DST' ],
+ [ 'HAVE_TCA_FLOWER_KEY_IPV4_DST_MASK', 'linux/pkt_cls.h',
+ 'TCA_FLOWER_KEY_IPV4_DST_MASK' ],
+ [ 'HAVE_TCA_FLOWER_KEY_IPV6_SRC', 'linux/pkt_cls.h',
+ 'TCA_FLOWER_KEY_IPV6_SRC' ],
+ [ 'HAVE_TCA_FLOWER_KEY_IPV6_SRC_MASK', 'linux/pkt_cls.h',
+ 'TCA_FLOWER_KEY_IPV6_SRC_MASK' ],
+ [ 'HAVE_TCA_FLOWER_KEY_IPV6_DST', 'linux/pkt_cls.h',
+ 'TCA_FLOWER_KEY_IPV6_DST' ],
+ [ 'HAVE_TCA_FLOWER_KEY_IPV6_DST_MASK', 'linux/pkt_cls.h',
+ 'TCA_FLOWER_KEY_IPV6_DST_MASK' ],
+ [ 'HAVE_TCA_FLOWER_KEY_TCP_SRC', 'linux/pkt_cls.h',
+ 'TCA_FLOWER_KEY_TCP_SRC' ],
+ [ 'HAVE_TCA_FLOWER_KEY_TCP_SRC_MASK', 'linux/pkt_cls.h',
+ 'TCA_FLOWER_KEY_TCP_SRC_MASK' ],
+ [ 'HAVE_TCA_FLOWER_KEY_TCP_DST', 'linux/pkt_cls.h',
+ 'TCA_FLOWER_KEY_TCP_DST' ],
+ [ 'HAVE_TCA_FLOWER_KEY_TCP_DST_MASK', 'linux/pkt_cls.h',
+ 'TCA_FLOWER_KEY_TCP_DST_MASK' ],
+ [ 'HAVE_TCA_FLOWER_KEY_UDP_SRC', 'linux/pkt_cls.h',
+ 'TCA_FLOWER_KEY_UDP_SRC' ],
+ [ 'HAVE_TCA_FLOWER_KEY_UDP_SRC_MASK', 'linux/pkt_cls.h',
+ 'TCA_FLOWER_KEY_UDP_SRC_MASK' ],
+ [ 'HAVE_TCA_FLOWER_KEY_UDP_DST', 'linux/pkt_cls.h',
+ 'TCA_FLOWER_KEY_UDP_DST' ],
+ [ 'HAVE_TCA_FLOWER_KEY_UDP_DST_MASK', 'linux/pkt_cls.h',
+ 'TCA_FLOWER_KEY_UDP_DST_MASK' ],
+ [ 'HAVE_TCA_FLOWER_KEY_VLAN_ID', 'linux/pkt_cls.h',
+ 'TCA_FLOWER_KEY_VLAN_ID' ],
+ [ 'HAVE_TCA_FLOWER_KEY_VLAN_PRIO', 'linux/pkt_cls.h',
+ 'TCA_FLOWER_KEY_VLAN_PRIO' ],
+ [ 'HAVE_TCA_FLOWER_KEY_VLAN_ETH_TYPE', 'linux/pkt_cls.h',
+ 'TCA_FLOWER_KEY_VLAN_ETH_TYPE' ],
+ [ 'HAVE_TCA_FLOWER_KEY_TCP_FLAGS', 'linux/pkt_cls.h',
+ 'TCA_FLOWER_KEY_TCP_FLAGS' ],
+ [ 'HAVE_TCA_FLOWER_KEY_TCP_FLAGS_MASK', 'linux/pkt_cls.h',
+ 'TCA_FLOWER_KEY_TCP_FLAGS_MASK' ],
+ [ 'HAVE_TC_ACT_GOTO_CHAIN', 'linux/pkt_cls.h',
+ 'TC_ACT_GOTO_CHAIN' ],
+ [ 'HAVE_TC_ACT_VLAN', 'linux/tc_act/tc_vlan.h',
+ 'TCA_VLAN_PUSH_VLAN_PRIORITY' ],
+ [ 'HAVE_TC_ACT_PEDIT', 'linux/tc_act/tc_pedit.h',
+ 'TCA_PEDIT_KEY_EX_HDR_TYPE_UDP' ],
+ [ 'HAVE_RDMA_NL_NLDEV', 'rdma/rdma_netlink.h',
+ 'RDMA_NL_NLDEV' ],
+ [ 'HAVE_RDMA_NLDEV_CMD_GET', 'rdma/rdma_netlink.h',
+ 'RDMA_NLDEV_CMD_GET' ],
+ [ 'HAVE_RDMA_NLDEV_CMD_PORT_GET', 'rdma/rdma_netlink.h',
+ 'RDMA_NLDEV_CMD_PORT_GET' ],
+ [ 'HAVE_RDMA_NLDEV_ATTR_DEV_INDEX', 'rdma/rdma_netlink.h',
+ 'RDMA_NLDEV_ATTR_DEV_INDEX' ],
+ [ 'HAVE_RDMA_NLDEV_ATTR_DEV_NAME', 'rdma/rdma_netlink.h',
+ 'RDMA_NLDEV_ATTR_DEV_NAME' ],
+ [ 'HAVE_RDMA_NLDEV_ATTR_PORT_INDEX', 'rdma/rdma_netlink.h',
+ 'RDMA_NLDEV_ATTR_PORT_INDEX' ],
+ [ 'HAVE_RDMA_NLDEV_ATTR_NDEV_INDEX', 'rdma/rdma_netlink.h',
+ 'RDMA_NLDEV_ATTR_NDEV_INDEX' ],
+ ]
+ config = configuration_data()
+ foreach arg:has_sym_args
+ config.set(arg[0], cc.has_header_symbol(arg[1], arg[2]))
+ endforeach
+ foreach arg:has_member_args
+ file_prefix = '#include<' + arg[1] + '>'
+ config.set(arg[0], cc.has_member(arg[2], arg[3],
+ prefix : file_prefix))
+ endforeach
+ configure_file(output : 'mlx5_autoconf.h', configuration : config)
+endif
+# Build Glue Library
+if pmd_dlopen and build
+ dlopen_name = 'mlx5_glue'
+ dlopen_lib_name = driver_name_fmt.format(dlopen_name)
+ dlopen_so_version = LIB_GLUE_VERSION
+ dlopen_sources = files('mlx5_glue.c')
+ dlopen_install_dir = [ eal_pmd_path + '-glue' ]
+ shared_lib = shared_library(
+ dlopen_lib_name,
+ dlopen_sources,
+ include_directories: global_inc,
+ c_args: cflags,
+ dependencies: libs,
+ link_args: [
+ '-Wl,-export-dynamic',
+ '-Wl,-h,@0@'.format(LIB_GLUE),
+ ],
+ soversion: dlopen_so_version,
+ install: true,
+ install_dir: dlopen_install_dir,
+ )
+endif
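
The MLX5_GLUE and MLX5_GLUE_VERSION defines injected above are consumed at run time when the PMD dlopen()s the glue library. A rough sketch of that loading pattern (illustrative only and simplified; the actual loader in mlx5.c adds extra path and environment handling):

    /* Sketch: open the rdma-core glue library named by -DMLX5_GLUE. */
    #include <dlfcn.h>
    #include <stdio.h>

    static void *
    glue_handle_open(void)
    {
            void *handle = dlopen(MLX5_GLUE, RTLD_LAZY | RTLD_GLOBAL);

            if (handle == NULL)
                    fprintf(stderr, "cannot load %s: %s\n",
                            MLX5_GLUE, dlerror());
            return handle;
    }
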
diff --git a/drivers/net/mlx5/mlx5.c b/drivers/net/mlx5/mlx5.c
index ec63bc6e..a277b573 100644
--- a/drivers/net/mlx5/mlx5.c
+++ b/drivers/net/mlx5/mlx5.c
@@ -46,6 +46,7 @@
#include "mlx5_defs.h"
#include "mlx5_glue.h"
#include "mlx5_mr.h"
+#include "mlx5_flow.h"
/* Device parameter to enable RX completion queue compression. */
#define MLX5_RXQ_CQE_COMP_EN "rxq_cqe_comp_en"
@@ -89,6 +90,9 @@
/* Allow L3 VXLAN flow creation. */
#define MLX5_L3_VXLAN_EN "l3_vxlan_en"
+/* Activate DV flow steering. */
+#define MLX5_DV_FLOW_EN "dv_flow_en"
+
/* Activate Netlink support in VF mode. */
#define MLX5_VF_NL_EN "vf_nl_en"
@@ -282,8 +286,8 @@ mlx5_dev_close(struct rte_eth_dev *dev)
close(priv->nl_socket_route);
if (priv->nl_socket_rdma >= 0)
close(priv->nl_socket_rdma);
- if (priv->mnl_socket)
- mlx5_nl_flow_socket_destroy(priv->mnl_socket);
+ if (priv->tcf_context)
+ mlx5_flow_tcf_context_destroy(priv->tcf_context);
ret = mlx5_hrxq_ibv_verify(dev);
if (ret)
DRV_LOG(WARNING, "port %u some hash Rx queue still remain",
@@ -333,6 +337,17 @@ mlx5_dev_close(struct rte_eth_dev *dev)
}
memset(priv, 0, sizeof(*priv));
priv->domain_id = RTE_ETH_DEV_SWITCH_DOMAIN_ID_INVALID;
+ /*
+ * flag to rte_eth_dev_close() that it should release the port resources
+ * (calling rte_eth_dev_release_port()) in addition to closing it.
+ */
+ dev->data->dev_flags |= RTE_ETH_DEV_CLOSE_REMOVE;
+ /*
+ * Reset mac_addrs to NULL such that it is not freed as part of
+ * rte_eth_dev_release_port(). mac_addrs is part of dev_private so
+ * it is freed when dev_private is freed.
+ */
+ dev->data->mac_addrs = NULL;
}
const struct eth_dev_ops mlx5_dev_ops = {
@@ -477,7 +492,7 @@ mlx5_args_check(const char *key, const char *val, void *opaque)
} else if (strcmp(MLX5_TXQS_MIN_INLINE, key) == 0) {
config->txqs_inline = tmp;
} else if (strcmp(MLX5_TXQ_MPW_EN, key) == 0) {
- config->mps = !!tmp ? config->mps : 0;
+ config->mps = !!tmp;
} else if (strcmp(MLX5_TXQ_MPW_HDR_DSEG_EN, key) == 0) {
config->mpw_hdr_dseg = !!tmp;
} else if (strcmp(MLX5_TXQ_MAX_INLINE_LEN, key) == 0) {
@@ -490,6 +505,8 @@ mlx5_args_check(const char *key, const char *val, void *opaque)
config->l3_vxlan_en = !!tmp;
} else if (strcmp(MLX5_VF_NL_EN, key) == 0) {
config->vf_nl_en = !!tmp;
+ } else if (strcmp(MLX5_DV_FLOW_EN, key) == 0) {
+ config->dv_flow_en = !!tmp;
} else {
DRV_LOG(WARNING, "%s: unknown parameter", key);
rte_errno = EINVAL;
@@ -527,6 +544,7 @@ mlx5_args(struct mlx5_dev_config *config, struct rte_devargs *devargs)
MLX5_RX_VEC_EN,
MLX5_L3_VXLAN_EN,
MLX5_VF_NL_EN,
+ MLX5_DV_FLOW_EN,
MLX5_REPRESENTOR,
NULL,
};
@@ -568,11 +586,13 @@ static struct rte_pci_driver mlx5_driver;
static void *uar_base;
static int
-find_lower_va_bound(const struct rte_memseg_list *msl __rte_unused,
+find_lower_va_bound(const struct rte_memseg_list *msl,
const struct rte_memseg *ms, void *arg)
{
void **addr = arg;
+ if (msl->external)
+ return 0;
if (*addr == NULL)
*addr = ms->addr;
else
@@ -685,9 +705,10 @@ mlx5_uar_init_secondary(struct rte_eth_dev *dev)
*
* @return
* A valid Ethernet device object on success, NULL otherwise and rte_errno
- * is set. The following error is defined:
+ * is set. The following errors are defined:
*
* EBUSY: device is not supposed to be spawned.
+ * EEXIST: device is already spawned
*/
static struct rte_eth_dev *
mlx5_dev_spawn(struct rte_device *dpdk_dev,
@@ -702,6 +723,7 @@ mlx5_dev_spawn(struct rte_device *dpdk_dev,
struct mlx5dv_context dv_attr = { .comp_mask = 0 };
struct mlx5_dev_config config = {
.vf = !!vf,
+ .mps = MLX5_ARG_UNSET,
.tx_vec_en = 1,
.rx_vec_en = 1,
.mpw_hdr_dseg = 0,
@@ -729,12 +751,10 @@ mlx5_dev_spawn(struct rte_device *dpdk_dev,
unsigned int mprq_max_stride_size_n = 0;
unsigned int mprq_min_stride_num_n = 0;
unsigned int mprq_max_stride_num_n = 0;
-#ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
- struct ibv_counter_set_description cs_desc = { .counter_type = 0 };
-#endif
struct ether_addr mac;
char name[RTE_ETH_NAME_MAX_LEN];
int own_domain_id = 0;
+ uint16_t port_id;
unsigned int i;
/* Determine if this port representor is supposed to be spawned. */
@@ -757,6 +777,17 @@ mlx5_dev_spawn(struct rte_device *dpdk_dev,
return NULL;
}
}
+ /* Build device name. */
+ if (!switch_info->representor)
+ rte_strlcpy(name, dpdk_dev->name, sizeof(name));
+ else
+ snprintf(name, sizeof(name), "%s_representor_%u",
+ dpdk_dev->name, switch_info->port_name);
+ /* check if the device is already spawned */
+ if (rte_eth_dev_get_port_by_name(name, &port_id) == 0) {
+ rte_errno = EEXIST;
+ return NULL;
+ }
/* Prepare shared data between primary and secondary process. */
mlx5_prepare_shared_data();
errno = 0;
@@ -791,7 +822,6 @@ mlx5_dev_spawn(struct rte_device *dpdk_dev,
DRV_LOG(DEBUG, "MPW isn't supported");
mps = MLX5_MPW_DISABLED;
}
- config.mps = mps;
#ifdef HAVE_IBV_MLX5_MOD_SWP
if (dv_attr.comp_mask & MLX5DV_CONTEXT_MASK_SWP)
swp = dv_attr.sw_parsing_caps.sw_parsing_offloads;
@@ -864,11 +894,6 @@ mlx5_dev_spawn(struct rte_device *dpdk_dev,
DEBUG("ibv_query_device_ex() failed");
goto error;
}
- if (!switch_info->representor)
- rte_strlcpy(name, dpdk_dev->name, sizeof(name));
- else
- snprintf(name, sizeof(name), "%s_representor_%u",
- dpdk_dev->name, switch_info->port_name);
DRV_LOG(DEBUG, "naming Ethernet device \"%s\"", name);
if (rte_eal_process_type() == RTE_PROC_SECONDARY) {
eth_dev = rte_eth_dev_attach_secondary(name);
@@ -1000,12 +1025,15 @@ mlx5_dev_spawn(struct rte_device *dpdk_dev,
config.hw_csum = !!(attr.device_cap_flags_ex & IBV_DEVICE_RAW_IP_CSUM);
DRV_LOG(DEBUG, "checksum offloading is %ssupported",
(config.hw_csum ? "" : "not "));
-#ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
- config.flow_counter_en = !!attr.max_counter_sets;
- mlx5_glue->describe_counter_set(ctx, 0, &cs_desc);
- DRV_LOG(DEBUG, "counter type = %d, num of cs = %ld, attributes = %d",
- cs_desc.counter_type, cs_desc.num_of_cs,
- cs_desc.attributes);
+#if !defined(HAVE_IBV_DEVICE_COUNTERS_SET_V42) && \
+ !defined(HAVE_IBV_DEVICE_COUNTERS_SET_V45)
+ DRV_LOG(DEBUG, "counters are not supported");
+#endif
+#ifndef HAVE_IBV_FLOW_DV_SUPPORT
+ if (config.dv_flow_en) {
+ DRV_LOG(WARNING, "DV flow is not supported");
+ config.dv_flow_en = 0;
+ }
#endif
config.ind_table_max_size =
attr.rss_caps.max_rwq_indirection_table_size;
@@ -1035,13 +1063,15 @@ mlx5_dev_spawn(struct rte_device *dpdk_dev,
(1 << IBV_QPT_RAW_PACKET)));
if (config.tso)
config.tso_max_payload_sz = attr.tso_caps.max_tso;
- if (config.mps && !mps) {
- DRV_LOG(ERR,
- "multi-packet send not supported on this device"
- " (" MLX5_TXQ_MPW_EN ")");
- err = ENOTSUP;
- goto error;
- }
+ /*
+ * MPW is disabled by default, while the Enhanced MPW is enabled
+ * by default.
+ */
+ if (config.mps == MLX5_ARG_UNSET)
+ config.mps = (mps == MLX5_MPW_ENHANCED) ? MLX5_MPW_ENHANCED :
+ MLX5_MPW_DISABLED;
+ else
+ config.mps = config.mps ? mps : MLX5_MPW_DISABLED;
DRV_LOG(INFO, "%sMPS is %s",
config.mps == MLX5_MPW_ENHANCED ? "enhanced " : "",
config.mps != MLX5_MPW_DISABLED ? "enabled" : "disabled");
@@ -1073,13 +1103,14 @@ mlx5_dev_spawn(struct rte_device *dpdk_dev,
err = ENOMEM;
goto error;
}
- if (priv->representor)
+ if (priv->representor) {
eth_dev->data->dev_flags |= RTE_ETH_DEV_REPRESENTOR;
+ eth_dev->data->representor_id = priv->representor_id;
+ }
eth_dev->data->dev_private = priv;
priv->dev_data = eth_dev->data;
eth_dev->data->mac_addrs = priv->mac;
eth_dev->device = dpdk_dev;
- eth_dev->device->driver = &mlx5_driver.driver;
err = mlx5_uar_init_primary(eth_dev);
if (err) {
err = rte_errno;
@@ -1128,8 +1159,8 @@ mlx5_dev_spawn(struct rte_device *dpdk_dev,
claim_zero(mlx5_mac_addr_add(eth_dev, &mac, 0, 0));
if (vf && config.vf_nl_en)
mlx5_nl_mac_addr_sync(eth_dev);
- priv->mnl_socket = mlx5_nl_flow_socket_create();
- if (!priv->mnl_socket) {
+ priv->tcf_context = mlx5_flow_tcf_context_create();
+ if (!priv->tcf_context) {
err = -rte_errno;
DRV_LOG(WARNING,
"flow rules relying on switch offloads will not be"
@@ -1144,16 +1175,16 @@ mlx5_dev_spawn(struct rte_device *dpdk_dev,
error.message =
"cannot retrieve network interface index";
} else {
- err = mlx5_nl_flow_init(priv->mnl_socket, ifindex,
- &error);
+ err = mlx5_flow_tcf_init(priv->tcf_context,
+ ifindex, &error);
}
if (err) {
DRV_LOG(WARNING,
"flow rules relying on switch offloads will"
" not be supported: %s: %s",
error.message, strerror(rte_errno));
- mlx5_nl_flow_socket_destroy(priv->mnl_socket);
- priv->mnl_socket = NULL;
+ mlx5_flow_tcf_context_destroy(priv->tcf_context);
+ priv->tcf_context = NULL;
}
}
TAILQ_INIT(&priv->flows);
@@ -1208,16 +1239,21 @@ error:
close(priv->nl_socket_route);
if (priv->nl_socket_rdma >= 0)
close(priv->nl_socket_rdma);
- if (priv->mnl_socket)
- mlx5_nl_flow_socket_destroy(priv->mnl_socket);
+ if (priv->tcf_context)
+ mlx5_flow_tcf_context_destroy(priv->tcf_context);
if (own_domain_id)
claim_zero(rte_eth_switch_domain_free(priv->domain_id));
rte_free(priv);
+ if (eth_dev != NULL)
+ eth_dev->data->dev_private = NULL;
}
if (pd)
claim_zero(mlx5_glue->dealloc_pd(pd));
- if (eth_dev)
+ if (eth_dev != NULL) {
+ /* mac_addrs must not be freed alone because part of dev_private */
+ eth_dev->data->mac_addrs = NULL;
rte_eth_dev_release_port(eth_dev);
+ }
if (ctx)
claim_zero(mlx5_glue->close_device(ctx));
assert(err > 0);
@@ -1404,9 +1440,9 @@ mlx5_pci_probe(struct rte_pci_driver *pci_drv __rte_unused,
list[i].eth_dev = mlx5_dev_spawn
(&pci_dev->device, list[i].ibv_dev, vf, &list[i].info);
if (!list[i].eth_dev) {
- if (rte_errno != EBUSY)
+ if (rte_errno != EBUSY && rte_errno != EEXIST)
break;
- /* Device is disabled, ignore it. */
+ /* Device is disabled or already spawned. Ignore it. */
continue;
}
restore = list[i].eth_dev->data->dev_flags;
@@ -1437,8 +1473,8 @@ mlx5_pci_probe(struct rte_pci_driver *pci_drv __rte_unused,
if (!list[i].eth_dev)
continue;
mlx5_dev_close(list[i].eth_dev);
- if (rte_eal_process_type() == RTE_PROC_PRIMARY)
- rte_free(list[i].eth_dev->data->dev_private);
+ /* mac_addrs must not be freed because in dev_private */
+ list[i].eth_dev->data->mac_addrs = NULL;
claim_zero(rte_eth_dev_release_port(list[i].eth_dev));
}
/* Restore original error. */
@@ -1449,6 +1485,32 @@ mlx5_pci_probe(struct rte_pci_driver *pci_drv __rte_unused,
return ret;
}
+/**
+ * DPDK callback to remove a PCI device.
+ *
+ * This function removes all Ethernet devices belong to a given PCI device.
+ *
+ * @param[in] pci_dev
+ * Pointer to the PCI device.
+ *
+ * @return
+ * 0 on success, the function cannot fail.
+ */
+static int
+mlx5_pci_remove(struct rte_pci_device *pci_dev)
+{
+ uint16_t port_id;
+ struct rte_eth_dev *port;
+
+ for (port_id = 0; port_id < RTE_MAX_ETHPORTS; port_id++) {
+ port = &rte_eth_devices[port_id];
+ if (port->state != RTE_ETH_DEV_UNUSED &&
+ port->device == &pci_dev->device)
+ rte_eth_dev_close(port_id);
+ }
+ return 0;
+}
+
static const struct rte_pci_id mlx5_pci_id_map[] = {
{
RTE_PCI_DEVICE(PCI_VENDOR_ID_MELLANOX,
@@ -1487,6 +1549,10 @@ static const struct rte_pci_id mlx5_pci_id_map[] = {
PCI_DEVICE_ID_MELLANOX_CONNECTX5BF)
},
{
+ RTE_PCI_DEVICE(PCI_VENDOR_ID_MELLANOX,
+ PCI_DEVICE_ID_MELLANOX_CONNECTX5BFVF)
+ },
+ {
.vendor_id = 0
}
};
@@ -1497,7 +1563,9 @@ static struct rte_pci_driver mlx5_driver = {
},
.id_table = mlx5_pci_id_map,
.probe = mlx5_pci_probe,
- .drv_flags = RTE_PCI_DRV_INTR_LSC | RTE_PCI_DRV_INTR_RMV,
+ .remove = mlx5_pci_remove,
+ .drv_flags = (RTE_PCI_DRV_INTR_LSC | RTE_PCI_DRV_INTR_RMV |
+ RTE_PCI_DRV_PROBE_AGAIN),
};
#ifdef RTE_LIBRTE_MLX5_DLOPEN_DEPS
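
With RTE_ETH_DEV_CLOSE_REMOVE now set by mlx5_dev_close() and a .remove callback wired into the PCI driver, closing a port releases all of its resources. A minimal application-side sketch (assumed helper name, standard 18.11 rte_ethdev calls):

    /* Sketch: full port teardown once the PMD sets RTE_ETH_DEV_CLOSE_REMOVE. */
    #include <rte_ethdev.h>

    static void
    app_teardown_port(uint16_t port_id)
    {
            rte_eth_dev_stop(port_id);
            rte_eth_dev_close(port_id); /* also releases the port when the flag is set */
    }
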
diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h
index a3a34cff..74d87c05 100644
--- a/drivers/net/mlx5/mlx5.h
+++ b/drivers/net/mlx5/mlx5.h
@@ -51,6 +51,7 @@ enum {
PCI_DEVICE_ID_MELLANOX_CONNECTX5EX = 0x1019,
PCI_DEVICE_ID_MELLANOX_CONNECTX5EXVF = 0x101a,
PCI_DEVICE_ID_MELLANOX_CONNECTX5BF = 0xa2d2,
+ PCI_DEVICE_ID_MELLANOX_CONNECTX5BFVF = 0xa2d3,
};
/** Switch information returned by mlx5_nl_switch_info(). */
@@ -71,12 +72,23 @@ struct mlx5_shared_data {
extern struct mlx5_shared_data *mlx5_shared_data;
+struct mlx5_counter_ctrl {
+ /* Name of the counter. */
+ char dpdk_name[RTE_ETH_XSTATS_NAME_SIZE];
+ /* Name of the counter on the device table. */
+ char ctr_name[RTE_ETH_XSTATS_NAME_SIZE];
+ uint32_t ib:1; /**< Nonzero for IB counters. */
+};
+
struct mlx5_xstats_ctrl {
/* Number of device stats. */
uint16_t stats_n;
+ /* Number of device stats identified by PMD. */
+ uint16_t mlx5_stats_n;
/* Index in the device counters table. */
uint16_t dev_table_idx[MLX5_MAX_XSTATS];
uint64_t base[MLX5_MAX_XSTATS];
+ struct mlx5_counter_ctrl info[MLX5_MAX_XSTATS];
};
/* Flow list . */
@@ -99,11 +111,9 @@ struct mlx5_dev_config {
unsigned int hw_fcs_strip:1; /* FCS stripping is supported. */
unsigned int hw_padding:1; /* End alignment padding is supported. */
unsigned int vf:1; /* This is a VF. */
- unsigned int mps:2; /* Multi-packet send supported mode. */
unsigned int tunnel_en:1;
/* Whether tunnel stateless offloads are supported. */
unsigned int mpls_en:1; /* MPLS over GRE/UDP is enabled. */
- unsigned int flow_counter_en:1; /* Whether flow counter is supported. */
unsigned int cqe_comp:1; /* CQE compression is enabled. */
unsigned int tso:1; /* Whether TSO is supported. */
unsigned int tx_vec_en:1; /* Tx vector is enabled. */
@@ -111,6 +121,7 @@ struct mlx5_dev_config {
unsigned int mpw_hdr_dseg:1; /* Enable DSEGs in the title WQEBB. */
unsigned int l3_vxlan_en:1; /* Enable L3 VXLAN flow creation. */
unsigned int vf_nl_en:1; /* Enable Netlink requests in VF mode. */
+ unsigned int dv_flow_en:1; /* Enable DV flow. */
unsigned int swp:1; /* Tx generic tunnel checksum and TSO offload. */
struct {
unsigned int enabled:1; /* Whether MPRQ is enabled. */
@@ -122,6 +133,7 @@ struct mlx5_dev_config {
unsigned int min_rxqs_num;
/* Rx queue count threshold to enable MPRQ. */
} mprq; /* Configurations for Multi-Packet RQ. */
+ int mps; /* Multi-packet send supported mode. */
unsigned int flow_prio; /* Number of flow priorities. */
unsigned int tso_max_payload_sz; /* Maximum TCP payload for TSO. */
unsigned int ind_table_max_size; /* Maximum indirection table size. */
@@ -156,13 +168,7 @@ struct mlx5_drop {
struct mlx5_rxq_ibv *rxq; /* Verbs Rx queue. */
};
-/** DPDK port to network interface index (ifindex) conversion. */
-struct mlx5_nl_flow_ptoi {
- uint16_t port_id; /**< DPDK port ID. */
- unsigned int ifindex; /**< Network interface index. */
-};
-
-struct mnl_socket;
+struct mlx5_flow_tcf_context;
struct priv {
LIST_ENTRY(priv) mem_event_cb; /* Called by memory event callback. */
@@ -212,6 +218,7 @@ struct priv {
LIST_HEAD(txqibv, mlx5_txq_ibv) txqsibv; /* Verbs Tx queues. */
/* Verbs Indirection tables. */
LIST_HEAD(ind_tables, mlx5_ind_table_ibv) ind_tbls;
+ LIST_HEAD(matchers, mlx5_flow_dv_matcher) matchers;
uint32_t link_speed_capa; /* Link speed capabilities. */
struct mlx5_xstats_ctrl xstats_ctrl; /* Extended stats control. */
int primary_socket; /* Unix socket for primary process. */
@@ -228,7 +235,7 @@ struct priv {
rte_spinlock_t uar_lock[MLX5_UAR_PAGE_NUM_MAX];
/* UAR same-page access control required in 32bit implementations. */
#endif
- struct mnl_socket *mnl_socket; /* Libmnl socket. */
+ struct mlx5_flow_tcf_context *tcf_context; /* TC flower context. */
};
#define PORT_ID(priv) ((priv)->dev_data->port_id)
@@ -240,12 +247,9 @@ int mlx5_getenv_int(const char *);
/* mlx5_ethdev.c */
-int mlx5_get_master_ifname(const struct rte_eth_dev *dev,
- char (*ifname)[IF_NAMESIZE]);
int mlx5_get_ifname(const struct rte_eth_dev *dev, char (*ifname)[IF_NAMESIZE]);
unsigned int mlx5_ifindex(const struct rte_eth_dev *dev);
-int mlx5_ifreq(const struct rte_eth_dev *dev, int req, struct ifreq *ifr,
- int master);
+int mlx5_ifreq(const struct rte_eth_dev *dev, int req, struct ifreq *ifr);
int mlx5_get_mtu(struct rte_eth_dev *dev, uint16_t *mtu);
int mlx5_set_flags(struct rte_eth_dev *dev, unsigned int keep,
unsigned int flags);
@@ -396,23 +400,4 @@ unsigned int mlx5_nl_ifindex(int nl, const char *name);
int mlx5_nl_switch_info(int nl, unsigned int ifindex,
struct mlx5_switch_info *info);
-/* mlx5_nl_flow.c */
-
-int mlx5_nl_flow_transpose(void *buf,
- size_t size,
- const struct mlx5_nl_flow_ptoi *ptoi,
- const struct rte_flow_attr *attr,
- const struct rte_flow_item *pattern,
- const struct rte_flow_action *actions,
- struct rte_flow_error *error);
-void mlx5_nl_flow_brand(void *buf, uint32_t handle);
-int mlx5_nl_flow_create(struct mnl_socket *nl, void *buf,
- struct rte_flow_error *error);
-int mlx5_nl_flow_destroy(struct mnl_socket *nl, void *buf,
- struct rte_flow_error *error);
-int mlx5_nl_flow_init(struct mnl_socket *nl, unsigned int ifindex,
- struct rte_flow_error *error);
-struct mnl_socket *mlx5_nl_flow_socket_create(void);
-void mlx5_nl_flow_socket_destroy(struct mnl_socket *nl);
-
#endif /* RTE_PMD_MLX5_H_ */
diff --git a/drivers/net/mlx5/mlx5_ethdev.c b/drivers/net/mlx5/mlx5_ethdev.c
index 34c5b95e..d178ed6a 100644
--- a/drivers/net/mlx5/mlx5_ethdev.c
+++ b/drivers/net/mlx5/mlx5_ethdev.c
@@ -3,8 +3,6 @@
* Copyright 2015 Mellanox Technologies, Ltd
*/
-#define _GNU_SOURCE
-
#include <stddef.h>
#include <assert.h>
#include <inttypes.h>
@@ -129,7 +127,7 @@ struct ethtool_link_settings {
* @return
* 0 on success, a negative errno value otherwise and rte_errno is set.
*/
-int
+static int
mlx5_get_master_ifname(const struct rte_eth_dev *dev,
char (*ifname)[IF_NAMESIZE])
{
@@ -270,16 +268,12 @@ mlx5_ifindex(const struct rte_eth_dev *dev)
* Request number to pass to ioctl().
* @param[out] ifr
* Interface request structure output buffer.
- * @param master
- * When device is a port representor, perform request on master device
- * instead.
*
* @return
* 0 on success, a negative errno value otherwise and rte_errno is set.
*/
int
-mlx5_ifreq(const struct rte_eth_dev *dev, int req, struct ifreq *ifr,
- int master)
+mlx5_ifreq(const struct rte_eth_dev *dev, int req, struct ifreq *ifr)
{
int sock = socket(PF_INET, SOCK_DGRAM, IPPROTO_IP);
int ret = 0;
@@ -288,10 +282,7 @@ mlx5_ifreq(const struct rte_eth_dev *dev, int req, struct ifreq *ifr,
rte_errno = errno;
return -rte_errno;
}
- if (master)
- ret = mlx5_get_master_ifname(dev, &ifr->ifr_name);
- else
- ret = mlx5_get_ifname(dev, &ifr->ifr_name);
+ ret = mlx5_get_ifname(dev, &ifr->ifr_name);
if (ret)
goto error;
ret = ioctl(sock, req, ifr);
@@ -321,7 +312,7 @@ int
mlx5_get_mtu(struct rte_eth_dev *dev, uint16_t *mtu)
{
struct ifreq request;
- int ret = mlx5_ifreq(dev, SIOCGIFMTU, &request, 0);
+ int ret = mlx5_ifreq(dev, SIOCGIFMTU, &request);
if (ret)
return ret;
@@ -345,7 +336,7 @@ mlx5_set_mtu(struct rte_eth_dev *dev, uint16_t mtu)
{
struct ifreq request = { .ifr_mtu = mtu, };
- return mlx5_ifreq(dev, SIOCSIFMTU, &request, 0);
+ return mlx5_ifreq(dev, SIOCSIFMTU, &request);
}
/**
@@ -365,13 +356,13 @@ int
mlx5_set_flags(struct rte_eth_dev *dev, unsigned int keep, unsigned int flags)
{
struct ifreq request;
- int ret = mlx5_ifreq(dev, SIOCGIFFLAGS, &request, 0);
+ int ret = mlx5_ifreq(dev, SIOCGIFFLAGS, &request);
if (ret)
return ret;
request.ifr_flags &= keep;
request.ifr_flags |= flags & ~keep;
- return mlx5_ifreq(dev, SIOCSIFFLAGS, &request, 0);
+ return mlx5_ifreq(dev, SIOCSIFFLAGS, &request);
}
/**
@@ -627,17 +618,20 @@ mlx5_link_update_unlocked_gset(struct rte_eth_dev *dev,
int link_speed = 0;
int ret;
- ret = mlx5_ifreq(dev, SIOCGIFFLAGS, &ifr, 1);
+ ret = mlx5_ifreq(dev, SIOCGIFFLAGS, &ifr);
if (ret) {
DRV_LOG(WARNING, "port %u ioctl(SIOCGIFFLAGS) failed: %s",
dev->data->port_id, strerror(rte_errno));
return ret;
}
- memset(&dev_link, 0, sizeof(dev_link));
- dev_link.link_status = ((ifr.ifr_flags & IFF_UP) &&
- (ifr.ifr_flags & IFF_RUNNING));
- ifr.ifr_data = (void *)&edata;
- ret = mlx5_ifreq(dev, SIOCETHTOOL, &ifr, 1);
+ dev_link = (struct rte_eth_link) {
+ .link_status = ((ifr.ifr_flags & IFF_UP) &&
+ (ifr.ifr_flags & IFF_RUNNING)),
+ };
+ ifr = (struct ifreq) {
+ .ifr_data = (void *)&edata,
+ };
+ ret = mlx5_ifreq(dev, SIOCETHTOOL, &ifr);
if (ret) {
DRV_LOG(WARNING,
"port %u ioctl(SIOCETHTOOL, ETHTOOL_GSET) failed: %s",
@@ -666,8 +660,8 @@ mlx5_link_update_unlocked_gset(struct rte_eth_dev *dev,
ETH_LINK_HALF_DUPLEX : ETH_LINK_FULL_DUPLEX);
dev_link.link_autoneg = !(dev->data->dev_conf.link_speeds &
ETH_LINK_SPEED_FIXED);
- if ((dev_link.link_speed && !dev_link.link_status) ||
- (!dev_link.link_speed && dev_link.link_status)) {
+ if (((dev_link.link_speed && !dev_link.link_status) ||
+ (!dev_link.link_speed && dev_link.link_status))) {
rte_errno = EAGAIN;
return -rte_errno;
}
@@ -698,17 +692,20 @@ mlx5_link_update_unlocked_gs(struct rte_eth_dev *dev,
uint64_t sc;
int ret;
- ret = mlx5_ifreq(dev, SIOCGIFFLAGS, &ifr, 1);
+ ret = mlx5_ifreq(dev, SIOCGIFFLAGS, &ifr);
if (ret) {
DRV_LOG(WARNING, "port %u ioctl(SIOCGIFFLAGS) failed: %s",
dev->data->port_id, strerror(rte_errno));
return ret;
}
- memset(&dev_link, 0, sizeof(dev_link));
- dev_link.link_status = ((ifr.ifr_flags & IFF_UP) &&
- (ifr.ifr_flags & IFF_RUNNING));
- ifr.ifr_data = (void *)&gcmd;
- ret = mlx5_ifreq(dev, SIOCETHTOOL, &ifr, 1);
+ dev_link = (struct rte_eth_link) {
+ .link_status = ((ifr.ifr_flags & IFF_UP) &&
+ (ifr.ifr_flags & IFF_RUNNING)),
+ };
+ ifr = (struct ifreq) {
+ .ifr_data = (void *)&gcmd,
+ };
+ ret = mlx5_ifreq(dev, SIOCETHTOOL, &ifr);
if (ret) {
DRV_LOG(DEBUG,
"port %u ioctl(SIOCETHTOOL, ETHTOOL_GLINKSETTINGS)"
@@ -725,7 +722,7 @@ mlx5_link_update_unlocked_gs(struct rte_eth_dev *dev,
*ecmd = gcmd;
ifr.ifr_data = (void *)ecmd;
- ret = mlx5_ifreq(dev, SIOCETHTOOL, &ifr, 1);
+ ret = mlx5_ifreq(dev, SIOCETHTOOL, &ifr);
if (ret) {
DRV_LOG(DEBUG,
"port %u ioctl(SIOCETHTOOL, ETHTOOL_GLINKSETTINGS)"
@@ -775,8 +772,8 @@ mlx5_link_update_unlocked_gs(struct rte_eth_dev *dev,
ETH_LINK_HALF_DUPLEX : ETH_LINK_FULL_DUPLEX);
dev_link.link_autoneg = !(dev->data->dev_conf.link_speeds &
ETH_LINK_SPEED_FIXED);
- if ((dev_link.link_speed && !dev_link.link_status) ||
- (!dev_link.link_speed && dev_link.link_status)) {
+ if (((dev_link.link_speed && !dev_link.link_status) ||
+ (!dev_link.link_speed && dev_link.link_status))) {
rte_errno = EAGAIN;
return -rte_errno;
}
@@ -888,7 +885,7 @@ mlx5_dev_get_flow_ctrl(struct rte_eth_dev *dev, struct rte_eth_fc_conf *fc_conf)
int ret;
ifr.ifr_data = (void *)&ethpause;
- ret = mlx5_ifreq(dev, SIOCETHTOOL, &ifr, 1);
+ ret = mlx5_ifreq(dev, SIOCETHTOOL, &ifr);
if (ret) {
DRV_LOG(WARNING,
"port %u ioctl(SIOCETHTOOL, ETHTOOL_GPAUSEPARAM) failed:"
@@ -941,7 +938,7 @@ mlx5_dev_set_flow_ctrl(struct rte_eth_dev *dev, struct rte_eth_fc_conf *fc_conf)
ethpause.tx_pause = 1;
else
ethpause.tx_pause = 0;
- ret = mlx5_ifreq(dev, SIOCETHTOOL, &ifr, 0);
+ ret = mlx5_ifreq(dev, SIOCETHTOOL, &ifr);
if (ret) {
DRV_LOG(WARNING,
"port %u ioctl(SIOCETHTOOL, ETHTOOL_SPAUSEPARAM)"
@@ -1306,10 +1303,7 @@ mlx5_dev_to_port_id(const struct rte_device *dev, uint16_t *port_list,
RTE_ETH_FOREACH_DEV(id) {
struct rte_eth_dev *ldev = &rte_eth_devices[id];
- if (!ldev->device ||
- !ldev->device->driver ||
- strcmp(ldev->device->driver->name, MLX5_DRIVER_NAME) ||
- ldev->device != dev)
+ if (ldev->device != dev)
continue;
if (n < port_list_n)
port_list[n] = id;
diff --git a/drivers/net/mlx5/mlx5_flow.c b/drivers/net/mlx5/mlx5_flow.c
index ca4625b6..280af0ab 100644
--- a/drivers/net/mlx5/mlx5_flow.c
+++ b/drivers/net/mlx5/mlx5_flow.c
@@ -3,6 +3,7 @@
* Copyright 2016 Mellanox Technologies, Ltd
*/
+#include <netinet/in.h>
#include <sys/queue.h>
#include <stdalign.h>
#include <stdint.h>
@@ -31,74 +32,30 @@
#include "mlx5_defs.h"
#include "mlx5_prm.h"
#include "mlx5_glue.h"
+#include "mlx5_flow.h"
/* Dev ops structure defined in mlx5.c */
extern const struct eth_dev_ops mlx5_dev_ops;
extern const struct eth_dev_ops mlx5_dev_ops_isolate;
-/* Pattern outer Layer bits. */
-#define MLX5_FLOW_LAYER_OUTER_L2 (1u << 0)
-#define MLX5_FLOW_LAYER_OUTER_L3_IPV4 (1u << 1)
-#define MLX5_FLOW_LAYER_OUTER_L3_IPV6 (1u << 2)
-#define MLX5_FLOW_LAYER_OUTER_L4_UDP (1u << 3)
-#define MLX5_FLOW_LAYER_OUTER_L4_TCP (1u << 4)
-#define MLX5_FLOW_LAYER_OUTER_VLAN (1u << 5)
-
-/* Pattern inner Layer bits. */
-#define MLX5_FLOW_LAYER_INNER_L2 (1u << 6)
-#define MLX5_FLOW_LAYER_INNER_L3_IPV4 (1u << 7)
-#define MLX5_FLOW_LAYER_INNER_L3_IPV6 (1u << 8)
-#define MLX5_FLOW_LAYER_INNER_L4_UDP (1u << 9)
-#define MLX5_FLOW_LAYER_INNER_L4_TCP (1u << 10)
-#define MLX5_FLOW_LAYER_INNER_VLAN (1u << 11)
-
-/* Pattern tunnel Layer bits. */
-#define MLX5_FLOW_LAYER_VXLAN (1u << 12)
-#define MLX5_FLOW_LAYER_VXLAN_GPE (1u << 13)
-#define MLX5_FLOW_LAYER_GRE (1u << 14)
-#define MLX5_FLOW_LAYER_MPLS (1u << 15)
-
-/* Outer Masks. */
-#define MLX5_FLOW_LAYER_OUTER_L3 \
- (MLX5_FLOW_LAYER_OUTER_L3_IPV4 | MLX5_FLOW_LAYER_OUTER_L3_IPV6)
-#define MLX5_FLOW_LAYER_OUTER_L4 \
- (MLX5_FLOW_LAYER_OUTER_L4_UDP | MLX5_FLOW_LAYER_OUTER_L4_TCP)
-#define MLX5_FLOW_LAYER_OUTER \
- (MLX5_FLOW_LAYER_OUTER_L2 | MLX5_FLOW_LAYER_OUTER_L3 | \
- MLX5_FLOW_LAYER_OUTER_L4)
-
-/* Tunnel Masks. */
-#define MLX5_FLOW_LAYER_TUNNEL \
- (MLX5_FLOW_LAYER_VXLAN | MLX5_FLOW_LAYER_VXLAN_GPE | \
- MLX5_FLOW_LAYER_GRE | MLX5_FLOW_LAYER_MPLS)
-
-/* Inner Masks. */
-#define MLX5_FLOW_LAYER_INNER_L3 \
- (MLX5_FLOW_LAYER_INNER_L3_IPV4 | MLX5_FLOW_LAYER_INNER_L3_IPV6)
-#define MLX5_FLOW_LAYER_INNER_L4 \
- (MLX5_FLOW_LAYER_INNER_L4_UDP | MLX5_FLOW_LAYER_INNER_L4_TCP)
-#define MLX5_FLOW_LAYER_INNER \
- (MLX5_FLOW_LAYER_INNER_L2 | MLX5_FLOW_LAYER_INNER_L3 | \
- MLX5_FLOW_LAYER_INNER_L4)
-
-/* Actions that modify the fate of matching traffic. */
-#define MLX5_FLOW_FATE_DROP (1u << 0)
-#define MLX5_FLOW_FATE_QUEUE (1u << 1)
-#define MLX5_FLOW_FATE_RSS (1u << 2)
-
-/* Modify a packet. */
-#define MLX5_FLOW_MOD_FLAG (1u << 0)
-#define MLX5_FLOW_MOD_MARK (1u << 1)
-#define MLX5_FLOW_MOD_COUNT (1u << 2)
-
-/* possible L3 layers protocols filtering. */
-#define MLX5_IP_PROTOCOL_TCP 6
-#define MLX5_IP_PROTOCOL_UDP 17
-#define MLX5_IP_PROTOCOL_GRE 47
-#define MLX5_IP_PROTOCOL_MPLS 147
-
-/* Priority reserved for default flows. */
-#define MLX5_FLOW_PRIO_RSVD ((uint32_t)-1)
+/** Device flow drivers. */
+#ifdef HAVE_IBV_FLOW_DV_SUPPORT
+extern const struct mlx5_flow_driver_ops mlx5_flow_dv_drv_ops;
+#endif
+extern const struct mlx5_flow_driver_ops mlx5_flow_tcf_drv_ops;
+extern const struct mlx5_flow_driver_ops mlx5_flow_verbs_drv_ops;
+
+const struct mlx5_flow_driver_ops mlx5_flow_null_drv_ops;
+
+const struct mlx5_flow_driver_ops *flow_drv_ops[] = {
+ [MLX5_FLOW_TYPE_MIN] = &mlx5_flow_null_drv_ops,
+#ifdef HAVE_IBV_FLOW_DV_SUPPORT
+ [MLX5_FLOW_TYPE_DV] = &mlx5_flow_dv_drv_ops,
+#endif
+ [MLX5_FLOW_TYPE_TCF] = &mlx5_flow_tcf_drv_ops,
+ [MLX5_FLOW_TYPE_VERBS] = &mlx5_flow_verbs_drv_ops,
+ [MLX5_FLOW_TYPE_MAX] = &mlx5_flow_null_drv_ops
+};
enum mlx5_expansion {
MLX5_EXPANSION_ROOT,
@@ -270,53 +227,6 @@ static const struct rte_flow_expand_node mlx5_support_expansion[] = {
},
};
-/** Handles information leading to a drop fate. */
-struct mlx5_flow_verbs {
- LIST_ENTRY(mlx5_flow_verbs) next;
- unsigned int size; /**< Size of the attribute. */
- struct {
- struct ibv_flow_attr *attr;
- /**< Pointer to the Specification buffer. */
- uint8_t *specs; /**< Pointer to the specifications. */
- };
- struct ibv_flow *flow; /**< Verbs flow pointer. */
- struct mlx5_hrxq *hrxq; /**< Hash Rx queue object. */
- uint64_t hash_fields; /**< Verbs hash Rx queue hash fields. */
-};
-
-/* Counters information. */
-struct mlx5_flow_counter {
- LIST_ENTRY(mlx5_flow_counter) next; /**< Pointer to the next counter. */
- uint32_t shared:1; /**< Share counter ID with other flow rules. */
- uint32_t ref_cnt:31; /**< Reference counter. */
- uint32_t id; /**< Counter ID. */
- struct ibv_counter_set *cs; /**< Holds the counters for the rule. */
- uint64_t hits; /**< Number of packets matched by the rule. */
- uint64_t bytes; /**< Number of bytes matched by the rule. */
-};
-
-/* Flow structure. */
-struct rte_flow {
- TAILQ_ENTRY(rte_flow) next; /**< Pointer to the next flow structure. */
- struct rte_flow_attr attributes; /**< User flow attribute. */
- uint32_t l3_protocol_en:1; /**< Protocol filtering requested. */
- uint32_t layers;
- /**< Bit-fields of present layers see MLX5_FLOW_LAYER_*. */
- uint32_t modifier;
- /**< Bit-fields of present modifier see MLX5_FLOW_MOD_*. */
- uint32_t fate;
- /**< Bit-fields of present fate see MLX5_FLOW_FATE_*. */
- uint8_t l3_protocol; /**< valid when l3_protocol_en is set. */
- LIST_HEAD(verbs, mlx5_flow_verbs) verbs; /**< Verbs flows list. */
- struct mlx5_flow_verbs *cur_verbs;
- /**< Current Verbs flow structure being filled. */
- struct mlx5_flow_counter *counter; /**< Holds Verbs flow counter. */
- struct rte_flow_action_rss rss;/**< RSS context. */
- uint8_t key[MLX5_RSS_HASH_KEY_LEN]; /**< RSS hash key. */
- uint16_t (*queue)[]; /**< Destination queues to redirect traffic to. */
- void *nl_flow; /**< Netlink flow buffer if relevant. */
-};
-
static const struct rte_flow_ops mlx5_flow_ops = {
.validate = mlx5_flow_validate,
.create = mlx5_flow_create,
@@ -352,23 +262,6 @@ struct mlx5_fdir {
struct rte_flow_action_queue queue;
};
-/* Verbs specification header. */
-struct ibv_spec_header {
- enum ibv_flow_spec_type type;
- uint16_t size;
-};
-
-/*
- * Number of sub priorities.
- * For each kind of pattern matching i.e. L2, L3, L4 to have a correct
- * matching on the NIC (firmware dependent) L4 most have the higher priority
- * followed by L3 and ending with L2.
- */
-#define MLX5_PRIORITY_MAP_L2 2
-#define MLX5_PRIORITY_MAP_L3 1
-#define MLX5_PRIORITY_MAP_L4 0
-#define MLX5_PRIORITY_MAP_MAX 3
-
/* Map of Verbs to Flow priority with 8 Verbs priorities. */
static const uint32_t priority_map_3[][MLX5_PRIORITY_MAP_MAX] = {
{ 0, 1, 2 }, { 2, 3, 4 }, { 5, 6, 7 },
@@ -413,7 +306,7 @@ static struct mlx5_flow_tunnel_info tunnels_info[] = {
* Discover the maximum number of priority available.
*
* @param[in] dev
- * Pointer to Ethernet device.
+ * Pointer to the Ethernet device structure.
*
* @return
* number of supported flow priority on success, a negative errno
@@ -478,160 +371,33 @@ mlx5_flow_discover_priorities(struct rte_eth_dev *dev)
}
/**
- * Adjust flow priority.
+ * Adjust flow priority based on the highest layer and the request priority.
*
- * @param dev
- * Pointer to Ethernet device.
- * @param flow
- * Pointer to an rte flow.
+ * @param[in] dev
+ * Pointer to the Ethernet device structure.
+ * @param[in] priority
+ * The rule base priority.
+ * @param[in] subpriority
+ * The priority based on the items.
+ *
+ * @return
+ * The new priority.
*/
-static void
-mlx5_flow_adjust_priority(struct rte_eth_dev *dev, struct rte_flow *flow)
+uint32_t mlx5_flow_adjust_priority(struct rte_eth_dev *dev, int32_t priority,
+ uint32_t subpriority)
{
+ uint32_t res = 0;
struct priv *priv = dev->data->dev_private;
- uint32_t priority = flow->attributes.priority;
- uint32_t subpriority = flow->cur_verbs->attr->priority;
switch (priv->config.flow_prio) {
case RTE_DIM(priority_map_3):
- priority = priority_map_3[priority][subpriority];
+ res = priority_map_3[priority][subpriority];
break;
case RTE_DIM(priority_map_5):
- priority = priority_map_5[priority][subpriority];
+ res = priority_map_5[priority][subpriority];
break;
}
- flow->cur_verbs->attr->priority = priority;
-}
-
-/**
- * Get a flow counter.
- *
- * @param[in] dev
- * Pointer to Ethernet device.
- * @param[in] shared
- * Indicate if this counter is shared with other flows.
- * @param[in] id
- * Counter identifier.
- *
- * @return
- * A pointer to the counter, NULL otherwise and rte_errno is set.
- */
-static struct mlx5_flow_counter *
-mlx5_flow_counter_new(struct rte_eth_dev *dev, uint32_t shared, uint32_t id)
-{
- struct priv *priv = dev->data->dev_private;
- struct mlx5_flow_counter *cnt;
-
- LIST_FOREACH(cnt, &priv->flow_counters, next) {
- if (!cnt->shared || cnt->shared != shared)
- continue;
- if (cnt->id != id)
- continue;
- cnt->ref_cnt++;
- return cnt;
- }
-#ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
-
- struct mlx5_flow_counter tmpl = {
- .shared = shared,
- .id = id,
- .cs = mlx5_glue->create_counter_set
- (priv->ctx,
- &(struct ibv_counter_set_init_attr){
- .counter_set_id = id,
- }),
- .hits = 0,
- .bytes = 0,
- };
-
- if (!tmpl.cs) {
- rte_errno = errno;
- return NULL;
- }
- cnt = rte_calloc(__func__, 1, sizeof(*cnt), 0);
- if (!cnt) {
- rte_errno = ENOMEM;
- return NULL;
- }
- *cnt = tmpl;
- LIST_INSERT_HEAD(&priv->flow_counters, cnt, next);
- return cnt;
-#endif
- rte_errno = ENOTSUP;
- return NULL;
-}
-
-/**
- * Release a flow counter.
- *
- * @param[in] counter
- * Pointer to the counter handler.
- */
-static void
-mlx5_flow_counter_release(struct mlx5_flow_counter *counter)
-{
- if (--counter->ref_cnt == 0) {
- claim_zero(mlx5_glue->destroy_counter_set(counter->cs));
- LIST_REMOVE(counter, next);
- rte_free(counter);
- }
-}
-
-/**
- * Verify the @p attributes will be correctly understood by the NIC and store
- * them in the @p flow if everything is correct.
- *
- * @param[in] dev
- * Pointer to Ethernet device.
- * @param[in] attributes
- * Pointer to flow attributes
- * @param[in, out] flow
- * Pointer to the rte_flow structure.
- * @param[out] error
- * Pointer to error structure.
- *
- * @return
- * 0 on success, a negative errno value otherwise and rte_errno is set.
- */
-static int
-mlx5_flow_attributes(struct rte_eth_dev *dev,
- const struct rte_flow_attr *attributes,
- struct rte_flow *flow,
- struct rte_flow_error *error)
-{
- uint32_t priority_max =
- ((struct priv *)dev->data->dev_private)->config.flow_prio - 1;
-
- if (attributes->group)
- return rte_flow_error_set(error, ENOTSUP,
- RTE_FLOW_ERROR_TYPE_ATTR_GROUP,
- NULL,
- "groups is not supported");
- if (attributes->priority != MLX5_FLOW_PRIO_RSVD &&
- attributes->priority >= priority_max)
- return rte_flow_error_set(error, ENOTSUP,
- RTE_FLOW_ERROR_TYPE_ATTR_PRIORITY,
- NULL,
- "priority out of range");
- if (attributes->egress)
- return rte_flow_error_set(error, ENOTSUP,
- RTE_FLOW_ERROR_TYPE_ATTR_EGRESS,
- NULL,
- "egress is not supported");
- if (attributes->transfer)
- return rte_flow_error_set(error, ENOTSUP,
- RTE_FLOW_ERROR_TYPE_ATTR_TRANSFER,
- NULL,
- "transfer is not supported");
- if (!attributes->ingress)
- return rte_flow_error_set(error, ENOTSUP,
- RTE_FLOW_ERROR_TYPE_ATTR_INGRESS,
- NULL,
- "ingress attribute is mandatory");
- flow->attributes = *attributes;
- if (attributes->priority == MLX5_FLOW_PRIO_RSVD)
- flow->attributes.priority = priority_max;
- return 0;
+ return res;
}
/**
@@ -652,7 +418,7 @@ mlx5_flow_attributes(struct rte_eth_dev *dev,
* @return
* 0 on success, a negative errno value otherwise and rte_errno is set.
*/
-static int
+int
mlx5_flow_item_acceptable(const struct rte_flow_item *item,
const uint8_t *mask,
const uint8_t *nic_mask,
@@ -671,8 +437,7 @@ mlx5_flow_item_acceptable(const struct rte_flow_item *item,
" bits");
if (!item->spec && (item->mask || item->last))
return rte_flow_error_set(error, EINVAL,
- RTE_FLOW_ERROR_TYPE_ITEM,
- item,
+ RTE_FLOW_ERROR_TYPE_ITEM, item,
"mask/last without a spec is not"
" supported");
if (item->spec && item->last) {
@@ -687,206 +452,635 @@ mlx5_flow_item_acceptable(const struct rte_flow_item *item,
}
ret = memcmp(spec, last, size);
if (ret != 0)
- return rte_flow_error_set(error, ENOTSUP,
+ return rte_flow_error_set(error, EINVAL,
RTE_FLOW_ERROR_TYPE_ITEM,
item,
- "range is not supported");
+ "range is not valid");
}
return 0;
}
/**
- * Add a verbs item specification into @p flow.
+ * Adjust the hash fields according to the @p flow information.
*
- * @param[in, out] flow
- * Pointer to flow structure.
- * @param[in] src
- * Create specification.
- * @param[in] size
- * Size in bytes of the specification to copy.
+ * @param[in] dev_flow.
+ * Pointer to the mlx5_flow.
+ * @param[in] tunnel
+ * 1 when the hash field is for a tunnel item.
+ * @param[in] layer_types
+ * ETH_RSS_* types.
+ * @param[in] hash_fields
+ * Item hash fields.
+ *
+ * @return
+ * The hash fields that should be used.
+ */
+uint64_t
+mlx5_flow_hashfields_adjust(struct mlx5_flow *dev_flow,
+ int tunnel __rte_unused, uint64_t layer_types,
+ uint64_t hash_fields)
+{
+ struct rte_flow *flow = dev_flow->flow;
+#ifdef HAVE_IBV_DEVICE_TUNNEL_SUPPORT
+ int rss_request_inner = flow->rss.level >= 2;
+
+ /* Check RSS hash level for tunnel. */
+ if (tunnel && rss_request_inner)
+ hash_fields |= IBV_RX_HASH_INNER;
+ else if (tunnel || rss_request_inner)
+ return 0;
+#endif
+ /* Check if requested layer matches RSS hash fields. */
+ if (!(flow->rss.types & layer_types))
+ return 0;
+ return hash_fields;
+}
+
+/**
+ * Lookup and set the ptype in the data Rx part. A single Ptype can be used,
+ * if several tunnel rules are used on this queue, the tunnel ptype will be
+ * cleared.
+ *
+ * @param rxq_ctrl
+ * Rx queue to update.
*/
static void
-mlx5_flow_spec_verbs_add(struct rte_flow *flow, void *src, unsigned int size)
+flow_rxq_tunnel_ptype_update(struct mlx5_rxq_ctrl *rxq_ctrl)
{
- struct mlx5_flow_verbs *verbs = flow->cur_verbs;
+ unsigned int i;
+ uint32_t tunnel_ptype = 0;
+
+ /* Look up for the ptype to use. */
+ for (i = 0; i != MLX5_FLOW_TUNNEL; ++i) {
+ if (!rxq_ctrl->flow_tunnels_n[i])
+ continue;
+ if (!tunnel_ptype) {
+ tunnel_ptype = tunnels_info[i].ptype;
+ } else {
+ tunnel_ptype = 0;
+ break;
+ }
+ }
+ rxq_ctrl->rxq.tunnel = tunnel_ptype;
+}
+
+/**
+ * Set the Rx queue flags (Mark/Flag and Tunnel Ptypes) according to the device
+ * flow.
+ *
+ * @param[in] dev
+ * Pointer to the Ethernet device structure.
+ * @param[in] dev_flow
+ * Pointer to device flow structure.
+ */
+static void
+flow_drv_rxq_flags_set(struct rte_eth_dev *dev, struct mlx5_flow *dev_flow)
+{
+ struct priv *priv = dev->data->dev_private;
+ struct rte_flow *flow = dev_flow->flow;
+ const int mark = !!(flow->actions &
+ (MLX5_FLOW_ACTION_FLAG | MLX5_FLOW_ACTION_MARK));
+ const int tunnel = !!(dev_flow->layers & MLX5_FLOW_LAYER_TUNNEL);
+ unsigned int i;
+
+ for (i = 0; i != flow->rss.queue_num; ++i) {
+ int idx = (*flow->queue)[i];
+ struct mlx5_rxq_ctrl *rxq_ctrl =
+ container_of((*priv->rxqs)[idx],
+ struct mlx5_rxq_ctrl, rxq);
- if (verbs->specs) {
- void *dst;
+ if (mark) {
+ rxq_ctrl->rxq.mark = 1;
+ rxq_ctrl->flow_mark_n++;
+ }
+ if (tunnel) {
+ unsigned int j;
- dst = (void *)(verbs->specs + verbs->size);
- memcpy(dst, src, size);
- ++verbs->attr->num_of_specs;
+ /* Increase the counter matching the flow. */
+ for (j = 0; j != MLX5_FLOW_TUNNEL; ++j) {
+ if ((tunnels_info[j].tunnel &
+ dev_flow->layers) ==
+ tunnels_info[j].tunnel) {
+ rxq_ctrl->flow_tunnels_n[j]++;
+ break;
+ }
+ }
+ flow_rxq_tunnel_ptype_update(rxq_ctrl);
+ }
}
- verbs->size += size;
}
/**
- * Adjust verbs hash fields according to the @p flow information.
+ * Set the Rx queue flags (Mark/Flag and Tunnel Ptypes) for a flow
*
- * @param[in, out] flow.
+ * @param[in] dev
+ * Pointer to the Ethernet device structure.
+ * @param[in] flow
* Pointer to flow structure.
- * @param[in] tunnel
- * 1 when the hash field is for a tunnel item.
- * @param[in] layer_types
- * ETH_RSS_* types.
- * @param[in] hash_fields
- * Item hash fields.
*/
static void
-mlx5_flow_verbs_hashfields_adjust(struct rte_flow *flow,
- int tunnel __rte_unused,
- uint32_t layer_types, uint64_t hash_fields)
+flow_rxq_flags_set(struct rte_eth_dev *dev, struct rte_flow *flow)
+{
+ struct mlx5_flow *dev_flow;
+
+ LIST_FOREACH(dev_flow, &flow->dev_flows, next)
+ flow_drv_rxq_flags_set(dev, dev_flow);
+}
+
+/**
+ * Clear the Rx queue flags (Mark/Flag and Tunnel Ptype) associated with the
+ * device flow if no other flow uses it with the same kind of request.
+ *
+ * @param dev
+ * Pointer to Ethernet device.
+ * @param[in] dev_flow
+ * Pointer to the device flow.
+ */
+static void
+flow_drv_rxq_flags_trim(struct rte_eth_dev *dev, struct mlx5_flow *dev_flow)
+{
+ struct priv *priv = dev->data->dev_private;
+ struct rte_flow *flow = dev_flow->flow;
+ const int mark = !!(flow->actions &
+ (MLX5_FLOW_ACTION_FLAG | MLX5_FLOW_ACTION_MARK));
+ const int tunnel = !!(dev_flow->layers & MLX5_FLOW_LAYER_TUNNEL);
+ unsigned int i;
+
+ assert(dev->data->dev_started);
+ for (i = 0; i != flow->rss.queue_num; ++i) {
+ int idx = (*flow->queue)[i];
+ struct mlx5_rxq_ctrl *rxq_ctrl =
+ container_of((*priv->rxqs)[idx],
+ struct mlx5_rxq_ctrl, rxq);
+
+ if (mark) {
+ rxq_ctrl->flow_mark_n--;
+ rxq_ctrl->rxq.mark = !!rxq_ctrl->flow_mark_n;
+ }
+ if (tunnel) {
+ unsigned int j;
+
+ /* Decrease the counter matching the flow. */
+ for (j = 0; j != MLX5_FLOW_TUNNEL; ++j) {
+ if ((tunnels_info[j].tunnel &
+ dev_flow->layers) ==
+ tunnels_info[j].tunnel) {
+ rxq_ctrl->flow_tunnels_n[j]--;
+ break;
+ }
+ }
+ flow_rxq_tunnel_ptype_update(rxq_ctrl);
+ }
+ }
+}
+
+/**
+ * Clear the Rx queue flags (Mark/Flag and Tunnel Ptype) associated with the
+ * @p flow if no other flow uses it with the same kind of request.
+ *
+ * @param dev
+ * Pointer to Ethernet device.
+ * @param[in] flow
+ * Pointer to the flow.
+ */
+static void
+flow_rxq_flags_trim(struct rte_eth_dev *dev, struct rte_flow *flow)
+{
+ struct mlx5_flow *dev_flow;
+
+ LIST_FOREACH(dev_flow, &flow->dev_flows, next)
+ flow_drv_rxq_flags_trim(dev, dev_flow);
+}
+
+/**
+ * Clear the Mark/Flag and Tunnel ptype information in all Rx queues.
+ *
+ * @param dev
+ * Pointer to Ethernet device.
+ */
+static void
+flow_rxq_flags_clear(struct rte_eth_dev *dev)
+{
+ struct priv *priv = dev->data->dev_private;
+ unsigned int i;
+
+ for (i = 0; i != priv->rxqs_n; ++i) {
+ struct mlx5_rxq_ctrl *rxq_ctrl;
+ unsigned int j;
+
+ if (!(*priv->rxqs)[i])
+ continue;
+ rxq_ctrl = container_of((*priv->rxqs)[i],
+ struct mlx5_rxq_ctrl, rxq);
+ rxq_ctrl->flow_mark_n = 0;
+ rxq_ctrl->rxq.mark = 0;
+ for (j = 0; j != MLX5_FLOW_TUNNEL; ++j)
+ rxq_ctrl->flow_tunnels_n[j] = 0;
+ rxq_ctrl->rxq.tunnel = 0;
+ }
+}
+
+/*
+ * Validate the flag action.
+ *
+ * @param[in] action_flags
+ * Bit-fields that holds the actions detected until now.
+ * @param[in] attr
+ * Attributes of flow that includes this action.
+ * @param[out] error
+ * Pointer to error structure.
+ *
+ * @return
+ * 0 on success, a negative errno value otherwise and rte_errno is set.
+ */
+int
+mlx5_flow_validate_action_flag(uint64_t action_flags,
+ const struct rte_flow_attr *attr,
+ struct rte_flow_error *error)
+{
+
+ if (action_flags & MLX5_FLOW_ACTION_DROP)
+ return rte_flow_error_set(error, EINVAL,
+ RTE_FLOW_ERROR_TYPE_ACTION, NULL,
+ "can't drop and flag in same flow");
+ if (action_flags & MLX5_FLOW_ACTION_MARK)
+ return rte_flow_error_set(error, EINVAL,
+ RTE_FLOW_ERROR_TYPE_ACTION, NULL,
+ "can't mark and flag in same flow");
+ if (action_flags & MLX5_FLOW_ACTION_FLAG)
+ return rte_flow_error_set(error, EINVAL,
+ RTE_FLOW_ERROR_TYPE_ACTION, NULL,
+ "can't have 2 flag"
+ " actions in same flow");
+ if (attr->egress)
+ return rte_flow_error_set(error, ENOTSUP,
+ RTE_FLOW_ERROR_TYPE_ATTR_EGRESS, NULL,
+ "flag action not supported for "
+ "egress");
+ return 0;
+}
+
+/**
+ * Validate the mark action.
+ *
+ * @param[in] action
+ * Pointer to the mark action.
+ * @param[in] action_flags
+ * Bit-fields that hold the actions detected until now.
+ * @param[in] attr
+ * Attributes of flow that includes this action.
+ * @param[out] error
+ * Pointer to error structure.
+ *
+ * @return
+ * 0 on success, a negative errno value otherwise and rte_errno is set.
+ */
+int
+mlx5_flow_validate_action_mark(const struct rte_flow_action *action,
+ uint64_t action_flags,
+ const struct rte_flow_attr *attr,
+ struct rte_flow_error *error)
+{
+ const struct rte_flow_action_mark *mark = action->conf;
+
+ if (!mark)
+ return rte_flow_error_set(error, EINVAL,
+ RTE_FLOW_ERROR_TYPE_ACTION,
+ action,
+ "configuration cannot be null");
+ if (mark->id >= MLX5_FLOW_MARK_MAX)
+ return rte_flow_error_set(error, EINVAL,
+ RTE_FLOW_ERROR_TYPE_ACTION_CONF,
+ &mark->id,
+ "mark id must in 0 <= id < "
+ RTE_STR(MLX5_FLOW_MARK_MAX));
+ if (action_flags & MLX5_FLOW_ACTION_DROP)
+ return rte_flow_error_set(error, EINVAL,
+ RTE_FLOW_ERROR_TYPE_ACTION, NULL,
+ "can't drop and mark in same flow");
+ if (action_flags & MLX5_FLOW_ACTION_FLAG)
+ return rte_flow_error_set(error, EINVAL,
+ RTE_FLOW_ERROR_TYPE_ACTION, NULL,
+ "can't flag and mark in same flow");
+ if (action_flags & MLX5_FLOW_ACTION_MARK)
+ return rte_flow_error_set(error, EINVAL,
+ RTE_FLOW_ERROR_TYPE_ACTION, NULL,
+ "can't have 2 mark actions in same"
+ " flow");
+ if (attr->egress)
+ return rte_flow_error_set(error, ENOTSUP,
+ RTE_FLOW_ERROR_TYPE_ATTR_EGRESS, NULL,
+ "mark action not supported for "
+ "egress");
+ return 0;
+}
+
+/**
+ * Validate the drop action.
+ *
+ * @param[in] action_flags
+ * Bit-fields that hold the actions detected until now.
+ * @param[in] attr
+ * Attributes of flow that includes this action.
+ * @param[out] error
+ * Pointer to error structure.
+ *
+ * @return
+ * 0 on success, a negative errno value otherwise and rte_errno is set.
+ */
+int
+mlx5_flow_validate_action_drop(uint64_t action_flags,
+ const struct rte_flow_attr *attr,
+ struct rte_flow_error *error)
+{
+ if (action_flags & MLX5_FLOW_ACTION_FLAG)
+ return rte_flow_error_set(error, EINVAL,
+ RTE_FLOW_ERROR_TYPE_ACTION, NULL,
+ "can't drop and flag in same flow");
+ if (action_flags & MLX5_FLOW_ACTION_MARK)
+ return rte_flow_error_set(error, EINVAL,
+ RTE_FLOW_ERROR_TYPE_ACTION, NULL,
+ "can't drop and mark in same flow");
+ if (action_flags & MLX5_FLOW_FATE_ACTIONS)
+ return rte_flow_error_set(error, EINVAL,
+ RTE_FLOW_ERROR_TYPE_ACTION, NULL,
+ "can't have 2 fate actions in"
+ " same flow");
+ if (attr->egress)
+ return rte_flow_error_set(error, ENOTSUP,
+ RTE_FLOW_ERROR_TYPE_ATTR_EGRESS, NULL,
+ "drop action not supported for "
+ "egress");
+ return 0;
+}
+
+/**
+ * Validate the queue action.
+ *
+ * @param[in] action
+ * Pointer to the queue action.
+ * @param[in] action_flags
+ * Bit-fields that hold the actions detected until now.
+ * @param[in] dev
+ * Pointer to the Ethernet device structure.
+ * @param[in] attr
+ * Attributes of flow that includes this action.
+ * @param[out] error
+ * Pointer to error structure.
+ *
+ * @return
+ * 0 on success, a negative errno value otherwise and rte_errno is set.
+ */
+int
+mlx5_flow_validate_action_queue(const struct rte_flow_action *action,
+ uint64_t action_flags,
+ struct rte_eth_dev *dev,
+ const struct rte_flow_attr *attr,
+ struct rte_flow_error *error)
{
+ struct priv *priv = dev->data->dev_private;
+ const struct rte_flow_action_queue *queue = action->conf;
+
+ if (action_flags & MLX5_FLOW_FATE_ACTIONS)
+ return rte_flow_error_set(error, EINVAL,
+ RTE_FLOW_ERROR_TYPE_ACTION, NULL,
+ "can't have 2 fate actions in"
+ " same flow");
+ if (queue->index >= priv->rxqs_n)
+ return rte_flow_error_set(error, EINVAL,
+ RTE_FLOW_ERROR_TYPE_ACTION_CONF,
+ &queue->index,
+ "queue index out of range");
+ if (!(*priv->rxqs)[queue->index])
+ return rte_flow_error_set(error, EINVAL,
+ RTE_FLOW_ERROR_TYPE_ACTION_CONF,
+ &queue->index,
+ "queue is not configured");
+ if (attr->egress)
+ return rte_flow_error_set(error, ENOTSUP,
+ RTE_FLOW_ERROR_TYPE_ATTR_EGRESS, NULL,
+ "queue action not supported for "
+ "egress");
+ return 0;
+}
+
+/**
+ * Validate the RSS action.
+ *
+ * @param[in] action
+ * Pointer to the RSS action.
+ * @param[in] action_flags
+ * Bit-fields that hold the actions detected until now.
+ * @param[in] dev
+ * Pointer to the Ethernet device structure.
+ * @param[in] attr
+ * Attributes of flow that includes this action.
+ * @param[out] error
+ * Pointer to error structure.
+ *
+ * @return
+ * 0 on success, a negative errno value otherwise and rte_errno is set.
+ */
+int
+mlx5_flow_validate_action_rss(const struct rte_flow_action *action,
+ uint64_t action_flags,
+ struct rte_eth_dev *dev,
+ const struct rte_flow_attr *attr,
+ struct rte_flow_error *error)
+{
+ struct priv *priv = dev->data->dev_private;
+ const struct rte_flow_action_rss *rss = action->conf;
+ unsigned int i;
+
+ if (action_flags & MLX5_FLOW_FATE_ACTIONS)
+ return rte_flow_error_set(error, EINVAL,
+ RTE_FLOW_ERROR_TYPE_ACTION, NULL,
+ "can't have 2 fate actions"
+ " in same flow");
+ if (rss->func != RTE_ETH_HASH_FUNCTION_DEFAULT &&
+ rss->func != RTE_ETH_HASH_FUNCTION_TOEPLITZ)
+ return rte_flow_error_set(error, ENOTSUP,
+ RTE_FLOW_ERROR_TYPE_ACTION_CONF,
+ &rss->func,
+ "RSS hash function not supported");
#ifdef HAVE_IBV_DEVICE_TUNNEL_SUPPORT
- hash_fields |= (tunnel ? IBV_RX_HASH_INNER : 0);
- if (flow->rss.level == 2 && !tunnel)
- hash_fields = 0;
- else if (flow->rss.level < 2 && tunnel)
- hash_fields = 0;
+ if (rss->level > 2)
+#else
+ if (rss->level > 1)
#endif
- if (!(flow->rss.types & layer_types))
- hash_fields = 0;
- flow->cur_verbs->hash_fields |= hash_fields;
+ return rte_flow_error_set(error, ENOTSUP,
+ RTE_FLOW_ERROR_TYPE_ACTION_CONF,
+ &rss->level,
+ "tunnel RSS is not supported");
+ if (rss->key_len < MLX5_RSS_HASH_KEY_LEN)
+ return rte_flow_error_set(error, ENOTSUP,
+ RTE_FLOW_ERROR_TYPE_ACTION_CONF,
+ &rss->key_len,
+ "RSS hash key too small");
+ if (rss->key_len > MLX5_RSS_HASH_KEY_LEN)
+ return rte_flow_error_set(error, ENOTSUP,
+ RTE_FLOW_ERROR_TYPE_ACTION_CONF,
+ &rss->key_len,
+ "RSS hash key too large");
+ if (rss->queue_num > priv->config.ind_table_max_size)
+ return rte_flow_error_set(error, ENOTSUP,
+ RTE_FLOW_ERROR_TYPE_ACTION_CONF,
+ &rss->queue_num,
+ "number of queues too large");
+ if (rss->types & MLX5_RSS_HF_MASK)
+ return rte_flow_error_set(error, ENOTSUP,
+ RTE_FLOW_ERROR_TYPE_ACTION_CONF,
+ &rss->types,
+ "some RSS protocols are not"
+ " supported");
+ for (i = 0; i != rss->queue_num; ++i) {
+ if (!(*priv->rxqs)[rss->queue[i]])
+ return rte_flow_error_set
+ (error, EINVAL, RTE_FLOW_ERROR_TYPE_ACTION_CONF,
+ &rss->queue[i], "queue is not configured");
+ }
+ if (attr->egress)
+ return rte_flow_error_set(error, ENOTSUP,
+ RTE_FLOW_ERROR_TYPE_ATTR_EGRESS, NULL,
+ "rss action not supported for "
+ "egress");
+ return 0;
+}
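+
+/*
+ * Illustrative example (a sketch only; queue indices and the all-zero key
+ * are placeholders): an RSS action configuration that passes the checks
+ * above uses the default or Toeplitz hash function, level 0 or 1, a key of
+ * exactly MLX5_RSS_HASH_KEY_LEN bytes and only configured queues, e.g.:
+ *
+ *	uint16_t queues[] = { 0, 1 };
+ *	uint8_t key[MLX5_RSS_HASH_KEY_LEN] = { 0 };
+ *	struct rte_flow_action_rss conf = {
+ *		.func = RTE_ETH_HASH_FUNCTION_TOEPLITZ,
+ *		.level = 0,
+ *		.types = ETH_RSS_IPV4,
+ *		.key_len = MLX5_RSS_HASH_KEY_LEN,
+ *		.key = key,
+ *		.queue_num = RTE_DIM(queues),
+ *		.queue = queues,
+ *	};
+ */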
+
+/**
+ * Validate the count action.
+ *
+ * @param[in] dev
+ * Pointer to the Ethernet device structure.
+ * @param[in] attr
+ * Attributes of flow that includes this action.
+ * @param[out] error
+ * Pointer to error structure.
+ *
+ * @return
+ * 0 on success, a negative errno value otherwise and rte_errno is set.
+ */
+int
+mlx5_flow_validate_action_count(struct rte_eth_dev *dev __rte_unused,
+ const struct rte_flow_attr *attr,
+ struct rte_flow_error *error)
+{
+ if (attr->egress)
+ return rte_flow_error_set(error, ENOTSUP,
+ RTE_FLOW_ERROR_TYPE_ATTR_EGRESS, NULL,
+ "count action not supported for "
+ "egress");
+ return 0;
}
/**
- * Convert the @p item into a Verbs specification after ensuring the NIC
- * will understand and process it correctly.
- * If the necessary size for the conversion is greater than the @p flow_size,
- * nothing is written in @p flow, the validation is still performed.
+ * Validate the flow attributes and verify the NIC supports them in its
+ * current configuration.
+ *
+ * @param[in] dev
+ * Pointer to the Ethernet device structure.
+ * @param[in] attributes
+ * Pointer to flow attributes
+ * @param[out] error
+ * Pointer to error structure.
+ *
+ * @return
+ * 0 on success, a negative errno value otherwise and rte_errno is set.
+ */
+int
+mlx5_flow_validate_attributes(struct rte_eth_dev *dev,
+ const struct rte_flow_attr *attributes,
+ struct rte_flow_error *error)
+{
+ struct priv *priv = dev->data->dev_private;
+ uint32_t priority_max = priv->config.flow_prio - 1;
+
+ if (attributes->group)
+ return rte_flow_error_set(error, ENOTSUP,
+ RTE_FLOW_ERROR_TYPE_ATTR_GROUP,
+ NULL, "groups is not supported");
+ if (attributes->priority != MLX5_FLOW_PRIO_RSVD &&
+ attributes->priority >= priority_max)
+ return rte_flow_error_set(error, ENOTSUP,
+ RTE_FLOW_ERROR_TYPE_ATTR_PRIORITY,
+ NULL, "priority out of range");
+ if (attributes->egress)
+ return rte_flow_error_set(error, ENOTSUP,
+ RTE_FLOW_ERROR_TYPE_ATTR_EGRESS, NULL,
+ "egress is not supported");
+ if (attributes->transfer)
+ return rte_flow_error_set(error, ENOTSUP,
+ RTE_FLOW_ERROR_TYPE_ATTR_TRANSFER,
+ NULL, "transfer is not supported");
+ if (!attributes->ingress)
+ return rte_flow_error_set(error, EINVAL,
+ RTE_FLOW_ERROR_TYPE_ATTR_INGRESS,
+ NULL,
+ "ingress attribute is mandatory");
+ return 0;
+}
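+
+/*
+ * The attribute and action validators above are shared helpers; a
+ * driver-specific validate callback would typically call them while
+ * accumulating an action_flags bit-field, for instance (sketch only, the
+ * MLX5_FLOW_ACTION_* bits come from mlx5_flow.h):
+ *
+ *	uint64_t action_flags = 0;
+ *	int ret = mlx5_flow_validate_attributes(dev, attr, error);
+ *
+ *	if (ret < 0)
+ *		return ret;
+ *	for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
+ *		switch (actions->type) {
+ *		case RTE_FLOW_ACTION_TYPE_FLAG:
+ *			ret = mlx5_flow_validate_action_flag(action_flags,
+ *							     attr, error);
+ *			if (ret < 0)
+ *				return ret;
+ *			action_flags |= MLX5_FLOW_ACTION_FLAG;
+ *			break;
+ *		case RTE_FLOW_ACTION_TYPE_DROP:
+ *			ret = mlx5_flow_validate_action_drop(action_flags,
+ *							     attr, error);
+ *			if (ret < 0)
+ *				return ret;
+ *			action_flags |= MLX5_FLOW_ACTION_DROP;
+ *			break;
+ *		...
+ *		}
+ *	}
+ */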
+
+/**
+ * Validate Ethernet item.
*
* @param[in] item
* Item specification.
- * @param[in, out] flow
- * Pointer to flow structure.
- * @param[in] flow_size
- * Size in bytes of the available space in @p flow, if too small, nothing is
- * written.
+ * @param[in] item_flags
+ * Bit-fields that hold the items detected until now.
* @param[out] error
* Pointer to error structure.
*
* @return
- * On success the number of bytes consumed/necessary, if the returned value
- * is lesser or equal to @p flow_size, the @p item has fully been converted,
- * otherwise another call with this returned memory size should be done.
- * On error, a negative errno value is returned and rte_errno is set.
+ * 0 on success, a negative errno value otherwise and rte_errno is set.
*/
-static int
-mlx5_flow_item_eth(const struct rte_flow_item *item, struct rte_flow *flow,
- const size_t flow_size, struct rte_flow_error *error)
+int
+mlx5_flow_validate_item_eth(const struct rte_flow_item *item,
+ uint64_t item_flags,
+ struct rte_flow_error *error)
{
- const struct rte_flow_item_eth *spec = item->spec;
const struct rte_flow_item_eth *mask = item->mask;
const struct rte_flow_item_eth nic_mask = {
.dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
.src.addr_bytes = "\xff\xff\xff\xff\xff\xff",
.type = RTE_BE16(0xffff),
};
- const int tunnel = !!(flow->layers & MLX5_FLOW_LAYER_TUNNEL);
- const unsigned int size = sizeof(struct ibv_flow_spec_eth);
- struct ibv_flow_spec_eth eth = {
- .type = IBV_FLOW_SPEC_ETH | (tunnel ? IBV_FLOW_SPEC_INNER : 0),
- .size = size,
- };
int ret;
+ int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
- if (flow->layers & (tunnel ? MLX5_FLOW_LAYER_INNER_L2 :
- MLX5_FLOW_LAYER_OUTER_L2))
+ if (item_flags & MLX5_FLOW_LAYER_OUTER_L2)
return rte_flow_error_set(error, ENOTSUP,
- RTE_FLOW_ERROR_TYPE_ITEM,
- item,
- "L2 layers already configured");
+ RTE_FLOW_ERROR_TYPE_ITEM, item,
+ "3 levels of l2 are not supported");
+ if ((item_flags & MLX5_FLOW_LAYER_INNER_L2) && !tunnel)
+ return rte_flow_error_set(error, ENOTSUP,
+ RTE_FLOW_ERROR_TYPE_ITEM, item,
+ "2 L2 without tunnel are not supported");
if (!mask)
mask = &rte_flow_item_eth_mask;
ret = mlx5_flow_item_acceptable(item, (const uint8_t *)mask,
(const uint8_t *)&nic_mask,
sizeof(struct rte_flow_item_eth),
error);
- if (ret)
- return ret;
- flow->layers |= tunnel ? MLX5_FLOW_LAYER_INNER_L2 :
- MLX5_FLOW_LAYER_OUTER_L2;
- if (size > flow_size)
- return size;
- if (spec) {
- unsigned int i;
-
- memcpy(&eth.val.dst_mac, spec->dst.addr_bytes, ETHER_ADDR_LEN);
- memcpy(&eth.val.src_mac, spec->src.addr_bytes, ETHER_ADDR_LEN);
- eth.val.ether_type = spec->type;
- memcpy(&eth.mask.dst_mac, mask->dst.addr_bytes, ETHER_ADDR_LEN);
- memcpy(&eth.mask.src_mac, mask->src.addr_bytes, ETHER_ADDR_LEN);
- eth.mask.ether_type = mask->type;
- /* Remove unwanted bits from values. */
- for (i = 0; i < ETHER_ADDR_LEN; ++i) {
- eth.val.dst_mac[i] &= eth.mask.dst_mac[i];
- eth.val.src_mac[i] &= eth.mask.src_mac[i];
- }
- eth.val.ether_type &= eth.mask.ether_type;
- }
- flow->cur_verbs->attr->priority = MLX5_PRIORITY_MAP_L2;
- mlx5_flow_spec_verbs_add(flow, &eth, size);
- return size;
-}
-
-/**
- * Update the VLAN tag in the Verbs Ethernet specification.
- *
- * @param[in, out] attr
- * Pointer to Verbs attributes structure.
- * @param[in] eth
- * Verbs structure containing the VLAN information to copy.
- */
-static void
-mlx5_flow_item_vlan_update(struct ibv_flow_attr *attr,
- struct ibv_flow_spec_eth *eth)
-{
- unsigned int i;
- const enum ibv_flow_spec_type search = eth->type;
- struct ibv_spec_header *hdr = (struct ibv_spec_header *)
- ((uint8_t *)attr + sizeof(struct ibv_flow_attr));
-
- for (i = 0; i != attr->num_of_specs; ++i) {
- if (hdr->type == search) {
- struct ibv_flow_spec_eth *e =
- (struct ibv_flow_spec_eth *)hdr;
-
- e->val.vlan_tag = eth->val.vlan_tag;
- e->mask.vlan_tag = eth->mask.vlan_tag;
- e->val.ether_type = eth->val.ether_type;
- e->mask.ether_type = eth->mask.ether_type;
- break;
- }
- hdr = (struct ibv_spec_header *)((uint8_t *)hdr + hdr->size);
- }
+ return ret;
}
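+
+/*
+ * Unlike the former Verbs converter, the validator above does not update any
+ * layer bits itself; the caller is presumably expected to do so after a
+ * successful check, e.g.:
+ *
+ *	ret = mlx5_flow_validate_item_eth(items, item_flags, error);
+ *	if (ret < 0)
+ *		return ret;
+ *	item_flags |= tunnel ? MLX5_FLOW_LAYER_INNER_L2 :
+ *			       MLX5_FLOW_LAYER_OUTER_L2;
+ */
+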
/**
- * Convert the @p item into @p flow (or by updating the already present
- * Ethernet Verbs) specification after ensuring the NIC will understand and
- * process it correctly.
- * If the necessary size for the conversion is greater than the @p flow_size,
- * nothing is written in @p flow, the validation is still performed.
+ * Validate VLAN item.
*
* @param[in] item
* Item specification.
- * @param[in, out] flow
- * Pointer to flow structure.
- * @param[in] flow_size
- * Size in bytes of the available space in @p flow, if too small, nothing is
- * written.
+ * @param[in] item_flags
+ * Bit-fields that hold the items detected until now.
* @param[out] error
* Pointer to error structure.
*
* @return
- * On success the number of bytes consumed/necessary, if the returned value
- * is lesser or equal to @p flow_size, the @p item has fully been converted,
- * otherwise another call with this returned memory size should be done.
- * On error, a negative errno value is returned and rte_errno is set.
+ * 0 on success, a negative errno value otherwise and rte_errno is set.
*/
-static int
-mlx5_flow_item_vlan(const struct rte_flow_item *item, struct rte_flow *flow,
- const size_t flow_size, struct rte_flow_error *error)
+int
+mlx5_flow_validate_item_vlan(const struct rte_flow_item *item,
+ int64_t item_flags,
+ struct rte_flow_error *error)
{
const struct rte_flow_item_vlan *spec = item->spec;
const struct rte_flow_item_vlan *mask = item->mask;
@@ -894,100 +1088,66 @@ mlx5_flow_item_vlan(const struct rte_flow_item *item, struct rte_flow *flow,
.tci = RTE_BE16(0x0fff),
.inner_type = RTE_BE16(0xffff),
};
- unsigned int size = sizeof(struct ibv_flow_spec_eth);
- const int tunnel = !!(flow->layers & MLX5_FLOW_LAYER_TUNNEL);
- struct ibv_flow_spec_eth eth = {
- .type = IBV_FLOW_SPEC_ETH | (tunnel ? IBV_FLOW_SPEC_INNER : 0),
- .size = size,
- };
+ uint16_t vlan_tag = 0;
+ const int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
int ret;
const uint32_t l34m = tunnel ? (MLX5_FLOW_LAYER_INNER_L3 |
MLX5_FLOW_LAYER_INNER_L4) :
- (MLX5_FLOW_LAYER_OUTER_L3 | MLX5_FLOW_LAYER_OUTER_L4);
+ (MLX5_FLOW_LAYER_OUTER_L3 |
+ MLX5_FLOW_LAYER_OUTER_L4);
const uint32_t vlanm = tunnel ? MLX5_FLOW_LAYER_INNER_VLAN :
- MLX5_FLOW_LAYER_OUTER_VLAN;
- const uint32_t l2m = tunnel ? MLX5_FLOW_LAYER_INNER_L2 :
- MLX5_FLOW_LAYER_OUTER_L2;
+ MLX5_FLOW_LAYER_OUTER_VLAN;
- if (flow->layers & vlanm)
- return rte_flow_error_set(error, ENOTSUP,
- RTE_FLOW_ERROR_TYPE_ITEM,
- item,
+ if (item_flags & vlanm)
+ return rte_flow_error_set(error, EINVAL,
+ RTE_FLOW_ERROR_TYPE_ITEM, item,
"VLAN layer already configured");
- else if ((flow->layers & l34m) != 0)
- return rte_flow_error_set(error, ENOTSUP,
- RTE_FLOW_ERROR_TYPE_ITEM,
- item,
+ else if ((item_flags & l34m) != 0)
+ return rte_flow_error_set(error, EINVAL,
+ RTE_FLOW_ERROR_TYPE_ITEM, item,
"L2 layer cannot follow L3/L4 layer");
if (!mask)
mask = &rte_flow_item_vlan_mask;
- ret = mlx5_flow_item_acceptable
- (item, (const uint8_t *)mask,
- (const uint8_t *)&nic_mask,
- sizeof(struct rte_flow_item_vlan), error);
+ ret = mlx5_flow_item_acceptable(item, (const uint8_t *)mask,
+ (const uint8_t *)&nic_mask,
+ sizeof(struct rte_flow_item_vlan),
+ error);
if (ret)
return ret;
if (spec) {
- eth.val.vlan_tag = spec->tci;
- eth.mask.vlan_tag = mask->tci;
- eth.val.vlan_tag &= eth.mask.vlan_tag;
- eth.val.ether_type = spec->inner_type;
- eth.mask.ether_type = mask->inner_type;
- eth.val.ether_type &= eth.mask.ether_type;
+ vlan_tag = spec->tci;
+ vlan_tag &= mask->tci;
}
/*
* From verbs perspective an empty VLAN is equivalent
* to a packet without VLAN layer.
*/
- if (!eth.mask.vlan_tag)
+ if (!vlan_tag)
return rte_flow_error_set(error, EINVAL,
RTE_FLOW_ERROR_TYPE_ITEM_SPEC,
item->spec,
"VLAN cannot be empty");
- if (!(flow->layers & l2m)) {
- if (size <= flow_size) {
- flow->cur_verbs->attr->priority = MLX5_PRIORITY_MAP_L2;
- mlx5_flow_spec_verbs_add(flow, &eth, size);
- }
- } else {
- if (flow->cur_verbs)
- mlx5_flow_item_vlan_update(flow->cur_verbs->attr,
- &eth);
- size = 0; /* Only an update is done in eth specification. */
- }
- flow->layers |= tunnel ?
- (MLX5_FLOW_LAYER_INNER_L2 | MLX5_FLOW_LAYER_INNER_VLAN) :
- (MLX5_FLOW_LAYER_OUTER_L2 | MLX5_FLOW_LAYER_OUTER_VLAN);
- return size;
+ return 0;
}
/**
- * Convert the @p item into a Verbs specification after ensuring the NIC
- * will understand and process it correctly.
- * If the necessary size for the conversion is greater than the @p flow_size,
- * nothing is written in @p flow, the validation is still performed.
+ * Validate IPV4 item.
*
* @param[in] item
* Item specification.
- * @param[in, out] flow
- * Pointer to flow structure.
- * @param[in] flow_size
- * Size in bytes of the available space in @p flow, if too small, nothing is
- * written.
+ * @param[in] item_flags
+ * Bit-fields that hold the items detected until now.
* @param[out] error
* Pointer to error structure.
*
* @return
- * On success the number of bytes consumed/necessary, if the returned value
- * is lesser or equal to @p flow_size, the @p item has fully been converted,
- * otherwise another call with this returned memory size should be done.
- * On error, a negative errno value is returned and rte_errno is set.
+ * 0 on success, a negative errno value otherwise and rte_errno is set.
*/
-static int
-mlx5_flow_item_ipv4(const struct rte_flow_item *item, struct rte_flow *flow,
- const size_t flow_size, struct rte_flow_error *error)
+int
+mlx5_flow_validate_item_ipv4(const struct rte_flow_item *item,
+ int64_t item_flags,
+ struct rte_flow_error *error)
{
- const struct rte_flow_item_ipv4 *spec = item->spec;
const struct rte_flow_item_ipv4 *mask = item->mask;
const struct rte_flow_item_ipv4 nic_mask = {
.hdr = {
@@ -997,97 +1157,48 @@ mlx5_flow_item_ipv4(const struct rte_flow_item *item, struct rte_flow *flow,
.next_proto_id = 0xff,
},
};
- const int tunnel = !!(flow->layers & MLX5_FLOW_LAYER_TUNNEL);
- unsigned int size = sizeof(struct ibv_flow_spec_ipv4_ext);
- struct ibv_flow_spec_ipv4_ext ipv4 = {
- .type = IBV_FLOW_SPEC_IPV4_EXT |
- (tunnel ? IBV_FLOW_SPEC_INNER : 0),
- .size = size,
- };
+ const int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
int ret;
- if (flow->layers & (tunnel ? MLX5_FLOW_LAYER_INNER_L3 :
- MLX5_FLOW_LAYER_OUTER_L3))
+ if (item_flags & (tunnel ? MLX5_FLOW_LAYER_INNER_L3 :
+ MLX5_FLOW_LAYER_OUTER_L3))
return rte_flow_error_set(error, ENOTSUP,
- RTE_FLOW_ERROR_TYPE_ITEM,
- item,
+ RTE_FLOW_ERROR_TYPE_ITEM, item,
"multiple L3 layers not supported");
- else if (flow->layers & (tunnel ? MLX5_FLOW_LAYER_INNER_L4 :
- MLX5_FLOW_LAYER_OUTER_L4))
- return rte_flow_error_set(error, ENOTSUP,
- RTE_FLOW_ERROR_TYPE_ITEM,
- item,
+ else if (item_flags & (tunnel ? MLX5_FLOW_LAYER_INNER_L4 :
+ MLX5_FLOW_LAYER_OUTER_L4))
+ return rte_flow_error_set(error, EINVAL,
+ RTE_FLOW_ERROR_TYPE_ITEM, item,
"L3 cannot follow an L4 layer.");
if (!mask)
mask = &rte_flow_item_ipv4_mask;
- ret = mlx5_flow_item_acceptable
- (item, (const uint8_t *)mask,
- (const uint8_t *)&nic_mask,
- sizeof(struct rte_flow_item_ipv4), error);
+ ret = mlx5_flow_item_acceptable(item, (const uint8_t *)mask,
+ (const uint8_t *)&nic_mask,
+ sizeof(struct rte_flow_item_ipv4),
+ error);
if (ret < 0)
return ret;
- flow->layers |= tunnel ? MLX5_FLOW_LAYER_INNER_L3_IPV4 :
- MLX5_FLOW_LAYER_OUTER_L3_IPV4;
- if (spec) {
- ipv4.val = (struct ibv_flow_ipv4_ext_filter){
- .src_ip = spec->hdr.src_addr,
- .dst_ip = spec->hdr.dst_addr,
- .proto = spec->hdr.next_proto_id,
- .tos = spec->hdr.type_of_service,
- };
- ipv4.mask = (struct ibv_flow_ipv4_ext_filter){
- .src_ip = mask->hdr.src_addr,
- .dst_ip = mask->hdr.dst_addr,
- .proto = mask->hdr.next_proto_id,
- .tos = mask->hdr.type_of_service,
- };
- /* Remove unwanted bits from values. */
- ipv4.val.src_ip &= ipv4.mask.src_ip;
- ipv4.val.dst_ip &= ipv4.mask.dst_ip;
- ipv4.val.proto &= ipv4.mask.proto;
- ipv4.val.tos &= ipv4.mask.tos;
- }
- flow->l3_protocol_en = !!ipv4.mask.proto;
- flow->l3_protocol = ipv4.val.proto;
- if (size <= flow_size) {
- mlx5_flow_verbs_hashfields_adjust
- (flow, tunnel,
- (ETH_RSS_IPV4 | ETH_RSS_FRAG_IPV4 |
- ETH_RSS_NONFRAG_IPV4_OTHER),
- (IBV_RX_HASH_SRC_IPV4 | IBV_RX_HASH_DST_IPV4));
- flow->cur_verbs->attr->priority = MLX5_PRIORITY_MAP_L3;
- mlx5_flow_spec_verbs_add(flow, &ipv4, size);
- }
- return size;
+ return 0;
}
/**
- * Convert the @p item into a Verbs specification after ensuring the NIC
- * will understand and process it correctly.
- * If the necessary size for the conversion is greater than the @p flow_size,
- * nothing is written in @p flow, the validation is still performed.
+ * Validate IPV6 item.
*
* @param[in] item
* Item specification.
- * @param[in, out] flow
- * Pointer to flow structure.
- * @param[in] flow_size
- * Size in bytes of the available space in @p flow, if too small, nothing is
- * written.
+ * @param[in] item_flags
+ * Bit-fields that hold the items detected until now.
* @param[out] error
* Pointer to error structure.
*
* @return
- * On success the number of bytes consumed/necessary, if the returned value
- * is lesser or equal to @p flow_size, the @p item has fully been converted,
- * otherwise another call with this returned memory size should be done.
- * On error, a negative errno value is returned and rte_errno is set.
+ * 0 on success, a negative errno value otherwise and rte_errno is set.
*/
-static int
-mlx5_flow_item_ipv6(const struct rte_flow_item *item, struct rte_flow *flow,
- const size_t flow_size, struct rte_flow_error *error)
+int
+mlx5_flow_validate_item_ipv6(const struct rte_flow_item *item,
+ uint64_t item_flags,
+ struct rte_flow_error *error)
{
- const struct rte_flow_item_ipv6 *spec = item->spec;
const struct rte_flow_item_ipv6 *mask = item->mask;
const struct rte_flow_item_ipv6 nic_mask = {
.hdr = {
@@ -1102,25 +1213,18 @@ mlx5_flow_item_ipv6(const struct rte_flow_item *item, struct rte_flow *flow,
.hop_limits = 0xff,
},
};
- const int tunnel = !!(flow->layers & MLX5_FLOW_LAYER_TUNNEL);
- unsigned int size = sizeof(struct ibv_flow_spec_ipv6);
- struct ibv_flow_spec_ipv6 ipv6 = {
- .type = IBV_FLOW_SPEC_IPV6 | (tunnel ? IBV_FLOW_SPEC_INNER : 0),
- .size = size,
- };
+ const int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
int ret;
- if (flow->layers & (tunnel ? MLX5_FLOW_LAYER_INNER_L3 :
- MLX5_FLOW_LAYER_OUTER_L3))
+ if (item_flags & (tunnel ? MLX5_FLOW_LAYER_INNER_L3 :
+ MLX5_FLOW_LAYER_OUTER_L3))
return rte_flow_error_set(error, ENOTSUP,
- RTE_FLOW_ERROR_TYPE_ITEM,
- item,
+ RTE_FLOW_ERROR_TYPE_ITEM, item,
"multiple L3 layers not supported");
- else if (flow->layers & (tunnel ? MLX5_FLOW_LAYER_INNER_L4 :
- MLX5_FLOW_LAYER_OUTER_L4))
- return rte_flow_error_set(error, ENOTSUP,
- RTE_FLOW_ERROR_TYPE_ITEM,
- item,
+ else if (item_flags & (tunnel ? MLX5_FLOW_LAYER_INNER_L4 :
+ MLX5_FLOW_LAYER_OUTER_L4))
+ return rte_flow_error_set(error, EINVAL,
+ RTE_FLOW_ERROR_TYPE_ITEM, item,
"L3 cannot follow an L4 layer.");
/*
* IPv6 is not recognised by the NIC inside a GRE tunnel.
@@ -1128,130 +1232,64 @@ mlx5_flow_item_ipv6(const struct rte_flow_item *item, struct rte_flow *flow,
* accepted. Issue reproduced with Mellanox OFED 4.3-3.0.2.1 and
* Mellanox OFED 4.4-1.0.0.0.
*/
- if (tunnel && flow->layers & MLX5_FLOW_LAYER_GRE)
+ if (tunnel && item_flags & MLX5_FLOW_LAYER_GRE)
return rte_flow_error_set(error, ENOTSUP,
- RTE_FLOW_ERROR_TYPE_ITEM,
- item,
+ RTE_FLOW_ERROR_TYPE_ITEM, item,
"IPv6 inside a GRE tunnel is"
" not recognised.");
if (!mask)
mask = &rte_flow_item_ipv6_mask;
- ret = mlx5_flow_item_acceptable
- (item, (const uint8_t *)mask,
- (const uint8_t *)&nic_mask,
- sizeof(struct rte_flow_item_ipv6), error);
+ ret = mlx5_flow_item_acceptable(item, (const uint8_t *)mask,
+ (const uint8_t *)&nic_mask,
+ sizeof(struct rte_flow_item_ipv6),
+ error);
if (ret < 0)
return ret;
- flow->layers |= tunnel ? MLX5_FLOW_LAYER_INNER_L3_IPV6 :
- MLX5_FLOW_LAYER_OUTER_L3_IPV6;
- if (spec) {
- unsigned int i;
- uint32_t vtc_flow_val;
- uint32_t vtc_flow_mask;
-
- memcpy(&ipv6.val.src_ip, spec->hdr.src_addr,
- RTE_DIM(ipv6.val.src_ip));
- memcpy(&ipv6.val.dst_ip, spec->hdr.dst_addr,
- RTE_DIM(ipv6.val.dst_ip));
- memcpy(&ipv6.mask.src_ip, mask->hdr.src_addr,
- RTE_DIM(ipv6.mask.src_ip));
- memcpy(&ipv6.mask.dst_ip, mask->hdr.dst_addr,
- RTE_DIM(ipv6.mask.dst_ip));
- vtc_flow_val = rte_be_to_cpu_32(spec->hdr.vtc_flow);
- vtc_flow_mask = rte_be_to_cpu_32(mask->hdr.vtc_flow);
- ipv6.val.flow_label =
- rte_cpu_to_be_32((vtc_flow_val & IPV6_HDR_FL_MASK) >>
- IPV6_HDR_FL_SHIFT);
- ipv6.val.traffic_class = (vtc_flow_val & IPV6_HDR_TC_MASK) >>
- IPV6_HDR_TC_SHIFT;
- ipv6.val.next_hdr = spec->hdr.proto;
- ipv6.val.hop_limit = spec->hdr.hop_limits;
- ipv6.mask.flow_label =
- rte_cpu_to_be_32((vtc_flow_mask & IPV6_HDR_FL_MASK) >>
- IPV6_HDR_FL_SHIFT);
- ipv6.mask.traffic_class = (vtc_flow_mask & IPV6_HDR_TC_MASK) >>
- IPV6_HDR_TC_SHIFT;
- ipv6.mask.next_hdr = mask->hdr.proto;
- ipv6.mask.hop_limit = mask->hdr.hop_limits;
- /* Remove unwanted bits from values. */
- for (i = 0; i < RTE_DIM(ipv6.val.src_ip); ++i) {
- ipv6.val.src_ip[i] &= ipv6.mask.src_ip[i];
- ipv6.val.dst_ip[i] &= ipv6.mask.dst_ip[i];
- }
- ipv6.val.flow_label &= ipv6.mask.flow_label;
- ipv6.val.traffic_class &= ipv6.mask.traffic_class;
- ipv6.val.next_hdr &= ipv6.mask.next_hdr;
- ipv6.val.hop_limit &= ipv6.mask.hop_limit;
- }
- flow->l3_protocol_en = !!ipv6.mask.next_hdr;
- flow->l3_protocol = ipv6.val.next_hdr;
- if (size <= flow_size) {
- mlx5_flow_verbs_hashfields_adjust
- (flow, tunnel,
- (ETH_RSS_IPV6 | ETH_RSS_NONFRAG_IPV6_OTHER),
- (IBV_RX_HASH_SRC_IPV6 | IBV_RX_HASH_DST_IPV6));
- flow->cur_verbs->attr->priority = MLX5_PRIORITY_MAP_L3;
- mlx5_flow_spec_verbs_add(flow, &ipv6, size);
- }
- return size;
+ return 0;
}
/**
- * Convert the @p item into a Verbs specification after ensuring the NIC
- * will understand and process it correctly.
- * If the necessary size for the conversion is greater than the @p flow_size,
- * nothing is written in @p flow, the validation is still performed.
+ * Validate UDP item.
*
* @param[in] item
* Item specification.
- * @param[in, out] flow
- * Pointer to flow structure.
- * @param[in] flow_size
- * Size in bytes of the available space in @p flow, if too small, nothing is
- * written.
+ * @param[in] item_flags
+ * Bit-fields that hold the items detected until now.
+ * @param[in] target_protocol
+ * The next protocol in the previous item.
* @param[out] error
* Pointer to error structure.
*
* @return
- * On success the number of bytes consumed/necessary, if the returned value
- * is lesser or equal to @p flow_size, the @p item has fully been converted,
- * otherwise another call with this returned memory size should be done.
- * On error, a negative errno value is returned and rte_errno is set.
+ * 0 on success, a negative errno value otherwise and rte_errno is set.
*/
-static int
-mlx5_flow_item_udp(const struct rte_flow_item *item, struct rte_flow *flow,
- const size_t flow_size, struct rte_flow_error *error)
+int
+mlx5_flow_validate_item_udp(const struct rte_flow_item *item,
+ uint64_t item_flags,
+ uint8_t target_protocol,
+ struct rte_flow_error *error)
{
- const struct rte_flow_item_udp *spec = item->spec;
const struct rte_flow_item_udp *mask = item->mask;
- const int tunnel = !!(flow->layers & MLX5_FLOW_LAYER_TUNNEL);
- unsigned int size = sizeof(struct ibv_flow_spec_tcp_udp);
- struct ibv_flow_spec_tcp_udp udp = {
- .type = IBV_FLOW_SPEC_UDP | (tunnel ? IBV_FLOW_SPEC_INNER : 0),
- .size = size,
- };
+ const int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
int ret;
- if (flow->l3_protocol_en && flow->l3_protocol != MLX5_IP_PROTOCOL_UDP)
- return rte_flow_error_set(error, ENOTSUP,
- RTE_FLOW_ERROR_TYPE_ITEM,
- item,
+ if (target_protocol != 0xff && target_protocol != IPPROTO_UDP)
+ return rte_flow_error_set(error, EINVAL,
+ RTE_FLOW_ERROR_TYPE_ITEM, item,
"protocol filtering not compatible"
" with UDP layer");
- if (!(flow->layers & (tunnel ? MLX5_FLOW_LAYER_INNER_L3 :
- MLX5_FLOW_LAYER_OUTER_L3)))
- return rte_flow_error_set(error, ENOTSUP,
- RTE_FLOW_ERROR_TYPE_ITEM,
- item,
- "L3 is mandatory to filter"
- " on L4");
- if (flow->layers & (tunnel ? MLX5_FLOW_LAYER_INNER_L4 :
- MLX5_FLOW_LAYER_OUTER_L4))
- return rte_flow_error_set(error, ENOTSUP,
- RTE_FLOW_ERROR_TYPE_ITEM,
- item,
- "L4 layer is already"
- " present");
+ if (!(item_flags & (tunnel ? MLX5_FLOW_LAYER_INNER_L3 :
+ MLX5_FLOW_LAYER_OUTER_L3)))
+ return rte_flow_error_set(error, EINVAL,
+ RTE_FLOW_ERROR_TYPE_ITEM, item,
+ "L3 is mandatory to filter on L4");
+ if (item_flags & (tunnel ? MLX5_FLOW_LAYER_INNER_L4 :
+ MLX5_FLOW_LAYER_OUTER_L4))
+ return rte_flow_error_set(error, EINVAL,
+ RTE_FLOW_ERROR_TYPE_ITEM, item,
+ "L4 layer is already present");
if (!mask)
mask = &rte_flow_item_udp_mask;
ret = mlx5_flow_item_acceptable
@@ -1260,178 +1298,118 @@ mlx5_flow_item_udp(const struct rte_flow_item *item, struct rte_flow *flow,
sizeof(struct rte_flow_item_udp), error);
if (ret < 0)
return ret;
- flow->layers |= tunnel ? MLX5_FLOW_LAYER_INNER_L4_UDP :
- MLX5_FLOW_LAYER_OUTER_L4_UDP;
- if (spec) {
- udp.val.dst_port = spec->hdr.dst_port;
- udp.val.src_port = spec->hdr.src_port;
- udp.mask.dst_port = mask->hdr.dst_port;
- udp.mask.src_port = mask->hdr.src_port;
- /* Remove unwanted bits from values. */
- udp.val.src_port &= udp.mask.src_port;
- udp.val.dst_port &= udp.mask.dst_port;
- }
- if (size <= flow_size) {
- mlx5_flow_verbs_hashfields_adjust(flow, tunnel, ETH_RSS_UDP,
- (IBV_RX_HASH_SRC_PORT_UDP |
- IBV_RX_HASH_DST_PORT_UDP));
- flow->cur_verbs->attr->priority = MLX5_PRIORITY_MAP_L4;
- mlx5_flow_spec_verbs_add(flow, &udp, size);
- }
- return size;
+ return 0;
}
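+
+/*
+ * target_protocol is the IP protocol taken from the mask/spec of the
+ * preceding L3 item, or 0xff when the pattern leaves it unconstrained, so
+ * that e.g. an IPv4 item matching "proto is 6" (TCP) followed by a UDP item
+ * is rejected. A caller sketch (the variable name is illustrative):
+ *
+ *	uint8_t next_protocol = 0xff;
+ *	...
+ *	ret = mlx5_flow_validate_item_udp(items, item_flags,
+ *					  next_protocol, error);
+ */
+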
/**
- * Convert the @p item into a Verbs specification after ensuring the NIC
- * will understand and process it correctly.
- * If the necessary size for the conversion is greater than the @p flow_size,
- * nothing is written in @p flow, the validation is still performed.
+ * Validate TCP item.
*
* @param[in] item
* Item specification.
- * @param[in, out] flow
- * Pointer to flow structure.
- * @param[in] flow_size
- * Size in bytes of the available space in @p flow, if too small, nothing is
- * written.
+ * @param[in] item_flags
+ * Bit-fields that hold the items detected until now.
+ * @param[in] target_protocol
+ * The next protocol in the previous item.
+ * @param[in] flow_mask
+ * mlx5 flow-specific (TCF, DV, verbs, etc.) supported header fields mask.
* @param[out] error
* Pointer to error structure.
*
* @return
- * On success the number of bytes consumed/necessary, if the returned value
- * is lesser or equal to @p flow_size, the @p item has fully been converted,
- * otherwise another call with this returned memory size should be done.
- * On error, a negative errno value is returned and rte_errno is set.
+ * 0 on success, a negative errno value otherwise and rte_errno is set.
*/
-static int
-mlx5_flow_item_tcp(const struct rte_flow_item *item, struct rte_flow *flow,
- const size_t flow_size, struct rte_flow_error *error)
+int
+mlx5_flow_validate_item_tcp(const struct rte_flow_item *item,
+ uint64_t item_flags,
+ uint8_t target_protocol,
+ const struct rte_flow_item_tcp *flow_mask,
+ struct rte_flow_error *error)
{
- const struct rte_flow_item_tcp *spec = item->spec;
const struct rte_flow_item_tcp *mask = item->mask;
- const int tunnel = !!(flow->layers & MLX5_FLOW_LAYER_TUNNEL);
- unsigned int size = sizeof(struct ibv_flow_spec_tcp_udp);
- struct ibv_flow_spec_tcp_udp tcp = {
- .type = IBV_FLOW_SPEC_TCP | (tunnel ? IBV_FLOW_SPEC_INNER : 0),
- .size = size,
- };
+ const int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
int ret;
- if (flow->l3_protocol_en && flow->l3_protocol != MLX5_IP_PROTOCOL_TCP)
- return rte_flow_error_set(error, ENOTSUP,
- RTE_FLOW_ERROR_TYPE_ITEM,
- item,
+ assert(flow_mask);
+ if (target_protocol != 0xff && target_protocol != IPPROTO_TCP)
+ return rte_flow_error_set(error, EINVAL,
+ RTE_FLOW_ERROR_TYPE_ITEM, item,
"protocol filtering not compatible"
" with TCP layer");
- if (!(flow->layers & (tunnel ? MLX5_FLOW_LAYER_INNER_L3 :
- MLX5_FLOW_LAYER_OUTER_L3)))
- return rte_flow_error_set(error, ENOTSUP,
- RTE_FLOW_ERROR_TYPE_ITEM,
- item,
+ if (!(item_flags & (tunnel ? MLX5_FLOW_LAYER_INNER_L3 :
+ MLX5_FLOW_LAYER_OUTER_L3)))
+ return rte_flow_error_set(error, EINVAL,
+ RTE_FLOW_ERROR_TYPE_ITEM, item,
"L3 is mandatory to filter on L4");
- if (flow->layers & (tunnel ? MLX5_FLOW_LAYER_INNER_L4 :
- MLX5_FLOW_LAYER_OUTER_L4))
- return rte_flow_error_set(error, ENOTSUP,
- RTE_FLOW_ERROR_TYPE_ITEM,
- item,
+ if (item_flags & (tunnel ? MLX5_FLOW_LAYER_INNER_L4 :
+ MLX5_FLOW_LAYER_OUTER_L4))
+ return rte_flow_error_set(error, EINVAL,
+ RTE_FLOW_ERROR_TYPE_ITEM, item,
"L4 layer is already present");
if (!mask)
mask = &rte_flow_item_tcp_mask;
ret = mlx5_flow_item_acceptable
(item, (const uint8_t *)mask,
- (const uint8_t *)&rte_flow_item_tcp_mask,
+ (const uint8_t *)flow_mask,
sizeof(struct rte_flow_item_tcp), error);
if (ret < 0)
return ret;
- flow->layers |= tunnel ? MLX5_FLOW_LAYER_INNER_L4_TCP :
- MLX5_FLOW_LAYER_OUTER_L4_TCP;
- if (spec) {
- tcp.val.dst_port = spec->hdr.dst_port;
- tcp.val.src_port = spec->hdr.src_port;
- tcp.mask.dst_port = mask->hdr.dst_port;
- tcp.mask.src_port = mask->hdr.src_port;
- /* Remove unwanted bits from values. */
- tcp.val.src_port &= tcp.mask.src_port;
- tcp.val.dst_port &= tcp.mask.dst_port;
- }
- if (size <= flow_size) {
- mlx5_flow_verbs_hashfields_adjust(flow, tunnel, ETH_RSS_TCP,
- (IBV_RX_HASH_SRC_PORT_TCP |
- IBV_RX_HASH_DST_PORT_TCP));
- flow->cur_verbs->attr->priority = MLX5_PRIORITY_MAP_L4;
- mlx5_flow_spec_verbs_add(flow, &tcp, size);
- }
- return size;
+ return 0;
}
/**
- * Convert the @p item into a Verbs specification after ensuring the NIC
- * will understand and process it correctly.
- * If the necessary size for the conversion is greater than the @p flow_size,
- * nothing is written in @p flow, the validation is still performed.
+ * Validate VXLAN item.
*
* @param[in] item
* Item specification.
- * @param[in, out] flow
- * Pointer to flow structure.
- * @param[in] flow_size
- * Size in bytes of the available space in @p flow, if too small, nothing is
- * written.
+ * @param[in] item_flags
+ * Bit-fields that hold the items detected until now.
* @param[out] error
* Pointer to error structure.
*
* @return
- * On success the number of bytes consumed/necessary, if the returned value
- * is lesser or equal to @p flow_size, the @p item has fully been converted,
- * otherwise another call with this returned memory size should be done.
- * On error, a negative errno value is returned and rte_errno is set.
+ * 0 on success, a negative errno value otherwise and rte_errno is set.
*/
-static int
-mlx5_flow_item_vxlan(const struct rte_flow_item *item, struct rte_flow *flow,
- const size_t flow_size, struct rte_flow_error *error)
+int
+mlx5_flow_validate_item_vxlan(const struct rte_flow_item *item,
+ uint64_t item_flags,
+ struct rte_flow_error *error)
{
const struct rte_flow_item_vxlan *spec = item->spec;
const struct rte_flow_item_vxlan *mask = item->mask;
- unsigned int size = sizeof(struct ibv_flow_spec_tunnel);
- struct ibv_flow_spec_tunnel vxlan = {
- .type = IBV_FLOW_SPEC_VXLAN_TUNNEL,
- .size = size,
- };
int ret;
union vni {
uint32_t vlan_id;
uint8_t vni[4];
} id = { .vlan_id = 0, };
+ uint32_t vlan_id = 0;
+
- if (flow->layers & MLX5_FLOW_LAYER_TUNNEL)
+ if (item_flags & MLX5_FLOW_LAYER_TUNNEL)
return rte_flow_error_set(error, ENOTSUP,
- RTE_FLOW_ERROR_TYPE_ITEM,
- item,
+ RTE_FLOW_ERROR_TYPE_ITEM, item,
"a tunnel is already present");
/*
* Verify only UDPv4 is present as defined in
* https://tools.ietf.org/html/rfc7348
*/
- if (!(flow->layers & MLX5_FLOW_LAYER_OUTER_L4_UDP))
- return rte_flow_error_set(error, ENOTSUP,
- RTE_FLOW_ERROR_TYPE_ITEM,
- item,
+ if (!(item_flags & MLX5_FLOW_LAYER_OUTER_L4_UDP))
+ return rte_flow_error_set(error, EINVAL,
+ RTE_FLOW_ERROR_TYPE_ITEM, item,
"no outer UDP layer found");
if (!mask)
mask = &rte_flow_item_vxlan_mask;
ret = mlx5_flow_item_acceptable
(item, (const uint8_t *)mask,
(const uint8_t *)&rte_flow_item_vxlan_mask,
- sizeof(struct rte_flow_item_vxlan), error);
+ sizeof(struct rte_flow_item_vxlan),
+ error);
if (ret < 0)
return ret;
if (spec) {
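+ /*
+ * The 3-byte VNI is copied into bytes 1..3 of the 4-byte union so the
+ * 32-bit overlay is non-zero exactly when the masked VNI is non-zero;
+ * only that property is used by the checks below.
+ */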
memcpy(&id.vni[1], spec->vni, 3);
- vxlan.val.tunnel_id = id.vlan_id;
+ vlan_id = id.vlan_id;
memcpy(&id.vni[1], mask->vni, 3);
- vxlan.mask.tunnel_id = id.vlan_id;
- /* Remove unwanted bits from values. */
- vxlan.val.tunnel_id &= vxlan.mask.tunnel_id;
+ vlan_id &= id.vlan_id;
}
/*
* Tunnel id 0 is equivalent as not adding a VXLAN layer, if
@@ -1442,109 +1420,88 @@ mlx5_flow_item_vxlan(const struct rte_flow_item *item, struct rte_flow *flow,
* match this rule. To avoid such situation, VNI 0 is
* currently refused.
*/
- if (!vxlan.val.tunnel_id)
- return rte_flow_error_set(error, EINVAL,
- RTE_FLOW_ERROR_TYPE_ITEM,
- item,
+ if (!vlan_id)
+ return rte_flow_error_set(error, ENOTSUP,
+ RTE_FLOW_ERROR_TYPE_ITEM, item,
"VXLAN vni cannot be 0");
- if (!(flow->layers & MLX5_FLOW_LAYER_OUTER))
- return rte_flow_error_set(error, EINVAL,
- RTE_FLOW_ERROR_TYPE_ITEM,
- item,
+ if (!(item_flags & MLX5_FLOW_LAYER_OUTER))
+ return rte_flow_error_set(error, ENOTSUP,
+ RTE_FLOW_ERROR_TYPE_ITEM, item,
"VXLAN tunnel must be fully defined");
- if (size <= flow_size) {
- mlx5_flow_spec_verbs_add(flow, &vxlan, size);
- flow->cur_verbs->attr->priority = MLX5_PRIORITY_MAP_L2;
- }
- flow->layers |= MLX5_FLOW_LAYER_VXLAN;
- return size;
+ return 0;
}
/**
- * Convert the @p item into a Verbs specification after ensuring the NIC
- * will understand and process it correctly.
- * If the necessary size for the conversion is greater than the @p flow_size,
- * nothing is written in @p flow, the validation is still performed.
+ * Validate VXLAN_GPE item.
*
- * @param dev
- * Pointer to Ethernet device.
* @param[in] item
* Item specification.
- * @param[in, out] flow
- * Pointer to flow structure.
- * @param[in] flow_size
- * Size in bytes of the available space in @p flow, if too small, nothing is
- * written.
+ * @param[in] item_flags
+ * Bit-fields that hold the items detected until now.
+ * @param[in] dev
+ * Pointer to the Ethernet device structure.
* @param[out] error
* Pointer to error structure.
*
* @return
- * On success the number of bytes consumed/necessary, if the returned value
- * is lesser or equal to @p flow_size, the @p item has fully been converted,
- * otherwise another call with this returned memory size should be done.
- * On error, a negative errno value is returned and rte_errno is set.
+ * 0 on success, a negative errno value otherwise and rte_errno is set.
*/
-static int
-mlx5_flow_item_vxlan_gpe(struct rte_eth_dev *dev,
- const struct rte_flow_item *item,
- struct rte_flow *flow, const size_t flow_size,
- struct rte_flow_error *error)
+int
+mlx5_flow_validate_item_vxlan_gpe(const struct rte_flow_item *item,
+ uint64_t item_flags,
+ struct rte_eth_dev *dev,
+ struct rte_flow_error *error)
{
+ struct priv *priv = dev->data->dev_private;
const struct rte_flow_item_vxlan_gpe *spec = item->spec;
const struct rte_flow_item_vxlan_gpe *mask = item->mask;
- unsigned int size = sizeof(struct ibv_flow_spec_tunnel);
- struct ibv_flow_spec_tunnel vxlan_gpe = {
- .type = IBV_FLOW_SPEC_VXLAN_TUNNEL,
- .size = size,
- };
int ret;
union vni {
uint32_t vlan_id;
uint8_t vni[4];
} id = { .vlan_id = 0, };
+ uint32_t vlan_id = 0;
- if (!((struct priv *)dev->data->dev_private)->config.l3_vxlan_en)
+ if (!priv->config.l3_vxlan_en)
return rte_flow_error_set(error, ENOTSUP,
- RTE_FLOW_ERROR_TYPE_ITEM,
- item,
+ RTE_FLOW_ERROR_TYPE_ITEM, item,
"L3 VXLAN is not enabled by device"
" parameter and/or not configured in"
" firmware");
- if (flow->layers & MLX5_FLOW_LAYER_TUNNEL)
+ if (item_flags & MLX5_FLOW_LAYER_TUNNEL)
return rte_flow_error_set(error, ENOTSUP,
- RTE_FLOW_ERROR_TYPE_ITEM,
- item,
+ RTE_FLOW_ERROR_TYPE_ITEM, item,
"a tunnel is already present");
/*
* Verify only UDPv4 is present as defined in
* https://tools.ietf.org/html/rfc7348
*/
- if (!(flow->layers & MLX5_FLOW_LAYER_OUTER_L4_UDP))
- return rte_flow_error_set(error, ENOTSUP,
- RTE_FLOW_ERROR_TYPE_ITEM,
- item,
+ if (!(item_flags & MLX5_FLOW_LAYER_OUTER_L4_UDP))
+ return rte_flow_error_set(error, EINVAL,
+ RTE_FLOW_ERROR_TYPE_ITEM, item,
"no outer UDP layer found");
if (!mask)
mask = &rte_flow_item_vxlan_gpe_mask;
ret = mlx5_flow_item_acceptable
(item, (const uint8_t *)mask,
(const uint8_t *)&rte_flow_item_vxlan_gpe_mask,
- sizeof(struct rte_flow_item_vxlan_gpe), error);
+ sizeof(struct rte_flow_item_vxlan_gpe),
+ error);
if (ret < 0)
return ret;
if (spec) {
+ if (spec->protocol)
+ return rte_flow_error_set(error, ENOTSUP,
+ RTE_FLOW_ERROR_TYPE_ITEM,
+ item,
+ "VxLAN-GPE protocol"
+ " not supported");
memcpy(&id.vni[1], spec->vni, 3);
- vxlan_gpe.val.tunnel_id = id.vlan_id;
+ vlan_id = id.vlan_id;
memcpy(&id.vni[1], mask->vni, 3);
- vxlan_gpe.mask.tunnel_id = id.vlan_id;
- if (spec->protocol)
- return rte_flow_error_set
- (error, EINVAL,
- RTE_FLOW_ERROR_TYPE_ITEM,
- item,
- "VxLAN-GPE protocol not supported");
- /* Remove unwanted bits from values. */
- vxlan_gpe.val.tunnel_id &= vxlan_gpe.mask.tunnel_id;
+ vlan_id &= id.vlan_id;
}
/*
* Tunnel id 0 is equivalent as not adding a VXLAN layer, if only this
@@ -1554,141 +1511,55 @@ mlx5_flow_item_vxlan_gpe(struct rte_eth_dev *dev,
* before will also match this rule. To avoid such situation, VNI 0
* is currently refused.
*/
- if (!vxlan_gpe.val.tunnel_id)
- return rte_flow_error_set(error, EINVAL,
- RTE_FLOW_ERROR_TYPE_ITEM,
- item,
+ if (!vlan_id)
+ return rte_flow_error_set(error, ENOTSUP,
+ RTE_FLOW_ERROR_TYPE_ITEM, item,
"VXLAN-GPE vni cannot be 0");
- if (!(flow->layers & MLX5_FLOW_LAYER_OUTER))
- return rte_flow_error_set(error, EINVAL,
- RTE_FLOW_ERROR_TYPE_ITEM,
- item,
+ if (!(item_flags & MLX5_FLOW_LAYER_OUTER))
+ return rte_flow_error_set(error, ENOTSUP,
+ RTE_FLOW_ERROR_TYPE_ITEM, item,
"VXLAN-GPE tunnel must be fully"
" defined");
- if (size <= flow_size) {
- mlx5_flow_spec_verbs_add(flow, &vxlan_gpe, size);
- flow->cur_verbs->attr->priority = MLX5_PRIORITY_MAP_L2;
- }
- flow->layers |= MLX5_FLOW_LAYER_VXLAN_GPE;
- return size;
-}
-
-/**
- * Update the protocol in Verbs IPv4/IPv6 spec.
- *
- * @param[in, out] attr
- * Pointer to Verbs attributes structure.
- * @param[in] search
- * Specification type to search in order to update the IP protocol.
- * @param[in] protocol
- * Protocol value to set if none is present in the specification.
- */
-static void
-mlx5_flow_item_gre_ip_protocol_update(struct ibv_flow_attr *attr,
- enum ibv_flow_spec_type search,
- uint8_t protocol)
-{
- unsigned int i;
- struct ibv_spec_header *hdr = (struct ibv_spec_header *)
- ((uint8_t *)attr + sizeof(struct ibv_flow_attr));
-
- if (!attr)
- return;
- for (i = 0; i != attr->num_of_specs; ++i) {
- if (hdr->type == search) {
- union {
- struct ibv_flow_spec_ipv4_ext *ipv4;
- struct ibv_flow_spec_ipv6 *ipv6;
- } ip;
-
- switch (search) {
- case IBV_FLOW_SPEC_IPV4_EXT:
- ip.ipv4 = (struct ibv_flow_spec_ipv4_ext *)hdr;
- if (!ip.ipv4->val.proto) {
- ip.ipv4->val.proto = protocol;
- ip.ipv4->mask.proto = 0xff;
- }
- break;
- case IBV_FLOW_SPEC_IPV6:
- ip.ipv6 = (struct ibv_flow_spec_ipv6 *)hdr;
- if (!ip.ipv6->val.next_hdr) {
- ip.ipv6->val.next_hdr = protocol;
- ip.ipv6->mask.next_hdr = 0xff;
- }
- break;
- default:
- break;
- }
- break;
- }
- hdr = (struct ibv_spec_header *)((uint8_t *)hdr + hdr->size);
- }
+ return 0;
}
/**
- * Convert the @p item into a Verbs specification after ensuring the NIC
- * will understand and process it correctly.
- * It will also update the previous L3 layer with the protocol value matching
- * the GRE.
- * If the necessary size for the conversion is greater than the @p flow_size,
- * nothing is written in @p flow, the validation is still performed.
+ * Validate GRE item.
*
- * @param dev
- * Pointer to Ethernet device.
* @param[in] item
* Item specification.
- * @param[in, out] flow
- * Pointer to flow structure.
- * @param[in] flow_size
- * Size in bytes of the available space in @p flow, if too small, nothing is
- * written.
+ * @param[in] item_flags
+ * Bit flags to mark detected items.
+ * @param[in] target_protocol
+ * The next protocol in the previous item.
* @param[out] error
* Pointer to error structure.
*
* @return
- * On success the number of bytes consumed/necessary, if the returned value
- * is lesser or equal to @p flow_size, the @p item has fully been converted,
- * otherwise another call with this returned memory size should be done.
- * On error, a negative errno value is returned and rte_errno is set.
+ * 0 on success, a negative errno value otherwise and rte_errno is set.
*/
-static int
-mlx5_flow_item_gre(const struct rte_flow_item *item,
- struct rte_flow *flow, const size_t flow_size,
- struct rte_flow_error *error)
+int
+mlx5_flow_validate_item_gre(const struct rte_flow_item *item,
+ uint64_t item_flags,
+ uint8_t target_protocol,
+ struct rte_flow_error *error)
{
- struct mlx5_flow_verbs *verbs = flow->cur_verbs;
- const struct rte_flow_item_gre *spec = item->spec;
+ const struct rte_flow_item_gre *spec __rte_unused = item->spec;
const struct rte_flow_item_gre *mask = item->mask;
-#ifdef HAVE_IBV_DEVICE_MPLS_SUPPORT
- unsigned int size = sizeof(struct ibv_flow_spec_gre);
- struct ibv_flow_spec_gre tunnel = {
- .type = IBV_FLOW_SPEC_GRE,
- .size = size,
- };
-#else
- unsigned int size = sizeof(struct ibv_flow_spec_tunnel);
- struct ibv_flow_spec_tunnel tunnel = {
- .type = IBV_FLOW_SPEC_VXLAN_TUNNEL,
- .size = size,
- };
-#endif
int ret;
- if (flow->l3_protocol_en && flow->l3_protocol != MLX5_IP_PROTOCOL_GRE)
- return rte_flow_error_set(error, ENOTSUP,
- RTE_FLOW_ERROR_TYPE_ITEM,
- item,
+ if (target_protocol != 0xff && target_protocol != IPPROTO_GRE)
+ return rte_flow_error_set(error, EINVAL,
+ RTE_FLOW_ERROR_TYPE_ITEM, item,
"protocol filtering not compatible"
" with this GRE layer");
- if (flow->layers & MLX5_FLOW_LAYER_TUNNEL)
+ if (item_flags & MLX5_FLOW_LAYER_TUNNEL)
return rte_flow_error_set(error, ENOTSUP,
- RTE_FLOW_ERROR_TYPE_ITEM,
- item,
+ RTE_FLOW_ERROR_TYPE_ITEM, item,
"a tunnel is already present");
- if (!(flow->layers & MLX5_FLOW_LAYER_OUTER_L3))
+ if (!(item_flags & MLX5_FLOW_LAYER_OUTER_L3))
return rte_flow_error_set(error, ENOTSUP,
- RTE_FLOW_ERROR_TYPE_ITEM,
- item,
+ RTE_FLOW_ERROR_TYPE_ITEM, item,
"L3 Layer is missing");
if (!mask)
mask = &rte_flow_item_gre_mask;
@@ -1698,92 +1569,50 @@ mlx5_flow_item_gre(const struct rte_flow_item *item,
sizeof(struct rte_flow_item_gre), error);
if (ret < 0)
return ret;
-#ifdef HAVE_IBV_DEVICE_MPLS_SUPPORT
- if (spec) {
- tunnel.val.c_ks_res0_ver = spec->c_rsvd0_ver;
- tunnel.val.protocol = spec->protocol;
- tunnel.mask.c_ks_res0_ver = mask->c_rsvd0_ver;
- tunnel.mask.protocol = mask->protocol;
- /* Remove unwanted bits from values. */
- tunnel.val.c_ks_res0_ver &= tunnel.mask.c_ks_res0_ver;
- tunnel.val.protocol &= tunnel.mask.protocol;
- tunnel.val.key &= tunnel.mask.key;
- }
-#else
+#ifndef HAVE_IBV_DEVICE_MPLS_SUPPORT
if (spec && (spec->protocol & mask->protocol))
return rte_flow_error_set(error, ENOTSUP,
- RTE_FLOW_ERROR_TYPE_ITEM,
- item,
+ RTE_FLOW_ERROR_TYPE_ITEM, item,
"without MPLS support the"
" specification cannot be used for"
" filtering");
-#endif /* !HAVE_IBV_DEVICE_MPLS_SUPPORT */
- if (size <= flow_size) {
- if (flow->layers & MLX5_FLOW_LAYER_OUTER_L3_IPV4)
- mlx5_flow_item_gre_ip_protocol_update
- (verbs->attr, IBV_FLOW_SPEC_IPV4_EXT,
- MLX5_IP_PROTOCOL_GRE);
- else
- mlx5_flow_item_gre_ip_protocol_update
- (verbs->attr, IBV_FLOW_SPEC_IPV6,
- MLX5_IP_PROTOCOL_GRE);
- mlx5_flow_spec_verbs_add(flow, &tunnel, size);
- flow->cur_verbs->attr->priority = MLX5_PRIORITY_MAP_L2;
- }
- flow->layers |= MLX5_FLOW_LAYER_GRE;
- return size;
+#endif
+ return 0;
}
/**
- * Convert the @p item into a Verbs specification after ensuring the NIC
- * will understand and process it correctly.
- * If the necessary size for the conversion is greater than the @p flow_size,
- * nothing is written in @p flow, the validation is still performed.
+ * Validate MPLS item.
*
* @param[in] item
* Item specification.
- * @param[in, out] flow
- * Pointer to flow structure.
- * @param[in] flow_size
- * Size in bytes of the available space in @p flow, if too small, nothing is
- * written.
+ * @param[in] item_flags
+ * Bit-fields that hold the items detected until now.
+ * @param[in] target_protocol
+ * The next protocol in the previous item.
* @param[out] error
* Pointer to error structure.
*
* @return
- * On success the number of bytes consumed/necessary, if the returned value
- * is lesser or equal to @p flow_size, the @p item has fully been converted,
- * otherwise another call with this returned memory size should be done.
- * On error, a negative errno value is returned and rte_errno is set.
+ * 0 on success, a negative errno value otherwise and rte_errno is set.
*/
-static int
-mlx5_flow_item_mpls(const struct rte_flow_item *item __rte_unused,
- struct rte_flow *flow __rte_unused,
- const size_t flow_size __rte_unused,
- struct rte_flow_error *error)
+int
+mlx5_flow_validate_item_mpls(const struct rte_flow_item *item __rte_unused,
+ uint64_t item_flags __rte_unused,
+ uint8_t target_protocol __rte_unused,
+ struct rte_flow_error *error)
{
#ifdef HAVE_IBV_DEVICE_MPLS_SUPPORT
- const struct rte_flow_item_mpls *spec = item->spec;
const struct rte_flow_item_mpls *mask = item->mask;
- unsigned int size = sizeof(struct ibv_flow_spec_mpls);
- struct ibv_flow_spec_mpls mpls = {
- .type = IBV_FLOW_SPEC_MPLS,
- .size = size,
- };
int ret;
- if (flow->l3_protocol_en && flow->l3_protocol != MLX5_IP_PROTOCOL_MPLS)
- return rte_flow_error_set(error, ENOTSUP,
- RTE_FLOW_ERROR_TYPE_ITEM,
- item,
+ if (target_protocol != 0xff && target_protocol != IPPROTO_MPLS)
+ return rte_flow_error_set(error, EINVAL,
+ RTE_FLOW_ERROR_TYPE_ITEM, item,
"protocol filtering not compatible"
" with MPLS layer");
- /* Multi-tunnel isn't allowed but MPLS over GRE is an exception. */
- if (flow->layers & MLX5_FLOW_LAYER_TUNNEL &&
- (flow->layers & MLX5_FLOW_LAYER_GRE) != MLX5_FLOW_LAYER_GRE)
+ if (item_flags & MLX5_FLOW_LAYER_TUNNEL)
return rte_flow_error_set(error, ENOTSUP,
- RTE_FLOW_ERROR_TYPE_ITEM,
- item,
+ RTE_FLOW_ERROR_TYPE_ITEM, item,
"a tunnel is already"
" present");
if (!mask)
@@ -1794,1056 +1623,298 @@ mlx5_flow_item_mpls(const struct rte_flow_item *item __rte_unused,
sizeof(struct rte_flow_item_mpls), error);
if (ret < 0)
return ret;
- if (spec) {
- memcpy(&mpls.val.label, spec, sizeof(mpls.val.label));
- memcpy(&mpls.mask.label, mask, sizeof(mpls.mask.label));
- /* Remove unwanted bits from values. */
- mpls.val.label &= mpls.mask.label;
- }
- if (size <= flow_size) {
- mlx5_flow_spec_verbs_add(flow, &mpls, size);
- flow->cur_verbs->attr->priority = MLX5_PRIORITY_MAP_L2;
- }
- flow->layers |= MLX5_FLOW_LAYER_MPLS;
- return size;
-#endif /* !HAVE_IBV_DEVICE_MPLS_SUPPORT */
+ return 0;
+#endif
return rte_flow_error_set(error, ENOTSUP,
- RTE_FLOW_ERROR_TYPE_ITEM,
- item,
+ RTE_FLOW_ERROR_TYPE_ITEM, item,
"MPLS is not supported by Verbs, please"
" update.");
}
-/**
- * Convert the @p pattern into a Verbs specifications after ensuring the NIC
- * will understand and process it correctly.
- * The conversion is performed item per item, each of them is written into
- * the @p flow if its size is lesser or equal to @p flow_size.
- * Validation and memory consumption computation are still performed until the
- * end of @p pattern, unless an error is encountered.
- *
- * @param[in] pattern
- * Flow pattern.
- * @param[in, out] flow
- * Pointer to the rte_flow structure.
- * @param[in] flow_size
- * Size in bytes of the available space in @p flow, if too small some
- * garbage may be present.
- * @param[out] error
- * Pointer to error structure.
- *
- * @return
- * On success the number of bytes consumed/necessary, if the returned value
- * is lesser or equal to @p flow_size, the @pattern has fully been
- * converted, otherwise another call with this returned memory size should
- * be done.
- * On error, a negative errno value is returned and rte_errno is set.
- */
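+/*
+ * Fallback flow driver callbacks: each operation below simply reports
+ * ENOTSUP. They presumably back the driver-ops entry used when no flow
+ * engine (DV, TCF or Verbs) can handle a request.
+ */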
static int
-mlx5_flow_items(struct rte_eth_dev *dev,
- const struct rte_flow_item pattern[],
- struct rte_flow *flow, const size_t flow_size,
- struct rte_flow_error *error)
+flow_null_validate(struct rte_eth_dev *dev __rte_unused,
+ const struct rte_flow_attr *attr __rte_unused,
+ const struct rte_flow_item items[] __rte_unused,
+ const struct rte_flow_action actions[] __rte_unused,
+ struct rte_flow_error *error __rte_unused)
{
- int remain = flow_size;
- size_t size = 0;
-
- for (; pattern->type != RTE_FLOW_ITEM_TYPE_END; pattern++) {
- int ret = 0;
-
- switch (pattern->type) {
- case RTE_FLOW_ITEM_TYPE_VOID:
- break;
- case RTE_FLOW_ITEM_TYPE_ETH:
- ret = mlx5_flow_item_eth(pattern, flow, remain, error);
- break;
- case RTE_FLOW_ITEM_TYPE_VLAN:
- ret = mlx5_flow_item_vlan(pattern, flow, remain, error);
- break;
- case RTE_FLOW_ITEM_TYPE_IPV4:
- ret = mlx5_flow_item_ipv4(pattern, flow, remain, error);
- break;
- case RTE_FLOW_ITEM_TYPE_IPV6:
- ret = mlx5_flow_item_ipv6(pattern, flow, remain, error);
- break;
- case RTE_FLOW_ITEM_TYPE_UDP:
- ret = mlx5_flow_item_udp(pattern, flow, remain, error);
- break;
- case RTE_FLOW_ITEM_TYPE_TCP:
- ret = mlx5_flow_item_tcp(pattern, flow, remain, error);
- break;
- case RTE_FLOW_ITEM_TYPE_VXLAN:
- ret = mlx5_flow_item_vxlan(pattern, flow, remain,
- error);
- break;
- case RTE_FLOW_ITEM_TYPE_VXLAN_GPE:
- ret = mlx5_flow_item_vxlan_gpe(dev, pattern, flow,
- remain, error);
- break;
- case RTE_FLOW_ITEM_TYPE_GRE:
- ret = mlx5_flow_item_gre(pattern, flow, remain, error);
- break;
- case RTE_FLOW_ITEM_TYPE_MPLS:
- ret = mlx5_flow_item_mpls(pattern, flow, remain, error);
- break;
- default:
- return rte_flow_error_set(error, ENOTSUP,
- RTE_FLOW_ERROR_TYPE_ITEM,
- pattern,
- "item not supported");
- }
- if (ret < 0)
- return ret;
- if (remain > ret)
- remain -= ret;
- else
- remain = 0;
- size += ret;
- }
- if (!flow->layers) {
- const struct rte_flow_item item = {
- .type = RTE_FLOW_ITEM_TYPE_ETH,
- };
-
- return mlx5_flow_item_eth(&item, flow, flow_size, error);
- }
- return size;
+ rte_errno = ENOTSUP;
+ return -rte_errno;
}
-/**
- * Convert the @p action into a Verbs specification after ensuring the NIC
- * will understand and process it correctly.
- * If the necessary size for the conversion is greater than the @p flow_size,
- * nothing is written in @p flow, the validation is still performed.
- *
- * @param[in] action
- * Action configuration.
- * @param[in, out] flow
- * Pointer to flow structure.
- * @param[in] flow_size
- * Size in bytes of the available space in @p flow, if too small, nothing is
- * written.
- * @param[out] error
- * Pointer to error structure.
- *
- * @return
- * On success the number of bytes consumed/necessary, if the returned value
- * is lesser or equal to @p flow_size, the @p action has fully been
- * converted, otherwise another call with this returned memory size should
- * be done.
- * On error, a negative errno value is returned and rte_errno is set.
- */
-static int
-mlx5_flow_action_drop(const struct rte_flow_action *action,
- struct rte_flow *flow, const size_t flow_size,
- struct rte_flow_error *error)
+static struct mlx5_flow *
+flow_null_prepare(const struct rte_flow_attr *attr __rte_unused,
+ const struct rte_flow_item items[] __rte_unused,
+ const struct rte_flow_action actions[] __rte_unused,
+ uint64_t *item_flags __rte_unused,
+ uint64_t *action_flags __rte_unused,
+ struct rte_flow_error *error __rte_unused)
{
- unsigned int size = sizeof(struct ibv_flow_spec_action_drop);
- struct ibv_flow_spec_action_drop drop = {
- .type = IBV_FLOW_SPEC_ACTION_DROP,
- .size = size,
- };
-
- if (flow->fate)
- return rte_flow_error_set(error, ENOTSUP,
- RTE_FLOW_ERROR_TYPE_ACTION,
- action,
- "multiple fate actions are not"
- " supported");
- if (flow->modifier & (MLX5_FLOW_MOD_FLAG | MLX5_FLOW_MOD_MARK))
- return rte_flow_error_set(error, ENOTSUP,
- RTE_FLOW_ERROR_TYPE_ACTION,
- action,
- "drop is not compatible with"
- " flag/mark action");
- if (size < flow_size)
- mlx5_flow_spec_verbs_add(flow, &drop, size);
- flow->fate |= MLX5_FLOW_FATE_DROP;
- return size;
+ rte_errno = ENOTSUP;
+ return NULL;
}
-/**
- * Convert the @p action into @p flow after ensuring the NIC will understand
- * and process it correctly.
- *
- * @param[in] dev
- * Pointer to Ethernet device structure.
- * @param[in] action
- * Action configuration.
- * @param[in, out] flow
- * Pointer to flow structure.
- * @param[out] error
- * Pointer to error structure.
- *
- * @return
- * 0 on success, a negative errno value otherwise and rte_errno is set.
- */
static int
-mlx5_flow_action_queue(struct rte_eth_dev *dev,
- const struct rte_flow_action *action,
- struct rte_flow *flow,
- struct rte_flow_error *error)
+flow_null_translate(struct rte_eth_dev *dev __rte_unused,
+ struct mlx5_flow *dev_flow __rte_unused,
+ const struct rte_flow_attr *attr __rte_unused,
+ const struct rte_flow_item items[] __rte_unused,
+ const struct rte_flow_action actions[] __rte_unused,
+ struct rte_flow_error *error __rte_unused)
{
- struct priv *priv = dev->data->dev_private;
- const struct rte_flow_action_queue *queue = action->conf;
-
- if (flow->fate)
- return rte_flow_error_set(error, ENOTSUP,
- RTE_FLOW_ERROR_TYPE_ACTION,
- action,
- "multiple fate actions are not"
- " supported");
- if (queue->index >= priv->rxqs_n)
- return rte_flow_error_set(error, EINVAL,
- RTE_FLOW_ERROR_TYPE_ACTION_CONF,
- &queue->index,
- "queue index out of range");
- if (!(*priv->rxqs)[queue->index])
- return rte_flow_error_set(error, EINVAL,
- RTE_FLOW_ERROR_TYPE_ACTION_CONF,
- &queue->index,
- "queue is not configured");
- if (flow->queue)
- (*flow->queue)[0] = queue->index;
- flow->rss.queue_num = 1;
- flow->fate |= MLX5_FLOW_FATE_QUEUE;
- return 0;
+ rte_errno = ENOTSUP;
+ return -rte_errno;
}
-/**
- * Ensure the @p action will be understood and used correctly by the NIC.
- *
- * @param dev
- * Pointer to Ethernet device structure.
- * @param action[in]
- * Pointer to flow actions array.
- * @param flow[in, out]
- * Pointer to the rte_flow structure.
- * @param error[in, out]
- * Pointer to error structure.
- *
- * @return
- * On success @p flow->queue array and @p flow->rss are filled and valid.
- * On error, a negative errno value is returned and rte_errno is set.
- */
static int
-mlx5_flow_action_rss(struct rte_eth_dev *dev,
- const struct rte_flow_action *action,
- struct rte_flow *flow,
- struct rte_flow_error *error)
+flow_null_apply(struct rte_eth_dev *dev __rte_unused,
+ struct rte_flow *flow __rte_unused,
+ struct rte_flow_error *error __rte_unused)
{
- struct priv *priv = dev->data->dev_private;
- const struct rte_flow_action_rss *rss = action->conf;
- unsigned int i;
-
- if (flow->fate)
- return rte_flow_error_set(error, ENOTSUP,
- RTE_FLOW_ERROR_TYPE_ACTION,
- action,
- "multiple fate actions are not"
- " supported");
- if (rss->func != RTE_ETH_HASH_FUNCTION_DEFAULT &&
- rss->func != RTE_ETH_HASH_FUNCTION_TOEPLITZ)
- return rte_flow_error_set(error, ENOTSUP,
- RTE_FLOW_ERROR_TYPE_ACTION_CONF,
- &rss->func,
- "RSS hash function not supported");
-#ifdef HAVE_IBV_DEVICE_TUNNEL_SUPPORT
- if (rss->level > 2)
-#else
- if (rss->level > 1)
-#endif
- return rte_flow_error_set(error, ENOTSUP,
- RTE_FLOW_ERROR_TYPE_ACTION_CONF,
- &rss->level,
- "tunnel RSS is not supported");
- if (rss->key_len < MLX5_RSS_HASH_KEY_LEN)
- return rte_flow_error_set(error, ENOTSUP,
- RTE_FLOW_ERROR_TYPE_ACTION_CONF,
- &rss->key_len,
- "RSS hash key too small");
- if (rss->key_len > MLX5_RSS_HASH_KEY_LEN)
- return rte_flow_error_set(error, ENOTSUP,
- RTE_FLOW_ERROR_TYPE_ACTION_CONF,
- &rss->key_len,
- "RSS hash key too large");
- if (!rss->queue_num)
- return rte_flow_error_set(error, ENOTSUP,
- RTE_FLOW_ERROR_TYPE_ACTION_CONF,
- rss,
- "no queues were provided for RSS");
- if (rss->queue_num > priv->config.ind_table_max_size)
- return rte_flow_error_set(error, ENOTSUP,
- RTE_FLOW_ERROR_TYPE_ACTION_CONF,
- &rss->queue_num,
- "number of queues too large");
- if (rss->types & MLX5_RSS_HF_MASK)
- return rte_flow_error_set(error, ENOTSUP,
- RTE_FLOW_ERROR_TYPE_ACTION_CONF,
- &rss->types,
- "some RSS protocols are not"
- " supported");
- for (i = 0; i != rss->queue_num; ++i) {
- if (rss->queue[i] >= priv->rxqs_n)
- return rte_flow_error_set
- (error, EINVAL,
- RTE_FLOW_ERROR_TYPE_ACTION_CONF,
- rss,
- "queue index out of range");
- if (!(*priv->rxqs)[rss->queue[i]])
- return rte_flow_error_set
- (error, EINVAL,
- RTE_FLOW_ERROR_TYPE_ACTION_CONF,
- &rss->queue[i],
- "queue is not configured");
- }
- if (flow->queue)
- memcpy((*flow->queue), rss->queue,
- rss->queue_num * sizeof(uint16_t));
- flow->rss.queue_num = rss->queue_num;
- memcpy(flow->key, rss->key, MLX5_RSS_HASH_KEY_LEN);
- flow->rss.types = rss->types;
- flow->rss.level = rss->level;
- flow->fate |= MLX5_FLOW_FATE_RSS;
- return 0;
+ rte_errno = ENOTSUP;
+ return -rte_errno;
}
-/**
- * Convert the @p action into a Verbs specification after ensuring the NIC
- * will understand and process it correctly.
- * If the necessary size for the conversion is greater than the @p flow_size,
- * nothing is written in @p flow, the validation is still performed.
- *
- * @param[in] action
- * Action configuration.
- * @param[in, out] flow
- * Pointer to flow structure.
- * @param[in] flow_size
- * Size in bytes of the available space in @p flow, if too small, nothing is
- * written.
- * @param[out] error
- * Pointer to error structure.
- *
- * @return
- * On success the number of bytes consumed/necessary, if the returned value
- * is lesser or equal to @p flow_size, the @p action has fully been
- * converted, otherwise another call with this returned memory size should
- * be done.
- * On error, a negative errno value is returned and rte_errno is set.
- */
-static int
-mlx5_flow_action_flag(const struct rte_flow_action *action,
- struct rte_flow *flow, const size_t flow_size,
- struct rte_flow_error *error)
-{
- unsigned int size = sizeof(struct ibv_flow_spec_action_tag);
- struct ibv_flow_spec_action_tag tag = {
- .type = IBV_FLOW_SPEC_ACTION_TAG,
- .size = size,
- .tag_id = mlx5_flow_mark_set(MLX5_FLOW_MARK_DEFAULT),
- };
- struct mlx5_flow_verbs *verbs = flow->cur_verbs;
-
- if (flow->modifier & MLX5_FLOW_MOD_FLAG)
- return rte_flow_error_set(error, ENOTSUP,
- RTE_FLOW_ERROR_TYPE_ACTION,
- action,
- "flag action already present");
- if (flow->fate & MLX5_FLOW_FATE_DROP)
- return rte_flow_error_set(error, ENOTSUP,
- RTE_FLOW_ERROR_TYPE_ACTION,
- action,
- "flag is not compatible with drop"
- " action");
- if (flow->modifier & MLX5_FLOW_MOD_MARK)
- size = 0;
- else if (size <= flow_size && verbs)
- mlx5_flow_spec_verbs_add(flow, &tag, size);
- flow->modifier |= MLX5_FLOW_MOD_FLAG;
- return size;
+static void
+flow_null_remove(struct rte_eth_dev *dev __rte_unused,
+ struct rte_flow *flow __rte_unused)
+{
}
-/**
- * Update verbs specification to modify the flag to mark.
- *
- * @param[in, out] verbs
- * Pointer to the mlx5_flow_verbs structure.
- * @param[in] mark_id
- * Mark identifier to replace the flag.
- */
static void
-mlx5_flow_verbs_mark_update(struct mlx5_flow_verbs *verbs, uint32_t mark_id)
+flow_null_destroy(struct rte_eth_dev *dev __rte_unused,
+ struct rte_flow *flow __rte_unused)
{
- struct ibv_spec_header *hdr;
- int i;
-
- if (!verbs)
- return;
- /* Update Verbs specification. */
- hdr = (struct ibv_spec_header *)verbs->specs;
- if (!hdr)
- return;
- for (i = 0; i != verbs->attr->num_of_specs; ++i) {
- if (hdr->type == IBV_FLOW_SPEC_ACTION_TAG) {
- struct ibv_flow_spec_action_tag *t =
- (struct ibv_flow_spec_action_tag *)hdr;
-
- t->tag_id = mlx5_flow_mark_set(mark_id);
- }
- hdr = (struct ibv_spec_header *)((uintptr_t)hdr + hdr->size);
- }
}
-/**
- * Convert the @p action into @p flow (or by updating the already present
- * Flag Verbs specification) after ensuring the NIC will understand and
- * process it correctly.
- * If the necessary size for the conversion is greater than the @p flow_size,
- * nothing is written in @p flow, the validation is still performed.
- *
- * @param[in] action
- * Action configuration.
- * @param[in, out] flow
- * Pointer to flow structure.
- * @param[in] flow_size
- * Size in bytes of the available space in @p flow, if too small, nothing is
- * written.
- * @param[out] error
- * Pointer to error structure.
- *
- * @return
- * On success the number of bytes consumed/necessary, if the returned value
- * is lesser or equal to @p flow_size, the @p action has fully been
- * converted, otherwise another call with this returned memory size should
- * be done.
- * On error, a negative errno value is returned and rte_errno is set.
- */
static int
-mlx5_flow_action_mark(const struct rte_flow_action *action,
- struct rte_flow *flow, const size_t flow_size,
- struct rte_flow_error *error)
+flow_null_query(struct rte_eth_dev *dev __rte_unused,
+ struct rte_flow *flow __rte_unused,
+ const struct rte_flow_action *actions __rte_unused,
+ void *data __rte_unused,
+ struct rte_flow_error *error __rte_unused)
{
- const struct rte_flow_action_mark *mark = action->conf;
- unsigned int size = sizeof(struct ibv_flow_spec_action_tag);
- struct ibv_flow_spec_action_tag tag = {
- .type = IBV_FLOW_SPEC_ACTION_TAG,
- .size = size,
- };
- struct mlx5_flow_verbs *verbs = flow->cur_verbs;
-
- if (!mark)
- return rte_flow_error_set(error, EINVAL,
- RTE_FLOW_ERROR_TYPE_ACTION,
- action,
- "configuration cannot be null");
- if (mark->id >= MLX5_FLOW_MARK_MAX)
- return rte_flow_error_set(error, EINVAL,
- RTE_FLOW_ERROR_TYPE_ACTION_CONF,
- &mark->id,
- "mark id must in 0 <= id < "
- RTE_STR(MLX5_FLOW_MARK_MAX));
- if (flow->modifier & MLX5_FLOW_MOD_MARK)
- return rte_flow_error_set(error, ENOTSUP,
- RTE_FLOW_ERROR_TYPE_ACTION,
- action,
- "mark action already present");
- if (flow->fate & MLX5_FLOW_FATE_DROP)
- return rte_flow_error_set(error, ENOTSUP,
- RTE_FLOW_ERROR_TYPE_ACTION,
- action,
- "mark is not compatible with drop"
- " action");
- if (flow->modifier & MLX5_FLOW_MOD_FLAG) {
- mlx5_flow_verbs_mark_update(verbs, mark->id);
- size = 0;
- } else if (size <= flow_size) {
- tag.tag_id = mlx5_flow_mark_set(mark->id);
- mlx5_flow_spec_verbs_add(flow, &tag, size);
- }
- flow->modifier |= MLX5_FLOW_MOD_MARK;
- return size;
+ rte_errno = ENOTSUP;
+ return -rte_errno;
}
+/* Void driver to protect from null pointer reference. */
+const struct mlx5_flow_driver_ops mlx5_flow_null_drv_ops = {
+ .validate = flow_null_validate,
+ .prepare = flow_null_prepare,
+ .translate = flow_null_translate,
+ .apply = flow_null_apply,
+ .remove = flow_null_remove,
+ .destroy = flow_null_destroy,
+ .query = flow_null_query,
+};
+
/**
- * Convert the @p action into a Verbs specification after ensuring the NIC
- * will understand and process it correctly.
- * If the necessary size for the conversion is greater than the @p flow_size,
- * nothing is written in @p flow, the validation is still performed.
- *
- * @param action[in]
- * Action configuration.
- * @param flow[in, out]
- * Pointer to flow structure.
- * @param flow_size[in]
- * Size in bytes of the available space in @p flow, if too small, nothing is
- * written.
- * @param error[int, out]
- * Pointer to error structure.
+ * Select flow driver type according to flow attributes and device
+ * configuration.
+ *
+ * @param[in] dev
+ * Pointer to the dev structure.
+ * @param[in] attr
+ * Pointer to the flow attributes.
*
* @return
- * On success the number of bytes consumed/necessary, if the returned value
- * is lesser or equal to @p flow_size, the @p action has fully been
- * converted, otherwise another call with this returned memory size should
- * be done.
- * On error, a negative errno value is returned and rte_errno is set.
+ * flow driver type, MLX5_FLOW_TYPE_MAX otherwise.
*/
-static int
-mlx5_flow_action_count(struct rte_eth_dev *dev,
- const struct rte_flow_action *action,
- struct rte_flow *flow,
- const size_t flow_size __rte_unused,
- struct rte_flow_error *error)
-{
- const struct rte_flow_action_count *count = action->conf;
-#ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
- unsigned int size = sizeof(struct ibv_flow_spec_counter_action);
- struct ibv_flow_spec_counter_action counter = {
- .type = IBV_FLOW_SPEC_ACTION_COUNT,
- .size = size,
- };
-#endif
+static enum mlx5_flow_drv_type
+flow_get_drv_type(struct rte_eth_dev *dev, const struct rte_flow_attr *attr)
+{
+ struct priv *priv = dev->data->dev_private;
+ enum mlx5_flow_drv_type type = MLX5_FLOW_TYPE_MAX;
- if (!flow->counter) {
- flow->counter = mlx5_flow_counter_new(dev, count->shared,
- count->id);
- if (!flow->counter)
- return rte_flow_error_set(error, ENOTSUP,
- RTE_FLOW_ERROR_TYPE_ACTION,
- action,
- "cannot get counter"
- " context.");
- }
- if (!((struct priv *)dev->data->dev_private)->config.flow_counter_en)
- return rte_flow_error_set(error, ENOTSUP,
- RTE_FLOW_ERROR_TYPE_ACTION,
- action,
- "flow counters are not supported.");
- flow->modifier |= MLX5_FLOW_MOD_COUNT;
-#ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
- counter.counter_set_handle = flow->counter->cs->handle;
- if (size <= flow_size)
- mlx5_flow_spec_verbs_add(flow, &counter, size);
- return size;
-#endif
- return 0;
+ if (attr->transfer)
+ type = MLX5_FLOW_TYPE_TCF;
+ else
+ type = priv->config.dv_flow_en ? MLX5_FLOW_TYPE_DV :
+ MLX5_FLOW_TYPE_VERBS;
+ return type;
}
+#define flow_get_drv_ops(type) flow_drv_ops[type]
+
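
Editor's note: the flow_get_drv_ops() macro indexes a flow_drv_ops[] table of per-backend ops pointers that is defined elsewhere in mlx5_flow.c and is not visible in this hunk. A minimal sketch of what that table presumably looks like, assuming the backend symbols (mlx5_flow_dv_drv_ops, mlx5_flow_tcf_drv_ops, mlx5_flow_verbs_drv_ops) are exported by the corresponding mlx5_flow_*.c files, with the MIN/MAX slots guarded by the null driver defined above:

    /* Sketch only: the real table lives in mlx5_flow.c outside this hunk. */
    static const struct mlx5_flow_driver_ops *flow_drv_ops[] = {
            [MLX5_FLOW_TYPE_MIN]   = &mlx5_flow_null_drv_ops,
            [MLX5_FLOW_TYPE_DV]    = &mlx5_flow_dv_drv_ops,    /* assumed symbol */
            [MLX5_FLOW_TYPE_TCF]   = &mlx5_flow_tcf_drv_ops,   /* assumed symbol */
            [MLX5_FLOW_TYPE_VERBS] = &mlx5_flow_verbs_drv_ops, /* assumed symbol */
            [MLX5_FLOW_TYPE_MAX]   = &mlx5_flow_null_drv_ops,
    };

With such a layout, flow_get_drv_ops(flow_get_drv_type(dev, attr)) always yields a usable ops pointer and unsupported types fall back to the ENOTSUP null driver instead of dereferencing NULL.
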
/**
- * Convert the @p action into @p flow after ensuring the NIC will understand
- * and process it correctly.
- * The conversion is performed action per action, each of them is written into
- * the @p flow if its size is lesser or equal to @p flow_size.
- * Validation and memory consumption computation are still performed until the
- * end of @p action, unless an error is encountered.
+ * Flow driver validation API. This abstracts calling driver specific functions.
+ * The type of flow driver is determined according to flow attributes.
*
* @param[in] dev
- * Pointer to Ethernet device structure.
+ * Pointer to the dev structure.
+ * @param[in] attr
+ * Pointer to the flow attributes.
+ * @param[in] items
+ * Pointer to the list of items.
* @param[in] actions
- * Pointer to flow actions array.
- * @param[in, out] flow
- * Pointer to the rte_flow structure.
- * @param[in] flow_size
- * Size in bytes of the available space in @p flow, if too small some
- * garbage may be present.
+ * Pointer to the list of actions.
* @param[out] error
- * Pointer to error structure.
+ * Pointer to the error structure.
*
* @return
- * On success the number of bytes consumed/necessary, if the returned value
- * is lesser or equal to @p flow_size, the @p actions has fully been
- * converted, otherwise another call with this returned memory size should
- * be done.
- * On error, a negative errno value is returned and rte_errno is set.
+ * 0 on success, a negative errno value otherwise and rte_errno is set.
*/
-static int
-mlx5_flow_actions(struct rte_eth_dev *dev,
+static inline int
+flow_drv_validate(struct rte_eth_dev *dev,
+ const struct rte_flow_attr *attr,
+ const struct rte_flow_item items[],
const struct rte_flow_action actions[],
- struct rte_flow *flow, const size_t flow_size,
struct rte_flow_error *error)
{
- size_t size = 0;
- int remain = flow_size;
- int ret = 0;
+ const struct mlx5_flow_driver_ops *fops;
+ enum mlx5_flow_drv_type type = flow_get_drv_type(dev, attr);
- for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
- switch (actions->type) {
- case RTE_FLOW_ACTION_TYPE_VOID:
- break;
- case RTE_FLOW_ACTION_TYPE_FLAG:
- ret = mlx5_flow_action_flag(actions, flow, remain,
- error);
- break;
- case RTE_FLOW_ACTION_TYPE_MARK:
- ret = mlx5_flow_action_mark(actions, flow, remain,
- error);
- break;
- case RTE_FLOW_ACTION_TYPE_DROP:
- ret = mlx5_flow_action_drop(actions, flow, remain,
- error);
- break;
- case RTE_FLOW_ACTION_TYPE_QUEUE:
- ret = mlx5_flow_action_queue(dev, actions, flow, error);
- break;
- case RTE_FLOW_ACTION_TYPE_RSS:
- ret = mlx5_flow_action_rss(dev, actions, flow, error);
- break;
- case RTE_FLOW_ACTION_TYPE_COUNT:
- ret = mlx5_flow_action_count(dev, actions, flow, remain,
- error);
- break;
- default:
- return rte_flow_error_set(error, ENOTSUP,
- RTE_FLOW_ERROR_TYPE_ACTION,
- actions,
- "action not supported");
- }
- if (ret < 0)
- return ret;
- if (remain > ret)
- remain -= ret;
- else
- remain = 0;
- size += ret;
- }
- if (!flow->fate)
- return rte_flow_error_set(error, ENOTSUP,
- RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
- NULL,
- "no fate action found");
- return size;
+ fops = flow_get_drv_ops(type);
+ return fops->validate(dev, attr, items, actions, error);
}
/**
- * Validate flow rule and fill flow structure accordingly.
+ * Flow driver preparation API. This abstracts calling driver specific
+ * functions. Parent flow (rte_flow) should have driver type (drv_type). It
+ * calculates the size of memory required for device flow, allocates the memory,
+ * initializes the device flow and returns the pointer.
*
- * @param dev
- * Pointer to Ethernet device.
- * @param[out] flow
- * Pointer to flow structure.
- * @param flow_size
- * Size of allocated space for @p flow.
* @param[in] attr
- * Flow rule attributes.
- * @param[in] pattern
- * Pattern specification (list terminated by the END pattern item).
+ * Pointer to the flow attributes.
+ * @param[in] items
+ * Pointer to the list of items.
* @param[in] actions
- * Associated actions (list terminated by the END action).
+ * Pointer to the list of actions.
+ * @param[out] item_flags
+ * Pointer to bit mask of all items detected.
+ * @param[out] action_flags
+ * Pointer to bit mask of all actions detected.
* @param[out] error
- * Perform verbose error reporting if not NULL.
+ * Pointer to the error structure.
*
* @return
- * A positive value representing the size of the flow object in bytes
- * regardless of @p flow_size on success, a negative errno value otherwise
- * and rte_errno is set.
+ * Pointer to device flow on success, otherwise NULL and rte_errno is set.
*/
-static int
-mlx5_flow_merge_switch(struct rte_eth_dev *dev,
- struct rte_flow *flow,
- size_t flow_size,
- const struct rte_flow_attr *attr,
- const struct rte_flow_item pattern[],
- const struct rte_flow_action actions[],
- struct rte_flow_error *error)
-{
- unsigned int n = mlx5_dev_to_port_id(dev->device, NULL, 0);
- uint16_t port_id[!n + n];
- struct mlx5_nl_flow_ptoi ptoi[!n + n + 1];
- size_t off = RTE_ALIGN_CEIL(sizeof(*flow), alignof(max_align_t));
- unsigned int i;
- unsigned int own = 0;
- int ret;
-
- /* At least one port is needed when no switch domain is present. */
- if (!n) {
- n = 1;
- port_id[0] = dev->data->port_id;
- } else {
- n = RTE_MIN(mlx5_dev_to_port_id(dev->device, port_id, n), n);
- }
- for (i = 0; i != n; ++i) {
- struct rte_eth_dev_info dev_info;
-
- rte_eth_dev_info_get(port_id[i], &dev_info);
- if (port_id[i] == dev->data->port_id)
- own = i;
- ptoi[i].port_id = port_id[i];
- ptoi[i].ifindex = dev_info.if_index;
- }
- /* Ensure first entry of ptoi[] is the current device. */
- if (own) {
- ptoi[n] = ptoi[0];
- ptoi[0] = ptoi[own];
- ptoi[own] = ptoi[n];
- }
- /* An entry with zero ifindex terminates ptoi[]. */
- ptoi[n].port_id = 0;
- ptoi[n].ifindex = 0;
- if (flow_size < off)
- flow_size = 0;
- ret = mlx5_nl_flow_transpose((uint8_t *)flow + off,
- flow_size ? flow_size - off : 0,
- ptoi, attr, pattern, actions, error);
- if (ret < 0)
- return ret;
- if (flow_size) {
- *flow = (struct rte_flow){
- .attributes = *attr,
- .nl_flow = (uint8_t *)flow + off,
- };
- /*
- * Generate a reasonably unique handle based on the address
- * of the target buffer.
- *
- * This is straightforward on 32-bit systems where the flow
- * pointer can be used directly. Otherwise, its least
- * significant part is taken after shifting it by the
- * previous power of two of the pointed buffer size.
- */
- if (sizeof(flow) <= 4)
- mlx5_nl_flow_brand(flow->nl_flow, (uintptr_t)flow);
- else
- mlx5_nl_flow_brand
- (flow->nl_flow,
- (uintptr_t)flow >>
- rte_log2_u32(rte_align32prevpow2(flow_size)));
- }
- return off + ret;
-}
-
-static unsigned int
-mlx5_find_graph_root(const struct rte_flow_item pattern[], uint32_t rss_level)
+static inline struct mlx5_flow *
+flow_drv_prepare(struct rte_flow *flow,
+ const struct rte_flow_attr *attr,
+ const struct rte_flow_item items[],
+ const struct rte_flow_action actions[],
+ uint64_t *item_flags,
+ uint64_t *action_flags,
+ struct rte_flow_error *error)
{
- const struct rte_flow_item *item;
- unsigned int has_vlan = 0;
+ const struct mlx5_flow_driver_ops *fops;
+ enum mlx5_flow_drv_type type = flow->drv_type;
- for (item = pattern; item->type != RTE_FLOW_ITEM_TYPE_END; item++) {
- if (item->type == RTE_FLOW_ITEM_TYPE_VLAN) {
- has_vlan = 1;
- break;
- }
- }
- if (has_vlan)
- return rss_level < 2 ? MLX5_EXPANSION_ROOT_ETH_VLAN :
- MLX5_EXPANSION_ROOT_OUTER_ETH_VLAN;
- return rss_level < 2 ? MLX5_EXPANSION_ROOT :
- MLX5_EXPANSION_ROOT_OUTER;
+ assert(type > MLX5_FLOW_TYPE_MIN && type < MLX5_FLOW_TYPE_MAX);
+ fops = flow_get_drv_ops(type);
+ return fops->prepare(attr, items, actions, item_flags, action_flags,
+ error);
}
/**
- * Convert the @p attributes, @p pattern, @p action, into an flow for the NIC
- * after ensuring the NIC will understand and process it correctly.
- * The conversion is only performed item/action per item/action, each of
- * them is written into the @p flow if its size is lesser or equal to @p
- * flow_size.
- * Validation and memory consumption computation are still performed until the
- * end, unless an error is encountered.
+ * Flow driver translation API. This abstracts calling driver specific
+ * functions. Parent flow (rte_flow) should have driver type (drv_type). It
+ * translates a generic flow into a driver flow. flow_drv_prepare() must
+ * precede.
+ *
*
* @param[in] dev
- * Pointer to Ethernet device.
- * @param[in, out] flow
- * Pointer to flow structure.
- * @param[in] flow_size
- * Size in bytes of the available space in @p flow, if too small some
- * garbage may be present.
- * @param[in] attributes
- * Flow rule attributes.
- * @param[in] pattern
- * Pattern specification (list terminated by the END pattern item).
+ * Pointer to the rte dev structure.
+ * @param[in, out] dev_flow
+ * Pointer to the mlx5 flow.
+ * @param[in] attr
+ * Pointer to the flow attributes.
+ * @param[in] items
+ * Pointer to the list of items.
* @param[in] actions
- * Associated actions (list terminated by the END action).
+ * Pointer to the list of actions.
* @param[out] error
- * Perform verbose error reporting if not NULL.
+ * Pointer to the error structure.
*
* @return
- * On success the number of bytes consumed/necessary, if the returned value
- * is lesser or equal to @p flow_size, the flow has fully been converted and
- * can be applied, otherwise another call with this returned memory size
- * should be done.
- * On error, a negative errno value is returned and rte_errno is set.
- */
-static int
-mlx5_flow_merge(struct rte_eth_dev *dev, struct rte_flow *flow,
- const size_t flow_size,
- const struct rte_flow_attr *attributes,
- const struct rte_flow_item pattern[],
- const struct rte_flow_action actions[],
- struct rte_flow_error *error)
-{
- struct rte_flow local_flow = { .layers = 0, };
- size_t size = sizeof(*flow);
- union {
- struct rte_flow_expand_rss buf;
- uint8_t buffer[2048];
- } expand_buffer;
- struct rte_flow_expand_rss *buf = &expand_buffer.buf;
- struct mlx5_flow_verbs *original_verbs = NULL;
- size_t original_verbs_size = 0;
- uint32_t original_layers = 0;
- int expanded_pattern_idx = 0;
- int ret;
- uint32_t i;
-
- if (attributes->transfer)
- return mlx5_flow_merge_switch(dev, flow, flow_size,
- attributes, pattern,
- actions, error);
- if (size > flow_size)
- flow = &local_flow;
- ret = mlx5_flow_attributes(dev, attributes, flow, error);
- if (ret < 0)
- return ret;
- ret = mlx5_flow_actions(dev, actions, &local_flow, 0, error);
- if (ret < 0)
- return ret;
- if (local_flow.rss.types) {
- unsigned int graph_root;
-
- graph_root = mlx5_find_graph_root(pattern,
- local_flow.rss.level);
- ret = rte_flow_expand_rss(buf, sizeof(expand_buffer.buffer),
- pattern, local_flow.rss.types,
- mlx5_support_expansion,
- graph_root);
- assert(ret > 0 &&
- (unsigned int)ret < sizeof(expand_buffer.buffer));
- } else {
- buf->entries = 1;
- buf->entry[0].pattern = (void *)(uintptr_t)pattern;
- }
- size += RTE_ALIGN_CEIL(local_flow.rss.queue_num * sizeof(uint16_t),
- sizeof(void *));
- if (size <= flow_size)
- flow->queue = (void *)(flow + 1);
- LIST_INIT(&flow->verbs);
- flow->layers = 0;
- flow->modifier = 0;
- flow->fate = 0;
- for (i = 0; i != buf->entries; ++i) {
- size_t off = size;
- size_t off2;
-
- flow->layers = original_layers;
- size += sizeof(struct ibv_flow_attr) +
- sizeof(struct mlx5_flow_verbs);
- off2 = size;
- if (size < flow_size) {
- flow->cur_verbs = (void *)((uintptr_t)flow + off);
- flow->cur_verbs->attr = (void *)(flow->cur_verbs + 1);
- flow->cur_verbs->specs =
- (void *)(flow->cur_verbs->attr + 1);
- }
- /* First iteration convert the pattern into Verbs. */
- if (i == 0) {
- /* Actions don't need to be converted several time. */
- ret = mlx5_flow_actions(dev, actions, flow,
- (size < flow_size) ?
- flow_size - size : 0,
- error);
- if (ret < 0)
- return ret;
- size += ret;
- } else {
- /*
- * Next iteration means the pattern has already been
- * converted and an expansion is necessary to match
- * the user RSS request. For that only the expanded
- * items will be converted, the common part with the
- * user pattern are just copied into the next buffer
- * zone.
- */
- size += original_verbs_size;
- if (size < flow_size) {
- rte_memcpy(flow->cur_verbs->attr,
- original_verbs->attr,
- original_verbs_size +
- sizeof(struct ibv_flow_attr));
- flow->cur_verbs->size = original_verbs_size;
- }
- }
- ret = mlx5_flow_items
- (dev,
- (const struct rte_flow_item *)
- &buf->entry[i].pattern[expanded_pattern_idx],
- flow,
- (size < flow_size) ? flow_size - size : 0, error);
- if (ret < 0)
- return ret;
- size += ret;
- if (size <= flow_size) {
- mlx5_flow_adjust_priority(dev, flow);
- LIST_INSERT_HEAD(&flow->verbs, flow->cur_verbs, next);
- }
- /*
- * Keep a pointer of the first verbs conversion and the layers
- * it has encountered.
- */
- if (i == 0) {
- original_verbs = flow->cur_verbs;
- original_verbs_size = size - off2;
- original_layers = flow->layers;
- /*
- * move the index of the expanded pattern to the
- * first item not addressed yet.
- */
- if (pattern->type == RTE_FLOW_ITEM_TYPE_END) {
- expanded_pattern_idx++;
- } else {
- const struct rte_flow_item *item = pattern;
-
- for (item = pattern;
- item->type != RTE_FLOW_ITEM_TYPE_END;
- ++item)
- expanded_pattern_idx++;
- }
- }
- }
- /* Restore the origin layers in the flow. */
- flow->layers = original_layers;
- return size;
-}
-
-/**
- * Lookup and set the ptype in the data Rx part. A single Ptype can be used,
- * if several tunnel rules are used on this queue, the tunnel ptype will be
- * cleared.
- *
- * @param rxq_ctrl
- * Rx queue to update.
+ * 0 on success, a negative errno value otherwise and rte_errno is set.
*/
-static void
-mlx5_flow_rxq_tunnel_ptype_update(struct mlx5_rxq_ctrl *rxq_ctrl)
+static inline int
+flow_drv_translate(struct rte_eth_dev *dev, struct mlx5_flow *dev_flow,
+ const struct rte_flow_attr *attr,
+ const struct rte_flow_item items[],
+ const struct rte_flow_action actions[],
+ struct rte_flow_error *error)
{
- unsigned int i;
- uint32_t tunnel_ptype = 0;
+ const struct mlx5_flow_driver_ops *fops;
+ enum mlx5_flow_drv_type type = dev_flow->flow->drv_type;
- /* Look up for the ptype to use. */
- for (i = 0; i != MLX5_FLOW_TUNNEL; ++i) {
- if (!rxq_ctrl->flow_tunnels_n[i])
- continue;
- if (!tunnel_ptype) {
- tunnel_ptype = tunnels_info[i].ptype;
- } else {
- tunnel_ptype = 0;
- break;
- }
- }
- rxq_ctrl->rxq.tunnel = tunnel_ptype;
+ assert(type > MLX5_FLOW_TYPE_MIN && type < MLX5_FLOW_TYPE_MAX);
+ fops = flow_get_drv_ops(type);
+ return fops->translate(dev, dev_flow, attr, items, actions, error);
}
/**
- * Set the Rx queue flags (Mark/Flag and Tunnel Ptypes) according to the flow.
+ * Flow driver apply API. This abstracts calling driver specific functions.
+ * Parent flow (rte_flow) should have driver type (drv_type). It applies
+ * translated driver flows on to device. flow_drv_translate() must precede.
*
* @param[in] dev
- * Pointer to Ethernet device.
- * @param[in] flow
+ * Pointer to Ethernet device structure.
+ * @param[in, out] flow
* Pointer to flow structure.
+ * @param[out] error
+ * Pointer to error structure.
+ *
+ * @return
+ * 0 on success, a negative errno value otherwise and rte_errno is set.
*/
-static void
-mlx5_flow_rxq_flags_set(struct rte_eth_dev *dev, struct rte_flow *flow)
+static inline int
+flow_drv_apply(struct rte_eth_dev *dev, struct rte_flow *flow,
+ struct rte_flow_error *error)
{
- struct priv *priv = dev->data->dev_private;
- const int mark = !!(flow->modifier &
- (MLX5_FLOW_MOD_FLAG | MLX5_FLOW_MOD_MARK));
- const int tunnel = !!(flow->layers & MLX5_FLOW_LAYER_TUNNEL);
- unsigned int i;
+ const struct mlx5_flow_driver_ops *fops;
+ enum mlx5_flow_drv_type type = flow->drv_type;
- for (i = 0; i != flow->rss.queue_num; ++i) {
- int idx = (*flow->queue)[i];
- struct mlx5_rxq_ctrl *rxq_ctrl =
- container_of((*priv->rxqs)[idx],
- struct mlx5_rxq_ctrl, rxq);
-
- if (mark) {
- rxq_ctrl->rxq.mark = 1;
- rxq_ctrl->flow_mark_n++;
- }
- if (tunnel) {
- unsigned int j;
-
- /* Increase the counter matching the flow. */
- for (j = 0; j != MLX5_FLOW_TUNNEL; ++j) {
- if ((tunnels_info[j].tunnel & flow->layers) ==
- tunnels_info[j].tunnel) {
- rxq_ctrl->flow_tunnels_n[j]++;
- break;
- }
- }
- mlx5_flow_rxq_tunnel_ptype_update(rxq_ctrl);
- }
- }
+ assert(type > MLX5_FLOW_TYPE_MIN && type < MLX5_FLOW_TYPE_MAX);
+ fops = flow_get_drv_ops(type);
+ return fops->apply(dev, flow, error);
}
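
Editor's note: taken together, flow_drv_prepare(), flow_drv_translate() and flow_drv_apply() form the per-flow creation pipeline that flow_list_create() (further below) drives once per expanded pattern. A condensed sketch of a single iteration, with error handling omitted:

    dev_flow = flow_drv_prepare(flow, attr, items, actions,
                                &item_flags, &action_flags, error);
    dev_flow->flow = flow;
    dev_flow->layers = item_flags;
    LIST_INSERT_HEAD(&flow->dev_flows, dev_flow, next);
    ret = flow_drv_translate(dev, dev_flow, attr, items, actions, error);
    if (!ret && dev->data->dev_started)
            ret = flow_drv_apply(dev, flow, error);
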
/**
- * Clear the Rx queue flags (Mark/Flag and Tunnel Ptype) associated with the
- * @p flow if no other flow uses it with the same kind of request.
+ * Flow driver remove API. This abstracts calling driver specific functions.
+ * Parent flow (rte_flow) should have driver type (drv_type). It removes a flow
+ * on device. All the resources of the flow should be freed by calling
+ * flow_drv_destroy().
*
- * @param dev
+ * @param[in] dev
* Pointer to Ethernet device.
- * @param[in] flow
- * Pointer to the flow.
+ * @param[in, out] flow
+ * Pointer to flow structure.
*/
-static void
-mlx5_flow_rxq_flags_trim(struct rte_eth_dev *dev, struct rte_flow *flow)
+static inline void
+flow_drv_remove(struct rte_eth_dev *dev, struct rte_flow *flow)
{
- struct priv *priv = dev->data->dev_private;
- const int mark = !!(flow->modifier &
- (MLX5_FLOW_MOD_FLAG | MLX5_FLOW_MOD_MARK));
- const int tunnel = !!(flow->layers & MLX5_FLOW_LAYER_TUNNEL);
- unsigned int i;
-
- assert(dev->data->dev_started);
- for (i = 0; i != flow->rss.queue_num; ++i) {
- int idx = (*flow->queue)[i];
- struct mlx5_rxq_ctrl *rxq_ctrl =
- container_of((*priv->rxqs)[idx],
- struct mlx5_rxq_ctrl, rxq);
-
- if (mark) {
- rxq_ctrl->flow_mark_n--;
- rxq_ctrl->rxq.mark = !!rxq_ctrl->flow_mark_n;
- }
- if (tunnel) {
- unsigned int j;
+ const struct mlx5_flow_driver_ops *fops;
+ enum mlx5_flow_drv_type type = flow->drv_type;
- /* Decrease the counter matching the flow. */
- for (j = 0; j != MLX5_FLOW_TUNNEL; ++j) {
- if ((tunnels_info[j].tunnel & flow->layers) ==
- tunnels_info[j].tunnel) {
- rxq_ctrl->flow_tunnels_n[j]--;
- break;
- }
- }
- mlx5_flow_rxq_tunnel_ptype_update(rxq_ctrl);
- }
- }
+ assert(type > MLX5_FLOW_TYPE_MIN && type < MLX5_FLOW_TYPE_MAX);
+ fops = flow_get_drv_ops(type);
+ fops->remove(dev, flow);
}
/**
- * Clear the Mark/Flag and Tunnel ptype information in all Rx queues.
+ * Flow driver destroy API. This abstracts calling driver specific functions.
+ * Parent flow (rte_flow) should have driver type (drv_type). It removes a flow
+ * on device and releases resources of the flow.
*
- * @param dev
+ * @param[in] dev
* Pointer to Ethernet device.
+ * @param[in, out] flow
+ * Pointer to flow structure.
*/
-static void
-mlx5_flow_rxq_flags_clear(struct rte_eth_dev *dev)
+static inline void
+flow_drv_destroy(struct rte_eth_dev *dev, struct rte_flow *flow)
{
- struct priv *priv = dev->data->dev_private;
- unsigned int i;
-
- for (i = 0; i != priv->rxqs_n; ++i) {
- struct mlx5_rxq_ctrl *rxq_ctrl;
- unsigned int j;
+ const struct mlx5_flow_driver_ops *fops;
+ enum mlx5_flow_drv_type type = flow->drv_type;
- if (!(*priv->rxqs)[i])
- continue;
- rxq_ctrl = container_of((*priv->rxqs)[i],
- struct mlx5_rxq_ctrl, rxq);
- rxq_ctrl->flow_mark_n = 0;
- rxq_ctrl->rxq.mark = 0;
- for (j = 0; j != MLX5_FLOW_TUNNEL; ++j)
- rxq_ctrl->flow_tunnels_n[j] = 0;
- rxq_ctrl->rxq.tunnel = 0;
- }
+ assert(type > MLX5_FLOW_TYPE_MIN && type < MLX5_FLOW_TYPE_MAX);
+ fops = flow_get_drv_ops(type);
+ fops->destroy(dev, flow);
}
/**
@@ -2859,134 +1930,55 @@ mlx5_flow_validate(struct rte_eth_dev *dev,
const struct rte_flow_action actions[],
struct rte_flow_error *error)
{
- int ret = mlx5_flow_merge(dev, NULL, 0, attr, items, actions, error);
+ int ret;
+ ret = flow_drv_validate(dev, attr, items, actions, error);
if (ret < 0)
return ret;
return 0;
}
/**
- * Remove the flow.
+ * Get RSS action from the action list.
*
- * @param[in] dev
- * Pointer to Ethernet device.
- * @param[in, out] flow
- * Pointer to flow structure.
+ * @param[in] actions
+ * Pointer to the list of actions.
+ *
+ * @return
+ * Pointer to the RSS action if it exists, NULL otherwise.
*/
-static void
-mlx5_flow_remove(struct rte_eth_dev *dev, struct rte_flow *flow)
+static const struct rte_flow_action_rss*
+flow_get_rss_action(const struct rte_flow_action actions[])
{
- struct priv *priv = dev->data->dev_private;
- struct mlx5_flow_verbs *verbs;
-
- if (flow->nl_flow && priv->mnl_socket)
- mlx5_nl_flow_destroy(priv->mnl_socket, flow->nl_flow, NULL);
- LIST_FOREACH(verbs, &flow->verbs, next) {
- if (verbs->flow) {
- claim_zero(mlx5_glue->destroy_flow(verbs->flow));
- verbs->flow = NULL;
- }
- if (verbs->hrxq) {
- if (flow->fate & MLX5_FLOW_FATE_DROP)
- mlx5_hrxq_drop_release(dev);
- else
- mlx5_hrxq_release(dev, verbs->hrxq);
- verbs->hrxq = NULL;
+ for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
+ switch (actions->type) {
+ case RTE_FLOW_ACTION_TYPE_RSS:
+ return (const struct rte_flow_action_rss *)
+ actions->conf;
+ default:
+ break;
}
}
- if (flow->counter) {
- mlx5_flow_counter_release(flow->counter);
- flow->counter = NULL;
- }
+ return NULL;
}
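
Editor's note: flow_get_rss_action() simply walks the user action list and returns the first RSS configuration it finds. A hypothetical illustration of that behavior (the helper itself is static, so this is only a sketch of how it is consumed):

    static const struct rte_flow_action_rss *
    probe_rss(void)
    {
            static uint16_t queues[2] = { 0, 1 };
            static const struct rte_flow_action_rss rss_conf = {
                    .types = ETH_RSS_IP,
                    .queue_num = 2,
                    .queue = queues,
            };
            static const struct rte_flow_action actions[] = {
                    { .type = RTE_FLOW_ACTION_TYPE_RSS, .conf = &rss_conf },
                    { .type = RTE_FLOW_ACTION_TYPE_END },
            };

            /* Returns &rss_conf here; NULL if no RSS action is present. */
            return flow_get_rss_action(actions);
    }
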
-/**
- * Apply the flow.
- *
- * @param[in] dev
- * Pointer to Ethernet device structure.
- * @param[in, out] flow
- * Pointer to flow structure.
- * @param[out] error
- * Pointer to error structure.
- *
- * @return
- * 0 on success, a negative errno value otherwise and rte_errno is set.
- */
-static int
-mlx5_flow_apply(struct rte_eth_dev *dev, struct rte_flow *flow,
- struct rte_flow_error *error)
+static unsigned int
+find_graph_root(const struct rte_flow_item pattern[], uint32_t rss_level)
{
- struct priv *priv = dev->data->dev_private;
- struct mlx5_flow_verbs *verbs;
- int err;
-
- LIST_FOREACH(verbs, &flow->verbs, next) {
- if (flow->fate & MLX5_FLOW_FATE_DROP) {
- verbs->hrxq = mlx5_hrxq_drop_new(dev);
- if (!verbs->hrxq) {
- rte_flow_error_set
- (error, errno,
- RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
- NULL,
- "cannot get drop hash queue");
- goto error;
- }
- } else {
- struct mlx5_hrxq *hrxq;
-
- hrxq = mlx5_hrxq_get(dev, flow->key,
- MLX5_RSS_HASH_KEY_LEN,
- verbs->hash_fields,
- (*flow->queue),
- flow->rss.queue_num);
- if (!hrxq)
- hrxq = mlx5_hrxq_new(dev, flow->key,
- MLX5_RSS_HASH_KEY_LEN,
- verbs->hash_fields,
- (*flow->queue),
- flow->rss.queue_num,
- !!(flow->layers &
- MLX5_FLOW_LAYER_TUNNEL));
- if (!hrxq) {
- rte_flow_error_set
- (error, rte_errno,
- RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
- NULL,
- "cannot get hash queue");
- goto error;
- }
- verbs->hrxq = hrxq;
- }
- verbs->flow =
- mlx5_glue->create_flow(verbs->hrxq->qp, verbs->attr);
- if (!verbs->flow) {
- rte_flow_error_set(error, errno,
- RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
- NULL,
- "hardware refuses to create flow");
- goto error;
- }
- }
- if (flow->nl_flow &&
- priv->mnl_socket &&
- mlx5_nl_flow_create(priv->mnl_socket, flow->nl_flow, error))
- goto error;
- return 0;
-error:
- err = rte_errno; /* Save rte_errno before cleanup. */
- LIST_FOREACH(verbs, &flow->verbs, next) {
- if (verbs->hrxq) {
- if (flow->fate & MLX5_FLOW_FATE_DROP)
- mlx5_hrxq_drop_release(dev);
- else
- mlx5_hrxq_release(dev, verbs->hrxq);
- verbs->hrxq = NULL;
+ const struct rte_flow_item *item;
+ unsigned int has_vlan = 0;
+
+ for (item = pattern; item->type != RTE_FLOW_ITEM_TYPE_END; item++) {
+ if (item->type == RTE_FLOW_ITEM_TYPE_VLAN) {
+ has_vlan = 1;
+ break;
}
}
- rte_errno = err; /* Restore rte_errno. */
- return -rte_errno;
+ if (has_vlan)
+ return rss_level < 2 ? MLX5_EXPANSION_ROOT_ETH_VLAN :
+ MLX5_EXPANSION_ROOT_OUTER_ETH_VLAN;
+ return rss_level < 2 ? MLX5_EXPANSION_ROOT :
+ MLX5_EXPANSION_ROOT_OUTER;
}
/**
@@ -3009,50 +2001,90 @@ error:
* A flow on success, NULL otherwise and rte_errno is set.
*/
static struct rte_flow *
-mlx5_flow_list_create(struct rte_eth_dev *dev,
- struct mlx5_flows *list,
- const struct rte_flow_attr *attr,
- const struct rte_flow_item items[],
- const struct rte_flow_action actions[],
- struct rte_flow_error *error)
+flow_list_create(struct rte_eth_dev *dev, struct mlx5_flows *list,
+ const struct rte_flow_attr *attr,
+ const struct rte_flow_item items[],
+ const struct rte_flow_action actions[],
+ struct rte_flow_error *error)
{
struct rte_flow *flow = NULL;
- size_t size = 0;
+ struct mlx5_flow *dev_flow;
+ uint64_t action_flags = 0;
+ uint64_t item_flags = 0;
+ const struct rte_flow_action_rss *rss;
+ union {
+ struct rte_flow_expand_rss buf;
+ uint8_t buffer[2048];
+ } expand_buffer;
+ struct rte_flow_expand_rss *buf = &expand_buffer.buf;
int ret;
+ uint32_t i;
+ uint32_t flow_size;
- ret = mlx5_flow_merge(dev, flow, size, attr, items, actions, error);
+ ret = flow_drv_validate(dev, attr, items, actions, error);
if (ret < 0)
return NULL;
- size = ret;
- flow = rte_calloc(__func__, 1, size, 0);
- if (!flow) {
- rte_flow_error_set(error, ENOMEM,
- RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
- NULL,
- "not enough memory to create flow");
- return NULL;
+ flow_size = sizeof(struct rte_flow);
+ rss = flow_get_rss_action(actions);
+ if (rss)
+ flow_size += RTE_ALIGN_CEIL(rss->queue_num * sizeof(uint16_t),
+ sizeof(void *));
+ else
+ flow_size += RTE_ALIGN_CEIL(sizeof(uint16_t), sizeof(void *));
+ flow = rte_calloc(__func__, 1, flow_size, 0);
+ flow->drv_type = flow_get_drv_type(dev, attr);
+ assert(flow->drv_type > MLX5_FLOW_TYPE_MIN &&
+ flow->drv_type < MLX5_FLOW_TYPE_MAX);
+ flow->queue = (void *)(flow + 1);
+ LIST_INIT(&flow->dev_flows);
+ if (rss && rss->types) {
+ unsigned int graph_root;
+
+ graph_root = find_graph_root(items, rss->level);
+ ret = rte_flow_expand_rss(buf, sizeof(expand_buffer.buffer),
+ items, rss->types,
+ mlx5_support_expansion,
+ graph_root);
+ assert(ret > 0 &&
+ (unsigned int)ret < sizeof(expand_buffer.buffer));
+ } else {
+ buf->entries = 1;
+ buf->entry[0].pattern = (void *)(uintptr_t)items;
}
- ret = mlx5_flow_merge(dev, flow, size, attr, items, actions, error);
- if (ret < 0) {
- rte_free(flow);
- return NULL;
+ for (i = 0; i < buf->entries; ++i) {
+ dev_flow = flow_drv_prepare(flow, attr, buf->entry[i].pattern,
+ actions, &item_flags, &action_flags,
+ error);
+ if (!dev_flow)
+ goto error;
+ dev_flow->flow = flow;
+ dev_flow->layers = item_flags;
+ /* Store actions once as expanded flows have same actions. */
+ if (i == 0)
+ flow->actions = action_flags;
+ assert(flow->actions == action_flags);
+ LIST_INSERT_HEAD(&flow->dev_flows, dev_flow, next);
+ ret = flow_drv_translate(dev, dev_flow, attr,
+ buf->entry[i].pattern,
+ actions, error);
+ if (ret < 0)
+ goto error;
}
- assert((size_t)ret == size);
if (dev->data->dev_started) {
- ret = mlx5_flow_apply(dev, flow, error);
- if (ret < 0) {
- ret = rte_errno; /* Save rte_errno before cleanup. */
- if (flow) {
- mlx5_flow_remove(dev, flow);
- rte_free(flow);
- }
- rte_errno = ret; /* Restore rte_errno. */
- return NULL;
- }
+ ret = flow_drv_apply(dev, flow, error);
+ if (ret < 0)
+ goto error;
}
TAILQ_INSERT_TAIL(list, flow, next);
- mlx5_flow_rxq_flags_set(dev, flow);
+ flow_rxq_flags_set(dev, flow);
return flow;
+error:
+ ret = rte_errno; /* Save rte_errno before cleanup. */
+ assert(flow);
+ flow_drv_destroy(dev, flow);
+ rte_free(flow);
+ rte_errno = ret; /* Restore rte_errno. */
+ return NULL;
}
/**
@@ -3068,9 +2100,9 @@ mlx5_flow_create(struct rte_eth_dev *dev,
const struct rte_flow_action actions[],
struct rte_flow_error *error)
{
- return mlx5_flow_list_create
- (dev, &((struct priv *)dev->data->dev_private)->flows,
- attr, items, actions, error);
+ return flow_list_create(dev,
+ &((struct priv *)dev->data->dev_private)->flows,
+ attr, items, actions, error);
}
/**
@@ -3084,17 +2116,17 @@ mlx5_flow_create(struct rte_eth_dev *dev,
* Flow to destroy.
*/
static void
-mlx5_flow_list_destroy(struct rte_eth_dev *dev, struct mlx5_flows *list,
- struct rte_flow *flow)
+flow_list_destroy(struct rte_eth_dev *dev, struct mlx5_flows *list,
+ struct rte_flow *flow)
{
- mlx5_flow_remove(dev, flow);
+ flow_drv_destroy(dev, flow);
TAILQ_REMOVE(list, flow, next);
/*
* Update RX queue flags only if port is started, otherwise it is
* already clean.
*/
if (dev->data->dev_started)
- mlx5_flow_rxq_flags_trim(dev, flow);
+ flow_rxq_flags_trim(dev, flow);
rte_free(flow);
}
@@ -3113,7 +2145,7 @@ mlx5_flow_list_flush(struct rte_eth_dev *dev, struct mlx5_flows *list)
struct rte_flow *flow;
flow = TAILQ_FIRST(list);
- mlx5_flow_list_destroy(dev, list, flow);
+ flow_list_destroy(dev, list, flow);
}
}
@@ -3131,8 +2163,8 @@ mlx5_flow_stop(struct rte_eth_dev *dev, struct mlx5_flows *list)
struct rte_flow *flow;
TAILQ_FOREACH_REVERSE(flow, list, mlx5_flows, next)
- mlx5_flow_remove(dev, flow);
- mlx5_flow_rxq_flags_clear(dev);
+ flow_drv_remove(dev, flow);
+ flow_rxq_flags_clear(dev);
}
/**
@@ -3154,10 +2186,10 @@ mlx5_flow_start(struct rte_eth_dev *dev, struct mlx5_flows *list)
int ret = 0;
TAILQ_FOREACH(flow, list, next) {
- ret = mlx5_flow_apply(dev, flow, &error);
+ ret = flow_drv_apply(dev, flow, &error);
if (ret < 0)
goto error;
- mlx5_flow_rxq_flags_set(dev, flow);
+ flow_rxq_flags_set(dev, flow);
}
return 0;
error:
@@ -3228,7 +2260,7 @@ mlx5_ctrl_flow_vlan(struct rte_eth_dev *dev,
},
{
.type = (vlan_spec) ? RTE_FLOW_ITEM_TYPE_VLAN :
- RTE_FLOW_ITEM_TYPE_END,
+ RTE_FLOW_ITEM_TYPE_END,
.spec = vlan_spec,
.last = NULL,
.mask = vlan_mask,
@@ -3266,8 +2298,8 @@ mlx5_ctrl_flow_vlan(struct rte_eth_dev *dev,
}
for (i = 0; i != priv->reta_idx_n; ++i)
queue[i] = (*priv->reta_idx)[i];
- flow = mlx5_flow_list_create(dev, &priv->ctrl_flows, &attr, items,
- actions, &error);
+ flow = flow_list_create(dev, &priv->ctrl_flows,
+ &attr, items, actions, &error);
if (!flow)
return -rte_errno;
return 0;
@@ -3307,7 +2339,7 @@ mlx5_flow_destroy(struct rte_eth_dev *dev,
{
struct priv *priv = dev->data->dev_private;
- mlx5_flow_list_destroy(dev, &priv->flows, flow);
+ flow_list_destroy(dev, &priv->flows, flow);
return 0;
}
@@ -3356,92 +2388,45 @@ mlx5_flow_isolate(struct rte_eth_dev *dev,
}
/**
- * Query flow counter.
- *
- * @param flow
- * Pointer to the flow.
+ * Query a flow.
*
- * @return
- * 0 on success, a negative errno value otherwise and rte_errno is set.
+ * @see rte_flow_query()
+ * @see rte_flow_ops
*/
static int
-mlx5_flow_query_count(struct rte_flow *flow __rte_unused,
- void *data __rte_unused,
- struct rte_flow_error *error)
-{
-#ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
- if (flow->modifier & MLX5_FLOW_MOD_COUNT) {
- struct rte_flow_query_count *qc = data;
- uint64_t counters[2] = {0, 0};
- struct ibv_query_counter_set_attr query_cs_attr = {
- .cs = flow->counter->cs,
- .query_flags = IBV_COUNTER_SET_FORCE_UPDATE,
- };
- struct ibv_counter_set_data query_out = {
- .out = counters,
- .outlen = 2 * sizeof(uint64_t),
- };
- int err = mlx5_glue->query_counter_set(&query_cs_attr,
- &query_out);
+flow_drv_query(struct rte_eth_dev *dev,
+ struct rte_flow *flow,
+ const struct rte_flow_action *actions,
+ void *data,
+ struct rte_flow_error *error)
+{
+ const struct mlx5_flow_driver_ops *fops;
+ enum mlx5_flow_drv_type ftype = flow->drv_type;
- if (err)
- return rte_flow_error_set
- (error, err,
- RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
- NULL,
- "cannot read counter");
- qc->hits_set = 1;
- qc->bytes_set = 1;
- qc->hits = counters[0] - flow->counter->hits;
- qc->bytes = counters[1] - flow->counter->bytes;
- if (qc->reset) {
- flow->counter->hits = counters[0];
- flow->counter->bytes = counters[1];
- }
- return 0;
- }
- return rte_flow_error_set(error, ENOTSUP,
- RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
- NULL,
- "flow does not have counter");
-#endif
- return rte_flow_error_set(error, ENOTSUP,
- RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
- NULL,
- "counters are not available");
+ assert(ftype > MLX5_FLOW_TYPE_MIN && ftype < MLX5_FLOW_TYPE_MAX);
+ fops = flow_get_drv_ops(ftype);
+
+ return fops->query(dev, flow, actions, data, error);
}
/**
- * Query a flows.
+ * Query a flow.
*
* @see rte_flow_query()
* @see rte_flow_ops
*/
int
-mlx5_flow_query(struct rte_eth_dev *dev __rte_unused,
+mlx5_flow_query(struct rte_eth_dev *dev,
struct rte_flow *flow,
const struct rte_flow_action *actions,
void *data,
struct rte_flow_error *error)
{
- int ret = 0;
+ int ret;
- for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
- switch (actions->type) {
- case RTE_FLOW_ACTION_TYPE_VOID:
- break;
- case RTE_FLOW_ACTION_TYPE_COUNT:
- ret = mlx5_flow_query_count(flow, data, error);
- break;
- default:
- return rte_flow_error_set(error, ENOTSUP,
- RTE_FLOW_ERROR_TYPE_ACTION,
- actions,
- "action not supported");
- }
- if (ret < 0)
- return ret;
- }
+ ret = flow_drv_query(dev, flow, actions, data, error);
+ if (ret < 0)
+ return ret;
return 0;
}
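
Editor's note: as with creation, the query path is reached through the generic API. A short application-side sketch reading a COUNT action, assuming the flow was created with such an action and that counters are supported on the device:

    #include <stdio.h>
    #include <inttypes.h>
    #include <rte_flow.h>

    static void
    print_flow_counter(uint16_t port_id, struct rte_flow *flow)
    {
            struct rte_flow_query_count qc = { .reset = 0 };
            const struct rte_flow_action query_actions[] = {
                    { .type = RTE_FLOW_ACTION_TYPE_COUNT },
                    { .type = RTE_FLOW_ACTION_TYPE_END },
            };
            struct rte_flow_error error;

            if (rte_flow_query(port_id, flow, query_actions, &qc, &error) == 0)
                    printf("hits=%" PRIu64 " bytes=%" PRIu64 "\n",
                           qc.hits, qc.bytes);
    }
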
@@ -3511,7 +2496,6 @@ mlx5_fdir_filter_convert(struct rte_eth_dev *dev,
.dst_addr = input->flow.ip4_flow.dst_ip,
.time_to_live = input->flow.ip4_flow.ttl,
.type_of_service = input->flow.ip4_flow.tos,
- .next_proto_id = input->flow.ip4_flow.proto,
};
attributes->l3_mask.ipv4.hdr = (struct ipv4_hdr){
.src_addr = mask->ipv4_mask.src_ip,
@@ -3663,9 +2647,8 @@ mlx5_fdir_filter_add(struct rte_eth_dev *dev,
ret = mlx5_fdir_filter_convert(dev, fdir_filter, &attributes);
if (ret)
return ret;
- flow = mlx5_flow_list_create(dev, &priv->flows, &attributes.attr,
- attributes.items, attributes.actions,
- &error);
+ flow = flow_list_create(dev, &priv->flows, &attributes.attr,
+ attributes.items, attributes.actions, &error);
if (flow) {
DRV_LOG(DEBUG, "port %u FDIR created %p", dev->data->port_id,
(void *)flow);
diff --git a/drivers/net/mlx5/mlx5_flow.h b/drivers/net/mlx5/mlx5_flow.h
new file mode 100644
index 00000000..61299d66
--- /dev/null
+++ b/drivers/net/mlx5/mlx5_flow.h
@@ -0,0 +1,375 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright 2018 Mellanox Technologies, Ltd
+ */
+
+#ifndef RTE_PMD_MLX5_FLOW_H_
+#define RTE_PMD_MLX5_FLOW_H_
+
+#include <netinet/in.h>
+#include <sys/queue.h>
+#include <stdalign.h>
+#include <stdint.h>
+#include <string.h>
+
+/* Verbs header. */
+/* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */
+#ifdef PEDANTIC
+#pragma GCC diagnostic ignored "-Wpedantic"
+#endif
+#include <infiniband/verbs.h>
+#ifdef PEDANTIC
+#pragma GCC diagnostic error "-Wpedantic"
+#endif
+
+/* Pattern outer Layer bits. */
+#define MLX5_FLOW_LAYER_OUTER_L2 (1u << 0)
+#define MLX5_FLOW_LAYER_OUTER_L3_IPV4 (1u << 1)
+#define MLX5_FLOW_LAYER_OUTER_L3_IPV6 (1u << 2)
+#define MLX5_FLOW_LAYER_OUTER_L4_UDP (1u << 3)
+#define MLX5_FLOW_LAYER_OUTER_L4_TCP (1u << 4)
+#define MLX5_FLOW_LAYER_OUTER_VLAN (1u << 5)
+
+/* Pattern inner Layer bits. */
+#define MLX5_FLOW_LAYER_INNER_L2 (1u << 6)
+#define MLX5_FLOW_LAYER_INNER_L3_IPV4 (1u << 7)
+#define MLX5_FLOW_LAYER_INNER_L3_IPV6 (1u << 8)
+#define MLX5_FLOW_LAYER_INNER_L4_UDP (1u << 9)
+#define MLX5_FLOW_LAYER_INNER_L4_TCP (1u << 10)
+#define MLX5_FLOW_LAYER_INNER_VLAN (1u << 11)
+
+/* Pattern tunnel Layer bits. */
+#define MLX5_FLOW_LAYER_VXLAN (1u << 12)
+#define MLX5_FLOW_LAYER_VXLAN_GPE (1u << 13)
+#define MLX5_FLOW_LAYER_GRE (1u << 14)
+#define MLX5_FLOW_LAYER_MPLS (1u << 15)
+
+/* General pattern items bits. */
+#define MLX5_FLOW_ITEM_METADATA (1u << 16)
+
+/* Outer Masks. */
+#define MLX5_FLOW_LAYER_OUTER_L3 \
+ (MLX5_FLOW_LAYER_OUTER_L3_IPV4 | MLX5_FLOW_LAYER_OUTER_L3_IPV6)
+#define MLX5_FLOW_LAYER_OUTER_L4 \
+ (MLX5_FLOW_LAYER_OUTER_L4_UDP | MLX5_FLOW_LAYER_OUTER_L4_TCP)
+#define MLX5_FLOW_LAYER_OUTER \
+ (MLX5_FLOW_LAYER_OUTER_L2 | MLX5_FLOW_LAYER_OUTER_L3 | \
+ MLX5_FLOW_LAYER_OUTER_L4)
+
+/* Tunnel Masks. */
+#define MLX5_FLOW_LAYER_TUNNEL \
+ (MLX5_FLOW_LAYER_VXLAN | MLX5_FLOW_LAYER_VXLAN_GPE | \
+ MLX5_FLOW_LAYER_GRE | MLX5_FLOW_LAYER_MPLS)
+
+/* Inner Masks. */
+#define MLX5_FLOW_LAYER_INNER_L3 \
+ (MLX5_FLOW_LAYER_INNER_L3_IPV4 | MLX5_FLOW_LAYER_INNER_L3_IPV6)
+#define MLX5_FLOW_LAYER_INNER_L4 \
+ (MLX5_FLOW_LAYER_INNER_L4_UDP | MLX5_FLOW_LAYER_INNER_L4_TCP)
+#define MLX5_FLOW_LAYER_INNER \
+ (MLX5_FLOW_LAYER_INNER_L2 | MLX5_FLOW_LAYER_INNER_L3 | \
+ MLX5_FLOW_LAYER_INNER_L4)
+
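
Editor's note: these layer bits are accumulated into item_flags and into mlx5_flow.layers by the per-driver prepare/translate callbacks. A small illustration (not part of the header) of how a VXLAN-encapsulated flow would be represented and tested with the masks below:

    uint64_t item_flags = MLX5_FLOW_LAYER_OUTER_L2 |
                          MLX5_FLOW_LAYER_OUTER_L3_IPV4 |
                          MLX5_FLOW_LAYER_OUTER_L4_UDP |
                          MLX5_FLOW_LAYER_VXLAN |
                          MLX5_FLOW_LAYER_INNER_L2;
    int tunnel   = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);   /* 1: VXLAN set */
    int inner_l3 = !!(item_flags & MLX5_FLOW_LAYER_INNER_L3); /* 0 here */
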
+/* Actions */
+#define MLX5_FLOW_ACTION_DROP (1u << 0)
+#define MLX5_FLOW_ACTION_QUEUE (1u << 1)
+#define MLX5_FLOW_ACTION_RSS (1u << 2)
+#define MLX5_FLOW_ACTION_FLAG (1u << 3)
+#define MLX5_FLOW_ACTION_MARK (1u << 4)
+#define MLX5_FLOW_ACTION_COUNT (1u << 5)
+#define MLX5_FLOW_ACTION_PORT_ID (1u << 6)
+#define MLX5_FLOW_ACTION_OF_POP_VLAN (1u << 7)
+#define MLX5_FLOW_ACTION_OF_PUSH_VLAN (1u << 8)
+#define MLX5_FLOW_ACTION_OF_SET_VLAN_VID (1u << 9)
+#define MLX5_FLOW_ACTION_OF_SET_VLAN_PCP (1u << 10)
+#define MLX5_FLOW_ACTION_SET_IPV4_SRC (1u << 11)
+#define MLX5_FLOW_ACTION_SET_IPV4_DST (1u << 12)
+#define MLX5_FLOW_ACTION_SET_IPV6_SRC (1u << 13)
+#define MLX5_FLOW_ACTION_SET_IPV6_DST (1u << 14)
+#define MLX5_FLOW_ACTION_SET_TP_SRC (1u << 15)
+#define MLX5_FLOW_ACTION_SET_TP_DST (1u << 16)
+#define MLX5_FLOW_ACTION_JUMP (1u << 17)
+#define MLX5_FLOW_ACTION_SET_TTL (1u << 18)
+#define MLX5_FLOW_ACTION_DEC_TTL (1u << 19)
+#define MLX5_FLOW_ACTION_SET_MAC_SRC (1u << 20)
+#define MLX5_FLOW_ACTION_SET_MAC_DST (1u << 21)
+
+#define MLX5_FLOW_FATE_ACTIONS \
+ (MLX5_FLOW_ACTION_DROP | MLX5_FLOW_ACTION_QUEUE | MLX5_FLOW_ACTION_RSS)
+
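
Editor's note: the action bits end up in action_flags and in rte_flow.actions in the same way, and the fate mask gives a one-line check of the kind the validation code needs. A fragment as it could appear inside a backend validate callback (illustration only, mirroring the "no fate action found" check removed from mlx5_flow.c above):

    uint64_t action_flags = MLX5_FLOW_ACTION_MARK | MLX5_FLOW_ACTION_QUEUE;

    /* A flow must carry a fate action (drop, queue or RSS). */
    if (!(action_flags & MLX5_FLOW_FATE_ACTIONS))
            return rte_flow_error_set(error, ENOTSUP,
                                      RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
                                      NULL, "no fate action found");
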
+#ifndef IPPROTO_MPLS
+#define IPPROTO_MPLS 137
+#endif
+
+/* UDP port numbers for VxLAN. */
+#define MLX5_UDP_PORT_VXLAN 4789
+#define MLX5_UDP_PORT_VXLAN_GPE 4790
+
+/* Priority reserved for default flows. */
+#define MLX5_FLOW_PRIO_RSVD ((uint32_t)-1)
+
+/*
+ * Number of sub priorities.
+ * For each kind of pattern matching, i.e. L2, L3, L4, to have correct
+ * matching on the NIC (firmware dependent), L4 must have the highest
+ * priority, followed by L3 and ending with L2.
+ */
+#define MLX5_PRIORITY_MAP_L2 2
+#define MLX5_PRIORITY_MAP_L3 1
+#define MLX5_PRIORITY_MAP_L4 0
+#define MLX5_PRIORITY_MAP_MAX 3
+
+/* Valid layer type for IPV4 RSS. */
+#define MLX5_IPV4_LAYER_TYPES \
+ (ETH_RSS_IPV4 | ETH_RSS_FRAG_IPV4 | \
+ ETH_RSS_NONFRAG_IPV4_TCP | ETH_RSS_NONFRAG_IPV4_UDP | \
+ ETH_RSS_NONFRAG_IPV4_OTHER)
+
+/* IBV hash source bits for IPV4. */
+#define MLX5_IPV4_IBV_RX_HASH (IBV_RX_HASH_SRC_IPV4 | IBV_RX_HASH_DST_IPV4)
+
+/* Valid layer type for IPV6 RSS. */
+#define MLX5_IPV6_LAYER_TYPES \
+ (ETH_RSS_IPV6 | ETH_RSS_FRAG_IPV6 | ETH_RSS_NONFRAG_IPV6_TCP | \
+ ETH_RSS_NONFRAG_IPV6_UDP | ETH_RSS_IPV6_EX | ETH_RSS_IPV6_TCP_EX | \
+ ETH_RSS_IPV6_UDP_EX | ETH_RSS_NONFRAG_IPV6_OTHER)
+
+/* IBV hash source bits for IPV6. */
+#define MLX5_IPV6_IBV_RX_HASH (IBV_RX_HASH_SRC_IPV6 | IBV_RX_HASH_DST_IPV6)
+
+/* Max number of actions per DV flow. */
+#define MLX5_DV_MAX_NUMBER_OF_ACTIONS 8
+
+enum mlx5_flow_drv_type {
+ MLX5_FLOW_TYPE_MIN,
+ MLX5_FLOW_TYPE_DV,
+ MLX5_FLOW_TYPE_TCF,
+ MLX5_FLOW_TYPE_VERBS,
+ MLX5_FLOW_TYPE_MAX,
+};
+
+/* Matcher PRM representation */
+struct mlx5_flow_dv_match_params {
+ size_t size;
+ /**< Size of match value. Do NOT split size and key! */
+ uint32_t buf[MLX5_ST_SZ_DW(fte_match_param)];
+ /**< Matcher value. This value is used as the mask or as a key. */
+};
+
+/* Matcher structure. */
+struct mlx5_flow_dv_matcher {
+ LIST_ENTRY(mlx5_flow_dv_matcher) next;
+ /* Pointer to the next element. */
+ rte_atomic32_t refcnt; /**< Reference counter. */
+ void *matcher_object; /**< Pointer to DV matcher */
+ uint16_t crc; /**< CRC of key. */
+ uint16_t priority; /**< Priority of matcher. */
+ uint8_t egress; /**< Egress matcher. */
+ struct mlx5_flow_dv_match_params mask; /**< Matcher mask. */
+};
+
+/* DV flows structure. */
+struct mlx5_flow_dv {
+ uint64_t hash_fields; /**< Fields that participate in the hash. */
+ struct mlx5_hrxq *hrxq; /**< Hash Rx queues. */
+ /* Flow DV api: */
+ struct mlx5_flow_dv_matcher *matcher; /**< Cache to matcher. */
+ struct mlx5_flow_dv_match_params value;
+ /**< Holds the value that the packet is compared to. */
+ struct ibv_flow *flow; /**< Installed flow. */
+#ifdef HAVE_IBV_FLOW_DV_SUPPORT
+ struct mlx5dv_flow_action_attr actions[MLX5_DV_MAX_NUMBER_OF_ACTIONS];
+ /**< Action list. */
+#endif
+ int actions_n; /**< number of actions. */
+};
+
+/** Linux TC flower driver for E-Switch flow. */
+struct mlx5_flow_tcf {
+ struct nlmsghdr *nlh;
+ struct tcmsg *tcm;
+};
+
+/* Verbs specification header. */
+struct ibv_spec_header {
+ enum ibv_flow_spec_type type;
+ uint16_t size;
+};
+
+/** Handles information leading to a drop fate. */
+struct mlx5_flow_verbs {
+ LIST_ENTRY(mlx5_flow_verbs) next;
+ unsigned int size; /**< Size of the attribute. */
+ struct {
+ struct ibv_flow_attr *attr;
+ /**< Pointer to the Specification buffer. */
+ uint8_t *specs; /**< Pointer to the specifications. */
+ };
+ struct ibv_flow *flow; /**< Verbs flow pointer. */
+ struct mlx5_hrxq *hrxq; /**< Hash Rx queue object. */
+ uint64_t hash_fields; /**< Verbs hash Rx queue hash fields. */
+};
+
+/** Device flow structure. */
+struct mlx5_flow {
+ LIST_ENTRY(mlx5_flow) next;
+ struct rte_flow *flow; /**< Pointer to the main flow. */
+ uint64_t layers;
+ /**< Bit-fields of present layers, see MLX5_FLOW_LAYER_*. */
+ union {
+#ifdef HAVE_IBV_FLOW_DV_SUPPORT
+ struct mlx5_flow_dv dv;
+#endif
+ struct mlx5_flow_tcf tcf;
+ struct mlx5_flow_verbs verbs;
+ };
+};
+
+/* Counters information. */
+struct mlx5_flow_counter {
+ LIST_ENTRY(mlx5_flow_counter) next; /**< Pointer to the next counter. */
+ uint32_t shared:1; /**< Share counter ID with other flow rules. */
+ uint32_t ref_cnt:31; /**< Reference counter. */
+ uint32_t id; /**< Counter ID. */
+#if defined(HAVE_IBV_DEVICE_COUNTERS_SET_V42)
+ struct ibv_counter_set *cs; /**< Holds the counters for the rule. */
+#elif defined(HAVE_IBV_DEVICE_COUNTERS_SET_V45)
+ struct ibv_counters *cs; /**< Holds the counters for the rule. */
+#endif
+ uint64_t hits; /**< Number of packets matched by the rule. */
+ uint64_t bytes; /**< Number of bytes matched by the rule. */
+};
+
+/* Flow structure. */
+struct rte_flow {
+ TAILQ_ENTRY(rte_flow) next; /**< Pointer to the next flow structure. */
+	enum mlx5_flow_drv_type drv_type; /**< Driver type. */
+ struct mlx5_flow_counter *counter; /**< Holds flow counter. */
+ struct rte_flow_action_rss rss;/**< RSS context. */
+ uint8_t key[MLX5_RSS_HASH_KEY_LEN]; /**< RSS hash key. */
+ uint16_t (*queue)[]; /**< Destination queues to redirect traffic to. */
+ LIST_HEAD(dev_flows, mlx5_flow) dev_flows;
+ /**< Device flows that are part of the flow. */
+ uint64_t actions;
+ /**< Bit-fields of detected actions, see MLX5_FLOW_ACTION_*. */
+};
+typedef int (*mlx5_flow_validate_t)(struct rte_eth_dev *dev,
+ const struct rte_flow_attr *attr,
+ const struct rte_flow_item items[],
+ const struct rte_flow_action actions[],
+ struct rte_flow_error *error);
+typedef struct mlx5_flow *(*mlx5_flow_prepare_t)
+ (const struct rte_flow_attr *attr, const struct rte_flow_item items[],
+ const struct rte_flow_action actions[], uint64_t *item_flags,
+ uint64_t *action_flags, struct rte_flow_error *error);
+typedef int (*mlx5_flow_translate_t)(struct rte_eth_dev *dev,
+ struct mlx5_flow *dev_flow,
+ const struct rte_flow_attr *attr,
+ const struct rte_flow_item items[],
+ const struct rte_flow_action actions[],
+ struct rte_flow_error *error);
+typedef int (*mlx5_flow_apply_t)(struct rte_eth_dev *dev, struct rte_flow *flow,
+ struct rte_flow_error *error);
+typedef void (*mlx5_flow_remove_t)(struct rte_eth_dev *dev,
+ struct rte_flow *flow);
+typedef void (*mlx5_flow_destroy_t)(struct rte_eth_dev *dev,
+ struct rte_flow *flow);
+typedef int (*mlx5_flow_query_t)(struct rte_eth_dev *dev,
+ struct rte_flow *flow,
+ const struct rte_flow_action *actions,
+ void *data,
+ struct rte_flow_error *error);
+struct mlx5_flow_driver_ops {
+ mlx5_flow_validate_t validate;
+ mlx5_flow_prepare_t prepare;
+ mlx5_flow_translate_t translate;
+ mlx5_flow_apply_t apply;
+ mlx5_flow_remove_t remove;
+ mlx5_flow_destroy_t destroy;
+ mlx5_flow_query_t query;
+};
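+
+/*
+ * Illustrative sketch (not part of the driver): mlx5_flow.c is expected to
+ * pick the ops table matching flow->drv_type and chain the callbacks roughly
+ * as below when creating a flow.  flow_get_drv_ops() is a hypothetical helper
+ * name used only for this example.
+ *
+ *	const struct mlx5_flow_driver_ops *ops =
+ *		flow_get_drv_ops(flow->drv_type);
+ *
+ *	if (ops->validate(dev, attr, items, actions, error))
+ *		return -rte_errno;
+ *	dev_flow = ops->prepare(attr, items, actions,
+ *				&item_flags, &action_flags, error);
+ *	if (!dev_flow ||
+ *	    ops->translate(dev, dev_flow, attr, items, actions, error) ||
+ *	    ops->apply(dev, flow, error))
+ *		return -rte_errno;
+ */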
+
+/* mlx5_flow.c */
+
+uint64_t mlx5_flow_hashfields_adjust(struct mlx5_flow *dev_flow, int tunnel,
+ uint64_t layer_types,
+ uint64_t hash_fields);
+uint32_t mlx5_flow_adjust_priority(struct rte_eth_dev *dev, int32_t priority,
+ uint32_t subpriority);
+int mlx5_flow_validate_action_count(struct rte_eth_dev *dev,
+ const struct rte_flow_attr *attr,
+ struct rte_flow_error *error);
+int mlx5_flow_validate_action_drop(uint64_t action_flags,
+ const struct rte_flow_attr *attr,
+ struct rte_flow_error *error);
+int mlx5_flow_validate_action_flag(uint64_t action_flags,
+ const struct rte_flow_attr *attr,
+ struct rte_flow_error *error);
+int mlx5_flow_validate_action_mark(const struct rte_flow_action *action,
+ uint64_t action_flags,
+ const struct rte_flow_attr *attr,
+ struct rte_flow_error *error);
+int mlx5_flow_validate_action_queue(const struct rte_flow_action *action,
+ uint64_t action_flags,
+ struct rte_eth_dev *dev,
+ const struct rte_flow_attr *attr,
+ struct rte_flow_error *error);
+int mlx5_flow_validate_action_rss(const struct rte_flow_action *action,
+ uint64_t action_flags,
+ struct rte_eth_dev *dev,
+ const struct rte_flow_attr *attr,
+ struct rte_flow_error *error);
+int mlx5_flow_validate_attributes(struct rte_eth_dev *dev,
+ const struct rte_flow_attr *attributes,
+ struct rte_flow_error *error);
+int mlx5_flow_item_acceptable(const struct rte_flow_item *item,
+ const uint8_t *mask,
+ const uint8_t *nic_mask,
+ unsigned int size,
+ struct rte_flow_error *error);
+int mlx5_flow_validate_item_eth(const struct rte_flow_item *item,
+ uint64_t item_flags,
+ struct rte_flow_error *error);
+int mlx5_flow_validate_item_gre(const struct rte_flow_item *item,
+ uint64_t item_flags,
+ uint8_t target_protocol,
+ struct rte_flow_error *error);
+int mlx5_flow_validate_item_ipv4(const struct rte_flow_item *item,
+ int64_t item_flags,
+ struct rte_flow_error *error);
+int mlx5_flow_validate_item_ipv6(const struct rte_flow_item *item,
+ uint64_t item_flags,
+ struct rte_flow_error *error);
+int mlx5_flow_validate_item_mpls(const struct rte_flow_item *item,
+ uint64_t item_flags,
+ uint8_t target_protocol,
+ struct rte_flow_error *error);
+int mlx5_flow_validate_item_tcp(const struct rte_flow_item *item,
+ uint64_t item_flags,
+ uint8_t target_protocol,
+ const struct rte_flow_item_tcp *flow_mask,
+ struct rte_flow_error *error);
+int mlx5_flow_validate_item_udp(const struct rte_flow_item *item,
+ uint64_t item_flags,
+ uint8_t target_protocol,
+ struct rte_flow_error *error);
+int mlx5_flow_validate_item_vlan(const struct rte_flow_item *item,
+ int64_t item_flags,
+ struct rte_flow_error *error);
+int mlx5_flow_validate_item_vxlan(const struct rte_flow_item *item,
+ uint64_t item_flags,
+ struct rte_flow_error *error);
+int mlx5_flow_validate_item_vxlan_gpe(const struct rte_flow_item *item,
+ uint64_t item_flags,
+ struct rte_eth_dev *dev,
+ struct rte_flow_error *error);
+
+/* mlx5_flow_tcf.c */
+
+int mlx5_flow_tcf_init(struct mlx5_flow_tcf_context *ctx,
+ unsigned int ifindex, struct rte_flow_error *error);
+struct mlx5_flow_tcf_context *mlx5_flow_tcf_context_create(void);
+void mlx5_flow_tcf_context_destroy(struct mlx5_flow_tcf_context *ctx);
+
+#endif /* RTE_PMD_MLX5_FLOW_H_ */
diff --git a/drivers/net/mlx5/mlx5_flow_dv.c b/drivers/net/mlx5/mlx5_flow_dv.c
new file mode 100644
index 00000000..8f729f44
--- /dev/null
+++ b/drivers/net/mlx5/mlx5_flow_dv.c
@@ -0,0 +1,1492 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright 2018 Mellanox Technologies, Ltd
+ */
+
+#include <sys/queue.h>
+#include <stdalign.h>
+#include <stdint.h>
+#include <string.h>
+
+/* Verbs header. */
+/* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */
+#ifdef PEDANTIC
+#pragma GCC diagnostic ignored "-Wpedantic"
+#endif
+#include <infiniband/verbs.h>
+#ifdef PEDANTIC
+#pragma GCC diagnostic error "-Wpedantic"
+#endif
+
+#include <rte_common.h>
+#include <rte_ether.h>
+#include <rte_eth_ctrl.h>
+#include <rte_ethdev_driver.h>
+#include <rte_flow.h>
+#include <rte_flow_driver.h>
+#include <rte_malloc.h>
+#include <rte_ip.h>
+
+#include "mlx5.h"
+#include "mlx5_defs.h"
+#include "mlx5_prm.h"
+#include "mlx5_glue.h"
+#include "mlx5_flow.h"
+
+#ifdef HAVE_IBV_FLOW_DV_SUPPORT
+
+/**
+ * Validate META item.
+ *
+ * @param[in] dev
+ * Pointer to the rte_eth_dev structure.
+ * @param[in] item
+ * Item specification.
+ * @param[in] attr
+ * Attributes of flow that includes this item.
+ * @param[out] error
+ * Pointer to error structure.
+ *
+ * @return
+ * 0 on success, a negative errno value otherwise and rte_errno is set.
+ */
+static int
+flow_dv_validate_item_meta(struct rte_eth_dev *dev,
+ const struct rte_flow_item *item,
+ const struct rte_flow_attr *attr,
+ struct rte_flow_error *error)
+{
+ const struct rte_flow_item_meta *spec = item->spec;
+ const struct rte_flow_item_meta *mask = item->mask;
+ const struct rte_flow_item_meta nic_mask = {
+ .data = RTE_BE32(UINT32_MAX)
+ };
+ int ret;
+ uint64_t offloads = dev->data->dev_conf.txmode.offloads;
+
+ if (!(offloads & DEV_TX_OFFLOAD_MATCH_METADATA))
+ return rte_flow_error_set(error, EPERM,
+ RTE_FLOW_ERROR_TYPE_ITEM,
+ NULL,
+ "match on metadata offload "
+ "configuration is off for this port");
+ if (!spec)
+ return rte_flow_error_set(error, EINVAL,
+ RTE_FLOW_ERROR_TYPE_ITEM_SPEC,
+ item->spec,
+ "data cannot be empty");
+ if (!spec->data)
+ return rte_flow_error_set(error, EINVAL,
+ RTE_FLOW_ERROR_TYPE_ITEM_SPEC,
+ NULL,
+ "data cannot be zero");
+ if (!mask)
+ mask = &rte_flow_item_meta_mask;
+ ret = mlx5_flow_item_acceptable(item, (const uint8_t *)mask,
+ (const uint8_t *)&nic_mask,
+ sizeof(struct rte_flow_item_meta),
+ error);
+ if (ret < 0)
+ return ret;
+ if (attr->ingress)
+ return rte_flow_error_set(error, ENOTSUP,
+ RTE_FLOW_ERROR_TYPE_ATTR_INGRESS,
+ NULL,
+ "pattern not supported for ingress");
+ return 0;
+}
+
+/**
+ * Verify that the flow @p attributes will be correctly understood by the NIC.
+ *
+ * @param[in] dev
+ * Pointer to dev struct.
+ * @param[in] attributes
+ * Pointer to flow attributes
+ * @param[out] error
+ * Pointer to error structure.
+ *
+ * @return
+ * 0 on success, a negative errno value otherwise and rte_errno is set.
+ */
+static int
+flow_dv_validate_attributes(struct rte_eth_dev *dev,
+ const struct rte_flow_attr *attributes,
+ struct rte_flow_error *error)
+{
+ struct priv *priv = dev->data->dev_private;
+ uint32_t priority_max = priv->config.flow_prio - 1;
+
+ if (attributes->group)
+ return rte_flow_error_set(error, ENOTSUP,
+ RTE_FLOW_ERROR_TYPE_ATTR_GROUP,
+ NULL,
+					  "groups are not supported");
+ if (attributes->priority != MLX5_FLOW_PRIO_RSVD &&
+ attributes->priority >= priority_max)
+ return rte_flow_error_set(error, ENOTSUP,
+ RTE_FLOW_ERROR_TYPE_ATTR_PRIORITY,
+ NULL,
+ "priority out of range");
+ if (attributes->transfer)
+ return rte_flow_error_set(error, ENOTSUP,
+ RTE_FLOW_ERROR_TYPE_ATTR_TRANSFER,
+ NULL,
+ "transfer is not supported");
+ if (!(attributes->egress ^ attributes->ingress))
+ return rte_flow_error_set(error, ENOTSUP,
+ RTE_FLOW_ERROR_TYPE_ATTR, NULL,
+ "must specify exactly one of "
+ "ingress or egress");
+ return 0;
+}
+
+/**
+ * Internal validation function. For validating both actions and items.
+ *
+ * @param[in] dev
+ * Pointer to the rte_eth_dev structure.
+ * @param[in] attr
+ * Pointer to the flow attributes.
+ * @param[in] items
+ * Pointer to the list of items.
+ * @param[in] actions
+ * Pointer to the list of actions.
+ * @param[out] error
+ * Pointer to the error structure.
+ *
+ * @return
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
+ */
+static int
+flow_dv_validate(struct rte_eth_dev *dev, const struct rte_flow_attr *attr,
+ const struct rte_flow_item items[],
+ const struct rte_flow_action actions[],
+ struct rte_flow_error *error)
+{
+ int ret;
+ uint64_t action_flags = 0;
+ uint64_t item_flags = 0;
+ int tunnel = 0;
+ uint8_t next_protocol = 0xff;
+ int actions_n = 0;
+
+ if (items == NULL)
+ return -1;
+ ret = flow_dv_validate_attributes(dev, attr, error);
+ if (ret < 0)
+ return ret;
+ for (; items->type != RTE_FLOW_ITEM_TYPE_END; items++) {
+ tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
+ switch (items->type) {
+ case RTE_FLOW_ITEM_TYPE_VOID:
+ break;
+ case RTE_FLOW_ITEM_TYPE_ETH:
+ ret = mlx5_flow_validate_item_eth(items, item_flags,
+ error);
+ if (ret < 0)
+ return ret;
+ item_flags |= tunnel ? MLX5_FLOW_LAYER_INNER_L2 :
+ MLX5_FLOW_LAYER_OUTER_L2;
+ break;
+ case RTE_FLOW_ITEM_TYPE_VLAN:
+ ret = mlx5_flow_validate_item_vlan(items, item_flags,
+ error);
+ if (ret < 0)
+ return ret;
+ item_flags |= tunnel ? MLX5_FLOW_LAYER_INNER_VLAN :
+ MLX5_FLOW_LAYER_OUTER_VLAN;
+ break;
+ case RTE_FLOW_ITEM_TYPE_IPV4:
+ ret = mlx5_flow_validate_item_ipv4(items, item_flags,
+ error);
+ if (ret < 0)
+ return ret;
+ item_flags |= tunnel ? MLX5_FLOW_LAYER_INNER_L3_IPV4 :
+ MLX5_FLOW_LAYER_OUTER_L3_IPV4;
+ if (items->mask != NULL &&
+ ((const struct rte_flow_item_ipv4 *)
+ items->mask)->hdr.next_proto_id)
+ next_protocol =
+ ((const struct rte_flow_item_ipv4 *)
+ (items->spec))->hdr.next_proto_id;
+ break;
+ case RTE_FLOW_ITEM_TYPE_IPV6:
+ ret = mlx5_flow_validate_item_ipv6(items, item_flags,
+ error);
+ if (ret < 0)
+ return ret;
+ item_flags |= tunnel ? MLX5_FLOW_LAYER_INNER_L3_IPV6 :
+ MLX5_FLOW_LAYER_OUTER_L3_IPV6;
+ if (items->mask != NULL &&
+ ((const struct rte_flow_item_ipv6 *)
+ items->mask)->hdr.proto)
+ next_protocol =
+ ((const struct rte_flow_item_ipv6 *)
+ items->spec)->hdr.proto;
+ break;
+ case RTE_FLOW_ITEM_TYPE_TCP:
+ ret = mlx5_flow_validate_item_tcp
+ (items, item_flags,
+ next_protocol,
+ &rte_flow_item_tcp_mask,
+ error);
+ if (ret < 0)
+ return ret;
+ item_flags |= tunnel ? MLX5_FLOW_LAYER_INNER_L4_TCP :
+ MLX5_FLOW_LAYER_OUTER_L4_TCP;
+ break;
+ case RTE_FLOW_ITEM_TYPE_UDP:
+ ret = mlx5_flow_validate_item_udp(items, item_flags,
+ next_protocol,
+ error);
+ if (ret < 0)
+ return ret;
+ item_flags |= tunnel ? MLX5_FLOW_LAYER_INNER_L4_UDP :
+ MLX5_FLOW_LAYER_OUTER_L4_UDP;
+ break;
+ case RTE_FLOW_ITEM_TYPE_GRE:
+ case RTE_FLOW_ITEM_TYPE_NVGRE:
+ ret = mlx5_flow_validate_item_gre(items, item_flags,
+ next_protocol, error);
+ if (ret < 0)
+ return ret;
+ item_flags |= MLX5_FLOW_LAYER_GRE;
+ break;
+ case RTE_FLOW_ITEM_TYPE_VXLAN:
+ ret = mlx5_flow_validate_item_vxlan(items, item_flags,
+ error);
+ if (ret < 0)
+ return ret;
+ item_flags |= MLX5_FLOW_LAYER_VXLAN;
+ break;
+ case RTE_FLOW_ITEM_TYPE_VXLAN_GPE:
+ ret = mlx5_flow_validate_item_vxlan_gpe(items,
+ item_flags, dev,
+ error);
+ if (ret < 0)
+ return ret;
+ item_flags |= MLX5_FLOW_LAYER_VXLAN_GPE;
+ break;
+ case RTE_FLOW_ITEM_TYPE_META:
+ ret = flow_dv_validate_item_meta(dev, items, attr,
+ error);
+ if (ret < 0)
+ return ret;
+ item_flags |= MLX5_FLOW_ITEM_METADATA;
+ break;
+ default:
+ return rte_flow_error_set(error, ENOTSUP,
+ RTE_FLOW_ERROR_TYPE_ITEM,
+ NULL, "item not supported");
+ }
+ }
+ for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
+ if (actions_n == MLX5_DV_MAX_NUMBER_OF_ACTIONS)
+ return rte_flow_error_set(error, ENOTSUP,
+ RTE_FLOW_ERROR_TYPE_ACTION,
+ actions, "too many actions");
+ switch (actions->type) {
+ case RTE_FLOW_ACTION_TYPE_VOID:
+ break;
+ case RTE_FLOW_ACTION_TYPE_FLAG:
+ ret = mlx5_flow_validate_action_flag(action_flags,
+ attr, error);
+ if (ret < 0)
+ return ret;
+ action_flags |= MLX5_FLOW_ACTION_FLAG;
+ ++actions_n;
+ break;
+ case RTE_FLOW_ACTION_TYPE_MARK:
+ ret = mlx5_flow_validate_action_mark(actions,
+ action_flags,
+ attr, error);
+ if (ret < 0)
+ return ret;
+ action_flags |= MLX5_FLOW_ACTION_MARK;
+ ++actions_n;
+ break;
+ case RTE_FLOW_ACTION_TYPE_DROP:
+ ret = mlx5_flow_validate_action_drop(action_flags,
+ attr, error);
+ if (ret < 0)
+ return ret;
+ action_flags |= MLX5_FLOW_ACTION_DROP;
+ ++actions_n;
+ break;
+ case RTE_FLOW_ACTION_TYPE_QUEUE:
+ ret = mlx5_flow_validate_action_queue(actions,
+ action_flags, dev,
+ attr, error);
+ if (ret < 0)
+ return ret;
+ action_flags |= MLX5_FLOW_ACTION_QUEUE;
+ ++actions_n;
+ break;
+ case RTE_FLOW_ACTION_TYPE_RSS:
+ ret = mlx5_flow_validate_action_rss(actions,
+ action_flags, dev,
+ attr, error);
+ if (ret < 0)
+ return ret;
+ action_flags |= MLX5_FLOW_ACTION_RSS;
+ ++actions_n;
+ break;
+ case RTE_FLOW_ACTION_TYPE_COUNT:
+ ret = mlx5_flow_validate_action_count(dev, attr, error);
+ if (ret < 0)
+ return ret;
+ action_flags |= MLX5_FLOW_ACTION_COUNT;
+ ++actions_n;
+ break;
+ default:
+ return rte_flow_error_set(error, ENOTSUP,
+ RTE_FLOW_ERROR_TYPE_ACTION,
+ actions,
+ "action not supported");
+ }
+ }
+ if (!(action_flags & MLX5_FLOW_FATE_ACTIONS) && attr->ingress)
+ return rte_flow_error_set(error, EINVAL,
+ RTE_FLOW_ERROR_TYPE_ACTION, actions,
+ "no fate action is found");
+ return 0;
+}
+
+/**
+ * Internal preparation function. Allocates the DV flow structure; its size
+ * is constant.
+ *
+ * @param[in] attr
+ * Pointer to the flow attributes.
+ * @param[in] items
+ * Pointer to the list of items.
+ * @param[in] actions
+ * Pointer to the list of actions.
+ * @param[out] item_flags
+ * Pointer to bit mask of all items detected.
+ * @param[out] action_flags
+ * Pointer to bit mask of all actions detected.
+ * @param[out] error
+ * Pointer to the error structure.
+ *
+ * @return
+ * Pointer to mlx5_flow object on success,
+ *   otherwise NULL and rte_errno is set.
+ */
+static struct mlx5_flow *
+flow_dv_prepare(const struct rte_flow_attr *attr __rte_unused,
+ const struct rte_flow_item items[] __rte_unused,
+ const struct rte_flow_action actions[] __rte_unused,
+ uint64_t *item_flags __rte_unused,
+ uint64_t *action_flags __rte_unused,
+ struct rte_flow_error *error)
+{
+ uint32_t size = sizeof(struct mlx5_flow);
+ struct mlx5_flow *flow;
+
+ flow = rte_calloc(__func__, 1, size, 0);
+ if (!flow) {
+ rte_flow_error_set(error, ENOMEM,
+ RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
+ "not enough memory to create flow");
+ return NULL;
+ }
+ flow->dv.value.size = MLX5_ST_SZ_DB(fte_match_param);
+ return flow;
+}
+
+/**
+ * Add Ethernet item to matcher and to the value.
+ *
+ * @param[in, out] matcher
+ * Flow matcher.
+ * @param[in, out] key
+ * Flow matcher value.
+ * @param[in] item
+ * Flow pattern to translate.
+ * @param[in] inner
+ * Item is inner pattern.
+ */
+static void
+flow_dv_translate_item_eth(void *matcher, void *key,
+ const struct rte_flow_item *item, int inner)
+{
+ const struct rte_flow_item_eth *eth_m = item->mask;
+ const struct rte_flow_item_eth *eth_v = item->spec;
+ const struct rte_flow_item_eth nic_mask = {
+ .dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
+ .src.addr_bytes = "\xff\xff\xff\xff\xff\xff",
+ .type = RTE_BE16(0xffff),
+ };
+ void *headers_m;
+ void *headers_v;
+ char *l24_v;
+ unsigned int i;
+
+ if (!eth_v)
+ return;
+ if (!eth_m)
+ eth_m = &nic_mask;
+ if (inner) {
+ headers_m = MLX5_ADDR_OF(fte_match_param, matcher,
+ inner_headers);
+ headers_v = MLX5_ADDR_OF(fte_match_param, key, inner_headers);
+ } else {
+ headers_m = MLX5_ADDR_OF(fte_match_param, matcher,
+ outer_headers);
+ headers_v = MLX5_ADDR_OF(fte_match_param, key, outer_headers);
+ }
+ memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_m, dmac_47_16),
+ &eth_m->dst, sizeof(eth_m->dst));
+ /* The value must be in the range of the mask. */
+ l24_v = MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v, dmac_47_16);
+ for (i = 0; i < sizeof(eth_m->dst); ++i)
+ l24_v[i] = eth_m->dst.addr_bytes[i] & eth_v->dst.addr_bytes[i];
+ memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_m, smac_47_16),
+ &eth_m->src, sizeof(eth_m->src));
+ l24_v = MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v, smac_47_16);
+ /* The value must be in the range of the mask. */
+ for (i = 0; i < sizeof(eth_m->dst); ++i)
+ l24_v[i] = eth_m->src.addr_bytes[i] & eth_v->src.addr_bytes[i];
+ MLX5_SET(fte_match_set_lyr_2_4, headers_m, ethertype,
+ rte_be_to_cpu_16(eth_m->type));
+ l24_v = MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v, ethertype);
+ *(uint16_t *)(l24_v) = eth_m->type & eth_v->type;
+}
+
+/**
+ * Add VLAN item to matcher and to the value.
+ *
+ * @param[in, out] matcher
+ * Flow matcher.
+ * @param[in, out] key
+ * Flow matcher value.
+ * @param[in] item
+ * Flow pattern to translate.
+ * @param[in] inner
+ * Item is inner pattern.
+ */
+static void
+flow_dv_translate_item_vlan(void *matcher, void *key,
+ const struct rte_flow_item *item,
+ int inner)
+{
+ const struct rte_flow_item_vlan *vlan_m = item->mask;
+ const struct rte_flow_item_vlan *vlan_v = item->spec;
+ const struct rte_flow_item_vlan nic_mask = {
+ .tci = RTE_BE16(0x0fff),
+ .inner_type = RTE_BE16(0xffff),
+ };
+ void *headers_m;
+ void *headers_v;
+ uint16_t tci_m;
+ uint16_t tci_v;
+
+ if (!vlan_v)
+ return;
+ if (!vlan_m)
+ vlan_m = &nic_mask;
+ if (inner) {
+ headers_m = MLX5_ADDR_OF(fte_match_param, matcher,
+ inner_headers);
+ headers_v = MLX5_ADDR_OF(fte_match_param, key, inner_headers);
+ } else {
+ headers_m = MLX5_ADDR_OF(fte_match_param, matcher,
+ outer_headers);
+ headers_v = MLX5_ADDR_OF(fte_match_param, key, outer_headers);
+ }
+ tci_m = rte_be_to_cpu_16(vlan_m->tci);
+ tci_v = rte_be_to_cpu_16(vlan_m->tci & vlan_v->tci);
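+	/*
+	 * TCI layout: PCP (3 bits) | CFI/DEI (1 bit) | VID (12 bits),
+	 * hence the 12- and 13-bit shifts below.
+	 */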
+ MLX5_SET(fte_match_set_lyr_2_4, headers_m, cvlan_tag, 1);
+ MLX5_SET(fte_match_set_lyr_2_4, headers_v, cvlan_tag, 1);
+ MLX5_SET(fte_match_set_lyr_2_4, headers_m, first_vid, tci_m);
+ MLX5_SET(fte_match_set_lyr_2_4, headers_v, first_vid, tci_v);
+ MLX5_SET(fte_match_set_lyr_2_4, headers_m, first_cfi, tci_m >> 12);
+ MLX5_SET(fte_match_set_lyr_2_4, headers_v, first_cfi, tci_v >> 12);
+ MLX5_SET(fte_match_set_lyr_2_4, headers_m, first_prio, tci_m >> 13);
+ MLX5_SET(fte_match_set_lyr_2_4, headers_v, first_prio, tci_v >> 13);
+}
+
+/**
+ * Add IPV4 item to matcher and to the value.
+ *
+ * @param[in, out] matcher
+ * Flow matcher.
+ * @param[in, out] key
+ * Flow matcher value.
+ * @param[in] item
+ * Flow pattern to translate.
+ * @param[in] inner
+ * Item is inner pattern.
+ */
+static void
+flow_dv_translate_item_ipv4(void *matcher, void *key,
+ const struct rte_flow_item *item,
+ int inner)
+{
+ const struct rte_flow_item_ipv4 *ipv4_m = item->mask;
+ const struct rte_flow_item_ipv4 *ipv4_v = item->spec;
+ const struct rte_flow_item_ipv4 nic_mask = {
+ .hdr = {
+ .src_addr = RTE_BE32(0xffffffff),
+ .dst_addr = RTE_BE32(0xffffffff),
+ .type_of_service = 0xff,
+ .next_proto_id = 0xff,
+ },
+ };
+ void *headers_m;
+ void *headers_v;
+ char *l24_m;
+ char *l24_v;
+ uint8_t tos;
+
+ if (inner) {
+ headers_m = MLX5_ADDR_OF(fte_match_param, matcher,
+ inner_headers);
+ headers_v = MLX5_ADDR_OF(fte_match_param, key, inner_headers);
+ } else {
+ headers_m = MLX5_ADDR_OF(fte_match_param, matcher,
+ outer_headers);
+ headers_v = MLX5_ADDR_OF(fte_match_param, key, outer_headers);
+ }
+ MLX5_SET(fte_match_set_lyr_2_4, headers_m, ip_version, 0xf);
+ MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_version, 4);
+ if (!ipv4_v)
+ return;
+ if (!ipv4_m)
+ ipv4_m = &nic_mask;
+ l24_m = MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_m,
+ dst_ipv4_dst_ipv6.ipv4_layout.ipv4);
+ l24_v = MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
+ dst_ipv4_dst_ipv6.ipv4_layout.ipv4);
+ *(uint32_t *)l24_m = ipv4_m->hdr.dst_addr;
+ *(uint32_t *)l24_v = ipv4_m->hdr.dst_addr & ipv4_v->hdr.dst_addr;
+ l24_m = MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_m,
+ src_ipv4_src_ipv6.ipv4_layout.ipv4);
+ l24_v = MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
+ src_ipv4_src_ipv6.ipv4_layout.ipv4);
+ *(uint32_t *)l24_m = ipv4_m->hdr.src_addr;
+ *(uint32_t *)l24_v = ipv4_m->hdr.src_addr & ipv4_v->hdr.src_addr;
+ tos = ipv4_m->hdr.type_of_service & ipv4_v->hdr.type_of_service;
+ MLX5_SET(fte_match_set_lyr_2_4, headers_m, ip_ecn,
+ ipv4_m->hdr.type_of_service);
+ MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_ecn, tos);
+ MLX5_SET(fte_match_set_lyr_2_4, headers_m, ip_dscp,
+ ipv4_m->hdr.type_of_service >> 2);
+ MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_dscp, tos >> 2);
+ MLX5_SET(fte_match_set_lyr_2_4, headers_m, ip_protocol,
+ ipv4_m->hdr.next_proto_id);
+ MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_protocol,
+ ipv4_v->hdr.next_proto_id & ipv4_m->hdr.next_proto_id);
+}
+
+/**
+ * Add IPV6 item to matcher and to the value.
+ *
+ * @param[in, out] matcher
+ * Flow matcher.
+ * @param[in, out] key
+ * Flow matcher value.
+ * @param[in] item
+ * Flow pattern to translate.
+ * @param[in] inner
+ * Item is inner pattern.
+ */
+static void
+flow_dv_translate_item_ipv6(void *matcher, void *key,
+ const struct rte_flow_item *item,
+ int inner)
+{
+ const struct rte_flow_item_ipv6 *ipv6_m = item->mask;
+ const struct rte_flow_item_ipv6 *ipv6_v = item->spec;
+ const struct rte_flow_item_ipv6 nic_mask = {
+ .hdr = {
+ .src_addr =
+ "\xff\xff\xff\xff\xff\xff\xff\xff"
+ "\xff\xff\xff\xff\xff\xff\xff\xff",
+ .dst_addr =
+ "\xff\xff\xff\xff\xff\xff\xff\xff"
+ "\xff\xff\xff\xff\xff\xff\xff\xff",
+ .vtc_flow = RTE_BE32(0xffffffff),
+ .proto = 0xff,
+ .hop_limits = 0xff,
+ },
+ };
+ void *headers_m;
+ void *headers_v;
+ void *misc_m = MLX5_ADDR_OF(fte_match_param, matcher, misc_parameters);
+ void *misc_v = MLX5_ADDR_OF(fte_match_param, key, misc_parameters);
+ char *l24_m;
+ char *l24_v;
+ uint32_t vtc_m;
+ uint32_t vtc_v;
+ int i;
+ int size;
+
+ if (inner) {
+ headers_m = MLX5_ADDR_OF(fte_match_param, matcher,
+ inner_headers);
+ headers_v = MLX5_ADDR_OF(fte_match_param, key, inner_headers);
+ } else {
+ headers_m = MLX5_ADDR_OF(fte_match_param, matcher,
+ outer_headers);
+ headers_v = MLX5_ADDR_OF(fte_match_param, key, outer_headers);
+ }
+ MLX5_SET(fte_match_set_lyr_2_4, headers_m, ip_version, 0xf);
+ MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_version, 6);
+ if (!ipv6_v)
+ return;
+ if (!ipv6_m)
+ ipv6_m = &nic_mask;
+ size = sizeof(ipv6_m->hdr.dst_addr);
+ l24_m = MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_m,
+ dst_ipv4_dst_ipv6.ipv6_layout.ipv6);
+ l24_v = MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
+ dst_ipv4_dst_ipv6.ipv6_layout.ipv6);
+ memcpy(l24_m, ipv6_m->hdr.dst_addr, size);
+ for (i = 0; i < size; ++i)
+ l24_v[i] = l24_m[i] & ipv6_v->hdr.dst_addr[i];
+ l24_m = MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_m,
+ src_ipv4_src_ipv6.ipv6_layout.ipv6);
+ l24_v = MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
+ src_ipv4_src_ipv6.ipv6_layout.ipv6);
+ memcpy(l24_m, ipv6_m->hdr.src_addr, size);
+ for (i = 0; i < size; ++i)
+ l24_v[i] = l24_m[i] & ipv6_v->hdr.src_addr[i];
+ /* TOS. */
+ vtc_m = rte_be_to_cpu_32(ipv6_m->hdr.vtc_flow);
+ vtc_v = rte_be_to_cpu_32(ipv6_m->hdr.vtc_flow & ipv6_v->hdr.vtc_flow);
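+	/*
+	 * The traffic class sits in bits 20-27 of vtc_flow: ECN in the two
+	 * low bits (>> 20), DSCP in the six above them (>> 22); MLX5_SET()
+	 * truncates the shifted values to the field width.
+	 */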
+ MLX5_SET(fte_match_set_lyr_2_4, headers_m, ip_ecn, vtc_m >> 20);
+ MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_ecn, vtc_v >> 20);
+ MLX5_SET(fte_match_set_lyr_2_4, headers_m, ip_dscp, vtc_m >> 22);
+ MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_dscp, vtc_v >> 22);
+ /* Label. */
+ if (inner) {
+ MLX5_SET(fte_match_set_misc, misc_m, inner_ipv6_flow_label,
+ vtc_m);
+ MLX5_SET(fte_match_set_misc, misc_v, inner_ipv6_flow_label,
+ vtc_v);
+ } else {
+ MLX5_SET(fte_match_set_misc, misc_m, outer_ipv6_flow_label,
+ vtc_m);
+ MLX5_SET(fte_match_set_misc, misc_v, outer_ipv6_flow_label,
+ vtc_v);
+ }
+ /* Protocol. */
+ MLX5_SET(fte_match_set_lyr_2_4, headers_m, ip_protocol,
+ ipv6_m->hdr.proto);
+ MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_protocol,
+ ipv6_v->hdr.proto & ipv6_m->hdr.proto);
+}
+
+/**
+ * Add TCP item to matcher and to the value.
+ *
+ * @param[in, out] matcher
+ * Flow matcher.
+ * @param[in, out] key
+ * Flow matcher value.
+ * @param[in] item
+ * Flow pattern to translate.
+ * @param[in] inner
+ * Item is inner pattern.
+ */
+static void
+flow_dv_translate_item_tcp(void *matcher, void *key,
+ const struct rte_flow_item *item,
+ int inner)
+{
+ const struct rte_flow_item_tcp *tcp_m = item->mask;
+ const struct rte_flow_item_tcp *tcp_v = item->spec;
+ void *headers_m;
+ void *headers_v;
+
+ if (inner) {
+ headers_m = MLX5_ADDR_OF(fte_match_param, matcher,
+ inner_headers);
+ headers_v = MLX5_ADDR_OF(fte_match_param, key, inner_headers);
+ } else {
+ headers_m = MLX5_ADDR_OF(fte_match_param, matcher,
+ outer_headers);
+ headers_v = MLX5_ADDR_OF(fte_match_param, key, outer_headers);
+ }
+ MLX5_SET(fte_match_set_lyr_2_4, headers_m, ip_protocol, 0xff);
+ MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_protocol, IPPROTO_TCP);
+ if (!tcp_v)
+ return;
+ if (!tcp_m)
+ tcp_m = &rte_flow_item_tcp_mask;
+ MLX5_SET(fte_match_set_lyr_2_4, headers_m, tcp_sport,
+ rte_be_to_cpu_16(tcp_m->hdr.src_port));
+ MLX5_SET(fte_match_set_lyr_2_4, headers_v, tcp_sport,
+ rte_be_to_cpu_16(tcp_v->hdr.src_port & tcp_m->hdr.src_port));
+ MLX5_SET(fte_match_set_lyr_2_4, headers_m, tcp_dport,
+ rte_be_to_cpu_16(tcp_m->hdr.dst_port));
+ MLX5_SET(fte_match_set_lyr_2_4, headers_v, tcp_dport,
+ rte_be_to_cpu_16(tcp_v->hdr.dst_port & tcp_m->hdr.dst_port));
+}
+
+/**
+ * Add UDP item to matcher and to the value.
+ *
+ * @param[in, out] matcher
+ * Flow matcher.
+ * @param[in, out] key
+ * Flow matcher value.
+ * @param[in] item
+ * Flow pattern to translate.
+ * @param[in] inner
+ * Item is inner pattern.
+ */
+static void
+flow_dv_translate_item_udp(void *matcher, void *key,
+ const struct rte_flow_item *item,
+ int inner)
+{
+ const struct rte_flow_item_udp *udp_m = item->mask;
+ const struct rte_flow_item_udp *udp_v = item->spec;
+ void *headers_m;
+ void *headers_v;
+
+ if (inner) {
+ headers_m = MLX5_ADDR_OF(fte_match_param, matcher,
+ inner_headers);
+ headers_v = MLX5_ADDR_OF(fte_match_param, key, inner_headers);
+ } else {
+ headers_m = MLX5_ADDR_OF(fte_match_param, matcher,
+ outer_headers);
+ headers_v = MLX5_ADDR_OF(fte_match_param, key, outer_headers);
+ }
+ MLX5_SET(fte_match_set_lyr_2_4, headers_m, ip_protocol, 0xff);
+ MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_protocol, IPPROTO_UDP);
+ if (!udp_v)
+ return;
+ if (!udp_m)
+ udp_m = &rte_flow_item_udp_mask;
+ MLX5_SET(fte_match_set_lyr_2_4, headers_m, udp_sport,
+ rte_be_to_cpu_16(udp_m->hdr.src_port));
+ MLX5_SET(fte_match_set_lyr_2_4, headers_v, udp_sport,
+ rte_be_to_cpu_16(udp_v->hdr.src_port & udp_m->hdr.src_port));
+ MLX5_SET(fte_match_set_lyr_2_4, headers_m, udp_dport,
+ rte_be_to_cpu_16(udp_m->hdr.dst_port));
+ MLX5_SET(fte_match_set_lyr_2_4, headers_v, udp_dport,
+ rte_be_to_cpu_16(udp_v->hdr.dst_port & udp_m->hdr.dst_port));
+}
+
+/**
+ * Add GRE item to matcher and to the value.
+ *
+ * @param[in, out] matcher
+ * Flow matcher.
+ * @param[in, out] key
+ * Flow matcher value.
+ * @param[in] item
+ * Flow pattern to translate.
+ * @param[in] inner
+ * Item is inner pattern.
+ */
+static void
+flow_dv_translate_item_gre(void *matcher, void *key,
+ const struct rte_flow_item *item,
+ int inner)
+{
+ const struct rte_flow_item_gre *gre_m = item->mask;
+ const struct rte_flow_item_gre *gre_v = item->spec;
+ void *headers_m;
+ void *headers_v;
+ void *misc_m = MLX5_ADDR_OF(fte_match_param, matcher, misc_parameters);
+ void *misc_v = MLX5_ADDR_OF(fte_match_param, key, misc_parameters);
+
+ if (inner) {
+ headers_m = MLX5_ADDR_OF(fte_match_param, matcher,
+ inner_headers);
+ headers_v = MLX5_ADDR_OF(fte_match_param, key, inner_headers);
+ } else {
+ headers_m = MLX5_ADDR_OF(fte_match_param, matcher,
+ outer_headers);
+ headers_v = MLX5_ADDR_OF(fte_match_param, key, outer_headers);
+ }
+ MLX5_SET(fte_match_set_lyr_2_4, headers_m, ip_protocol, 0xff);
+ MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_protocol, IPPROTO_GRE);
+ if (!gre_v)
+ return;
+ if (!gre_m)
+ gre_m = &rte_flow_item_gre_mask;
+ MLX5_SET(fte_match_set_misc, misc_m, gre_protocol,
+ rte_be_to_cpu_16(gre_m->protocol));
+ MLX5_SET(fte_match_set_misc, misc_v, gre_protocol,
+ rte_be_to_cpu_16(gre_v->protocol & gre_m->protocol));
+}
+
+/**
+ * Add NVGRE item to matcher and to the value.
+ *
+ * @param[in, out] matcher
+ * Flow matcher.
+ * @param[in, out] key
+ * Flow matcher value.
+ * @param[in] item
+ * Flow pattern to translate.
+ * @param[in] inner
+ * Item is inner pattern.
+ */
+static void
+flow_dv_translate_item_nvgre(void *matcher, void *key,
+ const struct rte_flow_item *item,
+ int inner)
+{
+ const struct rte_flow_item_nvgre *nvgre_m = item->mask;
+ const struct rte_flow_item_nvgre *nvgre_v = item->spec;
+ void *misc_m = MLX5_ADDR_OF(fte_match_param, matcher, misc_parameters);
+ void *misc_v = MLX5_ADDR_OF(fte_match_param, key, misc_parameters);
+ const char *tni_flow_id_m = (const char *)nvgre_m->tni;
+ const char *tni_flow_id_v = (const char *)nvgre_v->tni;
+ char *gre_key_m;
+ char *gre_key_v;
+ int size;
+ int i;
+
+ flow_dv_translate_item_gre(matcher, key, item, inner);
+ if (!nvgre_v)
+ return;
+ if (!nvgre_m)
+ nvgre_m = &rte_flow_item_nvgre_mask;
+ size = sizeof(nvgre_m->tni) + sizeof(nvgre_m->flow_id);
+ gre_key_m = MLX5_ADDR_OF(fte_match_set_misc, misc_m, gre_key_h);
+ gre_key_v = MLX5_ADDR_OF(fte_match_set_misc, misc_v, gre_key_h);
+ memcpy(gre_key_m, tni_flow_id_m, size);
+ for (i = 0; i < size; ++i)
+ gre_key_v[i] = gre_key_m[i] & tni_flow_id_v[i];
+}
+
+/**
+ * Add VXLAN item to matcher and to the value.
+ *
+ * @param[in, out] matcher
+ * Flow matcher.
+ * @param[in, out] key
+ * Flow matcher value.
+ * @param[in] item
+ * Flow pattern to translate.
+ * @param[in] inner
+ * Item is inner pattern.
+ */
+static void
+flow_dv_translate_item_vxlan(void *matcher, void *key,
+ const struct rte_flow_item *item,
+ int inner)
+{
+ const struct rte_flow_item_vxlan *vxlan_m = item->mask;
+ const struct rte_flow_item_vxlan *vxlan_v = item->spec;
+ void *headers_m;
+ void *headers_v;
+ void *misc_m = MLX5_ADDR_OF(fte_match_param, matcher, misc_parameters);
+ void *misc_v = MLX5_ADDR_OF(fte_match_param, key, misc_parameters);
+ char *vni_m;
+ char *vni_v;
+ uint16_t dport;
+ int size;
+ int i;
+
+ if (inner) {
+ headers_m = MLX5_ADDR_OF(fte_match_param, matcher,
+ inner_headers);
+ headers_v = MLX5_ADDR_OF(fte_match_param, key, inner_headers);
+ } else {
+ headers_m = MLX5_ADDR_OF(fte_match_param, matcher,
+ outer_headers);
+ headers_v = MLX5_ADDR_OF(fte_match_param, key, outer_headers);
+ }
+ dport = item->type == RTE_FLOW_ITEM_TYPE_VXLAN ?
+ MLX5_UDP_PORT_VXLAN : MLX5_UDP_PORT_VXLAN_GPE;
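+	/* Fill the UDP destination port only if the UDP item left it unset. */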
+ if (!MLX5_GET16(fte_match_set_lyr_2_4, headers_v, udp_dport)) {
+ MLX5_SET(fte_match_set_lyr_2_4, headers_m, udp_dport, 0xFFFF);
+ MLX5_SET(fte_match_set_lyr_2_4, headers_v, udp_dport, dport);
+ }
+ if (!vxlan_v)
+ return;
+ if (!vxlan_m)
+ vxlan_m = &rte_flow_item_vxlan_mask;
+ size = sizeof(vxlan_m->vni);
+ vni_m = MLX5_ADDR_OF(fte_match_set_misc, misc_m, vxlan_vni);
+ vni_v = MLX5_ADDR_OF(fte_match_set_misc, misc_v, vxlan_vni);
+ memcpy(vni_m, vxlan_m->vni, size);
+ for (i = 0; i < size; ++i)
+ vni_v[i] = vni_m[i] & vxlan_v->vni[i];
+}
+
+/**
+ * Add META item to matcher and to the value.
+ *
+ * @param[in, out] matcher
+ *   Flow matcher.
+ * @param[in, out] key
+ *   Flow matcher value.
+ * @param[in] item
+ *   Flow pattern to translate.
+ */
+static void
+flow_dv_translate_item_meta(void *matcher, void *key,
+ const struct rte_flow_item *item)
+{
+ const struct rte_flow_item_meta *meta_m;
+ const struct rte_flow_item_meta *meta_v;
+ void *misc2_m =
+ MLX5_ADDR_OF(fte_match_param, matcher, misc_parameters_2);
+ void *misc2_v =
+ MLX5_ADDR_OF(fte_match_param, key, misc_parameters_2);
+
+ meta_m = (const void *)item->mask;
+ if (!meta_m)
+ meta_m = &rte_flow_item_meta_mask;
+ meta_v = (const void *)item->spec;
+ if (meta_v) {
+ MLX5_SET(fte_match_set_misc2, misc2_m, metadata_reg_a,
+ rte_be_to_cpu_32(meta_m->data));
+ MLX5_SET(fte_match_set_misc2, misc2_v, metadata_reg_a,
+ rte_be_to_cpu_32(meta_v->data & meta_m->data));
+ }
+}
+
+/**
+ * Update the matcher and the value based on the selected item.
+ *
+ * @param[in, out] matcher
+ * Flow matcher.
+ * @param[in, out] key
+ * Flow matcher value.
+ * @param[in] item
+ * Flow pattern to translate.
+ * @param[in, out] dev_flow
+ * Pointer to the mlx5_flow.
+ * @param[in] inner
+ * Item is inner pattern.
+ */
+static void
+flow_dv_create_item(void *matcher, void *key,
+ const struct rte_flow_item *item,
+ struct mlx5_flow *dev_flow,
+ int inner)
+{
+ struct mlx5_flow_dv_matcher *tmatcher = matcher;
+
+ switch (item->type) {
+ case RTE_FLOW_ITEM_TYPE_ETH:
+ flow_dv_translate_item_eth(tmatcher->mask.buf, key, item,
+ inner);
+ tmatcher->priority = MLX5_PRIORITY_MAP_L2;
+ break;
+ case RTE_FLOW_ITEM_TYPE_VLAN:
+ flow_dv_translate_item_vlan(tmatcher->mask.buf, key, item,
+ inner);
+ break;
+ case RTE_FLOW_ITEM_TYPE_IPV4:
+ flow_dv_translate_item_ipv4(tmatcher->mask.buf, key, item,
+ inner);
+ tmatcher->priority = MLX5_PRIORITY_MAP_L3;
+ dev_flow->dv.hash_fields |=
+ mlx5_flow_hashfields_adjust(dev_flow, inner,
+ MLX5_IPV4_LAYER_TYPES,
+ MLX5_IPV4_IBV_RX_HASH);
+ break;
+ case RTE_FLOW_ITEM_TYPE_IPV6:
+ flow_dv_translate_item_ipv6(tmatcher->mask.buf, key, item,
+ inner);
+ tmatcher->priority = MLX5_PRIORITY_MAP_L3;
+ dev_flow->dv.hash_fields |=
+ mlx5_flow_hashfields_adjust(dev_flow, inner,
+ MLX5_IPV6_LAYER_TYPES,
+ MLX5_IPV6_IBV_RX_HASH);
+ break;
+ case RTE_FLOW_ITEM_TYPE_TCP:
+ flow_dv_translate_item_tcp(tmatcher->mask.buf, key, item,
+ inner);
+ tmatcher->priority = MLX5_PRIORITY_MAP_L4;
+ dev_flow->dv.hash_fields |=
+ mlx5_flow_hashfields_adjust(dev_flow, inner,
+ ETH_RSS_TCP,
+ (IBV_RX_HASH_SRC_PORT_TCP |
+ IBV_RX_HASH_DST_PORT_TCP));
+ break;
+ case RTE_FLOW_ITEM_TYPE_UDP:
+ flow_dv_translate_item_udp(tmatcher->mask.buf, key, item,
+ inner);
+ tmatcher->priority = MLX5_PRIORITY_MAP_L4;
+		dev_flow->dv.hash_fields |=
+ mlx5_flow_hashfields_adjust(dev_flow, inner,
+ ETH_RSS_UDP,
+ (IBV_RX_HASH_SRC_PORT_UDP |
+ IBV_RX_HASH_DST_PORT_UDP));
+ break;
+ case RTE_FLOW_ITEM_TYPE_GRE:
+ flow_dv_translate_item_gre(tmatcher->mask.buf, key, item,
+ inner);
+ break;
+ case RTE_FLOW_ITEM_TYPE_NVGRE:
+ flow_dv_translate_item_nvgre(tmatcher->mask.buf, key, item,
+ inner);
+ break;
+ case RTE_FLOW_ITEM_TYPE_VXLAN:
+ case RTE_FLOW_ITEM_TYPE_VXLAN_GPE:
+ flow_dv_translate_item_vxlan(tmatcher->mask.buf, key, item,
+ inner);
+ break;
+ case RTE_FLOW_ITEM_TYPE_META:
+ flow_dv_translate_item_meta(tmatcher->mask.buf, key, item);
+ break;
+ default:
+ break;
+ }
+}
+
+/**
+ * Store the requested actions in an array.
+ *
+ * @param[in] action
+ * Flow action to translate.
+ * @param[in, out] dev_flow
+ * Pointer to the mlx5_flow.
+ */
+static void
+flow_dv_create_action(const struct rte_flow_action *action,
+ struct mlx5_flow *dev_flow)
+{
+ const struct rte_flow_action_queue *queue;
+ const struct rte_flow_action_rss *rss;
+ int actions_n = dev_flow->dv.actions_n;
+ struct rte_flow *flow = dev_flow->flow;
+
+ switch (action->type) {
+ case RTE_FLOW_ACTION_TYPE_VOID:
+ break;
+ case RTE_FLOW_ACTION_TYPE_FLAG:
+ dev_flow->dv.actions[actions_n].type = MLX5DV_FLOW_ACTION_TAG;
+ dev_flow->dv.actions[actions_n].tag_value =
+ mlx5_flow_mark_set(MLX5_FLOW_MARK_DEFAULT);
+ actions_n++;
+ flow->actions |= MLX5_FLOW_ACTION_FLAG;
+ break;
+ case RTE_FLOW_ACTION_TYPE_MARK:
+ dev_flow->dv.actions[actions_n].type = MLX5DV_FLOW_ACTION_TAG;
+ dev_flow->dv.actions[actions_n].tag_value =
+ mlx5_flow_mark_set
+ (((const struct rte_flow_action_mark *)
+ (action->conf))->id);
+ flow->actions |= MLX5_FLOW_ACTION_MARK;
+ actions_n++;
+ break;
+ case RTE_FLOW_ACTION_TYPE_DROP:
+ dev_flow->dv.actions[actions_n].type = MLX5DV_FLOW_ACTION_DROP;
+ flow->actions |= MLX5_FLOW_ACTION_DROP;
+ break;
+ case RTE_FLOW_ACTION_TYPE_QUEUE:
+ queue = action->conf;
+ flow->rss.queue_num = 1;
+ (*flow->queue)[0] = queue->index;
+ flow->actions |= MLX5_FLOW_ACTION_QUEUE;
+ break;
+ case RTE_FLOW_ACTION_TYPE_RSS:
+ rss = action->conf;
+ if (flow->queue)
+ memcpy((*flow->queue), rss->queue,
+ rss->queue_num * sizeof(uint16_t));
+ flow->rss.queue_num = rss->queue_num;
+ memcpy(flow->key, rss->key, MLX5_RSS_HASH_KEY_LEN);
+ flow->rss.types = rss->types;
+ flow->rss.level = rss->level;
+		/* Added to the array only in apply since we need the QP. */
+ flow->actions |= MLX5_FLOW_ACTION_RSS;
+ break;
+ default:
+ break;
+ }
+ dev_flow->dv.actions_n = actions_n;
+}
+
+static uint32_t matcher_zero[MLX5_ST_SZ_DW(fte_match_param)] = { 0 };
+
+#define HEADER_IS_ZERO(match_criteria, headers) \
+ !(memcmp(MLX5_ADDR_OF(fte_match_param, match_criteria, headers), \
+	  matcher_zero, MLX5_FLD_SZ_BYTES(fte_match_param, headers)))
+
+/**
+ * Calculate flow matcher enable bitmap.
+ *
+ * @param match_criteria
+ * Pointer to flow matcher criteria.
+ *
+ * @return
+ * Bitmap of enabled fields.
+ */
+static uint8_t
+flow_dv_matcher_enable(uint32_t *match_criteria)
+{
+ uint8_t match_criteria_enable;
+
+ match_criteria_enable =
+ (!HEADER_IS_ZERO(match_criteria, outer_headers)) <<
+ MLX5_MATCH_CRITERIA_ENABLE_OUTER_BIT;
+ match_criteria_enable |=
+ (!HEADER_IS_ZERO(match_criteria, misc_parameters)) <<
+ MLX5_MATCH_CRITERIA_ENABLE_MISC_BIT;
+ match_criteria_enable |=
+ (!HEADER_IS_ZERO(match_criteria, inner_headers)) <<
+ MLX5_MATCH_CRITERIA_ENABLE_INNER_BIT;
+ match_criteria_enable |=
+ (!HEADER_IS_ZERO(match_criteria, misc_parameters_2)) <<
+ MLX5_MATCH_CRITERIA_ENABLE_MISC2_BIT;
+
+ return match_criteria_enable;
+}
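+
+/*
+ * For example, a matcher mask where only outer_headers and misc_parameters
+ * are non-zero yields
+ * (1 << MLX5_MATCH_CRITERIA_ENABLE_OUTER_BIT) |
+ * (1 << MLX5_MATCH_CRITERIA_ENABLE_MISC_BIT).
+ */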
+
+/**
+ * Register the flow matcher.
+ *
+ * @param[in, out] dev
+ *   Pointer to rte_eth_dev structure.
+ * @param[in, out] matcher
+ *   Pointer to flow matcher.
+ * @param[in, out] dev_flow
+ *   Pointer to the dev_flow.
+ * @param[out] error
+ *   Pointer to error structure.
+ *
+ * @return
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
+ */
+static int
+flow_dv_matcher_register(struct rte_eth_dev *dev,
+ struct mlx5_flow_dv_matcher *matcher,
+ struct mlx5_flow *dev_flow,
+ struct rte_flow_error *error)
+{
+ struct priv *priv = dev->data->dev_private;
+ struct mlx5_flow_dv_matcher *cache_matcher;
+ struct mlx5dv_flow_matcher_attr dv_attr = {
+ .type = IBV_FLOW_ATTR_NORMAL,
+ .match_mask = (void *)&matcher->mask,
+ };
+
+ /* Lookup from cache. */
+ LIST_FOREACH(cache_matcher, &priv->matchers, next) {
+ if (matcher->crc == cache_matcher->crc &&
+ matcher->priority == cache_matcher->priority &&
+ matcher->egress == cache_matcher->egress &&
+ !memcmp((const void *)matcher->mask.buf,
+ (const void *)cache_matcher->mask.buf,
+ cache_matcher->mask.size)) {
+ DRV_LOG(DEBUG,
+ "priority %hd use %s matcher %p: refcnt %d++",
+ cache_matcher->priority,
+ cache_matcher->egress ? "tx" : "rx",
+ (void *)cache_matcher,
+ rte_atomic32_read(&cache_matcher->refcnt));
+ rte_atomic32_inc(&cache_matcher->refcnt);
+ dev_flow->dv.matcher = cache_matcher;
+ return 0;
+ }
+ }
+ /* Register new matcher. */
+ cache_matcher = rte_calloc(__func__, 1, sizeof(*cache_matcher), 0);
+ if (!cache_matcher)
+ return rte_flow_error_set(error, ENOMEM,
+ RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
+ "cannot allocate matcher memory");
+ *cache_matcher = *matcher;
+ dv_attr.match_criteria_enable =
+ flow_dv_matcher_enable(cache_matcher->mask.buf);
+ dv_attr.priority = matcher->priority;
+ if (matcher->egress)
+ dv_attr.flags |= IBV_FLOW_ATTR_FLAGS_EGRESS;
+ cache_matcher->matcher_object =
+ mlx5_glue->dv_create_flow_matcher(priv->ctx, &dv_attr);
+ if (!cache_matcher->matcher_object)
+ return rte_flow_error_set(error, ENOMEM,
+ RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
+ NULL, "cannot create matcher");
+ rte_atomic32_inc(&cache_matcher->refcnt);
+ LIST_INSERT_HEAD(&priv->matchers, cache_matcher, next);
+ dev_flow->dv.matcher = cache_matcher;
+ DRV_LOG(DEBUG, "priority %hd new %s matcher %p: refcnt %d",
+ cache_matcher->priority,
+ cache_matcher->egress ? "tx" : "rx", (void *)cache_matcher,
+ rte_atomic32_read(&cache_matcher->refcnt));
+ return 0;
+}
+
+
+/**
+ * Fill the flow with DV spec.
+ *
+ * @param[in] dev
+ * Pointer to rte_eth_dev structure.
+ * @param[in, out] dev_flow
+ * Pointer to the sub flow.
+ * @param[in] attr
+ * Pointer to the flow attributes.
+ * @param[in] items
+ * Pointer to the list of items.
+ * @param[in] actions
+ * Pointer to the list of actions.
+ * @param[out] error
+ * Pointer to the error structure.
+ *
+ * @return
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
+ */
+static int
+flow_dv_translate(struct rte_eth_dev *dev,
+ struct mlx5_flow *dev_flow,
+ const struct rte_flow_attr *attr,
+ const struct rte_flow_item items[],
+ const struct rte_flow_action actions[] __rte_unused,
+ struct rte_flow_error *error)
+{
+ struct priv *priv = dev->data->dev_private;
+ uint64_t priority = attr->priority;
+ struct mlx5_flow_dv_matcher matcher = {
+ .mask = {
+ .size = sizeof(matcher.mask.buf),
+ },
+ };
+ void *match_value = dev_flow->dv.value.buf;
+ int tunnel = 0;
+
+ if (priority == MLX5_FLOW_PRIO_RSVD)
+ priority = priv->config.flow_prio - 1;
+ for (; items->type != RTE_FLOW_ITEM_TYPE_END; items++) {
+ tunnel = !!(dev_flow->layers & MLX5_FLOW_LAYER_TUNNEL);
+ flow_dv_create_item(&matcher, match_value, items, dev_flow,
+ tunnel);
+ }
+ matcher.crc = rte_raw_cksum((const void *)matcher.mask.buf,
+ matcher.mask.size);
+ if (priority == MLX5_FLOW_PRIO_RSVD)
+ priority = priv->config.flow_prio - 1;
+ matcher.priority = mlx5_flow_adjust_priority(dev, priority,
+ matcher.priority);
+ matcher.egress = attr->egress;
+ if (flow_dv_matcher_register(dev, &matcher, dev_flow, error))
+ return -rte_errno;
+ for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++)
+ flow_dv_create_action(actions, dev_flow);
+ return 0;
+}
+
+/**
+ * Apply the flow to the NIC.
+ *
+ * @param[in] dev
+ * Pointer to the Ethernet device structure.
+ * @param[in, out] flow
+ * Pointer to flow structure.
+ * @param[out] error
+ * Pointer to error structure.
+ *
+ * @return
+ * 0 on success, a negative errno value otherwise and rte_errno is set.
+ */
+static int
+flow_dv_apply(struct rte_eth_dev *dev, struct rte_flow *flow,
+ struct rte_flow_error *error)
+{
+ struct mlx5_flow_dv *dv;
+ struct mlx5_flow *dev_flow;
+ int n;
+ int err;
+
+ LIST_FOREACH(dev_flow, &flow->dev_flows, next) {
+ dv = &dev_flow->dv;
+ n = dv->actions_n;
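+		/*
+		 * Resolve the fate queue first: the drop hash Rx queue for a
+		 * DROP action, otherwise the (RSS) hash Rx queue matching the
+		 * flow, and append it as the final DEST_IBV_QP action.
+		 */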
+ if (flow->actions & MLX5_FLOW_ACTION_DROP) {
+ dv->hrxq = mlx5_hrxq_drop_new(dev);
+ if (!dv->hrxq) {
+ rte_flow_error_set
+ (error, errno,
+ RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
+ "cannot get drop hash queue");
+ goto error;
+ }
+ dv->actions[n].type = MLX5DV_FLOW_ACTION_DEST_IBV_QP;
+ dv->actions[n].qp = dv->hrxq->qp;
+ n++;
+ } else if (flow->actions &
+ (MLX5_FLOW_ACTION_QUEUE | MLX5_FLOW_ACTION_RSS)) {
+ struct mlx5_hrxq *hrxq;
+ hrxq = mlx5_hrxq_get(dev, flow->key,
+ MLX5_RSS_HASH_KEY_LEN,
+ dv->hash_fields,
+ (*flow->queue),
+ flow->rss.queue_num);
+ if (!hrxq)
+ hrxq = mlx5_hrxq_new
+ (dev, flow->key, MLX5_RSS_HASH_KEY_LEN,
+ dv->hash_fields, (*flow->queue),
+ flow->rss.queue_num,
+ !!(dev_flow->layers &
+ MLX5_FLOW_LAYER_TUNNEL));
+ if (!hrxq) {
+ rte_flow_error_set
+ (error, rte_errno,
+ RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
+ "cannot get hash queue");
+ goto error;
+ }
+ dv->hrxq = hrxq;
+ dv->actions[n].type = MLX5DV_FLOW_ACTION_DEST_IBV_QP;
+ dv->actions[n].qp = hrxq->qp;
+ n++;
+ }
+ dv->flow =
+ mlx5_glue->dv_create_flow(dv->matcher->matcher_object,
+ (void *)&dv->value, n,
+ dv->actions);
+ if (!dv->flow) {
+ rte_flow_error_set(error, errno,
+ RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
+ NULL,
+ "hardware refuses to create flow");
+ goto error;
+ }
+ }
+ return 0;
+error:
+ err = rte_errno; /* Save rte_errno before cleanup. */
+ LIST_FOREACH(dev_flow, &flow->dev_flows, next) {
+ struct mlx5_flow_dv *dv = &dev_flow->dv;
+ if (dv->hrxq) {
+ if (flow->actions & MLX5_FLOW_ACTION_DROP)
+ mlx5_hrxq_drop_release(dev);
+ else
+ mlx5_hrxq_release(dev, dv->hrxq);
+ dv->hrxq = NULL;
+ }
+ }
+ rte_errno = err; /* Restore rte_errno. */
+ return -rte_errno;
+}
+
+/**
+ * Release the flow matcher.
+ *
+ * @param dev
+ * Pointer to Ethernet device.
+ * @param flow
+ * Pointer to mlx5_flow.
+ *
+ * @return
+ * 1 while a reference on it exists, 0 when freed.
+ */
+static int
+flow_dv_matcher_release(struct rte_eth_dev *dev,
+ struct mlx5_flow *flow)
+{
+ struct mlx5_flow_dv_matcher *matcher = flow->dv.matcher;
+
+ assert(matcher->matcher_object);
+ DRV_LOG(DEBUG, "port %u matcher %p: refcnt %d--",
+ dev->data->port_id, (void *)matcher,
+ rte_atomic32_read(&matcher->refcnt));
+ if (rte_atomic32_dec_and_test(&matcher->refcnt)) {
+ claim_zero(mlx5_glue->dv_destroy_flow_matcher
+ (matcher->matcher_object));
+ LIST_REMOVE(matcher, next);
+ rte_free(matcher);
+ DRV_LOG(DEBUG, "port %u matcher %p: removed",
+ dev->data->port_id, (void *)matcher);
+ return 0;
+ }
+ return 1;
+}
+
+/**
+ * Remove the flow from the NIC but keep it in memory.
+ *
+ * @param[in] dev
+ * Pointer to Ethernet device.
+ * @param[in, out] flow
+ * Pointer to flow structure.
+ */
+static void
+flow_dv_remove(struct rte_eth_dev *dev, struct rte_flow *flow)
+{
+ struct mlx5_flow_dv *dv;
+ struct mlx5_flow *dev_flow;
+
+ if (!flow)
+ return;
+ LIST_FOREACH(dev_flow, &flow->dev_flows, next) {
+ dv = &dev_flow->dv;
+ if (dv->flow) {
+ claim_zero(mlx5_glue->destroy_flow(dv->flow));
+ dv->flow = NULL;
+ }
+ if (dv->hrxq) {
+ if (flow->actions & MLX5_FLOW_ACTION_DROP)
+ mlx5_hrxq_drop_release(dev);
+ else
+ mlx5_hrxq_release(dev, dv->hrxq);
+ dv->hrxq = NULL;
+ }
+ }
+ if (flow->counter)
+ flow->counter = NULL;
+}
+
+/**
+ * Remove the flow from the NIC and the memory.
+ *
+ * @param[in] dev
+ * Pointer to the Ethernet device structure.
+ * @param[in, out] flow
+ * Pointer to flow structure.
+ */
+static void
+flow_dv_destroy(struct rte_eth_dev *dev, struct rte_flow *flow)
+{
+ struct mlx5_flow *dev_flow;
+
+ if (!flow)
+ return;
+ flow_dv_remove(dev, flow);
+ while (!LIST_EMPTY(&flow->dev_flows)) {
+ dev_flow = LIST_FIRST(&flow->dev_flows);
+ LIST_REMOVE(dev_flow, next);
+ if (dev_flow->dv.matcher)
+ flow_dv_matcher_release(dev, dev_flow);
+ rte_free(dev_flow);
+ }
+}
+
+/**
+ * Query a flow.
+ *
+ * @see rte_flow_query()
+ * @see rte_flow_ops
+ */
+static int
+flow_dv_query(struct rte_eth_dev *dev __rte_unused,
+ struct rte_flow *flow __rte_unused,
+ const struct rte_flow_action *actions __rte_unused,
+ void *data __rte_unused,
+ struct rte_flow_error *error __rte_unused)
+{
+ rte_errno = ENOTSUP;
+ return -rte_errno;
+}
+
+
+const struct mlx5_flow_driver_ops mlx5_flow_dv_drv_ops = {
+ .validate = flow_dv_validate,
+ .prepare = flow_dv_prepare,
+ .translate = flow_dv_translate,
+ .apply = flow_dv_apply,
+ .remove = flow_dv_remove,
+ .destroy = flow_dv_destroy,
+ .query = flow_dv_query,
+};
+
+#endif /* HAVE_IBV_FLOW_DV_SUPPORT */
diff --git a/drivers/net/mlx5/mlx5_flow_tcf.c b/drivers/net/mlx5/mlx5_flow_tcf.c
new file mode 100644
index 00000000..719fb106
--- /dev/null
+++ b/drivers/net/mlx5/mlx5_flow_tcf.c
@@ -0,0 +1,2913 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright 2018 6WIND S.A.
+ * Copyright 2018 Mellanox Technologies, Ltd
+ */
+
+#include <assert.h>
+#include <errno.h>
+#include <libmnl/libmnl.h>
+#include <linux/gen_stats.h>
+#include <linux/if_ether.h>
+#include <linux/netlink.h>
+#include <linux/pkt_cls.h>
+#include <linux/pkt_sched.h>
+#include <linux/rtnetlink.h>
+#include <linux/tc_act/tc_gact.h>
+#include <linux/tc_act/tc_mirred.h>
+#include <netinet/in.h>
+#include <stdalign.h>
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <sys/socket.h>
+
+#include <rte_byteorder.h>
+#include <rte_errno.h>
+#include <rte_ether.h>
+#include <rte_flow.h>
+#include <rte_malloc.h>
+#include <rte_common.h>
+
+#include "mlx5.h"
+#include "mlx5_flow.h"
+#include "mlx5_autoconf.h"
+
+#ifdef HAVE_TC_ACT_VLAN
+
+#include <linux/tc_act/tc_vlan.h>
+
+#else /* HAVE_TC_ACT_VLAN */
+
+#define TCA_VLAN_ACT_POP 1
+#define TCA_VLAN_ACT_PUSH 2
+#define TCA_VLAN_ACT_MODIFY 3
+#define TCA_VLAN_PARMS 2
+#define TCA_VLAN_PUSH_VLAN_ID 3
+#define TCA_VLAN_PUSH_VLAN_PROTOCOL 4
+#define TCA_VLAN_PAD 5
+#define TCA_VLAN_PUSH_VLAN_PRIORITY 6
+
+struct tc_vlan {
+ tc_gen;
+ int v_action;
+};
+
+#endif /* HAVE_TC_ACT_VLAN */
+
+#ifdef HAVE_TC_ACT_PEDIT
+
+#include <linux/tc_act/tc_pedit.h>
+
+#else /* HAVE_TC_ACT_PEDIT */
+
+enum {
+ TCA_PEDIT_UNSPEC,
+ TCA_PEDIT_TM,
+ TCA_PEDIT_PARMS,
+ TCA_PEDIT_PAD,
+ TCA_PEDIT_PARMS_EX,
+ TCA_PEDIT_KEYS_EX,
+ TCA_PEDIT_KEY_EX,
+ __TCA_PEDIT_MAX
+};
+
+enum {
+ TCA_PEDIT_KEY_EX_HTYPE = 1,
+ TCA_PEDIT_KEY_EX_CMD = 2,
+ __TCA_PEDIT_KEY_EX_MAX
+};
+
+enum pedit_header_type {
+ TCA_PEDIT_KEY_EX_HDR_TYPE_NETWORK = 0,
+ TCA_PEDIT_KEY_EX_HDR_TYPE_ETH = 1,
+ TCA_PEDIT_KEY_EX_HDR_TYPE_IP4 = 2,
+ TCA_PEDIT_KEY_EX_HDR_TYPE_IP6 = 3,
+ TCA_PEDIT_KEY_EX_HDR_TYPE_TCP = 4,
+ TCA_PEDIT_KEY_EX_HDR_TYPE_UDP = 5,
+ __PEDIT_HDR_TYPE_MAX,
+};
+
+enum pedit_cmd {
+ TCA_PEDIT_KEY_EX_CMD_SET = 0,
+ TCA_PEDIT_KEY_EX_CMD_ADD = 1,
+ __PEDIT_CMD_MAX,
+};
+
+struct tc_pedit_key {
+ __u32 mask; /* AND */
+	__u32 val; /* XOR */
+	__u32 off; /* offset */
+ __u32 at;
+ __u32 offmask;
+ __u32 shift;
+};
+
+__extension__
+struct tc_pedit_sel {
+ tc_gen;
+ unsigned char nkeys;
+ unsigned char flags;
+ struct tc_pedit_key keys[0];
+};
+
+#endif /* HAVE_TC_ACT_PEDIT */
+
+/* Normally found in linux/netlink.h. */
+#ifndef NETLINK_CAP_ACK
+#define NETLINK_CAP_ACK 10
+#endif
+
+/* Normally found in linux/pkt_sched.h. */
+#ifndef TC_H_MIN_INGRESS
+#define TC_H_MIN_INGRESS 0xfff2u
+#endif
+
+/* Normally found in linux/pkt_cls.h. */
+#ifndef TCA_CLS_FLAGS_SKIP_SW
+#define TCA_CLS_FLAGS_SKIP_SW (1 << 1)
+#endif
+#ifndef HAVE_TCA_CHAIN
+#define TCA_CHAIN 11
+#endif
+#ifndef HAVE_TCA_FLOWER_ACT
+#define TCA_FLOWER_ACT 3
+#endif
+#ifndef HAVE_TCA_FLOWER_FLAGS
+#define TCA_FLOWER_FLAGS 22
+#endif
+#ifndef HAVE_TCA_FLOWER_KEY_ETH_TYPE
+#define TCA_FLOWER_KEY_ETH_TYPE 8
+#endif
+#ifndef HAVE_TCA_FLOWER_KEY_ETH_DST
+#define TCA_FLOWER_KEY_ETH_DST 4
+#endif
+#ifndef HAVE_TCA_FLOWER_KEY_ETH_DST_MASK
+#define TCA_FLOWER_KEY_ETH_DST_MASK 5
+#endif
+#ifndef HAVE_TCA_FLOWER_KEY_ETH_SRC
+#define TCA_FLOWER_KEY_ETH_SRC 6
+#endif
+#ifndef HAVE_TCA_FLOWER_KEY_ETH_SRC_MASK
+#define TCA_FLOWER_KEY_ETH_SRC_MASK 7
+#endif
+#ifndef HAVE_TCA_FLOWER_KEY_IP_PROTO
+#define TCA_FLOWER_KEY_IP_PROTO 9
+#endif
+#ifndef HAVE_TCA_FLOWER_KEY_IPV4_SRC
+#define TCA_FLOWER_KEY_IPV4_SRC 10
+#endif
+#ifndef HAVE_TCA_FLOWER_KEY_IPV4_SRC_MASK
+#define TCA_FLOWER_KEY_IPV4_SRC_MASK 11
+#endif
+#ifndef HAVE_TCA_FLOWER_KEY_IPV4_DST
+#define TCA_FLOWER_KEY_IPV4_DST 12
+#endif
+#ifndef HAVE_TCA_FLOWER_KEY_IPV4_DST_MASK
+#define TCA_FLOWER_KEY_IPV4_DST_MASK 13
+#endif
+#ifndef HAVE_TCA_FLOWER_KEY_IPV6_SRC
+#define TCA_FLOWER_KEY_IPV6_SRC 14
+#endif
+#ifndef HAVE_TCA_FLOWER_KEY_IPV6_SRC_MASK
+#define TCA_FLOWER_KEY_IPV6_SRC_MASK 15
+#endif
+#ifndef HAVE_TCA_FLOWER_KEY_IPV6_DST
+#define TCA_FLOWER_KEY_IPV6_DST 16
+#endif
+#ifndef HAVE_TCA_FLOWER_KEY_IPV6_DST_MASK
+#define TCA_FLOWER_KEY_IPV6_DST_MASK 17
+#endif
+#ifndef HAVE_TCA_FLOWER_KEY_TCP_SRC
+#define TCA_FLOWER_KEY_TCP_SRC 18
+#endif
+#ifndef HAVE_TCA_FLOWER_KEY_TCP_SRC_MASK
+#define TCA_FLOWER_KEY_TCP_SRC_MASK 35
+#endif
+#ifndef HAVE_TCA_FLOWER_KEY_TCP_DST
+#define TCA_FLOWER_KEY_TCP_DST 19
+#endif
+#ifndef HAVE_TCA_FLOWER_KEY_TCP_DST_MASK
+#define TCA_FLOWER_KEY_TCP_DST_MASK 36
+#endif
+#ifndef HAVE_TCA_FLOWER_KEY_UDP_SRC
+#define TCA_FLOWER_KEY_UDP_SRC 20
+#endif
+#ifndef HAVE_TCA_FLOWER_KEY_UDP_SRC_MASK
+#define TCA_FLOWER_KEY_UDP_SRC_MASK 37
+#endif
+#ifndef HAVE_TCA_FLOWER_KEY_UDP_DST
+#define TCA_FLOWER_KEY_UDP_DST 21
+#endif
+#ifndef HAVE_TCA_FLOWER_KEY_UDP_DST_MASK
+#define TCA_FLOWER_KEY_UDP_DST_MASK 38
+#endif
+#ifndef HAVE_TCA_FLOWER_KEY_VLAN_ID
+#define TCA_FLOWER_KEY_VLAN_ID 23
+#endif
+#ifndef HAVE_TCA_FLOWER_KEY_VLAN_PRIO
+#define TCA_FLOWER_KEY_VLAN_PRIO 24
+#endif
+#ifndef HAVE_TCA_FLOWER_KEY_VLAN_ETH_TYPE
+#define TCA_FLOWER_KEY_VLAN_ETH_TYPE 25
+#endif
+#ifndef HAVE_TCA_FLOWER_KEY_TCP_FLAGS
+#define TCA_FLOWER_KEY_TCP_FLAGS 71
+#endif
+#ifndef HAVE_TCA_FLOWER_KEY_TCP_FLAGS_MASK
+#define TCA_FLOWER_KEY_TCP_FLAGS_MASK 72
+#endif
+#ifndef HAVE_TC_ACT_GOTO_CHAIN
+#define TC_ACT_GOTO_CHAIN 0x20000000
+#endif
+
+#ifndef IPV6_ADDR_LEN
+#define IPV6_ADDR_LEN 16
+#endif
+
+#ifndef IPV4_ADDR_LEN
+#define IPV4_ADDR_LEN 4
+#endif
+
+#ifndef TP_PORT_LEN
+#define TP_PORT_LEN 2 /* Transport Port (UDP/TCP) Length */
+#endif
+
+#ifndef TTL_LEN
+#define TTL_LEN 1
+#endif
+
+#ifndef TCA_ACT_MAX_PRIO
+#define TCA_ACT_MAX_PRIO 32
+#endif
+
+/**
+ * Structure for holding netlink context.
+ * Note that the message buffer size is MNL_SOCKET_BUFFER_SIZE (8 KB), which
+ * ensures that netlink messages are never truncated.
+ */
+struct mlx5_flow_tcf_context {
+ struct mnl_socket *nl; /* NETLINK_ROUTE libmnl socket. */
+ uint32_t seq; /* Message sequence number. */
+ uint32_t buf_size; /* Message buffer size. */
+ uint8_t *buf; /* Message buffer. */
+};
+
+/** Structure used when extracting the values of flow counters
+ * from a netlink message.
+ */
+struct flow_tcf_stats_basic {
+ bool valid;
+ struct gnet_stats_basic counters;
+};
+
+/** Empty masks for known item types. */
+static const union {
+ struct rte_flow_item_port_id port_id;
+ struct rte_flow_item_eth eth;
+ struct rte_flow_item_vlan vlan;
+ struct rte_flow_item_ipv4 ipv4;
+ struct rte_flow_item_ipv6 ipv6;
+ struct rte_flow_item_tcp tcp;
+ struct rte_flow_item_udp udp;
+} flow_tcf_mask_empty;
+
+/** Supported masks for known item types. */
+static const struct {
+ struct rte_flow_item_port_id port_id;
+ struct rte_flow_item_eth eth;
+ struct rte_flow_item_vlan vlan;
+ struct rte_flow_item_ipv4 ipv4;
+ struct rte_flow_item_ipv6 ipv6;
+ struct rte_flow_item_tcp tcp;
+ struct rte_flow_item_udp udp;
+} flow_tcf_mask_supported = {
+ .port_id = {
+ .id = 0xffffffff,
+ },
+ .eth = {
+ .type = RTE_BE16(0xffff),
+ .dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
+ .src.addr_bytes = "\xff\xff\xff\xff\xff\xff",
+ },
+ .vlan = {
+ /* PCP and VID only, no DEI. */
+ .tci = RTE_BE16(0xefff),
+ .inner_type = RTE_BE16(0xffff),
+ },
+ .ipv4.hdr = {
+ .next_proto_id = 0xff,
+ .src_addr = RTE_BE32(0xffffffff),
+ .dst_addr = RTE_BE32(0xffffffff),
+ },
+ .ipv6.hdr = {
+ .proto = 0xff,
+ .src_addr =
+ "\xff\xff\xff\xff\xff\xff\xff\xff"
+ "\xff\xff\xff\xff\xff\xff\xff\xff",
+ .dst_addr =
+ "\xff\xff\xff\xff\xff\xff\xff\xff"
+ "\xff\xff\xff\xff\xff\xff\xff\xff",
+ },
+ .tcp.hdr = {
+ .src_port = RTE_BE16(0xffff),
+ .dst_port = RTE_BE16(0xffff),
+ .tcp_flags = 0xff,
+ },
+ .udp.hdr = {
+ .src_port = RTE_BE16(0xffff),
+ .dst_port = RTE_BE16(0xffff),
+ },
+};
+
+#define SZ_NLATTR_HDR MNL_ALIGN(sizeof(struct nlattr))
+#define SZ_NLATTR_NEST SZ_NLATTR_HDR
+#define SZ_NLATTR_DATA_OF(len) MNL_ALIGN(SZ_NLATTR_HDR + (len))
+#define SZ_NLATTR_TYPE_OF(typ) SZ_NLATTR_DATA_OF(sizeof(typ))
+#define SZ_NLATTR_STRZ_OF(str) SZ_NLATTR_DATA_OF(strlen(str) + 1)
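+
+/*
+ * The SZ_NLATTR_* helpers above give the MNL-aligned space taken by a single
+ * netlink attribute carrying the given payload; they are used below to
+ * estimate how large a flower message a flow rule will need, e.g.
+ * SZ_NLATTR_TYPE_OF(uint16_t) accounts for a 16-bit attribute including its
+ * header and padding.
+ */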
+
+#define PTOI_TABLE_SZ_MAX(dev) (mlx5_dev_to_port_id((dev)->device, NULL, 0) + 2)
+
+/** DPDK port to network interface index (ifindex) conversion. */
+struct flow_tcf_ptoi {
+ uint16_t port_id; /**< DPDK port ID. */
+ unsigned int ifindex; /**< Network interface index. */
+};
+
+/* Due to a driver/FW limitation. */
+#define MLX5_TCF_GROUP_ID_MAX 3
+#define MLX5_TCF_GROUP_PRIORITY_MAX 14
+
+#define MLX5_TCF_FATE_ACTIONS \
+ (MLX5_FLOW_ACTION_DROP | MLX5_FLOW_ACTION_PORT_ID | \
+ MLX5_FLOW_ACTION_JUMP)
+
+#define MLX5_TCF_VLAN_ACTIONS \
+ (MLX5_FLOW_ACTION_OF_POP_VLAN | MLX5_FLOW_ACTION_OF_PUSH_VLAN | \
+ MLX5_FLOW_ACTION_OF_SET_VLAN_VID | MLX5_FLOW_ACTION_OF_SET_VLAN_PCP)
+
+#define MLX5_TCF_PEDIT_ACTIONS \
+ (MLX5_FLOW_ACTION_SET_IPV4_SRC | MLX5_FLOW_ACTION_SET_IPV4_DST | \
+ MLX5_FLOW_ACTION_SET_IPV6_SRC | MLX5_FLOW_ACTION_SET_IPV6_DST | \
+ MLX5_FLOW_ACTION_SET_TP_SRC | MLX5_FLOW_ACTION_SET_TP_DST | \
+ MLX5_FLOW_ACTION_SET_TTL | MLX5_FLOW_ACTION_DEC_TTL | \
+ MLX5_FLOW_ACTION_SET_MAC_SRC | MLX5_FLOW_ACTION_SET_MAC_DST)
+
+#define MLX5_TCF_CONFIG_ACTIONS \
+ (MLX5_FLOW_ACTION_PORT_ID | MLX5_FLOW_ACTION_JUMP | \
+ MLX5_FLOW_ACTION_OF_PUSH_VLAN | MLX5_FLOW_ACTION_OF_SET_VLAN_VID | \
+ MLX5_FLOW_ACTION_OF_SET_VLAN_PCP | \
+ (MLX5_TCF_PEDIT_ACTIONS & ~MLX5_FLOW_ACTION_DEC_TTL))
+
+#define MAX_PEDIT_KEYS 128
+#define SZ_PEDIT_KEY_VAL 4
+
+#define NUM_OF_PEDIT_KEYS(sz) \
+ (((sz) / SZ_PEDIT_KEY_VAL) + (((sz) % SZ_PEDIT_KEY_VAL) ? 1 : 0))
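+/* I.e. the number of 4-byte pedit keys needed to cover a field of sz bytes. */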
+
+struct pedit_key_ex {
+ enum pedit_header_type htype;
+ enum pedit_cmd cmd;
+};
+
+struct pedit_parser {
+ struct tc_pedit_sel sel;
+ struct tc_pedit_key keys[MAX_PEDIT_KEYS];
+ struct pedit_key_ex keys_ex[MAX_PEDIT_KEYS];
+};
+
+/**
+ * Create space for using the implicitly created TC flow counter.
+ *
+ * @return
+ *   A pointer to the counter data structure on success, otherwise NULL
+ *   and rte_errno is set.
+ */
+static struct mlx5_flow_counter *
+flow_tcf_counter_new(void)
+{
+ struct mlx5_flow_counter *cnt;
+
+ /*
+ * E-Switch counters cannot be shared and their IDs are unknown,
+ * so all of them are currently returned with ID 0. Switching to
+ * unique numbers may be better in the future.
+ */
+ struct mlx5_flow_counter tmpl = {
+ .ref_cnt = 1,
+ };
+ cnt = rte_calloc(__func__, 1, sizeof(*cnt), 0);
+ if (!cnt) {
+ rte_errno = ENOMEM;
+ return NULL;
+ }
+ *cnt = tmpl;
+ /* Implicit counter, do not add to list. */
+ return cnt;
+}
+
+/**
+ * Set pedit key of MAC address
+ *
+ * @param[in] actions
+ * pointer to action specification
+ * @param[in,out] p_parser
+ * pointer to pedit_parser
+ */
+static void
+flow_tcf_pedit_key_set_mac(const struct rte_flow_action *actions,
+ struct pedit_parser *p_parser)
+{
+ int idx = p_parser->sel.nkeys;
+ uint32_t off = actions->type == RTE_FLOW_ACTION_TYPE_SET_MAC_SRC ?
+ offsetof(struct ether_hdr, s_addr) :
+ offsetof(struct ether_hdr, d_addr);
+ const struct rte_flow_action_set_mac *conf =
+ (const struct rte_flow_action_set_mac *)actions->conf;
+
+ p_parser->keys[idx].off = off;
+ p_parser->keys[idx].mask = ~UINT32_MAX;
+ p_parser->keys_ex[idx].htype = TCA_PEDIT_KEY_EX_HDR_TYPE_ETH;
+ p_parser->keys_ex[idx].cmd = TCA_PEDIT_KEY_EX_CMD_SET;
+ memcpy(&p_parser->keys[idx].val,
+ conf->mac_addr, SZ_PEDIT_KEY_VAL);
+ idx++;
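+ /*
+ * A 6-byte MAC address does not fit into a single 4-byte pedit key,
+ * so the remaining two bytes go into a second key; its mask is meant
+ * to leave the unrelated half of that word untouched.
+ */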
+ p_parser->keys[idx].off = off + SZ_PEDIT_KEY_VAL;
+ p_parser->keys[idx].mask = 0xFFFF0000;
+ p_parser->keys_ex[idx].htype = TCA_PEDIT_KEY_EX_HDR_TYPE_ETH;
+ p_parser->keys_ex[idx].cmd = TCA_PEDIT_KEY_EX_CMD_SET;
+ memcpy(&p_parser->keys[idx].val,
+ conf->mac_addr + SZ_PEDIT_KEY_VAL,
+ ETHER_ADDR_LEN - SZ_PEDIT_KEY_VAL);
+ p_parser->sel.nkeys = (++idx);
+}
+
+/**
+ * Set pedit key of decrease/set ttl
+ *
+ * @param[in] actions
+ * pointer to action specification
+ * @param[in,out] p_parser
+ * pointer to pedit_parser
+ * @param[in] item_flags
+ * flags of all items presented
+ */
+static void
+flow_tcf_pedit_key_set_dec_ttl(const struct rte_flow_action *actions,
+ struct pedit_parser *p_parser,
+ uint64_t item_flags)
+{
+ int idx = p_parser->sel.nkeys;
+
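+ /*
+ * Only the single TTL/hop-limit byte is rewritten; the mask below is
+ * meant to leave the neighbouring bytes of the pedit word untouched.
+ */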
+ p_parser->keys[idx].mask = 0xFFFFFF00;
+ if (item_flags & MLX5_FLOW_LAYER_OUTER_L3_IPV4) {
+ p_parser->keys_ex[idx].htype = TCA_PEDIT_KEY_EX_HDR_TYPE_IP4;
+ p_parser->keys[idx].off =
+ offsetof(struct ipv4_hdr, time_to_live);
+ }
+ if (item_flags & MLX5_FLOW_LAYER_OUTER_L3_IPV6) {
+ p_parser->keys_ex[idx].htype = TCA_PEDIT_KEY_EX_HDR_TYPE_IP6;
+ p_parser->keys[idx].off =
+ offsetof(struct ipv6_hdr, hop_limits);
+ }
+ if (actions->type == RTE_FLOW_ACTION_TYPE_DEC_TTL) {
+ p_parser->keys_ex[idx].cmd = TCA_PEDIT_KEY_EX_CMD_ADD;
+ p_parser->keys[idx].val = 0x000000FF;
+ } else {
+ p_parser->keys_ex[idx].cmd = TCA_PEDIT_KEY_EX_CMD_SET;
+ p_parser->keys[idx].val =
+ (__u32)((const struct rte_flow_action_set_ttl *)
+ actions->conf)->ttl_value;
+ }
+ p_parser->sel.nkeys = (++idx);
+}
+
+/**
+ * Set pedit key of transport (TCP/UDP) port value
+ *
+ * @param[in] actions
+ * pointer to action specification
+ * @param[in,out] p_parser
+ * pointer to pedit_parser
+ * @param[in] item_flags
+ * flags of all items presented
+ */
+static void
+flow_tcf_pedit_key_set_tp_port(const struct rte_flow_action *actions,
+ struct pedit_parser *p_parser,
+ uint64_t item_flags)
+{
+ int idx = p_parser->sel.nkeys;
+
+ if (item_flags & MLX5_FLOW_LAYER_OUTER_L4_UDP)
+ p_parser->keys_ex[idx].htype = TCA_PEDIT_KEY_EX_HDR_TYPE_UDP;
+ if (item_flags & MLX5_FLOW_LAYER_OUTER_L4_TCP)
+ p_parser->keys_ex[idx].htype = TCA_PEDIT_KEY_EX_HDR_TYPE_TCP;
+ p_parser->keys_ex[idx].cmd = TCA_PEDIT_KEY_EX_CMD_SET;
+ /* The src/dst port offset is the same for TCP and UDP. */
+ p_parser->keys[idx].off =
+ actions->type == RTE_FLOW_ACTION_TYPE_SET_TP_SRC ?
+ offsetof(struct tcp_hdr, src_port) :
+ offsetof(struct tcp_hdr, dst_port);
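+ /*
+ * The port value is 16 bits wide; the mask below is meant to preserve
+ * the other half of the 4-byte pedit word.
+ */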
+ p_parser->keys[idx].mask = 0xFFFF0000;
+ p_parser->keys[idx].val =
+ (__u32)((const struct rte_flow_action_set_tp *)
+ actions->conf)->port;
+ p_parser->sel.nkeys = (++idx);
+}
+
+/**
+ * Set pedit key of ipv6 address
+ *
+ * @param[in] actions
+ * pointer to action specification
+ * @param[in,out] p_parser
+ * pointer to pedit_parser
+ */
+static void
+flow_tcf_pedit_key_set_ipv6_addr(const struct rte_flow_action *actions,
+ struct pedit_parser *p_parser)
+{
+ int idx = p_parser->sel.nkeys;
+ int keys = NUM_OF_PEDIT_KEYS(IPV6_ADDR_LEN);
+ int off_base =
+ actions->type == RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC ?
+ offsetof(struct ipv6_hdr, src_addr) :
+ offsetof(struct ipv6_hdr, dst_addr);
+ const struct rte_flow_action_set_ipv6 *conf =
+ (const struct rte_flow_action_set_ipv6 *)actions->conf;
+
+ for (int i = 0; i < keys; i++, idx++) {
+ p_parser->keys_ex[idx].htype = TCA_PEDIT_KEY_EX_HDR_TYPE_IP6;
+ p_parser->keys_ex[idx].cmd = TCA_PEDIT_KEY_EX_CMD_SET;
+ p_parser->keys[idx].off = off_base + i * SZ_PEDIT_KEY_VAL;
+ p_parser->keys[idx].mask = ~UINT32_MAX;
+ memcpy(&p_parser->keys[idx].val,
+ conf->ipv6_addr + i * SZ_PEDIT_KEY_VAL,
+ SZ_PEDIT_KEY_VAL);
+ }
+ p_parser->sel.nkeys += keys;
+}
+
+/**
+ * Set pedit key of ipv4 address
+ *
+ * @param[in] actions
+ * pointer to action specification
+ * @param[in,out] p_parser
+ * pointer to pedit_parser
+ */
+static void
+flow_tcf_pedit_key_set_ipv4_addr(const struct rte_flow_action *actions,
+ struct pedit_parser *p_parser)
+{
+ int idx = p_parser->sel.nkeys;
+
+ p_parser->keys_ex[idx].htype = TCA_PEDIT_KEY_EX_HDR_TYPE_IP4;
+ p_parser->keys_ex[idx].cmd = TCA_PEDIT_KEY_EX_CMD_SET;
+ p_parser->keys[idx].off =
+ actions->type == RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC ?
+ offsetof(struct ipv4_hdr, src_addr) :
+ offsetof(struct ipv4_hdr, dst_addr);
+ p_parser->keys[idx].mask = ~UINT32_MAX;
+ p_parser->keys[idx].val =
+ ((const struct rte_flow_action_set_ipv4 *)
+ actions->conf)->ipv4_addr;
+ p_parser->sel.nkeys = (++idx);
+}
+
+/**
+ * Create the pedit's netlink attributes in a pre-allocated
+ * Netlink message buffer.
+ *
+ * @param[in,out] nl
+ *   Pointer to the pre-allocated Netlink message buffer.
+ * @param[in,out] actions
+ *   Pointer to a pointer to the actions specification.
+ * @param[in] item_flags
+ *   Flags of all items present.
+ */
+static void
+flow_tcf_create_pedit_mnl_msg(struct nlmsghdr *nl,
+ const struct rte_flow_action **actions,
+ uint64_t item_flags)
+{
+ struct pedit_parser p_parser;
+ struct nlattr *na_act_options;
+ struct nlattr *na_pedit_keys;
+
+ memset(&p_parser, 0, sizeof(p_parser));
+ mnl_attr_put_strz(nl, TCA_ACT_KIND, "pedit");
+ na_act_options = mnl_attr_nest_start(nl, TCA_ACT_OPTIONS);
+ /* All modify-header actions should be in a single TC pedit action. */
+ for (; (*actions)->type != RTE_FLOW_ACTION_TYPE_END; (*actions)++) {
+ switch ((*actions)->type) {
+ case RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC:
+ case RTE_FLOW_ACTION_TYPE_SET_IPV4_DST:
+ flow_tcf_pedit_key_set_ipv4_addr(*actions, &p_parser);
+ break;
+ case RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC:
+ case RTE_FLOW_ACTION_TYPE_SET_IPV6_DST:
+ flow_tcf_pedit_key_set_ipv6_addr(*actions, &p_parser);
+ break;
+ case RTE_FLOW_ACTION_TYPE_SET_TP_SRC:
+ case RTE_FLOW_ACTION_TYPE_SET_TP_DST:
+ flow_tcf_pedit_key_set_tp_port(*actions,
+ &p_parser, item_flags);
+ break;
+ case RTE_FLOW_ACTION_TYPE_SET_TTL:
+ case RTE_FLOW_ACTION_TYPE_DEC_TTL:
+ flow_tcf_pedit_key_set_dec_ttl(*actions,
+ &p_parser, item_flags);
+ break;
+ case RTE_FLOW_ACTION_TYPE_SET_MAC_SRC:
+ case RTE_FLOW_ACTION_TYPE_SET_MAC_DST:
+ flow_tcf_pedit_key_set_mac(*actions, &p_parser);
+ break;
+ default:
+ goto pedit_mnl_msg_done;
+ }
+ }
+pedit_mnl_msg_done:
+ p_parser.sel.action = TC_ACT_PIPE;
+ mnl_attr_put(nl, TCA_PEDIT_PARMS_EX,
+ sizeof(p_parser.sel) +
+ p_parser.sel.nkeys * sizeof(struct tc_pedit_key),
+ &p_parser);
+ na_pedit_keys =
+ mnl_attr_nest_start(nl, TCA_PEDIT_KEYS_EX | NLA_F_NESTED);
+ for (int i = 0; i < p_parser.sel.nkeys; i++) {
+ struct nlattr *na_pedit_key =
+ mnl_attr_nest_start(nl,
+ TCA_PEDIT_KEY_EX | NLA_F_NESTED);
+ mnl_attr_put_u16(nl, TCA_PEDIT_KEY_EX_HTYPE,
+ p_parser.keys_ex[i].htype);
+ mnl_attr_put_u16(nl, TCA_PEDIT_KEY_EX_CMD,
+ p_parser.keys_ex[i].cmd);
+ mnl_attr_nest_end(nl, na_pedit_key);
+ }
+ mnl_attr_nest_end(nl, na_pedit_keys);
+ mnl_attr_nest_end(nl, na_act_options);
+ (*actions)--;
+}
+
+/**
+ * Calculate the maximum memory size of one TC pedit action.
+ * One TC pedit action can contain a set of keys, each defining
+ * a rewrite element (rte_flow action).
+ *
+ * @param[in,out] actions
+ *   Actions specification.
+ * @param[in,out] action_flags
+ *   Actions flags.
+ *
+ * @return
+ *   Maximum memory size of one TC pedit action.
+ */
+static int
+flow_tcf_get_pedit_actions_size(const struct rte_flow_action **actions,
+ uint64_t *action_flags)
+{
+ int pedit_size = 0;
+ int keys = 0;
+ uint64_t flags = 0;
+
+ pedit_size += SZ_NLATTR_NEST + /* na_act_index. */
+ SZ_NLATTR_STRZ_OF("pedit") +
+ SZ_NLATTR_NEST; /* TCA_ACT_OPTIONS. */
+ for (; (*actions)->type != RTE_FLOW_ACTION_TYPE_END; (*actions)++) {
+ switch ((*actions)->type) {
+ case RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC:
+ keys += NUM_OF_PEDIT_KEYS(IPV4_ADDR_LEN);
+ flags |= MLX5_FLOW_ACTION_SET_IPV4_SRC;
+ break;
+ case RTE_FLOW_ACTION_TYPE_SET_IPV4_DST:
+ keys += NUM_OF_PEDIT_KEYS(IPV4_ADDR_LEN);
+ flags |= MLX5_FLOW_ACTION_SET_IPV4_DST;
+ break;
+ case RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC:
+ keys += NUM_OF_PEDIT_KEYS(IPV6_ADDR_LEN);
+ flags |= MLX5_FLOW_ACTION_SET_IPV6_SRC;
+ break;
+ case RTE_FLOW_ACTION_TYPE_SET_IPV6_DST:
+ keys += NUM_OF_PEDIT_KEYS(IPV6_ADDR_LEN);
+ flags |= MLX5_FLOW_ACTION_SET_IPV6_DST;
+ break;
+ case RTE_FLOW_ACTION_TYPE_SET_TP_SRC:
+ /* TCP is the same as UDP. */
+ keys += NUM_OF_PEDIT_KEYS(TP_PORT_LEN);
+ flags |= MLX5_FLOW_ACTION_SET_TP_SRC;
+ break;
+ case RTE_FLOW_ACTION_TYPE_SET_TP_DST:
+ /* TCP is the same as UDP. */
+ keys += NUM_OF_PEDIT_KEYS(TP_PORT_LEN);
+ flags |= MLX5_FLOW_ACTION_SET_TP_DST;
+ break;
+ case RTE_FLOW_ACTION_TYPE_SET_TTL:
+ keys += NUM_OF_PEDIT_KEYS(TTL_LEN);
+ flags |= MLX5_FLOW_ACTION_SET_TTL;
+ break;
+ case RTE_FLOW_ACTION_TYPE_DEC_TTL:
+ keys += NUM_OF_PEDIT_KEYS(TTL_LEN);
+ flags |= MLX5_FLOW_ACTION_DEC_TTL;
+ break;
+ case RTE_FLOW_ACTION_TYPE_SET_MAC_SRC:
+ keys += NUM_OF_PEDIT_KEYS(ETHER_ADDR_LEN);
+ flags |= MLX5_FLOW_ACTION_SET_MAC_SRC;
+ break;
+ case RTE_FLOW_ACTION_TYPE_SET_MAC_DST:
+ keys += NUM_OF_PEDIT_KEYS(ETHER_ADDR_LEN);
+ flags |= MLX5_FLOW_ACTION_SET_MAC_DST;
+ break;
+ default:
+ goto get_pedit_action_size_done;
+ }
+ }
+get_pedit_action_size_done:
+ /* TCA_PEDIT_PARAMS_EX */
+ pedit_size +=
+ SZ_NLATTR_DATA_OF(sizeof(struct tc_pedit_sel) +
+ keys * sizeof(struct tc_pedit_key));
+ pedit_size += SZ_NLATTR_NEST; /* TCA_PEDIT_KEYS */
+ pedit_size += keys *
+ /* TCA_PEDIT_KEY_EX + HTYPE + CMD */
+ (SZ_NLATTR_NEST + SZ_NLATTR_DATA_OF(2) +
+ SZ_NLATTR_DATA_OF(2));
+ (*action_flags) |= flags;
+ (*actions)--;
+ return pedit_size;
+}
+
+/**
+ * Retrieve mask for pattern item.
+ *
+ * This function does basic sanity checks on a pattern item in order to
+ * return the most appropriate mask for it.
+ *
+ * @param[in] item
+ * Item specification.
+ * @param[in] mask_default
+ * Default mask for pattern item as specified by the flow API.
+ * @param[in] mask_supported
+ * Mask fields supported by the implementation.
+ * @param[in] mask_empty
+ * Empty mask to return when there is no specification.
+ * @param[in] mask_size
+ *   Size of the mask in bytes.
+ * @param[out] error
+ *   Perform verbose error reporting if not NULL.
+ *
+ * @return
+ * Either @p item->mask or one of the mask parameters on success, NULL
+ * otherwise and rte_errno is set.
+ */
+static const void *
+flow_tcf_item_mask(const struct rte_flow_item *item, const void *mask_default,
+ const void *mask_supported, const void *mask_empty,
+ size_t mask_size, struct rte_flow_error *error)
+{
+ const uint8_t *mask;
+ size_t i;
+
+ /* item->last and item->mask cannot exist without item->spec. */
+ if (!item->spec && (item->mask || item->last)) {
+ rte_flow_error_set(error, EINVAL,
+ RTE_FLOW_ERROR_TYPE_ITEM, item,
+ "\"mask\" or \"last\" field provided without"
+ " a corresponding \"spec\"");
+ return NULL;
+ }
+ /* No spec, no mask, no problem. */
+ if (!item->spec)
+ return mask_empty;
+ mask = item->mask ? item->mask : mask_default;
+ assert(mask);
+ /*
+ * Single-pass check to make sure that:
+ * - Mask is supported, no bits are set outside mask_supported.
+ * - Both item->spec and item->last are included in mask.
+ */
+ for (i = 0; i != mask_size; ++i) {
+ if (!mask[i])
+ continue;
+ if ((mask[i] | ((const uint8_t *)mask_supported)[i]) !=
+ ((const uint8_t *)mask_supported)[i]) {
+ rte_flow_error_set(error, ENOTSUP,
+ RTE_FLOW_ERROR_TYPE_ITEM_MASK, mask,
+ "unsupported field found"
+ " in \"mask\"");
+ return NULL;
+ }
+ if (item->last &&
+ (((const uint8_t *)item->spec)[i] & mask[i]) !=
+ (((const uint8_t *)item->last)[i] & mask[i])) {
+ rte_flow_error_set(error, EINVAL,
+ RTE_FLOW_ERROR_TYPE_ITEM_LAST,
+ item->last,
+ "range between \"spec\" and \"last\""
+ " not comprised in \"mask\"");
+ return NULL;
+ }
+ }
+ return mask;
+}
+
+/**
+ * Build a conversion table between port ID and ifindex.
+ *
+ * @param[in] dev
+ * Pointer to Ethernet device.
+ * @param[out] ptoi
+ * Pointer to ptoi table.
+ * @param[in] len
+ * Size of ptoi table provided.
+ *
+ * @return
+ * Size of ptoi table filled.
+ */
+static unsigned int
+flow_tcf_build_ptoi_table(struct rte_eth_dev *dev, struct flow_tcf_ptoi *ptoi,
+ unsigned int len)
+{
+ unsigned int n = mlx5_dev_to_port_id(dev->device, NULL, 0);
+ uint16_t port_id[n + 1];
+ unsigned int i;
+ unsigned int own = 0;
+
+ /* At least one port is needed when no switch domain is present. */
+ if (!n) {
+ n = 1;
+ port_id[0] = dev->data->port_id;
+ } else {
+ n = RTE_MIN(mlx5_dev_to_port_id(dev->device, port_id, n), n);
+ }
+ if (n > len)
+ return 0;
+ for (i = 0; i != n; ++i) {
+ struct rte_eth_dev_info dev_info;
+
+ rte_eth_dev_info_get(port_id[i], &dev_info);
+ if (port_id[i] == dev->data->port_id)
+ own = i;
+ ptoi[i].port_id = port_id[i];
+ ptoi[i].ifindex = dev_info.if_index;
+ }
+ /* Ensure first entry of ptoi[] is the current device. */
+ if (own) {
+ ptoi[n] = ptoi[0];
+ ptoi[0] = ptoi[own];
+ ptoi[own] = ptoi[n];
+ }
+ /* An entry with zero ifindex terminates ptoi[]. */
+ ptoi[n].port_id = 0;
+ ptoi[n].ifindex = 0;
+ return n;
+}
+
+/**
+ * Verify the @p attr will be correctly understood by the E-switch.
+ *
+ * @param[in] attr
+ * Pointer to flow attributes
+ * @param[out] error
+ * Pointer to error structure.
+ *
+ * @return
+ * 0 on success, a negative errno value otherwise and rte_errno is set.
+ */
+static int
+flow_tcf_validate_attributes(const struct rte_flow_attr *attr,
+ struct rte_flow_error *error)
+{
+ /*
+ * Supported attributes: groups, some priorities and ingress only.
+ * Groups are supported only if the kernel supports chains. Transfer
+ * is ignored as it is the caller's problem.
+ */
+ if (attr->group > MLX5_TCF_GROUP_ID_MAX)
+ return rte_flow_error_set(error, ENOTSUP,
+ RTE_FLOW_ERROR_TYPE_ATTR_GROUP, attr,
+ "group ID larger than "
+ RTE_STR(MLX5_TCF_GROUP_ID_MAX)
+ " isn't supported");
+ else if (attr->group > 0 &&
+ attr->priority > MLX5_TCF_GROUP_PRIORITY_MAX)
+ return rte_flow_error_set(error, ENOTSUP,
+ RTE_FLOW_ERROR_TYPE_ATTR_PRIORITY,
+ attr,
+ "lowest priority level is "
+ RTE_STR(MLX5_TCF_GROUP_PRIORITY_MAX)
+ " when group is configured");
+ else if (attr->priority > 0xfffe)
+ return rte_flow_error_set(error, ENOTSUP,
+ RTE_FLOW_ERROR_TYPE_ATTR_PRIORITY,
+ attr,
+ "lowest priority level is 0xfffe");
+ if (!attr->ingress)
+ return rte_flow_error_set(error, EINVAL,
+ RTE_FLOW_ERROR_TYPE_ATTR_INGRESS,
+ attr, "only ingress is supported");
+ if (attr->egress)
+ return rte_flow_error_set(error, ENOTSUP,
+ RTE_FLOW_ERROR_TYPE_ATTR_EGRESS,
+ attr, "egress is not supported");
+ return 0;
+}
+
+/**
+ * Validate flow for E-Switch.
+ *
+ * @param[in] dev
+ *   Pointer to the Ethernet device structure.
+ * @param[in] attr
+ * Pointer to the flow attributes.
+ * @param[in] items
+ * Pointer to the list of items.
+ * @param[in] actions
+ * Pointer to the list of actions.
+ * @param[out] error
+ * Pointer to the error structure.
+ *
+ * @return
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
+ */
+static int
+flow_tcf_validate(struct rte_eth_dev *dev,
+ const struct rte_flow_attr *attr,
+ const struct rte_flow_item items[],
+ const struct rte_flow_action actions[],
+ struct rte_flow_error *error)
+{
+ union {
+ const struct rte_flow_item_port_id *port_id;
+ const struct rte_flow_item_eth *eth;
+ const struct rte_flow_item_vlan *vlan;
+ const struct rte_flow_item_ipv4 *ipv4;
+ const struct rte_flow_item_ipv6 *ipv6;
+ const struct rte_flow_item_tcp *tcp;
+ const struct rte_flow_item_udp *udp;
+ } spec, mask;
+ union {
+ const struct rte_flow_action_port_id *port_id;
+ const struct rte_flow_action_jump *jump;
+ const struct rte_flow_action_of_push_vlan *of_push_vlan;
+ const struct rte_flow_action_of_set_vlan_vid *
+ of_set_vlan_vid;
+ const struct rte_flow_action_of_set_vlan_pcp *
+ of_set_vlan_pcp;
+ const struct rte_flow_action_set_ipv4 *set_ipv4;
+ const struct rte_flow_action_set_ipv6 *set_ipv6;
+ } conf;
+ uint64_t item_flags = 0;
+ uint64_t action_flags = 0;
+ uint8_t next_protocol = -1;
+ unsigned int tcm_ifindex = 0;
+ uint8_t pedit_validated = 0;
+ struct flow_tcf_ptoi ptoi[PTOI_TABLE_SZ_MAX(dev)];
+ struct rte_eth_dev *port_id_dev = NULL;
+ bool in_port_id_set = false;
+ int ret;
+
+ claim_nonzero(flow_tcf_build_ptoi_table(dev, ptoi,
+ PTOI_TABLE_SZ_MAX(dev)));
+ ret = flow_tcf_validate_attributes(attr, error);
+ if (ret < 0)
+ return ret;
+ for (; items->type != RTE_FLOW_ITEM_TYPE_END; items++) {
+ unsigned int i;
+
+ switch (items->type) {
+ case RTE_FLOW_ITEM_TYPE_VOID:
+ break;
+ case RTE_FLOW_ITEM_TYPE_PORT_ID:
+ mask.port_id = flow_tcf_item_mask
+ (items, &rte_flow_item_port_id_mask,
+ &flow_tcf_mask_supported.port_id,
+ &flow_tcf_mask_empty.port_id,
+ sizeof(flow_tcf_mask_supported.port_id),
+ error);
+ if (!mask.port_id)
+ return -rte_errno;
+ if (mask.port_id == &flow_tcf_mask_empty.port_id) {
+ in_port_id_set = 1;
+ break;
+ }
+ spec.port_id = items->spec;
+ if (mask.port_id->id && mask.port_id->id != 0xffffffff)
+ return rte_flow_error_set
+ (error, ENOTSUP,
+ RTE_FLOW_ERROR_TYPE_ITEM_MASK,
+ mask.port_id,
+ "no support for partial mask on"
+ " \"id\" field");
+ if (!mask.port_id->id)
+ i = 0;
+ else
+ for (i = 0; ptoi[i].ifindex; ++i)
+ if (ptoi[i].port_id == spec.port_id->id)
+ break;
+ if (!ptoi[i].ifindex)
+ return rte_flow_error_set
+ (error, ENODEV,
+ RTE_FLOW_ERROR_TYPE_ITEM_SPEC,
+ spec.port_id,
+ "missing data to convert port ID to"
+ " ifindex");
+ if (in_port_id_set && ptoi[i].ifindex != tcm_ifindex)
+ return rte_flow_error_set
+ (error, ENOTSUP,
+ RTE_FLOW_ERROR_TYPE_ITEM_SPEC,
+ spec.port_id,
+ "cannot match traffic for"
+ " several port IDs through"
+ " a single flow rule");
+ tcm_ifindex = ptoi[i].ifindex;
+ in_port_id_set = 1;
+ break;
+ case RTE_FLOW_ITEM_TYPE_ETH:
+ ret = mlx5_flow_validate_item_eth(items, item_flags,
+ error);
+ if (ret < 0)
+ return ret;
+ item_flags |= MLX5_FLOW_LAYER_OUTER_L2;
+ /* TODO:
+ * Redundant check due to different supported mask.
+ * Same for the rest of items.
+ */
+ mask.eth = flow_tcf_item_mask
+ (items, &rte_flow_item_eth_mask,
+ &flow_tcf_mask_supported.eth,
+ &flow_tcf_mask_empty.eth,
+ sizeof(flow_tcf_mask_supported.eth),
+ error);
+ if (!mask.eth)
+ return -rte_errno;
+ if (mask.eth->type && mask.eth->type !=
+ RTE_BE16(0xffff))
+ return rte_flow_error_set
+ (error, ENOTSUP,
+ RTE_FLOW_ERROR_TYPE_ITEM_MASK,
+ mask.eth,
+ "no support for partial mask on"
+ " \"type\" field");
+ break;
+ case RTE_FLOW_ITEM_TYPE_VLAN:
+ ret = mlx5_flow_validate_item_vlan(items, item_flags,
+ error);
+ if (ret < 0)
+ return ret;
+ item_flags |= MLX5_FLOW_LAYER_OUTER_VLAN;
+ mask.vlan = flow_tcf_item_mask
+ (items, &rte_flow_item_vlan_mask,
+ &flow_tcf_mask_supported.vlan,
+ &flow_tcf_mask_empty.vlan,
+ sizeof(flow_tcf_mask_supported.vlan),
+ error);
+ if (!mask.vlan)
+ return -rte_errno;
+ if ((mask.vlan->tci & RTE_BE16(0xe000) &&
+ (mask.vlan->tci & RTE_BE16(0xe000)) !=
+ RTE_BE16(0xe000)) ||
+ (mask.vlan->tci & RTE_BE16(0x0fff) &&
+ (mask.vlan->tci & RTE_BE16(0x0fff)) !=
+ RTE_BE16(0x0fff)) ||
+ (mask.vlan->inner_type &&
+ mask.vlan->inner_type != RTE_BE16(0xffff)))
+ return rte_flow_error_set
+ (error, ENOTSUP,
+ RTE_FLOW_ERROR_TYPE_ITEM_MASK,
+ mask.vlan,
+ "no support for partial masks on"
+ " \"tci\" (PCP and VID parts) and"
+ " \"inner_type\" fields");
+ break;
+ case RTE_FLOW_ITEM_TYPE_IPV4:
+ ret = mlx5_flow_validate_item_ipv4(items, item_flags,
+ error);
+ if (ret < 0)
+ return ret;
+ item_flags |= MLX5_FLOW_LAYER_OUTER_L3_IPV4;
+ mask.ipv4 = flow_tcf_item_mask
+ (items, &rte_flow_item_ipv4_mask,
+ &flow_tcf_mask_supported.ipv4,
+ &flow_tcf_mask_empty.ipv4,
+ sizeof(flow_tcf_mask_supported.ipv4),
+ error);
+ if (!mask.ipv4)
+ return -rte_errno;
+ if (mask.ipv4->hdr.next_proto_id &&
+ mask.ipv4->hdr.next_proto_id != 0xff)
+ return rte_flow_error_set
+ (error, ENOTSUP,
+ RTE_FLOW_ERROR_TYPE_ITEM_MASK,
+ mask.ipv4,
+ "no support for partial mask on"
+ " \"hdr.next_proto_id\" field");
+ else if (mask.ipv4->hdr.next_proto_id)
+ next_protocol =
+ ((const struct rte_flow_item_ipv4 *)
+ (items->spec))->hdr.next_proto_id;
+ break;
+ case RTE_FLOW_ITEM_TYPE_IPV6:
+ ret = mlx5_flow_validate_item_ipv6(items, item_flags,
+ error);
+ if (ret < 0)
+ return ret;
+ item_flags |= MLX5_FLOW_LAYER_OUTER_L3_IPV6;
+ mask.ipv6 = flow_tcf_item_mask
+ (items, &rte_flow_item_ipv6_mask,
+ &flow_tcf_mask_supported.ipv6,
+ &flow_tcf_mask_empty.ipv6,
+ sizeof(flow_tcf_mask_supported.ipv6),
+ error);
+ if (!mask.ipv6)
+ return -rte_errno;
+ if (mask.ipv6->hdr.proto &&
+ mask.ipv6->hdr.proto != 0xff)
+ return rte_flow_error_set
+ (error, ENOTSUP,
+ RTE_FLOW_ERROR_TYPE_ITEM_MASK,
+ mask.ipv6,
+ "no support for partial mask on"
+ " \"hdr.proto\" field");
+ else if (mask.ipv6->hdr.proto)
+ next_protocol =
+ ((const struct rte_flow_item_ipv6 *)
+ (items->spec))->hdr.proto;
+ break;
+ case RTE_FLOW_ITEM_TYPE_UDP:
+ ret = mlx5_flow_validate_item_udp(items, item_flags,
+ next_protocol, error);
+ if (ret < 0)
+ return ret;
+ item_flags |= MLX5_FLOW_LAYER_OUTER_L4_UDP;
+ mask.udp = flow_tcf_item_mask
+ (items, &rte_flow_item_udp_mask,
+ &flow_tcf_mask_supported.udp,
+ &flow_tcf_mask_empty.udp,
+ sizeof(flow_tcf_mask_supported.udp),
+ error);
+ if (!mask.udp)
+ return -rte_errno;
+ break;
+ case RTE_FLOW_ITEM_TYPE_TCP:
+ ret = mlx5_flow_validate_item_tcp
+ (items, item_flags,
+ next_protocol,
+ &flow_tcf_mask_supported.tcp,
+ error);
+ if (ret < 0)
+ return ret;
+ item_flags |= MLX5_FLOW_LAYER_OUTER_L4_TCP;
+ mask.tcp = flow_tcf_item_mask
+ (items, &rte_flow_item_tcp_mask,
+ &flow_tcf_mask_supported.tcp,
+ &flow_tcf_mask_empty.tcp,
+ sizeof(flow_tcf_mask_supported.tcp),
+ error);
+ if (!mask.tcp)
+ return -rte_errno;
+ break;
+ default:
+ return rte_flow_error_set(error, ENOTSUP,
+ RTE_FLOW_ERROR_TYPE_ITEM,
+ NULL, "item not supported");
+ }
+ }
+ for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
+ unsigned int i;
+ uint64_t current_action_flag = 0;
+
+ switch (actions->type) {
+ case RTE_FLOW_ACTION_TYPE_VOID:
+ break;
+ case RTE_FLOW_ACTION_TYPE_PORT_ID:
+ current_action_flag = MLX5_FLOW_ACTION_PORT_ID;
+ if (!actions->conf)
+ break;
+ conf.port_id = actions->conf;
+ if (conf.port_id->original)
+ i = 0;
+ else
+ for (i = 0; ptoi[i].ifindex; ++i)
+ if (ptoi[i].port_id == conf.port_id->id)
+ break;
+ if (!ptoi[i].ifindex)
+ return rte_flow_error_set
+ (error, ENODEV,
+ RTE_FLOW_ERROR_TYPE_ACTION_CONF,
+ conf.port_id,
+ "missing data to convert port ID to"
+ " ifindex");
+ port_id_dev = &rte_eth_devices[conf.port_id->id];
+ break;
+ case RTE_FLOW_ACTION_TYPE_JUMP:
+ current_action_flag = MLX5_FLOW_ACTION_JUMP;
+ if (!actions->conf)
+ break;
+ conf.jump = actions->conf;
+ if (attr->group >= conf.jump->group)
+ return rte_flow_error_set
+ (error, ENOTSUP,
+ RTE_FLOW_ERROR_TYPE_ACTION,
+ actions,
+ "can jump only to a group forward");
+ break;
+ case RTE_FLOW_ACTION_TYPE_DROP:
+ current_action_flag = MLX5_FLOW_ACTION_DROP;
+ break;
+ case RTE_FLOW_ACTION_TYPE_COUNT:
+ break;
+ case RTE_FLOW_ACTION_TYPE_OF_POP_VLAN:
+ current_action_flag = MLX5_FLOW_ACTION_OF_POP_VLAN;
+ break;
+ case RTE_FLOW_ACTION_TYPE_OF_PUSH_VLAN:
+ current_action_flag = MLX5_FLOW_ACTION_OF_PUSH_VLAN;
+ break;
+ case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_VID:
+ if (!(action_flags & MLX5_FLOW_ACTION_OF_PUSH_VLAN))
+ return rte_flow_error_set
+ (error, ENOTSUP,
+ RTE_FLOW_ERROR_TYPE_ACTION, actions,
+ "vlan modify is not supported,"
+ " set action must follow push action");
+ current_action_flag = MLX5_FLOW_ACTION_OF_SET_VLAN_VID;
+ break;
+ case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_PCP:
+ if (!(action_flags & MLX5_FLOW_ACTION_OF_PUSH_VLAN))
+ return rte_flow_error_set
+ (error, ENOTSUP,
+ RTE_FLOW_ERROR_TYPE_ACTION, actions,
+ "vlan modify is not supported,"
+ " set action must follow push action");
+ current_action_flag = MLX5_FLOW_ACTION_OF_SET_VLAN_PCP;
+ break;
+ case RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC:
+ current_action_flag = MLX5_FLOW_ACTION_SET_IPV4_SRC;
+ break;
+ case RTE_FLOW_ACTION_TYPE_SET_IPV4_DST:
+ current_action_flag = MLX5_FLOW_ACTION_SET_IPV4_DST;
+ break;
+ case RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC:
+ current_action_flag = MLX5_FLOW_ACTION_SET_IPV6_SRC;
+ break;
+ case RTE_FLOW_ACTION_TYPE_SET_IPV6_DST:
+ current_action_flag = MLX5_FLOW_ACTION_SET_IPV6_DST;
+ break;
+ case RTE_FLOW_ACTION_TYPE_SET_TP_SRC:
+ current_action_flag = MLX5_FLOW_ACTION_SET_TP_SRC;
+ break;
+ case RTE_FLOW_ACTION_TYPE_SET_TP_DST:
+ current_action_flag = MLX5_FLOW_ACTION_SET_TP_DST;
+ break;
+ case RTE_FLOW_ACTION_TYPE_SET_TTL:
+ current_action_flag = MLX5_FLOW_ACTION_SET_TTL;
+ break;
+ case RTE_FLOW_ACTION_TYPE_DEC_TTL:
+ current_action_flag = MLX5_FLOW_ACTION_DEC_TTL;
+ break;
+ case RTE_FLOW_ACTION_TYPE_SET_MAC_SRC:
+ current_action_flag = MLX5_FLOW_ACTION_SET_MAC_SRC;
+ break;
+ case RTE_FLOW_ACTION_TYPE_SET_MAC_DST:
+ current_action_flag = MLX5_FLOW_ACTION_SET_MAC_DST;
+ break;
+ default:
+ return rte_flow_error_set(error, ENOTSUP,
+ RTE_FLOW_ERROR_TYPE_ACTION,
+ actions,
+ "action not supported");
+ }
+ if (current_action_flag & MLX5_TCF_CONFIG_ACTIONS) {
+ if (!actions->conf)
+ return rte_flow_error_set(error, EINVAL,
+ RTE_FLOW_ERROR_TYPE_ACTION_CONF,
+ actions,
+ "action configuration not set");
+ }
+ if ((current_action_flag & MLX5_TCF_PEDIT_ACTIONS) &&
+ pedit_validated)
+ return rte_flow_error_set(error, ENOTSUP,
+ RTE_FLOW_ERROR_TYPE_ACTION,
+ actions,
+ "set actions should be "
+ "listed successively");
+ if ((current_action_flag & ~MLX5_TCF_PEDIT_ACTIONS) &&
+ (action_flags & MLX5_TCF_PEDIT_ACTIONS))
+ pedit_validated = 1;
+ if ((current_action_flag & MLX5_TCF_FATE_ACTIONS) &&
+ (action_flags & MLX5_TCF_FATE_ACTIONS))
+ return rte_flow_error_set(error, EINVAL,
+ RTE_FLOW_ERROR_TYPE_ACTION,
+ actions,
+ "can't have multiple fate"
+ " actions");
+ action_flags |= current_action_flag;
+ }
+ if ((action_flags & MLX5_TCF_PEDIT_ACTIONS) &&
+ (action_flags & MLX5_FLOW_ACTION_DROP))
+ return rte_flow_error_set(error, ENOTSUP,
+ RTE_FLOW_ERROR_TYPE_ACTION,
+ actions,
+ "set action is not compatible with "
+ "drop action");
+ if ((action_flags & MLX5_TCF_PEDIT_ACTIONS) &&
+ !(action_flags & MLX5_FLOW_ACTION_PORT_ID))
+ return rte_flow_error_set(error, ENOTSUP,
+ RTE_FLOW_ERROR_TYPE_ACTION,
+ actions,
+ "set action must be followed by "
+ "port_id action");
+ if (action_flags &
+ (MLX5_FLOW_ACTION_SET_IPV4_SRC | MLX5_FLOW_ACTION_SET_IPV4_DST)) {
+ if (!(item_flags & MLX5_FLOW_LAYER_OUTER_L3_IPV4))
+ return rte_flow_error_set(error, EINVAL,
+ RTE_FLOW_ERROR_TYPE_ACTION,
+ actions,
+ "no ipv4 item found in"
+ " pattern");
+ }
+ if (action_flags &
+ (MLX5_FLOW_ACTION_SET_IPV6_SRC | MLX5_FLOW_ACTION_SET_IPV6_DST)) {
+ if (!(item_flags & MLX5_FLOW_LAYER_OUTER_L3_IPV6))
+ return rte_flow_error_set(error, EINVAL,
+ RTE_FLOW_ERROR_TYPE_ACTION,
+ actions,
+ "no ipv6 item found in"
+ " pattern");
+ }
+ if (action_flags &
+ (MLX5_FLOW_ACTION_SET_TP_SRC | MLX5_FLOW_ACTION_SET_TP_DST)) {
+ if (!(item_flags &
+ (MLX5_FLOW_LAYER_OUTER_L4_UDP |
+ MLX5_FLOW_LAYER_OUTER_L4_TCP)))
+ return rte_flow_error_set(error, EINVAL,
+ RTE_FLOW_ERROR_TYPE_ACTION,
+ actions,
+ "no TCP/UDP item found in"
+ " pattern");
+ }
+ /*
+ * FW syndrome (0xA9C090):
+ * set_flow_table_entry: push vlan action fte in fdb can ONLY be
+ * forward to the uplink.
+ */
+ if ((action_flags & MLX5_FLOW_ACTION_OF_PUSH_VLAN) &&
+ (action_flags & MLX5_FLOW_ACTION_PORT_ID) &&
+ ((struct priv *)port_id_dev->data->dev_private)->representor)
+ return rte_flow_error_set(error, ENOTSUP,
+ RTE_FLOW_ERROR_TYPE_ACTION, actions,
+ "vlan push can only be applied"
+ " when forwarding to uplink port");
+ /*
+ * FW syndrome (0x294609):
+ * set_flow_table_entry: modify/pop/push actions in fdb flow table
+ * are supported only while forwarding to vport.
+ */
+ if ((action_flags & MLX5_TCF_VLAN_ACTIONS) &&
+ !(action_flags & MLX5_FLOW_ACTION_PORT_ID))
+ return rte_flow_error_set(error, ENOTSUP,
+ RTE_FLOW_ERROR_TYPE_ACTION, actions,
+ "vlan actions are supported"
+ " only with port_id action");
+ if (!(action_flags & MLX5_TCF_FATE_ACTIONS))
+ return rte_flow_error_set(error, EINVAL,
+ RTE_FLOW_ERROR_TYPE_ACTION, actions,
+ "no fate action is found");
+ if (action_flags &
+ (MLX5_FLOW_ACTION_SET_TTL | MLX5_FLOW_ACTION_DEC_TTL)) {
+ if (!(item_flags &
+ (MLX5_FLOW_LAYER_OUTER_L3_IPV4 |
+ MLX5_FLOW_LAYER_OUTER_L3_IPV6)))
+ return rte_flow_error_set(error, EINVAL,
+ RTE_FLOW_ERROR_TYPE_ACTION,
+ actions,
+ "no IP found in pattern");
+ }
+ if (action_flags &
+ (MLX5_FLOW_ACTION_SET_MAC_SRC | MLX5_FLOW_ACTION_SET_MAC_DST)) {
+ if (!(item_flags & MLX5_FLOW_LAYER_OUTER_L2))
+ return rte_flow_error_set(error, ENOTSUP,
+ RTE_FLOW_ERROR_TYPE_ACTION,
+ actions,
+ "no ethernet found in"
+ " pattern");
+ }
+ return 0;
+}
+
+/**
+ * Calculate maximum size of memory for flow items of Linux TC flower and
+ * extract specified items.
+ *
+ * @param[in] attr
+ *   Pointer to the flow attributes.
+ * @param[in] items
+ *   Pointer to the list of items.
+ * @param[out] item_flags
+ * Pointer to the detected items.
+ *
+ * @return
+ * Maximum size of memory for items.
+ */
+static int
+flow_tcf_get_items_and_size(const struct rte_flow_attr *attr,
+ const struct rte_flow_item items[],
+ uint64_t *item_flags)
+{
+ int size = 0;
+ uint64_t flags = 0;
+
+ size += SZ_NLATTR_STRZ_OF("flower") +
+ SZ_NLATTR_NEST + /* TCA_OPTIONS. */
+ SZ_NLATTR_TYPE_OF(uint32_t); /* TCA_CLS_FLAGS_SKIP_SW. */
+ if (attr->group > 0)
+ size += SZ_NLATTR_TYPE_OF(uint32_t); /* TCA_CHAIN. */
+ for (; items->type != RTE_FLOW_ITEM_TYPE_END; items++) {
+ switch (items->type) {
+ case RTE_FLOW_ITEM_TYPE_VOID:
+ break;
+ case RTE_FLOW_ITEM_TYPE_PORT_ID:
+ break;
+ case RTE_FLOW_ITEM_TYPE_ETH:
+ size += SZ_NLATTR_TYPE_OF(uint16_t) + /* Ether type. */
+ SZ_NLATTR_DATA_OF(ETHER_ADDR_LEN) * 4;
+ /* dst/src MAC addr and mask. */
+ flags |= MLX5_FLOW_LAYER_OUTER_L2;
+ break;
+ case RTE_FLOW_ITEM_TYPE_VLAN:
+ size += SZ_NLATTR_TYPE_OF(uint16_t) + /* Ether type. */
+ SZ_NLATTR_TYPE_OF(uint16_t) +
+ /* VLAN Ether type. */
+ SZ_NLATTR_TYPE_OF(uint8_t) + /* VLAN prio. */
+ SZ_NLATTR_TYPE_OF(uint16_t); /* VLAN ID. */
+ flags |= MLX5_FLOW_LAYER_OUTER_VLAN;
+ break;
+ case RTE_FLOW_ITEM_TYPE_IPV4:
+ size += SZ_NLATTR_TYPE_OF(uint16_t) + /* Ether type. */
+ SZ_NLATTR_TYPE_OF(uint8_t) + /* IP proto. */
+ SZ_NLATTR_TYPE_OF(uint32_t) * 4;
+ /* dst/src IP addr and mask. */
+ flags |= MLX5_FLOW_LAYER_OUTER_L3_IPV4;
+ break;
+ case RTE_FLOW_ITEM_TYPE_IPV6:
+ size += SZ_NLATTR_TYPE_OF(uint16_t) + /* Ether type. */
+ SZ_NLATTR_TYPE_OF(uint8_t) + /* IP proto. */
+ SZ_NLATTR_TYPE_OF(IPV6_ADDR_LEN) * 4;
+ /* dst/src IP addr and mask. */
+ flags |= MLX5_FLOW_LAYER_OUTER_L3_IPV6;
+ break;
+ case RTE_FLOW_ITEM_TYPE_UDP:
+ size += SZ_NLATTR_TYPE_OF(uint8_t) + /* IP proto. */
+ SZ_NLATTR_TYPE_OF(uint16_t) * 4;
+ /* dst/src port and mask. */
+ flags |= MLX5_FLOW_LAYER_OUTER_L4_UDP;
+ break;
+ case RTE_FLOW_ITEM_TYPE_TCP:
+ size += SZ_NLATTR_TYPE_OF(uint8_t) + /* IP proto. */
+ SZ_NLATTR_TYPE_OF(uint16_t) * 4;
+ /* dst/src port and mask. */
+ flags |= MLX5_FLOW_LAYER_OUTER_L4_TCP;
+ break;
+ default:
+ DRV_LOG(WARNING,
+ "unsupported item %p type %d,"
+ " items must be validated before flow creation",
+ (const void *)items, items->type);
+ break;
+ }
+ }
+ *item_flags = flags;
+ return size;
+}
+
+/**
+ * Calculate maximum size of memory for flow actions of Linux TC flower and
+ * extract specified actions.
+ *
+ * @param[in] actions
+ * Pointer to the list of actions.
+ * @param[out] action_flags
+ * Pointer to the detected actions.
+ *
+ * @return
+ * Maximum size of memory for actions.
+ */
+static int
+flow_tcf_get_actions_and_size(const struct rte_flow_action actions[],
+ uint64_t *action_flags)
+{
+ int size = 0;
+ uint64_t flags = 0;
+
+ size += SZ_NLATTR_NEST; /* TCA_FLOWER_ACT. */
+ for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
+ switch (actions->type) {
+ case RTE_FLOW_ACTION_TYPE_VOID:
+ break;
+ case RTE_FLOW_ACTION_TYPE_PORT_ID:
+ size += SZ_NLATTR_NEST + /* na_act_index. */
+ SZ_NLATTR_STRZ_OF("mirred") +
+ SZ_NLATTR_NEST + /* TCA_ACT_OPTIONS. */
+ SZ_NLATTR_TYPE_OF(struct tc_mirred);
+ flags |= MLX5_FLOW_ACTION_PORT_ID;
+ break;
+ case RTE_FLOW_ACTION_TYPE_JUMP:
+ size += SZ_NLATTR_NEST + /* na_act_index. */
+ SZ_NLATTR_STRZ_OF("gact") +
+ SZ_NLATTR_NEST + /* TCA_ACT_OPTIONS. */
+ SZ_NLATTR_TYPE_OF(struct tc_gact);
+ flags |= MLX5_FLOW_ACTION_JUMP;
+ break;
+ case RTE_FLOW_ACTION_TYPE_DROP:
+ size += SZ_NLATTR_NEST + /* na_act_index. */
+ SZ_NLATTR_STRZ_OF("gact") +
+ SZ_NLATTR_NEST + /* TCA_ACT_OPTIONS. */
+ SZ_NLATTR_TYPE_OF(struct tc_gact);
+ flags |= MLX5_FLOW_ACTION_DROP;
+ break;
+ case RTE_FLOW_ACTION_TYPE_COUNT:
+ break;
+ case RTE_FLOW_ACTION_TYPE_OF_POP_VLAN:
+ flags |= MLX5_FLOW_ACTION_OF_POP_VLAN;
+ goto action_of_vlan;
+ case RTE_FLOW_ACTION_TYPE_OF_PUSH_VLAN:
+ flags |= MLX5_FLOW_ACTION_OF_PUSH_VLAN;
+ goto action_of_vlan;
+ case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_VID:
+ flags |= MLX5_FLOW_ACTION_OF_SET_VLAN_VID;
+ goto action_of_vlan;
+ case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_PCP:
+ flags |= MLX5_FLOW_ACTION_OF_SET_VLAN_PCP;
+ goto action_of_vlan;
+action_of_vlan:
+ size += SZ_NLATTR_NEST + /* na_act_index. */
+ SZ_NLATTR_STRZ_OF("vlan") +
+ SZ_NLATTR_NEST + /* TCA_ACT_OPTIONS. */
+ SZ_NLATTR_TYPE_OF(struct tc_vlan) +
+ SZ_NLATTR_TYPE_OF(uint16_t) +
+ /* VLAN protocol. */
+ SZ_NLATTR_TYPE_OF(uint16_t) + /* VLAN ID. */
+ SZ_NLATTR_TYPE_OF(uint8_t); /* VLAN prio. */
+ break;
+ case RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC:
+ case RTE_FLOW_ACTION_TYPE_SET_IPV4_DST:
+ case RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC:
+ case RTE_FLOW_ACTION_TYPE_SET_IPV6_DST:
+ case RTE_FLOW_ACTION_TYPE_SET_TP_SRC:
+ case RTE_FLOW_ACTION_TYPE_SET_TP_DST:
+ case RTE_FLOW_ACTION_TYPE_SET_TTL:
+ case RTE_FLOW_ACTION_TYPE_DEC_TTL:
+ case RTE_FLOW_ACTION_TYPE_SET_MAC_SRC:
+ case RTE_FLOW_ACTION_TYPE_SET_MAC_DST:
+ size += flow_tcf_get_pedit_actions_size(&actions,
+ &flags);
+ break;
+ default:
+ DRV_LOG(WARNING,
+ "unsupported action %p type %d,"
+ " actions must be validated before flow creation",
+ (const void *)actions, actions->type);
+ break;
+ }
+ }
+ *action_flags = flags;
+ return size;
+}
+
+/**
+ * Brand rtnetlink buffer with unique handle.
+ *
+ * This handle should be unique for a given network interface to avoid
+ * collisions.
+ *
+ * @param nlh
+ * Pointer to Netlink message.
+ * @param handle
+ * Unique 32-bit handle to use.
+ */
+static void
+flow_tcf_nl_brand(struct nlmsghdr *nlh, uint32_t handle)
+{
+ struct tcmsg *tcm = mnl_nlmsg_get_payload(nlh);
+
+ tcm->tcm_handle = handle;
+ DRV_LOG(DEBUG, "Netlink msg %p is branded with handle %x",
+ (void *)nlh, handle);
+}
+
+/**
+ * Prepare a flow object for Linux TC flower. It calculates the maximum size of
+ * memory required, allocates the memory, initializes Netlink message headers
+ * and set unique TC message handle.
+ *
+ * @param[in] attr
+ * Pointer to the flow attributes.
+ * @param[in] items
+ * Pointer to the list of items.
+ * @param[in] actions
+ * Pointer to the list of actions.
+ * @param[out] item_flags
+ * Pointer to bit mask of all items detected.
+ * @param[out] action_flags
+ * Pointer to bit mask of all actions detected.
+ * @param[out] error
+ * Pointer to the error structure.
+ *
+ * @return
+ * Pointer to mlx5_flow object on success,
+ *   otherwise NULL and rte_errno is set.
+ */
+static struct mlx5_flow *
+flow_tcf_prepare(const struct rte_flow_attr *attr,
+ const struct rte_flow_item items[],
+ const struct rte_flow_action actions[],
+ uint64_t *item_flags, uint64_t *action_flags,
+ struct rte_flow_error *error)
+{
+ size_t size = sizeof(struct mlx5_flow) +
+ MNL_ALIGN(sizeof(struct nlmsghdr)) +
+ MNL_ALIGN(sizeof(struct tcmsg));
+ struct mlx5_flow *dev_flow;
+ struct nlmsghdr *nlh;
+ struct tcmsg *tcm;
+
+ size += flow_tcf_get_items_and_size(attr, items, item_flags);
+ size += flow_tcf_get_actions_and_size(actions, action_flags);
+ dev_flow = rte_zmalloc(__func__, size, MNL_ALIGNTO);
+ if (!dev_flow) {
+ rte_flow_error_set(error, ENOMEM,
+ RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
+ "not enough memory to create E-Switch flow");
+ return NULL;
+ }
+ nlh = mnl_nlmsg_put_header((void *)(dev_flow + 1));
+ tcm = mnl_nlmsg_put_extra_header(nlh, sizeof(*tcm));
+ *dev_flow = (struct mlx5_flow){
+ .tcf = (struct mlx5_flow_tcf){
+ .nlh = nlh,
+ .tcm = tcm,
+ },
+ };
+ /*
+ * Generate a reasonably unique handle based on the address of the
+ * target buffer.
+ *
+ * This is straightforward on 32-bit systems where the flow pointer can
+ * be used directly. Otherwise, its least significant part is taken
+ * after shifting it by the previous power of two of the pointed buffer
+ * size.
+ */
+ if (sizeof(dev_flow) <= 4)
+ flow_tcf_nl_brand(nlh, (uintptr_t)dev_flow);
+ else
+ flow_tcf_nl_brand(nlh, (uintptr_t)dev_flow >>
+ rte_log2_u32(rte_align32prevpow2(size)));
+ return dev_flow;
+}
+
+/**
+ * Make adjustments for supporting count actions.
+ *
+ * @param[in] dev
+ * Pointer to the Ethernet device structure.
+ * @param[in] dev_flow
+ * Pointer to mlx5_flow.
+ * @param[out] error
+ * Pointer to error structure.
+ *
+ * @return
+ *   0 on success, otherwise a negative errno value is returned and rte_errno is set.
+ */
+static int
+flow_tcf_translate_action_count(struct rte_eth_dev *dev __rte_unused,
+ struct mlx5_flow *dev_flow,
+ struct rte_flow_error *error)
+{
+ struct rte_flow *flow = dev_flow->flow;
+
+ if (!flow->counter) {
+ flow->counter = flow_tcf_counter_new();
+ if (!flow->counter)
+ return rte_flow_error_set(error, rte_errno,
+ RTE_FLOW_ERROR_TYPE_ACTION,
+ NULL,
+ "cannot get counter"
+ " context.");
+ }
+ return 0;
+}
+
+/**
+ * Translate flow for Linux TC flower and construct Netlink message.
+ *
+ * @param[in] dev
+ *   Pointer to the Ethernet device structure.
+ * @param[in, out] dev_flow
+ *   Pointer to the sub flow.
+ * @param[in] attr
+ * Pointer to the flow attributes.
+ * @param[in] items
+ * Pointer to the list of items.
+ * @param[in] actions
+ * Pointer to the list of actions.
+ * @param[out] error
+ * Pointer to the error structure.
+ *
+ * @return
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
+ */
+static int
+flow_tcf_translate(struct rte_eth_dev *dev, struct mlx5_flow *dev_flow,
+ const struct rte_flow_attr *attr,
+ const struct rte_flow_item items[],
+ const struct rte_flow_action actions[],
+ struct rte_flow_error *error)
+{
+ union {
+ const struct rte_flow_item_port_id *port_id;
+ const struct rte_flow_item_eth *eth;
+ const struct rte_flow_item_vlan *vlan;
+ const struct rte_flow_item_ipv4 *ipv4;
+ const struct rte_flow_item_ipv6 *ipv6;
+ const struct rte_flow_item_tcp *tcp;
+ const struct rte_flow_item_udp *udp;
+ } spec, mask;
+ union {
+ const struct rte_flow_action_port_id *port_id;
+ const struct rte_flow_action_jump *jump;
+ const struct rte_flow_action_of_push_vlan *of_push_vlan;
+ const struct rte_flow_action_of_set_vlan_vid *
+ of_set_vlan_vid;
+ const struct rte_flow_action_of_set_vlan_pcp *
+ of_set_vlan_pcp;
+ } conf;
+ struct flow_tcf_ptoi ptoi[PTOI_TABLE_SZ_MAX(dev)];
+ struct nlmsghdr *nlh = dev_flow->tcf.nlh;
+ struct tcmsg *tcm = dev_flow->tcf.tcm;
+ uint32_t na_act_index_cur;
+ bool eth_type_set = 0;
+ bool vlan_present = 0;
+ bool vlan_eth_type_set = 0;
+ bool ip_proto_set = 0;
+ struct nlattr *na_flower;
+ struct nlattr *na_flower_act;
+ struct nlattr *na_vlan_id = NULL;
+ struct nlattr *na_vlan_priority = NULL;
+ uint64_t item_flags = 0;
+ int ret;
+
+ claim_nonzero(flow_tcf_build_ptoi_table(dev, ptoi,
+ PTOI_TABLE_SZ_MAX(dev)));
+ nlh = dev_flow->tcf.nlh;
+ tcm = dev_flow->tcf.tcm;
+ /* Prepare API must have been called beforehand. */
+ assert(nlh != NULL && tcm != NULL);
+ tcm->tcm_family = AF_UNSPEC;
+ tcm->tcm_ifindex = ptoi[0].ifindex;
+ tcm->tcm_parent = TC_H_MAKE(TC_H_INGRESS, TC_H_MIN_INGRESS);
+ /*
+ * Priority cannot be zero to prevent the kernel from picking one
+ * automatically.
+ */
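+ /*
+ * tcm_info packs the filter priority in its upper 16 bits and the
+ * matched protocol (ETH_P_ALL here) in its lower 16 bits.
+ */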
+ tcm->tcm_info = TC_H_MAKE((attr->priority + 1) << 16,
+ RTE_BE16(ETH_P_ALL));
+ if (attr->group > 0)
+ mnl_attr_put_u32(nlh, TCA_CHAIN, attr->group);
+ mnl_attr_put_strz(nlh, TCA_KIND, "flower");
+ na_flower = mnl_attr_nest_start(nlh, TCA_OPTIONS);
+ mnl_attr_put_u32(nlh, TCA_FLOWER_FLAGS, TCA_CLS_FLAGS_SKIP_SW);
+ for (; items->type != RTE_FLOW_ITEM_TYPE_END; items++) {
+ unsigned int i;
+
+ switch (items->type) {
+ case RTE_FLOW_ITEM_TYPE_VOID:
+ break;
+ case RTE_FLOW_ITEM_TYPE_PORT_ID:
+ mask.port_id = flow_tcf_item_mask
+ (items, &rte_flow_item_port_id_mask,
+ &flow_tcf_mask_supported.port_id,
+ &flow_tcf_mask_empty.port_id,
+ sizeof(flow_tcf_mask_supported.port_id),
+ error);
+ assert(mask.port_id);
+ if (mask.port_id == &flow_tcf_mask_empty.port_id)
+ break;
+ spec.port_id = items->spec;
+ if (!mask.port_id->id)
+ i = 0;
+ else
+ for (i = 0; ptoi[i].ifindex; ++i)
+ if (ptoi[i].port_id == spec.port_id->id)
+ break;
+ assert(ptoi[i].ifindex);
+ tcm->tcm_ifindex = ptoi[i].ifindex;
+ break;
+ case RTE_FLOW_ITEM_TYPE_ETH:
+ item_flags |= MLX5_FLOW_LAYER_OUTER_L2;
+ mask.eth = flow_tcf_item_mask
+ (items, &rte_flow_item_eth_mask,
+ &flow_tcf_mask_supported.eth,
+ &flow_tcf_mask_empty.eth,
+ sizeof(flow_tcf_mask_supported.eth),
+ error);
+ assert(mask.eth);
+ if (mask.eth == &flow_tcf_mask_empty.eth)
+ break;
+ spec.eth = items->spec;
+ if (mask.eth->type) {
+ mnl_attr_put_u16(nlh, TCA_FLOWER_KEY_ETH_TYPE,
+ spec.eth->type);
+ eth_type_set = 1;
+ }
+ if (!is_zero_ether_addr(&mask.eth->dst)) {
+ mnl_attr_put(nlh, TCA_FLOWER_KEY_ETH_DST,
+ ETHER_ADDR_LEN,
+ spec.eth->dst.addr_bytes);
+ mnl_attr_put(nlh, TCA_FLOWER_KEY_ETH_DST_MASK,
+ ETHER_ADDR_LEN,
+ mask.eth->dst.addr_bytes);
+ }
+ if (!is_zero_ether_addr(&mask.eth->src)) {
+ mnl_attr_put(nlh, TCA_FLOWER_KEY_ETH_SRC,
+ ETHER_ADDR_LEN,
+ spec.eth->src.addr_bytes);
+ mnl_attr_put(nlh, TCA_FLOWER_KEY_ETH_SRC_MASK,
+ ETHER_ADDR_LEN,
+ mask.eth->src.addr_bytes);
+ }
+ break;
+ case RTE_FLOW_ITEM_TYPE_VLAN:
+ item_flags |= MLX5_FLOW_LAYER_OUTER_VLAN;
+ mask.vlan = flow_tcf_item_mask
+ (items, &rte_flow_item_vlan_mask,
+ &flow_tcf_mask_supported.vlan,
+ &flow_tcf_mask_empty.vlan,
+ sizeof(flow_tcf_mask_supported.vlan),
+ error);
+ assert(mask.vlan);
+ if (!eth_type_set)
+ mnl_attr_put_u16(nlh, TCA_FLOWER_KEY_ETH_TYPE,
+ RTE_BE16(ETH_P_8021Q));
+ eth_type_set = 1;
+ vlan_present = 1;
+ if (mask.vlan == &flow_tcf_mask_empty.vlan)
+ break;
+ spec.vlan = items->spec;
+ if (mask.vlan->inner_type) {
+ mnl_attr_put_u16(nlh,
+ TCA_FLOWER_KEY_VLAN_ETH_TYPE,
+ spec.vlan->inner_type);
+ vlan_eth_type_set = 1;
+ }
+ if (mask.vlan->tci & RTE_BE16(0xe000))
+ mnl_attr_put_u8(nlh, TCA_FLOWER_KEY_VLAN_PRIO,
+ (rte_be_to_cpu_16
+ (spec.vlan->tci) >> 13) & 0x7);
+ if (mask.vlan->tci & RTE_BE16(0x0fff))
+ mnl_attr_put_u16(nlh, TCA_FLOWER_KEY_VLAN_ID,
+ rte_be_to_cpu_16
+ (spec.vlan->tci &
+ RTE_BE16(0x0fff)));
+ break;
+ case RTE_FLOW_ITEM_TYPE_IPV4:
+ item_flags |= MLX5_FLOW_LAYER_OUTER_L3_IPV4;
+ mask.ipv4 = flow_tcf_item_mask
+ (items, &rte_flow_item_ipv4_mask,
+ &flow_tcf_mask_supported.ipv4,
+ &flow_tcf_mask_empty.ipv4,
+ sizeof(flow_tcf_mask_supported.ipv4),
+ error);
+ assert(mask.ipv4);
+ if (!eth_type_set || !vlan_eth_type_set)
+ mnl_attr_put_u16(nlh,
+ vlan_present ?
+ TCA_FLOWER_KEY_VLAN_ETH_TYPE :
+ TCA_FLOWER_KEY_ETH_TYPE,
+ RTE_BE16(ETH_P_IP));
+ eth_type_set = 1;
+ vlan_eth_type_set = 1;
+ if (mask.ipv4 == &flow_tcf_mask_empty.ipv4)
+ break;
+ spec.ipv4 = items->spec;
+ if (mask.ipv4->hdr.next_proto_id) {
+ mnl_attr_put_u8(nlh, TCA_FLOWER_KEY_IP_PROTO,
+ spec.ipv4->hdr.next_proto_id);
+ ip_proto_set = 1;
+ }
+ if (mask.ipv4->hdr.src_addr) {
+ mnl_attr_put_u32(nlh, TCA_FLOWER_KEY_IPV4_SRC,
+ spec.ipv4->hdr.src_addr);
+ mnl_attr_put_u32(nlh,
+ TCA_FLOWER_KEY_IPV4_SRC_MASK,
+ mask.ipv4->hdr.src_addr);
+ }
+ if (mask.ipv4->hdr.dst_addr) {
+ mnl_attr_put_u32(nlh, TCA_FLOWER_KEY_IPV4_DST,
+ spec.ipv4->hdr.dst_addr);
+ mnl_attr_put_u32(nlh,
+ TCA_FLOWER_KEY_IPV4_DST_MASK,
+ mask.ipv4->hdr.dst_addr);
+ }
+ break;
+ case RTE_FLOW_ITEM_TYPE_IPV6:
+ item_flags |= MLX5_FLOW_LAYER_OUTER_L3_IPV6;
+ mask.ipv6 = flow_tcf_item_mask
+ (items, &rte_flow_item_ipv6_mask,
+ &flow_tcf_mask_supported.ipv6,
+ &flow_tcf_mask_empty.ipv6,
+ sizeof(flow_tcf_mask_supported.ipv6),
+ error);
+ assert(mask.ipv6);
+ if (!eth_type_set || !vlan_eth_type_set)
+ mnl_attr_put_u16(nlh,
+ vlan_present ?
+ TCA_FLOWER_KEY_VLAN_ETH_TYPE :
+ TCA_FLOWER_KEY_ETH_TYPE,
+ RTE_BE16(ETH_P_IPV6));
+ eth_type_set = 1;
+ vlan_eth_type_set = 1;
+ if (mask.ipv6 == &flow_tcf_mask_empty.ipv6)
+ break;
+ spec.ipv6 = items->spec;
+ if (mask.ipv6->hdr.proto) {
+ mnl_attr_put_u8(nlh, TCA_FLOWER_KEY_IP_PROTO,
+ spec.ipv6->hdr.proto);
+ ip_proto_set = 1;
+ }
+ if (!IN6_IS_ADDR_UNSPECIFIED(mask.ipv6->hdr.src_addr)) {
+ mnl_attr_put(nlh, TCA_FLOWER_KEY_IPV6_SRC,
+ sizeof(spec.ipv6->hdr.src_addr),
+ spec.ipv6->hdr.src_addr);
+ mnl_attr_put(nlh, TCA_FLOWER_KEY_IPV6_SRC_MASK,
+ sizeof(mask.ipv6->hdr.src_addr),
+ mask.ipv6->hdr.src_addr);
+ }
+ if (!IN6_IS_ADDR_UNSPECIFIED(mask.ipv6->hdr.dst_addr)) {
+ mnl_attr_put(nlh, TCA_FLOWER_KEY_IPV6_DST,
+ sizeof(spec.ipv6->hdr.dst_addr),
+ spec.ipv6->hdr.dst_addr);
+ mnl_attr_put(nlh, TCA_FLOWER_KEY_IPV6_DST_MASK,
+ sizeof(mask.ipv6->hdr.dst_addr),
+ mask.ipv6->hdr.dst_addr);
+ }
+ break;
+ case RTE_FLOW_ITEM_TYPE_UDP:
+ item_flags |= MLX5_FLOW_LAYER_OUTER_L4_UDP;
+ mask.udp = flow_tcf_item_mask
+ (items, &rte_flow_item_udp_mask,
+ &flow_tcf_mask_supported.udp,
+ &flow_tcf_mask_empty.udp,
+ sizeof(flow_tcf_mask_supported.udp),
+ error);
+ assert(mask.udp);
+ if (!ip_proto_set)
+ mnl_attr_put_u8(nlh, TCA_FLOWER_KEY_IP_PROTO,
+ IPPROTO_UDP);
+ if (mask.udp == &flow_tcf_mask_empty.udp)
+ break;
+ spec.udp = items->spec;
+ if (mask.udp->hdr.src_port) {
+ mnl_attr_put_u16(nlh, TCA_FLOWER_KEY_UDP_SRC,
+ spec.udp->hdr.src_port);
+ mnl_attr_put_u16(nlh,
+ TCA_FLOWER_KEY_UDP_SRC_MASK,
+ mask.udp->hdr.src_port);
+ }
+ if (mask.udp->hdr.dst_port) {
+ mnl_attr_put_u16(nlh, TCA_FLOWER_KEY_UDP_DST,
+ spec.udp->hdr.dst_port);
+ mnl_attr_put_u16(nlh,
+ TCA_FLOWER_KEY_UDP_DST_MASK,
+ mask.udp->hdr.dst_port);
+ }
+ break;
+ case RTE_FLOW_ITEM_TYPE_TCP:
+ item_flags |= MLX5_FLOW_LAYER_OUTER_L4_TCP;
+ mask.tcp = flow_tcf_item_mask
+ (items, &rte_flow_item_tcp_mask,
+ &flow_tcf_mask_supported.tcp,
+ &flow_tcf_mask_empty.tcp,
+ sizeof(flow_tcf_mask_supported.tcp),
+ error);
+ assert(mask.tcp);
+ if (!ip_proto_set)
+ mnl_attr_put_u8(nlh, TCA_FLOWER_KEY_IP_PROTO,
+ IPPROTO_TCP);
+ if (mask.tcp == &flow_tcf_mask_empty.tcp)
+ break;
+ spec.tcp = items->spec;
+ if (mask.tcp->hdr.src_port) {
+ mnl_attr_put_u16(nlh, TCA_FLOWER_KEY_TCP_SRC,
+ spec.tcp->hdr.src_port);
+ mnl_attr_put_u16(nlh,
+ TCA_FLOWER_KEY_TCP_SRC_MASK,
+ mask.tcp->hdr.src_port);
+ }
+ if (mask.tcp->hdr.dst_port) {
+ mnl_attr_put_u16(nlh, TCA_FLOWER_KEY_TCP_DST,
+ spec.tcp->hdr.dst_port);
+ mnl_attr_put_u16(nlh,
+ TCA_FLOWER_KEY_TCP_DST_MASK,
+ mask.tcp->hdr.dst_port);
+ }
+ if (mask.tcp->hdr.tcp_flags) {
+ mnl_attr_put_u16
+ (nlh,
+ TCA_FLOWER_KEY_TCP_FLAGS,
+ rte_cpu_to_be_16
+ (spec.tcp->hdr.tcp_flags));
+ mnl_attr_put_u16
+ (nlh,
+ TCA_FLOWER_KEY_TCP_FLAGS_MASK,
+ rte_cpu_to_be_16
+ (mask.tcp->hdr.tcp_flags));
+ }
+ break;
+ default:
+ return rte_flow_error_set(error, ENOTSUP,
+ RTE_FLOW_ERROR_TYPE_ITEM,
+ NULL, "item not supported");
+ }
+ }
+ na_flower_act = mnl_attr_nest_start(nlh, TCA_FLOWER_ACT);
+ na_act_index_cur = 1;
+ for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
+ struct nlattr *na_act_index;
+ struct nlattr *na_act;
+ unsigned int vlan_act;
+ unsigned int i;
+
+ switch (actions->type) {
+ case RTE_FLOW_ACTION_TYPE_VOID:
+ break;
+ case RTE_FLOW_ACTION_TYPE_PORT_ID:
+ conf.port_id = actions->conf;
+ if (conf.port_id->original)
+ i = 0;
+ else
+ for (i = 0; ptoi[i].ifindex; ++i)
+ if (ptoi[i].port_id == conf.port_id->id)
+ break;
+ assert(ptoi[i].ifindex);
+ na_act_index =
+ mnl_attr_nest_start(nlh, na_act_index_cur++);
+ assert(na_act_index);
+ mnl_attr_put_strz(nlh, TCA_ACT_KIND, "mirred");
+ na_act = mnl_attr_nest_start(nlh, TCA_ACT_OPTIONS);
+ assert(na_act);
+ mnl_attr_put(nlh, TCA_MIRRED_PARMS,
+ sizeof(struct tc_mirred),
+ &(struct tc_mirred){
+ .action = TC_ACT_STOLEN,
+ .eaction = TCA_EGRESS_REDIR,
+ .ifindex = ptoi[i].ifindex,
+ });
+ mnl_attr_nest_end(nlh, na_act);
+ mnl_attr_nest_end(nlh, na_act_index);
+ break;
+ case RTE_FLOW_ACTION_TYPE_JUMP:
+ conf.jump = actions->conf;
+ na_act_index =
+ mnl_attr_nest_start(nlh, na_act_index_cur++);
+ assert(na_act_index);
+ mnl_attr_put_strz(nlh, TCA_ACT_KIND, "gact");
+ na_act = mnl_attr_nest_start(nlh, TCA_ACT_OPTIONS);
+ assert(na_act);
+ mnl_attr_put(nlh, TCA_GACT_PARMS,
+ sizeof(struct tc_gact),
+ &(struct tc_gact){
+ .action = TC_ACT_GOTO_CHAIN |
+ conf.jump->group,
+ });
+ mnl_attr_nest_end(nlh, na_act);
+ mnl_attr_nest_end(nlh, na_act_index);
+ break;
+ case RTE_FLOW_ACTION_TYPE_DROP:
+ na_act_index =
+ mnl_attr_nest_start(nlh, na_act_index_cur++);
+ assert(na_act_index);
+ mnl_attr_put_strz(nlh, TCA_ACT_KIND, "gact");
+ na_act = mnl_attr_nest_start(nlh, TCA_ACT_OPTIONS);
+ assert(na_act);
+ mnl_attr_put(nlh, TCA_GACT_PARMS,
+ sizeof(struct tc_gact),
+ &(struct tc_gact){
+ .action = TC_ACT_SHOT,
+ });
+ mnl_attr_nest_end(nlh, na_act);
+ mnl_attr_nest_end(nlh, na_act_index);
+ break;
+ case RTE_FLOW_ACTION_TYPE_COUNT:
+ /*
+ * Driver adds the count action implicitly for
+ * each rule it creates.
+ */
+ ret = flow_tcf_translate_action_count(dev,
+ dev_flow, error);
+ if (ret < 0)
+ return ret;
+ break;
+ case RTE_FLOW_ACTION_TYPE_OF_POP_VLAN:
+ conf.of_push_vlan = NULL;
+ vlan_act = TCA_VLAN_ACT_POP;
+ goto action_of_vlan;
+ case RTE_FLOW_ACTION_TYPE_OF_PUSH_VLAN:
+ conf.of_push_vlan = actions->conf;
+ vlan_act = TCA_VLAN_ACT_PUSH;
+ goto action_of_vlan;
+ case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_VID:
+ conf.of_set_vlan_vid = actions->conf;
+ if (na_vlan_id)
+ goto override_na_vlan_id;
+ vlan_act = TCA_VLAN_ACT_MODIFY;
+ goto action_of_vlan;
+ case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_PCP:
+ conf.of_set_vlan_pcp = actions->conf;
+ if (na_vlan_priority)
+ goto override_na_vlan_priority;
+ vlan_act = TCA_VLAN_ACT_MODIFY;
+ goto action_of_vlan;
+action_of_vlan:
+ na_act_index =
+ mnl_attr_nest_start(nlh, na_act_index_cur++);
+ assert(na_act_index);
+ mnl_attr_put_strz(nlh, TCA_ACT_KIND, "vlan");
+ na_act = mnl_attr_nest_start(nlh, TCA_ACT_OPTIONS);
+ assert(na_act);
+ mnl_attr_put(nlh, TCA_VLAN_PARMS,
+ sizeof(struct tc_vlan),
+ &(struct tc_vlan){
+ .action = TC_ACT_PIPE,
+ .v_action = vlan_act,
+ });
+ if (vlan_act == TCA_VLAN_ACT_POP) {
+ mnl_attr_nest_end(nlh, na_act);
+ mnl_attr_nest_end(nlh, na_act_index);
+ break;
+ }
+ if (vlan_act == TCA_VLAN_ACT_PUSH)
+ mnl_attr_put_u16(nlh,
+ TCA_VLAN_PUSH_VLAN_PROTOCOL,
+ conf.of_push_vlan->ethertype);
+ na_vlan_id = mnl_nlmsg_get_payload_tail(nlh);
+ mnl_attr_put_u16(nlh, TCA_VLAN_PAD, 0);
+ na_vlan_priority = mnl_nlmsg_get_payload_tail(nlh);
+ mnl_attr_put_u8(nlh, TCA_VLAN_PAD, 0);
+ mnl_attr_nest_end(nlh, na_act);
+ mnl_attr_nest_end(nlh, na_act_index);
+ if (actions->type ==
+ RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_VID) {
+override_na_vlan_id:
+ na_vlan_id->nla_type = TCA_VLAN_PUSH_VLAN_ID;
+ *(uint16_t *)mnl_attr_get_payload(na_vlan_id) =
+ rte_be_to_cpu_16
+ (conf.of_set_vlan_vid->vlan_vid);
+ } else if (actions->type ==
+ RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_PCP) {
+override_na_vlan_priority:
+ na_vlan_priority->nla_type =
+ TCA_VLAN_PUSH_VLAN_PRIORITY;
+ *(uint8_t *)mnl_attr_get_payload
+ (na_vlan_priority) =
+ conf.of_set_vlan_pcp->vlan_pcp;
+ }
+ break;
+ case RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC:
+ case RTE_FLOW_ACTION_TYPE_SET_IPV4_DST:
+ case RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC:
+ case RTE_FLOW_ACTION_TYPE_SET_IPV6_DST:
+ case RTE_FLOW_ACTION_TYPE_SET_TP_SRC:
+ case RTE_FLOW_ACTION_TYPE_SET_TP_DST:
+ case RTE_FLOW_ACTION_TYPE_SET_TTL:
+ case RTE_FLOW_ACTION_TYPE_DEC_TTL:
+ case RTE_FLOW_ACTION_TYPE_SET_MAC_SRC:
+ case RTE_FLOW_ACTION_TYPE_SET_MAC_DST:
+ na_act_index =
+ mnl_attr_nest_start(nlh, na_act_index_cur++);
+ flow_tcf_create_pedit_mnl_msg(nlh,
+ &actions, item_flags);
+ mnl_attr_nest_end(nlh, na_act_index);
+ break;
+ default:
+ return rte_flow_error_set(error, ENOTSUP,
+ RTE_FLOW_ERROR_TYPE_ACTION,
+ actions,
+ "action not supported");
+ }
+ }
+ assert(na_flower);
+ assert(na_flower_act);
+ mnl_attr_nest_end(nlh, na_flower_act);
+ mnl_attr_nest_end(nlh, na_flower);
+ return 0;
+}
+
+/**
+ * Send Netlink message with acknowledgment.
+ *
+ * @param ctx
+ * Flow context to use.
+ * @param nlh
+ * Message to send. This function always raises the NLM_F_ACK flag before
+ * sending.
+ *
+ * @return
+ * 0 on success, a negative errno value otherwise and rte_errno is set.
+ */
+static int
+flow_tcf_nl_ack(struct mlx5_flow_tcf_context *ctx, struct nlmsghdr *nlh)
+{
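+ /*
+ * The kernel acknowledges with an NLMSG_ERROR message that echoes the
+ * original request, so the answer buffer is sized for the error header
+ * plus the request payload.
+ */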
+ alignas(struct nlmsghdr)
+ uint8_t ans[mnl_nlmsg_size(sizeof(struct nlmsgerr)) +
+ nlh->nlmsg_len - sizeof(*nlh)];
+ uint32_t seq = ctx->seq++;
+ struct mnl_socket *nl = ctx->nl;
+ int ret;
+
+ nlh->nlmsg_flags |= NLM_F_ACK;
+ nlh->nlmsg_seq = seq;
+ ret = mnl_socket_sendto(nl, nlh, nlh->nlmsg_len);
+ if (ret != -1)
+ ret = mnl_socket_recvfrom(nl, ans, sizeof(ans));
+ if (ret != -1)
+ ret = mnl_cb_run
+ (ans, ret, seq, mnl_socket_get_portid(nl), NULL, NULL);
+ if (ret > 0)
+ return 0;
+ rte_errno = errno;
+ return -rte_errno;
+}
+
+/**
+ * Apply flow to E-Switch by sending Netlink message.
+ *
+ * @param[in] dev
+ * Pointer to Ethernet device.
+ * @param[in, out] flow
+ * Pointer to the sub flow.
+ * @param[out] error
+ * Pointer to the error structure.
+ *
+ * @return
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
+ */
+static int
+flow_tcf_apply(struct rte_eth_dev *dev, struct rte_flow *flow,
+ struct rte_flow_error *error)
+{
+ struct priv *priv = dev->data->dev_private;
+ struct mlx5_flow_tcf_context *ctx = priv->tcf_context;
+ struct mlx5_flow *dev_flow;
+ struct nlmsghdr *nlh;
+
+ dev_flow = LIST_FIRST(&flow->dev_flows);
+ /* E-Switch flow can't be expanded. */
+ assert(!LIST_NEXT(dev_flow, next));
+ nlh = dev_flow->tcf.nlh;
+ nlh->nlmsg_type = RTM_NEWTFILTER;
+ nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL;
+ if (!flow_tcf_nl_ack(ctx, nlh))
+ return 0;
+ return rte_flow_error_set(error, rte_errno,
+ RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
+ "netlink: failed to create TC flow rule");
+}
+
+/**
+ * Remove flow from E-Switch by sending Netlink message.
+ *
+ * @param[in] dev
+ * Pointer to Ethernet device.
+ * @param[in, out] flow
+ * Pointer to the sub flow.
+ */
+static void
+flow_tcf_remove(struct rte_eth_dev *dev, struct rte_flow *flow)
+{
+ struct priv *priv = dev->data->dev_private;
+ struct mlx5_flow_tcf_context *ctx = priv->tcf_context;
+ struct mlx5_flow *dev_flow;
+ struct nlmsghdr *nlh;
+
+ if (!flow)
+ return;
+ if (flow->counter) {
+ if (--flow->counter->ref_cnt == 0) {
+ rte_free(flow->counter);
+ flow->counter = NULL;
+ }
+ }
+ dev_flow = LIST_FIRST(&flow->dev_flows);
+ if (!dev_flow)
+ return;
+ /* E-Switch flow can't be expanded. */
+ assert(!LIST_NEXT(dev_flow, next));
+ nlh = dev_flow->tcf.nlh;
+ nlh->nlmsg_type = RTM_DELTFILTER;
+ nlh->nlmsg_flags = NLM_F_REQUEST;
+ flow_tcf_nl_ack(ctx, nlh);
+}
+
+/**
+ * Remove flow from E-Switch and release resources of the device flow.
+ *
+ * @param[in] dev
+ * Pointer to Ethernet device.
+ * @param[in, out] flow
+ * Pointer to the sub flow.
+ */
+static void
+flow_tcf_destroy(struct rte_eth_dev *dev, struct rte_flow *flow)
+{
+ struct mlx5_flow *dev_flow;
+
+ if (!flow)
+ return;
+ flow_tcf_remove(dev, flow);
+ dev_flow = LIST_FIRST(&flow->dev_flows);
+ if (!dev_flow)
+ return;
+ /* E-Switch flow can't be expanded. */
+ assert(!LIST_NEXT(dev_flow, next));
+ LIST_REMOVE(dev_flow, next);
+ rte_free(dev_flow);
+}
+
+/**
+ * Helper routine for figuring the space size required for a parse buffer.
+ *
+ * @param array
+ * array of values to use.
+ * @param idx
+ * Current location in array.
+ * @param value
+ * Value to compare with.
+ *
+ * @return
+ * The maximum between the given value and the array value on index.
+ */
+static uint16_t
+flow_tcf_arr_val_max(uint16_t array[], int idx, uint16_t value)
+{
+	return idx < 0 ? value : RTE_MAX(array[idx], value);
+}
+
+/**
+ * Parse rtnetlink message attributes filling the attribute table with the info
+ * retrieved.
+ *
+ * @param tb
+ * Attribute table to be filled.
+ * @param max
+ *   Maximum entry in the attribute table.
+ * @param rta
+ * The attributes section in the message to be parsed.
+ * @param len
+ * The length of the attributes section in the message.
+ */
+static void
+flow_tcf_nl_parse_rtattr(struct rtattr *tb[], int max,
+ struct rtattr *rta, int len)
+{
+ unsigned short type;
+ memset(tb, 0, sizeof(struct rtattr *) * (max + 1));
+ while (RTA_OK(rta, len)) {
+ type = rta->rta_type;
+ if (type <= max && !tb[type])
+ tb[type] = rta;
+ rta = RTA_NEXT(rta, len);
+ }
+}
+
+/**
+ * Extract flow counters from flower action.
+ *
+ * @param rta
+ * flower action stats properties in the Netlink message received.
+ * @param rta_type
+ * The backward sequence of rta_types, as written in the attribute table,
+ *   that we need to traverse in order to get to the requested object.
+ * @param idx
+ * Current location in rta_type table.
+ * @param[out] data
+ * data holding the count statistics of the rte_flow retrieved from
+ * the message.
+ *
+ * @return
+ * 0 if data was found and retrieved, -1 otherwise.
+ */
+static int
+flow_tcf_nl_action_stats_parse_and_get(struct rtattr *rta,
+ uint16_t rta_type[], int idx,
+ struct gnet_stats_basic *data)
+{
+ int tca_stats_max = flow_tcf_arr_val_max(rta_type, idx,
+ TCA_STATS_BASIC);
+ struct rtattr *tbs[tca_stats_max + 1];
+
+ if (rta == NULL || idx < 0)
+ return -1;
+ flow_tcf_nl_parse_rtattr(tbs, tca_stats_max,
+ RTA_DATA(rta), RTA_PAYLOAD(rta));
+ switch (rta_type[idx]) {
+ case TCA_STATS_BASIC:
+ if (tbs[TCA_STATS_BASIC]) {
+ memcpy(data, RTA_DATA(tbs[TCA_STATS_BASIC]),
+ RTE_MIN(RTA_PAYLOAD(tbs[TCA_STATS_BASIC]),
+ sizeof(*data)));
+ return 0;
+ }
+ break;
+ default:
+ break;
+ }
+ return -1;
+}
+
+/**
+ * Parse flower single action retrieving the requested action attribute,
+ * if found.
+ *
+ * @param arg
+ * flower action properties in the Netlink message received.
+ * @param rta_type
+ * The backward sequence of rta_types, as written in the attribute table,
+ *   that we need to traverse in order to get to the requested object.
+ * @param idx
+ * Current location in rta_type table.
+ * @param[out] data
+ * Count statistics retrieved from the message query.
+ *
+ * @return
+ * 0 if data was found and retrieved, -1 otherwise.
+ */
+static int
+flow_tcf_nl_parse_one_action_and_get(struct rtattr *arg,
+ uint16_t rta_type[], int idx, void *data)
+{
+ int tca_act_max = flow_tcf_arr_val_max(rta_type, idx, TCA_ACT_STATS);
+ struct rtattr *tb[tca_act_max + 1];
+
+ if (arg == NULL || idx < 0)
+ return -1;
+ flow_tcf_nl_parse_rtattr(tb, tca_act_max,
+ RTA_DATA(arg), RTA_PAYLOAD(arg));
+ if (tb[TCA_ACT_KIND] == NULL)
+ return -1;
+ switch (rta_type[idx]) {
+ case TCA_ACT_STATS:
+ if (tb[TCA_ACT_STATS])
+ return flow_tcf_nl_action_stats_parse_and_get
+ (tb[TCA_ACT_STATS],
+ rta_type, --idx,
+ (struct gnet_stats_basic *)data);
+ break;
+ default:
+ break;
+ }
+ return -1;
+}
+
+/**
+ * Parse flower action section in the message retrieving the requested
+ * attribute from the first action that provides it.
+ *
+ * @param arg
+ *   flower actions attribute in the Netlink message received.
+ * @param rta_type
+ * The backward sequence of rta_types, as written in the attribute table,
+ *   that we need to traverse in order to get to the requested object.
+ * @param idx
+ * Current location in rta_type table.
+ * @param[out] data
+ * data retrieved from the message query.
+ *
+ * @return
+ * 0 if data was found and retrieved, -1 otherwise.
+ */
+static int
+flow_tcf_nl_action_parse_and_get(struct rtattr *arg,
+ uint16_t rta_type[], int idx, void *data)
+{
+ struct rtattr *tb[TCA_ACT_MAX_PRIO + 1];
+ int i;
+
+ if (arg == NULL || idx < 0)
+ return -1;
+ flow_tcf_nl_parse_rtattr(tb, TCA_ACT_MAX_PRIO,
+ RTA_DATA(arg), RTA_PAYLOAD(arg));
+ switch (rta_type[idx]) {
+ /*
+ * flow counters are stored in the actions defined by the flow
+ * and not in the flow itself, therefore we need to traverse the
+ * flower chain of actions in search for them.
+ *
+ * Note that the index is not decremented here.
+ */
+ case TCA_ACT_STATS:
+ for (i = 0; i <= TCA_ACT_MAX_PRIO; i++) {
+ if (tb[i] &&
+ !flow_tcf_nl_parse_one_action_and_get(tb[i],
+ rta_type,
+ idx, data))
+ return 0;
+ }
+ break;
+ default:
+ break;
+ }
+ return -1;
+}
+
+/**
+ * Parse flower classifier options in the message, retrieving the requested
+ * attribute if found.
+ *
+ * @param opt
+ * flower section in the Netlink message received.
+ * @param rta_type
+ * The backward sequence of rta_types, as written in the attribute table,
+ *   that we need to traverse in order to get to the requested object.
+ * @param idx
+ * Current location in rta_type table.
+ * @param[out] data
+ * data retrieved from the message query.
+ *
+ * @return
+ * 0 if data was found and retrieved, -1 otherwise.
+ */
+static int
+flow_tcf_nl_opts_parse_and_get(struct rtattr *opt,
+ uint16_t rta_type[], int idx, void *data)
+{
+ int tca_flower_max = flow_tcf_arr_val_max(rta_type, idx,
+ TCA_FLOWER_ACT);
+ struct rtattr *tb[tca_flower_max + 1];
+
+ if (!opt || idx < 0)
+ return -1;
+ flow_tcf_nl_parse_rtattr(tb, tca_flower_max,
+ RTA_DATA(opt), RTA_PAYLOAD(opt));
+ switch (rta_type[idx]) {
+ case TCA_FLOWER_ACT:
+ if (tb[TCA_FLOWER_ACT])
+ return flow_tcf_nl_action_parse_and_get
+ (tb[TCA_FLOWER_ACT],
+ rta_type, --idx, data);
+ break;
+ default:
+ break;
+ }
+ return -1;
+}
+
+/**
+ * Parse Netlink reply on filter query, retrieving the flow counters.
+ *
+ * @param cnlh
+ * Message received from Netlink.
+ * @param rta_type
+ * The backward sequence of rta_types, as written in the attribute table,
+ *   that we need to traverse in order to get to the requested object.
+ * @param idx
+ * Current location in rta_type table.
+ * @param[out] data
+ * data retrieved from the message query.
+ *
+ * @return
+ * 0 if data was found and retrieved, -1 otherwise.
+ */
+static int
+flow_tcf_nl_filter_parse_and_get(struct nlmsghdr *cnlh,
+ uint16_t rta_type[], int idx, void *data)
+{
+ struct nlmsghdr *nlh = cnlh;
+ struct tcmsg *t = NLMSG_DATA(nlh);
+ int len = nlh->nlmsg_len;
+ int tca_max = flow_tcf_arr_val_max(rta_type, idx, TCA_OPTIONS);
+ struct rtattr *tb[tca_max + 1];
+
+ if (idx < 0)
+ return -1;
+ if (nlh->nlmsg_type != RTM_NEWTFILTER &&
+ nlh->nlmsg_type != RTM_GETTFILTER &&
+ nlh->nlmsg_type != RTM_DELTFILTER)
+ return -1;
+ len -= NLMSG_LENGTH(sizeof(*t));
+ if (len < 0)
+ return -1;
+ flow_tcf_nl_parse_rtattr(tb, tca_max, TCA_RTA(t), len);
+ /* Not a TC flower flow - bail out */
+ if (!tb[TCA_KIND] ||
+ strcmp(RTA_DATA(tb[TCA_KIND]), "flower"))
+ return -1;
+ switch (rta_type[idx]) {
+ case TCA_OPTIONS:
+ if (tb[TCA_OPTIONS])
+ return flow_tcf_nl_opts_parse_and_get(tb[TCA_OPTIONS],
+ rta_type,
+ --idx, data);
+ break;
+ default:
+ break;
+ }
+ return -1;
+}
+
+/**
+ * A callback to parse Netlink reply on TC flower query.
+ *
+ * @param nlh
+ * Message received from Netlink.
+ * @param[out] data
+ * Pointer to data area to be filled by the parsing routine.
+ *   Assumed to be a pointer to struct flow_tcf_stats_basic.
+ *
+ * @return
+ * MNL_CB_OK value.
+ */
+static int
+flow_tcf_nl_message_get_stats_basic(const struct nlmsghdr *nlh, void *data)
+{
+ /*
+ * The backward sequence of rta_types to pass in order to get
+ * to the counters.
+ */
+ uint16_t rta_type[] = { TCA_STATS_BASIC, TCA_ACT_STATS,
+ TCA_FLOWER_ACT, TCA_OPTIONS };
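+	/* Traversal starts from the last entry (TCA_OPTIONS) downwards. */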
+ struct flow_tcf_stats_basic *sb_data = data;
+ union {
+ const struct nlmsghdr *c;
+ struct nlmsghdr *nc;
+ } tnlh = { .c = nlh };
+
+ if (!flow_tcf_nl_filter_parse_and_get(tnlh.nc, rta_type,
+ RTE_DIM(rta_type) - 1,
+ (void *)&sb_data->counters))
+ sb_data->valid = true;
+ return MNL_CB_OK;
+}
+
+/**
+ * Query a TC flower rule for its statistics via netlink.
+ *
+ * @param[in] dev
+ * Pointer to Ethernet device.
+ * @param[in] flow
+ * Pointer to the sub flow.
+ * @param[out] data
+ * data retrieved by the query.
+ * @param[out] error
+ * Perform verbose error reporting if not NULL.
+ *
+ * @return
+ * 0 on success, a negative errno value otherwise and rte_errno is set.
+ */
+static int
+flow_tcf_query_count(struct rte_eth_dev *dev,
+ struct rte_flow *flow,
+ void *data,
+ struct rte_flow_error *error)
+{
+ struct flow_tcf_stats_basic sb_data = { 0 };
+ struct rte_flow_query_count *qc = data;
+ struct priv *priv = dev->data->dev_private;
+ struct mlx5_flow_tcf_context *ctx = priv->tcf_context;
+ struct mnl_socket *nl = ctx->nl;
+ struct mlx5_flow *dev_flow;
+ struct nlmsghdr *nlh;
+ uint32_t seq = priv->tcf_context->seq++;
+ ssize_t ret;
+ assert(qc);
+
+ dev_flow = LIST_FIRST(&flow->dev_flows);
+ /* E-Switch flow can't be expanded. */
+ assert(!LIST_NEXT(dev_flow, next));
+ if (!dev_flow->flow->counter)
+ goto notsup_exit;
+ nlh = dev_flow->tcf.nlh;
+ nlh->nlmsg_type = RTM_GETTFILTER;
+ nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_ECHO;
+ nlh->nlmsg_seq = seq;
+ if (mnl_socket_sendto(nl, nlh, nlh->nlmsg_len) == -1)
+ goto error_exit;
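+	/* Read replies until the echoed filter message has been processed. */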
+ do {
+ ret = mnl_socket_recvfrom(nl, ctx->buf, ctx->buf_size);
+ if (ret <= 0)
+ break;
+ ret = mnl_cb_run(ctx->buf, ret, seq,
+ mnl_socket_get_portid(nl),
+ flow_tcf_nl_message_get_stats_basic,
+ (void *)&sb_data);
+ } while (ret > 0);
+ if (sb_data.valid) {
+ /* Return the delta from last reset. */
+ qc->hits_set = 1;
+ qc->bytes_set = 1;
+ qc->hits = sb_data.counters.packets - flow->counter->hits;
+ qc->bytes = sb_data.counters.bytes - flow->counter->bytes;
+ if (qc->reset) {
+ flow->counter->hits = sb_data.counters.packets;
+ flow->counter->bytes = sb_data.counters.bytes;
+ }
+ return 0;
+ }
+ return rte_flow_error_set(error, EINVAL,
+ RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
+ NULL,
+ "flow does not have counter");
+error_exit:
+ return rte_flow_error_set
+ (error, errno, RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
+ NULL, "netlink: failed to read flow rule counters");
+notsup_exit:
+ return rte_flow_error_set
+ (error, ENOTSUP, RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
+ NULL, "counters are not available.");
+}
+
+/**
+ * Query a flow.
+ *
+ * @see rte_flow_query()
+ * @see rte_flow_ops
+ */
+static int
+flow_tcf_query(struct rte_eth_dev *dev,
+ struct rte_flow *flow,
+ const struct rte_flow_action *actions,
+ void *data,
+ struct rte_flow_error *error)
+{
+ int ret = -EINVAL;
+
+ for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
+ switch (actions->type) {
+ case RTE_FLOW_ACTION_TYPE_VOID:
+ break;
+ case RTE_FLOW_ACTION_TYPE_COUNT:
+ ret = flow_tcf_query_count(dev, flow, data, error);
+ break;
+ default:
+ return rte_flow_error_set(error, ENOTSUP,
+ RTE_FLOW_ERROR_TYPE_ACTION,
+ actions,
+ "action not supported");
+ }
+ }
+ return ret;
+}
+
+const struct mlx5_flow_driver_ops mlx5_flow_tcf_drv_ops = {
+ .validate = flow_tcf_validate,
+ .prepare = flow_tcf_prepare,
+ .translate = flow_tcf_translate,
+ .apply = flow_tcf_apply,
+ .remove = flow_tcf_remove,
+ .destroy = flow_tcf_destroy,
+ .query = flow_tcf_query,
+};
+
+/**
+ * Create and configure a libmnl socket for Netlink flow rules.
+ *
+ * @return
+ * A valid libmnl socket object pointer on success, NULL otherwise and
+ * rte_errno is set.
+ */
+static struct mnl_socket *
+flow_tcf_mnl_socket_create(void)
+{
+ struct mnl_socket *nl = mnl_socket_open(NETLINK_ROUTE);
+
+ if (nl) {
+ mnl_socket_setsockopt(nl, NETLINK_CAP_ACK, &(int){ 1 },
+ sizeof(int));
+ if (!mnl_socket_bind(nl, 0, MNL_SOCKET_AUTOPID))
+ return nl;
+ }
+ rte_errno = errno;
+ if (nl)
+ mnl_socket_close(nl);
+ return NULL;
+}
+
+/**
+ * Destroy a libmnl socket.
+ *
+ * @param nl
+ * Libmnl socket of the @p NETLINK_ROUTE kind.
+ */
+static void
+flow_tcf_mnl_socket_destroy(struct mnl_socket *nl)
+{
+ if (nl)
+ mnl_socket_close(nl);
+}
+
+/**
+ * Initialize ingress qdisc of a given network interface.
+ *
+ * @param ctx
+ * Pointer to tc-flower context to use.
+ * @param ifindex
+ * Index of network interface to initialize.
+ * @param[out] error
+ * Perform verbose error reporting if not NULL.
+ *
+ * @return
+ * 0 on success, a negative errno value otherwise and rte_errno is set.
+ */
+int
+mlx5_flow_tcf_init(struct mlx5_flow_tcf_context *ctx,
+ unsigned int ifindex, struct rte_flow_error *error)
+{
+ struct nlmsghdr *nlh;
+ struct tcmsg *tcm;
+ alignas(struct nlmsghdr)
+ uint8_t buf[mnl_nlmsg_size(sizeof(*tcm) + 128)];
+
+ /* Destroy existing ingress qdisc and everything attached to it. */
+ nlh = mnl_nlmsg_put_header(buf);
+ nlh->nlmsg_type = RTM_DELQDISC;
+ nlh->nlmsg_flags = NLM_F_REQUEST;
+ tcm = mnl_nlmsg_put_extra_header(nlh, sizeof(*tcm));
+ tcm->tcm_family = AF_UNSPEC;
+ tcm->tcm_ifindex = ifindex;
+ tcm->tcm_handle = TC_H_MAKE(TC_H_INGRESS, 0);
+ tcm->tcm_parent = TC_H_INGRESS;
+ /* Ignore errors when qdisc is already absent. */
+ if (flow_tcf_nl_ack(ctx, nlh) &&
+ rte_errno != EINVAL && rte_errno != ENOENT)
+ return rte_flow_error_set(error, rte_errno,
+ RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
+ "netlink: failed to remove ingress"
+ " qdisc");
+ /* Create fresh ingress qdisc. */
+ nlh = mnl_nlmsg_put_header(buf);
+ nlh->nlmsg_type = RTM_NEWQDISC;
+ nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL;
+ tcm = mnl_nlmsg_put_extra_header(nlh, sizeof(*tcm));
+ tcm->tcm_family = AF_UNSPEC;
+ tcm->tcm_ifindex = ifindex;
+ tcm->tcm_handle = TC_H_MAKE(TC_H_INGRESS, 0);
+ tcm->tcm_parent = TC_H_INGRESS;
+ mnl_attr_put_strz_check(nlh, sizeof(buf), TCA_KIND, "ingress");
+ if (flow_tcf_nl_ack(ctx, nlh))
+ return rte_flow_error_set(error, rte_errno,
+ RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
+ "netlink: failed to create ingress"
+ " qdisc");
+ return 0;
+}
+
+/**
+ * Create libmnl context for Netlink flow rules.
+ *
+ * @return
+ * A valid libmnl socket object pointer on success, NULL otherwise and
+ * rte_errno is set.
+ */
+struct mlx5_flow_tcf_context *
+mlx5_flow_tcf_context_create(void)
+{
+ struct mlx5_flow_tcf_context *ctx = rte_zmalloc(__func__,
+ sizeof(*ctx),
+ sizeof(uint32_t));
+ if (!ctx)
+ goto error;
+ ctx->nl = flow_tcf_mnl_socket_create();
+ if (!ctx->nl)
+ goto error;
+ ctx->buf_size = MNL_SOCKET_BUFFER_SIZE;
+ ctx->buf = rte_zmalloc(__func__,
+ ctx->buf_size, sizeof(uint32_t));
+ if (!ctx->buf)
+ goto error;
+ ctx->seq = random();
+ return ctx;
+error:
+ mlx5_flow_tcf_context_destroy(ctx);
+ return NULL;
+}
+
+/**
+ * Destroy a libmnl context.
+ *
+ * @param ctx
+ * Libmnl socket of the @p NETLINK_ROUTE kind.
+ */
+void
+mlx5_flow_tcf_context_destroy(struct mlx5_flow_tcf_context *ctx)
+{
+ if (!ctx)
+ return;
+ flow_tcf_mnl_socket_destroy(ctx->nl);
+ rte_free(ctx->buf);
+ rte_free(ctx);
+}
diff --git a/drivers/net/mlx5/mlx5_flow_verbs.c b/drivers/net/mlx5/mlx5_flow_verbs.c
new file mode 100644
index 00000000..81bc39f9
--- /dev/null
+++ b/drivers/net/mlx5/mlx5_flow_verbs.c
@@ -0,0 +1,1825 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright 2018 Mellanox Technologies, Ltd
+ */
+
+#include <netinet/in.h>
+#include <sys/queue.h>
+#include <stdalign.h>
+#include <stdint.h>
+#include <string.h>
+
+/* Verbs header. */
+/* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */
+#ifdef PEDANTIC
+#pragma GCC diagnostic ignored "-Wpedantic"
+#endif
+#include <infiniband/verbs.h>
+#ifdef PEDANTIC
+#pragma GCC diagnostic error "-Wpedantic"
+#endif
+
+#include <rte_common.h>
+#include <rte_ether.h>
+#include <rte_eth_ctrl.h>
+#include <rte_ethdev_driver.h>
+#include <rte_flow.h>
+#include <rte_flow_driver.h>
+#include <rte_malloc.h>
+#include <rte_ip.h>
+
+#include "mlx5.h"
+#include "mlx5_defs.h"
+#include "mlx5_prm.h"
+#include "mlx5_glue.h"
+#include "mlx5_flow.h"
+
+/**
+ * Create Verbs flow counter with Verbs library.
+ *
+ * @param[in] dev
+ * Pointer to the Ethernet device structure.
+ * @param[in, out] counter
+ * mlx5 flow counter object, contains the counter id,
+ * handle of created Verbs flow counter is returned
+ * in cs field (if counters are supported).
+ *
+ * @return
+ *   0 on success, otherwise a negative errno value is returned
+ * and rte_errno is set.
+ */
+static int
+flow_verbs_counter_create(struct rte_eth_dev *dev,
+ struct mlx5_flow_counter *counter)
+{
+#if defined(HAVE_IBV_DEVICE_COUNTERS_SET_V42)
+ struct priv *priv = dev->data->dev_private;
+ struct ibv_counter_set_init_attr init = {
+ .counter_set_id = counter->id};
+
+ counter->cs = mlx5_glue->create_counter_set(priv->ctx, &init);
+ if (!counter->cs) {
+ rte_errno = ENOTSUP;
+ return -ENOTSUP;
+ }
+ return 0;
+#elif defined(HAVE_IBV_DEVICE_COUNTERS_SET_V45)
+ struct priv *priv = dev->data->dev_private;
+ struct ibv_counters_init_attr init = {0};
+ struct ibv_counter_attach_attr attach = {0};
+ int ret;
+
+ counter->cs = mlx5_glue->create_counters(priv->ctx, &init);
+ if (!counter->cs) {
+ rte_errno = ENOTSUP;
+ return -ENOTSUP;
+ }
+ attach.counter_desc = IBV_COUNTER_PACKETS;
+ attach.index = 0;
+ ret = mlx5_glue->attach_counters(counter->cs, &attach, NULL);
+ if (!ret) {
+ attach.counter_desc = IBV_COUNTER_BYTES;
+ attach.index = 1;
+ ret = mlx5_glue->attach_counters
+ (counter->cs, &attach, NULL);
+ }
+ if (ret) {
+ claim_zero(mlx5_glue->destroy_counters(counter->cs));
+ counter->cs = NULL;
+ rte_errno = ret;
+ return -ret;
+ }
+ return 0;
+#else
+ (void)dev;
+ (void)counter;
+ rte_errno = ENOTSUP;
+ return -ENOTSUP;
+#endif
+}
+
+/**
+ * Get a flow counter.
+ *
+ * @param[in] dev
+ * Pointer to the Ethernet device structure.
+ * @param[in] shared
+ * Indicate if this counter is shared with other flows.
+ * @param[in] id
+ * Counter identifier.
+ *
+ * @return
+ * A pointer to the counter, NULL otherwise and rte_errno is set.
+ */
+static struct mlx5_flow_counter *
+flow_verbs_counter_new(struct rte_eth_dev *dev, uint32_t shared, uint32_t id)
+{
+ struct priv *priv = dev->data->dev_private;
+ struct mlx5_flow_counter *cnt;
+ int ret;
+
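+	/* Reuse an existing shared counter with a matching id, if any. */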
+ LIST_FOREACH(cnt, &priv->flow_counters, next) {
+ if (!cnt->shared || cnt->shared != shared)
+ continue;
+ if (cnt->id != id)
+ continue;
+ cnt->ref_cnt++;
+ return cnt;
+ }
+ cnt = rte_calloc(__func__, 1, sizeof(*cnt), 0);
+ if (!cnt) {
+ rte_errno = ENOMEM;
+ return NULL;
+ }
+ cnt->id = id;
+ cnt->shared = shared;
+ cnt->ref_cnt = 1;
+ cnt->hits = 0;
+ cnt->bytes = 0;
+ /* Create counter with Verbs. */
+ ret = flow_verbs_counter_create(dev, cnt);
+ if (!ret) {
+ LIST_INSERT_HEAD(&priv->flow_counters, cnt, next);
+ return cnt;
+ }
+ /* Some error occurred in Verbs library. */
+ rte_free(cnt);
+ rte_errno = -ret;
+ return NULL;
+}
+
+/**
+ * Release a flow counter.
+ *
+ * @param[in] counter
+ * Pointer to the counter handler.
+ */
+static void
+flow_verbs_counter_release(struct mlx5_flow_counter *counter)
+{
+ if (--counter->ref_cnt == 0) {
+#if defined(HAVE_IBV_DEVICE_COUNTERS_SET_V42)
+ claim_zero(mlx5_glue->destroy_counter_set(counter->cs));
+#elif defined(HAVE_IBV_DEVICE_COUNTERS_SET_V45)
+ claim_zero(mlx5_glue->destroy_counters(counter->cs));
+#endif
+ LIST_REMOVE(counter, next);
+ rte_free(counter);
+ }
+}
+
+/**
+ * Query a flow counter via Verbs library call.
+ *
+ * @see rte_flow_query()
+ * @see rte_flow_ops
+ */
+static int
+flow_verbs_counter_query(struct rte_eth_dev *dev __rte_unused,
+ struct rte_flow *flow, void *data,
+ struct rte_flow_error *error)
+{
+#if defined(HAVE_IBV_DEVICE_COUNTERS_SET_V42) || \
+ defined(HAVE_IBV_DEVICE_COUNTERS_SET_V45)
+ if (flow->actions & MLX5_FLOW_ACTION_COUNT) {
+ struct rte_flow_query_count *qc = data;
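+		/* counters[0] holds packets (hits), counters[1] holds bytes. */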
+ uint64_t counters[2] = {0, 0};
+#if defined(HAVE_IBV_DEVICE_COUNTERS_SET_V42)
+ struct ibv_query_counter_set_attr query_cs_attr = {
+ .cs = flow->counter->cs,
+ .query_flags = IBV_COUNTER_SET_FORCE_UPDATE,
+ };
+ struct ibv_counter_set_data query_out = {
+ .out = counters,
+ .outlen = 2 * sizeof(uint64_t),
+ };
+ int err = mlx5_glue->query_counter_set(&query_cs_attr,
+ &query_out);
+#elif defined(HAVE_IBV_DEVICE_COUNTERS_SET_V45)
+ int err = mlx5_glue->query_counters
+ (flow->counter->cs, counters,
+ RTE_DIM(counters),
+ IBV_READ_COUNTERS_ATTR_PREFER_CACHED);
+#endif
+ if (err)
+ return rte_flow_error_set
+ (error, err,
+ RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
+ NULL,
+ "cannot read counter");
+ qc->hits_set = 1;
+ qc->bytes_set = 1;
+ qc->hits = counters[0] - flow->counter->hits;
+ qc->bytes = counters[1] - flow->counter->bytes;
+ if (qc->reset) {
+ flow->counter->hits = counters[0];
+ flow->counter->bytes = counters[1];
+ }
+ return 0;
+ }
+ return rte_flow_error_set(error, EINVAL,
+ RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
+ NULL,
+ "flow does not have counter");
+#else
+ (void)flow;
+ (void)data;
+ return rte_flow_error_set(error, ENOTSUP,
+ RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
+ NULL,
+ "counters are not available");
+#endif
+}
+
+/**
+ * Add a verbs item specification into @p flow.
+ *
+ * @param[in, out] flow
+ * Pointer to flow structure.
+ * @param[in] src
+ *   Pointer to the specification to copy.
+ * @param[in] size
+ * Size in bytes of the specification to copy.
+ */
+static void
+flow_verbs_spec_add(struct mlx5_flow *flow, void *src, unsigned int size)
+{
+ struct mlx5_flow_verbs *verbs = &flow->verbs;
+
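+	/* Copy the spec only if the buffer exists, always count its size. */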
+ if (verbs->specs) {
+ void *dst;
+
+ dst = (void *)(verbs->specs + verbs->size);
+ memcpy(dst, src, size);
+ ++verbs->attr->num_of_specs;
+ }
+ verbs->size += size;
+}
+
+/**
+ * Convert the @p item into a Verbs specification. This function assumes that
+ * the input is valid and that there is space to insert the requested item
+ * into the flow.
+ *
+ * @param[in] item
+ * Item specification.
+ * @param[in] item_flags
+ * Bit field with all detected items.
+ * @param[in, out] dev_flow
+ * Pointer to dev_flow structure.
+ */
+static void
+flow_verbs_translate_item_eth(const struct rte_flow_item *item,
+ uint64_t *item_flags,
+ struct mlx5_flow *dev_flow)
+{
+ const struct rte_flow_item_eth *spec = item->spec;
+ const struct rte_flow_item_eth *mask = item->mask;
+ const int tunnel = !!(*item_flags & MLX5_FLOW_LAYER_TUNNEL);
+ const unsigned int size = sizeof(struct ibv_flow_spec_eth);
+ struct ibv_flow_spec_eth eth = {
+ .type = IBV_FLOW_SPEC_ETH | (tunnel ? IBV_FLOW_SPEC_INNER : 0),
+ .size = size,
+ };
+
+ if (!mask)
+ mask = &rte_flow_item_eth_mask;
+ if (spec) {
+ unsigned int i;
+
+ memcpy(&eth.val.dst_mac, spec->dst.addr_bytes, ETHER_ADDR_LEN);
+ memcpy(&eth.val.src_mac, spec->src.addr_bytes, ETHER_ADDR_LEN);
+ eth.val.ether_type = spec->type;
+ memcpy(&eth.mask.dst_mac, mask->dst.addr_bytes, ETHER_ADDR_LEN);
+ memcpy(&eth.mask.src_mac, mask->src.addr_bytes, ETHER_ADDR_LEN);
+ eth.mask.ether_type = mask->type;
+ /* Remove unwanted bits from values. */
+ for (i = 0; i < ETHER_ADDR_LEN; ++i) {
+ eth.val.dst_mac[i] &= eth.mask.dst_mac[i];
+ eth.val.src_mac[i] &= eth.mask.src_mac[i];
+ }
+ eth.val.ether_type &= eth.mask.ether_type;
+ dev_flow->verbs.attr->priority = MLX5_PRIORITY_MAP_L2;
+ }
+ flow_verbs_spec_add(dev_flow, &eth, size);
+ *item_flags |= tunnel ? MLX5_FLOW_LAYER_INNER_L2 :
+ MLX5_FLOW_LAYER_OUTER_L2;
+}
+
+/**
+ * Update the VLAN tag in the Verbs Ethernet specification.
+ * This function assumes that the input is valid and there is space to add
+ * the requested item.
+ *
+ * @param[in, out] attr
+ * Pointer to Verbs attributes structure.
+ * @param[in] eth
+ * Verbs structure containing the VLAN information to copy.
+ */
+static void
+flow_verbs_item_vlan_update(struct ibv_flow_attr *attr,
+ struct ibv_flow_spec_eth *eth)
+{
+ unsigned int i;
+ const enum ibv_flow_spec_type search = eth->type;
+ struct ibv_spec_header *hdr = (struct ibv_spec_header *)
+ ((uint8_t *)attr + sizeof(struct ibv_flow_attr));
+
+ for (i = 0; i != attr->num_of_specs; ++i) {
+ if (hdr->type == search) {
+ struct ibv_flow_spec_eth *e =
+ (struct ibv_flow_spec_eth *)hdr;
+
+ e->val.vlan_tag = eth->val.vlan_tag;
+ e->mask.vlan_tag = eth->mask.vlan_tag;
+ e->val.ether_type = eth->val.ether_type;
+ e->mask.ether_type = eth->mask.ether_type;
+ break;
+ }
+ hdr = (struct ibv_spec_header *)((uint8_t *)hdr + hdr->size);
+ }
+}
+
+/**
+ * Convert the @p item into a Verbs specification. This function assumes that
+ * the input is valid and that there is space to insert the requested item
+ * into the flow.
+ *
+ * @param[in] item
+ * Item specification.
+ * @param[in, out] item_flags
+ * Bit mask that holds all detected items.
+ * @param[in, out] dev_flow
+ * Pointer to dev_flow structure.
+ */
+static void
+flow_verbs_translate_item_vlan(const struct rte_flow_item *item,
+ uint64_t *item_flags,
+ struct mlx5_flow *dev_flow)
+{
+ const struct rte_flow_item_vlan *spec = item->spec;
+ const struct rte_flow_item_vlan *mask = item->mask;
+ unsigned int size = sizeof(struct ibv_flow_spec_eth);
+ const int tunnel = !!(*item_flags & MLX5_FLOW_LAYER_TUNNEL);
+ struct ibv_flow_spec_eth eth = {
+ .type = IBV_FLOW_SPEC_ETH | (tunnel ? IBV_FLOW_SPEC_INNER : 0),
+ .size = size,
+ };
+ const uint32_t l2m = tunnel ? MLX5_FLOW_LAYER_INNER_L2 :
+ MLX5_FLOW_LAYER_OUTER_L2;
+
+ if (!mask)
+ mask = &rte_flow_item_vlan_mask;
+ if (spec) {
+ eth.val.vlan_tag = spec->tci;
+ eth.mask.vlan_tag = mask->tci;
+ eth.val.vlan_tag &= eth.mask.vlan_tag;
+ eth.val.ether_type = spec->inner_type;
+ eth.mask.ether_type = mask->inner_type;
+ eth.val.ether_type &= eth.mask.ether_type;
+ }
+ if (!(*item_flags & l2m)) {
+ dev_flow->verbs.attr->priority = MLX5_PRIORITY_MAP_L2;
+ flow_verbs_spec_add(dev_flow, &eth, size);
+ } else {
+ flow_verbs_item_vlan_update(dev_flow->verbs.attr, &eth);
+ size = 0; /* Only an update is done in eth specification. */
+ }
+ *item_flags |= tunnel ?
+ (MLX5_FLOW_LAYER_INNER_L2 | MLX5_FLOW_LAYER_INNER_VLAN) :
+ (MLX5_FLOW_LAYER_OUTER_L2 | MLX5_FLOW_LAYER_OUTER_VLAN);
+}
+
+/**
+ * Convert the @p item into a Verbs specification. This function assumes that
+ * the input is valid and that there is space to insert the requested item
+ * into the flow.
+ *
+ * @param[in] item
+ * Item specification.
+ * @param[in, out] item_flags
+ * Bit mask that marks all detected items.
+ * @param[in, out] dev_flow
+ *   Pointer to dev_flow structure.
+ */
+static void
+flow_verbs_translate_item_ipv4(const struct rte_flow_item *item,
+ uint64_t *item_flags,
+ struct mlx5_flow *dev_flow)
+{
+ const struct rte_flow_item_ipv4 *spec = item->spec;
+ const struct rte_flow_item_ipv4 *mask = item->mask;
+ const int tunnel = !!(*item_flags & MLX5_FLOW_LAYER_TUNNEL);
+ unsigned int size = sizeof(struct ibv_flow_spec_ipv4_ext);
+ struct ibv_flow_spec_ipv4_ext ipv4 = {
+ .type = IBV_FLOW_SPEC_IPV4_EXT |
+ (tunnel ? IBV_FLOW_SPEC_INNER : 0),
+ .size = size,
+ };
+
+ if (!mask)
+ mask = &rte_flow_item_ipv4_mask;
+ *item_flags |= tunnel ? MLX5_FLOW_LAYER_INNER_L3_IPV4 :
+ MLX5_FLOW_LAYER_OUTER_L3_IPV4;
+ if (spec) {
+ ipv4.val = (struct ibv_flow_ipv4_ext_filter){
+ .src_ip = spec->hdr.src_addr,
+ .dst_ip = spec->hdr.dst_addr,
+ .proto = spec->hdr.next_proto_id,
+ .tos = spec->hdr.type_of_service,
+ };
+ ipv4.mask = (struct ibv_flow_ipv4_ext_filter){
+ .src_ip = mask->hdr.src_addr,
+ .dst_ip = mask->hdr.dst_addr,
+ .proto = mask->hdr.next_proto_id,
+ .tos = mask->hdr.type_of_service,
+ };
+ /* Remove unwanted bits from values. */
+ ipv4.val.src_ip &= ipv4.mask.src_ip;
+ ipv4.val.dst_ip &= ipv4.mask.dst_ip;
+ ipv4.val.proto &= ipv4.mask.proto;
+ ipv4.val.tos &= ipv4.mask.tos;
+ }
+ dev_flow->verbs.hash_fields |=
+ mlx5_flow_hashfields_adjust(dev_flow, tunnel,
+ MLX5_IPV4_LAYER_TYPES,
+ MLX5_IPV4_IBV_RX_HASH);
+ dev_flow->verbs.attr->priority = MLX5_PRIORITY_MAP_L3;
+ flow_verbs_spec_add(dev_flow, &ipv4, size);
+}
+
+/**
+ * Convert the @p item into a Verbs specification. This function assumes that
+ * the input is valid and that there is space to insert the requested item
+ * into the flow.
+ *
+ * @param[in] item
+ * Item specification.
+ * @param[in, out] item_flags
+ * Bit mask that marks all detected items.
+ * @param[in, out] dev_flow
+ *   Pointer to dev_flow structure.
+ */
+static void
+flow_verbs_translate_item_ipv6(const struct rte_flow_item *item,
+ uint64_t *item_flags,
+ struct mlx5_flow *dev_flow)
+{
+ const struct rte_flow_item_ipv6 *spec = item->spec;
+ const struct rte_flow_item_ipv6 *mask = item->mask;
+ const int tunnel = !!(dev_flow->layers & MLX5_FLOW_LAYER_TUNNEL);
+ unsigned int size = sizeof(struct ibv_flow_spec_ipv6);
+ struct ibv_flow_spec_ipv6 ipv6 = {
+ .type = IBV_FLOW_SPEC_IPV6 | (tunnel ? IBV_FLOW_SPEC_INNER : 0),
+ .size = size,
+ };
+
+ if (!mask)
+ mask = &rte_flow_item_ipv6_mask;
+ *item_flags |= tunnel ? MLX5_FLOW_LAYER_INNER_L3_IPV6 :
+ MLX5_FLOW_LAYER_OUTER_L3_IPV6;
+ if (spec) {
+ unsigned int i;
+ uint32_t vtc_flow_val;
+ uint32_t vtc_flow_mask;
+
+ memcpy(&ipv6.val.src_ip, spec->hdr.src_addr,
+ RTE_DIM(ipv6.val.src_ip));
+ memcpy(&ipv6.val.dst_ip, spec->hdr.dst_addr,
+ RTE_DIM(ipv6.val.dst_ip));
+ memcpy(&ipv6.mask.src_ip, mask->hdr.src_addr,
+ RTE_DIM(ipv6.mask.src_ip));
+ memcpy(&ipv6.mask.dst_ip, mask->hdr.dst_addr,
+ RTE_DIM(ipv6.mask.dst_ip));
+ vtc_flow_val = rte_be_to_cpu_32(spec->hdr.vtc_flow);
+ vtc_flow_mask = rte_be_to_cpu_32(mask->hdr.vtc_flow);
+ ipv6.val.flow_label =
+ rte_cpu_to_be_32((vtc_flow_val & IPV6_HDR_FL_MASK) >>
+ IPV6_HDR_FL_SHIFT);
+ ipv6.val.traffic_class = (vtc_flow_val & IPV6_HDR_TC_MASK) >>
+ IPV6_HDR_TC_SHIFT;
+ ipv6.val.next_hdr = spec->hdr.proto;
+ ipv6.val.hop_limit = spec->hdr.hop_limits;
+ ipv6.mask.flow_label =
+ rte_cpu_to_be_32((vtc_flow_mask & IPV6_HDR_FL_MASK) >>
+ IPV6_HDR_FL_SHIFT);
+ ipv6.mask.traffic_class = (vtc_flow_mask & IPV6_HDR_TC_MASK) >>
+ IPV6_HDR_TC_SHIFT;
+ ipv6.mask.next_hdr = mask->hdr.proto;
+ ipv6.mask.hop_limit = mask->hdr.hop_limits;
+ /* Remove unwanted bits from values. */
+ for (i = 0; i < RTE_DIM(ipv6.val.src_ip); ++i) {
+ ipv6.val.src_ip[i] &= ipv6.mask.src_ip[i];
+ ipv6.val.dst_ip[i] &= ipv6.mask.dst_ip[i];
+ }
+ ipv6.val.flow_label &= ipv6.mask.flow_label;
+ ipv6.val.traffic_class &= ipv6.mask.traffic_class;
+ ipv6.val.next_hdr &= ipv6.mask.next_hdr;
+ ipv6.val.hop_limit &= ipv6.mask.hop_limit;
+ }
+ dev_flow->verbs.hash_fields |=
+ mlx5_flow_hashfields_adjust(dev_flow, tunnel,
+ MLX5_IPV6_LAYER_TYPES,
+ MLX5_IPV6_IBV_RX_HASH);
+ dev_flow->verbs.attr->priority = MLX5_PRIORITY_MAP_L3;
+ flow_verbs_spec_add(dev_flow, &ipv6, size);
+}
+
+/**
+ * Convert the @p item into a Verbs specification. This function assumes that
+ * the input is valid and that there is space to insert the requested item
+ * into the flow.
+ *
+ * @param[in] item
+ * Item specification.
+ * @param[in, out] item_flags
+ * Bit mask that marks all detected items.
+ * @param[in, out] dev_flow
+ *   Pointer to dev_flow structure.
+ */
+static void
+flow_verbs_translate_item_udp(const struct rte_flow_item *item,
+ uint64_t *item_flags,
+ struct mlx5_flow *dev_flow)
+{
+ const struct rte_flow_item_udp *spec = item->spec;
+ const struct rte_flow_item_udp *mask = item->mask;
+ const int tunnel = !!(*item_flags & MLX5_FLOW_LAYER_TUNNEL);
+ unsigned int size = sizeof(struct ibv_flow_spec_tcp_udp);
+ struct ibv_flow_spec_tcp_udp udp = {
+ .type = IBV_FLOW_SPEC_UDP | (tunnel ? IBV_FLOW_SPEC_INNER : 0),
+ .size = size,
+ };
+
+ if (!mask)
+ mask = &rte_flow_item_udp_mask;
+ *item_flags |= tunnel ? MLX5_FLOW_LAYER_INNER_L4_UDP :
+ MLX5_FLOW_LAYER_OUTER_L4_UDP;
+ if (spec) {
+ udp.val.dst_port = spec->hdr.dst_port;
+ udp.val.src_port = spec->hdr.src_port;
+ udp.mask.dst_port = mask->hdr.dst_port;
+ udp.mask.src_port = mask->hdr.src_port;
+ /* Remove unwanted bits from values. */
+ udp.val.src_port &= udp.mask.src_port;
+ udp.val.dst_port &= udp.mask.dst_port;
+ }
+ dev_flow->verbs.hash_fields |=
+ mlx5_flow_hashfields_adjust(dev_flow, tunnel, ETH_RSS_UDP,
+ (IBV_RX_HASH_SRC_PORT_UDP |
+ IBV_RX_HASH_DST_PORT_UDP));
+ dev_flow->verbs.attr->priority = MLX5_PRIORITY_MAP_L4;
+ flow_verbs_spec_add(dev_flow, &udp, size);
+}
+
+/**
+ * Convert the @p item into a Verbs specification. This function assumes that
+ * the input is valid and that there is space to insert the requested item
+ * into the flow.
+ *
+ * @param[in] item
+ * Item specification.
+ * @param[in, out] item_flags
+ * Bit mask that marks all detected items.
+ * @param[in, out] dev_flow
+ *   Pointer to dev_flow structure.
+ */
+static void
+flow_verbs_translate_item_tcp(const struct rte_flow_item *item,
+ uint64_t *item_flags,
+ struct mlx5_flow *dev_flow)
+{
+ const struct rte_flow_item_tcp *spec = item->spec;
+ const struct rte_flow_item_tcp *mask = item->mask;
+ const int tunnel = !!(dev_flow->layers & MLX5_FLOW_LAYER_TUNNEL);
+ unsigned int size = sizeof(struct ibv_flow_spec_tcp_udp);
+ struct ibv_flow_spec_tcp_udp tcp = {
+ .type = IBV_FLOW_SPEC_TCP | (tunnel ? IBV_FLOW_SPEC_INNER : 0),
+ .size = size,
+ };
+
+ if (!mask)
+ mask = &rte_flow_item_tcp_mask;
+ *item_flags |= tunnel ? MLX5_FLOW_LAYER_INNER_L4_TCP :
+ MLX5_FLOW_LAYER_OUTER_L4_TCP;
+ if (spec) {
+ tcp.val.dst_port = spec->hdr.dst_port;
+ tcp.val.src_port = spec->hdr.src_port;
+ tcp.mask.dst_port = mask->hdr.dst_port;
+ tcp.mask.src_port = mask->hdr.src_port;
+ /* Remove unwanted bits from values. */
+ tcp.val.src_port &= tcp.mask.src_port;
+ tcp.val.dst_port &= tcp.mask.dst_port;
+ }
+ dev_flow->verbs.hash_fields |=
+ mlx5_flow_hashfields_adjust(dev_flow, tunnel, ETH_RSS_TCP,
+ (IBV_RX_HASH_SRC_PORT_TCP |
+ IBV_RX_HASH_DST_PORT_TCP));
+ dev_flow->verbs.attr->priority = MLX5_PRIORITY_MAP_L4;
+ flow_verbs_spec_add(dev_flow, &tcp, size);
+}
+
+/**
+ * Convert the @p item into a Verbs specification. This function assumes that
+ * the input is valid and that there is space to insert the requested item
+ * into the flow.
+ *
+ * @param[in] item
+ * Item specification.
+ * @param[in, out] item_flags
+ * Bit mask that marks all detected items.
+ * @param[in, out] dev_flow
+ *   Pointer to dev_flow structure.
+ */
+static void
+flow_verbs_translate_item_vxlan(const struct rte_flow_item *item,
+ uint64_t *item_flags,
+ struct mlx5_flow *dev_flow)
+{
+ const struct rte_flow_item_vxlan *spec = item->spec;
+ const struct rte_flow_item_vxlan *mask = item->mask;
+ unsigned int size = sizeof(struct ibv_flow_spec_tunnel);
+ struct ibv_flow_spec_tunnel vxlan = {
+ .type = IBV_FLOW_SPEC_VXLAN_TUNNEL,
+ .size = size,
+ };
+ union vni {
+ uint32_t vlan_id;
+ uint8_t vni[4];
+ } id = { .vlan_id = 0, };
+
+ if (!mask)
+ mask = &rte_flow_item_vxlan_mask;
+ if (spec) {
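+		/* The 24-bit VNI occupies bytes 1-3 of the id union. */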
+ memcpy(&id.vni[1], spec->vni, 3);
+ vxlan.val.tunnel_id = id.vlan_id;
+ memcpy(&id.vni[1], mask->vni, 3);
+ vxlan.mask.tunnel_id = id.vlan_id;
+ /* Remove unwanted bits from values. */
+ vxlan.val.tunnel_id &= vxlan.mask.tunnel_id;
+ }
+ flow_verbs_spec_add(dev_flow, &vxlan, size);
+ dev_flow->verbs.attr->priority = MLX5_PRIORITY_MAP_L2;
+ *item_flags |= MLX5_FLOW_LAYER_VXLAN;
+}
+
+/**
+ * Convert the @p item into a Verbs specification. This function assumes that
+ * the input is valid and that there is space to insert the requested item
+ * into the flow.
+ *
+ * @param[in] item
+ * Item specification.
+ * @param[in, out] item_flags
+ * Bit mask that marks all detected items.
+ * @param[in, out] dev_flow
+ *   Pointer to dev_flow structure.
+ */
+static void
+flow_verbs_translate_item_vxlan_gpe(const struct rte_flow_item *item,
+ uint64_t *item_flags,
+ struct mlx5_flow *dev_flow)
+{
+ const struct rte_flow_item_vxlan_gpe *spec = item->spec;
+ const struct rte_flow_item_vxlan_gpe *mask = item->mask;
+ unsigned int size = sizeof(struct ibv_flow_spec_tunnel);
+ struct ibv_flow_spec_tunnel vxlan_gpe = {
+ .type = IBV_FLOW_SPEC_VXLAN_TUNNEL,
+ .size = size,
+ };
+ union vni {
+ uint32_t vlan_id;
+ uint8_t vni[4];
+ } id = { .vlan_id = 0, };
+
+ if (!mask)
+ mask = &rte_flow_item_vxlan_gpe_mask;
+ if (spec) {
+ memcpy(&id.vni[1], spec->vni, 3);
+ vxlan_gpe.val.tunnel_id = id.vlan_id;
+ memcpy(&id.vni[1], mask->vni, 3);
+ vxlan_gpe.mask.tunnel_id = id.vlan_id;
+ /* Remove unwanted bits from values. */
+ vxlan_gpe.val.tunnel_id &= vxlan_gpe.mask.tunnel_id;
+ }
+ flow_verbs_spec_add(dev_flow, &vxlan_gpe, size);
+ dev_flow->verbs.attr->priority = MLX5_PRIORITY_MAP_L2;
+ *item_flags |= MLX5_FLOW_LAYER_VXLAN_GPE;
+}
+
+/**
+ * Update the protocol in Verbs IPv4/IPv6 spec.
+ *
+ * @param[in, out] attr
+ * Pointer to Verbs attributes structure.
+ * @param[in] search
+ * Specification type to search in order to update the IP protocol.
+ * @param[in] protocol
+ * Protocol value to set if none is present in the specification.
+ */
+static void
+flow_verbs_item_gre_ip_protocol_update(struct ibv_flow_attr *attr,
+ enum ibv_flow_spec_type search,
+ uint8_t protocol)
+{
+ unsigned int i;
+ struct ibv_spec_header *hdr = (struct ibv_spec_header *)
+ ((uint8_t *)attr + sizeof(struct ibv_flow_attr));
+
+ if (!attr)
+ return;
+ for (i = 0; i != attr->num_of_specs; ++i) {
+ if (hdr->type == search) {
+ union {
+ struct ibv_flow_spec_ipv4_ext *ipv4;
+ struct ibv_flow_spec_ipv6 *ipv6;
+ } ip;
+
+ switch (search) {
+ case IBV_FLOW_SPEC_IPV4_EXT:
+ ip.ipv4 = (struct ibv_flow_spec_ipv4_ext *)hdr;
+ if (!ip.ipv4->val.proto) {
+ ip.ipv4->val.proto = protocol;
+ ip.ipv4->mask.proto = 0xff;
+ }
+ break;
+ case IBV_FLOW_SPEC_IPV6:
+ ip.ipv6 = (struct ibv_flow_spec_ipv6 *)hdr;
+ if (!ip.ipv6->val.next_hdr) {
+ ip.ipv6->val.next_hdr = protocol;
+ ip.ipv6->mask.next_hdr = 0xff;
+ }
+ break;
+ default:
+ break;
+ }
+ break;
+ }
+ hdr = (struct ibv_spec_header *)((uint8_t *)hdr + hdr->size);
+ }
+}
+
+/**
+ * Convert the @p item into a Verbs specification. This function assumes that
+ * the input is valid and that there is space to insert the requested item
+ * into the flow.
+ *
+ * @param[in] item
+ * Item specification.
+ * @param[in, out] item_flags
+ * Bit mask that marks all detected items.
+ * @param[in, out] dev_flow
+ *   Pointer to dev_flow structure.
+ */
+static void
+flow_verbs_translate_item_gre(const struct rte_flow_item *item __rte_unused,
+ uint64_t *item_flags,
+ struct mlx5_flow *dev_flow)
+{
+ struct mlx5_flow_verbs *verbs = &dev_flow->verbs;
+#ifndef HAVE_IBV_DEVICE_MPLS_SUPPORT
+ unsigned int size = sizeof(struct ibv_flow_spec_tunnel);
+ struct ibv_flow_spec_tunnel tunnel = {
+ .type = IBV_FLOW_SPEC_VXLAN_TUNNEL,
+ .size = size,
+ };
+#else
+ const struct rte_flow_item_gre *spec = item->spec;
+ const struct rte_flow_item_gre *mask = item->mask;
+ unsigned int size = sizeof(struct ibv_flow_spec_gre);
+ struct ibv_flow_spec_gre tunnel = {
+ .type = IBV_FLOW_SPEC_GRE,
+ .size = size,
+ };
+
+ if (!mask)
+ mask = &rte_flow_item_gre_mask;
+ if (spec) {
+ tunnel.val.c_ks_res0_ver = spec->c_rsvd0_ver;
+ tunnel.val.protocol = spec->protocol;
+ tunnel.mask.c_ks_res0_ver = mask->c_rsvd0_ver;
+ tunnel.mask.protocol = mask->protocol;
+ /* Remove unwanted bits from values. */
+ tunnel.val.c_ks_res0_ver &= tunnel.mask.c_ks_res0_ver;
+ tunnel.val.protocol &= tunnel.mask.protocol;
+ tunnel.val.key &= tunnel.mask.key;
+ }
+#endif
+ if (*item_flags & MLX5_FLOW_LAYER_OUTER_L3_IPV4)
+ flow_verbs_item_gre_ip_protocol_update(verbs->attr,
+ IBV_FLOW_SPEC_IPV4_EXT,
+ IPPROTO_GRE);
+ else
+ flow_verbs_item_gre_ip_protocol_update(verbs->attr,
+ IBV_FLOW_SPEC_IPV6,
+ IPPROTO_GRE);
+ flow_verbs_spec_add(dev_flow, &tunnel, size);
+ verbs->attr->priority = MLX5_PRIORITY_MAP_L2;
+ *item_flags |= MLX5_FLOW_LAYER_GRE;
+}
+
+/**
+ * Convert the @p item into a Verbs specification. This function assumes that
+ * the input is valid and that there is space to insert the requested item
+ * into the flow.
+ *
+ * @param[in] item
+ *   Item specification.
+ * @param[in, out] action_flags
+ *   Bit mask that marks all detected items.
+ * @param[in, out] dev_flow
+ *   Pointer to dev_flow structure.
+ */
+static void
+flow_verbs_translate_item_mpls(const struct rte_flow_item *item __rte_unused,
+ uint64_t *action_flags __rte_unused,
+ struct mlx5_flow *dev_flow __rte_unused)
+{
+#ifdef HAVE_IBV_DEVICE_MPLS_SUPPORT
+ const struct rte_flow_item_mpls *spec = item->spec;
+ const struct rte_flow_item_mpls *mask = item->mask;
+ unsigned int size = sizeof(struct ibv_flow_spec_mpls);
+ struct ibv_flow_spec_mpls mpls = {
+ .type = IBV_FLOW_SPEC_MPLS,
+ .size = size,
+ };
+
+ if (!mask)
+ mask = &rte_flow_item_mpls_mask;
+ if (spec) {
+ memcpy(&mpls.val.label, spec, sizeof(mpls.val.label));
+ memcpy(&mpls.mask.label, mask, sizeof(mpls.mask.label));
+ /* Remove unwanted bits from values. */
+ mpls.val.label &= mpls.mask.label;
+ }
+ flow_verbs_spec_add(dev_flow, &mpls, size);
+ dev_flow->verbs.attr->priority = MLX5_PRIORITY_MAP_L2;
+ *action_flags |= MLX5_FLOW_LAYER_MPLS;
+#endif
+}
+
+/**
+ * Convert the @p action into a Verbs specification. This function assumes that
+ * the input is valid and that there is space to insert the requested action
+ * into the flow. It also updates @p action_flags with the detected action.
+ *
+ * @param[in, out] action_flags
+ * Pointer to the detected actions.
+ * @param[in] dev_flow
+ * Pointer to mlx5_flow.
+ */
+static void
+flow_verbs_translate_action_drop(uint64_t *action_flags,
+ struct mlx5_flow *dev_flow)
+{
+ unsigned int size = sizeof(struct ibv_flow_spec_action_drop);
+ struct ibv_flow_spec_action_drop drop = {
+ .type = IBV_FLOW_SPEC_ACTION_DROP,
+ .size = size,
+ };
+
+ flow_verbs_spec_add(dev_flow, &drop, size);
+ *action_flags |= MLX5_FLOW_ACTION_DROP;
+}
+
+/**
+ * Convert the @p action into a Verbs specification. This function assumes that
+ * the input is valid and that there is space to insert the requested action
+ * into the flow. It also updates @p action_flags with the detected action.
+ *
+ * @param[in] action
+ * Action configuration.
+ * @param[in, out] action_flags
+ * Pointer to the detected actions.
+ * @param[in] dev_flow
+ * Pointer to mlx5_flow.
+ */
+static void
+flow_verbs_translate_action_queue(const struct rte_flow_action *action,
+ uint64_t *action_flags,
+ struct mlx5_flow *dev_flow)
+{
+ const struct rte_flow_action_queue *queue = action->conf;
+ struct rte_flow *flow = dev_flow->flow;
+
+ if (flow->queue)
+ (*flow->queue)[0] = queue->index;
+ flow->rss.queue_num = 1;
+ *action_flags |= MLX5_FLOW_ACTION_QUEUE;
+}
+
+/**
+ * Convert the @p action into a Verbs specification. This function assumes that
+ * the input is valid and that there is space to insert the requested action
+ * into the flow. It also updates @p action_flags with the detected action.
+ *
+ * @param[in] action
+ * Action configuration.
+ * @param[in, out] action_flags
+ * Pointer to the detected actions.
+ * @param[in] dev_flow
+ * Pointer to mlx5_flow.
+ */
+static void
+flow_verbs_translate_action_rss(const struct rte_flow_action *action,
+ uint64_t *action_flags,
+ struct mlx5_flow *dev_flow)
+{
+ const struct rte_flow_action_rss *rss = action->conf;
+ struct rte_flow *flow = dev_flow->flow;
+
+ if (flow->queue)
+ memcpy((*flow->queue), rss->queue,
+ rss->queue_num * sizeof(uint16_t));
+ flow->rss.queue_num = rss->queue_num;
+ memcpy(flow->key, rss->key, MLX5_RSS_HASH_KEY_LEN);
+ flow->rss.types = rss->types;
+ flow->rss.level = rss->level;
+ *action_flags |= MLX5_FLOW_ACTION_RSS;
+}
+
+/**
+ * Convert the @p action into a Verbs specification. This function assumes that
+ * the input is valid and that there is space to insert the requested action
+ * into the flow. It also updates @p action_flags with the detected action.
+ *
+ * @param[in] action
+ * Action configuration.
+ * @param[in, out] action_flags
+ * Pointer to the detected actions.
+ * @param[in] dev_flow
+ * Pointer to mlx5_flow.
+ */
+static void
+flow_verbs_translate_action_flag
+ (const struct rte_flow_action *action __rte_unused,
+ uint64_t *action_flags,
+ struct mlx5_flow *dev_flow)
+{
+ unsigned int size = sizeof(struct ibv_flow_spec_action_tag);
+ struct ibv_flow_spec_action_tag tag = {
+ .type = IBV_FLOW_SPEC_ACTION_TAG,
+ .size = size,
+ .tag_id = mlx5_flow_mark_set(MLX5_FLOW_MARK_DEFAULT),
+ };
+ *action_flags |= MLX5_FLOW_ACTION_MARK;
+ flow_verbs_spec_add(dev_flow, &tag, size);
+}
+
+/**
+ * Update verbs specification to modify the flag to mark.
+ *
+ * @param[in, out] verbs
+ * Pointer to the mlx5_flow_verbs structure.
+ * @param[in] mark_id
+ * Mark identifier to replace the flag.
+ */
+static void
+flow_verbs_mark_update(struct mlx5_flow_verbs *verbs, uint32_t mark_id)
+{
+ struct ibv_spec_header *hdr;
+ int i;
+
+ if (!verbs)
+ return;
+ /* Update Verbs specification. */
+ hdr = (struct ibv_spec_header *)verbs->specs;
+ if (!hdr)
+ return;
+ for (i = 0; i != verbs->attr->num_of_specs; ++i) {
+ if (hdr->type == IBV_FLOW_SPEC_ACTION_TAG) {
+ struct ibv_flow_spec_action_tag *t =
+ (struct ibv_flow_spec_action_tag *)hdr;
+
+ t->tag_id = mlx5_flow_mark_set(mark_id);
+ }
+ hdr = (struct ibv_spec_header *)((uintptr_t)hdr + hdr->size);
+ }
+}
+
+/**
+ * Convert the @p action into a Verbs specification. This function assumes that
+ * the input is valid and that there is space to insert the requested action
+ * into the flow. It also updates @p action_flags with the detected action.
+ *
+ * @param[in] action
+ * Action configuration.
+ * @param[in, out] action_flags
+ * Pointer to the detected actions.
+ * @param[in] dev_flow
+ * Pointer to mlx5_flow.
+ */
+static void
+flow_verbs_translate_action_mark(const struct rte_flow_action *action,
+ uint64_t *action_flags,
+ struct mlx5_flow *dev_flow)
+{
+ const struct rte_flow_action_mark *mark = action->conf;
+ unsigned int size = sizeof(struct ibv_flow_spec_action_tag);
+ struct ibv_flow_spec_action_tag tag = {
+ .type = IBV_FLOW_SPEC_ACTION_TAG,
+ .size = size,
+ };
+ struct mlx5_flow_verbs *verbs = &dev_flow->verbs;
+
+ if (*action_flags & MLX5_FLOW_ACTION_FLAG) {
+ flow_verbs_mark_update(verbs, mark->id);
+ size = 0;
+ } else {
+ tag.tag_id = mlx5_flow_mark_set(mark->id);
+ flow_verbs_spec_add(dev_flow, &tag, size);
+ }
+ *action_flags |= MLX5_FLOW_ACTION_MARK;
+}
+
+/**
+ * Convert the @p action into a Verbs specification. This function assumes that
+ * the input is valid and that there is space to insert the requested action
+ * into the flow. It also updates @p action_flags with the detected action.
+ *
+ * @param[in] dev
+ * Pointer to the Ethernet device structure.
+ * @param[in] action
+ * Action configuration.
+ * @param[in, out] action_flags
+ * Pointer to the detected actions.
+ * @param[in] dev_flow
+ * Pointer to mlx5_flow.
+ * @param[out] error
+ * Pointer to error structure.
+ *
+ * @return
+ *   0 on success, otherwise a negative errno value is returned and rte_errno is set.
+ */
+static int
+flow_verbs_translate_action_count(struct rte_eth_dev *dev,
+ const struct rte_flow_action *action,
+ uint64_t *action_flags,
+ struct mlx5_flow *dev_flow,
+ struct rte_flow_error *error)
+{
+ const struct rte_flow_action_count *count = action->conf;
+ struct rte_flow *flow = dev_flow->flow;
+#if defined(HAVE_IBV_DEVICE_COUNTERS_SET_V42) || \
+ defined(HAVE_IBV_DEVICE_COUNTERS_SET_V45)
+ unsigned int size = sizeof(struct ibv_flow_spec_counter_action);
+ struct ibv_flow_spec_counter_action counter = {
+ .type = IBV_FLOW_SPEC_ACTION_COUNT,
+ .size = size,
+ };
+#endif
+
+ if (!flow->counter) {
+ flow->counter = flow_verbs_counter_new(dev, count->shared,
+ count->id);
+ if (!flow->counter)
+ return rte_flow_error_set(error, rte_errno,
+ RTE_FLOW_ERROR_TYPE_ACTION,
+ action,
+ "cannot get counter"
+ " context.");
+ }
+ *action_flags |= MLX5_FLOW_ACTION_COUNT;
+#if defined(HAVE_IBV_DEVICE_COUNTERS_SET_V42)
+ counter.counter_set_handle = flow->counter->cs->handle;
+ flow_verbs_spec_add(dev_flow, &counter, size);
+#elif defined(HAVE_IBV_DEVICE_COUNTERS_SET_V45)
+ counter.counters = flow->counter->cs;
+ flow_verbs_spec_add(dev_flow, &counter, size);
+#endif
+ return 0;
+}
+
+/**
+ * Internal validation function. For validating both actions and items.
+ *
+ * @param[in] dev
+ * Pointer to the Ethernet device structure.
+ * @param[in] attr
+ * Pointer to the flow attributes.
+ * @param[in] items
+ * Pointer to the list of items.
+ * @param[in] actions
+ * Pointer to the list of actions.
+ * @param[out] error
+ * Pointer to the error structure.
+ *
+ * @return
+ * 0 on success, a negative errno value otherwise and rte_errno is set.
+ */
+static int
+flow_verbs_validate(struct rte_eth_dev *dev,
+ const struct rte_flow_attr *attr,
+ const struct rte_flow_item items[],
+ const struct rte_flow_action actions[],
+ struct rte_flow_error *error)
+{
+ int ret;
+ uint64_t action_flags = 0;
+ uint64_t item_flags = 0;
+ int tunnel = 0;
+ uint8_t next_protocol = 0xff;
+
+ if (items == NULL)
+ return -1;
+ ret = mlx5_flow_validate_attributes(dev, attr, error);
+ if (ret < 0)
+ return ret;
+ for (; items->type != RTE_FLOW_ITEM_TYPE_END; items++) {
+ int ret = 0;
+
+ tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
+ switch (items->type) {
+ case RTE_FLOW_ITEM_TYPE_VOID:
+ break;
+ case RTE_FLOW_ITEM_TYPE_ETH:
+ ret = mlx5_flow_validate_item_eth(items, item_flags,
+ error);
+ if (ret < 0)
+ return ret;
+ item_flags |= tunnel ? MLX5_FLOW_LAYER_INNER_L2 :
+ MLX5_FLOW_LAYER_OUTER_L2;
+ break;
+ case RTE_FLOW_ITEM_TYPE_VLAN:
+ ret = mlx5_flow_validate_item_vlan(items, item_flags,
+ error);
+ if (ret < 0)
+ return ret;
+ item_flags |= tunnel ? MLX5_FLOW_LAYER_INNER_VLAN :
+ MLX5_FLOW_LAYER_OUTER_VLAN;
+ break;
+ case RTE_FLOW_ITEM_TYPE_IPV4:
+ ret = mlx5_flow_validate_item_ipv4(items, item_flags,
+ error);
+ if (ret < 0)
+ return ret;
+ item_flags |= tunnel ? MLX5_FLOW_LAYER_INNER_L3_IPV4 :
+ MLX5_FLOW_LAYER_OUTER_L3_IPV4;
+ if (items->mask != NULL &&
+ ((const struct rte_flow_item_ipv4 *)
+ items->mask)->hdr.next_proto_id)
+ next_protocol =
+ ((const struct rte_flow_item_ipv4 *)
+ (items->spec))->hdr.next_proto_id;
+ break;
+ case RTE_FLOW_ITEM_TYPE_IPV6:
+ ret = mlx5_flow_validate_item_ipv6(items, item_flags,
+ error);
+ if (ret < 0)
+ return ret;
+ item_flags |= tunnel ? MLX5_FLOW_LAYER_INNER_L3_IPV6 :
+ MLX5_FLOW_LAYER_OUTER_L3_IPV6;
+ if (items->mask != NULL &&
+ ((const struct rte_flow_item_ipv6 *)
+ items->mask)->hdr.proto)
+ next_protocol =
+ ((const struct rte_flow_item_ipv6 *)
+ items->spec)->hdr.proto;
+ break;
+ case RTE_FLOW_ITEM_TYPE_UDP:
+ ret = mlx5_flow_validate_item_udp(items, item_flags,
+ next_protocol,
+ error);
+ if (ret < 0)
+ return ret;
+ item_flags |= tunnel ? MLX5_FLOW_LAYER_INNER_L4_UDP :
+ MLX5_FLOW_LAYER_OUTER_L4_UDP;
+ break;
+ case RTE_FLOW_ITEM_TYPE_TCP:
+ ret = mlx5_flow_validate_item_tcp
+ (items, item_flags,
+ next_protocol,
+ &rte_flow_item_tcp_mask,
+ error);
+ if (ret < 0)
+ return ret;
+ item_flags |= tunnel ? MLX5_FLOW_LAYER_INNER_L4_TCP :
+ MLX5_FLOW_LAYER_OUTER_L4_TCP;
+ break;
+ case RTE_FLOW_ITEM_TYPE_VXLAN:
+ ret = mlx5_flow_validate_item_vxlan(items, item_flags,
+ error);
+ if (ret < 0)
+ return ret;
+ item_flags |= MLX5_FLOW_LAYER_VXLAN;
+ break;
+ case RTE_FLOW_ITEM_TYPE_VXLAN_GPE:
+ ret = mlx5_flow_validate_item_vxlan_gpe(items,
+ item_flags,
+ dev, error);
+ if (ret < 0)
+ return ret;
+ item_flags |= MLX5_FLOW_LAYER_VXLAN_GPE;
+ break;
+ case RTE_FLOW_ITEM_TYPE_GRE:
+ ret = mlx5_flow_validate_item_gre(items, item_flags,
+ next_protocol, error);
+ if (ret < 0)
+ return ret;
+ item_flags |= MLX5_FLOW_LAYER_GRE;
+ break;
+ case RTE_FLOW_ITEM_TYPE_MPLS:
+ ret = mlx5_flow_validate_item_mpls(items, item_flags,
+ next_protocol,
+ error);
+ if (ret < 0)
+ return ret;
+ if (next_protocol != 0xff &&
+ next_protocol != IPPROTO_MPLS)
+ return rte_flow_error_set
+ (error, EINVAL,
+ RTE_FLOW_ERROR_TYPE_ITEM, items,
+ "protocol filtering not compatible"
+ " with MPLS layer");
+ item_flags |= MLX5_FLOW_LAYER_MPLS;
+ break;
+ default:
+ return rte_flow_error_set(error, ENOTSUP,
+ RTE_FLOW_ERROR_TYPE_ITEM,
+ NULL, "item not supported");
+ }
+ }
+ for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
+ switch (actions->type) {
+ case RTE_FLOW_ACTION_TYPE_VOID:
+ break;
+ case RTE_FLOW_ACTION_TYPE_FLAG:
+ ret = mlx5_flow_validate_action_flag(action_flags,
+ attr,
+ error);
+ if (ret < 0)
+ return ret;
+ action_flags |= MLX5_FLOW_ACTION_FLAG;
+ break;
+ case RTE_FLOW_ACTION_TYPE_MARK:
+ ret = mlx5_flow_validate_action_mark(actions,
+ action_flags,
+ attr,
+ error);
+ if (ret < 0)
+ return ret;
+ action_flags |= MLX5_FLOW_ACTION_MARK;
+ break;
+ case RTE_FLOW_ACTION_TYPE_DROP:
+ ret = mlx5_flow_validate_action_drop(action_flags,
+ attr,
+ error);
+ if (ret < 0)
+ return ret;
+ action_flags |= MLX5_FLOW_ACTION_DROP;
+ break;
+ case RTE_FLOW_ACTION_TYPE_QUEUE:
+ ret = mlx5_flow_validate_action_queue(actions,
+ action_flags, dev,
+ attr,
+ error);
+ if (ret < 0)
+ return ret;
+ action_flags |= MLX5_FLOW_ACTION_QUEUE;
+ break;
+ case RTE_FLOW_ACTION_TYPE_RSS:
+ ret = mlx5_flow_validate_action_rss(actions,
+ action_flags, dev,
+ attr,
+ error);
+ if (ret < 0)
+ return ret;
+ action_flags |= MLX5_FLOW_ACTION_RSS;
+ break;
+ case RTE_FLOW_ACTION_TYPE_COUNT:
+ ret = mlx5_flow_validate_action_count(dev, attr, error);
+ if (ret < 0)
+ return ret;
+ action_flags |= MLX5_FLOW_ACTION_COUNT;
+ break;
+ default:
+ return rte_flow_error_set(error, ENOTSUP,
+ RTE_FLOW_ERROR_TYPE_ACTION,
+ actions,
+ "action not supported");
+ }
+ }
+ if (!(action_flags & MLX5_FLOW_FATE_ACTIONS))
+ return rte_flow_error_set(error, EINVAL,
+ RTE_FLOW_ERROR_TYPE_ACTION, actions,
+ "no fate action is found");
+ return 0;
+}
+
+/**
+ * Calculate the required bytes that are needed for the action part of the
+ * Verbs flow. In addition, it returns bit-fields with all the detected
+ * actions, in order to avoid another iteration over the actions.
+ *
+ * @param[in] actions
+ * Pointer to the list of actions.
+ * @param[out] action_flags
+ * Pointer to the detected actions.
+ *
+ * @return
+ * The size of the memory needed for all actions.
+ */
+static int
+flow_verbs_get_actions_and_size(const struct rte_flow_action actions[],
+ uint64_t *action_flags)
+{
+ int size = 0;
+ uint64_t detected_actions = 0;
+
+ for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
+ switch (actions->type) {
+ case RTE_FLOW_ACTION_TYPE_VOID:
+ break;
+ case RTE_FLOW_ACTION_TYPE_FLAG:
+ size += sizeof(struct ibv_flow_spec_action_tag);
+ detected_actions |= MLX5_FLOW_ACTION_FLAG;
+ break;
+ case RTE_FLOW_ACTION_TYPE_MARK:
+ size += sizeof(struct ibv_flow_spec_action_tag);
+ detected_actions |= MLX5_FLOW_ACTION_MARK;
+ break;
+ case RTE_FLOW_ACTION_TYPE_DROP:
+ size += sizeof(struct ibv_flow_spec_action_drop);
+ detected_actions |= MLX5_FLOW_ACTION_DROP;
+ break;
+ case RTE_FLOW_ACTION_TYPE_QUEUE:
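+			/* Queue and RSS actions need no Verbs specification. */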
+ detected_actions |= MLX5_FLOW_ACTION_QUEUE;
+ break;
+ case RTE_FLOW_ACTION_TYPE_RSS:
+ detected_actions |= MLX5_FLOW_ACTION_RSS;
+ break;
+ case RTE_FLOW_ACTION_TYPE_COUNT:
+#if defined(HAVE_IBV_DEVICE_COUNTERS_SET_V42) || \
+ defined(HAVE_IBV_DEVICE_COUNTERS_SET_V45)
+ size += sizeof(struct ibv_flow_spec_counter_action);
+#endif
+ detected_actions |= MLX5_FLOW_ACTION_COUNT;
+ break;
+ default:
+ break;
+ }
+ }
+ *action_flags = detected_actions;
+ return size;
+}
+
+/**
+ * Calculate the required bytes that are needed for the item part of the
+ * Verbs flow. In addition, it returns bit-fields with all the detected
+ * items, in order to avoid another iteration over the items.
+ *
+ * @param[in] items
+ * Pointer to the list of items.
+ * @param[in, out] item_flags
+ * Pointer to the detected items.
+ *
+ * @return
+ * The size of the memory needed for all items.
+ */
+static int
+flow_verbs_get_items_and_size(const struct rte_flow_item items[],
+ uint64_t *item_flags)
+{
+ int size = 0;
+ uint64_t detected_items = 0;
+
+ for (; items->type != RTE_FLOW_ITEM_TYPE_END; items++) {
+ int tunnel = !!(detected_items & MLX5_FLOW_LAYER_TUNNEL);
+
+ switch (items->type) {
+ case RTE_FLOW_ITEM_TYPE_VOID:
+ break;
+ case RTE_FLOW_ITEM_TYPE_ETH:
+ size += sizeof(struct ibv_flow_spec_eth);
+ detected_items |= tunnel ? MLX5_FLOW_LAYER_INNER_L2 :
+ MLX5_FLOW_LAYER_OUTER_L2;
+ break;
+ case RTE_FLOW_ITEM_TYPE_VLAN:
+ size += sizeof(struct ibv_flow_spec_eth);
+ detected_items |= tunnel ? MLX5_FLOW_LAYER_INNER_VLAN :
+ MLX5_FLOW_LAYER_OUTER_VLAN;
+ break;
+ case RTE_FLOW_ITEM_TYPE_IPV4:
+ size += sizeof(struct ibv_flow_spec_ipv4_ext);
+ detected_items |= tunnel ?
+ MLX5_FLOW_LAYER_INNER_L3_IPV4 :
+ MLX5_FLOW_LAYER_OUTER_L3_IPV4;
+ break;
+ case RTE_FLOW_ITEM_TYPE_IPV6:
+ size += sizeof(struct ibv_flow_spec_ipv6);
+ detected_items |= tunnel ?
+ MLX5_FLOW_LAYER_INNER_L3_IPV6 :
+ MLX5_FLOW_LAYER_OUTER_L3_IPV6;
+ break;
+ case RTE_FLOW_ITEM_TYPE_UDP:
+ size += sizeof(struct ibv_flow_spec_tcp_udp);
+ detected_items |= tunnel ?
+ MLX5_FLOW_LAYER_INNER_L4_UDP :
+ MLX5_FLOW_LAYER_OUTER_L4_UDP;
+ break;
+ case RTE_FLOW_ITEM_TYPE_TCP:
+ size += sizeof(struct ibv_flow_spec_tcp_udp);
+ detected_items |= tunnel ?
+ MLX5_FLOW_LAYER_INNER_L4_TCP :
+ MLX5_FLOW_LAYER_OUTER_L4_TCP;
+ break;
+ case RTE_FLOW_ITEM_TYPE_VXLAN:
+ size += sizeof(struct ibv_flow_spec_tunnel);
+ detected_items |= MLX5_FLOW_LAYER_VXLAN;
+ break;
+ case RTE_FLOW_ITEM_TYPE_VXLAN_GPE:
+ size += sizeof(struct ibv_flow_spec_tunnel);
+ detected_items |= MLX5_FLOW_LAYER_VXLAN_GPE;
+ break;
+#ifdef HAVE_IBV_DEVICE_MPLS_SUPPORT
+ case RTE_FLOW_ITEM_TYPE_GRE:
+ size += sizeof(struct ibv_flow_spec_gre);
+ detected_items |= MLX5_FLOW_LAYER_GRE;
+ break;
+ case RTE_FLOW_ITEM_TYPE_MPLS:
+ size += sizeof(struct ibv_flow_spec_mpls);
+ detected_items |= MLX5_FLOW_LAYER_MPLS;
+ break;
+#else
+ case RTE_FLOW_ITEM_TYPE_GRE:
+ size += sizeof(struct ibv_flow_spec_tunnel);
+ detected_items |= MLX5_FLOW_LAYER_TUNNEL;
+ break;
+#endif
+ default:
+ break;
+ }
+ }
+ *item_flags = detected_items;
+ return size;
+}
+
+/**
+ * Internal preparation function. Allocate mlx5_flow with the required size.
+ * The required size is calculated based on the actions and items. This function
+ * also returns the detected actions and items for later use.
+ *
+ * @param[in] attr
+ * Pointer to the flow attributes.
+ * @param[in] items
+ * Pointer to the list of items.
+ * @param[in] actions
+ * Pointer to the list of actions.
+ * @param[out] item_flags
+ * Pointer to bit mask of all items detected.
+ * @param[out] action_flags
+ * Pointer to bit mask of all actions detected.
+ * @param[out] error
+ * Pointer to the error structure.
+ *
+ * @return
+ * Pointer to mlx5_flow object on success, otherwise NULL and rte_errno
+ * is set.
+ */
+static struct mlx5_flow *
+flow_verbs_prepare(const struct rte_flow_attr *attr __rte_unused,
+ const struct rte_flow_item items[],
+ const struct rte_flow_action actions[],
+ uint64_t *item_flags,
+ uint64_t *action_flags,
+ struct rte_flow_error *error)
+{
+ uint32_t size = sizeof(struct mlx5_flow) + sizeof(struct ibv_flow_attr);
+ struct mlx5_flow *flow;
+
+ size += flow_verbs_get_actions_and_size(actions, action_flags);
+ size += flow_verbs_get_items_and_size(items, item_flags);
+ flow = rte_calloc(__func__, 1, size, 0);
+ if (!flow) {
+ rte_flow_error_set(error, ENOMEM,
+ RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
+ "not enough memory to create flow");
+ return NULL;
+ }
+ flow->verbs.attr = (void *)(flow + 1);
+ flow->verbs.specs =
+ (uint8_t *)(flow + 1) + sizeof(struct ibv_flow_attr);
+ return flow;
+}
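[Editor's note] The pointer arithmetic above packs everything into a single allocation: the mlx5_flow header, the ibv_flow_attr and the spec area follow each other in memory. A self-contained sketch with toy stand-in types (not the driver's own) illustrates the same layout:

#include <stdint.h>
#include <stdlib.h>

/* Toy stand-ins, not the driver's types. */
struct toy_attr { uint8_t num_of_specs; };
struct toy_flow { struct toy_attr *attr; uint8_t *specs; };

/* One calloc() holds the header, the attribute and the spec area. */
static struct toy_flow *
toy_prepare(size_t specs_size)
{
	struct toy_flow *flow;

	flow = calloc(1, sizeof(*flow) + sizeof(struct toy_attr) + specs_size);
	if (flow == NULL)
		return NULL;
	flow->attr = (struct toy_attr *)(flow + 1);
	flow->specs = (uint8_t *)(flow + 1) + sizeof(struct toy_attr);
	return flow;
}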
+
+/**
+ * Fill the flow with Verbs specs.
+ *
+ * @param[in] dev
+ * Pointer to Ethernet device.
+ * @param[in, out] dev_flow
+ * Pointer to the mlx5 flow.
+ * @param[in] attr
+ * Pointer to the flow attributes.
+ * @param[in] items
+ * Pointer to the list of items.
+ * @param[in] actions
+ * Pointer to the list of actions.
+ * @param[out] error
+ * Pointer to the error structure.
+ *
+ * @return
+ * 0 on success, a negative errno value otherwise and rte_errno is set.
+ */
+static int
+flow_verbs_translate(struct rte_eth_dev *dev,
+ struct mlx5_flow *dev_flow,
+ const struct rte_flow_attr *attr,
+ const struct rte_flow_item items[],
+ const struct rte_flow_action actions[],
+ struct rte_flow_error *error)
+{
+ uint64_t action_flags = 0;
+ uint64_t item_flags = 0;
+ uint64_t priority = attr->priority;
+ struct priv *priv = dev->data->dev_private;
+
+ if (priority == MLX5_FLOW_PRIO_RSVD)
+ priority = priv->config.flow_prio - 1;
+ for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
+ int ret;
+ switch (actions->type) {
+ case RTE_FLOW_ACTION_TYPE_VOID:
+ break;
+ case RTE_FLOW_ACTION_TYPE_FLAG:
+ flow_verbs_translate_action_flag(actions,
+ &action_flags,
+ dev_flow);
+ break;
+ case RTE_FLOW_ACTION_TYPE_MARK:
+ flow_verbs_translate_action_mark(actions,
+ &action_flags,
+ dev_flow);
+ break;
+ case RTE_FLOW_ACTION_TYPE_DROP:
+ flow_verbs_translate_action_drop(&action_flags,
+ dev_flow);
+ break;
+ case RTE_FLOW_ACTION_TYPE_QUEUE:
+ flow_verbs_translate_action_queue(actions,
+ &action_flags,
+ dev_flow);
+ break;
+ case RTE_FLOW_ACTION_TYPE_RSS:
+ flow_verbs_translate_action_rss(actions,
+ &action_flags,
+ dev_flow);
+ break;
+ case RTE_FLOW_ACTION_TYPE_COUNT:
+ ret = flow_verbs_translate_action_count(dev,
+ actions,
+ &action_flags,
+ dev_flow,
+ error);
+ if (ret < 0)
+ return ret;
+ break;
+ default:
+ return rte_flow_error_set(error, ENOTSUP,
+ RTE_FLOW_ERROR_TYPE_ACTION,
+ actions,
+ "action not supported");
+ }
+ }
+ /* Device flow should have action flags by flow_drv_prepare(). */
+ assert(dev_flow->flow->actions == action_flags);
+ for (; items->type != RTE_FLOW_ITEM_TYPE_END; items++) {
+ switch (items->type) {
+ case RTE_FLOW_ITEM_TYPE_VOID:
+ break;
+ case RTE_FLOW_ITEM_TYPE_ETH:
+ flow_verbs_translate_item_eth(items, &item_flags,
+ dev_flow);
+ break;
+ case RTE_FLOW_ITEM_TYPE_VLAN:
+ flow_verbs_translate_item_vlan(items, &item_flags,
+ dev_flow);
+ break;
+ case RTE_FLOW_ITEM_TYPE_IPV4:
+ flow_verbs_translate_item_ipv4(items, &item_flags,
+ dev_flow);
+ break;
+ case RTE_FLOW_ITEM_TYPE_IPV6:
+ flow_verbs_translate_item_ipv6(items, &item_flags,
+ dev_flow);
+ break;
+ case RTE_FLOW_ITEM_TYPE_UDP:
+ flow_verbs_translate_item_udp(items, &item_flags,
+ dev_flow);
+ break;
+ case RTE_FLOW_ITEM_TYPE_TCP:
+ flow_verbs_translate_item_tcp(items, &item_flags,
+ dev_flow);
+ break;
+ case RTE_FLOW_ITEM_TYPE_VXLAN:
+ flow_verbs_translate_item_vxlan(items, &item_flags,
+ dev_flow);
+ break;
+ case RTE_FLOW_ITEM_TYPE_VXLAN_GPE:
+ flow_verbs_translate_item_vxlan_gpe(items, &item_flags,
+ dev_flow);
+ break;
+ case RTE_FLOW_ITEM_TYPE_GRE:
+ flow_verbs_translate_item_gre(items, &item_flags,
+ dev_flow);
+ break;
+ case RTE_FLOW_ITEM_TYPE_MPLS:
+ flow_verbs_translate_item_mpls(items, &item_flags,
+ dev_flow);
+ break;
+ default:
+ return rte_flow_error_set(error, ENOTSUP,
+ RTE_FLOW_ERROR_TYPE_ITEM,
+ NULL,
+ "item not supported");
+ }
+ }
+ dev_flow->verbs.attr->priority =
+ mlx5_flow_adjust_priority(dev, priority,
+ dev_flow->verbs.attr->priority);
+ return 0;
+}
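[Editor's note] The item/action arrays walked by this translation are the ones applications hand to the public rte_flow API. A minimal, illustrative example (queue index and values are arbitrary) that matches outer IPv4/UDP and steers to an Rx queue could look like this:

#include <rte_errno.h>
#include <rte_flow.h>

/* Illustrative only: match outer IPv4/UDP and steer to Rx queue 3. */
static int
example_create_flow(uint16_t port_id, struct rte_flow **out)
{
	struct rte_flow_attr attr = { .ingress = 1 };
	struct rte_flow_item pattern[] = {
		{ .type = RTE_FLOW_ITEM_TYPE_ETH },
		{ .type = RTE_FLOW_ITEM_TYPE_IPV4 },
		{ .type = RTE_FLOW_ITEM_TYPE_UDP },
		{ .type = RTE_FLOW_ITEM_TYPE_END },
	};
	struct rte_flow_action_queue queue = { .index = 3 };
	struct rte_flow_action actions[] = {
		{ .type = RTE_FLOW_ACTION_TYPE_QUEUE, .conf = &queue },
		{ .type = RTE_FLOW_ACTION_TYPE_END },
	};
	struct rte_flow_error error;

	*out = rte_flow_create(port_id, &attr, pattern, actions, &error);
	return *out != NULL ? 0 : -rte_errno;
}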
+
+/**
+ * Remove the flow from the NIC but keep it in memory.
+ *
+ * @param[in] dev
+ * Pointer to the Ethernet device structure.
+ * @param[in, out] flow
+ * Pointer to flow structure.
+ */
+static void
+flow_verbs_remove(struct rte_eth_dev *dev, struct rte_flow *flow)
+{
+ struct mlx5_flow_verbs *verbs;
+ struct mlx5_flow *dev_flow;
+
+ if (!flow)
+ return;
+ LIST_FOREACH(dev_flow, &flow->dev_flows, next) {
+ verbs = &dev_flow->verbs;
+ if (verbs->flow) {
+ claim_zero(mlx5_glue->destroy_flow(verbs->flow));
+ verbs->flow = NULL;
+ }
+ if (verbs->hrxq) {
+ if (flow->actions & MLX5_FLOW_ACTION_DROP)
+ mlx5_hrxq_drop_release(dev);
+ else
+ mlx5_hrxq_release(dev, verbs->hrxq);
+ verbs->hrxq = NULL;
+ }
+ }
+ if (flow->counter) {
+ flow_verbs_counter_release(flow->counter);
+ flow->counter = NULL;
+ }
+}
+
+/**
+ * Remove the flow from the NIC and the memory.
+ *
+ * @param[in] dev
+ * Pointer to the Ethernet device structure.
+ * @param[in, out] flow
+ * Pointer to flow structure.
+ */
+static void
+flow_verbs_destroy(struct rte_eth_dev *dev, struct rte_flow *flow)
+{
+ struct mlx5_flow *dev_flow;
+
+ if (!flow)
+ return;
+ flow_verbs_remove(dev, flow);
+ while (!LIST_EMPTY(&flow->dev_flows)) {
+ dev_flow = LIST_FIRST(&flow->dev_flows);
+ LIST_REMOVE(dev_flow, next);
+ rte_free(dev_flow);
+ }
+}
+
+/**
+ * Apply the flow to the NIC.
+ *
+ * @param[in] dev
+ * Pointer to the Ethernet device structure.
+ * @param[in, out] flow
+ * Pointer to flow structure.
+ * @param[out] error
+ * Pointer to error structure.
+ *
+ * @return
+ * 0 on success, a negative errno value otherwise and rte_errno is set.
+ */
+static int
+flow_verbs_apply(struct rte_eth_dev *dev, struct rte_flow *flow,
+ struct rte_flow_error *error)
+{
+ struct mlx5_flow_verbs *verbs;
+ struct mlx5_flow *dev_flow;
+ int err;
+
+ LIST_FOREACH(dev_flow, &flow->dev_flows, next) {
+ verbs = &dev_flow->verbs;
+ if (flow->actions & MLX5_FLOW_ACTION_DROP) {
+ verbs->hrxq = mlx5_hrxq_drop_new(dev);
+ if (!verbs->hrxq) {
+ rte_flow_error_set
+ (error, errno,
+ RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
+ "cannot get drop hash queue");
+ goto error;
+ }
+ } else {
+ struct mlx5_hrxq *hrxq;
+
+ hrxq = mlx5_hrxq_get(dev, flow->key,
+ MLX5_RSS_HASH_KEY_LEN,
+ verbs->hash_fields,
+ (*flow->queue),
+ flow->rss.queue_num);
+ if (!hrxq)
+ hrxq = mlx5_hrxq_new(dev, flow->key,
+ MLX5_RSS_HASH_KEY_LEN,
+ verbs->hash_fields,
+ (*flow->queue),
+ flow->rss.queue_num,
+ !!(dev_flow->layers &
+ MLX5_FLOW_LAYER_TUNNEL));
+ if (!hrxq) {
+ rte_flow_error_set
+ (error, rte_errno,
+ RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
+ "cannot get hash queue");
+ goto error;
+ }
+ verbs->hrxq = hrxq;
+ }
+ verbs->flow = mlx5_glue->create_flow(verbs->hrxq->qp,
+ verbs->attr);
+ if (!verbs->flow) {
+ rte_flow_error_set(error, errno,
+ RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
+ NULL,
+ "hardware refuses to create flow");
+ goto error;
+ }
+ }
+ return 0;
+error:
+ err = rte_errno; /* Save rte_errno before cleanup. */
+ LIST_FOREACH(dev_flow, &flow->dev_flows, next) {
+ verbs = &dev_flow->verbs;
+ if (verbs->hrxq) {
+ if (flow->actions & MLX5_FLOW_ACTION_DROP)
+ mlx5_hrxq_drop_release(dev);
+ else
+ mlx5_hrxq_release(dev, verbs->hrxq);
+ verbs->hrxq = NULL;
+ }
+ }
+ rte_errno = err; /* Restore rte_errno. */
+ return -rte_errno;
+}
+
+/**
+ * Query a flow.
+ *
+ * @see rte_flow_query()
+ * @see rte_flow_ops
+ */
+static int
+flow_verbs_query(struct rte_eth_dev *dev,
+ struct rte_flow *flow,
+ const struct rte_flow_action *actions,
+ void *data,
+ struct rte_flow_error *error)
+{
+ int ret = -EINVAL;
+
+ for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
+ switch (actions->type) {
+ case RTE_FLOW_ACTION_TYPE_VOID:
+ break;
+ case RTE_FLOW_ACTION_TYPE_COUNT:
+ ret = flow_verbs_counter_query(dev, flow, data, error);
+ break;
+ default:
+ return rte_flow_error_set(error, ENOTSUP,
+ RTE_FLOW_ERROR_TYPE_ACTION,
+ actions,
+ "action not supported");
+ }
+ }
+ return ret;
+}
+
+const struct mlx5_flow_driver_ops mlx5_flow_verbs_drv_ops = {
+ .validate = flow_verbs_validate,
+ .prepare = flow_verbs_prepare,
+ .translate = flow_verbs_translate,
+ .apply = flow_verbs_apply,
+ .remove = flow_verbs_remove,
+ .destroy = flow_verbs_destroy,
+ .query = flow_verbs_query,
+};
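[Editor's note] These callbacks are one backend behind the generic mlx5_flow layer, which also registers DV and TCF equivalents and selects one per device. A simplified sketch of the assumed create-time sequence (the real dispatcher lives in mlx5_flow.c and is not part of this hunk; the validate() prototype is assumed to mirror the other callbacks):

/*
 * Assumed control flow, for illustration only:
 * validate -> prepare -> translate -> apply, with destroy on failure.
 */
static int
example_backend_create(struct rte_eth_dev *dev,
		       const struct mlx5_flow_driver_ops *ops,
		       struct rte_flow *flow,
		       const struct rte_flow_attr *attr,
		       const struct rte_flow_item items[],
		       const struct rte_flow_action actions[],
		       struct rte_flow_error *error)
{
	uint64_t item_flags = 0;
	uint64_t action_flags = 0;
	struct mlx5_flow *dev_flow;

	if (ops->validate(dev, attr, items, actions, error))
		return -rte_errno;
	dev_flow = ops->prepare(attr, items, actions, &item_flags,
				&action_flags, error);
	if (dev_flow == NULL)
		return -rte_errno;
	dev_flow->flow = flow;
	LIST_INSERT_HEAD(&flow->dev_flows, dev_flow, next);
	if (ops->translate(dev, dev_flow, attr, items, actions, error) ||
	    ops->apply(dev, flow, error)) {
		ops->destroy(dev, flow);
		return -rte_errno;
	}
	return 0;
}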
diff --git a/drivers/net/mlx5/mlx5_glue.c b/drivers/net/mlx5/mlx5_glue.c
index 84f9492a..1afb114f 100644
--- a/drivers/net/mlx5/mlx5_glue.c
+++ b/drivers/net/mlx5/mlx5_glue.c
@@ -215,7 +215,7 @@ static struct ibv_counter_set *
mlx5_glue_create_counter_set(struct ibv_context *context,
struct ibv_counter_set_init_attr *init_attr)
{
-#ifndef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
+#ifndef HAVE_IBV_DEVICE_COUNTERS_SET_V42
(void)context;
(void)init_attr;
return NULL;
@@ -227,7 +227,7 @@ mlx5_glue_create_counter_set(struct ibv_context *context,
static int
mlx5_glue_destroy_counter_set(struct ibv_counter_set *cs)
{
-#ifndef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
+#ifndef HAVE_IBV_DEVICE_COUNTERS_SET_V42
(void)cs;
return ENOTSUP;
#else
@@ -240,7 +240,7 @@ mlx5_glue_describe_counter_set(struct ibv_context *context,
uint16_t counter_set_id,
struct ibv_counter_set_description *cs_desc)
{
-#ifndef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
+#ifndef HAVE_IBV_DEVICE_COUNTERS_SET_V42
(void)context;
(void)counter_set_id;
(void)cs_desc;
@@ -254,7 +254,7 @@ static int
mlx5_glue_query_counter_set(struct ibv_query_counter_set_attr *query_attr,
struct ibv_counter_set_data *cs_data)
{
-#ifndef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
+#ifndef HAVE_IBV_DEVICE_COUNTERS_SET_V42
(void)query_attr;
(void)cs_data;
return ENOTSUP;
@@ -263,6 +263,62 @@ mlx5_glue_query_counter_set(struct ibv_query_counter_set_attr *query_attr,
#endif
}
+static struct ibv_counters *
+mlx5_glue_create_counters(struct ibv_context *context,
+ struct ibv_counters_init_attr *init_attr)
+{
+#ifndef HAVE_IBV_DEVICE_COUNTERS_SET_V45
+ (void)context;
+ (void)init_attr;
+ return NULL;
+#else
+ return ibv_create_counters(context, init_attr);
+#endif
+}
+
+static int
+mlx5_glue_destroy_counters(struct ibv_counters *counters)
+{
+#ifndef HAVE_IBV_DEVICE_COUNTERS_SET_V45
+ (void)counters;
+ return ENOTSUP;
+#else
+ return ibv_destroy_counters(counters);
+#endif
+}
+
+static int
+mlx5_glue_attach_counters(struct ibv_counters *counters,
+ struct ibv_counter_attach_attr *attr,
+ struct ibv_flow *flow)
+{
+#ifndef HAVE_IBV_DEVICE_COUNTERS_SET_V45
+ (void)counters;
+ (void)attr;
+ (void)flow;
+ return ENOTSUP;
+#else
+ return ibv_attach_counters_point_flow(counters, attr, flow);
+#endif
+}
+
+static int
+mlx5_glue_query_counters(struct ibv_counters *counters,
+ uint64_t *counters_value,
+ uint32_t ncounters,
+ uint32_t flags)
+{
+#ifndef HAVE_IBV_DEVICE_COUNTERS_SET_V45
+ (void)counters;
+ (void)counters_value;
+ (void)ncounters;
+ (void)flags;
+ return ENOTSUP;
+#else
+ return ibv_read_counters(counters, counters_value, ncounters, flags);
+#endif
+}
+
static void
mlx5_glue_ack_async_event(struct ibv_async_event *event)
{
@@ -346,6 +402,48 @@ mlx5_glue_dv_create_qp(struct ibv_context *context,
#endif
}
+static struct mlx5dv_flow_matcher *
+mlx5_glue_dv_create_flow_matcher(struct ibv_context *context,
+ struct mlx5dv_flow_matcher_attr *matcher_attr)
+{
+#ifdef HAVE_IBV_FLOW_DV_SUPPORT
+ return mlx5dv_create_flow_matcher(context, matcher_attr);
+#else
+ (void)context;
+ (void)matcher_attr;
+ return NULL;
+#endif
+}
+
+static struct ibv_flow *
+mlx5_glue_dv_create_flow(struct mlx5dv_flow_matcher *matcher,
+ struct mlx5dv_flow_match_parameters *match_value,
+ size_t num_actions,
+ struct mlx5dv_flow_action_attr *actions_attr)
+{
+#ifdef HAVE_IBV_FLOW_DV_SUPPORT
+ return mlx5dv_create_flow(matcher, match_value,
+ num_actions, actions_attr);
+#else
+ (void)matcher;
+ (void)match_value;
+ (void)num_actions;
+ (void)actions_attr;
+ return NULL;
+#endif
+}
+
+static int
+mlx5_glue_dv_destroy_flow_matcher(struct mlx5dv_flow_matcher *matcher)
+{
+#ifdef HAVE_IBV_FLOW_DV_SUPPORT
+ return mlx5dv_destroy_flow_matcher(matcher);
+#else
+ (void)matcher;
+ return 0;
+#endif
+}
+
alignas(RTE_CACHE_LINE_SIZE)
const struct mlx5_glue *mlx5_glue = &(const struct mlx5_glue){
.version = MLX5_GLUE_VERSION,
@@ -382,6 +480,10 @@ const struct mlx5_glue *mlx5_glue = &(const struct mlx5_glue){
.destroy_counter_set = mlx5_glue_destroy_counter_set,
.describe_counter_set = mlx5_glue_describe_counter_set,
.query_counter_set = mlx5_glue_query_counter_set,
+ .create_counters = mlx5_glue_create_counters,
+ .destroy_counters = mlx5_glue_destroy_counters,
+ .attach_counters = mlx5_glue_attach_counters,
+ .query_counters = mlx5_glue_query_counters,
.ack_async_event = mlx5_glue_ack_async_event,
.get_async_event = mlx5_glue_get_async_event,
.port_state_str = mlx5_glue_port_state_str,
@@ -392,4 +494,7 @@ const struct mlx5_glue *mlx5_glue = &(const struct mlx5_glue){
.dv_set_context_attr = mlx5_glue_dv_set_context_attr,
.dv_init_obj = mlx5_glue_dv_init_obj,
.dv_create_qp = mlx5_glue_dv_create_qp,
+ .dv_create_flow_matcher = mlx5_glue_dv_create_flow_matcher,
+ .dv_destroy_flow_matcher = mlx5_glue_dv_destroy_flow_matcher,
+ .dv_create_flow = mlx5_glue_dv_create_flow,
};
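[Editor's note] Every verbs/mlx5dv call goes through this constant function-pointer table so that, when CONFIG_RTE_LIBRTE_MLX5_DLOPEN_DEPS is enabled, rdma-core can be resolved at run time from the glue library. A hedged sketch of a caller inside the PMD, assuming rdma-core headers that provide struct ibv_counters_init_attr and an ibv_context obtained from the device private data:

/* Sketch: allocate a counters object through the glue indirection. */
static struct ibv_counters *
example_alloc_counters(struct ibv_context *ctx)
{
	struct ibv_counters_init_attr init = { 0 };

	/*
	 * Returns NULL either on failure or when the glue library was
	 * built without HAVE_IBV_DEVICE_COUNTERS_SET_V45.
	 */
	return mlx5_glue->create_counters(ctx, &init);
}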
diff --git a/drivers/net/mlx5/mlx5_glue.h b/drivers/net/mlx5/mlx5_glue.h
index e584d367..44bfefed 100644
--- a/drivers/net/mlx5/mlx5_glue.h
+++ b/drivers/net/mlx5/mlx5_glue.h
@@ -23,7 +23,7 @@
#define MLX5_GLUE_VERSION ""
#endif
-#ifndef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
+#ifndef HAVE_IBV_DEVICE_COUNTERS_SET_V42
struct ibv_counter_set;
struct ibv_counter_set_data;
struct ibv_counter_set_description;
@@ -31,6 +31,12 @@ struct ibv_counter_set_init_attr;
struct ibv_query_counter_set_attr;
#endif
+#ifndef HAVE_IBV_DEVICE_COUNTERS_SET_V45
+struct ibv_counters;
+struct ibv_counters_init_attr;
+struct ibv_counter_attach_attr;
+#endif
+
#ifndef HAVE_IBV_DEVICE_TUNNEL_SUPPORT
struct mlx5dv_qp_init_attr;
#endif
@@ -39,6 +45,13 @@ struct mlx5dv_qp_init_attr;
struct mlx5dv_wq_init_attr;
#endif
+#ifndef HAVE_IBV_FLOW_DV_SUPPORT
+struct mlx5dv_flow_matcher;
+struct mlx5dv_flow_matcher_attr;
+struct mlx5dv_flow_action_attr;
+struct mlx5dv_flow_match_parameters;
+#endif
+
/* LIB_GLUE_VERSION must be updated every time this structure is modified. */
struct mlx5_glue {
const char *version;
@@ -99,6 +112,17 @@ struct mlx5_glue {
struct ibv_counter_set_description *cs_desc);
int (*query_counter_set)(struct ibv_query_counter_set_attr *query_attr,
struct ibv_counter_set_data *cs_data);
+ struct ibv_counters *(*create_counters)
+ (struct ibv_context *context,
+ struct ibv_counters_init_attr *init_attr);
+ int (*destroy_counters)(struct ibv_counters *counters);
+ int (*attach_counters)(struct ibv_counters *counters,
+ struct ibv_counter_attach_attr *attr,
+ struct ibv_flow *flow);
+ int (*query_counters)(struct ibv_counters *counters,
+ uint64_t *counters_value,
+ uint32_t ncounters,
+ uint32_t flags);
void (*ack_async_event)(struct ibv_async_event *event);
int (*get_async_event)(struct ibv_context *context,
struct ibv_async_event *event);
@@ -122,6 +146,14 @@ struct mlx5_glue {
(struct ibv_context *context,
struct ibv_qp_init_attr_ex *qp_init_attr_ex,
struct mlx5dv_qp_init_attr *dv_qp_init_attr);
+ struct mlx5dv_flow_matcher *(*dv_create_flow_matcher)
+ (struct ibv_context *context,
+ struct mlx5dv_flow_matcher_attr *matcher_attr);
+ int (*dv_destroy_flow_matcher)(struct mlx5dv_flow_matcher *matcher);
+ struct ibv_flow *(*dv_create_flow)(struct mlx5dv_flow_matcher *matcher,
+ struct mlx5dv_flow_match_parameters *match_value,
+ size_t num_actions,
+ struct mlx5dv_flow_action_attr *actions_attr);
};
const struct mlx5_glue *mlx5_glue;
diff --git a/drivers/net/mlx5/mlx5_mac.c b/drivers/net/mlx5/mlx5_mac.c
index 12ee37f5..672a4761 100644
--- a/drivers/net/mlx5/mlx5_mac.c
+++ b/drivers/net/mlx5/mlx5_mac.c
@@ -49,7 +49,7 @@ mlx5_get_mac(struct rte_eth_dev *dev, uint8_t (*mac)[ETHER_ADDR_LEN])
struct ifreq request;
int ret;
- ret = mlx5_ifreq(dev, SIOCGIFHWADDR, &request, 0);
+ ret = mlx5_ifreq(dev, SIOCGIFHWADDR, &request);
if (ret)
return ret;
memcpy(mac, request.ifr_hwaddr.sa_data, ETHER_ADDR_LEN);
diff --git a/drivers/net/mlx5/mlx5_mr.c b/drivers/net/mlx5/mlx5_mr.c
index 1d1bcb5f..f4b15d3f 100644
--- a/drivers/net/mlx5/mlx5_mr.c
+++ b/drivers/net/mlx5/mlx5_mr.c
@@ -277,6 +277,23 @@ mr_find_next_chunk(struct mlx5_mr *mr, struct mlx5_mr_cache *entry,
uintptr_t end = 0;
uint32_t idx = 0;
+ /* MR for external memory doesn't have memseg list. */
+ if (mr->msl == NULL) {
+ struct ibv_mr *ibv_mr = mr->ibv_mr;
+
+ assert(mr->ms_bmp_n == 1);
+ assert(mr->ms_n == 1);
+ assert(base_idx == 0);
+ /*
+ * Can't search it from memseg list but get it directly from
+ * verbs MR as there's only one chunk.
+ */
+ entry->start = (uintptr_t)ibv_mr->addr;
+ entry->end = (uintptr_t)ibv_mr->addr + mr->ibv_mr->length;
+ entry->lkey = rte_cpu_to_be_32(mr->ibv_mr->lkey);
+ /* Returning 1 ends iteration. */
+ return 1;
+ }
for (idx = base_idx; idx < mr->ms_bmp_n; ++idx) {
if (rte_bitmap_get(mr->ms_bmp, idx)) {
const struct rte_memseg_list *msl;
@@ -811,6 +828,7 @@ mlx5_mr_mem_event_free_cb(struct rte_eth_dev *dev, const void *addr, size_t len)
mr = mr_lookup_dev_list(dev, &entry, start);
if (mr == NULL)
continue;
+ assert(mr->msl); /* Can't be external memory. */
ms = rte_mem_virt2memseg((void *)start, msl);
assert(ms != NULL);
assert(msl->page_sz == ms->hugepage_sz);
@@ -1061,6 +1079,139 @@ mlx5_mr_flush_local_cache(struct mlx5_mr_ctrl *mr_ctrl)
(void *)mr_ctrl, mr_ctrl->cur_gen);
}
+/**
+ * Called during rte_mempool_mem_iter() by mlx5_mr_update_ext_mp().
+ *
+ * An externally allocated chunk is registered and an MR is created for the
+ * chunk. The MR object is added to the global list. If the memseg list of an
+ * MR object (mr->msl) is null, the MR object can be regarded as externally
+ * allocated memory.
+ *
+ * Once external memory is registered, it should remain static. If the memory
+ * is freed and the virtual address range has different physical memory mapped
+ * again, it may cause a crash on the device due to the stale translation
+ * entry. The PMD can't track the free event of the external memory for now.
+ */
+static void
+mlx5_mr_update_ext_mp_cb(struct rte_mempool *mp, void *opaque,
+ struct rte_mempool_memhdr *memhdr,
+ unsigned mem_idx __rte_unused)
+{
+ struct mr_update_mp_data *data = opaque;
+ struct rte_eth_dev *dev = data->dev;
+ struct priv *priv = dev->data->dev_private;
+ struct mlx5_mr_ctrl *mr_ctrl = data->mr_ctrl;
+ struct mlx5_mr *mr = NULL;
+ uintptr_t addr = (uintptr_t)memhdr->addr;
+ size_t len = memhdr->len;
+ struct mlx5_mr_cache entry;
+ uint32_t lkey;
+
+ /* If already registered, it should return. */
+ rte_rwlock_read_lock(&priv->mr.rwlock);
+ lkey = mr_lookup_dev(dev, &entry, addr);
+ rte_rwlock_read_unlock(&priv->mr.rwlock);
+ if (lkey != UINT32_MAX)
+ return;
+ mr = rte_zmalloc_socket(NULL,
+ RTE_ALIGN_CEIL(sizeof(*mr),
+ RTE_CACHE_LINE_SIZE),
+ RTE_CACHE_LINE_SIZE, mp->socket_id);
+ if (mr == NULL) {
+ DRV_LOG(WARNING,
+ "port %u unable to allocate memory for a new MR of"
+ " mempool (%s).",
+ dev->data->port_id, mp->name);
+ data->ret = -1;
+ return;
+ }
+ DRV_LOG(DEBUG, "port %u register MR for chunk #%d of mempool (%s)",
+ dev->data->port_id, mem_idx, mp->name);
+ mr->ibv_mr = mlx5_glue->reg_mr(priv->pd, (void *)addr, len,
+ IBV_ACCESS_LOCAL_WRITE);
+ if (mr->ibv_mr == NULL) {
+ DRV_LOG(WARNING,
+ "port %u fail to create a verbs MR for address (%p)",
+ dev->data->port_id, (void *)addr);
+ rte_free(mr);
+ data->ret = -1;
+ return;
+ }
+ mr->msl = NULL; /* Mark it as external memory. */
+ mr->ms_bmp = NULL;
+ mr->ms_n = 1;
+ mr->ms_bmp_n = 1;
+ rte_rwlock_write_lock(&priv->mr.rwlock);
+ LIST_INSERT_HEAD(&priv->mr.mr_list, mr, mr);
+ DRV_LOG(DEBUG,
+ "port %u MR CREATED (%p) for external memory %p:\n"
+ " [0x%" PRIxPTR ", 0x%" PRIxPTR "),"
+ " lkey=0x%x base_idx=%u ms_n=%u, ms_bmp_n=%u",
+ dev->data->port_id, (void *)mr, (void *)addr,
+ addr, addr + len, rte_cpu_to_be_32(mr->ibv_mr->lkey),
+ mr->ms_base_idx, mr->ms_n, mr->ms_bmp_n);
+ /* Insert to the global cache table. */
+ mr_insert_dev_cache(dev, mr);
+ rte_rwlock_write_unlock(&priv->mr.rwlock);
+ /* Insert to the local cache table. */
+ mlx5_mr_addr2mr_bh(dev, mr_ctrl, addr);
+}
+
+/**
+ * Register MRs for all the memory chunks in a mempool that has externally
+ * allocated memory and fill in the local cache.
+ *
+ * @param dev
+ * Pointer to Ethernet device.
+ * @param mr_ctrl
+ * Pointer to per-queue MR control structure.
+ * @param mp
+ * Pointer to registering Mempool.
+ *
+ * @return
+ * 0 on success, -1 on failure.
+ */
+static uint32_t
+mlx5_mr_update_ext_mp(struct rte_eth_dev *dev, struct mlx5_mr_ctrl *mr_ctrl,
+ struct rte_mempool *mp)
+{
+ struct mr_update_mp_data data = {
+ .dev = dev,
+ .mr_ctrl = mr_ctrl,
+ .ret = 0,
+ };
+
+ rte_mempool_mem_iter(mp, mlx5_mr_update_ext_mp_cb, &data);
+ return data.ret;
+}
+
+/**
+ * Register MRs for all the memory chunks in a mempool that has externally
+ * allocated memory and search for the LKey of the address to return.
+ *
+ * @param txq
+ * Pointer to Tx queue structure.
+ * @param addr
+ * Search key.
+ * @param mp
+ * Pointer to the mempool being registered, to which addr belongs.
+ *
+ * @return
+ * LKey for address on success, UINT32_MAX on failure.
+ */
+uint32_t
+mlx5_tx_update_ext_mp(struct mlx5_txq_data *txq, uintptr_t addr,
+ struct rte_mempool *mp)
+{
+ struct mlx5_txq_ctrl *txq_ctrl =
+ container_of(txq, struct mlx5_txq_ctrl, txq);
+ struct mlx5_mr_ctrl *mr_ctrl = &txq->mr_ctrl;
+ struct priv *priv = txq_ctrl->priv;
+
+ mlx5_mr_update_ext_mp(ETH_DEV(priv), mr_ctrl, mp);
+ return mlx5_tx_addr2mr_bh(txq, addr);
+}
+
/* Called during rte_mempool_mem_iter() by mlx5_mr_update_mp(). */
static void
mlx5_mr_update_mp_cb(struct rte_mempool *mp __rte_unused, void *opaque,
@@ -1104,6 +1255,10 @@ mlx5_mr_update_mp(struct rte_eth_dev *dev, struct mlx5_mr_ctrl *mr_ctrl,
};
rte_mempool_mem_iter(mp, mlx5_mr_update_mp_cb, &data);
+ if (data.ret < 0 && rte_errno == ENXIO) {
+ /* Mempool may have externally allocated memory. */
+ return mlx5_mr_update_ext_mp(dev, mr_ctrl, mp);
+ }
return data.ret;
}
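[Editor's note] Taken together, the ENXIO fallback above and mlx5_tx_update_ext_mp() let the Tx data path cope with mempools backed by externally allocated memory. A hedged sketch of a possible caller (hypothetical helper; the real hook lives in mlx5_rxtx.h and may differ):

/*
 * Hypothetical helper: resolve an mbuf's LKey, registering the externally
 * allocated mempool on a miss.
 */
static inline uint32_t
example_tx_mb2mr(struct mlx5_txq_data *txq, struct rte_mbuf *mb)
{
	uintptr_t addr = (uintptr_t)mb->buf_addr;
	uint32_t lkey = mlx5_tx_addr2mr_bh(txq, addr);

	if (lkey == UINT32_MAX && rte_errno == ENXIO)
		/* Mempool may have externally allocated memory. */
		lkey = mlx5_tx_update_ext_mp(txq, addr, mb->pool);
	return lkey;
}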
diff --git a/drivers/net/mlx5/mlx5_nl_flow.c b/drivers/net/mlx5/mlx5_nl_flow.c
deleted file mode 100644
index a1c8c340..00000000
--- a/drivers/net/mlx5/mlx5_nl_flow.c
+++ /dev/null
@@ -1,1248 +0,0 @@
-/* SPDX-License-Identifier: BSD-3-Clause
- * Copyright 2018 6WIND S.A.
- * Copyright 2018 Mellanox Technologies, Ltd
- */
-
-#include <assert.h>
-#include <errno.h>
-#include <libmnl/libmnl.h>
-#include <linux/if_ether.h>
-#include <linux/netlink.h>
-#include <linux/pkt_cls.h>
-#include <linux/pkt_sched.h>
-#include <linux/rtnetlink.h>
-#include <linux/tc_act/tc_gact.h>
-#include <linux/tc_act/tc_mirred.h>
-#include <netinet/in.h>
-#include <stdalign.h>
-#include <stdbool.h>
-#include <stddef.h>
-#include <stdint.h>
-#include <stdlib.h>
-#include <sys/socket.h>
-
-#include <rte_byteorder.h>
-#include <rte_errno.h>
-#include <rte_ether.h>
-#include <rte_flow.h>
-
-#include "mlx5.h"
-#include "mlx5_autoconf.h"
-
-#ifdef HAVE_TC_ACT_VLAN
-
-#include <linux/tc_act/tc_vlan.h>
-
-#else /* HAVE_TC_ACT_VLAN */
-
-#define TCA_VLAN_ACT_POP 1
-#define TCA_VLAN_ACT_PUSH 2
-#define TCA_VLAN_ACT_MODIFY 3
-#define TCA_VLAN_PARMS 2
-#define TCA_VLAN_PUSH_VLAN_ID 3
-#define TCA_VLAN_PUSH_VLAN_PROTOCOL 4
-#define TCA_VLAN_PAD 5
-#define TCA_VLAN_PUSH_VLAN_PRIORITY 6
-
-struct tc_vlan {
- tc_gen;
- int v_action;
-};
-
-#endif /* HAVE_TC_ACT_VLAN */
-
-/* Normally found in linux/netlink.h. */
-#ifndef NETLINK_CAP_ACK
-#define NETLINK_CAP_ACK 10
-#endif
-
-/* Normally found in linux/pkt_sched.h. */
-#ifndef TC_H_MIN_INGRESS
-#define TC_H_MIN_INGRESS 0xfff2u
-#endif
-
-/* Normally found in linux/pkt_cls.h. */
-#ifndef TCA_CLS_FLAGS_SKIP_SW
-#define TCA_CLS_FLAGS_SKIP_SW (1 << 1)
-#endif
-#ifndef HAVE_TCA_FLOWER_ACT
-#define TCA_FLOWER_ACT 3
-#endif
-#ifndef HAVE_TCA_FLOWER_FLAGS
-#define TCA_FLOWER_FLAGS 22
-#endif
-#ifndef HAVE_TCA_FLOWER_KEY_ETH_TYPE
-#define TCA_FLOWER_KEY_ETH_TYPE 8
-#endif
-#ifndef HAVE_TCA_FLOWER_KEY_ETH_DST
-#define TCA_FLOWER_KEY_ETH_DST 4
-#endif
-#ifndef HAVE_TCA_FLOWER_KEY_ETH_DST_MASK
-#define TCA_FLOWER_KEY_ETH_DST_MASK 5
-#endif
-#ifndef HAVE_TCA_FLOWER_KEY_ETH_SRC
-#define TCA_FLOWER_KEY_ETH_SRC 6
-#endif
-#ifndef HAVE_TCA_FLOWER_KEY_ETH_SRC_MASK
-#define TCA_FLOWER_KEY_ETH_SRC_MASK 7
-#endif
-#ifndef HAVE_TCA_FLOWER_KEY_IP_PROTO
-#define TCA_FLOWER_KEY_IP_PROTO 9
-#endif
-#ifndef HAVE_TCA_FLOWER_KEY_IPV4_SRC
-#define TCA_FLOWER_KEY_IPV4_SRC 10
-#endif
-#ifndef HAVE_TCA_FLOWER_KEY_IPV4_SRC_MASK
-#define TCA_FLOWER_KEY_IPV4_SRC_MASK 11
-#endif
-#ifndef HAVE_TCA_FLOWER_KEY_IPV4_DST
-#define TCA_FLOWER_KEY_IPV4_DST 12
-#endif
-#ifndef HAVE_TCA_FLOWER_KEY_IPV4_DST_MASK
-#define TCA_FLOWER_KEY_IPV4_DST_MASK 13
-#endif
-#ifndef HAVE_TCA_FLOWER_KEY_IPV6_SRC
-#define TCA_FLOWER_KEY_IPV6_SRC 14
-#endif
-#ifndef HAVE_TCA_FLOWER_KEY_IPV6_SRC_MASK
-#define TCA_FLOWER_KEY_IPV6_SRC_MASK 15
-#endif
-#ifndef HAVE_TCA_FLOWER_KEY_IPV6_DST
-#define TCA_FLOWER_KEY_IPV6_DST 16
-#endif
-#ifndef HAVE_TCA_FLOWER_KEY_IPV6_DST_MASK
-#define TCA_FLOWER_KEY_IPV6_DST_MASK 17
-#endif
-#ifndef HAVE_TCA_FLOWER_KEY_TCP_SRC
-#define TCA_FLOWER_KEY_TCP_SRC 18
-#endif
-#ifndef HAVE_TCA_FLOWER_KEY_TCP_SRC_MASK
-#define TCA_FLOWER_KEY_TCP_SRC_MASK 35
-#endif
-#ifndef HAVE_TCA_FLOWER_KEY_TCP_DST
-#define TCA_FLOWER_KEY_TCP_DST 19
-#endif
-#ifndef HAVE_TCA_FLOWER_KEY_TCP_DST_MASK
-#define TCA_FLOWER_KEY_TCP_DST_MASK 36
-#endif
-#ifndef HAVE_TCA_FLOWER_KEY_UDP_SRC
-#define TCA_FLOWER_KEY_UDP_SRC 20
-#endif
-#ifndef HAVE_TCA_FLOWER_KEY_UDP_SRC_MASK
-#define TCA_FLOWER_KEY_UDP_SRC_MASK 37
-#endif
-#ifndef HAVE_TCA_FLOWER_KEY_UDP_DST
-#define TCA_FLOWER_KEY_UDP_DST 21
-#endif
-#ifndef HAVE_TCA_FLOWER_KEY_UDP_DST_MASK
-#define TCA_FLOWER_KEY_UDP_DST_MASK 38
-#endif
-#ifndef HAVE_TCA_FLOWER_KEY_VLAN_ID
-#define TCA_FLOWER_KEY_VLAN_ID 23
-#endif
-#ifndef HAVE_TCA_FLOWER_KEY_VLAN_PRIO
-#define TCA_FLOWER_KEY_VLAN_PRIO 24
-#endif
-#ifndef HAVE_TCA_FLOWER_KEY_VLAN_ETH_TYPE
-#define TCA_FLOWER_KEY_VLAN_ETH_TYPE 25
-#endif
-
-/** Parser state definitions for mlx5_nl_flow_trans[]. */
-enum mlx5_nl_flow_trans {
- INVALID,
- BACK,
- ATTR,
- PATTERN,
- ITEM_VOID,
- ITEM_PORT_ID,
- ITEM_ETH,
- ITEM_VLAN,
- ITEM_IPV4,
- ITEM_IPV6,
- ITEM_TCP,
- ITEM_UDP,
- ACTIONS,
- ACTION_VOID,
- ACTION_PORT_ID,
- ACTION_DROP,
- ACTION_OF_POP_VLAN,
- ACTION_OF_PUSH_VLAN,
- ACTION_OF_SET_VLAN_VID,
- ACTION_OF_SET_VLAN_PCP,
- END,
-};
-
-#define TRANS(...) (const enum mlx5_nl_flow_trans []){ __VA_ARGS__, INVALID, }
-
-#define PATTERN_COMMON \
- ITEM_VOID, ITEM_PORT_ID, ACTIONS
-#define ACTIONS_COMMON \
- ACTION_VOID, ACTION_OF_POP_VLAN, ACTION_OF_PUSH_VLAN, \
- ACTION_OF_SET_VLAN_VID, ACTION_OF_SET_VLAN_PCP
-#define ACTIONS_FATE \
- ACTION_PORT_ID, ACTION_DROP
-
-/** Parser state transitions used by mlx5_nl_flow_transpose(). */
-static const enum mlx5_nl_flow_trans *const mlx5_nl_flow_trans[] = {
- [INVALID] = NULL,
- [BACK] = NULL,
- [ATTR] = TRANS(PATTERN),
- [PATTERN] = TRANS(ITEM_ETH, PATTERN_COMMON),
- [ITEM_VOID] = TRANS(BACK),
- [ITEM_PORT_ID] = TRANS(BACK),
- [ITEM_ETH] = TRANS(ITEM_IPV4, ITEM_IPV6, ITEM_VLAN, PATTERN_COMMON),
- [ITEM_VLAN] = TRANS(ITEM_IPV4, ITEM_IPV6, PATTERN_COMMON),
- [ITEM_IPV4] = TRANS(ITEM_TCP, ITEM_UDP, PATTERN_COMMON),
- [ITEM_IPV6] = TRANS(ITEM_TCP, ITEM_UDP, PATTERN_COMMON),
- [ITEM_TCP] = TRANS(PATTERN_COMMON),
- [ITEM_UDP] = TRANS(PATTERN_COMMON),
- [ACTIONS] = TRANS(ACTIONS_FATE, ACTIONS_COMMON),
- [ACTION_VOID] = TRANS(BACK),
- [ACTION_PORT_ID] = TRANS(ACTION_VOID, END),
- [ACTION_DROP] = TRANS(ACTION_VOID, END),
- [ACTION_OF_POP_VLAN] = TRANS(ACTIONS_FATE, ACTIONS_COMMON),
- [ACTION_OF_PUSH_VLAN] = TRANS(ACTIONS_FATE, ACTIONS_COMMON),
- [ACTION_OF_SET_VLAN_VID] = TRANS(ACTIONS_FATE, ACTIONS_COMMON),
- [ACTION_OF_SET_VLAN_PCP] = TRANS(ACTIONS_FATE, ACTIONS_COMMON),
- [END] = NULL,
-};
-
-/** Empty masks for known item types. */
-static const union {
- struct rte_flow_item_port_id port_id;
- struct rte_flow_item_eth eth;
- struct rte_flow_item_vlan vlan;
- struct rte_flow_item_ipv4 ipv4;
- struct rte_flow_item_ipv6 ipv6;
- struct rte_flow_item_tcp tcp;
- struct rte_flow_item_udp udp;
-} mlx5_nl_flow_mask_empty;
-
-/** Supported masks for known item types. */
-static const struct {
- struct rte_flow_item_port_id port_id;
- struct rte_flow_item_eth eth;
- struct rte_flow_item_vlan vlan;
- struct rte_flow_item_ipv4 ipv4;
- struct rte_flow_item_ipv6 ipv6;
- struct rte_flow_item_tcp tcp;
- struct rte_flow_item_udp udp;
-} mlx5_nl_flow_mask_supported = {
- .port_id = {
- .id = 0xffffffff,
- },
- .eth = {
- .type = RTE_BE16(0xffff),
- .dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
- .src.addr_bytes = "\xff\xff\xff\xff\xff\xff",
- },
- .vlan = {
- /* PCP and VID only, no DEI. */
- .tci = RTE_BE16(0xefff),
- .inner_type = RTE_BE16(0xffff),
- },
- .ipv4.hdr = {
- .next_proto_id = 0xff,
- .src_addr = RTE_BE32(0xffffffff),
- .dst_addr = RTE_BE32(0xffffffff),
- },
- .ipv6.hdr = {
- .proto = 0xff,
- .src_addr =
- "\xff\xff\xff\xff\xff\xff\xff\xff"
- "\xff\xff\xff\xff\xff\xff\xff\xff",
- .dst_addr =
- "\xff\xff\xff\xff\xff\xff\xff\xff"
- "\xff\xff\xff\xff\xff\xff\xff\xff",
- },
- .tcp.hdr = {
- .src_port = RTE_BE16(0xffff),
- .dst_port = RTE_BE16(0xffff),
- },
- .udp.hdr = {
- .src_port = RTE_BE16(0xffff),
- .dst_port = RTE_BE16(0xffff),
- },
-};
-
-/**
- * Retrieve mask for pattern item.
- *
- * This function does basic sanity checks on a pattern item in order to
- * return the most appropriate mask for it.
- *
- * @param[in] item
- * Item specification.
- * @param[in] mask_default
- * Default mask for pattern item as specified by the flow API.
- * @param[in] mask_supported
- * Mask fields supported by the implementation.
- * @param[in] mask_empty
- * Empty mask to return when there is no specification.
- * @param[out] error
- * Perform verbose error reporting if not NULL.
- *
- * @return
- * Either @p item->mask or one of the mask parameters on success, NULL
- * otherwise and rte_errno is set.
- */
-static const void *
-mlx5_nl_flow_item_mask(const struct rte_flow_item *item,
- const void *mask_default,
- const void *mask_supported,
- const void *mask_empty,
- size_t mask_size,
- struct rte_flow_error *error)
-{
- const uint8_t *mask;
- size_t i;
-
- /* item->last and item->mask cannot exist without item->spec. */
- if (!item->spec && (item->mask || item->last)) {
- rte_flow_error_set
- (error, EINVAL, RTE_FLOW_ERROR_TYPE_ITEM, item,
- "\"mask\" or \"last\" field provided without a"
- " corresponding \"spec\"");
- return NULL;
- }
- /* No spec, no mask, no problem. */
- if (!item->spec)
- return mask_empty;
- mask = item->mask ? item->mask : mask_default;
- assert(mask);
- /*
- * Single-pass check to make sure that:
- * - Mask is supported, no bits are set outside mask_supported.
- * - Both item->spec and item->last are included in mask.
- */
- for (i = 0; i != mask_size; ++i) {
- if (!mask[i])
- continue;
- if ((mask[i] | ((const uint8_t *)mask_supported)[i]) !=
- ((const uint8_t *)mask_supported)[i]) {
- rte_flow_error_set
- (error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM_MASK,
- mask, "unsupported field found in \"mask\"");
- return NULL;
- }
- if (item->last &&
- (((const uint8_t *)item->spec)[i] & mask[i]) !=
- (((const uint8_t *)item->last)[i] & mask[i])) {
- rte_flow_error_set
- (error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM_LAST,
- item->last,
- "range between \"spec\" and \"last\" not"
- " comprised in \"mask\"");
- return NULL;
- }
- }
- return mask;
-}
-
-/**
- * Transpose flow rule description to rtnetlink message.
- *
- * This function transposes a flow rule description to a traffic control
- * (TC) filter creation message ready to be sent over Netlink.
- *
- * Target interface is specified as the first entry of the @p ptoi table.
- * Subsequent entries enable this function to resolve other DPDK port IDs
- * found in the flow rule.
- *
- * @param[out] buf
- * Output message buffer. May be NULL when @p size is 0.
- * @param size
- * Size of @p buf. Message may be truncated if not large enough.
- * @param[in] ptoi
- * DPDK port ID to network interface index translation table. This table
- * is terminated by an entry with a zero ifindex value.
- * @param[in] attr
- * Flow rule attributes.
- * @param[in] pattern
- * Pattern specification.
- * @param[in] actions
- * Associated actions.
- * @param[out] error
- * Perform verbose error reporting if not NULL.
- *
- * @return
- * A positive value representing the exact size of the message in bytes
- * regardless of the @p size parameter on success, a negative errno value
- * otherwise and rte_errno is set.
- */
-int
-mlx5_nl_flow_transpose(void *buf,
- size_t size,
- const struct mlx5_nl_flow_ptoi *ptoi,
- const struct rte_flow_attr *attr,
- const struct rte_flow_item *pattern,
- const struct rte_flow_action *actions,
- struct rte_flow_error *error)
-{
- alignas(struct nlmsghdr)
- uint8_t buf_tmp[mnl_nlmsg_size(sizeof(struct tcmsg) + 1024)];
- const struct rte_flow_item *item;
- const struct rte_flow_action *action;
- unsigned int n;
- uint32_t act_index_cur;
- bool in_port_id_set;
- bool eth_type_set;
- bool vlan_present;
- bool vlan_eth_type_set;
- bool ip_proto_set;
- struct nlattr *na_flower;
- struct nlattr *na_flower_act;
- struct nlattr *na_vlan_id;
- struct nlattr *na_vlan_priority;
- const enum mlx5_nl_flow_trans *trans;
- const enum mlx5_nl_flow_trans *back;
-
- if (!size)
- goto error_nobufs;
-init:
- item = pattern;
- action = actions;
- n = 0;
- act_index_cur = 0;
- in_port_id_set = false;
- eth_type_set = false;
- vlan_present = false;
- vlan_eth_type_set = false;
- ip_proto_set = false;
- na_flower = NULL;
- na_flower_act = NULL;
- na_vlan_id = NULL;
- na_vlan_priority = NULL;
- trans = TRANS(ATTR);
- back = trans;
-trans:
- switch (trans[n++]) {
- union {
- const struct rte_flow_item_port_id *port_id;
- const struct rte_flow_item_eth *eth;
- const struct rte_flow_item_vlan *vlan;
- const struct rte_flow_item_ipv4 *ipv4;
- const struct rte_flow_item_ipv6 *ipv6;
- const struct rte_flow_item_tcp *tcp;
- const struct rte_flow_item_udp *udp;
- } spec, mask;
- union {
- const struct rte_flow_action_port_id *port_id;
- const struct rte_flow_action_of_push_vlan *of_push_vlan;
- const struct rte_flow_action_of_set_vlan_vid *
- of_set_vlan_vid;
- const struct rte_flow_action_of_set_vlan_pcp *
- of_set_vlan_pcp;
- } conf;
- struct nlmsghdr *nlh;
- struct tcmsg *tcm;
- struct nlattr *act_index;
- struct nlattr *act;
- unsigned int i;
-
- case INVALID:
- if (item->type)
- return rte_flow_error_set
- (error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM,
- item, "unsupported pattern item combination");
- else if (action->type)
- return rte_flow_error_set
- (error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION,
- action, "unsupported action combination");
- return rte_flow_error_set
- (error, ENOTSUP, RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
- "flow rule lacks some kind of fate action");
- case BACK:
- trans = back;
- n = 0;
- goto trans;
- case ATTR:
- /*
- * Supported attributes: no groups, some priorities and
- * ingress only. Don't care about transfer as it is the
- * caller's problem.
- */
- if (attr->group)
- return rte_flow_error_set
- (error, ENOTSUP,
- RTE_FLOW_ERROR_TYPE_ATTR_GROUP,
- attr, "groups are not supported");
- if (attr->priority > 0xfffe)
- return rte_flow_error_set
- (error, ENOTSUP,
- RTE_FLOW_ERROR_TYPE_ATTR_PRIORITY,
- attr, "lowest priority level is 0xfffe");
- if (!attr->ingress)
- return rte_flow_error_set
- (error, ENOTSUP,
- RTE_FLOW_ERROR_TYPE_ATTR_INGRESS,
- attr, "only ingress is supported");
- if (attr->egress)
- return rte_flow_error_set
- (error, ENOTSUP,
- RTE_FLOW_ERROR_TYPE_ATTR_INGRESS,
- attr, "egress is not supported");
- if (size < mnl_nlmsg_size(sizeof(*tcm)))
- goto error_nobufs;
- nlh = mnl_nlmsg_put_header(buf);
- nlh->nlmsg_type = 0;
- nlh->nlmsg_flags = 0;
- nlh->nlmsg_seq = 0;
- tcm = mnl_nlmsg_put_extra_header(nlh, sizeof(*tcm));
- tcm->tcm_family = AF_UNSPEC;
- tcm->tcm_ifindex = ptoi[0].ifindex;
- /*
- * Let kernel pick a handle by default. A predictable handle
- * can be set by the caller on the resulting buffer through
- * mlx5_nl_flow_brand().
- */
- tcm->tcm_handle = 0;
- tcm->tcm_parent = TC_H_MAKE(TC_H_INGRESS, TC_H_MIN_INGRESS);
- /*
- * Priority cannot be zero to prevent the kernel from
- * picking one automatically.
- */
- tcm->tcm_info = TC_H_MAKE((attr->priority + 1) << 16,
- RTE_BE16(ETH_P_ALL));
- break;
- case PATTERN:
- if (!mnl_attr_put_strz_check(buf, size, TCA_KIND, "flower"))
- goto error_nobufs;
- na_flower = mnl_attr_nest_start_check(buf, size, TCA_OPTIONS);
- if (!na_flower)
- goto error_nobufs;
- if (!mnl_attr_put_u32_check(buf, size, TCA_FLOWER_FLAGS,
- TCA_CLS_FLAGS_SKIP_SW))
- goto error_nobufs;
- break;
- case ITEM_VOID:
- if (item->type != RTE_FLOW_ITEM_TYPE_VOID)
- goto trans;
- ++item;
- break;
- case ITEM_PORT_ID:
- if (item->type != RTE_FLOW_ITEM_TYPE_PORT_ID)
- goto trans;
- mask.port_id = mlx5_nl_flow_item_mask
- (item, &rte_flow_item_port_id_mask,
- &mlx5_nl_flow_mask_supported.port_id,
- &mlx5_nl_flow_mask_empty.port_id,
- sizeof(mlx5_nl_flow_mask_supported.port_id), error);
- if (!mask.port_id)
- return -rte_errno;
- if (mask.port_id == &mlx5_nl_flow_mask_empty.port_id) {
- in_port_id_set = 1;
- ++item;
- break;
- }
- spec.port_id = item->spec;
- if (mask.port_id->id && mask.port_id->id != 0xffffffff)
- return rte_flow_error_set
- (error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM_MASK,
- mask.port_id,
- "no support for partial mask on"
- " \"id\" field");
- if (!mask.port_id->id)
- i = 0;
- else
- for (i = 0; ptoi[i].ifindex; ++i)
- if (ptoi[i].port_id == spec.port_id->id)
- break;
- if (!ptoi[i].ifindex)
- return rte_flow_error_set
- (error, ENODEV, RTE_FLOW_ERROR_TYPE_ITEM_SPEC,
- spec.port_id,
- "missing data to convert port ID to ifindex");
- tcm = mnl_nlmsg_get_payload(buf);
- if (in_port_id_set &&
- ptoi[i].ifindex != (unsigned int)tcm->tcm_ifindex)
- return rte_flow_error_set
- (error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM_SPEC,
- spec.port_id,
- "cannot match traffic for several port IDs"
- " through a single flow rule");
- tcm->tcm_ifindex = ptoi[i].ifindex;
- in_port_id_set = 1;
- ++item;
- break;
- case ITEM_ETH:
- if (item->type != RTE_FLOW_ITEM_TYPE_ETH)
- goto trans;
- mask.eth = mlx5_nl_flow_item_mask
- (item, &rte_flow_item_eth_mask,
- &mlx5_nl_flow_mask_supported.eth,
- &mlx5_nl_flow_mask_empty.eth,
- sizeof(mlx5_nl_flow_mask_supported.eth), error);
- if (!mask.eth)
- return -rte_errno;
- if (mask.eth == &mlx5_nl_flow_mask_empty.eth) {
- ++item;
- break;
- }
- spec.eth = item->spec;
- if (mask.eth->type && mask.eth->type != RTE_BE16(0xffff))
- return rte_flow_error_set
- (error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM_MASK,
- mask.eth,
- "no support for partial mask on"
- " \"type\" field");
- if (mask.eth->type) {
- if (!mnl_attr_put_u16_check(buf, size,
- TCA_FLOWER_KEY_ETH_TYPE,
- spec.eth->type))
- goto error_nobufs;
- eth_type_set = 1;
- }
- if ((!is_zero_ether_addr(&mask.eth->dst) &&
- (!mnl_attr_put_check(buf, size,
- TCA_FLOWER_KEY_ETH_DST,
- ETHER_ADDR_LEN,
- spec.eth->dst.addr_bytes) ||
- !mnl_attr_put_check(buf, size,
- TCA_FLOWER_KEY_ETH_DST_MASK,
- ETHER_ADDR_LEN,
- mask.eth->dst.addr_bytes))) ||
- (!is_zero_ether_addr(&mask.eth->src) &&
- (!mnl_attr_put_check(buf, size,
- TCA_FLOWER_KEY_ETH_SRC,
- ETHER_ADDR_LEN,
- spec.eth->src.addr_bytes) ||
- !mnl_attr_put_check(buf, size,
- TCA_FLOWER_KEY_ETH_SRC_MASK,
- ETHER_ADDR_LEN,
- mask.eth->src.addr_bytes))))
- goto error_nobufs;
- ++item;
- break;
- case ITEM_VLAN:
- if (item->type != RTE_FLOW_ITEM_TYPE_VLAN)
- goto trans;
- mask.vlan = mlx5_nl_flow_item_mask
- (item, &rte_flow_item_vlan_mask,
- &mlx5_nl_flow_mask_supported.vlan,
- &mlx5_nl_flow_mask_empty.vlan,
- sizeof(mlx5_nl_flow_mask_supported.vlan), error);
- if (!mask.vlan)
- return -rte_errno;
- if (!eth_type_set &&
- !mnl_attr_put_u16_check(buf, size,
- TCA_FLOWER_KEY_ETH_TYPE,
- RTE_BE16(ETH_P_8021Q)))
- goto error_nobufs;
- eth_type_set = 1;
- vlan_present = 1;
- if (mask.vlan == &mlx5_nl_flow_mask_empty.vlan) {
- ++item;
- break;
- }
- spec.vlan = item->spec;
- if ((mask.vlan->tci & RTE_BE16(0xe000) &&
- (mask.vlan->tci & RTE_BE16(0xe000)) != RTE_BE16(0xe000)) ||
- (mask.vlan->tci & RTE_BE16(0x0fff) &&
- (mask.vlan->tci & RTE_BE16(0x0fff)) != RTE_BE16(0x0fff)) ||
- (mask.vlan->inner_type &&
- mask.vlan->inner_type != RTE_BE16(0xffff)))
- return rte_flow_error_set
- (error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM_MASK,
- mask.vlan,
- "no support for partial masks on"
- " \"tci\" (PCP and VID parts) and"
- " \"inner_type\" fields");
- if (mask.vlan->inner_type) {
- if (!mnl_attr_put_u16_check
- (buf, size, TCA_FLOWER_KEY_VLAN_ETH_TYPE,
- spec.vlan->inner_type))
- goto error_nobufs;
- vlan_eth_type_set = 1;
- }
- if ((mask.vlan->tci & RTE_BE16(0xe000) &&
- !mnl_attr_put_u8_check
- (buf, size, TCA_FLOWER_KEY_VLAN_PRIO,
- (rte_be_to_cpu_16(spec.vlan->tci) >> 13) & 0x7)) ||
- (mask.vlan->tci & RTE_BE16(0x0fff) &&
- !mnl_attr_put_u16_check
- (buf, size, TCA_FLOWER_KEY_VLAN_ID,
- rte_be_to_cpu_16(spec.vlan->tci & RTE_BE16(0x0fff)))))
- goto error_nobufs;
- ++item;
- break;
- case ITEM_IPV4:
- if (item->type != RTE_FLOW_ITEM_TYPE_IPV4)
- goto trans;
- mask.ipv4 = mlx5_nl_flow_item_mask
- (item, &rte_flow_item_ipv4_mask,
- &mlx5_nl_flow_mask_supported.ipv4,
- &mlx5_nl_flow_mask_empty.ipv4,
- sizeof(mlx5_nl_flow_mask_supported.ipv4), error);
- if (!mask.ipv4)
- return -rte_errno;
- if ((!eth_type_set || !vlan_eth_type_set) &&
- !mnl_attr_put_u16_check(buf, size,
- vlan_present ?
- TCA_FLOWER_KEY_VLAN_ETH_TYPE :
- TCA_FLOWER_KEY_ETH_TYPE,
- RTE_BE16(ETH_P_IP)))
- goto error_nobufs;
- eth_type_set = 1;
- vlan_eth_type_set = 1;
- if (mask.ipv4 == &mlx5_nl_flow_mask_empty.ipv4) {
- ++item;
- break;
- }
- spec.ipv4 = item->spec;
- if (mask.ipv4->hdr.next_proto_id &&
- mask.ipv4->hdr.next_proto_id != 0xff)
- return rte_flow_error_set
- (error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM_MASK,
- mask.ipv4,
- "no support for partial mask on"
- " \"hdr.next_proto_id\" field");
- if (mask.ipv4->hdr.next_proto_id) {
- if (!mnl_attr_put_u8_check
- (buf, size, TCA_FLOWER_KEY_IP_PROTO,
- spec.ipv4->hdr.next_proto_id))
- goto error_nobufs;
- ip_proto_set = 1;
- }
- if ((mask.ipv4->hdr.src_addr &&
- (!mnl_attr_put_u32_check(buf, size,
- TCA_FLOWER_KEY_IPV4_SRC,
- spec.ipv4->hdr.src_addr) ||
- !mnl_attr_put_u32_check(buf, size,
- TCA_FLOWER_KEY_IPV4_SRC_MASK,
- mask.ipv4->hdr.src_addr))) ||
- (mask.ipv4->hdr.dst_addr &&
- (!mnl_attr_put_u32_check(buf, size,
- TCA_FLOWER_KEY_IPV4_DST,
- spec.ipv4->hdr.dst_addr) ||
- !mnl_attr_put_u32_check(buf, size,
- TCA_FLOWER_KEY_IPV4_DST_MASK,
- mask.ipv4->hdr.dst_addr))))
- goto error_nobufs;
- ++item;
- break;
- case ITEM_IPV6:
- if (item->type != RTE_FLOW_ITEM_TYPE_IPV6)
- goto trans;
- mask.ipv6 = mlx5_nl_flow_item_mask
- (item, &rte_flow_item_ipv6_mask,
- &mlx5_nl_flow_mask_supported.ipv6,
- &mlx5_nl_flow_mask_empty.ipv6,
- sizeof(mlx5_nl_flow_mask_supported.ipv6), error);
- if (!mask.ipv6)
- return -rte_errno;
- if ((!eth_type_set || !vlan_eth_type_set) &&
- !mnl_attr_put_u16_check(buf, size,
- vlan_present ?
- TCA_FLOWER_KEY_VLAN_ETH_TYPE :
- TCA_FLOWER_KEY_ETH_TYPE,
- RTE_BE16(ETH_P_IPV6)))
- goto error_nobufs;
- eth_type_set = 1;
- vlan_eth_type_set = 1;
- if (mask.ipv6 == &mlx5_nl_flow_mask_empty.ipv6) {
- ++item;
- break;
- }
- spec.ipv6 = item->spec;
- if (mask.ipv6->hdr.proto && mask.ipv6->hdr.proto != 0xff)
- return rte_flow_error_set
- (error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM_MASK,
- mask.ipv6,
- "no support for partial mask on"
- " \"hdr.proto\" field");
- if (mask.ipv6->hdr.proto) {
- if (!mnl_attr_put_u8_check
- (buf, size, TCA_FLOWER_KEY_IP_PROTO,
- spec.ipv6->hdr.proto))
- goto error_nobufs;
- ip_proto_set = 1;
- }
- if ((!IN6_IS_ADDR_UNSPECIFIED(mask.ipv6->hdr.src_addr) &&
- (!mnl_attr_put_check(buf, size,
- TCA_FLOWER_KEY_IPV6_SRC,
- sizeof(spec.ipv6->hdr.src_addr),
- spec.ipv6->hdr.src_addr) ||
- !mnl_attr_put_check(buf, size,
- TCA_FLOWER_KEY_IPV6_SRC_MASK,
- sizeof(mask.ipv6->hdr.src_addr),
- mask.ipv6->hdr.src_addr))) ||
- (!IN6_IS_ADDR_UNSPECIFIED(mask.ipv6->hdr.dst_addr) &&
- (!mnl_attr_put_check(buf, size,
- TCA_FLOWER_KEY_IPV6_DST,
- sizeof(spec.ipv6->hdr.dst_addr),
- spec.ipv6->hdr.dst_addr) ||
- !mnl_attr_put_check(buf, size,
- TCA_FLOWER_KEY_IPV6_DST_MASK,
- sizeof(mask.ipv6->hdr.dst_addr),
- mask.ipv6->hdr.dst_addr))))
- goto error_nobufs;
- ++item;
- break;
- case ITEM_TCP:
- if (item->type != RTE_FLOW_ITEM_TYPE_TCP)
- goto trans;
- mask.tcp = mlx5_nl_flow_item_mask
- (item, &rte_flow_item_tcp_mask,
- &mlx5_nl_flow_mask_supported.tcp,
- &mlx5_nl_flow_mask_empty.tcp,
- sizeof(mlx5_nl_flow_mask_supported.tcp), error);
- if (!mask.tcp)
- return -rte_errno;
- if (!ip_proto_set &&
- !mnl_attr_put_u8_check(buf, size,
- TCA_FLOWER_KEY_IP_PROTO,
- IPPROTO_TCP))
- goto error_nobufs;
- if (mask.tcp == &mlx5_nl_flow_mask_empty.tcp) {
- ++item;
- break;
- }
- spec.tcp = item->spec;
- if ((mask.tcp->hdr.src_port &&
- mask.tcp->hdr.src_port != RTE_BE16(0xffff)) ||
- (mask.tcp->hdr.dst_port &&
- mask.tcp->hdr.dst_port != RTE_BE16(0xffff)))
- return rte_flow_error_set
- (error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM_MASK,
- mask.tcp,
- "no support for partial masks on"
- " \"hdr.src_port\" and \"hdr.dst_port\""
- " fields");
- if ((mask.tcp->hdr.src_port &&
- (!mnl_attr_put_u16_check(buf, size,
- TCA_FLOWER_KEY_TCP_SRC,
- spec.tcp->hdr.src_port) ||
- !mnl_attr_put_u16_check(buf, size,
- TCA_FLOWER_KEY_TCP_SRC_MASK,
- mask.tcp->hdr.src_port))) ||
- (mask.tcp->hdr.dst_port &&
- (!mnl_attr_put_u16_check(buf, size,
- TCA_FLOWER_KEY_TCP_DST,
- spec.tcp->hdr.dst_port) ||
- !mnl_attr_put_u16_check(buf, size,
- TCA_FLOWER_KEY_TCP_DST_MASK,
- mask.tcp->hdr.dst_port))))
- goto error_nobufs;
- ++item;
- break;
- case ITEM_UDP:
- if (item->type != RTE_FLOW_ITEM_TYPE_UDP)
- goto trans;
- mask.udp = mlx5_nl_flow_item_mask
- (item, &rte_flow_item_udp_mask,
- &mlx5_nl_flow_mask_supported.udp,
- &mlx5_nl_flow_mask_empty.udp,
- sizeof(mlx5_nl_flow_mask_supported.udp), error);
- if (!mask.udp)
- return -rte_errno;
- if (!ip_proto_set &&
- !mnl_attr_put_u8_check(buf, size,
- TCA_FLOWER_KEY_IP_PROTO,
- IPPROTO_UDP))
- goto error_nobufs;
- if (mask.udp == &mlx5_nl_flow_mask_empty.udp) {
- ++item;
- break;
- }
- spec.udp = item->spec;
- if ((mask.udp->hdr.src_port &&
- mask.udp->hdr.src_port != RTE_BE16(0xffff)) ||
- (mask.udp->hdr.dst_port &&
- mask.udp->hdr.dst_port != RTE_BE16(0xffff)))
- return rte_flow_error_set
- (error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM_MASK,
- mask.udp,
- "no support for partial masks on"
- " \"hdr.src_port\" and \"hdr.dst_port\""
- " fields");
- if ((mask.udp->hdr.src_port &&
- (!mnl_attr_put_u16_check(buf, size,
- TCA_FLOWER_KEY_UDP_SRC,
- spec.udp->hdr.src_port) ||
- !mnl_attr_put_u16_check(buf, size,
- TCA_FLOWER_KEY_UDP_SRC_MASK,
- mask.udp->hdr.src_port))) ||
- (mask.udp->hdr.dst_port &&
- (!mnl_attr_put_u16_check(buf, size,
- TCA_FLOWER_KEY_UDP_DST,
- spec.udp->hdr.dst_port) ||
- !mnl_attr_put_u16_check(buf, size,
- TCA_FLOWER_KEY_UDP_DST_MASK,
- mask.udp->hdr.dst_port))))
- goto error_nobufs;
- ++item;
- break;
- case ACTIONS:
- if (item->type != RTE_FLOW_ITEM_TYPE_END)
- goto trans;
- assert(na_flower);
- assert(!na_flower_act);
- na_flower_act =
- mnl_attr_nest_start_check(buf, size, TCA_FLOWER_ACT);
- if (!na_flower_act)
- goto error_nobufs;
- act_index_cur = 1;
- break;
- case ACTION_VOID:
- if (action->type != RTE_FLOW_ACTION_TYPE_VOID)
- goto trans;
- ++action;
- break;
- case ACTION_PORT_ID:
- if (action->type != RTE_FLOW_ACTION_TYPE_PORT_ID)
- goto trans;
- conf.port_id = action->conf;
- if (conf.port_id->original)
- i = 0;
- else
- for (i = 0; ptoi[i].ifindex; ++i)
- if (ptoi[i].port_id == conf.port_id->id)
- break;
- if (!ptoi[i].ifindex)
- return rte_flow_error_set
- (error, ENODEV, RTE_FLOW_ERROR_TYPE_ACTION_CONF,
- conf.port_id,
- "missing data to convert port ID to ifindex");
- act_index =
- mnl_attr_nest_start_check(buf, size, act_index_cur++);
- if (!act_index ||
- !mnl_attr_put_strz_check(buf, size, TCA_ACT_KIND, "mirred"))
- goto error_nobufs;
- act = mnl_attr_nest_start_check(buf, size, TCA_ACT_OPTIONS);
- if (!act)
- goto error_nobufs;
- if (!mnl_attr_put_check(buf, size, TCA_MIRRED_PARMS,
- sizeof(struct tc_mirred),
- &(struct tc_mirred){
- .action = TC_ACT_STOLEN,
- .eaction = TCA_EGRESS_REDIR,
- .ifindex = ptoi[i].ifindex,
- }))
- goto error_nobufs;
- mnl_attr_nest_end(buf, act);
- mnl_attr_nest_end(buf, act_index);
- ++action;
- break;
- case ACTION_DROP:
- if (action->type != RTE_FLOW_ACTION_TYPE_DROP)
- goto trans;
- act_index =
- mnl_attr_nest_start_check(buf, size, act_index_cur++);
- if (!act_index ||
- !mnl_attr_put_strz_check(buf, size, TCA_ACT_KIND, "gact"))
- goto error_nobufs;
- act = mnl_attr_nest_start_check(buf, size, TCA_ACT_OPTIONS);
- if (!act)
- goto error_nobufs;
- if (!mnl_attr_put_check(buf, size, TCA_GACT_PARMS,
- sizeof(struct tc_gact),
- &(struct tc_gact){
- .action = TC_ACT_SHOT,
- }))
- goto error_nobufs;
- mnl_attr_nest_end(buf, act);
- mnl_attr_nest_end(buf, act_index);
- ++action;
- break;
- case ACTION_OF_POP_VLAN:
- if (action->type != RTE_FLOW_ACTION_TYPE_OF_POP_VLAN)
- goto trans;
- conf.of_push_vlan = NULL;
- i = TCA_VLAN_ACT_POP;
- goto action_of_vlan;
- case ACTION_OF_PUSH_VLAN:
- if (action->type != RTE_FLOW_ACTION_TYPE_OF_PUSH_VLAN)
- goto trans;
- conf.of_push_vlan = action->conf;
- i = TCA_VLAN_ACT_PUSH;
- goto action_of_vlan;
- case ACTION_OF_SET_VLAN_VID:
- if (action->type != RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_VID)
- goto trans;
- conf.of_set_vlan_vid = action->conf;
- if (na_vlan_id)
- goto override_na_vlan_id;
- i = TCA_VLAN_ACT_MODIFY;
- goto action_of_vlan;
- case ACTION_OF_SET_VLAN_PCP:
- if (action->type != RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_PCP)
- goto trans;
- conf.of_set_vlan_pcp = action->conf;
- if (na_vlan_priority)
- goto override_na_vlan_priority;
- i = TCA_VLAN_ACT_MODIFY;
- goto action_of_vlan;
-action_of_vlan:
- act_index =
- mnl_attr_nest_start_check(buf, size, act_index_cur++);
- if (!act_index ||
- !mnl_attr_put_strz_check(buf, size, TCA_ACT_KIND, "vlan"))
- goto error_nobufs;
- act = mnl_attr_nest_start_check(buf, size, TCA_ACT_OPTIONS);
- if (!act)
- goto error_nobufs;
- if (!mnl_attr_put_check(buf, size, TCA_VLAN_PARMS,
- sizeof(struct tc_vlan),
- &(struct tc_vlan){
- .action = TC_ACT_PIPE,
- .v_action = i,
- }))
- goto error_nobufs;
- if (i == TCA_VLAN_ACT_POP) {
- mnl_attr_nest_end(buf, act);
- mnl_attr_nest_end(buf, act_index);
- ++action;
- break;
- }
- if (i == TCA_VLAN_ACT_PUSH &&
- !mnl_attr_put_u16_check(buf, size,
- TCA_VLAN_PUSH_VLAN_PROTOCOL,
- conf.of_push_vlan->ethertype))
- goto error_nobufs;
- na_vlan_id = mnl_nlmsg_get_payload_tail(buf);
- if (!mnl_attr_put_u16_check(buf, size, TCA_VLAN_PAD, 0))
- goto error_nobufs;
- na_vlan_priority = mnl_nlmsg_get_payload_tail(buf);
- if (!mnl_attr_put_u8_check(buf, size, TCA_VLAN_PAD, 0))
- goto error_nobufs;
- mnl_attr_nest_end(buf, act);
- mnl_attr_nest_end(buf, act_index);
- if (action->type == RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_VID) {
-override_na_vlan_id:
- na_vlan_id->nla_type = TCA_VLAN_PUSH_VLAN_ID;
- *(uint16_t *)mnl_attr_get_payload(na_vlan_id) =
- rte_be_to_cpu_16
- (conf.of_set_vlan_vid->vlan_vid);
- } else if (action->type ==
- RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_PCP) {
-override_na_vlan_priority:
- na_vlan_priority->nla_type =
- TCA_VLAN_PUSH_VLAN_PRIORITY;
- *(uint8_t *)mnl_attr_get_payload(na_vlan_priority) =
- conf.of_set_vlan_pcp->vlan_pcp;
- }
- ++action;
- break;
- case END:
- if (item->type != RTE_FLOW_ITEM_TYPE_END ||
- action->type != RTE_FLOW_ACTION_TYPE_END)
- goto trans;
- if (na_flower_act)
- mnl_attr_nest_end(buf, na_flower_act);
- if (na_flower)
- mnl_attr_nest_end(buf, na_flower);
- nlh = buf;
- return nlh->nlmsg_len;
- }
- back = trans;
- trans = mlx5_nl_flow_trans[trans[n - 1]];
- n = 0;
- goto trans;
-error_nobufs:
- if (buf != buf_tmp) {
- buf = buf_tmp;
- size = sizeof(buf_tmp);
- goto init;
- }
- return rte_flow_error_set
- (error, ENOBUFS, RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
- "generated TC message is too large");
-}
-
-/**
- * Brand rtnetlink buffer with unique handle.
- *
- * This handle should be unique for a given network interface to avoid
- * collisions.
- *
- * @param buf
- * Flow rule buffer previously initialized by mlx5_nl_flow_transpose().
- * @param handle
- * Unique 32-bit handle to use.
- */
-void
-mlx5_nl_flow_brand(void *buf, uint32_t handle)
-{
- struct tcmsg *tcm = mnl_nlmsg_get_payload(buf);
-
- tcm->tcm_handle = handle;
-}
-
-/**
- * Send Netlink message with acknowledgment.
- *
- * @param nl
- * Libmnl socket to use.
- * @param nlh
- * Message to send. This function always raises the NLM_F_ACK flag before
- * sending.
- *
- * @return
- * 0 on success, a negative errno value otherwise and rte_errno is set.
- */
-static int
-mlx5_nl_flow_nl_ack(struct mnl_socket *nl, struct nlmsghdr *nlh)
-{
- alignas(struct nlmsghdr)
- uint8_t ans[mnl_nlmsg_size(sizeof(struct nlmsgerr)) +
- nlh->nlmsg_len - sizeof(*nlh)];
- uint32_t seq = random();
- int ret;
-
- nlh->nlmsg_flags |= NLM_F_ACK;
- nlh->nlmsg_seq = seq;
- ret = mnl_socket_sendto(nl, nlh, nlh->nlmsg_len);
- if (ret != -1)
- ret = mnl_socket_recvfrom(nl, ans, sizeof(ans));
- if (ret != -1)
- ret = mnl_cb_run
- (ans, ret, seq, mnl_socket_get_portid(nl), NULL, NULL);
- if (!ret)
- return 0;
- rte_errno = errno;
- return -rte_errno;
-}
-
-/**
- * Create a Netlink flow rule.
- *
- * @param nl
- * Libmnl socket to use.
- * @param buf
- * Flow rule buffer previously initialized by mlx5_nl_flow_transpose().
- * @param[out] error
- * Perform verbose error reporting if not NULL.
- *
- * @return
- * 0 on success, a negative errno value otherwise and rte_errno is set.
- */
-int
-mlx5_nl_flow_create(struct mnl_socket *nl, void *buf,
- struct rte_flow_error *error)
-{
- struct nlmsghdr *nlh = buf;
-
- nlh->nlmsg_type = RTM_NEWTFILTER;
- nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL;
- if (!mlx5_nl_flow_nl_ack(nl, nlh))
- return 0;
- return rte_flow_error_set
- (error, rte_errno, RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
- "netlink: failed to create TC flow rule");
-}
-
-/**
- * Destroy a Netlink flow rule.
- *
- * @param nl
- * Libmnl socket to use.
- * @param buf
- * Flow rule buffer previously initialized by mlx5_nl_flow_transpose().
- * @param[out] error
- * Perform verbose error reporting if not NULL.
- *
- * @return
- * 0 on success, a negative errno value otherwise and rte_errno is set.
- */
-int
-mlx5_nl_flow_destroy(struct mnl_socket *nl, void *buf,
- struct rte_flow_error *error)
-{
- struct nlmsghdr *nlh = buf;
-
- nlh->nlmsg_type = RTM_DELTFILTER;
- nlh->nlmsg_flags = NLM_F_REQUEST;
- if (!mlx5_nl_flow_nl_ack(nl, nlh))
- return 0;
- return rte_flow_error_set
- (error, errno, RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
- "netlink: failed to destroy TC flow rule");
-}
-
-/**
- * Initialize ingress qdisc of a given network interface.
- *
- * @param nl
- * Libmnl socket of the @p NETLINK_ROUTE kind.
- * @param ifindex
- * Index of network interface to initialize.
- * @param[out] error
- * Perform verbose error reporting if not NULL.
- *
- * @return
- * 0 on success, a negative errno value otherwise and rte_errno is set.
- */
-int
-mlx5_nl_flow_init(struct mnl_socket *nl, unsigned int ifindex,
- struct rte_flow_error *error)
-{
- struct nlmsghdr *nlh;
- struct tcmsg *tcm;
- alignas(struct nlmsghdr)
- uint8_t buf[mnl_nlmsg_size(sizeof(*tcm) + 128)];
-
- /* Destroy existing ingress qdisc and everything attached to it. */
- nlh = mnl_nlmsg_put_header(buf);
- nlh->nlmsg_type = RTM_DELQDISC;
- nlh->nlmsg_flags = NLM_F_REQUEST;
- tcm = mnl_nlmsg_put_extra_header(nlh, sizeof(*tcm));
- tcm->tcm_family = AF_UNSPEC;
- tcm->tcm_ifindex = ifindex;
- tcm->tcm_handle = TC_H_MAKE(TC_H_INGRESS, 0);
- tcm->tcm_parent = TC_H_INGRESS;
- /* Ignore errors when qdisc is already absent. */
- if (mlx5_nl_flow_nl_ack(nl, nlh) &&
- rte_errno != EINVAL && rte_errno != ENOENT)
- return rte_flow_error_set
- (error, rte_errno, RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
- NULL, "netlink: failed to remove ingress qdisc");
- /* Create fresh ingress qdisc. */
- nlh = mnl_nlmsg_put_header(buf);
- nlh->nlmsg_type = RTM_NEWQDISC;
- nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL;
- tcm = mnl_nlmsg_put_extra_header(nlh, sizeof(*tcm));
- tcm->tcm_family = AF_UNSPEC;
- tcm->tcm_ifindex = ifindex;
- tcm->tcm_handle = TC_H_MAKE(TC_H_INGRESS, 0);
- tcm->tcm_parent = TC_H_INGRESS;
- mnl_attr_put_strz_check(nlh, sizeof(buf), TCA_KIND, "ingress");
- if (mlx5_nl_flow_nl_ack(nl, nlh))
- return rte_flow_error_set
- (error, rte_errno, RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
- NULL, "netlink: failed to create ingress qdisc");
- return 0;
-}
-
-/**
- * Create and configure a libmnl socket for Netlink flow rules.
- *
- * @return
- * A valid libmnl socket object pointer on success, NULL otherwise and
- * rte_errno is set.
- */
-struct mnl_socket *
-mlx5_nl_flow_socket_create(void)
-{
- struct mnl_socket *nl = mnl_socket_open(NETLINK_ROUTE);
-
- if (nl) {
- mnl_socket_setsockopt(nl, NETLINK_CAP_ACK, &(int){ 1 },
- sizeof(int));
- if (!mnl_socket_bind(nl, 0, MNL_SOCKET_AUTOPID))
- return nl;
- }
- rte_errno = errno;
- if (nl)
- mnl_socket_close(nl);
- return NULL;
-}
-
-/**
- * Destroy a libmnl socket.
- */
-void
-mlx5_nl_flow_socket_destroy(struct mnl_socket *nl)
-{
- mnl_socket_close(nl);
-}
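
For orientation, here is a minimal sketch of how a caller chained the Netlink
helpers removed above: create the socket, install the ingress qdisc, brand the
pre-built rule buffer, then add or remove the TC filter. It is illustrative
only; ifindex, handle, buf and error are placeholders, and the
mlx5_nl_flow_transpose() step that fills buf is elided.

	struct rte_flow_error error;
	struct mnl_socket *nl = mlx5_nl_flow_socket_create();

	if (!nl)
		return -rte_errno;
	/* Equivalent of "tc qdisc add dev <ifname> ingress". */
	if (mlx5_nl_flow_init(nl, ifindex, &error))
		goto err;
	/* buf was filled beforehand by mlx5_nl_flow_transpose(). */
	mlx5_nl_flow_brand(buf, handle); /* handle must be unique per interface */
	if (mlx5_nl_flow_create(nl, buf, &error))
		goto err;
	/* ... traffic flows through the offloaded rule ... */
	mlx5_nl_flow_destroy(nl, buf, &error);
err:
	mlx5_nl_flow_socket_destroy(nl);
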
diff --git a/drivers/net/mlx5/mlx5_prm.h b/drivers/net/mlx5/mlx5_prm.h
index 0870d32f..29742b13 100644
--- a/drivers/net/mlx5/mlx5_prm.h
+++ b/drivers/net/mlx5/mlx5_prm.h
@@ -159,7 +159,7 @@ struct mlx5_wqe_eth_seg_small {
uint8_t cs_flags;
uint8_t rsvd1;
uint16_t mss;
- uint32_t rsvd2;
+ uint32_t flow_table_metadata;
uint16_t inline_hdr_sz;
uint8_t inline_hdr[2];
} __rte_aligned(MLX5_WQE_DWORD_SIZE);
@@ -280,6 +280,226 @@ struct mlx5_cqe {
/* CQE format value. */
#define MLX5_COMPRESSED 0x3
+/* The fields of a packet that can be modified. */
+enum mlx5_modificaiton_field {
+ MLX5_MODI_OUT_SMAC_47_16 = 1,
+ MLX5_MODI_OUT_SMAC_15_0,
+ MLX5_MODI_OUT_ETHERTYPE,
+ MLX5_MODI_OUT_DMAC_47_16,
+ MLX5_MODI_OUT_DMAC_15_0,
+ MLX5_MODI_OUT_IP_DSCP,
+ MLX5_MODI_OUT_TCP_FLAGS,
+ MLX5_MODI_OUT_TCP_SPORT,
+ MLX5_MODI_OUT_TCP_DPORT,
+ MLX5_MODI_OUT_IPV4_TTL,
+ MLX5_MODI_OUT_UDP_SPORT,
+ MLX5_MODI_OUT_UDP_DPORT,
+ MLX5_MODI_OUT_SIPV6_127_96,
+ MLX5_MODI_OUT_SIPV6_95_64,
+ MLX5_MODI_OUT_SIPV6_63_32,
+ MLX5_MODI_OUT_SIPV6_31_0,
+ MLX5_MODI_OUT_DIPV6_127_96,
+ MLX5_MODI_OUT_DIPV6_95_64,
+ MLX5_MODI_OUT_DIPV6_63_32,
+ MLX5_MODI_OUT_DIPV6_31_0,
+ MLX5_MODI_OUT_SIPV4,
+ MLX5_MODI_OUT_DIPV4,
+ MLX5_MODI_IN_SMAC_47_16 = 0x31,
+ MLX5_MODI_IN_SMAC_15_0,
+ MLX5_MODI_IN_ETHERTYPE,
+ MLX5_MODI_IN_DMAC_47_16,
+ MLX5_MODI_IN_DMAC_15_0,
+ MLX5_MODI_IN_IP_DSCP,
+ MLX5_MODI_IN_TCP_FLAGS,
+ MLX5_MODI_IN_TCP_SPORT,
+ MLX5_MODI_IN_TCP_DPORT,
+ MLX5_MODI_IN_IPV4_TTL,
+ MLX5_MODI_IN_UDP_SPORT,
+ MLX5_MODI_IN_UDP_DPORT,
+ MLX5_MODI_IN_SIPV6_127_96,
+ MLX5_MODI_IN_SIPV6_95_64,
+ MLX5_MODI_IN_SIPV6_63_32,
+ MLX5_MODI_IN_SIPV6_31_0,
+ MLX5_MODI_IN_DIPV6_127_96,
+ MLX5_MODI_IN_DIPV6_95_64,
+ MLX5_MODI_IN_DIPV6_63_32,
+ MLX5_MODI_IN_DIPV6_31_0,
+ MLX5_MODI_IN_SIPV4,
+ MLX5_MODI_IN_DIPV4,
+ MLX5_MODI_OUT_IPV6_HOPLIMIT,
+ MLX5_MODI_IN_IPV6_HOPLIMIT,
+ MLX5_MODI_META_DATA_REG_A,
+ MLX5_MODI_META_DATA_REG_B = 0x50,
+};
+
+/* Modification sub command. */
+struct mlx5_modification_cmd {
+ union {
+ uint32_t data0;
+ struct {
+ unsigned int bits:5;
+ unsigned int rsvd0:3;
+ unsigned int src_offset:5; /* Start bit offset. */
+ unsigned int rsvd1:3;
+ unsigned int src_field:12;
+ unsigned int type:4;
+ };
+ };
+ union {
+ uint32_t data1;
+ uint8_t data[4];
+ struct {
+ unsigned int rsvd2:8;
+ unsigned int dst_offset:8;
+ unsigned int dst_field:12;
+ unsigned int rsvd3:4;
+ };
+ };
+};
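
A hypothetical example of filling one sub-command follows. The field semantics
are assumed from the usual PRM conventions rather than taken from this patch;
in particular, the "set" operation type value (1) and the placement of the
immediate value in data1 are assumptions.

	/* Rewrite the outer UDP destination port (sketch only). */
	struct mlx5_modification_cmd cmd = {
		.type = 1,                            /* assumed: "set" operation */
		.src_field = MLX5_MODI_OUT_UDP_DPORT, /* selector from the enum above */
		.src_offset = 0,                      /* start bit within the field */
		.bits = 16,                           /* field width in bits */
	};

	cmd.data0 = rte_cpu_to_be_32(cmd.data0); /* device consumes big-endian dwords */
	cmd.data1 = rte_cpu_to_be_32(4789);      /* immediate value, placement assumed */
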
+
+typedef uint32_t u32;
+typedef uint16_t u16;
+typedef uint8_t u8;
+
+#define __mlx5_nullp(typ) ((struct mlx5_ifc_##typ##_bits *)0)
+#define __mlx5_bit_sz(typ, fld) sizeof(__mlx5_nullp(typ)->fld)
+#define __mlx5_bit_off(typ, fld) ((unsigned int)(unsigned long) \
+ (&(__mlx5_nullp(typ)->fld)))
+#define __mlx5_dw_bit_off(typ, fld) (32 - __mlx5_bit_sz(typ, fld) - \
+ (__mlx5_bit_off(typ, fld) & 0x1f))
+#define __mlx5_dw_off(typ, fld) (__mlx5_bit_off(typ, fld) / 32)
+#define __mlx5_dw_mask(typ, fld) (__mlx5_mask(typ, fld) << \
+ __mlx5_dw_bit_off(typ, fld))
+#define __mlx5_mask(typ, fld) ((u32)((1ull << __mlx5_bit_sz(typ, fld)) - 1))
+#define __mlx5_16_off(typ, fld) (__mlx5_bit_off(typ, fld) / 16)
+#define __mlx5_16_bit_off(typ, fld) (16 - __mlx5_bit_sz(typ, fld) - \
+ (__mlx5_bit_off(typ, fld) & 0xf))
+#define __mlx5_mask16(typ, fld) ((u16)((1ull << __mlx5_bit_sz(typ, fld)) - 1))
+#define MLX5_ST_SZ_DW(typ) (sizeof(struct mlx5_ifc_##typ##_bits) / 32)
+#define MLX5_ST_SZ_DB(typ) (sizeof(struct mlx5_ifc_##typ##_bits) / 8)
+#define MLX5_BYTE_OFF(typ, fld) (__mlx5_bit_off(typ, fld) / 8)
+#define MLX5_ADDR_OF(typ, p, fld) ((char *)(p) + MLX5_BYTE_OFF(typ, fld))
+
+/* Insert a value into a struct. */
+#define MLX5_SET(typ, p, fld, v) \
+ do { \
+ u32 _v = v; \
+ *((__be32 *)(p) + __mlx5_dw_off(typ, fld)) = \
+ rte_cpu_to_be_32((rte_be_to_cpu_32(*((u32 *)(p) + \
+ __mlx5_dw_off(typ, fld))) & \
+ (~__mlx5_dw_mask(typ, fld))) | \
+ (((_v) & __mlx5_mask(typ, fld)) << \
+ __mlx5_dw_bit_off(typ, fld))); \
+ } while (0)
+#define MLX5_GET16(typ, p, fld) \
+ ((rte_be_to_cpu_16(*((__be16 *)(p) + \
+ __mlx5_16_off(typ, fld))) >> __mlx5_16_bit_off(typ, fld)) & \
+ __mlx5_mask16(typ, fld))
+#define MLX5_FLD_SZ_BYTES(typ, fld) (__mlx5_bit_sz(typ, fld) / 8)
+
+struct mlx5_ifc_fte_match_set_misc_bits {
+ u8 reserved_at_0[0x8];
+ u8 source_sqn[0x18];
+ u8 reserved_at_20[0x10];
+ u8 source_port[0x10];
+ u8 outer_second_prio[0x3];
+ u8 outer_second_cfi[0x1];
+ u8 outer_second_vid[0xc];
+ u8 inner_second_prio[0x3];
+ u8 inner_second_cfi[0x1];
+ u8 inner_second_vid[0xc];
+ u8 outer_second_cvlan_tag[0x1];
+ u8 inner_second_cvlan_tag[0x1];
+ u8 outer_second_svlan_tag[0x1];
+ u8 inner_second_svlan_tag[0x1];
+ u8 reserved_at_64[0xc];
+ u8 gre_protocol[0x10];
+ u8 gre_key_h[0x18];
+ u8 gre_key_l[0x8];
+ u8 vxlan_vni[0x18];
+ u8 reserved_at_b8[0x8];
+ u8 reserved_at_c0[0x20];
+ u8 reserved_at_e0[0xc];
+ u8 outer_ipv6_flow_label[0x14];
+ u8 reserved_at_100[0xc];
+ u8 inner_ipv6_flow_label[0x14];
+ u8 reserved_at_120[0xe0];
+};
+
+struct mlx5_ifc_ipv4_layout_bits {
+ u8 reserved_at_0[0x60];
+ u8 ipv4[0x20];
+};
+
+struct mlx5_ifc_ipv6_layout_bits {
+ u8 ipv6[16][0x8];
+};
+
+union mlx5_ifc_ipv6_layout_ipv4_layout_auto_bits {
+ struct mlx5_ifc_ipv6_layout_bits ipv6_layout;
+ struct mlx5_ifc_ipv4_layout_bits ipv4_layout;
+ u8 reserved_at_0[0x80];
+};
+
+struct mlx5_ifc_fte_match_set_lyr_2_4_bits {
+ u8 smac_47_16[0x20];
+ u8 smac_15_0[0x10];
+ u8 ethertype[0x10];
+ u8 dmac_47_16[0x20];
+ u8 dmac_15_0[0x10];
+ u8 first_prio[0x3];
+ u8 first_cfi[0x1];
+ u8 first_vid[0xc];
+ u8 ip_protocol[0x8];
+ u8 ip_dscp[0x6];
+ u8 ip_ecn[0x2];
+ u8 cvlan_tag[0x1];
+ u8 svlan_tag[0x1];
+ u8 frag[0x1];
+ u8 ip_version[0x4];
+ u8 tcp_flags[0x9];
+ u8 tcp_sport[0x10];
+ u8 tcp_dport[0x10];
+ u8 reserved_at_c0[0x20];
+ u8 udp_sport[0x10];
+ u8 udp_dport[0x10];
+ union mlx5_ifc_ipv6_layout_ipv4_layout_auto_bits src_ipv4_src_ipv6;
+ union mlx5_ifc_ipv6_layout_ipv4_layout_auto_bits dst_ipv4_dst_ipv6;
+};
+
+struct mlx5_ifc_fte_match_mpls_bits {
+ u8 mpls_label[0x14];
+ u8 mpls_exp[0x3];
+ u8 mpls_s_bos[0x1];
+ u8 mpls_ttl[0x8];
+};
+
+struct mlx5_ifc_fte_match_set_misc2_bits {
+ struct mlx5_ifc_fte_match_mpls_bits outer_first_mpls;
+ struct mlx5_ifc_fte_match_mpls_bits inner_first_mpls;
+ struct mlx5_ifc_fte_match_mpls_bits outer_first_mpls_over_gre;
+ struct mlx5_ifc_fte_match_mpls_bits outer_first_mpls_over_udp;
+ u8 reserved_at_80[0x100];
+ u8 metadata_reg_a[0x20];
+ u8 reserved_at_1a0[0x60];
+};
+
+/* Flow matcher. */
+struct mlx5_ifc_fte_match_param_bits {
+ struct mlx5_ifc_fte_match_set_lyr_2_4_bits outer_headers;
+ struct mlx5_ifc_fte_match_set_misc_bits misc_parameters;
+ struct mlx5_ifc_fte_match_set_lyr_2_4_bits inner_headers;
+ struct mlx5_ifc_fte_match_set_misc2_bits misc_parameters_2;
+ u8 reserved_at_800[0x800];
+};
+
+enum {
+ MLX5_MATCH_CRITERIA_ENABLE_OUTER_BIT,
+ MLX5_MATCH_CRITERIA_ENABLE_MISC_BIT,
+ MLX5_MATCH_CRITERIA_ENABLE_INNER_BIT,
+ MLX5_MATCH_CRITERIA_ENABLE_MISC2_BIT
+};
+
/* CQE format mask. */
#define MLX5E_CQE_FORMAT_MASK 0xc
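
To show how the accessors and layouts above fit together, here is a minimal
sketch of programming a matcher/value pair for an exact match on the outer
ethertype; the buffer names are illustrative, and the DV flow code added by
this patch is the real consumer.

	uint32_t matcher[MLX5_ST_SZ_DW(fte_match_param)] = { 0 }; /* mask side */
	uint32_t key[MLX5_ST_SZ_DW(fte_match_param)] = { 0 };     /* value side */
	void *hdrs_m = MLX5_ADDR_OF(fte_match_param, matcher, outer_headers);
	void *hdrs_v = MLX5_ADDR_OF(fte_match_param, key, outer_headers);

	MLX5_SET(fte_match_set_lyr_2_4, hdrs_m, ethertype, 0xffff); /* full mask */
	MLX5_SET(fte_match_set_lyr_2_4, hdrs_v, ethertype, 0x0800); /* IPv4 */
	/* Tell the device which sub-blocks of the match parameter are valid. */
	uint8_t match_criteria = 1u << MLX5_MATCH_CRITERIA_ENABLE_OUTER_BIT;
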
diff --git a/drivers/net/mlx5/mlx5_rxq.c b/drivers/net/mlx5/mlx5_rxq.c
index 1f7bfd44..ed993ea6 100644
--- a/drivers/net/mlx5/mlx5_rxq.c
+++ b/drivers/net/mlx5/mlx5_rxq.c
@@ -388,7 +388,6 @@ mlx5_get_rx_queue_offloads(struct rte_eth_dev *dev)
DEV_RX_OFFLOAD_TIMESTAMP |
DEV_RX_OFFLOAD_JUMBO_FRAME);
- offloads |= DEV_RX_OFFLOAD_CRC_STRIP;
if (config->hw_fcs_strip)
offloads |= DEV_RX_OFFLOAD_KEEP_CRC;
@@ -1438,7 +1437,7 @@ mlx5_rxq_new(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
tmpl->rxq.vlan_strip = !!(offloads & DEV_RX_OFFLOAD_VLAN_STRIP);
/* By default, FCS (CRC) is stripped by hardware. */
tmpl->rxq.crc_present = 0;
- if (rte_eth_dev_must_keep_crc(offloads)) {
+ if (offloads & DEV_RX_OFFLOAD_KEEP_CRC) {
if (config->hw_fcs_strip) {
tmpl->rxq.crc_present = 1;
} else {
diff --git a/drivers/net/mlx5/mlx5_rxtx.c b/drivers/net/mlx5/mlx5_rxtx.c
index 2d14f8a6..24a054d5 100644
--- a/drivers/net/mlx5/mlx5_rxtx.c
+++ b/drivers/net/mlx5/mlx5_rxtx.c
@@ -523,6 +523,7 @@ mlx5_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
uint8_t tso = txq->tso_en && (buf->ol_flags & PKT_TX_TCP_SEG);
uint32_t swp_offsets = 0;
uint8_t swp_types = 0;
+ rte_be32_t metadata;
uint16_t tso_segsz = 0;
#ifdef MLX5_PMD_SOFT_COUNTERS
uint32_t total_length = 0;
@@ -566,6 +567,9 @@ mlx5_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
cs_flags = txq_ol_cksum_to_cs(buf);
txq_mbuf_to_swp(txq, buf, (uint8_t *)&swp_offsets, &swp_types);
raw = ((uint8_t *)(uintptr_t)wqe) + 2 * MLX5_WQE_DWORD_SIZE;
+ /* Copy metadata from mbuf if valid */
+ metadata = buf->ol_flags & PKT_TX_METADATA ? buf->tx_metadata :
+ 0;
/* Replace the Ethernet type by the VLAN if necessary. */
if (buf->ol_flags & PKT_TX_VLAN_PKT) {
uint32_t vlan = rte_cpu_to_be_32(0x81000000 |
@@ -781,7 +785,7 @@ next_pkt:
swp_offsets,
cs_flags | (swp_types << 8) |
(rte_cpu_to_be_16(tso_segsz) << 16),
- 0,
+ metadata,
(ehdr << 16) | rte_cpu_to_be_16(tso_header_sz),
};
} else {
@@ -795,7 +799,7 @@ next_pkt:
wqe->eseg = (rte_v128u32_t){
swp_offsets,
cs_flags | (swp_types << 8),
- 0,
+ metadata,
(ehdr << 16) | rte_cpu_to_be_16(pkt_inline_sz),
};
}
@@ -861,7 +865,7 @@ mlx5_mpw_new(struct mlx5_txq_data *txq, struct mlx5_mpw *mpw, uint32_t length)
mpw->wqe->eseg.inline_hdr_sz = 0;
mpw->wqe->eseg.rsvd0 = 0;
mpw->wqe->eseg.rsvd1 = 0;
- mpw->wqe->eseg.rsvd2 = 0;
+ mpw->wqe->eseg.flow_table_metadata = 0;
mpw->wqe->ctrl[0] = rte_cpu_to_be_32((MLX5_OPC_MOD_MPW << 24) |
(txq->wqe_ci << 8) |
MLX5_OPCODE_TSO);
@@ -948,6 +952,7 @@ mlx5_tx_burst_mpw(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
uint32_t length;
unsigned int segs_n = buf->nb_segs;
uint32_t cs_flags;
+ rte_be32_t metadata;
/*
* Make sure there is enough room to store this packet and
@@ -964,6 +969,9 @@ mlx5_tx_burst_mpw(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
max_elts -= segs_n;
--pkts_n;
cs_flags = txq_ol_cksum_to_cs(buf);
+ /* Copy metadata from mbuf if valid */
+ metadata = buf->ol_flags & PKT_TX_METADATA ? buf->tx_metadata :
+ 0;
/* Retrieve packet information. */
length = PKT_LEN(buf);
assert(length);
@@ -971,6 +979,7 @@ mlx5_tx_burst_mpw(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
if ((mpw.state == MLX5_MPW_STATE_OPENED) &&
((mpw.len != length) ||
(segs_n != 1) ||
+ (mpw.wqe->eseg.flow_table_metadata != metadata) ||
(mpw.wqe->eseg.cs_flags != cs_flags)))
mlx5_mpw_close(txq, &mpw);
if (mpw.state == MLX5_MPW_STATE_CLOSED) {
@@ -984,6 +993,7 @@ mlx5_tx_burst_mpw(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
max_wqe -= 2;
mlx5_mpw_new(txq, &mpw, length);
mpw.wqe->eseg.cs_flags = cs_flags;
+ mpw.wqe->eseg.flow_table_metadata = metadata;
}
/* Multi-segment packets must be alone in their MPW. */
assert((segs_n == 1) || (mpw.pkts_n == 0));
@@ -1082,7 +1092,7 @@ mlx5_mpw_inline_new(struct mlx5_txq_data *txq, struct mlx5_mpw *mpw,
mpw->wqe->eseg.cs_flags = 0;
mpw->wqe->eseg.rsvd0 = 0;
mpw->wqe->eseg.rsvd1 = 0;
- mpw->wqe->eseg.rsvd2 = 0;
+ mpw->wqe->eseg.flow_table_metadata = 0;
inl = (struct mlx5_wqe_inl_small *)
(((uintptr_t)mpw->wqe) + 2 * MLX5_WQE_DWORD_SIZE);
mpw->data.raw = (uint8_t *)&inl->raw;
@@ -1172,6 +1182,7 @@ mlx5_tx_burst_mpw_inline(void *dpdk_txq, struct rte_mbuf **pkts,
uint32_t length;
unsigned int segs_n = buf->nb_segs;
uint8_t cs_flags;
+ rte_be32_t metadata;
/*
* Make sure there is enough room to store this packet and
@@ -1193,18 +1204,23 @@ mlx5_tx_burst_mpw_inline(void *dpdk_txq, struct rte_mbuf **pkts,
*/
max_wqe = (1u << txq->wqe_n) - (txq->wqe_ci - txq->wqe_pi);
cs_flags = txq_ol_cksum_to_cs(buf);
+ /* Copy metadata from mbuf if valid */
+ metadata = buf->ol_flags & PKT_TX_METADATA ? buf->tx_metadata :
+ 0;
/* Retrieve packet information. */
length = PKT_LEN(buf);
/* Start new session if packet differs. */
if (mpw.state == MLX5_MPW_STATE_OPENED) {
if ((mpw.len != length) ||
(segs_n != 1) ||
+ (mpw.wqe->eseg.flow_table_metadata != metadata) ||
(mpw.wqe->eseg.cs_flags != cs_flags))
mlx5_mpw_close(txq, &mpw);
} else if (mpw.state == MLX5_MPW_INL_STATE_OPENED) {
if ((mpw.len != length) ||
(segs_n != 1) ||
(length > inline_room) ||
+ (mpw.wqe->eseg.flow_table_metadata != metadata) ||
(mpw.wqe->eseg.cs_flags != cs_flags)) {
mlx5_mpw_inline_close(txq, &mpw);
inline_room =
@@ -1224,12 +1240,14 @@ mlx5_tx_burst_mpw_inline(void *dpdk_txq, struct rte_mbuf **pkts,
max_wqe -= 2;
mlx5_mpw_new(txq, &mpw, length);
mpw.wqe->eseg.cs_flags = cs_flags;
+ mpw.wqe->eseg.flow_table_metadata = metadata;
} else {
if (unlikely(max_wqe < wqe_inl_n))
break;
max_wqe -= wqe_inl_n;
mlx5_mpw_inline_new(txq, &mpw, length);
mpw.wqe->eseg.cs_flags = cs_flags;
+ mpw.wqe->eseg.flow_table_metadata = metadata;
}
}
/* Multi-segment packets must be alone in their MPW. */
@@ -1461,6 +1479,7 @@ txq_burst_empw(struct mlx5_txq_data *txq, struct rte_mbuf **pkts,
unsigned int do_inline = 0; /* Whether inline is possible. */
uint32_t length;
uint8_t cs_flags;
+ rte_be32_t metadata;
/* Multi-segmented packet is handled in slow-path outside. */
assert(NB_SEGS(buf) == 1);
@@ -1468,6 +1487,9 @@ txq_burst_empw(struct mlx5_txq_data *txq, struct rte_mbuf **pkts,
if (max_elts - j == 0)
break;
cs_flags = txq_ol_cksum_to_cs(buf);
+ /* Copy metadata from mbuf if valid */
+ metadata = buf->ol_flags & PKT_TX_METADATA ? buf->tx_metadata :
+ 0;
/* Retrieve packet information. */
length = PKT_LEN(buf);
/* Start new session if:
@@ -1482,6 +1504,7 @@ txq_burst_empw(struct mlx5_txq_data *txq, struct rte_mbuf **pkts,
(length <= txq->inline_max_packet_sz &&
inl_pad + sizeof(inl_hdr) + length >
mpw_room) ||
+ (mpw.wqe->eseg.flow_table_metadata != metadata) ||
(mpw.wqe->eseg.cs_flags != cs_flags))
max_wqe -= mlx5_empw_close(txq, &mpw);
}
@@ -1505,6 +1528,7 @@ txq_burst_empw(struct mlx5_txq_data *txq, struct rte_mbuf **pkts,
sizeof(inl_hdr) + length <= mpw_room &&
!txq->mpw_hdr_dseg;
mpw.wqe->eseg.cs_flags = cs_flags;
+ mpw.wqe->eseg.flow_table_metadata = metadata;
} else {
/* Evaluate whether the next packet can be inlined.
 			 * Inlining is possible when:
@@ -2097,7 +2121,7 @@ mlx5_rx_burst_mprq(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n)
const unsigned int wq_mask = (1 << rxq->elts_n) - 1;
volatile struct mlx5_cqe *cqe = &(*rxq->cqes)[rxq->cq_ci & cq_mask];
unsigned int i = 0;
- uint16_t rq_ci = rxq->rq_ci;
+ uint32_t rq_ci = rxq->rq_ci;
uint16_t consumed_strd = rxq->consumed_strd;
struct mlx5_mprq_buf *buf = (*rxq->mprq_bufs)[rq_ci & wq_mask];
@@ -2324,7 +2348,7 @@ removed_rx_burst(void *dpdk_txq __rte_unused,
* (e.g. mlx5_rxtx_vec_sse.c for x86).
*/
-uint16_t __attribute__((weak))
+__rte_weak uint16_t
mlx5_tx_burst_raw_vec(void *dpdk_txq __rte_unused,
struct rte_mbuf **pkts __rte_unused,
uint16_t pkts_n __rte_unused)
@@ -2332,7 +2356,7 @@ mlx5_tx_burst_raw_vec(void *dpdk_txq __rte_unused,
return 0;
}
-uint16_t __attribute__((weak))
+__rte_weak uint16_t
mlx5_tx_burst_vec(void *dpdk_txq __rte_unused,
struct rte_mbuf **pkts __rte_unused,
uint16_t pkts_n __rte_unused)
@@ -2340,7 +2364,7 @@ mlx5_tx_burst_vec(void *dpdk_txq __rte_unused,
return 0;
}
-uint16_t __attribute__((weak))
+__rte_weak uint16_t
mlx5_rx_burst_vec(void *dpdk_txq __rte_unused,
struct rte_mbuf **pkts __rte_unused,
uint16_t pkts_n __rte_unused)
@@ -2348,25 +2372,25 @@ mlx5_rx_burst_vec(void *dpdk_txq __rte_unused,
return 0;
}
-int __attribute__((weak))
+__rte_weak int
mlx5_check_raw_vec_tx_support(struct rte_eth_dev *dev __rte_unused)
{
return -ENOTSUP;
}
-int __attribute__((weak))
+__rte_weak int
mlx5_check_vec_tx_support(struct rte_eth_dev *dev __rte_unused)
{
return -ENOTSUP;
}
-int __attribute__((weak))
+__rte_weak int
mlx5_rxq_check_vec_support(struct mlx5_rxq_data *rxq __rte_unused)
{
return -ENOTSUP;
}
-int __attribute__((weak))
+__rte_weak int
mlx5_check_vec_rx_support(struct rte_eth_dev *dev __rte_unused)
{
return -ENOTSUP;
diff --git a/drivers/net/mlx5/mlx5_rxtx.h b/drivers/net/mlx5/mlx5_rxtx.h
index 48ed2b20..1db468c3 100644
--- a/drivers/net/mlx5/mlx5_rxtx.h
+++ b/drivers/net/mlx5/mlx5_rxtx.h
@@ -97,10 +97,10 @@ struct mlx5_rxq_data {
volatile uint32_t *rq_db;
volatile uint32_t *cq_db;
uint16_t port_id;
- uint16_t rq_ci;
+ uint32_t rq_ci;
uint16_t consumed_strd; /* Number of consumed strides in WQE. */
- uint16_t rq_pi;
- uint16_t cq_ci;
+ uint32_t rq_pi;
+ uint32_t cq_ci;
struct mlx5_mr_ctrl mr_ctrl; /* MR control descriptor. */
uint16_t mprq_max_memcpy_len; /* Maximum size of packet to memcpy. */
volatile void *wqes;
@@ -363,6 +363,8 @@ uint16_t mlx5_rx_burst_vec(void *dpdk_txq, struct rte_mbuf **pkts,
void mlx5_mr_flush_local_cache(struct mlx5_mr_ctrl *mr_ctrl);
uint32_t mlx5_rx_addr2mr_bh(struct mlx5_rxq_data *rxq, uintptr_t addr);
uint32_t mlx5_tx_addr2mr_bh(struct mlx5_txq_data *txq, uintptr_t addr);
+uint32_t mlx5_tx_update_ext_mp(struct mlx5_txq_data *txq, uintptr_t addr,
+ struct rte_mempool *mp);
/**
* Provide safe 64bit store operation to mlx5 UAR region for both 32bit and
@@ -607,6 +609,24 @@ mlx5_tx_complete(struct mlx5_txq_data *txq)
}
/**
+ * Get Memory Pool (MP) from mbuf. If mbuf is indirect, the pool from which the
+ * cloned mbuf is allocated is returned instead.
+ *
+ * @param buf
+ * Pointer to mbuf.
+ *
+ * @return
+ * Memory pool where data is located for given mbuf.
+ */
+static struct rte_mempool *
+mlx5_mb2mp(struct rte_mbuf *buf)
+{
+ if (unlikely(RTE_MBUF_INDIRECT(buf)))
+ return rte_mbuf_from_indirect(buf)->pool;
+ return buf->pool;
+}
+
+/**
* Query LKey from a packet buffer for Rx. No need to flush local caches for Rx
* as mempool is pre-configured and static.
*
@@ -664,7 +684,20 @@ mlx5_tx_addr2mr(struct mlx5_txq_data *txq, uintptr_t addr)
return mlx5_tx_addr2mr_bh(txq, addr);
}
-#define mlx5_tx_mb2mr(rxq, mb) mlx5_tx_addr2mr(rxq, (uintptr_t)((mb)->buf_addr))
+static __rte_always_inline uint32_t
+mlx5_tx_mb2mr(struct mlx5_txq_data *txq, struct rte_mbuf *mb)
+{
+ uintptr_t addr = (uintptr_t)mb->buf_addr;
+ uint32_t lkey = mlx5_tx_addr2mr(txq, addr);
+
+ if (likely(lkey != UINT32_MAX))
+ return lkey;
+ if (rte_errno == ENXIO) {
+ /* Mempool may have externally allocated memory. */
+ lkey = mlx5_tx_update_ext_mp(txq, addr, mlx5_mb2mp(mb));
+ }
+ return lkey;
+}
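
The helper only falls back to registering the mbuf's mempool when the address
lookup misses with ENXIO, i.e. for externally allocated memory. A caller-side
sketch (dseg is a hypothetical pointer to a WQE data segment; error handling is
simplified and this is not the PMD's exact code):

	uint32_t lkey = mlx5_tx_mb2mr(txq, mb);

	if (unlikely(lkey == UINT32_MAX))
		return 0; /* no MR covers this mbuf even after the fallback */
	dseg->addr = rte_cpu_to_be_64(rte_pktmbuf_mtod(mb, uintptr_t));
	dseg->byte_count = rte_cpu_to_be_32(rte_pktmbuf_data_len(mb));
	dseg->lkey = lkey; /* the MR cache stores lkeys in descriptor byte order */
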
/**
* Ring TX queue doorbell and flush the update if requested.
diff --git a/drivers/net/mlx5/mlx5_rxtx_vec.c b/drivers/net/mlx5/mlx5_rxtx_vec.c
index 0a4aed8f..1453f4ff 100644
--- a/drivers/net/mlx5/mlx5_rxtx_vec.c
+++ b/drivers/net/mlx5/mlx5_rxtx_vec.c
@@ -40,7 +40,8 @@
#endif
/**
- * Count the number of packets having same ol_flags and calculate cs_flags.
+ * Count the number of packets having same ol_flags and same metadata (if
+ * PKT_TX_METADATA is set in ol_flags), and calculate cs_flags.
*
* @param pkts
* Pointer to array of packets.
@@ -48,26 +49,45 @@
* Number of packets.
* @param cs_flags
* Pointer of flags to be returned.
+ * @param metadata
+ * Pointer of metadata to be returned.
+ * @param txq_offloads
+ *   Offloads enabled on the Tx queue.
*
* @return
- * Number of packets having same ol_flags.
+ * Number of packets having same ol_flags and metadata, if relevant.
*/
static inline unsigned int
-txq_calc_offload(struct rte_mbuf **pkts, uint16_t pkts_n, uint8_t *cs_flags)
+txq_calc_offload(struct rte_mbuf **pkts, uint16_t pkts_n, uint8_t *cs_flags,
+ rte_be32_t *metadata, const uint64_t txq_offloads)
{
unsigned int pos;
- const uint64_t ol_mask =
+ const uint64_t cksum_ol_mask =
PKT_TX_IP_CKSUM | PKT_TX_TCP_CKSUM |
PKT_TX_UDP_CKSUM | PKT_TX_TUNNEL_GRE |
PKT_TX_TUNNEL_VXLAN | PKT_TX_OUTER_IP_CKSUM;
+ rte_be32_t p0_metadata, pn_metadata;
if (!pkts_n)
return 0;
- /* Count the number of packets having same ol_flags. */
- for (pos = 1; pos < pkts_n; ++pos)
- if ((pkts[pos]->ol_flags ^ pkts[0]->ol_flags) & ol_mask)
+ p0_metadata = pkts[0]->ol_flags & PKT_TX_METADATA ?
+ pkts[0]->tx_metadata : 0;
+ /* Count the number of packets having same offload parameters. */
+ for (pos = 1; pos < pkts_n; ++pos) {
+ /* Check if packet has same checksum flags. */
+ if ((txq_offloads & MLX5_VEC_TX_CKSUM_OFFLOAD_CAP) &&
+ ((pkts[pos]->ol_flags ^ pkts[0]->ol_flags) & cksum_ol_mask))
break;
+ /* Check if packet has same metadata. */
+ if (txq_offloads & DEV_TX_OFFLOAD_MATCH_METADATA) {
+ pn_metadata = pkts[pos]->ol_flags & PKT_TX_METADATA ?
+ pkts[pos]->tx_metadata : 0;
+ if (pn_metadata != p0_metadata)
+ break;
+ }
+ }
*cs_flags = txq_ol_cksum_to_cs(pkts[0]);
+ *metadata = p0_metadata;
return pos;
}
@@ -96,7 +116,7 @@ mlx5_tx_burst_raw_vec(void *dpdk_txq, struct rte_mbuf **pkts,
uint16_t ret;
n = RTE_MIN((uint16_t)(pkts_n - nb_tx), MLX5_VPMD_TX_MAX_BURST);
- ret = txq_burst_v(txq, &pkts[nb_tx], n, 0);
+ ret = txq_burst_v(txq, &pkts[nb_tx], n, 0, 0);
nb_tx += ret;
if (!ret)
break;
@@ -127,6 +147,7 @@ mlx5_tx_burst_vec(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
uint8_t cs_flags = 0;
uint16_t n;
uint16_t ret;
+ rte_be32_t metadata = 0;
/* Transmit multi-seg packets in the head of pkts list. */
if ((txq->offloads & DEV_TX_OFFLOAD_MULTI_SEGS) &&
@@ -137,9 +158,12 @@ mlx5_tx_burst_vec(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
n = RTE_MIN((uint16_t)(pkts_n - nb_tx), MLX5_VPMD_TX_MAX_BURST);
if (txq->offloads & DEV_TX_OFFLOAD_MULTI_SEGS)
n = txq_count_contig_single_seg(&pkts[nb_tx], n);
- if (txq->offloads & MLX5_VEC_TX_CKSUM_OFFLOAD_CAP)
- n = txq_calc_offload(&pkts[nb_tx], n, &cs_flags);
- ret = txq_burst_v(txq, &pkts[nb_tx], n, cs_flags);
+ if (txq->offloads & (MLX5_VEC_TX_CKSUM_OFFLOAD_CAP |
+ DEV_TX_OFFLOAD_MATCH_METADATA))
+ n = txq_calc_offload(&pkts[nb_tx], n,
+ &cs_flags, &metadata,
+ txq->offloads);
+ ret = txq_burst_v(txq, &pkts[nb_tx], n, cs_flags, metadata);
nb_tx += ret;
if (!ret)
break;
diff --git a/drivers/net/mlx5/mlx5_rxtx_vec.h b/drivers/net/mlx5/mlx5_rxtx_vec.h
index fb884f92..fda7004e 100644
--- a/drivers/net/mlx5/mlx5_rxtx_vec.h
+++ b/drivers/net/mlx5/mlx5_rxtx_vec.h
@@ -22,6 +22,7 @@
/* HW offload capabilities of vectorized Tx. */
#define MLX5_VEC_TX_OFFLOAD_CAP \
(MLX5_VEC_TX_CKSUM_OFFLOAD_CAP | \
+ DEV_TX_OFFLOAD_MATCH_METADATA | \
DEV_TX_OFFLOAD_MULTI_SEGS)
/*
diff --git a/drivers/net/mlx5/mlx5_rxtx_vec_neon.h b/drivers/net/mlx5/mlx5_rxtx_vec_neon.h
index b37b7381..0b729f18 100644
--- a/drivers/net/mlx5/mlx5_rxtx_vec_neon.h
+++ b/drivers/net/mlx5/mlx5_rxtx_vec_neon.h
@@ -201,13 +201,15 @@ txq_scatter_v(struct mlx5_txq_data *txq, struct rte_mbuf **pkts,
* Number of packets to be sent (<= MLX5_VPMD_TX_MAX_BURST).
* @param cs_flags
* Checksum offload flags to be written in the descriptor.
+ * @param metadata
+ * Metadata value to be written in the descriptor.
*
* @return
* Number of packets successfully transmitted (<= pkts_n).
*/
static inline uint16_t
txq_burst_v(struct mlx5_txq_data *txq, struct rte_mbuf **pkts, uint16_t pkts_n,
- uint8_t cs_flags)
+ uint8_t cs_flags, rte_be32_t metadata)
{
struct rte_mbuf **elts;
uint16_t elts_head = txq->elts_head;
@@ -293,11 +295,8 @@ txq_burst_v(struct mlx5_txq_data *txq, struct rte_mbuf **pkts, uint16_t pkts_n,
ctrl = vqtbl1q_u8(ctrl, ctrl_shuf_m);
vst1q_u8((void *)t_wqe, ctrl);
/* Fill ESEG in the header. */
- vst1q_u8((void *)(t_wqe + 1),
- ((uint8x16_t) { 0, 0, 0, 0,
- cs_flags, 0, 0, 0,
- 0, 0, 0, 0,
- 0, 0, 0, 0 }));
+ vst1q_u32((void *)(t_wqe + 1),
+ ((uint32x4_t) { 0, cs_flags, metadata, 0 }));
#ifdef MLX5_PMD_SOFT_COUNTERS
txq->stats.opackets += pkts_n;
#endif
diff --git a/drivers/net/mlx5/mlx5_rxtx_vec_sse.h b/drivers/net/mlx5/mlx5_rxtx_vec_sse.h
index 54b3783c..e0f95f92 100644
--- a/drivers/net/mlx5/mlx5_rxtx_vec_sse.h
+++ b/drivers/net/mlx5/mlx5_rxtx_vec_sse.h
@@ -202,13 +202,15 @@ txq_scatter_v(struct mlx5_txq_data *txq, struct rte_mbuf **pkts,
* Number of packets to be sent (<= MLX5_VPMD_TX_MAX_BURST).
* @param cs_flags
* Checksum offload flags to be written in the descriptor.
+ * @param metadata
+ * Metadata value to be written in the descriptor.
*
* @return
* Number of packets successfully transmitted (<= pkts_n).
*/
static inline uint16_t
txq_burst_v(struct mlx5_txq_data *txq, struct rte_mbuf **pkts, uint16_t pkts_n,
- uint8_t cs_flags)
+ uint8_t cs_flags, rte_be32_t metadata)
{
struct rte_mbuf **elts;
uint16_t elts_head = txq->elts_head;
@@ -292,11 +294,7 @@ txq_burst_v(struct mlx5_txq_data *txq, struct rte_mbuf **pkts, uint16_t pkts_n,
ctrl = _mm_shuffle_epi8(ctrl, shuf_mask_ctrl);
_mm_store_si128(t_wqe, ctrl);
/* Fill ESEG in the header. */
- _mm_store_si128(t_wqe + 1,
- _mm_set_epi8(0, 0, 0, 0,
- 0, 0, 0, 0,
- 0, 0, 0, cs_flags,
- 0, 0, 0, 0));
+ _mm_store_si128(t_wqe + 1, _mm_set_epi32(0, metadata, cs_flags, 0));
#ifdef MLX5_PMD_SOFT_COUNTERS
txq->stats.opackets += pkts_n;
#endif
diff --git a/drivers/net/mlx5/mlx5_socket.c b/drivers/net/mlx5/mlx5_socket.c
index a3a52291..00106171 100644
--- a/drivers/net/mlx5/mlx5_socket.c
+++ b/drivers/net/mlx5/mlx5_socket.c
@@ -3,8 +3,6 @@
* Copyright 2016 Mellanox Technologies, Ltd
*/
-#define _GNU_SOURCE
-
#include <sys/types.h>
#include <sys/socket.h>
#include <sys/un.h>
diff --git a/drivers/net/mlx5/mlx5_stats.c b/drivers/net/mlx5/mlx5_stats.c
index 91f3d474..a14d1e49 100644
--- a/drivers/net/mlx5/mlx5_stats.c
+++ b/drivers/net/mlx5/mlx5_stats.c
@@ -17,14 +17,6 @@
#include "mlx5_rxtx.h"
#include "mlx5_defs.h"
-struct mlx5_counter_ctrl {
- /* Name of the counter. */
- char dpdk_name[RTE_ETH_XSTATS_NAME_SIZE];
- /* Name of the counter on the device table. */
- char ctr_name[RTE_ETH_XSTATS_NAME_SIZE];
- uint32_t ib:1; /**< Nonzero for IB counters. */
-};
-
static const struct mlx5_counter_ctrl mlx5_counters_init[] = {
{
.dpdk_name = "rx_port_unicast_bytes",
@@ -115,6 +107,23 @@ static const struct mlx5_counter_ctrl mlx5_counters_init[] = {
.dpdk_name = "rx_bytes_phy",
.ctr_name = "rx_bytes_phy",
},
+ /* Representor only */
+ {
+ .dpdk_name = "rx_packets",
+ .ctr_name = "vport_rx_packets",
+ },
+ {
+ .dpdk_name = "rx_bytes",
+ .ctr_name = "vport_rx_bytes",
+ },
+ {
+ .dpdk_name = "tx_packets",
+ .ctr_name = "vport_tx_packets",
+ },
+ {
+ .dpdk_name = "tx_bytes",
+ .ctr_name = "vport_tx_bytes",
+ },
};
static const unsigned int xstats_n = RTE_DIM(mlx5_counters_init);
@@ -146,19 +155,19 @@ mlx5_read_dev_counters(struct rte_eth_dev *dev, uint64_t *stats)
et_stats->cmd = ETHTOOL_GSTATS;
et_stats->n_stats = xstats_ctrl->stats_n;
ifr.ifr_data = (caddr_t)et_stats;
- ret = mlx5_ifreq(dev, SIOCETHTOOL, &ifr, 1);
+ ret = mlx5_ifreq(dev, SIOCETHTOOL, &ifr);
if (ret) {
DRV_LOG(WARNING,
"port %u unable to read statistic values from device",
dev->data->port_id);
return ret;
}
- for (i = 0; i != xstats_n; ++i) {
- if (mlx5_counters_init[i].ib) {
+ for (i = 0; i != xstats_ctrl->mlx5_stats_n; ++i) {
+ if (xstats_ctrl->info[i].ib) {
FILE *file;
MKSTR(path, "%s/ports/1/hw_counters/%s",
priv->ibdev_path,
- mlx5_counters_init[i].ctr_name);
+ xstats_ctrl->info[i].ctr_name);
file = fopen(path, "rb");
if (file) {
@@ -194,7 +203,7 @@ mlx5_ethtool_get_stats_n(struct rte_eth_dev *dev) {
drvinfo.cmd = ETHTOOL_GDRVINFO;
ifr.ifr_data = (caddr_t)&drvinfo;
- ret = mlx5_ifreq(dev, SIOCETHTOOL, &ifr, 1);
+ ret = mlx5_ifreq(dev, SIOCETHTOOL, &ifr);
if (ret) {
DRV_LOG(WARNING, "port %u unable to query number of statistics",
dev->data->port_id);
@@ -222,6 +231,8 @@ mlx5_xstats_init(struct rte_eth_dev *dev)
unsigned int str_sz;
int ret;
+ /* So that it won't aggregate for each init. */
+ xstats_ctrl->mlx5_stats_n = 0;
ret = mlx5_ethtool_get_stats_n(dev);
if (ret < 0) {
DRV_LOG(WARNING, "port %u no extended statistics available",
@@ -229,7 +240,6 @@ mlx5_xstats_init(struct rte_eth_dev *dev)
return;
}
dev_stats_n = ret;
- xstats_ctrl->stats_n = dev_stats_n;
/* Allocate memory to grab stat names and values. */
str_sz = dev_stats_n * ETH_GSTRING_LEN;
strings = (struct ethtool_gstrings *)
@@ -244,14 +254,12 @@ mlx5_xstats_init(struct rte_eth_dev *dev)
strings->string_set = ETH_SS_STATS;
strings->len = dev_stats_n;
ifr.ifr_data = (caddr_t)strings;
- ret = mlx5_ifreq(dev, SIOCETHTOOL, &ifr, 1);
+ ret = mlx5_ifreq(dev, SIOCETHTOOL, &ifr);
if (ret) {
DRV_LOG(WARNING, "port %u unable to get statistic names",
dev->data->port_id);
goto free;
}
- for (j = 0; j != xstats_n; ++j)
- xstats_ctrl->dev_table_idx[j] = dev_stats_n;
for (i = 0; i != dev_stats_n; ++i) {
const char *curr_string = (const char *)
&strings->data[i * ETH_GSTRING_LEN];
@@ -259,24 +267,25 @@ mlx5_xstats_init(struct rte_eth_dev *dev)
for (j = 0; j != xstats_n; ++j) {
if (!strcmp(mlx5_counters_init[j].ctr_name,
curr_string)) {
- xstats_ctrl->dev_table_idx[j] = i;
+ unsigned int idx = xstats_ctrl->mlx5_stats_n++;
+
+ xstats_ctrl->dev_table_idx[idx] = i;
+ xstats_ctrl->info[idx] = mlx5_counters_init[j];
break;
}
}
}
- for (j = 0; j != xstats_n; ++j) {
- if (mlx5_counters_init[j].ib)
- continue;
- if (xstats_ctrl->dev_table_idx[j] >= dev_stats_n) {
- DRV_LOG(WARNING,
- "port %u counter \"%s\" is not recognized",
- dev->data->port_id,
- mlx5_counters_init[j].dpdk_name);
- goto free;
+ /* Add IB counters. */
+ for (i = 0; i != xstats_n; ++i) {
+ if (mlx5_counters_init[i].ib) {
+ unsigned int idx = xstats_ctrl->mlx5_stats_n++;
+
+ xstats_ctrl->info[idx] = mlx5_counters_init[i];
}
}
+ assert(xstats_ctrl->mlx5_stats_n <= MLX5_MAX_XSTATS);
+ xstats_ctrl->stats_n = dev_stats_n;
/* Copy to base at first time. */
- assert(xstats_n <= MLX5_MAX_XSTATS);
ret = mlx5_read_dev_counters(dev, xstats_ctrl->base);
if (ret)
DRV_LOG(ERR, "port %u cannot read device counters: %s",
@@ -306,9 +315,10 @@ mlx5_xstats_get(struct rte_eth_dev *dev, struct rte_eth_xstat *stats,
struct priv *priv = dev->data->dev_private;
unsigned int i;
uint64_t counters[n];
+ struct mlx5_xstats_ctrl *xstats_ctrl = &priv->xstats_ctrl;
+ uint16_t mlx5_stats_n = xstats_ctrl->mlx5_stats_n;
- if (n >= xstats_n && stats) {
- struct mlx5_xstats_ctrl *xstats_ctrl = &priv->xstats_ctrl;
+ if (n >= mlx5_stats_n && stats) {
int stats_n;
int ret;
@@ -320,12 +330,12 @@ mlx5_xstats_get(struct rte_eth_dev *dev, struct rte_eth_xstat *stats,
ret = mlx5_read_dev_counters(dev, counters);
if (ret)
return ret;
- for (i = 0; i != xstats_n; ++i) {
+ for (i = 0; i != mlx5_stats_n; ++i) {
stats[i].id = i;
stats[i].value = (counters[i] - xstats_ctrl->base[i]);
}
}
- return xstats_n;
+ return mlx5_stats_n;
}
/**
@@ -441,7 +451,7 @@ mlx5_xstats_reset(struct rte_eth_dev *dev)
struct mlx5_xstats_ctrl *xstats_ctrl = &priv->xstats_ctrl;
int stats_n;
unsigned int i;
- unsigned int n = xstats_n;
+ unsigned int n = xstats_ctrl->mlx5_stats_n;
uint64_t counters[n];
int ret;
@@ -481,14 +491,17 @@ mlx5_xstats_get_names(struct rte_eth_dev *dev __rte_unused,
struct rte_eth_xstat_name *xstats_names, unsigned int n)
{
unsigned int i;
+ struct priv *priv = dev->data->dev_private;
+ struct mlx5_xstats_ctrl *xstats_ctrl = &priv->xstats_ctrl;
+ unsigned int mlx5_xstats_n = xstats_ctrl->mlx5_stats_n;
- if (n >= xstats_n && xstats_names) {
- for (i = 0; i != xstats_n; ++i) {
+ if (n >= mlx5_xstats_n && xstats_names) {
+ for (i = 0; i != mlx5_xstats_n; ++i) {
strncpy(xstats_names[i].name,
- mlx5_counters_init[i].dpdk_name,
+ xstats_ctrl->info[i].dpdk_name,
RTE_ETH_XSTATS_NAME_SIZE);
xstats_names[i].name[RTE_ETH_XSTATS_NAME_SIZE - 1] = 0;
}
}
- return xstats_n;
+ return mlx5_xstats_n;
}
diff --git a/drivers/net/mlx5/mlx5_txq.c b/drivers/net/mlx5/mlx5_txq.c
index f9bc4739..b01bd675 100644
--- a/drivers/net/mlx5/mlx5_txq.c
+++ b/drivers/net/mlx5/mlx5_txq.c
@@ -120,7 +120,6 @@ mlx5_get_tx_port_offloads(struct rte_eth_dev *dev)
offloads |= (DEV_TX_OFFLOAD_IP_TNL_TSO |
DEV_TX_OFFLOAD_UDP_TNL_TSO);
}
-
if (config->tunnel_en) {
if (config->hw_csum)
offloads |= DEV_TX_OFFLOAD_OUTER_IPV4_CKSUM;
@@ -128,6 +127,10 @@ mlx5_get_tx_port_offloads(struct rte_eth_dev *dev)
offloads |= (DEV_TX_OFFLOAD_VXLAN_TNL_TSO |
DEV_TX_OFFLOAD_GRE_TNL_TSO);
}
+#ifdef HAVE_IBV_FLOW_DV_SUPPORT
+ if (config->dv_flow_en)
+ offloads |= DEV_TX_OFFLOAD_MATCH_METADATA;
+#endif
return offloads;
}
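
DEV_TX_OFFLOAD_MATCH_METADATA is therefore only advertised when the PMD is
built with DV flow support and dv_flow_en is enabled. On the application side,
using the feature boils down to requesting the offload at port configuration
time and tagging individual mbufs; a minimal, illustrative sketch (mbuf_pool
and port_id are placeholders, port setup and the matching rte_flow rule are
omitted):

	/* Assumes DEV_TX_OFFLOAD_MATCH_METADATA was set in txmode.offloads. */
	struct rte_mbuf *m = rte_pktmbuf_alloc(mbuf_pool);

	if (m == NULL)
		return -ENOMEM;
	m->tx_metadata = 0xcafe;        /* opaque 32-bit tag carried to the NIC */
	m->ol_flags |= PKT_TX_METADATA; /* tell the PMD the tag is valid */
	rte_eth_tx_burst(port_id, 0, &m, 1);
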