summaryrefslogtreecommitdiffstats
path: root/src/plugins
diff options
context:
space:
mode:
authorDamjan Marion <damarion@cisco.com>2018-03-16 01:25:27 +0100
committerDamjan Marion <dmarion.lists@gmail.com>2018-05-10 15:45:03 +0000
commita35cc14d37466f0737fa928d25697fbfe6e7d657 (patch)
tree7242bb0bc9feebc35a2b1ab56986bc2a7ddb0ceb /src/plugins
parentcd330c878dfdb9f52b372007a5086138b12d89f7 (diff)
vnet: device flow offload infra
Change-Id: Ibea4a96bdec5e368301a03d8b11a0712fa0265e0 Signed-off-by: Damjan Marion <damarion@cisco.com>
Diffstat (limited to 'src/plugins')
-rw-r--r--src/plugins/dpdk.am1
-rw-r--r--src/plugins/dpdk/device/device.c2
-rw-r--r--src/plugins/dpdk/device/dpdk.h26
-rw-r--r--src/plugins/dpdk/device/flow.c286
-rwxr-xr-xsrc/plugins/dpdk/device/init.c8
-rw-r--r--src/plugins/dpdk/device/node.c40
6 files changed, 363 insertions, 0 deletions
diff --git a/src/plugins/dpdk.am b/src/plugins/dpdk.am
index 10f2fe4aaba..7c6e0058992 100644
--- a/src/plugins/dpdk.am
+++ b/src/plugins/dpdk.am
@@ -38,6 +38,7 @@ dpdk_plugin_la_SOURCES = \
dpdk/api/dpdk_api.c \
dpdk/device/cli.c \
dpdk/device/common.c \
+ dpdk/device/flow.c \
dpdk/device/dpdk_priv.h \
dpdk/device/device.c \
dpdk/device/format.c \
diff --git a/src/plugins/dpdk/device/device.c b/src/plugins/dpdk/device/device.c
index 3a9c7813f0c..cdd9785a3f3 100644
--- a/src/plugins/dpdk/device/device.c
+++ b/src/plugins/dpdk/device/device.c
@@ -715,6 +715,8 @@ VNET_DEVICE_CLASS (dpdk_device_class) = {
.subif_add_del_function = dpdk_subif_add_del_function,
.rx_redirect_to_node = dpdk_set_interface_next_node,
.mac_addr_change_function = dpdk_set_mac_address,
+ .format_flow = format_dpdk_flow,
+ .flow_ops_function = dpdk_flow_ops_fn,
};
/* *INDENT-ON* */
diff --git a/src/plugins/dpdk/device/dpdk.h b/src/plugins/dpdk/device/dpdk.h
index 0778659db55..e1966e438c0 100644
--- a/src/plugins/dpdk/device/dpdk.h
+++ b/src/plugins/dpdk/device/dpdk.h
@@ -40,6 +40,7 @@
#include <rte_sched.h>
#include <rte_net.h>
#include <rte_bus_pci.h>
+#include <rte_flow.h>
#include <vnet/unix/pcap.h>
#include <vnet/devices/devices.h>
@@ -51,6 +52,7 @@
#endif
#include <vlib/pci/pci.h>
+#include <vnet/flow/flow.h>
#define NB_MBUF (16<<10)
@@ -158,6 +160,7 @@ typedef struct
_( 8, BOND_SLAVE_UP, "bond-slave-up") \
_( 9, TX_OFFLOAD, "tx-offload") \
_(10, INTEL_PHDR_CKSUM, "intel-phdr-cksum") \
+ _(11, RX_FLOW_OFFLOAD, "rx-flow-offload")
enum
{
@@ -168,6 +171,20 @@ enum
typedef struct
{
+ u32 flow_index; /* vnet flow index, copied from flow->index when the flow is added */
+ u32 mark; /* index into the device's flow_lookup_entries; 0 when no mark was assigned */
+ struct rte_flow *handle; /* handle returned by rte_flow_create (), passed to rte_flow_destroy () */
+} dpdk_flow_entry_t;
+
+typedef struct
+{
+ u32 flow_id; /* written to the buffer's flow_id on rx; ~0 means leave untouched */
+ u16 next_index; /* overrides the packet's next index on rx; (u16) ~0 means leave untouched */
+ i16 buffer_advance; /* passed to vlib_buffer_advance () on rx; ~0 (-1) means no advance */
+} dpdk_flow_lookup_entry_t;
+
+typedef struct
+{
CLIB_CACHE_LINE_ALIGN_MARK (cacheline0);
volatile u32 **lockp;
@@ -202,6 +219,12 @@ typedef struct
struct rte_eth_conf port_conf;
struct rte_eth_txconf tx_conf;
+ /* flow related */
+ u32 supported_flow_actions;
+ dpdk_flow_entry_t *flow_entries; /* pool */
+ dpdk_flow_lookup_entry_t *flow_lookup_entries; /* vector */
+ struct rte_flow_error last_flow_error;
+
/* HQoS related */
dpdk_device_hqos_per_worker_thread_t *hqos_wt;
dpdk_device_hqos_per_hqos_thread_t *hqos_ht;
@@ -472,7 +495,10 @@ format_function_t format_dpdk_tx_trace;
format_function_t format_dpdk_rx_trace;
format_function_t format_dpdk_rte_mbuf;
format_function_t format_dpdk_rx_rte_mbuf;
+format_function_t format_dpdk_flow;
unformat_function_t unformat_dpdk_log_level;
+vnet_flow_dev_ops_function_t dpdk_flow_ops_fn;
+
clib_error_t *unformat_rss_fn (unformat_input_t * input, uword * rss_fn);
clib_error_t *unformat_hqos (unformat_input_t * input,
dpdk_device_config_hqos_t * hqos);
diff --git a/src/plugins/dpdk/device/flow.c b/src/plugins/dpdk/device/flow.c
new file mode 100644
index 00000000000..a0eb95f8fa0
--- /dev/null
+++ b/src/plugins/dpdk/device/flow.c
@@ -0,0 +1,286 @@
+/*
+ * Copyright (c) 2018 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vnet/vnet.h>
+#include <vppinfra/vec.h>
+#include <vppinfra/format.h>
+#include <vlib/unix/cj.h>
+#include <assert.h>
+
+#include <vnet/ip/ip.h>
+#include <vnet/ethernet/ethernet.h>
+#include <vnet/ethernet/arp_packet.h>
+#include <dpdk/device/dpdk.h>
+
+#include <dpdk/device/dpdk_priv.h>
+#include <vppinfra/error.h>
+
+/* constant structs: 'ingress' is the attribute passed to rte_flow_create ();
+ * any_eth / any_vlan are zero-initialized two-element arrays where element 0
+ * is used as the rte_flow item spec and element 1 as the item mask */
+static const struct rte_flow_attr ingress = {.ingress = 1 };
+static const struct rte_flow_item_eth any_eth[2] = { };
+static const struct rte_flow_item_vlan any_vlan[2] = { };
+
+/*
+ * Translate a vnet IPv4/IPv6 n-tuple flow into an rte_flow rule and install
+ * it on the device.
+ *
+ * The pattern built is ETH / VLAN / IPV4|IPV6 / UDP|TCP / END. Each item uses
+ * a two-element array: element 0 is the spec (value to match) and element 1
+ * is the mask. ETH and VLAN use the all-zero any_eth / any_vlan arrays.
+ * The action list is PASSTHRU, MARK (id = fe->mark), END.
+ *
+ * xd - device the rule is installed on (xd->device_index is the DPDK port)
+ * f  - flow description; f->type selects the ip4 or ip6 n-tuple member
+ * fe - flow entry; fe->mark is read, fe->handle receives the rte_flow handle
+ *
+ * Returns 0 on success, or VNET_FLOW_ERROR_NOT_SUPPORTED when the flow
+ * requests an action the device does not support, the L4 protocol is neither
+ * UDP nor TCP, or rte_flow_create () fails (details are left in
+ * xd->last_flow_error).
+ */
+static int
+dpdk_flow_add_n_touple (dpdk_device_t * xd, vnet_flow_t * f,
+                        dpdk_flow_entry_t * fe)
+{
+  struct rte_flow_item_ipv4 ip4[2] = { };
+  struct rte_flow_item_ipv6 ip6[2] = { };
+  struct rte_flow_item_udp udp[2] = { };
+  struct rte_flow_item_tcp tcp[2] = { };
+  struct rte_flow_action_mark mark = { 0 };
+  struct rte_flow_item *item, *items = 0;
+  struct rte_flow_action *action, *actions = 0;
+  u16 src_port, dst_port, src_port_mask, dst_port_mask;
+  u8 protocol;
+  int rv = 0;
+
+  /* refuse early, before any vector is allocated */
+  if (f->actions & (~xd->supported_flow_actions))
+    return VNET_FLOW_ERROR_NOT_SUPPORTED;
+
+  /* Ethernet */
+  vec_add2 (items, item, 1);
+  item->type = RTE_FLOW_ITEM_TYPE_ETH;
+  item->spec = any_eth;
+  item->mask = any_eth + 1;
+
+  /* VLAN */
+  vec_add2 (items, item, 1);
+  item->type = RTE_FLOW_ITEM_TYPE_VLAN;
+  item->spec = any_vlan;
+  item->mask = any_vlan + 1;
+
+  /* IP */
+  vec_add2 (items, item, 1);
+  if (f->type == VNET_FLOW_TYPE_IP6_N_TUPLE)
+    {
+      vnet_flow_ip6_n_tuple_t *t6 = &f->ip6_n_tuple;
+      clib_memcpy (ip6[0].hdr.src_addr, &t6->src_addr.addr, 16);
+      clib_memcpy (ip6[1].hdr.src_addr, &t6->src_addr.mask, 16);
+      clib_memcpy (ip6[0].hdr.dst_addr, &t6->dst_addr.addr, 16);
+      clib_memcpy (ip6[1].hdr.dst_addr, &t6->dst_addr.mask, 16);
+      item->type = RTE_FLOW_ITEM_TYPE_IPV6;
+      item->spec = ip6;
+      item->mask = ip6 + 1;
+
+      src_port = t6->src_port.port;
+      dst_port = t6->dst_port.port;
+      src_port_mask = t6->src_port.mask;
+      dst_port_mask = t6->dst_port.mask;
+      protocol = t6->protocol;
+    }
+  else
+    {
+      vnet_flow_ip4_n_tuple_t *t4 = &f->ip4_n_tuple;
+      ASSERT (f->type == VNET_FLOW_TYPE_IP4_N_TUPLE);
+      /* spec (index 0) takes the address, mask (index 1) takes the mask */
+      ip4[0].hdr.src_addr = t4->src_addr.addr.as_u32;
+      ip4[1].hdr.src_addr = t4->src_addr.mask.as_u32;
+      ip4[0].hdr.dst_addr = t4->dst_addr.addr.as_u32;
+      ip4[1].hdr.dst_addr = t4->dst_addr.mask.as_u32;
+      item->type = RTE_FLOW_ITEM_TYPE_IPV4;
+      item->spec = ip4;
+      item->mask = ip4 + 1;
+
+      src_port = t4->src_port.port;
+      dst_port = t4->dst_port.port;
+      src_port_mask = t4->src_port.mask;
+      dst_port_mask = t4->dst_port.mask;
+      protocol = t4->protocol;
+    }
+
+  /* Layer 4 - ports and port masks are converted with clib_host_to_net_u16 */
+  vec_add2 (items, item, 1);
+  if (protocol == IP_PROTOCOL_UDP)
+    {
+      udp[0].hdr.src_port = clib_host_to_net_u16 (src_port);
+      udp[1].hdr.src_port = clib_host_to_net_u16 (src_port_mask);
+      udp[0].hdr.dst_port = clib_host_to_net_u16 (dst_port);
+      udp[1].hdr.dst_port = clib_host_to_net_u16 (dst_port_mask);
+      item->type = RTE_FLOW_ITEM_TYPE_UDP;
+      item->spec = udp;
+      item->mask = udp + 1;
+    }
+  else if (protocol == IP_PROTOCOL_TCP)
+    {
+      tcp[0].hdr.src_port = clib_host_to_net_u16 (src_port);
+      tcp[1].hdr.src_port = clib_host_to_net_u16 (src_port_mask);
+      tcp[0].hdr.dst_port = clib_host_to_net_u16 (dst_port);
+      tcp[1].hdr.dst_port = clib_host_to_net_u16 (dst_port_mask);
+      item->type = RTE_FLOW_ITEM_TYPE_TCP;
+      item->spec = tcp;
+      item->mask = tcp + 1;
+    }
+  else
+    {
+      rv = VNET_FLOW_ERROR_NOT_SUPPORTED;
+      goto done;
+    }
+
+  /* The End */
+  vec_add2 (items, item, 1);
+  item->type = RTE_FLOW_ITEM_TYPE_END;
+
+  vec_add2 (actions, action, 1);
+  action->type = RTE_FLOW_ACTION_TYPE_PASSTHRU;
+
+  vec_add2 (actions, action, 1);
+  mark.id = fe->mark;
+  action->type = RTE_FLOW_ACTION_TYPE_MARK;
+  action->conf = &mark;
+
+  vec_add2 (actions, action, 1);
+  action->type = RTE_FLOW_ACTION_TYPE_END;
+
+  fe->handle = rte_flow_create (xd->device_index, &ingress, items, actions,
+                                &xd->last_flow_error);
+
+  if (!fe->handle)
+    rv = VNET_FLOW_ERROR_NOT_SUPPORTED;
+
+done:
+  /* the item/action vectors are only needed while the rule is being created */
+  vec_free (items);
+  vec_free (actions);
+  return rv;
+}
+
+/*
+ * Device-class flow operation callback: adds or deletes one flow on a DPDK
+ * device.
+ *
+ * vnm          - vnet main (unused here)
+ * op           - VNET_FLOW_DEV_OP_ADD_FLOW or VNET_FLOW_DEV_OP_DEL_FLOW; any
+ *                other value yields VNET_FLOW_ERROR_NOT_SUPPORTED
+ * dev_instance - index into dpdk_main.devices
+ * flow_index   - vnet flow index, resolved with vnet_get_flow ()
+ * private_data - on ADD receives the index of the new entry in
+ *                xd->flow_entries; on DEL supplies that index
+ *
+ * Returns 0 on success, VNET_FLOW_ERROR_INTERNAL when rte_flow_destroy ()
+ * fails, or VNET_FLOW_ERROR_NOT_SUPPORTED for unsupported ops, flow types or
+ * flows without actions. On a failed ADD every entry allocated here is
+ * returned to its pool.
+ */
+int
+dpdk_flow_ops_fn (vnet_main_t * vnm, vnet_flow_dev_op_t op, u32 dev_instance,
+                  u32 flow_index, uword * private_data)
+{
+  dpdk_main_t *dm = &dpdk_main;
+  vnet_flow_t *flow = vnet_get_flow (flow_index);
+  dpdk_device_t *xd = vec_elt_at_index (dm->devices, dev_instance);
+  dpdk_flow_entry_t *fe;
+  dpdk_flow_lookup_entry_t *fle = 0;
+  int rv;
+
+  if (op == VNET_FLOW_DEV_OP_DEL_FLOW)
+    {
+      /* private_data must be a valid index into the flow entry pool */
+      ASSERT (*private_data < vec_len (xd->flow_entries));
+
+      fe = vec_elt_at_index (xd->flow_entries, *private_data);
+
+      if ((rv = rte_flow_destroy (xd->device_index, fe->handle,
+                                  &xd->last_flow_error)))
+        return VNET_FLOW_ERROR_INTERNAL;
+
+      /* give the lookup entry back as well; mark 0 is the reserved slot and
+         means that no lookup entry was assigned to this flow */
+      if (fe->mark)
+        {
+          fle = vec_elt_at_index (xd->flow_lookup_entries, fe->mark);
+          /* all-ones is the "nothing to do" pattern tested on the rx path */
+          memset (fle, -1, sizeof (*fle));
+          pool_put (xd->flow_lookup_entries, fle);
+        }
+
+      memset (fe, 0, sizeof (*fe));
+      pool_put (xd->flow_entries, fe);
+      return 0;
+    }
+
+  if (op != VNET_FLOW_DEV_OP_ADD_FLOW)
+    return VNET_FLOW_ERROR_NOT_SUPPORTED;
+
+  pool_get (xd->flow_entries, fe);
+  fe->flow_index = flow->index;
+
+  if (flow->actions == 0)
+    {
+      rv = VNET_FLOW_ERROR_NOT_SUPPORTED;
+      goto done;
+    }
+
+  /* if we need to mark packets, assign one mark */
+  if (flow->actions & (VNET_FLOW_ACTION_MARK |
+                       VNET_FLOW_ACTION_REDIRECT_TO_NODE |
+                       VNET_FLOW_ACTION_BUFFER_ADVANCE))
+    {
+      /* reserve slot 0 */
+      if (xd->flow_lookup_entries == 0)
+        pool_get_aligned (xd->flow_lookup_entries, fle,
+                          CLIB_CACHE_LINE_BYTES);
+      pool_get_aligned (xd->flow_lookup_entries, fle, CLIB_CACHE_LINE_BYTES);
+      fe->mark = fle - xd->flow_lookup_entries;
+    }
+  else
+    fe->mark = 0;
+
+  switch (flow->type)
+    {
+    case VNET_FLOW_TYPE_IP4_N_TUPLE:
+    case VNET_FLOW_TYPE_IP6_N_TUPLE:
+      if ((rv = dpdk_flow_add_n_touple (xd, flow, fe)))
+        goto done;
+      break;
+    default:
+      rv = VNET_FLOW_ERROR_NOT_SUPPORTED;
+      goto done;
+    }
+
+
+  *private_data = fe - xd->flow_entries;
+
+  /* install entry in the lookup table; fle stays NULL when none of the
+     mark/redirect/advance actions was requested, so there is nothing to
+     fill in that case */
+  if (fle)
+    {
+      /* start from all-ones ("unset"), then fill only the requested fields */
+      memset (fle, -1, sizeof (*fle));
+      if (flow->actions & VNET_FLOW_ACTION_MARK)
+        fle->flow_id = flow->mark_flow_id;
+      if (flow->actions & VNET_FLOW_ACTION_REDIRECT_TO_NODE)
+        fle->next_index = flow->redirect_device_input_next_index;
+      if (flow->actions & VNET_FLOW_ACTION_BUFFER_ADVANCE)
+        fle->buffer_advance = flow->buffer_advance;
+    }
+
+done:
+  if (rv)
+    {
+      /* roll back whatever was allocated for this flow */
+      memset (fe, 0, sizeof (*fe));
+      pool_put (xd->flow_entries, fe);
+      if (fle)
+        {
+          memset (fle, 0, sizeof (*fle));
+          pool_put (xd->flow_lookup_entries, fle);
+        }
+    }
+  return rv;
+}
+
+/*
+ * Format function for DPDK flow information.
+ *
+ * Arguments taken from the va_list, in order: u32 dev_instance,
+ * u32 flow_index, uword private_data.
+ *
+ * When flow_index is ~0 the device-wide summary is produced (supported flow
+ * actions plus type and message of the last DPDK flow error). Otherwise
+ * private_data indexes xd->flow_entries and the mark of that entry is
+ * printed.
+ */
+u8 *
+format_dpdk_flow (u8 * s, va_list * args)
+{
+  u32 dev_instance = va_arg (*args, u32);
+  u32 flow_index = va_arg (*args, u32);
+  uword private_data = va_arg (*args, uword);
+  dpdk_device_t *xd = vec_elt_at_index (dpdk_main.devices, dev_instance);
+  dpdk_flow_entry_t *entry;
+  const char *err_msg;
+
+  if (flow_index != ~0)
+    {
+      /* per-flow output */
+      entry = vec_elt_at_index (xd->flow_entries, private_data);
+
+      if (entry == 0)
+        return format (s, "unknown flow");
+
+      return format (s, "mark %u", entry->mark);
+    }
+
+  /* device-wide summary */
+  err_msg = xd->last_flow_error.message;
+  if (err_msg == 0)
+    err_msg = "n/a";
+
+  s = format (s, "%-25s: %U\n", "supported flow actions",
+              format_flow_actions, xd->supported_flow_actions);
+  s = format (s, "%-25s: %d\n", "last DPDK error type",
+              xd->last_flow_error.type);
+  s = format (s, "%-25s: %s\n", "last DPDK error message", err_msg);
+  return s;
+}
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/plugins/dpdk/device/init.c b/src/plugins/dpdk/device/init.c
index 83d26ce13e5..9d4c0fd5d7f 100755
--- a/src/plugins/dpdk/device/init.c
+++ b/src/plugins/dpdk/device/init.c
@@ -385,6 +385,14 @@ dpdk_lib_init (dpdk_main_t * dm)
case VNET_DPDK_PMD_IXGBE:
case VNET_DPDK_PMD_I40E:
xd->port_type = port_type_from_speed_capa (&dev_info);
+#if 0
+ xd->port_conf.fdir_conf.mode = RTE_FDIR_MODE_PERFECT;
+#endif
+ xd->supported_flow_actions = VNET_FLOW_ACTION_MARK |
+ VNET_FLOW_ACTION_REDIRECT_TO_NODE |
+ VNET_FLOW_ACTION_BUFFER_ADVANCE |
+ VNET_FLOW_ACTION_COUNT | VNET_FLOW_ACTION_DROP;
+
if (dm->conf->no_tx_checksum_offload == 0)
{
xd->tx_conf.txq_flags &= ~ETH_TXQ_FLAGS_NOXSUMS;
diff --git a/src/plugins/dpdk/device/node.c b/src/plugins/dpdk/device/node.c
index 7bb1fb3c9c1..7ba4dad8ae9 100644
--- a/src/plugins/dpdk/device/node.c
+++ b/src/plugins/dpdk/device/node.c
@@ -474,6 +474,40 @@ dpdk_set_next_from_etype (vlib_main_t * vm, vlib_node_runtime_t * node,
}
}
+/*
+ * Apply flow lookup entries to the received packets that carry the
+ * DPDK_RX_F_FDIR flag.
+ *
+ * For each flagged packet the lookup entry is selected by the mbuf's
+ * hash.fdir.hi value. Fields of the entry that are not all-ones are applied:
+ * next_index overrides ptd->next[], flow_id is stored in the buffer and
+ * buffer_advance is passed to vlib_buffer_advance ().
+ */
+static_always_inline void
+dpdk_process_flow_offload (dpdk_device_t * xd, dpdk_per_thread_data_t * ptd,
+                           uword n_rx_packets)
+{
+  dpdk_flow_lookup_entry_t *entry;
+  vlib_buffer_t *buf;
+  uword i;
+
+  /* TODO prefetch and quad-loop */
+  for (i = 0; i < n_rx_packets; i++)
+    {
+      if (ptd->flags[i] & (1 << DPDK_RX_F_FDIR))
+        {
+          entry = vec_elt_at_index (xd->flow_lookup_entries,
+                                    ptd->mbufs[i]->hash.fdir.hi);
+
+          /* redirect to another next node */
+          if (entry->next_index != (u16) ~ 0)
+            ptd->next[i] = entry->next_index;
+
+          /* tag the buffer with the flow id */
+          if (entry->flow_id != ~0)
+            {
+              buf = vlib_buffer_from_rte_mbuf (ptd->mbufs[i]);
+              buf->flow_id = entry->flow_id;
+            }
+
+          /* move the buffer's current data pointer */
+          if (entry->buffer_advance != ~0)
+            {
+              buf = vlib_buffer_from_rte_mbuf (ptd->mbufs[i]);
+              vlib_buffer_advance (buf, entry->buffer_advance);
+            }
+        }
+    }
+}
+
static_always_inline u32
dpdk_device_input (vlib_main_t * vm, dpdk_main_t * dm, dpdk_device_t * xd,
vlib_node_runtime_t * node, u32 thread_index, u16 queue_id)
@@ -549,6 +583,12 @@ dpdk_device_input (vlib_main_t * vm, dpdk_main_t * dm, dpdk_device_t * xd,
else
dpdk_set_next_from_etype (vm, node, ptd, n_rx_packets);
+ /* flow offload - process if rx flow offload enabled and at least one packet
+ is marked */
+ if (PREDICT_FALSE ((xd->flags & DPDK_DEVICE_FLAG_RX_FLOW_OFFLOAD) &&
+ (or_flags & (1 << DPDK_RX_F_FDIR))))
+ dpdk_process_flow_offload (xd, ptd, n_rx_packets);
+
/* is at least one packet marked as ip4 checksum bad? */
if (PREDICT_FALSE (or_flags & (1 << DPDK_RX_F_CKSUM_BAD)))
for (n = 0; n < n_rx_packets; n++)