From a35cc14d37466f0737fa928d25697fbfe6e7d657 Mon Sep 17 00:00:00 2001
From: Damjan Marion <damarion@cisco.com>
Date: Fri, 16 Mar 2018 01:25:27 +0100
Subject: vnet: device flow offload infra

Change-Id: Ibea4a96bdec5e368301a03d8b11a0712fa0265e0
Signed-off-by: Damjan Marion <damarion@cisco.com>
---
 src/plugins/dpdk.am              |   1 +
 src/plugins/dpdk/device/device.c |   2 +
 src/plugins/dpdk/device/dpdk.h   |  26 ++++
 src/plugins/dpdk/device/flow.c   | 288 +++++++++++++++++++++++++++++++++++++++
 src/plugins/dpdk/device/init.c   |   8 ++
 src/plugins/dpdk/device/node.c   |  40 ++++++
 6 files changed, 365 insertions(+)
 create mode 100644 src/plugins/dpdk/device/flow.c

diff --git a/src/plugins/dpdk.am b/src/plugins/dpdk.am
index 10f2fe4aaba..7c6e0058992 100644
--- a/src/plugins/dpdk.am
+++ b/src/plugins/dpdk.am
@@ -38,6 +38,7 @@ dpdk_plugin_la_SOURCES =	\
  dpdk/api/dpdk_api.c		\
  dpdk/device/cli.c		\
  dpdk/device/common.c		\
+ dpdk/device/flow.c		\
  dpdk/device/dpdk_priv.h	\
  dpdk/device/device.c		\
  dpdk/device/format.c		\

diff --git a/src/plugins/dpdk/device/device.c b/src/plugins/dpdk/device/device.c
index 3a9c7813f0c..cdd9785a3f3 100644
--- a/src/plugins/dpdk/device/device.c
+++ b/src/plugins/dpdk/device/device.c
@@ -715,6 +715,8 @@ VNET_DEVICE_CLASS (dpdk_device_class) = {
   .subif_add_del_function = dpdk_subif_add_del_function,
   .rx_redirect_to_node = dpdk_set_interface_next_node,
   .mac_addr_change_function = dpdk_set_mac_address,
+  .format_flow = format_dpdk_flow,
+  .flow_ops_function = dpdk_flow_ops_fn,
 };
 /* *INDENT-ON* */

diff --git a/src/plugins/dpdk/device/dpdk.h b/src/plugins/dpdk/device/dpdk.h
index 0778659db55..e1966e438c0 100644
--- a/src/plugins/dpdk/device/dpdk.h
+++ b/src/plugins/dpdk/device/dpdk.h
@@ -40,6 +40,7 @@
 #include
 #include
 #include
+#include <rte_flow.h>
 #include
 #include

@@ -51,6 +52,7 @@
 #endif

 #include
+#include <vnet/flow/flow.h>

 #define NB_MBUF   (16<<10)

@@ -158,6 +160,7 @@ typedef struct
   _( 8, BOND_SLAVE_UP, "bond-slave-up") \
   _( 9, TX_OFFLOAD, "tx-offload") \
   _(10, INTEL_PHDR_CKSUM, "intel-phdr-cksum") \
+  _(11, RX_FLOW_OFFLOAD, "rx-flow-offload")

 enum
 {
@@ -166,6 +169,20 @@ enum
 #undef _
 };

+typedef struct
+{
+  u32 flow_index;
+  u32 mark;
+  struct rte_flow *handle;
+} dpdk_flow_entry_t;
+
+typedef struct
+{
+  u32 flow_id;
+  u16 next_index;
+  i16 buffer_advance;
+} dpdk_flow_lookup_entry_t;
+
 typedef struct
 {
   CLIB_CACHE_LINE_ALIGN_MARK (cacheline0);
@@ -202,6 +219,12 @@ typedef struct
   struct rte_eth_conf port_conf;
   struct rte_eth_txconf tx_conf;

+  /* flow related */
+  u32 supported_flow_actions;
+  dpdk_flow_entry_t *flow_entries;	/* pool */
+  dpdk_flow_lookup_entry_t *flow_lookup_entries;	/* pool */
+  struct rte_flow_error last_flow_error;
+
   /* HQoS related */
   dpdk_device_hqos_per_worker_thread_t *hqos_wt;
   dpdk_device_hqos_per_hqos_thread_t *hqos_ht;
@@ -472,7 +495,10 @@ format_function_t format_dpdk_tx_trace;
 format_function_t format_dpdk_rx_trace;
 format_function_t format_dpdk_rte_mbuf;
 format_function_t format_dpdk_rx_rte_mbuf;
+format_function_t format_dpdk_flow;
 unformat_function_t unformat_dpdk_log_level;
+vnet_flow_dev_ops_function_t dpdk_flow_ops_fn;
+
 clib_error_t *unformat_rss_fn (unformat_input_t * input, uword * rss_fn);
 clib_error_t *unformat_hqos (unformat_input_t * input,
			     dpdk_device_config_hqos_t * hqos);
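The two hooks registered on dpdk_device_class above are the whole driver-facing surface of the new infrastructure: vnet hands flow add/del requests to `flow_ops_function` and renders per-device flow state through `format_flow`. The vnet-side caller is not part of this patch, so the sketch below only illustrates the assumed dispatch shape; `vnet_flow_dev_op_sketch` is a hypothetical name, while `vnet_get_hw_interface` and `vnet_get_device_class` are existing vnet accessors.

/* Illustrative only: how a caller inside vnet might reach the hook
 * registered by a device class such as dpdk_device_class. */
static int
vnet_flow_dev_op_sketch (vnet_main_t * vnm, u32 hw_if_index,
			 vnet_flow_dev_op_t op, u32 flow_index,
			 uword * private_data)
{
  vnet_hw_interface_t *hi = vnet_get_hw_interface (vnm, hw_if_index);
  vnet_device_class_t *dc =
    vnet_get_device_class (vnm, hi->dev_class_index);

  if (dc->flow_ops_function == 0)
    return VNET_FLOW_ERROR_NOT_SUPPORTED;

  /* dev_instance lets the driver locate its per-device state */
  return dc->flow_ops_function (vnm, op, hi->dev_instance, flow_index,
				private_data);
}

Because the hook receives `dev_instance` rather than an interface index, `dpdk_flow_ops_fn` in the new flow.c below can index `dm->devices` directly.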
diff --git a/src/plugins/dpdk/device/flow.c b/src/plugins/dpdk/device/flow.c
new file mode 100644
index 00000000000..a0eb95f8fa0
--- /dev/null
+++ b/src/plugins/dpdk/device/flow.c
@@ -0,0 +1,288 @@
+/*
+ * Copyright (c) 2018 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include
+#include
+#include
+#include
+#include
+
+#include
+#include
+#include
+#include
+
+#include
+#include
+
+/* constant structs */
+static const struct rte_flow_attr ingress = {.ingress = 1 };
+static const struct rte_flow_item_eth any_eth[2] = { };
+static const struct rte_flow_item_vlan any_vlan[2] = { };
+
+static int
+dpdk_flow_add_n_tuple (dpdk_device_t * xd, vnet_flow_t * f,
+		       dpdk_flow_entry_t * fe)
+{
+  struct rte_flow_item_ipv4 ip4[2] = { };
+  struct rte_flow_item_ipv6 ip6[2] = { };
+  struct rte_flow_item_udp udp[2] = { };
+  struct rte_flow_item_tcp tcp[2] = { };
+  struct rte_flow_action_mark mark = { 0 };
+  struct rte_flow_item *item, *items = 0;
+  struct rte_flow_action *action, *actions = 0;
+  u16 src_port, dst_port, src_port_mask, dst_port_mask;
+  u8 protocol;
+  int rv = 0;
+
+  if (f->actions & (~xd->supported_flow_actions))
+    return VNET_FLOW_ERROR_NOT_SUPPORTED;
+
+  /* Ethernet */
+  vec_add2 (items, item, 1);
+  item->type = RTE_FLOW_ITEM_TYPE_ETH;
+  item->spec = any_eth;
+  item->mask = any_eth + 1;
+
+  /* VLAN */
+  vec_add2 (items, item, 1);
+  item->type = RTE_FLOW_ITEM_TYPE_VLAN;
+  item->spec = any_vlan;
+  item->mask = any_vlan + 1;
+
+  /* IP */
+  vec_add2 (items, item, 1);
+  if (f->type == VNET_FLOW_TYPE_IP6_N_TUPLE)
+    {
+      vnet_flow_ip6_n_tuple_t *t6 = &f->ip6_n_tuple;
+      clib_memcpy (ip6[0].hdr.src_addr, &t6->src_addr.addr, 16);
+      clib_memcpy (ip6[1].hdr.src_addr, &t6->src_addr.mask, 16);
+      clib_memcpy (ip6[0].hdr.dst_addr, &t6->dst_addr.addr, 16);
+      clib_memcpy (ip6[1].hdr.dst_addr, &t6->dst_addr.mask, 16);
+      item->type = RTE_FLOW_ITEM_TYPE_IPV6;
+      item->spec = ip6;
+      item->mask = ip6 + 1;
+
+      src_port = t6->src_port.port;
+      dst_port = t6->dst_port.port;
+      src_port_mask = t6->src_port.mask;
+      dst_port_mask = t6->dst_port.mask;
+      protocol = t6->protocol;
+    }
+  else
+    {
+      vnet_flow_ip4_n_tuple_t *t4 = &f->ip4_n_tuple;
+      ASSERT (f->type == VNET_FLOW_TYPE_IP4_N_TUPLE);
+      ip4[0].hdr.src_addr = t4->src_addr.addr.as_u32;
+      ip4[1].hdr.src_addr = t4->src_addr.mask.as_u32;
+      ip4[0].hdr.dst_addr = t4->dst_addr.addr.as_u32;
+      ip4[1].hdr.dst_addr = t4->dst_addr.mask.as_u32;
+      item->type = RTE_FLOW_ITEM_TYPE_IPV4;
+      item->spec = ip4;
+      item->mask = ip4 + 1;
+
+      src_port = t4->src_port.port;
+      dst_port = t4->dst_port.port;
+      src_port_mask = t4->src_port.mask;
+      dst_port_mask = t4->dst_port.mask;
+      protocol = t4->protocol;
+    }
+
+  /* Layer 4 */
+  vec_add2 (items, item, 1);
+  if (protocol == IP_PROTOCOL_UDP)
+    {
+      udp[0].hdr.src_port = clib_host_to_net_u16 (src_port);
+      udp[1].hdr.src_port = clib_host_to_net_u16 (src_port_mask);
+      udp[0].hdr.dst_port = clib_host_to_net_u16 (dst_port);
+      udp[1].hdr.dst_port = clib_host_to_net_u16 (dst_port_mask);
+      item->type = RTE_FLOW_ITEM_TYPE_UDP;
+      item->spec = udp;
+      item->mask = udp + 1;
+    }
+  else if (protocol == IP_PROTOCOL_TCP)
+    {
+      tcp[0].hdr.src_port = clib_host_to_net_u16 (src_port);
+      tcp[1].hdr.src_port = clib_host_to_net_u16 (src_port_mask);
+      tcp[0].hdr.dst_port = clib_host_to_net_u16 (dst_port);
+      tcp[1].hdr.dst_port = clib_host_to_net_u16 (dst_port_mask);
+      item->type = RTE_FLOW_ITEM_TYPE_TCP;
+      item->spec = tcp;
+      item->mask = tcp + 1;
+    }
+  else
+    {
+      rv = VNET_FLOW_ERROR_NOT_SUPPORTED;
+      goto done;
+    }
+
+  /* The End */
+  vec_add2 (items, item, 1);
+  item->type = RTE_FLOW_ITEM_TYPE_END;
+
+  vec_add2 (actions, action, 1);
+  action->type = RTE_FLOW_ACTION_TYPE_PASSTHRU;
+
+  vec_add2 (actions, action, 1);
+  mark.id = fe->mark;
+  action->type = RTE_FLOW_ACTION_TYPE_MARK;
+  action->conf = &mark;
+
+  vec_add2 (actions, action, 1);
+  action->type = RTE_FLOW_ACTION_TYPE_END;
+
+  fe->handle = rte_flow_create (xd->device_index, &ingress, items, actions,
+				&xd->last_flow_error);
+
+  if (!fe->handle)
+    rv = VNET_FLOW_ERROR_NOT_SUPPORTED;
+
+done:
+  vec_free (items);
+  vec_free (actions);
+  return rv;
+}
+
+int
+dpdk_flow_ops_fn (vnet_main_t * vnm, vnet_flow_dev_op_t op, u32 dev_instance,
+		  u32 flow_index, uword * private_data)
+{
+  dpdk_main_t *dm = &dpdk_main;
+  vnet_flow_t *flow = vnet_get_flow (flow_index);
+  dpdk_device_t *xd = vec_elt_at_index (dm->devices, dev_instance);
+  dpdk_flow_entry_t *fe;
+  dpdk_flow_lookup_entry_t *fle = 0;
+  int rv;
+
+  if (op == VNET_FLOW_DEV_OP_DEL_FLOW)
+    {
+      ASSERT (*private_data < vec_len (xd->flow_entries));
+
+      fe = vec_elt_at_index (xd->flow_entries, *private_data);
+
+      if ((rv = rte_flow_destroy (xd->device_index, fe->handle,
+				  &xd->last_flow_error)))
+	return VNET_FLOW_ERROR_INTERNAL;
+
+      memset (fe, 0, sizeof (*fe));
+      pool_put (xd->flow_entries, fe);
+      return 0;
+    }
+
+  if (op != VNET_FLOW_DEV_OP_ADD_FLOW)
+    return VNET_FLOW_ERROR_NOT_SUPPORTED;
+
+  pool_get (xd->flow_entries, fe);
+  fe->flow_index = flow->index;
+
+  if (flow->actions == 0)
+    {
+      rv = VNET_FLOW_ERROR_NOT_SUPPORTED;
+      goto done;
+    }
+
+  /* if we need to mark packets, assign one mark */
+  if (flow->actions & (VNET_FLOW_ACTION_MARK |
+		       VNET_FLOW_ACTION_REDIRECT_TO_NODE |
+		       VNET_FLOW_ACTION_BUFFER_ADVANCE))
+    {
+      /* reserve slot 0 so a mark of 0 can mean "no mark" */
+      if (xd->flow_lookup_entries == 0)
+	pool_get_aligned (xd->flow_lookup_entries, fle,
+			  CLIB_CACHE_LINE_BYTES);
+      pool_get_aligned (xd->flow_lookup_entries, fle, CLIB_CACHE_LINE_BYTES);
+      fe->mark = fle - xd->flow_lookup_entries;
+    }
+  else
+    fe->mark = 0;
+
+  switch (flow->type)
+    {
+    case VNET_FLOW_TYPE_IP4_N_TUPLE:
+    case VNET_FLOW_TYPE_IP6_N_TUPLE:
+      if ((rv = dpdk_flow_add_n_tuple (xd, flow, fe)))
+	goto done;
+      break;
+    default:
+      rv = VNET_FLOW_ERROR_NOT_SUPPORTED;
+      goto done;
+    }
+
+  *private_data = fe - xd->flow_entries;
+
+  /* install entry in the lookup table */
+  if (fle)
+    {
+      memset (fle, ~0, sizeof (*fle));
+      if (flow->actions & VNET_FLOW_ACTION_MARK)
+	fle->flow_id = flow->mark_flow_id;
+      if (flow->actions & VNET_FLOW_ACTION_REDIRECT_TO_NODE)
+	fle->next_index = flow->redirect_device_input_next_index;
+      if (flow->actions & VNET_FLOW_ACTION_BUFFER_ADVANCE)
+	fle->buffer_advance = flow->buffer_advance;
+    }
+
+done:
+  if (rv)
+    {
+      memset (fe, 0, sizeof (*fe));
+      pool_put (xd->flow_entries, fe);
+      if (fle)
+	{
+	  memset (fle, 0, sizeof (*fle));
+	  pool_put (xd->flow_lookup_entries, fle);
+	}
+    }
+  return rv;
+}
+
+u8 *
+format_dpdk_flow (u8 * s, va_list * args)
+{
+  u32 dev_instance = va_arg (*args, u32);
+  u32 flow_index = va_arg (*args, u32);
+  uword private_data = va_arg (*args, uword);
+  dpdk_main_t *dm = &dpdk_main;
+  dpdk_device_t *xd = vec_elt_at_index (dm->devices, dev_instance);
+  dpdk_flow_entry_t *fe;
+
+  if (flow_index == ~0)
+    {
+      s = format (s, "%-25s: %U\n", "supported flow actions",
+		  format_flow_actions, xd->supported_flow_actions);
+      s = format (s, "%-25s: %d\n", "last DPDK error type",
+		  xd->last_flow_error.type);
+      s = format (s, "%-25s: %s\n", "last DPDK error message",
+		  xd->last_flow_error.message ? xd->last_flow_error.message :
+		  "n/a");
+      return s;
+    }
+
+  fe = vec_elt_at_index (xd->flow_entries, private_data);
+
+  if (!fe)
+    return format (s, "unknown flow");
+
+  s = format (s, "mark %u", fe->mark);
+  return s;
+}
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */

diff --git a/src/plugins/dpdk/device/init.c b/src/plugins/dpdk/device/init.c
index 83d26ce13e5..9d4c0fd5d7f 100755
--- a/src/plugins/dpdk/device/init.c
+++ b/src/plugins/dpdk/device/init.c
@@ -385,6 +385,14 @@ dpdk_lib_init (dpdk_main_t * dm)
	case VNET_DPDK_PMD_IXGBE:
	case VNET_DPDK_PMD_I40E:
	  xd->port_type = port_type_from_speed_capa (&dev_info);
+#if 0
+	  xd->port_conf.fdir_conf.mode = RTE_FDIR_MODE_PERFECT;
+#endif
+	  xd->supported_flow_actions = VNET_FLOW_ACTION_MARK |
+	    VNET_FLOW_ACTION_REDIRECT_TO_NODE |
+	    VNET_FLOW_ACTION_BUFFER_ADVANCE |
+	    VNET_FLOW_ACTION_COUNT | VNET_FLOW_ACTION_DROP;
+
	  if (dm->conf->no_tx_checksum_offload == 0)
	    {
	      xd->tx_conf.txq_flags &= ~ETH_TXQ_FLAGS_NOXSUMS;

diff --git a/src/plugins/dpdk/device/node.c b/src/plugins/dpdk/device/node.c
index 7bb1fb3c9c1..7ba4dad8ae9 100644
--- a/src/plugins/dpdk/device/node.c
+++ b/src/plugins/dpdk/device/node.c
@@ -474,6 +474,40 @@ dpdk_set_next_from_etype (vlib_main_t * vm, vlib_node_runtime_t * node,
     }
 }

+static_always_inline void
+dpdk_process_flow_offload (dpdk_device_t * xd, dpdk_per_thread_data_t * ptd,
+			   uword n_rx_packets)
+{
+  uword n;
+  dpdk_flow_lookup_entry_t *fle;
+  vlib_buffer_t *b0;
+
+  /* TODO prefetch and quad-loop */
+  for (n = 0; n < n_rx_packets; n++)
+    {
+      if ((ptd->flags[n] & (1 << DPDK_RX_F_FDIR)) == 0)
+	continue;
+
+      fle = vec_elt_at_index (xd->flow_lookup_entries,
+			      ptd->mbufs[n]->hash.fdir.hi);
+
+      if (fle->next_index != (u16) ~ 0)
+	ptd->next[n] = fle->next_index;
+
+      if (fle->flow_id != ~0)
+	{
+	  b0 = vlib_buffer_from_rte_mbuf (ptd->mbufs[n]);
+	  b0->flow_id = fle->flow_id;
+	}
+
+      if (fle->buffer_advance != ~0)
+	{
+	  b0 = vlib_buffer_from_rte_mbuf (ptd->mbufs[n]);
+	  vlib_buffer_advance (b0, fle->buffer_advance);
+	}
+    }
+}
+
 static_always_inline u32
 dpdk_device_input (vlib_main_t * vm, dpdk_main_t * dm, dpdk_device_t * xd,
		   vlib_node_runtime_t * node, u32 thread_index, u16 queue_id)
@@ -549,6 +583,12 @@ dpdk_device_input (vlib_main_t * vm, dpdk_main_t * dm, dpdk_device_t * xd,
   else
     dpdk_set_next_from_etype (vm, node, ptd, n_rx_packets);

+  /* flow offload - process if rx flow offload is enabled and at least one
+     packet is marked */
+  if (PREDICT_FALSE ((xd->flags & DPDK_DEVICE_FLAG_RX_FLOW_OFFLOAD) &&
+		     (or_flags & (1 << DPDK_RX_F_FDIR))))
+    dpdk_process_flow_offload (xd, ptd, n_rx_packets);
+
   /* is at least one packet marked as ip4 checksum bad? */
   if (PREDICT_FALSE (or_flags & (1 << DPDK_RX_F_CKSUM_BAD)))
     for (n = 0; n < n_rx_packets; n++)
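A note on the matching pattern used in `dpdk_flow_add_n_tuple`: every rte_flow item carries a `spec` pointer (the values to match) and a `mask` pointer (which bits of the spec are significant), which is why each item above is declared as a two-element array with element 0 acting as spec and element 1 as mask, wired up via `item->spec = ip4; item->mask = ip4 + 1;`. An all-zero pair such as `any_eth` therefore matches any packet, and the action list PASSTHRU + MARK + END leaves the packet on its normal RX path while attaching metadata. A self-contained sketch of the spec/mask idea, using plain-C stand-ins rather than DPDK types:

#include <stdio.h>
#include <stdint.h>

/* simplified stand-ins for rte_flow_item / rte_flow_item_ipv4 */
typedef struct { uint32_t src_addr, dst_addr; } ip4_spec_t;
typedef struct { const ip4_spec_t *spec, *mask; } flow_item_t;

static int
ip4_match (const flow_item_t * it, uint32_t src, uint32_t dst)
{
  /* a bit set in the mask means "this bit of spec must match" */
  return ((src & it->mask->src_addr) ==
	  (it->spec->src_addr & it->mask->src_addr)
	  && (dst & it->mask->dst_addr) ==
	  (it->spec->dst_addr & it->mask->dst_addr));
}

int
main (void)
{
  /* element 0 = spec, element 1 = mask, exactly as in flow.c */
  ip4_spec_t ip4[2] = { };
  flow_item_t item;

  ip4[0].dst_addr = 0x0a000001;	/* match dst 10.0.0.1 ... */
  ip4[1].dst_addr = 0xffffffff;	/* ... exactly */
  /* src left fully wildcarded: mask 0 */

  item.spec = ip4;
  item.mask = ip4 + 1;

  printf ("%d\n", ip4_match (&item, 0xc0a80101, 0x0a000001));	/* 1 */
  printf ("%d\n", ip4_match (&item, 0xc0a80101, 0x0a000002));	/* 0 */
  return 0;
}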
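The control-plane and data-plane halves meet in the mark id. On add, `dpdk_flow_ops_fn` allocates a `dpdk_flow_lookup_entry_t`, uses its pool index as the rte_flow MARK id (slot 0 stays reserved so a mark of 0 can mean "unmarked"), and records what the actions should do; on receive, the NIC echoes the id back in `mbuf->hash.fdir.hi`, and `dpdk_process_flow_offload` indexes straight into `flow_lookup_entries`, treating all-ones fields as "action not requested". A runnable stand-alone sketch of that resolution step, with local stand-ins for the lookup entry, mbuf, and buffer:

#include <stdio.h>
#include <stdint.h>
#include <string.h>

/* local stand-ins: a lookup entry shaped like dpdk_flow_lookup_entry_t,
 * a minimal "mbuf" carrying the NIC-reported mark, a minimal "buffer" */
typedef struct
{
  uint32_t flow_id;
  uint16_t next_index;
  int16_t buffer_advance;
} fle_t;
typedef struct { uint32_t fdir_hi; } mbuf_t;
typedef struct { uint32_t flow_id; int16_t current_data; } buf_t;

static void
process_one (const fle_t * table, const mbuf_t * m, buf_t * b,
	     uint16_t * next)
{
  const fle_t *fle = &table[m->fdir_hi];	/* mark id == pool index */

  if (fle->next_index != (uint16_t) ~0)	/* REDIRECT_TO_NODE requested */
    *next = fle->next_index;
  if (fle->flow_id != ~0u)		/* MARK requested */
    b->flow_id = fle->flow_id;
  if (fle->buffer_advance != (int16_t) ~0)	/* BUFFER_ADVANCE requested */
    b->current_data += fle->buffer_advance;
}

int
main (void)
{
  fle_t table[2];
  mbuf_t m = {.fdir_hi = 1 };
  buf_t b = { 0 };
  uint16_t next = 7;		/* whatever ethertype dispatch chose */

  memset (table, ~0, sizeof (table));	/* all actions "not requested" */
  table[1].flow_id = 1234;	/* this flow only asked for MARK */

  process_one (table, &m, &b, &next);
  printf ("next %u flow_id %u current_data %d\n", next, b.flow_id,
	  b.current_data);	/* next 7 flow_id 1234 current_data 0 */
  return 0;
}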
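The `PREDICT_FALSE` gate in `dpdk_device_input` keeps the common case cheap: `or_flags` is the bitwise OR of the per-packet flag bytes collected while the burst is parsed, so when no packet carries `DPDK_RX_F_FDIR` the feature costs one test per burst instead of one per packet. (Nothing in this patch sets `DPDK_DEVICE_FLAG_RX_FLOW_OFFLOAD` or the per-packet FDIR flag; presumably a follow-up raises the device flag when the first flow is enabled and derives the packet flag from the mbuf's `PKT_RX_FDIR` bit.) The accumulate-then-gate pattern in isolation:

#include <stdio.h>
#include <stdint.h>

#define F_FDIR (1 << 0)		/* stand-in for DPDK_RX_F_FDIR */

int
main (void)
{
  uint8_t flags[4] = { 0, 0, F_FDIR, 0 };	/* per-packet flag bytes */
  uint8_t or_flags = 0;
  int n;

  /* accumulate while touching each packet once anyway */
  for (n = 0; n < 4; n++)
    or_flags |= flags[n];

  /* one per-burst decision instead of a per-packet branch */
  if (or_flags & F_FDIR)
    printf ("at least one packet carries a flow mark\n");
  return 0;
}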
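From a consumer's perspective, none of the rte_flow detail is visible: a feature fills in a `vnet_flow_t`, registers it, and enables it on an interface, which is what eventually invokes `dpdk_flow_ops_fn` with `VNET_FLOW_DEV_OP_ADD_FLOW`. A hedged sketch, assuming the `vnet_flow_add`/`vnet_flow_enable` entry points from the companion vnet flow infrastructure change (their names and signatures are an assumption here, not shown in this patch; the field names follow the usage visible in flow.c above):

#include <vnet/flow/flow.h>

/* Hypothetical consumer: mark all TCP packets to dst port 80 on one
 * interface. vnet_flow_add()/vnet_flow_enable() are assumptions. */
static int
mark_http_flow (vnet_main_t * vnm, u32 hw_if_index)
{
  vnet_flow_t flow = { 0 };
  u32 flow_index;
  int rv;

  flow.type = VNET_FLOW_TYPE_IP4_N_TUPLE;
  flow.actions = VNET_FLOW_ACTION_MARK;
  flow.mark_flow_id = 1234;	/* value delivered in vlib_buffer_t flow_id */

  flow.ip4_n_tuple.protocol = IP_PROTOCOL_TCP;
  flow.ip4_n_tuple.dst_port.port = 80;
  flow.ip4_n_tuple.dst_port.mask = 0xffff;
  /* addresses and src port left zero == wildcard */

  if ((rv = vnet_flow_add (vnm, &flow, &flow_index)))
    return rv;

  /* binding to a hw interface is what triggers dpdk_flow_ops_fn with
     VNET_FLOW_DEV_OP_ADD_FLOW and programs the NIC via rte_flow_create */
  return vnet_flow_enable (vnm, flow_index, hw_if_index);
}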