From a60d4cb0a12ca82ba60a9b8725c9413c8d409bdd Mon Sep 17 00:00:00 2001 From: John Lo Date: Sat, 17 Dec 2016 03:09:58 -0500 Subject: Improve dpdk-input node to handle drivers not setting mbuf PTYPE For drivers that do not provide dpdk rte_mbuf PTYPE information, check ethernet header Etype to accelerate IP4 and IP6 forwarding path. Update packet trace for dpdk-input node to provide more info from DPDK rte_mbuf offload flags and packet types. Change-Id: I207158797a155305314d002726c0af97b8cb0eb3 Signed-off-by: John Lo --- vnet/vnet/devices/devices.h | 4 +- vnet/vnet/devices/dpdk/dpdk.h | 14 ++--- vnet/vnet/devices/dpdk/format.c | 24 ++++++++- vnet/vnet/devices/dpdk/init.c | 3 ++ vnet/vnet/devices/dpdk/node.c | 114 ++++++++++++++++++++++++++-------------- 5 files changed, 111 insertions(+), 48 deletions(-) diff --git a/vnet/vnet/devices/devices.h b/vnet/vnet/devices/devices.h index 3bd700a0f72..c46dab904c3 100644 --- a/vnet/vnet/devices/devices.h +++ b/vnet/vnet/devices/devices.h @@ -21,6 +21,7 @@ typedef enum { + VNET_DEVICE_INPUT_NEXT_IP4_NCS_INPUT, VNET_DEVICE_INPUT_NEXT_IP4_INPUT, VNET_DEVICE_INPUT_NEXT_IP6_INPUT, VNET_DEVICE_INPUT_NEXT_MPLS_INPUT, @@ -32,7 +33,8 @@ typedef enum #define VNET_DEVICE_INPUT_NEXT_NODES { \ [VNET_DEVICE_INPUT_NEXT_DROP] = "error-drop", \ [VNET_DEVICE_INPUT_NEXT_ETHERNET_INPUT] = "ethernet-input", \ - [VNET_DEVICE_INPUT_NEXT_IP4_INPUT] = "ip4-input-no-checksum", \ + [VNET_DEVICE_INPUT_NEXT_IP4_NCS_INPUT] = "ip4-input-no-checksum", \ + [VNET_DEVICE_INPUT_NEXT_IP4_INPUT] = "ip4-input", \ [VNET_DEVICE_INPUT_NEXT_IP6_INPUT] = "ip6-input", \ [VNET_DEVICE_INPUT_NEXT_MPLS_INPUT] = "mpls-input", \ } diff --git a/vnet/vnet/devices/dpdk/dpdk.h b/vnet/vnet/devices/dpdk/dpdk.h index 771c91fdc6e..3669bc5fd1f 100644 --- a/vnet/vnet/devices/dpdk/dpdk.h +++ b/vnet/vnet/devices/dpdk/dpdk.h @@ -189,13 +189,13 @@ typedef struct i8 cpu_socket; u16 flags; -#define DPDK_DEVICE_FLAG_ADMIN_UP (1 << 0) -#define DPDK_DEVICE_FLAG_PROMISC (1 << 1) -#define 
DPDK_DEVICE_FLAG_PMD (1 << 2) -#define DPDK_DEVICE_FLAG_MAYBE_MULTISEG (1 << 3) - -#define DPDK_DEVICE_FLAG_HAVE_SUBIF (1 << 5) -#define DPDK_DEVICE_FLAG_HQOS (1 << 6) +#define DPDK_DEVICE_FLAG_ADMIN_UP (1 << 0) +#define DPDK_DEVICE_FLAG_PROMISC (1 << 1) +#define DPDK_DEVICE_FLAG_PMD (1 << 2) +#define DPDK_DEVICE_FLAG_PMD_SUPPORTS_PTYPE (1 << 3) +#define DPDK_DEVICE_FLAG_MAYBE_MULTISEG (1 << 4) +#define DPDK_DEVICE_FLAG_HAVE_SUBIF (1 << 5) +#define DPDK_DEVICE_FLAG_HQOS (1 << 6) u16 nb_tx_desc; CLIB_CACHE_LINE_ALIGN_MARK (cacheline1); diff --git a/vnet/vnet/devices/dpdk/format.c b/vnet/vnet/devices/dpdk/format.c index 212f10914af..0b8a6939934 100644 --- a/vnet/vnet/devices/dpdk/format.c +++ b/vnet/vnet/devices/dpdk/format.c @@ -79,20 +79,39 @@ _(DEV_TX_OFFLOAD_OUTER_IPV4_CKSUM, "outer-ipv4-cksum") \ _(DEV_TX_OFFLOAD_QINQ_INSERT, "qinq-insert") +#if RTE_VERSION < RTE_VERSION_NUM(16, 11, 0, 0) +/* New ol_flags bits added in DPDK-16.11 */ +#define PKT_RX_IP_CKSUM_GOOD (1ULL << 7) +#define PKT_RX_L4_CKSUM_GOOD (1ULL << 8) +#endif + #define foreach_dpdk_pkt_rx_offload_flag \ _ (PKT_RX_VLAN_PKT, "RX packet is a 802.1q VLAN packet") \ _ (PKT_RX_RSS_HASH, "RX packet with RSS hash result") \ _ (PKT_RX_FDIR, "RX packet with FDIR infos") \ _ (PKT_RX_L4_CKSUM_BAD, "L4 cksum of RX pkt. is not OK") \ _ (PKT_RX_IP_CKSUM_BAD, "IP cksum of RX pkt. is not OK") \ + _ (PKT_RX_VLAN_STRIPPED, "RX packet VLAN tag stripped") \ + _ (PKT_RX_IP_CKSUM_GOOD, "IP cksum of RX pkt. is valid") \ + _ (PKT_RX_L4_CKSUM_GOOD, "L4 cksum of RX pkt. 
is valid") \ _ (PKT_RX_IEEE1588_PTP, "RX IEEE1588 L2 Ethernet PT Packet") \ - _ (PKT_RX_IEEE1588_TMST, "RX IEEE1588 L2/L4 timestamped packet") + _ (PKT_RX_IEEE1588_TMST, "RX IEEE1588 L2/L4 timestamped packet") \ + _ (PKT_RX_QINQ_STRIPPED, "RX packet QinQ tags stripped") + +#if RTE_VERSION < RTE_VERSION_NUM(16, 11, 0, 0) +/* PTYPE added in DPDK-16.11 */ +#define RTE_PTYPE_L2_ETHER_VLAN 0x00000006 +#define RTE_PTYPE_L2_ETHER_QINQ 0x00000007 +#endif #define foreach_dpdk_pkt_type \ _ (L2, ETHER, "Ethernet packet") \ _ (L2, ETHER_TIMESYNC, "Ethernet packet for time sync") \ _ (L2, ETHER_ARP, "ARP packet") \ _ (L2, ETHER_LLDP, "LLDP (Link Layer Discovery Protocol) packet") \ + _ (L2, ETHER_NSH, "NSH (Network Service Header) packet") \ + _ (L2, ETHER_VLAN, "VLAN packet") \ + _ (L2, ETHER_QINQ, "QinQ packet") \ _ (L3, IPV4, "IPv4 packet without extension headers") \ _ (L3, IPV4_EXT, "IPv4 packet with extension headers") \ _ (L3, IPV4_EXT_UNKNOWN, "IPv4 packet with or without extension headers") \ @@ -642,7 +661,8 @@ format_dpdk_rte_mbuf (u8 * s, va_list * va) s = format (s, "\n%U%U", format_white_space, indent, format_dpdk_pkt_offload_flags, &mb->ol_flags); - if (mb->ol_flags & PKT_RX_VLAN_PKT) + if ((mb->ol_flags & PKT_RX_VLAN_PKT) && + ((mb->ol_flags & (PKT_RX_VLAN_STRIPPED | PKT_RX_QINQ_STRIPPED)) == 0)) { ethernet_vlan_header_tv_t *vlan_hdr = ((ethernet_vlan_header_tv_t *) & (eth_hdr->type)); diff --git a/vnet/vnet/devices/dpdk/init.c b/vnet/vnet/devices/dpdk/init.c index c9e7dc8e1de..0448c15f6f1 100755 --- a/vnet/vnet/devices/dpdk/init.c +++ b/vnet/vnet/devices/dpdk/init.c @@ -473,6 +473,7 @@ dpdk_lib_init (dpdk_main_t * dm) /* Cisco VIC */ case VNET_DPDK_PMD_ENIC: rte_eth_link_get_nowait (i, &l); + xd->flags |= DPDK_DEVICE_FLAG_PMD_SUPPORTS_PTYPE; xd->nb_rx_desc = DPDK_NB_RX_DESC_ENIC; if (l.link_speed == 40000) { @@ -489,6 +490,7 @@ dpdk_lib_init (dpdk_main_t * dm) /* Intel Fortville */ case VNET_DPDK_PMD_I40E: case VNET_DPDK_PMD_I40EVF: + xd->flags |= 
DPDK_DEVICE_FLAG_PMD_SUPPORTS_PTYPE; xd->port_type = VNET_DPDK_PORT_TYPE_ETH_40G; xd->nb_rx_desc = DPDK_NB_RX_DESC_40GE; xd->nb_tx_desc = DPDK_NB_TX_DESC_40GE; @@ -561,6 +563,7 @@ dpdk_lib_init (dpdk_main_t * dm) break; case VNET_DPDK_PMD_BOND: + xd->flags |= DPDK_DEVICE_FLAG_PMD_SUPPORTS_PTYPE; xd->port_type = VNET_DPDK_PORT_TYPE_ETH_BOND; break; diff --git a/vnet/vnet/devices/dpdk/node.c b/vnet/vnet/devices/dpdk/node.c index 4e668776530..e541cdbcbd2 100644 --- a/vnet/vnet/devices/dpdk/node.c +++ b/vnet/vnet/devices/dpdk/node.c @@ -34,14 +34,61 @@ static char *dpdk_error_strings[] = { #undef _ }; -#if RTE_VERSION >= RTE_VERSION_NUM(16, 11, 0, 0) +always_inline int +vlib_buffer_is_ip4 (vlib_buffer_t * b) +{ + ethernet_header_t *h = (ethernet_header_t *) b->data; + return (h->type == clib_host_to_net_u16 (ETHERNET_TYPE_IP4)); +} + +always_inline int +vlib_buffer_is_ip6 (vlib_buffer_t * b) +{ + ethernet_header_t *h = (ethernet_header_t *) b->data; + return (h->type == clib_host_to_net_u16 (ETHERNET_TYPE_IP6)); +} + +always_inline int +vlib_buffer_is_mpls (vlib_buffer_t * b) +{ + ethernet_header_t *h = (ethernet_header_t *) b->data; + return (h->type == clib_host_to_net_u16 (ETHERNET_TYPE_MPLS_UNICAST)); +} + +#if RTE_VERSION < RTE_VERSION_NUM(16, 11, 0, 0) +/* New ol_flags bits added in DPDK-16.11 */ +#define PKT_RX_IP_CKSUM_GOOD (1ULL << 7) +#endif + +always_inline u32 +dpdk_rx_next_from_etype (struct rte_mbuf * mb, vlib_buffer_t * b0) +{ + if (PREDICT_TRUE (vlib_buffer_is_ip4 (b0))) + if (PREDICT_TRUE ((mb->ol_flags & PKT_RX_IP_CKSUM_GOOD) != 0)) + return VNET_DEVICE_INPUT_NEXT_IP4_NCS_INPUT; + else + return VNET_DEVICE_INPUT_NEXT_IP4_INPUT; + else if (PREDICT_TRUE (vlib_buffer_is_ip6 (b0))) + return VNET_DEVICE_INPUT_NEXT_IP6_INPUT; + else if (PREDICT_TRUE (vlib_buffer_is_mpls (b0))) + return VNET_DEVICE_INPUT_NEXT_MPLS_INPUT; + else + return VNET_DEVICE_INPUT_NEXT_ETHERNET_INPUT; +} + always_inline int dpdk_mbuf_is_vlan (struct rte_mbuf *mb) { +#if RTE_VERSION >= 
RTE_VERSION_NUM(16, 11, 0, 0) return (mb->packet_type & RTE_PTYPE_L2_ETHER_VLAN) == RTE_PTYPE_L2_ETHER_VLAN; -} +#else + return + (mb->ol_flags & + (PKT_RX_VLAN_PKT | PKT_RX_VLAN_STRIPPED | PKT_RX_QINQ_STRIPPED)) == + PKT_RX_VLAN_PKT; #endif +} always_inline int dpdk_mbuf_is_ip4 (struct rte_mbuf *mb) @@ -55,35 +102,19 @@ dpdk_mbuf_is_ip6 (struct rte_mbuf *mb) return RTE_ETH_IS_IPV6_HDR (mb->packet_type) != 0; } -always_inline int -vlib_buffer_is_mpls (vlib_buffer_t * b) -{ - ethernet_header_t *h = (ethernet_header_t *) b->data; - return (h->type == clib_host_to_net_u16 (ETHERNET_TYPE_MPLS_UNICAST)); -} - -always_inline void -dpdk_rx_next_from_mb (struct rte_mbuf *mb, vlib_buffer_t * b0, u32 * next0) +always_inline u32 +dpdk_rx_next_from_mb (struct rte_mbuf * mb, vlib_buffer_t * b0) { - u32 n0; - -#if RTE_VERSION < RTE_VERSION_NUM(16, 11, 0, 0) - if (PREDICT_FALSE - ((mb->ol_flags & (PKT_RX_VLAN_PKT | PKT_RX_VLAN_STRIPPED)) == - PKT_RX_VLAN_PKT)) -#else if (PREDICT_FALSE (dpdk_mbuf_is_vlan (mb))) -#endif - n0 = VNET_DEVICE_INPUT_NEXT_ETHERNET_INPUT; + return VNET_DEVICE_INPUT_NEXT_ETHERNET_INPUT; else if (PREDICT_TRUE (dpdk_mbuf_is_ip4 (mb))) - n0 = VNET_DEVICE_INPUT_NEXT_IP4_INPUT; + return VNET_DEVICE_INPUT_NEXT_IP4_NCS_INPUT; else if (PREDICT_TRUE (dpdk_mbuf_is_ip6 (mb))) - n0 = VNET_DEVICE_INPUT_NEXT_IP6_INPUT; + return VNET_DEVICE_INPUT_NEXT_IP6_INPUT; else if (PREDICT_TRUE (vlib_buffer_is_mpls (b0))) - n0 = VNET_DEVICE_INPUT_NEXT_MPLS_INPUT; + return VNET_DEVICE_INPUT_NEXT_MPLS_INPUT; else - n0 = VNET_DEVICE_INPUT_NEXT_ETHERNET_INPUT; - *next0 = n0; + return dpdk_rx_next_from_etype (mb, b0); } always_inline void @@ -127,10 +158,12 @@ dpdk_rx_trace (dpdk_main_t * dm, if (PREDICT_FALSE (xd->per_interface_next_index != ~0)) next0 = xd->per_interface_next_index; - else if (PREDICT_FALSE (xd->flags & DPDK_DEVICE_FLAG_HAVE_SUBIF)) - next0 = VNET_DEVICE_INPUT_NEXT_ETHERNET_INPUT; + else if (PREDICT_TRUE + ((xd->flags & DPDK_DEVICE_FLAG_PMD_SUPPORTS_PTYPE) != 0)) + 
next0 = dpdk_rx_next_from_mb (mb, b0); else - dpdk_rx_next_from_mb (mb, b0, &next0); + next0 = dpdk_rx_next_from_etype (mb, b0); + dpdk_rx_error_from_mb (mb, &next0, &error0); vlib_trace_buffer (vm, node, next0, b0, /* follow_chain */ 0); @@ -350,17 +383,20 @@ dpdk_device_input (dpdk_main_t * dm, dpdk_device_t * xd, { next0 = next1 = next2 = next3 = xd->per_interface_next_index; } - else if (PREDICT_FALSE (xd->flags & DPDK_DEVICE_FLAG_HAVE_SUBIF)) + else if (PREDICT_TRUE + ((xd->flags & DPDK_DEVICE_FLAG_PMD_SUPPORTS_PTYPE) != 0)) { - next0 = next1 = VNET_DEVICE_INPUT_NEXT_ETHERNET_INPUT; - next2 = next3 = VNET_DEVICE_INPUT_NEXT_ETHERNET_INPUT; + next0 = dpdk_rx_next_from_mb (mb0, b0); + next1 = dpdk_rx_next_from_mb (mb1, b1); + next2 = dpdk_rx_next_from_mb (mb2, b2); + next3 = dpdk_rx_next_from_mb (mb3, b3); } else { - dpdk_rx_next_from_mb (mb0, b0, &next0); - dpdk_rx_next_from_mb (mb1, b1, &next1); - dpdk_rx_next_from_mb (mb2, b2, &next2); - dpdk_rx_next_from_mb (mb3, b3, &next3); + next0 = dpdk_rx_next_from_etype (mb0, b0); + next1 = dpdk_rx_next_from_etype (mb1, b1); + next2 = dpdk_rx_next_from_etype (mb2, b2); + next3 = dpdk_rx_next_from_etype (mb3, b3); } if (PREDICT_FALSE (or_ol_flags & PKT_RX_IP_CKSUM_BAD)) @@ -475,10 +511,12 @@ dpdk_device_input (dpdk_main_t * dm, dpdk_device_t * xd, if (PREDICT_FALSE (xd->per_interface_next_index != ~0)) next0 = xd->per_interface_next_index; - else if (PREDICT_FALSE (xd->flags & DPDK_DEVICE_FLAG_HAVE_SUBIF)) - next0 = VNET_DEVICE_INPUT_NEXT_ETHERNET_INPUT; + else if (PREDICT_TRUE + ((xd->flags & DPDK_DEVICE_FLAG_PMD_SUPPORTS_PTYPE) != 0)) + next0 = dpdk_rx_next_from_mb (mb0, b0); else - dpdk_rx_next_from_mb (mb0, b0, &next0); + next0 = dpdk_rx_next_from_etype (mb0, b0); + dpdk_rx_error_from_mb (mb0, &next0, &error0); b0->error = node->errors[error0]; -- cgit 1.2.3-korg