diff options
Diffstat (limited to 'src/plugins/dpdk/device/node.c')
-rw-r--r-- | src/plugins/dpdk/device/node.c | 72 |
1 files changed, 46 insertions, 26 deletions
diff --git a/src/plugins/dpdk/device/node.c b/src/plugins/dpdk/device/node.c index c725681caa5..ca1690b708f 100644 --- a/src/plugins/dpdk/device/node.c +++ b/src/plugins/dpdk/device/node.c @@ -23,10 +23,10 @@ #include <dpdk/device/dpdk.h> #include <vnet/classify/vnet_classify.h> #include <vnet/mpls/packet.h> -#include <vnet/handoff.h> #include <vnet/devices/devices.h> #include <vnet/interface/rx_queue_funcs.h> #include <vnet/feature/feature.h> +#include <vnet/tcp/tcp_packet.h> #include <dpdk/device/dpdk_priv.h> @@ -37,10 +37,13 @@ static char *dpdk_error_strings[] = { }; /* make sure all flags we need are stored in lower 32 bits */ -STATIC_ASSERT ((u64) (PKT_RX_IP_CKSUM_BAD | PKT_RX_FDIR | PKT_RX_LRO) < - (1ULL << 32), +STATIC_ASSERT ((u64) (RTE_MBUF_F_RX_IP_CKSUM_BAD | RTE_MBUF_F_RX_L4_CKSUM_BAD | + RTE_MBUF_F_RX_FDIR | RTE_MBUF_F_RX_LRO) < (1ULL << 32), "dpdk flags not in lower word, fix needed"); +STATIC_ASSERT (RTE_MBUF_F_RX_L4_CKSUM_BAD == (1ULL << 3), + "bit number of RTE_MBUF_F_RX_L4_CKSUM_BAD is no longer 3!"); + static_always_inline uword dpdk_process_subseq_segs (vlib_main_t * vm, vlib_buffer_t * b, struct rte_mbuf *mb, vlib_buffer_t * bt) @@ -98,13 +101,13 @@ dpdk_prefetch_buffer_x4 (struct rte_mbuf *mb[]) { vlib_buffer_t *b; b = vlib_buffer_from_rte_mbuf (mb[0]); - clib_prefetch_load (b); + clib_prefetch_store (b); b = vlib_buffer_from_rte_mbuf (mb[1]); - clib_prefetch_load (b); + clib_prefetch_store (b); b = vlib_buffer_from_rte_mbuf (mb[2]); - clib_prefetch_load (b); + clib_prefetch_store (b); b = vlib_buffer_from_rte_mbuf (mb[3]); - clib_prefetch_load (b); + clib_prefetch_store (b); } /** \brief Main DPDK input node @@ -126,18 +129,18 @@ dpdk_prefetch_buffer_x4 (struct rte_mbuf *mb[]) @em Uses: - <code>struct rte_mbuf mb->ol_flags</code> - - PKT_RX_IP_CKSUM_BAD + - RTE_MBUF_F_RX_IP_CKSUM_BAD @em Sets: - <code>b->error</code> if the packet is to be dropped immediately - <code>b->current_data, b->current_length</code> - - adjusted as needed to skip the L2 header in direct-dispatch cases + - adjusted as needed to skip the L2 header in direct-dispatch cases - <code>vnet_buffer(b)->sw_if_index[VLIB_RX]</code> - - rx interface sw_if_index + - rx interface sw_if_index - <code>vnet_buffer(b)->sw_if_index[VLIB_TX] = ~0</code> - - required by ipX-lookup + - required by ipX-lookup - <code>b->flags</code> - - to indicate multi-segment pkts (VLIB_BUFFER_NEXT_PRESENT), etc. + - to indicate multi-segment pkts (VLIB_BUFFER_NEXT_PRESENT), etc. <em>Next Nodes:</em> - Static arcs to: error-drop, ethernet-input, @@ -254,7 +257,7 @@ dpdk_process_flow_offload (dpdk_device_t * xd, dpdk_per_thread_data_t * ptd, /* TODO prefetch and quad-loop */ for (n = 0; n < n_rx_packets; n++) { - if ((ptd->flags[n] & PKT_RX_FDIR_ID) == 0) + if ((ptd->flags[n] & RTE_MBUF_F_RX_FDIR_ID) == 0) continue; fle = pool_elt_at_index (xd->flow_lookup_entries, @@ -327,7 +330,7 @@ dpdk_process_lro_offload (dpdk_device_t *xd, dpdk_per_thread_data_t *ptd, for (n = 0; n < n_rx_packets; n++) { b0 = vlib_buffer_from_rte_mbuf (ptd->mbufs[n]); - if (ptd->flags[n] & PKT_RX_LRO) + if (ptd->flags[n] & RTE_MBUF_F_RX_LRO) { b0->flags |= VNET_BUFFER_F_GSO; vnet_buffer2 (b0)->gso_size = ptd->mbufs[n]->tso_segsz; @@ -362,12 +365,13 @@ dpdk_device_input (vlib_main_t * vm, dpdk_main_t * dm, dpdk_device_t * xd, /* get up to DPDK_RX_BURST_SZ buffers from PMD */ while (n_rx_packets < DPDK_RX_BURST_SZ) { - n = rte_eth_rx_burst (xd->port_id, queue_id, - ptd->mbufs + n_rx_packets, - DPDK_RX_BURST_SZ - n_rx_packets); + u32 n_to_rx = clib_min (DPDK_RX_BURST_SZ - n_rx_packets, 32); + + n = rte_eth_rx_burst (xd->port_id, queue_id, ptd->mbufs + n_rx_packets, + n_to_rx); n_rx_packets += n; - if (n < 32) + if (n < n_to_rx) break; } @@ -377,6 +381,7 @@ dpdk_device_input (vlib_main_t * vm, dpdk_main_t * dm, dpdk_device_t * xd, /* Update buffer template */ vnet_buffer (bt)->sw_if_index[VLIB_RX] = xd->sw_if_index; bt->error = node->errors[DPDK_ERROR_NONE]; + bt->flags = xd->buffer_flags; /* as DPDK is allocating empty buffers from mempool provided before interface start for each queue, it is safe to store this in the template */ bt->buffer_pool_index = rxq->buffer_pool_index; @@ -391,17 +396,34 @@ dpdk_device_input (vlib_main_t * vm, dpdk_main_t * dm, dpdk_device_t * xd, /* as all packets belong to the same interface feature arc lookup can be don once and result stored in the buffer template */ if (PREDICT_FALSE (vnet_device_input_have_features (xd->sw_if_index))) - vnet_feature_start_device_input_x1 (xd->sw_if_index, &next_index, bt); + vnet_feature_start_device_input (xd->sw_if_index, &next_index, bt); if (xd->flags & DPDK_DEVICE_FLAG_MAYBE_MULTISEG) n_rx_bytes = dpdk_process_rx_burst (vm, ptd, n_rx_packets, 1, &or_flags); else n_rx_bytes = dpdk_process_rx_burst (vm, ptd, n_rx_packets, 0, &or_flags); - if (PREDICT_FALSE ((or_flags & PKT_RX_LRO))) + if (PREDICT_FALSE ((or_flags & RTE_MBUF_F_RX_LRO))) dpdk_process_lro_offload (xd, ptd, n_rx_packets); - if (PREDICT_FALSE (or_flags & PKT_RX_FDIR)) + if (PREDICT_FALSE ((or_flags & RTE_MBUF_F_RX_L4_CKSUM_BAD) && + (xd->buffer_flags & VNET_BUFFER_F_L4_CHECKSUM_CORRECT))) + { + for (n = 0; n < n_rx_packets; n++) + { + /* Check and reset VNET_BUFFER_F_L4_CHECKSUM_CORRECT flag + if RTE_MBUF_F_RX_L4_CKSUM_BAD is set. + The magic num 3 is the bit number of RTE_MBUF_F_RX_L4_CKSUM_BAD + which is defined in DPDK. + Have made a STATIC_ASSERT in this file to ensure this. + */ + b0 = vlib_buffer_from_rte_mbuf (ptd->mbufs[n]); + b0->flags ^= (ptd->flags[n] & RTE_MBUF_F_RX_L4_CKSUM_BAD) + << (VNET_BUFFER_F_LOG2_L4_CHECKSUM_CORRECT - 3); + } + } + + if (PREDICT_FALSE (or_flags & RTE_MBUF_F_RX_FDIR)) { /* some packets will need to go to different next nodes */ for (n = 0; n < n_rx_packets; n++) @@ -410,7 +432,7 @@ dpdk_device_input (vlib_main_t * vm, dpdk_main_t * dm, dpdk_device_t * xd, /* flow offload - process if rx flow offload enabled and at least one packet is marked */ if (PREDICT_FALSE ((xd->flags & DPDK_DEVICE_FLAG_RX_FLOW_OFFLOAD) && - (or_flags & PKT_RX_FDIR))) + (or_flags & RTE_MBUF_F_RX_FDIR))) dpdk_process_flow_offload (xd, ptd, n_rx_packets); /* enqueue buffers to the next node */ @@ -447,7 +469,7 @@ dpdk_device_input (vlib_main_t * vm, dpdk_main_t * dm, dpdk_device_t * xd, marked as ip4 checksum bad we can notify ethernet input so it can send pacets to ip4-input-no-checksum node */ if (xd->flags & DPDK_DEVICE_FLAG_RX_IP4_CKSUM && - (or_flags & PKT_RX_IP_CKSUM_BAD) == 0) + (or_flags & RTE_MBUF_F_RX_IP_CKSUM_BAD) == 0) f->flags |= ETH_INPUT_FRAME_F_IP4_CKSUM_OK; vlib_frame_no_append (f); } @@ -521,7 +543,7 @@ VLIB_NODE_FN (dpdk_input_node) (vlib_main_t * vm, vlib_node_runtime_t * node, dpdk_device_t *xd; uword n_rx_packets = 0; vnet_hw_if_rxq_poll_vector_t *pv; - u32 thread_index = node->thread_index; + u32 thread_index = vm->thread_index; /* * Poll all devices on this cpu for input/interrupts. @@ -538,7 +560,6 @@ VLIB_NODE_FN (dpdk_input_node) (vlib_main_t * vm, vlib_node_runtime_t * node, return n_rx_packets; } -/* *INDENT-OFF* */ VLIB_REGISTER_NODE (dpdk_input_node) = { .type = VLIB_NODE_TYPE_INPUT, .name = "dpdk-input", @@ -554,7 +575,6 @@ VLIB_REGISTER_NODE (dpdk_input_node) = { .n_errors = DPDK_N_ERROR, .error_strings = dpdk_error_strings, }; -/* *INDENT-ON* */ /* * fd.io coding-style-patch-verification: ON |