author    Benoît Ganne <bganne@cisco.com>    2020-06-12 08:47:34 +0200
committer Damjan Marion <dmarion@me.com>     2020-08-31 17:16:56 +0000
commit    4a76d6f6da035220917097bc047b08bc58254803 (patch)
tree      10399485df66c0ebeeaa32edbb7a1022b8baa9b3 /src/plugins/af_xdp/input.c
parent    bfed7c047d2807f835cd7b9ef0330d314ac0ebc5 (diff)
af_xdp: AF_XDP input plugin
Type: feature

Change-Id: I85aa4ad6b68c1aa0e51938002dc691a4b11c545c
Signed-off-by: Damjan Marion <damarion@cisco.com>
Signed-off-by: Benoît Ganne <bganne@cisco.com>
Diffstat (limited to 'src/plugins/af_xdp/input.c')
-rw-r--r--  src/plugins/af_xdp/input.c  367
1 file changed, 367 insertions(+), 0 deletions(-)
diff --git a/src/plugins/af_xdp/input.c b/src/plugins/af_xdp/input.c
new file mode 100644
index 00000000000..c5b3488d438
--- /dev/null
+++ b/src/plugins/af_xdp/input.c
@@ -0,0 +1,367 @@
+/*
+ *------------------------------------------------------------------
+ * Copyright (c) 2018 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *------------------------------------------------------------------
+ */
+
+#include <poll.h>
+#include <vlib/vlib.h>
+#include <vlib/unix/unix.h>
+#include <vlib/pci/pci.h>
+#include <vnet/ethernet/ethernet.h>
+#include <vnet/devices/devices.h>
+#include "af_xdp.h"
+
+#define foreach_af_xdp_input_error \
+ _(POLL_REQUIRED, "poll required") \
+ _(POLL_FAILURES, "poll failures")
+
+typedef enum
+{
+#define _(f,s) AF_XDP_INPUT_ERROR_##f,
+ foreach_af_xdp_input_error
+#undef _
+ AF_XDP_INPUT_N_ERROR,
+} af_xdp_input_error_t;
+
+static __clib_unused char *af_xdp_input_error_strings[] = {
+#define _(n,s) s,
+ foreach_af_xdp_input_error
+#undef _
+};
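+
+/*
+ * For reference, the two X-macro expansions above are equivalent to:
+ *   enum { AF_XDP_INPUT_ERROR_POLL_REQUIRED,
+ *          AF_XDP_INPUT_ERROR_POLL_FAILURES,
+ *          AF_XDP_INPUT_N_ERROR };
+ *   char *af_xdp_input_error_strings[] = { "poll required", "poll failures" };
+ */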
+
+static_always_inline void
+af_xdp_device_input_trace (vlib_main_t * vm, vlib_node_runtime_t * node,
+ u32 n_left, const u32 * bi, u32 next_index,
+ u32 hw_if_index)
+{
+ u32 n_trace;
+
+ if (PREDICT_TRUE (0 == (n_trace = vlib_get_trace_count (vm, node))))
+ return;
+
+ while (n_trace && n_left)
+ {
+ vlib_buffer_t *b;
+ af_xdp_input_trace_t *tr;
+ b = vlib_get_buffer (vm, bi[0]);
+ vlib_trace_buffer (vm, node, next_index, b,
+ /* follow_chain */ 0);
+ tr = vlib_add_trace (vm, node, b, sizeof (*tr));
+ tr->next_index = next_index;
+ tr->hw_if_index = hw_if_index;
+
+ n_trace--;
+ n_left--;
+ bi++;
+ }
+
+ vlib_set_trace_count (vm, node, n_trace);
+}
+
+static_always_inline void
+af_xdp_device_input_refill_db (vlib_main_t * vm,
+ const vlib_node_runtime_t * node,
+ af_xdp_device_t * ad, af_xdp_rxq_t * rxq,
+ const u32 n_alloc)
+{
+ int ret;
+
+ xsk_ring_prod__submit (&rxq->fq, n_alloc);
+
+ if (!xsk_ring_prod__needs_wakeup (&rxq->fq))
+ return;
+
+ vlib_error_count (vm, node->node_index, AF_XDP_INPUT_ERROR_POLL_REQUIRED,
+ 1);
+
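+ /*
+ * With the XDP_USE_NEED_WAKEUP bind flag, the kernel sets a flag on the
+ * fill ring when it needs the application to make a syscall before it will
+ * consume more fill entries; a poll() with a zero timeout delivers that
+ * kick without blocking the worker.
+ */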
+ struct pollfd fd = {.fd = rxq->xsk_fd,.events = POLLIN };
+ ret = poll (&fd, 1, 0);
+ if (PREDICT_TRUE (ret >= 0))
+ return;
+
+ /* something bad is happening */
+ vlib_error_count (vm, node->node_index, AF_XDP_INPUT_ERROR_POLL_FAILURES,
+ 1);
+ af_xdp_device_error (ad, "poll() failed");
+}
+
+static_always_inline void
+af_xdp_device_input_refill (vlib_main_t * vm,
+ const vlib_node_runtime_t * node,
+ af_xdp_device_t * ad, af_xdp_rxq_t * rxq,
+ const int copy)
+{
+ __u64 *fill;
+ const u32 size = rxq->fq.size;
+ const u32 mask = size - 1;
+ u32 bis[VLIB_FRAME_SIZE], *bi = bis;
+ u32 n_alloc, n, n_wrap;
+ u32 idx;
+
+ ASSERT (mask == rxq->fq.mask);
+
+ /* do not enqueue more packets than there is ring space */
+ n_alloc = xsk_prod_nb_free (&rxq->fq, 16);
+ /* do not bother to allocate if too small */
+ if (n_alloc < 16)
+ return;
+
+ n_alloc = clib_min (n_alloc, ARRAY_LEN (bis));
+ n_alloc = vlib_buffer_alloc_from_pool (vm, bis, n_alloc, ad->pool);
+ n = xsk_ring_prod__reserve (&rxq->fq, n_alloc, &idx);
+ ASSERT (n == n_alloc);
+
+ fill = xsk_ring_prod__fill_addr (&rxq->fq, idx);
+ n = clib_min (n_alloc, size - (idx & mask));
+ n_wrap = n_alloc - n;
+
+ /*
+ * Note about headroom: for some reason, there seems to be a discrepancy
+ * between 0-copy and copy mode:
+ *   - 0-copy: XDP_PACKET_HEADROOM is added to the user headroom
+ *   - copy: nothing is added to the user headroom
+ * We privilege 0-copy and set the headroom to 0. As XDP_PACKET_HEADROOM ==
+ * sizeof(vlib_buffer_t), data correctly points to vlib_buffer_t->data.
+ * In copy mode, we have to add sizeof(vlib_buffer_t) to the desc offset
+ * during refill.
+ */
+ STATIC_ASSERT (sizeof (vlib_buffer_t) == XDP_PACKET_HEADROOM, "wrong size");
+#define bi2addr(bi) \
+ (((bi) << CLIB_LOG2_CACHE_LINE_BYTES) + (copy ? sizeof(vlib_buffer_t) : 0))
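+ /*
+ * Worked example (illustration only, assuming 64-byte cache lines, i.e.
+ * CLIB_LOG2_CACHE_LINE_BYTES == 6; sizeof(vlib_buffer_t) ==
+ * XDP_PACKET_HEADROOM == 256 per the STATIC_ASSERT above):
+ *   bi = 0x1000 -> 0-copy addr = 0x1000 << 6 = 0x40000
+ *                  copy addr   = 0x40000 + 256 = 0x40100
+ */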
+
+wrap_around:
+
+ while (n >= 8)
+ {
+#ifdef CLIB_HAVE_VEC256
+ u64x4 b0 = u32x4_extend_to_u64x4 (*(u32x4u *) (bi + 0));
+ u64x4 b1 = u32x4_extend_to_u64x4 (*(u32x4u *) (bi + 4));
+ *(u64x4u *) (fill + 0) = bi2addr (b0);
+ *(u64x4u *) (fill + 4) = bi2addr (b1);
+#else
+ fill[0] = bi2addr (bi[0]);
+ fill[1] = bi2addr (bi[1]);
+ fill[2] = bi2addr (bi[2]);
+ fill[3] = bi2addr (bi[3]);
+ fill[4] = bi2addr (bi[4]);
+ fill[5] = bi2addr (bi[5]);
+ fill[6] = bi2addr (bi[6]);
+ fill[7] = bi2addr (bi[7]);
+#endif
+ fill += 8;
+ bi += 8;
+ n -= 8;
+ }
+
+ while (n >= 1)
+ {
+ fill[0] = bi2addr (bi[0]);
+ fill += 1;
+ bi += 1;
+ n -= 1;
+ }
+
+ if (n_wrap)
+ {
+ fill = xsk_ring_prod__fill_addr (&rxq->fq, 0);
+ n = n_wrap;
+ n_wrap = 0;
+ goto wrap_around;
+ }
+
+ af_xdp_device_input_refill_db (vm, node, ad, rxq, n_alloc);
+}
+
+static_always_inline void
+af_xdp_device_input_ethernet (vlib_main_t * vm, vlib_node_runtime_t * node,
+ const u32 next_index, const u32 sw_if_index,
+ const u32 hw_if_index)
+{
+ vlib_next_frame_t *nf;
+ vlib_frame_t *f;
+ ethernet_input_frame_t *ef;
+
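+ /*
+ * Tagging the frame with ETH_INPUT_FRAME_F_SINGLE_SW_IF_IDX below lets
+ * ethernet-input take its single-interface fast path: the interface is
+ * read once from the frame scalar args instead of once per packet.
+ */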
+ if (PREDICT_FALSE (VNET_DEVICE_INPUT_NEXT_ETHERNET_INPUT != next_index))
+ return;
+
+ nf =
+ vlib_node_runtime_get_next_frame (vm, node,
+ VNET_DEVICE_INPUT_NEXT_ETHERNET_INPUT);
+ f = vlib_get_frame (vm, nf->frame);
+ f->flags = ETH_INPUT_FRAME_F_SINGLE_SW_IF_IDX;
+
+ ef = vlib_frame_scalar_args (f);
+ ef->sw_if_index = sw_if_index;
+ ef->hw_if_index = hw_if_index;
+}
+
+static_always_inline u32
+af_xdp_device_input_bufs (vlib_main_t * vm, const af_xdp_device_t * ad,
+ af_xdp_rxq_t * rxq, u32 * bis, const u32 n_rx,
+ vlib_buffer_t * bt, u32 idx, const int copy)
+{
+ vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b = bufs;
+ u16 lens[VLIB_FRAME_SIZE], *len = lens;
+ const u32 mask = rxq->rx.mask;
+ u32 n = n_rx, *bi = bis, bytes = 0;
+
+#define addr2bi(addr) \
+ (((addr) - (copy ? sizeof(vlib_buffer_t) : 0)) >> CLIB_LOG2_CACHE_LINE_BYTES)
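+ /*
+ * addr2bi is the exact inverse of bi2addr used in the refill path: undo
+ * the copy-mode headroom offset, then turn the UMEM address back into a
+ * vlib buffer index.
+ */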
+
+ while (n >= 1)
+ {
+ const struct xdp_desc *desc = xsk_ring_cons__rx_desc (&rxq->rx, idx);
+ bi[0] = addr2bi (xsk_umem__extract_addr (desc->addr));
+ ASSERT (vlib_buffer_is_known (vm, bi[0]) ==
+ VLIB_BUFFER_KNOWN_ALLOCATED);
+ len[0] = desc->len;
+ idx = (idx + 1) & mask;
+ bi += 1;
+ len += 1;
+ n -= 1;
+ }
+
+ vlib_get_buffers (vm, bis, bufs, n_rx);
+
+ n = n_rx;
+ len = lens;
+
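+ /*
+ * Prefetch 4 buffers ahead: the n >= 8 guard guarantees b[4]..b[7] are
+ * valid to prefetch while b[0]..b[3] are initialized; each iteration
+ * consumes only 4 buffers, and the tail loop below handles the remainder.
+ */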
+ while (n >= 8)
+ {
+ vlib_prefetch_buffer_header (b[4], LOAD);
+ vlib_buffer_copy_template (b[0], bt);
+ bytes += b[0]->current_length = len[0];
+
+ vlib_prefetch_buffer_header (b[5], LOAD);
+ vlib_buffer_copy_template (b[1], bt);
+ bytes += b[1]->current_length = len[1];
+
+ vlib_prefetch_buffer_header (b[6], LOAD);
+ vlib_buffer_copy_template (b[2], bt);
+ bytes += b[2]->current_length = len[2];
+
+ vlib_prefetch_buffer_header (b[7], LOAD);
+ vlib_buffer_copy_template (b[3], bt);
+ bytes += b[3]->current_length = len[3];
+
+ b += 4;
+ len += 4;
+ n -= 4;
+ }
+
+ while (n >= 1)
+ {
+ vlib_buffer_copy_template (b[0], bt);
+ bytes += b[0]->current_length = len[0];
+ b += 1;
+ len += 1;
+ n -= 1;
+ }
+
+ xsk_ring_cons__release (&rxq->rx, n_rx);
+ return bytes;
+}
+
+static_always_inline uword
+af_xdp_device_input_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
+ vlib_frame_t * frame, af_xdp_device_t * ad,
+ u16 qid, const int copy)
+{
+ vnet_main_t *vnm = vnet_get_main ();
+ af_xdp_rxq_t *rxq = vec_elt_at_index (ad->rxqs, qid);
+ vlib_buffer_t bt;
+ u32 next_index, *to_next, n_left_to_next;
+ u32 n_rx_packets, n_rx_bytes;
+ u32 idx;
+
+ n_rx_packets = xsk_ring_cons__peek (&rxq->rx, VLIB_FRAME_SIZE, &idx);
+
+ if (PREDICT_FALSE (0 == n_rx_packets))
+ goto refill;
+
+ vlib_buffer_copy_template (&bt, ad->buffer_template);
+ next_index = ad->per_interface_next_index;
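+ /*
+ * If input features are enabled on this interface, redirect next_index
+ * into the device-input feature arc and record that in the buffer
+ * template bt.
+ */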
+ if (PREDICT_FALSE (vnet_device_input_have_features (ad->sw_if_index)))
+ vnet_feature_start_device_input_x1 (ad->sw_if_index, &next_index, &bt);
+
+ vlib_get_new_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+ n_rx_bytes =
+ af_xdp_device_input_bufs (vm, ad, rxq, to_next, n_rx_packets, &bt, idx,
+ copy);
+ af_xdp_device_input_ethernet (vm, node, next_index, ad->sw_if_index,
+ ad->hw_if_index);
+
+ vlib_put_next_frame (vm, node, next_index, n_left_to_next - n_rx_packets);
+
+ af_xdp_device_input_trace (vm, node, n_rx_packets, to_next, next_index,
+ ad->hw_if_index);
+
+ vlib_increment_combined_counter
+ (vnm->interface_main.combined_sw_if_counters +
+ VNET_INTERFACE_COUNTER_RX, vm->thread_index,
+ ad->hw_if_index, n_rx_packets, n_rx_bytes);
+
+refill:
+ af_xdp_device_input_refill (vm, node, ad, rxq, copy);
+
+ return n_rx_packets;
+}
+
+VLIB_NODE_FN (af_xdp_input_node) (vlib_main_t * vm,
+ vlib_node_runtime_t * node,
+ vlib_frame_t * frame)
+{
+ u32 n_rx = 0;
+ af_xdp_main_t *am = &af_xdp_main;
+ vnet_device_input_runtime_t *rt = (void *) node->runtime_data;
+ vnet_device_and_queue_t *dq;
+
+ foreach_device_and_queue (dq, rt->devices_and_queues)
+ {
+ af_xdp_device_t *ad;
+ ad = vec_elt_at_index (am->devices, dq->dev_instance);
+ if ((ad->flags & AF_XDP_DEVICE_F_ADMIN_UP) == 0)
+ continue;
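+ /*
+ * copy is passed as a literal constant: since af_xdp_device_input_inline
+ * is always inlined, the compiler emits two specialized versions with
+ * the copy vs 0-copy branches resolved at compile time.
+ */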
+ if (PREDICT_TRUE (ad->flags & AF_XDP_DEVICE_F_ZEROCOPY))
+ n_rx += af_xdp_device_input_inline (vm, node, frame, ad, dq->queue_id,
+ /* copy */ 0);
+ else
+ n_rx += af_xdp_device_input_inline (vm, node, frame, ad, dq->queue_id,
+ /* copy */ 1);
+ }
+ return n_rx;
+}
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (af_xdp_input_node) = {
+ .name = "af_xdp-input",
+ .sibling_of = "device-input",
+ .format_trace = format_af_xdp_input_trace,
+ .type = VLIB_NODE_TYPE_INPUT,
+ .state = VLIB_NODE_STATE_DISABLED,
+ .n_errors = AF_XDP_INPUT_N_ERROR,
+ .error_strings = af_xdp_input_error_strings,
+ .flags = VLIB_NODE_FLAG_TRACE_SUPPORTED,
+};
+/* *INDENT-ON* */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */