diff options
Diffstat (limited to 'src/plugins/af_xdp/output.c')
-rw-r--r-- | src/plugins/af_xdp/output.c | 51 |
1 files changed, 40 insertions, 11 deletions
diff --git a/src/plugins/af_xdp/output.c b/src/plugins/af_xdp/output.c index 51a56ed866d..a59c01ca6e0 100644 --- a/src/plugins/af_xdp/output.c +++ b/src/plugins/af_xdp/output.c @@ -1,5 +1,5 @@ -#include <poll.h> #include <string.h> +#include <vppinfra/clib.h> #include <vlib/vlib.h> #include <vlib/unix/unix.h> #include <vnet/ethernet/ethernet.h> @@ -101,11 +101,19 @@ af_xdp_device_output_tx_db (vlib_main_t * vm, if (xsk_ring_prod__needs_wakeup (&txq->tx)) { - struct pollfd fd = { .fd = txq->xsk_fd, .events = POLLIN | POLLOUT }; - int ret = poll (&fd, 1, 0); + const struct msghdr msg = {}; + int ret; + /* On tx, xsk socket will only tx up to TX_BATCH_SIZE, as defined in + * kernel net/xdp/xsk.c. Unfortunately we do not know how much this is, + * our only option is to retry until everything is sent... */ + do + { + ret = sendmsg (txq->xsk_fd, &msg, MSG_DONTWAIT); + } + while (ret < 0 && EAGAIN == errno); if (PREDICT_FALSE (ret < 0)) { - /* something bad is happening */ + /* not EAGAIN: something bad is happening */ vlib_error_count (vm, node->node_index, AF_XDP_TX_ERROR_SYSCALL_FAILURES, 1); af_xdp_device_error (ad, "tx poll() failed"); @@ -147,6 +155,14 @@ wrap_around: while (n >= 8) { + if (PREDICT_FALSE (b[0]->flags & VLIB_BUFFER_NEXT_PRESENT || + b[1]->flags & VLIB_BUFFER_NEXT_PRESENT || + b[2]->flags & VLIB_BUFFER_NEXT_PRESENT || + b[3]->flags & VLIB_BUFFER_NEXT_PRESENT)) + { + break; + } + vlib_prefetch_buffer_header (b[4], LOAD); offset = (sizeof (vlib_buffer_t) + @@ -186,6 +202,17 @@ wrap_around: while (n >= 1) { + if (PREDICT_FALSE (b[0]->flags & VLIB_BUFFER_NEXT_PRESENT)) + { + if (vlib_buffer_chain_linearize (vm, b[0]) != 1) + { + af_xdp_log (VLIB_LOG_LEVEL_ERR, ad, + "vlib_buffer_chain_linearize failed"); + vlib_buffer_free_one (vm, vlib_get_buffer_index (vm, b[0])); + continue; + } + } + offset = (sizeof (vlib_buffer_t) + b[0]->current_data) << XSK_UNALIGNED_BUF_OFFSET_SHIFT; @@ -215,9 +242,9 @@ VNET_DEVICE_CLASS_TX_FN (af_xdp_device_class) (vlib_main_t * vm, af_xdp_main_t *rm = &af_xdp_main; vnet_interface_output_runtime_t *ord = (void *) node->runtime_data; af_xdp_device_t *ad = pool_elt_at_index (rm->devices, ord->dev_instance); - u32 thread_index = vm->thread_index; - af_xdp_txq_t *txq = - vec_elt_at_index (ad->txqs, (thread_index - 1) % ad->txq_num); + const vnet_hw_if_tx_frame_t *tf = vlib_frame_scalar_args (frame); + const int shared_queue = tf->shared_queue; + af_xdp_txq_t *txq = vec_elt_at_index (ad->txqs, tf->queue_id); u32 *from; u32 n, n_tx; int i; @@ -225,20 +252,22 @@ VNET_DEVICE_CLASS_TX_FN (af_xdp_device_class) (vlib_main_t * vm, from = vlib_frame_vector_args (frame); n_tx = frame->n_vectors; - clib_spinlock_lock_if_init (&txq->lock); + if (shared_queue) + clib_spinlock_lock (&txq->lock); for (i = 0, n = 0; i < AF_XDP_TX_RETRIES && n < n_tx; i++) { u32 n_enq; af_xdp_device_output_free (vm, node, txq); - n_enq = af_xdp_device_output_tx_try (vm, node, ad, txq, n_tx - n, from); + n_enq = + af_xdp_device_output_tx_try (vm, node, ad, txq, n_tx - n, from + n); n += n_enq; - from += n_enq; } af_xdp_device_output_tx_db (vm, node, ad, txq, n); - clib_spinlock_unlock_if_init (&txq->lock); + if (shared_queue) + clib_spinlock_unlock (&txq->lock); if (PREDICT_FALSE (n != n_tx)) { |