summary | refs | log | tree | commit | diff | stats
path: root/src/plugins/af_xdp/output.c
diff options
context:
space:
mode:
author Benoît Ganne <bganne@cisco.com> 2021-04-29 18:24:24 +0200
committer Damjan Marion <dmarion@me.com> 2021-05-21 19:50:14 +0000
commit a42c41be4eed3e1ce2a42038b07ce1d3420891cd (patch)
tree fc95c7c24cbef993cc2bef8742b3360123d70b66 /src/plugins/af_xdp/output.c
parent 92a8d761c412590f5112239be4c511091b2b2d5a (diff)
af_xdp: workaround kernel race between poll() and sendmsg()
Prior to Linux 5.6 there is a race condition between poll() and sendmsg() in the kernel. This patch protects the syscalls with a lock to prevent it, unless the NO_SYSCALL_LOCK flag is set at create time.
See https://lore.kernel.org/bpf/BYAPR11MB365382C5DB1E5FCC53242609C1549@BYAPR11MB3653.namprd11.prod.outlook.com/

Type: fix
Change-Id: Ie7d4f5cb41f697b11a09b6046e54d190430d76df
Signed-off-by: Benoît Ganne <bganne@cisco.com>
Diffstat (limited to 'src/plugins/af_xdp/output.c')
-rw-r--r-- src/plugins/af_xdp/output.c 33
1 files changed, 16 insertions, 17 deletions
diff --git a/src/plugins/af_xdp/output.c b/src/plugins/af_xdp/output.c
index 52c34e00d95..51a56ed866d 100644
--- a/src/plugins/af_xdp/output.c
+++ b/src/plugins/af_xdp/output.c
@@ -1,4 +1,4 @@
-#include <errno.h>
+#include <poll.h>
#include <string.h>
#include <vlib/vlib.h>
#include <vlib/unix/unix.h>
@@ -90,31 +90,29 @@ af_xdp_device_output_tx_db (vlib_main_t * vm,
af_xdp_device_t * ad,
af_xdp_txq_t * txq, const u32 n_tx)
{
- int ret;
-
xsk_ring_prod__submit (&txq->tx, n_tx);
if (!xsk_ring_prod__needs_wakeup (&txq->tx))
return;
- vlib_error_count (vm, node->node_index, AF_XDP_TX_ERROR_SENDTO_REQUIRED, 1);
+ vlib_error_count (vm, node->node_index, AF_XDP_TX_ERROR_SYSCALL_REQUIRED, 1);
- ret = sendto (txq->xsk_fd, NULL, 0, MSG_DONTWAIT, NULL, 0);
- if (PREDICT_TRUE (ret >= 0))
- return;
+ clib_spinlock_lock_if_init (&txq->syscall_lock);
- /* those errors are fine */
- switch (errno)
+ if (xsk_ring_prod__needs_wakeup (&txq->tx))
{
- case ENOBUFS:
- case EAGAIN:
- case EBUSY:
- return;
+ struct pollfd fd = { .fd = txq->xsk_fd, .events = POLLIN | POLLOUT };
+ int ret = poll (&fd, 1, 0);
+ if (PREDICT_FALSE (ret < 0))
+ {
+ /* something bad is happening */
+ vlib_error_count (vm, node->node_index,
+ AF_XDP_TX_ERROR_SYSCALL_FAILURES, 1);
+ af_xdp_device_error (ad, "tx poll() failed");
+ }
}
- /* something bad is happening */
- vlib_error_count (vm, node->node_index, AF_XDP_TX_ERROR_SENDTO_FAILURES, 1);
- af_xdp_device_error (ad, "sendto() failed");
+ clib_spinlock_unlock_if_init (&txq->syscall_lock);
}
static_always_inline u32
@@ -218,7 +216,8 @@ VNET_DEVICE_CLASS_TX_FN (af_xdp_device_class) (vlib_main_t * vm,
vnet_interface_output_runtime_t *ord = (void *) node->runtime_data;
af_xdp_device_t *ad = pool_elt_at_index (rm->devices, ord->dev_instance);
u32 thread_index = vm->thread_index;
- af_xdp_txq_t *txq = vec_elt_at_index (ad->txqs, thread_index % ad->txq_num);
+ af_xdp_txq_t *txq =
+ vec_elt_at_index (ad->txqs, (thread_index - 1) % ad->txq_num);
u32 *from;
u32 n, n_tx;
int i;