diff options
author | Benoît Ganne <bganne@cisco.com> | 2021-04-29 18:24:24 +0200 |
---|---|---|
committer | Damjan Marion <dmarion@me.com> | 2021-05-21 19:50:14 +0000 |
commit | a42c41be4eed3e1ce2a42038b07ce1d3420891cd (patch) | |
tree | fc95c7c24cbef993cc2bef8742b3360123d70b66 /src/plugins/af_xdp/output.c | |
parent | 92a8d761c412590f5112239be4c511091b2b2d5a (diff) |
af_xdp: workaround kernel race between poll() and sendmsg()
Prior to Linux 5.6, there is a race condition between poll() and
sendmsg() in the kernel. This patch serializes the syscalls with a lock
to avoid that race, unless the NO_SYSCALL_LOCK flag is set at create time.
See
https://lore.kernel.org/bpf/BYAPR11MB365382C5DB1E5FCC53242609C1549@BYAPR11MB3653.namprd11.prod.outlook.com/
Type: fix
Change-Id: Ie7d4f5cb41f697b11a09b6046e54d190430d76df
Signed-off-by: Benoît Ganne <bganne@cisco.com>
Diffstat (limited to 'src/plugins/af_xdp/output.c')
-rw-r--r-- | src/plugins/af_xdp/output.c | 33 |
1 file changed, 16 insertions, 17 deletions
diff --git a/src/plugins/af_xdp/output.c b/src/plugins/af_xdp/output.c index 52c34e00d95..51a56ed866d 100644 --- a/src/plugins/af_xdp/output.c +++ b/src/plugins/af_xdp/output.c @@ -1,4 +1,4 @@ -#include <errno.h> +#include <poll.h> #include <string.h> #include <vlib/vlib.h> #include <vlib/unix/unix.h> @@ -90,31 +90,29 @@ af_xdp_device_output_tx_db (vlib_main_t * vm, af_xdp_device_t * ad, af_xdp_txq_t * txq, const u32 n_tx) { - int ret; - xsk_ring_prod__submit (&txq->tx, n_tx); if (!xsk_ring_prod__needs_wakeup (&txq->tx)) return; - vlib_error_count (vm, node->node_index, AF_XDP_TX_ERROR_SENDTO_REQUIRED, 1); + vlib_error_count (vm, node->node_index, AF_XDP_TX_ERROR_SYSCALL_REQUIRED, 1); - ret = sendto (txq->xsk_fd, NULL, 0, MSG_DONTWAIT, NULL, 0); - if (PREDICT_TRUE (ret >= 0)) - return; + clib_spinlock_lock_if_init (&txq->syscall_lock); - /* those errors are fine */ - switch (errno) + if (xsk_ring_prod__needs_wakeup (&txq->tx)) { - case ENOBUFS: - case EAGAIN: - case EBUSY: - return; + struct pollfd fd = { .fd = txq->xsk_fd, .events = POLLIN | POLLOUT }; + int ret = poll (&fd, 1, 0); + if (PREDICT_FALSE (ret < 0)) + { + /* something bad is happening */ + vlib_error_count (vm, node->node_index, + AF_XDP_TX_ERROR_SYSCALL_FAILURES, 1); + af_xdp_device_error (ad, "tx poll() failed"); + } } - /* something bad is happening */ - vlib_error_count (vm, node->node_index, AF_XDP_TX_ERROR_SENDTO_FAILURES, 1); - af_xdp_device_error (ad, "sendto() failed"); + clib_spinlock_unlock_if_init (&txq->syscall_lock); } static_always_inline u32 @@ -218,7 +216,8 @@ VNET_DEVICE_CLASS_TX_FN (af_xdp_device_class) (vlib_main_t * vm, vnet_interface_output_runtime_t *ord = (void *) node->runtime_data; af_xdp_device_t *ad = pool_elt_at_index (rm->devices, ord->dev_instance); u32 thread_index = vm->thread_index; - af_xdp_txq_t *txq = vec_elt_at_index (ad->txqs, thread_index % ad->txq_num); + af_xdp_txq_t *txq = + vec_elt_at_index (ad->txqs, (thread_index - 1) % ad->txq_num); u32 *from; u32 n, 
n_tx; int i; |