From 8a419d5b5dd3da5d3d8597cd65ac74f191fb7da7 Mon Sep 17 00:00:00 2001 From: Mohammed Hawari Date: Thu, 24 Feb 2022 15:19:01 +0100 Subject: devices: af_packet, fix tx stall by retrying failed sendto MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Change-Id: I6bed66f740b34673a4883eda1c7f7310c57e131b Type: fix Signed-off-by: Mohammed Hawari Signed-off-by: Benoît Ganne --- src/vnet/devices/af_packet/af_packet.c | 1 + src/vnet/devices/af_packet/af_packet.h | 1 + src/vnet/devices/af_packet/device.c | 28 ++++++++++++++++++++++------ 3 files changed, 24 insertions(+), 6 deletions(-) (limited to 'src') diff --git a/src/vnet/devices/af_packet/af_packet.c b/src/vnet/devices/af_packet/af_packet.c index 83c10684c9f..ec65bf6d493 100644 --- a/src/vnet/devices/af_packet/af_packet.c +++ b/src/vnet/devices/af_packet/af_packet.c @@ -437,6 +437,7 @@ af_packet_queue_init (vlib_main_t *vm, af_packet_if_t *apif, tx_queue->next_tx_frame = 0; tx_queue->queue_id = queue_id; + tx_queue->is_tx_pending = 0; clib_spinlock_init (&tx_queue->lockp); } diff --git a/src/vnet/devices/af_packet/af_packet.h b/src/vnet/devices/af_packet/af_packet.h index 10a7aafda56..e5dc6151361 100644 --- a/src/vnet/devices/af_packet/af_packet.h +++ b/src/vnet/devices/af_packet/af_packet.h @@ -82,6 +82,7 @@ typedef struct u32 rx_frame_offset; u16 num_rx_pkts; u8 is_rx_pending; + u8 is_tx_pending; vnet_hw_if_rx_mode mode; } af_packet_queue_t; diff --git a/src/vnet/devices/af_packet/device.c b/src/vnet/devices/af_packet/device.c index 1e177f625b2..e1eb46a5e9b 100644 --- a/src/vnet/devices/af_packet/device.c +++ b/src/vnet/devices/af_packet/device.c @@ -418,9 +418,10 @@ VNET_DEVICE_CLASS_TX_FN (af_packet_device_class) (vlib_main_t * vm, CLIB_MEMORY_BARRIER (); - if (PREDICT_TRUE (n_sent)) + if (PREDICT_TRUE (n_sent || tx_queue->is_tx_pending)) { tx_queue->next_tx_frame = tx_frame; + tx_queue->is_tx_pending = 0; if (PREDICT_FALSE ( sendto (tx_queue->fd, NULL, 0, MSG_DONTWAIT, 
NULL, 0) == -1)) @@ -429,11 +430,26 @@ VNET_DEVICE_CLASS_TX_FN (af_packet_device_class) (vlib_main_t * vm, * Note that we have no reliable way to properly determine the * disposition of the packets we just enqueued for delivery. */ - vlib_error_count (vm, node->node_index, - unix_error_is_fatal (errno) ? - AF_PACKET_TX_ERROR_TXRING_FATAL : - AF_PACKET_TX_ERROR_TXRING_EAGAIN, - n_sent); + uword counter; + + if (unix_error_is_fatal (errno)) + { + counter = AF_PACKET_TX_ERROR_TXRING_FATAL; + } + else + { + counter = AF_PACKET_TX_ERROR_TXRING_EAGAIN; + /* non-fatal error: kick again next time + * note that you could still end up in a deadlock: if you do not + * try to send new packets (ie reschedule this tx node), eg. + * because your peer is waiting for the unsent packets to reply + * to you but you're waiting for its reply etc., you are not going + * to kick again, and everybody is waiting for the other to talk + * 1st... */ + tx_queue->is_tx_pending = 1; + } + + vlib_error_count (vm, node->node_index, counter, 1); } } -- cgit 1.2.3-korg