From 5a7aa51f00d562814204aca7831777651a00869f Mon Sep 17 00:00:00 2001 From: Mohsin Kazmi Date: Fri, 25 Mar 2022 14:27:45 +0000 Subject: devices: add multi-queue support for af-packet Type: feature Change-Id: I0f4e6517fcfa07ffb0aba89b159ac1337937a508 Signed-off-by: Mohsin Kazmi --- src/vnet/devices/af_packet/node.c | 68 ++++++++++++++++++++++++--------------- 1 file changed, 42 insertions(+), 26 deletions(-) (limited to 'src/vnet/devices/af_packet/node.c') diff --git a/src/vnet/devices/af_packet/node.c b/src/vnet/devices/af_packet/node.c index 06012fd263a..323508bd304 100644 --- a/src/vnet/devices/af_packet/node.c +++ b/src/vnet/devices/af_packet/node.c @@ -30,9 +30,10 @@ #include #include -#define foreach_af_packet_input_error \ - _(PARTIAL_PKT, "partial packet") - +#define foreach_af_packet_input_error \ + _ (PARTIAL_PKT, "partial packet") \ + _ (TIMEDOUT_BLK, "timed out block") \ + _ (TOTAL_RECV_BLK, "total received block") typedef enum { #define _(f,s) AF_PACKET_INPUT_ERROR_##f, @@ -51,6 +52,7 @@ typedef struct { u32 next_index; u32 hw_if_index; + u16 queue_id; int block; u32 pkt_num; void *block_start; @@ -67,8 +69,8 @@ format_af_packet_input_trace (u8 * s, va_list * args) af_packet_input_trace_t *t = va_arg (*args, af_packet_input_trace_t *); u32 indent = format_get_indent (s); - s = format (s, "af_packet: hw_if_index %d next-index %d", - t->hw_if_index, t->next_index); + s = format (s, "af_packet: hw_if_index %d rx-queue %u next-index %d", + t->hw_if_index, t->queue_id, t->next_index); s = format ( s, "\n%Ublock %u:\n%Uaddress %p version %u seq_num %lu pkt_num %u", @@ -222,22 +224,25 @@ fill_cksum_offload (vlib_buffer_t *b, u8 *l4_hdr_sz, u8 is_ip) always_inline uword af_packet_device_input_fn (vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame, af_packet_if_t *apif, - u8 is_cksum_gso_enabled) + u16 queue_id, u8 is_cksum_gso_enabled) { af_packet_main_t *apm = &af_packet_main; + af_packet_queue_t *rx_queue = vec_elt_at_index (apif->rx_queues, queue_id); tpacket3_hdr_t *tph; u32 next_index; u32 n_free_bufs; u32 n_rx_packets = 0; u32 n_rx_bytes = 0; + u32 timedout_blk = 0; + u32 total = 0; u32 *to_next = 0; - u32 block = apif->next_rx_block; - u32 block_nr = apif->rx_req->tp_block_nr; + u32 block = rx_queue->next_rx_block; + u32 block_nr = rx_queue->rx_req->tp_block_nr; u8 *block_start = 0; uword n_trace = vlib_get_trace_count (vm, node); u32 thread_index = vm->thread_index; u32 n_buffer_bytes = vlib_buffer_get_default_data_size (vm); - u32 min_bufs = apif->rx_req->tp_frame_size / n_buffer_bytes; + u32 min_bufs = rx_queue->rx_req->tp_frame_size / n_buffer_bytes; u32 num_pkts = 0; u32 rx_frame_offset = 0; block_desc_t *bd = 0; @@ -256,18 +261,23 @@ af_packet_device_input_fn (vlib_main_t *vm, vlib_node_runtime_t *node, vnet_feature_start_device_input_x1 (apif->sw_if_index, &next_index, &bt); } - if ((((block_desc_t *) (block_start = apif->rx_ring[block])) + if ((((block_desc_t *) (block_start = rx_queue->rx_ring[block])) ->hdr.bh1.block_status & TP_STATUS_USER) != 0) { u32 n_required = 0; bd = (block_desc_t *) block_start; - if (PREDICT_FALSE (apif->ss.is_save)) + total++; + + if (TP_STATUS_BLK_TMO & bd->hdr.bh1.block_status) + timedout_blk++; + + if (PREDICT_FALSE (rx_queue->is_rx_pending)) { - num_pkts = apif->ss.num_pkts; - rx_frame_offset = apif->ss.rx_frame_offset; - apif->ss.is_save = 0; + num_pkts = rx_queue->num_rx_pkts; + rx_frame_offset = rx_queue->rx_frame_offset; + rx_queue->is_rx_pending = 0; } else { @@ -318,9 +328,9 @@ af_packet_device_input_fn (vlib_main_t *vm, vlib_node_runtime_t *node, if (PREDICT_FALSE (((data_len / n_buffer_bytes) + 1) > vec_len (apm->rx_buffers[thread_index]))) { - apif->ss.rx_frame_offset = rx_frame_offset; - apif->ss.num_pkts = num_pkts; - apif->ss.is_save = 1; + rx_queue->rx_frame_offset = rx_frame_offset; + rx_queue->num_rx_pkts = num_pkts; + rx_queue->is_rx_pending = 1; vlib_put_next_frame (vm, node, next_index, n_left_to_next); goto done; } @@ -450,6 +460,7 @@ af_packet_device_input_fn (vlib_main_t *vm, vlib_node_runtime_t *node, tr = vlib_add_trace (vm, node, first_b0, sizeof (*tr)); tr->next_index = next0; tr->hw_if_index = apif->hw_if_index; + tr->queue_id = queue_id; tr->block = block; tr->block_start = bd; tr->pkt_num = bd->hdr.bh1.num_pkts - num_pkts; @@ -483,23 +494,28 @@ af_packet_device_input_fn (vlib_main_t *vm, vlib_node_runtime_t *node, } else { - apif->ss.rx_frame_offset = rx_frame_offset; - apif->ss.num_pkts = num_pkts; - apif->ss.is_save = 1; + rx_queue->rx_frame_offset = rx_frame_offset; + rx_queue->num_rx_pkts = num_pkts; + rx_queue->is_rx_pending = 1; } } - apif->next_rx_block = block; + rx_queue->next_rx_block = block; done: - if ((((block_desc_t *) (block_start = apif->rx_ring[block])) + if ((((block_desc_t *) (block_start = rx_queue->rx_ring[block])) ->hdr.bh1.block_status & TP_STATUS_USER) != 0) vlib_node_set_state (vm, node->node_index, VLIB_NODE_STATE_POLLING); else vlib_node_set_state (vm, node->node_index, VLIB_NODE_STATE_INTERRUPT); + vlib_error_count (vm, node->node_index, AF_PACKET_INPUT_ERROR_TOTAL_RECV_BLK, + total); + vlib_error_count (vm, node->node_index, AF_PACKET_INPUT_ERROR_TIMEDOUT_BLK, + timedout_blk); + vlib_increment_combined_counter (vnet_get_main ()->interface_main.combined_sw_if_counters + VNET_INTERFACE_COUNTER_RX, @@ -524,11 +540,11 @@ VLIB_NODE_FN (af_packet_input_node) (vlib_main_t * vm, if (apif->is_admin_up) { if (apif->is_cksum_gso_enabled) - n_rx_packets += - af_packet_device_input_fn (vm, node, frame, apif, 1); + n_rx_packets += af_packet_device_input_fn (vm, node, frame, apif, + pv[i].queue_id, 1); else - n_rx_packets += - af_packet_device_input_fn (vm, node, frame, apif, 0); + n_rx_packets += af_packet_device_input_fn (vm, node, frame, apif, + pv[i].queue_id, 0); } } return n_rx_packets; -- cgit 1.2.3-korg