From 8b90d89b05322ceaaf57e0eda403c4f92546f7b3 Mon Sep 17 00:00:00 2001 From: Mohsin Kazmi Date: Thu, 8 Sep 2022 17:21:20 +0000 Subject: devices: add support for af-packet v2 Type: feature Signed-off-by: Mohsin Kazmi Change-Id: I2ccaf1d512dcb72e414be8c69cbb538ebbe0e933 --- src/vnet/devices/af_packet/device.c | 354 +++++++++++++++++++++++++----------- 1 file changed, 246 insertions(+), 108 deletions(-) (limited to 'src/vnet/devices/af_packet/device.c') diff --git a/src/vnet/devices/af_packet/device.c b/src/vnet/devices/af_packet/device.c index 1d14c9b8dcf..2e9b7a4ed83 100644 --- a/src/vnet/devices/af_packet/device.c +++ b/src/vnet/devices/af_packet/device.c @@ -61,7 +61,12 @@ typedef struct u32 buffer_index; u32 hw_if_index; u16 queue_id; - tpacket3_hdr_t tph; + u8 is_v2; + union + { + tpacket2_hdr_t tph2; + tpacket3_hdr_t tph3; + }; vnet_virtio_net_hdr_t vnet_hdr; vlib_buffer_t buffer; } af_packet_tx_trace_t; @@ -91,7 +96,8 @@ format_af_packet_device (u8 * s, va_list * args) af_packet_queue_t *rx_queue = 0; af_packet_queue_t *tx_queue = 0; - s = format (s, "Linux PACKET socket interface"); + s = format (s, "Linux PACKET socket interface %s", + (apif->version == TPACKET_V2) ? "v2" : "v3"); s = format (s, "\n%UFEATURES:", format_white_space, indent); if (apif->is_qdisc_bypass_enabled) s = format (s, "\n%Uqdisc-bpass-enabled", format_white_space, indent + 2); @@ -102,17 +108,20 @@ format_af_packet_device (u8 * s, va_list * args) vec_foreach (rx_queue, apif->rx_queues) { - u32 rx_block_size = rx_queue->rx_req->tp_block_size; - u32 rx_frame_size = rx_queue->rx_req->tp_frame_size; - u32 rx_frame_nr = rx_queue->rx_req->tp_frame_nr; - u32 rx_block_nr = rx_queue->rx_req->tp_block_nr; + u32 rx_block_size = rx_queue->rx_req->req.tp_block_size; + u32 rx_frame_size = rx_queue->rx_req->req.tp_frame_size; + u32 rx_frame_nr = rx_queue->rx_req->req.tp_frame_nr; + u32 rx_block_nr = rx_queue->rx_req->req.tp_block_nr; s = format (s, "\n%URX Queue %u:", format_white_space, indent, rx_queue->queue_id); s = format (s, "\n%Ublock size:%d nr:%d frame size:%d nr:%d", format_white_space, indent + 2, rx_block_size, rx_block_nr, rx_frame_size, rx_frame_nr); - s = format (s, " next block:%d", rx_queue->next_rx_block); + if (apif->version == TPACKET_V2) + s = format (s, " next frame:%d", rx_queue->next_rx_frame); + else + s = format (s, " next block:%d", rx_queue->next_rx_block); if (rx_queue->is_rx_pending) { s = format ( @@ -125,15 +134,16 @@ format_af_packet_device (u8 * s, va_list * args) vec_foreach (tx_queue, apif->tx_queues) { clib_spinlock_lock (&tx_queue->lockp); - u32 tx_block_sz = tx_queue->tx_req->tp_block_size; - u32 tx_frame_sz = tx_queue->tx_req->tp_frame_size; - u32 tx_frame_nr = tx_queue->tx_req->tp_frame_nr; - u32 tx_block_nr = tx_queue->tx_req->tp_block_nr; + u32 tx_block_sz = tx_queue->tx_req->req.tp_block_size; + u32 tx_frame_sz = tx_queue->tx_req->req.tp_frame_size; + u32 tx_frame_nr = tx_queue->tx_req->req.tp_frame_nr; + u32 tx_block_nr = tx_queue->tx_req->req.tp_block_nr; int block = 0; int n_send_req = 0, n_avail = 0, n_sending = 0, n_tot = 0, n_wrong = 0; u8 *tx_block_start = tx_queue->tx_ring[block]; u32 tx_frame = tx_queue->next_tx_frame; - tpacket3_hdr_t *tph; + tpacket3_hdr_t *tph3; + tpacket2_hdr_t *tph2; s = format (s, "\n%UTX Queue %u:", format_white_space, indent, tx_queue->queue_id); @@ -141,22 +151,40 @@ format_af_packet_device (u8 * s, va_list * args) format_white_space, indent + 2, tx_block_sz, tx_block_nr, tx_frame_sz, tx_frame_nr); s = format (s, " next frame:%d", tx_queue->next_tx_frame); - - do - { - tph = (tpacket3_hdr_t *) (tx_block_start + tx_frame * tx_frame_sz); - tx_frame = (tx_frame + 1) % tx_frame_nr; - if (tph->tp_status == 0) - n_avail++; - else if (tph->tp_status & TP_STATUS_SEND_REQUEST) - n_send_req++; - else if (tph->tp_status & TP_STATUS_SENDING) - n_sending++; - else - n_wrong++; - n_tot++; - } - while (tx_frame != tx_queue->next_tx_frame); + if (apif->version & TPACKET_V3) + do + { + tph3 = + (tpacket3_hdr_t *) (tx_block_start + tx_frame * tx_frame_sz); + tx_frame = (tx_frame + 1) % tx_frame_nr; + if (tph3->tp_status == 0) + n_avail++; + else if (tph3->tp_status & TP_STATUS_SEND_REQUEST) + n_send_req++; + else if (tph3->tp_status & TP_STATUS_SENDING) + n_sending++; + else + n_wrong++; + n_tot++; + } + while (tx_frame != tx_queue->next_tx_frame); + else + do + { + tph2 = + (tpacket2_hdr_t *) (tx_block_start + tx_frame * tx_frame_sz); + tx_frame = (tx_frame + 1) % tx_frame_nr; + if (tph2->tp_status == 0) + n_avail++; + else if (tph2->tp_status & TP_STATUS_SEND_REQUEST) + n_send_req++; + else if (tph2->tp_status & TP_STATUS_SENDING) + n_sending++; + else + n_wrong++; + n_tot++; + } + while (tx_frame != tx_queue->next_tx_frame); s = format (s, "\n%Uavailable:%d request:%d sending:%d wrong:%d total:%d", format_white_space, indent + 2, n_avail, n_send_req, n_sending, @@ -177,24 +205,46 @@ format_af_packet_tx_trace (u8 *s, va_list *va) s = format (s, "af_packet: hw_if_index %u tx-queue %u", t->hw_if_index, t->queue_id); - s = - format (s, - "\n%Utpacket3_hdr:\n%Ustatus 0x%x len %u snaplen %u mac %u net %u" - "\n%Usec 0x%x nsec 0x%x vlan %U" + if (t->is_v2) + { + s = format ( + s, + "\n%Utpacket2_hdr:\n%Ustatus 0x%x len %u snaplen %u mac %u net %u" + "\n%Usec 0x%x nsec 0x%x vlan %U" #ifdef TP_STATUS_VLAN_TPID_VALID - " vlan_tpid %u" + " vlan_tpid %u" #endif - , - format_white_space, indent + 2, format_white_space, indent + 4, - t->tph.tp_status, t->tph.tp_len, t->tph.tp_snaplen, t->tph.tp_mac, - t->tph.tp_net, format_white_space, indent + 4, t->tph.tp_sec, - t->tph.tp_nsec, format_ethernet_vlan_tci, t->tph.hv1.tp_vlan_tci + , + format_white_space, indent + 2, format_white_space, indent + 4, + t->tph2.tp_status, t->tph2.tp_len, t->tph2.tp_snaplen, t->tph2.tp_mac, + t->tph2.tp_net, format_white_space, indent + 4, t->tph2.tp_sec, + t->tph2.tp_nsec, format_ethernet_vlan_tci, t->tph2.tp_vlan_tci #ifdef TP_STATUS_VLAN_TPID_VALID - , - t->tph.hv1.tp_vlan_tpid + , + t->tph2.tp_vlan_tpid #endif - ); - + ); + } + else + { + s = format ( + s, + "\n%Utpacket3_hdr:\n%Ustatus 0x%x len %u snaplen %u mac %u net %u" + "\n%Usec 0x%x nsec 0x%x vlan %U" +#ifdef TP_STATUS_VLAN_TPID_VALID + " vlan_tpid %u" +#endif + , + format_white_space, indent + 2, format_white_space, indent + 4, + t->tph3.tp_status, t->tph3.tp_len, t->tph3.tp_snaplen, t->tph3.tp_mac, + t->tph3.tp_net, format_white_space, indent + 4, t->tph3.tp_sec, + t->tph3.tp_nsec, format_ethernet_vlan_tci, t->tph3.hv1.tp_vlan_tci +#ifdef TP_STATUS_VLAN_TPID_VALID + , + t->tph3.hv1.tp_vlan_tpid +#endif + ); + } s = format (s, "\n%Uvnet-hdr:\n%Uflags 0x%02x gso_type 0x%02x hdr_len %u" "\n%Ugso_size %u csum_start %u csum_offset %u", @@ -214,17 +264,23 @@ format_af_packet_tx_trace (u8 *s, va_list *va) static void af_packet_tx_trace (vlib_main_t *vm, vlib_node_runtime_t *node, - vlib_buffer_t *b0, u32 bi, tpacket3_hdr_t *tph, + vlib_buffer_t *b0, u32 bi, void *tph, vnet_virtio_net_hdr_t *vnet_hdr, u32 hw_if_index, - u16 queue_id) + u16 queue_id, u8 is_v2) { af_packet_tx_trace_t *t; t = vlib_add_trace (vm, node, b0, sizeof (t[0])); t->hw_if_index = hw_if_index; t->queue_id = queue_id; t->buffer_index = bi; + t->is_v2 = is_v2; - clib_memcpy_fast (&t->tph, tph, sizeof (*tph)); + if (is_v2) + clib_memcpy_fast (&t->tph2, (tpacket2_hdr_t *) tph, + sizeof (tpacket2_hdr_t)); + else + clib_memcpy_fast (&t->tph3, (tpacket3_hdr_t *) tph, + sizeof (tpacket3_hdr_t)); clib_memcpy_fast (&t->vnet_hdr, vnet_hdr, sizeof (*vnet_hdr)); clib_memcpy_fast (&t->buffer, b0, sizeof (*b0) - sizeof (b0->pre_data)); clib_memcpy_fast (t->buffer.pre_data, vlib_buffer_get_current (b0), @@ -337,95 +393,177 @@ VNET_DEVICE_CLASS_TX_FN (af_packet_device_class) (vlib_main_t * vm, af_packet_queue_t *tx_queue = vec_elt_at_index (apif->tx_queues, queue_id); u32 block = 0, frame_size = 0, frame_num = 0, tx_frame = 0; u8 *block_start = 0; - tpacket3_hdr_t *tph = 0; + tpacket3_hdr_t *tph3 = 0; + tpacket2_hdr_t *tph2 = 0; u32 frame_not_ready = 0; u8 is_cksum_gso_enabled = (apif->is_cksum_gso_enabled == 1) ? 1 : 0; + u32 tpacket_align = 0; + u8 is_v2 = (apif->version == TPACKET_V2) ? 1 : 0; if (tf->shared_queue) clib_spinlock_lock (&tx_queue->lockp); - frame_size = tx_queue->tx_req->tp_frame_size; - frame_num = tx_queue->tx_req->tp_frame_nr; + frame_size = tx_queue->tx_req->req.tp_frame_size; + frame_num = tx_queue->tx_req->req.tp_frame_nr; block_start = tx_queue->tx_ring[block]; tx_frame = tx_queue->next_tx_frame; - - while (n_left) + if (is_v2) { - u32 len; - vnet_virtio_net_hdr_t *vnet_hdr = 0; - u32 offset = 0; - vlib_buffer_t *b0 = 0, *b0_first = 0; - u32 bi, bi_first; - - bi = bi_first = buffers[0]; - n_left--; - buffers++; - - tph = (tpacket3_hdr_t *) (block_start + tx_frame * frame_size); - if (PREDICT_FALSE (tph->tp_status & - (TP_STATUS_SEND_REQUEST | TP_STATUS_SENDING))) + tpacket_align = TPACKET_ALIGN (sizeof (tpacket2_hdr_t)); + while (n_left) { - frame_not_ready++; - goto next; - } + u32 len; + vnet_virtio_net_hdr_t *vnet_hdr = 0; + u32 offset = 0; + vlib_buffer_t *b0 = 0, *b0_first = 0; + u32 bi, bi_first; + + bi = bi_first = buffers[0]; + n_left--; + buffers++; + + tph2 = (tpacket2_hdr_t *) (block_start + tx_frame * frame_size); + if (PREDICT_FALSE (tph2->tp_status & + (TP_STATUS_SEND_REQUEST | TP_STATUS_SENDING))) + { + frame_not_ready++; + goto nextv2; + } - b0_first = b0 = vlib_get_buffer (vm, bi); + b0_first = b0 = vlib_get_buffer (vm, bi); - if (PREDICT_TRUE (is_cksum_gso_enabled)) - { - vnet_hdr = - (vnet_virtio_net_hdr_t *) ((u8 *) tph + TPACKET_ALIGN (sizeof ( - tpacket3_hdr_t))); + if (PREDICT_TRUE (is_cksum_gso_enabled)) + { + vnet_hdr = + (vnet_virtio_net_hdr_t *) ((u8 *) tph2 + tpacket_align); - clib_memset_u8 (vnet_hdr, 0, sizeof (vnet_virtio_net_hdr_t)); - offset = sizeof (vnet_virtio_net_hdr_t); + clib_memset_u8 (vnet_hdr, 0, sizeof (vnet_virtio_net_hdr_t)); + offset = sizeof (vnet_virtio_net_hdr_t); - if (b0->flags & VNET_BUFFER_F_GSO) - fill_gso_offload (b0, vnet_hdr); - else if (b0->flags & VNET_BUFFER_F_OFFLOAD) - fill_cksum_offload (b0, vnet_hdr); - } - - len = b0->current_length; - clib_memcpy_fast ((u8 *) tph + TPACKET_ALIGN (sizeof (tpacket3_hdr_t)) + - offset, - vlib_buffer_get_current (b0), len); - offset += len; + if (b0->flags & VNET_BUFFER_F_GSO) + fill_gso_offload (b0, vnet_hdr); + else if (b0->flags & VNET_BUFFER_F_OFFLOAD) + fill_cksum_offload (b0, vnet_hdr); + } - while (b0->flags & VLIB_BUFFER_NEXT_PRESENT) - { - b0 = vlib_get_buffer (vm, b0->next_buffer); len = b0->current_length; - clib_memcpy_fast ((u8 *) tph + - TPACKET_ALIGN (sizeof (tpacket3_hdr_t)) + offset, + clib_memcpy_fast ((u8 *) tph2 + tpacket_align + offset, vlib_buffer_get_current (b0), len); offset += len; - } - tph->tp_len = tph->tp_snaplen = offset; - tph->tp_status = TP_STATUS_SEND_REQUEST; - n_sent++; + while (b0->flags & VLIB_BUFFER_NEXT_PRESENT) + { + b0 = vlib_get_buffer (vm, b0->next_buffer); + len = b0->current_length; + clib_memcpy_fast ((u8 *) tph2 + tpacket_align + offset, + vlib_buffer_get_current (b0), len); + offset += len; + } + + tph2->tp_len = tph2->tp_snaplen = offset; + tph2->tp_status = TP_STATUS_SEND_REQUEST; + n_sent++; - if (PREDICT_FALSE (b0_first->flags & VLIB_BUFFER_IS_TRACED)) + if (PREDICT_FALSE (b0_first->flags & VLIB_BUFFER_IS_TRACED)) + { + if (PREDICT_TRUE (is_cksum_gso_enabled)) + af_packet_tx_trace (vm, node, b0_first, bi_first, tph2, + vnet_hdr, apif->hw_if_index, queue_id, 1); + else + { + vnet_virtio_net_hdr_t vnet_hdr2 = {}; + af_packet_tx_trace (vm, node, b0_first, bi_first, tph2, + &vnet_hdr2, apif->hw_if_index, queue_id, + 1); + } + } + tx_frame = (tx_frame + 1) % frame_num; + + nextv2: + /* check if we've exhausted the ring */ + if (PREDICT_FALSE (frame_not_ready + n_sent == frame_num)) + break; + } + } + else + { + tpacket_align = TPACKET_ALIGN (sizeof (tpacket3_hdr_t)); + + while (n_left) { + u32 len; + vnet_virtio_net_hdr_t *vnet_hdr = 0; + u32 offset = 0; + vlib_buffer_t *b0 = 0, *b0_first = 0; + u32 bi, bi_first; + + bi = bi_first = buffers[0]; + n_left--; + buffers++; + + tph3 = (tpacket3_hdr_t *) (block_start + tx_frame * frame_size); + if (PREDICT_FALSE (tph3->tp_status & + (TP_STATUS_SEND_REQUEST | TP_STATUS_SENDING))) + { + frame_not_ready++; + goto nextv3; + } + + b0_first = b0 = vlib_get_buffer (vm, bi); + if (PREDICT_TRUE (is_cksum_gso_enabled)) - af_packet_tx_trace (vm, node, b0_first, bi_first, tph, vnet_hdr, - apif->hw_if_index, queue_id); - else { - vnet_virtio_net_hdr_t vnet_hdr2 = {}; - af_packet_tx_trace (vm, node, b0_first, bi_first, tph, - &vnet_hdr2, apif->hw_if_index, queue_id); + vnet_hdr = + (vnet_virtio_net_hdr_t *) ((u8 *) tph3 + tpacket_align); + + clib_memset_u8 (vnet_hdr, 0, sizeof (vnet_virtio_net_hdr_t)); + offset = sizeof (vnet_virtio_net_hdr_t); + + if (b0->flags & VNET_BUFFER_F_GSO) + fill_gso_offload (b0, vnet_hdr); + else if (b0->flags & VNET_BUFFER_F_OFFLOAD) + fill_cksum_offload (b0, vnet_hdr); } - } - tx_frame = (tx_frame + 1) % frame_num; - next: - /* check if we've exhausted the ring */ - if (PREDICT_FALSE (frame_not_ready + n_sent == frame_num)) - break; - } + len = b0->current_length; + clib_memcpy_fast ((u8 *) tph3 + tpacket_align + offset, + vlib_buffer_get_current (b0), len); + offset += len; + + while (b0->flags & VLIB_BUFFER_NEXT_PRESENT) + { + b0 = vlib_get_buffer (vm, b0->next_buffer); + len = b0->current_length; + clib_memcpy_fast ((u8 *) tph3 + tpacket_align + offset, + vlib_buffer_get_current (b0), len); + offset += len; + } + + tph3->tp_len = tph3->tp_snaplen = offset; + tph3->tp_status = TP_STATUS_SEND_REQUEST; + n_sent++; + if (PREDICT_FALSE (b0_first->flags & VLIB_BUFFER_IS_TRACED)) + { + if (PREDICT_TRUE (is_cksum_gso_enabled)) + af_packet_tx_trace (vm, node, b0_first, bi_first, tph3, + vnet_hdr, apif->hw_if_index, queue_id, 0); + else + { + vnet_virtio_net_hdr_t vnet_hdr2 = {}; + af_packet_tx_trace (vm, node, b0_first, bi_first, tph3, + &vnet_hdr2, apif->hw_if_index, queue_id, + 0); + } + } + tx_frame = (tx_frame + 1) % frame_num; + + nextv3: + /* check if we've exhausted the ring */ + if (PREDICT_FALSE (frame_not_ready + n_sent == frame_num)) + break; + } + } CLIB_MEMORY_BARRIER (); if (PREDICT_TRUE (n_sent || tx_queue->is_tx_pending)) -- cgit 1.2.3-korg