diff options
author | Mohsin Kazmi <sykazmi@cisco.com> | 2022-09-08 17:21:20 +0000 |
---|---|---|
committer | Mohsin Kazmi <sykazmi@cisco.com> | 2022-10-21 16:32:56 +0000 |
commit | 8b90d89b05322ceaaf57e0eda403c4f92546f7b3 (patch) | |
tree | 7ee0ecc2128f41e3e2219489dff890430a36c385 /src/vnet/devices | |
parent | 65bff88c3671ec6ee561e70f17c60ea9784a39dd (diff) |
devices: add support for af-packet v2
Type: feature
Signed-off-by: Mohsin Kazmi <sykazmi@cisco.com>
Change-Id: I2ccaf1d512dcb72e414be8c69cbb538ebbe0e933
Diffstat (limited to 'src/vnet/devices')
-rw-r--r-- | src/vnet/devices/af_packet/af_packet.api | 1 | ||||
-rw-r--r-- | src/vnet/devices/af_packet/af_packet.c | 131 | ||||
-rw-r--r-- | src/vnet/devices/af_packet/af_packet.h | 17 | ||||
-rw-r--r-- | src/vnet/devices/af_packet/af_packet_api.c | 5 | ||||
-rw-r--r-- | src/vnet/devices/af_packet/cli.c | 4 | ||||
-rw-r--r-- | src/vnet/devices/af_packet/device.c | 354 | ||||
-rw-r--r-- | src/vnet/devices/af_packet/node.c | 319 |
7 files changed, 645 insertions, 186 deletions
diff --git a/src/vnet/devices/af_packet/af_packet.api b/src/vnet/devices/af_packet/af_packet.api index 4a5cfb0fc3a..a12da37796a 100644 --- a/src/vnet/devices/af_packet/af_packet.api +++ b/src/vnet/devices/af_packet/af_packet.api @@ -94,6 +94,7 @@ enum af_packet_mode { enum af_packet_flags { AF_PACKET_API_FLAG_QDISC_BYPASS = 1, /* enable the qdisc bypass */ AF_PACKET_API_FLAG_CKSUM_GSO = 2, /* enable checksum/gso */ + AF_PACKET_API_FLAG_VERSION_2 = 8 [backwards_compatible], /* af packet v2, default is v3 */ }; /** \brief Create host-interface diff --git a/src/vnet/devices/af_packet/af_packet.c b/src/vnet/devices/af_packet/af_packet.c index ec65bf6d493..010bc1c266c 100644 --- a/src/vnet/devices/af_packet/af_packet.c +++ b/src/vnet/devices/af_packet/af_packet.c @@ -48,6 +48,10 @@ VNET_HW_INTERFACE_CLASS (af_packet_ip_device_hw_interface_class, static) = { #define AF_PACKET_DEFAULT_TX_FRAME_SIZE (2048 * 33) // GSO packet of 64KB #define AF_PACKET_TX_BLOCK_NR 1 +#define AF_PACKET_DEFAULT_RX_FRAMES_PER_BLOCK_V2 1024 +#define AF_PACKET_DEFAULT_RX_FRAME_SIZE_V2 (2048 * 33) // GSO packet of 64KB +#define AF_PACKET_RX_BLOCK_NR_V2 1 + #define AF_PACKET_DEFAULT_RX_FRAMES_PER_BLOCK 32 #define AF_PACKET_DEFAULT_RX_FRAME_SIZE 2048 #define AF_PACKET_RX_BLOCK_NR 160 @@ -189,23 +193,16 @@ af_packet_set_tx_queues (vlib_main_t *vm, af_packet_if_t *apif) } static int -create_packet_v3_sock (int host_if_index, tpacket_req3_t *rx_req, - tpacket_req3_t *tx_req, int *fd, af_packet_ring_t *ring, - u32 fanout_id, af_packet_if_flags_t *flags) +create_packet_sock (int host_if_index, tpacket_req_u_t *rx_req, + tpacket_req_u_t *tx_req, int *fd, af_packet_ring_t *ring, + u32 fanout_id, af_packet_if_flags_t *flags, int ver) { af_packet_main_t *apm = &af_packet_main; struct sockaddr_ll sll; socklen_t req_sz = sizeof (tpacket_req3_t); int ret; - int ver = TPACKET_V3; u32 ring_sz = 0; - if (rx_req) - ring_sz += rx_req->tp_block_size * rx_req->tp_block_nr; - - if (tx_req) - ring_sz += 
tx_req->tp_block_size * tx_req->tp_block_nr; - if ((*fd = socket (AF_PACKET, SOCK_RAW, htons (ETH_P_ALL))) < 0) { vlib_log_err (apm->log_class, @@ -297,7 +294,13 @@ create_packet_v3_sock (int host_if_index, tpacket_req3_t *rx_req, goto error; } } - + if (ver == TPACKET_V2) + { + req_sz = sizeof (tpacket_req_t); + ring_sz += rx_req->req.tp_block_size * rx_req->req.tp_block_nr; + } + else + ring_sz += rx_req->req3.tp_block_size * rx_req->req3.tp_block_nr; if (setsockopt (*fd, SOL_PACKET, PACKET_RX_RING, rx_req, req_sz) < 0) { vlib_log_err (apm->log_class, @@ -309,15 +312,23 @@ create_packet_v3_sock (int host_if_index, tpacket_req3_t *rx_req, } if (tx_req) - if (setsockopt (*fd, SOL_PACKET, PACKET_TX_RING, tx_req, req_sz) < 0) - { - vlib_log_err (apm->log_class, - "Failed to set packet tx ring options: %s (errno %d)", - strerror (errno), errno); - ret = VNET_API_ERROR_SYSCALL_ERROR_1; - goto error; - } - + { + if (ver == TPACKET_V2) + { + req_sz = sizeof (tpacket_req_t); + ring_sz += tx_req->req.tp_block_size * tx_req->req.tp_block_nr; + } + else + ring_sz += tx_req->req3.tp_block_size * tx_req->req3.tp_block_nr; + if (setsockopt (*fd, SOL_PACKET, PACKET_TX_RING, tx_req, req_sz) < 0) + { + vlib_log_err (apm->log_class, + "Failed to set packet tx ring options: %s (errno %d)", + strerror (errno), errno); + ret = VNET_API_ERROR_SYSCALL_ERROR_1; + goto error; + } + } ring->ring_start_addr = mmap (NULL, ring_sz, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_LOCKED, *fd, 0); if (ring->ring_start_addr == MAP_FAILED) @@ -347,8 +358,8 @@ af_packet_queue_init (vlib_main_t *vm, af_packet_if_t *apif, u8 queue_id) { af_packet_main_t *apm = &af_packet_main; - tpacket_req3_t *rx_req = 0; - tpacket_req3_t *tx_req = 0; + tpacket_req_u_t *rx_req = 0; + tpacket_req_u_t *tx_req = 0; int ret, fd = -1; af_packet_ring_t ring = { 0 }; u8 *ring_addr = 0; @@ -360,22 +371,32 @@ af_packet_queue_init (vlib_main_t *vm, af_packet_if_t *apif, { rx_frames_per_block = arg->rx_frames_per_block ? 
arg->rx_frames_per_block : - AF_PACKET_DEFAULT_RX_FRAMES_PER_BLOCK; - - rx_frame_size = arg->rx_frame_size ? arg->rx_frame_size : - AF_PACKET_DEFAULT_RX_FRAME_SIZE; + ((apif->version == TPACKET_V3) ? + AF_PACKET_DEFAULT_RX_FRAMES_PER_BLOCK : + AF_PACKET_DEFAULT_RX_FRAMES_PER_BLOCK_V2); + + rx_frame_size = + arg->rx_frame_size ? + arg->rx_frame_size : + ((apif->version == TPACKET_V3) ? AF_PACKET_DEFAULT_RX_FRAME_SIZE : + AF_PACKET_DEFAULT_RX_FRAME_SIZE_V2); vec_validate (rx_queue->rx_req, 0); - rx_queue->rx_req->tp_block_size = rx_frame_size * rx_frames_per_block; - rx_queue->rx_req->tp_frame_size = rx_frame_size; - rx_queue->rx_req->tp_block_nr = AF_PACKET_RX_BLOCK_NR; - rx_queue->rx_req->tp_frame_nr = - AF_PACKET_RX_BLOCK_NR * rx_frames_per_block; - rx_queue->rx_req->tp_retire_blk_tov = 1; // 1 ms block timout - rx_queue->rx_req->tp_feature_req_word = 0; - rx_queue->rx_req->tp_sizeof_priv = 0; + rx_queue->rx_req->req.tp_block_size = + rx_frame_size * rx_frames_per_block; + rx_queue->rx_req->req.tp_frame_size = rx_frame_size; + rx_queue->rx_req->req.tp_block_nr = (apif->version == TPACKET_V3) ? + AF_PACKET_RX_BLOCK_NR : + AF_PACKET_RX_BLOCK_NR_V2; + rx_queue->rx_req->req.tp_frame_nr = + rx_queue->rx_req->req.tp_block_nr * rx_frames_per_block; + if (apif->version == TPACKET_V3) + { + rx_queue->rx_req->req3.tp_retire_blk_tov = 1; // 1 ms block timout + rx_queue->rx_req->req3.tp_feature_req_word = 0; + rx_queue->rx_req->req3.tp_sizeof_priv = 0; + } rx_req = rx_queue->rx_req; } - if (tx_queue) { tx_frames_per_block = arg->tx_frames_per_block ? 
@@ -385,21 +406,26 @@ af_packet_queue_init (vlib_main_t *vm, af_packet_if_t *apif, AF_PACKET_DEFAULT_TX_FRAME_SIZE; vec_validate (tx_queue->tx_req, 0); - tx_queue->tx_req->tp_block_size = tx_frame_size * tx_frames_per_block; - tx_queue->tx_req->tp_frame_size = tx_frame_size; - tx_queue->tx_req->tp_block_nr = AF_PACKET_TX_BLOCK_NR; - tx_queue->tx_req->tp_frame_nr = + tx_queue->tx_req->req.tp_block_size = + tx_frame_size * tx_frames_per_block; + tx_queue->tx_req->req.tp_frame_size = tx_frame_size; + tx_queue->tx_req->req.tp_block_nr = AF_PACKET_TX_BLOCK_NR; + tx_queue->tx_req->req.tp_frame_nr = AF_PACKET_TX_BLOCK_NR * tx_frames_per_block; - tx_queue->tx_req->tp_retire_blk_tov = 0; - tx_queue->tx_req->tp_sizeof_priv = 0; - tx_queue->tx_req->tp_feature_req_word = 0; + if (apif->version == TPACKET_V3) + { + tx_queue->tx_req->req3.tp_retire_blk_tov = 0; + tx_queue->tx_req->req3.tp_sizeof_priv = 0; + tx_queue->tx_req->req3.tp_feature_req_word = 0; + } tx_req = tx_queue->tx_req; } if (rx_queue || tx_queue) { - ret = create_packet_v3_sock (apif->host_if_index, rx_req, tx_req, &fd, - &ring, apif->dev_instance, &arg->flags); + ret = + create_packet_sock (apif->host_if_index, rx_req, tx_req, &fd, &ring, + apif->dev_instance, &arg->flags, apif->version); if (ret != 0) goto error; @@ -411,28 +437,28 @@ af_packet_queue_init (vlib_main_t *vm, af_packet_if_t *apif, if (rx_queue) { rx_queue->fd = fd; - vec_validate (rx_queue->rx_ring, rx_queue->rx_req->tp_block_nr - 1); + vec_validate (rx_queue->rx_ring, rx_queue->rx_req->req.tp_block_nr - 1); vec_foreach_index (i, rx_queue->rx_ring) { rx_queue->rx_ring[i] = - ring_addr + i * rx_queue->rx_req->tp_block_size; + ring_addr + i * rx_queue->rx_req->req.tp_block_size; } rx_queue->next_rx_block = 0; rx_queue->queue_id = queue_id; rx_queue->is_rx_pending = 0; - ring_addr = ring_addr + rx_queue->rx_req->tp_block_size * - rx_queue->rx_req->tp_block_nr; + ring_addr = ring_addr + rx_queue->rx_req->req.tp_block_size * + 
rx_queue->rx_req->req.tp_block_nr; } if (tx_queue) { tx_queue->fd = fd; - vec_validate (tx_queue->tx_ring, tx_queue->tx_req->tp_block_nr - 1); + vec_validate (tx_queue->tx_ring, tx_queue->tx_req->req.tp_block_nr - 1); vec_foreach_index (i, tx_queue->tx_ring) { tx_queue->tx_ring[i] = - ring_addr + i * tx_queue->tx_req->tp_block_size; + ring_addr + i * tx_queue->tx_req->req.tp_block_size; } tx_queue->next_tx_frame = 0; @@ -604,6 +630,11 @@ af_packet_create_if (af_packet_create_if_arg_t *arg) apif->per_interface_next_index = ~0; apif->mode = arg->mode; + if (arg->is_v2) + apif->version = TPACKET_V2; + else + apif->version = TPACKET_V3; + ret = af_packet_device_init (vm, apif, arg); if (ret != 0) goto error; diff --git a/src/vnet/devices/af_packet/af_packet.h b/src/vnet/devices/af_packet/af_packet.h index 940acbb1372..7aa5e6d5b9a 100644 --- a/src/vnet/devices/af_packet/af_packet.h +++ b/src/vnet/devices/af_packet/af_packet.h @@ -23,9 +23,17 @@ #include <vlib/log.h> typedef struct tpacket_block_desc block_desc_t; +typedef struct tpacket_req tpacket_req_t; typedef struct tpacket_req3 tpacket_req3_t; +typedef struct tpacket2_hdr tpacket2_hdr_t; typedef struct tpacket3_hdr tpacket3_hdr_t; +typedef union _tpacket_req_u +{ + tpacket_req_t req; + tpacket_req3_t req3; +} tpacket_req_u_t; + typedef enum { AF_PACKET_IF_MODE_ETHERNET = 1, @@ -37,6 +45,7 @@ typedef enum AF_PACKET_IF_FLAGS_QDISC_BYPASS = 1, AF_PACKET_IF_FLAGS_CKSUM_GSO = 2, AF_PACKET_IF_FLAGS_FANOUT = 4, + AF_PACKET_IF_FLAGS_VERSION_2 = 8, } af_packet_if_flags_t; typedef struct @@ -58,8 +67,8 @@ typedef struct int fd; union { - tpacket_req3_t *rx_req; - tpacket_req3_t *tx_req; + tpacket_req_u_t *rx_req; + tpacket_req_u_t *tx_req; }; union @@ -71,6 +80,7 @@ typedef struct union { u32 next_rx_block; + u32 next_rx_frame; u32 next_tx_frame; }; @@ -95,7 +105,7 @@ typedef struct af_packet_if_mode_t mode; u8 is_admin_up; u8 is_cksum_gso_enabled; - + u8 version; af_packet_queue_t *rx_queues; af_packet_queue_t *tx_queues; 
@@ -139,6 +149,7 @@ typedef struct u32 tx_frames_per_block; u8 num_rxqs; u8 num_txqs; + u8 is_v2; af_packet_if_mode_t mode; af_packet_if_flags_t flags; diff --git a/src/vnet/devices/af_packet/af_packet_api.c b/src/vnet/devices/af_packet/af_packet_api.c index 21f2c381809..6ca79f9705e 100644 --- a/src/vnet/devices/af_packet/af_packet_api.c +++ b/src/vnet/devices/af_packet/af_packet_api.c @@ -135,6 +135,10 @@ vl_api_af_packet_create_v3_t_handler (vl_api_af_packet_create_v3_t *mp) ((int) AF_PACKET_API_FLAG_CKSUM_GSO == (int) AF_PACKET_IF_FLAGS_CKSUM_GSO), "af-packet checksum/gso offload api flag mismatch"); + STATIC_ASSERT ( + ((int) AF_PACKET_API_FLAG_VERSION_2 == (int) AF_PACKET_IF_FLAGS_VERSION_2), + "af-packet version 2 api flag mismatch"); + // Default flags arg->flags = clib_net_to_host_u32 (mp->flags); @@ -144,6 +148,7 @@ vl_api_af_packet_create_v3_t_handler (vl_api_af_packet_create_v3_t *mp) if (clib_net_to_host_u16 (mp->num_tx_queues) > 1) arg->num_txqs = clib_net_to_host_u16 (mp->num_tx_queues); + arg->is_v2 = (arg->flags & AF_PACKET_API_FLAG_VERSION_2) ? 
1 : 0; rv = af_packet_create_if (arg); error: diff --git a/src/vnet/devices/af_packet/cli.c b/src/vnet/devices/af_packet/cli.c index e730659bfcd..d20ef61a09d 100644 --- a/src/vnet/devices/af_packet/cli.c +++ b/src/vnet/devices/af_packet/cli.c @@ -89,6 +89,8 @@ af_packet_create_command_fn (vlib_main_t * vm, unformat_input_t * input, arg->flags &= ~AF_PACKET_IF_FLAGS_CKSUM_GSO; else if (unformat (line_input, "mode ip")) arg->mode = AF_PACKET_IF_MODE_IP; + else if (unformat (line_input, "v2")) + arg->is_v2 = 1; else if (unformat (line_input, "hw-addr %U", unformat_ethernet_address, hwaddr)) arg->hw_addr = hwaddr; @@ -160,7 +162,7 @@ done: ?*/ VLIB_CLI_COMMAND (af_packet_create_command, static) = { .path = "create host-interface", - .short_help = "create host-interface name <ifname> [num-rx-queues <n>] " + .short_help = "create host-interface [v2] name <ifname> [num-rx-queues <n>] " "[num-tx-queues <n>] [hw-addr <mac-addr>] [mode ip] " "[qdisc-bypass-disable] [cksum-gso-disable]", .function = af_packet_create_command_fn, diff --git a/src/vnet/devices/af_packet/device.c b/src/vnet/devices/af_packet/device.c index 1d14c9b8dcf..2e9b7a4ed83 100644 --- a/src/vnet/devices/af_packet/device.c +++ b/src/vnet/devices/af_packet/device.c @@ -61,7 +61,12 @@ typedef struct u32 buffer_index; u32 hw_if_index; u16 queue_id; - tpacket3_hdr_t tph; + u8 is_v2; + union + { + tpacket2_hdr_t tph2; + tpacket3_hdr_t tph3; + }; vnet_virtio_net_hdr_t vnet_hdr; vlib_buffer_t buffer; } af_packet_tx_trace_t; @@ -91,7 +96,8 @@ format_af_packet_device (u8 * s, va_list * args) af_packet_queue_t *rx_queue = 0; af_packet_queue_t *tx_queue = 0; - s = format (s, "Linux PACKET socket interface"); + s = format (s, "Linux PACKET socket interface %s", + (apif->version == TPACKET_V2) ? 
"v2" : "v3"); s = format (s, "\n%UFEATURES:", format_white_space, indent); if (apif->is_qdisc_bypass_enabled) s = format (s, "\n%Uqdisc-bpass-enabled", format_white_space, indent + 2); @@ -102,17 +108,20 @@ format_af_packet_device (u8 * s, va_list * args) vec_foreach (rx_queue, apif->rx_queues) { - u32 rx_block_size = rx_queue->rx_req->tp_block_size; - u32 rx_frame_size = rx_queue->rx_req->tp_frame_size; - u32 rx_frame_nr = rx_queue->rx_req->tp_frame_nr; - u32 rx_block_nr = rx_queue->rx_req->tp_block_nr; + u32 rx_block_size = rx_queue->rx_req->req.tp_block_size; + u32 rx_frame_size = rx_queue->rx_req->req.tp_frame_size; + u32 rx_frame_nr = rx_queue->rx_req->req.tp_frame_nr; + u32 rx_block_nr = rx_queue->rx_req->req.tp_block_nr; s = format (s, "\n%URX Queue %u:", format_white_space, indent, rx_queue->queue_id); s = format (s, "\n%Ublock size:%d nr:%d frame size:%d nr:%d", format_white_space, indent + 2, rx_block_size, rx_block_nr, rx_frame_size, rx_frame_nr); - s = format (s, " next block:%d", rx_queue->next_rx_block); + if (apif->version == TPACKET_V2) + s = format (s, " next frame:%d", rx_queue->next_rx_frame); + else + s = format (s, " next block:%d", rx_queue->next_rx_block); if (rx_queue->is_rx_pending) { s = format ( @@ -125,15 +134,16 @@ format_af_packet_device (u8 * s, va_list * args) vec_foreach (tx_queue, apif->tx_queues) { clib_spinlock_lock (&tx_queue->lockp); - u32 tx_block_sz = tx_queue->tx_req->tp_block_size; - u32 tx_frame_sz = tx_queue->tx_req->tp_frame_size; - u32 tx_frame_nr = tx_queue->tx_req->tp_frame_nr; - u32 tx_block_nr = tx_queue->tx_req->tp_block_nr; + u32 tx_block_sz = tx_queue->tx_req->req.tp_block_size; + u32 tx_frame_sz = tx_queue->tx_req->req.tp_frame_size; + u32 tx_frame_nr = tx_queue->tx_req->req.tp_frame_nr; + u32 tx_block_nr = tx_queue->tx_req->req.tp_block_nr; int block = 0; int n_send_req = 0, n_avail = 0, n_sending = 0, n_tot = 0, n_wrong = 0; u8 *tx_block_start = tx_queue->tx_ring[block]; u32 tx_frame = tx_queue->next_tx_frame; 
- tpacket3_hdr_t *tph; + tpacket3_hdr_t *tph3; + tpacket2_hdr_t *tph2; s = format (s, "\n%UTX Queue %u:", format_white_space, indent, tx_queue->queue_id); @@ -141,22 +151,40 @@ format_af_packet_device (u8 * s, va_list * args) format_white_space, indent + 2, tx_block_sz, tx_block_nr, tx_frame_sz, tx_frame_nr); s = format (s, " next frame:%d", tx_queue->next_tx_frame); - - do - { - tph = (tpacket3_hdr_t *) (tx_block_start + tx_frame * tx_frame_sz); - tx_frame = (tx_frame + 1) % tx_frame_nr; - if (tph->tp_status == 0) - n_avail++; - else if (tph->tp_status & TP_STATUS_SEND_REQUEST) - n_send_req++; - else if (tph->tp_status & TP_STATUS_SENDING) - n_sending++; - else - n_wrong++; - n_tot++; - } - while (tx_frame != tx_queue->next_tx_frame); + if (apif->version & TPACKET_V3) + do + { + tph3 = + (tpacket3_hdr_t *) (tx_block_start + tx_frame * tx_frame_sz); + tx_frame = (tx_frame + 1) % tx_frame_nr; + if (tph3->tp_status == 0) + n_avail++; + else if (tph3->tp_status & TP_STATUS_SEND_REQUEST) + n_send_req++; + else if (tph3->tp_status & TP_STATUS_SENDING) + n_sending++; + else + n_wrong++; + n_tot++; + } + while (tx_frame != tx_queue->next_tx_frame); + else + do + { + tph2 = + (tpacket2_hdr_t *) (tx_block_start + tx_frame * tx_frame_sz); + tx_frame = (tx_frame + 1) % tx_frame_nr; + if (tph2->tp_status == 0) + n_avail++; + else if (tph2->tp_status & TP_STATUS_SEND_REQUEST) + n_send_req++; + else if (tph2->tp_status & TP_STATUS_SENDING) + n_sending++; + else + n_wrong++; + n_tot++; + } + while (tx_frame != tx_queue->next_tx_frame); s = format (s, "\n%Uavailable:%d request:%d sending:%d wrong:%d total:%d", format_white_space, indent + 2, n_avail, n_send_req, n_sending, @@ -177,24 +205,46 @@ format_af_packet_tx_trace (u8 *s, va_list *va) s = format (s, "af_packet: hw_if_index %u tx-queue %u", t->hw_if_index, t->queue_id); - s = - format (s, - "\n%Utpacket3_hdr:\n%Ustatus 0x%x len %u snaplen %u mac %u net %u" - "\n%Usec 0x%x nsec 0x%x vlan %U" + if (t->is_v2) + { + s = format ( 
+ s, + "\n%Utpacket2_hdr:\n%Ustatus 0x%x len %u snaplen %u mac %u net %u" + "\n%Usec 0x%x nsec 0x%x vlan %U" #ifdef TP_STATUS_VLAN_TPID_VALID - " vlan_tpid %u" + " vlan_tpid %u" #endif - , - format_white_space, indent + 2, format_white_space, indent + 4, - t->tph.tp_status, t->tph.tp_len, t->tph.tp_snaplen, t->tph.tp_mac, - t->tph.tp_net, format_white_space, indent + 4, t->tph.tp_sec, - t->tph.tp_nsec, format_ethernet_vlan_tci, t->tph.hv1.tp_vlan_tci + , + format_white_space, indent + 2, format_white_space, indent + 4, + t->tph2.tp_status, t->tph2.tp_len, t->tph2.tp_snaplen, t->tph2.tp_mac, + t->tph2.tp_net, format_white_space, indent + 4, t->tph2.tp_sec, + t->tph2.tp_nsec, format_ethernet_vlan_tci, t->tph2.tp_vlan_tci #ifdef TP_STATUS_VLAN_TPID_VALID - , - t->tph.hv1.tp_vlan_tpid + , + t->tph2.tp_vlan_tpid #endif - ); - + ); + } + else + { + s = format ( + s, + "\n%Utpacket3_hdr:\n%Ustatus 0x%x len %u snaplen %u mac %u net %u" + "\n%Usec 0x%x nsec 0x%x vlan %U" +#ifdef TP_STATUS_VLAN_TPID_VALID + " vlan_tpid %u" +#endif + , + format_white_space, indent + 2, format_white_space, indent + 4, + t->tph3.tp_status, t->tph3.tp_len, t->tph3.tp_snaplen, t->tph3.tp_mac, + t->tph3.tp_net, format_white_space, indent + 4, t->tph3.tp_sec, + t->tph3.tp_nsec, format_ethernet_vlan_tci, t->tph3.hv1.tp_vlan_tci +#ifdef TP_STATUS_VLAN_TPID_VALID + , + t->tph3.hv1.tp_vlan_tpid +#endif + ); + } s = format (s, "\n%Uvnet-hdr:\n%Uflags 0x%02x gso_type 0x%02x hdr_len %u" "\n%Ugso_size %u csum_start %u csum_offset %u", @@ -214,17 +264,23 @@ format_af_packet_tx_trace (u8 *s, va_list *va) static void af_packet_tx_trace (vlib_main_t *vm, vlib_node_runtime_t *node, - vlib_buffer_t *b0, u32 bi, tpacket3_hdr_t *tph, + vlib_buffer_t *b0, u32 bi, void *tph, vnet_virtio_net_hdr_t *vnet_hdr, u32 hw_if_index, - u16 queue_id) + u16 queue_id, u8 is_v2) { af_packet_tx_trace_t *t; t = vlib_add_trace (vm, node, b0, sizeof (t[0])); t->hw_if_index = hw_if_index; t->queue_id = queue_id; t->buffer_index = bi; 
+ t->is_v2 = is_v2; - clib_memcpy_fast (&t->tph, tph, sizeof (*tph)); + if (is_v2) + clib_memcpy_fast (&t->tph2, (tpacket2_hdr_t *) tph, + sizeof (tpacket2_hdr_t)); + else + clib_memcpy_fast (&t->tph3, (tpacket3_hdr_t *) tph, + sizeof (tpacket3_hdr_t)); clib_memcpy_fast (&t->vnet_hdr, vnet_hdr, sizeof (*vnet_hdr)); clib_memcpy_fast (&t->buffer, b0, sizeof (*b0) - sizeof (b0->pre_data)); clib_memcpy_fast (t->buffer.pre_data, vlib_buffer_get_current (b0), @@ -337,95 +393,177 @@ VNET_DEVICE_CLASS_TX_FN (af_packet_device_class) (vlib_main_t * vm, af_packet_queue_t *tx_queue = vec_elt_at_index (apif->tx_queues, queue_id); u32 block = 0, frame_size = 0, frame_num = 0, tx_frame = 0; u8 *block_start = 0; - tpacket3_hdr_t *tph = 0; + tpacket3_hdr_t *tph3 = 0; + tpacket2_hdr_t *tph2 = 0; u32 frame_not_ready = 0; u8 is_cksum_gso_enabled = (apif->is_cksum_gso_enabled == 1) ? 1 : 0; + u32 tpacket_align = 0; + u8 is_v2 = (apif->version == TPACKET_V2) ? 1 : 0; if (tf->shared_queue) clib_spinlock_lock (&tx_queue->lockp); - frame_size = tx_queue->tx_req->tp_frame_size; - frame_num = tx_queue->tx_req->tp_frame_nr; + frame_size = tx_queue->tx_req->req.tp_frame_size; + frame_num = tx_queue->tx_req->req.tp_frame_nr; block_start = tx_queue->tx_ring[block]; tx_frame = tx_queue->next_tx_frame; - - while (n_left) + if (is_v2) { - u32 len; - vnet_virtio_net_hdr_t *vnet_hdr = 0; - u32 offset = 0; - vlib_buffer_t *b0 = 0, *b0_first = 0; - u32 bi, bi_first; - - bi = bi_first = buffers[0]; - n_left--; - buffers++; - - tph = (tpacket3_hdr_t *) (block_start + tx_frame * frame_size); - if (PREDICT_FALSE (tph->tp_status & - (TP_STATUS_SEND_REQUEST | TP_STATUS_SENDING))) + tpacket_align = TPACKET_ALIGN (sizeof (tpacket2_hdr_t)); + while (n_left) { - frame_not_ready++; - goto next; - } + u32 len; + vnet_virtio_net_hdr_t *vnet_hdr = 0; + u32 offset = 0; + vlib_buffer_t *b0 = 0, *b0_first = 0; + u32 bi, bi_first; + + bi = bi_first = buffers[0]; + n_left--; + buffers++; + + tph2 = (tpacket2_hdr_t *) 
(block_start + tx_frame * frame_size); + if (PREDICT_FALSE (tph2->tp_status & + (TP_STATUS_SEND_REQUEST | TP_STATUS_SENDING))) + { + frame_not_ready++; + goto nextv2; + } - b0_first = b0 = vlib_get_buffer (vm, bi); + b0_first = b0 = vlib_get_buffer (vm, bi); - if (PREDICT_TRUE (is_cksum_gso_enabled)) - { - vnet_hdr = - (vnet_virtio_net_hdr_t *) ((u8 *) tph + TPACKET_ALIGN (sizeof ( - tpacket3_hdr_t))); + if (PREDICT_TRUE (is_cksum_gso_enabled)) + { + vnet_hdr = + (vnet_virtio_net_hdr_t *) ((u8 *) tph2 + tpacket_align); - clib_memset_u8 (vnet_hdr, 0, sizeof (vnet_virtio_net_hdr_t)); - offset = sizeof (vnet_virtio_net_hdr_t); + clib_memset_u8 (vnet_hdr, 0, sizeof (vnet_virtio_net_hdr_t)); + offset = sizeof (vnet_virtio_net_hdr_t); - if (b0->flags & VNET_BUFFER_F_GSO) - fill_gso_offload (b0, vnet_hdr); - else if (b0->flags & VNET_BUFFER_F_OFFLOAD) - fill_cksum_offload (b0, vnet_hdr); - } - - len = b0->current_length; - clib_memcpy_fast ((u8 *) tph + TPACKET_ALIGN (sizeof (tpacket3_hdr_t)) + - offset, - vlib_buffer_get_current (b0), len); - offset += len; + if (b0->flags & VNET_BUFFER_F_GSO) + fill_gso_offload (b0, vnet_hdr); + else if (b0->flags & VNET_BUFFER_F_OFFLOAD) + fill_cksum_offload (b0, vnet_hdr); + } - while (b0->flags & VLIB_BUFFER_NEXT_PRESENT) - { - b0 = vlib_get_buffer (vm, b0->next_buffer); len = b0->current_length; - clib_memcpy_fast ((u8 *) tph + - TPACKET_ALIGN (sizeof (tpacket3_hdr_t)) + offset, + clib_memcpy_fast ((u8 *) tph2 + tpacket_align + offset, vlib_buffer_get_current (b0), len); offset += len; - } - tph->tp_len = tph->tp_snaplen = offset; - tph->tp_status = TP_STATUS_SEND_REQUEST; - n_sent++; + while (b0->flags & VLIB_BUFFER_NEXT_PRESENT) + { + b0 = vlib_get_buffer (vm, b0->next_buffer); + len = b0->current_length; + clib_memcpy_fast ((u8 *) tph2 + tpacket_align + offset, + vlib_buffer_get_current (b0), len); + offset += len; + } + + tph2->tp_len = tph2->tp_snaplen = offset; + tph2->tp_status = TP_STATUS_SEND_REQUEST; + n_sent++; - if 
(PREDICT_FALSE (b0_first->flags & VLIB_BUFFER_IS_TRACED)) + if (PREDICT_FALSE (b0_first->flags & VLIB_BUFFER_IS_TRACED)) + { + if (PREDICT_TRUE (is_cksum_gso_enabled)) + af_packet_tx_trace (vm, node, b0_first, bi_first, tph2, + vnet_hdr, apif->hw_if_index, queue_id, 1); + else + { + vnet_virtio_net_hdr_t vnet_hdr2 = {}; + af_packet_tx_trace (vm, node, b0_first, bi_first, tph2, + &vnet_hdr2, apif->hw_if_index, queue_id, + 1); + } + } + tx_frame = (tx_frame + 1) % frame_num; + + nextv2: + /* check if we've exhausted the ring */ + if (PREDICT_FALSE (frame_not_ready + n_sent == frame_num)) + break; + } + } + else + { + tpacket_align = TPACKET_ALIGN (sizeof (tpacket3_hdr_t)); + + while (n_left) { + u32 len; + vnet_virtio_net_hdr_t *vnet_hdr = 0; + u32 offset = 0; + vlib_buffer_t *b0 = 0, *b0_first = 0; + u32 bi, bi_first; + + bi = bi_first = buffers[0]; + n_left--; + buffers++; + + tph3 = (tpacket3_hdr_t *) (block_start + tx_frame * frame_size); + if (PREDICT_FALSE (tph3->tp_status & + (TP_STATUS_SEND_REQUEST | TP_STATUS_SENDING))) + { + frame_not_ready++; + goto nextv3; + } + + b0_first = b0 = vlib_get_buffer (vm, bi); + if (PREDICT_TRUE (is_cksum_gso_enabled)) - af_packet_tx_trace (vm, node, b0_first, bi_first, tph, vnet_hdr, - apif->hw_if_index, queue_id); - else { - vnet_virtio_net_hdr_t vnet_hdr2 = {}; - af_packet_tx_trace (vm, node, b0_first, bi_first, tph, - &vnet_hdr2, apif->hw_if_index, queue_id); + vnet_hdr = + (vnet_virtio_net_hdr_t *) ((u8 *) tph3 + tpacket_align); + + clib_memset_u8 (vnet_hdr, 0, sizeof (vnet_virtio_net_hdr_t)); + offset = sizeof (vnet_virtio_net_hdr_t); + + if (b0->flags & VNET_BUFFER_F_GSO) + fill_gso_offload (b0, vnet_hdr); + else if (b0->flags & VNET_BUFFER_F_OFFLOAD) + fill_cksum_offload (b0, vnet_hdr); } - } - tx_frame = (tx_frame + 1) % frame_num; - next: - /* check if we've exhausted the ring */ - if (PREDICT_FALSE (frame_not_ready + n_sent == frame_num)) - break; - } + len = b0->current_length; + clib_memcpy_fast ((u8 *) tph3 + 
tpacket_align + offset, + vlib_buffer_get_current (b0), len); + offset += len; + + while (b0->flags & VLIB_BUFFER_NEXT_PRESENT) + { + b0 = vlib_get_buffer (vm, b0->next_buffer); + len = b0->current_length; + clib_memcpy_fast ((u8 *) tph3 + tpacket_align + offset, + vlib_buffer_get_current (b0), len); + offset += len; + } + + tph3->tp_len = tph3->tp_snaplen = offset; + tph3->tp_status = TP_STATUS_SEND_REQUEST; + n_sent++; + if (PREDICT_FALSE (b0_first->flags & VLIB_BUFFER_IS_TRACED)) + { + if (PREDICT_TRUE (is_cksum_gso_enabled)) + af_packet_tx_trace (vm, node, b0_first, bi_first, tph3, + vnet_hdr, apif->hw_if_index, queue_id, 0); + else + { + vnet_virtio_net_hdr_t vnet_hdr2 = {}; + af_packet_tx_trace (vm, node, b0_first, bi_first, tph3, + &vnet_hdr2, apif->hw_if_index, queue_id, + 0); + } + } + tx_frame = (tx_frame + 1) % frame_num; + + nextv3: + /* check if we've exhausted the ring */ + if (PREDICT_FALSE (frame_not_ready + n_sent == frame_num)) + break; + } + } CLIB_MEMORY_BARRIER (); if (PREDICT_TRUE (n_sent || tx_queue->is_tx_pending)) diff --git a/src/vnet/devices/af_packet/node.c b/src/vnet/devices/af_packet/node.c index 8c72afb2456..724924f5f4b 100644 --- a/src/vnet/devices/af_packet/node.c +++ b/src/vnet/devices/af_packet/node.c @@ -57,8 +57,13 @@ typedef struct u32 pkt_num; void *block_start; block_desc_t bd; - tpacket3_hdr_t tph; + union + { + tpacket3_hdr_t tph3; + tpacket2_hdr_t tph2; + }; vnet_virtio_net_hdr_t vnet_hdr; + u8 is_v3; } af_packet_input_trace_t; static u8 * @@ -72,27 +77,51 @@ format_af_packet_input_trace (u8 * s, va_list * args) s = format (s, "af_packet: hw_if_index %d rx-queue %u next-index %d", t->hw_if_index, t->queue_id, t->next_index); - s = format ( - s, "\n%Ublock %u:\n%Uaddress %p version %u seq_num %lu pkt_num %u", - format_white_space, indent + 2, t->block, format_white_space, indent + 4, - t->block_start, t->bd.version, t->bd.hdr.bh1.seq_num, t->pkt_num); - s = - format (s, - "\n%Utpacket3_hdr:\n%Ustatus 0x%x len %u snaplen %u 
mac %u net %u" - "\n%Usec 0x%x nsec 0x%x vlan %U" + if (t->is_v3) + { + s = format ( + s, "\n%Ublock %u:\n%Uaddress %p version %u seq_num %lu pkt_num %u", + format_white_space, indent + 2, t->block, format_white_space, + indent + 4, t->block_start, t->bd.version, t->bd.hdr.bh1.seq_num, + t->pkt_num); + s = format ( + s, + "\n%Utpacket3_hdr:\n%Ustatus 0x%x len %u snaplen %u mac %u net %u" + "\n%Usec 0x%x nsec 0x%x vlan %U" #ifdef TP_STATUS_VLAN_TPID_VALID - " vlan_tpid %u" + " vlan_tpid %u" #endif - , - format_white_space, indent + 2, format_white_space, indent + 4, - t->tph.tp_status, t->tph.tp_len, t->tph.tp_snaplen, t->tph.tp_mac, - t->tph.tp_net, format_white_space, indent + 4, t->tph.tp_sec, - t->tph.tp_nsec, format_ethernet_vlan_tci, t->tph.hv1.tp_vlan_tci + , + format_white_space, indent + 2, format_white_space, indent + 4, + t->tph3.tp_status, t->tph3.tp_len, t->tph3.tp_snaplen, t->tph3.tp_mac, + t->tph3.tp_net, format_white_space, indent + 4, t->tph3.tp_sec, + t->tph3.tp_nsec, format_ethernet_vlan_tci, t->tph3.hv1.tp_vlan_tci #ifdef TP_STATUS_VLAN_TPID_VALID - , - t->tph.hv1.tp_vlan_tpid + , + t->tph3.hv1.tp_vlan_tpid #endif - ); + ); + } + else + { + s = format ( + s, + "\n%Utpacket2_hdr:\n%Ustatus 0x%x len %u snaplen %u mac %u net %u" + "\n%Usec 0x%x nsec 0x%x vlan %U" +#ifdef TP_STATUS_VLAN_TPID_VALID + " vlan_tpid %u" +#endif + , + format_white_space, indent + 2, format_white_space, indent + 4, + t->tph2.tp_status, t->tph2.tp_len, t->tph2.tp_snaplen, t->tph2.tp_mac, + t->tph2.tp_net, format_white_space, indent + 4, t->tph2.tp_sec, + t->tph2.tp_nsec, format_ethernet_vlan_tci, t->tph2.tp_vlan_tci +#ifdef TP_STATUS_VLAN_TPID_VALID + , + t->tph2.tp_vlan_tpid +#endif + ); + } s = format (s, "\n%Uvnet-hdr:\n%Uflags 0x%02x gso_type 0x%02x hdr_len %u" @@ -222,9 +251,9 @@ fill_cksum_offload (vlib_buffer_t *b, u8 *l4_hdr_sz, u8 is_ip) } always_inline uword -af_packet_device_input_fn (vlib_main_t *vm, vlib_node_runtime_t *node, - vlib_frame_t *frame, 
af_packet_if_t *apif, - u16 queue_id, u8 is_cksum_gso_enabled) +af_packet_v3_device_input_fn (vlib_main_t *vm, vlib_node_runtime_t *node, + vlib_frame_t *frame, af_packet_if_t *apif, + u16 queue_id, u8 is_cksum_gso_enabled) { af_packet_main_t *apm = &af_packet_main; af_packet_queue_t *rx_queue = vec_elt_at_index (apif->rx_queues, queue_id); @@ -237,12 +266,12 @@ af_packet_device_input_fn (vlib_main_t *vm, vlib_node_runtime_t *node, u32 total = 0; u32 *to_next = 0; u32 block = rx_queue->next_rx_block; - u32 block_nr = rx_queue->rx_req->tp_block_nr; + u32 block_nr = rx_queue->rx_req->req3.tp_block_nr; u8 *block_start = 0; uword n_trace = vlib_get_trace_count (vm, node); u32 thread_index = vm->thread_index; u32 n_buffer_bytes = vlib_buffer_get_default_data_size (vm); - u32 min_bufs = rx_queue->rx_req->tp_frame_size / n_buffer_bytes; + u32 min_bufs = rx_queue->rx_req->req3.tp_frame_size / n_buffer_bytes; u32 num_pkts = 0; u32 rx_frame_offset = 0; block_desc_t *bd = 0; @@ -458,6 +487,7 @@ af_packet_device_input_fn (vlib_main_t *vm, vlib_node_runtime_t *node, af_packet_input_trace_t *tr; vlib_set_trace_count (vm, node, --n_trace); tr = vlib_add_trace (vm, node, first_b0, sizeof (*tr)); + tr->is_v3 = 1; tr->next_index = next0; tr->hw_if_index = apif->hw_if_index; tr->queue_id = queue_id; @@ -465,7 +495,7 @@ af_packet_device_input_fn (vlib_main_t *vm, vlib_node_runtime_t *node, tr->block_start = bd; tr->pkt_num = bd->hdr.bh1.num_pkts - num_pkts; clib_memcpy_fast (&tr->bd, bd, sizeof (block_desc_t)); - clib_memcpy_fast (&tr->tph, tph, sizeof (tpacket3_hdr_t)); + clib_memcpy_fast (&tr->tph3, tph, sizeof (tpacket3_hdr_t)); if (is_cksum_gso_enabled) clib_memcpy_fast (&tr->vnet_hdr, vnet_hdr, sizeof (vnet_virtio_net_hdr_t)); @@ -528,6 +558,247 @@ done: return n_rx_packets; } +always_inline uword +af_packet_v2_device_input_fn (vlib_main_t *vm, vlib_node_runtime_t *node, + vlib_frame_t *frame, af_packet_if_t *apif, + u16 queue_id, u8 is_cksum_gso_enabled) +{ + af_packet_main_t 
*apm = &af_packet_main; + af_packet_queue_t *rx_queue = vec_elt_at_index (apif->rx_queues, queue_id); + tpacket2_hdr_t *tph; + u32 next_index; + u32 block = 0; + u32 rx_frame; + u32 n_free_bufs; + u32 n_rx_packets = 0; + u32 n_rx_bytes = 0; + u32 *to_next = 0; + u32 frame_size = rx_queue->rx_req->req.tp_frame_size; + u32 frame_num = rx_queue->rx_req->req.tp_frame_nr; + u8 *block_start = rx_queue->rx_ring[block]; + uword n_trace = vlib_get_trace_count (vm, node); + u32 thread_index = vm->thread_index; + u32 n_buffer_bytes = vlib_buffer_get_default_data_size (vm); + u32 min_bufs = rx_queue->rx_req->req.tp_frame_size / n_buffer_bytes; + u8 is_ip = (apif->mode == AF_PACKET_IF_MODE_IP); + vlib_buffer_t bt = {}; + + if (is_ip) + { + next_index = VNET_DEVICE_INPUT_NEXT_IP4_INPUT; + } + else + { + next_index = VNET_DEVICE_INPUT_NEXT_ETHERNET_INPUT; + if (PREDICT_FALSE (apif->per_interface_next_index != ~0)) + next_index = apif->per_interface_next_index; + + /* redirect if feature path enabled */ + vnet_feature_start_device_input_x1 (apif->sw_if_index, &next_index, &bt); + } + + n_free_bufs = vec_len (apm->rx_buffers[thread_index]); + if (PREDICT_FALSE (n_free_bufs < VLIB_FRAME_SIZE)) + { + vec_validate (apm->rx_buffers[thread_index], + VLIB_FRAME_SIZE + n_free_bufs - 1); + n_free_bufs += vlib_buffer_alloc ( + vm, &apm->rx_buffers[thread_index][n_free_bufs], VLIB_FRAME_SIZE); + vec_set_len (apm->rx_buffers[thread_index], n_free_bufs); + } + + rx_frame = rx_queue->next_rx_frame; + tph = (tpacket2_hdr_t *) (block_start + rx_frame * frame_size); + while ((tph->tp_status & TP_STATUS_USER) && (n_free_bufs > min_bufs)) + { + vlib_buffer_t *b0 = 0, *first_b0 = 0, *prev_b0 = 0; + u32 next0 = next_index; + + u32 n_left_to_next; + vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); + while ((tph->tp_status & TP_STATUS_USER) && (n_free_bufs > min_bufs) && + n_left_to_next) + { + vnet_virtio_net_hdr_t *vnet_hdr = 0; + u32 data_len = tph->tp_snaplen; + u32 offset = 0; + 
u32 bi0 = 0, first_bi0 = 0; + u8 l4_hdr_sz = 0; + + if (is_cksum_gso_enabled) + vnet_hdr = + (vnet_virtio_net_hdr_t *) ((u8 *) tph + tph->tp_mac - + sizeof (vnet_virtio_net_hdr_t)); + while (data_len) + { + /* grab free buffer */ + u32 last_empty_buffer = + vec_len (apm->rx_buffers[thread_index]) - 1; + bi0 = apm->rx_buffers[thread_index][last_empty_buffer]; + b0 = vlib_get_buffer (vm, bi0); + vec_set_len (apm->rx_buffers[thread_index], last_empty_buffer); + n_free_bufs--; + + /* copy data */ + u32 bytes_to_copy = + data_len > n_buffer_bytes ? n_buffer_bytes : data_len; + u32 vlan_len = 0; + u32 bytes_copied = 0; + b0->current_data = 0; + /* Kernel removes VLAN headers, so reconstruct VLAN */ + if (PREDICT_FALSE (tph->tp_status & TP_STATUS_VLAN_VALID)) + { + if (PREDICT_TRUE (offset == 0)) + { + clib_memcpy_fast (vlib_buffer_get_current (b0), + (u8 *) tph + tph->tp_mac, + sizeof (ethernet_header_t)); + ethernet_header_t *eth = vlib_buffer_get_current (b0); + ethernet_vlan_header_t *vlan = + (ethernet_vlan_header_t *) (eth + 1); + vlan->priority_cfi_and_id = + clib_host_to_net_u16 (tph->tp_vlan_tci); + vlan->type = eth->type; + eth->type = clib_host_to_net_u16 (ETHERNET_TYPE_VLAN); + vlan_len = sizeof (ethernet_vlan_header_t); + bytes_copied = sizeof (ethernet_header_t); + } + } + clib_memcpy_fast (((u8 *) vlib_buffer_get_current (b0)) + + bytes_copied + vlan_len, + (u8 *) tph + tph->tp_mac + offset + + bytes_copied, + (bytes_to_copy - bytes_copied)); + + /* fill buffer header */ + b0->current_length = bytes_to_copy + vlan_len; + + if (offset == 0) + { + b0->total_length_not_including_first_buffer = 0; + b0->flags = VLIB_BUFFER_TOTAL_LENGTH_VALID; + vnet_buffer (b0)->sw_if_index[VLIB_RX] = apif->sw_if_index; + vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~0; + first_bi0 = bi0; + first_b0 = vlib_get_buffer (vm, first_bi0); + + if (is_cksum_gso_enabled) + { + if (vnet_hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) + fill_cksum_offload (first_b0, &l4_hdr_sz, is_ip); + if 
(vnet_hdr->gso_type & (VIRTIO_NET_HDR_GSO_TCPV4 | + VIRTIO_NET_HDR_GSO_TCPV6)) + fill_gso_offload (first_b0, vnet_hdr->gso_size, + l4_hdr_sz); + } + } + else + buffer_add_to_chain (b0, first_b0, prev_b0, bi0); + + prev_b0 = b0; + offset += bytes_to_copy; + data_len -= bytes_to_copy; + } + n_rx_packets++; + n_rx_bytes += tph->tp_snaplen; + to_next[0] = first_bi0; + to_next += 1; + n_left_to_next--; + + /* drop partial packets */ + if (PREDICT_FALSE (tph->tp_len != tph->tp_snaplen)) + { + next0 = VNET_DEVICE_INPUT_NEXT_DROP; + first_b0->error = + node->errors[AF_PACKET_INPUT_ERROR_PARTIAL_PKT]; + } + else + { + if (PREDICT_FALSE (is_ip)) + { + switch (first_b0->data[0] & 0xf0) + { + case 0x40: + next0 = VNET_DEVICE_INPUT_NEXT_IP4_INPUT; + break; + case 0x60: + next0 = VNET_DEVICE_INPUT_NEXT_IP6_INPUT; + break; + default: + next0 = VNET_DEVICE_INPUT_NEXT_DROP; + break; + } + if (PREDICT_FALSE (apif->per_interface_next_index != ~0)) + next0 = apif->per_interface_next_index; + } + else + { + /* copy feature arc data from template */ + first_b0->current_config_index = bt.current_config_index; + vnet_buffer (first_b0)->feature_arc_index = + vnet_buffer (&bt)->feature_arc_index; + } + } + + /* trace */ + if (PREDICT_FALSE (n_trace > 0 && + vlib_trace_buffer (vm, node, next0, first_b0, + /* follow_chain */ 0))) + { + af_packet_input_trace_t *tr; + vlib_set_trace_count (vm, node, --n_trace); + tr = vlib_add_trace (vm, node, first_b0, sizeof (*tr)); + tr->is_v3 = 0; + tr->next_index = next0; + tr->hw_if_index = apif->hw_if_index; + tr->queue_id = queue_id; + clib_memcpy_fast (&tr->tph2, tph, sizeof (struct tpacket2_hdr)); + if (is_cksum_gso_enabled) + clib_memcpy_fast (&tr->vnet_hdr, vnet_hdr, + sizeof (vnet_virtio_net_hdr_t)); + else + clib_memset_u8 (&tr->vnet_hdr, 0, + sizeof (vnet_virtio_net_hdr_t)); + } + + /* enque and take next packet */ + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next, + n_left_to_next, first_bi0, next0); + + /* next packet */ + 
/* Tail of af_packet_v2_device_input_fn: return the consumed frame to the
   kernel and advance to the next slot in the V2 RX ring. */
tph->tp_status = TP_STATUS_KERNEL; + rx_frame = (rx_frame + 1) % frame_num; + tph = (struct tpacket2_hdr *) (block_start + rx_frame * frame_size); + } + + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + + /* Remember ring position for the next poll and bump RX counters. */ + rx_queue->next_rx_frame = rx_frame; + + vlib_increment_combined_counter ( + vnet_get_main ()->interface_main.combined_sw_if_counters + + VNET_INTERFACE_COUNTER_RX, + vlib_get_thread_index (), apif->hw_if_index, n_rx_packets, n_rx_bytes); + + vnet_device_increment_rx_packets (thread_index, n_rx_packets); + return n_rx_packets; +} + +/* Dispatcher: route RX processing for one queue to the TPACKET_V3 or
   TPACKET_V2 ring handler, chosen by the version recorded on the interface
   at creation time (apif->version). Returns the number of packets
   enqueued to the graph. */ +always_inline uword +af_packet_device_input_fn (vlib_main_t *vm, vlib_node_runtime_t *node, + vlib_frame_t *frame, af_packet_if_t *apif, + u16 queue_id, u8 is_cksum_gso_enabled) + +{ + if (apif->version == TPACKET_V3) + return af_packet_v3_device_input_fn (vm, node, frame, apif, queue_id, + is_cksum_gso_enabled); + else + /* anything other than TPACKET_V3 falls back to the V2 path */ + return af_packet_v2_device_input_fn (vm, node, frame, apif, queue_id, + is_cksum_gso_enabled); +} + VLIB_NODE_FN (af_packet_input_node) (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) |