diff options
| | | |
|---|---|---|
| author | Mohsin Kazmi <sykazmi@cisco.com> | 2022-03-25 14:27:45 +0000 |
| committer | Damjan Marion <dmarion@me.com> | 2022-04-04 14:22:36 +0000 |
| commit | 5a7aa51f00d562814204aca7831777651a00869f (patch) | |
| tree | 33539381ecf5de1ff1f5f82e85dc8e415589f147 /src/vnet/devices/af_packet | |
| parent | 3414977152ae6362277158dc732e6b9958a6e618 (diff) | |
devices: add multi-queue support for af-packet
Type: feature
Change-Id: I0f4e6517fcfa07ffb0aba89b159ac1337937a508
Signed-off-by: Mohsin Kazmi <sykazmi@cisco.com>
Diffstat (limited to 'src/vnet/devices/af_packet')
-rw-r--r-- | src/vnet/devices/af_packet/af_packet.c | 532 |
-rw-r--r-- | src/vnet/devices/af_packet/af_packet.h | 71 |
-rw-r--r-- | src/vnet/devices/af_packet/af_packet_api.c | 9 |
-rw-r--r-- | src/vnet/devices/af_packet/cli.c | 12 |
-rw-r--r-- | src/vnet/devices/af_packet/device.c | 149 |
-rw-r--r-- | src/vnet/devices/af_packet/node.c | 68 |
6 files changed, 571 insertions, 270 deletions
diff --git a/src/vnet/devices/af_packet/af_packet.c b/src/vnet/devices/af_packet/af_packet.c index 5526d72e7a5..ac6fc05a627 100644 --- a/src/vnet/devices/af_packet/af_packet.c +++ b/src/vnet/devices/af_packet/af_packet.c @@ -33,6 +33,7 @@ #include <vnet/devices/netlink.h> #include <vnet/ethernet/ethernet.h> #include <vnet/interface/rx_queue_funcs.h> +#include <vnet/interface/tx_queue_funcs.h> #include <vnet/devices/af_packet/af_packet.h> @@ -47,9 +48,9 @@ VNET_HW_INTERFACE_CLASS (af_packet_ip_device_hw_interface_class, static) = { #define AF_PACKET_DEFAULT_TX_FRAME_SIZE (2048 * 33) // GSO packet of 64KB #define AF_PACKET_TX_BLOCK_NR 1 -#define AF_PACKET_DEFAULT_RX_FRAMES_PER_BLOCK 256 +#define AF_PACKET_DEFAULT_RX_FRAMES_PER_BLOCK 32 #define AF_PACKET_DEFAULT_RX_FRAME_SIZE 2048 -#define AF_PACKET_RX_BLOCK_NR 20 +#define AF_PACKET_RX_BLOCK_NR 160 /*defined in net/if.h but clashes with dpdk headers */ unsigned int if_nametoindex (const char *ifname); @@ -98,13 +99,10 @@ af_packet_read_mtu (af_packet_if_t *apif) static clib_error_t * af_packet_fd_read_ready (clib_file_t * uf) { - af_packet_main_t *apm = &af_packet_main; vnet_main_t *vnm = vnet_get_main (); - u32 idx = uf->private_data; - af_packet_if_t *apif = pool_elt_at_index (apm->interfaces, idx); /* Schedule the rx node */ - vnet_hw_if_rx_queue_set_int_pending (vnm, apif->queue_index); + vnet_hw_if_rx_queue_set_int_pending (vnm, uf->private_data); return 0; } @@ -127,10 +125,74 @@ is_bridge (const u8 * host_if_name) return -1; } +static void +af_packet_set_rx_queues (vlib_main_t *vm, af_packet_if_t *apif) +{ + vnet_main_t *vnm = vnet_get_main (); + af_packet_queue_t *rx_queue; + + vnet_hw_if_set_input_node (vnm, apif->hw_if_index, + af_packet_input_node.index); + + vec_foreach (rx_queue, apif->rx_queues) + { + rx_queue->queue_index = vnet_hw_if_register_rx_queue ( + vnm, apif->hw_if_index, rx_queue->queue_id, VNET_HW_IF_RXQ_THREAD_ANY); + + { + clib_file_t template = { 0 }; + template.read_function = 
af_packet_fd_read_ready; + template.file_descriptor = rx_queue->fd; + template.private_data = rx_queue->queue_index; + template.flags = UNIX_FILE_EVENT_EDGE_TRIGGERED; + template.description = + format (0, "%U queue %u", format_af_packet_device_name, + apif->dev_instance, rx_queue->queue_id); + rx_queue->clib_file_index = clib_file_add (&file_main, &template); + } + vnet_hw_if_set_rx_queue_file_index (vnm, rx_queue->queue_index, + rx_queue->clib_file_index); + vnet_hw_if_set_rx_queue_mode (vnm, rx_queue->queue_index, + VNET_HW_IF_RX_MODE_INTERRUPT); + rx_queue->mode = VNET_HW_IF_RX_MODE_INTERRUPT; + } + vnet_hw_if_update_runtime_data (vnm, apif->hw_if_index); +} + +static void +af_packet_set_tx_queues (vlib_main_t *vm, af_packet_if_t *apif) +{ + vnet_main_t *vnm = vnet_get_main (); + af_packet_main_t *apm = &af_packet_main; + af_packet_queue_t *tx_queue; + + vec_foreach (tx_queue, apif->tx_queues) + { + tx_queue->queue_index = vnet_hw_if_register_tx_queue ( + vnm, apif->hw_if_index, tx_queue->queue_id); + } + + if (apif->num_txqs == 0) + { + vlib_log_err (apm->log_class, "Interface %U has 0 txq", + format_vnet_hw_if_index_name, vnm, apif->hw_if_index); + return; + } + + for (u32 j = 0; j < vlib_get_n_threads (); j++) + { + u32 qi = apif->tx_queues[j % apif->num_txqs].queue_index; + vnet_hw_if_tx_queue_assign_thread (vnm, qi, j); + } + + vnet_hw_if_update_runtime_data (vnm, apif->hw_if_index); +} + static int create_packet_v3_sock (int host_if_index, tpacket_req3_t *rx_req, - tpacket_req3_t *tx_req, int *fd, u8 **ring, - u32 *hdrlen_ptr, u8 *is_cksum_gso_enabled) + tpacket_req3_t *tx_req, int *fd, af_packet_ring_t *ring, + u32 *hdrlen_ptr, u8 *is_cksum_gso_enabled, + u32 fanout_id, u8 is_fanout) { af_packet_main_t *apm = &af_packet_main; struct sockaddr_ll sll; @@ -139,14 +201,19 @@ create_packet_v3_sock (int host_if_index, tpacket_req3_t *rx_req, int ver = TPACKET_V3; u32 hdrlen = 0; u32 len = sizeof (hdrlen); - u32 ring_sz = rx_req->tp_block_size * 
rx_req->tp_block_nr + - tx_req->tp_block_size * tx_req->tp_block_nr; + u32 ring_sz = 0; + + if (rx_req) + ring_sz += rx_req->tp_block_size * rx_req->tp_block_nr; + + if (tx_req) + ring_sz += tx_req->tp_block_size * tx_req->tp_block_nr; if ((*fd = socket (AF_PACKET, SOCK_RAW, htons (ETH_P_ALL))) < 0) { - vlib_log_debug (apm->log_class, - "Failed to create AF_PACKET socket: %s (errno %d)", - strerror (errno), errno); + vlib_log_err (apm->log_class, + "Failed to create AF_PACKET socket: %s (errno %d)", + strerror (errno), errno); ret = VNET_API_ERROR_SYSCALL_ERROR_1; goto error; } @@ -158,26 +225,25 @@ create_packet_v3_sock (int host_if_index, tpacket_req3_t *rx_req, sll.sll_ifindex = host_if_index; if (bind (*fd, (struct sockaddr *) &sll, sizeof (sll)) < 0) { - vlib_log_debug (apm->log_class, - "Failed to bind rx packet socket: %s (errno %d)", - strerror (errno), errno); + vlib_log_err (apm->log_class, + "Failed to bind rx packet socket: %s (errno %d)", + strerror (errno), errno); ret = VNET_API_ERROR_SYSCALL_ERROR_1; goto error; } if (setsockopt (*fd, SOL_PACKET, PACKET_VERSION, &ver, sizeof (ver)) < 0) { - vlib_log_debug ( - apm->log_class, - "Failed to set rx packet interface version: %s (errno %d)", - strerror (errno), errno); + vlib_log_err (apm->log_class, + "Failed to set rx packet interface version: %s (errno %d)", + strerror (errno), errno); ret = VNET_API_ERROR_SYSCALL_ERROR_1; goto error; } if (getsockopt (*fd, SOL_PACKET, PACKET_HDRLEN, &hdrlen, &len) < 0) { - vlib_log_debug ( + vlib_log_err ( apm->log_class, "Failed to get packet hdr len error handling option: %s (errno %d)", strerror (errno), errno); @@ -190,7 +256,7 @@ create_packet_v3_sock (int host_if_index, tpacket_req3_t *rx_req, int opt = 1; if (setsockopt (*fd, SOL_PACKET, PACKET_LOSS, &opt, sizeof (opt)) < 0) { - vlib_log_debug ( + vlib_log_err ( apm->log_class, "Failed to set packet tx ring error handling option: %s (errno %d)", strerror (errno), errno); @@ -221,34 +287,52 @@ 
create_packet_v3_sock (int host_if_index, tpacket_req3_t *rx_req, } #endif - if (setsockopt (*fd, SOL_PACKET, PACKET_RX_RING, rx_req, req_sz) < 0) + if (is_fanout) { - vlib_log_debug (apm->log_class, - "Failed to set packet rx ring options: %s (errno %d)", - strerror (errno), errno); - ret = VNET_API_ERROR_SYSCALL_ERROR_1; - goto error; + int fanout = ((fanout_id & 0xffff) | ((PACKET_FANOUT_HASH) << 16)); + if (setsockopt (*fd, SOL_PACKET, PACKET_FANOUT, &fanout, + sizeof (fanout)) < 0) + { + vlib_log_err (apm->log_class, + "Failed to set fanout options: %s (errno %d)", + strerror (errno), errno); + ret = VNET_API_ERROR_SYSCALL_ERROR_1; + goto error; + } } - if (setsockopt (*fd, SOL_PACKET, PACKET_TX_RING, tx_req, req_sz) < 0) - { - vlib_log_debug (apm->log_class, + if (rx_req) + if (setsockopt (*fd, SOL_PACKET, PACKET_RX_RING, rx_req, req_sz) < 0) + { + vlib_log_err (apm->log_class, + "Failed to set packet rx ring options: %s (errno %d)", + strerror (errno), errno); + ret = VNET_API_ERROR_SYSCALL_ERROR_1; + goto error; + } + + if (tx_req) + if (setsockopt (*fd, SOL_PACKET, PACKET_TX_RING, tx_req, req_sz) < 0) + { + vlib_log_err (apm->log_class, "Failed to set packet tx ring options: %s (errno %d)", strerror (errno), errno); - ret = VNET_API_ERROR_SYSCALL_ERROR_1; - goto error; - } + ret = VNET_API_ERROR_SYSCALL_ERROR_1; + goto error; + } - *ring = mmap (NULL, ring_sz, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_LOCKED, - *fd, 0); - if (*ring == MAP_FAILED) + ring->ring_start_addr = mmap (NULL, ring_sz, PROT_READ | PROT_WRITE, + MAP_SHARED | MAP_LOCKED, *fd, 0); + if (ring->ring_start_addr == MAP_FAILED) { - vlib_log_debug (apm->log_class, "mmap failure: %s (errno %d)", - strerror (errno), errno); + vlib_log_err (apm->log_class, "mmap failure: %s (errno %d)", + strerror (errno), errno); ret = VNET_API_ERROR_SYSCALL_ERROR_1; goto error; } + ring->ring_size = ring_sz; + return 0; error: if (*fd >= 0) @@ -260,30 +344,192 @@ error: } int -af_packet_create_if 
(af_packet_create_if_arg_t *arg) +af_packet_queue_init (vlib_main_t *vm, af_packet_if_t *apif, + af_packet_create_if_arg_t *arg, + af_packet_queue_t *rx_queue, af_packet_queue_t *tx_queue, + u8 queue_id, u8 is_fanout) { af_packet_main_t *apm = &af_packet_main; - vlib_main_t *vm = vlib_get_main (); - int ret, fd = -1, fd2 = -1; tpacket_req3_t *rx_req = 0; tpacket_req3_t *tx_req = 0; + int ret, fd = -1; + af_packet_ring_t ring = { 0 }; + u8 *ring_addr = 0; + u32 rx_frames_per_block, tx_frames_per_block; + u32 rx_frame_size, tx_frame_size; + u32 hdrlen = 0; + u32 i = 0; + u8 is_cksum_gso_enabled = 0; + + if (rx_queue) + { + rx_frames_per_block = arg->rx_frames_per_block ? + arg->rx_frames_per_block : + AF_PACKET_DEFAULT_RX_FRAMES_PER_BLOCK; + + rx_frame_size = arg->rx_frame_size ? arg->rx_frame_size : + AF_PACKET_DEFAULT_RX_FRAME_SIZE; + vec_validate (rx_queue->rx_req, 0); + rx_queue->rx_req->tp_block_size = rx_frame_size * rx_frames_per_block; + rx_queue->rx_req->tp_frame_size = rx_frame_size; + rx_queue->rx_req->tp_block_nr = AF_PACKET_RX_BLOCK_NR; + rx_queue->rx_req->tp_frame_nr = + AF_PACKET_RX_BLOCK_NR * rx_frames_per_block; + rx_queue->rx_req->tp_retire_blk_tov = 1; // 1 ms block timout + rx_queue->rx_req->tp_feature_req_word = 0; + rx_queue->rx_req->tp_sizeof_priv = 0; + rx_req = rx_queue->rx_req; + } + + if (tx_queue) + { + tx_frames_per_block = arg->tx_frames_per_block ? + arg->tx_frames_per_block : + AF_PACKET_DEFAULT_TX_FRAMES_PER_BLOCK; + tx_frame_size = arg->tx_frame_size ? 
arg->tx_frame_size : + AF_PACKET_DEFAULT_TX_FRAME_SIZE; + + vec_validate (tx_queue->tx_req, 0); + tx_queue->tx_req->tp_block_size = tx_frame_size * tx_frames_per_block; + tx_queue->tx_req->tp_frame_size = tx_frame_size; + tx_queue->tx_req->tp_block_nr = AF_PACKET_TX_BLOCK_NR; + tx_queue->tx_req->tp_frame_nr = + AF_PACKET_TX_BLOCK_NR * tx_frames_per_block; + tx_queue->tx_req->tp_retire_blk_tov = 0; + tx_queue->tx_req->tp_sizeof_priv = 0; + tx_queue->tx_req->tp_feature_req_word = 0; + tx_req = tx_queue->tx_req; + } + + ret = create_packet_v3_sock (apif->host_if_index, rx_req, tx_req, &fd, &ring, + &hdrlen, &is_cksum_gso_enabled, + apif->dev_instance, is_fanout); + + if (ret != 0) + goto error; + + vec_add1 (apif->rings, ring); + ring_addr = ring.ring_start_addr; + + if (rx_queue) + { + rx_queue->fd = fd; + vec_validate (rx_queue->rx_ring, rx_queue->rx_req->tp_block_nr - 1); + vec_foreach_index (i, rx_queue->rx_ring) + { + rx_queue->rx_ring[i] = + ring_addr + i * rx_queue->rx_req->tp_block_size; + } + + rx_queue->next_rx_block = 0; + rx_queue->queue_id = queue_id; + rx_queue->is_rx_pending = 0; + ring_addr = ring_addr + rx_queue->rx_req->tp_block_size * + rx_queue->rx_req->tp_block_nr; + } + + if (tx_queue) + { + tx_queue->fd = fd; + vec_validate (tx_queue->tx_ring, tx_queue->tx_req->tp_block_nr - 1); + vec_foreach_index (i, tx_queue->tx_ring) + { + tx_queue->tx_ring[i] = + ring_addr + i * tx_queue->tx_req->tp_block_size; + } + + tx_queue->next_tx_frame = 0; + tx_queue->queue_id = queue_id; + clib_spinlock_init (&tx_queue->lockp); + } + + if (queue_id == 0) + { + apif->hdrlen = hdrlen; + apif->is_cksum_gso_enabled = is_cksum_gso_enabled; + } + + return 0; +error: + vlib_log_err (apm->log_class, "Failed to set queue %u error", queue_id); + vec_free (rx_queue->rx_req); + vec_free (tx_queue->tx_req); + return ret; +} + +int +af_packet_device_init (vlib_main_t *vm, af_packet_if_t *apif, + af_packet_create_if_arg_t *args) +{ + af_packet_main_t *apm = &af_packet_main; + 
af_packet_queue_t *rx_queue = 0; + af_packet_queue_t *tx_queue = 0; + u16 nq = clib_min (args->num_rxqs, args->num_txqs); + u16 i = 0; + int ret = 0; + u8 is_fanout = (args->num_rxqs > 1) ? 1 : 0; + + vec_validate (apif->rx_queues, args->num_rxqs - 1); + vec_validate (apif->tx_queues, args->num_txqs - 1); + + for (; i < nq; i++) + { + rx_queue = vec_elt_at_index (apif->rx_queues, i); + tx_queue = vec_elt_at_index (apif->tx_queues, i); + ret = af_packet_queue_init (vm, apif, args, rx_queue, tx_queue, i, + is_fanout); + if (ret != 0) + goto error; + } + + if (args->num_rxqs > args->num_txqs) + { + for (; i < args->num_rxqs; i++) + { + rx_queue = vec_elt_at_index (apif->rx_queues, i); + ret = + af_packet_queue_init (vm, apif, args, rx_queue, 0, i, is_fanout); + if (ret != 0) + goto error; + } + } + else if (args->num_txqs > args->num_rxqs) + { + for (; i < args->num_txqs; i++) + { + tx_queue = vec_elt_at_index (apif->tx_queues, i); + ret = af_packet_queue_init (vm, apif, args, 0, tx_queue, i, 0); + if (ret != 0) + goto error; + } + } + + apif->num_rxqs = args->num_rxqs; + apif->num_txqs = args->num_txqs; + + return 0; +error: + vlib_log_err (apm->log_class, "Failed to init device error"); + return ret; +} + +int +af_packet_create_if (af_packet_create_if_arg_t *arg) +{ + af_packet_main_t *apm = &af_packet_main; + vlib_main_t *vm = vlib_get_main (); + int fd2 = -1; struct ifreq ifr; - u8 *ring = 0; af_packet_if_t *apif = 0; u8 hw_addr[6]; vnet_sw_interface_t *sw; - vlib_thread_main_t *tm = vlib_get_thread_main (); vnet_main_t *vnm = vnet_get_main (); vnet_hw_if_caps_t caps = VNET_HW_IF_CAP_INT_MODE; uword *p; uword if_index; u8 *host_if_name_dup = 0; int host_if_index = -1; - u32 rx_frames_per_block, tx_frames_per_block; - u32 rx_frame_size, tx_frame_size; - u32 hdrlen = 0; - u32 i = 0; - u8 is_cksum_gso_enabled = 0; + int ret = 0; p = mhash_get (&apm->if_index_by_host_if_name, arg->host_if_name); if (p) @@ -295,35 +541,6 @@ af_packet_create_if 
(af_packet_create_if_arg_t *arg) host_if_name_dup = vec_dup (arg->host_if_name); - rx_frames_per_block = arg->rx_frames_per_block ? - arg->rx_frames_per_block : - AF_PACKET_DEFAULT_RX_FRAMES_PER_BLOCK; - tx_frames_per_block = arg->tx_frames_per_block ? - arg->tx_frames_per_block : - AF_PACKET_DEFAULT_TX_FRAMES_PER_BLOCK; - rx_frame_size = - arg->rx_frame_size ? arg->rx_frame_size : AF_PACKET_DEFAULT_RX_FRAME_SIZE; - tx_frame_size = - arg->tx_frame_size ? arg->tx_frame_size : AF_PACKET_DEFAULT_TX_FRAME_SIZE; - - vec_validate (rx_req, 0); - rx_req->tp_block_size = rx_frame_size * rx_frames_per_block; - rx_req->tp_frame_size = rx_frame_size; - rx_req->tp_block_nr = AF_PACKET_RX_BLOCK_NR; - rx_req->tp_frame_nr = AF_PACKET_RX_BLOCK_NR * rx_frames_per_block; - rx_req->tp_retire_blk_tov = 0; - rx_req->tp_feature_req_word = 0; - rx_req->tp_sizeof_priv = 0; - - vec_validate (tx_req, 0); - tx_req->tp_block_size = tx_frame_size * tx_frames_per_block; - tx_req->tp_frame_size = tx_frame_size; - tx_req->tp_block_nr = AF_PACKET_TX_BLOCK_NR; - tx_req->tp_frame_nr = AF_PACKET_TX_BLOCK_NR * tx_frames_per_block; - tx_req->tp_retire_blk_tov = 0; - tx_req->tp_sizeof_priv = 0; - tx_req->tp_feature_req_word = 0; - /* * make sure host side of interface is 'UP' before binding AF_PACKET * socket on it. 
@@ -378,14 +595,7 @@ af_packet_create_if (af_packet_create_if_arg_t *arg) fd2 = -1; } - ret = create_packet_v3_sock (host_if_index, rx_req, tx_req, &fd, &ring, - &hdrlen, &is_cksum_gso_enabled); - - if (ret != 0) - goto error; - ret = is_bridge (arg->host_if_name); - if (ret == 0) /* is a bridge, ignore state */ host_if_index = -1; @@ -393,40 +603,20 @@ af_packet_create_if (af_packet_create_if_arg_t *arg) pool_get (apm->interfaces, apif); if_index = apif - apm->interfaces; + apif->dev_instance = if_index; apif->host_if_index = host_if_index; - apif->fd = fd; - - vec_validate (apif->rx_ring, rx_req->tp_block_nr - 1); - vec_foreach_index (i, apif->rx_ring) - { - apif->rx_ring[i] = ring + i * rx_req->tp_block_size; - } - - ring = ring + rx_req->tp_block_size * rx_req->tp_block_nr; - - vec_validate (apif->tx_ring, tx_req->tp_block_nr - 1); - vec_foreach_index (i, apif->tx_ring) - { - apif->tx_ring[i] = ring + i * tx_req->tp_block_size; - } - - apif->rx_req = rx_req; - apif->tx_req = tx_req; apif->host_if_name = host_if_name_dup; apif->per_interface_next_index = ~0; - apif->next_tx_frame = 0; - apif->next_rx_block = 0; apif->mode = arg->mode; - apif->hdrlen = hdrlen; - apif->is_cksum_gso_enabled = is_cksum_gso_enabled; - apif->ss.is_save = 0; + + ret = af_packet_device_init (vm, apif, arg); + if (ret != 0) + goto error; ret = af_packet_read_mtu (apif); if (ret != 0) goto error; - if (tm->n_vlib_mains > 1) - clib_spinlock_init (&apif->lockp); if (apif->mode != AF_PACKET_IF_MODE_IP) { @@ -447,7 +637,7 @@ af_packet_create_if (af_packet_create_if_arg_t *arg) } eir.dev_class_index = af_packet_device_class.index; - eir.dev_instance = if_index; + eir.dev_instance = apif->dev_instance; eir.address = hw_addr; eir.cb.set_max_frame_size = af_packet_eth_set_max_frame_size; apif->hw_if_index = vnet_eth_register_interface (vnm, &eir); @@ -455,15 +645,15 @@ af_packet_create_if (af_packet_create_if_arg_t *arg) else { apif->hw_if_index = vnet_register_interface ( - vnm, 
af_packet_device_class.index, if_index, - af_packet_ip_device_hw_interface_class.index, if_index); + vnm, af_packet_device_class.index, apif->dev_instance, + af_packet_ip_device_hw_interface_class.index, apif->dev_instance); } + sw = vnet_get_hw_sw_interface (vnm, apif->hw_if_index); apif->sw_if_index = sw->sw_if_index; - vnet_hw_if_set_input_node (vnm, apif->hw_if_index, - af_packet_input_node.index); - apif->queue_index = vnet_hw_if_register_rx_queue (vnm, apif->hw_if_index, 0, - VNET_HW_IF_RXQ_THREAD_ANY); + + af_packet_set_rx_queues (vm, apif); + af_packet_set_tx_queues (vm, apif); if (apif->is_cksum_gso_enabled) caps |= VNET_HW_IF_CAP_TCP_GSO | VNET_HW_IF_CAP_TX_IP4_CKSUM | @@ -473,22 +663,6 @@ af_packet_create_if (af_packet_create_if_arg_t *arg) vnet_hw_interface_set_flags (vnm, apif->hw_if_index, VNET_HW_INTERFACE_FLAG_LINK_UP); - vnet_hw_if_set_rx_queue_mode (vnm, apif->queue_index, - VNET_HW_IF_RX_MODE_INTERRUPT); - vnet_hw_if_update_runtime_data (vnm, apif->hw_if_index); - { - clib_file_t template = { 0 }; - template.read_function = af_packet_fd_read_ready; - template.file_descriptor = fd; - template.private_data = if_index; - template.flags = UNIX_FILE_EVENT_EDGE_TRIGGERED; - template.description = - format (0, "%U", format_af_packet_device_name, if_index); - apif->clib_file_index = clib_file_add (&file_main, &template); - } - vnet_hw_if_set_rx_queue_file_index (vnm, apif->queue_index, - apif->clib_file_index); - mhash_set_mem (&apm->if_index_by_host_if_name, host_if_name_dup, &if_index, 0); arg->sw_if_index = apif->sw_if_index; @@ -502,20 +676,65 @@ error: fd2 = -1; } vec_free (host_if_name_dup); - vec_free (rx_req); - vec_free (tx_req); + memset (apif, 0, sizeof (*apif)); + pool_put (apm->interfaces, apif); return ret; } +static int +af_packet_rx_queue_free (af_packet_if_t *apif, af_packet_queue_t *rx_queue) +{ + clib_file_del_by_index (&file_main, rx_queue->clib_file_index); + close (rx_queue->fd); + rx_queue->fd = -1; + rx_queue->rx_ring = NULL; + 
vec_free (rx_queue->rx_req); + rx_queue->rx_req = NULL; + return 0; +} + +static int +af_packet_tx_queue_free (af_packet_if_t *apif, af_packet_queue_t *tx_queue) +{ + close (tx_queue->fd); + tx_queue->fd = -1; + clib_spinlock_free (&tx_queue->lockp); + tx_queue->tx_ring = NULL; + vec_free (tx_queue->tx_req); + tx_queue->tx_req = NULL; + return 0; +} + +static int +af_packet_ring_free (af_packet_if_t *apif, af_packet_ring_t *ring) +{ + af_packet_main_t *apm = &af_packet_main; + + if (ring) + { + // FIXME: unmap the memory + if (munmap (ring->ring_start_addr, ring->ring_size)) + vlib_log_warn (apm->log_class, + "Host interface %s could not free ring %p of size %u", + apif->host_if_name, ring->ring_start_addr, + ring->ring_size); + else + ring->ring_start_addr = 0; + } + + return 0; +} + int af_packet_delete_if (u8 *host_if_name) { vnet_main_t *vnm = vnet_get_main (); af_packet_main_t *apm = &af_packet_main; af_packet_if_t *apif; + af_packet_queue_t *rx_queue; + af_packet_queue_t *tx_queue; + af_packet_ring_t *ring; uword *p; - uword if_index; - u32 ring_sz; p = mhash_get (&apm->if_index_by_host_if_name, host_if_name); if (p == NULL) @@ -525,46 +744,37 @@ af_packet_delete_if (u8 *host_if_name) return VNET_API_ERROR_SYSCALL_ERROR_1; } apif = pool_elt_at_index (apm->interfaces, p[0]); - if_index = apif - apm->interfaces; /* bring down the interface */ vnet_hw_interface_set_flags (vnm, apif->hw_if_index, 0); /* clean up */ - if (apif->clib_file_index != ~0) - { - clib_file_del (&file_main, file_main.file_pool + apif->clib_file_index); - apif->clib_file_index = ~0; - } - else - close (apif->fd); - - ring_sz = apif->rx_req->tp_block_size * apif->rx_req->tp_block_nr + - apif->tx_req->tp_block_size * apif->tx_req->tp_block_nr; - if (munmap (apif->rx_ring, ring_sz)) - vlib_log_warn (apm->log_class, - "Host interface %s could not free rx/tx ring", - host_if_name); - apif->rx_ring = NULL; - apif->tx_ring = NULL; - apif->fd = -1; - - vec_free (apif->rx_req); - apif->rx_req = 
NULL; - vec_free (apif->tx_req); - apif->tx_req = NULL; + vec_foreach (rx_queue, apif->rx_queues) + af_packet_rx_queue_free (apif, rx_queue); + vec_foreach (tx_queue, apif->tx_queues) + af_packet_tx_queue_free (apif, tx_queue); + vec_foreach (ring, apif->rings) + af_packet_ring_free (apif, ring); + + vec_free (apif->rx_queues); + apif->rx_queues = NULL; + vec_free (apif->tx_queues); + apif->tx_queues = NULL; + vec_free (apif->rings); + apif->rings = NULL; vec_free (apif->host_if_name); apif->host_if_name = NULL; apif->host_if_index = -1; - mhash_unset (&apm->if_index_by_host_if_name, host_if_name, &if_index); + mhash_unset (&apm->if_index_by_host_if_name, host_if_name, p); if (apif->mode != AF_PACKET_IF_MODE_IP) ethernet_delete_interface (vnm, apif->hw_if_index); else vnet_delete_hw_interface (vnm, apif->hw_if_index); + memset (apif, 0, sizeof (*apif)); pool_put (apm->interfaces, apif); return 0; diff --git a/src/vnet/devices/af_packet/af_packet.h b/src/vnet/devices/af_packet/af_packet.h index 217d2f6b009..4cae700909a 100644 --- a/src/vnet/devices/af_packet/af_packet.h +++ b/src/vnet/devices/af_packet/af_packet.h @@ -40,37 +40,68 @@ typedef struct typedef struct { - u32 rx_frame_offset; - u32 num_pkts; - u8 is_save; -} save_state_t; + u8 *ring_start_addr; + u32 ring_size; +} af_packet_ring_t; typedef struct { CLIB_CACHE_LINE_ALIGN_MARK (cacheline0); clib_spinlock_t lockp; - u8 *host_if_name; - int host_if_index; int fd; - tpacket_req3_t *rx_req; - tpacket_req3_t *tx_req; - u8 **rx_ring; - u8 **tx_ring; - u8 is_cksum_gso_enabled; - u32 hdrlen; - u32 hw_if_index; - u32 sw_if_index; + union + { + tpacket_req3_t *rx_req; + tpacket_req3_t *tx_req; + }; + + union + { + u8 **rx_ring; + u8 **tx_ring; + }; + + union + { + u32 next_rx_block; + u32 next_tx_frame; + }; + + u16 queue_id; + u32 queue_index; + u32 clib_file_index; - u32 next_rx_block; - u32 next_tx_frame; + u32 rx_frame_offset; + u16 num_rx_pkts; + u8 is_rx_pending; + vnet_hw_if_rx_mode mode; +} 
af_packet_queue_t; +typedef struct +{ + CLIB_CACHE_LINE_ALIGN_MARK (cacheline0); + u32 hw_if_index; + u32 sw_if_index; u32 per_interface_next_index; + af_packet_if_mode_t mode; u8 is_admin_up; - u32 queue_index; + u8 is_cksum_gso_enabled; + + af_packet_queue_t *rx_queues; + af_packet_queue_t *tx_queues; + + u8 num_rxqs; + u8 num_txqs; + + u8 *host_if_name; + int host_if_index; + u32 hdrlen; + u32 host_mtu; - af_packet_if_mode_t mode; - save_state_t ss; + u32 dev_instance; + + af_packet_ring_t *rings; } af_packet_if_t; typedef struct @@ -96,6 +127,8 @@ typedef struct u32 tx_frame_size; u32 rx_frames_per_block; u32 tx_frames_per_block; + u8 num_rxqs; + u8 num_txqs; af_packet_if_mode_t mode; /* return */ diff --git a/src/vnet/devices/af_packet/af_packet_api.c b/src/vnet/devices/af_packet/af_packet_api.c index 3d57977eb49..693380d8ae5 100644 --- a/src/vnet/devices/af_packet/af_packet_api.c +++ b/src/vnet/devices/af_packet/af_packet_api.c @@ -68,6 +68,9 @@ vl_api_af_packet_create_v2_t_handler (vl_api_af_packet_create_v2_t *mp) arg->host_if_name = format (0, "%s", mp->host_if_name); vec_add1 (arg->host_if_name, 0); + // Default number of rx/tx queue(s) + arg->num_rxqs = 1; + arg->num_txqs = 1; arg->rx_frame_size = clib_net_to_host_u32 (mp->rx_frame_size); arg->tx_frame_size = clib_net_to_host_u32 (mp->tx_frame_size); arg->rx_frames_per_block = clib_net_to_host_u32 (mp->rx_frames_per_block); @@ -76,14 +79,10 @@ vl_api_af_packet_create_v2_t_handler (vl_api_af_packet_create_v2_t *mp) arg->mode = AF_PACKET_IF_MODE_ETHERNET; if (mp->num_rx_queues > 1) - { - rv = VNET_API_ERROR_INVALID_VALUE; - goto out; - } + arg->num_rxqs = clib_net_to_host_u16 (mp->num_rx_queues); rv = af_packet_create_if (arg); -out: vec_free (arg->host_if_name); REPLY_MACRO2 (VL_API_AF_PACKET_CREATE_V2_REPLY, ({ rmp->sw_if_index = clib_host_to_net_u32 (arg->sw_if_index); diff --git a/src/vnet/devices/af_packet/cli.c b/src/vnet/devices/af_packet/cli.c index 3dd3c8ee848..c90d5251f9f 100644 --- 
a/src/vnet/devices/af_packet/cli.c +++ b/src/vnet/devices/af_packet/cli.c @@ -54,6 +54,10 @@ af_packet_create_command_fn (vlib_main_t * vm, unformat_input_t * input, // Default mode arg->mode = AF_PACKET_IF_MODE_ETHERNET; + // Default number of rx/tx queue(s) + arg->num_rxqs = 1; + arg->num_txqs = 1; + /* Get a line of input. */ if (!unformat_user (input, unformat_line_input, line_input)) return 0; @@ -72,6 +76,10 @@ af_packet_create_command_fn (vlib_main_t * vm, unformat_input_t * input, else if (unformat (line_input, "tx-per-block %u", &arg->tx_frames_per_block)) ; + else if (unformat (line_input, "num-rx-queues %u", &arg->num_rxqs)) + ; + else if (unformat (line_input, "num-tx-queues %u", &arg->num_txqs)) + ; else if (unformat (line_input, "mode ip")) arg->mode = AF_PACKET_IF_MODE_IP; else if (unformat (line_input, "hw-addr %U", unformat_ethernet_address, @@ -145,8 +153,8 @@ done: ?*/ VLIB_CLI_COMMAND (af_packet_create_command, static) = { .path = "create host-interface", - .short_help = - "create host-interface name <ifname> [hw-addr <mac-addr>] [mode ip]", + .short_help = "create host-interface name <ifname> [num-rx-queues <n>] " + "[num-tx-queues <n>] [hw-addr <mac-addr>] [mode ip]", .function = af_packet_create_command_fn, }; diff --git a/src/vnet/devices/af_packet/device.c b/src/vnet/devices/af_packet/device.c index f7f006ebd7f..013d9f71733 100644 --- a/src/vnet/devices/af_packet/device.c +++ b/src/vnet/devices/af_packet/device.c @@ -60,6 +60,7 @@ typedef struct { u32 buffer_index; u32 hw_if_index; + u16 queue_id; tpacket3_hdr_t tph; vnet_virtio_net_hdr_t vnet_hdr; vlib_buffer_t buffer; @@ -87,51 +88,74 @@ format_af_packet_device (u8 * s, va_list * args) af_packet_main_t *apm = &af_packet_main; af_packet_if_t *apif = pool_elt_at_index (apm->interfaces, dev_instance); - clib_spinlock_lock_if_init (&apif->lockp); - u32 tx_block_sz = apif->tx_req->tp_block_size; - u32 tx_frame_sz = apif->tx_req->tp_frame_size; - u32 tx_frame_nr = apif->tx_req->tp_frame_nr; - 
u32 tx_block_nr = apif->tx_req->tp_block_nr; - u32 rx_block_size = apif->rx_req->tp_block_size; - u32 rx_frame_size = apif->rx_req->tp_frame_size; - u32 rx_frame_nr = apif->rx_req->tp_frame_nr; - u32 rx_block_nr = apif->rx_req->tp_block_nr; - int block = 0; - u8 *tx_block_start = apif->tx_ring[block]; - u32 tx_frame = apif->next_tx_frame; - tpacket3_hdr_t *tph; - - s = format (s, "Linux PACKET socket interface\n"); - s = format (s, "%UTX block size:%d nr:%d TX frame size:%d nr:%d\n", - format_white_space, indent, tx_block_sz, tx_block_nr, - tx_frame_sz, tx_frame_nr); - s = format (s, "%URX block size:%d nr:%d RX frame size:%d nr:%d\n", - format_white_space, indent, rx_block_size, rx_block_nr, - rx_frame_size, rx_frame_nr); - s = format (s, "%Unext frame:%d\n", format_white_space, indent, - apif->next_tx_frame); - - int n_send_req = 0, n_avail = 0, n_sending = 0, n_tot = 0, n_wrong = 0; - do + af_packet_queue_t *rx_queue = 0; + af_packet_queue_t *tx_queue = 0; + + s = format (s, "Linux PACKET socket interface"); + + vec_foreach (rx_queue, apif->rx_queues) { - tph = (tpacket3_hdr_t *) (tx_block_start + tx_frame * tx_frame_sz); - tx_frame = (tx_frame + 1) % tx_frame_nr; - if (tph->tp_status == 0) - n_avail++; - else if (tph->tp_status & TP_STATUS_SEND_REQUEST) - n_send_req++; - else if (tph->tp_status & TP_STATUS_SENDING) - n_sending++; - else - n_wrong++; - n_tot++; + u32 rx_block_size = rx_queue->rx_req->tp_block_size; + u32 rx_frame_size = rx_queue->rx_req->tp_frame_size; + u32 rx_frame_nr = rx_queue->rx_req->tp_frame_nr; + u32 rx_block_nr = rx_queue->rx_req->tp_block_nr; + + s = format (s, "\n%URX Queue %u:", format_white_space, indent, + rx_queue->queue_id); + s = format (s, "\n%Ublock size:%d nr:%d frame size:%d nr:%d", + format_white_space, indent + 2, rx_block_size, rx_block_nr, + rx_frame_size, rx_frame_nr); + s = format (s, " next block:%d", rx_queue->next_rx_block); + if (rx_queue->is_rx_pending) + { + s = format ( + s, "\n%UPending Request: num-rx-pkts:%d 
next-frame-offset:%d", + format_white_space, indent + 2, rx_queue->num_rx_pkts, + rx_queue->rx_frame_offset); + } } - while (tx_frame != apif->next_tx_frame); - s = format (s, "%Uavailable:%d request:%d sending:%d wrong:%d total:%d\n", - format_white_space, indent, n_avail, n_send_req, n_sending, - n_wrong, n_tot); - clib_spinlock_unlock_if_init (&apif->lockp); + vec_foreach (tx_queue, apif->tx_queues) + { + clib_spinlock_lock (&tx_queue->lockp); + u32 tx_block_sz = tx_queue->tx_req->tp_block_size; + u32 tx_frame_sz = tx_queue->tx_req->tp_frame_size; + u32 tx_frame_nr = tx_queue->tx_req->tp_frame_nr; + u32 tx_block_nr = tx_queue->tx_req->tp_block_nr; + int block = 0; + int n_send_req = 0, n_avail = 0, n_sending = 0, n_tot = 0, n_wrong = 0; + u8 *tx_block_start = tx_queue->tx_ring[block]; + u32 tx_frame = tx_queue->next_tx_frame; + tpacket3_hdr_t *tph; + + s = format (s, "\n%UTX Queue %u:", format_white_space, indent, + tx_queue->queue_id); + s = format (s, "\n%Ublock size:%d nr:%d frame size:%d nr:%d", + format_white_space, indent + 2, tx_block_sz, tx_block_nr, + tx_frame_sz, tx_frame_nr); + s = format (s, " next frame:%d", tx_queue->next_tx_frame); + + do + { + tph = (tpacket3_hdr_t *) (tx_block_start + tx_frame * tx_frame_sz); + tx_frame = (tx_frame + 1) % tx_frame_nr; + if (tph->tp_status == 0) + n_avail++; + else if (tph->tp_status & TP_STATUS_SEND_REQUEST) + n_send_req++; + else if (tph->tp_status & TP_STATUS_SENDING) + n_sending++; + else + n_wrong++; + n_tot++; + } + while (tx_frame != tx_queue->next_tx_frame); + s = + format (s, "\n%Uavailable:%d request:%d sending:%d wrong:%d total:%d", + format_white_space, indent + 2, n_avail, n_send_req, n_sending, + n_wrong, n_tot); + clib_spinlock_unlock (&tx_queue->lockp); + } return s; } @@ -143,7 +167,8 @@ format_af_packet_tx_trace (u8 *s, va_list *va) af_packet_tx_trace_t *t = va_arg (*va, af_packet_tx_trace_t *); u32 indent = format_get_indent (s); - s = format (s, "af_packet: hw_if_index %u", t->hw_if_index); + 
s = format (s, "af_packet: hw_if_index %u tx-queue %u", t->hw_if_index, + t->queue_id); s = format (s, @@ -183,11 +208,13 @@ format_af_packet_tx_trace (u8 *s, va_list *va) static void af_packet_tx_trace (vlib_main_t *vm, vlib_node_runtime_t *node, vlib_buffer_t *b0, u32 bi, tpacket3_hdr_t *tph, - vnet_virtio_net_hdr_t *vnet_hdr, u32 hw_if_index) + vnet_virtio_net_hdr_t *vnet_hdr, u32 hw_if_index, + u16 queue_id) { af_packet_tx_trace_t *t; t = vlib_add_trace (vm, node, b0, sizeof (t[0])); t->hw_if_index = hw_if_index; + t->queue_id = queue_id; t->buffer_index = bi; clib_memcpy_fast (&t->tph, tph, sizeof (*tph)); @@ -289,22 +316,29 @@ VNET_DEVICE_CLASS_TX_FN (af_packet_device_class) (vlib_main_t * vm, vlib_frame_t * frame) { af_packet_main_t *apm = &af_packet_main; + vnet_hw_if_tx_frame_t *tf = vlib_frame_scalar_args (frame); u32 *buffers = vlib_frame_vector_args (frame); u32 n_left = frame->n_vectors; u32 n_sent = 0; vnet_interface_output_runtime_t *rd = (void *) node->runtime_data; af_packet_if_t *apif = pool_elt_at_index (apm->interfaces, rd->dev_instance); - clib_spinlock_lock_if_init (&apif->lockp); - u32 block = 0; - u32 frame_size = apif->tx_req->tp_frame_size; - u32 frame_num = apif->tx_req->tp_frame_nr; - u8 *block_start = apif->tx_ring[block]; - u32 tx_frame = apif->next_tx_frame; - tpacket3_hdr_t *tph; + u16 queue_id = tf->queue_id; + af_packet_queue_t *tx_queue = vec_elt_at_index (apif->tx_queues, queue_id); + u32 block = 0, frame_size = 0, frame_num = 0, tx_frame = 0; + u8 *block_start = 0; + tpacket3_hdr_t *tph = 0; u32 frame_not_ready = 0; u8 is_cksum_gso_enabled = (apif->is_cksum_gso_enabled == 1) ? 
1 : 0; + if (tf->shared_queue) + clib_spinlock_lock (&tx_queue->lockp); + + frame_size = tx_queue->tx_req->tp_frame_size; + frame_num = tx_queue->tx_req->tp_frame_nr; + block_start = tx_queue->tx_ring[block]; + tx_frame = tx_queue->next_tx_frame; + while (n_left) { u32 len; @@ -366,12 +400,12 @@ VNET_DEVICE_CLASS_TX_FN (af_packet_device_class) (vlib_main_t * vm, { if (PREDICT_TRUE (is_cksum_gso_enabled)) af_packet_tx_trace (vm, node, b0_first, bi_first, tph, vnet_hdr, - apif->hw_if_index); + apif->hw_if_index, queue_id); else { vnet_virtio_net_hdr_t vnet_hdr2 = {}; af_packet_tx_trace (vm, node, b0_first, bi_first, tph, - &vnet_hdr2, apif->hw_if_index); + &vnet_hdr2, apif->hw_if_index, queue_id); } } tx_frame = (tx_frame + 1) % frame_num; @@ -386,10 +420,10 @@ VNET_DEVICE_CLASS_TX_FN (af_packet_device_class) (vlib_main_t * vm, if (PREDICT_TRUE (n_sent)) { - apif->next_tx_frame = tx_frame; + tx_queue->next_tx_frame = tx_frame; - if (PREDICT_FALSE (sendto (apif->fd, NULL, 0, MSG_DONTWAIT, NULL, 0) == - -1)) + if (PREDICT_FALSE ( + sendto (tx_queue->fd, NULL, 0, MSG_DONTWAIT, NULL, 0) == -1)) { /* Uh-oh, drop & move on, but count whether it was fatal or not. 
* Note that we have no reliable way to properly determine the @@ -403,7 +437,8 @@ VNET_DEVICE_CLASS_TX_FN (af_packet_device_class) (vlib_main_t * vm, } } - clib_spinlock_unlock_if_init (&apif->lockp); + if (tf->shared_queue) + clib_spinlock_unlock (&tx_queue->lockp); if (PREDICT_FALSE (frame_not_ready)) vlib_error_count (vm, node->node_index, diff --git a/src/vnet/devices/af_packet/node.c b/src/vnet/devices/af_packet/node.c index 06012fd263a..323508bd304 100644 --- a/src/vnet/devices/af_packet/node.c +++ b/src/vnet/devices/af_packet/node.c @@ -30,9 +30,10 @@ #include <vnet/devices/af_packet/af_packet.h> #include <vnet/devices/virtio/virtio_std.h> -#define foreach_af_packet_input_error \ - _(PARTIAL_PKT, "partial packet") - +#define foreach_af_packet_input_error \ + _ (PARTIAL_PKT, "partial packet") \ + _ (TIMEDOUT_BLK, "timed out block") \ + _ (TOTAL_RECV_BLK, "total received block") typedef enum { #define _(f,s) AF_PACKET_INPUT_ERROR_##f, @@ -51,6 +52,7 @@ typedef struct { u32 next_index; u32 hw_if_index; + u16 queue_id; int block; u32 pkt_num; void *block_start; @@ -67,8 +69,8 @@ format_af_packet_input_trace (u8 * s, va_list * args) af_packet_input_trace_t *t = va_arg (*args, af_packet_input_trace_t *); u32 indent = format_get_indent (s); - s = format (s, "af_packet: hw_if_index %d next-index %d", - t->hw_if_index, t->next_index); + s = format (s, "af_packet: hw_if_index %d rx-queue %u next-index %d", + t->hw_if_index, t->queue_id, t->next_index); s = format ( s, "\n%Ublock %u:\n%Uaddress %p version %u seq_num %lu pkt_num %u", @@ -222,22 +224,25 @@ fill_cksum_offload (vlib_buffer_t *b, u8 *l4_hdr_sz, u8 is_ip) always_inline uword af_packet_device_input_fn (vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame, af_packet_if_t *apif, - u8 is_cksum_gso_enabled) + u16 queue_id, u8 is_cksum_gso_enabled) { af_packet_main_t *apm = &af_packet_main; + af_packet_queue_t *rx_queue = vec_elt_at_index (apif->rx_queues, queue_id); tpacket3_hdr_t *tph; u32 next_index; 
u32 n_free_bufs; u32 n_rx_packets = 0; u32 n_rx_bytes = 0; + u32 timedout_blk = 0; + u32 total = 0; u32 *to_next = 0; - u32 block = apif->next_rx_block; - u32 block_nr = apif->rx_req->tp_block_nr; + u32 block = rx_queue->next_rx_block; + u32 block_nr = rx_queue->rx_req->tp_block_nr; u8 *block_start = 0; uword n_trace = vlib_get_trace_count (vm, node); u32 thread_index = vm->thread_index; u32 n_buffer_bytes = vlib_buffer_get_default_data_size (vm); - u32 min_bufs = apif->rx_req->tp_frame_size / n_buffer_bytes; + u32 min_bufs = rx_queue->rx_req->tp_frame_size / n_buffer_bytes; u32 num_pkts = 0; u32 rx_frame_offset = 0; block_desc_t *bd = 0; @@ -256,18 +261,23 @@ af_packet_device_input_fn (vlib_main_t *vm, vlib_node_runtime_t *node, vnet_feature_start_device_input_x1 (apif->sw_if_index, &next_index, &bt); } - if ((((block_desc_t *) (block_start = apif->rx_ring[block])) + if ((((block_desc_t *) (block_start = rx_queue->rx_ring[block])) ->hdr.bh1.block_status & TP_STATUS_USER) != 0) { u32 n_required = 0; bd = (block_desc_t *) block_start; - if (PREDICT_FALSE (apif->ss.is_save)) + total++; + + if (TP_STATUS_BLK_TMO & bd->hdr.bh1.block_status) + timedout_blk++; + + if (PREDICT_FALSE (rx_queue->is_rx_pending)) { - num_pkts = apif->ss.num_pkts; - rx_frame_offset = apif->ss.rx_frame_offset; - apif->ss.is_save = 0; + num_pkts = rx_queue->num_rx_pkts; + rx_frame_offset = rx_queue->rx_frame_offset; + rx_queue->is_rx_pending = 0; } else { @@ -318,9 +328,9 @@ af_packet_device_input_fn (vlib_main_t *vm, vlib_node_runtime_t *node, if (PREDICT_FALSE (((data_len / n_buffer_bytes) + 1) > vec_len (apm->rx_buffers[thread_index]))) { - apif->ss.rx_frame_offset = rx_frame_offset; - apif->ss.num_pkts = num_pkts; - apif->ss.is_save = 1; + rx_queue->rx_frame_offset = rx_frame_offset; + rx_queue->num_rx_pkts = num_pkts; + rx_queue->is_rx_pending = 1; vlib_put_next_frame (vm, node, next_index, n_left_to_next); goto done; } @@ -450,6 +460,7 @@ af_packet_device_input_fn (vlib_main_t *vm, 
vlib_node_runtime_t *node, tr = vlib_add_trace (vm, node, first_b0, sizeof (*tr)); tr->next_index = next0; tr->hw_if_index = apif->hw_if_index; + tr->queue_id = queue_id; tr->block = block; tr->block_start = bd; tr->pkt_num = bd->hdr.bh1.num_pkts - num_pkts; @@ -483,23 +494,28 @@ af_packet_device_input_fn (vlib_main_t *vm, vlib_node_runtime_t *node, } else { - apif->ss.rx_frame_offset = rx_frame_offset; - apif->ss.num_pkts = num_pkts; - apif->ss.is_save = 1; + rx_queue->rx_frame_offset = rx_frame_offset; + rx_queue->num_rx_pkts = num_pkts; + rx_queue->is_rx_pending = 1; } } - apif->next_rx_block = block; + rx_queue->next_rx_block = block; done: - if ((((block_desc_t *) (block_start = apif->rx_ring[block])) + if ((((block_desc_t *) (block_start = rx_queue->rx_ring[block])) ->hdr.bh1.block_status & TP_STATUS_USER) != 0) vlib_node_set_state (vm, node->node_index, VLIB_NODE_STATE_POLLING); else vlib_node_set_state (vm, node->node_index, VLIB_NODE_STATE_INTERRUPT); + vlib_error_count (vm, node->node_index, AF_PACKET_INPUT_ERROR_TOTAL_RECV_BLK, + total); + vlib_error_count (vm, node->node_index, AF_PACKET_INPUT_ERROR_TIMEDOUT_BLK, + timedout_blk); + vlib_increment_combined_counter (vnet_get_main ()->interface_main.combined_sw_if_counters + VNET_INTERFACE_COUNTER_RX, @@ -524,11 +540,11 @@ VLIB_NODE_FN (af_packet_input_node) (vlib_main_t * vm, if (apif->is_admin_up) { if (apif->is_cksum_gso_enabled) - n_rx_packets += - af_packet_device_input_fn (vm, node, frame, apif, 1); + n_rx_packets += af_packet_device_input_fn (vm, node, frame, apif, + pv[i].queue_id, 1); else - n_rx_packets += - af_packet_device_input_fn (vm, node, frame, apif, 0); + n_rx_packets += af_packet_device_input_fn (vm, node, frame, apif, + pv[i].queue_id, 0); } } return n_rx_packets; |