From 16d12cf1861bd56c83cc8437944c290a59a34a79 Mon Sep 17 00:00:00 2001 From: Steven Date: Thu, 5 Oct 2017 00:12:33 -0700 Subject: tuntap: Introduce per thread structure to support multi-threads (VPP-1012) https://gerrit.fd.io/r/#/c/8551/ decoupled the global variable, namely tm->iovecs, from TX and RX. However, to support multi-threads, we have to replace this global variable with a per-thread variable. I notice that rx_buffers must also be a per-thread variable. So, we introduce a per-thread struct to contain rx_buffers and iovecs. Each thread will find its per-thread struct with thread_index. Change-Id: I61abf2fdace8d722525a382ac72f0d04a173b9ce Signed-off-by: Steven (cherry picked from commit 4cd257667406d0500a81323ef91f5c7c8c902b25) --- src/vnet/unix/tapcli.c | 90 +++++++++++++++++++++++++++++++------------------- src/vnet/unix/tuntap.c | 74 ++++++++++++++++++++++++----------------- 2 files changed, 99 insertions(+), 65 deletions(-) diff --git a/src/vnet/unix/tapcli.c b/src/vnet/unix/tapcli.c index f39e2d7e75d..4fcd7497a27 100644 --- a/src/vnet/unix/tapcli.c +++ b/src/vnet/unix/tapcli.c @@ -96,19 +96,25 @@ u8 * format_tapcli_rx_trace (u8 * s, va_list * va) } /** - * @brief TAPCLI main state struct + * @brief TAPCLI per thread struct */ -typedef struct { - /** Vector of iovecs for readv calls. */ - struct iovec * rd_iovecs; - - /** Vector of iovecs for writev calls. */ - struct iovec * wr_iovecs; - +typedef struct +{ /** Vector of VLIB rx buffers to use. We allocate them in blocks of VLIB_FRAME_SIZE (256). */ u32 * rx_buffers; + /** Vector of iovecs for readv/writev calls. */ + struct iovec * iovecs; +} tapcli_per_thread_t; + +/** + * @brief TAPCLI main state struct + */ +typedef struct { + /** per thread variables */ + tapcli_per_thread_t * threads; + /** tap device destination MAC address. 
Required, or Linux drops pkts */ u8 ether_dst_mac[6]; @@ -170,6 +176,7 @@ tapcli_tx (vlib_main_t * vm, tapcli_main_t * tm = &tapcli_main; tapcli_interface_t * ti; int i; + u16 thread_index = vlib_get_thread_index (); for (i = 0; i < n_packets; i++) { @@ -204,11 +211,11 @@ tapcli_tx (vlib_main_t * vm, ti = vec_elt_at_index (tm->tapcli_interfaces, p[0]); /* Re-set iovecs if present. */ - if (tm->wr_iovecs) - _vec_len (tm->wr_iovecs) = 0; + if (tm->threads[thread_index].iovecs) + _vec_len (tm->threads[thread_index].iovecs) = 0; /* VLIB buffer chain -> Unix iovec(s). */ - vec_add2 (tm->wr_iovecs, iov, 1); + vec_add2 (tm->threads[thread_index].iovecs, iov, 1); iov->iov_base = b->data + b->current_data; iov->iov_len = l = b->current_length; @@ -217,7 +224,7 @@ tapcli_tx (vlib_main_t * vm, do { b = vlib_get_buffer (vm, b->next_buffer); - vec_add2 (tm->wr_iovecs, iov, 1); + vec_add2 (tm->threads[thread_index].iovecs, iov, 1); iov->iov_base = b->data + b->current_data; iov->iov_len = b->current_length; @@ -225,7 +232,8 @@ tapcli_tx (vlib_main_t * vm, } while (b->flags & VLIB_BUFFER_NEXT_PRESENT); } - if (writev (ti->unix_fd, tm->wr_iovecs, vec_len (tm->wr_iovecs)) < l) + if (writev (ti->unix_fd, tm->threads[thread_index].iovecs, + vec_len (tm->threads[thread_index].iovecs)) < l) clib_unix_warning ("writev"); } @@ -260,7 +268,7 @@ static uword tapcli_rx_iface(vlib_main_t * vm, const uword buffer_size = VLIB_BUFFER_DATA_SIZE; u32 n_trace = vlib_get_trace_count (vm, node); u8 set_trace = 0; - + u16 thread_index = vlib_get_thread_index (); vnet_main_t *vnm; vnet_sw_interface_t * si; u8 admin_down; @@ -280,31 +288,35 @@ static uword tapcli_rx_iface(vlib_main_t * vm, word n_bytes_in_packet; int j, n_bytes_left; - if (PREDICT_FALSE(vec_len(tm->rx_buffers) < tm->mtu_buffers)) { - uword len = vec_len(tm->rx_buffers); - _vec_len(tm->rx_buffers) += - vlib_buffer_alloc_from_free_list(vm, &tm->rx_buffers[len], + if (PREDICT_FALSE(vec_len(tm->threads[thread_index].rx_buffers) < + 
tm->mtu_buffers)) { + uword len = vec_len(tm->threads[thread_index].rx_buffers); + _vec_len(tm->threads[thread_index].rx_buffers) += + vlib_buffer_alloc_from_free_list(vm, &tm->threads[thread_index].rx_buffers[len], VLIB_FRAME_SIZE - len, VLIB_BUFFER_DEFAULT_FREE_LIST_INDEX); - if (PREDICT_FALSE(vec_len(tm->rx_buffers) < tm->mtu_buffers)) { + if (PREDICT_FALSE(vec_len(tm->threads[thread_index].rx_buffers) < + tm->mtu_buffers)) { vlib_node_increment_counter(vm, tapcli_rx_node.index, TAPCLI_ERROR_BUFFER_ALLOC, - tm->mtu_buffers - vec_len(tm->rx_buffers)); + tm->mtu_buffers - + vec_len(tm->threads[thread_index].rx_buffers)); break; } } - uword i_rx = vec_len (tm->rx_buffers) - 1; + uword i_rx = vec_len (tm->threads[thread_index].rx_buffers) - 1; /* Allocate RX buffers from end of rx_buffers. Turn them into iovecs to pass to readv. */ - vec_validate (tm->rd_iovecs, tm->mtu_buffers - 1); + vec_validate (tm->threads[thread_index].iovecs, tm->mtu_buffers - 1); for (j = 0; j < tm->mtu_buffers; j++) { - b = vlib_get_buffer (vm, tm->rx_buffers[i_rx - j]); - tm->rd_iovecs[j].iov_base = b->data; - tm->rd_iovecs[j].iov_len = buffer_size; + b = vlib_get_buffer (vm, tm->threads[thread_index].rx_buffers[i_rx - j]); + tm->threads[thread_index].iovecs[j].iov_base = b->data; + tm->threads[thread_index].iovecs[j].iov_len = buffer_size; } - n_bytes_left = readv (ti->unix_fd, tm->rd_iovecs, tm->mtu_buffers); + n_bytes_left = readv (ti->unix_fd, tm->threads[thread_index].iovecs, + tm->mtu_buffers); n_bytes_in_packet = n_bytes_left; if (n_bytes_left <= 0) { if (errno != EAGAIN) { @@ -314,8 +326,9 @@ static uword tapcli_rx_iface(vlib_main_t * vm, break; } - bi_first = tm->rx_buffers[i_rx]; - b = b_first = vlib_get_buffer (vm, tm->rx_buffers[i_rx]); + bi_first = tm->threads[thread_index].rx_buffers[i_rx]; + b = b_first = vlib_get_buffer (vm, + tm->threads[thread_index].rx_buffers[i_rx]); prev = NULL; while (1) { @@ -333,11 +346,11 @@ static uword tapcli_rx_iface(vlib_main_t * vm, break; 
i_rx--; - bi = tm->rx_buffers[i_rx]; + bi = tm->threads[thread_index].rx_buffers[i_rx]; b = vlib_get_buffer (vm, bi); } - _vec_len (tm->rx_buffers) = i_rx; + _vec_len (tm->threads[thread_index].rx_buffers) = i_rx; b_first->total_length_not_including_first_buffer = (n_bytes_in_packet > buffer_size) ? n_bytes_in_packet - buffer_size : 0; @@ -369,7 +382,7 @@ static uword tapcli_rx_iface(vlib_main_t * vm, vlib_increment_combined_counter ( vnet_main.interface_main.combined_sw_if_counters + VNET_INTERFACE_COUNTER_RX, - vlib_get_thread_index(), ti->sw_if_index, + thread_index, ti->sw_if_index, 1, n_bytes_in_packet); if (PREDICT_FALSE(n_trace > 0)) { @@ -1455,6 +1468,8 @@ clib_error_t * tapcli_init (vlib_main_t * vm) { tapcli_main_t * tm = &tapcli_main; + vlib_thread_main_t * m = vlib_get_thread_main (); + tapcli_per_thread_t * thread; tm->vlib_main = vm; tm->vnet_main = vnet_get_main(); @@ -1462,10 +1477,17 @@ tapcli_init (vlib_main_t * vm) tm->mtu_bytes = TAP_MTU_DEFAULT; tm->tapcli_interface_index_by_sw_if_index = hash_create (0, sizeof(uword)); tm->tapcli_interface_index_by_unix_fd = hash_create (0, sizeof (uword)); - tm->rx_buffers = 0; - vec_alloc(tm->rx_buffers, VLIB_FRAME_SIZE); - vec_reset_length(tm->rx_buffers); vm->os_punt_frame = tapcli_nopunt_frame; + vec_validate_aligned (tm->threads, m->n_vlib_mains - 1, + CLIB_CACHE_LINE_BYTES); + vec_foreach (thread, tm->threads) + { + thread->iovecs = 0; + thread->rx_buffers = 0; + vec_alloc(thread->rx_buffers, VLIB_FRAME_SIZE); + vec_reset_length(thread->rx_buffers); + } + return 0; } diff --git a/src/vnet/unix/tuntap.c b/src/vnet/unix/tuntap.c index 3b73c1ed6d1..42728a74a16 100644 --- a/src/vnet/unix/tuntap.c +++ b/src/vnet/unix/tuntap.c @@ -67,19 +67,25 @@ typedef struct { } subif_address_t; /** - * @brief TUNTAP node main state + * @brief TUNTAP per thread struct */ -typedef struct { - /** Vector of iovecs for readv calls. */ - struct iovec * rd_iovecs; - - /** Vector of iovecs for writev calls. 
*/ - struct iovec * wr_iovecs; - +typedef struct +{ /** Vector of VLIB rx buffers to use. We allocate them in blocks of VLIB_FRAME_SIZE (256). */ u32 * rx_buffers; + /** Vector of iovecs for readv/writev calls. */ + struct iovec * iovecs; +} tuntap_per_thread_t; + +/** + * @brief TUNTAP node main state + */ +typedef struct { + /** per thread variables */ + tuntap_per_thread_t * threads; + /** File descriptors for /dev/net/tun and provisioning socket. */ int dev_net_tun_fd, dev_tap_fd; @@ -146,6 +152,7 @@ tuntap_tx (vlib_main_t * vm, vnet_interface_main_t *im = &vnm->interface_main; u32 n_bytes = 0; int i; + u16 thread_index = vlib_get_thread_index (); for (i = 0; i < n_packets; i++) { @@ -162,11 +169,11 @@ tuntap_tx (vlib_main_t * vm, } /* Re-set iovecs if present. */ - if (tm->wr_iovecs) - _vec_len (tm->wr_iovecs) = 0; + if (tm->threads[thread_index].iovecs) + _vec_len (tm->threads[thread_index].iovecs) = 0; /** VLIB buffer chain -> Unix iovec(s). */ - vec_add2 (tm->wr_iovecs, iov, 1); + vec_add2 (tm->threads[thread_index].iovecs, iov, 1); iov->iov_base = b->data + b->current_data; iov->iov_len = l = b->current_length; @@ -175,7 +182,7 @@ tuntap_tx (vlib_main_t * vm, do { b = vlib_get_buffer (vm, b->next_buffer); - vec_add2 (tm->wr_iovecs, iov, 1); + vec_add2 (tm->threads[thread_index].iovecs, iov, 1); iov->iov_base = b->data + b->current_data; iov->iov_len = b->current_length; @@ -183,8 +190,8 @@ tuntap_tx (vlib_main_t * vm, } while (b->flags & VLIB_BUFFER_NEXT_PRESENT); } - if (writev (tm->dev_net_tun_fd, tm->wr_iovecs, - vec_len (tm->wr_iovecs)) < l) + if (writev (tm->dev_net_tun_fd, tm->threads[thread_index].iovecs, + vec_len (tm->threads[thread_index].iovecs)) < l) clib_unix_warning ("writev"); n_bytes += l; @@ -233,41 +240,43 @@ tuntap_rx (vlib_main_t * vm, vlib_buffer_t * b; u32 bi; const uword buffer_size = VLIB_BUFFER_DATA_SIZE; + u16 thread_index = vlib_get_thread_index (); /** Make sure we have some RX buffers. 
*/ { - uword n_left = vec_len (tm->rx_buffers); + uword n_left = vec_len (tm->threads[thread_index].rx_buffers); uword n_alloc; if (n_left < VLIB_FRAME_SIZE / 2) { - if (! tm->rx_buffers) - vec_alloc (tm->rx_buffers, VLIB_FRAME_SIZE); + if (! tm->threads[thread_index].rx_buffers) + vec_alloc (tm->threads[thread_index].rx_buffers, VLIB_FRAME_SIZE); - n_alloc = vlib_buffer_alloc (vm, tm->rx_buffers + n_left, VLIB_FRAME_SIZE - n_left); - _vec_len (tm->rx_buffers) = n_left + n_alloc; + n_alloc = vlib_buffer_alloc (vm, tm->threads[thread_index].rx_buffers + n_left, VLIB_FRAME_SIZE - n_left); + _vec_len (tm->threads[thread_index].rx_buffers) = n_left + n_alloc; } } /** Allocate RX buffers from end of rx_buffers. Turn them into iovecs to pass to readv. */ { - uword i_rx = vec_len (tm->rx_buffers) - 1; + uword i_rx = vec_len (tm->threads[thread_index].rx_buffers) - 1; vlib_buffer_t * b; word i, n_bytes_left, n_bytes_in_packet; /** We should have enough buffers left for an MTU sized packet. */ - ASSERT (vec_len (tm->rx_buffers) >= tm->mtu_buffers); + ASSERT (vec_len (tm->threads[thread_index].rx_buffers) >= tm->mtu_buffers); - vec_validate (tm->rd_iovecs, tm->mtu_buffers - 1); + vec_validate (tm->threads[thread_index].iovecs, tm->mtu_buffers - 1); for (i = 0; i < tm->mtu_buffers; i++) { - b = vlib_get_buffer (vm, tm->rx_buffers[i_rx - i]); - tm->rd_iovecs[i].iov_base = b->data; - tm->rd_iovecs[i].iov_len = buffer_size; + b = vlib_get_buffer (vm, tm->threads[thread_index].rx_buffers[i_rx - i]); + tm->threads[thread_index].iovecs[i].iov_base = b->data; + tm->threads[thread_index].iovecs[i].iov_len = buffer_size; } - n_bytes_left = readv (tm->dev_net_tun_fd, tm->rd_iovecs, tm->mtu_buffers); + n_bytes_left = readv (tm->dev_net_tun_fd, tm->threads[thread_index].iovecs, + tm->mtu_buffers); n_bytes_in_packet = n_bytes_left; if (n_bytes_left <= 0) { @@ -276,11 +285,11 @@ tuntap_rx (vlib_main_t * vm, return 0; } - bi = tm->rx_buffers[i_rx]; + bi = 
tm->threads[thread_index].rx_buffers[i_rx]; while (1) { - b = vlib_get_buffer (vm, tm->rx_buffers[i_rx]); + b = vlib_get_buffer (vm, tm->threads[thread_index].rx_buffers[i_rx]); b->flags = 0; b->current_data = 0; b->current_length = n_bytes_left < buffer_size ? n_bytes_left : buffer_size; @@ -294,18 +303,18 @@ tuntap_rx (vlib_main_t * vm, i_rx--; b->flags |= VLIB_BUFFER_NEXT_PRESENT; - b->next_buffer = tm->rx_buffers[i_rx]; + b->next_buffer = tm->threads[thread_index].rx_buffers[i_rx]; } /** Interface counters for tuntap interface. */ vlib_increment_combined_counter (vnet_main.interface_main.combined_sw_if_counters + VNET_INTERFACE_COUNTER_RX, - vlib_get_thread_index(), + thread_index, tm->sw_if_index, 1, n_bytes_in_packet); - _vec_len (tm->rx_buffers) = i_rx; + _vec_len (tm->threads[thread_index].rx_buffers) = i_rx; } b = vlib_get_buffer (vm, bi); @@ -983,6 +992,7 @@ tuntap_init (vlib_main_t * vm) ip4_add_del_interface_address_callback_t cb4; ip6_add_del_interface_address_callback_t cb6; tuntap_main_t * tm = &tuntap_main; + vlib_thread_main_t * m = vlib_get_thread_main (); error = vlib_call_init_function (vm, ip4_init); if (error) @@ -997,6 +1007,8 @@ tuntap_init (vlib_main_t * vm) cb6.function = tuntap_ip6_add_del_interface_address; cb6.function_opaque = 0; vec_add1 (im6->add_del_interface_address_callbacks, cb6); + vec_validate_aligned (tm->threads, m->n_vlib_mains - 1, + CLIB_CACHE_LINE_BYTES); return 0; } -- cgit 1.2.3-korg