From fc7e71100306a9313ce8346576b7d86abd6b401e Mon Sep 17 00:00:00 2001 From: Steven Date: Thu, 5 Oct 2017 00:12:33 -0700 Subject: tuntap: Introduce per thread structure to support multi-threads (VPP-1012) https://gerrit.fd.io/r/#/c/8551/ decoupled the global variable, namely tm->iovecs, from TX and RX. However, to support multi-threads, we have to replace this global variable with a per-thread variable. I notice that rx_buffers must also be a per-thread variable. So, we introduce a per-thread struct to contain rx_buffers and iovecs. Each thread will find its per-thread struct with thread_index. Change-Id: I61abf2fdace8d722525a382ac72f0d04a173b9ce Signed-off-by: Steven (cherry picked from commit 4cd257667406d0500a81323ef91f5c7c8c902b25) --- src/vnet/unix/tapcli.c | 90 +++++++++++++++++++++++++++++++------------------- src/vnet/unix/tuntap.c | 74 ++++++++++++++++++++++++----------------- 2 files changed, 99 insertions(+), 65 deletions(-) diff --git a/src/vnet/unix/tapcli.c b/src/vnet/unix/tapcli.c index ce386094..d80cca3d 100644 --- a/src/vnet/unix/tapcli.c +++ b/src/vnet/unix/tapcli.c @@ -96,19 +96,25 @@ u8 * format_tapcli_rx_trace (u8 * s, va_list * va) } /** - * @brief TAPCLI main state struct + * @brief TAPCLI per thread struct */ -typedef struct { - /** Vector of iovecs for readv calls. */ - struct iovec * rd_iovecs; - - /** Vector of iovecs for writev calls. */ - struct iovec * wr_iovecs; - +typedef struct +{ /** Vector of VLIB rx buffers to use. We allocate them in blocks of VLIB_FRAME_SIZE (256). */ u32 * rx_buffers; + /** Vector of iovecs for readv/writev calls. */ + struct iovec * iovecs; +} tapcli_per_thread_t; + +/** + * @brief TAPCLI main state struct + */ +typedef struct { + /** per thread variables */ + tapcli_per_thread_t * threads; + /** tap device destination MAC address. 
Required, or Linux drops pkts */ u8 ether_dst_mac[6]; @@ -168,6 +174,7 @@ tapcli_tx (vlib_main_t * vm, tapcli_main_t * tm = &tapcli_main; tapcli_interface_t * ti; int i; + u16 thread_index = vlib_get_thread_index (); for (i = 0; i < n_packets; i++) { @@ -202,11 +209,11 @@ tapcli_tx (vlib_main_t * vm, ti = vec_elt_at_index (tm->tapcli_interfaces, p[0]); /* Re-set iovecs if present. */ - if (tm->wr_iovecs) - _vec_len (tm->wr_iovecs) = 0; + if (tm->threads[thread_index].iovecs) + _vec_len (tm->threads[thread_index].iovecs) = 0; /* VLIB buffer chain -> Unix iovec(s). */ - vec_add2 (tm->wr_iovecs, iov, 1); + vec_add2 (tm->threads[thread_index].iovecs, iov, 1); iov->iov_base = b->data + b->current_data; iov->iov_len = l = b->current_length; @@ -215,7 +222,7 @@ tapcli_tx (vlib_main_t * vm, do { b = vlib_get_buffer (vm, b->next_buffer); - vec_add2 (tm->wr_iovecs, iov, 1); + vec_add2 (tm->threads[thread_index].iovecs, iov, 1); iov->iov_base = b->data + b->current_data; iov->iov_len = b->current_length; @@ -223,7 +230,8 @@ tapcli_tx (vlib_main_t * vm, } while (b->flags & VLIB_BUFFER_NEXT_PRESENT); } - if (writev (ti->unix_fd, tm->wr_iovecs, vec_len (tm->wr_iovecs)) < l) + if (writev (ti->unix_fd, tm->threads[thread_index].iovecs, + vec_len (tm->threads[thread_index].iovecs)) < l) clib_unix_warning ("writev"); } @@ -258,7 +266,7 @@ static uword tapcli_rx_iface(vlib_main_t * vm, const uword buffer_size = VLIB_BUFFER_DATA_SIZE; u32 n_trace = vlib_get_trace_count (vm, node); u8 set_trace = 0; - + u16 thread_index = vlib_get_thread_index (); vnet_main_t *vnm; vnet_sw_interface_t * si; u8 admin_down; @@ -278,31 +286,35 @@ static uword tapcli_rx_iface(vlib_main_t * vm, word n_bytes_in_packet; int j, n_bytes_left; - if (PREDICT_FALSE(vec_len(tm->rx_buffers) < tm->mtu_buffers)) { - uword len = vec_len(tm->rx_buffers); - _vec_len(tm->rx_buffers) += - vlib_buffer_alloc_from_free_list(vm, &tm->rx_buffers[len], + if (PREDICT_FALSE(vec_len(tm->threads[thread_index].rx_buffers) < + 
tm->mtu_buffers)) { + uword len = vec_len(tm->threads[thread_index].rx_buffers); + _vec_len(tm->threads[thread_index].rx_buffers) += + vlib_buffer_alloc_from_free_list(vm, &tm->threads[thread_index].rx_buffers[len], VLIB_FRAME_SIZE - len, VLIB_BUFFER_DEFAULT_FREE_LIST_INDEX); - if (PREDICT_FALSE(vec_len(tm->rx_buffers) < tm->mtu_buffers)) { + if (PREDICT_FALSE(vec_len(tm->threads[thread_index].rx_buffers) < + tm->mtu_buffers)) { vlib_node_increment_counter(vm, tapcli_rx_node.index, TAPCLI_ERROR_BUFFER_ALLOC, - tm->mtu_buffers - vec_len(tm->rx_buffers)); + tm->mtu_buffers - + vec_len(tm->threads[thread_index].rx_buffers)); break; } } - uword i_rx = vec_len (tm->rx_buffers) - 1; + uword i_rx = vec_len (tm->threads[thread_index].rx_buffers) - 1; /* Allocate RX buffers from end of rx_buffers. Turn them into iovecs to pass to readv. */ - vec_validate (tm->rd_iovecs, tm->mtu_buffers - 1); + vec_validate (tm->threads[thread_index].iovecs, tm->mtu_buffers - 1); for (j = 0; j < tm->mtu_buffers; j++) { - b = vlib_get_buffer (vm, tm->rx_buffers[i_rx - j]); - tm->rd_iovecs[j].iov_base = b->data; - tm->rd_iovecs[j].iov_len = buffer_size; + b = vlib_get_buffer (vm, tm->threads[thread_index].rx_buffers[i_rx - j]); + tm->threads[thread_index].iovecs[j].iov_base = b->data; + tm->threads[thread_index].iovecs[j].iov_len = buffer_size; } - n_bytes_left = readv (ti->unix_fd, tm->rd_iovecs, tm->mtu_buffers); + n_bytes_left = readv (ti->unix_fd, tm->threads[thread_index].iovecs, + tm->mtu_buffers); n_bytes_in_packet = n_bytes_left; if (n_bytes_left <= 0) { if (errno != EAGAIN) { @@ -312,8 +324,9 @@ static uword tapcli_rx_iface(vlib_main_t * vm, break; } - bi_first = tm->rx_buffers[i_rx]; - b = b_first = vlib_get_buffer (vm, tm->rx_buffers[i_rx]); + bi_first = tm->threads[thread_index].rx_buffers[i_rx]; + b = b_first = vlib_get_buffer (vm, + tm->threads[thread_index].rx_buffers[i_rx]); prev = NULL; while (1) { @@ -331,11 +344,11 @@ static uword tapcli_rx_iface(vlib_main_t * vm, break; 
i_rx--; - bi = tm->rx_buffers[i_rx]; + bi = tm->threads[thread_index].rx_buffers[i_rx]; b = vlib_get_buffer (vm, bi); } - _vec_len (tm->rx_buffers) = i_rx; + _vec_len (tm->threads[thread_index].rx_buffers) = i_rx; b_first->total_length_not_including_first_buffer = (n_bytes_in_packet > buffer_size) ? n_bytes_in_packet - buffer_size : 0; @@ -367,7 +380,7 @@ static uword tapcli_rx_iface(vlib_main_t * vm, vlib_increment_combined_counter ( vnet_main.interface_main.combined_sw_if_counters + VNET_INTERFACE_COUNTER_RX, - vlib_get_thread_index(), ti->sw_if_index, + thread_index, ti->sw_if_index, 1, n_bytes_in_packet); if (PREDICT_FALSE(n_trace > 0)) { @@ -1453,16 +1466,25 @@ clib_error_t * tapcli_init (vlib_main_t * vm) { tapcli_main_t * tm = &tapcli_main; + vlib_thread_main_t * m = vlib_get_thread_main (); + tapcli_per_thread_t * thread; tm->vlib_main = vm; tm->vnet_main = vnet_get_main(); tm->mtu_bytes = TAP_MTU_DEFAULT; tm->tapcli_interface_index_by_sw_if_index = hash_create (0, sizeof(uword)); tm->tapcli_interface_index_by_unix_fd = hash_create (0, sizeof (uword)); - tm->rx_buffers = 0; - vec_alloc(tm->rx_buffers, VLIB_FRAME_SIZE); - vec_reset_length(tm->rx_buffers); vm->os_punt_frame = tapcli_nopunt_frame; + vec_validate_aligned (tm->threads, m->n_vlib_mains - 1, + CLIB_CACHE_LINE_BYTES); + vec_foreach (thread, tm->threads) + { + thread->iovecs = 0; + thread->rx_buffers = 0; + vec_alloc(thread->rx_buffers, VLIB_FRAME_SIZE); + vec_reset_length(thread->rx_buffers); + } + return 0; } diff --git a/src/vnet/unix/tuntap.c b/src/vnet/unix/tuntap.c index dc5c2a89..6e2a53fe 100644 --- a/src/vnet/unix/tuntap.c +++ b/src/vnet/unix/tuntap.c @@ -68,19 +68,25 @@ typedef struct { } subif_address_t; /** - * @brief TUNTAP node main state + * @brief TUNTAP per thread struct */ -typedef struct { - /** Vector of iovecs for readv calls. */ - struct iovec * rd_iovecs; - - /** Vector of iovecs for writev calls. 
*/ - struct iovec * wr_iovecs; - +typedef struct +{ /** Vector of VLIB rx buffers to use. We allocate them in blocks of VLIB_FRAME_SIZE (256). */ u32 * rx_buffers; + /** Vector of iovecs for readv/writev calls. */ + struct iovec * iovecs; +} tuntap_per_thread_t; + +/** + * @brief TUNTAP node main state + */ +typedef struct { + /** per thread variables */ + tuntap_per_thread_t * threads; + /** File descriptors for /dev/net/tun and provisioning socket. */ int dev_net_tun_fd, dev_tap_fd; @@ -147,6 +153,7 @@ tuntap_tx (vlib_main_t * vm, vnet_interface_main_t *im = &vnm->interface_main; u32 n_bytes = 0; int i; + u16 thread_index = vlib_get_thread_index (); for (i = 0; i < n_packets; i++) { @@ -163,11 +170,11 @@ tuntap_tx (vlib_main_t * vm, } /* Re-set iovecs if present. */ - if (tm->wr_iovecs) - _vec_len (tm->wr_iovecs) = 0; + if (tm->threads[thread_index].iovecs) + _vec_len (tm->threads[thread_index].iovecs) = 0; /** VLIB buffer chain -> Unix iovec(s). */ - vec_add2 (tm->wr_iovecs, iov, 1); + vec_add2 (tm->threads[thread_index].iovecs, iov, 1); iov->iov_base = b->data + b->current_data; iov->iov_len = l = b->current_length; @@ -176,7 +183,7 @@ tuntap_tx (vlib_main_t * vm, do { b = vlib_get_buffer (vm, b->next_buffer); - vec_add2 (tm->wr_iovecs, iov, 1); + vec_add2 (tm->threads[thread_index].iovecs, iov, 1); iov->iov_base = b->data + b->current_data; iov->iov_len = b->current_length; @@ -184,8 +191,8 @@ tuntap_tx (vlib_main_t * vm, } while (b->flags & VLIB_BUFFER_NEXT_PRESENT); } - if (writev (tm->dev_net_tun_fd, tm->wr_iovecs, - vec_len (tm->wr_iovecs)) < l) + if (writev (tm->dev_net_tun_fd, tm->threads[thread_index].iovecs, + vec_len (tm->threads[thread_index].iovecs)) < l) clib_unix_warning ("writev"); n_bytes += l; @@ -234,41 +241,43 @@ tuntap_rx (vlib_main_t * vm, vlib_buffer_t * b; u32 bi; const uword buffer_size = VLIB_BUFFER_DATA_SIZE; + u16 thread_index = vlib_get_thread_index (); /** Make sure we have some RX buffers. 
*/ { - uword n_left = vec_len (tm->rx_buffers); + uword n_left = vec_len (tm->threads[thread_index].rx_buffers); uword n_alloc; if (n_left < VLIB_FRAME_SIZE / 2) { - if (! tm->rx_buffers) - vec_alloc (tm->rx_buffers, VLIB_FRAME_SIZE); + if (! tm->threads[thread_index].rx_buffers) + vec_alloc (tm->threads[thread_index].rx_buffers, VLIB_FRAME_SIZE); - n_alloc = vlib_buffer_alloc (vm, tm->rx_buffers + n_left, VLIB_FRAME_SIZE - n_left); - _vec_len (tm->rx_buffers) = n_left + n_alloc; + n_alloc = vlib_buffer_alloc (vm, tm->threads[thread_index].rx_buffers + n_left, VLIB_FRAME_SIZE - n_left); + _vec_len (tm->threads[thread_index].rx_buffers) = n_left + n_alloc; } } /** Allocate RX buffers from end of rx_buffers. Turn them into iovecs to pass to readv. */ { - uword i_rx = vec_len (tm->rx_buffers) - 1; + uword i_rx = vec_len (tm->threads[thread_index].rx_buffers) - 1; vlib_buffer_t * b; word i, n_bytes_left, n_bytes_in_packet; /** We should have enough buffers left for an MTU sized packet. */ - ASSERT (vec_len (tm->rx_buffers) >= tm->mtu_buffers); + ASSERT (vec_len (tm->threads[thread_index].rx_buffers) >= tm->mtu_buffers); - vec_validate (tm->rd_iovecs, tm->mtu_buffers - 1); + vec_validate (tm->threads[thread_index].iovecs, tm->mtu_buffers - 1); for (i = 0; i < tm->mtu_buffers; i++) { - b = vlib_get_buffer (vm, tm->rx_buffers[i_rx - i]); - tm->rd_iovecs[i].iov_base = b->data; - tm->rd_iovecs[i].iov_len = buffer_size; + b = vlib_get_buffer (vm, tm->threads[thread_index].rx_buffers[i_rx - i]); + tm->threads[thread_index].iovecs[i].iov_base = b->data; + tm->threads[thread_index].iovecs[i].iov_len = buffer_size; } - n_bytes_left = readv (tm->dev_net_tun_fd, tm->rd_iovecs, tm->mtu_buffers); + n_bytes_left = readv (tm->dev_net_tun_fd, tm->threads[thread_index].iovecs, + tm->mtu_buffers); n_bytes_in_packet = n_bytes_left; if (n_bytes_left <= 0) { @@ -277,11 +286,11 @@ tuntap_rx (vlib_main_t * vm, return 0; } - bi = tm->rx_buffers[i_rx]; + bi = 
tm->threads[thread_index].rx_buffers[i_rx]; while (1) { - b = vlib_get_buffer (vm, tm->rx_buffers[i_rx]); + b = vlib_get_buffer (vm, tm->threads[thread_index].rx_buffers[i_rx]); b->flags = 0; b->current_data = 0; b->current_length = n_bytes_left < buffer_size ? n_bytes_left : buffer_size; @@ -295,18 +304,18 @@ tuntap_rx (vlib_main_t * vm, i_rx--; b->flags |= VLIB_BUFFER_NEXT_PRESENT; - b->next_buffer = tm->rx_buffers[i_rx]; + b->next_buffer = tm->threads[thread_index].rx_buffers[i_rx]; } /** Interface counters for tuntap interface. */ vlib_increment_combined_counter (vnet_main.interface_main.combined_sw_if_counters + VNET_INTERFACE_COUNTER_RX, - vlib_get_thread_index(), + thread_index, tm->sw_if_index, 1, n_bytes_in_packet); - _vec_len (tm->rx_buffers) = i_rx; + _vec_len (tm->threads[thread_index].rx_buffers) = i_rx; } b = vlib_get_buffer (vm, bi); @@ -1004,6 +1013,7 @@ tuntap_init (vlib_main_t * vm) ip4_add_del_interface_address_callback_t cb4; ip6_add_del_interface_address_callback_t cb6; tuntap_main_t * tm = &tuntap_main; + vlib_thread_main_t * m = vlib_get_thread_main (); error = vlib_call_init_function (vm, ip4_init); if (error) @@ -1018,6 +1028,8 @@ tuntap_init (vlib_main_t * vm) cb6.function = tuntap_ip6_add_del_interface_address; cb6.function_opaque = 0; vec_add1 (im6->add_del_interface_address_callbacks, cb6); + vec_validate_aligned (tm->threads, m->n_vlib_mains - 1, + CLIB_CACHE_LINE_BYTES); return 0; } -- cgit 1.2.3-korg