From f6359c8cace5b73a813e5f4e3d1bc28f7752fcdf Mon Sep 17 00:00:00 2001 From: Florin Coras Date: Mon, 19 Jun 2017 12:26:09 -0400 Subject: Improve svm fifo and tcp tx path performance (VPP-846) - multiarch on svm fifo - avoid ip lookup on tx Change-Id: Iab0d85204a710979417bca1d692cc47877131203 Signed-off-by: Florin Coras Signed-off-by: Dave Barach --- src/svm/svm_fifo.c | 95 +++++++++++++++++++++++++++++++++++++--- src/uri/uri_tcp_test.c | 10 ++--- src/vnet/session/node.c | 13 +++--- src/vnet/session/transport.h | 7 ++- src/vnet/tcp/tcp.c | 100 +++++++++++++++++++++++++++++++++++++++++-- src/vnet/tcp/tcp.h | 4 ++ src/vnet/tcp/tcp_output.c | 29 +++++++++---- src/vnet/tcp/tcp_packet.h | 9 ++-- 8 files changed, 232 insertions(+), 35 deletions(-) diff --git a/src/svm/svm_fifo.c b/src/svm/svm_fifo.c index 6ca437cfd7b..aed5d6a7420 100644 --- a/src/svm/svm_fifo.c +++ b/src/svm/svm_fifo.c @@ -14,6 +14,7 @@ */ #include +#include static inline u8 position_lt (svm_fifo_t * f, u32 a, u32 b) @@ -417,10 +418,38 @@ svm_fifo_enqueue_internal (svm_fifo_t * f, u32 max_bytes, u8 * copy_from_here) return (total_copy_bytes); } +#define SVM_ENQUEUE_CLONE_TEMPLATE(arch, fn, tgt) \ + uword \ + __attribute__ ((flatten)) \ + __attribute__ ((target (tgt))) \ + CLIB_CPU_OPTIMIZED \ + fn ## _ ## arch ( svm_fifo_t * f, u32 max_bytes, u8 * copy_from_here) \ + { return fn (f, max_bytes, copy_from_here);} + +static int +svm_fifo_enqueue_nowait_ma (svm_fifo_t * f, u32 max_bytes, + u8 * copy_from_here) +{ + return svm_fifo_enqueue_internal (f, max_bytes, copy_from_here); +} + +foreach_march_variant (SVM_ENQUEUE_CLONE_TEMPLATE, + svm_fifo_enqueue_nowait_ma); +CLIB_MULTIARCH_SELECT_FN (svm_fifo_enqueue_nowait_ma); + int svm_fifo_enqueue_nowait (svm_fifo_t * f, u32 max_bytes, u8 * copy_from_here) { - return svm_fifo_enqueue_internal (f, max_bytes, copy_from_here); +#if CLIB_DEBUG > 0 + return svm_fifo_enqueue_nowait_ma (f, max_bytes, copy_from_here); +#else + static int (*fp) (svm_fifo_t *, u32, u8 *); + + if (PREDICT_FALSE (fp == 0)) + fp = (void *) svm_fifo_enqueue_nowait_ma_multiarch_select (); + + return (*fp) (f, max_bytes, copy_from_here); +#endif } /** @@ -541,15 +570,43 @@ svm_fifo_dequeue_internal (svm_fifo_t * f, u32 max_bytes, u8 * copy_here) return (total_copy_bytes); } -int -svm_fifo_dequeue_nowait (svm_fifo_t * f, u32 max_bytes, u8 * copy_here) +static int +svm_fifo_dequeue_nowait_ma (svm_fifo_t * f, u32 max_bytes, u8 * copy_here) { return svm_fifo_dequeue_internal (f, max_bytes, copy_here); } +#define SVM_FIFO_DEQUEUE_CLONE_TEMPLATE(arch, fn, tgt) \ + uword \ + __attribute__ ((flatten)) \ + __attribute__ ((target (tgt))) \ + CLIB_CPU_OPTIMIZED \ + fn ## _ ## arch ( svm_fifo_t * f, u32 max_bytes, \ + u8 * copy_here) \ + { return fn (f, max_bytes, copy_here);} + +foreach_march_variant (SVM_FIFO_DEQUEUE_CLONE_TEMPLATE, + svm_fifo_dequeue_nowait_ma); +CLIB_MULTIARCH_SELECT_FN (svm_fifo_dequeue_nowait_ma); + int -svm_fifo_peek (svm_fifo_t * f, u32 relative_offset, u32 max_bytes, - u8 * copy_here) +svm_fifo_dequeue_nowait (svm_fifo_t * f, u32 max_bytes, u8 * copy_here) +{ +#if CLIB_DEBUG > 0 + return svm_fifo_dequeue_nowait_ma (f, max_bytes, copy_here); +#else + static int (*fp) (svm_fifo_t *, u32, u8 *); + + if (PREDICT_FALSE (fp == 0)) + fp = (void *) svm_fifo_dequeue_nowait_ma_multiarch_select (); + + return (*fp) (f, max_bytes, copy_here); +#endif +} + +static int +svm_fifo_peek_ma (svm_fifo_t * f, u32 relative_offset, u32 max_bytes, + u8 * copy_here) { u32 total_copy_bytes, first_copy_bytes, second_copy_bytes; u32 cursize, nitems, real_head; @@ -586,6 +643,34 @@ svm_fifo_peek (svm_fifo_t * f, u32 relative_offset, u32 max_bytes, return total_copy_bytes; } +#define SVM_FIFO_PEEK_CLONE_TEMPLATE(arch, fn, tgt) \ + uword \ + __attribute__ ((flatten)) \ + __attribute__ ((target (tgt))) \ + CLIB_CPU_OPTIMIZED \ + fn ## _ ## arch ( svm_fifo_t * f, u32 relative_offset, u32 max_bytes, \ + u8 * copy_here) \ + { return fn (f, relative_offset, max_bytes, copy_here);} + +foreach_march_variant (SVM_FIFO_PEEK_CLONE_TEMPLATE, svm_fifo_peek_ma); +CLIB_MULTIARCH_SELECT_FN (svm_fifo_peek_ma); + +int +svm_fifo_peek (svm_fifo_t * f, u32 relative_offset, u32 max_bytes, + u8 * copy_here) +{ +#if CLIB_DEBUG > 0 + return svm_fifo_peek_ma (f, relative_offset, max_bytes, copy_here); +#else + static int (*fp) (svm_fifo_t *, u32, u32, u8 *); + + if (PREDICT_FALSE (fp == 0)) + fp = (void *) svm_fifo_peek_ma_multiarch_select (); + + return (*fp) (f, relative_offset, max_bytes, copy_here); +#endif +} + int svm_fifo_dequeue_drop (svm_fifo_t * f, u32 max_bytes) { diff --git a/src/uri/uri_tcp_test.c b/src/uri/uri_tcp_test.c index d1694cf4e73..80aab183b0a 100755 --- a/src/uri/uri_tcp_test.c +++ b/src/uri/uri_tcp_test.c @@ -398,7 +398,6 @@ static void vl_api_reset_session_t_handler (vl_api_reset_session_t * mp) { uri_tcp_test_main_t *utm = &uri_tcp_test_main; - session_t *session; vl_api_reset_session_reply_t *rmp; uword *p; int rv = 0; @@ -407,9 +406,8 @@ vl_api_reset_session_t_handler (vl_api_reset_session_t * mp) if (p) { - session = pool_elt_at_index (utm->sessions, p[0]); - hash_unset (utm->session_index_by_vpp_handles, mp->handle); - pool_put (utm->sessions, session); + clib_warning ("got reset"); + /* Cleanup later */ utm->time_to_stop = 1; } else @@ -603,7 +601,7 @@ send_test_chunk (uri_tcp_test_main_t * utm, svm_fifo_t * tx_fifo, int mypid, if (bytes_to_snd > vec_len (test_data)) bytes_to_snd = vec_len (test_data); - while (bytes_to_snd > 0) + while (bytes_to_snd > 0 && !utm->time_to_stop) { actual_write = (bytes_to_snd > queue_max_chunk) ? queue_max_chunk : bytes_to_snd; @@ -652,6 +650,8 @@ client_send_data (uri_tcp_test_main_t * utm) for (i = 0; i < n_iterations; i++) { send_test_chunk (utm, tx_fifo, mypid, 0); + if (utm->time_to_stop) + break; } leftover = utm->bytes_to_send % vec_len (test_data); diff --git a/src/vnet/session/node.c b/src/vnet/session/node.c index c0ab1bf096f..b24f5fd994d 100644 --- a/src/vnet/session/node.c +++ b/src/vnet/session/node.c @@ -248,6 +248,11 @@ session_tx_fifo_read_and_snd_i (vlib_main_t * vm, vlib_node_runtime_t * node, ASSERT (bi0); _vec_len (smm->tx_buffers[thread_index]) = n_bufs; + /* usual speculation, or the enqueue_x1 macro will barf */ + to_next[0] = bi0; + to_next += 1; + n_left_to_next -= 1; + b0 = vlib_get_buffer (vm, bi0); b0->error = 0; b0->flags = VLIB_BUFFER_TOTAL_LENGTH_VALID @@ -255,10 +260,6 @@ session_tx_fifo_read_and_snd_i (vlib_main_t * vm, vlib_node_runtime_t * node, b0->current_data = 0; b0->total_length_not_including_first_buffer = 0; - /* RX on the local interface. tx in default fib */ - vnet_buffer (b0)->sw_if_index[VLIB_RX] = 0; - vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0; - len_to_deq0 = clib_min (left_to_snd0, deq_per_buf); data0 = vlib_buffer_make_headroom (b0, MAX_HDRS_LEN); @@ -307,10 +308,6 @@ session_tx_fifo_read_and_snd_i (vlib_main_t * vm, vlib_node_runtime_t * node, })); /* *INDENT-ON* */ - /* usual speculation, or the enqueue_x1 macro will barf */ - to_next[0] = bi0; - to_next += 1; - n_left_to_next -= 1; VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b0); if (PREDICT_FALSE (n_trace > 0)) diff --git a/src/vnet/session/transport.h b/src/vnet/session/transport.h index 04bd5ca0f29..561a92575cb 100644 --- a/src/vnet/session/transport.h +++ b/src/vnet/session/transport.h @@ -30,13 +30,16 @@ typedef struct _transport_connection ip46_address_t lcl_ip; /**< Local IP */ u16 lcl_port; /**< Local port */ u16 rmt_port; /**< Remote port */ - u8 proto; /**< Transport protocol id (also session type) */ + u8 proto; /**< Protocol id (also session type) */ u32 s_index; /**< Parent session index */ u32 c_index; /**< Connection index in transport pool */ u8 is_ip4; /**< Flag if IP4 connection */ u32 thread_index; /**< Worker-thread index */ + fib_node_index_t rmt_fei; /**< FIB entry index for rmt */ + dpo_id_t rmt_dpo; /**< Forwarding DPO for rmt */ + #if TRANSPORT_DEBUG elog_track_t elog_track; /**< Event logging */ u32 cc_stat_tstamp; /**< CC stats timestamp */ @@ -59,6 +62,8 @@ typedef struct _transport_connection #define c_thread_index connection.thread_index #define c_elog_track connection.elog_track #define c_cc_stat_tstamp connection.cc_stat_tstamp +#define c_rmt_fei connection.rmt_fei +#define c_rmt_dpo connection.rmt_dpo } transport_connection_t; /* diff --git a/src/vnet/tcp/tcp.c b/src/vnet/tcp/tcp.c index 5c554bac5a9..4e85eb3fc93 100644 --- a/src/vnet/tcp/tcp.c +++ b/src/vnet/tcp/tcp.c @@ -16,6 +16,7 @@ #include #include #include +#include #include tcp_main_t tcp_main; @@ -342,6 +343,99 @@ tcp_connection_timers_reset (tcp_connection_t * tc) } } +typedef struct ip4_tcp_hdr +{ + ip4_header_t ip; + tcp_header_t tcp; +} ip4_tcp_hdr_t; + +typedef struct ip6_tcp_hdr +{ + ip6_header_t ip; + tcp_header_t tcp; +} ip6_tcp_hdr_t; + +static void +tcp_connection_select_lb_bucket (tcp_connection_t * tc, const dpo_id_t * dpo, + dpo_id_t * result) +{ + const dpo_id_t *choice; + load_balance_t *lb; + int hash; + + lb = load_balance_get (dpo->dpoi_index); + if (tc->c_is_ip4) + { + ip4_tcp_hdr_t hdr; + memset (&hdr, 0, sizeof (hdr)); + hdr.ip.protocol = IP_PROTOCOL_TCP; + hdr.ip.address_pair.src.as_u32 = tc->c_lcl_ip.ip4.as_u32; + hdr.ip.address_pair.dst.as_u32 = tc->c_rmt_ip.ip4.as_u32; + hdr.tcp.src_port = tc->c_lcl_port; + hdr.tcp.dst_port = tc->c_rmt_port; + hash = ip4_compute_flow_hash (&hdr.ip, lb->lb_hash_config); + } + else + { + ip6_tcp_hdr_t hdr; + memset (&hdr, 0, sizeof (hdr)); + hdr.ip.protocol = IP_PROTOCOL_TCP; + clib_memcpy (&hdr.ip.src_address, &tc->c_lcl_ip.ip6, + sizeof (ip6_address_t)); + clib_memcpy (&hdr.ip.dst_address, &tc->c_rmt_ip.ip6, + sizeof (ip6_address_t)); + hdr.tcp.src_port = tc->c_lcl_port; + hdr.tcp.dst_port = tc->c_rmt_port; + hash = ip6_compute_flow_hash (&hdr.ip, lb->lb_hash_config); + } + choice = load_balance_get_bucket_i (lb, hash & lb->lb_n_buckets_minus_1); + dpo_copy (result, choice); +} + +fib_node_index_t +tcp_lookup_rmt_in_fib (tcp_connection_t * tc) +{ + fib_prefix_t prefix; + + clib_memcpy (&prefix.fp_addr, &tc->c_rmt_ip, sizeof (prefix.fp_addr)); + prefix.fp_proto = tc->c_is_ip4 ? FIB_PROTOCOL_IP4 : FIB_PROTOCOL_IP6; + prefix.fp_len = tc->c_is_ip4 ? 32 : 128; + return fib_table_lookup (0, &prefix); +} + +static int +tcp_connection_stack_on_fib_entry (tcp_connection_t * tc) +{ + dpo_id_t choice = DPO_INVALID; + u32 output_node_index; + fib_entry_t *fe; + + fe = fib_entry_get (tc->c_rmt_fei); + if (fe->fe_lb.dpoi_type != DPO_LOAD_BALANCE) + return -1; + + tcp_connection_select_lb_bucket (tc, &fe->fe_lb, &choice); + + output_node_index = + tc->c_is_ip4 ? tcp4_output_node.index : tcp6_output_node.index; + dpo_stack_from_node (output_node_index, &tc->c_rmt_dpo, &choice); + return 0; +} + +/** Stack tcp connection on peer's fib entry. + * + * This ultimately populates the dpo the connection will use to send packets. + */ +static void +tcp_connection_fib_attach (tcp_connection_t * tc) +{ + tc->c_rmt_fei = tcp_lookup_rmt_in_fib (tc); + + ASSERT (tc->c_rmt_fei != FIB_NODE_INDEX_INVALID); + + tcp_connection_stack_on_fib_entry (tc); +} + /** Initialize tcp connection variables * * Should be called after having received a msg from the peer, i.e., a SYN or @@ -353,6 +447,7 @@ tcp_connection_init_vars (tcp_connection_t * tc) tcp_init_mss (tc); scoreboard_init (&tc->sack_sb); tcp_cc_init (tc); + tcp_connection_fib_attach (tc); } int @@ -361,7 +456,8 @@ tcp_connection_open (ip46_address_t * rmt_addr, u16 rmt_port, u8 is_ip4) tcp_main_t *tm = vnet_get_tcp_main (); tcp_connection_t *tc; fib_prefix_t prefix; - u32 fei, sw_if_index; + fib_node_index_t fei; + u32 sw_if_index; ip46_address_t lcl_addr; u16 lcl_port; @@ -985,8 +1081,6 @@ tcp_main_enable (vlib_main_t * vm) vec_validate (tm->timer_wheels, num_threads - 1); tcp_initialize_timer_wheels (tm); -// vec_validate (tm->delack_connections, num_threads - 1); - /* Initialize clocks per tick for TCP timestamp. Used to compute * monotonically increasing timestamps. */ tm->tstamp_ticks_per_clock = vm->clib_time.seconds_per_clock diff --git a/src/vnet/tcp/tcp.h b/src/vnet/tcp/tcp.h index e83987182fa..12d804b82f6 100644 --- a/src/vnet/tcp/tcp.h +++ b/src/vnet/tcp/tcp.h @@ -28,6 +28,7 @@ #define THZ (u32) (1/TCP_TICK) /**< TCP tick frequency */ #define TCP_TSTAMP_RESOLUTION TCP_TICK /**< Time stamp resolution */ #define TCP_PAWS_IDLE 24 * 24 * 60 * 60 * THZ /**< 24 days */ +#define TCP_FIB_RECHECK_PERIOD 1 * THZ /**< Recheck every 1s */ #define TCP_MAX_OPTION_SPACE 40 #define TCP_DUPACK_THRESHOLD 3 @@ -256,6 +257,7 @@ typedef struct _tcp_connection u16 mss; /**< Our max seg size that includes options */ u32 limited_transmit; /**< snd_nxt when limited transmit starts */ + u32 last_fib_check; /**< Last time we checked fib route for peer */ } tcp_connection_t; struct _tcp_cc_algorithm @@ -528,6 +530,8 @@ void tcp_cc_init_congestion (tcp_connection_t * tc); int tcp_cc_recover (tcp_connection_t * tc); void tcp_cc_fastrecovery_exit (tcp_connection_t * tc); +fib_node_index_t tcp_lookup_rmt_in_fib (tcp_connection_t * tc); + /* Made public for unit testing only */ void tcp_update_sack_list (tcp_connection_t * tc, u32 start, u32 end); diff --git a/src/vnet/tcp/tcp_output.c b/src/vnet/tcp/tcp_output.c index 554a981d924..41bebcb34af 100644 --- a/src/vnet/tcp/tcp_output.c +++ b/src/vnet/tcp/tcp_output.c @@ -22,17 +22,14 @@ vlib_node_registration_t tcp6_output_node; typedef enum _tcp_output_nect { TCP_OUTPUT_NEXT_DROP, - TCP_OUTPUT_NEXT_IP_LOOKUP, TCP_OUTPUT_N_NEXT } tcp_output_next_t; #define foreach_tcp4_output_next \ _ (DROP, "error-drop") \ - _ (IP_LOOKUP, "ip4-lookup") #define foreach_tcp6_output_next \ _ (DROP, "error-drop") \ - _ (IP_LOOKUP, "ip6-lookup") static char *tcp_error_strings[] = { #define tcp_error(n,s) s, @@ -1451,7 +1448,7 @@ tcp46_output_inline (vlib_main_t * vm, tcp_connection_t *tc0; tcp_tx_trace_t *t0; tcp_header_t *th0 = 0; - u32 error0 = TCP_ERROR_PKTS_SENT, next0 = TCP_OUTPUT_NEXT_IP_LOOKUP; + u32 error0 = TCP_ERROR_PKTS_SENT, next0 = TCP_OUTPUT_NEXT_DROP; bi0 = from[0]; to_next[0] = bi0; @@ -1530,10 +1527,26 @@ tcp46_output_inline (vlib_main_t * vm, tc0->rto_boff = 0; } - /* set fib index to default and lookup node */ - /* XXX network virtualization (vrf/vni) */ - vnet_buffer (b0)->sw_if_index[VLIB_RX] = 0; - vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0; + /* Make sure we haven't lost route to our peer */ + if (PREDICT_FALSE (tc0->last_fib_check + < tc0->snd_opts.tsval + TCP_FIB_RECHECK_PERIOD)) + { + if (PREDICT_TRUE + (tc0->c_rmt_fei == tcp_lookup_rmt_in_fib (tc0))) + { + tc0->last_fib_check = tc0->snd_opts.tsval; + } + else + { + clib_warning ("lost connection to peer"); + tcp_connection_reset (tc0); + goto done; + } + } + + /* Use pre-computed dpo to set next node */ + next0 = tc0->c_rmt_dpo.dpoi_next_node; + vnet_buffer (b0)->ip.adj_index[VLIB_TX] = tc0->c_rmt_dpo.dpoi_index; b0->flags |= VNET_BUFFER_LOCALLY_ORIGINATED; done: diff --git a/src/vnet/tcp/tcp_packet.h b/src/vnet/tcp/tcp_packet.h index 4f28cf32708..a6f62ee16d5 100644 --- a/src/vnet/tcp/tcp_packet.h +++ b/src/vnet/tcp/tcp_packet.h @@ -144,12 +144,11 @@ typedef struct { u8 flags; /** Option flags, see above */ - /* Received options */ - u16 mss; /**< Maximum segment size advertised by peer */ - u8 wscale; /**< Window scale advertised by peer */ - u32 tsval; /**< Peer's timestamp value */ + u16 mss; /**< Maximum segment size advertised */ + u8 wscale; /**< Window scale advertised */ + u32 tsval; /**< Timestamp value */ u32 tsecr; /**< Echoed/reflected time stamp */ - sack_block_t *sacks; /**< SACK blocks received */ + sack_block_t *sacks; /**< SACK blocks */ u8 n_sack_blocks; /**< Number of SACKs blocks */ } tcp_options_t; -- cgit 1.2.3-korg