From b26743d093141a2aef19bdf8a7fe06dcaa81329a Mon Sep 17 00:00:00 2001 From: Florin Coras Date: Tue, 26 Jun 2018 09:31:04 -0700 Subject: tcp/session: tx optimizations - cache and reuse tcp options and rcv_wnd for session layer tx bursts - avoid reading/setting total_length_not_including_first_buffer. It's part of a buffer's second cache line so it comes at a "cost". Change-Id: Id18219c2f7e07cf4c63ee74f9cdd9e5918904036 Signed-off-by: Florin Coras --- src/vnet/tcp/tcp.c | 2 +- src/vnet/tcp/tcp.h | 5 +++- src/vnet/tcp/tcp_output.c | 67 +++++++++++++++++++++++++++++++++-------------- 3 files changed, 53 insertions(+), 21 deletions(-) (limited to 'src/vnet/tcp') diff --git a/src/vnet/tcp/tcp.c b/src/vnet/tcp/tcp.c index cca9f1c5c90..45eaf016b1e 100644 --- a/src/vnet/tcp/tcp.c +++ b/src/vnet/tcp/tcp.c @@ -984,7 +984,7 @@ tcp_session_send_mss (transport_connection_t * trans_conn) /* Ensure snd_mss does accurately reflect the amount of data we can push * in a segment. This also makes sure that options are updated according to * the current state of the connection. */ - tcp_update_snd_mss (tc); + tcp_update_burst_snd_vars (tc); return tc->snd_mss; } diff --git a/src/vnet/tcp/tcp.h b/src/vnet/tcp/tcp.h index 176de2c10ac..e4168c4b701 100644 --- a/src/vnet/tcp/tcp.h +++ b/src/vnet/tcp/tcp.h @@ -379,6 +379,9 @@ typedef struct tcp_worker_ctx_ output nodes */ vlib_frame_t *ip_lookup_tx_frames[2]; /**< tx frames for ip 4/6 lookup nodes */ + CLIB_CACHE_LINE_ALIGN_MARK (cacheline1); + u8 cached_opts[40]; /**< cached 'on the wire' + options for bursts */ } tcp_worker_ctx_t; typedef struct _tcp_main @@ -527,7 +530,7 @@ void tcp_send_reset (tcp_connection_t * tc); void tcp_send_syn (tcp_connection_t * tc); void tcp_send_fin (tcp_connection_t * tc); void tcp_init_mss (tcp_connection_t * tc); -void tcp_update_snd_mss (tcp_connection_t * tc); +void tcp_update_burst_snd_vars (tcp_connection_t * tc); void tcp_update_rto (tcp_connection_t * tc); void tcp_flush_frame_to_output (vlib_main_t * vm, u8 thread_index, u8 is_ip4); void tcp_flush_frames_to_output (u8 thread_index); diff --git a/src/vnet/tcp/tcp_output.c b/src/vnet/tcp/tcp_output.c index 13eac11105c..4b7915828df 100644 --- a/src/vnet/tcp/tcp_output.c +++ b/src/vnet/tcp/tcp_output.c @@ -403,20 +403,33 @@ tcp_make_options (tcp_connection_t * tc, tcp_options_t * opts, } /** - * Update snd_mss to reflect the effective segment size that we can send - * by taking into account all TCP options, including SACKs + * Update burst send vars + * + * - Updates snd_mss to reflect the effective segment size that we can send + * by taking into account all TCP options, including SACKs. + * - Cache 'on the wire' options for reuse + * - Updates receive window which can be reused for a burst. + * + * This should *only* be called when doing bursts */ void -tcp_update_snd_mss (tcp_connection_t * tc) +tcp_update_burst_snd_vars (tcp_connection_t * tc) { + tcp_main_t *tm = &tcp_main; + /* Compute options to be used for connection. These may be reused when * sending data or to compute the effective mss (snd_mss) */ - tc->snd_opts_len = - tcp_make_options (tc, &tc->snd_opts, TCP_STATE_ESTABLISHED); + tc->snd_opts_len = tcp_make_options (tc, &tc->snd_opts, + TCP_STATE_ESTABLISHED); /* XXX check if MTU has been updated */ tc->snd_mss = clib_min (tc->mss, tc->rcv_opts.mss) - tc->snd_opts_len; ASSERT (tc->snd_mss > 0); + + tcp_options_write (tm->wrk_ctx[tc->c_thread_index].cached_opts, + &tc->snd_opts); + + tcp_update_rcv_wnd (tc); } void @@ -1116,32 +1129,47 @@ tcp_make_state_flags (tcp_connection_t * tc, tcp_state_t next_state) */ always_inline void tcp_push_hdr_i (tcp_connection_t * tc, vlib_buffer_t * b, - tcp_state_t next_state, u8 compute_opts) + tcp_state_t next_state, u8 compute_opts, u8 maybe_burst) { u32 advertise_wnd, data_len; - u8 tcp_hdr_opts_len, opts_write_len, flags; + u8 tcp_hdr_opts_len, flags; + tcp_main_t *tm = &tcp_main; tcp_header_t *th; - data_len = b->current_length + b->total_length_not_including_first_buffer; - ASSERT (!b->total_length_not_including_first_buffer - || (b->flags & VLIB_BUFFER_NEXT_PRESENT)); + data_len = b->current_length; + if (PREDICT_FALSE (b->flags & VLIB_BUFFER_NEXT_PRESENT)) + data_len += b->total_length_not_including_first_buffer; + vnet_buffer (b)->tcp.flags = 0; + vnet_buffer (b)->tcp.connection_index = tc->c_c_index; if (compute_opts) tc->snd_opts_len = tcp_make_options (tc, &tc->snd_opts, tc->state); tcp_hdr_opts_len = tc->snd_opts_len + sizeof (tcp_header_t); - advertise_wnd = tcp_window_to_advertise (tc, next_state); + + if (maybe_burst) + advertise_wnd = tc->rcv_wnd >> tc->rcv_wscale; + else + advertise_wnd = tcp_window_to_advertise (tc, next_state); + flags = tcp_make_state_flags (tc, next_state); - /* Push header and options */ th = vlib_buffer_push_tcp (b, tc->c_lcl_port, tc->c_rmt_port, tc->snd_nxt, tc->rcv_nxt, tcp_hdr_opts_len, flags, advertise_wnd); - opts_write_len = tcp_options_write ((u8 *) (th + 1), &tc->snd_opts); - ASSERT (opts_write_len == tc->snd_opts_len); - vnet_buffer (b)->tcp.connection_index = tc->c_c_index; + if (maybe_burst) + { + clib_memcpy ((u8 *) (th + 1), + tm->wrk_ctx[tc->c_thread_index].cached_opts, + tc->snd_opts_len); + } + else + { + u8 len = tcp_options_write ((u8 *) (th + 1), &tc->snd_opts); + ASSERT (len == tc->snd_opts_len); + } /* * Update connection variables @@ -1156,7 +1184,8 @@ tcp_push_hdr_i (tcp_connection_t * tc, vlib_buffer_t * b, u32 tcp_push_header (tcp_connection_t * tc, vlib_buffer_t * b) { - tcp_push_hdr_i (tc, b, TCP_STATE_ESTABLISHED, 0); + tcp_push_hdr_i (tc, b, TCP_STATE_ESTABLISHED, /* compute opts */ 0, + /* burst */ 1); tc->snd_una_max = tc->snd_nxt; ASSERT (seq_leq (tc->snd_una_max, tc->snd_una + tc->snd_wnd)); tcp_validate_txf_size (tc, tc->snd_una_max - tc->snd_una); @@ -1276,7 +1305,7 @@ tcp_prepare_retransmit_segment (tcp_connection_t * tc, u32 offset, max_deq_bytes); ASSERT (n_bytes == max_deq_bytes); b[0]->current_length = n_bytes; - tcp_push_hdr_i (tc, *b, tc->state, 0); + tcp_push_hdr_i (tc, *b, tc->state, /* compute opts */ 0, /* burst */ 0); } /* Split mss into multiple buffers */ else @@ -1339,7 +1368,7 @@ tcp_prepare_retransmit_segment (tcp_connection_t * tc, u32 offset, b[0]->total_length_not_including_first_buffer += n_peeked; } - tcp_push_hdr_i (tc, *b, tc->state, 0); + tcp_push_hdr_i (tc, *b, tc->state, /* compute opts */ 0, /* burst */ 0); } ASSERT (n_bytes > 0); @@ -1613,7 +1642,7 @@ tcp_timer_persist_handler (u32 index) || tc->snd_nxt == tc->snd_una_max || tc->rto_boff > 1)); - tcp_push_hdr_i (tc, b, tc->state, 0); + tcp_push_hdr_i (tc, b, tc->state, /* compute opts */ 0, /* burst */ 0); tc->snd_una_max = tc->snd_nxt; tcp_validate_txf_size (tc, tc->snd_una_max - tc->snd_una); tcp_enqueue_to_output (vm, b, bi, tc->c_is_ip4); -- cgit 1.2.3-korg