diff options
author | Florin Coras <fcoras@cisco.com> | 2018-06-14 14:55:50 -0700 |
---|---|---|
committer | Dave Barach <openvpp@barachs.net> | 2018-06-19 22:08:34 +0000 |
commit | 8b20bf5ef72a85ed70d7457f33c096f1eef51d0a (patch) | |
tree | 7e485897c8ef6d5d21c45c58e74600250dca92b7 /src/vnet | |
parent | 0c8a3bc95dd79cc856c4210a2234d15153149be0 (diff) |
tcp: optimize tcp output
Change-Id: Idf17a0633a1618b12c22b1119e40c2e9d3192df9
Signed-off-by: Florin Coras <fcoras@cisco.com>
Diffstat (limited to 'src/vnet')
-rw-r--r-- | src/vnet/session/session_debug.h | 31 | ||||
-rw-r--r-- | src/vnet/session/session_node.c | 10 | ||||
-rw-r--r-- | src/vnet/tcp/tcp.c | 2 | ||||
-rw-r--r-- | src/vnet/tcp/tcp.h | 3 | ||||
-rw-r--r-- | src/vnet/tcp/tcp_input.c | 2 | ||||
-rw-r--r-- | src/vnet/tcp/tcp_output.c | 320 |
6 files changed, 197 insertions, 171 deletions
diff --git a/src/vnet/session/session_debug.h b/src/vnet/session/session_debug.h index f63509d1ffb..559f0bd6ed1 100644 --- a/src/vnet/session/session_debug.h +++ b/src/vnet/session/session_debug.h @@ -23,6 +23,8 @@ _(DEQ, "dequeue") \ _(DEQ_NODE, "dequeue") \ _(POLL_GAP_TRACK, "poll gap track") \ + _(POLL_DISPATCH_TIME, "dispatch time")\ + _(DISPATCH_END, "dispatch end") \ typedef enum _session_evt_dbg { @@ -31,7 +33,7 @@ typedef enum _session_evt_dbg #undef _ } session_evt_dbg_e; -#define SESSION_DEBUG (0 && TRANSPORT_DEBUG) +#define SESSION_DEBUG 0 * (TRANSPORT_DEBUG > 0) #define SESSION_DEQ_NODE_EVTS (0) #define SESSION_EVT_POLL_DBG (0) @@ -55,6 +57,7 @@ typedef enum _session_evt_dbg } * ed; \ ed = ELOG_DATA (&vlib_global_main.elog_main, _e) +#if SESSION_DEQ_NODE_EVTS && SESSION_DEBUG > 1 #define SESSION_EVT_DEQ_HANDLER(_s, _body) \ { \ ELOG_TYPE_DECLARE (_e) = \ @@ -77,7 +80,6 @@ typedef enum _session_evt_dbg do { _body; } while (0); \ } -#if SESSION_DEQ_NODE_EVTS && SESSION_DEBUG > 1 #define SESSION_EVT_DEQ_NODE_HANDLER(_node_evt) \ { \ ELOG_TYPE_DECLARE (_e) = \ @@ -94,6 +96,8 @@ typedef enum _session_evt_dbg ed->data[0] = _node_evt; \ } #else +#define SESSION_EVT_DEQ_HANDLER(_s, _body) +#define SESSION_EVT_ENQ_HANDLER(_s, _body) #define SESSION_EVT_DEQ_NODE_HANDLER(_node_evt) #endif /* SESSION_DEQ_NODE_EVTS */ @@ -117,11 +121,34 @@ typedef enum _session_evt_dbg _smm->last_event_poll_by_thread[_ti] = now; \ } +#define SESSION_EVT_POLL_DISPATCH_TIME_HANDLER(_smm, _ti) \ +{ \ + f64 diff = vlib_time_now (vlib_get_main ()) - \ + _smm->last_event_poll_by_thread[_ti]; \ + if (diff > 5e-2) \ + { \ + ELOG_TYPE_DECLARE (_e) = \ + { \ + .format = "dispatch time: %d us", \ + .format_args = "i4", \ + }; \ + DEC_SESSION_ED(_e, 1); \ + ed->data[0] = diff *1000000.0; \ + } \ +} + #else #define SESSION_EVT_POLL_GAP(_smm, _my_thread_index) #define SESSION_EVT_POLL_GAP_TRACK_HANDLER(_smm, _my_thread_index) +#define SESSION_EVT_POLL_DISPATCH_TIME_HANDLER(_smm, _ti) #endif /* SESSION_EVT_POLL_DBG */ +#define SESSION_EVT_DISPATCH_END_HANDLER(_smm, _ti) \ +{ \ + SESSION_EVT_DEQ_NODE_HANDLER(1); \ + SESSION_EVT_POLL_DISPATCH_TIME_HANDLER(_smm, _ti); \ +} + #define CONCAT_HELPER(_a, _b) _a##_b #define CC(_a, _b) CONCAT_HELPER(_a, _b) #define SESSION_EVT_DBG(_evt, _args...) CC(_evt, _HANDLER)(_args) diff --git a/src/vnet/session/session_node.c b/src/vnet/session/session_node.c index 2eea30be439..1902adc29d5 100644 --- a/src/vnet/session/session_node.c +++ b/src/vnet/session/session_node.c @@ -249,11 +249,11 @@ session_tx_fill_buffer (vlib_main_t * vm, session_tx_context_t * ctx, session_tx_fifo_chain_tail (vm, ctx, b, n_bufs, peek_data); /* *INDENT-OFF* */ - SESSION_EVT_DBG(SESSION_EVT_DEQ, s, ({ - ed->data[0] = e->event_type; - ed->data[1] = max_dequeue; + SESSION_EVT_DBG(SESSION_EVT_DEQ, ctx->s, ({ + ed->data[0] = FIFO_EVENT_APP_TX; + ed->data[1] = ctx->max_dequeue; ed->data[2] = len_to_deq; - ed->data[3] = left_to_snd; + ed->data[3] = ctx->left_to_snd; })); /* *INDENT-ON* */ } @@ -841,7 +841,7 @@ skip_dequeue: vlib_node_increment_counter (vm, session_queue_node.index, SESSION_QUEUE_ERROR_TX, n_tx_packets); - SESSION_EVT_DBG (SESSION_EVT_DEQ_NODE, 1); + SESSION_EVT_DBG (SESSION_EVT_DISPATCH_END, smm, thread_index); return n_tx_packets; } diff --git a/src/vnet/tcp/tcp.c b/src/vnet/tcp/tcp.c index 2a696f19d22..854577b5575 100644 --- a/src/vnet/tcp/tcp.c +++ b/src/vnet/tcp/tcp.c @@ -1073,7 +1073,7 @@ const static transport_proto_vft_t tcp_proto = { .enable = vnet_tcp_enable_disable, .bind = tcp_session_bind, .unbind = tcp_session_unbind, - .push_header = tcp_push_header, + .push_header = tcp_session_push_header, .get_connection = tcp_session_get_transport, .get_listener = tcp_session_get_listener, .get_half_open = tcp_half_open_session_get_transport, diff --git a/src/vnet/tcp/tcp.h b/src/vnet/tcp/tcp.h index 3a31234876e..5673c8cab81 100644 --- a/src/vnet/tcp/tcp.h +++ b/src/vnet/tcp/tcp.h @@ -668,7 +668,8 @@ tcp_set_time_now (u32 thread_index) return tcp_main.time_now[thread_index]; } -u32 tcp_push_header (transport_connection_t * tconn, vlib_buffer_t * b); +u32 tcp_session_push_header (transport_connection_t * tconn, + vlib_buffer_t * b); u32 tcp_prepare_retransmit_segment (tcp_connection_t * tc, u32 offset, diff --git a/src/vnet/tcp/tcp_input.c b/src/vnet/tcp/tcp_input.c index b39d051f75f..0c13bbec626 100644 --- a/src/vnet/tcp/tcp_input.c +++ b/src/vnet/tcp/tcp_input.c @@ -2620,8 +2620,10 @@ tcp46_rcv_process_inline (vlib_main_t * vm, vlib_node_runtime_t * node, case TCP_STATE_SYN_RCVD: /* Send FIN-ACK notify app and enter CLOSE-WAIT */ tcp_connection_timers_reset (tc0); + tcp_retransmit_timer_set (tc0); tcp_make_fin (tc0, b0); tc0->snd_nxt += 1; + tc0->snd_una_max = tc0->snd_nxt; next0 = tcp_next_output (tc0->c_is_ip4); stream_session_disconnect_notify (&tc0->connection); tc0->state = TCP_STATE_CLOSE_WAIT; diff --git a/src/vnet/tcp/tcp_output.c b/src/vnet/tcp/tcp_output.c index 46bea7a6f6a..641277b67fa 100644 --- a/src/vnet/tcp/tcp_output.c +++ b/src/vnet/tcp/tcp_output.c @@ -1147,13 +1147,6 @@ tcp_push_hdr_i (tcp_connection_t * tc, vlib_buffer_t * b, tc->snd_nxt += data_len; tc->rcv_las = tc->rcv_nxt; - /* TODO this is updated in output as well ... */ - if (seq_gt (tc->snd_nxt, tc->snd_una_max)) - { - tc->snd_una_max = tc->snd_nxt; - tcp_validate_txf_size (tc, tc->snd_una_max - tc->snd_una); - } - TCP_EVT_DBG (TCP_EVT_PKTIZE, tc); } @@ -1596,6 +1589,8 @@ tcp_timer_persist_handler (u32 index) || tc->rto_boff > 1)); tcp_push_hdr_i (tc, b, tc->state, 0); + tc->snd_una_max = tc->snd_nxt; + tcp_validate_txf_size (tc, tc->snd_una_max - tc->snd_una); tcp_enqueue_to_output (vm, b, bi, tc->c_is_ip4); /* Just sent new data, enable retransmit */ @@ -1766,7 +1761,7 @@ tcp_session_has_ooo_data (tcp_connection_t * tc) static void tcp_output_handle_link_local (tcp_connection_t * tc0, vlib_buffer_t * b0, - u32 * next0, u32 * error0) + u16 * next0, u32 * error0) { ip_adjacency_t *adj; adj_index_t ai; @@ -1796,179 +1791,175 @@ tcp_output_handle_link_local (tcp_connection_t * tc0, vlib_buffer_t * b0, vnet_buffer (b0)->ip.adj_index[VLIB_TX] = ai; } -always_inline uword -tcp46_output_inline (vlib_main_t * vm, - vlib_node_runtime_t * node, - vlib_frame_t * from_frame, int is_ip4) +static void +tcp46_output_trace_frame (vlib_main_t * vm, vlib_node_runtime_t * node, + u32 * to_next, u32 n_bufs) { - u32 n_left_from, next_index, *from, *to_next; - u32 my_thread_index = vm->thread_index; + u32 n_trace = vlib_get_trace_count (vm, node); + tcp_connection_t *tc; + tcp_tx_trace_t *t; + vlib_buffer_t *b; + tcp_header_t *th; + int i; - from = vlib_frame_vector_args (from_frame); - n_left_from = from_frame->n_vectors; - next_index = node->cached_next_index; - tcp_set_time_now (my_thread_index); + for (i = 0; i < clib_min (n_trace, n_bufs); i++) + { + b = vlib_get_buffer (vm, to_next[i]); + th = vlib_buffer_get_current (b); + tc = tcp_connection_get (vnet_buffer (b)->tcp.connection_index, + vm->thread_index); + t = vlib_add_trace (vm, node, b, sizeof (*t)); + clib_memcpy (&t->tcp_header, th, sizeof (t->tcp_header)); + clib_memcpy (&t->tcp_connection, tc, sizeof (t->tcp_connection)); + } +} - while (n_left_from > 0) +static inline void +tcp_output_push_ip (vlib_main_t * vm, vlib_buffer_t * b0, + tcp_connection_t * tc0, u8 is_ip4) +{ + tcp_header_t *th0 = 0; + + th0 = vlib_buffer_get_current (b0); + TCP_EVT_DBG (TCP_EVT_OUTPUT, tc0, th0->flags, b0->current_length); + if (is_ip4) { - u32 n_left_to_next; + vlib_buffer_push_ip4 (vm, b0, &tc0->c_lcl_ip4, &tc0->c_rmt_ip4, + IP_PROTOCOL_TCP, 1); + b0->flags |= VNET_BUFFER_F_OFFLOAD_TCP_CKSUM; + vnet_buffer (b0)->l4_hdr_offset = (u8 *) th0 - b0->data; + th0->checksum = 0; + } + else + { + ip6_header_t *ih0; + ih0 = vlib_buffer_push_ip6 (vm, b0, &tc0->c_lcl_ip6, + &tc0->c_rmt_ip6, IP_PROTOCOL_TCP); + b0->flags |= VNET_BUFFER_F_OFFLOAD_TCP_CKSUM; + vnet_buffer (b0)->l3_hdr_offset = (u8 *) ih0 - b0->data; + vnet_buffer (b0)->l4_hdr_offset = (u8 *) th0 - b0->data; + th0->checksum = 0; + } +} - vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); +static inline void +tcp_output_handle_packet (tcp_connection_t * tc0, vlib_buffer_t * b0, + u32 * error0, u16 * next0, u8 is_ip4) +{ - while (n_left_from > 0 && n_left_to_next > 0) + if (PREDICT_FALSE (tc0->state == TCP_STATE_CLOSED)) + { + *error0 = TCP_ERROR_INVALID_CONNECTION; + *next0 = TCP_OUTPUT_NEXT_DROP; + return; + } + + vnet_buffer (b0)->sw_if_index[VLIB_TX] = tc0->c_fib_index; + vnet_buffer (b0)->sw_if_index[VLIB_RX] = 0; + + if (!is_ip4) + { + if (PREDICT_FALSE (ip6_address_is_link_local_unicast (&tc0->c_rmt_ip6))) + tcp_output_handle_link_local (tc0, b0, next0, error0); + } + + /* Filter out DUPACKs if there are no OOO segments left */ + if (PREDICT_FALSE (vnet_buffer (b0)->tcp.flags & TCP_BUF_FLAG_DUPACK)) + { + /* N.B. Should not filter burst of dupacks. Two issues: + * 1) dupacks open cwnd on remote peer when congested + * 2) acks leaving should have the latest rcv_wnd since the + * burst may have eaten up all of it, so only the old ones + * could be filtered. + */ + if (!tcp_session_has_ooo_data (tc0)) { - u32 bi0; - vlib_buffer_t *b0; - tcp_connection_t *tc0; - tcp_tx_trace_t *t0; - tcp_header_t *th0 = 0; - u32 error0 = TCP_ERROR_PKTS_SENT, next0 = TCP_OUTPUT_NEXT_IP_LOOKUP; + *error0 = TCP_ERROR_FILTERED_DUPACKS; + *next0 = TCP_OUTPUT_NEXT_DROP; + return; + } + } - if (n_left_from > 1) - { - vlib_buffer_t *pb; - pb = vlib_get_buffer (vm, from[1]); - vlib_prefetch_buffer_header (pb, STORE); - CLIB_PREFETCH (pb->data, 2 * CLIB_CACHE_LINE_BYTES, STORE); - } + /* Stop DELACK timer and fix flags */ + tc0->flags &= ~(TCP_CONN_SNDACK); + if (!TCP_ALWAYS_ACK) + tcp_timer_reset (tc0, TCP_TIMER_DELACK); +} - bi0 = from[0]; - to_next[0] = bi0; - from += 1; - to_next += 1; - n_left_from -= 1; - n_left_to_next -= 1; +always_inline uword +tcp46_output_inline (vlib_main_t * vm, vlib_node_runtime_t * node, + vlib_frame_t * frame, int is_ip4) +{ + u32 n_left_from, *from, thread_index = vm->thread_index; + vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b; + u16 nexts[VLIB_FRAME_SIZE], *next; - b0 = vlib_get_buffer (vm, bi0); - tc0 = tcp_connection_get (vnet_buffer (b0)->tcp.connection_index, - my_thread_index); - if (PREDICT_FALSE (tc0 == 0 || tc0->state == TCP_STATE_CLOSED)) - { - error0 = TCP_ERROR_INVALID_CONNECTION; - next0 = TCP_OUTPUT_NEXT_DROP; - goto done; - } + from = vlib_frame_vector_args (frame); + n_left_from = frame->n_vectors; + tcp_set_time_now (thread_index); - th0 = vlib_buffer_get_current (b0); - TCP_EVT_DBG (TCP_EVT_OUTPUT, tc0, th0->flags, b0->current_length); - vnet_buffer (b0)->sw_if_index[VLIB_TX] = tc0->c_fib_index; - vnet_buffer (b0)->sw_if_index[VLIB_RX] = 0; + if (PREDICT_FALSE (node->flags & VLIB_NODE_FLAG_TRACE)) + tcp46_output_trace_frame (vm, node, from, n_left_from); - if (is_ip4) - { - vlib_buffer_push_ip4 (vm, b0, &tc0->c_lcl_ip4, &tc0->c_rmt_ip4, - IP_PROTOCOL_TCP, 1); - b0->flags |= VNET_BUFFER_F_OFFLOAD_TCP_CKSUM; - vnet_buffer (b0)->l4_hdr_offset = (u8 *) th0 - b0->data; - th0->checksum = 0; - } - else - { - ip6_header_t *ih0; - ih0 = vlib_buffer_push_ip6 (vm, b0, &tc0->c_lcl_ip6, - &tc0->c_rmt_ip6, IP_PROTOCOL_TCP); - b0->flags |= VNET_BUFFER_F_OFFLOAD_TCP_CKSUM; - vnet_buffer (b0)->l3_hdr_offset = (u8 *) ih0 - b0->data; - vnet_buffer (b0)->l4_hdr_offset = (u8 *) th0 - b0->data; - th0->checksum = 0; - - if (PREDICT_FALSE - (ip6_address_is_link_local_unicast (&tc0->c_rmt_ip6))) - tcp_output_handle_link_local (tc0, b0, &next0, &error0); - } + vlib_get_buffers (vm, from, bufs, n_left_from); + b = bufs; + next = nexts; - /* Filter out DUPACKs if there are no OOO segments left */ - if (PREDICT_FALSE - (vnet_buffer (b0)->tcp.flags & TCP_BUF_FLAG_DUPACK)) - { - /* N.B. Should not filter burst of dupacks. Two issues: - * 1) dupacks open cwnd on remote peer when congested - * 2) acks leaving should have the latest rcv_wnd since the - * burst may have eaten up all of it, so only the old ones - * could be filtered. - */ - if (!tcp_session_has_ooo_data (tc0)) - { - error0 = TCP_ERROR_FILTERED_DUPACKS; - next0 = TCP_OUTPUT_NEXT_DROP; - goto done; - } - } + while (n_left_from >= 4) + { + u32 error0 = TCP_ERROR_PKTS_SENT, error1 = TCP_ERROR_PKTS_SENT; + tcp_connection_t *tc0, *tc1; - /* Stop DELACK timer and fix flags */ - tc0->flags &= ~(TCP_CONN_SNDACK); - tcp_timer_reset (tc0, TCP_TIMER_DELACK); + { + vlib_prefetch_buffer_header (b[2], STORE); + CLIB_PREFETCH (b[2]->data, 2 * CLIB_CACHE_LINE_BYTES, STORE); - /* If not retransmitting - * 1) update snd_una_max (SYN, SYNACK, FIN) - * 2) If we're not tracking an ACK, start tracking */ - if (seq_lt (tc0->snd_una_max, tc0->snd_nxt)) - { - tc0->snd_una_max = tc0->snd_nxt; - if (tc0->rtt_ts == 0) - { - tc0->rtt_ts = tcp_time_now (); - tc0->rtt_seq = tc0->snd_nxt; - } - } + vlib_prefetch_buffer_header (b[3], STORE); + CLIB_PREFETCH (b[3]->data, 2 * CLIB_CACHE_LINE_BYTES, STORE); + } - /* Set the retransmit timer if not set already and not - * doing a pure ACK */ - if (!tcp_timer_is_active (tc0, TCP_TIMER_RETRANSMIT) - && tc0->snd_nxt != tc0->snd_una) - { - tcp_retransmit_timer_set (tc0); - tc0->rto_boff = 0; - } + next[0] = next[1] = TCP_OUTPUT_NEXT_IP_LOOKUP; -#if 0 - /* Make sure we haven't lost route to our peer */ - if (PREDICT_FALSE (tc0->last_fib_check - < tc0->snd_opts.tsval + TCP_FIB_RECHECK_PERIOD)) - { - if (PREDICT_TRUE - (tc0->c_rmt_fei == tcp_lookup_rmt_in_fib (tc0))) - { - tc0->last_fib_check = tc0->snd_opts.tsval; - } - else - { - clib_warning ("lost connection to peer"); - tcp_connection_reset (tc0); - goto done; - } - } + tc0 = tcp_connection_get (vnet_buffer (b[0])->tcp.connection_index, + thread_index); + tc1 = tcp_connection_get (vnet_buffer (b[1])->tcp.connection_index, + thread_index); - /* Use pre-computed dpo to set next node */ - next0 = tc0->c_rmt_dpo.dpoi_next_node; - vnet_buffer (b0)->ip.adj_index[VLIB_TX] = tc0->c_rmt_dpo.dpoi_index; -#endif + tcp_output_push_ip (vm, b[0], tc0, is_ip4); + tcp_output_push_ip (vm, b[1], tc1, is_ip4); - done: - b0->error = node->errors[error0]; - if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED)) - { - t0 = vlib_add_trace (vm, node, b0, sizeof (*t0)); - if (th0) - { - clib_memcpy (&t0->tcp_header, th0, sizeof (t0->tcp_header)); - } - else - { - memset (&t0->tcp_header, 0, sizeof (t0->tcp_header)); - } - clib_memcpy (&t0->tcp_connection, tc0, - sizeof (t0->tcp_connection)); - } + tcp_output_handle_packet (tc0, b[0], &error0, &next[0], is_ip4); + tcp_output_handle_packet (tc1, b[1], &error1, &next[1], is_ip4); - vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next, - n_left_to_next, bi0, next0); + b += 2; + next += 2; + n_left_from -= 2; + } + while (n_left_from > 0) + { + u32 error0 = TCP_ERROR_PKTS_SENT; + tcp_connection_t *tc0; + + if (n_left_from > 1) + { + vlib_prefetch_buffer_header (b[0], STORE); + CLIB_PREFETCH (b[0]->data, 2 * CLIB_CACHE_LINE_BYTES, STORE); } - vlib_put_next_frame (vm, node, next_index, n_left_to_next); + next[0] = TCP_OUTPUT_NEXT_IP_LOOKUP; + tc0 = tcp_connection_get (vnet_buffer (b[0])->tcp.connection_index, + thread_index); + + tcp_output_push_ip (vm, b[0], tc0, is_ip4); + tcp_output_handle_packet (tc0, b[0], &error0, &next[0], is_ip4); + + b += 1; + next += 1; + n_left_from -= 1; } - return from_frame->n_vectors; + vlib_buffer_enqueue_to_next (vm, node, from, nexts, frame->n_vectors); + return frame->n_vectors; } static uword @@ -2029,19 +2020,24 @@ VLIB_REGISTER_NODE (tcp6_output_node) = VLIB_NODE_FUNCTION_MULTIARCH (tcp6_output_node, tcp6_output); u32 -tcp_push_header (transport_connection_t * tconn, vlib_buffer_t * b) +tcp_session_push_header (transport_connection_t * tconn, vlib_buffer_t * b) { - tcp_connection_t *tc; - - tc = (tcp_connection_t *) tconn; + tcp_connection_t *tc = (tcp_connection_t *) tconn; tcp_push_hdr_i (tc, b, TCP_STATE_ESTABLISHED, 0); + tc->snd_una_max = tc->snd_nxt; ASSERT (seq_leq (tc->snd_una_max, tc->snd_una + tc->snd_wnd)); - + tcp_validate_txf_size (tc, tc->snd_una_max - tc->snd_una); + /* If not tracking an ACK, start tracking */ if (tc->rtt_ts == 0 && !tcp_in_cong_recovery (tc)) { tc->rtt_ts = tcp_time_now (); tc->rtt_seq = tc->snd_nxt; } + if (PREDICT_FALSE (!tcp_timer_is_active (tc, TCP_TIMER_RETRANSMIT))) + { + tcp_retransmit_timer_set (tc); + tc->rto_boff = 0; + } tcp_trajectory_add_start (b, 3); return 0; } |