summaryrefslogtreecommitdiffstats
path: root/src/vnet/tcp/tcp_output.c
diff options
context:
space:
mode:
authorFlorin Coras <fcoras@cisco.com>2018-06-14 14:55:50 -0700
committerDave Barach <openvpp@barachs.net>2018-06-19 22:08:34 +0000
commit8b20bf5ef72a85ed70d7457f33c096f1eef51d0a (patch)
tree7e485897c8ef6d5d21c45c58e74600250dca92b7 /src/vnet/tcp/tcp_output.c
parent0c8a3bc95dd79cc856c4210a2234d15153149be0 (diff)
tcp: optimize tcp output
Change-Id: Idf17a0633a1618b12c22b1119e40c2e9d3192df9 Signed-off-by: Florin Coras <fcoras@cisco.com>
Diffstat (limited to 'src/vnet/tcp/tcp_output.c')
-rw-r--r--src/vnet/tcp/tcp_output.c320
1 files changed, 158 insertions, 162 deletions
diff --git a/src/vnet/tcp/tcp_output.c b/src/vnet/tcp/tcp_output.c
index 46bea7a6f6a..641277b67fa 100644
--- a/src/vnet/tcp/tcp_output.c
+++ b/src/vnet/tcp/tcp_output.c
@@ -1147,13 +1147,6 @@ tcp_push_hdr_i (tcp_connection_t * tc, vlib_buffer_t * b,
tc->snd_nxt += data_len;
tc->rcv_las = tc->rcv_nxt;
- /* TODO this is updated in output as well ... */
- if (seq_gt (tc->snd_nxt, tc->snd_una_max))
- {
- tc->snd_una_max = tc->snd_nxt;
- tcp_validate_txf_size (tc, tc->snd_una_max - tc->snd_una);
- }
-
TCP_EVT_DBG (TCP_EVT_PKTIZE, tc);
}
@@ -1596,6 +1589,8 @@ tcp_timer_persist_handler (u32 index)
|| tc->rto_boff > 1));
tcp_push_hdr_i (tc, b, tc->state, 0);
+ tc->snd_una_max = tc->snd_nxt;
+ tcp_validate_txf_size (tc, tc->snd_una_max - tc->snd_una);
tcp_enqueue_to_output (vm, b, bi, tc->c_is_ip4);
/* Just sent new data, enable retransmit */
@@ -1766,7 +1761,7 @@ tcp_session_has_ooo_data (tcp_connection_t * tc)
static void
tcp_output_handle_link_local (tcp_connection_t * tc0, vlib_buffer_t * b0,
- u32 * next0, u32 * error0)
+ u16 * next0, u32 * error0)
{
ip_adjacency_t *adj;
adj_index_t ai;
@@ -1796,179 +1791,175 @@ tcp_output_handle_link_local (tcp_connection_t * tc0, vlib_buffer_t * b0,
vnet_buffer (b0)->ip.adj_index[VLIB_TX] = ai;
}
-always_inline uword
-tcp46_output_inline (vlib_main_t * vm,
- vlib_node_runtime_t * node,
- vlib_frame_t * from_frame, int is_ip4)
+static void
+tcp46_output_trace_frame (vlib_main_t * vm, vlib_node_runtime_t * node,
+ u32 * to_next, u32 n_bufs)
{
- u32 n_left_from, next_index, *from, *to_next;
- u32 my_thread_index = vm->thread_index;
+ u32 n_trace = vlib_get_trace_count (vm, node);
+ tcp_connection_t *tc;
+ tcp_tx_trace_t *t;
+ vlib_buffer_t *b;
+ tcp_header_t *th;
+ int i;
- from = vlib_frame_vector_args (from_frame);
- n_left_from = from_frame->n_vectors;
- next_index = node->cached_next_index;
- tcp_set_time_now (my_thread_index);
+ for (i = 0; i < clib_min (n_trace, n_bufs); i++)
+ {
+ b = vlib_get_buffer (vm, to_next[i]);
+ th = vlib_buffer_get_current (b);
+ tc = tcp_connection_get (vnet_buffer (b)->tcp.connection_index,
+ vm->thread_index);
+ t = vlib_add_trace (vm, node, b, sizeof (*t));
+ clib_memcpy (&t->tcp_header, th, sizeof (t->tcp_header));
+ clib_memcpy (&t->tcp_connection, tc, sizeof (t->tcp_connection));
+ }
+}
- while (n_left_from > 0)
+static inline void
+tcp_output_push_ip (vlib_main_t * vm, vlib_buffer_t * b0,
+ tcp_connection_t * tc0, u8 is_ip4)
+{
+ tcp_header_t *th0 = 0;
+
+ th0 = vlib_buffer_get_current (b0);
+ TCP_EVT_DBG (TCP_EVT_OUTPUT, tc0, th0->flags, b0->current_length);
+ if (is_ip4)
{
- u32 n_left_to_next;
+ vlib_buffer_push_ip4 (vm, b0, &tc0->c_lcl_ip4, &tc0->c_rmt_ip4,
+ IP_PROTOCOL_TCP, 1);
+ b0->flags |= VNET_BUFFER_F_OFFLOAD_TCP_CKSUM;
+ vnet_buffer (b0)->l4_hdr_offset = (u8 *) th0 - b0->data;
+ th0->checksum = 0;
+ }
+ else
+ {
+ ip6_header_t *ih0;
+ ih0 = vlib_buffer_push_ip6 (vm, b0, &tc0->c_lcl_ip6,
+ &tc0->c_rmt_ip6, IP_PROTOCOL_TCP);
+ b0->flags |= VNET_BUFFER_F_OFFLOAD_TCP_CKSUM;
+ vnet_buffer (b0)->l3_hdr_offset = (u8 *) ih0 - b0->data;
+ vnet_buffer (b0)->l4_hdr_offset = (u8 *) th0 - b0->data;
+ th0->checksum = 0;
+ }
+}
- vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+static inline void
+tcp_output_handle_packet (tcp_connection_t * tc0, vlib_buffer_t * b0,
+ u32 * error0, u16 * next0, u8 is_ip4)
+{
- while (n_left_from > 0 && n_left_to_next > 0)
+ if (PREDICT_FALSE (tc0->state == TCP_STATE_CLOSED))
+ {
+ *error0 = TCP_ERROR_INVALID_CONNECTION;
+ *next0 = TCP_OUTPUT_NEXT_DROP;
+ return;
+ }
+
+ vnet_buffer (b0)->sw_if_index[VLIB_TX] = tc0->c_fib_index;
+ vnet_buffer (b0)->sw_if_index[VLIB_RX] = 0;
+
+ if (!is_ip4)
+ {
+ if (PREDICT_FALSE (ip6_address_is_link_local_unicast (&tc0->c_rmt_ip6)))
+ tcp_output_handle_link_local (tc0, b0, next0, error0);
+ }
+
+ /* Filter out DUPACKs if there are no OOO segments left */
+ if (PREDICT_FALSE (vnet_buffer (b0)->tcp.flags & TCP_BUF_FLAG_DUPACK))
+ {
+ /* N.B. Should not filter burst of dupacks. Two issues:
+ * 1) dupacks open cwnd on remote peer when congested
+ * 2) acks leaving should have the latest rcv_wnd since the
+ * burst may have eaten up all of it, so only the old ones
+ * could be filtered.
+ */
+ if (!tcp_session_has_ooo_data (tc0))
{
- u32 bi0;
- vlib_buffer_t *b0;
- tcp_connection_t *tc0;
- tcp_tx_trace_t *t0;
- tcp_header_t *th0 = 0;
- u32 error0 = TCP_ERROR_PKTS_SENT, next0 = TCP_OUTPUT_NEXT_IP_LOOKUP;
+ *error0 = TCP_ERROR_FILTERED_DUPACKS;
+ *next0 = TCP_OUTPUT_NEXT_DROP;
+ return;
+ }
+ }
- if (n_left_from > 1)
- {
- vlib_buffer_t *pb;
- pb = vlib_get_buffer (vm, from[1]);
- vlib_prefetch_buffer_header (pb, STORE);
- CLIB_PREFETCH (pb->data, 2 * CLIB_CACHE_LINE_BYTES, STORE);
- }
+ /* Stop DELACK timer and fix flags */
+ tc0->flags &= ~(TCP_CONN_SNDACK);
+ if (!TCP_ALWAYS_ACK)
+ tcp_timer_reset (tc0, TCP_TIMER_DELACK);
+}
- bi0 = from[0];
- to_next[0] = bi0;
- from += 1;
- to_next += 1;
- n_left_from -= 1;
- n_left_to_next -= 1;
+always_inline uword
+tcp46_output_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
+ vlib_frame_t * frame, int is_ip4)
+{
+ u32 n_left_from, *from, thread_index = vm->thread_index;
+ vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b;
+ u16 nexts[VLIB_FRAME_SIZE], *next;
- b0 = vlib_get_buffer (vm, bi0);
- tc0 = tcp_connection_get (vnet_buffer (b0)->tcp.connection_index,
- my_thread_index);
- if (PREDICT_FALSE (tc0 == 0 || tc0->state == TCP_STATE_CLOSED))
- {
- error0 = TCP_ERROR_INVALID_CONNECTION;
- next0 = TCP_OUTPUT_NEXT_DROP;
- goto done;
- }
+ from = vlib_frame_vector_args (frame);
+ n_left_from = frame->n_vectors;
+ tcp_set_time_now (thread_index);
- th0 = vlib_buffer_get_current (b0);
- TCP_EVT_DBG (TCP_EVT_OUTPUT, tc0, th0->flags, b0->current_length);
- vnet_buffer (b0)->sw_if_index[VLIB_TX] = tc0->c_fib_index;
- vnet_buffer (b0)->sw_if_index[VLIB_RX] = 0;
+ if (PREDICT_FALSE (node->flags & VLIB_NODE_FLAG_TRACE))
+ tcp46_output_trace_frame (vm, node, from, n_left_from);
- if (is_ip4)
- {
- vlib_buffer_push_ip4 (vm, b0, &tc0->c_lcl_ip4, &tc0->c_rmt_ip4,
- IP_PROTOCOL_TCP, 1);
- b0->flags |= VNET_BUFFER_F_OFFLOAD_TCP_CKSUM;
- vnet_buffer (b0)->l4_hdr_offset = (u8 *) th0 - b0->data;
- th0->checksum = 0;
- }
- else
- {
- ip6_header_t *ih0;
- ih0 = vlib_buffer_push_ip6 (vm, b0, &tc0->c_lcl_ip6,
- &tc0->c_rmt_ip6, IP_PROTOCOL_TCP);
- b0->flags |= VNET_BUFFER_F_OFFLOAD_TCP_CKSUM;
- vnet_buffer (b0)->l3_hdr_offset = (u8 *) ih0 - b0->data;
- vnet_buffer (b0)->l4_hdr_offset = (u8 *) th0 - b0->data;
- th0->checksum = 0;
-
- if (PREDICT_FALSE
- (ip6_address_is_link_local_unicast (&tc0->c_rmt_ip6)))
- tcp_output_handle_link_local (tc0, b0, &next0, &error0);
- }
+ vlib_get_buffers (vm, from, bufs, n_left_from);
+ b = bufs;
+ next = nexts;
- /* Filter out DUPACKs if there are no OOO segments left */
- if (PREDICT_FALSE
- (vnet_buffer (b0)->tcp.flags & TCP_BUF_FLAG_DUPACK))
- {
- /* N.B. Should not filter burst of dupacks. Two issues:
- * 1) dupacks open cwnd on remote peer when congested
- * 2) acks leaving should have the latest rcv_wnd since the
- * burst may have eaten up all of it, so only the old ones
- * could be filtered.
- */
- if (!tcp_session_has_ooo_data (tc0))
- {
- error0 = TCP_ERROR_FILTERED_DUPACKS;
- next0 = TCP_OUTPUT_NEXT_DROP;
- goto done;
- }
- }
+ while (n_left_from >= 4)
+ {
+ u32 error0 = TCP_ERROR_PKTS_SENT, error1 = TCP_ERROR_PKTS_SENT;
+ tcp_connection_t *tc0, *tc1;
- /* Stop DELACK timer and fix flags */
- tc0->flags &= ~(TCP_CONN_SNDACK);
- tcp_timer_reset (tc0, TCP_TIMER_DELACK);
+ {
+ vlib_prefetch_buffer_header (b[2], STORE);
+ CLIB_PREFETCH (b[2]->data, 2 * CLIB_CACHE_LINE_BYTES, STORE);
- /* If not retransmitting
- * 1) update snd_una_max (SYN, SYNACK, FIN)
- * 2) If we're not tracking an ACK, start tracking */
- if (seq_lt (tc0->snd_una_max, tc0->snd_nxt))
- {
- tc0->snd_una_max = tc0->snd_nxt;
- if (tc0->rtt_ts == 0)
- {
- tc0->rtt_ts = tcp_time_now ();
- tc0->rtt_seq = tc0->snd_nxt;
- }
- }
+ vlib_prefetch_buffer_header (b[3], STORE);
+ CLIB_PREFETCH (b[3]->data, 2 * CLIB_CACHE_LINE_BYTES, STORE);
+ }
- /* Set the retransmit timer if not set already and not
- * doing a pure ACK */
- if (!tcp_timer_is_active (tc0, TCP_TIMER_RETRANSMIT)
- && tc0->snd_nxt != tc0->snd_una)
- {
- tcp_retransmit_timer_set (tc0);
- tc0->rto_boff = 0;
- }
+ next[0] = next[1] = TCP_OUTPUT_NEXT_IP_LOOKUP;
-#if 0
- /* Make sure we haven't lost route to our peer */
- if (PREDICT_FALSE (tc0->last_fib_check
- < tc0->snd_opts.tsval + TCP_FIB_RECHECK_PERIOD))
- {
- if (PREDICT_TRUE
- (tc0->c_rmt_fei == tcp_lookup_rmt_in_fib (tc0)))
- {
- tc0->last_fib_check = tc0->snd_opts.tsval;
- }
- else
- {
- clib_warning ("lost connection to peer");
- tcp_connection_reset (tc0);
- goto done;
- }
- }
+ tc0 = tcp_connection_get (vnet_buffer (b[0])->tcp.connection_index,
+ thread_index);
+ tc1 = tcp_connection_get (vnet_buffer (b[1])->tcp.connection_index,
+ thread_index);
- /* Use pre-computed dpo to set next node */
- next0 = tc0->c_rmt_dpo.dpoi_next_node;
- vnet_buffer (b0)->ip.adj_index[VLIB_TX] = tc0->c_rmt_dpo.dpoi_index;
-#endif
+ tcp_output_push_ip (vm, b[0], tc0, is_ip4);
+ tcp_output_push_ip (vm, b[1], tc1, is_ip4);
- done:
- b0->error = node->errors[error0];
- if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
- {
- t0 = vlib_add_trace (vm, node, b0, sizeof (*t0));
- if (th0)
- {
- clib_memcpy (&t0->tcp_header, th0, sizeof (t0->tcp_header));
- }
- else
- {
- memset (&t0->tcp_header, 0, sizeof (t0->tcp_header));
- }
- clib_memcpy (&t0->tcp_connection, tc0,
- sizeof (t0->tcp_connection));
- }
+ tcp_output_handle_packet (tc0, b[0], &error0, &next[0], is_ip4);
+ tcp_output_handle_packet (tc1, b[1], &error1, &next[1], is_ip4);
- vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
- n_left_to_next, bi0, next0);
+ b += 2;
+ next += 2;
+ n_left_from -= 2;
+ }
+ while (n_left_from > 0)
+ {
+ u32 error0 = TCP_ERROR_PKTS_SENT;
+ tcp_connection_t *tc0;
+
+ if (n_left_from > 1)
+ {
+ vlib_prefetch_buffer_header (b[0], STORE);
+ CLIB_PREFETCH (b[0]->data, 2 * CLIB_CACHE_LINE_BYTES, STORE);
}
- vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+ next[0] = TCP_OUTPUT_NEXT_IP_LOOKUP;
+ tc0 = tcp_connection_get (vnet_buffer (b[0])->tcp.connection_index,
+ thread_index);
+
+ tcp_output_push_ip (vm, b[0], tc0, is_ip4);
+ tcp_output_handle_packet (tc0, b[0], &error0, &next[0], is_ip4);
+
+ b += 1;
+ next += 1;
+ n_left_from -= 1;
}
- return from_frame->n_vectors;
+ vlib_buffer_enqueue_to_next (vm, node, from, nexts, frame->n_vectors);
+ return frame->n_vectors;
}
static uword
@@ -2029,19 +2020,24 @@ VLIB_REGISTER_NODE (tcp6_output_node) =
VLIB_NODE_FUNCTION_MULTIARCH (tcp6_output_node, tcp6_output);
u32
-tcp_push_header (transport_connection_t * tconn, vlib_buffer_t * b)
+tcp_session_push_header (transport_connection_t * tconn, vlib_buffer_t * b)
{
- tcp_connection_t *tc;
-
- tc = (tcp_connection_t *) tconn;
+ tcp_connection_t *tc = (tcp_connection_t *) tconn;
tcp_push_hdr_i (tc, b, TCP_STATE_ESTABLISHED, 0);
+ tc->snd_una_max = tc->snd_nxt;
ASSERT (seq_leq (tc->snd_una_max, tc->snd_una + tc->snd_wnd));
-
+ tcp_validate_txf_size (tc, tc->snd_una_max - tc->snd_una);
+ /* If not tracking an ACK, start tracking */
if (tc->rtt_ts == 0 && !tcp_in_cong_recovery (tc))
{
tc->rtt_ts = tcp_time_now ();
tc->rtt_seq = tc->snd_nxt;
}
+ if (PREDICT_FALSE (!tcp_timer_is_active (tc, TCP_TIMER_RETRANSMIT)))
+ {
+ tcp_retransmit_timer_set (tc);
+ tc->rto_boff = 0;
+ }
tcp_trajectory_add_start (b, 3);
return 0;
}