summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--src/vnet/session/session_node.c3
-rw-r--r--src/vnet/session/transport.c40
-rw-r--r--src/vnet/session/transport_interface.h14
-rw-r--r--src/vnet/tcp/tcp.c16
-rw-r--r--src/vnet/tcp/tcp.h7
-rwxr-xr-xsrc/vnet/tcp/tcp_debug.h18
-rw-r--r--src/vnet/tcp/tcp_input.c87
-rw-r--r--src/vnet/tcp/tcp_output.c10
8 files changed, 129 insertions, 66 deletions
diff --git a/src/vnet/session/session_node.c b/src/vnet/session/session_node.c
index 6ef461a5f34..34131723665 100644
--- a/src/vnet/session/session_node.c
+++ b/src/vnet/session/session_node.c
@@ -570,7 +570,8 @@ session_tx_fifo_read_and_snd_i (vlib_main_t * vm, vlib_node_runtime_t * node,
ctx->tc = session_tx_get_transport (ctx, peek_data);
ctx->snd_mss = ctx->transport_vft->send_mss (ctx->tc);
ctx->snd_space =
- transport_connection_max_tx_burst (ctx->tc, vm->clib_time.last_cpu_time);
+ transport_connection_snd_space (ctx->tc, vm->clib_time.last_cpu_time,
+ ctx->snd_mss);
if (ctx->snd_space == 0 || ctx->snd_mss == 0)
{
vec_add1 (wrk->pending_event_vector, *e);
diff --git a/src/vnet/session/transport.c b/src/vnet/session/transport.c
index fefedcae8f2..0dd9ccd3213 100644
--- a/src/vnet/session/transport.c
+++ b/src/vnet/session/transport.c
@@ -497,9 +497,14 @@ static inline u32
spacer_max_burst (spacer_t * pacer, u64 norm_time_now)
{
u64 n_periods = norm_time_now - pacer->last_update;
+ u64 inc;
+
+ if (n_periods > 0 && (inc = n_periods * pacer->tokens_per_period) > 10)
+ {
+ pacer->last_update = norm_time_now;
+ pacer->bucket += inc;
+ }
- pacer->last_update = norm_time_now;
- pacer->bucket += n_periods * pacer->tokens_per_period;
return clib_min (pacer->bucket, pacer->max_burst_size);
}
@@ -525,17 +530,22 @@ spacer_set_pace_rate (spacer_t * pacer, u64 rate_bytes_per_sec)
void
transport_connection_tx_pacer_init (transport_connection_t * tc,
- u32 rate_bytes_per_sec, u32 burst_bytes)
+ u32 rate_bytes_per_sec,
+ u32 initial_bucket)
{
vlib_main_t *vm = vlib_get_main ();
u64 time_now = vm->clib_time.last_cpu_time;
spacer_t *pacer = &tc->pacer;
+ f64 dispatch_period;
+ u32 burst_size;
tc->flags |= TRANSPORT_CONNECTION_F_IS_TX_PACED;
- spacer_update_max_burst_size (&tc->pacer, burst_bytes);
+ dispatch_period = transport_dispatch_period (tc->thread_index);
+ burst_size = rate_bytes_per_sec * dispatch_period;
+ spacer_update_max_burst_size (&tc->pacer, burst_size);
spacer_set_pace_rate (&tc->pacer, rate_bytes_per_sec);
pacer->last_update = time_now >> SPACER_CPU_TICKS_PER_PERIOD_SHIFT;
- pacer->bucket = burst_bytes;
+ pacer->bucket = initial_bucket;
}
void
@@ -550,17 +560,24 @@ transport_connection_tx_pacer_update (transport_connection_t * tc,
}
u32
-transport_connection_max_tx_burst (transport_connection_t * tc, u64 time_now)
+transport_connection_tx_pacer_burst (transport_connection_t * tc,
+ u64 time_now)
+{
+ time_now >>= SPACER_CPU_TICKS_PER_PERIOD_SHIFT;
+ return spacer_max_burst (&tc->pacer, time_now);
+}
+
+u32
+transport_connection_snd_space (transport_connection_t * tc, u64 time_now,
+ u16 mss)
{
u32 snd_space, max_paced_burst;
- u32 mss;
snd_space = tp_vfts[tc->proto].send_space (tc);
if (transport_connection_is_tx_paced (tc))
{
time_now >>= SPACER_CPU_TICKS_PER_PERIOD_SHIFT;
max_paced_burst = spacer_max_burst (&tc->pacer, time_now);
- mss = tp_vfts[tc->proto].send_mss (tc);
max_paced_burst = (max_paced_burst < mss) ? 0 : max_paced_burst;
snd_space = clib_min (snd_space, max_paced_burst);
snd_space = snd_space - snd_space % mss;
@@ -577,6 +594,13 @@ transport_connection_update_tx_stats (transport_connection_t * tc, u32 bytes)
}
void
+transport_connection_tx_pacer_update_bytes (transport_connection_t * tc,
+ u32 bytes)
+{
+ spacer_update_bucket (&tc->pacer, bytes);
+}
+
+void
transport_init_tx_pacers_period (void)
{
f64 cpu_freq = os_cpu_clock_frequency ();
diff --git a/src/vnet/session/transport_interface.h b/src/vnet/session/transport_interface.h
index ce3bb7fab7c..b7aa4b7fa3c 100644
--- a/src/vnet/session/transport_interface.h
+++ b/src/vnet/session/transport_interface.h
@@ -111,7 +111,7 @@ void transport_enable_disable (vlib_main_t * vm, u8 is_en);
*/
void transport_connection_tx_pacer_init (transport_connection_t * tc,
u32 rate_bytes_per_sec,
- u32 burst_bytes);
+ u32 initial_bucket);
/**
* Update tx pacer pacing rate
@@ -127,9 +127,13 @@ void transport_connection_tx_pacer_update (transport_connection_t * tc,
*
* @param tc transport connection
* @param time_now current cpu time as returned by @ref clib_cpu_time_now
+ * @param mss transport's mss
*/
-u32 transport_connection_max_tx_burst (transport_connection_t * tc,
- u64 time_now);
+u32 transport_connection_snd_space (transport_connection_t * tc,
+ u64 time_now, u16 mss);
+
+u32 transport_connection_tx_pacer_burst (transport_connection_t * tc,
+ u64 time_now);
/**
* Initialize period for tx pacers
@@ -163,6 +167,10 @@ u8 *format_transport_pacer (u8 * s, va_list * args);
void transport_connection_update_tx_stats (transport_connection_t * tc,
u32 bytes);
+void
+transport_connection_tx_pacer_update_bytes (transport_connection_t * tc,
+ u32 bytes);
+
#endif /* SRC_VNET_SESSION_TRANSPORT_INTERFACE_H_ */
/*
diff --git a/src/vnet/tcp/tcp.c b/src/vnet/tcp/tcp.c
index 8c0c5da07a1..bdf9c7af608 100644
--- a/src/vnet/tcp/tcp.c
+++ b/src/vnet/tcp/tcp.c
@@ -558,10 +558,11 @@ tcp_init_snd_vars (tcp_connection_t * tc)
void
tcp_enable_pacing (tcp_connection_t * tc)
{
- u32 max_burst, byte_rate;
- max_burst = 16 * tc->snd_mss;
+ u32 initial_bucket, byte_rate;
+ initial_bucket = 16 * tc->snd_mss;
byte_rate = 2 << 16;
- transport_connection_tx_pacer_init (&tc->connection, byte_rate, max_burst);
+ transport_connection_tx_pacer_init (&tc->connection, byte_rate,
+ initial_bucket);
tc->mrtt_us = (u32) ~ 0;
}
@@ -1318,6 +1319,7 @@ tcp_main_enable (vlib_main_t * vm)
num_threads = 1 /* main thread */ + vtm->n_threads;
vec_validate (tm->connections, num_threads - 1);
+ vec_validate (tm->wrk_ctx, num_threads - 1);
/*
* Preallocate connections. Assume that thread 0 won't
@@ -1339,6 +1341,13 @@ tcp_main_enable (vlib_main_t * vm)
if (preallocated_connections_per_thread)
pool_init_fixed (tm->connections[thread],
preallocated_connections_per_thread);
+ vec_validate (tm->wrk_ctx[thread].pending_fast_rxt, 0);
+ vec_validate (tm->wrk_ctx[thread].ongoing_fast_rxt, 0);
+ vec_validate (tm->wrk_ctx[thread].postponed_fast_rxt, 0);
+ vec_reset_length (tm->wrk_ctx[thread].pending_fast_rxt);
+ vec_reset_length (tm->wrk_ctx[thread].ongoing_fast_rxt);
+ vec_reset_length (tm->wrk_ctx[thread].postponed_fast_rxt);
+ tm->wrk_ctx[thread].vm = vlib_mains[thread];
}
/*
@@ -1358,7 +1367,6 @@ tcp_main_enable (vlib_main_t * vm)
clib_spinlock_init (&tm->half_open_lock);
}
- vec_validate (tm->wrk_ctx, num_threads - 1);
tcp_initialize_timer_wheels (tm);
tm->bytes_per_buffer = vlib_buffer_free_list_buffer_size
diff --git a/src/vnet/tcp/tcp.h b/src/vnet/tcp/tcp.h
index 4ba3d5e9d87..f7424c3202e 100644
--- a/src/vnet/tcp/tcp.h
+++ b/src/vnet/tcp/tcp.h
@@ -160,7 +160,7 @@ enum
};
#define TCP_SCOREBOARD_TRACE (0)
-#define TCP_MAX_SACK_BLOCKS 32 /**< Max number of SACK blocks stored */
+#define TCP_MAX_SACK_BLOCKS 256 /**< Max number of SACK blocks stored */
#define TCP_INVALID_SACK_HOLE_INDEX ((u32)~0)
typedef struct _scoreboard_trace_elt
@@ -390,6 +390,9 @@ typedef struct tcp_worker_ctx_
needing fast rxt */
u32 *ongoing_fast_rxt; /**< vector of connections
now doing fast rxt */
+ u32 *postponed_fast_rxt; /**< vector of connections
+ that will do fast rxt */
+ vlib_main_t *vm; /**< pointer to vm */
CLIB_CACHE_LINE_ALIGN_MARK (cacheline1);
u8 cached_opts[40]; /**< cached 'on the wire'
@@ -722,8 +725,8 @@ always_inline void
tcp_cc_rcv_ack (tcp_connection_t * tc)
{
tc->cc_algo->rcv_ack (tc);
- tcp_update_pacer (tc);
tc->tsecr_last_ack = tc->rcv_opts.tsecr;
+ tcp_update_pacer (tc);
}
always_inline void
diff --git a/src/vnet/tcp/tcp_debug.h b/src/vnet/tcp/tcp_debug.h
index 8f626b1afeb..cd4a6f04d6e 100755
--- a/src/vnet/tcp/tcp_debug.h
+++ b/src/vnet/tcp/tcp_debug.h
@@ -627,10 +627,8 @@ if (_av > 0) \
#if TCP_DEBUG_CC
-#define TCP_EVT_CC_EVT_HANDLER(_tc, _sub_evt, ...) \
+#define TCP_EVT_CC_EVT_PRINT(_tc, _sub_evt) \
{ \
- if (_tc->snd_una != _tc->iss) \
- TCP_EVT_CC_STAT_PRINT (_tc); \
ELOG_TYPE_DECLARE (_e) = \
{ \
.format = "cc: %s snd_space %u snd_una %u out %u flight %u", \
@@ -638,8 +636,8 @@ if (_av > 0) \
.n_enum_strings = 7, \
.enum_strings = { \
"fast-rxt", \
- "rxt-timeout", \
"first-rxt", \
+ "rxt-timeout", \
"recovered", \
"congestion", \
"undo", \
@@ -653,8 +651,18 @@ if (_av > 0) \
ed->data[3] = tcp_bytes_out(_tc); \
ed->data[4] = tcp_flight_size (_tc); \
}
+
+#define TCP_EVT_CC_EVT_HANDLER(_tc, _sub_evt, ...) \
+{ \
+ if (_tc->snd_una != _tc->iss) \
+ TCP_EVT_CC_STAT_PRINT (_tc); \
+ if ((_sub_evt <= 1 && TCP_DEBUG_CC > 1) \
+ || (_sub_evt > 1 && TCP_DEBUG_CC > 0)) \
+ TCP_EVT_CC_EVT_PRINT (_tc, _sub_evt); \
+}
#else
-#define TCP_EVT_CC_EVT_HANDLER(_tc, _sub_evt, ...)
+#define TCP_EVT_CC_EVT_HANDLER(_tc, _sub_evt, ...) \
+
#endif
#if TCP_DEBUG_CC > 1
diff --git a/src/vnet/tcp/tcp_input.c b/src/vnet/tcp/tcp_input.c
index ac0e996567e..154b9ac8363 100644
--- a/src/vnet/tcp/tcp_input.c
+++ b/src/vnet/tcp/tcp_input.c
@@ -1199,67 +1199,60 @@ void
tcp_do_fastretransmits (u32 thread_index)
{
tcp_worker_ctx_t *wrk = &tcp_main.wrk_ctx[thread_index];
- u32 max_burst_size, burst_size, n_segs = 0;
+ u32 max_burst_size, burst_size, n_segs = 0, n_segs_now;
+ u32 *ongoing_fast_rxt, burst_bytes, sent_bytes;
tcp_connection_t *tc;
+ u64 last_cpu_time;
int i;
- if (vec_len (wrk->pending_fast_rxt) == 0)
+ if (vec_len (wrk->pending_fast_rxt) == 0
+ && vec_len (wrk->postponed_fast_rxt) == 0)
return;
- vec_append (wrk->ongoing_fast_rxt, wrk->pending_fast_rxt);
- vec_reset_length (wrk->pending_fast_rxt);
+ last_cpu_time = wrk->vm->clib_time.last_cpu_time;
+ ongoing_fast_rxt = wrk->ongoing_fast_rxt;
+ vec_append (ongoing_fast_rxt, wrk->postponed_fast_rxt);
+ vec_append (ongoing_fast_rxt, wrk->pending_fast_rxt);
+
+ _vec_len (wrk->postponed_fast_rxt) = 0;
+ _vec_len (wrk->pending_fast_rxt) = 0;
max_burst_size = VLIB_FRAME_SIZE / vec_len (wrk->ongoing_fast_rxt);
max_burst_size = clib_max (max_burst_size, 1);
- for (i = 0; i < vec_len (wrk->ongoing_fast_rxt); i++)
+ for (i = 0; i < vec_len (ongoing_fast_rxt); i++)
{
- tc = tcp_connection_get (wrk->ongoing_fast_rxt[i], thread_index);
+ if (n_segs >= VLIB_FRAME_SIZE)
+ {
+ vec_add1 (wrk->postponed_fast_rxt, ongoing_fast_rxt[i]);
+ continue;
+ }
+
+ tc = tcp_connection_get (ongoing_fast_rxt[i], thread_index);
tc->flags &= ~TCP_CONN_FRXT_PENDING;
if (!tcp_in_fastrecovery (tc))
continue;
- /* TODO tx pacer instead of this */
- if (n_segs >= VLIB_FRAME_SIZE)
+ burst_size = clib_min (max_burst_size, VLIB_FRAME_SIZE - n_segs);
+ burst_bytes = transport_connection_tx_pacer_burst (&tc->connection,
+ last_cpu_time);
+ burst_size = clib_min (burst_size, burst_bytes / tc->snd_mss);
+ if (!burst_size)
{
tcp_program_fastretransmit (tc);
continue;
}
- burst_size = clib_min (max_burst_size, VLIB_FRAME_SIZE - n_segs);
+ n_segs_now = tcp_fast_retransmit (tc, burst_size);
+ sent_bytes = clib_min (n_segs_now * tc->snd_mss, burst_bytes);
+ transport_connection_tx_pacer_update_bytes (&tc->connection,
+ sent_bytes);
- if (tc->cwnd > tc->ssthresh + 3 * tc->snd_mss)
- {
- /* The first segment MUST be retransmitted */
- if (tcp_retransmit_first_unacked (tc))
- {
- tcp_program_fastretransmit (tc);
- continue;
- }
-
- /* Post retransmit update cwnd to ssthresh and account for the
- * three segments that have left the network and should've been
- * buffered at the receiver XXX */
- tc->cwnd = tc->ssthresh + 3 * tc->snd_mss;
-
- /* If cwnd allows, send more data */
- if (tcp_opts_sack_permitted (&tc->rcv_opts))
- {
- scoreboard_init_high_rxt (&tc->sack_sb,
- tc->snd_una + tc->snd_mss);
- tc->sack_sb.rescue_rxt = tc->snd_una - 1;
- n_segs += tcp_fast_retransmit_sack (tc, burst_size);
- }
- else
- {
- n_segs += tcp_fast_retransmit_no_sack (tc, burst_size);
- }
- }
- else
- n_segs += tcp_fast_retransmit (tc, burst_size);
+ n_segs += n_segs_now;
}
- vec_reset_length (wrk->ongoing_fast_rxt);
+ _vec_len (ongoing_fast_rxt) = 0;
+ wrk->ongoing_fast_rxt = ongoing_fast_rxt;
}
/**
@@ -1298,6 +1291,7 @@ tcp_cc_handle_event (tcp_connection_t * tc, u32 is_dack)
}
else if (tcp_should_fastrecover (tc))
{
+ u32 byte_rate;
ASSERT (!tcp_in_fastrecovery (tc));
/* Heuristic to catch potential late dupacks
@@ -1313,8 +1307,21 @@ tcp_cc_handle_event (tcp_connection_t * tc, u32 is_dack)
tc->cc_algo->rcv_cong_ack (tc, TCP_CC_DUPACK);
if (tcp_opts_sack_permitted (&tc->rcv_opts))
- tc->sack_sb.high_rxt = tc->snd_una;
+ {
+ tc->cwnd = tc->ssthresh;
+ scoreboard_init_high_rxt (&tc->sack_sb, tc->snd_una);
+ tc->sack_sb.rescue_rxt = tc->snd_una - 1;
+ }
+ else
+ {
+ /* Post retransmit update cwnd to ssthresh and account for the
+ * three segments that have left the network and should've been
+ * buffered at the receiver XXX */
+ tc->cwnd = tc->ssthresh + 3 * tc->snd_mss;
+ }
+ byte_rate = (0.3 * tc->cwnd) / ((f64) TCP_TICK * tc->srtt);
+ transport_connection_tx_pacer_init (&tc->connection, byte_rate, 0);
tcp_program_fastretransmit (tc);
return;
}
diff --git a/src/vnet/tcp/tcp_output.c b/src/vnet/tcp/tcp_output.c
index f14a61263d4..81579ef96a2 100644
--- a/src/vnet/tcp/tcp_output.c
+++ b/src/vnet/tcp/tcp_output.c
@@ -1452,10 +1452,10 @@ tcp_timer_retransmit_handler_i (u32 index, u8 is_syn)
tc->timers[TCP_TIMER_RETRANSMIT] = TCP_TIMER_HANDLE_INVALID;
}
- TCP_EVT_DBG (TCP_EVT_CC_EVT, tc, 1);
-
if (tc->state >= TCP_STATE_ESTABLISHED)
{
+ TCP_EVT_DBG (TCP_EVT_CC_EVT, tc, 2);
+
/* Lost FIN, retransmit and return */
if (tcp_is_lost_fin (tc))
{
@@ -1536,6 +1536,8 @@ tcp_timer_retransmit_handler_i (u32 index, u8 is_syn)
return;
}
+ TCP_EVT_DBG (TCP_EVT_CC_EVT, tc, 2);
+
/* Try without increasing RTO a number of times. If this fails,
* start growing RTO exponentially */
tc->rto_boff += 1;
@@ -1562,6 +1564,8 @@ tcp_timer_retransmit_handler_i (u32 index, u8 is_syn)
/* Retransmit SYN-ACK */
else if (tc->state == TCP_STATE_SYN_RCVD)
{
+ TCP_EVT_DBG (TCP_EVT_CC_EVT, tc, 2);
+
tc->rto_boff += 1;
if (tc->rto_boff > TCP_RTO_SYN_RETRIES)
tc->rto = clib_min (tc->rto << 1, TCP_RTO_MAX);
@@ -1693,7 +1697,7 @@ tcp_retransmit_first_unacked (tcp_connection_t * tc)
old_snd_nxt = tc->snd_nxt;
tc->snd_nxt = tc->snd_una;
- TCP_EVT_DBG (TCP_EVT_CC_EVT, tc, 2);
+ TCP_EVT_DBG (TCP_EVT_CC_EVT, tc, 1);
n_bytes = tcp_prepare_retransmit_segment (tc, 0, tc->snd_mss, &b);
if (!n_bytes)