summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorFlorin Coras <fcoras@cisco.com>2019-08-12 14:17:47 -0700
committerDave Barach <openvpp@barachs.net>2019-08-14 14:32:20 +0000
commit9094b5c319d3f072d3c248fe7c876e4048c13ac2 (patch)
tree24f845eaef7d4b3ae9d0e69c0a10f7c00ce70205
parent6603567785d3aafe6532349ea761d0b93349b7f3 (diff)
tcp: extend protocol configuration
Type: feature Expose more configuration parameters and refactor some of the existing ones. Change-Id: If44c31ff77ce3d7e8da67d39a4ff61346bdf5ccc Signed-off-by: Florin Coras <fcoras@cisco.com>
-rw-r--r--src/vnet/tcp/tcp.c134
-rw-r--r--src/vnet/tcp/tcp.h137
-rwxr-xr-xsrc/vnet/tcp/tcp_debug.h4
-rwxr-xr-xsrc/vnet/tcp/tcp_input.c33
-rw-r--r--src/vnet/tcp/tcp_output.c20
5 files changed, 195 insertions, 133 deletions
diff --git a/src/vnet/tcp/tcp.c b/src/vnet/tcp/tcp.c
index 752257f4100..3b94420e639 100644
--- a/src/vnet/tcp/tcp.c
+++ b/src/vnet/tcp/tcp.c
@@ -73,7 +73,7 @@ tcp_add_del_adjacency (tcp_connection_t * tc, u8 is_add)
static void
tcp_cc_init (tcp_connection_t * tc)
{
- tc->cc_algo = tcp_cc_algo_get (tcp_main.cc_algo);
+ tc->cc_algo = tcp_cc_algo_get (tcp_cfg.cc_algo);
tc->cc_algo->init (tc);
}
@@ -343,7 +343,7 @@ tcp_connection_reset (tcp_connection_t * tc)
tcp_connection_timers_reset (tc);
/* Set the cleanup timer, in case the session layer/app don't
* cleanly close the connection */
- tcp_timer_set (tc, TCP_TIMER_WAITCLOSE, TCP_CLOSEWAIT_TIME);
+ tcp_timer_set (tc, TCP_TIMER_WAITCLOSE, tcp_cfg.closewait_time);
session_transport_reset_notify (&tc->connection);
tcp_connection_set_state (tc, TCP_STATE_CLOSED);
session_transport_closed_notify (&tc->connection);
@@ -354,7 +354,7 @@ tcp_connection_reset (tcp_connection_t * tc)
case TCP_STATE_CLOSING:
case TCP_STATE_LAST_ACK:
tcp_connection_timers_reset (tc);
- tcp_timer_set (tc, TCP_TIMER_WAITCLOSE, TCP_CLOSEWAIT_TIME);
+ tcp_timer_set (tc, TCP_TIMER_WAITCLOSE, tcp_cfg.closewait_time);
/* Make sure we mark the session as closed. In some states we may
* be still trying to send data */
tcp_connection_set_state (tc, TCP_STATE_CLOSED);
@@ -398,7 +398,7 @@ tcp_connection_close (tcp_connection_t * tc)
tcp_connection_timers_reset (tc);
tcp_send_fin (tc);
tcp_connection_set_state (tc, TCP_STATE_FIN_WAIT_1);
- tcp_timer_update (tc, TCP_TIMER_WAITCLOSE, TCP_FINWAIT1_TIME);
+ tcp_timer_update (tc, TCP_TIMER_WAITCLOSE, tcp_cfg.finwait1_time);
break;
case TCP_STATE_ESTABLISHED:
/* If closing with unread data, reset the connection */
@@ -407,7 +407,7 @@ tcp_connection_close (tcp_connection_t * tc)
tcp_send_reset (tc);
tcp_connection_timers_reset (tc);
tcp_connection_set_state (tc, TCP_STATE_CLOSED);
- tcp_timer_set (tc, TCP_TIMER_WAITCLOSE, TCP_CLOSEWAIT_TIME);
+ tcp_timer_set (tc, TCP_TIMER_WAITCLOSE, tcp_cfg.closewait_time);
session_transport_closed_notify (&tc->connection);
break;
}
@@ -419,7 +419,7 @@ tcp_connection_close (tcp_connection_t * tc)
/* Set a timer in case the peer stops responding. Otherwise the
* connection will be stuck here forever. */
ASSERT (tc->timers[TCP_TIMER_WAITCLOSE] == TCP_TIMER_HANDLE_INVALID);
- tcp_timer_set (tc, TCP_TIMER_WAITCLOSE, TCP_FINWAIT1_TIME);
+ tcp_timer_set (tc, TCP_TIMER_WAITCLOSE, tcp_cfg.finwait1_time);
break;
case TCP_STATE_CLOSE_WAIT:
if (!transport_max_tx_dequeue (&tc->connection))
@@ -427,20 +427,20 @@ tcp_connection_close (tcp_connection_t * tc)
tcp_send_fin (tc);
tcp_connection_timers_reset (tc);
tcp_connection_set_state (tc, TCP_STATE_LAST_ACK);
- tcp_timer_update (tc, TCP_TIMER_WAITCLOSE, TCP_2MSL_TIME);
+ tcp_timer_update (tc, TCP_TIMER_WAITCLOSE, tcp_cfg.lastack_time);
}
else
tc->flags |= TCP_CONN_FINPNDG;
break;
case TCP_STATE_FIN_WAIT_1:
- tcp_timer_update (tc, TCP_TIMER_WAITCLOSE, TCP_2MSL_TIME);
+ tcp_timer_update (tc, TCP_TIMER_WAITCLOSE, tcp_cfg.finwait1_time);
break;
case TCP_STATE_CLOSED:
tcp_connection_timers_reset (tc);
/* Delete connection but instead of doing it now wait until next
* dispatch cycle to give the session layer a chance to clear
* unhandled events */
- tcp_timer_update (tc, TCP_TIMER_WAITCLOSE, TCP_CLEANUP_TIME);
+ tcp_timer_update (tc, TCP_TIMER_WAITCLOSE, tcp_cfg.cleanup_time);
break;
default:
TCP_DBG ("state: %u", tc->state);
@@ -664,7 +664,7 @@ tcp_connection_init_vars (tcp_connection_t * tc)
/* tcp_connection_fib_attach (tc); */
if (transport_connection_is_tx_paced (&tc->connection)
- || tcp_main.tx_pacing)
+ || tcp_cfg.enable_tx_pacing)
tcp_enable_pacing (tc);
if (tc->flags & TCP_CONN_RATE_SAMPLE)
@@ -680,17 +680,17 @@ tcp_alloc_custom_local_endpoint (tcp_main_t * tm, ip46_address_t * lcl_addr,
int index, port;
if (is_ip4)
{
- index = tm->last_v4_address_rotor++;
- if (tm->last_v4_address_rotor >= vec_len (tm->ip4_src_addresses))
- tm->last_v4_address_rotor = 0;
- lcl_addr->ip4.as_u32 = tm->ip4_src_addresses[index].as_u32;
+ index = tm->last_v4_addr_rotor++;
+ if (tm->last_v4_addr_rotor >= vec_len (tcp_cfg.ip4_src_addrs))
+ tm->last_v4_addr_rotor = 0;
+ lcl_addr->ip4.as_u32 = tcp_cfg.ip4_src_addrs[index].as_u32;
}
else
{
- index = tm->last_v6_address_rotor++;
- if (tm->last_v6_address_rotor >= vec_len (tm->ip6_src_addresses))
- tm->last_v6_address_rotor = 0;
- clib_memcpy_fast (&lcl_addr->ip6, &tm->ip6_src_addresses[index],
+ index = tm->last_v6_addr_rotor++;
+ if (tm->last_v6_addr_rotor >= vec_len (tcp_cfg.ip6_src_addrs))
+ tm->last_v6_addr_rotor = 0;
+ clib_memcpy_fast (&lcl_addr->ip6, &tcp_cfg.ip6_src_addrs[index],
sizeof (ip6_address_t));
}
port = transport_alloc_local_port (TRANSPORT_PROTO_TCP, lcl_addr);
@@ -715,8 +715,8 @@ tcp_session_open (transport_endpoint_cfg_t * rmt)
/*
* Allocate local endpoint
*/
- if ((rmt->is_ip4 && vec_len (tm->ip4_src_addresses))
- || (!rmt->is_ip4 && vec_len (tm->ip6_src_addresses)))
+ if ((rmt->is_ip4 && vec_len (tcp_cfg.ip4_src_addrs))
+ || (!rmt->is_ip4 && vec_len (tcp_cfg.ip6_src_addrs)))
rv = tcp_alloc_custom_local_endpoint (tm, &lcl_addr, &lcl_port,
rmt->is_ip4);
else
@@ -1308,7 +1308,7 @@ tcp_timer_waitclose_handler (u32 conn_index)
if (!(tc->flags & TCP_CONN_FINPNDG))
{
tcp_connection_set_state (tc, TCP_STATE_CLOSED);
- tcp_timer_set (tc, TCP_TIMER_WAITCLOSE, TCP_CLEANUP_TIME);
+ tcp_timer_set (tc, TCP_TIMER_WAITCLOSE, tcp_cfg.cleanup_time);
break;
}
@@ -1321,7 +1321,7 @@ tcp_timer_waitclose_handler (u32 conn_index)
tcp_connection_set_state (tc, TCP_STATE_LAST_ACK);
/* Make sure we don't wait in LAST ACK forever */
- tcp_timer_set (tc, TCP_TIMER_WAITCLOSE, TCP_2MSL_TIME);
+ tcp_timer_set (tc, TCP_TIMER_WAITCLOSE, tcp_cfg.lastack_time);
/* Don't delete the connection yet */
break;
@@ -1334,21 +1334,21 @@ tcp_timer_waitclose_handler (u32 conn_index)
* Notify session layer that transport is closed. */
tcp_connection_set_state (tc, TCP_STATE_CLOSED);
tcp_send_reset (tc);
- tcp_timer_set (tc, TCP_TIMER_WAITCLOSE, TCP_CLEANUP_TIME);
+ tcp_timer_set (tc, TCP_TIMER_WAITCLOSE, tcp_cfg.cleanup_time);
}
else
{
/* We've sent the fin but no progress. Close the connection and
* to make sure everything is flushed, setup a cleanup timer */
tcp_connection_set_state (tc, TCP_STATE_CLOSED);
- tcp_timer_set (tc, TCP_TIMER_WAITCLOSE, TCP_CLEANUP_TIME);
+ tcp_timer_set (tc, TCP_TIMER_WAITCLOSE, tcp_cfg.cleanup_time);
}
break;
case TCP_STATE_LAST_ACK:
case TCP_STATE_CLOSING:
tcp_connection_timers_reset (tc);
tcp_connection_set_state (tc, TCP_STATE_CLOSED);
- tcp_timer_set (tc, TCP_TIMER_WAITCLOSE, TCP_CLEANUP_TIME);
+ tcp_timer_set (tc, TCP_TIMER_WAITCLOSE, tcp_cfg.cleanup_time);
session_transport_closed_notify (&tc->connection);
break;
default:
@@ -1395,7 +1395,7 @@ tcp_initialize_timer_wheels (tcp_main_t * tm)
foreach_vlib_main (({
tw = &tm->wrk_ctx[ii].timer_wheel;
tw_timer_wheel_init_16t_2w_512sl (tw, tcp_expired_timers_dispatch,
- 100e-3 /* timer period 100ms */ , ~0);
+ TCP_TIMER_TICK, ~0);
tw->last_run_time = vlib_time_now (this_vlib_main);
}));
/* *INDENT-ON* */
@@ -1443,7 +1443,7 @@ tcp_main_enable (vlib_main_t * vm)
vec_validate (tm->connections, num_threads - 1);
vec_validate (tm->wrk_ctx, num_threads - 1);
n_workers = num_threads == 1 ? 1 : vtm->n_threads;
- prealloc_conn_per_wrk = tm->preallocated_connections / n_workers;
+ prealloc_conn_per_wrk = tcp_cfg.preallocated_connections / n_workers;
for (thread = 0; thread < num_threads; thread++)
{
@@ -1464,9 +1464,9 @@ tcp_main_enable (vlib_main_t * vm)
/*
* Use a preallocated half-open connection pool?
*/
- if (tm->preallocated_half_open_connections)
+ if (tcp_cfg.preallocated_half_open_connections)
pool_init_fixed (tm->half_open_connections,
- tm->preallocated_half_open_connections);
+ tcp_cfg.preallocated_half_open_connections);
/* Initialize clocks per tick for TCP timestamp. Used to compute
* monotonically increasing timestamps. */
@@ -1514,6 +1514,34 @@ tcp_punt_unknown (vlib_main_t * vm, u8 is_ip4, u8 is_add)
tm->punt_unknown6 = is_add;
}
+/**
+ * Initialize default values for tcp parameters
+ */
+static void
+tcp_configuration_init (void)
+{
+ /* Rx fifo bounds. max_rx_fifo feeds the rcv window scale computation and
+ * min_rx_fifo is the initial unscaled window advertised, since fifos are
+ * not yet allocated when the SYN is sent */
+ tcp_cfg.max_rx_fifo = 32 << 20; /* 32MB */
+ tcp_cfg.min_rx_fifo = 4 << 10; /* 4kB */
+
+ tcp_cfg.default_mtu = 1460;
+ tcp_cfg.initial_cwnd_multiplier = 0;
+ tcp_cfg.enable_tx_pacing = 1;
+ tcp_cfg.cc_algo = TCP_CC_NEWRENO;
+
+ /* Time constants defined as timer tick (100ms) multiples */
+ tcp_cfg.delack_time = 1; /* 0.1s */
+ tcp_cfg.closewait_time = 20; /* 2s */
+ tcp_cfg.timewait_time = 100; /* 10s */
+ tcp_cfg.finwait1_time = 600; /* 60s */
+ tcp_cfg.lastack_time = 300; /* 30s */
+ tcp_cfg.finwait2_time = 300; /* 30s */
+ tcp_cfg.closing_time = 300; /* 30s */
+ tcp_cfg.cleanup_time = 1; /* 0.1s */
+}
+
static clib_error_t *
tcp_init (vlib_main_t * vm)
{
@@ -1538,11 +1566,10 @@ tcp_init (vlib_main_t * vm)
FIB_PROTOCOL_IP6, tcp6_output_node.index);
tcp_api_reference ();
+ tcp_configuration_init ();
+
tm->cc_algo_by_name = hash_create_string (0, sizeof (uword));
- tm->tx_pacing = 1;
- tm->cc_algo = TCP_CC_NEWRENO;
- tm->default_mtu = 1460;
- tm->initial_cwnd_multiplier = 0;
+
return 0;
}
@@ -1594,34 +1621,49 @@ unformat_tcp_cc_algo_cfg (unformat_input_t * input, va_list * va)
static clib_error_t *
tcp_config_fn (vlib_main_t * vm, unformat_input_t * input)
{
- tcp_main_t *tm = vnet_get_tcp_main ();
-
while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
{
if (unformat (input, "preallocated-connections %d",
- &tm->preallocated_connections))
+ &tcp_cfg.preallocated_connections))
;
else if (unformat (input, "preallocated-half-open-connections %d",
- &tm->preallocated_half_open_connections))
+ &tcp_cfg.preallocated_half_open_connections))
;
else if (unformat (input, "buffer-fail-fraction %f",
- &tm->buffer_fail_fraction))
+ &tcp_cfg.buffer_fail_fraction))
;
else if (unformat (input, "max-rx-fifo %U", unformat_memory_size,
- &tm->max_rx_fifo))
+ &tcp_cfg.max_rx_fifo))
;
- else if (unformat (input, "mtu %d", &tm->default_mtu))
+ else if (unformat (input, "min-rx-fifo %U", unformat_memory_size,
+ &tcp_cfg.min_rx_fifo))
+ ;
+ else if (unformat (input, "mtu %d", &tcp_cfg.default_mtu))
;
else if (unformat (input, "initial-cwnd-multiplier %d",
- &tm->initial_cwnd_multiplier))
+ &tcp_cfg.initial_cwnd_multiplier))
;
else if (unformat (input, "no-tx-pacing"))
- tm->tx_pacing = 0;
+ tcp_cfg.enable_tx_pacing = 0;
else if (unformat (input, "cc-algo %U", unformat_tcp_cc_algo,
- &tm->cc_algo))
+ &tcp_cfg.cc_algo))
;
else if (unformat (input, "%U", unformat_tcp_cc_algo_cfg))
;
+ else if (unformat (input, "closewait-time %d", &tcp_cfg.closewait_time))
+ tcp_cfg.closewait_time /= TCP_TIMER_TICK;
+ else if (unformat (input, "timewait-time %d", &tcp_cfg.timewait_time))
+ tcp_cfg.timewait_time /= TCP_TIMER_TICK;
+ else if (unformat (input, "finwait1-time %d", &tcp_cfg.finwait1_time))
+ tcp_cfg.finwait1_time /= TCP_TIMER_TICK;
+ else if (unformat (input, "finwait2-time %d", &tcp_cfg.finwait2_time))
+ tcp_cfg.finwait2_time /= TCP_TIMER_TICK;
+ else if (unformat (input, "lastack-time %d", &tcp_cfg.lastack_time))
+ tcp_cfg.lastack_time /= TCP_TIMER_TICK;
+ else if (unformat (input, "closing-time %d", &tcp_cfg.closing_time))
+ tcp_cfg.closing_time /= TCP_TIMER_TICK;
+ else if (unformat (input, "cleanup-time %d", &tcp_cfg.cleanup_time))
+ tcp_cfg.cleanup_time /= TCP_TIMER_TICK;
else
return clib_error_return (0, "unknown input `%U'",
format_unformat_error, input);
@@ -1646,7 +1688,6 @@ tcp_configure_v4_source_address_range (vlib_main_t * vm,
ip4_address_t * start,
ip4_address_t * end, u32 table_id)
{
- tcp_main_t *tm = vnet_get_tcp_main ();
vnet_main_t *vnm = vnet_get_main ();
u32 start_host_byte_order, end_host_byte_order;
fib_prefix_t prefix;
@@ -1697,7 +1738,7 @@ tcp_configure_v4_source_address_range (vlib_main_t * vm,
{
dpo_id_t dpo = DPO_INVALID;
- vec_add1 (tm->ip4_src_addresses, start[0]);
+ vec_add1 (tcp_cfg.ip4_src_addrs, start[0]);
/* Add local adjacencies for the range */
@@ -1735,7 +1776,6 @@ tcp_configure_v6_source_address_range (vlib_main_t * vm,
ip6_address_t * start,
ip6_address_t * end, u32 table_id)
{
- tcp_main_t *tm = vnet_get_tcp_main ();
fib_prefix_t prefix;
u32 fib_index = 0;
fib_node_index_t fei;
@@ -1755,7 +1795,7 @@ tcp_configure_v6_source_address_range (vlib_main_t * vm,
dpo_id_t dpo = DPO_INVALID;
/* Remember this address */
- vec_add1 (tm->ip6_src_addresses, start[0]);
+ vec_add1 (tcp_cfg.ip6_src_addrs, start[0]);
/* Lookup the prefix, to identify the interface involved */
prefix.fp_len = 128;
diff --git a/src/vnet/tcp/tcp.h b/src/vnet/tcp/tcp.h
index ae50947b797..753a1ca6996 100644
--- a/src/vnet/tcp/tcp.h
+++ b/src/vnet/tcp/tcp.h
@@ -33,8 +33,6 @@
#define TCP_CC_DATA_SZ 24
#define TCP_DUPACK_THRESHOLD 3
-#define TCP_MAX_RX_FIFO_SIZE 32 << 20
-#define TCP_MIN_RX_FIFO_SIZE 4 << 10
#define TCP_IW_N_SEGMENTS 10
#define TCP_ALWAYS_ACK 1 /**< On/off delayed acks */
#define TCP_USE_SACKS 1 /**< Disable only for testing */
@@ -91,17 +89,9 @@ extern timer_expiration_handler tcp_timer_retransmit_syn_handler;
#define TCP_TIMER_HANDLE_INVALID ((u32) ~0)
-/* Timer delays as multiples of 100ms */
-#define TCP_TO_TIMER_TICK TCP_TICK*10 /* Period for converting from TCP
- * ticks to timer units */
-#define TCP_DELACK_TIME 1 /* 0.1s */
-#define TCP_SYN_RCVD_TIME 600 /* 60s */
-#define TCP_2MSL_TIME 300 /* 30s */
-#define TCP_CLOSEWAIT_TIME 20 /* 2s */
-#define TCP_TIMEWAIT_TIME 100 /* 10s */
-#define TCP_FINWAIT1_TIME 600 /* 60s */
-#define TCP_CLEANUP_TIME 1 /* 0.1s */
-#define TCP_TIMER_PERSIST_MIN 2 /* 0.2s */
+#define TCP_TIMER_TICK 0.1 /**< Timer tick in seconds */
+#define TCP_TO_TIMER_TICK TCP_TICK*10 /**< Factor for converting
+ ticks to timer ticks */
#define TCP_RTO_MAX 60 * THZ /* Min max RTO (60s) as per RFC6298 */
#define TCP_RTO_MIN 0.2 * THZ /* Min RTO (200ms) - lower than standard */
@@ -511,6 +501,66 @@ typedef struct tcp_iss_seed_
u64 second;
} tcp_iss_seed_t;
+typedef struct tcp_configuration_
+{
+ /** Max rx fifo size for a session (in bytes). It is used to compute the
+ * rfc 7323 window scaling factor */
+ u32 max_rx_fifo;
+
+ /** Min rx fifo for a session (in bytes) */
+ u32 min_rx_fifo;
+
+ /** Default MTU to be used when establishing connections */
+ u16 default_mtu;
+
+ /** Initial CWND multiplier, which multiplies MSS to determine initial CWND.
+ * Set 0 to determine the initial CWND by another way */
+ u16 initial_cwnd_multiplier;
+
+ /** Enable tx pacing for new connections */
+ u8 enable_tx_pacing;
+
+ /** Default congestion control algorithm type */
+ tcp_cc_algorithm_type_e cc_algo;
+
+ /** Delayed ack time (disabled) */
+ u16 delack_time;
+ /** Timer ticks to wait for close from app. NOTE(review): the u16 timers
+ * are parsed with "%d" in tcp_config_fn - confirm the write width */
+ u16 closewait_time;
+
+ /** Timer ticks to wait in time-wait. Also known as 2MSL */
+ u16 timewait_time;
+
+ /** Timer ticks to wait in fin-wait1 to send fin and rcv fin-ack */
+ u16 finwait1_time;
+
+ /** Timer ticks to wait in last ack for ack */
+ u16 lastack_time;
+
+ /** Timer ticks to wait in fin-wait2 for fin */
+ u16 finwait2_time;
+
+ /** Timer ticks to wait in closing for fin ack */
+ u16 closing_time;
+
+ /** Timer ticks to wait before cleaning up the connection */
+ u16 cleanup_time;
+
+ /** Number of preallocated connections */
+ u32 preallocated_connections;
+
+ /** Number of preallocated half-open connections */
+ u32 preallocated_half_open_connections;
+
+ /** Vectors of src addresses. Optional unless one needs > 63K active-opens */
+ ip4_address_t *ip4_src_addrs;
+ ip6_address_t *ip6_src_addrs;
+
+ /** Fault-injection. Debug only */
+ f64 buffer_fail_fraction;
+} tcp_configuration_t;
+
typedef struct _tcp_main
{
/* Per-worker thread tcp connection pools */
@@ -528,17 +578,17 @@ typedef struct _tcp_main
/** per-worker context */
tcp_worker_ctx_t *wrk_ctx;
- /* Pool of half-open connections on which we've sent a SYN */
+ /** Pool of half-open connections on which we've sent a SYN */
tcp_connection_t *half_open_connections;
clib_spinlock_t half_open_lock;
/** vlib buffer size */
u32 bytes_per_buffer;
- /* Seed used to generate random iss */
+ /** Seed used to generate random iss */
tcp_iss_seed_t iss_seed;
- /* Congestion control algorithms registered */
+ /** Congestion control algorithms registered */
tcp_cc_algorithm_t *cc_algos;
/** Hash table of cc algorithms by name */
@@ -547,45 +597,23 @@ typedef struct _tcp_main
/** Last cc algo registered */
tcp_cc_algorithm_type_e cc_last_type;
- /*
- * Configuration
- */
-
- /* Flag that indicates if stack is on or off */
+ /** Flag that indicates if stack is on or off */
u8 is_enabled;
- /** Max rx fifo size for a session. It is used in to compute the
- * rfc 7323 window scaling factor */
- u32 max_rx_fifo;
-
- /** Default MTU to be used when establishing connections */
- u16 default_mtu;
-
- /** Initial CWND multiplier, which multiplies MSS to determine initial CWND.
- * Set 0 to determine the initial CWND by another way */
- u16 initial_cwnd_multiplier;
-
- /** Number of preallocated connections */
- u32 preallocated_connections;
- u32 preallocated_half_open_connections;
-
- /** Vectors of src addresses. Optional unless one needs > 63K active-opens */
- ip4_address_t *ip4_src_addresses;
- u32 last_v4_address_rotor;
- u32 last_v6_address_rotor;
- ip6_address_t *ip6_src_addresses;
-
- /** Enable tx pacing for new connections */
- u8 tx_pacing;
-
+ /** Flag that indicates if v4 punting is enabled */
u8 punt_unknown4;
+
+ /** Flag that indicates if v6 punting is enabled */
u8 punt_unknown6;
- /** fault-injection */
- f64 buffer_fail_fraction;
+ /** Rotor for v4 source addresses */
+ u32 last_v4_addr_rotor;
- /** Default congestion control algorithm type */
- tcp_cc_algorithm_type_e cc_algo;
+ /** Rotor for v6 source addresses */
+ u32 last_v6_addr_rotor;
+
+ /** Protocol configuration */
+ tcp_configuration_t cfg;
} tcp_main_t;
extern tcp_main_t tcp_main;
@@ -602,6 +630,7 @@ extern vlib_node_registration_t tcp6_rcv_process_node;
extern vlib_node_registration_t tcp4_listen_node;
extern vlib_node_registration_t tcp6_listen_node;
+#define tcp_cfg tcp_main.cfg
#define tcp_node_index(node_id, is_ip4) \
((is_ip4) ? tcp4_##node_id##_node.index : tcp6_##node_id##_node.index)
@@ -842,8 +871,8 @@ tcp_flight_size (const tcp_connection_t * tc)
always_inline u32
tcp_initial_cwnd (const tcp_connection_t * tc)
{
- if (tcp_main.initial_cwnd_multiplier > 0)
- return tcp_main.initial_cwnd_multiplier * tc->snd_mss;
+ if (tcp_cfg.initial_cwnd_multiplier > 0)
+ return tcp_cfg.initial_cwnd_multiplier * tc->snd_mss;
if (tc->snd_mss > 2190)
return 2 * tc->snd_mss;
@@ -1089,16 +1118,14 @@ tcp_persist_timer_set (tcp_connection_t * tc)
{
/* Reuse RTO. It's backed off in handler */
tcp_timer_set (tc, TCP_TIMER_PERSIST,
- clib_max (tc->rto * TCP_TO_TIMER_TICK,
- TCP_TIMER_PERSIST_MIN));
+ clib_max (tc->rto * TCP_TO_TIMER_TICK, 1));
}
always_inline void
tcp_persist_timer_update (tcp_connection_t * tc)
{
tcp_timer_update (tc, TCP_TIMER_PERSIST,
- clib_max (tc->rto * TCP_TO_TIMER_TICK,
- TCP_TIMER_PERSIST_MIN));
+ clib_max (tc->rto * TCP_TO_TIMER_TICK, 1));
}
always_inline void
diff --git a/src/vnet/tcp/tcp_debug.h b/src/vnet/tcp/tcp_debug.h
index 262d3faae10..dd2ada848cf 100755
--- a/src/vnet/tcp/tcp_debug.h
+++ b/src/vnet/tcp/tcp_debug.h
@@ -901,12 +901,12 @@ if (tcp_cc_time_to_print_stats (_tc)) \
num_threads = 1 /* main thread */ + vtm->n_threads; \
vec_validate (buffer_fail_counters, num_threads - 1); \
} \
- if (PREDICT_FALSE (tcp_main.buffer_fail_fraction != 0.0)) \
+ if (PREDICT_FALSE (tcp_cfg.buffer_fail_fraction != 0.0)) \
{ \
if (PREDICT_TRUE (buffer_fail_counters[thread_index] > 0)) \
{ \
if ((1.0 / (f32) (buffer_fail_counters[thread_index])) \
- < tcp_main.buffer_fail_fraction) \
+ < tcp_cfg.buffer_fail_fraction) \
{ \
buffer_fail_counters[thread_index] = 0.0000001; \
return -1; \
diff --git a/src/vnet/tcp/tcp_input.c b/src/vnet/tcp/tcp_input.c
index 595a76d6853..ccefe74e69a 100755
--- a/src/vnet/tcp/tcp_input.c
+++ b/src/vnet/tcp/tcp_input.c
@@ -1650,7 +1650,7 @@ tcp_rcv_fin (tcp_worker_ctx_t * wrk, tcp_connection_t * tc, vlib_buffer_t * b,
* in CLOSE-WAIT, set timer (reuse WAITCLOSE). */
tcp_connection_set_state (tc, TCP_STATE_CLOSE_WAIT);
tcp_program_disconnect (wrk, tc);
- tcp_timer_update (tc, TCP_TIMER_WAITCLOSE, TCP_CLOSEWAIT_TIME);
+ tcp_timer_update (tc, TCP_TIMER_WAITCLOSE, tcp_cfg.closewait_time);
TCP_EVT_DBG (TCP_EVT_FIN_RCVD, tc);
*error = TCP_ERROR_FIN_RCVD;
}
@@ -1947,7 +1947,7 @@ in_order:
if (tcp_can_delack (tc))
{
if (!tcp_timer_is_active (tc, TCP_TIMER_DELACK))
- tcp_timer_set (tc, TCP_TIMER_DELACK, TCP_DELACK_TIME);
+ tcp_timer_set (tc, TCP_TIMER_DELACK, tcp_cfg.delack_time);
goto done;
}
@@ -2745,7 +2745,7 @@ tcp46_rcv_process_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
/* If a fin was received and data was acked extend wait */
else if ((tc0->flags & TCP_CONN_FINRCVD) && tc0->bytes_acked)
tcp_timer_update (tc0, TCP_TIMER_WAITCLOSE,
- TCP_CLOSEWAIT_TIME);
+ tcp_cfg.closewait_time);
}
/* If FIN is ACKed */
else if (tc0->snd_una == tc0->snd_nxt)
@@ -2759,7 +2759,8 @@ tcp46_rcv_process_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
if (tc0->flags & TCP_CONN_FINRCVD)
{
tcp_connection_set_state (tc0, TCP_STATE_CLOSED);
- tcp_timer_set (tc0, TCP_TIMER_WAITCLOSE, TCP_CLEANUP_TIME);
+ tcp_timer_set (tc0, TCP_TIMER_WAITCLOSE,
+ tcp_cfg.cleanup_time);
session_transport_closed_notify (&tc0->connection);
goto drop;
}
@@ -2767,7 +2768,7 @@ tcp46_rcv_process_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
tcp_connection_set_state (tc0, TCP_STATE_FIN_WAIT_2);
/* Enable waitclose because we're willing to wait for peer's
* FIN but not indefinitely. */
- tcp_timer_set (tc0, TCP_TIMER_WAITCLOSE, TCP_2MSL_TIME);
+ tcp_timer_set (tc0, TCP_TIMER_WAITCLOSE, tcp_cfg.finwait2_time);
/* Don't try to deq the FIN acked */
if (tc0->burst_acked > 1)
@@ -2800,7 +2801,7 @@ tcp46_rcv_process_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
tcp_send_fin (tc0);
tcp_connection_timers_reset (tc0);
tcp_connection_set_state (tc0, TCP_STATE_LAST_ACK);
- tcp_timer_set (tc0, TCP_TIMER_WAITCLOSE, TCP_2MSL_TIME);
+ tcp_timer_set (tc0, TCP_TIMER_WAITCLOSE, tcp_cfg.lastack_time);
break;
case TCP_STATE_CLOSING:
/* In addition to the processing for the ESTABLISHED state, if
@@ -2814,7 +2815,7 @@ tcp46_rcv_process_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
tcp_connection_timers_reset (tc0);
tcp_connection_set_state (tc0, TCP_STATE_TIME_WAIT);
- tcp_timer_set (tc0, TCP_TIMER_WAITCLOSE, TCP_TIMEWAIT_TIME);
+ tcp_timer_set (tc0, TCP_TIMER_WAITCLOSE, tcp_cfg.timewait_time);
session_transport_closed_notify (&tc0->connection);
goto drop;
@@ -2841,7 +2842,7 @@ tcp46_rcv_process_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
* we can't ensure that we have no packets already enqueued
* to output. Rely instead on the waitclose timer */
tcp_connection_timers_reset (tc0);
- tcp_timer_set (tc0, TCP_TIMER_WAITCLOSE, TCP_CLEANUP_TIME);
+ tcp_timer_set (tc0, TCP_TIMER_WAITCLOSE, tcp_cfg.cleanup_time);
goto drop;
@@ -2858,7 +2859,7 @@ tcp46_rcv_process_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
goto drop;
tcp_program_ack (tc0);
- tcp_timer_update (tc0, TCP_TIMER_WAITCLOSE, TCP_TIMEWAIT_TIME);
+ tcp_timer_update (tc0, TCP_TIMER_WAITCLOSE, tcp_cfg.timewait_time);
goto drop;
break;
@@ -2900,7 +2901,7 @@ tcp46_rcv_process_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
tcp_program_ack (tc0);
tcp_connection_set_state (tc0, TCP_STATE_CLOSE_WAIT);
tcp_program_disconnect (wrk, tc0);
- tcp_timer_update (tc0, TCP_TIMER_WAITCLOSE, TCP_CLOSEWAIT_TIME);
+ tcp_timer_update (tc0, TCP_TIMER_WAITCLOSE, tcp_cfg.closewait_time);
break;
case TCP_STATE_SYN_RCVD:
/* Send FIN-ACK, enter LAST-ACK and because the app was not
@@ -2910,7 +2911,7 @@ tcp46_rcv_process_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
tc0->rcv_nxt += 1;
tcp_send_fin (tc0);
tcp_connection_set_state (tc0, TCP_STATE_LAST_ACK);
- tcp_timer_set (tc0, TCP_TIMER_WAITCLOSE, TCP_2MSL_TIME);
+ tcp_timer_set (tc0, TCP_TIMER_WAITCLOSE, tcp_cfg.lastack_time);
break;
case TCP_STATE_CLOSE_WAIT:
case TCP_STATE_CLOSING:
@@ -2926,14 +2927,16 @@ tcp46_rcv_process_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
* sending it. Since we already received a fin, do not wait
* for too long. */
tc0->flags |= TCP_CONN_FINRCVD;
- tcp_timer_update (tc0, TCP_TIMER_WAITCLOSE, TCP_CLOSEWAIT_TIME);
+ tcp_timer_update (tc0, TCP_TIMER_WAITCLOSE,
+ tcp_cfg.closewait_time);
}
else
{
tcp_connection_set_state (tc0, TCP_STATE_CLOSING);
tcp_program_ack (tc0);
/* Wait for ACK for our FIN but not forever */
- tcp_timer_update (tc0, TCP_TIMER_WAITCLOSE, TCP_2MSL_TIME);
+ tcp_timer_update (tc0, TCP_TIMER_WAITCLOSE,
+ tcp_cfg.closing_time);
}
break;
case TCP_STATE_FIN_WAIT_2:
@@ -2941,7 +2944,7 @@ tcp46_rcv_process_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
tc0->rcv_nxt += 1;
tcp_connection_set_state (tc0, TCP_STATE_TIME_WAIT);
tcp_connection_timers_reset (tc0);
- tcp_timer_set (tc0, TCP_TIMER_WAITCLOSE, TCP_TIMEWAIT_TIME);
+ tcp_timer_set (tc0, TCP_TIMER_WAITCLOSE, tcp_cfg.timewait_time);
tcp_program_ack (tc0);
session_transport_closed_notify (&tc0->connection);
break;
@@ -2949,7 +2952,7 @@ tcp46_rcv_process_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
/* Remain in the TIME-WAIT state. Restart the time-wait
* timeout.
*/
- tcp_timer_update (tc0, TCP_TIMER_WAITCLOSE, TCP_TIMEWAIT_TIME);
+ tcp_timer_update (tc0, TCP_TIMER_WAITCLOSE, tcp_cfg.timewait_time);
break;
}
error0 = TCP_ERROR_FIN_RCVD;
diff --git a/src/vnet/tcp/tcp_output.c b/src/vnet/tcp/tcp_output.c
index ef8b756e4e8..ea4dc428187 100644
--- a/src/vnet/tcp/tcp_output.c
+++ b/src/vnet/tcp/tcp_output.c
@@ -85,7 +85,7 @@ void
tcp_update_rcv_mss (tcp_connection_t * tc)
{
/* TODO find our iface MTU */
- tc->mss = tcp_main.default_mtu - sizeof (tcp_header_t);
+ tc->mss = tcp_cfg.default_mtu - sizeof (tcp_header_t);
}
/**
@@ -103,7 +103,7 @@ tcp_initial_wnd_unscaled (tcp_connection_t * tc)
tcp_update_rcv_mss (tc);
TCP_IW_N_SEGMENTS * tc->mss;
*/
- return TCP_MIN_RX_FIFO_SIZE;
+ return tcp_cfg.min_rx_fifo;
}
/**
@@ -113,17 +113,9 @@ tcp_initial_wnd_unscaled (tcp_connection_t * tc)
u32
tcp_initial_window_to_advertise (tcp_connection_t * tc)
{
- tcp_main_t *tm = &tcp_main;
- u32 max_fifo;
-
- /* Initial wnd for SYN. Fifos are not allocated yet.
- * Use some predefined value. For SYN-ACK we still want the
- * scale to be computed in the same way */
- max_fifo = tm->max_rx_fifo ? tm->max_rx_fifo : TCP_MAX_RX_FIFO_SIZE;
-
/* Compute rcv wscale only if peer advertised support for it */
if (tc->state != TCP_STATE_SYN_RCVD || tcp_opts_wscale (&tc->rcv_opts))
- tc->rcv_wscale = tcp_window_compute_scale (max_fifo);
+ tc->rcv_wscale = tcp_window_compute_scale (tcp_cfg.max_rx_fifo);
tc->rcv_wnd = tcp_initial_wnd_unscaled (tc);
@@ -276,7 +268,7 @@ tcp_make_syn_options (tcp_options_t * opts, u8 wnd_scale)
u8 len = 0;
opts->flags |= TCP_OPTS_FLAG_MSS;
- opts->mss = tcp_main.default_mtu; /*XXX discover that */
+ opts->mss = tcp_cfg.default_mtu; /*XXX discover that */
len += TCP_OPTION_LEN_MSS;
opts->flags |= TCP_OPTS_FLAG_WSCALE;
@@ -1529,7 +1521,7 @@ tcp_timer_retransmit_handler (u32 tc_index)
tcp_connection_set_state (tc, TCP_STATE_CLOSED);
session_transport_closing_notify (&tc->connection);
tcp_connection_timers_reset (tc);
- tcp_timer_update (tc, TCP_TIMER_WAITCLOSE, TCP_CLOSEWAIT_TIME);
+ tcp_timer_update (tc, TCP_TIMER_WAITCLOSE, tcp_cfg.closewait_time);
return;
}
@@ -1581,7 +1573,7 @@ tcp_timer_retransmit_handler (u32 tc_index)
{
tcp_connection_set_state (tc, TCP_STATE_CLOSED);
tcp_connection_timers_reset (tc);
- tcp_timer_update (tc, TCP_TIMER_WAITCLOSE, TCP_CLEANUP_TIME);
+ tcp_timer_update (tc, TCP_TIMER_WAITCLOSE, tcp_cfg.cleanup_time);
return;
}