author    | Florin Coras <fcoras@cisco.com>          | 2019-08-12 14:17:47 -0700
committer | Dave Barach <openvpp@barachs.net>        | 2019-08-14 16:27:49 +0000
commit    | 2f51729bb31fcc2717782aa465835d93d73a567f |
tree      | 8557598f3c0171bdd8ecbabc8a30a9c51b6dabe6 | /src/vnet
parent    | bb83b16a3c5ceb112589613f09a0c3af195f8fc9 |
tcp: extend protocol configuration (v19.08-rc2)
Type: feature
Ticket: VPP-1736
Expose more configuration parameters and refactor some of the existing
ones.
Change-Id: If44c31ff77ce3d7e8da67d39a4ff61346bdf5ccc
Signed-off-by: Florin Coras <fcoras@cisco.com>
(cherry picked from commit 9094b5c319d3f072d3c248fe7c876e4048c13ac2)
Diffstat (limited to 'src/vnet')
-rw-r--r-- | src/vnet/tcp/tcp.c        | 134
-rw-r--r-- | src/vnet/tcp/tcp.h        | 137
-rwxr-xr-x | src/vnet/tcp/tcp_debug.h  |   4
-rwxr-xr-x | src/vnet/tcp/tcp_input.c  |  33
-rw-r--r-- | src/vnet/tcp/tcp_output.c |  20
5 files changed, 195 insertions, 133 deletions
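The knobs this patch exposes are set from the `tcp` stanza of startup.conf. The parameter names below are the ones parsed by `tcp_config_fn` in this diff; the values are purely illustrative, and the time parameters are assumed to be given in seconds (the parser divides them by `TCP_TIMER_TICK` before storing them as 100 ms ticks). A minimal sketch:

```
tcp {
  # illustrative values only; names as parsed by tcp_config_fn
  mtu 1460
  max-rx-fifo 32m
  min-rx-fifo 4k
  initial-cwnd-multiplier 0
  preallocated-connections 100000
  preallocated-half-open-connections 10000
  # assumes the new-reno algorithm is registered under the name "newreno"
  cc-algo newreno
  # time values: assumed to be seconds, converted to 100 ms ticks at parse time
  closewait-time 2
  timewait-time 10
  finwait1-time 60
  finwait2-time 30
  lastack-time 30
  closing-time 30
  # no-tx-pacing disables tx pacing for new connections
}
```

`cleanup-time` and `buffer-fail-fraction` are parsed the same way; `delack_time` gets a tcp_cfg field and a default in this patch but no startup.conf keyword.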
diff --git a/src/vnet/tcp/tcp.c b/src/vnet/tcp/tcp.c index 752257f4100..3b94420e639 100644 --- a/src/vnet/tcp/tcp.c +++ b/src/vnet/tcp/tcp.c @@ -73,7 +73,7 @@ tcp_add_del_adjacency (tcp_connection_t * tc, u8 is_add) static void tcp_cc_init (tcp_connection_t * tc) { - tc->cc_algo = tcp_cc_algo_get (tcp_main.cc_algo); + tc->cc_algo = tcp_cc_algo_get (tcp_cfg.cc_algo); tc->cc_algo->init (tc); } @@ -343,7 +343,7 @@ tcp_connection_reset (tcp_connection_t * tc) tcp_connection_timers_reset (tc); /* Set the cleanup timer, in case the session layer/app don't * cleanly close the connection */ - tcp_timer_set (tc, TCP_TIMER_WAITCLOSE, TCP_CLOSEWAIT_TIME); + tcp_timer_set (tc, TCP_TIMER_WAITCLOSE, tcp_cfg.closewait_time); session_transport_reset_notify (&tc->connection); tcp_connection_set_state (tc, TCP_STATE_CLOSED); session_transport_closed_notify (&tc->connection); @@ -354,7 +354,7 @@ tcp_connection_reset (tcp_connection_t * tc) case TCP_STATE_CLOSING: case TCP_STATE_LAST_ACK: tcp_connection_timers_reset (tc); - tcp_timer_set (tc, TCP_TIMER_WAITCLOSE, TCP_CLOSEWAIT_TIME); + tcp_timer_set (tc, TCP_TIMER_WAITCLOSE, tcp_cfg.closewait_time); /* Make sure we mark the session as closed. In some states we may * be still trying to send data */ tcp_connection_set_state (tc, TCP_STATE_CLOSED); @@ -398,7 +398,7 @@ tcp_connection_close (tcp_connection_t * tc) tcp_connection_timers_reset (tc); tcp_send_fin (tc); tcp_connection_set_state (tc, TCP_STATE_FIN_WAIT_1); - tcp_timer_update (tc, TCP_TIMER_WAITCLOSE, TCP_FINWAIT1_TIME); + tcp_timer_update (tc, TCP_TIMER_WAITCLOSE, tcp_cfg.finwait1_time); break; case TCP_STATE_ESTABLISHED: /* If closing with unread data, reset the connection */ @@ -407,7 +407,7 @@ tcp_connection_close (tcp_connection_t * tc) tcp_send_reset (tc); tcp_connection_timers_reset (tc); tcp_connection_set_state (tc, TCP_STATE_CLOSED); - tcp_timer_set (tc, TCP_TIMER_WAITCLOSE, TCP_CLOSEWAIT_TIME); + tcp_timer_set (tc, TCP_TIMER_WAITCLOSE, tcp_cfg.closewait_time); session_transport_closed_notify (&tc->connection); break; } @@ -419,7 +419,7 @@ tcp_connection_close (tcp_connection_t * tc) /* Set a timer in case the peer stops responding. Otherwise the * connection will be stuck here forever. 
*/ ASSERT (tc->timers[TCP_TIMER_WAITCLOSE] == TCP_TIMER_HANDLE_INVALID); - tcp_timer_set (tc, TCP_TIMER_WAITCLOSE, TCP_FINWAIT1_TIME); + tcp_timer_set (tc, TCP_TIMER_WAITCLOSE, tcp_cfg.finwait1_time); break; case TCP_STATE_CLOSE_WAIT: if (!transport_max_tx_dequeue (&tc->connection)) @@ -427,20 +427,20 @@ tcp_connection_close (tcp_connection_t * tc) tcp_send_fin (tc); tcp_connection_timers_reset (tc); tcp_connection_set_state (tc, TCP_STATE_LAST_ACK); - tcp_timer_update (tc, TCP_TIMER_WAITCLOSE, TCP_2MSL_TIME); + tcp_timer_update (tc, TCP_TIMER_WAITCLOSE, tcp_cfg.lastack_time); } else tc->flags |= TCP_CONN_FINPNDG; break; case TCP_STATE_FIN_WAIT_1: - tcp_timer_update (tc, TCP_TIMER_WAITCLOSE, TCP_2MSL_TIME); + tcp_timer_update (tc, TCP_TIMER_WAITCLOSE, tcp_cfg.finwait1_time); break; case TCP_STATE_CLOSED: tcp_connection_timers_reset (tc); /* Delete connection but instead of doing it now wait until next * dispatch cycle to give the session layer a chance to clear * unhandled events */ - tcp_timer_update (tc, TCP_TIMER_WAITCLOSE, TCP_CLEANUP_TIME); + tcp_timer_update (tc, TCP_TIMER_WAITCLOSE, tcp_cfg.cleanup_time); break; default: TCP_DBG ("state: %u", tc->state); @@ -664,7 +664,7 @@ tcp_connection_init_vars (tcp_connection_t * tc) /* tcp_connection_fib_attach (tc); */ if (transport_connection_is_tx_paced (&tc->connection) - || tcp_main.tx_pacing) + || tcp_cfg.enable_tx_pacing) tcp_enable_pacing (tc); if (tc->flags & TCP_CONN_RATE_SAMPLE) @@ -680,17 +680,17 @@ tcp_alloc_custom_local_endpoint (tcp_main_t * tm, ip46_address_t * lcl_addr, int index, port; if (is_ip4) { - index = tm->last_v4_address_rotor++; - if (tm->last_v4_address_rotor >= vec_len (tm->ip4_src_addresses)) - tm->last_v4_address_rotor = 0; - lcl_addr->ip4.as_u32 = tm->ip4_src_addresses[index].as_u32; + index = tm->last_v4_addr_rotor++; + if (tm->last_v4_addr_rotor >= vec_len (tcp_cfg.ip4_src_addrs)) + tm->last_v4_addr_rotor = 0; + lcl_addr->ip4.as_u32 = tcp_cfg.ip4_src_addrs[index].as_u32; } else { - index = tm->last_v6_address_rotor++; - if (tm->last_v6_address_rotor >= vec_len (tm->ip6_src_addresses)) - tm->last_v6_address_rotor = 0; - clib_memcpy_fast (&lcl_addr->ip6, &tm->ip6_src_addresses[index], + index = tm->last_v6_addr_rotor++; + if (tm->last_v6_addr_rotor >= vec_len (tcp_cfg.ip6_src_addrs)) + tm->last_v6_addr_rotor = 0; + clib_memcpy_fast (&lcl_addr->ip6, &tcp_cfg.ip6_src_addrs[index], sizeof (ip6_address_t)); } port = transport_alloc_local_port (TRANSPORT_PROTO_TCP, lcl_addr); @@ -715,8 +715,8 @@ tcp_session_open (transport_endpoint_cfg_t * rmt) /* * Allocate local endpoint */ - if ((rmt->is_ip4 && vec_len (tm->ip4_src_addresses)) - || (!rmt->is_ip4 && vec_len (tm->ip6_src_addresses))) + if ((rmt->is_ip4 && vec_len (tcp_cfg.ip4_src_addrs)) + || (!rmt->is_ip4 && vec_len (tcp_cfg.ip6_src_addrs))) rv = tcp_alloc_custom_local_endpoint (tm, &lcl_addr, &lcl_port, rmt->is_ip4); else @@ -1308,7 +1308,7 @@ tcp_timer_waitclose_handler (u32 conn_index) if (!(tc->flags & TCP_CONN_FINPNDG)) { tcp_connection_set_state (tc, TCP_STATE_CLOSED); - tcp_timer_set (tc, TCP_TIMER_WAITCLOSE, TCP_CLEANUP_TIME); + tcp_timer_set (tc, TCP_TIMER_WAITCLOSE, tcp_cfg.cleanup_time); break; } @@ -1321,7 +1321,7 @@ tcp_timer_waitclose_handler (u32 conn_index) tcp_connection_set_state (tc, TCP_STATE_LAST_ACK); /* Make sure we don't wait in LAST ACK forever */ - tcp_timer_set (tc, TCP_TIMER_WAITCLOSE, TCP_2MSL_TIME); + tcp_timer_set (tc, TCP_TIMER_WAITCLOSE, tcp_cfg.lastack_time); /* Don't delete the connection yet */ break; @@ -1334,21 +1334,21 @@ 
tcp_timer_waitclose_handler (u32 conn_index) * Notify session layer that transport is closed. */ tcp_connection_set_state (tc, TCP_STATE_CLOSED); tcp_send_reset (tc); - tcp_timer_set (tc, TCP_TIMER_WAITCLOSE, TCP_CLEANUP_TIME); + tcp_timer_set (tc, TCP_TIMER_WAITCLOSE, tcp_cfg.cleanup_time); } else { /* We've sent the fin but no progress. Close the connection and * to make sure everything is flushed, setup a cleanup timer */ tcp_connection_set_state (tc, TCP_STATE_CLOSED); - tcp_timer_set (tc, TCP_TIMER_WAITCLOSE, TCP_CLEANUP_TIME); + tcp_timer_set (tc, TCP_TIMER_WAITCLOSE, tcp_cfg.cleanup_time); } break; case TCP_STATE_LAST_ACK: case TCP_STATE_CLOSING: tcp_connection_timers_reset (tc); tcp_connection_set_state (tc, TCP_STATE_CLOSED); - tcp_timer_set (tc, TCP_TIMER_WAITCLOSE, TCP_CLEANUP_TIME); + tcp_timer_set (tc, TCP_TIMER_WAITCLOSE, tcp_cfg.cleanup_time); session_transport_closed_notify (&tc->connection); break; default: @@ -1395,7 +1395,7 @@ tcp_initialize_timer_wheels (tcp_main_t * tm) foreach_vlib_main (({ tw = &tm->wrk_ctx[ii].timer_wheel; tw_timer_wheel_init_16t_2w_512sl (tw, tcp_expired_timers_dispatch, - 100e-3 /* timer period 100ms */ , ~0); + TCP_TIMER_TICK, ~0); tw->last_run_time = vlib_time_now (this_vlib_main); })); /* *INDENT-ON* */ @@ -1443,7 +1443,7 @@ tcp_main_enable (vlib_main_t * vm) vec_validate (tm->connections, num_threads - 1); vec_validate (tm->wrk_ctx, num_threads - 1); n_workers = num_threads == 1 ? 1 : vtm->n_threads; - prealloc_conn_per_wrk = tm->preallocated_connections / n_workers; + prealloc_conn_per_wrk = tcp_cfg.preallocated_connections / n_workers; for (thread = 0; thread < num_threads; thread++) { @@ -1464,9 +1464,9 @@ tcp_main_enable (vlib_main_t * vm) /* * Use a preallocated half-open connection pool? */ - if (tm->preallocated_half_open_connections) + if (tcp_cfg.preallocated_half_open_connections) pool_init_fixed (tm->half_open_connections, - tm->preallocated_half_open_connections); + tcp_cfg.preallocated_half_open_connections); /* Initialize clocks per tick for TCP timestamp. Used to compute * monotonically increasing timestamps. */ @@ -1514,6 +1514,34 @@ tcp_punt_unknown (vlib_main_t * vm, u8 is_ip4, u8 is_add) tm->punt_unknown6 = is_add; } +/** + * Initialize default values for tcp parameters + */ +static void +tcp_configuration_init (void) +{ + /* Initial wnd for SYN. Fifos are not allocated at that point so use some + * predefined value. 
For SYN-ACK we still want the scale to be computed in + * the same way */ + tcp_cfg.max_rx_fifo = 32 << 20; + tcp_cfg.min_rx_fifo = 4 << 10; + + tcp_cfg.default_mtu = 1460; + tcp_cfg.initial_cwnd_multiplier = 0; + tcp_cfg.enable_tx_pacing = 1; + tcp_cfg.cc_algo = TCP_CC_NEWRENO; + + /* Time constants defined as timer tick (100ms) multiples */ + tcp_cfg.delack_time = 1; /* 0.1s */ + tcp_cfg.closewait_time = 20; /* 2s */ + tcp_cfg.timewait_time = 100; /* 10s */ + tcp_cfg.finwait1_time = 600; /* 60s */ + tcp_cfg.lastack_time = 300; /* 30s */ + tcp_cfg.finwait2_time = 300; /* 30s */ + tcp_cfg.closing_time = 300; /* 30s */ + tcp_cfg.cleanup_time = 1; /* 0.1s */ +} + static clib_error_t * tcp_init (vlib_main_t * vm) { @@ -1538,11 +1566,10 @@ tcp_init (vlib_main_t * vm) FIB_PROTOCOL_IP6, tcp6_output_node.index); tcp_api_reference (); + tcp_configuration_init (); + tm->cc_algo_by_name = hash_create_string (0, sizeof (uword)); - tm->tx_pacing = 1; - tm->cc_algo = TCP_CC_NEWRENO; - tm->default_mtu = 1460; - tm->initial_cwnd_multiplier = 0; + return 0; } @@ -1594,34 +1621,49 @@ unformat_tcp_cc_algo_cfg (unformat_input_t * input, va_list * va) static clib_error_t * tcp_config_fn (vlib_main_t * vm, unformat_input_t * input) { - tcp_main_t *tm = vnet_get_tcp_main (); - while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) { if (unformat (input, "preallocated-connections %d", - &tm->preallocated_connections)) + &tcp_cfg.preallocated_connections)) ; else if (unformat (input, "preallocated-half-open-connections %d", - &tm->preallocated_half_open_connections)) + &tcp_cfg.preallocated_half_open_connections)) ; else if (unformat (input, "buffer-fail-fraction %f", - &tm->buffer_fail_fraction)) + &tcp_cfg.buffer_fail_fraction)) ; else if (unformat (input, "max-rx-fifo %U", unformat_memory_size, - &tm->max_rx_fifo)) + &tcp_cfg.max_rx_fifo)) ; - else if (unformat (input, "mtu %d", &tm->default_mtu)) + else if (unformat (input, "min-rx-fifo %U", unformat_memory_size, + &tcp_cfg.min_rx_fifo)) + ; + else if (unformat (input, "mtu %d", &tcp_cfg.default_mtu)) ; else if (unformat (input, "initial-cwnd-multiplier %d", - &tm->initial_cwnd_multiplier)) + &tcp_cfg.initial_cwnd_multiplier)) ; else if (unformat (input, "no-tx-pacing")) - tm->tx_pacing = 0; + tcp_cfg.enable_tx_pacing = 0; else if (unformat (input, "cc-algo %U", unformat_tcp_cc_algo, - &tm->cc_algo)) + &tcp_cfg.cc_algo)) ; else if (unformat (input, "%U", unformat_tcp_cc_algo_cfg)) ; + else if (unformat (input, "closewait-time %d", &tcp_cfg.closewait_time)) + tcp_cfg.closewait_time /= TCP_TIMER_TICK; + else if (unformat (input, "timewait-time %d", &tcp_cfg.timewait_time)) + tcp_cfg.timewait_time /= TCP_TIMER_TICK; + else if (unformat (input, "finwait1-time %d", &tcp_cfg.finwait1_time)) + tcp_cfg.finwait1_time /= TCP_TIMER_TICK; + else if (unformat (input, "finwait2-time %d", &tcp_cfg.finwait2_time)) + tcp_cfg.finwait2_time /= TCP_TIMER_TICK; + else if (unformat (input, "lastack-time %d", &tcp_cfg.lastack_time)) + tcp_cfg.lastack_time /= TCP_TIMER_TICK; + else if (unformat (input, "closing-time %d", &tcp_cfg.closing_time)) + tcp_cfg.closing_time /= TCP_TIMER_TICK; + else if (unformat (input, "cleanup-time %d", &tcp_cfg.cleanup_time)) + tcp_cfg.cleanup_time /= TCP_TIMER_TICK; else return clib_error_return (0, "unknown input `%U'", format_unformat_error, input); @@ -1646,7 +1688,6 @@ tcp_configure_v4_source_address_range (vlib_main_t * vm, ip4_address_t * start, ip4_address_t * end, u32 table_id) { - tcp_main_t *tm = vnet_get_tcp_main (); vnet_main_t *vnm 
= vnet_get_main (); u32 start_host_byte_order, end_host_byte_order; fib_prefix_t prefix; @@ -1697,7 +1738,7 @@ tcp_configure_v4_source_address_range (vlib_main_t * vm, { dpo_id_t dpo = DPO_INVALID; - vec_add1 (tm->ip4_src_addresses, start[0]); + vec_add1 (tcp_cfg.ip4_src_addrs, start[0]); /* Add local adjacencies for the range */ @@ -1735,7 +1776,6 @@ tcp_configure_v6_source_address_range (vlib_main_t * vm, ip6_address_t * start, ip6_address_t * end, u32 table_id) { - tcp_main_t *tm = vnet_get_tcp_main (); fib_prefix_t prefix; u32 fib_index = 0; fib_node_index_t fei; @@ -1755,7 +1795,7 @@ tcp_configure_v6_source_address_range (vlib_main_t * vm, dpo_id_t dpo = DPO_INVALID; /* Remember this address */ - vec_add1 (tm->ip6_src_addresses, start[0]); + vec_add1 (tcp_cfg.ip6_src_addrs, start[0]); /* Lookup the prefix, to identify the interface involved */ prefix.fp_len = 128; diff --git a/src/vnet/tcp/tcp.h b/src/vnet/tcp/tcp.h index ae50947b797..753a1ca6996 100644 --- a/src/vnet/tcp/tcp.h +++ b/src/vnet/tcp/tcp.h @@ -33,8 +33,6 @@ #define TCP_CC_DATA_SZ 24 #define TCP_DUPACK_THRESHOLD 3 -#define TCP_MAX_RX_FIFO_SIZE 32 << 20 -#define TCP_MIN_RX_FIFO_SIZE 4 << 10 #define TCP_IW_N_SEGMENTS 10 #define TCP_ALWAYS_ACK 1 /**< On/off delayed acks */ #define TCP_USE_SACKS 1 /**< Disable only for testing */ @@ -91,17 +89,9 @@ extern timer_expiration_handler tcp_timer_retransmit_syn_handler; #define TCP_TIMER_HANDLE_INVALID ((u32) ~0) -/* Timer delays as multiples of 100ms */ -#define TCP_TO_TIMER_TICK TCP_TICK*10 /* Period for converting from TCP - * ticks to timer units */ -#define TCP_DELACK_TIME 1 /* 0.1s */ -#define TCP_SYN_RCVD_TIME 600 /* 60s */ -#define TCP_2MSL_TIME 300 /* 30s */ -#define TCP_CLOSEWAIT_TIME 20 /* 2s */ -#define TCP_TIMEWAIT_TIME 100 /* 10s */ -#define TCP_FINWAIT1_TIME 600 /* 60s */ -#define TCP_CLEANUP_TIME 1 /* 0.1s */ -#define TCP_TIMER_PERSIST_MIN 2 /* 0.2s */ +#define TCP_TIMER_TICK 0.1 /**< Timer tick in seconds */ +#define TCP_TO_TIMER_TICK TCP_TICK*10 /**< Factor for converting + ticks to timer ticks */ #define TCP_RTO_MAX 60 * THZ /* Min max RTO (60s) as per RFC6298 */ #define TCP_RTO_MIN 0.2 * THZ /* Min RTO (200ms) - lower than standard */ @@ -511,6 +501,66 @@ typedef struct tcp_iss_seed_ u64 second; } tcp_iss_seed_t; +typedef struct tcp_configuration_ +{ + /** Max rx fifo size for a session (in bytes). It is used in to compute the + * rfc 7323 window scaling factor */ + u32 max_rx_fifo; + + /** Min rx fifo for a session (in bytes) */ + u32 min_rx_fifo; + + /** Default MTU to be used when establishing connections */ + u16 default_mtu; + + /** Initial CWND multiplier, which multiplies MSS to determine initial CWND. + * Set 0 to determine the initial CWND by another way */ + u16 initial_cwnd_multiplier; + + /** Enable tx pacing for new connections */ + u8 enable_tx_pacing; + + /** Default congestion control algorithm type */ + tcp_cc_algorithm_type_e cc_algo; + + /** Delayed ack time (disabled) */ + u16 delack_time; + + /** Timer ticks to wait for close from app */ + u16 closewait_time; + + /** Timer ticks to wait in time-wait. 
Also known as 2MSL */ + u16 timewait_time; + + /** Timer ticks to wait in fin-wait1 to send fin and rcv fin-ack */ + u16 finwait1_time; + + /** Timer ticks to wait in last ack for ack */ + u16 lastack_time; + + /** Timer ticks to wait in fin-wait2 for fin */ + u16 finwait2_time; + + /** Timer ticks to wait in closing for fin ack */ + u16 closing_time; + + /** Timer ticks to wait before cleaning up the connection */ + u16 cleanup_time; + + /** Number of preallocated connections */ + u32 preallocated_connections; + + /** Number of preallocated half-open connections */ + u32 preallocated_half_open_connections; + + /** Vectors of src addresses. Optional unless one needs > 63K active-opens */ + ip4_address_t *ip4_src_addrs; + ip6_address_t *ip6_src_addrs; + + /** Fault-injection. Debug only */ + f64 buffer_fail_fraction; +} tcp_configuration_t; + typedef struct _tcp_main { /* Per-worker thread tcp connection pools */ @@ -528,17 +578,17 @@ typedef struct _tcp_main /** per-worker context */ tcp_worker_ctx_t *wrk_ctx; - /* Pool of half-open connections on which we've sent a SYN */ + /** Pool of half-open connections on which we've sent a SYN */ tcp_connection_t *half_open_connections; clib_spinlock_t half_open_lock; /** vlib buffer size */ u32 bytes_per_buffer; - /* Seed used to generate random iss */ + /** Seed used to generate random iss */ tcp_iss_seed_t iss_seed; - /* Congestion control algorithms registered */ + /** Congestion control algorithms registered */ tcp_cc_algorithm_t *cc_algos; /** Hash table of cc algorithms by name */ @@ -547,45 +597,23 @@ typedef struct _tcp_main /** Last cc algo registered */ tcp_cc_algorithm_type_e cc_last_type; - /* - * Configuration - */ - - /* Flag that indicates if stack is on or off */ + /** Flag that indicates if stack is on or off */ u8 is_enabled; - /** Max rx fifo size for a session. It is used in to compute the - * rfc 7323 window scaling factor */ - u32 max_rx_fifo; - - /** Default MTU to be used when establishing connections */ - u16 default_mtu; - - /** Initial CWND multiplier, which multiplies MSS to determine initial CWND. - * Set 0 to determine the initial CWND by another way */ - u16 initial_cwnd_multiplier; - - /** Number of preallocated connections */ - u32 preallocated_connections; - u32 preallocated_half_open_connections; - - /** Vectors of src addresses. Optional unless one needs > 63K active-opens */ - ip4_address_t *ip4_src_addresses; - u32 last_v4_address_rotor; - u32 last_v6_address_rotor; - ip6_address_t *ip6_src_addresses; - - /** Enable tx pacing for new connections */ - u8 tx_pacing; - + /** Flag that indicates if v4 punting is enabled */ u8 punt_unknown4; + + /** Flag that indicates if v6 punting is enabled */ u8 punt_unknown6; - /** fault-injection */ - f64 buffer_fail_fraction; + /** Rotor for v4 source addresses */ + u32 last_v4_addr_rotor; - /** Default congestion control algorithm type */ - tcp_cc_algorithm_type_e cc_algo; + /** Rotor for v6 source addresses */ + u32 last_v6_addr_rotor; + + /** Protocol configuration */ + tcp_configuration_t cfg; } tcp_main_t; extern tcp_main_t tcp_main; @@ -602,6 +630,7 @@ extern vlib_node_registration_t tcp6_rcv_process_node; extern vlib_node_registration_t tcp4_listen_node; extern vlib_node_registration_t tcp6_listen_node; +#define tcp_cfg tcp_main.cfg #define tcp_node_index(node_id, is_ip4) \ ((is_ip4) ? 
tcp4_##node_id##_node.index : tcp6_##node_id##_node.index) @@ -842,8 +871,8 @@ tcp_flight_size (const tcp_connection_t * tc) always_inline u32 tcp_initial_cwnd (const tcp_connection_t * tc) { - if (tcp_main.initial_cwnd_multiplier > 0) - return tcp_main.initial_cwnd_multiplier * tc->snd_mss; + if (tcp_cfg.initial_cwnd_multiplier > 0) + return tcp_cfg.initial_cwnd_multiplier * tc->snd_mss; if (tc->snd_mss > 2190) return 2 * tc->snd_mss; @@ -1089,16 +1118,14 @@ tcp_persist_timer_set (tcp_connection_t * tc) { /* Reuse RTO. It's backed off in handler */ tcp_timer_set (tc, TCP_TIMER_PERSIST, - clib_max (tc->rto * TCP_TO_TIMER_TICK, - TCP_TIMER_PERSIST_MIN)); + clib_max (tc->rto * TCP_TO_TIMER_TICK, 1)); } always_inline void tcp_persist_timer_update (tcp_connection_t * tc) { tcp_timer_update (tc, TCP_TIMER_PERSIST, - clib_max (tc->rto * TCP_TO_TIMER_TICK, - TCP_TIMER_PERSIST_MIN)); + clib_max (tc->rto * TCP_TO_TIMER_TICK, 1)); } always_inline void diff --git a/src/vnet/tcp/tcp_debug.h b/src/vnet/tcp/tcp_debug.h index 262d3faae10..dd2ada848cf 100755 --- a/src/vnet/tcp/tcp_debug.h +++ b/src/vnet/tcp/tcp_debug.h @@ -901,12 +901,12 @@ if (tcp_cc_time_to_print_stats (_tc)) \ num_threads = 1 /* main thread */ + vtm->n_threads; \ vec_validate (buffer_fail_counters, num_threads - 1); \ } \ - if (PREDICT_FALSE (tcp_main.buffer_fail_fraction != 0.0)) \ + if (PREDICT_FALSE (tcp_cfg.buffer_fail_fraction != 0.0)) \ { \ if (PREDICT_TRUE (buffer_fail_counters[thread_index] > 0)) \ { \ if ((1.0 / (f32) (buffer_fail_counters[thread_index])) \ - < tcp_main.buffer_fail_fraction) \ + < tcp_cfg.buffer_fail_fraction) \ { \ buffer_fail_counters[thread_index] = 0.0000001; \ return -1; \ diff --git a/src/vnet/tcp/tcp_input.c b/src/vnet/tcp/tcp_input.c index 595a76d6853..ccefe74e69a 100755 --- a/src/vnet/tcp/tcp_input.c +++ b/src/vnet/tcp/tcp_input.c @@ -1650,7 +1650,7 @@ tcp_rcv_fin (tcp_worker_ctx_t * wrk, tcp_connection_t * tc, vlib_buffer_t * b, * in CLOSE-WAIT, set timer (reuse WAITCLOSE). */ tcp_connection_set_state (tc, TCP_STATE_CLOSE_WAIT); tcp_program_disconnect (wrk, tc); - tcp_timer_update (tc, TCP_TIMER_WAITCLOSE, TCP_CLOSEWAIT_TIME); + tcp_timer_update (tc, TCP_TIMER_WAITCLOSE, tcp_cfg.closewait_time); TCP_EVT_DBG (TCP_EVT_FIN_RCVD, tc); *error = TCP_ERROR_FIN_RCVD; } @@ -1947,7 +1947,7 @@ in_order: if (tcp_can_delack (tc)) { if (!tcp_timer_is_active (tc, TCP_TIMER_DELACK)) - tcp_timer_set (tc, TCP_TIMER_DELACK, TCP_DELACK_TIME); + tcp_timer_set (tc, TCP_TIMER_DELACK, tcp_cfg.delack_time); goto done; } @@ -2745,7 +2745,7 @@ tcp46_rcv_process_inline (vlib_main_t * vm, vlib_node_runtime_t * node, /* If a fin was received and data was acked extend wait */ else if ((tc0->flags & TCP_CONN_FINRCVD) && tc0->bytes_acked) tcp_timer_update (tc0, TCP_TIMER_WAITCLOSE, - TCP_CLOSEWAIT_TIME); + tcp_cfg.closewait_time); } /* If FIN is ACKed */ else if (tc0->snd_una == tc0->snd_nxt) @@ -2759,7 +2759,8 @@ tcp46_rcv_process_inline (vlib_main_t * vm, vlib_node_runtime_t * node, if (tc0->flags & TCP_CONN_FINRCVD) { tcp_connection_set_state (tc0, TCP_STATE_CLOSED); - tcp_timer_set (tc0, TCP_TIMER_WAITCLOSE, TCP_CLEANUP_TIME); + tcp_timer_set (tc0, TCP_TIMER_WAITCLOSE, + tcp_cfg.cleanup_time); session_transport_closed_notify (&tc0->connection); goto drop; } @@ -2767,7 +2768,7 @@ tcp46_rcv_process_inline (vlib_main_t * vm, vlib_node_runtime_t * node, tcp_connection_set_state (tc0, TCP_STATE_FIN_WAIT_2); /* Enable waitclose because we're willing to wait for peer's * FIN but not indefinitely. 
*/ - tcp_timer_set (tc0, TCP_TIMER_WAITCLOSE, TCP_2MSL_TIME); + tcp_timer_set (tc0, TCP_TIMER_WAITCLOSE, tcp_cfg.finwait2_time); /* Don't try to deq the FIN acked */ if (tc0->burst_acked > 1) @@ -2800,7 +2801,7 @@ tcp46_rcv_process_inline (vlib_main_t * vm, vlib_node_runtime_t * node, tcp_send_fin (tc0); tcp_connection_timers_reset (tc0); tcp_connection_set_state (tc0, TCP_STATE_LAST_ACK); - tcp_timer_set (tc0, TCP_TIMER_WAITCLOSE, TCP_2MSL_TIME); + tcp_timer_set (tc0, TCP_TIMER_WAITCLOSE, tcp_cfg.lastack_time); break; case TCP_STATE_CLOSING: /* In addition to the processing for the ESTABLISHED state, if @@ -2814,7 +2815,7 @@ tcp46_rcv_process_inline (vlib_main_t * vm, vlib_node_runtime_t * node, tcp_connection_timers_reset (tc0); tcp_connection_set_state (tc0, TCP_STATE_TIME_WAIT); - tcp_timer_set (tc0, TCP_TIMER_WAITCLOSE, TCP_TIMEWAIT_TIME); + tcp_timer_set (tc0, TCP_TIMER_WAITCLOSE, tcp_cfg.timewait_time); session_transport_closed_notify (&tc0->connection); goto drop; @@ -2841,7 +2842,7 @@ tcp46_rcv_process_inline (vlib_main_t * vm, vlib_node_runtime_t * node, * we can't ensure that we have no packets already enqueued * to output. Rely instead on the waitclose timer */ tcp_connection_timers_reset (tc0); - tcp_timer_set (tc0, TCP_TIMER_WAITCLOSE, TCP_CLEANUP_TIME); + tcp_timer_set (tc0, TCP_TIMER_WAITCLOSE, tcp_cfg.cleanup_time); goto drop; @@ -2858,7 +2859,7 @@ tcp46_rcv_process_inline (vlib_main_t * vm, vlib_node_runtime_t * node, goto drop; tcp_program_ack (tc0); - tcp_timer_update (tc0, TCP_TIMER_WAITCLOSE, TCP_TIMEWAIT_TIME); + tcp_timer_update (tc0, TCP_TIMER_WAITCLOSE, tcp_cfg.timewait_time); goto drop; break; @@ -2900,7 +2901,7 @@ tcp46_rcv_process_inline (vlib_main_t * vm, vlib_node_runtime_t * node, tcp_program_ack (tc0); tcp_connection_set_state (tc0, TCP_STATE_CLOSE_WAIT); tcp_program_disconnect (wrk, tc0); - tcp_timer_update (tc0, TCP_TIMER_WAITCLOSE, TCP_CLOSEWAIT_TIME); + tcp_timer_update (tc0, TCP_TIMER_WAITCLOSE, tcp_cfg.closewait_time); break; case TCP_STATE_SYN_RCVD: /* Send FIN-ACK, enter LAST-ACK and because the app was not @@ -2910,7 +2911,7 @@ tcp46_rcv_process_inline (vlib_main_t * vm, vlib_node_runtime_t * node, tc0->rcv_nxt += 1; tcp_send_fin (tc0); tcp_connection_set_state (tc0, TCP_STATE_LAST_ACK); - tcp_timer_set (tc0, TCP_TIMER_WAITCLOSE, TCP_2MSL_TIME); + tcp_timer_set (tc0, TCP_TIMER_WAITCLOSE, tcp_cfg.lastack_time); break; case TCP_STATE_CLOSE_WAIT: case TCP_STATE_CLOSING: @@ -2926,14 +2927,16 @@ tcp46_rcv_process_inline (vlib_main_t * vm, vlib_node_runtime_t * node, * sending it. Since we already received a fin, do not wait * for too long. 
*/ tc0->flags |= TCP_CONN_FINRCVD; - tcp_timer_update (tc0, TCP_TIMER_WAITCLOSE, TCP_CLOSEWAIT_TIME); + tcp_timer_update (tc0, TCP_TIMER_WAITCLOSE, + tcp_cfg.closewait_time); } else { tcp_connection_set_state (tc0, TCP_STATE_CLOSING); tcp_program_ack (tc0); /* Wait for ACK for our FIN but not forever */ - tcp_timer_update (tc0, TCP_TIMER_WAITCLOSE, TCP_2MSL_TIME); + tcp_timer_update (tc0, TCP_TIMER_WAITCLOSE, + tcp_cfg.closing_time); } break; case TCP_STATE_FIN_WAIT_2: @@ -2941,7 +2944,7 @@ tcp46_rcv_process_inline (vlib_main_t * vm, vlib_node_runtime_t * node, tc0->rcv_nxt += 1; tcp_connection_set_state (tc0, TCP_STATE_TIME_WAIT); tcp_connection_timers_reset (tc0); - tcp_timer_set (tc0, TCP_TIMER_WAITCLOSE, TCP_TIMEWAIT_TIME); + tcp_timer_set (tc0, TCP_TIMER_WAITCLOSE, tcp_cfg.timewait_time); tcp_program_ack (tc0); session_transport_closed_notify (&tc0->connection); break; @@ -2949,7 +2952,7 @@ tcp46_rcv_process_inline (vlib_main_t * vm, vlib_node_runtime_t * node, /* Remain in the TIME-WAIT state. Restart the time-wait * timeout. */ - tcp_timer_update (tc0, TCP_TIMER_WAITCLOSE, TCP_TIMEWAIT_TIME); + tcp_timer_update (tc0, TCP_TIMER_WAITCLOSE, tcp_cfg.timewait_time); break; } error0 = TCP_ERROR_FIN_RCVD; diff --git a/src/vnet/tcp/tcp_output.c b/src/vnet/tcp/tcp_output.c index ef8b756e4e8..ea4dc428187 100644 --- a/src/vnet/tcp/tcp_output.c +++ b/src/vnet/tcp/tcp_output.c @@ -85,7 +85,7 @@ void tcp_update_rcv_mss (tcp_connection_t * tc) { /* TODO find our iface MTU */ - tc->mss = tcp_main.default_mtu - sizeof (tcp_header_t); + tc->mss = tcp_cfg.default_mtu - sizeof (tcp_header_t); } /** @@ -103,7 +103,7 @@ tcp_initial_wnd_unscaled (tcp_connection_t * tc) tcp_update_rcv_mss (tc); TCP_IW_N_SEGMENTS * tc->mss; */ - return TCP_MIN_RX_FIFO_SIZE; + return tcp_cfg.min_rx_fifo; } /** @@ -113,17 +113,9 @@ tcp_initial_wnd_unscaled (tcp_connection_t * tc) u32 tcp_initial_window_to_advertise (tcp_connection_t * tc) { - tcp_main_t *tm = &tcp_main; - u32 max_fifo; - - /* Initial wnd for SYN. Fifos are not allocated yet. - * Use some predefined value. For SYN-ACK we still want the - * scale to be computed in the same way */ - max_fifo = tm->max_rx_fifo ? tm->max_rx_fifo : TCP_MAX_RX_FIFO_SIZE; - /* Compute rcv wscale only if peer advertised support for it */ if (tc->state != TCP_STATE_SYN_RCVD || tcp_opts_wscale (&tc->rcv_opts)) - tc->rcv_wscale = tcp_window_compute_scale (max_fifo); + tc->rcv_wscale = tcp_window_compute_scale (tcp_cfg.max_rx_fifo); tc->rcv_wnd = tcp_initial_wnd_unscaled (tc); @@ -276,7 +268,7 @@ tcp_make_syn_options (tcp_options_t * opts, u8 wnd_scale) u8 len = 0; opts->flags |= TCP_OPTS_FLAG_MSS; - opts->mss = tcp_main.default_mtu; /*XXX discover that */ + opts->mss = tcp_cfg.default_mtu; /*XXX discover that */ len += TCP_OPTION_LEN_MSS; opts->flags |= TCP_OPTS_FLAG_WSCALE; @@ -1529,7 +1521,7 @@ tcp_timer_retransmit_handler (u32 tc_index) tcp_connection_set_state (tc, TCP_STATE_CLOSED); session_transport_closing_notify (&tc->connection); tcp_connection_timers_reset (tc); - tcp_timer_update (tc, TCP_TIMER_WAITCLOSE, TCP_CLOSEWAIT_TIME); + tcp_timer_update (tc, TCP_TIMER_WAITCLOSE, tcp_cfg.closewait_time); return; } @@ -1581,7 +1573,7 @@ tcp_timer_retransmit_handler (u32 tc_index) { tcp_connection_set_state (tc, TCP_STATE_CLOSED); tcp_connection_timers_reset (tc); - tcp_timer_update (tc, TCP_TIMER_WAITCLOSE, TCP_CLEANUP_TIME); + tcp_timer_update (tc, TCP_TIMER_WAITCLOSE, tcp_cfg.cleanup_time); return; } |
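For reference, the time-related fields in tcp_cfg are stored as multiples of TCP_TIMER_TICK (100 ms), while tcp_config_fn divides each user-supplied value by TCP_TIMER_TICK, so the configured numbers are effectively interpreted as seconds. A small standalone C sketch of that conversion, using hypothetical local variables in place of the real u16 tcp_cfg members:

```c
#include <stdio.h>

/* Same constant the patch adds to tcp.h: one TCP timer tick is 100 ms. */
#define TCP_TIMER_TICK 0.1

int
main (void)
{
  /* Hypothetical stand-ins for tcp_cfg fields (the real members are u16). */
  unsigned closewait_time = 2;  /* "closewait-time 2" from startup.conf */
  unsigned finwait1_time = 60;  /* "finwait1-time 60" from startup.conf */

  /* The same conversion tcp_config_fn applies: seconds -> timer ticks. */
  closewait_time /= TCP_TIMER_TICK;
  finwait1_time /= TCP_TIMER_TICK;

  /* Prints 20 ticks (2.0 s) and 600 ticks (60.0 s), matching the defaults
   * installed by tcp_configuration_init (). */
  printf ("closewait: %u ticks (%.1f s)\n", closewait_time,
          closewait_time * TCP_TIMER_TICK);
  printf ("finwait1:  %u ticks (%.1f s)\n", finwait1_time,
          finwait1_time * TCP_TIMER_TICK);
  return 0;
}
```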