diff options
Diffstat (limited to 'src/vnet/tcp')
-rw-r--r-- | src/vnet/tcp/tcp.c | 46 | ||||
-rw-r--r-- | src/vnet/tcp/tcp.h | 40 | ||||
-rwxr-xr-x | src/vnet/tcp/tcp_input.c | 16 | ||||
-rw-r--r-- | src/vnet/tcp/tcp_output.c | 18 |
4 files changed, 89 insertions, 31 deletions
diff --git a/src/vnet/tcp/tcp.c b/src/vnet/tcp/tcp.c index 6ef03dc093d..86e5d949bd1 100644 --- a/src/vnet/tcp/tcp.c +++ b/src/vnet/tcp/tcp.c @@ -271,7 +271,7 @@ tcp_connection_cleanup (tcp_connection_t * tc) vec_free (tc->rcv_opts.sacks); pool_free (tc->sack_sb.holes); - if (tc->flags & TCP_CONN_RATE_SAMPLE) + if (tc->cfg_flags & TCP_CFG_F_RATE_SAMPLE) tcp_bt_cleanup (tc); /* Poison the entry */ @@ -737,9 +737,12 @@ tcp_connection_init_vars (tcp_connection_t * tc) || tcp_cfg.enable_tx_pacing) tcp_enable_pacing (tc); - if (tc->flags & TCP_CONN_RATE_SAMPLE) + if (tc->cfg_flags & TCP_CFG_F_RATE_SAMPLE) tcp_bt_init (tc); + if (!tcp_cfg.allow_tso) + tc->cfg_flags |= TCP_CFG_F_NO_TSO; + tc->start_ts = tcp_time_now_us (tc->c_thread_index); } @@ -839,6 +842,31 @@ format_tcp_state (u8 * s, va_list * args) return s; } +const char *tcp_cfg_flags_str[] = { +#define _(sym, str) str, + foreach_tcp_cfg_flag +#undef _ +}; + +static u8 * +format_tcp_cfg_flags (u8 * s, va_list * args) +{ + tcp_connection_t *tc = va_arg (*args, tcp_connection_t *); + int i, last = -1; + + for (i = 0; i < TCP_CFG_N_FLAG_BITS; i++) + if (tc->cfg_flags & (1 << i)) + last = i; + for (i = 0; i < last; i++) + { + if (tc->cfg_flags & (1 << i)) + s = format (s, "%s, ", tcp_cfg_flags_str[i]); + } + if (last >= 0) + s = format (s, "%s", tcp_cfg_flags_str[last]); + return s; +} + const char *tcp_connection_flags_str[] = { #define _(sym, str) str, foreach_tcp_connection_flag @@ -963,8 +991,9 @@ static u8 * format_tcp_vars (u8 * s, va_list * args) { tcp_connection_t *tc = va_arg (*args, tcp_connection_t *); - s = format (s, " index: %u flags: %U timers: %U\n", tc->c_c_index, - format_tcp_connection_flags, tc, format_tcp_timers, tc); + s = format (s, " index: %u cfg: %U flags: %U timers: %U\n", tc->c_c_index, + format_tcp_cfg_flags, tc, format_tcp_connection_flags, tc, + format_tcp_timers, tc); s = format (s, " snd_una %u snd_nxt %u snd_una_max %u", tc->snd_una - tc->iss, tc->snd_nxt - tc->iss, tc->snd_una_max - tc->iss); @@ -1219,10 +1248,8 @@ tcp_session_send_mss (transport_connection_t * trans_conn) * the current state of the connection. */ tcp_update_burst_snd_vars (tc); - if (PREDICT_FALSE (tc->is_tso)) - { - return tcp_session_cal_goal_size (tc); - } + if (PREDICT_FALSE (tc->cfg_flags & TCP_CFG_F_TSO)) + return tcp_session_cal_goal_size (tc); return tc->snd_mss; } @@ -1621,6 +1648,7 @@ tcp_configuration_init (void) tcp_cfg.default_mtu = 1500; tcp_cfg.initial_cwnd_multiplier = 0; tcp_cfg.enable_tx_pacing = 1; + tcp_cfg.allow_tso = 0; tcp_cfg.cc_algo = TCP_CC_NEWRENO; tcp_cfg.rwnd_min_update_ack = 1; @@ -1744,6 +1772,8 @@ tcp_config_fn (vlib_main_t * vm, unformat_input_t * input) tcp_cfg.initial_cwnd_multiplier = cwnd_multiplier; else if (unformat (input, "no-tx-pacing")) tcp_cfg.enable_tx_pacing = 0; + else if (unformat (input, "tso")) + tcp_cfg.allow_tso = 1; else if (unformat (input, "cc-algo %U", unformat_tcp_cc_algo, &tcp_cfg.cc_algo)) ; diff --git a/src/vnet/tcp/tcp.h b/src/vnet/tcp/tcp.h index 5e683f72dbd..097a1475185 100644 --- a/src/vnet/tcp/tcp.h +++ b/src/vnet/tcp/tcp.h @@ -103,6 +103,29 @@ extern timer_expiration_handler tcp_timer_retransmit_syn_handler; #define TCP_RTO_BOFF_MAX 8 /* Max number of retries before reset */ #define TCP_ESTABLISH_TIME (60 * THZ) /* Connection establish timeout */ +/** Connection configuration flags */ +#define foreach_tcp_cfg_flag \ + _(RATE_SAMPLE, "Rate sampling") \ + _(NO_CSUM_OFFLOAD, "No csum offload") \ + _(NO_TSO, "TSO off") \ + _(TSO, "TSO") \ + +typedef enum tcp_cfg_flag_bits_ +{ +#define _(sym, str) TCP_CFG_F_##sym##_BIT, + foreach_tcp_cfg_flag +#undef _ + TCP_CFG_N_FLAG_BITS +} tcp_cfg_flag_bits_e; + +typedef enum tcp_cfg_flag_ +{ +#define _(sym, str) TCP_CFG_F_##sym = 1 << TCP_CFG_F_##sym##_BIT, + foreach_tcp_cfg_flag +#undef _ + TCP_CFG_N_FLAGS +} tcp_cfg_flags_e; + /** TCP connection flags */ #define foreach_tcp_connection_flag \ _(SNDACK, "Send ACK") \ @@ -113,16 +136,14 @@ extern timer_expiration_handler tcp_timer_retransmit_syn_handler; _(HALF_OPEN_DONE, "Half-open completed") \ _(FINPNDG, "FIN pending") \ _(RXT_PENDING, "Retransmit pending") \ - _(FRXT_FIRST, "Fast-retransmit first again") \ - _(DEQ_PENDING, "Pending dequeue acked") \ + _(FRXT_FIRST, "Retransmit first") \ + _(DEQ_PENDING, "Dequeue pending ") \ _(PSH_PENDING, "PSH pending") \ _(FINRCVD, "FIN received") \ - _(RATE_SAMPLE, "Conn does rate sampling") \ _(TRACK_BURST, "Track burst") \ _(ZERO_RWND_SENT, "Zero RWND sent") \ - _(NO_CSUM_OFFLOAD, "No Checksum Offload") \ -typedef enum _tcp_connection_flag_bits +typedef enum tcp_connection_flag_bits_ { #define _(sym, str) TCP_CONN_##sym##_BIT, foreach_tcp_connection_flag @@ -130,7 +151,7 @@ typedef enum _tcp_connection_flag_bits TCP_CONN_N_FLAG_BITS } tcp_connection_flag_bits_e; -typedef enum _tcp_connection_flag +typedef enum tcp_connection_flag_ { #define _(sym, str) TCP_CONN_##sym = 1 << TCP_CONN_##sym##_BIT, foreach_tcp_connection_flag @@ -310,7 +331,7 @@ typedef struct _tcp_connection transport_connection_t connection; /**< Common transport data. First! */ u8 state; /**< TCP state as per tcp_state_t */ - u8 is_tso; /** is connection could use tso */ + u8 cfg_flags; /**< Connection configuration flags */ u16 flags; /**< Connection flags (see tcp_conn_flags_e) */ u32 timers[TCP_N_TIMERS]; /**< Timer handles into timer wheel */ @@ -451,6 +472,8 @@ struct _tcp_cc_algorithm #define tcp_in_cong_recovery(tc) ((tc)->flags & \ (TCP_CONN_FAST_RECOVERY | TCP_CONN_RECOVERY)) +#define tcp_csum_offload(tc) (!((tc)->cfg_flags & TCP_CFG_F_NO_CSUM_OFFLOAD)) + always_inline void tcp_cong_recovery_off (tcp_connection_t * tc) { @@ -534,6 +557,9 @@ typedef struct tcp_configuration_ /** Enable tx pacing for new connections */ u8 enable_tx_pacing; + /** Allow use of TSO whenever available */ + u8 allow_tso; + /** Default congestion control algorithm type */ tcp_cc_algorithm_type_e cc_algo; diff --git a/src/vnet/tcp/tcp_input.c b/src/vnet/tcp/tcp_input.c index ea64e4ab754..61e5aa8c662 100755 --- a/src/vnet/tcp/tcp_input.c +++ b/src/vnet/tcp/tcp_input.c @@ -503,7 +503,8 @@ tcp_update_rtt (tcp_connection_t * tc, tcp_rate_sample_t * rs, u32 ack) if (tcp_in_cong_recovery (tc)) { /* Accept rtt estimates for samples that have not been retransmitted */ - if ((tc->flags & TCP_CONN_RATE_SAMPLE) && !(rs->flags & TCP_BTS_IS_RXT)) + if ((tc->cfg_flags & TCP_CFG_F_RATE_SAMPLE) + && !(rs->flags & TCP_BTS_IS_RXT)) { mrtt = rs->rtt_time * THZ; goto estimate_rtt; @@ -1604,7 +1605,7 @@ process_ack: tc->snd_una = vnet_buffer (b)->tcp.ack_number; tcp_validate_txf_size (tc, tc->bytes_acked); - if (tc->flags & TCP_CONN_RATE_SAMPLE) + if (tc->cfg_flags & TCP_CFG_F_RATE_SAMPLE) tcp_bt_sample_delivery_rate (tc, &rs); tcp_program_dequeue (wrk, tc); @@ -2361,11 +2362,10 @@ tcp_check_tx_offload (tcp_connection_t * tc, int is_ipv4) sw_if_idx = dpo->dpoi_index; hw_if = vnet_get_sup_hw_interface (vnm, sw_if_idx); - tc->is_tso = - ((hw_if->flags & VNET_HW_INTERFACE_FLAG_SUPPORTS_GSO) == 0) ? 0 : 1; + if (hw_if->flags & VNET_HW_INTERFACE_FLAG_SUPPORTS_GSO) + tc->cfg_flags |= TCP_CFG_F_TSO; } - always_inline uword tcp46_syn_sent_inline (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * from_frame, int is_ip4) @@ -2582,7 +2582,8 @@ tcp46_syn_sent_inline (vlib_main_t * vm, vlib_node_runtime_t * node, goto drop; } - tcp_check_tx_offload (new_tc0, is_ip4); + if (!(new_tc0->cfg_flags & TCP_CFG_F_NO_TSO)) + tcp_check_tx_offload (new_tc0, is_ip4); /* Read data, if any */ if (PREDICT_FALSE (vnet_buffer (b0)->tcp.data_len)) @@ -2771,7 +2772,8 @@ tcp46_rcv_process_inline (vlib_main_t * vm, vlib_node_runtime_t * node, tc0->state = TCP_STATE_ESTABLISHED; TCP_EVT (TCP_EVT_STATE_CHANGE, tc0); - tcp_check_tx_offload (tc0, is_ip4); + if (!(tc0->cfg_flags & TCP_CFG_F_NO_TSO)) + tcp_check_tx_offload (tc0, is_ip4); /* Initialize session variables */ tc0->snd_una = vnet_buffer (b0)->tcp.ack_number; diff --git a/src/vnet/tcp/tcp_output.c b/src/vnet/tcp/tcp_output.c index 047247e9ebe..fb806446dcc 100644 --- a/src/vnet/tcp/tcp_output.c +++ b/src/vnet/tcp/tcp_output.c @@ -405,7 +405,7 @@ tcp_update_burst_snd_vars (tcp_connection_t * tc) tcp_update_rcv_wnd (tc); - if (tc->flags & TCP_CONN_RATE_SAMPLE) + if (tc->cfg_flags & TCP_CFG_F_RATE_SAMPLE) tc->flags |= TCP_CONN_TRACK_BURST; if (tc->snd_una == tc->snd_nxt) @@ -499,7 +499,7 @@ static inline u16 tcp_compute_checksum (tcp_connection_t * tc, vlib_buffer_t * b) { u16 checksum = 0; - if (PREDICT_FALSE (tc->flags & TCP_CONN_NO_CSUM_OFFLOAD)) + if (PREDICT_FALSE (tc->cfg_flags & TCP_CFG_F_NO_CSUM_OFFLOAD)) { tcp_worker_ctx_t *wrk = tcp_get_worker (tc->c_thread_index); vlib_main_t *vm = wrk->vm; @@ -867,7 +867,7 @@ tcp_send_reset_w_pkt (tcp_connection_t * tc, vlib_buffer_t * pkt, ASSERT ((pkt_ih4->ip_version_and_header_length & 0xF0) == 0x40); ih4 = vlib_buffer_push_ip4 (vm, b, &pkt_ih4->dst_address, &pkt_ih4->src_address, IP_PROTOCOL_TCP, - (!(tc->flags & TCP_CONN_NO_CSUM_OFFLOAD))); + tcp_csum_offload (tc)); th->checksum = ip4_tcp_udp_compute_checksum (vm, b, ih4); } else @@ -934,7 +934,7 @@ tcp_push_ip_hdr (tcp_worker_ctx_t * wrk, tcp_connection_t * tc, ip4_header_t *ih; ih = vlib_buffer_push_ip4 (vm, b, &tc->c_lcl_ip4, &tc->c_rmt_ip4, IP_PROTOCOL_TCP, - (!(tc->flags & TCP_CONN_NO_CSUM_OFFLOAD))); + tcp_csum_offload (tc)); th->checksum = ip4_tcp_udp_compute_checksum (vm, b, ih); } else @@ -1445,7 +1445,7 @@ tcp_prepare_retransmit_segment (tcp_worker_ctx_t * wrk, tc->snd_rxt_bytes += n_bytes; - if (tc->flags & TCP_CONN_RATE_SAMPLE) + if (tc->cfg_flags & TCP_CFG_F_RATE_SAMPLE) tcp_bt_track_rxt (tc, start, start + n_bytes); tc->bytes_retrans += n_bytes; @@ -1777,7 +1777,7 @@ tcp_timer_persist_handler (u32 index) || tc->snd_nxt == tc->snd_una_max || tc->rto_boff > 1)); - if (tc->flags & TCP_CONN_RATE_SAMPLE) + if (tc->cfg_flags & TCP_CFG_F_RATE_SAMPLE) { tcp_bt_check_app_limited (tc); tcp_bt_track_tx (tc); @@ -2301,8 +2301,7 @@ tcp_output_push_ip (vlib_main_t * vm, vlib_buffer_t * b0, if (is_ip4) ih0 = vlib_buffer_push_ip4 (vm, b0, &tc0->c_lcl_ip4, &tc0->c_rmt_ip4, - IP_PROTOCOL_TCP, - (!(tc0->flags & TCP_CONN_NO_CSUM_OFFLOAD))); + IP_PROTOCOL_TCP, tcp_csum_offload (tc0)); else ih0 = vlib_buffer_push_ip6 (vm, b0, &tc0->c_lcl_ip6, &tc0->c_rmt_ip6, IP_PROTOCOL_TCP); @@ -2312,8 +2311,9 @@ tcp_output_push_ip (vlib_main_t * vm, vlib_buffer_t * b0, always_inline void tcp_check_if_gso (tcp_connection_t * tc, vlib_buffer_t * b) { - if (!tc->is_tso) + if (PREDICT_TRUE (!(tc->cfg_flags & TCP_CFG_F_TSO))) return; + u16 data_len = b->current_length - sizeof (tcp_header_t) - tc->snd_opts_len; if (PREDICT_FALSE (b->flags & VLIB_BUFFER_TOTAL_LENGTH_VALID)) |