summary refs log tree commit diff stats
path: root/src/vnet/tcp
diff options
context:
space:
mode:
Diffstat (limited to 'src/vnet/tcp')
-rw-r--r-- src/vnet/tcp/tcp.c 46
-rw-r--r-- src/vnet/tcp/tcp.h 40
-rwxr-xr-x src/vnet/tcp/tcp_input.c 16
-rw-r--r-- src/vnet/tcp/tcp_output.c 18
4 files changed, 89 insertions, 31 deletions
diff --git a/src/vnet/tcp/tcp.c b/src/vnet/tcp/tcp.c
index 6ef03dc093d..86e5d949bd1 100644
--- a/src/vnet/tcp/tcp.c
+++ b/src/vnet/tcp/tcp.c
@@ -271,7 +271,7 @@ tcp_connection_cleanup (tcp_connection_t * tc)
vec_free (tc->rcv_opts.sacks);
pool_free (tc->sack_sb.holes);
- if (tc->flags & TCP_CONN_RATE_SAMPLE)
+ if (tc->cfg_flags & TCP_CFG_F_RATE_SAMPLE)
tcp_bt_cleanup (tc);
/* Poison the entry */
@@ -737,9 +737,12 @@ tcp_connection_init_vars (tcp_connection_t * tc)
|| tcp_cfg.enable_tx_pacing)
tcp_enable_pacing (tc);
- if (tc->flags & TCP_CONN_RATE_SAMPLE)
+ if (tc->cfg_flags & TCP_CFG_F_RATE_SAMPLE)
tcp_bt_init (tc);
+ if (!tcp_cfg.allow_tso)
+ tc->cfg_flags |= TCP_CFG_F_NO_TSO;
+
tc->start_ts = tcp_time_now_us (tc->c_thread_index);
}
@@ -839,6 +842,31 @@ format_tcp_state (u8 * s, va_list * args)
return s;
}
+const char *tcp_cfg_flags_str[] = { /* display strings, one per cfg flag, indexed by flag bit position */
+#define _(sym, str) str,
+ foreach_tcp_cfg_flag
+#undef _
+};
+
+static u8 *
+format_tcp_cfg_flags (u8 * s, va_list * args) /* %U formatter: appends the names of tc's set cfg flags to s, comma separated */
+{
+ tcp_connection_t *tc = va_arg (*args, tcp_connection_t *);
+ int i, last = -1; /* last: highest set bit index, stays -1 when no flag is set */
+
+ for (i = 0; i < TCP_CFG_N_FLAG_BITS; i++) /* pass 1: locate the highest set bit */
+ if (tc->cfg_flags & (1 << i))
+ last = i;
+ for (i = 0; i < last; i++) /* pass 2: set flags below it are printed with a ", " suffix */
+ {
+ if (tc->cfg_flags & (1 << i))
+ s = format (s, "%s, ", tcp_cfg_flags_str[i]);
+ }
+ if (last >= 0) /* highest set flag is printed without a trailing separator */
+ s = format (s, "%s", tcp_cfg_flags_str[last]);
+ return s;
+}
+
const char *tcp_connection_flags_str[] = {
#define _(sym, str) str,
foreach_tcp_connection_flag
@@ -963,8 +991,9 @@ static u8 *
format_tcp_vars (u8 * s, va_list * args)
{
tcp_connection_t *tc = va_arg (*args, tcp_connection_t *);
- s = format (s, " index: %u flags: %U timers: %U\n", tc->c_c_index,
- format_tcp_connection_flags, tc, format_tcp_timers, tc);
+ s = format (s, " index: %u cfg: %U flags: %U timers: %U\n", tc->c_c_index,
+ format_tcp_cfg_flags, tc, format_tcp_connection_flags, tc,
+ format_tcp_timers, tc);
s = format (s, " snd_una %u snd_nxt %u snd_una_max %u",
tc->snd_una - tc->iss, tc->snd_nxt - tc->iss,
tc->snd_una_max - tc->iss);
@@ -1219,10 +1248,8 @@ tcp_session_send_mss (transport_connection_t * trans_conn)
* the current state of the connection. */
tcp_update_burst_snd_vars (tc);
- if (PREDICT_FALSE (tc->is_tso))
- {
- return tcp_session_cal_goal_size (tc);
- }
+ if (PREDICT_FALSE (tc->cfg_flags & TCP_CFG_F_TSO))
+ return tcp_session_cal_goal_size (tc);
return tc->snd_mss;
}
@@ -1621,6 +1648,7 @@ tcp_configuration_init (void)
tcp_cfg.default_mtu = 1500;
tcp_cfg.initial_cwnd_multiplier = 0;
tcp_cfg.enable_tx_pacing = 1;
+ tcp_cfg.allow_tso = 0;
tcp_cfg.cc_algo = TCP_CC_NEWRENO;
tcp_cfg.rwnd_min_update_ack = 1;
@@ -1744,6 +1772,8 @@ tcp_config_fn (vlib_main_t * vm, unformat_input_t * input)
tcp_cfg.initial_cwnd_multiplier = cwnd_multiplier;
else if (unformat (input, "no-tx-pacing"))
tcp_cfg.enable_tx_pacing = 0;
+ else if (unformat (input, "tso"))
+ tcp_cfg.allow_tso = 1;
else if (unformat (input, "cc-algo %U", unformat_tcp_cc_algo,
&tcp_cfg.cc_algo))
;
diff --git a/src/vnet/tcp/tcp.h b/src/vnet/tcp/tcp.h
index 5e683f72dbd..097a1475185 100644
--- a/src/vnet/tcp/tcp.h
+++ b/src/vnet/tcp/tcp.h
@@ -103,6 +103,29 @@ extern timer_expiration_handler tcp_timer_retransmit_syn_handler;
#define TCP_RTO_BOFF_MAX 8 /* Max number of retries before reset */
#define TCP_ESTABLISH_TIME (60 * THZ) /* Connection establish timeout */
+/** Connection configuration flags as _(symbol, display string) entries; list order determines each flag's bit position */
+#define foreach_tcp_cfg_flag \
+ _(RATE_SAMPLE, "Rate sampling") \
+ _(NO_CSUM_OFFLOAD, "No csum offload") \
+ _(NO_TSO, "TSO off") \
+ _(TSO, "TSO") \
+
+typedef enum tcp_cfg_flag_bits_ /* bit positions TCP_CFG_F_<sym>_BIT, numbered in foreach_tcp_cfg_flag order */
+{
+#define _(sym, str) TCP_CFG_F_##sym##_BIT,
+ foreach_tcp_cfg_flag
+#undef _
+ TCP_CFG_N_FLAG_BITS /* number of bit positions defined above */
+} tcp_cfg_flag_bits_e;
+
+typedef enum tcp_cfg_flag_ /* bit masks: TCP_CFG_F_<sym> = 1 << TCP_CFG_F_<sym>_BIT */
+{
+#define _(sym, str) TCP_CFG_F_##sym = 1 << TCP_CFG_F_##sym##_BIT,
+ foreach_tcp_cfg_flag
+#undef _
+ TCP_CFG_N_FLAGS /* implicitly the last mask + 1; this is not a count of flags */
+} tcp_cfg_flags_e;
+
/** TCP connection flags */
#define foreach_tcp_connection_flag \
_(SNDACK, "Send ACK") \
@@ -113,16 +136,14 @@ extern timer_expiration_handler tcp_timer_retransmit_syn_handler;
_(HALF_OPEN_DONE, "Half-open completed") \
_(FINPNDG, "FIN pending") \
_(RXT_PENDING, "Retransmit pending") \
- _(FRXT_FIRST, "Fast-retransmit first again") \
- _(DEQ_PENDING, "Pending dequeue acked") \
+ _(FRXT_FIRST, "Retransmit first") \
+ _(DEQ_PENDING, "Dequeue pending ") \
_(PSH_PENDING, "PSH pending") \
_(FINRCVD, "FIN received") \
- _(RATE_SAMPLE, "Conn does rate sampling") \
_(TRACK_BURST, "Track burst") \
_(ZERO_RWND_SENT, "Zero RWND sent") \
- _(NO_CSUM_OFFLOAD, "No Checksum Offload") \
-typedef enum _tcp_connection_flag_bits
+typedef enum tcp_connection_flag_bits_
{
#define _(sym, str) TCP_CONN_##sym##_BIT,
foreach_tcp_connection_flag
@@ -130,7 +151,7 @@ typedef enum _tcp_connection_flag_bits
TCP_CONN_N_FLAG_BITS
} tcp_connection_flag_bits_e;
-typedef enum _tcp_connection_flag
+typedef enum tcp_connection_flag_
{
#define _(sym, str) TCP_CONN_##sym = 1 << TCP_CONN_##sym##_BIT,
foreach_tcp_connection_flag
@@ -310,7 +331,7 @@ typedef struct _tcp_connection
transport_connection_t connection; /**< Common transport data. First! */
u8 state; /**< TCP state as per tcp_state_t */
- u8 is_tso; /** is connection could use tso */
+ u8 cfg_flags; /**< Connection configuration flags */
u16 flags; /**< Connection flags (see tcp_conn_flags_e) */
u32 timers[TCP_N_TIMERS]; /**< Timer handles into timer wheel */
@@ -451,6 +472,8 @@ struct _tcp_cc_algorithm
#define tcp_in_cong_recovery(tc) ((tc)->flags & \
(TCP_CONN_FAST_RECOVERY | TCP_CONN_RECOVERY))
+#define tcp_csum_offload(tc) (!((tc)->cfg_flags & TCP_CFG_F_NO_CSUM_OFFLOAD)) /* non-zero unless the connection has NO_CSUM_OFFLOAD set */
+
always_inline void
tcp_cong_recovery_off (tcp_connection_t * tc)
{
@@ -534,6 +557,9 @@ typedef struct tcp_configuration_
/** Enable tx pacing for new connections */
u8 enable_tx_pacing;
+ /** Allow use of TSO whenever available */
+ u8 allow_tso;
+
/** Default congestion control algorithm type */
tcp_cc_algorithm_type_e cc_algo;
diff --git a/src/vnet/tcp/tcp_input.c b/src/vnet/tcp/tcp_input.c
index ea64e4ab754..61e5aa8c662 100755
--- a/src/vnet/tcp/tcp_input.c
+++ b/src/vnet/tcp/tcp_input.c
@@ -503,7 +503,8 @@ tcp_update_rtt (tcp_connection_t * tc, tcp_rate_sample_t * rs, u32 ack)
if (tcp_in_cong_recovery (tc))
{
/* Accept rtt estimates for samples that have not been retransmitted */
- if ((tc->flags & TCP_CONN_RATE_SAMPLE) && !(rs->flags & TCP_BTS_IS_RXT))
+ if ((tc->cfg_flags & TCP_CFG_F_RATE_SAMPLE)
+ && !(rs->flags & TCP_BTS_IS_RXT))
{
mrtt = rs->rtt_time * THZ;
goto estimate_rtt;
@@ -1604,7 +1605,7 @@ process_ack:
tc->snd_una = vnet_buffer (b)->tcp.ack_number;
tcp_validate_txf_size (tc, tc->bytes_acked);
- if (tc->flags & TCP_CONN_RATE_SAMPLE)
+ if (tc->cfg_flags & TCP_CFG_F_RATE_SAMPLE)
tcp_bt_sample_delivery_rate (tc, &rs);
tcp_program_dequeue (wrk, tc);
@@ -2361,11 +2362,10 @@ tcp_check_tx_offload (tcp_connection_t * tc, int is_ipv4)
sw_if_idx = dpo->dpoi_index;
hw_if = vnet_get_sup_hw_interface (vnm, sw_if_idx);
- tc->is_tso =
- ((hw_if->flags & VNET_HW_INTERFACE_FLAG_SUPPORTS_GSO) == 0) ? 0 : 1;
+ if (hw_if->flags & VNET_HW_INTERFACE_FLAG_SUPPORTS_GSO)
+ tc->cfg_flags |= TCP_CFG_F_TSO;
}
-
always_inline uword
tcp46_syn_sent_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
vlib_frame_t * from_frame, int is_ip4)
@@ -2582,7 +2582,8 @@ tcp46_syn_sent_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
goto drop;
}
- tcp_check_tx_offload (new_tc0, is_ip4);
+ if (!(new_tc0->cfg_flags & TCP_CFG_F_NO_TSO))
+ tcp_check_tx_offload (new_tc0, is_ip4);
/* Read data, if any */
if (PREDICT_FALSE (vnet_buffer (b0)->tcp.data_len))
@@ -2771,7 +2772,8 @@ tcp46_rcv_process_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
tc0->state = TCP_STATE_ESTABLISHED;
TCP_EVT (TCP_EVT_STATE_CHANGE, tc0);
- tcp_check_tx_offload (tc0, is_ip4);
+ if (!(tc0->cfg_flags & TCP_CFG_F_NO_TSO))
+ tcp_check_tx_offload (tc0, is_ip4);
/* Initialize session variables */
tc0->snd_una = vnet_buffer (b0)->tcp.ack_number;
diff --git a/src/vnet/tcp/tcp_output.c b/src/vnet/tcp/tcp_output.c
index 047247e9ebe..fb806446dcc 100644
--- a/src/vnet/tcp/tcp_output.c
+++ b/src/vnet/tcp/tcp_output.c
@@ -405,7 +405,7 @@ tcp_update_burst_snd_vars (tcp_connection_t * tc)
tcp_update_rcv_wnd (tc);
- if (tc->flags & TCP_CONN_RATE_SAMPLE)
+ if (tc->cfg_flags & TCP_CFG_F_RATE_SAMPLE)
tc->flags |= TCP_CONN_TRACK_BURST;
if (tc->snd_una == tc->snd_nxt)
@@ -499,7 +499,7 @@ static inline u16
tcp_compute_checksum (tcp_connection_t * tc, vlib_buffer_t * b)
{
u16 checksum = 0;
- if (PREDICT_FALSE (tc->flags & TCP_CONN_NO_CSUM_OFFLOAD))
+ if (PREDICT_FALSE (tc->cfg_flags & TCP_CFG_F_NO_CSUM_OFFLOAD))
{
tcp_worker_ctx_t *wrk = tcp_get_worker (tc->c_thread_index);
vlib_main_t *vm = wrk->vm;
@@ -867,7 +867,7 @@ tcp_send_reset_w_pkt (tcp_connection_t * tc, vlib_buffer_t * pkt,
ASSERT ((pkt_ih4->ip_version_and_header_length & 0xF0) == 0x40);
ih4 = vlib_buffer_push_ip4 (vm, b, &pkt_ih4->dst_address,
&pkt_ih4->src_address, IP_PROTOCOL_TCP,
- (!(tc->flags & TCP_CONN_NO_CSUM_OFFLOAD)));
+ tcp_csum_offload (tc));
th->checksum = ip4_tcp_udp_compute_checksum (vm, b, ih4);
}
else
@@ -934,7 +934,7 @@ tcp_push_ip_hdr (tcp_worker_ctx_t * wrk, tcp_connection_t * tc,
ip4_header_t *ih;
ih = vlib_buffer_push_ip4 (vm, b, &tc->c_lcl_ip4,
&tc->c_rmt_ip4, IP_PROTOCOL_TCP,
- (!(tc->flags & TCP_CONN_NO_CSUM_OFFLOAD)));
+ tcp_csum_offload (tc));
th->checksum = ip4_tcp_udp_compute_checksum (vm, b, ih);
}
else
@@ -1445,7 +1445,7 @@ tcp_prepare_retransmit_segment (tcp_worker_ctx_t * wrk,
tc->snd_rxt_bytes += n_bytes;
- if (tc->flags & TCP_CONN_RATE_SAMPLE)
+ if (tc->cfg_flags & TCP_CFG_F_RATE_SAMPLE)
tcp_bt_track_rxt (tc, start, start + n_bytes);
tc->bytes_retrans += n_bytes;
@@ -1777,7 +1777,7 @@ tcp_timer_persist_handler (u32 index)
|| tc->snd_nxt == tc->snd_una_max
|| tc->rto_boff > 1));
- if (tc->flags & TCP_CONN_RATE_SAMPLE)
+ if (tc->cfg_flags & TCP_CFG_F_RATE_SAMPLE)
{
tcp_bt_check_app_limited (tc);
tcp_bt_track_tx (tc);
@@ -2301,8 +2301,7 @@ tcp_output_push_ip (vlib_main_t * vm, vlib_buffer_t * b0,
if (is_ip4)
ih0 = vlib_buffer_push_ip4 (vm, b0, &tc0->c_lcl_ip4, &tc0->c_rmt_ip4,
- IP_PROTOCOL_TCP,
- (!(tc0->flags & TCP_CONN_NO_CSUM_OFFLOAD)));
+ IP_PROTOCOL_TCP, tcp_csum_offload (tc0));
else
ih0 = vlib_buffer_push_ip6 (vm, b0, &tc0->c_lcl_ip6, &tc0->c_rmt_ip6,
IP_PROTOCOL_TCP);
@@ -2312,8 +2311,9 @@ tcp_output_push_ip (vlib_main_t * vm, vlib_buffer_t * b0,
always_inline void
tcp_check_if_gso (tcp_connection_t * tc, vlib_buffer_t * b)
{
- if (!tc->is_tso)
+ if (PREDICT_TRUE (!(tc->cfg_flags & TCP_CFG_F_TSO)))
return;
+
u16 data_len = b->current_length - sizeof (tcp_header_t) - tc->snd_opts_len;
if (PREDICT_FALSE (b->flags & VLIB_BUFFER_TOTAL_LENGTH_VALID))