From 6f3621d772d0dafa745081b32a8769b4c42af059 Mon Sep 17 00:00:00 2001
From: Simon Zhang <yuwei1.zhang@intel.com>
Date: Mon, 2 Sep 2019 22:54:00 +0800
Subject: tcp: enable gso in tcp hoststack

Type: feature

Change-Id: If68d07fbe8c6f7fffd2f93c7e854367082927e4f
Signed-off-by: Simon Zhang <yuwei1.zhang@intel.com>
(cherry picked from commit 1146ff4bcd336d8efc19405f1d83914e6115a01f)
---
 src/vnet/ip/ip4.h            |  2 ++
 src/vnet/session/transport.c |  7 ++++---
 src/vnet/tcp/tcp.c           | 16 ++++++++++++++++
 src/vnet/tcp/tcp.h           |  2 ++
 src/vnet/tcp/tcp_input.c     | 37 +++++++++++++++++++++++++++++++++++++
 src/vnet/tcp/tcp_output.c    | 28 ++++++++++++++++++++++++++++
 6 files changed, 89 insertions(+), 3 deletions(-)

(limited to 'src')

diff --git a/src/vnet/ip/ip4.h b/src/vnet/ip/ip4.h
index 0ead3faa1b8..9da5926e13f 100644
--- a/src/vnet/ip/ip4.h
+++ b/src/vnet/ip/ip4.h
@@ -412,6 +412,8 @@ vlib_buffer_push_ip4 (vlib_main_t * vm, vlib_buffer_t * b,
       vnet_buffer (b)->l3_hdr_offset = (u8 *) ih - b->data;
       vnet_buffer (b)->l4_hdr_offset = vnet_buffer (b)->l3_hdr_offset +
 	sizeof (*ih);
+      b->flags |=
+	VNET_BUFFER_F_L3_HDR_OFFSET_VALID | VNET_BUFFER_F_L4_HDR_OFFSET_VALID;
     }
   else
     ih->checksum = ip4_header_checksum (ih);
diff --git a/src/vnet/session/transport.c b/src/vnet/session/transport.c
index aa8deac5400..1b7928162b6 100644
--- a/src/vnet/session/transport.c
+++ b/src/vnet/session/transport.c
@@ -49,7 +49,7 @@ static double transport_pacer_period;
 
 #define TRANSPORT_PACER_MIN_MSS 	1460
 #define TRANSPORT_PACER_MIN_BURST 	TRANSPORT_PACER_MIN_MSS
-#define TRANSPORT_PACER_MAX_BURST	(32 * TRANSPORT_PACER_MIN_MSS)
+#define TRANSPORT_PACER_MAX_BURST	(43 * TRANSPORT_PACER_MIN_MSS)
 
 u8 *
 format_transport_proto (u8 * s, va_list * args)
@@ -703,9 +703,10 @@ transport_connection_snd_space (transport_connection_t * tc, u64 time_now,
     {
       time_now >>= SPACER_CPU_TICKS_PER_PERIOD_SHIFT;
       max_paced_burst = spacer_max_burst (&tc->pacer, time_now);
-      max_paced_burst = (max_paced_burst < mss) ? 0 : max_paced_burst;
+      max_paced_burst =
+	(max_paced_burst < TRANSPORT_PACER_MIN_BURST) ? 0 : max_paced_burst;
       snd_space = clib_min (snd_space, max_paced_burst);
-      snd_space = snd_space - snd_space % mss;
+      return snd_space >= mss ? snd_space - snd_space % mss : snd_space;
     }
   return snd_space;
 }
diff --git a/src/vnet/tcp/tcp.c b/src/vnet/tcp/tcp.c
index d060654a037..cbb95777820 100644
--- a/src/vnet/tcp/tcp.c
+++ b/src/vnet/tcp/tcp.c
@@ -1175,6 +1175,36 @@ tcp_half_open_session_get_transport (u32 conn_index)
   return &tc->connection;
 }
 
+/**
+ * Compute the segment size offered to the session layer for connections
+ * that can use TSO.
+ *
+ * The goal is the largest multiple of snd_mss that still fits in the u16
+ * return type (i.e. is below TCP_MAX_GSO_SZ), capped to half of the send
+ * window. It is never smaller than snd_mss, so a small or closed send
+ * window cannot yield a zero or sub-segment goal.
+ *
+ * @param tc	tcp connection
+ * @return	goal size in bytes
+ */
+static u16
+tcp_session_cal_goal_size (tcp_connection_t * tc)
+{
+  /* Largest value representable in the u16 that is returned */
+  u32 goal_size = TCP_MAX_GSO_SZ - 1;
+
+  /* Round down to a whole number of segments so that a burst does not end
+   * with a short segment. Skipped for a zero mss to avoid a modulo by
+   * zero. */
+  if (PREDICT_TRUE (tc->snd_mss != 0))
+    goal_size -= goal_size % tc->snd_mss;
+
+  goal_size = clib_min (goal_size, tc->snd_wnd / 2);
+
+  /* Never offer less than one segment */
+  return goal_size > tc->snd_mss ? goal_size : tc->snd_mss;
+}
+
 /**
  * Compute maximum segment size for session layer.
  *
@@ -1192,6 +1203,11 @@ tcp_session_send_mss (transport_connection_t * trans_conn)
    * the current state of the connection. */
   tcp_update_burst_snd_vars (tc);
 
+  if (PREDICT_FALSE (tc->is_tso))
+    {
+      return tcp_session_cal_goal_size (tc);
+    }
+
   return tc->snd_mss;
 }
 
diff --git a/src/vnet/tcp/tcp.h b/src/vnet/tcp/tcp.h
index 9e13de4472a..99735f2af70 100644
--- a/src/vnet/tcp/tcp.h
+++ b/src/vnet/tcp/tcp.h
@@ -31,6 +31,7 @@
 #define TCP_FIB_RECHECK_PERIOD	1 * THZ	/**< Recheck every 1s */
 #define TCP_MAX_OPTION_SPACE 40
 #define TCP_CC_DATA_SZ 24
+#define TCP_MAX_GSO_SZ 65536
 
 #define TCP_DUPACK_THRESHOLD 	3
 #define TCP_IW_N_SEGMENTS 	10
@@ -304,6 +305,7 @@ typedef struct _tcp_connection
   transport_connection_t connection;  /**< Common transport data. First! */
 
   u8 state;			/**< TCP state as per tcp_state_t */
+  u8 is_tso;	  /**< Set if the connection can use TSO */
   u16 flags;			/**< Connection flags (see tcp_conn_flags_e) */
   u32 timers[TCP_N_TIMERS];	/**< Timer handles into timer wheel */
 
diff --git a/src/vnet/tcp/tcp_input.c b/src/vnet/tcp/tcp_input.c
index 1a9eff51a72..9c1f6309b96 100755
--- a/src/vnet/tcp/tcp_input.c
+++ b/src/vnet/tcp/tcp_input.c
@@ -14,6 +14,8 @@
  */
 
 #include <vppinfra/sparse_vec.h>
+#include <vnet/fib/ip4_fib.h>
+#include <vnet/fib/ip6_fib.h>
 #include <vnet/tcp/tcp_packet.h>
 #include <vnet/tcp/tcp.h>
 #include <vnet/session/session.h>
@@ -2292,6 +2294,60 @@ tcp_lookup_connection (u32 fib_index, vlib_buffer_t * b, u8 thread_index,
   return tc;
 }
 
+/**
+ * Detect whether the connection's egress interface supports GSO and record
+ * the result in tc->is_tso.
+ *
+ * The remote address is looked up in the connection's FIB and the interface
+ * is derived from the resulting forwarding object. TSO stays disabled when
+ * the egress interface cannot be determined unambiguously.
+ *
+ * @param tc		tcp connection to update
+ * @param is_ipv4	non-zero for an IPv4 connection, zero for IPv6
+ */
+always_inline void
+tcp_check_tx_offload (tcp_connection_t * tc, int is_ipv4)
+{
+  vnet_main_t *vnm = vnet_get_main ();
+  const dpo_id_t *dpo;
+  const load_balance_t *lb;
+  vnet_hw_interface_t *hw_if;
+  u32 sw_if_idx, lb_idx;
+
+  /* No offload unless a GSO capable interface is positively identified */
+  tc->is_tso = 0;
+
+  if (is_ipv4)
+    {
+      ip4_address_t *dst_addr = &(tc->c_rmt_ip.ip4);
+      lb_idx = ip4_fib_forwarding_lookup (tc->c_fib_index, dst_addr);
+    }
+  else
+    {
+      ip6_address_t *dst_addr = &(tc->c_rmt_ip.ip6);
+      lb_idx = ip6_fib_table_fwding_lookup (tc->c_fib_index, dst_addr);
+    }
+
+  lb = load_balance_get (lb_idx);
+
+  /* With several buckets the path taken depends on the flow hash, so
+   * bucket 0 is not necessarily the one used. Be conservative. */
+  if (PREDICT_FALSE (lb->lb_n_buckets > 1))
+    return;
+
+  dpo = load_balance_get_bucket_i (lb, 0);
+
+  /* dpoi_index identifies the DPO instance, it is not an interface index.
+   * Ask the DPO which interface it forwards on instead. */
+  sw_if_idx = dpo_get_urpf (dpo);
+  if (PREDICT_FALSE (sw_if_idx == ~0))
+    return;
+
+  hw_if = vnet_get_sup_hw_interface (vnm, sw_if_idx);
+  tc->is_tso = (hw_if->flags & VNET_HW_INTERFACE_FLAG_SUPPORTS_GSO) ? 1 : 0;
+}
+
+
 always_inline uword
 tcp46_syn_sent_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
 		       vlib_frame_t * from_frame, int is_ip4)
@@ -2508,6 +2541,8 @@ tcp46_syn_sent_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
 	  goto drop;
 	}
 
+      tcp_check_tx_offload (new_tc0, is_ip4);
+
       /* Read data, if any */
       if (PREDICT_FALSE (vnet_buffer (b0)->tcp.data_len))
 	{
@@ -2694,6 +2729,8 @@ tcp46_rcv_process_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
 	  tc0->state = TCP_STATE_ESTABLISHED;
 	  TCP_EVT (TCP_EVT_STATE_CHANGE, tc0);
 
+	  tcp_check_tx_offload (tc0, is_ip4);
+
 	  /* Initialize session variables */
 	  tc0->snd_una = vnet_buffer (b0)->tcp.ack_number;
 	  tc0->snd_wnd = clib_net_to_host_u16 (tcp0->window)
diff --git a/src/vnet/tcp/tcp_output.c b/src/vnet/tcp/tcp_output.c
index ff281b5661d..15aa85712e3 100644
--- a/src/vnet/tcp/tcp_output.c
+++ b/src/vnet/tcp/tcp_output.c
@@ -2126,10 +2126,54 @@ tcp_output_push_ip (vlib_main_t * vm, vlib_buffer_t * b0,
       b0->flags |= VNET_BUFFER_F_OFFLOAD_TCP_CKSUM;
       vnet_buffer (b0)->l3_hdr_offset = (u8 *) ih0 - b0->data;
       vnet_buffer (b0)->l4_hdr_offset = (u8 *) th0 - b0->data;
+      b0->flags |=
+	VNET_BUFFER_F_L3_HDR_OFFSET_VALID | VNET_BUFFER_F_L4_HDR_OFFSET_VALID;
       th0->checksum = 0;
     }
 }
 
+/**
+ * Mark an outgoing segment for GSO if it carries more than snd_mss bytes of
+ * TCP payload.
+ *
+ * Must be called after tcp_output_push_ip (), i.e. with b->current_length
+ * covering the IP header, the TCP header with options and the payload held
+ * in the first buffer.
+ *
+ * @param tc	connection the segment belongs to
+ * @param b	first buffer of the segment
+ */
+always_inline void
+tcp_check_if_gso (tcp_connection_t * tc, vlib_buffer_t * b)
+{
+  u32 l3_hdr_len, l4_hdr_len, data_len;
+
+  /* tcp_session_send_mss () offers more than snd_mss only if is_tso is set */
+  if (PREDICT_TRUE (!tc->is_tso))
+    return;
+
+  l3_hdr_len = tc->c_is_ip4 ? sizeof (ip4_header_t) : sizeof (ip6_header_t);
+  l4_hdr_len = sizeof (tcp_header_t) + tc->snd_opts_len;
+
+  /* u32 so that lengths close to 64kB are not truncated */
+  data_len = b->current_length - l3_hdr_len - l4_hdr_len;
+
+  /* The chained length is meaningful only when flagged valid. A single
+   * buffer may also exceed snd_mss (buffer size permitting), so it is
+   * checked too instead of bailing out early. */
+  if (b->flags & VLIB_BUFFER_TOTAL_LENGTH_VALID)
+    data_len += b->total_length_not_including_first_buffer;
+
+  if (data_len <= tc->snd_mss)
+    return;
+
+  /* GSO needs the header offsets recorded by tcp_output_push_ip () */
+  ASSERT ((b->flags & VNET_BUFFER_F_L3_HDR_OFFSET_VALID) != 0);
+  ASSERT ((b->flags & VNET_BUFFER_F_L4_HDR_OFFSET_VALID) != 0);
+  b->flags |= VNET_BUFFER_F_GSO;
+  vnet_buffer2 (b)->gso_l4_hdr_sz = l4_hdr_len;
+  vnet_buffer2 (b)->gso_size = tc->snd_mss;
+}
+
 always_inline void
 tcp_output_handle_packet (tcp_connection_t * tc0, vlib_buffer_t * b0,
 			  vlib_node_runtime_t * error_node, u16 * next0,
@@ -2213,6 +2235,9 @@ tcp46_output_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
 	  tcp_output_push_ip (vm, b[0], tc0, is_ip4);
 	  tcp_output_push_ip (vm, b[1], tc1, is_ip4);
 
+	  tcp_check_if_gso (tc0, b[0]);
+	  tcp_check_if_gso (tc1, b[1]);
+
 	  tcp_output_handle_packet (tc0, b[0], error_node, &next[0], is_ip4);
 	  tcp_output_handle_packet (tc1, b[1], error_node, &next[1], is_ip4);
 	}
@@ -2221,6 +2246,7 @@ tcp46_output_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
 	  if (tc0 != 0)
 	    {
 	      tcp_output_push_ip (vm, b[0], tc0, is_ip4);
+	      tcp_check_if_gso (tc0, b[0]);
 	      tcp_output_handle_packet (tc0, b[0], error_node, &next[0],
 					is_ip4);
 	    }
@@ -2232,6 +2258,7 @@ tcp46_output_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
 	  if (tc1 != 0)
 	    {
 	      tcp_output_push_ip (vm, b[1], tc1, is_ip4);
+	      tcp_check_if_gso (tc1, b[1]);
 	      tcp_output_handle_packet (tc1, b[1], error_node, &next[1],
 					is_ip4);
 	    }
@@ -2262,6 +2289,7 @@ tcp46_output_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
       if (PREDICT_TRUE (tc0 != 0))
 	{
 	  tcp_output_push_ip (vm, b[0], tc0, is_ip4);
+	  tcp_check_if_gso (tc0, b[0]);
 	  tcp_output_handle_packet (tc0, b[0], error_node, &next[0], is_ip4);
 	}
       else
-- 
cgit