From 02833ff3294f4abbd8e3d52b38446e0f8f533ffc Mon Sep 17 00:00:00 2001
From: Srikanth A <srakula@cisco.com>
Date: Wed, 2 Oct 2019 17:48:58 -0700
Subject: tcp: custom checksum calculations for Ipv4/Ipv6

Type: feature

Based on the configuration, we can disable the checksum offload capability
and calculate the checksum while pushing the TCP and IP headers.
This saves some cycles when the VPP stack is used on legacy hardware devices.

Signed-off-by: Srikanth A <srakula@cisco.com>
Change-Id: Ic1b3fcf3040917e47ee65263694ebf7437ac5668
(cherry picked from commit 3642782a2748503f5b5ccf89d1575c1d489948ef)
---
 src/vnet/ip/ip.h          |  79 +++++++++++++++++++++++++++++++
 src/vnet/ip/ip4_forward.c |  46 ++----------------
 src/vnet/ip/ip6.h         |   3 +-
 src/vnet/ip/ip6_forward.c |  69 ++++++---------------------
 src/vnet/tcp/tcp.h        |   3 ++
 src/vnet/tcp/tcp_output.c | 116 +++++++++++++++++++++++++++++++++++++---------
 6 files changed, 194 insertions(+), 122 deletions(-)

(limited to 'src')

diff --git a/src/vnet/ip/ip.h b/src/vnet/ip/ip.h
index 7a82dcf8f2b..65ccaef40c2 100644
--- a/src/vnet/ip/ip.h
+++ b/src/vnet/ip/ip.h
@@ -195,6 +195,85 @@ ip_incremental_checksum_buffer (vlib_main_t * vm,
   return sum;
 }
 
+always_inline u16
+ip_calculate_l4_checksum (vlib_main_t * vm, vlib_buffer_t * p0,
+			  ip_csum_t sum0, u32 payload_length,
+			  u8 * iph, u32 ip_header_size, u8 * l4h)
+{				/* returns complemented 16-bit sum, 0xfefe on error */
+  u16 sum16;
+  u8 *data_this_buffer, length_odd;
+  u32 n_bytes_left, n_this_buffer, n_ip_bytes_this_buffer;
+
+  n_bytes_left = payload_length;
+
+  if (l4h)			/* L4 header given directly; p0 must be NULL */
+    {
+      ASSERT (p0 == NULL);
+      n_this_buffer = payload_length;
+      data_this_buffer = l4h;
+    }
+  else
+    {
+      ASSERT (p0);
+      if (iph)			/* iph points at the IP header inside p0 */
+	{
+	  ASSERT (ip_header_size);
+	  n_this_buffer = payload_length;
+	  data_this_buffer = iph + ip_header_size;	/* start of L4 header */
+	  n_ip_bytes_this_buffer =
+	    p0->current_length - (((u8 *) iph - p0->data) - p0->current_data);
+	  if (PREDICT_FALSE (payload_length + ip_header_size >
+			     n_ip_bytes_this_buffer))
+	    {
+	      n_this_buffer = n_ip_bytes_this_buffer - ip_header_size;
+	      if (PREDICT_FALSE (n_this_buffer >> 31))
+		{		/* wrapped: IP header exceeds this buffer */
+		  ASSERT (0);
+		  return 0xfefe;
+		}
+	    }
+	}
+      else			/* no IP header pointer given */
+	{			/* p0's current data is taken as the L4 header */
+	  n_this_buffer = p0->current_length;
+	  data_this_buffer = vlib_buffer_get_current (p0);
+	}
+      n_this_buffer = clib_min (n_this_buffer, n_bytes_left);
+    }
+
+  while (1)
+    {
+      sum0 = ip_incremental_checksum (sum0, data_this_buffer, n_this_buffer);
+      n_bytes_left -= n_this_buffer;
+      if (n_bytes_left == 0)
+	break;
+
+      if (!(p0->flags & VLIB_BUFFER_NEXT_PRESENT))
+	{
+	  ASSERT (0);		/* error - chain ended before payload_length */
+	  return 0xfefe;
+	}
+
+      length_odd = (n_this_buffer & 1);
+
+      p0 = vlib_get_buffer (vm, p0->next_buffer);
+      data_this_buffer = vlib_buffer_get_current (p0);
+      n_this_buffer = clib_min (p0->current_length, n_bytes_left);
+
+      if (PREDICT_FALSE (length_odd))
+	{
+	  /* Store a 0 just before this buffer's data to keep 16-bit pairing */
+	  data_this_buffer--;
+	  n_this_buffer++;
+	  n_bytes_left++;
+	  data_this_buffer[0] = 0;
+	}
+    }
+
+  sum16 = ~ip_csum_fold (sum0);
+  return sum16;
+}
+
 void ip_del_all_interface_addresses (vlib_main_t * vm, u32 sw_if_index);
 
 extern vlib_node_registration_t ip4_inacl_node;
diff --git a/src/vnet/ip/ip4_forward.c b/src/vnet/ip/ip4_forward.c
index d4717c6db4b..40c396c4f3b 100644
--- a/src/vnet/ip/ip4_forward.c
+++ b/src/vnet/ip/ip4_forward.c
@@ -1303,10 +1303,6 @@ ip4_tcp_udp_compute_checksum (vlib_main_t * vm, vlib_buffer_t * p0,
 {
   ip_csum_t sum0;
   u32 ip_header_length, payload_length_host_byte_order;
-  u32 n_this_buffer, n_bytes_left, n_ip_bytes_this_buffer;
-  u16 sum16;
-  u8 *data_this_buffer;
-  u8 length_odd;
 
   /* Initialize checksum with ip header. */
   ip_header_length = ip4_header_bytes (ip0);
@@ -1329,45 +1325,9 @@ ip4_tcp_udp_compute_checksum (vlib_main_t * vm, vlib_buffer_t * p0,
     sum0 =
       ip_csum_with_carry (sum0, clib_mem_unaligned (&ip0->src_address, u64));
 
-  n_bytes_left = n_this_buffer = payload_length_host_byte_order;
-  data_this_buffer = (u8 *) ip0 + ip_header_length;
-  n_ip_bytes_this_buffer =
-    p0->current_length - (((u8 *) ip0 - p0->data) - p0->current_data);
-  if (n_this_buffer + ip_header_length > n_ip_bytes_this_buffer)
-    {
-      n_this_buffer = n_ip_bytes_this_buffer > ip_header_length ?
-	n_ip_bytes_this_buffer - ip_header_length : 0;
-    }
-
-  while (1)
-    {
-      sum0 = ip_incremental_checksum (sum0, data_this_buffer, n_this_buffer);
-      n_bytes_left -= n_this_buffer;
-      if (n_bytes_left == 0)
-	break;
-
-      ASSERT (p0->flags & VLIB_BUFFER_NEXT_PRESENT);
-      if (!(p0->flags & VLIB_BUFFER_NEXT_PRESENT))
-	return 0xfefe;
-
-      length_odd = (n_this_buffer & 1);
-
-      p0 = vlib_get_buffer (vm, p0->next_buffer);
-      data_this_buffer = vlib_buffer_get_current (p0);
-      n_this_buffer = clib_min (p0->current_length, n_bytes_left);
-
-      if (PREDICT_FALSE (length_odd))
-	{
-	  /* Prepend a 0 or the resulting checksum will be incorrect. */
-	  data_this_buffer--;
-	  n_this_buffer++;
-	  n_bytes_left++;
-	  data_this_buffer[0] = 0;
-	}
-    }
-
-  sum16 = ~ip_csum_fold (sum0);
-  return sum16;
+  return ip_calculate_l4_checksum (vm, p0, sum0,
+				   payload_length_host_byte_order, (u8 *) ip0,
+				   ip_header_length, NULL);
 }
 
 u32
diff --git a/src/vnet/ip/ip6.h b/src/vnet/ip/ip6.h
index 810fd70ff0c..94c5080a0aa 100644
--- a/src/vnet/ip/ip6.h
+++ b/src/vnet/ip/ip6.h
@@ -683,7 +683,8 @@ vlib_buffer_push_ip6 (vlib_main_t * vm, vlib_buffer_t * b,
 		    sizeof (ip6h->src_address));
   clib_memcpy_fast (ip6h->dst_address.as_u8, dst->as_u8,
 		    sizeof (ip6h->src_address));
-  b->flags |= VNET_BUFFER_F_IS_IP6;
+  vnet_buffer (b)->l3_hdr_offset = (u8 *) ip6h - b->data;
+  b->flags |= VNET_BUFFER_F_IS_IP6 | VNET_BUFFER_F_L3_HDR_OFFSET_VALID;
 
   return ip6h;
 }
diff --git a/src/vnet/ip/ip6_forward.c b/src/vnet/ip/ip6_forward.c
index ea13116d8aa..47fb57ae201 100644
--- a/src/vnet/ip/ip6_forward.c
+++ b/src/vnet/ip/ip6_forward.c
@@ -1011,11 +1011,10 @@ ip6_tcp_udp_icmp_compute_checksum (vlib_main_t * vm, vlib_buffer_t * p0,
 				   ip6_header_t * ip0, int *bogus_lengthp)
 {
   ip_csum_t sum0;
-  u16 sum16, payload_length_host_byte_order;
-  u32 i, n_this_buffer, n_bytes_left;
+  u16 payload_length_host_byte_order;
+  u32 i;
   u32 headers_size = sizeof (ip0[0]);
   u8 *data_this_buffer;
-  u8 length_odd;
 
   ASSERT (bogus_lengthp);
   *bogus_lengthp = 0;
@@ -1027,14 +1026,10 @@ ip6_tcp_udp_icmp_compute_checksum (vlib_main_t * vm, vlib_buffer_t * p0,
 
   for (i = 0; i < ARRAY_LEN (ip0->src_address.as_uword); i++)
     {
-      sum0 = ip_csum_with_carry (sum0,
-				 clib_mem_unaligned (&ip0->
-						     src_address.as_uword[i],
-						     uword));
-      sum0 =
-	ip_csum_with_carry (sum0,
-			    clib_mem_unaligned (&ip0->dst_address.as_uword[i],
-						uword));
+      sum0 = ip_csum_with_carry
+	(sum0, clib_mem_unaligned (&ip0->src_address.as_uword[i], uword));
+      sum0 = ip_csum_with_carry
+	(sum0, clib_mem_unaligned (&ip0->dst_address.as_uword[i], uword));
     }
 
   /* some icmp packets may come with a "router alert" hop-by-hop extension header (e.g., mldv2 packets)
@@ -1056,52 +1051,14 @@ ip6_tcp_udp_icmp_compute_checksum (vlib_main_t * vm, vlib_buffer_t * p0,
       headers_size += skip_bytes;
     }
 
-  n_bytes_left = n_this_buffer = payload_length_host_byte_order;
-
   if (p0)
-    {
-      u32 n_ip_bytes_this_buffer =
-	p0->current_length - (((u8 *) ip0 - p0->data) - p0->current_data);
-      if (n_this_buffer + headers_size > n_ip_bytes_this_buffer)
-	{
-	  n_this_buffer = p0->current_length > headers_size ?
-	    n_ip_bytes_this_buffer - headers_size : 0;
-	}
-    }
-
-  while (1)
-    {
-      sum0 = ip_incremental_checksum (sum0, data_this_buffer, n_this_buffer);
-      n_bytes_left -= n_this_buffer;
-      if (n_bytes_left == 0)
-	break;
-
-      ASSERT (p0->flags & VLIB_BUFFER_NEXT_PRESENT);
-      if (!(p0->flags & VLIB_BUFFER_NEXT_PRESENT))
-	{
-	  *bogus_lengthp = 1;
-	  return 0xfefe;
-	}
-
-      length_odd = (n_this_buffer & 1);
-
-      p0 = vlib_get_buffer (vm, p0->next_buffer);
-      data_this_buffer = vlib_buffer_get_current (p0);
-      n_this_buffer = clib_min (p0->current_length, n_bytes_left);
-
-      if (PREDICT_FALSE (length_odd))
-	{
-	  /* Prepend a 0 or the resulting checksum will be incorrect. */
-	  data_this_buffer--;
-	  n_this_buffer++;
-	  n_bytes_left++;
-	  data_this_buffer[0] = 0;
-	}
-    }
-
-  sum16 = ~ip_csum_fold (sum0);
-
-  return sum16;
+    return ip_calculate_l4_checksum (vm, p0, sum0,
+				     payload_length_host_byte_order,
+				     (u8 *) ip0, headers_size, NULL);
+  else
+    return ip_calculate_l4_checksum (vm, 0, sum0,
+				     payload_length_host_byte_order, NULL, 0,
+				     data_this_buffer);
 }
 
 u32
diff --git a/src/vnet/tcp/tcp.h b/src/vnet/tcp/tcp.h
index a1b7d4cbd0d..1bf32818171 100644
--- a/src/vnet/tcp/tcp.h
+++ b/src/vnet/tcp/tcp.h
@@ -120,6 +120,7 @@ extern timer_expiration_handler tcp_timer_retransmit_syn_handler;
   _(RATE_SAMPLE, "Conn does rate sampling")	\
   _(TRACK_BURST, "Track burst")			\
   _(ZERO_RWND_SENT, "Zero RWND sent")		\
+  _(NO_CSUM_OFFLOAD, "No Checksum Offload")     \
 
 typedef enum _tcp_connection_flag_bits
 {
@@ -1233,6 +1234,8 @@ vlib_buffer_push_tcp_net_order (vlib_buffer_t * b, u16 sp, u16 dp, u32 seq,
   th->window = wnd;
   th->checksum = 0;
   th->urgent_pointer = 0;
+  vnet_buffer (b)->l4_hdr_offset = (u8 *) th - b->data;
+  b->flags |= VNET_BUFFER_F_L4_HDR_OFFSET_VALID;
   return th;
 }
 
diff --git a/src/vnet/tcp/tcp_output.c b/src/vnet/tcp/tcp_output.c
index 7ebea37fc43..a7b0e398d36 100644
--- a/src/vnet/tcp/tcp_output.c
+++ b/src/vnet/tcp/tcp_output.c
@@ -444,6 +444,78 @@ tcp_init_buffer (vlib_main_t * vm, vlib_buffer_t * b)
   return vlib_buffer_make_headroom (b, TRANSPORT_MAX_HDRS_LEN);
 }
 
+
+/* Compute TCP checksum in software when offloading is disabled for a connection */
+u16
+ip6_tcp_compute_checksum_custom (vlib_main_t * vm, vlib_buffer_t * p0,
+				 ip46_address_t * src, ip46_address_t * dst)
+{				/* TCP checksum over p0's chain, IPv6 addresses */
+  ip_csum_t sum0;
+  u16 payload_length_host_byte_order;	/* NOTE(review): u32 in ip4 variant */
+  u32 i;
+
+  /* Seed the sum with the chain length and the TCP protocol number. */
+  sum0 = clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, p0)) +
+    clib_host_to_net_u16 (IP_PROTOCOL_TCP);
+  payload_length_host_byte_order = vlib_buffer_length_in_chain (vm, p0);
+
+  for (i = 0; i < ARRAY_LEN (src->ip6.as_uword); i++)
+    {				/* fold in src and dst, one uword at a time */
+      sum0 = ip_csum_with_carry
+	(sum0, clib_mem_unaligned (&src->ip6.as_uword[i], uword));
+      sum0 = ip_csum_with_carry
+	(sum0, clib_mem_unaligned (&dst->ip6.as_uword[i], uword));
+    }
+
+  return ip_calculate_l4_checksum (vm, p0, sum0,
+				   payload_length_host_byte_order, NULL, 0,
+				   NULL);
+}
+
+u16
+ip4_tcp_compute_checksum_custom (vlib_main_t * vm, vlib_buffer_t * p0,
+				 ip46_address_t * src, ip46_address_t * dst)
+{				/* TCP checksum over p0's chain, IPv4 addresses */
+  ip_csum_t sum0;
+  u32 payload_length_host_byte_order;
+
+  payload_length_host_byte_order = vlib_buffer_length_in_chain (vm, p0);
+  sum0 =			/* seed: chain length plus TCP protocol number */
+    clib_host_to_net_u32 (payload_length_host_byte_order +
+			  (IP_PROTOCOL_TCP << 16));
+
+  sum0 = ip_csum_with_carry (sum0, clib_mem_unaligned (&src->ip4, u32));	/* src */
+  sum0 = ip_csum_with_carry (sum0, clib_mem_unaligned (&dst->ip4, u32));	/* dst */
+
+  return ip_calculate_l4_checksum (vm, p0, sum0,
+				   payload_length_host_byte_order, NULL, 0,
+				   NULL);
+}
+
+static inline u16
+tcp_compute_checksum (tcp_connection_t * tc, vlib_buffer_t * b)
+{				/* returns 0 when the offload flag is set instead */
+  u16 checksum = 0;
+  if (PREDICT_FALSE (tc->flags & TCP_CONN_NO_CSUM_OFFLOAD))
+    {				/* connection opted out: compute in software */
+      tcp_worker_ctx_t *wrk = tcp_get_worker (tc->c_thread_index);
+      vlib_main_t *vm = wrk->vm;
+
+      if (tc->c_is_ip4)
+	checksum = ip4_tcp_compute_checksum_custom
+	  (vm, b, &tc->c_lcl_ip, &tc->c_rmt_ip);
+      else
+	checksum = ip6_tcp_compute_checksum_custom
+	  (vm, b, &tc->c_lcl_ip, &tc->c_rmt_ip);
+    }
+  else
+    {				/* mark buffer with the TCP checksum offload flag */
+      b->flags |= VNET_BUFFER_F_OFFLOAD_TCP_CKSUM;
+    }
+  return checksum;
+}
+
+
 /**
  * Prepare ACK
  */
@@ -466,6 +538,9 @@ tcp_make_ack_i (tcp_connection_t * tc, vlib_buffer_t * b, tcp_state_t state,
 			     tc->rcv_nxt, tcp_hdr_opts_len, flags, wnd);
 
   tcp_options_write ((u8 *) (th + 1), snd_opts);
+
+  th->checksum = tcp_compute_checksum (tc, b);
+
   vnet_buffer (b)->tcp.connection_index = tc->c_c_index;
 
   if (wnd == 0)
@@ -517,6 +592,7 @@ tcp_make_syn (tcp_connection_t * tc, vlib_buffer_t * b)
 			     initial_wnd);
   vnet_buffer (b)->tcp.connection_index = tc->c_c_index;
   tcp_options_write ((u8 *) (th + 1), &snd_opts);
+  th->checksum = tcp_compute_checksum (tc, b);
 }
 
 /**
@@ -541,6 +617,7 @@ tcp_make_synack (tcp_connection_t * tc, vlib_buffer_t * b)
   tcp_options_write ((u8 *) (th + 1), snd_opts);
 
   vnet_buffer (b)->tcp.connection_index = tc->c_c_index;
+  th->checksum = tcp_compute_checksum (tc, b);
 }
 
 always_inline void
@@ -786,7 +863,8 @@ tcp_send_reset_w_pkt (tcp_connection_t * tc, vlib_buffer_t * pkt,
     {
       ASSERT ((pkt_ih4->ip_version_and_header_length & 0xF0) == 0x40);
       ih4 = vlib_buffer_push_ip4 (vm, b, &pkt_ih4->dst_address,
-				  &pkt_ih4->src_address, IP_PROTOCOL_TCP, 1);
+				  &pkt_ih4->src_address, IP_PROTOCOL_TCP,
+				  (!(tc->flags & TCP_CONN_NO_CSUM_OFFLOAD)));
       th->checksum = ip4_tcp_udp_compute_checksum (vm, b, ih4);
     }
   else
@@ -833,6 +911,7 @@ tcp_send_reset (tcp_connection_t * tc)
 			     tc->rcv_nxt, tcp_hdr_opts_len, flags,
 			     advertise_wnd);
   opts_write_len = tcp_options_write ((u8 *) (th + 1), &tc->snd_opts);
+  th->checksum = tcp_compute_checksum (tc, b);
   ASSERT (opts_write_len == tc->snd_opts_len);
   vnet_buffer (b)->tcp.connection_index = tc->c_c_index;
   tcp_enqueue_to_output (wrk, b, bi, tc->c_is_ip4);
@@ -851,7 +930,8 @@ tcp_push_ip_hdr (tcp_worker_ctx_t * wrk, tcp_connection_t * tc,
     {
       ip4_header_t *ih;
       ih = vlib_buffer_push_ip4 (vm, b, &tc->c_lcl_ip4,
-				 &tc->c_rmt_ip4, IP_PROTOCOL_TCP, 1);
+				 &tc->c_rmt_ip4, IP_PROTOCOL_TCP,
+				 (!(tc->flags & TCP_CONN_NO_CSUM_OFFLOAD)));
       th->checksum = ip4_tcp_udp_compute_checksum (vm, b, ih);
     }
   else
@@ -1082,6 +1162,9 @@ tcp_push_hdr_i (tcp_connection_t * tc, vlib_buffer_t * b, u32 snd_nxt,
   tc->bytes_out += data_len;
   tc->data_segs_out += 1;
 
+
+  th->checksum = tcp_compute_checksum (tc, b);
+
   TCP_EVT (TCP_EVT_PKTIZE, tc);
 }
 
@@ -2160,30 +2243,19 @@ always_inline void
 tcp_output_push_ip (vlib_main_t * vm, vlib_buffer_t * b0,
 		    tcp_connection_t * tc0, u8 is_ip4)
 {
-  tcp_header_t *th0 = 0;
+  u8 __clib_unused *ih0;
+  tcp_header_t __clib_unused *th0 = vlib_buffer_get_current (b0);
 
-  th0 = vlib_buffer_get_current (b0);
   TCP_EVT (TCP_EVT_OUTPUT, tc0, th0->flags, b0->current_length);
+
   if (is_ip4)
-    {
-      vlib_buffer_push_ip4 (vm, b0, &tc0->c_lcl_ip4, &tc0->c_rmt_ip4,
-			    IP_PROTOCOL_TCP, 1);
-      b0->flags |= VNET_BUFFER_F_OFFLOAD_TCP_CKSUM;
-      vnet_buffer (b0)->l4_hdr_offset = (u8 *) th0 - b0->data;
-      th0->checksum = 0;
-    }
+    ih0 = vlib_buffer_push_ip4 (vm, b0, &tc0->c_lcl_ip4, &tc0->c_rmt_ip4,
+				IP_PROTOCOL_TCP,
+				(!(tc0->flags & TCP_CONN_NO_CSUM_OFFLOAD)));
   else
-    {
-      ip6_header_t *ih0;
-      ih0 = vlib_buffer_push_ip6 (vm, b0, &tc0->c_lcl_ip6,
-				  &tc0->c_rmt_ip6, IP_PROTOCOL_TCP);
-      b0->flags |= VNET_BUFFER_F_OFFLOAD_TCP_CKSUM;
-      vnet_buffer (b0)->l3_hdr_offset = (u8 *) ih0 - b0->data;
-      vnet_buffer (b0)->l4_hdr_offset = (u8 *) th0 - b0->data;
-      b0->flags |=
-	VNET_BUFFER_F_L3_HDR_OFFSET_VALID | VNET_BUFFER_F_L4_HDR_OFFSET_VALID;
-      th0->checksum = 0;
-    }
+    ih0 = vlib_buffer_push_ip6 (vm, b0, &tc0->c_lcl_ip6, &tc0->c_rmt_ip6,
+				IP_PROTOCOL_TCP);
+
 }
 
 always_inline void
-- 
cgit