summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorDamjan Marion <damarion@cisco.com>2018-06-18 22:21:40 +0200
committerDamjan Marion <dmarion@me.com>2018-06-28 10:00:05 +0000
commit08bca80c998bdae87060306350124714606a1604 (patch)
treeb207c71fc42030e9beb26addf5e4e4056a39e73d
parent0d427d8d8a48af404dfc6535ae72d334b790f809 (diff)
ip: vectorized ip checksum
Change-Id: Ida678e6f31daa8decb18189da712a350336326e2 Signed-off-by: Damjan Marion <damarion@cisco.com>
-rw-r--r--src/vnet/ip/ip4_input.h97
-rw-r--r--src/vnet/ip/ip_packet.h91
-rw-r--r--src/vppinfra/vector_avx2.h28
3 files changed, 147 insertions, 69 deletions
diff --git a/src/vnet/ip/ip4_input.h b/src/vnet/ip/ip4_input.h
index e0873039990..889b423d700 100644
--- a/src/vnet/ip/ip4_input.h
+++ b/src/vnet/ip/ip4_input.h
@@ -56,6 +56,26 @@ typedef enum
IP4_INPUT_N_NEXT,
} ip4_input_next_t;
+static_always_inline void
+check_ver_opt_csum (ip4_header_t * ip, u8 * error, int verify_checksum)
+{
+ if (PREDICT_FALSE (ip->ip_version_and_header_length != 0x45))
+ {
+ if ((ip->ip_version_and_header_length & 0xf) != 5)
+ {
+ *error = IP4_ERROR_OPTIONS;
+ if (verify_checksum && ip_csum (ip, ip4_header_bytes (ip)) != 0)
+ *error = IP4_ERROR_BAD_CHECKSUM;
+ }
+ else
+ *error = IP4_ERROR_VERSION;
+ }
+ else
+ if (PREDICT_FALSE (verify_checksum &&
+ ip_csum (ip, sizeof (ip4_header_t)) != 0))
+ *error = IP4_ERROR_BAD_CHECKSUM;
+}
+
always_inline void
ip4_input_check_x4 (vlib_main_t * vm,
vlib_node_runtime_t * error_node,
@@ -71,22 +91,10 @@ ip4_input_check_x4 (vlib_main_t * vm,
error0 = error1 = error2 = error3 = IP4_ERROR_NONE;
- /* Punt packets with options or wrong version. */
- if (PREDICT_FALSE (ip[0]->ip_version_and_header_length != 0x45))
- error0 = (ip[0]->ip_version_and_header_length & 0xf) != 5 ?
- IP4_ERROR_OPTIONS : IP4_ERROR_VERSION;
-
- if (PREDICT_FALSE (ip[1]->ip_version_and_header_length != 0x45))
- error1 = (ip[1]->ip_version_and_header_length & 0xf) != 5 ?
- IP4_ERROR_OPTIONS : IP4_ERROR_VERSION;
-
- if (PREDICT_FALSE (ip[2]->ip_version_and_header_length != 0x45))
- error2 = (ip[2]->ip_version_and_header_length & 0xf) != 5 ?
- IP4_ERROR_OPTIONS : IP4_ERROR_VERSION;
-
- if (PREDICT_FALSE (ip[3]->ip_version_and_header_length != 0x45))
- error3 = (ip[3]->ip_version_and_header_length & 0xf) != 5 ?
- IP4_ERROR_OPTIONS : IP4_ERROR_VERSION;
+ check_ver_opt_csum (ip[0], &error0, verify_checksum);
+ check_ver_opt_csum (ip[1], &error1, verify_checksum);
+ check_ver_opt_csum (ip[2], &error2, verify_checksum);
+ check_ver_opt_csum (ip[3], &error3, verify_checksum);
if (PREDICT_FALSE (ip[0]->ttl < 1))
error0 = IP4_ERROR_TIME_EXPIRED;
@@ -97,26 +105,6 @@ ip4_input_check_x4 (vlib_main_t * vm,
if (PREDICT_FALSE (ip[3]->ttl < 1))
error3 = IP4_ERROR_TIME_EXPIRED;
- /* Verify header checksum. */
- if (verify_checksum)
- {
- ip_csum_t sum0, sum1, sum2, sum3;
-
- ip4_partial_header_checksum_x1 (ip[0], sum0);
- ip4_partial_header_checksum_x1 (ip[1], sum1);
- ip4_partial_header_checksum_x1 (ip[2], sum2);
- ip4_partial_header_checksum_x1 (ip[3], sum3);
-
- error0 = 0xffff != ip_csum_fold (sum0) ?
- IP4_ERROR_BAD_CHECKSUM : error0;
- error1 = 0xffff != ip_csum_fold (sum1) ?
- IP4_ERROR_BAD_CHECKSUM : error1;
- error2 = 0xffff != ip_csum_fold (sum2) ?
- IP4_ERROR_BAD_CHECKSUM : error2;
- error3 = 0xffff != ip_csum_fold (sum3) ?
- IP4_ERROR_BAD_CHECKSUM : error3;
- }
-
/* Drop fragmentation offset 1 packets. */
error0 = ip4_get_fragment_offset (ip[0]) == 1 ?
IP4_ERROR_FRAGMENT_OFFSET_ONE : error0;
@@ -226,34 +214,14 @@ ip4_input_check_x2 (vlib_main_t * vm,
error0 = error1 = IP4_ERROR_NONE;
- /* Punt packets with options or wrong version. */
- if (PREDICT_FALSE (ip0->ip_version_and_header_length != 0x45))
- error0 = (ip0->ip_version_and_header_length & 0xf) != 5 ?
- IP4_ERROR_OPTIONS : IP4_ERROR_VERSION;
-
- if (PREDICT_FALSE (ip1->ip_version_and_header_length != 0x45))
- error1 = (ip1->ip_version_and_header_length & 0xf) != 5 ?
- IP4_ERROR_OPTIONS : IP4_ERROR_VERSION;
+ check_ver_opt_csum (ip0, &error0, verify_checksum);
+ check_ver_opt_csum (ip1, &error1, verify_checksum);
if (PREDICT_FALSE (ip0->ttl < 1))
error0 = IP4_ERROR_TIME_EXPIRED;
if (PREDICT_FALSE (ip1->ttl < 1))
error1 = IP4_ERROR_TIME_EXPIRED;
- /* Verify header checksum. */
- if (verify_checksum)
- {
- ip_csum_t sum0, sum1;
-
- ip4_partial_header_checksum_x1 (ip0, sum0);
- ip4_partial_header_checksum_x1 (ip1, sum1);
-
- error0 = 0xffff != ip_csum_fold (sum0) ?
- IP4_ERROR_BAD_CHECKSUM : error0;
- error1 = 0xffff != ip_csum_fold (sum1) ?
- IP4_ERROR_BAD_CHECKSUM : error1;
- }
-
/* Drop fragmentation offset 1 packets. */
error0 = ip4_get_fragment_offset (ip0) == 1 ?
IP4_ERROR_FRAGMENT_OFFSET_ONE : error0;
@@ -320,22 +288,13 @@ ip4_input_check_x1 (vlib_main_t * vm,
error0 = IP4_ERROR_NONE;
+ check_ver_opt_csum (ip0, &error0, verify_checksum);
+
/* Punt packets with options or wrong version. */
if (PREDICT_FALSE (ip0->ip_version_and_header_length != 0x45))
error0 = (ip0->ip_version_and_header_length & 0xf) != 5 ?
IP4_ERROR_OPTIONS : IP4_ERROR_VERSION;
- /* Verify header checksum. */
- if (verify_checksum)
- {
- ip_csum_t sum0;
-
- ip4_partial_header_checksum_x1 (ip0, sum0);
-
- error0 = 0xffff != ip_csum_fold (sum0) ?
- IP4_ERROR_BAD_CHECKSUM : error0;
- }
-
/* Drop fragmentation offset 1 packets. */
error0 = ip4_get_fragment_offset (ip0) == 1 ?
IP4_ERROR_FRAGMENT_OFFSET_ONE : error0;
diff --git a/src/vnet/ip/ip_packet.h b/src/vnet/ip/ip_packet.h
index 6c86e3e046e..c4990976188 100644
--- a/src/vnet/ip/ip_packet.h
+++ b/src/vnet/ip/ip_packet.h
@@ -86,6 +86,97 @@ typedef enum
/* IP checksum support. */
+static_always_inline u16
+ip_csum (void *data, u16 n_left)
+{
+ u32 sum;
+#ifdef CLIB_HAVE_VEC256
+ u16x16 v1, v2;
+ u32x8 zero = { 0 };
+ u32x8 sum8 = { 0 };
+ u32x4 sum4;
+#endif
+
+ /* if there is odd number of bytes, pad by zero and store in sum */
+ sum = (n_left & 1) ? ((u8 *) data)[n_left - 1] << 8 : 0;
+
+ /* we deal with words */
+ n_left >>= 1;
+
+#ifdef CLIB_HAVE_VEC256
+ while (n_left >= 32)
+ {
+ v1 = u16x16_load_unaligned (data);
+ v2 = u16x16_load_unaligned (data + 32);
+
+#ifdef CLIB_ARCH_IS_LITTLE_ENDIAN
+ v1 = u16x16_byte_swap (v1);
+ v2 = u16x16_byte_swap (v2);
+#endif
+ sum8 += u16x8_extend_to_u32x8 (u16x16_extract_lo (v1));
+ sum8 += u16x8_extend_to_u32x8 (u16x16_extract_hi (v1));
+ sum8 += u16x8_extend_to_u32x8 (u16x16_extract_lo (v2));
+ sum8 += u16x8_extend_to_u32x8 (u16x16_extract_hi (v2));
+ n_left -= 32;
+ data += 64;
+ }
+
+ if (n_left >= 16)
+ {
+ v1 = u16x16_load_unaligned (data);
+#ifdef CLIB_ARCH_IS_LITTLE_ENDIAN
+ v1 = u16x16_byte_swap (v1);
+#endif
+ v1 = u16x16_byte_swap (u16x16_load_unaligned (data));
+ sum8 += u16x8_extend_to_u32x8 (u16x16_extract_lo (v1));
+ sum8 += u16x8_extend_to_u32x8 (u16x16_extract_hi (v1));
+ n_left -= 16;
+ data += 32;
+ }
+
+ if (n_left)
+ {
+ v1 = u16x16_load_unaligned (data);
+#ifdef CLIB_ARCH_IS_LITTLE_ENDIAN
+ v1 = u16x16_byte_swap (v1);
+#endif
+ v1 = u16x16_mask_last (v1, 16 - n_left);
+ sum8 += u16x8_extend_to_u32x8 (u16x16_extract_lo (v1));
+ sum8 += u16x8_extend_to_u32x8 (u16x16_extract_hi (v1));
+ }
+
+ sum8 = u32x8_hadd (sum8, zero);
+ sum4 = u32x8_extract_lo (sum8) + u32x8_extract_hi (sum8);
+ sum = sum4[0] + sum4[1];
+
+#else
+ /* scalar version */
+ while (n_left >= 8)
+ {
+ sum += clib_net_to_host_u16 (*((u16 *) data + 0));
+ sum += clib_net_to_host_u16 (*((u16 *) data + 1));
+ sum += clib_net_to_host_u16 (*((u16 *) data + 2));
+ sum += clib_net_to_host_u16 (*((u16 *) data + 3));
+ sum += clib_net_to_host_u16 (*((u16 *) data + 4));
+ sum += clib_net_to_host_u16 (*((u16 *) data + 5));
+ sum += clib_net_to_host_u16 (*((u16 *) data + 6));
+ sum += clib_net_to_host_u16 (*((u16 *) data + 7));
+ n_left -= 8;
+ data += 16;
+ }
+ while (n_left)
+ {
+ sum += clib_net_to_host_u16 (*(u16 *) data);
+ n_left -= 1;
+ data += 2;
+ }
+#endif
+
+ sum = (sum & 0xffff) + (sum >> 16);
+ sum = (sum & 0xffff) + (sum >> 16);
+ return ~((u16) sum);
+}
+
/* Incremental checksum update. */
typedef uword ip_csum_t;
diff --git a/src/vppinfra/vector_avx2.h b/src/vppinfra/vector_avx2.h
index 6b298be8bfd..66c46f226aa 100644
--- a/src/vppinfra/vector_avx2.h
+++ b/src/vppinfra/vector_avx2.h
@@ -137,6 +137,34 @@ u32x8_hadd (u32x8 v1, u32x8 v2)
return (u32x8) _mm256_hadd_epi32 ((__m256i) v1, (__m256i) v2);
}
+static_always_inline u16x16
+u16x16_mask_last (u16x16 v, u8 n_last)
+{
+ const u16x16 masks[17] = {
+ {0},
+ {-1},
+ {-1, -1},
+ {-1, -1, -1},
+ {-1, -1, -1, -1},
+ {-1, -1, -1, -1, -1},
+ {-1, -1, -1, -1, -1, -1},
+ {-1, -1, -1, -1, -1, -1, -1},
+ {-1, -1, -1, -1, -1, -1, -1, -1},
+ {-1, -1, -1, -1, -1, -1, -1, -1, -1},
+ {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
+ {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
+ {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
+ {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
+ {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
+ {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
+ {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
+ };
+
+ ASSERT (n_last < 17);
+
+ return v & masks[16 - n_last];
+}
+
#endif /* included_vector_avx2_h */
/*