Diffstat (limited to 'lib/librte_eal/common/include/arch/arm/rte_vect.h')
-rw-r--r--  lib/librte_eal/common/include/arch/arm/rte_vect.h | 119 +++++++++++++++++++++-
1 file changed, 117 insertions(+), 2 deletions(-)
diff --git a/lib/librte_eal/common/include/arch/arm/rte_vect.h b/lib/librte_eal/common/include/arch/arm/rte_vect.h
index 4107c998..782350d1 100644
--- a/lib/librte_eal/common/include/arch/arm/rte_vect.h
+++ b/lib/librte_eal/common/include/arch/arm/rte_vect.h
@@ -1,7 +1,7 @@
/*-
* BSD LICENSE
*
- * Copyright(c) 2015 Cavium Networks. All rights reserved.
+ * Copyright(c) 2015 Cavium, Inc. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
@@ -13,7 +13,7 @@
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
- * * Neither the name of Cavium Networks nor the names of its
+ * * Neither the name of Cavium, Inc nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
@@ -35,6 +35,7 @@
#include <stdint.h>
#include "generic/rte_vect.h"
+#include "rte_debug.h"
#include "arm_neon.h"
#ifdef __cplusplus
@@ -76,8 +77,122 @@ vqtbl1q_u8(uint8x16_t a, uint8x16_t b)
return vld1q_u8(rte_ret.u8);
}
+
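+/*
+ * NEON intrinsic vaddvq_u16() is not supported in ARMv7-A (AArch32),
+ * so it is emulated here: pairwise long additions reduce the eight
+ * u16 lanes to a single scalar sum.
+ */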
+static inline uint16_t
+vaddvq_u16(uint16x8_t a)
+{
+ uint32x4_t m = vpaddlq_u16(a);
+ uint64x2_t n = vpaddlq_u32(m);
+ uint64x1_t o = vget_low_u64(n) + vget_high_u64(n);
+
+ return vget_lane_u32((uint32x2_t)o, 0);
+}
+
#endif
+#if defined(RTE_TOOLCHAIN_GCC) && (GCC_VERSION < 70000)
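+/*
+ * NEON intrinsic vcopyq_laneq_u32() is supported since GCC version 7.
+ * Example (illustrative): vcopyq_laneq_u32(a, 3, b, 0) returns a copy
+ * of 'a' with lane 3 replaced by lane 0 of 'b'.
+ */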
+static inline uint32x4_t
+vcopyq_laneq_u32(uint32x4_t a, const int lane_a,
+ uint32x4_t b, const int lane_b)
+{
+ return vsetq_lane_u32(vgetq_lane_u32(b, lane_b), a, lane_a);
+}
+#endif
+
+#if defined(RTE_ARCH_ARM64)
+#if defined(RTE_TOOLCHAIN_GCC) && (GCC_VERSION < 70000)
+
+#if (GCC_VERSION < 40900)
+typedef uint64_t poly64_t;
+typedef uint64x2_t poly64x2_t;
+typedef uint8_t poly128_t __attribute__((vector_size(16), aligned(16)));
+#endif
+
+/* NEON intrinsic vreinterpretq_u64_p128() is supported since GCC version 7 */
+static inline uint64x2_t
+vreinterpretq_u64_p128(poly128_t x)
+{
+ return (uint64x2_t)x;
+}
+
+/* NEON intrinsic vreinterpretq_p64_u64() is supported since GCC version 7 */
+static inline poly64x2_t
+vreinterpretq_p64_u64(uint64x2_t x)
+{
+ return (poly64x2_t)x;
+}
+
+/* NEON intrinsic vgetq_lane_p64() is supported since GCC version 7 */
+static inline poly64_t
+vgetq_lane_p64(poly64x2_t x, const int lane)
+{
+ RTE_ASSERT(lane >= 0 && lane <= 1);
+
+ poly64_t *p = (poly64_t *)&x;
+
+ return p[lane];
+}
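+
+/*
+ * Example (illustrative) use of the shims above:
+ *   poly64x2_t p = vreinterpretq_p64_u64(vdupq_n_u64(0x87));
+ *   poly64_t lo = vgetq_lane_p64(p, 0);  lo is now 0x87
+ */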
+#endif
+#endif
+
+/*
+ * If (0 <= index <= 15), then call the ASIMD ext instruction on the
+ * 128 bit registers v0 and v1 with the appropriate index. The switch
+ * statement is needed because the index argument of vextq_u8() must
+ * be a compile-time constant.
+ *
+ * Else returns a zero vector.
+ */
+static inline uint8x16_t
+vextract(uint8x16_t v0, uint8x16_t v1, const int index)
+{
+ switch (index) {
+ case 0: return vextq_u8(v0, v1, 0);
+ case 1: return vextq_u8(v0, v1, 1);
+ case 2: return vextq_u8(v0, v1, 2);
+ case 3: return vextq_u8(v0, v1, 3);
+ case 4: return vextq_u8(v0, v1, 4);
+ case 5: return vextq_u8(v0, v1, 5);
+ case 6: return vextq_u8(v0, v1, 6);
+ case 7: return vextq_u8(v0, v1, 7);
+ case 8: return vextq_u8(v0, v1, 8);
+ case 9: return vextq_u8(v0, v1, 9);
+ case 10: return vextq_u8(v0, v1, 10);
+ case 11: return vextq_u8(v0, v1, 11);
+ case 12: return vextq_u8(v0, v1, 12);
+ case 13: return vextq_u8(v0, v1, 13);
+ case 14: return vextq_u8(v0, v1, 14);
+ case 15: return vextq_u8(v0, v1, 15);
+ }
+ return vdupq_n_u8(0);
+}
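+
+/*
+ * Example (illustrative): vextract(v0, v1, 2) returns bytes 2..15 of
+ * v0 followed by bytes 0..1 of v1.
+ */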
+
+/**
+ * Shifts a 128 bit register right by the specified number of bytes,
+ * shifting in zeroes.
+ *
+ * Value of the shift parameter must be in the range 0 to 16.
+ */
+static inline uint64x2_t
+vshift_bytes_right(uint64x2_t reg, const unsigned int shift)
+{
+ return vreinterpretq_u64_u8(vextract(
+ vreinterpretq_u8_u64(reg),
+ vdupq_n_u8(0),
+ shift));
+}
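+
+/*
+ * Example (illustrative): vshift_bytes_right(x, 8) moves the upper
+ * 64 bit lane of x into the lower lane and zeroes the upper lane.
+ */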
+
+/**
+ * Shifts a 128 bit register left by the specified number of bytes,
+ * shifting in zeroes.
+ *
+ * Value of the shift parameter must be in the range 0 to 16.
+ */
+static inline uint64x2_t
+vshift_bytes_left(uint64x2_t reg, const unsigned int shift)
+{
+	/* vextract() returns a zero vector for index 16, so a shift of
+	 * 0 bytes (which would map to index 16) must be handled
+	 * separately to keep the documented 0 to 16 range correct.
+	 */
+	if (shift == 0)
+		return reg;
+
+	return vreinterpretq_u64_u8(vextract(
+			vdupq_n_u8(0),
+			vreinterpretq_u8_u64(reg),
+			16 - shift));
+}
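+
+/*
+ * Example (illustrative): vshift_bytes_left(x, 8) moves the lower
+ * 64 bit lane of x into the upper lane and zeroes the lower lane.
+ */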
+
#ifdef __cplusplus
}
#endif