Diffstat (limited to 'lib/librte_eal/common/include/arch/arm/rte_vect.h')
-rw-r--r--  lib/librte_eal/common/include/arch/arm/rte_vect.h | 119 +++++++++++++++++++++-
1 file changed, 117 insertions(+), 2 deletions(-)
diff --git a/lib/librte_eal/common/include/arch/arm/rte_vect.h b/lib/librte_eal/common/include/arch/arm/rte_vect.h
index 4107c998..782350d1 100644
--- a/lib/librte_eal/common/include/arch/arm/rte_vect.h
+++ b/lib/librte_eal/common/include/arch/arm/rte_vect.h
@@ -1,7 +1,7 @@
/*-
* BSD LICENSE
*
- * Copyright(c) 2015 Cavium Networks. All rights reserved.
+ * Copyright(c) 2015 Cavium, Inc. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
@@ -13,7 +13,7 @@
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
- * * Neither the name of Cavium Networks nor the names of its
+ * * Neither the name of Cavium, Inc nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
@@ -35,6 +35,7 @@
#include <stdint.h>
#include "generic/rte_vect.h"
+#include "rte_debug.h"
#include "arm_neon.h"
#ifdef __cplusplus
@@ -76,8 +77,122 @@ vqtbl1q_u8(uint8x16_t a, uint8x16_t b)
return vld1q_u8(rte_ret.u8);
}
+
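+/*
+ * NEON intrinsic vaddvq_u16() is not supported in ARMv7-A (AArch32),
+ * so it is emulated here: pairwise long additions reduce the eight
+ * u16 lanes to a single scalar sum.
+ */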
+static inline uint16_t
+vaddvq_u16(uint16x8_t a)
+{
+ uint32x4_t m = vpaddlq_u16(a);
+ uint64x2_t n = vpaddlq_u32(m);
+ uint64x1_t o = vget_low_u64(n) + vget_high_u64(n);
+
+ return vget_lane_u32((uint32x2_t)o, 0);
+}
+
#endif
+#if defined(RTE_TOOLCHAIN_GCC) && (GCC_VERSION < 70000)
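+/*
+ * NEON intrinsic vcopyq_laneq_u32() is supported since GCC version 7.
+ * Example (illustrative): vcopyq_laneq_u32(a, 3, b, 0) returns a copy
+ * of 'a' with lane 3 replaced by lane 0 of 'b'.
+ */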
+static inline uint32x4_t
+vcopyq_laneq_u32(uint32x4_t a, const int lane_a,
+ uint32x4_t b, const int lane_b)
+{
+ return vsetq_lane_u32(vgetq_lane_u32(b, lane_b), a, lane_a);
+}
+#endif
+
+#if defined(RTE_ARCH_ARM64)
+#if defined(RTE_TOOLCHAIN_GCC) && (GCC_VERSION < 70000)
+
+#if (GCC_VERSION < 40900)
+typedef uint64_t poly64_t;
+typedef uint64x2_t poly64x2_t;
+typedef uint8_t poly128_t __attribute__((vector_size(16), aligned(16)));
+#endif
+
+/* NEON intrinsic vreinterpretq_u64_p128() is supported since GCC version 7 */
+static inline uint64x2_t
+vreinterpretq_u64_p128(poly128_t x)
+{
+ return (uint64x2_t)x;
+}
+
+/* NEON intrinsic vreinterpretq_p64_u64() is supported since GCC version 7 */
+static inline poly64x2_t
+vreinterpretq_p64_u64(uint64x2_t x)
+{
+ return (poly64x2_t)x;
+}
+
+/* NEON intrinsic vgetq_lane_p64() is supported since GCC version 7 */
+static inline poly64_t
+vgetq_lane_p64(poly64x2_t x, const int lane)
+{
+ RTE_ASSERT(lane >= 0 && lane <= 1);
+
+ poly64_t *p = (poly64_t *)&x;
+
+ return p[lane];
+}
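+
+/*
+ * Example (illustrative) use of the shims above:
+ *   poly64x2_t p = vreinterpretq_p64_u64(vdupq_n_u64(0x87));
+ *   poly64_t lo = vgetq_lane_p64(p, 0);  lo is now 0x87
+ */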
+#endif
+#endif
+
+/*
+ * If (0 <= index <= 15), then call the ASIMD ext instruction on the
+ * 128 bit registers v0 and v1 with the appropriate index. The switch
+ * statement is needed because the index argument of vextq_u8() must
+ * be a compile-time constant.
+ *
+ * Else returns a zero vector.
+ */
+static inline uint8x16_t
+vextract(uint8x16_t v0, uint8x16_t v1, const int index)
+{
+ switch (index) {
+ case 0: return vextq_u8(v0, v1, 0);
+ case 1: return vextq_u8(v0, v1, 1);
+ case 2: return vextq_u8(v0, v1, 2);
+ case 3: return vextq_u8(v0, v1, 3);
+ case 4: return vextq_u8(v0, v1, 4);
+ case 5: return vextq_u8(v0, v1, 5);
+ case 6: return vextq_u8(v0, v1, 6);
+ case 7: return vextq_u8(v0, v1, 7);
+ case 8: return vextq_u8(v0, v1, 8);
+ case 9: return vextq_u8(v0, v1, 9);
+ case 10: return vextq_u8(v0, v1, 10);
+ case 11: return vextq_u8(v0, v1, 11);
+ case 12: return vextq_u8(v0, v1, 12);
+ case 13: return vextq_u8(v0, v1, 13);
+ case 14: return vextq_u8(v0, v1, 14);
+ case 15: return vextq_u8(v0, v1, 15);
+ }
+ return vdupq_n_u8(0);
+}
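+
+/*
+ * Example (illustrative): vextract(v0, v1, 2) returns bytes 2..15 of
+ * v0 followed by bytes 0..1 of v1.
+ */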
+
+/**
+ * Shifts a 128 bit register right by the specified number of bytes,
+ * shifting in zeroes.
+ *
+ * Value of the shift parameter must be in the range 0 to 16.
+ */
+static inline uint64x2_t
+vshift_bytes_right(uint64x2_t reg, const unsigned int shift)
+{
+ return vreinterpretq_u64_u8(vextract(
+ vreinterpretq_u8_u64(reg),
+ vdupq_n_u8(0),
+ shift));
+}
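+
+/*
+ * Example (illustrative): vshift_bytes_right(x, 8) moves the upper
+ * 64 bit lane of x into the lower lane and zeroes the upper lane.
+ */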
+
+/**
+ * Shifts a 128 bit register left by the specified number of bytes,
+ * shifting in zeroes.
+ *
+ * Value of the shift parameter must be in the range 0 to 16.
+ */
+static inline uint64x2_t
+vshift_bytes_left(uint64x2_t reg, const unsigned int shift)
+{
+	/* vextract() returns a zero vector for index 16, so a shift of
+	 * 0 bytes (which would map to index 16) must be handled
+	 * separately to keep the documented 0 to 16 range correct.
+	 */
+	if (shift == 0)
+		return reg;
+
+	return vreinterpretq_u64_u8(vextract(
+			vdupq_n_u8(0),
+			vreinterpretq_u8_u64(reg),
+			16 - shift));
+}
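+
+/*
+ * Example (illustrative): vshift_bytes_left(x, 8) moves the lower
+ * 64 bit lane of x into the upper lane and zeroes the lower lane.
+ */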
+
#ifdef __cplusplus
}
#endif