author | Christian Ehrhardt <christian.ehrhardt@canonical.com> | 2016-07-06 09:22:35 +0200
---|---|---
committer | Christian Ehrhardt <christian.ehrhardt@canonical.com> | 2016-07-06 16:09:40 +0200
commit | 8b25d1ad5d2264bdfc2818c7bda74ee2697df6db (patch) |
tree | 8c3c769777f7e66a2d1ba7dd7651b563cfde370b | /lib/librte_eal/common/include/arch
parent | 97f17497d162afdb82c8704bf097f0fee3724b2e (diff) |
Imported Upstream version 16.07-rc1
Change-Id: I40a523e52f12e8496fdd69e902824b0226c303de
Signed-off-by: Christian Ehrhardt <christian.ehrhardt@canonical.com>
Diffstat (limited to 'lib/librte_eal/common/include/arch')
5 files changed, 34 insertions, 109 deletions
diff --git a/lib/librte_eal/common/include/arch/arm/rte_memcpy_32.h b/lib/librte_eal/common/include/arch/arm/rte_memcpy_32.h
index 988125b3..da6c233a 100644
--- a/lib/librte_eal/common/include/arch/arm/rte_memcpy_32.h
+++ b/lib/librte_eal/common/include/arch/arm/rte_memcpy_32.h
@@ -323,12 +323,6 @@ rte_memcpy(void *dst, const void *src, size_t n)
 	return memcpy(dst, src, n);
 }
 
-static inline void *
-rte_memcpy_func(void *dst, const void *src, size_t n)
-{
-	return memcpy(dst, src, n);
-}
-
 #endif /* RTE_ARCH_ARM_NEON_MEMCPY */
 
 #ifdef __cplusplus
diff --git a/lib/librte_eal/common/include/arch/arm/rte_memcpy_64.h b/lib/librte_eal/common/include/arch/arm/rte_memcpy_64.h
index 917cdc1b..5db66b63 100644
--- a/lib/librte_eal/common/include/arch/arm/rte_memcpy_64.h
+++ b/lib/librte_eal/common/include/arch/arm/rte_memcpy_64.h
@@ -80,12 +80,6 @@ rte_mov256(uint8_t *dst, const uint8_t *src)
 
 #define rte_memcpy(d, s, n)	memcpy((d), (s), (n))
 
-static inline void *
-rte_memcpy_func(void *dst, const void *src, size_t n)
-{
-	return memcpy(dst, src, n);
-}
-
 #ifdef __cplusplus
 }
 #endif
diff --git a/lib/librte_eal/common/include/arch/tile/rte_memcpy.h b/lib/librte_eal/common/include/arch/tile/rte_memcpy.h
index 9b5b37ef..e606957c 100644
--- a/lib/librte_eal/common/include/arch/tile/rte_memcpy.h
+++ b/lib/librte_eal/common/include/arch/tile/rte_memcpy.h
@@ -80,12 +80,6 @@ rte_mov256(uint8_t *dst, const uint8_t *src)
 
 #define rte_memcpy(d, s, n)	memcpy((d), (s), (n))
 
-static inline void *
-rte_memcpy_func(void *dst, const void *src, size_t n)
-{
-	return memcpy(dst, src, n);
-}
-
 #ifdef __cplusplus
 }
 #endif
diff --git a/lib/librte_eal/common/include/arch/x86/rte_memcpy.h b/lib/librte_eal/common/include/arch/x86/rte_memcpy.h
index f463ab30..413035e7 100644
--- a/lib/librte_eal/common/include/arch/x86/rte_memcpy.h
+++ b/lib/librte_eal/common/include/arch/x86/rte_memcpy.h
@@ -363,71 +363,26 @@ rte_mov128(uint8_t *dst, const uint8_t *src)
 }
 
 /**
- * Copy 256 bytes from one location to another,
- * locations should not overlap.
- */
-static inline void
-rte_mov256(uint8_t *dst, const uint8_t *src)
-{
-	rte_mov32((uint8_t *)dst + 0 * 32, (const uint8_t *)src + 0 * 32);
-	rte_mov32((uint8_t *)dst + 1 * 32, (const uint8_t *)src + 1 * 32);
-	rte_mov32((uint8_t *)dst + 2 * 32, (const uint8_t *)src + 2 * 32);
-	rte_mov32((uint8_t *)dst + 3 * 32, (const uint8_t *)src + 3 * 32);
-	rte_mov32((uint8_t *)dst + 4 * 32, (const uint8_t *)src + 4 * 32);
-	rte_mov32((uint8_t *)dst + 5 * 32, (const uint8_t *)src + 5 * 32);
-	rte_mov32((uint8_t *)dst + 6 * 32, (const uint8_t *)src + 6 * 32);
-	rte_mov32((uint8_t *)dst + 7 * 32, (const uint8_t *)src + 7 * 32);
-}
-
-/**
- * Copy 64-byte blocks from one location to another,
- * locations should not overlap.
- */
-static inline void
-rte_mov64blocks(uint8_t *dst, const uint8_t *src, size_t n)
-{
-	__m256i ymm0, ymm1;
-
-	while (n >= 64) {
-		ymm0 = _mm256_loadu_si256((const __m256i *)((const uint8_t *)src + 0 * 32));
-		n -= 64;
-		ymm1 = _mm256_loadu_si256((const __m256i *)((const uint8_t *)src + 1 * 32));
-		src = (const uint8_t *)src + 64;
-		_mm256_storeu_si256((__m256i *)((uint8_t *)dst + 0 * 32), ymm0);
-		_mm256_storeu_si256((__m256i *)((uint8_t *)dst + 1 * 32), ymm1);
-		dst = (uint8_t *)dst + 64;
-	}
-}
-
-/**
- * Copy 256-byte blocks from one location to another,
+ * Copy 128-byte blocks from one location to another,
  * locations should not overlap.
  */
 static inline void
-rte_mov256blocks(uint8_t *dst, const uint8_t *src, size_t n)
+rte_mov128blocks(uint8_t *dst, const uint8_t *src, size_t n)
 {
-	__m256i ymm0, ymm1, ymm2, ymm3, ymm4, ymm5, ymm6, ymm7;
+	__m256i ymm0, ymm1, ymm2, ymm3;
 
-	while (n >= 256) {
+	while (n >= 128) {
 		ymm0 = _mm256_loadu_si256((const __m256i *)((const uint8_t *)src + 0 * 32));
-		n -= 256;
+		n -= 128;
 		ymm1 = _mm256_loadu_si256((const __m256i *)((const uint8_t *)src + 1 * 32));
 		ymm2 = _mm256_loadu_si256((const __m256i *)((const uint8_t *)src + 2 * 32));
 		ymm3 = _mm256_loadu_si256((const __m256i *)((const uint8_t *)src + 3 * 32));
-		ymm4 = _mm256_loadu_si256((const __m256i *)((const uint8_t *)src + 4 * 32));
-		ymm5 = _mm256_loadu_si256((const __m256i *)((const uint8_t *)src + 5 * 32));
-		ymm6 = _mm256_loadu_si256((const __m256i *)((const uint8_t *)src + 6 * 32));
-		ymm7 = _mm256_loadu_si256((const __m256i *)((const uint8_t *)src + 7 * 32));
-		src = (const uint8_t *)src + 256;
+		src = (const uint8_t *)src + 128;
 		_mm256_storeu_si256((__m256i *)((uint8_t *)dst + 0 * 32), ymm0);
 		_mm256_storeu_si256((__m256i *)((uint8_t *)dst + 1 * 32), ymm1);
 		_mm256_storeu_si256((__m256i *)((uint8_t *)dst + 2 * 32), ymm2);
 		_mm256_storeu_si256((__m256i *)((uint8_t *)dst + 3 * 32), ymm3);
-		_mm256_storeu_si256((__m256i *)((uint8_t *)dst + 4 * 32), ymm4);
-		_mm256_storeu_si256((__m256i *)((uint8_t *)dst + 5 * 32), ymm5);
-		_mm256_storeu_si256((__m256i *)((uint8_t *)dst + 6 * 32), ymm6);
-		_mm256_storeu_si256((__m256i *)((uint8_t *)dst + 7 * 32), ymm7);
-		dst = (uint8_t *)dst + 256;
+		dst = (uint8_t *)dst + 128;
 	}
 }
 
@@ -466,51 +421,56 @@ rte_memcpy(void *dst, const void *src, size_t n)
 	}
 
 	/**
-	 * Fast way when copy size doesn't exceed 512 bytes
+	 * Fast way when copy size doesn't exceed 256 bytes
 	 */
 	if (n <= 32) {
 		rte_mov16((uint8_t *)dst, (const uint8_t *)src);
-		rte_mov16((uint8_t *)dst - 16 + n, (const uint8_t *)src - 16 + n);
+		rte_mov16((uint8_t *)dst - 16 + n,
+				(const uint8_t *)src - 16 + n);
+		return ret;
+	}
+	if (n <= 48) {
+		rte_mov16((uint8_t *)dst, (const uint8_t *)src);
+		rte_mov16((uint8_t *)dst + 16, (const uint8_t *)src + 16);
+		rte_mov16((uint8_t *)dst - 16 + n,
+				(const uint8_t *)src - 16 + n);
 		return ret;
 	}
 	if (n <= 64) {
 		rte_mov32((uint8_t *)dst, (const uint8_t *)src);
-		rte_mov32((uint8_t *)dst - 32 + n, (const uint8_t *)src - 32 + n);
+		rte_mov32((uint8_t *)dst - 32 + n,
+				(const uint8_t *)src - 32 + n);
 		return ret;
 	}
-	if (n <= 512) {
-		if (n >= 256) {
-			n -= 256;
-			rte_mov256((uint8_t *)dst, (const uint8_t *)src);
-			src = (const uint8_t *)src + 256;
-			dst = (uint8_t *)dst + 256;
-		}
+	if (n <= 256) {
 		if (n >= 128) {
 			n -= 128;
 			rte_mov128((uint8_t *)dst, (const uint8_t *)src);
 			src = (const uint8_t *)src + 128;
 			dst = (uint8_t *)dst + 128;
 		}
+COPY_BLOCK_128_BACK31:
 		if (n >= 64) {
 			n -= 64;
 			rte_mov64((uint8_t *)dst, (const uint8_t *)src);
 			src = (const uint8_t *)src + 64;
 			dst = (uint8_t *)dst + 64;
 		}
-COPY_BLOCK_64_BACK31:
 		if (n > 32) {
 			rte_mov32((uint8_t *)dst, (const uint8_t *)src);
-			rte_mov32((uint8_t *)dst - 32 + n, (const uint8_t *)src - 32 + n);
+			rte_mov32((uint8_t *)dst - 32 + n,
+					(const uint8_t *)src - 32 + n);
 			return ret;
 		}
 		if (n > 0) {
-			rte_mov32((uint8_t *)dst - 32 + n, (const uint8_t *)src - 32 + n);
+			rte_mov32((uint8_t *)dst - 32 + n,
+					(const uint8_t *)src - 32 + n);
 		}
 		return ret;
 	}
 
 	/**
-	 * Make store aligned when copy size exceeds 512 bytes
+	 * Make store aligned when copy size exceeds 256 bytes
 	 */
 	dstofss = (uintptr_t)dst & 0x1F;
 	if (dstofss > 0) {
@@ -522,35 +482,19 @@ COPY_BLOCK_64_BACK31:
 	}
 
 	/**
-	 * Copy 256-byte blocks.
-	 * Use copy block function for better instruction order control,
-	 * which is important when load is unaligned.
+	 * Copy 128-byte blocks
 	 */
-	rte_mov256blocks((uint8_t *)dst, (const uint8_t *)src, n);
+	rte_mov128blocks((uint8_t *)dst, (const uint8_t *)src, n);
 	bits = n;
-	n = n & 255;
+	n = n & 127;
 	bits -= n;
 	src = (const uint8_t *)src + bits;
 	dst = (uint8_t *)dst + bits;
 
 	/**
-	 * Copy 64-byte blocks.
-	 * Use copy block function for better instruction order control,
-	 * which is important when load is unaligned.
-	 */
-	if (n >= 64) {
-		rte_mov64blocks((uint8_t *)dst, (const uint8_t *)src, n);
-		bits = n;
-		n = n & 63;
-		bits -= n;
-		src = (const uint8_t *)src + bits;
-		dst = (uint8_t *)dst + bits;
-	}
-
-	/**
 	 * Copy whatever left
 	 */
-	goto COPY_BLOCK_64_BACK31;
+	goto COPY_BLOCK_128_BACK31;
 }
 
 #else /* RTE_MACHINE_CPUFLAG */
diff --git a/lib/librte_eal/common/include/arch/x86/rte_rtm.h b/lib/librte_eal/common/include/arch/x86/rte_rtm.h
index d9356419..0649f794 100644
--- a/lib/librte_eal/common/include/arch/x86/rte_rtm.h
+++ b/lib/librte_eal/common/include/arch/x86/rte_rtm.h
@@ -50,11 +50,10 @@ void rte_xend(void)
 	asm volatile(".byte 0x0f,0x01,0xd5" ::: "memory");
 }
 
-static __attribute__((__always_inline__)) inline
-void rte_xabort(const unsigned int status)
-{
-	asm volatile(".byte 0xc6,0xf8,%P0" :: "i" (status) : "memory");
-}
+/* not an inline function to workaround a clang bug with -O0 */
+#define rte_xabort(status) do { \
+	asm volatile(".byte 0xc6,0xf8,%P0" :: "i" (status) : "memory"); \
+} while (0)
 
 static __attribute__((__always_inline__)) inline
 int rte_xtest(void)
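
Note on the last hunk: rte_xabort() becomes a macro because its status byte is passed to the asm "i" (immediate) constraint and therefore must be a compile-time constant; the new comment records that this works around a clang bug at -O0, where the always-inline wrapper was not folded away and the constraint could not be satisfied. The following is a minimal usage sketch, not part of this commit: it assumes the rte_xbegin()/rte_xend() helpers and the RTE_XBEGIN_STARTED constant from the same rte_rtm.h header, and the helper function name is hypothetical.

#include <rte_rtm.h>

/* Transactionally increment a counter, aborting with an explicit
 * status code when the value looks invalid. */
static int tsx_increment(int *counter)
{
	if (rte_xbegin() == RTE_XBEGIN_STARTED) {
		if (*counter < 0)
			rte_xabort(0x01); /* must remain a literal constant */
		(*counter)++;
		rte_xend();
		return 0;
	}
	/* Transaction aborted or unsupported: caller should take a
	 * fallback (e.g. locked) path instead. */
	return -1;
}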