diff options
author | Damjan Marion <damarion@cisco.com> | 2016-03-13 02:22:06 +0100 |
---|---|---|
committer | Damjan Marion <damarion@cisco.com> | 2016-04-22 17:29:47 +0200 |
commit | f1213b82771ce929c076339c24a777cfd59690e6 (patch) | |
tree | 3c74305e8848047d8ccd1228ee511d57cbf1b1a6 /vppinfra | |
parent | 2b836cf4d1e4e59ca34229a9fdf49d79216da20e (diff) |
Add clib_memcpy macro based on DPDK rte_memcpy implementation
Change-Id: I22cb443c4bd0bf298abb6f06e8e4ca65a44a2854
Signed-off-by: Damjan Marion <damarion@cisco.com>
Diffstat (limited to 'vppinfra')
-rw-r--r-- | vppinfra/Makefile.am | 2 | ||||
-rw-r--r-- | vppinfra/tools/elftool.c | 6 | ||||
-rw-r--r-- | vppinfra/vppinfra/bihash_template.c | 10 | ||||
-rw-r--r-- | vppinfra/vppinfra/elf.c | 4 | ||||
-rw-r--r-- | vppinfra/vppinfra/elf.h | 2 | ||||
-rw-r--r-- | vppinfra/vppinfra/elog.c | 4 | ||||
-rw-r--r-- | vppinfra/vppinfra/fifo.c | 6 | ||||
-rw-r--r-- | vppinfra/vppinfra/fifo.h | 4 | ||||
-rw-r--r-- | vppinfra/vppinfra/hash.c | 14 | ||||
-rw-r--r-- | vppinfra/vppinfra/heap.h | 2 | ||||
-rw-r--r-- | vppinfra/vppinfra/mem.h | 2 | ||||
-rw-r--r-- | vppinfra/vppinfra/memcpy_avx.h | 261 | ||||
-rw-r--r-- | vppinfra/vppinfra/memcpy_sse3.h | 330 | ||||
-rw-r--r-- | vppinfra/vppinfra/mhash.c | 4 | ||||
-rw-r--r-- | vppinfra/vppinfra/pfhash.c | 34 | ||||
-rw-r--r-- | vppinfra/vppinfra/serialize.c | 12 | ||||
-rw-r--r-- | vppinfra/vppinfra/socket.c | 4 | ||||
-rw-r--r-- | vppinfra/vppinfra/string.h | 22 | ||||
-rw-r--r-- | vppinfra/vppinfra/test_elf.c | 2 | ||||
-rw-r--r-- | vppinfra/vppinfra/test_elog.c | 2 | ||||
-rw-r--r-- | vppinfra/vppinfra/vec.c | 2 | ||||
-rw-r--r-- | vppinfra/vppinfra/vec.h | 20 |
22 files changed, 683 insertions, 66 deletions
diff --git a/vppinfra/Makefile.am b/vppinfra/Makefile.am index adcd32cc868..1b4d627dfb6 100644 --- a/vppinfra/Makefile.am +++ b/vppinfra/Makefile.am @@ -189,6 +189,8 @@ nobase_include_HEADERS = \ vppinfra/math.h \ vppinfra/md5.h \ vppinfra/mem.h \ + vppinfra/memcpy_sse3.h \ + vppinfra/memcpy_avx.h \ vppinfra/mhash.h \ vppinfra/mheap.h \ vppinfra/mheap_bootstrap.h \ diff --git a/vppinfra/tools/elftool.c b/vppinfra/tools/elftool.c index b8acd055602..f5d70b56d5f 100644 --- a/vppinfra/tools/elftool.c +++ b/vppinfra/tools/elftool.c @@ -101,7 +101,7 @@ static clib_error_t * elf_set_interpreter (elf_main_t * em, /* Put in new null terminated string. */ memset (s->contents, 0, vec_len (s->contents)); - memcpy (s->contents, interp, strlen (interp)); + clib_memcpy (s->contents, interp, strlen (interp)); return 0; } @@ -336,11 +336,11 @@ set_interpreter_rpath (elf_tool_main_t * tm) } if (tm->interpreter_offset) - memcpy (&idp[tm->interpreter_offset], tm->set_interpreter, + clib_memcpy (&idp[tm->interpreter_offset], tm->set_interpreter, strlen (tm->set_interpreter)+1); if (tm->rpath_offset) - memcpy (&idp[tm->rpath_offset], tm->set_rpath, + clib_memcpy (&idp[tm->rpath_offset], tm->set_rpath, strlen (tm->set_rpath)+1); /* Write the output file... 
*/ diff --git a/vppinfra/vppinfra/bihash_template.c b/vppinfra/vppinfra/bihash_template.c index 6242e0ffe5a..3ed3dca19c2 100644 --- a/vppinfra/vppinfra/bihash_template.c +++ b/vppinfra/vppinfra/bihash_template.c @@ -129,7 +129,7 @@ BV(make_working_copy) v = BV(clib_bihash_get_value) (h, b->offset); - memcpy (working_copy, v, sizeof (*v)*(1<<b->log2_pages)); + clib_memcpy (working_copy, v, sizeof (*v)*(1<<b->log2_pages)); working_bucket.as_u64 = b->as_u64; working_bucket.offset = BV(clib_bihash_get_offset) (h, working_copy); CLIB_MEMORY_BARRIER(); @@ -167,7 +167,7 @@ static BVT(clib_bihash_value) * { if (BV(clib_bihash_is_free)(&(new_v->kvp[k]))) { - memcpy (&(new_v->kvp[k]), &(v->kvp[j]), + clib_memcpy (&(new_v->kvp[k]), &(v->kvp[j]), sizeof (new_v->kvp[k])); goto doublebreak; } @@ -243,7 +243,7 @@ int BV(clib_bihash_add_del) { if (!memcmp(&(v->kvp[i]), &add_v->key, sizeof (add_v->key))) { - memcpy (&(v->kvp[i]), add_v, sizeof (*add_v)); + clib_memcpy (&(v->kvp[i]), add_v, sizeof (*add_v)); CLIB_MEMORY_BARRIER(); /* Restore the previous (k,v) pairs */ b->as_u64 = h->saved_bucket.as_u64; @@ -254,7 +254,7 @@ int BV(clib_bihash_add_del) { if (BV(clib_bihash_is_free)(&(v->kvp[i]))) { - memcpy (&(v->kvp[i]), add_v, sizeof (*add_v)); + clib_memcpy (&(v->kvp[i]), add_v, sizeof (*add_v)); CLIB_MEMORY_BARRIER(); b->as_u64 = h->saved_bucket.as_u64; goto unlock; @@ -301,7 +301,7 @@ int BV(clib_bihash_add_del) { if (BV(clib_bihash_is_free)(&(new_v->kvp[i]))) { - memcpy (&(new_v->kvp[i]), add_v, sizeof (*add_v)); + clib_memcpy (&(new_v->kvp[i]), add_v, sizeof (*add_v)); goto expand_ok; } } diff --git a/vppinfra/vppinfra/elf.c b/vppinfra/vppinfra/elf.c index bbd321b5780..8a09cd41fe0 100644 --- a/vppinfra/vppinfra/elf.c +++ b/vppinfra/vppinfra/elf.c @@ -818,7 +818,7 @@ add_relocation_table (elf_main_t * em, elf_section_t * s) } vec_resize (t->relocations, vec_len (rs)); - memcpy (t->relocations, rs, vec_bytes (t->relocations)); + clib_memcpy (t->relocations, rs, vec_bytes 
(t->relocations)); vec_free (rs); } else @@ -1909,7 +1909,7 @@ elf_create_section_with_contents (elf_main_t * em, sts->contents = st; vec_resize (c, n_content_bytes); - memcpy (c, contents, n_content_bytes); + clib_memcpy (c, contents, n_content_bytes); s->contents = c; em->file_header.section_header_count += is_new_section && s->header.type != ~0; diff --git a/vppinfra/vppinfra/elf.h b/vppinfra/vppinfra/elf.h index 1a0102d0838..52989166a5b 100644 --- a/vppinfra/vppinfra/elf.h +++ b/vppinfra/vppinfra/elf.h @@ -951,7 +951,7 @@ elf_get_section_contents (elf_main_t * em, vec_len (s->contents), /* header_bytes */ 0, /* align */ 0); - memcpy (result, s->contents, vec_len (s->contents)); + clib_memcpy (result, s->contents, vec_len (s->contents)); } return result; diff --git a/vppinfra/vppinfra/elog.c b/vppinfra/vppinfra/elog.c index b748963933c..06b97d8a390 100644 --- a/vppinfra/vppinfra/elog.c +++ b/vppinfra/vppinfra/elog.c @@ -263,7 +263,7 @@ static u8 * fixed_format (u8 * s, char * fmt, char * result, uword * result_len) ASSERT (*result_len > f - percent); l = clib_min (f - percent, *result_len - 1); - memcpy (result, percent, l); + clib_memcpy (result, percent, l); result[l] = 0; done: @@ -836,7 +836,7 @@ unserialize_elog_event (serialize_main_t * m, va_list * va) unserialize_cstring (m, &t); if (n_bytes == 0) n_bytes = strlen (t) + 1; - memcpy (d, t, clib_min (n_bytes, vec_len (t))); + clib_memcpy (d, t, clib_min (n_bytes, vec_len (t))); vec_free (t); break; } diff --git a/vppinfra/vppinfra/fifo.c b/vppinfra/vppinfra/fifo.c index 3b8349d402e..fc287a2a29f 100644 --- a/vppinfra/vppinfra/fifo.c +++ b/vppinfra/vppinfra/fifo.c @@ -111,11 +111,11 @@ void * _clib_fifo_resize (void * v_old, uword n_new_elts, uword elt_bytes) if (head + n_copy_bytes >= end) { uword n = end - head; - memcpy (v_new, head, n); - memcpy (v_new + n, v_old, n_copy_bytes - n); + clib_memcpy (v_new, head, n); + clib_memcpy (v_new + n, v_old, n_copy_bytes - n); } else - memcpy (v_new, head, 
n_copy_bytes); + clib_memcpy (v_new, head, n_copy_bytes); } /* Zero empty space. */ diff --git a/vppinfra/vppinfra/fifo.h b/vppinfra/vppinfra/fifo.h index 54aa8f2d0e5..10c7b65673d 100644 --- a/vppinfra/vppinfra/fifo.h +++ b/vppinfra/vppinfra/fifo.h @@ -210,9 +210,9 @@ do { \ _n1 = _i + _n0 - _l; \ _n1 = _n1 < 0 ? 0 : _n1; \ _n0 -= _n1; \ - memcpy ((f) + _i, (e), _n0 * sizeof ((f)[0])); \ + clib_memcpy ((f) + _i, (e), _n0 * sizeof ((f)[0])); \ if (_n1) \ - memcpy ((f) + 0, (e) + _n0, _n1 * sizeof ((f)[0])); \ + clib_memcpy ((f) + 0, (e) + _n0, _n1 * sizeof ((f)[0])); \ } while (0) /* Subtract element from fifo. */ diff --git a/vppinfra/vppinfra/hash.c b/vppinfra/vppinfra/hash.c index adadf010a0c..86231b4db5e 100644 --- a/vppinfra/vppinfra/hash.c +++ b/vppinfra/vppinfra/hash.c @@ -348,7 +348,7 @@ set_indirect_is_user (void * v, log2_bytes = 1 + hash_pair_log2_bytes (h); q = clib_mem_alloc (1 << log2_bytes); } - memcpy (q, &p->direct, hash_pair_bytes (h)); + clib_memcpy (q, &p->direct, hash_pair_bytes (h)); pi->pairs = q; if (h->log2_pair_size > 0) @@ -428,7 +428,7 @@ static void unset_indirect (void * v, uword i, hash_pair_t * q) if (len == 2) { - memcpy (p, q == r ? hash_forward1 (h, r) : r, hash_pair_bytes (h)); + clib_memcpy (p, q == r ? hash_forward1 (h, r) : r, hash_pair_bytes (h)); set_is_user (v, i, 1); } else @@ -443,7 +443,7 @@ static void unset_indirect (void * v, uword i, hash_pair_t * q) { /* If deleting a pair we need to keep non-null pairs together. 
*/ if (q < e) - memcpy (q, e, hash_pair_bytes (h)); + clib_memcpy (q, e, hash_pair_bytes (h)); else zero_pair (h, q); if (is_vec) @@ -484,7 +484,7 @@ static hash_pair_t * lookup (void * v, uword key, enum lookup_opcode op, { set_is_user (v, i, 0); if (old_value) - memcpy (old_value, p->direct.value, hash_value_bytes (h)); + clib_memcpy (old_value, p->direct.value, hash_value_bytes (h)); zero_pair (h, &p->direct); } } @@ -517,7 +517,7 @@ static hash_pair_t * lookup (void * v, uword key, enum lookup_opcode op, if (found_key && op == UNSET) { if (old_value) - memcpy (old_value, &p->direct.value, hash_value_bytes (h)); + clib_memcpy (old_value, &p->direct.value, hash_value_bytes (h)); unset_indirect (v, i, &p->direct); @@ -532,8 +532,8 @@ static hash_pair_t * lookup (void * v, uword key, enum lookup_opcode op, { /* Save away old value for caller. */ if (old_value && found_key) - memcpy (old_value, &p->direct.value, hash_value_bytes (h)); - memcpy (&p->direct.value, new_value, hash_value_bytes (h)); + clib_memcpy (old_value, &p->direct.value, hash_value_bytes (h)); + clib_memcpy (&p->direct.value, new_value, hash_value_bytes (h)); } if (op == SET) diff --git a/vppinfra/vppinfra/heap.h b/vppinfra/vppinfra/heap.h index 912e865e75b..c6605dac5f0 100644 --- a/vppinfra/vppinfra/heap.h +++ b/vppinfra/vppinfra/heap.h @@ -180,7 +180,7 @@ always_inline void * _heap_dup (void * v_old, uword v_bytes) HEAP_DATA_ALIGN); h_new = heap_header (v_new); heap_dup_header (h_old, h_new); - memcpy (v_new, v_old, v_bytes); + clib_memcpy (v_new, v_old, v_bytes); return v_new; } diff --git a/vppinfra/vppinfra/mem.h b/vppinfra/vppinfra/mem.h index 301bcdd0d32..a4c679c2c71 100644 --- a/vppinfra/vppinfra/mem.h +++ b/vppinfra/vppinfra/mem.h @@ -171,7 +171,7 @@ always_inline void * clib_mem_realloc (void * p, uword new_size, uword old_size) copy_size = old_size; else copy_size = new_size; - memcpy (q, p, copy_size); + clib_memcpy (q, p, copy_size); clib_mem_free (p); } return q; diff --git 
a/vppinfra/vppinfra/memcpy_avx.h b/vppinfra/vppinfra/memcpy_avx.h new file mode 100644 index 00000000000..0ec6032a0f6 --- /dev/null +++ b/vppinfra/vppinfra/memcpy_avx.h @@ -0,0 +1,261 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef included_clib_memcpy_avx_h +#define included_clib_memcpy_avx_h + +#include <stdint.h> +#include <x86intrin.h> + +static inline void +clib_mov16(u8 *dst, const u8 *src) +{ + __m128i xmm0; + + xmm0 = _mm_loadu_si128((const __m128i *)src); + _mm_storeu_si128((__m128i *)dst, xmm0); +} + +static inline void +clib_mov32(u8 *dst, const u8 *src) +{ + __m256i ymm0; + + ymm0 = _mm256_loadu_si256((const __m256i *)src); + _mm256_storeu_si256((__m256i *)dst, ymm0); +} + +static inline void +clib_mov64(u8 *dst, const u8 *src) +{ + clib_mov32((u8 *)dst + 0 * 32, (const u8 *)src + 0 * 32); + clib_mov32((u8 *)dst + 1 * 32, (const u8 *)src + 1 * 32); +} + +static inline void +clib_mov128(u8 *dst, const u8 *src) +{ + clib_mov64((u8 *)dst + 0 * 64, (const u8 *)src + 0 * 64); + clib_mov64((u8 *)dst + 1 * 64, (const u8 *)src + 1 * 64); +} + +static inline void +clib_mov256(u8 *dst, const u8 *src) +{ + clib_mov128((u8 *)dst + 0 * 128, (const u8 *)src + 0 * 128); + clib_mov128((u8 *)dst + 1 * 128, (const u8 *)src + 1 * 128); +} + +static inline void +clib_mov64blocks(u8 *dst, const u8 *src, size_t n) +{ + __m256i ymm0, ymm1; + + while (n >= 64) { + ymm0 = _mm256_loadu_si256((const __m256i *)((const u8 *)src + 0 * 32)); + n -= 64; + ymm1 = _mm256_loadu_si256((const __m256i *)((const u8 *)src + 1 * 32)); + src = (const u8 *)src + 64; + _mm256_storeu_si256((__m256i *)((u8 *)dst + 0 * 32), ymm0); + _mm256_storeu_si256((__m256i *)((u8 *)dst + 1 * 
32), ymm1); + dst = (u8 *)dst + 64; + } +} + +static inline void +clib_mov256blocks(u8 *dst, const u8 *src, size_t n) +{ + __m256i ymm0, ymm1, ymm2, ymm3, ymm4, ymm5, ymm6, ymm7; + + while (n >= 256) { + ymm0 = _mm256_loadu_si256((const __m256i *)((const u8 *)src + 0 * 32)); + n -= 256; + ymm1 = _mm256_loadu_si256((const __m256i *)((const u8 *)src + 1 * 32)); + ymm2 = _mm256_loadu_si256((const __m256i *)((const u8 *)src + 2 * 32)); + ymm3 = _mm256_loadu_si256((const __m256i *)((const u8 *)src + 3 * 32)); + ymm4 = _mm256_loadu_si256((const __m256i *)((const u8 *)src + 4 * 32)); + ymm5 = _mm256_loadu_si256((const __m256i *)((const u8 *)src + 5 * 32)); + ymm6 = _mm256_loadu_si256((const __m256i *)((const u8 *)src + 6 * 32)); + ymm7 = _mm256_loadu_si256((const __m256i *)((const u8 *)src + 7 * 32)); + src = (const u8 *)src + 256; + _mm256_storeu_si256((__m256i *)((u8 *)dst + 0 * 32), ymm0); + _mm256_storeu_si256((__m256i *)((u8 *)dst + 1 * 32), ymm1); + _mm256_storeu_si256((__m256i *)((u8 *)dst + 2 * 32), ymm2); + _mm256_storeu_si256((__m256i *)((u8 *)dst + 3 * 32), ymm3); + _mm256_storeu_si256((__m256i *)((u8 *)dst + 4 * 32), ymm4); + _mm256_storeu_si256((__m256i *)((u8 *)dst + 5 * 32), ymm5); + _mm256_storeu_si256((__m256i *)((u8 *)dst + 6 * 32), ymm6); + _mm256_storeu_si256((__m256i *)((u8 *)dst + 7 * 32), ymm7); + dst = (u8 *)dst + 256; + } +} + +static inline void * +clib_memcpy(void *dst, const void *src, size_t n) +{ + uword dstu = (uword)dst; + uword srcu = (uword)src; + void *ret = dst; + size_t dstofss; + size_t bits; + + /** + * Copy less than 16 bytes + */ + if (n < 16) { + if (n & 0x01) { + *(u8 *)dstu = *(const u8 *)srcu; + srcu = (uword)((const u8 *)srcu + 1); + dstu = (uword)((u8 *)dstu + 1); + } + if (n & 0x02) { + *(uint16_t *)dstu = *(const uint16_t *)srcu; + srcu = (uword)((const uint16_t *)srcu + 1); + dstu = (uword)((uint16_t *)dstu + 1); + } + if (n & 0x04) { + *(uint32_t *)dstu = *(const uint32_t *)srcu; + srcu = (uword)((const uint32_t *)srcu + 
1); + dstu = (uword)((uint32_t *)dstu + 1); + } + if (n & 0x08) { + *(uint64_t *)dstu = *(const uint64_t *)srcu; + } + return ret; + } + + /** + * Fast way when copy size doesn't exceed 512 bytes + */ + if (n <= 32) { + clib_mov16((u8 *)dst, (const u8 *)src); + clib_mov16((u8 *)dst - 16 + n, (const u8 *)src - 16 + n); + return ret; + } + if (n <= 64) { + clib_mov32((u8 *)dst, (const u8 *)src); + clib_mov32((u8 *)dst - 32 + n, (const u8 *)src - 32 + n); + return ret; + } + if (n <= 512) { + if (n >= 256) { + n -= 256; + clib_mov256((u8 *)dst, (const u8 *)src); + src = (const u8 *)src + 256; + dst = (u8 *)dst + 256; + } + if (n >= 128) { + n -= 128; + clib_mov128((u8 *)dst, (const u8 *)src); + src = (const u8 *)src + 128; + dst = (u8 *)dst + 128; + } + if (n >= 64) { + n -= 64; + clib_mov64((u8 *)dst, (const u8 *)src); + src = (const u8 *)src + 64; + dst = (u8 *)dst + 64; + } +COPY_BLOCK_64_BACK31: + if (n > 32) { + clib_mov32((u8 *)dst, (const u8 *)src); + clib_mov32((u8 *)dst - 32 + n, (const u8 *)src - 32 + n); + return ret; + } + if (n > 0) { + clib_mov32((u8 *)dst - 32 + n, (const u8 *)src - 32 + n); + } + return ret; + } + + /** + * Make store aligned when copy size exceeds 512 bytes + */ + dstofss = (uword)dst & 0x1F; + if (dstofss > 0) { + dstofss = 32 - dstofss; + n -= dstofss; + clib_mov32((u8 *)dst, (const u8 *)src); + src = (const u8 *)src + dstofss; + dst = (u8 *)dst + dstofss; + } + + /** + * Copy 256-byte blocks. + * Use copy block function for better instruction order control, + * which is important when load is unaligned. + */ + clib_mov256blocks((u8 *)dst, (const u8 *)src, n); + bits = n; + n = n & 255; + bits -= n; + src = (const u8 *)src + bits; + dst = (u8 *)dst + bits; + + /** + * Copy 64-byte blocks. + * Use copy block function for better instruction order control, + * which is important when load is unaligned. 
 + */ + if (n >= 64) { + clib_mov64blocks((u8 *)dst, (const u8 *)src, n); + bits = n; + n = n & 63; + bits -= n; + src = (const u8 *)src + bits; + dst = (u8 *)dst + bits; + } + + /** + * Copy whatever left + */ + goto COPY_BLOCK_64_BACK31; +} + + +#endif /* included_clib_memcpy_avx_h */ + diff --git a/vppinfra/vppinfra/memcpy_sse3.h b/vppinfra/vppinfra/memcpy_sse3.h new file mode 100644 index 00000000000..12748f78b48 --- /dev/null +++ b/vppinfra/vppinfra/memcpy_sse3.h @@ -0,0 +1,330 @@ +/* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef included_clib_memcpy_sse3_h +#define included_clib_memcpy_sse3_h + +#include <stdint.h> +#include <x86intrin.h> + +static inline void +clib_mov16(u8 *dst, const u8 *src) +{ + __m128i xmm0; + + xmm0 = _mm_loadu_si128((const __m128i *)src); + _mm_storeu_si128((__m128i *)dst, xmm0); +} + +static inline void +clib_mov32(u8 *dst, const u8 *src) +{ + clib_mov16((u8 *)dst + 0 * 16, (const u8 *)src + 0 * 16); + clib_mov16((u8 *)dst + 1 * 16, (const u8 *)src + 1 * 16); +} + +static inline void +clib_mov64(u8 *dst, const u8 *src) +{ + clib_mov32((u8 *)dst + 0 * 32, (const u8 *)src + 0 * 32); + clib_mov32((u8 *)dst + 1 * 32, (const u8 *)src + 1 * 32); +} + +static inline void +clib_mov128(u8 *dst, const u8 *src) +{ + clib_mov64((u8 *)dst + 0 * 64, (const u8 *)src + 0 * 64); + clib_mov64((u8 *)dst + 1 * 64, (const u8 *)src + 1 * 64); +} + +static inline void +clib_mov256(u8 *dst, const u8 *src) +{ + clib_mov128((u8 *)dst + 0 * 128, (const u8 *)src + 0 * 128); + clib_mov128((u8 *)dst + 1 * 128, (const u8 *)src + 1 * 128); +} + +/** + * Macro for copying unaligned block from one location to another with constant load offset, + * 47 bytes leftover maximum, + * locations should 
not overlap. + * Requirements: + * - Store is aligned + * - Load offset is <offset>, which must be immediate value within [1, 15] + * - For <src>, make sure <offset> bit backwards & <16 - offset> bit forwards are available for loading + * - <dst>, <src>, <len> must be variables + * - __m128i <xmm0> ~ <xmm8> must be pre-defined + */ +#define CLIB_MVUNALIGN_LEFT47_IMM(dst, src, len, offset) \ +({ \ + int tmp; \ + while (len >= 128 + 16 - offset) { \ + xmm0 = _mm_loadu_si128((const __m128i *)((const u8 *)src - offset + 0 * 16)); \ + len -= 128; \ + xmm1 = _mm_loadu_si128((const __m128i *)((const u8 *)src - offset + 1 * 16)); \ + xmm2 = _mm_loadu_si128((const __m128i *)((const u8 *)src - offset + 2 * 16)); \ + xmm3 = _mm_loadu_si128((const __m128i *)((const u8 *)src - offset + 3 * 16)); \ + xmm4 = _mm_loadu_si128((const __m128i *)((const u8 *)src - offset + 4 * 16)); \ + xmm5 = _mm_loadu_si128((const __m128i *)((const u8 *)src - offset + 5 * 16)); \ + xmm6 = _mm_loadu_si128((const __m128i *)((const u8 *)src - offset + 6 * 16)); \ + xmm7 = _mm_loadu_si128((const __m128i *)((const u8 *)src - offset + 7 * 16)); \ + xmm8 = _mm_loadu_si128((const __m128i *)((const u8 *)src - offset + 8 * 16)); \ + src = (const u8 *)src + 128; \ + _mm_storeu_si128((__m128i *)((u8 *)dst + 0 * 16), _mm_alignr_epi8(xmm1, xmm0, offset)); \ + _mm_storeu_si128((__m128i *)((u8 *)dst + 1 * 16), _mm_alignr_epi8(xmm2, xmm1, offset)); \ + _mm_storeu_si128((__m128i *)((u8 *)dst + 2 * 16), _mm_alignr_epi8(xmm3, xmm2, offset)); \ + _mm_storeu_si128((__m128i *)((u8 *)dst + 3 * 16), _mm_alignr_epi8(xmm4, xmm3, offset)); \ + _mm_storeu_si128((__m128i *)((u8 *)dst + 4 * 16), _mm_alignr_epi8(xmm5, xmm4, offset)); \ + _mm_storeu_si128((__m128i *)((u8 *)dst + 5 * 16), _mm_alignr_epi8(xmm6, xmm5, offset)); \ + _mm_storeu_si128((__m128i *)((u8 *)dst + 6 * 16), _mm_alignr_epi8(xmm7, xmm6, offset)); \ + _mm_storeu_si128((__m128i *)((u8 *)dst + 7 * 16), _mm_alignr_epi8(xmm8, xmm7, offset)); \ + dst = (u8 *)dst + 128; 
\ + } \ + tmp = len; \ + len = ((len - 16 + offset) & 127) + 16 - offset; \ + tmp -= len; \ + src = (const u8 *)src + tmp; \ + dst = (u8 *)dst + tmp; \ + if (len >= 32 + 16 - offset) { \ + while (len >= 32 + 16 - offset) { \ + xmm0 = _mm_loadu_si128((const __m128i *)((const u8 *)src - offset + 0 * 16)); \ + len -= 32; \ + xmm1 = _mm_loadu_si128((const __m128i *)((const u8 *)src - offset + 1 * 16)); \ + xmm2 = _mm_loadu_si128((const __m128i *)((const u8 *)src - offset + 2 * 16)); \ + src = (const u8 *)src + 32; \ + _mm_storeu_si128((__m128i *)((u8 *)dst + 0 * 16), _mm_alignr_epi8(xmm1, xmm0, offset)); \ + _mm_storeu_si128((__m128i *)((u8 *)dst + 1 * 16), _mm_alignr_epi8(xmm2, xmm1, offset)); \ + dst = (u8 *)dst + 32; \ + } \ + tmp = len; \ + len = ((len - 16 + offset) & 31) + 16 - offset; \ + tmp -= len; \ + src = (const u8 *)src + tmp; \ + dst = (u8 *)dst + tmp; \ + } \ +}) + +/** + * Macro for copying unaligned block from one location to another, + * 47 bytes leftover maximum, + * locations should not overlap. + * Use switch here because the aligning instruction requires immediate value for shift count. 
+ * Requirements: + * - Store is aligned + * - Load offset is <offset>, which must be within [1, 15] + * - For <src>, make sure <offset> bit backwards & <16 - offset> bit forwards are available for loading + * - <dst>, <src>, <len> must be variables + * - __m128i <xmm0> ~ <xmm8> used in CLIB_MVUNALIGN_LEFT47_IMM must be pre-defined + */ +#define CLIB_MVUNALIGN_LEFT47(dst, src, len, offset) \ +({ \ + switch (offset) { \ + case 0x01: CLIB_MVUNALIGN_LEFT47_IMM(dst, src, n, 0x01); break; \ + case 0x02: CLIB_MVUNALIGN_LEFT47_IMM(dst, src, n, 0x02); break; \ + case 0x03: CLIB_MVUNALIGN_LEFT47_IMM(dst, src, n, 0x03); break; \ + case 0x04: CLIB_MVUNALIGN_LEFT47_IMM(dst, src, n, 0x04); break; \ + case 0x05: CLIB_MVUNALIGN_LEFT47_IMM(dst, src, n, 0x05); break; \ + case 0x06: CLIB_MVUNALIGN_LEFT47_IMM(dst, src, n, 0x06); break; \ + case 0x07: CLIB_MVUNALIGN_LEFT47_IMM(dst, src, n, 0x07); break; \ + case 0x08: CLIB_MVUNALIGN_LEFT47_IMM(dst, src, n, 0x08); break; \ + case 0x09: CLIB_MVUNALIGN_LEFT47_IMM(dst, src, n, 0x09); break; \ + case 0x0A: CLIB_MVUNALIGN_LEFT47_IMM(dst, src, n, 0x0A); break; \ + case 0x0B: CLIB_MVUNALIGN_LEFT47_IMM(dst, src, n, 0x0B); break; \ + case 0x0C: CLIB_MVUNALIGN_LEFT47_IMM(dst, src, n, 0x0C); break; \ + case 0x0D: CLIB_MVUNALIGN_LEFT47_IMM(dst, src, n, 0x0D); break; \ + case 0x0E: CLIB_MVUNALIGN_LEFT47_IMM(dst, src, n, 0x0E); break; \ + case 0x0F: CLIB_MVUNALIGN_LEFT47_IMM(dst, src, n, 0x0F); break; \ + default:; \ + } \ +}) + +static inline void * +clib_memcpy(void *dst, const void *src, size_t n) +{ + __m128i xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8; + uword dstu = (uword)dst; + uword srcu = (uword)src; + void *ret = dst; + size_t dstofss; + size_t srcofs; + + /** + * Copy less than 16 bytes + */ + if (n < 16) { + if (n & 0x01) { + *(u8 *)dstu = *(const u8 *)srcu; + srcu = (uword)((const u8 *)srcu + 1); + dstu = (uword)((u8 *)dstu + 1); + } + if (n & 0x02) { + *(u16 *)dstu = *(const u16 *)srcu; + srcu = (uword)((const u16 *)srcu + 
1); + dstu = (uword)((u16 *)dstu + 1); + } + if (n & 0x04) { + *(u32 *)dstu = *(const u32 *)srcu; + srcu = (uword)((const u32 *)srcu + 1); + dstu = (uword)((u32 *)dstu + 1); + } + if (n & 0x08) { + *(u64 *)dstu = *(const u64 *)srcu; + } + return ret; + } + + /** + * Fast way when copy size doesn't exceed 512 bytes + */ + if (n <= 32) { + clib_mov16((u8 *)dst, (const u8 *)src); + clib_mov16((u8 *)dst - 16 + n, (const u8 *)src - 16 + n); + return ret; + } + if (n <= 48) { + clib_mov32((u8 *)dst, (const u8 *)src); + clib_mov16((u8 *)dst - 16 + n, (const u8 *)src - 16 + n); + return ret; + } + if (n <= 64) { + clib_mov32((u8 *)dst, (const u8 *)src); + clib_mov16((u8 *)dst + 32, (const u8 *)src + 32); + clib_mov16((u8 *)dst - 16 + n, (const u8 *)src - 16 + n); + return ret; + } + if (n <= 128) { + goto COPY_BLOCK_128_BACK15; + } + if (n <= 512) { + if (n >= 256) { + n -= 256; + clib_mov128((u8 *)dst, (const u8 *)src); + clib_mov128((u8 *)dst + 128, (const u8 *)src + 128); + src = (const u8 *)src + 256; + dst = (u8 *)dst + 256; + } +COPY_BLOCK_255_BACK15: + if (n >= 128) { + n -= 128; + clib_mov128((u8 *)dst, (const u8 *)src); + src = (const u8 *)src + 128; + dst = (u8 *)dst + 128; + } +COPY_BLOCK_128_BACK15: + if (n >= 64) { + n -= 64; + clib_mov64((u8 *)dst, (const u8 *)src); + src = (const u8 *)src + 64; + dst = (u8 *)dst + 64; + } +COPY_BLOCK_64_BACK15: + if (n >= 32) { + n -= 32; + clib_mov32((u8 *)dst, (const u8 *)src); + src = (const u8 *)src + 32; + dst = (u8 *)dst + 32; + } + if (n > 16) { + clib_mov16((u8 *)dst, (const u8 *)src); + clib_mov16((u8 *)dst - 16 + n, (const u8 *)src - 16 + n); + return ret; + } + if (n > 0) { + clib_mov16((u8 *)dst - 16 + n, (const u8 *)src - 16 + n); + } + return ret; + } + + /** + * Make store aligned when copy size exceeds 512 bytes, + * and make sure the first 15 bytes are copied, because + * unaligned copy functions require up to 15 bytes + * backwards access. 
+ */ + dstofss = 16 - ((uword)dst & 0x0F) + 16; + n -= dstofss; + clib_mov32((u8 *)dst, (const u8 *)src); + src = (const u8 *)src + dstofss; + dst = (u8 *)dst + dstofss; + srcofs = ((uword)src & 0x0F); + + /** + * For aligned copy + */ + if (srcofs == 0) { + /** + * Copy 256-byte blocks + */ + for (; n >= 256; n -= 256) { + clib_mov256((u8 *)dst, (const u8 *)src); + dst = (u8 *)dst + 256; + src = (const u8 *)src + 256; + } + + /** + * Copy whatever left + */ + goto COPY_BLOCK_255_BACK15; + } + + /** + * For copy with unaligned load + */ + CLIB_MVUNALIGN_LEFT47(dst, src, n, srcofs); + + /** + * Copy whatever left + */ + goto COPY_BLOCK_64_BACK15; +} + + +#undef CLIB_MVUNALIGN_LEFT47_IMM +#undef CLIB_MVUNALIGN_LEFT47 + +#endif /* included_clib_memcpy_sse3_h */ + diff --git a/vppinfra/vppinfra/mhash.c b/vppinfra/vppinfra/mhash.c index 7d2dc36a648..a7ff8587981 100644 --- a/vppinfra/vppinfra/mhash.c +++ b/vppinfra/vppinfra/mhash.c @@ -288,7 +288,7 @@ uword mhash_set_mem (mhash_t * h, void * key, uword * new_value, uword * old_val sk = (void *) (h->key_vector_or_heap + i); sk->heap_handle = handle; sk->vec.len = n_key_bytes; - memcpy (sk->vec.vector_data, key, n_key_bytes); + clib_memcpy (sk->vec.vector_data, key, n_key_bytes); /* Advance key past vector header. 
*/ i += sizeof (sk[0]); @@ -309,7 +309,7 @@ uword mhash_set_mem (mhash_t * h, void * key, uword * new_value, uword * old_val } n_key_bytes = h->n_key_bytes; - memcpy (k, key, n_key_bytes); + clib_memcpy (k, key, n_key_bytes); } ikey = i; diff --git a/vppinfra/vppinfra/pfhash.c b/vppinfra/vppinfra/pfhash.c index 4bea0682e10..81a5f491bd5 100644 --- a/vppinfra/vppinfra/pfhash.c +++ b/vppinfra/vppinfra/pfhash.c @@ -79,7 +79,7 @@ u8 * format_pfhash (u8 * s, va_list * args) if (kv16->values[j] != (u32)~0) { vec_add2 (shs, sh, 1); - memcpy (sh->key, &kv16->kb.k_u32x4[j], p->key_size); + clib_memcpy (sh->key, &kv16->kb.k_u32x4[j], p->key_size); sh->value = kv16->values[j]; } } @@ -93,7 +93,7 @@ u8 * format_pfhash (u8 * s, va_list * args) if (kv8->values[j] != (u32)~0) { vec_add2 (shs, sh, 1); - memcpy (sh->key, &kv8->kb.k_u64[j], p->key_size); + clib_memcpy (sh->key, &kv8->kb.k_u64[j], p->key_size); sh->value = kv8->values[j]; } } @@ -106,7 +106,7 @@ u8 * format_pfhash (u8 * s, va_list * args) if (kv8v8->values[j] != (u64)~0) { vec_add2 (shs, sh, 1); - memcpy (sh->key, &kv8v8->kb.k_u64[j], p->key_size); + clib_memcpy (sh->key, &kv8v8->kb.k_u64[j], p->key_size); sh->value = kv8v8->values[j]; } } @@ -120,7 +120,7 @@ u8 * format_pfhash (u8 * s, va_list * args) if (kv4->values[j] != (u32)~0) { vec_add2 (shs, sh, 1); - memcpy (sh->key, &kv4->kb.kb[j], p->key_size); + clib_memcpy (sh->key, &kv4->kb.kb[j], p->key_size); sh->value = kv4->values[j]; } } @@ -131,7 +131,7 @@ u8 * format_pfhash (u8 * s, va_list * args) hash_foreach_pair (hp, p->overflow_hash, ({ vec_add2 (shs, sh, 1); - memcpy (sh->key, (u8 *)hp->key, p->key_size); + clib_memcpy (sh->key, (u8 *)hp->key, p->key_size); sh->value = hp->value[0]; })); @@ -408,7 +408,7 @@ void pfhash_set (pfhash_t * p, u32 bucket, void * key, void * value) return; } kcopy = clib_mem_alloc (p->key_size); - memcpy (kcopy, key, p->key_size); + clib_memcpy (kcopy, key, p->key_size); hash_set_mem (p->overflow_hash, kcopy, value); p->nitems++; 
p->nitems_in_overflow++; @@ -462,7 +462,7 @@ void pfhash_set (pfhash_t * p, u32 bucket, void * key, void * value) { if (kv16->values[i] == (u32)~0) { - memcpy (&kv16->kb.k_u32x4[i], key, p->key_size); + clib_memcpy (&kv16->kb.k_u32x4[i], key, p->key_size); kv16->values[i] = (u32)(u64) value; return; } @@ -471,13 +471,13 @@ void pfhash_set (pfhash_t * p, u32 bucket, void * key, void * value) for (i = 0; i < 3; i++) { kcopy = clib_mem_alloc (p->key_size); - memcpy (kcopy, &kv16->kb.k_u32x4[i], p->key_size); + clib_memcpy (kcopy, &kv16->kb.k_u32x4[i], p->key_size); hash_set_mem (p->overflow_hash, kcopy, kv16->values[i]); p->nitems_in_overflow++; } /* Add new key to overflow */ kcopy = clib_mem_alloc (p->key_size); - memcpy (kcopy, key, p->key_size); + clib_memcpy (kcopy, key, p->key_size); hash_set_mem (p->overflow_hash, kcopy, value); p->buckets[bucket] = PFHASH_BUCKET_OVERFLOW; p->overflow_count++; @@ -491,7 +491,7 @@ void pfhash_set (pfhash_t * p, u32 bucket, void * key, void * value) { if (kv8->values[i] == (u32)~0) { - memcpy (&kv8->kb.k_u64[i], key, 8); + clib_memcpy (&kv8->kb.k_u64[i], key, 8); kv8->values[i] = (u32)(u64) value; return; } @@ -500,7 +500,7 @@ void pfhash_set (pfhash_t * p, u32 bucket, void * key, void * value) for (i = 0; i < 5; i++) { kcopy = clib_mem_alloc (p->key_size); - memcpy (kcopy, &kv8->kb.k_u64[i], 8); + clib_memcpy (kcopy, &kv8->kb.k_u64[i], 8); hash_set_mem (p->overflow_hash, kcopy, kv8->values[i]); p->nitems_in_overflow++; } @@ -511,7 +511,7 @@ void pfhash_set (pfhash_t * p, u32 bucket, void * key, void * value) { if (kv8v8->values[i] == (u64)~0) { - memcpy (&kv8v8->kb.k_u64[i], key, 8); + clib_memcpy (&kv8v8->kb.k_u64[i], key, 8); kv8v8->values[i] = (u64) value; return; } @@ -520,7 +520,7 @@ void pfhash_set (pfhash_t * p, u32 bucket, void * key, void * value) for (i = 0; i < 4; i++) { kcopy = clib_mem_alloc (p->key_size); - memcpy (kcopy, &kv8v8->kb.k_u64[i], 8); + clib_memcpy (kcopy, &kv8v8->kb.k_u64[i], 8); hash_set_mem 
(p->overflow_hash, kcopy, kv8v8->values[i]); p->nitems_in_overflow++; } @@ -528,7 +528,7 @@ void pfhash_set (pfhash_t * p, u32 bucket, void * key, void * value) } /* Add new key to overflow */ kcopy = clib_mem_alloc (p->key_size); - memcpy (kcopy, key, p->key_size); + clib_memcpy (kcopy, key, p->key_size); hash_set_mem (p->overflow_hash, kcopy, value); p->buckets[bucket] = PFHASH_BUCKET_OVERFLOW; p->overflow_count++; @@ -540,7 +540,7 @@ void pfhash_set (pfhash_t * p, u32 bucket, void * key, void * value) { if (kv4->values[i] == (u32)~0) { - memcpy (&kv4->kb.kb[i], key, 4); + clib_memcpy (&kv4->kb.kb[i], key, 4); kv4->values[i] = (u32)(u64) value; return; } @@ -549,13 +549,13 @@ void pfhash_set (pfhash_t * p, u32 bucket, void * key, void * value) for (i = 0; i < 8; i++) { kcopy = clib_mem_alloc (p->key_size); - memcpy (kcopy, &kv4->kb.kb[i], 4); + clib_memcpy (kcopy, &kv4->kb.kb[i], 4); hash_set_mem (p->overflow_hash, kcopy, kv4->values[i]); p->nitems_in_overflow++; } /* Add new key to overflow */ kcopy = clib_mem_alloc (p->key_size); - memcpy (kcopy, key, p->key_size); + clib_memcpy (kcopy, key, p->key_size); hash_set_mem (p->overflow_hash, kcopy, value); p->buckets[bucket] = PFHASH_BUCKET_OVERFLOW; p->overflow_count++; diff --git a/vppinfra/vppinfra/serialize.c b/vppinfra/vppinfra/serialize.c index ab533e87d37..4025b704700 100644 --- a/vppinfra/vppinfra/serialize.c +++ b/vppinfra/vppinfra/serialize.c @@ -141,7 +141,7 @@ void serialize_cstring (serialize_main_t * m, char * s) if (len > 0) { p = serialize_get (m, len); - memcpy (p, s, len); + clib_memcpy (p, s, len); } } @@ -161,7 +161,7 @@ void unserialize_cstring (serialize_main_t * m, char ** s) { r = vec_new (char, len + 1); p = unserialize_get (m, len); - memcpy (r, p, len); + clib_memcpy (r, p, len); /* Null terminate. 
*/ r[len] = 0; @@ -175,7 +175,7 @@ void serialize_vec_8 (serialize_main_t * m, va_list * va) u8 * s = va_arg (*va, u8 *); u32 n = va_arg (*va, u32); u8 * p = serialize_get (m, n * sizeof (u8)); - memcpy (p, s, n * sizeof (u8)); + clib_memcpy (p, s, n * sizeof (u8)); } void unserialize_vec_8 (serialize_main_t * m, va_list * va) @@ -183,7 +183,7 @@ void unserialize_vec_8 (serialize_main_t * m, va_list * va) u8 * s = va_arg (*va, u8 *); u32 n = va_arg (*va, u32); u8 * p = unserialize_get (m, n); - memcpy (s, p, n); + clib_memcpy (s, p, n); } #define _(n_bits) \ @@ -582,7 +582,7 @@ void serialize_magic (serialize_main_t * m, void * magic, u32 magic_bytes) void * p; serialize_integer (m, magic_bytes, sizeof (magic_bytes)); p = serialize_get (m, magic_bytes); - memcpy (p, magic, magic_bytes); + clib_memcpy (p, magic, magic_bytes); } void unserialize_check_magic (serialize_main_t * m, void * magic, @@ -664,7 +664,7 @@ static void * serialize_write_not_inline (serialize_main_header_t * m, if (n_left_o > 0 && n_left_b > 0) { uword n = clib_min (n_left_b, n_left_o); - memcpy (s->buffer + cur_bi, s->overflow_buffer, n); + clib_memcpy (s->buffer + cur_bi, s->overflow_buffer, n); cur_bi += n; n_left_b -= n; n_left_o -= n; diff --git a/vppinfra/vppinfra/socket.c b/vppinfra/vppinfra/socket.c index ae55dc64262..44ceb65984a 100644 --- a/vppinfra/vppinfra/socket.c +++ b/vppinfra/vppinfra/socket.c @@ -100,7 +100,7 @@ socket_config (char * config, { struct sockaddr_un * su = addr; su->sun_family = PF_LOCAL; - memcpy (&su->sun_path, config, + clib_memcpy (&su->sun_path, config, clib_min (sizeof (su->sun_path), 1 + strlen (config))); *addr_len = sizeof (su[0]); } @@ -157,7 +157,7 @@ socket_config (char * config, if (! 
host) error = clib_error_return (0, "unknown host `%s'", config); else - memcpy (&sa->sin_addr.s_addr, host->h_addr_list[0], host->h_length); + clib_memcpy (&sa->sin_addr.s_addr, host->h_addr_list[0], host->h_length); } else diff --git a/vppinfra/vppinfra/string.h b/vppinfra/vppinfra/string.h index 1b4d7664103..12498215c66 100644 --- a/vppinfra/vppinfra/string.h +++ b/vppinfra/vppinfra/string.h @@ -1,4 +1,18 @@ /* + * Copyright (c) 2016 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/* Copyright (c) 2001, 2002, 2003 Eliot Dresselhaus Permission is hereby granted, free of charge, to any person obtaining @@ -41,4 +55,12 @@ /* Exchanges source and destination. */ void clib_memswap (void * _a, void * _b, uword bytes); +#if __AVX__ +#include <vppinfra/memcpy_avx.h> +#elif __SSE3__ +#include <vppinfra/memcpy_sse3.h> +#else +#define clib_memcpy(a,b,c) memcpy(a,b,c) +#endif + #endif /* included_clib_string_h */ diff --git a/vppinfra/vppinfra/test_elf.c b/vppinfra/vppinfra/test_elf.c index 914513abf90..afdb4708a5c 100644 --- a/vppinfra/vppinfra/test_elf.c +++ b/vppinfra/vppinfra/test_elf.c @@ -70,7 +70,7 @@ static clib_error_t * elf_set_interpreter (elf_main_t * em, char * interp) /* Put in new null terminated string. 
*/ memset (s->contents, 0, vec_len (s->contents)); - memcpy (s->contents, interp, strlen (interp)); + clib_memcpy (s->contents, interp, strlen (interp)); return 0; } diff --git a/vppinfra/vppinfra/test_elog.c b/vppinfra/vppinfra/test_elog.c index b109e9965ce..bb82c275354 100644 --- a/vppinfra/vppinfra/test_elog.c +++ b/vppinfra/vppinfra/test_elog.c @@ -188,7 +188,7 @@ int test_elog_main (unformat_input_t * input) d = ELOG_DATA (em, e); v = format (0, "foo %d%c", i, 0); - memcpy (d->s, v, clib_min (vec_len (v), sizeof (d->s))); + clib_memcpy (d->s, v, clib_min (vec_len (v), sizeof (d->s))); } { diff --git a/vppinfra/vppinfra/vec.c b/vppinfra/vppinfra/vec.c index 58ffb34dce4..912038d56eb 100644 --- a/vppinfra/vppinfra/vec.c +++ b/vppinfra/vppinfra/vec.c @@ -87,7 +87,7 @@ void * vec_resize_allocate_memory (void * v, clib_panic ("vec_resize fails, length increment %d, data bytes %d, alignment %d", length_increment, data_bytes, data_align); - memcpy (new, old, old_alloc_bytes); + clib_memcpy (new, old, old_alloc_bytes); clib_mem_free (old); v = new; diff --git a/vppinfra/vppinfra/vec.h b/vppinfra/vppinfra/vec.h index 06080062b47..0c3237d3852 100644 --- a/vppinfra/vppinfra/vec.h +++ b/vppinfra/vppinfra/vec.h @@ -319,7 +319,7 @@ do { \ if (_v(l) > 0) \ { \ vec_resize_ha (_v(v), _v(l), (H), (A)); \ - memcpy (_v(v), (V), _v(l) * sizeof ((V)[0])); \ + clib_memcpy (_v(v), (V), _v(l) * sizeof ((V)[0]));\ } \ _v(v); \ }) @@ -346,7 +346,8 @@ do { \ @param DST destination @param SRC source */ -#define vec_copy(DST,SRC) memcpy (DST, SRC, vec_len (DST) * sizeof ((DST)[0])) +#define vec_copy(DST,SRC) clib_memcpy (DST, SRC, vec_len (DST) * \ + sizeof ((DST)[0])) /** \brief Clone a vector. Make a new vector with the same size as a given vector but possibly with a different type. 
@@ -543,7 +544,7 @@ do { \ word _v(n) = (N); \ word _v(l) = vec_len (V); \ V = _vec_resize ((V), _v(n), (_v(l) + _v(n)) * sizeof ((V)[0]), (H), (A)); \ - memcpy ((V) + _v(l), (E), _v(n) * sizeof ((V)[0])); \ + clib_memcpy ((V) + _v(l), (E), _v(n) * sizeof ((V)[0])); \ } while (0) /** \brief Add N elements to end of vector V (no header, unspecified alignment) @@ -705,7 +706,8 @@ do { \ memmove ((V) + _v(m) + _v(n), \ (V) + _v(m), \ (_v(l) - _v(m)) * sizeof ((V)[0])); \ - memcpy ((V) + _v(m), (E), _v(n) * sizeof ((V)[0])); \ + clib_memcpy ((V) + _v(m), (E), \ + _v(n) * sizeof ((V)[0])); \ } while (0) /** \brief Insert N vector elements starting at element M, @@ -779,7 +781,7 @@ do { \ \ v1 = _vec_resize ((v1), _v(l2), \ (_v(l1) + _v(l2)) * sizeof ((v1)[0]), 0, 0); \ - memcpy ((v1) + _v(l1), (v2), _v(l2) * sizeof ((v2)[0])); \ + clib_memcpy ((v1) + _v(l1), (v2), _v(l2) * sizeof ((v2)[0])); \ } while (0) /** \brief Append v2 after v1. Result in v1. Specified alignment. @@ -795,7 +797,7 @@ do { \ \ v1 = _vec_resize ((v1), _v(l2), \ (_v(l1) + _v(l2)) * sizeof ((v1)[0]), 0, align); \ - memcpy ((v1) + _v(l1), (v2), _v(l2) * sizeof ((v2)[0])); \ + clib_memcpy ((v1) + _v(l1), (v2), _v(l2) * sizeof ((v2)[0])); \ } while (0) /** \brief Prepend v2 before v1. Result in v1. @@ -811,7 +813,7 @@ do { \ v1 = _vec_resize ((v1), _v(l2), \ (_v(l1) + _v(l2)) * sizeof ((v1)[0]), 0, 0); \ memmove ((v1) + _v(l2), (v1), _v(l1) * sizeof ((v1)[0])); \ - memcpy ((v1), (v2), _v(l2) * sizeof ((v2)[0])); \ + clib_memcpy ((v1), (v2), _v(l2) * sizeof ((v2)[0])); \ } while (0) /** \brief Prepend v2 before v1. Result in v1. 
Specified alignment @@ -828,7 +830,7 @@ do { \ v1 = _vec_resize ((v1), _v(l2), \ (_v(l1) + _v(l2)) * sizeof ((v1)[0]), 0, align); \ memmove ((v1) + _v(l2), (v1), _v(l1) * sizeof ((v1)[0])); \ - memcpy ((v1), (v2), _v(l2) * sizeof ((v2)[0])); \ + clib_memcpy ((v1), (v2), _v(l2) * sizeof ((v2)[0])); \ } while (0) @@ -909,7 +911,7 @@ do { \ vec_reset_length (V); \ vec_validate ((V), (L)); \ if ((S) && (L)) \ - memcpy ((V), (S), (L)); \ + clib_memcpy ((V), (S), (L)); \ (V)[(L)] = 0; \ } while (0) |