summaryrefslogtreecommitdiffstats
path: root/vppinfra
diff options
context:
space:
mode:
authorDamjan Marion <damarion@cisco.com>2016-03-13 02:22:06 +0100
committerDamjan Marion <damarion@cisco.com>2016-04-22 17:29:47 +0200
commitf1213b82771ce929c076339c24a777cfd59690e6 (patch)
tree3c74305e8848047d8ccd1228ee511d57cbf1b1a6 /vppinfra
parent2b836cf4d1e4e59ca34229a9fdf49d79216da20e (diff)
Add clib_memcpy macro based on DPDK rte_memcpy implementation
Change-Id: I22cb443c4bd0bf298abb6f06e8e4ca65a44a2854 Signed-off-by: Damjan Marion <damarion@cisco.com>
Diffstat (limited to 'vppinfra')
-rw-r--r--vppinfra/Makefile.am2
-rw-r--r--vppinfra/tools/elftool.c6
-rw-r--r--vppinfra/vppinfra/bihash_template.c10
-rw-r--r--vppinfra/vppinfra/elf.c4
-rw-r--r--vppinfra/vppinfra/elf.h2
-rw-r--r--vppinfra/vppinfra/elog.c4
-rw-r--r--vppinfra/vppinfra/fifo.c6
-rw-r--r--vppinfra/vppinfra/fifo.h4
-rw-r--r--vppinfra/vppinfra/hash.c14
-rw-r--r--vppinfra/vppinfra/heap.h2
-rw-r--r--vppinfra/vppinfra/mem.h2
-rw-r--r--vppinfra/vppinfra/memcpy_avx.h261
-rw-r--r--vppinfra/vppinfra/memcpy_sse3.h330
-rw-r--r--vppinfra/vppinfra/mhash.c4
-rw-r--r--vppinfra/vppinfra/pfhash.c34
-rw-r--r--vppinfra/vppinfra/serialize.c12
-rw-r--r--vppinfra/vppinfra/socket.c4
-rw-r--r--vppinfra/vppinfra/string.h22
-rw-r--r--vppinfra/vppinfra/test_elf.c2
-rw-r--r--vppinfra/vppinfra/test_elog.c2
-rw-r--r--vppinfra/vppinfra/vec.c2
-rw-r--r--vppinfra/vppinfra/vec.h20
22 files changed, 683 insertions, 66 deletions
diff --git a/vppinfra/Makefile.am b/vppinfra/Makefile.am
index adcd32cc868..1b4d627dfb6 100644
--- a/vppinfra/Makefile.am
+++ b/vppinfra/Makefile.am
@@ -189,6 +189,8 @@ nobase_include_HEADERS = \
vppinfra/math.h \
vppinfra/md5.h \
vppinfra/mem.h \
+ vppinfra/memcpy_sse3.h \
+ vppinfra/memcpy_avx.h \
vppinfra/mhash.h \
vppinfra/mheap.h \
vppinfra/mheap_bootstrap.h \
diff --git a/vppinfra/tools/elftool.c b/vppinfra/tools/elftool.c
index b8acd055602..f5d70b56d5f 100644
--- a/vppinfra/tools/elftool.c
+++ b/vppinfra/tools/elftool.c
@@ -101,7 +101,7 @@ static clib_error_t * elf_set_interpreter (elf_main_t * em,
/* Put in new null terminated string. */
memset (s->contents, 0, vec_len (s->contents));
- memcpy (s->contents, interp, strlen (interp));
+ clib_memcpy (s->contents, interp, strlen (interp));
return 0;
}
@@ -336,11 +336,11 @@ set_interpreter_rpath (elf_tool_main_t * tm)
}
if (tm->interpreter_offset)
- memcpy (&idp[tm->interpreter_offset], tm->set_interpreter,
+ clib_memcpy (&idp[tm->interpreter_offset], tm->set_interpreter,
strlen (tm->set_interpreter)+1);
if (tm->rpath_offset)
- memcpy (&idp[tm->rpath_offset], tm->set_rpath,
+ clib_memcpy (&idp[tm->rpath_offset], tm->set_rpath,
strlen (tm->set_rpath)+1);
/* Write the output file... */
diff --git a/vppinfra/vppinfra/bihash_template.c b/vppinfra/vppinfra/bihash_template.c
index 6242e0ffe5a..3ed3dca19c2 100644
--- a/vppinfra/vppinfra/bihash_template.c
+++ b/vppinfra/vppinfra/bihash_template.c
@@ -129,7 +129,7 @@ BV(make_working_copy)
v = BV(clib_bihash_get_value) (h, b->offset);
- memcpy (working_copy, v, sizeof (*v)*(1<<b->log2_pages));
+ clib_memcpy (working_copy, v, sizeof (*v)*(1<<b->log2_pages));
working_bucket.as_u64 = b->as_u64;
working_bucket.offset = BV(clib_bihash_get_offset) (h, working_copy);
CLIB_MEMORY_BARRIER();
@@ -167,7 +167,7 @@ static BVT(clib_bihash_value) *
{
if (BV(clib_bihash_is_free)(&(new_v->kvp[k])))
{
- memcpy (&(new_v->kvp[k]), &(v->kvp[j]),
+ clib_memcpy (&(new_v->kvp[k]), &(v->kvp[j]),
sizeof (new_v->kvp[k]));
goto doublebreak;
}
@@ -243,7 +243,7 @@ int BV(clib_bihash_add_del)
{
if (!memcmp(&(v->kvp[i]), &add_v->key, sizeof (add_v->key)))
{
- memcpy (&(v->kvp[i]), add_v, sizeof (*add_v));
+ clib_memcpy (&(v->kvp[i]), add_v, sizeof (*add_v));
CLIB_MEMORY_BARRIER();
/* Restore the previous (k,v) pairs */
b->as_u64 = h->saved_bucket.as_u64;
@@ -254,7 +254,7 @@ int BV(clib_bihash_add_del)
{
if (BV(clib_bihash_is_free)(&(v->kvp[i])))
{
- memcpy (&(v->kvp[i]), add_v, sizeof (*add_v));
+ clib_memcpy (&(v->kvp[i]), add_v, sizeof (*add_v));
CLIB_MEMORY_BARRIER();
b->as_u64 = h->saved_bucket.as_u64;
goto unlock;
@@ -301,7 +301,7 @@ int BV(clib_bihash_add_del)
{
if (BV(clib_bihash_is_free)(&(new_v->kvp[i])))
{
- memcpy (&(new_v->kvp[i]), add_v, sizeof (*add_v));
+ clib_memcpy (&(new_v->kvp[i]), add_v, sizeof (*add_v));
goto expand_ok;
}
}
diff --git a/vppinfra/vppinfra/elf.c b/vppinfra/vppinfra/elf.c
index bbd321b5780..8a09cd41fe0 100644
--- a/vppinfra/vppinfra/elf.c
+++ b/vppinfra/vppinfra/elf.c
@@ -818,7 +818,7 @@ add_relocation_table (elf_main_t * em, elf_section_t * s)
}
vec_resize (t->relocations, vec_len (rs));
- memcpy (t->relocations, rs, vec_bytes (t->relocations));
+ clib_memcpy (t->relocations, rs, vec_bytes (t->relocations));
vec_free (rs);
}
else
@@ -1909,7 +1909,7 @@ elf_create_section_with_contents (elf_main_t * em,
sts->contents = st;
vec_resize (c, n_content_bytes);
- memcpy (c, contents, n_content_bytes);
+ clib_memcpy (c, contents, n_content_bytes);
s->contents = c;
em->file_header.section_header_count += is_new_section && s->header.type != ~0;
diff --git a/vppinfra/vppinfra/elf.h b/vppinfra/vppinfra/elf.h
index 1a0102d0838..52989166a5b 100644
--- a/vppinfra/vppinfra/elf.h
+++ b/vppinfra/vppinfra/elf.h
@@ -951,7 +951,7 @@ elf_get_section_contents (elf_main_t * em,
vec_len (s->contents),
/* header_bytes */ 0,
/* align */ 0);
- memcpy (result, s->contents, vec_len (s->contents));
+ clib_memcpy (result, s->contents, vec_len (s->contents));
}
return result;
diff --git a/vppinfra/vppinfra/elog.c b/vppinfra/vppinfra/elog.c
index b748963933c..06b97d8a390 100644
--- a/vppinfra/vppinfra/elog.c
+++ b/vppinfra/vppinfra/elog.c
@@ -263,7 +263,7 @@ static u8 * fixed_format (u8 * s, char * fmt, char * result, uword * result_len)
ASSERT (*result_len > f - percent);
l = clib_min (f - percent, *result_len - 1);
- memcpy (result, percent, l);
+ clib_memcpy (result, percent, l);
result[l] = 0;
done:
@@ -836,7 +836,7 @@ unserialize_elog_event (serialize_main_t * m, va_list * va)
unserialize_cstring (m, &t);
if (n_bytes == 0)
n_bytes = strlen (t) + 1;
- memcpy (d, t, clib_min (n_bytes, vec_len (t)));
+ clib_memcpy (d, t, clib_min (n_bytes, vec_len (t)));
vec_free (t);
break;
}
diff --git a/vppinfra/vppinfra/fifo.c b/vppinfra/vppinfra/fifo.c
index 3b8349d402e..fc287a2a29f 100644
--- a/vppinfra/vppinfra/fifo.c
+++ b/vppinfra/vppinfra/fifo.c
@@ -111,11 +111,11 @@ void * _clib_fifo_resize (void * v_old, uword n_new_elts, uword elt_bytes)
if (head + n_copy_bytes >= end)
{
uword n = end - head;
- memcpy (v_new, head, n);
- memcpy (v_new + n, v_old, n_copy_bytes - n);
+ clib_memcpy (v_new, head, n);
+ clib_memcpy (v_new + n, v_old, n_copy_bytes - n);
}
else
- memcpy (v_new, head, n_copy_bytes);
+ clib_memcpy (v_new, head, n_copy_bytes);
}
/* Zero empty space. */
diff --git a/vppinfra/vppinfra/fifo.h b/vppinfra/vppinfra/fifo.h
index 54aa8f2d0e5..10c7b65673d 100644
--- a/vppinfra/vppinfra/fifo.h
+++ b/vppinfra/vppinfra/fifo.h
@@ -210,9 +210,9 @@ do { \
_n1 = _i + _n0 - _l; \
_n1 = _n1 < 0 ? 0 : _n1; \
_n0 -= _n1; \
- memcpy ((f) + _i, (e), _n0 * sizeof ((f)[0])); \
+ clib_memcpy ((f) + _i, (e), _n0 * sizeof ((f)[0])); \
if (_n1) \
- memcpy ((f) + 0, (e) + _n0, _n1 * sizeof ((f)[0])); \
+ clib_memcpy ((f) + 0, (e) + _n0, _n1 * sizeof ((f)[0])); \
} while (0)
/* Subtract element from fifo. */
diff --git a/vppinfra/vppinfra/hash.c b/vppinfra/vppinfra/hash.c
index adadf010a0c..86231b4db5e 100644
--- a/vppinfra/vppinfra/hash.c
+++ b/vppinfra/vppinfra/hash.c
@@ -348,7 +348,7 @@ set_indirect_is_user (void * v,
log2_bytes = 1 + hash_pair_log2_bytes (h);
q = clib_mem_alloc (1 << log2_bytes);
}
- memcpy (q, &p->direct, hash_pair_bytes (h));
+ clib_memcpy (q, &p->direct, hash_pair_bytes (h));
pi->pairs = q;
if (h->log2_pair_size > 0)
@@ -428,7 +428,7 @@ static void unset_indirect (void * v, uword i, hash_pair_t * q)
if (len == 2)
{
- memcpy (p, q == r ? hash_forward1 (h, r) : r, hash_pair_bytes (h));
+ clib_memcpy (p, q == r ? hash_forward1 (h, r) : r, hash_pair_bytes (h));
set_is_user (v, i, 1);
}
else
@@ -443,7 +443,7 @@ static void unset_indirect (void * v, uword i, hash_pair_t * q)
{
/* If deleting a pair we need to keep non-null pairs together. */
if (q < e)
- memcpy (q, e, hash_pair_bytes (h));
+ clib_memcpy (q, e, hash_pair_bytes (h));
else
zero_pair (h, q);
if (is_vec)
@@ -484,7 +484,7 @@ static hash_pair_t * lookup (void * v, uword key, enum lookup_opcode op,
{
set_is_user (v, i, 0);
if (old_value)
- memcpy (old_value, p->direct.value, hash_value_bytes (h));
+ clib_memcpy (old_value, p->direct.value, hash_value_bytes (h));
zero_pair (h, &p->direct);
}
}
@@ -517,7 +517,7 @@ static hash_pair_t * lookup (void * v, uword key, enum lookup_opcode op,
if (found_key && op == UNSET)
{
if (old_value)
- memcpy (old_value, &p->direct.value, hash_value_bytes (h));
+ clib_memcpy (old_value, &p->direct.value, hash_value_bytes (h));
unset_indirect (v, i, &p->direct);
@@ -532,8 +532,8 @@ static hash_pair_t * lookup (void * v, uword key, enum lookup_opcode op,
{
/* Save away old value for caller. */
if (old_value && found_key)
- memcpy (old_value, &p->direct.value, hash_value_bytes (h));
- memcpy (&p->direct.value, new_value, hash_value_bytes (h));
+ clib_memcpy (old_value, &p->direct.value, hash_value_bytes (h));
+ clib_memcpy (&p->direct.value, new_value, hash_value_bytes (h));
}
if (op == SET)
diff --git a/vppinfra/vppinfra/heap.h b/vppinfra/vppinfra/heap.h
index 912e865e75b..c6605dac5f0 100644
--- a/vppinfra/vppinfra/heap.h
+++ b/vppinfra/vppinfra/heap.h
@@ -180,7 +180,7 @@ always_inline void * _heap_dup (void * v_old, uword v_bytes)
HEAP_DATA_ALIGN);
h_new = heap_header (v_new);
heap_dup_header (h_old, h_new);
- memcpy (v_new, v_old, v_bytes);
+ clib_memcpy (v_new, v_old, v_bytes);
return v_new;
}
diff --git a/vppinfra/vppinfra/mem.h b/vppinfra/vppinfra/mem.h
index 301bcdd0d32..a4c679c2c71 100644
--- a/vppinfra/vppinfra/mem.h
+++ b/vppinfra/vppinfra/mem.h
@@ -171,7 +171,7 @@ always_inline void * clib_mem_realloc (void * p, uword new_size, uword old_size)
copy_size = old_size;
else
copy_size = new_size;
- memcpy (q, p, copy_size);
+ clib_memcpy (q, p, copy_size);
clib_mem_free (p);
}
return q;
diff --git a/vppinfra/vppinfra/memcpy_avx.h b/vppinfra/vppinfra/memcpy_avx.h
new file mode 100644
index 00000000000..0ec6032a0f6
--- /dev/null
+++ b/vppinfra/vppinfra/memcpy_avx.h
@@ -0,0 +1,261 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef included_clib_memcpy_avx_h
+#define included_clib_memcpy_avx_h
+
+#include <stdint.h>
+#include <x86intrin.h>
+
+static inline void
+clib_mov16(u8 *dst, const u8 *src)
+{
+ __m128i xmm0;
+
+ xmm0 = _mm_loadu_si128((const __m128i *)src);
+ _mm_storeu_si128((__m128i *)dst, xmm0);
+}
+
+static inline void
+clib_mov32(u8 *dst, const u8 *src)
+{
+ __m256i ymm0;
+
+ ymm0 = _mm256_loadu_si256((const __m256i *)src);
+ _mm256_storeu_si256((__m256i *)dst, ymm0);
+}
+
+static inline void
+clib_mov64(u8 *dst, const u8 *src)
+{
+ clib_mov32((u8 *)dst + 0 * 32, (const u8 *)src + 0 * 32);
+ clib_mov32((u8 *)dst + 1 * 32, (const u8 *)src + 1 * 32);
+}
+
+static inline void
+clib_mov128(u8 *dst, const u8 *src)
+{
+ clib_mov64((u8 *)dst + 0 * 64, (const u8 *)src + 0 * 64);
+ clib_mov64((u8 *)dst + 1 * 64, (const u8 *)src + 1 * 64);
+}
+
+static inline void
+clib_mov256(u8 *dst, const u8 *src)
+{
+ clib_mov128((u8 *)dst + 0 * 128, (const u8 *)src + 0 * 128);
+ clib_mov128((u8 *)dst + 1 * 128, (const u8 *)src + 1 * 128);
+}
+
+static inline void
+clib_mov64blocks(u8 *dst, const u8 *src, size_t n)
+{
+ __m256i ymm0, ymm1;
+
+ while (n >= 64) {
+ ymm0 = _mm256_loadu_si256((const __m256i *)((const u8 *)src + 0 * 32));
+ n -= 64;
+ ymm1 = _mm256_loadu_si256((const __m256i *)((const u8 *)src + 1 * 32));
+ src = (const u8 *)src + 64;
+ _mm256_storeu_si256((__m256i *)((u8 *)dst + 0 * 32), ymm0);
+ _mm256_storeu_si256((__m256i *)((u8 *)dst + 1 * 32), ymm1);
+ dst = (u8 *)dst + 64;
+ }
+}
+
+static inline void
+clib_mov256blocks(u8 *dst, const u8 *src, size_t n)
+{
+ __m256i ymm0, ymm1, ymm2, ymm3, ymm4, ymm5, ymm6, ymm7;
+
+ while (n >= 256) {
+ ymm0 = _mm256_loadu_si256((const __m256i *)((const u8 *)src + 0 * 32));
+ n -= 256;
+ ymm1 = _mm256_loadu_si256((const __m256i *)((const u8 *)src + 1 * 32));
+ ymm2 = _mm256_loadu_si256((const __m256i *)((const u8 *)src + 2 * 32));
+ ymm3 = _mm256_loadu_si256((const __m256i *)((const u8 *)src + 3 * 32));
+ ymm4 = _mm256_loadu_si256((const __m256i *)((const u8 *)src + 4 * 32));
+ ymm5 = _mm256_loadu_si256((const __m256i *)((const u8 *)src + 5 * 32));
+ ymm6 = _mm256_loadu_si256((const __m256i *)((const u8 *)src + 6 * 32));
+ ymm7 = _mm256_loadu_si256((const __m256i *)((const u8 *)src + 7 * 32));
+ src = (const u8 *)src + 256;
+ _mm256_storeu_si256((__m256i *)((u8 *)dst + 0 * 32), ymm0);
+ _mm256_storeu_si256((__m256i *)((u8 *)dst + 1 * 32), ymm1);
+ _mm256_storeu_si256((__m256i *)((u8 *)dst + 2 * 32), ymm2);
+ _mm256_storeu_si256((__m256i *)((u8 *)dst + 3 * 32), ymm3);
+ _mm256_storeu_si256((__m256i *)((u8 *)dst + 4 * 32), ymm4);
+ _mm256_storeu_si256((__m256i *)((u8 *)dst + 5 * 32), ymm5);
+ _mm256_storeu_si256((__m256i *)((u8 *)dst + 6 * 32), ymm6);
+ _mm256_storeu_si256((__m256i *)((u8 *)dst + 7 * 32), ymm7);
+ dst = (u8 *)dst + 256;
+ }
+}
+
+static inline void *
+clib_memcpy(void *dst, const void *src, size_t n)
+{
+ uword dstu = (uword)dst;
+ uword srcu = (uword)src;
+ void *ret = dst;
+ size_t dstofss;
+ size_t bits;
+
+ /**
+ * Copy less than 16 bytes
+ */
+ if (n < 16) {
+ if (n & 0x01) {
+ *(u8 *)dstu = *(const u8 *)srcu;
+ srcu = (uword)((const u8 *)srcu + 1);
+ dstu = (uword)((u8 *)dstu + 1);
+ }
+ if (n & 0x02) {
+ *(uint16_t *)dstu = *(const uint16_t *)srcu;
+ srcu = (uword)((const uint16_t *)srcu + 1);
+ dstu = (uword)((uint16_t *)dstu + 1);
+ }
+ if (n & 0x04) {
+ *(uint32_t *)dstu = *(const uint32_t *)srcu;
+ srcu = (uword)((const uint32_t *)srcu + 1);
+ dstu = (uword)((uint32_t *)dstu + 1);
+ }
+ if (n & 0x08) {
+ *(uint64_t *)dstu = *(const uint64_t *)srcu;
+ }
+ return ret;
+ }
+
+ /**
+ * Fast way when copy size doesn't exceed 512 bytes
+ */
+ if (n <= 32) {
+ clib_mov16((u8 *)dst, (const u8 *)src);
+ clib_mov16((u8 *)dst - 16 + n, (const u8 *)src - 16 + n);
+ return ret;
+ }
+ if (n <= 64) {
+ clib_mov32((u8 *)dst, (const u8 *)src);
+ clib_mov32((u8 *)dst - 32 + n, (const u8 *)src - 32 + n);
+ return ret;
+ }
+ if (n <= 512) {
+ if (n >= 256) {
+ n -= 256;
+ clib_mov256((u8 *)dst, (const u8 *)src);
+ src = (const u8 *)src + 256;
+ dst = (u8 *)dst + 256;
+ }
+ if (n >= 128) {
+ n -= 128;
+ clib_mov128((u8 *)dst, (const u8 *)src);
+ src = (const u8 *)src + 128;
+ dst = (u8 *)dst + 128;
+ }
+ if (n >= 64) {
+ n -= 64;
+ clib_mov64((u8 *)dst, (const u8 *)src);
+ src = (const u8 *)src + 64;
+ dst = (u8 *)dst + 64;
+ }
+COPY_BLOCK_64_BACK31:
+ if (n > 32) {
+ clib_mov32((u8 *)dst, (const u8 *)src);
+ clib_mov32((u8 *)dst - 32 + n, (const u8 *)src - 32 + n);
+ return ret;
+ }
+ if (n > 0) {
+ clib_mov32((u8 *)dst - 32 + n, (const u8 *)src - 32 + n);
+ }
+ return ret;
+ }
+
+ /**
+ * Make store aligned when copy size exceeds 512 bytes
+ */
+ dstofss = (uword)dst & 0x1F;
+ if (dstofss > 0) {
+ dstofss = 32 - dstofss;
+ n -= dstofss;
+ clib_mov32((u8 *)dst, (const u8 *)src);
+ src = (const u8 *)src + dstofss;
+ dst = (u8 *)dst + dstofss;
+ }
+
+ /**
+ * Copy 256-byte blocks.
+ * Use copy block function for better instruction order control,
+ * which is important when load is unaligned.
+ */
+ clib_mov256blocks((u8 *)dst, (const u8 *)src, n);
+ bits = n;
+ n = n & 255;
+ bits -= n;
+ src = (const u8 *)src + bits;
+ dst = (u8 *)dst + bits;
+
+ /**
+ * Copy 64-byte blocks.
+ * Use copy block function for better instruction order control,
+ * which is important when load is unaligned.
+ */
+ if (n >= 64) {
+ clib_mov64blocks((u8 *)dst, (const u8 *)src, n);
+ bits = n;
+ n = n & 63;
+ bits -= n;
+ src = (const u8 *)src + bits;
+ dst = (u8 *)dst + bits;
+ }
+
+ /**
+ * Copy whatever left
+ */
+ goto COPY_BLOCK_64_BACK31;
+}
+
+
+#endif /* included_clib_mamcpy_avx_h */
+
diff --git a/vppinfra/vppinfra/memcpy_sse3.h b/vppinfra/vppinfra/memcpy_sse3.h
new file mode 100644
index 00000000000..12748f78b48
--- /dev/null
+++ b/vppinfra/vppinfra/memcpy_sse3.h
@@ -0,0 +1,330 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef included_clib_memcpy_sse3_h
+#define included_clib_memcpy_sse3_h
+
+#include <stdint.h>
+#include <x86intrin.h>
+
+static inline void
+clib_mov16(u8 *dst, const u8 *src)
+{
+ __m128i xmm0;
+
+ xmm0 = _mm_loadu_si128((const __m128i *)src);
+ _mm_storeu_si128((__m128i *)dst, xmm0);
+}
+
+static inline void
+clib_mov32(u8 *dst, const u8 *src)
+{
+ clib_mov16((u8 *)dst + 0 * 16, (const u8 *)src + 0 * 16);
+ clib_mov16((u8 *)dst + 1 * 16, (const u8 *)src + 1 * 16);
+}
+
+static inline void
+clib_mov64(u8 *dst, const u8 *src)
+{
+ clib_mov32((u8 *)dst + 0 * 32, (const u8 *)src + 0 * 32);
+ clib_mov32((u8 *)dst + 1 * 32, (const u8 *)src + 1 * 32);
+}
+
+static inline void
+clib_mov128(u8 *dst, const u8 *src)
+{
+ clib_mov64((u8 *)dst + 0 * 64, (const u8 *)src + 0 * 64);
+ clib_mov64((u8 *)dst + 1 * 64, (const u8 *)src + 1 * 64);
+}
+
+static inline void
+clib_mov256(u8 *dst, const u8 *src)
+{
+ clib_mov128((u8 *)dst + 0 * 128, (const u8 *)src + 0 * 128);
+ clib_mov128((u8 *)dst + 1 * 128, (const u8 *)src + 1 * 128);
+}
+
+/**
+ * Macro for copying unaligned block from one location to another with constant load offset,
+ * 47 bytes leftover maximum,
+ * locations should not overlap.
+ * Requirements:
+ * - Store is aligned
+ * - Load offset is <offset>, which must be immediate value within [1, 15]
+ * - For <src>, make sure <offset> bit backwards & <16 - offset> bit forwards are available for loading
+ * - <dst>, <src>, <len> must be variables
+ * - __m128i <xmm0> ~ <xmm8> must be pre-defined
+ */
+#define CLIB_MVUNALIGN_LEFT47_IMM(dst, src, len, offset) \
+({ \
+ int tmp; \
+ while (len >= 128 + 16 - offset) { \
+ xmm0 = _mm_loadu_si128((const __m128i *)((const u8 *)src - offset + 0 * 16)); \
+ len -= 128; \
+ xmm1 = _mm_loadu_si128((const __m128i *)((const u8 *)src - offset + 1 * 16)); \
+ xmm2 = _mm_loadu_si128((const __m128i *)((const u8 *)src - offset + 2 * 16)); \
+ xmm3 = _mm_loadu_si128((const __m128i *)((const u8 *)src - offset + 3 * 16)); \
+ xmm4 = _mm_loadu_si128((const __m128i *)((const u8 *)src - offset + 4 * 16)); \
+ xmm5 = _mm_loadu_si128((const __m128i *)((const u8 *)src - offset + 5 * 16)); \
+ xmm6 = _mm_loadu_si128((const __m128i *)((const u8 *)src - offset + 6 * 16)); \
+ xmm7 = _mm_loadu_si128((const __m128i *)((const u8 *)src - offset + 7 * 16)); \
+ xmm8 = _mm_loadu_si128((const __m128i *)((const u8 *)src - offset + 8 * 16)); \
+ src = (const u8 *)src + 128; \
+ _mm_storeu_si128((__m128i *)((u8 *)dst + 0 * 16), _mm_alignr_epi8(xmm1, xmm0, offset)); \
+ _mm_storeu_si128((__m128i *)((u8 *)dst + 1 * 16), _mm_alignr_epi8(xmm2, xmm1, offset)); \
+ _mm_storeu_si128((__m128i *)((u8 *)dst + 2 * 16), _mm_alignr_epi8(xmm3, xmm2, offset)); \
+ _mm_storeu_si128((__m128i *)((u8 *)dst + 3 * 16), _mm_alignr_epi8(xmm4, xmm3, offset)); \
+ _mm_storeu_si128((__m128i *)((u8 *)dst + 4 * 16), _mm_alignr_epi8(xmm5, xmm4, offset)); \
+ _mm_storeu_si128((__m128i *)((u8 *)dst + 5 * 16), _mm_alignr_epi8(xmm6, xmm5, offset)); \
+ _mm_storeu_si128((__m128i *)((u8 *)dst + 6 * 16), _mm_alignr_epi8(xmm7, xmm6, offset)); \
+ _mm_storeu_si128((__m128i *)((u8 *)dst + 7 * 16), _mm_alignr_epi8(xmm8, xmm7, offset)); \
+ dst = (u8 *)dst + 128; \
+ } \
+ tmp = len; \
+ len = ((len - 16 + offset) & 127) + 16 - offset; \
+ tmp -= len; \
+ src = (const u8 *)src + tmp; \
+ dst = (u8 *)dst + tmp; \
+ if (len >= 32 + 16 - offset) { \
+ while (len >= 32 + 16 - offset) { \
+ xmm0 = _mm_loadu_si128((const __m128i *)((const u8 *)src - offset + 0 * 16)); \
+ len -= 32; \
+ xmm1 = _mm_loadu_si128((const __m128i *)((const u8 *)src - offset + 1 * 16)); \
+ xmm2 = _mm_loadu_si128((const __m128i *)((const u8 *)src - offset + 2 * 16)); \
+ src = (const u8 *)src + 32; \
+ _mm_storeu_si128((__m128i *)((u8 *)dst + 0 * 16), _mm_alignr_epi8(xmm1, xmm0, offset)); \
+ _mm_storeu_si128((__m128i *)((u8 *)dst + 1 * 16), _mm_alignr_epi8(xmm2, xmm1, offset)); \
+ dst = (u8 *)dst + 32; \
+ } \
+ tmp = len; \
+ len = ((len - 16 + offset) & 31) + 16 - offset; \
+ tmp -= len; \
+ src = (const u8 *)src + tmp; \
+ dst = (u8 *)dst + tmp; \
+ } \
+})
+
+/**
+ * Macro for copying unaligned block from one location to another,
+ * 47 bytes leftover maximum,
+ * locations should not overlap.
+ * Use switch here because the aligning instruction requires immediate value for shift count.
+ * Requirements:
+ * - Store is aligned
+ * - Load offset is <offset>, which must be within [1, 15]
+ * - For <src>, make sure <offset> bit backwards & <16 - offset> bit forwards are available for loading
+ * - <dst>, <src>, <len> must be variables
+ * - __m128i <xmm0> ~ <xmm8> used in CLIB_MVUNALIGN_LEFT47_IMM must be pre-defined
+ */
+#define CLIB_MVUNALIGN_LEFT47(dst, src, len, offset) \
+({ \
+ switch (offset) { \
+ case 0x01: CLIB_MVUNALIGN_LEFT47_IMM(dst, src, n, 0x01); break; \
+ case 0x02: CLIB_MVUNALIGN_LEFT47_IMM(dst, src, n, 0x02); break; \
+ case 0x03: CLIB_MVUNALIGN_LEFT47_IMM(dst, src, n, 0x03); break; \
+ case 0x04: CLIB_MVUNALIGN_LEFT47_IMM(dst, src, n, 0x04); break; \
+ case 0x05: CLIB_MVUNALIGN_LEFT47_IMM(dst, src, n, 0x05); break; \
+ case 0x06: CLIB_MVUNALIGN_LEFT47_IMM(dst, src, n, 0x06); break; \
+ case 0x07: CLIB_MVUNALIGN_LEFT47_IMM(dst, src, n, 0x07); break; \
+ case 0x08: CLIB_MVUNALIGN_LEFT47_IMM(dst, src, n, 0x08); break; \
+ case 0x09: CLIB_MVUNALIGN_LEFT47_IMM(dst, src, n, 0x09); break; \
+ case 0x0A: CLIB_MVUNALIGN_LEFT47_IMM(dst, src, n, 0x0A); break; \
+ case 0x0B: CLIB_MVUNALIGN_LEFT47_IMM(dst, src, n, 0x0B); break; \
+ case 0x0C: CLIB_MVUNALIGN_LEFT47_IMM(dst, src, n, 0x0C); break; \
+ case 0x0D: CLIB_MVUNALIGN_LEFT47_IMM(dst, src, n, 0x0D); break; \
+ case 0x0E: CLIB_MVUNALIGN_LEFT47_IMM(dst, src, n, 0x0E); break; \
+ case 0x0F: CLIB_MVUNALIGN_LEFT47_IMM(dst, src, n, 0x0F); break; \
+ default:; \
+ } \
+})
+
+static inline void *
+clib_memcpy(void *dst, const void *src, size_t n)
+{
+ __m128i xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8;
+ uword dstu = (uword)dst;
+ uword srcu = (uword)src;
+ void *ret = dst;
+ size_t dstofss;
+ size_t srcofs;
+
+ /**
+ * Copy less than 16 bytes
+ */
+ if (n < 16) {
+ if (n & 0x01) {
+ *(u8 *)dstu = *(const u8 *)srcu;
+ srcu = (uword)((const u8 *)srcu + 1);
+ dstu = (uword)((u8 *)dstu + 1);
+ }
+ if (n & 0x02) {
+ *(u16 *)dstu = *(const u16 *)srcu;
+ srcu = (uword)((const u16 *)srcu + 1);
+ dstu = (uword)((u16 *)dstu + 1);
+ }
+ if (n & 0x04) {
+ *(u32 *)dstu = *(const u32 *)srcu;
+ srcu = (uword)((const u32 *)srcu + 1);
+ dstu = (uword)((u32 *)dstu + 1);
+ }
+ if (n & 0x08) {
+ *(u64 *)dstu = *(const u64 *)srcu;
+ }
+ return ret;
+ }
+
+ /**
+ * Fast way when copy size doesn't exceed 512 bytes
+ */
+ if (n <= 32) {
+ clib_mov16((u8 *)dst, (const u8 *)src);
+ clib_mov16((u8 *)dst - 16 + n, (const u8 *)src - 16 + n);
+ return ret;
+ }
+ if (n <= 48) {
+ clib_mov32((u8 *)dst, (const u8 *)src);
+ clib_mov16((u8 *)dst - 16 + n, (const u8 *)src - 16 + n);
+ return ret;
+ }
+ if (n <= 64) {
+ clib_mov32((u8 *)dst, (const u8 *)src);
+ clib_mov16((u8 *)dst + 32, (const u8 *)src + 32);
+ clib_mov16((u8 *)dst - 16 + n, (const u8 *)src - 16 + n);
+ return ret;
+ }
+ if (n <= 128) {
+ goto COPY_BLOCK_128_BACK15;
+ }
+ if (n <= 512) {
+ if (n >= 256) {
+ n -= 256;
+ clib_mov128((u8 *)dst, (const u8 *)src);
+ clib_mov128((u8 *)dst + 128, (const u8 *)src + 128);
+ src = (const u8 *)src + 256;
+ dst = (u8 *)dst + 256;
+ }
+COPY_BLOCK_255_BACK15:
+ if (n >= 128) {
+ n -= 128;
+ clib_mov128((u8 *)dst, (const u8 *)src);
+ src = (const u8 *)src + 128;
+ dst = (u8 *)dst + 128;
+ }
+COPY_BLOCK_128_BACK15:
+ if (n >= 64) {
+ n -= 64;
+ clib_mov64((u8 *)dst, (const u8 *)src);
+ src = (const u8 *)src + 64;
+ dst = (u8 *)dst + 64;
+ }
+COPY_BLOCK_64_BACK15:
+ if (n >= 32) {
+ n -= 32;
+ clib_mov32((u8 *)dst, (const u8 *)src);
+ src = (const u8 *)src + 32;
+ dst = (u8 *)dst + 32;
+ }
+ if (n > 16) {
+ clib_mov16((u8 *)dst, (const u8 *)src);
+ clib_mov16((u8 *)dst - 16 + n, (const u8 *)src - 16 + n);
+ return ret;
+ }
+ if (n > 0) {
+ clib_mov16((u8 *)dst - 16 + n, (const u8 *)src - 16 + n);
+ }
+ return ret;
+ }
+
+ /**
+ * Make store aligned when copy size exceeds 512 bytes,
+ * and make sure the first 15 bytes are copied, because
+ * unaligned copy functions require up to 15 bytes
+ * backwards access.
+ */
+ dstofss = 16 - ((uword)dst & 0x0F) + 16;
+ n -= dstofss;
+ clib_mov32((u8 *)dst, (const u8 *)src);
+ src = (const u8 *)src + dstofss;
+ dst = (u8 *)dst + dstofss;
+ srcofs = ((uword)src & 0x0F);
+
+ /**
+ * For aligned copy
+ */
+ if (srcofs == 0) {
+ /**
+ * Copy 256-byte blocks
+ */
+ for (; n >= 256; n -= 256) {
+ clib_mov256((u8 *)dst, (const u8 *)src);
+ dst = (u8 *)dst + 256;
+ src = (const u8 *)src + 256;
+ }
+
+ /**
+ * Copy whatever left
+ */
+ goto COPY_BLOCK_255_BACK15;
+ }
+
+ /**
+ * For copy with unaligned load
+ */
+ CLIB_MVUNALIGN_LEFT47(dst, src, n, srcofs);
+
+ /**
+ * Copy whatever left
+ */
+ goto COPY_BLOCK_64_BACK15;
+}
+
+
+#undef CLIB_MVUNALIGN_LEFT47_IMM
+#undef CLIB_MVUNALIGN_LEFT47
+
+#endif /* included_clib_memcpy_sse3_h */
+
diff --git a/vppinfra/vppinfra/mhash.c b/vppinfra/vppinfra/mhash.c
index 7d2dc36a648..a7ff8587981 100644
--- a/vppinfra/vppinfra/mhash.c
+++ b/vppinfra/vppinfra/mhash.c
@@ -288,7 +288,7 @@ uword mhash_set_mem (mhash_t * h, void * key, uword * new_value, uword * old_val
sk = (void *) (h->key_vector_or_heap + i);
sk->heap_handle = handle;
sk->vec.len = n_key_bytes;
- memcpy (sk->vec.vector_data, key, n_key_bytes);
+ clib_memcpy (sk->vec.vector_data, key, n_key_bytes);
/* Advance key past vector header. */
i += sizeof (sk[0]);
@@ -309,7 +309,7 @@ uword mhash_set_mem (mhash_t * h, void * key, uword * new_value, uword * old_val
}
n_key_bytes = h->n_key_bytes;
- memcpy (k, key, n_key_bytes);
+ clib_memcpy (k, key, n_key_bytes);
}
ikey = i;
diff --git a/vppinfra/vppinfra/pfhash.c b/vppinfra/vppinfra/pfhash.c
index 4bea0682e10..81a5f491bd5 100644
--- a/vppinfra/vppinfra/pfhash.c
+++ b/vppinfra/vppinfra/pfhash.c
@@ -79,7 +79,7 @@ u8 * format_pfhash (u8 * s, va_list * args)
if (kv16->values[j] != (u32)~0)
{
vec_add2 (shs, sh, 1);
- memcpy (sh->key, &kv16->kb.k_u32x4[j], p->key_size);
+ clib_memcpy (sh->key, &kv16->kb.k_u32x4[j], p->key_size);
sh->value = kv16->values[j];
}
}
@@ -93,7 +93,7 @@ u8 * format_pfhash (u8 * s, va_list * args)
if (kv8->values[j] != (u32)~0)
{
vec_add2 (shs, sh, 1);
- memcpy (sh->key, &kv8->kb.k_u64[j], p->key_size);
+ clib_memcpy (sh->key, &kv8->kb.k_u64[j], p->key_size);
sh->value = kv8->values[j];
}
}
@@ -106,7 +106,7 @@ u8 * format_pfhash (u8 * s, va_list * args)
if (kv8v8->values[j] != (u64)~0)
{
vec_add2 (shs, sh, 1);
- memcpy (sh->key, &kv8v8->kb.k_u64[j], p->key_size);
+ clib_memcpy (sh->key, &kv8v8->kb.k_u64[j], p->key_size);
sh->value = kv8v8->values[j];
}
}
@@ -120,7 +120,7 @@ u8 * format_pfhash (u8 * s, va_list * args)
if (kv4->values[j] != (u32)~0)
{
vec_add2 (shs, sh, 1);
- memcpy (sh->key, &kv4->kb.kb[j], p->key_size);
+ clib_memcpy (sh->key, &kv4->kb.kb[j], p->key_size);
sh->value = kv4->values[j];
}
}
@@ -131,7 +131,7 @@ u8 * format_pfhash (u8 * s, va_list * args)
hash_foreach_pair (hp, p->overflow_hash,
({
vec_add2 (shs, sh, 1);
- memcpy (sh->key, (u8 *)hp->key, p->key_size);
+ clib_memcpy (sh->key, (u8 *)hp->key, p->key_size);
sh->value = hp->value[0];
}));
@@ -408,7 +408,7 @@ void pfhash_set (pfhash_t * p, u32 bucket, void * key, void * value)
return;
}
kcopy = clib_mem_alloc (p->key_size);
- memcpy (kcopy, key, p->key_size);
+ clib_memcpy (kcopy, key, p->key_size);
hash_set_mem (p->overflow_hash, kcopy, value);
p->nitems++;
p->nitems_in_overflow++;
@@ -462,7 +462,7 @@ void pfhash_set (pfhash_t * p, u32 bucket, void * key, void * value)
{
if (kv16->values[i] == (u32)~0)
{
- memcpy (&kv16->kb.k_u32x4[i], key, p->key_size);
+ clib_memcpy (&kv16->kb.k_u32x4[i], key, p->key_size);
kv16->values[i] = (u32)(u64) value;
return;
}
@@ -471,13 +471,13 @@ void pfhash_set (pfhash_t * p, u32 bucket, void * key, void * value)
for (i = 0; i < 3; i++)
{
kcopy = clib_mem_alloc (p->key_size);
- memcpy (kcopy, &kv16->kb.k_u32x4[i], p->key_size);
+ clib_memcpy (kcopy, &kv16->kb.k_u32x4[i], p->key_size);
hash_set_mem (p->overflow_hash, kcopy, kv16->values[i]);
p->nitems_in_overflow++;
}
/* Add new key to overflow */
kcopy = clib_mem_alloc (p->key_size);
- memcpy (kcopy, key, p->key_size);
+ clib_memcpy (kcopy, key, p->key_size);
hash_set_mem (p->overflow_hash, kcopy, value);
p->buckets[bucket] = PFHASH_BUCKET_OVERFLOW;
p->overflow_count++;
@@ -491,7 +491,7 @@ void pfhash_set (pfhash_t * p, u32 bucket, void * key, void * value)
{
if (kv8->values[i] == (u32)~0)
{
- memcpy (&kv8->kb.k_u64[i], key, 8);
+ clib_memcpy (&kv8->kb.k_u64[i], key, 8);
kv8->values[i] = (u32)(u64) value;
return;
}
@@ -500,7 +500,7 @@ void pfhash_set (pfhash_t * p, u32 bucket, void * key, void * value)
for (i = 0; i < 5; i++)
{
kcopy = clib_mem_alloc (p->key_size);
- memcpy (kcopy, &kv8->kb.k_u64[i], 8);
+ clib_memcpy (kcopy, &kv8->kb.k_u64[i], 8);
hash_set_mem (p->overflow_hash, kcopy, kv8->values[i]);
p->nitems_in_overflow++;
}
@@ -511,7 +511,7 @@ void pfhash_set (pfhash_t * p, u32 bucket, void * key, void * value)
{
if (kv8v8->values[i] == (u64)~0)
{
- memcpy (&kv8v8->kb.k_u64[i], key, 8);
+ clib_memcpy (&kv8v8->kb.k_u64[i], key, 8);
kv8v8->values[i] = (u64) value;
return;
}
@@ -520,7 +520,7 @@ void pfhash_set (pfhash_t * p, u32 bucket, void * key, void * value)
for (i = 0; i < 4; i++)
{
kcopy = clib_mem_alloc (p->key_size);
- memcpy (kcopy, &kv8v8->kb.k_u64[i], 8);
+ clib_memcpy (kcopy, &kv8v8->kb.k_u64[i], 8);
hash_set_mem (p->overflow_hash, kcopy, kv8v8->values[i]);
p->nitems_in_overflow++;
}
@@ -528,7 +528,7 @@ void pfhash_set (pfhash_t * p, u32 bucket, void * key, void * value)
}
/* Add new key to overflow */
kcopy = clib_mem_alloc (p->key_size);
- memcpy (kcopy, key, p->key_size);
+ clib_memcpy (kcopy, key, p->key_size);
hash_set_mem (p->overflow_hash, kcopy, value);
p->buckets[bucket] = PFHASH_BUCKET_OVERFLOW;
p->overflow_count++;
@@ -540,7 +540,7 @@ void pfhash_set (pfhash_t * p, u32 bucket, void * key, void * value)
{
if (kv4->values[i] == (u32)~0)
{
- memcpy (&kv4->kb.kb[i], key, 4);
+ clib_memcpy (&kv4->kb.kb[i], key, 4);
kv4->values[i] = (u32)(u64) value;
return;
}
@@ -549,13 +549,13 @@ void pfhash_set (pfhash_t * p, u32 bucket, void * key, void * value)
for (i = 0; i < 8; i++)
{
kcopy = clib_mem_alloc (p->key_size);
- memcpy (kcopy, &kv4->kb.kb[i], 4);
+ clib_memcpy (kcopy, &kv4->kb.kb[i], 4);
hash_set_mem (p->overflow_hash, kcopy, kv4->values[i]);
p->nitems_in_overflow++;
}
/* Add new key to overflow */
kcopy = clib_mem_alloc (p->key_size);
- memcpy (kcopy, key, p->key_size);
+ clib_memcpy (kcopy, key, p->key_size);
hash_set_mem (p->overflow_hash, kcopy, value);
p->buckets[bucket] = PFHASH_BUCKET_OVERFLOW;
p->overflow_count++;
diff --git a/vppinfra/vppinfra/serialize.c b/vppinfra/vppinfra/serialize.c
index ab533e87d37..4025b704700 100644
--- a/vppinfra/vppinfra/serialize.c
+++ b/vppinfra/vppinfra/serialize.c
@@ -141,7 +141,7 @@ void serialize_cstring (serialize_main_t * m, char * s)
if (len > 0)
{
p = serialize_get (m, len);
- memcpy (p, s, len);
+ clib_memcpy (p, s, len);
}
}
@@ -161,7 +161,7 @@ void unserialize_cstring (serialize_main_t * m, char ** s)
{
r = vec_new (char, len + 1);
p = unserialize_get (m, len);
- memcpy (r, p, len);
+ clib_memcpy (r, p, len);
/* Null terminate. */
r[len] = 0;
@@ -175,7 +175,7 @@ void serialize_vec_8 (serialize_main_t * m, va_list * va)
u8 * s = va_arg (*va, u8 *);
u32 n = va_arg (*va, u32);
u8 * p = serialize_get (m, n * sizeof (u8));
- memcpy (p, s, n * sizeof (u8));
+ clib_memcpy (p, s, n * sizeof (u8));
}
void unserialize_vec_8 (serialize_main_t * m, va_list * va)
@@ -183,7 +183,7 @@ void unserialize_vec_8 (serialize_main_t * m, va_list * va)
u8 * s = va_arg (*va, u8 *);
u32 n = va_arg (*va, u32);
u8 * p = unserialize_get (m, n);
- memcpy (s, p, n);
+ clib_memcpy (s, p, n);
}
#define _(n_bits) \
@@ -582,7 +582,7 @@ void serialize_magic (serialize_main_t * m, void * magic, u32 magic_bytes)
void * p;
serialize_integer (m, magic_bytes, sizeof (magic_bytes));
p = serialize_get (m, magic_bytes);
- memcpy (p, magic, magic_bytes);
+ clib_memcpy (p, magic, magic_bytes);
}
void unserialize_check_magic (serialize_main_t * m, void * magic,
@@ -664,7 +664,7 @@ static void * serialize_write_not_inline (serialize_main_header_t * m,
if (n_left_o > 0 && n_left_b > 0)
{
uword n = clib_min (n_left_b, n_left_o);
- memcpy (s->buffer + cur_bi, s->overflow_buffer, n);
+ clib_memcpy (s->buffer + cur_bi, s->overflow_buffer, n);
cur_bi += n;
n_left_b -= n;
n_left_o -= n;
diff --git a/vppinfra/vppinfra/socket.c b/vppinfra/vppinfra/socket.c
index ae55dc64262..44ceb65984a 100644
--- a/vppinfra/vppinfra/socket.c
+++ b/vppinfra/vppinfra/socket.c
@@ -100,7 +100,7 @@ socket_config (char * config,
{
struct sockaddr_un * su = addr;
su->sun_family = PF_LOCAL;
- memcpy (&su->sun_path, config,
+ clib_memcpy (&su->sun_path, config,
clib_min (sizeof (su->sun_path), 1 + strlen (config)));
*addr_len = sizeof (su[0]);
}
@@ -157,7 +157,7 @@ socket_config (char * config,
if (! host)
error = clib_error_return (0, "unknown host `%s'", config);
else
- memcpy (&sa->sin_addr.s_addr, host->h_addr_list[0], host->h_length);
+ clib_memcpy (&sa->sin_addr.s_addr, host->h_addr_list[0], host->h_length);
}
else
diff --git a/vppinfra/vppinfra/string.h b/vppinfra/vppinfra/string.h
index 1b4d7664103..12498215c66 100644
--- a/vppinfra/vppinfra/string.h
+++ b/vppinfra/vppinfra/string.h
@@ -1,4 +1,18 @@
/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
Copyright (c) 2001, 2002, 2003 Eliot Dresselhaus
Permission is hereby granted, free of charge, to any person obtaining
@@ -41,4 +55,12 @@
/* Exchanges source and destination. */
void clib_memswap (void * _a, void * _b, uword bytes);
+#if __AVX__
+#include <vppinfra/memcpy_avx.h>
+#elif __SSE3__
+#include <vppinfra/memcpy_sse3.h>
+#else
+#define clib_memcpy(a,b,c) memcpy(a,b,c)
+#endif
+
#endif /* included_clib_string_h */
diff --git a/vppinfra/vppinfra/test_elf.c b/vppinfra/vppinfra/test_elf.c
index 914513abf90..afdb4708a5c 100644
--- a/vppinfra/vppinfra/test_elf.c
+++ b/vppinfra/vppinfra/test_elf.c
@@ -70,7 +70,7 @@ static clib_error_t * elf_set_interpreter (elf_main_t * em, char * interp)
/* Put in new null terminated string. */
memset (s->contents, 0, vec_len (s->contents));
- memcpy (s->contents, interp, strlen (interp));
+ clib_memcpy (s->contents, interp, strlen (interp));
return 0;
}
diff --git a/vppinfra/vppinfra/test_elog.c b/vppinfra/vppinfra/test_elog.c
index b109e9965ce..bb82c275354 100644
--- a/vppinfra/vppinfra/test_elog.c
+++ b/vppinfra/vppinfra/test_elog.c
@@ -188,7 +188,7 @@ int test_elog_main (unformat_input_t * input)
d = ELOG_DATA (em, e);
v = format (0, "foo %d%c", i, 0);
- memcpy (d->s, v, clib_min (vec_len (v), sizeof (d->s)));
+ clib_memcpy (d->s, v, clib_min (vec_len (v), sizeof (d->s)));
}
{
diff --git a/vppinfra/vppinfra/vec.c b/vppinfra/vppinfra/vec.c
index 58ffb34dce4..912038d56eb 100644
--- a/vppinfra/vppinfra/vec.c
+++ b/vppinfra/vppinfra/vec.c
@@ -87,7 +87,7 @@ void * vec_resize_allocate_memory (void * v,
clib_panic ("vec_resize fails, length increment %d, data bytes %d, alignment %d",
length_increment, data_bytes, data_align);
- memcpy (new, old, old_alloc_bytes);
+ clib_memcpy (new, old, old_alloc_bytes);
clib_mem_free (old);
v = new;
diff --git a/vppinfra/vppinfra/vec.h b/vppinfra/vppinfra/vec.h
index 06080062b47..0c3237d3852 100644
--- a/vppinfra/vppinfra/vec.h
+++ b/vppinfra/vppinfra/vec.h
@@ -319,7 +319,7 @@ do { \
if (_v(l) > 0) \
{ \
vec_resize_ha (_v(v), _v(l), (H), (A)); \
- memcpy (_v(v), (V), _v(l) * sizeof ((V)[0])); \
+ clib_memcpy (_v(v), (V), _v(l) * sizeof ((V)[0]));\
} \
_v(v); \
})
@@ -346,7 +346,8 @@ do { \
@param DST destination
@param SRC source
*/
-#define vec_copy(DST,SRC) memcpy (DST, SRC, vec_len (DST) * sizeof ((DST)[0]))
+#define vec_copy(DST,SRC) clib_memcpy (DST, SRC, vec_len (DST) * \
+ sizeof ((DST)[0]))
/** \brief Clone a vector. Make a new vector with the
same size as a given vector but possibly with a different type.
@@ -543,7 +544,7 @@ do { \
word _v(n) = (N); \
word _v(l) = vec_len (V); \
V = _vec_resize ((V), _v(n), (_v(l) + _v(n)) * sizeof ((V)[0]), (H), (A)); \
- memcpy ((V) + _v(l), (E), _v(n) * sizeof ((V)[0])); \
+ clib_memcpy ((V) + _v(l), (E), _v(n) * sizeof ((V)[0])); \
} while (0)
/** \brief Add N elements to end of vector V (no header, unspecified alignment)
@@ -705,7 +706,8 @@ do { \
memmove ((V) + _v(m) + _v(n), \
(V) + _v(m), \
(_v(l) - _v(m)) * sizeof ((V)[0])); \
- memcpy ((V) + _v(m), (E), _v(n) * sizeof ((V)[0])); \
+ clib_memcpy ((V) + _v(m), (E), \
+ _v(n) * sizeof ((V)[0])); \
} while (0)
/** \brief Insert N vector elements starting at element M,
@@ -779,7 +781,7 @@ do { \
\
v1 = _vec_resize ((v1), _v(l2), \
(_v(l1) + _v(l2)) * sizeof ((v1)[0]), 0, 0); \
- memcpy ((v1) + _v(l1), (v2), _v(l2) * sizeof ((v2)[0])); \
+ clib_memcpy ((v1) + _v(l1), (v2), _v(l2) * sizeof ((v2)[0])); \
} while (0)
/** \brief Append v2 after v1. Result in v1. Specified alignment.
@@ -795,7 +797,7 @@ do { \
\
v1 = _vec_resize ((v1), _v(l2), \
(_v(l1) + _v(l2)) * sizeof ((v1)[0]), 0, align); \
- memcpy ((v1) + _v(l1), (v2), _v(l2) * sizeof ((v2)[0])); \
+ clib_memcpy ((v1) + _v(l1), (v2), _v(l2) * sizeof ((v2)[0])); \
} while (0)
/** \brief Prepend v2 before v1. Result in v1.
@@ -811,7 +813,7 @@ do { \
v1 = _vec_resize ((v1), _v(l2), \
(_v(l1) + _v(l2)) * sizeof ((v1)[0]), 0, 0); \
memmove ((v1) + _v(l2), (v1), _v(l1) * sizeof ((v1)[0])); \
- memcpy ((v1), (v2), _v(l2) * sizeof ((v2)[0])); \
+ clib_memcpy ((v1), (v2), _v(l2) * sizeof ((v2)[0])); \
} while (0)
/** \brief Prepend v2 before v1. Result in v1. Specified alignment
@@ -828,7 +830,7 @@ do { \
v1 = _vec_resize ((v1), _v(l2), \
(_v(l1) + _v(l2)) * sizeof ((v1)[0]), 0, align); \
memmove ((v1) + _v(l2), (v1), _v(l1) * sizeof ((v1)[0])); \
- memcpy ((v1), (v2), _v(l2) * sizeof ((v2)[0])); \
+ clib_memcpy ((v1), (v2), _v(l2) * sizeof ((v2)[0])); \
} while (0)
@@ -909,7 +911,7 @@ do { \
vec_reset_length (V); \
vec_validate ((V), (L)); \
if ((S) && (L)) \
- memcpy ((V), (S), (L)); \
+ clib_memcpy ((V), (S), (L)); \
(V)[(L)] = 0; \
} while (0)